From d112a2a95e5e12c54c5aa7ee0ba3daaf26f677d3 Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Fri, 29 May 2026 11:50:34 -0700
Subject: [PATCH 01/16] support multiple aiperf runs via cmd_args.aiperf_phases

---
 conf/experimental/ai_dynamo/test/sglang.toml  |  15 ++
 conf/experimental/ai_dynamo/test/vllm.toml    |  15 ++
 .../ai_dynamo/test_scenario/vllm_lmcache.toml |  16 +-
 src/cloudai/workloads/ai_dynamo/__init__.py   |   4 +
 src/cloudai/workloads/ai_dynamo/ai_dynamo.py  |  28 ++-
 src/cloudai/workloads/ai_dynamo/aiperf.sh     | 178 +-----------------
 .../workloads/ai_dynamo/runtime/aiperf.py     |  81 ++++++++
 .../ai_dynamo/slurm_command_gen_strategy.py   | 142 +++++++++++++-
 .../test_command_gen_strategy_slurm.py        |  94 +++++++++
 .../ai_dynamo/test_runtime_aiperf.py          |  63 +++++++
 10 files changed, 456 insertions(+), 180 deletions(-)
 create mode 100644 src/cloudai/workloads/ai_dynamo/runtime/aiperf.py
 create mode 100644 tests/workloads/ai_dynamo/test_runtime_aiperf.py

diff --git a/conf/experimental/ai_dynamo/test/sglang.toml b/conf/experimental/ai_dynamo/test/sglang.toml
index 67fc999f0..34bc9cbff 100644
--- a/conf/experimental/ai_dynamo/test/sglang.toml
+++ b/conf/experimental/ai_dynamo/test/sglang.toml
@@ -18,6 +18,7 @@ name = "sglang"
 description = "sglang backend"
 test_template_name = "AIDynamo"
 extra_container_mounts = ["/run/udev:/run/udev"]
+dse_excluded_args = ["cmd_args.aiperf_phases"]
 
 [cmd_args]
 docker_image_url = "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.1.1"
@@ -88,6 +89,20 @@ workloads = "aiperf.sh"
     request-count = 50
     synthetic-input-tokens-mean = 300
 
+  [[cmd_args.aiperf_phases]]
+  name = "round_1"
+
+    [cmd_args.aiperf_phases.args]
+    concurrency = 2
+    request-count = 50
+
+  [[cmd_args.aiperf_phases]]
+  name = "round_2"
+
+    [cmd_args.aiperf_phases.args]
+    concurrency = 4
+    request-count = 50
+
   [cmd_args.aiperf_accuracy]
   entrypoint = "aiperf profile"
   setup-cmd = "python -m pip install --break-system-packages --ignore-installed blinker==1.9.0 && python -m pip install --break-system-packages --upgrade aiperf==0.8.0"
diff --git a/conf/experimental/ai_dynamo/test/vllm.toml b/conf/experimental/ai_dynamo/test/vllm.toml
index 8a5f3b939..0667f1cab 100644
--- a/conf/experimental/ai_dynamo/test/vllm.toml
+++ b/conf/experimental/ai_dynamo/test/vllm.toml
@@ -18,6 +18,7 @@ name = "vLLM"
 description = "vLLM backend"
 test_template_name = "AIDynamo"
 extra_container_mounts = ["/run/udev:/run/udev"]
+dse_excluded_args = ["cmd_args.aiperf_phases"]
 
 [cmd_args]
 docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1"
@@ -78,6 +79,20 @@ workloads = "aiperf.sh"
     request-count = 50
     synthetic-input-tokens-mean = 300
 
+  [[cmd_args.aiperf_phases]]
+  name = "round_1"
+
+    [cmd_args.aiperf_phases.args]
+    concurrency = 2
+    request-count = 50
+
+  [[cmd_args.aiperf_phases]]
+  name = "round_2"
+
+    [cmd_args.aiperf_phases.args]
+    concurrency = 4
+    request-count = 50
+
   [cmd_args.aiperf_accuracy]
   entrypoint = "aiperf profile"
   setup-cmd = "python -m pip install --break-system-packages --upgrade aiperf==0.8.0"
diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml
index 564311240..f975e784e 100644
--- a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml
+++ b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml
@@ -24,7 +24,7 @@ description = "Self-contained AIDynamo scenario wiring vLLM disaggregated infere
 test_template_name = "AIDynamo"
 time_limit = "00:10:00"
 extra_container_mounts = ["/run/udev:/run/udev"]
-dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"]
+dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports", "cmd_args.aiperf_phases"]
 
   [Tests.cmd_args]
   docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1"
@@ -90,6 +90,20 @@ dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"]
       request-count = 50
       synthetic-input-tokens-mean = 300
 
+    [[Tests.cmd_args.aiperf_phases]]
+    name = "round_1"
+
+      [Tests.cmd_args.aiperf_phases.args]
+      concurrency = 2
+      request-count = 50
+
+    [[Tests.cmd_args.aiperf_phases]]
+    name = "round_2"
+
+      [Tests.cmd_args.aiperf_phases.args]
+      concurrency = 4
+      request-count = 50
+
     [Tests.cmd_args.aiperf_accuracy]
     entrypoint = "aiperf profile"
     setup-cmd = "python -m pip install --break-system-packages --upgrade aiperf==0.8.0"
diff --git a/src/cloudai/workloads/ai_dynamo/__init__.py b/src/cloudai/workloads/ai_dynamo/__init__.py
index 5e430068d..57e2eb99e 100644
--- a/src/cloudai/workloads/ai_dynamo/__init__.py
+++ b/src/cloudai/workloads/ai_dynamo/__init__.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 
 from .ai_dynamo import (
+    AIPERF_COMMANDS_FILE_NAME,
     LMCACHE_CONFIG_BACKUP_FILE_NAME,
     LMCACHE_CONFIG_FILE_NAME,
     AIDynamoArgs,
@@ -22,6 +23,7 @@
     AIDynamoTestDefinition,
     AIPerf,
     AIPerfAccuracy,
+    AIPerfPhase,
     GenAIPerf,
     LMCacheController,
     WorkerBaseArgs,
@@ -32,6 +34,7 @@
 from .slurm_command_gen_strategy import AIDynamoSlurmCommandGenStrategy
 
 __all__ = [
+    "AIPERF_COMMANDS_FILE_NAME",
     "LMCACHE_CONFIG_BACKUP_FILE_NAME",
     "LMCACHE_CONFIG_FILE_NAME",
     "AIDynamoArgs",
@@ -42,6 +45,7 @@
     "AIDynamoTestDefinition",
     "AIPerf",
     "AIPerfAccuracy",
+    "AIPerfPhase",
     "GenAIPerf",
     "LMCacheController",
     "WorkerBaseArgs",
diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
index 7f8da4165..5c45a149b 100644
--- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
+++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
@@ -42,6 +42,7 @@
 from cloudai.systems.slurm import SlurmSystem
 
 AIPERF_ARTIFACTS_DIR = "aiperf_artifacts"
+AIPERF_COMMANDS_FILE_NAME = "aiperf_commands.json"
 AIPERF_ACCURACY_ARTIFACTS_DIR = "aiperf_accuracy_artifacts"
 AIPERF_ACCURACY_RESULTS_CSV = "accuracy_results.csv"
 LMCACHE_CONFIG_FILE_NAME = "lmcache-config.yaml"
@@ -254,6 +255,7 @@ class AIPerf(Workload):
     name: str = "aiperf"
     cmd: str = "aiperf profile"
     script: File = File(Path(__file__).parent.parent / "ai_dynamo/aiperf.sh")
+    runtime: File = Field(default=File(Path(__file__).parent.parent / "ai_dynamo/runtime/aiperf.py"), exclude=True)
     setup_cmd: str | None = Field(
         default=None,
         serialization_alias="setup-cmd",
@@ -267,7 +269,13 @@ class AIPerf(Workload):
 
     @property
     def installables(self) -> list[Installable]:
-        return [self.script]
+        return [self.script, self.runtime]
+
+
+class AIPerfPhase(AIPerf):
+    """Named AIPerf phase that overrides the base AIPerf configuration."""
+
+    name: str = Field(min_length=1, pattern=r"^[A-Za-z0-9_.-]+$")
 
 
 class AIPerfAccuracy(BaseModel):
@@ -324,6 +332,7 @@ class AIDynamoCmdArgs(CmdArgs):
     lmcache_controller: LMCacheController | None = None
     genai_perf: GenAIPerf = Field(default_factory=GenAIPerf)
     aiperf: AIPerf = Field(default_factory=AIPerf)
+    aiperf_phases: list[AIPerfPhase] | None = None
     aiperf_accuracy: AIPerfAccuracy | None = None
     workloads: str = "genai_perf.sh"
 
@@ -341,6 +350,23 @@ def validate_workloads(cls, v: str) -> str:
     def workloads_list(self) -> list[str]:
         return [w.strip() for w in self.workloads.split(",")]
 
+    @model_validator(mode="after")
+    def validate_aiperf_phases(self) -> "AIDynamoCmdArgs":
+        """Validate AIPerf phases."""
+        if not self.aiperf_phases:
+            return self
+
+        seen = set()
+        duplicates = set()
+        for phase in self.aiperf_phases:
+            if phase.name in seen:
+                duplicates.add(phase.name)
+            seen.add(phase.name)
+        if duplicates:
+            raise ValueError(f"AIPerf phase names must be unique. Duplicates: {sorted(duplicates)}")
+
+        return self
+
     @property
     def installables(self) -> list[Installable]:
         return [
diff --git a/src/cloudai/workloads/ai_dynamo/aiperf.sh b/src/cloudai/workloads/ai_dynamo/aiperf.sh
index 15cee3a58..476ee3062 100644
--- a/src/cloudai/workloads/ai_dynamo/aiperf.sh
+++ b/src/cloudai/workloads/ai_dynamo/aiperf.sh
@@ -2,182 +2,8 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
 # Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# aiperf.sh — aiperf profile wrapper for ai_dynamo workloads.
-#
-# Called from ai_dynamo.sh's launch_workload() with:
-#   bash aiperf.sh --result-dir <dir> --model <model> --url <url> --port <port>
-#                  [--cmd <cmd>] [--report-name <name>] [--artifact-dir-name <name>] [--extra-args <args>]
-#                  -- <aiperf-args>...
-#
-# Context flags (before --) that are recognised and used:
-#   --result-dir    Directory where artifacts and the final report are written.
-#   --model         HuggingFace model identifier (e.g. Qwen/Qwen3-0.6B).
-#   --url           Base URL of the dynamo.frontend (e.g. http://node01).
-#   --port          HTTP port the dynamo.frontend is listening on.
-#   --report-name   Output CSV name (default: aiperf_report.csv).
-#   --artifact-dir-name  Artifact directory name under --result-dir (default: aiperf_artifacts).
-#   --cmd           Full launch command including subcommand (default: "aiperf profile").
-#   --setup-cmd     Optional shell command run before launching aiperf.
-#   --extra-args    Raw string appended verbatim after all other flags.
-#
-# All unrecognised flags (--install-dir, --gpus-per-node, etc.) are silently
-# consumed so this script is forward-compatible with launch_workload additions.
-#
-# Everything after -- is passed directly to the aiperf profile invocation.
 
 set -Eeuo pipefail
 
-result_dir=""
-model=""
-url="http://localhost"
-port=8000
-report_name="aiperf_report.csv"
-artifact_dir_name="aiperf_artifacts"
-cmd="aiperf profile"
-setup_cmd=""
-declare -a extra_args=()
-declare -a profile_args=()
-
-log() {
-  echo "[$(date '+%F %T') $(hostname)]: $*"
-}
-
-_parse_aiperf_args() {
-  while [[ $# -ge 2 ]]; do
-    case "$1" in
-      --*) profile_args+=("$1" "$2"); shift 2 ;;
-      *)   shift ;;
-    esac
-  done
-  # Capture a trailing lone boolean flag if present.
-  # Use if/fi — not [[ ]] && — so set -e does not trigger on a false condition.
-  if [[ $# -eq 1 && "$1" == --* ]]; then
-    profile_args+=("$1")
-  fi
-}
-
-process_args() {
-  while [[ $# -gt 0 ]]; do
-    case "$1" in
-      --result-dir)   result_dir="$2";  shift 2 ;;
-      --model)        model="$2";       shift 2 ;;
-      --url)          url="$2";         shift 2 ;;
-      --port)         port="$2";        shift 2 ;;
-      --report-name)  report_name="$2"; shift 2 ;;
-      --artifact-dir-name) artifact_dir_name="$2"; shift 2 ;;
-      --cmd)               cmd="$2";               shift 2 ;;
-      --setup-cmd)         setup_cmd="$2";         shift 2 ;;
-      --extra-args)        read -ra extra_args <<< "$2"; shift 2 ;;
-      --)                  shift; _parse_aiperf_args "$@"; break ;;
-      --*)            if [[ -n "${2:-}" && "${2}" != -* ]]; then shift 2; else shift 1; fi ;;  # consume unknown flag; shift 2 only if next arg is a value
-      *)              shift ;;
-    esac
-  done
-
-  log "Parsed args:
-    result_dir:   $result_dir
-    model:        $model
-    url:          $url
-    port:         $port
-    report_name:  $report_name
-    artifact_dir: $artifact_dir_name
-    cmd:          $cmd
-    setup_cmd:    ${setup_cmd:-}
-    extra_args:   ${extra_args[*]:-}
-    profile_args: ${profile_args[*]:-}"
-}
-
-run_setup_cmd() {
-  if [[ -z "$setup_cmd" ]]; then
-    return
-  fi
-
-  log "Running AIPerf setup command: $setup_cmd"
-  bash -lc "$setup_cmd"
-  log "AIPerf setup command complete"
-}
-
-process_results() {
-  local artifact_dir="$result_dir/$artifact_dir_name"
-  local csv_path=""
-
-  if [[ -f "$artifact_dir/profile_export_aiperf.csv" ]]; then
-    csv_path="$artifact_dir/profile_export_aiperf.csv"
-  else
-    csv_path=$(find "$artifact_dir" -name "*aiperf*.csv" -print -quit 2>/dev/null || true)
-  fi
-
-  if [[ -n "$csv_path" ]]; then
-    cp "$csv_path" "$result_dir/$report_name"
-    log "aiperf report saved to $result_dir/$report_name"
-  else
-    log "ERROR: no CSV found in $artifact_dir — aiperf may not have completed"
-    exit 1
-  fi
-
-}
-
-run_aiperf() {
-  local full_url="$1"
-  local artifact_dir="$2"
-  local -a run_cmd=()
-  read -ra run_cmd <<< "$cmd"
-  local -a launch_cmd=(
-    "${run_cmd[@]}"
-    --model "$model"
-    --url "$full_url"
-    --endpoint-type chat
-    --streaming
-    --artifact-dir "$artifact_dir"
-    --no-server-metrics
-  )
-
-  log "Launching aiperf: ${run_cmd[*]} --model $model --url $full_url"
-
-  if [[ "${#profile_args[@]}" -gt 0 ]]; then
-    launch_cmd+=("${profile_args[@]}")
-  fi
-  if [[ "${#extra_args[@]}" -gt 0 ]]; then
-    launch_cmd+=("${extra_args[@]}")
-  fi
-
-  "${launch_cmd[@]}"
-
-  log "aiperf run complete"
-}
-
-main() {
-  process_args "$@"
-
-  if [[ -z "$result_dir" ]]; then
-    log "ERROR: --result-dir is required"; exit 1
-  fi
-  if [[ -z "$model" ]]; then
-    log "ERROR: --model is required"; exit 1
-  fi
-
-  run_setup_cmd
-
-  local full_url="${url}:${port}"
-  local artifact_dir="$result_dir/$artifact_dir_name"
-  rm -rf "$artifact_dir"
-
-  run_aiperf "$full_url" "$artifact_dir"
-  process_results
-}
-
-main "$@"
-exit 0
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+exec python3 "${SCRIPT_DIR}/aiperf.py" "$@"
diff --git a/src/cloudai/workloads/ai_dynamo/runtime/aiperf.py b/src/cloudai/workloads/ai_dynamo/runtime/aiperf.py
new file mode 100644
index 000000000..b5476d571
--- /dev/null
+++ b/src/cloudai/workloads/ai_dynamo/runtime/aiperf.py
@@ -0,0 +1,81 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""Execute generated AIPerf runtime entries."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import shlex
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+from typing import Any
+
+
+def log(message: str) -> None:
+    print(message, flush=True)
+
+
+def substitute_frontend_url(values: list[str], frontend_url: str) -> list[str]:
+    return [value.replace("{frontend_url}", frontend_url) for value in values]
+
+
+def run_entry(entry: dict[str, Any], frontend_url: str) -> None:
+    argv = substitute_frontend_url([*entry["cmd"], *entry.get("cli", [])], frontend_url)
+    output_folder = entry.get("output_folder")
+    if output_folder:
+        shutil.rmtree(output_folder, ignore_errors=True)
+
+    log(f"Running {entry['name']}: {shlex.join(argv)}")
+    log_file = entry.get("log_file")
+    if log_file:
+        log_path = Path(log_file)
+        log_path.parent.mkdir(parents=True, exist_ok=True)
+        with log_path.open("w", encoding="utf-8") as fp:
+            subprocess.run(argv, stdout=fp, stderr=subprocess.STDOUT, check=True)
+    else:
+        subprocess.run(argv, check=True)
+
+    report_source = entry.get("report_source")
+    report_file = entry.get("report_file")
+    if report_source and report_file:
+        report_path = Path(report_file)
+        report_path.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(report_source, report_path)
+        log(f"AIPerf report saved to {report_path}")
+
+    final_report_file = entry.get("final_report_file")
+    if final_report_file and report_file:
+        shutil.copy2(report_file, final_report_file)
+        log(f"Final AIPerf report saved to {final_report_file}")
+
+
+def parse_args(argv: list[str]) -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--commands-file", required=True)
+    parser.add_argument("--url", required=True)
+    args, _ = parser.parse_known_args(argv)
+    return args
+
+
+def main(argv: list[str]) -> int:
+    try:
+        args = parse_args(argv)
+        with Path(args.commands_file).open(encoding="utf-8") as fp:
+            entries = json.load(fp)
+
+        for entry in entries:
+            run_entry(entry, args.url)
+    except Exception as exc:
+        log(f"ERROR: {exc}")
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index 861a4c469..4cbb33823 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -14,10 +14,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
 import logging
 import shlex
 from pathlib import Path
-from typing import List, cast
+from typing import Any, List, cast
 
 import yaml
 from pydantic import BaseModel, TypeAdapter, ValidationError
@@ -25,7 +26,15 @@
 from cloudai.core import File, GitRepo
 from cloudai.systems.slurm import SlurmCommandGenStrategy
 
-from .ai_dynamo import LMCACHE_CONFIG_BACKUP_FILE_NAME, LMCACHE_CONFIG_FILE_NAME, AIDynamoTestDefinition
+from .ai_dynamo import (
+    AIPERF_ARTIFACTS_DIR,
+    AIPERF_COMMANDS_FILE_NAME,
+    LMCACHE_CONFIG_BACKUP_FILE_NAME,
+    LMCACHE_CONFIG_FILE_NAME,
+    AIDynamoTestDefinition,
+    AIPerf,
+    AIPerfPhase,
+)
 
 
 class AIDynamoSlurmCommandGenStrategy(SlurmCommandGenStrategy):
@@ -109,8 +118,135 @@ def _prepare_lmcache_config(self):
         (self.test_run.output_path / LMCACHE_CONFIG_FILE_NAME).write_text(config)
         (self.test_run.output_path / LMCACHE_CONFIG_BACKUP_FILE_NAME).write_text(config)
 
+    def _aiperf_config_dict(self, aiperf: AIPerf, *, exclude_unset: bool = False) -> dict[str, Any]:
+        return aiperf.model_dump(
+            by_alias=True,
+            exclude={"args", "name", "repo", "script", "runtime"},
+            exclude_none=True,
+            exclude_unset=exclude_unset,
+        )
+
+    def _aiperf_args_dict(self, aiperf: AIPerf, *, exclude_unset: bool = False) -> dict[str, Any]:
+        return aiperf.args.model_dump(by_alias=True, exclude_none=True, exclude_unset=exclude_unset)
+
+    def _aiperf_args_argv(self, args: dict[str, Any]) -> list[str]:
+        result = []
+        for key, value in args.items():
+            result.append(f"--{key}")
+            if value is not None:
+                result.append(str(value))
+        return result
+
+    def _runtime_result_path(self, path: str) -> str:
+        if Path(path).is_absolute():
+            return path
+        return f"{self.CONTAINER_MOUNT_OUTPUT}/{path}"
+
+    def _split_extra_args(self, value: Any) -> list[str]:
+        if value is None:
+            return []
+        if isinstance(value, list):
+            return [str(item) for item in value]
+        return shlex.split(str(value))
+
+    def _aiperf_phase_manifest_entry(self, base: AIPerf, phase: AIPerfPhase, *, single_phase: bool) -> dict[str, Any]:
+        base_config = self._aiperf_config_dict(base)
+        phase_config = self._aiperf_config_dict(phase, exclude_unset=True)
+        config = {**base_config, **phase_config}
+
+        base_args = self._aiperf_args_dict(base)
+        phase_args = self._aiperf_args_dict(phase, exclude_unset=True)
+        args = {**base_args, **phase_args}
+
+        if "artifact-dir-name" not in phase_config:
+            base_artifact_dir = base_config.get("artifact-dir-name", AIPERF_ARTIFACTS_DIR)
+            config["artifact-dir-name"] = base_artifact_dir if single_phase else f"{base_artifact_dir}/{phase.name}"
+        if "report-name" not in phase_config:
+            base_report_name = base_config.get("report-name", "aiperf_report.csv")
+            config["report-name"] = base_report_name if single_phase else f"aiperf_{phase.name}_report.csv"
+
+        return {
+            "name": phase.name,
+            "config": config,
+            "profile_args": self._aiperf_args_argv(args),
+        }
+
+    def _aiperf_entries(self) -> list[dict[str, Any]]:
+        phases = self.td.cmd_args.aiperf_phases or [AIPerfPhase.model_validate({"name": "aiperf"})]
+        return [
+            self._aiperf_phase_manifest_entry(
+                self.td.cmd_args.aiperf,
+                phase,
+                single_phase=len(phases) == 1,
+            )
+            for phase in phases
+        ]
+
+    def _aiperf_run_entry(self, entry: dict[str, Any], *, write_phase_log: bool, is_final: bool) -> dict[str, Any]:
+        config = entry["config"]
+        artifact_dir_name = config["artifact-dir-name"]
+        artifact_dir = self._runtime_result_path(artifact_dir_name)
+        runtime_entry = {
+            "name": entry["name"],
+            "cmd": shlex.split(config["cmd"]),
+            "cli": [
+                "--model",
+                self.td.cmd_args.dynamo.model,
+                "--url",
+                f"{{frontend_url}}:{self.td.cmd_args.dynamo.port}",
+                "--endpoint-type",
+                "chat",
+                "--streaming",
+                "--artifact-dir",
+                artifact_dir,
+                "--no-server-metrics",
+                *entry["profile_args"],
+                *self._split_extra_args(config.get("extra-args")),
+            ],
+            "output_folder": artifact_dir,
+            "report_source": f"{artifact_dir}/profile_export_aiperf.csv",
+            "report_file": self._runtime_result_path(config["report-name"]),
+        }
+        if write_phase_log:
+            runtime_entry["log_file"] = self._runtime_result_path(f"aiperf_{entry['name']}.log")
+        if is_final:
+            runtime_entry["final_report_file"] = self._runtime_result_path("aiperf_report.csv")
+        return runtime_entry
+
+    def _aiperf_setup_entry(self, setup_cmd: str) -> dict[str, Any]:
+        return {
+            "name": "aiperf_setup",
+            "cmd": ["bash", "-lc", setup_cmd],
+            "cli": [],
+        }
+
+    def _prepare_aiperf_commands(self) -> str | None:
+        if "aiperf.sh" not in self.td.cmd_args.workloads_list:
+            return None
+
+        self.test_run.output_path.mkdir(parents=True, exist_ok=True)
+        entries = self._aiperf_entries()
+        runtime_entries = []
+        setup_cmd = entries[0]["config"].get("setup-cmd")
+        if setup_cmd:
+            runtime_entries.append(self._aiperf_setup_entry(setup_cmd))
+
+        write_phase_logs = len(entries) > 1
+        for idx, entry in enumerate(entries):
+            runtime_entries.append(
+                self._aiperf_run_entry(
+                    entry,
+                    write_phase_log=write_phase_logs,
+                    is_final=len(entries) > 1 and idx == len(entries) - 1,
+                )
+            )
+
+        (self.test_run.output_path / AIPERF_COMMANDS_FILE_NAME).write_text(json.dumps(runtime_entries, indent=2))
+        return f"{self.CONTAINER_MOUNT_OUTPUT}/{AIPERF_COMMANDS_FILE_NAME}"
+
     def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]:
         self._prepare_lmcache_config()
+        aiperf_commands_file = self._prepare_aiperf_commands()
         if not td.repo.installed_path:
             raise ValueError("Dynamo repo is not installed")
         args = [
@@ -146,6 +282,8 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]:
 
         args.extend(self._get_nested_toml_args(td.cmd_args.genai_perf, "--genai_perf-"))
         args.extend(self._get_nested_toml_args(td.cmd_args.aiperf, "--aiperf-"))
+        if aiperf_commands_file:
+            args.append(f"--aiperf-commands-file {aiperf_commands_file}")
         if td.cmd_args.aiperf_accuracy is not None:
             args.extend(self._get_nested_toml_args(td.cmd_args.aiperf_accuracy, "--aiperf_accuracy-"))
 
diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
index 0e2f23061..a279297ce 100644
--- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
+++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
 import shlex
 from pathlib import Path
 from typing import cast
@@ -25,6 +26,7 @@
 from cloudai.core import GitRepo
 from cloudai.systems.slurm import SlurmSystem
 from cloudai.workloads.ai_dynamo import (
+    AIPERF_COMMANDS_FILE_NAME,
     LMCACHE_CONFIG_BACKUP_FILE_NAME,
     LMCACHE_CONFIG_FILE_NAME,
     AIDynamoArgs,
@@ -33,6 +35,7 @@
     AIDynamoTestDefinition,
     AIPerf,
     AIPerfAccuracy,
+    AIPerfPhase,
     GenAIPerf,
     LMCacheController,
     WorkerBaseArgs,
@@ -218,6 +221,97 @@ def test_gen_script_args_contains_custom_aiperf_accuracy_args(strategy: AIDynamo
     assert f'--aiperf_accuracy-cli "{cli}"' in result
 
 
+def test_gen_script_args_writes_resolved_aiperf_commands(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
+    td = cast(AIDynamoTestDefinition, strategy.test_run.test)
+    td.cmd_args.workloads = "aiperf.sh"
+    td.cmd_args.aiperf = AIPerf.model_validate(
+        {
+            "setup-cmd": "python -m pip install --upgrade aiperf",
+            "args": {
+                "concurrency": 2,
+                "request-count": 50,
+                "synthetic-input-tokens-mean": 300,
+                "output-tokens-mean": 500,
+            },
+        }
+    )
+    td.cmd_args.aiperf_phases = [
+        AIPerfPhase.model_validate({"name": "round_1", "args": {"concurrency": 1}}),
+        AIPerfPhase.model_validate({"name": "round_2", "args": {"request-count": 10}}),
+    ]
+
+    result = strategy._gen_script_args(td)
+
+    assert f"--aiperf-commands-file {strategy.CONTAINER_MOUNT_OUTPUT}/{AIPERF_COMMANDS_FILE_NAME}" in result
+    entries = json.loads((strategy.test_run.output_path / AIPERF_COMMANDS_FILE_NAME).read_text())
+    assert entries[0] == {
+        "name": "aiperf_setup",
+        "cmd": ["bash", "-lc", "python -m pip install --upgrade aiperf"],
+        "cli": [],
+    }
+    assert entries[1]["name"] == "round_1"
+    assert entries[1]["cmd"] == ["aiperf", "profile"]
+    assert entries[1]["cli"][:9] == [
+        "--model",
+        "model",
+        "--url",
+        "{frontend_url}:8000",
+        "--endpoint-type",
+        "chat",
+        "--streaming",
+        "--artifact-dir",
+        f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_artifacts/round_1",
+    ]
+    assert entries[1]["cli"][-8:] == [
+        "--concurrency",
+        "1",
+        "--request-count",
+        "50",
+        "--synthetic-input-tokens-mean",
+        "300",
+        "--output-tokens-mean",
+        "500",
+    ]
+    assert entries[1]["log_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_1.log"
+    assert entries[1]["report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_1_report.csv"
+    assert entries[2]["cli"][-8:] == [
+        "--concurrency",
+        "2",
+        "--request-count",
+        "10",
+        "--synthetic-input-tokens-mean",
+        "300",
+        "--output-tokens-mean",
+        "500",
+    ]
+    assert entries[2]["final_report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_report.csv"
+
+
+def test_single_aiperf_phase_keeps_legacy_artifact_defaults(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
+    td = cast(AIDynamoTestDefinition, strategy.test_run.test)
+    td.cmd_args.workloads = "aiperf.sh"
+    td.cmd_args.aiperf_phases = [AIPerfPhase.model_validate({"name": "round_1", "args": {"request-count": 10}})]
+
+    strategy._gen_script_args(td)
+
+    entries = json.loads((strategy.test_run.output_path / AIPERF_COMMANDS_FILE_NAME).read_text())
+    assert entries[0]["output_folder"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_artifacts"
+    assert entries[0]["report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_report.csv"
+    assert "log_file" not in entries[0]
+
+
+def test_aiperf_phase_names_must_be_unique(cmd_args: AIDynamoCmdArgs) -> None:
+    with pytest.raises(ValueError, match="AIPerf phase names must be unique"):
+        AIDynamoCmdArgs(
+            docker_image_url=cmd_args.docker_image_url,
+            dynamo=cmd_args.dynamo,
+            aiperf_phases=[
+                AIPerfPhase.model_validate({"name": "round_1"}),
+                AIPerfPhase.model_validate({"name": "round_1"}),
+            ],
+        )
+
+
 def test_gen_script_args_quotes_worker_json_args(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
     td = cast(AIDynamoTestDefinition, strategy.test_run.test)
     config = '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
diff --git a/tests/workloads/ai_dynamo/test_runtime_aiperf.py b/tests/workloads/ai_dynamo/test_runtime_aiperf.py
new file mode 100644
index 000000000..18045b2a2
--- /dev/null
+++ b/tests/workloads/ai_dynamo/test_runtime_aiperf.py
@@ -0,0 +1,63 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import sys
+from pathlib import Path
+
+from cloudai.workloads.ai_dynamo.runtime import aiperf
+
+
+def _write_fake_aiperf(tmp_path: Path) -> Path:
+    script = tmp_path / "fake_aiperf.py"
+    script.write_text(
+        """
+import sys
+from pathlib import Path
+
+artifact_dir = Path(sys.argv[sys.argv.index("--artifact-dir") + 1])
+url = sys.argv[sys.argv.index("--url") + 1]
+artifact_dir.mkdir(parents=True, exist_ok=True)
+(artifact_dir / "profile_export_aiperf.csv").write_text(f"url\\n{url}\\n", encoding="utf-8")
+""".strip(),
+        encoding="utf-8",
+    )
+    return script
+
+
+def test_runtime_executes_entries_and_copies_final_report(tmp_path: Path) -> None:
+    fake_aiperf = _write_fake_aiperf(tmp_path)
+    commands_file = tmp_path / "aiperf_commands.json"
+    artifact_dir = tmp_path / "aiperf_artifacts" / "round_1"
+    report_file = tmp_path / "aiperf_round_1_report.csv"
+    final_report_file = tmp_path / "aiperf_report.csv"
+    commands_file.write_text(
+        json.dumps(
+            [
+                {
+                    "name": "round_1",
+                    "cmd": [sys.executable, str(fake_aiperf)],
+                    "cli": [
+                        "--url",
+                        "{frontend_url}:8000",
+                        "--artifact-dir",
+                        str(artifact_dir),
+                    ],
+                    "output_folder": str(artifact_dir),
+                    "log_file": str(tmp_path / "aiperf_round_1.log"),
+                    "report_source": str(artifact_dir / "profile_export_aiperf.csv"),
+                    "report_file": str(report_file),
+                    "final_report_file": str(final_report_file),
+                }
+            ]
+        ),
+        encoding="utf-8",
+    )
+
+    result = aiperf.main(["--url", "http://frontend", "--commands-file", str(commands_file)])
+
+    assert result == 0
+    assert report_file.read_text(encoding="utf-8") == "url\nhttp://frontend:8000\n"
+    assert final_report_file.read_text(encoding="utf-8") == "url\nhttp://frontend:8000\n"
+    assert (tmp_path / "aiperf_round_1.log").is_file()

From c3877ca478b5c1ea6508070fcaf956c2856568f2 Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Fri, 29 May 2026 18:39:20 -0700
Subject: [PATCH 02/16] fix vllm config: add kv-transfer-config to worker args

---
 conf/experimental/ai_dynamo/test/vllm.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/conf/experimental/ai_dynamo/test/vllm.toml b/conf/experimental/ai_dynamo/test/vllm.toml
index 0667f1cab..581ecf3e7 100644
--- a/conf/experimental/ai_dynamo/test/vllm.toml
+++ b/conf/experimental/ai_dynamo/test/vllm.toml
@@ -39,6 +39,7 @@ workloads = "aiperf.sh"
       tensor-parallel-size = 8
       pipeline-parallel-size = 1
       data-parallel-size = 1
+      kv-transfer-config = '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
 
     [cmd_args.dynamo.decode_worker]
     num-nodes = 1
@@ -51,6 +52,7 @@ workloads = "aiperf.sh"
       tensor-parallel-size = 8
       pipeline-parallel-size = 1
       data-parallel-size = 1
+      kv-transfer-config = '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
 
   [cmd_args.lmcache_controller]
   cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001"

From c4512dc38a52af050138aaf9a373e8fe212f4137 Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 12:08:16 +0200
Subject: [PATCH 03/16] fix filenames for different aiperf iterations

---
 .../workloads/ai_dynamo/runtime/aiperf.py     | 20 +++++++----
 .../ai_dynamo/slurm_command_gen_strategy.py   | 13 ++++++--
 .../test_command_gen_strategy_slurm.py        |  1 +
 .../ai_dynamo/test_runtime_aiperf.py          | 33 +++++++++++++++++++
 4 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/src/cloudai/workloads/ai_dynamo/runtime/aiperf.py b/src/cloudai/workloads/ai_dynamo/runtime/aiperf.py
index b5476d571..d76361cfa 100644
--- a/src/cloudai/workloads/ai_dynamo/runtime/aiperf.py
+++ b/src/cloudai/workloads/ai_dynamo/runtime/aiperf.py
@@ -24,6 +24,18 @@ def substitute_frontend_url(values: list[str], frontend_url: str) -> list[str]:
     return [value.replace("{frontend_url}", frontend_url) for value in values]
 
 
+def copy_file(source: str, destination: str, message: str) -> None:
+    source_path = Path(source)
+    destination_path = Path(destination)
+    destination_path.parent.mkdir(parents=True, exist_ok=True)
+    if source_path.resolve() == destination_path.resolve():
+        log(f"{message} {destination_path}")
+        return
+
+    shutil.copy2(source_path, destination_path)
+    log(f"{message} {destination_path}")
+
+
 def run_entry(entry: dict[str, Any], frontend_url: str) -> None:
     argv = substitute_frontend_url([*entry["cmd"], *entry.get("cli", [])], frontend_url)
     output_folder = entry.get("output_folder")
@@ -43,15 +55,11 @@ def run_entry(entry: dict[str, Any], frontend_url: str) -> None:
     report_source = entry.get("report_source")
     report_file = entry.get("report_file")
     if report_source and report_file:
-        report_path = Path(report_file)
-        report_path.parent.mkdir(parents=True, exist_ok=True)
-        shutil.copy2(report_source, report_path)
-        log(f"AIPerf report saved to {report_path}")
+        copy_file(report_source, report_file, "AIPerf report saved to")
 
     final_report_file = entry.get("final_report_file")
     if final_report_file and report_file:
-        shutil.copy2(report_file, final_report_file)
-        log(f"Final AIPerf report saved to {final_report_file}")
+        copy_file(report_file, final_report_file, "Final AIPerf report saved to")
 
 
 def parse_args(argv: list[str]) -> argparse.Namespace:
diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index 4cbb33823..567d8d7a5 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -149,6 +149,13 @@ def _split_extra_args(self, value: Any) -> list[str]:
             return [str(item) for item in value]
         return shlex.split(str(value))
 
+    def _aiperf_phase_has_explicit_value(self, phase: AIPerfPhase, field_name: str, *extra_aliases: str) -> bool:
+        if field_name in phase.model_fields_set:
+            return True
+
+        extra = phase.model_extra or {}
+        return any(alias in extra for alias in extra_aliases)
+
     def _aiperf_phase_manifest_entry(self, base: AIPerf, phase: AIPerfPhase, *, single_phase: bool) -> dict[str, Any]:
         base_config = self._aiperf_config_dict(base)
         phase_config = self._aiperf_config_dict(phase, exclude_unset=True)
@@ -158,10 +165,12 @@ def _aiperf_phase_manifest_entry(self, base: AIPerf, phase: AIPerfPhase, *, sing
         phase_args = self._aiperf_args_dict(phase, exclude_unset=True)
         args = {**base_args, **phase_args}
 
-        if "artifact-dir-name" not in phase_config:
+        if not self._aiperf_phase_has_explicit_value(
+            phase, "artifact_dir_name", "artifact-dir-name", "artifact_dir_name"
+        ):
             base_artifact_dir = base_config.get("artifact-dir-name", AIPERF_ARTIFACTS_DIR)
             config["artifact-dir-name"] = base_artifact_dir if single_phase else f"{base_artifact_dir}/{phase.name}"
-        if "report-name" not in phase_config:
+        if not self._aiperf_phase_has_explicit_value(phase, "report_name", "report-name", "report_name"):
             base_report_name = base_config.get("report-name", "aiperf_report.csv")
             config["report-name"] = base_report_name if single_phase else f"aiperf_{phase.name}_report.csv"
 
diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
index a279297ce..e8e0f68d5 100644
--- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
+++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
@@ -284,6 +284,7 @@ def test_gen_script_args_writes_resolved_aiperf_commands(strategy: AIDynamoSlurm
         "--output-tokens-mean",
         "500",
     ]
+    assert entries[2]["report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_2_report.csv"
     assert entries[2]["final_report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_report.csv"
 
 
diff --git a/tests/workloads/ai_dynamo/test_runtime_aiperf.py b/tests/workloads/ai_dynamo/test_runtime_aiperf.py
index 18045b2a2..b54988102 100644
--- a/tests/workloads/ai_dynamo/test_runtime_aiperf.py
+++ b/tests/workloads/ai_dynamo/test_runtime_aiperf.py
@@ -61,3 +61,36 @@ def test_runtime_executes_entries_and_copies_final_report(tmp_path: Path) -> Non
     assert report_file.read_text(encoding="utf-8") == "url\nhttp://frontend:8000\n"
     assert final_report_file.read_text(encoding="utf-8") == "url\nhttp://frontend:8000\n"
     assert (tmp_path / "aiperf_round_1.log").is_file()
+
+
+def test_runtime_allows_final_report_to_match_report_file(tmp_path: Path) -> None:
+    fake_aiperf = _write_fake_aiperf(tmp_path)
+    commands_file = tmp_path / "aiperf_commands.json"
+    artifact_dir = tmp_path / "aiperf_artifacts"
+    report_file = tmp_path / "aiperf_report.csv"
+    commands_file.write_text(
+        json.dumps(
+            [
+                {
+                    "name": "aiperf",
+                    "cmd": [sys.executable, str(fake_aiperf)],
+                    "cli": [
+                        "--url",
+                        "{frontend_url}:8000",
+                        "--artifact-dir",
+                        str(artifact_dir),
+                    ],
+                    "output_folder": str(artifact_dir),
+                    "report_source": str(artifact_dir / "profile_export_aiperf.csv"),
+                    "report_file": str(report_file),
+                    "final_report_file": str(report_file),
+                }
+            ]
+        ),
+        encoding="utf-8",
+    )
+
+    result = aiperf.main(["--url", "http://frontend", "--commands-file", str(commands_file)])
+
+    assert result == 0
+    assert report_file.read_text(encoding="utf-8") == "url\nhttp://frontend:8000\n"

From 2ab10fae661c2d9130fdabb61ae9d46f465b1a28 Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 12:42:37 +0200
Subject: [PATCH 04/16] fix copy crash

---
 src/cloudai/workloads/ai_dynamo/ai_dynamo.py  | 23 +++++++++++++++++--
 .../ai_dynamo/slurm_command_gen_strategy.py   |  8 +++----
 .../test_command_gen_strategy_slurm.py        | 21 +++++++++++++++++
 3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
index 5c45a149b..4f08f68d8 100644
--- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
+++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
@@ -272,10 +272,29 @@ def installables(self) -> list[Installable]:
         return [self.script, self.runtime]
 
 
-class AIPerfPhase(AIPerf):
+class AIPerfPhase(BaseModel):
     """Named AIPerf phase that overrides the base AIPerf configuration."""
 
-    name: str = Field(min_length=1, pattern=r"^[A-Za-z0-9_.-]+$")
+    model_config = ConfigDict(extra="allow", populate_by_name=True)
+
+    name: str = Field(..., min_length=1, pattern=r"^[A-Za-z0-9_.-]+$")
+    cmd: str | None = None
+    setup_cmd: str | None = Field(
+        default=None,
+        serialization_alias="setup-cmd",
+        validation_alias=AliasChoices("setup-cmd", "setup_cmd"),
+    )
+    report_name: str | None = Field(
+        default=None,
+        serialization_alias="report-name",
+        validation_alias=AliasChoices("report-name", "report_name"),
+    )
+    args: Args = Field(default_factory=Args)
+    extra_args: str | list[str] | None = Field(
+        default=None,
+        serialization_alias="extra-args",
+        validation_alias=AliasChoices("extra-args", "extra_args"),
+    )
 
 
 class AIPerfAccuracy(BaseModel):
diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index 567d8d7a5..b1c1b2526 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -118,7 +118,7 @@ def _prepare_lmcache_config(self):
         (self.test_run.output_path / LMCACHE_CONFIG_FILE_NAME).write_text(config)
         (self.test_run.output_path / LMCACHE_CONFIG_BACKUP_FILE_NAME).write_text(config)
 
-    def _aiperf_config_dict(self, aiperf: AIPerf, *, exclude_unset: bool = False) -> dict[str, Any]:
+    def _aiperf_config_dict(self, aiperf: AIPerf | AIPerfPhase, *, exclude_unset: bool = False) -> dict[str, Any]:
         return aiperf.model_dump(
             by_alias=True,
             exclude={"args", "name", "repo", "script", "runtime"},
@@ -126,7 +126,7 @@ def _aiperf_config_dict(self, aiperf: AIPerf, *, exclude_unset: bool = False) ->
             exclude_unset=exclude_unset,
         )
 
-    def _aiperf_args_dict(self, aiperf: AIPerf, *, exclude_unset: bool = False) -> dict[str, Any]:
+    def _aiperf_args_dict(self, aiperf: AIPerf | AIPerfPhase, *, exclude_unset: bool = False) -> dict[str, Any]:
         return aiperf.args.model_dump(by_alias=True, exclude_none=True, exclude_unset=exclude_unset)
 
     def _aiperf_args_argv(self, args: dict[str, Any]) -> list[str]:
@@ -150,11 +150,11 @@ def _split_extra_args(self, value: Any) -> list[str]:
         return shlex.split(str(value))
 
     def _aiperf_phase_has_explicit_value(self, phase: AIPerfPhase, field_name: str, *extra_aliases: str) -> bool:
-        if field_name in phase.model_fields_set:
+        if field_name in phase.model_fields_set and getattr(phase, field_name) is not None:
             return True
 
         extra = phase.model_extra or {}
-        return any(alias in extra for alias in extra_aliases)
+        return any(extra.get(alias) is not None for alias in extra_aliases)
 
     def _aiperf_phase_manifest_entry(self, base: AIPerf, phase: AIPerfPhase, *, single_phase: bool) -> dict[str, Any]:
         base_config = self._aiperf_config_dict(base)
diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
index e8e0f68d5..78ef40f1a 100644
--- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
+++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
@@ -288,6 +288,27 @@ def test_gen_script_args_writes_resolved_aiperf_commands(strategy: AIDynamoSlurm
     assert entries[2]["final_report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_report.csv"
 
 
+def test_aiperf_phase_roundtrip_does_not_emit_default_report_name(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
+    td = cast(AIDynamoTestDefinition, strategy.test_run.test)
+    td.cmd_args.workloads = "aiperf.sh"
+    td.cmd_args.aiperf_phases = [
+        AIPerfPhase.model_validate({"name": "round_1"}),
+        AIPerfPhase.model_validate({"name": "round_2"}),
+    ]
+
+    roundtripped = AIDynamoTestDefinition.model_validate(td.model_dump())
+    strategy.test_run.test = roundtripped
+
+    assert roundtripped.cmd_args.aiperf_phases is not None
+    assert [phase.report_name for phase in roundtripped.cmd_args.aiperf_phases] == [None, None]
+
+    strategy._gen_script_args(roundtripped)
+
+    entries = json.loads((strategy.test_run.output_path / AIPERF_COMMANDS_FILE_NAME).read_text())
+    assert entries[0]["report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_1_report.csv"
+    assert entries[1]["report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_2_report.csv"
+
+
 def test_single_aiperf_phase_keeps_legacy_artifact_defaults(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
     td = cast(AIDynamoTestDefinition, strategy.test_run.test)
     td.cmd_args.workloads = "aiperf.sh"

From bbd5cf2118b45e7b1dc60893dfc4f943021f19d1 Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 15:03:44 +0200
Subject: [PATCH 05/16] refactor and more tests

---
 src/cloudai/workloads/ai_dynamo/__init__.py   |   2 -
 src/cloudai/workloads/ai_dynamo/ai_dynamo.py  |  14 +-
 src/cloudai/workloads/ai_dynamo/ai_dynamo.sh  |   1 +
 src/cloudai/workloads/ai_dynamo/aiperf.sh     |   5 +-
 .../workloads/ai_dynamo/runtime/aiperf.py     |  89 ---------
 .../ai_dynamo/slurm_command_gen_strategy.py   | 186 +++++++++---------
 tests/ref_data/ai-dynamo-aiperf.sh            |  25 +++
 tests/ref_data/ai-dynamo.sbatch               |  10 +-
 tests/test_acceptance.py                      |  24 +++
 .../test_command_gen_strategy_slurm.py        |  75 ++-----
 .../ai_dynamo/test_runtime_aiperf.py          |  96 ---------
 11 files changed, 182 insertions(+), 345 deletions(-)
 delete mode 100644 src/cloudai/workloads/ai_dynamo/runtime/aiperf.py
 create mode 100644 tests/ref_data/ai-dynamo-aiperf.sh
 delete mode 100644 tests/workloads/ai_dynamo/test_runtime_aiperf.py

diff --git a/src/cloudai/workloads/ai_dynamo/__init__.py b/src/cloudai/workloads/ai_dynamo/__init__.py
index 57e2eb99e..86938dfec 100644
--- a/src/cloudai/workloads/ai_dynamo/__init__.py
+++ b/src/cloudai/workloads/ai_dynamo/__init__.py
@@ -15,7 +15,6 @@
 # limitations under the License.
 
 from .ai_dynamo import (
-    AIPERF_COMMANDS_FILE_NAME,
     LMCACHE_CONFIG_BACKUP_FILE_NAME,
     LMCACHE_CONFIG_FILE_NAME,
     AIDynamoArgs,
@@ -34,7 +33,6 @@
 from .slurm_command_gen_strategy import AIDynamoSlurmCommandGenStrategy
 
 __all__ = [
-    "AIPERF_COMMANDS_FILE_NAME",
     "LMCACHE_CONFIG_BACKUP_FILE_NAME",
     "LMCACHE_CONFIG_FILE_NAME",
     "AIDynamoArgs",
diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
index 4f08f68d8..ed04d63ba 100644
--- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
+++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
@@ -42,7 +42,6 @@
 from cloudai.systems.slurm import SlurmSystem
 
 AIPERF_ARTIFACTS_DIR = "aiperf_artifacts"
-AIPERF_COMMANDS_FILE_NAME = "aiperf_commands.json"
 AIPERF_ACCURACY_ARTIFACTS_DIR = "aiperf_accuracy_artifacts"
 AIPERF_ACCURACY_RESULTS_CSV = "accuracy_results.csv"
 LMCACHE_CONFIG_FILE_NAME = "lmcache-config.yaml"
@@ -255,7 +254,6 @@ class AIPerf(Workload):
     name: str = "aiperf"
     cmd: str = "aiperf profile"
     script: File = File(Path(__file__).parent.parent / "ai_dynamo/aiperf.sh")
-    runtime: File = Field(default=File(Path(__file__).parent.parent / "ai_dynamo/runtime/aiperf.py"), exclude=True)
     setup_cmd: str | None = Field(
         default=None,
         serialization_alias="setup-cmd",
@@ -266,10 +264,15 @@ class AIPerf(Workload):
         serialization_alias="report-name",
         validation_alias=AliasChoices("report-name", "report_name"),
     )
+    artifact_dir_name: str = Field(
+        default=AIPERF_ARTIFACTS_DIR,
+        serialization_alias="artifact-dir-name",
+        validation_alias=AliasChoices("artifact-dir-name", "artifact_dir_name"),
+    )
 
     @property
     def installables(self) -> list[Installable]:
-        return [self.script, self.runtime]
+        return [self.script]
 
 
 class AIPerfPhase(BaseModel):
@@ -289,6 +292,11 @@ class AIPerfPhase(BaseModel):
         serialization_alias="report-name",
         validation_alias=AliasChoices("report-name", "report_name"),
     )
+    artifact_dir_name: str | None = Field(
+        default=None,
+        serialization_alias="artifact-dir-name",
+        validation_alias=AliasChoices("artifact-dir-name", "artifact_dir_name"),
+    )
     args: Args = Field(default_factory=Args)
     extra_args: str | list[str] | None = Field(
         default=None,
diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh
index 52e975850..25c4126fc 100644
--- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh
+++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh
@@ -1058,6 +1058,7 @@ function launch_workload()
 
   local workload_name="${workload_config_ref["--name"]}"
   local script="${workload_config_ref["--script"]}"
+  export FRONTEND_URL="${dynamo_args["url"]}"
 
   # Build config and workload args as proper bash arrays to preserve
   # multi-word values (e.g. --cmd "genai-perf profile") through word splitting.
diff --git a/src/cloudai/workloads/ai_dynamo/aiperf.sh b/src/cloudai/workloads/ai_dynamo/aiperf.sh
index 476ee3062..22eb0541c 100644
--- a/src/cloudai/workloads/ai_dynamo/aiperf.sh
+++ b/src/cloudai/workloads/ai_dynamo/aiperf.sh
@@ -5,5 +5,6 @@
 
 set -Eeuo pipefail
 
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-exec python3 "${SCRIPT_DIR}/aiperf.py" "$@"
+echo "AIPerf scripts are generated per test run by the AIDynamo Slurm command generator." >&2
+echo "This installable placeholder should be overridden by --aiperf-script /cloudai_run_results/aiperf.sh." >&2
+exit 1
diff --git a/src/cloudai/workloads/ai_dynamo/runtime/aiperf.py b/src/cloudai/workloads/ai_dynamo/runtime/aiperf.py
deleted file mode 100644
index d76361cfa..000000000
--- a/src/cloudai/workloads/ai_dynamo/runtime/aiperf.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-
-"""Execute generated AIPerf runtime entries."""
-
-from __future__ import annotations
-
-import argparse
-import json
-import shlex
-import shutil
-import subprocess
-import sys
-from pathlib import Path
-from typing import Any
-
-
-def log(message: str) -> None:
-    print(message, flush=True)
-
-
-def substitute_frontend_url(values: list[str], frontend_url: str) -> list[str]:
-    return [value.replace("{frontend_url}", frontend_url) for value in values]
-
-
-def copy_file(source: str, destination: str, message: str) -> None:
-    source_path = Path(source)
-    destination_path = Path(destination)
-    destination_path.parent.mkdir(parents=True, exist_ok=True)
-    if source_path.resolve() == destination_path.resolve():
-        log(f"{message} {destination_path}")
-        return
-
-    shutil.copy2(source_path, destination_path)
-    log(f"{message} {destination_path}")
-
-
-def run_entry(entry: dict[str, Any], frontend_url: str) -> None:
-    argv = substitute_frontend_url([*entry["cmd"], *entry.get("cli", [])], frontend_url)
-    output_folder = entry.get("output_folder")
-    if output_folder:
-        shutil.rmtree(output_folder, ignore_errors=True)
-
-    log(f"Running {entry['name']}: {shlex.join(argv)}")
-    log_file = entry.get("log_file")
-    if log_file:
-        log_path = Path(log_file)
-        log_path.parent.mkdir(parents=True, exist_ok=True)
-        with log_path.open("w", encoding="utf-8") as fp:
-            subprocess.run(argv, stdout=fp, stderr=subprocess.STDOUT, check=True)
-    else:
-        subprocess.run(argv, check=True)
-
-    report_source = entry.get("report_source")
-    report_file = entry.get("report_file")
-    if report_source and report_file:
-        copy_file(report_source, report_file, "AIPerf report saved to")
-
-    final_report_file = entry.get("final_report_file")
-    if final_report_file and report_file:
-        copy_file(report_file, final_report_file, "Final AIPerf report saved to")
-
-
-def parse_args(argv: list[str]) -> argparse.Namespace:
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--commands-file", required=True)
-    parser.add_argument("--url", required=True)
-    args, _ = parser.parse_known_args(argv)
-    return args
-
-
-def main(argv: list[str]) -> int:
-    try:
-        args = parse_args(argv)
-        with Path(args.commands_file).open(encoding="utf-8") as fp:
-            entries = json.load(fp)
-
-        for entry in entries:
-            run_entry(entry, args.url)
-    except Exception as exc:
-        log(f"ERROR: {exc}")
-        return 1
-
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main(sys.argv[1:]))
diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index b1c1b2526..f02104d2d 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
 import logging
 import shlex
 from pathlib import Path
@@ -27,8 +26,6 @@
 from cloudai.systems.slurm import SlurmCommandGenStrategy
 
 from .ai_dynamo import (
-    AIPERF_ARTIFACTS_DIR,
-    AIPERF_COMMANDS_FILE_NAME,
     LMCACHE_CONFIG_BACKUP_FILE_NAME,
     LMCACHE_CONFIG_FILE_NAME,
     AIDynamoTestDefinition,
@@ -36,6 +33,8 @@
     AIPerfPhase,
 )
 
+AIPERF_SCRIPT_FILE_NAME = "aiperf.sh"
+
 
 class AIDynamoSlurmCommandGenStrategy(SlurmCommandGenStrategy):
     """Command generation strategy for AI Dynamo on Slurm systems."""
@@ -118,17 +117,6 @@ def _prepare_lmcache_config(self):
         (self.test_run.output_path / LMCACHE_CONFIG_FILE_NAME).write_text(config)
         (self.test_run.output_path / LMCACHE_CONFIG_BACKUP_FILE_NAME).write_text(config)
 
-    def _aiperf_config_dict(self, aiperf: AIPerf | AIPerfPhase, *, exclude_unset: bool = False) -> dict[str, Any]:
-        return aiperf.model_dump(
-            by_alias=True,
-            exclude={"args", "name", "repo", "script", "runtime"},
-            exclude_none=True,
-            exclude_unset=exclude_unset,
-        )
-
-    def _aiperf_args_dict(self, aiperf: AIPerf | AIPerfPhase, *, exclude_unset: bool = False) -> dict[str, Any]:
-        return aiperf.args.model_dump(by_alias=True, exclude_none=True, exclude_unset=exclude_unset)
-
     def _aiperf_args_argv(self, args: dict[str, Any]) -> list[str]:
         result = []
         for key, value in args.items():
@@ -156,106 +144,110 @@ def _aiperf_phase_has_explicit_value(self, phase: AIPerfPhase, field_name: str,
         extra = phase.model_extra or {}
         return any(extra.get(alias) is not None for alias in extra_aliases)
 
-    def _aiperf_phase_manifest_entry(self, base: AIPerf, phase: AIPerfPhase, *, single_phase: bool) -> dict[str, Any]:
-        base_config = self._aiperf_config_dict(base)
-        phase_config = self._aiperf_config_dict(phase, exclude_unset=True)
-        config = {**base_config, **phase_config}
-
-        base_args = self._aiperf_args_dict(base)
-        phase_args = self._aiperf_args_dict(phase, exclude_unset=True)
-        args = {**base_args, **phase_args}
-
-        if not self._aiperf_phase_has_explicit_value(
-            phase, "artifact_dir_name", "artifact-dir-name", "artifact_dir_name"
-        ):
-            base_artifact_dir = base_config.get("artifact-dir-name", AIPERF_ARTIFACTS_DIR)
-            config["artifact-dir-name"] = base_artifact_dir if single_phase else f"{base_artifact_dir}/{phase.name}"
-        if not self._aiperf_phase_has_explicit_value(phase, "report_name", "report-name", "report_name"):
-            base_report_name = base_config.get("report-name", "aiperf_report.csv")
-            config["report-name"] = base_report_name if single_phase else f"aiperf_{phase.name}_report.csv"
-
-        return {
-            "name": phase.name,
-            "config": config,
-            "profile_args": self._aiperf_args_argv(args),
-        }
-
-    def _aiperf_entries(self) -> list[dict[str, Any]]:
+    def _resolve_aiperf_phase(self, phase: AIPerfPhase) -> AIPerf:
+        resolved = self.td.cmd_args.aiperf.model_copy(deep=True)
+        resolved.name = phase.name
+        single_phase = self.td.cmd_args.aiperf_phases is None or len(self.td.cmd_args.aiperf_phases) == 1
+
+        for field_name in ("cmd", "setup_cmd", "report_name", "artifact_dir_name", "extra_args"):
+            if self._aiperf_phase_has_explicit_value(phase, field_name, field_name.replace("_", "-")):
+                setattr(resolved, field_name, getattr(phase, field_name))
+
+        if not self._aiperf_phase_has_explicit_value(phase, "artifact_dir_name", "artifact-dir-name"):
+            base_artifact_dir = resolved.artifact_dir_name
+            resolved.artifact_dir_name = base_artifact_dir if single_phase else f"{base_artifact_dir}/{phase.name}"
+        if not self._aiperf_phase_has_explicit_value(phase, "report_name", "report-name"):
+            base_report_name = resolved.report_name
+            resolved.report_name = base_report_name if single_phase else f"aiperf_{phase.name}_report.csv"
+
+        resolved.args = resolved.args.model_copy(
+            update=phase.args.model_dump(by_alias=True, exclude_none=True, exclude_unset=True)
+        )
+        return resolved
+
+    def _render_aiperf_script(self) -> str:
         phases = self.td.cmd_args.aiperf_phases or [AIPerfPhase.model_validate({"name": "aiperf"})]
-        return [
-            self._aiperf_phase_manifest_entry(
-                self.td.cmd_args.aiperf,
-                phase,
-                single_phase=len(phases) == 1,
-            )
-            for phase in phases
+        single_phase = len(phases) == 1
+        setup_cmd = self._resolve_aiperf_phase(phases[0]).setup_cmd
+        lines = [
+            "#!/usr/bin/env bash",
+            "set -Eeuo pipefail",
+            "",
+            'log() { echo "[$(date +%F\\ %T) $(hostname)]: $*"; }',
+            "",
+            ': "${FRONTEND_URL:?FRONTEND_URL is not set}"',
+            "",
         ]
 
-    def _aiperf_run_entry(self, entry: dict[str, Any], *, write_phase_log: bool, is_final: bool) -> dict[str, Any]:
-        config = entry["config"]
-        artifact_dir_name = config["artifact-dir-name"]
-        artifact_dir = self._runtime_result_path(artifact_dir_name)
-        runtime_entry = {
-            "name": entry["name"],
-            "cmd": shlex.split(config["cmd"]),
-            "cli": [
+        if setup_cmd:
+            setup_argv = ["bash", "-lc", setup_cmd]
+            lines.extend(
+                [
+                    f"log {shlex.quote(f'Running aiperf setup: {shlex.join(setup_argv)}')}",
+                    shlex.join(setup_argv),
+                    "",
+                ]
+            )
+
+        write_phase_logs = not single_phase
+        for idx, phase in enumerate(phases):
+            resolved_phase = self._resolve_aiperf_phase(phase)
+            artifact_dir = self._runtime_result_path(resolved_phase.artifact_dir_name)
+            report_source = f"{artifact_dir}/profile_export_aiperf.csv"
+            report_file = self._runtime_result_path(resolved_phase.report_name)
+            argv = [
+                *shlex.split(resolved_phase.cmd),
                 "--model",
                 self.td.cmd_args.dynamo.model,
-                "--url",
-                f"{{frontend_url}}:{self.td.cmd_args.dynamo.port}",
                 "--endpoint-type",
                 "chat",
                 "--streaming",
                 "--artifact-dir",
                 artifact_dir,
                 "--no-server-metrics",
-                *entry["profile_args"],
-                *self._split_extra_args(config.get("extra-args")),
-            ],
-            "output_folder": artifact_dir,
-            "report_source": f"{artifact_dir}/profile_export_aiperf.csv",
-            "report_file": self._runtime_result_path(config["report-name"]),
-        }
-        if write_phase_log:
-            runtime_entry["log_file"] = self._runtime_result_path(f"aiperf_{entry['name']}.log")
-        if is_final:
-            runtime_entry["final_report_file"] = self._runtime_result_path("aiperf_report.csv")
-        return runtime_entry
-
-    def _aiperf_setup_entry(self, setup_cmd: str) -> dict[str, Any]:
-        return {
-            "name": "aiperf_setup",
-            "cmd": ["bash", "-lc", setup_cmd],
-            "cli": [],
-        }
-
-    def _prepare_aiperf_commands(self) -> str | None:
+                *self._aiperf_args_argv(resolved_phase.args.model_dump(by_alias=True, exclude_none=True)),
+                *self._split_extra_args(resolved_phase.extra_args),
+            ]
+            cmd = f'{shlex.join(argv)} --url "$FRONTEND_URL"'
+            log_message = f"Running {phase.name}: {cmd}"
+            lines.append(f"rm -rf {shlex.quote(artifact_dir)}")
+            lines.append(f"mkdir -p {shlex.quote(artifact_dir)}")
+            lines.append(f"log {shlex.quote(log_message)}")
+            if write_phase_logs:
+                log_file = self._runtime_result_path(f"aiperf_{phase.name}.log")
+                lines.append(f"{cmd} > {shlex.quote(log_file)} 2>&1")
+            else:
+                lines.append(cmd)
+
+            lines.append(f"mkdir -p {shlex.quote(str(Path(report_file).parent))}")
+            if report_source != report_file:
+                lines.append(f"cp {shlex.quote(report_source)} {shlex.quote(report_file)}")
+            lines.append(f"log {shlex.quote(f'AIPerf report saved to {report_file}')}")
+
+            if not single_phase and idx == len(phases) - 1:
+                final_report_file = self._runtime_result_path("aiperf_report.csv")
+                lines.append(f"mkdir -p {shlex.quote(str(Path(final_report_file).parent))}")
+                if report_file != final_report_file:
+                    lines.append(f"cp {shlex.quote(report_file)} {shlex.quote(final_report_file)}")
+                lines.append(f"log {shlex.quote(f'Final AIPerf report saved to {final_report_file}')}")
+            lines.append("")
+
+        return "\n".join(lines)
+
+    def _prepare_aiperf_script(self) -> str | None:
         if "aiperf.sh" not in self.td.cmd_args.workloads_list:
             return None
 
         self.test_run.output_path.mkdir(parents=True, exist_ok=True)
-        entries = self._aiperf_entries()
-        runtime_entries = []
-        setup_cmd = entries[0]["config"].get("setup-cmd")
-        if setup_cmd:
-            runtime_entries.append(self._aiperf_setup_entry(setup_cmd))
-
-        write_phase_logs = len(entries) > 1
-        for idx, entry in enumerate(entries):
-            runtime_entries.append(
-                self._aiperf_run_entry(
-                    entry,
-                    write_phase_log=write_phase_logs,
-                    is_final=len(entries) > 1 and idx == len(entries) - 1,
-                )
-            )
 
-        (self.test_run.output_path / AIPERF_COMMANDS_FILE_NAME).write_text(json.dumps(runtime_entries, indent=2))
-        return f"{self.CONTAINER_MOUNT_OUTPUT}/{AIPERF_COMMANDS_FILE_NAME}"
+        script_path = self.test_run.output_path / AIPERF_SCRIPT_FILE_NAME
+        script_path.write_text(self._render_aiperf_script() + "\n")
+        script_path.chmod(0o755)
+        return f"{self.CONTAINER_MOUNT_OUTPUT}/{AIPERF_SCRIPT_FILE_NAME}"
 
     def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]:
         self._prepare_lmcache_config()
-        aiperf_commands_file = self._prepare_aiperf_commands()
+        aiperf_script = self._prepare_aiperf_script()
         if not td.repo.installed_path:
             raise ValueError("Dynamo repo is not installed")
         args = [
@@ -291,8 +283,8 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]:
 
         args.extend(self._get_nested_toml_args(td.cmd_args.genai_perf, "--genai_perf-"))
         args.extend(self._get_nested_toml_args(td.cmd_args.aiperf, "--aiperf-"))
-        if aiperf_commands_file:
-            args.append(f"--aiperf-commands-file {aiperf_commands_file}")
+        if aiperf_script:
+            args.append(f"--aiperf-script {aiperf_script}")
         if td.cmd_args.aiperf_accuracy is not None:
             args.extend(self._get_nested_toml_args(td.cmd_args.aiperf_accuracy, "--aiperf_accuracy-"))
 
diff --git a/tests/ref_data/ai-dynamo-aiperf.sh b/tests/ref_data/ai-dynamo-aiperf.sh
new file mode 100644
index 000000000..3fcd013cb
--- /dev/null
+++ b/tests/ref_data/ai-dynamo-aiperf.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+set -Eeuo pipefail
+
+log() { echo "[$(date +%F\ %T) $(hostname)]: $*"; }
+
+: "${FRONTEND_URL:?FRONTEND_URL is not set}"
+
+rm -rf /cloudai_run_results/aiperf_artifacts/round_1
+mkdir -p /cloudai_run_results/aiperf_artifacts/round_1
+log 'Running round_1: aiperf profile --model model --endpoint-type chat --streaming --artifact-dir /cloudai_run_results/aiperf_artifacts/round_1 --no-server-metrics --concurrency 1 --request-count 50 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --url "$FRONTEND_URL"'
+aiperf profile --model model --endpoint-type chat --streaming --artifact-dir /cloudai_run_results/aiperf_artifacts/round_1 --no-server-metrics --concurrency 1 --request-count 50 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --url "$FRONTEND_URL" > /cloudai_run_results/aiperf_round_1.log 2>&1
+mkdir -p /cloudai_run_results
+cp /cloudai_run_results/aiperf_artifacts/round_1/profile_export_aiperf.csv /cloudai_run_results/aiperf_round_1_report.csv
+log 'AIPerf report saved to /cloudai_run_results/aiperf_round_1_report.csv'
+
+rm -rf /cloudai_run_results/aiperf_artifacts/round_2
+mkdir -p /cloudai_run_results/aiperf_artifacts/round_2
+log 'Running round_2: aiperf profile --model model --endpoint-type chat --streaming --artifact-dir /cloudai_run_results/aiperf_artifacts/round_2 --no-server-metrics --concurrency 2 --request-count 10 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --url "$FRONTEND_URL"'
+aiperf profile --model model --endpoint-type chat --streaming --artifact-dir /cloudai_run_results/aiperf_artifacts/round_2 --no-server-metrics --concurrency 2 --request-count 10 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --url "$FRONTEND_URL" > /cloudai_run_results/aiperf_round_2.log 2>&1
+mkdir -p /cloudai_run_results
+cp /cloudai_run_results/aiperf_artifacts/round_2/profile_export_aiperf.csv /cloudai_run_results/aiperf_round_2_report.csv
+log 'AIPerf report saved to /cloudai_run_results/aiperf_round_2_report.csv'
+mkdir -p /cloudai_run_results
+cp /cloudai_run_results/aiperf_round_2_report.csv /cloudai_run_results/aiperf_report.csv
+log 'Final AIPerf report saved to /cloudai_run_results/aiperf_report.csv'
diff --git a/tests/ref_data/ai-dynamo.sbatch b/tests/ref_data/ai-dynamo.sbatch
index 865444b81..0f4a74569 100644
--- a/tests/ref_data/ai-dynamo.sbatch
+++ b/tests/ref_data/ai-dynamo.sbatch
@@ -32,7 +32,7 @@ srun \
   --results-dir /cloudai_run_results \
   --dynamo-repo /cloudai_install/dynamo__f7e468c7e8ff0d1426db987564e60572167e8464 \
   --hf-home /cloudai_install/huggingface \
-  --workloads genai_perf.sh \
+  --workloads aiperf.sh \
   --failure-marker /cloudai_run_results/failure-marker.txt \
   --success-marker /cloudai_run_results/success-marker.txt \
   --storage-cache-dir /tmp \
@@ -75,4 +75,10 @@ srun \
   --aiperf-name "aiperf" \
   --aiperf-cmd "aiperf profile" \
   --aiperf-script "/cloudai_install/aiperf.sh" \
-  --aiperf-report-name "aiperf_report.csv"
+  --aiperf-report-name "aiperf_report.csv" \
+  --aiperf-artifact-dir-name "aiperf_artifacts" \
+  --aiperf-args-concurrency "2" \
+  --aiperf-args-request-count "50" \
+  --aiperf-args-synthetic-input-tokens-mean "300" \
+  --aiperf-args-output-tokens-mean "500" \
+  --aiperf-script /cloudai_run_results/aiperf.sh
diff --git a/tests/test_acceptance.py b/tests/test_acceptance.py
index d45416595..78902298e 100644
--- a/tests/test_acceptance.py
+++ b/tests/test_acceptance.py
@@ -34,6 +34,8 @@
     AIDynamoArgs,
     AIDynamoCmdArgs,
     AIDynamoTestDefinition,
+    AIPerf,
+    AIPerfPhase,
     GenAIPerf,
     WorkerBaseArgs,
     WorkerConfig,
@@ -493,6 +495,7 @@ def test_req(request, slurm_system: SlurmSystem, partial_tr: partial[TestRun]) -
                 ),
                 cmd_args=AIDynamoCmdArgs(
                     docker_image_url="nvcr.io/nvidia/ai-dynamo:24.09",
+                    workloads="aiperf.sh",
                     dynamo=AIDynamoArgs(
                         model="model",
                         backend="vllm",
@@ -526,6 +529,20 @@ def test_req(request, slurm_system: SlurmSystem, partial_tr: partial[TestRun]) -
                             "warmup-request-count": 10,
                         }
                     ),
+                    aiperf=AIPerf.model_validate(
+                        {
+                            "args": {
+                                "concurrency": 2,
+                                "request-count": 50,
+                                "synthetic-input-tokens-mean": 300,
+                                "output-tokens-mean": 500,
+                            }
+                        }
+                    ),
+                    aiperf_phases=[
+                        AIPerfPhase.model_validate({"name": "round_1", "args": {"concurrency": 1}}),
+                        AIPerfPhase.model_validate({"name": "round_2", "args": {"request-count": 10}}),
+                    ],
                 ),
             ),
         ),
@@ -745,3 +762,10 @@ def test_sbatch_generation(slurm_system: SlurmSystem, test_req: tuple[TestRun, s
             "__INSTALL_DIR__", str(slurm_system.install_path.absolute())
         )
         assert curr_launcher == ref_launcher, "nixl-ep-launch.sh does not match reference"
+
+    if test_req[1] == "ai-dynamo.sbatch":
+        aiperf_script = slurm_system.output_path / "aiperf.sh"
+        assert aiperf_script.exists(), "aiperf.sh was not generated"
+        curr_aiperf = aiperf_script.read_text().strip()
+        ref_aiperf = (Path(__file__).parent / "ref_data" / "ai-dynamo-aiperf.sh").read_text().strip()
+        assert curr_aiperf == ref_aiperf, "aiperf.sh does not match reference"
diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
index 78ef40f1a..0ece2e800 100644
--- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
+++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import json
 import shlex
 from pathlib import Path
 from typing import cast
@@ -26,7 +25,6 @@
 from cloudai.core import GitRepo
 from cloudai.systems.slurm import SlurmSystem
 from cloudai.workloads.ai_dynamo import (
-    AIPERF_COMMANDS_FILE_NAME,
     LMCACHE_CONFIG_BACKUP_FILE_NAME,
     LMCACHE_CONFIG_FILE_NAME,
     AIDynamoArgs,
@@ -221,7 +219,7 @@ def test_gen_script_args_contains_custom_aiperf_accuracy_args(strategy: AIDynamo
     assert f'--aiperf_accuracy-cli "{cli}"' in result
 
 
-def test_gen_script_args_writes_resolved_aiperf_commands(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
+def test_gen_script_args_writes_resolved_aiperf_script(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
     td = cast(AIDynamoTestDefinition, strategy.test_run.test)
     td.cmd_args.workloads = "aiperf.sh"
     td.cmd_args.aiperf = AIPerf.model_validate(
@@ -242,50 +240,19 @@ def test_gen_script_args_writes_resolved_aiperf_commands(strategy: AIDynamoSlurm
 
     result = strategy._gen_script_args(td)
 
-    assert f"--aiperf-commands-file {strategy.CONTAINER_MOUNT_OUTPUT}/{AIPERF_COMMANDS_FILE_NAME}" in result
-    entries = json.loads((strategy.test_run.output_path / AIPERF_COMMANDS_FILE_NAME).read_text())
-    assert entries[0] == {
-        "name": "aiperf_setup",
-        "cmd": ["bash", "-lc", "python -m pip install --upgrade aiperf"],
-        "cli": [],
-    }
-    assert entries[1]["name"] == "round_1"
-    assert entries[1]["cmd"] == ["aiperf", "profile"]
-    assert entries[1]["cli"][:9] == [
-        "--model",
-        "model",
-        "--url",
-        "{frontend_url}:8000",
-        "--endpoint-type",
-        "chat",
-        "--streaming",
-        "--artifact-dir",
-        f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_artifacts/round_1",
-    ]
-    assert entries[1]["cli"][-8:] == [
-        "--concurrency",
-        "1",
-        "--request-count",
-        "50",
-        "--synthetic-input-tokens-mean",
-        "300",
-        "--output-tokens-mean",
-        "500",
-    ]
-    assert entries[1]["log_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_1.log"
-    assert entries[1]["report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_1_report.csv"
-    assert entries[2]["cli"][-8:] == [
-        "--concurrency",
-        "2",
-        "--request-count",
-        "10",
-        "--synthetic-input-tokens-mean",
-        "300",
-        "--output-tokens-mean",
-        "500",
-    ]
-    assert entries[2]["report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_2_report.csv"
-    assert entries[2]["final_report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_report.csv"
+    assert f"--aiperf-script {strategy.CONTAINER_MOUNT_OUTPUT}/aiperf.sh" in result
+    script = (strategy.test_run.output_path / "aiperf.sh").read_text()
+    assert "bash -lc 'python -m pip install --upgrade aiperf'" in script
+    assert ': "${FRONTEND_URL:?FRONTEND_URL is not set}"' in script
+    assert '--url "$FRONTEND_URL"' in script
+    assert f"--artifact-dir {strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_artifacts/round_1" in script
+    assert f"--artifact-dir {strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_artifacts/round_2" in script
+    assert "--concurrency 1 --request-count 50" in script
+    assert "--concurrency 2 --request-count 10" in script
+    assert f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_1.log" in script
+    assert f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_1_report.csv" in script
+    assert f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_2_report.csv" in script
+    assert f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_report.csv" in script
 
 
 def test_aiperf_phase_roundtrip_does_not_emit_default_report_name(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
@@ -304,9 +271,9 @@ def test_aiperf_phase_roundtrip_does_not_emit_default_report_name(strategy: AIDy
 
     strategy._gen_script_args(roundtripped)
 
-    entries = json.loads((strategy.test_run.output_path / AIPERF_COMMANDS_FILE_NAME).read_text())
-    assert entries[0]["report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_1_report.csv"
-    assert entries[1]["report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_2_report.csv"
+    script = (strategy.test_run.output_path / "aiperf.sh").read_text()
+    assert f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_1_report.csv" in script
+    assert f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_2_report.csv" in script
 
 
 def test_single_aiperf_phase_keeps_legacy_artifact_defaults(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
@@ -316,10 +283,10 @@ def test_single_aiperf_phase_keeps_legacy_artifact_defaults(strategy: AIDynamoSl
 
     strategy._gen_script_args(td)
 
-    entries = json.loads((strategy.test_run.output_path / AIPERF_COMMANDS_FILE_NAME).read_text())
-    assert entries[0]["output_folder"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_artifacts"
-    assert entries[0]["report_file"] == f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_report.csv"
-    assert "log_file" not in entries[0]
+    script = (strategy.test_run.output_path / "aiperf.sh").read_text()
+    assert f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_artifacts" in script
+    assert f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_report.csv" in script
+    assert f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_round_1.log" not in script
 
 
 def test_aiperf_phase_names_must_be_unique(cmd_args: AIDynamoCmdArgs) -> None:
diff --git a/tests/workloads/ai_dynamo/test_runtime_aiperf.py b/tests/workloads/ai_dynamo/test_runtime_aiperf.py
deleted file mode 100644
index b54988102..000000000
--- a/tests/workloads/ai_dynamo/test_runtime_aiperf.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-
-import json
-import sys
-from pathlib import Path
-
-from cloudai.workloads.ai_dynamo.runtime import aiperf
-
-
-def _write_fake_aiperf(tmp_path: Path) -> Path:
-    script = tmp_path / "fake_aiperf.py"
-    script.write_text(
-        """
-import sys
-from pathlib import Path
-
-artifact_dir = Path(sys.argv[sys.argv.index("--artifact-dir") + 1])
-url = sys.argv[sys.argv.index("--url") + 1]
-artifact_dir.mkdir(parents=True, exist_ok=True)
-(artifact_dir / "profile_export_aiperf.csv").write_text(f"url\\n{url}\\n", encoding="utf-8")
-""".strip(),
-        encoding="utf-8",
-    )
-    return script
-
-
-def test_runtime_executes_entries_and_copies_final_report(tmp_path: Path) -> None:
-    fake_aiperf = _write_fake_aiperf(tmp_path)
-    commands_file = tmp_path / "aiperf_commands.json"
-    artifact_dir = tmp_path / "aiperf_artifacts" / "round_1"
-    report_file = tmp_path / "aiperf_round_1_report.csv"
-    final_report_file = tmp_path / "aiperf_report.csv"
-    commands_file.write_text(
-        json.dumps(
-            [
-                {
-                    "name": "round_1",
-                    "cmd": [sys.executable, str(fake_aiperf)],
-                    "cli": [
-                        "--url",
-                        "{frontend_url}:8000",
-                        "--artifact-dir",
-                        str(artifact_dir),
-                    ],
-                    "output_folder": str(artifact_dir),
-                    "log_file": str(tmp_path / "aiperf_round_1.log"),
-                    "report_source": str(artifact_dir / "profile_export_aiperf.csv"),
-                    "report_file": str(report_file),
-                    "final_report_file": str(final_report_file),
-                }
-            ]
-        ),
-        encoding="utf-8",
-    )
-
-    result = aiperf.main(["--url", "http://frontend", "--commands-file", str(commands_file)])
-
-    assert result == 0
-    assert report_file.read_text(encoding="utf-8") == "url\nhttp://frontend:8000\n"
-    assert final_report_file.read_text(encoding="utf-8") == "url\nhttp://frontend:8000\n"
-    assert (tmp_path / "aiperf_round_1.log").is_file()
-
-
-def test_runtime_allows_final_report_to_match_report_file(tmp_path: Path) -> None:
-    fake_aiperf = _write_fake_aiperf(tmp_path)
-    commands_file = tmp_path / "aiperf_commands.json"
-    artifact_dir = tmp_path / "aiperf_artifacts"
-    report_file = tmp_path / "aiperf_report.csv"
-    commands_file.write_text(
-        json.dumps(
-            [
-                {
-                    "name": "aiperf",
-                    "cmd": [sys.executable, str(fake_aiperf)],
-                    "cli": [
-                        "--url",
-                        "{frontend_url}:8000",
-                        "--artifact-dir",
-                        str(artifact_dir),
-                    ],
-                    "output_folder": str(artifact_dir),
-                    "report_source": str(artifact_dir / "profile_export_aiperf.csv"),
-                    "report_file": str(report_file),
-                    "final_report_file": str(report_file),
-                }
-            ]
-        ),
-        encoding="utf-8",
-    )
-
-    result = aiperf.main(["--url", "http://frontend", "--commands-file", str(commands_file)])
-
-    assert result == 0
-    assert report_file.read_text(encoding="utf-8") == "url\nhttp://frontend:8000\n"

From dda28ef8d75b20e51d6449a463d502728a5fb255 Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 15:10:28 +0200
Subject: [PATCH 06/16] cleaner phases merge

---
 .../ai_dynamo/slurm_command_gen_strategy.py   | 31 +++++--------------
 1 file changed, 8 insertions(+), 23 deletions(-)

diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index f02104d2d..0de74ccef 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -22,6 +22,7 @@
 import yaml
 from pydantic import BaseModel, TypeAdapter, ValidationError
 
+import cloudai.util
 from cloudai.core import File, GitRepo
 from cloudai.systems.slurm import SlurmCommandGenStrategy
 
@@ -137,33 +138,17 @@ def _split_extra_args(self, value: Any) -> list[str]:
             return [str(item) for item in value]
         return shlex.split(str(value))
 
-    def _aiperf_phase_has_explicit_value(self, phase: AIPerfPhase, field_name: str, *extra_aliases: str) -> bool:
-        if field_name in phase.model_fields_set and getattr(phase, field_name) is not None:
-            return True
-
-        extra = phase.model_extra or {}
-        return any(extra.get(alias) is not None for alias in extra_aliases)
-
     def _resolve_aiperf_phase(self, phase: AIPerfPhase) -> AIPerf:
-        resolved = self.td.cmd_args.aiperf.model_copy(deep=True)
-        resolved.name = phase.name
+        base_data = self.td.cmd_args.aiperf.model_dump(by_alias=True, exclude_none=True)
+        phase_data = phase.model_dump(by_alias=True, exclude_none=True, exclude_unset=True)
         single_phase = self.td.cmd_args.aiperf_phases is None or len(self.td.cmd_args.aiperf_phases) == 1
 
-        for field_name in ("cmd", "setup_cmd", "report_name", "artifact_dir_name", "extra_args"):
-            if self._aiperf_phase_has_explicit_value(phase, field_name, field_name.replace("_", "-")):
-                setattr(resolved, field_name, getattr(phase, field_name))
+        if "artifact-dir-name" not in phase_data and not single_phase:
+            phase_data["artifact-dir-name"] = f"{self.td.cmd_args.aiperf.artifact_dir_name}/{phase.name}"
+        if "report-name" not in phase_data and not single_phase:
+            phase_data["report-name"] = f"aiperf_{phase.name}_report.csv"
 
-        if not self._aiperf_phase_has_explicit_value(phase, "artifact_dir_name", "artifact-dir-name"):
-            base_artifact_dir = resolved.artifact_dir_name
-            resolved.artifact_dir_name = base_artifact_dir if single_phase else f"{base_artifact_dir}/{phase.name}"
-        if not self._aiperf_phase_has_explicit_value(phase, "report_name", "report-name"):
-            base_report_name = resolved.report_name
-            resolved.report_name = base_report_name if single_phase else f"aiperf_{phase.name}_report.csv"
-
-        resolved.args = resolved.args.model_copy(
-            update=phase.args.model_dump(by_alias=True, exclude_none=True, exclude_unset=True)
-        )
-        return resolved
+        return AIPerf.model_validate(cloudai.util.deep_merge(base_data, phase_data))
 
     def _render_aiperf_script(self) -> str:
         phases = self.td.cmd_args.aiperf_phases or [AIPerfPhase.model_validate({"name": "aiperf"})]

From ea08df789de2deac17c5eb787626fa97c726c7b3 Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 15:30:50 +0200
Subject: [PATCH 07/16] simplify aiperf handling

---
 .../workloads/ai_dynamo/slurm_command_gen_strategy.py     | 4 +++-
 tests/ref_data/ai-dynamo.sbatch                           | 8 --------
 .../ai_dynamo/test_command_gen_strategy_slurm.py          | 7 ++++---
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index 0de74ccef..447ca1154 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -267,9 +267,11 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]:
         args.extend(self._get_nested_toml_args(td.cmd_args.dynamo.decode_worker, "--decode-"))
 
         args.extend(self._get_nested_toml_args(td.cmd_args.genai_perf, "--genai_perf-"))
-        args.extend(self._get_nested_toml_args(td.cmd_args.aiperf, "--aiperf-"))
         if aiperf_script:
+            args.append(f'--aiperf-name "{td.cmd_args.aiperf.name}"')
             args.append(f"--aiperf-script {aiperf_script}")
+        else:
+            args.extend(self._get_nested_toml_args(td.cmd_args.aiperf, "--aiperf-"))
         if td.cmd_args.aiperf_accuracy is not None:
             args.extend(self._get_nested_toml_args(td.cmd_args.aiperf_accuracy, "--aiperf_accuracy-"))
 
diff --git a/tests/ref_data/ai-dynamo.sbatch b/tests/ref_data/ai-dynamo.sbatch
index 0f4a74569..e3384343b 100644
--- a/tests/ref_data/ai-dynamo.sbatch
+++ b/tests/ref_data/ai-dynamo.sbatch
@@ -73,12 +73,4 @@ srun \
   --genai_perf-synthetic-input-tokens-mean "550" \
   --genai_perf-warmup-request-count "10" \
   --aiperf-name "aiperf" \
-  --aiperf-cmd "aiperf profile" \
-  --aiperf-script "/cloudai_install/aiperf.sh" \
-  --aiperf-report-name "aiperf_report.csv" \
-  --aiperf-artifact-dir-name "aiperf_artifacts" \
-  --aiperf-args-concurrency "2" \
-  --aiperf-args-request-count "50" \
-  --aiperf-args-synthetic-input-tokens-mean "300" \
-  --aiperf-args-output-tokens-mean "500" \
   --aiperf-script /cloudai_run_results/aiperf.sh
diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
index 0ece2e800..c1a8b3c64 100644
--- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
+++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
@@ -193,9 +193,10 @@ def test_gen_script_args_contains_split_aiperf_accuracy_args(strategy: AIDynamoS
 
     result = strategy._gen_script_args(td)
 
-    assert '--aiperf-args-request-count "50"' in result
-    assert '--aiperf-args-synthetic-input-tokens-mean "300"' in result
-    assert '--aiperf-args-output-tokens-mean "500"' in result
+    script = (strategy.test_run.output_path / "aiperf.sh").read_text()
+    assert "--request-count 50" in script
+    assert "--synthetic-input-tokens-mean 300" in script
+    assert "--output-tokens-mean 500" in script
     assert f'--aiperf_accuracy-setup-cmd "{setup_cmd}"' in result
     assert '--aiperf_accuracy-name "aiperf_accuracy"' in result
     assert '--aiperf_accuracy-entrypoint "aiperf profile"' in result

From 10c1a0048cc1e2093dcdcf46dc60b9afa30feec5 Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 15:59:15 +0200
Subject: [PATCH 08/16] add remaining fork functionality

---
 conf/experimental/ai_dynamo/test/vllm.toml    |   6 +
 .../ai_dynamo/test_scenario/vllm_lmcache.toml |   5 +-
 .../ai_dynamo/test_scenario/vllm_slurm.toml   |  30 +++
 src/cloudai/workloads/ai_dynamo/__init__.py   |   2 +
 src/cloudai/workloads/ai_dynamo/ai_dynamo.py  |  25 +++
 src/cloudai/workloads/ai_dynamo/ai_dynamo.sh  |  38 ++++
 .../ai_dynamo/slurm_command_gen_strategy.py   | 212 +++++++++++++++---
 tests/ref_data/ai-dynamo-aiperf.sh            |  57 +++--
 .../test_command_gen_strategy_slurm.py        |  42 ++++
 9 files changed, 378 insertions(+), 39 deletions(-)

diff --git a/conf/experimental/ai_dynamo/test/vllm.toml b/conf/experimental/ai_dynamo/test/vllm.toml
index 581ecf3e7..ea2a4552c 100644
--- a/conf/experimental/ai_dynamo/test/vllm.toml
+++ b/conf/experimental/ai_dynamo/test/vllm.toml
@@ -74,11 +74,16 @@ workloads = "aiperf.sh"
     concurrency = 2
 
   [cmd_args.aiperf]
+  health-check-between-phases = true
+  continue-on-phase-failure = false
     [cmd_args.aiperf.args]
     concurrency = 2
+    endpoint-type = "chat"
     extra-inputs = '{"min_tokens":10}'
     output-tokens-mean = 500
     request-count = 50
+    server-metrics = "auto"
+    streaming = true
     synthetic-input-tokens-mean = 300
 
   [[cmd_args.aiperf_phases]]
@@ -94,6 +99,7 @@ workloads = "aiperf.sh"
     [cmd_args.aiperf_phases.args]
     concurrency = 4
     request-count = 50
+    streaming = false
 
   [cmd_args.aiperf_accuracy]
   entrypoint = "aiperf profile"
diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml
index f975e784e..c63319b4e 100644
--- a/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml
+++ b/conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml
@@ -24,7 +24,10 @@ description = "Self-contained AIDynamo scenario wiring vLLM disaggregated infere
 test_template_name = "AIDynamo"
 time_limit = "00:10:00"
 extra_container_mounts = ["/run/udev:/run/udev"]
-dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports", "cmd_args.aiperf_phases"]
+dse_excluded_args = [
+  "cmd_args.lmcache.lmcache_worker_ports",
+  "cmd_args.aiperf_phases",
+]
 
   [Tests.cmd_args]
   docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1"
diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml
index 45031da3a..7f279ab71 100644
--- a/conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml
+++ b/conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml
@@ -36,12 +36,28 @@ time_limit = "00:10:00"
       tensor-parallel-size = 4
       pipeline-parallel-size = 1
 
+    [[Tests.cmd_args.aiperf_phases]]
+    name = "round_1"
+      [Tests.cmd_args.aiperf_phases.args]
+      concurrency = 2
+      request-count = 50
+      server-metrics = "auto"
+
+    [[Tests.cmd_args.aiperf_phases]]
+    name = "round_2"
+      [Tests.cmd_args.aiperf_phases.args]
+      concurrency = 4
+      request-count = 50
+
 [[Tests]]
 id = "test.disagg.multinode"
 test_name = "vLLM"
 time_limit = "00:10:00"
 
   [Tests.cmd_args]
+    [Tests.cmd_args.dynamo.dcgm_exporter]
+    enabled = true
+
     [Tests.cmd_args.dynamo.prefill_worker]
     num-nodes = 2
       [Tests.cmd_args.dynamo.prefill_worker.args]
@@ -53,3 +69,17 @@ time_limit = "00:10:00"
       [Tests.cmd_args.dynamo.decode_worker.args]
       tensor-parallel-size = 4
       pipeline-parallel-size = 1
+
+    [[Tests.cmd_args.aiperf_phases]]
+    name = "round_1"
+      [Tests.cmd_args.aiperf_phases.args]
+      concurrency = 4
+      request-count = 50
+      server-metrics = "auto"
+
+    [[Tests.cmd_args.aiperf_phases]]
+    name = "round_2"
+      [Tests.cmd_args.aiperf_phases.args]
+      concurrency = 8
+      request-count = 50
+      server-metrics = "auto"
diff --git a/src/cloudai/workloads/ai_dynamo/__init__.py b/src/cloudai/workloads/ai_dynamo/__init__.py
index 86938dfec..fc7e2b376 100644
--- a/src/cloudai/workloads/ai_dynamo/__init__.py
+++ b/src/cloudai/workloads/ai_dynamo/__init__.py
@@ -23,6 +23,7 @@
     AIPerf,
     AIPerfAccuracy,
     AIPerfPhase,
+    DCGMExporter,
     GenAIPerf,
     LMCacheController,
     WorkerBaseArgs,
@@ -44,6 +45,7 @@
     "AIPerf",
     "AIPerfAccuracy",
     "AIPerfPhase",
+    "DCGMExporter",
     "GenAIPerf",
     "LMCacheController",
     "WorkerBaseArgs",
diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
index ed04d63ba..fcc6f2f27 100644
--- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
+++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
@@ -140,6 +140,20 @@ class WorkerConfig(BaseModel):
     )
 
 
+class DCGMExporter(BaseModel):
+    """Optional DCGM exporter launch configuration."""
+
+    model_config = ConfigDict(extra="forbid", populate_by_name=True)
+
+    enabled: bool = False
+    image_url: str = Field(
+        default="nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless",
+        serialization_alias="image-url",
+        validation_alias=AliasChoices("image-url", "image_url"),
+    )
+    port: int = 9401
+
+
 class AIDynamoArgs(BaseModel):
     """Arguments for AI Dynamo setup."""
 
@@ -205,6 +219,7 @@ def validate_connector(cls, v: str | list[str] | None) -> str | list[str] | None
         serialization_alias="nats-port",
         validation_alias=AliasChoices("nats-port", "nats_port"),
     )
+    dcgm_exporter: DCGMExporter = Field(default_factory=DCGMExporter)
 
     decode_worker: WorkerConfig = WorkerConfig(
         cmd="python3 -m dynamo.vllm",
@@ -269,6 +284,16 @@ class AIPerf(Workload):
         serialization_alias="artifact-dir-name",
         validation_alias=AliasChoices("artifact-dir-name", "artifact_dir_name"),
     )
+    health_check_between_phases: bool = Field(
+        default=True,
+        serialization_alias="health-check-between-phases",
+        validation_alias=AliasChoices("health-check-between-phases", "health_check_between_phases"),
+    )
+    continue_on_phase_failure: bool = Field(
+        default=False,
+        serialization_alias="continue-on-phase-failure",
+        validation_alias=AliasChoices("continue-on-phase-failure", "continue_on_phase_failure"),
+    )
 
     @property
     def installables(self) -> list[Installable]:
diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh
index 25c4126fc..add2cf61b 100644
--- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh
+++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh
@@ -60,6 +60,8 @@ dynamo_args["worker-error-pattern"]="zmq.error.ZMQError:.Address.already.in.use|
 dynamo_args["sgl-http-port"]=9001
 dynamo_args["prefill-port"]=30011
 dynamo_args["decode-port"]=30021
+dynamo_args["dcgm-exporter-enabled"]="False"
+dynamo_args["dcgm-exporter-port"]=9401
 
 function log()
 {
@@ -892,6 +894,38 @@ _query_frontend() {
   curl -s -X POST "${dynamo_args["url"]}/v1/chat/completions" -H "Content-Type: application/json" -d @$RESULTS_DIR/curl_cmd.json
 }
 
+_resolve_aiperf_server_metrics_urls() {  # Prints a comma-separated list of /metrics URLs on stdout.
+  local urls="http://${dynamo_args["frontend-node"]}:${dynamo_args["port"]}/metrics"
+  local base_system_port=${DYN_SYSTEM_PORT:-9090}
+  local decode_workers_per_node=${decode_config["workers-per-node"]:-1}
+  local prefill_workers_per_node=${prefill_config["workers-per-node"]:-1}
+  local IFS_SAVE="$IFS"
+  local node i
+  # IFS=',' splits the node lists below but stops newline splitting, so count with (( )) instead of $(seq).
+  IFS=','
+  for node in ${prefill_config["node-list"]:-}; do
+    for (( i = 0; i < prefill_workers_per_node; i++ )); do
+      urls="${urls},http://${node}:$((base_system_port + i))/metrics"
+    done
+  done
+
+  for node in ${decode_config["node-list"]:-}; do
+    for (( i = 0; i < decode_workers_per_node; i++ )); do
+      urls="${urls},http://${node}:$((base_system_port + i))/metrics"
+    done
+  done
+
+  if [[ "${dynamo_args["dcgm-exporter-enabled"]}" == "True" || "${dynamo_args["dcgm-exporter-enabled"]}" == "true" ]]; then
+    for node in ${decode_config["node-list"]:-},${prefill_config["node-list"]:-}; do
+      [[ -z "$node" ]] && continue
+      urls="${urls},http://${node}:${dynamo_args["dcgm-exporter-port"]}/metrics"
+    done
+  fi
+  IFS="$IFS_SAVE"
+
+  echo "$urls"
+}
+
 function setup_cufile()
 {
   export CUFILE_ENV_PATH_JSON="$RESULTS_DIR/cufile.json"
@@ -1059,6 +1093,10 @@ function launch_workload()
   local workload_name="${workload_config_ref["--name"]}"
   local script="${workload_config_ref["--script"]}"
   export FRONTEND_URL="${dynamo_args["url"]}"
+  export AIPERF_MODEL="${dynamo_args["model"]}"
+  export AIPERF_ENDPOINT="${dynamo_args["endpoint"]}"
+  export AIPERF_FAILURE_MARKER="${FATAL_ERROR_MARKER}"
+  export AIPERF_SERVER_METRICS_URLS="$(_resolve_aiperf_server_metrics_urls)"
 
   # Build config and workload args as proper bash arrays to preserve
   # multi-word values (e.g. --cmd "genai-perf profile") through word splitting.
diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index 447ca1154..32f962af3 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -118,13 +118,23 @@ def _prepare_lmcache_config(self):
         (self.test_run.output_path / LMCACHE_CONFIG_FILE_NAME).write_text(config)
         (self.test_run.output_path / LMCACHE_CONFIG_BACKUP_FILE_NAME).write_text(config)
 
-    def _aiperf_args_argv(self, args: dict[str, Any]) -> list[str]:
-        result = []
+    def _render_aiperf_args(self, args: dict[str, Any]) -> str:
+        parts: list[str] = []
         for key, value in args.items():
-            result.append(f"--{key}")
-            if value is not None:
-                result.append(str(value))
-        return result
+            if value is None or value is False:
+                continue
+
+            parts.append(f"--{key}")
+            if value is True:
+                continue
+
+            values = [",".join(str(item) for item in value)] if isinstance(value, list) else [str(value)]
+            for rendered_value in values:
+                if rendered_value in {"$FRONTEND_URL", "$AIPERF_SERVER_METRICS_URLS"}:
+                    parts.append(f'"{rendered_value}"')
+                else:
+                    parts.append(shlex.quote(rendered_value))
+        return " ".join(parts)
 
     def _runtime_result_path(self, path: str) -> str:
         if Path(path).is_absolute():
@@ -138,6 +148,23 @@ def _split_extra_args(self, value: Any) -> list[str]:
             return [str(item) for item in value]
         return shlex.split(str(value))
 
+    def _aiperf_phase_args(self, resolved_phase: AIPerf, artifact_dir: str) -> dict[str, Any]:
+        args: dict[str, Any] = {
+            "model": self.td.cmd_args.dynamo.model,
+            "endpoint-type": "chat",
+            "streaming": True,
+            "url": "$FRONTEND_URL",
+        }
+        args.update(resolved_phase.args.model_dump(by_alias=True, exclude_none=True))
+        args["artifact-dir"] = artifact_dir
+
+        if args.get("server-metrics") == "auto":
+            args["server-metrics"] = "$AIPERF_SERVER_METRICS_URLS"
+        if "server-metrics" not in args and "no-server-metrics" not in args:
+            args["no-server-metrics"] = True
+
+        return args
+
     def _resolve_aiperf_phase(self, phase: AIPerfPhase) -> AIPerf:
         base_data = self.td.cmd_args.aiperf.model_dump(by_alias=True, exclude_none=True)
         phase_data = phase.model_dump(by_alias=True, exclude_none=True, exclude_unset=True)
@@ -161,6 +188,9 @@ def _render_aiperf_script(self) -> str:
             'log() { echo "[$(date +%F\\ %T) $(hostname)]: $*"; }',
             "",
             ': "${FRONTEND_URL:?FRONTEND_URL is not set}"',
+            f': "${{AIPERF_MODEL:={self.td.cmd_args.dynamo.model}}}"',
+            f': "${{AIPERF_ENDPOINT:={self.td.cmd_args.dynamo.endpoint}}}"',
+            f': "${{AIPERF_FAILURE_MARKER:={self.CONTAINER_MOUNT_OUTPUT}/{self.td.failure_marker}}}"',
             "",
         ]
 
@@ -180,41 +210,63 @@ def _render_aiperf_script(self) -> str:
             artifact_dir = self._runtime_result_path(resolved_phase.artifact_dir_name)
             report_source = f"{artifact_dir}/profile_export_aiperf.csv"
             report_file = self._runtime_result_path(resolved_phase.report_name)
-            argv = [
-                *shlex.split(resolved_phase.cmd),
-                "--model",
-                self.td.cmd_args.dynamo.model,
-                "--endpoint-type",
-                "chat",
-                "--streaming",
-                "--artifact-dir",
-                artifact_dir,
-                "--no-server-metrics",
-                *self._aiperf_args_argv(resolved_phase.args.model_dump(by_alias=True, exclude_none=True)),
-                *self._split_extra_args(resolved_phase.extra_args),
+            cmd_parts = [
+                shlex.join(shlex.split(resolved_phase.cmd)),
+                self._render_aiperf_args(self._aiperf_phase_args(resolved_phase, artifact_dir)),
+                shlex.join(self._split_extra_args(resolved_phase.extra_args)),
             ]
-            cmd = f'{shlex.join(argv)} --url "$FRONTEND_URL"'
+            cmd = " ".join(part for part in cmd_parts if part)
             log_message = f"Running {phase.name}: {cmd}"
             lines.append(f"rm -rf {shlex.quote(artifact_dir)}")
             lines.append(f"mkdir -p {shlex.quote(artifact_dir)}")
             lines.append(f"log {shlex.quote(log_message)}")
+            lines.append("phase_status=0")
             if write_phase_logs:
                 log_file = self._runtime_result_path(f"aiperf_{phase.name}.log")
+                lines.append("set +e")
                 lines.append(f"{cmd} > {shlex.quote(log_file)} 2>&1")
+                lines.append("phase_status=$?")
+                lines.append("set -e")
             else:
+                lines.append("set +e")
                 lines.append(cmd)
+                lines.append("phase_status=$?")
+                lines.append("set -e")
 
-            lines.append(f"mkdir -p {shlex.quote(str(Path(report_file).parent))}")
+            lines.append('if [[ "$phase_status" -ne 0 ]]; then')
+            lines.append(f"  log {shlex.quote(f'AIPerf phase {phase.name} failed')}")
+            if not resolved_phase.continue_on_phase_failure:
+                lines.append('  exit "$phase_status"')
+            lines.append("fi")
+            lines.append('if [[ "$phase_status" -eq 0 ]]; then')
+
+            lines.append(f"  mkdir -p {shlex.quote(str(Path(report_file).parent))}")
             if report_source != report_file:
-                lines.append(f"cp {shlex.quote(report_source)} {shlex.quote(report_file)}")
-            lines.append(f"log {shlex.quote(f'AIPerf report saved to {report_file}')}")
+                lines.append(f"  cp {shlex.quote(report_source)} {shlex.quote(report_file)}")
+            lines.append(f"  log {shlex.quote(f'AIPerf report saved to {report_file}')}")
 
             if not single_phase and idx == len(phases) - 1:
                 final_report_file = self._runtime_result_path("aiperf_report.csv")
-                lines.append(f"mkdir -p {shlex.quote(str(Path(final_report_file).parent))}")
+                lines.append(f"  mkdir -p {shlex.quote(str(Path(final_report_file).parent))}")
                 if report_file != final_report_file:
-                    lines.append(f"cp {shlex.quote(report_file)} {shlex.quote(final_report_file)}")
-                lines.append(f"log {shlex.quote(f'Final AIPerf report saved to {final_report_file}')}")
+                    lines.append(f"  cp {shlex.quote(report_file)} {shlex.quote(final_report_file)}")
+                lines.append(f"  log {shlex.quote(f'Final AIPerf report saved to {final_report_file}')}")
+            if not single_phase and idx < len(phases) - 1 and resolved_phase.health_check_between_phases:
+                lines.append('  if [[ -f "$AIPERF_FAILURE_MARKER" ]]; then')
+                lines.append("    log 'FATAL: failure marker found between AIPerf phases'")
+                lines.append("    exit 1")
+                lines.append("  fi")
+                lines.append(
+                    '  if ! curl -fsS -X POST "${FRONTEND_URL}/${AIPERF_ENDPOINT}" '
+                    "-H 'Content-Type: application/json' "
+                    '-d "{\\"model\\":\\"${AIPERF_MODEL}\\",\\"messages\\":[{\\"role\\":\\"user\\",'
+                    '\\"content\\":\\"ping\\"}],\\"stream\\":false,\\"max_tokens\\":1}" '
+                    ">/dev/null; then"
+                )
+                lines.append("    log 'FATAL: frontend health probe failed between AIPerf phases'")
+                lines.append("    exit 1")
+                lines.append("  fi")
+            lines.append("fi")
             lines.append("")
 
         return "\n".join(lines)
@@ -258,9 +310,14 @@ def _gen_script_args(self, td: AIDynamoTestDefinition) -> List[str]:
                 exclude=[
                     "prefill_worker",
                     "decode_worker",
+                    "dcgm_exporter",
+                    "dcgm-exporter",
                 ],
             )
         )
+        if td.cmd_args.dynamo.dcgm_exporter.enabled:
+            args.append('--dynamo-dcgm-exporter-enabled "True"')
+            args.append(f'--dynamo-dcgm-exporter-port "{td.cmd_args.dynamo.dcgm_exporter.port}"')
 
         if td.cmd_args.dynamo.prefill_worker:
             args.extend(self._get_nested_toml_args(td.cmd_args.dynamo.prefill_worker, "--prefill-"))
@@ -298,6 +355,111 @@ def _gen_srun_command(self) -> str:
         srun_cmd.extend(self._gen_script_args(self.td))
         return " \\\n  ".join(srun_cmd) + "\n"
 
+    def _gen_dcgm_launcher_block(self) -> list[str]:
+        if not self.td.cmd_args.dynamo.dcgm_exporter.enabled:
+            return []
+
+        num_nodes, node_list = self.get_cached_nodes_spec()
+        out_dir = self.test_run.output_path.absolute()
+        port = self.td.cmd_args.dynamo.dcgm_exporter.port
+        image_url = self.td.cmd_args.dynamo.dcgm_exporter.image_url
+        wrapper_body = [
+            "#!/bin/bash",
+            "set -e",
+            "nohup docker run --rm --user root --gpus all --cap-add SYS_ADMIN \\",
+            f"  -e DCGM_EXPORTER_LISTEN=:{port} -p {port}:{port} \\",
+            '  -v "${RESULTS_DIR}:/cloudai_run_results" \\',
+            '  "${DCGM_IMAGE}" dcgm-exporter \\',
+            '  >> "${RESULTS_DIR}/dcgm_exporter_node${SLURM_NODEID:-0}.log" 2>&1 &',
+            "disown",
+            "exit 0",
+        ]
+        srun_parts = [
+            "srun",
+            "--export=ALL",
+            f"-N{num_nodes}",
+            *([] if not node_list else [f"--nodelist={','.join(node_list)}"]),
+            f"--ntasks={num_nodes}",
+            "--ntasks-per-node=1",
+            f"--output={out_dir / 'dcgm-node-%n-stdout.txt'}",
+            f"--error={out_dir / 'dcgm-node-%n-stderr.txt'}",
+            "bash",
+            str(out_dir / "run_dcgm.sh"),
+        ]
+
+        block = [
+            "# Start DCGM exporter via Docker on each node.",
+            f"export RESULTS_DIR={out_dir}",
+            f"export DCGM_IMAGE={shlex.quote(image_url)}",
+            "cat > \"$RESULTS_DIR/run_dcgm.sh\" << 'WRAPPER_DCGM_EOF'",
+            *wrapper_body,
+            "WRAPPER_DCGM_EOF",
+            'chmod +x "$RESULTS_DIR/run_dcgm.sh"',
+            " ".join(srun_parts),
+            "sleep 5",
+        ]
+        if node_list:
+            block.extend(
+                [
+                    "echo 'DCGM endpoints:' > \"$RESULTS_DIR/dcgm_endpoints.txt\"",
+                    "for n in "
+                    + " ".join(node_list)
+                    + f'; do echo "  http://$n:{port}/metrics" >> "$RESULTS_DIR/dcgm_endpoints.txt"; done',
+                    "",
+                ]
+            )
+        return block
+
+    def _gen_dcgm_cleanup_command(self) -> str | None:
+        if not self.td.cmd_args.dynamo.dcgm_exporter.enabled:
+            return None
+
+        num_nodes, node_list = self.get_cached_nodes_spec()
+        kill_cmd = 'docker ps -q -f ancestor="$DCGM_IMAGE" 2>/dev/null | xargs -r docker kill 2>/dev/null || true'
+        parts = [
+            "srun",
+            "--export=ALL",
+            f"-N{num_nodes}",
+            *([] if not node_list else [f"--nodelist={','.join(node_list)}"]),
+            f"--ntasks={num_nodes}",
+            "--ntasks-per-node=1",
+            "bash",
+            "-c",
+            shlex.quote(kill_cmd),
+        ]
+        return " ".join(parts)
+
+    def gen_exec_command(self) -> str:
+        srun_command = self._gen_srun_command()
+        command_list = []
+        indent = ""
+
+        if self.test_run.pre_test:
+            pre_test_command = self.gen_pre_test(self.test_run.pre_test, self.test_run.output_path)
+            command_list.extend([pre_test_command, "if [ $PRE_TEST_SUCCESS -eq 1 ]; then"])
+            indent = "    "
+
+        dcgm_block = self._gen_dcgm_launcher_block()
+        if dcgm_block:
+            command_list.extend(f"{indent}{line}" for line in dcgm_block)
+
+        command_list.append(f"{indent}{srun_command}")
+
+        dcgm_cleanup = self._gen_dcgm_cleanup_command()
+        if dcgm_cleanup:
+            command_list.append(f"{indent}# Kill DCGM exporter containers when test finishes")
+            command_list.append(f"{indent}{dcgm_cleanup}")
+
+        if self.test_run.post_test:
+            post_test_command = self.gen_post_test(self.test_run.post_test, self.test_run.output_path)
+            command_list.append(f"{indent}{post_test_command}")
+
+        if self.test_run.pre_test:
+            command_list.append("fi")
+
+        full_command = "\n".join(command_list).strip()
+        return self._write_sbatch_script(full_command)
+
     def _validate_worker_nodes(
         self, node_list: list[str], worker_nodes: str | None, num_nodes: int, worker_type: str
     ) -> None:
diff --git a/tests/ref_data/ai-dynamo-aiperf.sh b/tests/ref_data/ai-dynamo-aiperf.sh
index 3fcd013cb..bd73f2ab7 100644
--- a/tests/ref_data/ai-dynamo-aiperf.sh
+++ b/tests/ref_data/ai-dynamo-aiperf.sh
@@ -4,22 +4,53 @@ set -Eeuo pipefail
 log() { echo "[$(date +%F\ %T) $(hostname)]: $*"; }
 
 : "${FRONTEND_URL:?FRONTEND_URL is not set}"
+: "${AIPERF_MODEL:=model}"
+: "${AIPERF_ENDPOINT:=v1/chat/completions}"
+: "${AIPERF_FAILURE_MARKER:=/cloudai_run_results/failure-marker.txt}"
 
 rm -rf /cloudai_run_results/aiperf_artifacts/round_1
 mkdir -p /cloudai_run_results/aiperf_artifacts/round_1
-log 'Running round_1: aiperf profile --model model --endpoint-type chat --streaming --artifact-dir /cloudai_run_results/aiperf_artifacts/round_1 --no-server-metrics --concurrency 1 --request-count 50 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --url "$FRONTEND_URL"'
-aiperf profile --model model --endpoint-type chat --streaming --artifact-dir /cloudai_run_results/aiperf_artifacts/round_1 --no-server-metrics --concurrency 1 --request-count 50 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --url "$FRONTEND_URL" > /cloudai_run_results/aiperf_round_1.log 2>&1
-mkdir -p /cloudai_run_results
-cp /cloudai_run_results/aiperf_artifacts/round_1/profile_export_aiperf.csv /cloudai_run_results/aiperf_round_1_report.csv
-log 'AIPerf report saved to /cloudai_run_results/aiperf_round_1_report.csv'
+log 'Running round_1: aiperf profile --model model --endpoint-type chat --streaming --url "$FRONTEND_URL" --concurrency 1 --request-count 50 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --artifact-dir /cloudai_run_results/aiperf_artifacts/round_1 --no-server-metrics'
+phase_status=0
+set +e
+aiperf profile --model model --endpoint-type chat --streaming --url "$FRONTEND_URL" --concurrency 1 --request-count 50 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --artifact-dir /cloudai_run_results/aiperf_artifacts/round_1 --no-server-metrics > /cloudai_run_results/aiperf_round_1.log 2>&1
+phase_status=$?
+set -e
+if [[ "$phase_status" -ne 0 ]]; then
+  log 'AIPerf phase round_1 failed'
+  exit "$phase_status"
+fi
+if [[ "$phase_status" -eq 0 ]]; then
+  mkdir -p /cloudai_run_results
+  cp /cloudai_run_results/aiperf_artifacts/round_1/profile_export_aiperf.csv /cloudai_run_results/aiperf_round_1_report.csv
+  log 'AIPerf report saved to /cloudai_run_results/aiperf_round_1_report.csv'
+  if [[ -f "$AIPERF_FAILURE_MARKER" ]]; then
+    log 'FATAL: failure marker found between AIPerf phases'
+    exit 1
+  fi
+  if ! curl -fsS -X POST "${FRONTEND_URL}/${AIPERF_ENDPOINT}" -H 'Content-Type: application/json' -d "{\"model\":\"${AIPERF_MODEL}\",\"messages\":[{\"role\":\"user\",\"content\":\"ping\"}],\"stream\":false,\"max_tokens\":1}" >/dev/null; then
+    log 'FATAL: frontend health probe failed between AIPerf phases'
+    exit 1
+  fi
+fi
 
 rm -rf /cloudai_run_results/aiperf_artifacts/round_2
 mkdir -p /cloudai_run_results/aiperf_artifacts/round_2
-log 'Running round_2: aiperf profile --model model --endpoint-type chat --streaming --artifact-dir /cloudai_run_results/aiperf_artifacts/round_2 --no-server-metrics --concurrency 2 --request-count 10 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --url "$FRONTEND_URL"'
-aiperf profile --model model --endpoint-type chat --streaming --artifact-dir /cloudai_run_results/aiperf_artifacts/round_2 --no-server-metrics --concurrency 2 --request-count 10 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --url "$FRONTEND_URL" > /cloudai_run_results/aiperf_round_2.log 2>&1
-mkdir -p /cloudai_run_results
-cp /cloudai_run_results/aiperf_artifacts/round_2/profile_export_aiperf.csv /cloudai_run_results/aiperf_round_2_report.csv
-log 'AIPerf report saved to /cloudai_run_results/aiperf_round_2_report.csv'
-mkdir -p /cloudai_run_results
-cp /cloudai_run_results/aiperf_round_2_report.csv /cloudai_run_results/aiperf_report.csv
-log 'Final AIPerf report saved to /cloudai_run_results/aiperf_report.csv'
+log 'Running round_2: aiperf profile --model model --endpoint-type chat --streaming --url "$FRONTEND_URL" --concurrency 2 --request-count 10 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --artifact-dir /cloudai_run_results/aiperf_artifacts/round_2 --no-server-metrics'
+phase_status=0
+set +e
+aiperf profile --model model --endpoint-type chat --streaming --url "$FRONTEND_URL" --concurrency 2 --request-count 10 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --artifact-dir /cloudai_run_results/aiperf_artifacts/round_2 --no-server-metrics > /cloudai_run_results/aiperf_round_2.log 2>&1
+phase_status=$?
+set -e
+if [[ "$phase_status" -ne 0 ]]; then
+  log 'AIPerf phase round_2 failed'
+  exit "$phase_status"
+fi
+if [[ "$phase_status" -eq 0 ]]; then
+  mkdir -p /cloudai_run_results
+  cp /cloudai_run_results/aiperf_artifacts/round_2/profile_export_aiperf.csv /cloudai_run_results/aiperf_round_2_report.csv
+  log 'AIPerf report saved to /cloudai_run_results/aiperf_round_2_report.csv'
+  mkdir -p /cloudai_run_results
+  cp /cloudai_run_results/aiperf_round_2_report.csv /cloudai_run_results/aiperf_report.csv
+  log 'Final AIPerf report saved to /cloudai_run_results/aiperf_report.csv'
+fi
diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
index c1a8b3c64..999fa0b60 100644
--- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
+++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
@@ -256,6 +256,48 @@ def test_gen_script_args_writes_resolved_aiperf_script(strategy: AIDynamoSlurmCo
     assert f"{strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_report.csv" in script
 
 
+def test_generated_aiperf_script_supports_core_overrides_and_server_metrics_auto(
+    strategy: AIDynamoSlurmCommandGenStrategy,
+) -> None:
+    td = cast(AIDynamoTestDefinition, strategy.test_run.test)
+    td.cmd_args.workloads = "aiperf.sh"
+    td.cmd_args.aiperf = AIPerf.model_validate(
+        {
+            "args": {
+                "model": "custom-model",
+                "endpoint-type": "completions",
+                "streaming": False,
+                "server-metrics": "auto",
+                "request-count": 10,
+            },
+        }
+    )
+
+    strategy._gen_script_args(td)
+
+    script = (strategy.test_run.output_path / "aiperf.sh").read_text()
+    assert "--model custom-model" in script
+    assert "--endpoint-type completions" in script
+    assert "--streaming" not in script
+    assert '--server-metrics "$AIPERF_SERVER_METRICS_URLS"' in script
+    assert "--no-server-metrics" not in script
+
+
+def test_dcgm_exporter_generates_launcher_and_runtime_flags(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
+    td = cast(AIDynamoTestDefinition, strategy.test_run.test)
+    td.cmd_args.dynamo.dcgm_exporter.enabled = True
+    td.cmd_args.dynamo.dcgm_exporter.image_url = "nvcr.io/test/dcgm:latest"
+    td.cmd_args.dynamo.dcgm_exporter.port = 9501
+
+    args = strategy._gen_script_args(td)
+    block = strategy._gen_dcgm_launcher_block()
+
+    assert '--dynamo-dcgm-exporter-enabled "True"' in args
+    assert '--dynamo-dcgm-exporter-port "9501"' in args
+    assert any("nvcr.io/test/dcgm:latest" in line for line in block)
+    assert any("DCGM_EXPORTER_LISTEN=:9501" in line for line in block)
+
+
 def test_aiperf_phase_roundtrip_does_not_emit_default_report_name(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
     td = cast(AIDynamoTestDefinition, strategy.test_run.test)
     td.cmd_args.workloads = "aiperf.sh"

From 08d8e0a6623e5b1aca97a7be820359e8d1fcefce Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 16:09:13 +0200
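Subject: [PATCH 09/16] fix DCGM endpoint URLs for joined node lists

With IFS=',' the literal comma between the two unquoted expansions in
the `for` word list is not a word-splitting point, so the last decode
node and the first prefill node were emitted as one "nodeA,nodeB" word
and produced a malformed metrics URL. Join both node lists into a
variable first so the whole string is split on commas.
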
---
 src/cloudai/workloads/ai_dynamo/ai_dynamo.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh
index add2cf61b..5697a78ea 100644
--- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh
+++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.sh
@@ -916,7 +916,8 @@ _resolve_aiperf_server_metrics_urls() {
   done
 
   if [[ "${dynamo_args["dcgm-exporter-enabled"]}" == "True" || "${dynamo_args["dcgm-exporter-enabled"]}" == "true" ]]; then
-    for node in ${decode_config["node-list"]:-},${prefill_config["node-list"]:-}; do
+    local dcgm_nodes="${decode_config["node-list"]:-},${prefill_config["node-list"]:-}"
+    for node in $dcgm_nodes; do
       [[ -z "$node" ]] && continue
       urls="${urls},http://${node}:${dynamo_args["dcgm-exporter-port"]}/metrics"
     done

From 16e9639d8c680d804d4ab5680ea07f31fa1b88c3 Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 16:21:00 +0200
Subject: [PATCH 10/16] switch DCGM exporter from docker run to an enroot container via srun

---
 .../ai_dynamo/test_scenario/vllm_slurm.toml   |  1 +
 src/cloudai/workloads/ai_dynamo/ai_dynamo.py  | 22 +++++-
 .../ai_dynamo/slurm_command_gen_strategy.py   | 74 +++++++++----------
 .../test_command_gen_strategy_slurm.py        | 20 ++++-
 4 files changed, 71 insertions(+), 46 deletions(-)

diff --git a/conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml b/conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml
index 7f279ab71..decfead3d 100644
--- a/conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml
+++ b/conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml
@@ -57,6 +57,7 @@ time_limit = "00:10:00"
   [Tests.cmd_args]
     [Tests.cmd_args.dynamo.dcgm_exporter]
     enabled = true
+    docker-image-url = "nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless"
 
     [Tests.cmd_args.dynamo.prefill_worker]
     num-nodes = 2
diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
index fcc6f2f27..3f72ed1c7 100644
--- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
+++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
@@ -146,10 +146,10 @@ class DCGMExporter(BaseModel):
     model_config = ConfigDict(extra="forbid", populate_by_name=True)
 
     enabled: bool = False
-    image_url: str = Field(
+    docker_image_url: str = Field(
         default="nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless",
-        serialization_alias="image-url",
-        validation_alias=AliasChoices("image-url", "image_url"),
+        serialization_alias="docker-image-url",
+        validation_alias=AliasChoices("docker-image-url", "docker_image_url", "image-url", "image_url"),
     )
     port: int = 9401
 
@@ -434,6 +434,7 @@ class AIDynamoTestDefinition(TestDefinition):
     model_config = ConfigDict(extra="forbid")
     cmd_args: AIDynamoCmdArgs
     _docker_image: Optional[DockerImage] = None
+    _dcgm_exporter_image: Optional[DockerImage] = None
     script: File = File(Path(__file__).parent.parent / "ai_dynamo/ai_dynamo.sh")
     repo: GitRepo = GitRepo(
         url="https://github.com/ai-dynamo/dynamo.git", commit="f7e468c7e8ff0d1426db987564e60572167e8464"
@@ -467,6 +468,16 @@ def docker_image(self) -> DockerImage:
             self._docker_image = DockerImage(url=self.cmd_args.docker_image_url)
         return self._docker_image
 
+    @property
+    def dcgm_exporter_image(self) -> DockerImage | None:
+        if not self.cmd_args.dynamo.dcgm_exporter.enabled:
+            return None
+
+        image_url = self.cmd_args.dynamo.dcgm_exporter.docker_image_url
+        if not self._dcgm_exporter_image or self._dcgm_exporter_image.url != image_url:
+            self._dcgm_exporter_image = DockerImage(url=image_url)
+        return self._dcgm_exporter_image
+
     @property
     def hf_model(self) -> HFModel:
         if not self._hf_model:
@@ -477,13 +488,16 @@ def hf_model(self) -> HFModel:
     @property
     def installables(self) -> list[Installable]:
         """Get all installables for this test definition."""
-        return [
+        installables = [
             self.docker_image,
             self.repo,
             self.script,
             self.hf_model,
             *self.cmd_args.installables,
         ]
+        if self.dcgm_exporter_image:
+            installables.append(self.dcgm_exporter_image)
+        return installables
 
     def _has_aiperf_accuracy_results(self, output_path: Path) -> bool:
         accuracy = parse_aiperf_accuracy(output_path)
diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index 32f962af3..50486e6f1 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -23,7 +23,7 @@
 from pydantic import BaseModel, TypeAdapter, ValidationError
 
 import cloudai.util
-from cloudai.core import File, GitRepo
+from cloudai.core import File, GitRepo, System, TestRun
 from cloudai.systems.slurm import SlurmCommandGenStrategy
 
 from .ai_dynamo import (
@@ -40,6 +40,10 @@
 class AIDynamoSlurmCommandGenStrategy(SlurmCommandGenStrategy):
     """Command generation strategy for AI Dynamo on Slurm systems."""
 
+    def __init__(self, system: System, test_run: TestRun) -> None:
+        super().__init__(system, test_run)
+        self._current_image_path: str | None = None
+
     @property
     def td(self) -> AIDynamoTestDefinition:
         return cast(AIDynamoTestDefinition, self.test_run.test)
@@ -65,10 +69,20 @@ def final_env_vars(self, value: dict[str, str | list[str]]) -> None:
         self._final_env_vars = value
 
     def image_path(self) -> str | None:
+        if self._current_image_path:
+            return self._current_image_path
         if self.td.docker_image and self.td.docker_image.installed_path:
             return str(self.td.docker_image.installed_path)
         return None
 
+    def _gen_srun_prefix_for_image(self, image_path: str) -> list[str]:
+        current_image_path = self._current_image_path
+        self._current_image_path = image_path
+        try:
+            return self.gen_srun_prefix(with_num_nodes=False)
+        finally:
+            self._current_image_path = current_image_path
+
     def _get_toml_args(self, base_model: BaseModel, prefix: str, exclude: List[str] | None = None) -> List[str]:
         args = []
         exclude = exclude or []
@@ -356,27 +370,17 @@ def _gen_srun_command(self) -> str:
         return " \\\n  ".join(srun_cmd) + "\n"
 
     def _gen_dcgm_launcher_block(self) -> list[str]:
-        if not self.td.cmd_args.dynamo.dcgm_exporter.enabled:
+        dcgm_image = self.td.dcgm_exporter_image
+        if not dcgm_image:
             return []
 
         num_nodes, node_list = self.get_cached_nodes_spec()
         out_dir = self.test_run.output_path.absolute()
         port = self.td.cmd_args.dynamo.dcgm_exporter.port
-        image_url = self.td.cmd_args.dynamo.dcgm_exporter.image_url
-        wrapper_body = [
-            "#!/bin/bash",
-            "set -e",
-            "nohup docker run --rm --user root --gpus all --cap-add SYS_ADMIN \\",
-            f"  -e DCGM_EXPORTER_LISTEN=:{port} -p {port}:{port} \\",
-            '  -v "${RESULTS_DIR}:/cloudai_run_results" \\',
-            '  "${DCGM_IMAGE}" dcgm-exporter \\',
-            '  >> "${RESULTS_DIR}/dcgm_exporter_node${SLURM_NODEID:-0}.log" 2>&1 &',
-            "disown",
-            "exit 0",
-        ]
+        dcgm_cmd = f"DCGM_EXPORTER_LISTEN=:{port} dcgm-exporter"
         srun_parts = [
-            "srun",
-            "--export=ALL",
+            *self._gen_srun_prefix_for_image(str(dcgm_image.installed_path)),
+            "--overlap",
             f"-N{num_nodes}",
             *([] if not node_list else [f"--nodelist={','.join(node_list)}"]),
             f"--ntasks={num_nodes}",
@@ -384,18 +388,16 @@ def _gen_dcgm_launcher_block(self) -> list[str]:
             f"--output={out_dir / 'dcgm-node-%n-stdout.txt'}",
             f"--error={out_dir / 'dcgm-node-%n-stderr.txt'}",
             "bash",
-            str(out_dir / "run_dcgm.sh"),
+            "-lc",
+            shlex.quote(dcgm_cmd),
         ]
 
         block = [
-            "# Start DCGM exporter via Docker on each node.",
-            f"export RESULTS_DIR={out_dir}",
-            f"export DCGM_IMAGE={shlex.quote(image_url)}",
-            "cat > \"$RESULTS_DIR/run_dcgm.sh\" << 'WRAPPER_DCGM_EOF'",
-            *wrapper_body,
-            "WRAPPER_DCGM_EOF",
-            'chmod +x "$RESULTS_DIR/run_dcgm.sh"',
-            " ".join(srun_parts),
+            "# Start DCGM exporter on each node.",
+            'echo "Starting DCGM exporter..."',
+            " ".join(srun_parts) + " &",
+            "DCGM_EXPORTER_SRUN_PID=$!",
+            'echo "DCGM exporter srun PID: ${DCGM_EXPORTER_SRUN_PID}"',
             "sleep 5",
         ]
         if node_list:
@@ -414,20 +416,12 @@ def _gen_dcgm_cleanup_command(self) -> str | None:
         if not self.td.cmd_args.dynamo.dcgm_exporter.enabled:
             return None
 
-        num_nodes, node_list = self.get_cached_nodes_spec()
-        kill_cmd = 'docker ps -q -f ancestor="$DCGM_IMAGE" 2>/dev/null | xargs -r docker kill 2>/dev/null || true'
-        parts = [
-            "srun",
-            "--export=ALL",
-            f"-N{num_nodes}",
-            *([] if not node_list else [f"--nodelist={','.join(node_list)}"]),
-            f"--ntasks={num_nodes}",
-            "--ntasks-per-node=1",
-            "bash",
-            "-c",
-            shlex.quote(kill_cmd),
-        ]
-        return " ".join(parts)
+        return (
+            'if [[ -n "${DCGM_EXPORTER_SRUN_PID:-}" ]]; then '
+            'kill "${DCGM_EXPORTER_SRUN_PID}" 2>/dev/null || true; '
+            'wait "${DCGM_EXPORTER_SRUN_PID}" 2>/dev/null || true; '
+            "fi"
+        )
 
     def gen_exec_command(self) -> str:
         srun_command = self._gen_srun_command()
@@ -447,7 +441,7 @@ def gen_exec_command(self) -> str:
 
         dcgm_cleanup = self._gen_dcgm_cleanup_command()
         if dcgm_cleanup:
-            command_list.append(f"{indent}# Kill DCGM exporter containers when test finishes")
+            command_list.append(f"{indent}# Stop DCGM exporter when test finishes")
             command_list.append(f"{indent}{dcgm_cleanup}")
 
         if self.test_run.post_test:
diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
index 999fa0b60..a6c1ba7a3 100644
--- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
+++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
@@ -286,7 +286,7 @@ def test_generated_aiperf_script_supports_core_overrides_and_server_metrics_auto
 def test_dcgm_exporter_generates_launcher_and_runtime_flags(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
     td = cast(AIDynamoTestDefinition, strategy.test_run.test)
     td.cmd_args.dynamo.dcgm_exporter.enabled = True
-    td.cmd_args.dynamo.dcgm_exporter.image_url = "nvcr.io/test/dcgm:latest"
+    td.cmd_args.dynamo.dcgm_exporter.docker_image_url = "nvcr.io/test/dcgm:latest"
     td.cmd_args.dynamo.dcgm_exporter.port = 9501
 
     args = strategy._gen_script_args(td)
@@ -294,8 +294,24 @@ def test_dcgm_exporter_generates_launcher_and_runtime_flags(strategy: AIDynamoSl
 
     assert '--dynamo-dcgm-exporter-enabled "True"' in args
     assert '--dynamo-dcgm-exporter-port "9501"' in args
-    assert any("nvcr.io/test/dcgm:latest" in line for line in block)
+    assert any("--container-image=nvcr.io/test/dcgm:latest" in line for line in block)
     assert any("DCGM_EXPORTER_LISTEN=:9501" in line for line in block)
+    assert not any("docker run" in line for line in block)
+
+
+def test_dcgm_exporter_adds_configured_docker_image_installable(cmd_args: AIDynamoCmdArgs) -> None:
+    cmd_args.dynamo.dcgm_exporter.enabled = True
+    cmd_args.dynamo.dcgm_exporter.docker_image_url = "nvcr.io/test/dcgm:latest"
+    tdef = AIDynamoTestDefinition(
+        name="test",
+        description="desc",
+        test_template_name="template",
+        cmd_args=cmd_args,
+    )
+
+    assert tdef.dcgm_exporter_image is not None
+    assert tdef.dcgm_exporter_image.url == "nvcr.io/test/dcgm:latest"
+    assert tdef.dcgm_exporter_image in tdef.installables
 
 
 def test_aiperf_phase_roundtrip_does_not_emit_default_report_name(strategy: AIDynamoSlurmCommandGenStrategy) -> None:

From baa04d95db6e650876a643caa0de42ca7cf62cd0 Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 16:27:35 +0200
Subject: [PATCH 11/16] remove _current_image_path state from AIDynamo Slurm strategy

---
 .../ai_dynamo/slurm_command_gen_strategy.py   | 29 +++++++++----------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index 50486e6f1..2d377805b 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -23,7 +23,7 @@
 from pydantic import BaseModel, TypeAdapter, ValidationError
 
 import cloudai.util
-from cloudai.core import File, GitRepo, System, TestRun
+from cloudai.core import File, GitRepo
 from cloudai.systems.slurm import SlurmCommandGenStrategy
 
 from .ai_dynamo import (
@@ -40,10 +40,6 @@
 class AIDynamoSlurmCommandGenStrategy(SlurmCommandGenStrategy):
     """Command generation strategy for AI Dynamo on Slurm systems."""
 
-    def __init__(self, system: System, test_run: TestRun) -> None:
-        super().__init__(system, test_run)
-        self._current_image_path: str | None = None
-
     @property
     def td(self) -> AIDynamoTestDefinition:
         return cast(AIDynamoTestDefinition, self.test_run.test)
@@ -69,19 +65,22 @@ def final_env_vars(self, value: dict[str, str | list[str]]) -> None:
         self._final_env_vars = value
 
     def image_path(self) -> str | None:
-        if self._current_image_path:
-            return self._current_image_path
         if self.td.docker_image and self.td.docker_image.installed_path:
             return str(self.td.docker_image.installed_path)
         return None
 
-    def _gen_srun_prefix_for_image(self, image_path: str) -> list[str]:
-        current_image_path = self._current_image_path
-        self._current_image_path = image_path
-        try:
-            return self.gen_srun_prefix(with_num_nodes=False)
-        finally:
-            self._current_image_path = current_image_path
+    def _gen_dcgm_srun_prefix(self, image_path: str) -> list[str]:
+        srun_parts = ["srun", "--export=ALL", f"--mpi={self.mpi}", f"--container-image={image_path}"]
+        mounts = self.container_mounts()
+        if mounts:
+            srun_parts.append(f"--container-mounts={','.join(mounts)}")
+        if not self.system.container_mount_home:
+            srun_parts.append("--no-container-mount-home")
+        if self.system.extra_srun_args:
+            srun_parts.append(self.system.extra_srun_args)
+        if self.test_run.extra_srun_args:
+            srun_parts.append(self.test_run.extra_srun_args)
+        return srun_parts
 
     def _get_toml_args(self, base_model: BaseModel, prefix: str, exclude: List[str] | None = None) -> List[str]:
         args = []
@@ -379,7 +378,7 @@ def _gen_dcgm_launcher_block(self) -> list[str]:
         port = self.td.cmd_args.dynamo.dcgm_exporter.port
         dcgm_cmd = f"DCGM_EXPORTER_LISTEN=:{port} dcgm-exporter"
         srun_parts = [
-            *self._gen_srun_prefix_for_image(str(dcgm_image.installed_path)),
+            *self._gen_dcgm_srun_prefix(str(dcgm_image.installed_path)),
             "--overlap",
             f"-N{num_nodes}",
             *([] if not node_list else [f"--nodelist={','.join(node_list)}"]),

From b749cb35b02ce02624d58f0b09a4b34f36510348 Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 16:37:48 +0200
Subject: [PATCH 12/16] fail early if DCGM exporter endpoints are unreachable

---
 .../ai_dynamo/slurm_command_gen_strategy.py   | 38 ++++++++++++++++---
 .../test_command_gen_strategy_slurm.py        |  3 ++
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index 2d377805b..c36408ca9 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -398,17 +398,45 @@ def _gen_dcgm_launcher_block(self) -> list[str]:
             "DCGM_EXPORTER_SRUN_PID=$!",
             'echo "DCGM exporter srun PID: ${DCGM_EXPORTER_SRUN_PID}"',
             "sleep 5",
+            'echo "Checking DCGM exporter metrics endpoints..."',
+            "DCGM_EXPORTER_STARTUP_TIMEOUT=${DCGM_EXPORTER_STARTUP_TIMEOUT:-60}",
         ]
         if node_list:
             block.extend(
                 [
-                    "echo 'DCGM endpoints:' > \"$RESULTS_DIR/dcgm_endpoints.txt\"",
-                    "for n in "
-                    + " ".join(node_list)
-                    + f'; do echo "  http://$n:{port}/metrics" >> "$RESULTS_DIR/dcgm_endpoints.txt"; done',
-                    "",
+                    "dcgm_nodes=(" + " ".join(shlex.quote(node) for node in node_list) + ")",
                 ]
             )
+        else:
+            block.append('mapfile -t dcgm_nodes < <(scontrol show hostnames "$SLURM_JOB_NODELIST")')
+        endpoints_file = shlex.quote(str(out_dir / "dcgm_endpoints.txt"))
+        block.extend(
+            [
+                f": > {endpoints_file}",
+                "dcgm_failed=0",
+                'for node in "${dcgm_nodes[@]}"; do',
+                f'    dcgm_url="http://${{node}}:{port}/metrics"',
+                f'    echo "  ${{dcgm_url}}" >> {endpoints_file}',
+                "    deadline=$((SECONDS + DCGM_EXPORTER_STARTUP_TIMEOUT))",
+                '    until curl -fsS --max-time 2 "${dcgm_url}" >/dev/null; do',
+                "        if (( SECONDS >= deadline )); then",
+                '            echo "FATAL: DCGM exporter metrics endpoint is unreachable: ${dcgm_url}" >&2',
+                "            dcgm_failed=1",
+                "            break",
+                "        fi",
+                "        sleep 2",
+                "    done",
+                "    if (( dcgm_failed != 0 )); then break; fi",
+                '    echo "DCGM exporter reachable: ${dcgm_url}"',
+                "done",
+                "if (( dcgm_failed != 0 )); then",
+                '    kill "${DCGM_EXPORTER_SRUN_PID}" 2>/dev/null || true',
+                '    wait "${DCGM_EXPORTER_SRUN_PID}" 2>/dev/null || true',
+                "    exit 1",
+                "fi",
+                "",
+            ]
+        )
         return block
 
     def _gen_dcgm_cleanup_command(self) -> str | None:
diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
index a6c1ba7a3..533b059b0 100644
--- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
+++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
@@ -296,6 +296,9 @@ def test_dcgm_exporter_generates_launcher_and_runtime_flags(strategy: AIDynamoSl
     assert '--dynamo-dcgm-exporter-port "9501"' in args
     assert any("--container-image=nvcr.io/test/dcgm:latest" in line for line in block)
     assert any("DCGM_EXPORTER_LISTEN=:9501" in line for line in block)
+    assert any("DCGM_EXPORTER_STARTUP_TIMEOUT" in line for line in block)
+    assert any('curl -fsS --max-time 2 "${dcgm_url}"' in line for line in block)
+    assert any("FATAL: DCGM exporter metrics endpoint is unreachable" in line for line in block)
     assert not any("docker run" in line for line in block)
 
 

From f39797abd376b892e783b5d07046a840cc0eb52b Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 16:49:31 +0200
Subject: [PATCH 13/16] clean up escaping of hardcoded env vars

---
 .../ai_dynamo/slurm_command_gen_strategy.py   | 30 ++++++++++++++-----
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index c36408ca9..563fd7054 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -143,10 +143,7 @@ def _render_aiperf_args(self, args: dict[str, Any]) -> str:
 
             values = [",".join(str(item) for item in value)] if isinstance(value, list) else [str(value)]
             for rendered_value in values:
-                if rendered_value in {"$FRONTEND_URL", "$AIPERF_SERVER_METRICS_URLS"}:
-                    parts.append(f'"{rendered_value}"')
-                else:
-                    parts.append(shlex.quote(rendered_value))
+                parts.append(shlex.quote(rendered_value))
         return " ".join(parts)
 
     def _runtime_result_path(self, path: str) -> str:
@@ -166,18 +163,35 @@ def _aiperf_phase_args(self, resolved_phase: AIPerf, artifact_dir: str) -> dict[
             "model": self.td.cmd_args.dynamo.model,
             "endpoint-type": "chat",
             "streaming": True,
-            "url": "$FRONTEND_URL",
         }
         args.update(resolved_phase.args.model_dump(by_alias=True, exclude_none=True))
         args["artifact-dir"] = artifact_dir
 
-        if args.get("server-metrics") == "auto":
-            args["server-metrics"] = "$AIPERF_SERVER_METRICS_URLS"
         if "server-metrics" not in args and "no-server-metrics" not in args:
             args["no-server-metrics"] = True
 
         return args
 
+    def _render_aiperf_phase_args(self, resolved_phase: AIPerf, artifact_dir: str) -> str:
+        args = self._aiperf_phase_args(resolved_phase, artifact_dir)
+        url = args.pop("url", None)
+        server_metrics_auto = args.get("server-metrics") == "auto"
+        if server_metrics_auto:
+            args.pop("server-metrics")
+
+        parts = []
+        for key in ("model", "endpoint-type", "streaming"):
+            if key in args:
+                parts.append(self._render_aiperf_args({key: args.pop(key)}))
+        if url is None:
+            parts.append('--url "$FRONTEND_URL"')
+        else:
+            parts.append(self._render_aiperf_args({"url": url}))
+        parts.append(self._render_aiperf_args(args))
+        if server_metrics_auto:
+            parts.append('--server-metrics "$AIPERF_SERVER_METRICS_URLS"')
+        return " ".join(part for part in parts if part)
+
     def _resolve_aiperf_phase(self, phase: AIPerfPhase) -> AIPerf:
         base_data = self.td.cmd_args.aiperf.model_dump(by_alias=True, exclude_none=True)
         phase_data = phase.model_dump(by_alias=True, exclude_none=True, exclude_unset=True)
@@ -225,7 +239,7 @@ def _render_aiperf_script(self) -> str:
             report_file = self._runtime_result_path(resolved_phase.report_name)
             cmd_parts = [
                 shlex.join(shlex.split(resolved_phase.cmd)),
-                self._render_aiperf_args(self._aiperf_phase_args(resolved_phase, artifact_dir)),
+                self._render_aiperf_phase_args(resolved_phase, artifact_dir),
                 shlex.join(self._split_extra_args(resolved_phase.extra_args)),
             ]
             cmd = " ".join(part for part in cmd_parts if part)

From 9042f60ac6a46e6d17c8589aa19d2d546b5b1eb0 Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 18:42:22 +0200
Subject: [PATCH 14/16] replace kill with scancel

---
 src/cloudai/workloads/ai_dynamo/ai_dynamo.py  |  8 ++-
 .../ai_dynamo/slurm_command_gen_strategy.py   | 49 ++++++++++-------
 tests/ref_data/ai-dynamo-aiperf.sh            |  8 +--
 tests/ref_data/ai-dynamo.sbatch               | 52 +++++++++++++++++++
 tests/test_acceptance.py                      |  6 ++-
 .../test_command_gen_strategy_slurm.py        | 20 +++++++
 6 files changed, 118 insertions(+), 25 deletions(-)

diff --git a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
index 3f72ed1c7..b85b35d9a 100644
--- a/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
+++ b/src/cloudai/workloads/ai_dynamo/ai_dynamo.py
@@ -299,6 +299,12 @@ class AIPerf(Workload):
     def installables(self) -> list[Installable]:
         return [self.script]
 
+    @model_validator(mode="after")
+    def validate_extra_args(self) -> "AIPerf":
+        if isinstance(self.extra_args, list):
+            raise ValueError("AIPerf extra_args must be a string with explicit CLI syntax")
+        return self
+
 
 class AIPerfPhase(BaseModel):
     """Named AIPerf phase that overrides the base AIPerf configuration."""
@@ -323,7 +329,7 @@ class AIPerfPhase(BaseModel):
         validation_alias=AliasChoices("artifact-dir-name", "artifact_dir_name"),
     )
     args: Args = Field(default_factory=Args)
-    extra_args: str | list[str] | None = Field(
+    extra_args: str | None = Field(
         default=None,
         serialization_alias="extra-args",
         validation_alias=AliasChoices("extra-args", "extra_args"),
diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index 563fd7054..29eb9fd3c 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -136,14 +136,17 @@ def _render_aiperf_args(self, args: dict[str, Any]) -> str:
         for key, value in args.items():
             if value is None or value is False:
                 continue
+            if isinstance(value, list | dict):
+                raise ValueError(
+                    f"AIPerf argument {key!r} must be a scalar value. "
+                    "Use a string with AIPerf CLI syntax for multi-value arguments."
+                )
 
             parts.append(f"--{key}")
             if value is True:
                 continue
 
-            values = [",".join(str(item) for item in value)] if isinstance(value, list) else [str(value)]
-            for rendered_value in values:
-                parts.append(shlex.quote(rendered_value))
+            parts.append(shlex.quote(str(value)))
         return " ".join(parts)
 
     def _runtime_result_path(self, path: str) -> str:
@@ -151,13 +154,6 @@ def _runtime_result_path(self, path: str) -> str:
             return path
         return f"{self.CONTAINER_MOUNT_OUTPUT}/{path}"
 
-    def _split_extra_args(self, value: Any) -> list[str]:
-        if value is None:
-            return []
-        if isinstance(value, list):
-            return [str(item) for item in value]
-        return shlex.split(str(value))
-
     def _aiperf_phase_args(self, resolved_phase: AIPerf, artifact_dir: str) -> dict[str, Any]:
         args: dict[str, Any] = {
             "model": self.td.cmd_args.dynamo.model,
@@ -237,10 +233,12 @@ def _render_aiperf_script(self) -> str:
             artifact_dir = self._runtime_result_path(resolved_phase.artifact_dir_name)
             report_source = f"{artifact_dir}/profile_export_aiperf.csv"
             report_file = self._runtime_result_path(resolved_phase.report_name)
+            if isinstance(resolved_phase.extra_args, list):
+                raise ValueError("AIPerf extra_args must be a string with explicit CLI syntax")
             cmd_parts = [
                 shlex.join(shlex.split(resolved_phase.cmd)),
                 self._render_aiperf_phase_args(resolved_phase, artifact_dir),
-                shlex.join(self._split_extra_args(resolved_phase.extra_args)),
+                resolved_phase.extra_args or "",
             ]
             cmd = " ".join(part for part in cmd_parts if part)
             log_message = f"Running {phase.name}: {cmd}"
@@ -391,9 +389,11 @@ def _gen_dcgm_launcher_block(self) -> list[str]:
         out_dir = self.test_run.output_path.absolute()
         port = self.td.cmd_args.dynamo.dcgm_exporter.port
         dcgm_cmd = f"DCGM_EXPORTER_LISTEN=:{port} dcgm-exporter"
+        dcgm_step_name = "cloudai-dcgm-exporter"
         srun_parts = [
             *self._gen_dcgm_srun_prefix(str(dcgm_image.installed_path)),
             "--overlap",
+            f"--job-name={dcgm_step_name}",
             f"-N{num_nodes}",
             *([] if not node_list else [f"--nodelist={','.join(node_list)}"]),
             f"--ntasks={num_nodes}",
@@ -411,6 +411,23 @@ def _gen_dcgm_launcher_block(self) -> list[str]:
             " ".join(srun_parts) + " &",
             "DCGM_EXPORTER_SRUN_PID=$!",
             'echo "DCGM exporter srun PID: ${DCGM_EXPORTER_SRUN_PID}"',
+            "DCGM_EXPORTER_STEP_ID=",
+            "for _ in {1..10}; do",
+            '    DCGM_EXPORTER_STEP_ID=$(squeue --noheader --steps --job "$SLURM_JOB_ID" '
+            f'--format="%i %j" | awk \'$2 == "{dcgm_step_name}" {{ print $1; exit }}\')',
+            '    if [[ -n "${DCGM_EXPORTER_STEP_ID}" ]]; then break; fi',
+            "    sleep 1",
+            "done",
+            'echo "DCGM exporter step ID: ${DCGM_EXPORTER_STEP_ID:-unknown}"',
+            "function stop_dcgm_exporter()",
+            "{",
+            '    if [[ -n "${DCGM_EXPORTER_STEP_ID:-}" ]]; then',
+            '        scancel --signal=TERM "${DCGM_EXPORTER_STEP_ID}" 2>/dev/null || true',
+            "    fi",
+            '    if [[ -n "${DCGM_EXPORTER_SRUN_PID:-}" ]]; then',
+            '        wait "${DCGM_EXPORTER_SRUN_PID}" 2>/dev/null || true',
+            "    fi",
+            "}",
             "sleep 5",
             'echo "Checking DCGM exporter metrics endpoints..."',
             "DCGM_EXPORTER_STARTUP_TIMEOUT=${DCGM_EXPORTER_STARTUP_TIMEOUT:-60}",
@@ -444,8 +461,7 @@ def _gen_dcgm_launcher_block(self) -> list[str]:
                 '    echo "DCGM exporter reachable: ${dcgm_url}"',
                 "done",
                 "if (( dcgm_failed != 0 )); then",
-                '    kill "${DCGM_EXPORTER_SRUN_PID}" 2>/dev/null || true',
-                '    wait "${DCGM_EXPORTER_SRUN_PID}" 2>/dev/null || true',
+                "    stop_dcgm_exporter",
                 "    exit 1",
                 "fi",
                 "",
@@ -457,12 +473,7 @@ def _gen_dcgm_cleanup_command(self) -> str | None:
         if not self.td.cmd_args.dynamo.dcgm_exporter.enabled:
             return None
 
-        return (
-            'if [[ -n "${DCGM_EXPORTER_SRUN_PID:-}" ]]; then '
-            'kill "${DCGM_EXPORTER_SRUN_PID}" 2>/dev/null || true; '
-            'wait "${DCGM_EXPORTER_SRUN_PID}" 2>/dev/null || true; '
-            "fi"
-        )
+        return "stop_dcgm_exporter"
 
     def gen_exec_command(self) -> str:
         srun_command = self._gen_srun_command()
diff --git a/tests/ref_data/ai-dynamo-aiperf.sh b/tests/ref_data/ai-dynamo-aiperf.sh
index bd73f2ab7..60798ef8b 100644
--- a/tests/ref_data/ai-dynamo-aiperf.sh
+++ b/tests/ref_data/ai-dynamo-aiperf.sh
@@ -10,10 +10,10 @@ log() { echo "[$(date +%F\ %T) $(hostname)]: $*"; }
 
 rm -rf /cloudai_run_results/aiperf_artifacts/round_1
 mkdir -p /cloudai_run_results/aiperf_artifacts/round_1
-log 'Running round_1: aiperf profile --model model --endpoint-type chat --streaming --url "$FRONTEND_URL" --concurrency 1 --request-count 50 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --artifact-dir /cloudai_run_results/aiperf_artifacts/round_1 --no-server-metrics'
+log 'Running round_1: aiperf profile --model model --endpoint-type chat --streaming --url "$FRONTEND_URL" --concurrency 1 --request-count 50 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --artifact-dir /cloudai_run_results/aiperf_artifacts/round_1 --server-metrics "$AIPERF_SERVER_METRICS_URLS" --server-metrics-formats json csv'
 phase_status=0
 set +e
-aiperf profile --model model --endpoint-type chat --streaming --url "$FRONTEND_URL" --concurrency 1 --request-count 50 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --artifact-dir /cloudai_run_results/aiperf_artifacts/round_1 --no-server-metrics > /cloudai_run_results/aiperf_round_1.log 2>&1
+aiperf profile --model model --endpoint-type chat --streaming --url "$FRONTEND_URL" --concurrency 1 --request-count 50 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --artifact-dir /cloudai_run_results/aiperf_artifacts/round_1 --server-metrics "$AIPERF_SERVER_METRICS_URLS" --server-metrics-formats json csv > /cloudai_run_results/aiperf_round_1.log 2>&1
 phase_status=$?
 set -e
 if [[ "$phase_status" -ne 0 ]]; then
@@ -36,10 +36,10 @@ fi
 
 rm -rf /cloudai_run_results/aiperf_artifacts/round_2
 mkdir -p /cloudai_run_results/aiperf_artifacts/round_2
-log 'Running round_2: aiperf profile --model model --endpoint-type chat --streaming --url "$FRONTEND_URL" --concurrency 2 --request-count 10 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --artifact-dir /cloudai_run_results/aiperf_artifacts/round_2 --no-server-metrics'
+log 'Running round_2: aiperf profile --model model --endpoint-type chat --streaming --url "$FRONTEND_URL" --concurrency 2 --request-count 10 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --artifact-dir /cloudai_run_results/aiperf_artifacts/round_2 --server-metrics "$AIPERF_SERVER_METRICS_URLS" --server-metrics-formats json csv'
 phase_status=0
 set +e
-aiperf profile --model model --endpoint-type chat --streaming --url "$FRONTEND_URL" --concurrency 2 --request-count 10 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --artifact-dir /cloudai_run_results/aiperf_artifacts/round_2 --no-server-metrics > /cloudai_run_results/aiperf_round_2.log 2>&1
+aiperf profile --model model --endpoint-type chat --streaming --url "$FRONTEND_URL" --concurrency 2 --request-count 10 --synthetic-input-tokens-mean 300 --output-tokens-mean 500 --artifact-dir /cloudai_run_results/aiperf_artifacts/round_2 --server-metrics "$AIPERF_SERVER_METRICS_URLS" --server-metrics-formats json csv > /cloudai_run_results/aiperf_round_2.log 2>&1
 phase_status=$?
 set -e
 if [[ "$phase_status" -ne 0 ]]; then
diff --git a/tests/ref_data/ai-dynamo.sbatch b/tests/ref_data/ai-dynamo.sbatch
index e3384343b..c00906d40 100644
--- a/tests/ref_data/ai-dynamo.sbatch
+++ b/tests/ref_data/ai-dynamo.sbatch
@@ -14,6 +14,53 @@ srun --export=ALL --mpi=pmix -N2 --container-image=nvcr.io/nvidia/ai-dynamo:24.0
 
 srun --export=ALL --mpi=pmix -N2 --container-image=nvcr.io/nvidia/ai-dynamo:24.09 --container-mounts=__OUTPUT_DIR__/output:/cloudai_run_results,__INSTALL_DIR__:/cloudai_install,__OUTPUT_DIR__/output,__INSTALL_DIR__/huggingface:/cloudai_install/huggingface,/tmp:/tmp --ntasks=2 --ntasks-per-node=1 --output=__OUTPUT_DIR__/output/metadata/node-%N.toml --error=__OUTPUT_DIR__/output/metadata/nodes.err bash /cloudai_install/slurm-metadata.sh
 
+# Start DCGM exporter on each node.
+echo "Starting DCGM exporter..."
+srun --export=ALL --mpi=pmix --container-image=nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless --container-mounts=__OUTPUT_DIR__/output:/cloudai_run_results,__INSTALL_DIR__:/cloudai_install,__OUTPUT_DIR__/output,__INSTALL_DIR__/huggingface:/cloudai_install/huggingface,/tmp:/tmp --overlap --job-name=cloudai-dcgm-exporter -N2 --ntasks=2 --ntasks-per-node=1 --output=__OUTPUT_DIR__/output/dcgm-node-%n-stdout.txt --error=__OUTPUT_DIR__/output/dcgm-node-%n-stderr.txt bash -lc 'DCGM_EXPORTER_LISTEN=:9501 dcgm-exporter' &
+DCGM_EXPORTER_SRUN_PID=$!
+echo "DCGM exporter srun PID: ${DCGM_EXPORTER_SRUN_PID}"
+DCGM_EXPORTER_STEP_ID=
+for _ in {1..10}; do
+    DCGM_EXPORTER_STEP_ID=$(squeue --noheader --steps --job "$SLURM_JOB_ID" --format="%i %j" | awk '$2 == "cloudai-dcgm-exporter" { print $1; exit }')
+    if [[ -n "${DCGM_EXPORTER_STEP_ID}" ]]; then break; fi
+    sleep 1
+done
+echo "DCGM exporter step ID: ${DCGM_EXPORTER_STEP_ID:-unknown}"
+function stop_dcgm_exporter()
+{
+    if [[ -n "${DCGM_EXPORTER_STEP_ID:-}" ]]; then
+        scancel --signal=TERM "${DCGM_EXPORTER_STEP_ID}" 2>/dev/null || true
+    fi
+    if [[ -n "${DCGM_EXPORTER_SRUN_PID:-}" ]]; then
+        wait "${DCGM_EXPORTER_SRUN_PID}" 2>/dev/null || true
+    fi
+}
+sleep 5
+echo "Checking DCGM exporter metrics endpoints..."
+DCGM_EXPORTER_STARTUP_TIMEOUT=${DCGM_EXPORTER_STARTUP_TIMEOUT:-60}
+mapfile -t dcgm_nodes < <(scontrol show hostnames "$SLURM_JOB_NODELIST")
+: > __OUTPUT_DIR__/output/dcgm_endpoints.txt
+dcgm_failed=0
+for node in "${dcgm_nodes[@]}"; do
+    dcgm_url="http://${node}:9501/metrics"
+    echo "  ${dcgm_url}" >> __OUTPUT_DIR__/output/dcgm_endpoints.txt
+    deadline=$((SECONDS + DCGM_EXPORTER_STARTUP_TIMEOUT))
+    until curl -fsS --max-time 2 "${dcgm_url}" >/dev/null; do
+        if (( SECONDS >= deadline )); then
+            echo "FATAL: DCGM exporter metrics endpoint is unreachable: ${dcgm_url}" >&2
+            dcgm_failed=1
+            break
+        fi
+        sleep 2
+    done
+    if (( dcgm_failed != 0 )); then break; fi
+    echo "DCGM exporter reachable: ${dcgm_url}"
+done
+if (( dcgm_failed != 0 )); then
+    stop_dcgm_exporter
+    exit 1
+fi
+
 srun \
   --export=ALL \
   --mpi=pmix \
@@ -47,6 +94,8 @@ srun \
   --dynamo-etcd-port "2379" \
   --dynamo-nats-cmd "nats-server -js" \
   --dynamo-nats-port "4222" \
+  --dynamo-dcgm-exporter-enabled "True" \
+  --dynamo-dcgm-exporter-port "9501" \
   --prefill-cmd "python3 -m dynamo.vllm --is-prefill-worker" \
   --prefill-worker-initialized-regex "VllmWorker.*has.been.initialized" \
   --prefill-multiple-workers-per-node "False" \
@@ -74,3 +123,6 @@ srun \
   --genai_perf-warmup-request-count "10" \
   --aiperf-name "aiperf" \
   --aiperf-script /cloudai_run_results/aiperf.sh
+
+# Stop DCGM exporter when test finishes
+stop_dcgm_exporter
diff --git a/tests/test_acceptance.py b/tests/test_acceptance.py
index 78902298e..151c6fb9e 100644
--- a/tests/test_acceptance.py
+++ b/tests/test_acceptance.py
@@ -36,6 +36,7 @@
     AIDynamoTestDefinition,
     AIPerf,
     AIPerfPhase,
+    DCGMExporter,
     GenAIPerf,
     WorkerBaseArgs,
     WorkerConfig,
@@ -501,6 +502,7 @@ def test_req(request, slurm_system: SlurmSystem, partial_tr: partial[TestRun]) -
                         backend="vllm",
                         endpoint="v1/chat/completions",
                         workspace_path="/workspace",
+                        dcgm_exporter=DCGMExporter(enabled=True, port=9501),
                         prefill_worker=WorkerConfig(
                             cmd="python3 -m dynamo.vllm --is-prefill-worker",
                             worker_initialized_regex="VllmWorker.*has.been.initialized",
@@ -531,12 +533,14 @@ def test_req(request, slurm_system: SlurmSystem, partial_tr: partial[TestRun]) -
                     ),
                     aiperf=AIPerf.model_validate(
                         {
+                            "extra-args": "--server-metrics-formats json csv",
                             "args": {
                                 "concurrency": 2,
                                 "request-count": 50,
                                 "synthetic-input-tokens-mean": 300,
                                 "output-tokens-mean": 500,
-                            }
+                                "server-metrics": "auto",
+                            },
                         }
                     ),
                     aiperf_phases=[
diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
index 533b059b0..d55741311 100644
--- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
+++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
@@ -283,6 +283,23 @@ def test_generated_aiperf_script_supports_core_overrides_and_server_metrics_auto
     assert "--no-server-metrics" not in script
 
 
+def test_generated_aiperf_script_rejects_list_args(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
+    td = cast(AIDynamoTestDefinition, strategy.test_run.test)
+    td.cmd_args.workloads = "aiperf.sh"
+    td.cmd_args.aiperf = AIPerf.model_validate({"args": {"server-metrics-formats": ["json", "csv"]}})
+
+    with pytest.raises(ValueError, match="AIPerf argument 'server-metrics-formats' must be a scalar value"):
+        strategy._gen_script_args(td)
+
+
+def test_aiperf_extra_args_must_be_string() -> None:
+    with pytest.raises(ValueError):
+        AIPerf.model_validate({"extra-args": ["--server-metrics-formats", "json"]})
+
+    with pytest.raises(ValueError):
+        AIPerfPhase.model_validate({"name": "round_1", "extra-args": ["--server-metrics-formats", "json"]})
+
+
 def test_dcgm_exporter_generates_launcher_and_runtime_flags(strategy: AIDynamoSlurmCommandGenStrategy) -> None:
     td = cast(AIDynamoTestDefinition, strategy.test_run.test)
     td.cmd_args.dynamo.dcgm_exporter.enabled = True
@@ -299,7 +316,10 @@ def test_dcgm_exporter_generates_launcher_and_runtime_flags(strategy: AIDynamoSl
     assert any("DCGM_EXPORTER_STARTUP_TIMEOUT" in line for line in block)
     assert any('curl -fsS --max-time 2 "${dcgm_url}"' in line for line in block)
     assert any("FATAL: DCGM exporter metrics endpoint is unreachable" in line for line in block)
+    assert any('scancel --signal=TERM "${DCGM_EXPORTER_STEP_ID}"' in line for line in block)
+    assert strategy._gen_dcgm_cleanup_command() == "stop_dcgm_exporter"
     assert not any("docker run" in line for line in block)
+    assert not any('kill "${DCGM_EXPORTER_SRUN_PID}"' in line for line in block)
 
 
 def test_dcgm_exporter_adds_configured_docker_image_installable(cmd_args: AIDynamoCmdArgs) -> None:

From a1f225d0092d1ff0422285d497e4058e018ad965 Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 18:59:50 +0200
Subject: [PATCH 15/16] reformat aiperf script cmd generation

---
 .../ai_dynamo/slurm_command_gen_strategy.py   | 129 ++++++++++--------
 1 file changed, 75 insertions(+), 54 deletions(-)

diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index 29eb9fd3c..5a9d3320b 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -16,6 +16,7 @@
 
 import logging
 import shlex
+import textwrap
 from pathlib import Path
 from typing import Any, List, cast
 
@@ -204,27 +205,31 @@ def _render_aiperf_script(self) -> str:
         phases = self.td.cmd_args.aiperf_phases or [AIPerfPhase.model_validate({"name": "aiperf"})]
         single_phase = len(phases) == 1
         setup_cmd = self._resolve_aiperf_phase(phases[0]).setup_cmd
-        lines = [
-            "#!/usr/bin/env bash",
-            "set -Eeuo pipefail",
-            "",
-            'log() { echo "[$(date +%F\\ %T) $(hostname)]: $*"; }',
-            "",
-            ': "${FRONTEND_URL:?FRONTEND_URL is not set}"',
-            f': "${{AIPERF_MODEL:={self.td.cmd_args.dynamo.model}}}"',
-            f': "${{AIPERF_ENDPOINT:={self.td.cmd_args.dynamo.endpoint}}}"',
-            f': "${{AIPERF_FAILURE_MARKER:={self.CONTAINER_MOUNT_OUTPUT}/{self.td.failure_marker}}}"',
-            "",
+        blocks = [
+            textwrap.dedent(
+                f"""\
+                #!/usr/bin/env bash
+                set -Eeuo pipefail
+
+                log() {{ echo "[$(date +%F\\ %T) $(hostname)]: $*"; }}
+
+                : "${{FRONTEND_URL:?FRONTEND_URL is not set}}"
+                : "${{AIPERF_MODEL:={self.td.cmd_args.dynamo.model}}}"
+                : "${{AIPERF_ENDPOINT:={self.td.cmd_args.dynamo.endpoint}}}"
+                : "${{AIPERF_FAILURE_MARKER:={self.CONTAINER_MOUNT_OUTPUT}/{self.td.failure_marker}}}"
+                """
+            ).rstrip()
         ]
 
         if setup_cmd:
             setup_argv = ["bash", "-lc", setup_cmd]
-            lines.extend(
-                [
-                    f"log {shlex.quote(f'Running aiperf setup: {shlex.join(setup_argv)}')}",
-                    shlex.join(setup_argv),
-                    "",
-                ]
+            blocks.append(
+                textwrap.dedent(
+                    f"""\
+                    log {shlex.quote(f"Running aiperf setup: {shlex.join(setup_argv)}")}
+                    {shlex.join(setup_argv)}
+                    """
+                ).rstrip()
             )
 
         write_phase_logs = not single_phase
@@ -241,60 +246,76 @@ def _render_aiperf_script(self) -> str:
                 resolved_phase.extra_args or "",
             ]
             cmd = " ".join(part for part in cmd_parts if part)
-            log_message = f"Running {phase.name}: {cmd}"
-            lines.append(f"rm -rf {shlex.quote(artifact_dir)}")
-            lines.append(f"mkdir -p {shlex.quote(artifact_dir)}")
-            lines.append(f"log {shlex.quote(log_message)}")
-            lines.append("phase_status=0")
             if write_phase_logs:
                 log_file = self._runtime_result_path(f"aiperf_{phase.name}.log")
-                lines.append("set +e")
-                lines.append(f"{cmd} > {shlex.quote(log_file)} 2>&1")
-                lines.append("phase_status=$?")
-                lines.append("set -e")
+                run_cmd = f"{cmd} > {shlex.quote(log_file)} 2>&1"
             else:
-                lines.append("set +e")
-                lines.append(cmd)
-                lines.append("phase_status=$?")
-                lines.append("set -e")
-
-            lines.append('if [[ "$phase_status" -ne 0 ]]; then')
-            lines.append(f"  log {shlex.quote(f'AIPerf phase {phase.name} failed')}")
+                run_cmd = cmd
+            log_message = f"Running {phase.name}: {cmd}"
+            phase_lines = [
+                textwrap.dedent(
+                    f"""\
+                    rm -rf {shlex.quote(artifact_dir)}
+                    mkdir -p {shlex.quote(artifact_dir)}
+                    log {shlex.quote(log_message)}
+                    phase_status=0
+                    set +e
+                    {run_cmd}
+                    phase_status=$?
+                    set -e
+                    if [[ "$phase_status" -ne 0 ]]; then
+                      log {shlex.quote(f"AIPerf phase {phase.name} failed")}
+                    """
+                ).rstrip()
+            ]
             if not resolved_phase.continue_on_phase_failure:
-                lines.append('  exit "$phase_status"')
-            lines.append("fi")
-            lines.append('if [[ "$phase_status" -eq 0 ]]; then')
-
-            lines.append(f"  mkdir -p {shlex.quote(str(Path(report_file).parent))}")
+                phase_lines.append('  exit "$phase_status"')
+            phase_lines.extend(
+                [
+                    "fi",
+                    textwrap.dedent(
+                        f"""\
+                        if [[ "$phase_status" -eq 0 ]]; then
+                          mkdir -p {shlex.quote(str(Path(report_file).parent))}
+                        """
+                    ).rstrip(),
+                ]
+            )
             if report_source != report_file:
-                lines.append(f"  cp {shlex.quote(report_source)} {shlex.quote(report_file)}")
-            lines.append(f"  log {shlex.quote(f'AIPerf report saved to {report_file}')}")
+                phase_lines.append(f"  cp {shlex.quote(report_source)} {shlex.quote(report_file)}")
+            phase_lines.append(f"  log {shlex.quote(f'AIPerf report saved to {report_file}')}")
 
             if not single_phase and idx == len(phases) - 1:
                 final_report_file = self._runtime_result_path("aiperf_report.csv")
-                lines.append(f"  mkdir -p {shlex.quote(str(Path(final_report_file).parent))}")
+                phase_lines.append(f"  mkdir -p {shlex.quote(str(Path(final_report_file).parent))}")
                 if report_file != final_report_file:
-                    lines.append(f"  cp {shlex.quote(report_file)} {shlex.quote(final_report_file)}")
-                lines.append(f"  log {shlex.quote(f'Final AIPerf report saved to {final_report_file}')}")
+                    phase_lines.append(f"  cp {shlex.quote(report_file)} {shlex.quote(final_report_file)}")
+                phase_lines.append(f"  log {shlex.quote(f'Final AIPerf report saved to {final_report_file}')}")
+
             if not single_phase and idx < len(phases) - 1 and resolved_phase.health_check_between_phases:
-                lines.append('  if [[ -f "$AIPERF_FAILURE_MARKER" ]]; then')
-                lines.append("    log 'FATAL: failure marker found between AIPerf phases'")
-                lines.append("    exit 1")
-                lines.append("  fi")
-                lines.append(
+                health_probe_cmd = (
                     '  if ! curl -fsS -X POST "${FRONTEND_URL}/${AIPERF_ENDPOINT}" '
                     "-H 'Content-Type: application/json' "
                     '-d "{\\"model\\":\\"${AIPERF_MODEL}\\",\\"messages\\":[{\\"role\\":\\"user\\",'
                     '\\"content\\":\\"ping\\"}],\\"stream\\":false,\\"max_tokens\\":1}" '
                     ">/dev/null; then"
                 )
-                lines.append("    log 'FATAL: frontend health probe failed between AIPerf phases'")
-                lines.append("    exit 1")
-                lines.append("  fi")
-            lines.append("fi")
-            lines.append("")
+                phase_lines.extend(
+                    [
+                        '  if [[ -f "$AIPERF_FAILURE_MARKER" ]]; then',
+                        "    log 'FATAL: failure marker found between AIPerf phases'",
+                        "    exit 1",
+                        "  fi",
+                        health_probe_cmd,
+                        "    log 'FATAL: frontend health probe failed between AIPerf phases'",
+                        "    exit 1",
+                        "  fi",
+                    ]
+                )
+            phase_lines.append("fi")
+            blocks.append("\n".join(phase_lines))
 
-        return "\n".join(lines)
+        return "\n\n".join(blocks)
 
     def _prepare_aiperf_script(self) -> str | None:
         if "aiperf.sh" not in self.td.cmd_args.workloads_list:

From 2113080e426b37c04ddbbcff97ef81e2c7b55eca Mon Sep 17 00:00:00 2001
From: Ivan Podkidyshev <ipodkidyshev@nvidia.com>
Date: Mon, 1 Jun 2026 20:10:28 +0200
Subject: [PATCH 16/16] honor per-phase AIPerf setup commands

---
 .../ai_dynamo/slurm_command_gen_strategy.py   | 32 +++++++++++--------
 .../test_command_gen_strategy_slurm.py        | 12 ++++++-
 2 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
index 5a9d3320b..cc3b51273 100644
--- a/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
+++ b/src/cloudai/workloads/ai_dynamo/slurm_command_gen_strategy.py
@@ -201,10 +201,23 @@ def _resolve_aiperf_phase(self, phase: AIPerfPhase) -> AIPerf:
 
         return AIPerf.model_validate(cloudai.util.deep_merge(base_data, phase_data))
 
+    def _render_aiperf_setup_blocks(self, log_message: str, setup_cmd: str | None) -> list[str]:
+        """Return a one-element list holding the bash snippet that logs and runs ``setup_cmd``.
+
+        An unset or empty ``setup_cmd`` yields ``[]``. The snippet is assembled without
+        ``textwrap.dedent``: dedenting after interpolation would rewrite the leading
+        whitespace of a multi-line ``setup_cmd`` inside its quotes and change the command.
+        """
+        if not setup_cmd:
+            return []
+
+        # ``bash -lc`` hands the whole setup string to a login shell as a single script.
+        setup_invocation = shlex.join(["bash", "-lc", setup_cmd])
+        return [f"log {shlex.quote(f'{log_message}: {setup_invocation}')}\n{setup_invocation}"]
+
     def _render_aiperf_script(self) -> str:
         phases = self.td.cmd_args.aiperf_phases or [AIPerfPhase.model_validate({"name": "aiperf"})]
         single_phase = len(phases) == 1
-        setup_cmd = self._resolve_aiperf_phase(phases[0]).setup_cmd
         blocks = [
             textwrap.dedent(
                 f"""\
@@ -221,16 +234,7 @@ def _render_aiperf_script(self) -> str:
             ).rstrip()
         ]
 
-        if setup_cmd:
-            setup_argv = ["bash", "-lc", setup_cmd]
-            blocks.append(
-                textwrap.dedent(
-                    f"""\
-                    log {shlex.quote(f"Running aiperf setup: {shlex.join(setup_argv)}")}
-                    {shlex.join(setup_argv)}
-                    """
-                ).rstrip()
-            )
+        blocks.extend(self._render_aiperf_setup_blocks("Running aiperf setup", self.td.cmd_args.aiperf.setup_cmd))
 
         write_phase_logs = not single_phase
         for idx, phase in enumerate(phases):
@@ -252,7 +256,9 @@ def _render_aiperf_script(self) -> str:
             else:
                 run_cmd = cmd
             log_message = f"Running {phase.name}: {cmd}"
-            phase_lines = [
+            phase_setup = phase.setup_cmd if "setup_cmd" in phase.model_fields_set else None
+            phase_lines = self._render_aiperf_setup_blocks(f"Running AIPerf phase setup for {phase.name}", phase_setup)
+            phase_lines.append(
                 textwrap.dedent(
                     f"""\
                     rm -rf {shlex.quote(artifact_dir)}
@@ -267,7 +273,7 @@ def _render_aiperf_script(self) -> str:
                       log {shlex.quote(f"AIPerf phase {phase.name} failed")}
                     """
                 ).rstrip()
-            ]
+            )
             if not resolved_phase.continue_on_phase_failure:
                 phase_lines.append('  exit "$phase_status"')
             phase_lines.extend(
diff --git a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
index 2c353dc91..46a10906b 100644
--- a/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
+++ b/tests/workloads/ai_dynamo/test_command_gen_strategy_slurm.py
@@ -236,14 +236,24 @@ def test_gen_script_args_writes_resolved_aiperf_script(strategy: AIDynamoSlurmCo
     )
     td.cmd_args.aiperf_phases = [
         AIPerfPhase.model_validate({"name": "round_1", "args": {"concurrency": 1}}),
-        AIPerfPhase.model_validate({"name": "round_2", "args": {"request-count": 10}}),
+        AIPerfPhase.model_validate(
+            {
+                "name": "round_2",
+                "setup-cmd": "python -m pip install --upgrade another-aiperf-plugin",
+                "args": {"request-count": 10},
+            }
+        ),
     ]
 
     result = strategy._gen_script_args(td)
 
     assert f"--aiperf-script {strategy.CONTAINER_MOUNT_OUTPUT}/aiperf.sh" in result
     script = (strategy.test_run.output_path / "aiperf.sh").read_text()
+    assert script.count("Running aiperf setup:") == 1
     assert "bash -lc 'python -m pip install --upgrade aiperf'" in script
+    assert "Running AIPerf phase setup for round_1" not in script
+    assert "Running AIPerf phase setup for round_2" in script
+    assert "bash -lc 'python -m pip install --upgrade another-aiperf-plugin'" in script
     assert ': "${FRONTEND_URL:?FRONTEND_URL is not set}"' in script
     assert '--url "$FRONTEND_URL"' in script
     assert f"--artifact-dir {strategy.CONTAINER_MOUNT_OUTPUT}/aiperf_artifacts/round_1" in script