Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions conf/experimental/ai_dynamo/test/sglang.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ name = "sglang"
description = "sglang backend"
test_template_name = "AIDynamo"
extra_container_mounts = ["/run/udev:/run/udev"]
dse_excluded_args = ["cmd_args.aiperf_phases"]

[cmd_args]
docker_image_url = "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.1.1"
Expand Down Expand Up @@ -88,6 +89,20 @@ workloads = "aiperf.sh"
request-count = 50
synthetic-input-tokens-mean = 300

# Each [[cmd_args.aiperf_phases]] entry declares one named AIPerf phase; the keys
# in its `args` table override the base AIPerf configuration for that phase.
# Phase names must be unique and may only contain letters, digits, `_`, `.` and `-`.
[[cmd_args.aiperf_phases]]
name = "round_1"

[cmd_args.aiperf_phases.args]
concurrency = 2
request-count = 50

[[cmd_args.aiperf_phases]]
name = "round_2"

[cmd_args.aiperf_phases.args]
# Same request count as round_1 at twice the concurrency.
concurrency = 4
request-count = 50

[cmd_args.aiperf_accuracy]
entrypoint = "aiperf profile"
setup-cmd = "python -m pip install --break-system-packages --ignore-installed blinker==1.9.0 && python -m pip install --break-system-packages --upgrade aiperf==0.8.0"
Expand Down
21 changes: 21 additions & 0 deletions conf/experimental/ai_dynamo/test/vllm.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ name = "vLLM"
description = "vLLM backend"
test_template_name = "AIDynamo"
extra_container_mounts = ["/run/udev:/run/udev"]
dse_excluded_args = ["cmd_args.aiperf_phases"]

[cmd_args]
docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1"
Expand Down Expand Up @@ -73,13 +74,33 @@ workloads = "aiperf.sh"
concurrency = 2

[cmd_args.aiperf]
health-check-between-phases = true
continue-on-phase-failure = false
[cmd_args.aiperf.args]
concurrency = 2
endpoint-type = "chat"
extra-inputs = '{"min_tokens":10}'
output-tokens-mean = 500
request-count = 50
server-metrics = "auto"
streaming = true
synthetic-input-tokens-mean = 300

# Named AIPerf phases. Keys under each phase's `args` table override the base
# values from [cmd_args.aiperf.args] for that phase only.
# Phase names must be unique within this list.
[[cmd_args.aiperf_phases]]
name = "round_1"

[cmd_args.aiperf_phases.args]
concurrency = 2
request-count = 50

[[cmd_args.aiperf_phases]]
name = "round_2"

[cmd_args.aiperf_phases.args]
concurrency = 4
request-count = 50
# Overrides `streaming = true` from the base [cmd_args.aiperf.args] table.
streaming = false

[cmd_args.aiperf_accuracy]
entrypoint = "aiperf profile"
setup-cmd = "python -m pip install --break-system-packages --upgrade aiperf==0.8.0"
Expand Down
19 changes: 18 additions & 1 deletion conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@ description = "Self-contained AIDynamo scenario wiring vLLM disaggregated infere
test_template_name = "AIDynamo"
time_limit = "00:10:00"
extra_container_mounts = ["/run/udev:/run/udev"]
dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"]
dse_excluded_args = [
"cmd_args.lmcache.lmcache_worker_ports",
"cmd_args.aiperf_phases",
]

[Tests.cmd_args]
docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1"
Expand Down Expand Up @@ -90,6 +93,20 @@ dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"]
request-count = 50
synthetic-input-tokens-mean = 300

# Named AIPerf phases for this test. `cmd_args.aiperf_phases` is also listed in
# this test's `dse_excluded_args`. Keys under `args` override the base AIPerf
# configuration for the phase they belong to.
[[Tests.cmd_args.aiperf_phases]]
name = "round_1"

[Tests.cmd_args.aiperf_phases.args]
concurrency = 2
request-count = 50

[[Tests.cmd_args.aiperf_phases]]
name = "round_2"

[Tests.cmd_args.aiperf_phases.args]
# Same request count as round_1 at twice the concurrency.
concurrency = 4
request-count = 50

[Tests.cmd_args.aiperf_accuracy]
entrypoint = "aiperf profile"
setup-cmd = "python -m pip install --break-system-packages --upgrade aiperf==0.8.0"
Expand Down
31 changes: 31 additions & 0 deletions conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,29 @@ time_limit = "00:10:00"
tensor-parallel-size = 4
pipeline-parallel-size = 1

# Named AIPerf phases for the enclosing [[Tests]] entry; `args` keys override the
# base AIPerf configuration for that phase.
[[Tests.cmd_args.aiperf_phases]]
name = "round_1"
[Tests.cmd_args.aiperf_phases.args]
concurrency = 2
request-count = 50
server-metrics = "auto"

# NOTE(review): unlike round_1, this phase does not set `server-metrics`;
# confirm whether that is intentional or inherited from the base config.
[[Tests.cmd_args.aiperf_phases]]
name = "round_2"
[Tests.cmd_args.aiperf_phases.args]
concurrency = 4
request-count = 50

[[Tests]]
id = "test.disagg.multinode"
test_name = "vLLM"
time_limit = "00:10:00"

[Tests.cmd_args]
[Tests.cmd_args.dynamo.dcgm_exporter]
enabled = true
docker-image-url = "nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless"

[Tests.cmd_args.dynamo.prefill_worker]
num-nodes = 2
[Tests.cmd_args.dynamo.prefill_worker.args]
Expand All @@ -53,3 +70,17 @@ time_limit = "00:10:00"
[Tests.cmd_args.dynamo.decode_worker.args]
tensor-parallel-size = 4
pipeline-parallel-size = 1

# Named AIPerf phases for the enclosing [[Tests]] entry; `args` keys override the
# base AIPerf configuration for that phase. Both phases request server metrics.
[[Tests.cmd_args.aiperf_phases]]
name = "round_1"
[Tests.cmd_args.aiperf_phases.args]
concurrency = 4
request-count = 50
server-metrics = "auto"

[[Tests.cmd_args.aiperf_phases]]
name = "round_2"
[Tests.cmd_args.aiperf_phases.args]
# Same request count as round_1 at twice the concurrency.
concurrency = 8
request-count = 50
server-metrics = "auto"
4 changes: 4 additions & 0 deletions src/cloudai/workloads/ai_dynamo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
AIDynamoTestDefinition,
AIPerf,
AIPerfAccuracy,
AIPerfPhase,
DCGMExporter,
GenAIPerf,
LMCacheController,
WorkerBaseArgs,
Expand All @@ -42,6 +44,8 @@
"AIDynamoTestDefinition",
"AIPerf",
"AIPerfAccuracy",
"AIPerfPhase",
"DCGMExporter",
"GenAIPerf",
"LMCacheController",
"WorkerBaseArgs",
Expand Down
100 changes: 99 additions & 1 deletion src/cloudai/workloads/ai_dynamo/ai_dynamo.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,20 @@ class WorkerConfig(BaseModel):
)


class DCGMExporter(BaseModel):
    """Optional DCGM exporter launch configuration.

    Unknown keys are rejected (``extra="forbid"``); fields may be populated
    either by their Python name or by any of their validation aliases.
    """

    model_config = ConfigDict(extra="forbid", populate_by_name=True)

    # Off by default. While disabled, AIDynamoTestDefinition.dcgm_exporter_image
    # returns None and no exporter image is added to the installables.
    enabled: bool = False
    # Container image for the exporter; accepted under kebab- or snake-case keys,
    # with or without the "docker" prefix, and serialized as "docker-image-url".
    docker_image_url: str = Field(
        default="nvcr.io/nvidia/k8s/dcgm-exporter:4.5.2-4.8.1-distroless",
        serialization_alias="docker-image-url",
        validation_alias=AliasChoices("docker-image-url", "docker_image_url", "image-url", "image_url"),
    )
    # Exporter port; the default matches the "dcgm-exporter-port" default in
    # ai_dynamo.sh. Restricted to the valid TCP port range so that a typo is
    # rejected at config-validation time instead of yielding a dead metrics URL.
    port: int = Field(default=9401, ge=1, le=65535)


class AIDynamoArgs(BaseModel):
"""Arguments for AI Dynamo setup."""

Expand Down Expand Up @@ -205,6 +219,7 @@ def validate_connector(cls, v: str | list[str] | None) -> str | list[str] | None
serialization_alias="nats-port",
validation_alias=AliasChoices("nats-port", "nats_port"),
)
dcgm_exporter: DCGMExporter = Field(default_factory=DCGMExporter)

decode_worker: WorkerConfig = WorkerConfig(
cmd="python3 -m dynamo.vllm",
Expand Down Expand Up @@ -264,11 +279,62 @@ class AIPerf(Workload):
serialization_alias="report-name",
validation_alias=AliasChoices("report-name", "report_name"),
)
artifact_dir_name: str = Field(
default=AIPERF_ARTIFACTS_DIR,
serialization_alias="artifact-dir-name",
validation_alias=AliasChoices("artifact-dir-name", "artifact_dir_name"),
)
health_check_between_phases: bool = Field(
default=True,
serialization_alias="health-check-between-phases",
validation_alias=AliasChoices("health-check-between-phases", "health_check_between_phases"),
)
continue_on_phase_failure: bool = Field(
default=False,
serialization_alias="continue-on-phase-failure",
validation_alias=AliasChoices("continue-on-phase-failure", "continue_on_phase_failure"),
)

# Installables needed by this workload: a single-element list holding its script.
@property
def installables(self) -> list[Installable]:
return [self.script]

# Post-validation hook: AIPerf only accepts `extra_args` as a single string.
# A list value raises ValueError; any other value is left untouched and the
# model instance is returned as-is.
@model_validator(mode="after")
def validate_extra_args(self) -> "AIPerf":
if isinstance(self.extra_args, list):
raise ValueError("AIPerf extra_args must be a string with explicit CLI syntax")
return self


class AIPerfPhase(BaseModel):
"""Named AIPerf phase that overrides the base AIPerf configuration."""

# extra="allow": keys not declared below are accepted rather than rejected.
# populate_by_name=True: fields may be set by Python name or by alias.
model_config = ConfigDict(extra="allow", populate_by_name=True)

# Required, non-empty; limited to letters, digits, "_", "." and "-".
# Uniqueness across phases is checked by AIDynamoCmdArgs.validate_aiperf_phases.
name: str = Field(..., min_length=1, pattern=r"^[A-Za-z0-9_.-]+$")
# The optional fields below all default to None.
# NOTE(review): presumably None means "inherit from the base AIPerf config" —
# confirm against the code that consumes the phases.
cmd: str | None = None
setup_cmd: str | None = Field(
default=None,
serialization_alias="setup-cmd",
validation_alias=AliasChoices("setup-cmd", "setup_cmd"),
)
report_name: str | None = Field(
default=None,
serialization_alias="report-name",
validation_alias=AliasChoices("report-name", "report_name"),
)
artifact_dir_name: str | None = Field(
default=None,
serialization_alias="artifact-dir-name",
validation_alias=AliasChoices("artifact-dir-name", "artifact_dir_name"),
)
# Per-phase argument overrides; an empty Args instance when omitted.
args: Args = Field(default_factory=Args)
# String only (no list form is declared for phases).
extra_args: str | None = Field(
default=None,
serialization_alias="extra-args",
validation_alias=AliasChoices("extra-args", "extra_args"),
)


class AIPerfAccuracy(BaseModel):
"""Optional accuracy benchmark configuration."""
Expand Down Expand Up @@ -324,6 +390,7 @@ class AIDynamoCmdArgs(CmdArgs):
lmcache_controller: LMCacheController | None = None
genai_perf: GenAIPerf = Field(default_factory=GenAIPerf)
aiperf: AIPerf = Field(default_factory=AIPerf)
aiperf_phases: list[AIPerfPhase] | None = None
aiperf_accuracy: AIPerfAccuracy | None = None
workloads: str = "genai_perf.sh"

Expand All @@ -341,6 +408,23 @@ def validate_workloads(cls, v: str) -> str:
def workloads_list(self) -> list[str]:
return [w.strip() for w in self.workloads.split(",")]

@model_validator(mode="after")
def validate_aiperf_phases(self) -> "AIDynamoCmdArgs":
    """Reject configurations in which two AIPerf phases share a name.

    Returns:
        The model itself, unchanged, when ``aiperf_phases`` is unset or empty,
        or when every phase name is distinct.

    Raises:
        ValueError: if any phase name occurs more than once; the message lists
            the repeated names in sorted order.
    """
    phase_names = [phase.name for phase in (self.aiperf_phases or [])]
    # A name is a duplicate as soon as it appears at least twice in the list.
    repeated = {name for name in phase_names if phase_names.count(name) > 1}
    if repeated:
        raise ValueError(f"AIPerf phase names must be unique. Duplicates: {sorted(repeated)}")
    return self

@property
def installables(self) -> list[Installable]:
return [
Expand All @@ -356,6 +440,7 @@ class AIDynamoTestDefinition(TestDefinition):
model_config = ConfigDict(extra="forbid")
cmd_args: AIDynamoCmdArgs
_docker_image: Optional[DockerImage] = None
_dcgm_exporter_image: Optional[DockerImage] = None
script: File = File(Path(__file__).parent.parent / "ai_dynamo/ai_dynamo.sh")
repo: GitRepo = GitRepo(
url="https://github.com/ai-dynamo/dynamo.git", commit="f7e468c7e8ff0d1426db987564e60572167e8464"
Expand Down Expand Up @@ -389,6 +474,16 @@ def docker_image(self) -> DockerImage:
self._docker_image = DockerImage(url=self.cmd_args.docker_image_url)
return self._docker_image

@property
def dcgm_exporter_image(self) -> DockerImage | None:
    """Docker image for the DCGM exporter, or ``None`` when the exporter is disabled.

    The image object is cached on the instance and rebuilt whenever the
    configured image URL no longer matches the cached one.
    """
    exporter_cfg = self.cmd_args.dynamo.dcgm_exporter
    if exporter_cfg.enabled:
        wanted_url = exporter_cfg.docker_image_url
        cached = self._dcgm_exporter_image
        if not cached or cached.url != wanted_url:
            self._dcgm_exporter_image = DockerImage(url=wanted_url)
        return self._dcgm_exporter_image
    return None

@property
def hf_model(self) -> HFModel:
if not self._hf_model:
Expand All @@ -399,13 +494,16 @@ def hf_model(self) -> HFModel:
@property
def installables(self) -> list[Installable]:
    """Get all installables for this test definition.

    Always contains the main docker image, the repo, the launch script, the
    HF model and the cmd_args installables, in that order. The DCGM exporter
    image is appended last, and only when it is available.
    """
    required = [
        self.docker_image,
        self.repo,
        self.script,
        self.hf_model,
        *self.cmd_args.installables,
    ]
    exporter_image = self.dcgm_exporter_image
    if not exporter_image:
        return required
    return [*required, exporter_image]

def _has_aiperf_accuracy_results(self, output_path: Path) -> bool:
accuracy = parse_aiperf_accuracy(output_path)
Expand Down
40 changes: 40 additions & 0 deletions src/cloudai/workloads/ai_dynamo/ai_dynamo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ dynamo_args["worker-error-pattern"]="zmq.error.ZMQError:.Address.already.in.use|
dynamo_args["sgl-http-port"]=9001
dynamo_args["prefill-port"]=30011
dynamo_args["decode-port"]=30021
dynamo_args["dcgm-exporter-enabled"]="False"
dynamo_args["dcgm-exporter-port"]=9401

function log()
{
Expand Down Expand Up @@ -892,6 +894,39 @@ _query_frontend() {
curl -s -X POST "${dynamo_args["url"]}/v1/chat/completions" -H "Content-Type: application/json" -d @$RESULTS_DIR/curl_cmd.json
}

# Print (on stdout) the comma-separated list of /metrics endpoints for AIPerf:
#   1. the frontend endpoint (frontend-node:port),
#   2. one endpoint per worker on every prefill node, then every decode node,
#      using consecutive ports starting at DYN_SYSTEM_PORT (default 9090),
#   3. when the DCGM exporter is enabled ("True" or "true"), one endpoint per
#      decode node and then per prefill node on the dcgm-exporter port.
# Reads the global associative arrays dynamo_args, prefill_config and
# decode_config; node lists are comma-separated and empty entries are skipped.
_resolve_aiperf_server_metrics_urls() {
    local urls="http://${dynamo_args["frontend-node"]}:${dynamo_args["port"]}/metrics"
    local base_system_port=${DYN_SYSTEM_PORT:-9090}
    local decode_workers_per_node=${decode_config["workers-per-node"]:-1}
    local prefill_workers_per_node=${prefill_config["workers-per-node"]:-1}
    local dcgm_enabled="${dynamo_args["dcgm-exporter-enabled"]}"
    local node i

    # Function-local IFS: splits the unquoted node lists on commas and is
    # restored automatically on return, so the caller's IFS is never touched.
    local IFS=','

    # Worker indices use arithmetic for-loops rather than $(seq ...): with
    # IFS=',' the newline-separated seq output would not be word-split, which
    # broke the port arithmetic whenever workers-per-node was greater than 1.
    for node in ${prefill_config["node-list"]:-}; do
        [[ -z "$node" ]] && continue
        for ((i = 0; i < prefill_workers_per_node; i++)); do
            urls="${urls},http://${node}:$((base_system_port + i))/metrics"
        done
    done

    for node in ${decode_config["node-list"]:-}; do
        [[ -z "$node" ]] && continue
        for ((i = 0; i < decode_workers_per_node; i++)); do
            urls="${urls},http://${node}:$((base_system_port + i))/metrics"
        done
    done

    if [[ "$dcgm_enabled" == "True" || "$dcgm_enabled" == "true" ]]; then
        local dcgm_nodes="${decode_config["node-list"]:-},${prefill_config["node-list"]:-}"
        for node in $dcgm_nodes; do
            # Either list may be empty, leaving a leading or trailing comma.
            [[ -z "$node" ]] && continue
            urls="${urls},http://${node}:${dynamo_args["dcgm-exporter-port"]}/metrics"
        done
    fi

    echo "$urls"
}

function setup_cufile()
{
export CUFILE_ENV_PATH_JSON="$RESULTS_DIR/cufile.json"
Expand Down Expand Up @@ -1058,6 +1093,11 @@ function launch_workload()

local workload_name="${workload_config_ref["--name"]}"
local script="${workload_config_ref["--script"]}"
export FRONTEND_URL="${dynamo_args["url"]}"
export AIPERF_MODEL="${dynamo_args["model"]}"
export AIPERF_ENDPOINT="${dynamo_args["endpoint"]}"
export AIPERF_FAILURE_MARKER="${FATAL_ERROR_MARKER}"
export AIPERF_SERVER_METRICS_URLS="$(_resolve_aiperf_server_metrics_urls)"

# Build config and workload args as proper bash arrays to preserve
# multi-word values (e.g. --cmd "genai-perf profile") through word splitting.
Expand Down
Loading
Loading