From 2d532b448e820fc3270325d60bad4f7f5ee682cd Mon Sep 17 00:00:00 2001 From: Sydney Firmin Date: Thu, 9 Apr 2026 12:29:29 -0700 Subject: [PATCH] ci(benchmarks): move Devstral nightly tests to H100 Signed-off-by: Sydney Firmin --- .github/workflows/nightly-benchmark.yml | 1 + e2e_test/benchmarks/test_nightly_perf.py | 15 ++++++++++++++- e2e_test/infra/model_specs.py | 9 +++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/.github/workflows/nightly-benchmark.yml b/.github/workflows/nightly-benchmark.yml index fba3023ad..da93ca298 100644 --- a/.github/workflows/nightly-benchmark.yml +++ b/.github/workflows/nightly-benchmark.yml @@ -121,6 +121,7 @@ jobs: - { id: Qwen/Qwen2.5-7B-Instruct, slug: Qwen-Qwen2.5-7B-Instruct, test_class: TestNightlyQwen7bSingle } - { id: Qwen/Qwen3-30B-A3B, slug: Qwen-Qwen3-30B-A3B, test_class: TestNightlyQwen30bSingle } - { id: openai/gpt-oss-20b, slug: openai-gpt-oss-20b, test_class: TestNightlyGptOss20bSingle } + - { id: mistralai/Devstral-2-123B-Instruct-2512, slug: mistralai-Devstral-2-123B-Instruct-2512, test_class: TestNightlyDevstral2Single } - { id: meta-llama/Llama-4-Scout-17B-16E-Instruct, slug: meta-llama-Llama-4-Scout-17B-16E-Instruct, test_class: TestNightlyLlama4ScoutSingle } - { id: meta-llama/Llama-3.3-70B-Instruct, slug: meta-llama-Llama-3.3-70B-Instruct, test_class: TestNightlyLlama70bSingle } - { id: RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic, slug: RedHatAI-Llama-3.3-70B-Instruct-FP8-dynamic, test_class: TestNightlyLlama70bFp8Single } diff --git a/e2e_test/benchmarks/test_nightly_perf.py b/e2e_test/benchmarks/test_nightly_perf.py index 51b3d290a..9312aa56e 100644 --- a/e2e_test/benchmarks/test_nightly_perf.py +++ b/e2e_test/benchmarks/test_nightly_perf.py @@ -102,6 +102,13 @@ def _run_nightly(setup_backend, genai_bench_runner, model_id, worker_count=1, ** ("Qwen/Qwen3-30B-A3B", "Qwen30b", 4, ["http", "grpc"], {}), ("openai/gpt-oss-20b", "GptOss20b", 1, ["http", "grpc"], {}), ("minimaxai/minimax-m2", "MinimaxM2", 1, ["http", "grpc"], {}), + ( + "mistralai/Devstral-2-123B-Instruct-2512", + "Devstral2", + 1, + ["http", "grpc"], + {}, + ), ( "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "Llama4Maverick", @@ -132,6 +139,9 @@ def _run_nightly(setup_backend, genai_bench_runner, model_id, worker_count=1, ** ), ] +_SINGLE_ONLY_NIGHTLY_MODELS = { + "mistralai/Devstral-2-123B-Instruct-2512", +} # --------------------------------------------------------------------------- # Dynamic test class generation @@ -163,7 +173,10 @@ def test_nightly_perf(self, setup_backend, genai_bench_runner): for _model_id, _name, _multi_workers, _backends, _extra in _NIGHTLY_MODELS: - for _suffix, _count in [("Single", 1), ("Multi", _multi_workers)]: + _variants = [("Single", 1)] + if _model_id not in _SINGLE_ONLY_NIGHTLY_MODELS: + _variants.append(("Multi", _multi_workers)) + for _suffix, _count in _variants: _cls_name = f"TestNightly{_name}{_suffix}" _cls = _make_test_class(_model_id, _count, _backends, _extra) _cls.__name__ = _cls_name diff --git a/e2e_test/infra/model_specs.py b/e2e_test/infra/model_specs.py index 3fd1eb2cd..b8b0a4e01 100644 --- a/e2e_test/infra/model_specs.py +++ b/e2e_test/infra/model_specs.py @@ -114,6 +114,15 @@ def _resolve_model_path(hf_path: str) -> str: "sglang_args": ["--trust-remote-code"], "vllm_args": ["--trust-remote-code"], }, + # Devstral 2 123B - Nightly benchmarks + "mistralai/Devstral-2-123B-Instruct-2512": { + "model": _resolve_model_path("mistralai/Devstral-2-123B-Instruct-2512"), + "tp": 4, + "features": ["chat", "streaming", "function_calling", "reasoning"], + "startup_timeout": 1200, + "sglang_args": ["--trust-remote-code"], + "vllm_args": ["--trust-remote-code"], + }, # Vision-language model for multimodal benchmarks (MMMU) "Qwen/Qwen3-VL-8B-Instruct": { "model": _resolve_model_path("Qwen/Qwen3-VL-8B-Instruct"),