Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion mistralrs-cli/src/commands/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use mistralrs_core::{
};
use mistralrs_server_core::mistralrs_for_server_builder::MistralRsForServerBuilder;
use std::sync::Arc;
use std::time::Instant;
use std::time::{Duration, Instant};
use tokio::sync::mpsc::channel;
use tracing::info;

Expand Down Expand Up @@ -102,6 +102,14 @@ pub async fn run_bench(
}
info!("Warmup complete.");

// Flush KV state left over from warmup. Sending TerminateAllSeqsNextStep sets
// a global AtomicBool; the engine reads and acts on it at the top of its next
// scheduler iteration. Nothing acknowledges the request, so the fixed 50 ms
// sleep below is a best-effort wait for that iteration to complete before the
// next benchmark request enters the channel, not a synchronization guarantee.
let sender = mistralrs.get_sender(None).unwrap();
let _ = sender.send(mistralrs_core::Request::TerminateAllSeqsNextStep).await;
tokio::time::sleep(Duration::from_millis(50)).await;

// Reset logger counters so benchmark stats are clean
if let Ok(logger) = mistralrs.get_logger(None) {
logger.reset();
Expand Down Expand Up @@ -142,6 +150,13 @@ pub async fn run_bench(
let ms_per_tok = 1000.0 / tok_per_sec;
decode_results.push((tok_per_sec, ms_per_tok));
}

// Flush KV state between iterations, using the same mechanism as the
// post-warmup flush: send TerminateAllSeqsNextStep, then sleep for a fixed
// 50 ms so the engine gets a chance to run one full loop iteration and process
// the termination before the next run begins (best-effort; no acknowledgement).
let sender = mistralrs.get_sender(None).unwrap();
let _ = sender.send(mistralrs_core::Request::TerminateAllSeqsNextStep).await;
tokio::time::sleep(Duration::from_millis(50)).await;
}

// Calculate statistics
Expand Down
Loading