From 9b1e8e7e903508dec6e9b69abd9c07e0979fc1bb Mon Sep 17 00:00:00 2001
From: Gil Levkovich <69595609+glevkovich@users.noreply.github.com>
Date: Mon, 1 Jun 2026 17:08:32 +0700
Subject: [PATCH] perf(facade): batch V2 control-path replies in ProcessControlMessages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When ProcessControlMessages wakes with multiple messages queued, each
std::visit call triggers FinishScope() -> Flush(), producing one sendmsg
syscall per message (PubSub, Monitor, Invalidation).

Enable batch mode when dispatch_q_ holds more than one message, so all
replies in the drain pass accumulate in the send buffer and flush
together at the main loop's idle-await point.

Single-message wakeups are unaffected: SetBatchMode(false) leaves
FinishScope() flushing immediately, preserving p=1 latency.

Cross-machine benchmark (10 subscribers, 4 threads):

  PIPELINE   RPS Δ    SYSCALLS Δ
  1          +3%      -24%
  10         +12%     -68%
  100        -1.4%    -69%
  500        -2%      -70%

RPS is publisher-bound at p>=100; the syscall reduction is the real
gain — freeing kernel and NIC resources under fan-out load.

Signed-off-by: Gil Levkovich <69595609+glevkovich@users.noreply.github.com>
---
 src/facade/dragonfly_connection.cc | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/facade/dragonfly_connection.cc b/src/facade/dragonfly_connection.cc
index 77b479383b3c..6ae2146636b4 100644
--- a/src/facade/dragonfly_connection.cc
+++ b/src/facade/dragonfly_connection.cc
@@ -1574,6 +1574,12 @@ bool Connection::ProcessControlMessages(uint32_t quota) {
   // PubSub replies flush immediately via FinishScope() only when batched_ is false.
   // ReplyBatch() and ExecuteBatch() both reset it via absl::Cleanup guards on all return paths.
   DCHECK(!reply_builder_->IsBatchMode());
+
+  // Batch control-message replies when multiple are queued,
+  // avoiding per-message sendmsg syscalls while preserving latency for single-message wakeups.
+ reply_builder_->SetBatchMode(dispatch_q_.size() > 1); + absl::Cleanup batch_guard = [this] { reply_builder_->SetBatchMode(false); }; + uint32_t dispatched = 0; while (!dispatch_q_.empty()) {