From 4b5a1aef583443a6ff9b2725ab42fcae458bb515 Mon Sep 17 00:00:00 2001 From: Sergei Isaev <48261488+s-isaev@users.noreply.github.com> Date: Thu, 28 May 2026 14:42:29 +0200 Subject: [PATCH 1/2] [vllm] reset all caches after weight updates --- verl/workers/rollout/vllm_rollout/vllm_async_server.py | 8 ++++++++ verl/workers/rollout/vllm_rollout/vllm_rollout.py | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/verl/workers/rollout/vllm_rollout/vllm_async_server.py b/verl/workers/rollout/vllm_rollout/vllm_async_server.py index d4194e1ce55..6dccf0b0305 100644 --- a/verl/workers/rollout/vllm_rollout/vllm_async_server.py +++ b/verl/workers/rollout/vllm_rollout/vllm_async_server.py @@ -641,6 +641,14 @@ async def clear_kv_cache(self): # is a no-op success, so we can pass it unconditionally. await self.engine.reset_prefix_cache(**_RESET_PREFIX_CACHE_KWARGS) + async def clear_all_caches(self): + await self.clear_kv_cache() + if self.node_rank == 0: + if _VLLM_VERSION >= version.parse("0.9.0"): + await self.engine.reset_mm_cache() + if _VLLM_VERSION >= version.parse("0.16.0"): + await self.engine.reset_encoder_cache() + async def release_kv_cache(self): """Release only kv_cache GPU memory, keeping model weights intact. # TODO: support true release of kv_cache diff --git a/verl/workers/rollout/vllm_rollout/vllm_rollout.py b/verl/workers/rollout/vllm_rollout/vllm_rollout.py index fffe57565d0..698f022bd13 100644 --- a/verl/workers/rollout/vllm_rollout/vllm_rollout.py +++ b/verl/workers/rollout/vllm_rollout/vllm_rollout.py @@ -190,9 +190,9 @@ async def update_weights( if future is not None: await future - # reset prefix cache after updating weights + # reset all caches after updating weights if self.rollout_rank == 0: - await self.server_handle.clear_kv_cache.remote() + await self.server_handle.clear_all_caches.remote() if global_steps is not None: await self.server_handle.set_global_steps.remote(global_steps) From 8fcb2f60fd458d58b2eac56ebab0bdb195fb274c Mon Sep 17 00:00:00 2001 From: Sergei Isaev <48261488+s-isaev@users.noreply.github.com> Date: Fri, 29 May 2026 14:00:34 +0200 Subject: [PATCH 2/2] [vllm] address cache reset review comment --- verl/workers/rollout/vllm_rollout/vllm_async_server.py | 3 --- verl/workers/rollout/vllm_rollout/vllm_rollout.py | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/verl/workers/rollout/vllm_rollout/vllm_async_server.py b/verl/workers/rollout/vllm_rollout/vllm_async_server.py index 6dccf0b0305..3b24590410e 100644 --- a/verl/workers/rollout/vllm_rollout/vllm_async_server.py +++ b/verl/workers/rollout/vllm_rollout/vllm_async_server.py @@ -641,9 +641,6 @@ async def clear_kv_cache(self): # is a no-op success, so we can pass it unconditionally. await self.engine.reset_prefix_cache(**_RESET_PREFIX_CACHE_KWARGS) - async def clear_all_caches(self): - await self.clear_kv_cache() - if self.node_rank == 0: if _VLLM_VERSION >= version.parse("0.9.0"): await self.engine.reset_mm_cache() if _VLLM_VERSION >= version.parse("0.16.0"): diff --git a/verl/workers/rollout/vllm_rollout/vllm_rollout.py b/verl/workers/rollout/vllm_rollout/vllm_rollout.py index 698f022bd13..052abaf8664 100644 --- a/verl/workers/rollout/vllm_rollout/vllm_rollout.py +++ b/verl/workers/rollout/vllm_rollout/vllm_rollout.py @@ -190,9 +190,9 @@ async def update_weights( if future is not None: await future - # reset all caches after updating weights + # reset caches after updating weights if self.rollout_rank == 0: - await self.server_handle.clear_all_caches.remote() + await self.server_handle.clear_kv_cache.remote() if global_steps is not None: await self.server_handle.set_global_steps.remote(global_steps)