diff --git a/verl/workers/rollout/vllm_rollout/vllm_async_server.py b/verl/workers/rollout/vllm_rollout/vllm_async_server.py index d4194e1ce55..3b24590410e 100644 --- a/verl/workers/rollout/vllm_rollout/vllm_async_server.py +++ b/verl/workers/rollout/vllm_rollout/vllm_async_server.py @@ -641,6 +641,11 @@ async def clear_kv_cache(self): # is a no-op success, so we can pass it unconditionally. await self.engine.reset_prefix_cache(**_RESET_PREFIX_CACHE_KWARGS) + if _VLLM_VERSION >= version.parse("0.9.0"): + await self.engine.reset_mm_cache() + if _VLLM_VERSION >= version.parse("0.16.0"): + await self.engine.reset_encoder_cache() + async def release_kv_cache(self): """Release only kv_cache GPU memory, keeping model weights intact. # TODO: support true release of kv_cache diff --git a/verl/workers/rollout/vllm_rollout/vllm_rollout.py b/verl/workers/rollout/vllm_rollout/vllm_rollout.py index fffe57565d0..052abaf8664 100644 --- a/verl/workers/rollout/vllm_rollout/vllm_rollout.py +++ b/verl/workers/rollout/vllm_rollout/vllm_rollout.py @@ -190,7 +190,7 @@ async def update_weights( if future is not None: await future - # reset prefix cache after updating weights + # reset caches after updating weights if self.rollout_rank == 0: await self.server_handle.clear_kv_cache.remote() if global_steps is not None: