From 2e3009cbca24186fe7e671a0f8f8c517acf68410 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 May 2026 12:13:46 +0000 Subject: [PATCH 1/6] Initial plan From c3d074e613197139e65016d15ae63398de02abd4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 May 2026 12:24:27 +0000 Subject: [PATCH 2/6] Use SharedMemory APIs for SHM transport Agent-Logs-Url: https://github.com/hlin99/LMCache/sessions/5885bfad-28e5-4d4e-8ef5-c6fe1d4b89ae Co-authored-by: hlin99 <73271530+hlin99@users.noreply.github.com> --- lmcache/v1/distributed/memory_manager.py | 26 ++++++++++------- .../v1/multiprocess/non_gpu_context_shm.py | 28 ++++++------------- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/lmcache/v1/distributed/memory_manager.py b/lmcache/v1/distributed/memory_manager.py index 048dee3d246..8da552ae31c 100644 --- a/lmcache/v1/distributed/memory_manager.py +++ b/lmcache/v1/distributed/memory_manager.py @@ -1,8 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # Standard -import os +from multiprocessing import shared_memory import shutil +import sys # First Party from lmcache.logging import init_logger @@ -28,13 +29,16 @@ def _unlink_stale_shm(shm_name: str) -> None: return if not normalized.startswith("lmcache_l1_pool_"): return - shm_path = os.path.join("/dev/shm", normalized) try: - os.unlink(shm_path) + shm = shared_memory.SharedMemory(name=normalized, create=False) + shm.close() + shm.unlink() except FileNotFoundError: return except OSError: - logger.warning("Failed to remove stale shm segment %s", shm_path, exc_info=True) + logger.warning( + "Failed to remove stale shm segment %s", normalized, exc_info=True + ) def create_memory_allocator(config: L1MemoryManagerConfig) -> MemoryAllocatorInterface: @@ -73,12 +77,14 @@ def create_memory_allocator(config: L1MemoryManagerConfig) -> MemoryAllocatorInt if not bare.startswith("lmcache_l1_pool_"): shm_name = f"lmcache_l1_pool_{bare}" try: - free_bytes = shutil.disk_usage("/dev/shm").free - if free_bytes < config.size_in_bytes: - raise RuntimeError( - "insufficient /dev/shm capacity: " - f"need {config.size_in_bytes} bytes, have {free_bytes} bytes" - ) + if sys.platform == "linux": + free_bytes = shutil.disk_usage("/dev/shm").free + if free_bytes < config.size_in_bytes: + raise RuntimeError( + "insufficient /dev/shm capacity: " + f"need {config.size_in_bytes} bytes, " + f"have {free_bytes} bytes" + ) _unlink_stale_shm(shm_name) return MixedMemoryAllocator( config.size_in_bytes, diff --git a/lmcache/v1/multiprocess/non_gpu_context_shm.py b/lmcache/v1/multiprocess/non_gpu_context_shm.py index d42a8031eba..6f458171be9 100644 --- a/lmcache/v1/multiprocess/non_gpu_context_shm.py +++ b/lmcache/v1/multiprocess/non_gpu_context_shm.py @@ -2,9 +2,8 @@ """Shared-memory NonGpuContext implementation for multiprocess mode.""" # Standard +from multiprocessing import shared_memory from typing import Any -import mmap -import os # Third Party import torch @@ -16,8 +15,6 @@ ) from lmcache.v1.multiprocess.protocol import RequestType, get_response_class -INVALID_SHM_FD = -1 - class NonGpuContextShm(NonGpuContext): """Shared-memory implementation of :class:`NonGpuContext`.""" @@ -36,17 +33,10 @@ def __init__( self._shm_name = shm_name self._pool_size = pool_size - self._shm_fd = INVALID_SHM_FD - shm_path = os.path.join("/dev/shm", shm_name.lstrip("/")) - self._shm_fd = os.open(shm_path, os.O_RDWR) - try: - self._mmap_obj = mmap.mmap( - self._shm_fd, self._pool_size, access=mmap.ACCESS_WRITE - ) - except Exception: - os.close(self._shm_fd) - self._shm_fd = INVALID_SHM_FD - raise + self._shm: shared_memory.SharedMemory | None = shared_memory.SharedMemory( + name=shm_name.lstrip("/"), create=False + ) + self._mmap_obj = self._shm.buf def _make_tensor_view( self, @@ -150,11 +140,9 @@ def commit_retrieve(self, key: Any, instance_id: int) -> bool: return False def close(self) -> None: - if self._shm_fd == INVALID_SHM_FD: + if self._shm is None: return try: - self._mmap_obj.close() + self._shm.close() finally: - fd = self._shm_fd - self._shm_fd = INVALID_SHM_FD - os.close(fd) + self._shm = None From 55c61b1fde9bc33f18d8c63e8d07b2787eeda189 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 May 2026 12:27:30 +0000 Subject: [PATCH 3/6] Polish SharedMemory transport follow-ups Agent-Logs-Url: https://github.com/hlin99/LMCache/sessions/5885bfad-28e5-4d4e-8ef5-c6fe1d4b89ae Co-authored-by: hlin99 <73271530+hlin99@users.noreply.github.com> --- lmcache/v1/distributed/memory_manager.py | 2 +- lmcache/v1/multiprocess/non_gpu_context_shm.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/lmcache/v1/distributed/memory_manager.py b/lmcache/v1/distributed/memory_manager.py index 8da552ae31c..5f19474de78 100644 --- a/lmcache/v1/distributed/memory_manager.py +++ b/lmcache/v1/distributed/memory_manager.py @@ -77,7 +77,7 @@ def create_memory_allocator(config: L1MemoryManagerConfig) -> MemoryAllocatorInt if not bare.startswith("lmcache_l1_pool_"): shm_name = f"lmcache_l1_pool_{bare}" try: - if sys.platform == "linux": + if sys.platform.startswith("linux"): free_bytes = shutil.disk_usage("/dev/shm").free if free_bytes < config.size_in_bytes: raise RuntimeError( diff --git a/lmcache/v1/multiprocess/non_gpu_context_shm.py b/lmcache/v1/multiprocess/non_gpu_context_shm.py index 6f458171be9..3c6d6ba1bed 100644 --- a/lmcache/v1/multiprocess/non_gpu_context_shm.py +++ b/lmcache/v1/multiprocess/non_gpu_context_shm.py @@ -33,10 +33,10 @@ def __init__( self._shm_name = shm_name self._pool_size = pool_size - self._shm: shared_memory.SharedMemory | None = shared_memory.SharedMemory( - name=shm_name.lstrip("/"), create=False - ) - self._mmap_obj = self._shm.buf + self._shm: shared_memory.SharedMemory | None = None + self._shm_buffer: memoryview | None = None + self._shm = shared_memory.SharedMemory(name=shm_name.lstrip("/"), create=False) + self._shm_buffer = self._shm.buf def _make_tensor_view( self, @@ -53,8 +53,10 @@ def _make_tensor_view( if itemsize <= 0: raise ValueError(f"Invalid dtype size for {dtype_str}") count = length // itemsize + if self._shm_buffer is None: + raise RuntimeError("Shared memory buffer is not available") tensor_1d = torch.frombuffer( - self._mmap_obj, dtype=dtype, count=count, offset=offset + self._shm_buffer, dtype=dtype, count=count, offset=offset ) return tensor_1d.view(torch.Size(shape)) @@ -146,3 +148,4 @@ def close(self) -> None: self._shm.close() finally: self._shm = None + self._shm_buffer = None From e2f3717220d914135f4b905f3082ecb9e74d80c6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 21 May 2026 12:30:22 +0000 Subject: [PATCH 4/6] Address SharedMemory review feedback Agent-Logs-Url: https://github.com/hlin99/LMCache/sessions/5885bfad-28e5-4d4e-8ef5-c6fe1d4b89ae Co-authored-by: hlin99 <73271530+hlin99@users.noreply.github.com> --- lmcache/v1/distributed/memory_manager.py | 2 ++ lmcache/v1/multiprocess/non_gpu_context_shm.py | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/lmcache/v1/distributed/memory_manager.py b/lmcache/v1/distributed/memory_manager.py index 5f19474de78..ca28fb1ab2f 100644 --- a/lmcache/v1/distributed/memory_manager.py +++ b/lmcache/v1/distributed/memory_manager.py @@ -77,6 +77,8 @@ def create_memory_allocator(config: L1MemoryManagerConfig) -> MemoryAllocatorInt if not bare.startswith("lmcache_l1_pool_"): shm_name = f"lmcache_l1_pool_{bare}" try: + # /dev/shm capacity is only meaningful on Linux, where POSIX shm + # is backed by a tmpfs mount with a bounded free-space view. if sys.platform.startswith("linux"): free_bytes = shutil.disk_usage("/dev/shm").free if free_bytes < config.size_in_bytes: diff --git a/lmcache/v1/multiprocess/non_gpu_context_shm.py b/lmcache/v1/multiprocess/non_gpu_context_shm.py index 3c6d6ba1bed..3096383632c 100644 --- a/lmcache/v1/multiprocess/non_gpu_context_shm.py +++ b/lmcache/v1/multiprocess/non_gpu_context_shm.py @@ -35,8 +35,15 @@ def __init__( self._pool_size = pool_size self._shm: shared_memory.SharedMemory | None = None self._shm_buffer: memoryview | None = None - self._shm = shared_memory.SharedMemory(name=shm_name.lstrip("/"), create=False) - self._shm_buffer = self._shm.buf + try: + self._shm = shared_memory.SharedMemory( + name=shm_name.lstrip("/"), create=False + ) + self._shm_buffer = self._shm.buf + except Exception: + self._shm = None + self._shm_buffer = None + raise def _make_tensor_view( self, @@ -54,7 +61,9 @@ def _make_tensor_view( raise ValueError(f"Invalid dtype size for {dtype_str}") count = length // itemsize if self._shm_buffer is None: - raise RuntimeError("Shared memory buffer is not available") + raise RuntimeError( + f"Shared memory buffer not initialized for shm_name={self._shm_name}" + ) tensor_1d = torch.frombuffer( self._shm_buffer, dtype=dtype, count=count, offset=offset ) From 2bdb6aeabda3ee39111da09c312865d58fa5c845 Mon Sep 17 00:00:00 2001 From: Tony Lin Date: Thu, 21 May 2026 20:42:53 +0800 Subject: [PATCH 5/6] Add fallback to pickle transport when SHM context initialization fails --- lmcache/v1/multiprocess/non_gpu_context.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lmcache/v1/multiprocess/non_gpu_context.py b/lmcache/v1/multiprocess/non_gpu_context.py index 4017c1700e1..9fac3ea3ad9 100644 --- a/lmcache/v1/multiprocess/non_gpu_context.py +++ b/lmcache/v1/multiprocess/non_gpu_context.py @@ -120,6 +120,8 @@ def create_non_gpu_context( Returns SHM-based implementation when shared-memory pool information is available; otherwise falls back to the pickle-based implementation. + If SHM initialization fails for any reason (e.g. segment not found, + permission error), gracefully falls back to pickle transport. Args: metadata: Layout metadata for the non-GPU context. @@ -135,12 +137,20 @@ def create_non_gpu_context( # Local from .non_gpu_context_shm import NonGpuContextShm - logger.info( - "Creating NonGpuContextShm (shm_name=%s, pool_size=%d)", - shm_name, - pool_size, - ) - return NonGpuContextShm(metadata, mq_client, mq_timeout, shm_name, pool_size) + try: + logger.info( + "Creating NonGpuContextShm (shm_name=%s, pool_size=%d)", + shm_name, + pool_size, + ) + return NonGpuContextShm(metadata, mq_client, mq_timeout, shm_name, pool_size) + except Exception: + logger.warning( + "Failed to initialize SHM context (shm_name=%s), " + "falling back to pickle transport", + shm_name, + exc_info=True, + ) # Local from .non_gpu_context_pickle import NonGpuContextPickle From d15ff6a580994f85c597d7f44183fd7ccc397337 Mon Sep 17 00:00:00 2001 From: Tony Lin Date: Thu, 21 May 2026 20:55:16 +0800 Subject: [PATCH 6/6] fix: unregister SHM from resource tracker to prevent premature unlink Workers do not own the SHM segment (server does), so we must prevent Python's resource_tracker from unlinking it when the worker exits. Without this, the second worker startup fails with FileNotFoundError. --- lmcache/v1/multiprocess/non_gpu_context_shm.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lmcache/v1/multiprocess/non_gpu_context_shm.py b/lmcache/v1/multiprocess/non_gpu_context_shm.py index 3096383632c..b52c62956e9 100644 --- a/lmcache/v1/multiprocess/non_gpu_context_shm.py +++ b/lmcache/v1/multiprocess/non_gpu_context_shm.py @@ -3,6 +3,7 @@ # Standard from multiprocessing import shared_memory +from multiprocessing.resource_tracker import unregister from typing import Any # Third Party @@ -39,6 +40,10 @@ def __init__( self._shm = shared_memory.SharedMemory( name=shm_name.lstrip("/"), create=False ) + # The SHM segment is owned by the server process. Unregister it + # from this worker's resource tracker so that Python does not + # unlink the segment when this worker exits. + unregister(f"/{self._shm.name}", "shared_memory") self._shm_buffer = self._shm.buf except Exception: self._shm = None