From 75fe4e8f957467e8d0c4fb1364e72f0f19bb8994 Mon Sep 17 00:00:00 2001 From: Pushkinist <4850452+Pushkinist@users.noreply.github.com> Date: Thu, 18 Jun 2026 15:05:20 +0700 Subject: [PATCH] chore(release): 0.2.3 --- CHANGELOG.md | 33 ++++++++++++++++++++++++++++++++- Cargo.lock | 24 ++++++++++++------------ Cargo.toml | 2 +- 3 files changed, 45 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d22e64..a3911ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.2.3] - 2026-06-18 + +Multi-model registry hardening. Two `--registry` serving bugs fixed: the +multimodal encoder-output cache no longer leaks vision/audio features across +models, and eager model preload now respects `--max-loaded-models`. No breaking +changes. + +### Fixed + +- **Multimodal encoder-output cache cross-model leak.** In `--registry` + multi-model mode the vision/audio encoder cache was keyed on the + post-preprocess content hash only, so a cached image encoding produced for one + model (projected to its `hidden_size`) was returned to a different model for + the same image — a vision-feature shape mismatch (HTTP 503) when the hidden + sizes differed. The cache key now folds in a stable per-model signature, so + entries are never shared across models; same-model repeats still hit. (#132) +- **Registry eager-preload ignored `--max-loaded-models`.** `rmlx serve + --registry` preloaded every model at startup even with a smaller resident cap, + paying the full load cost for models that were immediately evicted (a + ~5-minute boot for a 13-model registry). Preload is now bounded to at most + `--max-loaded-models` entries (the alphabetically-first ids, since the + registry is id-sorted); the rest load on demand. (#133) + +### Changed + +- `README.md` refreshed to 0.2.3 with an accurate "What works" summary, and + `docs/CLI.md` documents that the multimodal cache key now includes model + identity (no cross-model sharing) and that registry preload is bounded to the + resident cap. + ## [0.2.2] - 2026-06-18 Multimodal release. Whisper transcription works end to end (decode correctness @@ -342,7 +372,8 @@ inference + conversion backend for Apple Silicon — no Python at runtime. - Speculative drafters validated against their verifiers: Qwen 3.6 MTP sidecar and the Gemma 4 assistant drafter. -[Unreleased]: https://github.com/Pushkinist/rMLX/compare/v0.2.2...HEAD +[Unreleased]: https://github.com/Pushkinist/rMLX/compare/v0.2.3...HEAD +[0.2.3]: https://github.com/Pushkinist/rMLX/releases/tag/v0.2.3 [0.2.2]: https://github.com/Pushkinist/rMLX/releases/tag/v0.2.2 [0.2.1]: https://github.com/Pushkinist/rMLX/releases/tag/v0.2.1 [0.2.0]: https://github.com/Pushkinist/rMLX/releases/tag/v0.2.0 diff --git a/Cargo.lock b/Cargo.lock index 9615257..6377e33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1921,7 +1921,7 @@ checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "rmlx-audio" -version = "0.2.2" +version = "0.2.3" dependencies = [ "miniz_oxide", "rmlx-core", @@ -1939,7 +1939,7 @@ dependencies = [ [[package]] name = "rmlx-cli" -version = "0.2.2" +version = "0.2.3" dependencies = [ "anyhow", "chrono", @@ -1974,7 +1974,7 @@ dependencies = [ [[package]] name = "rmlx-core" -version = "0.2.2" +version = "0.2.3" dependencies = [ "chrono", "libc", @@ -1988,7 +1988,7 @@ dependencies = [ [[package]] name = "rmlx-kv-quant" -version = "0.2.2" +version = "0.2.3" dependencies = [ "rmlx-core", "rmlx-mlx", @@ -1998,7 +1998,7 @@ dependencies = [ [[package]] name = "rmlx-kv-ssd" -version = "0.2.2" +version = "0.2.3" dependencies = [ "rmlx-core", "rmlx-kv-quant", @@ -2014,7 +2014,7 @@ dependencies = [ [[package]] name = "rmlx-loader" -version = "0.2.2" +version = "0.2.3" dependencies = [ "memmap2", "rayon", @@ -2030,7 +2030,7 @@ dependencies = [ [[package]] name = "rmlx-metrics" -version = "0.2.2" +version = "0.2.3" dependencies = [ "csv", "regex-lite", @@ -2049,7 +2049,7 @@ dependencies = [ [[package]] name = "rmlx-mlx" -version = "0.2.2" +version = "0.2.3" dependencies = [ "bindgen", "rmlx-core", @@ -2061,7 +2061,7 @@ dependencies = [ [[package]] name = "rmlx-models" -version = "0.2.2" +version = "0.2.3" dependencies = [ "criterion", "image", @@ -2088,7 +2088,7 @@ dependencies = [ [[package]] name = "rmlx-quant" -version = "0.2.2" +version = "0.2.3" dependencies = [ "criterion", "rmlx-core", @@ -2099,7 +2099,7 @@ dependencies = [ [[package]] name = "rmlx-runtime" -version = "0.2.2" +version = "0.2.3" dependencies = [ "rmlx-core", "rmlx-mlx", @@ -2109,7 +2109,7 @@ dependencies = [ [[package]] name = "rmlx-server" -version = "0.2.2" +version = "0.2.3" dependencies = [ "anyhow", "axum", diff --git a/Cargo.toml b/Cargo.toml index c4651f0..4339d82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ members = [ ] [workspace.package] -version = "0.2.2" +version = "0.2.3" edition = "2021" rust-version = "1.95" license = "MIT OR Apache-2.0"