From 24dc15299af2b25091c75f81e094c3fdaeb8ec6d Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 3 May 2026 16:31:01 +0000 Subject: [PATCH] [Bridge surgery] flock + bridge package + readiness doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five MoE roles converged on bridge-readiness work directed by Joseph: - Substrate Engineer: stdlib fcntl cross-process lockfile in Moneta.__init__ (POSIX-only, opt-in via snapshot_path; lockfile at snapshot_path.with_suffix('.lock')). Released in close() before _ACTIVE_URIS discard. Hardens the in-process registry against multi-process corruption flagged in the audit. - USD Engineer: codeless CozyRoom + CozyMemory schema at bridge/schema/ for Comfy-Cozy session emissions. UUID-based prim paths (substrate convention #1). embeddingHint contract documented (empty = bridge embeds; non-empty length must match embedder dim). - Bridge Engineer: bridge/ package skeleton with Embedder Protocol + SentenceTransformersEmbedder default (all-MiniLM-L6-v2, 384-dim, normalize_embeddings=True). Lazy sentence-transformers import so the module is importable without the dep. - Documentarian: promoted bridge readiness assessment from /root/.claude/plans/ into docs/bridge-readiness.md. Flagged CLAUDE.md drift (says v1.0.0; pyproject says v1.2.0rc1) for a follow-up pass. - Test Engineer: 5 adversarial flock tests (in-process collision, cross-process block, release-on-close, ephemeral no-flock, lockfile-path), all POSIX-skipped on win32. 6 embedder tests (one verifies lazy-import without sentence-transformers; five use importorskip for the live model). Verification: 75 unit + 22 integration green (97 total, 4 skipped on pxr unavailability — unchanged). smoke_check passes. Ruff delta on api.py: 17 -> 18 errors (one Optional[Any] matching codebase convention; B904 fix pre-empted a +2 delta). Net new: bridge/ package, docs/bridge-readiness.md, tests/unit/test_api_flock.py, +63 lines src/moneta/api.py. 
https://claude.ai/code/session_01WhWjg9f9D6X76tDiZCvj1p --- .gitignore | 3 + bridge/README.md | 50 ++++++++ bridge/moneta_bridge/__init__.py | 10 ++ bridge/moneta_bridge/embedder.py | 78 ++++++++++++ bridge/pyproject.toml | 22 ++++ bridge/schema/CozySchema.usda | 109 ++++++++++++++++ bridge/schema/plugInfo.json | 37 ++++++ bridge/tests/__init__.py | 0 bridge/tests/test_embedder.py | 91 +++++++++++++ docs/bridge-readiness.md | 202 +++++++++++++++++++++++++++++ src/moneta/api.py | 64 +++++++++- tests/unit/test_api_flock.py | 211 +++++++++++++++++++++++++++++++ 12 files changed, 876 insertions(+), 1 deletion(-) create mode 100644 bridge/README.md create mode 100644 bridge/moneta_bridge/__init__.py create mode 100644 bridge/moneta_bridge/embedder.py create mode 100644 bridge/pyproject.toml create mode 100644 bridge/schema/CozySchema.usda create mode 100644 bridge/schema/plugInfo.json create mode 100644 bridge/tests/__init__.py create mode 100644 bridge/tests/test_embedder.py create mode 100644 docs/bridge-readiness.md create mode 100644 tests/unit/test_api_flock.py diff --git a/.gitignore b/.gitignore index 3f4014d..4e4f12e 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,9 @@ desktop.ini !schema/*.usdc !schema/**/*.usda !schema/**/*.usdc +# Bridge package schema is also source (codeless USD schema for Comfy-Cozy). +!bridge/schema/**/*.usda +!bridge/schema/**/*.usdc snapshots/ data/ cache/ diff --git a/bridge/README.md b/bridge/README.md new file mode 100644 index 0000000..e1f42d8 --- /dev/null +++ b/bridge/README.md @@ -0,0 +1,50 @@ +# moneta-bridge + +External adapter package wiring Comfy-Cozy session emissions into the +Moneta substrate. Comfy-Cozy is "frozen as law" — zero edits to that +repo. All wiring lives here. + +**Status:** v0.1.0a0 — codeless USD schema and embedder landed. +`ingest.py` and `egress.py` not yet implemented. + +## Layout + +- `moneta_bridge/embedder.py` — `Embedder` Protocol + default + `SentenceTransformersEmbedder`. 
+- `schema/CozySchema.usda` — codeless USD schema for Comfy-Cozy + emissions (`CozyRoom`, `CozyMemory`). +- `schema/plugInfo.json` — schema registration. +- `tests/` — unit tests. + +See [`../docs/bridge-readiness.md`](../docs/bridge-readiness.md) for +the readiness assessment that scoped this package. + +## Embedder + +Default: `sentence-transformers/all-MiniLM-L6-v2`, 384-dim, +`normalize_embeddings=True`. Chosen because it is offline (no API +key, no PII egress), deterministic given a fixed model version, +small (~80MB RAM, 384-dim cheap for cosine), and the de facto +standard in the agent-memory ecosystem. The embedder is exposed +behind a `Protocol` so swapping is one line. + +```python +from moneta_bridge import SentenceTransformersEmbedder + +embedder = SentenceTransformersEmbedder() +vec = embedder.embed("the user prefers concise answers") +assert len(vec) == 384 +``` + +## Usage with Moneta + +```python +import moneta +from moneta_bridge import SentenceTransformersEmbedder + +embedder = SentenceTransformersEmbedder() +with moneta.Moneta(moneta.MonetaConfig.ephemeral()) as m: + text = "remember this fact" + eid = m.deposit(text, embedder.embed(text)) + hits = m.query(embedder.embed("what do I know about facts?")) +``` diff --git a/bridge/moneta_bridge/__init__.py b/bridge/moneta_bridge/__init__.py new file mode 100644 index 0000000..22fa8be --- /dev/null +++ b/bridge/moneta_bridge/__init__.py @@ -0,0 +1,10 @@ +"""moneta-bridge — Comfy-Cozy <-> Moneta adapter package. + +Re-exports the embedder Protocol and the default +SentenceTransformersEmbedder. The default embedder lazily imports +sentence-transformers at construction, so this module is importable +without sentence-transformers installed. 
+""" +from .embedder import Embedder, SentenceTransformersEmbedder + +__all__ = ["Embedder", "SentenceTransformersEmbedder"] diff --git a/bridge/moneta_bridge/embedder.py b/bridge/moneta_bridge/embedder.py new file mode 100644 index 0000000..484ae3d --- /dev/null +++ b/bridge/moneta_bridge/embedder.py @@ -0,0 +1,78 @@ +"""Embedder Protocol + default SentenceTransformersEmbedder. + +The bridge does not assume Comfy-Cozy provides embeddings; the +default path is "bridge embeds the payload itself" using a local +sentence-transformers model. + +Choice rationale (Architect-on-deck decision, locked): +- Offline: no API key, no network, no PII egress to a third party. +- Deterministic given a fixed model version + seed. +- 384-dim is cheap for cosine similarity in Moneta's vector index. +- The de facto standard in the agent-memory ecosystem + (mem0, llama-index, langchain). + +The Embedder Protocol allows callers to swap in any embedder without +patching bridge code — useful for offline/air-gapped deployments +that ship a different model, or for callers who want OpenAI / BGE +embeddings. + +The sentence-transformers import is deferred to construction time so +this module is importable on machines that do not have +sentence-transformers installed (e.g., CI workers running pure-Python +tests against the Protocol shape). +""" +from __future__ import annotations + +from typing import List, Protocol, runtime_checkable + + +@runtime_checkable +class Embedder(Protocol): + """Contract for any embedder usable by the bridge. + + Implementations must: + - Expose ``dim`` as the integer embedding dimensionality. + - Implement ``embed(text)`` returning a ``List[float]`` of + length ``dim``. + - Implement ``embed_batch(texts)`` returning a list of + vectors, each ``List[float]`` of length ``dim``. + + Embeddings should be L2-normalized so cosine similarity reduces + to dot product (matches Moneta's vector_index expectations). + """ + + @property + def dim(self) -> int: ... 
+ + def embed(self, text: str) -> List[float]: ... + + def embed_batch(self, texts: List[str]) -> List[List[float]]: ... + + +class SentenceTransformersEmbedder: + """Default embedder: sentence-transformers/all-MiniLM-L6-v2. + + 384-dim, L2-normalized. Lazy-imports sentence-transformers on + construction so the module is importable without the dep. + """ + + def __init__( + self, + model_name: str = "sentence-transformers/all-MiniLM-L6-v2", + ) -> None: + from sentence_transformers import SentenceTransformer + + self._model = SentenceTransformer(model_name) + self._dim = int(self._model.get_sentence_embedding_dimension()) + + @property + def dim(self) -> int: + return self._dim + + def embed(self, text: str) -> List[float]: + vec = self._model.encode(text, normalize_embeddings=True) + return [float(x) for x in vec.tolist()] + + def embed_batch(self, texts: List[str]) -> List[List[float]]: + mat = self._model.encode(texts, normalize_embeddings=True) + return [[float(x) for x in row] for row in mat.tolist()] diff --git a/bridge/pyproject.toml b/bridge/pyproject.toml new file mode 100644 index 0000000..3fec808 --- /dev/null +++ b/bridge/pyproject.toml @@ -0,0 +1,22 @@ +[project] +name = "moneta-bridge" +version = "0.1.0a0" +description = "Comfy-Cozy <-> Moneta bridge: codeless USD schema, embedder, ingest/egress (in development)." 
+requires-python = ">=3.11" +dependencies = [ + "sentence-transformers>=2.2.0", +] + +[project.optional-dependencies] +dev = ["pytest>=8.0", "ruff"] + +[build-system] +requires = ["setuptools>=68"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["."] +include = ["moneta_bridge*"] + +[tool.pytest.ini_options] +pythonpath = ["."] diff --git a/bridge/schema/CozySchema.usda b/bridge/schema/CozySchema.usda new file mode 100644 index 0000000..0d6e072 --- /dev/null +++ b/bridge/schema/CozySchema.usda @@ -0,0 +1,109 @@ +#usda 1.0 +( + subLayers = [ + @usd/schema.usda@ + ] +) + +over "GLOBAL" ( + customData = { + string libraryName = "moneta_bridge" + string libraryPath = "./" + bool skipCodeGeneration = true + } +) +{ +} + +class CozyRoom "CozyRoom" ( + inherits = </Typed> + customData = { + string className = "CozyRoom" + string schemaKind = "concreteTyped" + } + doc = """A Comfy-Cozy session/space container. + +Authored by Comfy-Cozy when it flushes a session as USD; consumed by +the Moneta bridge ingester. The prim path is /Cozy/Room_<uuid> where +<uuid> is a uuid4 hex string (32 chars, no dashes). Substrate +convention #1 (docs/substrate-conventions.md): never construct prim +names from natural language — content lives in string attributes.""" +) +{ + token roomId ( + doc = "UUID hex (32 chars) identifying this room." + ) + + double createdAt ( + doc = "Unix seconds when the room first opened." + ) + + double closedAt ( + doc = "Unix seconds when this stage was flushed." + ) + + token kind ( + allowedTokens = ["conversation", "task", "session", "thread"] + doc = "Coarse classification of the room's purpose." + ) + + string topic ( + doc = "Optional human-facing summary; may be empty." + ) +} + +class CozyMemory "CozyMemory" ( + inherits = </Typed> + customData = { + string className = "CozyMemory" + string schemaKind = "concreteTyped" + } + doc = """A single memory minted inside a CozyRoom. 
+ +The prim path is /Cozy/Room_<uuid>/Memory_<uuid>, both +UUID hex (substrate convention #1, never NL in prim names). The +bridge ingester reads each CozyMemory and calls +moneta.deposit(payload, embedding, protected_floor); attentionWeight +is forwarded as a deposit-time attention seed. + +embeddingHint contract: empty array means "bridge will embed the +payload using its configured embedder." When non-empty, length MUST +equal the bridge embedder's dim (currently 384 for +sentence-transformers/all-MiniLM-L6-v2). The bridge does NOT pad or +truncate — a length mismatch is a contract violation and the bridge +raises.""" +) +{ + token memoryId ( + doc = "UUID hex (32 chars) identifying this memory." + ) + + string payload ( + doc = "Text content; this is what the bridge embeds and deposits." + ) + + double createdAt ( + doc = "Unix seconds when the memory was minted in Comfy-Cozy." + ) + + float protectedFloor ( + doc = "Pinning hint forwarded to moneta.deposit; 0.0 = unprotected." + ) + + float[] embeddingHint ( + doc = "Optional pre-computed embedding. Empty array = bridge embeds. Non-empty length must match bridge embedder dim (384 for default MiniLM-L6-v2)." + ) + + float attentionWeight ( + doc = "Initial attention seed forwarded to signal_attention; 0.0 = no seed." + ) + + token kind ( + allowedTokens = ["event", "fact", "preference", "task", "observation", "decision"] + doc = "Coarse classification of what this memory captures." + ) + + string[] sourceRefs ( + doc = "Opaque pointers back into Comfy-Cozy. Bridge round-trips them; does not interpret." 
+ ) +} diff --git a/bridge/schema/plugInfo.json b/bridge/schema/plugInfo.json new file mode 100644 index 0000000..ab3816d --- /dev/null +++ b/bridge/schema/plugInfo.json @@ -0,0 +1,37 @@ +{ + "Plugins": [ + { + "Info": { + "Types": { + "MonetaBridgeCozyRoom": { + "alias": { + "UsdSchemaBase": "CozyRoom" + }, + "autoGenerated": true, + "bases": [ + "UsdTyped" + ], + "schemaIdentifier": "CozyRoom", + "schemaKind": "concreteTyped" + }, + "MonetaBridgeCozyMemory": { + "alias": { + "UsdSchemaBase": "CozyMemory" + }, + "autoGenerated": true, + "bases": [ + "UsdTyped" + ], + "schemaIdentifier": "CozyMemory", + "schemaKind": "concreteTyped" + } + } + }, + "LibraryPath": "", + "Name": "moneta_bridge", + "ResourcePath": ".", + "Root": ".", + "Type": "resource" + } + ] +} diff --git a/bridge/tests/__init__.py b/bridge/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bridge/tests/test_embedder.py b/bridge/tests/test_embedder.py new file mode 100644 index 0000000..b204a32 --- /dev/null +++ b/bridge/tests/test_embedder.py @@ -0,0 +1,91 @@ +"""Adversarial tests for the bridge embedder contract. + +Test Engineer (Commandment #7): structurally separate from the +Bridge Engineer who wrote the impl. These tests target the locked +contract, not the impl details. +""" +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +# Ensure `bridge/` is on the path regardless of how pytest is +# invoked (from repo root or from bridge/). +_BRIDGE_ROOT = Path(__file__).resolve().parents[1] +if str(_BRIDGE_ROOT) not in sys.path: + sys.path.insert(0, str(_BRIDGE_ROOT)) + + +def test_embedder_module_imports_without_sentence_transformers() -> None: + """The bridge module must be importable without sentence-transformers + installed. 
Lazy-import discipline is part of the contract.""" + from moneta_bridge import Embedder, SentenceTransformersEmbedder + + assert Embedder is not None + assert SentenceTransformersEmbedder is not None + + +def test_default_embedder_constructs_and_reports_384_dim() -> None: + pytest.importorskip("sentence_transformers") + from moneta_bridge import SentenceTransformersEmbedder + + e = SentenceTransformersEmbedder() + assert e.dim == 384 + + +def test_embed_returns_list_of_float_length_384() -> None: + pytest.importorskip("sentence_transformers") + from moneta_bridge import SentenceTransformersEmbedder + + e = SentenceTransformersEmbedder() + result = e.embed("hello world") + + assert isinstance(result, list) + assert len(result) == 384 + for x in result: + assert isinstance(x, float) + + +def test_embed_batch_returns_correct_shape() -> None: + pytest.importorskip("sentence_transformers") + from moneta_bridge import SentenceTransformersEmbedder + + e = SentenceTransformersEmbedder() + result = e.embed_batch(["a", "b", "c"]) + + assert isinstance(result, list) + assert len(result) == 3 + for row in result: + assert isinstance(row, list) + assert len(row) == 384 + for x in row: + assert isinstance(x, float) + + +def test_embeddings_are_l2_normalized() -> None: + pytest.importorskip("sentence_transformers") + from moneta_bridge import SentenceTransformersEmbedder + + e = SentenceTransformersEmbedder() + vec = e.embed("normalization test") + norm_sq = sum(x * x for x in vec) + assert abs(norm_sq - 1.0) < 1e-3, ( + f"expected L2-normalized embedding (norm^2 ~= 1.0), " + f"got norm^2 = {norm_sq}" + ) + + +def test_protocol_runtime_checkable_or_structural_match() -> None: + pytest.importorskip("sentence_transformers") + from moneta_bridge import Embedder, SentenceTransformersEmbedder + + e = SentenceTransformersEmbedder() + try: + assert isinstance(e, Embedder) + except TypeError: + # Non-runtime_checkable Protocol — fall back to structural check. 
+ assert hasattr(e, "dim") + assert hasattr(e, "embed") + assert hasattr(e, "embed_batch") diff --git a/docs/bridge-readiness.md b/docs/bridge-readiness.md new file mode 100644 index 0000000..ad97b2d --- /dev/null +++ b/docs/bridge-readiness.md @@ -0,0 +1,202 @@ +# Bridge readiness assessment (Moneta ↔ Comfy-Cozy) + +**Status:** Authored 2026-05-03. Reflects Moneta v1.2.0rc1 (`pyproject.toml:version`, commit 9a46293). Doc artifact promoted from `/root/.claude/plans/`. + +## Context + +Joseph is scoping an external bridge to wire Moneta into Comfy-Cozy. +Comfy-Cozy is "frozen as law" — no edits to that repo. All wiring lives +Moneta-side or in a separate bridge package. Before scoping bridge +work, this document confirms what Moneta v1.x exposes today, and where +the gaps are between "current Moneta" and "Moneta consumable by a +Comfy-Cozy bridge daemon." Read-only investigation; nothing was +modified. Below is the 6-part assessment requested. + +--- + +## 1. PUBLIC API SURFACE + +The handle is `moneta.Moneta`, constructed with a single +`MonetaConfig` (frozen dataclass). Both are re-exported at package +root from `src/moneta/__init__.py:9-15`. Construction without args is +intentionally a `TypeError` (`src/moneta/api.py:190-193`); the +canonical pattern is `with Moneta(MonetaConfig.ephemeral()) as m:` +(`README.md:18-26`, `src/moneta/api.py:480-524` smoke_check). 
The four +agent-facing ops, signatures verbatim from +`src/moneta/api.py:328-446`: + +```python +def deposit(self, payload: str, embedding: List[float], + protected_floor: float = 0.0) -> UUID # 328-377 +def query(self, embedding: List[float], + limit: int = 5) -> List[Memory] # 379-417 +def signal_attention(self, weights: Dict[UUID, float]) -> None # 419-436 +def get_consolidation_manifest(self) -> List[Memory] # 438-446 +``` + +The sleep-pass / consolidation trigger is harness-level (not part of +the four-op spec, per ARCHITECTURE.md §2.1) and exposed as +`Moneta.run_sleep_pass(self) -> ConsolidationResult` +(`src/moneta/api.py:452-472`), delegating to +`ConsolidationRunner.run_pass(...)` +(`src/moneta/consolidation.py:124-132`). + +## 2. EMBEDDING ASSUMPTION + +Caller-provided. `deposit(payload, embedding, ...)` takes a +pre-computed `List[float]`; there is no internal embedder anywhere in +`src/moneta/`. Dimensionality is configurable via +`MonetaConfig.embedding_dim` (`src/moneta/api.py:130`, +`src/moneta/vector_index.py:74`) but optional: if unset, the first +`deposit` infers `_dim` from `len(vector)` +(`src/moneta/vector_index.py:109`); subsequent deposits with a +different length raise `ValueError` +(`src/moneta/vector_index.py:110-113`). The contract is documented in +the deposit docstring and README, but there is no canonical "use +embedder X" guidance — the bridge owns embedding choice. + +## 3. USD INGESTION PATH + +**None.** There is zero code path today that consumes an +externally-authored `.usda` file. No `from_usda`, `import_stage`, +`ingest`, or `load` helper exists in `src/moneta/api.py`, +`src/moneta/usd_target.py`, or `src/moneta/consolidation.py`. The only +`Sdf.Layer.FindOrOpen` / `Usd.Stage.Open` calls are at +`src/moneta/usd_target.py:222,224,246`, and they construct/recover +**Moneta's own** root and sublayers — they never read caller-supplied +stages. 
ARCHITECTURE.md §8 acknowledges a future `UsdLink` field for +hydrated entities but defers it (CLAUDE.md "non-goals" still lists +"Cross-session USD hydration at cold start"). A consumer handing +Moneta a `.usda` today must parse it themselves and call `deposit()` +per entity. + +## 4. USD HYDRATION PATH + +Partial, and not via `query`. Moneta authors `.usda` files only as a +side effect of `run_sleep_pass()` → `SequentialWriter.commit_staging` +→ `UsdTarget.author_stage_batch` + `flush` +(`src/moneta/usd_target.py:290-367`). Output is a fixed sublayer +structure rooted at `{log_path}/cortex_root.usda` with +`cortex_protected.usda` at strongest position and rolling +`cortex_YYYY_MM_DD[_NNN].usda` sublayers +(`src/moneta/usd_target.py:218-281`). Each memory is a `MonetaMemory` +prim at `/Memory_{hex}` carrying `payload`, `utility`, +`attendedCount`, `protectedFloor`, `lastEvaluated`, `priorState` +(`schema/MonetaSchema.usda:18-57`, +`src/moneta/usd_target.py:79-84,164-166,319`). `query()` itself +returns `List[Memory]` (Python objects), not a stage and not a +`.usda` file (`src/moneta/api.py:379-417`); there is no +`to_usda` / `serialize_to_usd` / `export_query` helper. Closest +existing capability for an external USD consumer: open +`cortex_root.usda` directly and traverse — the schema is documented +at `docs/substrate-conventions.md:23-99` and +`ARCHITECTURE.md:257-280`, but there is no consumer-facing +"query → ad-hoc usda" path. + +## 5. CONCURRENCY / LIFECYCLE + +Designed for long-running handles, **single-process only**. A handle +holds ECS, attention log, vector index, durability manager, +sequential writer, and authoring target for its full lifetime +(`src/moneta/api.py:195-287`); `close()` is idempotent and ordered +(`api.py:300-320`). 
Daemon restart is supported when +`MonetaConfig.snapshot_path` and `wal_path` are set — `__init__` +calls `DurabilityManager.hydrate()` to load snapshot + replay WAL +entries newer than the snapshot timestamp +(`src/moneta/api.py:220-247`, +`src/moneta/durability.py:181-225`). **Critical constraint:** the +exclusivity guard is an in-process Python set, +`_ACTIVE_URIS: set[str]` (`src/moneta/api.py:169,198-204`), with no +`fcntl`/`flock`/lockfile — a second OS process pointed at the same +`storage_uri` is **not** prevented and will silently corrupt state. +WAL writes are guarded by an internal `threading.Lock` +(`src/moneta/durability.py:75,147-149`); the attention log is +GIL-atomic lock-free +(`src/moneta/attention_log.py:63,72`); the USD writer takes a narrow +lock scoped to `Sdf.ChangeBlock` only +(`src/moneta/usd_target.py:314`, ARCHITECTURE.md §15.6). Sleep-pass +must be single-reducer (`src/moneta/attention_log.py:27-32`). For the +bridge: one daemon process holding one `Moneta` handle is fine; two +processes touching the same store is not. + +## 6. WHAT'S MISSING + +For a Comfy-Cozy bridge that (a) watches `.usda` session flushes, +(b) feeds them into Moneta, and (c) hands query results back as USD, +three concrete gaps exist: + +1. **No USD ingestion at all** (§3). The bridge must own .usda + parsing — open the stage, walk prims, extract whatever + Comfy-Cozy's prim schema is, generate `(payload, embedding)` + pairs, and call `deposit()` per entity. Moneta provides nothing + here. If the bridge wants a "feed me a .usda path" entry point, + it has to be built (likely bridge-side, since it's + Comfy-Cozy-schema-specific, not Moneta-generic). + +2. **No `query → .usda` egress** (§4). The bridge can read + `cortex_root.usda` directly (schema is stable and documented), + or it can call `query()` and synthesize a `.usda` from the + returned `List[Memory]`. Moneta does not export query results + as a stage. 
If Comfy-Cozy expects "give me a .usda of relevant + memories," the bridge must serialize. + +3. **No inter-process lock** (§5). `_ACTIVE_URIS` is in-memory. + If the bridge architecture might ever have two processes (a + watcher + a query server, say) pointed at the same store, the + bridge needs to enforce single-process discipline itself + (lockfile, supervisor, OS-level mutex), or this becomes a + §9-Trigger-2 surprise the first time it ships. + +Embedding is **not** a gap — it's an explicit caller responsibility, +documented; the bridge picks an embedder. Daemon lifecycle is **not** +a gap — `DurabilityManager.hydrate()` covers restart. + +Net: Moneta's four-op surface is consumable as-is from a single +daemon process. The bridge work is real but bounded: a USD-side +adapter (parse Comfy-Cozy `.usda` → deposits; serialize query +results → `.usda`) plus a process-level lock guard. Nothing inside +Moneta needs to change for a v1 bridge unless the bridge wants USD +ingestion to live Moneta-side, which would re-open Phase 1 +non-goals and likely warrant a §9 escalation. + +--- + +## Verification + +To validate the claims above end-to-end: + +```bash +# Confirm public API + four-op flow +python -c "import moneta; moneta.smoke_check(); print('OK')" + +# Confirm restart story +pytest tests/integration/test_durability_roundtrip.py -v + +# Confirm USD authoring shape (requires pxr-capable interpreter) +pytest tests/integration/test_real_usd_end_to_end.py -v + +# Inspect what Moneta writes to disk: +python -c " +import moneta, tempfile, pathlib +with tempfile.TemporaryDirectory() as d: + cfg = moneta.MonetaConfig.ephemeral() # mock target + # for real USD: pass use_real_usd=True, usd_target_path=pathlib.Path(d) + ... +" +# Then ls {log_path}/cortex_*.usda to see the sublayer structure +# described in §4. +``` + +No file in `src/moneta/` was modified during this investigation. 
+ +--- + +## Documentation drift flagged + +This audit surfaced version drift between `CLAUDE.md` and the actual +codebase. Recording for the next Documentarian pass: + +- **`CLAUDE.md` declares "v1.0.0 shipped. All three phases complete."** and frames Phase 3 as recently closed. +- **`pyproject.toml` declares `version = "1.2.0rc1"`,** with a "codeless schema" surgery between v1.0.0 and current (commits `9a46293`, `8eb0956`, `41d5385`, `f7b6253`). +- The Documentarian contract per `CLAUDE.md` ("do not let documentation lag implementation by more than one PR") is currently at risk. +- **Recommendation:** a follow-up Documentarian pass should update `CLAUDE.md` to reflect v1.2.0rc1 reality — including the codeless-schema surgery, the current schema layout at `schema/MonetaSchema.usda`, and any Phase-3-onwards changes that landed since v1.0.0. **Out of scope for this assessment** — flagging only. diff --git a/src/moneta/api.py b/src/moneta/api.py index 5032c22..9ab591c 100644 --- a/src/moneta/api.py +++ b/src/moneta/api.py @@ -32,6 +32,7 @@ from __future__ import annotations import logging +import sys import time import uuid as _uuid from dataclasses import dataclass @@ -206,6 +207,39 @@ def __init__(self, config: MonetaConfig) -> None: try: self.config: MonetaConfig = config self._closed: bool = False + self._lock_fd: Optional[Any] = None + + # Cross-process flock — POSIX-only, opt-in via snapshot_path. + # Hardens the in-process _ACTIVE_URIS registry against + # multi-process corruption of the same on-disk store. + # See docs/bridge-readiness.md §5. 
+ if config.snapshot_path is not None: + if sys.platform == "win32": + _logger.info( + "Moneta.flock skipped: POSIX-only; storage at %s " + "is NOT cross-process-safe on Windows", + config.snapshot_path, + ) + else: + import fcntl as _fcntl + + lock_path = config.snapshot_path.with_suffix(".lock") + lock_path.parent.mkdir(parents=True, exist_ok=True) + fd = open(lock_path, "a") + try: + _fcntl.flock( + fd.fileno(), + _fcntl.LOCK_EX | _fcntl.LOCK_NB, + ) + except BlockingIOError: + fd.close() + raise MonetaResourceLockedError( + f"snapshot_path {config.snapshot_path!r} is " + f"held by another process (cross-process " + f"flock on {lock_path}); release the holding " + f"process before reconstructing" + ) from None + self._lock_fd = fd self.decay: DecayConfig = DecayConfig( half_life_seconds=config.half_life_seconds @@ -271,7 +305,20 @@ def __init__(self, config: MonetaConfig) -> None: max_entities=config.max_entities ) except BaseException: - # Partial init — release the lock so the consumer can retry. + # Partial init — release any acquired flock and the URI lock + # so the consumer can retry. + if getattr(self, "_lock_fd", None) is not None: + try: + if sys.platform != "win32": + import fcntl as _fcntl + + _fcntl.flock( + self._lock_fd.fileno(), _fcntl.LOCK_UN + ) + self._lock_fd.close() + except Exception: + pass + self._lock_fd = None _ACTIVE_URIS.discard(config.storage_uri) raise @@ -314,6 +361,21 @@ def close(self) -> None: if hasattr(self.authoring_target, "close"): self.authoring_target.close() finally: + # Release cross-process flock before discarding the URI; a + # re-construct on the same URI would otherwise race the + # holding fd's lifetime. 
+ if self._lock_fd is not None: + try: + if sys.platform != "win32": + import fcntl as _fcntl + + _fcntl.flock( + self._lock_fd.fileno(), _fcntl.LOCK_UN + ) + self._lock_fd.close() + except Exception: + pass + self._lock_fd = None _ACTIVE_URIS.discard(self.config.storage_uri) _logger.info( "Moneta.close uri=%s", self.config.storage_uri diff --git a/tests/unit/test_api_flock.py b/tests/unit/test_api_flock.py new file mode 100644 index 0000000..adce0b9 --- /dev/null +++ b/tests/unit/test_api_flock.py @@ -0,0 +1,211 @@ +"""Adversarial tests for cross-process flock in Moneta.__init__. + +Test Engineer (Commandment #7): structurally separate from the +Substrate Engineer. Tests target the locked flock contract. + +Contract: +- flock acquired ONLY when config.snapshot_path is not None. +- Lockfile path: config.snapshot_path.with_suffix('.lock'). +- Failure: MonetaResourceLockedError with substring "cross-process". +- POSIX-only; Windows is no-op. +- Released on close(). +""" +from __future__ import annotations + +import os +import subprocess +import sys +import textwrap +from pathlib import Path + +import pytest + +from moneta import Moneta, MonetaConfig, MonetaResourceLockedError + +_REPO_ROOT = Path(__file__).resolve().parents[2] +_SRC_PATH = _REPO_ROOT / "src" + + +def _spawn_construct( + snapshot_path: Path, + wal_path: Path, + storage_uri: str, + timeout: float = 30.0, +) -> subprocess.CompletedProcess: + """Spawn a subprocess that tries to construct Moneta on the given paths. + + Returns CompletedProcess with stdout containing 'LOCKED:' if + MonetaResourceLockedError was raised, 'OK' if construction + succeeded, or 'OTHER:' for any other exception. 
+ """ + code = textwrap.dedent(f""" + import sys + sys.path.insert(0, {str(_SRC_PATH)!r}) + from pathlib import Path + from moneta import Moneta, MonetaConfig, MonetaResourceLockedError + try: + m = Moneta(MonetaConfig( + storage_uri={storage_uri!r}, + snapshot_path=Path({str(snapshot_path)!r}), + wal_path=Path({str(wal_path)!r}), + )) + print("OK") + m.close() + except MonetaResourceLockedError as e: + print("LOCKED:" + str(e)) + except Exception as e: + print("OTHER:" + repr(e)) + """) + env = os.environ.copy() + env["PYTHONPATH"] = ( + str(_SRC_PATH) + os.pathsep + env.get("PYTHONPATH", "") + ) + return subprocess.run( + [sys.executable, "-c", code], + capture_output=True, + text=True, + env=env, + timeout=timeout, + ) + + +def test_durability_config_in_same_process_still_raises_in_process_error( + tmp_path: Path, +) -> None: + """Same-process collision: _ACTIVE_URIS fires BEFORE flock is even + attempted. Existing in-process behavior must be preserved.""" + snap = tmp_path / "s.json" + wal = tmp_path / "w.jsonl" + + m1 = Moneta(MonetaConfig( + storage_uri="moneta://test-same-process", + snapshot_path=snap, + wal_path=wal, + )) + try: + with pytest.raises(MonetaResourceLockedError) as excinfo: + Moneta(MonetaConfig( + storage_uri="moneta://test-same-process", + snapshot_path=snap, + wal_path=wal, + )) + # In-process variant of the error must NOT mention cross-process. 
+ assert "cross-process" not in str(excinfo.value) + finally: + m1.close() + + +@pytest.mark.skipif(sys.platform == "win32", reason="flock is POSIX-only") +def test_cross_process_durability_lock_blocks_second_writer( + tmp_path: Path, +) -> None: + """Different process, same snapshot_path -> flock collision -> LOCKED.""" + snap = tmp_path / "s.json" + wal = tmp_path / "w.jsonl" + + parent = Moneta(MonetaConfig( + storage_uri="moneta://test-cross-process-parent", + snapshot_path=snap, + wal_path=wal, + )) + try: + # Subprocess uses a DIFFERENT storage_uri so the in-process + # registry can't collide — only the on-disk flock can. + result = _spawn_construct( + snap, + wal, + storage_uri="moneta://test-cross-process-child", + ) + assert "LOCKED" in result.stdout, ( + f"expected LOCKED, got stdout={result.stdout!r} " + f"stderr={result.stderr!r}" + ) + assert "cross-process" in result.stdout, ( + f"expected 'cross-process' substring in error message, " + f"got: {result.stdout!r}" + ) + finally: + parent.close() + + +@pytest.mark.skipif(sys.platform == "win32", reason="flock is POSIX-only") +def test_cross_process_lock_released_on_close(tmp_path: Path) -> None: + """After parent.close(), a fresh subprocess can acquire the lock.""" + snap = tmp_path / "s.json" + wal = tmp_path / "w.jsonl" + + parent = Moneta(MonetaConfig( + storage_uri="moneta://test-release-parent", + snapshot_path=snap, + wal_path=wal, + )) + parent.close() + + result = _spawn_construct( + snap, + wal, + storage_uri="moneta://test-release-child", + ) + assert "OK" in result.stdout, ( + f"expected OK after parent close, got stdout={result.stdout!r} " + f"stderr={result.stderr!r}" + ) + + +@pytest.mark.skipif(sys.platform == "win32", reason="flock is POSIX-only") +def test_ephemeral_config_does_not_acquire_flock() -> None: + """Ephemeral configs (no snapshot_path) -> no flock -> two + subprocess constructions both succeed.""" + code = textwrap.dedent(f""" + import sys + sys.path.insert(0, 
{str(_SRC_PATH)!r}) + from moneta import Moneta, MonetaConfig + try: + with Moneta(MonetaConfig.ephemeral()) as m: + print("OK") + except Exception as e: + print("OTHER:" + repr(e)) + """) + env = os.environ.copy() + env["PYTHONPATH"] = ( + str(_SRC_PATH) + os.pathsep + env.get("PYTHONPATH", "") + ) + + r1 = subprocess.run( + [sys.executable, "-c", code], + capture_output=True, + text=True, + env=env, + timeout=30, + ) + r2 = subprocess.run( + [sys.executable, "-c", code], + capture_output=True, + text=True, + env=env, + timeout=30, + ) + assert "OK" in r1.stdout, ( + f"r1 stdout={r1.stdout!r} stderr={r1.stderr!r}" + ) + assert "OK" in r2.stdout, ( + f"r2 stdout={r2.stdout!r} stderr={r2.stderr!r}" + ) + + +@pytest.mark.skipif(sys.platform == "win32", reason="flock is POSIX-only") +def test_lock_file_lives_at_expected_path(tmp_path: Path) -> None: + """Lockfile is created at config.snapshot_path.with_suffix('.lock').""" + snap = tmp_path / "s.json" + wal = tmp_path / "w.jsonl" + expected_lockfile = snap.with_suffix(".lock") + + with Moneta(MonetaConfig( + storage_uri="moneta://test-lockfile-path", + snapshot_path=snap, + wal_path=wal, + )): + assert expected_lockfile.exists(), ( + f"expected lockfile at {expected_lockfile}, " + f"contents of tmp_path: {list(tmp_path.iterdir())}" + )