From 238072493aab3d97a3fcdf9fe4b50392736f83fc Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 7 Jun 2026 23:02:43 +0000 Subject: [PATCH 1/2] Add keyword/substring search mode alongside semantic search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Semantic search ranks by meaning and can miss exact terms (names, IDs, URLs, rare tokens). Add an opt-in mode="keyword" to search (REST + MCP) that does a case-insensitive substring match over memory text, newest matches first. Default stays "semantic", so existing behavior is unchanged. Adapts the OB1 keyword-search idea. Reuses the same vector_store.list() payload path already validated against live Qdrant for the dedup feature — no new Qdrant capability or index required. The match runs in Python over the user's memories (scoped by user_id), bounded by a generous scan limit and fail-open (a store error returns no results). - app/memory.py: keyword_search() + _point_to_result() shaping. - app/rest.py: SearchRequest.mode ("semantic"|"keyword"); route keyword path. - app/mcp_server.py: search_memories gains a mode arg (docstring guides the model to use keyword for exact terms). - tests: keyword_search unit cases (substring/case, recency-first+limit, field shaping, no-match, fail-open) + REST and MCP integration + invalid-mode 422. - docs: USER_GUIDE search section (keyword mode + scan-limit caveat), DEVELOPER_GUIDE memory.py description. Closes #54. 
https://claude.ai/code/session_017835DVrvURaYnbQiPQwzue --- app/mcp_server.py | 19 +++++++---- app/memory.py | 52 ++++++++++++++++++++++++++++++ app/rest.py | 11 +++++-- docs/DEVELOPER_GUIDE.md | 5 ++- docs/USER_GUIDE.md | 17 ++++++++++ tests/test_mcp.py | 12 +++++++ tests/test_memory.py | 71 +++++++++++++++++++++++++++++++++++++++++ tests/test_rest.py | 26 +++++++++++++++ 8 files changed, 203 insertions(+), 10 deletions(-) diff --git a/app/mcp_server.py b/app/mcp_server.py index c7bdc71..9fde2ac 100644 --- a/app/mcp_server.py +++ b/app/mcp_server.py @@ -34,16 +34,23 @@ def add_memory(content: str, agent_id: str | None = None, metadata: dict | None return memory_mod.add_memory(content, **kwargs) @mcp.tool - def search_memories(query: str, limit: int = 10, recency_weight: float = 0.0) -> dict: - """Search long-term memory by semantic similarity. + def search_memories( + query: str, limit: int = 10, recency_weight: float = 0.0, mode: str = "semantic" + ) -> dict: + """Search long-term memory. Searches the single shared memory store for the user, across all agents. - recency_weight (0.0-1.0) optionally biases results toward more recently - created or updated memories. Leave it at 0 for pure semantic relevance; - raise it (e.g. 0.3) when the user asks what is *latest* or *current* and - recency matters more than an exact topical match. + mode: "semantic" (default) ranks by meaning/similarity. Use "keyword" for + a case-insensitive substring match when you need an exact term the + semantic search may miss — a name, identifier, URL, or rare token. + + recency_weight (0.0-1.0, semantic mode only) optionally biases results + toward more recently created or updated memories. Leave it at 0 for pure + relevance; raise it (e.g. 0.3) when the user asks what is *latest*. 
""" + if mode == "keyword": + return memory_mod.keyword_search(query, user_id=default_user, limit=limit) results = memory.search(query=query, filters={"user_id": default_user}, top_k=limit) return rerank_by_recency(results, recency_weight) diff --git a/app/memory.py b/app/memory.py index f042a86..dbbf733 100644 --- a/app/memory.py +++ b/app/memory.py @@ -121,3 +121,55 @@ def add_memory(content, *, dedup: bool = True, **kwargs) -> dict: metadata = dict(kwargs.pop("metadata", None) or {}) metadata["content_fp"] = fingerprint return memory.add(content, metadata=metadata, **kwargs) + + +# Upper bound on how many of the user's memories a keyword search scans in one +# pass. Generous for a single-user store; keyword search is a literal-match +# fallback, not the primary retrieval path. +DEFAULT_KEYWORD_SCAN_LIMIT = 5000 + + +def _point_to_result(point) -> dict: + """Shape a Qdrant point into a search-result dict (memory text + payload).""" + payload = dict(getattr(point, "payload", None) or {}) + payload.pop("text_lemmatized", None) # internal BM25 helper — noise in results + memory_text = payload.pop("data", None) + return {"id": getattr(point, "id", None), "memory": memory_text, **payload} + + +def keyword_search( + query: str, + *, + user_id: str | None = None, + limit: int = 10, + scan_limit: int = DEFAULT_KEYWORD_SCAN_LIMIT, +) -> dict: + """Case-insensitive substring search over stored memory text. + + A literal-match fallback for terms semantic search misses (names, IDs, URLs, + rare tokens). Scans up to `scan_limit` of the user's memories via the vector + store's payload listing and matches `query` as a case-insensitive substring + of each memory's text, returning the most recent matches first. Scoped by + `user_id` only (it spans the whole user store, like the MCP read tools). + Fail-open: any store error returns no results. 
+ """ + memory = get_memory() + filters = {"user_id": user_id} if user_id else None + try: + result = memory.vector_store.list(filters=filters, top_k=scan_limit) + except Exception: + return {"results": []} + points = result[0] if isinstance(result, tuple) else result + needle = query.casefold() + matches = [ + point + for point in (points or []) + if isinstance((getattr(point, "payload", None) or {}).get("data"), str) + and needle in point.payload["data"].casefold() + ] + # ISO-8601 UTC timestamps sort chronologically as plain strings; newest first. + matches.sort( + key=lambda p: (getattr(p, "payload", None) or {}).get("created_at", ""), + reverse=True, + ) + return {"results": [_point_to_result(p) for p in matches[:limit]]} diff --git a/app/rest.py b/app/rest.py index c7bd778..8ff8402 100644 --- a/app/rest.py +++ b/app/rest.py @@ -37,8 +37,11 @@ class SearchRequest(BaseModel): agent_id: str | None = None run_id: str | None = None limit: int = Field(default=10, ge=1, le=100) - # Opt-in recency boost. 0 = pure semantic similarity (unchanged behavior), - # 1 = order almost entirely by how recently a memory was created/updated. + # "semantic" (default, vector similarity) or "keyword" (case-insensitive + # substring match for exact terms semantic search misses). + mode: Literal["semantic", "keyword"] = "semantic" + # Opt-in recency boost (semantic mode only). 0 = pure semantic similarity + # (unchanged), 1 = order almost entirely by how recently a memory was touched. 
recency_weight: float = Field(default=0.0, ge=0.0, le=1.0) recency_half_life_days: float = Field(default=30.0, gt=0.0) @@ -72,8 +75,10 @@ def add_memory(req: AddMemoryRequest) -> dict: @router.post("/memories/search") def search_memories(req: SearchRequest) -> dict: - memory = memory_mod.get_memory() filters = _scope_kwargs(req.user_id, req.agent_id, req.run_id) + if req.mode == "keyword": + return memory_mod.keyword_search(req.query, user_id=filters["user_id"], limit=req.limit) + memory = memory_mod.get_memory() results = memory.search(query=req.query, filters=filters, top_k=req.limit) return rerank_by_recency(results, req.recency_weight, req.recency_half_life_days) diff --git a/docs/DEVELOPER_GUIDE.md b/docs/DEVELOPER_GUIDE.md index 9807ed1..8e48a9f 100644 --- a/docs/DEVELOPER_GUIDE.md +++ b/docs/DEVELOPER_GUIDE.md @@ -63,7 +63,10 @@ app/ cheap content-fingerprint dedup: it SHA-256s the normalized raw input, stores it in the `content_fp` payload field, and skips the LLM extraction if a memory with that fingerprint already exists (fail-open — a lookup error just proceeds). - The most tweak-prone file. + keyword_search() is the substring-match fallback behind search mode="keyword": + it scans the user's memories via vector_store.list() and matches the query as a + case-insensitive substring of the `data` payload (fail-open). The most tweak-prone + file. mcp_server.py build_mcp(): the six MCP tools, each thinly wrapping a mem0 op with user_id defaulted to MEM0_DEFAULT_USER_ID. rest.py REST router under /api/v1 (mounted with prefix in main.py). Pydantic request diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md index d66bf47..877dc4e 100644 --- a/docs/USER_GUIDE.md +++ b/docs/USER_GUIDE.md @@ -542,6 +542,23 @@ When `recency_weight > 0`, each returned result carries a `rerank_score` showing the blended similarity-plus-recency value it was sorted by. The MCP `search_memories` tool accepts the same `recency_weight` argument. 
+**Keyword search (optional).** Semantic search ranks by *meaning*, which can miss +an exact term — a name, identifier, URL, or rare token. Pass `"mode": "keyword"` +to instead do a **case-insensitive substring match** over memory text, returning +the most recent matches first: + +```bash +curl -X POST https://mem0.your-domain.com/api/v1/memories/search \ + -H "Authorization: Bearer $MEM0_API_KEY" -H "Content-Type: application/json" \ + -d '{"query": "Philips Hue", "mode": "keyword"}' +``` + +The default is `"mode": "semantic"`. The MCP `search_memories` tool accepts the +same `mode` argument. Keyword mode spans the whole user store and scans up to a +few thousand of that user's memories per query — ample for a personal store; +it's a literal-match fallback, not a replacement for semantic retrieval. +(`recency_weight` applies to semantic mode only.) + ### List memories — `GET /api/v1/memories` Query params: `agent_id`, `run_id`, `user_id`, `limit` (1–100, default 50). diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 59f8292..5a76f5c 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -70,6 +70,18 @@ async def test_search_exposes_recency_weight(mcp): tools = {t.name: t for t in await client.list_tools()} props = (tools["search_memories"].inputSchema or {}).get("properties", {}) assert "recency_weight" in props + assert "mode" in props # keyword vs semantic + + +async def test_search_keyword_mode_uses_listing(mcp, mem): + from types import SimpleNamespace + + point = SimpleNamespace(id="1", payload={"data": "Philips hub", "created_at": "2026-06-01T00:00:00+00:00"}) # noqa: E501 + mem.vector_store.list.return_value = ([point], None) + async with Client(mcp) as client: + await client.call_tool("search_memories", {"query": "philips", "mode": "keyword"}) + mem.search.assert_not_called() # keyword mode bypasses vector search + mem.vector_store.list.assert_called_once() async def test_search_with_recency_weight_invokes_mem(mcp, mem): diff --git 
a/tests/test_memory.py b/tests/test_memory.py index e680526..d54dfba 100644 --- a/tests/test_memory.py +++ b/tests/test_memory.py @@ -7,9 +7,14 @@ _existing_fingerprint_id, add_memory, content_fingerprint, + keyword_search, ) +def _point(id, data, created_at="2026-06-01T00:00:00+00:00", **extra): + return SimpleNamespace(id=id, payload={"data": data, "created_at": created_at, **extra}) + + def test_build_config_shape(): cfg = _build_config(Settings()) @@ -137,3 +142,69 @@ def test_add_memory_merges_existing_metadata(monkeypatch): _, kwargs = fake.add.call_args assert kwargs["metadata"]["source"] == "import" assert "content_fp" in kwargs["metadata"] + + +# --- keyword_search ---------------------------------------------------------- + + +def _patch_keyword(monkeypatch, points): + import app.memory as m + + fake = MagicMock() + fake.vector_store.list.return_value = (points, None) + monkeypatch.setattr(m, "get_memory", lambda: fake) + return fake + + +def test_keyword_search_matches_case_insensitive_substring(monkeypatch): + fake = _patch_keyword( + monkeypatch, + [ + _point("1", "Ian uses Philips Hue lights", user_id="ian"), + _point("2", "Prefers oat milk in coffee"), + ], + ) + out = keyword_search("philips", user_id="ian") + assert [r["id"] for r in out["results"]] == ["1"] + assert out["results"][0]["memory"] == "Ian uses Philips Hue lights" + _, kwargs = fake.vector_store.list.call_args + assert kwargs["filters"] == {"user_id": "ian"} # scoped to the user + + +def test_keyword_search_sorts_recent_first_and_limits(monkeypatch): + _patch_keyword( + monkeypatch, + [ + _point("old", "alpha one", created_at="2020-01-01T00:00:00+00:00"), + _point("new", "alpha two", created_at="2026-06-01T00:00:00+00:00"), + _point("mid", "alpha three", created_at="2023-01-01T00:00:00+00:00"), + ], + ) + out = keyword_search("alpha", user_id="ian", limit=2) + assert [r["id"] for r in out["results"]] == ["new", "mid"] # newest first, capped at 2 + + +def 
test_keyword_search_drops_internal_fields_keeps_metadata(monkeypatch): + _patch_keyword( + monkeypatch, + [_point("1", "match me", agent_id="cli", text_lemmatized="match me")], + ) + result = keyword_search("match", user_id="ian")["results"][0] + assert result["memory"] == "match me" + assert result["agent_id"] == "cli" + assert result["created_at"] + assert "data" not in result and "text_lemmatized" not in result + + +def test_keyword_search_no_match_returns_empty(monkeypatch): + _patch_keyword(monkeypatch, [_point("1", "nothing relevant")]) + assert keyword_search("zzz", user_id="ian") == {"results": []} + + +def test_keyword_search_fails_open(monkeypatch): + import app.memory as m + + fake = MagicMock() + fake.vector_store.list.side_effect = RuntimeError("qdrant down") + monkeypatch.setattr(m, "get_memory", lambda: fake) + assert keyword_search("x", user_id="ian") == {"results": []} diff --git a/tests/test_rest.py b/tests/test_rest.py index 536e7ab..ecfc5e2 100644 --- a/tests/test_rest.py +++ b/tests/test_rest.py @@ -130,6 +130,32 @@ def test_search_recency_weight_out_of_range_rejected(app_instance, mem, auth_hea ) +def test_search_keyword_mode(app_instance, mem, auth_header): + from types import SimpleNamespace + + point = SimpleNamespace(id="1", payload={"data": "the Philips hub", "created_at": "2026-06-01T00:00:00+00:00"}) # noqa: E501 + mem.vector_store.list.return_value = ([point], None) + c = _client(app_instance) + resp = c.post( + "/api/v1/memories/search", + json={"query": "philips", "mode": "keyword"}, + headers=auth_header, + ) + assert resp.status_code == 200 + assert resp.json()["results"][0]["id"] == "1" + mem.search.assert_not_called() # keyword mode bypasses vector search + _, kwargs = mem.vector_store.list.call_args + assert kwargs["filters"] == {"user_id": "default-user"} + + +def test_search_invalid_mode_rejected(app_instance, mem, auth_header): + c = _client(app_instance) + resp = c.post( + "/api/v1/memories/search", json={"query": "x", 
"mode": "fuzzy"}, headers=auth_header + ) + assert resp.status_code == 422 + + def test_search_scoped_by_run_id(app_instance, mem, auth_header): mem.search.return_value = {"results": []} c = _client(app_instance) From dc5d049c36920fbbbeed0995f62ed915853e5b12 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 7 Jun 2026 23:20:40 +0000 Subject: [PATCH 2/2] Address keyword-search review: strip content_fp, guard empty query, robust ordering, validate MCP mode - app/memory.py: _point_to_result now also strips the internal `content_fp` dedup fingerprint from results. keyword_search trims the query and returns no results for an empty/whitespace query (instead of matching everything). Ordering now parses timestamps and prefers updated_at over created_at (via ranking._parse_timestamp), so it's correct regardless of tz representation and consistent with the recency-boost ranking. - app/mcp_server.py: search_memories validates `mode` and raises on unknown values, matching the REST API's strict 422 behavior. - tests: content_fp stripped, empty-query short-circuit (no store scan), updated_at-preferred ordering (incl. mixed Z/offset forms), MCP unknown mode raises ToolError. https://claude.ai/code/session_017835DVrvURaYnbQiPQwzue --- app/mcp_server.py | 2 ++ app/memory.py | 26 ++++++++++++++++++-------- tests/test_mcp.py | 9 +++++++++ tests/test_memory.py | 39 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 66 insertions(+), 10 deletions(-) diff --git a/app/mcp_server.py b/app/mcp_server.py index 9fde2ac..644db7c 100644 --- a/app/mcp_server.py +++ b/app/mcp_server.py @@ -49,6 +49,8 @@ def search_memories( toward more recently created or updated memories. Leave it at 0 for pure relevance; raise it (e.g. 0.3) when the user asks what is *latest*. 
""" + if mode not in ("semantic", "keyword"): + raise ValueError(f"mode must be 'semantic' or 'keyword', got {mode!r}") if mode == "keyword": return memory_mod.keyword_search(query, user_id=default_user, limit=limit) results = memory.search(query=query, filters={"user_id": default_user}, top_k=limit) diff --git a/app/memory.py b/app/memory.py index dbbf733..4553a59 100644 --- a/app/memory.py +++ b/app/memory.py @@ -1,8 +1,10 @@ import hashlib import json +from datetime import UTC, datetime from functools import lru_cache from app.config import Settings, get_settings +from app.ranking import _parse_timestamp def _provider_config(model: str, api_key: str | None) -> dict: @@ -132,11 +134,20 @@ def add_memory(content, *, dedup: bool = True, **kwargs) -> dict: def _point_to_result(point) -> dict: """Shape a Qdrant point into a search-result dict (memory text + payload).""" payload = dict(getattr(point, "payload", None) or {}) - payload.pop("text_lemmatized", None) # internal BM25 helper — noise in results + # Drop internal plumbing that shouldn't surface in results. + payload.pop("text_lemmatized", None) # BM25 helper + payload.pop("content_fp", None) # dedup fingerprint memory_text = payload.pop("data", None) return {"id": getattr(point, "id", None), "memory": memory_text, **payload} +def _point_recency(point) -> datetime: + """Sort key for keyword results: updated_at (preferred) or created_at, parsed.""" + payload = getattr(point, "payload", None) or {} + ts = _parse_timestamp(payload.get("updated_at")) or _parse_timestamp(payload.get("created_at")) + return ts or datetime.min.replace(tzinfo=UTC) + + def keyword_search( query: str, *, @@ -151,8 +162,12 @@ def keyword_search( store's payload listing and matches `query` as a case-insensitive substring of each memory's text, returning the most recent matches first. Scoped by `user_id` only (it spans the whole user store, like the MCP read tools). - Fail-open: any store error returns no results. 
+ An empty/whitespace query matches nothing. Fail-open: any store error + returns no results. """ + needle = query.strip().casefold() + if not needle: + return {"results": []} memory = get_memory() filters = {"user_id": user_id} if user_id else None try: @@ -160,16 +175,11 @@ def keyword_search( except Exception: return {"results": []} points = result[0] if isinstance(result, tuple) else result - needle = query.casefold() matches = [ point for point in (points or []) if isinstance((getattr(point, "payload", None) or {}).get("data"), str) and needle in point.payload["data"].casefold() ] - # ISO-8601 UTC timestamps sort chronologically as plain strings; newest first. - matches.sort( - key=lambda p: (getattr(p, "payload", None) or {}).get("created_at", ""), - reverse=True, - ) + matches.sort(key=_point_recency, reverse=True) # most recently touched first return {"results": [_point_to_result(p) for p in matches[:limit]]} diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 5a76f5c..4308a8d 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -1,5 +1,6 @@ import pytest from fastmcp import Client +from fastmcp.exceptions import ToolError from app.mcp_server import build_mcp @@ -84,6 +85,14 @@ async def test_search_keyword_mode_uses_listing(mcp, mem): mem.vector_store.list.assert_called_once() +async def test_search_rejects_unknown_mode(mcp, mem): + # Unknown mode must error, matching the REST API's strict validation. 
+ async with Client(mcp) as client: + with pytest.raises(ToolError): + await client.call_tool("search_memories", {"query": "x", "mode": "fuzzy"}) + mem.search.assert_not_called() + + async def test_search_with_recency_weight_invokes_mem(mcp, mem): mem.search.return_value = {"results": []} async with Client(mcp) as client: diff --git a/tests/test_memory.py b/tests/test_memory.py index d54dfba..95753ff 100644 --- a/tests/test_memory.py +++ b/tests/test_memory.py @@ -184,16 +184,45 @@ def test_keyword_search_sorts_recent_first_and_limits(monkeypatch): assert [r["id"] for r in out["results"]] == ["new", "mid"] # newest first, capped at 2 +def test_keyword_search_prefers_updated_at_for_ordering(monkeypatch): + # "old" was created later but "new" was updated more recently → "new" first. + _patch_keyword( + monkeypatch, + [ + _point("old", "alpha one", created_at="2026-06-05T00:00:00+00:00"), + _point( + "new", + "alpha two", + created_at="2020-01-01T00:00:00+00:00", + updated_at="2026-06-06T00:00:00Z", # note: Zulu form, different tz repr + ), + ], + ) + out = keyword_search("alpha", user_id="ian") + assert [r["id"] for r in out["results"]] == ["new", "old"] + + def test_keyword_search_drops_internal_fields_keeps_metadata(monkeypatch): _patch_keyword( monkeypatch, - [_point("1", "match me", agent_id="cli", text_lemmatized="match me")], + [ + _point( + "1", + "match me", + agent_id="cli", + text_lemmatized="match me", + content_fp="deadbeef", + ) + ], ) result = keyword_search("match", user_id="ian")["results"][0] assert result["memory"] == "match me" assert result["agent_id"] == "cli" assert result["created_at"] - assert "data" not in result and "text_lemmatized" not in result + # Internal plumbing must not leak into results. 
+ assert "data" not in result + assert "text_lemmatized" not in result + assert "content_fp" not in result def test_keyword_search_no_match_returns_empty(monkeypatch): @@ -201,6 +230,12 @@ def test_keyword_search_no_match_returns_empty(monkeypatch): assert keyword_search("zzz", user_id="ian") == {"results": []} +def test_keyword_search_empty_query_matches_nothing(monkeypatch): + fake = _patch_keyword(monkeypatch, [_point("1", "anything")]) + assert keyword_search(" ", user_id="ian") == {"results": []} + fake.vector_store.list.assert_not_called() # short-circuits before scanning + + def test_keyword_search_fails_open(monkeypatch): import app.memory as m