From 238072493aab3d97a3fcdf9fe4b50392736f83fc Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 7 Jun 2026 23:02:43 +0000
Subject: [PATCH 1/2] Add keyword/substring search mode alongside semantic
 search
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Semantic search ranks by meaning and can miss exact terms (names, IDs, URLs,
rare tokens). Add an opt-in mode="keyword" to search (REST + MCP) that does a
case-insensitive substring match over memory text, newest matches first.
Default stays "semantic", so existing behavior is unchanged. Adapts the OB1
keyword-search idea.

Reuses the same vector_store.list() payload path already validated against live
Qdrant for the dedup feature — no new Qdrant capability or index required. The
match runs in Python over the user's memories (scoped by user_id), is bounded by
a generous scan limit, and fails open (a store error returns no results).

- app/memory.py: keyword_search() + _point_to_result() shaping.
- app/rest.py: SearchRequest.mode ("semantic"|"keyword"); route keyword path.
- app/mcp_server.py: search_memories gains a mode arg (docstring guides the model
  to use keyword for exact terms).
- tests: keyword_search unit cases (substring/case, recency-first+limit, field
  shaping, no-match, fail-open) + REST and MCP integration + invalid-mode 422.
- docs: USER_GUIDE search section (keyword mode + scan-limit caveat),
  DEVELOPER_GUIDE memory.py description.

Closes #54.

https://claude.ai/code/session_017835DVrvURaYnbQiPQwzue
---
 app/mcp_server.py       | 19 +++++++----
 app/memory.py           | 52 ++++++++++++++++++++++++++++++
 app/rest.py             | 11 +++++--
 docs/DEVELOPER_GUIDE.md |  5 ++-
 docs/USER_GUIDE.md      | 17 ++++++++++
 tests/test_mcp.py       | 12 +++++++
 tests/test_memory.py    | 71 +++++++++++++++++++++++++++++++++++++++++
 tests/test_rest.py      | 26 +++++++++++++++
 8 files changed, 203 insertions(+), 10 deletions(-)

diff --git a/app/mcp_server.py b/app/mcp_server.py
index c7bdc71..9fde2ac 100644
--- a/app/mcp_server.py
+++ b/app/mcp_server.py
@@ -34,16 +34,23 @@ def add_memory(content: str, agent_id: str | None = None, metadata: dict | None
         return memory_mod.add_memory(content, **kwargs)
 
     @mcp.tool
-    def search_memories(query: str, limit: int = 10, recency_weight: float = 0.0) -> dict:
-        """Search long-term memory by semantic similarity.
+    def search_memories(
+        query: str, limit: int = 10, recency_weight: float = 0.0, mode: str = "semantic"
+    ) -> dict:
+        """Search long-term memory.
 
         Searches the single shared memory store for the user, across all agents.
 
-        recency_weight (0.0-1.0) optionally biases results toward more recently
-        created or updated memories. Leave it at 0 for pure semantic relevance;
-        raise it (e.g. 0.3) when the user asks what is *latest* or *current* and
-        recency matters more than an exact topical match.
+        mode: "semantic" (default) ranks by meaning/similarity. Use "keyword" for
+        a case-insensitive substring match when you need an exact term the
+        semantic search may miss — a name, identifier, URL, or rare token.
+
+        recency_weight (0.0-1.0, semantic mode only) optionally biases results
+        toward more recently created or updated memories. Leave it at 0 for pure
+        relevance; raise it (e.g. 0.3) when the user asks what is *latest*.
         """
+        if mode == "keyword":
+            return memory_mod.keyword_search(query, user_id=default_user, limit=limit)
         results = memory.search(query=query, filters={"user_id": default_user}, top_k=limit)
         return rerank_by_recency(results, recency_weight)
 
diff --git a/app/memory.py b/app/memory.py
index f042a86..dbbf733 100644
--- a/app/memory.py
+++ b/app/memory.py
@@ -121,3 +121,55 @@ def add_memory(content, *, dedup: bool = True, **kwargs) -> dict:
     metadata = dict(kwargs.pop("metadata", None) or {})
     metadata["content_fp"] = fingerprint
     return memory.add(content, metadata=metadata, **kwargs)
+
+
+# Upper bound on how many of the user's memories a keyword search scans in one
+# pass. Generous for a single-user store; keyword search is a literal-match
+# fallback, not the primary retrieval path.
+DEFAULT_KEYWORD_SCAN_LIMIT = 5000
+
+
+def _point_to_result(point) -> dict:
+    """Shape a Qdrant point into a search-result dict (memory text + payload)."""
+    payload = dict(getattr(point, "payload", None) or {})
+    payload.pop("text_lemmatized", None)  # internal BM25 helper — noise in results
+    memory_text = payload.pop("data", None)
+    return {"id": getattr(point, "id", None), "memory": memory_text, **payload}
+
+
+def keyword_search(
+    query: str,
+    *,
+    user_id: str | None = None,
+    limit: int = 10,
+    scan_limit: int = DEFAULT_KEYWORD_SCAN_LIMIT,
+) -> dict:
+    """Case-insensitive substring search over stored memory text.
+
+    A literal-match fallback for terms semantic search misses (names, IDs, URLs,
+    rare tokens). Scans up to `scan_limit` of the user's memories via the vector
+    store's payload listing and matches `query` as a case-insensitive substring
+    of each memory's text, returning the most recent matches first. Scoped by
+    `user_id` only (it spans the whole user store, like the MCP read tools).
+    Fail-open: any store error returns no results.
+    """
+    memory = get_memory()
+    filters = {"user_id": user_id} if user_id else None
+    try:
+        result = memory.vector_store.list(filters=filters, top_k=scan_limit)
+    except Exception:
+        return {"results": []}
+    points = result[0] if isinstance(result, tuple) else result
+    needle = query.casefold()
+    matches = [
+        point
+        for point in (points or [])
+        if isinstance((getattr(point, "payload", None) or {}).get("data"), str)
+        and needle in point.payload["data"].casefold()
+    ]
+    # ISO-8601 UTC timestamps sort chronologically as plain strings; newest first.
+    matches.sort(
+        key=lambda p: (getattr(p, "payload", None) or {}).get("created_at", ""),
+        reverse=True,
+    )
+    return {"results": [_point_to_result(p) for p in matches[:limit]]}
diff --git a/app/rest.py b/app/rest.py
index c7bd778..8ff8402 100644
--- a/app/rest.py
+++ b/app/rest.py
@@ -37,8 +37,11 @@ class SearchRequest(BaseModel):
     agent_id: str | None = None
     run_id: str | None = None
     limit: int = Field(default=10, ge=1, le=100)
-    # Opt-in recency boost. 0 = pure semantic similarity (unchanged behavior),
-    # 1 = order almost entirely by how recently a memory was created/updated.
+    # "semantic" (default, vector similarity) or "keyword" (case-insensitive
+    # substring match for exact terms semantic search misses).
+    mode: Literal["semantic", "keyword"] = "semantic"
+    # Opt-in recency boost (semantic mode only). 0 = pure semantic similarity
+    # (unchanged), 1 = order almost entirely by how recently a memory was touched.
     recency_weight: float = Field(default=0.0, ge=0.0, le=1.0)
     recency_half_life_days: float = Field(default=30.0, gt=0.0)
 
@@ -72,8 +75,10 @@ def add_memory(req: AddMemoryRequest) -> dict:
 
 @router.post("/memories/search")
 def search_memories(req: SearchRequest) -> dict:
-    memory = memory_mod.get_memory()
     filters = _scope_kwargs(req.user_id, req.agent_id, req.run_id)
+    if req.mode == "keyword":
+        return memory_mod.keyword_search(req.query, user_id=filters["user_id"], limit=req.limit)
+    memory = memory_mod.get_memory()
     results = memory.search(query=req.query, filters=filters, top_k=req.limit)
     return rerank_by_recency(results, req.recency_weight, req.recency_half_life_days)
 
diff --git a/docs/DEVELOPER_GUIDE.md b/docs/DEVELOPER_GUIDE.md
index 9807ed1..8e48a9f 100644
--- a/docs/DEVELOPER_GUIDE.md
+++ b/docs/DEVELOPER_GUIDE.md
@@ -63,7 +63,10 @@ app/
                     cheap content-fingerprint dedup: it SHA-256s the normalized raw input, stores
                     it in the `content_fp` payload field, and skips the LLM extraction if a memory
                     with that fingerprint already exists (fail-open — a lookup error just proceeds).
-                    The most tweak-prone file.
+                    keyword_search() is the substring-match fallback behind search mode="keyword":
+                    it scans the user's memories via vector_store.list() and matches the query as a
+                    case-insensitive substring of the `data` payload (fail-open). The most tweak-prone
+                    file.
   mcp_server.py     build_mcp(): the six MCP tools, each thinly wrapping a mem0 op with
                     user_id defaulted to MEM0_DEFAULT_USER_ID.
   rest.py           REST router under /api/v1 (mounted with prefix in main.py). Pydantic request
diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md
index d66bf47..877dc4e 100644
--- a/docs/USER_GUIDE.md
+++ b/docs/USER_GUIDE.md
@@ -542,6 +542,23 @@ When `recency_weight > 0`, each returned result carries a `rerank_score` showing
 the blended similarity-plus-recency value it was sorted by. The MCP
 `search_memories` tool accepts the same `recency_weight` argument.
 
+**Keyword search (optional).** Semantic search ranks by *meaning*, which can miss
+an exact term — a name, identifier, URL, or rare token. Pass `"mode": "keyword"`
+to instead do a **case-insensitive substring match** over memory text, returning
+the most recent matches first:
+
+```bash
+curl -X POST https://mem0.your-domain.com/api/v1/memories/search \
+  -H "Authorization: Bearer $MEM0_API_KEY" -H "Content-Type: application/json" \
+  -d '{"query": "Philips Hue", "mode": "keyword"}'
+```
+
+The default is `"mode": "semantic"`. The MCP `search_memories` tool accepts the
+same `mode` argument. Keyword mode spans the whole user store and scans up to
+5,000 of the user's memories per query — ample for a personal store; it's a
+literal-match fallback, not a replacement for semantic retrieval.
+(`recency_weight` applies to semantic mode only.)
+
 ### List memories — `GET /api/v1/memories`
 
 Query params: `agent_id`, `run_id`, `user_id`, `limit` (1–100, default 50).
diff --git a/tests/test_mcp.py b/tests/test_mcp.py
index 59f8292..5a76f5c 100644
--- a/tests/test_mcp.py
+++ b/tests/test_mcp.py
@@ -70,6 +70,18 @@ async def test_search_exposes_recency_weight(mcp):
         tools = {t.name: t for t in await client.list_tools()}
     props = (tools["search_memories"].inputSchema or {}).get("properties", {})
     assert "recency_weight" in props
+    assert "mode" in props  # keyword vs semantic
+
+
+async def test_search_keyword_mode_uses_listing(mcp, mem):
+    from types import SimpleNamespace
+
+    point = SimpleNamespace(id="1", payload={"data": "Philips hub", "created_at": "2026-06-01T00:00:00+00:00"})  # noqa: E501
+    mem.vector_store.list.return_value = ([point], None)
+    async with Client(mcp) as client:
+        await client.call_tool("search_memories", {"query": "philips", "mode": "keyword"})
+    mem.search.assert_not_called()  # keyword mode bypasses vector search
+    mem.vector_store.list.assert_called_once()
 
 
 async def test_search_with_recency_weight_invokes_mem(mcp, mem):
diff --git a/tests/test_memory.py b/tests/test_memory.py
index e680526..d54dfba 100644
--- a/tests/test_memory.py
+++ b/tests/test_memory.py
@@ -7,9 +7,14 @@
     _existing_fingerprint_id,
     add_memory,
     content_fingerprint,
+    keyword_search,
 )
 
 
+def _point(id, data, created_at="2026-06-01T00:00:00+00:00", **extra):
+    return SimpleNamespace(id=id, payload={"data": data, "created_at": created_at, **extra})
+
+
 def test_build_config_shape():
     cfg = _build_config(Settings())
 
@@ -137,3 +142,69 @@ def test_add_memory_merges_existing_metadata(monkeypatch):
     _, kwargs = fake.add.call_args
     assert kwargs["metadata"]["source"] == "import"
     assert "content_fp" in kwargs["metadata"]
+
+
+# --- keyword_search ----------------------------------------------------------
+
+
+def _patch_keyword(monkeypatch, points):
+    import app.memory as m
+
+    fake = MagicMock()
+    fake.vector_store.list.return_value = (points, None)
+    monkeypatch.setattr(m, "get_memory", lambda: fake)
+    return fake
+
+
+def test_keyword_search_matches_case_insensitive_substring(monkeypatch):
+    fake = _patch_keyword(
+        monkeypatch,
+        [
+            _point("1", "Ian uses Philips Hue lights", user_id="ian"),
+            _point("2", "Prefers oat milk in coffee"),
+        ],
+    )
+    out = keyword_search("philips", user_id="ian")
+    assert [r["id"] for r in out["results"]] == ["1"]
+    assert out["results"][0]["memory"] == "Ian uses Philips Hue lights"
+    _, kwargs = fake.vector_store.list.call_args
+    assert kwargs["filters"] == {"user_id": "ian"}  # scoped to the user
+
+
+def test_keyword_search_sorts_recent_first_and_limits(monkeypatch):
+    _patch_keyword(
+        monkeypatch,
+        [
+            _point("old", "alpha one", created_at="2020-01-01T00:00:00+00:00"),
+            _point("new", "alpha two", created_at="2026-06-01T00:00:00+00:00"),
+            _point("mid", "alpha three", created_at="2023-01-01T00:00:00+00:00"),
+        ],
+    )
+    out = keyword_search("alpha", user_id="ian", limit=2)
+    assert [r["id"] for r in out["results"]] == ["new", "mid"]  # newest first, capped at 2
+
+
+def test_keyword_search_drops_internal_fields_keeps_metadata(monkeypatch):
+    _patch_keyword(
+        monkeypatch,
+        [_point("1", "match me", agent_id="cli", text_lemmatized="match me")],
+    )
+    result = keyword_search("match", user_id="ian")["results"][0]
+    assert result["memory"] == "match me"
+    assert result["agent_id"] == "cli"
+    assert result["created_at"]
+    assert "data" not in result and "text_lemmatized" not in result
+
+
+def test_keyword_search_no_match_returns_empty(monkeypatch):
+    _patch_keyword(monkeypatch, [_point("1", "nothing relevant")])
+    assert keyword_search("zzz", user_id="ian") == {"results": []}
+
+
+def test_keyword_search_fails_open(monkeypatch):
+    import app.memory as m
+
+    fake = MagicMock()
+    fake.vector_store.list.side_effect = RuntimeError("qdrant down")
+    monkeypatch.setattr(m, "get_memory", lambda: fake)
+    assert keyword_search("x", user_id="ian") == {"results": []}
diff --git a/tests/test_rest.py b/tests/test_rest.py
index 536e7ab..ecfc5e2 100644
--- a/tests/test_rest.py
+++ b/tests/test_rest.py
@@ -130,6 +130,32 @@ def test_search_recency_weight_out_of_range_rejected(app_instance, mem, auth_hea
     )
 
 
+def test_search_keyword_mode(app_instance, mem, auth_header):
+    from types import SimpleNamespace
+
+    point = SimpleNamespace(id="1", payload={"data": "the Philips hub", "created_at": "2026-06-01T00:00:00+00:00"})  # noqa: E501
+    mem.vector_store.list.return_value = ([point], None)
+    c = _client(app_instance)
+    resp = c.post(
+        "/api/v1/memories/search",
+        json={"query": "philips", "mode": "keyword"},
+        headers=auth_header,
+    )
+    assert resp.status_code == 200
+    assert resp.json()["results"][0]["id"] == "1"
+    mem.search.assert_not_called()  # keyword mode bypasses vector search
+    _, kwargs = mem.vector_store.list.call_args
+    assert kwargs["filters"] == {"user_id": "default-user"}
+
+
+def test_search_invalid_mode_rejected(app_instance, mem, auth_header):
+    c = _client(app_instance)
+    resp = c.post(
+        "/api/v1/memories/search", json={"query": "x", "mode": "fuzzy"}, headers=auth_header
+    )
+    assert resp.status_code == 422
+
+
 def test_search_scoped_by_run_id(app_instance, mem, auth_header):
     mem.search.return_value = {"results": []}
     c = _client(app_instance)

From dc5d049c36920fbbbeed0995f62ed915853e5b12 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 7 Jun 2026 23:20:40 +0000
Subject: [PATCH 2/2] Address keyword-search review: strip content_fp, guard
 empty query, robust ordering, validate MCP mode

- app/memory.py: _point_to_result now also strips the internal `content_fp`
  dedup fingerprint from results. keyword_search trims the query and returns no
  results for an empty/whitespace query (instead of matching everything).
  Ordering now parses timestamps and prefers updated_at over created_at (via
  ranking._parse_timestamp), so it's correct regardless of tz representation and
  consistent with the recency-boost ranking.
- app/mcp_server.py: search_memories validates `mode` and raises on unknown
  values, matching the REST API's strict 422 behavior.
- tests: content_fp stripped, empty-query short-circuit (no store scan),
  updated_at-preferred ordering (incl. mixed Z/offset forms), MCP unknown mode
  raises ToolError.

https://claude.ai/code/session_017835DVrvURaYnbQiPQwzue
---
 app/mcp_server.py    |  2 ++
 app/memory.py        | 26 ++++++++++++++++++--------
 tests/test_mcp.py    |  9 +++++++++
 tests/test_memory.py | 39 +++++++++++++++++++++++++++++++++++++--
 4 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/app/mcp_server.py b/app/mcp_server.py
index 9fde2ac..644db7c 100644
--- a/app/mcp_server.py
+++ b/app/mcp_server.py
@@ -49,6 +49,8 @@ def search_memories(
         toward more recently created or updated memories. Leave it at 0 for pure
         relevance; raise it (e.g. 0.3) when the user asks what is *latest*.
         """
+        if mode not in ("semantic", "keyword"):
+            raise ValueError(f"mode must be 'semantic' or 'keyword', got {mode!r}")
         if mode == "keyword":
             return memory_mod.keyword_search(query, user_id=default_user, limit=limit)
         results = memory.search(query=query, filters={"user_id": default_user}, top_k=limit)
diff --git a/app/memory.py b/app/memory.py
index dbbf733..4553a59 100644
--- a/app/memory.py
+++ b/app/memory.py
@@ -1,8 +1,10 @@
 import hashlib
 import json
+from datetime import UTC, datetime
 from functools import lru_cache
 
 from app.config import Settings, get_settings
+from app.ranking import _parse_timestamp
 
 
 def _provider_config(model: str, api_key: str | None) -> dict:
@@ -132,11 +134,20 @@ def add_memory(content, *, dedup: bool = True, **kwargs) -> dict:
 def _point_to_result(point) -> dict:
     """Shape a Qdrant point into a search-result dict (memory text + payload)."""
     payload = dict(getattr(point, "payload", None) or {})
-    payload.pop("text_lemmatized", None)  # internal BM25 helper — noise in results
+    # Drop internal plumbing that shouldn't surface in results.
+    payload.pop("text_lemmatized", None)  # BM25 helper
+    payload.pop("content_fp", None)  # dedup fingerprint
     memory_text = payload.pop("data", None)
     return {"id": getattr(point, "id", None), "memory": memory_text, **payload}
 
 
+def _point_recency(point) -> datetime:
+    """Sort key for keyword results: updated_at (preferred) or created_at, parsed."""
+    payload = getattr(point, "payload", None) or {}
+    ts = _parse_timestamp(payload.get("updated_at")) or _parse_timestamp(payload.get("created_at"))
+    return ts or datetime.min.replace(tzinfo=UTC)
+
+
 def keyword_search(
     query: str,
     *,
@@ -151,8 +162,12 @@ def keyword_search(
     store's payload listing and matches `query` as a case-insensitive substring
     of each memory's text, returning the most recent matches first. Scoped by
     `user_id` only (it spans the whole user store, like the MCP read tools).
-    Fail-open: any store error returns no results.
+    An empty/whitespace query matches nothing. Fail-open: any store error
+    returns no results.
     """
+    needle = query.strip().casefold()
+    if not needle:
+        return {"results": []}
     memory = get_memory()
     filters = {"user_id": user_id} if user_id else None
     try:
@@ -160,16 +175,11 @@ def keyword_search(
     except Exception:
         return {"results": []}
     points = result[0] if isinstance(result, tuple) else result
-    needle = query.casefold()
     matches = [
         point
         for point in (points or [])
         if isinstance((getattr(point, "payload", None) or {}).get("data"), str)
         and needle in point.payload["data"].casefold()
     ]
-    # ISO-8601 UTC timestamps sort chronologically as plain strings; newest first.
-    matches.sort(
-        key=lambda p: (getattr(p, "payload", None) or {}).get("created_at", ""),
-        reverse=True,
-    )
+    matches.sort(key=_point_recency, reverse=True)  # most recently touched first
     return {"results": [_point_to_result(p) for p in matches[:limit]]}
diff --git a/tests/test_mcp.py b/tests/test_mcp.py
index 5a76f5c..4308a8d 100644
--- a/tests/test_mcp.py
+++ b/tests/test_mcp.py
@@ -1,5 +1,6 @@
 import pytest
 from fastmcp import Client
+from fastmcp.exceptions import ToolError
 
 from app.mcp_server import build_mcp
 
@@ -84,6 +85,14 @@ async def test_search_keyword_mode_uses_listing(mcp, mem):
     mem.vector_store.list.assert_called_once()
 
 
+async def test_search_rejects_unknown_mode(mcp, mem):
+    # Unknown mode must error, matching the REST API's strict validation.
+    async with Client(mcp) as client:
+        with pytest.raises(ToolError):
+            await client.call_tool("search_memories", {"query": "x", "mode": "fuzzy"})
+    mem.search.assert_not_called()
+
+
 async def test_search_with_recency_weight_invokes_mem(mcp, mem):
     mem.search.return_value = {"results": []}
     async with Client(mcp) as client:
diff --git a/tests/test_memory.py b/tests/test_memory.py
index d54dfba..95753ff 100644
--- a/tests/test_memory.py
+++ b/tests/test_memory.py
@@ -184,16 +184,45 @@ def test_keyword_search_sorts_recent_first_and_limits(monkeypatch):
     assert [r["id"] for r in out["results"]] == ["new", "mid"]  # newest first, capped at 2
 
 
+def test_keyword_search_prefers_updated_at_for_ordering(monkeypatch):
+    # "old" was created later but "new" was updated more recently → "new" first.
+    _patch_keyword(
+        monkeypatch,
+        [
+            _point("old", "alpha one", created_at="2026-06-05T00:00:00+00:00"),
+            _point(
+                "new",
+                "alpha two",
+                created_at="2020-01-01T00:00:00+00:00",
+                updated_at="2026-06-06T00:00:00Z",  # note: Zulu form, different tz repr
+            ),
+        ],
+    )
+    out = keyword_search("alpha", user_id="ian")
+    assert [r["id"] for r in out["results"]] == ["new", "old"]
+
+
 def test_keyword_search_drops_internal_fields_keeps_metadata(monkeypatch):
     _patch_keyword(
         monkeypatch,
-        [_point("1", "match me", agent_id="cli", text_lemmatized="match me")],
+        [
+            _point(
+                "1",
+                "match me",
+                agent_id="cli",
+                text_lemmatized="match me",
+                content_fp="deadbeef",
+            )
+        ],
     )
     result = keyword_search("match", user_id="ian")["results"][0]
     assert result["memory"] == "match me"
     assert result["agent_id"] == "cli"
     assert result["created_at"]
-    assert "data" not in result and "text_lemmatized" not in result
+    # Internal plumbing must not leak into results.
+    assert "data" not in result
+    assert "text_lemmatized" not in result
+    assert "content_fp" not in result
 
 
 def test_keyword_search_no_match_returns_empty(monkeypatch):
@@ -201,6 +230,12 @@ def test_keyword_search_no_match_returns_empty(monkeypatch):
     assert keyword_search("zzz", user_id="ian") == {"results": []}
 
 
+def test_keyword_search_empty_query_matches_nothing(monkeypatch):
+    fake = _patch_keyword(monkeypatch, [_point("1", "anything")])
+    assert keyword_search("   ", user_id="ian") == {"results": []}
+    fake.vector_store.list.assert_not_called()  # short-circuits before scanning
+
+
 def test_keyword_search_fails_open(monkeypatch):
     import app.memory as m