imonroe · imonroe · Jun 7, 2026 · Jun 7, 2026 · Jun 7, 2026
diff --git a/app/mcp_server.py b/app/mcp_server.py
@@ -34,16 +34,25 @@ def add_memory(content: str, agent_id: str | None = None, metadata: dict | None
         return memory_mod.add_memory(content, **kwargs)
 
     @mcp.tool
-    def search_memories(query: str, limit: int = 10, recency_weight: float = 0.0) -> dict:
-        """Search long-term memory by semantic similarity.
+    def search_memories(
+        query: str, limit: int = 10, recency_weight: float = 0.0, mode: str = "semantic"
+    ) -> dict:
+        """Search long-term memory.
 
         Searches the single shared memory store for the user, across all agents.
 
-        recency_weight (0.0-1.0) optionally biases results toward more recently
-        created or updated memories. Leave it at 0 for pure semantic relevance;
-        raise it (e.g. 0.3) when the user asks what is *latest* or *current* and
-        recency matters more than an exact topical match.
+        mode: "semantic" (default) ranks by meaning/similarity. Use "keyword" for
+        a case-insensitive substring match when you need an exact term the
+        semantic search may miss — a name, identifier, URL, or rare token.
+
+        recency_weight (0.0-1.0, semantic mode only) optionally biases results
+        toward more recently created or updated memories. Leave it at 0 for pure
+        relevance; raise it (e.g. 0.3) when the user asks what is *latest*.
         """
+        if mode not in ("semantic", "keyword"):
+            raise ValueError(f"mode must be 'semantic' or 'keyword', got {mode!r}")
+        if mode == "keyword":
+            return memory_mod.keyword_search(query, user_id=default_user, limit=limit)
         results = memory.search(query=query, filters={"user_id": default_user}, top_k=limit)
         return rerank_by_recency(results, recency_weight)
 

diff --git a/app/memory.py b/app/memory.py
@@ -1,8 +1,10 @@
 import hashlib
 import json
+from datetime import UTC, datetime
 from functools import lru_cache
 
 from app.config import Settings, get_settings
+from app.ranking import _parse_timestamp
 
 
 def _provider_config(model: str, api_key: str | None) -> dict:
@@ -121,3 +123,63 @@ def add_memory(content, *, dedup: bool = True, **kwargs) -> dict:
     metadata = dict(kwargs.pop("metadata", None) or {})
     metadata["content_fp"] = fingerprint
     return memory.add(content, metadata=metadata, **kwargs)
+
+
+# Upper bound on how many of the user's memories a keyword search scans in one
+# pass. Generous for a single-user store; keyword search is a literal-match
+# fallback, not the primary retrieval path.
+DEFAULT_KEYWORD_SCAN_LIMIT = 5000
+
+
+def _point_to_result(point) -> dict:
+    """Flatten a Qdrant point into a result dict: id, memory text, then extra payload."""
+    hidden = ("text_lemmatized", "content_fp", "data")  # BM25 helper, dedup hash, raw text
+    fields = dict(getattr(point, "payload", None) or {})
+    result = {"id": getattr(point, "id", None), "memory": fields.get("data")}
+    # Remaining payload keys pass through (and win on a name clash); plumbing stays hidden.
+    result.update({key: value for key, value in fields.items() if key not in hidden})
+    return result
+
+
+def _point_recency(point) -> datetime:
+    """Recency sort key: updated_at, falling back to created_at, then to datetime.min (UTC)."""
+    fields = getattr(point, "payload", None) or {}
+    stamps = (_parse_timestamp(fields.get(name)) for name in ("updated_at", "created_at"))
+    return next((stamp for stamp in stamps if stamp), datetime.min.replace(tzinfo=UTC))
+
+
+def keyword_search(
+    query: str,
+    *,
+    user_id: str | None = None,
+    limit: int = 10,
+    scan_limit: int = DEFAULT_KEYWORD_SCAN_LIMIT,
+) -> dict:
+    """Case-insensitive substring search over stored memory text.
+
+    Literal-match fallback for terms semantic search misses (names, IDs, URLs, rare
+    tokens): lists up to `scan_limit` memories, scoped by `user_id` only, and returns
+    at most `limit` whose text contains `query`, most recently touched first. A blank
+    query or `limit` <= 0 matches nothing. Fail-open: a store error is logged, not raised.
+    """
+    import logging  # function-scope import keeps this block self-contained
+
+    needle = query.strip().casefold()
+    if not needle or limit <= 0:  # a negative limit would otherwise slice from the end
+        return {"results": []}
+    memory = get_memory()
+    filters = {"user_id": user_id} if user_id else None
+    try:
+        result = memory.vector_store.list(filters=filters, top_k=scan_limit)
+    except Exception:  # best-effort by design, but leave a trace of the outage
+        logging.getLogger(__name__).warning("keyword_search listing failed", exc_info=True)
+        return {"results": []}
+    points = result[0] if isinstance(result, tuple) else result
+    matches = [
+        point
+        for point in (points or [])
+        if isinstance((getattr(point, "payload", None) or {}).get("data"), str)
+        and needle in point.payload["data"].casefold()
+    ]
+    matches.sort(key=_point_recency, reverse=True)  # most recently touched first
+    return {"results": [_point_to_result(p) for p in matches[:limit]]}
diff --git a/app/rest.py b/app/rest.py
@@ -37,8 +37,11 @@ class SearchRequest(BaseModel):
     agent_id: str | None = None
     run_id: str | None = None
     limit: int = Field(default=10, ge=1, le=100)
-    # Opt-in recency boost. 0 = pure semantic similarity (unchanged behavior),
-    # 1 = order almost entirely by how recently a memory was created/updated.
+    # "semantic" (default, vector similarity) or "keyword" (case-insensitive
+    # substring match for exact terms semantic search misses).
+    mode: Literal["semantic", "keyword"] = "semantic"
+    # Opt-in recency boost (semantic mode only). 0 = pure semantic similarity
+    # (unchanged), 1 = order almost entirely by how recently a memory was touched.
     recency_weight: float = Field(default=0.0, ge=0.0, le=1.0)
     recency_half_life_days: float = Field(default=30.0, gt=0.0)
 
@@ -72,8 +75,10 @@ def add_memory(req: AddMemoryRequest) -> dict:
 
 @router.post("/memories/search")
 def search_memories(req: SearchRequest) -> dict:
-    memory = memory_mod.get_memory()
     filters = _scope_kwargs(req.user_id, req.agent_id, req.run_id)
+    if req.mode == "keyword":
+        return memory_mod.keyword_search(req.query, user_id=filters["user_id"], limit=req.limit)
+    memory = memory_mod.get_memory()
     results = memory.search(query=req.query, filters=filters, top_k=req.limit)
     return rerank_by_recency(results, req.recency_weight, req.recency_half_life_days)
 

diff --git a/docs/DEVELOPER_GUIDE.md b/docs/DEVELOPER_GUIDE.md
@@ -63,7 +63,10 @@ app/
                     cheap content-fingerprint dedup: it SHA-256s the normalized raw input, stores
                     it in the `content_fp` payload field, and skips the LLM extraction if a memory
                     with that fingerprint already exists (fail-open — a lookup error just proceeds).
-                    The most tweak-prone file.
+                    keyword_search() is the substring-match fallback behind search mode="keyword":
+                    it scans the user's memories via vector_store.list() and matches the query as a
+                    case-insensitive substring of the `data` payload (fail-open). The most tweak-prone
+                    file.
   mcp_server.py     build_mcp(): the six MCP tools, each thinly wrapping a mem0 op with
                     user_id defaulted to MEM0_DEFAULT_USER_ID.
   rest.py           REST router under /api/v1 (mounted with prefix in main.py). Pydantic request

diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md
@@ -542,6 +542,23 @@ When `recency_weight > 0`, each returned result carries a `rerank_score` showing
 the blended similarity-plus-recency value it was sorted by. The MCP
 `search_memories` tool accepts the same `recency_weight` argument.
 
+**Keyword search (optional).** Semantic search ranks by *meaning*, which can miss
+an exact term — a name, identifier, URL, or rare token. Pass `"mode": "keyword"`
+to instead do a **case-insensitive substring match** over memory text, returning
+the most recent matches first:
+
+```bash
+curl -X POST https://mem0.your-domain.com/api/v1/memories/search \
+  -H "Authorization: Bearer $MEM0_API_KEY" -H "Content-Type: application/json" \
+  -d '{"query": "Philips Hue", "mode": "keyword"}'
+```
+
+The default is `"mode": "semantic"`. The MCP `search_memories` tool accepts the
+same `mode` argument. Keyword mode spans the whole user store (`agent_id` and
+`run_id` are ignored) and scans at most 5,000 memories per query — ample for a
+personal store; it's a literal-match fallback, not a replacement for semantic
+retrieval. (`recency_weight` applies to semantic mode only.)
+
 ### List memories — `GET /api/v1/memories`
 
 Query params: `agent_id`, `run_id`, `user_id`, `limit` (1–100, default 50).

diff --git a/tests/test_mcp.py b/tests/test_mcp.py
@@ -1,5 +1,6 @@
 import pytest
 from fastmcp import Client
+from fastmcp.exceptions import ToolError
 
 from app.mcp_server import build_mcp
 
@@ -70,6 +71,26 @@ async def test_search_exposes_recency_weight(mcp):
         tools = {t.name: t for t in await client.list_tools()}
     props = (tools["search_memories"].inputSchema or {}).get("properties", {})
     assert "recency_weight" in props
+    assert "mode" in props  # keyword vs semantic
+
+
+async def test_search_keyword_mode_uses_listing(mcp, mem):
+    from types import SimpleNamespace
+
+    point = SimpleNamespace(id="1", payload={"data": "Philips hub", "created_at": "2026-06-01T00:00:00+00:00"})  # noqa: E501
+    mem.vector_store.list.return_value = ([point], None)
+    async with Client(mcp) as client:
+        await client.call_tool("search_memories", {"query": "philips", "mode": "keyword"})
+    mem.search.assert_not_called()  # keyword mode bypasses vector search
+    mem.vector_store.list.assert_called_once()
+
+
+async def test_search_rejects_unknown_mode(mcp, mem):
+    # Unknown mode must error, matching the REST API's strict validation.
+    async with Client(mcp) as client:
+        with pytest.raises(ToolError):
+            await client.call_tool("search_memories", {"query": "x", "mode": "fuzzy"})
+    mem.search.assert_not_called()
 
 
 async def test_search_with_recency_weight_invokes_mem(mcp, mem):

diff --git a/tests/test_memory.py b/tests/test_memory.py
@@ -7,9 +7,14 @@
     _existing_fingerprint_id,
     add_memory,
     content_fingerprint,
+    keyword_search,
 )
 
 
+def _point(id, data, created_at="2026-06-01T00:00:00+00:00", **extra):
+    return SimpleNamespace(id=id, payload={"data": data, "created_at": created_at, **extra})
+
+
 def test_build_config_shape():
     cfg = _build_config(Settings())
 
@@ -137,3 +142,104 @@ def test_add_memory_merges_existing_metadata(monkeypatch):
     _, kwargs = fake.add.call_args
     assert kwargs["metadata"]["source"] == "import"
     assert "content_fp" in kwargs["metadata"]
+
+
+# --- keyword_search ----------------------------------------------------------
+
+
+def _patch_keyword(monkeypatch, points):
+    import app.memory as m
+
+    fake = MagicMock()
+    fake.vector_store.list.return_value = (points, None)
+    monkeypatch.setattr(m, "get_memory", lambda: fake)
+    return fake
+
+
+def test_keyword_search_matches_case_insensitive_substring(monkeypatch):
+    fake = _patch_keyword(
+        monkeypatch,
+        [
+            _point("1", "Ian uses Philips Hue lights", user_id="ian"),
+            _point("2", "Prefers oat milk in coffee"),
+        ],
+    )
+    out = keyword_search("philips", user_id="ian")
+    assert [r["id"] for r in out["results"]] == ["1"]
+    assert out["results"][0]["memory"] == "Ian uses Philips Hue lights"
+    _, kwargs = fake.vector_store.list.call_args
+    assert kwargs["filters"] == {"user_id": "ian"}  # scoped to the user
+
+
+def test_keyword_search_sorts_recent_first_and_limits(monkeypatch):
+    _patch_keyword(
+        monkeypatch,
+        [
+            _point("old", "alpha one", created_at="2020-01-01T00:00:00+00:00"),
+            _point("new", "alpha two", created_at="2026-06-01T00:00:00+00:00"),
+            _point("mid", "alpha three", created_at="2023-01-01T00:00:00+00:00"),
+        ],
+    )
+    out = keyword_search("alpha", user_id="ian", limit=2)
+    assert [r["id"] for r in out["results"]] == ["new", "mid"]  # newest first, capped at 2
+
+
+def test_keyword_search_prefers_updated_at_for_ordering(monkeypatch):
+    # "old" was created later but "new" was updated more recently → "new" first.
+    _patch_keyword(
+        monkeypatch,
+        [
+            _point("old", "alpha one", created_at="2026-06-05T00:00:00+00:00"),
+            _point(
+                "new",
+                "alpha two",
+                created_at="2020-01-01T00:00:00+00:00",
+                updated_at="2026-06-06T00:00:00Z",  # note: Zulu form, different tz repr
+            ),
+        ],
+    )
+    out = keyword_search("alpha", user_id="ian")
+    assert [r["id"] for r in out["results"]] == ["new", "old"]
+
+
+def test_keyword_search_drops_internal_fields_keeps_metadata(monkeypatch):
+    _patch_keyword(
+        monkeypatch,
+        [
+            _point(
+                "1",
+                "match me",
+                agent_id="cli",
+                text_lemmatized="match me",
+                content_fp="deadbeef",
+            )
+        ],
+    )
+    result = keyword_search("match", user_id="ian")["results"][0]
+    assert result["memory"] == "match me"
+    assert result["agent_id"] == "cli"
+    assert result["created_at"]
+    # Internal plumbing must not leak into results.
+    assert "data" not in result
+    assert "text_lemmatized" not in result
+    assert "content_fp" not in result
+
+
+def test_keyword_search_no_match_returns_empty(monkeypatch):
+    _patch_keyword(monkeypatch, [_point("1", "nothing relevant")])
+    assert keyword_search("zzz", user_id="ian") == {"results": []}
+
+
+def test_keyword_search_empty_query_matches_nothing(monkeypatch):
+    fake = _patch_keyword(monkeypatch, [_point("1", "anything")])
+    assert keyword_search("   ", user_id="ian") == {"results": []}
+    fake.vector_store.list.assert_not_called()  # short-circuits before scanning
+
+
+def test_keyword_search_fails_open(monkeypatch):
+    import app.memory as m
+
+    fake = MagicMock()
+    fake.vector_store.list.side_effect = RuntimeError("qdrant down")
+    monkeypatch.setattr(m, "get_memory", lambda: fake)
+    assert keyword_search("x", user_id="ian") == {"results": []}
diff --git a/tests/test_rest.py b/tests/test_rest.py
@@ -130,6 +130,32 @@ def test_search_recency_weight_out_of_range_rejected(app_instance, mem, auth_hea
     )
 
 
+def test_search_keyword_mode(app_instance, mem, auth_header):
+    from types import SimpleNamespace
+
+    point = SimpleNamespace(id="1", payload={"data": "the Philips hub", "created_at": "2026-06-01T00:00:00+00:00"})  # noqa: E501
+    mem.vector_store.list.return_value = ([point], None)
+    c = _client(app_instance)
+    resp = c.post(
+        "/api/v1/memories/search",
+        json={"query": "philips", "mode": "keyword"},
+        headers=auth_header,
+    )
+    assert resp.status_code == 200
+    assert resp.json()["results"][0]["id"] == "1"
+    mem.search.assert_not_called()  # keyword mode bypasses vector search
+    _, kwargs = mem.vector_store.list.call_args
+    assert kwargs["filters"] == {"user_id": "default-user"}
+
+
+def test_search_invalid_mode_rejected(app_instance, mem, auth_header):
+    c = _client(app_instance)
+    resp = c.post(
+        "/api/v1/memories/search", json={"query": "x", "mode": "fuzzy"}, headers=auth_header
+    )
+    assert resp.status_code == 422
+
+
 def test_search_scoped_by_run_id(app_instance, mem, auth_header):
     mem.search.return_value = {"results": []}
     c = _client(app_instance)