diff --git a/app/mcp_server.py b/app/mcp_server.py index c7bdc71..644db7c 100644 --- a/app/mcp_server.py +++ b/app/mcp_server.py @@ -34,16 +34,25 @@ def add_memory(content: str, agent_id: str | None = None, metadata: dict | None return memory_mod.add_memory(content, **kwargs) @mcp.tool - def search_memories(query: str, limit: int = 10, recency_weight: float = 0.0) -> dict: - """Search long-term memory by semantic similarity. + def search_memories( + query: str, limit: int = 10, recency_weight: float = 0.0, mode: str = "semantic" + ) -> dict: + """Search long-term memory. Searches the single shared memory store for the user, across all agents. - recency_weight (0.0-1.0) optionally biases results toward more recently - created or updated memories. Leave it at 0 for pure semantic relevance; - raise it (e.g. 0.3) when the user asks what is *latest* or *current* and - recency matters more than an exact topical match. + mode: "semantic" (default) ranks by meaning/similarity. Use "keyword" for + a case-insensitive substring match when you need an exact term the + semantic search may miss — a name, identifier, URL, or rare token. + + recency_weight (0.0-1.0, semantic mode only) optionally biases results + toward more recently created or updated memories. Leave it at 0 for pure + relevance; raise it (e.g. 0.3) when the user asks what is *latest*. 
""" + if mode not in ("semantic", "keyword"): + raise ValueError(f"mode must be 'semantic' or 'keyword', got {mode!r}") + if mode == "keyword": + return memory_mod.keyword_search(query, user_id=default_user, limit=limit) results = memory.search(query=query, filters={"user_id": default_user}, top_k=limit) return rerank_by_recency(results, recency_weight) diff --git a/app/memory.py b/app/memory.py index f042a86..4553a59 100644 --- a/app/memory.py +++ b/app/memory.py @@ -1,8 +1,10 @@ import hashlib import json +from datetime import UTC, datetime from functools import lru_cache from app.config import Settings, get_settings +from app.ranking import _parse_timestamp def _provider_config(model: str, api_key: str | None) -> dict: @@ -121,3 +123,63 @@ def add_memory(content, *, dedup: bool = True, **kwargs) -> dict: metadata = dict(kwargs.pop("metadata", None) or {}) metadata["content_fp"] = fingerprint return memory.add(content, metadata=metadata, **kwargs) + + +# Upper bound on how many of the user's memories a keyword search scans in one +# pass. Generous for a single-user store; keyword search is a literal-match +# fallback, not the primary retrieval path. +DEFAULT_KEYWORD_SCAN_LIMIT = 5000 + + +def _point_to_result(point) -> dict: + """Shape a Qdrant point into a search-result dict (memory text + payload).""" + payload = dict(getattr(point, "payload", None) or {}) + # Drop internal plumbing that shouldn't surface in results. 
+ payload.pop("text_lemmatized", None) # BM25 helper + payload.pop("content_fp", None) # dedup fingerprint + memory_text = payload.pop("data", None) + return {"id": getattr(point, "id", None), "memory": memory_text, **payload} + + +def _point_recency(point) -> datetime: + """Sort key for keyword results: updated_at (preferred) or created_at, parsed.""" + payload = getattr(point, "payload", None) or {} + ts = _parse_timestamp(payload.get("updated_at")) or _parse_timestamp(payload.get("created_at")) + return ts or datetime.min.replace(tzinfo=UTC) + + +def keyword_search( + query: str, + *, + user_id: str | None = None, + limit: int = 10, + scan_limit: int = DEFAULT_KEYWORD_SCAN_LIMIT, +) -> dict: + """Case-insensitive substring search over stored memory text. + + A literal-match fallback for terms semantic search misses (names, IDs, URLs, + rare tokens). Scans up to `scan_limit` of the user's memories via the vector + store's payload listing and matches `query` as a case-insensitive substring + of each memory's text, returning the most recent matches first. Scoped by + `user_id` only (it spans the whole user store, like the MCP read tools). + An empty/whitespace query matches nothing. Fail-open: any store error + returns no results. 
+ """ + needle = query.strip().casefold() + if not needle: + return {"results": []} + memory = get_memory() + filters = {"user_id": user_id} if user_id else None + try: + result = memory.vector_store.list(filters=filters, top_k=scan_limit) + except Exception: + return {"results": []} + points = result[0] if isinstance(result, tuple) else result + matches = [ + point + for point in (points or []) + if isinstance((getattr(point, "payload", None) or {}).get("data"), str) + and needle in point.payload["data"].casefold() + ] + matches.sort(key=_point_recency, reverse=True) # most recently touched first + return {"results": [_point_to_result(p) for p in matches[:limit]]} diff --git a/app/rest.py b/app/rest.py index c7bd778..8ff8402 100644 --- a/app/rest.py +++ b/app/rest.py @@ -37,8 +37,11 @@ class SearchRequest(BaseModel): agent_id: str | None = None run_id: str | None = None limit: int = Field(default=10, ge=1, le=100) - # Opt-in recency boost. 0 = pure semantic similarity (unchanged behavior), - # 1 = order almost entirely by how recently a memory was created/updated. + # "semantic" (default, vector similarity) or "keyword" (case-insensitive + # substring match for exact terms semantic search misses). + mode: Literal["semantic", "keyword"] = "semantic" + # Opt-in recency boost (semantic mode only). 0 = pure semantic similarity + # (unchanged), 1 = order almost entirely by how recently a memory was touched. 
recency_weight: float = Field(default=0.0, ge=0.0, le=1.0) recency_half_life_days: float = Field(default=30.0, gt=0.0) @@ -72,8 +75,10 @@ def add_memory(req: AddMemoryRequest) -> dict: @router.post("/memories/search") def search_memories(req: SearchRequest) -> dict: - memory = memory_mod.get_memory() filters = _scope_kwargs(req.user_id, req.agent_id, req.run_id) + if req.mode == "keyword": + return memory_mod.keyword_search(req.query, user_id=filters["user_id"], limit=req.limit) + memory = memory_mod.get_memory() results = memory.search(query=req.query, filters=filters, top_k=req.limit) return rerank_by_recency(results, req.recency_weight, req.recency_half_life_days) diff --git a/docs/DEVELOPER_GUIDE.md b/docs/DEVELOPER_GUIDE.md index 9807ed1..8e48a9f 100644 --- a/docs/DEVELOPER_GUIDE.md +++ b/docs/DEVELOPER_GUIDE.md @@ -63,7 +63,10 @@ app/ cheap content-fingerprint dedup: it SHA-256s the normalized raw input, stores it in the `content_fp` payload field, and skips the LLM extraction if a memory with that fingerprint already exists (fail-open — a lookup error just proceeds). - The most tweak-prone file. + keyword_search() is the substring-match fallback behind search mode="keyword": + it scans the user's memories via vector_store.list() and matches the query as a + case-insensitive substring of the `data` payload (fail-open). The most tweak-prone + file. mcp_server.py build_mcp(): the six MCP tools, each thinly wrapping a mem0 op with user_id defaulted to MEM0_DEFAULT_USER_ID. rest.py REST router under /api/v1 (mounted with prefix in main.py). Pydantic request diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md index d66bf47..877dc4e 100644 --- a/docs/USER_GUIDE.md +++ b/docs/USER_GUIDE.md @@ -542,6 +542,23 @@ When `recency_weight > 0`, each returned result carries a `rerank_score` showing the blended similarity-plus-recency value it was sorted by. The MCP `search_memories` tool accepts the same `recency_weight` argument. 
+**Keyword search (optional).** Semantic search ranks by *meaning*, which can miss +an exact term — a name, identifier, URL, or rare token. Pass `"mode": "keyword"` +to instead do a **case-insensitive substring match** over memory text, returning +the most recent matches first: + +```bash +curl -X POST https://mem0.your-domain.com/api/v1/memories/search \ + -H "Authorization: Bearer $MEM0_API_KEY" -H "Content-Type: application/json" \ + -d '{"query": "Philips Hue", "mode": "keyword"}' +``` + +The default is `"mode": "semantic"`. The MCP `search_memories` tool accepts the +same `mode` argument. Keyword mode spans the whole user store and scans up to +5,000 of that user's memories per query — ample for a personal store; +it's a literal-match fallback, not a replacement for semantic retrieval. +(`recency_weight` applies to semantic mode only.) + ### List memories — `GET /api/v1/memories` Query params: `agent_id`, `run_id`, `user_id`, `limit` (1–100, default 50). diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 59f8292..4308a8d 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -1,5 +1,6 @@ import pytest from fastmcp import Client +from fastmcp.exceptions import ToolError from app.mcp_server import build_mcp @@ -70,6 +71,26 @@ async def test_search_exposes_recency_weight(mcp): tools = {t.name: t for t in await client.list_tools()} props = (tools["search_memories"].inputSchema or {}).get("properties", {}) assert "recency_weight" in props + assert "mode" in props # keyword vs semantic + + +async def test_search_keyword_mode_uses_listing(mcp, mem): + from types import SimpleNamespace + + point = SimpleNamespace(id="1", payload={"data": "Philips hub", "created_at": "2026-06-01T00:00:00+00:00"}) # noqa: E501 + mem.vector_store.list.return_value = ([point], None) + async with Client(mcp) as client: + await client.call_tool("search_memories", {"query": "philips", "mode": "keyword"}) + mem.search.assert_not_called() # keyword mode bypasses vector 
search + mem.vector_store.list.assert_called_once() + + +async def test_search_rejects_unknown_mode(mcp, mem): + # Unknown mode must error, matching the REST API's strict validation. + async with Client(mcp) as client: + with pytest.raises(ToolError): + await client.call_tool("search_memories", {"query": "x", "mode": "fuzzy"}) + mem.search.assert_not_called() async def test_search_with_recency_weight_invokes_mem(mcp, mem): diff --git a/tests/test_memory.py b/tests/test_memory.py index e680526..95753ff 100644 --- a/tests/test_memory.py +++ b/tests/test_memory.py @@ -7,9 +7,14 @@ _existing_fingerprint_id, add_memory, content_fingerprint, + keyword_search, ) +def _point(id, data, created_at="2026-06-01T00:00:00+00:00", **extra): + return SimpleNamespace(id=id, payload={"data": data, "created_at": created_at, **extra}) + + def test_build_config_shape(): cfg = _build_config(Settings()) @@ -137,3 +142,104 @@ def test_add_memory_merges_existing_metadata(monkeypatch): _, kwargs = fake.add.call_args assert kwargs["metadata"]["source"] == "import" assert "content_fp" in kwargs["metadata"] + + +# --- keyword_search ---------------------------------------------------------- + + +def _patch_keyword(monkeypatch, points): + import app.memory as m + + fake = MagicMock() + fake.vector_store.list.return_value = (points, None) + monkeypatch.setattr(m, "get_memory", lambda: fake) + return fake + + +def test_keyword_search_matches_case_insensitive_substring(monkeypatch): + fake = _patch_keyword( + monkeypatch, + [ + _point("1", "Ian uses Philips Hue lights", user_id="ian"), + _point("2", "Prefers oat milk in coffee"), + ], + ) + out = keyword_search("philips", user_id="ian") + assert [r["id"] for r in out["results"]] == ["1"] + assert out["results"][0]["memory"] == "Ian uses Philips Hue lights" + _, kwargs = fake.vector_store.list.call_args + assert kwargs["filters"] == {"user_id": "ian"} # scoped to the user + + +def test_keyword_search_sorts_recent_first_and_limits(monkeypatch): + 
_patch_keyword( + monkeypatch, + [ + _point("old", "alpha one", created_at="2020-01-01T00:00:00+00:00"), + _point("new", "alpha two", created_at="2026-06-01T00:00:00+00:00"), + _point("mid", "alpha three", created_at="2023-01-01T00:00:00+00:00"), + ], + ) + out = keyword_search("alpha", user_id="ian", limit=2) + assert [r["id"] for r in out["results"]] == ["new", "mid"] # newest first, capped at 2 + + +def test_keyword_search_prefers_updated_at_for_ordering(monkeypatch): + # "old" was created later but "new" was updated more recently → "new" first. + _patch_keyword( + monkeypatch, + [ + _point("old", "alpha one", created_at="2026-06-05T00:00:00+00:00"), + _point( + "new", + "alpha two", + created_at="2020-01-01T00:00:00+00:00", + updated_at="2026-06-06T00:00:00Z", # note: Zulu form, different tz repr + ), + ], + ) + out = keyword_search("alpha", user_id="ian") + assert [r["id"] for r in out["results"]] == ["new", "old"] + + +def test_keyword_search_drops_internal_fields_keeps_metadata(monkeypatch): + _patch_keyword( + monkeypatch, + [ + _point( + "1", + "match me", + agent_id="cli", + text_lemmatized="match me", + content_fp="deadbeef", + ) + ], + ) + result = keyword_search("match", user_id="ian")["results"][0] + assert result["memory"] == "match me" + assert result["agent_id"] == "cli" + assert result["created_at"] + # Internal plumbing must not leak into results. 
+ assert "data" not in result + assert "text_lemmatized" not in result + assert "content_fp" not in result + + +def test_keyword_search_no_match_returns_empty(monkeypatch): + _patch_keyword(monkeypatch, [_point("1", "nothing relevant")]) + assert keyword_search("zzz", user_id="ian") == {"results": []} + + +def test_keyword_search_empty_query_matches_nothing(monkeypatch): + fake = _patch_keyword(monkeypatch, [_point("1", "anything")]) + assert keyword_search(" ", user_id="ian") == {"results": []} + fake.vector_store.list.assert_not_called() # short-circuits before scanning + + +def test_keyword_search_fails_open(monkeypatch): + import app.memory as m + + fake = MagicMock() + fake.vector_store.list.side_effect = RuntimeError("qdrant down") + monkeypatch.setattr(m, "get_memory", lambda: fake) + assert keyword_search("x", user_id="ian") == {"results": []} diff --git a/tests/test_rest.py b/tests/test_rest.py index 536e7ab..ecfc5e2 100644 --- a/tests/test_rest.py +++ b/tests/test_rest.py @@ -130,6 +130,32 @@ def test_search_recency_weight_out_of_range_rejected(app_instance, mem, auth_hea ) +def test_search_keyword_mode(app_instance, mem, auth_header): + from types import SimpleNamespace + + point = SimpleNamespace(id="1", payload={"data": "the Philips hub", "created_at": "2026-06-01T00:00:00+00:00"}) # noqa: E501 + mem.vector_store.list.return_value = ([point], None) + c = _client(app_instance) + resp = c.post( + "/api/v1/memories/search", + json={"query": "philips", "mode": "keyword"}, + headers=auth_header, + ) + assert resp.status_code == 200 + assert resp.json()["results"][0]["id"] == "1" + mem.search.assert_not_called() # keyword mode bypasses vector search + _, kwargs = mem.vector_store.list.call_args + assert kwargs["filters"] == {"user_id": "default-user"} + + +def test_search_invalid_mode_rejected(app_instance, mem, auth_header): + c = _client(app_instance) + resp = c.post( + "/api/v1/memories/search", json={"query": "x", "mode": "fuzzy"}, headers=auth_header + 
) + assert resp.status_code == 422 + + def test_search_scoped_by_run_id(app_instance, mem, auth_header): mem.search.return_value = {"results": []} c = _client(app_instance)