Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions app/mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,25 @@ def add_memory(content: str, agent_id: str | None = None, metadata: dict | None
return memory_mod.add_memory(content, **kwargs)

# MCP tool: search the user's memory store in one of two modes.
#   - "semantic" (default): memory.search() filtered to default_user with
#     top_k=limit, then rerank_by_recency() using recency_weight.
#   - "keyword": memory_mod.keyword_search() for default_user; recency_weight
#     is not forwarded on this path.
# Any other mode raises ValueError before either search runs.
# NOTE(review): this span is a diff view. The first `def` line, the docstring
# opener directly under it, and the four-line recency_weight paragraph ending
# in "exact topical match." appear to be the removed (pre-change) lines shown
# next to their replacements — confirm against the merged file.
@mcp.tool
def search_memories(query: str, limit: int = 10, recency_weight: float = 0.0) -> dict:
"""Search long-term memory by semantic similarity.
def search_memories(
query: str, limit: int = 10, recency_weight: float = 0.0, mode: str = "semantic"
) -> dict:
"""Search long-term memory.

Searches the single shared memory store for the user, across all agents.

recency_weight (0.0-1.0) optionally biases results toward more recently
created or updated memories. Leave it at 0 for pure semantic relevance;
raise it (e.g. 0.3) when the user asks what is *latest* or *current* and
recency matters more than an exact topical match.
mode: "semantic" (default) ranks by meaning/similarity. Use "keyword" for
a case-insensitive substring match when you need an exact term the
semantic search may miss — a name, identifier, URL, or rare token.

recency_weight (0.0-1.0, semantic mode only) optionally biases results
toward more recently created or updated memories. Leave it at 0 for pure
relevance; raise it (e.g. 0.3) when the user asks what is *latest*.
"""
if mode not in ("semantic", "keyword"):
raise ValueError(f"mode must be 'semantic' or 'keyword', got {mode!r}")
if mode == "keyword":
return memory_mod.keyword_search(query, user_id=default_user, limit=limit)
results = memory.search(query=query, filters={"user_id": default_user}, top_k=limit)
return rerank_by_recency(results, recency_weight)
Comment thread
imonroe marked this conversation as resolved.

Expand Down
62 changes: 62 additions & 0 deletions app/memory.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import hashlib
import json
from datetime import UTC, datetime
from functools import lru_cache

from app.config import Settings, get_settings
from app.ranking import _parse_timestamp


def _provider_config(model: str, api_key: str | None) -> dict:
Expand Down Expand Up @@ -121,3 +123,63 @@ def add_memory(content, *, dedup: bool = True, **kwargs) -> dict:
metadata = dict(kwargs.pop("metadata", None) or {})
metadata["content_fp"] = fingerprint
return memory.add(content, metadata=metadata, **kwargs)


# Cap on how many of a user's memories a single keyword search will scan.
# Deliberately generous for a single-user store: keyword search is only the
# literal-match fallback, never the primary retrieval path.
DEFAULT_KEYWORD_SCAN_LIMIT = 5_000


def _point_to_result(point) -> dict:
    """Shape a vector-store point into a flat search-result dict.

    The result always starts with ``id`` (the point's ``id`` attribute, or
    ``None`` when absent) and ``memory`` (the payload's ``data`` value, or
    ``None``), followed by the remaining payload fields in their stored order.

    The internal fields ``text_lemmatized`` (BM25 helper) and ``content_fp``
    (dedup fingerprint) are dropped, and ``data`` is surfaced only as
    ``memory``. A payload field that happens to be named ``id`` or ``memory``
    never overrides the canonical values; it is left out of the result.

    The point's own payload is copied, not mutated.
    """
    # Copy first so the pops below never touch the stored point's payload.
    payload = dict(getattr(point, "payload", None) or {})

    # Drop internal plumbing that shouldn't surface in results.
    for internal_key in ("text_lemmatized", "content_fp"):
        payload.pop(internal_key, None)

    result = {"id": getattr(point, "id", None), "memory": payload.pop("data", None)}
    # setdefault keeps the canonical id/memory authoritative even when the
    # payload carries keys with the same names.
    for key, value in payload.items():
        result.setdefault(key, value)
    return result
Comment thread
imonroe marked this conversation as resolved.


def _point_recency(point) -> datetime:
    """Return the timestamp used to order keyword results.

    Tries the payload's ``updated_at`` first and falls back to ``created_at``;
    each is run through ``_parse_timestamp``. When neither yields a value, the
    earliest representable UTC datetime is returned so the point sorts last in
    a newest-first ordering.
    """
    fields = getattr(point, "payload", None) or {}
    for field_name in ("updated_at", "created_at"):
        parsed = _parse_timestamp(fields.get(field_name))
        if parsed:
            return parsed
    return datetime.min.replace(tzinfo=UTC)


def keyword_search(
    query: str,
    *,
    user_id: str | None = None,
    limit: int = 10,
    scan_limit: int = DEFAULT_KEYWORD_SCAN_LIMIT,
) -> dict:
    """Case-insensitive substring search over stored memory text.

    A literal-match fallback for terms semantic search misses (names, IDs,
    URLs, rare tokens). Lists up to ``scan_limit`` points from the vector
    store, keeps those whose ``data`` payload is a string containing ``query``
    (compared with ``str.casefold``), and returns the most recently touched
    matches first.

    Args:
        query: Text to look for; surrounding whitespace is ignored.
        user_id: When given, the listing is filtered to this user. No other
            scoping is applied.
        limit: Maximum number of matches to return. Zero or a negative value
            returns no results.
        scan_limit: Maximum number of points requested from the store.

    Returns:
        ``{"results": [...]}`` where each item is shaped by
        ``_point_to_result``. An empty/whitespace query, a non-positive
        ``limit``, or an exception raised by the store listing call all yield
        ``{"results": []}``.
    """
    needle = query.strip().casefold()
    # Bail out before touching the store. A negative limit would otherwise
    # reach the final slice and be read as "all but the last N" matches.
    if not needle or limit <= 0:
        return {"results": []}

    memory = get_memory()
    filters = {"user_id": user_id} if user_id else None
    try:
        listing = memory.vector_store.list(filters=filters, top_k=scan_limit)
    except Exception:
        # Deliberate fail-open: keyword search is a best-effort fallback, so a
        # failed listing is reported as "no matches" instead of an error.
        return {"results": []}

    # The listing may be a tuple whose first element holds the points, or the
    # points themselves.
    points = listing[0] if isinstance(listing, tuple) else listing

    matches = []
    for point in points or []:
        text = (getattr(point, "payload", None) or {}).get("data")
        # Skip points with no text, or non-string text, instead of raising.
        if isinstance(text, str) and needle in text.casefold():
            matches.append(point)

    matches.sort(key=_point_recency, reverse=True)  # most recently touched first
    return {"results": [_point_to_result(p) for p in matches[:limit]]}
11 changes: 8 additions & 3 deletions app/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,11 @@ class SearchRequest(BaseModel):
agent_id: str | None = None
run_id: str | None = None
limit: int = Field(default=10, ge=1, le=100)
# Opt-in recency boost. 0 = pure semantic similarity (unchanged behavior),
# 1 = order almost entirely by how recently a memory was created/updated.
# "semantic" (default, vector similarity) or "keyword" (case-insensitive
# substring match for exact terms semantic search misses).
mode: Literal["semantic", "keyword"] = "semantic"
# Opt-in recency boost (semantic mode only). 0 = pure semantic similarity
# (unchanged), 1 = order almost entirely by how recently a memory was touched.
recency_weight: float = Field(default=0.0, ge=0.0, le=1.0)
recency_half_life_days: float = Field(default=30.0, gt=0.0)

Expand Down Expand Up @@ -72,8 +75,10 @@ def add_memory(req: AddMemoryRequest) -> dict:

# POST /memories/search: semantic search by default; substring search when
# req.mode == "keyword".
@router.post("/memories/search")
def search_memories(req: SearchRequest) -> dict:
# NOTE(review): diff view — this get_memory() call appears to be the removed
# (pre-change) line; the added one sits below the keyword branch. Confirm
# against the merged file.
memory = memory_mod.get_memory()
filters = _scope_kwargs(req.user_id, req.agent_id, req.run_id)
# Keyword mode forwards only filters["user_id"] and the limit, so agent_id and
# run_id do not narrow the results, and recency reranking is not applied.
if req.mode == "keyword":
return memory_mod.keyword_search(req.query, user_id=filters["user_id"], limit=req.limit)
memory = memory_mod.get_memory()
results = memory.search(query=req.query, filters=filters, top_k=req.limit)
# Semantic path: rerank with the request's recency weight and half-life.
return rerank_by_recency(results, req.recency_weight, req.recency_half_life_days)

Expand Down
5 changes: 4 additions & 1 deletion docs/DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ app/
cheap content-fingerprint dedup: it SHA-256s the normalized raw input, stores
it in the `content_fp` payload field, and skips the LLM extraction if a memory
with that fingerprint already exists (fail-open — a lookup error just proceeds).
The most tweak-prone file.
keyword_search() is the substring-match fallback behind search mode="keyword":
it scans the user's memories via vector_store.list() and matches the query as a
case-insensitive substring of the `data` payload (fail-open). The most tweak-prone
file.
mcp_server.py build_mcp(): the six MCP tools, each thinly wrapping a mem0 op with
user_id defaulted to MEM0_DEFAULT_USER_ID.
rest.py REST router under /api/v1 (mounted with prefix in main.py). Pydantic request
Expand Down
17 changes: 17 additions & 0 deletions docs/USER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,23 @@ When `recency_weight > 0`, each returned result carries a `rerank_score` showing
the blended similarity-plus-recency value it was sorted by. The MCP
`search_memories` tool accepts the same `recency_weight` argument.

**Keyword search (optional).** Semantic search ranks by *meaning*, which can miss
an exact term — a name, identifier, URL, or rare token. Pass `"mode": "keyword"`
to instead do a **case-insensitive substring match** over memory text, returning
the most recent matches first:

```bash
curl -X POST https://mem0.your-domain.com/api/v1/memories/search \
-H "Authorization: Bearer $MEM0_API_KEY" -H "Content-Type: application/json" \
-d '{"query": "Philips Hue", "mode": "keyword"}'
```

The default is `"mode": "semantic"`. The MCP `search_memories` tool accepts the
same `mode` argument. Keyword mode spans the whole user store and scans up to
5,000 of the user's memories per query — ample for a personal store; it's a
literal-match fallback, not a replacement for semantic retrieval.
(`recency_weight` applies to semantic mode only.)

### List memories — `GET /api/v1/memories`

Query params: `agent_id`, `run_id`, `user_id`, `limit` (1–100, default 50).
Expand Down
21 changes: 21 additions & 0 deletions tests/test_mcp.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from fastmcp import Client
from fastmcp.exceptions import ToolError

from app.mcp_server import build_mcp

Expand Down Expand Up @@ -70,6 +71,26 @@ async def test_search_exposes_recency_weight(mcp):
tools = {t.name: t for t in await client.list_tools()}
props = (tools["search_memories"].inputSchema or {}).get("properties", {})
assert "recency_weight" in props
assert "mode" in props # keyword vs semantic


async def test_search_keyword_mode_uses_listing(mcp, mem):
    """Keyword mode reads from the store listing and never calls vector search."""
    from types import SimpleNamespace

    stored = SimpleNamespace(
        id="1",
        payload={"data": "Philips hub", "created_at": "2026-06-01T00:00:00+00:00"},
    )
    mem.vector_store.list.return_value = ([stored], None)
    arguments = {"query": "philips", "mode": "keyword"}

    async with Client(mcp) as client:
        await client.call_tool("search_memories", arguments)

    # Keyword mode bypasses vector search and goes through the listing once.
    mem.search.assert_not_called()
    mem.vector_store.list.assert_called_once()


async def test_search_rejects_unknown_mode(mcp, mem):
    """An unknown mode must error, matching the REST API's strict validation."""
    bad_arguments = {"query": "x", "mode": "fuzzy"}

    async with Client(mcp) as client:
        with pytest.raises(ToolError):
            await client.call_tool("search_memories", bad_arguments)

    mem.search.assert_not_called()


async def test_search_with_recency_weight_invokes_mem(mcp, mem):
Expand Down
106 changes: 106 additions & 0 deletions tests/test_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,14 @@
_existing_fingerprint_id,
add_memory,
content_fingerprint,
keyword_search,
)


def _point(id, data, created_at="2026-06-01T00:00:00+00:00", **extra):
    """Build a stand-in store point: an ``id`` plus a payload of text, timestamp and extras."""
    payload = {"data": data, "created_at": created_at}
    payload.update(extra)
    return SimpleNamespace(id=id, payload=payload)


def test_build_config_shape():
cfg = _build_config(Settings())

Expand Down Expand Up @@ -137,3 +142,104 @@ def test_add_memory_merges_existing_metadata(monkeypatch):
_, kwargs = fake.add.call_args
assert kwargs["metadata"]["source"] == "import"
assert "content_fp" in kwargs["metadata"]


# --- keyword_search ----------------------------------------------------------


def _patch_keyword(monkeypatch, points):
    """Replace ``app.memory.get_memory`` with a mock whose listing yields ``points``.

    Returns the mock so callers can inspect how the vector store was called.
    """
    import app.memory as memory_module

    store = MagicMock(**{"vector_store.list.return_value": (points, None)})
    monkeypatch.setattr(memory_module, "get_memory", lambda: store)
    return store


def test_keyword_search_matches_case_insensitive_substring(monkeypatch):
    """A lower-case query finds mixed-case text, and the scan is scoped to the user."""
    hit = _point("1", "Ian uses Philips Hue lights", user_id="ian")
    miss = _point("2", "Prefers oat milk in coffee")
    store = _patch_keyword(monkeypatch, [hit, miss])

    results = keyword_search("philips", user_id="ian")["results"]

    assert [item["id"] for item in results] == ["1"]
    assert results[0]["memory"] == "Ian uses Philips Hue lights"
    # The listing call must have been scoped to the user.
    assert store.vector_store.list.call_args.kwargs["filters"] == {"user_id": "ian"}


def test_keyword_search_sorts_recent_first_and_limits(monkeypatch):
    """Matches come back newest first and are capped at ``limit``."""
    rows = (
        ("old", "alpha one", "2020-01-01T00:00:00+00:00"),
        ("new", "alpha two", "2026-06-01T00:00:00+00:00"),
        ("mid", "alpha three", "2023-01-01T00:00:00+00:00"),
    )
    points = [_point(pid, text, created_at=stamp) for pid, text, stamp in rows]
    _patch_keyword(monkeypatch, points)

    found = keyword_search("alpha", user_id="ian", limit=2)

    returned_ids = [item["id"] for item in found["results"]]
    assert returned_ids == ["new", "mid"]  # newest first, capped at 2


def test_keyword_search_prefers_updated_at_for_ordering(monkeypatch):
    """``updated_at`` outranks ``created_at`` when ordering matches."""
    # "old" was created later, but "new" was updated more recently → "new" first.
    created_later = _point("old", "alpha one", created_at="2026-06-05T00:00:00+00:00")
    updated_latest = _point(
        "new",
        "alpha two",
        created_at="2020-01-01T00:00:00+00:00",
        updated_at="2026-06-06T00:00:00Z",  # Zulu form: a different tz representation
    )
    _patch_keyword(monkeypatch, [created_later, updated_latest])

    found = keyword_search("alpha", user_id="ian")

    assert [item["id"] for item in found["results"]] == ["new", "old"]


def test_keyword_search_drops_internal_fields_keeps_metadata(monkeypatch):
    """Results keep user-facing metadata but hide internal payload fields."""
    stored = _point(
        "1",
        "match me",
        agent_id="cli",
        text_lemmatized="match me",
        content_fp="deadbeef",
    )
    _patch_keyword(monkeypatch, [stored])

    result = keyword_search("match", user_id="ian")["results"][0]

    assert result["memory"] == "match me"
    assert result["agent_id"] == "cli"
    assert result["created_at"]
    # Internal plumbing must not leak into results.
    for internal_field in ("data", "text_lemmatized", "content_fp"):
        assert internal_field not in result


def test_keyword_search_no_match_returns_empty(monkeypatch):
    """A query that appears in no memory text yields an empty result list."""
    only_point = _point("1", "nothing relevant")
    _patch_keyword(monkeypatch, [only_point])

    outcome = keyword_search("zzz", user_id="ian")

    assert outcome == {"results": []}


def test_keyword_search_empty_query_matches_nothing(monkeypatch):
    """A whitespace-only query returns nothing and never reaches the store."""
    store = _patch_keyword(monkeypatch, [_point("1", "anything")])

    outcome = keyword_search(" ", user_id="ian")

    assert outcome == {"results": []}
    # The search short-circuits before scanning.
    store.vector_store.list.assert_not_called()


def test_keyword_search_fails_open(monkeypatch):
    """A store error during listing is reported as no results, not raised."""
    import app.memory as memory_module

    broken_store = MagicMock(
        **{"vector_store.list.side_effect": RuntimeError("qdrant down")}
    )
    monkeypatch.setattr(memory_module, "get_memory", lambda: broken_store)

    outcome = keyword_search("x", user_id="ian")

    assert outcome == {"results": []}
26 changes: 26 additions & 0 deletions tests/test_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,32 @@ def test_search_recency_weight_out_of_range_rejected(app_instance, mem, auth_hea
)


def test_search_keyword_mode(app_instance, mem, auth_header):
    """REST keyword mode returns listing matches and skips vector search."""
    from types import SimpleNamespace

    stored = SimpleNamespace(
        id="1",
        payload={"data": "the Philips hub", "created_at": "2026-06-01T00:00:00+00:00"},
    )
    mem.vector_store.list.return_value = ([stored], None)
    request_body = {"query": "philips", "mode": "keyword"}

    response = _client(app_instance).post(
        "/api/v1/memories/search", json=request_body, headers=auth_header
    )

    assert response.status_code == 200
    assert response.json()["results"][0]["id"] == "1"
    mem.search.assert_not_called()  # keyword mode bypasses vector search
    list_kwargs = mem.vector_store.list.call_args.kwargs
    assert list_kwargs["filters"] == {"user_id": "default-user"}


def test_search_invalid_mode_rejected(app_instance, mem, auth_header):
    """A mode outside the allowed set is rejected with HTTP 422."""
    request_body = {"query": "x", "mode": "fuzzy"}

    response = _client(app_instance).post(
        "/api/v1/memories/search", json=request_body, headers=auth_header
    )

    assert response.status_code == 422


def test_search_scoped_by_run_id(app_instance, mem, auth_header):
mem.search.return_value = {"results": []}
c = _client(app_instance)
Expand Down
Loading