Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 61 additions & 0 deletions app/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""Map backend/provider failures to stable, sanitized HTTP error responses.

mem0 calls Qdrant, an LLM provider, and an embedder synchronously inside
request handlers; without this module any of those failing surfaces as an
opaque 500 whose body may leak backend details (hosts, model names, key
prefixes). classify_exception() sorts the concrete SDK exception types into a
small taxonomy with fixed, content-free messages:

- vector store / network failures -> 503 backend_unavailable
- LLM / embedder provider failures -> 502 upstream_provider_error
- anything else -> 500 internal_error

The full exception is always logged server-side (with the request_id) and
never echoed to the client. New mem0 call sites need no wrapping — the
classifier runs from the app-level exception handler in app/main.py.
"""

import anthropic
import httpx
import openai
from qdrant_client.http.exceptions import (
ResponseHandlingException,
UnexpectedResponse,
)

# Provider SDK errors are checked first: openai/anthropic connection errors
# wrap httpx exceptions, so the network-level check below would otherwise
# misfile them as vector-store trouble.
_PROVIDER_ERRORS = (openai.OpenAIError, anthropic.AnthropicError)

# Qdrant client failures and raw transport errors. ConnectionError/TimeoutError
# cover the stdlib variants some client paths raise.
_BACKEND_ERRORS = (
ResponseHandlingException,
UnexpectedResponse,
httpx.TransportError,
httpx.TimeoutException,
# Raised if a client path surfaces a raw non-2xx instead of wrapping it in
# the qdrant exceptions above.
httpx.HTTPStatusError,
ConnectionError,
TimeoutError,
)


def classify_exception(exc: BaseException) -> tuple[int, str, str]:
"""Return (status_code, error_code, client-safe detail) for an exception."""
if isinstance(exc, _PROVIDER_ERRORS):
return (
502,
"upstream_provider_error",
"An upstream model provider (LLM or embedder) failed; "
"check provider keys and status.",
)
if isinstance(exc, _BACKEND_ERRORS):
return (
503,
"backend_unavailable",
"The vector store is unreachable or returned an error; try again later.",
)
return (500, "internal_error", "Internal server error.")
27 changes: 27 additions & 0 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from starlette.routing import Route

from app.config import get_settings
from app.errors import classify_exception
from app.logging_setup import configure_logging
from app.mcp_server import build_mcp
from app.metrics import observe_request
Expand Down Expand Up @@ -62,6 +63,9 @@ async def log_requests(request: Request, call_next):
# The Authorization header is never read here, so tokens are never logged.
request_id = request.headers.get("x-request-id") or uuid.uuid4().hex[:12]
structlog.contextvars.bind_contextvars(request_id=request_id)
# Also stashed on request.state for the exception handler: by the time it
# runs, this middleware's finally block has already cleared the contextvars.
request.state.request_id = request_id
start = time.perf_counter()
status = 500 # if call_next raises, the request is logged as a 500
try:
Expand Down Expand Up @@ -94,6 +98,29 @@ async def log_requests(request: Request, call_next):
structlog.contextvars.clear_contextvars()


@app.exception_handler(Exception)
async def unhandled_exception_handler(request: Request, exc: Exception) -> JSONResponse:
"""Translate unhandled errors into stable, sanitized JSON.

Backend (Qdrant/network) failures become 503, model-provider failures 502,
everything else a generic 500. The response never includes exception text —
it carries the request_id instead, which correlates with the server-side
log line holding the full traceback.
"""
status, code, detail = classify_exception(exc)
request_id = getattr(request.state, "request_id", None)
_log.error(
"unhandled_exception",
request_id=request_id,
error_code=code,
exc_info=exc,
)
return JSONResponse(
status_code=status,
content={"detail": detail, "error": code, "request_id": request_id},
)


app.include_router(rest_router, prefix="/api/v1")

if settings.oauth_enabled:
Expand Down
29 changes: 25 additions & 4 deletions app/mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,19 +76,40 @@ def list_memories(limit: int = 50, offset: int = 0) -> dict:
filters={"user_id": default_user}, limit=limit, offset=offset
)

def _not_found(memory_id: str) -> dict:
# MCP tools return a structured error instead of raising, so the model
# sees a usable signal rather than an opaque tool exception.
return {"error": "not_found", "memory_id": memory_id}

@mcp.tool
def get_memory(memory_id: str) -> dict:
"""Fetch a single memory by ID."""
return memory.get(memory_id=memory_id)
"""Fetch a single memory by ID.

Returns {"error": "not_found", ...} if no memory has that ID.
"""
result = memory.get(memory_id=memory_id)
# Explicit None check: a falsy-but-present result (e.g. {}) is a found
# memory, not a miss.
return _not_found(memory_id) if result is None else result

@mcp.tool
def update_memory(memory_id: str, content: str) -> dict:
"""Replace the content of an existing memory."""
"""Replace the content of an existing memory.

Returns {"error": "not_found", ...} if no memory has that ID.
"""
if not memory.get(memory_id=memory_id):
return _not_found(memory_id)
return memory.update(memory_id=memory_id, data=content)

@mcp.tool
def delete_memory(memory_id: str) -> dict:
"""Permanently delete a memory."""
"""Permanently delete a memory.

Returns {"error": "not_found", ...} if no memory has that ID.
"""
if not memory.get(memory_id=memory_id):
return _not_found(memory_id)
memory.delete(memory_id=memory_id)
return {"deleted": True, "memory_id": memory_id}

Expand Down
98 changes: 88 additions & 10 deletions app/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import httpx
from fastapi import APIRouter, Depends, HTTPException, Query
from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field

from app import memory as memory_mod
from app.auth import require_bearer
Expand Down Expand Up @@ -55,6 +55,59 @@ class UpdateMemoryRequest(BaseModel):
content: str


# --- Response models ---------------------------------------------------------
# These document and validate the *stable* parts of mem0's payloads without
# freezing them: extra="allow" passes unexpected mem0 fields through untouched,
# and every route sets response_model_exclude_unset=True so fields mem0 didn't
# send aren't fabricated as nulls — the wire format stays exactly what mem0
# returned, but /docs and client generators get a real schema.


class MemoryItem(BaseModel):
model_config = ConfigDict(extra="allow")

id: str | None = None
memory: str | None = None
hash: str | None = None
created_at: str | None = None
updated_at: str | None = None
user_id: str | None = None
agent_id: str | None = None
run_id: str | None = None
score: float | None = None
metadata: dict | None = None


class MemoryResults(BaseModel):
model_config = ConfigDict(extra="allow")

results: list[MemoryItem] = Field(default_factory=list)


class AddMemoryResponse(MemoryResults):
# Set when the add was skipped because identical content already exists.
deduplicated: bool | None = None
memory_id: str | None = None


class UpdateResponse(BaseModel):
model_config = ConfigDict(extra="allow")

# mem0's update() returns a success message, not the updated item.
message: str | None = None


class DeleteResponse(BaseModel):
deleted: bool
memory_id: str


class HistoryResponse(BaseModel):
model_config = ConfigDict(extra="allow")

history: list = Field(default_factory=list)


def _provenance_filters(
source: str | None, confidence: str | None, review_status: str | None
) -> dict:
Expand All @@ -75,7 +128,9 @@ def _scope_kwargs(
return kwargs


@router.post("/memories")
@router.post(
"/memories", response_model=AddMemoryResponse, response_model_exclude_unset=True
)
def add_memory(req: AddMemoryRequest) -> dict:
if not req.content and not req.messages:
raise HTTPException(status_code=422, detail="Provide either 'content' or 'messages'")
Expand All @@ -86,7 +141,9 @@ def add_memory(req: AddMemoryRequest) -> dict:
return memory_mod.add_memory(payload, dedup=req.dedup, **kwargs)


@router.post("/memories/search")
@router.post(
"/memories/search", response_model=MemoryResults, response_model_exclude_unset=True
)
def search_memories(req: SearchRequest) -> dict:
prov = _provenance_filters(req.source, req.confidence, req.review_status)
if req.mode == "keyword":
Expand All @@ -104,7 +161,9 @@ def search_memories(req: SearchRequest) -> dict:
return memory_mod.drop_expired(results) if req.exclude_expired else results


@router.get("/memories")
@router.get(
"/memories", response_model=MemoryResults, response_model_exclude_unset=True
)
def list_memories(
user_id: str | None = None,
agent_id: str | None = None,
Expand All @@ -126,29 +185,48 @@ def list_memories(
return memory_mod.drop_expired(results) if exclude_expired else results


@router.get("/memories/{memory_id}")
def get_memory_by_id(memory_id: str) -> dict:
memory = memory_mod.get_memory()
def _get_or_404(memory, memory_id: str) -> dict:
result = memory.get(memory_id=memory_id)
if not result:
raise HTTPException(status_code=404, detail="Memory not found")
return result


@router.put("/memories/{memory_id}")
@router.get(
"/memories/{memory_id}",
response_model=MemoryItem,
response_model_exclude_unset=True,
)
def get_memory_by_id(memory_id: str) -> dict:
return _get_or_404(memory_mod.get_memory(), memory_id)


@router.put(
"/memories/{memory_id}",
response_model=UpdateResponse,
response_model_exclude_unset=True,
)
def update_memory(memory_id: str, req: UpdateMemoryRequest) -> dict:
memory = memory_mod.get_memory()
# Depending on the mem0 version, update() on a missing id either raises or
# silently no-ops; pre-checking makes it a 404 like GET.
_get_or_404(memory, memory_id)
return memory.update(memory_id=memory_id, data=req.content)


@router.delete("/memories/{memory_id}")
@router.delete("/memories/{memory_id}", response_model=DeleteResponse)
def delete_memory(memory_id: str) -> dict:
memory = memory_mod.get_memory()
_get_or_404(memory, memory_id)
memory.delete(memory_id=memory_id)
return {"deleted": True, "memory_id": memory_id}


@router.get("/memories/{memory_id}/history")
@router.get(
"/memories/{memory_id}/history",
response_model=HistoryResponse,
response_model_exclude_unset=True,
)
def memory_history(memory_id: str) -> dict:
memory = memory_mod.get_memory()
return {"history": memory.history(memory_id=memory_id)}
Expand Down
4 changes: 4 additions & 0 deletions docs/DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ app/
wiring, build_verifier() selecting Phase 1 vs Phase 2.
oauth.py Phase 2 OAuth 2.1 + PKCE + DCR endpoints, JWT issuance, JWKS, AS/PR metadata.
oauth_store.py SQLite store for OAuth clients, auth codes, refresh tokens (/app/data/oauth.db).
errors.py classify_exception(): maps concrete SDK exceptions (qdrant/httpx -> 503,
openai/anthropic -> 502, else 500) to sanitized JSON via the app-level
exception handler in main.py. New mem0 call sites need no wrapping; if a new
backend dependency is added, add its exception types to the tuples here.
ratelimit.py Per-IP fixed-window rate limiting of *failed* auth attempts, applied as the
rate_limit_middleware over four surfaces: REST (/api/v1), MCP (/mcp), OAuth
consent (POST /oauth/authorize) and token (/oauth/token). In-process state,
Expand Down
24 changes: 21 additions & 3 deletions docs/USER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,25 @@ and drive the same six tools.
## REST API reference

All endpoints live under `/api/v1` and require `Authorization: Bearer <MEM0_API_KEY>`. Request and
response bodies are JSON. `user_id` defaults to `MEM0_DEFAULT_USER_ID` if omitted.
response bodies are JSON. `user_id` defaults to `MEM0_DEFAULT_USER_ID` if omitted. Response
schemas are published in the interactive docs at `/docs` (OpenAPI).

### Error responses

Failures return a stable JSON shape:

```json
{"detail": "human-readable summary", "error": "machine_code", "request_id": "abc123def456"}
```

| Status | `error` | Meaning |
|---|---|---|
| `401` | — | Missing/invalid bearer token (plain `detail` only). |
| `404` | — | Memory ID does not exist (`GET`/`PUT`/`DELETE` by ID). |
| `422` | — | Request validation failed (FastAPI's standard shape). |
| `502` | `upstream_provider_error` | The LLM or embedding provider failed — check provider keys/status. |
| `503` | `backend_unavailable` | Qdrant is unreachable or erroring — same condition `/healthz` reports. |
| `500` | `internal_error` | Unexpected failure. The body never contains internals; quote the `request_id` (also settable via an `X-Request-Id` request header) when digging through server logs. |

### Add a memory — `POST /api/v1/memories`

Expand Down Expand Up @@ -650,11 +668,11 @@ Returns 404 if the memory does not exist.

### Update — `PUT /api/v1/memories/{memory_id}`

Body: `{"content": "new text"}`.
Body: `{"content": "new text"}`. Returns 404 if the memory does not exist.

### Delete — `DELETE /api/v1/memories/{memory_id}`

Returns `{"deleted": true, "memory_id": "…"}`.
Returns `{"deleted": true, "memory_id": "…"}`, or 404 if the memory does not exist.

### History — `GET /api/v1/memories/{memory_id}/history`

Expand Down
5 changes: 4 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ def _reset_rate_limiters():

@pytest.fixture
def mem():
FAKE_MEMORY.reset_mock()
# Full reset: plain reset_mock() keeps return_value/side_effect, which
# would leak one test's stubbing (e.g. get -> None, search -> raise) into
# every later test.
FAKE_MEMORY.reset_mock(return_value=True, side_effect=True)
# Default: no existing fingerprint, so add_memory()'s dedup check is a no-op
# and proceeds to call .add(). Tests exercising dedup override this.
FAKE_MEMORY.vector_store.list.return_value = ([], None)
Expand Down
Loading
Loading