From 68c59adf6e0598cdb8a99df0c6cc88066e9219c3 Mon Sep 17 00:00:00 2001
From: xprilion <xprilion@gmail.com>
Date: Mon, 27 Apr 2026 18:38:47 +0530
Subject: [PATCH 1/7] Add workspaces and projects

- Project entity with persistent workspace directories (Docker volume)
- Knowledge graph (networkx) for cross-conversation memory
- File tree panel, interactive terminal, project selector UI
- Workspace tool for agent: status, search, notes, knowledge CRUD
- ArXiv API fixed to use HTTPS
- Security hardened: path traversal prevention, env scrubbing,
  size limits, sandbox containment, filename sanitization,
  DB operation allowlists, contextvars for session isolation
- 65 new tests (projects, workspace, knowledge graph, tools)
- Docs: projects.md, updated tools.md, README, VitePress nav
---
 .env.example                                  |   8 +
 README.md                                     |   6 +-
 backend/configs/prompts/system_prompt.yaml    |  22 +
 backend/openmlr/app.py                        |  10 +-
 backend/openmlr/compute/workspace.py          | 177 ++++--
 backend/openmlr/db/engine.py                  |   5 +
 .../migrations/versions/004_add_projects.py   |  48 ++
 backend/openmlr/db/models.py                  |  94 ++-
 backend/openmlr/db/operations.py              | 256 ++++++--
 backend/openmlr/routes/projects.py            | 545 ++++++++++++++++++
 backend/openmlr/routes/terminal.py            | 275 +++++++++
 backend/openmlr/sandbox/local.py              |  55 +-
 backend/openmlr/sandbox/manager.py            |  25 +-
 backend/openmlr/tools/papers.py               | 181 ++++--
 backend/openmlr/tools/registry.py             |  59 +-
 backend/openmlr/tools/workspace_tools.py      | 383 ++++++++++++
 backend/openmlr/workspace/__init__.py         |   6 +
 backend/openmlr/workspace/knowledge.py        | 375 ++++++++++++
 backend/openmlr/workspace/persistence.py      | 353 ++++++++++++
 backend/pyproject.toml                        |   3 +
 backend/tests/test_projects.py                | 196 +++++++
 backend/tests/test_tools_workspace.py         | 208 +++++++
 backend/tests/test_workspace.py               | 333 +++++++++++
 docker-compose.prod.yml                       |   4 +
 docker-compose.yml                            |   3 +
 frontend/src/App.tsx                          |  33 +-
 frontend/src/__tests__/RightPanel.test.tsx    |  10 +
 frontend/src/__tests__/Sidebar.test.tsx       |  36 ++
 frontend/src/api.ts                           |  22 +
 frontend/src/components/FileTree.tsx          | 303 ++++++++++
 frontend/src/components/ProjectModal.tsx      | 104 ++++
 frontend/src/components/RightPanel.tsx        |  52 +-
 frontend/src/components/Sidebar.tsx           |  59 +-
 frontend/src/components/Terminal.tsx          | 208 +++++++
 frontend/src/types.ts                         |  24 +
 site/docs/.vitepress/config.ts                |   2 +
 site/docs/projects.md                         | 173 ++++++
 site/docs/tools.md                            |  23 +-
 38 files changed, 4482 insertions(+), 197 deletions(-)
 create mode 100644 backend/openmlr/db/migrations/versions/004_add_projects.py
 create mode 100644 backend/openmlr/routes/projects.py
 create mode 100644 backend/openmlr/routes/terminal.py
 create mode 100644 backend/openmlr/tools/workspace_tools.py
 create mode 100644 backend/openmlr/workspace/__init__.py
 create mode 100644 backend/openmlr/workspace/knowledge.py
 create mode 100644 backend/openmlr/workspace/persistence.py
 create mode 100644 backend/tests/test_projects.py
 create mode 100644 backend/tests/test_tools_workspace.py
 create mode 100644 backend/tests/test_workspace.py
 create mode 100644 frontend/src/components/FileTree.tsx
 create mode 100644 frontend/src/components/ProjectModal.tsx
 create mode 100644 frontend/src/components/Terminal.tsx
 create mode 100644 site/docs/projects.md

diff --git a/.env.example b/.env.example
index 30c6638..cd0d11c 100644
--- a/.env.example
+++ b/.env.example
@@ -78,6 +78,14 @@ GITHUB_TOKEN=
 # OpenAlex email for faster rate limits (paper search works without it)
 # OPENALEX_EMAIL=you@example.com
 
+# ═══════════════════════════════════════════════════════════
+# PROJECT WORKSPACES
+# ═══════════════════════════════════════════════════════════
+
+# Where project workspaces are stored (bind mount in production)
+# Docker Compose dev uses a named volume; production uses this path.
+# OPENMLR_WORKSPACES_PATH=./.workspaces
+
 # ═══════════════════════════════════════════════════════════
 # SANDBOX / CODE EXECUTION
 # ═══════════════════════════════════════════════════════════
diff --git a/README.md b/README.md
index bc781b3..d1cbc2d 100644
--- a/README.md
+++ b/README.md
@@ -19,10 +19,12 @@
 
 ## Features
 
+- **Projects & Workspaces** — Persistent workspaces with knowledge graphs, file trees, and cross-conversation memory. Research accumulates across chats.
+- **Interactive terminal** — Built-in terminal connected to the project workspace. Run commands directly alongside AI-driven research.
 - **Plan + Execute modes** — Plan mode gathers context; Execute mode does the work. Toggle with `Cmd+M`.
 - **Paper research** — OpenAlex, Semantic Scholar, arXiv, CrossRef, Papers With Code. Reads full papers, crawls citation graphs.
 - **Paper writing** — Section-by-section drafting with auto-save. Export to Markdown/LaTeX.
-- **Compute environments** — Execute code on local Docker, SSH remotes, or Modal cloud. Probe GPU/CPU capabilities.
+- **Compute environments** — Execute code on local Docker, SSH remotes, or Modal cloud. Workspace persists independently of compute.
 - **Background jobs** — Celery + Redis. Close the browser, come back later.
 - **Multi-provider LLMs** — OpenAI, Anthropic, OpenRouter, plus local models (Ollama, LM Studio).
 - **MCP servers** — Connect external tools via the Model Context Protocol.
@@ -40,6 +42,8 @@ make up
 
 Open `http://localhost:3000`. Create an account. Add your API keys in **Settings > Providers**.
 
+Project workspaces are stored in a persistent Docker volume (`.workspaces/`), so your research data survives container rebuilds.
+
 > No API keys needed to start — the app guides you through configuration after login.
 
 ## Development
diff --git a/backend/configs/prompts/system_prompt.yaml b/backend/configs/prompts/system_prompt.yaml
index bd340c8..ae4c1d2 100644
--- a/backend/configs/prompts/system_prompt.yaml
+++ b/backend/configs/prompts/system_prompt.yaml
@@ -150,8 +150,30 @@ prompt: |
   {{ compute_env }}
   {% endif %}
 
+  # Project Workspace
+
+  If a project workspace is active, use it to persist knowledge across conversations:
+  
+  - Use `workspace status` at the start of a conversation to understand what has
+    been done before (papers found, notes written, experiments run, known failures)
+  - Use `workspace knowledge_add` to record important entities (papers, methods,
+    datasets, findings) in the knowledge graph
+  - Use `workspace knowledge_relate` to link entities (e.g., paper --proposes--> method)
+  - Use `workspace note` to save research summaries and important findings
+  - Use `workspace knowledge_summary` to review accumulated knowledge
+  - Use `workspace recent_failures` to check for known tool/API issues before retrying
+  
+  The workspace persists independently of compute resources. Files in the workspace
+  (code/, data/, papers/, research/, outputs/) survive compute changes and new conversations.
+  
+  **Important**: The workspace is the source of truth for the project. Always check it
+  before doing redundant work. Save important findings so future conversations can build on them.
+
   # Compute Planning
   When starting tasks that require significant computation (training models, processing large datasets, etc.):
   1. Use `compute_plan` to verify the active node meets requirements
   2. If not, use `compute_select` to switch to a suitable node
   3. Always `sandbox_probe` before executing code on a node for the first time
+
+  Note: The compute resource is separate from the project workspace. Switching compute
+  does not affect your workspace files. The workspace is always available locally.
diff --git a/backend/openmlr/app.py b/backend/openmlr/app.py
index 66f1d03..dead080 100644
--- a/backend/openmlr/app.py
+++ b/backend/openmlr/app.py
@@ -22,6 +22,7 @@
 async def lifespan(app: FastAPI):
     """Startup: create tables & shared state.  Shutdown: teardown sessions."""
     import logging
+
     logger = logging.getLogger("openmlr.app")
 
     async with engine.begin() as conn:
@@ -57,7 +58,9 @@ async def lifespan(app: FastAPI):
 )
 
 # CORS configuration - restrict in production
-_cors_origins = os.environ.get("CORS_ORIGINS", "http://localhost:3000,http://localhost:5173").split(",")
+_cors_origins = os.environ.get("CORS_ORIGINS", "http://localhost:3000,http://localhost:5173").split(
+    ","
+)
 _cors_origins = [origin.strip() for origin in _cors_origins if origin.strip()]
 
 app.add_middleware(
@@ -74,7 +77,9 @@ async def lifespan(app: FastAPI):
 from .routes.compute import router as compute_router
 from .routes.health import router as health_router
 from .routes.keys import router as keys_router
+from .routes.projects import router as projects_router
 from .routes.settings import router as settings_router
+from .routes.terminal import router as terminal_router
 
 app.include_router(auth_router)
 app.include_router(agent_router)
@@ -82,12 +87,15 @@ async def lifespan(app: FastAPI):
 app.include_router(health_router)
 app.include_router(keys_router)
 app.include_router(compute_router)
+app.include_router(projects_router)
+app.include_router(terminal_router)
 
 
 # ── Global error handler ────────────────────────────────
 @app.exception_handler(Exception)
 async def global_exception_handler(request: Request, exc: Exception):
     import logging
+
     logger = logging.getLogger(__name__)
     logger.exception(f"Unhandled exception: {exc}")
     # Don't leak internal details to client
diff --git a/backend/openmlr/compute/workspace.py b/backend/openmlr/compute/workspace.py
index 9517bf3..f342793 100644
--- a/backend/openmlr/compute/workspace.py
+++ b/backend/openmlr/compute/workspace.py
@@ -1,14 +1,54 @@
-"""Workspace Manager — per-conversation filesystem isolation."""
+"""Workspace Manager — project-scoped filesystem with backward-compatible conversation support.
 
+The workspace is the persistent home for all project artifacts:
+code, data, models, outputs, papers, research notes, logs, and knowledge graph.
+It persists across conversations and compute resource changes.
+"""
+
+import json
+import logging
 import os
 import shutil
 import tarfile
 from datetime import UTC, datetime
 from pathlib import Path
 
+log = logging.getLogger(__name__)
+
+# Workspace root: OPENMLR_WORKSPACES_PATH if set, otherwise /app/.workspaces
+WORKSPACES_ROOT = Path(os.environ.get("OPENMLR_WORKSPACES_PATH", "/app/.workspaces"))
+
+# Standard project workspace subdirectories
+PROJECT_SUBDIRS = [
+    "code",
+    "data",
+    "models",
+    "outputs",
+    "papers",
+    "research",
+    "research/searches",
+    "research/notes",
+    "research/citations",
+    "logs",
+    "logs/tool_failures",
+    "logs/compute",
+    "logs/experiments",
+    "venvs",
+    ".project-meta",
+    ".project-meta/plans",
+]
+
+# Legacy conversation-only subdirectories (backward compat)
+LEGACY_SUBDIRS = ["data", "models", "code", "outputs", ".openmlr-meta"]
+
 
 class WorkspaceManager:
-    """Manages isolated workspace directories for each conversation."""
+    """Manages isolated workspace directories for projects and conversations.
+
+    Supports two modes:
+    - Project mode: workspace at WORKSPACES_ROOT/{project_slug}/
+    - Legacy mode: workspace at ~/.openmlr/workspaces/workspace-{uuid}/
+    """
 
     def __init__(self, base_dir: str | Path = None):
         self.base_dir = Path(base_dir) if base_dir else Path.home() / ".openmlr"
@@ -21,23 +61,103 @@ def _ensure_dirs(self) -> None:
         self.workspace_dir.mkdir(parents=True, exist_ok=True)
         self.archive_dir.mkdir(parents=True, exist_ok=True)
 
+    # ── Project-scoped workspaces ────────────────────────
+
+    @staticmethod
+    def get_project_workspace_path(project_slug: str) -> Path:
+        """Return WORKSPACES_ROOT / project_slug (slug is not validated; nothing is created)."""
+        return WORKSPACES_ROOT / project_slug
+
+    @staticmethod
+    def create_project_workspace(project_slug: str, name: str = "", description: str = "") -> Path:
+        """Create (or complete) a project workspace: standard subdirs plus seed metadata files."""
+        path = WORKSPACES_ROOT / project_slug
+        path.mkdir(parents=True, exist_ok=True)
+
+        for subdir in PROJECT_SUBDIRS:
+            (path / subdir).mkdir(parents=True, exist_ok=True)
+
+        # Seed project.json on first creation only; an existing file is never overwritten
+        meta_path = path / ".project-meta" / "project.json"
+        if not meta_path.exists():
+            meta_path.write_text(
+                json.dumps(
+                    {
+                        "name": name or project_slug,
+                        "slug": project_slug,
+                        "description": description,
+                        "created_at": datetime.now(UTC).isoformat(),
+                    },
+                    indent=2,
+                )
+            )
+
+        # Seed an empty knowledge graph (no nodes/edges, version 1) unless one already exists
+        kg_path = path / ".project-meta" / "knowledge.json"
+        if not kg_path.exists():
+            kg_path.write_text(
+                json.dumps(
+                    {
+                        "nodes": [],
+                        "edges": [],
+                        "version": 1,
+                    },
+                    indent=2,
+                )
+            )
+
+        # Seed empty cross-conversation state (state.json) unless one already exists
+        state_path = path / ".project-meta" / "state.json"
+        if not state_path.exists():
+            state_path.write_text(
+                json.dumps(
+                    {
+                        "last_conversation_uuid": None,
+                        "open_questions": [],
+                        "key_findings": [],
+                        "active_experiments": [],
+                    },
+                    indent=2,
+                )
+            )
+
+        return path
+
+    @staticmethod
+    def project_workspace_exists(project_slug: str) -> bool:
+        """Return True if WORKSPACES_ROOT / project_slug exists (contents are not checked)."""
+        return (WORKSPACES_ROOT / project_slug).exists()
+
+    @staticmethod
+    def get_project_workspace_size(project_slug: str) -> int:
+        """Get total size of a project workspace in bytes (0 if it does not exist)."""
+        total = 0
+        # os.walk yields nothing for a missing directory, so no existence check is needed.
+        for dirpath, _, filenames in os.walk(WORKSPACES_ROOT / project_slug):
+            for f in filenames:
+                try:
+                    total += (Path(dirpath) / f).stat().st_size
+                except OSError:
+                    # Broken symlink, or file removed between listing and stat: skip it.
+                    continue
+        return total
+
+    # ── Legacy conversation-scoped workspaces ────────────
+
     def get_workspace_path(self, conversation_uuid: str) -> Path:
-        """Get the workspace directory for a conversation."""
+        """Get the workspace directory for a conversation (legacy mode)."""
         return self.workspace_dir / f"workspace-{conversation_uuid}"
 
     def create_workspace(self, conversation_uuid: str) -> Path:
-        """Create a new workspace directory for a conversation."""
+        """Create a new workspace directory for a conversation (legacy mode)."""
         path = self.get_workspace_path(conversation_uuid)
         path.mkdir(parents=True, exist_ok=True)
-        # Create standard subdirectories
-        for subdir in ["data", "models", "code", "outputs"]:
+        for subdir in LEGACY_SUBDIRS:
             (path / subdir).mkdir(exist_ok=True)
-        # Create meta directory (hidden from agent)
-        (path / ".openmlr-meta").mkdir(exist_ok=True)
         return path
 
     def workspace_exists(self, conversation_uuid: str) -> bool:
-        """Check if a workspace exists."""
+        """Check if a conversation workspace exists."""
         return self.get_workspace_path(conversation_uuid).exists()
 
     def archive_workspace(self, conversation_uuid: str) -> Path | None:
@@ -82,34 +202,27 @@ def get_workspace_size(self, conversation_uuid: str) -> int:
         return total
 
     def list_workspaces(self) -> list[dict]:
-        """List all workspaces with metadata."""
+        """List all conversation workspaces with metadata."""
         workspaces = []
         for path in self.workspace_dir.glob("workspace-*"):
             if path.is_dir():
                 uuid = path.name.replace("workspace-", "")
                 size = self.get_workspace_size(uuid)
-                workspaces.append({
-                    "uuid": uuid,
-                    "path": str(path),
-                    "size_bytes": size,
-                    "created": datetime.fromtimestamp(path.stat().st_ctime, UTC).isoformat(),
-                })
+                workspaces.append(
+                    {
+                        "uuid": uuid,
+                        "path": str(path),
+                        "size_bytes": size,
+                        "created": datetime.fromtimestamp(path.stat().st_ctime, UTC).isoformat(),
+                    }
+                )
         return sorted(workspaces, key=lambda x: x["created"], reverse=True)
 
     def cleanup_archives(self, max_age_days: int = 30, max_count: int = 100) -> dict:
-        """Clean up old workspace archives.
-
-        Args:
-            max_age_days: Delete archives older than this many days
-            max_count: Keep at most this many archives, delete oldest first
-
-        Returns:
-            Dict with deleted count and freed bytes
-        """
+        """Delete archives older than max_age_days, then the oldest ones beyond max_count."""
         deleted = 0
         freed_bytes = 0
 
-        # Get all archives sorted by modification time (oldest first)
         archives = []
         for path in self.archive_dir.glob("workspace-*.tar.gz"):
             if path.is_file():
@@ -118,7 +231,6 @@ def cleanup_archives(self, max_age_days: int = 30, max_count: int = 100) -> dict
 
         archives.sort(key=lambda x: x["mtime"])
 
-        # Delete old archives
         now = datetime.now(UTC)
         for archive in archives:
             age_days = (now - archive["mtime"]).days
@@ -127,7 +239,6 @@ def cleanup_archives(self, max_age_days: int = 30, max_count: int = 100) -> dict
                 archive["path"].unlink()
                 deleted += 1
 
-        # Delete excess archives (oldest first)
         remaining = [a for a in archives if a["path"].exists()]
         while len(remaining) > max_count:
             oldest = remaining.pop(0)
@@ -138,15 +249,7 @@ def cleanup_archives(self, max_age_days: int = 30, max_count: int = 100) -> dict
         return {"deleted": deleted, "freed_bytes": freed_bytes}
 
     def cleanup_workspaces(self, conversation_uuids: list[str], archive: bool = True) -> dict:
-        """Clean up workspaces for deleted conversations.
-
-        Args:
-            conversation_uuids: List of conversation UUIDs to keep
-            archive: Whether to archive before deleting
-
-        Returns:
-            Dict with deleted count and freed bytes
-        """
+        """Clean up workspaces whose UUID is not in conversation_uuids (the UUIDs to keep)."""
         deleted = 0
         freed_bytes = 0
         keep_set = set(conversation_uuids)
diff --git a/backend/openmlr/db/engine.py b/backend/openmlr/db/engine.py
index 8c3e995..0afc5fd 100644
--- a/backend/openmlr/db/engine.py
+++ b/backend/openmlr/db/engine.py
@@ -55,3 +55,8 @@ async def get_db() -> AsyncSession:
             yield session
         finally:
             await session.close()
+
+
+def get_async_session():
+    """Return async_session() for non-dependency code (e.g. WebSockets); caller must close it."""
+    return async_session()
diff --git a/backend/openmlr/db/migrations/versions/004_add_projects.py b/backend/openmlr/db/migrations/versions/004_add_projects.py
new file mode 100644
index 0000000..95f69a8
--- /dev/null
+++ b/backend/openmlr/db/migrations/versions/004_add_projects.py
@@ -0,0 +1,48 @@
+"""Add projects table and project_id to conversations
+
+Revision ID: 004_add_projects
+Revises: 003_migrate_sandbox_to_compute
+Create Date: 2026-04-27
+"""
+from typing import Sequence, Union
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision: str = '004_add_projects'
+down_revision: Union[str, None] = '003_migrate_sandbox_to_compute'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    # Create projects table; (user_id, slug) is unique, so slugs may repeat across users
+    op.create_table(
+        'projects',
+        sa.Column('id', sa.Integer(), primary_key=True),
+        sa.Column('uuid', sa.String(36), unique=True, nullable=False),
+        sa.Column('user_id', sa.Integer(), sa.ForeignKey('users.id', ondelete='CASCADE'), nullable=False),
+        sa.Column('name', sa.String(255), nullable=False),
+        sa.Column('slug', sa.String(255), nullable=False),
+        sa.Column('description', sa.Text(), nullable=True),
+        sa.Column('workspace_path', sa.String(1000), nullable=True),
+        sa.Column('status', sa.String(20), server_default='active', nullable=False),
+        sa.Column('settings', sa.JSON(), nullable=True),
+        sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
+        sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
+    )
+    op.create_index('ix_projects_user_id', 'projects', ['user_id'])
+    op.create_unique_constraint('uq_projects_user_slug', 'projects', ['user_id', 'slug'])
+
+    # Link conversations to projects; deleting a project nulls project_id (ON DELETE SET NULL)
+    op.add_column(
+        'conversations',
+        sa.Column('project_id', sa.Integer(), sa.ForeignKey('projects.id', ondelete='SET NULL'), nullable=True),
+    )
+    op.create_index('ix_conversations_project_id', 'conversations', ['project_id'])
+
+
+def downgrade() -> None:
+    op.drop_index('ix_conversations_project_id', table_name='conversations')
+    op.drop_column('conversations', 'project_id')
+    op.drop_table('projects')
diff --git a/backend/openmlr/db/models.py b/backend/openmlr/db/models.py
index ef40f74..59900e7 100644
--- a/backend/openmlr/db/models.py
+++ b/backend/openmlr/db/models.py
@@ -12,6 +12,7 @@
     Integer,
     String,
     Text,
+    UniqueConstraint,
 )
 from sqlalchemy.dialects.postgresql import ARRAY
 from sqlalchemy.orm import DeclarativeBase, relationship
@@ -37,12 +38,21 @@ class User(Base):
     updated_at = Column(DateTime(timezone=True), default=_utcnow, onupdate=_utcnow, nullable=False)
 
     settings = relationship("UserSetting", back_populates="user", cascade="all, delete-orphan")
-    conversations = relationship("Conversation", back_populates="user", cascade="all, delete-orphan")
-    sandbox_configs = relationship("SandboxConfig", back_populates="user", cascade="all, delete-orphan")
+    conversations = relationship(
+        "Conversation", back_populates="user", cascade="all, delete-orphan"
+    )
+    projects = relationship("Project", back_populates="user", cascade="all, delete-orphan")
+    sandbox_configs = relationship(
+        "SandboxConfig", back_populates="user", cascade="all, delete-orphan"
+    )
     ssh_keys = relationship("SSHKey", back_populates="user", cascade="all, delete-orphan")
     compute_nodes = relationship("ComputeNode", back_populates="user", cascade="all, delete-orphan")
-    research_corpus = relationship("ResearchCorpus", back_populates="user", cascade="all, delete-orphan")
-    writing_projects = relationship("WritingProject", back_populates="user", cascade="all, delete-orphan")
+    research_corpus = relationship(
+        "ResearchCorpus", back_populates="user", cascade="all, delete-orphan"
+    )
+    writing_projects = relationship(
+        "WritingProject", back_populates="user", cascade="all, delete-orphan"
+    )
 
 
 class UserSetting(Base):
@@ -63,26 +73,57 @@ class UserSetting(Base):
     )
 
 
+class Project(Base):
+    """A project groups multiple conversations around a persistent workspace."""
+
+    __tablename__ = "projects"
+
+    id = Column(Integer, primary_key=True)
+    uuid = Column(String(36), unique=True, nullable=False, default=lambda: str(uuid.uuid4()))
+    user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
+    name = Column(String(255), nullable=False)
+    slug = Column(String(255), nullable=False)
+    description = Column(Text, nullable=True)
+    workspace_path = Column(String(1000), nullable=True)  # absolute path to workspace dir
+    status = Column(String(20), default="active", nullable=False)  # active, archived
+    settings = Column("settings", JSON, nullable=True)  # project-level overrides
+    created_at = Column(DateTime(timezone=True), default=_utcnow, nullable=False)
+    updated_at = Column(DateTime(timezone=True), default=_utcnow, onupdate=_utcnow, nullable=False)
+
+    user = relationship("User", back_populates="projects")
+    conversations = relationship("Conversation", back_populates="project")
+
+    __table_args__ = (UniqueConstraint("user_id", "slug", name="uq_projects_user_slug"),)
+
+
 class Conversation(Base):
     __tablename__ = "conversations"
 
     id = Column(Integer, primary_key=True)
     uuid = Column(String(36), unique=True, nullable=False, default=lambda: str(uuid.uuid4()))
     user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
+    project_id = Column(Integer, ForeignKey("projects.id", ondelete="SET NULL"), nullable=True)
     title = Column(String(255), default="New conversation", nullable=False)
     model = Column(String(100), nullable=True)
-    mode = Column(String(20), default="general", nullable=False)  # research, writing, coding, general
+    mode = Column(
+        String(20), default="general", nullable=False
+    )  # research, writing, coding, general
     user_message_count = Column(Integer, default=0, nullable=False)
     extra = Column("extra", JSON, nullable=True)
     created_at = Column(DateTime(timezone=True), default=_utcnow, nullable=False)
     updated_at = Column(DateTime(timezone=True), default=_utcnow, onupdate=_utcnow, nullable=False)
 
     user = relationship("User", back_populates="conversations")
+    project = relationship("Project", back_populates="conversations")
     messages = relationship("Message", back_populates="conversation", cascade="all, delete-orphan")
     corpus = relationship("ResearchCorpus", back_populates="conversation")
     writing_project = relationship("WritingProject", back_populates="conversation")
-    tasks = relationship("ConversationTask", back_populates="conversation", cascade="all, delete-orphan")
-    resources = relationship("ConversationResource", back_populates="conversation", cascade="all, delete-orphan")
+    tasks = relationship(
+        "ConversationTask", back_populates="conversation", cascade="all, delete-orphan"
+    )
+    resources = relationship(
+        "ConversationResource", back_populates="conversation", cascade="all, delete-orphan"
+    )
     jobs = relationship("AgentJob", back_populates="conversation", cascade="all, delete-orphan")
 
 
@@ -90,7 +131,9 @@ class Message(Base):
     __tablename__ = "messages"
 
     id = Column(Integer, primary_key=True)
-    conversation_id = Column(Integer, ForeignKey("conversations.id", ondelete="CASCADE"), nullable=False)
+    conversation_id = Column(
+        Integer, ForeignKey("conversations.id", ondelete="CASCADE"), nullable=False
+    )
     role = Column(String(20), nullable=False)  # system, user, assistant, tool
     content = Column(Text, nullable=False)
     meta = Column("meta", JSON, nullable=True)
@@ -160,7 +203,9 @@ class ResearchCorpus(Base):
     __tablename__ = "research_corpus"
 
     id = Column(Integer, primary_key=True)
-    conversation_id = Column(Integer, ForeignKey("conversations.id", ondelete="SET NULL"), nullable=True)
+    conversation_id = Column(
+        Integer, ForeignKey("conversations.id", ondelete="SET NULL"), nullable=True
+    )
     user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
     paper_id = Column(String(100), nullable=True)  # arxiv ID or DOI
     title = Column(String(500), nullable=False)
@@ -181,7 +226,9 @@ class WritingProject(Base):
 
     id = Column(Integer, primary_key=True)
     user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
-    conversation_id = Column(Integer, ForeignKey("conversations.id", ondelete="SET NULL"), nullable=True)
+    conversation_id = Column(
+        Integer, ForeignKey("conversations.id", ondelete="SET NULL"), nullable=True
+    )
     title = Column(String(500), nullable=False)
     outline = Column(JSON, nullable=True)  # section structure
     sections = Column(JSON, default=dict, nullable=False)  # section_id -> markdown content
@@ -197,12 +244,17 @@ class WritingProject(Base):
 
 class ConversationTask(Base):
     """Persisted tasks (todo items) for a conversation."""
+
     __tablename__ = "conversation_tasks"
 
     id = Column(Integer, primary_key=True)
-    conversation_id = Column(Integer, ForeignKey("conversations.id", ondelete="CASCADE"), nullable=False)
+    conversation_id = Column(
+        Integer, ForeignKey("conversations.id", ondelete="CASCADE"), nullable=False
+    )
     title = Column(String(500), nullable=False)
-    status = Column(String(20), default="pending", nullable=False)  # pending, in_progress, completed, cancelled
+    status = Column(
+        String(20), default="pending", nullable=False
+    )  # pending, in_progress, completed, cancelled
     priority = Column(String(20), default="medium", nullable=True)  # high, medium, low
     order_index = Column(Integer, default=0, nullable=False)
     created_at = Column(DateTime(timezone=True), default=_utcnow, nullable=False)
@@ -213,11 +265,16 @@ class ConversationTask(Base):
 
 class ConversationResource(Base):
     """Persisted resources (papers, code, datasets, reports) for a conversation."""
+
     __tablename__ = "conversation_resources"
 
     id = Column(Integer, primary_key=True)
-    conversation_id = Column(Integer, ForeignKey("conversations.id", ondelete="CASCADE"), nullable=False)
-    resource_id = Column(String(100), unique=True, nullable=False, default=lambda: str(uuid.uuid4())[:8])
+    conversation_id = Column(
+        Integer, ForeignKey("conversations.id", ondelete="CASCADE"), nullable=False
+    )
+    resource_id = Column(
+        String(100), unique=True, nullable=False, default=lambda: str(uuid.uuid4())[:8]
+    )
     title = Column(String(500), nullable=False)
     url = Column(String(2000), nullable=True)
     type = Column(String(20), default="doc", nullable=False)  # paper, code, dataset, doc, report
@@ -229,13 +286,18 @@ class ConversationResource(Base):
 
 class AgentJob(Base):
     """Background job tracking for agent execution."""
+
     __tablename__ = "agent_jobs"
 
     id = Column(Integer, primary_key=True)
     job_id = Column(String(100), unique=True, nullable=False, default=lambda: str(uuid.uuid4()))
-    conversation_id = Column(Integer, ForeignKey("conversations.id", ondelete="CASCADE"), nullable=False)
+    conversation_id = Column(
+        Integer, ForeignKey("conversations.id", ondelete="CASCADE"), nullable=False
+    )
     user_id = Column(Integer, ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
-    status = Column(String(20), default="queued", nullable=False)  # queued, running, completed, failed, cancelled
+    status = Column(
+        String(20), default="queued", nullable=False
+    )  # queued, running, completed, failed, cancelled
     message = Column(Text, nullable=True)  # The user message that triggered this job
     mode = Column(String(20), nullable=True)  # research, writing, coding, general
     error = Column(Text, nullable=True)  # Error message if failed
diff --git a/backend/openmlr/db/operations.py b/backend/openmlr/db/operations.py
index 7bafae0..c5dbaa9 100644
--- a/backend/openmlr/db/operations.py
+++ b/backend/openmlr/db/operations.py
@@ -12,24 +12,158 @@
     ConversationResource,
     ConversationTask,
     Message,
+    Project,
     SSHKey,
     UserSetting,
 )
 
+# ---- Projects ----
+
+
+async def create_project(
+    db: AsyncSession,
+    user_id: int,
+    name: str,
+    slug: str,
+    description: str | None = None,
+    workspace_path: str | None = None,
+    settings: dict | None = None,
+) -> Project:
+    project = Project(
+        user_id=user_id,
+        name=name,
+        slug=slug,
+        description=description,
+        workspace_path=workspace_path,
+        settings=settings,
+    )
+    db.add(project)
+    await db.commit()
+    await db.refresh(project)
+    return project
+
+
+async def get_user_projects(
+    db: AsyncSession,
+    user_id: int,
+    include_archived: bool = False,
+) -> list[Project]:
+    query = select(Project).where(Project.user_id == user_id)
+    if not include_archived:
+        query = query.where(Project.status == "active")
+    query = query.order_by(Project.updated_at.desc())
+    result = await db.execute(query)
+    return list(result.scalars().all())
+
+
+async def get_project_by_id(
+    db: AsyncSession, project_id: int, user_id: int | None = None
+) -> Project | None:
+    query = select(Project).where(Project.id == project_id)
+    if user_id is not None:
+        query = query.where(Project.user_id == user_id)
+    result = await db.execute(query)
+    return result.scalar_one_or_none()
+
+
+async def get_project_by_uuid(
+    db: AsyncSession, uuid: str, user_id: int | None = None
+) -> Project | None:
+    query = select(Project).where(Project.uuid == uuid)
+    if user_id is not None:
+        query = query.where(Project.user_id == user_id)
+    result = await db.execute(query)
+    return result.scalar_one_or_none()
+
+
+async def get_project_by_slug(db: AsyncSession, user_id: int, slug: str) -> Project | None:
+    result = await db.execute(
+        select(Project).where(Project.user_id == user_id, Project.slug == slug)
+    )
+    return result.scalar_one_or_none()
+
+
+# Explicit allowlist of fields that can be updated via update_project.
+# Prevents injection of workspace_path, user_id, id, uuid, etc.
+_PROJECT_UPDATABLE_FIELDS = {"name", "slug", "description", "settings", "status"}
+
+
+async def update_project(
+    db: AsyncSession,
+    project_id: int,
+    user_id: int,
+    **kwargs,
+) -> Project | None:
+    result = await db.execute(
+        select(Project).where(Project.id == project_id, Project.user_id == user_id)
+    )
+    project = result.scalar_one_or_none()
+    if not project:
+        return None
+    for key, value in kwargs.items():
+        if key in _PROJECT_UPDATABLE_FIELDS:
+            setattr(project, key, value)
+    await db.commit()
+    await db.refresh(project)
+    return project
+
+
+async def archive_project(db: AsyncSession, project_id: int, user_id: int) -> Project | None:
+    return await update_project(db, project_id, user_id, status="archived")
+
+
+async def get_project_conversations(db: AsyncSession, project_id: int) -> list[Conversation]:
+    result = await db.execute(
+        select(Conversation)
+        .where(Conversation.project_id == project_id)
+        .order_by(Conversation.updated_at.desc())
+    )
+    return list(result.scalars().all())
+
+
+async def attach_conversation_to_project(
+    db: AsyncSession,
+    conversation_id: int,
+    project_id: int | None,
+    user_id: int | None = None,
+) -> bool:
+    """Attach or detach a conversation from a project.
+
+    When user_id is provided, verifies ownership of both conversation and project.
+    """
+    conv = await get_conversation_by_id(db, conversation_id)
+    if not conv:
+        return False
+    # Verify conversation ownership when user_id is provided
+    if user_id is not None and conv.user_id != user_id:
+        return False
+    # Verify project ownership when attaching (not detaching)
+    if project_id is not None and user_id is not None:
+        project = await get_project_by_id(db, project_id, user_id)
+        if not project:
+            return False
+    conv.project_id = project_id
+    await db.commit()
+    return True
+
+
 # ---- Conversations ----
 
+
 async def create_conversation(
     db: AsyncSession,
     user_id: int,
     title: str = "New conversation",
     model: str | None = None,
     mode: str = "general",
+    project_id: int | None = None,
 ) -> Conversation:
     conv = Conversation(
         user_id=user_id,
         title=title,
         model=model,
         mode=mode,
+        project_id=project_id,
     )
     db.add(conv)
     await db.commit()
@@ -66,23 +200,17 @@ async def delete_conversation(db: AsyncSession, conv_id: int) -> bool:
 
 
 async def update_conversation_title(db: AsyncSession, conv_id: int, title: str):
-    await db.execute(
-        update(Conversation).where(Conversation.id == conv_id).values(title=title)
-    )
+    await db.execute(update(Conversation).where(Conversation.id == conv_id).values(title=title))
     await db.commit()
 
 
 async def update_conversation_model(db: AsyncSession, conv_id: int, model: str):
-    await db.execute(
-        update(Conversation).where(Conversation.id == conv_id).values(model=model)
-    )
+    await db.execute(update(Conversation).where(Conversation.id == conv_id).values(model=model))
     await db.commit()
 
 
 async def update_conversation_extra(db: AsyncSession, conv_id: int, extra: dict):
-    await db.execute(
-        update(Conversation).where(Conversation.id == conv_id).values(extra=extra)
-    )
+    await db.execute(update(Conversation).where(Conversation.id == conv_id).values(extra=extra))
     await db.commit()
 
 
@@ -97,11 +225,10 @@ async def increment_user_message_count(db: AsyncSession, conv_id: int):
 
 # ---- Messages ----
 
+
 async def get_messages(db: AsyncSession, conv_id: int) -> list[Message]:
     result = await db.execute(
-        select(Message)
-        .where(Message.conversation_id == conv_id)
-        .order_by(Message.created_at.asc())
+        select(Message).where(Message.conversation_id == conv_id).order_by(Message.created_at.asc())
     )
     return list(result.scalars().all())
 
@@ -126,21 +253,18 @@ async def add_message(
 
 
 async def clear_messages(db: AsyncSession, conv_id: int):
-    await db.execute(
-        delete(Message).where(Message.conversation_id == conv_id)
-    )
+    await db.execute(delete(Message).where(Message.conversation_id == conv_id))
     await db.commit()
 
 
 # ---- Settings ----
 
-async def get_user_setting(
-    db: AsyncSession, user_id: int, category: str, key: str
-) -> dict | None:
+
+async def get_user_setting(db: AsyncSession, user_id: int, category: str, key: str) -> dict | None:
     from .models import UserSetting
+
     result = await db.execute(
-        select(UserSetting)
-        .where(
+        select(UserSetting).where(
             UserSetting.user_id == user_id,
             UserSetting.category == category,
             UserSetting.key == key,
@@ -151,12 +275,16 @@ async def get_user_setting(
 
 
 async def set_user_setting(
-    db: AsyncSession, user_id: int, category: str, key: str, value: dict | list | str | int | float | bool
+    db: AsyncSession,
+    user_id: int,
+    category: str,
+    key: str,
+    value: dict | list | str | int | float | bool,
 ):
     from .models import UserSetting
+
     result = await db.execute(
-        select(UserSetting)
-        .where(
+        select(UserSetting).where(
             UserSetting.user_id == user_id,
             UserSetting.category == category,
             UserSetting.key == key,
@@ -185,6 +313,7 @@ def _clean_json_value(val: object) -> object:
 
 async def get_all_settings(db: AsyncSession, user_id: int, category: str | None = None) -> dict:
     from .models import UserSetting
+
     query = select(UserSetting).where(UserSetting.user_id == user_id)
     if category:
         query = query.where(UserSetting.category == category)
@@ -201,6 +330,7 @@ async def get_all_settings(db: AsyncSession, user_id: int, category: str | None
 
 async def delete_user_setting(db: AsyncSession, user_id: int, category: str, key: str):
     from .models import UserSetting
+
     await db.execute(
         delete(UserSetting).where(
             UserSetting.user_id == user_id,
@@ -213,6 +343,7 @@ async def delete_user_setting(db: AsyncSession, user_id: int, category: str, key
 
 # ---- Conversation Tasks ----
 
+
 async def get_conversation_tasks(db: AsyncSession, conv_id: int) -> list[ConversationTask]:
     result = await db.execute(
         select(ConversationTask)
@@ -229,9 +360,7 @@ async def upsert_conversation_tasks(
 ) -> list[ConversationTask]:
     """Replace all tasks for a conversation with the new list."""
     # Delete existing tasks
-    await db.execute(
-        delete(ConversationTask).where(ConversationTask.conversation_id == conv_id)
-    )
+    await db.execute(delete(ConversationTask).where(ConversationTask.conversation_id == conv_id))
 
     # Insert new tasks
     new_tasks = []
@@ -267,6 +396,7 @@ async def update_task_status(
 
 # ---- Conversation Resources ----
 
+
 async def get_conversation_resources(db: AsyncSession, conv_id: int) -> list[ConversationResource]:
     result = await db.execute(
         select(ConversationResource)
@@ -286,6 +416,7 @@ async def add_conversation_resource(
     resource_id: str | None = None,
 ) -> ConversationResource:
     import uuid as uuid_mod
+
     resource = ConversationResource(
         conversation_id=conv_id,
         resource_id=resource_id or str(uuid_mod.uuid4())[:8],
@@ -341,7 +472,9 @@ async def upsert_conversation_resources(
 PLAN_RESOURCE_ID = "plan-md"
 
 
-async def upsert_plan_resource(db: AsyncSession, conv_id: int, content: str) -> ConversationResource:
+async def upsert_plan_resource(
+    db: AsyncSession, conv_id: int, content: str
+) -> ConversationResource:
     """Create or update the pinned PLAN.md resource for a conversation."""
     existing = await get_resource_by_id(db, f"{PLAN_RESOURCE_ID}-{conv_id}")
     if existing:
@@ -350,7 +483,8 @@ async def upsert_plan_resource(db: AsyncSession, conv_id: int, content: str) ->
         await db.refresh(existing)
         return existing
     return await add_conversation_resource(
-        db, conv_id,
+        db,
+        conv_id,
         title="PLAN.md",
         resource_type="plan",
         content=content,
@@ -362,7 +496,10 @@ async def upsert_plan_resource(db: AsyncSession, conv_id: int, content: str) ->
 
 
 async def upsert_paper_resource(
-    db: AsyncSession, conv_id: int, title: str, content: str,
+    db: AsyncSession,
+    conv_id: int,
+    title: str,
+    content: str,
 ) -> ConversationResource:
     """Create or update the paper draft resource for a conversation."""
     rid = f"{PAPER_RESOURCE_ID}-{conv_id}"
@@ -374,7 +511,8 @@ async def upsert_paper_resource(
         await db.refresh(existing)
         return existing
     return await add_conversation_resource(
-        db, conv_id,
+        db,
+        conv_id,
         title=title,
         resource_type="paper",
         content=content,
@@ -383,9 +521,13 @@ async def upsert_paper_resource(
 
 
 async def upsert_resource(
-    db: AsyncSession, conv_id: int,
-    resource_id: str, title: str, resource_type: str,
-    content: str | None = None, url: str | None = None,
+    db: AsyncSession,
+    conv_id: int,
+    resource_id: str,
+    title: str,
+    resource_type: str,
+    content: str | None = None,
+    url: str | None = None,
 ) -> ConversationResource:
     """Create or update a resource by resource_id."""
     existing = await get_resource_by_id(db, resource_id)
@@ -398,7 +540,8 @@ async def upsert_resource(
         await db.refresh(existing)
         return existing
     return await add_conversation_resource(
-        db, conv_id,
+        db,
+        conv_id,
         title=title,
         resource_type=resource_type,
         content=content,
@@ -409,6 +552,7 @@ async def upsert_resource(
 
 # ---- Agent Jobs ----
 
+
 async def create_agent_job(
     db: AsyncSession,
     conv_id: int,
@@ -417,6 +561,7 @@ async def create_agent_job(
     mode: str | None = None,
 ) -> AgentJob:
     import uuid as uuid_mod
+
     job = AgentJob(
         job_id=str(uuid_mod.uuid4()),
         conversation_id=conv_id,
@@ -432,9 +577,7 @@ async def create_agent_job(
 
 
 async def get_agent_job(db: AsyncSession, job_id: str) -> AgentJob | None:
-    result = await db.execute(
-        select(AgentJob).where(AgentJob.job_id == job_id)
-    )
+    result = await db.execute(select(AgentJob).where(AgentJob.job_id == job_id))
     return result.scalar_one_or_none()
 
 
@@ -478,6 +621,7 @@ async def update_job_status(
 
 # ---- User Settings ----
 
+
 async def get_user_settings(db: AsyncSession, user_id: int, category: str | None = None) -> dict:
     """Get user settings as a dict. Optionally filter by category."""
     query = select(UserSetting).where(UserSetting.user_id == user_id)
@@ -495,10 +639,7 @@ async def get_user_settings(db: AsyncSession, user_id: int, category: str | None
 async def get_user_agent_settings(db: AsyncSession, user_id: int) -> dict:
     """Get user's agent settings (default_model, research_model, yolo_mode)."""
     result = await db.execute(
-        select(UserSetting).where(
-            UserSetting.user_id == user_id,
-            UserSetting.category == "agent"
-        )
+        select(UserSetting).where(UserSetting.user_id == user_id, UserSetting.category == "agent")
     )
     settings = {}
     for s in result.scalars().all():
@@ -508,9 +649,15 @@ async def get_user_agent_settings(db: AsyncSession, user_id: int) -> dict:
 
 # ---- SSH Keys ----
 
+
 async def create_ssh_key(
-    db: AsyncSession, user_id: int, filename: str, fingerprint: str,
-    algorithm: str, public_key: str, comment: str | None = None,
+    db: AsyncSession,
+    user_id: int,
+    filename: str,
+    fingerprint: str,
+    algorithm: str,
+    public_key: str,
+    comment: str | None = None,
 ) -> SSHKey:
     key = SSHKey(
         user_id=user_id,
@@ -554,9 +701,15 @@ async def delete_ssh_key(db: AsyncSession, user_id: int, filename: str) -> bool:
 
 # ---- Compute Nodes ----
 
+
 async def create_compute_node(
-    db: AsyncSession, user_id: int, name: str, node_type: str, config: dict,
-    is_default: bool = False, priority: int = 0,
+    db: AsyncSession,
+    user_id: int,
+    name: str,
+    node_type: str,
+    config: dict,
+    is_default: bool = False,
+    priority: int = 0,
 ) -> ComputeNode:
     node = ComputeNode(
         user_id=user_id,
@@ -574,12 +727,16 @@ async def create_compute_node(
 
 async def get_compute_nodes(db: AsyncSession, user_id: int) -> list[ComputeNode]:
     result = await db.execute(
-        select(ComputeNode).where(ComputeNode.user_id == user_id).order_by(ComputeNode.priority.desc(), ComputeNode.created_at.desc())
+        select(ComputeNode)
+        .where(ComputeNode.user_id == user_id)
+        .order_by(ComputeNode.priority.desc(), ComputeNode.created_at.desc())
     )
     return list(result.scalars().all())
 
 
-async def get_compute_node_by_id(db: AsyncSession, node_id: int, user_id: int | None = None) -> ComputeNode | None:
+async def get_compute_node_by_id(
+    db: AsyncSession, node_id: int, user_id: int | None = None
+) -> ComputeNode | None:
     query = select(ComputeNode).where(ComputeNode.id == node_id)
     if user_id is not None:
         query = query.where(ComputeNode.user_id == user_id)
@@ -595,7 +752,10 @@ async def get_compute_node_by_name(db: AsyncSession, user_id: int, name: str) ->
 
 
 async def update_compute_node(
-    db: AsyncSession, node_id: int, user_id: int, **kwargs,
+    db: AsyncSession,
+    node_id: int,
+    user_id: int,
+    **kwargs,
 ) -> ComputeNode | None:
     result = await db.execute(
         select(ComputeNode).where(ComputeNode.id == node_id, ComputeNode.user_id == user_id)
diff --git a/backend/openmlr/routes/projects.py b/backend/openmlr/routes/projects.py
new file mode 100644
index 0000000..ae37f64
--- /dev/null
+++ b/backend/openmlr/routes/projects.py
@@ -0,0 +1,545 @@
+"""Project routes — CRUD, file tree, file operations.
+
+Security:
+- Path traversal prevention via relative_to() (not str.startswith)
+- Upload/write size limits
+- Symlink-aware rmtree
+- No server-side paths leaked to API responses
+- Workspace root fallback uses restrictive permissions
+"""
+
+import json
+import logging
+import mimetypes
+import os
+import re
+import shutil
+import uuid as uuid_mod
+from pathlib import Path
+
+from fastapi import APIRouter, Depends, HTTPException, Request, UploadFile
+from fastapi.responses import FileResponse
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from ..db import operations as ops
+from ..db.engine import get_db
+from ..db.models import User
+from ..dependencies import get_current_user
+
+router = APIRouter(prefix="/api/projects", tags=["projects"])
+
+log = logging.getLogger(__name__)
+
+# Size limits
+MAX_UPLOAD_BYTES = 100 * 1024 * 1024  # 100 MB
+MAX_WRITE_BYTES = 10 * 1024 * 1024  # 10 MB
+
+
+def _get_workspaces_root() -> Path:
+    """Get the workspace root directory, falling back to a temp dir if needed."""
+    configured = os.environ.get("OPENMLR_WORKSPACES_PATH")
+    if configured:
+        return Path(configured)
+    default = Path("/app/.workspaces")
+    if default.parent.exists():
+        return default
+    # Fallback for non-Docker environments (tests, native dev)
+    import tempfile
+
+    fallback = Path(tempfile.gettempdir()) / "openmlr-workspaces"
+    fallback.mkdir(parents=True, exist_ok=True, mode=0o700)
+    return fallback
+
+
+WORKSPACES_ROOT = _get_workspaces_root()
+
+
+def _slugify(name: str) -> str:
+    """Generate a filesystem-safe slug from a project name."""
+    slug = name.lower().strip()
+    slug = re.sub(r"[^\w\s-]", "", slug)
+    slug = re.sub(r"[\s_]+", "-", slug)
+    slug = re.sub(r"-+", "-", slug).strip("-")
+    return slug[:60] or "project"
+
+
+def _project_dict(project, conv_count: int | None = None) -> dict:
+    """Serialize a project for the API. No server-side paths exposed."""
+    d = {
+        "id": project.id,
+        "uuid": project.uuid,
+        "name": project.name,
+        "slug": project.slug,
+        "description": project.description,
+        "status": project.status,
+        "settings": project.settings or {},
+        "created_at": project.created_at.isoformat() if project.created_at else None,
+        "updated_at": project.updated_at.isoformat() if project.updated_at else None,
+    }
+    if conv_count is not None:
+        d["conversation_count"] = conv_count
+    return d
+
+
+def _ensure_workspace(workspace_path: str) -> Path:
+    """Ensure workspace directory and standard subdirs exist."""
+    ws = Path(workspace_path)
+    ws.mkdir(parents=True, exist_ok=True)
+    for subdir in [
+        "code",
+        "data",
+        "models",
+        "outputs",
+        "papers",
+        "research",
+        "research/searches",
+        "research/notes",
+        "research/citations",
+        "logs",
+        "logs/tool_failures",
+        "logs/compute",
+        "logs/experiments",
+        "venvs",
+        ".project-meta",
+        ".project-meta/plans",
+    ]:
+        (ws / subdir).mkdir(parents=True, exist_ok=True)
+    return ws
+
+
+def _safe_resolve(workspace_path: str, relative_path: str) -> Path:
+    """Resolve a relative path within the workspace, preventing traversal attacks.
+
+    Uses Path.relative_to() for correct containment checking (not str.startswith).
+    """
+    ws = Path(workspace_path).resolve()
+    target = (ws / relative_path).resolve()
+    try:
+        target.relative_to(ws)
+    except ValueError:
+        raise HTTPException(status_code=400, detail="Path traversal not allowed")
+    return target
+
+
+def _safe_rmtree(target: Path, workspace_path: str) -> None:
+    """Remove a directory tree, refusing to follow symlinks that escape the workspace."""
+    ws = Path(workspace_path).resolve()
+
+    # Check for symlinks that point outside workspace before deleting
+    for root, dirs, files in os.walk(str(target)):
+        root_path = Path(root)
+        for name in dirs + files:
+            item = root_path / name
+            if item.is_symlink():
+                link_target = item.resolve()
+                try:
+                    link_target.relative_to(ws)
+                except ValueError:
+                    raise HTTPException(
+                        status_code=400,
+                        detail="Cannot delete: contains symlink to outside workspace",
+                    )
+
+    shutil.rmtree(target)
+
+
+# ── Project CRUD ─────────────────────────────────────────
+
+
+@router.get("")
+async def list_projects(
+    include_archived: bool = False,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """List all projects for the current user."""
+    projects = await ops.get_user_projects(db, user.id, include_archived=include_archived)
+    result = []
+    for p in projects:
+        convs = await ops.get_project_conversations(db, p.id)
+        result.append(_project_dict(p, conv_count=len(convs)))
+    return {"projects": result}
+
+
+@router.post("")
+async def create_project(
+    request: Request,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Create a new project with its own workspace directory under a per-user subdirectory."""
+    body = await request.json()
+    name = (body.get("name") or "").strip()  # tolerate an explicit JSON null
+    description = (body.get("description") or "").strip() or None
+
+    if not name:
+        raise HTTPException(status_code=400, detail="Missing 'name'")
+
+    slug = _slugify(name)
+
+    # Check for duplicate slug
+    existing = await ops.get_project_by_slug(db, user.id, slug)
+    if existing:
+        slug = f"{slug}-{str(uuid_mod.uuid4())[:6]}"
+
+    # Slugs are only unique per user, so namespace the directory by user id to avoid sharing
+    workspace_path = str(WORKSPACES_ROOT / f"user-{user.id}" / slug)
+    _ensure_workspace(workspace_path)
+
+    # Write initial project metadata
+    meta_path = Path(workspace_path) / ".project-meta" / "project.json"
+    meta_path.write_text(
+        json.dumps(
+            {
+                "name": name,
+                "slug": slug,
+                "description": description,
+                "created_by": user.username,
+            },
+            indent=2,
+        )
+    )
+
+    # Initialize empty knowledge graph
+    kg_path = Path(workspace_path) / ".project-meta" / "knowledge.json"
+    kg_path.write_text(
+        json.dumps(
+            {
+                "nodes": [],
+                "edges": [],
+                "version": 1,
+            },
+            indent=2,
+        )
+    )
+
+    project = await ops.create_project(
+        db,
+        user.id,
+        name,
+        slug,
+        description=description,
+        workspace_path=workspace_path,
+        settings=body.get("settings"),
+    )
+
+    return {"project": _project_dict(project, conv_count=0)}
+
+
+@router.get("/{project_uuid}")
+async def get_project(
+    project_uuid: str,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Get project details including conversation count."""
+    project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+    if not project:
+        raise HTTPException(status_code=404, detail="Project not found")
+    convs = await ops.get_project_conversations(db, project.id)
+    return {"project": _project_dict(project, conv_count=len(convs))}
+
+
+@router.put("/{project_uuid}")
+async def update_project(
+    project_uuid: str,
+    request: Request,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Update project name, description, or settings; null/blank names are ignored."""
+    project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+    if not project:
+        raise HTTPException(status_code=404, detail="Project not found")
+
+    body = await request.json()
+    updates = {}
+    if (body.get("name") or "").strip():  # never blank out an existing name
+        updates["name"] = body["name"].strip()
+    if "description" in body:  # an explicit null (or blank string) clears the description
+        updates["description"] = (body["description"] or "").strip() or None
+    if "settings" in body:
+        updates["settings"] = body["settings"]
+
+    updated = await ops.update_project(db, project.id, user.id, **updates)
+    return {"project": _project_dict(updated)}
+
+
+@router.delete("/{project_uuid}")
+async def delete_project(
+    project_uuid: str,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Archive a project (soft delete). Workspace files are preserved."""
+    project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+    if not project:
+        raise HTTPException(status_code=404, detail="Project not found")
+    await ops.archive_project(db, project.id, user.id)
+    return {"ok": True}
+
+
+@router.get("/{project_uuid}/conversations")
+async def list_project_conversations(
+    project_uuid: str,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """List all conversations within a project."""
+    project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+    if not project:
+        raise HTTPException(status_code=404, detail="Project not found")
+    convs = await ops.get_project_conversations(db, project.id)
+    return {
+        "conversations": [
+            {
+                "id": c.id,
+                "uuid": c.uuid,
+                "title": c.title,
+                "model": c.model,
+                "mode": c.mode,
+                "user_message_count": c.user_message_count,
+                "created_at": c.created_at.isoformat() if c.created_at else None,
+                "updated_at": c.updated_at.isoformat() if c.updated_at else None,
+            }
+            for c in convs
+        ]
+    }
+
+
+@router.post("/{project_uuid}/attach/{conversation_uuid}")
+async def attach_conversation(
+    project_uuid: str,
+    conversation_uuid: str,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Attach an existing conversation to a project."""
+    project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+    if not project:
+        raise HTTPException(status_code=404, detail="Project not found")
+    conv = await ops.get_conversation_by_uuid(db, conversation_uuid)
+    if not conv or conv.user_id != user.id:
+        raise HTTPException(status_code=404, detail="Conversation not found")
+    await ops.attach_conversation_to_project(db, conv.id, project.id, user.id)
+    return {"ok": True}
+
+
+@router.post("/{project_uuid}/detach/{conversation_uuid}")
+async def detach_conversation(
+    project_uuid: str,
+    conversation_uuid: str,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Detach a conversation from a project."""
+    # Verify both project and conversation ownership
+    project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+    if not project:
+        raise HTTPException(status_code=404, detail="Project not found")
+    conv = await ops.get_conversation_by_uuid(db, conversation_uuid)
+    if not conv or conv.user_id != user.id:
+        raise HTTPException(status_code=404, detail="Conversation not found")
+    await ops.attach_conversation_to_project(db, conv.id, None, user.id)
+    return {"ok": True}
+
+
+# ── File Tree & File Operations ──────────────────────────
+
+
+@router.get("/{project_uuid}/files")
+async def list_files(
+    project_uuid: str,
+    path: str = "",
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """List files and directories in the project workspace."""
+    project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+    if not project or not project.workspace_path:
+        raise HTTPException(status_code=404, detail="Project not found")
+
+    target = _safe_resolve(project.workspace_path, path)
+    if not target.exists():
+        raise HTTPException(status_code=404, detail="Path not found")
+    if not target.is_dir():
+        raise HTTPException(status_code=400, detail="Not a directory")
+
+    # target is fully resolved, so the root must be resolved too for relative_to() to succeed
+    ws_root = Path(project.workspace_path).resolve()
+    entries = []
+    try:
+        children = sorted(target.iterdir(), key=lambda p: (not p.is_dir(), p.name.lower()))
+        for item in children:
+            # Skip hidden files except .project-meta
+            if item.name.startswith(".") and item.name != ".project-meta":
+                continue
+            try:
+                stat = item.stat(follow_symlinks=False)
+            except OSError:
+                continue
+            entries.append(
+                {
+                    "name": item.name,
+                    "path": str(item.relative_to(ws_root)),
+                    "is_dir": item.is_dir(),
+                    "size": stat.st_size if item.is_file() else None,
+                    "modified": stat.st_mtime,
+                }
+            )
+    except PermissionError as exc:
+        raise HTTPException(status_code=403, detail="Permission denied") from exc
+
+    return {"path": path, "entries": entries}
+
+
+@router.get("/{project_uuid}/files/{file_path:path}")
+async def read_file(
+    project_uuid: str,
+    file_path: str,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Read a file from the project workspace."""
+    project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+    if not project or not project.workspace_path:
+        raise HTTPException(status_code=404, detail="Project not found")
+
+    target = _safe_resolve(project.workspace_path, file_path)
+    if not target.exists():
+        raise HTTPException(status_code=404, detail="File not found")
+    if target.is_dir():
+        return await list_files(project_uuid, file_path, user, db)
+
+    # Reject symlinks that point outside workspace
+    if target.is_symlink():
+        try:
+            target.resolve().relative_to(Path(project.workspace_path).resolve())
+        except ValueError:
+            raise HTTPException(status_code=400, detail="Symlink points outside workspace")
+
+    # For text files, return content as JSON
+    mime, _ = mimetypes.guess_type(str(target))
+    is_text = (
+        mime is None
+        or mime.startswith("text/")
+        or mime in ("application/json", "application/xml", "application/x-yaml")
+    )
+
+    if is_text:
+        try:
+            content = target.read_text(encoding="utf-8", errors="replace")
+            if len(content) > 500_000:
+                content = content[:500_000] + "\n\n[... truncated at 500KB ...]"
+            return {
+                "path": file_path,
+                "content": content,
+                "size": target.stat().st_size,
+            }
+        except Exception:
+            pass
+
+    return FileResponse(str(target), filename=target.name)
+
+
+@router.put("/{project_uuid}/files/{file_path:path}")
+async def write_file(
+    project_uuid: str,
+    file_path: str,
+    request: Request,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Write UTF-8 text content to a file in the project workspace (400 if not a string)."""
+    project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+    if not project or not project.workspace_path:
+        raise HTTPException(status_code=404, detail="Project not found")
+
+    target = _safe_resolve(project.workspace_path, file_path)
+    body = await request.json()
+    content = body.get("content", "")
+    if not isinstance(content, str):
+        raise HTTPException(status_code=400, detail="'content' must be a string")
+    # Enforce the limit on encoded bytes, not characters (one char can take up to 4 bytes)
+    if len(content) > MAX_WRITE_BYTES or len(content.encode("utf-8")) > MAX_WRITE_BYTES:
+        raise HTTPException(
+            status_code=413,
+            detail=f"Content too large (max {MAX_WRITE_BYTES // 1024 // 1024}MB)",
+        )
+
+    target.parent.mkdir(parents=True, exist_ok=True)
+    target.write_text(content, encoding="utf-8")
+
+    return {"ok": True, "path": file_path, "size": target.stat().st_size}
+
+
+@router.delete("/{project_uuid}/files/{file_path:path}")
+async def delete_file(
+    project_uuid: str,
+    file_path: str,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Delete a file or directory from the project workspace.
+
+    Refuses to remove the workspace root itself or its protected top-level directories.
+    """
+    project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+    if not project or not project.workspace_path:
+        raise HTTPException(status_code=404, detail="Project not found")
+
+    target = _safe_resolve(project.workspace_path, file_path)
+    if not target.exists():
+        raise HTTPException(status_code=404, detail="File not found")
+
+    # Prevent deleting the workspace root or top-level standard dirs
+    ws = Path(project.workspace_path).resolve()  # target is resolved, so resolve the root too
+    rel = target.relative_to(ws)
+    protected = {
+        "code",
+        "data",
+        "models",
+        "outputs",
+        "papers",
+        "research",
+        "logs",
+        ".project-meta",
+    }
+    if target == ws or str(rel) in protected:
+        raise HTTPException(status_code=400, detail="Cannot delete standard workspace directory")
+
+    if target.is_dir():
+        _safe_rmtree(target, project.workspace_path)
+    else:
+        target.unlink()
+
+    return {"ok": True}
+
+
+@router.post("/{project_uuid}/upload/{file_path:path}")
+async def upload_file(
+    project_uuid: str,
+    file_path: str,
+    file: UploadFile,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Upload a file to the project workspace."""
+    project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+    if not project or not project.workspace_path:
+        raise HTTPException(status_code=404, detail="Project not found")
+
+    target = _safe_resolve(project.workspace_path, file_path)
+
+    # Read with size limit to prevent OOM
+    content = await file.read(MAX_UPLOAD_BYTES + 1)
+    if len(content) > MAX_UPLOAD_BYTES:
+        raise HTTPException(
+            status_code=413,
+            detail=f"File too large (max {MAX_UPLOAD_BYTES // 1024 // 1024}MB)",
+        )
+
+    target.parent.mkdir(parents=True, exist_ok=True)
+    target.write_bytes(content)
+
+    return {"ok": True, "path": file_path, "size": len(content)}
diff --git a/backend/openmlr/routes/terminal.py b/backend/openmlr/routes/terminal.py
new file mode 100644
index 0000000..dbd2028
--- /dev/null
+++ b/backend/openmlr/routes/terminal.py
@@ -0,0 +1,275 @@
+"""Terminal WebSocket endpoint — interactive PTY connected to compute resource.
+
+Provides a real terminal experience via xterm.js on the frontend,
+connected to the project workspace's compute environment.
+
+Security:
+- Minimal environment (no server secrets leaked)
+- Workspace path validated against WORKSPACES_ROOT
+- Shell spawned via subprocess (not os.fork) to avoid async corruption
+- --norc --noprofile to prevent .bashrc injection
+- Proper zombie process cleanup with SIGKILL escalation
+"""
+
+import asyncio
+import fcntl
+import json
+import logging
+import os
+import pty
+import signal
+import struct
+import subprocess
+import termios
+from pathlib import Path
+
+from fastapi import APIRouter, Query, WebSocket, WebSocketDisconnect
+
+from ..auth.security import decode_access_token
+from ..db import operations as ops
+from ..db.engine import get_async_session
+from ..db.models import User
+
+router = APIRouter(tags=["terminal"])
+
+log = logging.getLogger(__name__)
+
+# Allowlisted environment variables for the PTY process.
+# Server secrets (DATABASE_URL, API keys, JWT_SECRET_KEY, etc.) are NOT passed.
+_SAFE_ENV_KEYS = {"LANG", "LC_ALL", "LC_CTYPE", "TZ"}
+
+
+def _build_safe_env(workspace_path: str) -> dict[str, str]:
+    """Assemble the small, fixed environment handed to the PTY shell.
+
+    HOME and PWD point at the workspace; of the server's own environment only
+    the non-empty variables named in ``_SAFE_ENV_KEYS`` are carried over.
+    """
+    inherited = {name: os.environ.get(name) for name in _SAFE_ENV_KEYS}
+    return {
+        "TERM": "xterm-256color",
+        "HOME": workspace_path,
+        "PWD": workspace_path,
+        "PATH": "/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin",
+        "SHELL": "/bin/bash",
+        "USER": "openmlr",
+        **{name: value for name, value in inherited.items() if value},
+    }
+
+
+def _validate_workspace_path(workspace_path: str) -> bool:
+    """Return True when ``workspace_path`` resolves inside ``WORKSPACES_ROOT``."""
+    from .projects import WORKSPACES_ROOT
+
+    try:
+        candidate = Path(workspace_path).resolve()
+        candidate.relative_to(WORKSPACES_ROOT.resolve())
+    except (ValueError, RuntimeError):
+        return False  # not under the root, or resolve() failed
+    return True
+
+
+async def _authenticate_ws(token: str | None) -> User | None:
+    """Resolve the ``token`` query parameter to an active user.
+
+    Returns None when the token is missing, fails to decode, carries no
+    usable integer ``sub`` claim, or matches no active user.
+    """
+    from sqlalchemy import select
+
+    payload = decode_access_token(token) if token else None
+    try:
+        # A missing or non-numeric "sub" must reject the socket, not raise.
+        user_id = int(payload["sub"]) if payload else None
+    except (KeyError, TypeError, ValueError):
+        user_id = None
+    if user_id is None:
+        return None
+    async with get_async_session() as db:
+        query = select(User).where(User.id == user_id, User.is_active == True)  # noqa: E712
+        return (await db.execute(query)).scalar_one_or_none()
+
+
+async def _cleanup_process(pid: int, master_fd: int) -> None:
+    """Release the PTY master and make sure the shell process is reaped.
+
+    Sends SIGTERM, polls for exit for roughly two seconds, then falls back
+    to SIGKILL followed by a blocking wait so no zombie is left behind.
+    """
+    try:
+        os.close(master_fd)  # the master side is released before signalling
+    except OSError:
+        pass
+    if pid <= 0:
+        return
+    try:
+        os.kill(pid, signal.SIGTERM)
+    except ProcessLookupError:
+        return  # no such process left to signal
+
+    polls_left = 20  # 20 polls, 0.1s apart
+    while polls_left > 0:
+        polls_left -= 1
+        try:
+            reaped_pid, _status = os.waitpid(pid, os.WNOHANG)
+        except ChildProcessError:
+            return  # already reaped
+        if reaped_pid != 0:
+            return  # the process exited
+        await asyncio.sleep(0.1)
+
+    try:
+        os.kill(pid, signal.SIGKILL)
+        os.waitpid(pid, 0)  # blocking wait after SIGKILL
+    except (ProcessLookupError, ChildProcessError):
+        pass
+
+
+@router.websocket("/api/terminal/{project_uuid}")
+async def terminal_websocket(
+    websocket: WebSocket,
+    project_uuid: str,
+    token: str = Query(default=None),
+):
+    """WebSocket endpoint for interactive terminal sessions.
+
+    Spawns a PTY process in the project workspace directory.
+    Messages from the client are written to the PTY stdin.
+    Output from the PTY is sent back to the client as binary frames.
+
+    Special messages (JSON):
+    - {"type": "resize", "cols": 80, "rows": 24} - resize the terminal
+    - {"type": "input", "data": "..."} - send input to the PTY
+    - Any other text frame, and any binary frame, is treated as raw input
+    """
+    # Authenticate
+    user = await _authenticate_ws(token)
+    if not user:
+        await websocket.close(code=4001, reason="Unauthorized")
+        return
+
+    # Look up the project to get the workspace path
+    async with get_async_session() as db:
+        project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+        if not project or not project.workspace_path:
+            await websocket.close(code=4004, reason="Project not found")
+            return
+        workspace_path = project.workspace_path
+
+    # Validate workspace path is within allowed root
+    if not _validate_workspace_path(workspace_path):
+        log.warning(
+            f"Terminal rejected: workspace path {workspace_path} "
+            f"is outside allowed root (user={user.id})"
+        )
+        await websocket.close(code=4003, reason="Invalid workspace path")
+        return
+
+    # Verify workspace exists
+    if not Path(workspace_path).exists():
+        await websocket.close(code=4004, reason="Workspace not found")
+        return
+
+    await websocket.accept()
+
+    # Spawn PTY using subprocess instead of os.fork() to avoid
+    # corrupting the async event loop and leaking file descriptors.
+    master_fd, slave_fd = pty.openpty()
+    try:
+        proc = subprocess.Popen(
+            ["/bin/bash", "--norc", "--noprofile"],
+            stdin=slave_fd,
+            stdout=slave_fd,
+            stderr=slave_fd,
+            cwd=workspace_path,
+            env=_build_safe_env(workspace_path),
+            start_new_session=True,
+            close_fds=True,
+        )
+    except Exception as e:
+        log.error(f"Failed to spawn terminal: {e}")
+        os.close(master_fd)
+        os.close(slave_fd)
+        await websocket.close(code=4500, reason="Failed to spawn shell")
+        return
+
+    # The parent keeps only the master side, switched to non-blocking mode.
+    os.close(slave_fd)
+    os.set_blocking(master_fd, False)
+
+    async def read_pty():
+        """Forward PTY output to the WebSocket until EOF, error or disconnect."""
+        readable = asyncio.Event()
+        # master_fd is non-blocking, so wait for readiness: an idle shell makes
+        # os.read raise BlockingIOError (an OSError), which is not end-of-stream.
+        asyncio.get_running_loop().add_reader(master_fd, readable.set)
+        try:
+            while True:
+                await readable.wait()
+                readable.clear()
+                try:
+                    data = os.read(master_fd, 4096)
+                except BlockingIOError:
+                    continue
+                if not data:
+                    break
+                await websocket.send_bytes(data)
+        except Exception as e:  # OSError once the PTY closes, or a dropped socket
+            log.debug(f"PTY read ended: {e}")
+
+    def write_input(payload: bytes) -> None:
+        """Write client input to the PTY, capped at 4KB per message."""
+        try:
+            os.write(master_fd, payload[:4096])
+        except BlockingIOError:
+            log.debug("PTY input buffer full; dropping input")
+
+    def apply_resize(data: dict) -> None:
+        """Resize the PTY, clamping to 1-500 columns and 1-200 rows."""
+        try:
+            cols = max(1, min(int(data.get("cols", 80)), 500))
+            rows = max(1, min(int(data.get("rows", 24)), 200))
+        except (TypeError, ValueError, OverflowError):
+            return  # malformed size: ignore it rather than end the session
+        fcntl.ioctl(master_fd, termios.TIOCSWINSZ, struct.pack("HHHH", rows, cols, 0, 0))
+
+    async def write_pty():
+        """Read from WebSocket and write to PTY."""
+        try:
+            while True:
+                msg = await websocket.receive()
+                if msg.get("type") == "websocket.disconnect":
+                    break
+                text, raw = msg.get("text"), msg.get("bytes")
+                if text is not None:
+                    try:
+                        data = json.loads(text)
+                    except ValueError:  # not JSON: plain text input
+                        data = None
+                    kind = data.get("type") if isinstance(data, dict) else None
+                    if kind == "resize":
+                        apply_resize(data)
+                    elif kind == "input":
+                        if isinstance(data.get("data", ""), str):
+                            write_input(data.get("data", "").encode())
+                    else:
+                        write_input(text.encode())
+                elif raw is not None:
+                    write_input(raw)
+        except Exception as e:  # includes WebSocketDisconnect
+            log.debug(f"PTY write ended: {e}")
+
+    # Run reader and writer concurrently; stop both when either finishes.
+    tasks = [asyncio.create_task(read_pty()), asyncio.create_task(write_pty())]
+    try:
+        await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
+    finally:
+        for task in tasks:
+            task.cancel()
+        # Unregister the reader before the fd is closed and its number reused.
+        asyncio.get_running_loop().remove_reader(master_fd)
+        await _cleanup_process(proc.pid, master_fd)
+        try:
+            await websocket.close()
+        except Exception:
+            pass
diff --git a/backend/openmlr/sandbox/local.py b/backend/openmlr/sandbox/local.py
index 4b578d3..2968f15 100644
--- a/backend/openmlr/sandbox/local.py
+++ b/backend/openmlr/sandbox/local.py
@@ -15,19 +15,30 @@ class LocalSandbox(SandboxInterface):
     def __init__(self, workdir: str = None, workspace_manager=None):
         self._workspace_manager = workspace_manager
         self._conversation_uuid = None
+        self._project_workspace = None  # project workspace path (takes priority)
         self.workdir = workdir or os.getcwd()
 
     async def create(self, config: dict) -> "LocalSandbox":
         self.workdir = config.get("workdir", os.getcwd())
         self._conversation_uuid = config.get("conversation_uuid")
+        self._project_workspace = config.get("project_workspace_path")
 
-        # If workspace manager is available and conversation UUID is set,
-        # use the per-conversation workspace
-        if self._workspace_manager and self._conversation_uuid:
+        # Priority: project workspace > conversation workspace > default workdir
+        if self._project_workspace:
+            # Validate the project workspace path is within the allowed root
+            ws_path = Path(self._project_workspace).resolve()
+            from ..compute.workspace import WORKSPACES_ROOT
+
+            try:
+                ws_path.relative_to(WORKSPACES_ROOT.resolve())
+            except ValueError:
+                raise ValueError("Project workspace path is outside allowed root")
+            ws_path.mkdir(parents=True, exist_ok=True)
+            self.workdir = str(ws_path)
+        elif self._workspace_manager and self._conversation_uuid:
             ws_path = self._workspace_manager.create_workspace(self._conversation_uuid)
             self.workdir = str(ws_path)
         elif self._workspace_manager:
-            # Fallback: create workspace without UUID
             ws_path = self._workspace_manager.create_workspace("default")
             self.workdir = str(ws_path)
 
@@ -36,7 +47,9 @@ async def create(self, config: dict) -> "LocalSandbox":
     async def execute(self, command: str, timeout: int = 120) -> ExecutionResult:
         return await self.execute_stream(command, timeout)
 
-    async def execute_stream(self, command: str, timeout: int = 120, on_chunk=None) -> ExecutionResult:
+    async def execute_stream(
+        self, command: str, timeout: int = 120, on_chunk=None
+    ) -> ExecutionResult:
         """Execute a command with optional streaming output."""
         start = time.monotonic()
         try:
@@ -99,16 +112,27 @@ async def _read_stream(stream, is_stderr):
         except Exception as e:
             return ExecutionResult(output=f"Error: {str(e)}", success=False, exit_code=-1)
 
-    async def read_file(self, path: str) -> str:
+    def _resolve_path(self, path: str) -> Path:
+        """Resolve a path against workdir; confined to it only for project workspaces."""
         target = Path(path).expanduser()
         if not target.is_absolute():
             target = Path(self.workdir) / target
+        resolved = target.resolve()
+        workdir_resolved = Path(self.workdir).resolve()
+        # Containment is enforced only when a project workspace is set; else any path passes
+        if self._project_workspace:
+            try:
+                resolved.relative_to(workdir_resolved)
+            except ValueError:
+                raise PermissionError("Access denied: path outside workspace")
+        return resolved
+
+    async def read_file(self, path: str) -> str:
+        target = self._resolve_path(path)
         return target.read_text(encoding="utf-8", errors="replace")
 
     async def write_file(self, path: str, content: str) -> bool:
-        target = Path(path).expanduser()
-        if not target.is_absolute():
-            target = Path(self.workdir) / target
+        target = self._resolve_path(path)
         target.parent.mkdir(parents=True, exist_ok=True)
         target.write_text(content, encoding="utf-8")
         return True
@@ -122,19 +146,12 @@ async def edit_file(self, path: str, old: str, new: str) -> bool:
         return True
 
     async def file_exists(self, path: str) -> bool:
-        target = Path(path).expanduser()
-        if not target.is_absolute():
-            target = Path(self.workdir) / target
+        target = self._resolve_path(path)
         return target.exists()
 
     async def list_files(self, path: str = ".") -> list[str]:
-        target = Path(path).expanduser()
-        if not target.is_absolute():
-            target = Path(self.workdir) / target
-        return sorted([
-            f"{e.name}{'/' if e.is_dir() else ''}"
-            for e in target.iterdir()
-        ])
+        target = self._resolve_path(path)
+        return sorted([f"{e.name}{'/' if e.is_dir() else ''}" for e in target.iterdir()])
 
     async def probe_environment(self):
         return await probe_sandbox(self)
diff --git a/backend/openmlr/sandbox/manager.py b/backend/openmlr/sandbox/manager.py
index 32060a0..7dd893b 100644
--- a/backend/openmlr/sandbox/manager.py
+++ b/backend/openmlr/sandbox/manager.py
@@ -1,4 +1,10 @@
-"""SandboxManager — lifecycle management and provider selection."""
+"""SandboxManager — lifecycle management and provider selection.
+
+The sandbox handles code execution on a compute resource.
+The workspace (project-scoped) is decoupled: it persists independently
+of which compute resource is active. The sandbox receives the workspace
+path so it can operate within the project directory.
+"""
 
 from .interface import SandboxInterface
 from .local import LocalSandbox
@@ -7,13 +13,24 @@
 
 
 class SandboxManager:
-    """Manages sandbox lifecycle: create, switch, destroy."""
+    """Manages sandbox lifecycle: create, switch, destroy.
+
+    Workspace and compute are decoupled:
+    - project_workspace_path: persistent project directory (survives compute changes)
+    - provider/config: determines WHERE code executes (local, ssh, modal)
+    """
 
-    def __init__(self, workspace_manager=None, conversation_uuid: str = None):
+    def __init__(
+        self,
+        workspace_manager=None,
+        conversation_uuid: str = None,
+        project_workspace_path: str = None,
+    ):
         self._active: SandboxInterface | None = None
         self.active_type: str = "none"
         self._workspace_manager = workspace_manager
         self._conversation_uuid = conversation_uuid
+        self._project_workspace_path = project_workspace_path
 
     def get_active(self) -> SandboxInterface | None:
         return self._active
@@ -28,6 +45,8 @@ async def create(self, provider: str, config: dict = None) -> SandboxInterface:
 
         # Inject workspace and conversation context
         config["conversation_uuid"] = self._conversation_uuid
+        if self._project_workspace_path:
+            config["project_workspace_path"] = self._project_workspace_path
 
         if provider == "local":
             sandbox = LocalSandbox(workspace_manager=self._workspace_manager)
diff --git a/backend/openmlr/tools/papers.py b/backend/openmlr/tools/papers.py
index ffd97db..37561dc 100644
--- a/backend/openmlr/tools/papers.py
+++ b/backend/openmlr/tools/papers.py
@@ -20,7 +20,7 @@
 OPENALEX_API = "https://api.openalex.org"
 SEMANTIC_SCHOLAR_API = "https://api.semanticscholar.org/graph/v1"
 CROSSREF_API = "https://api.crossref.org"
-ARXIV_API = "http://export.arxiv.org/api/query"
+ARXIV_API = "https://export.arxiv.org/api/query"
 AR5IV_BASE = "https://ar5iv.labs.arxiv.org/html"
 PWC_API = "https://paperswithcode.com/api/v1"
 
@@ -72,8 +72,17 @@ def create_papers_tool() -> ToolSpec:
                 "operation": {
                     "type": "string",
                     "enum": [
-                        "search", "arxiv_search", "semantic_search", "trending", "details", "read_paper",
-                        "citations", "recommend", "find_code", "find_datasets", "author_papers",
+                        "search",
+                        "arxiv_search",
+                        "semantic_search",
+                        "trending",
+                        "details",
+                        "read_paper",
+                        "citations",
+                        "recommend",
+                        "find_code",
+                        "find_datasets",
+                        "author_papers",
                     ],
                     "description": (
                         "Operation to perform: "
@@ -126,22 +135,36 @@ def create_papers_tool() -> ToolSpec:
 _search_counts: dict[int, int] = {}  # session hash -> count
 _BUDGET_DEFAULT = 25
 
+
 def _check_budget(session=None) -> tuple[bool, str]:
     """Check if search budget allows another API call. Returns (ok, message)."""
     key = id(session) if session else 0
     count = _search_counts.get(key, 0)
-    budget = session.config.paper_search_budget if session and hasattr(session, 'config') else _BUDGET_DEFAULT
+    budget = (
+        session.config.paper_search_budget
+        if session and hasattr(session, "config")
+        else _BUDGET_DEFAULT
+    )
     if count >= budget:
-        return False, f"Search budget exhausted ({count}/{budget} calls). Ask the user before continuing."
+        return (
+            False,
+            f"Search budget exhausted ({count}/{budget} calls). Ask the user before continuing.",
+        )
     return True, ""
 
+
 def _increment_budget(session=None):
     key = id(session) if session else 0
     _search_counts[key] = _search_counts.get(key, 0) + 1
 
+
 def _get_budget_info(session=None) -> dict:
     key = id(session) if session else 0
-    budget = session.config.paper_search_budget if session and hasattr(session, 'config') else _BUDGET_DEFAULT
+    budget = (
+        session.config.paper_search_budget
+        if session and hasattr(session, "config")
+        else _BUDGET_DEFAULT
+    )
     return {"used": _search_counts.get(key, 0), "max": budget}
 
 
@@ -158,7 +181,18 @@ async def _handle_papers(
     **kwargs,
 ) -> tuple[str, bool]:
     # Budget check for API-calling operations
-    api_ops = {"search", "arxiv_search", "semantic_search", "trending", "details", "citations", "recommend", "find_code", "find_datasets", "author_papers"}
+    api_ops = {
+        "search",
+        "arxiv_search",
+        "semantic_search",
+        "trending",
+        "details",
+        "citations",
+        "recommend",
+        "find_code",
+        "find_datasets",
+        "author_papers",
+    }
     if operation in api_ops:
         ok, msg = _check_budget(session)
         if not ok:
@@ -167,10 +201,13 @@ async def _handle_papers(
         # Emit budget update
         if session:
             from ..agent.types import AgentEvent
-            await session.emit(AgentEvent(
-                event_type="search_budget",
-                data=_get_budget_info(session),
-            ))
+
+            await session.emit(
+                AgentEvent(
+                    event_type="search_budget",
+                    data=_get_budget_info(session),
+                )
+            )
 
     handlers = {
         "search": lambda: _search(query, year_from, year_to, limit, source),
@@ -196,7 +233,10 @@ async def _handle_papers(
 
 # ── Search (OpenAlex with S2 fallback) ────────────────────────────────────
 
-async def _search(query: str, year_from: int = None, year_to: int = None, limit: int = 10, source: str = "auto") -> tuple[str, bool]:
+
+async def _search(
+    query: str, year_from: int = None, year_to: int = None, limit: int = 10, source: str = "auto"
+) -> tuple[str, bool]:
     if not query:
         return "Provide a 'query' for search.", False
 
@@ -219,7 +259,9 @@ async def _search(query: str, year_from: int = None, year_to: int = None, limit:
     return "Invalid source specified", False
 
 
-async def _openalex_search(query: str, year_from: int = None, year_to: int = None, limit: int = 10) -> tuple[str, bool]:
+async def _openalex_search(
+    query: str, year_from: int = None, year_to: int = None, limit: int = 10
+) -> tuple[str, bool]:
     """Search using OpenAlex API with retry logic."""
     params = _get_openalex_params({"search": query, "per_page": min(limit, 50)})
 
@@ -254,7 +296,9 @@ async def _openalex_search(query: str, year_from: int = None, year_to: int = Non
     total = r.json().get("meta", {}).get("count", len(works))
     lines = [f"Found {total} papers for '{query}' (via OpenAlex):\n"]
     for i, w in enumerate(works, 1):
-        authors = ", ".join(a.get("author", {}).get("display_name", "") for a in (w.get("authorships") or [])[:3])
+        authors = ", ".join(
+            a.get("author", {}).get("display_name", "") for a in (w.get("authorships") or [])[:3]
+        )
         if len(w.get("authorships", [])) > 3:
             authors += " et al."
         doi = (w.get("doi") or "").replace("https://doi.org/", "")
@@ -270,7 +314,10 @@ async def _openalex_search(query: str, year_from: int = None, year_to: int = Non
 
 # ── arXiv Search ────────────────────────────────────
 
-async def _arxiv_search(query: str, year_from: int = None, year_to: int = None, limit: int = 10) -> tuple[str, bool]:
+
+async def _arxiv_search(
+    query: str, year_from: int = None, year_to: int = None, limit: int = 10
+) -> tuple[str, bool]:
     """Search arXiv papers directly. Great for ML/CS/Physics preprints."""
     if not query:
         return "Provide a 'query' for search.", False
@@ -367,7 +414,10 @@ async def _arxiv_search(query: str, year_from: int = None, year_to: int = None,
 
 # ── Semantic Scholar Search ────────────────────────────────────
 
-async def _semantic_scholar_search(query: str, year_from: int = None, year_to: int = None, limit: int = 10) -> tuple[str, bool]:
+
+async def _semantic_scholar_search(
+    query: str, year_from: int = None, year_to: int = None, limit: int = 10
+) -> tuple[str, bool]:
     """Search using Semantic Scholar API with retry logic."""
     if not query:
         return "Provide a 'query' for search.", False
@@ -400,13 +450,19 @@ async def _semantic_scholar_search(query: str, year_from: int = None, year_to: i
             max_retries=3,
         )
     except RateLimitError:
-        return "Semantic Scholar rate limit reached. Try again later or add SEMANTIC_SCHOLAR_API_KEY.", False
+        return (
+            "Semantic Scholar rate limit reached. Try again later or add SEMANTIC_SCHOLAR_API_KEY.",
+            False,
+        )
     except Exception as e:
         log.warning(f"Semantic Scholar search error: {e}")
         return f"Semantic Scholar error: {str(e)[:200]}", False
 
     if r.status_code == 429:
-        return "Semantic Scholar rate limit reached. Try again later or add SEMANTIC_SCHOLAR_API_KEY.", False
+        return (
+            "Semantic Scholar rate limit reached. Try again later or add SEMANTIC_SCHOLAR_API_KEY.",
+            False,
+        )
     if r.status_code != 200:
         return f"Semantic Scholar error {r.status_code}: {r.text[:300]}", False
 
@@ -441,12 +497,15 @@ async def _semantic_scholar_search(query: str, year_from: int = None, year_to: i
 
 # ── Trending (OpenAlex) ──────────────────────────────────
 
+
 async def _trending(query: str = None, limit: int = 10) -> tuple[str, bool]:
-    params = _get_openalex_params({
-        "sort": "cited_by_count:desc",
-        "filter": "from_publication_date:2024-01-01",
-        "per_page": min(limit, 50),
-    })
+    params = _get_openalex_params(
+        {
+            "sort": "cited_by_count:desc",
+            "filter": "from_publication_date:2024-01-01",
+            "per_page": min(limit, 50),
+        }
+    )
     if query:
         params["search"] = query
 
@@ -472,7 +531,9 @@ async def _trending(query: str = None, limit: int = 10) -> tuple[str, bool]:
 
     lines = [f"Trending papers{f' on: {query}' if query else ''}:\n"]
     for i, w in enumerate(works, 1):
-        authors = ", ".join(a.get("author", {}).get("display_name", "") for a in (w.get("authorships") or [])[:3])
+        authors = ", ".join(
+            a.get("author", {}).get("display_name", "") for a in (w.get("authorships") or [])[:3]
+        )
         lines.append(
             f"{i}. **{w.get('title', 'Untitled')}** ({w.get('publication_year', '?')})\n"
             f"   {authors}  |  {w.get('cited_by_count', 0)} citations\n"
@@ -482,6 +543,7 @@ async def _trending(query: str = None, limit: int = 10) -> tuple[str, bool]:
 
 # ── Details (OpenAlex + CrossRef) ─────────────────────────
 
+
 async def _details(paper_id: str) -> tuple[str, bool]:
     if not paper_id:
         return "Provide a 'paper_id'.", False
@@ -511,7 +573,9 @@ async def _details(paper_id: str) -> tuple[str, bool]:
         return f"Paper not found: {paper_id}", False
 
     w = r.json()
-    authors = ", ".join(a.get("author", {}).get("display_name", "") for a in (w.get("authorships") or []))
+    authors = ", ".join(
+        a.get("author", {}).get("display_name", "") for a in (w.get("authorships") or [])
+    )
     doi = (w.get("doi") or "").replace("https://doi.org/", "")
     oa_url = (w.get("open_access") or {}).get("oa_url", "")
     arxiv_id = _extract_arxiv_from_ids(w.get("ids", {}))
@@ -555,8 +619,12 @@ async def _crossref_details(doi: str) -> tuple[str, bool]:
 
     w = r.json().get("message", {})
     title = (w.get("title") or ["Untitled"])[0]
-    authors = ", ".join(f"{a.get('given', '')} {a.get('family', '')}" for a in (w.get("author") or []))
-    year = (w.get("published-print") or w.get("published-online") or {}).get("date-parts", [[None]])[0][0]
+    authors = ", ".join(
+        f"{a.get('given', '')} {a.get('family', '')}" for a in (w.get("author") or [])
+    )
+    year = (w.get("published-print") or w.get("published-online") or {}).get(
+        "date-parts", [[None]]
+    )[0][0]
 
     lines = [
         f"# {title}",
@@ -571,6 +639,7 @@ async def _crossref_details(doi: str) -> tuple[str, bool]:
 
 # ── Read Paper (ArXiv HTML via ar5iv) ─────────────────────
 
+
 async def _read_paper(paper_id: str, section: str = None) -> tuple[str, bool]:
     if not paper_id:
         return "Provide a 'paper_id' (arXiv ID like '2301.12345').", False
@@ -594,6 +663,7 @@ async def _read_paper(paper_id: str, section: str = None) -> tuple[str, bool]:
         return f"Failed to fetch paper HTML (status {r.status_code}).", False
 
     from bs4 import BeautifulSoup
+
     soup = BeautifulSoup(r.text, "lxml")
     sections = _parse_sections(soup)
 
@@ -620,6 +690,7 @@ async def _read_paper(paper_id: str, section: str = None) -> tuple[str, bool]:
 
 # ── Citations (OpenAlex) ──────────────────────────────────
 
+
 async def _citations(paper_id: str, limit: int = 10) -> tuple[str, bool]:
     if not paper_id:
         return "Provide a 'paper_id'.", False
@@ -645,7 +716,9 @@ async def _citations(paper_id: str, limit: int = 10) -> tuple[str, bool]:
 
     w = r.json()
     ref_ids = w.get("referenced_works", [])[:limit]
-    lines = [f"## References ({len(w.get('referenced_works', []))} total, showing {len(ref_ids)})\n"]
+    lines = [
+        f"## References ({len(w.get('referenced_works', []))} total, showing {len(ref_ids)})\n"
+    ]
 
     # Batch-fetch referenced works
     if ref_ids:
@@ -671,11 +744,13 @@ async def _citations(paper_id: str, limit: int = 10) -> tuple[str, bool]:
     try:
         r3 = await fetch_with_retry(
             f"{OPENALEX_API}/works",
-            params=_get_openalex_params({
-                "filter": f"cites:{oa_id}",
-                "sort": "cited_by_count:desc",
-                "per_page": limit,
-            }),
+            params=_get_openalex_params(
+                {
+                    "filter": f"cites:{oa_id}",
+                    "sort": "cited_by_count:desc",
+                    "per_page": limit,
+                }
+            ),
             timeout=20,
             max_retries=2,
         )
@@ -693,6 +768,7 @@ async def _citations(paper_id: str, limit: int = 10) -> tuple[str, bool]:
 
 # ── Recommendations (OpenAlex related_works) ──────────────
 
+
 async def _recommend(paper_id: str, limit: int = 10) -> tuple[str, bool]:
     if not paper_id:
         return "Provide a 'paper_id'.", False
@@ -736,7 +812,9 @@ async def _recommend(paper_id: str, limit: int = 10) -> tuple[str, bool]:
 
     lines = ["## Related Papers\n"]
     for i, w in enumerate(r2.json().get("results", []), 1):
-        authors = ", ".join(a.get("author", {}).get("display_name", "") for a in (w.get("authorships") or [])[:3])
+        authors = ", ".join(
+            a.get("author", {}).get("display_name", "") for a in (w.get("authorships") or [])[:3]
+        )
         lines.append(
             f"{i}. **{w.get('title', 'Untitled')}** ({w.get('publication_year', '?')})\n"
             f"   {authors}  |  {w.get('cited_by_count', 0)} citations\n"
@@ -746,6 +824,7 @@ async def _recommend(paper_id: str, limit: int = 10) -> tuple[str, bool]:
 
 # ── Find Code (Papers With Code) ─────────────────────────
 
+
 async def _find_code(query: str) -> tuple[str, bool]:
     if not query:
         return "Provide a query.", False
@@ -781,6 +860,7 @@ async def _find_code(query: str) -> tuple[str, bool]:
 
 # ── Find Datasets (Papers With Code) ─────────────────────
 
+
 async def _find_datasets(query: str) -> tuple[str, bool]:
     if not query:
         return "Provide a query.", False
@@ -815,6 +895,7 @@ async def _find_datasets(query: str) -> tuple[str, bool]:
 
 # ── Author Papers (Semantic Scholar) ─────────────────────
 
+
 async def _author_papers(author_query: str, limit: int = 10) -> tuple[str, bool]:
     """Find papers by a specific author using Semantic Scholar."""
     if not author_query:
@@ -833,13 +914,19 @@ async def _author_papers(author_query: str, limit: int = 10) -> tuple[str, bool]
             max_retries=3,
         )
     except RateLimitError:
-        return "Semantic Scholar rate limit reached. Try again later or add SEMANTIC_SCHOLAR_API_KEY.", False
+        return (
+            "Semantic Scholar rate limit reached. Try again later or add SEMANTIC_SCHOLAR_API_KEY.",
+            False,
+        )
     except Exception as e:
         log.warning(f"Semantic Scholar author search error: {e}")
         return f"Author search error: {str(e)[:200]}", False
 
     if r.status_code == 429:
-        return "Semantic Scholar rate limit reached. Try again later or add SEMANTIC_SCHOLAR_API_KEY.", False
+        return (
+            "Semantic Scholar rate limit reached. Try again later or add SEMANTIC_SCHOLAR_API_KEY.",
+            False,
+        )
     if r.status_code != 200:
         return f"Author search error {r.status_code}: {r.text[:300]}", False
 
@@ -867,7 +954,10 @@ async def _author_papers(author_query: str, limit: int = 10) -> tuple[str, bool]
             max_retries=3,
         )
     except RateLimitError:
-        return "Semantic Scholar rate limit reached. Try again later or add SEMANTIC_SCHOLAR_API_KEY.", False
+        return (
+            "Semantic Scholar rate limit reached. Try again later or add SEMANTIC_SCHOLAR_API_KEY.",
+            False,
+        )
     except Exception as e:
         log.warning(f"Semantic Scholar author papers error: {e}")
         return f"Error fetching author papers: {str(e)[:200]}", False
@@ -904,6 +994,7 @@ async def _author_papers(author_query: str, limit: int = 10) -> tuple[str, bool]
 
 # ── Helpers ───────────────────────────────────────────────
 
+
 def _to_openalex_id(paper_id: str) -> str:
     """Convert various IDs to OpenAlex lookup format."""
     if paper_id.startswith("W") or paper_id.startswith("https://openalex.org/"):
@@ -917,10 +1008,10 @@ def _to_openalex_id(paper_id: str) -> str:
 
 
 def _extract_arxiv_id(text: str) -> str | None:
-    match = re.search(r'(\d{4}\.\d{4,5}(?:v\d+)?)', text)
+    match = re.search(r"(\d{4}\.\d{4,5}(?:v\d+)?)", text)
     if match:
         return match.group(1)
-    match = re.search(r'arxiv\.org/(?:abs|pdf)/(\d{4}\.\d{4,5}(?:v\d+)?)', text)
+    match = re.search(r"arxiv\.org/(?:abs|pdf)/(\d{4}\.\d{4,5}(?:v\d+)?)", text)
     if match:
         return match.group(1)
     return None
@@ -955,11 +1046,13 @@ def _parse_sections(soup) -> list[dict]:
 
     abstract = soup.find("div", class_="ltx_abstract")
     if abstract:
-        sections.append({
-            "title": "Abstract",
-            "text": abstract.get_text(strip=True).replace("Abstract", "", 1).strip(),
-            "level": 2,
-        })
+        sections.append(
+            {
+                "title": "Abstract",
+                "text": abstract.get_text(strip=True).replace("Abstract", "", 1).strip(),
+                "level": 2,
+            }
+        )
 
     for heading in soup.find_all(["h2", "h3", "h4"]):
         level = int(heading.name[1])
diff --git a/backend/openmlr/tools/registry.py b/backend/openmlr/tools/registry.py
index 67f9997..1d8bc28 100644
--- a/backend/openmlr/tools/registry.py
+++ b/backend/openmlr/tools/registry.py
@@ -10,15 +10,28 @@
     "plan": {
         # Plan mode: ask questions, create plans, read context — NO execution tools
         "allowed": {
-            "ask_user", "plan_tool",
+            "ask_user",
+            "plan_tool",
             # Read-only tools for gathering context
-            "read_file", "list_dir", "glob_files", "grep_search",
-            "web_search", "papers",
-            "github_search", "github_read_file", "github_read_repo",
-            "github_find_examples", "github_search_repos", "github_get_readme",
+            "read_file",
+            "list_dir",
+            "glob_files",
+            "grep_search",
+            "web_search",
+            "papers",
+            "github_search",
+            "github_read_file",
+            "github_read_repo",
+            "github_find_examples",
+            "github_search_repos",
+            "github_get_readme",
             "github_list_repos",
             # Compute planning (read-only / advisory)
-            "compute_list", "compute_plan", "compute_probe",
+            "compute_list",
+            "compute_plan",
+            "compute_probe",
+            # Workspace (knowledge graph, notes, search — always accessible)
+            "workspace",
         },
         "blocked_message": (
             "Tool '{tool}' is not available in PLAN mode. "
@@ -87,7 +100,9 @@ def is_tool_allowed(self, name: str) -> tuple[bool, str]:
         blocked_tools = restrictions.get("blocked", set())
         if blocked_tools:
             if name in blocked_tools:
-                error_msg = restrictions.get("blocked_message", "Tool '{tool}' not allowed in this mode.")
+                error_msg = restrictions.get(
+                    "blocked_message", "Tool '{tool}' not allowed in this mode."
+                )
                 return False, error_msg.format(tool=name, mode=self._current_mode)
             return True, ""
 
@@ -116,14 +131,16 @@ def get_tool_specs_for_llm(self, filter_by_mode: bool = True) -> list[dict]:
                 if not allowed:
                     continue
 
-            specs.append({
-                "type": "function",
-                "function": {
-                    "name": tool.name,
-                    "description": tool.description,
-                    "parameters": tool.parameters,
-                },
-            })
+            specs.append(
+                {
+                    "type": "function",
+                    "function": {
+                        "name": tool.name,
+                        "description": tool.description,
+                        "parameters": tool.parameters,
+                    },
+                }
+            )
         return specs
 
     def get_raw_specs(self) -> list[ToolSpec]:
@@ -175,7 +192,10 @@ async def call_tool(
                 return await tool.handler(**kwargs) if kwargs else await tool.handler(**arguments)
             except TypeError as e:
                 # Handle argument mismatches (model sending wrong param names)
-                return f"Tool argument error: {e}. Expected parameters: {list(sig.parameters.keys())}", False
+                return (
+                    f"Tool argument error: {e}. Expected parameters: {list(sig.parameters.keys())}",
+                    False,
+                )
 
         # MCP tool (no handler — dispatch to MCP client)
         if self._mcp_client:
@@ -253,11 +273,18 @@ def create_tool_router(sandbox_manager=None) -> ToolRouter:
 
     # Register compute tools
     from .compute_tools import create_compute_tools
+
     router.register_many(create_compute_tools())
 
+    # Register workspace tools
+    from .workspace_tools import create_workspace_tools
+
+    router.register_many(create_workspace_tools())
+
     # Register sandbox tools if manager provided
     if sandbox_manager:
         from .sandbox_tools import create_sandbox_tools
+
         router.register_many(create_sandbox_tools(sandbox_manager))
 
     return router
diff --git a/backend/openmlr/tools/workspace_tools.py b/backend/openmlr/tools/workspace_tools.py
new file mode 100644
index 0000000..2789a0f
--- /dev/null
+++ b/backend/openmlr/tools/workspace_tools.py
@@ -0,0 +1,383 @@
+"""Workspace tools — project workspace operations for the agent.
+
+Provides tools for the agent to interact with the project workspace:
+- View workspace status and file tree
+- Search files in workspace
+- Save research notes
+- Read/update the knowledge graph
+- Log tool failures
+"""
+
+import json
+import logging
+from contextvars import ContextVar
+
+from ..agent.types import ToolSpec
+from ..workspace.knowledge import KnowledgeGraph
+from ..workspace.persistence import WorkspacePersistence
+
+log = logging.getLogger(__name__)
+
+# Per-async-context workspace references — safe for concurrent sessions.
+# ContextVar ensures each request/task has its own workspace context,
+# preventing cross-user contamination in the async server.
+_workspace_path_var: ContextVar[str | None] = ContextVar("workspace_path", default=None)
+_persistence_var: ContextVar[WorkspacePersistence | None] = ContextVar("persistence", default=None)
+_knowledge_var: ContextVar[KnowledgeGraph | None] = ContextVar("knowledge", default=None)
+
+
+def set_workspace_context(workspace_path: str | None) -> None:
+    """Bind the workspace helpers for the current context, or clear them if the path is empty."""
+    _workspace_path_var.set(workspace_path)
+    if not workspace_path:
+        _persistence_var.set(None)
+        _knowledge_var.set(None)
+        return
+    _persistence_var.set(WorkspacePersistence(workspace_path))
+    _knowledge_var.set(KnowledgeGraph(workspace_path))
+
+
+def _require_workspace() -> tuple[WorkspacePersistence, KnowledgeGraph]:
+    """Return the (persistence, knowledge) pair bound to this context; ValueError if unset."""
+    persistence = _persistence_var.get()
+    knowledge = _knowledge_var.get()
+    if persistence is None or knowledge is None:  # identity check, not truthiness
+        raise ValueError("No project workspace is active. Create or select a project first.")
+    return persistence, knowledge
+
+
+async def _handle_workspace(
+    operation: str,
+    # status, knowledge_summary, recent_failures: no operation-specific arguments
+    # search, knowledge_query
+    query: str = "",
+    # note
+    topic: str = "",
+    content: str = "",
+    # knowledge_add
+    entity_id: str = "",
+    entity_type: str = "",
+    label: str = "",
+    properties: str = "",
+    # knowledge_relate
+    source_id: str = "",
+    target_id: str = "",
+    relationship: str = "",
+    # session is forwarded to the status, note, knowledge_add and knowledge_relate handlers;
+    # any other keyword arguments are accepted and ignored
+    session=None,
+    **kwargs,
+) -> tuple[str, bool]:
+    """Dispatch a workspace tool call on `operation` and return (message, success)."""
+    try:
+        if operation == "status":
+            return await _workspace_status(session)
+        elif operation == "search":
+            return await _workspace_search(query)
+        elif operation == "note":
+            return await _workspace_note(topic, content, session)
+        elif operation == "knowledge_add":
+            return await _knowledge_add(entity_id, entity_type, label, properties, session)
+        elif operation == "knowledge_relate":
+            return await _knowledge_relate(source_id, target_id, relationship, session)
+        elif operation == "knowledge_query":
+            return await _knowledge_query(query)
+        elif operation == "knowledge_summary":
+            return await _knowledge_summary()
+        elif operation == "recent_failures":
+            return await _recent_failures()
+        else:
+            return f"Unknown workspace operation: {operation}", False
+    except ValueError as e:  # e.g. raised by _require_workspace(); message is returned verbatim
+        return str(e), False
+    except Exception as e:  # details go to the log only; the caller gets a generic message
+        log.warning(f"Workspace tool error ({operation}): {e}")
+        return "Workspace operation failed. Check server logs for details.", False
+
+
+async def _workspace_status(session=None) -> tuple[str, bool]:  # session is not used here
+    """Build a Markdown status report from the persistence summary, graph summary, and state."""
+    persistence, knowledge = _require_workspace()
+
+    summary = persistence.get_workspace_summary()
+    kg_summary = knowledge.get_summary()
+
+    lines = [
+        "## Workspace Status",
+        "",
+        f"**Papers:** {summary['papers']}",
+        f"**Research notes:** {summary['research_notes']}",
+        f"**Search results saved:** {summary['search_results']}",
+        f"**Code files:** {summary['code_files']}",
+        f"**Experiments logged:** {summary['experiments']}",
+        f"**Tool failures logged:** {summary['tool_failures']}",
+        "",
+        "### Knowledge Graph",
+        f"Entities: {kg_summary['total_nodes']} | Relationships: {kg_summary['total_edges']}",
+    ]
+
+    if kg_summary.get("type_counts"):
+        lines.append(
+            "Types: " + ", ".join(f"{t}: {c}" for t, c in kg_summary["type_counts"].items())
+        )
+
+    if summary.get("recent_tool_failures"):
+        lines.append("\n### Recent Tool Failures")
+        for f in summary["recent_tool_failures"]:
+            lines.append(f"- **{f['tool']}**: {f['error'][:100]}")  # error clipped to 100 chars
+
+    state = persistence.get_state()
+    if state.get("key_findings"):
+        lines.append("\n### Key Findings")
+        for finding in state["key_findings"][-5:]:  # last five entries only
+            lines.append(f"- {finding}")
+
+    if state.get("open_questions"):
+        lines.append("\n### Open Questions")
+        for q in state["open_questions"][-5:]:  # last five entries only
+            lines.append(f"- {q}")
+
+    return "\n".join(lines), True
+
+
+async def _workspace_search(query: str) -> tuple[str, bool]:
+    """Search files by name, then content (first 10k chars; files over 500 kB are skipped)."""
+    import os
+
+    persistence, _ = _require_workspace()
+
+    if not query:
+        return "Please provide a search query.", False
+
+    results = []
+    query_lower = query.lower()
+    ws_path = persistence.workspace_path
+
+    # Limits to prevent DoS from deeply nested or very large workspaces
+    max_depth = 8
+    max_files_scanned = 5000
+    files_scanned = 0
+    ws_path_str = str(ws_path)
+
+    for dirpath, dirnames, filenames in os.walk(ws_path):
+        # Depth = number of path separators in dirpath below the workspace root
+        depth = dirpath[len(ws_path_str) :].count(os.sep)
+        if depth >= max_depth:
+            dirnames.clear()  # Don't descend further
+            continue  # files directly in this directory are skipped as well
+
+        for fname in filenames:
+            if files_scanned >= max_files_scanned:
+                break
+            if fname.startswith("."):  # hidden files are ignored and not counted
+                continue
+
+            files_scanned += 1
+            fpath = os.path.join(dirpath, fname)
+            rel_path = os.path.relpath(fpath, ws_path)
+
+            # Name match (case-insensitive substring); content is not read in this case
+            if query_lower in fname.lower():
+                results.append(f"- **{rel_path}** (name match)")
+                continue
+
+            # Content match: skip files over 500,000 bytes; read only the first 10,000 chars
+            try:
+                if os.path.getsize(fpath) > 500_000:
+                    continue
+                with open(fpath, encoding="utf-8", errors="ignore") as f:
+                    content = f.read(10000)
+                if query_lower in content.lower():
+                    results.append(f"- **{rel_path}** (content match)")
+            except Exception:  # unreadable file: skip it
+                continue
+
+        if files_scanned >= max_files_scanned:
+            break
+
+    if not results:
+        return f"No files found matching '{query}'.", True
+
+    return f"## Search Results for '{query}'\n\n" + "\n".join(results[:30]), True  # first 30
+
+
+async def _workspace_note(topic: str, content: str, session=None) -> tuple[str, bool]:
+    """Persist a research note; both `topic` and `content` must be non-empty."""
+    store, _ = _require_workspace()
+    if not (topic and content):
+        return "Please provide both 'topic' and 'content' for the note.", False
+    # Tag the note with the originating conversation when the session exposes one.
+    origin = None
+    if session:
+        origin = getattr(session, "conversation_uuid", None)
+    saved = store.save_research_note(topic, content, origin)
+    return f"Research note saved: {saved.name}", True
+
+
+async def _knowledge_add(
+    entity_id: str,
+    entity_type: str,
+    label: str,
+    properties: str,
+    session=None,
+) -> tuple[str, bool]:
+    """Add or update a graph entity; `properties` is an optional JSON object (string or dict)."""
+    _, knowledge = _require_workspace()
+    if not entity_id or not entity_type or not label:
+        return "Please provide entity_id, entity_type, and label.", False
+    props = {}
+    if properties:
+        try:
+            props = json.loads(properties) if isinstance(properties, str) else properties
+        except json.JSONDecodeError:
+            return "Invalid JSON in properties.", False
+        if not isinstance(props or {}, dict):  # a truthy list/scalar would break add_entity
+            return "Properties must be a JSON object.", False
+    existed = knowledge.get_entity(entity_id) is not None
+    conv_uuid = getattr(session, "conversation_uuid", None) if session else None
+    is_new = knowledge.add_entity(entity_id, entity_type, label, props, conv_uuid)
+    if not is_new and not existed:  # add_entity also returns False when MAX_NODES is reached
+        return "Knowledge graph is full; entity was not added.", False
+    knowledge.save()
+    return f"{'Added' if is_new else 'Updated'} entity: {label} ({entity_type})", True
+
+
+async def _knowledge_relate(
+    source_id: str,
+    target_id: str,
+    relationship: str,
+    session=None,
+) -> tuple[str, bool]:
+    """Add or update the directed edge source_id -> target_id; both entities must exist."""
+    _, knowledge = _require_workspace()
+    if not source_id or not target_id or not relationship:
+        return "Please provide source_id, target_id, and relationship.", False
+    if knowledge.get_entity(source_id) is None or knowledge.get_entity(target_id) is None:
+        return "Failed to add relationship. Ensure both entities exist.", False
+    conv = getattr(session, "conversation_uuid", None) if session else None
+    existed = any(n["id"] == target_id for n in knowledge.get_neighbors(source_id, "out"))
+    is_new = knowledge.add_relationship(source_id, target_id, relationship, conversation_uuid=conv)
+    if not is_new and not existed:  # False with no prior edge means MAX_EDGES was reached
+        return "Failed to add relationship. Knowledge graph is at edge capacity.", False
+    knowledge.save()  # persist updates to an existing edge as well as new edges
+    verb = "Added" if is_new else "Updated"
+    return f"{verb} relationship: {source_id} --[{relationship}]--> {target_id}", True
+
+
+async def _knowledge_query(query: str) -> tuple[str, bool]:
+    """List entities matching `query`, each followed by up to five of its graph neighbors."""
+    _, graph = _require_workspace()
+    if not query:
+        return "Please provide a search query.", False
+    matches = graph.search_entities(query)
+    if not matches:
+        return f"No entities found matching '{query}'.", True
+
+    report = [f"## Knowledge Graph: '{query}'\n"]
+    for hit in matches:
+        hit_id = hit["id"]
+        report.append(f"- **{hit.get('label', hit_id)}** ({hit.get('type', '?')})")
+        # Show at most five neighbors per match.
+        report.extend(
+            f"  - {nb.get('relationship', '?')} -> {nb.get('label', nb['id'])}"
+            for nb in graph.get_neighbors(hit_id)[:5]
+        )
+    return "\n".join(report), True
+
+
+async def _knowledge_summary() -> tuple[str, bool]:
+    """Return the graph's conversation-context text, or a notice when that text is empty."""
+    _, graph = _require_workspace()
+    summary_text = graph.get_context_for_conversation()
+    # An empty context string is swapped for a readable notice.
+    message = summary_text or "Knowledge graph is empty."
+    return message, True
+
+
+async def _recent_failures() -> tuple[str, bool]:
+    """Render the failures from get_recent_failures(limit=10) as Markdown bullets."""
+    store, _ = _require_workspace()
+    entries = store.get_recent_failures(limit=10)
+    if not entries:
+        return "No recent tool failures.", True
+    bullets = [
+        f"- **{item['tool']}** ({item.get('timestamp', '?')}): {item['error'][:200]}"
+        for item in entries
+    ]
+    return "\n".join(["## Recent Tool Failures\n", *bullets]), True
+
+
+def create_workspace_tools() -> list[ToolSpec]:
+    """Build the `workspace` ToolSpec; the listed types must mirror NODE_TYPES / EDGE_TYPES."""
+    return [
+        ToolSpec(
+            name="workspace",
+            description=(
+                "Interact with the project workspace — persistent storage for research data, "
+                "knowledge graph, notes, and logs.\n\n"
+                "Operations:\n"
+                "- status: View workspace summary (file counts, knowledge graph size, recent failures)\n"
+                "- search: Search files by name or content (requires 'query')\n"
+                "- note: Save a research note (requires 'topic' and 'content')\n"
+                "- knowledge_add: Add entity to knowledge graph (requires 'entity_id', 'entity_type', 'label'; optional 'properties' as JSON)\n"
+                "- knowledge_relate: Add relationship (requires 'source_id', 'target_id', 'relationship')\n"
+                "- knowledge_query: Search knowledge graph (requires 'query')\n"
+                "- knowledge_summary: Get full knowledge graph context\n"
+                "- recent_failures: View recent tool/API failure logs\n\n"
+                "Entity types: paper, concept, method, dataset, finding, question, experiment, tool, author, code_artifact\n"
+                "Relationship types: cites, implements, evaluates_on, proposes, introduces, relates_to, answers, depends_on, authored_by, uses, produces, contradicts, extends"
+            ),
+            parameters={
+                "type": "object",
+                "properties": {
+                    "operation": {
+                        "type": "string",
+                        "enum": [
+                            "status",
+                            "search",
+                            "note",
+                            "knowledge_add",
+                            "knowledge_relate",
+                            "knowledge_query",
+                            "knowledge_summary",
+                            "recent_failures",
+                        ],
+                        "description": "The workspace operation to perform.",
+                    },
+                    "query": {
+                        "type": "string",
+                        "description": "Search query (for search, knowledge_query).",
+                    },
+                    "topic": {"type": "string", "description": "Note topic (for note)."},
+                    "content": {"type": "string", "description": "Note content (for note)."},
+                    "entity_id": {
+                        "type": "string",
+                        "description": "Entity ID (for knowledge_add).",
+                    },
+                    "entity_type": {
+                        "type": "string",
+                        "description": "Entity type (for knowledge_add).",
+                    },
+                    "label": {"type": "string", "description": "Entity label (for knowledge_add)."},
+                    "properties": {
+                        "type": "string",
+                        "description": "JSON string of additional properties (for knowledge_add).",
+                    },
+                    "source_id": {
+                        "type": "string",
+                        "description": "Source entity ID (for knowledge_relate).",
+                    },
+                    "target_id": {
+                        "type": "string",
+                        "description": "Target entity ID (for knowledge_relate).",
+                    },
+                    "relationship": {
+                        "type": "string",
+                        "description": "Relationship type (for knowledge_relate).",
+                    },
+                },
+                "required": ["operation"],
+            },
+            handler=_handle_workspace,
+        ),
+    ]
diff --git a/backend/openmlr/workspace/__init__.py b/backend/openmlr/workspace/__init__.py
new file mode 100644
index 0000000..c475db8
--- /dev/null
+++ b/backend/openmlr/workspace/__init__.py
@@ -0,0 +1,6 @@
+"""Workspace package — project-scoped persistence, knowledge graph, and data logging."""
+
+from .knowledge import KnowledgeGraph
+from .persistence import WorkspacePersistence
+
+__all__ = ["KnowledgeGraph", "WorkspacePersistence"]
diff --git a/backend/openmlr/workspace/knowledge.py b/backend/openmlr/workspace/knowledge.py
new file mode 100644
index 0000000..db5746a
--- /dev/null
+++ b/backend/openmlr/workspace/knowledge.py
@@ -0,0 +1,375 @@
+"""Knowledge Graph — lightweight persistent knowledge store backed by networkx.
+
+Stores entities (papers, concepts, methods, datasets, findings) and their
+relationships as a directed graph. Serialized as JSON in the project workspace.
+
+The graph enables:
+- Cross-conversation knowledge accumulation
+- Context injection when starting new conversations
+- Finding related prior work within a project
+- Tracking what the agent knows vs. doesn't know
+"""
+
+import json
+import logging
+from datetime import UTC, datetime
+from pathlib import Path
+
+import networkx as nx
+
+log = logging.getLogger(__name__)
+
+# Node types for the knowledge graph
+NODE_TYPES = {
+    "paper",
+    "concept",
+    "method",
+    "dataset",
+    "finding",
+    "question",
+    "experiment",
+    "tool",
+    "author",
+    "code_artifact",
+}
+
+# Edge types (relationships)
+EDGE_TYPES = {
+    "cites",  # paper -> paper
+    "implements",  # code_artifact -> method
+    "evaluates_on",  # experiment -> dataset
+    "proposes",  # paper -> method
+    "introduces",  # paper -> dataset
+    "relates_to",  # any -> any
+    "answers",  # finding -> question
+    "depends_on",  # method -> method, code -> code
+    "authored_by",  # paper -> author
+    "uses",  # experiment -> method
+    "produces",  # experiment -> finding
+    "contradicts",  # finding -> finding
+    "extends",  # method -> method
+}
+
+
+# Size limits to prevent DoS via unbounded graph growth
+MAX_NODES = 10_000
+MAX_EDGES = 50_000
+
+
+class KnowledgeGraph:
+    """A persistent knowledge graph for a project workspace.
+
+    Uses networkx DiGraph internally and serializes to JSON.
+    Thread-safe for single-writer (agent loop is single-threaded per conversation).
+    """
+
+    def __init__(self, workspace_path: str | Path):
+        self.workspace_path = Path(workspace_path)
+        self.kg_path = self.workspace_path / ".project-meta" / "knowledge.json"  # JSON store
+        self._graph: nx.DiGraph = nx.DiGraph()
+        self._dirty = False  # set by mutating methods; cleared by save()
+        self._load()  # populate _graph from kg_path when the file exists
+
+    def _load(self) -> None:
+        """Load the graph from kg_path; a missing or unreadable file yields an empty graph."""
+        if not self.kg_path.exists():
+            self._graph = nx.DiGraph()
+            return
+
+        try:
+            data = json.loads(self.kg_path.read_text(encoding="utf-8"))
+            if data.get("nodes") or data.get("edges"):
+                self._graph = nx.DiGraph()
+                for node in data.get("nodes", []):
+                    node_id = node.get("id")
+                    if not node_id:
+                        log.warning("Skipping node without 'id' in knowledge graph")
+                        continue
+                    attrs = {k: v for k, v in node.items() if k != "id"}
+                    self._graph.add_node(node_id, **attrs)
+                for edge in data.get("edges", []):
+                    src = edge.get("source")
+                    tgt = edge.get("target")
+                    if not src or not tgt:
+                        log.warning("Skipping edge without source/target in knowledge graph")
+                        continue
+                    attrs = {k: v for k, v in edge.items() if k not in ("source", "target")}
+                    self._graph.add_edge(src, tgt, **attrs)  # also creates missing endpoint nodes
+            else:
+                self._graph = nx.DiGraph()
+        except Exception as e:  # any read/parse error: warn and start from an empty graph
+            log.warning(f"Failed to load knowledge graph: {e}")
+            self._graph = nx.DiGraph()
+
+    def save(self) -> None:
+        """Persist the knowledge graph to disk as JSON.
+
+        Skips the write when nothing changed and the file already exists. The
+        payload is written to a sibling temp file and then renamed over
+        kg_path, so an interrupted write cannot leave a truncated file that
+        _load() would replace with an empty graph.
+        """
+        if not self._dirty and self.kg_path.exists():
+            return
+
+        self.kg_path.parent.mkdir(parents=True, exist_ok=True)
+        nodes = [{"id": nid, **attrs} for nid, attrs in self._graph.nodes(data=True)]
+        edges = [{"source": s, "target": t, **a} for s, t, a in self._graph.edges(data=True)]
+        data = {
+            "version": 1,
+            "updated_at": datetime.now(UTC).isoformat(),
+            "node_count": len(nodes),
+            "edge_count": len(edges),
+            "nodes": nodes,
+            "edges": edges,
+        }
+        tmp_path = self.kg_path.with_name(self.kg_path.name + ".tmp")
+        tmp_path.write_text(json.dumps(data, indent=2, default=str), encoding="utf-8")
+        tmp_path.replace(self.kg_path)  # rename over the old file (same directory)
+        self._dirty = False
+
+    # ── Node operations ──────────────────────────────────
+
+    # Reserved attribute names that properties cannot overwrite
+    _RESERVED_ATTRS = {"type", "label", "created_at", "updated_at", "source_conversation", "id"}
+
+    def add_entity(
+        self,
+        entity_id: str,
+        entity_type: str,
+        label: str,
+        properties: dict | None = None,
+        conversation_uuid: str | None = None,
+    ) -> bool:
+        """Add or update an entity node.
+
+        Returns True if newly added; False if updated OR rejected because MAX_NODES is reached.
+        An entity_type outside NODE_TYPES is coerced to "concept" (with a warning).
+        Keys in `properties` that collide with _RESERVED_ATTRS are dropped.
+        """
+        if entity_type not in NODE_TYPES:
+            log.warning(f"Invalid entity type '{entity_type}', using 'concept'")
+            entity_type = "concept"
+
+        is_new = entity_id not in self._graph
+        if is_new and self._graph.number_of_nodes() >= MAX_NODES:
+            log.warning(f"Knowledge graph at capacity ({MAX_NODES} nodes)")
+            return False  # nothing was added; existing entities can still be updated
+
+        attrs = {
+            "type": entity_type,
+            "label": label,
+            "updated_at": datetime.now(UTC).isoformat(),
+        }
+        if is_new:
+            attrs["created_at"] = datetime.now(UTC).isoformat()
+        if conversation_uuid:
+            attrs["source_conversation"] = conversation_uuid
+        if properties:
+            # Filter out reserved keys to prevent internal field overwrite
+            safe_props = {k: v for k, v in properties.items() if k not in self._RESERVED_ATTRS}
+            attrs.update(safe_props)
+
+        self._graph.add_node(entity_id, **attrs)
+        self._dirty = True
+        return is_new
+
+    def get_entity(self, entity_id: str) -> dict | None:
+        """Return the node's attributes plus its "id", or None if the ID is unknown."""
+        if entity_id in self._graph:
+            return {"id": entity_id, **self._graph.nodes[entity_id]}
+        return None
+
+    def find_entities(self, entity_type: str | None = None, limit: int = 50) -> list[dict]:
+        """Collect entities in graph order, optionally only those whose type matches."""
+        found: list[dict] = []
+        for nid, data in self._graph.nodes(data=True):
+            wanted = not entity_type or data.get("type") == entity_type
+            if wanted:
+                found.append({"id": nid, **data})
+                if len(found) >= limit:
+                    break
+        return found
+
+    def search_entities(self, query: str, limit: int = 20) -> list[dict]:
+        """Case-insensitive substring search over entity labels and IDs, in graph order."""
+        query_lower = query.lower()
+        results = []
+        for node_id, attrs in self._graph.nodes(data=True):
+            label = str(attrs.get("label") or "")  # loaded JSON may hold null/non-string values
+            if query_lower in label.lower() or query_lower in str(node_id).lower():
+                results.append({"id": node_id, **attrs})
+                if len(results) >= limit:
+                    break
+        return results
+
+    def remove_entity(self, entity_id: str) -> bool:
+        """Delete a node together with its edges; returns False if the ID is unknown."""
+        present = entity_id in self._graph
+        if present:
+            self._graph.remove_node(entity_id)
+            self._dirty = True
+        return present
+
+    # ── Edge operations ──────────────────────────────────
+
+    def add_relationship(
+        self,
+        source_id: str,
+        target_id: str,
+        relationship: str,
+        properties: dict | None = None,
+        conversation_uuid: str | None = None,
+    ) -> bool:
+        """Add or update a directed relationship between two existing entities.
+
+        Returns True only when a new edge is created. False means one of: the edge already
+        existed (its attributes are updated), an endpoint is missing, or MAX_EDGES is reached.
+        A relationship outside EDGE_TYPES is coerced to "relates_to" (with a warning).
+        """
+        if relationship not in EDGE_TYPES:
+            log.warning(f"Invalid relationship type '{relationship}', using 'relates_to'")
+            relationship = "relates_to"
+
+        if source_id not in self._graph or target_id not in self._graph:
+            log.warning(
+                f"Cannot add edge {source_id}->{target_id}: "
+                f"missing {'source' if source_id not in self._graph else 'target'}"
+            )
+            return False
+
+        is_new = not self._graph.has_edge(source_id, target_id)
+        if is_new and self._graph.number_of_edges() >= MAX_EDGES:
+            log.warning(f"Knowledge graph at edge capacity ({MAX_EDGES} edges)")
+            return False  # nothing was added; existing edges can still be updated
+
+        attrs = {
+            "type": relationship,
+            "updated_at": datetime.now(UTC).isoformat(),
+        }
+        if is_new:
+            attrs["created_at"] = datetime.now(UTC).isoformat()
+        if conversation_uuid:
+            attrs["source_conversation"] = conversation_uuid
+        if properties:
+            # Drop keys that would overwrite the reserved bookkeeping attributes
+            safe_props = {k: v for k, v in properties.items() if k not in self._RESERVED_ATTRS}
+            attrs.update(safe_props)
+
+        self._graph.add_edge(source_id, target_id, **attrs)
+        self._dirty = True
+        return is_new
+
+    def get_neighbors(self, entity_id: str, direction: str = "both") -> list[dict]:
+        """Get connected entities, each tagged with the connecting edge's type.
+
+        Args:
+            entity_id: The entity to find neighbors for.
+            direction: "out" (successors), "in" (predecessors), or "both".
+
+        Returns:
+            Neighbor dicts (node attributes plus "id" and "relationship"), successors first.
+            [] if the entity is unknown. With "both", an outgoing edge's type wins over an
+            incoming one when the two nodes are linked in both directions.
+        """
+        if entity_id not in self._graph:
+            return []
+
+        neighbors: dict = {}  # dict keys: de-duplicated like a set, but in stable order
+        if direction in ("out", "both"):
+            neighbors.update(dict.fromkeys(self._graph.successors(entity_id)))
+        if direction in ("in", "both"):
+            neighbors.update(dict.fromkeys(self._graph.predecessors(entity_id)))
+
+        results = []
+        for nid in neighbors:
+            # Look up the edge in the requested direction first, then the reverse one.
+            pair = (nid, entity_id) if direction == "in" else (entity_id, nid)
+            edge_data = self._graph.edges.get(pair, {}) or self._graph.edges.get(pair[::-1], {})
+            rel = edge_data.get("type", "relates_to")
+            results.append({"id": nid, **self._graph.nodes[nid], "relationship": rel})
+        return results
+
+    # ── Query helpers ────────────────────────────────────
+
+    def get_summary(self) -> dict:
+        """Summarise the graph: totals, per-type node counts and the 10 latest entities."""
+        nodes = list(self._graph.nodes(data=True))
+        type_counts: dict[str, int] = {}
+        for _nid, node_attrs in nodes:
+            kind = node_attrs.get("type", "unknown")
+            type_counts[kind] = type_counts.get(kind, 0) + 1
+
+        # Newest first by "updated_at"; nodes lacking it compare as "" and sort last.
+        newest = sorted(
+            nodes, key=lambda pair: pair[1].get("updated_at", ""), reverse=True
+        )[:10]
+
+        return {
+            "total_nodes": self._graph.number_of_nodes(),
+            "total_edges": self._graph.number_of_edges(),
+            "type_counts": type_counts,
+            "recent_entities": [
+                {"id": a.get("id", nid), "type": a.get("type"), "label": a.get("label")}
+                for nid, a in newest
+            ],
+        }
+
+    def get_context_for_conversation(self, max_tokens_approx: int = 2000) -> str:
+        """Generate a text summary of the knowledge graph for injecting into agent context.
+
+        Entities are grouped by type (at most 15 listed per type), followed by up to
+        20 relationships. No further types are added once the entity lines exceed
+        roughly ``max_tokens_approx * 4`` characters. Returns "" for an empty graph.
+        """
+        if self._graph.number_of_nodes() == 0:
+            return ""
+
+        lines = ["## Project Knowledge Graph\n"]
+
+        # Group by type
+        by_type: dict[str, list] = {}
+        for nid, attrs in self._graph.nodes(data=True):
+            by_type.setdefault(attrs.get("type", "other"), []).append((nid, attrs))
+
+        char_count = 0
+        shown = 0  # entities actually listed, so the overflow note reports a true remainder
+        for entity_type, entities in by_type.items():
+            if char_count > max_tokens_approx * 4:  # rough char estimate
+                remaining = self._graph.number_of_nodes() - shown
+                lines.append(f"\n... and more ({remaining} entities)")
+                break
+
+            lines.append(f"\n### {entity_type.replace('_', ' ').title()}s")
+            for nid, attrs in entities[:15]:  # cap per type
+                line = f"- **{attrs.get('label', nid)}**"
+                # Add key properties; the ellipsis only marks text that was really cut.
+                detail = str(attrs.get("abstract") or attrs.get("description") or "")
+                if detail:
+                    line += f": {detail[:150]}" + ("..." if len(detail) > 150 else "")
+                lines.append(line)
+                char_count += len(line)
+                shown += 1
+
+        # Add key relationships
+        if self._graph.number_of_edges() > 0:
+            lines.append("\n### Key Relationships")
+            edge_count = 0
+            for src, tgt, attrs in self._graph.edges(data=True):
+                if edge_count >= 20:
+                    lines.append(f"... and {self._graph.number_of_edges() - edge_count} more")
+                    break
+                src_label = self._graph.nodes[src].get("label", src)
+                tgt_label = self._graph.nodes[tgt].get("label", tgt)
+                rel = attrs.get("type", "relates_to")
+                lines.append(f"- {src_label} --[{rel}]--> {tgt_label}")
+                edge_count += 1
+
+        return "\n".join(lines)
+
+    @property
+    def node_count(self) -> int:  # number of nodes (entities) currently in the graph
+        return self._graph.number_of_nodes()
+
+    @property
+    def edge_count(self) -> int:  # number of edges (relationships) currently in the graph
+        return self._graph.number_of_edges()
diff --git a/backend/openmlr/workspace/persistence.py b/backend/openmlr/workspace/persistence.py
new file mode 100644
index 0000000..1ca330d
--- /dev/null
+++ b/backend/openmlr/workspace/persistence.py
@@ -0,0 +1,353 @@
+"""Workspace Persistence — file-based storage for project working data.
+
+Handles saving/loading of:
+- Search results (paper searches, web searches)
+- Research notes and summaries
+- Tool failure logs
+- Compute capability snapshots
+- Experiment logs
+- Cross-conversation state
+"""
+
+import json
+import logging
+from datetime import UTC, datetime
+from pathlib import Path
+
+log = logging.getLogger(__name__)
+
+
+class WorkspacePersistence:
+    """File-based persistence for a project workspace."""
+
+    def __init__(self, workspace_path: str | Path):
+        root = self.workspace_path = Path(workspace_path)  # accept str or Path, store a Path
+        if not root.exists():  # warn only; nothing is created here
+            log.warning(f"Workspace path does not exist: {workspace_path}")
+
+    def _ensure_dir(self, *parts: str) -> Path:
+        """Create ``workspace_path/<parts...>`` (and parents) if missing; return that path."""
+        target = Path(self.workspace_path, *parts)
+        target.mkdir(exist_ok=True, parents=True)
+        return target
+
+    def _timestamp(self) -> str:  # UTC; microseconds keep same-second filenames unique
+        return datetime.now(UTC).strftime("%Y%m%d-%H%M%S-%f")
+
+    @staticmethod
+    def _sanitize_filename(name: str, max_len: int = 80) -> str:
+        """Map chars other than alphanumerics, "-", "_" to "_"; truncate; empty -> "unknown"."""
+        return "".join(c if c.isalnum() or c in "-_" else "_" for c in name)[:max_len] or "unknown"
+
+    # ── Search Results ───────────────────────────────────
+
+    def save_search_results(
+        self,
+        query: str,
+        source: str,
+        results: list[dict],
+        conversation_uuid: str | None = None,
+    ) -> Path:
+        """Write one search (query, source, hits) as a JSON file under research/searches."""
+        folder = self._ensure_dir("research", "searches")
+        filepath = folder / f"{self._timestamp()}_{self._sanitize_filename(source)}.json"
+
+        # Envelope around the raw hits; non-JSON values fall back to str() via default=str.
+        record = {
+            "query": query,
+            "source": source,
+            "timestamp": datetime.now(UTC).isoformat(),
+            "conversation_uuid": conversation_uuid,
+            "result_count": len(results),
+            "results": results,
+        }
+        filepath.write_text(json.dumps(record, indent=2, default=str), encoding="utf-8")
+        log.debug(f"Saved {len(results)} search results to {filepath}")
+        return filepath
+
+    def get_recent_searches(self, limit: int = 10) -> list[dict]:
+        """Get recent search results (metadata only, not full results).
+
+        Newest first, relying on the timestamp prefix of each filename. Unreadable or
+        malformed files are skipped; a non-positive ``limit`` yields an empty list.
+        """
+        dir_path = self.workspace_path / "research" / "searches"
+        if limit <= 0 or not dir_path.exists():
+            return []
+
+        searches: list[dict] = []
+        for filepath in sorted(dir_path.glob("*.json"), reverse=True):
+            try:
+                data = json.loads(filepath.read_text(encoding="utf-8"))
+                meta = {key: data.get(key) for key in ("query", "source", "timestamp")}
+                meta["result_count"] = data.get("result_count", 0)
+            except (OSError, ValueError, AttributeError) as exc:  # incl. non-dict JSON
+                log.debug(f"Skipping unreadable search file {filepath.name}: {exc}")
+                continue
+            meta["filename"] = filepath.name
+            searches.append(meta)
+            if len(searches) >= limit:
+                break
+        return searches
+
+    # ── Research Notes ───────────────────────────────────
+
+    def save_research_note(
+        self,
+        topic: str,
+        content: str,
+        conversation_uuid: str | None = None,
+    ) -> Path:
+        """Write a Markdown note (title, provenance, then ``content``) under research/notes."""
+        notes_dir = self._ensure_dir("research", "notes")
+        # Filename slug: keep alphanumerics, "-", "_" and spaces, then turn spaces into "_".
+        kept = [ch for ch in topic if ch.isalnum() or ch in "-_ "]
+        slug = "".join(kept).strip().replace(" ", "_")[:100] or "note"
+        filepath = notes_dir / f"{self._timestamp()}_{slug}.md"
+
+        # Header pieces are collected first and joined once with the body.
+        parts = [f"# {topic}\n\n", f"_Generated: {datetime.now(UTC).isoformat()}_\n"]
+        if conversation_uuid:
+            parts.append(f"_Conversation: {conversation_uuid}_\n")
+        parts.append("\n---\n\n")
+        parts.append(content)
+
+        filepath.write_text("".join(parts), encoding="utf-8")
+        log.debug(f"Saved research note to {filepath}")
+        return filepath
+
+    def get_research_notes(self, limit: int = 20) -> list[dict]:
+        """List available research notes, newest first (at most ``limit`` entries).
+
+        Each entry holds "title" (first line without its "# " heading marker),
+        "filename", "size" and "modified" (st_mtime). Unreadable files are skipped.
+        """
+        dir_path = self.workspace_path / "research" / "notes"
+        if limit <= 0 or not dir_path.exists():
+            return []
+
+        notes: list[dict] = []
+        for filepath in sorted(dir_path.glob("*.md"), reverse=True):
+            try:
+                first = filepath.read_text(encoding="utf-8").split("\n", 1)[0]
+                stat = filepath.stat()  # one stat call so size and mtime agree
+            except (OSError, ValueError):
+                continue  # best-effort listing: skip unreadable or badly encoded notes
+            # Remove only the "# " written by save_research_note; lstrip("# ") would also
+            # eat a leading "#" that belongs to the topic itself (e.g. "#1 result").
+            title = first[2:] if first.startswith("# ") else first.lstrip("# ")
+            entry = {"title": title.strip(), "filename": filepath.name}
+            notes.append({**entry, "size": stat.st_size, "modified": stat.st_mtime})
+            if len(notes) >= limit:
+                break
+        return notes
+
+    # ── Paper Storage ────────────────────────────────────
+
+    def save_paper(
+        self,
+        paper_id: str,
+        title: str,
+        content: str,
+        metadata: dict | None = None,
+    ) -> Path:
+        """Store a paper as papers/<sanitized id>.md plus a .meta.json sidecar; return the .md."""
+        papers_dir = self._ensure_dir("papers")
+        stem = self._sanitize_filename(paper_id)
+        filepath = papers_dir / f"{stem}.md"
+
+        pieces = [f"# {title}\n\n"]
+        if metadata:
+            authors, year, url = (metadata.get(k) for k in ("authors", "year", "url"))
+            if authors:
+                pieces.append(f"**Authors:** {authors}\n")
+            if year:
+                pieces.append(f"**Year:** {year}\n")
+            if url:
+                pieces.append(f"**URL:** {url}\n")
+            pieces.append("\n---\n\n")
+        pieces.append(content)
+        filepath.write_text("".join(pieces), encoding="utf-8")
+
+        # Sidecar JSON: metadata entries come last, so they win over the base keys on a clash.
+        sidecar = {
+            "paper_id": paper_id,
+            "title": title,
+            "saved_at": datetime.now(UTC).isoformat(),
+            **(metadata or {}),
+        }
+        meta_file = papers_dir / f"{stem}.meta.json"
+        meta_file.write_text(json.dumps(sidecar, indent=2, default=str), encoding="utf-8")
+
+        return filepath
+
+    # ── Tool Failure Logs ────────────────────────────────
+
+    def log_tool_failure(
+        self,
+        tool_name: str,
+        error: str,
+        args: dict | None = None,
+        conversation_uuid: str | None = None,
+    ) -> Path:
+        """Record a failed tool/API/MCP call as a JSON file in logs/tool_failures."""
+        failures_dir = self._ensure_dir("logs", "tool_failures")
+        stamp, tool_slug = self._timestamp(), self._sanitize_filename(tool_name)
+        filepath = failures_dir / f"{stamp}_{tool_slug}.json"
+
+        entry = {
+            "tool": tool_name,
+            "error": error,
+            "args": args,
+            "timestamp": datetime.now(UTC).isoformat(),
+            "conversation_uuid": conversation_uuid,
+        }
+        filepath.write_text(json.dumps(entry, indent=2, default=str), encoding="utf-8")
+        log.debug(f"Logged tool failure: {tool_name} -> {filepath}")
+        return filepath
+
+    def get_recent_failures(self, limit: int = 10) -> list[dict]:
+        """Return up to ``limit`` tool-failure records, newest first; bad files are skipped."""
+        dir_path = self.workspace_path / "logs" / "tool_failures"
+        if limit <= 0 or not dir_path.exists():
+            return []
+        failures: list[dict] = []
+        for filepath in sorted(dir_path.glob("*.json"), reverse=True):
+            try:
+                data = json.loads(filepath.read_text(encoding="utf-8"))
+            except (OSError, ValueError):
+                continue  # best-effort listing: unreadable or corrupt logs are ignored
+            if isinstance(data, dict):  # callers index records by key; drop anything else
+                failures.append(data)
+                if len(failures) >= limit:
+                    break
+        return failures
+
+    # ── Compute Logs ─────────────────────────────────────
+
+    def log_compute_probe(
+        self,
+        node_name: str,
+        capabilities: dict,
+    ) -> Path:
+        """Write a compute-node probe snapshot as JSON into logs/compute; return its path."""
+        compute_dir = self._ensure_dir("logs", "compute")
+        stamp, node_slug = self._timestamp(), self._sanitize_filename(node_name)
+        filepath = compute_dir / f"{stamp}_{node_slug}.json"
+
+        snapshot = {
+            "node_name": node_name,
+            "capabilities": capabilities,
+            "probed_at": datetime.now(UTC).isoformat(),
+        }
+        filepath.write_text(json.dumps(snapshot, indent=2, default=str), encoding="utf-8")
+        return filepath
+
+    # ── Experiment Logs ──────────────────────────────────
+
+    def log_experiment(
+        self,
+        name: str,
+        command: str,
+        result: dict,
+        conversation_uuid: str | None = None,
+    ) -> Path:
+        """Log an experiment execution as JSON in logs/experiments and return the file path."""
+        dir_path = self._ensure_dir("logs", "experiments")
+        # Use the shared sanitizer: same character rules and 80-char cap as the previous
+        # inline copy, plus its "unknown" fallback so an empty name never yields "<ts>_.json".
+        filepath = dir_path / f"{self._timestamp()}_{self._sanitize_filename(name)}.json"
+
+        data = {
+            "name": name,
+            "command": command,
+            "result": result,
+            "timestamp": datetime.now(UTC).isoformat(),
+            "conversation_uuid": conversation_uuid,
+        }
+        filepath.write_text(json.dumps(data, indent=2, default=str), encoding="utf-8")
+        return filepath
+
+    # ── Cross-conversation State ─────────────────────────
+
+    def get_state(self) -> dict:
+        """Load cross-conversation state; defaults when missing, corrupt or not a dict."""
+        defaults = {
+            "last_conversation_uuid": None,
+            "open_questions": [],
+            "key_findings": [],
+            "active_experiments": [],
+        }
+        state_path = self.workspace_path / ".project-meta" / "state.json"
+        try:
+            loaded = json.loads(state_path.read_text(encoding="utf-8"))
+        except (OSError, ValueError):  # missing/unreadable file or invalid JSON
+            return defaults
+        return loaded if isinstance(loaded, dict) else defaults
+
+    def save_state(self, state: dict) -> None:
+        """Write ``state`` as indented JSON to .project-meta/state.json, creating the dir."""
+        meta_dir = self._ensure_dir(".project-meta")
+        payload = json.dumps(state, indent=2, default=str)
+        (meta_dir / "state.json").write_text(payload, encoding="utf-8")
+
+    def update_state(self, **kwargs) -> dict:
+        """Merge ``kwargs`` into the stored state, persist the result and return it."""
+        merged = self.get_state()
+        merged.update(kwargs)  # shallow merge: a passed key replaces the stored value
+        self.save_state(merged)
+        return merged
+
+    # ── Plan Storage ─────────────────────────────────────
+
+    def save_plan(
+        self,
+        plan_content: str,
+        conversation_uuid: str,
+    ) -> Path:
+        """Save a task plan as .project-meta/plans/<sanitized uuid>.md and return its path."""
+        plans_dir = self._ensure_dir(".project-meta", "plans")
+        # The UUID arrives as caller-supplied text, so it goes through the filename
+        # sanitizer before being used as a path component.
+        safe_uuid = self._sanitize_filename(conversation_uuid)
+        plan_file = plans_dir / f"{safe_uuid}.md"
+        plan_file.write_text(plan_content, encoding="utf-8")
+        return plan_file
+
+    # ── Workspace Summary ────────────────────────────────
+
+    def get_workspace_summary(self) -> dict:
+        """Get per-category file counts plus up to five recent tool failures, if any exist."""
+        summary = {
+            "papers": self._count_files("papers", "*.md"),
+            "research_notes": self._count_files("research/notes", "*.md"),
+            "search_results": self._count_files("research/searches", "*.json"),
+            "code_files": self._count_files_recursive("code"),
+            "experiments": self._count_files("logs/experiments", "*.json"),
+            "tool_failures": self._count_files("logs/tool_failures", "*.json"),
+        }
+
+        # Records are read back from disk, so tolerate missing keys and non-string errors.
+        recent = []
+        for f in self.get_recent_failures(limit=5):
+            if isinstance(f, dict):
+                error = str(f.get("error") or "")[:200]
+                recent.append({"tool": f.get("tool"), "error": error, "time": f.get("timestamp")})
+        if recent:
+            summary["recent_tool_failures"] = recent
+        return summary
+
+    def _count_files(self, subdir: str, pattern: str) -> int:
+        """Count entries directly under ``subdir`` that match ``pattern`` (0 if it is absent)."""
+        base = self.workspace_path / subdir
+        # Tally the glob hits one by one rather than building a list just to measure it.
+        return sum(1 for _ in base.glob(pattern)) if base.exists() else 0
+
+    def _count_files_recursive(self, subdir: str) -> int:
+        """Count regular files anywhere below ``subdir``.
+
+        Directories are excluded so the result is a true file count; a missing
+        ``subdir`` counts as 0.
+        """
+        path = self.workspace_path / subdir
+        return sum(1 for p in path.rglob("*") if p.is_file()) if path.exists() else 0
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index f6ac4d8..0ffb822 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -38,6 +38,9 @@ dependencies = [
     "python-dotenv>=1.0.0",
     "pyyaml>=6.0.0",
 
+    # Knowledge graph (workspace persistence)
+    "networkx>=3.0",
+
     # Background jobs
     "celery>=5.4.0",
     "redis>=5.0.0",
diff --git a/backend/tests/test_projects.py b/backend/tests/test_projects.py
new file mode 100644
index 0000000..62930a8
--- /dev/null
+++ b/backend/tests/test_projects.py
@@ -0,0 +1,196 @@
+"""Tests for Project model, DB operations, and API routes."""
+
+import pytest
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from openmlr.db import operations as ops
+from openmlr.db.models import User
+
+pytestmark = pytest.mark.asyncio
+
+
+# ── DB Operations ────────────────────────────────────────
+
+
+class TestProjectOperations:  # direct tests of the project helpers in openmlr.db.operations
+    async def test_create_project(self, db_session: AsyncSession, test_user: User):
+        project = await ops.create_project(
+            db_session,
+            test_user.id,
+            name="Test Project",
+            slug="test-project",
+            description="A test project",
+        )
+        assert project.id is not None
+        assert project.uuid is not None
+        assert project.name == "Test Project"
+        assert project.slug == "test-project"
+        assert project.status == "active"  # no status was passed to create_project
+
+    async def test_get_user_projects(self, db_session: AsyncSession, test_user: User):
+        await ops.create_project(db_session, test_user.id, "P1", "p1")
+        await ops.create_project(db_session, test_user.id, "P2", "p2")
+
+        projects = await ops.get_user_projects(db_session, test_user.id)
+        assert len(projects) == 2
+
+    async def test_get_project_by_uuid(self, db_session: AsyncSession, test_user: User):
+        project = await ops.create_project(db_session, test_user.id, "Find Me", "find-me")
+        found = await ops.get_project_by_uuid(db_session, project.uuid, test_user.id)
+        assert found is not None
+        assert found.name == "Find Me"
+
+    async def test_get_project_by_uuid_wrong_user(self, db_session: AsyncSession, test_user: User):
+        project = await ops.create_project(db_session, test_user.id, "Private", "private")
+        found = await ops.get_project_by_uuid(db_session, project.uuid, user_id=9999)
+        assert found is None  # lookup used user_id=9999, not the creating user
+
+    async def test_get_project_by_slug(self, db_session: AsyncSession, test_user: User):
+        await ops.create_project(db_session, test_user.id, "Slug Test", "slug-test")
+        found = await ops.get_project_by_slug(db_session, test_user.id, "slug-test")
+        assert found is not None
+        assert found.name == "Slug Test"
+
+    async def test_update_project(self, db_session: AsyncSession, test_user: User):
+        project = await ops.create_project(db_session, test_user.id, "Original", "original")
+        updated = await ops.update_project(
+            db_session,
+            project.id,
+            test_user.id,
+            name="Updated Name",
+            description="New description",
+        )
+        assert updated.name == "Updated Name"
+        assert updated.description == "New description"
+
+    async def test_archive_project(self, db_session: AsyncSession, test_user: User):
+        project = await ops.create_project(db_session, test_user.id, "To Archive", "to-archive")
+        archived = await ops.archive_project(db_session, project.id, test_user.id)
+        assert archived.status == "archived"
+
+    async def test_get_user_projects_excludes_archived(
+        self, db_session: AsyncSession, test_user: User
+    ):
+        await ops.create_project(db_session, test_user.id, "Active", "active")
+        p2 = await ops.create_project(db_session, test_user.id, "To Archive", "to-archive")
+        await ops.archive_project(db_session, p2.id, test_user.id)
+
+        active_only = await ops.get_user_projects(db_session, test_user.id, include_archived=False)
+        assert len(active_only) == 1
+        assert active_only[0].name == "Active"
+
+        all_projects = await ops.get_user_projects(db_session, test_user.id, include_archived=True)
+        assert len(all_projects) == 2
+
+    async def test_attach_conversation_to_project(self, db_session: AsyncSession, test_user: User):
+        project = await ops.create_project(db_session, test_user.id, "With Conv", "with-conv")
+        conv = await ops.create_conversation(db_session, test_user.id, title="Test Conv")
+
+        success = await ops.attach_conversation_to_project(db_session, conv.id, project.id)
+        assert success is True
+
+        convs = await ops.get_project_conversations(db_session, project.id)
+        assert len(convs) == 1
+        assert convs[0].title == "Test Conv"
+
+    async def test_detach_conversation_from_project(
+        self, db_session: AsyncSession, test_user: User
+    ):
+        project = await ops.create_project(db_session, test_user.id, "Detach Test", "detach-test")
+        conv = await ops.create_conversation(db_session, test_user.id, project_id=project.id)
+
+        convs = await ops.get_project_conversations(db_session, project.id)
+        assert len(convs) == 1
+
+        await ops.attach_conversation_to_project(db_session, conv.id, None)  # None = detach
+        convs = await ops.get_project_conversations(db_session, project.id)
+        assert len(convs) == 0
+
+    async def test_create_conversation_with_project(
+        self, db_session: AsyncSession, test_user: User
+    ):
+        project = await ops.create_project(db_session, test_user.id, "Direct", "direct")
+        conv = await ops.create_conversation(
+            db_session,
+            test_user.id,
+            title="Project Conv",
+            project_id=project.id,
+        )
+        assert conv.project_id == project.id
+
+
+# ── API Routes ───────────────────────────────────────────
+
+
+class TestProjectRoutes:  # HTTP-level tests against /api/projects via the auth_client fixture
+    async def test_create_project_api(self, auth_client):
+        resp = await auth_client.post(
+            "/api/projects",
+            json={
+                "name": "API Project",
+                "description": "Created via API",
+            },
+        )
+        assert resp.status_code == 200
+        data = resp.json()
+        assert data["project"]["name"] == "API Project"
+        assert data["project"]["slug"] == "api-project"  # no slug was sent in the request
+        assert data["project"]["status"] == "active"
+
+    async def test_list_projects_api(self, auth_client):
+        await auth_client.post("/api/projects", json={"name": "Project 1"})
+        await auth_client.post("/api/projects", json={"name": "Project 2"})
+
+        resp = await auth_client.get("/api/projects")
+        assert resp.status_code == 200
+        data = resp.json()
+        assert len(data["projects"]) == 2
+
+    async def test_get_project_api(self, auth_client):
+        create_resp = await auth_client.post("/api/projects", json={"name": "Get Me"})
+        uuid = create_resp.json()["project"]["uuid"]
+
+        resp = await auth_client.get(f"/api/projects/{uuid}")
+        assert resp.status_code == 200
+        assert resp.json()["project"]["name"] == "Get Me"
+
+    async def test_update_project_api(self, auth_client):
+        create_resp = await auth_client.post("/api/projects", json={"name": "Update Me"})
+        uuid = create_resp.json()["project"]["uuid"]
+
+        resp = await auth_client.put(
+            f"/api/projects/{uuid}",
+            json={
+                "name": "Updated",
+                "description": "New desc",
+            },
+        )
+        assert resp.status_code == 200
+        assert resp.json()["project"]["name"] == "Updated"
+
+    async def test_delete_project_api(self, auth_client):
+        create_resp = await auth_client.post("/api/projects", json={"name": "Delete Me"})
+        uuid = create_resp.json()["project"]["uuid"]
+
+        resp = await auth_client.delete(f"/api/projects/{uuid}")
+        assert resp.status_code == 200
+
+        # Should be archived, not truly deleted
+        get_resp = await auth_client.get(f"/api/projects/{uuid}")
+        assert get_resp.json()["project"]["status"] == "archived"
+
+    async def test_create_project_missing_name(self, auth_client):
+        resp = await auth_client.post("/api/projects", json={})  # body has no "name"
+        assert resp.status_code == 400
+
+    async def test_get_nonexistent_project(self, auth_client):
+        resp = await auth_client.get("/api/projects/nonexistent-uuid")
+        assert resp.status_code == 404
+
+    async def test_project_conversations_api(self, auth_client):
+        create_resp = await auth_client.post("/api/projects", json={"name": "Conv Test"})
+        uuid = create_resp.json()["project"]["uuid"]
+
+        resp = await auth_client.get(f"/api/projects/{uuid}/conversations")
+        assert resp.status_code == 200
+        assert resp.json()["conversations"] == []  # nothing was attached to the new project
diff --git a/backend/tests/test_tools_workspace.py b/backend/tests/test_tools_workspace.py
new file mode 100644
index 0000000..69edca8
--- /dev/null
+++ b/backend/tests/test_tools_workspace.py
@@ -0,0 +1,208 @@
+"""Tests for workspace agent tools."""
+
+import os
+import tempfile
+
+import pytest
+
+from openmlr.tools.workspace_tools import (
+    _handle_workspace,
+    create_workspace_tools,
+    set_workspace_context,
+)
+
+pytestmark = pytest.mark.asyncio
+
+
+@pytest.fixture
+def workspace_dir():
+    """Yield a temp dir holding the standard subdirs, registered as the workspace context."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+        for subdir in [
+            "code",
+            "data",
+            "models",
+            "outputs",
+            "papers",
+            "research",
+            "research/searches",
+            "research/notes",
+            "research/citations",
+            "logs",
+            "logs/tool_failures",
+            "logs/compute",
+            "logs/experiments",
+            ".project-meta",
+            ".project-meta/plans",
+        ]:
+            os.makedirs(os.path.join(tmpdir, subdir), exist_ok=True)
+        set_workspace_context(tmpdir)
+        yield tmpdir
+        set_workspace_context(None)  # clear the context once the test is done
+
+
+class TestWorkspaceTools:  # exercises _handle_workspace, which returns (text, success)
+    def test_create_workspace_tools(self):
+        tools = create_workspace_tools()
+        assert len(tools) == 1
+        assert tools[0].name == "workspace"
+
+    async def test_status_operation(self, workspace_dir):
+        result, success = await _handle_workspace(operation="status")
+        assert success is True
+        assert "Workspace Status" in result
+
+    async def test_note_operation(self, workspace_dir):
+        result, success = await _handle_workspace(
+            operation="note",
+            topic="Test Topic",
+            content="This is a test note.",
+        )
+        assert success is True
+        assert "saved" in result.lower()
+
+    async def test_note_missing_params(self, workspace_dir):
+        result, success = await _handle_workspace(operation="note", topic="", content="")
+        assert success is False
+
+    async def test_knowledge_add_operation(self, workspace_dir):
+        result, success = await _handle_workspace(
+            operation="knowledge_add",
+            entity_id="paper-1",
+            entity_type="paper",
+            label="Test Paper",
+        )
+        assert success is True
+        assert "Added" in result
+
+    async def test_knowledge_add_with_properties(self, workspace_dir):
+        result, success = await _handle_workspace(
+            operation="knowledge_add",
+            entity_id="paper-2",
+            entity_type="paper",
+            label="Paper 2",
+            properties='{"year": 2024, "venue": "NeurIPS"}',  # passed as JSON text, not a dict
+        )
+        assert success is True
+
+    async def test_knowledge_add_missing_params(self, workspace_dir):
+        result, success = await _handle_workspace(
+            operation="knowledge_add",
+            entity_id="",
+            entity_type="",
+            label="",
+        )
+        assert success is False
+
+    async def test_knowledge_relate_operation(self, workspace_dir):
+        # Add entities first
+        await _handle_workspace(
+            operation="knowledge_add",
+            entity_id="p1",
+            entity_type="paper",
+            label="Paper 1",
+        )
+        await _handle_workspace(
+            operation="knowledge_add",
+            entity_id="m1",
+            entity_type="method",
+            label="Method 1",
+        )
+
+        result, success = await _handle_workspace(
+            operation="knowledge_relate",
+            source_id="p1",
+            target_id="m1",
+            relationship="proposes",
+        )
+        assert success is True
+        assert "proposes" in result
+
+    async def test_knowledge_relate_missing_entity(self, workspace_dir):
+        await _handle_workspace(
+            operation="knowledge_add",
+            entity_id="p1",
+            entity_type="paper",
+            label="Paper 1",
+        )
+        result, success = await _handle_workspace(
+            operation="knowledge_relate",
+            source_id="p1",
+            target_id="missing",
+            relationship="proposes",
+        )
+        assert success is False
+
+    async def test_knowledge_query_operation(self, workspace_dir):
+        await _handle_workspace(
+            operation="knowledge_add",
+            entity_id="attn",
+            entity_type="method",
+            label="Self-Attention",
+        )
+        result, success = await _handle_workspace(
+            operation="knowledge_query",
+            query="attention",
+        )
+        assert success is True
+        assert "Self-Attention" in result
+
+    async def test_knowledge_query_no_results(self, workspace_dir):
+        result, success = await _handle_workspace(
+            operation="knowledge_query",
+            query="nonexistent",
+        )
+        assert success is True
+        assert "No entities found" in result
+
+    async def test_knowledge_summary_empty(self, workspace_dir):
+        result, success = await _handle_workspace(operation="knowledge_summary")
+        assert success is True
+        assert "empty" in result.lower()
+
+    async def test_knowledge_summary_with_data(self, workspace_dir):
+        await _handle_workspace(
+            operation="knowledge_add",
+            entity_id="p1",
+            entity_type="paper",
+            label="Test Paper",
+        )
+        result, success = await _handle_workspace(operation="knowledge_summary")
+        assert success is True
+        assert "Test Paper" in result
+
+    async def test_recent_failures_empty(self, workspace_dir):
+        result, success = await _handle_workspace(operation="recent_failures")
+        assert success is True
+        assert "No recent" in result
+
+    async def test_search_operation(self, workspace_dir):
+        # Create a test file
+        test_file = os.path.join(workspace_dir, "code", "test.py")
+        with open(test_file, "w") as f:
+            f.write("import torch\nmodel = TransformerModel()")
+
+        result, success = await _handle_workspace(
+            operation="search",
+            query="transformer",  # lowercase, while the file contains "TransformerModel"
+        )
+        assert success is True
+        assert "test.py" in result
+
+    async def test_search_no_results(self, workspace_dir):
+        result, success = await _handle_workspace(
+            operation="search",
+            query="xyznonexistent",
+        )
+        assert success is True
+        assert "No files found" in result
+
+    async def test_unknown_operation(self, workspace_dir):
+        result, success = await _handle_workspace(operation="unknown_op")
+        assert success is False
+
+    async def test_no_workspace_context(self):
+        set_workspace_context(None)  # this test does not request the workspace_dir fixture
+        result, success = await _handle_workspace(operation="status")
+        assert success is False
+        assert "No project workspace" in result
diff --git a/backend/tests/test_workspace.py b/backend/tests/test_workspace.py
new file mode 100644
index 0000000..d3f59c2
--- /dev/null
+++ b/backend/tests/test_workspace.py
@@ -0,0 +1,333 @@
+"""Tests for workspace persistence and knowledge graph."""
+
+import json
+import os
+import tempfile
+
+import pytest
+
+from openmlr.workspace.knowledge import KnowledgeGraph
+from openmlr.workspace.persistence import WorkspacePersistence
+
+pytestmark = pytest.mark.asyncio
+
+
+@pytest.fixture
+def workspace_dir():
+    """Yield a temporary directory pre-populated with the standard workspace layout."""
+    layout = (
+        "code",
+        "data",
+        "models",
+        "outputs",
+        "papers",
+        "research",
+        "research/searches",
+        "research/notes",
+        "research/citations",
+        "logs",
+        "logs/tool_failures",
+        "logs/compute",
+        "logs/experiments",
+        "venvs",
+        ".project-meta",
+        ".project-meta/plans",
+    )
+    with tempfile.TemporaryDirectory() as root:
+        for relative in layout:
+            os.makedirs(os.path.join(root, relative), exist_ok=True)
+        yield root
+
+
+# ── Knowledge Graph ──────────────────────────────────────
+
+
+class TestKnowledgeGraph:  # KnowledgeGraph entities, relationships, search, save/reload, summaries
+    def test_init_empty(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        assert kg.node_count == 0
+        assert kg.edge_count == 0
+
+    def test_add_entity(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        is_new = kg.add_entity("paper-1", "paper", "Attention Is All You Need")
+        assert is_new is True
+        assert kg.node_count == 1
+
+    def test_add_entity_update(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        kg.add_entity("paper-1", "paper", "Original Label")
+        is_new = kg.add_entity("paper-1", "paper", "Updated Label")
+        assert is_new is False
+        assert kg.node_count == 1
+        # The existing node is updated in place: its label now holds the second value.
+        entity = kg.get_entity("paper-1")
+        assert entity["label"] == "Updated Label"
+
+    def test_add_entity_with_properties(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        kg.add_entity(
+            "paper-1",
+            "paper",
+            "Test Paper",
+            properties={
+                "year": 2017,
+                "abstract": "We propose a new architecture...",
+            },
+        )
+        entity = kg.get_entity("paper-1")
+        assert entity["year"] == 2017
+        assert "architecture" in entity["abstract"]
+
+    def test_get_nonexistent_entity(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        assert kg.get_entity("nope") is None
+
+    def test_find_entities_by_type(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        kg.add_entity("p1", "paper", "Paper 1")
+        kg.add_entity("p2", "paper", "Paper 2")
+        kg.add_entity("m1", "method", "Method 1")
+
+        papers = kg.find_entities("paper")
+        assert len(papers) == 2
+
+        methods = kg.find_entities("method")
+        assert len(methods) == 1
+
+    def test_search_entities(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        kg.add_entity("attn", "method", "Self-Attention Mechanism")
+        kg.add_entity("conv", "method", "Convolutional Neural Network")
+        kg.add_entity("bert", "paper", "BERT: Pre-training")
+
+        results = kg.search_entities("attention")
+        assert len(results) == 1
+        assert results[0]["id"] == "attn"
+
+    def test_remove_entity(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        kg.add_entity("rm-me", "paper", "Remove Me")
+        assert kg.node_count == 1
+
+        removed = kg.remove_entity("rm-me")
+        assert removed is True
+        assert kg.node_count == 0
+
+    def test_remove_nonexistent_entity(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        assert kg.remove_entity("nope") is False
+
+    def test_add_relationship(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        kg.add_entity("p1", "paper", "Paper 1")
+        kg.add_entity("m1", "method", "Method 1")
+
+        is_new = kg.add_relationship("p1", "m1", "proposes")
+        assert is_new is True
+        assert kg.edge_count == 1
+
+    def test_add_relationship_missing_entity(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        kg.add_entity("p1", "paper", "Paper 1")
+        # "missing" was never added, so no edge may be created.
+        is_new = kg.add_relationship("p1", "missing", "proposes")
+        assert is_new is False
+        assert kg.edge_count == 0
+
+    def test_get_neighbors(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        kg.add_entity("p1", "paper", "Paper 1")
+        kg.add_entity("m1", "method", "Method 1")
+        kg.add_entity("m2", "method", "Method 2")
+        kg.add_relationship("p1", "m1", "proposes")
+        kg.add_relationship("p1", "m2", "proposes")
+        # Two outgoing edges from p1; m1 has a single incoming edge.
+        neighbors = kg.get_neighbors("p1", direction="out")
+        assert len(neighbors) == 2
+
+        neighbors_in = kg.get_neighbors("m1", direction="in")
+        assert len(neighbors_in) == 1
+
+    def test_save_and_reload(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        kg.add_entity("p1", "paper", "Paper 1", properties={"year": 2020})
+        kg.add_entity("m1", "method", "Method 1")
+        kg.add_relationship("p1", "m1", "proposes")
+        kg.save()
+
+        # Reload from disk: a second instance on the same directory must see the saved graph.
+        kg2 = KnowledgeGraph(workspace_dir)
+        assert kg2.node_count == 2
+        assert kg2.edge_count == 1
+
+        entity = kg2.get_entity("p1")
+        assert entity["label"] == "Paper 1"
+        assert entity["year"] == 2020
+
+    def test_get_summary(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        kg.add_entity("p1", "paper", "Paper 1")
+        kg.add_entity("m1", "method", "Method 1")
+        kg.add_relationship("p1", "m1", "proposes")
+
+        summary = kg.get_summary()
+        assert summary["total_nodes"] == 2
+        assert summary["total_edges"] == 1
+        assert "paper" in summary["type_counts"]
+        assert "method" in summary["type_counts"]
+
+    def test_get_context_for_conversation_empty(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        context = kg.get_context_for_conversation()
+        assert context == ""
+
+    def test_get_context_for_conversation(self, workspace_dir):
+        kg = KnowledgeGraph(workspace_dir)
+        kg.add_entity("p1", "paper", "Attention Paper")
+        kg.add_entity("m1", "method", "Self-Attention")
+        kg.add_relationship("p1", "m1", "proposes")
+
+        context = kg.get_context_for_conversation()
+        assert "Attention Paper" in context
+        assert "Self-Attention" in context
+        assert "proposes" in context
+
+
+# ── Workspace Persistence ────────────────────────────────
+
+
+class TestWorkspacePersistence:  # WorkspacePersistence writers (searches, notes, logs, state, plans) and readers
+    def test_save_search_results(self, workspace_dir):
+        wp = WorkspacePersistence(workspace_dir)
+        filepath = wp.save_search_results(
+            query="transformer attention",
+            source="arxiv",
+            results=[{"title": "Paper 1"}, {"title": "Paper 2"}],
+        )
+        assert filepath.exists()
+        data = json.loads(filepath.read_text())
+        assert data["query"] == "transformer attention"
+        assert data["source"] == "arxiv"
+        assert len(data["results"]) == 2
+
+    def test_get_recent_searches(self, workspace_dir):
+        wp = WorkspacePersistence(workspace_dir)
+        wp.save_search_results("q1", "arxiv", [{"t": "r1"}])
+        wp.save_search_results("q2", "openalex", [{"t": "r2"}])
+
+        searches = wp.get_recent_searches(limit=10)
+        assert len(searches) == 2
+
+    def test_save_research_note(self, workspace_dir):
+        wp = WorkspacePersistence(workspace_dir)
+        filepath = wp.save_research_note(
+            topic="Attention Mechanisms",
+            content="Self-attention allows models to...",
+        )
+        assert filepath.exists()
+        content = filepath.read_text()
+        assert "Attention Mechanisms" in content
+        assert "Self-attention allows" in content
+
+    def test_get_research_notes(self, workspace_dir):
+        wp = WorkspacePersistence(workspace_dir)
+        wp.save_research_note("Note 1", "Content 1")
+        wp.save_research_note("Note 2", "Content 2")
+
+        notes = wp.get_research_notes()
+        assert len(notes) == 2
+
+    def test_save_paper(self, workspace_dir):
+        wp = WorkspacePersistence(workspace_dir)
+        filepath = wp.save_paper(
+            paper_id="2301.12345",
+            title="Test Paper",
+            content="## Introduction\n\nThis paper...",
+            metadata={"authors": "Smith et al.", "year": 2023},
+        )
+        assert filepath.exists()
+        content = filepath.read_text()
+        assert "Test Paper" in content
+        assert "Introduction" in content
+
+    def test_log_tool_failure(self, workspace_dir):
+        wp = WorkspacePersistence(workspace_dir)
+        filepath = wp.log_tool_failure(
+            tool_name="papers",
+            error="arXiv rate limit reached",
+            args={"query": "test"},
+        )
+        assert filepath.exists()
+        data = json.loads(filepath.read_text())
+        assert data["tool"] == "papers"
+        assert "rate limit" in data["error"]
+
+    def test_get_recent_failures(self, workspace_dir):
+        wp = WorkspacePersistence(workspace_dir)
+        wp.log_tool_failure("papers", "Error 1")
+        wp.log_tool_failure("web_search", "Error 2")
+
+        failures = wp.get_recent_failures()
+        assert len(failures) == 2
+
+    def test_log_compute_probe(self, workspace_dir):
+        wp = WorkspacePersistence(workspace_dir)
+        filepath = wp.log_compute_probe(
+            node_name="gpu-server",
+            capabilities={"gpu": "A100", "ram_gb": 128},
+        )
+        assert filepath.exists()
+
+    def test_log_experiment(self, workspace_dir):
+        wp = WorkspacePersistence(workspace_dir)
+        filepath = wp.log_experiment(
+            name="train-bert",
+            command="python train.py --lr 0.001",
+            result={"loss": 0.05, "accuracy": 0.95},
+        )
+        assert filepath.exists()
+        data = json.loads(filepath.read_text())
+        assert data["name"] == "train-bert"
+        assert data["result"]["accuracy"] == 0.95
+
+    def test_state_persistence(self, workspace_dir):
+        wp = WorkspacePersistence(workspace_dir)
+
+        # Initial state: key_findings is either an empty list or absent
+        state = wp.get_state()
+        assert state.get("key_findings") == [] or state.get("key_findings") is None
+
+        # Update state through the first instance
+        wp.update_state(
+            key_findings=["Attention is effective for NLP"],
+            open_questions=["Does it scale?"],
+        )
+
+        # Reload through a second instance created on the same directory
+        wp2 = WorkspacePersistence(workspace_dir)
+        state2 = wp2.get_state()
+        assert "Attention is effective for NLP" in state2["key_findings"]
+        assert "Does it scale?" in state2["open_questions"]
+
+    def test_save_plan(self, workspace_dir):
+        wp = WorkspacePersistence(workspace_dir)
+        filepath = wp.save_plan(
+            plan_content="# Plan\n\n1. Read papers\n2. Train model",
+            conversation_uuid="test-conv-uuid",
+        )
+        assert filepath.exists()
+        assert "Read papers" in filepath.read_text()
+
+    def test_get_workspace_summary(self, workspace_dir):
+        wp = WorkspacePersistence(workspace_dir)
+
+        # Add one search result, one research note and one tool failure
+        wp.save_search_results("q1", "arxiv", [])
+        wp.save_research_note("Note", "Content")
+        wp.log_tool_failure("test", "Error")
+
+        summary = wp.get_workspace_summary()
+        assert summary["search_results"] >= 1
+        assert summary["research_notes"] >= 1
+        assert summary["tool_failures"] >= 1
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index d479428..230115e 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -67,6 +67,7 @@ services:
     volumes:
       - ./backend/configs:/app/backend/configs
       - ./.keys:/app/.keys
+      - ${OPENMLR_WORKSPACES_PATH:-./.workspaces}:/app/.workspaces
     security_opt:
       - no-new-privileges:true
     restart: unless-stopped
@@ -95,6 +96,7 @@ services:
     volumes:
       - ./backend/configs:/app/backend/configs
       - ./.keys:/app/.keys
+      - ${OPENMLR_WORKSPACES_PATH:-./.workspaces}:/app/.workspaces
     security_opt:
       - no-new-privileges:true
     restart: unless-stopped
@@ -116,3 +118,5 @@ services:
 volumes:
   pgdata:
   redisdata:
+  # Note: workspaces use a bind mount (OPENMLR_WORKSPACES_PATH or ./.workspaces)
+  # for easy backup and inspection. Not a named volume.
diff --git a/docker-compose.yml b/docker-compose.yml
index 6eaadfd..127590f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -71,6 +71,7 @@ services:
       - backend-venv:/app/backend/.venv
       - ./frontend/dist:/app/frontend/dist
       - ./.keys:/app/.keys
+      - workspaces:/app/.workspaces
 
   # Worker with auto-restart on code changes
   worker:
@@ -105,6 +106,7 @@ services:
       - ./backend:/app/backend
       - backend-venv:/app/backend/.venv
       - ./.keys:/app/.keys
+      - workspaces:/app/.workspaces
 
   # Docs site with live reload
   docs:
@@ -122,3 +124,4 @@ volumes:
   redisdata:
   backend-venv:
   docs-node-modules:
+  workspaces:       # Project workspaces — persists across container rebuilds
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index 806ce81..501a017 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -5,7 +5,7 @@ import { ComputeSelector } from './components/ComputeSelector';
 import { useSSE } from './hooks/useSSE';
 import { useJobStatus } from './hooks/useJobStatus';
 import { api } from './api';
-import type { AgentEvent, Message, Conversation, User, QuestionsPayload, PlanTask, Resource, ContextUsage, SearchBudget } from './types';
+import type { AgentEvent, Message, Conversation, User, QuestionsPayload, PlanTask, Resource, ContextUsage, SearchBudget, Project } from './types';
 import { MessageList } from './components/MessageList';
 import { InputArea, type Mode } from './components/InputArea';
 import { Sidebar } from './components/Sidebar';
@@ -17,6 +17,8 @@ import { RightPanel } from './components/RightPanel';
 import { ReportDrawer } from './components/ReportDrawer';
 import { AuthGuard } from './components/AuthGuard';
 import { OnboardingModal } from './components/OnboardingModal';
+import { Terminal } from './components/Terminal';
+import { ProjectModal } from './components/ProjectModal';
 import { SettingsPage } from './components/SettingsPage';
 import { ProvidersSettings } from './components/settings/ProvidersSettings';
 import { AgentSettings } from './components/settings/AgentSettings';
@@ -103,6 +105,10 @@ function ChatUI({
   const [inputText, setInputText] = useState('');
   const [computeNodes, setComputeNodes] = useState<any[]>([]);
   const [activeCompute, setActiveCompute] = useState<any>(null);
+  const [projects, setProjects] = useState<Project[]>([]);
+  const [activeProject, setActiveProject] = useState<Project | null>(null);
+  const [showProjectModal, setShowProjectModal] = useState(false);
+  const [terminalOpen, setTerminalOpen] = useState(false);
 
   // Ref to always have current conv UUID in SSE callback (avoids stale closure)
   const currentConvUuidRef = useRef<string | null>(currentConvUuid);
@@ -132,6 +138,15 @@ function ChatUI({
     }
   }, []);
 
+  const loadProjects = useCallback(async () => {
+    // Fetch the project list; any failure (or a missing `projects` field) leaves the list empty.
+    let fetched: Project[] = [];
+    try {
+      fetched = (await api.listProjects()).projects || [];
+    } catch { /* keep the empty fallback */ }
+    setProjects(fetched);
+  }, []);
+
   const loadActiveCompute = useCallback(async (uuid: string) => {
     try {
       const data = await api.getConversationCompute(uuid);
@@ -144,7 +159,7 @@ function ChatUI({
   // Initial load - load conversations and activate the correct one
   useEffect(() => { 
     const init = async () => {
-      await loadComputeNodes();
+      await Promise.all([loadComputeNodes(), loadProjects()]);
       const convs = await loadConversations();
       
       // If URL has a conversation UUID, load it directly
@@ -578,8 +593,12 @@ function ChatUI({
         <Sidebar
           conversations={conversations} currentUuid={currentConvUuid} user={user}
           convStatuses={convStatuses}
+          projects={projects}
+          activeProject={activeProject}
           onSwitch={handleSwitchConversation} onNew={handleNewConversation}
           onDelete={handleDeleteConversation}
+          onSelectProject={setActiveProject}
+          onNewProject={() => setShowProjectModal(true)}
         />
         
         <div 
@@ -630,10 +649,18 @@ function ChatUI({
         </div>
         
         {/* RightPanel is fixed position, doesn't affect flex layout */}
-        <RightPanel tasks={tasks} resources={resources} contextUsage={contextUsage} searchBudget={searchBudget} visible={rightPanelOpen} onToggle={() => setRightPanelOpen((v) => !v)} onViewReport={(r) => setViewingReport(r)} />
+        <RightPanel tasks={tasks} resources={resources} contextUsage={contextUsage} searchBudget={searchBudget} visible={rightPanelOpen} projectUuid={activeProject?.uuid || null} onToggle={() => setRightPanelOpen((v) => !v)} onViewReport={(r) => setViewingReport(r)} />
       </div>
+
+      {/* Terminal panel */}
+      <Terminal
+        projectUuid={activeProject?.uuid || null}
+        visible={terminalOpen}
+        onToggle={() => setTerminalOpen((v) => !v)}
+      />
       
       {viewingReport && <ReportDrawer reportId={viewingReport.id || ''} title={viewingReport.title} cachedContent={viewingReport.content} onClose={() => setViewingReport(null)} />}
+      {showProjectModal && <ProjectModal onClose={() => setShowProjectModal(false)} onCreate={(p) => { setProjects((prev) => [p, ...prev]); setActiveProject(p); }} />}
     </div>
   );
 }
diff --git a/frontend/src/__tests__/RightPanel.test.tsx b/frontend/src/__tests__/RightPanel.test.tsx
index 8b0b5b0..eb5ae61 100644
--- a/frontend/src/__tests__/RightPanel.test.tsx
+++ b/frontend/src/__tests__/RightPanel.test.tsx
@@ -33,6 +33,7 @@ describe('RightPanel', () => {
         contextUsage={mockContext}
         searchBudget={null}
         visible={false}
+        projectUuid={null}
         onToggle={vi.fn()}
         onViewReport={vi.fn()}
       />
@@ -48,6 +49,7 @@ describe('RightPanel', () => {
         contextUsage={null}
         searchBudget={null}
         visible={true}
+        projectUuid={null}
         onToggle={vi.fn()}
         onViewReport={vi.fn()}
       />
@@ -65,6 +67,7 @@ describe('RightPanel', () => {
         contextUsage={null}
         searchBudget={null}
         visible={true}
+        projectUuid={null}
         onToggle={vi.fn()}
         onViewReport={vi.fn()}
       />
@@ -80,6 +83,7 @@ describe('RightPanel', () => {
         contextUsage={null}
         searchBudget={null}
         visible={true}
+        projectUuid={null}
         onToggle={vi.fn()}
         onViewReport={vi.fn()}
       />
@@ -96,6 +100,7 @@ describe('RightPanel', () => {
         contextUsage={mockContext}
         searchBudget={null}
         visible={true}
+        projectUuid={null}
         onToggle={vi.fn()}
         onViewReport={vi.fn()}
       />
@@ -112,6 +117,7 @@ describe('RightPanel', () => {
         contextUsage={null}
         searchBudget={mockSearchBudget}
         visible={true}
+        projectUuid={null}
         onToggle={vi.fn()}
         onViewReport={vi.fn()}
       />
@@ -127,6 +133,7 @@ describe('RightPanel', () => {
         contextUsage={null}
         searchBudget={null}
         visible={true}
+        projectUuid={null}
         onToggle={vi.fn()}
         onViewReport={vi.fn()}
       />
@@ -144,6 +151,7 @@ describe('RightPanel', () => {
         contextUsage={null}
         searchBudget={null}
         visible={true}
+        projectUuid={null}
         onToggle={vi.fn()}
         onViewReport={vi.fn()}
       />
@@ -160,6 +168,7 @@ describe('RightPanel', () => {
         contextUsage={null}
         searchBudget={null}
         visible={false}
+        projectUuid={null}
         onToggle={vi.fn()}
         onViewReport={vi.fn()}
       />
@@ -175,6 +184,7 @@ describe('RightPanel', () => {
         contextUsage={null}
         searchBudget={null}
         visible={false}
+        projectUuid={null}
         onToggle={vi.fn()}
         onViewReport={vi.fn()}
       />
diff --git a/frontend/src/__tests__/Sidebar.test.tsx b/frontend/src/__tests__/Sidebar.test.tsx
index c5fe47b..f31447f 100644
--- a/frontend/src/__tests__/Sidebar.test.tsx
+++ b/frontend/src/__tests__/Sidebar.test.tsx
@@ -51,9 +51,13 @@ describe('Sidebar', () => {
           currentUuid={null}
           user={mockUser}
           convStatuses={{}}
+          projects={[]}
+          activeProject={null}
           onSwitch={vi.fn()}
           onNew={vi.fn()}
           onDelete={vi.fn()}
+          onSelectProject={vi.fn()}
+          onNewProject={vi.fn()}
         />
       </MemoryRouter>
     );
@@ -68,9 +72,13 @@ describe('Sidebar', () => {
           currentUuid={null}
           user={mockUser}
           convStatuses={{}}
+          projects={[]}
+          activeProject={null}
           onSwitch={vi.fn()}
           onNew={vi.fn()}
           onDelete={vi.fn()}
+          onSelectProject={vi.fn()}
+          onNewProject={vi.fn()}
         />
       </MemoryRouter>
     );
@@ -86,9 +94,13 @@ describe('Sidebar', () => {
           currentUuid="conv-1"
           user={mockUser}
           convStatuses={{}}
+          projects={[]}
+          activeProject={null}
           onSwitch={vi.fn()}
           onNew={vi.fn()}
           onDelete={vi.fn()}
+          onSelectProject={vi.fn()}
+          onNewProject={vi.fn()}
         />
       </MemoryRouter>
     );
@@ -107,9 +119,13 @@ describe('Sidebar', () => {
           currentUuid={null}
           user={mockUser}
           convStatuses={{}}
+          projects={[]}
+          activeProject={null}
           onSwitch={onSwitch}
           onNew={vi.fn()}
           onDelete={vi.fn()}
+          onSelectProject={vi.fn()}
+          onNewProject={vi.fn()}
         />
       </MemoryRouter>
     );
@@ -125,9 +141,13 @@ describe('Sidebar', () => {
           currentUuid={null}
           user={mockUser}
           convStatuses={{}}
+          projects={[]}
+          activeProject={null}
           onSwitch={vi.fn()}
           onNew={vi.fn()}
           onDelete={vi.fn()}
+          onSelectProject={vi.fn()}
+          onNewProject={vi.fn()}
         />
       </MemoryRouter>
     );
@@ -142,9 +162,13 @@ describe('Sidebar', () => {
           currentUuid={null}
           user={mockUser}
           convStatuses={{}}
+          projects={[]}
+          activeProject={null}
           onSwitch={vi.fn()}
           onNew={vi.fn()}
           onDelete={vi.fn()}
+          onSelectProject={vi.fn()}
+          onNewProject={vi.fn()}
         />
       </MemoryRouter>
     );
@@ -159,9 +183,13 @@ describe('Sidebar', () => {
           currentUuid={null}
           user={mockUser}
           convStatuses={{}}
+          projects={[]}
+          activeProject={null}
           onSwitch={vi.fn()}
           onNew={vi.fn()}
           onDelete={vi.fn()}
+          onSelectProject={vi.fn()}
+          onNewProject={vi.fn()}
         />
       </MemoryRouter>
     );
@@ -177,9 +205,13 @@ describe('Sidebar', () => {
           currentUuid={null}
           user={mockUser}
           convStatuses={{}}
+          projects={[]}
+          activeProject={null}
           onSwitch={vi.fn()}
           onNew={vi.fn()}
           onDelete={vi.fn()}
+          onSelectProject={vi.fn()}
+          onNewProject={vi.fn()}
         />
       </MemoryRouter>
     );
@@ -198,9 +230,13 @@ describe('Sidebar', () => {
           currentUuid={null}
           user={mockUser}
           convStatuses={{}}
+          projects={[]}
+          activeProject={null}
           onSwitch={vi.fn()}
           onNew={onNew}
           onDelete={vi.fn()}
+          onSelectProject={vi.fn()}
+          onNewProject={vi.fn()}
         />
       </MemoryRouter>
     );
diff --git a/frontend/src/api.ts b/frontend/src/api.ts
index 7d5a3ee..781fd78 100644
--- a/frontend/src/api.ts
+++ b/frontend/src/api.ts
@@ -116,6 +116,28 @@ export const api = {
   createKey: (body: Record<string, any>) => post('/api/keys', body),
   deleteKey: (filename: string) => del(`/api/keys/${filename}`),
 
+  // Projects: CRUD plus listing/attaching/detaching conversations; archived projects are only requested when includeArchived is true
+  listProjects: (includeArchived = false) => get(`/api/projects${includeArchived ? '?include_archived=true' : ''}`),
+  createProject: (name: string, description?: string) => post('/api/projects', { name, description }),
+  getProject: (uuid: string) => get(`/api/projects/${uuid}`),
+  updateProject: (uuid: string, body: Record<string, any>) => put(`/api/projects/${uuid}`, body),
+  deleteProject: (uuid: string) => del(`/api/projects/${uuid}`),
+  listProjectConversations: (uuid: string) => get(`/api/projects/${uuid}/conversations`),
+  attachConversation: (projectUuid: string, convUuid: string) =>
+    post(`/api/projects/${projectUuid}/attach/${convUuid}`, {}),
+  detachConversation: (projectUuid: string, convUuid: string) =>
+    post(`/api/projects/${projectUuid}/detach/${convUuid}`, {}),
+
+  // Project Files: listFiles sends `path` as a query parameter; read/write/delete embed the encodeURIComponent-encoded path in the URL, so '/' travels as %2F. NOTE(review): confirm the backend route and any proxy accept encoded slashes.
+  listFiles: (projectUuid: string, path = '') =>
+    get(`/api/projects/${projectUuid}/files${path ? `?path=${encodeURIComponent(path)}` : ''}`),
+  readFile: (projectUuid: string, filePath: string) =>
+    get(`/api/projects/${projectUuid}/files/${encodeURIComponent(filePath)}`),
+  writeFile: (projectUuid: string, filePath: string, content: string) =>
+    put(`/api/projects/${projectUuid}/files/${encodeURIComponent(filePath)}`, { content }),
+  deleteFile: (projectUuid: string, filePath: string) =>
+    del(`/api/projects/${projectUuid}/files/${encodeURIComponent(filePath)}`),
+
   // Compute Nodes
   getComputeNodes: () => get('/api/compute/nodes'),
   createComputeNode: (body: Record<string, any>) => post('/api/compute/nodes', body),
diff --git a/frontend/src/components/FileTree.tsx b/frontend/src/components/FileTree.tsx
new file mode 100644
index 0000000..a892cc6
--- /dev/null
+++ b/frontend/src/components/FileTree.tsx
@@ -0,0 +1,303 @@
+import { useState, useCallback, useEffect } from 'react';
+import {
+  Folder,
+  FolderOpen,
+  FileText,
+  FileCode,
+  FileJson,
+  Image,
+  ChevronRight,
+  ChevronDown,
+  RefreshCw,
+  AlertCircle,
+} from 'lucide-react';
+import { api } from '../api';
+import type { FileNode } from '../types';
+
+interface Props {
+  projectUuid: string; // project whose files are listed and read through the API
+  onFileSelect?: (path: string, content: string) => void; // called with a file's path and content once the content has been fetched
+}
+
+interface TreeNode extends FileNode {
+  children?: TreeNode[]; // entries fetched for a directory; undefined until the directory has been loaded
+  loading?: boolean; // true while the directory's children are being fetched
+  expanded?: boolean; // whether the directory row is currently open
+}
+
+const FILE_ICONS: Record<string, React.ReactNode> = { // icon per file extension; keys are lowercase and include the leading dot
+  '.py': <FileCode size={14} className="text-blue-400" />,
+  '.js': <FileCode size={14} className="text-yellow-400" />,
+  '.ts': <FileCode size={14} className="text-blue-500" />,
+  '.tsx': <FileCode size={14} className="text-blue-500" />,
+  '.json': <FileJson size={14} className="text-green-400" />,
+  '.md': <FileText size={14} className="text-text-dim" />,
+  '.txt': <FileText size={14} className="text-text-dim" />,
+  '.yaml': <FileCode size={14} className="text-red-400" />,
+  '.yml': <FileCode size={14} className="text-red-400" />,
+  '.png': <Image size={14} className="text-purple-400" />,
+  '.jpg': <Image size={14} className="text-purple-400" />,
+  '.svg': <Image size={14} className="text-purple-400" />,
+};
+
+function getFileIcon(name: string, isDir: boolean): React.ReactNode {
+  if (isDir) return null; // directories are drawn with folder icons by the caller
+  const ext = name.includes('.') ? `.${name.split('.').pop()}`.toLowerCase() : ''; // case-insensitive: 'Plot.PNG' -> '.png'
+  return FILE_ICONS[ext] || <FileText size={14} className="text-text-dim" />; // no or unmapped extension -> generic file icon
+}
+
+function formatSize(bytes: number | null): string {
+  // Short size label: '' when the size is unknown, otherwise B / K / M (1024-based, one decimal for K and M).
+  if (bytes == null) return '';
+  const KIB = 1024, MIB = KIB * KIB;
+  return bytes < KIB ? `${bytes}B` : bytes < MIB ? `${(bytes / KIB).toFixed(1)}K` : `${(bytes / MIB).toFixed(1)}M`;
+}
+
+function TreeItem({
+  node, // entry to render (file or directory)
+  depth, // nesting level; drives the left padding (depth * 16 + 8 px)
+  onToggle, // called with node.path when a directory row is clicked
+  onSelect, // called with node.path when a file row is clicked
+}: {
+  node: TreeNode;
+  depth: number;
+  onToggle: (path: string) => void;
+  onSelect: (path: string) => void;
+}) {
+  return (
+    <div>
+      <button
+        className={`flex items-center gap-1.5 w-full text-left px-2 py-1 text-sm rounded hover:bg-surface-hover transition-colors group ${
+          node.is_dir ? 'text-text' : 'text-text-dim hover:text-text'
+        }`}
+        style={{ paddingLeft: `${depth * 16 + 8}px` }}
+        onClick={() => node.is_dir ? onToggle(node.path) : onSelect(node.path)}
+      >
+        {/* Directories: spinner while loading, otherwise a chevron showing the expanded/collapsed state */}
+        {node.is_dir ? (
+          <span className="shrink-0 text-text-dim">
+            {node.loading ? (
+              <RefreshCw size={12} className="animate-spin" />
+            ) : node.expanded ? (
+              <ChevronDown size={12} />
+            ) : (
+              <ChevronRight size={12} />
+            )}
+          </span>
+        ) : (
+          <span className="w-3 shrink-0" /> // spacer so file names line up with directory names
+        )}
+
+        {/* Icon: open/closed folder for directories, extension-based icon for files */}
+        <span className="shrink-0">
+          {node.is_dir ? (
+            node.expanded ? <FolderOpen size={14} className="text-primary" /> : <Folder size={14} className="text-primary" />
+          ) : (
+            getFileIcon(node.name, false)
+          )}
+        </span>
+
+        {/* Name (truncated when it does not fit) */}
+        <span className="truncate flex-1">{node.name}</span>
+
+        {/* Size (files only, when not null); invisible until the row is hovered */}
+        {!node.is_dir && node.size !== null && (
+          <span className="text-xs text-text-dim shrink-0 opacity-0 group-hover:opacity-100 transition-opacity">
+            {formatSize(node.size)}
+          </span>
+        )}
+      </button>
+
+      {/* Children: rendered recursively one level deeper once loaded; a directory with no entries shows "(empty)" */}
+      {node.is_dir && node.expanded && node.children && (
+        <div>
+          {node.children.map((child) => (
+            <TreeItem
+              key={child.path}
+              node={child}
+              depth={depth + 1}
+              onToggle={onToggle}
+              onSelect={onSelect}
+            />
+          ))}
+          {node.children.length === 0 && (
+            <div
+              className="text-xs text-text-dim italic px-2 py-1"
+              style={{ paddingLeft: `${(depth + 1) * 16 + 8}px` }}
+            >
+              (empty)
+            </div>
+          )}
+        </div>
+      )}
+    </div>
+  );
+}
+
+/** Returns a copy of `items` with `patch` merged into the node at `path`, searched at any depth. */
+const patchTreeNode = (items: TreeNode[], path: string, patch: Partial<TreeNode>): TreeNode[] =>
+  items.map((item) => {
+    if (item.path === path) return { ...item, ...patch };
+    return item.children ? { ...item, children: patchTreeNode(item.children, path, patch) } : item;
+  });
+
+/** Depth-first lookup of the node at `path`; undefined when it is not part of the loaded tree. */
+function findTreeNode(items: TreeNode[], path: string): TreeNode | undefined {
+  for (const item of items) {
+    const hit = item.path === path ? item : findTreeNode(item.children ?? [], path);
+    if (hit) return hit;
+  }
+  return undefined;
+}
+
+/**
+ * File browser for a project workspace: a lazily loaded directory tree plus an
+ * inline preview of the selected file.
+ *
+ * @param projectUuid - Project whose files are listed and read through `api`.
+ * @param onFileSelect - Optional callback receiving the path and content of a successfully read file.
+ */
+export function FileTree({ projectUuid, onFileSelect }: Props) {
+  const [nodes, setNodes] = useState<TreeNode[]>([]);
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState<string | null>(null);
+  const [selectedFile, setSelectedFile] = useState<string | null>(null);
+  const [fileContent, setFileContent] = useState<string | null>(null);
+  const [fileLoading, setFileLoading] = useState(false);
+  const [fileError, setFileError] = useState<string | null>(null);
+
+  // Lists one directory level; a failure is recorded in `error` and yields [].
+  const loadDirectory = useCallback(async (path: string = ''): Promise<TreeNode[]> => {
+    try {
+      const data = await api.listFiles(projectUuid, path);
+      // Children stay undefined until the directory is expanded.
+      return (data.entries || []).map((entry: FileNode) => ({ ...entry, expanded: false, children: undefined }));
+    } catch (err: unknown) {
+      setError((err instanceof Error && err.message) || 'Failed to load files');
+      return [];
+    }
+  }, [projectUuid]);
+
+  // (Re)load the root when the project changes and drop the previous project's preview.
+  useEffect(() => {
+    let cancelled = false;
+    setLoading(true);
+    setError(null);
+    setSelectedFile(null);
+    setFileContent(null);
+    setFileError(null);
+    loadDirectory('').then((entries) => {
+      if (cancelled) return; // superseded by a newer load or an unmount
+      setNodes(entries);
+      setLoading(false);
+    });
+    return () => { cancelled = true; };
+  }, [projectUuid, loadDirectory]);
+
+  // Collapsing only flips `expanded`; expanding also (re)loads the directory's children.
+  const handleToggle = useCallback(async (path: string) => {
+    const target = findTreeNode(nodes, path);
+    if (!target) return;
+    if (target.expanded) {
+      setNodes((prev) => patchTreeNode(prev, path, { expanded: false }));
+      return;
+    }
+    setNodes((prev) => patchTreeNode(prev, path, { loading: true, expanded: true }));
+    const children = await loadDirectory(path);
+    setNodes((prev) => patchTreeNode(prev, path, { loading: false, children }));
+  }, [nodes, loadDirectory]);
+
+  // Reads the clicked file for the preview pane and forwards it to `onFileSelect`.
+  const handleSelect = useCallback(async (path: string) => {
+    setSelectedFile(path);
+    setFileLoading(true);
+    setFileContent(null);
+    setFileError(null);
+    try {
+      const data = await api.readFile(projectUuid, path);
+      // Without `content` the preview falls through to its "Binary file" placeholder.
+      if (data.content !== undefined) {
+        setFileContent(data.content);
+        onFileSelect?.(path, data.content);
+      }
+    } catch (err: unknown) {
+      setFileError((err instanceof Error && err.message) || 'Failed to load file');
+    } finally {
+      setFileLoading(false);
+    }
+  }, [projectUuid, onFileSelect]);
+
+  // Reloads the root listing; doubles as the retry action of the error view.
+  const handleRefresh = useCallback(async () => {
+    setLoading(true);
+    setError(null);
+    const entries = await loadDirectory('');
+    setNodes(entries);
+    setLoading(false);
+  }, [loadDirectory]);
+
+  if (loading) {
+    return (
+      <div className="flex items-center justify-center py-8 text-text-dim text-sm">
+        <RefreshCw size={14} className="animate-spin mr-2" />
+        Loading files...
+      </div>
+    );
+  }
+
+  if (error) {
+    return (
+      <div className="flex items-center gap-2 px-3 py-4 text-error text-sm">
+        <AlertCircle size={14} />
+        <span className="flex-1">{error}</span>
+        <button className="text-xs text-primary hover:underline" onClick={handleRefresh}>Retry</button>
+      </div>
+    );
+  }
+
+  return (
+    <div className="flex flex-col h-full">
+      {/* Header */}
+      <div className="flex items-center justify-between px-3 py-2 border-b border-border">
+        <span className="text-xs uppercase tracking-wider text-text-dim font-semibold">Files</span>
+        <button className="w-6 h-6 rounded flex items-center justify-center text-text-dim hover:text-text hover:bg-surface-hover transition-colors" onClick={handleRefresh} title="Refresh">
+          <RefreshCw size={12} />
+        </button>
+      </div>
+
+      {/* Tree */}
+      <div className="flex-1 overflow-auto py-1">
+        {nodes.length === 0 ? (
+          <div className="text-sm text-text-dim px-3 py-4">No files yet</div>
+        ) : (
+          nodes.map((node) => (
+            <TreeItem key={node.path} node={node} depth={0} onToggle={handleToggle} onSelect={handleSelect} />
+          ))
+        )}
+      </div>
+
+      {/* Selected file preview */}
+      {selectedFile && (
+        <div className="border-t border-border">
+          <div className="flex items-center justify-between px-3 py-1.5 bg-surface-hover">
+            <span className="text-xs text-text-dim truncate">{selectedFile}</span>
+            <button className="w-5 h-5 rounded flex items-center justify-center text-text-dim hover:text-text" onClick={() => { setSelectedFile(null); setFileContent(null); setFileError(null); }} title="Close">
+              ×
+            </button>
+          </div>
+          {fileLoading ? (
+            <div className="px-3 py-4 text-xs text-text-dim">Loading...</div>
+          ) : fileError !== null ? (
+            <div className="px-3 py-4 text-xs text-error">{fileError}</div>
+          ) : fileContent !== null ? (
+            <pre className="px-3 py-2 text-xs text-text overflow-auto max-h-48 font-mono whitespace-pre-wrap">
+              {fileContent.length > 5000 ? fileContent.slice(0, 5000) + '\n...' : fileContent}
+            </pre>
+          ) : (
+            <div className="px-3 py-4 text-xs text-text-dim">Binary file</div>
+          )}
+        </div>
+      )}
+    </div>
+  );
+}
diff --git a/frontend/src/components/ProjectModal.tsx b/frontend/src/components/ProjectModal.tsx
new file mode 100644
index 0000000..7a4fbe7
--- /dev/null
+++ b/frontend/src/components/ProjectModal.tsx
@@ -0,0 +1,104 @@
+import { useState } from 'react';
+import { X, FolderPlus } from 'lucide-react';
+import { api } from '../api';
+import type { Project } from '../types';
+
+interface Props {
+  onClose: () => void; // called by the X and Cancel buttons, and after a successful create
+  onCreate: (project: Project) => void; // receives the project returned by api.createProject
+}
+
+/**
+ * Modal dialog that collects a name and optional description and creates a project.
+ *
+ * @param onClose - Invoked when the dialog is dismissed and after a successful creation.
+ * @param onCreate - Invoked with the project returned by `api.createProject`.
+ */
+export function ProjectModal({ onClose, onCreate }: Props) {
+  const [name, setName] = useState('');
+  const [description, setDescription] = useState('');
+  const [loading, setLoading] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+
+  const handleCreate = async () => {
+    // Enter in the name field bypasses the disabled button, so guard against a
+    // second submission while a request is still in flight.
+    if (!name.trim() || loading) return;
+    setLoading(true);
+    setError(null);
+    try {
+      const data = await api.createProject(name.trim(), description.trim() || undefined);
+      onCreate(data.project);
+      onClose();
+    } catch (err: unknown) {
+      setError((err instanceof Error && err.message) || 'Failed to create project');
+    } finally {
+      setLoading(false);
+    }
+  };
+
+  return (
+    <div className="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm">
+      <div className="bg-surface border border-border rounded-xl shadow-xl w-full max-w-md mx-4">
+        {/* Header */}
+        <div className="flex items-center justify-between px-6 py-4 border-b border-border">
+          <div className="flex items-center gap-2">
+            <FolderPlus size={18} className="text-primary" />
+            <h2 className="text-lg font-semibold text-text">New Project</h2>
+          </div>
+          <button className="w-8 h-8 rounded-lg flex items-center justify-center text-text-dim hover:bg-surface-hover hover:text-text transition-colors" onClick={onClose} aria-label="Close">
+            <X size={16} />
+          </button>
+        </div>
+
+        {/* Form */}
+        <div className="px-6 py-5 space-y-4">
+          <div>
+            <label className="block text-sm font-medium text-text mb-1.5">Project Name</label>
+            <input
+              type="text"
+              value={name}
+              onChange={(e) => setName(e.target.value)}
+              placeholder="e.g., Attention Mechanism Survey"
+              className="w-full px-3 py-2 bg-bg border border-border rounded-lg text-text placeholder-text-dim focus:border-primary focus:outline-none transition-colors"
+              autoFocus
+              onKeyDown={(e) => e.key === 'Enter' && handleCreate()}
+            />
+            <p className="text-xs text-text-dim mt-1">A workspace directory will be created for this project</p>
+          </div>
+
+          <div>
+            <label className="block text-sm font-medium text-text mb-1.5">
+              Description <span className="text-text-dim font-normal">(optional)</span>
+            </label>
+            <textarea
+              value={description}
+              onChange={(e) => setDescription(e.target.value)}
+              placeholder="Brief description of the research goal..."
+              rows={3}
+              className="w-full px-3 py-2 bg-bg border border-border rounded-lg text-text placeholder-text-dim focus:border-primary focus:outline-none transition-colors resize-none"
+            />
+          </div>
+
+          {error && (
+            <div className="text-sm text-error bg-error/10 px-3 py-2 rounded-lg">{error}</div>
+          )}
+        </div>
+
+        {/* Footer */}
+        <div className="flex items-center justify-end gap-3 px-6 py-4 border-t border-border">
+          <button className="px-4 py-2 text-sm text-text-dim hover:text-text transition-colors" onClick={onClose}>
+            Cancel
+          </button>
+          <button
+            className="px-4 py-2 text-sm font-medium bg-primary text-white rounded-lg hover:bg-primary/90 transition-colors disabled:opacity-50"
+            onClick={handleCreate}
+            disabled={!name.trim() || loading}
+          >
+            {loading ? 'Creating...' : 'Create Project'}
+          </button>
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/components/RightPanel.tsx b/frontend/src/components/RightPanel.tsx
index f7e7a20..573cb5f 100644
--- a/frontend/src/components/RightPanel.tsx
+++ b/frontend/src/components/RightPanel.tsx
@@ -15,17 +15,23 @@ import {
   Download,
   ExternalLink,
   ChevronDown,
-  ChevronRight
+  ChevronRight,
+  ListTodo,
+  Files,
 } from 'lucide-react';
 import { api } from '../api';
+import { FileTree } from './FileTree';
 import type { PlanTask, Resource, ContextUsage, SearchBudget } from '../types';
 
+type TabId = 'tasks' | 'files';
+
 interface Props {
   tasks: PlanTask[];
   resources: Resource[];
   contextUsage: ContextUsage | null;
   searchBudget: SearchBudget | null;
   visible: boolean;
+  projectUuid: string | null;
   onToggle: () => void;
   onViewReport: (resource: Resource) => void;
 }
@@ -91,12 +97,13 @@ function downloadFile(content: string, filename: string, mime: string) {
   URL.revokeObjectURL(url);
 }
 
-export function RightPanel({ tasks, resources, contextUsage, searchBudget, visible, onToggle, onViewReport }: Props) {
+export function RightPanel({ tasks, resources, contextUsage, searchBudget, visible, projectUuid, onToggle, onViewReport }: Props) {
   const hasContent = tasks.length > 0 || resources.length > 0;
   const [splitY, setSplitY] = useState(50); // percentage for tasks section
   const [exporting, setExporting] = useState(false);
   const [tasksCollapsed, setTasksCollapsed] = useState(false);
   const [resourcesCollapsed, setResourcesCollapsed] = useState(false);
+  const [activeTab, setActiveTab] = useState<TabId>('tasks');
   const dragging = useRef(false);
   const panelRef = useRef<HTMLElement>(null);
 
@@ -175,9 +182,31 @@ export function RightPanel({ tasks, resources, contextUsage, searchBudget, visib
       onMouseUp={onMouseUp}
       onMouseLeave={onMouseUp}
     >
-      {/* Header */}
-      <div className="flex items-center justify-between px-4 py-3 border-b border-border">
-        <span className="font-semibold text-text">Tasks & Resources</span>
+      {/* Header with tabs */}
+      <div className="flex items-center justify-between px-4 py-2 border-b border-border">
+        <div className="flex items-center gap-1">
+          <button
+            className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${activeTab === 'tasks' ? 'bg-primary/10 text-primary' : 'text-text-dim hover:text-text'}`}
+            onClick={() => setActiveTab('tasks')}
+          >
+            <span className="flex items-center gap-1.5">
+              <ListTodo size={12} />
+              Tasks
+              {tasks.length > 0 && <span className="text-[10px]">({tasks.length})</span>}
+            </span>
+          </button>
+          {projectUuid && (
+            <button
+              className={`px-3 py-1.5 text-xs font-medium rounded-md transition-colors ${activeTab === 'files' ? 'bg-primary/10 text-primary' : 'text-text-dim hover:text-text'}`}
+              onClick={() => setActiveTab('files')}
+            >
+              <span className="flex items-center gap-1.5">
+                <Files size={12} />
+                Files
+              </span>
+            </button>
+          )}
+        </div>
         <button 
           className="w-7 h-7 rounded-lg flex items-center justify-center text-text-dim hover:bg-surface-hover hover:text-text transition-colors"
           onClick={onToggle}
@@ -213,6 +242,16 @@ export function RightPanel({ tasks, resources, contextUsage, searchBudget, visib
         </div>
       )}
 
+      {/* Files tab */}
+      {activeTab === 'files' && projectUuid && (
+        <div className="flex-1 overflow-hidden">
+          <FileTree projectUuid={projectUuid} />
+        </div>
+      )}
+
+      {/* Tasks tab content */}
+      {activeTab === 'tasks' && <>
+
       {/* Paper section — only shown when a paper resource exists */}
       {paperResource && (
         <div className="px-4 py-3 border-b border-border">
@@ -344,6 +383,9 @@ export function RightPanel({ tasks, resources, contextUsage, searchBudget, visib
           </div>
         )}
       </div>
+
+      {/* End of tasks tab */}
+      </>}
     </aside>
   );
 }
diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx
index 0ee69ef..8192ca2 100644
--- a/frontend/src/components/Sidebar.tsx
+++ b/frontend/src/components/Sidebar.tsx
@@ -1,7 +1,7 @@
 import { useState, useMemo } from 'react';
 import { useNavigate } from 'react-router-dom';
 import { setToken } from '../api';
-import type { Conversation, User } from '../types';
+import type { Conversation, User, Project } from '../types';
 import { ConfirmDialog } from './ConfirmDialog';
 import { 
   PanelLeftClose, 
@@ -10,7 +10,10 @@ import {
   Search, 
   Settings, 
   LogOut, 
-  Trash2 
+  Trash2,
+  FolderOpen,
+  ChevronDown,
+  Layers,
 } from 'lucide-react';
 
 type ConvStatus = 'idle' | 'processing' | 'waiting_approval' | 'waiting_input';
@@ -20,9 +23,13 @@ interface Props {
   currentUuid: string | null;
   user: User | null;
   convStatuses: Record<string, ConvStatus>;
+  projects: Project[];
+  activeProject: Project | null;
   onSwitch: (uuid: string) => void;
   onNew: (mode?: string) => void;
   onDelete: (uuid: string) => void;
+  onSelectProject: (project: Project | null) => void;
+  onNewProject: () => void;
 }
 
 function groupByDate(conversations: Conversation[]) {
@@ -52,11 +59,12 @@ function ConvIcon({ status }: { status: ConvStatus }) {
   return <span className={`${base} bg-border`} />;
 }
 
-export function Sidebar({ conversations, currentUuid, user, convStatuses, onSwitch, onNew, onDelete }: Props) {
+export function Sidebar({ conversations, currentUuid, user, convStatuses, projects, activeProject, onSwitch, onNew, onDelete, onSelectProject, onNewProject }: Props) {
   const navigate = useNavigate();
   const [pendingDelete, setPendingDelete] = useState<{ uuid: string; title: string } | null>(null);
   const [search, setSearch] = useState('');
   const [collapsed, setCollapsed] = useState(false);
+  const [projectDropdownOpen, setProjectDropdownOpen] = useState(false);
 
   const filtered = useMemo(() => {
     if (!search.trim()) return conversations;
@@ -107,6 +115,51 @@ export function Sidebar({ conversations, currentUuid, user, convStatuses, onSwit
         </button>
       </div>
 
+      {/* Project selector */}
+      <div className="relative">
+        <button
+          className="w-full flex items-center gap-2 px-3 py-2 bg-bg border border-border rounded-lg text-sm text-text hover:border-primary transition-colors"
+          onClick={() => setProjectDropdownOpen(!projectDropdownOpen)}
+        >
+          <FolderOpen size={14} className="text-primary shrink-0" />
+          <span className="flex-1 truncate text-left">
+            {activeProject ? activeProject.name : 'All Conversations'}
+          </span>
+          <ChevronDown size={14} className={`text-text-dim shrink-0 transition-transform ${projectDropdownOpen ? 'rotate-180' : ''}`} />
+        </button>
+        {projectDropdownOpen && (
+          <div className="absolute left-0 right-0 top-full mt-1 bg-surface border border-border rounded-lg shadow-xl z-20 max-h-60 overflow-auto">
+            <button
+              className={`w-full flex items-center gap-2 px-3 py-2 text-sm text-left hover:bg-surface-hover transition-colors ${!activeProject ? 'text-primary' : 'text-text'}`}
+              onClick={() => { onSelectProject(null); setProjectDropdownOpen(false); }}
+            >
+              <Layers size={14} />
+              All Conversations
+            </button>
+            {projects.map((p) => (
+              <button
+                key={p.uuid}
+                className={`w-full flex items-center gap-2 px-3 py-2 text-sm text-left hover:bg-surface-hover transition-colors ${activeProject?.uuid === p.uuid ? 'text-primary bg-primary/5' : 'text-text'}`}
+                onClick={() => { onSelectProject(p); setProjectDropdownOpen(false); }}
+              >
+                <FolderOpen size={14} />
+                <span className="flex-1 truncate">{p.name}</span>
+                {p.conversation_count !== undefined && (
+                  <span className="text-xs text-text-dim">{p.conversation_count}</span>
+                )}
+              </button>
+            ))}
+            <button
+              className="w-full flex items-center gap-2 px-3 py-2 text-sm text-primary hover:bg-surface-hover transition-colors border-t border-border"
+              onClick={() => { onNewProject(); setProjectDropdownOpen(false); }}
+            >
+              <Plus size={14} />
+              New Project
+            </button>
+          </div>
+        )}
+      </div>
+
       {/* Search */}
       <div className="relative">
         <Search size={16} className="absolute left-3 top-1/2 -translate-y-1/2 text-text-dim" />
diff --git a/frontend/src/components/Terminal.tsx b/frontend/src/components/Terminal.tsx
new file mode 100644
index 0000000..73da6b2
--- /dev/null
+++ b/frontend/src/components/Terminal.tsx
@@ -0,0 +1,208 @@
+import { useEffect, useRef, useState, useCallback } from 'react';
+import {
+  Terminal as TerminalIcon,
+  X,
+  Maximize2,
+  Minimize2,
+} from 'lucide-react';
+
+interface Props {
+  projectUuid: string | null; // null: no connection is attempted and "No project selected" is shown
+  visible: boolean; // false: only the floating "Open terminal" button is rendered
+  onToggle: () => void; // called by the floating open button and the header close button
+}
+
+/**
+ * Interactive terminal connected to the project workspace via WebSocket.
+ * Uses a basic approach without xterm.js dependency — renders terminal output
+ * as plain text and sends each input line on Enter. One socket is kept open while
+ * the panel is visible; it is replaced when `projectUuid` changes and closed when
+ * the panel is hidden or unmounted.
+ *
+ * For a production deployment, install @xterm/xterm and use the attach addon.
+ */
+export function Terminal({ projectUuid, visible, onToggle }: Props) {
+  const [connected, setConnected] = useState(false);
+  const [output, setOutput] = useState<string[]>([]);
+  const [maximized, setMaximized] = useState(false);
+  const wsRef = useRef<WebSocket | null>(null);
+  const outputRef = useRef<HTMLDivElement>(null);
+  const inputRef = useRef<HTMLTextAreaElement>(null);
+  const [inputLine, setInputLine] = useState('');
+
+  // Keep only the newest 2000 chunks so a long session cannot grow memory without bound.
+  const appendOutput = useCallback((chunk: string) => {
+    setOutput((prev) => [...prev, chunk].slice(-2000));
+  }, []);
+
+  // Detach the ref before closing so the old socket's handlers (which only act on
+  // the current socket) cannot clobber the state of a newer connection.
+  const disconnect = useCallback(() => {
+    const ws = wsRef.current;
+    if (!ws) return;
+    wsRef.current = null;
+    ws.close();
+    setConnected(false);
+    appendOutput('\r\n--- Disconnected ---\r\n');
+  }, [appendOutput]);
+
+  const connect = useCallback(() => {
+    if (!projectUuid) return;
+
+    const token = localStorage.getItem('openmlr_token');
+    if (!token) return;
+
+    disconnect(); // never leave a previous socket open alongside the new one
+
+    const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
+    const wsUrl = `${protocol}//${window.location.host}/api/terminal/${encodeURIComponent(projectUuid)}?token=${encodeURIComponent(token)}`;
+
+    try {
+      const ws = new WebSocket(wsUrl);
+      // Binary frames are decoded synchronously so output keeps its arrival order.
+      ws.binaryType = 'arraybuffer';
+      const decoder = new TextDecoder();
+      const isCurrent = () => wsRef.current === ws;
+      wsRef.current = ws;
+
+      ws.onopen = () => {
+        if (!isCurrent()) return;
+        setConnected(true);
+        appendOutput('\r\n--- Connected ---\r\n');
+        // Send initial resize
+        ws.send(JSON.stringify({ type: 'resize', cols: 120, rows: 30 }));
+      };
+
+      ws.onmessage = (event) => {
+        if (!isCurrent()) return;
+        appendOutput(typeof event.data === 'string' ? event.data : decoder.decode(event.data, { stream: true }));
+      };
+
+      ws.onclose = () => {
+        if (!isCurrent()) return;
+        wsRef.current = null;
+        setConnected(false);
+        appendOutput('\r\n--- Disconnected ---\r\n');
+      };
+
+      ws.onerror = () => {
+        if (isCurrent()) setConnected(false);
+      };
+    } catch {
+      setConnected(false);
+    }
+  }, [projectUuid, appendOutput, disconnect]);
+
+  // Connect while visible with a project selected; the cleanup closes the socket
+  // when the panel is hidden, the project changes, or the component unmounts.
+  useEffect(() => {
+    if (visible && projectUuid) connect();
+    return disconnect;
+  }, [visible, projectUuid, connect, disconnect]);
+
+  // Auto-scroll to bottom
+  useEffect(() => {
+    if (outputRef.current) {
+      outputRef.current.scrollTop = outputRef.current.scrollHeight;
+    }
+  }, [output]);
+
+  // Enter submits the line; Tab and Ctrl+C/D/L are forwarded as control characters.
+  const handleKeyDown = useCallback((e: React.KeyboardEvent) => {
+    if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) return;
+
+    if (e.key === 'Enter') {
+      e.preventDefault();
+      wsRef.current.send(JSON.stringify({ type: 'input', data: inputLine + '\n' }));
+      setInputLine('');
+    } else if (e.key === 'Tab') {
+      e.preventDefault();
+      wsRef.current.send(JSON.stringify({ type: 'input', data: '\t' }));
+    } else if (e.ctrlKey && e.key === 'c') {
+      e.preventDefault();
+      wsRef.current.send(JSON.stringify({ type: 'input', data: '\x03' }));
+    } else if (e.ctrlKey && e.key === 'd') {
+      e.preventDefault();
+      wsRef.current.send(JSON.stringify({ type: 'input', data: '\x04' }));
+    } else if (e.ctrlKey && e.key === 'l') {
+      e.preventDefault();
+      wsRef.current.send(JSON.stringify({ type: 'input', data: '\x0c' }));
+      setOutput([]);
+    }
+  }, [inputLine]);
+
+  if (!visible) {
+    return (
+      <button
+        className="fixed bottom-4 right-4 z-30 w-10 h-10 rounded-lg bg-surface border border-border flex items-center justify-center text-text-dim hover:text-text hover:border-primary transition-all shadow-md"
+        onClick={onToggle}
+        title="Open terminal"
+      >
+        <TerminalIcon size={18} />
+      </button>
+    );
+  }
+
+  return (
+    <div
+      className={`bg-[#0d0d0d] border-t border-border flex flex-col ${
+        maximized ? 'fixed inset-0 z-50' : ''
+      }`}
+      style={maximized ? undefined : { height: '280px' }}
+    >
+      {/* Header */}
+      <div className="flex items-center justify-between px-3 py-1.5 bg-[#1a1a1a] border-b border-border shrink-0">
+        <div className="flex items-center gap-2">
+          <TerminalIcon size={14} className="text-primary" />
+          <span className="text-xs font-medium text-text">Terminal</span>
+          <span className={`w-2 h-2 rounded-full ${connected ? 'bg-success' : 'bg-error'}`} />
+          {!connected && projectUuid && (
+            <button className="text-xs text-primary hover:underline" onClick={connect}>Connect</button>
+          )}
+          {!projectUuid && <span className="text-xs text-text-dim">No project selected</span>}
+        </div>
+        <div className="flex items-center gap-1">
+          <button
+            className="w-6 h-6 rounded flex items-center justify-center text-text-dim hover:text-text hover:bg-surface-hover transition-colors"
+            onClick={() => setMaximized(!maximized)}
+            title={maximized ? 'Minimize' : 'Maximize'}
+          >
+            {maximized ? <Minimize2 size={12} /> : <Maximize2 size={12} />}
+          </button>
+          <button
+            className="w-6 h-6 rounded flex items-center justify-center text-text-dim hover:text-text hover:bg-surface-hover transition-colors"
+            onClick={onToggle}
+            title="Close terminal"
+          >
+            <X size={12} />
+          </button>
+        </div>
+      </div>
+
+      {/* Output area */}
+      <div
+        ref={outputRef}
+        className="flex-1 overflow-auto px-3 py-2 font-mono text-xs text-green-400 whitespace-pre-wrap"
+        onClick={() => inputRef.current?.focus()}
+      >
+        {output.join('')}
+      </div>
+
+      {/* Input line */}
+      <div className="flex items-center gap-2 px-3 py-1.5 bg-[#1a1a1a] border-t border-border shrink-0">
+        <span className="text-xs text-primary font-mono">$</span>
+        <textarea
+          ref={inputRef}
+          className="flex-1 bg-transparent text-xs text-green-400 font-mono outline-none resize-none"
+          rows={1}
+          value={inputLine}
+          onChange={(e) => setInputLine(e.target.value)}
+          onKeyDown={handleKeyDown}
+          placeholder={connected ? 'Type command...' : 'Not connected'}
+          disabled={!connected}
+          autoFocus
+        />
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/types.ts b/frontend/src/types.ts
index c0f0a14..cc52cbd 100644
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@@ -81,6 +81,30 @@ export interface QuestionsPayload {
   suggest_mode?: string | null;
 }
 
+// ── Projects ────────────────────────────────────────────
+
+export interface Project {
+  id: number;
+  uuid: string; // used as the React key and for the active-project comparison in the sidebar
+  name: string; // label shown in the sidebar project selector
+  slug: string;
+  description: string | null;
+  workspace_path: string | null;
+  status: 'active' | 'archived';
+  settings: Record<string, any>;
+  conversation_count?: number; // rendered next to the name in the selector when defined
+  created_at: string; // presumably an ISO timestamp — TODO confirm
+  updated_at: string; // presumably an ISO timestamp — TODO confirm
+}
+
+export interface FileNode {
+  name: string;
+  path: string; // passed back to the files API and used as the React key in the file tree
+  is_dir: boolean; // only directories render child rows in the file tree
+  size: number | null; // fed to formatSize in the tree — NOTE(review): null presumably for directories; confirm
+  modified: number; // presumably a Unix timestamp — TODO confirm unit
+}
+
 // ── Task Plan & Resources ───────────────────────────────
 
 export interface PlanTask {
diff --git a/site/docs/.vitepress/config.ts b/site/docs/.vitepress/config.ts
index 9295d0d..7e8173e 100644
--- a/site/docs/.vitepress/config.ts
+++ b/site/docs/.vitepress/config.ts
@@ -87,6 +87,7 @@ export default defineConfig({
       "modes",
       "tools",
       "compute",
+      "projects",
       "architecture",
       "agent-harness",
       "api",
@@ -123,6 +124,7 @@ export default defineConfig({
       {
         text: "Usage",
         items: [
+          { text: "Projects & Workspaces", link: "/projects" },
           { text: "Modes (Plan / Execute)", link: "/modes" },
           { text: "Agent Tools", link: "/tools" },
           { text: "Compute Environments", link: "/compute" },
diff --git a/site/docs/projects.md b/site/docs/projects.md
new file mode 100644
index 0000000..2b3689c
--- /dev/null
+++ b/site/docs/projects.md
@@ -0,0 +1,173 @@
+---
+title: Projects & Workspaces - OpenMLR
+description: Persistent project workspaces in OpenMLR. Knowledge graphs, file trees, interactive terminals, and cross-conversation persistence for ML research.
+---
+
+# Projects & Workspaces
+
+Projects are the central organizing unit in OpenMLR. A project provides a persistent workspace where all research artifacts — papers, code, data, notes, experiment logs, and a knowledge graph — accumulate across multiple conversations.
+
+## Overview
+
+| Concept | Description |
+|---------|-------------|
+| **Project** | A named research initiative (e.g., "Attention Mechanism Survey") |
+| **Workspace** | The filesystem directory backing a project, stored in a Docker volume |
+| **Knowledge Graph** | A lightweight graph of entities and relationships, persisted as JSON |
+| **Conversation** | A chat session; multiple conversations can belong to one project |
+
+### Key Principle: Workspace vs. Compute
+
+The workspace and compute resource are **decoupled**:
+
+- **Workspace** (persistent, local): Always available. Stores all project files. Survives compute changes and new conversations.
+- **Compute** (swappable): Only used for code execution. Can be local Docker, SSH remote, or Modal cloud. Changing compute does not affect workspace files.
+
+## Creating a Project
+
+1. Click the project selector dropdown in the sidebar
+2. Click "New Project"
+3. Enter a name and optional description
+4. A workspace directory is created automatically
+
+## Workspace Directory Structure
+
+```
+.workspaces/
+  my-project/
+    code/                   # Scripts, notebooks, source code
+    data/                   # Downloaded datasets
+    models/                 # Trained models, checkpoints
+    outputs/                # Experiment results, plots, figures
+    papers/                 # Downloaded and parsed papers (.md)
+    research/
+      searches/             # Saved search results (JSON)
+      notes/                # Agent-generated research notes (.md)
+      citations/            # Bibliography, references
+    logs/
+      tool_failures/        # Timestamped logs of failed tools/APIs
+      compute/              # Compute probe results
+      experiments/          # Experiment execution logs
+    venvs/                  # Python virtual environments
+    .project-meta/
+      project.json          # Project metadata
+      knowledge.json        # Knowledge graph (networkx JSON)
+      state.json            # Cross-conversation state
+      plans/                # Task plans per conversation
+```
+
+## Knowledge Graph
+
+Each project has a lightweight knowledge graph powered by [networkx](https://networkx.org/). The graph stores entities and their relationships, enabling cross-conversation knowledge accumulation.
+
+### Entity Types
+
+| Type | Description | Example |
+|------|-------------|---------|
+| `paper` | Research paper | "Attention Is All You Need" |
+| `concept` | Abstract concept | "Self-attention" |
+| `method` | Technique or algorithm | "Multi-head attention" |
+| `dataset` | Dataset | "WMT 2014 EN-DE" |
+| `finding` | Research finding | "Attention outperforms RNNs on translation" |
+| `question` | Open research question | "Does attention scale to 1B params?" |
+| `experiment` | Experiment run | "BLEU comparison on WMT" |
+| `tool` | Software tool | "PyTorch" |
+| `author` | Researcher | "Vaswani et al." |
+| `code_artifact` | Code implementation | "transformer.py" |
+
+### Relationship Types
+
+| Relationship | Description |
+|-------------|-------------|
+| `cites` | Paper cites another paper |
+| `proposes` | Paper proposes a method |
+| `implements` | Code implements a method |
+| `evaluates_on` | Experiment uses a dataset |
+| `introduces` | Paper introduces a dataset |
+| `relates_to` | General association |
+| `answers` | Finding answers a question |
+| `depends_on` | Method/code depends on another |
+| `uses` | Experiment uses a method |
+| `produces` | Experiment produces a finding |
+| `contradicts` | Finding contradicts another |
+| `extends` | Method extends another |
+
+### Agent Usage
+
+The agent uses the `workspace` tool to interact with the knowledge graph:
+
+```
+workspace knowledge_add entity_id="attention-paper" entity_type="paper" label="Attention Is All You Need"
+workspace knowledge_add entity_id="mha" entity_type="method" label="Multi-Head Attention"
+workspace knowledge_relate source_id="attention-paper" target_id="mha" relationship="proposes"
+workspace knowledge_summary   # Get full graph context
+```
+
+## Cross-Conversation Persistence
+
+When you start a new conversation within a project, the agent can:
+
+1. Read the knowledge graph summary to understand prior work
+2. Check recent tool failures to avoid known issues
+3. Review research notes from previous conversations
+4. Build on existing code and data in the workspace
+
+Alongside the knowledge graph, notes, and logs listed above, the `.project-meta/state.json` file tracks cross-conversation state:
+- Key findings across conversations
+- Open research questions
+- Active experiments
+
+## File Tree
+
+The right panel includes a **Files** tab (visible when a project is active) that shows the workspace directory tree. You can:
+
+- Browse directories
+- View file contents
+- See file sizes and types
+
+## Interactive Terminal
+
+A terminal panel at the bottom of the screen provides a real shell connected to the project workspace directory. Use it for:
+
+- Quick file inspections
+- Running one-off commands
+- Interactive debugging
+- Package management
+
+The terminal connects to the workspace directory regardless of which compute resource is selected.
+
+## Docker Volume
+
+Project workspaces are stored outside the container image — in a named Docker volume for development and a bind mount for production — so they persist across container rebuilds:
+
+**Development** (`docker-compose.yml`):
+```yaml
+volumes:
+  workspaces:  # Named volume
+```
+
+**Production** (`docker-compose.prod.yml`):
+```yaml
+volumes:
+  - ${OPENMLR_WORKSPACES_PATH:-./.workspaces}:/app/.workspaces
+```
+
+For production, set `OPENMLR_WORKSPACES_PATH` in your `.env` to a bind mount location for easy backup.
+
+## API Reference
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/api/projects` | GET | List projects |
+| `/api/projects` | POST | Create project |
+| `/api/projects/:uuid` | GET | Project details |
+| `/api/projects/:uuid` | PUT | Update project |
+| `/api/projects/:uuid` | DELETE | Archive project |
+| `/api/projects/:uuid/conversations` | GET | List project conversations |
+| `/api/projects/:uuid/attach/:conv_uuid` | POST | Attach conversation |
+| `/api/projects/:uuid/detach/:conv_uuid` | POST | Detach conversation |
+| `/api/projects/:uuid/files` | GET | List workspace files |
+| `/api/projects/:uuid/files/:path` | GET | Read file |
+| `/api/projects/:uuid/files/:path` | PUT | Write file |
+| `/api/projects/:uuid/files/:path` | DELETE | Delete file |
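+| `ws(s)://host/api/terminal/:uuid?token=<token>` | WS | Interactive terminal (`wss` when served over HTTPS; the auth token is passed as the `token` query parameter) |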
diff --git a/site/docs/tools.md b/site/docs/tools.md
index 87c555b..9860002 100644
--- a/site/docs/tools.md
+++ b/site/docs/tools.md
@@ -95,10 +95,31 @@ In Plan mode, only read-only filesystem tools are available.
 | **SSH** | Remote machine via SSH |
 | **Modal** | Cloud sandbox via Modal |
 
+## Workspace Tools
+
+| Tool | Description | Plan | Execute |
+|------|-------------|:----:|:-------:|
+| `workspace` | Project workspace operations — knowledge graph, notes, search, failure logs | yes | yes |
+
+### Workspace Operations
+
+| Operation | Description |
+|-----------|-------------|
+| `status` | View workspace summary (file counts, knowledge graph size, recent failures) |
+| `search` | Search files by name or content |
+| `note` | Save a research note with topic and content |
+| `knowledge_add` | Add entity to the knowledge graph |
+| `knowledge_relate` | Add relationship between entities |
+| `knowledge_query` | Search entities in the knowledge graph |
+| `knowledge_summary` | Get full knowledge graph context for the conversation |
+| `recent_failures` | View recent tool/API failure logs |
+
+See [Projects & Workspaces](/projects) for details on the knowledge graph entity and relationship types.
+
 ## Mode Restrictions
 
 Tools are filtered based on the current mode before being sent to the LLM. See [Modes](/modes) for details on the enforcement layers.
 
 In summary:
-- **Plan mode**: `ask_user`, `plan_tool`, read-only filesystem, web search, papers, GitHub
+- **Plan mode**: `ask_user`, `plan_tool`, `workspace`, read-only filesystem, web search, papers, GitHub
 - **Execute mode**: Everything except `ask_user`

From ac1512a972b60d659d88d05e05cf93963fcb7215 Mon Sep 17 00:00:00 2001
From: xprilion <xprilion@gmail.com>
Date: Mon, 27 Apr 2026 18:40:08 +0530
Subject: [PATCH 2/7] Fix eslint pre-commit hook path resolution for new files

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d4c6539..05daa53 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -20,7 +20,7 @@ repos:
     hooks:
       - id: eslint
         name: eslint (frontend)
-        entry: bash -c 'cd frontend && npx eslint --max-warnings 50 "$@"' --
+        entry: bash -c 'args=(); for f; do args+=("${f#frontend/}"); done; cd frontend && npx eslint --max-warnings 50 "${args[@]}"' --
         language: system
         files: ^frontend/src/.*\.(ts|tsx)$
         types: [file]

From fd2d4345200614e5986c061112d2988b172f9b6b Mon Sep 17 00:00:00 2001
From: xprilion <xprilion@gmail.com>
Date: Mon, 27 Apr 2026 18:42:57 +0530
Subject: [PATCH 3/7] Add python-multipart dependency for file upload endpoint

---
 backend/pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index 0ffb822..e91b37f 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -11,6 +11,7 @@ dependencies = [
     "fastapi>=0.115.0",
     "uvicorn[standard]>=0.34.0",
     "sse-starlette>=2.0.0",
+    "python-multipart>=0.0.18",
 
     # Database
     "sqlalchemy[asyncio]>=2.0.0",

From fa0fde3033cad05a60a47d76b12cc14bd33a0cdd Mon Sep 17 00:00:00 2001
From: xprilion <xprilion@gmail.com>
Date: Mon, 27 Apr 2026 18:45:05 +0530
Subject: [PATCH 4/7] Fix dev entrypoint to always sync deps on startup

---
 Dockerfile.dev | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Dockerfile.dev b/Dockerfile.dev
index a7620ae..eeda066 100644
--- a/Dockerfile.dev
+++ b/Dockerfile.dev
@@ -36,9 +36,11 @@ if ! /app/backend/.venv/bin/python -c "import sys" 2>/dev/null; then
     cd /app/backend && uv sync && uv pip install "watchdog[watchmedo]"
     echo "[dev-entrypoint] Dependencies installed."
 else
-    # Venv exists and works, but ensure watchmedo is there too
+    # Venv works — run uv sync to pick up any new/changed dependencies
+    echo "[dev-entrypoint] Syncing dependencies..."
+    cd /app/backend && uv sync --quiet 2>/dev/null || true
+    # Ensure watchmedo is there too
     if ! command -v watchmedo >/dev/null 2>&1; then
-        echo "[dev-entrypoint] Installing watchmedo..."
         cd /app/backend && uv pip install "watchdog[watchmedo]"
     fi
 fi

From 6d31bb273cf0b7a4dcc158ca360fedf1aeb0e241 Mon Sep 17 00:00:00 2001
From: xprilion <xprilion@gmail.com>
Date: Mon, 27 Apr 2026 22:01:52 +0530
Subject: [PATCH 5/7] Add terminal and better model picker

---
 .gitignore                                    |   2 +
 README.md                                     |   6 +-
 backend/benchmark_small_models.py             | 192 +++---
 backend/openmlr/agent/context.py              |  19 +-
 backend/openmlr/agent/llm.py                  | 162 +++--
 backend/openmlr/agent/loop.py                 | 317 +++++-----
 backend/openmlr/agent/types.py                |   8 +
 backend/openmlr/app.py                        |  67 ++-
 backend/openmlr/auth/security.py              |   6 +-
 backend/openmlr/celery_app.py                 |   6 -
 backend/openmlr/compute/capabilities.py       |   2 +
 backend/openmlr/compute/probe.py              |  10 +-
 backend/openmlr/config.py                     |   3 +
 backend/openmlr/keys/manager.py               |  20 +-
 backend/openmlr/models.py                     |  19 +
 backend/openmlr/routes/agent.py               | 100 +++-
 backend/openmlr/routes/compute.py             |  24 +-
 backend/openmlr/routes/keys.py                |  14 +-
 backend/openmlr/routes/projects.py            |  43 +-
 backend/openmlr/routes/settings.py            | 556 +++++++++++++++---
 backend/openmlr/routes/terminal.py            |  38 +-
 backend/openmlr/sandbox/interface.py          |   1 +
 backend/openmlr/sandbox/modal_sandbox.py      |   6 +-
 backend/openmlr/sandbox/ssh.py                |  26 +-
 backend/openmlr/services/event_bus.py         |   2 +
 backend/openmlr/services/job_manager.py       |   2 +
 backend/openmlr/services/session_manager.py   |  45 +-
 backend/openmlr/tasks/agent_tasks.py          |  72 ++-
 backend/openmlr/tasks/compute_tasks.py        |  20 +-
 backend/openmlr/tools/ask_user.py             |  48 +-
 backend/openmlr/tools/compute_tools.py        |  72 ++-
 backend/openmlr/tools/github.py               |  33 +-
 backend/openmlr/tools/http_utils.py           |  30 +-
 backend/openmlr/tools/local.py                | 100 +++-
 backend/openmlr/tools/mcp.py                  |   9 +-
 backend/openmlr/tools/plan.py                 |  92 ++-
 backend/openmlr/tools/research.py             | 211 ++++---
 backend/openmlr/tools/sandbox_tools.py        |  43 +-
 backend/openmlr/tools/search.py               |   5 +-
 backend/openmlr/tools/writing.py              |  29 +-
 backend/tests/test_agent_loop.py              |  89 ++-
 backend/tests/test_app.py                     |   3 +
 backend/tests/test_auth.py                    |   4 +
 backend/tests/test_celery_app.py              |   8 +
 backend/tests/test_compute.py                 | 249 +++++---
 backend/tests/test_config.py                  |   4 +
 backend/tests/test_context.py                 |  32 +-
 backend/tests/test_db_engine.py               |   7 +
 backend/tests/test_db_operations.py           | 134 +++--
 backend/tests/test_dependencies.py            |   3 +
 backend/tests/test_doom_loop.py               |   7 +-
 backend/tests/test_event_bus.py               |  12 +-
 backend/tests/test_job_manager.py             |  14 +-
 backend/tests/test_llm.py                     |  53 +-
 backend/tests/test_models.py                  |  29 +-
 backend/tests/test_prompts.py                 |  11 +-
 backend/tests/test_redis_pubsub.py            |   4 +
 backend/tests/test_routes_health.py           |   1 +
 backend/tests/test_routes_settings.py         |  32 +-
 backend/tests/test_session.py                 |   6 +
 backend/tests/test_session_manager.py         |   4 +-
 backend/tests/test_tool_registry.py           |   4 +-
 backend/tests/test_tools_local.py             |   1 +
 backend/tests/test_tools_papers.py            |   1 +
 backend/tests/test_tools_writing.py           |  69 ++-
 docker-compose.yml                            |  21 +-
 frontend/package.json                         |   3 +
 frontend/src/App.tsx                          |  37 +-
 frontend/src/__tests__/ModelModal.test.tsx    |  91 ++-
 .../src/__tests__/ProvidersSettings.test.tsx  |   2 +
 frontend/src/api.ts                           |   3 +-
 frontend/src/components/ModelModal.tsx        | 355 ++++++++---
 frontend/src/components/OnboardingModal.tsx   | 142 +++--
 .../src/components/ProjectManageModal.tsx     | 194 ++++++
 frontend/src/components/Sidebar.tsx           |  45 +-
 frontend/src/components/Terminal.tsx          | 216 ++++---
 .../components/settings/ProvidersSettings.tsx | 270 ++++++++-
 frontend/src/index.css                        |  26 +-
 frontend/src/types.ts                         |   4 +
 frontend/vite.config.ts                       |  12 +-
 pnpm-lock.yaml                                |  24 +
 site/docs/configuration.md                    |  38 +-
 site/docs/setup.md                            |  28 +-
 83 files changed, 3519 insertions(+), 1233 deletions(-)
 create mode 100644 frontend/src/components/ProjectManageModal.tsx

diff --git a/.gitignore b/.gitignore
index ada3f1a..4d8f9cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,8 @@ Thumbs.db
 *.swo
 *~
 
+.pnpm-store/
+
 # Environment
 .env
 .env.local
diff --git a/README.md b/README.md
index d1cbc2d..b46cb7a 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,8 @@
 - **Paper writing** — Section-by-section drafting with auto-save. Export to Markdown/LaTeX.
 - **Compute environments** — Execute code on local Docker, SSH remotes, or Modal cloud. Workspace persists independently of compute.
 - **Background jobs** — Celery + Redis. Close the browser, come back later.
-- **Multi-provider LLMs** — OpenAI, Anthropic, OpenRouter, plus local models (Ollama, LM Studio).
+- **Multi-provider LLMs** — OpenAI, Anthropic, OpenRouter, plus local models (Ollama, LM Studio). Add custom providers with OpenAI SDK, Anthropic SDK, OpenRouter, or LiteLLM compatibility.
+- **Model picker** — Browse models grouped by provider with logos, sorted by release date. Recently used models appear at the top. The model list is fetched live from [models.dev](https://models.dev).
 - **MCP servers** — Connect external tools via the Model Context Protocol.
 - **Onboarding flow** — Guided setup when no LLM provider is configured.
 
@@ -54,6 +55,9 @@ make dev-up         # Start with live reload
 make dev-logs       # Watch logs
 ```
 
+Open `http://localhost:5173` for the UI (Vite HMR).
+`http://localhost:3000` serves the API with Swagger docs at `/docs`.
+
 ### Native
 ```bash
 make install        # Install dependencies
diff --git a/backend/benchmark_small_models.py b/backend/benchmark_small_models.py
index b17815b..da93ddf 100644
--- a/backend/benchmark_small_models.py
+++ b/backend/benchmark_small_models.py
@@ -4,46 +4,22 @@
 Tests performance, memory usage, and inference speed
 """
 
-import torch
-import time
-import psutil
 import gc
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from typing import Dict, List, Tuple
 import json
+import time
+
+import psutil
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Test models (all <1B parameters)
 MODELS_TO_TEST = [
-    {
-        "name": "Microsoft DialoGPT-small", 
-        "model_id": "microsoft/DialoGPT-small",
-        "params": "117M"
-    },
-    {
-        "name": "DistilGPT-2", 
-        "model_id": "distilgpt2",
-        "params": "82M"
-    },
-    {
-        "name": "GPT-2 Small", 
-        "model_id": "gpt2",
-        "params": "124M"
-    },
-    {
-        "name": "Qwen2-0.5B", 
-        "model_id": "Qwen/Qwen2-0.5B",
-        "params": "494M"
-    },
-    {
-        "name": "SmolLM-135M", 
-        "model_id": "HuggingFaceTB/SmolLM-135M",
-        "params": "135M"
-    },
-    {
-        "name": "TinyLlama-1.1B", 
-        "model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-        "params": "1.1B"
-    }
+    {"name": "Microsoft DialoGPT-small", "model_id": "microsoft/DialoGPT-small", "params": "117M"},
+    {"name": "DistilGPT-2", "model_id": "distilgpt2", "params": "82M"},
+    {"name": "GPT-2 Small", "model_id": "gpt2", "params": "124M"},
+    {"name": "Qwen2-0.5B", "model_id": "Qwen/Qwen2-0.5B", "params": "494M"},
+    {"name": "SmolLM-135M", "model_id": "HuggingFaceTB/SmolLM-135M", "params": "135M"},
+    {"name": "TinyLlama-1.1B", "model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "params": "1.1B"},
 ]
 
 # Test prompts for different capabilities
@@ -55,199 +31,213 @@
     "What is 15 * 23 + 7?",
 ]
 
+
 def get_memory_usage() -> float:
     """Get current memory usage in GB"""
     return psutil.virtual_memory().used / (1024**3)
 
+
 def get_model_size(model) -> float:
     """Get model size in MB"""
-    param_count = sum(p.numel() for p in model.parameters())
     param_size = sum(p.numel() * p.element_size() for p in model.parameters())
     return param_size / (1024**2)
 
-def benchmark_model(model_info: Dict) -> Dict:
+
+def benchmark_model(model_info: dict) -> dict:
     """Benchmark a single model"""
     print(f"\n🔍 Testing {model_info['name']} ({model_info['params']})")
-    
+
     results = {
-        "name": model_info['name'],
-        "model_id": model_info['model_id'],
-        "params": model_info['params'],
+        "name": model_info["name"],
+        "model_id": model_info["model_id"],
+        "params": model_info["params"],
         "load_time": 0,
         "model_size_mb": 0,
         "memory_usage_gb": 0,
         "inference_times": [],
         "tokens_per_second": [],
         "outputs": {},
-        "errors": []
+        "errors": [],
     }
-    
+
     try:
         # Clear memory
         gc.collect()
         if torch.backends.mps.is_available():
             torch.mps.empty_cache()
-        
+
         initial_memory = get_memory_usage()
-        
+
         # Load model
         start_time = time.time()
         print("  Loading tokenizer and model...")
-        
-        tokenizer = AutoTokenizer.from_pretrained(model_info['model_id'])
+
+        tokenizer = AutoTokenizer.from_pretrained(model_info["model_id"])
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
-            
+
         model = AutoModelForCausalLM.from_pretrained(
-            model_info['model_id'],
+            model_info["model_id"],
             torch_dtype=torch.float16 if torch.backends.mps.is_available() else torch.float32,
             device_map="auto" if torch.backends.mps.is_available() else None,
-            trust_remote_code=True
+            trust_remote_code=True,
         )
-        
+
         # Move to MPS if available
         if torch.backends.mps.is_available():
             model = model.to("mps")
-        
+
         load_time = time.time() - start_time
         results["load_time"] = load_time
         results["model_size_mb"] = get_model_size(model)
         results["memory_usage_gb"] = get_memory_usage() - initial_memory
-        
+
         print(f"  ✅ Loaded in {load_time:.2f}s")
         print(f"  📦 Model size: {results['model_size_mb']:.1f} MB")
         print(f"  🧠 Memory usage: {results['memory_usage_gb']:.2f} GB")
-        
+
         # Test inference
         print("  🚀 Running inference tests...")
-        
+
         for i, prompt in enumerate(TEST_PROMPTS):
             try:
                 # Tokenize
                 inputs = tokenizer(prompt, return_tensors="pt", padding=True)
                 if torch.backends.mps.is_available():
                     inputs = {k: v.to("mps") for k, v in inputs.items()}
-                
+
                 # Generate
                 start_time = time.time()
-                
+
                 with torch.no_grad():
                     outputs = model.generate(
                         **inputs,
                         max_new_tokens=50,
                         do_sample=True,
                         temperature=0.7,
-                        pad_token_id=tokenizer.eos_token_id
+                        pad_token_id=tokenizer.eos_token_id,
                     )
-                
+
                 inference_time = time.time() - start_time
-                
+
                 # Decode output
                 generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-                
+
                 # Calculate tokens/second
-                new_tokens = len(outputs[0]) - len(inputs['input_ids'][0])
+                new_tokens = len(outputs[0]) - len(inputs["input_ids"][0])
                 tokens_per_sec = new_tokens / inference_time if inference_time > 0 else 0
-                
+
                 results["inference_times"].append(inference_time)
                 results["tokens_per_second"].append(tokens_per_sec)
                 results["outputs"][f"prompt_{i}"] = {
                     "prompt": prompt,
                     "output": generated_text,
                     "inference_time": inference_time,
-                    "tokens_per_sec": tokens_per_sec
+                    "tokens_per_sec": tokens_per_sec,
                 }
-                
-                print(f"    Prompt {i+1}: {tokens_per_sec:.1f} tokens/sec")
-                
+
+                print(f"    Prompt {i + 1}: {tokens_per_sec:.1f} tokens/sec")
+
             except Exception as e:
                 error_msg = f"Error on prompt {i}: {str(e)}"
                 results["errors"].append(error_msg)
                 print(f"    ❌ {error_msg}")
-        
+
         # Calculate averages
         if results["inference_times"]:
-            results["avg_inference_time"] = sum(results["inference_times"]) / len(results["inference_times"])
-            results["avg_tokens_per_second"] = sum(results["tokens_per_second"]) / len(results["tokens_per_second"])
-        
+            results["avg_inference_time"] = sum(results["inference_times"]) / len(
+                results["inference_times"]
+            )
+            results["avg_tokens_per_second"] = sum(results["tokens_per_second"]) / len(
+                results["tokens_per_second"]
+            )
+
         print(f"  📊 Average: {results.get('avg_tokens_per_second', 0):.1f} tokens/sec")
-        
+
     except Exception as e:
         error_msg = f"Failed to load {model_info['name']}: {str(e)}"
         results["errors"].append(error_msg)
         print(f"  ❌ {error_msg}")
-    
+
     finally:
         # Cleanup
-        if 'model' in locals():
+        if "model" in locals():
             del model
-        if 'tokenizer' in locals():
+        if "tokenizer" in locals():
             del tokenizer
         gc.collect()
         if torch.backends.mps.is_available():
             torch.mps.empty_cache()
-    
+
     return results
 
+
 def main():
     """Run benchmarks on all models"""
     print("🤖 Small Language Model Benchmark")
-    print("="*50)
-    print(f"💻 Device: Apple M3 with MPS")
+    print("=" * 50)
+    print("💻 Device: Apple M3 with MPS")
     print(f"🧠 Total Memory: {psutil.virtual_memory().total / (1024**3):.1f} GB")
     print(f"🔥 PyTorch: {torch.__version__}")
-    
+
     all_results = []
-    
+
     for model_info in MODELS_TO_TEST:
         results = benchmark_model(model_info)
         all_results.append(results)
-    
+
     # Summary
-    print("\n" + "="*60)
+    print("\n" + "=" * 60)
     print("📊 BENCHMARK SUMMARY")
-    print("="*60)
-    
+    print("=" * 60)
+
     successful_models = [r for r in all_results if not r["errors"]]
-    
+
     if successful_models:
         # Sort by tokens per second
         successful_models.sort(key=lambda x: x.get("avg_tokens_per_second", 0), reverse=True)
-        
+
         print(f"{'Model':<25} {'Params':<8} {'Size (MB)':<10} {'Tokens/sec':<12} {'Load Time':<10}")
         print("-" * 75)
-        
+
         for result in successful_models:
-            print(f"{result['name']:<25} "
-                  f"{result['params']:<8} "
-                  f"{result['model_size_mb']:<10.1f} "
-                  f"{result.get('avg_tokens_per_second', 0):<12.1f} "
-                  f"{result['load_time']:<10.2f}")
-        
+            print(
+                f"{result['name']:<25} "
+                f"{result['params']:<8} "
+                f"{result['model_size_mb']:<10.1f} "
+                f"{result.get('avg_tokens_per_second', 0):<12.1f} "
+                f"{result['load_time']:<10.2f}"
+            )
+
         # Best performer
         best = successful_models[0]
         print(f"\n🏆 Best Performer: {best['name']}")
         print(f"   Speed: {best.get('avg_tokens_per_second', 0):.1f} tokens/sec")
-        print(f"   Efficiency: {best.get('avg_tokens_per_second', 0)/best['model_size_mb']:.3f} tokens/sec/MB")
-        
+        print(
+            f"   Efficiency: {best.get('avg_tokens_per_second', 0) / best['model_size_mb']:.3f} tokens/sec/MB"
+        )
+
         # Show sample output from best model
         if "prompt_0" in best["outputs"]:
             print(f"\n💬 Sample output from {best['name']}:")
             print(f"   Prompt: {best['outputs']['prompt_0']['prompt']}")
             print(f"   Output: {best['outputs']['prompt_0']['output']}")
-    
+
     # Save detailed results
     with open("benchmark_results.json", "w") as f:
         json.dump(all_results, f, indent=2)
-    
-    print(f"\n📄 Detailed results saved to benchmark_results.json")
-    
+
+    print("\n📄 Detailed results saved to benchmark_results.json")
+
     # Failed models
     failed_models = [r for r in all_results if r["errors"]]
     if failed_models:
         print(f"\n❌ Failed to test {len(failed_models)} models:")
         for result in failed_models:
-            print(f"   {result['name']}: {result['errors'][0] if result['errors'] else 'Unknown error'}")
+            print(
+                f"   {result['name']}: {result['errors'][0] if result['errors'] else 'Unknown error'}"
+            )
+
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/backend/openmlr/agent/context.py b/backend/openmlr/agent/context.py
index bae7c9d..5dc2b01 100644
--- a/backend/openmlr/agent/context.py
+++ b/backend/openmlr/agent/context.py
@@ -96,8 +96,7 @@ def _patch_dangling_tool_calls(self) -> None:
             if msg.role == "assistant" and msg.tool_calls:
                 for tc in msg.tool_calls:
                     has_result = any(
-                        m.role == "tool" and m.tool_call_id == tc.id
-                        for m in self.messages[i + 1 :]
+                        m.role == "tool" and m.tool_call_id == tc.id for m in self.messages[i + 1 :]
                     )
                     if not has_result:
                         stub = Message(
@@ -122,13 +121,15 @@ async def compact(self, llm_call) -> str | None:
         ]
         for msg in middle:
             summary_messages.append({"role": msg.role, "content": msg.content})
-        summary_messages.append({
-            "role": "user",
-            "content": (
-                "Provide a concise summary focusing on: key decisions, problems solved, "
-                "current task progress, files/resources created, and what to do next."
-            ),
-        })
+        summary_messages.append(
+            {
+                "role": "user",
+                "content": (
+                    "Provide a concise summary focusing on: key decisions, problems solved, "
+                    "current task progress, files/resources created, and what to do next."
+                ),
+            }
+        )
 
         summary = await llm_call(summary_messages, self.config)
         if summary:
diff --git a/backend/openmlr/agent/llm.py b/backend/openmlr/agent/llm.py
index 03bfc86..3bdc2e0 100644
--- a/backend/openmlr/agent/llm.py
+++ b/backend/openmlr/agent/llm.py
@@ -13,8 +13,24 @@ class LLMProvider:
     """Handles LLM calls across multiple providers with streaming and retry."""
 
     @staticmethod
-    def _get_api_key(model_name: str) -> str | None:
+    def _find_custom_provider(model_name: str, custom_providers: list | None) -> dict | None:
+        """Find matching custom provider for a model name."""
+        if not custom_providers:
+            return None
+        mn = model_name.lower()
+        for cp in custom_providers:
+            pid = cp.get("id", "").lower()
+            if pid and mn.startswith(f"{pid}/"):
+                return cp
+        return None
+
+    @staticmethod
+    def _get_api_key(model_name: str, custom_providers: list | None = None) -> str | None:
         mn = model_name.lower()
+        # Check custom providers first
+        cp = LLMProvider._find_custom_provider(model_name, custom_providers)
+        if cp:
+            return cp.get("api_key")
         if mn.startswith("openai/"):
             return os.environ.get("OPENAI_API_KEY")
         if mn.startswith("anthropic/"):
@@ -33,16 +49,35 @@ def _get_api_key(model_name: str) -> str | None:
         )
 
     @staticmethod
-    def _normalize_model(model_name: str) -> str:
-        for prefix in ("openai/", "openrouter/", "anthropic/", "litellm/", "local/", "ollama/", "lmstudio/", "opencode-go/"):
+    def _normalize_model(model_name: str, custom_providers: list | None = None) -> str:
+        # Check custom provider prefixes first
+        cp = LLMProvider._find_custom_provider(model_name, custom_providers)
+        if cp:
+            pid = cp.get("id", "")
+            if pid and model_name.lower().startswith(f"{pid.lower()}/"):
+                return model_name[len(pid) + 1 :]
+        for prefix in (
+            "openai/",
+            "openrouter/",
+            "anthropic/",
+            "litellm/",
+            "local/",
+            "ollama/",
+            "lmstudio/",
+            "opencode-go/",
+        ):
             if model_name.startswith(prefix):
-                return model_name[len(prefix):]
+                return model_name[len(prefix) :]
         return model_name
 
     @staticmethod
-    def _get_base_url(model_name: str) -> str | None:
+    def _get_base_url(model_name: str, custom_providers: list | None = None) -> str | None:
         """Get the base URL for local/custom OpenAI-compatible APIs."""
         mn = model_name.lower()
+        # Check custom providers first
+        cp = LLMProvider._find_custom_provider(model_name, custom_providers)
+        if cp:
+            return cp.get("api_base", "").rstrip("/")
         if mn.startswith("local/"):
             # Custom base URL from env
             return os.environ.get("LOCAL_API_BASE", "http://localhost:8000/v1")
@@ -78,11 +113,16 @@ def _is_anthropic_model(model_name: str) -> bool:
         return model_name.lower().startswith("anthropic/")
 
     @staticmethod
-    def _uses_anthropic_format(model_name: str) -> bool:
-        """Check if model uses Anthropic message format (native Anthropic or OpenCode Go Anthropic models)."""
+    def _uses_anthropic_format(model_name: str, custom_providers: list | None = None) -> bool:
+        """Check if model uses Anthropic message format (native Anthropic, OpenCode Go Anthropic, or custom provider with anthropic-sdk)."""
         if LLMProvider._is_anthropic_model(model_name):
             return True
-        return LLMProvider._is_opencode_go_anthropic_format(model_name)
+        if LLMProvider._is_opencode_go_anthropic_format(model_name):
+            return True
+        cp = LLMProvider._find_custom_provider(model_name, custom_providers)
+        if cp and cp.get("sdk_type") == "anthropic-sdk":
+            return True
+        return False
 
     # ── Public API ────────────────────────────────────────
 
@@ -135,10 +175,20 @@ async def generate_title(
     @staticmethod
     def _is_retryable(e: Exception) -> bool:
         msg = str(e).lower()
-        return any(x in msg for x in [
-            "429", "rate", "timeout", "server_error", "503", "502",
-            "overloaded", "connection", "capacity",
-        ])
+        return any(
+            x in msg
+            for x in [
+                "429",
+                "rate",
+                "timeout",
+                "server_error",
+                "503",
+                "502",
+                "overloaded",
+                "connection",
+                "capacity",
+            ]
+        )
 
     @staticmethod
     async def _call_with_retry(
@@ -150,7 +200,7 @@ async def _call_with_retry(
         last_error = None
         for attempt in range(max_retries):
             try:
-                if LLMProvider._uses_anthropic_format(config.model_name):
+                if LLMProvider._uses_anthropic_format(config.model_name, config.custom_providers):
                     return await LLMProvider._call_anthropic(messages, config, tools)
                 else:
                     return await LLMProvider._call_openai(messages, config, tools)
@@ -171,7 +221,7 @@ async def _stream_with_retry(
         last_error = None
         for attempt in range(3):
             try:
-                if LLMProvider._uses_anthropic_format(config.model_name):
+                if LLMProvider._uses_anthropic_format(config.model_name, config.custom_providers):
                     async for chunk in LLMProvider._stream_anthropic(messages, config, tools):
                         yield chunk
                 else:
@@ -193,12 +243,15 @@ def _openai_client(config: AgentConfig):
         import logging
 
         from openai import AsyncOpenAI
+
         logger = logging.getLogger(__name__)
 
-        api_key = LLMProvider._get_api_key(config.model_name)
-        base_url = LLMProvider._get_base_url(config.model_name)
+        api_key = LLMProvider._get_api_key(config.model_name, config.custom_providers)
+        base_url = LLMProvider._get_base_url(config.model_name, config.custom_providers)
 
-        logger.debug(f"[LLM] Model: {config.model_name}, Base URL: {base_url}, API key set: {bool(api_key)}")
+        logger.debug(
+            f"[LLM] Model: {config.model_name}, Base URL: {base_url}, API key set: {bool(api_key)}"
+        )
 
         kwargs = {"api_key": api_key}
         if base_url:
@@ -227,7 +280,7 @@ async def _call_openai(
         tools: list[dict] | None,
     ) -> LLMResult:
         client = LLMProvider._openai_client(config)
-        model = LLMProvider._normalize_model(config.model_name)
+        model = LLMProvider._normalize_model(config.model_name, config.custom_providers)
 
         params = {"model": model, "messages": messages, "max_tokens": 4096}
         openai_tools = LLMProvider._openai_tool_param(tools)
@@ -268,7 +321,7 @@ async def _stream_openai(
         tools: list[dict] | None,
     ) -> AsyncGenerator[str | ToolCall | dict, None]:
         client = LLMProvider._openai_client(config)
-        model = LLMProvider._normalize_model(config.model_name)
+        model = LLMProvider._normalize_model(config.model_name, config.custom_providers)
 
         params = {
             "model": model,
@@ -345,15 +398,17 @@ def _anthropic_tool_param(tools: list[dict] | None) -> list[dict] | None:
         for t in tools:
             # Unwrap if in OpenAI format
             func = t.get("function", t)
-            result.append({
-                "name": func["name"],
-                "description": func.get("description", ""),
-                "input_schema": {
-                    "type": "object",
-                    "properties": func.get("parameters", {}).get("properties", {}),
-                    "required": func.get("parameters", {}).get("required", []),
-                },
-            })
+            result.append(
+                {
+                    "name": func["name"],
+                    "description": func.get("description", ""),
+                    "input_schema": {
+                        "type": "object",
+                        "properties": func.get("parameters", {}).get("properties", {}),
+                        "required": func.get("parameters", {}).get("required", []),
+                    },
+                }
+            )
         return result
 
     @staticmethod
@@ -372,30 +427,45 @@ def _to_anthropic_messages(messages: list[dict]) -> tuple[str, list[dict]]:
                     content_blocks.append({"type": "text", "text": m["content"]})
                 for tc in m.get("tool_calls", []):
                     func = tc.get("function", tc)
-                    content_blocks.append({
-                        "type": "tool_use",
-                        "id": tc.get("id", ""),
-                        "name": func.get("name", tc.get("name", "")),
-                        "input": func.get("arguments", tc.get("arguments", {})),
-                    })
-                chat.append({"role": "assistant", "content": content_blocks or m.get("content", "")})
+                    content_blocks.append(
+                        {
+                            "type": "tool_use",
+                            "id": tc.get("id", ""),
+                            "name": func.get("name", tc.get("name", "")),
+                            "input": func.get("arguments", tc.get("arguments", {})),
+                        }
+                    )
+                chat.append(
+                    {"role": "assistant", "content": content_blocks or m.get("content", "")}
+                )
             elif m["role"] == "tool":
-                chat.append({
-                    "role": "user",
-                    "content": [{
-                        "type": "tool_result",
-                        "tool_use_id": m.get("tool_call_id", ""),
-                        "content": m["content"],
-                    }],
-                })
+                chat.append(
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "tool_result",
+                                "tool_use_id": m.get("tool_call_id", ""),
+                                "content": m["content"],
+                            }
+                        ],
+                    }
+                )
         return "\n\n".join(system_parts), chat
 
     @staticmethod
     def _anthropic_client(config: AgentConfig):
-        """Create Anthropic client with appropriate settings for native or OpenCode Go."""
+        """Create Anthropic client with appropriate settings for native, OpenCode Go, or custom provider."""
         from anthropic import AsyncAnthropic
 
         mn = config.model_name.lower()
+        # Check custom providers first
+        cp = LLMProvider._find_custom_provider(config.model_name, config.custom_providers)
+        if cp and cp.get("sdk_type") == "anthropic-sdk":
+            return AsyncAnthropic(
+                api_key=cp.get("api_key"),
+                base_url=cp.get("api_base", "").rstrip("/"),
+            )
         if mn.startswith("opencode-go/"):
             # OpenCode Go uses Anthropic format but different endpoint/key
             return AsyncAnthropic(
@@ -411,7 +481,7 @@ async def _call_anthropic(
         config: AgentConfig,
         tools: list[dict] | None,
     ) -> LLMResult:
-        model = LLMProvider._normalize_model(config.model_name)
+        model = LLMProvider._normalize_model(config.model_name, config.custom_providers)
         client = LLMProvider._anthropic_client(config)
         system_prompt, chat_msgs = LLMProvider._to_anthropic_messages(messages)
 
@@ -453,7 +523,7 @@ async def _stream_anthropic(
         config: AgentConfig,
         tools: list[dict] | None,
     ) -> AsyncGenerator[str | ToolCall | dict, None]:
-        model = LLMProvider._normalize_model(config.model_name)
+        model = LLMProvider._normalize_model(config.model_name, config.custom_providers)
         client = LLMProvider._anthropic_client(config)
         system_prompt, chat_msgs = LLMProvider._to_anthropic_messages(messages)
 
diff --git a/backend/openmlr/agent/loop.py b/backend/openmlr/agent/loop.py
index b7fc698..5128f56 100644
--- a/backend/openmlr/agent/loop.py
+++ b/backend/openmlr/agent/loop.py
@@ -33,7 +33,9 @@ async def submission_loop(session: Session, tool_router) -> None:
             break
 
 
-async def run_agent_turn(session: Session, tool_router, user_message: str, mode: str = None) -> None:
+async def run_agent_turn(
+    session: Session, tool_router, user_message: str, mode: str = None
+) -> None:
     """Direct entry point: run one agent turn."""
     await _run_agent(session, tool_router, user_message, mode)
 
@@ -50,11 +52,10 @@ async def _run_agent(session: Session, tool_router, user_message: str, mode: str
     tool_router.set_mode(effective_mode)
 
     # Inject per-message mode hint (short reinforcement of system prompt rules)
-    mode_hint = (
-        f"[Mode: {effective_mode.upper()}] "
-        + ("Plan only — ask questions, gather context, create plan. No execution."
-           if effective_mode == "plan" else
-           "Execute the plan — do the work, no questions. All tools except ask_user.")
+    mode_hint = f"[Mode: {effective_mode.upper()}] " + (
+        "Plan only — ask questions, gather context, create plan. No execution."
+        if effective_mode == "plan"
+        else "Execute the plan — do the work, no questions. All tools except ask_user."
     )
     session.context_manager.add_message(Message(role="system", content=mode_hint))
 
@@ -70,31 +71,35 @@ async def _run_agent(session: Session, tool_router, user_message: str, mode: str
 
             # Auto-compaction check
             if session.context_manager.needs_compaction():
-                await session.emit(AgentEvent(
-                    event_type="tool_log",
-                    data={"message": "Context nearing limit, compacting..."},
-                ))
+                await session.emit(
+                    AgentEvent(
+                        event_type="tool_log",
+                        data={"message": "Context nearing limit, compacting..."},
+                    )
+                )
                 summary = await session.context_manager.compact(
                     lambda msgs, cfg: _compact_llm_call(msgs, cfg)
                 )
                 if summary:
-                    await session.emit(AgentEvent(
-                        event_type="compacted",
-                        data={"summary": summary[:500]},
-                    ))
+                    await session.emit(
+                        AgentEvent(
+                            event_type="compacted",
+                            data={"summary": summary[:500]},
+                        )
+                    )
 
             # Doom loop detection
             doom_msg = detect_doom_loop(session.context_manager.messages)
             if doom_msg:
-                session.context_manager.add_message(
-                    Message(role="system", content=doom_msg)
-                )
+                session.context_manager.add_message(Message(role="system", content=doom_msg))
 
             # Emit context usage for frontend gauge
-            await session.emit(AgentEvent(
-                event_type="context_usage",
-                data=session.context_manager.get_token_usage(),
-            ))
+            await session.emit(
+                AgentEvent(
+                    event_type="context_usage",
+                    data=session.context_manager.get_token_usage(),
+                )
+            )
 
             # Get tool specs for LLM
             tool_specs = tool_router.get_tool_specs_for_llm()
@@ -114,17 +119,21 @@ async def _run_agent(session: Session, tool_router, user_message: str, mode: str
             # Update token count
             if result.usage:
                 session.context_manager.running_token_count = result.usage.get(
-                    "total_tokens", result.usage.get("input_tokens", 0) + result.usage.get("output_tokens", 0)
+                    "total_tokens",
+                    result.usage.get("input_tokens", 0) + result.usage.get("output_tokens", 0),
                 )
 
             # Handle finish_reason == "length" with truncated tool calls
             if result.finish_reason == "length" and result.tool_calls:
                 # Drop truncated tool calls and hint
                 session.context_manager.add_message(
-                    Message(role="system", content=(
-                        "[System: Your response was truncated due to length. "
-                        "Please be more concise and focus on essential tool calls only.]"
-                    ))
+                    Message(
+                        role="system",
+                        content=(
+                            "[System: Your response was truncated due to length. "
+                            "Please be more concise and focus on essential tool calls only.]"
+                        ),
+                    )
                 )
                 continue
 
@@ -134,18 +143,22 @@ async def _run_agent(session: Session, tool_router, user_message: str, mode: str
                     session.context_manager.add_message(
                         Message(role="assistant", content=result.content)
                     )
-                    await session.emit(AgentEvent(
-                        event_type="assistant_message",
-                        data={"content": result.content},
-                    ))
+                    await session.emit(
+                        AgentEvent(
+                            event_type="assistant_message",
+                            data={"content": result.content},
+                        )
+                    )
                 break
 
             # Add assistant message with tool calls to context
-            session.context_manager.add_message(Message(
-                role="assistant",
-                content=result.content,
-                tool_calls=result.tool_calls,
-            ))
+            session.context_manager.add_message(
+                Message(
+                    role="assistant",
+                    content=result.content,
+                    tool_calls=result.tool_calls,
+                )
+            )
 
             # Check for approval-required tools
             needs_approval = []
@@ -173,22 +186,26 @@ async def _run_agent(session: Session, tool_router, user_message: str, mode: str
                         output, success = res
 
                     # Add tool result to context
-                    session.context_manager.add_message(Message(
-                        role="tool",
-                        content=output,
-                        tool_call_id=tc.id,
-                        name=tc.name,
-                    ))
-
-                    await session.emit(AgentEvent(
-                        event_type="tool_output",
-                        data={
-                            "tool": tc.name,
-                            "tool_call_id": tc.id,
-                            "output": output[:10000],
-                            "success": success,
-                        },
-                    ))
+                    session.context_manager.add_message(
+                        Message(
+                            role="tool",
+                            content=output,
+                            tool_call_id=tc.id,
+                            name=tc.name,
+                        )
+                    )
+
+                    await session.emit(
+                        AgentEvent(
+                            event_type="tool_output",
+                            data={
+                                "tool": tc.name,
+                                "tool_call_id": tc.id,
+                                "output": output[:10000],
+                                "success": success,
+                            },
+                        )
+                    )
 
             # Handle approval-required tools
             if needs_approval:
@@ -196,34 +213,42 @@ async def _run_agent(session: Session, tool_router, user_message: str, mode: str
                     "tool_calls": needs_approval,
                     "tool_router": tool_router,
                 }
-                await session.emit(AgentEvent(
-                    event_type="approval_required",
-                    data={
-                        "tool_calls": [
-                            {
-                                "id": tc.id,
-                                "name": tc.name,
-                                "arguments": tc.arguments,
-                            }
-                            for tc in needs_approval
-                        ],
-                    },
-                ))
+                await session.emit(
+                    AgentEvent(
+                        event_type="approval_required",
+                        data={
+                            "tool_calls": [
+                                {
+                                    "id": tc.id,
+                                    "name": tc.name,
+                                    "arguments": tc.arguments,
+                                }
+                                for tc in needs_approval
+                            ],
+                        },
+                    )
+                )
                 break  # Wait for approval submission
 
     except Exception as e:
-        await session.emit(AgentEvent(
-            event_type="error",
-            data={"error": str(e), "traceback": traceback.format_exc()},
-        ))
+        await session.emit(
+            AgentEvent(
+                event_type="error",
+                data={"error": str(e), "traceback": traceback.format_exc()},
+            )
+        )
     finally:
         session.turn_count += 1
         # Emit final context usage
-        await session.emit(AgentEvent(
-            event_type="context_usage",
-            data=session.context_manager.get_token_usage(),
-        ))
-        await session.emit(AgentEvent(event_type="turn_complete", data={"turns": session.turn_count}))
+        await session.emit(
+            AgentEvent(
+                event_type="context_usage",
+                data=session.context_manager.get_token_usage(),
+            )
+        )
+        await session.emit(
+            AgentEvent(event_type="turn_complete", data={"turns": session.turn_count})
+        )
         await session.emit(AgentEvent(event_type="status", data={"status": "ready"}))
 
 
@@ -243,20 +268,26 @@ async def _stream_llm_call(
 
         if isinstance(chunk, str):
             content_buffer += chunk
-            await session.emit(AgentEvent(
-                event_type="assistant_chunk",
-                data={"chunk": chunk},
-            ))
+            await session.emit(
+                AgentEvent(
+                    event_type="assistant_chunk",
+                    data={"chunk": chunk},
+                )
+            )
         elif isinstance(chunk, ToolCall):
             tool_calls.append(chunk)
-            await session.emit(AgentEvent(
-                event_type="tool_call",
-                data={
-                    "id": chunk.id,
-                    "tool": chunk.name,
-                    "arguments": json.dumps(chunk.arguments) if isinstance(chunk.arguments, dict) else str(chunk.arguments),
-                },
-            ))
+            await session.emit(
+                AgentEvent(
+                    event_type="tool_call",
+                    data={
+                        "id": chunk.id,
+                        "tool": chunk.name,
+                        "arguments": json.dumps(chunk.arguments)
+                        if isinstance(chunk.arguments, dict)
+                        else str(chunk.arguments),
+                    },
+                )
+            )
         elif isinstance(chunk, dict):
             if chunk.get("event") == "usage":
                 usage_data = chunk.get("usage")
@@ -281,21 +312,25 @@ async def _non_stream_llm_call(
     result = await LLMProvider.generate(messages, session.config, tools)
 
     if result.content:
-        await session.emit(AgentEvent(
-            event_type="assistant_chunk",
-            data={"chunk": result.content},
-        ))
+        await session.emit(
+            AgentEvent(
+                event_type="assistant_chunk",
+                data={"chunk": result.content},
+            )
+        )
         await session.emit(AgentEvent(event_type="assistant_stream_end"))
 
     for tc in result.tool_calls:
-        await session.emit(AgentEvent(
-            event_type="tool_call",
-            data={
-                "id": tc.id,
-                "tool": tc.name,
-                "arguments": json.dumps(tc.arguments),
-            },
-        ))
+        await session.emit(
+            AgentEvent(
+                event_type="tool_call",
+                data={
+                    "id": tc.id,
+                    "tool": tc.name,
+                    "arguments": json.dumps(tc.arguments),
+                },
+            )
+        )
 
     return result
 
@@ -306,10 +341,12 @@ async def _execute_tool(
     tool_call: ToolCall,
 ) -> tuple[str, bool]:
     """Execute a single tool call."""
-    await session.emit(AgentEvent(
-        event_type="tool_state_change",
-        data={"tool_call_id": tool_call.id, "state": "running"},
-    ))
+    await session.emit(
+        AgentEvent(
+            event_type="tool_state_change",
+            data={"tool_call_id": tool_call.id, "state": "running"},
+        )
+    )
 
     try:
         output, success = await tool_router.call_tool(
@@ -319,10 +356,12 @@ async def _execute_tool(
     except Exception as e:
         return f"Tool execution error: {str(e)}", False
     finally:
-        await session.emit(AgentEvent(
-            event_type="tool_state_change",
-            data={"tool_call_id": tool_call.id, "state": "done"},
-        ))
+        await session.emit(
+            AgentEvent(
+                event_type="tool_state_change",
+                data={"tool_call_id": tool_call.id, "state": "done"},
+            )
+        )
 
 
 async def _handle_approval(
@@ -345,21 +384,25 @@ async def _handle_approval(
             output = "Tool execution rejected by user."
             success = False
 
-        session.context_manager.add_message(Message(
-            role="tool",
-            content=output,
-            tool_call_id=tc.id,
-            name=tc.name,
-        ))
-        await session.emit(AgentEvent(
-            event_type="tool_output",
-            data={
-                "tool": tc.name,
-                "tool_call_id": tc.id,
-                "output": output[:10000],
-                "success": success,
-            },
-        ))
+        session.context_manager.add_message(
+            Message(
+                role="tool",
+                content=output,
+                tool_call_id=tc.id,
+                name=tc.name,
+            )
+        )
+        await session.emit(
+            AgentEvent(
+                event_type="tool_output",
+                data={
+                    "tool": tc.name,
+                    "tool_call_id": tc.id,
+                    "output": output[:10000],
+                    "success": success,
+                },
+            )
+        )
 
     # Continue the agent loop after approval
     await _run_agent(session, tool_router, "")
@@ -367,28 +410,32 @@ async def _handle_approval(
 
 async def _compact(session: Session) -> None:
     """Compact the context."""
-    summary = await session.context_manager.compact(
-        lambda msgs, cfg: _compact_llm_call(msgs, cfg)
-    )
+    summary = await session.context_manager.compact(lambda msgs, cfg: _compact_llm_call(msgs, cfg))
     if summary:
-        await session.emit(AgentEvent(
-            event_type="compacted",
-            data={"summary": summary[:500]},
-        ))
+        await session.emit(
+            AgentEvent(
+                event_type="compacted",
+                data={"summary": summary[:500]},
+            )
+        )
     else:
-        await session.emit(AgentEvent(
-            event_type="compacted",
-            data={"summary": "Nothing to compact."},
-        ))
+        await session.emit(
+            AgentEvent(
+                event_type="compacted",
+                data={"summary": "Nothing to compact."},
+            )
+        )
 
 
 async def _undo(session: Session) -> None:
     """Undo the last turn."""
     removed = session.context_manager.undo_last_turn()
-    await session.emit(AgentEvent(
-        event_type="undo_complete",
-        data={"removed_messages": removed},
-    ))
+    await session.emit(
+        AgentEvent(
+            event_type="undo_complete",
+            data={"removed_messages": removed},
+        )
+    )
 
 
 async def _compact_llm_call(messages: list[dict], config: AgentConfig) -> str:
diff --git a/backend/openmlr/agent/types.py b/backend/openmlr/agent/types.py
index c0964a8..8409e30 100644
--- a/backend/openmlr/agent/types.py
+++ b/backend/openmlr/agent/types.py
@@ -9,6 +9,7 @@
 @dataclass
 class ToolCall:
     """A tool call requested by the LLM."""
+
     id: str
     name: str
     arguments: dict[str, Any]
@@ -17,6 +18,7 @@ class ToolCall:
 @dataclass
 class ToolSpec:
     """Specification for an agent tool."""
+
     name: str
     description: str
     parameters: dict[str, Any]  # JSON Schema
@@ -27,6 +29,7 @@ class ToolSpec:
 @dataclass
 class Message:
     """A message in the conversation context."""
+
     role: str  # "system", "user", "assistant", "tool"
     content: str
     tool_calls: list[ToolCall] | None = None
@@ -37,16 +40,19 @@ class Message:
 @dataclass(kw_only=True)
 class AgentEvent:
     """Event emitted by the agent loop for SSE streaming."""
+
     event_type: str
     data: dict[str, Any] | None = None
 
     def to_sse(self) -> str:
         import json
+
         return f"data: {json.dumps({'event_type': self.event_type, 'data': self.data})}\n\n"
 
 
 class OpType(str, Enum):
     """Operation types submitted to the agent loop."""
+
     USER_INPUT = "user_input"
     EXEC_APPROVAL = "exec_approval"
     COMPACT = "compact"
@@ -58,6 +64,7 @@ class OpType(str, Enum):
 @dataclass
 class Submission:
     """A submission to the agent loop."""
+
     op: OpType
     data: Any = None
 
@@ -65,6 +72,7 @@ class Submission:
 @dataclass
 class LLMResult:
     """Result of an LLM call."""
+
     content: str
     tool_calls: list[ToolCall]
     finish_reason: str
diff --git a/backend/openmlr/app.py b/backend/openmlr/app.py
index dead080..2d26d02 100644
--- a/backend/openmlr/app.py
+++ b/backend/openmlr/app.py
@@ -50,17 +50,22 @@ async def lifespan(app: FastAPI):
     await engine.dispose()
 
 
+_DEV_MODE = os.environ.get("DEV_MODE", "").lower() in ("1", "true", "yes")
+
 app = FastAPI(
     title="OpenMLR",
     description="ML research intern — reads papers, trains models, writes papers",
     version="0.3.0",
     lifespan=lifespan,
+    docs_url="/docs" if _DEV_MODE else None,
+    redoc_url="/redoc" if _DEV_MODE else None,
 )
 
-# CORS configuration - restrict in production
-_cors_origins = os.environ.get("CORS_ORIGINS", "http://localhost:3000,http://localhost:5173").split(
-    ","
-)
+# CORS configuration
+# Allowed origins are read from the comma-separated CORS_ORIGINS env var in every mode.
+# The default only lists the localhost origins (:3000 and the Vite dev server on :5173).
+_default_cors = "http://localhost:3000,http://localhost:5173"
+_cors_origins = os.environ.get("CORS_ORIGINS", _default_cors).split(",")
 _cors_origins = [origin.strip() for origin in _cors_origins if origin.strip()]
 
 app.add_middleware(
@@ -69,6 +74,7 @@ async def lifespan(app: FastAPI):
     allow_credentials=True,
     allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
     allow_headers=["Authorization", "Content-Type"],
+    expose_headers=["Content-Type"],
 )
 
 # ── API routers ──────────────────────────────────────────
@@ -105,25 +111,34 @@ async def global_exception_handler(request: Request, exc: Exception):
     )
 
 
-# ── Static frontend serving ─────────────────────────────
-# Mount only if a production build exists; otherwise Vite dev server handles it.
-if FRONTEND_DIST.is_dir() and (FRONTEND_DIST / "index.html").exists():
-    # Serve hashed asset bundles
-    if (FRONTEND_DIST / "assets").is_dir():
-        app.mount(
-            "/assets",
-            StaticFiles(directory=str(FRONTEND_DIST / "assets")),
-            name="assets",
-        )
-
-    @app.get("/{full_path:path}")
-    async def serve_frontend(full_path: str):
-        """SPA fallback — serve index.html for all non-API routes."""
-        if full_path.startswith("api/"):
-            return JSONResponse(status_code=404, content={"error": "Not found"})
-
-        file_path = FRONTEND_DIST / full_path
-        if file_path.is_file() and file_path.suffix:
-            return FileResponse(str(file_path))
-
-        return FileResponse(str(FRONTEND_DIST / "index.html"))
+# ── Static frontend serving / Dev mode Swagger ──────────
+if _DEV_MODE:
+    # In dev mode: no static frontend — Vite dev server on :5173 handles the UI.
+    # Redirect root to Swagger docs so :3000 is useful for API exploration.
+    from fastapi.responses import RedirectResponse
+
+    @app.get("/", include_in_schema=False)
+    async def root_redirect():
+        return RedirectResponse(url="/docs")
+else:
+    # Production: serve the built frontend SPA from frontend/dist.
+    if FRONTEND_DIST.is_dir() and (FRONTEND_DIST / "index.html").exists():
+        # Serve hashed asset bundles
+        if (FRONTEND_DIST / "assets").is_dir():
+            app.mount(
+                "/assets",
+                StaticFiles(directory=str(FRONTEND_DIST / "assets")),
+                name="assets",
+            )
+
+        @app.get("/{full_path:path}")
+        async def serve_frontend(full_path: str):
+            """SPA fallback — serve index.html for all non-API routes."""
+            if full_path.startswith("api/"):
+                return JSONResponse(status_code=404, content={"error": "Not found"})
+
+            file_path = FRONTEND_DIST / full_path
+            if file_path.is_file() and file_path.suffix:
+                return FileResponse(str(file_path))
+
+            return FileResponse(str(FRONTEND_DIST / "index.html"))
diff --git a/backend/openmlr/auth/security.py b/backend/openmlr/auth/security.py
index 2942710..26acf7f 100644
--- a/backend/openmlr/auth/security.py
+++ b/backend/openmlr/auth/security.py
@@ -16,11 +16,13 @@
     if os.environ.get("ENVIRONMENT", "development") == "production":
         raise RuntimeError(
             "JWT_SECRET_KEY environment variable is required in production. "
-            "Generate one with: python -c \"import secrets; print(secrets.token_urlsafe(32))\""
+            'Generate one with: python -c "import secrets; print(secrets.token_urlsafe(32))"'
         )
     # Generate a random secret for development (changes on restart - fine for dev)
     _jwt_secret = secrets.token_urlsafe(32)
-    logger.warning("JWT_SECRET_KEY not set - using random secret (sessions won't persist across restarts)")
+    logger.warning(
+        "JWT_SECRET_KEY not set - using random secret (sessions won't persist across restarts)"
+    )
 
 SECRET_KEY = _jwt_secret
 ALGORITHM = "HS256"
diff --git a/backend/openmlr/celery_app.py b/backend/openmlr/celery_app.py
index 7190264..781ac48 100644
--- a/backend/openmlr/celery_app.py
+++ b/backend/openmlr/celery_app.py
@@ -24,26 +24,20 @@
     result_serializer="json",
     timezone="UTC",
     enable_utc=True,
-
     # Task execution settings
     task_acks_late=True,  # Acknowledge after completion for reliability
     task_reject_on_worker_lost=True,
     worker_prefetch_multiplier=1,  # Don't prefetch, process one at a time
-
     # Result backend settings
     result_expires=3600,  # Results expire after 1 hour
-
     # Worker settings
     worker_concurrency=4,  # Number of concurrent workers
-
     # Task routing (optional - can route different tasks to different queues)
     task_routes={
         "openmlr.tasks.agent_tasks.process_agent_message": {"queue": "agent"},
     },
-
     # Default queue
     task_default_queue="default",
-
     # Beat schedule for periodic tasks
     beat_schedule={
         "health-check-all-nodes": {
diff --git a/backend/openmlr/compute/capabilities.py b/backend/openmlr/compute/capabilities.py
index 21c36e8..b2b6bb2 100644
--- a/backend/openmlr/compute/capabilities.py
+++ b/backend/openmlr/compute/capabilities.py
@@ -7,6 +7,7 @@
 @dataclass
 class GPUInfo:
     """Information about a GPU."""
+
     model: str = ""
     vram_gb: float = 0.0
     cuda_version: str = ""
@@ -16,6 +17,7 @@ class GPUInfo:
 @dataclass
 class ComputeCapabilities:
     """Comprehensive capabilities of a compute node."""
+
     # Platform
     platform: str = "unknown"
     cpu_cores: int = 0
diff --git a/backend/openmlr/compute/probe.py b/backend/openmlr/compute/probe.py
index c8f9e8f..f519428 100644
--- a/backend/openmlr/compute/probe.py
+++ b/backend/openmlr/compute/probe.py
@@ -16,7 +16,9 @@ async def probe_sandbox(sandbox) -> ComputeCapabilities:
         caps.platform = result.output.strip()
 
     # CPU cores and architecture
-    result = await sandbox.execute("nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo '0'", timeout=5)
+    result = await sandbox.execute(
+        "nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo '0'", timeout=5
+    )
     if result.success:
         try:
             caps.cpu_cores = int(result.output.strip())
@@ -29,8 +31,7 @@ async def probe_sandbox(sandbox) -> ComputeCapabilities:
 
     # RAM (Linux)
     result = await sandbox.execute(
-        "free -g 2>/dev/null | grep Mem | awk '{print $2, $7}' || "
-        "echo '0 0'",
+        "free -g 2>/dev/null | grep Mem | awk '{print $2, $7}' || echo '0 0'",
         timeout=5,
     )
     if result.success:
@@ -129,7 +130,8 @@ async def probe_sandbox(sandbox) -> ComputeCapabilities:
     )
     if result.success:
         caps.installed_packages = [
-            line.strip() for line in result.output.strip().split("\n")
+            line.strip()
+            for line in result.output.strip().split("\n")
             if line.strip() and "==" in line
         ]
 
diff --git a/backend/openmlr/config.py b/backend/openmlr/config.py
index 30d47dd..8ecd975 100644
--- a/backend/openmlr/config.py
+++ b/backend/openmlr/config.py
@@ -26,6 +26,9 @@ class AgentConfig:
     paper_search_budget: int = 25
     require_plan_approval: bool = True
     mcp_servers: dict = field(default_factory=dict)
+    custom_providers: list = field(
+        default_factory=list
+    )  # [{id, name, sdk_type, api_base, api_key, models}]
 
 
 DEFAULT_CONFIG_PATH = Path(__file__).parent.parent / "configs" / "agent_config.yaml"
diff --git a/backend/openmlr/keys/manager.py b/backend/openmlr/keys/manager.py
index d0ca741..acc7296 100644
--- a/backend/openmlr/keys/manager.py
+++ b/backend/openmlr/keys/manager.py
@@ -12,7 +12,9 @@ class KeyManager:
     """Manages SSH private keys stored in a dedicated directory."""
 
     def __init__(self, keys_dir: str | Path = None):
-        self.keys_dir = Path(keys_dir) if keys_dir else Path(__file__).parent.parent.parent.parent / ".keys"
+        self.keys_dir = (
+            Path(keys_dir) if keys_dir else Path(__file__).parent.parent.parent.parent / ".keys"
+        )
         self._ensure_dir()
 
     def _ensure_dir(self) -> None:
@@ -28,11 +30,13 @@ def list_keys(self) -> list[dict]:
             if path.suffix == ".pub":
                 continue
             pub_path = path.with_suffix(path.suffix + ".pub")
-            keys.append({
-                "filename": path.name,
-                "has_public": pub_path.exists(),
-                "size_bytes": path.stat().st_size,
-            })
+            keys.append(
+                {
+                    "filename": path.name,
+                    "has_public": pub_path.exists(),
+                    "size_bytes": path.stat().st_size,
+                }
+            )
         return keys
 
     def key_exists(self, filename: str) -> bool:
@@ -74,7 +78,9 @@ def delete_key(self, filename: str) -> bool:
             deleted = True
         return deleted
 
-    def generate_key_pair(self, filename: str, algorithm: str = "ed25519", comment: str = "") -> tuple[Path, Path]:
+    def generate_key_pair(
+        self, filename: str, algorithm: str = "ed25519", comment: str = ""
+    ) -> tuple[Path, Path]:
         """Generate a new SSH key pair and write to disk."""
         key_path = self.keys_dir / filename
         pub_path = key_path.with_suffix(key_path.suffix + ".pub")
diff --git a/backend/openmlr/models.py b/backend/openmlr/models.py
index d178eea..ac21a15 100644
--- a/backend/openmlr/models.py
+++ b/backend/openmlr/models.py
@@ -7,20 +7,24 @@
 
 # ---- Auth ----
 
+
 class UserRegister(BaseModel):
     username: str = Field(min_length=3, max_length=50)
     password: str = Field(min_length=6, max_length=128)
     display_name: str | None = None
 
+
 class UserLogin(BaseModel):
     username: str
     password: str
 
+
 class TokenResponse(BaseModel):
     access_token: str
     token_type: str = "bearer"
     user: dict
 
+
 class UserInfo(BaseModel):
     id: int
     username: str
@@ -28,13 +32,16 @@ class UserInfo(BaseModel):
     is_active: bool
     created_at: datetime
 
+
 # ---- Conversations ----
 
+
 class ConversationCreate(BaseModel):
     title: str | None = "New conversation"
     model: str | None = None
     mode: str | None = "general"  # "research", "writing", "coding", "general"
 
+
 class ConversationResponse(BaseModel):
     id: int
     uuid: str
@@ -45,6 +52,7 @@ class ConversationResponse(BaseModel):
     created_at: datetime
     updated_at: datetime
 
+
 class MessageResponse(BaseModel):
     id: int
     role: str
@@ -52,24 +60,31 @@ class MessageResponse(BaseModel):
     metadata: dict | None = None
     created_at: datetime
 
+
 class ConversationDetail(BaseModel):
     conversation: ConversationResponse
     messages: list[MessageResponse]
 
+
 # ---- Messaging ----
 
+
 class MessageSend(BaseModel):
     message: str
     mode: str | None = None  # plan, research, write — per-message mode override
 
+
 class ApprovalRequest(BaseModel):
     approvals: dict[str, bool]  # tool_call_id -> approved
 
+
 # ---- Settings ----
 
+
 class SettingUpdate(BaseModel):
     value: Any
 
+
 class ProviderConfig(BaseModel):
     openai_api_key: str | None = None
     anthropic_api_key: str | None = None
@@ -80,13 +95,17 @@ class ProviderConfig(BaseModel):
     modal_token_id: str | None = None
     modal_token_secret: str | None = None
 
+
 # ---- Model Management ----
 
+
 class ModelSwitch(BaseModel):
     model: str
 
+
 # ---- Event (SSE) ----
 
+
 class AgentEvent(BaseModel):
     event_type: str
     data: dict | None = None
diff --git a/backend/openmlr/routes/agent.py b/backend/openmlr/routes/agent.py
index bc17185..cd4c68c 100644
--- a/backend/openmlr/routes/agent.py
+++ b/backend/openmlr/routes/agent.py
@@ -29,14 +29,17 @@ def _bus(request: Request):
 
 # ── SSE Events ───────────────────────────────────────────
 
+
 @router.get("/events")
 async def events(request: Request, token: str = None):
     """SSE event stream. Uses raw StreamingResponse for immediate flushing."""
     if token:
         from ..auth.security import decode_access_token
+
         payload = decode_access_token(token)
         if not payload:
             from fastapi.responses import JSONResponse
+
             return JSONResponse(status_code=401, content={"error": "Invalid token"})
 
     event_bus = _bus(request)
@@ -45,6 +48,7 @@ async def events(request: Request, token: str = None):
 
     async def _stream():
         import json
+
         try:
             while True:
                 try:
@@ -92,6 +96,7 @@ async def _test_stream():
 
 # ── Conversations ────────────────────────────────────────
 
+
 @router.get("/conversations")
 async def list_conversations(
     user: User = Depends(get_current_user),
@@ -109,7 +114,11 @@ async def create_conversation(
     db: AsyncSession = Depends(get_db),
 ):
     conv = await ops.create_conversation(
-        db, user.id, title=body.title, model=body.model, mode=body.mode,
+        db,
+        user.id,
+        title=body.title,
+        model=body.model,
+        mode=body.mode,
     )
     _sm(request).current_conversation_id = conv.id
     return {"conversation": _conv_dict(conv)}
@@ -159,6 +168,7 @@ async def delete_conversation(
     # Cancel any running background jobs for this conversation
     try:
         from ..services.job_manager import get_job_manager
+
         job_manager = get_job_manager()
         active_jobs = await job_manager.get_active_jobs(db, conv.id)
         for job_info in active_jobs:
@@ -194,10 +204,14 @@ async def switch_conversation(
     effective_model = conv.model or user_agent_settings.get("default_model")
 
     active = await sm.get_or_create_session(
-        conv.id, conv.uuid,
-        model=effective_model, mode=conv.mode or "general",
-        existing_messages=msg_dicts, username=user.display_name or user.username,
-        user_id=user.id, db=db,
+        conv.id,
+        conv.uuid,
+        model=effective_model,
+        mode=conv.mode or "general",
+        existing_messages=msg_dicts,
+        username=user.display_name or user.username,
+        user_id=user.id,
+        db=db,
     )
     sm.current_conversation_id = conv.id
 
@@ -209,6 +223,7 @@ async def switch_conversation(
 
 # ── Per-Conversation Compute ─────────────────────────────
 
+
 @router.get("/conversations/{uuid}/compute")
 async def get_conversation_compute(
     uuid: str,
@@ -331,6 +346,7 @@ async def clear_conversation_compute(
 
 # ── Messaging ────────────────────────────────────────────
 
+
 @router.post("/message")
 async def send_message(
     body: MessageSend,
@@ -379,9 +395,7 @@ async def send_message(
         # Title generation (still async in web process for now)
         if user_count in (1, 3):
             msg_dicts = await _load_messages(db, conv.id)
-            asyncio.create_task(
-                _auto_title(sm, event_bus, db, conv.id, conv.uuid, msg_dicts)
-            )
+            asyncio.create_task(_auto_title(sm, event_bus, db, conv.id, conv.uuid, msg_dicts))
 
         return {"ok": True, "job_id": job.job_id if job else None, "background": True}
 
@@ -401,10 +415,14 @@ async def send_message(
         history = None
 
     active = await sm.get_or_create_session(
-        conv.id, conv.uuid,
-        model=effective_model, mode=conv.mode or "general",
-        existing_messages=history, username=user.display_name or user.username,
-        user_id=user.id, db=db,
+        conv.id,
+        conv.uuid,
+        model=effective_model,
+        mode=conv.mode or "general",
+        existing_messages=history,
+        username=user.display_name or user.username,
+        user_id=user.id,
+        db=db,
     )
 
     # Wire DB persistence once per session
@@ -416,15 +434,14 @@ async def send_message(
 
     if user_count in (1, 3):
         msg_dicts = await _load_messages(db, conv.id)
-        asyncio.create_task(
-            _auto_title(sm, event_bus, db, conv.id, conv.uuid, msg_dicts)
-        )
+        asyncio.create_task(_auto_title(sm, event_bus, db, conv.id, conv.uuid, msg_dicts))
 
     return {"ok": True, "background": False}
 
 
 # ── Agent controls ───────────────────────────────────────
 
+
 @router.get("/jobs/{job_id}")
 async def get_job_status(
     job_id: str,
@@ -433,6 +450,7 @@ async def get_job_status(
 ):
     """Get the status of a background job."""
     from ..services.job_manager import get_job_manager
+
     job_manager = get_job_manager()
     status = await job_manager.get_job_status(db, job_id)
     if not status:
@@ -448,6 +466,7 @@ async def get_conversation_jobs(
 ):
     """Get all active jobs for a conversation."""
     from ..services.job_manager import get_job_manager
+
     conv = await _get_conv_or_404(db, uuid, user.id)
     job_manager = get_job_manager()
     jobs = await job_manager.get_active_jobs(db, conv.id)
@@ -462,10 +481,13 @@ async def cancel_job(
 ):
     """Cancel a queued job."""
     from ..services.job_manager import get_job_manager
+
     job_manager = get_job_manager()
     success = await job_manager.cancel_job(db, job_id)
     if not success:
-        raise HTTPException(status_code=400, detail="Cannot cancel job (may be running or completed)")
+        raise HTTPException(
+            status_code=400, detail="Cannot cancel job (may be running or completed)"
+        )
     return {"ok": True}
 
 
@@ -476,6 +498,7 @@ async def get_report(
 ):
     """Get a completion report by ID."""
     from ..tools.plan import get_report_content
+
     content = await get_report_content(report_id)
     if not content:
         raise HTTPException(status_code=404, detail="Report not found")
@@ -494,7 +517,7 @@ async def submit_answers(
 
     # Try in-process session first (inline mode)
     active = _sm(request).get_current_session()
-    if active and hasattr(active.session, 'pending_answers') and active.session.pending_answers:
+    if active and hasattr(active.session, "pending_answers") and active.session.pending_answers:
         if not active.session.pending_answers.done():
             active.session.pending_answers.set_result(answers)
             return {"ok": True}
@@ -502,6 +525,7 @@ async def submit_answers(
     # Publish to Redis for background job workers
     try:
         from ..services.redis_pubsub import publish_answers
+
         sm = _sm(request)
         if sm.current_conversation_id:
             await publish_answers(sm.current_conversation_id, answers)
@@ -529,11 +553,13 @@ async def interrupt(
     conv_id = sm.current_conversation_id
     if conv_id:
         from ..services.redis_pubsub import publish_interrupt
+
         await publish_interrupt(conv_id)
 
         # Also try to revoke active Celery tasks for this conversation
         try:
             from ..services.job_manager import USE_BACKGROUND_JOBS, get_job_manager
+
             if USE_BACKGROUND_JOBS:
                 job_manager = get_job_manager()
                 active_jobs = await job_manager.get_active_jobs(db, conv_id)
@@ -561,9 +587,8 @@ async def submit_approval(
     active = _sm(request).get_current_session()
     if active and active.session.pending_approval:
         from ..agent.loop import _handle_approval
-        asyncio.create_task(
-            _handle_approval(active.session, active.tool_router, body.approvals)
-        )
+
+        asyncio.create_task(_handle_approval(active.session, active.tool_router, body.approvals))
     return {"ok": True}
 
 
@@ -572,6 +597,7 @@ async def undo(request: Request, user: User = Depends(get_current_user)):
     active = _sm(request).get_current_session()
     if active:
         from ..agent.loop import _undo
+
         await _undo(active.session)
     return {"ok": True}
 
@@ -581,6 +607,7 @@ async def compact(request: Request, user: User = Depends(get_current_user)):
     active = _sm(request).get_current_session()
     if active:
         from ..agent.loop import _compact
+
         await _compact(active.session)
     return {"ok": True}
 
@@ -600,14 +627,24 @@ async def switch_model(
     # Persist as the user's sticky default model
     await ops.set_user_setting(db, user.id, "agent", "default_model", body.model)
 
-    await _bus(request).broadcast(
-        AgentEvent(event_type="model_info", data={"model": body.model})
-    )
+    # Update recent models list (max 10, deduplicated, most recent first)
+    agent_settings = await ops.get_user_agent_settings(db, user.id)
+    recent = agent_settings.get("recent_models", [])
+    if not isinstance(recent, list):
+        recent = []
+    # Remove existing entry and prepend
+    recent = [m for m in recent if m != body.model]
+    recent.insert(0, body.model)
+    recent = recent[:10]
+    await ops.set_user_setting(db, user.id, "agent", "recent_models", recent)
+
+    await _bus(request).broadcast(AgentEvent(event_type="model_info", data={"model": body.model}))
     return {"ok": True}
 
 
 # ── Helpers ──────────────────────────────────────────────
 
+
 async def _get_conv_or_404(db, uuid: str, user_id: int):
     conv = await ops.get_conversation_by_uuid(db, uuid)
     if not conv or conv.user_id != user_id:
@@ -649,13 +686,20 @@ async def _persist(event: AgentEvent):
             if event.event_type == "assistant_message" and event.data and event.data.get("content"):
                 await ops.add_message(db, conv_id, "assistant", event.data["content"])
             elif event.event_type == "tool_output" and event.data:
-                await ops.add_message(db, conv_id, "tool", event.data.get("output", ""), {
-                    "tool": event.data.get("tool"),
-                    "tool_call_id": event.data.get("tool_call_id"),
-                    "success": event.data.get("success"),
-                })
+                await ops.add_message(
+                    db,
+                    conv_id,
+                    "tool",
+                    event.data.get("output", ""),
+                    {
+                        "tool": event.data.get("tool"),
+                        "tool_call_id": event.data.get("tool_call_id"),
+                        "success": event.data.get("success"),
+                    },
+                )
         except Exception:
             pass
+
     active.session.on_event(_persist)
 
 
diff --git a/backend/openmlr/routes/compute.py b/backend/openmlr/routes/compute.py
index fbc2c25..5a3995a 100644
--- a/backend/openmlr/routes/compute.py
+++ b/backend/openmlr/routes/compute.py
@@ -53,6 +53,7 @@ def _node_dict(node) -> dict:
 
 # ── Compute Nodes ────────────────────────────────────────
 
+
 @router.get("/nodes")
 async def list_nodes(
     user: User = Depends(get_current_user),
@@ -97,8 +98,13 @@ async def create_node(
         await ops.set_default_compute_node(db, user.id, None)
 
     node = await ops.create_compute_node(
-        db, user.id, name, node_type, config,
-        is_default=is_default, priority=priority,
+        db,
+        user.id,
+        name,
+        node_type,
+        config,
+        is_default=is_default,
+        priority=priority,
     )
 
     return {"node": _node_dict(node)}
@@ -268,7 +274,9 @@ async def probe_node(
 
         # Update node in database
         await ops.update_compute_node(
-            db, node.id, user.id,
+            db,
+            node.id,
+            user.id,
             capabilities=caps.to_dict(),
             health_status="online",
             last_probed_at=datetime.now(UTC),
@@ -283,7 +291,9 @@ async def probe_node(
 
     except Exception as e:
         await ops.update_compute_node(
-            db, node.id, user.id,
+            db,
+            node.id,
+            user.id,
             health_status="offline",
         )
         return {"ok": False, "error": str(e)}
@@ -303,6 +313,7 @@ async def _test_ssh_node(node):
     password = config.get("password")
 
     try:
+
         def _do_test():
             client = paramiko.SSHClient()
             # Use WarningPolicy to get host key without auto-adding
@@ -353,7 +364,9 @@ def _do_test():
         return {
             "ok": result["connected"],
             "host_key_fingerprint": result.get("host_key_fingerprint"),
-            "message": "Connected successfully" if result["connected"] else f"Unexpected output: {result['output']}",
+            "message": "Connected successfully"
+            if result["connected"]
+            else f"Unexpected output: {result['output']}",
         }
 
     except Exception as e:
@@ -381,6 +394,7 @@ async def _test_modal_node(node):
     """Test Modal connectivity."""
     try:
         import importlib.util
+
         if importlib.util.find_spec("modal") is not None:
             return {"ok": True, "message": "Modal client available"}
         return {"ok": False, "error": "Modal client not installed"}
diff --git a/backend/openmlr/routes/keys.py b/backend/openmlr/routes/keys.py
index 866bc96..13d9385 100644
--- a/backend/openmlr/routes/keys.py
+++ b/backend/openmlr/routes/keys.py
@@ -52,6 +52,7 @@ async def create_key(
 
     # Prevent path traversal in filename
     from pathlib import Path as PyPath
+
     safe_filename = PyPath(filename).name
     if not safe_filename or safe_filename.startswith("."):
         raise HTTPException(status_code=400, detail="Invalid filename")
@@ -86,8 +87,13 @@ async def create_key(
         raise HTTPException(status_code=400, detail="action must be 'upload' or 'generate'")
 
     key = await ops.create_ssh_key(
-        db, user.id, safe_filename, meta["fingerprint"],
-        meta["algorithm"], meta["public_key"], body.get("comment"),
+        db,
+        user.id,
+        safe_filename,
+        meta["fingerprint"],
+        meta["algorithm"],
+        meta["public_key"],
+        body.get("comment"),
     )
 
     return {
@@ -112,6 +118,7 @@ async def delete_key(
     """Delete an SSH key and its public counterpart."""
     # Sanitize filename to prevent path traversal
     from pathlib import Path as PyPath
+
     safe_filename = PyPath(filename).name
     if not safe_filename or safe_filename != filename or safe_filename.startswith("."):
         raise HTTPException(status_code=400, detail="Invalid filename")
@@ -124,8 +131,7 @@ async def delete_key(
     if dependent_nodes:
         node_names = ", ".join(n.name for n in dependent_nodes)
         raise HTTPException(
-            status_code=409,
-            detail=f"Cannot delete key: used by compute nodes: {node_names}"
+            status_code=409, detail=f"Cannot delete key: used by compute nodes: {node_names}"
         )
 
     deleted_db = await ops.delete_ssh_key(db, user.id, filename)
diff --git a/backend/openmlr/routes/projects.py b/backend/openmlr/routes/projects.py
index ae37f64..c3857e0 100644
--- a/backend/openmlr/routes/projects.py
+++ b/backend/openmlr/routes/projects.py
@@ -53,6 +53,30 @@ def _get_workspaces_root() -> Path:
 
 WORKSPACES_ROOT = _get_workspaces_root()
 
+DEFAULT_PROJECT_SLUG = "_default"
+DEFAULT_PROJECT_NAME = "All Conversations"
+
+
+async def get_or_create_default_project(db, user_id: int):
+    """Return the user's default project, creating it (and its workspace) on first use."""
+    default_project = await ops.get_project_by_slug(db, user_id, DEFAULT_PROJECT_SLUG)
+    if not default_project:
+        # Nothing stored under the reserved slug yet: prepare the workspace, then the row.
+        user_root = WORKSPACES_ROOT / f"user-{user_id}"
+        default_workspace = str(user_root / DEFAULT_PROJECT_SLUG)
+        _ensure_workspace(default_workspace)
+        default_project = await ops.create_project(
+            db,
+            user_id,
+            DEFAULT_PROJECT_NAME,
+            DEFAULT_PROJECT_SLUG,
+            description="Default workspace for all conversations",
+            workspace_path=default_workspace,
+            settings={"is_default": True},
+        )
+
+    return default_project
+
 
 def _slugify(name: str) -> str:
     """Generate a filesystem-safe slug from a project name."""
@@ -152,12 +176,17 @@ async def list_projects(
     user: User = Depends(get_current_user),
     db: AsyncSession = Depends(get_db),
 ):
-    """List all projects for the current user."""
+    """List all projects for the current user. Ensures default project exists."""
+    # Ensure default project exists
+    await get_or_create_default_project(db, user.id)
+
     projects = await ops.get_user_projects(db, user.id, include_archived=include_archived)
     result = []
     for p in projects:
         convs = await ops.get_project_conversations(db, p.id)
-        result.append(_project_dict(p, conv_count=len(convs)))
+        d = _project_dict(p, conv_count=len(convs))
+        d["is_default"] = bool(p.settings and p.settings.get("is_default"))
+        result.append(d)
     return {"projects": result}
 
 
@@ -252,14 +281,22 @@ async def update_project(
     if not project:
         raise HTTPException(status_code=404, detail="Project not found")
 
+    is_default = project.settings and project.settings.get("is_default")
+
     body = await request.json()
     updates = {}
     if "name" in body:
+        if is_default:
+            raise HTTPException(status_code=400, detail="Cannot rename the default project")
         updates["name"] = body["name"].strip()
     if "description" in body:
         updates["description"] = body["description"].strip() or None
     if "settings" in body:
-        updates["settings"] = body["settings"]
+        # Prevent removing the is_default flag
+        new_settings = body["settings"]
+        if is_default and isinstance(new_settings, dict):
+            new_settings["is_default"] = True
+        updates["settings"] = new_settings
 
     updated = await ops.update_project(db, project.id, user.id, **updates)
     return {"project": _project_dict(updated)}
diff --git a/backend/openmlr/routes/settings.py b/backend/openmlr/routes/settings.py
index 673f10e..a0a813c 100644
--- a/backend/openmlr/routes/settings.py
+++ b/backend/openmlr/routes/settings.py
@@ -1,6 +1,7 @@
 """Settings routes — user settings, provider config, model management."""
 
 import os
+from datetime import UTC, datetime
 
 import httpx
 from fastapi import APIRouter, Depends, HTTPException, Request
@@ -16,6 +17,7 @@
 
 # ---- User Settings ----
 
+
 @router.get("/settings")
 async def get_all_settings(
     user: User = Depends(get_current_user),
@@ -108,8 +110,56 @@ async def delete_setting(
     return {"ok": True}
 
 
+# ---- Helpers for configured status ----
+
+
+def _is_provider_configured(provider_id: str, provider_settings: dict) -> bool:
+    """Tell whether a built-in provider has been configured.
+
+    A provider counts as configured when its environment variable holds a
+    non-empty value, or when its key in ``provider_settings`` is truthy.
+    Provider ids missing from the table below are reported as not configured.
+    """
+    # provider id -> (environment variable, user-setting key)
+    sources = {
+        "openai": ("OPENAI_API_KEY", "openai_api_key"),
+        "anthropic": ("ANTHROPIC_API_KEY", "anthropic_api_key"),
+        "openrouter": ("OPENROUTER_API_KEY", "openrouter_api_key"),
+        "opencode-go": ("OPENCODE_GO_API_KEY", "opencode_go_api_key"),
+        "ollama": ("OLLAMA_API_BASE", "ollama_api_base"),
+        "lmstudio": ("LMSTUDIO_API_BASE", "lmstudio_api_base"),
+        "brave": ("BRAVE_API_KEY", "brave_api_key"),
+        "github": ("GITHUB_TOKEN", "github_token"),
+        "semantic_scholar": ("SEMANTIC_SCHOLAR_API_KEY", "semantic_scholar_api_key"),
+        "openalex": ("OPENALEX_API_KEY", "openalex_api_key"),
+        "modal": ("MODAL_TOKEN_ID", "modal_token_id"),
+    }
+
+    names = sources.get(provider_id)
+    if names is None:
+        return False
+    env_name, setting_name = names
+
+    # The environment is consulted first; the user setting is only read
+    # when the environment variable is unset or empty.
+    if os.environ.get(env_name):
+        return True
+    if provider_settings.get(setting_name):
+        return True
+    return False
+
+
+def _get_custom_providers(provider_settings: dict) -> list[dict]:
+    """Extract custom providers from user settings, skipping malformed (non-dict) entries."""
+    raw = provider_settings.get("custom_providers")
+    if not isinstance(raw, list):
+        return []
+    return [cp for cp in raw if isinstance(cp, dict)]
+
+
 # ---- Providers ----
 
+
 @router.get("/providers")
 async def list_providers(
     user: User = Depends(get_current_user),
@@ -124,7 +174,7 @@ async def list_providers(
             "id": "openai",
             "name": "OpenAI",
             "key_env": "OPENAI_API_KEY",
-            "configured": bool(os.environ.get("OPENAI_API_KEY") or provider_settings.get("openai_api_key")),
+            "configured": _is_provider_configured("openai", provider_settings),
             "categories": ["models"],
             "docs_url": "https://platform.openai.com/docs/api-reference",
         },
@@ -132,7 +182,7 @@ async def list_providers(
             "id": "anthropic",
             "name": "Anthropic",
             "key_env": "ANTHROPIC_API_KEY",
-            "configured": bool(os.environ.get("ANTHROPIC_API_KEY") or provider_settings.get("anthropic_api_key")),
+            "configured": _is_provider_configured("anthropic", provider_settings),
             "categories": ["models"],
             "docs_url": "https://docs.anthropic.com/en/api/getting-started",
         },
@@ -140,7 +190,7 @@ async def list_providers(
             "id": "openrouter",
             "name": "OpenRouter",
             "key_env": "OPENROUTER_API_KEY",
-            "configured": bool(os.environ.get("OPENROUTER_API_KEY") or provider_settings.get("openrouter_api_key")),
+            "configured": _is_provider_configured("openrouter", provider_settings),
             "categories": ["models"],
             "docs_url": "https://openrouter.ai/docs",
         },
@@ -148,7 +198,7 @@ async def list_providers(
             "id": "opencode-go",
             "name": "OpenCode Go",
             "key_env": "OPENCODE_GO_API_KEY",
-            "configured": bool(os.environ.get("OPENCODE_GO_API_KEY") or provider_settings.get("opencode_go_api_key")),
+            "configured": _is_provider_configured("opencode-go", provider_settings),
             "categories": ["models"],
             "docs_url": "https://go.opencode.ai/docs",
         },
@@ -156,7 +206,7 @@ async def list_providers(
             "id": "ollama",
             "name": "Ollama (Local)",
             "key_env": "OLLAMA_API_BASE",
-            "configured": bool(os.environ.get("OLLAMA_API_BASE") or provider_settings.get("ollama_api_base")),
+            "configured": _is_provider_configured("ollama", provider_settings),
             "categories": ["models"],
             "docs_url": "https://ollama.com/docs",
         },
@@ -164,7 +214,7 @@ async def list_providers(
             "id": "lmstudio",
             "name": "LM Studio (Local)",
             "key_env": "LMSTUDIO_API_BASE",
-            "configured": bool(os.environ.get("LMSTUDIO_API_BASE") or provider_settings.get("lmstudio_api_base")),
+            "configured": _is_provider_configured("lmstudio", provider_settings),
             "categories": ["models"],
             "docs_url": "https://lmstudio.ai/docs",
         },
@@ -172,7 +222,7 @@ async def list_providers(
             "id": "brave",
             "name": "Brave Search",
             "key_env": "BRAVE_API_KEY",
-            "configured": bool(os.environ.get("BRAVE_API_KEY") or provider_settings.get("brave_api_key")),
+            "configured": _is_provider_configured("brave", provider_settings),
             "categories": ["search"],
             "docs_url": "https://brave.com/search/api/",
         },
@@ -180,7 +230,7 @@ async def list_providers(
             "id": "github",
             "name": "GitHub",
             "key_env": "GITHUB_TOKEN",
-            "configured": bool(os.environ.get("GITHUB_TOKEN") or provider_settings.get("github_token")),
+            "configured": _is_provider_configured("github", provider_settings),
             "categories": ["papers", "others"],
             "docs_url": "https://docs.github.com/en/rest",
         },
@@ -188,7 +238,7 @@ async def list_providers(
             "id": "semantic_scholar",
             "name": "Semantic Scholar",
             "key_env": "SEMANTIC_SCHOLAR_API_KEY",
-            "configured": bool(os.environ.get("SEMANTIC_SCHOLAR_API_KEY") or provider_settings.get("semantic_scholar_api_key")),
+            "configured": _is_provider_configured("semantic_scholar", provider_settings),
             "categories": ["papers"],
             "docs_url": "https://api.semanticscholar.org/api-docs/",
         },
@@ -196,7 +246,7 @@ async def list_providers(
             "id": "openalex",
             "name": "OpenAlex",
             "key_env": "OPENALEX_API_KEY",
-            "configured": bool(os.environ.get("OPENALEX_API_KEY") or provider_settings.get("openalex_api_key")),
+            "configured": _is_provider_configured("openalex", provider_settings),
             "categories": ["papers"],
             "docs_url": "https://docs.openalex.org/",
         },
@@ -204,16 +254,34 @@ async def list_providers(
             "id": "modal",
             "name": "Modal",
             "key_env": "MODAL_TOKEN_ID",
-            "configured": bool(os.environ.get("MODAL_TOKEN_ID") or provider_settings.get("modal_token_id")),
+            "configured": _is_provider_configured("modal", provider_settings),
             "categories": ["compute"],
             "docs_url": "https://modal.com/docs",
         },
     ]
+
+    # Add custom providers
+    for cp in _get_custom_providers(provider_settings):
+        providers.append(
+            {
+                "id": cp.get("id", ""),
+                "name": cp.get("name", cp.get("id", "")),
+                "key_env": f"{cp.get('id', '').upper()}_API_KEY",
+                "configured": bool(cp.get("api_key") and cp.get("api_base")),
+                "categories": ["models"],
+                "docs_url": cp.get("api_base", ""),
+                "is_custom": True,
+                "sdk_type": cp.get("sdk_type", "openai-sdk"),
+                "api_base": cp.get("api_base", ""),
+            }
+        )
+
     return {"providers": providers}
 
 
 # ---- App Status (model, config) ----
 
+
 @router.get("/status")
 async def get_status(
     request: Request,
@@ -231,19 +299,25 @@ async def get_status(
 
     # Only need onboarding if no providers are configured at all
     # (i.e., auto-detection also failed to find anything useful)
-    has_any_provider = any([
-        os.environ.get("ANTHROPIC_API_KEY"),
-        os.environ.get("OPENAI_API_KEY"),
-        os.environ.get("OPENROUTER_API_KEY"),
-        os.environ.get("OPENCODE_GO_API_KEY"),
-        os.environ.get("OLLAMA_API_BASE"),
-        os.environ.get("LMSTUDIO_API_BASE"),
-    ])
+    has_any_provider = any(
+        [
+            os.environ.get("ANTHROPIC_API_KEY"),
+            os.environ.get("OPENAI_API_KEY"),
+            os.environ.get("OPENROUTER_API_KEY"),
+            os.environ.get("OPENCODE_GO_API_KEY"),
+            os.environ.get("OLLAMA_API_BASE"),
+            os.environ.get("LMSTUDIO_API_BASE"),
+        ]
+    )
     # Check user-configured providers too
     if not has_any_provider:
         user_providers = await ops.get_all_settings(db, user.id, category="providers")
         prov = user_providers.get("providers", {})
         has_any_provider = any(v for v in prov.values() if v)
+        # Also check custom providers
+        if not has_any_provider:
+            custom = _get_custom_providers(prov)
+            has_any_provider = any(bool(cp.get("api_key") and cp.get("api_base")) for cp in custom)
 
     return {
         "model": effective_model,
@@ -255,75 +329,417 @@ async def get_status(
 
 # ---- Models ----
 
-@router.get("/models")
-async def list_models():
-    """List available LLM models."""
+
+# Standard fallback models (used when models.dev is unreachable)
+_FALLBACK_MODELS = [
+    {"id": "openai/gpt-4o", "name": "GPT-4o", "provider": "openai", "release_date": "2024-05-13"},
+    {
+        "id": "openai/gpt-4o-mini",
+        "name": "GPT-4o Mini",
+        "provider": "openai",
+        "release_date": "2024-07-18",
+    },
+    {"id": "openai/o3-mini", "name": "o3-mini", "provider": "openai", "release_date": "2025-01-31"},
+    {
+        "id": "anthropic/claude-sonnet-4-20250514",
+        "name": "Claude Sonnet 4",
+        "provider": "anthropic",
+        "release_date": "2025-05-14",
+    },
+    {
+        "id": "anthropic/claude-opus-4-20250514",
+        "name": "Claude Opus 4",
+        "provider": "anthropic",
+        "release_date": "2025-05-14",
+    },
+    {
+        "id": "anthropic/claude-haiku-4-20250514",
+        "name": "Claude Haiku 4",
+        "provider": "anthropic",
+        "release_date": "2025-05-14",
+    },
+    {
+        "id": "openrouter/openai/gpt-4o",
+        "name": "OpenRouter GPT-4o",
+        "provider": "openrouter",
+        "release_date": "2024-05-13",
+    },
+    {
+        "id": "openrouter/anthropic/claude-sonnet-4",
+        "name": "OR Claude Sonnet",
+        "provider": "openrouter",
+        "release_date": "2025-05-14",
+    },
+    {
+        "id": "openrouter/google/gemini-2.5-pro",
+        "name": "OR Gemini 2.5 Pro",
+        "provider": "openrouter",
+        "release_date": "2025-03-25",
+    },
+    {
+        "id": "openrouter/google/gemini-2.5-flash",
+        "name": "OR Gemini 2.5 Flash",
+        "provider": "openrouter",
+        "release_date": "2025-04-15",
+    },
+]
+
+# OpenCode Go models
+_OPENCODE_GO_MODELS = [
+    {
+        "id": "opencode-go/glm-5.1",
+        "name": "GLM-5.1",
+        "provider": "opencode-go",
+        "release_date": "2025-04-01",
+    },
+    {
+        "id": "opencode-go/glm-5",
+        "name": "GLM-5",
+        "provider": "opencode-go",
+        "release_date": "2025-03-01",
+    },
+    {
+        "id": "opencode-go/kimi-k2.6",
+        "name": "Kimi K2.6",
+        "provider": "opencode-go",
+        "release_date": "2025-04-20",
+    },
+    {
+        "id": "opencode-go/kimi-k2.5",
+        "name": "Kimi K2.5",
+        "provider": "opencode-go",
+        "release_date": "2025-03-15",
+    },
+    {
+        "id": "opencode-go/deepseek-v4-pro",
+        "name": "DeepSeek V4 Pro",
+        "provider": "opencode-go",
+        "release_date": "2025-04-10",
+    },
+    {
+        "id": "opencode-go/deepseek-v4-flash",
+        "name": "DeepSeek V4 Flash",
+        "provider": "opencode-go",
+        "release_date": "2025-04-10",
+    },
+    {
+        "id": "opencode-go/mimo-v2.5-pro",
+        "name": "MiMo-V2.5-Pro",
+        "provider": "opencode-go",
+        "release_date": "2025-03-20",
+    },
+    {
+        "id": "opencode-go/mimo-v2.5",
+        "name": "MiMo-V2.5",
+        "provider": "opencode-go",
+        "release_date": "2025-03-20",
+    },
+    {
+        "id": "opencode-go/minimax-m2.7",
+        "name": "MiniMax M2.7",
+        "provider": "opencode-go",
+        "release_date": "2025-04-05",
+    },
+    {
+        "id": "opencode-go/minimax-m2.5",
+        "name": "MiniMax M2.5",
+        "provider": "opencode-go",
+        "release_date": "2025-03-10",
+    },
+    {
+        "id": "opencode-go/qwen3.6-plus",
+        "name": "Qwen3.6 Plus",
+        "provider": "opencode-go",
+        "release_date": "2025-04-15",
+    },
+    {
+        "id": "opencode-go/qwen3.5-plus",
+        "name": "Qwen3.5 Plus",
+        "provider": "opencode-go",
+        "release_date": "2025-03-01",
+    },
+]
+
+
+async def _fetch_models_dev() -> list[dict]:
+    """Fetch models from models.dev and return flat list with provider info."""
     models = []
     try:
         async with httpx.AsyncClient() as client:
-            resp = await client.get("https://models.dev/api/v1/models", timeout=10)
+            resp = await client.get("https://models.dev/api.json", timeout=15)
             if resp.status_code == 200:
                 data = resp.json()
-                if isinstance(data.get("models"), list):
-                    models = [
-                        {
-                            "id": m.get("id", m.get("modelId", "")),
-                            "name": m.get("name", m.get("id", "")),
-                            "provider": m.get("provider", "unknown"),
-                        }
-                        for m in data["models"]
-                    ]
+                for provider_id, provider_data in data.items():
+                    if not isinstance(provider_data, dict):
+                        continue
+                    provider_models = provider_data.get("models", {})
+                    if isinstance(provider_models, dict):
+                        for model_id, model_info in provider_models.items():
+                            if not isinstance(model_info, dict):
+                                continue
+                            release_date = model_info.get("release_date", "")
+                            # Skip entries without a release date (not real models)
+                            if not release_date:
+                                continue
+                            models.append(
+                                {
+                                    "id": f"{provider_id}/{model_id}",
+                                    "name": model_info.get("name", model_id),
+                                    "provider": provider_id,
+                                    "release_date": release_date,
+                                }
+                            )
     except Exception:
         pass
+    return models
 
-    if not models:
-        models = [
-            {"id": "openai/gpt-4o", "name": "GPT-4o", "provider": "openai"},
-            {"id": "openai/gpt-4o-mini", "name": "GPT-4o Mini", "provider": "openai"},
-            {"id": "openai/o3-mini", "name": "o3-mini", "provider": "openai"},
-            {"id": "anthropic/claude-sonnet-4", "name": "Claude Sonnet 4", "provider": "anthropic"},
-            {"id": "anthropic/claude-opus-4", "name": "Claude Opus 4", "provider": "anthropic"},
-            {"id": "anthropic/claude-haiku-4", "name": "Claude Haiku 4", "provider": "anthropic"},
-            {"id": "openrouter/openai/gpt-4o", "name": "OpenRouter GPT-4o", "provider": "openrouter"},
-            {"id": "openrouter/anthropic/claude-sonnet-4", "name": "OR Claude Sonnet", "provider": "openrouter"},
-            {"id": "openrouter/google/gemini-2.5-pro", "name": "OR Gemini 2.5 Pro", "provider": "openrouter"},
-            {"id": "openrouter/google/gemini-2.5-flash", "name": "OR Gemini 2.5 Flash", "provider": "openrouter"},
-        ]
 
-    # Add OpenCode Go models
-    opencode_go_models = [
-        {"id": "opencode-go/glm-5.1", "name": "GLM-5.1", "provider": "opencode-go"},
-        {"id": "opencode-go/glm-5", "name": "GLM-5", "provider": "opencode-go"},
-        {"id": "opencode-go/kimi-k2.6", "name": "Kimi K2.6", "provider": "opencode-go"},
-        {"id": "opencode-go/kimi-k2.5", "name": "Kimi K2.5", "provider": "opencode-go"},
-        {"id": "opencode-go/deepseek-v4-pro", "name": "DeepSeek V4 Pro", "provider": "opencode-go"},
-        {"id": "opencode-go/deepseek-v4-flash", "name": "DeepSeek V4 Flash", "provider": "opencode-go"},
-        {"id": "opencode-go/mimo-v2.5-pro", "name": "MiMo-V2.5-Pro", "provider": "opencode-go"},
-        {"id": "opencode-go/mimo-v2.5", "name": "MiMo-V2.5", "provider": "opencode-go"},
-        {"id": "opencode-go/minimax-m2.7", "name": "MiniMax M2.7", "provider": "opencode-go"},
-        {"id": "opencode-go/minimax-m2.5", "name": "MiniMax M2.5", "provider": "opencode-go"},
-        {"id": "opencode-go/qwen3.6-plus", "name": "Qwen3.6 Plus", "provider": "opencode-go"},
-        {"id": "opencode-go/qwen3.5-plus", "name": "Qwen3.5 Plus", "provider": "opencode-go"},
-    ]
-    models.extend(opencode_go_models)
+@router.get("/models")
+async def list_models(
+    request: Request,
+    provider: str | None = None,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """List available LLM models from configured providers.
+
+    If `provider` is specified, only return models for that provider.
+    Otherwise return models from all configured providers.
+    """
+    # Get user's provider settings to check what's configured
+    user_settings = await ops.get_all_settings(db, user.id, category="providers")
+    provider_settings = user_settings.get("providers", {})
+
+    # Determine which providers are configured
+    configured_providers = set()
+    for pid in ["openai", "anthropic", "openrouter", "opencode-go", "ollama", "lmstudio"]:
+        if _is_provider_configured(pid, provider_settings):
+            configured_providers.add(pid)
+
+    # Add custom providers
+    custom_providers = _get_custom_providers(provider_settings)
+    for cp in custom_providers:
+        if cp.get("api_key") and cp.get("api_base"):
+            configured_providers.add(cp.get("id", ""))
+
+    # If a specific provider is requested, only use that one
+    target_providers = {provider} if provider else configured_providers
+
+    # Fetch from models.dev
+    all_models = await _fetch_models_dev()
+
+    # Filter to target providers
+    models = [m for m in all_models if m.get("provider") in target_providers]
+
+    # If models.dev failed or returned nothing, use fallbacks
+    if not models:
+        fallback = []
+        for m in _FALLBACK_MODELS:
+            if not provider or m["provider"] == provider:
+                if m["provider"] in configured_providers:
+                    fallback.append(m)
+        for m in _OPENCODE_GO_MODELS:
+            if not provider or m["provider"] == provider:
+                if m["provider"] in configured_providers:
+                    fallback.append(m)
+        models = fallback
+    else:
+        # Add fallback models for providers not in models.dev response
+        # or for when models.dev is missing some providers
+        existing_ids = {m["id"] for m in models}
+        for m in _FALLBACK_MODELS:
+            if m["id"] not in existing_ids:
+                if (not provider or m["provider"] == provider) and m[
+                    "provider"
+                ] in configured_providers:
+                    models.append(m)
+        for m in _OPENCODE_GO_MODELS:
+            if m["id"] not in existing_ids:
+                if (not provider or m["provider"] == provider) and m[
+                    "provider"
+                ] in configured_providers:
+                    models.append(m)
 
     # Add local model placeholders if configured
-    if os.environ.get("OLLAMA_API_BASE"):
+    if "ollama" in configured_providers and (not provider or provider == "ollama"):
         ollama_model = os.environ.get("OLLAMA_MODEL", "llama3.1")
-        models.append({"id": f"ollama/{ollama_model}", "name": f"Ollama: {ollama_model}", "provider": "ollama"})
-        # Common Ollama models
-        for m in ["llama3.1", "llama3.2", "qwen2.5-coder", "codellama", "deepseek-coder-v2", "mistral"]:
-            if m != ollama_model:
-                models.append({"id": f"ollama/{m}", "name": f"Ollama: {m}", "provider": "ollama"})
+        if not any(m["id"] == f"ollama/{ollama_model}" for m in models):
+            models.append(
+                {
+                    "id": f"ollama/{ollama_model}",
+                    "name": f"Ollama: {ollama_model}",
+                    "provider": "ollama",
+                    "release_date": "",
+                }
+            )
+        for m in [
+            "llama3.1",
+            "llama3.2",
+            "qwen2.5-coder",
+            "codellama",
+            "deepseek-coder-v2",
+            "mistral",
+        ]:
+            if not any(x["id"] == f"ollama/{m}" for x in models):
+                models.append(
+                    {
+                        "id": f"ollama/{m}",
+                        "name": f"Ollama: {m}",
+                        "provider": "ollama",
+                        "release_date": "",
+                    }
+                )
+
+    if "lmstudio" in configured_providers and (not provider or provider == "lmstudio"):
+        if not any(m["id"] == "lmstudio/default" for m in models):
+            models.append(
+                {
+                    "id": "lmstudio/default",
+                    "name": "LM Studio (default)",
+                    "provider": "lmstudio",
+                    "release_date": "",
+                }
+            )
+
+    # Add custom provider cached models
+    for cp in custom_providers:
+        cp_id = cp.get("id", "")
+        if cp_id not in configured_providers:
+            continue
+        if provider and cp_id != provider:
+            continue
+        for cm in cp.get("models", []):
+            model_entry = {
+                "id": f"{cp_id}/{cm.get('id', cm.get('modelId', ''))}",
+                "name": cm.get("name", cm.get("id", "")),
+                "provider": cp_id,
+                "release_date": cm.get("release_date", ""),
+            }
+            if not any(m["id"] == model_entry["id"] for m in models):
+                models.append(model_entry)
+
+    # Get recent models
+    agent_settings = await ops.get_user_agent_settings(db, user.id)
+    recent_model_ids = agent_settings.get("recent_models", [])
+    if not isinstance(recent_model_ids, list):
+        recent_model_ids = []
+
+    # Build recent model entries (preserve order, most recent first)
+    recent_models = []
+    seen_recent = set()
+    for mid in recent_model_ids[:10]:
+        if mid in seen_recent:
+            continue
+        seen_recent.add(mid)
+        # Find model info from the full list
+        model_info = None
+        for m in models:
+            if m["id"] == mid:
+                model_info = m
+                break
+        if model_info:
+            recent_models.append(model_info)
+
+    # Order the response by provider (ascending) and, inside each provider,
+    # by release_date (newest first). list.sort() is stable, so two passes do
+    # it: sort on the secondary key first, then on the primary key.
+    #
+    # ISO "YYYY-MM-DD" strings order correctly as plain text. Entries with an
+    # empty or null release_date (local/custom placeholders) map to a sentinel
+    # older than any real release, so they sink to the bottom of their
+    # provider group instead of breaking the comparison.
+    def _release_key(m: dict) -> str:
+        return m.get("release_date") or "1900-01-01"
+
+    # Pass 1: newest release first.
+    models.sort(key=_release_key, reverse=True)
+    # Pass 2: group by provider; stability keeps the per-provider date order.
+    models.sort(key=lambda m: m.get("provider", ""))
+
+    return {
+        "models": models,
+        "recent_models": recent_models[:5],
+    }
 
-    if os.environ.get("LMSTUDIO_API_BASE"):
-        models.append({"id": "lmstudio/default", "name": "LM Studio (default)", "provider": "lmstudio"})
 
-    return {"models": models}
+# ---- Custom Provider Model Fetching ----
+
+
+@router.post("/providers/{provider_id}/fetch-models")
+async def fetch_custom_provider_models(
+    provider_id: str,
+    request: Request,
+    user: User = Depends(get_current_user),
+    db: AsyncSession = Depends(get_db),
+):
+    """Fetch the model list from a custom provider's API and cache it.
+
+    The provider is looked up by ``provider_id`` among the caller's custom
+    providers; the fetched list is stored on that entry with a
+    ``last_fetched_at`` timestamp, saved to the user's settings, and returned.
+
+    Raises 404 for an unknown provider, 400 for a missing api_base/api_key or
+    an unsupported sdk_type, and 502 when the upstream call fails or returns
+    a non-200 status or a body that is not valid JSON.
+    """
+    user_settings = await ops.get_all_settings(db, user.id, category="providers")
+    provider_settings = user_settings.get("providers", {})
+
+    custom_providers = _get_custom_providers(provider_settings)
+    cp = next((c for c in custom_providers if c.get("id") == provider_id), None)
+    if not cp:
+        raise HTTPException(status_code=404, detail="Custom provider not found")
+
+    sdk_type = cp.get("sdk_type", "openai-sdk")
+    # `or ""` also covers a stored null, which .get()'s default would not replace
+    api_base = (cp.get("api_base") or "").rstrip("/")
+    api_key = cp.get("api_key") or ""
+
+    if not api_base or not api_key:
+        raise HTTPException(status_code=400, detail="Provider missing api_base or api_key")
+
+    fetched_models = []
+
+    if sdk_type in ("openai-sdk", "openrouter", "litellm"):
+        # OpenAI-compatible /models endpoint
+        try:
+            async with httpx.AsyncClient() as client:
+                headers = {"Authorization": f"Bearer {api_key}"}
+                resp = await client.get(f"{api_base}/models", headers=headers, timeout=15)
+        except httpx.RequestError as e:
+            raise HTTPException(status_code=502, detail=f"Failed to reach provider: {e}") from e
+        if resp.status_code != 200:
+            raise HTTPException(status_code=502, detail=f"Provider returned {resp.status_code}")
+        try:
+            data = resp.json()
+        except ValueError as e:
+            # A 200 with a non-JSON body (e.g. an HTML error page) is an upstream fault
+            raise HTTPException(status_code=502, detail="Provider returned invalid JSON") from e
+        entries = data.get("data") if isinstance(data, dict) else None
+        for m in (entries if isinstance(entries, list) else []):
+            # Skip malformed entries and ones without an id (nothing to select them by)
+            if isinstance(m, dict) and m.get("id"):
+                # OpenAI /models usually only has id, so it doubles as the display name
+                fetched_models.append({"id": m["id"], "name": m["id"], "release_date": ""})
+    elif sdk_type == "anthropic-sdk":
+        # No model listing is attempted for anthropic-sdk providers: the cached
+        # list is reset to empty and the user will need to add models manually
+        pass
+    else:
+        raise HTTPException(status_code=400, detail=f"Unsupported sdk_type: {sdk_type}")
+
+    # cp is the same dict object held in custom_providers, so this updates the list in place
+    cp["models"] = fetched_models
+    cp["last_fetched_at"] = datetime.now(UTC).isoformat()
+
+    await ops.set_user_setting(db, user.id, "providers", "custom_providers", custom_providers)
+
+    return {"models": fetched_models}
 
 
 # ---- Config (legacy .env writing — for backward compat) ----
 
+
 @router.post("/config")
 async def save_config(
     request: Request,
diff --git a/backend/openmlr/routes/terminal.py b/backend/openmlr/routes/terminal.py
index dbd2028..cba8ba4 100644
--- a/backend/openmlr/routes/terminal.py
+++ b/backend/openmlr/routes/terminal.py
@@ -126,14 +126,17 @@ async def _cleanup_process(pid: int, master_fd: int) -> None:
 
 
 @router.websocket("/api/terminal/{project_uuid}")
+@router.websocket("/api/terminal")
 async def terminal_websocket(
     websocket: WebSocket,
-    project_uuid: str,
+    project_uuid: str | None = None,
     token: str = Query(default=None),
 ):
     """WebSocket endpoint for interactive terminal sessions.
 
     Spawns a PTY process in the project workspace directory.
+    If no project_uuid is provided, uses the user's default project workspace.
+
     Messages from the client are written to the PTY stdin.
     Output from the PTY is sent back to the client.
 
@@ -150,7 +153,14 @@ async def terminal_websocket(
 
     # Look up the project to get the workspace path
     async with get_async_session() as db:
-        project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+        if project_uuid:
+            project = await ops.get_project_by_uuid(db, project_uuid, user.id)
+        else:
+            # Use default project
+            from .projects import get_or_create_default_project
+
+            project = await get_or_create_default_project(db, user.id)
+
         if not project or not project.workspace_path:
             await websocket.close(code=4004, reason="Project not found")
             return
@@ -200,9 +210,25 @@ async def terminal_websocket(
     # Close slave fd in parent — only the child uses it
     os.close(slave_fd)
 
-    # Set master fd to non-blocking
-    flags = fcntl.fcntl(master_fd, fcntl.F_GETFL)
-    fcntl.fcntl(master_fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)
+    # Keep master fd blocking — reads happen in a thread pool executor
+    # so blocking is fine and avoids premature EAGAIN exits.
+
+    def _blocking_read(fd: int) -> bytes:
+        """Read up to 4096 bytes from the PTY fd; return empty bytes once the child has exited."""
+        import select as _select
+
+        while True:
+            # Poll with a 0.5s timeout so child liveness is re-checked between waits
+            ready, _, _ = _select.select([fd], [], [], 0.5)
+            if ready:
+                return os.read(fd, 4096)
+            # No data yet: check the child. NOTE(review): waitpid() reaps it — confirm cleanup copes
+            try:
+                pid_result, _ = os.waitpid(proc.pid, os.WNOHANG)
+                if pid_result != 0:
+                    return b""  # Child exited
+            except ChildProcessError:
+                return b""
 
     async def read_pty():
         """Read from PTY and send to WebSocket."""
@@ -210,7 +236,7 @@ async def read_pty():
         try:
             while True:
                 try:
-                    data = await loop.run_in_executor(None, lambda: os.read(master_fd, 4096))
+                    data = await loop.run_in_executor(None, lambda: _blocking_read(master_fd))
                     if not data:
                         break
                     await websocket.send_bytes(data)
diff --git a/backend/openmlr/sandbox/interface.py b/backend/openmlr/sandbox/interface.py
index 19b904a..f1415e4 100644
--- a/backend/openmlr/sandbox/interface.py
+++ b/backend/openmlr/sandbox/interface.py
@@ -9,6 +9,7 @@
 @dataclass
 class ExecutionResult:
     """Result of a command execution."""
+
     output: str
     success: bool
     exit_code: int = 0
diff --git a/backend/openmlr/sandbox/modal_sandbox.py b/backend/openmlr/sandbox/modal_sandbox.py
index ad3277c..d161bbb 100644
--- a/backend/openmlr/sandbox/modal_sandbox.py
+++ b/backend/openmlr/sandbox/modal_sandbox.py
@@ -25,9 +25,7 @@ async def create(self, config: dict) -> "ModalSandbox":
         try:
             import modal
         except ImportError:
-            raise RuntimeError(
-                "Modal is not installed. Install with: pip install modal-client"
-            )
+            raise RuntimeError("Modal is not installed. Install with: pip install modal-client")
 
         def _do_create():
             app = modal.App.lookup("openmlr-sandbox", create_if_missing=True)
@@ -150,8 +148,10 @@ async def probe_environment(self):
     async def destroy(self) -> None:
         if self._sandbox:
             try:
+
                 def _terminate():
                     self._sandbox.terminate()
+
                 await asyncio.to_thread(_terminate)
             except Exception:
                 pass
diff --git a/backend/openmlr/sandbox/ssh.py b/backend/openmlr/sandbox/ssh.py
index 7ef3ee1..165af51 100644
--- a/backend/openmlr/sandbox/ssh.py
+++ b/backend/openmlr/sandbox/ssh.py
@@ -20,9 +20,12 @@ def __init__(self, expected_fingerprint: str | None = None):
 
     def missing_host_key(self, client, hostname, key):
         import paramiko
+
         actual = key.get_fingerprint().hex()
         self.actual_fingerprint = actual
-        if self.expected and actual != self.expected.lower().replace(":", "").replace("sha256:", ""):
+        if self.expected and actual != self.expected.lower().replace(":", "").replace(
+            "sha256:", ""
+        ):
             raise paramiko.SSHException(
                 f"Host key mismatch for {hostname}: expected {self.expected}, got {actual}"
             )
@@ -39,7 +42,7 @@ class SSHConnectionPool:
     _instance: "SSHConnectionPool | None" = None
 
     def __init__(self, ttl_seconds: int = 300):
-        self._connections: dict[str, tuple] = {}   # key -> (client, sftp, fingerprint)
+        self._connections: dict[str, tuple] = {}  # key -> (client, sftp, fingerprint)
         self._last_used: dict[str, float] = {}
         self._ttl = ttl_seconds
 
@@ -135,6 +138,7 @@ async def create(self, config: dict) -> "SSHSandbox":
 
         if self.key_filename:
             from ..keys import KeyManager
+
             self._key_manager = KeyManager()
 
         if not self.host:
@@ -154,7 +158,9 @@ async def _ensure_remote_workspace(self, remote_path: str) -> None:
         self._ensure_connected()
 
         def _do_mkdir():
-            subdirs = " ".join(f"{remote_path}/{d}" for d in ["data", "models", "code", "outputs", ".openmlr-meta"])
+            subdirs = " ".join(
+                f"{remote_path}/{d}" for d in ["data", "models", "code", "outputs", ".openmlr-meta"]
+            )
             cmd = f"mkdir -p {subdirs}"
             stdin, stdout, stderr = self._client.exec_command(cmd, timeout=10)
             exit_code = stdout.channel.recv_exit_status()
@@ -180,6 +186,7 @@ async def _connect(self):
 
         def _do_connect():
             import paramiko
+
             client = paramiko.SSHClient()
 
             if self.host_key_fingerprint:
@@ -223,13 +230,19 @@ def _do_connect():
         pool.put(self.host, self.port, self.username, self._client, self._sftp, actual_fp)
 
     def _ensure_connected(self):
-        if not self._client or not self._client.get_transport() or not self._client.get_transport().is_active():
+        if (
+            not self._client
+            or not self._client.get_transport()
+            or not self._client.get_transport().is_active()
+        ):
             raise RuntimeError("SSH connection lost. Recreate the sandbox.")
 
     async def execute(self, command: str, timeout: int = 120) -> ExecutionResult:
         return await self.execute_stream(command, timeout)
 
-    async def execute_stream(self, command: str, timeout: int = 120, on_chunk=None) -> ExecutionResult:
+    async def execute_stream(
+        self, command: str, timeout: int = 120, on_chunk=None
+    ) -> ExecutionResult:
         self._ensure_connected()
         start = time.monotonic()
 
@@ -297,6 +310,7 @@ async def read_file(self, path: str) -> str:
 
         def _do_read():
             import io
+
             buf = io.BytesIO()
             self._sftp.getfo(path, buf)
             buf.seek(0)
@@ -309,6 +323,7 @@ async def write_file(self, path: str, content: str) -> bool:
 
         def _do_write():
             import io
+
             buf = io.BytesIO(content.encode("utf-8"))
             self._sftp.putfo(buf, path)
 
@@ -344,6 +359,7 @@ def _do_list():
                 result = []
                 for e in sorted(entries, key=lambda x: x.filename):
                     import stat
+
                     suffix = "/" if stat.S_ISDIR(e.st_mode) else ""
                     result.append(f"{e.filename}{suffix}")
                 return result
diff --git a/backend/openmlr/services/event_bus.py b/backend/openmlr/services/event_bus.py
index db39c7c..92b4d72 100644
--- a/backend/openmlr/services/event_bus.py
+++ b/backend/openmlr/services/event_bus.py
@@ -65,6 +65,7 @@ async def broadcast(self, event: AgentEvent | dict) -> None:
         if USE_REDIS:
             try:
                 from .redis_pubsub import publish_event
+
                 await publish_event(AgentEvent(event_type=et, data=data.get("data")))
             except Exception as e:
                 logger.warning(f"Failed to publish to Redis: {e}")
@@ -87,6 +88,7 @@ async def start_redis_bridge(self) -> None:
 
         async def _listen():
             from .redis_pubsub import subscribe_events
+
             logger.info("Redis subscription loop started")
             try:
                 async for event in subscribe_events():
diff --git a/backend/openmlr/services/job_manager.py b/backend/openmlr/services/job_manager.py
index 0a2a729..53e49da 100644
--- a/backend/openmlr/services/job_manager.py
+++ b/backend/openmlr/services/job_manager.py
@@ -25,6 +25,7 @@ def celery_app(self):
         """Lazy load Celery app to avoid import issues."""
         if self._celery_app is None and USE_BACKGROUND_JOBS:
             from ..celery_app import celery_app
+
             self._celery_app = celery_app
         return self._celery_app
 
@@ -57,6 +58,7 @@ async def create_job(
 
         # Enqueue Celery task
         from ..tasks.agent_tasks import process_agent_message
+
         process_agent_message.delay(
             job_id=job.job_id,
             conversation_id=conversation_id,
diff --git a/backend/openmlr/services/session_manager.py b/backend/openmlr/services/session_manager.py
index 825d779..5a48cb3 100644
--- a/backend/openmlr/services/session_manager.py
+++ b/backend/openmlr/services/session_manager.py
@@ -65,7 +65,7 @@ async def get_or_create_session(
         existing_messages: list[dict] = None,
         username: str = "user",
         user_id: int | None = None,
-        db = None,
+        db=None,
     ) -> ActiveSession:
         """Get existing session or create a new one with system prompt."""
         existing = self.sessions.get(conversation_id)
@@ -89,6 +89,21 @@ async def get_or_create_session(
         # Import here (not at module level) to avoid circular imports
         from ..db import operations as ops
 
+        # Load custom providers from user settings
+        if user_id and db:
+            try:
+                from ..routes.settings import _get_custom_providers
+
+                user_settings = await ops.get_all_settings(db, user_id, category="providers")
+                provider_settings = user_settings.get("providers", {})
+                custom_providers = _get_custom_providers(provider_settings)
+                # Filter to only fully configured custom providers
+                config.custom_providers = [
+                    cp for cp in custom_providers if cp.get("api_key") and cp.get("api_base")
+                ]
+            except Exception as e:
+                log.warning(f"Session {conversation_id}: failed to load custom providers - {e}")
+
         # Determine effective compute node
         effective_node = None
         if user_id and db:
@@ -98,19 +113,24 @@ async def get_or_create_session(
                 if conv and conv.extra:
                     override_node_id = conv.extra.get("compute_node_id")
                     if override_node_id:
-                        effective_node = await ops.get_compute_node_by_id(db, override_node_id, user_id)
+                        effective_node = await ops.get_compute_node_by_id(
+                            db, override_node_id, user_id
+                        )
 
                 # Fall back to user default
                 if not effective_node:
                     effective_node = await ops.get_default_compute_node(db, user_id)
 
                 if effective_node:
-                    log.info(f"Session {conversation_id}: using compute node '{effective_node.name}' ({effective_node.type})")
+                    log.info(
+                        f"Session {conversation_id}: using compute node '{effective_node.name}' ({effective_node.type})"
+                    )
             except Exception as e:
                 log.warning(f"Session {conversation_id}: failed to load compute node - {e}")
 
         # Initialize workspace manager and sandbox manager
         from ..compute import WorkspaceManager
+
         workspace_manager = WorkspaceManager()
         sandbox_manager = SandboxManager(
             workspace_manager=workspace_manager,
@@ -122,7 +142,9 @@ async def get_or_create_session(
             try:
                 await sandbox_manager.create(effective_node.type, effective_node.config)
             except Exception as e:
-                log.warning(f"Session {conversation_id}: failed to create sandbox for node '{effective_node.name}' - {e}")
+                log.warning(
+                    f"Session {conversation_id}: failed to create sandbox for node '{effective_node.name}' - {e}"
+                )
 
         tool_router = create_tool_router(sandbox_manager)
         # Inject user/db context for compute tools
@@ -151,17 +173,23 @@ async def get_or_create_session(
         compute_env = ""
         if effective_node:
             caps = effective_node.capabilities or {}
-            lines = [f"\n## Active Compute Environment: {effective_node.name} ({effective_node.type})"]
+            lines = [
+                f"\n## Active Compute Environment: {effective_node.name} ({effective_node.type})"
+            ]
             if caps.get("platform"):
                 lines.append(f"- Platform: {caps['platform']}")
             if caps.get("cpu_cores"):
-                lines.append(f"- CPU: {caps['cpu_cores']} cores ({caps.get('cpu_arch', 'unknown')})")
+                lines.append(
+                    f"- CPU: {caps['cpu_cores']} cores ({caps.get('cpu_arch', 'unknown')})"
+                )
             if caps.get("available_ram_gb"):
                 lines.append(f"- RAM: {caps['available_ram_gb']:.1f} GB available")
             if caps.get("gpu_available"):
                 gpu_info = caps.get("gpu_info", [])
                 for gpu in gpu_info[:1]:
-                    lines.append(f"- GPU: {gpu.get('model', 'unknown')} ({gpu.get('vram_gb', 0):.0f} GB VRAM)")
+                    lines.append(
+                        f"- GPU: {gpu.get('model', 'unknown')} ({gpu.get('vram_gb', 0):.0f} GB VRAM)"
+                    )
                     if gpu.get("cuda_version"):
                         lines.append(f"  - CUDA: {gpu['cuda_version']}")
             if caps.get("python_versions"):
@@ -199,6 +227,7 @@ async def get_or_create_session(
         # Wire event broadcasting
         async def _broadcast(event: AgentEvent):
             await self.event_bus.broadcast(event)
+
         session.on_event(_broadcast)
 
         # Load existing messages (but skip user messages that would be re-added)
@@ -223,7 +252,7 @@ async def remove_session(self, conversation_id: int) -> None:
             # Cancel any running agent turn
             active.session.cancel()
             # Resolve any pending question/approval futures to unblock the loop
-            if hasattr(active.session, 'pending_answers') and active.session.pending_answers:
+            if hasattr(active.session, "pending_answers") and active.session.pending_answers:
                 try:
                     if not active.session.pending_answers.done():
                         active.session.pending_answers.cancel()
diff --git a/backend/openmlr/tasks/agent_tasks.py b/backend/openmlr/tasks/agent_tasks.py
index 92ac255..9da7fa9 100644
--- a/backend/openmlr/tasks/agent_tasks.py
+++ b/backend/openmlr/tasks/agent_tasks.py
@@ -88,10 +88,7 @@ async def _async_process_message(
 
         # Load existing messages for context
         messages = await ops.get_messages(db, conversation_id)
-        existing_messages = [
-            {"role": m.role, "content": m.content}
-            for m in messages
-        ]
+        existing_messages = [{"role": m.role, "content": m.content} for m in messages]
 
         # Increment user message count
         await ops.increment_user_message_count(db, conversation_id)
@@ -100,10 +97,12 @@ async def _async_process_message(
         await ops.add_message(db, conversation_id, "user", message)
 
     # Broadcast that we're processing
-    await publish_event(AgentEvent(
-        event_type="status",
-        data={"status": "thinking...", "job_id": job_id},
-    ))
+    await publish_event(
+        AgentEvent(
+            event_type="status",
+            data={"status": "thinking...", "job_id": job_id},
+        )
+    )
 
     # Create agent session
     config = load_config()
@@ -140,11 +139,17 @@ async def _broadcast(event: AgentEvent):
                 await ops.add_message(db, conversation_id, "assistant", event.data["content"])
         elif event.event_type == "tool_output" and event.data:
             async with worker_session() as db:
-                await ops.add_message(db, conversation_id, "tool", event.data.get("output", ""), {
-                    "tool": event.data.get("tool"),
-                    "tool_call_id": event.data.get("tool_call_id"),
-                    "success": event.data.get("success"),
-                })
+                await ops.add_message(
+                    db,
+                    conversation_id,
+                    "tool",
+                    event.data.get("output", ""),
+                    {
+                        "tool": event.data.get("tool"),
+                        "tool_call_id": event.data.get("tool_call_id"),
+                        "success": event.data.get("success"),
+                    },
+                )
 
     session.on_event(_broadcast)
 
@@ -152,11 +157,14 @@ async def _broadcast(event: AgentEvent):
     # and cancels the session when found.
     async def _poll_interrupt():
         from ..services.redis_pubsub import check_interrupt, clear_interrupt
+
         try:
             while True:
                 await asyncio.sleep(2)
                 if await check_interrupt(conversation_id):
-                    logger.info(f"Interrupt detected via Redis for conversation {conversation_id}, cancelling session")
+                    logger.info(
+                        f"Interrupt detected via Redis for conversation {conversation_id}, cancelling session"
+                    )
                     session.cancel()
                     await clear_interrupt(conversation_id)
                     break
@@ -176,20 +184,29 @@ async def _poll_interrupt():
             await ops.update_job_status(db, job_id, "completed")
 
         # Broadcast completion
-        await publish_event(AgentEvent(
-            event_type="job_complete",
-            data={"job_id": job_id, "conversation_uuid": uuid, "status": "completed"},
-        ))
+        await publish_event(
+            AgentEvent(
+                event_type="job_complete",
+                data={"job_id": job_id, "conversation_uuid": uuid, "status": "completed"},
+            )
+        )
 
     except Exception as e:
         logger.exception(f"Agent processing failed for job {job_id}: {e}")
         async with worker_session() as db:
             await ops.update_job_status(db, job_id, "failed", error=str(e))
 
-        await publish_event(AgentEvent(
-            event_type="job_complete",
-            data={"job_id": job_id, "conversation_uuid": uuid, "status": "failed", "error": str(e)},
-        ))
+        await publish_event(
+            AgentEvent(
+                event_type="job_complete",
+                data={
+                    "job_id": job_id,
+                    "conversation_uuid": uuid,
+                    "status": "failed",
+                    "error": str(e),
+                },
+            )
+        )
         raise
 
     finally:
@@ -209,15 +226,18 @@ async def _poll_interrupt():
         # Clear any lingering interrupt key
         try:
             from ..services.redis_pubsub import clear_interrupt
+
             await clear_interrupt(conversation_id)
         except Exception:
             pass
 
         # Broadcast ready status
-        await publish_event(AgentEvent(
-            event_type="status",
-            data={"status": "ready", "job_id": job_id},
-        ))
+        await publish_event(
+            AgentEvent(
+                event_type="status",
+                data={"status": "ready", "job_id": job_id},
+            )
+        )
 
 
 async def _mark_job_failed(job_id: str, error: str):
diff --git a/backend/openmlr/tasks/compute_tasks.py b/backend/openmlr/tasks/compute_tasks.py
index e369286..8cbd04d 100644
--- a/backend/openmlr/tasks/compute_tasks.py
+++ b/backend/openmlr/tasks/compute_tasks.py
@@ -31,6 +31,7 @@ async def _cleanup_orphaned():
             from sqlalchemy import select
 
             from ..db.models import Conversation
+
             result = await db.execute(select(Conversation.uuid))
             active_uuids = {row[0] for row in result.all()}
 
@@ -49,6 +50,7 @@ async def _cleanup_orphaned():
 @celery_app.task(bind=True, max_retries=3)
 def check_compute_node_health(self, node_id: int, user_id: int):
     """Check health of a single compute node."""
+
     async def _check():
         session_factory = get_worker_session()
         async with session_factory() as db:
@@ -68,7 +70,9 @@ async def _check():
                 if sandbox:
                     caps = await probe_sandbox(sandbox)
                     await ops.update_compute_node(
-                        db, node.id, user_id,
+                        db,
+                        node.id,
+                        user_id,
                         capabilities=caps.to_dict(),
                         health_status="online",
                         last_seen_at=datetime.now(UTC),
@@ -76,13 +80,19 @@ async def _check():
                     logger.info(f"Health check passed for node '{node.name}'")
                 else:
                     await ops.update_compute_node(
-                        db, node.id, user_id,
+                        db,
+                        node.id,
+                        user_id,
                         health_status="offline",
                     )
-                    logger.warning(f"Health check failed for node '{node.name}': sandbox not created")
+                    logger.warning(
+                        f"Health check failed for node '{node.name}': sandbox not created"
+                    )
             except Exception as e:
                 await ops.update_compute_node(
-                    db, node.id, user_id,
+                    db,
+                    node.id,
+                    user_id,
                     health_status="offline",
                 )
                 logger.warning(f"Health check failed for node '{node.name}': {e}")
@@ -95,12 +105,14 @@ async def _check():
 @celery_app.task
 def health_check_all_nodes():
     """Run health checks on all compute nodes for all users."""
+
     async def _check_all():
         session_factory = get_worker_session()
         async with session_factory() as db:
             from sqlalchemy import select
 
             from ..db.models import User
+
             result = await db.execute(select(User))
             users = result.scalars().all()
 
diff --git a/backend/openmlr/tools/ask_user.py b/backend/openmlr/tools/ask_user.py
index 26254cf..995c099 100644
--- a/backend/openmlr/tools/ask_user.py
+++ b/backend/openmlr/tools/ask_user.py
@@ -29,17 +29,29 @@ def create_ask_user_tool() -> ToolSpec:
                     "items": {
                         "type": "object",
                         "properties": {
-                            "id": {"type": "string", "description": "Unique question ID (e.g. q1, q2)"},
+                            "id": {
+                                "type": "string",
+                                "description": "Unique question ID (e.g. q1, q2)",
+                            },
                             "question": {"type": "string", "description": "The question text"},
-                            "allow_text": {"type": "boolean", "description": "Allow typing a custom answer (default true)"},
+                            "allow_text": {
+                                "type": "boolean",
+                                "description": "Allow typing a custom answer (default true)",
+                            },
                             "options": {
                                 "type": "array",
                                 "description": "2-4 options to choose from",
                                 "items": {
                                     "type": "object",
                                     "properties": {
-                                        "label": {"type": "string", "description": "Short option label"},
-                                        "description": {"type": "string", "description": "Explanation of this option"},
+                                        "label": {
+                                            "type": "string",
+                                            "description": "Short option label",
+                                        },
+                                        "description": {
+                                            "type": "string",
+                                            "description": "Explanation of this option",
+                                        },
                                     },
                                     "required": ["label"],
                                 },
@@ -84,17 +96,22 @@ async def _handle_ask_user(
         if len(opts) < 2:
             return f"Question '{q.get('id', '?')}' needs at least 2 options.", False
         if len(opts) > 4:
-            return f"Question '{q.get('id', '?')}' has {len(opts)} options. Max is 4 — split into multiple questions.", False
+            return (
+                f"Question '{q.get('id', '?')}' has {len(opts)} options. Max is 4 — split into multiple questions.",
+                False,
+            )
 
     # Emit the questions event
-    await session.emit(AgentEvent(
-        event_type="questions",
-        data={
-            "questions": questions,
-            "context": context,
-            "suggest_mode": suggest_mode,
-        },
-    ))
+    await session.emit(
+        AgentEvent(
+            event_type="questions",
+            data={
+                "questions": questions,
+                "context": context,
+                "suggest_mode": suggest_mode,
+            },
+        )
+    )
 
     answers = None
 
@@ -103,6 +120,7 @@ async def _handle_ask_user(
         import os
 
         from ..services.redis_pubsub import wait_for_answers
+
         if os.environ.get("USE_BACKGROUND_JOBS", "").lower() in ("true", "1", "yes"):
             answers = await wait_for_answers(session.conversation_id, timeout=300)
     except Exception:
@@ -132,6 +150,8 @@ async def _handle_ask_user(
         lines.append(f"- {q.get('question', '')}: **{answer}**")
 
     if suggest_mode:
-        lines.append(f"\n[Agent suggested switching to {suggest_mode} mode after this planning phase.]")
+        lines.append(
+            f"\n[Agent suggested switching to {suggest_mode} mode after this planning phase.]"
+        )
 
     return "\n".join(lines), True
diff --git a/backend/openmlr/tools/compute_tools.py b/backend/openmlr/tools/compute_tools.py
index 7ccc6b0..8802808 100644
--- a/backend/openmlr/tools/compute_tools.py
+++ b/backend/openmlr/tools/compute_tools.py
@@ -26,6 +26,7 @@ async def _handle_list(user_id: int = None, db=None, **kwargs):
         return "Database connection required for compute_list", False
 
     from ..db import operations as ops
+
     nodes = await ops.get_compute_nodes(db, user_id)
 
     if not nodes:
@@ -59,6 +60,7 @@ async def _handle_probe(node_name: str, user_id: int = None, db=None, **kwargs):
         return "Database connection required for compute_probe", False
 
     from ..db import operations as ops
+
     node = await ops.get_compute_node_by_name(db, user_id, node_name)
     if not node:
         return f"Node '{node_name}' not found", False
@@ -80,7 +82,9 @@ async def _handle_probe(node_name: str, user_id: int = None, db=None, **kwargs):
 
         # Update node in database
         await ops.update_compute_node(
-            db, node.id, user_id,
+            db,
+            node.id,
+            user_id,
             capabilities=caps.to_dict(),
             health_status="online",
             last_probed_at=datetime.now(UTC),
@@ -92,8 +96,12 @@ async def _handle_probe(node_name: str, user_id: int = None, db=None, **kwargs):
         lines = [f"## {node.name} Capabilities\n"]
         lines.append(f"Platform: {caps.platform}")
         lines.append(f"CPU: {caps.cpu_cores} cores ({caps.cpu_arch})")
-        lines.append(f"RAM: {caps.available_ram_gb:.1f} GB available / {caps.total_ram_gb:.1f} GB total")
-        lines.append(f"Disk: {caps.available_disk_gb:.1f} GB available / {caps.total_disk_gb:.1f} GB total")
+        lines.append(
+            f"RAM: {caps.available_ram_gb:.1f} GB available / {caps.total_ram_gb:.1f} GB total"
+        )
+        lines.append(
+            f"Disk: {caps.available_disk_gb:.1f} GB available / {caps.total_disk_gb:.1f} GB total"
+        )
 
         if caps.gpu_available:
             for gpu in caps.gpu_info:
@@ -121,7 +129,9 @@ async def _handle_probe(node_name: str, user_id: int = None, db=None, **kwargs):
         except Exception:
             pass
         await ops.update_compute_node(
-            db, node.id, user_id,
+            db,
+            node.id,
+            user_id,
             health_status="offline",
         )
         return f"Probe failed for {node_name}: {str(e)}", False
@@ -133,12 +143,13 @@ async def _handle_select(node_name: str, user_id: int = None, db=None, session=N
         return "Database connection required for compute_select", False
 
     from ..db import operations as ops
+
     node = await ops.get_compute_node_by_name(db, user_id, node_name)
     if not node:
         return f"Node '{node_name}' not found", False
 
     # If session is provided, update the active sandbox
-    if session and hasattr(session, 'conversation_id'):
+    if session and hasattr(session, "conversation_id"):
         # Update conversation extra
         conv_id = session.conversation_id
         conv = await ops.get_conversation_by_id(db, conv_id)
@@ -151,13 +162,16 @@ async def _handle_select(node_name: str, user_id: int = None, db=None, session=N
     return f"Active compute switched to: {node.name} ({node.type})", True
 
 
-async def _handle_plan(task: str, requirements: dict = None, user_id: int = None, db=None, **kwargs):
+async def _handle_plan(
+    task: str, requirements: dict = None, user_id: int = None, db=None, **kwargs
+):
     """Recommend the best compute node for a task."""
     if not db:
         return "Database connection required for compute_plan", False
 
     requirements = requirements or {}
     from ..db import operations as ops
+
     nodes = await ops.get_compute_nodes(db, user_id)
 
     if not nodes:
@@ -214,11 +228,13 @@ async def _handle_plan(task: str, requirements: dict = None, user_id: int = None
         elif node.type == "modal":
             reasons.append("modal (cloud)")
 
-        scores.append({
-            "node": node,
-            "score": score,
-            "reasons": reasons,
-        })
+        scores.append(
+            {
+                "node": node,
+                "score": score,
+                "reasons": reasons,
+            }
+        )
 
     if not scores:
         return "No compute nodes meet the requirements.", False
@@ -234,7 +250,9 @@ async def _handle_plan(task: str, requirements: dict = None, user_id: int = None
     if len(scores) > 1:
         lines.append("\n### Alternatives")
         for alt in scores[1:3]:
-            lines.append(f"- {alt['node'].name} (score: {alt['score']:.1f}, {', '.join(alt['reasons'])})")
+            lines.append(
+                f"- {alt['node'].name} (score: {alt['score']:.1f}, {', '.join(alt['reasons'])})"
+            )
 
     return "\n".join(lines), True
 
@@ -242,25 +260,30 @@ async def _handle_plan(task: str, requirements: dict = None, user_id: int = None
 async def _get_sync_context(user_id, db, session):
     """Helper: resolve conversation UUID and workspace path for sync ops."""
     from ..db import operations as ops
+
     conv_uuid = None
-    if session and hasattr(session, 'conversation_id'):
+    if session and hasattr(session, "conversation_id"):
         conv = await ops.get_conversation_by_id(db, session.conversation_id)
         if conv:
             conv_uuid = conv.uuid
     if not conv_uuid:
         return None, None, "No active conversation workspace found"
     from ..compute import WorkspaceManager
+
     wm = WorkspaceManager()
     local_ws = wm.get_workspace_path(conv_uuid)
     return conv_uuid, local_ws, None
 
 
-async def _handle_sync_up(paths: list, node_name: str, user_id: int = None, db=None, session=None, **kwargs):
+async def _handle_sync_up(
+    paths: list, node_name: str, user_id: int = None, db=None, session=None, **kwargs
+):
     """Sync files from local workspace to remote compute node."""
     if not db:
         return "Database connection required", False
 
     from ..db import operations as ops
+
     node = await ops.get_compute_node_by_name(db, user_id, node_name)
     if not node:
         return f"Node '{node_name}' not found", False
@@ -276,6 +299,7 @@ async def _handle_sync_up(paths: list, node_name: str, user_id: int = None, db=N
 
     elif node.type == "ssh":
         from ..sandbox.ssh import SSHSandbox
+
         ssh_sandbox = SSHSandbox()
         try:
             config = dict(node.config)
@@ -329,12 +353,15 @@ async def _handle_sync_up(paths: list, node_name: str, user_id: int = None, db=N
     return "Unsupported node type", False
 
 
-async def _handle_sync_down(paths: list, node_name: str, user_id: int = None, db=None, session=None, **kwargs):
+async def _handle_sync_down(
+    paths: list, node_name: str, user_id: int = None, db=None, session=None, **kwargs
+):
     """Sync files from remote compute node to local workspace."""
     if not db:
         return "Database connection required", False
 
     from ..db import operations as ops
+
     node = await ops.get_compute_node_by_name(db, user_id, node_name)
     if not node:
         return f"Node '{node_name}' not found", False
@@ -349,6 +376,7 @@ async def _handle_sync_down(paths: list, node_name: str, user_id: int = None, db
 
     elif node.type == "ssh":
         from ..sandbox.ssh import SSHSandbox
+
         ssh_sandbox = SSHSandbox()
         try:
             config = dict(node.config)
@@ -389,7 +417,9 @@ def _do_get(rpath=rp):
 
                 elif remote_type == "dir":
                     result = await ssh_sandbox.execute(f"find '{remote_path}' -type f", timeout=10)
-                    remote_files = [ln.strip() for ln in result.output.strip().split("\n") if ln.strip()]
+                    remote_files = [
+                        ln.strip() for ln in result.output.strip().split("\n") if ln.strip()
+                    ]
                     for rf in remote_files:
                         rel = rf.replace(remote_path + "/", "", 1)
                         dst = local_path / rel
@@ -476,9 +506,15 @@ def create_compute_tools() -> list[ToolSpec]:
                         "description": "Hardware requirements",
                         "properties": {
                             "gpu": {"type": "boolean", "description": "GPU required"},
-                            "min_vram_gb": {"type": "number", "description": "Minimum GPU VRAM in GB"},
+                            "min_vram_gb": {
+                                "type": "number",
+                                "description": "Minimum GPU VRAM in GB",
+                            },
                             "min_ram_gb": {"type": "number", "description": "Minimum RAM in GB"},
-                            "min_disk_gb": {"type": "number", "description": "Minimum free disk in GB"},
+                            "min_disk_gb": {
+                                "type": "number",
+                                "description": "Minimum free disk in GB",
+                            },
                         },
                     },
                 },
diff --git a/backend/openmlr/tools/github.py b/backend/openmlr/tools/github.py
index 5d0a750..07baeca 100644
--- a/backend/openmlr/tools/github.py
+++ b/backend/openmlr/tools/github.py
@@ -24,8 +24,7 @@ def create_github_tools() -> list[ToolSpec]:
         ToolSpec(
             name="github_read_file",
             description=(
-                "Read a file from a GitHub repository. "
-                "Provide owner/repo and path within the repo."
+                "Read a file from a GitHub repository. Provide owner/repo and path within the repo."
             ),
             parameters={
                 "type": "object",
@@ -69,8 +68,14 @@ def create_github_tools() -> list[ToolSpec]:
             parameters={
                 "type": "object",
                 "properties": {
-                    "query": {"type": "string", "description": "Search query (e.g. 'LoRA fine-tune llama')"},
-                    "language": {"type": "string", "description": "Filter by language (e.g. 'python')"},
+                    "query": {
+                        "type": "string",
+                        "description": "Search query (e.g. 'LoRA fine-tune llama')",
+                    },
+                    "language": {
+                        "type": "string",
+                        "description": "Filter by language (e.g. 'python')",
+                    },
                     "limit": {"type": "integer", "description": "Max results (default 10)"},
                 },
                 "required": ["query"],
@@ -86,8 +91,14 @@ def create_github_tools() -> list[ToolSpec]:
             parameters={
                 "type": "object",
                 "properties": {
-                    "query": {"type": "string", "description": "Search query (paper title, method name, topic)"},
-                    "topic": {"type": "string", "description": "Filter by topic (e.g. 'machine-learning', 'deep-learning')"},
+                    "query": {
+                        "type": "string",
+                        "description": "Search query (paper title, method name, topic)",
+                    },
+                    "topic": {
+                        "type": "string",
+                        "description": "Filter by topic (e.g. 'machine-learning', 'deep-learning')",
+                    },
                     "min_stars": {"type": "integer", "description": "Minimum stars (default 10)"},
                     "sort": {
                         "type": "string",
@@ -153,10 +164,11 @@ async def _handle_read_file(
 
     if data.get("encoding") == "base64":
         import base64
+
         content = base64.b64decode(data["content"]).decode("utf-8", errors="replace")
         # Add line numbers
         lines = content.split("\n")
-        numbered = [f"{i+1}: {line}" for i, line in enumerate(lines)]
+        numbered = [f"{i + 1}: {line}" for i, line in enumerate(lines)]
         output = "\n".join(numbered)
         if len(output) > 50000:
             output = output[:50000] + "\n...[truncated]"
@@ -262,7 +274,7 @@ async def _handle_search_repos(
     min_stars: int = 10,
     sort: str = "stars",
     limit: int = 10,
-    **kwargs
+    **kwargs,
 ) -> tuple[str, bool]:
     """Search GitHub repositories with retry logic."""
     url = f"{GITHUB_API}/search/repositories"
@@ -322,9 +334,7 @@ async def _handle_search_repos(
     return "\n".join(lines), True
 
 
-async def _handle_get_readme(
-    owner: str, repo: str, **kwargs
-) -> tuple[str, bool]:
+async def _handle_get_readme(owner: str, repo: str, **kwargs) -> tuple[str, bool]:
     """Get README from a GitHub repository with retry logic."""
     url = f"{GITHUB_API}/repos/{owner}/{repo}/readme"
 
@@ -350,6 +360,7 @@ async def _handle_get_readme(
 
     if data.get("encoding") == "base64":
         import base64
+
         content = base64.b64decode(data["content"]).decode("utf-8", errors="replace")
         # Truncate if too long
         if len(content) > 30000:
diff --git a/backend/openmlr/tools/http_utils.py b/backend/openmlr/tools/http_utils.py
index 25cb29a..137ca69 100644
--- a/backend/openmlr/tools/http_utils.py
+++ b/backend/openmlr/tools/http_utils.py
@@ -16,13 +16,17 @@
 
 class RateLimitError(Exception):
     """Raised when rate limit is hit."""
+
     def __init__(self, retry_after: float | None = None):
         self.retry_after = retry_after
-        super().__init__(f"Rate limit hit, retry after {retry_after}s" if retry_after else "Rate limit hit")
+        super().__init__(
+            f"Rate limit hit, retry after {retry_after}s" if retry_after else "Rate limit hit"
+        )
 
 
 class APIError(Exception):
     """Raised for non-retryable API errors."""
+
     def __init__(self, status_code: int, message: str):
         self.status_code = status_code
         self.message = message
@@ -90,8 +94,14 @@ async def fetch_with_retry(
                 if response.status_code == 429:
                     retry_after = _parse_retry_after(response)
                     if attempt < max_retries:
-                        delay = retry_after if retry_after else _calculate_delay(attempt, base_delay, max_delay)
-                        log.warning(f"Rate limit hit for {url}, retrying in {delay:.1f}s (attempt {attempt + 1}/{max_retries + 1})")
+                        delay = (
+                            retry_after
+                            if retry_after
+                            else _calculate_delay(attempt, base_delay, max_delay)
+                        )
+                        log.warning(
+                            f"Rate limit hit for {url}, retrying in {delay:.1f}s (attempt {attempt + 1}/{max_retries + 1})"
+                        )
                         await asyncio.sleep(delay)
                         continue
                     raise RateLimitError(retry_after)
@@ -100,7 +110,9 @@ async def fetch_with_retry(
                 if response.status_code in retry_statuses:
                     if attempt < max_retries:
                         delay = _calculate_delay(attempt, base_delay, max_delay)
-                        log.warning(f"Server error {response.status_code} for {url}, retrying in {delay:.1f}s")
+                        log.warning(
+                            f"Server error {response.status_code} for {url}, retrying in {delay:.1f}s"
+                        )
                         await asyncio.sleep(delay)
                         continue
 
@@ -133,7 +145,7 @@ async def fetch_with_retry(
 
 def _calculate_delay(attempt: int, base_delay: float, max_delay: float) -> float:
     """Calculate exponential backoff delay with jitter."""
-    delay = min(base_delay * (2 ** attempt), max_delay)
+    delay = min(base_delay * (2**attempt), max_delay)
     # Add jitter (±25%)
     jitter = delay * 0.25 * (random.random() * 2 - 1)
     return max(0.1, delay + jitter)
@@ -160,6 +172,7 @@ def with_retry(
 
     The decorated function should raise RateLimitError or APIError for retryable errors.
     """
+
     def decorator(func: Callable[..., T]) -> Callable[..., T]:
         @wraps(func)
         async def wrapper(*args, **kwargs) -> T:
@@ -171,7 +184,11 @@ async def wrapper(*args, **kwargs) -> T:
                 except RateLimitError as e:
                     last_exception = e
                     if attempt < max_retries:
-                        delay = e.retry_after if e.retry_after else _calculate_delay(attempt, base_delay, max_delay)
+                        delay = (
+                            e.retry_after
+                            if e.retry_after
+                            else _calculate_delay(attempt, base_delay, max_delay)
+                        )
                         log.warning(f"Rate limit in {func.__name__}, retrying in {delay:.1f}s")
                         await asyncio.sleep(delay)
                         continue
@@ -190,4 +207,5 @@ async def wrapper(*args, **kwargs) -> T:
             raise RuntimeError("Unexpected retry loop exit")
 
         return wrapper
+
     return decorator
diff --git a/backend/openmlr/tools/local.py b/backend/openmlr/tools/local.py
index 73b016d..fa0a6af 100644
--- a/backend/openmlr/tools/local.py
+++ b/backend/openmlr/tools/local.py
@@ -73,10 +73,23 @@ def _validate_path(path: Path) -> tuple[Path, str | None]:
             except ValueError:
                 # Also allow paths that are explicitly absolute and exist (for reading configs etc)
                 # But block obvious dangerous paths
-                dangerous_prefixes = ["/etc", "/root", "/var", "/usr", "/bin", "/sbin", "/boot", "/sys", "/proc"]
+                dangerous_prefixes = [
+                    "/etc",
+                    "/root",
+                    "/var",
+                    "/usr",
+                    "/bin",
+                    "/sbin",
+                    "/boot",
+                    "/sys",
+                    "/proc",
+                ]
                 for prefix in dangerous_prefixes:
                     if str(resolved).startswith(prefix):
-                        return resolved, f"Access denied: {resolved} is in a protected system directory"
+                        return (
+                            resolved,
+                            f"Access denied: {resolved} is in a protected system directory",
+                        )
 
         return resolved, None
     except Exception as e:
@@ -96,8 +109,14 @@ def create_local_tools() -> list[ToolSpec]:
                 "type": "object",
                 "properties": {
                     "command": {"type": "string", "description": "Shell command to execute"},
-                    "timeout": {"type": "integer", "description": "Timeout in seconds (default 120, max 3600)"},
-                    "workdir": {"type": "string", "description": "Working directory inside container (default /workspace)"},
+                    "timeout": {
+                        "type": "integer",
+                        "description": "Timeout in seconds (default 120, max 3600)",
+                    },
+                    "workdir": {
+                        "type": "string",
+                        "description": "Working directory inside container (default /workspace)",
+                    },
                 },
                 "required": ["command"],
             },
@@ -110,7 +129,10 @@ def create_local_tools() -> list[ToolSpec]:
                 "type": "object",
                 "properties": {
                     "path": {"type": "string", "description": "File path to read"},
-                    "offset": {"type": "integer", "description": "Start line (1-indexed, default 1)"},
+                    "offset": {
+                        "type": "integer",
+                        "description": "Start line (1-indexed, default 1)",
+                    },
                     "limit": {"type": "integer", "description": "Max lines (default 2000)"},
                 },
                 "required": ["path"],
@@ -139,7 +161,10 @@ def create_local_tools() -> list[ToolSpec]:
                     "path": {"type": "string", "description": "File path to edit"},
                     "old_string": {"type": "string", "description": "Exact string to find"},
                     "new_string": {"type": "string", "description": "Replacement string"},
-                    "replace_all": {"type": "boolean", "description": "Replace all occurrences (default false)"},
+                    "replace_all": {
+                        "type": "boolean",
+                        "description": "Replace all occurrences (default false)",
+                    },
                 },
                 "required": ["path", "old_string", "new_string"],
             },
@@ -150,11 +175,13 @@ def create_local_tools() -> list[ToolSpec]:
 
 # ── Docker bash ──────────────────────────────────────────
 
+
 async def _docker_available() -> bool:
     """Check if Docker is running."""
     try:
         proc = await asyncio.create_subprocess_exec(
-            "docker", "info",
+            "docker",
+            "info",
             stdout=asyncio.subprocess.DEVNULL,
             stderr=asyncio.subprocess.DEVNULL,
         )
@@ -164,7 +191,9 @@ async def _docker_available() -> bool:
         return False
 
 
-async def _handle_bash(command: str, timeout: int = 120, workdir: str = None, **kwargs) -> tuple[str, bool]:
+async def _handle_bash(
+    command: str, timeout: int = 120, workdir: str = None, **kwargs
+) -> tuple[str, bool]:
     timeout = min(int(timeout), 3600)
     cwd = workdir or os.getcwd()
 
@@ -180,7 +209,9 @@ async def _handle_bash(command: str, timeout: int = 120, workdir: str = None, **
     else:
         # Fallback to direct execution only if explicitly allowed
         if ALLOW_DIRECT_EXEC:
-            logger.warning(f"Docker unavailable, falling back to direct host execution for: {command[:100]}")
+            logger.warning(
+                f"Docker unavailable, falling back to direct host execution for: {command[:100]}"
+            )
             output, success = await _direct_exec(command, timeout, cwd)
             warning = "[WARNING: Docker not available — running directly on host]\n\n"
             return warning + output, success
@@ -188,27 +219,39 @@ async def _handle_bash(command: str, timeout: int = 120, workdir: str = None, **
             return (
                 "Docker is not available and direct host execution is disabled for security.\n"
                 "Please ensure Docker is running, or set OPENMLR_ALLOW_DIRECT_EXEC=true to enable fallback.",
-                False
+                False,
             )
 
 
-async def _docker_exec(command: str, timeout: int, host_cwd: str, workdir: str = None) -> tuple[str, bool]:
+async def _docker_exec(
+    command: str, timeout: int, host_cwd: str, workdir: str = None
+) -> tuple[str, bool]:
     """Run command in a Docker container with workspace mount."""
     container_workdir = workdir or "/workspace"
 
     # Security: Use bridge network (default) instead of host network
     # This isolates container networking from the host
     docker_cmd = [
-        "docker", "run", "--rm",
-        "-v", f"{host_cwd}:/workspace",
-        "-w", container_workdir,
-        "--memory", "8g",
-        "--pids-limit", "256",  # Prevent fork bombs
+        "docker",
+        "run",
+        "--rm",
+        "-v",
+        f"{host_cwd}:/workspace",
+        "-w",
+        container_workdir,
+        "--memory",
+        "8g",
+        "--pids-limit",
+        "256",  # Prevent fork bombs
         "--read-only",  # Read-only root filesystem
-        "--tmpfs", "/tmp:rw,noexec,nosuid,size=1g",  # Writable /tmp
-        "--security-opt", "no-new-privileges:true",
+        "--tmpfs",
+        "/tmp:rw,noexec,nosuid,size=1g",  # Writable /tmp
+        "--security-opt",
+        "no-new-privileges:true",
         DOCKER_IMAGE,
-        "bash", "-c", command,
+        "bash",
+        "-c",
+        command,
     ]
 
     try:
@@ -273,6 +316,7 @@ async def _direct_exec(command: str, timeout: int, cwd: str) -> tuple[str, bool]
 
 # ── File tools (host filesystem) ─────────────────────────
 
+
 async def _handle_read(path: str, offset: int = 1, limit: int = 2000, **kwargs) -> tuple[str, bool]:
     try:
         target = Path(path).expanduser()
@@ -286,7 +330,9 @@ async def _handle_read(path: str, offset: int = 1, limit: int = 2000, **kwargs)
 
         if target.is_dir():
             entries = sorted(target.iterdir())
-            return "\n".join(f"{e.name}{'/' if e.is_dir() else ''}" for e in entries) or "(empty directory)", True
+            return "\n".join(
+                f"{e.name}{'/' if e.is_dir() else ''}" for e in entries
+            ) or "(empty directory)", True
 
         if not target.exists():
             return f"File not found: {target}", False
@@ -297,7 +343,9 @@ async def _handle_read(path: str, offset: int = 1, limit: int = 2000, **kwargs)
         start = max(0, offset - 1)
         end = start + limit
         selected = all_lines[start:end]
-        result = "\n".join(f"{i}: {line.rstrip()}" for i, line in enumerate(selected, start=start + 1))
+        result = "\n".join(
+            f"{i}: {line.rstrip()}" for i, line in enumerate(selected, start=start + 1)
+        )
         total = len(all_lines)
         if end < total:
             result += f"\n\n[Showing lines {start + 1}-{min(end, total)} of {total}]"
@@ -335,7 +383,9 @@ async def _handle_write(path: str = "", content: str = "", **kwargs) -> tuple[st
         return f"Error writing: {str(e)}", False
 
 
-async def _handle_edit(path: str, old_string: str, new_string: str, replace_all: bool = False, **kwargs) -> tuple[str, bool]:
+async def _handle_edit(
+    path: str, old_string: str, new_string: str, replace_all: bool = False, **kwargs
+) -> tuple[str, bool]:
     try:
         target = Path(path).expanduser()
         if not target.is_absolute():
@@ -356,7 +406,11 @@ async def _handle_edit(path: str, old_string: str, new_string: str, replace_all:
         if count > 1 and not replace_all:
             return f"Found {count} matches. Use replace_all=true or provide more context.", False
 
-        new_content = content.replace(old_string, new_string) if replace_all else content.replace(old_string, new_string, 1)
+        new_content = (
+            content.replace(old_string, new_string)
+            if replace_all
+            else content.replace(old_string, new_string, 1)
+        )
         target.write_text(new_content, encoding="utf-8")
         return f"Replaced {count if replace_all else 1} occurrence(s) in {target}", True
     except Exception as e:
diff --git a/backend/openmlr/tools/mcp.py b/backend/openmlr/tools/mcp.py
index 5119913..ebb3048 100644
--- a/backend/openmlr/tools/mcp.py
+++ b/backend/openmlr/tools/mcp.py
@@ -9,10 +9,12 @@
 
 def substitute_env_vars(text: str) -> str:
     """Substitute ${VAR_NAME} patterns with environment variable values."""
+
     def _replace(match):
         var_name = match.group(1)
         return os.environ.get(var_name, match.group(0))
-    return re.sub(r'\$\{(\w+)\}', _replace, text)
+
+    return re.sub(r"\$\{(\w+)\}", _replace, text)
 
 
 def process_mcp_config(config: dict) -> dict:
@@ -24,10 +26,7 @@ def process_mcp_config(config: dict) -> dict:
         elif isinstance(value, dict):
             processed[key] = process_mcp_config(value)
         elif isinstance(value, list):
-            processed[key] = [
-                substitute_env_vars(v) if isinstance(v, str) else v
-                for v in value
-            ]
+            processed[key] = [substitute_env_vars(v) if isinstance(v, str) else v for v in value]
         else:
             processed[key] = value
     return processed
diff --git a/backend/openmlr/tools/plan.py b/backend/openmlr/tools/plan.py
index 609bc4e..f0d796e 100644
--- a/backend/openmlr/tools/plan.py
+++ b/backend/openmlr/tools/plan.py
@@ -15,10 +15,12 @@
 def _get_session_factory():
     """Get the correct async session factory for the current context (web or worker)."""
     from ..db.engine import _worker_engine, async_session
+
     # If we're in a Celery worker context, use the worker engine
     eng = _worker_engine.get(None)
     if eng is not None:
         from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
         return async_sessionmaker(eng, class_=AsyncSession, expire_on_commit=False)
     # Otherwise use the main web engine
     return async_session
@@ -54,19 +56,37 @@ def create_plan_tool() -> ToolSpec:
                         "type": "object",
                         "properties": {
                             "title": {"type": "string"},
-                            "status": {"type": "string", "enum": ["pending", "in_progress", "completed", "cancelled"]},
+                            "status": {
+                                "type": "string",
+                                "enum": ["pending", "in_progress", "completed", "cancelled"],
+                            },
                         },
                         "required": ["title"],
                     },
                 },
                 "task_index": {"type": "integer", "description": "For 'update': 0-based index"},
-                "status": {"type": "string", "enum": ["pending", "in_progress", "completed", "cancelled"]},
+                "status": {
+                    "type": "string",
+                    "enum": ["pending", "in_progress", "completed", "cancelled"],
+                },
                 "title": {"type": "string", "description": "For 'add'/'add_resource': title"},
-                "summary": {"type": "string", "description": "For 'update' to completed: summary of what was done"},
-                "next_hints": {"type": "string", "description": "For 'update' to completed: hints for next tasks"},
+                "summary": {
+                    "type": "string",
+                    "description": "For 'update' to completed: summary of what was done",
+                },
+                "next_hints": {
+                    "type": "string",
+                    "description": "For 'update' to completed: hints for next tasks",
+                },
                 "url": {"type": "string", "description": "For 'add_resource': URL"},
-                "resource_type": {"type": "string", "enum": ["paper", "code", "dataset", "doc", "report"]},
-                "content": {"type": "string", "description": "For 'add_resource' type=report: markdown report content"},
+                "resource_type": {
+                    "type": "string",
+                    "enum": ["paper", "code", "dataset", "doc", "report"],
+                },
+                "content": {
+                    "type": "string",
+                    "description": "For 'add_resource' type=report: markdown report content",
+                },
             },
             "required": ["operation"],
         },
@@ -100,7 +120,9 @@ async def _handle_plan(
             if not tasks:
                 return "Provide 'tasks' array.", False
 
-            task_list = [{"title": t.get("title", ""), "status": t.get("status", "pending")} for t in tasks]
+            task_list = [
+                {"title": t.get("title", ""), "status": t.get("status", "pending")} for t in tasks
+            ]
             await ops.upsert_conversation_tasks(db, conv_id, task_list)
             await _emit_plan(session, conv_id, db)
 
@@ -117,7 +139,9 @@ async def _handle_plan(
 
             # Get existing tasks and append
             existing = await ops.get_conversation_tasks(db, conv_id)
-            task_list = [{"title": t.title, "status": t.status, "priority": t.priority} for t in existing]
+            task_list = [
+                {"title": t.title, "status": t.status, "priority": t.priority} for t in existing
+            ]
             task_list.append({"title": title, "status": "pending"})
             await ops.upsert_conversation_tasks(db, conv_id, task_list)
             await _emit_plan(session, conv_id, db)
@@ -149,8 +173,7 @@ async def _handle_plan(
                     # Check if there's a completion report for the in-progress task
                     resources = await ops.get_conversation_resources(db, conv_id)
                     has_report = any(
-                        r.type == "report" and prev_task.title in r.title
-                        for r in resources
+                        r.type == "report" and prev_task.title in r.title for r in resources
                     )
                     if not has_report:
                         return (
@@ -163,7 +186,9 @@ async def _handle_plan(
                         ), False
 
             # Update status
-            task_list = [{"title": t.title, "status": t.status, "priority": t.priority} for t in existing]
+            task_list = [
+                {"title": t.title, "status": t.status, "priority": t.priority} for t in existing
+            ]
             task_list[task_index]["status"] = status
             await ops.upsert_conversation_tasks(db, conv_id, task_list)
             await _emit_plan(session, conv_id, db)
@@ -184,7 +209,8 @@ async def _handle_plan(
                 report_id = f"report-{task_index}-{len(existing)}"
 
                 await ops.add_conversation_resource(
-                    db, conv_id,
+                    db,
+                    conv_id,
                     title=f"Report: {task.title}",
                     resource_type="report",
                     content=report,
@@ -222,11 +248,13 @@ async def _handle_plan(
             resource_content = None
             if resource_type == "report" and content:
                 import uuid
+
                 resource_id = f"report-manual-{str(uuid.uuid4())[:8]}"
                 resource_content = content
 
             await ops.add_conversation_resource(
-                db, conv_id,
+                db,
+                conv_id,
                 title=title,
                 resource_type=resource_type,
                 url=url,
@@ -265,7 +293,9 @@ def _generate_plan_md(tasks: list[dict]) -> str:
     return "\n".join(lines)
 
 
-def _generate_completion_report(task_title: str, summary: str = None, next_hints: str = None) -> str:
+def _generate_completion_report(
+    task_title: str, summary: str = None, next_hints: str = None
+) -> str:
     """Generate a structured markdown completion report."""
     now = datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC")
     lines = [
@@ -279,12 +309,14 @@ def _generate_completion_report(task_title: str, summary: str = None, next_hints
         "",
     ]
     if next_hints:
-        lines.extend([
-            "## Next Steps",
-            "",
-            next_hints,
-            "",
-        ])
+        lines.extend(
+            [
+                "## Next Steps",
+                "",
+                next_hints,
+                "",
+            ]
+        )
     return "\n".join(lines)
 
 
@@ -293,10 +325,12 @@ async def _emit_plan(session, conv_id: int, db):
     if session:
         tasks = await ops.get_conversation_tasks(db, conv_id)
         task_list = [{"title": t.title, "status": t.status} for t in tasks]
-        await session.emit(AgentEvent(
-            event_type="plan_update",
-            data={"tasks": task_list},
-        ))
+        await session.emit(
+            AgentEvent(
+                event_type="plan_update",
+                data={"tasks": task_list},
+            )
+        )
 
 
 async def _emit_resources(session, conv_id: int, db):
@@ -312,10 +346,12 @@ async def _emit_resources(session, conv_id: int, db):
             }
             for r in resources
         ]
-        await session.emit(AgentEvent(
-            event_type="resources_update",
-            data={"resources": res_list},
-        ))
+        await session.emit(
+            AgentEvent(
+                event_type="resources_update",
+                data={"resources": res_list},
+            )
+        )
 
 
 async def _format_plan(db, conv_id: int) -> str:
diff --git a/backend/openmlr/tools/research.py b/backend/openmlr/tools/research.py
index 3d6d7b9..46cd98d 100644
--- a/backend/openmlr/tools/research.py
+++ b/backend/openmlr/tools/research.py
@@ -63,7 +63,9 @@ def create_research_tool() -> ToolSpec:
     )
 
 
-async def _handle_research(query: str, focus: str = "general", session=None, **kwargs) -> tuple[str, bool]:
+async def _handle_research(
+    query: str, focus: str = "general", session=None, **kwargs
+) -> tuple[str, bool]:
     """Execute research sub-agent with independent context."""
 
     # Get read-only tool subset for the sub-agent
@@ -79,7 +81,10 @@ async def _handle_research(query: str, focus: str = "general", session=None, **k
     # Independent context
     messages = [
         {"role": "system", "content": RESEARCH_SYSTEM_PROMPT},
-        {"role": "user", "content": f"Research the following topic thoroughly:\n\n{query}\n\nFocus: {focus}"},
+        {
+            "role": "user",
+            "content": f"Research the following topic thoroughly:\n\n{query}\n\nFocus: {focus}",
+        },
     ]
 
     # Generate a parent ID for grouping sub-agent events
@@ -89,15 +94,17 @@ async def _handle_research(query: str, focus: str = "general", session=None, **k
 
     # Emit sub-agent start
     if session:
-        await session.emit(AgentEvent(
-            event_type="sub_agent_start",
-            data={
-                "agent_type": "research",
-                "description": f"Research: {query[:100]}",
-                "parent_tool_call_id": parent_tc_id,
-                "focus": focus,
-            },
-        ))
+        await session.emit(
+            AgentEvent(
+                event_type="sub_agent_start",
+                data={
+                    "agent_type": "research",
+                    "description": f"Research: {query[:100]}",
+                    "parent_tool_call_id": parent_tc_id,
+                    "focus": focus,
+                },
+            )
+        )
 
     accumulated_content = ""
 
@@ -107,7 +114,9 @@ async def _handle_research(query: str, focus: str = "general", session=None, **k
             result = await LLMProvider.generate(messages, config, research_tools)
 
             # Check for doom loop
-            doom_messages = [Message(role=m["role"], content=m.get("content", "")) for m in messages]
+            doom_messages = [
+                Message(role=m["role"], content=m.get("content", "")) for m in messages
+            ]
             doom_msg = detect_doom_loop(doom_messages)
             if doom_msg:
                 messages.append({"role": "system", "content": doom_msg})
@@ -119,18 +128,20 @@ async def _handle_research(query: str, focus: str = "general", session=None, **k
                 break
 
             # Add assistant message with tool calls
-            messages.append({
-                "role": "assistant",
-                "content": result.content,
-                "tool_calls": [
-                    {
-                        "id": tc.id,
-                        "type": "function",
-                        "function": {"name": tc.name, "arguments": tc.arguments},
-                    }
-                    for tc in result.tool_calls
-                ],
-            })
+            messages.append(
+                {
+                    "role": "assistant",
+                    "content": result.content,
+                    "tool_calls": [
+                        {
+                            "id": tc.id,
+                            "type": "function",
+                            "function": {"name": tc.name, "arguments": tc.arguments},
+                        }
+                        for tc in result.tool_calls
+                    ],
+                }
+            )
 
             # Execute tools and emit granular events
             for tc in result.tool_calls:
@@ -138,49 +149,59 @@ async def _handle_research(query: str, focus: str = "general", session=None, **k
 
                 # Emit sub-agent tool call
                 if session:
-                    await session.emit(AgentEvent(
-                        event_type="sub_agent_tool_call",
-                        data={
-                            "parent_tool_call_id": parent_tc_id,
-                            "tool": tc.name,
-                            "tool_call_id": tc.id,
-                            "args": json.dumps(tc.arguments)[:200] if isinstance(tc.arguments, dict) else str(tc.arguments)[:200],
-                        },
-                    ))
+                    await session.emit(
+                        AgentEvent(
+                            event_type="sub_agent_tool_call",
+                            data={
+                                "parent_tool_call_id": parent_tc_id,
+                                "tool": tc.name,
+                                "tool_call_id": tc.id,
+                                "args": json.dumps(tc.arguments)[:200]
+                                if isinstance(tc.arguments, dict)
+                                else str(tc.arguments)[:200],
+                            },
+                        )
+                    )
 
                 output, success = await _execute_research_tool(tc)
-                messages.append({
-                    "role": "tool",
-                    "content": output[:10000],
-                    "tool_call_id": tc.id,
-                })
+                messages.append(
+                    {
+                        "role": "tool",
+                        "content": output[:10000],
+                        "tool_call_id": tc.id,
+                    }
+                )
 
                 # Emit sub-agent tool output
                 if session:
-                    await session.emit(AgentEvent(
-                        event_type="sub_agent_tool_output",
-                        data={
-                            "parent_tool_call_id": parent_tc_id,
-                            "tool_call_id": tc.id,
-                            "tool": tc.name,
-                            "output": output[:500],
-                            "success": success,
-                        },
-                    ))
+                    await session.emit(
+                        AgentEvent(
+                            event_type="sub_agent_tool_output",
+                            data={
+                                "parent_tool_call_id": parent_tc_id,
+                                "tool_call_id": tc.id,
+                                "tool": tc.name,
+                                "output": output[:500],
+                                "success": success,
+                            },
+                        )
+                    )
 
     except Exception as e:
         duration = time.time() - start_time
         if session:
-            await session.emit(AgentEvent(
-                event_type="sub_agent_end",
-                data={
-                    "parent_tool_call_id": parent_tc_id,
-                    "tool_count": tool_count,
-                    "duration_seconds": round(duration, 1),
-                    "summary": f"Error: {str(e)}",
-                    "success": False,
-                },
-            ))
+            await session.emit(
+                AgentEvent(
+                    event_type="sub_agent_end",
+                    data={
+                        "parent_tool_call_id": parent_tc_id,
+                        "tool_count": tool_count,
+                        "duration_seconds": round(duration, 1),
+                        "summary": f"Error: {str(e)}",
+                        "success": False,
+                    },
+                )
+            )
         return f"Research sub-agent error: {str(e)}", False
 
     if not accumulated_content:
@@ -190,16 +211,18 @@ async def _handle_research(query: str, focus: str = "general", session=None, **k
 
     # Emit sub-agent end with stats
     if session:
-        await session.emit(AgentEvent(
-            event_type="sub_agent_end",
-            data={
-                "parent_tool_call_id": parent_tc_id,
-                "tool_count": tool_count,
-                "duration_seconds": round(duration, 1),
-                "summary": accumulated_content[:500],
-                "success": True,
-            },
-        ))
+        await session.emit(
+            AgentEvent(
+                event_type="sub_agent_end",
+                data={
+                    "parent_tool_call_id": parent_tc_id,
+                    "tool_count": tool_count,
+                    "duration_seconds": round(duration, 1),
+                    "summary": accumulated_content[:500],
+                    "success": True,
+                },
+            )
+        )
 
     return accumulated_content, True
 
@@ -212,35 +235,41 @@ def _get_research_tool_specs() -> list[dict]:
 
     tools = []
     for spec in create_search_tools():
-        tools.append({
-            "type": "function",
-            "function": {
-                "name": spec.name,
-                "description": spec.description,
-                "parameters": spec.parameters,
-            },
-        })
-
-    papers = create_papers_tool()
-    tools.append({
-        "type": "function",
-        "function": {
-            "name": papers.name,
-            "description": papers.description,
-            "parameters": papers.parameters,
-        },
-    })
-
-    for spec in create_github_tools():
-        if spec.name in ("github_read_file", "github_find_examples"):
-            tools.append({
+        tools.append(
+            {
                 "type": "function",
                 "function": {
                     "name": spec.name,
                     "description": spec.description,
                     "parameters": spec.parameters,
                 },
-            })
+            }
+        )
+
+    papers = create_papers_tool()
+    tools.append(
+        {
+            "type": "function",
+            "function": {
+                "name": papers.name,
+                "description": papers.description,
+                "parameters": papers.parameters,
+            },
+        }
+    )
+
+    for spec in create_github_tools():
+        if spec.name in ("github_read_file", "github_find_examples"):
+            tools.append(
+                {
+                    "type": "function",
+                    "function": {
+                        "name": spec.name,
+                        "description": spec.description,
+                        "parameters": spec.parameters,
+                    },
+                }
+            )
 
     return tools
 
diff --git a/backend/openmlr/tools/sandbox_tools.py b/backend/openmlr/tools/sandbox_tools.py
index 2792927..733b166 100644
--- a/backend/openmlr/tools/sandbox_tools.py
+++ b/backend/openmlr/tools/sandbox_tools.py
@@ -42,8 +42,14 @@ def create_sandbox_tools(sandbox_manager) -> list[ToolSpec]:
                             "host": {"type": "string", "description": "SSH hostname"},
                             "port": {"type": "integer", "description": "SSH port (default 22)"},
                             "username": {"type": "string", "description": "SSH username"},
-                            "key_path": {"type": "string", "description": "Path to SSH private key"},
-                            "gpu": {"type": "string", "description": "Modal GPU type (e.g. T4, A100)"},
+                            "key_path": {
+                                "type": "string",
+                                "description": "Path to SSH private key",
+                            },
+                            "gpu": {
+                                "type": "string",
+                                "description": "Modal GPU type (e.g. T4, A100)",
+                            },
                             "image": {"type": "string", "description": "Modal container image"},
                             "workdir": {"type": "string", "description": "Working directory"},
                         },
@@ -64,8 +70,14 @@ def create_sandbox_tools(sandbox_manager) -> list[ToolSpec]:
                 "type": "object",
                 "properties": {
                     "command": {"type": "string", "description": "Shell command to execute"},
-                    "timeout": {"type": "integer", "description": "Timeout in seconds (default 120, max 3600)"},
-                    "stream": {"type": "boolean", "description": "Stream output in real-time for long-running commands (default false)"},
+                    "timeout": {
+                        "type": "integer",
+                        "description": "Timeout in seconds (default 120, max 3600)",
+                    },
+                    "stream": {
+                        "type": "boolean",
+                        "description": "Stream output in real-time for long-running commands (default false)",
+                    },
                 },
                 "required": ["command"],
             },
@@ -143,8 +155,12 @@ async def _local_probe() -> str:
         lines.append("Python: unknown")
 
     try:
-        gpu = subprocess.run(["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader"],
-                           capture_output=True, text=True, timeout=5)
+        gpu = subprocess.run(
+            ["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader"],
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
         if gpu.returncode == 0:
             lines.append(f"GPU: {gpu.stdout.strip()}")
         else:
@@ -158,7 +174,9 @@ async def _local_probe() -> str:
     return "\n".join(lines)
 
 
-async def _handle_create(sandbox_manager, provider: str, config: dict = None, session=None, **kwargs) -> tuple[str, bool]:
+async def _handle_create(
+    sandbox_manager, provider: str, config: dict = None, session=None, **kwargs
+) -> tuple[str, bool]:
     try:
         await sandbox_manager.create(provider, config or {})
         return f"Sandbox created: {provider} ({sandbox_manager.active_type})", True
@@ -166,11 +184,14 @@ async def _handle_create(sandbox_manager, provider: str, config: dict = None, se
         return f"Failed to create sandbox: {str(e)}", False
 
 
-async def _handle_exec(sandbox_manager, command: str, timeout: int = 120, stream: bool = False, session=None, **kwargs) -> tuple[str, bool]:
+async def _handle_exec(
+    sandbox_manager, command: str, timeout: int = 120, stream: bool = False, session=None, **kwargs
+) -> tuple[str, bool]:
     sandbox = sandbox_manager.get_active()
     if not sandbox:
         # Fall back to local execution
         from .local import _handle_bash
+
         return await _handle_bash(command=command, timeout=timeout)
 
     try:
@@ -201,6 +222,7 @@ async def _handle_read(sandbox_manager, path: str, session=None, **kwargs) -> tu
     sandbox = sandbox_manager.get_active()
     if not sandbox:
         from .local import _handle_read as local_read
+
         return await local_read(path=path)
 
     try:
@@ -210,10 +232,13 @@ async def _handle_read(sandbox_manager, path: str, session=None, **kwargs) -> tu
         return f"Read error: {str(e)}", False
 
 
-async def _handle_write(sandbox_manager, path: str, content: str, session=None, **kwargs) -> tuple[str, bool]:
+async def _handle_write(
+    sandbox_manager, path: str, content: str, session=None, **kwargs
+) -> tuple[str, bool]:
     sandbox = sandbox_manager.get_active()
     if not sandbox:
         from .local import _handle_write as local_write
+
         return await local_write(path=path, content=content)
 
     try:
diff --git a/backend/openmlr/tools/search.py b/backend/openmlr/tools/search.py
index da98a1b..e6b3bba 100644
--- a/backend/openmlr/tools/search.py
+++ b/backend/openmlr/tools/search.py
@@ -22,7 +22,10 @@ def create_search_tools() -> list[ToolSpec]:
                 "type": "object",
                 "properties": {
                     "query": {"type": "string", "description": "Search query"},
-                    "count": {"type": "integer", "description": "Number of results (default 5, max 20)"},
+                    "count": {
+                        "type": "integer",
+                        "description": "Number of results (default 5, max 20)",
+                    },
                 },
                 "required": ["query"],
             },
diff --git a/backend/openmlr/tools/writing.py b/backend/openmlr/tools/writing.py
index 2956541..8b3823a 100644
--- a/backend/openmlr/tools/writing.py
+++ b/backend/openmlr/tools/writing.py
@@ -17,9 +17,11 @@
 def _get_session_factory():
     """Get the correct async session factory for the current context."""
     from ..db.engine import _worker_engine, async_session
+
     eng = _worker_engine.get(None)
     if eng is not None:
         from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
         return async_sessionmaker(eng, class_=AsyncSession, expire_on_commit=False)
     return async_session
 
@@ -75,7 +77,8 @@ async def _save_project(conv_id: int, proj: dict) -> None:
     async with session_factory() as db:
         # Save project metadata (structure, bibliography, etc.)
         await ops.upsert_resource(
-            db, conv_id,
+            db,
+            conv_id,
             resource_id=f"paper-meta-{conv_id}",
             title=f"Paper Metadata: {proj.get('title', 'Untitled')}",
             resource_type="doc",
@@ -103,8 +106,12 @@ def create_writing_tool() -> ToolSpec:
                 "operation": {
                     "type": "string",
                     "enum": [
-                        "create_project", "set_outline", "write_section",
-                        "refine_section", "add_citation", "get_draft",
+                        "create_project",
+                        "set_outline",
+                        "write_section",
+                        "refine_section",
+                        "add_citation",
+                        "get_draft",
                         "list_sections",
                     ],
                     "description": "Which writing operation to perform",
@@ -345,7 +352,9 @@ def _get_draft_from_proj(proj: dict, author_info: dict | None = None) -> tuple[s
         if author_info.get("email"):
             author_lines.append(f"Email: {author_info['email']}")
         if author_info.get("orcid"):
-            author_lines.append(f"ORCID: [{author_info['orcid']}](https://orcid.org/{author_info['orcid']})")
+            author_lines.append(
+                f"ORCID: [{author_info['orcid']}](https://orcid.org/{author_info['orcid']})"
+            )
 
         if author_lines:
             lines.append("\n".join(author_lines))
@@ -374,7 +383,7 @@ def _get_draft_from_proj(proj: dict, author_info: dict | None = None) -> tuple[s
             author = c.get("author", "Unknown")
             title = c.get("title", "Untitled")
             year = c.get("year", "?")
-            lines.append(f"[{key}] {author}. \"{title}\". {year}.")
+            lines.append(f'[{key}] {author}. "{title}". {year}.')
 
     return "\n".join(lines), True
 
@@ -407,10 +416,12 @@ async def _emit_resources(session, conv_id: int) -> None:
             {"title": r.title, "url": r.url or "", "type": r.type, "id": r.resource_id}
             for r in resources
         ]
-        await session.emit(AgentEvent(
-            event_type="resources_update",
-            data={"resources": res_list},
-        ))
+        await session.emit(
+            AgentEvent(
+                event_type="resources_update",
+                data={"resources": res_list},
+            )
+        )
 
 
 def _count_sections(outline: list) -> int:
diff --git a/backend/tests/test_agent_loop.py b/backend/tests/test_agent_loop.py
index a430bf3..92380ff 100644
--- a/backend/tests/test_agent_loop.py
+++ b/backend/tests/test_agent_loop.py
@@ -32,10 +32,12 @@
 
 # ── Test fixtures ──────────────────────────────────────────
 
+
 @pytest.fixture
 def config():
     return AgentConfig(model_name="test/model", max_iterations=10, stream=False)
 
+
 @pytest.fixture
 def mock_session(config):
     session = MagicMock(spec=Session)
@@ -53,6 +55,7 @@ def mock_session(config):
     session.update_model = MagicMock()
     return session
 
+
 @pytest.fixture
 def mock_router():
     router = MagicMock(spec=ToolRouter)
@@ -66,6 +69,7 @@ def mock_router():
 
 # ── Tool Execution ─────────────────────────────────────────
 
+
 class TestExecuteTool:
     async def test_executes_tool_and_returns_output(self, mock_session, mock_router):
         tc = ToolCall(id="tc1", name="bash", arguments={"cmd": "ls"})
@@ -95,7 +99,7 @@ async def test_emits_both_state_changes(self, mock_session, mock_router):
         emitted_event_types = []
         for call in mock_session.emit.call_args_list:
             args = call[0]
-            if args and hasattr(args[0], 'event_type'):
+            if args and hasattr(args[0], "event_type"):
                 emitted_event_types.append(args[0].event_type)
         assert len(emitted_event_types) >= 2
         assert "tool_state_change" in emitted_event_types
@@ -103,6 +107,7 @@ async def test_emits_both_state_changes(self, mock_session, mock_router):
 
 # ── Approval Handling ──────────────────────────────────────
 
+
 class TestHandleApproval:
     async def test_approves_tool_calls(self, mock_session, mock_router):
         tcs = [ToolCall(id="tc1", name="bash", arguments={"cmd": "ls"})]
@@ -136,6 +141,7 @@ async def test_no_pending_approval_returns(self, mock_session, mock_router):
 
 # ── Undo ───────────────────────────────────────────────────
 
+
 class TestUndo:
     async def test_undo_calls_context_manager(self, mock_session):
         mock_session.context_manager.undo_last_turn.return_value = 3
@@ -148,6 +154,7 @@ async def test_undo_calls_context_manager(self, mock_session):
 
 # ── Compaction ─────────────────────────────────────────────
 
+
 class TestCompact:
     async def test_compact_calls_context_manager(self, mock_session):
         mock_session.context_manager.compact = AsyncMock(return_value="Summary of conversation")
@@ -160,12 +167,17 @@ async def test_compact_calls_context_manager(self, mock_session):
 
 # ── Run Agent ──────────────────────────────────────────────
 
+
 class TestRunAgent:
     async def test_runs_with_no_tool_calls(self, mock_session, mock_router):
         """Agent processes a message, LLM returns content with no tool calls."""
         mock_session.context_manager.get_messages.return_value = []
         mock_session.context_manager.needs_compaction.return_value = False
-        mock_session.context_manager.get_token_usage.return_value = {"used": 100, "max": 200000, "ratio": 0.0}
+        mock_session.context_manager.get_token_usage.return_value = {
+            "used": 100,
+            "max": 200000,
+            "ratio": 0.0,
+        }
         mock_session.config.stream = False
 
         with patch("openmlr.agent.loop.LLMProvider.generate") as mock_gen:
@@ -193,7 +205,11 @@ async def test_cancelled_stops_early(self, mock_session, mock_router):
         mock_session.is_cancelled.return_value = True
         mock_session.context_manager.get_messages.return_value = []
         mock_session.context_manager.needs_compaction.return_value = False
-        mock_session.context_manager.get_token_usage.return_value = {"used": 0, "max": 200000, "ratio": 0.0}
+        mock_session.context_manager.get_token_usage.return_value = {
+            "used": 0,
+            "max": 200000,
+            "ratio": 0.0,
+        }
 
         await _run_agent(mock_session, mock_router, "test")
 
@@ -209,7 +225,9 @@ async def test_delegates_to_run_agent(self, mock_session, mock_router):
 
         with patch("openmlr.agent.loop.LLMProvider.generate") as mock_gen:
             mock_gen.return_value = LLMResult(
-                content="Hello!", tool_calls=[], finish_reason="stop",
+                content="Hello!",
+                tool_calls=[],
+                finish_reason="stop",
             )
 
             await run_agent_turn(mock_session, mock_router, "Hi", mode="plan")
@@ -224,7 +242,9 @@ async def test_default_mode_is_execute(self, mock_session, mock_router):
 
         with patch("openmlr.agent.loop.LLMProvider.generate") as mock_gen:
             mock_gen.return_value = LLMResult(
-                content="Ok", tool_calls=[], finish_reason="stop",
+                content="Ok",
+                tool_calls=[],
+                finish_reason="stop",
             )
             await run_agent_turn(mock_session, mock_router, "test", mode="unknown")
 
@@ -233,12 +253,15 @@ async def test_default_mode_is_execute(self, mock_session, mock_router):
 
 # ── Submissions ────────────────────────────────────────────
 
+
 class TestSubmissionLoop:
     async def test_processes_user_input(self, mock_session, mock_router):
-        mock_session.submission_queue.get = AsyncMock(side_effect=[
-            Submission(op=OpType.USER_INPUT, data="hello"),
-            Submission(op=OpType.SHUTDOWN),
-        ])
+        mock_session.submission_queue.get = AsyncMock(
+            side_effect=[
+                Submission(op=OpType.USER_INPUT, data="hello"),
+                Submission(op=OpType.SHUTDOWN),
+            ]
+        )
         mock_session.context_manager.get_messages.return_value = []
         mock_session.context_manager.needs_compaction.return_value = False
         mock_session.context_manager.get_token_usage.return_value = {"ratio": 0.0}
@@ -246,17 +269,21 @@ async def test_processes_user_input(self, mock_session, mock_router):
 
         with patch("openmlr.agent.loop.LLMProvider.generate") as mock_gen:
             mock_gen.return_value = LLMResult(
-                content="Hi!", tool_calls=[], finish_reason="stop",
+                content="Hi!",
+                tool_calls=[],
+                finish_reason="stop",
             )
             await submission_loop(mock_session, mock_router)
 
         assert mock_session.emit.called
 
     async def test_processes_compact(self, mock_session, mock_router):
-        mock_session.submission_queue.get = AsyncMock(side_effect=[
-            Submission(op=OpType.COMPACT),
-            Submission(op=OpType.SHUTDOWN),
-        ])
+        mock_session.submission_queue.get = AsyncMock(
+            side_effect=[
+                Submission(op=OpType.COMPACT),
+                Submission(op=OpType.SHUTDOWN),
+            ]
+        )
         mock_session.context_manager.compact = AsyncMock(return_value="Summary")
 
         await submission_loop(mock_session, mock_router)
@@ -264,10 +291,12 @@ async def test_processes_compact(self, mock_session, mock_router):
         mock_session.context_manager.compact.assert_called_once()
 
     async def test_processes_undo(self, mock_session, mock_router):
-        mock_session.submission_queue.get = AsyncMock(side_effect=[
-            Submission(op=OpType.UNDO),
-            Submission(op=OpType.SHUTDOWN),
-        ])
+        mock_session.submission_queue.get = AsyncMock(
+            side_effect=[
+                Submission(op=OpType.UNDO),
+                Submission(op=OpType.SHUTDOWN),
+            ]
+        )
         mock_session.context_manager.undo_last_turn.return_value = 3
 
         await submission_loop(mock_session, mock_router)
@@ -275,10 +304,12 @@ async def test_processes_undo(self, mock_session, mock_router):
         mock_session.context_manager.undo_last_turn.assert_called_once()
 
     async def test_processes_interrupt(self, mock_session, mock_router):
-        mock_session.submission_queue.get = AsyncMock(side_effect=[
-            Submission(op=OpType.INTERRUPT),
-            Submission(op=OpType.SHUTDOWN),
-        ])
+        mock_session.submission_queue.get = AsyncMock(
+            side_effect=[
+                Submission(op=OpType.INTERRUPT),
+                Submission(op=OpType.SHUTDOWN),
+            ]
+        )
 
         await submission_loop(mock_session, mock_router)
 
@@ -294,6 +325,7 @@ async def test_shutdown_exits(self, mock_session, mock_router):
 
 # ── LLM Call Helpers ───────────────────────────────────────
 
+
 class TestNonStreamLLMCall:
     async def test_returns_llm_result(self, mock_session):
         mock_session.is_cancelled.return_value = False
@@ -302,7 +334,9 @@ async def test_returns_llm_result(self, mock_session):
 
         with patch("openmlr.agent.loop.LLMProvider.generate") as mock_gen:
             mock_gen.return_value = LLMResult(
-                content="Response", tool_calls=[], finish_reason="stop",
+                content="Response",
+                tool_calls=[],
+                finish_reason="stop",
             )
             result = await _non_stream_llm_call(mock_session, messages, tools)
 
@@ -314,7 +348,9 @@ async def test_emits_chunk_and_end(self, mock_session):
 
         with patch("openmlr.agent.loop.LLMProvider.generate") as mock_gen:
             mock_gen.return_value = LLMResult(
-                content="Output", tool_calls=[], finish_reason="stop",
+                content="Output",
+                tool_calls=[],
+                finish_reason="stop",
             )
             await _non_stream_llm_call(mock_session, [], [])
 
@@ -372,6 +408,7 @@ async def mock_stream(messages, config, tools):
 
 # ── Compact LLM Call ───────────────────────────────────────
 
+
 class TestCompactLLMCall:
     async def test_returns_content(self):
         messages = [{"role": "user", "content": "summarize"}]
@@ -379,7 +416,9 @@ async def test_returns_content(self):
 
         with patch("openmlr.agent.loop.LLMProvider.generate") as mock_gen:
             mock_gen.return_value = LLMResult(
-                content="A summary.", tool_calls=[], finish_reason="stop",
+                content="A summary.",
+                tool_calls=[],
+                finish_reason="stop",
             )
             result = await _compact_llm_call(messages, config)
 
diff --git a/backend/tests/test_app.py b/backend/tests/test_app.py
index 2a94fdc..5a574d6 100644
--- a/backend/tests/test_app.py
+++ b/backend/tests/test_app.py
@@ -23,6 +23,7 @@ async def test_app_routers_registered(self):
 
     async def test_cors_middleware_configured(self):
         from fastapi.middleware.cors import CORSMiddleware
+
         middlewares = [m.cls for m in app.user_middleware]
         assert CORSMiddleware in middlewares
 
@@ -34,11 +35,13 @@ async def test_global_exception_handler_configured(self):
 class TestMainModule:
     async def test_main_is_callable(self):
         from openmlr.main import main
+
         assert callable(main)
 
     async def test_main_contains_uvicorn_import(self):
         import inspect
 
         from openmlr.main import main
+
         source = inspect.getsource(main)
         assert "uvicorn" in source
diff --git a/backend/tests/test_auth.py b/backend/tests/test_auth.py
index 2903844..8a49e32 100644
--- a/backend/tests/test_auth.py
+++ b/backend/tests/test_auth.py
@@ -23,6 +23,7 @@ def _setup_db():
 
 # ── hash_password ──────────────────────────────────────────────────────────
 
+
 class TestHashPassword:
     def test_returns_valid_bcrypt_hash(self):
         hashed = hash_password("my_secret")
@@ -38,6 +39,7 @@ def test_different_calls_produce_different_hashes(self):
 
 # ── verify_password ────────────────────────────────────────────────────────
 
+
 class TestVerifyPassword:
     def test_correct_password_returns_true(self):
         hashed = hash_password("correct_horse")
@@ -55,6 +57,7 @@ def test_empty_password(self):
 
 # ── create_access_token ────────────────────────────────────────────────────
 
+
 class TestCreateAccessToken:
     def test_returns_string(self):
         token = create_access_token(user_id=1, username="alice")
@@ -76,6 +79,7 @@ def test_user_id_stored_as_string(self):
 
 # ── decode_access_token ────────────────────────────────────────────────────
 
+
 class TestDecodeAccessToken:
     def test_decodes_valid_token(self):
         token = create_access_token(user_id=7, username="dave")
diff --git a/backend/tests/test_celery_app.py b/backend/tests/test_celery_app.py
index 837b692..8b14fbb 100644
--- a/backend/tests/test_celery_app.py
+++ b/backend/tests/test_celery_app.py
@@ -6,36 +6,44 @@
 class TestCeleryApp:
     def test_is_celery_instance(self):
         from openmlr.celery_app import celery_app
+
         assert isinstance(celery_app, Celery)
 
     def test_has_correct_name(self):
         from openmlr.celery_app import celery_app
+
         assert celery_app.main == "openmlr"
 
     def test_config_has_serializer(self):
         from openmlr.celery_app import celery_app
+
         assert celery_app.conf.task_serializer == "json"
         assert "json" in celery_app.conf.accept_content
 
     def test_config_has_timezone(self):
         from openmlr.celery_app import celery_app
+
         assert celery_app.conf.timezone == "UTC"
         assert celery_app.conf.enable_utc is True
 
     def test_config_worker_settings(self):
         from openmlr.celery_app import celery_app
+
         assert celery_app.conf.worker_prefetch_multiplier == 1
         assert celery_app.conf.task_acks_late is True
 
     def test_config_result_expiry(self):
         from openmlr.celery_app import celery_app
+
         assert celery_app.conf.result_expires == 3600
 
     def test_task_routing_configured(self):
         from openmlr.celery_app import celery_app
+
         routes = celery_app.conf.task_routes
         assert routes is not None
 
     def test_get_celery_app(self):
         from openmlr.celery_app import celery_app, get_celery_app
+
         assert get_celery_app() is celery_app
diff --git a/backend/tests/test_compute.py b/backend/tests/test_compute.py
index d82d46d..5258e1f 100644
--- a/backend/tests/test_compute.py
+++ b/backend/tests/test_compute.py
@@ -19,6 +19,7 @@
 # Fixtures
 # ---------------------------------------------------------------------------
 
+
 @pytest.fixture
 def tmp_keys_dir(tmp_path):
     keys_dir = tmp_path / ".keys"
@@ -45,6 +46,7 @@ def workspace_manager(tmp_workspace_dir):
 # KeyManager
 # ---------------------------------------------------------------------------
 
+
 class TestKeyManager:
     def test_init_creates_dir(self, tmp_keys_dir, key_manager):
         assert tmp_keys_dir.exists()
@@ -95,7 +97,9 @@ def test_delete_nonexistent(self, key_manager):
         assert result is False
 
     def test_write_and_read_key(self, key_manager):
-        key_manager.write_key("id_manual", "-----BEGIN FAKE KEY-----\ndata\n-----END FAKE KEY-----\n")
+        key_manager.write_key(
+            "id_manual", "-----BEGIN FAKE KEY-----\ndata\n-----END FAKE KEY-----\n"
+        )
         content = key_manager.read_key("id_manual")
         assert "FAKE KEY" in content
 
@@ -132,6 +136,7 @@ def test_get_key_path(self, key_manager, tmp_keys_dir):
 # WorkspaceManager
 # ---------------------------------------------------------------------------
 
+
 class TestWorkspaceManager:
     def test_create_workspace(self, workspace_manager):
         path = workspace_manager.create_workspace("test-uuid-123")
@@ -216,6 +221,7 @@ def test_cleanup_workspaces_orphaned(self, workspace_manager):
 # ComputeCapabilities
 # ---------------------------------------------------------------------------
 
+
 class TestComputeCapabilities:
     def test_defaults(self):
         caps = ComputeCapabilities()
@@ -241,7 +247,14 @@ def test_from_dict(self):
             "platform": "Linux",
             "cpu_cores": 4,
             "gpu_available": True,
-            "gpu_info": [{"model": "RTX 4090", "vram_gb": 24, "cuda_version": "12.4", "driver_version": "545"}],
+            "gpu_info": [
+                {
+                    "model": "RTX 4090",
+                    "vram_gb": 24,
+                    "cuda_version": "12.4",
+                    "driver_version": "545",
+                }
+            ],
         }
         caps = ComputeCapabilities.from_dict(d)
         assert caps.platform == "Linux"
@@ -276,6 +289,7 @@ def test_roundtrip(self):
 # ComputeManager (validation)
 # ---------------------------------------------------------------------------
 
+
 class TestComputeManager:
     def test_validate_ssh_missing_host(self, key_manager):
         cm = ComputeManager(key_manager)
@@ -296,9 +310,14 @@ def test_validate_ssh_ok(self, key_manager):
 
     def test_validate_ssh_missing_key(self, key_manager):
         cm = ComputeManager(key_manager)
-        ok, err = cm.validate_node_config("ssh", {
-            "host": "x", "username": "u", "key_filename": "nonexistent",
-        })
+        ok, err = cm.validate_node_config(
+            "ssh",
+            {
+                "host": "x",
+                "username": "u",
+                "key_filename": "nonexistent",
+            },
+        )
         assert ok is False
         assert "not found" in err
 
@@ -330,6 +349,7 @@ def test_validate_unknown_type(self, key_manager):
 # SSHConnectionPool
 # ---------------------------------------------------------------------------
 
+
 class TestSSHConnectionPool:
     def test_singleton(self):
         pool1 = SSHConnectionPool.get_pool()
@@ -396,6 +416,7 @@ def test_remove(self):
 # Path traversal validation
 # ---------------------------------------------------------------------------
 
+
 class TestPathTraversal:
     def test_valid_relative_path(self, tmp_path):
         ws = tmp_path / "workspace"
@@ -429,6 +450,7 @@ def test_nested_valid_path(self, tmp_path):
 # ToolRouter compute context injection
 # ---------------------------------------------------------------------------
 
+
 class TestToolRouterContext:
     def test_set_context(self):
         router = ToolRouter()
@@ -444,8 +466,11 @@ async def handler(user_id: int = None, db=None, arg: str = "") -> tuple[str, boo
             return f"uid={user_id},db={db},arg={arg}", True
 
         from openmlr.agent.types import ToolSpec
+
         tool = ToolSpec(
-            name="ctx_test", description="test", parameters={"type": "object", "properties": {}},
+            name="ctx_test",
+            description="test",
+            parameters={"type": "object", "properties": {}},
             handler=handler,
         )
         router.register(tool)
@@ -460,6 +485,7 @@ async def handler(user_id: int = None, db=None, arg: str = "") -> tuple[str, boo
 # Plan mode allows compute tools
 # ---------------------------------------------------------------------------
 
+
 class TestPlanModeComputeTools:
     def test_compute_list_allowed(self):
         assert "compute_list" in MODE_TOOL_RESTRICTIONS["plan"]["allowed"]
@@ -478,9 +504,11 @@ def test_compute_select_not_in_plan(self):
 # Config redaction (routes/compute.py)
 # ---------------------------------------------------------------------------
 
+
 class TestConfigRedaction:
     def test_redact_password(self):
         from openmlr.routes.compute import _redact_config
+
         config = {"host": "example.com", "password": "secret123", "username": "user"}
         redacted = _redact_config(config)
         assert redacted["host"] == "example.com"
@@ -489,11 +517,13 @@ def test_redact_password(self):
 
     def test_redact_empty_config(self):
         from openmlr.routes.compute import _redact_config
+
         assert _redact_config({}) == {}
         assert _redact_config(None) == {}
 
     def test_redact_no_sensitive_fields(self):
         from openmlr.routes.compute import _redact_config
+
         config = {"host": "x", "port": 22}
         assert _redact_config(config) == config
 
@@ -502,6 +532,7 @@ def test_redact_no_sensitive_fields(self):
 # Routes (keys + compute) — integration via httpx
 # ---------------------------------------------------------------------------
 
+
 class TestKeyRoutes:
     async def test_list_keys_empty(self, auth_client):
         resp = await auth_client.get("/api/keys")
@@ -509,12 +540,15 @@ async def test_list_keys_empty(self, auth_client):
         assert resp.json()["keys"] == []
 
     async def test_generate_key(self, auth_client):
-        resp = await auth_client.post("/api/keys", json={
-            "action": "generate",
-            "filename": "id_test_route",
-            "algorithm": "ed25519",
-            "comment": "test",
-        })
+        resp = await auth_client.post(
+            "/api/keys",
+            json={
+                "action": "generate",
+                "filename": "id_test_route",
+                "algorithm": "ed25519",
+                "comment": "test",
+            },
+        )
         assert resp.status_code == 200
         data = resp.json()["key"]
         assert data["filename"] == "id_test_route"
@@ -522,18 +556,33 @@ async def test_generate_key(self, auth_client):
         assert data["fingerprint"].startswith("SHA256:")
 
     async def test_generate_duplicate(self, auth_client):
-        await auth_client.post("/api/keys", json={
-            "action": "generate", "filename": "id_dup", "algorithm": "ed25519",
-        })
-        resp = await auth_client.post("/api/keys", json={
-            "action": "generate", "filename": "id_dup", "algorithm": "ed25519",
-        })
+        await auth_client.post(
+            "/api/keys",
+            json={
+                "action": "generate",
+                "filename": "id_dup",
+                "algorithm": "ed25519",
+            },
+        )
+        resp = await auth_client.post(
+            "/api/keys",
+            json={
+                "action": "generate",
+                "filename": "id_dup",
+                "algorithm": "ed25519",
+            },
+        )
         assert resp.status_code == 409
 
     async def test_delete_key(self, auth_client):
-        await auth_client.post("/api/keys", json={
-            "action": "generate", "filename": "id_to_del", "algorithm": "ed25519",
-        })
+        await auth_client.post(
+            "/api/keys",
+            json={
+                "action": "generate",
+                "filename": "id_to_del",
+                "algorithm": "ed25519",
+            },
+        )
         resp = await auth_client.delete("/api/keys/id_to_del")
         assert resp.status_code == 200
         assert resp.json()["ok"] is True
@@ -547,9 +596,13 @@ async def test_create_key_missing_filename(self, auth_client):
         assert resp.status_code == 400
 
     async def test_create_key_invalid_action(self, auth_client):
-        resp = await auth_client.post("/api/keys", json={
-            "action": "nope", "filename": "id_x",
-        })
+        resp = await auth_client.post(
+            "/api/keys",
+            json={
+                "action": "nope",
+                "filename": "id_x",
+            },
+        )
         assert resp.status_code == 400
 
     async def test_unauthenticated_keys(self, client):
@@ -564,11 +617,14 @@ async def test_list_empty(self, auth_client):
         assert resp.json()["nodes"] == []
 
     async def test_create_local_node(self, auth_client):
-        resp = await auth_client.post("/api/compute/nodes", json={
-            "name": "My Laptop",
-            "type": "local",
-            "config": {},
-        })
+        resp = await auth_client.post(
+            "/api/compute/nodes",
+            json={
+                "name": "My Laptop",
+                "type": "local",
+                "config": {},
+            },
+        )
         assert resp.status_code == 200
         node = resp.json()["node"]
         assert node["name"] == "My Laptop"
@@ -576,53 +632,91 @@ async def test_create_local_node(self, auth_client):
         assert node["health_status"] == "unknown"
 
     async def test_create_duplicate_name(self, auth_client):
-        await auth_client.post("/api/compute/nodes", json={
-            "name": "Dup", "type": "local", "config": {},
-        })
-        resp = await auth_client.post("/api/compute/nodes", json={
-            "name": "Dup", "type": "local", "config": {},
-        })
+        await auth_client.post(
+            "/api/compute/nodes",
+            json={
+                "name": "Dup",
+                "type": "local",
+                "config": {},
+            },
+        )
+        resp = await auth_client.post(
+            "/api/compute/nodes",
+            json={
+                "name": "Dup",
+                "type": "local",
+                "config": {},
+            },
+        )
         assert resp.status_code == 409
 
     async def test_create_invalid_type(self, auth_client):
-        resp = await auth_client.post("/api/compute/nodes", json={
-            "name": "Bad", "type": "kubernetes", "config": {},
-        })
+        resp = await auth_client.post(
+            "/api/compute/nodes",
+            json={
+                "name": "Bad",
+                "type": "kubernetes",
+                "config": {},
+            },
+        )
         assert resp.status_code == 400
 
     async def test_get_node(self, auth_client):
-        create_resp = await auth_client.post("/api/compute/nodes", json={
-            "name": "Get Test", "type": "local", "config": {},
-        })
+        create_resp = await auth_client.post(
+            "/api/compute/nodes",
+            json={
+                "name": "Get Test",
+                "type": "local",
+                "config": {},
+            },
+        )
         node_id = create_resp.json()["node"]["id"]
         resp = await auth_client.get(f"/api/compute/nodes/{node_id}")
         assert resp.status_code == 200
         assert resp.json()["node"]["name"] == "Get Test"
 
     async def test_update_node(self, auth_client):
-        create_resp = await auth_client.post("/api/compute/nodes", json={
-            "name": "Update Test", "type": "local", "config": {},
-        })
+        create_resp = await auth_client.post(
+            "/api/compute/nodes",
+            json={
+                "name": "Update Test",
+                "type": "local",
+                "config": {},
+            },
+        )
         node_id = create_resp.json()["node"]["id"]
-        resp = await auth_client.put(f"/api/compute/nodes/{node_id}", json={
-            "name": "Updated Name",
-        })
+        resp = await auth_client.put(
+            f"/api/compute/nodes/{node_id}",
+            json={
+                "name": "Updated Name",
+            },
+        )
         assert resp.status_code == 200
         assert resp.json()["node"]["name"] == "Updated Name"
 
     async def test_delete_node(self, auth_client):
-        create_resp = await auth_client.post("/api/compute/nodes", json={
-            "name": "Delete Test", "type": "local", "config": {},
-        })
+        create_resp = await auth_client.post(
+            "/api/compute/nodes",
+            json={
+                "name": "Delete Test",
+                "type": "local",
+                "config": {},
+            },
+        )
         node_id = create_resp.json()["node"]["id"]
         resp = await auth_client.delete(f"/api/compute/nodes/{node_id}")
         assert resp.status_code == 200
         assert resp.json()["ok"] is True
 
     async def test_set_default(self, auth_client):
-        create_resp = await auth_client.post("/api/compute/nodes", json={
-            "name": "Default Test", "type": "local", "config": {},
-        })
+        create_resp = await auth_client.post(
+            "/api/compute/nodes",
+            json={
+                "name": "Default Test",
+                "type": "local",
+                "config": {},
+            },
+        )
         node_id = create_resp.json()["node"]["id"]
         resp = await auth_client.post(f"/api/compute/nodes/{node_id}/set-default")
         assert resp.status_code == 200
@@ -632,20 +726,28 @@ async def test_set_default(self, auth_client):
         assert get_resp.json()["node"]["is_default"] is True
 
     async def test_config_redacted_in_response(self, auth_client):
-        resp = await auth_client.post("/api/compute/nodes", json={
-            "name": "Redact Test",
-            "type": "ssh",
-            "config": {"host": "x", "username": "u", "password": "secret"},
-        })
+        resp = await auth_client.post(
+            "/api/compute/nodes",
+            json={
+                "name": "Redact Test",
+                "type": "ssh",
+                "config": {"host": "x", "username": "u", "password": "secret"},
+            },
+        )
         assert resp.status_code == 200
         node = resp.json()["node"]
         assert node["config"]["password"] == "***"
         assert node["config"]["host"] == "x"
 
     async def test_test_local_node(self, auth_client):
-        create_resp = await auth_client.post("/api/compute/nodes", json={
-            "name": "Test Local", "type": "local", "config": {},
-        })
+        create_resp = await auth_client.post(
+            "/api/compute/nodes",
+            json={
+                "name": "Test Local",
+                "type": "local",
+                "config": {},
+            },
+        )
         node_id = create_resp.json()["node"]["id"]
         resp = await auth_client.post(f"/api/compute/nodes/{node_id}/test")
         assert resp.status_code == 200
@@ -653,18 +755,24 @@ async def test_test_local_node(self, auth_client):
         assert resp.json()["ok"] is True
 
     async def test_test_config_endpoint(self, auth_client):
-        resp = await auth_client.post("/api/compute/test", json={
-            "type": "local",
-            "config": {},
-        })
+        resp = await auth_client.post(
+            "/api/compute/test",
+            json={
+                "type": "local",
+                "config": {},
+            },
+        )
         assert resp.status_code == 200
         assert resp.json()["ok"] is True
 
     async def test_test_config_invalid_type(self, auth_client):
-        resp = await auth_client.post("/api/compute/test", json={
-            "type": "kubernetes",
-            "config": {},
-        })
+        resp = await auth_client.post(
+            "/api/compute/test",
+            json={
+                "type": "kubernetes",
+                "config": {},
+            },
+        )
         assert resp.status_code == 200
         assert resp.json()["ok"] is False
 
@@ -677,9 +785,11 @@ async def test_unauthenticated(self, client):
 # System prompt includes compute_env
 # ---------------------------------------------------------------------------
 
+
 class TestSystemPromptCompute:
     def test_prompt_includes_compute_env(self):
         from openmlr.agent.prompts import build_system_prompt
+
         prompt = build_system_prompt(
             tool_specs=[],
             compute_env="## Active Compute: TestNode (ssh)\n- CPU: 8 cores",
@@ -689,5 +799,6 @@ def test_prompt_includes_compute_env(self):
 
     def test_prompt_without_compute_env(self):
         from openmlr.agent.prompts import build_system_prompt
+
         prompt = build_system_prompt(tool_specs=[], compute_env="")
         assert "Active Compute" not in prompt
diff --git a/backend/tests/test_config.py b/backend/tests/test_config.py
index ed4699f..f2d676a 100644
--- a/backend/tests/test_config.py
+++ b/backend/tests/test_config.py
@@ -9,6 +9,7 @@
 # AgentConfig defaults
 # ---------------------------------------------------------------------------
 
+
 class TestAgentConfigDefaults:
     def test_model_name_default_empty(self):
         cfg = AgentConfig()
@@ -67,6 +68,7 @@ def test_mcp_servers_not_shared_across_instances(self):
 # get_model_max_tokens — known models
 # ---------------------------------------------------------------------------
 
+
 class TestGetModelMaxTokensKnown:
     @pytest.mark.parametrize(
         "model_name, expected",
@@ -104,6 +106,7 @@ def test_model_name_with_prefix(self):
 # get_model_max_tokens — unknown models
 # ---------------------------------------------------------------------------
 
+
 class TestGetModelMaxTokensUnknown:
     def test_unknown_model_returns_default(self):
         assert get_model_max_tokens("my-custom-model") == 200_000
@@ -119,6 +122,7 @@ def test_gibberish_returns_default(self):
 # estimate_tokens (from context.py)
 # ---------------------------------------------------------------------------
 
+
 class TestEstimateTokens:
     def test_empty_string_returns_one(self):
         """Empty string yields at least 1 (max(1, 0//4))."""
diff --git a/backend/tests/test_context.py b/backend/tests/test_context.py
index 3f51c6a..b498b93 100644
--- a/backend/tests/test_context.py
+++ b/backend/tests/test_context.py
@@ -16,7 +16,7 @@ def _setup_db():
 def _make_config(**overrides) -> AgentConfig:
     """Build an AgentConfig with sensible test defaults."""
     defaults = {
-        "model_name": "gpt-4o",          # 128 000 max tokens
+        "model_name": "gpt-4o",  # 128 000 max tokens
         "compact_threshold_ratio": 0.90,
         "untouched_messages": 5,
     }
@@ -26,6 +26,7 @@ def _make_config(**overrides) -> AgentConfig:
 
 # ── estimate_tokens ────────────────────────────────────────────────────────
 
+
 class TestEstimateTokens:
     def test_returns_roughly_len_over_4(self):
         text = "a" * 100
@@ -44,6 +45,7 @@ def test_longer_text(self):
 
 # ── ContextManager.add_message ─────────────────────────────────────────────
 
+
 class TestAddMessage:
     def test_adds_message_object(self):
         cm = ContextManager(config=_make_config())
@@ -68,13 +70,15 @@ def test_tracks_token_count(self):
 
     def test_dict_with_tool_calls(self):
         cm = ContextManager(config=_make_config())
-        cm.add_message({
-            "role": "assistant",
-            "content": "",
-            "tool_calls": [
-                {"id": "tc1", "name": "bash", "arguments": {"cmd": "ls"}},
-            ],
-        })
+        cm.add_message(
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [
+                    {"id": "tc1", "name": "bash", "arguments": {"cmd": "ls"}},
+                ],
+            }
+        )
         assert len(cm.messages) == 1
         assert cm.messages[0].tool_calls is not None
         assert cm.messages[0].tool_calls[0].name == "bash"
@@ -82,6 +86,7 @@ def test_dict_with_tool_calls(self):
 
 # ── ContextManager.get_messages ────────────────────────────────────────────
 
+
 class TestGetMessages:
     def test_returns_messages_in_order(self):
         cm = ContextManager(config=_make_config())
@@ -125,9 +130,9 @@ def test_serialises_tool_calls(self):
 
     def test_serialises_tool_call_id(self):
         cm = ContextManager(config=_make_config())
-        cm.add_message(Message(
-            role="tool", content="file contents", tool_call_id="tc1", name="read"
-        ))
+        cm.add_message(
+            Message(role="tool", content="file contents", tool_call_id="tc1", name="read")
+        )
         result = cm.get_messages(include_system=False)
         assert result[0]["tool_call_id"] == "tc1"
         assert result[0]["name"] == "read"
@@ -135,6 +140,7 @@ def test_serialises_tool_call_id(self):
 
 # ── ContextManager.needs_compaction ────────────────────────────────────────
 
+
 class TestNeedsCompaction:
     def test_returns_false_when_under_threshold(self):
         cm = ContextManager(config=_make_config())
@@ -143,7 +149,7 @@ def test_returns_false_when_under_threshold(self):
 
     def test_returns_true_when_over_threshold(self):
         cfg = _make_config(
-            model_name="gpt-4o",          # 128 000 max tokens
+            model_name="gpt-4o",  # 128 000 max tokens
             compact_threshold_ratio=0.90,  # threshold = 115 200
         )
         cm = ContextManager(config=cfg)
@@ -167,6 +173,7 @@ def test_boundary_just_over(self):
 
 # ── ContextManager.undo_last_turn ──────────────────────────────────────────
 
+
 class TestUndoLastTurn:
     def test_removes_assistant_and_user_messages(self):
         cm = ContextManager(config=_make_config())
@@ -216,6 +223,7 @@ def test_noop_on_empty(self):
 
 # ── ContextManager.clear ───────────────────────────────────────────────────
 
+
 class TestClear:
     def test_empties_messages(self):
         cm = ContextManager(config=_make_config())
diff --git a/backend/tests/test_db_engine.py b/backend/tests/test_db_engine.py
index 95b71a9..aa7d59c 100644
--- a/backend/tests/test_db_engine.py
+++ b/backend/tests/test_db_engine.py
@@ -6,21 +6,25 @@
 class TestEngineConfig:
     def test_database_url_exists(self):
         from openmlr.db.engine import DATABASE_URL
+
         assert DATABASE_URL is not None
         assert len(DATABASE_URL) > 0
 
     def test_engine_created(self):
         from openmlr.db.engine import engine
+
         assert engine is not None
 
     def test_async_session_created(self):
         from openmlr.db.engine import async_session
+
         assert async_session is not None
 
     def test_worker_engine_context_var(self):
         from contextvars import ContextVar
 
         from openmlr.db.engine import _worker_engine
+
         assert isinstance(_worker_engine, ContextVar)
 
 
@@ -28,8 +32,10 @@ def test_worker_engine_context_var(self):
 class TestGetWorkerSession:
     async def test_returns_sessionmaker(self):
         from openmlr.db.engine import get_worker_session
+
         result = get_worker_session()
         from sqlalchemy.ext.asyncio import async_sessionmaker
+
         assert isinstance(result, async_sessionmaker)
 
 
@@ -37,6 +43,7 @@ async def test_returns_sessionmaker(self):
 class TestGetDB:
     async def test_yields_session(self):
         from openmlr.db.engine import get_db
+
         sessions = []
         async for s in get_db():
             sessions.append(s)
diff --git a/backend/tests/test_db_operations.py b/backend/tests/test_db_operations.py
index c53050a..1c595f9 100644
--- a/backend/tests/test_db_operations.py
+++ b/backend/tests/test_db_operations.py
@@ -18,7 +18,9 @@ async def test_create_conversation(self, db_session: AsyncSession, test_user):
         assert conv.mode == "general"
 
     async def test_create_conversation_with_model(self, db_session: AsyncSession, test_user):
-        conv = await ops.create_conversation(db_session, test_user.id, model="gpt-4o", mode="coding")
+        conv = await ops.create_conversation(
+            db_session, test_user.id, model="gpt-4o", mode="coding"
+        )
         assert conv.model == "gpt-4o"
         assert conv.mode == "coding"
 
@@ -87,6 +89,7 @@ async def test_conversations_isolated_by_user(self, db_session: AsyncSession, te
         # Create another user
         from openmlr.auth.security import hash_password
         from openmlr.db.models import User
+
         user2 = User(username="user2", password_hash=hash_password("pwd"), is_active=True)
         db_session.add(user2)
         await db_session.flush()
@@ -116,7 +119,10 @@ async def test_add_message(self, db_session: AsyncSession):
 
     async def test_add_message_with_metadata(self, db_session: AsyncSession):
         msg = await ops.add_message(
-            db_session, self.conv.id, "assistant", "Done",
+            db_session,
+            self.conv.id,
+            "assistant",
+            "Done",
             metadata={"tool": "search", "duration": 1.5},
         )
         assert msg.meta == {"tool": "search", "duration": 1.5}
@@ -214,12 +220,20 @@ async def test_upsert_tasks_create(self, db_session: AsyncSession):
         assert result[1].order_index == 1
 
     async def test_upsert_tasks_replace(self, db_session: AsyncSession):
-        await ops.upsert_conversation_tasks(db_session, self.conv.id, [
-            {"title": "Old Task"},
-        ])
-        result = await ops.upsert_conversation_tasks(db_session, self.conv.id, [
-            {"title": "New Task"},
-        ])
+        await ops.upsert_conversation_tasks(
+            db_session,
+            self.conv.id,
+            [
+                {"title": "Old Task"},
+            ],
+        )
+        result = await ops.upsert_conversation_tasks(
+            db_session,
+            self.conv.id,
+            [
+                {"title": "New Task"},
+            ],
+        )
         assert len(result) == 1
         assert result[0].title == "New Task"
 
@@ -228,28 +242,40 @@ async def test_get_tasks_empty(self, db_session: AsyncSession):
         assert tasks == []
 
     async def test_get_tasks(self, db_session: AsyncSession):
-        await ops.upsert_conversation_tasks(db_session, self.conv.id, [
-            {"title": "T1", "status": "pending", "priority": "high"},
-            {"title": "T2", "status": "completed"},
-        ])
+        await ops.upsert_conversation_tasks(
+            db_session,
+            self.conv.id,
+            [
+                {"title": "T1", "status": "pending", "priority": "high"},
+                {"title": "T2", "status": "completed"},
+            ],
+        )
         tasks = await ops.get_conversation_tasks(db_session, self.conv.id)
         assert len(tasks) == 2
         assert tasks[0].title == "T1"
         assert tasks[0].priority == "high"
 
     async def test_update_task_status(self, db_session: AsyncSession):
-        await ops.upsert_conversation_tasks(db_session, self.conv.id, [
-            {"title": "Do this", "status": "pending"},
-        ])
+        await ops.upsert_conversation_tasks(
+            db_session,
+            self.conv.id,
+            [
+                {"title": "Do this", "status": "pending"},
+            ],
+        )
         ok = await ops.update_task_status(db_session, self.conv.id, 0, "completed")
         assert ok is True
         tasks = await ops.get_conversation_tasks(db_session, self.conv.id)
         assert tasks[0].status == "completed"
 
     async def test_update_task_status_out_of_range(self, db_session: AsyncSession):
-        await ops.upsert_conversation_tasks(db_session, self.conv.id, [
-            {"title": "Only one"},
-        ])
+        await ops.upsert_conversation_tasks(
+            db_session,
+            self.conv.id,
+            [
+                {"title": "Only one"},
+            ],
+        )
         ok = await ops.update_task_status(db_session, self.conv.id, 5, "completed")
         assert ok is False
 
@@ -261,8 +287,11 @@ async def _conv(self, db_session: AsyncSession, test_user):
 
     async def test_add_resource(self, db_session: AsyncSession):
         res = await ops.add_conversation_resource(
-            db_session, self.conv.id,
-            title="Paper 1", resource_type="paper", url="https://example.com",
+            db_session,
+            self.conv.id,
+            title="Paper 1",
+            resource_type="paper",
+            url="https://example.com",
         )
         assert res.id is not None
         assert res.title == "Paper 1"
@@ -270,24 +299,37 @@ async def test_add_resource(self, db_session: AsyncSession):
         assert res.url == "https://example.com"
 
     async def test_get_resources(self, db_session: AsyncSession):
-        await ops.add_conversation_resource(db_session, self.conv.id, title="R1", resource_type="doc")
-        await ops.add_conversation_resource(db_session, self.conv.id, title="R2", resource_type="code")
+        await ops.add_conversation_resource(
+            db_session, self.conv.id, title="R1", resource_type="doc"
+        )
+        await ops.add_conversation_resource(
+            db_session, self.conv.id, title="R2", resource_type="code"
+        )
         resources = await ops.get_conversation_resources(db_session, self.conv.id)
         assert len(resources) == 2
 
     async def test_get_resource_by_id(self, db_session: AsyncSession):
         res = await ops.add_conversation_resource(
-            db_session, self.conv.id, title="Test", resource_type="doc",
+            db_session,
+            self.conv.id,
+            title="Test",
+            resource_type="doc",
         )
         found = await ops.get_resource_by_id(db_session, res.resource_id)
         assert found is not None
         assert found.title == "Test"
 
     async def test_upsert_resources_replace(self, db_session: AsyncSession):
-        await ops.add_conversation_resource(db_session, self.conv.id, title="Old", resource_type="doc")
-        result = await ops.upsert_conversation_resources(db_session, self.conv.id, [
-            {"title": "New", "type": "doc"},
-        ])
+        await ops.add_conversation_resource(
+            db_session, self.conv.id, title="Old", resource_type="doc"
+        )
+        result = await ops.upsert_conversation_resources(
+            db_session,
+            self.conv.id,
+            [
+                {"title": "New", "type": "doc"},
+            ],
+        )
         assert len(result) == 1
         assert result[0].title == "New"
 
@@ -304,7 +346,10 @@ async def test_upsert_plan_resource_update(self, db_session: AsyncSession):
 
     async def test_upsert_paper_resource(self, db_session: AsyncSession):
         res = await ops.upsert_paper_resource(
-            db_session, self.conv.id, "My Paper", "## Abstract\nContent",
+            db_session,
+            self.conv.id,
+            "My Paper",
+            "## Abstract\nContent",
         )
         assert res.title == "My Paper"
         assert res.type == "paper"
@@ -312,8 +357,11 @@ async def test_upsert_paper_resource(self, db_session: AsyncSession):
 
     async def test_upsert_resource_create(self, db_session: AsyncSession):
         res = await ops.upsert_resource(
-            db_session, self.conv.id,
-            resource_id="custom-id", title="Custom", resource_type="report",
+            db_session,
+            self.conv.id,
+            resource_id="custom-id",
+            title="Custom",
+            resource_type="report",
             content="Report content",
         )
         assert res.resource_id == "custom-id"
@@ -321,12 +369,20 @@ async def test_upsert_resource_create(self, db_session: AsyncSession):
 
     async def test_upsert_resource_update(self, db_session: AsyncSession):
         await ops.upsert_resource(
-            db_session, self.conv.id,
-            resource_id="rid", title="Old Title", resource_type="doc", content="Old",
+            db_session,
+            self.conv.id,
+            resource_id="rid",
+            title="Old Title",
+            resource_type="doc",
+            content="Old",
         )
         res = await ops.upsert_resource(
-            db_session, self.conv.id,
-            resource_id="rid", title="New Title", resource_type="doc", content="New",
+            db_session,
+            self.conv.id,
+            resource_id="rid",
+            title="New Title",
+            resource_type="doc",
+            content="New",
         )
         assert res.title == "New Title"
         assert res.content == "New"
@@ -339,7 +395,10 @@ async def _conv(self, db_session: AsyncSession, test_user):
 
     async def test_create_agent_job(self, db_session: AsyncSession):
         job = await ops.create_agent_job(
-            db_session, self.conv.id, self.conv.user_id, "Process this",
+            db_session,
+            self.conv.id,
+            self.conv.user_id,
+            "Process this",
         )
         assert job.job_id is not None
         assert job.status == "queued"
@@ -347,7 +406,10 @@ async def test_create_agent_job(self, db_session: AsyncSession):
 
     async def test_get_agent_job(self, db_session: AsyncSession):
         job = await ops.create_agent_job(
-            db_session, self.conv.id, self.conv.user_id, "Test",
+            db_session,
+            self.conv.id,
+            self.conv.user_id,
+            "Test",
         )
         found = await ops.get_agent_job(db_session, job.job_id)
         assert found is not None
diff --git a/backend/tests/test_dependencies.py b/backend/tests/test_dependencies.py
index c5fc776..dc2562c 100644
--- a/backend/tests/test_dependencies.py
+++ b/backend/tests/test_dependencies.py
@@ -9,6 +9,7 @@
 class TestGetConfig:
     async def test_get_config_returns_agent_config(self):
         from openmlr.dependencies import get_config
+
         config = get_config()
         assert config is not None
         assert hasattr(config, "model_name")
@@ -16,6 +17,7 @@ async def test_get_config_returns_agent_config(self):
 
     async def test_get_config_is_cached(self):
         from openmlr.dependencies import get_config
+
         config1 = get_config()
         config2 = get_config()
         assert config1 is config2
@@ -41,6 +43,7 @@ async def test_invalid_token_returns_401(self, client: AsyncClient):
 class TestGetDB:
     async def test_db_session_yielded(self, client: AsyncClient):
         from openmlr.dependencies import get_db
+
         sessions = []
         async for s in get_db():
             sessions.append(s)
diff --git a/backend/tests/test_doom_loop.py b/backend/tests/test_doom_loop.py
index e9a4f66..83fdf0a 100644
--- a/backend/tests/test_doom_loop.py
+++ b/backend/tests/test_doom_loop.py
@@ -17,14 +17,13 @@ def _assistant_with_tool(name: str, args: dict | None = None) -> Message:
     return Message(
         role="assistant",
         content="",
-        tool_calls=[
-            ToolCall(id=f"call_{name}", name=name, arguments=args or {})
-        ],
+        tool_calls=[ToolCall(id=f"call_{name}", name=name, arguments=args or {})],
     )
 
 
 # ── Edge cases / no detection ──────────────────────────────────────────────
 
+
 class TestNoDetection:
     def test_returns_none_for_empty_list(self):
         assert detect_doom_loop([]) is None
@@ -57,6 +56,7 @@ def test_ignores_non_assistant_messages(self):
 
 # ── Pattern 1: identical consecutive calls ─────────────────────────────────
 
+
 class TestIdenticalConsecutive:
     def test_detects_3_identical_calls(self):
         msgs = [
@@ -93,6 +93,7 @@ def test_different_args_are_not_identical(self):
 
 # ── Pattern 2: repeating sequences ─────────────────────────────────────────
 
+
 class TestRepeatingSequences:
     def test_detects_AB_AB_pattern(self):
         msgs = [
diff --git a/backend/tests/test_event_bus.py b/backend/tests/test_event_bus.py
index 628ae22..a38251b 100644
--- a/backend/tests/test_event_bus.py
+++ b/backend/tests/test_event_bus.py
@@ -11,6 +11,7 @@
 # Fixtures
 # ---------------------------------------------------------------------------
 
+
 @pytest.fixture
 def bus() -> EventBus:
     return EventBus()
@@ -20,6 +21,7 @@ def bus() -> EventBus:
 # subscribe
 # ---------------------------------------------------------------------------
 
+
 class TestSubscribe:
     def test_subscribe_returns_queue(self, bus: EventBus):
         queue = bus.subscribe()
@@ -41,6 +43,7 @@ def test_subscribe_queue_has_maxsize(self, bus: EventBus):
 # unsubscribe
 # ---------------------------------------------------------------------------
 
+
 class TestUnsubscribe:
     def test_unsubscribe_removes_queue(self, bus: EventBus):
         q = bus.subscribe()
@@ -66,6 +69,7 @@ def test_unsubscribe_only_removes_target(self, bus: EventBus):
 # subscriber_count
 # ---------------------------------------------------------------------------
 
+
 class TestSubscriberCount:
     def test_starts_at_zero(self, bus: EventBus):
         assert bus.subscriber_count == 0
@@ -87,6 +91,7 @@ def test_decrements_on_unsubscribe(self, bus: EventBus):
 # broadcast
 # ---------------------------------------------------------------------------
 
+
 class TestBroadcast:
     @pytest.mark.asyncio
     async def test_broadcast_dict_event(self, bus: EventBus):
@@ -158,6 +163,7 @@ async def test_broadcast_agent_event_with_none_data(self, bus: EventBus):
 # AgentEvent serialization (to_sse)
 # ---------------------------------------------------------------------------
 
+
 class TestAgentEventSerialization:
     def test_to_sse_format(self):
         event = AgentEvent(event_type="done", data={"result": 42})
@@ -166,7 +172,8 @@ def test_to_sse_format(self):
         assert sse.endswith("\n\n")
 
         import json
-        payload = json.loads(sse[len("data: "):-2])
+
+        payload = json.loads(sse[len("data: ") : -2])
         assert payload["event_type"] == "done"
         assert payload["data"]["result"] == 42
 
@@ -174,5 +181,6 @@ def test_to_sse_none_data(self):
         event = AgentEvent(event_type="ping")
         sse = event.to_sse()
         import json
-        payload = json.loads(sse[len("data: "):-2])
+
+        payload = json.loads(sse[len("data: ") : -2])
         assert payload["data"] is None
diff --git a/backend/tests/test_job_manager.py b/backend/tests/test_job_manager.py
index 401207e..10854a9 100644
--- a/backend/tests/test_job_manager.py
+++ b/backend/tests/test_job_manager.py
@@ -21,10 +21,13 @@ async def test_get_job_manager_singleton(self):
         jm2 = get_job_manager()
         assert jm1 is jm2
 
-    async def test_create_job_disabled_by_default(self, db_session: AsyncSession, conversation, test_user):
+    async def test_create_job_disabled_by_default(
+        self, db_session: AsyncSession, conversation, test_user
+    ):
         jm = JobManager()
         # USE_BACKGROUND_JOBS is controlled by env — test without making assumptions
         from openmlr.services.job_manager import USE_BACKGROUND_JOBS
+
         job = await jm.create_job(
             db=db_session,
             conversation_id=conversation.id,
@@ -44,7 +47,10 @@ async def test_get_job_status_nonexistent(self, db_session: AsyncSession):
 
     async def test_get_job_status_from_db(self, db_session: AsyncSession, conversation, test_user):
         job = await ops.create_agent_job(
-            db_session, conversation.id, test_user.id, "Test",
+            db_session,
+            conversation.id,
+            test_user.id,
+            "Test",
         )
         jm = JobManager()
         status = await jm.get_job_status(db_session, job.job_id)
@@ -76,7 +82,9 @@ async def test_cancel_nonexistent_job(self, db_session: AsyncSession):
         cancelled = await jm.cancel_job(db_session, "nonexistent")
         assert cancelled is False
 
-    async def test_cancel_already_running_job(self, db_session: AsyncSession, conversation, test_user):
+    async def test_cancel_already_running_job(
+        self, db_session: AsyncSession, conversation, test_user
+    ):
         job = await ops.create_agent_job(db_session, conversation.id, test_user.id, "Test")
         await ops.update_job_status(db_session, job.job_id, "running")
         jm = JobManager()
diff --git a/backend/tests/test_llm.py b/backend/tests/test_llm.py
index e636ab1..2cf4d9a 100644
--- a/backend/tests/test_llm.py
+++ b/backend/tests/test_llm.py
@@ -6,12 +6,15 @@
 
 
 class TestGetApiKey:
-    @pytest.mark.parametrize("model_name,env_var", [
-        ("openai/gpt-4o", "OPENAI_API_KEY"),
-        ("anthropic/claude-sonnet-4", "ANTHROPIC_API_KEY"),
-        ("openrouter/openai/gpt-4o", "OPENROUTER_API_KEY"),
-        ("opencode-go/glm-5.1", "OPENCODE_GO_API_KEY"),
-    ])
+    @pytest.mark.parametrize(
+        "model_name,env_var",
+        [
+            ("openai/gpt-4o", "OPENAI_API_KEY"),
+            ("anthropic/claude-sonnet-4", "ANTHROPIC_API_KEY"),
+            ("openrouter/openai/gpt-4o", "OPENROUTER_API_KEY"),
+            ("opencode-go/glm-5.1", "OPENCODE_GO_API_KEY"),
+        ],
+    )
     def test_model_prefix_maps_to_env_var(self, monkeypatch, model_name, env_var):
         monkeypatch.setenv(env_var, f"test-key-{env_var}")
         key = LLMProvider._get_api_key(model_name)
@@ -47,15 +50,18 @@ def test_fallback_to_any_available_key(self, monkeypatch):
 
 
 class TestNormalizeModel:
-    @pytest.mark.parametrize("full_name,normalized", [
-        ("openai/gpt-4o", "gpt-4o"),
-        ("anthropic/claude-sonnet-4", "claude-sonnet-4"),
-        ("openrouter/anthropic/claude-3-sonnet", "anthropic/claude-3-sonnet"),
-        ("ollama/llama3.1", "llama3.1"),
-        ("lmstudio/default", "default"),
-        ("local/custom-model", "custom-model"),
-        ("opencode-go/glm-5.1", "glm-5.1"),
-    ])
+    @pytest.mark.parametrize(
+        "full_name,normalized",
+        [
+            ("openai/gpt-4o", "gpt-4o"),
+            ("anthropic/claude-sonnet-4", "claude-sonnet-4"),
+            ("openrouter/anthropic/claude-3-sonnet", "anthropic/claude-3-sonnet"),
+            ("ollama/llama3.1", "llama3.1"),
+            ("lmstudio/default", "default"),
+            ("local/custom-model", "custom-model"),
+            ("opencode-go/glm-5.1", "glm-5.1"),
+        ],
+    )
     def test_normalize_strips_prefix(self, full_name, normalized):
         result = LLMProvider._normalize_model(full_name)
         assert result == normalized
@@ -136,7 +142,11 @@ def test_openai_tool_param_empty(self):
 
     def test_openai_tool_param_raw_format(self):
         tools = [
-            {"name": "search", "description": "Search web", "parameters": {"type": "object", "properties": {}}},
+            {
+                "name": "search",
+                "description": "Search web",
+                "parameters": {"type": "object", "properties": {}},
+            },
         ]
         result = LLMProvider._openai_tool_param(tools)
         assert len(result) == 1
@@ -145,7 +155,10 @@ def test_openai_tool_param_raw_format(self):
 
     def test_openai_tool_param_already_formatted(self):
         tools = [
-            {"type": "function", "function": {"name": "bash", "description": "Run cmd", "parameters": {}}},
+            {
+                "type": "function",
+                "function": {"name": "bash", "description": "Run cmd", "parameters": {}},
+            },
         ]
         result = LLMProvider._openai_tool_param(tools)
         assert len(result) == 1
@@ -178,7 +191,11 @@ def test_anthropic_tool_param_conversion(self):
 
     def test_anthropic_tool_param_unwrapped(self):
         tools = [
-            {"name": "bash", "description": "Run cmd", "parameters": {"type": "object", "properties": {}}},
+            {
+                "name": "bash",
+                "description": "Run cmd",
+                "parameters": {"type": "object", "properties": {}},
+            },
         ]
         result = LLMProvider._anthropic_tool_param(tools)
         assert len(result) == 1
diff --git a/backend/tests/test_models.py b/backend/tests/test_models.py
index db1f9da..73f5e80 100644
--- a/backend/tests/test_models.py
+++ b/backend/tests/test_models.py
@@ -82,11 +82,17 @@ def test_custom(self):
 class TestConversationResponse:
     def test_creation(self):
         from datetime import datetime
+
         now = datetime.now(UTC)
         c = ConversationResponse(
-            id=1, uuid="abc-def", title="Test Conv", model="gpt-4o",
-            mode="general", user_message_count=5,
-            created_at=now, updated_at=now,
+            id=1,
+            uuid="abc-def",
+            title="Test Conv",
+            model="gpt-4o",
+            mode="general",
+            user_message_count=5,
+            created_at=now,
+            updated_at=now,
         )
         assert c.id == 1
         assert c.uuid == "abc-def"
@@ -96,6 +102,7 @@ def test_creation(self):
 class TestMessageResponse:
     def test_creation(self):
         from datetime import datetime
+
         now = datetime.now(UTC)
         m = MessageResponse(id=1, role="user", content="Hello", metadata=None, created_at=now)
         assert m.id == 1
@@ -104,18 +111,28 @@ def test_creation(self):
 
     def test_with_metadata(self):
         from datetime import datetime
+
         now = datetime.now(UTC)
-        m = MessageResponse(id=2, role="assistant", content="Hi", metadata={"tool": "search"}, created_at=now)
+        m = MessageResponse(
+            id=2, role="assistant", content="Hi", metadata={"tool": "search"}, created_at=now
+        )
         assert m.metadata == {"tool": "search"}
 
 
 class TestConversationDetail:
     def test_creation(self):
         from datetime import datetime
+
         now = datetime.now(UTC)
         conv = ConversationResponse(
-            id=1, uuid="x", title="C", model=None, mode="general",
-            user_message_count=0, created_at=now, updated_at=now,
+            id=1,
+            uuid="x",
+            title="C",
+            model=None,
+            mode="general",
+            user_message_count=0,
+            created_at=now,
+            updated_at=now,
         )
         msgs = [MessageResponse(id=1, role="user", content="Hi", metadata=None, created_at=now)]
         cd = ConversationDetail(conversation=conv, messages=msgs)
diff --git a/backend/tests/test_prompts.py b/backend/tests/test_prompts.py
index 31483b4..66ec032 100644
--- a/backend/tests/test_prompts.py
+++ b/backend/tests/test_prompts.py
@@ -1,6 +1,5 @@
 """Tests for system prompt builder."""
 
-
 from openmlr.agent.prompts import COMPACT_PROMPT, build_system_prompt
 from openmlr.agent.types import ToolSpec
 
@@ -28,17 +27,22 @@ def test_renders_with_sandbox_info(self):
         assert isinstance(prompt, str)
 
     def test_renders_with_mode_plan(self):
-        tools = [ToolSpec(name="ask_user", description="Ask questions", parameters={"type": "object"})]
+        tools = [
+            ToolSpec(name="ask_user", description="Ask questions", parameters={"type": "object"})
+        ]
         prompt = build_system_prompt(tool_specs=tools, mode="plan")
         assert isinstance(prompt, str)
 
     def test_renders_with_mode_research(self):
-        tools = [ToolSpec(name="papers", description="Search papers", parameters={"type": "object"})]
+        tools = [
+            ToolSpec(name="papers", description="Search papers", parameters={"type": "object"})
+        ]
         prompt = build_system_prompt(tool_specs=tools, mode="research")
         assert isinstance(prompt, str)
 
     def test_renders_with_config(self):
         from openmlr.config import AgentConfig
+
         config = AgentConfig(model_name="test/model", max_iterations=10)
         tools = [ToolSpec(name="test", description="Test tool", parameters={"type": "object"})]
         prompt = build_system_prompt(tool_specs=tools, config=config)
@@ -63,6 +67,7 @@ def test_contains_date_and_time(self):
         prompt = build_system_prompt(tool_specs=tools)
         # Contains date in YYYY-MM-DD format
         import re
+
         assert re.search(r"\d{4}-\d{2}-\d{2}", prompt)
 
     def test_contains_sandbox_info_in_prompt(self):
diff --git a/backend/tests/test_redis_pubsub.py b/backend/tests/test_redis_pubsub.py
index cd0c3bd..1b98681 100644
--- a/backend/tests/test_redis_pubsub.py
+++ b/backend/tests/test_redis_pubsub.py
@@ -163,14 +163,17 @@ async def test_returns_none_on_redis_error(self):
 class TestModuleConstants:
     def test_channel_name(self):
         from openmlr.services.redis_pubsub import CHANNEL_NAME
+
         assert CHANNEL_NAME == "openmlr:events"
 
     def test_answers_key_prefix(self):
         from openmlr.services.redis_pubsub import ANSWERS_KEY_PREFIX
+
         assert ANSWERS_KEY_PREFIX == "openmlr:answers:"
 
     def test_interrupt_key_prefix(self):
         from openmlr.services.redis_pubsub import INTERRUPT_KEY_PREFIX
+
         assert INTERRUPT_KEY_PREFIX == "openmlr:interrupt:"
 
     def test_redis_url_from_env(self, monkeypatch):
@@ -178,6 +181,7 @@ def test_redis_url_from_env(self, monkeypatch):
         from importlib import reload
 
         import openmlr.services.redis_pubsub
+
         reload(openmlr.services.redis_pubsub)
         assert openmlr.services.redis_pubsub.REDIS_URL == "redis://custom:6379/1"
         # Restore
diff --git a/backend/tests/test_routes_health.py b/backend/tests/test_routes_health.py
index 6af06e6..fc6d349 100644
--- a/backend/tests/test_routes_health.py
+++ b/backend/tests/test_routes_health.py
@@ -38,5 +38,6 @@ async def test_health_timestamp_is_iso_format(self, client: AsyncClient):
         resp = await client.get("/health")
         data = resp.json()
         import re
+
         iso_pattern = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}"
         assert re.search(iso_pattern, data["timestamp"])
diff --git a/backend/tests/test_routes_settings.py b/backend/tests/test_routes_settings.py
index fc4af07..530065e 100644
--- a/backend/tests/test_routes_settings.py
+++ b/backend/tests/test_routes_settings.py
@@ -18,7 +18,9 @@ async def test_get_all_settings_empty(self, auth_client: AsyncClient):
         data = resp.json()
         assert "settings" in data
 
-    async def test_get_all_settings_after_set(self, auth_client: AsyncClient, db_session: AsyncSession, test_user):
+    async def test_get_all_settings_after_set(
+        self, auth_client: AsyncClient, db_session: AsyncSession, test_user
+    ):
         await ops.set_user_setting(db_session, test_user.id, "agent", "default_model", "claude")
         resp = await auth_client.get("/api/settings")
         assert resp.status_code == 200
@@ -26,7 +28,9 @@ async def test_get_all_settings_after_set(self, auth_client: AsyncClient, db_ses
         assert "agent" in data["settings"]
         assert data["settings"]["agent"]["default_model"] == "claude"
 
-    async def test_get_settings_category(self, auth_client: AsyncClient, db_session: AsyncSession, test_user):
+    async def test_get_settings_category(
+        self, auth_client: AsyncClient, db_session: AsyncSession, test_user
+    ):
         await ops.set_user_setting(db_session, test_user.id, "agent", "yolo_mode", True)
         resp = await auth_client.get("/api/settings/agent")
         assert resp.status_code == 200
@@ -34,7 +38,9 @@ async def test_get_settings_category(self, auth_client: AsyncClient, db_session:
         assert "settings" in data
         assert data["settings"]["yolo_mode"] is True
 
-    async def test_update_setting(self, auth_client: AsyncClient, db_session: AsyncSession, test_user):
+    async def test_update_setting(
+        self, auth_client: AsyncClient, db_session: AsyncSession, test_user
+    ):
         resp = await auth_client.put(
             "/api/settings/agent/test_key",
             json={"value": "test_value"},
@@ -56,13 +62,17 @@ async def test_update_setting_with_dict(self, auth_client: AsyncClient):
         )
         assert resp.status_code == 200
 
-    async def test_delete_setting(self, auth_client: AsyncClient, db_session: AsyncSession, test_user):
+    async def test_delete_setting(
+        self, auth_client: AsyncClient, db_session: AsyncSession, test_user
+    ):
         await ops.set_user_setting(db_session, test_user.id, "agent", "remove_me", "yes")
         resp = await auth_client.delete("/api/settings/agent/remove_me")
         assert resp.status_code == 200
         assert resp.json() == {"ok": True}
 
-    async def test_update_provider_key_sets_env(self, auth_client: AsyncClient, db_session: AsyncSession, test_user):
+    async def test_update_provider_key_sets_env(
+        self, auth_client: AsyncClient, db_session: AsyncSession, test_user
+    ):
         resp = await auth_client.put(
             "/api/settings/providers/openai_api_key",
             json={"value": "sk-test-key"},
@@ -112,21 +122,33 @@ async def test_get_status(self, auth_client: AsyncClient):
 
 class TestModels:
     async def test_list_models(self, auth_client: AsyncClient):
+        # Configure a provider so models are returned
+        await auth_client.put(
+            "/api/settings/providers/openai_api_key",
+            json={"value": "sk-test-key"},
+        )
         resp = await auth_client.get("/api/models")
         assert resp.status_code == 200
         data = resp.json()
         assert "models" in data
+        assert "recent_models" in data
         models = data["models"]
         assert isinstance(models, list)
         assert len(models) > 0
 
     async def test_models_have_required_fields(self, auth_client: AsyncClient):
+        # Configure a provider so models are returned
+        await auth_client.put(
+            "/api/settings/providers/openai_api_key",
+            json={"value": "sk-test-key"},
+        )
         resp = await auth_client.get("/api/models")
         models = resp.json()["models"]
         for m in models:
             assert "id" in m
             assert "name" in m
             assert "provider" in m
+            assert "release_date" in m
 
 
 class TestConfigEndpoint:
diff --git a/backend/tests/test_session.py b/backend/tests/test_session.py
index 1e52399..a57feb7 100644
--- a/backend/tests/test_session.py
+++ b/backend/tests/test_session.py
@@ -13,6 +13,7 @@
 # Fixtures
 # ---------------------------------------------------------------------------
 
+
 @pytest.fixture
 def config() -> AgentConfig:
     return AgentConfig(model_name="test-model")
@@ -27,6 +28,7 @@ def session(config: AgentConfig) -> Session:
 # Initialization
 # ---------------------------------------------------------------------------
 
+
 class TestInit:
     def test_session_creates_context_manager(self, session: Session):
         assert isinstance(session.context_manager, ContextManager)
@@ -52,6 +54,7 @@ def test_defaults(self, session: Session):
 # emit
 # ---------------------------------------------------------------------------
 
+
 class TestEmit:
     @pytest.mark.asyncio
     async def test_emit_puts_event_in_queue(self, session: Session):
@@ -120,6 +123,7 @@ def bad_listener(e):
 # cancel / is_cancelled / clear_cancel
 # ---------------------------------------------------------------------------
 
+
 class TestCancellation:
     def test_not_cancelled_initially(self, session: Session):
         assert session.is_cancelled() is False
@@ -147,6 +151,7 @@ def test_cancel_clear_cancel_cycle(self, session: Session):
 # on_event
 # ---------------------------------------------------------------------------
 
+
 class TestOnEvent:
     def test_registers_listener(self, session: Session):
         assert len(session._listeners) == 0
@@ -164,6 +169,7 @@ def test_registers_multiple_listeners(self, session: Session):
 # update_model
 # ---------------------------------------------------------------------------
 
+
 class TestUpdateModel:
     def test_update_model_changes_config(self, session: Session):
         assert session.config.model_name == "test-model"
diff --git a/backend/tests/test_session_manager.py b/backend/tests/test_session_manager.py
index 5e56118..c9a4386 100644
--- a/backend/tests/test_session_manager.py
+++ b/backend/tests/test_session_manager.py
@@ -84,7 +84,9 @@ async def test_session_loads_existing_messages(self, session_manager):
             {"role": "assistant", "content": "Hi there!"},
         ]
         active = await session_manager.get_or_create_session(
-            1, "u1", existing_messages=messages,
+            1,
+            "u1",
+            existing_messages=messages,
         )
         msgs = active.session.context_manager.get_messages()
         assert len(msgs) >= 2  # includes system prompt + existing messages
diff --git a/backend/tests/test_tool_registry.py b/backend/tests/test_tool_registry.py
index 5811350..2c9a61a 100644
--- a/backend/tests/test_tool_registry.py
+++ b/backend/tests/test_tool_registry.py
@@ -119,7 +119,9 @@ async def handler(required_arg: str) -> tuple[str, bool]:
             return "ok", True
 
         tool = ToolSpec(
-            name="strict", description="Needs arg", parameters={"type": "object"},
+            name="strict",
+            description="Needs arg",
+            parameters={"type": "object"},
             handler=handler,
         )
         router.register(tool)
diff --git a/backend/tests/test_tools_local.py b/backend/tests/test_tools_local.py
index a1f007f..c598aa3 100644
--- a/backend/tests/test_tools_local.py
+++ b/backend/tests/test_tools_local.py
@@ -221,6 +221,7 @@ def test_container_prefix(self):
     def test_allow_direct_exec_default(self, monkeypatch):
         monkeypatch.delenv("OPENMLR_ALLOW_DIRECT_EXEC", raising=False)
         import openmlr.tools.local
+
         allow = openmlr.tools.local.ALLOW_DIRECT_EXEC
         assert allow is False
 
diff --git a/backend/tests/test_tools_papers.py b/backend/tests/test_tools_papers.py
index adb922a..df7627b 100644
--- a/backend/tests/test_tools_papers.py
+++ b/backend/tests/test_tools_papers.py
@@ -96,6 +96,7 @@ async def test_get_budget_info(self):
 
     async def test_increment_and_check(self):
         from openmlr.tools.papers import _search_counts
+
         _search_counts.clear()
         _increment_budget()
         info = _get_budget_info()
diff --git a/backend/tests/test_tools_writing.py b/backend/tests/test_tools_writing.py
index d2d7cee..efd525d 100644
--- a/backend/tests/test_tools_writing.py
+++ b/backend/tests/test_tools_writing.py
@@ -33,6 +33,7 @@ async def test_creates_tool(self):
 class TestCreateProject:
     async def test_creates_project(self):
         from openmlr.tools.writing import _projects
+
         _projects.clear()
         result, ok = _create_project(conv_id=1, title="My Paper")
         assert ok is True
@@ -51,6 +52,7 @@ async def test_requires_title(self):
 class TestSetOutline:
     async def test_no_project(self):
         from openmlr.tools.writing import _projects
+
         _projects.clear()
         result, ok = _set_outline(conv_id=999, outline=[])
         assert ok is False
@@ -58,6 +60,7 @@ async def test_no_project(self):
 
     async def test_requires_outline(self):
         from openmlr.tools.writing import _projects
+
         _projects.clear()
         _create_project(conv_id=1, title="Test")
         result, ok = _set_outline(conv_id=1, outline=None)
@@ -66,13 +69,18 @@ async def test_requires_outline(self):
 
     async def test_sets_outline(self):
         from openmlr.tools.writing import _projects
+
         _projects.clear()
         _create_project(conv_id=1, title="Test")
         outline = [
             {"id": "sec1", "title": "Introduction"},
-            {"id": "sec2", "title": "Methods", "subsections": [
-                {"id": "sec2.1", "title": "Setup"},
-            ]},
+            {
+                "id": "sec2",
+                "title": "Methods",
+                "subsections": [
+                    {"id": "sec2.1", "title": "Setup"},
+                ],
+            },
         ]
         result, ok = _set_outline(conv_id=1, outline=outline)
         assert ok is True
@@ -85,12 +93,14 @@ async def test_sets_outline(self):
 class TestWriteSection:
     async def test_no_project(self):
         from openmlr.tools.writing import _projects
+
         _projects.clear()
         result, ok = _write_section(conv_id=999, section_id="s1", content="text")
         assert ok is False
 
     async def test_writes_section(self):
         from openmlr.tools.writing import _projects
+
         _projects.clear()
         _create_project(conv_id=1, title="Test")
         _set_outline(conv_id=1, outline=[{"id": "intro", "title": "Introduction"}])
@@ -106,9 +116,12 @@ async def test_no_project(self):
         from unittest.mock import AsyncMock, patch
 
         from openmlr.tools.writing import _projects
+
         _projects.clear()
         # Mock _get_author_info to avoid database calls
-        with patch('openmlr.tools.writing._get_author_info', new_callable=AsyncMock, return_value=None):
+        with patch(
+            "openmlr.tools.writing._get_author_info", new_callable=AsyncMock, return_value=None
+        ):
             result, ok = await _get_draft(conv_id=999)
         assert ok is False
 
@@ -116,12 +129,15 @@ async def test_generates_draft(self):
         from unittest.mock import AsyncMock, patch
 
         from openmlr.tools.writing import _projects
+
         _projects.clear()
         _create_project(conv_id=1, title="The Paper")
         _set_outline(conv_id=1, outline=[{"id": "intro", "title": "Introduction"}])
         _write_section(conv_id=1, section_id="intro", content="This is the intro.")
         # Mock _get_author_info to avoid database calls
-        with patch('openmlr.tools.writing._get_author_info', new_callable=AsyncMock, return_value=None):
+        with patch(
+            "openmlr.tools.writing._get_author_info", new_callable=AsyncMock, return_value=None
+        ):
             result, ok = await _get_draft(conv_id=1)
         assert ok is True
         assert "# The Paper" in result
@@ -136,9 +152,13 @@ async def test_generates_full_draft(self):
             "title": "ML Research",
             "outline": [
                 {"id": "abstract", "title": "Abstract"},
-                {"id": "method", "title": "Method", "subsections": [
-                    {"id": "method.experimental", "title": "Experimental Setup"},
-                ]},
+                {
+                    "id": "method",
+                    "title": "Method",
+                    "subsections": [
+                        {"id": "method.experimental", "title": "Experimental Setup"},
+                    ],
+                },
             ],
             "sections": {
                 "abstract": "This is the abstract.",
@@ -162,6 +182,7 @@ async def test_generates_full_draft(self):
 class TestListSections:
     async def test_no_project(self):
         from openmlr.tools.writing import _projects
+
         _projects.clear()
         result, ok = _list_sections(conv_id=999)
         assert ok is False
@@ -169,12 +190,16 @@ async def test_no_project(self):
 
     async def test_lists_sections_with_status(self):
         from openmlr.tools.writing import _projects
+
         _projects.clear()
         _create_project(conv_id=1, title="Test")
-        _set_outline(conv_id=1, outline=[
-            {"id": "s1", "title": "Section 1"},
-            {"id": "s2", "title": "Section 2"},
-        ])
+        _set_outline(
+            conv_id=1,
+            outline=[
+                {"id": "s1", "title": "Section 1"},
+                {"id": "s2", "title": "Section 2"},
+            ],
+        )
         _write_section(conv_id=1, section_id="s1", content="written")
         result, ok = _list_sections(conv_id=1)
         assert ok is True
@@ -186,6 +211,7 @@ async def test_lists_sections_with_status(self):
 class TestAddCitation:
     async def test_adds_citation(self):
         from openmlr.tools.writing import _projects
+
         _projects.clear()
         _create_project(conv_id=1, title="Test")
         citation = {
@@ -203,13 +229,16 @@ async def test_adds_citation(self):
 class TestRefineSection:
     async def test_returns_feedback_mode(self):
         from openmlr.tools.writing import _projects
+
         _projects.clear()
         _create_project(conv_id=1, title="Test")
         _set_outline(conv_id=1, outline=[{"id": "s1", "title": "Section"}])
         _write_section(conv_id=1, section_id="s1", content="original content")
         result, ok = _refine_section(
-            conv_id=1, section_id="s1",
-            content=None, feedback="make it better",
+            conv_id=1,
+            section_id="s1",
+            content=None,
+            feedback="make it better",
         )
         assert ok is True
         assert "feedback" in result.lower()
@@ -220,10 +249,14 @@ class TestCountSections:
     async def test_counts_with_subsections(self):
         outline = [
             {"id": "a", "title": "A"},
-            {"id": "b", "title": "B", "subsections": [
-                {"id": "b1", "title": "B1"},
-                {"id": "b2", "title": "B2"},
-            ]},
+            {
+                "id": "b",
+                "title": "B",
+                "subsections": [
+                    {"id": "b1", "title": "B1"},
+                    {"id": "b2", "title": "B2"},
+                ],
+            },
         ]
         assert _count_sections(outline) == 4
 
diff --git a/docker-compose.yml b/docker-compose.yml
index 127590f..1c4969a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -39,7 +39,7 @@ services:
       timeout: 5s
       retries: 5
 
-  # Web with live reload - mounts source code
+  # Backend API with live reload — serves Swagger docs at :3000 in dev
   web:
     build:
       context: .
@@ -59,6 +59,8 @@ services:
       REDIS_URL: redis://redis:6379/0
       USE_BACKGROUND_JOBS: "true"
       USE_REDIS_PUBSUB: "true"
+      DEV_MODE: "true"
+      CORS_ORIGINS: "http://localhost:3000,http://localhost:5173"
       PYTHONDONTWRITEBYTECODE: "1"
       PYTHONUNBUFFERED: "1"
     depends_on:
@@ -69,7 +71,6 @@ services:
     volumes:
       - ./backend:/app/backend
       - backend-venv:/app/backend/.venv
-      - ./frontend/dist:/app/frontend/dist
       - ./.keys:/app/.keys
       - workspaces:/app/.workspaces
 
@@ -108,6 +109,21 @@ services:
       - ./.keys:/app/.keys
       - workspaces:/app/.workspaces
 
+  # Frontend dev server with hot-reload (HMR) — access UI at :5173
+  frontend:
+    image: node:20-slim
+    working_dir: /app/frontend
+    command: sh -c "corepack enable && pnpm install && pnpm dev"
+    ports:
+      - "5173:5173"
+    environment:
+      VITE_API_URL: "http://web:3000"
+    depends_on:
+      - web
+    volumes:
+      - ./frontend:/app/frontend
+      - frontend-node-modules:/app/frontend/node_modules
+
   # Docs site with live reload
   docs:
     image: node:20-slim
@@ -123,5 +139,6 @@ volumes:
   pgdata:
   redisdata:
   backend-venv:
+  frontend-node-modules:
   docs-node-modules:
   workspaces:       # Project workspaces — persists across container rebuilds
diff --git a/frontend/package.json b/frontend/package.json
index 4fdd2c7..a8de748 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -13,6 +13,9 @@
     "lint:fix": "eslint src/ --fix"
   },
   "dependencies": {
+    "@xterm/addon-fit": "^0.11.0",
+    "@xterm/addon-web-links": "^0.12.0",
+    "@xterm/xterm": "^6.0.0",
     "lucide-react": "^1.11.0",
     "react": "^19.0.0",
     "react-dom": "^19.0.0",
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index 501a017..2a71f8e 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -19,6 +19,7 @@ import { AuthGuard } from './components/AuthGuard';
 import { OnboardingModal } from './components/OnboardingModal';
 import { Terminal } from './components/Terminal';
 import { ProjectModal } from './components/ProjectModal';
+import { ProjectManageModal } from './components/ProjectManageModal';
 import { SettingsPage } from './components/SettingsPage';
 import { ProvidersSettings } from './components/settings/ProvidersSettings';
 import { AgentSettings } from './components/settings/AgentSettings';
@@ -108,6 +109,7 @@ function ChatUI({
   const [projects, setProjects] = useState<Project[]>([]);
   const [activeProject, setActiveProject] = useState<Project | null>(null);
   const [showProjectModal, setShowProjectModal] = useState(false);
+  const [showManageProjects, setShowManageProjects] = useState(false);
   const [terminalOpen, setTerminalOpen] = useState(false);
 
   // Ref to always have current conv UUID in SSE callback (avoids stale closure)
@@ -599,6 +601,7 @@ function ChatUI({
           onDelete={handleDeleteConversation}
           onSelectProject={setActiveProject}
           onNewProject={() => setShowProjectModal(true)}
+          onManageProjects={() => setShowManageProjects(true)}
         />
         
         <div 
@@ -607,25 +610,26 @@ function ChatUI({
         >
           {/* Empty state */}
           {messages.length === 0 && !effectiveProcessing && (
-            <div className="flex flex-col items-center justify-center flex-1 text-center px-4 sm:px-6 py-8 sm:py-12 relative overflow-hidden">
-              {/* Large embossed background text */}
-              <div 
-                className="absolute inset-0 flex items-center justify-center pointer-events-none select-none px-2"
-                aria-hidden="true"
-              >
-                <span 
-                  className="text-[4rem] xs:text-[6rem] sm:text-[10rem] md:text-[14rem] lg:text-[18rem] xl:text-[20rem] font-black tracking-tighter whitespace-nowrap animate-[emboss-pulse_4s_ease-in-out_infinite]"
+            <div className="flex flex-col items-center justify-center flex-1 text-center px-4 sm:px-6 py-8 sm:py-12">
+              <div className="relative mb-8">
+                {/* Glow ring behind logo */}
+                <div
+                  className="absolute inset-0 rounded-full animate-[hero-glow_6s_ease-in-out_infinite]"
                   style={{
-                    color: 'transparent',
-                    WebkitTextStroke: '1px rgba(59, 130, 246, 0.12)',
-                    textShadow: '0 0 60px rgba(59, 130, 246, 0.08), 0 0 120px rgba(59, 130, 246, 0.04)',
+                    background: 'radial-gradient(circle, rgba(59,130,246,0.12) 0%, transparent 70%)',
+                    transform: 'scale(2.5)',
                   }}
-                >
-                  OpenMLR
-                </span>
+                />
+                {/* Floating logo */}
+                <img
+                  src="/logo-512.png"
+                  alt="OpenMLR"
+                  className="relative w-24 h-24 sm:w-32 sm:h-32 select-none pointer-events-none animate-[hero-float_6s_ease-in-out_infinite]"
+                  style={{ opacity: 0.35 }}
+                  draggable={false}
+                />
               </div>
-              {/* Foreground prompt */}
-              <p className="text-lg sm:text-xl text-text-dim z-10">What would you like to research?</p>
+              <p className="text-lg sm:text-xl text-text-dim animate-[fade-in_0.6s_ease-out]">What would you like to research?</p>
             </div>
           )}
           
@@ -661,6 +665,7 @@ function ChatUI({
       
       {viewingReport && <ReportDrawer reportId={viewingReport.id || ''} title={viewingReport.title} cachedContent={viewingReport.content} onClose={() => setViewingReport(null)} />}
       {showProjectModal && <ProjectModal onClose={() => setShowProjectModal(false)} onCreate={(p) => { setProjects((prev) => [p, ...prev]); setActiveProject(p); }} />}
+      {showManageProjects && <ProjectManageModal projects={projects} onClose={() => setShowManageProjects(false)} onChanged={() => { loadProjects(); }} />}
     </div>
   );
 }
diff --git a/frontend/src/__tests__/ModelModal.test.tsx b/frontend/src/__tests__/ModelModal.test.tsx
index 8bfa61d..b24a35e 100644
--- a/frontend/src/__tests__/ModelModal.test.tsx
+++ b/frontend/src/__tests__/ModelModal.test.tsx
@@ -3,6 +3,11 @@ import { render, screen, fireEvent, waitFor } from '@testing-library/react';
 import { ModelModal } from '../components/ModelModal';
 import { api } from '../api';
 
+const mockNavigate = vi.fn();
+vi.mock('react-router-dom', () => ({
+  useNavigate: () => mockNavigate,
+}));
+
 vi.mock('../api', () => ({
   api: {
     getProviders: vi.fn(),
@@ -13,21 +18,25 @@ vi.mock('../api', () => ({
 }));
 
 const defaultProviders = [
-  { id: 'openai', name: 'OpenAI', key_env: 'OPENAI_API_KEY', configured: true },
-  { id: 'anthropic', name: 'Anthropic', key_env: 'ANTHROPIC_API_KEY', configured: false },
+  { id: 'openai', name: 'OpenAI', key_env: 'OPENAI_API_KEY', configured: true, categories: ['models'] },
+  { id: 'anthropic', name: 'Anthropic', key_env: 'ANTHROPIC_API_KEY', configured: true, categories: ['models'] },
 ];
 
 const defaultModels = [
-  { id: 'openai/gpt-4o', name: 'GPT-4o', provider: 'openai' },
-  { id: 'openai/gpt-4o-mini', name: 'GPT-4o Mini', provider: 'openai' },
-  { id: 'anthropic/claude-4', name: 'Claude 4', provider: 'anthropic' },
+  { id: 'openai/gpt-4o', name: 'GPT-4o', provider: 'openai', release_date: '2024-05-13' },
+  { id: 'openai/gpt-4o-mini', name: 'GPT-4o Mini', provider: 'openai', release_date: '2024-07-18' },
+  { id: 'anthropic/claude-4', name: 'Claude 4', provider: 'anthropic', release_date: '2025-01-01' },
+];
+
+const defaultRecent = [
+  { id: 'openai/gpt-4o', name: 'GPT-4o', provider: 'openai', release_date: '2024-05-13' },
 ];
 
 describe('ModelModal', () => {
   beforeEach(() => {
     vi.resetAllMocks();
     vi.mocked(api.getProviders).mockResolvedValue({ providers: defaultProviders });
-    vi.mocked(api.getModels).mockResolvedValue({ models: defaultModels });
+    vi.mocked(api.getModels).mockResolvedValue({ models: defaultModels, recent_models: defaultRecent });
   });
 
   it('renders current model button', () => {
@@ -42,14 +51,35 @@ describe('ModelModal', () => {
     expect(screen.getByText('Providers')).toBeInTheDocument();
   });
 
+  it('shows recent models section when opened', async () => {
+    render(<ModelModal currentModel="openai/gpt-4o" onModelChange={vi.fn()} />);
+    fireEvent.click(screen.getByText('openai/gpt-4o'));
+
+    await waitFor(() => {
+      expect(screen.getByText('Recently Used')).toBeInTheDocument();
+      expect(screen.getAllByText('GPT-4o').length).toBeGreaterThanOrEqual(1);
+    });
+  });
+
+  it('shows provider group headings', async () => {
+    render(<ModelModal currentModel="openai/gpt-4o" onModelChange={vi.fn()} />);
+    fireEvent.click(screen.getByText('openai/gpt-4o'));
+
+    await waitFor(() => {
+      // OpenAI appears as both a heading and filter option
+      expect(screen.getAllByText('OpenAI').length).toBeGreaterThanOrEqual(1);
+      expect(screen.getAllByText('Anthropic').length).toBeGreaterThanOrEqual(1);
+    });
+  });
+
   it('shows model list when opened', async () => {
     render(<ModelModal currentModel="openai/gpt-4o" onModelChange={vi.fn()} />);
     fireEvent.click(screen.getByText('openai/gpt-4o'));
 
     await waitFor(() => {
-      expect(screen.getByText('GPT-4o')).toBeInTheDocument();
+      // GPT-4o can appear more than once (recent + provider group), so query with getAllByText
+      expect(screen.getAllByText('GPT-4o').length).toBeGreaterThanOrEqual(1);
       expect(screen.getByText('GPT-4o Mini')).toBeInTheDocument();
-      expect(screen.getByText('Claude 4')).toBeInTheDocument();
     });
   });
 
@@ -59,13 +89,21 @@ describe('ModelModal', () => {
 
     await waitFor(() => {
       // Find the button containing "GPT-4o Mini"
-      const miniButton = screen.getByText('GPT-4o Mini').closest('button');
+      const miniButton = screen.getAllByText('GPT-4o Mini')[0].closest('button');
       // Check that it has the active styling (border-primary)
       expect(miniButton?.className).toContain('border-primary');
     });
   });
 
   it('switches to providers tab', async () => {
+    // Use a mix of configured and unconfigured providers for this test
+    vi.mocked(api.getProviders).mockResolvedValue({
+      providers: [
+        { id: 'openai', name: 'OpenAI', key_env: 'OPENAI_API_KEY', configured: true, categories: ['models'] },
+        { id: 'anthropic', name: 'Anthropic', key_env: 'ANTHROPIC_API_KEY', configured: false, categories: ['models'] },
+      ],
+    });
+
     render(<ModelModal currentModel="openai/gpt-4o" onModelChange={vi.fn()} />);
     fireEvent.click(screen.getByText('openai/gpt-4o'));
     fireEvent.click(screen.getByText('Providers'));
@@ -76,7 +114,7 @@ describe('ModelModal', () => {
     });
   });
 
-  it('filters models by provider', async () => {
+  it('filters models by search', async () => {
     render(<ModelModal currentModel="openai/gpt-4o" onModelChange={vi.fn()} />);
     fireEvent.click(screen.getByText('openai/gpt-4o'));
 
@@ -84,11 +122,11 @@ describe('ModelModal', () => {
       expect(screen.getByText('Claude 4')).toBeInTheDocument();
     });
 
-    const select = document.querySelector('select')!;
-    fireEvent.change(select, { target: { value: 'openai' } });
+    const input = screen.getByPlaceholderText('Search models...');
+    fireEvent.change(input, { target: { value: 'gpt-4o' } });
 
     await waitFor(() => {
-      expect(screen.getByText('GPT-4o')).toBeInTheDocument();
+      expect(screen.getAllByText('GPT-4o').length).toBeGreaterThanOrEqual(1);
       expect(screen.queryByText('Claude 4')).not.toBeInTheDocument();
     });
   });
@@ -104,7 +142,7 @@ describe('ModelModal', () => {
       expect(screen.getByText('Claude 4')).toBeInTheDocument();
     });
 
-    fireEvent.click(screen.getByText('Claude 4'));
+    fireEvent.click(screen.getAllByText('Claude 4')[0]);
 
     await waitFor(() => {
       expect(api.setModel).toHaveBeenCalledWith('anthropic/claude-4');
@@ -119,7 +157,7 @@ describe('ModelModal', () => {
     fireEvent.click(screen.getByText('openai/gpt-4o'));
 
     await waitFor(() => {
-      expect(screen.getByText('GPT-4o')).toBeInTheDocument();
+      expect(screen.getAllByText('GPT-4o').length).toBeGreaterThanOrEqual(1);
     });
 
     fireEvent.click(screen.getByText('Close'));
@@ -164,7 +202,7 @@ describe('ModelModal', () => {
     fireEvent.click(screen.getByText('openai/gpt-4o'));
 
     await waitFor(() => {
-      expect(screen.getByText('GPT-4o')).toBeInTheDocument();
+      expect(screen.getAllByText('GPT-4o').length).toBeGreaterThanOrEqual(1);
     });
 
     // Find the overlay (the fixed div with bg-black/60)
@@ -175,4 +213,25 @@ describe('ModelModal', () => {
       expect(screen.queryByText('GPT-4o Mini')).not.toBeInTheDocument();
     });
   });
+
+  it('navigates to settings when "More provider settings" clicked', async () => {
+    render(<ModelModal currentModel="openai/gpt-4o" onModelChange={vi.fn()} />);
+    fireEvent.click(screen.getByText('openai/gpt-4o'));
+
+    await waitFor(() => {
+      expect(screen.getByText('More provider settings')).toBeInTheDocument();
+    });
+
+    fireEvent.click(screen.getByText('More provider settings'));
+    expect(mockNavigate).toHaveBeenCalledWith('/settings/providers');
+  });
+
+  it('shows provider filter dropdown', async () => {
+    render(<ModelModal currentModel="openai/gpt-4o" onModelChange={vi.fn()} />);
+    fireEvent.click(screen.getByText('openai/gpt-4o'));
+
+    await waitFor(() => {
+      expect(screen.getByTitle('Filter by provider')).toBeInTheDocument();
+    });
+  });
 });
diff --git a/frontend/src/__tests__/ProvidersSettings.test.tsx b/frontend/src/__tests__/ProvidersSettings.test.tsx
index a134038..0bb064a 100644
--- a/frontend/src/__tests__/ProvidersSettings.test.tsx
+++ b/frontend/src/__tests__/ProvidersSettings.test.tsx
@@ -7,6 +7,8 @@ vi.mock('../api', () => ({
   api: {
     getProviders: vi.fn(),
     updateSetting: vi.fn(),
+    getSettingsCategory: vi.fn(),
+    fetchCustomProviderModels: vi.fn(),
   },
 }));
 
diff --git a/frontend/src/api.ts b/frontend/src/api.ts
index 781fd78..3f5ce35 100644
--- a/frontend/src/api.ts
+++ b/frontend/src/api.ts
@@ -107,9 +107,10 @@ export const api = {
 
   // Providers & Models
   getProviders: () => get('/api/providers'),
-  getModels: () => get('/api/models'),
+  getModels: (provider?: string) => get(`/api/models${provider ? `?provider=${encodeURIComponent(provider)}` : ''}`),
   getStatus: () => get('/api/status'),
   saveConfig: (config: Record<string, string>) => post('/api/config', config),
+  fetchCustomProviderModels: (providerId: string) => post(`/api/providers/${encodeURIComponent(providerId)}/fetch-models`, {}),
 
   // SSH Keys
   getKeys: () => get('/api/keys'),
diff --git a/frontend/src/components/ModelModal.tsx b/frontend/src/components/ModelModal.tsx
index 1b05d95..d3f6377 100644
--- a/frontend/src/components/ModelModal.tsx
+++ b/frontend/src/components/ModelModal.tsx
@@ -1,6 +1,7 @@
-import { useState, useEffect, useMemo } from 'react';
-import { Search, ChevronDown, Check, X, Save } from 'lucide-react';
+import { useState, useEffect, useMemo, useCallback } from 'react';
+import { Search, ChevronDown, Check, X, Filter, Save } from 'lucide-react';
 import { api } from '../api';
+import { useNavigate } from 'react-router-dom';
 
 interface Provider {
   id: string;
@@ -8,12 +9,16 @@ interface Provider {
   key_env: string;
   configured: boolean;
   categories?: string[];
+  is_custom?: boolean;
+  sdk_type?: string;
+  api_base?: string;
 }
 
 interface ModelInfo {
   id: string;
   name: string;
   provider: string;
+  release_date?: string;
 }
 
 interface Props {
@@ -23,45 +28,158 @@ interface Props {
 
 type Tab = 'models' | 'providers';
 
+/** Tiny provider logo fetched from models.dev as an <img>; it is hidden via onError if the logo fails to load. */
+function ProviderLogo({ providerId, size = 16 }: { providerId: string; size?: number }) {
+  return (
+    <img
+      src={`https://models.dev/logos/${providerId}.svg`}
+      alt=""
+      width={size}
+      height={size}
+      className="shrink-0 opacity-60"
+      style={{ filter: 'grayscale(0.3)' }}
+      loading="lazy"
+      onError={(e) => { (e.target as HTMLImageElement).style.display = 'none'; }}
+    />
+  );
+}
+
+/** Skeleton rows shown while loading */
+function LoadingSkeleton() {
+  return (
+    <div className="flex-1 flex flex-col gap-1 animate-pulse">
+      {/* Fake "Recently Used" heading */}
+      <div className="px-2 py-1">
+        <div className="h-3 w-24 bg-surface-hover rounded" />
+      </div>
+      {[1, 2].map((i) => (
+        <div key={`r${i}`} className="flex items-center justify-between px-4 py-3">
+          <div className="h-4 w-32 bg-surface-hover rounded" />
+          <div className="h-3 w-24 bg-surface-hover rounded" />
+        </div>
+      ))}
+      <div className="my-2 border-t border-border" />
+      {/* Fake provider group */}
+      <div className="px-2 py-1 flex items-center gap-2">
+        <div className="h-4 w-4 bg-surface-hover rounded" />
+        <div className="h-3 w-20 bg-surface-hover rounded" />
+      </div>
+      {[1, 2, 3, 4].map((i) => (
+        <div key={`m${i}`} className="flex items-center justify-between px-4 py-3">
+          <div className="h-4 bg-surface-hover rounded" style={{ width: `${100 + i * 20}px` }} />
+          <div className="h-3 w-28 bg-surface-hover rounded" />
+        </div>
+      ))}
+      {/* Second fake group */}
+      <div className="px-2 py-1 mt-2 flex items-center gap-2">
+        <div className="h-4 w-4 bg-surface-hover rounded" />
+        <div className="h-3 w-16 bg-surface-hover rounded" />
+      </div>
+      {[1, 2, 3].map((i) => (
+        <div key={`n${i}`} className="flex items-center justify-between px-4 py-3">
+          <div className="h-4 bg-surface-hover rounded" style={{ width: `${80 + i * 25}px` }} />
+          <div className="h-3 w-32 bg-surface-hover rounded" />
+        </div>
+      ))}
+    </div>
+  );
+}
+
 export function ModelModal({ currentModel, onModelChange }: Props) {
+  const navigate = useNavigate();
   const [open, setOpen] = useState(false);
   const [tab, setTab] = useState<Tab>('models');
   const [providers, setProviders] = useState<Provider[]>([]);
   const [models, setModels] = useState<ModelInfo[]>([]);
+  const [recentModels, setRecentModels] = useState<ModelInfo[]>([]);
   const [loading, setLoading] = useState(false);
   const [search, setSearch] = useState('');
-  const [selectedProvider, setSelectedProvider] = useState<string>('all');
+  const [filterProvider, setFilterProvider] = useState<string>('all');
   const [customModel, setCustomModel] = useState('');
 
   // Provider key inputs
   const [keyInputs, setKeyInputs] = useState<Record<string, string>>({});
   const [savingKeys, setSavingKeys] = useState(false);
 
+  const loadData = useCallback(async () => {
+    setLoading(true);
+    try {
+      const [pData, mData] = await Promise.all([api.getProviders(), api.getModels()]);
+      const provs = pData.providers || [];
+      setProviders(provs);
+      setModels(mData.models || []);
+      setRecentModels(mData.recent_models || []);
+    } catch {
+      // ignore
+    } finally {
+      setLoading(false);
+    }
+  }, []);
+
   useEffect(() => {
     if (!open) return;
-    setLoading(true);
-    Promise.all([api.getProviders(), api.getModels()])
-      .then(([pData, mData]) => {
-        setProviders(pData.providers || []);
-        setModels(mData.models || []);
-      })
-      .catch(() => {
-        // ignore
-      })
-      .finally(() => setLoading(false));
-  }, [open]);
+    loadData();
+  }, [open, loadData]);
+
+  // Get only configured providers that are in the models category
+  const configuredModelProviders = useMemo(() => {
+    return providers.filter((p) => p.configured && p.categories?.includes('models'));
+  }, [providers]);
 
-  const filteredModels = useMemo(() => {
-    let list = models;
-    if (selectedProvider !== 'all') {
-      list = list.filter((m) => m.provider === selectedProvider);
+  // Group models by provider, sorted by release_date descending
+  const groupedModels = useMemo(() => {
+    const groups: Record<string, ModelInfo[]> = {};
+    for (const p of configuredModelProviders) {
+      groups[p.id] = [];
+    }
+    for (const m of models) {
+      if (groups[m.provider]) {
+        groups[m.provider].push(m);
+      }
+    }
+    // Sort each group by release_date descending (newest first)
+    for (const pid of Object.keys(groups)) {
+      groups[pid].sort((a, b) => {
+        const da = a.release_date || '1900-01-01';
+        const db = b.release_date || '1900-01-01';
+        return db.localeCompare(da);
+      });
+    }
+    return groups;
+  }, [models, configuredModelProviders]);
+
+  // Apply the provider filter and search query to the recent list, then to the provider groups
+  const filteredRecent = useMemo(() => {
+    let list = recentModels;
+    if (filterProvider !== 'all') {
+      list = list.filter((m) => m.provider === filterProvider);
     }
     if (search.trim()) {
       const q = search.toLowerCase();
-      list = list.filter((m) => m.name.toLowerCase().includes(q) || m.id.toLowerCase().includes(q));
+      list = list.filter(
+        (m) => m.name.toLowerCase().includes(q) || m.id.toLowerCase().includes(q)
+      );
     }
     return list;
-  }, [models, selectedProvider, search]);
+  }, [recentModels, search, filterProvider]);
+
+  const filteredGroups = useMemo(() => {
+    const q = search.toLowerCase();
+    const result: Record<string, ModelInfo[]> = {};
+    for (const pid of Object.keys(groupedModels)) {
+      // Skip providers that don't match the filter
+      if (filterProvider !== 'all' && pid !== filterProvider) continue;
+      const list = search.trim()
+        ? groupedModels[pid].filter(
+            (m) => m.name.toLowerCase().includes(q) || m.id.toLowerCase().includes(q)
+          )
+        : groupedModels[pid];
+      if (list.length > 0) {
+        result[pid] = list;
+      }
+    }
+    return result;
+  }, [groupedModels, search, filterProvider]);
 
   const selectModel = async (modelId: string) => {
     await api.setModel(modelId);
@@ -81,7 +199,6 @@ export function ModelModal({ currentModel, onModelChange }: Props) {
     setSavingKeys(true);
     try {
       await api.saveConfig(toSave);
-      // Refresh providers to show updated status
       const data = await api.getProviders();
       setProviders(data.providers || []);
       setKeyInputs({});
@@ -92,12 +209,35 @@ export function ModelModal({ currentModel, onModelChange }: Props) {
     }
   };
 
+  const goToSettings = () => {
+    setOpen(false);
+    navigate('/settings/providers');
+  };
+
   // Extract raw model ID from label (which may include context info)
   const rawModelId = currentModel.split(' ')[0];
 
+  const renderModelButton = (m: ModelInfo) => (
+    <button
+      key={m.id}
+      className={`flex items-center justify-between px-4 py-2.5 rounded-lg text-left transition-all w-full ${
+        m.id === rawModelId
+          ? 'bg-primary/15 border border-primary'
+          : 'bg-transparent border border-transparent hover:bg-surface-hover'
+      }`}
+      onClick={() => selectModel(m.id)}
+    >
+      <div className="flex items-center gap-2 min-w-0">
+        {m.id === rawModelId && <Check size={14} className="text-primary shrink-0" />}
+        <span className="text-sm font-medium text-text truncate">{m.name}</span>
+      </div>
+      <span className="text-xs text-text-dim font-mono shrink-0 ml-2">{m.id}</span>
+    </button>
+  );
+
   return (
     <>
-      <button 
+      <button
         className="flex items-center gap-1 sm:gap-2 bg-surface-hover border border-border text-text-dim px-2 sm:px-3 py-1.5 rounded-lg text-sm font-mono cursor-pointer transition-all max-w-[120px] sm:max-w-[200px] hover:border-primary hover:text-text"
         onClick={() => setOpen(true)}
         title={currentModel}
@@ -107,16 +247,18 @@ export function ModelModal({ currentModel, onModelChange }: Props) {
       </button>
 
       {open && (
-        <div 
+        <div
           className="fixed inset-0 bg-black/60 flex items-center justify-center z-50 p-4"
           onClick={() => setOpen(false)}
         >
-          <div 
-            className="bg-surface rounded-xl border border-border w-full max-w-lg max-h-[80vh] flex flex-col shadow-xl"
+          {/* Fixed-size modal — never changes dimensions between loading and loaded */}
+          <div
+            className="bg-surface rounded-xl border border-border w-full max-w-lg flex flex-col shadow-xl"
+            style={{ height: 'min(80vh, 600px)' }}
             onClick={(e) => e.stopPropagation()}
           >
             {/* Tabs */}
-            <div className="flex border-b border-border">
+            <div className="flex border-b border-border shrink-0">
               <button
                 className={`flex-1 py-3 px-4 text-sm font-medium transition-colors ${
                   tab === 'models' ? 'text-primary border-b-2 border-primary' : 'text-text-dim hover:text-text'
@@ -137,9 +279,9 @@ export function ModelModal({ currentModel, onModelChange }: Props) {
 
             {/* Models tab */}
             {tab === 'models' && (
-              <div className="flex-1 flex flex-col overflow-hidden p-4">
-                {/* Filters */}
-                <div className="flex gap-3 mb-4">
+              <div className="flex-1 flex flex-col overflow-hidden p-4 min-h-0">
+                {/* Search + Provider filter — always visible */}
+                <div className="flex gap-2 mb-3 shrink-0">
                   <div className="relative flex-1">
                     <Search size={16} className="absolute left-3 top-1/2 -translate-y-1/2 text-text-dim" />
                     <input
@@ -148,54 +290,85 @@ export function ModelModal({ currentModel, onModelChange }: Props) {
                       placeholder="Search models..."
                       value={search}
                       onChange={(e) => setSearch(e.target.value)}
+                      autoFocus
                     />
                   </div>
-                  <select
-                    className="bg-bg border border-border rounded-lg px-3 py-2 text-sm text-text focus:border-primary focus:outline-none"
-                    value={selectedProvider}
-                    onChange={(e) => setSelectedProvider(e.target.value)}
-                  >
-                    <option value="all">All providers</option>
-                    {providers.map((p) => (
-                      <option key={p.id} value={p.id}>
-                        {p.name}
-                      </option>
-                    ))}
-                  </select>
+                  <div className="relative shrink-0">
+                    <Filter size={14} className="absolute left-2.5 top-1/2 -translate-y-1/2 text-text-dim pointer-events-none" />
+                    <select
+                      className="bg-bg border border-border rounded-lg pl-8 pr-3 py-2 text-sm text-text focus:border-primary focus:outline-none appearance-none cursor-pointer"
+                      value={filterProvider}
+                      onChange={(e) => setFilterProvider(e.target.value)}
+                      title="Filter by provider"
+                    >
+                      <option value="all">All</option>
+                      {configuredModelProviders.map((p) => (
+                        <option key={p.id} value={p.id}>{p.name}</option>
+                      ))}
+                    </select>
+                  </div>
                 </div>
 
-                {/* Model list */}
-                {loading ? (
-                  <div className="flex-1 flex items-center justify-center text-text-dim">Loading models...</div>
-                ) : (
-                  <div className="flex-1 overflow-y-auto flex flex-col gap-1">
-                    {filteredModels.map((m) => (
-                      <button
-                        key={m.id}
-                        className={`flex items-center justify-between px-4 py-3 rounded-lg text-left transition-all ${
-                          m.id === rawModelId 
-                            ? 'bg-primary/15 border border-primary' 
-                            : 'bg-transparent border border-transparent hover:bg-surface-hover'
-                        }`}
-                        onClick={() => selectModel(m.id)}
-                      >
-                        <div className="flex items-center gap-2">
-                          {m.id === rawModelId && <Check size={16} className="text-primary shrink-0" />}
-                          <span className="font-medium text-text">{m.name}</span>
+                {/* Scrollable model list area — same size whether loading or loaded */}
+                <div className="flex-1 overflow-y-auto min-h-0">
+                  {loading ? (
+                    <LoadingSkeleton />
+                  ) : (
+                    <div className="flex flex-col gap-0.5">
+                      {/* Recently used */}
+                      {!search.trim() && filteredRecent.length > 0 && (
+                        <>
+                          <div className="px-2 py-1">
+                            <span className="text-xs font-semibold text-text-dim uppercase tracking-wider">Recently Used</span>
+                          </div>
+                          {filteredRecent.map(renderModelButton)}
+                          <div className="my-2 border-t border-border" />
+                        </>
+                      )}
+
+                      {/* Provider groups */}
+                      {configuredModelProviders.map((p) => {
+                        const groupModels = filteredGroups[p.id] || [];
+                        if (groupModels.length === 0) return null;
+                        return (
+                          <div key={p.id} className="mb-2">
+                            <div className="px-2 py-1.5 flex items-center gap-2">
+                              <ProviderLogo providerId={p.id} size={16} />
+                              <span className="text-xs font-semibold text-text-dim uppercase tracking-wider">{p.name}</span>
+                              {p.is_custom && (
+                                <span className="text-[10px] px-1.5 py-0.5 bg-surface-hover rounded text-text-dim">{p.sdk_type}</span>
+                              )}
+                            </div>
+                            <div className="flex flex-col gap-0.5">
+                              {groupModels.map(renderModelButton)}
+                            </div>
+                          </div>
+                        );
+                      })}
+
+                      {Object.keys(filteredGroups).length === 0 && filteredRecent.length === 0 && (
+                        <div className="flex-1 flex items-center justify-center text-text-dim text-sm py-12">
+                          No models found
                         </div>
-                        <span className="text-xs text-text-dim font-mono">{m.id}</span>
-                      </button>
-                    ))}
-                    {filteredModels.length === 0 && (
-                      <div className="flex-1 flex items-center justify-center text-text-dim text-sm">
-                        No models found
-                      </div>
-                    )}
-                  </div>
-                )}
+                      )}
 
-                {/* Custom model */}
-                <div className="flex gap-2 mt-4 pt-4 border-t border-border">
+                      {configuredModelProviders.length === 0 && !loading && (
+                        <div className="flex-1 flex flex-col items-center justify-center gap-3 text-text-dim text-sm py-12">
+                          <p>No providers configured.</p>
+                          <button
+                            className="px-4 py-2 bg-primary text-white rounded-lg text-sm font-medium hover:bg-primary-hover transition-colors"
+                            onClick={goToSettings}
+                          >
+                            Configure Providers
+                          </button>
+                        </div>
+                      )}
+                    </div>
+                  )}
+                </div>
+
+                {/* Custom model — always at the bottom */}
+                <div className="flex gap-2 mt-3 pt-3 border-t border-border shrink-0">
                   <input
                     type="text"
                     className="flex-1 bg-bg border border-border rounded-lg px-3 py-2 text-sm text-text placeholder-text-dim focus:border-primary focus:outline-none"
@@ -221,11 +394,11 @@ export function ModelModal({ currentModel, onModelChange }: Props) {
 
             {/* Providers tab */}
             {tab === 'providers' && (
-              <div className="flex-1 flex flex-col overflow-hidden p-4">
-                <div className="flex-1 overflow-y-auto flex flex-col gap-3">
+              <div className="flex-1 flex flex-col overflow-hidden p-4 min-h-0">
+                <div className="flex-1 overflow-y-auto flex flex-col gap-3 min-h-0">
                   {providers.map((p) => (
                     <div key={p.id} className="flex items-center gap-3 p-3 bg-bg rounded-lg border border-border">
-                      <div className="flex-1">
+                      <div className="flex-1 min-w-0">
                         <span className="font-medium text-text block">{p.name}</span>
                         <span className={`text-xs flex items-center gap-1 ${p.configured ? 'text-success' : 'text-text-dim'}`}>
                           {p.configured && <Check size={12} />}
@@ -234,7 +407,7 @@ export function ModelModal({ currentModel, onModelChange }: Props) {
                       </div>
                       <input
                         type="password"
-                        className="w-48 bg-surface border border-border rounded-lg px-3 py-2 text-sm text-text placeholder-text-dim focus:border-primary focus:outline-none"
+                        className="w-48 bg-surface border border-border rounded-lg px-3 py-2 text-sm text-text placeholder-text-dim focus:border-primary focus:outline-none shrink-0"
                         placeholder={`Paste ${p.key_env}`}
                         value={keyInputs[p.id] || ''}
                         onChange={(e) =>
@@ -244,30 +417,38 @@ export function ModelModal({ currentModel, onModelChange }: Props) {
                     </div>
                   ))}
                 </div>
-                
+
                 <button
-                  className="mt-4 w-full py-3 bg-primary text-white rounded-lg font-medium hover:bg-primary-hover transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center justify-center gap-2"
+                  className="mt-4 w-full py-3 bg-primary text-white rounded-lg font-medium hover:bg-primary-hover transition-colors disabled:opacity-50 disabled:cursor-not-allowed flex items-center justify-center gap-2 shrink-0"
                   onClick={saveKeys}
                   disabled={savingKeys || Object.values(keyInputs).every((v) => !v?.trim())}
                 >
                   <Save size={16} />
                   {savingKeys ? 'Saving...' : 'Save API Keys'}
                 </button>
-                
-                <p className="mt-3 text-xs text-text-dim text-center">
+
+                <p className="mt-3 text-xs text-text-dim text-center shrink-0">
                   Keys are saved to your <code className="bg-bg px-1 rounded">`.env`</code> file. You can also set them manually.
                 </p>
               </div>
             )}
 
-            {/* Close button */}
-            <button 
-              className="mx-4 mb-4 py-2 text-center text-sm text-text-dim hover:text-text transition-colors flex items-center justify-center gap-1"
-              onClick={() => setOpen(false)}
-            >
-              <X size={14} />
-              Close
-            </button>
+            {/* Footer: settings link + close */}
+            <div className="mx-4 mb-4 flex items-center justify-between shrink-0">
+              <button
+                className="text-xs text-text-dim hover:text-primary transition-colors"
+                onClick={goToSettings}
+              >
+                More provider settings
+              </button>
+              <button
+                className="py-2 text-sm text-text-dim hover:text-text transition-colors flex items-center gap-1"
+                onClick={() => setOpen(false)}
+              >
+                <X size={14} />
+                Close
+              </button>
+            </div>
           </div>
         </div>
       )}
diff --git a/frontend/src/components/OnboardingModal.tsx b/frontend/src/components/OnboardingModal.tsx
index 9d4be57..b4f2f4a 100644
--- a/frontend/src/components/OnboardingModal.tsx
+++ b/frontend/src/components/OnboardingModal.tsx
@@ -29,24 +29,30 @@ export function OnboardingModal({ onComplete }: Props) {
   const [search, setSearch] = useState('');
   const [selectedProvider, setSelectedProvider] = useState<string>('all');
 
-  useEffect(() => {
+  const loadData = async () => {
     setLoading(true);
-    Promise.all([api.getProviders(), api.getModels()])
-      .then(([pData, mData]) => {
-        const provs = pData.providers || [];
-        setProviders(provs);
-        setModels(mData.models || []);
-        // If any provider is already configured, skip to model selection
-        if (provs.some((p: Provider) => p.configured)) {
-          setStep('model');
-        }
-      })
-      .finally(() => setLoading(false));
+    try {
+      const [pData, mData] = await Promise.all([api.getProviders(), api.getModels()]);
+      const provs = pData.providers || [];
+      const mdls = mData.models || [];
+      setProviders(provs);
+      setModels(mdls);
+      // If any provider is configured AND models are available, skip to model selection
+      if (provs.some((p: Provider) => p.configured) && mdls.length > 0) {
+        setStep('model');
+      }
+    } finally {
+      setLoading(false);
+    }
+  };
+
+  useEffect(() => {
+    loadData();
   }, []);
 
   const configuredProviders = providers.filter((p) => p.configured);
   const llmProviders = providers.filter((p) =>
-    ['openai', 'anthropic', 'openrouter', 'opencode-go', 'ollama', 'lmstudio'].includes(p.id)
+    p.categories?.includes('models') || ['openai', 'anthropic', 'openrouter', 'opencode-go', 'ollama', 'lmstudio'].includes(p.id)
   );
 
   const filteredModels = useMemo(() => {
@@ -71,10 +77,15 @@ export function OnboardingModal({ onComplete }: Props) {
     setSaving(true);
     try {
       await api.saveConfig(toSave);
-      const data = await api.getProviders();
-      setProviders(data.providers || []);
+      // Refresh providers and models after saving keys
+      const [pData, mData] = await Promise.all([api.getProviders(), api.getModels()]);
+      setProviders(pData.providers || []);
+      setModels(mData.models || []);
       setKeyInputs({});
-      setStep('model');
+      // Only go to model step if we now have models
+      if ((mData.models || []).length > 0) {
+        setStep('model');
+      }
     } finally {
       setSaving(false);
     }
@@ -146,7 +157,7 @@ export function OnboardingModal({ onComplete }: Props) {
               >
                 {saving ? 'Saving...' : 'Save & Continue'}
               </button>
-              {configuredProviders.length > 0 && (
+              {configuredProviders.length > 0 && models.length > 0 && (
                 <button 
                   className="px-6 py-3 bg-surface-hover border border-border text-text-dim rounded-lg hover:text-text transition-colors"
                   onClick={() => setStep('model')}
@@ -161,53 +172,64 @@ export function OnboardingModal({ onComplete }: Props) {
         {/* Model step */}
         {step === 'model' && (
           <div className="flex-1 overflow-hidden flex flex-col px-8 pb-8">
-            {/* Filters */}
-            <div className="flex gap-3 py-4">
-              <input
-                type="text"
-                className="flex-1 bg-bg border border-border rounded-md px-3 py-2 text-sm text-text placeholder-text-dim focus:border-primary focus:outline-none"
-                placeholder="Search models..."
-                value={search}
-                onChange={(e) => setSearch(e.target.value)}
-              />
-              <select 
-                className="bg-bg border border-border rounded-md px-3 py-2 text-sm text-text focus:border-primary focus:outline-none"
-                value={selectedProvider} 
-                onChange={(e) => setSelectedProvider(e.target.value)}
-              >
-                <option value="all">All providers</option>
-                {providers.filter((p) => p.configured).map((p) => (
-                  <option key={p.id} value={p.id}>{p.name}</option>
-                ))}
-              </select>
-            </div>
-            
-            {/* Model list */}
-            <div className="flex-1 overflow-y-auto flex flex-col gap-1">
-              {filteredModels.map((m) => (
+            {models.length === 0 ? (
+              /* No models available — send user back to configure a provider */
+              <div className="flex-1 flex flex-col items-center justify-center py-8 gap-4">
+                <div className="text-center">
+                  <p className="text-text font-medium mb-2">No models available</p>
+                  <p className="text-text-dim text-sm">
+                    No LLM providers are configured yet. Add at least one provider API key to see available models.
+                  </p>
+                </div>
                 <button 
-                  key={m.id} 
-                  className="flex items-center justify-between px-4 py-3 rounded-lg text-left hover:bg-surface-hover transition-colors"
-                  onClick={() => selectModel(m.id)}
+                  className="py-3 px-8 bg-primary text-white rounded-lg font-semibold hover:bg-primary-hover transition-colors"
+                  onClick={() => setStep('providers')}
                 >
-                  <span className="font-medium text-text">{m.name}</span>
-                  <span className="text-xs text-text-dim font-mono">{m.id}</span>
+                  Configure a Provider
                 </button>
-              ))}
-              {filteredModels.length === 0 && (
-                <div className="flex-1 flex items-center justify-center text-text-dim text-center py-8">
-                  No models found. Configure a provider first.
+              </div>
+            ) : (
+              <>
+                {/* Filters */}
+                <div className="flex gap-3 py-4">
+                  <input
+                    type="text"
+                    className="flex-1 bg-bg border border-border rounded-md px-3 py-2 text-sm text-text placeholder-text-dim focus:border-primary focus:outline-none"
+                    placeholder="Search models..."
+                    value={search}
+                    onChange={(e) => setSearch(e.target.value)}
+                  />
+                  <select 
+                    className="bg-bg border border-border rounded-md px-3 py-2 text-sm text-text focus:border-primary focus:outline-none"
+                    value={selectedProvider} 
+                    onChange={(e) => setSelectedProvider(e.target.value)}
+                  >
+                    <option value="all">All providers</option>
+                    {configuredProviders.map((p) => (
+                      <option key={p.id} value={p.id}>{p.name}</option>
+                    ))}
+                  </select>
                 </div>
-              )}
-            </div>
-            
-            {configuredProviders.length === 0 && (
-              <button 
-                className="mt-4 py-3 bg-surface-hover border border-border text-text-dim rounded-lg hover:text-text transition-colors"
-                onClick={() => setStep('providers')}
-              >
-                Configure a provider
-              </button>
+                
+                {/* Model list */}
+                <div className="flex-1 overflow-y-auto flex flex-col gap-1">
+                  {filteredModels.map((m) => (
+                    <button 
+                      key={m.id} 
+                      className="flex items-center justify-between px-4 py-3 rounded-lg text-left hover:bg-surface-hover transition-colors"
+                      onClick={() => selectModel(m.id)}
+                    >
+                      <span className="font-medium text-text">{m.name}</span>
+                      <span className="text-xs text-text-dim font-mono">{m.id}</span>
+                    </button>
+                  ))}
+                  {filteredModels.length === 0 && (
+                    <div className="flex-1 flex items-center justify-center text-text-dim text-center py-8">
+                      No models match your search.
+                    </div>
+                  )}
+                </div>
+              </>
             )}
           </div>
         )}
diff --git a/frontend/src/components/ProjectManageModal.tsx b/frontend/src/components/ProjectManageModal.tsx
new file mode 100644
index 0000000..93f1512
--- /dev/null
+++ b/frontend/src/components/ProjectManageModal.tsx
@@ -0,0 +1,238 @@
+import { useState, useRef, useEffect } from 'react';
+import { X, FolderOpen, Pencil, Trash2, Check, Layers } from 'lucide-react';
+import { api } from '../api';
+import type { Project } from '../types';
+import { ConfirmDialog } from './ConfirmDialog';
+
+/** Props for the "Manage Projects" modal. */
+interface Props {
+  /** Projects to list; they are displayed default-first, then by name. */
+  projects: Project[];
+  /** Invoked when the backdrop or the header close button is clicked. */
+  onClose: () => void;
+  /** Invoked after a project has been renamed or deleted successfully. */
+  onChanged: () => void;
+}
+
+/**
+ * A single entry in the project list. Shows name, optional description and
+ * conversation count; non-default projects additionally get inline rename
+ * and delete (behind a confirmation dialog) actions.
+ */
+function ProjectRow({ project, onChanged }: { project: Project; onChanged: () => void }) {
+  const [renaming, setRenaming] = useState(false);
+  const [name, setName] = useState(project.name);
+  const [confirmDelete, setConfirmDelete] = useState(false);
+  const [saving, setSaving] = useState(false);
+  const inputRef = useRef<HTMLInputElement>(null);
+  const isDefault = project.is_default;
+  const conversationCount = project.conversation_count ?? 0;
+
+  // Focus the input and select its text as soon as rename mode is entered.
+  useEffect(() => {
+    if (renaming) {
+      inputRef.current?.focus();
+      inputRef.current?.select();
+    }
+  }, [renaming]);
+
+  // Seed the draft from the current prop so a stale or untrimmed draft left
+  // over from an earlier rename is never shown.
+  const startRename = () => {
+    setName(project.name);
+    setRenaming(true);
+  };
+
+  // Leave rename mode and discard whatever was typed.
+  const cancelRename = () => {
+    setRenaming(false);
+    setName(project.name);
+  };
+
+  // Persist the trimmed name; an empty or unchanged name counts as a cancel.
+  const handleRename = async () => {
+    const trimmed = name.trim();
+    if (!trimmed || trimmed === project.name) {
+      cancelRename();
+      return;
+    }
+    setSaving(true);
+    try {
+      await api.updateProject(project.uuid, { name: trimmed });
+      onChanged();
+    } catch (err) {
+      // Restore the old name and leave a trace instead of failing silently.
+      console.error('Failed to rename project', err);
+      setName(project.name);
+    } finally {
+      setSaving(false);
+      setRenaming(false);
+    }
+  };
+
+  // Delete the project; the confirmation dialog is closed whether or not the request succeeds.
+  const handleDelete = async () => {
+    try {
+      await api.deleteProject(project.uuid);
+      onChanged();
+    } catch (err) {
+      console.error('Failed to delete project', err);
+    } finally {
+      setConfirmDelete(false);
+    }
+  };
+
+  return (
+    <>
+      <div className="flex items-center gap-3 p-3 bg-bg rounded-lg border border-border group">
+        <FolderOpen size={16} className={`shrink-0 ${isDefault ? 'text-primary' : 'text-text-dim'}`} />
+
+        {/* Name / inline rename */}
+        <div className="flex-1 min-w-0">
+          {renaming ? (
+            <div className="flex items-center gap-1.5">
+              <input
+                ref={inputRef}
+                className="flex-1 bg-surface border border-primary rounded px-2 py-1 text-sm text-text focus:outline-none min-w-0"
+                value={name}
+                onChange={(e) => setName(e.target.value)}
+                onKeyDown={(e) => {
+                  if (e.key === 'Enter') handleRename();
+                  if (e.key === 'Escape') cancelRename();
+                }}
+                disabled={saving}
+                aria-label="Project name"
+              />
+              <button
+                type="button"
+                className="p-1 rounded text-success hover:bg-success/10 transition-colors"
+                onClick={handleRename}
+                disabled={saving}
+                aria-label="Save name"
+              >
+                <Check size={14} />
+              </button>
+              <button
+                type="button"
+                className="p-1 rounded text-text-dim hover:bg-surface-hover transition-colors"
+                onClick={cancelRename}
+                aria-label="Cancel rename"
+              >
+                <X size={14} />
+              </button>
+            </div>
+          ) : (
+            <div>
+              <span className="text-sm font-medium text-text block truncate">{project.name}</span>
+              {project.description && (
+                <span className="text-xs text-text-dim block truncate">{project.description}</span>
+              )}
+              <span className="text-xs text-text-dim">
+                {conversationCount} conversation{conversationCount !== 1 ? 's' : ''}
+              </span>
+            </div>
+          )}
+        </div>
+
+        {/* Actions (not for default project, not during rename); revealed on hover or keyboard focus */}
+        {!isDefault && !renaming && (
+          <div className="flex items-center gap-1 shrink-0 opacity-0 group-hover:opacity-100 focus-within:opacity-100 transition-opacity">
+            <button
+              type="button"
+              className="p-1.5 rounded text-text-dim hover:text-text hover:bg-surface-hover transition-colors"
+              onClick={startRename}
+              title="Rename"
+            >
+              <Pencil size={14} />
+            </button>
+            <button
+              type="button"
+              className="p-1.5 rounded text-text-dim hover:text-error hover:bg-error/10 transition-colors"
+              onClick={() => setConfirmDelete(true)}
+              title="Delete"
+            >
+              <Trash2 size={14} />
+            </button>
+          </div>
+        )}
+
+        {/* Default badge */}
+        {isDefault && (
+          <span className="text-[10px] px-2 py-0.5 rounded-full bg-primary/10 text-primary shrink-0">Default</span>
+        )}
+      </div>
+
+      {confirmDelete && (
+        <ConfirmDialog
+          title="Delete Project"
+          message={`Delete "${project.name}" and its workspace? All files in the workspace will be permanently removed. This cannot be undone.`}
+          confirmLabel="Delete Project"
+          cancelLabel="Cancel"
+          danger
+          onConfirm={handleDelete}
+          onCancel={() => setConfirmDelete(false)}
+        />
+      )}
+    </>
+  );
+}
+
+/**
+ * Modal that lists every project and lets the user rename or delete the
+ * non-default ones. Clicking the backdrop or the close button calls `onClose`.
+ */
+export function ProjectManageModal({ projects, onClose, onChanged }: Props) {
+  // Sort a copy (never the prop itself): default first, then alphabetical
+  const sorted = [...projects].sort((a, b) => {
+    if (a.is_default && !b.is_default) return -1;
+    if (!a.is_default && b.is_default) return 1;
+    return a.name.localeCompare(b.name);
+  });
+
+  return (
+    <div
+      className="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm p-4"
+      onClick={onClose}
+    >
+      <div
+        className="bg-surface border border-border rounded-xl shadow-xl w-full max-w-lg max-h-[70vh] flex flex-col"
+        role="dialog"
+        aria-modal="true"
+        aria-label="Manage Projects"
+        onClick={(e) => e.stopPropagation()}
+      >
+        {/* Header */}
+        <div className="flex items-center justify-between px-6 py-4 border-b border-border shrink-0">
+          <div className="flex items-center gap-2">
+            <Layers size={18} className="text-primary" />
+            <h2 className="text-lg font-semibold text-text">Manage Projects</h2>
+          </div>
+          <button
+            type="button"
+            className="w-8 h-8 rounded-lg flex items-center justify-center text-text-dim hover:bg-surface-hover hover:text-text transition-colors"
+            onClick={onClose}
+            aria-label="Close"
+          >
+            <X size={16} />
+          </button>
+        </div>
+
+        {/* Project list */}
+        <div className="flex-1 overflow-y-auto p-4 flex flex-col gap-2 min-h-0">
+          {sorted.length === 0 ? (
+            <p className="text-center text-text-dim py-8">No projects yet.</p>
+          ) : (
+            sorted.map((p) => (
+              <ProjectRow key={p.uuid} project={p} onChanged={onChanged} />
+            ))
+          )}
+        </div>
+
+        {/* Footer hint */}
+        <div className="px-6 py-3 border-t border-border text-xs text-text-dim text-center shrink-0">
+          Hover a project to rename or delete. The default project cannot be modified.
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx
index 8192ca2..0282575 100644
--- a/frontend/src/components/Sidebar.tsx
+++ b/frontend/src/components/Sidebar.tsx
@@ -14,6 +14,7 @@ import {
   FolderOpen,
   ChevronDown,
   Layers,
+  SlidersHorizontal,
 } from 'lucide-react';
 
 type ConvStatus = 'idle' | 'processing' | 'waiting_approval' | 'waiting_input';
@@ -30,6 +31,7 @@ interface Props {
   onDelete: (uuid: string) => void;
   onSelectProject: (project: Project | null) => void;
   onNewProject: () => void;
+  onManageProjects?: () => void;
 }
 
 function groupByDate(conversations: Conversation[]) {
@@ -59,7 +61,7 @@ function ConvIcon({ status }: { status: ConvStatus }) {
   return <span className={`${base} bg-border`} />;
 }
 
-export function Sidebar({ conversations, currentUuid, user, convStatuses, projects, activeProject, onSwitch, onNew, onDelete, onSelectProject, onNewProject }: Props) {
+export function Sidebar({ conversations, currentUuid, user, convStatuses, projects, activeProject, onSwitch, onNew, onDelete, onSelectProject, onNewProject, onManageProjects }: Props) {
   const navigate = useNavigate();
   const [pendingDelete, setPendingDelete] = useState<{ uuid: string; title: string } | null>(null);
   const [search, setSearch] = useState('');
@@ -74,6 +76,9 @@ export function Sidebar({ conversations, currentUuid, user, convStatuses, projec
 
   const groups = useMemo(() => groupByDate(filtered), [filtered]);
 
+  // Non-default projects for the dropdown
+  const userProjects = useMemo(() => projects.filter((p) => !p.is_default), [projects]);
+
   if (collapsed) {
     return (
       <aside className="w-14 bg-surface border-r border-border flex flex-col items-center py-4 gap-3 shrink-0">
@@ -128,7 +133,8 @@ export function Sidebar({ conversations, currentUuid, user, convStatuses, projec
           <ChevronDown size={14} className={`text-text-dim shrink-0 transition-transform ${projectDropdownOpen ? 'rotate-180' : ''}`} />
         </button>
         {projectDropdownOpen && (
-          <div className="absolute left-0 right-0 top-full mt-1 bg-surface border border-border rounded-lg shadow-xl z-20 max-h-60 overflow-auto">
+          <div className="absolute left-0 right-0 top-full mt-1 bg-surface border border-border rounded-lg shadow-xl z-20 max-h-72 overflow-auto">
+            {/* All Conversations */}
             <button
               className={`w-full flex items-center gap-2 px-3 py-2 text-sm text-left hover:bg-surface-hover transition-colors ${!activeProject ? 'text-primary' : 'text-text'}`}
               onClick={() => { onSelectProject(null); setProjectDropdownOpen(false); }}
@@ -136,10 +142,14 @@ export function Sidebar({ conversations, currentUuid, user, convStatuses, projec
               <Layers size={14} />
               All Conversations
             </button>
-            {projects.map((p) => (
+
+            {/* User projects */}
+            {userProjects.map((p) => (
               <button
                 key={p.uuid}
-                className={`w-full flex items-center gap-2 px-3 py-2 text-sm text-left hover:bg-surface-hover transition-colors ${activeProject?.uuid === p.uuid ? 'text-primary bg-primary/5' : 'text-text'}`}
+                className={`w-full flex items-center gap-2 px-3 py-2 text-sm text-left hover:bg-surface-hover transition-colors ${
+                  activeProject?.uuid === p.uuid ? 'text-primary bg-primary/5' : 'text-text'
+                }`}
                 onClick={() => { onSelectProject(p); setProjectDropdownOpen(false); }}
               >
                 <FolderOpen size={14} />
@@ -149,13 +159,26 @@ export function Sidebar({ conversations, currentUuid, user, convStatuses, projec
                 )}
               </button>
             ))}
-            <button
-              className="w-full flex items-center gap-2 px-3 py-2 text-sm text-primary hover:bg-surface-hover transition-colors border-t border-border"
-              onClick={() => { onNewProject(); setProjectDropdownOpen(false); }}
-            >
-              <Plus size={14} />
-              New Project
-            </button>
+
+            {/* Actions */}
+            <div className="border-t border-border">
+              <button
+                className="w-full flex items-center gap-2 px-3 py-2 text-sm text-primary hover:bg-surface-hover transition-colors"
+                onClick={() => { onNewProject(); setProjectDropdownOpen(false); }}
+              >
+                <Plus size={14} />
+                New Project
+              </button>
+              {userProjects.length > 0 && (
+                <button
+                  className="w-full flex items-center gap-2 px-3 py-2 text-sm text-text-dim hover:bg-surface-hover hover:text-text transition-colors"
+                  onClick={() => { onManageProjects?.(); setProjectDropdownOpen(false); }}
+                >
+                  <SlidersHorizontal size={14} />
+                  Manage Projects
+                </button>
+              )}
+            </div>
           </div>
         )}
       </div>
diff --git a/frontend/src/components/Terminal.tsx b/frontend/src/components/Terminal.tsx
index 73da6b2..c27c7ea 100644
--- a/frontend/src/components/Terminal.tsx
+++ b/frontend/src/components/Terminal.tsx
@@ -5,6 +5,10 @@ import {
   Maximize2,
   Minimize2,
 } from 'lucide-react';
+import { Terminal as XTerm } from '@xterm/xterm';
+import { FitAddon } from '@xterm/addon-fit';
+import { WebLinksAddon } from '@xterm/addon-web-links';
+import '@xterm/xterm/css/xterm.css';
 
 interface Props {
   projectUuid: string | null;
@@ -12,78 +16,128 @@ interface Props {
   onToggle: () => void;
 }
 
-/**
- * Interactive terminal connected to the project workspace via WebSocket.
- * Uses a basic approach without xterm.js dependency — renders terminal output
- * in a pre element and captures keyboard input.
- *
- * For a production deployment, install @xterm/xterm and use the attach addon.
- * This implementation provides core functionality without the extra dependency.
- */
 export function Terminal({ projectUuid, visible, onToggle }: Props) {
   const [connected, setConnected] = useState(false);
-  const [output, setOutput] = useState<string[]>([]);
   const [maximized, setMaximized] = useState(false);
   const wsRef = useRef<WebSocket | null>(null);
-  const outputRef = useRef<HTMLDivElement>(null);
-  const inputRef = useRef<HTMLTextAreaElement>(null);
-  const [inputLine, setInputLine] = useState('');
-  const reconnectTimer = useRef<ReturnType<typeof setTimeout> | null>(null);
-
+  const termRef = useRef<XTerm | null>(null);
+  const fitAddonRef = useRef<FitAddon | null>(null);
+  const containerRef = useRef<HTMLDivElement>(null);
+  const termInitialized = useRef(false);
+
+  // Initialize xterm instance once
+  const initTerm = useCallback(() => {
+    if (termInitialized.current || !containerRef.current) return;
+
+    const term = new XTerm({
+      cursorBlink: true,
+      fontSize: 13,
+      fontFamily: "'JetBrains Mono', 'Fira Code', 'Cascadia Code', Menlo, Monaco, monospace",
+      theme: {
+        background: '#0d0d0d',
+        foreground: '#e0e0e0',
+        cursor: '#3b82f6',
+        selectionBackground: '#3b82f640',
+        black: '#1a1a1a',
+        red: '#ef4444',
+        green: '#22c55e',
+        yellow: '#eab308',
+        blue: '#3b82f6',
+        magenta: '#a855f7',
+        cyan: '#06b6d4',
+        white: '#e0e0e0',
+        brightBlack: '#525252',
+        brightRed: '#f87171',
+        brightGreen: '#4ade80',
+        brightYellow: '#fde047',
+        brightBlue: '#60a5fa',
+        brightMagenta: '#c084fc',
+        brightCyan: '#22d3ee',
+        brightWhite: '#ffffff',
+      },
+      scrollback: 5000,
+      convertEol: true,
+      allowProposedApi: true,
+    });
+
+    const fitAddon = new FitAddon();
+    term.loadAddon(fitAddon);
+    term.loadAddon(new WebLinksAddon());
+
+    term.open(containerRef.current);
+    fitAddon.fit();
+
+    termRef.current = term;
+    fitAddonRef.current = fitAddon;
+    termInitialized.current = true;
+
+    return term;
+  }, []);
+
+  // Open the terminal WebSocket (project-scoped when projectUuid is set) and wire it to the xterm instance
   const connect = useCallback(() => {
-    if (!projectUuid) return;
-
     const token = localStorage.getItem('openmlr_token');
     if (!token) return;
 
+    const term = termRef.current || initTerm();
+    if (!term) return;
+
     const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
-    const wsUrl = `${protocol}//${window.location.host}/api/terminal/${projectUuid}?token=${token}`;
+    const path = projectUuid ? `/api/terminal/${projectUuid}` : '/api/terminal';
+    const wsUrl = `${protocol}//${window.location.host}${path}?token=${token}`;
 
     try {
       const ws = new WebSocket(wsUrl);
       wsRef.current = ws;
 
+      ws.binaryType = 'arraybuffer';
+
+      // Forward terminal input to this socket; the subscription is kept so onclose can dispose it
+      const inputSub = term.onData((data) => {
+        if (ws.readyState === WebSocket.OPEN) {
+          ws.send(JSON.stringify({ type: 'input', data }));
+        }
+      });
+
       ws.onopen = () => {
         setConnected(true);
-        setOutput((prev) => [...prev, '\r\n--- Connected ---\r\n']);
-        // Send initial resize
-        ws.send(JSON.stringify({ type: 'resize', cols: 120, rows: 30 }));
+        // Send resize to match current terminal dimensions
+        const dims = fitAddonRef.current?.proposeDimensions();
+        if (dims) {
+          ws.send(JSON.stringify({ type: 'resize', cols: dims.cols, rows: dims.rows }));
+        }
       };
 
       ws.onmessage = (event) => {
-        if (event.data instanceof Blob) {
-          event.data.text().then((text: string) => {
-            setOutput((prev) => [...prev, text]);
-          });
+        if (event.data instanceof ArrayBuffer) {
+          term.write(new Uint8Array(event.data));
         } else {
-          setOutput((prev) => [...prev, event.data]);
+          term.write(event.data);
         }
       };
 
       ws.onclose = () => {
         setConnected(false);
-        setOutput((prev) => [...prev, '\r\n--- Disconnected ---\r\n']);
+        term.writeln('\r\n\x1b[90m--- Disconnected ---\x1b[0m');
+        inputSub.dispose(); // the terminal outlives the socket: without this every reconnect stacks another onData listener
         wsRef.current = null;
       };
 
       ws.onerror = () => {
         setConnected(false);
       };
     } catch {
       setConnected(false);
     }
-  }, [projectUuid]);
+  }, [projectUuid, initTerm]);
 
-  // Connect when visible and project is set
+  // Connect when visible
   useEffect(() => {
-    if (visible && projectUuid && !wsRef.current) {
-      connect();
+    if (visible && !wsRef.current) {
+      // Small delay to ensure container is rendered
+      const timer = setTimeout(() => connect(), 50);
+      return () => clearTimeout(timer);
     }
-    return () => {
-      if (reconnectTimer.current) {
-        clearTimeout(reconnectTimer.current);
-      }
-    };
   }, [visible, projectUuid, connect]);
 
   // Disconnect when hidden
@@ -94,35 +148,38 @@ export function Terminal({ projectUuid, visible, onToggle }: Props) {
     }
   }, [visible]);
 
-  // Auto-scroll to bottom
+  // Fit terminal on resize or maximize toggle
   useEffect(() => {
-    if (outputRef.current) {
-      outputRef.current.scrollTop = outputRef.current.scrollHeight;
-    }
-  }, [output]);
-
-  const handleKeyDown = useCallback((e: React.KeyboardEvent) => {
-    if (!wsRef.current || wsRef.current.readyState !== WebSocket.OPEN) return;
-
-    if (e.key === 'Enter') {
-      e.preventDefault();
-      wsRef.current.send(JSON.stringify({ type: 'input', data: inputLine + '\n' }));
-      setInputLine('');
-    } else if (e.key === 'Tab') {
-      e.preventDefault();
-      wsRef.current.send(JSON.stringify({ type: 'input', data: '\t' }));
-    } else if (e.ctrlKey && e.key === 'c') {
-      e.preventDefault();
-      wsRef.current.send(JSON.stringify({ type: 'input', data: '\x03' }));
-    } else if (e.ctrlKey && e.key === 'd') {
-      e.preventDefault();
-      wsRef.current.send(JSON.stringify({ type: 'input', data: '\x04' }));
-    } else if (e.ctrlKey && e.key === 'l') {
-      e.preventDefault();
-      wsRef.current.send(JSON.stringify({ type: 'input', data: '\x0c' }));
-      setOutput([]);
-    }
-  }, [inputLine]);
+    if (!visible || !fitAddonRef.current) return;
+
+    const handleResize = () => {
+      fitAddonRef.current?.fit();
+      // Notify backend of new size
+      if (wsRef.current?.readyState === WebSocket.OPEN) {
+        const dims = fitAddonRef.current?.proposeDimensions();
+        if (dims) {
+          wsRef.current.send(JSON.stringify({ type: 'resize', cols: dims.cols, rows: dims.rows }));
+        }
+      }
+    };
+
+    // Fit after layout settles
+    const timer = setTimeout(handleResize, 100);
+    window.addEventListener('resize', handleResize);
+    return () => {
+      clearTimeout(timer);
+      window.removeEventListener('resize', handleResize);
+    };
+  }, [visible, maximized]);
+
+  // Cleanup on unmount
+  useEffect(() => {
+    return () => {
+      wsRef.current?.close();
+      termRef.current?.dispose();
+      termInitialized.current = false;
+    };
+  }, []);
 
   if (!visible) {
     return (
@@ -149,7 +206,7 @@ export function Terminal({ projectUuid, visible, onToggle }: Props) {
           <TerminalIcon size={14} className="text-primary" />
           <span className="text-xs font-medium text-text">Terminal</span>
           <span className={`w-2 h-2 rounded-full ${connected ? 'bg-success' : 'bg-error'}`} />
-          {!connected && projectUuid && (
+          {!connected && (
             <button
               className="text-xs text-primary hover:underline"
               onClick={connect}
@@ -157,9 +214,6 @@ export function Terminal({ projectUuid, visible, onToggle }: Props) {
               Connect
             </button>
           )}
-          {!projectUuid && (
-            <span className="text-xs text-text-dim">No project selected</span>
-          )}
         </div>
         <div className="flex items-center gap-1">
           <button
@@ -179,30 +233,12 @@ export function Terminal({ projectUuid, visible, onToggle }: Props) {
         </div>
       </div>
 
-      {/* Output area */}
+      {/* xterm.js container */}
       <div
-        ref={outputRef}
-        className="flex-1 overflow-auto px-3 py-2 font-mono text-xs text-green-400 whitespace-pre-wrap"
-        onClick={() => inputRef.current?.focus()}
-      >
-        {output.join('')}
-      </div>
-
-      {/* Input line */}
-      <div className="flex items-center gap-2 px-3 py-1.5 bg-[#1a1a1a] border-t border-border shrink-0">
-        <span className="text-xs text-primary font-mono">$</span>
-        <textarea
-          ref={inputRef}
-          className="flex-1 bg-transparent text-xs text-green-400 font-mono outline-none resize-none"
-          rows={1}
-          value={inputLine}
-          onChange={(e) => setInputLine(e.target.value)}
-          onKeyDown={handleKeyDown}
-          placeholder={connected ? 'Type command...' : 'Not connected'}
-          disabled={!connected}
-          autoFocus
-        />
-      </div>
+        ref={containerRef}
+        className="flex-1 min-h-0 px-1"
+        onClick={() => termRef.current?.focus()}
+      />
     </div>
   );
 }
diff --git a/frontend/src/components/settings/ProvidersSettings.tsx b/frontend/src/components/settings/ProvidersSettings.tsx
index 0ed129d..07596cd 100644
--- a/frontend/src/components/settings/ProvidersSettings.tsx
+++ b/frontend/src/components/settings/ProvidersSettings.tsx
@@ -13,6 +13,25 @@ const TABS = [
 
 type TabId = typeof TABS[number]['id'];
 
+type SdkType = 'openai-sdk' | 'anthropic-sdk' | 'openrouter' | 'litellm';
+
+interface CustomProvider {
+  id: string;
+  name: string;
+  sdk_type: SdkType;
+  api_base: string;
+  api_key: string;
+  models?: { id: string; name: string; release_date?: string }[];
+  last_fetched_at?: string;
+}
+
+const SDK_OPTIONS: { value: SdkType; label: string }[] = [
+  { value: 'openai-sdk', label: 'OpenAI SDK' },
+  { value: 'anthropic-sdk', label: 'Anthropic SDK' },
+  { value: 'openrouter', label: 'OpenRouter' },
+  { value: 'litellm', label: 'LiteLLM' },
+];
+
 export function ProvidersSettings() {
   const [providers, setProviders] = useState<Provider[]>([]);
   const [keyInputs, setKeyInputs] = useState<Record<string, string>>({});
@@ -20,6 +39,24 @@ export function ProvidersSettings() {
   const [saveMsg, setSaveMsg] = useState('');
   const [activeTab, setActiveTab] = useState<TabId>('models');
 
+  // Custom provider modal state
+  const [showCustomModal, setShowCustomModal] = useState(false);
+  const [customForm, setCustomForm] = useState<{
+    name: string;
+    id: string;
+    sdk_type: SdkType;
+    api_base: string;
+    api_key: string;
+  }>({
+    name: '',
+    id: '',
+    sdk_type: 'openai-sdk',
+    api_base: '',
+    api_key: '',
+  });
+  const [fetchingModels, setFetchingModels] = useState(false);
+  const [fetchMsg, setFetchMsg] = useState('');
+
   useEffect(() => {
     api.getProviders().then((d) => setProviders(d.providers || [])).catch(() => {});
   }, []);
@@ -28,7 +65,7 @@ export function ProvidersSettings() {
   const providersByTab = useMemo(() => {
     const grouped: Record<string, Provider[]> = {};
     for (const tab of TABS) {
-      grouped[tab.id] = providers.filter((p) => 
+      grouped[tab.id] = providers.filter((p) =>
         p.categories?.includes(tab.id)
       );
     }
@@ -63,12 +100,85 @@ export function ProvidersSettings() {
     }
   };
 
+  // Validate the modal form, then upsert the provider into the stored `custom_providers` list.
+  const saveCustomProvider = async () => {
+    if (!customForm.name.trim() || !customForm.id.trim() || !customForm.api_base.trim() || !customForm.api_key.trim()) {
+      flash('Please fill in all fields');
+      return;
+    }
+    try {
+      // Abort when the load fails: falling back to an empty list here would
+      // overwrite (and silently drop) every provider that is already stored.
+      // NOTE(review): assumes the endpoint resolves when no settings exist yet — confirm.
+      const settingsRes = await api.getSettingsCategory('providers');
+      const stored = settingsRes.settings?.custom_providers;
+      const existing: CustomProvider[] = Array.isArray(stored) ? stored : [];
+      const newProvider: CustomProvider = {
+        id: customForm.id.trim(),
+        name: customForm.name.trim(),
+        sdk_type: customForm.sdk_type,
+        api_base: customForm.api_base.trim(),
+        api_key: customForm.api_key.trim(),
+        models: [],
+      };
+      // Replace any entry with the same ID; all other providers are kept as-is.
+      const updated = [...existing.filter((cp) => cp.id !== newProvider.id), newProvider];
+      await api.updateSetting('providers', 'custom_providers', updated);
+    } catch {
+      // Leave the modal open with the user's input so the save can be retried.
+      flash('Failed to save custom provider');
+      return;
+    }
+
+    // Reset form and refresh
+    setCustomForm({ name: '', id: '', sdk_type: 'openai-sdk', api_base: '', api_key: '' });
+    setShowCustomModal(false);
+    setProviders((await api.getProviders()).providers || []);
+    flash('Custom provider added');
+  };
+
+  // Remove one stored custom provider; aborts on a failed load rather than writing back an empty list.
+  const deleteCustomProvider = async (providerId: string) => {
+    try {
+      const stored = (await api.getSettingsCategory('providers')).settings?.custom_providers;
+      const existing: CustomProvider[] = Array.isArray(stored) ? stored : [];
+      await api.updateSetting('providers', 'custom_providers', existing.filter((cp) => cp.id !== providerId));
+    } catch {
+      flash('Failed to remove custom provider');
+      return;
+    }
+    setProviders((await api.getProviders()).providers || []);
+    flash('Custom provider removed');
+  };
+
+  // Fetch a custom provider's model list via the API client, then reload the provider cards.
+  const fetchModelsForProvider = async (providerId: string) => {
+    setFetchingModels(true);
+    setFetchMsg('');
+    try {
+      const count = (await api.fetchCustomProviderModels(providerId)).models?.length || 0;
+      setFetchMsg(count > 0 ? `Fetched ${count} models` : 'No models found');
+      // Refresh providers to get updated model list
+      setProviders((await api.getProviders()).providers || []);
+    } catch (err: unknown) {
+      // Always lead with "Failed": the status banner picks its error styling from that word.
+      setFetchMsg(err instanceof Error && err.message ? `Failed to fetch models: ${err.message}` : 'Failed to fetch models');
+    } finally {
+      setFetchingModels(false);
+      // Auto-dismiss the status banner after three seconds.
+      setTimeout(() => setFetchMsg(''), 3000);
+    }
+  };
+
   const renderProviderCard = (p: Provider) => (
     <div key={p.id} className="flex items-center gap-3 p-4 bg-bg rounded-lg border border-border">
-      <div className="flex-1">
+      <div className="flex-1 min-w-0">
         <div className="flex items-center gap-2">
           <span className="font-medium text-text">{p.name}</span>
-          {p.docs_url && (
+          {p.is_custom && (
+            <span className="text-[10px] px-1.5 py-0.5 bg-surface-hover rounded text-text-dim border border-border">
+              {p.sdk_type}
+            </span>
+          )}
+          {p.docs_url && !p.is_custom && (
             <a
               href={p.docs_url}
               target="_blank"
@@ -96,16 +206,37 @@ export function ProvidersSettings() {
         <span className={`text-xs ${p.configured ? 'text-success' : 'text-text-dim'}`}>
           {p.configured ? 'Configured' : 'Not set'}
         </span>
+        {p.is_custom && p.api_base && (
+          <span className="text-xs text-text-dim block truncate">{p.api_base}</span>
+        )}
       </div>
-      <input
-        type="password"
-        className="w-48 bg-surface border border-border rounded-md px-3 py-2 text-sm text-text placeholder-text-dim focus:border-primary focus:outline-none transition-colors"
-        placeholder={p.key_env}
-        value={keyInputs[p.id] || ''}
-        onChange={(e) =>
-          setKeyInputs((prev) => ({ ...prev, [p.id]: e.target.value }))
-        }
-      />
+      {!p.is_custom ? (
+        <input
+          type="password"
+          className="w-48 bg-surface border border-border rounded-md px-3 py-2 text-sm text-text placeholder-text-dim focus:border-primary focus:outline-none transition-colors shrink-0"
+          placeholder={p.key_env}
+          value={keyInputs[p.id] || ''}
+          onChange={(e) =>
+            setKeyInputs((prev) => ({ ...prev, [p.id]: e.target.value }))
+          }
+        />
+      ) : (
+        <div className="flex items-center gap-2 shrink-0">
+          <button
+            className="px-3 py-2 bg-surface-hover border border-border rounded-md text-sm text-text-dim hover:text-text transition-colors"
+            onClick={() => fetchModelsForProvider(p.id)}
+            disabled={fetchingModels}
+          >
+            {fetchingModels ? '...' : 'Fetch Models'}
+          </button>
+          <button
+            className="px-3 py-2 bg-error/10 border border-error/30 rounded-md text-sm text-error hover:bg-error/20 transition-colors"
+            onClick={() => deleteCustomProvider(p.id)}
+          >
+            Delete
+          </button>
+        </div>
+      )}
     </div>
   );
 
@@ -116,19 +247,25 @@ export function ProvidersSettings() {
           {saveMsg}
         </div>
       )}
-      
+
+      {fetchMsg && (
+        <div className={`mb-4 px-4 py-2 rounded-lg text-sm ${fetchMsg.includes('Failed') || fetchMsg.includes('Error') ? 'bg-error/10 text-error' : 'bg-success/10 text-success'}`}>
+          {fetchMsg}
+        </div>
+      )}
+
       <p className="text-text-dim mb-6">
         API keys are stored in the database per-user. They override .env values.
       </p>
-      
+
       {/* Tab navigation */}
       <div className="flex flex-wrap gap-2 mb-6">
         {TABS.map((tab) => (
           <button
             key={tab.id}
             className={`px-4 py-2 rounded-lg text-sm font-medium transition-all ${
-              activeTab === tab.id 
-                ? 'bg-primary text-white' 
+              activeTab === tab.id
+                ? 'bg-primary text-white'
                 : 'bg-surface-hover text-text-dim hover:text-text'
             }`}
             onClick={() => setActiveTab(tab.id)}
@@ -154,6 +291,18 @@ export function ProvidersSettings() {
         )}
       </div>
 
+      {/* Add Custom Provider button (only on Models tab) */}
+      {activeTab === 'models' && (
+        <div className="mb-6">
+          <button
+            className="w-full py-3 bg-surface-hover border border-border border-dashed text-text-dim rounded-lg font-medium hover:text-text hover:border-primary transition-colors"
+            onClick={() => setShowCustomModal(true)}
+          >
+            + Add Custom Provider
+          </button>
+        </div>
+      )}
+
       <button
         className="w-full py-3 bg-primary text-white rounded-lg font-medium hover:bg-primary-hover transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
         onClick={saveProviderKeys}
@@ -161,6 +310,95 @@ export function ProvidersSettings() {
       >
         {saving ? 'Saving...' : 'Save Keys'}
       </button>
+
+      {/* Custom Provider Modal */}
+      {showCustomModal && (
+        <div
+          className="fixed inset-0 bg-black/60 flex items-center justify-center z-50 p-4"
+          onClick={() => setShowCustomModal(false)}
+        >
+          <div
+            className="bg-surface rounded-xl border border-border w-full max-w-md flex flex-col shadow-xl p-6"
+            onClick={(e) => e.stopPropagation()}
+          >
+            <h3 className="text-lg font-semibold text-text mb-4">Add Custom Provider</h3>
+
+            <div className="flex flex-col gap-4">
+              <div>
+                <label className="block text-sm text-text-dim mb-1">Display Name</label>
+                <input
+                  type="text"
+                  className="w-full bg-bg border border-border rounded-lg px-3 py-2 text-sm text-text placeholder-text-dim focus:border-primary focus:outline-none"
+                  placeholder="e.g. My Organization"
+                  value={customForm.name}
+                  onChange={(e) => setCustomForm((f) => ({ ...f, name: e.target.value }))}
+                />
+              </div>
+
+              <div>
+                <label className="block text-sm text-text-dim mb-1">Provider ID (prefix)</label>
+                <input
+                  type="text"
+                  className="w-full bg-bg border border-border rounded-lg px-3 py-2 text-sm text-text placeholder-text-dim focus:border-primary focus:outline-none"
+                  placeholder="e.g. my-org (models will be my-org/model-name)"
+                  value={customForm.id}
+                  onChange={(e) => setCustomForm((f) => ({ ...f, id: e.target.value }))}
+                />
+              </div>
+
+              <div>
+                <label className="block text-sm text-text-dim mb-1">SDK Type</label>
+                <select
+                  className="w-full bg-bg border border-border rounded-lg px-3 py-2 text-sm text-text focus:border-primary focus:outline-none"
+                  value={customForm.sdk_type}
+                  onChange={(e) => setCustomForm((f) => ({ ...f, sdk_type: e.target.value as SdkType }))}
+                >
+                  {SDK_OPTIONS.map((opt) => (
+                    <option key={opt.value} value={opt.value}>{opt.label}</option>
+                  ))}
+                </select>
+              </div>
+
+              <div>
+                <label className="block text-sm text-text-dim mb-1">API Base URL</label>
+                <input
+                  type="text"
+                  className="w-full bg-bg border border-border rounded-lg px-3 py-2 text-sm text-text placeholder-text-dim focus:border-primary focus:outline-none"
+                  placeholder="https://api.my-org.com/v1"
+                  value={customForm.api_base}
+                  onChange={(e) => setCustomForm((f) => ({ ...f, api_base: e.target.value }))}
+                />
+              </div>
+
+              <div>
+                <label className="block text-sm text-text-dim mb-1">API Key</label>
+                <input
+                  type="password"
+                  className="w-full bg-bg border border-border rounded-lg px-3 py-2 text-sm text-text placeholder-text-dim focus:border-primary focus:outline-none"
+                  placeholder="sk-..."
+                  value={customForm.api_key}
+                  onChange={(e) => setCustomForm((f) => ({ ...f, api_key: e.target.value }))}
+                />
+              </div>
+            </div>
+
+            <div className="flex gap-3 mt-6">
+              <button
+                className="flex-1 py-3 bg-primary text-white rounded-lg font-semibold hover:bg-primary-hover transition-colors"
+                onClick={saveCustomProvider}
+              >
+                Save Provider
+              </button>
+              <button
+                className="px-6 py-3 bg-surface-hover border border-border text-text-dim rounded-lg hover:text-text transition-colors"
+                onClick={() => setShowCustomModal(false)}
+              >
+                Cancel
+              </button>
+            </div>
+          </div>
+        </div>
+      )}
     </div>
   );
 }
diff --git a/frontend/src/index.css b/frontend/src/index.css
index 91f6261..88e1401 100644
--- a/frontend/src/index.css
+++ b/frontend/src/index.css
@@ -81,7 +81,8 @@
   --animate-fade-in: fade-in 0.15s ease-out;
   --animate-slide-up: slide-up 0.2s ease-out;
   --animate-slide-in-right: slide-in-right 0.25s ease-out;
-  --animate-emboss-pulse: emboss-pulse 4s ease-in-out infinite;
+  --animate-hero-float: hero-float 6s ease-in-out infinite;
+  --animate-hero-glow: hero-glow 6s ease-in-out infinite;
 }
 
 @keyframes pulse {
@@ -89,16 +90,25 @@
   50% { opacity: 0.6; }
 }
 
-@keyframes emboss-pulse {
+@keyframes hero-float {
   0%, 100% {
-    opacity: 1;
-    filter: blur(0px);
-    transform: scale(1);
+    transform: translateY(0px);
+    filter: drop-shadow(0 0 20px rgba(59, 130, 246, 0.0));
+  }
+  50% {
+    transform: translateY(-8px);
+    filter: drop-shadow(0 0 30px rgba(59, 130, 246, 0.15));
+  }
+}
+
+@keyframes hero-glow {
+  0%, 100% {
+    opacity: 0;
+    transform: scale(0.8);
   }
   50% {
-    opacity: 0.7;
-    filter: blur(1px);
-    transform: scale(1.01);
+    opacity: 1;
+    transform: scale(1);
   }
 }
 
diff --git a/frontend/src/types.ts b/frontend/src/types.ts
index cc52cbd..9dedcf7 100644
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@@ -59,6 +59,9 @@ export interface Provider {
   configured: boolean;
   categories: string[];
   docs_url?: string;
+  is_custom?: boolean;
+  sdk_type?: string;
+  api_base?: string;
 }
 
 // ── Structured Questions ────────────────────────────────
@@ -92,6 +95,7 @@ export interface Project {
   workspace_path: string | null;
   status: 'active' | 'archived';
   settings: Record<string, any>;
+  is_default?: boolean;
   conversation_count?: number;
   created_at: string;
   updated_at: string;
diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts
index 4b0c245..7b585a0 100644
--- a/frontend/vite.config.ts
+++ b/frontend/vite.config.ts
@@ -5,8 +5,18 @@ export default defineConfig({
   plugins: [react()],
   server: {
     port: 5173,
+    host: true,
     proxy: {
-      '/api': 'http://localhost:3000',
+      '/api/terminal': {
+        target: process.env.VITE_API_URL || 'http://localhost:3000',
+        ws: true,
+      },
+      '/api/events': {
+        target: process.env.VITE_API_URL || 'http://localhost:3000',
+        ws: true,
+      },
+      '/api': process.env.VITE_API_URL || 'http://localhost:3000',
+      '/health': process.env.VITE_API_URL || 'http://localhost:3000',
     },
   },
   build: {
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index d8b8185..38ab7e4 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -10,6 +10,15 @@ importers:
 
   frontend:
     dependencies:
+      '@xterm/addon-fit':
+        specifier: ^0.11.0
+        version: 0.11.0
+      '@xterm/addon-web-links':
+        specifier: ^0.12.0
+        version: 0.12.0
+      '@xterm/xterm':
+        specifier: ^6.0.0
+        version: 6.0.0
       lucide-react:
         specifier: ^1.11.0
         version: 1.11.0(react@19.2.5)
@@ -902,6 +911,15 @@ packages:
   '@vitest/utils@4.1.5':
     resolution: {integrity: sha512-76wdkrmfXfqGjueGgnb45ITPyUi1ycZ4IHgC2bhPDUfWHklY/q3MdLOAB+TF1e6xfl8NxNY0ZYaPCFNWSsw3Ug==}
 
+  '@xterm/addon-fit@0.11.0':
+    resolution: {integrity: sha512-jYcgT6xtVYhnhgxh3QgYDnnNMYTcf8ElbxxFzX0IZo+vabQqSPAjC3c1wJrKB5E19VwQei89QCiZZP86DCPF7g==}
+
+  '@xterm/addon-web-links@0.12.0':
+    resolution: {integrity: sha512-4Smom3RPyVp7ZMYOYDoC/9eGJJJqYhnPLGGqJ6wOBfB8VxPViJNSKdgRYb8NpaM6YSelEKbA2SStD7lGyqaobw==}
+
+  '@xterm/xterm@6.0.0':
+    resolution: {integrity: sha512-TQwDdQGtwwDt+2cgKDLn0IRaSxYu1tSUjgKarSDkUM0ZNiSRXFpjxEsvc/Zgc5kq5omJ+V0a8/kIM2WD3sMOYg==}
+
   acorn-jsx@5.3.2:
     resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==}
     peerDependencies:
@@ -2704,6 +2722,12 @@ snapshots:
       convert-source-map: 2.0.0
       tinyrainbow: 3.1.0
 
+  '@xterm/addon-fit@0.11.0': {}
+
+  '@xterm/addon-web-links@0.12.0': {}
+
+  '@xterm/xterm@6.0.0': {}
+
   acorn-jsx@5.3.2(acorn@8.16.0):
     dependencies:
       acorn: 8.16.0
diff --git a/site/docs/configuration.md b/site/docs/configuration.md
index 7d1a830..3e497ed 100644
--- a/site/docs/configuration.md
+++ b/site/docs/configuration.md
@@ -26,6 +26,7 @@ However, for the app to be **functional**, you need:
 | [Database](#database) | Yes (auto in Docker) | No |
 | [Security](#security) | Auto-generated in dev | No |
 | [LLM Providers](#llm-providers) | No | **Yes** |
+| [Development](#development) | No | No |
 | [Background Jobs](#background-jobs) | No | No |
 | [Tools & Integrations](#tools-integrations) | No | **Yes** |
 | [Sandbox](#sandbox) | No | Partial |
@@ -100,9 +101,28 @@ For self-hosted models with OpenAI-compatible APIs:
 | `LOCAL_MODEL` | `local/default` | Custom model name |
 | `LOCAL_API_KEY` | `not-needed` | API key if required |
 
+### Custom Providers
+
+You can add custom OpenAI-compatible or Anthropic-compatible providers via **Settings > Providers > Add Custom Provider**. Each custom provider requires:
+
+| Field | Description |
+|-------|-------------|
+| Display Name | Human-readable name shown in the model picker |
+| Provider ID | Prefix for model IDs (e.g., `my-org` makes models like `my-org/model-name`) |
+| SDK Type | `OpenAI SDK`, `Anthropic SDK`, `OpenRouter`, or `LiteLLM` |
+| API Base URL | The provider's API endpoint |
+| API Key | Authentication key |
+
+After saving, use the **Fetch Models** button to retrieve the provider's model list via its `/models` endpoint. Fetched models appear in the model picker alongside standard providers.
+
 ### Model Selection
 
-Models are auto-detected based on configured keys. Override in **Settings > Agent** or the model dropdown.
+Models are auto-detected based on configured keys. The model picker shows:
+- **Recently used models** (top 5) for quick access
+- **Models grouped by provider** with logos, sorted by release date (newest first)
+- Live model lists from [models.dev](https://models.dev) for standard providers
+
+Override in **Settings > Agent** or the model dropdown.
 
 | Key Present | Example Model |
 |-------------|---------------|
@@ -113,6 +133,22 @@ Models are auto-detected based on configured keys. Override in **Settings > Agen
 
 ---
 
+## Development
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `DEV_MODE` | `false` | Enables Swagger UI at `/docs`, disables static frontend serving |
+
+When `DEV_MODE=true`:
+- Swagger UI is available at `http://localhost:3000/docs`
+- ReDoc is available at `http://localhost:3000/redoc`
+- The root URL (`/`) redirects to `/docs`
+- The static frontend bundle is **not** served (use Vite dev server on port 5173 instead)
+
+This is auto-set in Docker development mode (`docker-compose.yml`).
+
+---
+
 ## Background Jobs
 
 Enable persistent processing that survives browser refreshes. **Auto-configured in Docker Compose.**
diff --git a/site/docs/setup.md b/site/docs/setup.md
index fee0829..731d65b 100644
--- a/site/docs/setup.md
+++ b/site/docs/setup.md
@@ -62,7 +62,7 @@ External ports are mapped to non-standard ports to avoid conflicts with local se
 
 ## Docker Development
 
-Development mode with live reload. Code changes are reflected immediately.
+Development mode with live reload. Code changes are reflected immediately — both backend and frontend.
 
 ### Setup
 
@@ -74,7 +74,9 @@ make dev-up
 # or: docker compose up -d
 ```
 
-Open `http://localhost:3000`.
+Open `http://localhost:5173` for the UI (Vite with hot module replacement).
+
+`http://localhost:3000` serves the backend API with interactive **Swagger docs** at `/docs`.
 
 ### Commands
 
@@ -86,12 +88,22 @@ Open `http://localhost:3000`.
 | `make dev-clean` | Stop and remove volumes |
 | `make dev-build` | Rebuild dev images |
 
+### Services
+
+| Service | Port | Description |
+|---------|------|-------------|
+| frontend | :5173 | Vite dev server with HMR (proxies `/api` to backend) |
+| web | :3000 | FastAPI backend with Swagger docs at `/docs` |
+| worker | - | Celery background jobs (auto-restarts on code changes) |
+| db | :5433 | PostgreSQL 16 |
+| redis | :6380 | Redis 7 |
+
 ### How it works
 
 The default `docker-compose.yml` is configured for development:
-- Mounts `backend/` into the container
-- Runs uvicorn with `--reload`
-- Uses `watchmedo` for worker auto-restart on code changes
+- **Frontend**: Vite dev server with hot module replacement on port 5173. Edits to `.tsx`/`.ts` files reflect instantly in the browser. Proxies `/api` (with WebSocket proxying enabled for `/api/terminal` and `/api/events`) and `/health` to the backend.
+- **Backend**: Mounts `backend/` into the container, runs uvicorn with `--reload`. In dev mode (`DEV_MODE=true`), Swagger UI is served at `/docs` and the stale static frontend is not served.
+- **Worker**: Uses `watchmedo` for auto-restart on code changes.
 
 ---
 
@@ -139,10 +151,10 @@ Do **not** create a virtual environment at the project root. The backend manages
 
 | Server | Port | Description |
 |--------|------|-------------|
-| Backend | 3000 | FastAPI with auto-reload |
-| Frontend | 5173 | Vite with HMR |
+| Backend | 3000 | FastAPI API + Swagger docs at `/docs` (when `DEV_MODE=true`) |
+| Frontend | 5173 | Vite with HMR (proxies `/api` to backend) |
 
-The Vite dev server proxies `/api` requests to the backend.
+Open `http://localhost:5173` for the UI. The Vite dev server proxies `/api` (including `/api/terminal` and `/api/events`) and `/health` requests to the backend.
 
 ### With background jobs
 

From af9a41c50f1a0f3e63faf47c70de96d58ca189bb Mon Sep 17 00:00:00 2001
From: xprilion <xprilion@gmail.com>
Date: Mon, 27 Apr 2026 22:16:55 +0530
Subject: [PATCH 6/7] Update version

---
 Makefile                                    | 53 ++++++++++++++++++++-
 VERSION                                     |  1 +
 backend/openmlr/__init__.py                 |  9 ++++
 backend/openmlr/app.py                      |  3 +-
 backend/openmlr/routes/health.py            |  4 +-
 backend/pyproject.toml                      |  2 +-
 backend/tests/test_app.py                   |  3 +-
 backend/tests/test_projects.py              |  4 +-
 frontend/package.json                       |  2 +-
 frontend/src/components/Sidebar.tsx         | 43 +++++++++--------
 frontend/src/vite-env.d.ts                  |  4 ++
 frontend/vite.config.ts                     |  7 +++
 package.json                                |  2 +-
 site/docs/.vitepress/config.ts              | 12 ++++-
 site/docs/.vitepress/theme/CustomFooter.vue | 10 +++-
 15 files changed, 127 insertions(+), 32 deletions(-)
 create mode 100644 VERSION
 create mode 100644 frontend/src/vite-env.d.ts

diff --git a/Makefile b/Makefile
index 6b73bd8..350114a 100644
--- a/Makefile
+++ b/Makefile
@@ -7,10 +7,61 @@ BACKEND       := backend
 FRONTEND      := frontend
 PORT          ?= 3000
 DOCKER_USER   ?= xprilion
-VERSION       ?= 0.3.0
+VERSION       := $(shell cat VERSION 2>/dev/null || echo 0.0.0)
 DOCKER_COMPOSE := docker compose
 LOGO_SRC      := assets/full-logo.png
 
+# ─── Versioning ───────────────────────────────────────────
+# Single source of truth: VERSION file at repo root.
+# Bump targets compute the new version, write it, then sync everywhere.
+
+MAJOR := $(word 1,$(subst ., ,$(VERSION)))
+MINOR := $(word 2,$(subst ., ,$(VERSION)))
+PATCH := $(word 3,$(subst ., ,$(VERSION)))
+
+.PHONY: version
+version: ## Print current version
+	@echo $(VERSION)
+
+.PHONY: version-major
+version-major: ## Bump major version (e.g. 0.3.1 -> 1.0.0)
+	@NEW=$$(( $(MAJOR) + 1 )).0.0; \
+	echo "$$NEW" > VERSION; \
+	$(MAKE) _version-sync; \
+	echo "Version bumped: $(VERSION) -> $$NEW"
+
+.PHONY: version-minor
+version-minor: ## Bump minor version (e.g. 0.3.1 -> 0.4.0)
+	@NEW=$(MAJOR).$$(( $(MINOR) + 1 )).0; \
+	echo "$$NEW" > VERSION; \
+	$(MAKE) _version-sync; \
+	echo "Version bumped: $(VERSION) -> $$NEW"
+
+.PHONY: version-patch
+version-patch: ## Bump patch version (e.g. 0.3.0 -> 0.3.1)
+	@NEW=$(MAJOR).$(MINOR).$$(( $(PATCH) + 1 )); \
+	echo "$$NEW" > VERSION; \
+	$(MAKE) _version-sync; \
+	echo "Version bumped: $(VERSION) -> $$NEW"
+
+.PHONY: version-set
+version-set: ## Set explicit version (V=1.2.3)
+	@if [ -z "$(V)" ]; then echo "Usage: make version-set V=1.2.3"; exit 1; fi
+	@echo "$(V)" > VERSION
+	@$(MAKE) _version-sync
+	@echo "Version set: $(VERSION) -> $(V)"
+
+.PHONY: _version-sync
+_version-sync: # (internal) propagate VERSION file to all project files
+	$(eval NEW_VERSION := $(shell cat VERSION))
+	@# backend/pyproject.toml
+	@sed -i '' 's/^version = ".*"/version = "$(NEW_VERSION)"/' $(BACKEND)/pyproject.toml
+	@# package.json (root)
+	@sed -i '' 's/"version": ".*"/"version": "$(NEW_VERSION)"/' package.json
+	@# frontend/package.json
+	@sed -i '' 's/"version": ".*"/"version": "$(NEW_VERSION)"/' $(FRONTEND)/package.json
+	@echo "Synced version $(NEW_VERSION) to all project files"
+
 # ─── Setup ────────────────────────────────────────────────
 
 .PHONY: install
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..1d0ba9e
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.4.0
diff --git a/backend/openmlr/__init__.py b/backend/openmlr/__init__.py
index e69de29..afaabb9 100644
--- a/backend/openmlr/__init__.py
+++ b/backend/openmlr/__init__.py
@@ -0,0 +1,9 @@
+"""OpenMLR — AI-powered ML Research Agent."""
+
+from pathlib import Path
+
+_version_file = Path(__file__).resolve().parent.parent.parent / "VERSION"
+try:
+    __version__ = _version_file.read_text().strip()
+except FileNotFoundError:
+    __version__ = "0.0.0"
diff --git a/backend/openmlr/app.py b/backend/openmlr/app.py
index 2d26d02..3d485f6 100644
--- a/backend/openmlr/app.py
+++ b/backend/openmlr/app.py
@@ -9,6 +9,7 @@
 from fastapi.responses import FileResponse, JSONResponse
 from fastapi.staticfiles import StaticFiles
 
+from . import __version__
 from .config import load_config
 from .db.engine import engine
 from .db.models import Base
@@ -55,7 +56,7 @@ async def lifespan(app: FastAPI):
 app = FastAPI(
     title="OpenMLR",
     description="ML research intern — reads papers, trains models, writes papers",
-    version="0.3.0",
+    version=__version__,
     lifespan=lifespan,
     docs_url="/docs" if _DEV_MODE else None,
     redoc_url="/redoc" if _DEV_MODE else None,
diff --git a/backend/openmlr/routes/health.py b/backend/openmlr/routes/health.py
index f4fea71..cb88e38 100644
--- a/backend/openmlr/routes/health.py
+++ b/backend/openmlr/routes/health.py
@@ -4,9 +4,9 @@
 
 from fastapi import APIRouter
 
-router = APIRouter(tags=["health"])
+from openmlr import __version__ as VERSION
 
-VERSION = "0.3.0"
+router = APIRouter(tags=["health"])
 
 
 @router.get("/api/health")
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index e91b37f..63816c9 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openmlr"
-version = "0.3.0"
+version = "0.4.0"
 description = "OpenMLR — an ML research intern that reads papers, trains models, and ships code"
 requires-python = ">=3.12"
 license = { text = "MIT" }
diff --git a/backend/tests/test_app.py b/backend/tests/test_app.py
index 5a574d6..98a88b8 100644
--- a/backend/tests/test_app.py
+++ b/backend/tests/test_app.py
@@ -2,6 +2,7 @@
 
 import pytest
 
+from openmlr import __version__
 from openmlr.app import app
 
 pytestmark = pytest.mark.asyncio
@@ -12,7 +13,7 @@ async def test_app_title(self):
         assert app.title == "OpenMLR"
 
     async def test_app_version(self):
-        assert app.version == "0.3.0"
+        assert app.version == __version__
 
     async def test_app_routers_registered(self):
         route_paths = [r.path for r in app.routes]
diff --git a/backend/tests/test_projects.py b/backend/tests/test_projects.py
index 62930a8..76a5acc 100644
--- a/backend/tests/test_projects.py
+++ b/backend/tests/test_projects.py
@@ -144,7 +144,9 @@ async def test_list_projects_api(self, auth_client):
         resp = await auth_client.get("/api/projects")
         assert resp.status_code == 200
         data = resp.json()
-        assert len(data["projects"]) == 2
+        # The endpoint auto-creates a default project, so filter it out
+        non_default = [p for p in data["projects"] if not p.get("is_default")]
+        assert len(non_default) == 2
 
     async def test_get_project_api(self, auth_client):
         create_resp = await auth_client.post("/api/projects", json={"name": "Get Me"})
diff --git a/frontend/package.json b/frontend/package.json
index a8de748..f99bdfd 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -1,7 +1,7 @@
 {
   "name": "openmlr-frontend",
   "private": true,
-  "version": "1.0.0",
+  "version": "0.4.0",
   "type": "module",
   "scripts": {
     "dev": "vite",
diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx
index 0282575..d3256f1 100644
--- a/frontend/src/components/Sidebar.tsx
+++ b/frontend/src/components/Sidebar.tsx
@@ -234,26 +234,29 @@ export function Sidebar({ conversations, currentUuid, user, convStatuses, projec
       </div>
 
       {/* Footer */}
-      <div className="flex items-center gap-2 pt-3 border-t border-border">
-        <button 
-          className="w-9 h-9 rounded-lg flex items-center justify-center text-text-dim hover:bg-surface-hover hover:text-text transition-colors"
-          onClick={() => navigate('/settings')} 
-          title="Settings"
-        >
-          <Settings size={18} />
-        </button>
-        {user && (
-          <span className="flex-1 truncate text-sm text-text-dim" title={user.username}>
-            {user.display_name || user.username}
-          </span>
-        )}
-        <button 
-          className="w-9 h-9 rounded-lg flex items-center justify-center text-text-dim hover:bg-surface-hover hover:text-error transition-colors"
-          onClick={() => { setToken(null); window.location.reload(); }} 
-          title="Sign out"
-        >
-          <LogOut size={18} />
-        </button>
+      <div className="pt-3 border-t border-border flex flex-col gap-1">
+        <div className="flex items-center gap-2">
+          <button 
+            className="w-9 h-9 rounded-lg flex items-center justify-center text-text-dim hover:bg-surface-hover hover:text-text transition-colors"
+            onClick={() => navigate('/settings')} 
+            title="Settings"
+          >
+            <Settings size={18} />
+          </button>
+          {user && (
+            <span className="flex-1 truncate text-sm text-text-dim" title={user.username}>
+              {user.display_name || user.username}
+            </span>
+          )}
+          <button 
+            className="w-9 h-9 rounded-lg flex items-center justify-center text-text-dim hover:bg-surface-hover hover:text-error transition-colors"
+            onClick={() => { setToken(null); window.location.reload(); }} 
+            title="Sign out"
+          >
+            <LogOut size={18} />
+          </button>
+        </div>
+        <span className="text-[11px] text-text-dim px-2">v{__APP_VERSION__}</span>
       </div>
 
       {pendingDelete && (
diff --git a/frontend/src/vite-env.d.ts b/frontend/src/vite-env.d.ts
new file mode 100644
index 0000000..71f2e9d
--- /dev/null
+++ b/frontend/src/vite-env.d.ts
@@ -0,0 +1,4 @@
+/// <reference types="vite/client" />
+
+/** Injected by Vite at build time from the root VERSION file. */
+declare const __APP_VERSION__: string;
diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts
index 7b585a0..42d5f19 100644
--- a/frontend/vite.config.ts
+++ b/frontend/vite.config.ts
@@ -1,8 +1,15 @@
+import { readFileSync } from 'fs'
+import { resolve } from 'path'
 import { defineConfig } from 'vite'
 import react from '@vitejs/plugin-react'
 
+const version = readFileSync(resolve(__dirname, '..', 'VERSION'), 'utf-8').trim()
+
 export default defineConfig({
   plugins: [react()],
+  define: {
+    __APP_VERSION__: JSON.stringify(version),
+  },
   server: {
     port: 5173,
     host: true,
diff --git a/package.json b/package.json
index e749113..cc9322e 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "openmlr-workspace",
   "private": true,
-  "version": "2.0.0",
+  "version": "0.4.0",
   "description": "OpenMLR — ML research intern (frontend workspace)",
   "scripts": {
     "dev:frontend": "pnpm --filter openmlr-frontend dev",
diff --git a/site/docs/.vitepress/config.ts b/site/docs/.vitepress/config.ts
index 7e8173e..ac0773e 100644
--- a/site/docs/.vitepress/config.ts
+++ b/site/docs/.vitepress/config.ts
@@ -1,6 +1,8 @@
 import { defineConfig } from "vitepress";
-import { copyFileSync } from "fs";
-import { join } from "path";
+import { copyFileSync, readFileSync } from "fs";
+import { join, resolve } from "path";
+
+const version = readFileSync(resolve(__dirname, "../../..", "VERSION"), "utf-8").trim();
 
 export default defineConfig({
   title: "OpenMLR",
@@ -104,6 +106,12 @@ export default defineConfig({
     }
   },
 
+  vite: {
+    define: {
+      __APP_VERSION__: JSON.stringify(version),
+    },
+  },
+
   themeConfig: {
     logo: "/logo-64.png",
     nav: [
diff --git a/site/docs/.vitepress/theme/CustomFooter.vue b/site/docs/.vitepress/theme/CustomFooter.vue
index a73a1b0..7f094f6 100644
--- a/site/docs/.vitepress/theme/CustomFooter.vue
+++ b/site/docs/.vitepress/theme/CustomFooter.vue
@@ -1,11 +1,13 @@
 <script setup lang="ts">
+declare const __APP_VERSION__: string;
+const version = __APP_VERSION__;
 </script>
 
 <template>
   <footer class="custom-footer">
     <div class="custom-footer-container">
       <div class="footer-left">
-        <div class="footer-name">OpenMLR</div>
+        <div class="footer-name">OpenMLR <span class="footer-version">v{{ version }}</span></div>
         <div class="footer-copyright">Copyright © 2025 Anubhav Singh. Released under the MIT License.</div>
       </div>
       <div class="footer-right">
@@ -44,6 +46,12 @@
   color: var(--vp-c-text-1);
 }
 
+.footer-version {
+  font-size: 12px;
+  font-weight: 400;
+  color: var(--vp-c-text-3);
+}
+
 .footer-copyright {
   font-size: 13px;
   color: var(--vp-c-text-2);

From 50c0f916f03af2a5887fbcf3eb4476530664ef51 Mon Sep 17 00:00:00 2001
From: xprilion <xprilion@gmail.com>
Date: Mon, 27 Apr 2026 22:25:36 +0530
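Subject: [PATCH 7/7] Update version fix

Stop reading the root VERSION file at build/import time and use
hardcoded version constants instead:

- backend/openmlr/__init__.py sets __version__ to a string literal
  rather than reading VERSION via pathlib.
- The frontend and the docs site drop the Vite `define` that injected
  __APP_VERSION__ and import APP_VERSION from a new version.ts module.
- `make _version-sync` now rewrites those three files with sed so they
  stay in step with VERSION.
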
---
 Makefile                                    |  6 ++++++
 backend/openmlr/__init__.py                 |  8 +-------
 frontend/src/components/Sidebar.tsx         |  3 ++-
 frontend/src/version.ts                     |  1 +
 frontend/src/vite-env.d.ts                  |  4 ----
 frontend/vite.config.ts                     |  7 -------
 site/docs/.vitepress/config.ts              | 12 ++----------
 site/docs/.vitepress/theme/CustomFooter.vue |  4 ++--
 site/docs/.vitepress/version.ts             |  1 +
 9 files changed, 15 insertions(+), 31 deletions(-)
 create mode 100644 frontend/src/version.ts
 delete mode 100644 frontend/src/vite-env.d.ts
 create mode 100644 site/docs/.vitepress/version.ts

diff --git a/Makefile b/Makefile
index 350114a..f165256 100644
--- a/Makefile
+++ b/Makefile
@@ -54,8 +54,14 @@ version-set: ## Set explicit version (V=1.2.3)
 .PHONY: _version-sync
 _version-sync: # (internal) propagate VERSION file to all project files
 	$(eval NEW_VERSION := $(shell cat VERSION))
+	@# backend/openmlr/__init__.py
+	@sed -i '' 's/^__version__ = ".*"/__version__ = "$(NEW_VERSION)"/' $(BACKEND)/openmlr/__init__.py
 	@# backend/pyproject.toml
 	@sed -i '' 's/^version = ".*"/version = "$(NEW_VERSION)"/' $(BACKEND)/pyproject.toml
+	@# frontend/src/version.ts
+	@sed -i '' 's/^export const APP_VERSION = ".*"/export const APP_VERSION = "$(NEW_VERSION)"/' $(FRONTEND)/src/version.ts
+	@# site/docs/.vitepress/version.ts
+	@sed -i '' 's/^export const APP_VERSION = ".*"/export const APP_VERSION = "$(NEW_VERSION)"/' site/docs/.vitepress/version.ts
 	@# package.json (root)
 	@sed -i '' 's/"version": ".*"/"version": "$(NEW_VERSION)"/' package.json
 	@# frontend/package.json
diff --git a/backend/openmlr/__init__.py b/backend/openmlr/__init__.py
index afaabb9..3355a53 100644
--- a/backend/openmlr/__init__.py
+++ b/backend/openmlr/__init__.py
@@ -1,9 +1,3 @@
 """OpenMLR — AI-powered ML Research Agent."""
 
-from pathlib import Path
-
-_version_file = Path(__file__).resolve().parent.parent.parent / "VERSION"
-try:
-    __version__ = _version_file.read_text().strip()
-except FileNotFoundError:
-    __version__ = "0.0.0"
+__version__ = "0.4.0"  # Rewritten by the Makefile _version-sync target from the root VERSION file; do not edit by hand.
diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx
index d3256f1..6b4ba4b 100644
--- a/frontend/src/components/Sidebar.tsx
+++ b/frontend/src/components/Sidebar.tsx
@@ -2,6 +2,7 @@ import { useState, useMemo } from 'react';
 import { useNavigate } from 'react-router-dom';
 import { setToken } from '../api';
 import type { Conversation, User, Project } from '../types';
+import { APP_VERSION } from '../version';
 import { ConfirmDialog } from './ConfirmDialog';
 import { 
   PanelLeftClose, 
@@ -256,7 +257,7 @@ export function Sidebar({ conversations, currentUuid, user, convStatuses, projec
             <LogOut size={18} />
           </button>
         </div>
-        <span className="text-[11px] text-text-dim px-2">v{__APP_VERSION__}</span>
+        <span className="text-[11px] text-text-dim px-2">v{APP_VERSION}</span>
       </div>
 
       {pendingDelete && (
diff --git a/frontend/src/version.ts b/frontend/src/version.ts
new file mode 100644
index 0000000..be79714
--- /dev/null
+++ b/frontend/src/version.ts
@@ -0,0 +1 @@
+export const APP_VERSION = "0.4.0"; // Rewritten by the Makefile _version-sync target from the root VERSION file; do not edit by hand.
diff --git a/frontend/src/vite-env.d.ts b/frontend/src/vite-env.d.ts
deleted file mode 100644
index 71f2e9d..0000000
--- a/frontend/src/vite-env.d.ts
+++ /dev/null
@@ -1,4 +0,0 @@
-/// <reference types="vite/client" />
-
-/** Injected by Vite at build time from the root VERSION file. */
-declare const __APP_VERSION__: string;
diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts
index 42d5f19..7b585a0 100644
--- a/frontend/vite.config.ts
+++ b/frontend/vite.config.ts
@@ -1,15 +1,8 @@
-import { readFileSync } from 'fs'
-import { resolve } from 'path'
 import { defineConfig } from 'vite'
 import react from '@vitejs/plugin-react'
 
-const version = readFileSync(resolve(__dirname, '..', 'VERSION'), 'utf-8').trim()
-
 export default defineConfig({
   plugins: [react()],
-  define: {
-    __APP_VERSION__: JSON.stringify(version),
-  },
   server: {
     port: 5173,
     host: true,
diff --git a/site/docs/.vitepress/config.ts b/site/docs/.vitepress/config.ts
index ac0773e..7e8173e 100644
--- a/site/docs/.vitepress/config.ts
+++ b/site/docs/.vitepress/config.ts
@@ -1,8 +1,6 @@
 import { defineConfig } from "vitepress";
-import { copyFileSync, readFileSync } from "fs";
-import { join, resolve } from "path";
-
-const version = readFileSync(resolve(__dirname, "../../..", "VERSION"), "utf-8").trim();
+import { copyFileSync } from "fs";
+import { join } from "path";
 
 export default defineConfig({
   title: "OpenMLR",
@@ -106,12 +104,6 @@ export default defineConfig({
     }
   },
 
-  vite: {
-    define: {
-      __APP_VERSION__: JSON.stringify(version),
-    },
-  },
-
   themeConfig: {
     logo: "/logo-64.png",
     nav: [
diff --git a/site/docs/.vitepress/theme/CustomFooter.vue b/site/docs/.vitepress/theme/CustomFooter.vue
index 7f094f6..9336ed5 100644
--- a/site/docs/.vitepress/theme/CustomFooter.vue
+++ b/site/docs/.vitepress/theme/CustomFooter.vue
@@ -1,6 +1,6 @@
 <script setup lang="ts">
-declare const __APP_VERSION__: string;
-const version = __APP_VERSION__;
+import { APP_VERSION } from '../version';
+const version = APP_VERSION;
 </script>
 
 <template>
diff --git a/site/docs/.vitepress/version.ts b/site/docs/.vitepress/version.ts
new file mode 100644
index 0000000..be79714
--- /dev/null
+++ b/site/docs/.vitepress/version.ts
@@ -0,0 +1 @@
+export const APP_VERSION = "0.4.0"; // Rewritten by the Makefile _version-sync target from the root VERSION file; do not edit by hand.