diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py
index 02fc736..ebecf06 100644
--- a/backend/app/api/__init__.py
+++ b/backend/app/api/__init__.py
@@ -6,6 +6,8 @@
     api_keys,
     apps,
     auth,
+    auto_tuning,
+    chat_proxy,
     containers,
     conversations,
     dashboard,
@@ -62,3 +64,9 @@
 
 # Semantic Router
 api_router.include_router(semantic_router.router)
+
+# Chat Proxy for external endpoints
+api_router.include_router(chat_proxy.router, tags=["chat-proxy"])
+
+# Auto-Tuning Agent
+api_router.include_router(auto_tuning.router, prefix="/auto-tuning", tags=["auto-tuning"])
diff --git a/backend/app/api/auto_tuning.py b/backend/app/api/auto_tuning.py
new file mode 100644
index 0000000..7a4dcc6
--- /dev/null
+++ b/backend/app/api/auto_tuning.py
@@ -0,0 +1,722 @@
+"""Auto-Tuning API routes
+
+Implements the Auto-Tuning Agent workflow:
+1. Environment Analysis - Query hardware and model info
+2. Knowledge Base Query - Search for similar configurations
+3. Configuration Space Exploration - Generate candidate configs
+4. Auto Benchmark - Test each configuration
+5. Result Analysis - Recommend best configuration
+"""
+
+import logging
+
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query
+from pydantic import BaseModel
+from sqlalchemy import func, select
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import selectinload
+
+from app.core.deps import require_operator, require_viewer
+from app.database import get_db
+from app.models.deployment import Deployment, DeploymentStatus
+from app.models.llm_model import LLMModel
+from app.models.tuning import (
+    BenchmarkResult,
+    OptimizationTarget,
+    PerformanceKnowledge,
+    TuningJob,
+    TuningJobStatus,
+)
+from app.models.user import User
+from app.models.worker import Worker
+from app.schemas.tuning import (
+    BenchmarkMetrics,
+    BenchmarkRequest,
+    BenchmarkResultListResponse,
+    BenchmarkResultResponse,
+    KnowledgeQuery,
+    KnowledgeQueryResponse,
+    KnowledgeRecord,
+    KnowledgeSaveRequest,
+    TuningJobCreate,
+    TuningJobListResponse,
+    TuningJobProgress,
+    TuningJobResponse,
+)
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+
+# ============================================================================
+# Helper Functions
+# ============================================================================
+
+
+def tuning_job_to_response(job: TuningJob, include_conversation: bool = True) -> TuningJobResponse:
+    """Build the TuningJobResponse schema for a TuningJob row"""
+    # The stored progress dict becomes a typed schema; a falsy value stays None.
+    progress_schema = TuningJobProgress(**job.progress) if job.progress else None
+
+    # The conversation log is only converted when the caller asks for it.
+    messages = None
+    if include_conversation and job.conversation_log:
+        from app.schemas.tuning import ConversationMessage
+
+        messages = [ConversationMessage(**entry) for entry in job.conversation_log]
+
+    # model_name / worker_name are None when the matching relationship is unset.
+    return TuningJobResponse(
+        id=job.id,
+        model_id=job.model_id,
+        worker_id=job.worker_id,
+        optimization_target=job.optimization_target,
+        status=job.status,
+        status_message=job.status_message,
+        current_step=job.current_step,
+        total_steps=job.total_steps,
+        progress=progress_schema,
+        best_config=job.best_config,
+        all_results=job.all_results,
+        conversation_log=messages,
+        created_at=job.created_at,
+        updated_at=job.updated_at,
+        completed_at=job.completed_at,
+        model_name=job.model.name if job.model else None,
+        worker_name=job.worker.name if job.worker else None,
+    )
+
+
+def benchmark_result_to_response(result: BenchmarkResult) -> BenchmarkResultResponse:
+    """Map a BenchmarkResult row onto BenchmarkResultResponse, nesting its metric columns"""
+    # Each metric column is copied into the BenchmarkMetrics field of the same name.
+    metric_names = (
+        "throughput_tps", "ttft_ms", "tpot_ms",
+        "total_latency_ms", "gpu_utilization", "vram_usage_gb",
+    )
+    measured = BenchmarkMetrics(**{name: getattr(result, name) for name in metric_names})
+
+    return BenchmarkResultResponse(
+        id=result.id,
+        tuning_job_id=result.tuning_job_id,
+        deployment_id=result.deployment_id,
+        config=result.config,
+        test_type=result.test_type,
+        test_duration_seconds=result.test_duration_seconds,
+        input_length=result.input_length,
+        output_length=result.output_length,
+        concurrency=result.concurrency,
+        metrics=measured,
+        error_message=result.error_message,
+        created_at=result.created_at,
+    )
+
+
+# ============================================================================
+# Tuning Job Endpoints
+# ============================================================================
+
+
+@router.get("/jobs", response_model=TuningJobListResponse)
+async def list_tuning_jobs(
+    skip: int = Query(0, ge=0),
+    limit: int = Query(50, ge=1, le=100),
+    status: TuningJobStatus | None = None,
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(require_viewer),
+):
+    """List tuning jobs newest-first, optionally filtered by status, with the total match count"""
+    # One filter list feeds both the count and the page query.
+    filters = [TuningJob.status == status.value] if status else []
+
+    # Number of matching jobs, independent of skip/limit.
+    total = await db.scalar(
+        select(func.count()).select_from(select(TuningJob).where(*filters).subquery())
+    )
+
+    # Page of jobs; model/worker are loaded because the converter reads their names.
+    page_stmt = (
+        select(TuningJob)
+        .options(selectinload(TuningJob.model), selectinload(TuningJob.worker))
+        .where(*filters)
+        .order_by(TuningJob.created_at.desc())
+        .offset(skip)
+        .limit(limit)
+    )
+    jobs = (await db.execute(page_stmt)).scalars().all()
+
+    return TuningJobListResponse(
+        items=[tuning_job_to_response(job, include_conversation=False) for job in jobs],
+        total=total or 0,
+    )
+
+
+@router.post("/jobs", response_model=TuningJobResponse, status_code=201)
+async def create_tuning_job(
+    job_in: TuningJobCreate,
+    background_tasks: BackgroundTasks,
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(require_operator),
+):
+    """Create a PENDING tuning job and schedule the agent run; 404 if model or worker is unknown"""
+    # The referenced model must exist (404 otherwise)
+    model_result = await db.execute(select(LLMModel).where(LLMModel.id == job_in.model_id))
+    model = model_result.scalar_one_or_none()
+    if not model:
+        raise HTTPException(status_code=404, detail="Model not found")
+
+    # The referenced worker must exist (404 otherwise)
+    worker_result = await db.execute(select(Worker).where(Worker.id == job_in.worker_id))
+    worker = worker_result.scalar_one_or_none()
+    if not worker:
+        raise HTTPException(status_code=404, detail="Worker not found")
+
+    # New jobs start as PENDING with an initial 5-step progress payload
+    job = TuningJob(
+        model_id=job_in.model_id,
+        worker_id=job_in.worker_id,
+        optimization_target=job_in.optimization_target.value,
+        status=TuningJobStatus.PENDING.value,
+        progress={
+            "step": 0,
+            "total_steps": 5,
+            "step_name": "Initializing",
+            "step_description": "Preparing auto-tuning job...",
+            "configs_tested": 0,
+            "configs_total": 0,
+        },
+    )
+
+    db.add(job)
+    await db.commit()
+    await db.refresh(job)
+
+    # Optional per-job LLM settings are handed to the agent as a plain dict
+    llm_config = None
+    if job_in.llm_config:
+        llm_config = job_in.llm_config.model_dump()
+
+    # Registered as a FastAPI background task; only the job id is passed, not the ORM object
+    background_tasks.add_task(run_auto_tuning, job.id, llm_config)
+
+    # Re-query with model/worker loaded; the response converter reads their names
+    result = await db.execute(
+        select(TuningJob)
+        .where(TuningJob.id == job.id)
+        .options(
+            selectinload(TuningJob.model),
+            selectinload(TuningJob.worker),
+        )
+    )
+    job = result.scalar_one()
+
+    return tuning_job_to_response(job)
+
+
+@router.get("/jobs/{job_id}", response_model=TuningJobResponse)
+async def get_tuning_job(
+    job_id: int,
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(require_viewer),
+):
+    """Return a single tuning job with its conversation log; 404 if the id is unknown"""
+    # model/worker are loaded with the job because the converter reads their names.
+    lookup = (
+        select(TuningJob)
+        .options(
+            selectinload(TuningJob.model),
+            selectinload(TuningJob.worker),
+        )
+        .where(TuningJob.id == job_id)
+    )
+    job = (await db.execute(lookup)).scalar_one_or_none()
+    if job:
+        return tuning_job_to_response(job)
+
+    raise HTTPException(status_code=404, detail="Tuning job not found")
+
+
+@router.post("/jobs/{job_id}/cancel", response_model=TuningJobResponse)
+async def cancel_tuning_job(
+    job_id: int,
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(require_operator),
+):
+    """Mark a tuning job as CANCELLED; 404 if unknown, 400 if already COMPLETED or FAILED"""
+    result = await db.execute(
+        select(TuningJob)
+        .where(TuningJob.id == job_id)
+        .options(
+            selectinload(TuningJob.model),
+            selectinload(TuningJob.worker),
+        )
+    )
+    job = result.scalar_one_or_none()
+
+    if not job:
+        raise HTTPException(status_code=404, detail="Tuning job not found")
+    # A job that is already CANCELLED passes this check and is simply cancelled again.
+    if job.status in [TuningJobStatus.COMPLETED.value, TuningJobStatus.FAILED.value]:
+        raise HTTPException(status_code=400, detail="Job is already finished")
+    # NOTE(review): only the stored status changes here; presumably the agent polls it - confirm.
+    job.status = TuningJobStatus.CANCELLED.value
+    job.status_message = "Cancelled by user"
+    await db.commit()
+    await db.refresh(job)
+    # NOTE(review): confirm job.model / job.worker are still loaded after commit + refresh.
+    return tuning_job_to_response(job)
+
+
+@router.delete("/jobs/{job_id}")
+async def delete_tuning_job(
+    job_id: int,
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(require_operator),
+):
+    """Delete a tuning job; 404 if the id is unknown, 400 while the job is in a running state"""
+    job = (await db.execute(select(TuningJob).where(TuningJob.id == job_id))).scalar_one_or_none()
+    if not job:
+        raise HTTPException(status_code=404, detail="Tuning job not found")
+
+    # Statuses treated as "running"; such a job has to be cancelled before deletion.
+    running = (
+        TuningJobStatus.PENDING, TuningJobStatus.ANALYZING, TuningJobStatus.QUERYING_KB,
+        TuningJobStatus.EXPLORING, TuningJobStatus.BENCHMARKING,
+    )
+    if job.status in [state.value for state in running]:
+        raise HTTPException(status_code=400, detail="Cannot delete a running job. Cancel it first.")
+    await db.delete(job)
+    await db.commit()
+    return {"success": True, "message": f"Tuning job {job_id} deleted"}
+
+
+# ============================================================================
+# Benchmark Endpoints
+# ============================================================================
+
+
+@router.post("/benchmarks/run", response_model=BenchmarkResultResponse)
+async def run_benchmark(
+    request: BenchmarkRequest,
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(require_operator),
+):
+    """Benchmark a RUNNING deployment and store the result; 404 if unknown, 400 if not running"""
+    # _run_benchmark_test reads deployment.worker; load it here, since a lazy
+    # relationship load cannot run implicitly on an AsyncSession.
+    result = await db.execute(
+        select(Deployment)
+        .where(Deployment.id == request.deployment_id)
+        .options(selectinload(Deployment.model), selectinload(Deployment.worker))
+    )
+    deployment = result.scalar_one_or_none()
+    if not deployment:
+        raise HTTPException(status_code=404, detail="Deployment not found")
+
+    if deployment.status != DeploymentStatus.RUNNING.value:
+        raise HTTPException(status_code=400, detail="Deployment is not running")
+
+    # On failure the helper returns only an "error" key, so every metric column
+    # below is stored as None and error_message carries the reason.
+    metrics = await _run_benchmark_test(deployment, request)
+
+    benchmark_result = BenchmarkResult(
+        deployment_id=deployment.id,
+        config={
+            "engine": deployment.backend,
+            "gpu_indexes": deployment.gpu_indexes,
+            "extra_params": deployment.extra_params,
+        },
+        test_type=request.test_type,
+        test_duration_seconds=request.duration_seconds,
+        input_length=request.input_length,
+        output_length=request.output_length,
+        concurrency=request.concurrency,
+        throughput_tps=metrics.get("throughput_tps"),
+        ttft_ms=metrics.get("ttft_ms"),
+        tpot_ms=metrics.get("tpot_ms"),
+        total_latency_ms=metrics.get("total_latency_ms"),
+        gpu_utilization=metrics.get("gpu_utilization"),
+        vram_usage_gb=metrics.get("vram_usage_gb"),
+        raw_results=metrics.get("raw"),
+        error_message=metrics.get("error"),
+    )
+
+    db.add(benchmark_result)
+    await db.commit()
+    await db.refresh(benchmark_result)
+
+    return benchmark_result_to_response(benchmark_result)
+
+
+@router.get("/benchmarks", response_model=BenchmarkResultListResponse)
+async def list_benchmark_results(
+    skip: int = Query(0, ge=0),
+    limit: int = Query(50, ge=1, le=100),
+    deployment_id: int | None = None,
+    tuning_job_id: int | None = None,
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(require_viewer),
+):
+    """List benchmark results newest-first, optionally filtered by deployment and/or tuning job"""
+    # Shared filter list for the count and the page query. Filters are applied
+    # on truthiness, so an id of 0 behaves like "no filter".
+    filters = []
+    if deployment_id:
+        filters.append(BenchmarkResult.deployment_id == deployment_id)
+    if tuning_job_id:
+        filters.append(BenchmarkResult.tuning_job_id == tuning_job_id)
+
+    # Number of matching rows, independent of skip/limit.
+    total = await db.scalar(
+        select(func.count()).select_from(select(BenchmarkResult).where(*filters).subquery())
+    )
+
+    # Requested page, most recent results first.
+    page_stmt = (
+        select(BenchmarkResult)
+        .where(*filters)
+        .order_by(BenchmarkResult.created_at.desc())
+        .offset(skip)
+        .limit(limit)
+    )
+    rows = (await db.execute(page_stmt)).scalars().all()
+
+    return BenchmarkResultListResponse(
+        items=[benchmark_result_to_response(row) for row in rows],
+        total=total or 0,
+    )
+
+
+# ============================================================================
+# Knowledge Base Endpoints
+# ============================================================================
+
+
+@router.post("/knowledge/query", response_model=KnowledgeQueryResponse)
+async def query_knowledge_base(
+    query: KnowledgeQuery,
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(require_viewer),
+):
+    """Search the performance knowledge base for matching configurations.
+
+    All filters are optional: case-insensitive substring match on model_name and
+    gpu_model, exact match on model_family, lower bound on total_vram_gb. The
+    response holds at most ``query.limit`` records plus the uncapped match count.
+    """
+    # The same filter list drives both the page and the count query.
+    filters = []
+    if query.model_name:
+        filters.append(PerformanceKnowledge.model_name.ilike(f"%{query.model_name}%"))
+    if query.model_family:
+        filters.append(PerformanceKnowledge.model_family == query.model_family)
+    if query.gpu_model:
+        filters.append(PerformanceKnowledge.gpu_model.ilike(f"%{query.gpu_model}%"))
+    if query.min_vram_gb:
+        filters.append(PerformanceKnowledge.total_vram_gb >= query.min_vram_gb)
+
+    # Ranking: highest throughput, lowest TTFT, or for any other target the
+    # highest stored score with NULL scores last.
+    target = query.optimization_target
+    if target == OptimizationTarget.THROUGHPUT:
+        ranking = PerformanceKnowledge.throughput_tps.desc()
+    elif target == OptimizationTarget.LATENCY:
+        ranking = PerformanceKnowledge.ttft_ms.asc()
+    else:
+        ranking = PerformanceKnowledge.score.desc().nulls_last()
+
+    stmt = (
+        select(PerformanceKnowledge)
+        .where(*filters)
+        .order_by(ranking)
+        .limit(query.limit)
+    )
+    records = (await db.execute(stmt)).scalars().all()
+
+    # Total matches, not capped by query.limit.
+    count_stmt = (
+        select(func.count()).select_from(PerformanceKnowledge).where(*filters)
+    )
+    total = await db.scalar(count_stmt)
+
+    return KnowledgeQueryResponse(
+        items=[KnowledgeRecord.model_validate(record) for record in records],
+        total=total or 0,
+        query=query,
+    )
+
+
+@router.post("/knowledge/save", response_model=KnowledgeRecord)
+async def save_to_knowledge_base(
+    request: KnowledgeSaveRequest,
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(require_operator),
+):
+    """Store a benchmark result as a knowledge-base record; 404 if unknown, 400 if unusable"""
+    result = await db.execute(
+        select(BenchmarkResult)
+        .where(BenchmarkResult.id == request.benchmark_result_id)
+        .options(selectinload(BenchmarkResult.deployment))
+    )
+    benchmark = result.scalar_one_or_none()
+    if not benchmark:
+        raise HTTPException(status_code=404, detail="Benchmark result not found")
+
+    # All three core metrics must be present and non-zero; they also feed the score.
+    if not benchmark.throughput_tps or not benchmark.ttft_ms or not benchmark.tpot_ms:
+        raise HTTPException(status_code=400, detail="Benchmark result has incomplete metrics")
+
+    # A result without a deployment cannot be attributed to hardware; reject it
+    # explicitly instead of failing with AttributeError on deployment.worker_id.
+    deployment = benchmark.deployment
+    if deployment is None:
+        raise HTTPException(status_code=400, detail="Benchmark result has no deployment")
+
+    worker_result = await db.execute(select(Worker).where(Worker.id == deployment.worker_id))
+    worker = worker_result.scalar_one_or_none()
+    model_result = await db.execute(select(LLMModel).where(LLMModel.id == deployment.model_id))
+    model = model_result.scalar_one_or_none()
+    if not worker or not model:
+        raise HTTPException(status_code=400, detail="Missing worker or model info")
+
+    # GPU facts come from the worker's gpu_info list; the first entry names the model.
+    gpu_info = worker.gpu_info or []
+    gpu_model = gpu_info[0].get("name", "Unknown") if gpu_info else "Unknown"
+    gpu_count = len(deployment.gpu_indexes) if deployment.gpu_indexes else len(gpu_info)
+    # memory_total is divided by 1024 to obtain GB - presumably reported in MB; confirm.
+    total_vram = sum(g.get("memory_total", 0) for g in gpu_info) / 1024
+
+    # Balanced score: throughput divided by a latency term (TTFT + 100 * TPOT),
+    # so higher throughput raises it and higher latency lowers it.
+    score = benchmark.throughput_tps / (benchmark.ttft_ms + benchmark.tpot_ms * 100)
+
+    record = PerformanceKnowledge(
+        gpu_model=gpu_model,
+        gpu_count=gpu_count,
+        total_vram_gb=total_vram,
+        model_name=model.name,
+        model_family=request.model_family,
+        model_params_b=request.model_params_b,
+        engine=deployment.backend,
+        quantization=(benchmark.config or {}).get("quantization"),
+        tensor_parallel=len(deployment.gpu_indexes) if deployment.gpu_indexes else 1,
+        extra_args=deployment.extra_params,
+        throughput_tps=benchmark.throughput_tps,
+        ttft_ms=benchmark.ttft_ms,
+        tpot_ms=benchmark.tpot_ms,
+        gpu_utilization=benchmark.gpu_utilization,
+        vram_usage_gb=benchmark.vram_usage_gb,
+        test_dataset="synthetic",
+        input_length=benchmark.input_length,
+        output_length=benchmark.output_length,
+        concurrency=benchmark.concurrency,
+        score=score,
+        source_tuning_job_id=benchmark.tuning_job_id,
+    )
+
+    db.add(record)
+    await db.commit()
+    await db.refresh(record)
+
+    return KnowledgeRecord.model_validate(record)
+
+
+# ============================================================================
+# Agent Chat Endpoint
+# ============================================================================
+
+
+class AgentChatRequest(BaseModel):
+    """Request body for the /agent/chat endpoint"""
+
+    message: str  # the new user message
+    config: dict  # LLM settings; agent_chat reads provider, deploymentId, apiKey, model, baseUrl
+    history: list[dict] = []  # earlier turns as role/content dicts; only the last 10 are used
+
+
+class AgentChatResponse(BaseModel):
+    """Response body of the /agent/chat endpoint"""
+
+    content: str  # assistant text, or a generated summary when only tools ran
+    tool_calls: list[dict] | None = None  # executed tools as name/args/result dicts; None if none ran
+
+
+@router.post("/agent/chat", response_model=AgentChatResponse)
+async def agent_chat(
+    request: AgentChatRequest,
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(require_viewer),
+):
+    """Run one chat turn with the Auto-Tuning Agent and execute any tool calls it requests.
+
+    The LLM is reached through an OpenAI-compatible client whose base URL, API
+    key and model are derived from ``request.config["provider"]``. Tool calls
+    returned by the model are executed once; their results are returned to the
+    caller and are not fed back to the model.
+
+    Raises:
+        HTTPException: 400 for an invalid provider configuration or a deployment
+            that is not running, 404 for an unknown deployment, 500 when the LLM
+            call or a tool execution fails.
+    """
+    import json
+
+    from app.services.tuning_agent import AGENT_SYSTEM_PROMPT, AgentToolExecutor, get_agent_tools
+
+    # NOTE(review): base_url can be caller-supplied and tools run with viewer
+    # rights; confirm this exposure is intended.
+    config = request.config
+    provider = config.get("provider", "system")
+
+    if provider == "system":
+        # Talk to a model served by one of our own running deployments.
+        deployment_id = config.get("deploymentId")
+        if not deployment_id:
+            raise HTTPException(status_code=400, detail="No deployment selected")
+
+        result = await db.execute(
+            select(Deployment)
+            .where(Deployment.id == deployment_id)
+            .options(selectinload(Deployment.worker))
+        )
+        deployment = result.scalar_one_or_none()
+        if not deployment:
+            raise HTTPException(status_code=404, detail="Deployment not found")
+        if deployment.status != DeploymentStatus.RUNNING.value:
+            raise HTTPException(status_code=400, detail="Deployment is not running")
+
+        worker = deployment.worker
+        base_url = f"http://{worker.host}:{deployment.port}/v1"
+        api_key = "dummy"
+        model = "default"
+    elif provider == "openai":
+        base_url = "https://api.openai.com/v1"
+        api_key = config.get("apiKey")
+        model = config.get("model", "gpt-4o")
+    elif provider == "anthropic":
+        # Anthropic uses a different API format and would need an adapter.
+        raise HTTPException(
+            status_code=400, detail="Anthropic not yet supported, use OpenAI-compatible endpoint"
+        )
+    elif provider == "custom":
+        base_url = config.get("baseUrl")
+        # Without an explicit URL the OpenAI client falls back to its default
+        # endpoint, so a missing baseUrl must be rejected here.
+        if not base_url:
+            raise HTTPException(status_code=400, detail="Base URL is required for custom provider")
+        api_key = config.get("apiKey", "dummy")
+        model = config.get("model", "default")
+    else:
+        raise HTTPException(status_code=400, detail=f"Unknown provider: {provider}")
+
+    if not api_key:
+        raise HTTPException(status_code=400, detail="API key is required")
+
+    # Prompt = system prompt + last 10 history entries + the new user message.
+    # History entries with another role or without a content key are skipped.
+    messages = [{"role": "system", "content": AGENT_SYSTEM_PROMPT}]
+    messages.extend(
+        {"role": msg["role"], "content": msg["content"]}
+        for msg in request.history[-10:]
+        if msg.get("role") in ("user", "assistant") and "content" in msg
+    )
+    messages.append({"role": "user", "content": request.message})
+
+    # The tool executor expects a job object; ad-hoc chat has none, so a
+    # placeholder with zero ids is passed instead.
+    class DummyJob:
+        id = 0
+        model_id = 0
+        worker_id = 0
+
+    executor = AgentToolExecutor(db, DummyJob())
+
+    try:
+        from openai import AsyncOpenAI
+
+        client = AsyncOpenAI(api_key=api_key, base_url=base_url)
+        response = await client.chat.completions.create(
+            model=model,
+            messages=messages,
+            tools=get_agent_tools(),
+            tool_choice="auto",
+            max_tokens=4096,
+        )
+
+        assistant_message = response.choices[0].message
+        content = assistant_message.content or ""
+        tool_calls_result = []
+
+        for tool_call in assistant_message.tool_calls or []:
+            tool_name = tool_call.function.name
+            tool_args = json.loads(tool_call.function.arguments)
+            result = await executor.execute(tool_name, tool_args)
+            # Tool output that looks like JSON is decoded for the client; if it
+            # does not parse after all, the raw text is returned unchanged.
+            parsed = result
+            if result.startswith(("{", "[")):
+                try:
+                    parsed = json.loads(result)
+                except json.JSONDecodeError:
+                    pass
+            tool_calls_result.append({"name": tool_name, "args": tool_args, "result": parsed})
+
+        # Tool calls without any assistant text still need a message to show.
+        if not content and tool_calls_result:
+            content = f"I executed {len(tool_calls_result)} tool(s). See the results below."
+
+        return AgentChatResponse(
+            content=content,
+            tool_calls=tool_calls_result if tool_calls_result else None,
+        )
+    except Exception as e:
+        logger.exception(f"Agent chat error: {e}")
+        raise HTTPException(status_code=500, detail=f"Agent error: {str(e)}")
+
+
+# ============================================================================
+# Auto-Tuning Agent Runner
+# ============================================================================
+
+
+async def run_auto_tuning(job_id: int, llm_config: dict | None = None) -> None:
+    """Background-task entry point: hand the job id and LLM config to run_tuning_agent"""
+    from app.services.tuning_agent import run_tuning_agent
+
+    await run_tuning_agent(job_id, llm_config)
+
+
+async def _run_benchmark_test(deployment: Deployment, request: BenchmarkRequest) -> dict:
+    """Benchmark a deployment over HTTP; returns a metrics dict, or {"error": ...} on failure"""
+    from app.services.tuning_agent import _run_http_benchmark
+
+    # NOTE(review): reads deployment.worker - presumably the caller must have loaded it; confirm
+    worker = deployment.worker
+    if not worker:
+        return {"error": "Worker not found"}
+
+    base_url = f"http://{worker.host}:{deployment.port}/v1"
+    # Request count: 5 per concurrent client, but never fewer than 10.
+    result = await _run_http_benchmark(
+        base_url=base_url,
+        num_requests=max(10, request.concurrency * 5),
+        concurrency=request.concurrency,
+        input_tokens=request.input_length,
+        output_tokens=request.output_length,
+    )
+
+    if not result.get("success"):
+        return {"error": result.get("error", "Benchmark failed")}
+    # Re-key the helper's averaged metrics (avg_ttft_ms, avg_tpot_ms) to the names the route reads.
+    metrics = result.get("metrics", {})
+    return {
+        "throughput_tps": metrics.get("throughput_tps"),
+        "ttft_ms": metrics.get("avg_ttft_ms"),
+        "tpot_ms": metrics.get("avg_tpot_ms"),
+        "total_latency_ms": None,  # Not directly measured
+        "gpu_utilization": None,  # Would need GPU monitoring
+        "vram_usage_gb": None,  # Would need GPU monitoring
+        "raw": result.get("summary"),
+    }
diff --git a/backend/app/api/chat_proxy.py b/backend/app/api/chat_proxy.py
new file mode 100644
index 0000000..6d7d64d
--- /dev/null
+++ b/backend/app/api/chat_proxy.py
@@ -0,0 +1,208 @@
+"""Chat Proxy API - Proxy requests to external OpenAI-compatible endpoints."""
+
+import logging
+
+import httpx
+from fastapi import APIRouter, Depends, HTTPException
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+
+from app.core.deps import get_current_user
+from app.models.user import User
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter()
+
+HTTP_TIMEOUT = 300.0  # 5 minutes
+
+
+class ChatProxyRequest(BaseModel):
+    """Request body for chat proxy endpoint."""
+
+    endpoint: str  # base URL or full .../chat/completions URL of the external API
+    api_key: str | None = None  # sent as "Authorization: Bearer <key>" when set
+    payload: dict  # forwarded unchanged as the JSON body; a truthy "stream" selects SSE mode
+
+
+class FetchModelsRequest(BaseModel):
+    """Request body for fetching models from external endpoint."""
+
+    endpoint: str  # base URL; "/models" is appended after removing a trailing slash
+    api_key: str | None = None  # sent as "Authorization: Bearer <key>" when set
+
+
+@router.post("/chat-proxy")
+async def proxy_chat_request(
+    request: ChatProxyRequest,
+    current_user: User = Depends(get_current_user),
+):
+    """
+    Proxy chat requests to external OpenAI-compatible endpoints.
+
+    This endpoint allows the frontend to make requests to external LLM APIs
+    without running into CORS issues. A non-200 upstream reply is passed on with
+    its own status code; connection failures map to 502 and timeouts to 504.
+    """
+    import json
+
+    # Normalize to ".../chat/completions" without a trailing slash.
+    endpoint = request.endpoint.strip().rstrip("/")
+    if not endpoint.endswith("/chat/completions"):
+        endpoint = f"{endpoint}/chat/completions"
+
+    headers = {"Content-Type": "application/json"}
+    if request.api_key:
+        headers["Authorization"] = f"Bearer {request.api_key}"
+
+    # Log the target URL and the requested model for debugging.
+    logger.info(f"Proxying request to: {endpoint}")
+    logger.info(f"Payload model: {request.payload.get('model', 'not specified')}")
+
+    if request.payload.get("stream", False):
+        # The generator owns the HTTP client and closes it in ``finally``;
+        # failures inside it are reported to the client as SSE "data:" events.
+        async def stream_response():
+            client = httpx.AsyncClient(timeout=HTTP_TIMEOUT)
+            try:
+                async with client.stream(
+                    "POST",
+                    endpoint,
+                    json=request.payload,
+                    headers=headers,
+                ) as response:
+                    if response.status_code != 200:
+                        error_text = await response.aread()
+                        yield f"data: {error_text.decode()}\n\n"
+                        return
+
+                    async for chunk in response.aiter_bytes():
+                        yield chunk
+            except httpx.ConnectError as e:
+                logger.error(f"Connection error to {endpoint}: {e}")
+                yield 'data: {"error": "Failed to connect to endpoint"}\n\n'
+            except httpx.TimeoutException as e:
+                logger.error(f"Timeout connecting to {endpoint}: {e}")
+                yield 'data: {"error": "Request timed out"}\n\n'
+            except Exception as e:
+                logger.error(f"Error proxying request to {endpoint}: {e}")
+                # json.dumps escapes quotes/newlines so the event stays valid JSON.
+                yield "data: " + json.dumps({"error": str(e)}) + "\n\n"
+            finally:
+                await client.aclose()
+
+        return StreamingResponse(
+            stream_response(),
+            media_type="text/event-stream",
+            headers={
+                "Cache-Control": "no-cache",
+                "Connection": "keep-alive",
+            },
+        )
+
+    # Non-streaming: forward the JSON body and return the upstream JSON as-is.
+    try:
+        async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
+            response = await client.post(
+                endpoint,
+                json=request.payload,
+                headers=headers,
+            )
+
+            if response.status_code != 200:
+                raise HTTPException(
+                    status_code=response.status_code,
+                    detail=response.text,
+                )
+
+            return response.json()
+    except HTTPException:
+        # Keep the upstream status; the generic handler below would otherwise
+        # turn it into a 500.
+        raise
+    except httpx.ConnectError as e:
+        logger.error(f"Connection error to {endpoint}: {e}")
+        raise HTTPException(
+            status_code=502,
+            detail=f"Failed to connect to endpoint: {endpoint}",
+        )
+    except httpx.TimeoutException as e:
+        logger.error(f"Timeout connecting to {endpoint}: {e}")
+        raise HTTPException(
+            status_code=504,
+            detail="Request to endpoint timed out",
+        )
+    except Exception as e:
+        logger.error(f"Error proxying request to {endpoint}: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail=str(e),
+        )
+
+
+@router.post("/fetch-models")
+async def fetch_remote_models(
+    request: FetchModelsRequest,
+    current_user: User = Depends(get_current_user),
+):
+    """
+    Fetch available models from an external OpenAI-compatible endpoint.
+
+    Returns ``{"models": [{"id": ..., "owned_by": ...}, ...]}`` built from the
+    ``data`` list of the upstream ``/models`` reply. A non-200 upstream reply is
+    passed on with its own status code; connection failures map to 502 and
+    timeouts to 504.
+    """
+    # Drop surrounding whitespace and trailing slashes, then target the
+    # OpenAI-style model listing route.
+    endpoint = request.endpoint.strip().rstrip("/")
+    models_endpoint = f"{endpoint}/models"
+
+    # The bearer token is only sent when the caller supplied a key.
+    headers = {"Content-Type": "application/json"}
+    if request.api_key:
+        headers["Authorization"] = f"Bearer {request.api_key}"
+
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            response = await client.get(models_endpoint, headers=headers)
+
+            if response.status_code != 200:
+                raise HTTPException(
+                    status_code=response.status_code,
+                    detail=f"Failed to fetch models: {response.text}",
+                )
+
+            data = response.json()
+
+        # Anything that is not an OpenAI-style {"data": [...]} object yields an
+        # empty list; entries without an id are skipped.
+        entries = data.get("data") if isinstance(data, dict) else None
+        models = [
+            {"id": entry["id"], "owned_by": entry.get("owned_by", "unknown")}
+            for entry in entries or []
+            if isinstance(entry, dict) and entry.get("id")
+        ]
+
+        return {"models": models}
+
+    except HTTPException:
+        # Keep the upstream status; the generic handler below would otherwise
+        # turn it into a 500.
+        raise
+    except httpx.ConnectError:
+        raise HTTPException(
+            status_code=502,
+            detail=f"Failed to connect to endpoint: {models_endpoint}",
+        )
+    except httpx.TimeoutException:
+        raise HTTPException(
+            status_code=504,
+            detail="Request to endpoint timed out",
+        )
+    except Exception as e:
+        logger.error(f"Error fetching models from {models_endpoint}: {e}")
+        raise HTTPException(
+            status_code=500,
+            detail=str(e),
+        )
diff --git a/backend/app/config.py b/backend/app/config.py
index 3bcfd3c..d573104 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -49,6 +49,11 @@ class Settings(BaseSettings):
     # Data directory
     data_dir: Path = Path("./data")
 
+    # Auto-Tuning Agent LLM settings
+    openai_api_key: str = ""
+    openai_base_url: str = ""  # For OpenAI-compatible endpoints
+    openai_model: str = "gpt-4o"  # Model to use for agent reasoning
+
     def get_cors_origins(self) -> list[str]:
         """Parse CORS origins from comma-separated string."""
         if self.cors_origins == "*":
diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py
index 4865b1f..7fd3bf8 100644
--- a/backend/app/models/__init__.py
+++ b/backend/app/models/__init__.py
@@ -6,6 +6,13 @@
 from app.models.deployment import Deployment
 from app.models.llm_model import LLMModel
 from app.models.registration_token import RegistrationToken
+from app.models.tuning import (
+    BenchmarkResult,
+    OptimizationTarget,
+    PerformanceKnowledge,
+    TuningJob,
+    TuningJobStatus,
+)
 from app.models.user import User, UserRole
 from app.models.worker import Worker
 
@@ -24,4 +31,9 @@
     "AppStatus",
     "APP_DEFINITIONS",
     "RegistrationToken",
+    "TuningJob",
+    "TuningJobStatus",
+    "OptimizationTarget",
+    "BenchmarkResult",
+    "PerformanceKnowledge",
 ]
diff --git a/backend/app/models/tuning.py b/backend/app/models/tuning.py
new file mode 100644
index 0000000..14696d5
--- /dev/null
+++ b/backend/app/models/tuning.py
@@ -0,0 +1,200 @@
+"""Auto-Tuning and Benchmark models"""
+
+from datetime import UTC, datetime
+from enum import Enum
+
+from sqlalchemy import JSON, DateTime, Float, ForeignKey, Integer, String, Text
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from app.database import Base
+
+
+class TuningJobStatus(str, Enum):
+    """Status values for a tuning job; members are plain strings via the ``str`` mixin."""
+
+    PENDING = "pending"  # default value of TuningJob.status
+    ANALYZING = "analyzing"  # NOTE(review): in-progress names appear to mirror the workflow steps
+    QUERYING_KB = "querying_kb"
+    EXPLORING = "exploring"
+    BENCHMARKING = "benchmarking"
+    COMPLETED = "completed"
+    FAILED = "failed"
+    CANCELLED = "cancelled"
+
+
+class OptimizationTarget(str, Enum):
+    """Objective a tuning run optimizes for; members are plain strings via the ``str`` mixin."""
+
+    THROUGHPUT = "throughput"  # Maximize tokens per second (TPS)
+    LATENCY = "latency"  # Minimize time to first token (TTFT) / time per output token (TPOT)
+    COST = "cost"  # Minimize resource usage
+    BALANCED = "balanced"  # Balance all factors; default on TuningJob and the request schemas
+
+
+class TuningJob(Base):
+    """Auto-tuning job row: target model/worker, progress state, results and agent log."""
+
+    __tablename__ = "tuning_jobs"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+
+    # Target configuration: which model to tune, on which worker, for which objective
+    model_id: Mapped[int] = mapped_column(Integer, ForeignKey("llm_models.id"), nullable=False)
+    worker_id: Mapped[int] = mapped_column(Integer, ForeignKey("workers.id"), nullable=False)
+    optimization_target: Mapped[str] = mapped_column(
+        String(50), default=OptimizationTarget.BALANCED.value
+    )
+
+    # Job status (stored as a plain string; defaults to TuningJobStatus.PENDING)
+    status: Mapped[str] = mapped_column(String(50), default=TuningJobStatus.PENDING.value)
+    status_message: Mapped[str | None] = mapped_column(Text, nullable=True)
+    current_step: Mapped[int] = mapped_column(Integer, default=0)
+    total_steps: Mapped[int] = mapped_column(Integer, default=5)
+
+    # Progress details (free-form JSON; TuningJobResponse parses it as TuningJobProgress)
+    progress: Mapped[dict | None] = mapped_column(JSON, nullable=True)
+
+    # Results (both nullable JSON columns)
+    best_config: Mapped[dict | None] = mapped_column(JSON, nullable=True)
+    all_results: Mapped[list | None] = mapped_column(JSON, nullable=True)
+
+    # Agent conversation log (for UI display)
+    # Format: [{"role": "user"|"assistant"|"tool", "content": "...", "tool_calls": [...], "timestamp": "..."}]
+    conversation_log: Mapped[list | None] = mapped_column(JSON, nullable=True)
+
+    # Metadata (timezone-aware UTC timestamps; updated_at also has an onupdate hook)
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), default=lambda: datetime.now(UTC)
+    )
+    updated_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True),
+        default=lambda: datetime.now(UTC),
+        onupdate=lambda: datetime.now(UTC),
+    )
+    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+
+    # Relationships (each backref adds a ``tuning_jobs`` attribute to the related model)
+    model = relationship("LLMModel", backref="tuning_jobs")
+    worker = relationship("Worker", backref="tuning_jobs")
+
+
+class BenchmarkResult(Base):
+    """Benchmark result row for one configuration tested against one deployment."""
+
+    __tablename__ = "benchmark_results"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+
+    # Associated tuning job (optional - can run standalone benchmarks)
+    tuning_job_id: Mapped[int | None] = mapped_column(
+        Integer, ForeignKey("tuning_jobs.id"), nullable=True
+    )
+
+    # Deployment being benchmarked (required)
+    deployment_id: Mapped[int] = mapped_column(
+        Integer, ForeignKey("deployments.id"), nullable=False
+    )
+
+    # Configuration tested (required JSON document)
+    config: Mapped[dict] = mapped_column(JSON, nullable=False)
+    # Example config:
+    # {
+    #     "engine": "vllm",
+    #     "quantization": "fp16",
+    #     "tensor_parallel": 1,
+    #     "extra_args": {...}
+    # }
+
+    # Test parameters (defaults match those of the BenchmarkRequest schema)
+    test_type: Mapped[str] = mapped_column(String(50), default="throughput")
+    test_duration_seconds: Mapped[int] = mapped_column(Integer, default=60)
+    input_length: Mapped[int] = mapped_column(Integer, default=512)
+    output_length: Mapped[int] = mapped_column(Integer, default=128)
+    concurrency: Mapped[int] = mapped_column(Integer, default=1)
+
+    # Performance metrics (all nullable)
+    throughput_tps: Mapped[float | None] = mapped_column(Float, nullable=True)  # Tokens per second
+    ttft_ms: Mapped[float | None] = mapped_column(Float, nullable=True)  # Time to first token (ms)
+    tpot_ms: Mapped[float | None] = mapped_column(
+        Float, nullable=True
+    )  # Time per output token (ms)
+    total_latency_ms: Mapped[float | None] = mapped_column(Float, nullable=True)
+
+    # Resource usage (nullable)
+    gpu_utilization: Mapped[float | None] = mapped_column(Float, nullable=True)  # 0-100%
+    vram_usage_gb: Mapped[float | None] = mapped_column(Float, nullable=True)
+
+    # Raw results and error text (both nullable)
+    raw_results: Mapped[dict | None] = mapped_column(JSON, nullable=True)
+    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
+
+    # Metadata (timezone-aware UTC creation timestamp)
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), default=lambda: datetime.now(UTC)
+    )
+
+    # Relationships (each backref adds ``benchmark_results`` to the related model)
+    tuning_job = relationship("TuningJob", backref="benchmark_results")
+    deployment = relationship("Deployment", backref="benchmark_results")
+
+
+class PerformanceKnowledge(Base):
+    """Performance knowledge base row: one measured hardware/model/configuration combination.
+
+    This table stores historical tuning results to enable:
+    1. Fast lookup of known-good configurations
+    2. Transfer learning across similar models/hardware
+    """
+
+    __tablename__ = "performance_knowledge"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+
+    # Hardware info (all required)
+    gpu_model: Mapped[str] = mapped_column(String(100), nullable=False)  # e.g., "NVIDIA H100 80GB"
+    gpu_count: Mapped[int] = mapped_column(Integer, nullable=False)
+    total_vram_gb: Mapped[float] = mapped_column(Float, nullable=False)
+
+    # Model info (name and family required; parameter count optional)
+    model_name: Mapped[str] = mapped_column(String(255), nullable=False)  # e.g., "Qwen/Qwen2.5-72B"
+    model_family: Mapped[str] = mapped_column(String(100), nullable=False)  # e.g., "Qwen"
+    model_params_b: Mapped[float | None] = mapped_column(
+        Float, nullable=True
+    )  # Parameters in billions
+
+    # Configuration (same keys as the example config documented on BenchmarkResult)
+    engine: Mapped[str] = mapped_column(String(50), nullable=False)  # vllm, sglang, ollama
+    quantization: Mapped[str | None] = mapped_column(
+        String(50), nullable=True
+    )  # fp16, fp8, awq, gptq
+    tensor_parallel: Mapped[int] = mapped_column(Integer, default=1)
+    extra_args: Mapped[dict | None] = mapped_column(JSON, nullable=True)
+
+    # Performance metrics (throughput/TTFT/TPOT are required here, unlike on BenchmarkResult)
+    throughput_tps: Mapped[float] = mapped_column(Float, nullable=False)
+    ttft_ms: Mapped[float] = mapped_column(Float, nullable=False)
+    tpot_ms: Mapped[float] = mapped_column(Float, nullable=False)
+    gpu_utilization: Mapped[float | None] = mapped_column(Float, nullable=True)
+    vram_usage_gb: Mapped[float | None] = mapped_column(Float, nullable=True)
+
+    # Test conditions (length/concurrency defaults match BenchmarkResult)
+    test_dataset: Mapped[str] = mapped_column(String(100), default="synthetic")
+    input_length: Mapped[int] = mapped_column(Integer, default=512)
+    output_length: Mapped[int] = mapped_column(Integer, default=128)
+    concurrency: Mapped[int] = mapped_column(Integer, default=1)
+
+    # Recommendation score (computed based on optimization target; nullable)
+    score: Mapped[float | None] = mapped_column(Float, nullable=True)
+
+    # Source tuning job, if any (nullable FK to tuning_jobs.id)
+    source_tuning_job_id: Mapped[int | None] = mapped_column(
+        Integer, ForeignKey("tuning_jobs.id"), nullable=True
+    )
+
+    # Metadata (timezone-aware UTC creation timestamp)
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True), default=lambda: datetime.now(UTC)
+    )
+
+    # Relationships (backref adds ``knowledge_records`` to TuningJob)
+    source_tuning_job = relationship("TuningJob", backref="knowledge_records")
diff --git a/backend/app/schemas/tuning.py b/backend/app/schemas/tuning.py
new file mode 100644
index 0000000..43513bc
--- /dev/null
+++ b/backend/app/schemas/tuning.py
@@ -0,0 +1,220 @@
+"""Auto-Tuning and Benchmark Pydantic schemas"""
+
+from datetime import datetime
+
+from pydantic import BaseModel, Field
+
+from app.models.tuning import OptimizationTarget
+
+# ============================================================================
+# LLM Configuration for Agent
+# ============================================================================
+
+
+class LLMConfig(BaseModel):
+    """Settings for the LLM that drives the auto-tuning agent; every field is optional."""
+
+    deployment_id: int | None = Field(None, description="Use a local deployment as the agent LLM")
+    base_url: str | None = Field(None, description="Custom OpenAI-compatible endpoint URL")
+    api_key: str | None = Field(None, description="API key for the endpoint")
+    model: str | None = Field(None, description="Model name to use")
+
+
+# ============================================================================
+# Tuning Job Schemas
+# ============================================================================
+
+
+class TuningJobCreate(BaseModel):
+    """Request body for creating a tuning job: target model, worker, objective and agent LLM."""
+
+    model_id: int = Field(..., description="ID of the model to tune")
+    worker_id: int = Field(..., description="ID of the worker to use")
+    optimization_target: OptimizationTarget = Field(
+        default=OptimizationTarget.BALANCED, description="What to optimize for"
+    )
+    llm_config: LLMConfig | None = Field(
+        None, description="LLM configuration for the agent (uses chat panel's selected model)"
+    )
+
+
+class TuningJobProgress(BaseModel):
+    """Progress snapshot for a tuning job: current step plus configuration-testing counters."""
+
+    step: int
+    total_steps: int
+    step_name: str
+    step_description: str
+    configs_tested: int = 0
+    configs_total: int = 0
+    current_config: dict | None = None
+    best_config_so_far: dict | None = None
+    best_score_so_far: float | None = None
+
+
+class ConversationMessage(BaseModel):
+    """One entry of a tuning job's agent conversation log."""
+
+    role: str  # "user", "assistant", or "tool"
+    content: str
+    timestamp: str | None = None  # kept as a string, not parsed into a datetime
+    tool_calls: list[dict] | None = None  # For assistant messages with tool calls
+    tool_call_id: str | None = None  # For tool responses
+    name: str | None = None  # Tool name for tool responses
+
+
+class TuningJobResponse(BaseModel):
+    """Tuning job as returned by the API, including progress, results and conversation log."""
+
+    id: int
+    model_id: int  # NOTE(review): "model_" names may hit Pydantic's protected-namespace warning
+    worker_id: int
+    optimization_target: str
+    status: str
+    status_message: str | None = None
+    current_step: int
+    total_steps: int
+    progress: TuningJobProgress | None = None
+    best_config: dict | None = None
+    all_results: list | None = None
+    conversation_log: list[ConversationMessage] | None = None
+    created_at: datetime
+    updated_at: datetime
+    completed_at: datetime | None = None
+
+    # Related info (not TuningJob columns, so these stay None unless set explicitly)
+    model_name: str | None = None
+    worker_name: str | None = None
+
+    class Config:
+        from_attributes = True  # allow validation from objects via attribute access (ORM rows)
+
+
+class TuningJobListResponse(BaseModel):
+    """List of tuning jobs together with a ``total`` count."""
+
+    items: list[TuningJobResponse]
+    total: int
+
+
+# ============================================================================
+# Benchmark Schemas
+# ============================================================================
+
+
+class BenchmarkConfig(BaseModel):
+    """Engine configuration for a benchmark test; only ``engine`` is required."""
+
+    engine: str = Field(..., description="Inference engine: vllm, sglang, ollama")
+    quantization: str | None = Field(default=None, description="Quantization: fp16, fp8, awq, gptq")
+    tensor_parallel: int = Field(default=1, description="Tensor parallelism degree")
+    extra_args: dict | None = Field(default=None, description="Additional engine arguments")
+
+
+class BenchmarkRequest(BaseModel):
+    """Request body for benchmarking a deployment; numeric fields are range-validated."""
+
+    deployment_id: int = Field(..., description="ID of the deployment to benchmark")
+    test_type: str = Field(default="throughput", description="Test type: throughput, latency")
+    duration_seconds: int = Field(default=60, ge=10, le=600, description="Test duration")
+    input_length: int = Field(default=512, ge=1, le=32768, description="Input token length")
+    output_length: int = Field(default=128, ge=1, le=8192, description="Output token length")
+    concurrency: int = Field(default=1, ge=1, le=64, description="Number of concurrent requests")
+
+
+class BenchmarkMetrics(BaseModel):
+    """Benchmark performance and resource metrics; every field is optional."""
+
+    throughput_tps: float | None = Field(None, description="Tokens per second")
+    ttft_ms: float | None = Field(None, description="Time to first token (ms)")
+    tpot_ms: float | None = Field(None, description="Time per output token (ms)")
+    total_latency_ms: float | None = Field(None, description="Total request latency (ms)")
+    gpu_utilization: float | None = Field(None, description="GPU utilization (0-100%)")
+    vram_usage_gb: float | None = Field(None, description="VRAM usage in GB")
+
+
+class BenchmarkResultResponse(BaseModel):
+    """Benchmark result as returned by the API, with metrics grouped in a nested object."""
+
+    id: int
+    tuning_job_id: int | None = None
+    deployment_id: int
+    config: dict
+    test_type: str
+    test_duration_seconds: int
+    input_length: int
+    output_length: int
+    concurrency: int
+    metrics: BenchmarkMetrics  # NOTE(review): ORM row has flat metric columns, no "metrics" attr
+    error_message: str | None = None
+    created_at: datetime
+
+    class Config:
+        from_attributes = True  # allow validation from objects via attribute access (ORM rows)
+
+
+class BenchmarkResultListResponse(BaseModel):
+    """List of benchmark results together with a ``total`` count."""
+
+    items: list[BenchmarkResultResponse]
+    total: int
+
+
+# ============================================================================
+# Knowledge Base Schemas
+# ============================================================================
+
+
+class KnowledgeQuery(BaseModel):
+    """Filters for a knowledge base query; all filters are optional and ``limit`` is 1-100."""
+
+    model_name: str | None = Field(default=None, description="Model name pattern to match")
+    model_family: str | None = Field(default=None, description="Model family: Qwen, Llama, etc.")
+    gpu_model: str | None = Field(default=None, description="GPU model pattern")
+    min_vram_gb: float | None = Field(default=None, description="Minimum VRAM")
+    optimization_target: OptimizationTarget = Field(
+        default=OptimizationTarget.BALANCED, description="Optimization target for scoring"
+    )
+    limit: int = Field(default=10, ge=1, le=100, description="Max results to return")
+
+
+class KnowledgeRecord(BaseModel):
+    """A knowledge base record as returned by the API (hardware, model, config and metrics)."""
+
+    id: int
+    gpu_model: str
+    gpu_count: int
+    total_vram_gb: float
+    model_name: str
+    model_family: str
+    model_params_b: float | None = None
+    engine: str
+    quantization: str | None = None
+    tensor_parallel: int
+    extra_args: dict | None = None
+    throughput_tps: float
+    ttft_ms: float
+    tpot_ms: float
+    gpu_utilization: float | None = None
+    vram_usage_gb: float | None = None
+    score: float | None = None
+    created_at: datetime
+
+    class Config:
+        from_attributes = True  # allow validation from objects via attribute access (ORM rows)
+
+
+class KnowledgeQueryResponse(BaseModel):
+    """Knowledge base query result: records, a ``total`` count and the query itself."""
+
+    items: list[KnowledgeRecord]
+    total: int
+    query: KnowledgeQuery
+
+
+class KnowledgeSaveRequest(BaseModel):
+    """Request body for saving a benchmark result to the knowledge base."""
+
+    benchmark_result_id: int = Field(..., description="ID of the benchmark result to save")
+    model_family: str = Field(..., description="Model family for categorization")
+    model_params_b: float | None = Field(default=None, description="Model parameters in billions")
diff --git a/backend/app/services/tuning_agent.py b/backend/app/services/tuning_agent.py
new file mode 100644
index 0000000..311163e
--- /dev/null
+++ b/backend/app/services/tuning_agent.py
@@ -0,0 +1,1506 @@
+"""
+Auto-Tuning Agent Service
+
+A true LLM-driven agent that:
+1. Uses an LLM to reason about configurations
+2. Actually deploys models with different configs
+3. Runs real benchmarks against deployed endpoints
+4. Analyzes results and decides next steps
+"""
+
+import asyncio
+import json
+import logging
+import time
+from datetime import UTC, datetime
+
+import httpx
+from openai import AsyncOpenAI
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import selectinload
+
+from app.config import get_settings
+from app.database import async_session_maker
+from app.models.deployment import Deployment, DeploymentStatus
+from app.models.llm_model import LLMModel
+from app.models.tuning import PerformanceKnowledge, TuningJob, TuningJobStatus
+from app.models.worker import Worker
+
+logger = logging.getLogger(__name__)
+
+
+# =============================================================================
+# Agent System Prompt - tool names used in it must match get_agent_tools() below
+# =============================================================================
+
+AGENT_SYSTEM_PROMPT = """You are an Auto-Tuning Agent helping to find the optimal deployment configuration for LLM models.
+
+IMPORTANT COMMUNICATION RULES:
+1. ALWAYS explain what you're about to do BEFORE calling any tool
+2. After each tool result, briefly summarize what you learned
+3. Be conversational - talk like you're explaining to a colleague
+4. No emojis, keep it professional but friendly
+
+=== OPTIMIZATION TARGETS ===
+
+**Throughput** (tokens per second):
+- Goal: Maximize TPS for batch processing / high volume
+- Strategy: Use vLLM with large batch sizes, enable continuous batching
+- Key metric: throughput_tps (higher is better)
+- Trade-off: May have higher latency per request
+
+**Latency** (response time):
+- Goal: Minimize time-to-first-token (TTFT) and time-per-output-token (TPOT)
+- Strategy: Use smaller batch sizes, consider sglang for multi-turn
+- Key metrics: avg_ttft_ms, avg_tpot_ms (lower is better)
+- Trade-off: Lower overall throughput
+
+**Balanced**:
+- Goal: Good balance between throughput and latency
+- Strategy: Test multiple configs, calculate combined score
+- Score formula: throughput_tps / (avg_ttft_ms * 0.01) - balance speed and responsiveness
+- Pick config with best combined score
+
+**Cost** (minimum resources):
+- Goal: Use minimum GPU memory while maintaining acceptable performance
+- Strategy: Try quantization (awq, gptq), use fewer GPUs if possible
+- Key consideration: memory_used_gb, still need decent throughput
+- Trade-off: May sacrifice some performance for efficiency
+
+=== AVAILABLE ENGINES ===
+- vllm: Best throughput, tensor parallelism, supports fp8/awq/gptq quantization
+- sglang: Good for multi-turn, efficient memory, fast prefix caching
+- ollama: Simple deployment, good for smaller models, easy setup
+
+=== QUANTIZATION NOTES ===
+- AWQ/GPTQ: Requires a pre-quantized model (e.g., "Qwen/Qwen3-0.6B-AWQ")
+  Do NOT use quantization=awq with a base model like "Qwen/Qwen3-0.6B"
+- FP8: Only works on Hopper+ GPUs (H100, etc.), not consumer GPUs
+- For consumer GPUs (RTX 4090, etc.), use default FP16 or find a pre-quantized model
+
+=== PROCESS ===
+1. Check hardware (GPU model, VRAM, count)
+2. Query knowledge base for similar setups
+3. Based on optimization target, choose 2-3 promising configs to test
+4. For EACH config:
+   a. Deploy model
+   b. Wait for deployment (use short timeout like 120s)
+   c. If timeout/slow: Check logs with get_deployment_logs to diagnose
+   d. If failed: STOP deployment, analyze error, try next config
+   e. If success: Run benchmark, record results, STOP deployment
+5. Compare all results, call finish_tuning with recommendation
+
+=== DIAGNOSING DEPLOYMENT ISSUES ===
+When wait_for_deployment times out:
+1. FIRST call test_deployment_endpoint to check if API is actually ready
+   - If ready=true: Great! Proceed to run_benchmark
+   - If ready=false: Continue to step 2
+2. Call get_deployment_logs to check container logs (use tail=100 or more)
+3. Look for common patterns in logs:
+   - "Loading checkpoint shards" or "Loading model weights" - model is loading, keep waiting
+   - "INFO: Started server process" or "Uvicorn running" - vLLM is ready!
+   - "CUDA out of memory" - try quantization or fewer GPUs
+   - "Error" or "Exception" - check the error message
+4. Based on logs, decide:
+   - If model loading: call test_deployment_endpoint every 30s until ready
+   - If OOM error: stop_deployment and try with quantization
+   - If other error: stop_deployment and try different engine/config
+
+DO NOT just give up on timeout - always test endpoint and check logs first!
+A 0.6B model should load in 1-2 minutes, larger models (7B+) may take 5-10 minutes.
+
+=== HANDLING LOW GPU MEMORY ===
+If deploy_model fails with "GPU memory is low":
+1. Call list_deployments(worker_id=X) to find existing deployments
+2. Stop all running deployments using stop_deployment(deployment_id=X)
+3. If no deployments found, GPU is used by external processes - inform user
+4. After stopping, retry deploy_model
+
+IMPORTANT: ALWAYS stop a deployment before starting a new one!
+- If deployment times out → check logs, then stop_deployment
+- If deployment fails → stop_deployment immediately
+- After benchmark complete → stop_deployment before next test
+- Never have multiple test deployments running at once!
+
+=== EXAMPLE FLOW ===
+"Let me first check what hardware we have available..."
+[call get_hardware_info]
+"I can see we have 1x RTX 4090 with 24GB VRAM. Let me check if we have historical data..."
+[call query_knowledge_base]
+"No historical data found. Since we're optimizing for throughput, I'll test vLLM first..."
+[call deploy_model]
+"Deployment created with ID 1. Let me wait for it to become ready..."
+[call wait_for_deployment(deployment_id=1, timeout_seconds=120)]
+-- If timeout occurs --
+"Wait timed out after 120s. Let me first test if the endpoint is actually ready..."
+[call test_deployment_endpoint(deployment_id=1)]
+-- If ready=true --
+"The endpoint is responding! The model is ready. Let me run the benchmark now..."
+[call run_benchmark(deployment_id=1)]
+-- If ready=false --
+"Endpoint not ready yet. Let me check the container logs..."
+[call get_deployment_logs(deployment_id=1, tail=100)]
+"I see from the logs: 'Loading checkpoint shards: 3/4 (75%)' - model is still loading.
+Let me test the endpoint again in a moment..."
+[call test_deployment_endpoint(deployment_id=1)]
+-- Keep testing until ready, then run benchmark --
+-- OR if logs show an error --
+"The logs show 'CUDA out of memory'. I need to stop and try a different config..."
+[call stop_deployment(deployment_id=1)]
+[call deploy_model with different params]
+
+ALWAYS provide context. Never call tools silently.
+ALWAYS test endpoint and check logs before giving up on a deployment.
+"""
+
+
+# =============================================================================
+# Tools for the Agent
+# =============================================================================
+
+
+def get_agent_tools() -> list[dict]:
+    """Define tools available to the agent"""
+    return [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_hardware_info",
+                "description": "Get detailed hardware information for a worker node including GPU model, VRAM, count, and current utilization.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "worker_id": {"type": "integer", "description": "ID of the worker to query"}
+                    },
+                    "required": ["worker_id"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "get_model_info",
+                "description": "Get information about the model to be deployed.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "model_id": {"type": "integer", "description": "ID of the model"}
+                    },
+                    "required": ["model_id"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "query_knowledge_base",
+                "description": "Query historical performance data for similar model/hardware combinations.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "model_family": {
+                            "type": "string",
+                            "description": "Model family (e.g., Qwen, Llama, Mistral)",
+                        },
+                        "gpu_model": {
+                            "type": "string",
+                            "description": "GPU model pattern (e.g., RTX 4090, A100)",
+                        },
+                    },
+                    "required": [],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "list_deployments",
+                "description": "List all deployments on a worker. Use this to find existing deployments that may be using GPU memory.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "worker_id": {"type": "integer", "description": "Worker ID to query"}
+                    },
+                    "required": ["worker_id"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "deploy_model",
+                "description": "Deploy a model with specific configuration. Returns deployment ID if successful.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "model_id": {"type": "integer"},
+                        "worker_id": {"type": "integer"},
+                        "engine": {"type": "string", "enum": ["vllm", "sglang", "ollama"]},
+                        "gpu_indexes": {
+                            "type": "array",
+                            "items": {"type": "integer"},
+                            "description": "GPU indices to use",
+                        },
+                        "extra_params": {
+                            "type": "object",
+                            "description": "Additional engine parameters",
+                        },
+                    },
+                    "required": ["model_id", "worker_id", "engine"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "wait_for_deployment",
+                "description": "Wait for a deployment to be ready (running status).",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "deployment_id": {"type": "integer"},
+                        "timeout_seconds": {
+                            "type": "integer",
+                            "default": 300,
+                            "description": "Maximum time to wait",
+                        },
+                    },
+                    "required": ["deployment_id"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "run_benchmark",
+                "description": "Run performance benchmark on a running deployment. Returns throughput, TTFT, TPOT metrics.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "deployment_id": {"type": "integer"},
+                        "num_requests": {
+                            "type": "integer",
+                            "default": 20,
+                            "description": "Number of requests to send",
+                        },
+                        "concurrency": {
+                            "type": "integer",
+                            "default": 4,
+                            "description": "Concurrent requests",
+                        },
+                        "input_tokens": {
+                            "type": "integer",
+                            "default": 128,
+                            "description": "Approximate input token count",
+                        },
+                        "output_tokens": {
+                            "type": "integer",
+                            "default": 64,
+                            "description": "Max output tokens",
+                        },
+                    },
+                    "required": ["deployment_id"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "stop_deployment",
+                "description": "Stop and remove a deployment.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"deployment_id": {"type": "integer"}},
+                    "required": ["deployment_id"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "get_deployment_logs",
+                "description": "Get the Docker container logs for a deployment. Use this to check why a deployment is slow to start or failing.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "deployment_id": {"type": "integer"},
+                        "tail": {
+                            "type": "integer",
+                            "default": 100,
+                            "description": "Number of log lines to retrieve",
+                        },
+                    },
+                    "required": ["deployment_id"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "check_deployment_status",
+                "description": "Check the current status of a deployment without waiting. Use this after wait_for_deployment times out to see if the model is still loading or has failed.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"deployment_id": {"type": "integer"}},
+                    "required": ["deployment_id"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "test_deployment_endpoint",
+                "description": "Test if the deployment API endpoint is responding. Use this to check if a model is ready even if wait_for_deployment timed out.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"deployment_id": {"type": "integer"}},
+                    "required": ["deployment_id"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "finish_tuning",
+                "description": "Complete the tuning process with final recommendation.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "best_config": {
+                            "type": "object",
+                            "description": "The recommended configuration",
+                        },
+                        "reasoning": {
+                            "type": "string",
+                            "description": "Explanation of why this is the best config",
+                        },
+                        "all_results": {
+                            "type": "array",
+                            "description": "All benchmark results collected",
+                        },
+                    },
+                    "required": ["best_config", "reasoning"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "abort_tuning",
+                "description": "Abort the tuning process when it cannot be completed (e.g., GPU memory used by external processes, hardware issues).",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "reason": {
+                            "type": "string",
+                            "description": "Explanation of why tuning cannot be completed",
+                        }
+                    },
+                    "required": ["reason"],
+                },
+            },
+        },
+    ]
+
+
+# =============================================================================
+# Tool Implementations
+# =============================================================================
+
+
+class AgentToolExecutor:
+    """Execute agent tools with real system interactions"""
+
+    def __init__(self, db: AsyncSession, job: TuningJob):
+        """Bind the executor to a database session and the tuning job it serves.
+
+        Args:
+            db: Async session used by the tools for their queries and commits.
+            job: Tuning job whose status and progress finish/abort tools update.
+        """
+        # Ids of deployments created via deploy_model that have not been removed
+        # yet; read by deploy_model, stop_deployment and cleanup.
+        self.created_deployments: list[int] = []
+        self.job = job
+        self.db = db
+
+    async def execute(self, tool_name: str, args: dict) -> str:
+        """Dispatch a tool call by name and return its result as a JSON string.
+
+        The tool is resolved to the coroutine method ``_tool_<tool_name>`` and
+        awaited with ``args`` expanded as keyword arguments. Errors are reported
+        in-band: an unknown tool name, or any exception raised while running the
+        tool, produces ``{"error": ...}`` JSON instead of propagating.
+
+        Args:
+            tool_name: Tool identifier without the ``_tool_`` prefix.
+            args: Keyword arguments for the tool. ``None`` (or any empty value) is
+                treated as "no arguments".
+
+        Returns:
+            JSON text of the tool's result; values json cannot encode are
+            converted with ``str``.
+        """
+        method = getattr(self, f"_tool_{tool_name}", None)
+        if not method:
+            return json.dumps({"error": f"Unknown tool: {tool_name}"})
+
+        try:
+            # `args or {}` keeps a call without an argument object from failing
+            # on `**None`.
+            result = await method(**(args or {}))
+            return json.dumps(result, indent=2, default=str)
+        except Exception as e:
+            # logger.exception records the traceback as well as the message.
+            logger.exception(f"Tool {tool_name} failed: {e}")
+            return json.dumps({"error": str(e)})
+
+    async def _tool_get_hardware_info(self, worker_id: int) -> dict:
+        """Summarise a worker's GPUs with memory figures expressed in GB.
+
+        The stored ``gpu_info`` entries carry no unit, so the unit is guessed per
+        GPU from the magnitude of ``memory_total`` and the same divisor is applied
+        to that GPU's ``memory_used``.
+
+        Args:
+            worker_id: Id of the worker to inspect.
+
+        Returns:
+            ``{"error": "Worker not found"}`` when the worker does not exist,
+            otherwise the worker's id, name, status, GPU count, a per-GPU list and
+            the summed total VRAM (values rounded to one decimal).
+        """
+        lookup = await self.db.execute(select(Worker).where(Worker.id == worker_id))
+        worker = lookup.scalar_one_or_none()
+        if not worker:
+            return {"error": "Worker not found"}
+
+        # (threshold, divisor) pairs, largest first: a memory_total above the
+        # threshold is taken to be in bytes / KB / MB respectively; anything
+        # at or below 1000 is treated as already being GB.
+        unit_guesses = (
+            (1_000_000_000, 1024 * 1024 * 1024),
+            (1_000_000, 1024 * 1024),
+            (1000, 1024),
+        )
+
+        gpus = []
+        vram_sum = 0
+        for position, gpu in enumerate(worker.gpu_info or []):
+            total = gpu.get("memory_total", 0)
+            used = gpu.get("memory_used", 0)
+            scale = next((div for bound, div in unit_guesses if total > bound), 1)
+
+            total_gb = round(total / scale, 1) if total else 0
+            gpus.append(
+                {
+                    "index": gpu.get("index", position),
+                    "name": gpu.get("name", "Unknown"),
+                    "memory_total_gb": total_gb,
+                    "memory_used_gb": round(used / scale, 1) if used else 0,
+                    "memory_free_gb": round((total - used) / scale, 1) if total else 0,
+                    "utilization_percent": gpu.get("utilization_gpu", 0),
+                }
+            )
+            vram_sum += total_gb
+
+        return {
+            "worker_id": worker.id,
+            "worker_name": worker.name,
+            "status": worker.status,
+            "gpu_count": len(gpus),
+            "gpus": gpus,
+            "total_vram_gb": round(vram_sum, 1),
+        }
+
+    async def _tool_get_model_info(self, model_id: int) -> dict:
+        """Look up a registered model and describe it.
+
+        Args:
+            model_id: Primary key of the ``LLMModel`` row.
+
+        Returns:
+            ``{"error": "Model not found"}`` when no row matches, otherwise the
+            model's id, name, ``model_id`` (as ``model_id_hf``), source, the family
+            derived from its name and its backend (as ``default_backend``).
+        """
+        rows = await self.db.execute(select(LLMModel).where(LLMModel.id == model_id))
+        model = rows.scalar_one_or_none()
+        if not model:
+            return {"error": "Model not found"}
+
+        # The family is derived from the model name by the module's helper.
+        family = _extract_model_family(model.name)
+
+        info = {"model_id": model.id, "name": model.name}
+        info["model_id_hf"] = model.model_id
+        info["source"] = model.source
+        info["model_family"] = family
+        info["default_backend"] = model.backend
+        return info
+
+    async def _tool_query_knowledge_base(
+        self, model_family: str | None = None, gpu_model: str | None = None
+    ) -> dict:
+        """Fetch up to five of the best-scoring knowledge-base records.
+
+        Args:
+            model_family: Optional case-insensitive substring filter on the
+                record's model family.
+            gpu_model: Optional case-insensitive substring filter on the record's
+                GPU model.
+
+        Returns:
+            A dict with ``found`` (record count) and ``records``; when nothing
+            matches it also carries an explanatory ``message``. Records are
+            ordered by score, highest first, with NULL scores last.
+        """
+        criteria = []
+        if model_family:
+            criteria.append(PerformanceKnowledge.model_family.ilike(f"%{model_family}%"))
+        if gpu_model:
+            criteria.append(PerformanceKnowledge.gpu_model.ilike(f"%{gpu_model}%"))
+
+        stmt = select(PerformanceKnowledge)
+        for criterion in criteria:
+            stmt = stmt.where(criterion)
+        stmt = stmt.order_by(PerformanceKnowledge.score.desc().nulls_last()).limit(5)
+
+        records = (await self.db.execute(stmt)).scalars().all()
+        if not records:
+            return {
+                "found": 0,
+                "message": "No historical data found. You'll need to run benchmarks to gather data.",
+                "records": [],
+            }
+
+        # Attributes copied verbatim from each record, in output order.
+        exported = (
+            "model_name",
+            "model_family",
+            "gpu_model",
+            "gpu_count",
+            "engine",
+            "quantization",
+            "tensor_parallel",
+            "throughput_tps",
+            "ttft_ms",
+            "tpot_ms",
+            "score",
+        )
+        return {
+            "found": len(records),
+            "records": [{field: getattr(rec, field) for field in exported} for rec in records],
+        }
+
+    async def _tool_list_deployments(self, worker_id: int) -> dict:
+        """List every deployment recorded for a worker.
+
+        Args:
+            worker_id: Id of the worker whose deployments are listed.
+
+        Returns:
+            A dict with ``worker_id``, ``count``, ``deployments`` (one summary per
+            deployment, container id shortened to 12 characters) and a
+            ``message``. Any exception is logged and returned as ``{"error": ...}``.
+        """
+        try:
+            query = (
+                select(Deployment)
+                .where(Deployment.worker_id == worker_id)
+                .options(selectinload(Deployment.model))
+            )
+            found = (await self.db.execute(query)).scalars().all()
+
+            summaries = [
+                {
+                    "deployment_id": dep.id,
+                    "name": dep.name,
+                    "model_name": dep.model.name if dep.model else "Unknown",
+                    "status": dep.status,
+                    "backend": dep.backend,
+                    "port": dep.port,
+                    "container_id": dep.container_id[:12] if dep.container_id else None,
+                }
+                for dep in found
+            ]
+
+            if summaries:
+                message = (
+                    f"Found {len(found)} deployment(s). "
+                    "Stop running deployments to free GPU memory."
+                )
+            else:
+                message = (
+                    "No deployments found on this worker. "
+                    "GPU memory may be used by processes outside LMStack."
+                )
+
+            return {
+                "worker_id": worker_id,
+                "count": len(found),
+                "deployments": summaries,
+                "message": message,
+            }
+        except Exception as e:
+            logger.exception(f"Failed to list deployments: {e}")
+            return {"error": str(e)}
+
+    async def _tool_deploy_model(
+        self,
+        model_id: int,
+        worker_id: int,
+        engine: str,
+        gpu_indexes: list[int] | None = None,
+        extra_params: dict | None = None,
+    ) -> dict:
+        """Create a deployment record and start deploying it in the background.
+
+        The call is refused (``success`` False) when this executor still tracks a
+        deployment it created, when the worker or model does not exist, or when
+        any GPU reported by the worker has less than 20% of its memory free.
+        Otherwise a PENDING ``Deployment`` row is committed, its id is recorded in
+        ``self.created_deployments`` and ``DeployerService.deploy`` is scheduled
+        as an asyncio task; the method returns without waiting for it.
+
+        Args:
+            model_id: Id of the ``LLMModel`` to deploy.
+            worker_id: Id of the ``Worker`` to deploy on.
+            engine: Stored as the deployment's ``backend``.
+            gpu_indexes: GPU indexes to use; ``[0]`` when omitted.
+            extra_params: Extra parameters stored on the deployment; ``{}`` when
+                omitted.
+
+        Returns:
+            A dict with ``success`` plus either the new deployment's id, name and
+            config, or an ``error`` message. Exceptions are logged and reported
+            the same way rather than raised.
+        """
+        import time
+
+        from app.services.deployer import DeployerService
+
+        try:
+            # Only one tuning deployment may exist at a time.
+            if self.created_deployments:
+                return {
+                    "success": False,
+                    "error": f"You still have active deployments: {self.created_deployments}. "
+                    f"Please stop them first using stop_deployment before creating a new one.",
+                }
+
+            worker_result = await self.db.execute(select(Worker).where(Worker.id == worker_id))
+            worker = worker_result.scalar_one_or_none()
+            if not worker:
+                # Reject early instead of committing a row for a missing worker.
+                return {"success": False, "error": "Worker not found"}
+
+            # Check GPU memory availability: refuse below 20% free on any GPU.
+            for g in worker.gpu_info or []:
+                mem_total = g.get("memory_total", 0) or 0
+                mem_used = g.get("memory_used", 0) or 0
+                if mem_total > 0 and (mem_total - mem_used) / mem_total < 0.2:
+                    free_pct = round((mem_total - mem_used) / mem_total * 100, 1)
+                    return {
+                        "success": False,
+                        "error": f"GPU memory is low (only {free_pct}% free). "
+                        f"Please stop any existing deployments first.",
+                    }
+
+            model_result = await self.db.execute(select(LLMModel).where(LLMModel.id == model_id))
+            model = model_result.scalar_one_or_none()
+            if not model:
+                return {"success": False, "error": "Model not found"}
+
+            # Name: sanitised model name (max 30 chars) plus a timestamp suffix.
+            deploy_name = f"tuning-{model.name.replace('/', '-')[:30]}-{int(time.time())}"
+            target_gpus = gpu_indexes or [0]
+
+            deployment = Deployment(
+                name=deploy_name,
+                model_id=model_id,
+                worker_id=worker_id,
+                backend=engine,
+                gpu_indexes=target_gpus,
+                extra_params=extra_params or {},
+                status=DeploymentStatus.PENDING.value,
+            )
+
+            self.db.add(deployment)
+            await self.db.commit()
+            await self.db.refresh(deployment)
+
+            self.created_deployments.append(deployment.id)
+
+            # Start deployment in background using DeployerService.
+            deployer = DeployerService()
+            task = asyncio.create_task(deployer.deploy(deployment.id))
+            # The event loop keeps only weak references to tasks, so an
+            # unreferenced task can be garbage-collected before it finishes.
+            # Hold a strong reference on the class until the task is done.
+            background = getattr(type(self), "_background_tasks", None)
+            if background is None:
+                background = type(self)._background_tasks = set()
+            background.add(task)
+            task.add_done_callback(background.discard)
+
+            return {
+                "success": True,
+                "deployment_id": deployment.id,
+                "deployment_name": deploy_name,
+                "config": {
+                    "engine": engine,
+                    "gpu_indexes": target_gpus,
+                    "extra_params": extra_params,
+                },
+                "message": "Deployment created. Use wait_for_deployment to wait until ready.",
+            }
+        except Exception as e:
+            logger.exception(f"Failed to deploy model: {e}")
+            return {"success": False, "error": str(e)}
+
+    async def _tool_wait_for_deployment(
+        self, deployment_id: int, timeout_seconds: int = 300
+    ) -> dict:
+        """Poll a deployment until it is running, has failed, or the timeout expires.
+
+        The deployment row is re-read every 5 seconds.
+
+        Args:
+            deployment_id: Id of the deployment to wait for.
+            timeout_seconds: Maximum time to keep polling.
+
+        Returns:
+            ``success`` True with port, endpoint URL and elapsed time once the
+            status is RUNNING. ``success`` False when the row is missing, the
+            status is ERROR or STOPPED, or the timeout elapsed; the latter two
+            include an ``action_required`` hint.
+        """
+        start_time = time.time()
+
+        # populate_existing: each poll returns the Deployment object this session
+        # has already loaded. Without the option the ORM does not overwrite its
+        # attributes from the new row, so a status change written through another
+        # session would never become visible in this loop.
+        stmt = (
+            select(Deployment)
+            .where(Deployment.id == deployment_id)
+            .options(selectinload(Deployment.worker))
+            .execution_options(populate_existing=True)
+        )
+
+        while time.time() - start_time < timeout_seconds:
+            result = await self.db.execute(stmt)
+            deployment = result.scalar_one_or_none()
+
+            if not deployment:
+                return {
+                    "success": False,
+                    "deployment_id": deployment_id,
+                    "error": "Deployment not found. It may have been deleted.",
+                }
+
+            if deployment.status == DeploymentStatus.RUNNING.value:
+                # Host part of the worker address; localhost when no worker is attached.
+                host = deployment.worker.address.split(":")[0] if deployment.worker else "localhost"
+                return {
+                    "success": True,
+                    "deployment_id": deployment_id,
+                    "status": "running",
+                    "port": deployment.port,
+                    "endpoint": f"http://{host}:{deployment.port}/v1",
+                    "wait_time_seconds": round(time.time() - start_time, 1),
+                }
+
+            if deployment.status in (
+                DeploymentStatus.ERROR.value,
+                DeploymentStatus.STOPPED.value,
+            ):
+                return {
+                    "success": False,
+                    "deployment_id": deployment_id,
+                    "status": deployment.status,
+                    "error": deployment.status_message or "Deployment failed",
+                    "action_required": "Call stop_deployment to clean up before trying again",
+                }
+
+            await asyncio.sleep(5)
+
+        return {
+            "success": False,
+            "deployment_id": deployment_id,
+            "error": f"Timeout after {timeout_seconds}s",
+            "action_required": (
+                f"1. Call get_deployment_logs({deployment_id}) to check what's happening\n"
+                f"2. If model is still loading, wait more with wait_for_deployment(timeout_seconds=300)\n"
+                f"3. If there's an error, call stop_deployment({deployment_id}) and try a different config"
+            ),
+        }
+
+    async def _tool_run_benchmark(
+        self,
+        deployment_id: int,
+        num_requests: int = 20,
+        concurrency: int = 4,
+        input_tokens: int = 128,
+        output_tokens: int = 64,
+    ) -> dict:
+        """Run the HTTP benchmark against a running deployment.
+
+        Args:
+            deployment_id: Id of the deployment to benchmark.
+            num_requests: Number of benchmark requests to send.
+            concurrency: Maximum number of requests in flight.
+            input_tokens: Approximate prompt size passed to the benchmark.
+            output_tokens: ``max_tokens`` requested per completion.
+
+        Returns:
+            The metrics dict produced by ``_run_http_benchmark``, or
+            ``{"error": ...}`` when the deployment is missing, not RUNNING, or has
+            no worker/port to build an endpoint from.
+        """
+        result = await self.db.execute(
+            select(Deployment)
+            .where(Deployment.id == deployment_id)
+            .options(
+                selectinload(Deployment.worker),
+                selectinload(Deployment.model),
+            )
+            # Refresh an already-loaded object so the status check below is not
+            # made against attributes cached earlier in this session.
+            .execution_options(populate_existing=True)
+        )
+        deployment = result.scalar_one_or_none()
+
+        if not deployment:
+            return {"error": "Deployment not found"}
+
+        if deployment.status != DeploymentStatus.RUNNING.value:
+            return {"error": f"Deployment is not running (status: {deployment.status})"}
+
+        # Without a worker or port there is no endpoint to call; report that
+        # explicitly instead of failing on `None.address`.
+        if not deployment.worker or not deployment.port:
+            return {"error": "Deployment not fully initialized (no worker or port)"}
+
+        # Build endpoint URL from the host part of the worker address.
+        worker_ip = deployment.worker.address.split(":")[0]
+        base_url = f"http://{worker_ip}:{deployment.port}/v1"
+
+        # Model identifier sent in API calls; "default" when no model is attached.
+        model_name = deployment.model.model_id if deployment.model else "default"
+
+        return await _run_http_benchmark(
+            base_url=base_url,
+            model_name=model_name,
+            num_requests=num_requests,
+            concurrency=concurrency,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+        )
+
+    async def _tool_stop_deployment(self, deployment_id: int) -> dict:
+        """Stop a deployment's container (if any) and delete its record.
+
+        The id is also dropped from ``self.created_deployments``, including when
+        the record no longer exists, so a vanished deployment cannot keep
+        ``deploy_model`` blocked.
+
+        Args:
+            deployment_id: Id of the deployment to remove.
+
+        Returns:
+            ``{"success": True, "message": ...}`` on removal, otherwise
+            ``{"success": False, "error": ...}``. Exceptions are logged and
+            reported the same way rather than raised.
+        """
+        from app.services.deployer import DeployerService
+
+        try:
+            result = await self.db.execute(select(Deployment).where(Deployment.id == deployment_id))
+            deployment = result.scalar_one_or_none()
+
+            if not deployment:
+                # Nothing left to stop: forget the id so it is no longer counted
+                # as an active deployment.
+                if deployment_id in self.created_deployments:
+                    self.created_deployments.remove(deployment_id)
+                return {"success": False, "error": "Deployment not found"}
+
+            # Stop container if one was started.
+            if deployment.container_id:
+                deployer = DeployerService()
+                await deployer.stop(deployment_id)
+
+            # Delete deployment record.
+            await self.db.delete(deployment)
+            await self.db.commit()
+
+            if deployment_id in self.created_deployments:
+                self.created_deployments.remove(deployment_id)
+
+            return {"success": True, "message": f"Deployment {deployment_id} stopped and removed"}
+        except Exception as e:
+            logger.exception(f"Failed to stop deployment: {e}")
+            return {"success": False, "error": str(e)}
+
+    async def _tool_check_deployment_status(self, deployment_id: int) -> dict:
+        """Return a snapshot of a deployment's state without waiting.
+
+        Args:
+            deployment_id: Id of the deployment to inspect.
+
+        Returns:
+            Status, status message, container id, port and backend plus the
+            derived flags ``is_ready`` (RUNNING), ``is_failed`` (ERROR) and
+            ``is_loading`` (STARTING); ``{"error": ...}`` when the deployment is
+            missing or the lookup raises.
+        """
+        try:
+            result = await self.db.execute(
+                select(Deployment)
+                .where(Deployment.id == deployment_id)
+                .options(selectinload(Deployment.worker))
+                # Overwrite an already-loaded object with the row just read;
+                # otherwise the status cached earlier in this session is
+                # returned instead of the current one.
+                .execution_options(populate_existing=True)
+            )
+            deployment = result.scalar_one_or_none()
+
+            if not deployment:
+                return {"error": "Deployment not found"}
+
+            return {
+                "deployment_id": deployment_id,
+                "status": deployment.status,
+                "status_message": deployment.status_message,
+                "container_id": deployment.container_id,
+                "port": deployment.port,
+                "backend": deployment.backend,
+                "is_ready": deployment.status == DeploymentStatus.RUNNING.value,
+                "is_failed": deployment.status == DeploymentStatus.ERROR.value,
+                "is_loading": deployment.status == DeploymentStatus.STARTING.value,
+            }
+        except Exception as e:
+            logger.exception(f"Failed to check deployment status: {e}")
+            return {"error": str(e)}
+
+    async def _tool_test_deployment_endpoint(self, deployment_id: int) -> dict:
+        """Probe a deployment's ``/v1/models`` endpoint to see whether it is serving.
+
+        Args:
+            deployment_id: Id of the deployment to probe.
+
+        Returns:
+            A dict with ``ready`` True and the served model ids when the endpoint
+            answers 200 with a non-empty ``data`` list. ``ready`` is False, with a
+            ``message`` or ``error``, when the deployment has no worker/port, the
+            connection fails or times out, the status is not 200, or no models
+            are listed. ``{"error": ...}`` when the deployment is missing or an
+            unexpected exception occurs.
+        """
+        try:
+            result = await self.db.execute(
+                select(Deployment)
+                .where(Deployment.id == deployment_id)
+                .options(selectinload(Deployment.worker))
+            )
+            deployment = result.scalar_one_or_none()
+
+            if not deployment:
+                return {"error": "Deployment not found"}
+
+            if not deployment.worker or not deployment.port:
+                return {
+                    "deployment_id": deployment_id,
+                    "ready": False,
+                    "error": "Deployment not fully initialized (no worker or port)",
+                }
+
+            # Build endpoint URL from the host part of the worker address.
+            worker_ip = deployment.worker.address.split(":")[0]
+            base_url = f"http://{worker_ip}:{deployment.port}/v1"
+
+            # Fields shared by every "not ready" answer below.
+            not_ready = {"deployment_id": deployment_id, "ready": False, "endpoint": base_url}
+
+            try:
+                async with httpx.AsyncClient(timeout=10.0) as client:
+                    response = await client.get(f"{base_url}/models")
+            except httpx.ConnectError:
+                return {
+                    **not_ready,
+                    "message": "Cannot connect to endpoint - container may still be starting",
+                }
+            except httpx.TimeoutException:
+                # Base class of all httpx timeouts, so a connect timeout gets the
+                # same answer as a read timeout instead of the generic error path.
+                return {
+                    **not_ready,
+                    "message": "Connection timeout - model may still be loading",
+                }
+
+            if response.status_code != 200:
+                return {
+                    **not_ready,
+                    "status_code": response.status_code,
+                    "message": f"API returned status {response.status_code}",
+                }
+
+            models = response.json().get("data", [])
+            if not models:
+                return {**not_ready, "message": "API responding but no models loaded yet"}
+
+            return {
+                "deployment_id": deployment_id,
+                "ready": True,
+                "endpoint": base_url,
+                "models": [m.get("id") for m in models],
+                "message": "Deployment is ready! You can now run benchmarks.",
+            }
+        except Exception as e:
+            logger.exception(f"Failed to test deployment endpoint: {e}")
+            return {"error": str(e)}
+
+    async def _tool_get_deployment_logs(self, deployment_id: int, tail: int = 100) -> dict:
+        """Fetch container logs for a deployment through ``DeployerService``.
+
+        Args:
+            deployment_id: Id of the deployment whose logs are wanted.
+            tail: Passed to ``DeployerService.get_logs`` as ``tail``.
+
+        Returns:
+            Deployment id, container id, status, status message and ``logs``. When
+            the deployment has no container id yet, an ``error`` entry replaces
+            the logs. ``{"error": ...}`` when the deployment is missing or an
+            exception occurs.
+        """
+        from app.services.deployer import DeployerService
+
+        try:
+            stmt = (
+                select(Deployment)
+                .where(Deployment.id == deployment_id)
+                .options(selectinload(Deployment.worker))
+            )
+            deployment = (await self.db.execute(stmt)).scalar_one_or_none()
+
+            if not deployment:
+                return {"error": "Deployment not found"}
+
+            if not deployment.container_id:
+                return dict(
+                    deployment_id=deployment_id,
+                    status=deployment.status,
+                    error="No container ID - deployment may not have started yet",
+                    status_message=deployment.status_message,
+                )
+
+            # The worker relationship is eager-loaded above and the whole
+            # deployment object is handed to the service.
+            container_logs = await DeployerService().get_logs(deployment, tail=tail)
+
+            return dict(
+                deployment_id=deployment_id,
+                container_id=deployment.container_id,
+                status=deployment.status,
+                status_message=deployment.status_message,
+                logs=container_logs,
+            )
+        except Exception as e:
+            logger.exception(f"Failed to get deployment logs: {e}")
+            return {"error": str(e)}
+
+    async def _tool_finish_tuning(
+        self, best_config: dict, reasoning: str, all_results: list | None = None
+    ) -> dict:
+        """Mark the tuning job completed and store its results in the knowledge base.
+
+        The job receives the COMPLETED status, the recommended config (with the
+        reasoning merged in), the raw results and a 100% progress record. Every
+        dict entry of ``all_results`` that has non-empty ``metrics`` becomes one
+        ``PerformanceKnowledge`` row; other entries are skipped. All changes are
+        committed together.
+
+        Args:
+            best_config: The recommended configuration.
+            reasoning: Explanation stored under ``best_config["reasoning"]``.
+            all_results: Benchmark results collected during tuning. Entries are
+                read for ``metrics``, ``gpu_indexes``, ``engine`` and
+                ``extra_params``; missing or null values fall back to defaults.
+
+        Returns:
+            ``success`` True, a message with the number of saved records, and the
+            ``best_config`` / ``reasoning`` that were passed in.
+        """
+        # Update job status.
+        self.job.status = TuningJobStatus.COMPLETED.value
+        self.job.status_message = "Auto-tuning completed successfully"
+        self.job.best_config = {**best_config, "reasoning": reasoning}
+        self.job.all_results = all_results or []
+        self.job.completed_at = datetime.now(UTC)
+
+        # Update progress to 100%, reusing total_steps from the current progress
+        # record when there is one (default 20).
+        current_total = self.job.progress.get("total_steps", 20) if self.job.progress else 20
+        configs_count = len(all_results) if all_results else 1
+        self.job.current_step = current_total
+        self.job.total_steps = current_total
+        self.job.progress = {
+            "step": current_total,
+            "total_steps": current_total,
+            "step_name": "completed",
+            "step_description": "Tuning completed successfully",
+            "configs_tested": configs_count,
+            "configs_total": configs_count,
+        }
+
+        # Save results to knowledge base.
+        saved_count = 0
+        if all_results:
+            model = self.job.model
+            worker = self.job.worker
+            gpus = (worker.gpu_info if worker else None) or []
+            gpu_name = gpus[0].get("name", "Unknown GPU") if gpus else "Unknown GPU"
+
+            def to_gb(raw) -> float:
+                """Convert a unit-less memory figure to GB by its magnitude.
+
+                Uses the same thresholds as _tool_get_hardware_info: values above
+                1e9 are bytes, above 1e6 KB, above 1000 MB, otherwise GB.
+                """
+                raw = raw or 0
+                if raw > 1_000_000_000:
+                    return raw / (1024**3)
+                if raw > 1_000_000:
+                    return raw / (1024**2)
+                if raw > 1000:
+                    return raw / 1024
+                return raw
+
+            # Identical for every record, so computed once outside the loop.
+            total_vram_gb = sum(to_gb(g.get("memory_total", 0)) for g in gpus)
+            model_family = _extract_model_family(model.name)
+
+            for result in all_results:
+                # The results are caller-supplied; ignore malformed entries.
+                if not isinstance(result, dict):
+                    continue
+                metrics = result.get("metrics")
+                if not isinstance(metrics, dict) or not metrics:
+                    continue
+
+                # `or` fallbacks also cover keys that are present but null.
+                gpu_count = len(result.get("gpu_indexes") or [0])
+                extra_params = result.get("extra_params")
+                quantization = (
+                    extra_params.get("quantization") if isinstance(extra_params, dict) else None
+                )
+
+                knowledge = PerformanceKnowledge(
+                    gpu_model=gpu_name,
+                    gpu_count=gpu_count,
+                    total_vram_gb=total_vram_gb,
+                    model_name=model.name,
+                    model_family=model_family,
+                    engine=result.get("engine", best_config.get("engine", "vllm")),
+                    quantization=quantization,
+                    tensor_parallel=gpu_count,
+                    extra_args=extra_params,
+                    throughput_tps=metrics.get("throughput_tps", 0),
+                    ttft_ms=metrics.get("avg_ttft_ms", 0),
+                    tpot_ms=metrics.get("avg_tpot_ms", 0),
+                    input_length=128,  # Default test params
+                    output_length=64,
+                    concurrency=4,
+                    score=metrics.get("throughput_tps", 0),  # For throughput optimization
+                    source_tuning_job_id=self.job.id,
+                )
+                self.db.add(knowledge)
+                saved_count += 1
+
+        await self.db.commit()
+
+        return {
+            "success": True,
+            "message": f"Tuning completed. Saved {saved_count} result(s) to knowledge base.",
+            "best_config": best_config,
+            "reasoning": reasoning,
+        }
+
+    async def _tool_abort_tuning(self, reason: str) -> dict:
+        """Record that tuning was abandoned and why.
+
+        The job is given the FAILED status, an ``"Aborted: ..."`` status message,
+        a completion timestamp and a progress record named ``"aborted"`` that
+        keeps the current step counters; the change is then committed.
+
+        Args:
+            reason: Explanation of why tuning cannot be completed.
+
+        Returns:
+            ``{"success": True, "message": "Tuning aborted", "reason": reason}``.
+        """
+        job = self.job
+        job.status = TuningJobStatus.FAILED.value
+        job.status_message = f"Aborted: {reason}"
+        job.completed_at = datetime.now(UTC)
+
+        # Progress keeps the step counters as they stood when tuning stopped.
+        job.progress = dict(
+            step=job.current_step,
+            total_steps=job.total_steps,
+            step_name="aborted",
+            step_description=reason,
+        )
+
+        await self.db.commit()
+
+        return dict(success=True, message="Tuning aborted", reason=reason)
+
+    async def cleanup(self):
+        """Stop and remove every deployment this executor still tracks.
+
+        Best effort: a failure for one deployment is logged as a warning and the
+        remaining deployments are still processed.
+        """
+        # Iterate over a snapshot: _tool_stop_deployment removes ids from
+        # self.created_deployments, and mutating the list being iterated would
+        # skip every other entry.
+        for deployment_id in list(self.created_deployments):
+            try:
+                outcome = await self._tool_stop_deployment(deployment_id)
+                # The stop tool reports failures in its result instead of raising.
+                if not outcome.get("success"):
+                    logger.warning(
+                        f"Failed to cleanup deployment {deployment_id}: {outcome.get('error')}"
+                    )
+            except Exception as e:
+                logger.warning(f"Failed to cleanup deployment {deployment_id}: {e}")
+
+
+# =============================================================================
+# Benchmark Implementation
+# =============================================================================
+
+
+async def _run_http_benchmark(
+    base_url: str,
+    model_name: str = "default",
+    num_requests: int = 20,
+    concurrency: int = 4,
+    input_tokens: int = 128,
+    output_tokens: int = 64,
+) -> dict:
+    """Run actual HTTP benchmark against an OpenAI-compatible endpoint"""
+
+    # Generate test prompt with approximate token count
+    test_prompt = "Write a detailed explanation about " + " ".join(
+        ["artificial intelligence"] * (input_tokens // 3)
+    )
+
+    results = []
+    errors = 0
+
+    semaphore = asyncio.Semaphore(concurrency)
+
+    async def make_request(client: httpx.AsyncClient) -> dict | None:
+        nonlocal errors
+        async with semaphore:
+            start_time = time.perf_counter()
+            first_token_time = None
+            token_times = []
+            total_tokens = 0
+
+            try:
+                async with client.stream(
+                    "POST",
+                    f"{base_url}/chat/completions",
+                    json={
+                        "model": model_name,
+                        "messages": [{"role": "user", "content": test_prompt}],
+                        "max_tokens": output_tokens,
+                        "stream": True,
+                    },
+                    timeout=60.0,
+                ) as response:
+                    if response.status_code != 200:
+                        errors += 1
+                        return None
+
+                    async for line in response.aiter_lines():
+                        if line.startswith("data: "):
+                            data = line[6:]
+                            if data == "[DONE]":
+                                break
+                            try:
+                                chunk = json.loads(data)
+                                content = (
+                                    chunk.get("choices", [{}])[0]
+                                    .get("delta", {})
+                                    .get("content", "")
+                                )
+                                if content:
+                                    current_time = time.perf_counter()
+                                    if first_token_time is None:
+                                        first_token_time = current_time
+                                    token_times.append(current_time)
+                                    total_tokens += 1
+                            except json.JSONDecodeError:
+                                pass
+
+                end_time = time.perf_counter()
+
+                if first_token_time and total_tokens > 0:
+                    ttft = (first_token_time - start_time) * 1000  # ms
+                    total_time = end_time - start_time
+
+                    # Calculate TPOT (time per output token) excluding TTFT
+                    if total_tokens > 1:
+                        generation_time = end_time - first_token_time
+                        tpot = (generation_time / (total_tokens - 1)) * 1000  # ms
+                    else:
+                        tpot = 0
+
+                    return {
+                        "ttft_ms": ttft,
+                        "tpot_ms": tpot,
+                        "total_tokens": total_tokens,
+                        "total_time_s": total_time,
+                    }
+            except Exception as e:
+                logger.warning(f"Benchmark request failed: {e}")
+                errors += 1
+                return None
+
+    async with httpx.AsyncClient() as client:
+        # Warm up with a few requests
+        logger.info("Warming up benchmark endpoint...")
+        for _ in range(min(2, num_requests)):
+            await make_request(client)
+
+        # Run actual benchmark
+        logger.info(f"Running {num_requests} benchmark requests with concurrency {concurrency}...")
+        tasks = [make_request(client) for _ in range(num_requests)]
+        results = await asyncio.gather(*tasks)
+
+    # Filter out failed requests
+    valid_results = [r for r in results if r is not None]
+
+    if not valid_results:
+        return {"success": False, "error": "All requests failed", "errors": errors}
+
+    # Calculate metrics
+    ttft_values = [r["ttft_ms"] for r in valid_results]
+    tpot_values = [r["tpot_ms"] for r in valid_results if r["tpot_ms"] > 0]
+    total_tokens = sum(r["total_tokens"] for r in valid_results)
+    total_time = sum(r["total_time_s"] for r in valid_results)
+
+    avg_ttft = sum(ttft_values) / len(ttft_values)
+    avg_tpot = sum(tpot_values) / len(tpot_values) if tpot_values else 0
+    throughput = total_tokens / total_time if total_time > 0 else 0
+
+    return {
+        "success": True,
+        "metrics": {
+            "throughput_tps": round(throughput, 2),
+            "avg_ttft_ms": round(avg_ttft, 2),
+            "avg_tpot_ms": round(avg_tpot, 2),
+            "p50_ttft_ms": round(sorted(ttft_values)[len(ttft_values) // 2], 2),
+            "p99_ttft_ms": (
+                round(sorted(ttft_values)[int(len(ttft_values) * 0.99)], 2)
+                if len(ttft_values) > 1
+                else round(ttft_values[0], 2)
+            ),
+        },
+        "summary": {
+            "total_requests": num_requests,
+            "successful_requests": len(valid_results),
+            "failed_requests": errors,
+            "total_tokens_generated": total_tokens,
+        },
+    }
+
+
+def _extract_model_family(model_name: str) -> str:
+    """Return the family label inferred from `model_name`, or "Unknown" if none matches.
+
+    Case-insensitive; the first pattern that hits wins. "phi" and "yi" must not touch
+    another letter, so that e.g. "dolphin-mixtral" is not classified as Phi.
+    """
+    import re  # local import: the file-level import block is outside this block
+    name_lower = model_name.lower()
+    families = (
+        (r"qwen", "Qwen"), (r"llama", "Llama"), (r"mistral", "Mistral"),
+        (r"deepseek", "DeepSeek"), (r"(?<![a-z])phi(?![a-z])", "Phi"), (r"gemma", "Gemma"),
+        (r"(?<![a-z])yi(?![a-z])", "Yi"), (r"glm", "GLM"),
+    )
+    for pattern, label in families:
+        if re.search(pattern, name_lower):
+            return label
+    return "Unknown"
+
+
+# =============================================================================
+# Main Agent Runner
+# =============================================================================
+
+
+async def run_tuning_agent(job_id: int, llm_config: dict | None = None):
+    """Run the Auto-Tuning Agent loop for a tuning job.
+
+    Resolves an OpenAI-compatible LLM (llm_config, then settings, then any running
+    deployment) and lets it call agent tools until it finishes, aborts, is cancelled
+    or hits the iteration cap. Errors raised along the way mark the job FAILED.
+
+    Args:
+        job_id: The tuning job ID
+        llm_config: Optional LLM configuration from chat panel:
+            - deployment_id: Use a local deployment
+            - base_url: Custom endpoint URL
+            - api_key: API key for the endpoint
+            - model: Model name
+    """
+    from app.models.deployment import Deployment, DeploymentStatus  # kept function-local
+
+    settings = get_settings()
+
+    async with async_session_maker() as db:
+        # Load job with relationships
+        result = await db.execute(
+            select(TuningJob)
+            .where(TuningJob.id == job_id)
+            .options(
+                selectinload(TuningJob.model),
+                selectinload(TuningJob.worker),
+            )
+        )
+        job = result.scalar_one_or_none()
+
+        if not job:
+            logger.error(f"Tuning job {job_id} not found")
+            return
+
+        executor = AgentToolExecutor(db, job)
+
+        try:
+            # Determine LLM configuration (priority: llm_config > settings > auto-detect)
+            api_key = None
+            base_url = None
+            model_name = "gpt-4o"
+
+            if llm_config:
+                if llm_config.get("deployment_id"):
+                    # Use specified local deployment
+                    deploy_result = await db.execute(
+                        select(Deployment)
+                        .where(Deployment.id == llm_config["deployment_id"])
+                        .options(selectinload(Deployment.worker), selectinload(Deployment.model))
+                    )
+                    deployment = deploy_result.scalar_one_or_none()
+
+                    if deployment and deployment.worker:
+                        worker_ip = deployment.worker.address.split(":")[0]
+                        base_url = f"http://{worker_ip}:{deployment.port}/v1"
+                        api_key = "dummy"
+                        model_name = deployment.model.model_id if deployment.model else model_name
+                        logger.info(
+                            f"Using specified deployment as agent LLM: {base_url} ({model_name})"
+                        )
+                    else:
+                        job.status = TuningJobStatus.FAILED.value
+                        job.status_message = (
+                            f"Deployment {llm_config['deployment_id']} not found or not running"
+                        )
+                        await db.commit()
+                        return
+                elif llm_config.get("base_url"):
+                    # Use custom endpoint
+                    base_url = llm_config["base_url"]
+                    api_key = llm_config.get("api_key") or "dummy"
+                    model_name = llm_config.get("model") or model_name
+                    logger.info(f"Using custom endpoint as agent LLM: {base_url} ({model_name})")
+
+            # Fall back to settings if no llm_config
+            if not api_key:
+                api_key = settings.openai_api_key
+                base_url = settings.openai_base_url
+                model_name = settings.openai_model or model_name
+
+            # If still no API key, try to find any running deployment
+            if not api_key:
+                deploy_result = await db.execute(
+                    select(Deployment)
+                    .where(Deployment.status == DeploymentStatus.RUNNING.value)
+                    .options(selectinload(Deployment.worker), selectinload(Deployment.model))
+                    .limit(1)
+                )
+                local_deployment = deploy_result.scalar_one_or_none()
+
+                if local_deployment and local_deployment.worker:
+                    worker_ip = local_deployment.worker.address.split(":")[0]
+                    base_url = f"http://{worker_ip}:{local_deployment.port}/v1"
+                    api_key = "dummy"
+                    model_name = (
+                        local_deployment.model.model_id if local_deployment.model else model_name
+                    )
+                    logger.info(
+                        f"Auto-detected local deployment as agent LLM: {base_url} ({model_name})"
+                    )
+                else:
+                    job.status = TuningJobStatus.FAILED.value
+                    job.status_message = (
+                        "No LLM configured for Auto-Tuning Agent. "
+                        "Please select a model in the chat panel, or deploy a model first."
+                    )
+                    await db.commit()
+                    return
+
+            # Initialize OpenAI client (supports OpenAI-compatible endpoints)
+            client = AsyncOpenAI(api_key=api_key, base_url=base_url or "https://api.openai.com/v1")
+
+            # Build initial user message
+            user_message = f"""Help me find the best deployment configuration for {job.model.name} on {job.worker.name}. I want to optimize for {job.optimization_target}.
+
+Model ID: {job.model_id}, Worker ID: {job.worker_id}"""
+
+            messages = [
+                {"role": "system", "content": AGENT_SYSTEM_PROMPT},
+                {"role": "user", "content": user_message},
+            ]
+
+            # Initialize conversation log for UI display
+            conversation_log = [
+                {
+                    "role": "user",
+                    "content": user_message,
+                    "timestamp": datetime.now(UTC).isoformat(),
+                }
+            ]
+
+            async def save_log():
+                # Assign a fresh list: re-assigning the same list after in-place appends can
+                # look unchanged to SQLAlchemy's JSON change detection (no UPDATE emitted).
+                job.conversation_log = list(conversation_log)
+                await db.commit()
+
+            async def record_tool_result(tool_call, tool_name, content):
+                """Store a tool result in the UI log and in the LLM message history."""
+                conversation_log.append(
+                    {
+                        "role": "tool",
+                        "tool_call_id": tool_call.id,
+                        "name": tool_name,
+                        "content": content,
+                        "timestamp": datetime.now(UTC).isoformat(),
+                    }
+                )
+                await save_log()
+                messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": content})
+
+            # Update job status
+            job.status = TuningJobStatus.ANALYZING.value
+            job.status_message = "Agent is analyzing the environment..."
+            job.conversation_log = list(conversation_log)
+            await db.commit()
+
+            max_iterations = 20  # hard cap on LLM round-trips for one job
+            iteration = 0
+            terminal_tools = {"finish_tuning": "completed", "abort_tuning": "aborted"}
+
+            while iteration < max_iterations:
+                iteration += 1
+
+                # Check if cancelled (executor cleanup happens in the `finally` block)
+                await db.refresh(job)
+                if job.status == TuningJobStatus.CANCELLED.value:
+                    logger.info(f"Job {job_id} was cancelled")
+                    return
+
+                logger.info(f"Agent iteration {iteration}, calling LLM with model: {model_name}...")
+                response = await client.chat.completions.create(
+                    model=model_name,
+                    messages=messages,
+                    tools=get_agent_tools(),
+                    tool_choice="auto",
+                    max_tokens=4096,
+                )
+
+                assistant_message = response.choices[0].message
+                messages.append(assistant_message.model_dump(exclude_none=True))
+
+                # Add assistant message to conversation log
+                log_entry = {
+                    "role": "assistant",
+                    "content": assistant_message.content or "",
+                    "timestamp": datetime.now(UTC).isoformat(),
+                }
+                if assistant_message.tool_calls:
+                    log_entry["tool_calls"] = [
+                        {
+                            "id": tc.id,
+                            "name": tc.function.name,
+                            "arguments": tc.function.arguments,
+                        }
+                        for tc in assistant_message.tool_calls
+                    ]
+                conversation_log.append(log_entry)
+                await save_log()
+
+                # Check if no tool calls - prompt the agent to take action
+                if not assistant_message.tool_calls:
+                    logger.warning(f"Agent responded without tool calls at iteration {iteration}")
+                    prompt_message = (
+                        "You need to call a tool to proceed. Available actions:\n"
+                        "1. list_deployments - Find existing deployments on the worker\n"
+                        "2. stop_deployment - Stop a deployment to free GPU memory\n"
+                        "3. deploy_model - Deploy a model with specific config\n"
+                        "4. test_deployment_endpoint - Check if deployment is ready\n"
+                        "5. get_deployment_logs - Check container logs\n"
+                        "6. run_benchmark - Run performance benchmark\n"
+                        "7. finish_tuning - Complete with recommendation\n"
+                        "8. abort_tuning - Abort if cannot proceed\n"
+                        "Do not respond with just text - you must call a tool."
+                    )
+                    messages.append({"role": "user", "content": prompt_message})
+                    conversation_log.append(
+                        {
+                            "role": "user",
+                            "content": prompt_message,
+                            "timestamp": datetime.now(UTC).isoformat(),
+                        }
+                    )
+                    await save_log()
+                    continue  # Continue the loop to get tool calls
+
+                # Execute tool calls
+                for tool_call in assistant_message.tool_calls:
+                    tool_name = tool_call.function.name
+                    try:
+                        tool_args = json.loads(tool_call.function.arguments or "{}")
+                    except json.JSONDecodeError as parse_error:
+                        # Malformed arguments from the LLM: return the error as the tool
+                        # result so the agent can retry instead of failing the whole job.
+                        error_text = f"Invalid JSON in tool arguments: {parse_error}"
+                        logger.warning(f"Tool {tool_name}: {error_text}")
+                        error_result = json.dumps({"success": False, "error": error_text})
+                        await record_tool_result(tool_call, tool_name, error_result)
+                        continue
+
+                    logger.info(f"Executing tool: {tool_name}({tool_args})")
+                    # Update job progress
+                    job.status_message = f"Executing: {tool_name}"
+                    job.progress = {
+                        "step": iteration,
+                        "total_steps": max_iterations,
+                        "step_name": tool_name,
+                        "step_description": f"Executing {tool_name} with args: {tool_args}",
+                        "configs_tested": 0,
+                        "configs_total": 0,
+                    }
+                    await db.commit()
+
+                    # Execute tool and record its result
+                    result = await executor.execute(tool_name, tool_args)
+                    await record_tool_result(tool_call, tool_name, result)
+
+                    # Stop once a termination tool has actually been executed
+                    if tool_name in terminal_tools:
+                        logger.info(f"Agent {terminal_tools[tool_name]} tuning for job {job_id}")
+                        return
+
+            # If we reached max iterations without finishing
+            job.status = TuningJobStatus.FAILED.value
+            job.status_message = "Agent reached maximum iterations without completing"
+            await db.commit()
+
+        except Exception as e:
+            logger.exception(f"Agent error for job {job_id}: {e}")
+            job.status = TuningJobStatus.FAILED.value
+            job.status_message = f"Agent error: {str(e)}"
+            await db.commit()
+
+        finally:
+            # Cleanup any test deployments
+            await executor.cleanup()
diff --git a/backend/migrations/009_add_tuning.py b/backend/migrations/009_add_tuning.py
new file mode 100644
index 0000000..9bec7f4
--- /dev/null
+++ b/backend/migrations/009_add_tuning.py
@@ -0,0 +1,173 @@
+"""
+Migration: Add auto-tuning tables
+
+This migration creates tables for auto-tuning, benchmarks, and performance knowledge base.
+
+Run with: python -m migrations.009_add_tuning
+"""
+
+import asyncio
+import sys
+from pathlib import Path
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import create_async_engine
+
+from app.config import get_settings
+
+
+async def table_exists(conn, table_name: str) -> bool:
+    """Return True if `table_name` is listed as a table in sqlite_master (SQLite only)."""
+    # Bind the name as a parameter instead of formatting it into the SQL text.
+    query = text("SELECT name FROM sqlite_master WHERE type='table' AND name = :name")
+    result = await conn.execute(query, {"name": table_name})
+    return result.fetchone() is not None
+
+
+async def migrate():
+    """Create the auto-tuning tables (tuning_jobs first, skipping existing ones) and indexes."""
+    settings = get_settings()
+    engine = create_async_engine(settings.database_url, echo=True)
+
+    async with engine.begin() as conn:
+        if not await table_exists(conn, "tuning_jobs"):
+            print("Creating 'tuning_jobs' table...")
+            await conn.execute(
+                text(
+                    """
+                CREATE TABLE tuning_jobs (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    model_id INTEGER NOT NULL,
+                    worker_id INTEGER NOT NULL,
+                    optimization_target VARCHAR(50) DEFAULT 'balanced',
+                    status VARCHAR(50) DEFAULT 'pending',
+                    status_message TEXT,
+                    current_step INTEGER DEFAULT 0,
+                    total_steps INTEGER DEFAULT 5,
+                    progress JSON,
+                    best_config JSON,
+                    all_results JSON,
+                    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    completed_at DATETIME,
+                    FOREIGN KEY (model_id) REFERENCES llm_models(id),
+                    FOREIGN KEY (worker_id) REFERENCES workers(id)
+                )
+            """
+                )
+            )
+            print("'tuning_jobs' table created successfully!")
+        else:
+            print("'tuning_jobs' table already exists")
+
+        # Create benchmark_results table (references tuning_jobs and deployments)
+        if not await table_exists(conn, "benchmark_results"):
+            print("Creating 'benchmark_results' table...")
+            await conn.execute(
+                text(
+                    """
+                CREATE TABLE benchmark_results (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    tuning_job_id INTEGER,
+                    deployment_id INTEGER NOT NULL,
+                    config JSON NOT NULL,
+                    test_type VARCHAR(50) DEFAULT 'throughput',
+                    test_duration_seconds INTEGER DEFAULT 60,
+                    input_length INTEGER DEFAULT 512,
+                    output_length INTEGER DEFAULT 128,
+                    concurrency INTEGER DEFAULT 1,
+                    throughput_tps REAL,
+                    ttft_ms REAL,
+                    tpot_ms REAL,
+                    total_latency_ms REAL,
+                    gpu_utilization REAL,
+                    vram_usage_gb REAL,
+                    raw_results JSON,
+                    error_message TEXT,
+                    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    FOREIGN KEY (tuning_job_id) REFERENCES tuning_jobs(id),
+                    FOREIGN KEY (deployment_id) REFERENCES deployments(id)
+                )
+            """
+                )
+            )
+            print("'benchmark_results' table created successfully!")
+        else:
+            print("'benchmark_results' table already exists")
+
+        # Create performance_knowledge table (references tuning_jobs)
+        if not await table_exists(conn, "performance_knowledge"):
+            print("Creating 'performance_knowledge' table...")
+            await conn.execute(
+                text(
+                    """
+                CREATE TABLE performance_knowledge (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    gpu_model VARCHAR(255) NOT NULL,
+                    gpu_count INTEGER DEFAULT 1,
+                    total_vram_gb REAL NOT NULL,
+                    model_name VARCHAR(255) NOT NULL,
+                    model_family VARCHAR(100) NOT NULL,
+                    model_params_b REAL,
+                    engine VARCHAR(50) NOT NULL,
+                    quantization VARCHAR(50),
+                    tensor_parallel INTEGER DEFAULT 1,
+                    extra_args JSON,
+                    throughput_tps REAL NOT NULL,
+                    ttft_ms REAL NOT NULL,
+                    tpot_ms REAL NOT NULL,
+                    gpu_utilization REAL,
+                    vram_usage_gb REAL,
+                    test_dataset VARCHAR(100) DEFAULT 'synthetic',
+                    input_length INTEGER DEFAULT 512,
+                    output_length INTEGER DEFAULT 128,
+                    concurrency INTEGER DEFAULT 1,
+                    score REAL,
+                    source_tuning_job_id INTEGER,
+                    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    FOREIGN KEY (source_tuning_job_id) REFERENCES tuning_jobs(id)
+                )
+            """
+                )
+            )
+            print("'performance_knowledge' table created successfully!")
+        else:
+            print("'performance_knowledge' table already exists")
+
+        # Lookup indexes (IF NOT EXISTS is re-run safe). NOTE(review): failures are only printed.
+        print("Creating indexes...")
+        try:
+            await conn.execute(
+                text(
+                    "CREATE INDEX IF NOT EXISTS idx_knowledge_model_family ON performance_knowledge(model_family)"
+                )
+            )
+            await conn.execute(
+                text(
+                    "CREATE INDEX IF NOT EXISTS idx_knowledge_gpu_model ON performance_knowledge(gpu_model)"
+                )
+            )
+            await conn.execute(
+                text(
+                    "CREATE INDEX IF NOT EXISTS idx_knowledge_engine ON performance_knowledge(engine)"
+                )
+            )
+            await conn.execute(
+                text("CREATE INDEX IF NOT EXISTS idx_tuning_jobs_status ON tuning_jobs(status)")
+            )
+            print("Indexes created successfully!")
+        except Exception as e:
+            print(f"Note: Some indexes may already exist: {e}")
+
+        print("\n" + "=" * 50)
+        print("Migration completed successfully!")
+        print("=" * 50)
+
+    await engine.dispose()
+
+
+if __name__ == "__main__":
+    asyncio.run(migrate())
diff --git a/backend/migrations/010_add_conversation_log.py b/backend/migrations/010_add_conversation_log.py
new file mode 100644
index 0000000..ffc36f0
--- /dev/null
+++ b/backend/migrations/010_add_conversation_log.py
@@ -0,0 +1,50 @@
+"""
+Migration: Add conversation_log column to tuning_jobs
+
+This migration adds a JSON column to store the agent's conversation history.
+
+Run with: python -m migrations.010_add_conversation_log
+"""
+
+import asyncio
+import sys
+from pathlib import Path
+
+# Add parent directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import create_async_engine
+
+from app.config import get_settings
+
+
+async def column_exists(conn, table_name: str, column_name: str) -> bool:
+    """Tell whether `table_name` has a column called `column_name` (SQLite PRAGMA lookup)."""
+    pragma_rows = (await conn.execute(text(f"PRAGMA table_info({table_name})"))).fetchall()
+    # Index 1 of each PRAGMA table_info row is compared against the requested column name.
+    return column_name in [row[1] for row in pragma_rows]
+
+
+async def migrate():
+    """Add the JSON `conversation_log` column to `tuning_jobs` unless it already exists."""
+    engine = create_async_engine(get_settings().database_url, echo=True)
+
+    async with engine.begin() as conn:
+        if await column_exists(conn, "tuning_jobs", "conversation_log"):
+            print("'conversation_log' column already exists")
+        else:
+            print("Adding 'conversation_log' column to 'tuning_jobs' table...")
+            await conn.execute(text("ALTER TABLE tuning_jobs ADD COLUMN conversation_log JSON"))
+            print("'conversation_log' column added successfully!")
+
+        separator = "=" * 50
+        print("\n" + separator)
+        print("Migration completed successfully!")
+        print(separator)
+
+    await engine.dispose()
+
+
+if __name__ == "__main__":
+    asyncio.run(migrate())
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index 85adeb4..6bc12a8 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -12,7 +12,7 @@ import {
   useLocation,
   useNavigate,
 } from "react-router-dom";
-import { Layout, ConfigProvider, theme } from "antd";
+import { Layout, ConfigProvider, theme, Button, Tooltip } from "antd";
 import {
   DashboardOutlined,
   CloudServerOutlined,
@@ -28,11 +28,19 @@ import {
   RocketOutlined,
   GlobalOutlined,
   HddOutlined,
+  CommentOutlined,
+  ThunderboltOutlined,
 } from "@ant-design/icons";
 
 import { AuthProvider, useAuth } from "./contexts/AuthContext";
 import { useAppTheme, useResponsive } from "./hooks";
 import { Header, Sidebar, MobileSidebar } from "./components/layout";
+import {
+  ChatPanel,
+  CHAT_PANEL_STORAGE_KEY,
+  DEFAULT_PANEL_WIDTH,
+  TUNING_JOB_EVENT_KEY,
+} from "./components/chat-panel";
 import Loading from "./components/Loading";
 
 // Page Components
@@ -51,6 +59,7 @@ import Settings from "./pages/Settings";
 import Headscale from "./pages/Headscale";
 import Login from "./pages/Login";
 import Setup from "./pages/Setup";
+import AutoTuning from "./pages/AutoTuning";
 
 const { Content } = Layout;
 
@@ -165,6 +174,11 @@ function getMenuItems(isAdmin: boolean) {
       label: "Deploy Model",
     },
     { key: "/deploy-apps", icon: <RocketOutlined />, label: "Deploy Apps" },
+    {
+      key: "/auto-tuning",
+      icon: <ThunderboltOutlined />,
+      label: "Auto-Tuning",
+    },
     {
       key: "/api-keys",
       icon: <ApiOutlined />,
@@ -224,6 +238,25 @@ function getCurrentPageTitle(menuItems: any[], pathname: string) {
 // Main Layout
 // ============================================================================
 
+/**
+ * Load chat panel state from localStorage, falling back to the defaults for
+ * missing or malformed values (persisted JSON is untyped).
+ */
+function loadChatPanelState(): { isOpen: boolean; width: number } {
+  const fallback = { isOpen: false, width: DEFAULT_PANEL_WIDTH };
+  try {
+    const saved = localStorage.getItem(CHAT_PANEL_STORAGE_KEY);
+    if (!saved) return fallback;
+    const state = JSON.parse(saved);
+    return {
+      isOpen: typeof state?.isOpen === "boolean" ? state.isOpen : fallback.isOpen,
+      width: Number.isFinite(state?.width) ? state.width : fallback.width,
+    };
+  } catch {
+    return fallback; // storage unavailable or stored value is not valid JSON
+  }
+}
+
 function AppLayout() {
   const navigate = useNavigate();
   const location = useLocation();
@@ -233,6 +266,65 @@ function AppLayout() {
 
   const [sidebarCollapsed, setSidebarCollapsed] = useState(false);
   const [mobileDrawerOpen, setMobileDrawerOpen] = useState(false);
+  const [chatPanelOpen, setChatPanelOpen] = useState(
+    () => loadChatPanelState().isOpen,
+  );
+  const [chatPanelWidth, setChatPanelWidth] = useState(
+    () => loadChatPanelState().width,
+  );
+
+  // Persist chat panel state
+  useEffect(() => {
+    try {
+      const current = localStorage.getItem(CHAT_PANEL_STORAGE_KEY);
+      const state = current ? JSON.parse(current) : {};
+      localStorage.setItem(
+        CHAT_PANEL_STORAGE_KEY,
+        JSON.stringify({ ...state, isOpen: chatPanelOpen }),
+      );
+    } catch {
+      // Ignore
+    }
+  }, [chatPanelOpen]);
+
+  // Listen for tuning job events to auto-open chat panel
+  useEffect(() => {
+    const handleStorageChange = (e: StorageEvent) => {
+      if (e.key === TUNING_JOB_EVENT_KEY && e.newValue) {
+        try {
+          const data = JSON.parse(e.newValue);
+          if (data.jobId) {
+            setChatPanelOpen(true);
+          }
+        } catch {
+          // Ignore
+        }
+      }
+    };
+
+    // Check on mount if there's a pending tuning job
+    const checkInitial = () => {
+      const stored = localStorage.getItem(TUNING_JOB_EVENT_KEY);
+      if (stored) {
+        try {
+          const data = JSON.parse(stored);
+          if (
+            data.jobId &&
+            data.timestamp &&
+            Date.now() - data.timestamp < 5000
+          ) {
+            setChatPanelOpen(true);
+          }
+        } catch {
+          // Ignore
+        }
+      }
+    };
+
+    checkInitial();
+    window.addEventListener("storage", handleStorageChange);
+    return () => window.removeEventListener("storage", handleStorageChange);
+  }, []);
 
   useEffect(() => {
     document.body.setAttribute("data-theme", isDark ? "dark" : "light");
@@ -403,8 +495,9 @@ function AppLayout() {
         <Layout
           style={{
             marginLeft: isMobile ? 0 : sidebarWidth,
+            marginRight: chatPanelOpen && !isMobile ? chatPanelWidth : 0,
             background: colors.bg,
-            transition: "margin-left 0.2s ease",
+            transition: "margin-left 0.2s ease, margin-right 0.2s ease",
           }}
         >
           <Header
@@ -435,6 +528,7 @@ function AppLayout() {
               <Route path="/models" element={<Models />} />
               <Route path="/deployments" element={<Deployments />} />
               <Route path="/deploy-apps" element={<DeployApps />} />
+              <Route path="/auto-tuning" element={<AutoTuning />} />
               <Route path="/api-keys" element={<ApiKeys />} />
               <Route
                 path="/users"
@@ -463,6 +557,39 @@ function AppLayout() {
             </Routes>
           </Content>
         </Layout>
+
+        {/* Floating chat button */}
+        {!chatPanelOpen && (
+          <Tooltip title="Open AI Chat" placement="left">
+            <Button
+              type="primary"
+              shape="circle"
+              size="large"
+              icon={<CommentOutlined style={{ fontSize: 20 }} />}
+              onClick={() => setChatPanelOpen(true)}
+              style={{
+                position: "fixed",
+                bottom: 24,
+                right: 24,
+                width: 56,
+                height: 56,
+                zIndex: 998,
+                boxShadow: isDark
+                  ? "0 4px 16px rgba(0, 0, 0, 0.4)"
+                  : "0 4px 16px rgba(0, 0, 0, 0.15)",
+              }}
+            />
+          </Tooltip>
+        )}
+
+        {/* Chat panel */}
+        <ChatPanel
+          isOpen={chatPanelOpen}
+          onClose={() => setChatPanelOpen(false)}
+          onWidthChange={setChatPanelWidth}
+          isDark={isDark}
+          colors={colors}
+        />
       </Layout>
     </ConfigProvider>
   );
diff --git a/frontend/src/components/chat-panel/ChatPanel.tsx b/frontend/src/components/chat-panel/ChatPanel.tsx
new file mode 100644
index 0000000..b0ffa52
--- /dev/null
+++ b/frontend/src/components/chat-panel/ChatPanel.tsx
@@ -0,0 +1,954 @@
+/**
+ * Chat Panel Component
+ *
+ * A slide-out chat panel that can be used from any page.
+ * Similar to Cursor's AI chat panel on the right side.
+ */
+import { useState, useEffect, useRef, useCallback } from "react";
+import { Button, Tooltip, Collapse } from "antd";
+import {
+  CloseOutlined,
+  ClearOutlined,
+  DownOutlined,
+  RobotOutlined,
+  UserOutlined,
+  BulbOutlined,
+  CheckCircleFilled,
+  ToolOutlined,
+  LoadingOutlined,
+} from "@ant-design/icons";
+import {
+  ChatInput,
+  MessageContent,
+  getThemeColors,
+  getChatStyles,
+} from "../chat";
+import type { ThemeColors, ChatMessage } from "../chat";
+import { ModelSelector } from "./ModelSelector";
+import { useChat } from "./useChat";
+import { ToolConfirmModal } from "./ToolConfirmModal";
+import { TuningJobView } from "./TuningJobView";
+import type { ChatModelConfig, CustomEndpoint, ChatPanelState } from "./types";
+import {
+  DEFAULT_PANEL_WIDTH,
+  MIN_PANEL_WIDTH,
+  MAX_PANEL_WIDTH,
+  CHAT_PANEL_STORAGE_KEY,
+} from "./types";
+import type { AppColors } from "../../hooks/useTheme";
+
+// localStorage key whose JSON value ({ jobId, timestamp }) announces a tuning job
+export const TUNING_JOB_EVENT_KEY = "lmstack-active-tuning-job";
+
+interface ChatPanelProps {
+  isOpen: boolean; // when false the panel renders nothing
+  onClose: () => void; // invoked by the close buttons and the backdrop
+  onWidthChange?: (width: number) => void; // called with the width on mount and on change
+  isDark: boolean; // selects the dark or light colour variants
+  colors: AppColors; // app palette; border, accent and textMuted are read here
+}
+
+/**
+ * Load panel state from localStorage ({} if missing, corrupt or non-object).
+ */
+function loadPanelState(): Partial<ChatPanelState> {
+  try {
+    const saved = localStorage.getItem(CHAT_PANEL_STORAGE_KEY);
+    const parsed: unknown = saved ? JSON.parse(saved) : null;
+    if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
+      return parsed as Partial<ChatPanelState>;
+    }
+  } catch {
+    // Storage unavailable or invalid JSON: fall back to defaults
+  }
+  return {};
+}
+
+/**
+ * Persist panel state to localStorage.
+ *
+ * The given fields are merged over whatever loadPanelState() currently
+ * returns, so callers may save a single field at a time.
+ */
+function savePanelState(state: Partial<ChatPanelState>) {
+  try {
+    const merged = { ...loadPanelState(), ...state };
+    localStorage.setItem(CHAT_PANEL_STORAGE_KEY, JSON.stringify(merged));
+  } catch {
+    // Best-effort: a failed save is ignored
+  }
+}
+
+/**
+ * Global chat panel: fixed right-hand, resizable panel showing either the chat or a tuning-job view.
+ */
+export function ChatPanel({
+  isOpen,
+  onClose,
+  onWidthChange,
+  isDark,
+  colors,
+}: ChatPanelProps) {
+  const chatColors = getThemeColors(isDark);
+
+  // Panel state, initialised lazily from the persisted localStorage entry
+  const [width, setWidth] = useState(
+    () => loadPanelState().width || DEFAULT_PANEL_WIDTH,
+  );
+  const [selectedModel, setSelectedModel] = useState<ChatModelConfig | null>(
+    () => loadPanelState().selectedModel || null,
+  );
+  const [customEndpoints, setCustomEndpoints] = useState<CustomEndpoint[]>(
+    () => loadPanelState().customEndpoints || [],
+  );
+
+  // Chat state: local input text plus everything exposed by the useChat hook
+  const [inputValue, setInputValue] = useState("");
+  const {
+    messages,
+    isStreaming,
+    isExecutingTool,
+    currentToolName,
+    pendingTools,
+    showConfirmModal,
+    systemContext,
+    refreshContext,
+    sendMessage,
+    stopStreaming,
+    clearMessages,
+    confirmToolExecution,
+    cancelToolExecution,
+  } = useChat();
+
+  // Active tuning job (shows TuningJobView instead of chat)
+  const [activeTuningJobId, setActiveTuningJobId] = useState<number | null>(
+    null,
+  );
+
+  // Listen for tuning job events. NOTE(review): "storage" events normally fire only in other tabs - confirm how same-tab jobs are signalled
+  useEffect(() => {
+    const handleStorageChange = (e: StorageEvent) => {
+      if (e.key === TUNING_JOB_EVENT_KEY && e.newValue) {
+        try {
+          const data = JSON.parse(e.newValue);
+          if (data.jobId) {
+            setActiveTuningJobId(data.jobId);
+          }
+        } catch {
+          // Ignore malformed payloads
+        }
+      }
+    };
+
+    // Also check on mount: only entries written less than 5 s ago are honoured
+    const checkInitial = () => {
+      const stored = localStorage.getItem(TUNING_JOB_EVENT_KEY);
+      if (stored) {
+        try {
+          const data = JSON.parse(stored);
+          if (
+            data.jobId &&
+            data.timestamp &&
+            Date.now() - data.timestamp < 5000
+          ) {
+            setActiveTuningJobId(data.jobId);
+            // Clear it after reading (older entries are left in place)
+            localStorage.removeItem(TUNING_JOB_EVENT_KEY);
+          }
+        } catch {
+          // Ignore malformed payloads
+        }
+      }
+    };
+
+    checkInitial();
+    window.addEventListener("storage", handleStorageChange);
+    return () => window.removeEventListener("storage", handleStorageChange);
+  }, []);
+
+  // Refs and transient UI state (resize drag, scroll-to-bottom button)
+  const messagesContainerRef = useRef<HTMLDivElement>(null);
+  const messagesEndRef = useRef<HTMLDivElement>(null);
+  const resizeHandleRef = useRef<HTMLDivElement>(null);
+  const [isResizing, setIsResizing] = useState(false);
+  const [showScrollButton, setShowScrollButton] = useState(false);
+  const userScrolledUpRef = useRef(false);
+
+  // Persist state changes (each effect saves only its own field)
+  useEffect(() => {
+    savePanelState({ selectedModel });
+  }, [selectedModel]);
+
+  useEffect(() => {
+    savePanelState({ customEndpoints });
+  }, [customEndpoints]);
+
+  useEffect(() => {
+    savePanelState({ width });
+    onWidthChange?.(width);
+  }, [width, onWidthChange]);
+
+  // Handle resize: while dragging, width = distance from pointer to the right window edge, clamped to [MIN, MAX]
+  useEffect(() => {
+    if (!isResizing) return;
+
+    const handleMouseMove = (e: MouseEvent) => {
+      const newWidth = window.innerWidth - e.clientX;
+      setWidth(Math.max(MIN_PANEL_WIDTH, Math.min(MAX_PANEL_WIDTH, newWidth)));
+    };
+
+    const handleMouseUp = () => {
+      setIsResizing(false);
+    };
+
+    document.addEventListener("mousemove", handleMouseMove);
+    document.addEventListener("mouseup", handleMouseUp);
+
+    return () => {
+      document.removeEventListener("mousemove", handleMouseMove);
+      document.removeEventListener("mouseup", handleMouseUp);
+    };
+  }, [isResizing]);
+
+  // Scroll handling: "near bottom" means within 100px of the end of the list
+  const handleScroll = useCallback(() => {
+    const container = messagesContainerRef.current;
+    if (!container) return;
+
+    const { scrollTop, scrollHeight, clientHeight } = container;
+    const isNearBottom = scrollHeight - scrollTop - clientHeight < 100;
+
+    setShowScrollButton(!isNearBottom);
+    if (isNearBottom) {
+      userScrolledUpRef.current = false;
+    } else if (isStreaming) {
+      userScrolledUpRef.current = true;
+    }
+  }, [isStreaming]);
+
+  const scrollToBottom = useCallback(() => {
+    const container = messagesContainerRef.current;
+    if (container) {
+      container.scrollTop = container.scrollHeight;
+    }
+    userScrolledUpRef.current = false;
+    setShowScrollButton(false);
+  }, []);
+
+  // Auto-scroll during streaming: re-pin to the bottom every 50 ms unless the user scrolled up
+  useEffect(() => {
+    if (!isStreaming) return;
+
+    if (!userScrolledUpRef.current) {
+      scrollToBottom();
+    }
+
+    const interval = setInterval(() => {
+      if (!userScrolledUpRef.current) {
+        const container = messagesContainerRef.current;
+        if (container) {
+          container.scrollTop = container.scrollHeight;
+        }
+      }
+    }, 50);
+
+    return () => clearInterval(interval);
+  }, [isStreaming, scrollToBottom]);
+
+  // Scroll on new messages (only while not streaming)
+  useEffect(() => {
+    if (!isStreaming && messages.length > 0) {
+      scrollToBottom();
+    }
+  }, [messages.length, isStreaming, scrollToBottom]);
+
+  // Send message handler: ignores blank input or a missing model, then clears the input
+  const handleSend = useCallback(() => {
+    if (!inputValue.trim() || !selectedModel) return;
+    sendMessage(inputValue, selectedModel);
+    setInputValue("");
+  }, [inputValue, selectedModel, sendMessage]);
+
+  // Handle clear
+  const handleClear = useCallback(() => {
+    clearMessages();
+  }, [clearMessages]);
+
+  if (!isOpen) return null;
+
+  return (
+    <>
+      {/* Dynamic styles for markdown */}
+      <style>{getChatStyles({ isDark, colors: chatColors })}</style>
+
+      {/* Backdrop for mobile (currently hidden via display: "none") */}
+      <div
+        onClick={onClose}
+        style={{
+          position: "fixed",
+          inset: 0,
+          background: "rgba(0, 0, 0, 0.3)",
+          zIndex: 999,
+          display: "none", // Enable for mobile
+        }}
+      />
+
+      {/* Panel */}
+      <div
+        style={{
+          position: "fixed",
+          top: 0,
+          right: 0,
+          bottom: 0,
+          width,
+          background: isDark ? "#0d0d0d" : "#ffffff",
+          borderLeft: `1px solid ${colors.border}`,
+          zIndex: 1000,
+          display: "flex",
+          flexDirection: "column",
+          boxShadow: isDark
+            ? "-4px 0 24px rgba(0, 0, 0, 0.4)"
+            : "-4px 0 24px rgba(0, 0, 0, 0.1)",
+        }}
+      >
+        {/* Resize handle: 4px strip on the left edge; mouse-down starts a drag */}
+        <div
+          ref={resizeHandleRef}
+          onMouseDown={() => setIsResizing(true)}
+          style={{
+            position: "absolute",
+            left: 0,
+            top: 0,
+            bottom: 0,
+            width: 4,
+            cursor: "ew-resize",
+            background: isResizing ? colors.accent : "transparent",
+            transition: "background 0.15s",
+          }}
+          onMouseEnter={(e) => {
+            if (!isResizing) {
+              e.currentTarget.style.background = colors.border;
+            }
+          }}
+          onMouseLeave={(e) => {
+            if (!isResizing) {
+              e.currentTarget.style.background = "transparent";
+            }
+          }}
+        />
+
+        {/* Header - different for tuning view vs chat */}
+        {activeTuningJobId ? (
+          <div
+            style={{
+              display: "flex",
+              alignItems: "center",
+              justifyContent: "space-between",
+              padding: "12px 16px",
+              borderBottom: `1px solid ${colors.border}`,
+              flexShrink: 0,
+            }}
+          >
+            <div style={{ display: "flex", alignItems: "center", gap: 8 }}>
+              <span style={{ fontWeight: 500, color: chatColors.text }}>
+                Auto-Tuning Agent
+              </span>
+            </div>
+            <div style={{ display: "flex", alignItems: "center", gap: 4 }}>
+              <Tooltip title="Back to Chat">
+                <Button
+                  type="text"
+                  size="small"
+                  onClick={() => setActiveTuningJobId(null)}
+                  style={{ color: colors.textMuted, fontSize: 12 }}
+                >
+                  Back to Chat
+                </Button>
+              </Tooltip>
+              <Tooltip title="Close">
+                <Button
+                  type="text"
+                  size="small"
+                  icon={<CloseOutlined />}
+                  onClick={onClose}
+                  style={{ color: colors.textMuted }}
+                />
+              </Tooltip>
+            </div>
+          </div>
+        ) : (
+          <div
+            style={{
+              display: "flex",
+              alignItems: "center",
+              justifyContent: "space-between",
+              padding: "12px 16px",
+              borderBottom: `1px solid ${colors.border}`,
+              flexShrink: 0,
+            }}
+          >
+            <ModelSelector
+              value={selectedModel}
+              onChange={setSelectedModel}
+              customEndpoints={customEndpoints}
+              onCustomEndpointsChange={setCustomEndpoints}
+              isDark={isDark}
+              colors={colors}
+              compact
+            />
+
+            <div style={{ display: "flex", alignItems: "center", gap: 4 }}>
+              {/* System status indicator: worker / running-deployment counts; click refreshes the context */}
+              {systemContext && (
+                <Tooltip
+                  title={
+                    <div style={{ fontSize: 12 }}>
+                      <div>Workers: {systemContext.workers.length}</div>
+                      <div>
+                        Deployments:{" "}
+                        {
+                          systemContext.deployments.filter(
+                            (d) => d.status === "running",
+                          ).length
+                        }
+                      </div>
+                      <div>Models: {systemContext.models.length}</div>
+                      <div style={{ marginTop: 4, color: "#8c8c8c" }}>
+                        Click to refresh
+                      </div>
+                    </div>
+                  }
+                >
+                  <Button
+                    type="text"
+                    size="small"
+                    icon={
+                      <CheckCircleFilled
+                        style={{ color: "#52c41a", fontSize: 12 }}
+                      />
+                    }
+                    onClick={refreshContext}
+                    style={{ color: colors.textMuted, padding: "0 4px" }}
+                  >
+                    <span style={{ fontSize: 11, marginLeft: 4 }}>
+                      {systemContext.workers.length}W /{" "}
+                      {
+                        systemContext.deployments.filter(
+                          (d) => d.status === "running",
+                        ).length
+                      }
+                      D
+                    </span>
+                  </Button>
+                </Tooltip>
+              )}
+              {messages.length > 0 && (
+                <Tooltip title="Clear chat">
+                  <Button
+                    type="text"
+                    size="small"
+                    icon={<ClearOutlined />}
+                    onClick={handleClear}
+                    style={{ color: colors.textMuted }}
+                  />
+                </Tooltip>
+              )}
+              <Tooltip title="Close">
+                <Button
+                  type="text"
+                  size="small"
+                  icon={<CloseOutlined />}
+                  onClick={onClose}
+                  style={{ color: colors.textMuted }}
+                />
+              </Tooltip>
+            </div>
+          </div>
+        )}
+
+        {/* Content - either TuningJobView or normal chat */}
+        {activeTuningJobId ? (
+          <TuningJobView jobId={activeTuningJobId} isDark={isDark} />
+        ) : (
+          <>
+            {/* Messages: scrollable list, or the empty state when there are none */}
+            <div
+              ref={messagesContainerRef}
+              onScroll={handleScroll}
+              style={{
+                flex: 1,
+                overflow: "auto",
+                padding: "16px",
+              }}
+            >
+              {messages.length === 0 ? (
+                <EmptyState
+                  selectedModel={selectedModel}
+                  systemContext={systemContext}
+                  colors={chatColors}
+                />
+              ) : (
+                <div
+                  style={{ display: "flex", flexDirection: "column", gap: 20 }}
+                >
+                  {messages.map((msg, index) => {
+                    const isLast = index === messages.length - 1;
+                    const showStreaming =
+                      isLast && isStreaming && msg.role === "assistant";
+                    const showToolExecution =
+                      isLast && isExecutingTool && msg.role === "assistant";
+
+                    return (
+                      <MessageBubble
+                        key={msg.id}
+                        message={msg}
+                        isStreaming={showStreaming}
+                        isExecutingTool={showToolExecution}
+                        currentToolName={
+                          showToolExecution ? currentToolName : null
+                        }
+                        isDark={isDark}
+                        colors={chatColors}
+                      />
+                    );
+                  })}
+                  <div ref={messagesEndRef} />
+                </div>
+              )}
+            </div>
+
+            {/* Scroll to bottom button (shown when not near the bottom) */}
+            {showScrollButton && (
+              <div
+                style={{
+                  position: "absolute",
+                  bottom: 100,
+                  left: "50%",
+                  transform: "translateX(-50%)",
+                  zIndex: 10,
+                }}
+              >
+                <Button
+                  type="default"
+                  shape="circle"
+                  size="small"
+                  icon={<DownOutlined />}
+                  onClick={scrollToBottom}
+                  style={{
+                    boxShadow: "0 2px 8px rgba(0,0,0,0.15)",
+                    background: isDark ? "#27272a" : "#ffffff",
+                    borderColor: isDark ? "#3f3f46" : "#e4e4e7",
+                  }}
+                />
+              </div>
+            )}
+
+            {/* Input (disabled until a model is selected) */}
+            <div
+              style={{
+                padding: "12px 16px",
+                borderTop: `1px solid ${colors.border}`,
+              }}
+            >
+              <ChatInput
+                value={inputValue}
+                onChange={setInputValue}
+                onSend={handleSend}
+                onStop={stopStreaming}
+                isStreaming={isStreaming}
+                disabled={!selectedModel}
+                isDark={isDark}
+                colors={chatColors}
+              />
+            </div>
+          </>
+        )}
+      </div>
+
+      {/* Tool Confirmation Modal */}
+      <ToolConfirmModal
+        visible={showConfirmModal}
+        pendingTools={pendingTools}
+        onConfirm={confirmToolExecution}
+        onCancel={cancelToolExecution}
+        isDark={isDark}
+      />
+    </>
+  );
+}
+
+/**
+ * Empty state shown while the chat has no messages
+ */
+interface EmptyStateProps {
+  selectedModel: ChatModelConfig | null;
+  systemContext: import("./systemContext").SystemContext | null;
+  colors: ThemeColors;
+}
+
+function EmptyState({ selectedModel, systemContext, colors }: EmptyStateProps) {
+  const suffix = (count: number) => (count !== 1 ? "s" : "");
+  const isUp = (status: string) => /running|up/.test(status.toLowerCase());
+  const activeCount = systemContext
+    ? systemContext.deployments.filter((d) => d.status === "running").length
+    : 0;
+  const upCount = systemContext
+    ? systemContext.containers.filter((c) => isUp(c.status)).length
+    : 0;
+  const subtitle = selectedModel
+    ? "I can help you manage your LLM infrastructure"
+    : "Choose a model from the dropdown above";
+
+  return (
+    <div
+      style={{
+        display: "flex",
+        flexDirection: "column",
+        alignItems: "center",
+        justifyContent: "center",
+        height: "100%",
+        textAlign: "center",
+        padding: "20px",
+      }}
+    >
+      <div
+        style={{
+          width: 48,
+          height: 48,
+          borderRadius: "50%",
+          background: colors.userBubble,
+          display: "flex",
+          alignItems: "center",
+          justifyContent: "center",
+          marginBottom: 16,
+        }}
+      >
+        <RobotOutlined style={{ fontSize: 24, color: colors.textSecondary }} />
+      </div>
+      <div
+        style={{
+          fontSize: 15,
+          fontWeight: 500,
+          color: colors.text,
+          marginBottom: 8,
+        }}
+      >
+        {selectedModel ? "LMStack AI Assistant" : "Select a Model"}
+      </div>
+      <div style={{ fontSize: 13, color: colors.textMuted, marginBottom: 16 }}>
+        {subtitle}
+      </div>
+
+      {/* System overview card: needs both a selected model and a loaded context */}
+      {selectedModel && systemContext && (
+        <div
+          style={{
+            width: "100%",
+            maxWidth: 300,
+            padding: 12,
+            borderRadius: 8,
+            background: colors.userBubble,
+            textAlign: "left",
+            fontSize: 12,
+          }}
+        >
+          <div style={{ fontWeight: 500, marginBottom: 8, color: colors.text }}>
+            System Overview
+          </div>
+          <div style={{ color: colors.textMuted, lineHeight: 1.8 }}>
+            <div>
+              • {systemContext.workers.length} Worker
+              {suffix(systemContext.workers.length)}
+            </div>
+            <div>
+              • {upCount}/{systemContext.containers.length}{" "}
+              Container{suffix(systemContext.containers.length)} running
+            </div>
+            <div>
+              • {activeCount} Model deployment
+              {suffix(activeCount)} active
+            </div>
+            <div>
+              • {systemContext.models.length} Model
+              {suffix(systemContext.models.length)} available
+            </div>
+            <div>
+              • {systemContext.images.length} Docker image
+              {suffix(systemContext.images.length)}
+            </div>
+          </div>
+          <div style={{ marginTop: 12, fontSize: 11, color: colors.textMuted }}>
+            Try: "有幾個容器在運行？" or "GPU 記憶體剩多少？"
+          </div>
+        </div>
+      )}
+    </div>
+  );
+}
+
+/** Format a tool name for display: underscores become spaces and the first
+ * character after each word boundary is upper-cased. */
+function formatToolName(name: string): string {
+  const spaced = name.split("_").join(" ");
+  return spaced.replace(/\b\w/g, (first) => first.toUpperCase());
+}
+
+/** Message bubble component: renders a single user or assistant chat message. */
+interface MessageBubbleProps {
+  message: ChatMessage;
+  isStreaming: boolean;
+  isExecutingTool: boolean;
+  currentToolName: string | null;
+  isDark: boolean;
+  colors: ThemeColors;
+}
+
+function MessageBubble({
+  message,
+  isStreaming,
+  isExecutingTool,
+  currentToolName,
+  isDark,
+  colors,
+}: MessageBubbleProps) {
+  const isUser = message.role === "user";
+
+  return (
+    <div
+      style={{
+        display: "flex",
+        justifyContent: isUser ? "flex-end" : "flex-start",
+        gap: 8,
+      }}
+    >
+      {/* Avatar for non-user messages (rendered before the content) */}
+      {!isUser && (
+        <div
+          style={{
+            width: 28,
+            height: 28,
+            borderRadius: "50%",
+            background: isDark ? "#3f3f46" : "#e4e4e7",
+            display: "flex",
+            alignItems: "center",
+            justifyContent: "center",
+            flexShrink: 0,
+            marginTop: 2,
+          }}
+        >
+          <RobotOutlined
+            style={{ fontSize: 12, color: colors.textSecondary }}
+          />
+        </div>
+      )}
+
+      {/* Content: plain pre-wrapped text for the user, rich sections otherwise */}
+      <div
+        style={{
+          maxWidth: "85%",
+          padding: isUser ? "10px 14px" : "0",
+          borderRadius: isUser ? 18 : 0,
+          background: isUser ? colors.userBubble : "transparent",
+        }}
+      >
+        {isUser ? (
+          <div
+            style={{
+              fontSize: 14,
+              lineHeight: 1.5,
+              color: colors.text,
+              whiteSpace: "pre-wrap",
+            }}
+          >
+            {message.content}
+          </div>
+        ) : (
+          <>
+            {/* Thinking section for reasoning models */}
+            {message.thinking && (
+              <Collapse
+                size="small"
+                ghost
+                style={{ marginBottom: 8 }}
+                items={[
+                  {
+                    key: "thinking",
+                    label: (
+                      <span style={{ fontSize: 12, color: colors.textMuted }}>
+                        <BulbOutlined style={{ marginRight: 6 }} />
+                        Thinking Process
+                      </span>
+                    ),
+                    children: (
+                      <div
+                        style={{
+                          fontSize: 13,
+                          color: colors.textSecondary,
+                          whiteSpace: "pre-wrap",
+                          lineHeight: 1.6,
+                          maxHeight: 300,
+                          overflow: "auto",
+                          padding: "8px 0",
+                        }}
+                      >
+                        {message.thinking}
+                      </div>
+                    ),
+                  },
+                ]}
+              />
+            )}
+
+            {/* Tool execution indicator - show when executing */}
+            {isExecutingTool && currentToolName && (
+              <div
+                style={{
+                  marginBottom: 12,
+                  padding: "10px 12px",
+                  borderRadius: 8,
+                  background: isDark
+                    ? "rgba(24, 144, 255, 0.1)"
+                    : "rgba(24, 144, 255, 0.08)",
+                  border: `1px solid ${isDark ? "rgba(24, 144, 255, 0.3)" : "rgba(24, 144, 255, 0.2)"}`,
+                  display: "flex",
+                  alignItems: "center",
+                  gap: 8,
+                }}
+              >
+                <LoadingOutlined
+                  style={{ color: "#1890ff", fontSize: 14 }}
+                  spin
+                />
+                <span
+                  style={{ fontSize: 13, color: "#1890ff", fontWeight: 500 }}
+                >
+                  Executing: {formatToolName(currentToolName)}
+                </span>
+              </div>
+            )}
+
+            {/* Tool calls display - shown whenever no tool is currently executing */}
+            {message.toolCalls &&
+              message.toolCalls.length > 0 &&
+              !isExecutingTool && (
+                <div
+                  style={{
+                    marginBottom: 12,
+                    padding: "10px 12px",
+                    borderRadius: 8,
+                    background: isDark
+                      ? "rgba(82, 196, 26, 0.1)"
+                      : "rgba(82, 196, 26, 0.08)",
+                    border: `1px solid ${isDark ? "rgba(82, 196, 26, 0.3)" : "rgba(82, 196, 26, 0.2)"}`,
+                  }}
+                >
+                  <div
+                    style={{
+                      fontSize: 12,
+                      color: "#52c41a",
+                      fontWeight: 500,
+                      marginBottom: 8,
+                    }}
+                  >
+                    <ToolOutlined style={{ marginRight: 6 }} />
+                    Tool Calls Executed
+                  </div>
+                  {message.toolCalls.map((tc) => {
+                    let args: Record<string, any> = {};
+                    try {
+                      const parsed = JSON.parse(tc.function.arguments);
+                      // "null" or a primitive is valid JSON but not an args map
+                      if (parsed && typeof parsed === "object") args = parsed;
+                    } catch {
+                      // Malformed JSON: keep the empty default
+                    }
+                    return (
+                      <div
+                        key={tc.id}
+                        style={{
+                          fontSize: 12,
+                          color: colors.textSecondary,
+                          marginTop: 6,
+                          padding: "6px 8px",
+                          borderRadius: 4,
+                          background: isDark
+                            ? "rgba(255,255,255,0.03)"
+                            : "rgba(0,0,0,0.02)",
+                        }}
+                      >
+                        <div
+                          style={{
+                            display: "flex",
+                            alignItems: "center",
+                            gap: 6,
+                          }}
+                        >
+                          <CheckCircleFilled
+                            style={{ color: "#52c41a", fontSize: 12 }}
+                          />
+                          <span style={{ fontWeight: 500, color: colors.text }}>
+                            {formatToolName(tc.function.name)}
+                          </span>
+                        </div>
+                        {Object.keys(args).length > 0 && (
+                          <div
+                            style={{
+                              marginTop: 4,
+                              marginLeft: 18,
+                              fontSize: 11,
+                              color: colors.textMuted,
+                            }}
+                          >
+                            {Object.entries(args).map(([key, value]) => (
+                              <div key={key}>
+                                {key}:{" "}
+                                {typeof value === "object"
+                                  ? JSON.stringify(value)
+                                  : String(value)}
+                              </div>
+                            ))}
+                          </div>
+                        )}
+                      </div>
+                    );
+                  })}
+                </div>
+              )}
+
+            <MessageContent
+              content={message.content}
+              isStreaming={isStreaming}
+              isDark={isDark}
+              colors={colors}
+            />
+            {message.model && (
+              <div
+                style={{ marginTop: 6, fontSize: 11, color: colors.textMuted }}
+              >
+                via {message.model}
+              </div>
+            )}
+          </>
+        )}
+      </div>
+
+      {/* User avatar (rendered after the content) */}
+      {isUser && (
+        <div
+          style={{
+            width: 28,
+            height: 28,
+            borderRadius: "50%",
+            background: isDark ? "#3f3f46" : "#e4e4e7",
+            display: "flex",
+            alignItems: "center",
+            justifyContent: "center",
+            flexShrink: 0,
+            marginTop: 2,
+          }}
+        >
+          <UserOutlined style={{ fontSize: 12, color: colors.textSecondary }} />
+        </div>
+      )}
+    </div>
+  );
+}
diff --git a/frontend/src/components/chat-panel/ModelSelector.tsx b/frontend/src/components/chat-panel/ModelSelector.tsx
new file mode 100644
index 0000000..f5a17b4
--- /dev/null
+++ b/frontend/src/components/chat-panel/ModelSelector.tsx
@@ -0,0 +1,541 @@
+/**
+ * Model Selector Component
+ *
+ * Dropdown for selecting chat models from deployments, Semantic Router,
+ * or custom OpenAI-compatible endpoints.
+ */
+import { useState, useEffect, useCallback } from "react";
+import {
+  Dropdown,
+  Button,
+  Modal,
+  Form,
+  Input,
+  Space,
+  message,
+  Tooltip,
+  Select,
+  Spin,
+} from "antd";
+import {
+  RobotOutlined,
+  ThunderboltOutlined,
+  PlusOutlined,
+  ApiOutlined,
+  DeleteOutlined,
+  DownOutlined,
+  EditOutlined,
+  SyncOutlined,
+} from "@ant-design/icons";
+import { deploymentsApi, semanticRouterApi } from "../../services/api";
+import { api } from "../../api/client";
+import type { Deployment } from "../../types";
+import type { SemanticRouterStatus } from "../../services/api";
+import type { ChatModelConfig, CustomEndpoint } from "./types";
+import type { AppColors } from "../../hooks/useTheme";
+
+interface RemoteModel {
+  id: string; // model identifier; used as the Select option label/value and stored as modelId
+  owned_by: string; // part of the typed /fetch-models response; not read by this component
+}
+
+interface ModelSelectorProps {
+  value: ChatModelConfig | null; // current selection; null shows the "Select Model" placeholder
+  onChange: (config: ChatModelConfig | null) => void; // called on selection; null when the selected custom endpoint is deleted
+  customEndpoints: CustomEndpoint[]; // entries listed under "CUSTOM ENDPOINTS"
+  onCustomEndpointsChange: (endpoints: CustomEndpoint[]) => void; // receives the full updated list after add/edit/delete
+  isDark: boolean; // picks the dark or light dropdown background and shadow
+  colors: AppColors; // theme palette used for text and the trigger button background
+  compact?: boolean; // smaller trigger button (height, padding, font size, max width); defaults to false
+}
+
+/**
+ * Model selector with support for multiple model sources
+ */
+export function ModelSelector({
+  value,
+  onChange,
+  customEndpoints,
+  onCustomEndpointsChange,
+  isDark,
+  colors,
+  compact = false,
+}: ModelSelectorProps) {
+  const [deployments, setDeployments] = useState<Deployment[]>([]);
+  const [semanticRouterStatus, setSemanticRouterStatus] =
+    useState<SemanticRouterStatus | null>(null);
+  const [customModalOpen, setCustomModalOpen] = useState(false);
+  const [editingEndpoint, setEditingEndpoint] = useState<CustomEndpoint | null>(
+    null,
+  );
+  const [form] = Form.useForm();
+
+  // Remote models state
+  const [remoteModels, setRemoteModels] = useState<RemoteModel[]>([]);
+  const [fetchingModels, setFetchingModels] = useState(false);
+
+  // Query /fetch-models for the models exposed by the Base URL entered in the form
+  const fetchRemoteModels = useCallback(async () => {
+    const baseUrl = form.getFieldValue("endpoint");
+    if (!baseUrl) {
+      message.warning("Please enter a Base URL first");
+      return;
+    }
+
+    // Clear the previous result so stale options are not offered while loading
+    setFetchingModels(true);
+    setRemoteModels([]);
+
+    try {
+      const payload = {
+        endpoint: baseUrl,
+        api_key: form.getFieldValue("apiKey") || null,
+      };
+      const { data } = await api.post<{ models: RemoteModel[] }>(
+        "/fetch-models",
+        payload,
+      );
+      const found = data.models;
+      if (found.length === 0) {
+        message.info("No models found at this endpoint");
+        return;
+      }
+      setRemoteModels(found);
+      // A single result is unambiguous, so pre-select it
+      if (found.length === 1) form.setFieldValue("modelId", found[0].id);
+      message.success(`Found ${found.length} model(s)`);
+    } catch (error) {
+      console.error("Failed to fetch models:", error);
+      message.error("Failed to fetch models from endpoint");
+    } finally {
+      setFetchingModels(false);
+    }
+  }, [form]);
+
+  // Load running deployments and Semantic Router status on mount, then refresh every 10 s
+  useEffect(() => {
+    const fetchData = async () => {
+      try {
+        const [deploymentsRes, srStatus] = await Promise.all([
+          deploymentsApi.list({ status: "running" }),
+          semanticRouterApi.getStatus().catch(() => null), // a failed status call yields null instead of rejecting the whole batch
+        ]);
+        setDeployments(deploymentsRes.items);
+        setSemanticRouterStatus(srStatus);
+      } catch (error) {
+        console.error("Failed to fetch models:", error);
+      }
+    };
+
+    fetchData();
+    const interval = setInterval(fetchData, 10000);
+    return () => clearInterval(interval); // stop polling on unmount
+  }, []);
+
+  // Handle custom endpoint save: validate the modal form, add or replace the
+  // endpoint in the list, select it, then close and reset the modal.
+  const handleSaveCustomEndpoint = useCallback(async () => {
+    let values: Pick<CustomEndpoint, "name" | "endpoint" | "apiKey" | "modelId">;
+    try {
+      values = await form.validateFields();
+    } catch {
+      return; // validation failed; the form itself shows the field errors
+    }
+
+    // Only validation is guarded above, so an error thrown by the parent
+    // callbacks below surfaces instead of being silently discarded.
+    const endpoint: CustomEndpoint = {
+      id: editingEndpoint?.id || `custom_${Date.now()}`,
+      name: values.name,
+      endpoint: values.endpoint,
+      apiKey: values.apiKey || undefined,
+      modelId: values.modelId || undefined,
+    };
+
+    const nextEndpoints = editingEndpoint
+      ? customEndpoints.map((e) => (e.id === editingEndpoint.id ? endpoint : e))
+      : [...customEndpoints, endpoint];
+    onCustomEndpointsChange(nextEndpoints);
+
+    // Auto-select the saved endpoint
+    onChange({
+      type: "custom",
+      name: endpoint.name,
+      modelId: endpoint.modelId,
+      endpoint: endpoint.endpoint,
+      apiKey: endpoint.apiKey,
+    });
+
+    setCustomModalOpen(false);
+    setEditingEndpoint(null);
+    form.resetFields();
+    message.success(editingEndpoint ? "Endpoint updated" : "Endpoint added");
+  }, [
+    form,
+    editingEndpoint,
+    customEndpoints,
+    onCustomEndpointsChange,
+    onChange,
+  ]);
+
+  // Handle custom endpoint delete. The selection is cleared only when it matches the
+  // removed entry's URL and model, so another endpoint on the same server stays selected.
+  const handleDeleteCustomEndpoint = useCallback(
+    (id: string) => {
+      const removed = customEndpoints.find((e) => e.id === id);
+      onCustomEndpointsChange(customEndpoints.filter((e) => e.id !== id));
+      const wasSelected =
+        removed !== undefined && value?.type === "custom" &&
+        value.endpoint === removed.endpoint && value.modelId === removed.modelId;
+      if (wasSelected) onChange(null);
+    },
+    [customEndpoints, onCustomEndpointsChange, value, onChange],
+  );
+
+  // Build the dropdown menu items top to bottom: Semantic Router, deployments, custom endpoints
+  const menuItems: any[] = [];
+
+  // Semantic Router option, offered only while the router status reports `deployed`
+  if (semanticRouterStatus?.deployed) {
+    menuItems.push({
+      key: "semantic-router",
+      label: (
+        <div
+          style={{
+            display: "flex",
+            alignItems: "center",
+            gap: 8,
+            padding: "4px 0",
+          }}
+        >
+          <ThunderboltOutlined style={{ color: "#faad14" }} />
+          <div>
+            <div>Semantic Router</div>
+            <div style={{ fontSize: 11, color: colors.textMuted }}>
+              Auto-select best model
+            </div>
+          </div>
+        </div>
+      ),
+      onClick: () =>
+        onChange({
+          type: "semantic-router",
+          name: "Semantic Router",
+        }),
+    });
+  }
+
+  // Deployments from the last poll, listed under a disabled header row
+  if (deployments.length > 0) {
+    if (menuItems.length > 0) {
+      menuItems.push({ type: "divider" });
+    }
+    menuItems.push({
+      key: "deployments-header",
+      label: (
+        <span style={{ fontSize: 11, color: colors.textMuted }}>
+          DEPLOYED MODELS
+        </span>
+      ),
+      disabled: true,
+    });
+
+    deployments.forEach((d) => {
+      menuItems.push({
+        key: `deployment-${d.id}`,
+        label: (
+          <div
+            style={{
+              display: "flex",
+              alignItems: "center",
+              gap: 8,
+              padding: "4px 0",
+            }}
+          >
+            <RobotOutlined />
+            <div>
+              <div>{d.model?.name || d.name}</div>
+              <div style={{ fontSize: 11, color: colors.textMuted }}>
+                @{d.worker?.name}
+              </div>
+            </div>
+          </div>
+        ),
+        onClick: () =>
+          onChange({
+            type: "deployment",
+            deploymentId: d.id,
+            name: d.model?.name || d.name,
+          }),
+      });
+    });
+  }
+
+  // Custom endpoints section (always shown to allow adding new endpoints)
+  if (menuItems.length > 0) {
+    menuItems.push({ type: "divider" });
+  }
+  menuItems.push({
+    key: "custom-header",
+    label: (
+      <span style={{ fontSize: 11, color: colors.textMuted }}>
+        CUSTOM ENDPOINTS
+      </span>
+    ),
+    disabled: true,
+  });
+
+  customEndpoints.forEach((ep) => {
+    menuItems.push({
+      key: `custom-${ep.id}`,
+      label: (
+        <div
+          style={{
+            display: "flex",
+            alignItems: "center",
+            justifyContent: "space-between",
+            padding: "4px 0",
+            width: "100%",
+          }}
+        >
+          <div style={{ display: "flex", alignItems: "center", gap: 8 }}>
+            <ApiOutlined />
+            <div>
+              <div>{ep.name}</div>
+              <div
+                style={{
+                  fontSize: 11,
+                  color: colors.textMuted,
+                  maxWidth: 180,
+                  overflow: "hidden",
+                  textOverflow: "ellipsis",
+                }}
+              >
+                {ep.endpoint}
+              </div>
+            </div>
+          </div>
+          <Space size={4}>
+            <Tooltip title="Edit">
+              <Button
+                type="text"
+                size="small"
+                icon={<EditOutlined style={{ fontSize: 12 }} />}
+                onClick={(e) => {
+                  e.stopPropagation(); // keep the click from bubbling to the menu item's onClick
+                  setEditingEndpoint(ep);
+                  form.setFieldsValue(ep); // pre-fill the modal form with the stored endpoint
+                  setRemoteModels([]); // discard models detected for a previously opened endpoint
+                  setCustomModalOpen(true);
+                }}
+                style={{ width: 24, height: 24, minWidth: 24 }}
+              />
+            </Tooltip>
+            <Tooltip title="Delete">
+              <Button
+                type="text"
+                size="small"
+                danger
+                icon={<DeleteOutlined style={{ fontSize: 12 }} />}
+                onClick={(e) => {
+                  e.stopPropagation(); // keep the click from bubbling to the menu item's onClick
+                  handleDeleteCustomEndpoint(ep.id);
+                }}
+                style={{ width: 24, height: 24, minWidth: 24 }}
+              />
+            </Tooltip>
+          </Space>
+        </div>
+      ),
+      onClick: () =>
+        onChange({
+          type: "custom",
+          name: ep.name,
+          modelId: ep.modelId,
+          endpoint: ep.endpoint,
+          apiKey: ep.apiKey,
+        }),
+    });
+  });
+
+  // "Add Custom Endpoint" entry: opens the modal in add mode with an empty form
+  menuItems.push({
+    key: "add-custom",
+    label: (
+      <div
+        style={{
+          display: "flex",
+          alignItems: "center",
+          gap: 8,
+          padding: "4px 0",
+          color: colors.textSecondary,
+        }}
+      >
+        <PlusOutlined />
+        <span>Add Custom Endpoint</span>
+      </div>
+    ),
+    onClick: () => {
+      setEditingEndpoint(null);
+      form.resetFields();
+      setRemoteModels([]);
+      setCustomModalOpen(true);
+    },
+  });
+
+  // Get display label for the trigger button
+  const getDisplayLabel = () => {
+    if (!value) return "Select Model";
+    const isRouter = value.type === "semantic-router";
+    return isRouter ? "Semantic Router" : value.name;
+  };
+
+  // Get display icon for the trigger button; no selection and deployments
+  // both use the robot icon
+  const getDisplayIcon = () => {
+    const kind = value?.type;
+    if (kind === "custom") return <ApiOutlined />;
+    if (kind !== "semantic-router") return <RobotOutlined />;
+    return <ThunderboltOutlined style={{ color: "#faad14" }} />;
+  };
+
+  return (
+    <>
+      <Dropdown
+        menu={{
+          items: menuItems,
+          style: {
+            background: isDark ? "#1f1f1f" : "#ffffff",
+            borderRadius: 12,
+            maxHeight: 400,
+            overflow: "auto",
+            boxShadow: isDark
+              ? "0 6px 16px rgba(0, 0, 0, 0.4)"
+              : "0 6px 16px rgba(0, 0, 0, 0.12)",
+          },
+        }}
+        trigger={["click"]}
+        placement="bottomLeft"
+      >
+        <Button
+          type="text"
+          style={{
+            display: "flex",
+            alignItems: "center",
+            gap: 6,
+            height: compact ? 32 : 36,
+            padding: compact ? "0 8px" : "0 12px",
+            borderRadius: 8,
+            background: colors.menuItemHover,
+            border: "none",
+            fontSize: compact ? 13 : 14,
+            fontWeight: 500,
+            color: colors.text,
+            maxWidth: compact ? 160 : 220,
+          }}
+        >
+          {getDisplayIcon()}
+          <span
+            style={{
+              overflow: "hidden",
+              textOverflow: "ellipsis",
+              whiteSpace: "nowrap",
+            }}
+          >
+            {getDisplayLabel()}
+          </span>
+          <DownOutlined
+            style={{ fontSize: 10, color: colors.textMuted, flexShrink: 0 }}
+          />
+        </Button>
+      </Dropdown>
+
+      {/* Custom endpoint modal, shared by the add and edit flows */}
+      <Modal
+        title={editingEndpoint ? "Edit Endpoint" : "Add Custom Endpoint"}
+        open={customModalOpen}
+        onOk={handleSaveCustomEndpoint}
+        onCancel={() => {
+          setCustomModalOpen(false);
+          setEditingEndpoint(null);
+          form.resetFields(); // discard unsaved input
+          setRemoteModels([]); // and any models detected for this endpoint
+        }}
+        okText={editingEndpoint ? "Save" : "Add"}
+        width={480}
+      >
+        <Form form={form} layout="vertical" style={{ marginTop: 16 }}>
+          <Form.Item
+            name="name"
+            label="Display Name"
+            rules={[{ required: true, message: "Please enter a name" }]}
+          >
+            <Input placeholder="e.g., Local Ollama" />
+          </Form.Item>
+          <Form.Item
+            name="endpoint"
+            label="Base URL"
+            rules={[{ required: true, message: "Please enter the base URL" }]}
+            extra="OpenAI-compatible API base URL (e.g., http://ip:port/v1/)"
+          >
+            <Input placeholder="http://192.168.201.17:30000/v1/" />
+          </Form.Item>
+          <Form.Item label="Model" extra="Select a model or enter manually">
+            <Space.Compact style={{ width: "100%" }}>
+              <Form.Item name="modelId" noStyle>
+                <Select
+                  placeholder="Click refresh to detect models"
+                  allowClear
+                  showSearch
+                  style={{ width: "100%" }}
+                  loading={fetchingModels}
+                  options={remoteModels.map((m) => ({ // NOTE(review): options come only from the last fetch; a plain Select looks unable to accept free-typed ids despite the "enter manually" hint — confirm
+                    label: m.id,
+                    value: m.id,
+                  }))}
+                  dropdownRender={(menu) => (
+                    <>
+                      {menu}
+                      {remoteModels.length === 0 && !fetchingModels && (
+                        <div
+                          style={{
+                            padding: 8,
+                            textAlign: "center",
+                            color: colors.textMuted,
+                          }}
+                        >
+                          No models loaded
+                        </div>
+                      )}
+                    </>
+                  )}
+                  notFoundContent={
+                    fetchingModels ? (
+                      <div style={{ padding: 8, textAlign: "center" }}>
+                        <Spin size="small" /> Loading...
+                      </div>
+                    ) : null
+                  }
+                />
+              </Form.Item>
+              <Tooltip title="Detect available models">
+                <Button
+                  icon={<SyncOutlined spin={fetchingModels} />}
+                  onClick={fetchRemoteModels}
+                  loading={fetchingModels}
+                />
+              </Tooltip>
+            </Space.Compact>
+          </Form.Item>
+          <Form.Item
+            name="apiKey"
+            label="API Key (Optional)"
+            extra="Leave empty if not required"
+          >
+            <Input.Password placeholder="sk-..." />
+          </Form.Item>
+        </Form>
+      </Modal>
+    </>
+  );
+}
diff --git a/frontend/src/components/chat-panel/ToolConfirmModal.tsx b/frontend/src/components/chat-panel/ToolConfirmModal.tsx
new file mode 100644
index 0000000..5627359
--- /dev/null
+++ b/frontend/src/components/chat-panel/ToolConfirmModal.tsx
@@ -0,0 +1,218 @@
+/**
+ * Tool Confirmation Modal
+ *
+ * A modal dialog that asks for user confirmation before executing
+ * AI-requested tool actions.
+ */
+import { Modal, Button, Tag, Descriptions } from "antd";
+import {
+  ExclamationCircleOutlined,
+  CheckCircleOutlined,
+  DeleteOutlined,
+  RocketOutlined,
+  PauseCircleOutlined,
+  PlayCircleOutlined,
+  PlusOutlined,
+  StopOutlined,
+  ThunderboltOutlined,
+  KeyOutlined,
+  CloudDownloadOutlined,
+  DatabaseOutlined,
+  PieChartOutlined,
+  ClearOutlined,
+} from "@ant-design/icons";
+import type { PendingToolExecution } from "./tools";
+
+interface ToolConfirmModalProps {
+  visible: boolean; // forwarded to the Modal's `open` prop
+  pendingTools: PendingToolExecution[]; // tool calls awaiting approval; nothing renders when empty
+  onConfirm: () => void; // Confirm button handler
+  onCancel: () => void; // Cancel button and modal-close handler
+  isDark: boolean; // selects the dark or light color set
+}
+
+const TOOL_ICONS: Record<string, React.ReactNode> = { // built once, not on every call
+  delete: <DeleteOutlined />,
+  rocket: <RocketOutlined />,
+  "pause-circle": <PauseCircleOutlined />,
+  "play-circle": <PlayCircleOutlined />,
+  plus: <PlusOutlined />,
+  stop: <StopOutlined />,
+  thunderbolt: <ThunderboltOutlined />,
+  key: <KeyOutlined />,
+  download: <CloudDownloadOutlined />,
+  database: <DatabaseOutlined />,
+  "pie-chart": <PieChartOutlined />,
+  clear: <ClearOutlined />,
+};
+
+/** Get icon for tool; names that are not own keys of TOOL_ICONS get the check-circle icon. */
+function getToolIcon(iconName: string) {
+  const known = Object.prototype.hasOwnProperty.call(TOOL_ICONS, iconName);
+  return known ? TOOL_ICONS[iconName] : <CheckCircleOutlined />;
+}
+
+/**
+ * Format argument value for display: "-" for null/undefined, arrays as a
+ * comma-separated list (each element formatted recursively), objects as JSON.
+ */
+function formatArgValue(value: any): string {
+  if (value === null || value === undefined) return "-";
+  if (Array.isArray(value)) {
+    // Format per element: a plain join() would print "[object Object]" for objects
+    return value.map((item) => formatArgValue(item)).join(", ");
+  }
+  if (typeof value === "object") {
+    return JSON.stringify(value);
+  }
+  return String(value);
+}
+
+/**
+ * Tool Confirmation Modal Component
+ *
+ * Shows each pending tool call with its arguments and lets the user confirm
+ * or cancel; dangerous tools get a red outline, a tag and a warning banner.
+ */
+export function ToolConfirmModal({
+  visible,
+  pendingTools,
+  onConfirm,
+  onCancel,
+  isDark,
+}: ToolConfirmModalProps) {
+  if (pendingTools.length === 0) return null;
+
+  const anyDangerous = pendingTools.some(({ meta }) => meta.dangerous);
+  // Theme-dependent colors, resolved once per render
+  const surface = isDark ? "#1f1f1f" : "#ffffff";
+  const cardSurface = isDark ? "#262626" : "#f5f5f5";
+  const strongText = isDark ? "#fafafa" : "#09090b";
+  const introText = isDark ? "#a1a1aa" : "#52525b";
+  const labelText = isDark ? "#71717a" : "#a1a1aa";
+  const warningSurface = isDark
+    ? "rgba(255, 77, 79, 0.1)"
+    : "rgba(255, 77, 79, 0.08)";
+
+  const titleIcon = anyDangerous ? (
+    <ExclamationCircleOutlined style={{ color: "#faad14", fontSize: 20 }} />
+  ) : (
+    <CheckCircleOutlined style={{ color: "#52c41a", fontSize: 20 }} />
+  );
+
+  return (
+    <Modal
+      open={visible}
+      title={
+        <div style={{ display: "flex", alignItems: "center", gap: 8 }}>
+          {titleIcon}
+          <span>Confirm Action</span>
+        </div>
+      }
+      onCancel={onCancel}
+      footer={[
+        <Button key="cancel" onClick={onCancel}>
+          Cancel
+        </Button>,
+        <Button
+          key="confirm"
+          type="primary"
+          danger={anyDangerous}
+          onClick={onConfirm}
+        >
+          Confirm
+        </Button>,
+      ]}
+      width={500}
+      centered
+      styles={{
+        body: { background: surface },
+        header: { background: surface },
+        content: { background: surface },
+      }}
+    >
+      <div style={{ display: "flex", flexDirection: "column", gap: 16 }}>
+        <div style={{ color: introText, fontSize: 14 }}>
+          AI assistant wants to execute the following actions:
+        </div>
+
+        {pendingTools.map(({ meta, parsedArgs }, position) => {
+          const accent = meta.dangerous ? "#ff4d4f" : "#1890ff";
+          const outline = meta.dangerous
+            ? "1px solid #ff4d4f"
+            : "1px solid transparent";
+          return (
+            <div
+              key={position}
+              style={{
+                padding: 16,
+                borderRadius: 8,
+                background: cardSurface,
+                border: outline,
+              }}
+            >
+              {/* Tool header */}
+              <div
+                style={{
+                  display: "flex",
+                  alignItems: "center",
+                  gap: 8,
+                  marginBottom: 12,
+                }}
+              >
+                <span style={{ color: accent, fontSize: 18 }}>
+                  {getToolIcon(meta.icon)}
+                </span>
+                <span
+                  style={{ fontWeight: 600, fontSize: 15, color: strongText }}
+                >
+                  {meta.displayName}
+                </span>
+                {meta.dangerous && (
+                  <Tag color="error" style={{ marginLeft: "auto" }}>
+                    Dangerous
+                  </Tag>
+                )}
+              </div>
+
+              {/* Tool arguments */}
+              <Descriptions
+                column={1}
+                size="small"
+                labelStyle={{
+                  color: labelText,
+                  width: 120,
+                }}
+                contentStyle={{ color: strongText }}
+              >
+                {Object.entries(parsedArgs).map(([argName, argValue]) => (
+                  <Descriptions.Item
+                    key={argName}
+                    label={argName.replace(/_/g, " ")}
+                  >
+                    {formatArgValue(argValue)}
+                  </Descriptions.Item>
+                ))}
+              </Descriptions>
+            </div>
+          );
+        })}
+
+        {anyDangerous && (
+          <div
+            style={{
+              padding: "8px 12px",
+              borderRadius: 6,
+              background: warningSurface,
+              color: "#ff4d4f",
+              fontSize: 13,
+            }}
+          >
+            <ExclamationCircleOutlined style={{ marginRight: 8 }} />
+            This action may be irreversible. Please confirm before proceeding.
+          </div>
+        )}
+      </div>
+    </Modal>
+  );
+}
diff --git a/frontend/src/components/chat-panel/TuningJobView.tsx b/frontend/src/components/chat-panel/TuningJobView.tsx
new file mode 100644
index 0000000..61e8c6f
--- /dev/null
+++ b/frontend/src/components/chat-panel/TuningJobView.tsx
@@ -0,0 +1,659 @@
+/**
+ * Tuning Job View Component
+ *
+ * Displays a tuning job's conversation in the Chat Panel with a terminal-like UI.
+ */
+
+import { useEffect, useState, useRef, useCallback } from "react";
+import { Tag, Spin } from "antd";
+import {
+  LoadingOutlined,
+  CheckCircleOutlined,
+  CloseCircleOutlined,
+  RobotOutlined,
+  ToolOutlined,
+} from "@ant-design/icons";
+import { api } from "../../api/client";
+import dayjs from "dayjs";
+
+interface ConversationMessage {
+  role: "user" | "assistant" | "tool"; // "assistant" entries are rendered with the agent avatar
+  content: string;
+  timestamp?: string;
+  tool_calls?: Array<{ // presumably the tool invocations requested by an assistant message — TODO confirm
+    id: string;
+    name: string;
+    arguments: string; // NOTE(review): looks like a serialized (JSON) argument object — confirm against the API
+  }>;
+  tool_call_id?: string; // presumably ties a "tool" message to the tool_calls entry it answers — TODO confirm
+  name?: string;
+}
+
+interface TuningJob {
+  id: number;
+  model_name?: string; // shown next to the status tag
+  worker_name?: string; // shown on the right of the status bar
+  optimization_target: string; // shown after the worker name
+  status: string; // drives the tag color/icon and whether the view keeps polling
+  status_message?: string;
+  conversation_log?: ConversationMessage[]; // rendered in order as the message list
+  best_config?: Record<string, unknown>;
+}
+
+interface TuningJobViewProps {
+  jobId: number; // job loaded from /auto-tuning/jobs/{jobId}
+  isDark: boolean; // selects the dark or light palette
+}
+
+function getStatusIcon(status: string) {
+  // Finished states get a static icon; every other status shows a spinner.
+  if (status === "completed") {
+    return <CheckCircleOutlined style={{ color: "#52c41a" }} />;
+  }
+  const stopped = status === "failed" || status === "cancelled";
+  if (stopped) {
+    return <CloseCircleOutlined style={{ color: "#ff4d4f" }} />;
+  }
+  return <LoadingOutlined spin style={{ color: "#1677ff" }} />;
+}
+
+function getStatusColor(status: string): string {
+  // Tag preset per status; a Map keeps Object.prototype names like "constructor" from matching
+  const presets = new Map<string, string>([
+    ["analyzing", "processing"],
+    ["querying_kb", "processing"],
+    ["exploring", "processing"],
+    ["benchmarking", "processing"],
+    ["completed", "success"],
+    ["failed", "error"],
+    ["cancelled", "warning"],
+  ]);
+  return presets.get(status) ?? "default"; // "pending" and unknown statuses
+}
+
+// Tool result display component - shows results in a more readable format
+interface ToolResultDisplayProps {
+  name: string; // tool name; selects how the one-line summary is built
+  content: string; // raw tool output; parsed as JSON when possible
+  timestamp?: string; // accepted but not read by ToolResultDisplay
+  isDark: boolean;
+  colors: Record<string, string>;
+}
+
+/**
+ * Collapsible tool result card: a one-line summary that expands to the pretty-printed payload.
+ */
+function ToolResultDisplay({
+  name,
+  content,
+  isDark,
+  colors,
+}: ToolResultDisplayProps) {
+  const [expanded, setExpanded] = useState(false);
+
+  // Parse and format the result
+  let parsedResult: Record<string, unknown> | null = null;
+  let isError = false;
+  let summary = "";
+
+  try {
+    parsedResult = JSON.parse(content);
+    isError = !!parsedResult?.error;
+
+    // Generate summary based on tool name. Branches that only know how to describe a
+    // success are skipped for error payloads, which then reach the generic error branch.
+    if (name === "get_hardware_info" && parsedResult && !isError) {
+      const gpuCount = parsedResult.gpu_count || 0;
+      const gpus = (parsedResult.gpus as Array<{ name: string }>) || [];
+      const gpuName = gpus[0]?.name || "Unknown GPU";
+      const totalVram = parsedResult.total_vram_gb || 0;
+      summary = `${gpuCount}x ${gpuName}, ${totalVram}GB VRAM total`;
+    } else if (name === "get_model_info" && parsedResult && !isError) {
+      summary = `${parsedResult.name} (${parsedResult.model_family})`;
+    } else if (name === "query_knowledge_base" && parsedResult && !isError) {
+      const found = (parsedResult.found as number) || 0;
+      summary =
+        found > 0
+          ? `Found ${found} historical record(s)`
+          : "No historical data found";
+    } else if (name === "deploy_model" && parsedResult) {
+      summary = parsedResult.success
+        ? `Deployment #${parsedResult.deployment_id} created`
+        : `Failed: ${parsedResult.error}`;
+    } else if (name === "wait_for_deployment" && parsedResult) {
+      summary = parsedResult.success
+        ? `Ready in ${parsedResult.wait_time_seconds}s`
+        : `Failed: ${parsedResult.error}`;
+    } else if (name === "run_benchmark" && parsedResult) {
+      if (parsedResult.success && parsedResult.metrics) {
+        const m = parsedResult.metrics as Record<string, number>;
+        summary = `${m.throughput_tps} TPS, TTFT: ${m.avg_ttft_ms}ms, TPOT: ${m.avg_tpot_ms}ms`;
+      } else {
+        summary = `Failed: ${parsedResult.error}`;
+      }
+    } else if (name === "stop_deployment" && parsedResult) {
+      summary = parsedResult.success
+        ? "Deployment stopped"
+        : `Failed: ${parsedResult.error}`;
+    } else if (name === "finish_tuning" && parsedResult && !isError) {
+      summary = "Tuning completed successfully";
+    } else if (isError) {
+      summary = `Error: ${parsedResult?.error}`;
+    } else {
+      summary = "Completed";
+    }
+  } catch {
+    summary = content.length > 50 ? content.slice(0, 50) + "..." : content;
+  }
+
+  return (
+    <div
+      style={{
+        borderRadius: 8,
+        background: isError
+          ? isDark
+            ? "rgba(239, 68, 68, 0.1)"
+            : "rgba(239, 68, 68, 0.05)"
+          : colors.toolBg,
+        border: `1px solid ${isError ? (isDark ? "#7f1d1d" : "#fecaca") : colors.toolBorder}`,
+        overflow: "hidden",
+      }}
+    >
+      {/* Header - clickable to expand */}
+      <div
+        onClick={() => setExpanded(!expanded)}
+        style={{
+          padding: "8px 12px",
+          display: "flex",
+          alignItems: "center",
+          gap: 8,
+          cursor: "pointer",
+          background: isError
+            ? isDark
+              ? "rgba(239, 68, 68, 0.15)"
+              : "rgba(239, 68, 68, 0.1)"
+            : isDark
+              ? "rgba(255,255,255,0.02)"
+              : "rgba(0,0,0,0.02)",
+        }}
+      >
+        {isError ? (
+          <CloseCircleOutlined style={{ color: "#ef4444", fontSize: 12 }} />
+        ) : (
+          <CheckCircleOutlined style={{ color: "#22c55e", fontSize: 12 }} />
+        )}
+        <span style={{ fontSize: 12, color: colors.textSecondary, flex: 1 }}>
+          {summary}
+        </span>
+        <span
+          style={{
+            fontSize: 10,
+            color: colors.textMuted,
+            transform: expanded ? "rotate(180deg)" : "rotate(0deg)",
+            transition: "transform 0.2s",
+          }}
+        >
+          ▼
+        </span>
+      </div>
+
+      {/* Expanded content */}
+      {expanded && (
+        <div
+          style={{
+            padding: "8px 12px",
+            borderTop: `1px solid ${colors.border}`,
+          }}
+        >
+          <pre
+            style={{
+              margin: 0,
+              fontSize: 11,
+              fontFamily:
+                "'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace",
+              maxHeight: 300,
+              overflow: "auto",
+              whiteSpace: "pre-wrap",
+              wordBreak: "break-word",
+              color: colors.textSecondary,
+              lineHeight: 1.5,
+            }}
+          >
+            {parsedResult ? JSON.stringify(parsedResult, null, 2) : content}
+          </pre>
+        </div>
+      )}
+    </div>
+  );
+}
+
+export function TuningJobView({ jobId, isDark }: TuningJobViewProps) {
+  const [job, setJob] = useState<TuningJob | null>(null);
+  const [loading, setLoading] = useState(true);
+  const messagesEndRef = useRef<HTMLDivElement>(null);
+
+  // Theme colors
+  const colors = {
+    bg: isDark ? "#0d0d0d" : "#ffffff",
+    cardBg: isDark ? "#1a1a1a" : "#f8f9fa",
+    border: isDark ? "#2a2a2a" : "#e8e8e8",
+    text: isDark ? "#e4e4e7" : "#18181b",
+    textSecondary: isDark ? "#a1a1aa" : "#71717a",
+    textMuted: isDark ? "#71717a" : "#a1a1aa",
+    accent: "#3b82f6",
+    agentBg: isDark ? "#1e293b" : "#f0f9ff",
+    agentBorder: isDark ? "#334155" : "#bae6fd",
+    toolBg: isDark ? "#18181b" : "#f4f4f5",
+    toolBorder: isDark ? "#3f3f46" : "#d4d4d8",
+    successBg: isDark ? "#14532d" : "#dcfce7",
+    successBorder: isDark ? "#166534" : "#86efac",
+  };
+
+  // Fetch job data
+  const fetchJob = useCallback(async () => {
+    try {
+      const response = await api.get(`/auto-tuning/jobs/${jobId}`);
+      setJob(response.data);
+      setLoading(false);
+    } catch (error) {
+      console.error("Failed to fetch tuning job:", error);
+      setLoading(false);
+    }
+  }, [jobId]);
+
+  // Initial fetch
+  useEffect(() => {
+    fetchJob();
+  }, [fetchJob]);
+
+  // Auto-refresh for running jobs
+  useEffect(() => {
+    if (!job) return;
+    const isRunning = [
+      "pending",
+      "analyzing",
+      "querying_kb",
+      "exploring",
+      "benchmarking",
+    ].includes(job.status);
+    if (!isRunning) return;
+
+    const interval = setInterval(fetchJob, 2000);
+    return () => clearInterval(interval);
+  }, [job?.status, fetchJob]);
+
+  // Auto-scroll to bottom
+  useEffect(() => {
+    if (messagesEndRef.current) {
+      messagesEndRef.current.scrollIntoView({ behavior: "smooth" });
+    }
+  }, [job?.conversation_log?.length]);
+
+  const isRunning =
+    job &&
+    [
+      "pending",
+      "analyzing",
+      "querying_kb",
+      "exploring",
+      "benchmarking",
+    ].includes(job.status);
+
+  return (
+    <div
+      style={{
+        display: "flex",
+        flexDirection: "column",
+        flex: 1,
+        background: colors.bg,
+        overflow: "hidden",
+      }}
+    >
+      {/* Status Bar */}
+      <div
+        style={{
+          padding: "10px 16px",
+          borderBottom: `1px solid ${colors.border}`,
+          display: "flex",
+          alignItems: "center",
+          justifyContent: "space-between",
+          background: colors.cardBg,
+        }}
+      >
+        <div style={{ display: "flex", alignItems: "center", gap: 10 }}>
+          {job && (
+            <>
+              <Tag
+                color={getStatusColor(job.status)}
+                icon={getStatusIcon(job.status)}
+                style={{ margin: 0, fontWeight: 500 }}
+              >
+                {job.status.toUpperCase()}
+              </Tag>
+              <span style={{ fontSize: 13, color: colors.textSecondary }}>
+                {job.model_name}
+              </span>
+            </>
+          )}
+        </div>
+        {job && (
+          <span style={{ fontSize: 12, color: colors.textMuted }}>
+            {job.worker_name} · {job.optimization_target}
+          </span>
+        )}
+      </div>
+
+      {/* Messages */}
+      <div
+        style={{
+          flex: 1,
+          overflowY: "auto",
+          padding: 16,
+          background: colors.bg,
+        }}
+      >
+        {loading ? (
+          <div
+            style={{
+              display: "flex",
+              flexDirection: "column",
+              alignItems: "center",
+              justifyContent: "center",
+              height: "100%",
+              gap: 12,
+            }}
+          >
+            <Spin size="large" />
+            <span style={{ color: colors.textMuted, fontSize: 14 }}>
+              Loading tuning job...
+            </span>
+          </div>
+        ) : job?.conversation_log && job.conversation_log.length > 0 ? (
+          <div style={{ display: "flex", flexDirection: "column", gap: 12 }}>
+            {job.conversation_log.map((msg, idx) => (
+              <div key={idx}>
+                {/* Agent Message */}
+                {msg.role === "assistant" && (
+                  <div style={{ display: "flex", gap: 10 }}>
+                    {/* Avatar */}
+                    <div
+                      style={{
+                        width: 32,
+                        height: 32,
+                        borderRadius: 8,
+                        background: `linear-gradient(135deg, ${colors.accent}, #8b5cf6)`,
+                        display: "flex",
+                        alignItems: "center",
+                        justifyContent: "center",
+                        flexShrink: 0,
+                      }}
+                    >
+                      <RobotOutlined style={{ color: "#fff", fontSize: 16 }} />
+                    </div>
+                    {/* Content */}
+                    <div style={{ flex: 1, minWidth: 0 }}>
+                      <div
+                        style={{
+                          fontSize: 12,
+                          color: colors.textMuted,
+                          marginBottom: 4,
+                          display: "flex",
+                          alignItems: "center",
+                          gap: 8,
+                        }}
+                      >
+                        <span
+                          style={{
+                            fontWeight: 500,
+                            color: colors.textSecondary,
+                          }}
+                        >
+                          Agent
+                        </span>
+                        {msg.timestamp && (
+                          <span>{dayjs(msg.timestamp).format("HH:mm:ss")}</span>
+                        )}
+                      </div>
+                      {/* Agent text content */}
+                      {msg.content && (
+                        <div
+                          style={{
+                            padding: "12px 14px",
+                            borderRadius: 12,
+                            background: colors.agentBg,
+                            border: `1px solid ${colors.agentBorder}`,
+                            color: colors.text,
+                            fontSize: 14,
+                            lineHeight: 1.6,
+                            whiteSpace: "pre-wrap",
+                          }}
+                        >
+                          {msg.content}
+                        </div>
+                      )}
+                      {/* Tool Calls - show what actions the agent is taking */}
+                      {msg.tool_calls && msg.tool_calls.length > 0 && (
+                        <div
+                          style={{
+                            marginTop: msg.content ? 8 : 0,
+                            padding: "10px 12px",
+                            borderRadius: 8,
+                            background: isDark ? "#1c1c1e" : "#f0f0f0",
+                            border: `1px solid ${colors.border}`,
+                          }}
+                        >
+                          <div
+                            style={{
+                              fontSize: 11,
+                              color: colors.textMuted,
+                              marginBottom: 6,
+                              textTransform: "uppercase",
+                              letterSpacing: "0.5px",
+                            }}
+                          >
+                            Executing Actions
+                          </div>
+                          {msg.tool_calls.map((tc, tcIdx) => {
+                            let argsPreview = "";
+                            try {
+                              const args = JSON.parse(tc.arguments);
+                              argsPreview = Object.entries(args)
+                                .map(
+                                  ([k, v]) =>
+                                    `${k}=${typeof v === "object" ? JSON.stringify(v) : v}`,
+                                )
+                                .join(", ");
+                            } catch {
+                              argsPreview = tc.arguments;
+                            }
+                            return (
+                              <div
+                                key={tcIdx}
+                                style={{
+                                  display: "flex",
+                                  alignItems: "flex-start",
+                                  gap: 8,
+                                  padding: "6px 0",
+                                  borderTop:
+                                    tcIdx > 0
+                                      ? `1px solid ${colors.border}`
+                                      : "none",
+                                }}
+                              >
+                                <ToolOutlined
+                                  style={{ color: colors.accent, marginTop: 2 }}
+                                />
+                                <div>
+                                  <div
+                                    style={{
+                                      fontWeight: 500,
+                                      color: colors.text,
+                                      fontSize: 13,
+                                    }}
+                                  >
+                                    {tc.name
+                                      .replace(/_/g, " ")
+                                      .replace(/\b\w/g, (c) => c.toUpperCase())}
+                                  </div>
+                                  {argsPreview && (
+                                    <div
+                                      style={{
+                                        fontSize: 11,
+                                        color: colors.textMuted,
+                                        marginTop: 2,
+                                        fontFamily: "monospace",
+                                      }}
+                                    >
+                                      {argsPreview.length > 80
+                                        ? argsPreview.slice(0, 80) + "..."
+                                        : argsPreview}
+                                    </div>
+                                  )}
+                                </div>
+                              </div>
+                            );
+                          })}
+                        </div>
+                      )}
+                    </div>
+                  </div>
+                )}
+
+                {/* Tool Response */}
+                {msg.role === "tool" && (
+                  <div style={{ marginLeft: 42, marginTop: 4 }}>
+                    <ToolResultDisplay
+                      name={msg.name || "unknown"}
+                      content={msg.content}
+                      timestamp={msg.timestamp}
+                      isDark={isDark}
+                      colors={colors}
+                    />
+                  </div>
+                )}
+
+                {/* User Message (system prompt) */}
+                {msg.role === "user" && (
+                  <div
+                    style={{
+                      padding: "8px 12px",
+                      borderRadius: 8,
+                      background: isDark ? "#1e1e1e" : "#fafafa",
+                      border: `1px dashed ${colors.border}`,
+                      fontSize: 12,
+                      color: colors.textMuted,
+                    }}
+                  >
+                    <span style={{ fontWeight: 500 }}>System: </span>
+                    {msg.content.length > 100
+                      ? msg.content.slice(0, 100) + "..."
+                      : msg.content}
+                  </div>
+                )}
+              </div>
+            ))}
+
+            {/* Running indicator */}
+            {isRunning && (
+              <div
+                style={{
+                  display: "flex",
+                  alignItems: "center",
+                  gap: 10,
+                  padding: "12px 14px",
+                  borderRadius: 12,
+                  background: colors.agentBg,
+                  border: `1px solid ${colors.agentBorder}`,
+                  marginLeft: 42,
+                }}
+              >
+                <LoadingOutlined
+                  spin
+                  style={{ color: colors.accent, fontSize: 16 }}
+                />
+                <span style={{ color: colors.textSecondary, fontSize: 14 }}>
+                  {job.status_message || "Processing..."}
+                </span>
+              </div>
+            )}
+
+            <div ref={messagesEndRef} />
+          </div>
+        ) : (
+          <div
+            style={{
+              display: "flex",
+              flexDirection: "column",
+              alignItems: "center",
+              justifyContent: "center",
+              height: "100%",
+              gap: 12,
+            }}
+          >
+            <div
+              style={{
+                width: 48,
+                height: 48,
+                borderRadius: 12,
+                background: `linear-gradient(135deg, ${colors.accent}, #8b5cf6)`,
+                display: "flex",
+                alignItems: "center",
+                justifyContent: "center",
+              }}
+            >
+              <RobotOutlined style={{ color: "#fff", fontSize: 24 }} />
+            </div>
+            <span style={{ color: colors.textMuted, fontSize: 14 }}>
+              {job?.status === "pending"
+                ? "Waiting for agent to start..."
+                : "No conversation yet"}
+            </span>
+          </div>
+        )}
+      </div>
+
+      {/* Best Config */}
+      {job?.best_config && (
+        <div
+          style={{
+            padding: 16,
+            borderTop: `1px solid ${colors.border}`,
+            background: colors.successBg,
+          }}
+        >
+          <div
+            style={{
+              fontSize: 13,
+              fontWeight: 600,
+              color: isDark ? "#4ade80" : "#16a34a",
+              marginBottom: 8,
+              display: "flex",
+              alignItems: "center",
+              gap: 6,
+            }}
+          >
+            <CheckCircleOutlined />
+            Best Configuration Found
+          </div>
+          <pre
+            style={{
+              margin: 0,
+              padding: 12,
+              background: colors.bg,
+              borderRadius: 8,
+              border: `1px solid ${colors.successBorder}`,
+              fontSize: 12,
+              fontFamily:
+                "'SF Mono', 'Monaco', 'Inconsolata', 'Roboto Mono', monospace",
+              maxHeight: 120,
+              overflow: "auto",
+              color: colors.text,
+            }}
+          >
+            {JSON.stringify(job.best_config, null, 2)}
+          </pre>
+        </div>
+      )}
+    </div>
+  );
+}
diff --git a/frontend/src/components/chat-panel/index.ts b/frontend/src/components/chat-panel/index.ts
new file mode 100644
index 0000000..a198527
--- /dev/null
+++ b/frontend/src/components/chat-panel/index.ts
@@ -0,0 +1,44 @@
+/**
+ * Chat Panel Components
+ *
+ * Global chat panel for AI conversations accessible from any page.
+ */
+
+// Components
+export { ChatPanel, TUNING_JOB_EVENT_KEY } from "./ChatPanel";
+export { ModelSelector } from "./ModelSelector";
+export { ToolConfirmModal } from "./ToolConfirmModal";
+
+// Hooks
+export { useChat } from "./useChat";
+
+// Types
+export type {
+  ChatModelConfig,
+  CustomEndpoint,
+  ChatPanelState,
+  ModelSourceType,
+} from "./types";
+
+export type {
+  ToolDefinition,
+  ToolCall,
+  ToolResult,
+  ToolMeta,
+  PendingToolExecution,
+} from "./tools";
+
+export {
+  DEFAULT_PANEL_WIDTH,
+  MIN_PANEL_WIDTH,
+  MAX_PANEL_WIDTH,
+  CHAT_PANEL_STORAGE_KEY,
+} from "./types";
+
+export {
+  CHAT_TOOLS,
+  TOOL_META,
+  requiresConfirmation,
+  getToolMeta,
+  executeTool,
+} from "./tools";
diff --git a/frontend/src/components/chat-panel/systemContext.ts b/frontend/src/components/chat-panel/systemContext.ts
new file mode 100644
index 0000000..a6631a1
--- /dev/null
+++ b/frontend/src/components/chat-panel/systemContext.ts
@@ -0,0 +1,515 @@
+/**
+ * System Context Builder
+ *
+ * Builds system context for the AI assistant to understand the current
+ * state of the LMStack platform.
+ */
+import { api } from "../../api/client";
+
+export interface SystemContext { // platform snapshot produced by fetchSystemContext
+  workers: WorkerInfo[]; // mapped from GET /workers
+  deployments: DeploymentInfo[]; // mapped from GET /deployments
+  models: ModelInfo[]; // mapped from GET /models
+  containers: ContainerInfo[]; // mapped from GET /containers
+  images: ImageInfo[]; // mapped from GET /images
+  storageVolumes: StorageVolumeInfo[]; // mapped from GET /storage/volumes
+  semanticRouter: SemanticRouterInfo | null; // null when the status request fails or returns no data
+  timestamp: string; // ISO-8601 string from new Date().toISOString()
+}
+
+interface WorkerInfo { // one worker as mapped from a /workers item
+  id: number;
+  name: string;
+  host: string;
+  status: string;
+  gpus: GpuInfo[]; // mapped from the item's gpu_info list; empty when gpu_info is missing
+}
+
+interface GpuInfo { // one entry of a worker's gpu_info list
+  index: number;
+  name: string;
+  memoryTotal: number; // from memory_total; formatSystemPrompt divides by 1024 and prints GB (presumably MiB — confirm)
+  memoryUsed: number; // from memory_used; treated in the same unit as memoryTotal
+  utilizationGpu: number; // from utilization_gpu; printed with a "%" suffix
+}
+
+interface DeploymentInfo { // one model deployment as mapped from a /deployments item
+  id: number;
+  name: string;
+  modelName: string; // model.name, falling back to the deployment's own name
+  workerName: string; // worker.name, or "unknown" when absent
+  status: string; // compared against "running" to count and list active deployments
+  endpoint?: string; // `/api/deployments/{id}/chat`, set only while status is "running"
+}
+
+interface ModelInfo { // one registered model as mapped from a /models item
+  id: number;
+  name: string;
+  source: string; // grouping key for the "Available Models" prompt section
+  parameters?: string; // copied through from the API item unchanged
+  quantization?: string; // copied through from the API item unchanged
+}
+
+interface ContainerInfo { // one Docker container as mapped from a /containers item
+  id: string; // id, falling back to container_id
+  name: string;
+  image: string;
+  status: string; // counted as running when it contains "running" or "up" (case-insensitive)
+  workerName: string; // worker.name, then worker_name, then "unknown"
+}
+
+interface ImageInfo { // one Docker image as mapped from an /images item
+  id: string; // id, falling back to image_id
+  name: string; // name, falling back to repository
+  tag: string; // defaults to "latest"
+  size: number; // defaults to 0; the unit is not established in this file
+  workerName: string; // worker.name, then worker_name, then "unknown"
+}
+
+interface StorageVolumeInfo { // one volume from the /storage/volumes list
+  name: string;
+  driver: string; // defaults to "local"
+  mountpoint: string; // defaults to ""
+  workerName: string; // worker_name, or "unknown" when absent
+}
+
+interface SemanticRouterInfo { // fields copied from the /semantic-router/status response
+  deployed: boolean; // rendered as "Deployed" / "Not deployed" in the prompt
+  status: string; // stored but not rendered by formatSystemPrompt
+  models: string[]; // defaults to []; listed as "Connected models" when non-empty
+}
+
+/**
+ * Snapshot the current platform state for the AI assistant.
+ *
+ * Seven endpoints are queried in parallel; a request that fails degrades to
+ * an empty payload for its section. Any other error is logged and answered
+ * with a fully empty context carrying a fresh timestamp.
+ */
+export async function fetchSystemContext(): Promise<SystemContext> {
+  // Fallback for list endpoints that answer with `{ items: [...] }`.
+  const noItems = () => ({ data: { items: [] } });
+
+  // Mappers from snake_case API records to the camelCase context shapes.
+  const toGpu = (g: any): GpuInfo => ({
+    index: g.index,
+    name: g.name,
+    memoryTotal: g.memory_total,
+    memoryUsed: g.memory_used,
+    utilizationGpu: g.utilization_gpu,
+  });
+  const toWorker = (w: any): WorkerInfo => ({
+    id: w.id,
+    name: w.name,
+    host: w.host,
+    status: w.status,
+    gpus: (w.gpu_info || []).map(toGpu),
+  });
+  const toDeployment = (d: any): DeploymentInfo => ({
+    id: d.id,
+    name: d.name,
+    modelName: d.model?.name || d.name,
+    workerName: d.worker?.name || "unknown",
+    status: d.status,
+    // The chat endpoint is only filled in for running deployments.
+    endpoint:
+      d.status === "running" ? `/api/deployments/${d.id}/chat` : undefined,
+  });
+  const toModel = (m: any): ModelInfo => ({
+    id: m.id,
+    name: m.name,
+    source: m.source,
+    parameters: m.parameters,
+    quantization: m.quantization,
+  });
+  const toContainer = (c: any): ContainerInfo => ({
+    id: c.id || c.container_id,
+    name: c.name,
+    image: c.image,
+    status: c.status,
+    workerName: c.worker?.name || c.worker_name || "unknown",
+  });
+  const toImage = (i: any): ImageInfo => ({
+    id: i.id || i.image_id,
+    name: i.name || i.repository,
+    tag: i.tag || "latest",
+    size: i.size || 0,
+    workerName: i.worker?.name || i.worker_name || "unknown",
+  });
+  const toVolume = (v: any): StorageVolumeInfo => ({
+    name: v.name,
+    driver: v.driver || "local",
+    mountpoint: v.mountpoint || "",
+    workerName: v.worker_name || "unknown",
+  });
+
+  try {
+    // Fire all requests concurrently; each has its own fallback payload.
+    const [
+      workerResp,
+      deploymentResp,
+      modelResp,
+      containerResp,
+      imageResp,
+      volumeResp,
+      routerResp,
+    ] = await Promise.all([
+      api.get("/workers").catch(noItems),
+      api.get("/deployments").catch(noItems),
+      api.get("/models").catch(noItems),
+      api.get("/containers").catch(noItems),
+      api.get("/images").catch(noItems),
+      api.get("/storage/volumes").catch(() => ({ data: [] })),
+      api.get("/semantic-router/status").catch(() => ({ data: null })),
+    ]);
+
+    // Backend /storage/volumes returns a list directly, not { items: [] }
+    const rawVolumes = Array.isArray(volumeResp.data) ? volumeResp.data : [];
+    const router = routerResp.data;
+
+    return {
+      workers: (workerResp.data.items || []).map(toWorker),
+      deployments: (deploymentResp.data.items || []).map(toDeployment),
+      models: (modelResp.data.items || []).map(toModel),
+      containers: (containerResp.data.items || []).map(toContainer),
+      images: (imageResp.data.items || []).map(toImage),
+      storageVolumes: rawVolumes.map(toVolume),
+      semanticRouter: router
+        ? {
+            deployed: router.deployed,
+            status: router.status,
+            models: router.models || [],
+          }
+        : null,
+      timestamp: new Date().toISOString(),
+    };
+  } catch (error) {
+    // Never propagate: log and hand back an empty snapshot instead.
+    console.error("Failed to fetch system context:", error);
+    return {
+      workers: [],
+      deployments: [],
+      models: [],
+      containers: [],
+      images: [],
+      storageVolumes: [],
+      semanticRouter: null,
+      timestamp: new Date().toISOString(),
+    };
+  }
+}
+
+/**
+ * Frontend page paths interpolated into the prompt's markdown navigation links
+ */
+const PAGE_ROUTES = {
+  dashboard: "/dashboard", // not referenced by formatSystemPrompt
+  workers: "/workers",
+  containers: "/containers",
+  images: "/images",
+  storage: "/storage",
+  models: "/models",
+  deployments: "/deployments",
+  chat: "/chat", // not referenced by formatSystemPrompt
+  apiKeys: "/api-keys",
+  settings: "/settings", // not referenced by formatSystemPrompt
+};
+
+/**
+ * Page paths carrying ?action=new; the prompt presents them as links that open the create dialog directly
+ */
+const ACTION_LINKS = {
+  newDeployment: "/deployments?action=new", // "Deploy a Model" quick action
+  newModel: "/models?action=new", // "Add a Model" quick action
+  newApiKey: "/api-keys?action=new", // "Create API Key" quick action
+};
+
+/**
+ * Render a SystemContext snapshot as the markdown system prompt for the LLM (lines joined with "\n")
+ */
+export function formatSystemPrompt(context: SystemContext): string {
+  const lines: string[] = [
+    "You are an AI assistant for the LMStack platform - an LLM deployment and management system.",
+    "You have access to real-time system information and can help users manage their AI infrastructure.",
+    "",
+    "## Navigation Links",
+    "When referencing pages, use markdown links so users can click to navigate:",
+    `- Workers: [Workers](${PAGE_ROUTES.workers})`,
+    `- Docker Containers: [Containers](${PAGE_ROUTES.containers})`,
+    `- Docker Images: [Images](${PAGE_ROUTES.images})`,
+    `- Storage Volumes: [Storage](${PAGE_ROUTES.storage})`,
+    `- Models: [Models](${PAGE_ROUTES.models})`,
+    `- Deployments: [Deployments](${PAGE_ROUTES.deployments})`,
+    `- API Keys: [API Keys](${PAGE_ROUTES.apiKeys})`,
+    "",
+    "## Quick Action Links (ALWAYS use for create/deploy/add operations)",
+    "**CRITICAL: For deploying models, adding models, or creating API keys, ALWAYS guide users to the UI. NEVER use deploy_model, add_model, or create_api_key tools directly.**",
+    "",
+    "These links open the action dialog directly:",
+    `- Deploy a Model: [New Deployment](${ACTION_LINKS.newDeployment})`,
+    `- Add a Model: [Add Model](${ACTION_LINKS.newModel})`,
+    `- Create API Key: [Create API Key](${ACTION_LINKS.newApiKey})`,
+    "",
+    "**TOOL USAGE RULES:**",
+    "- deploy_model, add_model, create_api_key → NEVER use these. Always guide to UI instead.",
+    "- stop_deployment, delete_deployment, stop_container, remove_container, delete_* → OK to use (destructive actions need confirmation)",
+    "- list_*, get_* → OK to use (query tools, no confirmation needed)",
+    "",
+    "**EXAMPLES:**",
+    `- User: '我想部署模型' → '請點擊 [New Deployment](${ACTION_LINKS.newDeployment}) 開啟部署表單。'`,
+    `- User: '幫我部署 Qwen' → '請點擊 [New Deployment](${ACTION_LINKS.newDeployment}) 來部署，選擇 Qwen 模型即可。'`,
+    `- User: '幫我新增模型' → '請點擊 [Add Model](${ACTION_LINKS.newModel}) 來新增模型。'`,
+    "- User: '有哪些模型?' → Use list_models tool",
+    "- User: '停止 deployment 1' → Use stop_deployment tool",
+    "",
+    "## Current System Status",
+    `Last updated: ${new Date(context.timestamp).toLocaleString()}`,
+    "",
+  ];
+
+  // Workers section: one line per worker, then one per GPU (memory values / 1024, printed as GB)
+  lines.push("### Workers");
+  if (context.workers.length === 0) {
+    lines.push("No workers registered. Go to [Workers](/workers) to add one.");
+  } else {
+    lines.push(`Total: ${context.workers.length} worker(s)`);
+    for (const worker of context.workers) {
+      lines.push(`- **${worker.name}** (${worker.host}): ${worker.status}`);
+      for (const gpu of worker.gpus) {
+        const memUsedGB = (gpu.memoryUsed / 1024).toFixed(1);
+        const memTotalGB = (gpu.memoryTotal / 1024).toFixed(1);
+        const memFreeGB = ((gpu.memoryTotal - gpu.memoryUsed) / 1024).toFixed(
+          1,
+        );
+        lines.push(
+          `  - GPU ${gpu.index}: ${gpu.name}, Used: ${memUsedGB}GB, Free: ${memFreeGB}GB, Total: ${memTotalGB}GB, Util: ${gpu.utilizationGpu}%`,
+        );
+      }
+    }
+  }
+  lines.push("");
+
+  // Docker Containers section: totals plus detail lines for the first 10 containers only
+  lines.push("### Docker Containers");
+  if (context.containers.length === 0) {
+    lines.push(
+      "No containers running. View [Containers](/containers) page for details.",
+    );
+  } else {
+    const runningContainers = context.containers.filter(
+      (c) =>
+        c.status.toLowerCase().includes("running") ||
+        c.status.toLowerCase().includes("up"),
+    );
+    lines.push(
+      `Total: ${context.containers.length} container(s), Running: ${runningContainers.length}`,
+    );
+    for (const container of context.containers.slice(0, 10)) {
+      lines.push(
+        `- **${container.name}** (${container.image}): ${container.status} on ${container.workerName}`,
+      );
+    }
+    if (context.containers.length > 10) {
+      lines.push(
+        `  ... and ${context.containers.length - 10} more. See [Containers](/containers) for full list.`,
+      );
+    }
+  }
+  lines.push("");
+
+  // Docker Images section: total plus per-worker counts (individual images are not listed)
+  lines.push("### Docker Images");
+  if (context.images.length === 0) {
+    lines.push("No images found. View [Images](/images) page for details.");
+  } else {
+    lines.push(`Total: ${context.images.length} image(s)`);
+    const imagesByWorker: Record<string, typeof context.images> = {};
+    for (const img of context.images) {
+      if (!imagesByWorker[img.workerName]) imagesByWorker[img.workerName] = [];
+      imagesByWorker[img.workerName].push(img);
+    }
+    for (const [worker, imgs] of Object.entries(imagesByWorker)) {
+      lines.push(`- ${worker}: ${imgs.length} image(s)`);
+    }
+  }
+  lines.push("");
+
+  // Storage section: volume count only
+  lines.push("### Storage Volumes");
+  if (context.storageVolumes.length === 0) {
+    lines.push(
+      "No storage volumes. View [Storage](/storage) page for details.",
+    );
+  } else {
+    lines.push(`Total: ${context.storageVolumes.length} volume(s)`);
+  }
+  lines.push("");
+
+  // Deployments section: only deployments with status "running" are listed individually
+  lines.push("### Model Deployments");
+  const activeDeployments = context.deployments.filter(
+    (d) => d.status === "running",
+  );
+  const allDeployments = context.deployments;
+  lines.push(
+    `Total: ${allDeployments.length}, Running: ${activeDeployments.length}`,
+  );
+  if (activeDeployments.length === 0) {
+    lines.push(
+      "No active model deployments. Go to [Deployments](/deployments) to deploy a model.",
+    );
+  } else {
+    for (const dep of activeDeployments) {
+      lines.push(
+        `- **${dep.modelName}** on ${dep.workerName} (ID: ${dep.id}) - running`,
+      );
+    }
+  }
+  lines.push("");
+
+  // Available models section: model names grouped by their source
+  lines.push("### Available Models");
+  if (context.models.length === 0) {
+    lines.push("No models registered. Go to [Models](/models) to add models.");
+  } else {
+    lines.push(`Total: ${context.models.length} model(s)`);
+    const modelsBySource: Record<string, typeof context.models> = {};
+    for (const model of context.models) {
+      if (!modelsBySource[model.source]) {
+        modelsBySource[model.source] = [];
+      }
+      modelsBySource[model.source].push(model);
+    }
+    for (const [source, models] of Object.entries(modelsBySource)) {
+      lines.push(`- **${source}:** ${models.map((m) => m.name).join(", ")}`);
+    }
+  }
+  lines.push("");
+
+  // Semantic Router: section is omitted entirely when context.semanticRouter is null
+  if (context.semanticRouter) {
+    lines.push("### Semantic Router");
+    lines.push(
+      `Status: ${context.semanticRouter.deployed ? "Deployed" : "Not deployed"}`,
+    );
+    if (context.semanticRouter.models.length > 0) {
+      lines.push(
+        `Connected models: ${context.semanticRouter.models.join(", ")}`,
+      );
+    }
+    lines.push("");
+  }
+
+  // Tool Calling Capabilities: static catalogue text, independent of context
+  lines.push("## Available Actions (Tool Calling)");
+  lines.push(
+    "You have access to tools that allow you to TAKE ACTIONS on the system:",
+  );
+  lines.push("");
+  lines.push("### Query Tools (No confirmation needed)");
+  lines.push("- `get_system_status`: Get complete system overview");
+  lines.push("- `list_workers`: List all workers with GPU information");
+  lines.push(
+    "- `list_containers`: List Docker containers (filter by status/worker_id)",
+  );
+  lines.push("- `list_deployments`: List model deployments (filter by status)");
+  lines.push("- `list_models`: List available models (filter by source)");
+  lines.push("- `get_gpu_status`: Get detailed GPU status");
+  lines.push("");
+  lines.push("### Model Management (Requires user confirmation)");
+  lines.push(
+    "- `add_model`: Add a new model (name, source: huggingface/ollama)",
+  );
+  lines.push("- `delete_model`: Delete a model (model_id)");
+  lines.push("");
+  lines.push("### Deployment Management (Requires user confirmation)");
+  lines.push(
+    "- `deploy_model`: Deploy a model to a worker (model_id, worker_id, gpu_ids?)",
+  );
+  lines.push("- `stop_deployment`: Stop a running deployment (deployment_id)");
+  lines.push(
+    "- `start_deployment`: Start a stopped deployment (deployment_id)",
+  );
+  lines.push(
+    "- `delete_deployment`: Delete a deployment permanently (deployment_id)",
+  );
+  lines.push("");
+  lines.push("### Container Management (Requires user confirmation)");
+  lines.push(
+    "- `stop_container`: Stop a Docker container (container_name, worker_id)",
+  );
+  lines.push(
+    "- `remove_container`: Remove a Docker container (container_name, worker_id, force?)",
+  );
+  lines.push("");
+  lines.push("### API Key Management");
+  lines.push("- `list_api_keys`: List all API keys (No confirmation needed)");
+  lines.push(
+    "- `create_api_key`: Create a new API key (name, description?, expires_in_days?)",
+  );
+  lines.push("- `delete_api_key`: Delete an API key (api_key_id)");
+  lines.push("");
+  lines.push("### Docker Image Management");
+  lines.push(
+    "- `list_images`: List all Docker images (No confirmation needed)",
+  );
+  lines.push("- `pull_image`: Pull a Docker image (worker_id, image)");
+  lines.push(
+    "- `delete_image`: Delete a Docker image (image_id, worker_id, force?)",
+  );
+  lines.push("");
+  lines.push("### Storage Management");
+  lines.push(
+    "- `list_storage_volumes`: List storage volumes (No confirmation needed)",
+  );
+  lines.push(
+    "- `get_disk_usage`: Get disk usage statistics (No confirmation needed)",
+  );
+  lines.push(
+    "- `delete_storage_volume`: Delete a storage volume (volume_name, worker_id, force?)",
+  );
+  lines.push(
+    "- `prune_storage`: Clean up unused Docker resources (images?, containers?, volumes?, build_cache?)",
+  );
+  lines.push("");
+  lines.push("**WORKFLOW FOR CONTAINER/IMAGE OPERATIONS:**");
+  lines.push(
+    "1. If user asks to stop/remove a container by name, FIRST call list_containers to find the worker_id",
+  );
+  lines.push(
+    "2. Then call stop_container or remove_container with both container_name AND worker_id",
+  );
+  lines.push(
+    "3. Same workflow applies to images - use list_images first to find worker_id",
+  );
+  lines.push("");
+  lines.push(
+    "**IMPORTANT:** When users ask you to perform actions other than deploy/add/create (see TOOL USAGE RULES), USE THE TOOLS to execute them. The user will see a confirmation dialog before any action is executed.",
+  );
+  lines.push("");
+
+  // Behavioural instructions; item 6 must stay consistent with the TOOL USAGE RULES above
+  lines.push("## Instructions");
+  lines.push(
+    "1. Always use markdown links when mentioning pages (e.g., [Containers](/containers))",
+  );
+  lines.push("2. Provide specific numbers from the system data above");
+  lines.push("3. Be concise and accurate");
+  lines.push(
+    "4. When users ask about 'containers' or 'docker containers', refer to the Docker Containers section",
+  );
+  lines.push(
+    "5. When users ask about 'deployments', distinguish between Docker containers and Model Deployments",
+  );
+  lines.push(
+    "6. When users ask you to deploy a model, add a model, or create an API key, do NOT call a tool; reply with the matching Quick Action Link",
+  );
+  lines.push(
+    "7. When users ask you to stop/start/delete a deployment, USE the corresponding tool",
+  );
+  lines.push("8. After executing an action, report the result to the user");
+  lines.push("9. Respond in the same language as the user's query");
+  lines.push("");
+
+  return lines.join("\n");
+}
diff --git a/frontend/src/components/chat-panel/tools.ts b/frontend/src/components/chat-panel/tools.ts
new file mode 100644
index 0000000..125cce1
--- /dev/null
+++ b/frontend/src/components/chat-panel/tools.ts
@@ -0,0 +1,2252 @@
+/**
+ * Chat Tools Definition
+ *
+ * Tools that the AI assistant can call to interact with LMStack.
+ * Includes the metadata that drives user confirmation for action tools.
+ */
+import { api } from "../../api/client";
+import type { ChatModelConfig } from "./types";
+
+/**
+ * Tool definition for OpenAI-compatible API
+ *
+ * CHAT_TOOLS in this file is an array of these. `parameters` is a
+ * JSON-Schema-style object description: each key of `properties` is an
+ * argument name, and `required` lists the argument names that must be
+ * supplied on every call.
+ */
+export interface ToolDefinition {
+  type: "function";
+  function: {
+    name: string; // identifier executeTool() dispatches on
+    description: string; // natural-language guidance for the model
+    parameters: {
+      type: "object";
+      properties: Record<string, any>; // argument name -> schema fragment
+      required?: string[];
+    };
+  };
+}
+
+/**
+ * Tool call from LLM response
+ *
+ * `function.arguments` is a JSON-encoded string rather than an object;
+ * executeTool() decodes it with JSON.parse. `id` is echoed back as
+ * `tool_call_id` on the ToolResult produced for this call.
+ */
+export interface ToolCall {
+  id: string;
+  type: "function";
+  function: {
+    name: string; // tool to run; matched against the cases in executeTool()
+    arguments: string; // JSON text, not a parsed object
+  };
+}
+
+/**
+ * Tool execution result
+ *
+ * Shaped as a `role: "tool"` message. executeTool() fills `content` with the
+ * tool's text output, with "Unknown tool: <name>" for an unrecognised tool,
+ * or with a string starting with "Error" when the arguments cannot be parsed
+ * or the tool throws.
+ */
+export interface ToolResult {
+  tool_call_id: string; // copied from ToolCall.id
+  role: "tool";
+  content: string;
+}
+
+/**
+ * Tool metadata for UI display
+ *
+ * `category` is the field requiresConfirmation() inspects: it returns true
+ * only for "action".
+ *
+ * NOTE(review): `dangerous` and `icon` are not read anywhere in the visible
+ * part of this file. `dangerous` presumably marks destructive actions for the
+ * confirmation UI and `icon` presumably names a UI icon; confirm against the
+ * component that consumes this metadata.
+ */
+export interface ToolMeta {
+  name: string; // tool identifier; in TOOL_META it repeats the entry's key
+  displayName: string; // human-readable label
+  description: string; // short UI summary (distinct from CHAT_TOOLS text)
+  category: "query" | "action";
+  dangerous: boolean;
+  icon: string;
+}
+
+/**
+ * Pending tool execution for confirmation
+ *
+ * Bundles a tool call with its metadata while it awaits confirmation.
+ * NOTE(review): `parsedArgs` is presumably the JSON-parsed form of
+ * `toolCall.function.arguments`; confirm against the code that builds it.
+ */
+export interface PendingToolExecution {
+  toolCall: ToolCall;
+  parsedArgs: Record<string, any>;
+  meta: ToolMeta;
+}
+
+/**
+ * Tool metadata registry
+ *
+ * Keyed by tool name; every entry's `name` field repeats its key, and the
+ * keys mirror the tool names declared in CHAT_TOOLS. Entries with category
+ * "action" are the ones requiresConfirmation() reports as needing
+ * confirmation. Names missing from this table receive a generic fallback
+ * from getToolMeta().
+ */
+export const TOOL_META: Record<string, ToolMeta> = {
+  // Query tools (no confirmation needed)
+  get_system_status: {
+    name: "get_system_status",
+    displayName: "Get System Status",
+    description: "Query complete system status",
+    category: "query",
+    dangerous: false,
+    icon: "dashboard",
+  },
+  list_workers: {
+    name: "list_workers",
+    displayName: "List Workers",
+    description: "Query all worker nodes",
+    category: "query",
+    dangerous: false,
+    icon: "cluster",
+  },
+  list_containers: {
+    name: "list_containers",
+    displayName: "List Containers",
+    description: "Query Docker containers",
+    category: "query",
+    dangerous: false,
+    icon: "container",
+  },
+  list_deployments: {
+    name: "list_deployments",
+    displayName: "List Deployments",
+    description: "Query model deployments",
+    category: "query",
+    dangerous: false,
+    icon: "rocket",
+  },
+  list_models: {
+    name: "list_models",
+    displayName: "List Models",
+    description: "Query available models",
+    category: "query",
+    dangerous: false,
+    icon: "robot",
+  },
+  get_gpu_status: {
+    name: "get_gpu_status",
+    displayName: "Get GPU Status",
+    description: "Query GPU usage",
+    category: "query",
+    dangerous: false,
+    icon: "thunderbolt",
+  },
+
+  // Action tools (confirmation needed)
+  add_model: {
+    name: "add_model",
+    displayName: "Add Model",
+    description: "Add a new model to the system",
+    category: "action",
+    dangerous: false,
+    icon: "plus",
+  },
+  delete_model: {
+    name: "delete_model",
+    displayName: "Delete Model",
+    description: "Delete a model from the system",
+    category: "action",
+    dangerous: true,
+    icon: "delete",
+  },
+  deploy_model: {
+    name: "deploy_model",
+    displayName: "Deploy Model",
+    description: "Deploy a model to a worker",
+    category: "action",
+    dangerous: false,
+    icon: "rocket",
+  },
+  stop_deployment: {
+    name: "stop_deployment",
+    displayName: "Stop Deployment",
+    description: "Stop a running deployment",
+    category: "action",
+    dangerous: true,
+    icon: "pause-circle",
+  },
+  start_deployment: {
+    name: "start_deployment",
+    displayName: "Start Deployment",
+    description: "Start a stopped deployment",
+    category: "action",
+    dangerous: false,
+    icon: "play-circle",
+  },
+  delete_deployment: {
+    name: "delete_deployment",
+    displayName: "Delete Deployment",
+    description: "Permanently delete a deployment",
+    category: "action",
+    dangerous: true,
+    icon: "delete",
+  },
+  stop_container: {
+    name: "stop_container",
+    displayName: "Stop Container",
+    description: "Stop a Docker container",
+    category: "action",
+    dangerous: true,
+    icon: "stop",
+  },
+  remove_container: {
+    name: "remove_container",
+    displayName: "Remove Container",
+    description: "Remove a Docker container",
+    category: "action",
+    dangerous: true,
+    icon: "delete",
+  },
+
+  // API Key tools
+  list_api_keys: {
+    name: "list_api_keys",
+    displayName: "List API Keys",
+    description: "Query all API keys",
+    category: "query",
+    dangerous: false,
+    icon: "key",
+  },
+  create_api_key: {
+    name: "create_api_key",
+    displayName: "Create API Key",
+    description: "Create a new API key",
+    category: "action",
+    dangerous: false,
+    icon: "plus",
+  },
+  delete_api_key: {
+    name: "delete_api_key",
+    displayName: "Delete API Key",
+    description: "Delete an API key",
+    category: "action",
+    dangerous: true,
+    icon: "delete",
+  },
+
+  // Docker Image tools
+  list_images: {
+    name: "list_images",
+    displayName: "List Images",
+    description: "Query Docker images",
+    category: "query",
+    dangerous: false,
+    icon: "container",
+  },
+  pull_image: {
+    name: "pull_image",
+    displayName: "Pull Image",
+    description: "Pull a Docker image from registry",
+    category: "action",
+    dangerous: false,
+    icon: "download",
+  },
+  delete_image: {
+    name: "delete_image",
+    displayName: "Delete Image",
+    description: "Delete a Docker image",
+    category: "action",
+    dangerous: true,
+    icon: "delete",
+  },
+
+  // Storage tools
+  list_storage_volumes: {
+    name: "list_storage_volumes",
+    displayName: "List Storage Volumes",
+    description: "Query Docker storage volumes",
+    category: "query",
+    dangerous: false,
+    icon: "database",
+  },
+  get_disk_usage: {
+    name: "get_disk_usage",
+    displayName: "Get Disk Usage",
+    description: "Query disk usage statistics",
+    category: "query",
+    dangerous: false,
+    icon: "pie-chart",
+  },
+  delete_storage_volume: {
+    name: "delete_storage_volume",
+    displayName: "Delete Storage Volume",
+    description: "Delete a Docker storage volume",
+    category: "action",
+    dangerous: true,
+    icon: "delete",
+  },
+  prune_storage: {
+    name: "prune_storage",
+    displayName: "Prune Storage",
+    description: "Clean up unused Docker resources",
+    category: "action",
+    dangerous: true,
+    icon: "clear",
+  },
+
+  // Auto-Tuning tools
+  list_tuning_jobs: {
+    name: "list_tuning_jobs",
+    displayName: "List Tuning Jobs",
+    description: "Query all auto-tuning jobs",
+    category: "query",
+    dangerous: false,
+    icon: "thunderbolt",
+  },
+  start_auto_tuning: {
+    name: "start_auto_tuning",
+    displayName: "Start Auto-Tuning",
+    description: "Start a new auto-tuning job",
+    category: "action",
+    dangerous: false,
+    icon: "experiment",
+  },
+  get_tuning_job: {
+    name: "get_tuning_job",
+    displayName: "Get Tuning Job",
+    description: "Get details of a tuning job",
+    category: "query",
+    dangerous: false,
+    icon: "info-circle",
+  },
+  cancel_tuning_job: {
+    name: "cancel_tuning_job",
+    displayName: "Cancel Tuning Job",
+    description: "Cancel a running tuning job",
+    category: "action",
+    dangerous: true,
+    icon: "stop",
+  },
+  query_knowledge_base: {
+    name: "query_knowledge_base",
+    displayName: "Query Knowledge Base",
+    description: "Query performance knowledge base",
+    category: "query",
+    dangerous: false,
+    icon: "database",
+  },
+  run_benchmark: {
+    name: "run_benchmark",
+    displayName: "Run Benchmark",
+    description: "Run performance benchmark on a deployment",
+    category: "action",
+    dangerous: false,
+    icon: "bar-chart",
+  },
+};
+
+/**
+ * Report whether a tool call has to be confirmed before it runs.
+ *
+ * Only tools registered in TOOL_META with category "action" qualify; query
+ * tools and names that are not in the registry yield false.
+ */
+export function requiresConfirmation(toolName: string): boolean {
+  const entry = TOOL_META[toolName];
+  if (!entry) {
+    return false;
+  }
+  return entry.category === "action";
+}
+
+/**
+ * Get tool metadata
+ *
+ * Returns the TOOL_META entry registered under `toolName`. Names that are not
+ * registered get a generic placeholder (category "action", not dangerous,
+ * "question" icon) so callers always receive a complete ToolMeta.
+ *
+ * Only own keys of TOOL_META count as registered. Tool names can originate
+ * from model output, and a name such as "toString" or "constructor" would
+ * otherwise resolve to a member inherited from Object.prototype and be
+ * returned as if it were a ToolMeta.
+ *
+ * @param toolName - Name of the tool to describe
+ * @returns The registered metadata, or the placeholder for unknown names
+ */
+export function getToolMeta(toolName: string): ToolMeta {
+  const registered = Object.prototype.hasOwnProperty.call(TOOL_META, toolName)
+    ? TOOL_META[toolName]
+    : undefined;
+
+  return (
+    registered || {
+      name: toolName,
+      displayName: toolName,
+      description: "Unknown tool",
+      category: "action",
+      dangerous: false,
+      icon: "question",
+    }
+  );
+}
+
+/**
+ * Available tools for the AI assistant
+ *
+ * Each entry's `function.name` matches a `case` label in executeTool() and a
+ * key in TOOL_META. The argument names declared under `properties` are the
+ * ones executeTool() reads from the parsed arguments (e.g. `args.worker_id`),
+ * so the three places have to be kept in sync when a tool is added or
+ * renamed.
+ */
+export const CHAT_TOOLS: ToolDefinition[] = [
+  // ============== Query Tools ==============
+  {
+    type: "function",
+    function: {
+      name: "get_system_status",
+      description:
+        "Get complete LMStack system status including workers, GPUs, containers, and deployments. Call this to get the latest system information.",
+      parameters: {
+        type: "object",
+        properties: {},
+        required: [],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "list_workers",
+      description:
+        "List all worker nodes with their GPU status, memory usage, and availability.",
+      parameters: {
+        type: "object",
+        properties: {},
+        required: [],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "list_containers",
+      description: "List all Docker containers running across all workers.",
+      parameters: {
+        type: "object",
+        properties: {
+          status: {
+            type: "string",
+            description: "Filter by status: running, stopped, or all",
+            enum: ["running", "stopped", "all"],
+          },
+          worker_id: {
+            type: "number",
+            description: "Optional: Filter by specific worker ID",
+          },
+        },
+        required: [],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "list_deployments",
+      description: "List all model deployments with their status.",
+      parameters: {
+        type: "object",
+        properties: {
+          status: {
+            type: "string",
+            description: "Filter by status: running, stopped, or all",
+            enum: ["running", "stopped", "all"],
+          },
+        },
+        required: [],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "list_models",
+      description: "List all available models that can be deployed.",
+      parameters: {
+        type: "object",
+        properties: {
+          source: {
+            type: "string",
+            description: "Filter by source",
+            enum: ["huggingface", "ollama", "local"],
+          },
+        },
+        required: [],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "get_gpu_status",
+      description:
+        "Get detailed GPU status including memory usage, utilization, and temperature for all workers.",
+      parameters: {
+        type: "object",
+        properties: {
+          worker_id: {
+            type: "number",
+            description: "Optional: Filter by specific worker ID",
+          },
+        },
+        required: [],
+      },
+    },
+  },
+
+  // ============== Model Management Tools ==============
+  {
+    type: "function",
+    function: {
+      name: "add_model",
+      description:
+        "Add a new model to the system. Supports HuggingFace and Ollama models.",
+      parameters: {
+        type: "object",
+        properties: {
+          name: {
+            type: "string",
+            description:
+              "Model name/identifier (e.g., 'Qwen/Qwen2.5-7B-Instruct' for HuggingFace, 'llama3.2' for Ollama)",
+          },
+          source: {
+            type: "string",
+            description: "Model source",
+            enum: ["huggingface", "ollama"],
+          },
+          parameters: {
+            type: "string",
+            description: "Optional: Model parameters (e.g., '7B', '13B')",
+          },
+          quantization: {
+            type: "string",
+            description:
+              "Optional: Quantization format (e.g., 'GPTQ', 'AWQ', 'GGUF')",
+          },
+        },
+        required: ["name", "source"],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "delete_model",
+      description:
+        "Delete a model from the system. This will NOT delete any deployments using this model.",
+      parameters: {
+        type: "object",
+        properties: {
+          model_id: {
+            type: "number",
+            description:
+              "ID of the model to delete (use list_models to find IDs)",
+          },
+        },
+        required: ["model_id"],
+      },
+    },
+  },
+
+  // ============== Deployment Tools ==============
+  {
+    type: "function",
+    function: {
+      name: "deploy_model",
+      description:
+        "Deploy a model to a worker. This will start the model inference service.",
+      parameters: {
+        type: "object",
+        properties: {
+          model_id: {
+            type: "number",
+            description:
+              "ID of the model to deploy (use list_models to find IDs)",
+          },
+          worker_id: {
+            type: "number",
+            description:
+              "ID of the worker to deploy to (use list_workers to find IDs)",
+          },
+          gpu_ids: {
+            type: "array",
+            items: { type: "number" },
+            description:
+              "Optional: Specific GPU indices to use. If not provided, GPUs will be auto-selected.",
+          },
+        },
+        required: ["model_id", "worker_id"],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "stop_deployment",
+      description: "Stop a running model deployment.",
+      parameters: {
+        type: "object",
+        properties: {
+          deployment_id: {
+            type: "number",
+            description:
+              "ID of the deployment to stop (use list_deployments to find IDs)",
+          },
+        },
+        required: ["deployment_id"],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "start_deployment",
+      description: "Start a stopped model deployment.",
+      parameters: {
+        type: "object",
+        properties: {
+          deployment_id: {
+            type: "number",
+            description: "ID of the deployment to start",
+          },
+        },
+        required: ["deployment_id"],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "delete_deployment",
+      description:
+        "Delete a model deployment completely. This cannot be undone.",
+      parameters: {
+        type: "object",
+        properties: {
+          deployment_id: {
+            type: "number",
+            description: "ID of the deployment to delete",
+          },
+        },
+        required: ["deployment_id"],
+      },
+    },
+  },
+
+  // ============== Container Tools ==============
+  {
+    type: "function",
+    function: {
+      name: "stop_container",
+      description:
+        "Stop a running Docker container. If you don't know the worker_id, call list_containers first to find it.",
+      parameters: {
+        type: "object",
+        properties: {
+          container_name: {
+            type: "string",
+            description:
+              "Name of the container to stop (e.g., 'lmstack-llama')",
+          },
+          worker_id: {
+            type: "number",
+            description:
+              "ID of the worker where the container is running. Use list_containers to find this.",
+          },
+        },
+        required: ["container_name", "worker_id"],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "remove_container",
+      description:
+        "Remove/delete a Docker container. If you don't know the worker_id, call list_containers first to find it.",
+      parameters: {
+        type: "object",
+        properties: {
+          container_name: {
+            type: "string",
+            description: "Name of the container to remove",
+          },
+          worker_id: {
+            type: "number",
+            description:
+              "ID of the worker where the container is located. Use list_containers to find this.",
+          },
+          force: {
+            type: "boolean",
+            description: "Force remove even if running (default: false)",
+          },
+        },
+        required: ["container_name", "worker_id"],
+      },
+    },
+  },
+
+  // ============== API Key Tools ==============
+  {
+    type: "function",
+    function: {
+      name: "list_api_keys",
+      description:
+        "List all API keys in the system with their usage statistics.",
+      parameters: {
+        type: "object",
+        properties: {},
+        required: [],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "create_api_key",
+      description: "Create a new API key for accessing the LMStack API.",
+      parameters: {
+        type: "object",
+        properties: {
+          name: {
+            type: "string",
+            description:
+              "Name for the API key (e.g., 'production-key', 'test-key')",
+          },
+          description: {
+            type: "string",
+            description: "Optional description for the API key",
+          },
+          expires_in_days: {
+            type: "number",
+            description:
+              "Optional: Number of days until the key expires. If not set, the key never expires.",
+          },
+        },
+        required: ["name"],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "delete_api_key",
+      description: "Delete an API key from the system.",
+      parameters: {
+        type: "object",
+        properties: {
+          api_key_id: {
+            type: "number",
+            description:
+              "ID of the API key to delete (use list_api_keys to find IDs)",
+          },
+        },
+        required: ["api_key_id"],
+      },
+    },
+  },
+
+  // ============== Docker Image Tools ==============
+  {
+    type: "function",
+    function: {
+      name: "list_images",
+      description: "List all Docker images across all workers.",
+      parameters: {
+        type: "object",
+        properties: {
+          worker_id: {
+            type: "number",
+            description: "Optional: Filter by specific worker ID",
+          },
+          repository: {
+            type: "string",
+            description: "Optional: Filter by repository name",
+          },
+        },
+        required: [],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "pull_image",
+      description: "Pull a Docker image from a registry to a worker.",
+      parameters: {
+        type: "object",
+        properties: {
+          worker_id: {
+            type: "number",
+            description: "ID of the worker to pull the image to",
+          },
+          image: {
+            type: "string",
+            description:
+              "Image reference (e.g., 'nginx:latest', 'python:3.11')",
+          },
+        },
+        required: ["worker_id", "image"],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "delete_image",
+      description: "Delete a Docker image from a worker.",
+      parameters: {
+        type: "object",
+        properties: {
+          image_id: {
+            type: "string",
+            description: "ID or name of the image to delete",
+          },
+          worker_id: {
+            type: "number",
+            description: "ID of the worker where the image is located",
+          },
+          force: {
+            type: "boolean",
+            description:
+              "Force removal even if image is in use (default: false)",
+          },
+        },
+        required: ["image_id", "worker_id"],
+      },
+    },
+  },
+
+  // ============== Storage Tools ==============
+  {
+    type: "function",
+    function: {
+      name: "list_storage_volumes",
+      description: "List all Docker storage volumes across all workers.",
+      parameters: {
+        type: "object",
+        properties: {
+          worker_id: {
+            type: "number",
+            description: "Optional: Filter by specific worker ID",
+          },
+        },
+        required: [],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "get_disk_usage",
+      description:
+        "Get Docker disk usage statistics including images, containers, volumes, and build cache.",
+      parameters: {
+        type: "object",
+        properties: {
+          worker_id: {
+            type: "number",
+            description: "Optional: Filter by specific worker ID",
+          },
+        },
+        required: [],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "delete_storage_volume",
+      description: "Delete a Docker storage volume from a worker.",
+      parameters: {
+        type: "object",
+        properties: {
+          volume_name: {
+            type: "string",
+            description: "Name of the volume to delete",
+          },
+          worker_id: {
+            type: "number",
+            description: "ID of the worker where the volume is located",
+          },
+          force: {
+            type: "boolean",
+            description: "Force removal (default: false)",
+          },
+        },
+        required: ["volume_name", "worker_id"],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "prune_storage",
+      description:
+        "Clean up unused Docker resources (images, containers, volumes, build cache) to free disk space.",
+      parameters: {
+        type: "object",
+        properties: {
+          worker_id: {
+            type: "number",
+            description:
+              "Optional: Only prune on specific worker. If not set, prunes on all workers.",
+          },
+          images: {
+            type: "boolean",
+            description: "Prune unused images (default: true)",
+          },
+          containers: {
+            type: "boolean",
+            description: "Prune stopped containers (default: true)",
+          },
+          volumes: {
+            type: "boolean",
+            description: "Prune unused volumes (default: false - be careful!)",
+          },
+          build_cache: {
+            type: "boolean",
+            description: "Prune build cache (default: true)",
+          },
+        },
+        required: [],
+      },
+    },
+  },
+
+  // ============== Auto-Tuning Tools ==============
+  {
+    type: "function",
+    function: {
+      name: "list_tuning_jobs",
+      description: "List all auto-tuning jobs with their status and progress.",
+      parameters: {
+        type: "object",
+        properties: {
+          status: {
+            type: "string",
+            description: "Filter by status",
+            enum: [
+              "pending",
+              "analyzing",
+              "querying_kb",
+              "exploring",
+              "benchmarking",
+              "completed",
+              "failed",
+              "cancelled",
+            ],
+          },
+        },
+        required: [],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "start_auto_tuning",
+      description:
+        "Start a new auto-tuning job to find the best deployment configuration for a model. The agent will analyze the environment, query the knowledge base, explore configuration space, run benchmarks, and find the optimal settings.",
+      parameters: {
+        type: "object",
+        properties: {
+          model_id: {
+            type: "number",
+            description:
+              "ID of the model to tune (use list_models to find IDs)",
+          },
+          worker_id: {
+            type: "number",
+            description:
+              "ID of the worker to use for tuning (use list_workers to find IDs)",
+          },
+          optimization_target: {
+            type: "string",
+            description: "What to optimize for",
+            enum: ["throughput", "latency", "cost", "balanced"],
+          },
+        },
+        required: ["model_id", "worker_id"],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "get_tuning_job",
+      description:
+        "Get detailed information about a specific tuning job including progress, best configuration, and all results.",
+      parameters: {
+        type: "object",
+        properties: {
+          job_id: {
+            type: "number",
+            description: "ID of the tuning job",
+          },
+        },
+        required: ["job_id"],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "cancel_tuning_job",
+      description: "Cancel a running auto-tuning job.",
+      parameters: {
+        type: "object",
+        properties: {
+          job_id: {
+            type: "number",
+            description: "ID of the tuning job to cancel",
+          },
+        },
+        required: ["job_id"],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "query_knowledge_base",
+      description:
+        "Query the performance knowledge base to find similar configurations and their benchmark results. This uses transfer learning from previous tuning results.",
+      parameters: {
+        type: "object",
+        properties: {
+          model_name: {
+            type: "string",
+            description: "Model name pattern to match (e.g., 'Qwen', 'Llama')",
+          },
+          model_family: {
+            type: "string",
+            description: "Model family: Qwen, Llama, Mistral, etc.",
+          },
+          gpu_model: {
+            type: "string",
+            description: "GPU model pattern (e.g., 'RTX 4090', 'A100')",
+          },
+          optimization_target: {
+            type: "string",
+            description: "Optimization target for scoring",
+            enum: ["throughput", "latency", "cost", "balanced"],
+          },
+          limit: {
+            type: "number",
+            description: "Maximum number of results to return (default: 10)",
+          },
+        },
+        required: [],
+      },
+    },
+  },
+  {
+    type: "function",
+    function: {
+      name: "run_benchmark",
+      description:
+        "Run a performance benchmark on a deployment to measure throughput, latency, and resource usage.",
+      parameters: {
+        type: "object",
+        properties: {
+          deployment_id: {
+            type: "number",
+            description:
+              "ID of the deployment to benchmark (use list_deployments to find IDs)",
+          },
+          test_type: {
+            type: "string",
+            description: "Type of benchmark test",
+            enum: ["throughput", "latency"],
+          },
+          duration_seconds: {
+            type: "number",
+            description: "Test duration in seconds (10-600, default: 60)",
+          },
+          input_length: {
+            type: "number",
+            description: "Input token length (default: 512)",
+          },
+          output_length: {
+            type: "number",
+            description: "Output token length (default: 128)",
+          },
+          concurrency: {
+            type: "number",
+            description: "Number of concurrent requests (1-64, default: 1)",
+          },
+        },
+        required: ["deployment_id"],
+      },
+    },
+  },
+];
+
+/**
+ * Execute a tool call and return the result
+ *
+ * Decodes the call's JSON arguments, dispatches on the tool name and wraps
+ * the tool's text output in a ToolResult tagged with the call's id. Failures
+ * are reported in the result's `content` instead of being thrown: invalid
+ * arguments and exceptions raised by a tool produce an "Error..." message,
+ * and an unrecognised name produces "Unknown tool: <name>".
+ *
+ * An empty, missing or `null` arguments payload is treated as "no arguments",
+ * so parameterless tools still run when the model omits "{}".
+ *
+ * This function does not ask for confirmation itself; it runs whatever call
+ * it is given.
+ *
+ * @param toolCall - The tool call to execute
+ * @param modelConfig - Optional model config for tools that need LLM access (like auto-tuning)
+ * @returns The tool output (or error text) as a `role: "tool"` message
+ */
+export async function executeTool(
+  toolCall: ToolCall,
+  modelConfig?: ChatModelConfig,
+): Promise<ToolResult> {
+  const { name, arguments: argsStr } = toolCall.function;
+  let args: Record<string, any> = {};
+
+  // JSON.parse("") throws, so only parse when there is something to parse.
+  const hasArgs = argsStr != null && String(argsStr).trim() !== "";
+
+  if (hasArgs) {
+    let parsed: unknown;
+
+    try {
+      parsed = JSON.parse(argsStr);
+    } catch {
+      return {
+        tool_call_id: toolCall.id,
+        role: "tool",
+        content: `Error: Invalid arguments JSON: ${argsStr}`,
+      };
+    }
+
+    // JSON `null` also means "no arguments". Any other non-object value
+    // (array, number, string, boolean) cannot carry named arguments, so it is
+    // reported back instead of being silently read as all-undefined.
+    if (parsed !== null) {
+      if (typeof parsed !== "object" || Array.isArray(parsed)) {
+        return {
+          tool_call_id: toolCall.id,
+          role: "tool",
+          content: `Error: Tool arguments must be a JSON object: ${argsStr}`,
+        };
+      }
+      args = parsed as Record<string, any>;
+    }
+  }
+
+  try {
+    let result: string;
+
+    switch (name) {
+      // Query tools
+      case "get_system_status":
+        result = await getSystemStatus();
+        break;
+
+      case "list_workers":
+        result = await listWorkers();
+        break;
+
+      case "list_containers":
+        result = await listContainers(args.status, args.worker_id);
+        break;
+
+      case "list_deployments":
+        result = await listDeployments(args.status);
+        break;
+
+      case "list_models":
+        result = await listModels(args.source);
+        break;
+
+      case "get_gpu_status":
+        result = await getGpuStatus(args.worker_id);
+        break;
+
+      // Model management tools
+      case "add_model":
+        result = await addModel(
+          args.name,
+          args.source,
+          args.parameters,
+          args.quantization,
+        );
+        break;
+
+      case "delete_model":
+        result = await deleteModel(args.model_id);
+        break;
+
+      // Deployment tools
+      case "deploy_model":
+        result = await deployModel(args.model_id, args.worker_id, args.gpu_ids);
+        break;
+
+      case "stop_deployment":
+        result = await stopDeployment(args.deployment_id);
+        break;
+
+      case "start_deployment":
+        result = await startDeployment(args.deployment_id);
+        break;
+
+      case "delete_deployment":
+        result = await deleteDeployment(args.deployment_id);
+        break;
+
+      // Container tools
+      case "stop_container":
+        result = await stopContainer(args.container_name, args.worker_id);
+        break;
+
+      case "remove_container":
+        result = await removeContainer(
+          args.container_name,
+          args.worker_id,
+          args.force,
+        );
+        break;
+
+      // API Key tools
+      case "list_api_keys":
+        result = await listApiKeys();
+        break;
+
+      case "create_api_key":
+        result = await createApiKey(
+          args.name,
+          args.description,
+          args.expires_in_days,
+        );
+        break;
+
+      case "delete_api_key":
+        result = await deleteApiKey(args.api_key_id);
+        break;
+
+      // Docker Image tools
+      case "list_images":
+        result = await listImages(args.worker_id, args.repository);
+        break;
+
+      case "pull_image":
+        result = await pullImage(args.worker_id, args.image);
+        break;
+
+      case "delete_image":
+        result = await deleteImage(args.image_id, args.worker_id, args.force);
+        break;
+
+      // Storage tools
+      case "list_storage_volumes":
+        result = await listStorageVolumes(args.worker_id);
+        break;
+
+      case "get_disk_usage":
+        result = await getDiskUsage(args.worker_id);
+        break;
+
+      case "delete_storage_volume":
+        result = await deleteStorageVolume(
+          args.volume_name,
+          args.worker_id,
+          args.force,
+        );
+        break;
+
+      case "prune_storage":
+        result = await pruneStorage(
+          args.worker_id,
+          args.images,
+          args.containers,
+          args.volumes,
+          args.build_cache,
+        );
+        break;
+
+      // Auto-Tuning tools
+      case "list_tuning_jobs":
+        result = await listTuningJobs(args.status);
+        break;
+
+      case "start_auto_tuning":
+        // The only tool that receives the caller's model config.
+        result = await startAutoTuning(
+          args.model_id,
+          args.worker_id,
+          args.optimization_target,
+          modelConfig,
+        );
+        break;
+
+      case "get_tuning_job":
+        result = await getTuningJob(args.job_id);
+        break;
+
+      case "cancel_tuning_job":
+        result = await cancelTuningJob(args.job_id);
+        break;
+
+      case "query_knowledge_base":
+        result = await queryKnowledgeBase(
+          args.model_name,
+          args.model_family,
+          args.gpu_model,
+          args.optimization_target,
+          args.limit,
+        );
+        break;
+
+      case "run_benchmark":
+        result = await runBenchmark(
+          args.deployment_id,
+          args.test_type,
+          args.duration_seconds,
+          args.input_length,
+          args.output_length,
+          args.concurrency,
+        );
+        break;
+
+      default:
+        result = `Unknown tool: ${name}`;
+    }
+
+    return {
+      tool_call_id: toolCall.id,
+      role: "tool",
+      content: result,
+    };
+  } catch (error) {
+    // Surface the failure to the model as tool output so the conversation
+    // can continue instead of rejecting the promise.
+    const message = error instanceof Error ? error.message : String(error);
+    return {
+      tool_call_id: toolCall.id,
+      role: "tool",
+      content: `Error executing ${name}: ${message}`,
+    };
+  }
+}
+
+// ============================================================================
+// Tool Implementations
+// ============================================================================
+
+/**
+ * Build a JSON overview of workers, containers, deployments, models and
+ * aggregate GPU memory.
+ *
+ * The four list endpoints are fetched in parallel with Promise.all, so a
+ * rejection from any one of them rejects the whole call.
+ *
+ * @returns Pretty-printed JSON string with `summary`, `workers` and
+ *   `running_deployments` sections.
+ */
+async function getSystemStatus(): Promise<string> {
+  const [workers, containers, deployments, models] = await Promise.all([
+    api.get("/workers").then((r) => r.data.items || []),
+    api.get("/containers").then((r) => r.data.items || []),
+    api.get("/deployments").then((r) => r.data.items || []),
+    api.get("/models").then((r) => r.data.items || []),
+  ]);
+
+  const onlineWorkers = workers.filter((w: any) => w.status === "online");
+  // Container status is free text; both "running" and "up ..." count as running.
+  const runningContainers = containers.filter(
+    (c: any) =>
+      c.status?.toLowerCase().includes("running") ||
+      c.status?.toLowerCase().includes("up"),
+  );
+  const runningDeployments = deployments.filter(
+    (d: any) => d.status === "running",
+  );
+
+  // Raw memory figures are divided by 1024 and reported under *_gb keys.
+  const toGb = (value: number) => (value / 1024).toFixed(1);
+
+  let totalGpuMem = 0,
+    usedGpuMem = 0;
+  for (const w of workers) {
+    for (const g of w.gpu_info || []) {
+      totalGpuMem += g.memory_total || 0;
+      usedGpuMem += g.memory_used || 0;
+    }
+  }
+
+  return JSON.stringify(
+    {
+      summary: {
+        workers: `${onlineWorkers.length}/${workers.length} online`,
+        containers: `${runningContainers.length}/${containers.length} running`,
+        deployments: `${runningDeployments.length}/${deployments.length} running`,
+        models: `${models.length} available`,
+        gpu_memory: {
+          used_gb: toGb(usedGpuMem),
+          free_gb: toGb(totalGpuMem - usedGpuMem),
+          total_gb: toGb(totalGpuMem),
+        },
+      },
+      workers: workers.map((w: any) => ({
+        id: w.id,
+        name: w.name,
+        status: w.status,
+        gpus: (w.gpu_info || []).map((g: any) => {
+          // Same `|| 0` guard as the aggregate loop above, so a GPU entry
+          // without memory figures reports "0.0" instead of "NaN".
+          const used = g.memory_used || 0;
+          const total = g.memory_total || 0;
+          return {
+            index: g.index,
+            name: g.name,
+            memory_used_gb: toGb(used),
+            memory_free_gb: toGb(total - used),
+            memory_total_gb: toGb(total),
+            utilization: g.utilization_gpu,
+          };
+        }),
+      })),
+      running_deployments: runningDeployments.map((d: any) => ({
+        id: d.id,
+        model: d.model?.name || d.name,
+        worker: d.worker?.name,
+      })),
+    },
+    null,
+    2,
+  );
+}
+
+/**
+ * List every worker returned by GET /workers together with its GPUs.
+ *
+ * @returns Pretty-printed JSON array; each entry carries id, name, host,
+ *   status and a `gpus` list with memory figures divided by 1024.
+ */
+async function listWorkers(): Promise<string> {
+  const { data } = await api.get("/workers");
+
+  // One GPU entry -> summary row.
+  const describeGpu = (gpu: any) => ({
+    index: gpu.index,
+    name: gpu.name,
+    memory_used_gb: (gpu.memory_used / 1024).toFixed(1),
+    memory_free_gb: ((gpu.memory_total - gpu.memory_used) / 1024).toFixed(1),
+    memory_total_gb: (gpu.memory_total / 1024).toFixed(1),
+    utilization_percent: gpu.utilization_gpu,
+  });
+
+  // One worker -> summary row; a worker without gpu_info gets an empty list.
+  const describeWorker = (worker: any) => ({
+    id: worker.id,
+    name: worker.name,
+    host: worker.host,
+    status: worker.status,
+    gpus: (worker.gpu_info || []).map((gpu: any) => describeGpu(gpu)),
+  });
+
+  const rows = (data.items || []).map((worker: any) => describeWorker(worker));
+  return JSON.stringify(rows, null, 2);
+}
+
+/**
+ * List containers, optionally filtered by worker and by status.
+ *
+ * All containers are fetched from GET /containers and filtered client-side.
+ *
+ * @param status - Case-insensitive status filter. "all" (or nothing) disables
+ *   it; "running" also matches statuses containing "up"; any other value is
+ *   matched as a substring of the container status.
+ * @param workerId - When truthy, keep only containers on this worker.
+ * @returns Pretty-printed JSON array of container summaries (id shortened to
+ *   12 characters).
+ */
+async function listContainers(
+  status?: string,
+  workerId?: number,
+): Promise<string> {
+  const response = await api.get("/containers");
+  let containers = response.data.items || [];
+
+  if (workerId) {
+    containers = containers.filter(
+      (c: any) => c.worker?.id === workerId || c.worker_id === workerId,
+    );
+  }
+
+  // The container status is lower-cased before matching, so the requested
+  // status must be lower-cased too; otherwise "Running" or "Exited" can
+  // never match and "All" is treated as a literal filter.
+  const wanted = status?.toLowerCase();
+
+  if (wanted && wanted !== "all") {
+    containers = containers.filter((c: any) => {
+      const s = c.status?.toLowerCase() || "";
+      if (wanted === "running") {
+        return s.includes("running") || s.includes("up");
+      }
+      return s.includes(wanted);
+    });
+  }
+
+  return JSON.stringify(
+    containers.map((c: any) => ({
+      id: c.id?.substring(0, 12),
+      name: c.name,
+      image: c.image,
+      status: c.status,
+      worker: c.worker?.name || c.worker_name,
+      worker_id: c.worker?.id || c.worker_id,
+    })),
+    null,
+    2,
+  );
+}
+
+/**
+ * List deployments from GET /deployments, optionally keeping one status only.
+ *
+ * @param status - Exact status to keep; omitted, empty or "all" keeps everything.
+ * @returns Pretty-printed JSON array of deployment summaries.
+ */
+async function listDeployments(status?: string): Promise<string> {
+  const { data } = await api.get("/deployments");
+  const everything = data.items || [];
+
+  const filterActive = Boolean(status) && status !== "all";
+  const selected = filterActive
+    ? everything.filter((dep: any) => dep.status === status)
+    : everything;
+
+  const rows = selected.map((dep: any) => ({
+    id: dep.id,
+    name: dep.name,
+    model: dep.model?.name,
+    model_id: dep.model?.id,
+    worker: dep.worker?.name,
+    worker_id: dep.worker?.id,
+    status: dep.status,
+    gpu_ids: dep.gpu_ids,
+    port: dep.port,
+    created_at: dep.created_at,
+  }));
+
+  return JSON.stringify(rows, null, 2);
+}
+
+/**
+ * List models from GET /models, optionally keeping a single source.
+ *
+ * @param source - When truthy, keep only models whose `source` equals it.
+ * @returns Pretty-printed JSON array of model summaries.
+ */
+async function listModels(source?: string): Promise<string> {
+  const { data } = await api.get("/models");
+  const everything = data.items || [];
+
+  const selected = source
+    ? everything.filter((model: any) => model.source === source)
+    : everything;
+
+  const rows = selected.map((model: any) => ({
+    id: model.id,
+    name: model.name,
+    source: model.source,
+    parameters: model.parameters,
+    quantization: model.quantization,
+  }));
+
+  return JSON.stringify(rows, null, 2);
+}
+
+/**
+ * Report GPU details per worker, based on GET /workers.
+ *
+ * @param workerId - When truthy, keep only the worker with this id.
+ * @returns Pretty-printed JSON array with one entry per worker and a `gpus`
+ *   list (memory figures divided by 1024, plus utilization and temperature).
+ */
+async function getGpuStatus(workerId?: number): Promise<string> {
+  const { data } = await api.get("/workers");
+  const everything = data.items || [];
+
+  const selected = workerId
+    ? everything.filter((worker: any) => worker.id === workerId)
+    : everything;
+
+  const describeGpu = (gpu: any) => ({
+    index: gpu.index,
+    name: gpu.name,
+    memory_used_gb: (gpu.memory_used / 1024).toFixed(1),
+    memory_free_gb: ((gpu.memory_total - gpu.memory_used) / 1024).toFixed(1),
+    memory_total_gb: (gpu.memory_total / 1024).toFixed(1),
+    utilization_percent: gpu.utilization_gpu,
+    temperature: gpu.temperature,
+  });
+
+  const report = selected.map((worker: any) => ({
+    worker_id: worker.id,
+    worker_name: worker.name,
+    status: worker.status,
+    gpus: (worker.gpu_info || []).map((gpu: any) => describeGpu(gpu)),
+  }));
+
+  return JSON.stringify(report, null, 2);
+}
+
+/**
+ * Create a deployment and poll it until it settles or a timeout elapses.
+ *
+ * Posts to /deployments, then polls GET /deployments/{id} every 3 seconds
+ * for up to 60 seconds. Polling stops as soon as the status is "running",
+ * "error" or "stopped".
+ *
+ * @param modelId - ID of the model to deploy (required).
+ * @param workerId - ID of the target worker (required).
+ * @param gpuIds - Optional GPU ids forwarded as `gpu_ids`.
+ * @returns An "Error: ..." string when a required id is missing; otherwise a
+ *   pretty-printed JSON string with `success`, `message`, `deployment` and
+ *   `status_history`. On timeout `success` is false and a `note` is added.
+ */
+async function deployModel(
+  modelId: number,
+  workerId: number,
+  gpuIds?: number[],
+): Promise<string> {
+  if (!modelId || !workerId) {
+    return "Error: model_id and worker_id are required";
+  }
+
+  const response = await api.post("/deployments", {
+    model_id: modelId,
+    worker_id: workerId,
+    gpu_ids: gpuIds,
+  });
+
+  const deployment = response.data;
+  const deploymentId = deployment.id;
+
+  // Poll deployment status for up to 60 seconds
+  const maxPollTime = 60000;
+  const pollInterval = 3000;
+  const startTime = Date.now();
+  let lastStatus = deployment.status;
+  // Last poll failure already written to the history; avoids repeating the
+  // same message on every tick while the API stays unreachable.
+  let lastPollError: string | null = null;
+  const statusUpdates: string[] = [`Initial status: ${deployment.status}`];
+
+  while (Date.now() - startTime < maxPollTime) {
+    await new Promise((resolve) => setTimeout(resolve, pollInterval));
+
+    try {
+      const statusResponse = await api.get(`/deployments/${deploymentId}`);
+      lastPollError = null;
+      const currentStatus = statusResponse.data.status;
+      const statusMessage = statusResponse.data.status_message;
+
+      if (currentStatus !== lastStatus) {
+        statusUpdates.push(
+          `Status changed: ${lastStatus} → ${currentStatus}${statusMessage ? ` (${statusMessage})` : ""}`,
+        );
+        lastStatus = currentStatus;
+      }
+
+      // Stop polling if deployment reached a terminal state
+      if (["running", "error", "stopped"].includes(currentStatus)) {
+        return JSON.stringify(
+          {
+            success: currentStatus === "running",
+            message:
+              currentStatus === "running"
+                ? `Deployment completed successfully! Model is now running.`
+                : currentStatus === "error"
+                  ? `Deployment failed: ${statusMessage || "Unknown error"}`
+                  : `Deployment stopped`,
+            deployment: {
+              id: deploymentId,
+              status: currentStatus,
+              status_message: statusMessage,
+              model: deployment.model?.name,
+              worker: deployment.worker?.name,
+              port: statusResponse.data.port,
+            },
+            status_history: statusUpdates,
+          },
+          null,
+          2,
+        );
+      }
+    } catch (error) {
+      // Keep polling after a failed request (best effort), but record the
+      // failure so a persistent API error is visible in status_history
+      // instead of being reported only as "still in progress".
+      const reason = error instanceof Error ? error.message : String(error);
+      if (reason !== lastPollError) {
+        statusUpdates.push(`Status check failed: ${reason}`);
+        lastPollError = reason;
+      }
+    }
+  }
+
+  // Timeout - return current status
+  return JSON.stringify(
+    {
+      success: false,
+      message: `Deployment is still in progress (status: ${lastStatus}). Check [Deployments](/deployments) page for updates.`,
+      deployment: {
+        id: deploymentId,
+        status: lastStatus,
+        model: deployment.model?.name,
+        worker: deployment.worker?.name,
+      },
+      status_history: statusUpdates,
+      note: "Deployment is taking longer than expected. This is normal for large models that need to be downloaded.",
+    },
+    null,
+    2,
+  );
+}
+
+/**
+ * Stop a deployment via POST /deployments/{id}/stop.
+ *
+ * @param deploymentId - Deployment to stop; a falsy value short-circuits.
+ * @returns Pretty-printed JSON success payload, or an "Error: ..." string
+ *   when the id is missing.
+ */
+async function stopDeployment(deploymentId: number): Promise<string> {
+  if (!deploymentId) return "Error: deployment_id is required";
+
+  await api.post(`/deployments/${deploymentId}/stop`);
+
+  const payload = {
+    success: true,
+    message: `Deployment ${deploymentId} stopped successfully`,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+/**
+ * Start a deployment via POST /deployments/{id}/start.
+ *
+ * @param deploymentId - Deployment to start; a falsy value short-circuits.
+ * @returns Pretty-printed JSON success payload, or an "Error: ..." string
+ *   when the id is missing.
+ */
+async function startDeployment(deploymentId: number): Promise<string> {
+  if (!deploymentId) return "Error: deployment_id is required";
+
+  await api.post(`/deployments/${deploymentId}/start`);
+
+  const payload = {
+    success: true,
+    message: `Deployment ${deploymentId} started successfully`,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+/**
+ * Delete a deployment via DELETE /deployments/{id}.
+ *
+ * @param deploymentId - Deployment to delete; a falsy value short-circuits.
+ * @returns Pretty-printed JSON success payload, or an "Error: ..." string
+ *   when the id is missing.
+ */
+async function deleteDeployment(deploymentId: number): Promise<string> {
+  if (!deploymentId) return "Error: deployment_id is required";
+
+  await api.delete(`/deployments/${deploymentId}`);
+
+  const payload = {
+    success: true,
+    message: `Deployment ${deploymentId} deleted successfully`,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+// ============================================================================
+// Model Management Tools
+// ============================================================================
+
+/**
+ * Register a model via POST /models.
+ *
+ * @param name - Model name (required).
+ * @param source - Model source (required).
+ * @param parameters - Optional value forwarded as `parameters`.
+ * @param quantization - Optional value forwarded as `quantization`.
+ * @returns Pretty-printed JSON with the created model's fields, or an
+ *   "Error: ..." string when name or source is missing.
+ */
+async function addModel(
+  name: string,
+  source: string,
+  parameters?: string,
+  quantization?: string,
+): Promise<string> {
+  const missingRequired = !name || !source;
+  if (missingRequired) return "Error: name and source are required";
+
+  const body = { name, source, parameters, quantization };
+  const { data: created } = await api.post("/models", body);
+
+  const payload = {
+    success: true,
+    message: `Model added successfully`,
+    model: {
+      id: created.id,
+      name: created.name,
+      source: created.source,
+      parameters: created.parameters,
+      quantization: created.quantization,
+    },
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+/**
+ * Delete a model via DELETE /models/{id}.
+ *
+ * @param modelId - Model to delete; a falsy value short-circuits.
+ * @returns Pretty-printed JSON success payload, or an "Error: ..." string
+ *   when the id is missing.
+ */
+async function deleteModel(modelId: number): Promise<string> {
+  if (!modelId) return "Error: model_id is required";
+
+  await api.delete(`/models/${modelId}`);
+
+  const payload = {
+    success: true,
+    message: `Model ${modelId} deleted successfully`,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+// ============================================================================
+// Container Tools
+// ============================================================================
+
+/**
+ * Stop a container on a given worker.
+ *
+ * @param containerName - Container identifier, URL-encoded into the path.
+ * @param workerId - Worker hosting the container, sent as `worker_id` query param.
+ * @returns Pretty-printed JSON success payload, or an "Error: ..." string
+ *   when either argument is missing.
+ */
+async function stopContainer(
+  containerName: string,
+  workerId: number,
+): Promise<string> {
+  const missingRequired = !containerName || !workerId;
+  if (missingRequired) return "Error: container_name and worker_id are required";
+
+  // Backend expects: POST /containers/{container_id}/stop?worker_id=X
+  const url = `/containers/${encodeURIComponent(containerName)}/stop`;
+  const requestConfig = { params: { worker_id: workerId } };
+  await api.post(url, null, requestConfig);
+
+  const payload = {
+    success: true,
+    message: `Container "${containerName}" stopped successfully`,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+/**
+ * Remove a container from a given worker.
+ *
+ * @param containerName - Container identifier, URL-encoded into the path.
+ * @param workerId - Worker hosting the container, sent as `worker_id` query param.
+ * @param force - Sent as the `force` query param; falsy values become `false`.
+ * @returns Pretty-printed JSON success payload, or an "Error: ..." string
+ *   when a required argument is missing.
+ */
+async function removeContainer(
+  containerName: string,
+  workerId: number,
+  force?: boolean,
+): Promise<string> {
+  const missingRequired = !containerName || !workerId;
+  if (missingRequired) return "Error: container_name and worker_id are required";
+
+  // Backend expects: DELETE /containers/{container_id}?worker_id=X&force=Y
+  const url = `/containers/${encodeURIComponent(containerName)}`;
+  const requestConfig = {
+    params: { worker_id: workerId, force: force || false },
+  };
+  await api.delete(url, requestConfig);
+
+  const payload = {
+    success: true,
+    message: `Container "${containerName}" removed successfully`,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+// ============================================================================
+// API Key Tools
+// ============================================================================
+
+/**
+ * List API keys from GET /api-keys.
+ *
+ * @returns Pretty-printed JSON with `total` (the response's `total`, falling
+ *   back to the number of items) and an `api_keys` array of key summaries.
+ */
+async function listApiKeys(): Promise<string> {
+  const { data } = await api.get("/api-keys");
+  const keys = data.items || [];
+
+  const rows = keys.map((key: any) => ({
+    id: key.id,
+    name: key.name,
+    description: key.description,
+    access_key: key.access_key,
+    expires_at: key.expires_at,
+    created_at: key.created_at,
+    last_used_at: key.last_used_at,
+  }));
+
+  const payload = {
+    total: data.total || keys.length,
+    api_keys: rows,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+/**
+ * Create an API key via POST /api-keys.
+ *
+ * @param name - Key name (required).
+ * @param description - Optional description.
+ * @param expiresInDays - Optional lifetime, forwarded as `expires_in_days`.
+ * @returns Pretty-printed JSON containing the new key (including `full_key`)
+ *   and a warning to save it, or an "Error: ..." string when name is missing.
+ */
+async function createApiKey(
+  name: string,
+  description?: string,
+  expiresInDays?: number,
+): Promise<string> {
+  if (!name) return "Error: name is required";
+
+  const body = { name, description, expires_in_days: expiresInDays };
+  const { data: created } = await api.post("/api-keys", body);
+
+  const payload = {
+    success: true,
+    message: "API key created successfully",
+    api_key: {
+      id: created.id,
+      name: created.name,
+      access_key: created.access_key,
+      full_key: created.api_key, // The full key is only shown once!
+      expires_at: created.expires_at,
+    },
+    warning: "Save the full API key now! It will not be shown again.",
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+/**
+ * Delete an API key via DELETE /api-keys/{id}.
+ *
+ * @param apiKeyId - Key to delete; a falsy value short-circuits.
+ * @returns Pretty-printed JSON success payload, or an "Error: ..." string
+ *   when the id is missing.
+ */
+async function deleteApiKey(apiKeyId: number): Promise<string> {
+  if (!apiKeyId) return "Error: api_key_id is required";
+
+  await api.delete(`/api-keys/${apiKeyId}`);
+
+  const payload = {
+    success: true,
+    message: `API key ${apiKeyId} deleted successfully`,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+// ============================================================================
+// Docker Image Tools
+// ============================================================================
+
+/**
+ * List images from GET /images.
+ *
+ * @param workerId - When truthy, sent as the `worker_id` query param.
+ * @param repository - When truthy, sent as the `repository` query param.
+ * @returns Pretty-printed JSON with `total` and an `images` array (id
+ *   shortened to 12 characters, size divided by 1024 twice as `size_mb`).
+ */
+async function listImages(
+  workerId?: number,
+  repository?: string,
+): Promise<string> {
+  // Only truthy filters are forwarded to the backend.
+  const params: any = {
+    ...(workerId ? { worker_id: workerId } : {}),
+    ...(repository ? { repository } : {}),
+  };
+
+  const { data } = await api.get("/images", { params });
+  const found = data.items || [];
+
+  const rows = found.map((image: any) => ({
+    id: image.id?.substring(0, 12),
+    repository: image.repository,
+    tag: image.tag,
+    full_name: image.full_name,
+    size_mb: (image.size / 1024 / 1024).toFixed(1),
+    created_at: image.created_at,
+    worker: image.worker_name,
+    worker_id: image.worker_id,
+  }));
+
+  const payload = {
+    total: data.total || found.length,
+    images: rows,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+/**
+ * Pull an image onto a worker via POST /images/pull.
+ *
+ * @param workerId - Target worker (required).
+ * @param image - Image reference to pull (required).
+ * @returns Pretty-printed JSON success payload echoing the response's
+ *   `image`, or an "Error: ..." string when an argument is missing.
+ */
+async function pullImage(workerId: number, image: string): Promise<string> {
+  const missingRequired = !workerId || !image;
+  if (missingRequired) return "Error: worker_id and image are required";
+
+  const body = { worker_id: workerId, image };
+  const { data } = await api.post("/images/pull", body);
+
+  const payload = {
+    success: true,
+    message: `Image "${image}" pulled successfully`,
+    image: data.image,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+/**
+ * Delete an image from a worker via DELETE /images/{id}.
+ *
+ * @param imageId - Image identifier, URL-encoded into the path.
+ * @param workerId - Worker holding the image, sent as `worker_id` query param.
+ * @param force - Sent as the `force` query param; falsy values become `false`.
+ * @returns Pretty-printed JSON success payload, or an "Error: ..." string
+ *   when a required argument is missing.
+ */
+async function deleteImage(
+  imageId: string,
+  workerId: number,
+  force?: boolean,
+): Promise<string> {
+  const missingRequired = !imageId || !workerId;
+  if (missingRequired) return "Error: image_id and worker_id are required";
+
+  const url = `/images/${encodeURIComponent(imageId)}`;
+  const requestConfig = {
+    params: { worker_id: workerId, force: force || false },
+  };
+  await api.delete(url, requestConfig);
+
+  const payload = {
+    success: true,
+    message: `Image "${imageId}" deleted successfully`,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+// ============================================================================
+// Storage Tools
+// ============================================================================
+
+/**
+ * List storage volumes from GET /storage/volumes.
+ *
+ * @param workerId - When truthy, sent as the `worker_id` query param.
+ * @returns Pretty-printed JSON with `total` and a `volumes` array; a
+ *   non-array response body is treated as an empty list.
+ */
+async function listStorageVolumes(workerId?: number): Promise<string> {
+  const params: any = workerId ? { worker_id: workerId } : {};
+
+  const { data } = await api.get("/storage/volumes", { params });
+  const found = Array.isArray(data) ? data : [];
+
+  const rows = found.map((volume: any) => ({
+    name: volume.name,
+    driver: volume.driver,
+    mountpoint: volume.mountpoint,
+    created_at: volume.created_at,
+    worker: volume.worker_name,
+    worker_id: volume.worker_id,
+  }));
+
+  const payload = { total: found.length, volumes: rows };
+  return JSON.stringify(payload, null, 2);
+}
+
+/**
+ * Report disk usage per worker from GET /storage/disk-usage.
+ *
+ * @param workerId - When truthy, sent as the `worker_id` query param.
+ * @returns Pretty-printed JSON with a `workers` array; each entry breaks the
+ *   usage down into images, containers, volumes and build cache, with sizes
+ *   rendered as "x.xx GB" or "x.xx MB". A non-array response body is treated
+ *   as an empty list.
+ */
+async function getDiskUsage(workerId?: number): Promise<string> {
+  const params: any = workerId ? { worker_id: workerId } : {};
+
+  const { data } = await api.get("/storage/disk-usage", { params });
+  const entries = Array.isArray(data) ? data : [];
+
+  // Values of at least 1024^3 are rendered in GB, everything else in MB.
+  const formatSize = (bytes: number) =>
+    bytes >= 1024 * 1024 * 1024
+      ? `${(bytes / 1024 / 1024 / 1024).toFixed(2)} GB`
+      : `${(bytes / 1024 / 1024).toFixed(2)} MB`;
+
+  // Every usage category shares the same count/size/reclaimable layout.
+  const describeCategory = (category: any) => ({
+    count: category.count,
+    size: formatSize(category.size),
+    reclaimable: formatSize(category.reclaimable),
+  });
+
+  const rows = entries.map((usage: any) => ({
+    worker: usage.worker_name,
+    worker_id: usage.worker_id,
+    images: describeCategory(usage.images),
+    containers: describeCategory(usage.containers),
+    volumes: describeCategory(usage.volumes),
+    build_cache: describeCategory(usage.build_cache),
+    total_size: formatSize(usage.total_size),
+    total_reclaimable: formatSize(usage.total_reclaimable),
+  }));
+
+  return JSON.stringify({ workers: rows }, null, 2);
+}
+
+/**
+ * Delete a storage volume via DELETE /storage/volumes/{name}.
+ *
+ * @param volumeName - Volume name, URL-encoded into the path.
+ * @param workerId - Worker owning the volume, sent as `worker_id` query param.
+ * @param force - Sent as the `force` query param; falsy values become `false`.
+ * @returns Pretty-printed JSON success payload, or an "Error: ..." string
+ *   when a required argument is missing.
+ */
+async function deleteStorageVolume(
+  volumeName: string,
+  workerId: number,
+  force?: boolean,
+): Promise<string> {
+  const missingRequired = !volumeName || !workerId;
+  if (missingRequired) return "Error: volume_name and worker_id are required";
+
+  const url = `/storage/volumes/${encodeURIComponent(volumeName)}`;
+  const requestConfig = {
+    params: { worker_id: workerId, force: force || false },
+  };
+  await api.delete(url, requestConfig);
+
+  const payload = {
+    success: true,
+    message: `Volume "${volumeName}" deleted successfully`,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+/**
+ * Prune storage via POST /storage/prune.
+ *
+ * @param workerId - When truthy, sent as the `worker_id` query param.
+ * @param images - Prune images (defaults to true).
+ * @param containers - Prune containers (defaults to true).
+ * @param volumes - Prune volumes (defaults to false).
+ * @param buildCache - Prune build cache (defaults to true), sent as `build_cache`.
+ * @returns Pretty-printed JSON success payload with one `results` entry per
+ *   item of the response array; a non-array response yields an empty list.
+ */
+async function pruneStorage(
+  workerId?: number,
+  images: boolean = true,
+  containers: boolean = true,
+  volumes: boolean = false,
+  buildCache: boolean = true,
+): Promise<string> {
+  const params: any = workerId ? { worker_id: workerId } : {};
+  const body = { images, containers, volumes, build_cache: buildCache };
+
+  const { data } = await api.post("/storage/prune", body, { params });
+  const outcomes = Array.isArray(data) ? data : [];
+
+  // Values of at least 1024^3 are rendered in GB, everything else in MB.
+  const formatSize = (bytes: number) =>
+    bytes >= 1024 * 1024 * 1024
+      ? `${(bytes / 1024 / 1024 / 1024).toFixed(2)} GB`
+      : `${(bytes / 1024 / 1024).toFixed(2)} MB`;
+
+  const rows = outcomes.map((outcome: any) => ({
+    worker: outcome.worker_name,
+    images_deleted: outcome.images_deleted,
+    containers_deleted: outcome.containers_deleted,
+    volumes_deleted: outcome.volumes_deleted,
+    build_cache_deleted: outcome.build_cache_deleted,
+    space_reclaimed: formatSize(outcome.space_reclaimed),
+  }));
+
+  const payload = {
+    success: true,
+    message: "Storage pruned successfully",
+    results: rows,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+// ============================================================================
+// Auto-Tuning Tools
+// ============================================================================
+
+/**
+ * List auto-tuning jobs from GET /auto-tuning/jobs.
+ *
+ * @param status - Exact status to keep. Omitted, empty or "all" keeps every
+ *   job, matching how listContainers and listDeployments treat "all".
+ * @returns Pretty-printed JSON with `total`, a `jobs` array (each with a
+ *   condensed `progress` object or null) and a `note` pointing to the
+ *   Auto-Tuning page.
+ */
+async function listTuningJobs(status?: string): Promise<string> {
+  const response = await api.get("/auto-tuning/jobs");
+  let jobs = response.data.items || [];
+
+  // "all" means "no filter"; without this check it would be compared
+  // literally against job statuses and always produce an empty list.
+  if (status && status !== "all") {
+    jobs = jobs.filter((j: any) => j.status === status);
+  }
+
+  return JSON.stringify(
+    {
+      total: jobs.length,
+      jobs: jobs.map((j: any) => ({
+        id: j.id,
+        model: j.model_name,
+        worker: j.worker_name,
+        optimization_target: j.optimization_target,
+        status: j.status,
+        progress: j.progress
+          ? {
+              step: j.progress.step,
+              total_steps: j.progress.total_steps,
+              step_name: j.progress.step_name,
+              configs_tested: j.progress.configs_tested,
+              configs_total: j.progress.configs_total,
+              best_score: j.progress.best_score_so_far,
+            }
+          : null,
+        created_at: j.created_at,
+      })),
+      note: "View detailed results at [Auto-Tuning](/auto-tuning) page",
+    },
+    null,
+    2,
+  );
+}
+
+/**
+ * Start an auto-tuning job via POST /auto-tuning/jobs.
+ *
+ * @param modelId - Model to tune (required).
+ * @param workerId - Worker to tune on (required).
+ * @param optimizationTarget - Optimization target; falsy values become "balanced".
+ * @param modelConfig - Chat model selection used to derive `llm_config`:
+ *   a "deployment" config with a deploymentId sends `deployment_id`; a
+ *   "custom" config with an endpoint sends `base_url`, `api_key` and `model`;
+ *   anything else sends no `llm_config`.
+ * @returns Pretty-printed JSON describing the created job, or an
+ *   "Error: ..." string when a required id is missing.
+ */
+async function startAutoTuning(
+  modelId: number,
+  workerId: number,
+  optimizationTarget?: string,
+  modelConfig?: ChatModelConfig,
+): Promise<string> {
+  const missingRequired = !modelId || !workerId;
+  if (missingRequired) return "Error: model_id and worker_id are required";
+
+  // LLM configuration for the agent; stays undefined when the selected model
+  // provides neither a deployment id nor a custom endpoint.
+  let llmConfig: Record<string, any> | undefined;
+  if (modelConfig?.type === "deployment" && modelConfig.deploymentId) {
+    // Use local deployment
+    llmConfig = { deployment_id: modelConfig.deploymentId };
+  } else if (modelConfig?.type === "custom" && modelConfig.endpoint) {
+    // Use custom endpoint
+    llmConfig = {
+      base_url: modelConfig.endpoint,
+      api_key: modelConfig.apiKey || "",
+      model: modelConfig.modelId || modelConfig.name,
+    };
+  }
+
+  const body = {
+    model_id: modelId,
+    worker_id: workerId,
+    optimization_target: optimizationTarget || "balanced",
+    llm_config: llmConfig,
+  };
+  const { data: created } = await api.post("/auto-tuning/jobs", body);
+
+  const payload = {
+    success: true,
+    message:
+      "Auto-tuning job started! The agent will use the selected model for reasoning.",
+    job: {
+      id: created.id,
+      model: created.model_name,
+      worker: created.worker_name,
+      optimization_target: created.optimization_target,
+      status: created.status,
+      agent_llm: modelConfig?.name || "auto-detected",
+    },
+    note: "Track progress at [Auto-Tuning](/auto-tuning) page. This may take several minutes depending on the number of configurations to test.",
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+/**
+ * Fetch one auto-tuning job via GET /auto-tuning/jobs/{id}.
+ *
+ * @param jobId - Job to fetch; a falsy value short-circuits.
+ * @returns Pretty-printed JSON with the job's status, detailed `progress`
+ *   (or null), best config and all results, or an "Error: ..." string when
+ *   the id is missing.
+ */
+async function getTuningJob(jobId: number): Promise<string> {
+  if (!jobId) return "Error: job_id is required";
+
+  const { data: job } = await api.get(`/auto-tuning/jobs/${jobId}`);
+  const progress = job.progress;
+
+  const progressView = progress
+    ? {
+        step: progress.step,
+        total_steps: progress.total_steps,
+        step_name: progress.step_name,
+        step_description: progress.step_description,
+        configs_tested: progress.configs_tested,
+        configs_total: progress.configs_total,
+        current_config: progress.current_config,
+        best_config_so_far: progress.best_config_so_far,
+        best_score_so_far: progress.best_score_so_far,
+      }
+    : null;
+
+  const payload = {
+    id: job.id,
+    model: job.model_name,
+    worker: job.worker_name,
+    optimization_target: job.optimization_target,
+    status: job.status,
+    status_message: job.status_message,
+    progress: progressView,
+    best_config: job.best_config,
+    all_results: job.all_results,
+    created_at: job.created_at,
+    completed_at: job.completed_at,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+/**
+ * Cancel an auto-tuning job via POST /auto-tuning/jobs/{id}/cancel.
+ *
+ * @param jobId - Job to cancel; a falsy value short-circuits.
+ * @returns Pretty-printed JSON success payload, or an "Error: ..." string
+ *   when the id is missing.
+ */
+async function cancelTuningJob(jobId: number): Promise<string> {
+  if (!jobId) return "Error: job_id is required";
+
+  await api.post(`/auto-tuning/jobs/${jobId}/cancel`);
+
+  const payload = {
+    success: true,
+    message: `Tuning job ${jobId} cancelled successfully`,
+  };
+  return JSON.stringify(payload, null, 2);
+}
+
+/**
+ * Query the performance knowledge base via POST /auto-tuning/knowledge/query.
+ *
+ * @param modelName - Optional model name filter.
+ * @param modelFamily - Optional model family filter.
+ * @param gpuModel - Optional GPU model filter.
+ * @param optimizationTarget - Optimization target; falsy values become "balanced".
+ * @param limit - Maximum number of results; falsy values become 10.
+ * @returns Pretty-printed JSON with `total`, the echoed `query`, a `results`
+ *   array of benchmark summaries and an explanatory `note`.
+ */
+async function queryKnowledgeBase(
+  modelName?: string,
+  modelFamily?: string,
+  gpuModel?: string,
+  optimizationTarget?: string,
+  limit?: number,
+): Promise<string> {
+  const response = await api.post("/auto-tuning/knowledge/query", {
+    model_name: modelName,
+    model_family: modelFamily,
+    gpu_model: gpuModel,
+    optimization_target: optimizationTarget || "balanced",
+    limit: limit || 10,
+  });
+
+  const data = response.data;
+  // Fall back to an empty list like the other list tools, so a response
+  // without `items` yields empty results instead of a TypeError.
+  const items = data.items || [];
+
+  return JSON.stringify(
+    {
+      total: data.total,
+      query: data.query,
+      results: items.map((r: any) => ({
+        gpu: `${r.gpu_count}x ${r.gpu_model}`,
+        total_vram_gb: r.total_vram_gb,
+        model: r.model_name,
+        model_family: r.model_family,
+        model_params_b: r.model_params_b,
+        engine: r.engine,
+        quantization: r.quantization,
+        tensor_parallel: r.tensor_parallel,
+        throughput_tps: r.throughput_tps,
+        ttft_ms: r.ttft_ms,
+        tpot_ms: r.tpot_ms,
+        score: r.score,
+      })),
+      note: "These are benchmark results from similar configurations. Use this to guide your deployment decisions.",
+    },
+    null,
+    2,
+  );
+}
+
+/**
+ * Run a benchmark against a deployment via POST /auto-tuning/benchmarks/run.
+ *
+ * Falsy optional arguments are replaced by defaults before the request:
+ * test type "throughput", duration 60, input length 512, output length 128,
+ * concurrency 1.
+ *
+ * @param deploymentId - Deployment to benchmark (required).
+ * @param testType - Benchmark type.
+ * @param durationSeconds - Test duration, sent as `duration_seconds`.
+ * @param inputLength - Sent as `input_length`.
+ * @param outputLength - Sent as `output_length`.
+ * @param concurrency - Sent as `concurrency`.
+ * @returns Pretty-printed JSON with the benchmark's config and metrics, or
+ *   an "Error: ..." string when the deployment id is missing.
+ */
+async function runBenchmark(
+  deploymentId: number,
+  testType?: string,
+  durationSeconds?: number,
+  inputLength?: number,
+  outputLength?: number,
+  concurrency?: number,
+): Promise<string> {
+  if (!deploymentId) return "Error: deployment_id is required";
+
+  const body = {
+    deployment_id: deploymentId,
+    test_type: testType || "throughput",
+    duration_seconds: durationSeconds || 60,
+    input_length: inputLength || 512,
+    output_length: outputLength || 128,
+    concurrency: concurrency || 1,
+  };
+  const { data: outcome } = await api.post("/auto-tuning/benchmarks/run", body);
+
+  // `metrics` may be absent on the response; each field then serialises as undefined.
+  const measured = outcome.metrics;
+
+  const payload = {
+    success: true,
+    message: "Benchmark completed",
+    benchmark: {
+      id: outcome.id,
+      deployment_id: outcome.deployment_id,
+      test_type: outcome.test_type,
+      duration_seconds: outcome.test_duration_seconds,
+      config: {
+        input_length: outcome.input_length,
+        output_length: outcome.output_length,
+        concurrency: outcome.concurrency,
+      },
+      metrics: {
+        throughput_tps: measured?.throughput_tps,
+        ttft_ms: measured?.ttft_ms,
+        tpot_ms: measured?.tpot_ms,
+        total_latency_ms: measured?.total_latency_ms,
+        gpu_utilization: measured?.gpu_utilization,
+        vram_usage_gb: measured?.vram_usage_gb,
+      },
+    },
+    note: "Higher throughput (TPS) is better. Lower latency (TTFT, TPOT) is better.",
+  };
+  return JSON.stringify(payload, null, 2);
+}
diff --git a/frontend/src/components/chat-panel/types.ts b/frontend/src/components/chat-panel/types.ts
new file mode 100644
index 0000000..482e823
--- /dev/null
+++ b/frontend/src/components/chat-panel/types.ts
@@ -0,0 +1,65 @@
+/**
+ * Chat Panel Type Definitions
+ *
+ * Types for the global chat panel component.
+ */
+
+/**
+ * Kinds of model source a chat can target; used as the type of `ChatModelConfig.type`.
+ */
+export type ModelSourceType = "deployment" | "semantic-router" | "custom";
+
+/**
+ * Model selection for a chat; `type` indicates which of the optional fields apply.
+ */
+export interface ChatModelConfig {
+  type: ModelSourceType;
+  /** Deployment ID (set when type is "deployment") */
+  deploymentId?: number;
+  /** Display name of the selected model */
+  name: string;
+  /** Model ID sent in API requests (when type is "custom") */
+  modelId?: string;
+  /** URL of the custom endpoint (when type is "custom") */
+  endpoint?: string;
+  /** API key for the custom endpoint (when type is "custom") */
+  apiKey?: string;
+}
+
+/**
+ * A custom endpoint entry; `ChatPanelState.customEndpoints` holds a list of these.
+ */
+export interface CustomEndpoint {
+  id: string;
+  name: string;
+  endpoint: string;
+  apiKey?: string;
+  /** Model ID sent in API requests */
+  modelId?: string;
+}
+
+/**
+ * Chat panel state stored in localStorage (presumably under CHAT_PANEL_STORAGE_KEY; the persistence code is not in this file).
+ */
+export interface ChatPanelState {
+  isOpen: boolean;
+  width: number;
+  selectedModel: ChatModelConfig | null;
+  customEndpoints: CustomEndpoint[];
+}
+
+/**
+ * Default panel width (unit not stated here; presumably pixels, to be confirmed in the panel component)
+ */
+export const DEFAULT_PANEL_WIDTH = 420;
+
+/**
+ * Minimum and maximum panel width, in the same unit as DEFAULT_PANEL_WIDTH
+ */
+export const MIN_PANEL_WIDTH = 320;
+export const MAX_PANEL_WIDTH = 600;
+
+/**
+ * Storage key under which the chat panel state is kept
+ */
+export const CHAT_PANEL_STORAGE_KEY = "lmstack-chat-panel";
diff --git a/frontend/src/components/chat-panel/useChat.ts b/frontend/src/components/chat-panel/useChat.ts
new file mode 100644
index 0000000..4b3353e
--- /dev/null
+++ b/frontend/src/components/chat-panel/useChat.ts
@@ -0,0 +1,762 @@
+/**
+ * useChat Hook
+ *
+ * Encapsulates chat logic for streaming conversations with LLM endpoints.
+ * Supports deployments, Semantic Router, and custom OpenAI-compatible endpoints.
+ * Includes system context injection for AI assistant capabilities.
+ * Supports Tool Calling for LLM to interact with LMStack system.
+ */
+import { useState, useRef, useCallback, useEffect } from "react";
+import { message } from "antd";
+import { generateMessageId } from "../chat";
+import type { ChatMessage } from "../chat";
+import type { ChatModelConfig } from "./types";
+import { STORAGE_KEYS } from "../../constants";
+import {
+  fetchSystemContext,
+  formatSystemPrompt,
+  type SystemContext,
+} from "./systemContext";
+import {
+  CHAT_TOOLS,
+  executeTool,
+  requiresConfirmation,
+  getToolMeta,
+  type ToolCall,
+  type ToolResult,
+  type PendingToolExecution,
+} from "./tools";
+
+interface UseChatOptions {
+  /** Called with the user's message right after sendMessage appends it */
+  onMessageAdded?: (message: ChatMessage) => void;
+  /** Called once a send finishes without error, with the final assistant reply */
+  onStreamComplete?: (userMsg: ChatMessage, assistantMsg: ChatMessage) => void;
+}
+
+interface UseChatReturn {
+  messages: ChatMessage[];
+  isStreaming: boolean; // true from the start of sendMessage until it settles
+  isExecutingTool: boolean; // set around the main tool-execution loop
+  currentToolName: string | null; // name of the tool call being executed
+  pendingTools: PendingToolExecution[]; // tool calls awaiting confirmation
+  showConfirmModal: boolean; // true while sendMessage waits for confirm/cancel
+  systemContext: SystemContext | null; // latest fetchSystemContext() result
+  refreshContext: () => Promise<void>;
+  sendMessage: (content: string, model: ChatModelConfig) => Promise<void>;
+  stopStreaming: () => void; // aborts the in-flight request, if any
+  clearMessages: () => void;
+  setMessages: React.Dispatch<React.SetStateAction<ChatMessage[]>>;
+  confirmToolExecution: () => void; // resolves the pending confirmation: true
+  cancelToolExecution: () => void; // resolves the pending confirmation: false
+}
+
+/**
+ * Hook for managing chat state and streaming
+ */
+export function useChat(options: UseChatOptions = {}): UseChatReturn {
+  const { onMessageAdded, onStreamComplete } = options;
+
+  const [messages, setMessages] = useState<ChatMessage[]>([]);
+  const [isStreaming, setIsStreaming] = useState(false);
+  const [isExecutingTool, setIsExecutingTool] = useState(false);
+  const [currentToolName, setCurrentToolName] = useState<string | null>(null);
+  const [systemContext, setSystemContext] = useState<SystemContext | null>(
+    null,
+  );
+
+  // Tool confirmation state; the ref holds the resolver sendMessage waits on
+  const [pendingTools, setPendingTools] = useState<PendingToolExecution[]>([]);
+  const [showConfirmModal, setShowConfirmModal] = useState(false);
+  const pendingToolResolveRef = useRef<((confirmed: boolean) => void) | null>(
+    null,
+  );
+
+  const abortControllerRef = useRef<AbortController | null>(null); // null while idle
+
+  // Load the latest system context; refreshed on mount and every 30s below
+  const refreshContext = useCallback(async () => {
+    setSystemContext(await fetchSystemContext());
+  }, []);
+
+  useEffect(() => {
+    const POLL_MS = 30000;
+    refreshContext();
+    const timerId = setInterval(refreshContext, POLL_MS);
+    return () => clearInterval(timerId);
+  }, [refreshContext]);
+
+  // True for "custom" models: their requests are wrapped and sent through
+  // the backend proxy rather than posted as a bare chat payload
+  const isProxyRequest = useCallback(
+    (model: ChatModelConfig): boolean => model.type === "custom",
+    [],
+  );
+
+  /**
+   * Resolve the chat URL for a model; "" means no usable URL can be built
+   */
+  const getEndpointUrl = useCallback((model: ChatModelConfig): string => {
+    switch (model.type) {
+      case "deployment":
+        if (model.deploymentId == null) return ""; // not .../undefined/chat
+        return `/api/deployments/${model.deploymentId}/chat`;
+      case "semantic-router":
+        return `/api/semantic-router/chat`;
+      case "custom": // Use backend proxy to avoid CORS issues
+        return `/api/chat-proxy`;
+      default:
+        return "";
+    }
+  }, []);
+
+  /**
+   * Build request headers: JSON content type, plus a Bearer token when one
+   * is present in localStorage
+   */
+  const getHeaders = useCallback((): HeadersInit => {
+    const storedToken = localStorage.getItem(STORAGE_KEYS.TOKEN);
+    const jsonHeader = { "Content-Type": "application/json" };
+
+    // No token stored (null or empty string): send only the content type
+    if (!storedToken) {
+      return jsonHeader;
+    }
+
+    return { ...jsonHeader, Authorization: `Bearer ${storedToken}` };
+  }, []);
+
+  /**
+   * Stream a chat completion over SSE, updating the assistant message as
+   * chunks arrive. Throws on HTTP errors and on error chunks in the stream.
+   */
+  const streamChatCompletion = useCallback(
+    async (
+      endpoint: string,
+      requestBody: any,
+      assistantMessageId: string,
+      signal: AbortSignal,
+    ): Promise<{
+      content: string;
+      thinking: string;
+      model?: string;
+      toolCalls?: ToolCall[];
+      finishReason?: string;
+    }> => {
+      const response = await fetch(endpoint, {
+        method: "POST",
+        headers: getHeaders(),
+        body: JSON.stringify(requestBody),
+        signal,
+      });
+
+      if (!response.ok) {
+        throw new Error(`API error: ${response.status} ${response.statusText}`);
+      }
+
+      const reader = response.body?.getReader();
+      if (!reader) throw new Error("No response body");
+
+      const decoder = new TextDecoder();
+      let accumulatedContent = "";
+      let accumulatedThinking = "";
+      let responseModel: string | undefined;
+      let buffer = "";
+      let finishReason: string | undefined;
+
+      // Track tool calls being accumulated (keyed by the delta's `index`)
+      const toolCallsMap: Map<
+        number,
+        { id: string; name: string; arguments: string }
+      > = new Map();
+
+      // eslint-disable-next-line no-constant-condition
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+
+        buffer += decoder.decode(value, { stream: true });
+        const lines = buffer.split("\n");
+        buffer = lines.pop() || "";
+
+        for (const line of lines) {
+          const trimmedLine = line.trim();
+          if (!trimmedLine || !trimmedLine.startsWith("data:")) continue;
+
+          const data = trimmedLine.slice(5).trim();
+          if (data === "[DONE]") continue;
+
+          try {
+            const parsed = JSON.parse(data);
+
+            if (parsed?.error) {
+              throw new Error(parsed.error.message || "API error");
+            }
+
+            const choice = parsed?.choices?.[0];
+            const delta = choice?.delta;
+
+            // Track finish reason
+            if (choice?.finish_reason) {
+              finishReason = choice.finish_reason;
+            }
+
+            // Handle regular content
+            accumulatedContent += delta?.content || "";
+
+            // Handle thinking/reasoning content (for models like DeepSeek-R1)
+            accumulatedThinking += delta?.reasoning_content || "";
+
+            // Handle tool calls streaming
+            if (delta?.tool_calls) {
+              for (const tc of delta.tool_calls) {
+                const index = tc.index ?? 0;
+                if (!toolCallsMap.has(index)) {
+                  toolCallsMap.set(index, {
+                    id: tc.id || "",
+                    name: "",
+                    arguments: "",
+                  });
+                }
+                const existing = toolCallsMap.get(index)!;
+                if (tc.id) existing.id = tc.id;
+                if (tc.function?.name) existing.name = tc.function.name;
+                if (tc.function?.arguments)
+                  existing.arguments += tc.function.arguments;
+              }
+            }
+
+            if (!responseModel && parsed?.model) {
+              responseModel = parsed.model;
+            }
+
+            setMessages((prev) =>
+              prev.map((m) =>
+                m.id === assistantMessageId
+                  ? {
+                      ...m,
+                      content: accumulatedContent,
+                      thinking: accumulatedThinking || undefined,
+                      model: responseModel,
+                    }
+                  : m,
+              ),
+            );
+          } catch (err) {
+            // Skip invalid JSON only; in-stream API errors reach the caller
+            if (!(err instanceof SyntaxError)) throw err;
+          }
+        }
+      }
+
+      // Process remaining buffer (final line without a newline); text only
+      if (buffer.trim().startsWith("data:")) {
+        const data = buffer.trim().slice(5).trim();
+        if (data !== "[DONE]") {
+          try {
+            const parsed = JSON.parse(data);
+            if (parsed?.error)
+              throw new Error(parsed.error.message || "API error");
+            const choice = parsed?.choices?.[0];
+            const delta = choice?.delta;
+            if (choice?.finish_reason) finishReason = choice.finish_reason;
+            accumulatedContent += delta?.content || "";
+            accumulatedThinking += delta?.reasoning_content || "";
+            if (!responseModel && parsed?.model) {
+              responseModel = parsed.model;
+            }
+          } catch (err) {
+            if (!(err instanceof SyntaxError)) throw err; // skip bad JSON only
+          }
+        }
+      }
+
+      // Convert tool calls map to array
+      const toolCalls: ToolCall[] = [];
+      for (const [, tc] of toolCallsMap) {
+        if (tc.id && tc.name) {
+          toolCalls.push({
+            id: tc.id,
+            type: "function",
+            function: { name: tc.name, arguments: tc.arguments },
+          });
+        }
+      }
+
+      return {
+        content: accumulatedContent,
+        thinking: accumulatedThinking,
+        model: responseModel,
+        toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
+        finishReason,
+      };
+    },
+    [getHeaders],
+  );
+
+  /**
+   * Send a user message, stream the reply, and run the tool-calling loop
+   */
+  const sendMessage = useCallback(
+    async (content: string, model: ChatModelConfig) => {
+      if (!content.trim() || isStreaming) return;
+
+      const endpoint = getEndpointUrl(model);
+      if (!endpoint) {
+        message.error("Invalid endpoint configuration");
+        return;
+      }
+
+      setIsStreaming(true);
+
+      const userMessage: ChatMessage = {
+        id: generateMessageId(),
+        role: "user",
+        content: content.trim(),
+        timestamp: new Date(),
+      };
+
+      const assistantMessage: ChatMessage = {
+        id: generateMessageId(),
+        role: "assistant",
+        content: "",
+        timestamp: new Date(),
+      };
+
+      setMessages((prev) => [...prev, userMessage, assistantMessage]);
+      onMessageAdded?.(userMessage);
+
+      try {
+        abortControllerRef.current = new AbortController();
+
+        const modelName = model.modelId || model.name;
+
+        // Build messages array with system context
+        type ChatMessagePayload = {
+          role: string;
+          content: string | null;
+          tool_calls?: ToolCall[];
+          tool_call_id?: string;
+        };
+        const chatMessages: ChatMessagePayload[] = [];
+
+        // Add system prompt with current context
+        if (systemContext) {
+          chatMessages.push({
+            role: "system",
+            content: formatSystemPrompt(systemContext),
+          });
+        }
+
+        // Add conversation history (role/content only; earlier tool calls are not replayed)
+        chatMessages.push(
+          ...messages.map((m) => ({ role: m.role, content: m.content })),
+          { role: "user", content: content.trim() },
+        );
+
+        // Build the chat payload with tools
+        const chatPayload = {
+          model: modelName,
+          messages: chatMessages,
+          stream: true,
+          temperature: 0.7,
+          tools: CHAT_TOOLS,
+          tool_choice: "auto" as const,
+        };
+
+        // Build request body (proxy requests wrap the payload with endpoint and key)
+        const requestBody = isProxyRequest(model)
+          ? {
+              endpoint: model.endpoint,
+              api_key: model.apiKey || null,
+              payload: chatPayload,
+            }
+          : chatPayload;
+
+        // Track the current message being streamed to
+        let currentMessageId = assistantMessage.id;
+
+        // First streaming request
+        let result = await streamChatCompletion(
+          endpoint,
+          requestBody,
+          currentMessageId,
+          abortControllerRef.current.signal,
+        );
+
+        // Tool calling loop - continue until no more tool calls
+        let iterationCount = 0;
+        const maxIterations = 10; // Prevent infinite loops
+
+        while (
+          result.toolCalls &&
+          result.toolCalls.length > 0 &&
+          iterationCount < maxIterations
+        ) {
+          iterationCount++;
+
+          // Check if any tool requires confirmation
+          const toolsNeedingConfirmation = result.toolCalls.filter((tc) =>
+            requiresConfirmation(tc.function.name),
+          );
+
+          if (toolsNeedingConfirmation.length > 0) {
+            // Prepare pending tools for confirmation
+            const pending: PendingToolExecution[] =
+              toolsNeedingConfirmation.map((tc) => {
+                let parsedArgs: Record<string, any> = {};
+                try {
+                  parsedArgs = JSON.parse(tc.function.arguments);
+                } catch {
+                  parsedArgs = { raw: tc.function.arguments };
+                }
+                return {
+                  toolCall: tc,
+                  parsedArgs,
+                  meta: getToolMeta(tc.function.name),
+                };
+              });
+
+            setPendingTools(pending);
+            setShowConfirmModal(true);
+
+            // Update message to show waiting for confirmation
+            setMessages((prev) =>
+              prev.map((m) =>
+                m.id === currentMessageId
+                  ? {
+                      ...m,
+                      content: result.content || "",
+                      toolCalls: result.toolCalls,
+                    }
+                  : m,
+              ),
+            );
+
+            // Wait until confirmToolExecution/cancelToolExecution settles this promise
+            const confirmed = await new Promise<boolean>((resolve) => {
+              pendingToolResolveRef.current = resolve;
+            });
+
+            setPendingTools([]);
+            setShowConfirmModal(false);
+            pendingToolResolveRef.current = null;
+
+            if (!confirmed) {
+              // User cancelled - stop tool execution and inform LLM
+              const cancelledResults: ToolResult[] =
+                toolsNeedingConfirmation.map((tc) => ({
+                  tool_call_id: tc.id,
+                  role: "tool" as const,
+                  content: JSON.stringify({
+                    success: false,
+                    message: "User cancelled the operation",
+                  }),
+                }));
+
+              // Execute query tools that don't need confirmation
+              const queryTools = result.toolCalls.filter(
+                (tc) => !requiresConfirmation(tc.function.name),
+              );
+              for (const tc of queryTools) {
+                const queryResult = await executeTool(tc, model);
+                cancelledResults.push(queryResult);
+              }
+
+              // Update message
+              setMessages((prev) =>
+                prev.map((m) =>
+                  m.id === currentMessageId
+                    ? {
+                        ...m,
+                        content: result.content || "",
+                        toolCalls: result.toolCalls,
+                      }
+                    : m,
+                ),
+              );
+
+              // Build new messages with cancelled results
+              const newMessages: ChatMessagePayload[] = [
+                ...chatMessages,
+                {
+                  role: "assistant",
+                  content: result.content || null,
+                  tool_calls: result.toolCalls,
+                },
+                ...cancelledResults.map((tr) => ({
+                  role: "tool",
+                  content: tr.content,
+                  tool_call_id: tr.tool_call_id,
+                })),
+              ];
+
+              // Continue to let LLM know about cancellation
+              const continuationMessage: ChatMessage = {
+                id: generateMessageId(),
+                role: "assistant",
+                content: "",
+                timestamp: new Date(),
+              };
+
+              setMessages((prev) =>
+                prev
+                  .map((m) =>
+                    m.id === currentMessageId
+                      ? {
+                          ...m,
+                          content: result.content || "",
+                          toolCalls: result.toolCalls,
+                        }
+                      : m,
+                  )
+                  .concat(continuationMessage),
+              );
+
+              // Update current message ID to the continuation
+              currentMessageId = continuationMessage.id;
+
+              const continuationPayload = {
+                model: modelName,
+                messages: newMessages,
+                stream: true,
+                temperature: 0.7,
+                tools: CHAT_TOOLS,
+                tool_choice: "auto" as const,
+              };
+
+              const continuationRequestBody = isProxyRequest(model)
+                ? {
+                    endpoint: model.endpoint,
+                    api_key: model.apiKey || null,
+                    payload: continuationPayload,
+                  }
+                : continuationPayload;
+
+              result = await streamChatCompletion(
+                endpoint,
+                continuationRequestBody,
+                currentMessageId,
+                abortControllerRef.current.signal,
+              );
+
+              chatMessages.length = 0;
+              chatMessages.push(...newMessages);
+              continue;
+            }
+          }
+
+          // Execute all tool calls (confirmed or query-only)
+          setIsExecutingTool(true);
+          const toolResults: ToolResult[] = [];
+
+          for (const toolCall of result.toolCalls) {
+            setCurrentToolName(toolCall.function.name);
+
+            // Update message to show tool execution
+            setMessages((prev) =>
+              prev.map((m) =>
+                m.id === currentMessageId
+                  ? {
+                      ...m,
+                      content: result.content || "",
+                      toolCalls: result.toolCalls,
+                    }
+                  : m,
+              ),
+            );
+
+            const toolResult = await executeTool(toolCall, model);
+            toolResults.push(toolResult);
+
+            // Refresh system context after tool execution (data may have changed)
+            await refreshContext();
+          }
+
+          setIsExecutingTool(false);
+          setCurrentToolName(null);
+
+          // Build new messages array with tool calls and results
+          const newMessages: ChatMessagePayload[] = [
+            ...chatMessages,
+            // Assistant message with tool calls
+            {
+              role: "assistant",
+              content: result.content || null,
+              tool_calls: result.toolCalls,
+            },
+            // Tool results
+            ...toolResults.map((tr) => ({
+              role: "tool",
+              content: tr.content,
+              tool_call_id: tr.tool_call_id,
+            })),
+          ];
+
+          // Create new assistant message for continued response
+          const continuationMessage: ChatMessage = {
+            id: generateMessageId(),
+            role: "assistant",
+            content: "",
+            timestamp: new Date(),
+          };
+
+          setMessages((prev) => {
+            // Update the current assistant message and add continuation
+            return prev
+              .map((m) =>
+                m.id === currentMessageId
+                  ? {
+                      ...m,
+                      content: result.content || "",
+                      toolCalls: result.toolCalls,
+                    }
+                  : m,
+              )
+              .concat(continuationMessage);
+          });
+
+          // Update current message ID to the continuation
+          currentMessageId = continuationMessage.id;
+
+          // Build new chat payload with tool results
+          const continuationPayload = {
+            model: modelName,
+            messages: newMessages,
+            stream: true,
+            temperature: 0.7,
+            tools: CHAT_TOOLS,
+            tool_choice: "auto" as const,
+          };
+
+          const continuationRequestBody = isProxyRequest(model)
+            ? {
+                endpoint: model.endpoint,
+                api_key: model.apiKey || null,
+                payload: continuationPayload,
+              }
+            : continuationPayload;
+
+          // Continue streaming
+          result = await streamChatCompletion(
+            endpoint,
+            continuationRequestBody,
+            currentMessageId,
+            abortControllerRef.current.signal,
+          );
+
+          // Update chat messages for next iteration if needed
+          chatMessages.length = 0;
+          chatMessages.push(...newMessages);
+        }
+
+        // Final update - only update the current (last) message
+        setMessages((prev) =>
+          prev.map((m) => {
+            if (m.id === currentMessageId) {
+              return {
+                ...m,
+                content: result.content,
+                thinking: result.thinking || undefined,
+                model: result.model,
+                // Only set toolCalls if there are any (don't overwrite with undefined)
+                ...(result.toolCalls ? { toolCalls: result.toolCalls } : {}),
+              };
+            }
+            return m;
+          }),
+        );
+
+        // Get the final message for callback
+        const finalAssistantMsg: ChatMessage = {
+          id: currentMessageId,
+          role: "assistant",
+          content: result.content,
+          thinking: result.thinking || undefined,
+          model: result.model,
+          toolCalls: result.toolCalls,
+          timestamp: new Date(),
+        };
+
+        onStreamComplete?.(userMessage, finalAssistantMsg);
+      } catch (error: unknown) {
+        const err = error as Error;
+        if (err.name === "AbortError") {
+          message.info("Generation stopped");
+        } else {
+          message.error(`Error: ${err.message}`);
+          // Remove the initial assistant message. NOTE(review): continuation placeholders stay — confirm
+          setMessages((prev) =>
+            prev.filter((m) => m.id !== assistantMessage.id),
+          );
+        }
+      } finally {
+        setIsStreaming(false);
+        setIsExecutingTool(false);
+        setCurrentToolName(null);
+        abortControllerRef.current = null;
+      }
+    },
+    [
+      messages,
+      isStreaming,
+      systemContext,
+      getEndpointUrl,
+      isProxyRequest,
+      getHeaders,
+      onMessageAdded,
+      onStreamComplete,
+      streamChatCompletion,
+      refreshContext,
+    ],
+  );
+
+  // Abort the in-flight request, if any. The controller ref is null while
+  // idle, in which case this is a no-op.
+  const stopStreaming = useCallback(() => {
+    const controller = abortControllerRef.current;
+    if (controller) controller.abort();
+  }, []);
+
+  // Reset the conversation to an empty list. Only the messages state is
+  // touched; streaming and tool state are left as they are.
+  const clearMessages = useCallback(() => {
+    const emptyConversation: ChatMessage[] = [];
+    setMessages(emptyConversation);
+  }, []);
+
+  /**
+   * Approve the tool calls awaiting confirmation by resolving the parked
+   * promise with true; does nothing when no confirmation is pending
+   */
+  const confirmToolExecution = useCallback(() => {
+    const resolvePending = pendingToolResolveRef.current;
+    resolvePending?.(true);
+  }, []);
+
+  /**
+   * Reject the tool calls awaiting confirmation by resolving the parked
+   * promise with false; does nothing when no confirmation is pending
+   */
+  const cancelToolExecution = useCallback(() => {
+    const resolvePending = pendingToolResolveRef.current;
+    resolvePending?.(false);
+  }, []);
+
+  return {
+    messages,
+    isStreaming,
+    isExecutingTool,
+    currentToolName,
+    pendingTools,
+    showConfirmModal,
+    systemContext,
+    refreshContext,
+    sendMessage,
+    stopStreaming,
+    clearMessages,
+    setMessages,
+    confirmToolExecution,
+    cancelToolExecution,
+  };
+}
diff --git a/frontend/src/components/chat/MessageContent.tsx b/frontend/src/components/chat/MessageContent.tsx
index 93c3c4b..0fdc184 100644
--- a/frontend/src/components/chat/MessageContent.tsx
+++ b/frontend/src/components/chat/MessageContent.tsx
@@ -133,5 +133,49 @@ function createMarkdownComponents(isDark: boolean, colors: ThemeColors) {
         </InlineCode>
       );
     },
+    // Link renderer: in-app paths navigate client-side, others open a new tab
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    a({ href, children, ...props }: any) {
+      // "//host" and "/\host" resolve to another origin, so they are external
+      const isInternal = /^\/(?![/\\])/.test(href ?? "");
+      const linkColor = isDark ? "#60a5fa" : "#2563eb";
+      if (isInternal) {
+        return (
+          <a
+            href={href}
+            onClick={(e) => {
+              // Let the browser handle modified clicks (open in new tab/window)
+              if (e.metaKey || e.ctrlKey || e.shiftKey || e.altKey) return;
+              e.preventDefault();
+              // Push the URL and dispatch popstate (relied on to notify the router)
+              window.history.pushState({}, "", href);
+              window.dispatchEvent(new PopStateEvent("popstate"));
+            }}
+            style={{
+              color: linkColor,
+              textDecoration: "none",
+              fontWeight: 500,
+              cursor: "pointer",
+            }}
+            {...props}
+          >
+            {children}
+          </a>
+        );
+      }
+
+      // External links open in new tab
+      return (
+        <a
+          href={href}
+          target="_blank"
+          rel="noopener noreferrer"
+          style={{ color: linkColor, textDecoration: "none" }}
+          {...props}
+        >
+          {children}
+        </a>
+      );
+    },
   };
 }
diff --git a/frontend/src/components/chat/index.ts b/frontend/src/components/chat/index.ts
index 4c76bf4..91c9a43 100644
--- a/frontend/src/components/chat/index.ts
+++ b/frontend/src/components/chat/index.ts
@@ -6,7 +6,12 @@
  */
 
 // Types and utilities
-export type { ThemeColors, ChatMessage, ParsedContent } from "./types";
+export type {
+  ThemeColors,
+  ChatMessage,
+  ParsedContent,
+  ToolCall,
+} from "./types";
 export {
   getThemeColors,
   parseThinkingContent,
diff --git a/frontend/src/components/chat/types.ts b/frontend/src/components/chat/types.ts
index dfefaca..38cb3b9 100644
--- a/frontend/src/components/chat/types.ts
+++ b/frontend/src/components/chat/types.ts
@@ -4,11 +4,27 @@
  * Shared types for chat-related components.
  */
 
+/** Tool call from LLM response (useChat assembles it from streamed deltas) */
+export interface ToolCall {
+  id: string;
+  type: "function";
+  function: {
+    name: string;
+    arguments: string; // raw argument text; useChat runs JSON.parse on it
+  };
+}
+
 export interface ChatMessage {
   id: string;
   role: "user" | "assistant";
   content: string;
   timestamp: Date;
+  /** Model name reported by the response stream (for MoM/Semantic Router) */
+  model?: string;
+  /** Extended thinking content (accumulated from `reasoning_content` deltas) */
+  thinking?: string;
+  /** Tool calls made by the assistant */
+  toolCalls?: ToolCall[];
 }
 
 export interface ParsedContent {
diff --git a/frontend/src/pages/ApiKeys.tsx b/frontend/src/pages/ApiKeys.tsx
index 44f17f7..db6503e 100644
--- a/frontend/src/pages/ApiKeys.tsx
+++ b/frontend/src/pages/ApiKeys.tsx
@@ -7,6 +7,7 @@
  * @module pages/ApiKeys
  */
 import { useEffect, useState, useCallback, useMemo } from "react";
+import { useSearchParams } from "react-router-dom";
 import {
   Button,
   Card,
@@ -298,6 +299,7 @@ function StatsCard({ title, value, icon, suffix, isDark }: StatsCardProps) {
 // ============================================================================
 
 export default function ApiKeys() {
+  const [searchParams, setSearchParams] = useSearchParams();
   const [apiKeys, setApiKeys] = useState<ApiKey[]>([]);
   const [models, setModels] = useState<LLMModel[]>([]);
   const [runningDeployments, setRunningDeployments] = useState<Deployment[]>(
@@ -319,6 +321,15 @@ export default function ApiKeys() {
   const { isDark } = useAppTheme();
   const { canEdit } = useAuth();
 
+  // Handle ?action=new: open the create modal, then drop only that parameter
+  useEffect(() => {
+    if (searchParams.get("action") !== "new") return;
+    setCreateModalOpen(true);
+    const remaining = new URLSearchParams(searchParams);
+    remaining.delete("action"); // keep any unrelated query parameters
+    setSearchParams(remaining, { replace: true });
+  }, [searchParams, setSearchParams]);
+
   // Get base URL for API Gateway (always port 52000)
   const baseUrl = useMemo(() => {
     const { protocol, hostname } = window.location;
diff --git a/frontend/src/pages/AutoTuning.tsx b/frontend/src/pages/AutoTuning.tsx
new file mode 100644
index 0000000..ca054bf
--- /dev/null
+++ b/frontend/src/pages/AutoTuning.tsx
@@ -0,0 +1,1307 @@
+import { useEffect, useState, useCallback } from "react";
+import {
+  Button,
+  Card,
+  Form,
+  Modal,
+  Select,
+  Space,
+  Table,
+  Tag,
+  message,
+  Progress,
+  Typography,
+  Empty,
+  Tooltip,
+  Radio,
+  Tabs,
+  Statistic,
+  Row,
+  Col,
+  Input,
+  Divider,
+  Alert,
+  Popconfirm,
+} from "antd";
+import {
+  PlusOutlined,
+  ReloadOutlined,
+  ThunderboltOutlined,
+  CheckCircleOutlined,
+  CloseCircleOutlined,
+  LoadingOutlined,
+  ExperimentOutlined,
+  DatabaseOutlined,
+  RocketOutlined,
+  BarChartOutlined,
+  HistoryOutlined,
+  ApiOutlined,
+  DeleteOutlined,
+  CommentOutlined,
+} from "@ant-design/icons";
+import { useAppTheme } from "../hooks/useTheme";
+import { workersApi, modelsApi } from "../services/api";
+import { deploymentsApi } from "../api";
+import { api } from "../api/client";
+import type { Worker, LLMModel, Deployment } from "../types";
+import { useResponsive } from "../hooks";
+import { useAuth } from "../contexts/AuthContext";
+import {
+  CHAT_PANEL_STORAGE_KEY,
+  TUNING_JOB_EVENT_KEY,
+  type CustomEndpoint,
+  type ChatPanelState,
+} from "../components/chat-panel";
+import dayjs from "dayjs";
+import relativeTime from "dayjs/plugin/relativeTime";
+
+dayjs.extend(relativeTime);
+
+const { Text, Paragraph } = Typography;
+
+// Load chat panel state shared with the Chat Panel; {} when missing/invalid
+function loadChatPanelState(): Partial<ChatPanelState> {
+  try {
+    const raw = localStorage.getItem(CHAT_PANEL_STORAGE_KEY);
+    const state: Partial<ChatPanelState> | null = raw ? JSON.parse(raw) : null;
+    // JSON null/primitives parse without error but are not a state object
+    if (state && typeof state === "object") return state;
+  } catch {
+    // Ignore load errors (e.g. malformed JSON)
+  }
+  return {};
+}
+
+// Persist chat panel state (shared with Chat Panel). The given fields are
+// merged over whatever is currently stored, then written back as JSON.
+function saveChatPanelState(state: Partial<ChatPanelState>) {
+  try {
+    const previous = loadChatPanelState();
+    const merged = { ...previous, ...state };
+    const serialized = JSON.stringify(merged);
+    localStorage.setItem(CHAT_PANEL_STORAGE_KEY, serialized);
+  } catch {
+    // Ignore save errors
+  }
+}
+
+const REFRESH_INTERVAL = 3000; // ms between job-list polls (setInterval delay)
+
+// Types (client-side shapes for the /auto-tuning responses used on this page)
+interface TuningJobProgress {
+  step: number; // with total_steps, drives the progress-bar percentage
+  total_steps: number;
+  step_name: string;
+  step_description: string; // shown as the progress-bar tooltip
+  configs_tested: number; // rendered as "tested/total" while benchmarking
+  configs_total: number;
+  current_config?: Record<string, unknown>;
+  best_config_so_far?: Record<string, unknown>;
+  best_score_so_far?: number; // preferred source for the jobs Score column
+}
+
+interface ConversationMessage { // one entry of TuningJob.conversation_log
+  role: "user" | "assistant" | "tool";
+  content: string;
+  timestamp?: string;
+  tool_calls?: Array<{
+    id: string;
+    name: string;
+    arguments: string; // presumably JSON-encoded arguments — TODO confirm
+  }>;
+  tool_call_id?: string; // presumably the tool_calls id a "tool" message answers — verify
+  name?: string; // tool name
+}
+
+interface TuningJob { // one row of the Job History table
+  id: number;
+  model_id: number;
+  worker_id: number;
+  optimization_target: string; // labelled via getOptimizationTargetLabel
+  status: string; // colored/iconed via getStatusColor / getStatusIcon
+  status_message?: string;
+  current_step: number;
+  total_steps: number;
+  progress?: TuningJobProgress; // when absent the Progress column shows "-"
+  best_config?: Record<string, unknown>; // its `score` is the Score column fallback
+  all_results?: Record<string, unknown>[];
+  conversation_log?: ConversationMessage[]; // presumably only on the single-job response — verify
+  created_at: string; // parsed with dayjs and shown relative to now
+  updated_at: string;
+  completed_at?: string;
+  model_name?: string; // Model column; "Unknown" when empty
+  worker_name?: string; // Worker column
+}
+
+interface KnowledgeRecord { // one row of the Knowledge Base table
+  id: number;
+  gpu_model: string;
+  gpu_count: number; // rendered as "<count>x <gpu_model>"
+  total_vram_gb: number; // rendered with a "GB" suffix
+  model_name: string;
+  model_family: string; // secondary line under the model name (hidden on mobile)
+  engine: string;
+  quantization?: string; // extra tag under the engine when present
+  tensor_parallel: number; // "TP" column
+  throughput_tps: number; // "TPS" column
+  ttft_ms: number; // rendered with an "ms" suffix
+  tpot_ms: number; // rendered with an "ms" suffix
+  score?: number; // sorts as 0 when absent
+  created_at: string;
+}
+
+// Helper functions
+// Map a tuning-job status to the antd Tag color used to render it.
+// Every in-flight phase shares the "processing" color.
+function getStatusColor(status: string): string {
+  // Explicit comparisons instead of a plain-object lookup: a lookup would
+  // also match inherited keys (e.g. "constructor") and return a non-string.
+  const active = ["analyzing", "querying_kb", "exploring", "benchmarking"];
+  if (active.includes(status)) return "processing";
+  if (status === "completed") return "success";
+  if (status === "failed") return "error";
+  if (status === "cancelled") return "warning";
+  // "pending" and any unrecognised status fall back to the neutral color
+  return "default";
+}
+
+// Icon for a tuning-job status; plain loader for "pending"/unknown values.
+function getStatusIcon(status: string) {
+  if (status === "analyzing") return <LoadingOutlined spin />;
+  if (status === "querying_kb") return <DatabaseOutlined />;
+  if (status === "exploring") return <ExperimentOutlined />;
+  if (status === "benchmarking") return <BarChartOutlined />;
+  if (status === "completed") return <CheckCircleOutlined />;
+  if (status === "failed" || status === "cancelled") {
+    return <CloseCircleOutlined />;
+  }
+  // Compared explicitly: an object lookup would match "constructor" etc.
+  return <LoadingOutlined />;
+}
+
+// Human-readable label for an optimization target; unknown values (and
+// inherited Object keys such as "constructor") are returned unchanged.
+function getOptimizationTargetLabel(target: string): string {
+  if (target === "throughput") return "Throughput (TPS)";
+  if (target === "latency") return "Latency (TTFT/TPOT)";
+  if (target === "cost") return "Cost (Min Resources)";
+  if (target === "balanced") return "Balanced";
+  return target;
+}
+
+export default function AutoTuning() {
+  const [jobs, setJobs] = useState<TuningJob[]>([]);
+  const [workers, setWorkers] = useState<Worker[]>([]);
+  const [models, setModels] = useState<LLMModel[]>([]);
+  const [deployments, setDeployments] = useState<Deployment[]>([]);
+  const [knowledge, setKnowledge] = useState<KnowledgeRecord[]>([]);
+  const [loading, setLoading] = useState(true);
+  const [modalOpen, setModalOpen] = useState(false);
+  const [detailModal, setDetailModal] = useState<TuningJob | null>(null);
+  const [form] = Form.useForm();
+  const [addEndpointForm] = Form.useForm();
+  const { isMobile } = useResponsive();
+  const { isDark } = useAppTheme();
+  const { canEdit } = useAuth();
+
+  // Custom endpoints from shared localStorage (same as Chat Panel)
+  const [customEndpoints, setCustomEndpoints] = useState<CustomEndpoint[]>(
+    () => loadChatPanelState().customEndpoints || [],
+  );
+
+  // LLM source type for modal
+  const [llmSourceType, setLlmSourceType] = useState<"deployment" | "custom">(
+    "deployment",
+  );
+  const [showAddEndpoint, setShowAddEndpoint] = useState(false);
+
+  // Mirror customEndpoints into the shared chat-panel storage (on mount and on every change)
+  useEffect(() => {
+    saveChatPanelState({ customEndpoints });
+  }, [customEndpoints]);
+
+  // Reload the tuning-job list; a failed request is logged and state is kept
+  const fetchJobs = useCallback(async () => {
+    try {
+      const { data } = await api.get("/auto-tuning/jobs");
+      setJobs(data.items || []);
+    } catch (error) {
+      console.error("Failed to fetch tuning jobs:", error);
+    }
+  }, []);
+
+  // Query the knowledge base (limit 50); failures are logged, state is kept
+  const fetchKnowledge = useCallback(async () => {
+    const query = { limit: 50 };
+    try {
+      const { data } = await api.post("/auto-tuning/knowledge/query", query);
+      const records = data.items || [];
+      setKnowledge(records);
+    } catch (error) {
+      console.error("Failed to fetch knowledge:", error);
+    }
+  }, []);
+
+  // Load workers, models and running deployments in parallel (all-or-nothing)
+  const fetchResources = useCallback(async () => {
+    try {
+      const [workerPage, modelPage, deploymentPage] = await Promise.all([
+        workersApi.list(),
+        modelsApi.list(),
+        deploymentsApi.list({ status: "running" }),
+      ]);
+      setWorkers(workerPage.items || []);
+      setModels(modelPage.items || []);
+      setDeployments(deploymentPage.items || []);
+    } catch (error) {
+      console.error("Failed to fetch resources:", error);
+    }
+  }, []);
+
+  // Initial load: keep `loading` true until all three fetches have finished
+  useEffect(() => {
+    void (async () => {
+      setLoading(true);
+      const pending = [fetchJobs(), fetchResources(), fetchKnowledge()];
+      await Promise.all(pending);
+      setLoading(false);
+    })();
+  }, [fetchJobs, fetchResources, fetchKnowledge]);
+
+  // Poll the job list every REFRESH_INTERVAL ms while any job is unfinished.
+  // Each successful refresh replaces `jobs`, which re-arms the timer.
+  useEffect(() => {
+    const activeStatuses = [
+      "pending",
+      "analyzing",
+      "querying_kb",
+      "exploring",
+      "benchmarking",
+    ];
+
+    const anyActive = jobs.some((job) => activeStatuses.includes(job.status));
+    if (!anyActive) return;
+
+    const timerId = setInterval(fetchJobs, REFRESH_INTERVAL);
+    return () => clearInterval(timerId);
+  }, [jobs, fetchJobs]);
+
+  // Create a tuning job from the modal form, then ask the Chat Panel to open
+  // on it. The Agent LLM source comes from `llmSourceType`, not the form.
+  const handleCreate = async (values: {
+    model_id: number;
+    worker_id: number;
+    optimization_target: string;
+    llm_deployment_id?: number;
+    llm_custom_endpoint?: string;
+  }) => {
+    try {
+      // Build LLM config based on selection; it stays undefined when no
+      // deployment/endpoint resolves, and is then sent as undefined
+      let llm_config: Record<string, unknown> | undefined;
+
+      if (llmSourceType === "deployment" && values.llm_deployment_id) {
+        llm_config = { deployment_id: values.llm_deployment_id };
+      } else if (llmSourceType === "custom" && values.llm_custom_endpoint) {
+        const endpoint = customEndpoints.find(
+          (e) => e.id === values.llm_custom_endpoint,
+        );
+        if (endpoint) {
+          llm_config = {
+            base_url: endpoint.endpoint,
+            api_key: endpoint.apiKey,
+            model: endpoint.modelId,
+          };
+        }
+      }
+
+      const response = await api.post("/auto-tuning/jobs", {
+        model_id: values.model_id,
+        worker_id: values.worker_id,
+        optimization_target: values.optimization_target,
+        llm_config,
+      });
+      message.success("Auto-tuning job started");
+      setModalOpen(false);
+      form.resetFields();
+      setLlmSourceType("deployment");
+      fetchJobs();
+
+      // Trigger Chat Panel to open with tuning job view
+      const jobId = response.data.id;
+      if (jobId) {
+        // Serialize once so the stored value and the event payload carry the
+        // same timestamp (two Date.now() calls could disagree)
+        const payload = JSON.stringify({ jobId, timestamp: Date.now() });
+        localStorage.setItem(TUNING_JOB_EVENT_KEY, payload);
+        // Native `storage` events are not delivered to the window that made
+        // the change, so dispatch one manually for same-window listeners
+        window.dispatchEvent(
+          new StorageEvent("storage", {
+            key: TUNING_JOB_EVENT_KEY,
+            newValue: payload,
+          }),
+        );
+      }
+    } catch (error: unknown) {
+      const err = error as { response?: { data?: { detail?: string } } };
+      message.error(err.response?.data?.detail || "Failed to start tuning job");
+    }
+  };
+
+  // Append a custom endpoint built from the "Add New Endpoint" form values,
+  // then clear and hide that form. The id is derived from the current time.
+  const handleAddEndpoint = (values: {
+    name: string;
+    endpoint: string;
+    apiKey?: string;
+    modelId?: string;
+  }) => {
+    const { name, endpoint, apiKey, modelId } = values;
+    const id = `custom-${Date.now()}`;
+    const created: CustomEndpoint = { id, name, endpoint, apiKey, modelId };
+
+    // Functional update: append to whatever the list is at apply time
+    setCustomEndpoints((existing) => existing.concat(created));
+    addEndpointForm.resetFields();
+    setShowAddEndpoint(false);
+
+    message.success("Endpoint added");
+  };
+
+  // Drop the custom endpoint with the given id and confirm with a message
+  const handleDeleteEndpoint = (id: string) => {
+    setCustomEndpoints((all) => all.filter((ep) => ep.id !== id));
+    message.success("Endpoint removed");
+  };
+
+  // Cancel a job on the backend, then reload the list; errors show a message
+  const handleCancel = async (jobId: number) => {
+    try {
+      await api.post(`/auto-tuning/jobs/${jobId}/cancel`);
+      message.success("Tuning job cancelled");
+      fetchJobs();
+    } catch (error: unknown) {
+      const failure = error as { response?: { data?: { detail?: string } } };
+      message.error(failure.response?.data?.detail || "Failed to cancel job");
+    }
+  };
+
+  // Delete a job on the backend, then reload the list; errors show a message
+  const handleDelete = async (jobId: number) => {
+    try {
+      await api.delete(`/auto-tuning/jobs/${jobId}`);
+      message.success("Tuning job deleted");
+      fetchJobs();
+    } catch (error: unknown) {
+      const failure = error as { response?: { data?: { detail?: string } } };
+      message.error(failure.response?.data?.detail || "Failed to delete job");
+    }
+  };
+
+  // View job details (fetch with conversation log); stale responses are dropped
+  const [detailLoading, setDetailLoading] = useState(false);
+  const handleViewDetail = async (job: TuningJob) => {
+    setDetailModal(job); // Show modal immediately with basic info
+    setDetailLoading(true);
+    try {
+      const response = await api.get(`/auto-tuning/jobs/${job.id}`);
+      setDetailModal((cur) => (cur?.id === job.id ? response.data : cur));
+    } catch (error) {
+      console.error("Failed to fetch job details:", error);
+    } finally {
+      setDetailLoading(false);
+    }
+  };
+
+  // Auto-refresh detail modal for running jobs (every 2 s). A response that
+  // lands after the modal was cleared or moved to another job is discarded.
+  useEffect(() => {
+    if (!detailModal) return;
+    const jobId = detailModal.id;
+    const isRunning = [
+      "pending",
+      "analyzing",
+      "querying_kb",
+      "exploring",
+      "benchmarking",
+    ].includes(detailModal.status);
+    if (!isRunning) return;
+    const interval = setInterval(async () => {
+      try {
+        const response = await api.get(`/auto-tuning/jobs/${jobId}`);
+        setDetailModal((cur) => (cur?.id === jobId ? response.data : cur));
+      } catch (error) {
+        console.error("Failed to refresh job:", error);
+      }
+    }, 2000);
+    return () => clearInterval(interval);
+  }, [detailModal?.id, detailModal?.status]);
+
+  // Stats for the summary cards ("running" = any of the unfinished statuses below)
+  const completedJobs = jobs.filter((j) => j.status === "completed").length;
+  const runningJobs = jobs.filter((j) =>
+    [
+      "pending",
+      "analyzing",
+      "querying_kb",
+      "exploring",
+      "benchmarking",
+    ].includes(j.status),
+  ).length;
+
+  // Table columns for jobs
+  const jobColumns = [
+    {
+      title: "Model",
+      dataIndex: "model_name",
+      key: "model_name",
+      render: (name: string) => <Text strong>{name || "Unknown"}</Text>,
+    },
+    {
+      title: "Worker",
+      dataIndex: "worker_name",
+      key: "worker_name",
+      responsive: ["md" as const],
+    },
+    {
+      title: "Target",
+      dataIndex: "optimization_target",
+      key: "optimization_target",
+      responsive: ["sm" as const],
+      render: (target: string) => (
+        <Tag color="blue">{getOptimizationTargetLabel(target)}</Tag>
+      ),
+    },
+    {
+      title: "Status",
+      dataIndex: "status",
+      key: "status",
+      render: (status: string, record: TuningJob) => (
+        <Space size={4} wrap>
+          <Tag icon={getStatusIcon(status)} color={getStatusColor(status)}>
+            {status.toUpperCase()}
+          </Tag>
+          {record.progress && ["benchmarking"].includes(status) && (
+            <Text type="secondary" style={{ fontSize: 12 }}>
+              {record.progress.configs_tested}/{record.progress.configs_total}
+            </Text>
+          )}
+        </Space>
+      ),
+    },
+    {
+      title: "Progress",
+      key: "progress",
+      width: 120,
+      render: (_: unknown, record: TuningJob) => {
+        // A missing or zero step count would make the ratio NaN/Infinity
+        if (!record.progress?.total_steps) return "-";
+        const { step, total_steps, step_description } = record.progress;
+        const percent = Math.round((step / total_steps) * 100);
+        return (
+          <Tooltip title={step_description}>
+            <Progress
+              percent={percent}
+              size="small"
+              status={
+                record.status === "failed"
+                  ? "exception"
+                  : record.status === "completed"
+                    ? "success"
+                    : "active"
+              }
+              style={{ width: 100, minWidth: 80 }}
+            />
+          </Tooltip>
+        );
+      },
+    },
+    {
+      title: "Score",
+      key: "best_score",
+      responsive: ["lg" as const],
+      render: (_: unknown, record: TuningJob) => {
+        const score =
+          record.progress?.best_score_so_far ??
+          (record.best_config?.score as number | undefined);
+        return typeof score === "number" ? (
+          <Text type="success">{score.toFixed(2)}</Text>
+        ) : (
+          "-"
+        );
+      },
+    },
+    {
+      title: "Created",
+      dataIndex: "created_at",
+      key: "created_at",
+      responsive: ["md" as const],
+      render: (date: string) => dayjs(date).fromNow(),
+    },
+    {
+      title: "Actions",
+      key: "actions",
+      render: (_: unknown, record: TuningJob) => {
+        // Cancel is offered while the job is in one of these statuses and
+        // Delete otherwise; both additionally require `canEdit`
+        const isRunning = [
+          "pending",
+          "analyzing",
+          "querying_kb",
+          "exploring",
+          "benchmarking",
+        ].includes(record.status);
+        return (
+          <Space size={4} wrap>
+            <Tooltip title="在 Chat Panel 中查看">
+              <Button
+                size="small"
+                icon={<CommentOutlined />}
+                onClick={() => {
+                  // Trigger Chat Panel to open with tuning job view.
+                  // Serialized once so the stored value and the event
+                  // payload carry the same timestamp.
+                  const payload = JSON.stringify({
+                    jobId: record.id,
+                    timestamp: Date.now(),
+                  });
+                  localStorage.setItem(TUNING_JOB_EVENT_KEY, payload);
+                  // Same-window listeners get no native storage event
+                  window.dispatchEvent(
+                    new StorageEvent("storage", {
+                      key: TUNING_JOB_EVENT_KEY,
+                      newValue: payload,
+                    }),
+                  );
+                }}
+              >
+                {isMobile ? "" : "Chat"}
+              </Button>
+            </Tooltip>
+            <Button size="small" onClick={() => handleViewDetail(record)}>
+              Log
+            </Button>
+            {isRunning && canEdit && (
+              <Button
+                size="small"
+                danger
+                onClick={() => handleCancel(record.id)}
+              >
+                {isMobile ? "X" : "Cancel"}
+              </Button>
+            )}
+            {!isRunning && canEdit && (
+              <Popconfirm
+                title="Delete this job?"
+                description="This action cannot be undone."
+                onConfirm={() => handleDelete(record.id)}
+                okText="Delete"
+                cancelText="Cancel"
+                okButtonProps={{ danger: true }}
+              >
+                <Button size="small" danger icon={<DeleteOutlined />} />
+              </Popconfirm>
+            )}
+          </Space>
+        );
+      },
+    },
+  ];
+
+  // Table columns for knowledge base
+  const knowledgeColumns = [
+    {
+      title: "Model",
+      dataIndex: "model_name",
+      key: "model_name",
+      render: (name: string, record: KnowledgeRecord) => (
+        <div>
+          <Text strong>{name}</Text>
+          {!isMobile && (
+            <>
+              <br />
+              <Text type="secondary" style={{ fontSize: 12 }}>
+                {record.model_family}
+              </Text>
+            </>
+          )}
+        </div>
+      ),
+    },
+    {
+      title: "GPU",
+      key: "gpu",
+      responsive: ["md" as const],
+      render: (_: unknown, record: KnowledgeRecord) => (
+        <div>
+          <Text>
+            {record.gpu_count}x {record.gpu_model}
+          </Text>
+          <br />
+          <Text type="secondary" style={{ fontSize: 12 }}>
+            {record.total_vram_gb.toFixed(1)} GB
+          </Text>
+        </div>
+      ),
+    },
+    {
+      title: "Engine",
+      dataIndex: "engine",
+      key: "engine",
+      render: (engine: string, record: KnowledgeRecord) => (
+        <Space direction="vertical" size={0}>
+          <Tag color="blue">{engine}</Tag>
+          {record.quantization && (
+            <Tag color="green" style={{ marginTop: 2 }}>
+              {record.quantization}
+            </Tag>
+          )}
+        </Space>
+      ),
+    },
+    {
+      title: "TP",
+      dataIndex: "tensor_parallel",
+      key: "tensor_parallel",
+      responsive: ["lg" as const],
+    },
+    {
+      title: "TPS",
+      dataIndex: "throughput_tps",
+      key: "throughput_tps",
+      render: (v: number) => <Text type="success">{v.toFixed(1)}</Text>,
+      sorter: (a: KnowledgeRecord, b: KnowledgeRecord) =>
+        a.throughput_tps - b.throughput_tps,
+    },
+    {
+      title: "TTFT",
+      dataIndex: "ttft_ms",
+      key: "ttft_ms",
+      responsive: ["sm" as const],
+      render: (v: number) => `${v.toFixed(0)} ms`,
+      sorter: (a: KnowledgeRecord, b: KnowledgeRecord) => a.ttft_ms - b.ttft_ms,
+    },
+    {
+      title: "TPOT",
+      dataIndex: "tpot_ms",
+      key: "tpot_ms",
+      responsive: ["md" as const],
+      render: (v: number) => `${v.toFixed(1)} ms`,
+      sorter: (a: KnowledgeRecord, b: KnowledgeRecord) => a.tpot_ms - b.tpot_ms,
+    },
+    {
+      title: "Score",
+      dataIndex: "score",
+      key: "score",
+      responsive: ["lg" as const],
+      render: (v: number | undefined) => // 0 is a valid score: test the type
+        typeof v === "number" ? <Text type="success">{v.toFixed(2)}</Text> : "-",
+      sorter: (a: KnowledgeRecord, b: KnowledgeRecord) =>
+        (a.score || 0) - (b.score || 0),
+    },
+  ];
+
+  // Workers selectable for tuning: online and reporting at least one GPU
+  const availableWorkers = workers.filter(
+    (worker) => worker.status === "online" && (worker.gpu_info?.length ?? 0) > 0,
+  );
+
+  return (
+    <div style={{ padding: isMobile ? 16 : 24 }}>
+      {/* Stats Cards */}
+      <Row gutter={16} style={{ marginBottom: 16 }}>
+        <Col xs={24} sm={8}>
+          <Card>
+            <Statistic
+              title="Completed Jobs"
+              value={completedJobs}
+              prefix={<CheckCircleOutlined style={{ color: "#52c41a" }} />}
+            />
+          </Card>
+        </Col>
+        <Col xs={24} sm={8}>
+          <Card>
+            <Statistic
+              title="Running Jobs"
+              value={runningJobs}
+              prefix={
+                <RocketOutlined
+                  style={{ color: runningJobs > 0 ? "#1890ff" : "#d9d9d9" }}
+                />
+              }
+            />
+          </Card>
+        </Col>
+        <Col xs={24} sm={8}>
+          <Card>
+            <Statistic
+              title="Knowledge Records"
+              value={knowledge.length}
+              prefix={<DatabaseOutlined style={{ color: "#722ed1" }} />}
+            />
+          </Card>
+        </Col>
+      </Row>
+
+      {/* Main Content */}
+      <Card
+        title={
+          <Space>
+            <ThunderboltOutlined />
+            <span>Auto-Tuning Agent</span>
+          </Space>
+        }
+        extra={
+          <Space>
+            <Button
+              icon={<ReloadOutlined />}
+              onClick={() => {
+                fetchJobs();
+                fetchKnowledge();
+              }}
+            >
+              Refresh
+            </Button>
+            {canEdit && (
+              <Button
+                type="primary"
+                icon={<PlusOutlined />}
+                onClick={() => setModalOpen(true)}
+              >
+                New Tuning Job
+              </Button>
+            )}
+          </Space>
+        }
+      >
+        <Paragraph type="secondary" style={{ marginBottom: 16 }}>
+          Auto-Tuning Agent automatically finds the best deployment
+          configuration. Use the <strong>Chat Panel</strong> on the right to
+          interact with the agent, or start a job directly below.
+        </Paragraph>
+
+        <Tabs
+          defaultActiveKey="jobs"
+          items={[
+            {
+              key: "jobs",
+              label: (
+                <span>
+                  <HistoryOutlined /> Job History
+                </span>
+              ),
+              children: (
+                <Table
+                  dataSource={jobs}
+                  columns={jobColumns}
+                  rowKey="id"
+                  loading={loading}
+                  pagination={{ pageSize: 10 }}
+                  scroll={{ x: "max-content" }}
+                  style={{ overflowX: "auto" }}
+                  locale={{
+                    emptyText: (
+                      <Empty
+                        image={Empty.PRESENTED_IMAGE_SIMPLE}
+                        description="No tuning jobs yet"
+                      >
+                        {canEdit && (
+                          <Button
+                            type="primary"
+                            icon={<RocketOutlined />}
+                            onClick={() => setModalOpen(true)}
+                          >
+                            Start Auto-Tuning
+                          </Button>
+                        )}
+                      </Empty>
+                    ),
+                  }}
+                />
+              ),
+            },
+            {
+              key: "knowledge",
+              label: (
+                <span>
+                  <DatabaseOutlined /> Knowledge Base
+                </span>
+              ),
+              children: (
+                <div>
+                  <Paragraph type="secondary" style={{ marginBottom: 16 }}>
+                    Historical benchmark results used for configuration
+                    recommendations. The agent uses this data to suggest optimal
+                    configs for similar setups.
+                  </Paragraph>
+                  <Table
+                    dataSource={knowledge}
+                    columns={knowledgeColumns}
+                    rowKey="id"
+                    loading={loading}
+                    pagination={{ pageSize: 10 }}
+                    scroll={{ x: "max-content" }}
+                    style={{ overflowX: "auto" }}
+                    locale={{
+                      emptyText: (
+                        <Empty
+                          image={Empty.PRESENTED_IMAGE_SIMPLE}
+                          description="No knowledge records yet. Run benchmarks to populate the knowledge base."
+                        />
+                      ),
+                    }}
+                  />
+                </div>
+              ),
+            },
+          ]}
+        />
+      </Card>
+
+      {/* Create Modal */}
+      <Modal
+        title={
+          <Space>
+            <ThunderboltOutlined />
+            <span>Start Auto-Tuning</span>
+          </Space>
+        }
+        open={modalOpen}
+        onCancel={() => {
+          setModalOpen(false);
+          form.resetFields();
+          setLlmSourceType("deployment");
+          setShowAddEndpoint(false);
+        }}
+        footer={null}
+        width={600}
+      >
+        <Form form={form} layout="vertical" onFinish={handleCreate}>
+          {/* Model to tune */}
+          <Form.Item
+            name="model_id"
+            label="Model to Tune"
+            rules={[{ required: true, message: "Please select a model" }]}
+          >
+            <Select
+              placeholder="Select model to tune"
+              showSearch
+              optionFilterProp="children"
+            >
+              {models.map((model) => (
+                <Select.Option key={model.id} value={model.id}>
+                  {model.name}
+                </Select.Option>
+              ))}
+            </Select>
+          </Form.Item>
+
+          {/* Worker */}
+          <Form.Item
+            name="worker_id"
+            label="Worker"
+            rules={[{ required: true, message: "Please select a worker" }]}
+          >
+            <Select placeholder="Select worker">
+              {availableWorkers.map((worker) => (
+                <Select.Option key={worker.id} value={worker.id}>
+                  {worker.name} ({worker.gpu_info?.length || 0} GPUs)
+                </Select.Option>
+              ))}
+            </Select>
+          </Form.Item>
+
+          {/* Optimization Target */}
+          <Form.Item
+            name="optimization_target"
+            label="Optimization Target"
+            initialValue="balanced"
+          >
+            <Radio.Group>
+              <Radio.Button value="throughput">Throughput</Radio.Button>
+              <Radio.Button value="latency">Latency</Radio.Button>
+              <Radio.Button value="balanced">Balanced</Radio.Button>
+              <Radio.Button value="cost">Cost</Radio.Button>
+            </Radio.Group>
+          </Form.Item>
+
+          <Divider>
+            <Space>
+              <ApiOutlined />
+              <span>Agent LLM</span>
+            </Space>
+          </Divider>
+
+          {/* Agent LLM Selection */}
+          <Alert
+            message="Select which LLM the agent will use for reasoning and decision-making"
+            type="info"
+            showIcon
+            style={{ marginBottom: 16 }}
+          />
+
+          <Form.Item label="LLM Source">
+            <Radio.Group
+              value={llmSourceType}
+              onChange={(e) => setLlmSourceType(e.target.value)}
+            >
+              <Radio.Button value="deployment">Local Deployment</Radio.Button>
+              <Radio.Button value="custom">Custom Endpoint</Radio.Button>
+            </Radio.Group>
+          </Form.Item>
+
+          {llmSourceType === "deployment" && (
+            <Form.Item
+              name="llm_deployment_id"
+              label="Select Deployment"
+              rules={[
+                { required: true, message: "Please select a deployment" },
+              ]}
+            >
+              <Select placeholder="Select a running deployment">
+                {deployments.length === 0 ? (
+                  <Select.Option value="" disabled>
+                    No running deployments available
+                  </Select.Option>
+                ) : (
+                  deployments.map((d) => (
+                    <Select.Option key={d.id} value={d.id}>
+                      {d.name} ({d.model?.name || "Unknown model"})
+                    </Select.Option>
+                  ))
+                )}
+              </Select>
+            </Form.Item>
+          )}
+
+          {llmSourceType === "custom" && (
+            <>
+              <Form.Item
+                name="llm_custom_endpoint"
+                label="Select Endpoint"
+                rules={[
+                  {
+                    required: customEndpoints.length > 0,
+                    message: "Please select an endpoint",
+                  },
+                ]}
+              >
+                <Select
+                  placeholder={
+                    customEndpoints.length === 0
+                      ? "No endpoints - add one below"
+                      : "Select an endpoint"
+                  }
+                  disabled={customEndpoints.length === 0}
+                  dropdownRender={(menu) => (
+                    <>
+                      {menu}
+                      <Divider style={{ margin: "8px 0" }} />
+                      <Button
+                        type="link"
+                        icon={<PlusOutlined />}
+                        onClick={() => setShowAddEndpoint(true)}
+                        style={{ width: "100%", textAlign: "left" }}
+                      >
+                        Add New Endpoint
+                      </Button>
+                    </>
+                  )}
+                >
+                  {customEndpoints.map((ep) => (
+                    <Select.Option key={ep.id} value={ep.id}>
+                      <Space
+                        style={{
+                          width: "100%",
+                          justifyContent: "space-between",
+                        }}
+                      >
+                        <span>{ep.name}</span>
+                        <Button
+                          type="text"
+                          size="small"
+                          danger
+                          icon={<DeleteOutlined />}
+                          onClick={(e) => {
+                            e.stopPropagation();
+                            handleDeleteEndpoint(ep.id);
+                          }}
+                        />
+                      </Space>
+                    </Select.Option>
+                  ))}
+                </Select>
+              </Form.Item>
+
+              {customEndpoints.length === 0 && !showAddEndpoint && (
+                <Button
+                  type="dashed"
+                  icon={<PlusOutlined />}
+                  onClick={() => setShowAddEndpoint(true)}
+                  block
+                  style={{ marginBottom: 16 }}
+                >
+                  Add Custom Endpoint
+                </Button>
+              )}
+
+              {showAddEndpoint && (
+                <Card
+                  size="small"
+                  title="Add New Endpoint"
+                  extra={
+                    <Button
+                      type="text"
+                      size="small"
+                      onClick={() => setShowAddEndpoint(false)}
+                    >
+                      Cancel
+                    </Button>
+                  }
+                  style={{ marginBottom: 16 }}
+                >
+                  <Form
+                    form={addEndpointForm}
+                    layout="vertical"
+                    size="small"
+                    onFinish={handleAddEndpoint}
+                  >
+                    <Form.Item
+                      name="name"
+                      label="Name"
+                      rules={[{ required: true, message: "Required" }]}
+                    >
+                      <Input placeholder="e.g., OpenAI GPT-4" />
+                    </Form.Item>
+                    <Form.Item
+                      name="endpoint"
+                      label="Endpoint URL"
+                      rules={[{ required: true, message: "Required" }]}
+                    >
+                      <Input placeholder="https://api.openai.com/v1" />
+                    </Form.Item>
+                    <Form.Item name="apiKey" label="API Key">
+                      <Input.Password placeholder="sk-..." />
+                    </Form.Item>
+                    <Form.Item name="modelId" label="Model ID">
+                      <Input placeholder="gpt-4o" />
+                    </Form.Item>
+                    <Button type="primary" htmlType="submit" size="small">
+                      Add Endpoint
+                    </Button>
+                  </Form>
+                </Card>
+              )}
+            </>
+          )}
+
+          <Form.Item style={{ marginTop: 24 }}>
+            <Space>
+              <Button
+                type="primary"
+                htmlType="submit"
+                icon={<RocketOutlined />}
+              >
+                Start Tuning
+              </Button>
+              <Button onClick={() => setModalOpen(false)}>Cancel</Button>
+            </Space>
+          </Form.Item>
+        </Form>
+      </Modal>
+
+      {/* Detail Modal - Docker-style Log View */}
+      <Modal
+        title={
+          <Space>
+            <ExperimentOutlined />
+            <span>Tuning Log - {detailModal?.model_name}</span>
+            <Tag
+              icon={detailModal ? getStatusIcon(detailModal.status) : null}
+              color={
+                detailModal ? getStatusColor(detailModal.status) : "default"
+              }
+            >
+              {detailModal?.status.toUpperCase()}
+            </Tag>
+            {detailLoading && <LoadingOutlined spin />}
+          </Space>
+        }
+        open={!!detailModal}
+        onCancel={() => setDetailModal(null)}
+        footer={null}
+        width={900}
+        styles={{ body: { padding: 0 } }}
+      >
+        {detailModal && (
+          <div>
+            {/* Docker-style Log Container */}
+            <div
+              style={{
+                background: "#1e1e1e",
+                color: "#d4d4d4",
+                fontFamily: "'Consolas', 'Monaco', 'Courier New', monospace",
+                fontSize: 13,
+                lineHeight: 1.5,
+                padding: 16,
+                maxHeight: "60vh",
+                overflowY: "auto",
+                whiteSpace: "pre-wrap",
+                wordBreak: "break-word",
+              }}
+            >
+              {detailModal.conversation_log &&
+              detailModal.conversation_log.length > 0 ? (
+                detailModal.conversation_log.map((msg, idx) => {
+                  const timestamp = msg.timestamp
+                    ? dayjs(msg.timestamp).format("HH:mm:ss")
+                    : "";
+
+                  if (msg.role === "user") {
+                    return (
+                      <div key={idx} style={{ marginBottom: 12 }}>
+                        <span style={{ color: "#569cd6" }}>[{timestamp}]</span>
+                        <span style={{ color: "#4ec9b0" }}> [USER] </span>
+                        <span style={{ color: "#ce9178" }}>{msg.content}</span>
+                      </div>
+                    );
+                  }
+
+                  if (msg.role === "assistant") {
+                    return (
+                      <div key={idx} style={{ marginBottom: 12 }}>
+                        <span style={{ color: "#569cd6" }}>[{timestamp}]</span>
+                        <span style={{ color: "#dcdcaa" }}> [AGENT] </span>
+                        {msg.content && (
+                          <span style={{ color: "#d4d4d4" }}>
+                            {msg.content}
+                          </span>
+                        )}
+                        {msg.tool_calls && msg.tool_calls.length > 0 && (
+                          <div style={{ marginLeft: 20, marginTop: 4 }}>
+                            {msg.tool_calls.map((tc, tcIdx) => (
+                              <div key={tcIdx} style={{ color: "#9cdcfe" }}>
+                                -&gt; Calling: {tc.name}(
+                                {(() => {
+                                  try {
+                                    const args = JSON.parse(tc.arguments);
+                                    return Object.entries(args)
+                                      .map(
+                                        ([k, v]) => `${k}=${JSON.stringify(v)}`,
+                                      )
+                                      .join(", ");
+                                  } catch {
+                                    return tc.arguments;
+                                  }
+                                })()}
+                                )
+                              </div>
+                            ))}
+                          </div>
+                        )}
+                      </div>
+                    );
+                  }
+
+                  if (msg.role === "tool") {
+                    let content = msg.content;
+                    try {
+                      const parsed = JSON.parse(msg.content);
+                      content = JSON.stringify(parsed, null, 2);
+                    } catch {
+                      // Keep original
+                    }
+                    return (
+                      <div key={idx} style={{ marginBottom: 12 }}>
+                        <span style={{ color: "#569cd6" }}>[{timestamp}]</span>
+                        <span style={{ color: "#6a9955" }}>
+                          {" "}
+                          [TOOL:{msg.name}]{" "}
+                        </span>
+                        <div
+                          style={{
+                            marginLeft: 20,
+                            marginTop: 4,
+                            padding: 8,
+                            background: "rgba(255,255,255,0.05)",
+                            borderRadius: 4,
+                            maxHeight: 200,
+                            overflow: "auto",
+                            color: "#b5cea8",
+                          }}
+                        >
+                          {content}
+                        </div>
+                      </div>
+                    );
+                  }
+
+                  return null;
+                })
+              ) : (
+                <div style={{ color: "#808080" }}>
+                  {detailLoading
+                    ? "Loading logs..."
+                    : detailModal.status === "pending"
+                      ? "Waiting for agent to start..."
+                      : "No logs available"}
+                </div>
+              )}
+
+              {/* Running indicator */}
+              {[
+                "pending",
+                "analyzing",
+                "querying_kb",
+                "exploring",
+                "benchmarking",
+              ].includes(detailModal.status) && (
+                <div style={{ marginTop: 8 }}>
+                  <span style={{ color: "#569cd6" }}>
+                    [{dayjs().format("HH:mm:ss")}]
+                  </span>
+                  <span style={{ color: "#c586c0" }}> [STATUS] </span>
+                  <span style={{ color: "#808080" }}>
+                    {detailModal.status_message || "Processing..."}
+                    <span
+                      className="blink"
+                      style={{ animation: "blink 1s infinite" }}
+                    >
+                      {" "}
+                      _
+                    </span>
+                  </span>
+                </div>
+              )}
+            </div>
+
+            {/* Best Config Section */}
+            {detailModal.best_config && (
+              <div
+                style={{
+                  padding: 16,
+                  borderTop: `1px solid ${isDark ? "#303030" : "#e8e8e8"}`,
+                }}
+              >
+                <Text strong>Best Configuration:</Text>
+                <pre
+                  style={{
+                    background: isDark ? "#1f1f1f" : "#f5f5f5",
+                    padding: 12,
+                    borderRadius: 6,
+                    overflow: "auto",
+                    margin: "8px 0 0 0",
+                    fontSize: 12,
+                  }}
+                >
+                  {JSON.stringify(detailModal.best_config, null, 2)}
+                </pre>
+              </div>
+            )}
+          </div>
+        )}
+      </Modal>
+    </div>
+  );
+}
diff --git a/frontend/src/pages/Deployments.tsx b/frontend/src/pages/Deployments.tsx
index 50da57a..bfe0b5f 100644
--- a/frontend/src/pages/Deployments.tsx
+++ b/frontend/src/pages/Deployments.tsx
@@ -1,4 +1,5 @@
 import { useEffect, useState, useCallback, useRef } from "react";
+import { useSearchParams } from "react-router-dom";
 import {
   Button,
   Card,
@@ -54,11 +55,22 @@ const { Text } = Typography;
 const REFRESH_INTERVAL = 5000;
 
 export default function Deployments() {
+  const [searchParams, setSearchParams] = useSearchParams();
   const [deployments, setDeployments] = useState<Deployment[]>([]);
   const [workers, setWorkers] = useState<Worker[]>([]);
   const [models, setModels] = useState<LLMModel[]>([]);
   const [loading, setLoading] = useState(true);
   const [modalOpen, setModalOpen] = useState(false);
+
+  // Handle URL action parameters (e.g., ?action=new)
+  useEffect(() => {
+    // Only react to ?action=new; any other value (or none) is ignored.
+    if (searchParams.get("action") !== "new") return;
+    setModalOpen(true);
+    // Remove just `action` (on a copy) so unrelated query params survive.
+    const next = new URLSearchParams(searchParams);
+    next.delete("action");
+    setSearchParams(next, { replace: true });
+  }, [searchParams, setSearchParams]);
   const [logsModal, setLogsModal] = useState<{
     id: number;
     name: string;
diff --git a/frontend/src/pages/Models.tsx b/frontend/src/pages/Models.tsx
index 2535469..04e2c49 100644
--- a/frontend/src/pages/Models.tsx
+++ b/frontend/src/pages/Models.tsx
@@ -1,4 +1,5 @@
 import { useEffect, useState } from "react";
+import { useSearchParams } from "react-router-dom";
 import {
   Button,
   Card,
@@ -99,6 +100,7 @@ function useResponsive() {
 }
 
 export default function Models() {
+  const [searchParams, setSearchParams] = useSearchParams();
   const { canEdit } = useAuth();
   const [models, setModels] = useState<LLMModel[]>([]);
   const [loading, setLoading] = useState(true);
@@ -118,6 +120,15 @@ export default function Models() {
 
   const SOURCE_CONFIG = getSourceConfig(isDark);
 
+  // Handle URL action parameters (e.g., ?action=new)
+  useEffect(() => {
+    if (searchParams.get("action") !== "new") return; // nothing to handle
+    setModalOpen(true);
+    const next = new URLSearchParams(searchParams); // copy; don't mutate
+    next.delete("action"); // drop only `action`; keep other query params
+    setSearchParams(next, { replace: true }); // replace: no extra history entry
+  }, [searchParams, setSearchParams]);
+
   const fetchModels = async () => {
     setLoading(true);
     try {
diff --git a/mcp-server/README.md b/mcp-server/README.md
new file mode 100644
index 0000000..c35bbc1
--- /dev/null
+++ b/mcp-server/README.md
@@ -0,0 +1,106 @@
+# LMStack MCP Server
+
+Model Context Protocol (MCP) server for the LMStack platform. It enables AI assistants such as Claude Desktop, Cursor, and other MCP-compatible clients to interact with your LMStack infrastructure.
+
+## Features
+
+### Resources
+
+| Resource URI | Description |
+|-------------|-------------|
+| `lmstack://system/status` | Complete system overview |
+| `lmstack://workers` | Worker nodes with GPU info |
+| `lmstack://containers` | Docker containers |
+| `lmstack://deployments` | Model deployments |
+| `lmstack://models` | Available models |
+
+### Tools
+
+| Tool | Description |
+|------|-------------|
+| `get_system_status` | Get complete system status |
+| `list_workers` | List all workers with GPU status |
+| `list_containers` | List Docker containers |
+| `list_deployments` | List model deployments |
+| `list_models` | List available models |
+| `get_gpu_status` | Get detailed GPU information |
+| `deploy_model` | Deploy a model to a worker |
+| `stop_deployment` | Stop a running deployment |
+
+## Installation
+
+```bash
+cd mcp-server
+npm install
+npm run build
+```
+
+## Configuration
+
+Set environment variables:
+
+```bash
+export LMSTACK_API_URL="http://localhost:8000/api"
+export LMSTACK_API_TOKEN="your-api-token"
+```
+
+## Usage with Claude Desktop
+
+Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS, `%APPDATA%\Claude\claude_desktop_config.json` on Windows):
+
+```json
+{
+  "mcpServers": {
+    "lmstack": {
+      "command": "node",
+      "args": ["/path/to/lmstack/mcp-server/dist/index.js"],
+      "env": {
+        "LMSTACK_API_URL": "http://localhost:8000/api",
+        "LMSTACK_API_TOKEN": "your-token"
+      }
+    }
+  }
+}
+```
+
+## Usage with Cursor
+
+Add to your Cursor MCP config (`~/.cursor/mcp.json` globally, or `.cursor/mcp.json` inside a project):
+
+```json
+{
+  "mcpServers": {
+    "lmstack": {
+      "command": "node",
+      "args": ["/path/to/lmstack/mcp-server/dist/index.js"],
+      "env": {
+        "LMSTACK_API_URL": "http://localhost:8000/api"
+      }
+    }
+  }
+}
+```
+
+## Development
+
+```bash
+# Run in development mode
+npm run dev
+
+# Inspect with MCP Inspector
+npm run inspect
+```
+
+## Example Queries
+
+Once connected, you can ask your AI assistant:
+
+- "Show me the current system status"
+- "How much GPU memory is available?"
+- "List all running containers"
+- "Deploy the Qwen model to worker-1"
+- "Stop deployment 5"
+
+## License
+
+MIT
diff --git a/mcp-server/package-lock.json b/mcp-server/package-lock.json
new file mode 100644
index 0000000..6a7a1a6
--- /dev/null
+++ b/mcp-server/package-lock.json
@@ -0,0 +1,1840 @@
+{
+  "name": "@lmstack/mcp-server",
+  "version": "0.1.0",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "@lmstack/mcp-server",
+      "version": "0.1.0",
+      "license": "MIT",
+      "dependencies": {
+        "@modelcontextprotocol/sdk": "^1.0.0",
+        "axios": "^1.6.0"
+      },
+      "bin": {
+        "lmstack-mcp": "dist/index.js"
+      },
+      "devDependencies": {
+        "@types/node": "^20.0.0",
+        "tsx": "^4.0.0",
+        "typescript": "^5.3.0"
+      }
+    },
+    "node_modules/@esbuild/aix-ppc64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.2.tgz",
+      "integrity": "sha512-GZMB+a0mOMZs4MpDbj8RJp4cw+w1WV5NYD6xzgvzUJ5Ek2jerwfO2eADyI6ExDSUED+1X8aMbegahsJi+8mgpw==",
+      "cpu": [
+        "ppc64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "aix"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/android-arm": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.2.tgz",
+      "integrity": "sha512-DVNI8jlPa7Ujbr1yjU2PfUSRtAUZPG9I1RwW4F4xFB1Imiu2on0ADiI/c3td+KmDtVKNbi+nffGDQMfcIMkwIA==",
+      "cpu": [
+        "arm"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "android"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/android-arm64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.2.tgz",
+      "integrity": "sha512-pvz8ZZ7ot/RBphf8fv60ljmaoydPU12VuXHImtAs0XhLLw+EXBi2BLe3OYSBslR4rryHvweW5gmkKFwTiFy6KA==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "android"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/android-x64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.2.tgz",
+      "integrity": "sha512-z8Ank4Byh4TJJOh4wpz8g2vDy75zFL0TlZlkUkEwYXuPSgX8yzep596n6mT7905kA9uHZsf/o2OJZubl2l3M7A==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "android"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/darwin-arm64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.2.tgz",
+      "integrity": "sha512-davCD2Zc80nzDVRwXTcQP/28fiJbcOwvdolL0sOiOsbwBa72kegmVU0Wrh1MYrbuCL98Omp5dVhQFWRKR2ZAlg==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/darwin-x64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.2.tgz",
+      "integrity": "sha512-ZxtijOmlQCBWGwbVmwOF/UCzuGIbUkqB1faQRf5akQmxRJ1ujusWsb3CVfk/9iZKr2L5SMU5wPBi1UWbvL+VQA==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/freebsd-arm64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.2.tgz",
+      "integrity": "sha512-lS/9CN+rgqQ9czogxlMcBMGd+l8Q3Nj1MFQwBZJyoEKI50XGxwuzznYdwcav6lpOGv5BqaZXqvBSiB/kJ5op+g==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "freebsd"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/freebsd-x64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.2.tgz",
+      "integrity": "sha512-tAfqtNYb4YgPnJlEFu4c212HYjQWSO/w/h/lQaBK7RbwGIkBOuNKQI9tqWzx7Wtp7bTPaGC6MJvWI608P3wXYA==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "freebsd"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-arm": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.2.tgz",
+      "integrity": "sha512-vWfq4GaIMP9AIe4yj1ZUW18RDhx6EPQKjwe7n8BbIecFtCQG4CfHGaHuh7fdfq+y3LIA2vGS/o9ZBGVxIDi9hw==",
+      "cpu": [
+        "arm"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-arm64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.2.tgz",
+      "integrity": "sha512-hYxN8pr66NsCCiRFkHUAsxylNOcAQaxSSkHMMjcpx0si13t1LHFphxJZUiGwojB1a/Hd5OiPIqDdXONia6bhTw==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-ia32": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.2.tgz",
+      "integrity": "sha512-MJt5BRRSScPDwG2hLelYhAAKh9imjHK5+NE/tvnRLbIqUWa+0E9N4WNMjmp/kXXPHZGqPLxggwVhz7QP8CTR8w==",
+      "cpu": [
+        "ia32"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-loong64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.2.tgz",
+      "integrity": "sha512-lugyF1atnAT463aO6KPshVCJK5NgRnU4yb3FUumyVz+cGvZbontBgzeGFO1nF+dPueHD367a2ZXe1NtUkAjOtg==",
+      "cpu": [
+        "loong64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-mips64el": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.2.tgz",
+      "integrity": "sha512-nlP2I6ArEBewvJ2gjrrkESEZkB5mIoaTswuqNFRv/WYd+ATtUpe9Y09RnJvgvdag7he0OWgEZWhviS1OTOKixw==",
+      "cpu": [
+        "mips64el"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-ppc64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.2.tgz",
+      "integrity": "sha512-C92gnpey7tUQONqg1n6dKVbx3vphKtTHJaNG2Ok9lGwbZil6DrfyecMsp9CrmXGQJmZ7iiVXvvZH6Ml5hL6XdQ==",
+      "cpu": [
+        "ppc64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-riscv64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.2.tgz",
+      "integrity": "sha512-B5BOmojNtUyN8AXlK0QJyvjEZkWwy/FKvakkTDCziX95AowLZKR6aCDhG7LeF7uMCXEJqwa8Bejz5LTPYm8AvA==",
+      "cpu": [
+        "riscv64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-s390x": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.2.tgz",
+      "integrity": "sha512-p4bm9+wsPwup5Z8f4EpfN63qNagQ47Ua2znaqGH6bqLlmJ4bx97Y9JdqxgGZ6Y8xVTixUnEkoKSHcpRlDnNr5w==",
+      "cpu": [
+        "s390x"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/linux-x64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.2.tgz",
+      "integrity": "sha512-uwp2Tip5aPmH+NRUwTcfLb+W32WXjpFejTIOWZFw/v7/KnpCDKG66u4DLcurQpiYTiYwQ9B7KOeMJvLCu/OvbA==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/netbsd-arm64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.2.tgz",
+      "integrity": "sha512-Kj6DiBlwXrPsCRDeRvGAUb/LNrBASrfqAIok+xB0LxK8CHqxZ037viF13ugfsIpePH93mX7xfJp97cyDuTZ3cw==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "netbsd"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/netbsd-x64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.2.tgz",
+      "integrity": "sha512-HwGDZ0VLVBY3Y+Nw0JexZy9o/nUAWq9MlV7cahpaXKW6TOzfVno3y3/M8Ga8u8Yr7GldLOov27xiCnqRZf0tCA==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "netbsd"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/openbsd-arm64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.2.tgz",
+      "integrity": "sha512-DNIHH2BPQ5551A7oSHD0CKbwIA/Ox7+78/AWkbS5QoRzaqlev2uFayfSxq68EkonB+IKjiuxBFoV8ESJy8bOHA==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "openbsd"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/openbsd-x64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.2.tgz",
+      "integrity": "sha512-/it7w9Nb7+0KFIzjalNJVR5bOzA9Vay+yIPLVHfIQYG/j+j9VTH84aNB8ExGKPU4AzfaEvN9/V4HV+F+vo8OEg==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "openbsd"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/openharmony-arm64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.2.tgz",
+      "integrity": "sha512-LRBbCmiU51IXfeXk59csuX/aSaToeG7w48nMwA6049Y4J4+VbWALAuXcs+qcD04rHDuSCSRKdmY63sruDS5qag==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "openharmony"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/sunos-x64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.2.tgz",
+      "integrity": "sha512-kMtx1yqJHTmqaqHPAzKCAkDaKsffmXkPHThSfRwZGyuqyIeBvf08KSsYXl+abf5HDAPMJIPnbBfXvP2ZC2TfHg==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "sunos"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/win32-arm64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.2.tgz",
+      "integrity": "sha512-Yaf78O/B3Kkh+nKABUF++bvJv5Ijoy9AN1ww904rOXZFLWVc5OLOfL56W+C8F9xn5JQZa3UX6m+IktJnIb1Jjg==",
+      "cpu": [
+        "arm64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/win32-ia32": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.2.tgz",
+      "integrity": "sha512-Iuws0kxo4yusk7sw70Xa2E2imZU5HoixzxfGCdxwBdhiDgt9vX9VUCBhqcwY7/uh//78A1hMkkROMJq9l27oLQ==",
+      "cpu": [
+        "ia32"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@esbuild/win32-x64": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.2.tgz",
+      "integrity": "sha512-sRdU18mcKf7F+YgheI/zGf5alZatMUTKj/jNS6l744f9u3WFu4v7twcUI9vu4mknF4Y9aDlblIie0IM+5xxaqQ==",
+      "cpu": [
+        "x64"
+      ],
+      "dev": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@hono/node-server": {
+      "version": "1.19.9",
+      "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.9.tgz",
+      "integrity": "sha512-vHL6w3ecZsky+8P5MD+eFfaGTyCeOHUIFYMGpQGbrBTSmNNoxv0if69rEZ5giu36weC5saFuznL411gRX7bJDw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=18.14.1"
+      },
+      "peerDependencies": {
+        "hono": "^4"
+      }
+    },
+    "node_modules/@modelcontextprotocol/sdk": {
+      "version": "1.25.3",
+      "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.25.3.tgz",
+      "integrity": "sha512-vsAMBMERybvYgKbg/l4L1rhS7VXV1c0CtyJg72vwxONVX0l4ZfKVAnZEWTQixJGTzKnELjQ59e4NbdFDALRiAQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@hono/node-server": "^1.19.9",
+        "ajv": "^8.17.1",
+        "ajv-formats": "^3.0.1",
+        "content-type": "^1.0.5",
+        "cors": "^2.8.5",
+        "cross-spawn": "^7.0.5",
+        "eventsource": "^3.0.2",
+        "eventsource-parser": "^3.0.0",
+        "express": "^5.0.1",
+        "express-rate-limit": "^7.5.0",
+        "jose": "^6.1.1",
+        "json-schema-typed": "^8.0.2",
+        "pkce-challenge": "^5.0.0",
+        "raw-body": "^3.0.0",
+        "zod": "^3.25 || ^4.0",
+        "zod-to-json-schema": "^3.25.0"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "peerDependencies": {
+        "@cfworker/json-schema": "^4.1.1",
+        "zod": "^3.25 || ^4.0"
+      },
+      "peerDependenciesMeta": {
+        "@cfworker/json-schema": {
+          "optional": true
+        },
+        "zod": {
+          "optional": false
+        }
+      }
+    },
+    "node_modules/@types/node": {
+      "version": "20.19.30",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.30.tgz",
+      "integrity": "sha512-WJtwWJu7UdlvzEAUm484QNg5eAoq5QR08KDNx7g45Usrs2NtOPiX8ugDqmKdXkyL03rBqU5dYNYVQetEpBHq2g==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "undici-types": "~6.21.0"
+      }
+    },
+    "node_modules/accepts": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/accepts/-/accepts-2.0.0.tgz",
+      "integrity": "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==",
+      "license": "MIT",
+      "dependencies": {
+        "mime-types": "^3.0.0",
+        "negotiator": "^1.0.0"
+      },
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/ajv": {
+      "version": "8.17.1",
+      "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz",
+      "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==",
+      "license": "MIT",
+      "dependencies": {
+        "fast-deep-equal": "^3.1.3",
+        "fast-uri": "^3.0.1",
+        "json-schema-traverse": "^1.0.0",
+        "require-from-string": "^2.0.2"
+      },
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/epoberezkin"
+      }
+    },
+    "node_modules/ajv-formats": {
+      "version": "3.0.1",
+      "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-3.0.1.tgz",
+      "integrity": "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==",
+      "license": "MIT",
+      "dependencies": {
+        "ajv": "^8.0.0"
+      },
+      "peerDependencies": {
+        "ajv": "^8.0.0"
+      },
+      "peerDependenciesMeta": {
+        "ajv": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/asynckit": {
+      "version": "0.4.0",
+      "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
+      "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==",
+      "license": "MIT"
+    },
+    "node_modules/axios": {
+      "version": "1.13.2",
+      "resolved": "https://registry.npmjs.org/axios/-/axios-1.13.2.tgz",
+      "integrity": "sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==",
+      "license": "MIT",
+      "dependencies": {
+        "follow-redirects": "^1.15.6",
+        "form-data": "^4.0.4",
+        "proxy-from-env": "^1.1.0"
+      }
+    },
+    "node_modules/body-parser": {
+      "version": "2.2.2",
+      "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-2.2.2.tgz",
+      "integrity": "sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==",
+      "license": "MIT",
+      "dependencies": {
+        "bytes": "^3.1.2",
+        "content-type": "^1.0.5",
+        "debug": "^4.4.3",
+        "http-errors": "^2.0.0",
+        "iconv-lite": "^0.7.0",
+        "on-finished": "^2.4.1",
+        "qs": "^6.14.1",
+        "raw-body": "^3.0.1",
+        "type-is": "^2.0.1"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/bytes": {
+      "version": "3.1.2",
+      "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz",
+      "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/call-bind-apply-helpers": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
+      "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
+      "license": "MIT",
+      "dependencies": {
+        "es-errors": "^1.3.0",
+        "function-bind": "^1.1.2"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/call-bound": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz",
+      "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==",
+      "license": "MIT",
+      "dependencies": {
+        "call-bind-apply-helpers": "^1.0.2",
+        "get-intrinsic": "^1.3.0"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/combined-stream": {
+      "version": "1.0.8",
+      "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
+      "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
+      "license": "MIT",
+      "dependencies": {
+        "delayed-stream": "~1.0.0"
+      },
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/content-disposition": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.0.1.tgz",
+      "integrity": "sha512-oIXISMynqSqm241k6kcQ5UwttDILMK4BiurCfGEREw6+X9jkkpEe5T9FZaApyLGGOnFuyMWZpdolTXMtvEJ08Q==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/content-type": {
+      "version": "1.0.5",
+      "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz",
+      "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/cookie": {
+      "version": "0.7.2",
+      "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz",
+      "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/cookie-signature": {
+      "version": "1.2.2",
+      "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.2.2.tgz",
+      "integrity": "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=6.6.0"
+      }
+    },
+    "node_modules/cors": {
+      "version": "2.8.6",
+      "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.6.tgz",
+      "integrity": "sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==",
+      "license": "MIT",
+      "dependencies": {
+        "object-assign": "^4",
+        "vary": "^1"
+      },
+      "engines": {
+        "node": ">= 0.10"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/cross-spawn": {
+      "version": "7.0.6",
+      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+      "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
+      "license": "MIT",
+      "dependencies": {
+        "path-key": "^3.1.0",
+        "shebang-command": "^2.0.0",
+        "which": "^2.0.1"
+      },
+      "engines": {
+        "node": ">= 8"
+      }
+    },
+    "node_modules/debug": {
+      "version": "4.4.3",
+      "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
+      "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==",
+      "license": "MIT",
+      "dependencies": {
+        "ms": "^2.1.3"
+      },
+      "engines": {
+        "node": ">=6.0"
+      },
+      "peerDependenciesMeta": {
+        "supports-color": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/delayed-stream": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
+      "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.4.0"
+      }
+    },
+    "node_modules/depd": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz",
+      "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/dunder-proto": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
+      "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
+      "license": "MIT",
+      "dependencies": {
+        "call-bind-apply-helpers": "^1.0.1",
+        "es-errors": "^1.3.0",
+        "gopd": "^1.2.0"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/ee-first": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
+      "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==",
+      "license": "MIT"
+    },
+    "node_modules/encodeurl": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz",
+      "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/es-define-property": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
+      "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/es-errors": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
+      "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/es-object-atoms": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
+      "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
+      "license": "MIT",
+      "dependencies": {
+        "es-errors": "^1.3.0"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/es-set-tostringtag": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
+      "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
+      "license": "MIT",
+      "dependencies": {
+        "es-errors": "^1.3.0",
+        "get-intrinsic": "^1.2.6",
+        "has-tostringtag": "^1.0.2",
+        "hasown": "^2.0.2"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/esbuild": {
+      "version": "0.27.2",
+      "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.2.tgz",
+      "integrity": "sha512-HyNQImnsOC7X9PMNaCIeAm4ISCQXs5a5YasTXVliKv4uuBo1dKrG0A+uQS8M5eXjVMnLg3WgXaKvprHlFJQffw==",
+      "dev": true,
+      "hasInstallScript": true,
+      "license": "MIT",
+      "bin": {
+        "esbuild": "bin/esbuild"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "optionalDependencies": {
+        "@esbuild/aix-ppc64": "0.27.2",
+        "@esbuild/android-arm": "0.27.2",
+        "@esbuild/android-arm64": "0.27.2",
+        "@esbuild/android-x64": "0.27.2",
+        "@esbuild/darwin-arm64": "0.27.2",
+        "@esbuild/darwin-x64": "0.27.2",
+        "@esbuild/freebsd-arm64": "0.27.2",
+        "@esbuild/freebsd-x64": "0.27.2",
+        "@esbuild/linux-arm": "0.27.2",
+        "@esbuild/linux-arm64": "0.27.2",
+        "@esbuild/linux-ia32": "0.27.2",
+        "@esbuild/linux-loong64": "0.27.2",
+        "@esbuild/linux-mips64el": "0.27.2",
+        "@esbuild/linux-ppc64": "0.27.2",
+        "@esbuild/linux-riscv64": "0.27.2",
+        "@esbuild/linux-s390x": "0.27.2",
+        "@esbuild/linux-x64": "0.27.2",
+        "@esbuild/netbsd-arm64": "0.27.2",
+        "@esbuild/netbsd-x64": "0.27.2",
+        "@esbuild/openbsd-arm64": "0.27.2",
+        "@esbuild/openbsd-x64": "0.27.2",
+        "@esbuild/openharmony-arm64": "0.27.2",
+        "@esbuild/sunos-x64": "0.27.2",
+        "@esbuild/win32-arm64": "0.27.2",
+        "@esbuild/win32-ia32": "0.27.2",
+        "@esbuild/win32-x64": "0.27.2"
+      }
+    },
+    "node_modules/escape-html": {
+      "version": "1.0.3",
+      "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
+      "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==",
+      "license": "MIT"
+    },
+    "node_modules/etag": {
+      "version": "1.8.1",
+      "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz",
+      "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/eventsource": {
+      "version": "3.0.7",
+      "resolved": "https://registry.npmjs.org/eventsource/-/eventsource-3.0.7.tgz",
+      "integrity": "sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==",
+      "license": "MIT",
+      "dependencies": {
+        "eventsource-parser": "^3.0.1"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/eventsource-parser": {
+      "version": "3.0.6",
+      "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.6.tgz",
+      "integrity": "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
+    "node_modules/express": {
+      "version": "5.2.1",
+      "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz",
+      "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==",
+      "license": "MIT",
+      "dependencies": {
+        "accepts": "^2.0.0",
+        "body-parser": "^2.2.1",
+        "content-disposition": "^1.0.0",
+        "content-type": "^1.0.5",
+        "cookie": "^0.7.1",
+        "cookie-signature": "^1.2.1",
+        "debug": "^4.4.0",
+        "depd": "^2.0.0",
+        "encodeurl": "^2.0.0",
+        "escape-html": "^1.0.3",
+        "etag": "^1.8.1",
+        "finalhandler": "^2.1.0",
+        "fresh": "^2.0.0",
+        "http-errors": "^2.0.0",
+        "merge-descriptors": "^2.0.0",
+        "mime-types": "^3.0.0",
+        "on-finished": "^2.4.1",
+        "once": "^1.4.0",
+        "parseurl": "^1.3.3",
+        "proxy-addr": "^2.0.7",
+        "qs": "^6.14.0",
+        "range-parser": "^1.2.1",
+        "router": "^2.2.0",
+        "send": "^1.1.0",
+        "serve-static": "^2.2.0",
+        "statuses": "^2.0.1",
+        "type-is": "^2.0.1",
+        "vary": "^1.1.2"
+      },
+      "engines": {
+        "node": ">= 18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/express-rate-limit": {
+      "version": "7.5.1",
+      "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-7.5.1.tgz",
+      "integrity": "sha512-7iN8iPMDzOMHPUYllBEsQdWVB6fPDMPqwjBaFrgr4Jgr/+okjvzAy+UHlYYL/Vs0OsOrMkwS6PJDkFlJwoxUnw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 16"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/express-rate-limit"
+      },
+      "peerDependencies": {
+        "express": ">= 4.11"
+      }
+    },
+    "node_modules/fast-deep-equal": {
+      "version": "3.1.3",
+      "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
+      "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==",
+      "license": "MIT"
+    },
+    "node_modules/fast-uri": {
+      "version": "3.1.0",
+      "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz",
+      "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==",
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/fastify"
+        },
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/fastify"
+        }
+      ],
+      "license": "BSD-3-Clause"
+    },
+    "node_modules/finalhandler": {
+      "version": "2.1.1",
+      "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-2.1.1.tgz",
+      "integrity": "sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA==",
+      "license": "MIT",
+      "dependencies": {
+        "debug": "^4.4.0",
+        "encodeurl": "^2.0.0",
+        "escape-html": "^1.0.3",
+        "on-finished": "^2.4.1",
+        "parseurl": "^1.3.3",
+        "statuses": "^2.0.1"
+      },
+      "engines": {
+        "node": ">= 18.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/follow-redirects": {
+      "version": "1.15.11",
+      "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz",
+      "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==",
+      "funding": [
+        {
+          "type": "individual",
+          "url": "https://github.com/sponsors/RubenVerborgh"
+        }
+      ],
+      "license": "MIT",
+      "engines": {
+        "node": ">=4.0"
+      },
+      "peerDependenciesMeta": {
+        "debug": {
+          "optional": true
+        }
+      }
+    },
+    "node_modules/form-data": {
+      "version": "4.0.5",
+      "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz",
+      "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==",
+      "license": "MIT",
+      "dependencies": {
+        "asynckit": "^0.4.0",
+        "combined-stream": "^1.0.8",
+        "es-set-tostringtag": "^2.1.0",
+        "hasown": "^2.0.2",
+        "mime-types": "^2.1.12"
+      },
+      "engines": {
+        "node": ">= 6"
+      }
+    },
+    "node_modules/form-data/node_modules/mime-db": {
+      "version": "1.52.0",
+      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
+      "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/form-data/node_modules/mime-types": {
+      "version": "2.1.35",
+      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
+      "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
+      "license": "MIT",
+      "dependencies": {
+        "mime-db": "1.52.0"
+      },
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/forwarded": {
+      "version": "0.2.0",
+      "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
+      "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/fresh": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/fresh/-/fresh-2.0.0.tgz",
+      "integrity": "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/fsevents": {
+      "version": "2.3.3",
+      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
+      "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
+      "dev": true,
+      "hasInstallScript": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
+      }
+    },
+    "node_modules/function-bind": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
+      "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
+      "license": "MIT",
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/get-intrinsic": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
+      "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
+      "license": "MIT",
+      "dependencies": {
+        "call-bind-apply-helpers": "^1.0.2",
+        "es-define-property": "^1.0.1",
+        "es-errors": "^1.3.0",
+        "es-object-atoms": "^1.1.1",
+        "function-bind": "^1.1.2",
+        "get-proto": "^1.0.1",
+        "gopd": "^1.2.0",
+        "has-symbols": "^1.1.0",
+        "hasown": "^2.0.2",
+        "math-intrinsics": "^1.1.0"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/get-proto": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz",
+      "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
+      "license": "MIT",
+      "dependencies": {
+        "dunder-proto": "^1.0.1",
+        "es-object-atoms": "^1.0.0"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/get-tsconfig": {
+      "version": "4.13.0",
+      "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.0.tgz",
+      "integrity": "sha512-1VKTZJCwBrvbd+Wn3AOgQP/2Av+TfTCOlE4AcRJE72W1ksZXbAx8PPBR9RzgTeSPzlPMHrbANMH3LbltH73wxQ==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "resolve-pkg-maps": "^1.0.0"
+      },
+      "funding": {
+        "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
+      }
+    },
+    "node_modules/gopd": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
+      "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/has-symbols": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
+      "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/has-tostringtag": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
+      "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
+      "license": "MIT",
+      "dependencies": {
+        "has-symbols": "^1.0.3"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/hasown": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
+      "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
+      "license": "MIT",
+      "dependencies": {
+        "function-bind": "^1.1.2"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/hono": {
+      "version": "4.11.5",
+      "resolved": "https://registry.npmjs.org/hono/-/hono-4.11.5.tgz",
+      "integrity": "sha512-WemPi9/WfyMwZs+ZUXdiwcCh9Y+m7L+8vki9MzDw3jJ+W9Lc+12HGsd368Qc1vZi1xwW8BWMMsnK5efYKPdt4g==",
+      "license": "MIT",
+      "peer": true,
+      "engines": {
+        "node": ">=16.9.0"
+      }
+    },
+    "node_modules/http-errors": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz",
+      "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==",
+      "license": "MIT",
+      "dependencies": {
+        "depd": "~2.0.0",
+        "inherits": "~2.0.4",
+        "setprototypeof": "~1.2.0",
+        "statuses": "~2.0.2",
+        "toidentifier": "~1.0.1"
+      },
+      "engines": {
+        "node": ">= 0.8"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/iconv-lite": {
+      "version": "0.7.2",
+      "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz",
+      "integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==",
+      "license": "MIT",
+      "dependencies": {
+        "safer-buffer": ">= 2.1.2 < 3.0.0"
+      },
+      "engines": {
+        "node": ">=0.10.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/inherits": {
+      "version": "2.0.4",
+      "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
+      "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
+      "license": "ISC"
+    },
+    "node_modules/ipaddr.js": {
+      "version": "1.9.1",
+      "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
+      "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.10"
+      }
+    },
+    "node_modules/is-promise": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz",
+      "integrity": "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==",
+      "license": "MIT"
+    },
+    "node_modules/isexe": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
+      "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
+      "license": "ISC"
+    },
+    "node_modules/jose": {
+      "version": "6.1.3",
+      "resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz",
+      "integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==",
+      "license": "MIT",
+      "funding": {
+        "url": "https://github.com/sponsors/panva"
+      }
+    },
+    "node_modules/json-schema-traverse": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz",
+      "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==",
+      "license": "MIT"
+    },
+    "node_modules/json-schema-typed": {
+      "version": "8.0.2",
+      "resolved": "https://registry.npmjs.org/json-schema-typed/-/json-schema-typed-8.0.2.tgz",
+      "integrity": "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==",
+      "license": "BSD-2-Clause"
+    },
+    "node_modules/math-intrinsics": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
+      "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/media-typer": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-1.1.0.tgz",
+      "integrity": "sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/merge-descriptors": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-2.0.0.tgz",
+      "integrity": "sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
+    "node_modules/mime-db": {
+      "version": "1.54.0",
+      "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz",
+      "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/mime-types": {
+      "version": "3.0.2",
+      "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz",
+      "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==",
+      "license": "MIT",
+      "dependencies": {
+        "mime-db": "^1.54.0"
+      },
+      "engines": {
+        "node": ">=18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/ms": {
+      "version": "2.1.3",
+      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
+      "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
+      "license": "MIT"
+    },
+    "node_modules/negotiator": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-1.0.0.tgz",
+      "integrity": "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/object-assign": {
+      "version": "4.1.1",
+      "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
+      "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/object-inspect": {
+      "version": "1.13.4",
+      "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz",
+      "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/on-finished": {
+      "version": "2.4.1",
+      "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz",
+      "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==",
+      "license": "MIT",
+      "dependencies": {
+        "ee-first": "1.1.1"
+      },
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/once": {
+      "version": "1.4.0",
+      "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
+      "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
+      "license": "ISC",
+      "dependencies": {
+        "wrappy": "1"
+      }
+    },
+    "node_modules/parseurl": {
+      "version": "1.3.3",
+      "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
+      "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/path-key": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
+      "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/path-to-regexp": {
+      "version": "8.3.0",
+      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.3.0.tgz",
+      "integrity": "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==",
+      "license": "MIT",
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/pkce-challenge": {
+      "version": "5.0.1",
+      "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.1.tgz",
+      "integrity": "sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=16.20.0"
+      }
+    },
+    "node_modules/proxy-addr": {
+      "version": "2.0.7",
+      "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
+      "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==",
+      "license": "MIT",
+      "dependencies": {
+        "forwarded": "0.2.0",
+        "ipaddr.js": "1.9.1"
+      },
+      "engines": {
+        "node": ">= 0.10"
+      }
+    },
+    "node_modules/proxy-from-env": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
+      "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
+      "license": "MIT"
+    },
+    "node_modules/qs": {
+      "version": "6.14.1",
+      "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.1.tgz",
+      "integrity": "sha512-4EK3+xJl8Ts67nLYNwqw/dsFVnCf+qR7RgXSK9jEEm9unao3njwMDdmsdvoKBKHzxd7tCYz5e5M+SnMjdtXGQQ==",
+      "license": "BSD-3-Clause",
+      "dependencies": {
+        "side-channel": "^1.1.0"
+      },
+      "engines": {
+        "node": ">=0.6"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/range-parser": {
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz",
+      "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/raw-body": {
+      "version": "3.0.2",
+      "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.2.tgz",
+      "integrity": "sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==",
+      "license": "MIT",
+      "dependencies": {
+        "bytes": "~3.1.2",
+        "http-errors": "~2.0.1",
+        "iconv-lite": "~0.7.0",
+        "unpipe": "~1.0.0"
+      },
+      "engines": {
+        "node": ">= 0.10"
+      }
+    },
+    "node_modules/require-from-string": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz",
+      "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
+    "node_modules/resolve-pkg-maps": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
+      "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
+      "dev": true,
+      "license": "MIT",
+      "funding": {
+        "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
+      }
+    },
+    "node_modules/router": {
+      "version": "2.2.0",
+      "resolved": "https://registry.npmjs.org/router/-/router-2.2.0.tgz",
+      "integrity": "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==",
+      "license": "MIT",
+      "dependencies": {
+        "debug": "^4.4.0",
+        "depd": "^2.0.0",
+        "is-promise": "^4.0.0",
+        "parseurl": "^1.3.3",
+        "path-to-regexp": "^8.0.0"
+      },
+      "engines": {
+        "node": ">= 18"
+      }
+    },
+    "node_modules/safer-buffer": {
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
+      "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==",
+      "license": "MIT"
+    },
+    "node_modules/send": {
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/send/-/send-1.2.1.tgz",
+      "integrity": "sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==",
+      "license": "MIT",
+      "dependencies": {
+        "debug": "^4.4.3",
+        "encodeurl": "^2.0.0",
+        "escape-html": "^1.0.3",
+        "etag": "^1.8.1",
+        "fresh": "^2.0.0",
+        "http-errors": "^2.0.1",
+        "mime-types": "^3.0.2",
+        "ms": "^2.1.3",
+        "on-finished": "^2.4.1",
+        "range-parser": "^1.2.1",
+        "statuses": "^2.0.2"
+      },
+      "engines": {
+        "node": ">= 18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/serve-static": {
+      "version": "2.2.1",
+      "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-2.2.1.tgz",
+      "integrity": "sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw==",
+      "license": "MIT",
+      "dependencies": {
+        "encodeurl": "^2.0.0",
+        "escape-html": "^1.0.3",
+        "parseurl": "^1.3.3",
+        "send": "^1.2.0"
+      },
+      "engines": {
+        "node": ">= 18"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/setprototypeof": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
+      "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==",
+      "license": "ISC"
+    },
+    "node_modules/shebang-command": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
+      "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
+      "license": "MIT",
+      "dependencies": {
+        "shebang-regex": "^3.0.0"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/shebang-regex": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
+      "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/side-channel": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz",
+      "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==",
+      "license": "MIT",
+      "dependencies": {
+        "es-errors": "^1.3.0",
+        "object-inspect": "^1.13.3",
+        "side-channel-list": "^1.0.0",
+        "side-channel-map": "^1.0.1",
+        "side-channel-weakmap": "^1.0.2"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/side-channel-list": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz",
+      "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==",
+      "license": "MIT",
+      "dependencies": {
+        "es-errors": "^1.3.0",
+        "object-inspect": "^1.13.3"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/side-channel-map": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz",
+      "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==",
+      "license": "MIT",
+      "dependencies": {
+        "call-bound": "^1.0.2",
+        "es-errors": "^1.3.0",
+        "get-intrinsic": "^1.2.5",
+        "object-inspect": "^1.13.3"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/side-channel-weakmap": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz",
+      "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==",
+      "license": "MIT",
+      "dependencies": {
+        "call-bound": "^1.0.2",
+        "es-errors": "^1.3.0",
+        "get-intrinsic": "^1.2.5",
+        "object-inspect": "^1.13.3",
+        "side-channel-map": "^1.0.1"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/statuses": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz",
+      "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/toidentifier": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz",
+      "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.6"
+      }
+    },
+    "node_modules/tsx": {
+      "version": "4.21.0",
+      "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz",
+      "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==",
+      "dev": true,
+      "license": "MIT",
+      "dependencies": {
+        "esbuild": "~0.27.0",
+        "get-tsconfig": "^4.7.5"
+      },
+      "bin": {
+        "tsx": "dist/cli.mjs"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      },
+      "optionalDependencies": {
+        "fsevents": "~2.3.3"
+      }
+    },
+    "node_modules/type-is": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/type-is/-/type-is-2.0.1.tgz",
+      "integrity": "sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==",
+      "license": "MIT",
+      "dependencies": {
+        "content-type": "^1.0.5",
+        "media-typer": "^1.1.0",
+        "mime-types": "^3.0.0"
+      },
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/typescript": {
+      "version": "5.9.3",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
+      "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
+      "dev": true,
+      "license": "Apache-2.0",
+      "bin": {
+        "tsc": "bin/tsc",
+        "tsserver": "bin/tsserver"
+      },
+      "engines": {
+        "node": ">=14.17"
+      }
+    },
+    "node_modules/undici-types": {
+      "version": "6.21.0",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
+      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
+      "dev": true,
+      "license": "MIT"
+    },
+    "node_modules/unpipe": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
+      "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/vary": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
+      "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/which": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
+      "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
+      "license": "ISC",
+      "dependencies": {
+        "isexe": "^2.0.0"
+      },
+      "bin": {
+        "node-which": "bin/node-which"
+      },
+      "engines": {
+        "node": ">= 8"
+      }
+    },
+    "node_modules/wrappy": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
+      "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
+      "license": "ISC"
+    },
+    "node_modules/zod": {
+      "version": "4.3.6",
+      "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz",
+      "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==",
+      "license": "MIT",
+      "funding": {
+        "url": "https://github.com/sponsors/colinhacks"
+      }
+    },
+    "node_modules/zod-to-json-schema": {
+      "version": "3.25.1",
+      "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.1.tgz",
+      "integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==",
+      "license": "ISC",
+      "peerDependencies": {
+        "zod": "^3.25 || ^4"
+      }
+    }
+  }
+}
diff --git a/mcp-server/package.json b/mcp-server/package.json
new file mode 100644
index 0000000..3e42c57
--- /dev/null
+++ b/mcp-server/package.json
@@ -0,0 +1,34 @@
+{
+  "name": "@lmstack/mcp-server",
+  "version": "0.1.0",
+  "description": "MCP Server for LMStack - LLM Deployment Platform",
+  "type": "module",
+  "main": "dist/index.js",
+  "bin": {
+    "lmstack-mcp": "dist/index.js"
+  },
+  "scripts": {
+    "build": "tsc",
+    "dev": "tsx watch src/index.ts",
+    "start": "node dist/index.js",
+    "inspect": "npx @modelcontextprotocol/inspector node dist/index.js"
+  },
+  "keywords": [
+    "mcp",
+    "llm",
+    "lmstack",
+    "ai",
+    "model-context-protocol"
+  ],
+  "author": "",
+  "license": "MIT",
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.0.0",
+    "axios": "^1.6.0"
+  },
+  "devDependencies": {
+    "@types/node": "^20.0.0",
+    "tsx": "^4.0.0",
+    "typescript": "^5.3.0"
+  }
+}
diff --git a/mcp-server/src/client.ts b/mcp-server/src/client.ts
new file mode 100644
index 0000000..3e0e21f
--- /dev/null
+++ b/mcp-server/src/client.ts
@@ -0,0 +1,227 @@
+/**
+ * LMStack API Client
+ *
+ * HTTP client for communicating with LMStack backend API.
+ * This client is shared between MCP Server and Web Chat tools.
+ */
+import axios, { AxiosInstance } from "axios";
+
+/**
+ * HTTP client for the LMStack backend REST API.
+ *
+ * Each public method issues a single request through one shared axios
+ * instance and resolves with the response payload (or nothing for calls
+ * that only trigger an action). Errors are not caught here, so a failed
+ * request rejects the returned promise.
+ */
+export class LMStackClient {
+  private client: AxiosInstance;
+
+  /**
+   * @param baseURL Root URL that every request path is resolved against.
+   * @param token Optional bearer token; when truthy it is sent as an
+   *   `Authorization: Bearer <token>` header on every request.
+   */
+  constructor(baseURL: string, token?: string) {
+    const headers: Record<string, string> = { "Content-Type": "application/json" };
+    if (token) {
+      headers.Authorization = `Bearer ${token}`;
+    }
+
+    this.client = axios.create({ baseURL, timeout: 30000, headers });
+  }
+
+  /** Build the query object for a request that may be scoped to one worker. */
+  private static workerScope(workerId?: number): any {
+    const params: any = {};
+    if (workerId) params.worker_id = workerId;
+    return params;
+  }
+
+  /** Pass arrays through untouched; any other payload becomes an empty list. */
+  private static asList(payload: any): any[] {
+    return Array.isArray(payload) ? payload : [];
+  }
+
+  // --------------------------------------------------------------------------
+  // Workers
+  // --------------------------------------------------------------------------
+
+  /** List workers (`GET /workers`), returning the `items` array or `[]`. */
+  async getWorkers(): Promise<any[]> {
+    const { data } = await this.client.get("/workers");
+    return data.items || [];
+  }
+
+  // --------------------------------------------------------------------------
+  // Containers
+  // --------------------------------------------------------------------------
+
+  /** List containers (`GET /containers`), returning the `items` array or `[]`. */
+  async getContainers(): Promise<any[]> {
+    const { data } = await this.client.get("/containers");
+    return data.items || [];
+  }
+
+  /** Stop the named container on the given worker. */
+  async stopContainer(containerName: string, workerId: number): Promise<void> {
+    const url = `/containers/${encodeURIComponent(containerName)}/stop`;
+    await this.client.post(url, null, { params: { worker_id: workerId } });
+  }
+
+  /** Remove the named container on the given worker; `force` defaults to false. */
+  async removeContainer(containerName: string, workerId: number, force?: boolean): Promise<void> {
+    const url = `/containers/${encodeURIComponent(containerName)}`;
+    await this.client.delete(url, {
+      params: { worker_id: workerId, force: force || false },
+    });
+  }
+
+  // --------------------------------------------------------------------------
+  // Models
+  // --------------------------------------------------------------------------
+
+  /** List models (`GET /models`), returning the `items` array or `[]`. */
+  async getModels(): Promise<any[]> {
+    const { data } = await this.client.get("/models");
+    return data.items || [];
+  }
+
+  /** Register a model (`POST /models`) and return the response payload. */
+  async addModel(
+    name: string,
+    source: string,
+    parameters?: string,
+    quantization?: string
+  ): Promise<any> {
+    const body = { name, source, parameters, quantization };
+    const { data } = await this.client.post("/models", body);
+    return data;
+  }
+
+  /** Delete a model by ID. */
+  async deleteModel(modelId: number): Promise<void> {
+    await this.client.delete(`/models/${modelId}`);
+  }
+
+  // --------------------------------------------------------------------------
+  // Deployments
+  // --------------------------------------------------------------------------
+
+  /** List deployments (`GET /deployments`), returning the `items` array or `[]`. */
+  async getDeployments(): Promise<any[]> {
+    const { data } = await this.client.get("/deployments");
+    return data.items || [];
+  }
+
+  /** Fetch a single deployment by ID. */
+  async getDeployment(deploymentId: number): Promise<any> {
+    const { data } = await this.client.get(`/deployments/${deploymentId}`);
+    return data;
+  }
+
+  /** Create a deployment of a model on a worker, optionally pinning GPU IDs. */
+  async deployModel(
+    modelId: number,
+    workerId: number,
+    gpuIds?: number[]
+  ): Promise<any> {
+    const body = {
+      model_id: modelId,
+      worker_id: workerId,
+      gpu_ids: gpuIds,
+    };
+    const { data } = await this.client.post("/deployments", body);
+    return data;
+  }
+
+  /** Stop a deployment by ID. */
+  async stopDeployment(deploymentId: number): Promise<void> {
+    await this.client.post(`/deployments/${deploymentId}/stop`);
+  }
+
+  /** Start a deployment by ID. */
+  async startDeployment(deploymentId: number): Promise<void> {
+    await this.client.post(`/deployments/${deploymentId}/start`);
+  }
+
+  /** Delete a deployment by ID. */
+  async deleteDeployment(deploymentId: number): Promise<void> {
+    await this.client.delete(`/deployments/${deploymentId}`);
+  }
+
+  // --------------------------------------------------------------------------
+  // API Keys
+  // --------------------------------------------------------------------------
+
+  /** Fetch API keys; unlike the list helpers this returns the raw payload. */
+  async getApiKeys(): Promise<any> {
+    const { data } = await this.client.get("/api-keys");
+    return data;
+  }
+
+  /** Create an API key and return the response payload. */
+  async createApiKey(
+    name: string,
+    description?: string,
+    expiresInDays?: number
+  ): Promise<any> {
+    const body = {
+      name,
+      description,
+      expires_in_days: expiresInDays,
+    };
+    const { data } = await this.client.post("/api-keys", body);
+    return data;
+  }
+
+  /** Delete an API key by ID. */
+  async deleteApiKey(apiKeyId: number): Promise<void> {
+    await this.client.delete(`/api-keys/${apiKeyId}`);
+  }
+
+  // --------------------------------------------------------------------------
+  // Docker Images
+  // --------------------------------------------------------------------------
+
+  /** List images, optionally filtered by worker and/or repository. */
+  async getImages(workerId?: number, repository?: string): Promise<any[]> {
+    const params = LMStackClient.workerScope(workerId);
+    if (repository) params.repository = repository;
+
+    const { data } = await this.client.get("/images", { params });
+    return data.items || [];
+  }
+
+  /** Ask a worker to pull an image and return the response payload. */
+  async pullImage(workerId: number, image: string): Promise<any> {
+    const body = { worker_id: workerId, image };
+    const { data } = await this.client.post("/images/pull", body);
+    return data;
+  }
+
+  /** Delete an image on the given worker; `force` defaults to false. */
+  async deleteImage(imageId: string, workerId: number, force?: boolean): Promise<void> {
+    const url = `/images/${encodeURIComponent(imageId)}`;
+    await this.client.delete(url, {
+      params: { worker_id: workerId, force: force || false },
+    });
+  }
+
+  // --------------------------------------------------------------------------
+  // Storage
+  // --------------------------------------------------------------------------
+
+  /** List storage volumes; a non-array payload yields `[]`. */
+  async getStorageVolumes(workerId?: number): Promise<any[]> {
+    const params = LMStackClient.workerScope(workerId);
+    const { data } = await this.client.get("/storage/volumes", { params });
+    return LMStackClient.asList(data);
+  }
+
+  /** Fetch disk usage entries; a non-array payload yields `[]`. */
+  async getDiskUsage(workerId?: number): Promise<any[]> {
+    const params = LMStackClient.workerScope(workerId);
+    const { data } = await this.client.get("/storage/disk-usage", { params });
+    return LMStackClient.asList(data);
+  }
+
+  /** Delete a storage volume on the given worker; `force` defaults to false. */
+  async deleteStorageVolume(volumeName: string, workerId: number, force?: boolean): Promise<void> {
+    const url = `/storage/volumes/${encodeURIComponent(volumeName)}`;
+    await this.client.delete(url, {
+      params: { worker_id: workerId, force: force || false },
+    });
+  }
+
+  /**
+   * Prune unused storage. The boolean switches go in the request body, the
+   * optional worker filter in the query string. A non-array payload yields `[]`.
+   */
+  async pruneStorage(
+    workerId?: number,
+    images: boolean = true,
+    containers: boolean = true,
+    volumes: boolean = false,
+    buildCache: boolean = true
+  ): Promise<any[]> {
+    const params = LMStackClient.workerScope(workerId);
+    const body = {
+      images,
+      containers,
+      volumes,
+      build_cache: buildCache,
+    };
+
+    const { data } = await this.client.post("/storage/prune", body, { params });
+    return LMStackClient.asList(data);
+  }
+
+  // --------------------------------------------------------------------------
+  // Dashboard
+  // --------------------------------------------------------------------------
+
+  /** Fetch the dashboard payload (`GET /dashboard`). */
+  async getDashboard(): Promise<any> {
+    const { data } = await this.client.get("/dashboard");
+    return data;
+  }
+}
diff --git a/mcp-server/src/formatters.ts b/mcp-server/src/formatters.ts
new file mode 100644
index 0000000..b7a984a
--- /dev/null
+++ b/mcp-server/src/formatters.ts
@@ -0,0 +1,384 @@
+/**
+ * Data Formatters
+ *
+ * Format LMStack API responses into human-readable text for MCP.
+ * These formatters produce markdown output suitable for AI agents.
+ */
+
+/**
+ * Render a byte count with two decimals in the largest fitting unit.
+ *
+ * Uses GB from 1024^3 bytes, MB from 1024^2 bytes, and KB for everything
+ * smaller (including values below 1024).
+ */
+function formatSize(bytes: number): string {
+  const KIB = 1024;
+  const MIB = KIB * 1024;
+  const GIB = MIB * 1024;
+
+  let scaled: number;
+  let unit: string;
+  if (bytes >= GIB) {
+    scaled = bytes / GIB;
+    unit = "GB";
+  } else if (bytes >= MIB) {
+    scaled = bytes / MIB;
+    unit = "MB";
+  } else {
+    scaled = bytes / KIB;
+    unit = "KB";
+  }
+
+  return `${scaled.toFixed(2)} ${unit}`;
+}
+
+/**
+ * Format workers list as markdown.
+ *
+ * Emits one section per worker (name, host, status, ID) followed by a
+ * per-GPU breakdown of memory and utilization, or "None detected" when the
+ * worker reports no GPUs.
+ *
+ * @param workers Worker objects as returned by the API.
+ * @returns Markdown text, or a fixed notice when the list is empty.
+ */
+export function formatWorkers(workers: any[]): string {
+  if (workers.length === 0) {
+    return "No workers registered.";
+  }
+
+  // Memory figures are divided by 1024 before being labelled "GB"
+  // (presumably the API reports MiB - TODO confirm).
+  const toGB = (value: number): string => (value / 1024).toFixed(1);
+
+  const lines: string[] = [`# Workers (${workers.length} total)\n`];
+
+  for (const worker of workers) {
+    const statusEmoji = worker.status === "online" ? "🟢" : "🔴";
+    lines.push(`## ${statusEmoji} ${worker.name}`);
+    lines.push(`- **Host:** ${worker.host}`);
+    lines.push(`- **Status:** ${worker.status}`);
+    lines.push(`- **ID:** ${worker.id}`);
+
+    if (worker.gpu_info && worker.gpu_info.length > 0) {
+      lines.push(`- **GPUs:** ${worker.gpu_info.length}`);
+      for (const gpu of worker.gpu_info) {
+        // Default missing counters to 0 so a partial GPU report renders as
+        // "0.0GB" instead of "NaNGB" (same defaulting as formatSystemStatus).
+        const used = gpu.memory_used || 0;
+        const total = gpu.memory_total || 0;
+        const util = gpu.utilization_gpu || 0;
+        lines.push(`  - GPU ${gpu.index}: ${gpu.name}`);
+        lines.push(`    - Memory: ${toGB(used)}GB / ${toGB(total)}GB (${toGB(total - used)}GB free)`);
+        lines.push(`    - Utilization: ${util}%`);
+      }
+    } else {
+      lines.push(`- **GPUs:** None detected`);
+    }
+    lines.push("");
+  }
+
+  return lines.join("\n");
+}
+
+/**
+ * Format containers list as markdown, grouped by worker.
+ *
+ * The worker name comes from `container.worker.name`, then
+ * `container.worker_name`, then falls back to "Unknown". Container IDs are
+ * shortened to their first 12 characters.
+ */
+export function formatContainers(containers: any[]): string {
+  if (containers.length === 0) {
+    return "No containers found.";
+  }
+
+  // A container counts as running when its status text mentions "running"
+  // or "up" (case-insensitive); a missing status counts as not running.
+  const isRunning = (container: any): boolean => {
+    const status = container.status?.toLowerCase();
+    return Boolean(status?.includes("running") || status?.includes("up"));
+  };
+
+  const runningCount = containers.filter(isRunning).length;
+  const lines: string[] = [
+    `# Docker Containers (${containers.length} total, ${runningCount} running)\n`,
+  ];
+
+  // Bucket containers under their worker's display name.
+  const grouped: Record<string, any[]> = {};
+  for (const entry of containers) {
+    const key = entry.worker?.name || entry.worker_name || "Unknown";
+    const bucket = grouped[key] ?? (grouped[key] = []);
+    bucket.push(entry);
+  }
+
+  Object.entries(grouped).forEach(([workerName, members]) => {
+    lines.push(`## ${workerName}`);
+    members.forEach((member) => {
+      const marker = isRunning(member) ? "🟢" : "⚪";
+      lines.push(
+        `- ${marker} **${member.name}**`,
+        `  - Image: ${member.image}`,
+        `  - Status: ${member.status}`,
+        `  - ID: ${member.id?.substring(0, 12) || "N/A"}`
+      );
+    });
+    lines.push("");
+  });
+
+  return lines.join("\n");
+}
+
+/**
+ * Format deployments list as markdown.
+ *
+ * Each deployment gets a heading (model name, falling back to the
+ * deployment name) with ID, status and worker. Running deployments that
+ * have a port and a known worker host also get an OpenAI-style `/v1`
+ * endpoint line.
+ *
+ * @param deployments Deployment objects as returned by the API.
+ * @returns Markdown text, or a fixed notice when the list is empty.
+ *   The "Created" line uses `toLocaleString()`, so its exact text depends
+ *   on the runtime locale and time zone.
+ */
+export function formatDeployments(deployments: any[]): string {
+  if (deployments.length === 0) {
+    return "No model deployments found.";
+  }
+
+  const running = deployments.filter((d) => d.status === "running");
+
+  const lines: string[] = [
+    `# Model Deployments (${deployments.length} total, ${running.length} running)\n`,
+  ];
+
+  // Any status other than the three known ones is shown as red.
+  const emojiFor = (status: unknown): string => {
+    switch (status) {
+      case "running":
+        return "🟢";
+      case "starting":
+        return "🟡";
+      case "stopped":
+        return "⚪";
+      default:
+        return "🔴";
+    }
+  };
+
+  for (const dep of deployments) {
+    lines.push(`## ${emojiFor(dep.status)} ${dep.model?.name || dep.name}`);
+    lines.push(`- **ID:** ${dep.id}`);
+    lines.push(`- **Status:** ${dep.status}`);
+    lines.push(`- **Worker:** ${dep.worker?.name || "Unknown"}`);
+
+    // Only advertise an endpoint when the host is actually known; otherwise
+    // the line would read "http://undefined:<port>/v1".
+    const host = dep.worker?.host;
+    if (dep.status === "running" && dep.port && host) {
+      lines.push(`- **Endpoint:** http://${host}:${dep.port}/v1`);
+    }
+
+    if (dep.gpu_ids && dep.gpu_ids.length > 0) {
+      lines.push(`- **GPUs:** ${dep.gpu_ids.join(", ")}`);
+    }
+
+    if (dep.created_at) {
+      lines.push(`- **Created:** ${new Date(dep.created_at).toLocaleString()}`);
+    }
+    lines.push("");
+  }
+
+  return lines.join("\n");
+}
+
+/**
+ * Format models list as markdown, grouped by model source.
+ *
+ * Models without a source are grouped under "unknown". Group headings show
+ * the source with its first letter upper-cased and the group size.
+ */
+export function formatModels(models: any[]): string {
+  if (models.length === 0) {
+    return "No models registered.";
+  }
+
+  // Bucket models under their source key.
+  const grouped: Record<string, any[]> = {};
+  for (const entry of models) {
+    const key = entry.source || "unknown";
+    const bucket = grouped[key] ?? (grouped[key] = []);
+    bucket.push(entry);
+  }
+
+  const lines: string[] = [`# Available Models (${models.length} total)\n`];
+
+  Object.entries(grouped).forEach(([source, members]) => {
+    const heading = source.charAt(0).toUpperCase() + source.slice(1);
+    lines.push(`## ${heading} (${members.length})`);
+
+    members.forEach((member) => {
+      lines.push(`- **${member.name}** (ID: ${member.id})`);
+      // Optional attributes are listed only when present.
+      if (member.parameters) {
+        lines.push(`  - Parameters: ${member.parameters}`);
+      }
+      if (member.quantization) {
+        lines.push(`  - Quantization: ${member.quantization}`);
+      }
+    });
+
+    lines.push("");
+  });
+
+  return lines.join("\n");
+}
+
+/**
+ * Format complete system status as markdown.
+ *
+ * Produces a summary (online workers, running containers and deployments,
+ * model count, aggregate GPU memory), then the full workers listing from
+ * `formatWorkers`, then a short list of running deployments when any exist.
+ *
+ * @param workers Worker objects; `gpu_info` entries feed the GPU totals.
+ * @param containers Container objects; counted as running when the status
+ *   text contains "running" or "up" (case-insensitive).
+ * @param deployments Deployment objects; counted as running when
+ *   `status === "running"`.
+ * @param models Model objects; only their count is used.
+ * @returns Markdown text. "Last Updated" uses `toLocaleString()`, so its
+ *   exact text depends on the runtime locale and time zone.
+ */
+export function formatSystemStatus(
+  workers: any[],
+  containers: any[],
+  deployments: any[],
+  models: any[]
+): string {
+  const onlineWorkers = workers.filter((w) => w.status === "online");
+  const runningContainers = containers.filter(
+    (c) => c.status?.toLowerCase().includes("running") || c.status?.toLowerCase().includes("up")
+  );
+  const runningDeployments = deployments.filter((d) => d.status === "running");
+
+  // Sum GPU memory across every GPU of every worker; missing values count as 0.
+  let totalGpuMemory = 0;
+  let usedGpuMemory = 0;
+  for (const worker of workers) {
+    for (const gpu of worker.gpu_info || []) {
+      totalGpuMemory += gpu.memory_total || 0;
+      usedGpuMemory += gpu.memory_used || 0;
+    }
+  }
+  const freeGpuMemory = totalGpuMemory - usedGpuMemory;
+
+  // Totals are divided by 1024 before being labelled "GB"
+  // (presumably the API reports MiB - TODO confirm).
+  const toGB = (value: number): string => (value / 1024).toFixed(1);
+
+  const lines: string[] = [
+    "# LMStack System Status",
+    "",
+    `**Last Updated:** ${new Date().toLocaleString()}`,
+    "",
+    "## Summary",
+    `- 🖥️ **Workers:** ${onlineWorkers.length}/${workers.length} online`,
+    `- 📦 **Containers:** ${runningContainers.length}/${containers.length} running`,
+    `- 🚀 **Deployments:** ${runningDeployments.length}/${deployments.length} running`,
+    `- 🤖 **Models:** ${models.length} available`,
+    `- 🎮 **GPU Memory:** ${toGB(usedGpuMemory)}GB used / ${toGB(freeGpuMemory)}GB free / ${toGB(totalGpuMemory)}GB total`,
+    "",
+  ];
+
+  // Add workers section
+  lines.push(formatWorkers(workers));
+  lines.push("");
+
+  // Add running deployments
+  if (runningDeployments.length > 0) {
+    lines.push("## Active Deployments");
+    for (const dep of runningDeployments) {
+      // Fall back to "Unknown" (as formatDeployments does) rather than
+      // printing "on undefined" when the worker relation is absent.
+      const workerName = dep.worker?.name || "Unknown";
+      lines.push(`- **${dep.model?.name || dep.name}** on ${workerName} (ID: ${dep.id})`);
+    }
+    lines.push("");
+  }
+
+  return lines.join("\n");
+}
+
+/**
+ * Format API keys list as markdown.
+ *
+ * @param apiKeysData Either the API payload carrying the keys under
+ *   `items`, or the keys array itself. `null`/`undefined` is treated as
+ *   an empty list.
+ * @returns Markdown text, or a fixed notice when there are no keys. Date
+ *   lines use `toLocaleString()`, so their exact text depends on the
+ *   runtime locale and time zone.
+ */
+export function formatApiKeys(apiKeysData: any): string {
+  // Accept a bare array as well as the `{ items: [...] }` envelope.
+  const apiKeys: any[] = Array.isArray(apiKeysData) ? apiKeysData : apiKeysData?.items || [];
+
+  if (apiKeys.length === 0) {
+    return "No API keys found.";
+  }
+
+  const lines: string[] = [`# API Keys (${apiKeys.length} total)\n`];
+
+  for (const key of apiKeys) {
+    lines.push(`## ${key.name}`);
+    lines.push(`- **ID:** ${key.id}`);
+    lines.push(`- **Access Key:** ${key.access_key || "N/A"}`);
+    if (key.description) {
+      lines.push(`- **Description:** ${key.description}`);
+    }
+    if (key.expires_at) {
+      lines.push(`- **Expires:** ${new Date(key.expires_at).toLocaleString()}`);
+    } else {
+      lines.push(`- **Expires:** Never`);
+    }
+    if (key.last_used_at) {
+      lines.push(`- **Last Used:** ${new Date(key.last_used_at).toLocaleString()}`);
+    }
+    // Guarded like the other formatters: a missing timestamp would
+    // otherwise render as "Invalid Date".
+    if (key.created_at) {
+      lines.push(`- **Created:** ${new Date(key.created_at).toLocaleString()}`);
+    }
+    lines.push("");
+  }
+
+  return lines.join("\n");
+}
+
+/**
+ * Format Docker images list as markdown, grouped by `worker_name`.
+ *
+ * The display name is `full_name` when present, otherwise
+ * `repository:tag` with the tag defaulting to "latest". Image IDs are
+ * shortened to their first 12 characters.
+ */
+export function formatImages(images: any[]): string {
+  if (images.length === 0) {
+    return "No Docker images found.";
+  }
+
+  // Bucket images under their worker's name.
+  const grouped: Record<string, any[]> = {};
+  for (const entry of images) {
+    const key = entry.worker_name || "Unknown";
+    const bucket = grouped[key] ?? (grouped[key] = []);
+    bucket.push(entry);
+  }
+
+  const lines: string[] = [`# Docker Images (${images.length} total)\n`];
+
+  Object.entries(grouped).forEach(([workerName, members]) => {
+    lines.push(`## ${workerName} (${members.length} images)`);
+
+    members.forEach((image) => {
+      const label = image.full_name || `${image.repository || ""}:${image.tag || "latest"}`;
+      lines.push(
+        `- **${label}**`,
+        `  - ID: ${image.id?.substring(0, 12) || "N/A"}`,
+        `  - Size: ${formatSize(image.size || 0)}`
+      );
+      if (image.created_at) {
+        lines.push(`  - Created: ${new Date(image.created_at).toLocaleString()}`);
+      }
+    });
+
+    lines.push("");
+  });
+
+  return lines.join("\n");
+}
+
+/**
+ * Format storage volumes list as markdown, grouped by `worker_name`.
+ *
+ * The driver defaults to "local"; mountpoint and creation time are listed
+ * only when present.
+ */
+export function formatStorageVolumes(volumes: any[]): string {
+  if (volumes.length === 0) {
+    return "No storage volumes found.";
+  }
+
+  // Bucket volumes under their worker's name.
+  const grouped: Record<string, any[]> = {};
+  for (const entry of volumes) {
+    const key = entry.worker_name || "Unknown";
+    const bucket = grouped[key] ?? (grouped[key] = []);
+    bucket.push(entry);
+  }
+
+  const lines: string[] = [`# Storage Volumes (${volumes.length} total)\n`];
+
+  Object.entries(grouped).forEach(([workerName, members]) => {
+    lines.push(`## ${workerName} (${members.length} volumes)`);
+
+    members.forEach((volume) => {
+      lines.push(`- **${volume.name}**`, `  - Driver: ${volume.driver || "local"}`);
+      if (volume.mountpoint) {
+        lines.push(`  - Mountpoint: ${volume.mountpoint}`);
+      }
+      if (volume.created_at) {
+        lines.push(`  - Created: ${new Date(volume.created_at).toLocaleString()}`);
+      }
+    });
+
+    lines.push("");
+  });
+
+  return lines.join("\n");
+}
+
+/**
+ * Format disk usage statistics as markdown, one section per entry.
+ *
+ * Every entry lists count, size and reclaimable space for images,
+ * containers, volumes and build cache, followed by overall totals.
+ * Missing categories or figures are shown as zero.
+ */
+export function formatDiskUsage(usageList: any[]): string {
+  if (usageList.length === 0) {
+    return "No disk usage data available.";
+  }
+
+  const lines: string[] = ["# Disk Usage\n"];
+
+  usageList.forEach((usage) => {
+    lines.push(`## ${usage.worker_name || "Worker"}`);
+
+    // Categories in display order, paired with their (possibly absent) stats.
+    const categories: Array<[string, any]> = [
+      ["Images", usage.images],
+      ["Containers", usage.containers],
+      ["Volumes", usage.volumes],
+      ["Build Cache", usage.build_cache],
+    ];
+
+    categories.forEach(([title, stats]) => {
+      lines.push(
+        `### ${title}`,
+        `- Count: ${stats?.count || 0}`,
+        `- Size: ${formatSize(stats?.size || 0)}`,
+        `- Reclaimable: ${formatSize(stats?.reclaimable || 0)}`
+      );
+    });
+
+    lines.push(
+      "",
+      `**Total Size:** ${formatSize(usage.total_size || 0)}`,
+      `**Total Reclaimable:** ${formatSize(usage.total_reclaimable || 0)}`,
+      ""
+    );
+  });
+
+  return lines.join("\n");
+}
diff --git a/mcp-server/src/index.ts b/mcp-server/src/index.ts
new file mode 100644
index 0000000..f0d6279
--- /dev/null
+++ b/mcp-server/src/index.ts
@@ -0,0 +1,1200 @@
+#!/usr/bin/env node
+/**
+ * LMStack MCP Server
+ *
+ * Model Context Protocol server for LMStack platform.
+ * Provides resources and tools for managing LLM infrastructure.
+ *
+ * This MCP Server exposes the SAME tools as the Web Chat interface,
+ * allowing AI agents (e.g., Claude Desktop, Cursor) to manage LMStack.
+ */
+import { Server } from "@modelcontextprotocol/sdk/server/index.js";
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+import {
+  CallToolRequestSchema,
+  ListResourcesRequestSchema,
+  ListToolsRequestSchema,
+  ReadResourceRequestSchema,
+} from "@modelcontextprotocol/sdk/types.js";
+import { LMStackClient } from "./client.js";
+import {
+  formatWorkers,
+  formatContainers,
+  formatDeployments,
+  formatModels,
+  formatSystemStatus,
+  formatApiKeys,
+  formatImages,
+  formatStorageVolumes,
+  formatDiskUsage,
+} from "./formatters.js";
+
+// Connection settings come from the environment. Because `||` is used, an
+// unset or empty variable falls back to the local default URL / empty token.
+const { env } = process;
+const LMSTACK_API_URL = env.LMSTACK_API_URL || "http://localhost:8000/api";
+const LMSTACK_API_TOKEN = env.LMSTACK_API_TOKEN || "";
+
+// API client shared by every resource and tool handler in this file.
+const client = new LMStackClient(LMSTACK_API_URL, LMSTACK_API_TOKEN);
+
+// Name and version handed to the MCP Server constructor.
+const serverInfo = {
+  name: "lmstack-mcp-server",
+  version: "0.2.0",
+};
+
+// Capability groups declared to clients: resources and tools, no extra options.
+const serverCapabilities = { resources: {}, tools: {} };
+
+// MCP server instance that the request handlers below are registered on.
+const server = new Server(serverInfo, { capabilities: serverCapabilities });
+
+/**
+ * Handle ListResources requests by advertising this server's resources.
+ *
+ * The reply is a fixed catalog of eight `lmstack://` URIs, each with a name
+ * and description. Every entry is reported as `text/plain`, so the MIME type
+ * is attached once while building the reply instead of per entry. The URIs
+ * are the same ones the ReadResource handler's `switch` recognises.
+ */
+server.setRequestHandler(ListResourcesRequestSchema, async () => {
+  // Static description of every readable resource (MIME type added below).
+  const catalog = [
+    {
+      uri: "lmstack://system/status",
+      name: "System Status",
+      description: "Complete LMStack system status including workers, deployments, and containers",
+    },
+    {
+      uri: "lmstack://workers",
+      name: "Workers",
+      description: "List of all worker nodes with GPU information",
+    },
+    {
+      uri: "lmstack://containers",
+      name: "Docker Containers",
+      description: "List of all Docker containers across workers",
+    },
+    {
+      uri: "lmstack://deployments",
+      name: "Model Deployments",
+      description: "List of all model deployments",
+    },
+    {
+      uri: "lmstack://models",
+      name: "Available Models",
+      description: "List of all registered models",
+    },
+    {
+      uri: "lmstack://api-keys",
+      name: "API Keys",
+      description: "List of all API keys",
+    },
+    {
+      uri: "lmstack://images",
+      name: "Docker Images",
+      description: "List of all Docker images across workers",
+    },
+    {
+      uri: "lmstack://storage",
+      name: "Storage Volumes",
+      description: "List of all storage volumes and disk usage",
+    },
+  ];
+
+  return {
+    resources: catalog.map((entry) => ({ ...entry, mimeType: "text/plain" })),
+  };
+});
+
+/**
+ * Read resource content: resolve one `lmstack://` URI to freshly fetched data, returned as a single text/plain item.
+ */
+server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
+  const { uri } = request.params;
+  // NOTE(review): every failure, including an unknown URI, is returned as text content instead of being thrown.
+  try {
+    switch (uri) {
+      case "lmstack://system/status": { // the four list calls are issued together via Promise.all
+        const [workers, containers, deployments, models] = await Promise.all([
+          client.getWorkers(),
+          client.getContainers(),
+          client.getDeployments(),
+          client.getModels(),
+        ]);
+        return {
+          contents: [
+            {
+              uri,
+              mimeType: "text/plain",
+              text: formatSystemStatus(workers, containers, deployments, models),
+            },
+          ],
+        };
+      }
+
+      case "lmstack://workers": {
+        const workers = await client.getWorkers();
+        return {
+          contents: [
+            {
+              uri,
+              mimeType: "text/plain",
+              text: formatWorkers(workers),
+            },
+          ],
+        };
+      }
+
+      case "lmstack://containers": {
+        const containers = await client.getContainers();
+        return {
+          contents: [
+            {
+              uri,
+              mimeType: "text/plain",
+              text: formatContainers(containers),
+            },
+          ],
+        };
+      }
+
+      case "lmstack://deployments": {
+        const deployments = await client.getDeployments();
+        return {
+          contents: [
+            {
+              uri,
+              mimeType: "text/plain",
+              text: formatDeployments(deployments),
+            },
+          ],
+        };
+      }
+
+      case "lmstack://models": {
+        const models = await client.getModels();
+        return {
+          contents: [
+            {
+              uri,
+              mimeType: "text/plain",
+              text: formatModels(models),
+            },
+          ],
+        };
+      }
+
+      case "lmstack://api-keys": {
+        const apiKeysData = await client.getApiKeys();
+        return {
+          contents: [
+            {
+              uri,
+              mimeType: "text/plain",
+              text: formatApiKeys(apiKeysData),
+            },
+          ],
+        };
+      }
+
+      case "lmstack://images": {
+        const images = await client.getImages();
+        return {
+          contents: [
+            {
+              uri,
+              mimeType: "text/plain",
+              text: formatImages(images),
+            },
+          ],
+        };
+      }
+
+      case "lmstack://storage": { // volumes report and disk-usage report, separated by a blank line
+        const [volumes, diskUsage] = await Promise.all([
+          client.getStorageVolumes(),
+          client.getDiskUsage(),
+        ]);
+        return {
+          contents: [
+            {
+              uri,
+              mimeType: "text/plain",
+              text: formatStorageVolumes(volumes) + "\n\n" + formatDiskUsage(diskUsage),
+            },
+          ],
+        };
+      }
+
+      default: // the error thrown here is caught by this handler's own catch below
+        throw new Error(`Unknown resource: ${uri}`);
+    }
+  } catch (error) { // non-Error throwables are stringified for the message
+    const message = error instanceof Error ? error.message : String(error);
+    return {
+      contents: [
+        {
+          uri,
+          mimeType: "text/plain",
+          text: `Error fetching resource: ${message}`,
+        },
+      ],
+    };
+  }
+});
+
+/**
+ * List available tools: returns the static catalog of tool names, descriptions and JSON-schema inputs.
+ * These tools match the Web Chat tools in tools.ts; each name has a matching `case` in the CallTool handler.
+ */
+server.setRequestHandler(ListToolsRequestSchema, async () => {
+  return {
+    tools: [
+      // ============== Query Tools ==============
+      {
+        name: "get_system_status",
+        description: "Get complete LMStack system status including workers, GPUs, containers, and deployments",
+        inputSchema: {
+          type: "object",
+          properties: {},
+          required: [],
+        },
+      },
+      {
+        name: "list_workers",
+        description: "List all worker nodes with their GPU status and memory usage",
+        inputSchema: {
+          type: "object",
+          properties: {},
+          required: [],
+        },
+      },
+      {
+        name: "list_containers",
+        description: "List all Docker containers running on workers",
+        inputSchema: {
+          type: "object",
+          properties: {
+            status: {
+              type: "string",
+              description: "Filter by status: running, stopped, all (default: all)",
+              enum: ["running", "stopped", "all"], // the CallTool handler also treats statuses containing "up" as running
+            },
+            worker_id: {
+              type: "number",
+              description: "Filter by specific worker ID",
+            },
+          },
+          required: [],
+        },
+      },
+      {
+        name: "list_deployments",
+        description: "List all model deployments",
+        inputSchema: {
+          type: "object",
+          properties: {
+            status: {
+              type: "string",
+              description: "Filter by status: running, stopped, all (default: all)",
+              enum: ["running", "stopped", "all"], // the CallTool handler compares this case-insensitively for equality
+            },
+          },
+          required: [],
+        },
+      },
+      {
+        name: "list_models",
+        description: "List all available models that can be deployed",
+        inputSchema: {
+          type: "object",
+          properties: {
+            source: {
+              type: "string",
+              description: "Filter by source: huggingface, ollama, local (optional)",
+              enum: ["huggingface", "ollama", "local"],
+            },
+          },
+          required: [],
+        },
+      },
+      {
+        name: "get_gpu_status",
+        description: "Get detailed GPU status including memory usage and utilization",
+        inputSchema: {
+          type: "object",
+          properties: {
+            worker_id: {
+              type: "number",
+              description: "Filter by specific worker ID",
+            },
+          },
+          required: [],
+        },
+      },
+
+      // ============== Model Management Tools ==============
+      {
+        name: "add_model",
+        description: "Add a new model to the system. Supports HuggingFace and Ollama models.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            name: {
+              type: "string",
+              description: "Model name/identifier (e.g., 'Qwen/Qwen2.5-7B-Instruct' for HuggingFace, 'llama3.2' for Ollama)",
+            },
+            source: {
+              type: "string",
+              description: "Model source",
+              enum: ["huggingface", "ollama"],
+            },
+            parameters: {
+              type: "string",
+              description: "Optional: Model parameters (e.g., '7B', '13B')",
+            },
+            quantization: {
+              type: "string",
+              description: "Optional: Quantization format (e.g., 'GPTQ', 'AWQ', 'GGUF')",
+            },
+          },
+          required: ["name", "source"],
+        },
+      },
+      {
+        name: "delete_model",
+        description: "Delete a model from the system. This will NOT delete any deployments using this model.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            model_id: {
+              type: "number",
+              description: "ID of the model to delete (use list_models to find IDs)",
+            },
+          },
+          required: ["model_id"],
+        },
+      },
+
+      // ============== Deployment Tools ==============
+      {
+        name: "deploy_model",
+        description: "Deploy a model to a worker. Returns deployment ID on success.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            model_id: {
+              type: "number",
+              description: "ID of the model to deploy",
+            },
+            worker_id: {
+              type: "number",
+              description: "ID of the worker to deploy to",
+            },
+            gpu_ids: {
+              type: "array", // the CallTool handler passes this value to the client as given
+              items: { type: "number" },
+              description: "GPU indices to use (optional, defaults to auto-select)",
+            },
+          },
+          required: ["model_id", "worker_id"],
+        },
+      },
+      {
+        name: "stop_deployment",
+        description: "Stop a running model deployment",
+        inputSchema: {
+          type: "object",
+          properties: {
+            deployment_id: {
+              type: "number",
+              description: "ID of the deployment to stop",
+            },
+          },
+          required: ["deployment_id"],
+        },
+      },
+      {
+        name: "start_deployment",
+        description: "Start a stopped model deployment",
+        inputSchema: {
+          type: "object",
+          properties: {
+            deployment_id: {
+              type: "number",
+              description: "ID of the deployment to start",
+            },
+          },
+          required: ["deployment_id"],
+        },
+      },
+      {
+        name: "delete_deployment",
+        description: "Delete a model deployment completely. This cannot be undone.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            deployment_id: {
+              type: "number",
+              description: "ID of the deployment to delete",
+            },
+          },
+          required: ["deployment_id"],
+        },
+      },
+
+      // ============== Container Tools ==============
+      {
+        name: "stop_container",
+        description: "Stop a running Docker container. Use list_containers first to find worker_id.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            container_name: {
+              type: "string",
+              description: "Name of the container to stop",
+            },
+            worker_id: {
+              type: "number",
+              description: "ID of the worker where the container is running",
+            },
+          },
+          required: ["container_name", "worker_id"],
+        },
+      },
+      {
+        name: "remove_container",
+        description: "Remove/delete a Docker container. Use list_containers first to find worker_id.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            container_name: {
+              type: "string",
+              description: "Name of the container to remove",
+            },
+            worker_id: {
+              type: "number",
+              description: "ID of the worker where the container is located",
+            },
+            force: {
+              type: "boolean",
+              description: "Force remove even if running (default: false)",
+            },
+          },
+          required: ["container_name", "worker_id"],
+        },
+      },
+
+      // ============== API Key Tools ==============
+      {
+        name: "list_api_keys",
+        description: "List all API keys in the system with their usage statistics.",
+        inputSchema: {
+          type: "object",
+          properties: {},
+          required: [],
+        },
+      },
+      {
+        name: "create_api_key",
+        description: "Create a new API key for accessing the LMStack API.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            name: {
+              type: "string",
+              description: "Name for the API key (e.g., 'production-key', 'test-key')",
+            },
+            description: {
+              type: "string",
+              description: "Optional description for the API key",
+            },
+            expires_in_days: {
+              type: "number",
+              description: "Optional: Number of days until the key expires. If not set, the key never expires.",
+            },
+          },
+          required: ["name"],
+        },
+      },
+      {
+        name: "delete_api_key",
+        description: "Delete an API key from the system.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            api_key_id: {
+              type: "number",
+              description: "ID of the API key to delete (use list_api_keys to find IDs)",
+            },
+          },
+          required: ["api_key_id"],
+        },
+      },
+
+      // ============== Docker Image Tools ==============
+      {
+        name: "list_images",
+        description: "List all Docker images across all workers.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            worker_id: {
+              type: "number",
+              description: "Optional: Filter by specific worker ID",
+            },
+            repository: {
+              type: "string",
+              description: "Optional: Filter by repository name",
+            },
+          },
+          required: [],
+        },
+      },
+      {
+        name: "pull_image",
+        description: "Pull a Docker image from a registry to a worker.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            worker_id: {
+              type: "number",
+              description: "ID of the worker to pull the image to",
+            },
+            image: {
+              type: "string",
+              description: "Image reference (e.g., 'nginx:latest', 'python:3.11')",
+            },
+          },
+          required: ["worker_id", "image"],
+        },
+      },
+      {
+        name: "delete_image",
+        description: "Delete a Docker image from a worker.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            image_id: {
+              type: "string",
+              description: "ID or name of the image to delete",
+            },
+            worker_id: {
+              type: "number",
+              description: "ID of the worker where the image is located",
+            },
+            force: {
+              type: "boolean",
+              description: "Force removal even if image is in use (default: false)",
+            },
+          },
+          required: ["image_id", "worker_id"],
+        },
+      },
+
+      // ============== Storage Tools ==============
+      {
+        name: "list_storage_volumes",
+        description: "List all Docker storage volumes across all workers.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            worker_id: {
+              type: "number",
+              description: "Optional: Filter by specific worker ID",
+            },
+          },
+          required: [],
+        },
+      },
+      {
+        name: "get_disk_usage",
+        description: "Get Docker disk usage statistics including images, containers, volumes, and build cache.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            worker_id: {
+              type: "number",
+              description: "Optional: Filter by specific worker ID",
+            },
+          },
+          required: [],
+        },
+      },
+      {
+        name: "delete_storage_volume",
+        description: "Delete a Docker storage volume from a worker.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            volume_name: {
+              type: "string",
+              description: "Name of the volume to delete",
+            },
+            worker_id: {
+              type: "number",
+              description: "ID of the worker where the volume is located",
+            },
+            force: {
+              type: "boolean",
+              description: "Force removal (default: false)",
+            },
+          },
+          required: ["volume_name", "worker_id"],
+        },
+      },
+      {
+        name: "prune_storage",
+        description: "Clean up unused Docker resources (images, containers, volumes, build cache) to free disk space.",
+        inputSchema: {
+          type: "object",
+          properties: {
+            worker_id: {
+              type: "number",
+              description: "Optional: Only prune on specific worker. If not set, prunes on all workers.",
+            },
+            images: {
+              type: "boolean",
+              description: "Prune unused images (default: true)",
+            },
+            containers: {
+              type: "boolean",
+              description: "Prune stopped containers (default: true)",
+            },
+            volumes: {
+              type: "boolean", // the CallTool handler enables volume pruning only for an explicit `true`
+              description: "Prune unused volumes (default: false - be careful!)",
+            },
+            build_cache: {
+              type: "boolean",
+              description: "Prune build cache (default: true)",
+            },
+          },
+          required: [],
+        },
+      },
+    ],
+  };
+});
+
+/**
+ * Execute tools: dispatch on the tool name, call the LMStack client, and reply with one text item (plus isError on failure).
+ */
+server.setRequestHandler(CallToolRequestSchema, async (request) => {
+  const { name, arguments: args } = request.params;
+  // NOTE(review): arguments are tested by truthiness throughout, so an ID of 0 or an empty string counts as missing.
+  try {
+    switch (name) {
+      // ============== Query Tools ==============
+      case "get_system_status": {
+        const [workers, containers, deployments, models] = await Promise.all([
+          client.getWorkers(),
+          client.getContainers(),
+          client.getDeployments(),
+          client.getModels(),
+        ]);
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatSystemStatus(workers, containers, deployments, models),
+            },
+          ],
+        };
+      }
+
+      case "list_workers": {
+        const workers = await client.getWorkers();
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatWorkers(workers),
+            },
+          ],
+        };
+      }
+
+      case "list_containers": {
+        const containers = await client.getContainers(); // all containers are fetched, then filtered locally
+        let filtered = containers;
+
+        if (args?.worker_id) {
+          filtered = filtered.filter((c: any) => // accepts either a nested worker object or a flat worker_id field
+            c.worker?.id === args.worker_id || c.worker_id === args.worker_id
+          );
+        }
+
+        if (args?.status && args.status !== "all") {
+          filtered = filtered.filter((c: any) => {
+            const s = c.status?.toLowerCase() || "";
+            if (args.status === "running") {
+              return s.includes("running") || s.includes("up");
+            }
+            return s.includes(String(args.status)); // NOTE(review): "stopped" must appear literally in the status text — confirm the API reports that word
+          });
+        }
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatContainers(filtered),
+            },
+          ],
+        };
+      }
+
+      case "list_deployments": {
+        const deployments = await client.getDeployments();
+        let filtered = deployments;
+
+        if (args?.status && args.status !== "all") {
+          filtered = filtered.filter((d: any) => // exact, case-insensitive status match
+            d.status?.toLowerCase() === String(args.status).toLowerCase()
+          );
+        }
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatDeployments(filtered),
+            },
+          ],
+        };
+      }
+
+      case "list_models": {
+        const models = await client.getModels();
+        let filtered = models;
+
+        if (args?.source) {
+          filtered = filtered.filter((m: any) => // exact, case-insensitive source match
+            m.source?.toLowerCase() === String(args.source).toLowerCase()
+          );
+        }
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatModels(filtered),
+            },
+          ],
+        };
+      }
+
+      case "get_gpu_status": {
+        const workers = await client.getWorkers();
+        let filtered = workers;
+
+        if (args?.worker_id) {
+          filtered = filtered.filter((w: any) => w.id === args.worker_id);
+        }
+
+        const lines: string[] = ["# GPU Status\n"];
+        for (const worker of filtered) {
+          lines.push(`## ${worker.name} (${worker.status})`);
+          if (worker.gpu_info && worker.gpu_info.length > 0) {
+            for (const gpu of worker.gpu_info) {
+              const usedGB = (gpu.memory_used / 1024).toFixed(1); // assumes memory_* values are in MB — TODO confirm
+              const totalGB = (gpu.memory_total / 1024).toFixed(1);
+              const freeGB = ((gpu.memory_total - gpu.memory_used) / 1024).toFixed(1);
+              lines.push(`- GPU ${gpu.index}: ${gpu.name}`);
+              lines.push(`  - Memory: ${usedGB}GB used / ${freeGB}GB free / ${totalGB}GB total`);
+              lines.push(`  - Utilization: ${gpu.utilization_gpu}%`);
+            }
+          } else {
+            lines.push("- No GPU information available");
+          }
+          lines.push("");
+        }
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: lines.join("\n"),
+            },
+          ],
+        };
+      }
+
+      // ============== Model Management Tools ==============
+      case "add_model": {
+        if (!args?.name || !args?.source) {
+          throw new Error("name and source are required");
+        }
+
+        const model = await client.addModel(
+          String(args.name),
+          String(args.source),
+          args.parameters ? String(args.parameters) : undefined,
+          args.quantization ? String(args.quantization) : undefined
+        );
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully added model!\n\nModel ID: ${model.id}\nName: ${model.name}\nSource: ${model.source}`,
+            },
+          ],
+        };
+      }
+
+      case "delete_model": {
+        if (!args?.model_id) {
+          throw new Error("model_id is required");
+        }
+
+        await client.deleteModel(Number(args.model_id));
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully deleted model ${args.model_id}`,
+            },
+          ],
+        };
+      }
+
+      // ============== Deployment Tools ==============
+      case "deploy_model": {
+        if (!args?.model_id || !args?.worker_id) {
+          throw new Error("model_id and worker_id are required");
+        }
+
+        const result = await client.deployModel(
+          Number(args.model_id),
+          Number(args.worker_id),
+          args.gpu_ids as number[] | undefined // type assertion only; the value is not validated here
+        );
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully started deployment!\n\nDeployment ID: ${result.id}\nStatus: ${result.status}\n\nThe model is being deployed. Use list_deployments to check status.`,
+            },
+          ],
+        };
+      }
+
+      case "stop_deployment": {
+        if (!args?.deployment_id) {
+          throw new Error("deployment_id is required");
+        }
+
+        await client.stopDeployment(Number(args.deployment_id));
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully stopped deployment ${args.deployment_id}`,
+            },
+          ],
+        };
+      }
+
+      case "start_deployment": {
+        if (!args?.deployment_id) {
+          throw new Error("deployment_id is required");
+        }
+
+        await client.startDeployment(Number(args.deployment_id));
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully started deployment ${args.deployment_id}`,
+            },
+          ],
+        };
+      }
+
+      case "delete_deployment": {
+        if (!args?.deployment_id) {
+          throw new Error("deployment_id is required");
+        }
+
+        await client.deleteDeployment(Number(args.deployment_id));
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully deleted deployment ${args.deployment_id}`,
+            },
+          ],
+        };
+      }
+
+      // ============== Container Tools ==============
+      case "stop_container": {
+        if (!args?.container_name || !args?.worker_id) {
+          throw new Error("container_name and worker_id are required");
+        }
+
+        await client.stopContainer(String(args.container_name), Number(args.worker_id));
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully stopped container "${args.container_name}"`,
+            },
+          ],
+        };
+      }
+
+      case "remove_container": {
+        if (!args?.container_name || !args?.worker_id) {
+          throw new Error("container_name and worker_id are required");
+        }
+
+        await client.removeContainer(
+          String(args.container_name),
+          Number(args.worker_id),
+          args.force as boolean | undefined
+        );
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully removed container "${args.container_name}"`,
+            },
+          ],
+        };
+      }
+
+      // ============== API Key Tools ==============
+      case "list_api_keys": {
+        const apiKeysData = await client.getApiKeys();
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatApiKeys(apiKeysData),
+            },
+          ],
+        };
+      }
+
+      case "create_api_key": {
+        if (!args?.name) {
+          throw new Error("name is required");
+        }
+
+        const apiKey = await client.createApiKey( // the returned api_key field is echoed in full in the reply below
+          String(args.name),
+          args.description ? String(args.description) : undefined,
+          args.expires_in_days ? Number(args.expires_in_days) : undefined
+        );
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully created API key!\n\nID: ${apiKey.id}\nName: ${apiKey.name}\nAccess Key: ${apiKey.access_key}\nFull Key: ${apiKey.api_key}\n\n**IMPORTANT:** Save the full API key now! It will not be shown again.`,
+            },
+          ],
+        };
+      }
+
+      case "delete_api_key": {
+        if (!args?.api_key_id) {
+          throw new Error("api_key_id is required");
+        }
+
+        await client.deleteApiKey(Number(args.api_key_id));
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully deleted API key ${args.api_key_id}`,
+            },
+          ],
+        };
+      }
+
+      // ============== Docker Image Tools ==============
+      case "list_images": {
+        const images = await client.getImages( // unlike list_containers, both filters are passed to the client call
+          args?.worker_id ? Number(args.worker_id) : undefined,
+          args?.repository ? String(args.repository) : undefined
+        );
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatImages(images),
+            },
+          ],
+        };
+      }
+
+      case "pull_image": {
+        if (!args?.worker_id || !args?.image) {
+          throw new Error("worker_id and image are required");
+        }
+
+        const result = await client.pullImage(Number(args.worker_id), String(args.image));
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully pulled image "${args.image}"\n\n${JSON.stringify(result, null, 2)}`,
+            },
+          ],
+        };
+      }
+
+      case "delete_image": {
+        if (!args?.image_id || !args?.worker_id) {
+          throw new Error("image_id and worker_id are required");
+        }
+
+        await client.deleteImage(
+          String(args.image_id),
+          Number(args.worker_id),
+          args.force as boolean | undefined
+        );
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully deleted image "${args.image_id}"`,
+            },
+          ],
+        };
+      }
+
+      // ============== Storage Tools ==============
+      case "list_storage_volumes": {
+        const volumes = await client.getStorageVolumes(
+          args?.worker_id ? Number(args.worker_id) : undefined
+        );
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatStorageVolumes(volumes),
+            },
+          ],
+        };
+      }
+
+      case "get_disk_usage": {
+        const diskUsage = await client.getDiskUsage(
+          args?.worker_id ? Number(args.worker_id) : undefined
+        );
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatDiskUsage(diskUsage),
+            },
+          ],
+        };
+      }
+
+      case "delete_storage_volume": {
+        if (!args?.volume_name || !args?.worker_id) {
+          throw new Error("volume_name and worker_id are required");
+        }
+
+        await client.deleteStorageVolume(
+          String(args.volume_name),
+          Number(args.worker_id),
+          args.force as boolean | undefined
+        );
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully deleted volume "${args.volume_name}"`,
+            },
+          ],
+        };
+      }
+
+      case "prune_storage": {
+        const results = await client.pruneStorage(
+          args?.worker_id ? Number(args.worker_id) : undefined,
+          args?.images !== false, // on unless explicitly false
+          args?.containers !== false, // on unless explicitly false
+          args?.volumes === true, // off unless explicitly true
+          args?.build_cache !== false // on unless explicitly false
+        );
+
+        const formatSize = (bytes: number) => { // local helper: GB from 1024^3 bytes upward, otherwise MB
+          if (bytes >= 1024 * 1024 * 1024) {
+            return `${(bytes / 1024 / 1024 / 1024).toFixed(2)} GB`;
+          }
+          return `${(bytes / 1024 / 1024).toFixed(2)} MB`;
+        };
+
+        const lines = ["# Storage Pruned Successfully\n"];
+        for (const r of results) {
+          lines.push(`## ${r.worker_name || "Worker"}`);
+          lines.push(`- Images deleted: ${r.images_deleted || 0}`);
+          lines.push(`- Containers deleted: ${r.containers_deleted || 0}`);
+          lines.push(`- Volumes deleted: ${r.volumes_deleted || 0}`);
+          lines.push(`- Build cache deleted: ${r.build_cache_deleted || 0}`);
+          lines.push(`- Space reclaimed: ${formatSize(r.space_reclaimed || 0)}`);
+          lines.push("");
+        }
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: lines.join("\n"),
+            },
+          ],
+        };
+      }
+
+      default:
+        throw new Error(`Unknown tool: ${name}`);
+    }
+  } catch (error) { // validation errors, client failures and unknown tools all end up here
+    const message = error instanceof Error ? error.message : String(error);
+    return {
+      content: [
+        {
+          type: "text",
+          text: `Error: ${message}`,
+        },
+      ],
+      isError: true,
+    };
+  }
+});
+
+/**
+ * Start the server: attach it to a stdio transport, then log a banner and the API URL via console.error.
+ */
+async function main() {
+  await server.connect(new StdioServerTransport());
+  console.error("LMStack MCP Server running on stdio");
+  console.error(`API URL: ${LMSTACK_API_URL}`);
+}
+
+// Entry point: a startup failure is logged and the process exits with code 1.
+main().catch((startupError) => {
+  console.error("Fatal error:", startupError);
+  process.exit(1);
+});
diff --git a/mcp-server/tsconfig.json b/mcp-server/tsconfig.json
new file mode 100644
index 0000000..e9005d0
--- /dev/null
+++ b/mcp-server/tsconfig.json
@@ -0,0 +1,16 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "NodeNext",
+    "moduleResolution": "NodeNext",
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "declaration": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist"]
+}