diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py index 02fc736..ebecf06 100644 --- a/backend/app/api/__init__.py +++ b/backend/app/api/__init__.py @@ -6,6 +6,8 @@ api_keys, apps, auth, + auto_tuning, + chat_proxy, containers, conversations, dashboard, @@ -62,3 +64,9 @@ # Semantic Router api_router.include_router(semantic_router.router) + +# Chat Proxy for external endpoints +api_router.include_router(chat_proxy.router, tags=["chat-proxy"]) + +# Auto-Tuning Agent +api_router.include_router(auto_tuning.router, prefix="/auto-tuning", tags=["auto-tuning"]) diff --git a/backend/app/api/auto_tuning.py b/backend/app/api/auto_tuning.py new file mode 100644 index 0000000..7a4dcc6 --- /dev/null +++ b/backend/app/api/auto_tuning.py @@ -0,0 +1,722 @@ +"""Auto-Tuning API routes + +Implements the Auto-Tuning Agent workflow: +1. Environment Analysis - Query hardware and model info +2. Knowledge Base Query - Search for similar configurations +3. Configuration Space Exploration - Generate candidate configs +4. Auto Benchmark - Test each configuration +5. 
def tuning_job_to_response(job: TuningJob, include_conversation: bool = True) -> TuningJobResponse:
    """Map a ``TuningJob`` row onto the ``TuningJobResponse`` schema.

    Args:
        job: Tuning job to serialise. Its ``model`` and ``worker``
            relationships are read to fill in ``model_name`` / ``worker_name``.
        include_conversation: If False, ``conversation_log`` is returned as
            ``None`` regardless of what the job has stored.

    Returns:
        The response schema populated from ``job``.
    """
    # A falsy progress payload (None or empty) is reported as None.
    progress_payload = TuningJobProgress(**job.progress) if job.progress else None

    messages = None
    if include_conversation and job.conversation_log:
        # The message schema is imported locally, only when a log is converted.
        from app.schemas.tuning import ConversationMessage

        messages = [ConversationMessage(**entry) for entry in job.conversation_log]

    fields = {
        "id": job.id,
        "model_id": job.model_id,
        "worker_id": job.worker_id,
        "optimization_target": job.optimization_target,
        "status": job.status,
        "status_message": job.status_message,
        "current_step": job.current_step,
        "total_steps": job.total_steps,
        "progress": progress_payload,
        "best_config": job.best_config,
        "all_results": job.all_results,
        "conversation_log": messages,
        "created_at": job.created_at,
        "updated_at": job.updated_at,
        "completed_at": job.completed_at,
        # Display names fall back to None when the relationship is empty.
        "model_name": job.model.name if job.model else None,
        "worker_name": job.worker.name if job.worker else None,
    }
    return TuningJobResponse(**fields)
@router.post("/jobs", response_model=TuningJobResponse, status_code=201)
async def create_tuning_job(
    job_in: TuningJobCreate,
    background_tasks: BackgroundTasks,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_operator),
):
    """Create an auto-tuning job and schedule the agent run.

    Args:
        job_in: Requested model, worker, optimization target and optional
            LLM configuration for the agent.
        background_tasks: FastAPI background task queue; the tuning run is
            added to it after the job row has been committed.
        db: Database session.
        current_user: Authenticated user (operator role required).

    Returns:
        The newly created job as a ``TuningJobResponse``.

    Raises:
        HTTPException: 404 if the referenced model or worker does not exist.
    """
    # Both referenced rows must exist before a job is recorded.
    model_lookup = await db.execute(select(LLMModel).where(LLMModel.id == job_in.model_id))
    if not model_lookup.scalar_one_or_none():
        raise HTTPException(status_code=404, detail="Model not found")

    worker_lookup = await db.execute(select(Worker).where(Worker.id == job_in.worker_id))
    if not worker_lookup.scalar_one_or_none():
        raise HTTPException(status_code=404, detail="Worker not found")

    initial_progress = {
        "step": 0,
        "total_steps": 5,
        "step_name": "Initializing",
        "step_description": "Preparing auto-tuning job...",
        "configs_tested": 0,
        "configs_total": 0,
    }
    new_job = TuningJob(
        model_id=job_in.model_id,
        worker_id=job_in.worker_id,
        optimization_target=job_in.optimization_target.value,
        status=TuningJobStatus.PENDING.value,
        progress=initial_progress,
    )

    db.add(new_job)
    await db.commit()
    await db.refresh(new_job)

    # The agent receives the LLM settings as a plain dict (or None).
    agent_llm_config = job_in.llm_config.model_dump() if job_in.llm_config else None

    # Queue the tuning run on the background task list.
    background_tasks.add_task(run_auto_tuning, new_job.id, agent_llm_config)

    # Re-select the job with its relationships loaded for the response.
    reload_stmt = (
        select(TuningJob)
        .where(TuningJob.id == new_job.id)
        .options(
            selectinload(TuningJob.model),
            selectinload(TuningJob.worker),
        )
    )
    reloaded = (await db.execute(reload_stmt)).scalar_one()

    return tuning_job_to_response(reloaded)
@router.post("/benchmarks/run", response_model=BenchmarkResultResponse)
async def run_benchmark(
    request: BenchmarkRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_operator),
):
    """Run a standalone benchmark on a deployment and persist the result.

    Args:
        request: Benchmark parameters (deployment id, test type, duration,
            input/output lengths, concurrency).
        db: Database session.
        current_user: Authenticated user (operator role required).

    Returns:
        The stored benchmark result as a ``BenchmarkResultResponse``. When
        the benchmark itself fails, the row is still stored with
        ``error_message`` set and the metric fields left empty.

    Raises:
        HTTPException: 404 if the deployment does not exist, 400 if it is
            not in the running state.
    """
    # Verify deployment exists and is running.
    # ``_run_benchmark_test`` reads ``deployment.worker`` to build the target
    # URL, so the worker is eager-loaded here as well: with AsyncSession an
    # implicit lazy load on attribute access is not possible.
    result = await db.execute(
        select(Deployment)
        .where(Deployment.id == request.deployment_id)
        .options(
            selectinload(Deployment.model),
            selectinload(Deployment.worker),
        )
    )
    deployment = result.scalar_one_or_none()

    if not deployment:
        raise HTTPException(status_code=404, detail="Deployment not found")

    if deployment.status != DeploymentStatus.RUNNING.value:
        raise HTTPException(status_code=400, detail="Deployment is not running")

    # Run benchmark
    metrics = await _run_benchmark_test(deployment, request)

    # Save result; missing metric keys are stored as NULL.
    benchmark_result = BenchmarkResult(
        deployment_id=deployment.id,
        config={
            "engine": deployment.backend,
            "gpu_indexes": deployment.gpu_indexes,
            "extra_params": deployment.extra_params,
        },
        test_type=request.test_type,
        test_duration_seconds=request.duration_seconds,
        input_length=request.input_length,
        output_length=request.output_length,
        concurrency=request.concurrency,
        throughput_tps=metrics.get("throughput_tps"),
        ttft_ms=metrics.get("ttft_ms"),
        tpot_ms=metrics.get("tpot_ms"),
        total_latency_ms=metrics.get("total_latency_ms"),
        gpu_utilization=metrics.get("gpu_utilization"),
        vram_usage_gb=metrics.get("vram_usage_gb"),
        raw_results=metrics.get("raw"),
        error_message=metrics.get("error"),
    )

    db.add(benchmark_result)
    await db.commit()
    await db.refresh(benchmark_result)

    return benchmark_result_to_response(benchmark_result)
stmt.where(PerformanceKnowledge.model_name.ilike(f"%{query.model_name}%")) + if query.model_family: + stmt = stmt.where(PerformanceKnowledge.model_family == query.model_family) + if query.gpu_model: + stmt = stmt.where(PerformanceKnowledge.gpu_model.ilike(f"%{query.gpu_model}%")) + if query.min_vram_gb: + stmt = stmt.where(PerformanceKnowledge.total_vram_gb >= query.min_vram_gb) + + # Order by score (computed based on optimization target) + if query.optimization_target == OptimizationTarget.THROUGHPUT: + stmt = stmt.order_by(PerformanceKnowledge.throughput_tps.desc()) + elif query.optimization_target == OptimizationTarget.LATENCY: + stmt = stmt.order_by(PerformanceKnowledge.ttft_ms.asc()) + else: + # Balanced - order by a combined score + stmt = stmt.order_by(PerformanceKnowledge.score.desc().nulls_last()) + + stmt = stmt.limit(query.limit) + + result = await db.execute(stmt) + records = result.scalars().all() + + # Count total matches + count_stmt = select(func.count()).select_from(PerformanceKnowledge) + if query.model_name: + count_stmt = count_stmt.where( + PerformanceKnowledge.model_name.ilike(f"%{query.model_name}%") + ) + if query.model_family: + count_stmt = count_stmt.where(PerformanceKnowledge.model_family == query.model_family) + if query.gpu_model: + count_stmt = count_stmt.where(PerformanceKnowledge.gpu_model.ilike(f"%{query.gpu_model}%")) + if query.min_vram_gb: + count_stmt = count_stmt.where(PerformanceKnowledge.total_vram_gb >= query.min_vram_gb) + + total = await db.scalar(count_stmt) + + return KnowledgeQueryResponse( + items=[KnowledgeRecord.model_validate(r) for r in records], + total=total or 0, + query=query, + ) + + +@router.post("/knowledge/save", response_model=KnowledgeRecord) +async def save_to_knowledge_base( + request: KnowledgeSaveRequest, + db: AsyncSession = Depends(get_db), + current_user: User = Depends(require_operator), +): + """Save a benchmark result to the knowledge base""" + # Get benchmark result with related data + result 
= await db.execute( + select(BenchmarkResult) + .where(BenchmarkResult.id == request.benchmark_result_id) + .options(selectinload(BenchmarkResult.deployment)) + ) + benchmark = result.scalar_one_or_none() + + if not benchmark: + raise HTTPException(status_code=404, detail="Benchmark result not found") + + if not benchmark.throughput_tps or not benchmark.ttft_ms or not benchmark.tpot_ms: + raise HTTPException(status_code=400, detail="Benchmark result has incomplete metrics") + + # Get deployment and worker info + deployment = benchmark.deployment + worker_result = await db.execute(select(Worker).where(Worker.id == deployment.worker_id)) + worker = worker_result.scalar_one_or_none() + + model_result = await db.execute(select(LLMModel).where(LLMModel.id == deployment.model_id)) + model = model_result.scalar_one_or_none() + + if not worker or not model: + raise HTTPException(status_code=400, detail="Missing worker or model info") + + # Extract GPU info from worker + gpu_info = worker.gpu_info or [] + gpu_model = gpu_info[0].get("name", "Unknown") if gpu_info else "Unknown" + gpu_count = len(deployment.gpu_indexes) if deployment.gpu_indexes else len(gpu_info) + total_vram = sum(g.get("memory_total", 0) for g in gpu_info) / 1024 # Convert to GB + + # Compute score (balanced) + # Higher throughput is better, lower latency is better + # Normalize and combine + score = benchmark.throughput_tps / (benchmark.ttft_ms + benchmark.tpot_ms * 100) + + # Create knowledge record + record = PerformanceKnowledge( + gpu_model=gpu_model, + gpu_count=gpu_count, + total_vram_gb=total_vram, + model_name=model.name, + model_family=request.model_family, + model_params_b=request.model_params_b, + engine=deployment.backend, + quantization=benchmark.config.get("quantization"), + tensor_parallel=len(deployment.gpu_indexes) if deployment.gpu_indexes else 1, + extra_args=deployment.extra_params, + throughput_tps=benchmark.throughput_tps, + ttft_ms=benchmark.ttft_ms, + tpot_ms=benchmark.tpot_ms, + 
@router.post("/agent/chat", response_model=AgentChatResponse)
async def agent_chat(
    request: AgentChatRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(require_viewer),
):
    """Chat with the Auto-Tuning Agent.

    Sends the user's message (plus up to the last 10 history entries) to an
    OpenAI-compatible chat endpoint with the agent tools enabled, executes
    any tool calls the model returns, and reports both the text answer and
    the tool results.

    Args:
        request: Message, chat history and provider configuration. The
            ``config`` dict is read for ``provider`` (``system``, ``openai``,
            ``anthropic`` or ``custom``), ``deploymentId``, ``baseUrl``,
            ``apiKey`` and ``model``.
        db: Database session.
        current_user: Authenticated user (viewer role required).

    Returns:
        ``AgentChatResponse`` with the assistant text and, if any tools were
        executed, a list of ``{"name", "args", "result"}`` dicts.

    Raises:
        HTTPException: 400 for an invalid/unsupported provider configuration
            or a deployment that is not running, 404 for an unknown
            deployment, 500 if the LLM call or a tool execution fails.
    """
    import json

    from app.services.tuning_agent import AGENT_SYSTEM_PROMPT, AgentToolExecutor, get_agent_tools

    config = request.config
    provider = config.get("provider", "system")

    # Build client based on provider
    if provider == "system":
        # Use a system deployment
        deployment_id = config.get("deploymentId")
        if not deployment_id:
            raise HTTPException(status_code=400, detail="No deployment selected")

        result = await db.execute(
            select(Deployment)
            .where(Deployment.id == deployment_id)
            .options(selectinload(Deployment.worker))
        )
        deployment = result.scalar_one_or_none()

        if not deployment:
            raise HTTPException(status_code=404, detail="Deployment not found")

        if deployment.status != DeploymentStatus.RUNNING.value:
            raise HTTPException(status_code=400, detail="Deployment is not running")

        worker = deployment.worker
        base_url = f"http://{worker.host}:{deployment.port}/v1"
        api_key = "dummy"
        model = "default"

    elif provider == "openai":
        base_url = "https://api.openai.com/v1"
        api_key = config.get("apiKey")
        model = config.get("model", "gpt-4o")

    elif provider == "anthropic":
        # Anthropic uses different API format, need adapter
        raise HTTPException(
            status_code=400, detail="Anthropic not yet supported, use OpenAI-compatible endpoint"
        )

    elif provider == "custom":
        base_url = config.get("baseUrl")
        # Without an explicit base URL the OpenAI client falls back to its
        # default endpoint, so a misconfigured "custom" provider would send
        # the conversation somewhere the user did not choose.
        if not base_url:
            raise HTTPException(status_code=400, detail="Base URL is required for custom provider")
        api_key = config.get("apiKey", "dummy")
        model = config.get("model", "default")

    else:
        raise HTTPException(status_code=400, detail=f"Unknown provider: {provider}")

    if not api_key:
        raise HTTPException(status_code=400, detail="API key is required")

    # Build messages
    messages = [{"role": "system", "content": AGENT_SYSTEM_PROMPT}]

    # Add history (last 10 entries). Entries with another role or without a
    # "content" key are skipped instead of failing the whole request.
    for msg in request.history[-10:]:
        if msg.get("role") in ("user", "assistant") and "content" in msg:
            messages.append({"role": msg["role"], "content": msg["content"]})

    # Add current message
    messages.append({"role": "user", "content": request.message})

    # Create dummy job for tool executor
    class DummyJob:
        id = 0
        model_id = 0
        worker_id = 0

    executor = AgentToolExecutor(db, DummyJob())

    try:
        from openai import AsyncOpenAI

        client = AsyncOpenAI(api_key=api_key, base_url=base_url)

        # Call LLM with tools
        response = await client.chat.completions.create(
            model=model,
            messages=messages,
            tools=get_agent_tools(),
            tool_choice="auto",
            max_tokens=4096,
        )

        assistant_message = response.choices[0].message
        content = assistant_message.content or ""
        tool_calls_result = []

        # Execute tool calls if any
        for tool_call in assistant_message.tool_calls or []:
            tool_name = tool_call.function.name

            # An empty argument string means "no arguments".
            raw_args = tool_call.function.arguments
            tool_args = json.loads(raw_args) if raw_args else {}

            # Execute tool
            tool_output = await executor.execute(tool_name, tool_args)

            # Present JSON-looking output as structured data; anything that
            # does not parse is passed through unchanged.
            parsed_output = tool_output
            if isinstance(tool_output, str) and tool_output.startswith(("{", "[")):
                try:
                    parsed_output = json.loads(tool_output)
                except json.JSONDecodeError:
                    parsed_output = tool_output

            tool_calls_result.append(
                {
                    "name": tool_name,
                    "args": tool_args,
                    "result": parsed_output,
                }
            )

        # If there were tool calls but no content, generate a summary
        if not content and tool_calls_result:
            content = f"I executed {len(tool_calls_result)} tool(s). See the results below."

        return AgentChatResponse(
            content=content,
            tool_calls=tool_calls_result if tool_calls_result else None,
        )

    except Exception as e:
        logger.exception("Agent chat error: %s", e)
        raise HTTPException(status_code=500, detail=f"Agent error: {str(e)}") from e
@router.post("/chat-proxy")
async def proxy_chat_request(
    request: ChatProxyRequest,
    current_user: User = Depends(get_current_user),
):
    """
    Proxy chat requests to external OpenAI-compatible endpoints.

    This endpoint allows the frontend to make requests to external LLM APIs
    without running into CORS issues.

    Args:
        request: Target ``endpoint`` (``/chat/completions`` is appended when
            missing), optional ``api_key`` sent as a Bearer token, and the
            JSON ``payload`` forwarded unchanged.
        current_user: Authenticated user.

    Returns:
        For ``payload["stream"]`` truthy, a ``text/event-stream``
        ``StreamingResponse`` relaying the upstream bytes; errors are emitted
        as a single ``data:`` event. Otherwise the upstream JSON body.

    Raises:
        HTTPException: (non-streaming only) the upstream status code when it
            is not 200, 502 on connection failure, 504 on timeout, 500 on
            any other error.
    """
    import json

    # NOTE(review): the target URL is caller-supplied, so any authenticated
    # user can make the server issue requests to arbitrary hosts. Consider an
    # allow-list if that is not intended.

    # Normalize endpoint URL
    endpoint = request.endpoint.strip()
    if endpoint.endswith("/"):
        endpoint = endpoint[:-1]

    # Append /chat/completions if not present
    if not endpoint.endswith("/chat/completions"):
        endpoint = f"{endpoint}/chat/completions"

    # Build headers
    headers = {"Content-Type": "application/json"}
    if request.api_key:
        headers["Authorization"] = f"Bearer {request.api_key}"

    # Log request details for debugging
    logger.info(f"Proxying request to: {endpoint}")
    logger.info(f"Payload model: {request.payload.get('model', 'not specified')}")

    # Check if streaming is requested
    is_streaming = request.payload.get("stream", False)

    if is_streaming:
        # Streaming response - client lifecycle managed inside generator
        async def stream_response():
            try:
                async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
                    async with client.stream(
                        "POST",
                        endpoint,
                        json=request.payload,
                        headers=headers,
                    ) as response:
                        if response.status_code != 200:
                            error_text = await response.aread()
                            yield f"data: {error_text.decode()}\n\n"
                            return

                        async for chunk in response.aiter_bytes():
                            yield chunk
            except httpx.ConnectError as e:
                logger.error(f"Connection error to {endpoint}: {e}")
                yield 'data: {"error": "Failed to connect to endpoint"}\n\n'
            except httpx.TimeoutException as e:
                logger.error(f"Timeout connecting to {endpoint}: {e}")
                yield 'data: {"error": "Request timed out"}\n\n'
            except Exception as e:
                logger.error(f"Error proxying request to {endpoint}: {e}")
                # json.dumps escapes quotes/newlines in the message so the
                # event payload is always valid JSON.
                yield f"data: {json.dumps({'error': str(e)})}\n\n"

        return StreamingResponse(
            stream_response(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
            },
        )
    else:
        # Non-streaming response
        try:
            async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
                response = await client.post(
                    endpoint,
                    json=request.payload,
                    headers=headers,
                )

                if response.status_code != 200:
                    raise HTTPException(
                        status_code=response.status_code,
                        detail=response.text,
                    )

                return response.json()

        except HTTPException:
            # Propagate the upstream status as-is; without this clause the
            # generic handler below would turn it into a 500.
            raise
        except httpx.ConnectError as e:
            logger.error(f"Connection error to {endpoint}: {e}")
            raise HTTPException(
                status_code=502,
                detail=f"Failed to connect to endpoint: {endpoint}",
            ) from e
        except httpx.TimeoutException as e:
            logger.error(f"Timeout connecting to {endpoint}: {e}")
            raise HTTPException(
                status_code=504,
                detail="Request to endpoint timed out",
            ) from e
        except Exception as e:
            logger.error(f"Error proxying request to {endpoint}: {e}")
            raise HTTPException(
                status_code=500,
                detail=str(e),
            ) from e
endpoint: {models_endpoint}", + ) + except httpx.TimeoutException: + raise HTTPException( + status_code=504, + detail="Request to endpoint timed out", + ) + except Exception as e: + logger.error(f"Error fetching models from {models_endpoint}: {e}") + raise HTTPException( + status_code=500, + detail=str(e), + ) diff --git a/backend/app/config.py b/backend/app/config.py index 3bcfd3c..d573104 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -49,6 +49,11 @@ class Settings(BaseSettings): # Data directory data_dir: Path = Path("./data") + # Auto-Tuning Agent LLM settings + openai_api_key: str = "" + openai_base_url: str = "" # For OpenAI-compatible endpoints + openai_model: str = "gpt-4o" # Model to use for agent reasoning + def get_cors_origins(self) -> list[str]: """Parse CORS origins from comma-separated string.""" if self.cors_origins == "*": diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index 4865b1f..7fd3bf8 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -6,6 +6,13 @@ from app.models.deployment import Deployment from app.models.llm_model import LLMModel from app.models.registration_token import RegistrationToken +from app.models.tuning import ( + BenchmarkResult, + OptimizationTarget, + PerformanceKnowledge, + TuningJob, + TuningJobStatus, +) from app.models.user import User, UserRole from app.models.worker import Worker @@ -24,4 +31,9 @@ "AppStatus", "APP_DEFINITIONS", "RegistrationToken", + "TuningJob", + "TuningJobStatus", + "OptimizationTarget", + "BenchmarkResult", + "PerformanceKnowledge", ] diff --git a/backend/app/models/tuning.py b/backend/app/models/tuning.py new file mode 100644 index 0000000..14696d5 --- /dev/null +++ b/backend/app/models/tuning.py @@ -0,0 +1,200 @@ +"""Auto-Tuning and Benchmark models""" + +from datetime import UTC, datetime +from enum import Enum + +from sqlalchemy import JSON, DateTime, Float, ForeignKey, Integer, String, Text +from sqlalchemy.orm 
class TuningJob(Base):
    """Auto-tuning job record

    One row per tuning run, tying a model (``llm_models``) to a worker
    (``workers``). Status, optimization target, progress, results and the
    agent conversation are all stored on the row; the three JSON columns
    (``progress``, ``best_config``/``all_results``, ``conversation_log``)
    are nullable and start out empty.
    """

    __tablename__ = "tuning_jobs"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Target configuration
    model_id: Mapped[int] = mapped_column(Integer, ForeignKey("llm_models.id"), nullable=False)
    worker_id: Mapped[int] = mapped_column(Integer, ForeignKey("workers.id"), nullable=False)
    # Stored as a plain string; defaults to OptimizationTarget.BALANCED's value
    optimization_target: Mapped[str] = mapped_column(
        String(50), default=OptimizationTarget.BALANCED.value
    )

    # Job status
    # Stored as a plain string; defaults to TuningJobStatus.PENDING's value
    status: Mapped[str] = mapped_column(String(50), default=TuningJobStatus.PENDING.value)
    # Optional free-text detail accompanying the status
    status_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Step counter: starts at 0, with 5 steps by default
    current_step: Mapped[int] = mapped_column(Integer, default=0)
    total_steps: Mapped[int] = mapped_column(Integer, default=5)

    # Progress details (JSON for flexibility)
    progress: Mapped[dict | None] = mapped_column(JSON, nullable=True)

    # Results
    best_config: Mapped[dict | None] = mapped_column(JSON, nullable=True)
    all_results: Mapped[list | None] = mapped_column(JSON, nullable=True)

    # Agent conversation log (for UI display)
    # Format: [{"role": "user"|"assistant"|"tool", "content": "...", "tool_calls": [...], "timestamp": "..."}]
    conversation_log: Mapped[list | None] = mapped_column(JSON, nullable=True)

    # Metadata
    # Timestamps are timezone-aware and generated in UTC on the Python side
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: datetime.now(UTC)
    )
    # Refreshed on every UPDATE through the onupdate callable
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: datetime.now(UTC),
        onupdate=lambda: datetime.now(UTC),
    )
    # Left NULL at insert; no default is defined for it here
    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)

    # Relationships
    # Each backref adds a ``tuning_jobs`` collection on the related model
    model = relationship("LLMModel", backref="tuning_jobs")
    worker = relationship("Worker", backref="tuning_jobs")
class PerformanceKnowledge(Base):
    """Performance knowledge base for configuration recommendations

    This table stores historical tuning results to enable:
    1. Fast lookup of known-good configurations
    2. Transfer learning across similar models/hardware

    Each row pairs a hardware description and a model description with one
    engine configuration and the metrics measured for it. The three core
    metrics (``throughput_tps``, ``ttft_ms``, ``tpot_ms``) are mandatory;
    resource usage and ``score`` are optional.
    """

    __tablename__ = "performance_knowledge"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)

    # Hardware info
    gpu_model: Mapped[str] = mapped_column(String(100), nullable=False)  # e.g., "NVIDIA H100 80GB"
    gpu_count: Mapped[int] = mapped_column(Integer, nullable=False)
    total_vram_gb: Mapped[float] = mapped_column(Float, nullable=False)

    # Model info
    model_name: Mapped[str] = mapped_column(String(255), nullable=False)  # e.g., "Qwen/Qwen2.5-72B"
    model_family: Mapped[str] = mapped_column(String(100), nullable=False)  # e.g., "Qwen"
    model_params_b: Mapped[float | None] = mapped_column(
        Float, nullable=True
    )  # Parameters in billions

    # Configuration
    engine: Mapped[str] = mapped_column(String(50), nullable=False)  # vllm, sglang, ollama
    quantization: Mapped[str | None] = mapped_column(
        String(50), nullable=True
    )  # fp16, fp8, awq, gptq
    tensor_parallel: Mapped[int] = mapped_column(Integer, default=1)
    # Free-form engine arguments beyond the dedicated columns above
    extra_args: Mapped[dict | None] = mapped_column(JSON, nullable=True)

    # Performance metrics
    # Field names carry the units: tokens/second, milliseconds, milliseconds
    throughput_tps: Mapped[float] = mapped_column(Float, nullable=False)
    ttft_ms: Mapped[float] = mapped_column(Float, nullable=False)
    tpot_ms: Mapped[float] = mapped_column(Float, nullable=False)
    gpu_utilization: Mapped[float | None] = mapped_column(Float, nullable=True)
    vram_usage_gb: Mapped[float | None] = mapped_column(Float, nullable=True)

    # Test conditions
    test_dataset: Mapped[str] = mapped_column(String(100), default="synthetic")
    input_length: Mapped[int] = mapped_column(Integer, default=512)
    output_length: Mapped[int] = mapped_column(Integer, default=128)
    concurrency: Mapped[int] = mapped_column(Integer, default=1)

    # Recommendation score (computed based on optimization target)
    # Nullable: a record may be stored without a score
    score: Mapped[float | None] = mapped_column(Float, nullable=True)

    # Source
    # Optional link to the tuning job that produced this record
    source_tuning_job_id: Mapped[int | None] = mapped_column(
        Integer, ForeignKey("tuning_jobs.id"), nullable=True
    )

    # Metadata
    # Timezone-aware, generated in UTC on the Python side
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: datetime.now(UTC)
    )

    # Relationships
    # The backref adds a ``knowledge_records`` collection on TuningJob
    source_tuning_job = relationship("TuningJob", backref="knowledge_records")
class TuningJobResponse(BaseModel):
    """Schema for tuning job response

    Serialized view of a tuning job: identifiers, status fields, the typed
    progress snapshot, results, the agent conversation and timestamps, plus
    two optional display names for the related model and worker.
    """

    id: int
    model_id: int
    worker_id: int
    # Plain strings here, not enum types
    optimization_target: str
    status: str
    status_message: str | None = None
    current_step: int
    total_steps: int
    # Typed progress snapshot; None when no progress is available
    progress: TuningJobProgress | None = None
    best_config: dict | None = None
    all_results: list | None = None
    # None when the conversation is omitted or the job has none
    conversation_log: list[ConversationMessage] | None = None
    created_at: datetime
    updated_at: datetime
    completed_at: datetime | None = None

    # Related info
    # Optional display names; default to None when not supplied
    model_name: str | None = None
    worker_name: str | None = None

    class Config:
        # Allow validation from objects exposing these fields as attributes
        from_attributes = True
class KnowledgeQuery(BaseModel):
    """Query for the knowledge base

    Every filter is optional; a filter left as None is simply not supplied.
    ``limit`` is validated to the range 1-100.
    """

    # Filters (all optional)
    model_name: str | None = Field(default=None, description="Model name pattern to match")
    model_family: str | None = Field(default=None, description="Model family: Qwen, Llama, etc.")
    gpu_model: str | None = Field(default=None, description="GPU model pattern")
    min_vram_gb: float | None = Field(default=None, description="Minimum VRAM")
    # Scoring target; defaults to BALANCED
    optimization_target: OptimizationTarget = Field(
        default=OptimizationTarget.BALANCED, description="Optimization target for scoring"
    )
    # Page size, bounded by ge=1 / le=100
    limit: int = Field(default=10, ge=1, le=100, description="Max results to return")
class KnowledgeSaveRequest(BaseModel):
    """Request to save a record to knowledge base

    Identifies an existing benchmark result by ID and adds the model
    categorization the caller must provide with it.
    """

    # Required: the benchmark result to save
    benchmark_result_id: int = Field(..., description="ID of the benchmark result to save")
    # Required: caller-supplied model family
    model_family: str = Field(..., description="Model family for categorization")
    # Optional model size, in billions of parameters
    model_params_b: float | None = Field(default=None, description="Model parameters in billions")
ALWAYS explain what you're about to do BEFORE calling any tool +2. After each tool result, briefly summarize what you learned +3. Be conversational - talk like you're explaining to a colleague +4. No emojis, keep it professional but friendly + +=== OPTIMIZATION TARGETS === + +**Throughput** (tokens per second): +- Goal: Maximize TPS for batch processing / high volume +- Strategy: Use vLLM with large batch sizes, enable continuous batching +- Key metric: throughput_tps (higher is better) +- Trade-off: May have higher latency per request + +**Latency** (response time): +- Goal: Minimize time-to-first-token (TTFT) and time-per-output-token (TPOT) +- Strategy: Use smaller batch sizes, consider sglang for multi-turn +- Key metrics: avg_ttft_ms, avg_tpot_ms (lower is better) +- Trade-off: Lower overall throughput + +**Balanced**: +- Goal: Good balance between throughput and latency +- Strategy: Test multiple configs, calculate combined score +- Score formula: throughput_tps / (avg_ttft_ms * 0.01) - balance speed and responsiveness +- Pick config with best combined score + +**Cost** (minimum resources): +- Goal: Use minimum GPU memory while maintaining acceptable performance +- Strategy: Try quantization (awq, gptq), use fewer GPUs if possible +- Key consideration: memory_used_gb, still need decent throughput +- Trade-off: May sacrifice some performance for efficiency + +=== AVAILABLE ENGINES === +- vllm: Best throughput, tensor parallelism, supports fp8/awq/gptq quantization +- sglang: Good for multi-turn, efficient memory, fast prefix caching +- ollama: Simple deployment, good for smaller models, easy setup + +=== QUANTIZATION NOTES === +- AWQ/GPTQ: Requires a pre-quantized model (e.g., "Qwen/Qwen3-0.6B-AWQ") + Do NOT use quantization=awq with a base model like "Qwen/Qwen3-0.6B" +- FP8: Only works on Hopper+ GPUs (H100, etc.), not consumer GPUs +- For consumer GPUs (RTX 4090, etc.), use default FP16 or find a pre-quantized model + +=== PROCESS === +1. 
Check hardware (GPU model, VRAM, count) +2. Query knowledge base for similar setups +3. Based on optimization target, choose 2-3 promising configs to test +4. For EACH config: + a. Deploy model + b. Wait for deployment (use short timeout like 120s) + c. If timeout/slow: Check logs with get_deployment_logs to diagnose + d. If failed: STOP deployment, analyze error, try next config + e. If success: Run benchmark, record results, STOP deployment +5. Compare all results, call finish_tuning with recommendation + +=== DIAGNOSING DEPLOYMENT ISSUES === +When wait_for_deployment times out: +1. FIRST call test_deployment_endpoint to check if API is actually ready + - If ready=true: Great! Proceed to run_benchmark + - If ready=false: Continue to step 2 +2. Call get_deployment_logs to check container logs (use tail=100 or more) +3. Look for common patterns in logs: + - "Loading checkpoint shards" or "Loading model weights" - model is loading, keep waiting + - "INFO: Started server process" or "Uvicorn running" - vLLM is ready! + - "CUDA out of memory" - try quantization or fewer GPUs + - "Error" or "Exception" - check the error message +4. Based on logs, decide: + - If model loading: call test_deployment_endpoint every 30s until ready + - If OOM error: stop_deployment and try with quantization + - If other error: stop_deployment and try different engine/config + +DO NOT just give up on timeout - always test endpoint and check logs first! +A 0.6B model should load in 1-2 minutes, larger models (7B+) may take 5-10 minutes. + +=== HANDLING LOW GPU MEMORY === +If deploy_model fails with "GPU memory is low": +1. Call list_deployments(worker_id=X) to find existing deployments +2. Stop all running deployments using stop_deployment(deployment_id=X) +3. If no deployments found, GPU is used by external processes - inform user +4. After stopping, retry deploy_model + +IMPORTANT: ALWAYS stop a deployment before starting a new one! 
+- If deployment times out → check logs, then stop_deployment +- If deployment fails → stop_deployment immediately +- After benchmark complete → stop_deployment before next test +- Never have multiple test deployments running at once! + +=== EXAMPLE FLOW === +"Let me first check what hardware we have available..." +[call get_hardware_info] +"I can see we have 1x RTX 4090 with 24GB VRAM. Let me check if we have historical data..." +[call query_knowledge_base] +"No historical data found. Since we're optimizing for throughput, I'll test vLLM first..." +[call deploy_model] +"Deployment created with ID 1. Let me wait for it to become ready..." +[call wait_for_deployment(deployment_id=1, timeout_seconds=120)] +-- If timeout occurs -- +"Wait timed out after 120s. Let me first test if the endpoint is actually ready..." +[call test_deployment_endpoint(deployment_id=1)] +-- If ready=true -- +"The endpoint is responding! The model is ready. Let me run the benchmark now..." +[call run_benchmark(deployment_id=1)] +-- If ready=false -- +"Endpoint not ready yet. Let me check the container logs..." +[call get_deployment_logs(deployment_id=1, tail=100)] +"I see from the logs: 'Loading checkpoint shards: 3/4 (75%)' - model is still loading. +Let me test the endpoint again in a moment..." +[call test_deployment_endpoint(deployment_id=1)] +-- Keep testing until ready, then run benchmark -- +-- OR if logs show an error -- +"The logs show 'CUDA out of memory'. I need to stop and try a different config..." +[call stop_deployment(deployment_id=1)] +[call deploy_model with different params] + +ALWAYS provide context. Never call tools silently. +ALWAYS test endpoint and check logs before giving up on a deployment. 
def _function_tool(name: str, description: str, properties: dict, required: list[str]) -> dict:
    """Wrap one tool definition in the function-calling envelope."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


def get_agent_tools() -> list[dict]:
    """Define tools available to the agent

    Returns:
        A freshly built list of function-calling tool definitions, one per
        tool, in a fixed order. Each entry has the shape
        ``{"type": "function", "function": {"name", "description",
        "parameters"}}`` where ``parameters`` is a JSON Schema object.

    Every ``array`` property declares ``items``: a bare
    ``{"type": "array"}`` is rejected by strict function-schema validation
    ("array schema missing items"), which would make every chat request
    carrying these tools fail.
    """
    return [
        _function_tool(
            "get_hardware_info",
            "Get detailed hardware information for a worker node including GPU model, VRAM, count, and current utilization.",
            {"worker_id": {"type": "integer", "description": "ID of the worker to query"}},
            ["worker_id"],
        ),
        _function_tool(
            "get_model_info",
            "Get information about the model to be deployed.",
            {"model_id": {"type": "integer", "description": "ID of the model"}},
            ["model_id"],
        ),
        _function_tool(
            "query_knowledge_base",
            "Query historical performance data for similar model/hardware combinations.",
            {
                "model_family": {
                    "type": "string",
                    "description": "Model family (e.g., Qwen, Llama, Mistral)",
                },
                "gpu_model": {
                    "type": "string",
                    "description": "GPU model pattern (e.g., RTX 4090, A100)",
                },
            },
            [],
        ),
        _function_tool(
            "list_deployments",
            "List all deployments on a worker. Use this to find existing deployments that may be using GPU memory.",
            {"worker_id": {"type": "integer", "description": "Worker ID to query"}},
            ["worker_id"],
        ),
        _function_tool(
            "deploy_model",
            "Deploy a model with specific configuration. Returns deployment ID if successful.",
            {
                "model_id": {"type": "integer"},
                "worker_id": {"type": "integer"},
                "engine": {"type": "string", "enum": ["vllm", "sglang", "ollama"]},
                "gpu_indexes": {
                    "type": "array",
                    "items": {"type": "integer"},
                    "description": "GPU indices to use",
                },
                "extra_params": {
                    "type": "object",
                    "description": "Additional engine parameters",
                },
            },
            ["model_id", "worker_id", "engine"],
        ),
        _function_tool(
            "wait_for_deployment",
            "Wait for a deployment to be ready (running status).",
            {
                "deployment_id": {"type": "integer"},
                "timeout_seconds": {
                    "type": "integer",
                    "default": 300,
                    "description": "Maximum time to wait",
                },
            },
            ["deployment_id"],
        ),
        _function_tool(
            "run_benchmark",
            "Run performance benchmark on a running deployment. Returns throughput, TTFT, TPOT metrics.",
            {
                "deployment_id": {"type": "integer"},
                "num_requests": {
                    "type": "integer",
                    "default": 20,
                    "description": "Number of requests to send",
                },
                "concurrency": {
                    "type": "integer",
                    "default": 4,
                    "description": "Concurrent requests",
                },
                "input_tokens": {
                    "type": "integer",
                    "default": 128,
                    "description": "Approximate input token count",
                },
                "output_tokens": {
                    "type": "integer",
                    "default": 64,
                    "description": "Max output tokens",
                },
            },
            ["deployment_id"],
        ),
        _function_tool(
            "stop_deployment",
            "Stop and remove a deployment.",
            {"deployment_id": {"type": "integer"}},
            ["deployment_id"],
        ),
        _function_tool(
            "get_deployment_logs",
            "Get the Docker container logs for a deployment. Use this to check why a deployment is slow to start or failing.",
            {
                "deployment_id": {"type": "integer"},
                "tail": {
                    "type": "integer",
                    "default": 100,
                    "description": "Number of log lines to retrieve",
                },
            },
            ["deployment_id"],
        ),
        _function_tool(
            "check_deployment_status",
            "Check the current status of a deployment without waiting. Use this after wait_for_deployment times out to see if the model is still loading or has failed.",
            {"deployment_id": {"type": "integer"}},
            ["deployment_id"],
        ),
        _function_tool(
            "test_deployment_endpoint",
            "Test if the deployment API endpoint is responding. Use this to check if a model is ready even if wait_for_deployment timed out.",
            {"deployment_id": {"type": "integer"}},
            ["deployment_id"],
        ),
        _function_tool(
            "finish_tuning",
            "Complete the tuning process with final recommendation.",
            {
                "best_config": {
                    "type": "object",
                    "description": "The recommended configuration",
                },
                "reasoning": {
                    "type": "string",
                    "description": "Explanation of why this is the best config",
                },
                "all_results": {
                    "type": "array",
                    # Arrays must declare their element schema to pass
                    # function-schema validation.
                    "items": {"type": "object"},
                    "description": "All benchmark results collected",
                },
            },
            ["best_config", "reasoning"],
        ),
        _function_tool(
            "abort_tuning",
            "Abort the tuning process when it cannot be completed (e.g., GPU memory used by external processes, hardware issues).",
            {
                "reason": {
                    "type": "string",
                    "description": "Explanation of why tuning cannot be completed",
                }
            },
            ["reason"],
        ),
    ]
async def _tool_get_hardware_info(self, worker_id: int) -> dict:
    """Report a worker's GPUs with memory figures normalised to GB.

    The unit of the stored memory values is guessed from the magnitude of
    each GPU's ``memory_total`` (bytes, KB, MB or already GB), and that same
    divisor is applied to ``memory_used``.

    Returns:
        A dict with the worker's id, name and status, the GPU count, one
        entry per GPU, and the summed total VRAM in GB, or
        ``{"error": "Worker not found"}`` when no such worker exists.
    """
    # (threshold, divisor) pairs, checked from the largest unit down:
    # above 1e9 the value is bytes, above 1e6 KB, above 1000 MB; else GB.
    unit_table = (
        (1_000_000_000, 1024 * 1024 * 1024),
        (1_000_000, 1024 * 1024),
        (1000, 1024),
    )

    def to_gb(raw_total, raw_used) -> tuple:
        """Return (total, used, free) in GB for one GPU's raw readings."""
        scale = 1
        for threshold, divisor in unit_table:
            if raw_total > threshold:
                scale = divisor
                break
        total_gb = round(raw_total / scale, 1) if raw_total else 0
        used_gb = round(raw_used / scale, 1) if raw_used else 0
        free_gb = round((raw_total - raw_used) / scale, 1) if raw_total else 0
        return total_gb, used_gb, free_gb

    lookup = await self.db.execute(select(Worker).where(Worker.id == worker_id))
    worker = lookup.scalar_one_or_none()
    if not worker:
        return {"error": "Worker not found"}

    devices = worker.gpu_info or []

    gpu_rows = []
    for position, device in enumerate(devices):
        total_gb, used_gb, free_gb = to_gb(
            device.get("memory_total", 0), device.get("memory_used", 0)
        )
        gpu_rows.append(
            {
                # Fall back to the list position when no index is recorded
                "index": device.get("index", position),
                "name": device.get("name", "Unknown"),
                "memory_total_gb": total_gb,
                "memory_used_gb": used_gb,
                "memory_free_gb": free_gb,
                "utilization_percent": device.get("utilization_gpu", 0),
            }
        )

    vram_sum = sum(row["memory_total_gb"] for row in gpu_rows)

    return {
        "worker_id": worker.id,
        "worker_name": worker.name,
        "status": worker.status,
        "gpu_count": len(devices),
        "gpus": gpu_rows,
        "total_vram_gb": round(vram_sum, 1),
    }
You'll need to run benchmarks to gather data.", + "records": [], + } + + return { + "found": len(records), + "records": [ + { + "model_name": r.model_name, + "model_family": r.model_family, + "gpu_model": r.gpu_model, + "gpu_count": r.gpu_count, + "engine": r.engine, + "quantization": r.quantization, + "tensor_parallel": r.tensor_parallel, + "throughput_tps": r.throughput_tps, + "ttft_ms": r.ttft_ms, + "tpot_ms": r.tpot_ms, + "score": r.score, + } + for r in records + ], + } + + async def _tool_list_deployments(self, worker_id: int) -> dict: + """List all deployments on a worker""" + try: + result = await self.db.execute( + select(Deployment) + .where(Deployment.worker_id == worker_id) + .options(selectinload(Deployment.model)) + ) + deployments = result.scalars().all() + + if not deployments: + return { + "worker_id": worker_id, + "count": 0, + "deployments": [], + "message": "No deployments found on this worker. GPU memory may be used by processes outside LMStack.", + } + + deployment_list = [] + for d in deployments: + deployment_list.append( + { + "deployment_id": d.id, + "name": d.name, + "model_name": d.model.name if d.model else "Unknown", + "status": d.status, + "backend": d.backend, + "port": d.port, + "container_id": d.container_id[:12] if d.container_id else None, + } + ) + + return { + "worker_id": worker_id, + "count": len(deployments), + "deployments": deployment_list, + "message": f"Found {len(deployments)} deployment(s). 
async def _tool_deploy_model(
    self,
    model_id: int,
    worker_id: int,
    engine: str,
    gpu_indexes: list[int] | None = None,
    extra_params: dict | None = None,
) -> dict:
    """Create a deployment record and start deploying it in the background.

    Args:
        model_id: ID of the model to deploy.
        worker_id: ID of the worker that should host the deployment.
        engine: Backend name stored on the deployment.
        gpu_indexes: GPU indexes to use; defaults to ``[0]``.
        extra_params: Extra backend parameters; defaults to ``{}``.

    Returns:
        ``{"success": True, ...}`` with the new deployment id, name and
        effective config, or ``{"success": False, "error": ...}`` when a
        previous tuning deployment is still tracked, the worker or model is
        missing, any GPU on the worker has less than 20% free memory, or an
        exception occurs.
    """
    import time

    from app.services.deployer import DeployerService

    # Normalise once so the stored record and the reported config agree.
    gpu_indexes = gpu_indexes or [0]
    extra_params = extra_params or {}

    try:
        # Only one tuning deployment may exist at a time.
        if self.created_deployments:
            return {
                "success": False,
                "error": f"You still have active deployments: {self.created_deployments}. "
                f"Please stop them first using stop_deployment before creating a new one.",
            }

        worker_result = await self.db.execute(select(Worker).where(Worker.id == worker_id))
        worker = worker_result.scalar_one_or_none()
        if not worker:
            # Fail early instead of creating a deployment that points nowhere.
            return {"success": False, "error": "Worker not found"}

        # Refuse to deploy when any GPU reports less than 20% free memory.
        for g in worker.gpu_info or []:
            mem_total = g.get("memory_total", 0)
            mem_used = g.get("memory_used", 0)
            if mem_total > 0 and (mem_total - mem_used) / mem_total < 0.2:
                free_pct = round((mem_total - mem_used) / mem_total * 100, 1)
                return {
                    "success": False,
                    "error": f"GPU memory is low (only {free_pct}% free). "
                    f"Please stop any existing deployments first.",
                }

        model_result = await self.db.execute(select(LLMModel).where(LLMModel.id == model_id))
        model = model_result.scalar_one_or_none()
        if not model:
            return {"success": False, "error": "Model not found"}

        # The timestamp suffix keeps names unique across tuning attempts.
        deploy_name = f"tuning-{model.name.replace('/', '-')[:30]}-{int(time.time())}"

        deployment = Deployment(
            name=deploy_name,
            model_id=model_id,
            worker_id=worker_id,
            backend=engine,
            gpu_indexes=gpu_indexes,
            extra_params=extra_params,
            status=DeploymentStatus.PENDING.value,
        )

        self.db.add(deployment)
        await self.db.commit()
        await self.db.refresh(deployment)

        self.created_deployments.append(deployment.id)

        # The event loop only keeps weak references to tasks, so hold a strong
        # one until the deploy finishes or it may be garbage-collected mid-run.
        deployer = DeployerService()
        task = asyncio.create_task(deployer.deploy(deployment.id))
        background = getattr(self, "_background_tasks", None)
        if background is None:
            background = self._background_tasks = set()
        background.add(task)
        task.add_done_callback(background.discard)

        return {
            "success": True,
            "deployment_id": deployment.id,
            "deployment_name": deploy_name,
            "config": {
                "engine": engine,
                "gpu_indexes": gpu_indexes,
                "extra_params": extra_params,
            },
            "message": "Deployment created. Use wait_for_deployment to wait until ready.",
        }
    except Exception as e:
        logger.exception(f"Failed to deploy model: {e}")
        return {"success": False, "error": str(e)}


async def _tool_wait_for_deployment(
    self, deployment_id: int, timeout_seconds: int = 300
) -> dict:
    """Poll a deployment every 5 seconds until it is running, failed or timed out.

    Args:
        deployment_id: ID of the deployment to watch.
        timeout_seconds: Maximum time to keep polling.

    Returns:
        On success, a dict with ``status="running"``, the port, the endpoint
        URL and the time waited. Otherwise ``success=False`` with an
        ``error`` and, where applicable, an ``action_required`` hint.
    """
    # Monotonic clock: the timeout must not be affected by wall-clock jumps.
    start_time = time.monotonic()

    # populate_existing forces the ORM to overwrite the identity-mapped object
    # with the freshly selected row. Without it, every poll after the first
    # returns the cached attributes and a status change written by another
    # session (the deployer) is never observed.
    query = (
        select(Deployment)
        .where(Deployment.id == deployment_id)
        .options(selectinload(Deployment.worker))
        .execution_options(populate_existing=True)
    )

    while time.monotonic() - start_time < timeout_seconds:
        result = await self.db.execute(query)
        deployment = result.scalar_one_or_none()

        if not deployment:
            return {
                "success": False,
                "deployment_id": deployment_id,
                "error": "Deployment not found. It may have been deleted.",
            }

        if deployment.status == DeploymentStatus.RUNNING.value:
            # Worker address may carry a port; only the host part is used.
            host = deployment.worker.address.split(":")[0] if deployment.worker else "localhost"
            return {
                "success": True,
                "deployment_id": deployment_id,
                "status": "running",
                "port": deployment.port,
                "endpoint": f"http://{host}:{deployment.port}/v1",
                "wait_time_seconds": round(time.monotonic() - start_time, 1),
            }

        if deployment.status in (
            DeploymentStatus.ERROR.value,
            DeploymentStatus.STOPPED.value,
        ):
            return {
                "success": False,
                "deployment_id": deployment_id,
                "status": deployment.status,
                "error": deployment.status_message or "Deployment failed",
                "action_required": "Call stop_deployment to clean up before trying again",
            }

        await asyncio.sleep(5)

    return {
        "success": False,
        "deployment_id": deployment_id,
        "error": f"Timeout after {timeout_seconds}s",
        "action_required": (
            f"1. Call get_deployment_logs({deployment_id}) to check what's happening\n"
            f"2. If model is still loading, wait more with wait_for_deployment(timeout_seconds=300)\n"
            f"3. If there's an error, call stop_deployment({deployment_id}) and try a different config"
        ),
    }


async def _tool_run_benchmark(
    self,
    deployment_id: int,
    num_requests: int = 20,
    concurrency: int = 4,
    input_tokens: int = 128,
    output_tokens: int = 64,
) -> dict:
    """Run the HTTP benchmark against a running deployment.

    Args:
        deployment_id: ID of the deployment to benchmark.
        num_requests: Number of measured requests.
        concurrency: Maximum number of in-flight requests.
        input_tokens: Approximate prompt size.
        output_tokens: ``max_tokens`` requested per completion.

    Returns:
        The dict produced by ``_run_http_benchmark``, or ``{"error": ...}``
        when the deployment is missing, not running, or has no worker/port.
    """
    result = await self.db.execute(
        select(Deployment)
        .where(Deployment.id == deployment_id)
        .options(
            selectinload(Deployment.worker),
            selectinload(Deployment.model),
        )
    )
    deployment = result.scalar_one_or_none()

    if not deployment:
        return {"error": "Deployment not found"}

    if deployment.status != DeploymentStatus.RUNNING.value:
        return {"error": f"Deployment is not running (status: {deployment.status})"}

    # Same guard as the endpoint test tool: without a worker and port there is
    # no URL to benchmark, and dereferencing them would raise.
    if not deployment.worker or not deployment.port:
        return {"error": "Deployment not fully initialized (no worker or port)"}

    worker_ip = deployment.worker.address.split(":")[0]
    base_url = f"http://{worker_ip}:{deployment.port}/v1"

    # Model identifier sent in the API payload.
    model_name = deployment.model.model_id if deployment.model else "default"

    return await _run_http_benchmark(
        base_url=base_url,
        model_name=model_name,
        num_requests=num_requests,
        concurrency=concurrency,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
    )
async def _tool_check_deployment_status(self, deployment_id: int) -> dict:
    """Report a deployment's current state in a single, non-blocking lookup.

    Args:
        deployment_id: ID of the deployment to inspect.

    Returns:
        A dict with the stored status fields plus ``is_ready``,
        ``is_failed`` and ``is_loading`` flags derived from the status, or
        ``{"error": ...}`` when the deployment is missing or the lookup
        raises.
    """
    try:
        query = (
            select(Deployment)
            .where(Deployment.id == deployment_id)
            .options(selectinload(Deployment.worker))
        )
        record = (await self.db.execute(query)).scalar_one_or_none()

        if not record:
            return {"error": "Deployment not found"}

        current = record.status
        return {
            "deployment_id": deployment_id,
            "status": current,
            "status_message": record.status_message,
            "container_id": record.container_id,
            "port": record.port,
            "backend": record.backend,
            "is_ready": current == DeploymentStatus.RUNNING.value,
            "is_failed": current == DeploymentStatus.ERROR.value,
            "is_loading": current == DeploymentStatus.STARTING.value,
        }
    except Exception as e:
        logger.exception(f"Failed to check deployment status: {e}")
        return {"error": str(e)}
async def _tool_get_deployment_logs(self, deployment_id: int, tail: int = 100) -> dict:
    """Fetch recent container logs for a deployment via ``DeployerService``.

    Args:
        deployment_id: ID of the deployment whose logs are wanted.
        tail: Passed through to ``DeployerService.get_logs`` as ``tail``.

    Returns:
        A dict with the container id, status, status message and ``logs``;
        or an ``error`` entry when the deployment is missing, has no
        container id yet, or log retrieval raises.
    """
    from app.services.deployer import DeployerService

    try:
        # Worker is loaded together with the deployment.
        query = (
            select(Deployment)
            .where(Deployment.id == deployment_id)
            .options(selectinload(Deployment.worker))
        )
        record = (await self.db.execute(query)).scalar_one_or_none()

        if not record:
            return {"error": "Deployment not found"}

        if record.container_id:
            log_text = await DeployerService().get_logs(record, tail=tail)
            return {
                "deployment_id": deployment_id,
                "container_id": record.container_id,
                "status": record.status,
                "status_message": record.status_message,
                "logs": log_text,
            }

        # No container yet: there is nothing to read logs from.
        return {
            "deployment_id": deployment_id,
            "status": record.status,
            "error": "No container ID - deployment may not have started yet",
            "status_message": record.status_message,
        }
    except Exception as e:
        logger.exception(f"Failed to get deployment logs: {e}")
        return {"error": str(e)}
async def cleanup(self):
    """Stop and remove every deployment this tuning run created.

    Best effort: a failure on one deployment is logged and the remaining
    ones are still attempted.
    """
    # Iterate over a snapshot: _tool_stop_deployment removes the id from
    # self.created_deployments on success, and mutating a list while
    # iterating it makes the loop skip the following element.
    for deployment_id in list(self.created_deployments):
        try:
            await self._tool_stop_deployment(deployment_id)
        except Exception as e:
            logger.warning(f"Failed to cleanup deployment {deployment_id}: {e}")
============================================================================= + + +async def _run_http_benchmark( + base_url: str, + model_name: str = "default", + num_requests: int = 20, + concurrency: int = 4, + input_tokens: int = 128, + output_tokens: int = 64, +) -> dict: + """Run actual HTTP benchmark against an OpenAI-compatible endpoint""" + + # Generate test prompt with approximate token count + test_prompt = "Write a detailed explanation about " + " ".join( + ["artificial intelligence"] * (input_tokens // 3) + ) + + results = [] + errors = 0 + + semaphore = asyncio.Semaphore(concurrency) + + async def make_request(client: httpx.AsyncClient) -> dict | None: + nonlocal errors + async with semaphore: + start_time = time.perf_counter() + first_token_time = None + token_times = [] + total_tokens = 0 + + try: + async with client.stream( + "POST", + f"{base_url}/chat/completions", + json={ + "model": model_name, + "messages": [{"role": "user", "content": test_prompt}], + "max_tokens": output_tokens, + "stream": True, + }, + timeout=60.0, + ) as response: + if response.status_code != 200: + errors += 1 + return None + + async for line in response.aiter_lines(): + if line.startswith("data: "): + data = line[6:] + if data == "[DONE]": + break + try: + chunk = json.loads(data) + content = ( + chunk.get("choices", [{}])[0] + .get("delta", {}) + .get("content", "") + ) + if content: + current_time = time.perf_counter() + if first_token_time is None: + first_token_time = current_time + token_times.append(current_time) + total_tokens += 1 + except json.JSONDecodeError: + pass + + end_time = time.perf_counter() + + if first_token_time and total_tokens > 0: + ttft = (first_token_time - start_time) * 1000 # ms + total_time = end_time - start_time + + # Calculate TPOT (time per output token) excluding TTFT + if total_tokens > 1: + generation_time = end_time - first_token_time + tpot = (generation_time / (total_tokens - 1)) * 1000 # ms + else: + tpot = 0 + + return { + 
"ttft_ms": ttft, + "tpot_ms": tpot, + "total_tokens": total_tokens, + "total_time_s": total_time, + } + except Exception as e: + logger.warning(f"Benchmark request failed: {e}") + errors += 1 + return None + + async with httpx.AsyncClient() as client: + # Warm up with a few requests + logger.info("Warming up benchmark endpoint...") + for _ in range(min(2, num_requests)): + await make_request(client) + + # Run actual benchmark + logger.info(f"Running {num_requests} benchmark requests with concurrency {concurrency}...") + tasks = [make_request(client) for _ in range(num_requests)] + results = await asyncio.gather(*tasks) + + # Filter out failed requests + valid_results = [r for r in results if r is not None] + + if not valid_results: + return {"success": False, "error": "All requests failed", "errors": errors} + + # Calculate metrics + ttft_values = [r["ttft_ms"] for r in valid_results] + tpot_values = [r["tpot_ms"] for r in valid_results if r["tpot_ms"] > 0] + total_tokens = sum(r["total_tokens"] for r in valid_results) + total_time = sum(r["total_time_s"] for r in valid_results) + + avg_ttft = sum(ttft_values) / len(ttft_values) + avg_tpot = sum(tpot_values) / len(tpot_values) if tpot_values else 0 + throughput = total_tokens / total_time if total_time > 0 else 0 + + return { + "success": True, + "metrics": { + "throughput_tps": round(throughput, 2), + "avg_ttft_ms": round(avg_ttft, 2), + "avg_tpot_ms": round(avg_tpot, 2), + "p50_ttft_ms": round(sorted(ttft_values)[len(ttft_values) // 2], 2), + "p99_ttft_ms": ( + round(sorted(ttft_values)[int(len(ttft_values) * 0.99)], 2) + if len(ttft_values) > 1 + else round(ttft_values[0], 2) + ), + }, + "summary": { + "total_requests": num_requests, + "successful_requests": len(valid_results), + "failed_requests": errors, + "total_tokens_generated": total_tokens, + }, + } + + +def _extract_model_family(model_name: str) -> str: + """Extract model family from name""" + name_lower = model_name.lower() + families = { + "qwen": 
"Qwen", + "llama": "Llama", + "mistral": "Mistral", + "deepseek": "DeepSeek", + "phi": "Phi", + "gemma": "Gemma", + "yi": "Yi", + "glm": "GLM", + } + for key, value in families.items(): + if key in name_lower: + return value + return "Unknown" + + +# ============================================================================= +# Main Agent Runner +# ============================================================================= + + +async def run_tuning_agent(job_id: int, llm_config: dict | None = None): + """Run the Auto-Tuning Agent for a job + + Args: + job_id: The tuning job ID + llm_config: Optional LLM configuration from chat panel: + - deployment_id: Use a local deployment + - base_url: Custom endpoint URL + - api_key: API key for the endpoint + - model: Model name + """ + settings = get_settings() + + async with async_session_maker() as db: + # Load job with relationships + result = await db.execute( + select(TuningJob) + .where(TuningJob.id == job_id) + .options( + selectinload(TuningJob.model), + selectinload(TuningJob.worker), + ) + ) + job = result.scalar_one_or_none() + + if not job: + logger.error(f"Tuning job {job_id} not found") + return + + # Initialize tool executor + executor = AgentToolExecutor(db, job) + + try: + # Determine LLM configuration (priority: llm_config > settings > auto-detect) + api_key = None + base_url = None + model_name = "gpt-4o" + + if llm_config: + # Use config from chat panel + if llm_config.get("deployment_id"): + # Use specified local deployment + from app.models.deployment import Deployment, DeploymentStatus + + deploy_result = await db.execute( + select(Deployment) + .where(Deployment.id == llm_config["deployment_id"]) + .options(selectinload(Deployment.worker), selectinload(Deployment.model)) + ) + deployment = deploy_result.scalar_one_or_none() + + if deployment and deployment.worker: + worker_ip = deployment.worker.address.split(":")[0] + base_url = f"http://{worker_ip}:{deployment.port}/v1" + api_key = "dummy" + 
model_name = deployment.model.model_id if deployment.model else model_name + logger.info( + f"Using specified deployment as agent LLM: {base_url} ({model_name})" + ) + else: + job.status = TuningJobStatus.FAILED.value + job.status_message = ( + f"Deployment {llm_config['deployment_id']} not found or not running" + ) + await db.commit() + return + elif llm_config.get("base_url"): + # Use custom endpoint + base_url = llm_config["base_url"] + api_key = llm_config.get("api_key") or "dummy" + model_name = llm_config.get("model") or model_name + logger.info(f"Using custom endpoint as agent LLM: {base_url} ({model_name})") + + # Fall back to settings if no llm_config + if not api_key: + api_key = settings.openai_api_key + base_url = settings.openai_base_url + model_name = settings.openai_model or model_name + + # If still no API key, try to find any running deployment + if not api_key: + from app.models.deployment import Deployment, DeploymentStatus + + deploy_result = await db.execute( + select(Deployment) + .where(Deployment.status == DeploymentStatus.RUNNING.value) + .options(selectinload(Deployment.worker), selectinload(Deployment.model)) + .limit(1) + ) + local_deployment = deploy_result.scalar_one_or_none() + + if local_deployment and local_deployment.worker: + worker_ip = local_deployment.worker.address.split(":")[0] + base_url = f"http://{worker_ip}:{local_deployment.port}/v1" + api_key = "dummy" + model_name = ( + local_deployment.model.model_id if local_deployment.model else model_name + ) + logger.info( + f"Auto-detected local deployment as agent LLM: {base_url} ({model_name})" + ) + else: + job.status = TuningJobStatus.FAILED.value + job.status_message = ( + "No LLM configured for Auto-Tuning Agent. " + "Please select a model in the chat panel, or deploy a model first." 
+ ) + await db.commit() + return + + # Initialize OpenAI client (supports OpenAI-compatible endpoints) + client = AsyncOpenAI(api_key=api_key, base_url=base_url or "https://api.openai.com/v1") + + # Build initial user message + user_message = f"""Help me find the best deployment configuration for {job.model.name} on {job.worker.name}. I want to optimize for {job.optimization_target}. + +Model ID: {job.model_id}, Worker ID: {job.worker_id}""" + + messages = [ + {"role": "system", "content": AGENT_SYSTEM_PROMPT}, + {"role": "user", "content": user_message}, + ] + + # Initialize conversation log for UI display + conversation_log = [ + { + "role": "user", + "content": user_message, + "timestamp": datetime.now(UTC).isoformat(), + } + ] + + # Helper to save conversation log + async def save_log(): + job.conversation_log = conversation_log + await db.commit() + + # Update job status + job.status = TuningJobStatus.ANALYZING.value + job.status_message = "Agent is analyzing the environment..." + job.conversation_log = conversation_log + await db.commit() + + # Agent loop + max_iterations = 20 + iteration = 0 + + while iteration < max_iterations: + iteration += 1 + + # Check if cancelled + await db.refresh(job) + if job.status == TuningJobStatus.CANCELLED.value: + logger.info(f"Job {job_id} was cancelled") + await executor.cleanup() + return + + # Call LLM + logger.info(f"Agent iteration {iteration}, calling LLM with model: {model_name}...") + + response = await client.chat.completions.create( + model=model_name, + messages=messages, + tools=get_agent_tools(), + tool_choice="auto", + max_tokens=4096, + ) + + assistant_message = response.choices[0].message + messages.append(assistant_message.model_dump(exclude_none=True)) + + # Add assistant message to conversation log + log_entry = { + "role": "assistant", + "content": assistant_message.content or "", + "timestamp": datetime.now(UTC).isoformat(), + } + if assistant_message.tool_calls: + log_entry["tool_calls"] = [ + { + "id": 
tc.id, + "name": tc.function.name, + "arguments": tc.function.arguments, + } + for tc in assistant_message.tool_calls + ] + conversation_log.append(log_entry) + await save_log() + + # Check if no tool calls - prompt the agent to take action + if not assistant_message.tool_calls: + logger.warning(f"Agent responded without tool calls at iteration {iteration}") + # Add a user message to prompt the agent to take action + prompt_message = ( + "You need to call a tool to proceed. Available actions:\n" + "1. list_deployments - Find existing deployments on the worker\n" + "2. stop_deployment - Stop a deployment to free GPU memory\n" + "3. deploy_model - Deploy a model with specific config\n" + "4. test_deployment_endpoint - Check if deployment is ready\n" + "5. get_deployment_logs - Check container logs\n" + "6. run_benchmark - Run performance benchmark\n" + "7. finish_tuning - Complete with recommendation\n" + "8. abort_tuning - Abort if cannot proceed\n" + "Do not respond with just text - you must call a tool." 
+ ) + messages.append({"role": "user", "content": prompt_message}) + conversation_log.append( + { + "role": "user", + "content": prompt_message, + "timestamp": datetime.now(UTC).isoformat(), + } + ) + await save_log() + continue # Continue the loop to get tool calls + + # Execute tool calls + for tool_call in assistant_message.tool_calls: + tool_name = tool_call.function.name + tool_args = json.loads(tool_call.function.arguments) + + logger.info(f"Executing tool: {tool_name}({tool_args})") + + # Update job progress + job.status_message = f"Executing: {tool_name}" + job.progress = { + "step": iteration, + "total_steps": max_iterations, + "step_name": tool_name, + "step_description": f"Executing {tool_name} with args: {tool_args}", + "configs_tested": 0, + "configs_total": 0, + } + await db.commit() + + # Execute tool + result = await executor.execute(tool_name, tool_args) + + # Add tool result to conversation log + conversation_log.append( + { + "role": "tool", + "tool_call_id": tool_call.id, + "name": tool_name, + "content": result, + "timestamp": datetime.now(UTC).isoformat(), + } + ) + await save_log() + + # Check if this was a termination tool + if tool_name == "finish_tuning": + logger.info(f"Agent completed tuning for job {job_id}") + return + if tool_name == "abort_tuning": + logger.info(f"Agent aborted tuning for job {job_id}") + return + + # Add tool result to messages + messages.append( + {"role": "tool", "tool_call_id": tool_call.id, "content": result} + ) + + # If we reached max iterations without finishing + job.status = TuningJobStatus.FAILED.value + job.status_message = "Agent reached maximum iterations without completing" + await db.commit() + + except Exception as e: + logger.exception(f"Agent error for job {job_id}: {e}") + job.status = TuningJobStatus.FAILED.value + job.status_message = f"Agent error: {str(e)}" + await db.commit() + + finally: + # Cleanup any test deployments + await executor.cleanup() diff --git 
async def table_exists(conn, table_name: str) -> bool:
    """Check if a table exists (SQLite compatible)

    Args:
        conn: Async connection used to run the lookup.
        table_name: Name of the table to look for in ``sqlite_master``.

    Returns:
        True when a table with exactly that name exists.
    """
    # Bind the name as a parameter rather than formatting it into the SQL, so
    # quotes or other special characters in the name cannot alter the statement.
    result = await conn.execute(
        text("SELECT name FROM sqlite_master WHERE type='table' AND name=:table_name"),
        {"table_name": table_name},
    )
    return result.fetchone() is not None
benchmark_results table + if not await table_exists(conn, "benchmark_results"): + print("Creating 'benchmark_results' table...") + await conn.execute( + text( + """ + CREATE TABLE benchmark_results ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + tuning_job_id INTEGER, + deployment_id INTEGER NOT NULL, + config JSON NOT NULL, + test_type VARCHAR(50) DEFAULT 'throughput', + test_duration_seconds INTEGER DEFAULT 60, + input_length INTEGER DEFAULT 512, + output_length INTEGER DEFAULT 128, + concurrency INTEGER DEFAULT 1, + throughput_tps REAL, + ttft_ms REAL, + tpot_ms REAL, + total_latency_ms REAL, + gpu_utilization REAL, + vram_usage_gb REAL, + raw_results JSON, + error_message TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (tuning_job_id) REFERENCES tuning_jobs(id), + FOREIGN KEY (deployment_id) REFERENCES deployments(id) + ) + """ + ) + ) + print("'benchmark_results' table created successfully!") + else: + print("'benchmark_results' table already exists") + + # Create performance_knowledge table + if not await table_exists(conn, "performance_knowledge"): + print("Creating 'performance_knowledge' table...") + await conn.execute( + text( + """ + CREATE TABLE performance_knowledge ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + gpu_model VARCHAR(255) NOT NULL, + gpu_count INTEGER DEFAULT 1, + total_vram_gb REAL NOT NULL, + model_name VARCHAR(255) NOT NULL, + model_family VARCHAR(100) NOT NULL, + model_params_b REAL, + engine VARCHAR(50) NOT NULL, + quantization VARCHAR(50), + tensor_parallel INTEGER DEFAULT 1, + extra_args JSON, + throughput_tps REAL NOT NULL, + ttft_ms REAL NOT NULL, + tpot_ms REAL NOT NULL, + gpu_utilization REAL, + vram_usage_gb REAL, + test_dataset VARCHAR(100) DEFAULT 'synthetic', + input_length INTEGER DEFAULT 512, + output_length INTEGER DEFAULT 128, + concurrency INTEGER DEFAULT 1, + score REAL, + source_tuning_job_id INTEGER, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (source_tuning_job_id) REFERENCES 
tuning_jobs(id) + ) + """ + ) + ) + print("'performance_knowledge' table created successfully!") + else: + print("'performance_knowledge' table already exists") + + # Create indexes for performance knowledge queries + print("Creating indexes...") + try: + await conn.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_knowledge_model_family ON performance_knowledge(model_family)" + ) + ) + await conn.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_knowledge_gpu_model ON performance_knowledge(gpu_model)" + ) + ) + await conn.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_knowledge_engine ON performance_knowledge(engine)" + ) + ) + await conn.execute( + text("CREATE INDEX IF NOT EXISTS idx_tuning_jobs_status ON tuning_jobs(status)") + ) + print("Indexes created successfully!") + except Exception as e: + print(f"Note: Some indexes may already exist: {e}") + + print("\n" + "=" * 50) + print("Migration completed successfully!") + print("=" * 50) + + await engine.dispose() + + +if __name__ == "__main__": + asyncio.run(migrate()) diff --git a/backend/migrations/010_add_conversation_log.py b/backend/migrations/010_add_conversation_log.py new file mode 100644 index 0000000..ffc36f0 --- /dev/null +++ b/backend/migrations/010_add_conversation_log.py @@ -0,0 +1,50 @@ +""" +Migration: Add conversation_log column to tuning_jobs + +This migration adds a JSON column to store the agent's conversation history. 
+ +Run with: python -m migrations.010_add_conversation_log +""" + +import asyncio +import sys +from pathlib import Path + +# Add parent directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from sqlalchemy import text +from sqlalchemy.ext.asyncio import create_async_engine + +from app.config import get_settings + + +async def column_exists(conn, table_name: str, column_name: str) -> bool: + """Check if a column exists (SQLite compatible)""" + result = await conn.execute(text(f"PRAGMA table_info({table_name})")) + columns = result.fetchall() + return any(col[1] == column_name for col in columns) + + +async def migrate(): + settings = get_settings() + engine = create_async_engine(settings.database_url, echo=True) + + async with engine.begin() as conn: + # Add conversation_log column to tuning_jobs + if not await column_exists(conn, "tuning_jobs", "conversation_log"): + print("Adding 'conversation_log' column to 'tuning_jobs' table...") + await conn.execute(text("ALTER TABLE tuning_jobs ADD COLUMN conversation_log JSON")) + print("'conversation_log' column added successfully!") + else: + print("'conversation_log' column already exists") + + print("\n" + "=" * 50) + print("Migration completed successfully!") + print("=" * 50) + + await engine.dispose() + + +if __name__ == "__main__": + asyncio.run(migrate()) diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 85adeb4..6bc12a8 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -12,7 +12,7 @@ import { useLocation, useNavigate, } from "react-router-dom"; -import { Layout, ConfigProvider, theme } from "antd"; +import { Layout, ConfigProvider, theme, Button, Tooltip } from "antd"; import { DashboardOutlined, CloudServerOutlined, @@ -28,11 +28,19 @@ import { RocketOutlined, GlobalOutlined, HddOutlined, + CommentOutlined, + ThunderboltOutlined, } from "@ant-design/icons"; import { AuthProvider, useAuth } from "./contexts/AuthContext"; import { useAppTheme, 
useResponsive } from "./hooks"; import { Header, Sidebar, MobileSidebar } from "./components/layout"; +import { + ChatPanel, + CHAT_PANEL_STORAGE_KEY, + DEFAULT_PANEL_WIDTH, + TUNING_JOB_EVENT_KEY, +} from "./components/chat-panel"; import Loading from "./components/Loading"; // Page Components @@ -51,6 +59,7 @@ import Settings from "./pages/Settings"; import Headscale from "./pages/Headscale"; import Login from "./pages/Login"; import Setup from "./pages/Setup"; +import AutoTuning from "./pages/AutoTuning"; const { Content } = Layout; @@ -165,6 +174,11 @@ function getMenuItems(isAdmin: boolean) { label: "Deploy Model", }, { key: "/deploy-apps", icon: , label: "Deploy Apps" }, + { + key: "/auto-tuning", + icon: , + label: "Auto-Tuning", + }, { key: "/api-keys", icon: , @@ -224,6 +238,25 @@ function getCurrentPageTitle(menuItems: any[], pathname: string) { // Main Layout // ============================================================================ +/** + * Load chat panel state from localStorage + */ +function loadChatPanelState(): { isOpen: boolean; width: number } { + try { + const saved = localStorage.getItem(CHAT_PANEL_STORAGE_KEY); + if (saved) { + const state = JSON.parse(saved); + return { + isOpen: state.isOpen ?? false, + width: state.width ?? DEFAULT_PANEL_WIDTH, + }; + } + } catch { + // Ignore + } + return { isOpen: false, width: DEFAULT_PANEL_WIDTH }; +} + function AppLayout() { const navigate = useNavigate(); const location = useLocation(); @@ -233,6 +266,65 @@ function AppLayout() { const [sidebarCollapsed, setSidebarCollapsed] = useState(false); const [mobileDrawerOpen, setMobileDrawerOpen] = useState(false); + const [chatPanelOpen, setChatPanelOpen] = useState( + () => loadChatPanelState().isOpen, + ); + const [chatPanelWidth, setChatPanelWidth] = useState( + () => loadChatPanelState().width, + ); + + // Persist chat panel state + useEffect(() => { + try { + const current = localStorage.getItem(CHAT_PANEL_STORAGE_KEY); + const state = current ? 
JSON.parse(current) : {}; + localStorage.setItem( + CHAT_PANEL_STORAGE_KEY, + JSON.stringify({ ...state, isOpen: chatPanelOpen }), + ); + } catch { + // Ignore + } + }, [chatPanelOpen]); + + // Listen for tuning job events to auto-open chat panel + useEffect(() => { + const handleStorageChange = (e: StorageEvent) => { + if (e.key === TUNING_JOB_EVENT_KEY && e.newValue) { + try { + const data = JSON.parse(e.newValue); + if (data.jobId) { + setChatPanelOpen(true); + } + } catch { + // Ignore + } + } + }; + + // Check on mount if there's a pending tuning job + const checkInitial = () => { + const stored = localStorage.getItem(TUNING_JOB_EVENT_KEY); + if (stored) { + try { + const data = JSON.parse(stored); + if ( + data.jobId && + data.timestamp && + Date.now() - data.timestamp < 5000 + ) { + setChatPanelOpen(true); + } + } catch { + // Ignore + } + } + }; + + checkInitial(); + window.addEventListener("storage", handleStorageChange); + return () => window.removeEventListener("storage", handleStorageChange); + }, []); useEffect(() => { document.body.setAttribute("data-theme", isDark ? "dark" : "light"); @@ -403,8 +495,9 @@ function AppLayout() {
} /> } /> } /> + } /> } /> + + {/* Floating chat button */} + {!chatPanelOpen && ( + + + + + + + )} + {messages.length > 0 && ( + + + + + {/* Custom endpoint modal */} + { + setCustomModalOpen(false); + setEditingEndpoint(null); + form.resetFields(); + setRemoteModels([]); + }} + okText={editingEndpoint ? "Save" : "Add"} + width={480} + > +
+ + + + + + + + + + + {models.map((model) => ( + + {model.name} + + ))} + + + + {/* Worker */} + + + + + {/* Optimization Target */} + + + Throughput + Latency + Balanced + Cost + + + + + + + Agent LLM + + + + {/* Agent LLM Selection */} + + + + setLlmSourceType(e.target.value)} + > + Local Deployment + Custom Endpoint + + + + {llmSourceType === "deployment" && ( + + + + )} + + {llmSourceType === "custom" && ( + <> + 0, + message: "Please select an endpoint", + }, + ]} + > + + + + {customEndpoints.length === 0 && !showAddEndpoint && ( + + )} + + {showAddEndpoint && ( + setShowAddEndpoint(false)} + > + Cancel + + } + style={{ marginBottom: 16 }} + > + + + + + + + + + + + + + + + + + )} + + )} + + + + + + + + +
+ + {/* Detail Modal - Docker-style Log View */} + + + Tuning Log - {detailModal?.model_name} + + {detailModal?.status.toUpperCase()} + + {detailLoading && } + + } + open={!!detailModal} + onCancel={() => setDetailModal(null)} + footer={null} + width={900} + styles={{ body: { padding: 0 } }} + > + {detailModal && ( +
+ {/* Docker-style Log Container */} +
+ {detailModal.conversation_log && + detailModal.conversation_log.length > 0 ? ( + detailModal.conversation_log.map((msg, idx) => { + const timestamp = msg.timestamp + ? dayjs(msg.timestamp).format("HH:mm:ss") + : ""; + + if (msg.role === "user") { + return ( +
+ [{timestamp}] + [USER] + {msg.content} +
+ ); + } + + if (msg.role === "assistant") { + return ( +
+ [{timestamp}] + [AGENT] + {msg.content && ( + + {msg.content} + + )} + {msg.tool_calls && msg.tool_calls.length > 0 && ( +
+ {msg.tool_calls.map((tc, tcIdx) => ( +
+ -> Calling: {tc.name}( + {(() => { + try { + const args = JSON.parse(tc.arguments); + return Object.entries(args) + .map( + ([k, v]) => `${k}=${JSON.stringify(v)}`, + ) + .join(", "); + } catch { + return tc.arguments; + } + })()} + ) +
+ ))} +
+ )} +
+ ); + } + + if (msg.role === "tool") { + let content = msg.content; + try { + const parsed = JSON.parse(msg.content); + content = JSON.stringify(parsed, null, 2); + } catch { + // Keep original + } + return ( +
+ [{timestamp}] + + {" "} + [TOOL:{msg.name}]{" "} + +
+ {content} +
+
+ ); + } + + return null; + }) + ) : ( +
+ {detailLoading + ? "Loading logs..." + : detailModal.status === "pending" + ? "Waiting for agent to start..." + : "No logs available"} +
+ )} + + {/* Running indicator */} + {[ + "pending", + "analyzing", + "querying_kb", + "exploring", + "benchmarking", + ].includes(detailModal.status) && ( +
+ + [{dayjs().format("HH:mm:ss")}] + + [STATUS] + + {detailModal.status_message || "Processing..."} + + {" "} + _ + + +
+ )} +
+ + {/* Best Config Section */} + {detailModal.best_config && ( +
+ Best Configuration: +
+                  {JSON.stringify(detailModal.best_config, null, 2)}
+                
+
+ )} +
+ )} +
+ + ); +} diff --git a/frontend/src/pages/Deployments.tsx b/frontend/src/pages/Deployments.tsx index 50da57a..bfe0b5f 100644 --- a/frontend/src/pages/Deployments.tsx +++ b/frontend/src/pages/Deployments.tsx @@ -1,4 +1,5 @@ import { useEffect, useState, useCallback, useRef } from "react"; +import { useSearchParams } from "react-router-dom"; import { Button, Card, @@ -54,11 +55,22 @@ const { Text } = Typography; const REFRESH_INTERVAL = 5000; export default function Deployments() { + const [searchParams, setSearchParams] = useSearchParams(); const [deployments, setDeployments] = useState([]); const [workers, setWorkers] = useState([]); const [models, setModels] = useState([]); const [loading, setLoading] = useState(true); const [modalOpen, setModalOpen] = useState(false); + + // Handle URL action parameters (e.g., ?action=new) + useEffect(() => { + const action = searchParams.get("action"); + if (action === "new") { + setModalOpen(true); + // Clear the action param from URL + setSearchParams({}, { replace: true }); + } + }, [searchParams, setSearchParams]); const [logsModal, setLogsModal] = useState<{ id: number; name: string; diff --git a/frontend/src/pages/Models.tsx b/frontend/src/pages/Models.tsx index 2535469..04e2c49 100644 --- a/frontend/src/pages/Models.tsx +++ b/frontend/src/pages/Models.tsx @@ -1,4 +1,5 @@ import { useEffect, useState } from "react"; +import { useSearchParams } from "react-router-dom"; import { Button, Card, @@ -99,6 +100,7 @@ function useResponsive() { } export default function Models() { + const [searchParams, setSearchParams] = useSearchParams(); const { canEdit } = useAuth(); const [models, setModels] = useState([]); const [loading, setLoading] = useState(true); @@ -118,6 +120,15 @@ export default function Models() { const SOURCE_CONFIG = getSourceConfig(isDark); + // Handle URL action parameters (e.g., ?action=new) + useEffect(() => { + const action = searchParams.get("action"); + if (action === "new") { + setModalOpen(true); + 
setSearchParams({}, { replace: true }); + } + }, [searchParams, setSearchParams]); + const fetchModels = async () => { setLoading(true); try { diff --git a/mcp-server/README.md b/mcp-server/README.md new file mode 100644 index 0000000..c35bbc1 --- /dev/null +++ b/mcp-server/README.md @@ -0,0 +1,106 @@ +# LMStack MCP Server + +Model Context Protocol (MCP) server for the LMStack platform. Enables AI assistants like Claude Desktop, Cursor, and other MCP-compatible clients to interact with your LMStack infrastructure. + +## Features + +### Resources + +| Resource URI | Description | +|-------------|-------------| +| `lmstack://system/status` | Complete system overview | +| `lmstack://workers` | Worker nodes with GPU info | +| `lmstack://containers` | Docker containers | +| `lmstack://deployments` | Model deployments | +| `lmstack://models` | Available models | + +### Tools + +| Tool | Description | +|------|-------------| +| `get_system_status` | Get complete system status | +| `list_workers` | List all workers with GPU status | +| `list_containers` | List Docker containers | +| `list_deployments` | List model deployments | +| `list_models` | List available models | +| `get_gpu_status` | Get detailed GPU information | +| `deploy_model` | Deploy a model to a worker | +| `stop_deployment` | Stop a running deployment | + +## Installation + +```bash +cd mcp-server +npm install +npm run build +``` + +## Configuration + +Set environment variables: + +```bash +export LMSTACK_API_URL="http://localhost:8000/api" +export LMSTACK_API_TOKEN="your-api-token" +``` + +## Usage with Claude Desktop + +Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS, `%APPDATA%\Claude\claude_desktop_config.json` on Windows): + +```json +{ + "mcpServers": { + "lmstack": { + "command": "node", + "args": ["/path/to/lmstack/mcp-server/dist/index.js"], + "env": { + "LMSTACK_API_URL": "http://localhost:8000/api", + "LMSTACK_API_TOKEN": "your-token" + } + } + } +} +``` + +## Usage with Cursor + +Add to Cursor settings: + +```json +{ +
"mcp.servers": { + "lmstack": { + "command": "node", + "args": ["/path/to/lmstack/mcp-server/dist/index.js"], + "env": { + "LMSTACK_API_URL": "http://localhost:8000/api" + } + } + } +} +``` + +## Development + +```bash +# Run in development mode +npm run dev + +# Inspect with MCP Inspector +npm run inspect +``` + +## Example Queries + +Once connected, you can ask your AI assistant: + +- "Show me the current system status" +- "How much GPU memory is available?" +- "List all running containers" +- "Deploy the Qwen model to worker-1" +- "Stop deployment 5" + +## License + +MIT diff --git a/mcp-server/package-lock.json b/mcp-server/package-lock.json new file mode 100644 index 0000000..6a7a1a6 --- /dev/null +++ b/mcp-server/package-lock.json @@ -0,0 +1,1840 @@ +{ + "name": "@lmstack/mcp-server", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "@lmstack/mcp-server", + "version": "0.1.0", + "license": "MIT", + "dependencies": { + "@modelcontextprotocol/sdk": "^1.0.0", + "axios": "^1.6.0" + }, + "bin": { + "lmstack-mcp": "dist/index.js" + }, + "devDependencies": { + "@types/node": "^20.0.0", + "tsx": "^4.0.0", + "typescript": "^5.3.0" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.2.tgz", + "integrity": "sha512-GZMB+a0mOMZs4MpDbj8RJp4cw+w1WV5NYD6xzgvzUJ5Ek2jerwfO2eADyI6ExDSUED+1X8aMbegahsJi+8mgpw==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.2.tgz", + "integrity": "sha512-DVNI8jlPa7Ujbr1yjU2PfUSRtAUZPG9I1RwW4F4xFB1Imiu2on0ADiI/c3td+KmDtVKNbi+nffGDQMfcIMkwIA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": 
{ + "node": ">=18" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.2.tgz", + "integrity": "sha512-pvz8ZZ7ot/RBphf8fv60ljmaoydPU12VuXHImtAs0XhLLw+EXBi2BLe3OYSBslR4rryHvweW5gmkKFwTiFy6KA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.2.tgz", + "integrity": "sha512-z8Ank4Byh4TJJOh4wpz8g2vDy75zFL0TlZlkUkEwYXuPSgX8yzep596n6mT7905kA9uHZsf/o2OJZubl2l3M7A==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.2.tgz", + "integrity": "sha512-davCD2Zc80nzDVRwXTcQP/28fiJbcOwvdolL0sOiOsbwBa72kegmVU0Wrh1MYrbuCL98Omp5dVhQFWRKR2ZAlg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.2.tgz", + "integrity": "sha512-ZxtijOmlQCBWGwbVmwOF/UCzuGIbUkqB1faQRf5akQmxRJ1ujusWsb3CVfk/9iZKr2L5SMU5wPBi1UWbvL+VQA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.2.tgz", + "integrity": "sha512-lS/9CN+rgqQ9czogxlMcBMGd+l8Q3Nj1MFQwBZJyoEKI50XGxwuzznYdwcav6lpOGv5BqaZXqvBSiB/kJ5op+g==", + "cpu": [ + "arm64" + ], + 
"dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.2.tgz", + "integrity": "sha512-tAfqtNYb4YgPnJlEFu4c212HYjQWSO/w/h/lQaBK7RbwGIkBOuNKQI9tqWzx7Wtp7bTPaGC6MJvWI608P3wXYA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.2.tgz", + "integrity": "sha512-vWfq4GaIMP9AIe4yj1ZUW18RDhx6EPQKjwe7n8BbIecFtCQG4CfHGaHuh7fdfq+y3LIA2vGS/o9ZBGVxIDi9hw==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.2.tgz", + "integrity": "sha512-hYxN8pr66NsCCiRFkHUAsxylNOcAQaxSSkHMMjcpx0si13t1LHFphxJZUiGwojB1a/Hd5OiPIqDdXONia6bhTw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.2.tgz", + "integrity": "sha512-MJt5BRRSScPDwG2hLelYhAAKh9imjHK5+NE/tvnRLbIqUWa+0E9N4WNMjmp/kXXPHZGqPLxggwVhz7QP8CTR8w==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.2.tgz", + "integrity": 
"sha512-lugyF1atnAT463aO6KPshVCJK5NgRnU4yb3FUumyVz+cGvZbontBgzeGFO1nF+dPueHD367a2ZXe1NtUkAjOtg==", + "cpu": [ + "loong64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.2.tgz", + "integrity": "sha512-nlP2I6ArEBewvJ2gjrrkESEZkB5mIoaTswuqNFRv/WYd+ATtUpe9Y09RnJvgvdag7he0OWgEZWhviS1OTOKixw==", + "cpu": [ + "mips64el" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.2.tgz", + "integrity": "sha512-C92gnpey7tUQONqg1n6dKVbx3vphKtTHJaNG2Ok9lGwbZil6DrfyecMsp9CrmXGQJmZ7iiVXvvZH6Ml5hL6XdQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.2.tgz", + "integrity": "sha512-B5BOmojNtUyN8AXlK0QJyvjEZkWwy/FKvakkTDCziX95AowLZKR6aCDhG7LeF7uMCXEJqwa8Bejz5LTPYm8AvA==", + "cpu": [ + "riscv64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.2.tgz", + "integrity": "sha512-p4bm9+wsPwup5Z8f4EpfN63qNagQ47Ua2znaqGH6bqLlmJ4bx97Y9JdqxgGZ6Y8xVTixUnEkoKSHcpRlDnNr5w==", + "cpu": [ + "s390x" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.27.2", + 
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.2.tgz", + "integrity": "sha512-uwp2Tip5aPmH+NRUwTcfLb+W32WXjpFejTIOWZFw/v7/KnpCDKG66u4DLcurQpiYTiYwQ9B7KOeMJvLCu/OvbA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.2.tgz", + "integrity": "sha512-Kj6DiBlwXrPsCRDeRvGAUb/LNrBASrfqAIok+xB0LxK8CHqxZ037viF13ugfsIpePH93mX7xfJp97cyDuTZ3cw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.2.tgz", + "integrity": "sha512-HwGDZ0VLVBY3Y+Nw0JexZy9o/nUAWq9MlV7cahpaXKW6TOzfVno3y3/M8Ga8u8Yr7GldLOov27xiCnqRZf0tCA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.2.tgz", + "integrity": "sha512-DNIHH2BPQ5551A7oSHD0CKbwIA/Ox7+78/AWkbS5QoRzaqlev2uFayfSxq68EkonB+IKjiuxBFoV8ESJy8bOHA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.2.tgz", + "integrity": "sha512-/it7w9Nb7+0KFIzjalNJVR5bOzA9Vay+yIPLVHfIQYG/j+j9VTH84aNB8ExGKPU4AzfaEvN9/V4HV+F+vo8OEg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": 
">=18" + } + }, + "node_modules/@esbuild/openharmony-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.2.tgz", + "integrity": "sha512-LRBbCmiU51IXfeXk59csuX/aSaToeG7w48nMwA6049Y4J4+VbWALAuXcs+qcD04rHDuSCSRKdmY63sruDS5qag==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.2.tgz", + "integrity": "sha512-kMtx1yqJHTmqaqHPAzKCAkDaKsffmXkPHThSfRwZGyuqyIeBvf08KSsYXl+abf5HDAPMJIPnbBfXvP2ZC2TfHg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.2.tgz", + "integrity": "sha512-Yaf78O/B3Kkh+nKABUF++bvJv5Ijoy9AN1ww904rOXZFLWVc5OLOfL56W+C8F9xn5JQZa3UX6m+IktJnIb1Jjg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.2.tgz", + "integrity": "sha512-Iuws0kxo4yusk7sw70Xa2E2imZU5HoixzxfGCdxwBdhiDgt9vX9VUCBhqcwY7/uh//78A1hMkkROMJq9l27oLQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.27.2", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.2.tgz", + "integrity": "sha512-sRdU18mcKf7F+YgheI/zGf5alZatMUTKj/jNS6l744f9u3WFu4v7twcUI9vu4mknF4Y9aDlblIie0IM+5xxaqQ==", + "cpu": [ + "x64" + ], + "dev": true, + 
"license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/@hono/node-server": { + "version": "1.19.9", + "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.9.tgz", + "integrity": "sha512-vHL6w3ecZsky+8P5MD+eFfaGTyCeOHUIFYMGpQGbrBTSmNNoxv0if69rEZ5giu36weC5saFuznL411gRX7bJDw==", + "license": "MIT", + "engines": { + "node": ">=18.14.1" + }, + "peerDependencies": { + "hono": "^4" + } + }, + "node_modules/@modelcontextprotocol/sdk": { + "version": "1.25.3", + "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.25.3.tgz", + "integrity": "sha512-vsAMBMERybvYgKbg/l4L1rhS7VXV1c0CtyJg72vwxONVX0l4ZfKVAnZEWTQixJGTzKnELjQ59e4NbdFDALRiAQ==", + "license": "MIT", + "dependencies": { + "@hono/node-server": "^1.19.9", + "ajv": "^8.17.1", + "ajv-formats": "^3.0.1", + "content-type": "^1.0.5", + "cors": "^2.8.5", + "cross-spawn": "^7.0.5", + "eventsource": "^3.0.2", + "eventsource-parser": "^3.0.0", + "express": "^5.0.1", + "express-rate-limit": "^7.5.0", + "jose": "^6.1.1", + "json-schema-typed": "^8.0.2", + "pkce-challenge": "^5.0.0", + "raw-body": "^3.0.0", + "zod": "^3.25 || ^4.0", + "zod-to-json-schema": "^3.25.0" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@cfworker/json-schema": "^4.1.1", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "@cfworker/json-schema": { + "optional": true + }, + "zod": { + "optional": false + } + } + }, + "node_modules/@types/node": { + "version": "20.19.30", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.30.tgz", + "integrity": "sha512-WJtwWJu7UdlvzEAUm484QNg5eAoq5QR08KDNx7g45Usrs2NtOPiX8ugDqmKdXkyL03rBqU5dYNYVQetEpBHq2g==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } + }, + "node_modules/accepts": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-2.0.0.tgz", + "integrity": 
"sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==", + "license": "MIT", + "dependencies": { + "mime-types": "^3.0.0", + "negotiator": "^1.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ajv": { + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", + "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ajv-formats": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-3.0.1.tgz", + "integrity": "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==", + "license": "MIT", + "dependencies": { + "ajv": "^8.0.0" + }, + "peerDependencies": { + "ajv": "^8.0.0" + }, + "peerDependenciesMeta": { + "ajv": { + "optional": true + } + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, + "node_modules/axios": { + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.13.2.tgz", + "integrity": "sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.15.6", + "form-data": "^4.0.4", + "proxy-from-env": "^1.1.0" + } + }, + "node_modules/body-parser": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-2.2.2.tgz", + "integrity": 
"sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==", + "license": "MIT", + "dependencies": { + "bytes": "^3.1.2", + "content-type": "^1.0.5", + "debug": "^4.4.3", + "http-errors": "^2.0.0", + "iconv-lite": "^0.7.0", + "on-finished": "^2.4.1", + "qs": "^6.14.1", + "raw-body": "^3.0.1", + "type-is": "^2.0.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/bytes": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", + "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/call-bound": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": 
"~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/content-disposition": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.0.1.tgz", + "integrity": "sha512-oIXISMynqSqm241k6kcQ5UwttDILMK4BiurCfGEREw6+X9jkkpEe5T9FZaApyLGGOnFuyMWZpdolTXMtvEJ08Q==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/content-type": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie-signature": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.2.2.tgz", + "integrity": "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==", + "license": "MIT", + "engines": { + "node": ">=6.6.0" + } + }, + "node_modules/cors": { + "version": "2.8.6", + "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.6.tgz", + "integrity": "sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==", + "license": "MIT", + "dependencies": { + "object-assign": "^4", + "vary": "^1" + }, + "engines": { + "node": ">= 0.10" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": 
"https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": 
"sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==", + "license": "MIT" + }, + "node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/esbuild": { + "version": "0.27.2", + "resolved": 
"https://registry.npmjs.org/esbuild/-/esbuild-0.27.2.tgz", + "integrity": "sha512-HyNQImnsOC7X9PMNaCIeAm4ISCQXs5a5YasTXVliKv4uuBo1dKrG0A+uQS8M5eXjVMnLg3WgXaKvprHlFJQffw==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.27.2", + "@esbuild/android-arm": "0.27.2", + "@esbuild/android-arm64": "0.27.2", + "@esbuild/android-x64": "0.27.2", + "@esbuild/darwin-arm64": "0.27.2", + "@esbuild/darwin-x64": "0.27.2", + "@esbuild/freebsd-arm64": "0.27.2", + "@esbuild/freebsd-x64": "0.27.2", + "@esbuild/linux-arm": "0.27.2", + "@esbuild/linux-arm64": "0.27.2", + "@esbuild/linux-ia32": "0.27.2", + "@esbuild/linux-loong64": "0.27.2", + "@esbuild/linux-mips64el": "0.27.2", + "@esbuild/linux-ppc64": "0.27.2", + "@esbuild/linux-riscv64": "0.27.2", + "@esbuild/linux-s390x": "0.27.2", + "@esbuild/linux-x64": "0.27.2", + "@esbuild/netbsd-arm64": "0.27.2", + "@esbuild/netbsd-x64": "0.27.2", + "@esbuild/openbsd-arm64": "0.27.2", + "@esbuild/openbsd-x64": "0.27.2", + "@esbuild/openharmony-arm64": "0.27.2", + "@esbuild/sunos-x64": "0.27.2", + "@esbuild/win32-arm64": "0.27.2", + "@esbuild/win32-ia32": "0.27.2", + "@esbuild/win32-x64": "0.27.2" + } + }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==", + "license": "MIT" + }, + "node_modules/etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/eventsource": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/eventsource/-/eventsource-3.0.7.tgz", + 
"integrity": "sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==", + "license": "MIT", + "dependencies": { + "eventsource-parser": "^3.0.1" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/eventsource-parser": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.6.tgz", + "integrity": "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==", + "license": "MIT", + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/express": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", + "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", + "license": "MIT", + "dependencies": { + "accepts": "^2.0.0", + "body-parser": "^2.2.1", + "content-disposition": "^1.0.0", + "content-type": "^1.0.5", + "cookie": "^0.7.1", + "cookie-signature": "^1.2.1", + "debug": "^4.4.0", + "depd": "^2.0.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "finalhandler": "^2.1.0", + "fresh": "^2.0.0", + "http-errors": "^2.0.0", + "merge-descriptors": "^2.0.0", + "mime-types": "^3.0.0", + "on-finished": "^2.4.1", + "once": "^1.4.0", + "parseurl": "^1.3.3", + "proxy-addr": "^2.0.7", + "qs": "^6.14.0", + "range-parser": "^1.2.1", + "router": "^2.2.0", + "send": "^1.1.0", + "serve-static": "^2.2.0", + "statuses": "^2.0.1", + "type-is": "^2.0.1", + "vary": "^1.1.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/express-rate-limit": { + "version": "7.5.1", + "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-7.5.1.tgz", + "integrity": "sha512-7iN8iPMDzOMHPUYllBEsQdWVB6fPDMPqwjBaFrgr4Jgr/+okjvzAy+UHlYYL/Vs0OsOrMkwS6PJDkFlJwoxUnw==", + "license": "MIT", + "engines": { + "node": ">= 
16" + }, + "funding": { + "url": "https://github.com/sponsors/express-rate-limit" + }, + "peerDependencies": { + "express": ">= 4.11" + } + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "license": "MIT" + }, + "node_modules/fast-uri": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz", + "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, + "node_modules/finalhandler": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-2.1.1.tgz", + "integrity": "sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA==", + "license": "MIT", + "dependencies": { + "debug": "^4.4.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "on-finished": "^2.4.1", + "parseurl": "^1.3.3", + "statuses": "^2.0.1" + }, + "engines": { + "node": ">= 18.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/follow-redirects": { + "version": "1.15.11", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.11.tgz", + "integrity": "sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + 
"version": "4.0.5", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", + "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.2", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/form-data/node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/form-data/node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/forwarded": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fresh": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-2.0.0.tgz", + "integrity": "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + 
"hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/get-tsconfig": { + "version": "4.13.0", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.0.tgz", + "integrity": "sha512-1VKTZJCwBrvbd+Wn3AOgQP/2Av+TfTCOlE4AcRJE72W1ksZXbAx8PPBR9RzgTeSPzlPMHrbANMH3LbltH73wxQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": 
"https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/hono": { + "version": "4.11.5", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.11.5.tgz", + "integrity": "sha512-WemPi9/WfyMwZs+ZUXdiwcCh9Y+m7L+8vki9MzDw3jJ+W9Lc+12HGsd368Qc1vZi1xwW8BWMMsnK5efYKPdt4g==", + "license": "MIT", + "peer": true, + "engines": { + "node": ">=16.9.0" + } + }, + "node_modules/http-errors": { + "version": "2.0.1", + "resolved": 
"https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", + "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==", + "license": "MIT", + "dependencies": { + "depd": "~2.0.0", + "inherits": "~2.0.4", + "setprototypeof": "~1.2.0", + "statuses": "~2.0.2", + "toidentifier": "~1.0.1" + }, + "engines": { + "node": ">= 0.8" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/iconv-lite": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz", + "integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", + "license": "MIT", + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/is-promise": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz", + "integrity": "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==", + "license": "MIT" + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": 
"sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "license": "ISC" + }, + "node_modules/jose": { + "version": "6.1.3", + "resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz", + "integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/panva" + } + }, + "node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "license": "MIT" + }, + "node_modules/json-schema-typed": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/json-schema-typed/-/json-schema-typed-8.0.2.tgz", + "integrity": "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==", + "license": "BSD-2-Clause" + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/media-typer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-1.1.0.tgz", + "integrity": "sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/merge-descriptors": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-2.0.0.tgz", + "integrity": "sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + 
"funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz", + "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==", + "license": "MIT", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/negotiator": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-1.0.0.tgz", + "integrity": "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/object-inspect": { + "version": "1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "license": "MIT", + "engines": { + "node": 
">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/on-finished": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "license": "MIT", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-to-regexp": { + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.3.0.tgz", + "integrity": "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/pkce-challenge": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.1.tgz", + "integrity": "sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==", + "license": "MIT", + "engines": 
{ + "node": ">=16.20.0" + } + }, + "node_modules/proxy-addr": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "license": "MIT", + "dependencies": { + "forwarded": "0.2.0", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "license": "MIT" + }, + "node_modules/qs": { + "version": "6.14.1", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.1.tgz", + "integrity": "sha512-4EK3+xJl8Ts67nLYNwqw/dsFVnCf+qR7RgXSK9jEEm9unao3njwMDdmsdvoKBKHzxd7tCYz5e5M+SnMjdtXGQQ==", + "license": "BSD-3-Clause", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.2.tgz", + "integrity": "sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==", + "license": "MIT", + "dependencies": { + "bytes": "~3.1.2", + "http-errors": "~2.0.1", + "iconv-lite": "~0.7.0", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": 
"https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/resolve-pkg-maps": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, + "license": "MIT", + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, + "node_modules/router": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/router/-/router-2.2.0.tgz", + "integrity": "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==", + "license": "MIT", + "dependencies": { + "debug": "^4.4.0", + "depd": "^2.0.0", + "is-promise": "^4.0.0", + "parseurl": "^1.3.3", + "path-to-regexp": "^8.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", + "license": "MIT" + }, + "node_modules/send": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/send/-/send-1.2.1.tgz", + "integrity": "sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==", + "license": "MIT", + "dependencies": { + "debug": "^4.4.3", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "fresh": "^2.0.0", + "http-errors": "^2.0.1", + "mime-types": "^3.0.2", + "ms": "^2.1.3", + "on-finished": "^2.4.1", + "range-parser": "^1.2.1", + "statuses": "^2.0.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": 
"https://opencollective.com/express" + } + }, + "node_modules/serve-static": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-2.2.1.tgz", + "integrity": "sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw==", + "license": "MIT", + "dependencies": { + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "parseurl": "^1.3.3", + "send": "^1.2.0" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", + "license": "ISC" + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + "side-channel-weakmap": "^1.0.2" + }, + 
"engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "license": "MIT", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/statuses": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", + "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + 
"node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "license": "MIT", + "engines": { + "node": ">=0.6" + } + }, + "node_modules/tsx": { + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz", + "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", + "dev": true, + "license": "MIT", + "dependencies": { + "esbuild": "~0.27.0", + "get-tsconfig": "^4.7.5" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + } + }, + "node_modules/type-is": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-2.0.1.tgz", + "integrity": "sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==", + "license": "MIT", + "dependencies": { + "content-type": "^1.0.5", + "media-typer": "^1.1.0", + "mime-types": "^3.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", + "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", 
+ "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "license": "ISC" + }, + "node_modules/zod": { + "version": "4.3.6", + "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", + "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.25.1", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.1.tgz", + "integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==", + "license": "ISC", + "peerDependencies": { + "zod": "^3.25 || ^4" + } + } + } +} diff --git a/mcp-server/package.json b/mcp-server/package.json new file mode 100644 index 0000000..3e42c57 --- /dev/null +++ b/mcp-server/package.json @@ -0,0 +1,34 @@ +{ + "name": "@lmstack/mcp-server", + "version": 
"0.1.0", + "description": "MCP Server for LMStack - LLM Deployment Platform", + "type": "module", + "main": "dist/index.js", + "bin": { + "lmstack-mcp": "dist/index.js" + }, + "scripts": { + "build": "tsc", + "dev": "tsx watch src/index.ts", + "start": "node dist/index.js", + "inspect": "npx @anthropic-ai/mcp-inspector dist/index.js" + }, + "keywords": [ + "mcp", + "llm", + "lmstack", + "ai", + "model-context-protocol" + ], + "author": "", + "license": "MIT", + "dependencies": { + "@modelcontextprotocol/sdk": "^1.0.0", + "axios": "^1.6.0" + }, + "devDependencies": { + "@types/node": "^20.0.0", + "tsx": "^4.0.0", + "typescript": "^5.3.0" + } +} diff --git a/mcp-server/src/client.ts b/mcp-server/src/client.ts new file mode 100644 index 0000000..3e0e21f --- /dev/null +++ b/mcp-server/src/client.ts @@ -0,0 +1,227 @@ +/** + * LMStack API Client + * + * HTTP client for communicating with LMStack backend API. + * This client is shared between MCP Server and Web Chat tools. + */ +import axios, { AxiosInstance } from "axios"; + +export class LMStackClient { + private client: AxiosInstance; + + constructor(baseURL: string, token?: string) { + this.client = axios.create({ + baseURL, + timeout: 30000, + headers: { + "Content-Type": "application/json", + ...(token ? 
{ Authorization: `Bearer ${token}` } : {}), + }, + }); + } + + // ============================================================================ + // Workers + // ============================================================================ + + async getWorkers(): Promise { + const response = await this.client.get("/workers"); + return response.data.items || []; + } + + // ============================================================================ + // Containers + // ============================================================================ + + async getContainers(): Promise { + const response = await this.client.get("/containers"); + return response.data.items || []; + } + + async stopContainer(containerName: string, workerId: number): Promise { + await this.client.post(`/containers/${encodeURIComponent(containerName)}/stop`, null, { + params: { worker_id: workerId }, + }); + } + + async removeContainer(containerName: string, workerId: number, force?: boolean): Promise { + await this.client.delete(`/containers/${encodeURIComponent(containerName)}`, { + params: { worker_id: workerId, force: force || false }, + }); + } + + // ============================================================================ + // Models + // ============================================================================ + + async getModels(): Promise { + const response = await this.client.get("/models"); + return response.data.items || []; + } + + async addModel( + name: string, + source: string, + parameters?: string, + quantization?: string + ): Promise { + const response = await this.client.post("/models", { + name, + source, + parameters, + quantization, + }); + return response.data; + } + + async deleteModel(modelId: number): Promise { + await this.client.delete(`/models/${modelId}`); + } + + // ============================================================================ + // Deployments + // ============================================================================ + + async 
getDeployments(): Promise { + const response = await this.client.get("/deployments"); + return response.data.items || []; + } + + async getDeployment(deploymentId: number): Promise { + const response = await this.client.get(`/deployments/${deploymentId}`); + return response.data; + } + + async deployModel( + modelId: number, + workerId: number, + gpuIds?: number[] + ): Promise { + const response = await this.client.post("/deployments", { + model_id: modelId, + worker_id: workerId, + gpu_ids: gpuIds, + }); + return response.data; + } + + async stopDeployment(deploymentId: number): Promise { + await this.client.post(`/deployments/${deploymentId}/stop`); + } + + async startDeployment(deploymentId: number): Promise { + await this.client.post(`/deployments/${deploymentId}/start`); + } + + async deleteDeployment(deploymentId: number): Promise { + await this.client.delete(`/deployments/${deploymentId}`); + } + + // ============================================================================ + // API Keys + // ============================================================================ + + async getApiKeys(): Promise { + const response = await this.client.get("/api-keys"); + return response.data; + } + + async createApiKey( + name: string, + description?: string, + expiresInDays?: number + ): Promise { + const response = await this.client.post("/api-keys", { + name, + description, + expires_in_days: expiresInDays, + }); + return response.data; + } + + async deleteApiKey(apiKeyId: number): Promise { + await this.client.delete(`/api-keys/${apiKeyId}`); + } + + // ============================================================================ + // Docker Images + // ============================================================================ + + async getImages(workerId?: number, repository?: string): Promise { + const params: any = {}; + if (workerId) params.worker_id = workerId; + if (repository) params.repository = repository; + + const response = await 
this.client.get("/images", { params }); + return response.data.items || []; + } + + async pullImage(workerId: number, image: string): Promise { + const response = await this.client.post("/images/pull", { + worker_id: workerId, + image, + }); + return response.data; + } + + async deleteImage(imageId: string, workerId: number, force?: boolean): Promise { + await this.client.delete(`/images/${encodeURIComponent(imageId)}`, { + params: { worker_id: workerId, force: force || false }, + }); + } + + // ============================================================================ + // Storage + // ============================================================================ + + async getStorageVolumes(workerId?: number): Promise { + const params: any = {}; + if (workerId) params.worker_id = workerId; + + const response = await this.client.get("/storage/volumes", { params }); + return Array.isArray(response.data) ? response.data : []; + } + + async getDiskUsage(workerId?: number): Promise { + const params: any = {}; + if (workerId) params.worker_id = workerId; + + const response = await this.client.get("/storage/disk-usage", { params }); + return Array.isArray(response.data) ? response.data : []; + } + + async deleteStorageVolume(volumeName: string, workerId: number, force?: boolean): Promise { + await this.client.delete(`/storage/volumes/${encodeURIComponent(volumeName)}`, { + params: { worker_id: workerId, force: force || false }, + }); + } + + async pruneStorage( + workerId?: number, + images: boolean = true, + containers: boolean = true, + volumes: boolean = false, + buildCache: boolean = true + ): Promise { + const params: any = {}; + if (workerId) params.worker_id = workerId; + + const response = await this.client.post("/storage/prune", { + images, + containers, + volumes, + build_cache: buildCache, + }, { params }); + + return Array.isArray(response.data) ? 
response.data : []; + } + + // ============================================================================ + // Dashboard + // ============================================================================ + + async getDashboard(): Promise { + const response = await this.client.get("/dashboard"); + return response.data; + } +} diff --git a/mcp-server/src/formatters.ts b/mcp-server/src/formatters.ts new file mode 100644 index 0000000..b7a984a --- /dev/null +++ b/mcp-server/src/formatters.ts @@ -0,0 +1,384 @@ +/** + * Data Formatters + * + * Format LMStack API responses into human-readable text for MCP. + * These formatters produce markdown output suitable for AI agents. + */ + +/** + * Helper to format byte sizes + */ +function formatSize(bytes: number): string { + if (bytes >= 1024 * 1024 * 1024) { + return `${(bytes / 1024 / 1024 / 1024).toFixed(2)} GB`; + } + if (bytes >= 1024 * 1024) { + return `${(bytes / 1024 / 1024).toFixed(2)} MB`; + } + return `${(bytes / 1024).toFixed(2)} KB`; +} + +/** + * Format workers list + */ +export function formatWorkers(workers: any[]): string { + if (workers.length === 0) { + return "No workers registered."; + } + + const lines: string[] = [`# Workers (${workers.length} total)\n`]; + + for (const worker of workers) { + const statusEmoji = worker.status === "online" ? 
"🟢" : "🔴"; + lines.push(`## ${statusEmoji} ${worker.name}`); + lines.push(`- **Host:** ${worker.host}`); + lines.push(`- **Status:** ${worker.status}`); + lines.push(`- **ID:** ${worker.id}`); + + if (worker.gpu_info && worker.gpu_info.length > 0) { + lines.push(`- **GPUs:** ${worker.gpu_info.length}`); + for (const gpu of worker.gpu_info) { + const usedGB = (gpu.memory_used / 1024).toFixed(1); + const totalGB = (gpu.memory_total / 1024).toFixed(1); + const freeGB = ((gpu.memory_total - gpu.memory_used) / 1024).toFixed(1); + const util = gpu.utilization_gpu || 0; + lines.push(` - GPU ${gpu.index}: ${gpu.name}`); + lines.push(` - Memory: ${usedGB}GB / ${totalGB}GB (${freeGB}GB free)`); + lines.push(` - Utilization: ${util}%`); + } + } else { + lines.push(`- **GPUs:** None detected`); + } + lines.push(""); + } + + return lines.join("\n"); +} + +/** + * Format containers list + */ +export function formatContainers(containers: any[]): string { + if (containers.length === 0) { + return "No containers found."; + } + + const running = containers.filter( + (c) => c.status?.toLowerCase().includes("running") || c.status?.toLowerCase().includes("up") + ); + + const lines: string[] = [ + `# Docker Containers (${containers.length} total, ${running.length} running)\n`, + ]; + + // Group by worker + const byWorker: Record = {}; + for (const container of containers) { + const workerName = container.worker?.name || container.worker_name || "Unknown"; + if (!byWorker[workerName]) { + byWorker[workerName] = []; + } + byWorker[workerName].push(container); + } + + for (const [workerName, workerContainers] of Object.entries(byWorker)) { + lines.push(`## ${workerName}`); + for (const container of workerContainers) { + const statusEmoji = container.status?.toLowerCase().includes("running") || + container.status?.toLowerCase().includes("up") ? 
"🟢" : "⚪"; + lines.push(`- ${statusEmoji} **${container.name}**`); + lines.push(` - Image: ${container.image}`); + lines.push(` - Status: ${container.status}`); + lines.push(` - ID: ${container.id?.substring(0, 12) || "N/A"}`); + } + lines.push(""); + } + + return lines.join("\n"); +} + +/** + * Format deployments list + */ +export function formatDeployments(deployments: any[]): string { + if (deployments.length === 0) { + return "No model deployments found."; + } + + const running = deployments.filter((d) => d.status === "running"); + + const lines: string[] = [ + `# Model Deployments (${deployments.length} total, ${running.length} running)\n`, + ]; + + for (const dep of deployments) { + const statusEmoji = + dep.status === "running" ? "🟢" : + dep.status === "starting" ? "🟡" : + dep.status === "stopped" ? "⚪" : "🔴"; + + lines.push(`## ${statusEmoji} ${dep.model?.name || dep.name}`); + lines.push(`- **ID:** ${dep.id}`); + lines.push(`- **Status:** ${dep.status}`); + lines.push(`- **Worker:** ${dep.worker?.name || "Unknown"}`); + + if (dep.status === "running" && dep.port) { + lines.push(`- **Endpoint:** http://${dep.worker?.host}:${dep.port}/v1`); + } + + if (dep.gpu_ids && dep.gpu_ids.length > 0) { + lines.push(`- **GPUs:** ${dep.gpu_ids.join(", ")}`); + } + + if (dep.created_at) { + lines.push(`- **Created:** ${new Date(dep.created_at).toLocaleString()}`); + } + lines.push(""); + } + + return lines.join("\n"); +} + +/** + * Format models list + */ +export function formatModels(models: any[]): string { + if (models.length === 0) { + return "No models registered."; + } + + const lines: string[] = [`# Available Models (${models.length} total)\n`]; + + // Group by source + const bySource: Record = {}; + for (const model of models) { + const source = model.source || "unknown"; + if (!bySource[source]) { + bySource[source] = []; + } + bySource[source].push(model); + } + + for (const [source, sourceModels] of Object.entries(bySource)) { + lines.push(`## 
${source.charAt(0).toUpperCase() + source.slice(1)} (${sourceModels.length})`); + for (const model of sourceModels) { + lines.push(`- **${model.name}** (ID: ${model.id})`); + if (model.parameters) { + lines.push(` - Parameters: ${model.parameters}`); + } + if (model.quantization) { + lines.push(` - Quantization: ${model.quantization}`); + } + } + lines.push(""); + } + + return lines.join("\n"); +} + +/** + * Format complete system status + */ +export function formatSystemStatus( + workers: any[], + containers: any[], + deployments: any[], + models: any[] +): string { + const onlineWorkers = workers.filter((w) => w.status === "online"); + const runningContainers = containers.filter( + (c) => c.status?.toLowerCase().includes("running") || c.status?.toLowerCase().includes("up") + ); + const runningDeployments = deployments.filter((d) => d.status === "running"); + + // Calculate total GPU memory + let totalGpuMemory = 0; + let usedGpuMemory = 0; + for (const worker of workers) { + for (const gpu of worker.gpu_info || []) { + totalGpuMemory += gpu.memory_total || 0; + usedGpuMemory += gpu.memory_used || 0; + } + } + const freeGpuMemory = totalGpuMemory - usedGpuMemory; + + const lines: string[] = [ + "# LMStack System Status", + "", + `**Last Updated:** ${new Date().toLocaleString()}`, + "", + "## Summary", + `- 🖥️ **Workers:** ${onlineWorkers.length}/${workers.length} online`, + `- 📦 **Containers:** ${runningContainers.length}/${containers.length} running`, + `- 🚀 **Deployments:** ${runningDeployments.length}/${deployments.length} running`, + `- 🤖 **Models:** ${models.length} available`, + `- 🎮 **GPU Memory:** ${(usedGpuMemory / 1024).toFixed(1)}GB used / ${(freeGpuMemory / 1024).toFixed(1)}GB free / ${(totalGpuMemory / 1024).toFixed(1)}GB total`, + "", + ]; + + // Add workers section + lines.push(formatWorkers(workers)); + lines.push(""); + + // Add running deployments + if (runningDeployments.length > 0) { + lines.push("## Active Deployments"); + for (const dep of 
runningDeployments) { + lines.push(`- **${dep.model?.name || dep.name}** on ${dep.worker?.name} (ID: ${dep.id})`); + } + lines.push(""); + } + + return lines.join("\n"); +} + +/** + * Format API keys list + */ +export function formatApiKeys(apiKeysData: any): string { + const apiKeys = apiKeysData?.items || []; + + if (apiKeys.length === 0) { + return "No API keys found."; + } + + const lines: string[] = [`# API Keys (${apiKeys.length} total)\n`]; + + for (const key of apiKeys) { + lines.push(`## ${key.name}`); + lines.push(`- **ID:** ${key.id}`); + lines.push(`- **Access Key:** ${key.access_key || "N/A"}`); + if (key.description) { + lines.push(`- **Description:** ${key.description}`); + } + if (key.expires_at) { + lines.push(`- **Expires:** ${new Date(key.expires_at).toLocaleString()}`); + } else { + lines.push(`- **Expires:** Never`); + } + if (key.last_used_at) { + lines.push(`- **Last Used:** ${new Date(key.last_used_at).toLocaleString()}`); + } + lines.push(`- **Created:** ${new Date(key.created_at).toLocaleString()}`); + lines.push(""); + } + + return lines.join("\n"); +} + +/** + * Format Docker images list + */ +export function formatImages(images: any[]): string { + if (images.length === 0) { + return "No Docker images found."; + } + + const lines: string[] = [`# Docker Images (${images.length} total)\n`]; + + // Group by worker + const byWorker: Record = {}; + for (const img of images) { + const workerName = img.worker_name || "Unknown"; + if (!byWorker[workerName]) { + byWorker[workerName] = []; + } + byWorker[workerName].push(img); + } + + for (const [workerName, workerImages] of Object.entries(byWorker)) { + lines.push(`## ${workerName} (${workerImages.length} images)`); + for (const img of workerImages) { + const name = img.full_name || `${img.repository || ""}:${img.tag || "latest"}`; + lines.push(`- **${name}**`); + lines.push(` - ID: ${img.id?.substring(0, 12) || "N/A"}`); + lines.push(` - Size: ${formatSize(img.size || 0)}`); + if 
(img.created_at) { + lines.push(` - Created: ${new Date(img.created_at).toLocaleString()}`); + } + } + lines.push(""); + } + + return lines.join("\n"); +} + +/** + * Format storage volumes list + */ +export function formatStorageVolumes(volumes: any[]): string { + if (volumes.length === 0) { + return "No storage volumes found."; + } + + const lines: string[] = [`# Storage Volumes (${volumes.length} total)\n`]; + + // Group by worker + const byWorker: Record = {}; + for (const vol of volumes) { + const workerName = vol.worker_name || "Unknown"; + if (!byWorker[workerName]) { + byWorker[workerName] = []; + } + byWorker[workerName].push(vol); + } + + for (const [workerName, workerVolumes] of Object.entries(byWorker)) { + lines.push(`## ${workerName} (${workerVolumes.length} volumes)`); + for (const vol of workerVolumes) { + lines.push(`- **${vol.name}**`); + lines.push(` - Driver: ${vol.driver || "local"}`); + if (vol.mountpoint) { + lines.push(` - Mountpoint: ${vol.mountpoint}`); + } + if (vol.created_at) { + lines.push(` - Created: ${new Date(vol.created_at).toLocaleString()}`); + } + } + lines.push(""); + } + + return lines.join("\n"); +} + +/** + * Format disk usage statistics + */ +export function formatDiskUsage(usageList: any[]): string { + if (usageList.length === 0) { + return "No disk usage data available."; + } + + const lines: string[] = ["# Disk Usage\n"]; + + for (const u of usageList) { + lines.push(`## ${u.worker_name || "Worker"}`); + + lines.push("### Images"); + lines.push(`- Count: ${u.images?.count || 0}`); + lines.push(`- Size: ${formatSize(u.images?.size || 0)}`); + lines.push(`- Reclaimable: ${formatSize(u.images?.reclaimable || 0)}`); + + lines.push("### Containers"); + lines.push(`- Count: ${u.containers?.count || 0}`); + lines.push(`- Size: ${formatSize(u.containers?.size || 0)}`); + lines.push(`- Reclaimable: ${formatSize(u.containers?.reclaimable || 0)}`); + + lines.push("### Volumes"); + lines.push(`- Count: ${u.volumes?.count || 0}`); + 
lines.push(`- Size: ${formatSize(u.volumes?.size || 0)}`); + lines.push(`- Reclaimable: ${formatSize(u.volumes?.reclaimable || 0)}`); + + lines.push("### Build Cache"); + lines.push(`- Count: ${u.build_cache?.count || 0}`); + lines.push(`- Size: ${formatSize(u.build_cache?.size || 0)}`); + lines.push(`- Reclaimable: ${formatSize(u.build_cache?.reclaimable || 0)}`); + + lines.push(""); + lines.push(`**Total Size:** ${formatSize(u.total_size || 0)}`); + lines.push(`**Total Reclaimable:** ${formatSize(u.total_reclaimable || 0)}`); + lines.push(""); + } + + return lines.join("\n"); +} diff --git a/mcp-server/src/index.ts b/mcp-server/src/index.ts new file mode 100644 index 0000000..f0d6279 --- /dev/null +++ b/mcp-server/src/index.ts @@ -0,0 +1,1200 @@ +#!/usr/bin/env node +/** + * LMStack MCP Server + * + * Model Context Protocol server for LMStack platform. + * Provides resources and tools for managing LLM infrastructure. + * + * This MCP Server exposes the SAME tools as the Web Chat interface, + * allowing AI agents (e.g., Claude Desktop, Cursor) to manage LMStack. 
+ */ +import { Server } from "@modelcontextprotocol/sdk/server/index.js"; +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; +import { + CallToolRequestSchema, + ListResourcesRequestSchema, + ListToolsRequestSchema, + ReadResourceRequestSchema, +} from "@modelcontextprotocol/sdk/types.js"; +import { LMStackClient } from "./client.js"; +import { + formatWorkers, + formatContainers, + formatDeployments, + formatModels, + formatSystemStatus, + formatApiKeys, + formatImages, + formatStorageVolumes, + formatDiskUsage, +} from "./formatters.js"; + +// Configuration from environment +const LMSTACK_API_URL = process.env.LMSTACK_API_URL || "http://localhost:8000/api"; +const LMSTACK_API_TOKEN = process.env.LMSTACK_API_TOKEN || ""; + +// Initialize LMStack client +const client = new LMStackClient(LMSTACK_API_URL, LMSTACK_API_TOKEN); + +// Create MCP server +const server = new Server( + { + name: "lmstack-mcp-server", + version: "0.2.0", + }, + { + capabilities: { + resources: {}, + tools: {}, + }, + } +); + +/** + * List available resources + */ +server.setRequestHandler(ListResourcesRequestSchema, async () => { + return { + resources: [ + { + uri: "lmstack://system/status", + name: "System Status", + description: "Complete LMStack system status including workers, deployments, and containers", + mimeType: "text/plain", + }, + { + uri: "lmstack://workers", + name: "Workers", + description: "List of all worker nodes with GPU information", + mimeType: "text/plain", + }, + { + uri: "lmstack://containers", + name: "Docker Containers", + description: "List of all Docker containers across workers", + mimeType: "text/plain", + }, + { + uri: "lmstack://deployments", + name: "Model Deployments", + description: "List of all model deployments", + mimeType: "text/plain", + }, + { + uri: "lmstack://models", + name: "Available Models", + description: "List of all registered models", + mimeType: "text/plain", + }, + { + uri: "lmstack://api-keys", + name: "API 
Keys", + description: "List of all API keys", + mimeType: "text/plain", + }, + { + uri: "lmstack://images", + name: "Docker Images", + description: "List of all Docker images across workers", + mimeType: "text/plain", + }, + { + uri: "lmstack://storage", + name: "Storage Volumes", + description: "List of all storage volumes and disk usage", + mimeType: "text/plain", + }, + ], + }; +}); + +/** + * Read resource content + */ +server.setRequestHandler(ReadResourceRequestSchema, async (request) => { + const { uri } = request.params; + + try { + switch (uri) { + case "lmstack://system/status": { + const [workers, containers, deployments, models] = await Promise.all([ + client.getWorkers(), + client.getContainers(), + client.getDeployments(), + client.getModels(), + ]); + return { + contents: [ + { + uri, + mimeType: "text/plain", + text: formatSystemStatus(workers, containers, deployments, models), + }, + ], + }; + } + + case "lmstack://workers": { + const workers = await client.getWorkers(); + return { + contents: [ + { + uri, + mimeType: "text/plain", + text: formatWorkers(workers), + }, + ], + }; + } + + case "lmstack://containers": { + const containers = await client.getContainers(); + return { + contents: [ + { + uri, + mimeType: "text/plain", + text: formatContainers(containers), + }, + ], + }; + } + + case "lmstack://deployments": { + const deployments = await client.getDeployments(); + return { + contents: [ + { + uri, + mimeType: "text/plain", + text: formatDeployments(deployments), + }, + ], + }; + } + + case "lmstack://models": { + const models = await client.getModels(); + return { + contents: [ + { + uri, + mimeType: "text/plain", + text: formatModels(models), + }, + ], + }; + } + + case "lmstack://api-keys": { + const apiKeysData = await client.getApiKeys(); + return { + contents: [ + { + uri, + mimeType: "text/plain", + text: formatApiKeys(apiKeysData), + }, + ], + }; + } + + case "lmstack://images": { + const images = await client.getImages(); + return { 
+ contents: [ + { + uri, + mimeType: "text/plain", + text: formatImages(images), + }, + ], + }; + } + + case "lmstack://storage": { + const [volumes, diskUsage] = await Promise.all([ + client.getStorageVolumes(), + client.getDiskUsage(), + ]); + return { + contents: [ + { + uri, + mimeType: "text/plain", + text: formatStorageVolumes(volumes) + "\n\n" + formatDiskUsage(diskUsage), + }, + ], + }; + } + + default: + throw new Error(`Unknown resource: ${uri}`); + } + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + return { + contents: [ + { + uri, + mimeType: "text/plain", + text: `Error fetching resource: ${message}`, + }, + ], + }; + } +}); + +/** + * List available tools + * These tools match the Web Chat tools in tools.ts + */ +server.setRequestHandler(ListToolsRequestSchema, async () => { + return { + tools: [ + // ============== Query Tools ============== + { + name: "get_system_status", + description: "Get complete LMStack system status including workers, GPUs, containers, and deployments", + inputSchema: { + type: "object", + properties: {}, + required: [], + }, + }, + { + name: "list_workers", + description: "List all worker nodes with their GPU status and memory usage", + inputSchema: { + type: "object", + properties: {}, + required: [], + }, + }, + { + name: "list_containers", + description: "List all Docker containers running on workers", + inputSchema: { + type: "object", + properties: { + status: { + type: "string", + description: "Filter by status: running, stopped, all (default: all)", + enum: ["running", "stopped", "all"], + }, + worker_id: { + type: "number", + description: "Filter by specific worker ID", + }, + }, + required: [], + }, + }, + { + name: "list_deployments", + description: "List all model deployments", + inputSchema: { + type: "object", + properties: { + status: { + type: "string", + description: "Filter by status: running, stopped, all (default: all)", + enum: ["running", "stopped", "all"], 
+ }, + }, + required: [], + }, + }, + { + name: "list_models", + description: "List all available models that can be deployed", + inputSchema: { + type: "object", + properties: { + source: { + type: "string", + description: "Filter by source: huggingface, ollama, local (optional)", + enum: ["huggingface", "ollama", "local"], + }, + }, + required: [], + }, + }, + { + name: "get_gpu_status", + description: "Get detailed GPU status including memory usage and utilization", + inputSchema: { + type: "object", + properties: { + worker_id: { + type: "number", + description: "Filter by specific worker ID", + }, + }, + required: [], + }, + }, + + // ============== Model Management Tools ============== + { + name: "add_model", + description: "Add a new model to the system. Supports HuggingFace and Ollama models.", + inputSchema: { + type: "object", + properties: { + name: { + type: "string", + description: "Model name/identifier (e.g., 'Qwen/Qwen2.5-7B-Instruct' for HuggingFace, 'llama3.2' for Ollama)", + }, + source: { + type: "string", + description: "Model source", + enum: ["huggingface", "ollama"], + }, + parameters: { + type: "string", + description: "Optional: Model parameters (e.g., '7B', '13B')", + }, + quantization: { + type: "string", + description: "Optional: Quantization format (e.g., 'GPTQ', 'AWQ', 'GGUF')", + }, + }, + required: ["name", "source"], + }, + }, + { + name: "delete_model", + description: "Delete a model from the system. This will NOT delete any deployments using this model.", + inputSchema: { + type: "object", + properties: { + model_id: { + type: "number", + description: "ID of the model to delete (use list_models to find IDs)", + }, + }, + required: ["model_id"], + }, + }, + + // ============== Deployment Tools ============== + { + name: "deploy_model", + description: "Deploy a model to a worker. 
Returns deployment ID on success.", + inputSchema: { + type: "object", + properties: { + model_id: { + type: "number", + description: "ID of the model to deploy", + }, + worker_id: { + type: "number", + description: "ID of the worker to deploy to", + }, + gpu_ids: { + type: "array", + items: { type: "number" }, + description: "GPU indices to use (optional, defaults to auto-select)", + }, + }, + required: ["model_id", "worker_id"], + }, + }, + { + name: "stop_deployment", + description: "Stop a running model deployment", + inputSchema: { + type: "object", + properties: { + deployment_id: { + type: "number", + description: "ID of the deployment to stop", + }, + }, + required: ["deployment_id"], + }, + }, + { + name: "start_deployment", + description: "Start a stopped model deployment", + inputSchema: { + type: "object", + properties: { + deployment_id: { + type: "number", + description: "ID of the deployment to start", + }, + }, + required: ["deployment_id"], + }, + }, + { + name: "delete_deployment", + description: "Delete a model deployment completely. This cannot be undone.", + inputSchema: { + type: "object", + properties: { + deployment_id: { + type: "number", + description: "ID of the deployment to delete", + }, + }, + required: ["deployment_id"], + }, + }, + + // ============== Container Tools ============== + { + name: "stop_container", + description: "Stop a running Docker container. Use list_containers first to find worker_id.", + inputSchema: { + type: "object", + properties: { + container_name: { + type: "string", + description: "Name of the container to stop", + }, + worker_id: { + type: "number", + description: "ID of the worker where the container is running", + }, + }, + required: ["container_name", "worker_id"], + }, + }, + { + name: "remove_container", + description: "Remove/delete a Docker container. 
Use list_containers first to find worker_id.", + inputSchema: { + type: "object", + properties: { + container_name: { + type: "string", + description: "Name of the container to remove", + }, + worker_id: { + type: "number", + description: "ID of the worker where the container is located", + }, + force: { + type: "boolean", + description: "Force remove even if running (default: false)", + }, + }, + required: ["container_name", "worker_id"], + }, + }, + + // ============== API Key Tools ============== + { + name: "list_api_keys", + description: "List all API keys in the system with their usage statistics.", + inputSchema: { + type: "object", + properties: {}, + required: [], + }, + }, + { + name: "create_api_key", + description: "Create a new API key for accessing the LMStack API.", + inputSchema: { + type: "object", + properties: { + name: { + type: "string", + description: "Name for the API key (e.g., 'production-key', 'test-key')", + }, + description: { + type: "string", + description: "Optional description for the API key", + }, + expires_in_days: { + type: "number", + description: "Optional: Number of days until the key expires. 
If not set, the key never expires.", + }, + }, + required: ["name"], + }, + }, + { + name: "delete_api_key", + description: "Delete an API key from the system.", + inputSchema: { + type: "object", + properties: { + api_key_id: { + type: "number", + description: "ID of the API key to delete (use list_api_keys to find IDs)", + }, + }, + required: ["api_key_id"], + }, + }, + + // ============== Docker Image Tools ============== + { + name: "list_images", + description: "List all Docker images across all workers.", + inputSchema: { + type: "object", + properties: { + worker_id: { + type: "number", + description: "Optional: Filter by specific worker ID", + }, + repository: { + type: "string", + description: "Optional: Filter by repository name", + }, + }, + required: [], + }, + }, + { + name: "pull_image", + description: "Pull a Docker image from a registry to a worker.", + inputSchema: { + type: "object", + properties: { + worker_id: { + type: "number", + description: "ID of the worker to pull the image to", + }, + image: { + type: "string", + description: "Image reference (e.g., 'nginx:latest', 'python:3.11')", + }, + }, + required: ["worker_id", "image"], + }, + }, + { + name: "delete_image", + description: "Delete a Docker image from a worker.", + inputSchema: { + type: "object", + properties: { + image_id: { + type: "string", + description: "ID or name of the image to delete", + }, + worker_id: { + type: "number", + description: "ID of the worker where the image is located", + }, + force: { + type: "boolean", + description: "Force removal even if image is in use (default: false)", + }, + }, + required: ["image_id", "worker_id"], + }, + }, + + // ============== Storage Tools ============== + { + name: "list_storage_volumes", + description: "List all Docker storage volumes across all workers.", + inputSchema: { + type: "object", + properties: { + worker_id: { + type: "number", + description: "Optional: Filter by specific worker ID", + }, + }, + required: [], + }, + 
}, + { + name: "get_disk_usage", + description: "Get Docker disk usage statistics including images, containers, volumes, and build cache.", + inputSchema: { + type: "object", + properties: { + worker_id: { + type: "number", + description: "Optional: Filter by specific worker ID", + }, + }, + required: [], + }, + }, + { + name: "delete_storage_volume", + description: "Delete a Docker storage volume from a worker.", + inputSchema: { + type: "object", + properties: { + volume_name: { + type: "string", + description: "Name of the volume to delete", + }, + worker_id: { + type: "number", + description: "ID of the worker where the volume is located", + }, + force: { + type: "boolean", + description: "Force removal (default: false)", + }, + }, + required: ["volume_name", "worker_id"], + }, + }, + { + name: "prune_storage", + description: "Clean up unused Docker resources (images, containers, volumes, build cache) to free disk space.", + inputSchema: { + type: "object", + properties: { + worker_id: { + type: "number", + description: "Optional: Only prune on specific worker. 
If not set, prunes on all workers.",
+            },
+            images: {
+              type: "boolean",
+              description: "Prune unused images (default: true)",
+            },
+            containers: {
+              type: "boolean",
+              description: "Prune stopped containers (default: true)",
+            },
+            volumes: {
+              type: "boolean",
+              description: "Prune unused volumes (default: false - be careful!)",
+            },
+            build_cache: {
+              type: "boolean",
+              description: "Prune build cache (default: true)",
+            },
+          },
+          required: [],
+        },
+      },
+    ],
+  };
+});
+
+/**
+ * Execute tools
+ *
+ * Handles tool-call requests: reads the tool `name` and its `arguments`
+ * from `request.params`, dispatches on `name`, and returns a result whose
+ * `content` is a single `{ type: "text" }` item.
+ *
+ * Anything thrown inside the switch (a missing required argument, an
+ * unknown tool name, or a rejected `client` call) is caught at the bottom
+ * and returned as an `Error: <message>` text result with `isError: true`
+ * instead of being rethrown.
+ *
+ * NOTE(review): required arguments are checked by truthiness (`!args?.x`),
+ * so `0` and `""` are rejected as missing, and optional numeric arguments
+ * equal to `0` are treated as not provided. Confirm IDs are never 0.
+ */
+server.setRequestHandler(CallToolRequestSchema, async (request) => {
+  const { name, arguments: args } = request.params;
+
+  try {
+    switch (name) {
+      // ============== Query Tools ==============
+      case "get_system_status": {
+        // Fetch all four resource lists concurrently, then render one combined report.
+        const [workers, containers, deployments, models] = await Promise.all([
+          client.getWorkers(),
+          client.getContainers(),
+          client.getDeployments(),
+          client.getModels(),
+        ]);
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatSystemStatus(workers, containers, deployments, models),
+            },
+          ],
+        };
+      }
+
+      case "list_workers": {
+        const workers = await client.getWorkers();
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatWorkers(workers),
+            },
+          ],
+        };
+      }
+
+      case "list_containers": {
+        // Every container is fetched; both filters below are applied in this process.
+        const containers = await client.getContainers();
+        let filtered = containers;
+
+        // A container matches when either its nested worker.id or its flat worker_id equals the argument.
+        if (args?.worker_id) {
+          filtered = filtered.filter((c: any) =>
+            c.worker?.id === args.worker_id || c.worker_id === args.worker_id
+          );
+        }
+
+        // "running" also accepts statuses containing "up"; any other value is a substring match.
+        // NOTE(review): the container status is lower-cased but args.status is not — confirm callers pass lower case.
+        if (args?.status && args.status !== "all") {
+          filtered = filtered.filter((c: any) => {
+            const s = c.status?.toLowerCase() || "";
+            if (args.status === "running") {
+              return s.includes("running") || s.includes("up");
+            }
+            return s.includes(String(args.status));
+          });
+        }
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatContainers(filtered),
+            },
+          ],
+        };
+      }
+
+      case "list_deployments": {
+        const deployments = await client.getDeployments();
+        let filtered = deployments;
+
+        // Case-insensitive exact match on status; "all" (or no status) disables the filter.
+        if (args?.status && args.status !== "all") {
+          filtered = filtered.filter((d: any) =>
+            d.status?.toLowerCase() === String(args.status).toLowerCase()
+          );
+        }
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatDeployments(filtered),
+            },
+          ],
+        };
+      }
+
+      case "list_models": {
+        const models = await client.getModels();
+        let filtered = models;
+
+        // Case-insensitive exact match on source.
+        if (args?.source) {
+          filtered = filtered.filter((m: any) =>
+            m.source?.toLowerCase() === String(args.source).toLowerCase()
+          );
+        }
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatModels(filtered),
+            },
+          ],
+        };
+      }
+
+      case "get_gpu_status": {
+        const workers = await client.getWorkers();
+        let filtered = workers;
+
+        if (args?.worker_id) {
+          filtered = filtered.filter((w: any) => w.id === args.worker_id);
+        }
+
+        // Build a markdown report: one "##" section per worker, one bullet group per GPU.
+        const lines: string[] = ["# GPU Status\n"];
+        for (const worker of filtered) {
+          lines.push(`## ${worker.name} (${worker.status})`);
+          if (worker.gpu_info && worker.gpu_info.length > 0) {
+            for (const gpu of worker.gpu_info) {
+              // NOTE(review): dividing by 1024 and labelling the result "GB" presumes the
+              // memory fields are reported in MB — confirm against the worker API.
+              const usedGB = (gpu.memory_used / 1024).toFixed(1);
+              const totalGB = (gpu.memory_total / 1024).toFixed(1);
+              const freeGB = ((gpu.memory_total - gpu.memory_used) / 1024).toFixed(1);
+              lines.push(`- GPU ${gpu.index}: ${gpu.name}`);
+              lines.push(` - Memory: ${usedGB}GB used / ${freeGB}GB free / ${totalGB}GB total`);
+              lines.push(` - Utilization: ${gpu.utilization_gpu}%`);
+            }
+          } else {
+            lines.push("- No GPU information available");
+          }
+          lines.push("");
+        }
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: lines.join("\n"),
+            },
+          ],
+        };
+      }
+
+      // ============== Model Management Tools ==============
+      case "add_model": {
+        if (!args?.name || !args?.source) {
+          throw new Error("name and source are required");
+        }
+
+        // parameters and quantization are optional; falsy values are passed as undefined.
+        const model = await client.addModel(
+          String(args.name),
+          String(args.source),
+          args.parameters ? String(args.parameters) : undefined,
+          args.quantization ? String(args.quantization) : undefined
+        );
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully added model!\n\nModel ID: ${model.id}\nName: ${model.name}\nSource: ${model.source}`,
+            },
+          ],
+        };
+      }
+
+      case "delete_model": {
+        if (!args?.model_id) {
+          throw new Error("model_id is required");
+        }
+
+        await client.deleteModel(Number(args.model_id));
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully deleted model ${args.model_id}`,
+            },
+          ],
+        };
+      }
+
+      // ============== Deployment Tools ==============
+      case "deploy_model": {
+        if (!args?.model_id || !args?.worker_id) {
+          throw new Error("model_id and worker_id are required");
+        }
+
+        // gpu_ids is forwarded with a type assertion only; its contents are not validated here.
+        const result = await client.deployModel(
+          Number(args.model_id),
+          Number(args.worker_id),
+          args.gpu_ids as number[] | undefined
+        );
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully started deployment!\n\nDeployment ID: ${result.id}\nStatus: ${result.status}\n\nThe model is being deployed. Use list_deployments to check status.`,
+            },
+          ],
+        };
+      }
+
+      case "stop_deployment": {
+        if (!args?.deployment_id) {
+          throw new Error("deployment_id is required");
+        }
+
+        await client.stopDeployment(Number(args.deployment_id));
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully stopped deployment ${args.deployment_id}`,
+            },
+          ],
+        };
+      }
+
+      case "start_deployment": {
+        if (!args?.deployment_id) {
+          throw new Error("deployment_id is required");
+        }
+
+        await client.startDeployment(Number(args.deployment_id));
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully started deployment ${args.deployment_id}`,
+            },
+          ],
+        };
+      }
+
+      case "delete_deployment": {
+        if (!args?.deployment_id) {
+          throw new Error("deployment_id is required");
+        }
+
+        await client.deleteDeployment(Number(args.deployment_id));
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully deleted deployment ${args.deployment_id}`,
+            },
+          ],
+        };
+      }
+
+      // ============== Container Tools ==============
+      case "stop_container": {
+        if (!args?.container_name || !args?.worker_id) {
+          throw new Error("container_name and worker_id are required");
+        }
+
+        await client.stopContainer(String(args.container_name), Number(args.worker_id));
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully stopped container "${args.container_name}"`,
+            },
+          ],
+        };
+      }
+
+      case "remove_container": {
+        if (!args?.container_name || !args?.worker_id) {
+          throw new Error("container_name and worker_id are required");
+        }
+
+        // force is forwarded as-is (type assertion only).
+        await client.removeContainer(
+          String(args.container_name),
+          Number(args.worker_id),
+          args.force as boolean | undefined
+        );
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully removed container "${args.container_name}"`,
+            },
+          ],
+        };
+      }
+
+      // ============== API Key Tools ==============
+      case "list_api_keys": {
+        const apiKeysData = await client.getApiKeys();
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatApiKeys(apiKeysData),
+            },
+          ],
+        };
+      }
+
+      case "create_api_key": {
+        if (!args?.name) {
+          throw new Error("name is required");
+        }
+
+        // A falsy expires_in_days (including 0) is passed on as undefined.
+        const apiKey = await client.createApiKey(
+          String(args.name),
+          args.description ? String(args.description) : undefined,
+          args.expires_in_days ? Number(args.expires_in_days) : undefined
+        );
+
+        // The reply text includes the full key value returned by the client.
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully created API key!\n\nID: ${apiKey.id}\nName: ${apiKey.name}\nAccess Key: ${apiKey.access_key}\nFull Key: ${apiKey.api_key}\n\n**IMPORTANT:** Save the full API key now! It will not be shown again.`,
+            },
+          ],
+        };
+      }
+
+      case "delete_api_key": {
+        if (!args?.api_key_id) {
+          throw new Error("api_key_id is required");
+        }
+
+        await client.deleteApiKey(Number(args.api_key_id));
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully deleted API key ${args.api_key_id}`,
+            },
+          ],
+        };
+      }
+
+      // ============== Docker Image Tools ==============
+      case "list_images": {
+        // Unlike list_containers, the optional filters are handed to the client call.
+        const images = await client.getImages(
+          args?.worker_id ? Number(args.worker_id) : undefined,
+          args?.repository ? String(args.repository) : undefined
+        );
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatImages(images),
+            },
+          ],
+        };
+      }
+
+      case "pull_image": {
+        if (!args?.worker_id || !args?.image) {
+          throw new Error("worker_id and image are required");
+        }
+
+        const result = await client.pullImage(Number(args.worker_id), String(args.image));
+
+        // The raw client result is appended as pretty-printed JSON.
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully pulled image "${args.image}"\n\n${JSON.stringify(result, null, 2)}`,
+            },
+          ],
+        };
+      }
+
+      case "delete_image": {
+        if (!args?.image_id || !args?.worker_id) {
+          throw new Error("image_id and worker_id are required");
+        }
+
+        await client.deleteImage(
+          String(args.image_id),
+          Number(args.worker_id),
+          args.force as boolean | undefined
+        );
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully deleted image "${args.image_id}"`,
+            },
+          ],
+        };
+      }
+
+      // ============== Storage Tools ==============
+      case "list_storage_volumes": {
+        const volumes = await client.getStorageVolumes(
+          args?.worker_id ? Number(args.worker_id) : undefined
+        );
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatStorageVolumes(volumes),
+            },
+          ],
+        };
+      }
+
+      case "get_disk_usage": {
+        const diskUsage = await client.getDiskUsage(
+          args?.worker_id ? Number(args.worker_id) : undefined
+        );
+        return {
+          content: [
+            {
+              type: "text",
+              text: formatDiskUsage(diskUsage),
+            },
+          ],
+        };
+      }
+
+      case "delete_storage_volume": {
+        if (!args?.volume_name || !args?.worker_id) {
+          throw new Error("volume_name and worker_id are required");
+        }
+
+        await client.deleteStorageVolume(
+          String(args.volume_name),
+          Number(args.worker_id),
+          args.force as boolean | undefined
+        );
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Successfully deleted volume "${args.volume_name}"`,
+            },
+          ],
+        };
+      }
+
+      case "prune_storage": {
+        // images, containers and build_cache are pruned unless explicitly false;
+        // volumes are pruned only when explicitly true.
+        const results = await client.pruneStorage(
+          args?.worker_id ? Number(args.worker_id) : undefined,
+          args?.images !== false,
+          args?.containers !== false,
+          args?.volumes === true,
+          args?.build_cache !== false
+        );
+
+        // Render a byte count as GB (>= 1024^3 bytes) or MB otherwise, to two decimals.
+        const formatSize = (bytes: number) => {
+          if (bytes >= 1024 * 1024 * 1024) {
+            return `${(bytes / 1024 / 1024 / 1024).toFixed(2)} GB`;
+          }
+          return `${(bytes / 1024 / 1024).toFixed(2)} MB`;
+        };
+
+        // One "##" section per result entry; missing counters are shown as 0.
+        const lines = ["# Storage Pruned Successfully\n"];
+        for (const r of results) {
+          lines.push(`## ${r.worker_name || "Worker"}`);
+          lines.push(`- Images deleted: ${r.images_deleted || 0}`);
+          lines.push(`- Containers deleted: ${r.containers_deleted || 0}`);
+          lines.push(`- Volumes deleted: ${r.volumes_deleted || 0}`);
+          lines.push(`- Build cache deleted: ${r.build_cache_deleted || 0}`);
+          lines.push(`- Space reclaimed: ${formatSize(r.space_reclaimed || 0)}`);
+          lines.push("");
+        }
+
+        return {
+          content: [
+            {
+              type: "text",
+              text: lines.join("\n"),
+            },
+          ],
+        };
+      }
+
+      default:
+        throw new Error(`Unknown tool: ${name}`);
+    }
+  } catch (error) {
+    // Report the failure as a tool result (isError: true) rather than rethrowing;
+    // values that are not Error instances are stringified.
+    const message = error instanceof Error ? error.message : String(error);
+    return {
+      content: [
+        {
+          type: "text",
+          text: `Error: ${message}`,
+        },
+      ],
+      isError: true,
+    };
+  }
+});
+
+/**
+ * Start the server
+ *
+ * Connects `server` to a new `StdioServerTransport`, then writes a startup
+ * banner and the configured `LMSTACK_API_URL` with `console.error`.
+ * NOTE(review): logging through console.error is presumably deliberate so
+ * that stdout stays free for the stdio transport — confirm.
+ */
+async function main() {
+  const transport = new StdioServerTransport();
+  await server.connect(transport);
+  console.error("LMStack MCP Server running on stdio");
+  console.error(`API URL: ${LMSTACK_API_URL}`);
+}
+
+// If startup rejects, log the error and exit with status 1.
+main().catch((error) => {
+  console.error("Fatal error:", error);
+  process.exit(1);
+});
diff --git a/mcp-server/tsconfig.json b/mcp-server/tsconfig.json
new file mode 100644
index 0000000..e9005d0
--- /dev/null
+++ b/mcp-server/tsconfig.json
@@ -0,0 +1,16 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "NodeNext",
+    "moduleResolution": "NodeNext",
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "forceConsistentCasingInFileNames": true,
+    "declaration": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist"]
+}