# Install docker CLI for local worker spawn feature
# Using Docker 27.x for API version 1.47 compatibility
#
# The static Docker archives are published per architecture, so the machine
# name reported by `uname -m` is mapped to the matching download directory.
# Unknown architectures abort the build: falling back to the x86_64 archive
# would install a binary that cannot execute in the image.
RUN apt-get update && apt-get install -y --no-install-recommends curl \
    && ARCH="$(uname -m)" \
    && case "$ARCH" in \
         x86_64|amd64) DOCKER_ARCH="x86_64" ;; \
         aarch64|arm64) DOCKER_ARCH="aarch64" ;; \
         *) echo "Unsupported architecture for Docker CLI download: $ARCH" >&2; exit 1 ;; \
       esac \
    && curl -fsSL "https://download.docker.com/linux/static/stable/${DOCKER_ARCH}/docker-27.4.1.tgz" | tar xz --strip-components=1 -C /usr/local/bin docker/docker \
    && rm -rf /var/lib/apt/lists/*
""" + from app.config import get_settings + + settings = get_settings() return ( f"docker run -d --name lmstack-worker --restart unless-stopped " f"--network host --gpus all --privileged " @@ -540,7 +543,7 @@ def _generate_docker_command(token: str, name: str, backend_url: str) -> str: f"-e BACKEND_URL={backend_url} " f"-e WORKER_NAME={name} " f"-e REGISTRATION_TOKEN={token} " - f"infinirc/lmstack-worker:latest" + f"{settings.worker_image}" ) diff --git a/backend/app/config.py b/backend/app/config.py index d573104..673898c 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -32,6 +32,7 @@ class Settings(BaseSettings): # Worker settings worker_heartbeat_interval: int = 10 # seconds between status checks worker_timeout: int = 30 # seconds to consider worker offline + worker_image: str = "infinirc/lmstack-worker:latest" # Docker image for local worker # vLLM defaults vllm_default_image: str = "vllm/vllm-openai:latest" diff --git a/backend/app/database.py b/backend/app/database.py index 2f7ea8f..e21c7ed 100644 --- a/backend/app/database.py +++ b/backend/app/database.py @@ -37,85 +37,98 @@ async def get_db() -> AsyncSession: await session.close() +def _get_column_type_sql(column) -> str: + """Convert SQLAlchemy column type to SQLite type string.""" + from sqlalchemy import JSON, Boolean, DateTime, Float, Integer, String, Text + + col_type = type(column.type) + + if col_type == Integer or "Integer" in str(col_type): + return "INTEGER" + elif col_type == String or "String" in str(col_type): + length = getattr(column.type, "length", None) + return f"VARCHAR({length})" if length else "VARCHAR(255)" + elif col_type == Text or "Text" in str(col_type): + return "TEXT" + elif col_type == Boolean or "Boolean" in str(col_type): + return "BOOLEAN" + elif col_type == Float or "Float" in str(col_type): + return "FLOAT" + elif col_type == DateTime or "DateTime" in str(col_type): + return "DATETIME" + elif col_type == JSON or "JSON" in str(col_type): + return "JSON" + 
# SQLite column type for each supported SQLAlchemy type, keyed by the type
# class name. Lookup walks the type's MRO, so subclasses (BigInteger,
# UnicodeText, dialect-specific JSON, ...) resolve to their base entry.
_SQLITE_TYPE_BY_CLASS_NAME = {
    "Integer": "INTEGER",
    "Text": "TEXT",
    "Boolean": "BOOLEAN",
    "Float": "FLOAT",
    "Numeric": "NUMERIC",
    "DateTime": "DATETIME",
    "Date": "DATE",
    "Time": "TIME",
    "JSON": "JSON",
    "LargeBinary": "BLOB",
}


def _get_column_type_sql(column) -> str:
    """Convert SQLAlchemy column type to SQLite type string.

    The type's class hierarchy is searched most-derived first and the first
    known class name wins. This replaces substring matching on ``str(type)``,
    which missed String subclasses whose name does not contain "String" and
    could match unrelated text in a module path.

    Args:
        column: Object exposing a ``type`` attribute (a SQLAlchemy column).

    Returns:
        The SQLite type declaration, e.g. ``"INTEGER"`` or ``"VARCHAR(50)"``.
        Unknown types fall back to ``"TEXT"``.
    """
    for cls in type(column.type).__mro__:
        class_name = cls.__name__
        if class_name == "String":
            length = getattr(column.type, "length", None)
            return f"VARCHAR({length})" if length else "VARCHAR(255)"
        sqlite_type = _SQLITE_TYPE_BY_CLASS_NAME.get(class_name)
        if sqlite_type is not None:
            return sqlite_type
    # Default fallback
    return "TEXT"


def _quote_identifier(name: str) -> str:
    """Return *name* as a double-quoted SQL identifier (embedded quotes doubled)."""
    return '"' + str(name).replace('"', '""') + '"'


def _sql_default_clause(column) -> str:
    """Return a `` DEFAULT ...`` clause for the column's scalar default, or ``""``.

    Only literal int/float/bool/str defaults (and Enum members, via their
    value) are translated. Callable defaults are skipped on purpose: calling
    them at migration time would freeze a per-row value (timestamp, UUID, ...)
    into the schema, and they may require a real execution context. Any other
    kind of default (SQL expression, dict, list, ...) has no safe literal form
    here and is skipped as well.
    """
    import enum

    default = column.default
    if default is None:
        return ""

    value = getattr(default, "arg", None)
    if callable(value):
        return ""

    # On Python 3.11+ formatting a (str, Enum) member yields "Class.MEMBER",
    # so unwrap to the underlying value before rendering.
    if isinstance(value, enum.Enum):
        value = value.value

    # bool must be tested before int: bool is a subclass of int.
    if isinstance(value, bool):
        return f" DEFAULT {int(value)}"
    if isinstance(value, (int, float)):
        return f" DEFAULT {value}"
    if isinstance(value, str):
        escaped = value.replace("'", "''")
        return f" DEFAULT '{escaped}'"
    return ""


def _build_add_column_sql(table_name: str, column) -> str:
    """Build the ``ALTER TABLE ... ADD COLUMN`` statement for a missing column.

    ``NOT NULL`` is only emitted together with a literal default, because
    SQLite rejects adding a NOT NULL column without a non-NULL default.
    """
    default_clause = _sql_default_clause(column)

    not_null_clause = ""
    is_required = getattr(column, "nullable", True) is False
    if default_clause and is_required and not getattr(column, "primary_key", False):
        not_null_clause = " NOT NULL"

    return (
        f"ALTER TABLE {_quote_identifier(table_name)} "
        f"ADD COLUMN {_quote_identifier(column.name)} "
        f"{_get_column_type_sql(column)}{default_clause}{not_null_clause}"
    )


async def _run_migrations(conn):
    """Auto-detect and add missing columns by comparing models with database schema.

    For every table registered on ``Base.metadata`` that already exists in the
    SQLite database, each model column missing from the table is added with
    ``ALTER TABLE ... ADD COLUMN``. Tables that do not exist yet are skipped
    (they are expected to be created by ``create_all``). Failures are logged
    and do not abort the remaining migrations.

    Args:
        conn: Async SQLAlchemy connection used to inspect and alter the schema.
    """
    from sqlalchemy import text

    async def get_table_columns(table_name: str) -> set[str]:
        """Get all column names from a database table."""
        result = await conn.execute(
            text(f"PRAGMA table_info({_quote_identifier(table_name)})")
        )
        # Row layout of PRAGMA table_info: (cid, name, type, ...).
        return {row[1] for row in result.fetchall()}

    async def table_exists(table_name: str) -> bool:
        """Check if a table exists in the database."""
        result = await conn.execute(
            text("SELECT name FROM sqlite_master WHERE type='table' AND name=:name"),
            {"name": table_name},
        )
        return result.fetchone() is not None

    # Iterate through all tables defined in models
    for table_name, table in Base.metadata.tables.items():
        # Skip if table doesn't exist yet (will be created by create_all)
        if not await table_exists(table_name):
            continue

        try:
            existing_columns = await get_table_columns(table_name)
        except Exception as e:
            # Without the current column list every model column would look
            # "missing"; skip the table instead of issuing doomed ALTERs.
            logger.warning("Failed to read columns of %s, skipping: %s", table_name, e)
            continue

        for column in table.columns:
            if column.name in existing_columns:
                continue

            sql = _build_add_column_sql(table_name, column)

            logger.info("Auto-migration: Adding '%s' column to %s...", column.name, table_name)
            try:
                await conn.execute(text(sql))
                logger.info("Column '%s' added to %s!", column.name, table_name)
            except Exception as e:
                logger.warning(
                    "Failed to add column %s to %s: %s", column.name, table_name, e
                )
def _is_native_deployment(self, deployment: Deployment) -> bool:
    """Check if this is a native Mac deployment (not Docker).

    A deployment counts as native when any of these hold:
      * its ``container_id`` starts with ``"native-"``;
      * its backend is ``"mlx"`` or ``"llama_cpp"``;
      * its backend is Ollama and its worker reports ``is_mac``.

    Args:
        deployment: The deployment to classify.

    Returns:
        True for a native deployment, False otherwise.
    """
    # Native deployments have container_id like "native-123"
    container_id = deployment.container_id
    if container_id and container_id.startswith("native-"):
        return True

    # Mac-only backends are always native
    if deployment.backend in {"mlx", "llama_cpp"}:
        return True

    # For Ollama, it is native only when the worker is a Mac
    if deployment.backend == BackendType.OLLAMA.value:
        worker = deployment.worker
        return bool(worker and worker.is_mac)

    return False


async def _check_native_deployment(self, deployment: Deployment) -> str:
    """Check a native Mac deployment's API health.

    Native deployments run as processes, not Docker containers, so the only
    check performed is whether the API endpoint responds (via
    ``self._check_api_health``). May update ``deployment.status`` and
    ``deployment.status_message`` in place.

    Args:
        deployment: The native deployment to check.

    Returns:
        ``"skipped"`` when there is no worker or the worker is not online,
        ``"running_verified"`` when the API responded, and
        ``"api_not_ready"`` when it did not respond or the check failed.
    """
    try:
        worker = deployment.worker
        if worker is None:
            # Same outcome as the generic check for a worker-less deployment;
            # previously this surfaced as an AttributeError and flipped the
            # deployment to STARTING.
            logger.warning("Deployment %s has no worker, skipping", deployment.id)
            return "skipped"

        # If the worker is offline, keep the current status and let the
        # health check loop retry later (worker may be reconnecting).
        if worker.status != "online":
            logger.info(
                "Native deployment %s: worker offline, "
                "keeping current status (may be reconnecting)",
                deployment.name,
            )
            return "skipped"

        api_healthy = await self._check_api_health(
            worker.address,
            deployment.port,
            deployment.backend,
            None,  # No container_name for native
        )

        if api_healthy:
            if deployment.status != DeploymentStatus.RUNNING.value:
                deployment.status = DeploymentStatus.RUNNING.value
                deployment.status_message = "Model ready (native process verified)"
            logger.info("Native deployment %s: healthy", deployment.name)
            return "running_verified"

        # Process might have died or not started yet.
        # Mark as STARTING instead of ERROR to allow retry.
        deployment.status = DeploymentStatus.STARTING.value
        deployment.status_message = "Native process not responding. Waiting for recovery..."
        logger.info("Native deployment %s: API not responding, waiting...", deployment.name)
        return "api_not_ready"

    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("Error checking native deployment %s: %s", deployment.name, e)
        deployment.status = DeploymentStatus.STARTING.value
        deployment.status_message = f"Checking status: {e}"
        return "api_not_ready"
@@ -134,6 +197,10 @@ async def _check_and_update_deployment(self, deployment: Deployment, db) -> str: logger.warning(f"Deployment {deployment.id} has no worker, skipping") return "skipped" + # Check if this is a native deployment (Mac without Docker) + if self._is_native_deployment(deployment): + return await self._check_native_deployment(deployment) + if not deployment.container_id: # If deployment is still starting, skip it if deployment.status == DeploymentStatus.STARTING.value: diff --git a/backend/app/services/local_worker.py b/backend/app/services/local_worker.py index c6be866..75a2c63 100644 --- a/backend/app/services/local_worker.py +++ b/backend/app/services/local_worker.py @@ -205,12 +205,24 @@ def spawn_docker_worker( backend_url: str, registration_token: str, container_name: str = "lmstack-worker", + worker_image: str | None = None, ) -> dict: """Spawn a Docker worker container on the local machine. + Args: + worker_name: Name for the worker + backend_url: URL of the backend server + registration_token: Token for worker registration + container_name: Name for the Docker container + worker_image: Docker image to use (defaults to settings.worker_image) + Returns: dict with keys: success, message, container_id (if success) """ + from app.config import get_settings + + settings = get_settings() + image = worker_image or settings.worker_image # On Mac, ensure Ollama is running with external access before starting Docker if platform.system() == "Darwin": logger.info("Mac detected, ensuring Ollama is running with external access...") @@ -270,11 +282,11 @@ def spawn_docker_worker( f"WORKER_NAME={worker_name}", "-e", f"REGISTRATION_TOKEN={registration_token}", - "infinirc/lmstack-worker:latest", + image, ] try: - logger.info(f"Spawning Docker worker: {worker_name}") + logger.info(f"Spawning Docker worker: {worker_name} with image {image}") result = subprocess.run( cmd, capture_output=True, diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 
b11ee91..6f3156e 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -19,6 +19,11 @@ dependencies = [ "httpx>=0.26.0", "docker>=7.0.0", "python-multipart>=0.0.6", + "python-jose[cryptography]>=3.3.0", + "email-validator>=2.0.0", + "psutil>=5.9.0", + "optuna>=3.5.0", + "openai>=1.0.0", ] [project.optional-dependencies] diff --git a/backend/requirements.txt b/backend/requirements.txt index 0729e40..001cf1e 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -11,3 +11,4 @@ python-jose[cryptography]>=3.3.0 email-validator>=2.0.0 psutil>=5.9.0 optuna>=3.5.0 +openai>=1.0.0 diff --git a/docker-compose.local.yml b/docker-compose.local.yml index 4423911..b44a7fe 100644 --- a/docker-compose.local.yml +++ b/docker-compose.local.yml @@ -5,13 +5,14 @@ # Run: docker compose -f docker-compose.local.yml up -d services: - server: + backend: image: infinirc/lmstack-backend:local container_name: lmstack-backend user: root - # Use bridge network for Windows compatibility (network_mode: host doesn't work on Windows) ports: - - "0.0.0.0:52000:52000" + - "52000:52000" + extra_hosts: + - "host.docker.internal:host-gateway" volumes: - lmstack-data:/app/data - /var/run/docker.sock:/var/run/docker.sock @@ -19,6 +20,12 @@ services: environment: - LMSTACK_SECRET_KEY=${SECRET_KEY:-dev-secret-key} - LMSTACK_EXTERNAL_URL=${EXTERNAL_URL:-} + healthcheck: + test: ["CMD", "python", "-c", "import httpx; r=httpx.get('http://127.0.0.1:52000/health'); exit(0 if r.status_code==200 else 1)"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s restart: unless-stopped networks: - lmstack @@ -27,12 +34,21 @@ services: image: infinirc/lmstack-frontend:local container_name: lmstack-frontend ports: - - "0.0.0.0:3000:80" + - "3000:80" + extra_hosts: + - "host.docker.internal:host-gateway" environment: - - BACKEND_HOST=server + - BACKEND_HOST=backend - NGINX_ENVSUBST_FILTER=BACKEND_HOST depends_on: - - server + backend: + condition: service_healthy + healthcheck: + 
test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://127.0.0.1:80/"] + interval: 10s + timeout: 5s + retries: 3 + start_period: 5s restart: unless-stopped networks: - lmstack diff --git a/frontend/package-lock.json b/frontend/package-lock.json index c2b2f48..ad6c288 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -9,6 +9,7 @@ "version": "0.1.0", "dependencies": { "@ant-design/icons": "^5.2.6", + "@monaco-editor/react": "^4.7.0", "antd": "^5.12.0", "axios": "^1.6.0", "dayjs": "^1.11.10", @@ -20,7 +21,8 @@ "react-syntax-highlighter": "^16.1.0", "recharts": "^3.6.0", "rehype-raw": "^7.0.0", - "remark-gfm": "^4.0.1" + "remark-gfm": "^4.0.1", + "yaml": "^2.8.2" }, "devDependencies": { "@testing-library/jest-dom": "^6.1.0", @@ -1252,6 +1254,29 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@monaco-editor/loader": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/@monaco-editor/loader/-/loader-1.7.0.tgz", + "integrity": "sha512-gIwR1HrJrrx+vfyOhYmCZ0/JcWqG5kbfG7+d3f/C1LXk2EvzAbHSg3MQ5lO2sMlo9izoAZ04shohfKLVT6crVA==", + "license": "MIT", + "dependencies": { + "state-local": "^1.0.6" + } + }, + "node_modules/@monaco-editor/react": { + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/@monaco-editor/react/-/react-4.7.0.tgz", + "integrity": "sha512-cyzXQCtO47ydzxpQtCGSQGOC8Gk3ZUeBXFAxD+CWXYFo5OqZyZUonFl0DwUlTyAfRHntBfw2p3w4s9R6oe1eCA==", + "license": "MIT", + "dependencies": { + "@monaco-editor/loader": "^1.5.0" + }, + "peerDependencies": { + "monaco-editor": ">= 0.25.0 < 1", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0", + "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -2195,6 +2220,14 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/trusted-types": { + "version": "2.0.7", + "resolved": 
"https://registry.npmjs.org/@types/trusted-types/-/trusted-types-2.0.7.tgz", + "integrity": "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==", + "license": "MIT", + "optional": true, + "peer": true + }, "node_modules/@types/unist": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.3.tgz", @@ -3668,6 +3701,16 @@ "dev": true, "license": "MIT" }, + "node_modules/dompurify": { + "version": "3.2.7", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.2.7.tgz", + "integrity": "sha512-WhL/YuveyGXJaerVlMYGWhvQswa7myDG17P7Vu65EWC05o8vfeNbvNf4d/BOvH99+ZW+LlQsc1GDKMa1vNK6dw==", + "license": "(MPL-2.0 OR Apache-2.0)", + "peer": true, + "optionalDependencies": { + "@types/trusted-types": "^2.0.7" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -5674,6 +5717,19 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/marked": { + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/marked/-/marked-14.0.0.tgz", + "integrity": "sha512-uIj4+faQ+MgHgwUW1l2PsPglZLOLOT1uErt06dAPtx2kjteLAkbsd/0FiYg/MGS+i7ZKLb7w2WClxHkzOOuryQ==", + "license": "MIT", + "peer": true, + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/math-intrinsics": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", @@ -6646,6 +6702,17 @@ "dev": true, "license": "MIT" }, + "node_modules/monaco-editor": { + "version": "0.55.1", + "resolved": "https://registry.npmjs.org/monaco-editor/-/monaco-editor-0.55.1.tgz", + "integrity": "sha512-jz4x+TJNFHwHtwuV9vA9rMujcZRb0CEilTEwG2rRSpe/A7Jdkuj8xPKttCgOh+v/lkHy7HsZ64oj+q3xoAFl9A==", + "license": "MIT", + "peer": true, + "dependencies": { + "dompurify": "3.2.7", + "marked": "14.0.0" + } + }, "node_modules/mrmime": { "version": "2.0.1", "resolved": 
"https://registry.npmjs.org/mrmime/-/mrmime-2.0.1.tgz", @@ -8510,6 +8577,12 @@ "dev": true, "license": "MIT" }, + "node_modules/state-local": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/state-local/-/state-local-1.0.7.tgz", + "integrity": "sha512-HTEHMNieakEnoe33shBYcZ7NX83ACUjCu8c40iOGEZsngj9zRnkqS9j1pqQPXwobB0ZcVTk27REb7COQ0UR59w==", + "license": "MIT" + }, "node_modules/std-env": { "version": "3.10.0", "resolved": "https://registry.npmjs.org/std-env/-/std-env-3.10.0.tgz", @@ -9502,6 +9575,21 @@ "dev": true, "license": "ISC" }, + "node_modules/yaml": { + "version": "2.8.2", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.2.tgz", + "integrity": "sha512-mplynKqc1C2hTVYxd0PU2xQAc22TI1vShAYGksCCfxbn/dFwnHTNi1bvYsBTkhdUNtGIf5xNOg938rrSSYvS9A==", + "license": "ISC", + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14.6" + }, + "funding": { + "url": "https://github.com/sponsors/eemeli" + } + }, "node_modules/yocto-queue": { "version": "0.1.0", "resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", diff --git a/frontend/package.json b/frontend/package.json index 964a24d..308a97b 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -14,6 +14,7 @@ }, "dependencies": { "@ant-design/icons": "^5.2.6", + "@monaco-editor/react": "^4.7.0", "antd": "^5.12.0", "axios": "^1.6.0", "dayjs": "^1.11.10", @@ -25,7 +26,8 @@ "react-syntax-highlighter": "^16.1.0", "recharts": "^3.6.0", "rehype-raw": "^7.0.0", - "remark-gfm": "^4.0.1" + "remark-gfm": "^4.0.1", + "yaml": "^2.8.2" }, "devDependencies": { "@testing-library/jest-dom": "^6.1.0", diff --git a/frontend/src/components/logos/index.tsx b/frontend/src/components/logos/index.tsx index 029b8b7..bf955ba 100644 --- a/frontend/src/components/logos/index.tsx +++ b/frontend/src/components/logos/index.tsx @@ -132,6 +132,21 @@ interface IconProps { className?: string; } +export function AppleIcon({ size = 14, style, className }: IconProps) { + return ( + + + + 
); +} + export function DockerIcon({ size = 14, style, className }: IconProps) { return ( ( null, ); + // YAML editor state + const [showYamlPanel, setShowYamlPanel] = useState(true); // Show YAML panel on desktop + const [yamlContent, setYamlContent] = useState(""); + const [yamlError, setYamlError] = useState(null); + const [isYamlUserEditing, setIsYamlUserEditing] = useState(false); // Track if user is editing YAML + const yamlSyncTimeoutRef = useRef | null>(null); const { isMobile } = useResponsive(); const { isDark } = useAppTheme(); const { canEdit } = useAuth(); @@ -151,6 +166,141 @@ export default function Deployments() { const BACKEND_CONFIG = getBackendConfig(isDark); + // Check if backend is Docker-based (not native Mac) + const isDockerBackend = (backend: string, worker?: Worker | null) => { + // Native Mac backends + if (backend === "mlx" || backend === "llama_cpp") return false; + // Ollama on Mac is native + if (backend === "ollama" && worker?.os_type === "darwin") return false; + // vLLM on Mac (vLLM-Metal) is native + if (backend === "vllm" && worker?.os_type === "darwin") return false; + return true; + }; + + // Generate YAML from current form values + const generateYamlFromForm = useCallback(() => { + const values = form.getFieldsValue(); + // Use selectedModelId as fallback if form value not available yet + const modelId = values.model_id || selectedModelId; + const model = models.find((m) => m.id === modelId); + + if (!model) return ""; + + const config: DeploymentConfig = { + name: values.name || "deployment", + model_id: model.model_id, + model_name: model.name, + backend: selectedBackend, + worker_name: selectedWorker?.name, + gpu_indexes: + selectedGpuIndexes.length > 0 ? 
selectedGpuIndexes : undefined, + extra_params: values.extra_params || {}, + }; + + return generateDockerCompose(config); + }, [ + form, + models, + selectedModelId, + selectedBackend, + selectedWorker, + selectedGpuIndexes, + ]); + + // Watch form values for auto-sync to YAML + const formName = Form.useWatch("name", form); + const formModelId = Form.useWatch("model_id", form); + const formExtraParams = Form.useWatch("extra_params", form); + + // Auto-update YAML when form values change (if not user-editing YAML) + useEffect(() => { + if (!showYamlPanel) return; + if (isYamlUserEditing) return; // Don't overwrite user edits + if (!selectedModelId && !formModelId) return; + + const yaml = generateYamlFromForm(); + if (yaml && yaml !== yamlContent) { + setYamlContent(yaml); + } + }, [ + showYamlPanel, + isYamlUserEditing, + formName, + formModelId, + formExtraParams, + selectedModelId, + selectedBackend, + selectedWorker, + selectedGpuIndexes, + generateYamlFromForm, + yamlContent, + ]); + + // Handle YAML edit with debounced sync back to form + const handleYamlChange = useCallback( + (newYaml: string) => { + setYamlContent(newYaml); + setIsYamlUserEditing(true); + + // Validate YAML + const validation = validateDockerCompose(newYaml); + setYamlError(validation.valid ? 
null : validation.error || null); + + // Debounce sync to form + if (yamlSyncTimeoutRef.current) { + clearTimeout(yamlSyncTimeoutRef.current); + } + + if (validation.valid) { + yamlSyncTimeoutRef.current = setTimeout(() => { + const config = parseDockerCompose(newYaml); + if (config) { + // Update form fields that correspond to YAML values + if (config.name && config.name !== form.getFieldValue("name")) { + form.setFieldValue("name", config.name); + } + if (config.extra_params?.docker_image) { + form.setFieldValue( + ["extra_params", "docker_image"], + config.extra_params.docker_image, + ); + } + if (config.extra_params?.tensor_parallel_size !== undefined) { + form.setFieldValue( + ["extra_params", "tensor_parallel_size"], + config.extra_params.tensor_parallel_size, + ); + } + if (config.extra_params?.max_model_len !== undefined) { + form.setFieldValue( + ["extra_params", "max_model_len"], + config.extra_params.max_model_len, + ); + } + if (config.extra_params?.gpu_memory_utilization !== undefined) { + form.setFieldValue( + ["extra_params", "gpu_memory_utilization"], + config.extra_params.gpu_memory_utilization, + ); + } + } + // Reset editing flag after sync + setIsYamlUserEditing(false); + }, 500); // 500ms debounce + } + }, + [form], + ); + + // Cleanup timeout on unmount + useEffect(() => { + return () => { + if (yamlSyncTimeoutRef.current) { + clearTimeout(yamlSyncTimeoutRef.current); + } + }; + }, []); + const fetchDeployments = useCallback(async () => { try { const response = await deploymentsApi.list(); @@ -390,12 +540,22 @@ export default function Deployments() { display: "inline-flex", alignItems: "center", gap: 2, - background: "rgba(13, 148, 227, 0.1)", - border: "1px solid rgba(13, 148, 227, 0.3)", - color: "#0d94e3", + background: record.container_id.startsWith("native-") + ? "rgba(147, 147, 147, 0.1)" + : "rgba(13, 148, 227, 0.1)", + border: record.container_id.startsWith("native-") + ? 
"1px solid rgba(147, 147, 147, 0.3)" + : "1px solid rgba(13, 148, 227, 0.3)", + color: record.container_id.startsWith("native-") + ? "#666" + : "#0d94e3", }} > - + {record.container_id.startsWith("native-") ? ( + + ) : ( + + )} )} @@ -535,13 +695,28 @@ export default function Deployments() { display: "inline-flex", alignItems: "center", gap: 3, - background: "rgba(13, 148, 227, 0.1)", - border: "1px solid rgba(13, 148, 227, 0.3)", - color: "#0d94e3", + background: record.container_id.startsWith("native-") + ? "rgba(147, 147, 147, 0.1)" + : "rgba(13, 148, 227, 0.1)", + border: record.container_id.startsWith("native-") + ? "1px solid rgba(147, 147, 147, 0.3)" + : "1px solid rgba(13, 148, 227, 0.3)", + color: record.container_id.startsWith("native-") + ? "#666" + : "#0d94e3", }} > - - Docker + {record.container_id.startsWith("native-") ? ( + <> + + Native + + ) : ( + <> + + Docker + + )} )} @@ -769,7 +944,36 @@ export default function Deployments() { + New Deployment + {isDockerBackend(selectedBackend, selectedWorker) && !isMobile && ( + + )} + + } open={modalOpen} onCancel={() => { setModalOpen(false); @@ -777,509 +981,620 @@ export default function Deployments() { setSelectedWorkerId(null); setSelectedGpuIndexes([]); setSelectedBackend("vllm"); + setShowYamlPanel(true); + setYamlContent(""); + setYamlError(null); form.resetFields(); }} footer={null} - width={isMobile ? "100%" : 600} + width={ + isMobile + ? "100%" + : showYamlPanel && isDockerBackend(selectedBackend, selectedWorker) + ? 1100 + : 600 + } style={ isMobile ? { top: 20, maxWidth: "100%", margin: "0 8px" } : undefined } >
- - - - - - + + + + { - setSelectedWorkerId(value); - // Reset GPU selection when worker changes - setSelectedGpuIndexes([]); - form.setFieldValue("gpu_indexes", undefined); - // Check if current backend is available on the new worker - const newWorker = workers.find((w) => w.id === value); - const isMac = newWorker?.os_type === "darwin"; - const macBackends = ["ollama", "mlx", "llama_cpp", "vllm"]; - const linuxBackends = ["vllm", "sglang", "ollama"]; - const newAvailable = isMac ? macBackends : linuxBackends; - // Reset to first available backend if current is not available - if (!newAvailable.includes(selectedBackend)) { - const defaultBackend = isMac ? "vllm" : "vllm"; - setSelectedBackend( - defaultBackend as - | "vllm" - | "sglang" - | "ollama" - | "mlx" - | "llama_cpp", - ); - form.setFieldValue("backend", defaultBackend); - } - }} - options={workers.map((w) => ({ - label: ( - - - {w.name} ({w.address}) - - - {w.os_type === "darwin" && ( - macOS - )} - {w.gpu_info && w.gpu_info.length > 0 && ( - - {w.gpu_info.length} GPU - {w.gpu_info.length > 1 ? "s" : ""} - - )} - - - ), - value: w.id, - }))} - /> - + ), + value: m.id, + }; + })} + /> + - - { + setSelectedWorkerId(value); + // Reset GPU selection when worker changes + setSelectedGpuIndexes([]); + form.setFieldValue("gpu_indexes", undefined); + // Check if current backend is available on the new worker + const newWorker = workers.find((w) => w.id === value); + const isMac = newWorker?.os_type === "darwin"; + const macBackends = ["ollama", "mlx", "llama_cpp", "vllm"]; + const linuxBackends = ["vllm", "sglang", "ollama"]; + const newAvailable = isMac ? macBackends : linuxBackends; + // Reset to first available backend if current is not available + if (!newAvailable.includes(selectedBackend)) { + const defaultBackend = isMac ? 
"vllm" : "vllm"; + setSelectedBackend( + defaultBackend as + | "vllm" + | "sglang" + | "ollama" + | "mlx" + | "llama_cpp", + ); + form.setFieldValue("backend", defaultBackend); + } + }} + options={workers.map((w) => ({ + label: ( - {config.icon} + + {w.name} ({w.address}) + + + {w.os_type === "darwin" && ( + macOS + )} + {w.gpu_info && w.gpu_info.length > 0 && ( + + {w.gpu_info.length} GPU + {w.gpu_info.length > 1 ? "s" : ""} + + )} + - {label} - - ), - value: b, - }; - })} - /> - - - {/* macOS Ollama Warning - only show when Ollama backend is selected */} - {selectedWorker && - selectedWorker.os_type === "darwin" && - selectedBackend === "ollama" && - !selectedWorker.capabilities?.ollama && ( - -

- This Mac worker does not have Ollama installed. Please - install it first: -

-
-                      brew install ollama{"\n"}
-                      brew services start ollama
-                    
-

- After installation, the worker will detect Ollama on the - next heartbeat. -

- - } - type="error" - showIcon - style={{ marginBottom: 16 }} - /> - )} - - {/* macOS Ollama Not Running Warning - only show when Ollama backend is selected */} - {selectedWorker && - selectedWorker.os_type === "darwin" && - selectedBackend === "ollama" && - selectedWorker.capabilities?.ollama && - !selectedWorker.capabilities?.ollama_running && ( - -

- Ollama is installed but not running. Please start it: -

-
-                      brew services start ollama
-                    
- - } - type="warning" - showIcon - style={{ marginBottom: 16 }} - /> - )} - - {/* macOS Backend Info - show auto-install message */} - {selectedWorker && - selectedWorker.os_type === "darwin" && - selectedBackend === "vllm" && ( - - Uses Apple Silicon GPU acceleration. Will be automatically - installed on first deployment. - - } - type="info" - showIcon - style={{ marginBottom: 16 }} - /> - )} - {selectedWorker && - selectedWorker.os_type === "darwin" && - selectedBackend === "mlx" && ( - - Native Apple Silicon ML framework. Will be automatically - installed on first deployment. - - } - type="info" - showIcon - style={{ marginBottom: 16 }} - /> - )} - {selectedWorker && - selectedWorker.os_type === "darwin" && - selectedBackend === "llama_cpp" && ( - - High-performance inference with Metal acceleration. Will be - automatically installed via Homebrew on first deployment. - - } - type="info" - showIcon - style={{ marginBottom: 16 }} - /> - )} + ), + value: w.id, + }))} + /> + - {/* macOS Info */} - {selectedWorker && - selectedWorker.os_type === "darwin" && - selectedWorker.capabilities?.ollama_running && ( - -

- This worker supports native Apple Silicon backends: -

-
    -
  • - Ollama - Easiest, pull and run models - directly -
  • -
  • - MLX - Apple's ML framework, optimized - for Apple Silicon -
  • -
  • - llama.cpp - Cross-platform with Metal - acceleration -
  • -
-

- For MLX/llama.cpp, HuggingFace models will be - automatically converted if needed. -

- + - )} - - 0 - ? "Leave empty to use GPU 0" - : "No GPUs detected on this worker" - : "Select a worker first" - } - > - setSelectedBackend(value)} + options={availableBackends.map((b) => { + const config = BACKEND_CONFIG[b]; + // Show "vLLM-Metal" for vllm on Mac workers + const label = + b === "vllm" && selectedWorker?.os_type === "darwin" + ? "vLLM-Metal" + : config.label; + return { label: ( - - GPU {gpu.index}: {gpu.name} - - 0.5 - ? "green" - : "orange" - } - style={{ marginLeft: 8, fontSize: 11 }} + - {Math.round(gpu.memory_free / 1024 / 1024 / 1024)}GB - free - + {config.icon} + + {label} ), - value: gpu.index, - })) - : [{ label: GPU 0, value: 0 }] - } - /> - + value: b, + }; + })} + /> + - {/* Model Compatibility Check - Show when model is selected for vLLM/SGLang */} - {selectedModel && - selectedModel.source !== "ollama" && - !["mlx", "llama_cpp"].includes(selectedBackend) && ( - - )} + {/* macOS Ollama Warning - only show when Ollama backend is selected */} + {selectedWorker && + selectedWorker.os_type === "darwin" && + selectedBackend === "ollama" && + !selectedWorker.capabilities?.ollama && ( + +

+ This Mac worker does not have Ollama installed. Please + install it first: +

+
+                          brew install ollama{"\n"}
+                          brew services start ollama
+                        
+

+ After installation, the worker will detect Ollama on + the next heartbeat. +

+ + } + type="error" + showIcon + style={{ marginBottom: 16 }} + /> + )} - {/* Model Format Compatibility - Show for MLX/llama.cpp backends */} - {selectedModel && - selectedModel.source !== "ollama" && - ["mlx", "llama_cpp"].includes(selectedBackend) && ( - - )} + {/* macOS Ollama Not Running Warning - only show when Ollama backend is selected */} + {selectedWorker && + selectedWorker.os_type === "darwin" && + selectedBackend === "ollama" && + selectedWorker.capabilities?.ollama && + !selectedWorker.capabilities?.ollama_running && ( + +

+ Ollama is installed but not running. Please start it: +

+
+                          brew services start ollama
+                        
+ + } + type="warning" + showIcon + style={{ marginBottom: 16 }} + /> + )} + + {/* macOS Backend Info - show auto-install message */} + {selectedWorker && + selectedWorker.os_type === "darwin" && + selectedBackend === "vllm" && ( + + Uses Apple Silicon GPU acceleration. Will be + automatically installed on first deployment. + + } + type="info" + showIcon + style={{ marginBottom: 16 }} + /> + )} + {selectedWorker && + selectedWorker.os_type === "darwin" && + selectedBackend === "mlx" && ( + + Native Apple Silicon ML framework. Will be automatically + installed on first deployment. + + } + type="info" + showIcon + style={{ marginBottom: 16 }} + /> + )} + {selectedWorker && + selectedWorker.os_type === "darwin" && + selectedBackend === "llama_cpp" && ( + + High-performance inference with Metal acceleration. Will + be automatically installed via Homebrew on first + deployment. + + } + type="info" + showIcon + style={{ marginBottom: 16 }} + /> + )} + + {/* macOS Info */} + {selectedWorker && + selectedWorker.os_type === "darwin" && + selectedWorker.capabilities?.ollama_running && ( + +

+ This worker supports native Apple Silicon backends: +

+
    +
  • + Ollama - Easiest, pull and run + models directly +
  • +
  • + MLX - Apple's ML framework, + optimized for Apple Silicon +
  • +
  • + llama.cpp - Cross-platform with + Metal acceleration +
  • +
+

+ For MLX/llama.cpp, HuggingFace models will be + automatically converted if needed. +

+ + } + type="info" + showIcon + style={{ marginBottom: 16 }} + /> + )} - {/* Version Override - Show when model is selected (not for MLX/llama.cpp) */} - {selectedModelId && - !["mlx", "llama_cpp"].includes(selectedBackend) && ( 0 + ? "Leave empty to use GPU 0" + : "No GPUs detected on this worker" + : "Select a worker first" + } > ; + } + > + )[selectedBackend]?.versions || [] + ).map((v) => ({ + label: ( + + {v.version} + {v.recommended && ( + + Recommended + + )} + + ), + value: v.image, + }))} + /> + + )} + + {/* Advanced Parameters - Show when model is selected (not for MLX/llama.cpp) */} + {selectedModelId && + !["mlx", "llama_cpp"].includes(selectedBackend) && ( + + )} + + + {/* Right side: YAML Editor (desktop only, Docker backends only) */} + {showYamlPanel && + !isMobile && + isDockerBackend(selectedBackend, selectedWorker) && ( +
+
+ Docker Compose + {yamlError && ( + + {yamlError} + + )} +
+
+ handleYamlChange(value || "")} + onMount={() => setIsYamlUserEditing(false)} + options={{ + minimap: { enabled: false }, + fontSize: 13, + lineNumbers: "on", + scrollBeyondLastLine: false, + wordWrap: "on", + tabSize: 2, + automaticLayout: true, + padding: { top: 8, bottom: 8 }, + }} + /> +
+
+ Form ↔ YAML auto-sync. Edit either side. +
+
+ )} + - +