From 311a63624138ed5e74103ecb83692cc15ebc2199 Mon Sep 17 00:00:00 2001 From: Selcukatli Date: Wed, 24 Sep 2025 17:41:25 -0400 Subject: [PATCH 1/3] updates to cli and added rules --- CLAUDE.md | 118 +++++++++++++++ bin/fal-cli | 1 + cli.js | 2 +- rules/fal-cli-integration.md | 231 +++++++++++++++++++++++++++++ rules/fal-mcp-integration.md | 280 +++++++++++++++++++++++++++++++++++ show-model-schemas.js | 70 +++++++++ test-fal.js | 45 ++++++ 7 files changed, 746 insertions(+), 1 deletion(-) create mode 100644 CLAUDE.md create mode 120000 bin/fal-cli create mode 100644 rules/fal-cli-integration.md create mode 100644 rules/fal-mcp-integration.md create mode 100644 show-model-schemas.js create mode 100644 test-fal.js diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..378d9fa --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,118 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +FAL CLI is an unofficial command-line interface for FAL AI Models that provides both traditional CLI usage and MCP (Model Context Protocol) server functionality for AI assistants. It enables high-quality image generation using models like FLUX, Imagen, and Qwen Image. + +## Common Development Commands + +```bash +# Start the CLI application +npm start +npm run dev +node cli.js + +# Run specific CLI commands +npm run models # List available models +npm run generate # Start image generation +npm run optimize # Optimize prompts +npm run config # Configure settings + +# Start MCP server for AI assistants +npm run mcp-server # Start on default port 3001 +PORT=3002 npm run mcp-server # Custom port + +# Test CLI functionality +npm test # Shows help output +node cli.js --help # Command documentation +``` + +## Architecture & Code Structure + +### Dual Interface Architecture +The project implements two parallel interfaces that share core business logic: + +1. 
**CLI Interface** (`cli.js`): Traditional command-line interface with interactive prompts using Commander.js and Inquirer.js +2. **MCP Server** (`mcp-server.js`): Protocol-based server for AI assistant integration using Model Context Protocol SDK + +Both interfaces rely on shared core modules in the `core/` directory to avoid code duplication. + +### Core Module Architecture + +**`core/image-generator.js`**: Handles all image generation logic +- `generateSingleImage()`: Single image generation with FAL API +- `generateBatch()`: Batch processing with concurrency control +- `calculateCost()`: Cost estimation before generation + +**`core/prompt-optimizer.js`**: AI-powered prompt enhancement +- `optimizePrompt()`: Single prompt optimization using LLM +- `optimizeBatch()`: Batch prompt optimization with model-specific adjustments + +**`core/model-manager.js`**: Model configuration and discovery +- `loadModels()`: Loads model configurations from JSON files +- `getModelById()`: Retrieves specific model configuration +- `getFilteredModels()`: Filters models by criteria (cost, category) +- `getModelRecommendations()`: Suggests optimal models for use cases + +### Supporting Modules + +**`models-new.js`**: Model loader utilities +- Loads model configurations from `models/` directory +- Provides shared parameter definitions +- Handles model validation + +**`secure-storage.js`**: API key management +- AES-256-GCM encryption for stored keys +- Machine-specific encryption keys +- Cross-platform secure storage locations + +### Model Configuration System +Models are defined as JSON files in the `models/` directory with this structure: +- Model ID, pricing, and metadata +- Default parameters and constraints +- Supported aspect ratios and formats +- Model-specific optimization prompts + +## Key Implementation Details + +### API Key Management +- Primary: Environment variable `FAL_KEY` in `.env` file +- Secondary: Encrypted storage via `secure-storage.js` +- MCP Server: Can receive 
key via environment or config + +### Cost Control System +- $5 spending limit enforced in MCP server +- Cost calculation before any generation +- User confirmation required for high-cost operations + +### Generation Workflow +1. API key validation +2. Model selection and configuration +3. Optional prompt optimization +4. Cost estimation and user confirmation +5. Image generation with progress tracking +6. Result storage with unique filenames +7. Optional browser opening for results + +### File Naming Convention +Generated images use this pattern: +``` +{model-key}_{prompt-index}_{iteration}_{image-index}_{timestamp}_{random}.png +``` + +### Error Handling Strategy +- Comprehensive try-catch blocks throughout +- User-friendly error messages with suggestions +- Graceful degradation for missing features +- Validation at multiple levels (API key, model, parameters) + +## Important Notes + +- This is an **unofficial** CLI tool, not affiliated with FAL +- No test suite currently exists +- No linting or type checking commands configured +- Uses ES modules (type: "module" in package.json) +- Requires Node.js 18+ for ES module support +- MCP server imports from non-existent `core/` modules (need to be implemented or references updated) \ No newline at end of file diff --git a/bin/fal-cli b/bin/fal-cli new file mode 120000 index 0000000..954db12 --- /dev/null +++ b/bin/fal-cli @@ -0,0 +1 @@ +../lib/node_modules/fal-cli/cli.js \ No newline at end of file diff --git a/cli.js b/cli.js index 8709ece..5ed7c1b 100755 --- a/cli.js +++ b/cli.js @@ -1372,7 +1372,7 @@ const runDirectGeneration = async (options) => { imagesPerModel = 1; // Set output directory - outputDirectory = path.resolve(options.output); + outputDirectory = options.output ? 
path.resolve(options.output) : path.join(process.cwd(), 'generated-images'); // Generate await generateImages(); diff --git a/rules/fal-cli-integration.md b/rules/fal-cli-integration.md new file mode 100644 index 0000000..e1cd00a --- /dev/null +++ b/rules/fal-cli-integration.md @@ -0,0 +1,231 @@ +# FAL CLI Integration Rules for AI Agents + +**Tag this file when you need AI image generation in any project: `@fal-cli-integration.md`** + +## Overview +The FAL CLI is a globally installed command-line tool for generating high-quality AI images using FAL AI models. This document provides rules and guidelines for AI agents to use the FAL CLI in any project. + +## ⚠️ CRITICAL SECURITY RULES + +### API Key Protection +- **NEVER** include FAL API keys directly in code, commands, or outputs +- **NEVER** echo, print, or display the FAL_KEY environment variable +- **NEVER** commit API keys to version control +- **ALWAYS** assume the API key is already configured in the user's environment +- **ALWAYS** use the CLI without exposing credentials + +## Available Commands + +### Basic Image Generation +```bash +# Generate with specific model and prompt +fal-cli generate -p "your prompt here" -m "model-id" --no-optimize + +# Generate with prompt optimization +fal-cli generate -p "your prompt here" -m "model-id" + +# Generate with custom output directory +fal-cli generate -p "your prompt here" -m "model-id" -o ./output/path +``` + +### Model Discovery +```bash +# List all available models with pricing +fal-cli models + +# Get models in JSON format for parsing +fal-cli models --json + +# Filter models by category +fal-cli models -c "Ultra Quality" +``` + +### Prompt Optimization +```bash +# Optimize a prompt for better results +fal-cli optimize "your basic prompt" + +# Optimize for specific model +fal-cli optimize "your prompt" -m "flux-pro-ultra" +``` + +## Available Models & Costs + +| Model ID | Name | Cost | Best For | +|----------|------|------|----------| +| `flux-kontext-pro` | 
FLUX Pro Kontext | $0.04/img | Contextual understanding, cheapest option | +| `flux-kontext-max` | FLUX Pro Kontext Max | $0.08/img | Advanced context, complex scenes | +| `flux-pro-ultra` | FLUX Pro Ultra v1.1 | $0.06/img | Ultra-high quality, 2K-4MP resolution | +| `imagen4-ultra` | Imagen 4 Ultra | $0.06/img | Photorealistic images | +| `qwen-image` | Qwen Image | $0.05/img | Excellent text rendering in images | + +## Integration Patterns + +### For Web Projects +```bash +# Generate assets for web application +fal-cli generate -p "hero banner with abstract gradient" -m "flux-pro-ultra" -o ./public/images + +# Generate multiple icon variations +fal-cli generate -p "minimalist app icon" -m "qwen-image" -o ./src/assets/icons +``` + +### For Design Projects +```bash +# Generate design concepts +fal-cli generate -p "modern dashboard UI concept" -m "imagen4-ultra" -o ./designs/concepts + +# Generate background patterns +fal-cli generate -p "seamless geometric pattern" -m "flux-kontext-pro" -o ./assets/patterns +``` + +### For Content Creation +```bash +# Generate blog post images +fal-cli generate -p "illustration for tech blog about AI" -m "flux-pro-ultra" -o ./content/images + +# Generate social media content +fal-cli generate -p "instagram post about sustainability" -m "imagen4-ultra" -o ./social/instagram +``` + +## Output Structure +Generated images are saved with this structure: +``` +output_directory/ +└── gen_{timestamp}_{id}/ + └── {model}_{prompt#}_{iteration}_{image#}_{timestamp}_{random}.png +``` + +## Best Practices + +### 1. Model Selection +- Use `flux-kontext-pro` for testing and development (cheapest at $0.04) +- Use `flux-pro-ultra` or `imagen4-ultra` for production-quality images +- Use `qwen-image` when text needs to be rendered in the image + +### 2. 
Prompt Optimization +- Always use `--no-optimize` flag for testing to save API calls +- Use optimization for final production images +- Optimize prompts separately with `fal-cli optimize` to reuse + +### 3. Output Management +- Always specify output directory with `-o` flag +- Use project-relative paths for consistency +- Create dedicated directories for different image types + +### 4. Cost Management +- Check costs before bulk generation: `fal-cli models` +- Start with single images before batch generation +- Use cheaper models for prototyping + +## Error Handling + +### Common Issues & Solutions + +```bash +# If "Unauthorized" error occurs +# Check API key configuration: +fal-cli config --show + +# If "command not found" error +# Ensure global installation: +which fal-cli + +# If output directory error +# Use absolute or relative paths: +fal-cli generate -p "test" -m "flux-kontext-pro" -o ~/Desktop/test +``` + +## Automation Examples + +### Bash Script Integration +```bash +#!/bin/bash +# generate-assets.sh + +# Generate multiple images for a project +PROMPTS=( + "hero section background" + "feature icon set" + "testimonial avatars" +) + +for prompt in "${PROMPTS[@]}"; do + fal-cli generate -p "$prompt" -m "flux-kontext-pro" -o ./generated +done +``` + +### Node.js Integration +```javascript +// generate-images.js +import { exec } from 'child_process'; +import { promisify } from 'util'; + +const execAsync = promisify(exec); + +async function generateImage(prompt, model = 'flux-kontext-pro', outputDir = './images') { + try { + const command = `fal-cli generate -p "${prompt}" -m "${model}" -o ${outputDir} --no-optimize`; + const { stdout, stderr } = await execAsync(command); + console.log('Generated:', stdout); + return stdout; + } catch (error) { + console.error('Generation failed:', error); + throw error; + } +} + +// Usage +await generateImage('modern website hero image', 'flux-pro-ultra', './public/images'); +``` + +## DO's and DON'Ts + +### ✅ DO's +- DO use 
the CLI for generating project assets +- DO specify output directories explicitly +- DO check model costs before bulk generation +- DO use prompt optimization for production images +- DO handle errors gracefully in scripts + +### ❌ DON'Ts +- DON'T expose or log API keys +- DON'T hardcode API keys in scripts +- DON'T generate without checking costs first +- DON'T use expensive models for testing +- DON'T ask for or print the API key to confirm it exists - if authentication fails, run `fal-cli config --show` + +## Quick Reference + +```bash +# Check if FAL CLI is available +which fal-cli + +# Check configuration (without showing key) +fal-cli config --show + +# Generate cheap test image +fal-cli generate -p "test" -m "flux-kontext-pro" --no-optimize -o ./test + +# Generate production image with optimization +fal-cli generate -p "professional headshot" -m "imagen4-ultra" -o ./final + +# List models with costs +fal-cli models + +# Get help +fal-cli --help +fal-cli generate --help +``` + +## Notes for AI Agents + +When assisting users with FAL CLI: +1. Always protect API key security +2. Suggest cost-effective models for testing +3. Recommend appropriate output directories +4. Provide complete, working commands +5. Include error handling in scripts +6. Explain cost implications of choices + +Remember: The FAL CLI is a powerful tool for AI image generation. Use it responsibly and always prioritize security and cost-effectiveness. \ No newline at end of file diff --git a/rules/fal-mcp-integration.md b/rules/fal-mcp-integration.md new file mode 100644 index 0000000..9be8fb3 --- /dev/null +++ b/rules/fal-mcp-integration.md @@ -0,0 +1,280 @@ +# FAL MCP Server Integration Rules + +**Tag this file when you need FAL AI image generation through MCP: `@fal-mcp-integration.md`** + +## Overview +The FAL MCP Server provides Model Context Protocol integration for AI assistants to generate high-quality AI images using FAL AI models. 
+ +## ⚠️ CRITICAL SECURITY RULES + +### API Key Protection +- **NEVER** include FAL API keys directly in configurations shown to users +- **NEVER** display or log the FAL_KEY in any output +- **ALWAYS** use environment variables or secure config files +- **ALWAYS** refer to the key as "your_fal_api_key" in examples + +## MCP Server Setup + +### Starting the Server Standalone +```bash +# Default port (3001) +npm run mcp-server + +# Custom port +PORT=3002 npm run mcp-server + +# From any directory (if globally installed) +cd /path/to/fal-cli && npm run mcp-server +``` + +## Integration Configurations + +### Claude Desktop Integration +Claude Desktop is the standalone desktop application. Configure in: +`~/Library/Application Support/Claude/claude_desktop_config.json` + +```json +{ + "mcpServers": { + "fal-cli": { + "command": "node", + "args": ["/Users/your-username/Documents/mcps/fal-cli/mcp-server.js"], + "env": { + "FAL_KEY": "your_fal_api_key" + } + } + } +} +``` + +### Claude Code Integration (CLI) +Claude Code is the CLI tool that can use MCP servers. According to the docs, it supports: +- Local stdio servers +- Remote SSE servers +- Remote HTTP servers + +For local stdio integration with Claude Code: +```bash +# Start the MCP server +cd /path/to/fal-cli +FAL_KEY="your_api_key" npm run mcp-server + +# The server will communicate via stdio with Claude Code +``` + +### Cursor Integration +Add to Cursor's MCP settings: +```json +{ + "mcpServers": { + "fal-cli": { + "command": "node", + "args": ["/path/to/fal-cli/mcp-server.js"], + "env": { + "FAL_KEY": "your_fal_api_key" + } + } + } +} +``` + +## Available MCP Tools + +### 1. `generate_image` +Generate a single image with any FAL AI model. + +**Parameters:** +- `model` (required): Model ID (e.g., "flux-kontext-pro") +- `prompt` (required): Text description +- `num_images`: Number of images (1-4) +- `aspect_ratio`: Image ratio ("16:9", "1:1", "9:16", etc.) 
+- `output_directory`: Where to save images + +**Usage in AI Assistant:** +``` +Generate a cyberpunk city scene using flux-pro-ultra model with 16:9 aspect ratio +``` + +### 2. `list_models` +Get all available models with pricing and capabilities. + +**Parameters:** +- `category` (optional): Filter by category +- `format` (optional): Output format + +**Usage in AI Assistant:** +``` +List all Ultra Quality models with their pricing +``` + +### 3. `optimize_prompt` +Enhance prompts using AI for better generation results. + +**Parameters:** +- `prompt` (required): Original prompt +- `model` (optional): Target model +- `style` (optional): Style preference + +**Usage in AI Assistant:** +``` +Optimize "a cat sitting" for photorealistic style +``` + +### 4. `batch_generate` +Generate multiple images with different prompts/models. + +**Parameters:** +- `tasks` (required): Array of generation tasks +- `output_directory`: Where to save images +- `optimize_prompts`: Whether to optimize first + +**Usage in AI Assistant:** +``` +Generate 3 different hero images using flux-kontext-pro +``` + +### 5. `calculate_cost` +Estimate costs before generation. + +**Parameters:** +- `tasks` (required): Array of tasks +- `include_optimization`: Include optimization costs + +**Usage in AI Assistant:** +``` +Calculate cost for 5 images with imagen4-ultra +``` + +### 6. `get_model_info` +Get detailed information about a specific model. 
+ +**Parameters:** +- `model_id` (required): Model identifier +- `include_schema`: Include parameter schema + +**Usage in AI Assistant:** +``` +Get details about flux-pro-ultra including parameters +``` + +## Model Pricing Reference + +| Model | Cost/Image | Best For | +|-------|------------|----------| +| `flux-kontext-pro` | $0.04 | Testing, context understanding | +| `flux-kontext-max` | $0.08 | Advanced context, complex scenes | +| `flux-pro-ultra` | $0.06 | Ultra-high quality, 2K-4MP | +| `imagen4-ultra` | $0.06 | Photorealistic images | +| `qwen-image` | $0.05 | Text rendering in images | + +## Cost Control Features + +The MCP server includes: +- **$5 spending limit** per session by default +- Automatic cost calculation before generation +- Requires confirmation for high-cost operations +- Batch operations show total cost upfront + +## Testing MCP Server Connection + +### 1. Verify Server Starts +```bash +cd /path/to/fal-cli +npm run mcp-server +# Should show: "FAL CLI MCP Server running on stdio" +``` + +### 2. Check Available Tools +Once connected, the AI assistant should be able to: +- List available models +- Calculate costs +- Generate images +- Optimize prompts + +### 3. 
Test Generation +Ask the AI assistant: +``` +"Use the FAL MCP server to generate a simple test image with the cheapest model" +``` + +## Common Issues & Solutions + +### "Unauthorized" Error +- Check FAL_KEY environment variable is set +- Verify API key is valid at fal.ai +- Ensure key is passed to MCP server + +### "Model not found" Error +- Use `list_models` to see available models +- Check model ID spelling +- Use exact IDs like "flux-kontext-pro" + +### "Command not found" Error +- Verify full path to mcp-server.js +- Check Node.js is installed +- Ensure fal-cli dependencies are installed + +### Server Won't Start +```bash +# Debug steps +cd /path/to/fal-cli +npm install # Ensure dependencies installed +node --version # Check Node.js 18+ +FAL_KEY="your_fal_api_key" node mcp-server.js # Test directly +``` + +## Best Practices + +### For Development +1. Use `flux-kontext-pro` (cheapest at $0.04) +2. Always check costs before generation +3. Test with single images first + +### For Production +1. Use `flux-pro-ultra` or `imagen4-ultra` +2. Optimize all prompts before generation +3. Set appropriate output directories + +### For Integration +1. Store API keys securely +2. Use absolute paths in configurations +3. 
Test connection before bulk operations + +## DO's and DON'Ts + +### ✅ DO's +- DO protect API keys in all configurations +- DO use cost calculation before bulk operations +- DO specify full paths to mcp-server.js +- DO test with cheap models first + +### ❌ DON'Ts +- DON'T expose API keys in examples or logs +- DON'T hardcode keys in configurations +- DON'T exceed spending limits carelessly +- DON'T use relative paths in configs + +## Quick Test Commands + +```bash +# Start server with debug output +FAL_KEY="your_fal_api_key" node /path/to/fal-cli/mcp-server.js 2>&1 | tee mcp.log + +# Check if port is in use +lsof -i :3001 + +# Confirm Node.js starts with FAL_KEY set (this does not validate the key against fal.ai) +FAL_KEY="your_fal_api_key" node -e "console.log('Key format valid')" +``` + +## Notes for AI Assistants + +When using FAL MCP Server: +1. Never expose actual API keys +2. Always suggest cheapest models for testing +3. Calculate costs before any generation +4. Provide clear error messages +5. Confirm high-cost operations +6. Use absolute paths in examples + +Remember: The MCP server bridges FAL AI's powerful image generation with AI assistants while maintaining security and cost control. 
\ No newline at end of file diff --git a/show-model-schemas.js b/show-model-schemas.js new file mode 100644 index 0000000..8a99ad9 --- /dev/null +++ b/show-model-schemas.js @@ -0,0 +1,70 @@ +#!/usr/bin/env node + +/** + * Display detailed schema information for all FAL models + */ + +import { getAllModels } from './models-new.js'; +import chalk from 'chalk'; + +async function showModelSchemas() { + const models = await getAllModels(); + + console.log(chalk.cyan.bold('\n📋 FAL Model Schemas & Parameters\n')); + console.log('=' .repeat(80)); + + for (const model of models) { + console.log(chalk.yellow.bold(`\n📦 ${model.name}`)); + console.log(chalk.gray(`ID: ${model.id}`)); + console.log(chalk.gray(`Category: ${model.category} | Cost: $${model.costPerImage}/image | Max: ${model.maxImages} images`)); + console.log(chalk.white(`Description: ${model.description}`)); + + console.log(chalk.green('\n📝 Parameters:')); + + // Default parameters + console.log(chalk.cyan(' Default Values:')); + if (model.defaultParams) { + Object.entries(model.defaultParams).forEach(([key, value]) => { + console.log(` • ${key}: ${JSON.stringify(value)}`); + }); + } + + // Supported aspect ratios + if (model.supportedAspectRatios) { + console.log(chalk.cyan('\n Supported Aspect Ratios:')); + console.log(` ${model.supportedAspectRatios.join(', ')}`); + } + + // Supported formats + if (model.supportedFormats) { + console.log(chalk.cyan('\n Supported Output Formats:')); + console.log(` ${model.supportedFormats.join(', ')}`); + } + + // Additional schema details + console.log(chalk.cyan('\n API Schema:')); + console.log(' Required parameters:'); + console.log(' • prompt (string): The text description of the image to generate'); + console.log(' Optional parameters:'); + console.log(' • num_images (integer): Number of images to generate (1-' + model.maxImages + ')'); + console.log(' • aspect_ratio (string): Image aspect ratio'); + + if (model.defaultParams?.guidance_scale !== undefined) { + 
console.log(' • guidance_scale (float): Controls adherence to prompt (default: ' + model.defaultParams.guidance_scale + ')'); + } + + if (model.defaultParams?.num_inference_steps !== undefined) { + console.log(' • num_inference_steps (integer): Quality/speed tradeoff (default: ' + model.defaultParams.num_inference_steps + ')'); + } + + console.log('\n' + '-'.repeat(80)); + } + + console.log(chalk.blue.bold('\n📚 Additional Information:')); + console.log('• All models accept a "prompt" parameter (required)'); + console.log('• Use --json flag with "fal-cli models" to get raw JSON data'); + console.log('• Model-specific optimizations are applied when using prompt optimization'); + console.log('• Each model has unique strengths - check descriptions for best use cases'); +} + +showModelSchemas().catch(console.error); \ No newline at end of file diff --git a/test-fal.js b/test-fal.js new file mode 100644 index 0000000..58e918d --- /dev/null +++ b/test-fal.js @@ -0,0 +1,45 @@ +import { fal } from '@fal-ai/client'; +import dotenv from 'dotenv'; + +dotenv.config(); + +async function testFalAPI() { + try { + console.log('Testing FAL API with key...'); + + // Configure FAL client with API key + const FAL_KEY = process.env.FAL_KEY; + if (!FAL_KEY) { + throw new Error('FAL_KEY not found in environment'); + } + + console.log('API Key found, attempting generation...'); + + const result = await fal.subscribe('fal-ai/flux-pro/v1.1', { + input: { + prompt: "a simple red circle on white background", + num_images: 1, + image_size: { + width: 512, + height: 512 + } + }, + credentials: FAL_KEY, + logs: true + }); + + console.log('Success! 
Result:', result); + + if (result.images && result.images.length > 0) { + console.log('Image URL:', result.images[0].url); + } + + } catch (error) { + console.error('Error:', error.message); + if (error.body) { + console.error('Error details:', error.body); + } + } +} + +testFalAPI(); From 432ddb9568cf4e4482c434a16c3bb26bd66b4736 Mon Sep 17 00:00:00 2001 From: Selcukatli Date: Thu, 25 Sep 2025 13:09:30 -0400 Subject: [PATCH 2/3] Add 8 new models and fix MCP server list_models response - Added 8 new FAL models: * Gemini 2.5 Flash (text-to-image and edit variants) * Nano Banana (text-to-image and edit variants) * FLUX Pro Kontext Edit * Gemini Flash Edit * Ideogram Character Edit * Qwen Image Edit Plus - Fixed MCP server list_models tool to return proper format * Wrapped response in content array with type/text structure * Now compatible with MCP protocol expectations - Enhanced image extraction for edit model responses * Added support for single image responses from edit models * Handles both 'image' and 'edited_image' response formats - Added schema fetcher utility (fetch-model-schemas.js) * Fetches model schemas directly from FAL OpenAPI endpoints * Automatically generates model configurations with proper parameters * Supports both text-to-image and edit models - Updated existing model configs with complete OpenAPI schemas * All models now have accurate parameter definitions * Includes types, descriptions, defaults, and constraints Author: Selcuk Atli Co-Authored-By: Claude --- core/image-generator.js | 24 ++ fetch-model-schemas.js | 346 ++++++++++++++++++++++++++++ mcp-server.js | 25 +- models/flux-kontext-max.json | 101 +++++++- models/flux-kontext-pro.json | 108 ++++++++- models/flux-pro-kontext-edit.json | 118 ++++++++++ models/flux-pro-ultra.json | 97 +++++++- models/gemini-25-flash-edit.json | 67 ++++++ models/gemini-25-flash-image.json | 57 +++++ models/gemini-flash-edit.json | 36 +++ models/ideogram-character-edit.json | 113 +++++++++ 
models/imagen4-ultra.json | 73 +++++- models/nano-banana-edit.json | 67 ++++++ models/nano-banana.json | 57 +++++ models/qwen-image-edit-plus.json | 121 ++++++++++ models/qwen-image.json | 113 ++++++++- 16 files changed, 1487 insertions(+), 36 deletions(-) create mode 100644 fetch-model-schemas.js create mode 100644 models/flux-pro-kontext-edit.json create mode 100644 models/gemini-25-flash-edit.json create mode 100644 models/gemini-25-flash-image.json create mode 100644 models/gemini-flash-edit.json create mode 100644 models/ideogram-character-edit.json create mode 100644 models/nano-banana-edit.json create mode 100644 models/nano-banana.json create mode 100644 models/qwen-image-edit-plus.json diff --git a/core/image-generator.js b/core/image-generator.js index 0d60a25..1ad200d 100644 --- a/core/image-generator.js +++ b/core/image-generator.js @@ -175,11 +175,35 @@ export function extractImageUrls(result) { } }); } else if (result.image && result.image.url) { + // Single image response (common for edit models) images.push(result.image.url); + } else if (result.image && typeof result.image === 'string') { + // Direct image URL string + images.push(result.image); + } else if (result.edited_image && result.edited_image.url) { + // Some edit models return edited_image + images.push(result.edited_image.url); + } else if (result.output && result.output.url) { + // Direct output URL + images.push(result.output.url); + } else if (result.output && result.output.image) { + // Output with nested image + if (result.output.image.url) { + images.push(result.output.image.url); + } else if (typeof result.output.image === 'string') { + images.push(result.output.image); + } } else if (result.data && result.data.images) { result.data.images.forEach(img => { if (img.url) images.push(img.url); }); + } else if (result.data && result.data.image) { + // Single image in data + if (result.data.image.url) { + images.push(result.data.image.url); + } else if (typeof result.data.image === 
'string') { + images.push(result.data.image); + } } else if (result.output && result.output.images) { result.output.images.forEach(img => { if (img.url) images.push(img.url); diff --git a/fetch-model-schemas.js b/fetch-model-schemas.js new file mode 100644 index 0000000..01275d7 --- /dev/null +++ b/fetch-model-schemas.js @@ -0,0 +1,346 @@ +#!/usr/bin/env node + +/** + * Fetch FAL model schemas from OpenAPI and generate config files + * Uses FAL's OpenAPI endpoint to get accurate parameter definitions + */ + +import fs from 'fs-extra'; +import path from 'path'; +import { fileURLToPath } from 'url'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +// Models to fetch schemas for +const MODELS_TO_FETCH = [ + // Original models that need schema updates + { + id: 'fal-ai/flux-pro/kontext/max/text-to-image', + name: 'FLUX Pro Kontext Max', + category: 'Professional', + costPerImage: 0.08, + description: 'Advanced FLUX model with maximum context understanding', + modelKey: 'flux-kontext-max' + }, + { + id: 'fal-ai/flux-pro/kontext/text-to-image', + name: 'FLUX Pro Kontext', + category: 'Professional', + costPerImage: 0.04, + description: 'Professional FLUX model with enhanced context understanding', + modelKey: 'flux-kontext-pro' + }, + { + id: 'fal-ai/flux-pro/v1.1-ultra', + name: 'FLUX Pro Ultra v1.1', + category: 'Ultra Quality', + costPerImage: 0.06, + description: 'Latest FLUX Pro model with ultra-high quality output', + modelKey: 'flux-pro-ultra' + }, + { + id: 'fal-ai/imagen4/preview/ultra', + name: 'Imagen 4 Ultra (Preview)', + category: 'Ultra Quality', + costPerImage: 0.06, + description: "Google's latest high-quality image generation model", + modelKey: 'imagen4-ultra' + }, + { + id: 'fal-ai/qwen-image', + name: 'Qwen Image', + category: 'General Purpose', + costPerImage: 0.05, + description: 'High-quality text-to-image model with excellent text rendering', + modelKey: 'qwen-image' + }, + // New models added + { + id: 
/** Aspect ratios assumed when a schema does not enumerate its own. */
const FALLBACK_ASPECT_RATIOS = Object.freeze(['1:1', '16:9', '9:16', '4:3', '3:4']);

/**
 * Download the OpenAPI document that FAL publishes for one queue endpoint.
 *
 * @param {string} modelId - FAL endpoint id, e.g. `fal-ai/nano-banana/edit`.
 * @returns {Promise<object|null>} Parsed OpenAPI JSON, or `null` when the
 *   request fails or the server answers with a non-2xx status (the failure is
 *   logged to stderr).
 */
async function fetchModelSchema(modelId) {
  // Build the query through URLSearchParams so an id containing reserved
  // characters ('&', '#', spaces, ...) cannot corrupt the request URL.
  const url = new URL('https://fal.ai/api/openapi/queue/openapi.json');
  url.searchParams.set('endpoint_id', modelId);

  try {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }
    return await response.json();
  } catch (error) {
    console.error(`Failed to fetch schema for ${modelId}:`, error.message);
    return null;
  }
}

/**
 * Locate the request-body schema of a model inside its OpenAPI document.
 *
 * Looks at the POST body of the model's own path first (following a top-level
 * `$ref` into `components.schemas`), then falls back to the first component
 * whose name contains "input" or "request".
 *
 * @param {object} openApiSchema - Parsed OpenAPI document.
 * @param {string} modelId - FAL endpoint id.
 * @returns {object|null} The input schema, or `null` when none is found.
 */
function resolveInputSchema(openApiSchema, modelId) {
  const paths = openApiSchema.paths || {};
  const components = openApiSchema.components?.schemas;
  const mainPath = `/fal-ai/${modelId.replace('fal-ai/', '')}`;
  const bodySchema = paths[mainPath]?.post?.requestBody?.content?.['application/json']?.schema;

  if (bodySchema) {
    if (!bodySchema.$ref) {
      return bodySchema;
    }
    const refName = bodySchema.$ref.split('/').pop();
    if (components?.[refName]) {
      return components[refName];
    }
  }

  if (!components) {
    return null;
  }
  const fallbackName = Object.keys(components).find((name) => {
    const lowered = name.toLowerCase();
    return lowered.includes('input') || lowered.includes('request');
  });
  return fallbackName ? components[fallbackName] : null;
}

/**
 * Turn a FAL OpenAPI document into the model-config object stored under
 * `models/`.
 *
 * @param {object} openApiSchema - Parsed OpenAPI document for the endpoint.
 * @param {object} modelInfo - Catalogue entry (`id`, `name`, `description`,
 *   `category`, `costPerImage`, optional `isEditModel`).
 * @returns {object} Config with `parameters`, `defaultParams`,
 *   `supportedAspectRatios`, `supportedFormats` and `maxImages` filled in.
 *   Parsing problems are logged as warnings and never thrown; the returned
 *   config then still carries the generic defaults.
 */
function extractParametersFromSchema(openApiSchema, modelInfo) {
  const config = {
    id: modelInfo.id,
    name: modelInfo.name,
    description: modelInfo.description,
    category: modelInfo.category,
    costPerImage: modelInfo.costPerImage,
    type: modelInfo.isEditModel ? 'image-edit' : 'text-to-image',
    defaultParams: {},
    supportedAspectRatios: [],
    supportedFormats: ['jpeg', 'png'],
    maxImages: 4,
    parameters: {}
  };

  try {
    const inputSchema = resolveInputSchema(openApiSchema, modelInfo.id);

    if (inputSchema) {
      const properties = inputSchema.properties || {};
      const required = inputSchema.required || [];

      for (const [key, value] of Object.entries(properties)) {
        config.parameters[key] = {
          type: value.type,
          required: required.includes(key),
          description: value.description || '',
          default: value.default,
          enum: value.enum,
          minimum: value.minimum,
          maximum: value.maximum
        };

        if (value.default !== undefined) {
          config.defaultParams[key] = value.default;
        }
        if (key === 'aspect_ratio' && value.enum) {
          config.supportedAspectRatios = value.enum;
        }
        if (key === 'output_format' && value.enum) {
          config.supportedFormats = value.enum;
        }
        if (key === 'num_images' && typeof value.maximum === 'number' && value.maximum > 0) {
          config.maxImages = value.maximum;
        }
      }
    }
  } catch (error) {
    console.warn(`Warning: Could not fully parse schema for ${modelInfo.id}:`, error.message);
  }

  // The fill-ins below run outside the try block so that a parsing failure
  // still yields a usable config instead of one with empty defaults.
  if (modelInfo.isEditModel) {
    // Only add what the schema did not describe: overwriting would discard
    // the API's own metadata, and endpoints that accept `image_urls` must not
    // also advertise a required `image_url`.
    const hasImageInput =
      Object.hasOwn(config.parameters, 'image_url') ||
      Object.hasOwn(config.parameters, 'image_urls');
    if (!hasImageInput) {
      config.parameters.image_url = {
        type: 'string',
        required: true,
        description: 'URL of the image to edit'
      };
    }
    if (!Object.hasOwn(config.parameters, 'prompt')) {
      config.parameters.prompt = {
        type: 'string',
        required: true,
        description: 'Description of the edit to make'
      };
    }
  }

  if (!config.defaultParams.num_images) {
    config.defaultParams.num_images = 1;
  }
  if (config.supportedAspectRatios.length === 0) {
    config.supportedAspectRatios = [...FALLBACK_ASPECT_RATIOS];
  }

  return config;
}

/** Derive the config file name: the explicit `modelKey`, else a slug of the id. */
function configFilenameFor(modelInfo) {
  const key = modelInfo.modelKey || modelInfo.id
    .replace('fal-ai/', '')
    .replace(/\//g, '-')
    .replace(/[^a-z0-9-]/gi, '-')
    .toLowerCase();
  return `${key}.json`;
}

/** Build the prompt-optimizer system prompt matching the model type. */
function buildOptimizationPrompt(modelInfo) {
  if (modelInfo.isEditModel) {
    return `You are optimizing edit instructions for ${modelInfo.name}. This model excels at precise image modifications while preserving the original image's structure and quality. Focus on: 1) Clear, specific edit instructions that describe exactly what to change. 2) Preserve elements that should remain unchanged by explicitly mentioning what to keep. 3) Use directional and positional language for spatial edits. 4) For style changes, describe the target style in detail. Example: 'change color to red' → 'Change the car's color from blue to bright cherry red while preserving all reflections, shadows, and environmental lighting'`;
  }
  return `You are optimizing prompts for ${modelInfo.name}. Focus on detailed visual descriptions, artistic style, composition, and technical aspects that enhance image quality. Be specific about lighting, colors, textures, and spatial relationships.`;
}

/** Build the minimal config written when the schema could not be fetched. */
function buildBasicConfig(modelInfo) {
  return {
    id: modelInfo.id,
    name: modelInfo.name,
    description: modelInfo.description,
    category: modelInfo.category,
    costPerImage: modelInfo.costPerImage,
    type: modelInfo.isEditModel ? 'image-edit' : 'text-to-image',
    defaultParams: {
      num_images: 1
    },
    supportedAspectRatios: [...FALLBACK_ASPECT_RATIOS],
    supportedFormats: ['jpeg', 'png'],
    maxImages: modelInfo.isEditModel ? 1 : 4,
    parameters: modelInfo.isEditModel ? {
      prompt: { type: 'string', required: true },
      image_url: { type: 'string', required: true }
    } : {
      prompt: { type: 'string', required: true },
      num_images: { type: 'integer', required: false }
    }
  };
}

/**
 * Fetch the schema of every entry in `MODELS_TO_FETCH` and write one JSON
 * config per model into the `models/` directory next to this script.
 *
 * A model whose schema cannot be fetched still gets a basic config, so the
 * directory always contains one file per catalogue entry. Every config,
 * schema-derived or basic, carries an `optimization_system_prompt`.
 *
 * @returns {Promise<void>} Resolves once all files are written; rejects if
 *   the directory cannot be created or a file cannot be written.
 */
async function generateModelConfigs() {
  console.log('🔍 Fetching model schemas from FAL OpenAPI...\n');

  const modelsDir = path.join(__dirname, 'models');
  await fs.ensureDir(modelsDir);

  let successCount = 0;
  let failCount = 0;

  // Models are processed one at a time so the log lines of each model stay
  // grouped together.
  for (const modelInfo of MODELS_TO_FETCH) {
    console.log(`📥 Fetching schema for ${modelInfo.name}...`);

    const filename = configFilenameFor(modelInfo);
    const filepath = path.join(modelsDir, filename);
    const schema = await fetchModelSchema(modelInfo.id);

    if (schema) {
      const config = extractParametersFromSchema(schema, modelInfo);
      config.optimization_system_prompt = buildOptimizationPrompt(modelInfo);

      await fs.writeJson(filepath, config, { spaces: 2 });
      console.log(`✅ Created ${filename}`);
      successCount++;
    } else {
      console.log(`❌ Failed to fetch schema for ${modelInfo.id}`);

      const basicConfig = buildBasicConfig(modelInfo);
      basicConfig.optimization_system_prompt = buildOptimizationPrompt(modelInfo);

      await fs.writeJson(filepath, basicConfig, { spaces: 2 });
      console.log(`⚠️ Created basic config for ${filename} (schema fetch failed)`);
      failCount++;
    }
  }

  console.log(`\n✨ Done! Created ${successCount} configs from schemas, ${failCount} basic configs`);
  console.log(`📁 Model configs saved to: ${modelsDir}`);
}

// Run if called directly
if (import.meta.url === `file://${process.argv[1]}`) {
  generateModelConfigs().catch(console.error);
}

export { fetchModelSchema, extractParametersFromSchema, generateModelConfigs };
with intelligent insights - return { + const response = { models: models.map(model => ({ id: model.id, name: model.name, @@ -668,6 +670,15 @@ server.setRequestHandler(CallToolRequestSchema, handleErrors(async (request) => ).slice(0, 3).map(m => m.id) } }; + + return { + content: [ + { + type: 'text', + text: JSON.stringify(response, null, 2) + } + ] + }; } case 'get_model_info': { diff --git a/models/flux-kontext-max.json b/models/flux-kontext-max.json index ba6c3cb..762d0c0 100644 --- a/models/flux-kontext-max.json +++ b/models/flux-kontext-max.json @@ -4,19 +4,25 @@ "description": "Advanced FLUX model with maximum context understanding", "category": "Professional", "costPerImage": 0.08, + "type": "text-to-image", "defaultParams": { - "aspect_ratio": "16:9", + "aspect_ratio": "1:1", "num_images": 1, + "output_format": "jpeg", + "sync_mode": false, + "safety_tolerance": "2", "guidance_scale": 3.5, - "num_inference_steps": 28 + "enhance_prompt": false }, "supportedAspectRatios": [ - "1:1", + "21:9", + "16:9", "4:3", + "3:2", + "1:1", + "2:3", "3:4", - "16:9", "9:16", - "21:9", "9:21" ], "supportedFormats": [ @@ -24,5 +30,86 @@ "png" ], "maxImages": 4, - "optimization_system_prompt": "You are FLUX-PROMPTSMITH, an elite prompt engineer for “FLUX Pro Kontext Max”. For every user request, output one single, richly detailed English prompt that: Embeds intricate, inter-element relationships spanning foreground, mid-ground, and background to form a coherent narrative scene. Uses sophisticated descriptive language to convey layered symbolism, temporal cues, nuanced emotional states, and atmospheric mood. Integrates cultural, historical, or metaphorical references the model can render with high fidelity. Describes clear cause-and-effect dynamics so each object’s presence influences the overall story composition. Preserves zero visual drift by specifying precise spatial arrangement, perspective, lighting, and color harmony. 
Fits within 80–150 words, flows as a single paragraph, and omits any text that could appear as captions or watermarks. Output absolutely nothing except this prompt—no explanations, no headings, no markdown fences, no metadata." -} \ No newline at end of file + "parameters": { + "prompt": { + "type": "string", + "required": true, + "description": "The prompt to generate an image from." + }, + "aspect_ratio": { + "type": "string", + "required": false, + "description": "The aspect ratio of the generated image.", + "default": "1:1", + "enum": [ + "21:9", + "16:9", + "4:3", + "3:2", + "1:1", + "2:3", + "3:4", + "9:16", + "9:21" + ] + }, + "num_images": { + "type": "integer", + "required": false, + "description": "The number of images to generate.", + "default": 1, + "minimum": 1, + "maximum": 4 + }, + "output_format": { + "type": "string", + "required": false, + "description": "The format of the generated image.", + "default": "jpeg", + "enum": [ + "jpeg", + "png" + ] + }, + "sync_mode": { + "type": "boolean", + "required": false, + "description": "\n If set to true, the function will wait for the image to be generated and uploaded\n before returning the response. This will increase the latency of the function but\n it allows you to get the image directly in the response without going through the CDN.\n ", + "default": false + }, + "safety_tolerance": { + "type": "string", + "required": false, + "description": "The safety tolerance level for the generated image. 
1 being the most strict and 5 being the most permissive.", + "default": "2", + "enum": [ + "1", + "2", + "3", + "4", + "5", + "6" + ] + }, + "guidance_scale": { + "type": "number", + "required": false, + "description": "\n The CFG (Classifier Free Guidance) scale is a measure of how close you want\n the model to stick to your prompt when looking for a related image to show you.\n ", + "default": 3.5, + "minimum": 1, + "maximum": 20 + }, + "seed": { + "type": "integer", + "required": false, + "description": "\n The same seed and the same prompt given to the same version of the model\n will output the same image every time.\n " + }, + "enhance_prompt": { + "type": "boolean", + "required": false, + "description": "Whether to enhance the prompt for better results.", + "default": false + } + }, + "optimization_system_prompt": "You are optimizing prompts for FLUX Pro Kontext Max. Focus on detailed visual descriptions, artistic style, composition, and technical aspects that enhance image quality. Be specific about lighting, colors, textures, and spatial relationships." 
+} diff --git a/models/flux-kontext-pro.json b/models/flux-kontext-pro.json index e371f79..6eeeeee 100644 --- a/models/flux-kontext-pro.json +++ b/models/flux-kontext-pro.json @@ -4,14 +4,112 @@ "description": "Professional FLUX model with enhanced context understanding", "category": "Professional", "costPerImage": 0.04, + "type": "text-to-image", "defaultParams": { - "aspect_ratio": "16:9", + "aspect_ratio": "1:1", "num_images": 1, + "output_format": "jpeg", + "sync_mode": false, + "safety_tolerance": "2", "guidance_scale": 3.5, - "num_inference_steps": 28 + "enhance_prompt": false }, - "supportedAspectRatios": ["1:1", "4:3", "3:4", "16:9", "9:16", "21:9", "9:21"], - "supportedFormats": ["jpeg", "png"], + "supportedAspectRatios": [ + "21:9", + "16:9", + "4:3", + "3:2", + "1:1", + "2:3", + "3:4", + "9:16", + "9:21" + ], + "supportedFormats": [ + "jpeg", + "png" + ], "maxImages": 4, - "optimization_system_prompt": "You are optimizing prompts for FLUX Pro Kontext, which excels at contextual coherence and maintaining consistency across compositional elements. This model interprets prompts through sophisticated contextual understanding, preserving character consistency and spatial relationships better than standard models. Focus on: 1) Structure prompts with clear contextual relationships between elements - the model excels when objects, people, and environments have logical connections. 2) Use detailed descriptive language for spatial arrangements and compositional balance - the model responds well to specific positioning and environmental context. 3) Emphasize character consistency and environmental coherence - describe how elements relate to each other within the scene. 4) Leverage the model's strength in maintaining visual consistency across complex scenes with multiple focal points. 5) Include contextual details that establish mood, atmosphere, and logical scene progression. 6) Structure prompts with layered descriptions that build context progressively. 
Example optimizations: 'office meeting' → 'boardroom scene with executives seated around polished conference table, each participant engaged in collaborative discussion, natural light streaming through tall windows illuminating documents spread across table surface, body language conveying attentive focus, modern architectural elements creating structured professional environment'" + "parameters": { + "prompt": { + "type": "string", + "required": true, + "description": "The prompt to generate an image from." + }, + "aspect_ratio": { + "type": "string", + "required": false, + "description": "The aspect ratio of the generated image.", + "default": "1:1", + "enum": [ + "21:9", + "16:9", + "4:3", + "3:2", + "1:1", + "2:3", + "3:4", + "9:16", + "9:21" + ] + }, + "num_images": { + "type": "integer", + "required": false, + "description": "The number of images to generate.", + "default": 1, + "minimum": 1, + "maximum": 4 + }, + "output_format": { + "type": "string", + "required": false, + "description": "The format of the generated image.", + "default": "jpeg", + "enum": [ + "jpeg", + "png" + ] + }, + "sync_mode": { + "type": "boolean", + "required": false, + "description": "\n If set to true, the function will wait for the image to be generated and uploaded\n before returning the response. This will increase the latency of the function but\n it allows you to get the image directly in the response without going through the CDN.\n ", + "default": false + }, + "safety_tolerance": { + "type": "string", + "required": false, + "description": "The safety tolerance level for the generated image. 
1 being the most strict and 5 being the most permissive.", + "default": "2", + "enum": [ + "1", + "2", + "3", + "4", + "5", + "6" + ] + }, + "guidance_scale": { + "type": "number", + "required": false, + "description": "\n The CFG (Classifier Free Guidance) scale is a measure of how close you want\n the model to stick to your prompt when looking for a related image to show you.\n ", + "default": 3.5, + "minimum": 1, + "maximum": 20 + }, + "seed": { + "type": "integer", + "required": false, + "description": "\n The same seed and the same prompt given to the same version of the model\n will output the same image every time.\n " + }, + "enhance_prompt": { + "type": "boolean", + "required": false, + "description": "Whether to enhance the prompt for better results.", + "default": false + } + }, + "optimization_system_prompt": "You are optimizing prompts for FLUX Pro Kontext. Focus on detailed visual descriptions, artistic style, composition, and technical aspects that enhance image quality. Be specific about lighting, colors, textures, and spatial relationships." 
} diff --git a/models/flux-pro-kontext-edit.json b/models/flux-pro-kontext-edit.json new file mode 100644 index 0000000..52c3c0e --- /dev/null +++ b/models/flux-pro-kontext-edit.json @@ -0,0 +1,118 @@ +{ + "id": "fal-ai/flux-pro/kontext", + "name": "FLUX Pro Kontext Edit", + "description": "FLUX Pro model for image-to-image transformation", + "category": "Image Editing", + "costPerImage": 0.05, + "type": "image-edit", + "defaultParams": { + "num_images": 1, + "output_format": "jpeg", + "sync_mode": false, + "safety_tolerance": "2", + "guidance_scale": 3.5, + "enhance_prompt": false + }, + "supportedAspectRatios": [ + "21:9", + "16:9", + "4:3", + "3:2", + "1:1", + "2:3", + "3:4", + "9:16", + "9:21" + ], + "supportedFormats": [ + "jpeg", + "png" + ], + "maxImages": 4, + "parameters": { + "prompt": { + "type": "string", + "required": true, + "description": "Description of the edit to make" + }, + "aspect_ratio": { + "type": "string", + "required": false, + "description": "The aspect ratio of the generated image.", + "enum": [ + "21:9", + "16:9", + "4:3", + "3:2", + "1:1", + "2:3", + "3:4", + "9:16", + "9:21" + ] + }, + "num_images": { + "type": "integer", + "required": false, + "description": "The number of images to generate.", + "default": 1, + "minimum": 1, + "maximum": 4 + }, + "output_format": { + "type": "string", + "required": false, + "description": "The format of the generated image.", + "default": "jpeg", + "enum": [ + "jpeg", + "png" + ] + }, + "image_url": { + "type": "string", + "required": true, + "description": "URL of the image to edit" + }, + "sync_mode": { + "type": "boolean", + "required": false, + "description": "\n If set to true, the function will wait for the image to be generated and uploaded\n before returning the response. 
This will increase the latency of the function but\n it allows you to get the image directly in the response without going through the CDN.\n ", + "default": false + }, + "safety_tolerance": { + "type": "string", + "required": false, + "description": "The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive.", + "default": "2", + "enum": [ + "1", + "2", + "3", + "4", + "5", + "6" + ] + }, + "guidance_scale": { + "type": "number", + "required": false, + "description": "\n The CFG (Classifier Free Guidance) scale is a measure of how close you want\n the model to stick to your prompt when looking for a related image to show you.\n ", + "default": 3.5, + "minimum": 1, + "maximum": 20 + }, + "seed": { + "type": "integer", + "required": false, + "description": "\n The same seed and the same prompt given to the same version of the model\n will output the same image every time.\n " + }, + "enhance_prompt": { + "type": "boolean", + "required": false, + "description": "Whether to enhance the prompt for better results.", + "default": false + } + }, + "optimization_system_prompt": "You are optimizing edit instructions for FLUX Pro Kontext Edit. This model excels at precise image modifications while preserving the original image's structure and quality. Focus on: 1) Clear, specific edit instructions that describe exactly what to change. 2) Preserve elements that should remain unchanged by explicitly mentioning what to keep. 3) Use directional and positional language for spatial edits. 4) For style changes, describe the target style in detail. 
Example: 'change color to red' → 'Change the car's color from blue to bright cherry red while preserving all reflections, shadows, and environmental lighting'" +} diff --git a/models/flux-pro-ultra.json b/models/flux-pro-ultra.json index 58817cc..edf1afb 100644 --- a/models/flux-pro-ultra.json +++ b/models/flux-pro-ultra.json @@ -4,14 +4,101 @@ "description": "Latest FLUX Pro model with ultra-high quality output", "category": "Ultra Quality", "costPerImage": 0.06, + "type": "text-to-image", "defaultParams": { "aspect_ratio": "16:9", + "enhance_prompt": false, "num_images": 1, - "guidance_scale": 3.5, - "num_inference_steps": 28 + "output_format": "jpeg", + "sync_mode": false, + "safety_tolerance": "2", + "enable_safety_checker": true, + "raw": false }, - "supportedAspectRatios": ["1:1", "4:3", "3:4", "16:9", "9:16", "21:9", "9:21"], - "supportedFormats": ["jpeg", "png"], + "supportedAspectRatios": [ + "1:1", + "16:9", + "9:16", + "4:3", + "3:4" + ], + "supportedFormats": [ + "jpeg", + "png" + ], "maxImages": 4, - "optimization_system_prompt": "You are optimizing prompts for FLUX Pro Ultra v1.1, which excels at ultra-high resolution detail generation (2K-4MP) with exceptional material texture fidelity and micro-detail precision. This model interprets prompts with focus on surface textures, material properties, and fine detail resolution. Focus on: 1) Emphasize specific material descriptions and surface texture details - the model excels at rendering complex material properties like metal reflections, fabric weaves, and organic textures. 2) Use precise descriptive language for micro-details and surface characteristics - describe how light interacts with different materials and surfaces. 3) Structure prompts with detailed texture hierarchies - from macro composition down to microscopic surface details that the model can render at high resolution. 
4) Leverage the model's strength in material authenticity - specify exact material properties, surface treatments, and texture variations. 5) Include detailed lighting descriptions that reveal surface characteristics - the model responds well to specific lighting angles and reflection patterns. 6) Build prompts with layered detail descriptions from overall form to intricate surface textures. Example optimizations: 'elegant jewelry' → 'platinum ring featuring precisely cut diamond with crisp facet edges, hand-engraved filigree patterns creating intricate surface textures, polished metal surfaces reflecting ambient light with mirror-like clarity, fine surface scratches and tool marks revealing handcrafted authenticity, crystal-clear gemstone displaying internal light refraction and fire dispersion patterns'" + "parameters": { + "prompt": { + "type": "string", + "required": true, + "description": "The prompt to generate an image from." + }, + "aspect_ratio": { + "required": false, + "description": "The aspect ratio of the generated image.", + "default": "16:9" + }, + "enhance_prompt": { + "type": "boolean", + "required": false, + "description": "Whether to enhance the prompt for better results.", + "default": false + }, + "num_images": { + "type": "integer", + "required": false, + "description": "The number of images to generate.", + "default": 1, + "minimum": 1, + "maximum": 4 + }, + "output_format": { + "type": "string", + "required": false, + "description": "The format of the generated image.", + "default": "jpeg", + "enum": [ + "jpeg", + "png" + ] + }, + "sync_mode": { + "type": "boolean", + "required": false, + "description": "\n If set to true, the function will wait for the image to be generated and uploaded\n before returning the response. 
This will increase the latency of the function but\n it allows you to get the image directly in the response without going through the CDN.\n ", + "default": false + }, + "safety_tolerance": { + "type": "string", + "required": false, + "description": "The safety tolerance level for the generated image. 1 being the most strict and 5 being the most permissive.", + "default": "2", + "enum": [ + "1", + "2", + "3", + "4", + "5", + "6" + ] + }, + "enable_safety_checker": { + "type": "boolean", + "required": false, + "description": "If set to true, the safety checker will be enabled.", + "default": true + }, + "seed": { + "type": "integer", + "required": false, + "description": "\n The same seed and the same prompt given to the same version of the model\n will output the same image every time.\n " + }, + "raw": { + "type": "boolean", + "required": false, + "description": "Generate less processed, more natural-looking images.", + "default": false + } + }, + "optimization_system_prompt": "You are optimizing prompts for FLUX Pro Ultra v1.1. Focus on detailed visual descriptions, artistic style, composition, and technical aspects that enhance image quality. Be specific about lighting, colors, textures, and spatial relationships." 
} diff --git a/models/gemini-25-flash-edit.json b/models/gemini-25-flash-edit.json new file mode 100644 index 0000000..2251be5 --- /dev/null +++ b/models/gemini-25-flash-edit.json @@ -0,0 +1,67 @@ +{ + "id": "fal-ai/gemini-25-flash-image/edit", + "name": "Gemini 2.5 Flash Edit", + "description": "Google Gemini 2.5 Flash for image editing", + "category": "Image Editing", + "costPerImage": 0.03, + "type": "image-edit", + "defaultParams": { + "num_images": 1, + "sync_mode": false, + "output_format": "jpeg" + }, + "supportedAspectRatios": [ + "1:1", + "16:9", + "9:16", + "4:3", + "3:4" + ], + "supportedFormats": [ + "jpeg", + "png" + ], + "maxImages": 4, + "parameters": { + "prompt": { + "type": "string", + "required": true, + "description": "Description of the edit to make" + }, + "num_images": { + "type": "integer", + "required": false, + "description": "Number of images to generate", + "default": 1, + "minimum": 1, + "maximum": 4 + }, + "sync_mode": { + "type": "boolean", + "required": false, + "description": "When true, images will be returned as data URIs instead of URLs.", + "default": false + }, + "image_urls": { + "type": "array", + "required": true, + "description": "List of URLs of input images for editing." + }, + "output_format": { + "type": "string", + "required": false, + "description": "Output format for the images", + "default": "jpeg", + "enum": [ + "jpeg", + "png" + ] + }, + "image_url": { + "type": "string", + "required": true, + "description": "URL of the image to edit" + } + }, + "optimization_system_prompt": "You are optimizing edit instructions for Gemini 2.5 Flash Edit. This model excels at precise image modifications while preserving the original image's structure and quality. Focus on: 1) Clear, specific edit instructions that describe exactly what to change. 2) Preserve elements that should remain unchanged by explicitly mentioning what to keep. 3) Use directional and positional language for spatial edits. 
4) For style changes, describe the target style in detail. Example: 'change color to red' → 'Change the car's color from blue to bright cherry red while preserving all reflections, shadows, and environmental lighting'" +} diff --git a/models/gemini-25-flash-image.json b/models/gemini-25-flash-image.json new file mode 100644 index 0000000..4e073d9 --- /dev/null +++ b/models/gemini-25-flash-image.json @@ -0,0 +1,57 @@ +{ + "id": "fal-ai/gemini-25-flash-image", + "name": "Gemini 2.5 Flash Image", + "description": "Google Gemini 2.5 Flash for fast image generation", + "category": "Advanced", + "costPerImage": 0.03, + "type": "text-to-image", + "defaultParams": { + "num_images": 1, + "sync_mode": false, + "output_format": "jpeg" + }, + "supportedAspectRatios": [ + "1:1", + "16:9", + "9:16", + "4:3", + "3:4" + ], + "supportedFormats": [ + "jpeg", + "png" + ], + "maxImages": 4, + "parameters": { + "prompt": { + "type": "string", + "required": true, + "description": "The prompt for image generation" + }, + "num_images": { + "type": "integer", + "required": false, + "description": "Number of images to generate", + "default": 1, + "minimum": 1, + "maximum": 4 + }, + "sync_mode": { + "type": "boolean", + "required": false, + "description": "When true, images will be returned as data URIs instead of URLs.", + "default": false + }, + "output_format": { + "type": "string", + "required": false, + "description": "Output format for the images", + "default": "jpeg", + "enum": [ + "jpeg", + "png" + ] + } + }, + "optimization_system_prompt": "You are optimizing prompts for Gemini 2.5 Flash Image. Focus on detailed visual descriptions, artistic style, composition, and technical aspects that enhance image quality. Be specific about lighting, colors, textures, and spatial relationships." 
+} diff --git a/models/gemini-flash-edit.json b/models/gemini-flash-edit.json new file mode 100644 index 0000000..10d9ec0 --- /dev/null +++ b/models/gemini-flash-edit.json @@ -0,0 +1,36 @@ +{ + "id": "fal-ai/gemini-flash-edit", + "name": "Gemini Flash Edit", + "description": "Edit images using Google Gemini with natural language", + "category": "Image Editing", + "costPerImage": 0.04, + "type": "image-edit", + "defaultParams": { + "num_images": 1 + }, + "supportedAspectRatios": [ + "1:1", + "16:9", + "9:16", + "4:3", + "3:4" + ], + "supportedFormats": [ + "jpeg", + "png" + ], + "maxImages": 4, + "parameters": { + "prompt": { + "type": "string", + "required": true, + "description": "Description of the edit to make" + }, + "image_url": { + "type": "string", + "required": true, + "description": "URL of the image to edit" + } + }, + "optimization_system_prompt": "You are optimizing edit instructions for Gemini Flash Edit. This model excels at precise image modifications while preserving the original image's structure and quality. Focus on: 1) Clear, specific edit instructions that describe exactly what to change. 2) Preserve elements that should remain unchanged by explicitly mentioning what to keep. 3) Use directional and positional language for spatial edits. 4) For style changes, describe the target style in detail. 
Example: 'change color to red' → 'Change the car's color from blue to bright cherry red while preserving all reflections, shadows, and environmental lighting'" +} diff --git a/models/ideogram-character-edit.json b/models/ideogram-character-edit.json new file mode 100644 index 0000000..7ed5f16 --- /dev/null +++ b/models/ideogram-character-edit.json @@ -0,0 +1,113 @@ +{ + "id": "fal-ai/ideogram/character/edit", + "name": "Ideogram Character Edit", + "description": "Modify consistent characters while preserving core identity", + "category": "Image Editing", + "costPerImage": 0.05, + "type": "image-edit", + "defaultParams": { + "style": "AUTO", + "expand_prompt": true, + "rendering_speed": "BALANCED", + "num_images": 1, + "sync_mode": false + }, + "supportedAspectRatios": [ + "1:1", + "16:9", + "9:16", + "4:3", + "3:4" + ], + "supportedFormats": [ + "jpeg", + "png" + ], + "maxImages": 8, + "parameters": { + "prompt": { + "type": "string", + "required": true, + "description": "Description of the edit to make" + }, + "style": { + "type": "string", + "required": false, + "description": "The style type to generate with. Cannot be used with style_codes.", + "default": "AUTO", + "enum": [ + "AUTO", + "REALISTIC", + "FICTION" + ] + }, + "expand_prompt": { + "type": "boolean", + "required": false, + "description": "Determine if MagicPrompt should be used in generating the request or not.", + "default": true + }, + "rendering_speed": { + "type": "string", + "required": false, + "description": "The rendering speed to use.", + "default": "BALANCED", + "enum": [ + "TURBO", + "BALANCED", + "QUALITY" + ] + }, + "reference_mask_urls": { + "type": "array", + "required": false, + "description": "A set of masks to apply to the character references. Currently only 1 mask is supported, rest will be ignored. (maximum total size 10MB across all character references). 
The masks should be in JPEG, PNG or WebP format" + }, + "reference_image_urls": { + "type": "array", + "required": true, + "description": "A set of images to use as character references. Currently only 1 image is supported, rest will be ignored. (maximum total size 10MB across all character references). The images should be in JPEG, PNG or WebP format" + }, + "image_urls": { + "required": false, + "description": "A set of images to use as style references (maximum total size 10MB across all style references). The images should be in JPEG, PNG or WebP format" + }, + "num_images": { + "type": "integer", + "required": false, + "description": "Number of images to generate.", + "default": 1, + "minimum": 1, + "maximum": 8 + }, + "image_url": { + "type": "string", + "required": true, + "description": "URL of the image to edit" + }, + "style_codes": { + "required": false, + "description": "A list of 8 character hexadecimal codes representing the style of the image. Cannot be used in conjunction with style_reference_images or style" + }, + "color_palette": { + "required": false, + "description": "A color palette for generation, must EITHER be specified via one of the presets (name) or explicitly via hexadecimal representations of the color with optional weights (members)" + }, + "sync_mode": { + "type": "boolean", + "required": false, + "description": "\n If set to true, the function will wait for the image to be generated and uploaded\n before returning the response. This will increase the latency of the function but\n it allows you to get the image directly in the response without going through the CDN.\n ", + "default": false + }, + "seed": { + "required": false, + "description": "Seed for the random number generator" + }, + "mask_url": { + "type": "string", + "required": true, + "description": "The mask URL to inpaint the image. MUST have the exact same dimensions (width and height) as the input image." 
+ } + }, + "optimization_system_prompt": "You are optimizing edit instructions for Ideogram Character Edit. This model excels at precise image modifications while preserving the original image's structure and quality. Focus on: 1) Clear, specific edit instructions that describe exactly what to change. 2) Preserve elements that should remain unchanged by explicitly mentioning what to keep. 3) Use directional and positional language for spatial edits. 4) For style changes, describe the target style in detail. Example: 'change color to red' → 'Change the car's color from blue to bright cherry red while preserving all reflections, shadows, and environmental lighting'" +} diff --git a/models/imagen4-ultra.json b/models/imagen4-ultra.json index a05c53a..b8e7eb1 100644 --- a/models/imagen4-ultra.json +++ b/models/imagen4-ultra.json @@ -4,12 +4,73 @@ "description": "Google's latest high-quality image generation model", "category": "Ultra Quality", "costPerImage": 0.06, + "type": "text-to-image", "defaultParams": { - "aspect_ratio": "16:9", - "num_images": 1 + "aspect_ratio": "1:1", + "num_images": 1, + "resolution": "1K", + "negative_prompt": "" }, - "supportedAspectRatios": ["1:1", "4:3", "3:4", "16:9", "9:16", "21:9", "9:21"], - "supportedFormats": ["jpeg", "png"], - "maxImages": 4, - "optimization_system_prompt": "You are optimizing prompts for Imagen 4 Ultra, which excels at precise prompt following and photorealistic detail generation with exceptional instruction alignment. This model interprets prompts with high fidelity to specific technical parameters and photographic characteristics. Focus on: 1) Use precise photographic terminology and specific technical parameters - the model responds exceptionally well to detailed camera settings, lighting specifications, and optical characteristics. 
2) Structure prompts with explicit technical details about depth of field, focal length, aperture settings, and lighting conditions - the model translates these into accurate visual representations. 3) Emphasize realistic material properties and authentic surface textures - describe how different materials interact with light and environmental conditions. 4) Leverage the model's strength in accurate prompt interpretation by being specific about atmospheric conditions, weather effects, and environmental lighting. 5) Include detailed descriptions of optical phenomena like bokeh patterns, light refraction, and shadow characteristics - the model excels at rendering these accurately. 6) Build prompts with layered technical specifications that the model can interpret with high precision. Example optimizations: 'mountain landscape' → 'alpine mountain range captured with 35mm focal length at f/8 aperture, natural depth of field from 3 meters to infinity, golden hour lighting creating warm 3200K color temperature, volumetric light rays penetrating morning mist, snow-covered granite peaks showing natural geological stratification, atmospheric perspective creating layered depth, natural color grading with enhanced dynamic range'" + "supportedAspectRatios": [ + "1:1", + "16:9", + "9:16", + "3:4", + "4:3" + ], + "supportedFormats": [ + "jpeg", + "png" + ], + "maxImages": 1, + "parameters": { + "prompt": { + "type": "string", + "required": true, + "description": "The text prompt describing what you want to see" + }, + "aspect_ratio": { + "type": "string", + "required": false, + "description": "The aspect ratio of the generated image", + "default": "1:1", + "enum": [ + "1:1", + "16:9", + "9:16", + "3:4", + "4:3" + ] + }, + "num_images": { + "type": "integer", + "required": false, + "description": "Number of images to generate (1-4)", + "default": 1, + "minimum": 1, + "maximum": 1 + }, + "resolution": { + "type": "string", + "required": false, + "description": "", + "default": 
"1K", + "enum": [ + "1K", + "2K" + ] + }, + "seed": { + "type": "integer", + "required": false, + "description": "Random seed for reproducible generation" + }, + "negative_prompt": { + "type": "string", + "required": false, + "description": "A description of what to discourage in the generated images", + "default": "" + } + }, + "optimization_system_prompt": "You are optimizing prompts for Imagen 4 Ultra (Preview). Focus on detailed visual descriptions, artistic style, composition, and technical aspects that enhance image quality. Be specific about lighting, colors, textures, and spatial relationships." } diff --git a/models/nano-banana-edit.json b/models/nano-banana-edit.json new file mode 100644 index 0000000..35185e2 --- /dev/null +++ b/models/nano-banana-edit.json @@ -0,0 +1,67 @@ +{ + "id": "fal-ai/nano-banana/edit", + "name": "Nano Banana Edit", + "description": "Google's state-of-the-art image editing model", + "category": "Image Editing", + "costPerImage": 0.04, + "type": "image-edit", + "defaultParams": { + "num_images": 1, + "sync_mode": false, + "output_format": "jpeg" + }, + "supportedAspectRatios": [ + "1:1", + "16:9", + "9:16", + "4:3", + "3:4" + ], + "supportedFormats": [ + "jpeg", + "png" + ], + "maxImages": 4, + "parameters": { + "prompt": { + "type": "string", + "required": true, + "description": "Description of the edit to make" + }, + "num_images": { + "type": "integer", + "required": false, + "description": "Number of images to generate", + "default": 1, + "minimum": 1, + "maximum": 4 + }, + "sync_mode": { + "type": "boolean", + "required": false, + "description": "When true, images will be returned as data URIs instead of URLs.", + "default": false + }, + "image_urls": { + "type": "array", + "required": true, + "description": "List of URLs of input images for editing." 
+ }, + "output_format": { + "type": "string", + "required": false, + "description": "Output format for the images", + "default": "jpeg", + "enum": [ + "jpeg", + "png" + ] + }, + "image_url": { + "type": "string", + "required": true, + "description": "URL of the image to edit" + } + }, + "optimization_system_prompt": "You are optimizing edit instructions for Nano Banana Edit. This model excels at precise image modifications while preserving the original image's structure and quality. Focus on: 1) Clear, specific edit instructions that describe exactly what to change. 2) Preserve elements that should remain unchanged by explicitly mentioning what to keep. 3) Use directional and positional language for spatial edits. 4) For style changes, describe the target style in detail. Example: 'change color to red' → 'Change the car's color from blue to bright cherry red while preserving all reflections, shadows, and environmental lighting'" +} diff --git a/models/nano-banana.json b/models/nano-banana.json new file mode 100644 index 0000000..d424fb1 --- /dev/null +++ b/models/nano-banana.json @@ -0,0 +1,57 @@ +{ + "id": "fal-ai/nano-banana", + "name": "Nano Banana", + "description": "Google's state-of-the-art image generation model", + "category": "Advanced", + "costPerImage": 0.04, + "type": "text-to-image", + "defaultParams": { + "num_images": 1, + "sync_mode": false, + "output_format": "jpeg" + }, + "supportedAspectRatios": [ + "1:1", + "16:9", + "9:16", + "4:3", + "3:4" + ], + "supportedFormats": [ + "jpeg", + "png" + ], + "maxImages": 4, + "parameters": { + "prompt": { + "type": "string", + "required": true, + "description": "The prompt for image generation" + }, + "num_images": { + "type": "integer", + "required": false, + "description": "Number of images to generate", + "default": 1, + "minimum": 1, + "maximum": 4 + }, + "sync_mode": { + "type": "boolean", + "required": false, + "description": "When true, images will be returned as data URIs instead of URLs.", + 
"default": false + }, + "output_format": { + "type": "string", + "required": false, + "description": "Output format for the images", + "default": "jpeg", + "enum": [ + "jpeg", + "png" + ] + } + }, + "optimization_system_prompt": "You are optimizing prompts for Nano Banana. Focus on detailed visual descriptions, artistic style, composition, and technical aspects that enhance image quality. Be specific about lighting, colors, textures, and spatial relationships." +} diff --git a/models/qwen-image-edit-plus.json b/models/qwen-image-edit-plus.json new file mode 100644 index 0000000..c67440e --- /dev/null +++ b/models/qwen-image-edit-plus.json @@ -0,0 +1,121 @@ +{ + "id": "fal-ai/qwen-image-edit-plus", + "name": "Qwen Image Edit Plus", + "description": "Advanced image editing with Qwen model", + "category": "Image Editing", + "costPerImage": 0.04, + "type": "image-edit", + "defaultParams": { + "num_images": 1, + "image_size": "square_hd", + "enable_safety_checker": true, + "acceleration": "regular", + "output_format": "png", + "sync_mode": false, + "guidance_scale": 4, + "num_inference_steps": 50, + "negative_prompt": " " + }, + "supportedAspectRatios": [ + "1:1", + "16:9", + "9:16", + "4:3", + "3:4" + ], + "supportedFormats": [ + "jpeg", + "png" + ], + "maxImages": 4, + "parameters": { + "prompt": { + "type": "string", + "required": true, + "description": "Description of the edit to make" + }, + "num_images": { + "type": "integer", + "required": false, + "description": "The number of images to generate.", + "default": 1, + "minimum": 1, + "maximum": 4 + }, + "image_size": { + "required": false, + "description": "The size of the generated image.", + "default": "square_hd" + }, + "enable_safety_checker": { + "type": "boolean", + "required": false, + "description": "If set to true, the safety checker will be enabled.", + "default": true + }, + "acceleration": { + "type": "string", + "required": false, + "description": "Acceleration level for image generation. 
Options: 'none', 'regular'. Higher acceleration increases speed. 'regular' balances speed and quality.", + "default": "regular", + "enum": [ + "none", + "regular" + ] + }, + "output_format": { + "type": "string", + "required": false, + "description": "The format of the generated image.", + "default": "png", + "enum": [ + "jpeg", + "png" + ] + }, + "sync_mode": { + "type": "boolean", + "required": false, + "description": "\n If set to true, the function will wait for the image to be generated and uploaded\n before returning the response. This will increase the latency of the function but\n it allows you to get the image directly in the response without going through the CDN.\n ", + "default": false + }, + "guidance_scale": { + "type": "number", + "required": false, + "description": "\n The CFG (Classifier Free Guidance) scale is a measure of how close you want\n the model to stick to your prompt when looking for a related image to show you.\n ", + "default": 4, + "minimum": 0, + "maximum": 20 + }, + "num_inference_steps": { + "type": "integer", + "required": false, + "description": "The number of inference steps to perform.", + "default": 50, + "minimum": 2, + "maximum": 100 + }, + "image_urls": { + "type": "array", + "required": true, + "description": "The URLs of the images to edit." + }, + "negative_prompt": { + "type": "string", + "required": false, + "description": "The negative prompt for the generation", + "default": " " + }, + "seed": { + "type": "integer", + "required": false, + "description": "\n The same seed and the same prompt given to the same version of the model\n will output the same image every time.\n " + }, + "image_url": { + "type": "string", + "required": true, + "description": "URL of the image to edit" + } + }, + "optimization_system_prompt": "You are optimizing edit instructions for Qwen Image Edit Plus. This model excels at precise image modifications while preserving the original image's structure and quality. 
Focus on: 1) Clear, specific edit instructions that describe exactly what to change. 2) Preserve elements that should remain unchanged by explicitly mentioning what to keep. 3) Use directional and positional language for spatial edits. 4) For style changes, describe the target style in detail. Example: 'change color to red' → 'Change the car's color from blue to bright cherry red while preserving all reflections, shadows, and environmental lighting'" +} diff --git a/models/qwen-image.json b/models/qwen-image.json index a889698..3132a83 100644 --- a/models/qwen-image.json +++ b/models/qwen-image.json @@ -4,15 +4,116 @@ "description": "High-quality text-to-image model with excellent text rendering", "category": "General Purpose", "costPerImage": 0.05, + "type": "text-to-image", "defaultParams": { + "num_images": 1, "image_size": "landscape_4_3", - "num_inference_steps": 30, + "acceleration": "none", + "output_format": "png", + "sync_mode": false, + "loras": [], "guidance_scale": 2.5, - "enable_safety_checker": true, - "output_format": "png" + "num_inference_steps": 30, + "negative_prompt": " ", + "enable_safety_checker": true }, - "supportedAspectRatios": ["landscape_4_3", "portrait_3_4", "square", "landscape_16_9", "portrait_9_16"], - "supportedFormats": ["jpeg", "png"], + "supportedAspectRatios": [ + "1:1", + "16:9", + "9:16", + "4:3", + "3:4" + ], + "supportedFormats": [ + "jpeg", + "png" + ], "maxImages": 4, - "optimization_system_prompt": "You are optimizing prompts for Qwen Image, which excels at accurate text rendering and typography integration within images using advanced dual-encoding mechanisms. This model interprets text-based prompts with exceptional precision for character accuracy and typographic fidelity. Focus on: 1) Specify exact text content with precise spelling and character accuracy - the model excels at rendering complex scripts, rare characters, and multi-language text with high fidelity. 
2) Describe detailed typographic specifications including font characteristics, text sizing, spacing, and alignment - the model responds well to specific typography terminology. 3) Structure prompts with clear text-image integration descriptions - explain how text elements relate spatially and visually to surrounding image components. 4) Leverage the model's strength in text hierarchy and readability by describing text layering, contrast relationships, and visual prominence within the composition. 5) Include specific descriptions of text styling effects like shadows, outlines, textures, and material properties applied to letterforms. 6) Build prompts with detailed text positioning and environmental integration - describe how text interacts with lighting, surfaces, and background elements. Example optimizations: 'vintage coffee shop sign' → 'weathered wooden sign displaying CAFÉ NOIR in hand-painted serif letterforms, individual characters showing authentic paint wear patterns, text positioned with balanced spacing and centered alignment, cream-colored subtitle EST. 1892 in smaller sans-serif characters below main text, letters integrated naturally with wood grain texture and surface weathering'" + "parameters": { + "prompt": { + "type": "string", + "required": true, + "description": "The prompt to generate the image with" + }, + "num_images": { + "type": "integer", + "required": false, + "description": "The number of images to generate.", + "default": 1, + "minimum": 1, + "maximum": 4 + }, + "image_size": { + "required": false, + "description": "The size of the generated image.", + "default": "landscape_4_3" + }, + "acceleration": { + "type": "string", + "required": false, + "description": "Acceleration level for image generation. Options: 'none', 'regular', 'high'. Higher acceleration increases speed. 'regular' balances speed and quality. 
'high' is recommended for images without text.", + "default": "none", + "enum": [ + "none", + "regular", + "high" + ] + }, + "output_format": { + "type": "string", + "required": false, + "description": "The format of the generated image.", + "default": "png", + "enum": [ + "jpeg", + "png" + ] + }, + "sync_mode": { + "type": "boolean", + "required": false, + "description": "\n If set to true, the function will wait for the image to be generated and uploaded\n before returning the response. This will increase the latency of the function but\n it allows you to get the image directly in the response without going through the CDN.\n ", + "default": false + }, + "loras": { + "type": "array", + "required": false, + "description": "\n The LoRAs to use for the image generation. You can use up to 3 LoRAs\n and they will be merged together to generate the final image.\n ", + "default": [] + }, + "guidance_scale": { + "type": "number", + "required": false, + "description": "\n The CFG (Classifier Free Guidance) scale is a measure of how close you want\n the model to stick to your prompt when looking for a related image to show you.\n ", + "default": 2.5, + "minimum": 0, + "maximum": 20 + }, + "num_inference_steps": { + "type": "integer", + "required": false, + "description": "The number of inference steps to perform.", + "default": 30, + "minimum": 2, + "maximum": 250 + }, + "seed": { + "type": "integer", + "required": false, + "description": "\n The same seed and the same prompt given to the same version of the model\n will output the same image every time.\n " + }, + "negative_prompt": { + "type": "string", + "required": false, + "description": "The negative prompt for the generation", + "default": " " + }, + "enable_safety_checker": { + "type": "boolean", + "required": false, + "description": "If set to true, the safety checker will be enabled.", + "default": true + } + }, + "optimization_system_prompt": "You are optimizing prompts for Qwen Image. 
Focus on detailed visual descriptions, artistic style, composition, and technical aspects that enhance image quality. Be specific about lighting, colors, textures, and spatial relationships." } From 234b6332afc17fac196a5039a910531ec06bb078 Mon Sep 17 00:00:00 2001 From: Selcukatli Date: Thu, 25 Sep 2025 13:29:57 -0400 Subject: [PATCH 3/3] Consolidate FAL documentation into unified README - Merged fal-cli-integration.md and fal-mcp-integration.md - Created comprehensive fal-cli-mcp-readme.md - Added Claude Code CLI integration instructions - Updated with all 13 models and pricing - Improved MCP tool documentation with examples - Better organization separating CLI and MCP interfaces Author: Selcuk Atli --- rules/fal-cli-integration.md | 231 ------------------------ rules/fal-cli-mcp-readme.md | 337 +++++++++++++++++++++++++++++++++++ rules/fal-mcp-integration.md | 280 ----------------------------- 3 files changed, 337 insertions(+), 511 deletions(-) delete mode 100644 rules/fal-cli-integration.md create mode 100644 rules/fal-cli-mcp-readme.md delete mode 100644 rules/fal-mcp-integration.md diff --git a/rules/fal-cli-integration.md b/rules/fal-cli-integration.md deleted file mode 100644 index e1cd00a..0000000 --- a/rules/fal-cli-integration.md +++ /dev/null @@ -1,231 +0,0 @@ -# FAL CLI Integration Rules for AI Agents - -**Tag this file when you need AI image generation in any project: `@fal-cli-integration.md`** - -## Overview -The FAL CLI is a globally installed command-line tool for generating high-quality AI images using FAL AI models. This document provides rules and guidelines for AI agents to use the FAL CLI in any project. 
- -## ⚠️ CRITICAL SECURITY RULES - -### API Key Protection -- **NEVER** include FAL API keys directly in code, commands, or outputs -- **NEVER** echo, print, or display the FAL_KEY environment variable -- **NEVER** commit API keys to version control -- **ALWAYS** assume the API key is already configured in the user's environment -- **ALWAYS** use the CLI without exposing credentials - -## Available Commands - -### Basic Image Generation -```bash -# Generate with specific model and prompt -fal-cli generate -p "your prompt here" -m "model-id" --no-optimize - -# Generate with prompt optimization -fal-cli generate -p "your prompt here" -m "model-id" - -# Generate with custom output directory -fal-cli generate -p "your prompt here" -m "model-id" -o ./output/path -``` - -### Model Discovery -```bash -# List all available models with pricing -fal-cli models - -# Get models in JSON format for parsing -fal-cli models --json - -# Filter models by category -fal-cli models -c "Ultra Quality" -``` - -### Prompt Optimization -```bash -# Optimize a prompt for better results -fal-cli optimize "your basic prompt" - -# Optimize for specific model -fal-cli optimize "your prompt" -m "flux-pro-ultra" -``` - -## Available Models & Costs - -| Model ID | Name | Cost | Best For | -|----------|------|------|----------| -| `flux-kontext-pro` | FLUX Pro Kontext | $0.04/img | Contextual understanding, cheapest option | -| `flux-kontext-max` | FLUX Pro Kontext Max | $0.08/img | Advanced context, complex scenes | -| `flux-pro-ultra` | FLUX Pro Ultra v1.1 | $0.06/img | Ultra-high quality, 2K-4MP resolution | -| `imagen4-ultra` | Imagen 4 Ultra | $0.06/img | Photorealistic images | -| `qwen-image` | Qwen Image | $0.05/img | Excellent text rendering in images | - -## Integration Patterns - -### For Web Projects -```bash -# Generate assets for web application -fal-cli generate -p "hero banner with abstract gradient" -m "flux-pro-ultra" -o ./public/images - -# Generate multiple icon variations 
-fal-cli generate -p "minimalist app icon" -m "qwen-image" -o ./src/assets/icons -``` - -### For Design Projects -```bash -# Generate design concepts -fal-cli generate -p "modern dashboard UI concept" -m "imagen4-ultra" -o ./designs/concepts - -# Generate background patterns -fal-cli generate -p "seamless geometric pattern" -m "flux-kontext-pro" -o ./assets/patterns -``` - -### For Content Creation -```bash -# Generate blog post images -fal-cli generate -p "illustration for tech blog about AI" -m "flux-pro-ultra" -o ./content/images - -# Generate social media content -fal-cli generate -p "instagram post about sustainability" -m "imagen4-ultra" -o ./social/instagram -``` - -## Output Structure -Generated images are saved with this structure: -``` -output_directory/ -└── gen_{timestamp}_{id}/ - └── {model}_{prompt#}_{iteration}_{image#}_{timestamp}_{random}.png -``` - -## Best Practices - -### 1. Model Selection -- Use `flux-kontext-pro` for testing and development (cheapest at $0.04) -- Use `flux-pro-ultra` or `imagen4-ultra` for production-quality images -- Use `qwen-image` when text needs to be rendered in the image - -### 2. Prompt Optimization -- Always use `--no-optimize` flag for testing to save API calls -- Use optimization for final production images -- Optimize prompts separately with `fal-cli optimize` to reuse - -### 3. Output Management -- Always specify output directory with `-o` flag -- Use project-relative paths for consistency -- Create dedicated directories for different image types - -### 4. 
Cost Management -- Check costs before bulk generation: `fal-cli models` -- Start with single images before batch generation -- Use cheaper models for prototyping - -## Error Handling - -### Common Issues & Solutions - -```bash -# If "Unauthorized" error occurs -# Check API key configuration: -fal-cli config --show - -# If "command not found" error -# Ensure global installation: -which fal-cli - -# If output directory error -# Use absolute or relative paths: -fal-cli generate -p "test" -m "flux-kontext-pro" -o ~/Desktop/test -``` - -## Automation Examples - -### Bash Script Integration -```bash -#!/bin/bash -# generate-assets.sh - -# Generate multiple images for a project -PROMPTS=( - "hero section background" - "feature icon set" - "testimonial avatars" -) - -for prompt in "${PROMPTS[@]}"; do - fal-cli generate -p "$prompt" -m "flux-kontext-pro" -o ./generated -done -``` - -### Node.js Integration -```javascript -// generate-images.js -import { exec } from 'child_process'; -import { promisify } from 'util'; - -const execAsync = promisify(exec); - -async function generateImage(prompt, model = 'flux-kontext-pro', outputDir = './images') { - try { - const command = `fal-cli generate -p "${prompt}" -m "${model}" -o ${outputDir} --no-optimize`; - const { stdout, stderr } = await execAsync(command); - console.log('Generated:', stdout); - return stdout; - } catch (error) { - console.error('Generation failed:', error); - throw error; - } -} - -// Usage -await generateImage('modern website hero image', 'flux-pro-ultra', './public/images'); -``` - -## DO's and DON'Ts - -### ✅ DO's -- DO use the CLI for generating project assets -- DO specify output directories explicitly -- DO check model costs before bulk generation -- DO use prompt optimization for production images -- DO handle errors gracefully in scripts - -### ❌ DON'Ts -- DON'T expose or log API keys -- DON'T hardcode API keys in scripts -- DON'T generate without checking costs first -- DON'T use expensive models for 
testing -- DON'T assume API key exists - check configuration first - -## Quick Reference - -```bash -# Check if FAL CLI is available -which fal-cli - -# Check configuration (without showing key) -fal-cli config --show - -# Generate cheap test image -fal-cli generate -p "test" -m "flux-kontext-pro" --no-optimize -o ./test - -# Generate production image with optimization -fal-cli generate -p "professional headshot" -m "imagen4-ultra" -o ./final - -# List models with costs -fal-cli models - -# Get help -fal-cli --help -fal-cli generate --help -``` - -## Notes for AI Agents - -When assisting users with FAL CLI: -1. Always protect API key security -2. Suggest cost-effective models for testing -3. Recommend appropriate output directories -4. Provide complete, working commands -5. Include error handling in scripts -6. Explain cost implications of choices - -Remember: The FAL CLI is a powerful tool for AI image generation. Use it responsibly and always prioritize security and cost-effectiveness. \ No newline at end of file diff --git a/rules/fal-cli-mcp-readme.md b/rules/fal-cli-mcp-readme.md new file mode 100644 index 0000000..44ee3d4 --- /dev/null +++ b/rules/fal-cli-mcp-readme.md @@ -0,0 +1,337 @@ +# FAL CLI & MCP Server README + +**Tag this file when you need AI image generation: `@fal-cli-mcp-readme.md`** + +## Overview +FAL AI provides powerful image generation models accessible through two interfaces: +1. **FAL CLI**: Command-line tool for direct terminal usage +2. **FAL MCP Server**: Model Context Protocol integration for AI assistants + +Both interfaces share the same core functionality and models. 
+ +## ⚠️ CRITICAL SECURITY RULES + +### API Key Protection +- **NEVER** include FAL API keys directly in code, configs, or outputs +- **NEVER** echo, print, log, or display the FAL_KEY +- **NEVER** commit API keys to version control +- **ALWAYS** use environment variables or secure config files +- **ALWAYS** refer to keys as "your_fal_api_key" in examples + +## Available Models & Pricing + +| Model ID | Name | Cost/Image | Best For | +|----------|------|------------|----------| +| `flux-kontext-pro` | FLUX Pro Kontext | $0.04 | Testing, context understanding (low cost) | +| `flux-kontext-max` | FLUX Pro Kontext Max | $0.08 | Advanced context, complex scenes | +| `flux-pro-ultra` | FLUX Pro Ultra v1.1 | $0.06 | Ultra-high quality, 2K-4MP | +| `flux-pro-kontext-edit` | FLUX Pro Kontext Edit | $0.05 | Image-to-image transformation | +| `imagen4-ultra` | Imagen 4 Ultra | $0.06 | Photorealistic images | +| `qwen-image` | Qwen Image | $0.05 | Text rendering in images | +| `qwen-image-edit-plus` | Qwen Image Edit Plus | $0.04 | Advanced image editing | +| `gemini-25-flash-image` | Gemini 2.5 Flash | $0.03 | Fast generation | +| `gemini-25-flash-edit` | Gemini 2.5 Flash Edit | $0.03 | Fast image editing | +| `nano-banana` | Nano Banana | $0.04 | Google's versatile model | +| `nano-banana-edit` | Nano Banana Edit | $0.04 | Google's edit model | +| `ideogram-character-edit` | Ideogram Character Edit | $0.05 | Character consistency | + +--- + +## INTERFACE 1: FAL CLI (Command Line) + +### Installation Check +```bash +# Check if globally installed +which fal-cli + +# Check configuration (without showing key) +fal-cli config --show +``` + +### Basic Commands + +#### Generate Images +```bash +# Basic generation +fal-cli generate -p "your prompt" -m "model-id" -o ./output + +# With prompt optimization +fal-cli generate -p "your prompt" -m "model-id" + +# Skip optimization (faster, cheaper) +fal-cli generate -p "your prompt" -m "model-id" --no-optimize +``` + +#### List Models
+```bash +# Show all models with pricing +fal-cli models + +# Get JSON output for parsing +fal-cli models --json + +# Filter by category +fal-cli models -c "Ultra Quality" +``` + +#### Optimize Prompts +```bash +# Basic optimization +fal-cli optimize "your basic prompt" + +# For specific model +fal-cli optimize "your prompt" -m "flux-pro-ultra" +``` + +### Integration Examples + +#### Bash Script +```bash +#!/bin/bash +# Generate multiple project assets +PROMPTS=("hero background" "feature icons" "testimonials") + +for prompt in "${PROMPTS[@]}"; do + fal-cli generate -p "$prompt" -m "flux-kontext-pro" -o ./generated +done +``` + +#### Node.js Integration +```javascript +import { exec } from 'child_process'; +import { promisify } from 'util'; + +const execAsync = promisify(exec); + +async function generateImage(prompt, model = 'flux-kontext-pro', outputDir = './images') { + const command = `fal-cli generate -p "${prompt}" -m "${model}" -o ${outputDir} --no-optimize`; + const { stdout } = await execAsync(command); + return stdout; +} +``` + +--- + +## INTERFACE 2: MCP SERVER (AI Assistant Integration) + +### Starting the Server +```bash +# Default port (3001) +npm run mcp-server + +# Custom port +PORT=3002 npm run mcp-server + +# Direct execution +FAL_KEY="your_key" node /path/to/fal-cli/mcp-server.js +``` + +### Configuration for Different Clients + +#### Claude Desktop +`~/Library/Application Support/Claude/claude_desktop_config.json`: +```json +{ + "mcpServers": { + "fal-cli": { + "command": "node", + "args": ["/path/to/fal-cli/mcp-server.js"], + "env": { + "FAL_KEY": "your_fal_api_key" + } + } + } +} +``` + +#### Claude Code CLI +```bash +# Add at user level (recommended) +claude mcp add fal -s user -e FAL_KEY="your_key" -- node /path/to/fal-cli/mcp-server.js + +# Add at project level +claude mcp add fal -s project -e FAL_KEY="your_key" -- node /path/to/fal-cli/mcp-server.js + +# Add at local level (private to you, current project only) +claude mcp add fal -s local -e FAL_KEY="your_key" -- 
node /path/to/fal-cli/mcp-server.js +``` + +### Available MCP Tools + +#### 1. `mcp__fal__generate_image` +Generate images with any FAL model. + +**Example:** +```javascript +mcp__fal__generate_image({ + prompt: "A modern dashboard UI", + model: "fal-ai/flux-pro/kontext/text-to-image", + save_to_disk: true, + output_directory: "./generated", + parameters: { + aspect_ratio: "16:9", + num_images: 1, + guidance_scale: 3.5 + } +}) +``` + +#### 2. `mcp__fal__list_models` +Get available models with pricing. +- Parameters: `quality`, `max_cost`, `provider` +- Note: `type` parameter currently not working + +#### 3. `mcp__fal__optimize_prompt` +Enhance prompts using AI. +- Parameters: `prompt`, `model`, `style` + +#### 4. `mcp__fal__batch_generate` +Generate multiple images. +- Parameters: `tasks`, `output_directory`, `batch_size` + +#### 5. `mcp__fal__calculate_cost` +Estimate generation costs. +- Parameters: `tasks`, `include_optimization` + +#### 6. `mcp__fal__get_model_info` +Get model details. +- Parameters: `model_id` + +#### 7. `mcp__fal__get_model_recommendations` +Get model suggestions. +- Parameters: `quality`, `speed`, `budget`, `type` + +--- + +## Common Issues & Solutions + +### "Unauthorized" Error +```bash +# Check API key configuration +fal-cli config --show + +# Verify environment variable is set (without printing the key) +[ -n "$FAL_KEY" ] && echo "FAL_KEY is set" || echo "FAL_KEY is not set" + +# Test with direct command +FAL_KEY="your_key" fal-cli models +``` + +### "Command not found" +```bash +# Check installation +which fal-cli +npm list -g fal-cli + +# Reinstall if needed +npm install -g fal-cli +``` + +### MCP Tools Not Available +1. Restart Claude Code/Desktop completely +2. Verify MCP server is running +3. Check configuration path is correct +4. Ensure FAL_KEY is set + +### Model Not Found +- Use exact model IDs from the table above +- Run `fal-cli models` to see current list +- Check for typos in model names + +--- + +## Best Practices + +### Development +1. 
Use `flux-kontext-pro` for testing ($0.04 - cheapest) +2. Always specify output directories with `-o` +3. Use `--no-optimize` flag during development +4. Test with single images before batch generation + +### Production +1. Use `flux-pro-ultra` or `imagen4-ultra` for quality +2. Always optimize prompts for better results +3. Calculate costs before bulk operations +4. Set up proper error handling in scripts + +### Cost Control +- Default $5 spending limit per session +- Check costs: `fal-cli models` or `mcp__fal__calculate_cost` +- Start with cheaper models for prototyping +- Batch similar requests for efficiency + +--- + +## Output Structure +``` +output_directory/ +└── gen_{timestamp}_{id}/ + └── {model}_{prompt#}_{iteration}_{image#}_{timestamp}_{random}.png +``` + +--- + +## Quick Reference + +### CLI Commands +```bash +# Test generation (cheap) +fal-cli generate -p "test" -m "flux-kontext-pro" --no-optimize -o ./test + +# Production generation +fal-cli generate -p "professional photo" -m "imagen4-ultra" -o ./final + +# List all models +fal-cli models + +# Optimize prompt +fal-cli optimize "basic prompt" +``` + +### MCP Testing +```bash +# Start server +npm run mcp-server + +# Debug mode +FAL_KEY="your_key" node mcp-server.js 2>&1 | tee mcp.log + +# Check if running +ps aux | grep mcp-server +``` + +--- + +## DO's and DON'Ts + +### ✅ DO's +- DO protect API keys at all times +- DO check costs before generation +- DO use absolute paths in configs +- DO test with cheap models first +- DO specify output directories +- DO handle errors gracefully + +### ❌ DON'Ts +- DON'T expose API keys in any output +- DON'T hardcode keys in scripts +- DON'T use expensive models for testing +- DON'T exceed spending limits +- DON'T use relative paths in MCP configs +- DON'T commit generated test images + +--- + +## Notes for AI Agents + +When assisting with FAL: +1. **Security First**: Never expose API keys +2. **Cost Awareness**: Always mention pricing implications +3. 
**Clear Examples**: Provide complete, working commands +4. **Error Handling**: Include troubleshooting steps +5. **Interface Choice**: + - Use CLI for scripts and automation + - Use MCP for AI assistant integration +6. **Model Selection**: Recommend appropriate models for use case + +Remember: FAL provides powerful AI image generation. Help users choose the right interface (CLI vs MCP), model (quality vs cost), and approach (single vs batch) for their specific needs. \ No newline at end of file diff --git a/rules/fal-mcp-integration.md b/rules/fal-mcp-integration.md deleted file mode 100644 index 9be8fb3..0000000 --- a/rules/fal-mcp-integration.md +++ /dev/null @@ -1,280 +0,0 @@ -# FAL MCP Server Integration Rules - -**Tag this file when you need FAL AI image generation through MCP: `@fal-mcp-integration.md`** - -## Overview -The FAL MCP Server provides Model Context Protocol integration for AI assistants to generate high-quality AI images using FAL AI models. - -## ⚠️ CRITICAL SECURITY RULES - -### API Key Protection -- **NEVER** include FAL API keys directly in configurations shown to users -- **NEVER** display or log the FAL_KEY in any output -- **ALWAYS** use environment variables or secure config files -- **ALWAYS** refer to the key as "your_fal_api_key" in examples - -## MCP Server Setup - -### Starting the Server Standalone -```bash -# Default port (3001) -npm run mcp-server - -# Custom port -PORT=3002 npm run mcp-server - -# From any directory (if globally installed) -cd /path/to/fal-cli && npm run mcp-server -``` - -## Integration Configurations - -### Claude Desktop Integration -Claude Desktop is the standalone desktop application. 
Configure in: -`~/Library/Application Support/Claude/claude_desktop_config.json` - -```json -{ - "mcpServers": { - "fal-cli": { - "command": "node", - "args": ["/Users/your-username/Documents/mcps/fal-cli/mcp-server.js"], - "env": { - "FAL_KEY": "your_fal_api_key" - } - } - } -} -``` - -### Claude Code Integration (CLI) -Claude Code is the CLI tool that can use MCP servers. According to the docs, it supports: -- Local stdio servers -- Remote SSE servers -- Remote HTTP servers - -For local stdio integration with Claude Code: -```bash -# Start the MCP server -cd /path/to/fal-cli -FAL_KEY="your_api_key" npm run mcp-server - -# The server will communicate via stdio with Claude Code -``` - -### Cursor Integration -Add to Cursor's MCP settings: -```json -{ - "mcpServers": { - "fal-cli": { - "command": "node", - "args": ["/path/to/fal-cli/mcp-server.js"], - "env": { - "FAL_KEY": "your_fal_api_key" - } - } - } -} -``` - -## Available MCP Tools - -### 1. `generate_image` -Generate a single image with any FAL AI model. - -**Parameters:** -- `model` (required): Model ID (e.g., "flux-kontext-pro") -- `prompt` (required): Text description -- `num_images`: Number of images (1-4) -- `aspect_ratio`: Image ratio ("16:9", "1:1", "9:16", etc.) -- `output_directory`: Where to save images - -**Usage in AI Assistant:** -``` -Generate a cyberpunk city scene using flux-pro-ultra model with 16:9 aspect ratio -``` - -### 2. `list_models` -Get all available models with pricing and capabilities. - -**Parameters:** -- `category` (optional): Filter by category -- `format` (optional): Output format - -**Usage in AI Assistant:** -``` -List all Ultra Quality models with their pricing -``` - -### 3. `optimize_prompt` -Enhance prompts using AI for better generation results. 
- -**Parameters:** -- `prompt` (required): Original prompt -- `model` (optional): Target model -- `style` (optional): Style preference - -**Usage in AI Assistant:** -``` -Optimize "a cat sitting" for photorealistic style -``` - -### 4. `batch_generate` -Generate multiple images with different prompts/models. - -**Parameters:** -- `tasks` (required): Array of generation tasks -- `output_directory`: Where to save images -- `optimize_prompts`: Whether to optimize first - -**Usage in AI Assistant:** -``` -Generate 3 different hero images using flux-kontext-pro -``` - -### 5. `calculate_cost` -Estimate costs before generation. - -**Parameters:** -- `tasks` (required): Array of tasks -- `include_optimization`: Include optimization costs - -**Usage in AI Assistant:** -``` -Calculate cost for 5 images with imagen4-ultra -``` - -### 6. `get_model_info` -Get detailed information about a specific model. - -**Parameters:** -- `model_id` (required): Model identifier -- `include_schema`: Include parameter schema - -**Usage in AI Assistant:** -``` -Get details about flux-pro-ultra including parameters -``` - -## Model Pricing Reference - -| Model | Cost/Image | Best For | -|-------|------------|----------| -| `flux-kontext-pro` | $0.04 | Testing, context understanding | -| `flux-kontext-max` | $0.08 | Advanced context, complex scenes | -| `flux-pro-ultra` | $0.06 | Ultra-high quality, 2K-4MP | -| `imagen4-ultra` | $0.06 | Photorealistic images | -| `qwen-image` | $0.05 | Text rendering in images | - -## Cost Control Features - -The MCP server includes: -- **$5 spending limit** per session by default -- Automatic cost calculation before generation -- Requires confirmation for high-cost operations -- Batch operations show total cost upfront - -## Testing MCP Server Connection - -### 1. Verify Server Starts -```bash -cd /path/to/fal-cli -npm run mcp-server -# Should show: "FAL CLI MCP Server running on stdio" -``` - -### 2. 
Check Available Tools -Once connected, the AI assistant should be able to: -- List available models -- Calculate costs -- Generate images -- Optimize prompts - -### 3. Test Generation -Ask the AI assistant: -``` -"Use the FAL MCP server to generate a simple test image with the cheapest model" -``` - -## Common Issues & Solutions - -### "Unauthorized" Error -- Check FAL_KEY environment variable is set -- Verify API key is valid at fal.ai -- Ensure key is passed to MCP server - -### "Model not found" Error -- Use `list_models` to see available models -- Check model ID spelling -- Use exact IDs like "flux-kontext-pro" - -### "Command not found" Error -- Verify full path to mcp-server.js -- Check Node.js is installed -- Ensure fal-cli dependencies are installed - -### Server Won't Start -```bash -# Debug steps -cd /path/to/fal-cli -npm install # Ensure dependencies installed -node --version # Check Node.js 18+ -FAL_KEY="your_key" node mcp-server.js # Test directly -``` - -## Best Practices - -### For Development -1. Use `flux-kontext-pro` (cheapest at $0.04) -2. Always check costs before generation -3. Test with single images first - -### For Production -1. Use `flux-pro-ultra` or `imagen4-ultra` -2. Optimize all prompts before generation -3. Set appropriate output directories - -### For Integration -1. Store API keys securely -2. Use absolute paths in configurations -3. 
Test connection before bulk operations - -## DO's and DON'Ts - -### ✅ DO's -- DO protect API keys in all configurations -- DO use cost calculation before bulk operations -- DO specify full paths to mcp-server.js -- DO test with cheap models first - -### ❌ DON'Ts -- DON'T expose API keys in examples or logs -- DON'T hardcode keys in configurations -- DON'T exceed spending limits carelessly -- DON'T use relative paths in configs - -## Quick Test Commands - -```bash -# Start server with debug output -FAL_KEY="your_key" node /path/to/fal-cli/mcp-server.js 2>&1 | tee mcp.log - -# Check if port is in use -lsof -i :3001 - -# Test API key validity -FAL_KEY="your_key" node -e "console.log('Key format valid')" -``` - -## Notes for AI Assistants - -When using FAL MCP Server: -1. Never expose actual API keys -2. Always suggest cheapest models for testing -3. Calculate costs before any generation -4. Provide clear error messages -5. Confirm high-cost operations -6. Use absolute paths in examples - -Remember: The MCP server bridges FAL AI's powerful image generation with AI assistants while maintaining security and cost control. \ No newline at end of file