diff --git a/README.md b/README.md index aa0dccf41..42178de3e 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ Data flowing into the brain. Each integration is a recipe — markdown + setup h - **Voice**: Phone calls create brain pages via Twilio + OpenAI Realtime (or DIY STT+LLM+TTS). Setup recipe: [`recipes/twilio-voice-brain.md`](recipes/twilio-voice-brain.md). - **Email + calendar**: webhook handlers that route to brain signals. [`docs/integrations/meeting-webhooks.md`](docs/integrations/meeting-webhooks.md). -- **Embedding providers**: 14 recipes covering OpenAI (default fallback), Voyage, ZeroEntropy (default), Google Gemini, Azure OpenAI, MiniMax, Alibaba DashScope, Zhipu, Ollama (local), llama.cpp llama-server (local), LiteLLM proxy. Pricing matrix + decision tree in [`docs/integrations/embedding-providers.md`](docs/integrations/embedding-providers.md). +- **Embedding providers**: 15 recipes covering OpenAI (default fallback), OpenRouter, Voyage, ZeroEntropy (default), Google Gemini, Azure OpenAI, MiniMax, Alibaba DashScope, Zhipu, Ollama (local), llama.cpp llama-server (local), LiteLLM proxy. Pricing matrix + decision tree in [`docs/integrations/embedding-providers.md`](docs/integrations/embedding-providers.md). - **Credential gateway**: vault-aware secret distribution. [`docs/integrations/credential-gateway.md`](docs/integrations/credential-gateway.md). - **MCP clients**: every major MCP client is supported. [`docs/mcp/`](docs/mcp/) per-client setup. diff --git a/docs/integrations/embedding-providers.md b/docs/integrations/embedding-providers.md index 943728e96..0f88e96d9 100644 --- a/docs/integrations/embedding-providers.md +++ b/docs/integrations/embedding-providers.md @@ -1,6 +1,6 @@ # Embedding providers -GBrain ships with 14 embedding-provider recipes covering OpenAI, the major hosted alternatives, three local options, and a universal escape hatch (LiteLLM proxy). 
Run `gbrain providers list` to see the live registry; `gbrain providers explain --json` emits a machine-readable matrix for agents. +GBrain ships with 15 embedding-provider recipes covering OpenAI, the major hosted alternatives, three local options, and a universal escape hatch (LiteLLM proxy). Run `gbrain providers list` to see the live registry; `gbrain providers explain --json` emits a machine-readable matrix for agents. This page is the human-readable counterpart: capability per provider, env-var setup, dimensions, cost, and known constraints. @@ -20,6 +20,7 @@ gbrain init --pglite --model voyage # use a non-default provider | `openai` | `OPENAI_API_KEY` | 1536 | 0.13 | no | no | | `voyage` | `VOYAGE_API_KEY` | 1024 | 0.18 | no | yes (`voyage-multimodal-3`) | | `google` | `GOOGLE_GENERATIVE_AI_API_KEY` | 768 | 0.025 | no | no | +| `openrouter` | `OPENROUTER_API_KEY` | 1536 | 0.02 | no | model-dependent | | `azure-openai` | `AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_DEPLOYMENT` | 1536 | 0.13 | no | no | | `minimax` | `MINIMAX_API_KEY` | 1536 | 0.07 | no | no | | `dashscope` | `DASHSCOPE_API_KEY` | 1024 | varies | no | no | @@ -37,6 +38,7 @@ gbrain init --pglite --model voyage # use a non-default provider - **Cost-sensitive, English-only**: Ollama (free, local) or Voyage (paid, best quality per dollar). - **Quality-first**: Voyage `voyage-4-large` (1024-2048 dims, ~3-4× more dense tokens than OpenAI tiktoken). - **Reranking pair**: Voyage (their reranker `rerank-2.5` pairs cleanly with Voyage embeddings). +- **One key for many hosted models**: OpenRouter. Set `OPENROUTER_API_KEY` and use `openrouter:<model>`. - **Enterprise compliance**: Azure OpenAI (data residency + private endpoints) or self-hosted via llama-server / Ollama. - **China region**: DashScope (Alibaba) or Zhipu (BigModel). DashScope's international endpoint at `dashscope-intl.aliyuncs.com`; override `provider_base_urls.dashscope` for the China endpoint. 
- **OSS local, full control**: llama-server (`llama.cpp`) for any GGUF model; Ollama for the curated catalog. @@ -60,6 +62,12 @@ Set `GOOGLE_GENERATIVE_AI_API_KEY` (the AI Studio public API key). Model: `gemin For GCP service-account / Vertex AI auth (production deployments), see the v0.32.x follow-up — Vertex ADC is on the roadmap. +### OpenRouter + +Set `OPENROUTER_API_KEY`. Optional `OPENROUTER_BASE_URL` overrides the default `https://openrouter.ai/api/v1`. Default embedding model: `openai/text-embedding-3-small` (1536 dims). OpenRouter's catalog is dynamic; use `gbrain providers test --model openrouter:<model>` to smoke-test any embedding model listed in OpenRouter's embeddings catalog. + +Chat models route through the same OpenAI-compatible endpoint, so `openrouter:<model>` works for model IDs such as `openai/gpt-5.2`, `anthropic/claude-haiku-4.5`, or any other OpenRouter chat model your account can access. Tool-calling behavior remains model-dependent. + ### Azure OpenAI Enterprise OpenAI behind Azure tenancy. Required env: `AZURE_OPENAI_API_KEY`, `AZURE_OPENAI_ENDPOINT` (e.g. `https://my-resource.openai.azure.com`), `AZURE_OPENAI_DEPLOYMENT` (the deployment name from your Azure portal). Optional: `AZURE_OPENAI_API_VERSION` (defaults to `2024-10-21`). @@ -113,11 +121,12 @@ For most users: **stay at 1024 or 1536**. Bigger isn't better below the noise fl ## My provider isn't listed -Three options: +Four options: 1. **Use LiteLLM proxy** (above) — the universal escape hatch. Works for 100+ providers. -2. **Open a feature request** at [github.com/garrytan/gbrain/issues](https://github.com/garrytan/gbrain/issues) with the provider's API docs URL and a setup snippet. Recipes are ~30-40 lines of TypeScript. -3. **Submit a recipe**: clone, copy `src/core/ai/recipes/voyage.ts` as the gold-standard openai-compat template, register in `src/core/ai/recipes/index.ts`, add a per-recipe smoke test under `test/ai/recipe-<id>.test.ts`. 
The recipe contract test (`test/ai/recipes-contract.test.ts`) and IRON RULE regression test pin the structural invariants. +2. **Use OpenRouter** when the provider/model is available through their OpenAI-compatible API. +3. **Open a feature request** at [github.com/garrytan/gbrain/issues](https://github.com/garrytan/gbrain/issues) with the provider's API docs URL and a setup snippet. Recipes are ~30-40 lines of TypeScript. +4. **Submit a recipe**: clone, copy `src/core/ai/recipes/voyage.ts` as the gold-standard openai-compat template, register in `src/core/ai/recipes/index.ts`, add a per-recipe smoke test under `test/ai/recipe-<id>.test.ts`. The recipe contract test (`test/ai/recipes-contract.test.ts`) and IRON RULE regression test pin the structural invariants. ## Switching providers on an existing brain diff --git a/src/cli.ts b/src/cli.ts index 41a8294f0..32937de75 100755 --- a/src/cli.ts +++ b/src/cli.ts @@ -1401,6 +1401,7 @@ function buildGatewayConfig(c: GBrainConfig): AIGatewayConfig { if (process.env.OLLAMA_BASE_URL) envBaseUrls['ollama'] = process.env.OLLAMA_BASE_URL; if (process.env.LMSTUDIO_BASE_URL) envBaseUrls['lmstudio'] = process.env.LMSTUDIO_BASE_URL; if (process.env.LITELLM_BASE_URL) envBaseUrls['litellm'] = process.env.LITELLM_BASE_URL; + if (process.env.OPENROUTER_BASE_URL) envBaseUrls['openrouter'] = process.env.OPENROUTER_BASE_URL; return { embedding_model: c.embedding_model, diff --git a/src/core/ai/recipes/index.ts b/src/core/ai/recipes/index.ts index 5915c9a92..0a979393f 100644 --- a/src/core/ai/recipes/index.ts +++ b/src/core/ai/recipes/index.ts @@ -21,6 +21,7 @@ import { dashscope } from './dashscope.ts'; import { zhipu } from './zhipu.ts'; import { azureOpenAI } from './azure-openai.ts'; import { zeroentropyai } from './zeroentropyai.ts'; +import { openrouter } from './openrouter.ts'; const ALL: Recipe[] = [ openai, @@ -38,6 +39,7 @@ const ALL: Recipe[] = [ zhipu, azureOpenAI, zeroentropyai, + openrouter, ]; /** Map from `provider:id` key to 
recipe. */ diff --git a/src/core/ai/recipes/openrouter.ts b/src/core/ai/recipes/openrouter.ts new file mode 100644 index 000000000..990b07951 --- /dev/null +++ b/src/core/ai/recipes/openrouter.ts @@ -0,0 +1,37 @@ +import type { Recipe } from '../types.ts'; + +export const openrouter: Recipe = { + id: 'openrouter', + name: 'OpenRouter', + tier: 'openai-compat', + implementation: 'openai-compatible', + base_url_default: 'https://openrouter.ai/api/v1', + auth_env: { + required: ['OPENROUTER_API_KEY'], + optional: ['OPENROUTER_BASE_URL'], + setup_url: 'https://openrouter.ai/settings/keys', + }, + touchpoints: { + embedding: { + models: ['openai/text-embedding-3-small'], + default_dims: 1536, + cost_per_1m_tokens_usd: 0.02, + price_last_verified: '2026-05-19', + max_batch_tokens: 8192, + }, + chat: { + models: [ + 'openai/gpt-5.2', + 'anthropic/claude-haiku-4.5', + 'google/gemini-3-flash', + 'deepseek/deepseek-chat', + ], + supports_tools: true, + supports_subagent_loop: false, + supports_prompt_cache: false, + max_context_tokens: 200000, + price_last_verified: '2026-05-19', + }, + }, + setup_hint: 'Get an API key at https://openrouter.ai/settings/keys, then `export OPENROUTER_API_KEY=...` and use `openrouter:`.', +}; diff --git a/test/ai/recipe-openrouter.test.ts b/test/ai/recipe-openrouter.test.ts new file mode 100644 index 000000000..171a1787d --- /dev/null +++ b/test/ai/recipe-openrouter.test.ts @@ -0,0 +1,54 @@ +/** + * OpenRouter recipe smoke. 
+ */ + +import { describe, expect, test } from 'bun:test'; +import { getRecipe } from '../../src/core/ai/recipes/index.ts'; +import { defaultResolveAuth } from '../../src/core/ai/gateway.ts'; +import { assertTouchpoint } from '../../src/core/ai/model-resolver.ts'; +import { AIConfigError } from '../../src/core/ai/errors.ts'; + +describe('recipe: openrouter', () => { + test('registered with expected shape', () => { + const r = getRecipe('openrouter'); + expect(r).toBeDefined(); + expect(r!.id).toBe('openrouter'); + expect(r!.tier).toBe('openai-compat'); + expect(r!.implementation).toBe('openai-compatible'); + expect(r!.base_url_default).toBe('https://openrouter.ai/api/v1'); + expect(r!.auth_env?.required).toEqual(['OPENROUTER_API_KEY']); + expect(r!.auth_env?.optional).toContain('OPENROUTER_BASE_URL'); + }); + + test('embedding touchpoint defaults to OpenAI small embeddings at 1536 dims', () => { + const r = getRecipe('openrouter')!; + expect(r.touchpoints.embedding).toBeDefined(); + expect(r.touchpoints.embedding!.models[0]).toBe('openai/text-embedding-3-small'); + expect(r.touchpoints.embedding!.default_dims).toBe(1536); + expect(r.touchpoints.embedding!.max_batch_tokens).toBeGreaterThan(0); + }); + + test('chat touchpoint accepts arbitrary OpenRouter model ids', () => { + const r = getRecipe('openrouter')!; + expect(r.touchpoints.chat).toBeDefined(); + expect(r.touchpoints.chat!.supports_tools).toBe(true); + expect(r.touchpoints.chat!.supports_subagent_loop).toBe(false); + expect(() => assertTouchpoint(r, 'chat', 'some/provider-model')).not.toThrow(); + }); + + test('default auth: OPENROUTER_API_KEY set -> "Bearer "', () => { + const r = getRecipe('openrouter')!; + const auth = defaultResolveAuth( + r, + { OPENROUTER_API_KEY: 'sk-or-fake' }, + 'embedding', + ); + expect(auth.headerName).toBe('Authorization'); + expect(auth.token).toBe('Bearer sk-or-fake'); + }); + + test('default auth: missing OPENROUTER_API_KEY -> AIConfigError', () => { + const r = 
getRecipe('openrouter')!; + expect(() => defaultResolveAuth(r, {}, 'embedding')).toThrow(AIConfigError); + }); +});