From 6abeffd88ab9f2c69b8e0d2a19edaa306ee14479 Mon Sep 17 00:00:00 2001 From: oglenyaboss Date: Wed, 8 Apr 2026 12:13:28 +0500 Subject: [PATCH] feat: add OpenRouter as LLM and embedding provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add OpenRouter as a first-class provider, enabling access to 200+ models (Claude, GPT, Gemini, Llama, Qwen, etc.) through a single API key. LLM provider: - New OpenRouterProvider using OpenAI-compatible endpoint - Supports generate() and stream_chat() (ChatProvider protocol) - Sets recommended HTTP-Referer and X-Title headers - Default model: anthropic/claude-sonnet-4.6 - Rate limits: 60 RPM / 200K TPM - Cost tracking intentionally disabled (OpenRouter proxies models with varying prices — users should check the OpenRouter dashboard) Embedding provider: - New OpenRouterEmbedder for vector search and chat - Default model: google/gemini-embedding-001 (768 dims) - One OPENROUTER_API_KEY covers both LLM and embeddings Integration: - Registered in LLM and embedding registries (lazy import) - CLI auto-detection from OPENROUTER_API_KEY env var - Interactive provider selection in `repowise init` - Embedder selection in `repowise serve` - Server provider catalog for web UI - No new pip dependency (uses existing openai package) Tests: - 13 unit tests (construction, generation, error mapping, headers) - Registry test updated (builtin count 6 → 7) - Integration test (skipped without OPENROUTER_API_KEY) --- .../cli/src/repowise/cli/commands/init_cmd.py | 2 + .../src/repowise/cli/commands/serve_cmd.py | 24 +- packages/cli/src/repowise/cli/helpers.py | 12 +- packages/cli/src/repowise/cli/ui.py | 3 + .../src/repowise/core/providers/__init__.py | 2 +- .../core/providers/embedding/openrouter.py | 90 ++++++ .../core/providers/embedding/registry.py | 6 +- .../repowise/core/providers/llm/__init__.py | 1 + .../repowise/core/providers/llm/openrouter.py | 306 ++++++++++++++++++ 
.../repowise/core/providers/llm/registry.py | 13 +- .../core/src/repowise/core/rate_limiter.py | 1 + packages/server/src/repowise/server/app.py | 14 +- .../src/repowise/server/provider_config.py | 13 + tests/integration/test_provider_live.py | 30 ++ tests/providers/test_registry.py | 5 +- .../test_openrouter_provider.py | 176 ++++++++++ 16 files changed, 678 insertions(+), 20 deletions(-) create mode 100644 packages/core/src/repowise/core/providers/embedding/openrouter.py create mode 100644 packages/core/src/repowise/core/providers/llm/openrouter.py create mode 100644 tests/unit/test_providers/test_openrouter_provider.py diff --git a/packages/cli/src/repowise/cli/commands/init_cmd.py b/packages/cli/src/repowise/cli/commands/init_cmd.py index 1ba9bec..a0b4605 100644 --- a/packages/cli/src/repowise/cli/commands/init_cmd.py +++ b/packages/cli/src/repowise/cli/commands/init_cmd.py @@ -40,6 +40,8 @@ def _resolve_embedder(embedder_flag: str | None) -> str: return "gemini" if os.environ.get("OPENAI_API_KEY"): return "openai" + if os.environ.get("OPENROUTER_API_KEY"): + return "openrouter" return "mock" diff --git a/packages/cli/src/repowise/cli/commands/serve_cmd.py b/packages/cli/src/repowise/cli/commands/serve_cmd.py index 6624910..00816ed 100644 --- a/packages/cli/src/repowise/cli/commands/serve_cmd.py +++ b/packages/cli/src/repowise/cli/commands/serve_cmd.py @@ -41,6 +41,7 @@ def _setup_embedder() -> None: # Detect which providers already have keys in the environment. 
has_gemini = bool(os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")) has_openai = bool(os.environ.get("OPENAI_API_KEY")) + has_openrouter = bool(os.environ.get("OPENROUTER_API_KEY")) console.print( "\n[bold]Chat & search require an embedder.[/bold] " @@ -51,18 +52,24 @@ def _setup_embedder() -> None: labels = [] if has_gemini: options.append("gemini") - labels.append("[1] gemini [green]✓ key set[/green]") + labels.append("[1] gemini [green]✓ key set[/green]") else: options.append("gemini") - labels.append("[1] gemini [dim]needs GEMINI_API_KEY / GOOGLE_API_KEY[/dim]") + labels.append("[1] gemini [dim]needs GEMINI_API_KEY / GOOGLE_API_KEY[/dim]") if has_openai: options.append("openai") - labels.append("[2] openai [green]✓ key set[/green]") + labels.append("[2] openai [green]✓ key set[/green]") else: options.append("openai") - labels.append("[2] openai [dim]needs OPENAI_API_KEY[/dim]") + labels.append("[2] openai [dim]needs OPENAI_API_KEY[/dim]") + if has_openrouter: + options.append("openrouter") + labels.append("[3] openrouter [green]✓ key set[/green]") + else: + options.append("openrouter") + labels.append("[3] openrouter [dim]needs OPENROUTER_API_KEY[/dim]") options.append("skip") - labels.append("[3] skip [dim]no chat/search[/dim]") + labels.append(f"[{len(options)}] skip [dim]no chat/search[/dim]") for label in labels: console.print(f" {label}") @@ -106,6 +113,11 @@ def _get_or_prompt_api_key(embedder: str) -> str: if key: return key return click.prompt(" OPENAI_API_KEY", default="", show_default=False).strip() + if embedder == "openrouter": + key = os.environ.get("OPENROUTER_API_KEY", "") + if key: + return key + return click.prompt(" OPENROUTER_API_KEY", default="", show_default=False).strip() return "" @@ -116,6 +128,8 @@ def _set_api_key_env(embedder: str, key: str) -> None: os.environ.setdefault("GEMINI_API_KEY", key) elif embedder == "openai": os.environ.setdefault("OPENAI_API_KEY", key) + elif embedder == "openrouter": + 
os.environ.setdefault("OPENROUTER_API_KEY", key) def _save_global_embedder(embedder: str, api_key: str) -> None: diff --git a/packages/cli/src/repowise/cli/helpers.py b/packages/cli/src/repowise/cli/helpers.py index 68518c3..4cc4841 100644 --- a/packages/cli/src/repowise/cli/helpers.py +++ b/packages/cli/src/repowise/cli/helpers.py @@ -249,6 +249,8 @@ def resolve_provider( os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY") ): kwargs["api_key"] = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY") + elif provider_name == "openrouter" and os.environ.get("OPENROUTER_API_KEY"): + kwargs["api_key"] = os.environ["OPENROUTER_API_KEY"] elif provider_name == "ollama" and os.environ.get("OLLAMA_BASE_URL"): kwargs["base_url"] = os.environ["OLLAMA_BASE_URL"] @@ -269,6 +271,13 @@ def resolve_provider( else {"api_key": os.environ["OPENAI_API_KEY"]} ) return get_provider("openai", **kwargs) + if os.environ.get("OPENROUTER_API_KEY") and os.environ["OPENROUTER_API_KEY"].strip(): + kwargs = ( + {"model": model, "api_key": os.environ["OPENROUTER_API_KEY"]} + if model + else {"api_key": os.environ["OPENROUTER_API_KEY"]} + ) + return get_provider("openrouter", **kwargs) if os.environ.get("OLLAMA_BASE_URL") and os.environ["OLLAMA_BASE_URL"].strip(): kwargs = ( {"model": model, "base_url": os.environ["OLLAMA_BASE_URL"]} @@ -285,7 +294,7 @@ def resolve_provider( raise click.ClickException( "No provider configured. Use --provider, set REPOWISE_PROVIDER, " - "or set ANTHROPIC_API_KEY / OPENAI_API_KEY / OLLAMA_BASE_URL / GEMINI_API_KEY / GOOGLE_API_KEY." + "or set ANTHROPIC_API_KEY / OPENAI_API_KEY / OPENROUTER_API_KEY / OLLAMA_BASE_URL / GEMINI_API_KEY / GOOGLE_API_KEY." 
) @@ -319,6 +328,7 @@ def _is_env_var_exists(var_name: str) -> bool: provider_env_vars = { "anthropic": ["ANTHROPIC_API_KEY"], "openai": ["OPENAI_API_KEY"], + "openrouter": ["OPENROUTER_API_KEY"], "gemini": ["GEMINI_API_KEY", "GOOGLE_API_KEY"], # Either one "ollama": ["OLLAMA_BASE_URL"], "litellm": ["LITELLM_API_KEY"], # May need others depending on backend diff --git a/packages/cli/src/repowise/cli/ui.py b/packages/cli/src/repowise/cli/ui.py index 633c052..fbefd60 100644 --- a/packages/cli/src/repowise/cli/ui.py +++ b/packages/cli/src/repowise/cli/ui.py @@ -81,6 +81,7 @@ def print_phase_header( "openai": "gpt-4.1", "anthropic": "claude-sonnet-4-6", "ollama": "llama3.2", + "openrouter": "anthropic/claude-sonnet-4.6", "litellm": "groq/llama-3.1-70b-versatile", } @@ -89,6 +90,7 @@ def print_phase_header( "openai": "OPENAI_API_KEY", "anthropic": "ANTHROPIC_API_KEY", "ollama": "OLLAMA_BASE_URL", + "openrouter": "OPENROUTER_API_KEY", } _PROVIDER_SIGNUP: dict[str, str] = { @@ -96,6 +98,7 @@ def print_phase_header( "openai": "https://platform.openai.com/api-keys", "anthropic": "https://console.anthropic.com/settings/keys", "ollama": "https://ollama.com/download", + "openrouter": "https://openrouter.ai/keys", } diff --git a/packages/core/src/repowise/core/providers/__init__.py b/packages/core/src/repowise/core/providers/__init__.py index b920422..6d75223 100644 --- a/packages/core/src/repowise/core/providers/__init__.py +++ b/packages/core/src/repowise/core/providers/__init__.py @@ -1,7 +1,7 @@ """repowise provider package. 
Sub-packages: - llm/ — LLM providers (Anthropic, OpenAI, Gemini, Ollama, LiteLLM) + llm/ — LLM providers (Anthropic, OpenAI, OpenRouter, Gemini, Ollama, LiteLLM) embedding/ — Embedding providers (OpenAI, Gemini, Mock) Preferred entry points: diff --git a/packages/core/src/repowise/core/providers/embedding/openrouter.py b/packages/core/src/repowise/core/providers/embedding/openrouter.py new file mode 100644 index 0000000..dc499e0 --- /dev/null +++ b/packages/core/src/repowise/core/providers/embedding/openrouter.py @@ -0,0 +1,90 @@ +"""OpenRouter embedding support for repowise semantic search. + +Uses the OpenAI-compatible endpoint at ``https://openrouter.ai/api/v1``. +No additional pip install required — uses the ``openai`` package. + +Default model: google/gemini-embedding-001 (768 dims) + +Usage: + from repowise.core.providers.embedding.openrouter import OpenRouterEmbedder + + embedder = OpenRouterEmbedder(api_key="sk-or-...") + vectors = await embedder.embed(["some text"]) +""" + +from __future__ import annotations + +import asyncio +import math +import os + + +class OpenRouterEmbedder: + """OpenRouter embedding adapter implementing the repowise Embedder protocol. + + Args: + api_key: OpenRouter API key. Falls back to OPENROUTER_API_KEY env var. + model: Embedding model name. Default: "google/gemini-embedding-001". + """ + + _DIMS: dict[str, int] = { + "google/gemini-embedding-001": 768, + "openai/text-embedding-3-small": 1536, + "openai/text-embedding-3-large": 3072, + } + + _DEFAULT_TIMEOUT: float = 10.0 + + def __init__( + self, + api_key: str | None = None, + model: str = "google/gemini-embedding-001", + timeout: float = _DEFAULT_TIMEOUT, + ) -> None: + self._api_key = api_key or os.environ.get("OPENROUTER_API_KEY") + if not self._api_key: + raise ValueError( + "OpenRouter API key required. Pass api_key= or set OPENROUTER_API_KEY env var." 
+ ) + self._model = model + self._timeout = timeout + self._client: object | None = None + + @property + def dimensions(self) -> int: + return self._DIMS.get(self._model, 768) + + async def embed(self, texts: list[str]) -> list[list[float]]: + """Embed a batch of texts using OpenRouter. + + Runs the synchronous SDK call in a thread pool to avoid blocking the + asyncio event loop. + """ + if not texts: + return [] + + model = self._model + timeout = self._timeout + + def _embed_sync() -> list[list[float]]: + import openai + + if self._client is None: + self._client = openai.OpenAI( + api_key=self._api_key, + base_url="https://openrouter.ai/api/v1", + timeout=timeout, + ) + response = self._client.embeddings.create(model=model, input=texts) # type: ignore[union-attr] + raw_vectors = [list(item.embedding) for item in response.data] + return [_l2_normalize(v) for v in raw_vectors] + + return await asyncio.to_thread(_embed_sync) + + +def _l2_normalize(vec: list[float]) -> list[float]: + """L2-normalize a vector to unit length.""" + norm = math.sqrt(sum(x * x for x in vec)) + if norm == 0.0: + norm = 1.0 + return [x / norm for x in vec] diff --git a/packages/core/src/repowise/core/providers/embedding/registry.py b/packages/core/src/repowise/core/providers/embedding/registry.py index d4b7d2c..7135060 100644 --- a/packages/core/src/repowise/core/providers/embedding/registry.py +++ b/packages/core/src/repowise/core/providers/embedding/registry.py @@ -24,8 +24,9 @@ _BUILTIN_EMBEDDERS: dict[str, tuple[str, str]] = { "openai": ("repowise.core.providers.embedding.openai", "OpenAIEmbedder"), - "gemini": ("repowise.core.providers.embedding.gemini", "GeminiEmbedder"), - "mock": ("repowise.core.providers.embedding.base", "MockEmbedder"), + "gemini": ("repowise.core.providers.embedding.gemini", "GeminiEmbedder"), + "openrouter": ("repowise.core.providers.embedding.openrouter", "OpenRouterEmbedder"), + "mock": ("repowise.core.providers.embedding.base", "MockEmbedder"), } 
_custom_embedders: dict[str, Callable[..., Embedder]] = {} @@ -80,6 +81,7 @@ def get_embedder(name: str, **kwargs: Any) -> Embedder: _missing = { "openai": "openai", "gemini": "google-genai", + "openrouter": "openai", # openrouter uses the openai package } try: module = importlib.import_module(module_path) diff --git a/packages/core/src/repowise/core/providers/llm/__init__.py b/packages/core/src/repowise/core/providers/llm/__init__.py index e23502e..fa6aee0 100644 --- a/packages/core/src/repowise/core/providers/llm/__init__.py +++ b/packages/core/src/repowise/core/providers/llm/__init__.py @@ -12,6 +12,7 @@ anthropic — claude-opus-4-6, claude-sonnet-4-6, claude-haiku-4-5 openai — gpt-5.4-nano, gpt-5.4-mini, gpt-5.4 gemini — gemini-3.1-flash-lite-preview, gemini-3-flash-preview, gemini-3.1-pro-preview + openrouter — 200+ models via OpenRouter (anthropic/claude-sonnet-4.6, etc.) ollama — local inference (llama3.2, codellama, etc.) litellm — 100+ providers via LiteLLM proxy mock — deterministic test provider diff --git a/packages/core/src/repowise/core/providers/llm/openrouter.py b/packages/core/src/repowise/core/providers/llm/openrouter.py new file mode 100644 index 0000000..b956423 --- /dev/null +++ b/packages/core/src/repowise/core/providers/llm/openrouter.py @@ -0,0 +1,306 @@ +"""OpenRouter provider for repowise. + +Routes requests to 200+ models (Claude, GPT, Gemini, Llama, Mistral, etc.) +through a single API key via an OpenAI-compatible endpoint. + +No additional pip install required — uses the ``openai`` package. 
+
+Popular models:
+    - anthropic/claude-sonnet-4.6 — Anthropic Claude Sonnet
+    - google/gemini-3.1-flash-lite-preview — Google Gemini Flash
+    - meta-llama/llama-4-maverick — Meta Llama open model
+"""
+
+from __future__ import annotations
+
+import os
+
+import structlog
+from openai import AsyncOpenAI
+from openai import RateLimitError as _OpenAIRateLimitError
+from openai import APIStatusError as _OpenAIAPIStatusError
+from tenacity import (
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_exponential_jitter,
+    RetryError,
+)
+
+from repowise.core.providers.llm.base import (
+    BaseProvider,
+    ChatStreamEvent,
+    ChatToolCall,
+    GeneratedResponse,
+    ProviderError,
+    RateLimitError,
+)
+
+from typing import TYPE_CHECKING, Any, AsyncIterator
+from repowise.core.rate_limiter import RateLimiter
+
+if TYPE_CHECKING:
+    from repowise.core.generation.cost_tracker import CostTracker
+
+log = structlog.get_logger(__name__)
+
+_MAX_RETRIES = 3
+_MIN_WAIT = 1.0
+_MAX_WAIT = 4.0
+
+
+class OpenRouterProvider(BaseProvider):
+    """OpenRouter provider — access 200+ models via a single API key.
+
+    Uses the OpenAI-compatible endpoint at ``https://openrouter.ai/api/v1``.
+
+    Args:
+        api_key: OpenRouter API key. Falls back to OPENROUTER_API_KEY env var.
+        model: Model identifier (vendor/model format). Defaults to anthropic/claude-sonnet-4.6.
+        base_url: Override the OpenRouter API URL (rarely needed).
+        rate_limiter: Optional RateLimiter instance.
+        cost_tracker: Accepted for interface compatibility but ignored — cost tracking is disabled for OpenRouter (see __init__).
+        http_referer: Optional site URL for OpenRouter rankings/leaderboards.
+        app_title: App name shown on OpenRouter dashboard. Defaults to "repowise".
+ """ + + def __init__( + self, + api_key: str | None = None, + model: str = "anthropic/claude-sonnet-4.6", + base_url: str = "https://openrouter.ai/api/v1", + rate_limiter: RateLimiter | None = None, + cost_tracker: "CostTracker | None" = None, + http_referer: str | None = None, + app_title: str = "repowise", + ) -> None: + resolved_key = api_key or os.environ.get("OPENROUTER_API_KEY") + if not resolved_key: + raise ProviderError( + "openrouter", + "No API key provided. Pass api_key= or set OPENROUTER_API_KEY.", + ) + + headers: dict[str, str] = {} + if http_referer: + headers["HTTP-Referer"] = http_referer + if app_title: + headers["X-Title"] = app_title + + self._client = AsyncOpenAI( + api_key=resolved_key, + base_url=base_url, + default_headers=headers or None, + ) + self._model = model + self._rate_limiter = rate_limiter + # Cost tracking disabled: OpenRouter proxies 200+ models with varying + # prices. The fallback pricing in cost_tracker would show inflated + # numbers. Users should check the OpenRouter dashboard for actual costs. 
+ self._cost_tracker = None + + @property + def provider_name(self) -> str: + return "openrouter" + + @property + def model_name(self) -> str: + return self._model + + async def generate( + self, + system_prompt: str, + user_prompt: str, + max_tokens: int = 4096, + temperature: float = 0.3, + request_id: str | None = None, + ) -> GeneratedResponse: + if self._rate_limiter: + await self._rate_limiter.acquire(estimated_tokens=max_tokens) + + log.debug( + "openrouter.generate.start", + model=self._model, + max_tokens=max_tokens, + request_id=request_id, + ) + + try: + return await self._generate_with_retry( + system_prompt=system_prompt, + user_prompt=user_prompt, + max_tokens=max_tokens, + temperature=temperature, + request_id=request_id, + ) + except RetryError as exc: + raise ProviderError( + "openrouter", + f"All {_MAX_RETRIES} retries exhausted: {exc}", + ) from exc + + @retry( + retry=retry_if_exception_type(ProviderError), + stop=stop_after_attempt(_MAX_RETRIES), + wait=wait_exponential_jitter(initial=_MIN_WAIT, max=_MAX_WAIT), + reraise=True, + ) + async def _generate_with_retry( + self, + system_prompt: str, + user_prompt: str, + max_tokens: int, + temperature: float, + request_id: str | None, + ) -> GeneratedResponse: + try: + response = await self._client.chat.completions.create( + model=self._model, + max_completion_tokens=max_tokens, + temperature=temperature, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + ) + except _OpenAIRateLimitError as exc: + raise RateLimitError("openrouter", str(exc), status_code=429) from exc + except _OpenAIAPIStatusError as exc: + raise ProviderError( + "openrouter", str(exc), status_code=exc.status_code + ) from exc + + usage = response.usage + result = GeneratedResponse( + content=response.choices[0].message.content or "", + input_tokens=usage.prompt_tokens if usage else 0, + output_tokens=usage.completion_tokens if usage else 0, + cached_tokens=0, + usage={ + 
"prompt_tokens": usage.prompt_tokens if usage else 0, + "completion_tokens": usage.completion_tokens if usage else 0, + "total_tokens": usage.total_tokens if usage else 0, + }, + ) + log.debug( + "openrouter.generate.done", + input_tokens=result.input_tokens, + output_tokens=result.output_tokens, + request_id=request_id, + ) + + if self._cost_tracker is not None: + import asyncio + + try: + asyncio.get_event_loop().create_task( + self._cost_tracker.record( + model=self._model, + input_tokens=result.input_tokens, + output_tokens=result.output_tokens, + operation="doc_generation", + file_path=None, + ) + ) + except RuntimeError: + pass # No running event loop — skip async record + + return result + + # --- ChatProvider protocol implementation --- + + async def stream_chat( + self, + messages: list[dict[str, Any]], + tools: list[dict[str, Any]], + system_prompt: str, + max_tokens: int = 8192, + temperature: float = 0.7, + request_id: str | None = None, + tool_executor: Any | None = None, + ) -> AsyncIterator[ChatStreamEvent]: + import json as _json + + full_messages = [{"role": "system", "content": system_prompt}, *messages] + kwargs: dict[str, Any] = { + "model": self._model, + "max_completion_tokens": max_tokens, + "temperature": temperature, + "messages": full_messages, + "stream": True, + } + if tools: + kwargs["tools"] = tools + + try: + stream = await self._client.chat.completions.create(**kwargs) + except _OpenAIRateLimitError as exc: + raise RateLimitError("openrouter", str(exc), status_code=429) from exc + except _OpenAIAPIStatusError as exc: + raise ProviderError("openrouter", str(exc), status_code=exc.status_code) from exc + + # Track in-progress tool calls (OpenAI-compatible streaming) + tool_calls_acc: dict[int, dict[str, Any]] = {} + + try: + async for chunk in stream: + choice = chunk.choices[0] if chunk.choices else None + if not choice: + if chunk.usage: + yield ChatStreamEvent( + type="usage", + input_tokens=chunk.usage.prompt_tokens or 0, + 
output_tokens=chunk.usage.completion_tokens or 0, + ) + continue + + delta = choice.delta + finish = choice.finish_reason + + # Text content + if delta and delta.content: + yield ChatStreamEvent(type="text_delta", text=delta.content) + + # Tool call fragments + if delta and delta.tool_calls: + for tc_delta in delta.tool_calls: + idx = tc_delta.index + if idx not in tool_calls_acc: + tool_calls_acc[idx] = { + "id": tc_delta.id or "", + "name": "", + "arguments": "", + } + acc = tool_calls_acc[idx] + if tc_delta.id: + acc["id"] = tc_delta.id + if tc_delta.function: + if tc_delta.function.name: + acc["name"] = tc_delta.function.name + if tc_delta.function.arguments: + acc["arguments"] += tc_delta.function.arguments + + if finish: + # Emit accumulated tool calls + for idx in sorted(tool_calls_acc.keys()): + acc = tool_calls_acc[idx] + try: + args = _json.loads(acc["arguments"]) if acc["arguments"] else {} + except Exception: + args = {} + yield ChatStreamEvent( + type="tool_start", + tool_call=ChatToolCall( + id=acc["id"], + name=acc["name"], + arguments=args, + ), + ) + tool_calls_acc.clear() + + stop_reason = "tool_use" if finish == "tool_calls" else "end_turn" + yield ChatStreamEvent(type="stop", stop_reason=stop_reason) + except _OpenAIRateLimitError as exc: + raise RateLimitError("openrouter", str(exc), status_code=429) from exc + except _OpenAIAPIStatusError as exc: + raise ProviderError("openrouter", str(exc), status_code=exc.status_code) from exc diff --git a/packages/core/src/repowise/core/providers/llm/registry.py b/packages/core/src/repowise/core/providers/llm/registry.py index 48e07a7..46c3612 100644 --- a/packages/core/src/repowise/core/providers/llm/registry.py +++ b/packages/core/src/repowise/core/providers/llm/registry.py @@ -5,11 +5,12 @@ enabling community-contributed providers without forking repowise. 
Built-in providers: - - anthropic → AnthropicProvider - - openai → OpenAIProvider - - ollama → OllamaProvider - - litellm → LiteLLMProvider - - mock → MockProvider (testing only) + - anthropic → AnthropicProvider + - openai → OpenAIProvider + - openrouter → OpenRouterProvider + - ollama → OllamaProvider + - litellm → LiteLLMProvider + - mock → MockProvider (testing only) Custom provider registration: from repowise.core.providers import register_provider @@ -36,6 +37,7 @@ _BUILTIN_PROVIDERS: dict[str, tuple[str, str]] = { "anthropic": ("repowise.core.providers.llm.anthropic", "AnthropicProvider"), "openai": ("repowise.core.providers.llm.openai", "OpenAIProvider"), + "openrouter": ("repowise.core.providers.llm.openrouter", "OpenRouterProvider"), "gemini": ("repowise.core.providers.llm.gemini", "GeminiProvider"), "ollama": ("repowise.core.providers.llm.ollama", "OllamaProvider"), "litellm": ("repowise.core.providers.llm.litellm", "LiteLLMProvider"), @@ -134,6 +136,7 @@ def get_provider( "openai": "openai", "gemini": "google-genai", "ollama": "openai", # ollama uses the openai package + "openrouter": "openai", # openrouter uses the openai package "litellm": "litellm", } package = _missing.get(name, name) diff --git a/packages/core/src/repowise/core/rate_limiter.py b/packages/core/src/repowise/core/rate_limiter.py index 3612624..0e2c306 100644 --- a/packages/core/src/repowise/core/rate_limiter.py +++ b/packages/core/src/repowise/core/rate_limiter.py @@ -45,6 +45,7 @@ class RateLimitConfig: PROVIDER_DEFAULTS: dict[str, RateLimitConfig] = { "anthropic": RateLimitConfig(requests_per_minute=50, tokens_per_minute=100_000), "openai": RateLimitConfig(requests_per_minute=60, tokens_per_minute=150_000), + "openrouter": RateLimitConfig(requests_per_minute=60, tokens_per_minute=200_000), "gemini": RateLimitConfig(requests_per_minute=60, tokens_per_minute=1_000_000), # Ollama runs locally — effectively unlimited, but we cap to avoid OOM "ollama": 
RateLimitConfig(requests_per_minute=1_000, tokens_per_minute=10_000_000), diff --git a/packages/server/src/repowise/server/app.py b/packages/server/src/repowise/server/app.py index 14ef79f..2d7a6e8 100644 --- a/packages/server/src/repowise/server/app.py +++ b/packages/server/src/repowise/server/app.py @@ -56,9 +56,10 @@ def _build_embedder(): """Build an embedder from REPOWISE_EMBEDDER env var (default: mock). Supported values: - mock — deterministic 8-dim SHA-256 embedder (default, no API key needed) - gemini — GeminiEmbedder via GEMINI_API_KEY / GOOGLE_API_KEY env var - openai — OpenAIEmbedder via OPENAI_API_KEY env var + mock — deterministic 8-dim SHA-256 embedder (default, no API key needed) + gemini — GeminiEmbedder via GEMINI_API_KEY / GOOGLE_API_KEY env var + openai — OpenAIEmbedder via OPENAI_API_KEY env var + openrouter — OpenRouterEmbedder via OPENROUTER_API_KEY env var """ name = os.environ.get("REPOWISE_EMBEDDER", "mock").lower() if name == "gemini": @@ -71,7 +72,12 @@ def _build_embedder(): model = os.environ.get("REPOWISE_EMBEDDING_MODEL", "text-embedding-3-small") return OpenAIEmbedder(model=model) - logger.warning("embedder.mock_active — set REPOWISE_EMBEDDER=gemini or openai for real RAG") + if name == "openrouter": + from repowise.core.providers.embedding.openrouter import OpenRouterEmbedder + + model = os.environ.get("REPOWISE_EMBEDDING_MODEL", "google/gemini-embedding-001") + return OpenRouterEmbedder(model=model) + logger.warning("embedder.mock_active — set REPOWISE_EMBEDDER=gemini, openai, or openrouter for real RAG") return MockEmbedder() diff --git a/packages/server/src/repowise/server/provider_config.py b/packages/server/src/repowise/server/provider_config.py index 60f6c60..e429637 100644 --- a/packages/server/src/repowise/server/provider_config.py +++ b/packages/server/src/repowise/server/provider_config.py @@ -47,6 +47,19 @@ "env_keys": ["OPENAI_API_KEY"], "requires_key": True, }, + { + "id": "openrouter", + "name": "OpenRouter", + 
"default_model": "anthropic/claude-sonnet-4.6", + "models": [ + "anthropic/claude-sonnet-4.6", + "google/gemini-3.1-flash-lite-preview", + "meta-llama/llama-4-maverick", + "openai/gpt-4o", + ], + "env_keys": ["OPENROUTER_API_KEY"], + "requires_key": True, + }, { "id": "ollama", "name": "Ollama (Local)", diff --git a/tests/integration/test_provider_live.py b/tests/integration/test_provider_live.py index 2be4fb0..570165d 100644 --- a/tests/integration/test_provider_live.py +++ b/tests/integration/test_provider_live.py @@ -108,3 +108,33 @@ async def test_anthropic_live(model): print( f"\n[{model}] tokens: {result.input_tokens}in / {result.output_tokens}out | content: {result.content!r}" ) + + +# --------------------------------------------------------------------------- +# OpenRouter +# --------------------------------------------------------------------------- + +OPENROUTER_KEY = os.environ.get("OPENROUTER_API_KEY", "") + + +@pytest.mark.skipif(not OPENROUTER_KEY, reason="OPENROUTER_API_KEY not set") +@pytest.mark.parametrize( + "model", + ["anthropic/claude-sonnet-4.6", "google/gemini-3.1-flash-lite-preview"], +) +async def test_openrouter_live(model): + from repowise.core.providers.llm.openrouter import OpenRouterProvider + + provider = OpenRouterProvider(api_key=OPENROUTER_KEY, model=model) + result = await provider.generate( + system_prompt="You are a concise assistant.", + user_prompt="Reply with exactly: OK", + max_tokens=16, + ) + assert isinstance(result, GeneratedResponse) + assert result.content.strip() + assert result.input_tokens > 0 + assert result.output_tokens > 0 + print( + f"\n[{model}] tokens: {result.input_tokens}in / {result.output_tokens}out | content: {result.content!r}" + ) diff --git a/tests/providers/test_registry.py b/tests/providers/test_registry.py index 92a71f4..0f86085 100644 --- a/tests/providers/test_registry.py +++ b/tests/providers/test_registry.py @@ -23,6 +23,7 @@ def test_includes_all_builtin_providers(self) -> None: providers = 
list_providers() assert "anthropic" in providers assert "openai" in providers + assert "openrouter" in providers assert "ollama" in providers assert "litellm" in providers assert "mock" in providers @@ -114,5 +115,5 @@ def factory(**kw: object) -> MockProvider: assert received.get("api_key") == "key-123" def test_builtin_count(self) -> None: - """Sanity check: we have exactly 6 built-in providers.""" - assert len(_BUILTIN_PROVIDERS) == 6 + """Sanity check: we have exactly 7 built-in providers.""" + assert len(_BUILTIN_PROVIDERS) == 7 diff --git a/tests/unit/test_providers/test_openrouter_provider.py b/tests/unit/test_providers/test_openrouter_provider.py new file mode 100644 index 0000000..47cf346 --- /dev/null +++ b/tests/unit/test_providers/test_openrouter_provider.py @@ -0,0 +1,176 @@ +"""Unit tests for OpenRouterProvider. + +All tests mock the AsyncOpenAI client — no real API calls are made. +""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +pytest.importorskip("openai", reason="openai SDK not installed") + +from repowise.core.providers.llm.base import GeneratedResponse, ProviderError, RateLimitError +from repowise.core.providers.llm.openrouter import OpenRouterProvider + +# --------------------------------------------------------------------------- +# Construction +# --------------------------------------------------------------------------- + + +def test_provider_name(): + p = OpenRouterProvider(api_key="sk-or-test") + assert p.provider_name == "openrouter" + + +def test_default_model(): + p = OpenRouterProvider(api_key="sk-or-test") + assert p.model_name == "anthropic/claude-sonnet-4.6" + + +def test_api_key_from_env(monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-env-test") + p = OpenRouterProvider() + assert p.provider_name == "openrouter" + + +def test_missing_api_key_raises(monkeypatch): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + with 
pytest.raises(ProviderError): + OpenRouterProvider() + + +def test_custom_model(): + p = OpenRouterProvider(api_key="sk-or-test", model="google/gemini-3.1-flash-lite-preview") + assert p.model_name == "google/gemini-3.1-flash-lite-preview" + + +def test_default_headers_app_title(): + """Default app_title='repowise' sets X-Title header.""" + p = OpenRouterProvider(api_key="sk-or-test") + headers = p._client._custom_headers + assert headers.get("X-Title") == "repowise" + + +def test_default_headers_with_referer(): + """When http_referer is provided, HTTP-Referer header is set.""" + p = OpenRouterProvider(api_key="sk-or-test", http_referer="https://example.com") + headers = p._client._custom_headers + assert headers.get("HTTP-Referer") == "https://example.com" + assert headers.get("X-Title") == "repowise" + + +def test_no_headers_when_empty(): + """When app_title is empty and no referer, no custom headers.""" + p = OpenRouterProvider(api_key="sk-or-test", app_title="") + # default_headers should be None → no custom headers set + headers = p._client._custom_headers + assert not headers.get("X-Title") + + +# --------------------------------------------------------------------------- +# Successful generation +# --------------------------------------------------------------------------- + + +def _make_mock_chat_response(text: str = "# Doc\nContent.") -> MagicMock: + usage = MagicMock() + usage.prompt_tokens = 120 + usage.completion_tokens = 60 + usage.total_tokens = 180 + + choice = MagicMock() + choice.message.content = text + + response = MagicMock() + response.choices = [choice] + response.usage = usage + return response + + +async def test_generate_returns_generated_response(): + provider = OpenRouterProvider(api_key="sk-or-test") + mock_response = _make_mock_chat_response("Hello from OpenRouter") + + with patch("openai.AsyncOpenAI") as mock_client: + mock_client.return_value.chat.completions.create = AsyncMock(return_value=mock_response) + provider._client = 
mock_client.return_value + result = await provider.generate("sys", "user") + + assert isinstance(result, GeneratedResponse) + assert result.content == "Hello from OpenRouter" + + +async def test_generate_token_counts(): + provider = OpenRouterProvider(api_key="sk-or-test") + mock_response = _make_mock_chat_response() + + with patch("openai.AsyncOpenAI") as mock_client: + mock_client.return_value.chat.completions.create = AsyncMock(return_value=mock_response) + provider._client = mock_client.return_value + result = await provider.generate("sys", "user") + + assert result.input_tokens == 120 + assert result.output_tokens == 60 + assert result.cached_tokens == 0 + + +async def test_generate_sends_correct_messages(): + provider = OpenRouterProvider(api_key="sk-or-test", model="google/gemini-3.1-flash-lite-preview") + mock_response = _make_mock_chat_response() + captured_kwargs: list[dict] = [] + + async def fake_create(**kwargs): + captured_kwargs.append(kwargs) + return mock_response + + with patch("openai.AsyncOpenAI") as mock_client: + mock_client.return_value.chat.completions.create = fake_create + provider._client = mock_client.return_value + await provider.generate("system msg", "user msg", max_tokens=2048, temperature=0.5) + + kw = captured_kwargs[0] + assert kw["model"] == "google/gemini-3.1-flash-lite-preview" + assert kw["max_completion_tokens"] == 2048 + assert kw["temperature"] == 0.5 + messages = kw["messages"] + assert messages[0] == {"role": "system", "content": "system msg"} + assert messages[1] == {"role": "user", "content": "user msg"} + + +# --------------------------------------------------------------------------- +# Error mapping +# --------------------------------------------------------------------------- + + +async def test_rate_limit_error(): + from openai import RateLimitError as _OpenAIRateLimitError + + provider = OpenRouterProvider(api_key="sk-or-test") + + with patch("openai.AsyncOpenAI") as mock_client: + 
mock_client.return_value.chat.completions.create = AsyncMock( + side_effect=_OpenAIRateLimitError( + "rate limit", response=MagicMock(status_code=429), body={} + ) + ) + provider._client = mock_client.return_value + with pytest.raises(RateLimitError): + await provider.generate("sys", "user") + + +async def test_api_status_error(): + from openai import APIStatusError as _OpenAIAPIStatusError + + provider = OpenRouterProvider(api_key="sk-or-test") + + with patch("openai.AsyncOpenAI") as mock_client: + mock_client.return_value.chat.completions.create = AsyncMock( + side_effect=_OpenAIAPIStatusError( + "server error", response=MagicMock(status_code=500), body={} + ) + ) + provider._client = mock_client.return_value + with pytest.raises(ProviderError): + await provider.generate("sys", "user")