diff --git a/content/docs/meta.json b/content/docs/meta.json index fe51841..95f05fa 100644 --- a/content/docs/meta.json +++ b/content/docs/meta.json @@ -24,7 +24,9 @@ "evals", "rl", "sft", + "---Integrations---", + "opik", "---Contributing---", "contributing" ] -} \ No newline at end of file +} diff --git a/content/docs/opik.mdx b/content/docs/opik.mdx new file mode 100644 index 0000000..e0abe20 --- /dev/null +++ b/content/docs/opik.mdx @@ -0,0 +1,108 @@ +--- +title: Opik +description: LLM observability and evaluation for Harbor trials +--- + +[Opik](https://github.com/comet-ml/opik) is an open-source LLM observability and evaluation platform by [Comet](https://www.comet.com/site/). It lets you debug, evaluate, and monitor LLM applications with comprehensive tracing, automated evaluations, and production-ready dashboards. Harbor integrates with Opik to log traces for all trial executions. + +![Opik experiments tab](/harbor-opik-experiments.png) + +Opik logs the following from Harbor trials: + +- **Experiments** for each Harbor job, allowing you to group and compare runs across different agents, models, or configurations +- **Trial results** as Opik traces with timing, metadata, and feedback scores from verifier rewards +- **Trajectory steps** as nested spans showing the complete agent-environment interaction +- **Tool calls and observations** as detailed execution records +- **Token usage and costs** aggregated from ATIF metrics + +## Installation + +Install the `opik` package alongside Harbor: + +```bash tab="uv" +uv pip install opik +``` + +```bash tab="pip" +pip install opik +``` + +## Configuration + +Create a free [Comet account](https://www.comet.com/signup?from=llm&utm_source=opik&utm_medium=colab&utm_content=harbor&utm_campaign=opik) and grab your API key, or run Opik locally using the [self-hosting guide](https://www.comet.com/docs/opik/self-host/overview/). 
+ +Configure the Opik SDK using the CLI: + +```bash +opik configure +``` + +Or configure in code: + +```python +import opik +opik.configure() +``` + +## CLI Usage + +The easiest way to use Harbor with Opik is through the `opik harbor` CLI command. This automatically enables Opik tracking for all trial executions. All standard Harbor commands are available as subcommands. + +```bash +# Run a benchmark with Opik tracking +opik harbor run -d terminal-bench@head -a terminus_2 -m gpt-4.1 + +# Use a configuration file +opik harbor run -c config.yaml +``` + +See the [Environment Variables](#environment-variables) section for additional options like setting the project name via environment variables. + +Once you run the command, you can track your Harbor run in real time in the Opik experiments tab. Each trial is logged as a trace with timing, metadata, and verifier rewards as feedback scores. Trajectory steps appear as nested spans with tool calls, observations, and token usage. You can then compare runs across different agents or models to identify what drives differences in trial results. + +![Opik trace view](/harbor-opik-integration.png) + +## Custom Agent Tracking + +When building [custom agents](/docs/agents), you can use Opik's `@track` decorator on methods within your agent implementation. 
These decorated functions will automatically be captured as spans within the trial trace: + +```python +from harbor.agents.base import BaseAgent +from opik import track + +class MyCustomAgent(BaseAgent): + @staticmethod + def name() -> str: + return "my-custom-agent" + + @track + async def plan_next_action(self, observation: str) -> str: + # This function will appear as a span in Opik + return action + + @track + async def execute_tool(self, tool_name: str, args: dict) -> str: + # This will also be tracked as a nested span + result = await self._run_tool(tool_name, args) + return result + + async def run(self, instruction: str, environment, context) -> None: + # Your main agent loop + while not done: + observation = await environment.exec("pwd") + action = await self.plan_next_action(observation) + result = await self.execute_tool(action.tool, action.args) +``` + +## Environment Variables + +| Variable | Description | +|----------|-------------| +| `OPIK_PROJECT_NAME` | Default project name for traces | +| `OPIK_API_KEY` | API key for Opik Cloud | +| `OPIK_WORKSPACE` | Workspace name (for Opik Cloud) | + + +If you have questions about the Opik integration or want to report an issue, please open an issue on the [Opik GitHub repository](https://github.com/comet-ml/opik/issues). 
+ + diff --git a/package-lock.json b/package-lock.json index b5146e9..fb1fe5b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -30,7 +30,7 @@ "@types/react": "^19.2.2", "@types/react-dom": "^19.2.2", "eslint": "^9.38.0", - "eslint-config-next": "16.0.1", + "eslint-config-next": "^16.0.8", "postcss": "^8.5.6", "tailwindcss": "^4.1.16", "tw-animate-css": "^1.4.0", @@ -1517,9 +1517,9 @@ "license": "MIT" }, "node_modules/@next/eslint-plugin-next": { - "version": "16.0.1", - "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-16.0.1.tgz", - "integrity": "sha512-g4Cqmv/gyFEXNeVB2HkqDlYKfy+YrlM2k8AVIO/YQVEPfhVruH1VA99uT1zELLnPLIeOnx8IZ6Ddso0asfTIdw==", + "version": "16.0.8", + "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-16.0.8.tgz", + "integrity": "sha512-1miV0qXDcLUaOdHridVPCh4i39ElRIAraseVIbb3BEqyZ5ol9sPyjTP/GNTPV5rBxqxjF6/vv5zQTVbhiNaLqA==", "dev": true, "license": "MIT", "dependencies": { @@ -4831,13 +4831,13 @@ } }, "node_modules/eslint-config-next": { - "version": "16.0.1", - "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-16.0.1.tgz", - "integrity": "sha512-wNuHw5gNOxwLUvpg0cu6IL0crrVC9hAwdS/7UwleNkwyaMiWIOAwf8yzXVqBBzL3c9A7jVRngJxjoSpPP1aEhg==", + "version": "16.0.8", + "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-16.0.8.tgz", + "integrity": "sha512-8J5cOAboXIV3f8OD6BOyj7Fik6n/as7J4MboiUSExWruf/lCu1OPR3ZVSdnta6WhzebrmAATEmNSBZsLWA6kbg==", "dev": true, "license": "MIT", "dependencies": { - "@next/eslint-plugin-next": "16.0.1", + "@next/eslint-plugin-next": "16.0.8", "eslint-import-resolver-node": "^0.3.6", "eslint-import-resolver-typescript": "^3.5.2", "eslint-plugin-import": "^2.32.0", diff --git a/package.json b/package.json index 99801b4..be96638 100644 --- a/package.json +++ b/package.json @@ -31,7 +31,7 @@ "@types/react": "^19.2.2", "@types/react-dom": "^19.2.2", "eslint": "^9.38.0", - "eslint-config-next": 
"16.0.1", + "eslint-config-next": "^16.0.8", "postcss": "^8.5.6", "tailwindcss": "^4.1.16", "tw-animate-css": "^1.4.0", diff --git a/public/harbor-opik-experiments.png b/public/harbor-opik-experiments.png new file mode 100644 index 0000000..cd7a1b3 Binary files /dev/null and b/public/harbor-opik-experiments.png differ diff --git a/public/harbor-opik-integration.png b/public/harbor-opik-integration.png new file mode 100644 index 0000000..23e98b0 Binary files /dev/null and b/public/harbor-opik-integration.png differ