diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..8416e3a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,62 @@ +name: Bug report +description: Report a reproducible bug in the CLI, backend, testkit, or infra docs. +title: "bug: " +labels: + - bug +body: + - type: markdown + attributes: + value: | + Thanks for the report. Please include a minimal reproducer so we can get to a fix quickly. + + - type: textarea + id: summary + attributes: + label: What happened? + description: What did you expect, and what happened instead? + placeholder: "`expert ask` hangs after the first SSE chunk..." + validations: + required: true + + - type: textarea + id: steps + attributes: + label: Reproduction + description: Smallest set of steps, files, and commands that reproduces the bug. + placeholder: | + 1. `expert init demo` + 2. `cd demo` + 3. `expert validate` + validations: + required: true + + - type: textarea + id: logs + attributes: + label: Relevant output + description: Paste traceback, stderr, or logs (rendered as a code block; attach screenshots in the summary field above). + render: shell + + - type: input + id: version + attributes: + label: expert version + placeholder: "expert --version" + validations: + required: true + + - type: input + id: python + attributes: + label: Python version + placeholder: "python --version" + validations: + required: true + + - type: input + id: os + attributes: + label: OS + placeholder: "macOS 15 / Ubuntu 24.04 / Windows 11" + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..a1d3085 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: false +contact_links: + - name: Security disclosure + url: https://github.com/feliperbroering/expert-agent/security/advisories/new + about: Please report vulnerabilities privately, not via public issues. 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..9a726dd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,40 @@ +name: Feature request +description: Pitch a new capability or workflow improvement. +title: "feat: " +labels: + - enhancement +body: + - type: markdown + attributes: + value: | + Please lead with the user problem, not the implementation detail. + + - type: textarea + id: problem + attributes: + label: Problem to solve + description: Who is blocked today, and by what? + placeholder: "As someone curating multiple private agents, I want..." + validations: + required: true + + - type: textarea + id: proposal + attributes: + label: Proposed shape + description: CLI flag, new schema field, backend endpoint, docs flow, etc. + placeholder: "`expert deploy --agent cardiology` ..." + validations: + required: true + + - type: textarea + id: alternatives + attributes: + label: Alternatives considered + description: What else did you try or think about? + + - type: textarea + id: context + attributes: + label: Extra context + description: Links, screenshots, prior art, related issues. 
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..cd230d0 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,22 @@ +## Summary + +- + +## Why + +- + +## Test plan + +- [ ] `uv run ruff check .` +- [ ] `uv run ruff format .` +- [ ] `uv run mypy backend cli` +- [ ] `uv run pytest` +- [ ] `expert test ...` (if CLI / HTTP contract changed) + +## Checklist + +- [ ] Conventional Commit(s) +- [ ] Docs updated (`README.md`, `docs/`, or examples) if needed +- [ ] No unrelated refactors +- [ ] Any breaking change is called out explicitly diff --git a/.github/workflows/expert-e2e.yml b/.github/workflows/expert-e2e.yml index 45592c1..e1ba508 100644 --- a/.github/workflows/expert-e2e.yml +++ b/.github/workflows/expert-e2e.yml @@ -7,7 +7,7 @@ name: expert-e2e (reusable) # e2e: # uses: feliperbroering/expert-agent/.github/workflows/expert-e2e.yml@main # with: -# schema: ecg-expert/agent_schema.yaml +# schema: my-expert/agent_schema.yaml # suite: 05_ask_latency # secrets: # endpoint: ${{ secrets.EXPERT_AGENT_ENDPOINT }} @@ -25,6 +25,13 @@ on: required: false type: string default: agent_schema.yaml + agent: + description: >- + Agent name (as declared in expert.toml or the name of a sibling + dir containing agent_schema.yaml). Omit for single-agent repos. + required: false + type: string + default: "" suite: description: 'Robot suite stem to run (e.g. "05_ask_latency"). Omit for all.' 
required: false @@ -102,6 +109,9 @@ jobs: id: test run: | args=() + if [ -n "${{ inputs.agent }}" ]; then + args+=(--agent "${{ inputs.agent }}") + fi if [ -n "${{ inputs.suite }}" ]; then args+=(--suite "${{ inputs.suite }}") fi diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..37d8c78 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,136 @@ +# AGENTS.md — contract for AI contributors + +This document is the short version of [CONTRIBUTING.md](./CONTRIBUTING.md) tuned for AI coding agents (Cursor, Claude Code, Codex, Aider, Cline, Continue, and friends). Reading this in full before proposing changes will save you a round of review. + +--- + +## Project mental model + +`expert-agent` is a framework for shipping **ultra-specialist AI agents** — declarative YAML spec + corpus of reference docs → deployable API on Google Cloud Run. Three layers: + +1. **`backend/`** — FastAPI app (`app.main:app`). Stateless containers. State lives in GCS + Firestore + Chroma. +2. **`cli/`** — `expert` (Typer + Rich). Scaffold, validate, sync, ask, manage sessions, run E2E tests. **Workspace-aware** (multi-agent via `expert.toml`). +3. **`infra/`** — OpenTofu stacks (platform, chroma, agent). Per-project shared infra + per-agent Cloud Run service. + +Ground rule: **the backend is stateless**. Every new feature must survive `min=0` scale-to-zero. If you need state, put it in GCS or Firestore. + +--- + +## Non-negotiables + +- **No emoji in source code.** Ever. Use `rich` colors and Unicode box-drawing. See `cli/expert/ui.py` + `cli/expert/brand.py` for the visual identity. +- **Type hints everywhere.** `mypy --strict` must stay green on `backend/` and `cli/`. +- **Lint + format must pass.** `ruff check .` + `ruff format .`. +- **Tests must pass.** `pytest` green locally *and* in CI. Add tests for every behaviour change. +- **Conventional Commits.** `feat(scope): ...`, `fix(scope): ...`, etc. Release-please reads these. 
+- **No breaking API changes on `main`** without a migration note in the PR description. + +--- + +## Where to put things + +| Change | Goes in | +|-----------------------------------------------------|-----------------------------------------------------------------------------| +| New CLI command | `cli/expert/commands/<name>.py` + register in `cli/expert/main.py` | +| New backend endpoint | `backend/app/routes/<name>.py` + mount in `backend/app/main.py` | +| New schema field | `backend/app/schema.py` (pydantic) + update `example-schema/` sample | +| New UI helper | `cli/expert/ui.py` (follow the existing API shape: `print_*`) | +| New infra resource | The matching `infra/<stack>/` — `platform` (shared), `chroma` (shared), `agent` (per-agent) | +| New E2E test | `cli/expert/testkit/suites/NN_<name>.robot` + keywords in the matching `.resource` file | +| New multi-agent resolution rule | `cli/expert/workspace.py::Workspace.resolve` (document the precedence!) | + +--- + +## The visual identity + +The brand wordmark + tagline live in `cli/expert/brand.py`. Do **not** touch the ASCII art without approval — it's shared with the author's other open-source CLIs (`feliperbroering/eai`) and exists to create a coherent family look. + +UI conventions for any user-facing text: + +```text +✓ success → print_success("message") +✗ error → print_error("message") +⚠ warning → print_warning("message") +› neutral info → print_info("message") +▶ streamed output → reserved for assistant output in `expert ask` +``` + +Never invent new glyphs without updating `cli/expert/ui.py` and its docstring. + +--- + +## Before opening a PR + +Run this locally. It's what CI runs. If any line fails, fix it before pushing: + +```bash +source .venv/bin/activate +uv run ruff check . && uv run ruff format . +uv run mypy backend cli +uv run pytest -q +``` + +### Writing good CLI tests + +- Use `typer.testing.CliRunner()` (see `cli/tests/test_main_alias.py` for the canonical pattern). 
+- **Don't pin on glyphs.** Assert on stable strings like `"name must match"` — the `✗` prefix is a skin, not an API. +- For workspace tests, build minimal `expert.toml` + `agent_schema.yaml` in `tmp_path`. + +### Writing good backend tests + +- Use `pytest-asyncio` (auto mode) + `respx` for HTTP stubbing. +- Firestore is mocked via `mock-firestore`. Do not hit real Google APIs in tests. +- Every new `/route` gets at least: auth test, happy path, one error path. + +--- + +## Things that will get your PR rejected + +- Adding a dependency without justifying it in the PR description. +- Introducing state outside GCS / Firestore / Chroma (e.g. in-memory caches that assume a single replica). +- Silencing `mypy` with `# type: ignore` without a comment explaining why. +- Reformatting unrelated code. +- Commits that are not Conventional Commits. +- Breaking `ruff` (lint *or* format) without a documented reason. +- Copying the ASCII brand into other files — it's re-exported from `cli/expert/brand.py` precisely so we change it in one place. + +--- + +## Multi-agent workspaces — the part you'll probably touch + +A single repo can host many agents. Resolution precedence (first match wins): + +1. `--agent <name>` flag +2. `@<name>` positional shortcut (rewritten to `--agent` by `_rewrite_at_alias` in `main.py`) +3. `EXPERT_AGENT` env var +4. `expert use <name>` pin (written to `.expert/state.json`) +5. `default_agent` in `expert.toml` +6. Single-agent short-circuit (workspace has exactly one agent) +7. `--schema <path>` is not part of the chain: when given, it overrides everything above (legacy bypass for `expert validate` etc.) + +If you add a new command that needs to target an agent: + +```python +from ..context import resolve_context + +def cmd( + agent: Annotated[str | None, typer.Option("--agent", "-a", ...)] = None, + # other flags +) -> None: + ctx = resolve_context(selector=agent, ...) + # ctx.name, ctx.schema_path, ctx.endpoint, ctx.api_key are all filled in +``` + +Don't roll your own resolution logic. 
+ +--- + +## License + +By contributing, you agree your work is released under the [MIT License](./LICENSE). The CLA is: **open a PR, you've agreed**. Nothing to sign. + +--- + +## When in doubt + +Open a draft PR or file an issue. Showing intent beats writing the wrong thing twice. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..50f30a3 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,129 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +- Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +- The use of sexualized language or imagery, and sexual attention or advances + of any kind +- Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email address, + without their explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for +moderation decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +[hi@felipe.run](mailto:hi@felipe.run). All complaints will be reviewed and +investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. 
Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html). + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq](https://www.contributor-covenant.org/faq). +Translations are available at +[https://www.contributor-covenant.org/translations](https://www.contributor-covenant.org/translations). + +[homepage]: https://www.contributor-covenant.org diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..5209c20 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,139 @@ +# Contributing to expert-agent + +Thanks for taking the time to contribute. This project is MIT-licensed and run in public — every patch, typo fix, and review comment is genuinely welcome. + +If you're an AI coding agent (Cursor, Claude Code, Codex, Aider, etc.) **read [AGENTS.md](./AGENTS.md) first** — it's the short version of this document tuned for AI collaborators. + +--- + +## Before you open a PR + +1. **Check the open issues and PRs** for duplicates. If something is already in flight, ping there instead of forking a parallel effort. +2. **For non-trivial changes, open an issue first.** A two-line "I'd like to implement X, is it welcome?" saves everyone time. +3. **Keep PRs focused.** One feature or one fix per PR. 
Refactors are welcome but ship them separately from behaviour changes. + +--- + +## Local setup + +Requires Python 3.12+, [`uv`](https://docs.astral.sh/uv/), and (optionally) Docker + OpenTofu if you're touching infra. + +```bash +git clone https://github.com/feliperbroering/expert-agent +cd expert-agent + +uv venv --python 3.12 +source .venv/bin/activate +uv pip install -e ".[dev,test,vertex,otel]" + +# quick smoke test +expert --version +expert --help +``` + +--- + +## Checks that must pass + +The CI runs exactly these four commands. Run them locally before pushing: + +```bash +uv run ruff check . # lint — use `--fix` for auto-fixes +uv run ruff format . # formatter +uv run mypy backend cli # strict type checks +uv run pytest # unit + integration tests +``` + +Target coverage: **85%+** on backend, **90%+** on CLI. New code must include tests. + +### End-to-end + +If you're changing user-facing CLI behaviour or the HTTP contract, run the Robot Framework suite against a local or staging agent: + +```bash +expert test --suite 01_validate # offline suites, no endpoint needed +expert test --endpoint http://... --api-key ... --suite 04_deploy +``` + +--- + +## Code style + +- **No emoji in source code.** Visual cues come from `rich` colors and Unicode box-drawing (see `cli/expert/ui.py`). +- **Docstrings over comments.** Functions and classes get docstrings; inline comments only explain *why*, not *what*. +- **Type hints everywhere.** `mypy --strict` is non-negotiable on the backend and CLI. +- **Prefer explicit over clever.** The project is a library people read; optimise for clarity. +- **Conventional Commits.** Every commit subject follows [`<type>(<scope>): <subject>`](https://www.conventionalcommits.org/). Allowed types: `feat`, `fix`, `docs`, `chore`, `refactor`, `test`, `style`, `perf`, `build`, `ci`. + +Releases are automated — [release-please](https://github.com/googleapis/release-please) reads Conventional Commits on `main` and opens version-bump PRs. 
+ +--- + +## What lives where + +``` +backend/app/ FastAPI app code +backend/tests/ backend unit + integration tests +cli/expert/ the `expert` Typer app +cli/expert/testkit/ Robot Framework suites shipped inside the wheel +cli/tests/ CLI unit tests +example-schema/ annotated sample AgentSchema + identity.md +infra/ OpenTofu stacks (platform / chroma / agent) +scripts/ one-off bootstrap + migration scripts +docs/ PRIVATE_AGENT_REPO, AGENT_E2E_SETUP +.github/workflows/ ci, release-please, deploy, expert-e2e (reusable) +``` + +When you touch one layer, stay in it. Cross-cutting refactors (e.g. renaming a pydantic field used by backend + CLI) are fine but should update *both* in the same PR. + +--- + +## Tests in the CLI — important patterns + +- Use `typer.testing.CliRunner()` for integration tests (see `cli/tests/test_main_alias.py`). +- Avoid asserting on colored/glyph-decorated output text — pin to the **stable** part of the message (e.g. `"name must match"` instead of the `✗` glyph). See `cli/tests/test_init.py` for the pattern. +- For workspace-dependent tests, use the `tmp_path` fixture and build minimal `expert.toml` / `agent_schema.yaml` files inline. + +--- + +## Filing a good bug report + +Please include: + +- `expert --version` +- Python version (`python --version`) +- Minimal reproducer (schema + command + expected vs actual) +- Relevant traceback, trimmed +- OS + +A template is provided at [`.github/ISSUE_TEMPLATE/bug_report.yml`](./.github/ISSUE_TEMPLATE/bug_report.yml). + +--- + +## Filing a good feature request + +Please describe: + +- The problem (user story) — *"as a curator of X, I want to Y so that Z"* +- The shape of the solution you'd expect (CLI flag? new schema field? new endpoint?) +- Alternatives you considered + +A template is provided at [`.github/ISSUE_TEMPLATE/feature_request.yml`](./.github/ISSUE_TEMPLATE/feature_request.yml). 
+ +--- + +## Security + +If you think you've found a vulnerability, **do not open a public issue.** Follow the private disclosure process in [SECURITY.md](./SECURITY.md). + +--- + +## Code of Conduct + +Participation in this project is governed by the [Contributor Covenant v2.1](./CODE_OF_CONDUCT.md). In short: be kind, assume good faith, and don't make it weird. + +--- + +## License + +By contributing, you agree that your contribution is licensed under the [MIT License](./LICENSE). diff --git a/LICENSE b/LICENSE index 1185446..45a0d31 100644 --- a/LICENSE +++ b/LICENSE @@ -1,201 +1,21 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. 
- - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." 
- - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright 2026 Felipe Broering - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +MIT License + +Copyright (c) 2026 Felipe Broering + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 17e9896..b1c5d6b 100644 --- a/README.md +++ b/README.md @@ -1,116 +1,115 @@ -# expert-agent +
-> **NotebookLM as an API.** Open-source framework for ultra-specialist AI agents -> grounded in a curated knowledge base, powered by Gemini long-context + Context -> Caching, with multi-layer persistent memory. +``` + ███████╗██╗ ██╗██████╗ ███████╗██████╗ ████████╗ + ██╔════╝╚██╗██╔╝██╔══██╗██╔════╝██╔══██╗╚══██╔══╝ ╭───╮ + █████╗ ╚███╔╝ ██████╔╝█████╗ ██████╔╝ ██║ │ ≡ │ + ██╔══╝ ██╔██╗ ██╔═══╝ ██╔══╝ ██╔══██╗ ██║ ╰───╯ + ███████╗██╔╝ ██╗██║ ███████╗██║ ██║ ██║ + ╚══════╝╚═╝ ╚═╝╚═╝ ╚══════╝╚═╝ ╚═╝ ╚═╝ +``` + +**ground a model on your docs. ship it as an API.** + +declarative ultra-specialist agents on Cloud Run — Gemini long-context, Context Cache, persistent memory. [![CI](https://github.com/feliperbroering/expert-agent/actions/workflows/ci.yml/badge.svg)](https://github.com/feliperbroering/expert-agent/actions/workflows/ci.yml) -[![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](./LICENSE) +[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](./LICENSE) [![Python 3.12+](https://img.shields.io/badge/python-3.12%2B-blue.svg)](https://www.python.org/) +[![Status: alpha](https://img.shields.io/badge/status-alpha-orange.svg)](#status) -> **Status — alpha.** End-to-end production deploy validated on Google Cloud -> Run (FastAPI backend + Chroma HTTP + Firestore + GCS). API surface and -> schema are still subject to breaking changes until `v1.0`. +
--- -## What you get +```bash +# 1. scaffold an agent +expert init cardio-expert -You define an agent declaratively: +# 2. drop your corpus +cp ~/papers/*.pdf cardio-expert/docs/ -1. A **system prompt** (the agent's identity and behaviour). -2. A **directory of reference documents** (`.md`, `.pdf`, `.txt`). -3. A **YAML schema** (`agent_schema.yaml`) wiring the two together. +# 3. validate + size the context cache +expert validate && expert count-tokens -…and `expert-agent` gives you a deployable Cloud Run service exposing: +# 4. deploy once, ask forever +expert sync && expert ask "qual fórmula de correção do QTc a AHA recomenda?" +``` -- **`/ask`** — streaming Q&A grounded in the corpus, with optional citations. -- **`/docs/sync`** — incremental upload of the knowledge base (SHA-keyed). -- **`/sessions/...`** — short-term conversational memory (LGPD/GDPR friendly). -- **`/memory/...`** — long-term semantic recall (verbatim, not summarised). -- **`/health`** + **`/ready`** — liveness + dependency probes. +You define the agent declaratively — a system prompt, a directory of reference documents, and a YAML schema. `expert-agent` gives you back a deployable API with grounded answers, citations, long-term memory, and LGPD-friendly session controls. -A Python CLI (`expert`) handles scaffolding, validation, sync, and -ad-hoc queries against any deployed agent. +> [!NOTE] +> **Status — alpha.** End-to-end production deploy validated on Google Cloud Run (FastAPI + Chroma HTTP + Firestore + GCS). API surface and schema are still subject to breaking changes until `v1.0`. 
--- -## Architecture +## What you get -``` - client (CLI / HTTP) - │ - ▼ - ┌──────────────────────────────┐ - │ agent (Cloud Run, FastAPI) │ ◀── reads agent_schema.yaml - │ ├─ /ask (SSE) │ from gs://docs-bucket//schema/ - │ ├─ /docs/sync │ - │ ├─ /sessions /memory │ - │ └─ /health /ready │ - └────┬───────────┬─────────┬───┘ - │ │ │ - ▼ ▼ ▼ - Gemini API Firestore Chroma HTTP (Cloud Run, min=1) - (Context (sessions ├─ shared per project - Cache) + state) └─ persisted via GCS FUSE → gs://memory/chroma - ▲ - │ File API mirror - │ - GCS (durable source of truth) - ├─ gs://docs/// knowledge base - ├─ gs://docs//_state/sync_manifest.json - ├─ gs://docs//schema/... schema + prompts - └─ gs://memory// long-term memory snapshots -``` +Out of the box, your deployed agent exposes: -**Key design choices** ([deeper notes in `infra/README.md`](./infra/README.md)): +- **`POST /ask`** — streaming Q&A grounded in your corpus, with optional citations +- **`POST /docs/sync`** — incremental upload of the knowledge base (SHA-keyed) +- **`GET/DELETE /sessions/...`** — short-term conversational memory (LGPD/GDPR) +- **`POST /memory/...`** — long-term verbatim recall (not summaries) +- **`GET /health` / `/ready`** — liveness + dependency probes -- **Stateless agent containers.** All state lives in GCS or Firestore. Cloud - Run can scale to zero and back without losing context. -- **Context Cache as the grounding source.** Documents go into a Gemini - Context Cache built once per knowledge-base SHA; subsequent `/ask` calls - reuse it (`cached_tokens ≈ input_tokens` in steady state). -- **Multi-layer memory.** Firestore holds the last N turns of conversation - (short-term) plus a verbatim recall index (long-term, indexed in Chroma - via [MemPalace](https://pypi.org/project/mempalace/)). -- **One Chroma HTTP server per project** (Cloud Run, `min=max=1`, GCS FUSE - for persistence) — shared across every agent in the project. 
+A Python CLI (`expert`) handles scaffolding, validation, sync, ad-hoc queries, multi-agent workspace management, and a ready-to-run Robot Framework E2E kit. --- ## Quick start -### 1. Bootstrap a GCP project (one-time) +### Install ```bash -PROJECT_ID=my-agents-prod -REGION=us-central1 +# uv (recommended — single static binary experience) +uv tool install "git+https://github.com/feliperbroering/expert-agent.git" +# or pipx +pipx install "git+https://github.com/feliperbroering/expert-agent.git" +``` + +Verify: + +```bash +expert --version +``` + +### Scaffold your first agent + +```bash +expert init my-expert +cd my-expert +$EDITOR prompts/identity.md # define behaviour +cp ~/your-corpus/*.pdf docs/ # drop in your reference material +expert validate # schema contract check +expert count-tokens # size the context cache +``` + +### Deploy to Google Cloud + +One-time project bootstrap: + +```bash +PROJECT_ID=my-agents-prod REGION=us-central1 gcloud auth login gcloud auth application-default login gcloud config set project "$PROJECT_ID" -# Enables APIs, creates tfstate bucket, Artifact Registry, Firestore, -# and the empty `gemini-api-key` secret. Idempotent. ./scripts/bootstrap-project.sh "$PROJECT_ID" "$REGION" -# Inject your Gemini API key (get one at https://aistudio.google.com/apikey). echo -n "YOUR_GEMINI_KEY" | \ gcloud secrets versions add gemini-api-key --data-file=- --project="$PROJECT_ID" ``` -### 2. 
Apply the shared platform stacks +Apply the shared infra (runs per project, not per agent): ```bash -cd infra/platform -tofu init -backend-config="bucket=${PROJECT_ID}-tfstate" -tofu apply -var="project_id=${PROJECT_ID}" -var="region=${REGION}" -cd ../chroma -tofu init -backend-config="bucket=${PROJECT_ID}-tfstate" -tofu apply -var="project_id=${PROJECT_ID}" -var="region=${REGION}" +(cd infra/platform && tofu init -backend-config="bucket=${PROJECT_ID}-tfstate" && tofu apply -var="project_id=${PROJECT_ID}" -var="region=${REGION}") +(cd infra/chroma && tofu init -backend-config="bucket=${PROJECT_ID}-tfstate" && tofu apply -var="project_id=${PROJECT_ID}" -var="region=${REGION}") ``` -### 3. Build & push the backend image +Build + ship the backend image: ```bash gcloud builds submit . \ @@ -118,20 +117,9 @@ gcloud builds submit . \ --substitutions=_IMAGE="${REGION}-docker.pkg.dev/${PROJECT_ID}/expert-agent/backend:v0.1.0" ``` -### 4. Scaffold and deploy your first agent +Provision this agent's Cloud Run service: ```bash -# Install the CLI (uv tool style — single root pyproject.toml) -uv tool install "git+https://github.com/feliperbroering/expert-agent.git" - -# Scaffold an agent locally -expert init my-expert -cd my-expert -$EDITOR prompts/identity.md # define behaviour -cp ~/papers/*.pdf docs/ # drop in your corpus -expert validate --schema ./agent_schema.yaml - -# Provision Cloud Run + IAM + secrets for this agent cd ../infra/agent tofu init -reconfigure \ -backend-config="bucket=${PROJECT_ID}-tfstate" \ @@ -141,25 +129,63 @@ tofu apply \ -var="region=${REGION}" \ -var="agent_id=my-expert" \ -var="image=${REGION}-docker.pkg.dev/${PROJECT_ID}/expert-agent/backend:v0.1.0" +``` + +Seed the admin key, push docs, ask: -# Seed the per-agent admin key (one-time) +```bash ADMIN_KEY=$(python -c 'import secrets; print(secrets.token_urlsafe(32))') echo -n "$ADMIN_KEY" | \ gcloud secrets versions add admin-key-my-expert --data-file=- --project="${PROJECT_ID}" -# Push docs + 
create the Context Cache expert sync \ - --schema ./agent_schema.yaml \ - --endpoint "$(gcloud run services describe agent-my-expert \ - --region="${REGION}" --format='value(status.url)')" \ + --endpoint "$(gcloud run services describe agent-my-expert --region="${REGION}" --format='value(status.url)')" \ --api-key "$ADMIN_KEY" -# Ask something -expert ask "What does my corpus say about X?" \ - --endpoint --api-key "$ADMIN_KEY" +expert ask "what does my corpus say about X?" --api-key "$ADMIN_KEY" ``` -> See [`infra/README.md`](./infra/README.md) for the full per-stack reference. +See [`docs/PRIVATE_AGENT_REPO.md`](./docs/PRIVATE_AGENT_REPO.md) for the full private-repo checklist (one agent) and [`infra/README.md`](./infra/README.md) for the per-stack reference. + +--- + +## Multi-agent workspaces + +A single repo can host many agents. `expert` auto-detects them and offers three equivalent ways to pick which one a command targets: + +```bash +expert agents # list everything the workspace knows about + +# pick per-invocation +expert ask --agent derm "..." # explicit flag +expert @derm ask "..." # @alias positional shortcut +EXPERT_AGENT=derm expert ask "..." # env var + +# pin for a session +expert use derm # write .expert/state.json +expert ask "..." # uses derm from now on +expert which # which agent would run? +``` + +Declare agents explicitly via `expert.toml` for full control: + +```toml +default_agent = "derm" + +[agents.derm] +schema = "derm-expert/agent_schema.yaml" +endpoint_env = "DERM_AGENT_ENDPOINT" +api_key_env = "DERM_AGENT_API_KEY" + +[agents.my-expert] +schema = "my-expert/agent_schema.yaml" +endpoint_env = "MY_EXPERT_ENDPOINT" +api_key_env = "MY_EXPERT_API_KEY" +``` + +…or skip the file entirely — any sibling directory with an `agent_schema.yaml` is discovered automatically. + +**Resolution precedence** (first match wins): `--agent` flag → `@alias` → `EXPERT_AGENT` env var → `expert use` pin → `expert.toml` default → single-agent short-circuit. 
--- @@ -177,8 +203,8 @@ metadata: spec: model: - provider: gemini # or `gemini-vertex` (optional `[vertex]` extra) - name: gemini-2.5-pro # any Pro tier with Context Caching support + provider: gemini # or `gemini-vertex` + name: gemini-2.5-pro # any tier with Context Caching temperature: 0.2 max_output_tokens: 8192 @@ -192,7 +218,7 @@ spec: context_cache: enabled: true - ttl_seconds: 3600 # 1 h is the AI Studio sweet spot + ttl_seconds: 3600 # 1 h — the AI Studio sweet spot refresh_before_expiry_seconds: 300 memory: @@ -201,47 +227,48 @@ spec: persistence: { type: chroma-http } } grounding: - # AI Studio rejects `tools=GoogleSearch` together with `cachedContent`. - # Vertex supports both — flip this on if you migrate. - enabled: false + enabled: false # AI Studio rejects `tools=GoogleSearch` + `cachedContent` max_citations: 10 rate_limit: { requests_per_minute: 30, tokens_per_day: 1000000 } ``` -A full annotated example lives in [`example-schema/`](./example-schema/). +Full annotated example: [`example-schema/`](./example-schema/). 
--- ## CLI reference ```text -expert init Scaffold a new agent project -expert validate Validate agent_schema.yaml against the contract -expert count-tokens Estimate corpus tokens (Context Cache budgeting) -expert sync Push docs + rebuild Context Cache -expert ask "" Stream answer from a deployed agent -expert sessions list/delete Manage user sessions (LGPD) -expert test Run the packaged E2E Robot Framework kit +expert init scaffold a new agent project +expert agents list agents in this workspace +expert use pin an agent as active +expert which show which agent a bare command targets +expert validate validate agent_schema.yaml +expert count-tokens estimate corpus tokens (cache budgeting) +expert sync push docs + rebuild Context Cache +expert ask "" stream answer from the deployed agent +expert sessions list/show/delete manage user sessions (LGPD) +expert test run the packaged Robot Framework E2E kit +expert --version show the brand + version ``` -Every command supports `--help` for full options. +Every command supports `--help`, `--agent ` (or `@alias`), `--endpoint`, `--api-key`. --- ## End-to-end testing -A ready-made Robot Framework kit ships with the CLI. Install with the -`[test]` extra and run against any agent: +A ready-made Robot Framework kit ships with the CLI. 
Install with the `[test]` extra: ```bash uv tool install 'expert-agent[test] @ git+https://github.com/feliperbroering/expert-agent.git' export EXPERT_AGENT_ENDPOINT=https://my-agent-xxxx.a.run.app -export EXPERT_AGENT_API_KEY=$(gcloud secrets versions access latest --secret=my-agent-api-key) +export EXPERT_AGENT_API_KEY=$(gcloud secrets versions access latest --secret=admin-key-my-expert) -expert test --schema ./agent_schema.yaml # all suites -expert test --suite 05_ask_latency # single suite -expert test --list # discover suites +expert test # all suites +expert test --suite 05_ask_latency # single suite +expert test --list # discover suites ``` Suites shipped: @@ -250,33 +277,82 @@ Suites shipped: |-------------------|:--------:|---------------------------------------------------------| | `01_validate` | yes | `expert validate` succeeds on the agent schema | | `02_create` | yes | `expert init --yes` scaffolds + validates out of the box| -| `03_update` | yes | Edit → validate loop preserves schema integrity | +| `03_update` | yes | edit → validate loop preserves schema integrity | | `04_deploy` | no | `/health`, `/ready` respond 200; unauth calls get 401 | -| `05_ask_latency` | no | Warmup + steady-state TTFT budgets + cache hit signal | +| `05_ask_latency` | no | warmup + steady-state TTFT budgets + cache-hit signal | | `06_sessions` | no | LGPD: create → list → delete round-trip | ### Reusable GitHub Actions workflow -Private agent repos inherit the same suites via a reusable workflow — no -submodules or copy-paste. See -[`.github/workflows/expert-e2e.yml`](.github/workflows/expert-e2e.yml): +Private agent repos inherit the same suites via a reusable workflow — no submodules, no copy-paste. 
See [`.github/workflows/expert-e2e.yml`](.github/workflows/expert-e2e.yml): ```yaml jobs: e2e: uses: feliperbroering/expert-agent/.github/workflows/expert-e2e.yml@main with: - schema: ecg-expert/agent_schema.yaml - suite: 05_ask_latency # optional — omit to run all + schema: my-expert/agent_schema.yaml + suite: 05_ask_latency # optional — omit to run everything secrets: endpoint: ${{ secrets.EXPERT_AGENT_ENDPOINT }} api-key: ${{ secrets.EXPERT_AGENT_API_KEY }} ``` -> **Wiring this into a private repo for the first time?** Follow -> [`docs/AGENT_E2E_SETUP.md`](docs/AGENT_E2E_SETUP.md) — a copy-pasteable, -> agent-friendly checklist that takes you from "empty repo with a schema" to -> "green nightly E2E job" in five steps. +For monorepos hosting multiple agents, use a matrix strategy (see [`docs/AGENT_E2E_SETUP.md`](docs/AGENT_E2E_SETUP.md)). + +--- + +## Architecture + +``` + client (CLI / HTTP) + │ + ▼ + ┌──────────────────────────────┐ + │ agent (Cloud Run, FastAPI) │ ◀── reads agent_schema.yaml + │ ├─ /ask (SSE) │ from gs://docs-bucket//schema/ + │ ├─ /docs/sync │ + │ ├─ /sessions /memory │ + │ └─ /health /ready │ + └────┬───────────┬─────────┬───┘ + │ │ │ + ▼ ▼ ▼ + Gemini API Firestore Chroma HTTP (Cloud Run, min=1) + (Context (sessions ├─ shared per project + Cache) + state) └─ persisted via GCS FUSE → gs://memory/chroma + ▲ + │ File API mirror + │ + GCS (durable source of truth) + ├─ gs://docs/// knowledge base + ├─ gs://docs//_state/sync_manifest.json + ├─ gs://docs//schema/... schema + prompts + └─ gs://memory// long-term memory snapshots +``` + +**Key design choices** ([deeper notes in `infra/README.md`](./infra/README.md)): + +- **Stateless agent containers.** All state lives in GCS or Firestore. Cloud Run can scale to zero and back without losing context. 
+- **Context Cache as the grounding source.** Documents go into a Gemini Context Cache built once per knowledge-base SHA; subsequent `/ask` calls reuse it (`cached_tokens ≈ input_tokens` in steady state). +- **Multi-layer memory.** Firestore holds the last N turns (short-term) plus a verbatim recall index (long-term, indexed in Chroma via [MemPalace](https://pypi.org/project/mempalace/)). +- **One Chroma HTTP server per project** (Cloud Run, `min=max=1`, GCS FUSE for persistence) — shared across every agent in the project. + +--- + +## vs other ways to ship a RAG agent + +| | expert-agent | NotebookLM | OpenAI Assistants | Bring-your-own RAG stack | +|-----------------------------------------|:------------------------:|:------------------:|:------------------------:|:------------------------:| +| **API you own** | ✓ (your Cloud Run) | ✗ (Google UI only) | ✓ (OpenAI hosted) | ✓ | +| **Grounded in your corpus** | ✓ (Context Cache) | ✓ | ✓ (file_search) | ✓ (you wire it up) | +| **Long-context native** (100k+ tokens) | ✓ (Gemini 2.5 Pro) | ✓ | partial (chunked) | depends | +| **Declarative YAML spec** | ✓ (`agent_schema.yaml`) | ✗ | ✗ | ✗ | +| **Multi-agent in one repo** | ✓ (`expert.toml` + `@`) | n/a | ✗ | DIY | +| **Persistent conversation memory** | ✓ (Firestore + MemPalace)| partial | ✓ | DIY | +| **E2E test kit** (Robot Framework) | ✓ (reusable workflow) | ✗ | ✗ | DIY | +| **LGPD/GDPR session delete** | ✓ (`/sessions/:id`) | ✗ | partial | DIY | +| **Self-hosted** | ✓ (your GCP project) | ✗ | ✗ | ✓ | +| **Open source** | ✓ (MIT) | ✗ | ✗ | varies | --- @@ -284,55 +360,47 @@ jobs: Cloud Run uses **two layers of bearer auth**, intentionally: -| Header | Audience | Required for | -|---------------------------------|-------------------------|--------------------------------| -| `X-Serverless-Authorization` | Cloud Run IAM (ID token)| Reaching the service at all | -| `Authorization: Bearer ` | App layer (admin key) | `/ask`, `/docs/sync`, `/memory`| +| Header | 
Audience | Required for | +|---------------------------------|---------------------------|--------------------------------| +| `X-Serverless-Authorization` | Cloud Run IAM (ID token) | reaching the service at all | +| `Authorization: Bearer ` | App layer (admin key) | `/ask`, `/docs/sync`, `/memory`| -The split avoids the well-known collision where Cloud Run's IAM strips -`Authorization` before the app sees it. Public endpoints (`/health`, -`/ready`) only need the ID token. - -For local dev you can run with `APP_ENV=development` and disable the -admin-key check entirely (see `backend/app/auth.py`). +The split avoids the well-known collision where Cloud Run's IAM strips `Authorization` before the app sees it. Public endpoints (`/health`, `/ready`) only need the ID token. For local dev, set `APP_ENV=development` to skip the admin-key check (see `backend/app/auth.py`). --- ## Repository layout ``` -backend/ FastAPI app (`app.main:app`) + tests - app/llm/ LLMClient protocol + Gemini AI Studio / Vertex implementations - app/cache/ Context Cache manager + background refresher - app/docs/ Manifest model + DocsSyncService (incremental SHA diff) - app/memory/ Short-term (Firestore) + long-term (MemPalace/Chroma) + orchestrator - app/routes/ /ask /docs/sync /sessions /memory /health -cli/ `expert` (Typer + Rich) -example-schema/ Annotated AgentSchema + prompt template -infra/ OpenTofu stacks: platform, chroma, agent (per agent) -scripts/ bootstrap-project.sh, bootstrap_docs_to_gcs.py -.github/workflows/ ci.yml, release-please.yml, deploy.yml +backend/ FastAPI app (`app.main:app`) + tests + app/llm/ LLMClient protocol + Gemini AI Studio / Vertex implementations + app/cache/ Context Cache manager + background refresher + app/docs/ Manifest + DocsSyncService (incremental SHA diff) + app/memory/ Short-term (Firestore) + long-term (MemPalace/Chroma) + orchestrator + app/routes/ /ask /docs/sync /sessions /memory /health +cli/ `expert` CLI (Typer + Rich) + Robot Framework testkit 
+example-schema/ annotated AgentSchema + prompt template +infra/ OpenTofu stacks: platform, chroma, agent (per agent) +scripts/ bootstrap-project.sh, bootstrap_docs_to_gcs.py +docs/ PRIVATE_AGENT_REPO.md, AGENT_E2E_SETUP.md +.github/workflows/ ci.yml, release-please.yml, deploy.yml, expert-e2e.yml ``` --- ## Cost ballpark -For a single project hosting one or more agents on `us-central1` (or -similar), idling on Cloud Run scale-to-zero: +For a single project hosting one or more agents on `us-central1`, idling on Cloud Run scale-to-zero: | Component | Idle | Notes | |--------------------------------------|----------------|----------------------------------------| -| Chroma server (Cloud Run, min=max=1) | **~$40 / mo** | Always-on, shared across all agents | -| Each agent (Cloud Run, min=0) | **~$0** | Pay only on request | -| Firestore | **~$0** | Free tier covers low-QPS use | +| Chroma server (Cloud Run, min=max=1) | **~$40 / mo** | always-on, shared across all agents | +| Each agent (Cloud Run, min=0) | **~$0** | pay only on request | +| Firestore | **~$0** | free tier covers low-QPS use | | Gemini Pro requests | **variable** | `cached_tokens` are heavily discounted | -| GCS storage | **~$0.02/GiB** | Docs + memory snapshots | +| GCS storage | **~$0.02/GiB** | docs + memory snapshots | -Headline efficiency win: with Context Caching on, a typical `/ask` against -a ~800 k-token corpus shows `cached_tokens / input_tokens ≈ 0.999`, i.e. -the prompt portion of the cost is essentially flat regardless of how big -your corpus is. +Headline efficiency win: with Context Caching on, a typical `/ask` against a ~800 k-token corpus shows `cached_tokens / input_tokens ≈ 0.999` — the prompt portion of the cost is essentially flat regardless of how big your corpus is. --- @@ -344,15 +412,13 @@ your corpus is. - [ ] Multi-tenant agent (per-tenant memory + cache) for SaaS use cases. - [ ] Web UI / playground for non-technical curators. 
- [ ] `release-please`-driven versioned container tags pushed to GHCR. +- [ ] PyPI release (`pip install expert-agent`) + Homebrew tap. --- ## Contributing -Issues and PRs are welcome. The project follows -[Conventional Commits](https://www.conventionalcommits.org/) and uses -[release-please](https://github.com/googleapis/release-please) for SemVer -automation. Run the full check suite with: +Issues and PRs welcome. See [CONTRIBUTING.md](./CONTRIBUTING.md) for dev setup, style, and conventions. AI coding agents: read [AGENTS.md](./AGENTS.md) first. Please report security issues privately via [SECURITY.md](./SECURITY.md). We follow the [Contributor Covenant v2.1](./CODE_OF_CONDUCT.md). Run the full check suite with: ```bash uv sync --extra dev --extra vertex --extra otel @@ -363,6 +429,12 @@ uv run pytest --- +## Acknowledgements + +`expert-agent` stands on the shoulders of giants: [Gemini](https://ai.google.dev/), [FastAPI](https://fastapi.tiangolo.com/), [Typer](https://typer.tiangolo.com/) + [Rich](https://rich.readthedocs.io/), [Chroma](https://www.trychroma.com/), [MemPalace](https://pypi.org/project/mempalace/), [Robot Framework](https://robotframework.org/), [OpenTofu](https://opentofu.org/). + +--- + ## License -Apache-2.0 — see [LICENSE](./LICENSE). +[MIT](./LICENSE) — do what you want, just don't sue us. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..de384fb --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,66 @@ +# Security Policy + +## Supported versions + +`expert-agent` is in alpha. Security fixes are shipped against the latest `main` and the most recent tagged release. Older releases are not patched — please upgrade. + +| Version | Supported | +|----------|--------------------| +| `main` | ✓ (latest fixes) | +| `0.1.x` | ✓ (latest tag) | +| `< 0.1` | ✗ | + +## Reporting a vulnerability + +**Please do not open a public GitHub issue.** + +Report vulnerabilities privately through one of: + +1. 
**GitHub private advisory** — [new advisory](https://github.com/feliperbroering/expert-agent/security/advisories/new) (preferred — keeps the timeline tied to the repo). +2. **Email** — [hi@felipe.run](mailto:hi@felipe.run) with subject `[expert-agent security]`. Please include: + - A description of the issue and its impact. + - Steps to reproduce (or a proof-of-concept). + - The commit SHA or version you tested against. + - Your preferred contact method for the follow-up. + +You'll get an acknowledgement within **72 hours** and a triage update within **7 days**. + +## Disclosure timeline + +1. **Day 0** — you report privately. +2. **Day ≤ 3** — we acknowledge and start triage. +3. **Day ≤ 30** — we ship a fix on `main` and cut a patch release. For critical issues we aim for ≤ 7 days. +4. **Day ≤ 60** — we publish a GitHub Security Advisory crediting you (unless you opt out). + +If a fix cannot land in 60 days (e.g. requires upstream changes in Gemini, Chroma, or FastAPI), we'll coordinate the disclosure window with you. + +## Scope + +In scope: + +- `backend/` — FastAPI app, auth middleware, data-handling paths. +- `cli/` — command-injection, credential handling, file-write paths. +- `infra/` — IAM bindings, Cloud Run config, Secret Manager usage. +- Supply chain — pinned dependencies, Docker base image, CI workflow secrets. + +Out of scope (please *don't* report these): + +- Misconfiguration of **your** GCP project or leaked API keys you created. +- Volumetric DoS against a self-hosted deploy (Cloud Run throttling is your config). +- Vulnerabilities in Gemini, Firestore, Chroma, or other upstream services — report those to the upstream maintainer. +- Social engineering of maintainers. + +## Hardening checklist for operators + +If you self-host an expert agent, here's the short audit we suggest running: + +- **Rotate admin keys** regularly (`admin-key-<agent_id>` secret version bump). +- Keep **`APP_ENV=production`** — development mode disables the bearer-key check. 
+- Scope the **Cloud Run service account** to exactly the secrets + GCS prefixes it needs. +- Use **CMEK** (customer-managed encryption keys) on GCS + Firestore if your data sensitivity requires it. +- Pin the backend image by **digest**, not by tag, in your `tofu apply`. +- Enable **Cloud Run request logging** + **VPC Service Controls** if you're in a regulated environment. + +## Credits + +Security contributors will be listed in release notes and (if desired) in a `SECURITY_HALL_OF_FAME.md`. Thanks for keeping this project safe. diff --git a/backend/app/llm/gemini_ai_studio.py b/backend/app/llm/gemini_ai_studio.py index 2e4d8fe..ad0c01b 100644 --- a/backend/app/llm/gemini_ai_studio.py +++ b/backend/app/llm/gemini_ai_studio.py @@ -36,7 +36,7 @@ # Conservative default for unit tests / smoke runs. Production agents pin a # real Pro tier (e.g. ``gemini-2.5-pro``) via ``spec.model.name`` in their # AgentSchema; bump together with the SDK version when a newer Pro ships. -DEFAULT_MODEL = "gemini-2.0-flash-exp" +DEFAULT_MODEL = "gemini-2.5-flash" _TRANSIENT_EXC: tuple[type[BaseException], ...] = (TimeoutError, ConnectionError) @@ -73,7 +73,7 @@ class GeminiAIStudioClient: Gemini AI Studio API key (secret). model: Model identifier, e.g. ``"gemini-2.5-pro"`` or - ``"gemini-2.0-flash-exp"`` for cheaper smoke tests. + ``"gemini-2.5-flash"`` for cheaper smoke tests. max_citations: Upper bound on citations surfaced per generation chunk. 
""" diff --git a/cli/expert/__init__.py b/cli/expert/__init__.py index 657073e..f87e932 100644 --- a/cli/expert/__init__.py +++ b/cli/expert/__init__.py @@ -1,3 +1,8 @@ """expert — CLI for expert-agent (init, validate, sync, ask, sessions).""" -__version__ = "0.1.0" +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("expert-agent") +except PackageNotFoundError: + __version__ = "0.0.0+local" diff --git a/cli/expert/brand.py b/cli/expert/brand.py new file mode 100644 index 0000000..d5f8c07 --- /dev/null +++ b/cli/expert/brand.py @@ -0,0 +1,85 @@ +"""ASCII brand + helpers — shared visual identity across the CLI. + +The logo uses the classic *ANSI Shadow* figlet font for the word ``EXPERT`` +paired with a small knowledge glyph box on the right (`[ ≡ ]`, three stacked +lines = a book/corpus). This mirrors the design language of the author's +other CLI tools (see ``feliperbroering/eai``) while keeping a distinct +accent so they read as a family. + +The brand renders with zero emoji characters — visual cues come from +Unicode box-drawing, Rich colors, and restrained accent tokens. +""" + +from __future__ import annotations + +from rich.console import Console +from rich.text import Text + +from . import __version__ + +# The logo is split into (a) the ANSI-shadow wordmark and (b) a small +# knowledge-glyph box rendered to the right. Rendering them as two columns +# keeps them in sync regardless of terminal width and lets us tint them +# independently. +_WORDMARK = ( + "███████╗██╗ ██╗██████╗ ███████╗██████╗ ████████╗", + "██╔════╝╚██╗██╔╝██╔══██╗██╔════╝██╔══██╗╚══██╔══╝", + "█████╗ ╚███╔╝ ██████╔╝█████╗ ██████╔╝ ██║ ", + "██╔══╝ ██╔██╗ ██╔═══╝ ██╔══╝ ██╔══██╗ ██║ ", + "███████╗██╔╝ ██╗██║ ███████╗██║ ██║ ██║ ", + "╚══════╝╚═╝ ╚═╝╚═╝ ╚══════╝╚═╝ ╚═╝ ╚═╝ ", +) + +_GLYPH = ( + " ", + " ╭───╮ ", + " │ ≡ │ ", + " ╰───╯ ", + " ", + " ", +) + +TAGLINE = "ground a model on your docs. ship it as an API." 
+SUBTITLE = "declarative ultra-specialist agents on Cloud Run — Gemini long-context, Context Cache, persistent memory." + +# Accent colors — picked to read well on both dark and light terminals and +# to stay distinct from `eai` (which leans green/cyan). +_ACCENT = "bright_cyan" +_DIM = "grey50" +_HEADLINE = "bold white" + + +def render_brand(console: Console, *, include_version: bool = True) -> None: + """Render the full brand block (wordmark + glyph + tagline).""" + for wm, gl in zip(_WORDMARK, _GLYPH, strict=True): + line = Text() + line.append(" ") + line.append(wm, style=_ACCENT) + line.append(gl, style=_DIM) + console.print(line) + + console.print() + headline = Text() + headline.append(" ") + headline.append(TAGLINE, style=_HEADLINE) + console.print(headline) + + subtitle = Text() + subtitle.append(" ") + subtitle.append(SUBTITLE, style=_DIM) + console.print(subtitle) + + if include_version: + console.print() + ver = Text() + ver.append(" ") + ver.append("expert ", style=_ACCENT) + ver.append(f"v{__version__}", style=_DIM) + ver.append(" ") + ver.append("MIT", style=_DIM) + ver.append(" ") + ver.append("github.com/feliperbroering/expert-agent", style=_DIM) + console.print(ver) + + +__all__ = ["SUBTITLE", "TAGLINE", "render_brand"] diff --git a/cli/expert/commands/agents.py b/cli/expert/commands/agents.py new file mode 100644 index 0000000..406b539 --- /dev/null +++ b/cli/expert/commands/agents.py @@ -0,0 +1,131 @@ +"""Workspace-aware agent management commands: ``agents``, ``use``, ``which``. + +These are the only commands that *never* need to resolve to a single agent — +they inspect, select, or describe the workspace itself. 
+""" + +from __future__ import annotations + +from typing import Annotated + +import typer +from rich.table import Table + +from ..ui import console, print_error, print_info, print_success +from ..workspace import ( + AgentNotFoundError, + AmbiguousAgentError, + Workspace, + WorkspaceError, +) + + +def agents_cmd( + verbose: Annotated[ + bool, + typer.Option("--verbose", "-v", help="Show schema paths and endpoints."), + ] = False, +) -> None: + """List every agent known to this workspace.""" + ws = Workspace.discover() + agents = ws.agents() + if not agents: + print_info( + "No agents found. Scaffold one with `expert init ` or create " + "an `expert.toml` workspace file." + ) + return + + active = ws.active() + table = Table(title=f"Agents — workspace: {ws.root}") + table.add_column("Active", width=6, justify="center") + table.add_column("Name", style="bold") + table.add_column("Source", style="dim") + if verbose: + table.add_column("Schema") + table.add_column("Endpoint") + table.add_column("Description", overflow="fold") + + for info in agents: + is_active = "✓" if info.name == active else "" + row = [ + is_active, + info.name, + info.source, + ] + if verbose: + try: + schema_rel = str(info.schema_path.relative_to(ws.root)) + except ValueError: + schema_rel = str(info.schema_path) + row.extend([schema_rel, info.endpoint or "—"]) + row.append(info.description or "") + table.add_row(*row) + + console.print(table) + if ws.default_agent: + print_info(f"default (expert.toml): [cyan]{ws.default_agent}[/cyan]") + if active: + print_info(f"active (expert use): [cyan]{active}[/cyan]") + + +def use_cmd( + name: Annotated[ + str | None, + typer.Argument( + help="Agent name to pin as active. 
Omit to clear the pin.", + ), + ] = None, + clear: Annotated[ + bool, + typer.Option("--clear", help="Remove the active-agent pointer."), + ] = False, +) -> None: + """Pin an agent as the active one for this workspace (stored locally).""" + ws = Workspace.discover() + + if clear or (name is None): + if ws.state_file.is_file(): + ws.clear_active() + print_success("Cleared active agent pointer.") + else: + print_info("No active agent set.") + return + + try: + # Re-use matcher so `expert use derm` works when `derm-expert` is declared. + canonical = ws._match(name) + ws.set_active(canonical) + except (AgentNotFoundError, AmbiguousAgentError, WorkspaceError) as exc: + print_error(str(exc)) + raise typer.Exit(code=1) from exc + + print_success(f"Active agent set to [cyan]{canonical}[/cyan].") + print_info(f"State stored in {ws.state_file}") + + +def which_cmd( + agent: Annotated[ + str | None, + typer.Option( + "--agent", + "-a", + help="Preview resolution for the given selector without running anything.", + ), + ] = None, +) -> None: + """Print the agent a bare command (no --agent, no @alias) would resolve to.""" + ws = Workspace.discover() + try: + ctx = ws.resolve(selector=agent) + except (AgentNotFoundError, AmbiguousAgentError, WorkspaceError) as exc: + print_error(str(exc)) + raise typer.Exit(code=1) from exc + + print_info(f"Active agent: [bold cyan]{ctx.name}[/bold cyan] (source: {ctx.selector_source})") + print_info(f" schema: {ctx.schema_path}") + print_info(f" endpoint: {ctx.endpoint or '—'}") + print_info(f" api key: {'set' if ctx.api_key else '—'}") + + +__all__ = ["agents_cmd", "use_cmd", "which_cmd"] diff --git a/cli/expert/commands/ask.py b/cli/expert/commands/ask.py index b204443..0d18356 100644 --- a/cli/expert/commands/ask.py +++ b/cli/expert/commands/ask.py @@ -44,6 +44,7 @@ from rich.text import Text from ..config import make_http_client +from ..context import resolve as resolve_context from ..ui import console, print_error, print_info, 
print_success _USER_ID = "cli" @@ -56,22 +57,26 @@ def cmd( question: Annotated[str, typer.Argument(help="Question to send to the agent.")], + agent: Annotated[ + str | None, + typer.Option("--agent", "-a", help="Agent name from the workspace."), + ] = None, endpoint: Annotated[ - str, + str | None, typer.Option( "--endpoint", envvar="EXPERT_AGENT_ENDPOINT", - help="Base URL of the running agent.", + help="Override the agent's endpoint.", ), - ], + ] = None, api_key: Annotated[ - str, + str | None, typer.Option( "--api-key", envvar="EXPERT_AGENT_API_KEY", - help="Admin bearer token.", + help="Override the agent's admin bearer token.", ), - ], + ] = None, session: Annotated[ str | None, typer.Option( @@ -88,6 +93,15 @@ def cmd( ] = True, ) -> None: """Ask the agent a question.""" + ctx = resolve_context( + agent=agent, + endpoint=endpoint, + api_key=api_key, + require_remote=True, + ) + endpoint, api_key = ctx.require_remote() + if ctx.selector_source not in ("single", "schema-flag"): + print_info(f"→ [cyan]{ctx.name}[/cyan] ({ctx.selector_source})") if session is None: session = str(uuid.uuid4()) print_info(f"Starting new session [cyan]{session}[/cyan].") diff --git a/cli/expert/commands/count_tokens.py b/cli/expert/commands/count_tokens.py index b71b1e8..55aed8e 100644 --- a/cli/expert/commands/count_tokens.py +++ b/cli/expert/commands/count_tokens.py @@ -15,6 +15,7 @@ import typer from app.schema import AgentSchema +from ..context import resolve as resolve_context from ..ui import console, print_error, print_info, print_success, print_warning if TYPE_CHECKING: @@ -140,20 +141,31 @@ def cmd( help="API key for google-genai token counting.", ), ], + agent: Annotated[ + str | None, + typer.Option("--agent", "-a", help="Agent name from the workspace."), + ] = None, schema_path: Annotated[ - Path, - typer.Option("--schema", "-s", help="Path to agent_schema.yaml."), - ] = Path("./agent_schema.yaml"), + Path | None, + typer.Option( + "--schema", + "-s", + help="Explicit 
path to agent_schema.yaml (bypasses workspace resolution).",
+        ),
+    ] = None,
     model: Annotated[
         str,
         typer.Option("--model", help="Model used for the count_tokens API call."),
-    ] = "gemini-2.0-flash-exp",
+    ] = "gemini-2.5-flash",
 ) -> None:
     """Walk the knowledge base and sum the estimated token count per file."""
-    schema_path = schema_path.resolve()
+    ctx = resolve_context(agent=agent, schema=schema_path)
+    schema_path = ctx.schema_path
     if not schema_path.is_file():
         print_error(f"schema file not found: {schema_path}")
         raise typer.Exit(code=1)
+    if ctx.selector_source not in ("single", "schema-flag"):
+        print_info(f"agent [cyan]{ctx.name}[/cyan] ({ctx.selector_source})")

     try:
         schema = AgentSchema.from_yaml(schema_path)
diff --git a/cli/expert/commands/init.py b/cli/expert/commands/init.py
index 600ab68..aea98fd 100644
--- a/cli/expert/commands/init.py
+++ b/cli/expert/commands/init.py
@@ -217,5 +217,24 @@ def cmd(
         raise typer.Exit(code=1) from exc

     print_success(f"Created new agent at [cyan]{path}[/cyan].")
+    _print_workspace_hint(path, name)
     print_info("Next step: [bold]expert validate --schema ./agent_schema.yaml[/bold]")
     console.print()
+
+
+def _print_workspace_hint(path: Path, name: str) -> None:
+    """Point at ``expert agents`` / ``@alias`` when ``path`` sits in a workspace with 2+ agents."""
+    from ..workspace import Workspace  # function-scope import: only this hint needs it
+
+    parent = path.parent
+    try:
+        ws = Workspace.discover(cwd=parent)
+    except Exception:  # pragma: no cover - discovery is best-effort here
+        return
+
+    # Only hint when there's >1 agent (either discovered or declared).
+    if len(ws.agents_by_name) >= 2:
+        print_info(
+            f"Detected multi-agent workspace at [cyan]{ws.root}[/cyan]. "
+            f"Use [bold]expert agents[/bold] to list, or [bold]expert @{name} <cmd>[/bold]."
+        )
diff --git a/cli/expert/commands/sessions.py b/cli/expert/commands/sessions.py
index 6f37c58..74b231a 100644
--- a/cli/expert/commands/sessions.py
+++ b/cli/expert/commands/sessions.py
@@ -11,6 +11,7 @@
 from rich.table import Table

 from ..config import make_http_client
+from ..context import resolve as resolve_context
 from ..ui import console, print_error, print_info, print_success, print_warning

 app = typer.Typer(
@@ -20,6 +21,25 @@
 )


+def _remote(
+    agent: str | None,
+    endpoint_override: str | None,
+    api_key_override: str | None,
+) -> tuple[str, str]:
+    """Resolve ``(endpoint, api_key)`` for a session command via the workspace, or exit."""
+    ctx = resolve_context(
+        agent=agent,
+        endpoint=endpoint_override,
+        api_key=api_key_override,
+        require_remote=True,
+    )
+    if ctx.selector_source not in ("single", "schema-flag"):
+        print_info(f"→ [cyan]{ctx.name}[/cyan] ({ctx.selector_source})")
+    endpoint, api_key = ctx.require_remote()
+    # The session commands used to strip the trailing "/" themselves before building paths.
+    return endpoint.rstrip("/"), api_key
+
+
 async def _get_json(endpoint: str, api_key: str, path: str) -> Any:
     async with make_http_client(endpoint=endpoint, api_key=api_key) as client:
         response = await client.get(path)
@@ -51,30 +71,44 @@ def _run(coro: Any) -> Any:
         raise typer.Exit(code=2) from exc


+_AgentOpt = Annotated[
+    str | None,
+    typer.Option("--agent", "-a", help="Agent name from the workspace."),
+]
 _EndpointOpt = Annotated[
-    str,
-    typer.Option("--endpoint", envvar="EXPERT_AGENT_ENDPOINT", help="Base URL of the agent."),
+    str | None,
+    typer.Option(
+        "--endpoint",
+        envvar="EXPERT_AGENT_ENDPOINT",
+        help="Override the agent's endpoint.",
+    ),
 ]
 _ApiKeyOpt = Annotated[
-    str,
-    typer.Option("--api-key", envvar="EXPERT_AGENT_API_KEY", help="Admin bearer token."),
+    str | None,
+    typer.Option(
+        "--api-key",
+        envvar="EXPERT_AGENT_API_KEY",
+        help="Override the agent's admin bearer token.",
+    ),
 ]


 @app.command("list")
 def list_cmd(
-    endpoint: _EndpointOpt,
-    api_key: _ApiKeyOpt,
+    agent: _AgentOpt = None,
+    endpoint: _EndpointOpt = None,
+    api_key: _ApiKeyOpt = None,
     user:
Annotated[ str | None, typer.Option("--user", help="Filter sessions by user_id."), ] = None, ) -> None: """List active sessions.""" + endpoint_resolved, api_key_resolved = _remote(agent, endpoint, api_key) path = "/sessions" if user: path = f"/sessions?user_id={user}" - body = _run(_get_json(endpoint.rstrip("/"), api_key, path)) + body = _run(_get_json(endpoint_resolved, api_key_resolved, path)) items: list[dict[str, Any]] if isinstance(body, list): items = [x for x in body if isinstance(x, dict)] @@ -105,11 +137,13 @@ def list_cmd( @app.command("show") def show_cmd( session_id: Annotated[str, typer.Argument(help="Session ID.")], - endpoint: _EndpointOpt, - api_key: _ApiKeyOpt, + agent: _AgentOpt = None, + endpoint: _EndpointOpt = None, + api_key: _ApiKeyOpt = None, ) -> None: """Show the message history of a single session.""" - body = _run(_get_json(endpoint.rstrip("/"), api_key, f"/sessions/{session_id}")) + endpoint_resolved, api_key_resolved = _remote(agent, endpoint, api_key) + body = _run(_get_json(endpoint_resolved, api_key_resolved, f"/sessions/{session_id}")) if not isinstance(body, dict): print_error("unexpected response shape.") raise typer.Exit(code=2) @@ -132,14 +166,16 @@ def show_cmd( @app.command("delete") def delete_cmd( session_id: Annotated[str, typer.Argument(help="Session ID to delete.")], - endpoint: _EndpointOpt, - api_key: _ApiKeyOpt, + agent: _AgentOpt = None, + endpoint: _EndpointOpt = None, + api_key: _ApiKeyOpt = None, yes: Annotated[ bool, typer.Option("--yes", "-y", help="Skip the confirmation prompt."), ] = False, ) -> None: """Delete a session and its message history (LGPD right-to-erasure).""" + endpoint_resolved, api_key_resolved = _remote(agent, endpoint, api_key) if not yes: confirmed = typer.confirm( f"Delete session {session_id}? 
This action is irreversible.", @@ -149,5 +185,5 @@ def delete_cmd( print_warning("Aborted.") raise typer.Exit(code=0) - _run(_delete(endpoint.rstrip("/"), api_key, f"/sessions/{session_id}")) + _run(_delete(endpoint_resolved, api_key_resolved, f"/sessions/{session_id}")) print_success(f"Session [cyan]{session_id}[/cyan] deleted.") diff --git a/cli/expert/commands/sync.py b/cli/expert/commands/sync.py index 92bbcdb..0396323 100644 --- a/cli/expert/commands/sync.py +++ b/cli/expert/commands/sync.py @@ -17,6 +17,7 @@ from app.schema import AgentSchema from ..config import make_http_client +from ..context import resolve as resolve_context from ..ui import console, print_diff_table, print_error, print_info, print_success @@ -85,32 +86,46 @@ async def _post_sync( def cmd( + agent: Annotated[ + str | None, + typer.Option("--agent", "-a", help="Agent name from the workspace."), + ] = None, endpoint: Annotated[ - str, + str | None, typer.Option( "--endpoint", envvar="EXPERT_AGENT_ENDPOINT", - help="Base URL of the running agent.", + help="Override the agent's endpoint (defaults to workspace/env value).", ), - ], + ] = None, api_key: Annotated[ - str, + str | None, typer.Option( "--api-key", envvar="EXPERT_AGENT_API_KEY", - help="Admin bearer token.", + help="Override the agent's admin bearer token.", ), - ], + ] = None, schema_path: Annotated[ - Path, - typer.Option("--schema", "-s", help="Path to agent_schema.yaml."), - ] = Path("./agent_schema.yaml"), + Path | None, + typer.Option("--schema", "-s", help="Explicit path to agent_schema.yaml."), + ] = None, ) -> None: """Upload the local knowledge base and trigger a Context Cache rebuild.""" - schema_path = schema_path.resolve() + ctx = resolve_context( + agent=agent, + schema=schema_path, + endpoint=endpoint, + api_key=api_key, + require_remote=True, + ) + schema_path = ctx.schema_path + endpoint, api_key = ctx.require_remote() if not schema_path.is_file(): print_error(f"schema file not found: {schema_path}") raise 
typer.Exit(code=1) + if ctx.selector_source not in ("single", "schema-flag"): + print_info(f"agent [cyan]{ctx.name}[/cyan] ({ctx.selector_source})") try: schema = AgentSchema.from_yaml(schema_path) diff --git a/cli/expert/commands/test.py b/cli/expert/commands/test.py index fbc84d6..d305e2c 100644 --- a/cli/expert/commands/test.py +++ b/cli/expert/commands/test.py @@ -18,6 +18,7 @@ import typer +from ..context import resolve as resolve_context from ..ui import console, print_error, print_info, print_success # Canonical order of the packaged suites. The numeric prefixes keep `robot` @@ -33,11 +34,19 @@ def cmd( + agent: Annotated[ + str | None, + typer.Option( + "--agent", + "-a", + help="Agent name from the workspace. Resolved via `expert agents`.", + ), + ] = None, suite: Annotated[ list[str] | None, typer.Option( "--suite", - "-s", + "-S", help=( "Run only the given suite(s) by stem (e.g. '05_ask_latency'). " "Can be passed multiple times. Default: all." @@ -72,16 +81,17 @@ def cmd( Path | None, typer.Option( "--schema", - help="Path to agent_schema.yaml (defaults to env EXPERT_AGENT_SCHEMA).", + "-s", + help="Explicit path to agent_schema.yaml (bypasses workspace resolution).", ), ] = None, endpoint: Annotated[ str | None, - typer.Option("--endpoint", help="Override EXPERT_AGENT_ENDPOINT."), + typer.Option("--endpoint", help="Override the agent's endpoint."), ] = None, api_key: Annotated[ str | None, - typer.Option("--api-key", help="Override EXPERT_AGENT_API_KEY."), + typer.Option("--api-key", help="Override the agent's admin bearer token."), ] = None, dry_run: Annotated[ bool, @@ -118,15 +128,25 @@ def cmd( print_error(f"No suites matched selection {suite!r}. Available: {available}") raise typer.Exit(code=2) - # Propagate overrides to the environment so ExpertLibrary's defaults pick - # them up without needing --var boilerplate in simple cases. 
- env_overrides: dict[str, str] = {} - if endpoint: - env_overrides["EXPERT_AGENT_ENDPOINT"] = endpoint - if api_key: - env_overrides["EXPERT_AGENT_API_KEY"] = api_key - if schema: - env_overrides["EXPERT_AGENT_SCHEMA"] = str(schema) + # Resolve the agent context (supports --agent / @alias / `expert use`) + # so that the packaged Robot suites see fully-populated env vars even + # in multi-agent workspaces without requiring --var or --endpoint. + # We fall back to a bare resolve (schema-only) so that the offline + # suites still work when endpoint/api_key are not configured. + ctx = resolve_context( + agent=agent, + schema=schema, + endpoint=endpoint, + api_key=api_key, + ) + if ctx.selector_source not in ("single", "schema-flag"): + print_info(f"→ [cyan]{ctx.name}[/cyan] ({ctx.selector_source})") + + env_overrides: dict[str, str] = {"EXPERT_AGENT_SCHEMA": str(ctx.schema_path)} + if ctx.endpoint: + env_overrides["EXPERT_AGENT_ENDPOINT"] = ctx.endpoint + if ctx.api_key: + env_overrides["EXPERT_AGENT_API_KEY"] = ctx.api_key for key, value in env_overrides.items(): os.environ[key] = value diff --git a/cli/expert/commands/validate.py b/cli/expert/commands/validate.py index 7f97500..216f5f6 100644 --- a/cli/expert/commands/validate.py +++ b/cli/expert/commands/validate.py @@ -9,7 +9,8 @@ from app.schema import AgentSchema from pydantic import ValidationError -from ..ui import print_error, print_schema, print_success, print_warning +from ..context import resolve as resolve_context +from ..ui import print_error, print_info, print_schema, print_success, print_warning def _iter_matching_files( @@ -30,16 +31,31 @@ def _iter_matching_files( def cmd( + agent: Annotated[ + str | None, + typer.Option( + "--agent", + "-a", + help="Agent name (from expert.toml or sibling dirs). 
See `expert agents`.",
+        ),
+    ] = None,
     schema_path: Annotated[
-        Path,
-        typer.Option("--schema", "-s", help="Path to agent_schema.yaml."),
-    ] = Path("./agent_schema.yaml"),
+        Path | None,
+        typer.Option(
+            "--schema",
+            "-s",
+            help="Explicit path to agent_schema.yaml (bypasses workspace resolution).",
+        ),
+    ] = None,
 ) -> None:
     """Validate an agent schema and its referenced filesystem layout."""
-    schema_path = schema_path.resolve()
+    ctx = resolve_context(agent=agent, schema=schema_path)
+    schema_path = ctx.schema_path
     if not schema_path.is_file():
         print_error(f"schema file not found: {schema_path}")
         raise typer.Exit(code=1)
+    if ctx.selector_source not in ("single", "schema-flag"):
+        print_info(f"agent [cyan]{ctx.name}[/cyan] ({ctx.selector_source})")

     try:
         schema = AgentSchema.from_yaml(schema_path)
diff --git a/cli/expert/context.py b/cli/expert/context.py
new file mode 100644
index 0000000..ce56787
--- /dev/null
+++ b/cli/expert/context.py
@@ -0,0 +1,70 @@
+"""Shared helpers used by every command that needs an :class:`AgentContext`.
+
+Commands should call :func:`resolve` at their very top, forward flag-overrides
+in, and then read ``ctx.schema_path`` / ``ctx.endpoint`` / ``ctx.api_key``.
+
+This keeps the multi-agent resolution logic in one place — if we ever change
+precedence rules, every command picks it up automatically.
+"""
+
+from __future__ import annotations
+
+from dataclasses import replace
+from pathlib import Path
+
+import typer
+
+from .ui import print_error
+from .workspace import (
+    AgentContext,
+    AgentNotFoundError,
+    AmbiguousAgentError,
+    Workspace,
+    WorkspaceError,
+)
+
+
+def resolve(
+    *,
+    agent: str | None = None,
+    schema: Path | None = None,
+    endpoint: str | None = None,
+    api_key: str | None = None,
+    require_remote: bool = False,
+) -> AgentContext:
+    """Resolve an :class:`AgentContext` or abort the CLI with a helpful message.
+
+    Flag-level overrides take priority over workspace-derived values so that
+    scripts / CI can still force an endpoint or API key on a single run
+    without editing ``expert.toml``.
+
+    Resolution errors exit with code 1. With ``require_remote`` set, a missing
+    ``endpoint`` / ``api_key`` exits with code 2 instead of staying ``None``.
+    """
+    ws = Workspace.discover()
+    try:
+        ctx = ws.resolve(selector=agent, schema_override=schema)
+    except (AgentNotFoundError, AmbiguousAgentError, WorkspaceError) as exc:
+        print_error(str(exc))
+        raise typer.Exit(code=1) from exc
+
+    # Caller-supplied values win over whatever the workspace resolver produced.
+    # NOTE(review): commands bind these to EXPERT_AGENT_* envvars, so env wins too — confirm intended.
+    if endpoint or api_key:
+        ctx = replace(
+            ctx,
+            endpoint=endpoint or ctx.endpoint,
+            api_key=api_key or ctx.api_key,
+        )
+
+    if require_remote:
+        try:
+            ctx.require_remote()
+        except WorkspaceError as exc:
+            print_error(str(exc))
+            raise typer.Exit(code=2) from exc
+
+    return ctx
+
+
+__all__ = ["resolve"]
diff --git a/cli/expert/main.py b/cli/expert/main.py
index dc1779d..1f8e00c 100644
--- a/cli/expert/main.py
+++ b/cli/expert/main.py
@@ -1,18 +1,34 @@
-"""Top-level `typer` app for `expert`."""
+"""Top-level `typer` app for `expert`.
+
+The CLI is aware of *multi-agent workspaces*: a repo can host several
+`agent_schema.yaml` files and the user can target them individually via:
+
+- Explicit flag: ``expert ask --agent derm "..."``
+- Active pointer: ``expert use derm`` then ``expert ask "..."``
+- Positional shortcut: ``expert @derm ask "..."``
+
+The ``@alias`` form is handled **here** in the entrypoint via a small
+argv rewriter that runs before Typer parses its arguments. The rewriter
+turns ``expert @<alias> <cmd> ...`` into
+``expert <cmd> ... --agent <alias>`` so downstream commands just need
+to accept the standard ``--agent`` flag.
+"""

 from __future__ import annotations

+import sys
 from typing import Annotated

 import typer

-from .
import __version__
+from .brand import render_brand
+from .commands import agents as agents_commands
 from .commands import ask, count_tokens, init, sessions, sync, test, validate
 from .ui import console

 app = typer.Typer(
     name="expert",
-    help="CLI for the **expert-agent** framework — scaffold, validate, sync, ask.",
+    help="ground a model on your docs. ship it as an API.",
     no_args_is_help=True,
     rich_markup_mode="markdown",
     add_completion=True,
@@ -21,10 +37,15 @@

 def _version_callback(value: bool) -> None:
     if value:
-        console.print(f"expert {__version__}")
+        render_brand(console, include_version=True)
         raise typer.Exit(code=0)


+def _brand_cmd() -> None:
+    """Print the expert brand block (wordmark + tagline + version)."""
+    render_brand(console, include_version=True)
+
+
 @app.callback()
 def _root(
     version: Annotated[
@@ -41,6 +62,58 @@ def _root(
     _ = version


+# Subcommands that accept `--agent`. Used by the @alias rewriter so that
+# nonsense like `expert @derm use my-expert` falls through to a useful error
+# instead of silently rewriting into `expert use my-expert --agent derm`.
+_AGENT_AWARE: frozenset[str] = frozenset(
+    {"ask", "validate", "count-tokens", "sync", "test", "sessions", "which"}
+)
+
+
+def _rewrite_at_alias(argv: list[str]) -> list[str]:
+    """Expand a leading ``@<alias>`` token into a trailing ``--agent <alias>``.
+
+    Examples::
+
+        expert @my-expert ask "hi" → expert ask "hi" --agent my-expert
+        expert @derm sessions list → expert sessions list --agent derm
+        expert @derm ask -- "-v" → expert ask --agent derm -- "-v"
+
+    Safe no-ops (``argv`` is returned unchanged):
+
+    - ``@<alias>`` in argv[1] that isn't the immediate prefix to a known
+      agent-aware subcommand is left alone (so ``expert @derm use foo``
+      and a bare ``expert @derm`` are *not* silently rewritten).
+    - A bare ``@`` or ``@-`` is left alone.
+    - Options like ``--foo=@bar`` are never touched because we only look at
+      ``argv[1]``.
+    """
+    if len(argv) < 2 or not argv[1].startswith("@") or argv[1] in ("@", "@-"):
+        return argv
+    alias = argv[1][1:]
+    rest = argv[2:]
+
+    # The first non-option token after the alias is taken to be the subcommand.
+    subcommand_idx = next(
+        (idx for idx, token in enumerate(rest) if not token.startswith("-")), None
+    )
+    if subcommand_idx is None or rest[subcommand_idx] not in _AGENT_AWARE:
+        # No agent-aware subcommand present: leave argv alone so Typer can
+        # render a useful error instead of rewriting into a wrong shape.
+        return argv
+
+    # Add `--agent <alias>` after every other option so it is parsed by the
+    # innermost command on the line, whether that is a leaf (`ask`) or a
+    # sub-Typer command (`sessions list`). A literal `--` ends option
+    # parsing, so the flag must go before it or it would be read as a
+    # positional argument.
+    try:
+        insert_at = rest.index("--", subcommand_idx + 1)
+    except ValueError:
+        insert_at = len(rest)
+    return [argv[0], *rest[:insert_at], "--agent", alias, *rest[insert_at:]]
+
+
 app.command(name="init", help="Scaffold a new agent project.")(init.cmd)
 app.command(name="validate", help="Validate an agent_schema.yaml locally.")(validate.cmd)
 app.command(
@@ -54,7 +127,30 @@ def _root(
     name="test",
     help="Run the packaged Robot Framework E2E kit against the current agent.",
 )(test.cmd)
+app.command(
+    name="agents",
+    help="List agents known to this workspace.",
+)(agents_commands.agents_cmd)
+app.command(
+    name="use",
+    help="Pin an agent as the active one for this workspace.",
+)(agents_commands.use_cmd)
+app.command(
+    name="which",
+    help="Print which agent a bare command would resolve to.",
+)(agents_commands.which_cmd)
+app.command(
+    name="brand",
+    help="Print the expert wordmark + version (fun, mostly).",
+    hidden=True,
+)(_brand_cmd)


-if __name__ == "__main__":
+def main() -> None:
+    """Entry point that runs the ``@alias`` rewriter before dispatching."""
+    sys.argv = _rewrite_at_alias(sys.argv)
     app()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cli/expert/testkit/ExpertLibrary.py b/cli/expert/testkit/ExpertLibrary.py
index 807f9c9..c15ca17 100644
--- a/cli/expert/testkit/ExpertLibrary.py
+++ b/cli/expert/testkit/ExpertLibrary.py
@@ -57,11 +57,15 @@ def __init__( # ------------------------------------------------------------------ @keyword("Run Expert CLI") - def run_cli(self, *args: str, expect_rc: int = 0, cwd: str | None = None) -> dict[str, Any]: + def run_cli( + self, *args: str, expect_rc: int | None = 0, cwd: str | None = None + ) -> dict[str, Any]: """Execute `expert ` and return `{rc, stdout, stderr, elapsed_ms}`. Fails if the exit code differs from ``expect_rc`` (use ``expect_rc=None`` - to skip the check entirely). + to skip the check entirely). ``expect_rc`` is typed ``int | None`` so + Robot Framework's dynamic-argument converter accepts ``${None}`` from + suite files without trying to coerce it into ``int`` (which fails). """ binary = shutil.which("expert") if binary is None: diff --git a/cli/expert/ui.py b/cli/expert/ui.py index f7fa89f..e4a58f6 100644 --- a/cli/expert/ui.py +++ b/cli/expert/ui.py @@ -1,8 +1,13 @@ """Shared Rich helpers used across commands. -The CLI follows a strict no-emoji policy in source code — visual cues come -exclusively from Rich colors, icons (drawn via Unicode box/arrow characters -that Rich supports) and markdown glyphs. No emoji characters are used. +The CLI follows the visual identity shared with the author's other open-source +tools (see ``feliperbroering/eai``): + +- No emoji characters. Visual cues come from Unicode box-drawing, arrows, + and restrained accent glyphs (``>``, ``✓``, ``✗``, ``⚠``, ``▶``). +- Success / error / warning lines are prefixed with a single colored glyph, + not a shouted word in caps. Screen real estate is precious. +- Rich colors are the accent; plain monospace is the norm. 
""" from __future__ import annotations @@ -21,65 +26,78 @@ def print_error(msg: str) -> None: - """Render an error line in bold red prefixed with `ERROR`.""" - console.print(f"[bold red]ERROR[/bold red] {msg}") + """Render an error line: dim red cross + message.""" + console.print(f"[red]✗[/red] {msg}", highlight=False) def print_success(msg: str) -> None: - """Render a success line in green prefixed with a check mark.""" - console.print(f"[bold green]OK[/bold green] {msg}") + """Render a success line: green check + message.""" + console.print(f"[green]✓[/green] {msg}", highlight=False) def print_warning(msg: str) -> None: - """Render a warning line in yellow prefixed with `WARN`.""" - console.print(f"[bold yellow]WARN[/bold yellow] {msg}") + """Render a warning line: yellow warning glyph + message.""" + console.print(f"[yellow]⚠[/yellow] {msg}", highlight=False) def print_info(msg: str) -> None: - """Render a neutral informational line.""" - console.print(f"[bold cyan]INFO[/bold cyan] {msg}") + """Render a neutral informational line prefixed with a subtle chevron.""" + console.print(f"[dim cyan]>[/dim cyan] {msg}", highlight=False) + + +def print_hint(cmd: str, *, label: str = "try") -> None: + """Render a cyan-accented hint pointing the user at a command.""" + console.print(f" [dim]{label}:[/dim] [bold cyan]{cmd}[/bold cyan]", highlight=False) + + +def print_step(current: int, total: int, msg: str) -> None: + """Render a numbered step in the classic ``[n/N]`` style.""" + console.print(f" [dim]\\[{current}/{total}][/dim] {msg}", highlight=False) + + +def print_kv(label: str, value: str, *, width: int = 12) -> None: + """Render a dim ``label: value`` pair with consistent column alignment.""" + console.print(f" [dim]{label:<{width}}[/dim] {value}", highlight=False) def print_diff_table(diff: dict[str, Any]) -> None: """Render a sync diff using a Rich `Table`. 
- The expected input is a mapping such as: + Expected input:: - ```python - { - "added": [{"path": "docs/a.md", "sha": "abc1234", "size": 1024}, ...], - "updated": [...], - "removed": [{"path": "docs/old.md", "sha": "def4567", "size": 512}, ...], - } - ``` + { + "added": [{"path": "docs/a.md", "sha": "abc1234", "size": 1024}, ...], + "updated": [...], + "removed": [{"path": "docs/old.md", "sha": "def4567", "size": 512}, ...], + } """ - table = Table(title="Sync diff", show_lines=False) - table.add_column("Action", style="bold", no_wrap=True) + table = Table(title="Sync diff", show_lines=False, border_style="dim") + table.add_column("", width=1, no_wrap=True) table.add_column("Path", overflow="fold") - table.add_column("SHA", no_wrap=True) - table.add_column("Size", justify="right", no_wrap=True) + table.add_column("SHA", no_wrap=True, style="dim") + table.add_column("Size", justify="right", no_wrap=True, style="dim") - actions: list[tuple[str, str, list[dict[str, Any]]]] = [ - ("added", "green", list(diff.get("added", []) or [])), - ("updated", "yellow", list(diff.get("updated", []) or [])), - ("removed", "red", list(diff.get("removed", []) or [])), + actions: list[tuple[str, str, str, list[dict[str, Any]]]] = [ + ("+", "green", "added", list(diff.get("added", []) or [])), + ("~", "yellow", "updated", list(diff.get("updated", []) or [])), + ("-", "red", "removed", list(diff.get("removed", []) or [])), ] total = 0 - for action, color, entries in actions: + for glyph, color, _name, entries in actions: for entry in entries: total += 1 sha = str(entry.get("sha", ""))[:12] size = entry.get("size") size_str = _fmt_size(size) if isinstance(size, int) else "-" table.add_row( - f"[{color}]{action}[/{color}]", + f"[{color}]{glyph}[/{color}]", str(entry.get("path", "")), sha, size_str, ) if total == 0: - console.print("[dim]No changes — remote is in sync with local.[/dim]") + console.print("[dim] no changes — remote is in sync with local[/dim]") return console.print(table) @@ 
-90,44 +108,44 @@ def print_schema(schema: AgentSchema) -> None: spec = schema.spec tree = Tree(f"[bold]{meta.name}[/bold] [dim]v{meta.version}[/dim]") if meta.description: - tree.add(f"[italic]{meta.description}[/italic]") + tree.add(f"[italic dim]{meta.description}[/italic dim]") model = tree.add("[bold]model[/bold]") - model.add(f"provider: [cyan]{spec.model.provider}[/cyan]") - model.add(f"name: [cyan]{spec.model.name}[/cyan]") - model.add(f"temperature: {spec.model.temperature}") - model.add(f"max_output_tokens: {spec.model.max_output_tokens}") + model.add(f"[dim]provider[/dim] {spec.model.provider}") + model.add(f"[dim]name[/dim] {spec.model.name}") + model.add(f"[dim]temp[/dim] {spec.model.temperature}") + model.add(f"[dim]max_out[/dim] {spec.model.max_output_tokens}") identity = tree.add("[bold]identity[/bold]") if spec.identity.system_prompt_file is not None: - identity.add(f"system_prompt_file: [cyan]{spec.identity.system_prompt_file}[/cyan]") + identity.add(f"[dim]file[/dim] {spec.identity.system_prompt_file}") if spec.identity.system_prompt is not None: preview = spec.identity.system_prompt[:60].replace("\n", " ") - identity.add(f"system_prompt: [cyan]{preview}...[/cyan]") + identity.add(f"[dim]inline[/dim] {preview}…") knowledge = tree.add("[bold]knowledge[/bold]") - knowledge.add(f"reference_docs_dir: [cyan]{spec.knowledge.reference_docs_dir}[/cyan]") - knowledge.add(f"include_patterns: {spec.knowledge.include_patterns}") - knowledge.add(f"exclude_patterns: {spec.knowledge.exclude_patterns}") + knowledge.add(f"[dim]docs_dir[/dim] {spec.knowledge.reference_docs_dir}") + knowledge.add(f"[dim]include[/dim] {spec.knowledge.include_patterns}") + knowledge.add(f"[dim]exclude[/dim] {spec.knowledge.exclude_patterns}") cache = tree.add("[bold]context_cache[/bold]") - cache.add(f"enabled: {spec.context_cache.enabled}") - cache.add(f"ttl_seconds: {spec.context_cache.ttl_seconds}") + cache.add(f"[dim]enabled[/dim] {spec.context_cache.enabled}") + 
cache.add(f"[dim]ttl[/dim] {spec.context_cache.ttl_seconds}s") memory = tree.add("[bold]memory[/bold]") - memory.add(f"short_term.buffer_size: {spec.memory.short_term.buffer_size}") - memory.add(f"long_term.enabled: {spec.memory.long_term.enabled}") - memory.add(f"long_term.persistence.type: {spec.memory.long_term.persistence.type}") + memory.add(f"[dim]short_buf[/dim] {spec.memory.short_term.buffer_size}") + memory.add(f"[dim]long_on[/dim] {spec.memory.long_term.enabled}") + memory.add(f"[dim]store[/dim] {spec.memory.long_term.persistence.type}") grounding = tree.add("[bold]grounding[/bold]") - grounding.add(f"enabled: {spec.grounding.enabled}") - grounding.add(f"max_citations: {spec.grounding.max_citations}") + grounding.add(f"[dim]enabled[/dim] {spec.grounding.enabled}") + grounding.add(f"[dim]max_cite[/dim] {spec.grounding.max_citations}") rate = tree.add("[bold]rate_limit[/bold]") - rate.add(f"requests_per_minute: {spec.rate_limit.requests_per_minute}") - rate.add(f"tokens_per_day: {spec.rate_limit.tokens_per_day}") + rate.add(f"[dim]rpm[/dim] {spec.rate_limit.requests_per_minute}") + rate.add(f"[dim]tpd[/dim] {spec.rate_limit.tokens_per_day}") - console.print(Panel(tree, title="Agent schema", border_style="cyan")) + console.print(Panel(tree, title="agent schema", border_style="cyan", title_align="left")) def _fmt_size(num: int) -> str: @@ -143,8 +161,11 @@ def _fmt_size(num: int) -> str: "console", "print_diff_table", "print_error", + "print_hint", "print_info", + "print_kv", "print_schema", + "print_step", "print_success", "print_warning", ] diff --git a/cli/expert/workspace.py b/cli/expert/workspace.py new file mode 100644 index 0000000..2b48511 --- /dev/null +++ b/cli/expert/workspace.py @@ -0,0 +1,460 @@ +"""Multi-agent workspace: discovery, `expert.toml`, and active-agent state. + +A *workspace* is the repository (or subtree) that hosts one or more agent +schemas. 
The CLI supports three equivalent ways of pointing a command at a +specific agent inside a multi-agent workspace: + +1. **Explicit flag** — ``expert ask --agent derm "hi"``. +2. **Positional `@alias`** — ``expert @derm ask "hi"`` (intercepted in + ``main.py`` and rewritten into the flag above, transparently). +3. **Active pointer** — ``expert use derm`` persists a pointer in + ``.expert/state.json`` so subsequent ``expert ask "..."`` calls in that + cwd stay on ``derm`` until the user runs ``expert use`` again. + +When none of these resolves to a single agent, commands raise +:class:`AmbiguousAgentError` with a helpful message listing the candidates. + +## Discovery + +Workspace detection walks up from ``cwd`` (a bounded number of levels) looking +for the first directory that contains **either** of these markers: + +- ``expert.toml`` (explicit, strongest signal — anchors the workspace). +- ``.expert/state.json`` (previously `expert use`-d directory). + +If neither is found the workspace is rooted at ``cwd`` itself. In every case, +immediate subdirectories of the root matching ``*/agent_schema.yaml`` are +auto-discovered as agents (multi-agent repo by convention). + +If no agent is found at all, the workspace falls back to a *single-agent* mode +rooted at cwd, preserving the historical behaviour (``./agent_schema.yaml``). + +## ``expert.toml`` schema + +```toml +# Optional per-workspace defaults. +[defaults] +agent = "my-expert" # Default agent when no flag / active pointer is set. + +# One section per agent. The key becomes the canonical name. +[agents.my-expert] +schema = "my-expert/agent_schema.yaml" # Required. Relative to this file. +endpoint = "https://my-expert-xxx.a.run.app" # Optional override. +api_key_env = "MY_EXPERT_ADMIN_KEY" # Optional. Takes precedence over api_key. +api_key = "..." # Optional, discouraged (use env). +description = "Primary expert agent (free-form)." # Optional.
class WorkspaceError(RuntimeError):
    """Base for workspace-related errors. Carries an exit-code hint."""

    # Class-level default; the subclasses defined below do not override it.
    exit_code: int = 1


class AgentNotFoundError(WorkspaceError):
    """Raised when the caller names an agent that doesn't exist in the workspace."""


class AmbiguousAgentError(WorkspaceError):
    """Raised when agent selection cannot settle on exactly one agent.

    A selector that matches *nothing* is reported with
    :class:`AgentNotFoundError` instead; this error covers the "more than one
    option" (or "nothing to choose from") situations.

    Args:
        message: Human-readable explanation, used as the exception text.
        candidates: The agents the user may pick from, exactly as supplied by
            the raiser (e.g. all known agents, or only those matching an
            ambiguous prefix). Stored as-is on ``self.candidates`` so callers
            can render a helpful prompt/error with the available options.
    """

    def __init__(self, message: str, *, candidates: list[AgentInfo]) -> None:
        super().__init__(message)
        self.candidates = candidates
def require_remote(self) -> tuple[str, str]:
    """Hand back the remote credentials as an ``(endpoint, api_key)`` pair.

    The endpoint is returned with trailing ``/`` characters removed; the API
    key is returned untouched.

    Raises:
        WorkspaceError: if the endpoint or the API key is unset or empty.
    """
    endpoint, api_key = self.endpoint, self.api_key
    if endpoint and api_key:
        return endpoint.rstrip("/"), api_key
    # Either value is missing: explain both ways of supplying them.
    raise WorkspaceError(
        f"Agent '{self.name}' has no endpoint/api_key configured. "
        "Set EXPERT_AGENT_ENDPOINT + EXPERT_AGENT_API_KEY, or declare "
        "them in expert.toml under [agents."
        f"{self.name}]."
    )
+ local = start / _SCHEMA_FILENAME + if local.is_file(): + ws.agents_by_name["."] = AgentInfo( + name=".", + schema_path=local, + source="single", + ) + ws.single_agent_mode = True + + return ws + + # --------------------------- TOML loading --------------------------- # + + def _load_toml(self, path: Path) -> None: + try: + with path.open("rb") as fh: + raw = tomllib.load(fh) + except (OSError, tomllib.TOMLDecodeError) as exc: # pragma: no cover - rare + raise WorkspaceError(f"failed to parse {path}: {exc}") from exc + + defaults = raw.get("defaults") if isinstance(raw.get("defaults"), dict) else {} + default_name = defaults.get("agent") if isinstance(defaults, dict) else None + if isinstance(default_name, str): + self.default_agent = default_name + + agents_section = raw.get("agents") if isinstance(raw.get("agents"), dict) else {} + if not isinstance(agents_section, dict): + return + + for name, body in agents_section.items(): + if not isinstance(name, str) or not isinstance(body, dict): + continue + schema_rel = body.get("schema") + if not isinstance(schema_rel, str) or not schema_rel: + raise WorkspaceError(f"expert.toml: agent '{name}' is missing a 'schema' field.") + schema_abs = (path.parent / schema_rel).resolve() + api_key = _resolve_api_key(body) + self.agents_by_name[name] = AgentInfo( + name=name, + schema_path=schema_abs, + endpoint=_opt_str(body.get("endpoint")), + api_key=api_key, + description=_opt_str(body.get("description")), + source="toml", + ) + + # --------------------------- Auto-discovery ------------------------- # + + def _discover_siblings(self) -> None: + """Walk immediate children of ``root`` for ``*/agent_schema.yaml``.""" + if not self.root.is_dir(): + return + for child in sorted(self.root.iterdir()): + if not child.is_dir() or child.name.startswith("."): + continue + schema = child / _SCHEMA_FILENAME + if not schema.is_file(): + continue + # Skip if already declared via TOML under a different key — the + # TOML entry is 
authoritative for that schema. + if any(info.schema_path == schema for info in self.agents_by_name.values()): + continue + # Skip if the directory name collides with a declared TOML name; + # declared ones win. + if child.name in self.agents_by_name: + continue + self.agents_by_name[child.name] = AgentInfo( + name=child.name, + schema_path=schema, + source="auto", + ) + + # --------------------------- State file ----------------------------- # + + @property + def state_file(self) -> Path: + return self.root / _STATE_DIR / _STATE_FILE + + def active(self) -> str | None: + """Return the agent name pinned via ``expert use``, if any.""" + path = self.state_file + if not path.is_file(): + return None + try: + data = json.loads(path.read_text()) + except (OSError, json.JSONDecodeError): + return None + name = data.get("agent") if isinstance(data, dict) else None + return name if isinstance(name, str) else None + + def set_active(self, name: str) -> None: + if name not in self.agents_by_name: + raise AgentNotFoundError( + f"Unknown agent '{name}'. Run `expert agents` to list candidates." + ) + path = self.state_file + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps({"agent": name}, indent=2) + "\n") + + def clear_active(self) -> None: + path = self.state_file + if path.is_file(): + path.unlink() + + # --------------------------- Listing -------------------------------- # + + def agents(self) -> list[AgentInfo]: + return sorted(self.agents_by_name.values(), key=lambda a: a.name) + + # --------------------------- Resolution ----------------------------- # + + def resolve( + self, + selector: str | None = None, + *, + env: dict[str, str] | None = None, + schema_override: Path | None = None, + ) -> AgentContext: + """Return a fully-resolved :class:`AgentContext`. + + Resolution order (first match wins): + + 1. Explicit ``selector`` (from ``--agent`` or ``@alias``). + 2. ``EXPERT_AGENT`` env var. + 3. ``.expert/state.json`` (set by ``expert use``). 
+ 4. ``[defaults] agent = "..."`` in ``expert.toml``. + 5. Exactly-one-agent short-circuit. + 6. ``schema_override`` (``--schema`` flag, purely file-based fallback). + + Fails with :class:`AmbiguousAgentError` otherwise. + """ + env = env if env is not None else dict(os.environ) + source: str + name: str | None = None + + # An explicit --schema path short-circuits resolution entirely: + # the caller is telling us "use this file, don't touch the + # workspace". This mirrors the pre-multi-agent CLI behaviour. + if schema_override is not None and selector is None: + return AgentContext( + name=schema_override.parent.name or ".", + schema_path=schema_override.resolve(), + endpoint=env.get("EXPERT_AGENT_ENDPOINT"), + api_key=env.get("EXPERT_AGENT_API_KEY"), + description=None, + selector_source="schema-flag", + ) + + if selector: + name, source = self._match(selector), "flag" + elif env.get(_ENV_ACTIVE_AGENT): + name, source = self._match(env[_ENV_ACTIVE_AGENT]), "env" + elif (pinned := self.active()) is not None: + name, source = self._match(pinned), "active" + elif self.default_agent is not None: + name, source = self._match(self.default_agent), "default" + elif len(self.agents_by_name) == 1: + name, source = ( + next(iter(self.agents_by_name)), + ("single" if self.single_agent_mode else "auto"), + ) + + if name is None: + raise AmbiguousAgentError( + self._ambiguity_message(selector, env), + candidates=self.agents(), + ) + + info = self.agents_by_name[name] + endpoint = info.endpoint or env.get("EXPERT_AGENT_ENDPOINT") + api_key = info.api_key or env.get("EXPERT_AGENT_API_KEY") + schema_path = schema_override.resolve() if schema_override else info.schema_path + return AgentContext( + name=info.name, + schema_path=schema_path, + endpoint=endpoint, + api_key=api_key, + description=info.description, + selector_source=source, + ) + + # --------------------------- Internals ------------------------------ # + + def _match(self, selector: str) -> str: + """Resolve an agent 
selector (exact name or unique prefix). + + Accepts and strips a leading ``@`` so that the same helper can back + both ``--agent derm`` and ``@derm`` transparently. + """ + if not selector: + raise AgentNotFoundError("empty agent selector") + needle = selector.lstrip("@") + if needle in self.agents_by_name: + return needle + matches = [n for n in self.agents_by_name if n.startswith(needle)] + if len(matches) == 1: + return matches[0] + if not matches: + raise AgentNotFoundError( + f"No agent named '{needle}'. " + f"Available: {', '.join(sorted(self.agents_by_name)) or '(none)'}." + ) + raise AmbiguousAgentError( + f"Prefix '{needle}' is ambiguous — matches: {', '.join(sorted(matches))}. " + "Use the full name or a longer prefix.", + candidates=[self.agents_by_name[m] for m in matches], + ) + + def _ambiguity_message(self, selector: str | None, env: dict[str, str]) -> str: + if not self.agents_by_name: + return ( + "No agent_schema.yaml found in this workspace. " + "Run `expert init ` to scaffold one, or pass " + "--schema explicitly." + ) + lines = [ + "Multiple agents found in this workspace and no selector was given.", + "", + "Candidates:", + ] + for info in self.agents(): + rel = _safe_relpath(info.schema_path, self.root) + badge = "[toml]" if info.source == "toml" else "[auto]" + lines.append(f" • {info.name:<20} {rel} {badge}") + lines.extend( + [ + "", + "Pick one, in order of preference:", + " expert @ # one-off shortcut", + " expert --agent # explicit flag (CI-friendly)", + " expert use # pin for this workspace", + ] + ) + _ = selector, env + return "\n".join(lines) + + +# ------------------------------------------------------------------------- # +# Helpers +# ------------------------------------------------------------------------- # + + +def _find_workspace_root(start: Path) -> tuple[Path, Path | None]: + """Walk up from ``start`` to find a workspace root + optional TOML path. + + Returns ``(root, toml_path)`` where ``toml_path`` may be ``None``. 
The + ``root`` is: + + - The first ancestor containing ``expert.toml`` (authoritative marker). + - Else the first ancestor containing ``.expert/state.json`` (previously + pinned via ``expert use``). + - Else ``start`` itself. Sibling-schema discovery is always rooted at + ``start`` — we never silently promote an unrelated ancestor to + ``root`` just because it happens to have other agent directories + lying around. + """ + current = start + for _ in range(_DISCOVERY_MAX_DEPTH + 1): + toml = current / _WORKSPACE_FILE + if toml.is_file(): + return current, toml + if (current / _STATE_DIR / _STATE_FILE).is_file(): + return current, None + if current.parent == current: + break + current = current.parent + return start, None + + +def _opt_str(value: Any) -> str | None: + return value if isinstance(value, str) and value else None + + +def _resolve_api_key(body: dict[str, Any]) -> str | None: + env_var = body.get("api_key_env") + if isinstance(env_var, str) and env_var: + env_value = os.environ.get(env_var) + if env_value: + return env_value + raw = body.get("api_key") + return raw if isinstance(raw, str) and raw else None + + +def _safe_relpath(path: Path, base: Path) -> str: + try: + return str(path.relative_to(base)) + except ValueError: + return str(path) + + +__all__ = [ + "AgentContext", + "AgentInfo", + "AgentNotFoundError", + "AmbiguousAgentError", + "Workspace", + "WorkspaceError", +] diff --git a/cli/tests/test_brand.py b/cli/tests/test_brand.py new file mode 100644 index 0000000..2737fe0 --- /dev/null +++ b/cli/tests/test_brand.py @@ -0,0 +1,34 @@ +"""Smoke tests for the ASCII brand + the ``expert brand`` / ``--version`` paths.""" + +from expert.main import app +from typer.testing import CliRunner + + +def test_brand_command_prints_wordmark_and_tagline() -> None: + runner = CliRunner() + result = runner.invoke(app, ["brand"]) + assert result.exit_code == 0, result.output + # Wordmark: one row of the ANSI-shadow figlet should always be present. 
def test_brand_command_is_hidden_in_help() -> None:
    """``--help`` must succeed and must not advertise the ``brand`` command."""
    outcome = CliRunner().invoke(app, ["--help"])
    assert outcome.exit_code == 0, outcome.output
    # `brand` is a hidden easter-egg command; it must not pollute --help output.
    # Only the text after the last "Commands" marker is inspected (the whole
    # output when that marker is absent).
    commands_listing = outcome.output.rsplit("Commands", 1)[-1]
    assert "brand" not in commands_listing
def test_rewrite_at_alias_no_rewrite_for_non_agent_command() -> None:
    """`use`/`agents` aren't in the allow-list; argv is returned unchanged."""
    argv = ["expert", "@my-expert", "use", "my-expert"]
    # Compare against a snapshot taken before the call: checking the result
    # against the very list that was passed in would also pass if the
    # rewriter modified that list in place and returned it.
    expected = list(argv)
    assert _rewrite_at_alias(argv) == expected
[ + "expert", + "sessions", + "list", + "--user", + "u1", + "--agent", + "derm", + ] + + +def test_rewrite_handles_empty_alias() -> None: + argv = ["expert", "@", "ask", "hi"] + # Too short — should no-op rather than misinterpret. + assert _rewrite_at_alias(argv) == argv + + +# ------------------------------------------------------------------------- # +# Integration: workspace-aware commands +# ------------------------------------------------------------------------- # + + +def _seed(tmp_path: Path) -> Path: + (tmp_path / "my-expert").mkdir() + (tmp_path / "derm").mkdir() + (tmp_path / "my-expert" / "agent_schema.yaml").write_text("x") + (tmp_path / "derm" / "agent_schema.yaml").write_text("x") + (tmp_path / "expert.toml").write_text( + '[defaults]\nagent = "my-expert"\n\n' + '[agents.my-expert]\nschema = "my-expert/agent_schema.yaml"\n' + 'endpoint = "https://my-expert.example"\napi_key = "sk-test"\n\n' + '[agents.derm]\nschema = "derm/agent_schema.yaml"\n', + ) + return tmp_path + + +def test_agents_command_lists(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + _seed(tmp_path) + monkeypatch.chdir(tmp_path) + runner = CliRunner() + result = runner.invoke(app, ["agents"]) + assert result.exit_code == 0, result.output + assert "my-expert" in result.output + assert "derm" in result.output + + +def test_which_uses_toml_default(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + _seed(tmp_path) + monkeypatch.chdir(tmp_path) + monkeypatch.delenv("EXPERT_AGENT", raising=False) + monkeypatch.delenv("EXPERT_AGENT_ENDPOINT", raising=False) + runner = CliRunner() + result = runner.invoke(app, ["which"]) + assert result.exit_code == 0, result.output + assert "my-expert" in result.output + assert "default" in result.output + + +def test_use_then_which(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + _seed(tmp_path) + monkeypatch.chdir(tmp_path) + monkeypatch.delenv("EXPERT_AGENT", raising=False) + runner = CliRunner() + + res = runner.invoke(app, 
def test_ambiguous_workspace_shows_helpful_error(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Two auto-discovered agents, no selector → helpful multi-line error."""
    (tmp_path / "a").mkdir()
    (tmp_path / "b").mkdir()
    (tmp_path / "a" / "agent_schema.yaml").write_text("x")
    (tmp_path / "b" / "agent_schema.yaml").write_text("x")
    monkeypatch.chdir(tmp_path)
    monkeypatch.delenv("EXPERT_AGENT", raising=False)

    runner = CliRunner()
    res = runner.invoke(app, ["which"])
    assert res.exit_code != 0
    # The hint must mention the `@alias` shortcut and the explicit flag.
    # The former `"expert @" in ... or "@" in ...` check was exactly as strong
    # as this one, because any output containing "expert @" also contains "@".
    assert "@" in res.output
    assert "--agent" in res.output
index 0000000..daa7e04 --- /dev/null +++ b/cli/tests/test_workspace.py @@ -0,0 +1,267 @@ +"""Tests for multi-agent workspace discovery and resolution.""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest +from expert.workspace import ( + AgentNotFoundError, + AmbiguousAgentError, + Workspace, + WorkspaceError, +) + +# ------------------------------------------------------------------------- # +# Fixtures +# ------------------------------------------------------------------------- # + + +def _mk_schema(dir_: Path, name: str = "a") -> Path: + dir_.mkdir(parents=True, exist_ok=True) + f = dir_ / "agent_schema.yaml" + f.write_text(f"# dummy schema for {name}\n") + return f + + +def _mk_workspace( + root: Path, + *, + agents: dict[str, dict[str, object]] | None = None, + default: str | None = None, +) -> None: + """Create a workspace directory with optional expert.toml. + + ``agents`` maps canonical names to dicts of ``schema``/``endpoint``/etc. + Schemas are materialised on disk relative to ``root``. 
+ """ + if agents is None: + return + lines: list[str] = [] + if default: + lines.extend(["[defaults]", f'agent = "{default}"', ""]) + for name, body in agents.items(): + schema_rel = body.get("schema") or f"{name}/agent_schema.yaml" + assert isinstance(schema_rel, str) + _mk_schema(root / Path(schema_rel).parent, name=name) + lines.append(f"[agents.{name}]") + lines.append(f'schema = "{schema_rel}"') + for key in ("endpoint", "api_key", "api_key_env", "description"): + value = body.get(key) + if isinstance(value, str): + lines.append(f'{key} = "{value}"') + lines.append("") + (root / "expert.toml").write_text("\n".join(lines)) + + +# ------------------------------------------------------------------------- # +# Discovery +# ------------------------------------------------------------------------- # + + +def test_single_agent_mode(tmp_path: Path) -> None: + _mk_schema(tmp_path) + ws = Workspace.discover(cwd=tmp_path) + assert ws.single_agent_mode is True + assert list(ws.agents_by_name) == ["."] + + ctx = ws.resolve() + assert ctx.name == "." 
def test_auto_discover_siblings(tmp_path: Path) -> None:
    """Sibling directories holding a schema are discovered as ``auto`` agents."""
    expected_names = {"my-expert", "derm"}
    for dirname in ("my-expert", "derm"):
        _mk_schema(tmp_path / dirname)

    workspace = Workspace.discover(cwd=tmp_path)

    assert workspace.single_agent_mode is False
    assert set(workspace.agents_by_name) == expected_names
    origins = [agent.source for agent in workspace.agents()]
    assert all(origin == "auto" for origin in origins)
def test_resolve_ambiguous(tmp_path: Path) -> None:
    """With two declared agents and no selector, resolution must fail loudly."""
    _mk_workspace(tmp_path, agents={"my-expert": {}, "derm": {}})
    workspace = Workspace.discover(cwd=tmp_path)

    with pytest.raises(AmbiguousAgentError) as raised:
        workspace.resolve(env={})

    error = raised.value
    assert "Multiple agents" in str(error)
    # Both agents are offered back to the caller as candidates.
    assert {candidate.name for candidate in error.candidates} == {"my-expert", "derm"}
def test_env_endpoint_fills_when_toml_missing(tmp_path: Path) -> None:
    """Endpoint and key come from the passed env when expert.toml sets neither."""
    fallback_env = {"EXPERT_AGENT_ENDPOINT": "https://x", "EXPERT_AGENT_API_KEY": "k"}
    _mk_workspace(tmp_path, agents={"my-expert": {}})

    resolved = Workspace.discover(cwd=tmp_path).resolve(env=fallback_env)

    assert (resolved.endpoint, resolved.api_key) == ("https://x", "k")
ws.set_active("derm") + state = json.loads(ws.state_file.read_text()) + assert state == {"agent": "derm"} + assert ws.active() == "derm" + + +def test_clear_active(tmp_path: Path) -> None: + _mk_workspace(tmp_path, agents={"my-expert": {}}) + ws = Workspace.discover(cwd=tmp_path) + ws.set_active("my-expert") + ws.clear_active() + assert ws.active() is None + + +def test_set_active_rejects_unknown(tmp_path: Path) -> None: + _mk_workspace(tmp_path, agents={"my-expert": {}}) + ws = Workspace.discover(cwd=tmp_path) + with pytest.raises(AgentNotFoundError): + ws.set_active("nope") diff --git a/docs/AGENT_E2E_SETUP.md b/docs/AGENT_E2E_SETUP.md index e161815..9f07fea 100644 --- a/docs/AGENT_E2E_SETUP.md +++ b/docs/AGENT_E2E_SETUP.md @@ -45,8 +45,16 @@ first or warn the user. - [ ] Repo settings → *Actions → General → Workflow permissions* allow reading from public actions (default). -If the repo hosts **multiple agents** (a monorepo), each agent gets its own -workflow file pointing at its own schema. +If the repo hosts **multiple agents** (a monorepo), you have two options: + +1. **One workflow per agent** — each file pins a different `schema:` and a + different set of secrets. Recommended when the agents are owned by + different teams or deployed to different projects. +2. **One workflow, matrix-over-agents** — declare an `expert.toml` at the + repo root and let `expert test` resolve each agent by name. See the + "matrix" snippet in [§6. Customising for your agent](#6-customising-for-your-agent). + +Both integrations share the same reusable workflow; only the caller changes. --- @@ -128,9 +136,9 @@ jobs: | Placeholder | Concrete example | Notes | |----------------------------------------|-------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------| -| `<>` | `ecg-expert` | Just for the workflow filename + display name. 
| -| `<>` | `ecg-expert/agent_schema.yaml` | **Must be relative to the repo root.** Validated by the reusable workflow before running. | -| `<>` | `"Qual fórmula de correção do QTc a AHA recomenda como padrão?"` | Used by `05_ask_latency`. Pick something representative of real traffic. | +| `<>` | `my-expert` | Just for the workflow filename + display name. | +| `<>` | `my-expert/agent_schema.yaml` | **Must be relative to the repo root.** Validated by the reusable workflow before running. | +| `<>` | `"What's a representative question for your agent?"` | Used by `05_ask_latency`. Pick something representative of real traffic. | | `<>` | `main`, `v0.1.1`, `v0.2.0` | Pin to a tag for stable runs (e.g. `v0.1.1`); use `main` only if you want to live on the bleeding edge. | ### File naming convention @@ -138,7 +146,7 @@ jobs: Use `e2e-.yml`. Examples: ``` -.github/workflows/e2e-ecg-expert.yml +.github/workflows/e2e-my-expert.yml .github/workflows/e2e-derm-expert.yml .github/workflows/e2e-pharma-expert.yml ``` @@ -209,6 +217,42 @@ You almost never need to fork the suites. Knobs available out of the box: | Run only one suite | Trigger with the `suite:` choice input (`gh workflow run … -f suite=05_ask_latency`). | | Pin to a stable upstream version | Replace `@main` with `@v0.1.1` everywhere (both `uses:` and `cli-ref:`). | | Add a per-deploy smoke check | Call the reusable workflow from your `deploy.yml` after the Cloud Run rollout finishes. | +| Test N agents in one monorepo | See "matrix" snippet below, or keep one workflow-per-agent for clearer blame. | + +### Matrix over agents (monorepo) + +If `expert.toml` at the repo root declares several agents, the CLI already +understands `expert test --agent `. 
You can call the reusable workflow +once per agent via a matrix: + +```yaml +jobs: + e2e: + strategy: + fail-fast: false + matrix: + agent: + - { name: my-expert, schema: my-expert/agent_schema.yaml, endpoint_secret: MY_EXPERT_ENDPOINT, key_secret: MY_EXPERT_API_KEY } + - { name: derm, schema: derm-expert/agent_schema.yaml, endpoint_secret: DERM_ENDPOINT, key_secret: DERM_API_KEY } + uses: feliperbroering/expert-agent/.github/workflows/expert-e2e.yml@<> + with: + schema: ${{ matrix.agent.schema }} + sample-question: "ping" + cli-ref: <> + secrets: + endpoint: ${{ secrets[matrix.agent.endpoint_secret] }} + api-key: ${{ secrets[matrix.agent.key_secret] }} +``` + +Locally, the same layout lets you do: + +```bash +expert agents # list all known agents +expert use my-expert # pin my-expert as the active agent (until `expert use --clear`) +expert ask "..." # routes to my-expert +expert @derm ask "..." # one-off hop to derm +expert test --agent derm # run the packaged E2E kit against derm +``` If you genuinely need a *new* assertion the upstream suites don't cover, contribute it back to `expert-agent` rather than vendoring locally — the kit diff --git a/docs/PRIVATE_AGENT_REPO.md b/docs/PRIVATE_AGENT_REPO.md new file mode 100644 index 0000000..364f062 --- /dev/null +++ b/docs/PRIVATE_AGENT_REPO.md @@ -0,0 +1,365 @@ +# Private agent repo guide + +This guide shows the cleanest way to create a **private repo for your own specialist agents** while reusing the open-source `expert-agent` framework. + +Use it when you want: + +- private prompts and docs +- your own deploy cadence +- one repo with one agent, or one repo with many agents +- the same `expert` CLI and Robot Framework E2E kit from the public repo + +--- + +## Recommended repo shapes + +### Option A — one repo, one agent + +Best when each agent has its own owner, deploy cadence, and secrets. 
+ +```text +my-private-agent/ +├─ agent_schema.yaml +├─ prompts/ +│ └─ identity.md +├─ docs/ +│ ├─ paper-1.pdf +│ └─ protocol.md +├─ expert.toml # optional in single-agent repos +└─ .github/workflows/ + └─ e2e.yml +``` + +This gives you the nicest UX: + +```bash +expert validate +expert count-tokens +expert sync +expert ask "..." +``` + +### Option B — one repo, many agents + +Best when the agents share docs, ownership, or infra. + +```text +my-private-agents/ +├─ expert.toml +├─ cardiology/ +│ ├─ agent_schema.yaml +│ ├─ prompts/ +│ └─ docs/ +├─ dermatology/ +│ ├─ agent_schema.yaml +│ ├─ prompts/ +│ └─ docs/ +└─ oncology/ + ├─ agent_schema.yaml + ├─ prompts/ + └─ docs/ +``` + +Then use the workspace-aware CLI: + +```bash +expert agents +expert @cardiology validate +expert @dermatology ask "..." +expert use oncology +expert which +``` + +--- + +## Step 1 — install the CLI + +On your machine: + +```bash +uv tool install "git+https://github.com/feliperbroering/expert-agent.git" +expert --version +``` + +If you also want the packaged Robot Framework kit: + +```bash +uv tool install "expert-agent[test] @ git+https://github.com/feliperbroering/expert-agent.git" +``` + +--- + +## Step 2 — scaffold the repo + +### Single-agent + +```bash +mkdir my-private-agent && cd my-private-agent +expert init . 
+``` + +### Multi-agent + +```bash +mkdir my-private-agents && cd my-private-agents +expert init cardiology +expert init dermatology +expert init oncology +``` + +Then add `expert.toml`: + +```toml +default_agent = "cardiology" + +[agents.cardiology] +schema = "cardiology/agent_schema.yaml" +endpoint_env = "CARDIOLOGY_AGENT_ENDPOINT" +api_key_env = "CARDIOLOGY_AGENT_API_KEY" + +[agents.dermatology] +schema = "dermatology/agent_schema.yaml" +endpoint_env = "DERM_AGENT_ENDPOINT" +api_key_env = "DERM_AGENT_API_KEY" + +[agents.oncology] +schema = "oncology/agent_schema.yaml" +endpoint_env = "ONCO_AGENT_ENDPOINT" +api_key_env = "ONCO_AGENT_API_KEY" +``` + +`expert.toml` is optional but recommended in private multi-agent repos because it: + +- makes endpoints and secret env vars explicit +- avoids ambiguity when names overlap +- gives you a default agent + +--- + +## Step 3 — add your private knowledge base + +For each agent: + +1. Edit `prompts/identity.md` +2. Replace the placeholder file in `docs/` +3. Keep sensitive source material **out of git** unless your repo policy allows it + +Recommended patterns: + +- Commit curated Markdown summaries and public PDFs +- Keep raw source dumps, exports, and OCR artifacts in a private storage bucket +- Add `_drafts/` to the schema's `exclude_patterns` + +Validate locally: + +```bash +expert validate +expert count-tokens +``` + +Or, in a multi-agent repo: + +```bash +expert @cardiology validate +expert @cardiology count-tokens +``` + +--- + +## Step 4 — deploy + +The easiest mental model is: + +- `infra/platform` = once per GCP project +- `infra/chroma` = once per GCP project +- `infra/agent` = once per agent + +If your private repo only contains the agent specs, you still have two clean options: + +### Option A — central infra repo + +Keep OpenTofu in a separate infra repo and point it at the backend image + agent IDs. This is the cleanest setup for teams. 
+ +### Option B — vendor/copy the `infra/` folder + +Copy `infra/` into your private repo and own it there. This is simpler if you're a solo maintainer and want one repo to rule everything. + +If you're bootstrapping from scratch, start with the public repo's `infra/` folder and [`infra/README.md`](../infra/README.md). + +--- + +## Step 5 — wire local defaults + +After deploy, export endpoint + API key: + +```bash +export EXPERT_AGENT_ENDPOINT="https://my-agent-xxxx.a.run.app" +export EXPERT_AGENT_API_KEY="$(gcloud secrets versions access latest --secret=admin-key-my-agent)" +``` + +Now the bare commands work: + +```bash +expert sync +expert ask "..." +``` + +For multi-agent repos, prefer per-agent env vars referenced by `expert.toml`: + +```bash +export CARDIOLOGY_AGENT_ENDPOINT="https://cardiology-xxxx.a.run.app" +export CARDIOLOGY_AGENT_API_KEY="..." + +export DERM_AGENT_ENDPOINT="https://derm-xxxx.a.run.app" +export DERM_AGENT_API_KEY="..." +``` + +Then: + +```bash +expert @cardiology ask "..." +expert @dermatology sync +``` + +--- + +## Step 6 — CI with the reusable E2E workflow + +Create `.github/workflows/e2e.yml` in your private repo. 
+ +### Single-agent repo + +```yaml +name: expert-e2e + +on: + pull_request: + workflow_dispatch: + +jobs: + e2e: + uses: feliperbroering/expert-agent/.github/workflows/expert-e2e.yml@main + with: + schema: agent_schema.yaml + secrets: + endpoint: ${{ secrets.EXPERT_AGENT_ENDPOINT }} + api-key: ${{ secrets.EXPERT_AGENT_API_KEY }} +``` + +### Multi-agent repo + +```yaml +name: expert-e2e + +on: + pull_request: + workflow_dispatch: + +jobs: + e2e: + strategy: + fail-fast: false + matrix: + agent: + - name: cardiology + schema: cardiology/agent_schema.yaml + endpoint_secret: CARDIOLOGY_AGENT_ENDPOINT + api_key_secret: CARDIOLOGY_AGENT_API_KEY + - name: dermatology + schema: dermatology/agent_schema.yaml + endpoint_secret: DERM_AGENT_ENDPOINT + api_key_secret: DERM_AGENT_API_KEY + uses: feliperbroering/expert-agent/.github/workflows/expert-e2e.yml@main + with: + agent: ${{ matrix.agent.name }} + schema: ${{ matrix.agent.schema }} + secrets: + endpoint: ${{ secrets[matrix.agent.endpoint_secret] }} + api-key: ${{ secrets[matrix.agent.api_key_secret] }} +``` + +More detail: [`docs/AGENT_E2E_SETUP.md`](./AGENT_E2E_SETUP.md). 
+ +--- + +## Suggested repo extras + +If you're making the private repo pleasant for future-you or for teammates, add: + +- `README.md` with the repo's purpose + the list of hosted agents +- `expert.toml` even in single-agent repos if you want explicit endpoint wiring +- `.gitignore` covering raw source dumps, exports, `.env`, and generated reports +- `docs/OPERATIONS.md` with deploy / rotate-key / rollback steps +- `.github/CODEOWNERS` if multiple specialists own different agents + +Nice next step: + +- add a tiny `Makefile` or `justfile` with `validate`, `sync`, `ask`, `e2e` + +--- + +## Suggested `.gitignore` + +```gitignore +.env +.venv/ +.expert/ +report.html +log.html +output.xml +*.tfstate +*.tfstate.* +*.tfplan +docs/_raw/ +docs/_exports/ +``` + +--- + +## Common workflows + +### Single-agent daily loop + +```bash +expert validate +expert count-tokens +expert sync +expert ask "what changed in the 2025 guideline?" +``` + +### Multi-agent daily loop + +```bash +expert agents +expert @cardiology validate +expert @cardiology sync +expert @cardiology ask "..." +expert @dermatology ask "..." +``` + +### Pin one agent for the day + +```bash +expert use cardiology +expert ask "..." # targets cardiology +expert which +expert use --clear +``` + +--- + +## Decision guide + +Choose **one repo per agent** when: + +- each agent has its own deploy cadence +- prompts/docs are highly sensitive +- different teams own different agents + +Choose **one repo with many agents** when: + +- the same team curates all agents +- the agents share domain docs or infra +- you want one CI surface and one CLI workspace + +If you're unsure, start with **one repo per agent**. You can always merge into a multi-agent workspace later with `expert.toml`. 
diff --git a/pyproject.toml b/pyproject.toml index f90db24..fad7dc9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,13 +4,13 @@ version = "0.1.1" description = "Ultra-specialist AI agents as a service — NotebookLM-style, powered by Gemini long-context + Context Caching." readme = "README.md" requires-python = ">=3.12" -license = { text = "Apache-2.0" } +license = { text = "MIT" } authors = [{ name = "Felipe Broering", email = "hi@felipe.run" }] keywords = ["ai", "agents", "gemini", "llm", "notebooklm", "cloud-run", "fastapi"] classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", + "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.12", ] @@ -70,7 +70,7 @@ test = [ ] [project.scripts] -expert = "expert.main:app" +expert = "expert.main:main" expert-agent-backend = "app.main:run" [project.urls] diff --git a/uv.lock b/uv.lock index 1f039f9..5b248d7 100644 --- a/uv.lock +++ b/uv.lock @@ -544,7 +544,7 @@ wheels = [ [[package]] name = "expert-agent" -version = "0.1.0" +version = "0.1.1" source = { editable = "." } dependencies = [ { name = "bcrypt" },