diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
new file mode 100644
index 0000000..8416e3a
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,62 @@
+name: Bug report
+description: Report a reproducible bug in the CLI, backend, testkit, or infra docs.
+title: "bug: "
+labels:
+  - bug
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for the report. Please include a minimal reproducer so we can get to a fix quickly.
+
+  - type: textarea
+    id: summary
+    attributes:
+      label: What happened?
+      description: What did you expect, and what happened instead?
+      placeholder: "`expert ask` hangs after the first SSE chunk..."
+    validations:
+      required: true
+
+  - type: textarea
+    id: steps
+    attributes:
+      label: Reproduction
+      description: Smallest set of steps, files, and commands that reproduces the bug.
+      placeholder: |
+        1. `expert init demo`
+        2. `cd demo`
+        3. `expert validate`
+    validations:
+      required: true
+
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant output
+      description: Paste the traceback, stderr, or logs as plain text. This field renders as a code block and does not accept attachments.
+      render: shell
+
+  - type: input
+    id: version
+    attributes:
+      label: expert version
+      placeholder: "expert --version"
+    validations:
+      required: true
+
+  - type: input
+    id: python
+    attributes:
+      label: Python version
+      placeholder: "python --version"
+    validations:
+      required: true
+
+  - type: input
+    id: os
+    attributes:
+      label: OS
+      placeholder: "macOS 15 / Ubuntu 24.04 / Windows 11"
+    validations:
+      required: true
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000..a1d3085
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,5 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Security disclosure
+    url: https://github.com/feliperbroering/expert-agent/security/advisories/new
+    about: Please report vulnerabilities privately, not via public issues.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml
new file mode 100644
index 0000000..9a726dd
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,40 @@
+name: Feature request
+description: Pitch a new capability or workflow improvement.
+title: "feat: "
+labels:
+  - enhancement
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Please lead with the user problem, not the implementation detail.
+
+  - type: textarea
+    id: problem
+    attributes:
+      label: Problem to solve
+      description: Who is blocked today, and by what?
+      placeholder: "As someone curating multiple private agents, I want..."
+    validations:
+      required: true
+
+  - type: textarea
+    id: proposal
+    attributes:
+      label: Proposed shape
+      description: CLI flag, new schema field, backend endpoint, docs flow, etc.
+      placeholder: "`expert deploy --agent cardiology` ..."
+    validations:
+      required: true
+
+  - type: textarea
+    id: alternatives
+    attributes:
+      label: Alternatives considered
+      description: What else did you try or think about?
+
+  - type: textarea
+    id: context
+    attributes:
+      label: Extra context
+      description: Links, screenshots, prior art, related issues.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..cd230d0
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,22 @@
+## Summary
+
+- 
+
+## Why
+
+- 
+
+## Test plan
+
+- [ ] `uv run ruff check .`
+- [ ] `uv run ruff format .`
+- [ ] `uv run mypy backend cli`
+- [ ] `uv run pytest`
+- [ ] `expert test ...` (if CLI / HTTP contract changed)
+
+## Checklist
+
+- [ ] Conventional Commit(s)
+- [ ] Docs updated (`README.md`, `docs/`, or examples) if needed
+- [ ] No unrelated refactors
+- [ ] Any breaking change is called out explicitly
diff --git a/.github/workflows/expert-e2e.yml b/.github/workflows/expert-e2e.yml
index 45592c1..e1ba508 100644
--- a/.github/workflows/expert-e2e.yml
+++ b/.github/workflows/expert-e2e.yml
@@ -7,7 +7,7 @@ name: expert-e2e (reusable)
 #     e2e:
 #       uses: feliperbroering/expert-agent/.github/workflows/expert-e2e.yml@main
 #       with:
-#         schema: ecg-expert/agent_schema.yaml
+#         schema: my-expert/agent_schema.yaml
 #         suite: 05_ask_latency
 #       secrets:
 #         endpoint: ${{ secrets.EXPERT_AGENT_ENDPOINT }}
@@ -25,6 +25,13 @@ on:
         required: false
         type: string
         default: agent_schema.yaml
+      agent:
+        description: >-
+          Agent name (as declared in expert.toml or the name of a sibling
+          dir containing agent_schema.yaml). Omit for single-agent repos.
+        required: false
+        type: string
+        default: ""
       suite:
         description: 'Robot suite stem to run (e.g. "05_ask_latency"). Omit for all.'
         required: false
@@ -102,6 +109,9 @@ jobs:
         id: test
         run: |
           args=()
+          if [ -n "${{ inputs.agent }}" ]; then
+            args+=(--agent "${{ inputs.agent }}")
+          fi
           if [ -n "${{ inputs.suite }}" ]; then
             args+=(--suite "${{ inputs.suite }}")
           fi
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..37d8c78
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,136 @@
+# AGENTS.md — contract for AI contributors
+
+This document is the short version of [CONTRIBUTING.md](./CONTRIBUTING.md) tuned for AI coding agents (Cursor, Claude Code, Codex, Aider, Cline, Continue, and friends). Reading this in full before proposing changes will save you a round of review.
+
+---
+
+## Project mental model
+
+`expert-agent` is a framework for shipping **ultra-specialist AI agents** — declarative YAML spec + corpus of reference docs → deployable API on Google Cloud Run. Three layers:
+
+1. **`backend/`** — FastAPI app (`app.main:app`). Stateless containers. State lives in GCS + Firestore + Chroma.
+2. **`cli/`** — `expert` (Typer + Rich). Scaffold, validate, sync, ask, manage sessions, run E2E tests. **Workspace-aware** (multi-agent via `expert.toml`).
+3. **`infra/`** — OpenTofu stacks (platform, chroma, agent). Per-project shared infra + per-agent Cloud Run service.
+
+Ground rule: **the backend is stateless**. Every new feature must survive `min=0` scale-to-zero. If you need state, put it in GCS or Firestore.
+
+---
+
+## Non-negotiables
+
+- **No emoji in source code.** Ever. Use `rich` colors and Unicode box-drawing. See `cli/expert/ui.py` + `cli/expert/brand.py` for the visual identity.
+- **Type hints everywhere.** `mypy --strict` must stay green on `backend/` and `cli/`.
+- **Lint + format must pass.** `ruff check .` + `ruff format .`.
+- **Tests must pass.** `pytest` green locally *and* in CI. Add tests for every behaviour change.
+- **Conventional Commits.** `feat(scope): ...`, `fix(scope): ...`, etc. Release-please reads these.
+- **No breaking API changes on `main`** without a migration note in the PR description.
+
+---
+
+## Where to put things
+
+| Change                                              | Goes in                                                                     |
+|-----------------------------------------------------|-----------------------------------------------------------------------------|
+| New CLI command                                     | `cli/expert/commands/<name>.py` + register in `cli/expert/main.py`          |
+| New backend endpoint                                | `backend/app/routes/<name>.py` + mount in `backend/app/main.py`             |
+| New schema field                                    | `backend/app/schema.py` (pydantic) + update `example-schema/` sample        |
+| New UI helper                                       | `cli/expert/ui.py` (follow the existing API shape: `print_*`)               |
+| New infra resource                                  | Right `infra/<stack>/` — `platform` (shared), `chroma` (shared), `agent` (per-agent) |
+| New E2E test                                        | `cli/expert/testkit/suites/NN_<name>.robot` + keywords in `.resource`       |
+| New multi-agent resolution rule                     | `cli/expert/workspace.py::Workspace.resolve` (document the precedence!)     |
+
+---
+
+## The visual identity
+
+The brand wordmark + tagline live in `cli/expert/brand.py`. Do **not** touch the ASCII art without approval — it's shared with the author's other open-source CLIs (`feliperbroering/eai`) and exists to create a coherent family look.
+
+UI conventions for any user-facing text:
+
+```text
+✓ success              → print_success("message")
+✗ error                → print_error("message")
+⚠ warning              → print_warning("message")
+› neutral info         → print_info("message")
+▶ streamed output      → reserved for assistant output in `expert ask`
+```
+
+Never invent new glyphs without updating `cli/expert/ui.py` and its docstring.
+
+---
+
+## Before opening a PR
+
+Run this locally. It's what CI runs. If any line fails, fix it before pushing:
+
+```bash
+source .venv/bin/activate
+uv run ruff check . && uv run ruff format .
+uv run mypy backend cli
+uv run pytest -q
+```
+
+### Writing good CLI tests
+
+- Use `typer.testing.CliRunner()` (see `cli/tests/test_main_alias.py` for the canonical pattern).
+- **Don't pin on glyphs.** Assert on stable strings like `"name must match"` — the `✗` prefix is a skin, not an API.
+- For workspace tests, build minimal `expert.toml` + `agent_schema.yaml` in `tmp_path`.
+
+### Writing good backend tests
+
+- Use `pytest-asyncio` (auto mode) + `respx` for HTTP stubbing.
+- Firestore is mocked via `mock-firestore`. Do not hit real Google APIs in tests.
+- Every new `/route` gets at least: auth test, happy path, one error path.
+
+---
+
+## Things that will get your PR rejected
+
+- Adding a dependency without justifying it in the PR description.
+- Introducing state outside GCS / Firestore / Chroma (e.g. in-memory caches that assume a single replica).
+- Silencing `mypy` with `# type: ignore` without a comment explaining why.
+- Reformatting unrelated code.
+- Commits that are not Conventional Commits.
+- Breaking `ruff` (lint *or* format) without a documented reason.
+- Copying the ASCII brand into other files — it's re-exported from `cli/expert/brand.py` precisely so we change it in one place.
+
+---
+
+## Multi-agent workspaces — the part you'll probably touch
+
+A single repo can host many agents. Resolution precedence (first match wins):
+
+1. `--agent <name>` flag
+2. `@<alias>` positional shortcut (rewritten to `--agent` by `_rewrite_at_alias` in `main.py`)
+3. `EXPERT_AGENT` env var
+4. `expert use <name>` pin (written to `.expert/state.json`)
+5. `default_agent` in `expert.toml`
+6. Single-agent short-circuit (workspace has exactly one agent)
+7. Exception to the ordering above: `--schema <path>`, when given, overrides everything else in this list (legacy bypass for `expert validate` etc.)
+
+If you add a new command that needs to target an agent:
+
+```python
+from ..context import resolve_context
+
+def cmd(
+    agent: Annotated[str | None, typer.Option("--agent", "-a", ...)] = None,
+    # other flags
+) -> None:
+    ctx = resolve_context(selector=agent, ...)
+    # ctx.name, ctx.schema_path, ctx.endpoint, ctx.api_key are all filled in
+```
+
+Don't roll your own resolution logic.
+
+---
+
+## License
+
+By contributing, you agree your work is released under the [MIT License](./LICENSE). The CLA is: **open a PR, you've agreed**. Nothing to sign.
+
+---
+
+## When in doubt
+
+Open a draft PR or file an issue. Showing intent beats writing the wrong thing twice.
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..50f30a3
--- /dev/null
+++ b/CODE_OF_CONDUCT.md
@@ -0,0 +1,129 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+We as members, contributors, and leaders pledge to make participation in our
+community a harassment-free experience for everyone, regardless of age, body
+size, visible or invisible disability, ethnicity, sex characteristics, gender
+identity and expression, level of experience, education, socio-economic status,
+nationality, personal appearance, race, religion, or sexual identity
+and orientation.
+
+We pledge to act and interact in ways that contribute to an open, welcoming,
+diverse, inclusive, and healthy community.
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our
+community include:
+
+- Demonstrating empathy and kindness toward other people
+- Being respectful of differing opinions, viewpoints, and experiences
+- Giving and gracefully accepting constructive feedback
+- Accepting responsibility and apologizing to those affected by our mistakes,
+  and learning from the experience
+- Focusing on what is best not just for us as individuals, but for the overall
+  community
+
+Examples of unacceptable behavior include:
+
+- The use of sexualized language or imagery, and sexual attention or advances
+  of any kind
+- Trolling, insulting or derogatory comments, and personal or political attacks
+- Public or private harassment
+- Publishing others' private information, such as a physical or email address,
+  without their explicit permission
+- Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Enforcement Responsibilities
+
+Community leaders are responsible for clarifying and enforcing our standards of
+acceptable behavior and will take appropriate and fair corrective action in
+response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+Community leaders have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, and will communicate reasons for
+moderation decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when
+an individual is officially representing the community in public spaces.
+Examples of representing our community include using an official email address,
+posting via an official social media account, or acting as an appointed
+representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported to the community leaders responsible for enforcement at
+[hi@felipe.run](mailto:hi@felipe.run). All complaints will be reviewed and
+investigated promptly and fairly.
+
+All community leaders are obligated to respect the privacy and security of the
+reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining
+the consequences for any action they deem in violation of this Code of Conduct:
+
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed
+unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing
+clarity around the nature of the violation and an explanation of why the
+behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series
+of actions.
+
+**Consequence**: A warning with consequences for continued behavior. No
+interaction with the people involved, including unsolicited interaction with
+those enforcing the Code of Conduct, for a specified period of time. This
+includes avoiding interactions in community spaces as well as external channels
+like social media. Violating these terms may lead to a temporary or permanent
+ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including
+sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public
+communication with the community for a specified period of time. No public or
+private interaction with the people involved, including unsolicited interaction
+with those enforcing the Code of Conduct, is allowed during this period.
+Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community
+standards, including sustained inappropriate behavior, harassment of an
+individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within
+the community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 2.1, available at
+[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html).
+
+Community Impact Guidelines were inspired by
+[Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity).
+
+For answers to common questions about this code of conduct, see the FAQ at
+[https://www.contributor-covenant.org/faq](https://www.contributor-covenant.org/faq).
+Translations are available at
+[https://www.contributor-covenant.org/translations](https://www.contributor-covenant.org/translations).
+
+[homepage]: https://www.contributor-covenant.org
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..5209c20
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,139 @@
+# Contributing to expert-agent
+
+Thanks for taking the time to contribute. This project is MIT-licensed and run in public — every patch, typo fix, and review comment is genuinely welcome.
+
+If you're an AI coding agent (Cursor, Claude Code, Codex, Aider, etc.) **read [AGENTS.md](./AGENTS.md) first** — it's the short version of this document tuned for AI collaborators.
+
+---
+
+## Before you open a PR
+
+1. **Check the open issues and PRs** for duplicates. If something is already in flight, ping there instead of forking a parallel effort.
+2. **For non-trivial changes, open an issue first.** A two-line "I'd like to implement X, is it welcome?" saves everyone time.
+3. **Keep PRs focused.** One feature or one fix per PR. Refactors are welcome but ship them separately from behaviour changes.
+
+---
+
+## Local setup
+
+Requires Python 3.12+, [`uv`](https://docs.astral.sh/uv/), and (optionally) Docker + OpenTofu if you're touching infra.
+
+```bash
+git clone https://github.com/feliperbroering/expert-agent
+cd expert-agent
+
+uv venv --python 3.12
+source .venv/bin/activate
+uv pip install -e ".[dev,test,vertex,otel]"
+
+# quick smoke test
+expert --version
+expert --help
+```
+
+---
+
+## Checks that must pass
+
+The CI runs exactly these four commands. Run them locally before pushing:
+
+```bash
+uv run ruff check .            # lint — use `--fix` for auto-fixes
+uv run ruff format .           # formatter
+uv run mypy backend cli        # strict type checks
+uv run pytest                  # unit + integration tests
+```
+
+Target coverage: **85%+** on backend, **90%+** on CLI. New code must include tests.
+
+### End-to-end
+
+If you're changing user-facing CLI behaviour or the HTTP contract, run the Robot Framework suite against a local or staging agent:
+
+```bash
+expert test --suite 01_validate       # offline suites, no endpoint needed
+expert test --endpoint http://... --api-key ... --suite 04_deploy
+```
+
+---
+
+## Code style
+
+- **No emoji in source code.** Visual cues come from `rich` colors and Unicode box-drawing (see `cli/expert/ui.py`).
+- **Docstrings over comments.** Functions and classes get docstrings; inline comments only explain *why*, not *what*.
+- **Type hints everywhere.** `mypy --strict` is non-negotiable on the backend and CLI.
+- **Prefer explicit over clever.** The project is a library people read; optimise for clarity.
+- **Conventional Commits.** Every commit subject follows [`<type>(<scope>): <message>`](https://www.conventionalcommits.org/). Allowed types: `feat`, `fix`, `docs`, `chore`, `refactor`, `test`, `style`, `perf`, `build`, `ci`.
+
+Releases are automated — [release-please](https://github.com/googleapis/release-please) reads Conventional Commits on `main` and opens version-bump PRs.
+
+---
+
+## What lives where
+
+```
+backend/app/        FastAPI app code
+backend/tests/      backend unit + integration tests
+cli/expert/         the `expert` Typer app
+cli/expert/testkit/ Robot Framework suites shipped inside the wheel
+cli/tests/          CLI unit tests
+example-schema/     annotated sample AgentSchema + identity.md
+infra/              OpenTofu stacks (platform / chroma / agent)
+scripts/            one-off bootstrap + migration scripts
+docs/               PRIVATE_AGENT_REPO, AGENT_E2E_SETUP
+.github/workflows/  ci, release-please, deploy, expert-e2e (reusable)
+```
+
+When you touch one layer, stay in it. Cross-cutting refactors (e.g. renaming a pydantic field used by backend + CLI) are fine but should update *both* in the same PR.
+
+---
+
+## Tests in the CLI — important patterns
+
+- Use `typer.testing.CliRunner()` for integration tests (see `cli/tests/test_main_alias.py`).
+- Avoid asserting on colored/glyph-decorated output text — pin to the **stable** part of the message (e.g. `"name must match"` instead of the `✗` glyph). See `cli/tests/test_init.py` for the pattern.
+- For workspace-dependent tests, use the `tmp_path` fixture and build minimal `expert.toml` / `agent_schema.yaml` files inline.
+
+---
+
+## Filing a good bug report
+
+Please include:
+
+- `expert --version`
+- Python version (`python --version`)
+- Minimal reproducer (schema + command + expected vs actual)
+- Relevant traceback, trimmed
+- OS
+
+A template is provided at [`.github/ISSUE_TEMPLATE/bug_report.yml`](./.github/ISSUE_TEMPLATE/bug_report.yml).
+
+---
+
+## Filing a good feature request
+
+Please describe:
+
+- The problem (user story) — *"as a curator of X, I want to Y so that Z"*
+- The shape of the solution you'd expect (CLI flag? new schema field? new endpoint?)
+- Alternatives you considered
+
+A template is provided at [`.github/ISSUE_TEMPLATE/feature_request.yml`](./.github/ISSUE_TEMPLATE/feature_request.yml).
+
+---
+
+## Security
+
+If you think you've found a vulnerability, **do not open a public issue.** Follow the private disclosure process in [SECURITY.md](./SECURITY.md).
+
+---
+
+## Code of Conduct
+
+Participation in this project is governed by the [Contributor Covenant v2.1](./CODE_OF_CONDUCT.md). In short: be kind, assume good faith, and don't make it weird.
+
+---
+
+## License
+
+By contributing, you agree that your contribution is licensed under the [MIT License](./LICENSE).
diff --git a/LICENSE b/LICENSE
index 1185446..45a0d31 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,201 +1,21 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright 2026 Felipe Broering
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
+MIT License
+
+Copyright (c) 2026 Felipe Broering
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index 17e9896..b1c5d6b 100644
--- a/README.md
+++ b/README.md
@@ -1,116 +1,115 @@
-# expert-agent
+<div align="center">
 
-> **NotebookLM as an API.** Open-source framework for ultra-specialist AI agents
-> grounded in a curated knowledge base, powered by Gemini long-context + Context
-> Caching, with multi-layer persistent memory.
+```
+ ███████╗██╗  ██╗██████╗ ███████╗██████╗ ████████╗         
+ ██╔════╝╚██╗██╔╝██╔══██╗██╔════╝██╔══██╗╚══██╔══╝  ╭───╮  
+ █████╗   ╚███╔╝ ██████╔╝█████╗  ██████╔╝   ██║     │ ≡ │  
+ ██╔══╝   ██╔██╗ ██╔═══╝ ██╔══╝  ██╔══██╗   ██║     ╰───╯  
+ ███████╗██╔╝ ██╗██║     ███████╗██║  ██║   ██║            
+ ╚══════╝╚═╝  ╚═╝╚═╝     ╚══════╝╚═╝  ╚═╝   ╚═╝            
+```
+
+**ground a model on your docs. ship it as an API.**
+
+declarative ultra-specialist agents on Cloud Run — Gemini long-context, Context Cache, persistent memory.
 
 [![CI](https://github.com/feliperbroering/expert-agent/actions/workflows/ci.yml/badge.svg)](https://github.com/feliperbroering/expert-agent/actions/workflows/ci.yml)
-[![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](./LICENSE)
+[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](./LICENSE)
 [![Python 3.12+](https://img.shields.io/badge/python-3.12%2B-blue.svg)](https://www.python.org/)
+[![Status: alpha](https://img.shields.io/badge/status-alpha-orange.svg)](#status)
 
-> **Status — alpha.** End-to-end production deploy validated on Google Cloud
-> Run (FastAPI backend + Chroma HTTP + Firestore + GCS). API surface and
-> schema are still subject to breaking changes until `v1.0`.
+</div>
 
 ---
 
-## What you get
+```bash
+# 1. scaffold an agent
+expert init cardio-expert
 
-You define an agent declaratively:
+# 2. drop your corpus
+cp ~/papers/*.pdf cardio-expert/docs/
 
-1. A **system prompt** (the agent's identity and behaviour).
-2. A **directory of reference documents** (`.md`, `.pdf`, `.txt`).
-3. A **YAML schema** (`agent_schema.yaml`) wiring the two together.
+# 3. validate + size the context cache
+expert validate && expert count-tokens
 
-…and `expert-agent` gives you a deployable Cloud Run service exposing:
+# 4. deploy once, ask forever
+expert sync && expert ask "qual fórmula de correção do QTc a AHA recomenda?"
+```
 
-- **`/ask`** — streaming Q&A grounded in the corpus, with optional citations.
-- **`/docs/sync`** — incremental upload of the knowledge base (SHA-keyed).
-- **`/sessions/...`** — short-term conversational memory (LGPD/GDPR friendly).
-- **`/memory/...`** — long-term semantic recall (verbatim, not summarised).
-- **`/health`** + **`/ready`** — liveness + dependency probes.
+You define the agent declaratively — a system prompt, a directory of reference documents, and a YAML schema. `expert-agent` gives you back a deployable API with grounded answers, citations, long-term memory, and LGPD-friendly session controls.
 
-A Python CLI (`expert`) handles scaffolding, validation, sync, and
-ad-hoc queries against any deployed agent.
+> [!NOTE]
+> <a name="status"></a>**Status — alpha.** End-to-end production deploy validated on Google Cloud Run (FastAPI + Chroma HTTP + Firestore + GCS). API surface and schema are still subject to breaking changes until `v1.0`.
 
 ---
 
-## Architecture
+## What you get
 
-```
-   client (CLI / HTTP)
-          │
-          ▼
-   ┌──────────────────────────────┐
-   │  agent  (Cloud Run, FastAPI) │ ◀── reads agent_schema.yaml
-   │  ├─ /ask         (SSE)       │     from gs://docs-bucket/<agent>/schema/
-   │  ├─ /docs/sync               │
-   │  ├─ /sessions /memory        │
-   │  └─ /health /ready           │
-   └────┬───────────┬─────────┬───┘
-        │           │         │
-        ▼           ▼         ▼
-   Gemini API   Firestore   Chroma HTTP (Cloud Run, min=1)
-   (Context     (sessions   ├─ shared per project
-    Cache)       + state)   └─ persisted via GCS FUSE → gs://memory/chroma
-        ▲
-        │ File API mirror
-        │
-   GCS (durable source of truth)
-   ├─ gs://docs/<agent>/<sha>/<file>          knowledge base
-   ├─ gs://docs/<agent>/_state/sync_manifest.json
-   ├─ gs://docs/<agent>/schema/...            schema + prompts
-   └─ gs://memory/<agent>/                    long-term memory snapshots
-```
+Out of the box, your deployed agent exposes:
 
-**Key design choices** ([deeper notes in `infra/README.md`](./infra/README.md)):
+- **`POST /ask`** — streaming Q&A grounded in your corpus, with optional citations
+- **`POST /docs/sync`** — incremental upload of the knowledge base (SHA-keyed)
+- **`GET/DELETE /sessions/...`** — short-term conversational memory (LGPD/GDPR)
+- **`POST /memory/...`** — long-term verbatim recall (not summaries)
+- **`GET /health` / `/ready`** — liveness + dependency probes
 
-- **Stateless agent containers.** All state lives in GCS or Firestore. Cloud
-  Run can scale to zero and back without losing context.
-- **Context Cache as the grounding source.** Documents go into a Gemini
-  Context Cache built once per knowledge-base SHA; subsequent `/ask` calls
-  reuse it (`cached_tokens ≈ input_tokens` in steady state).
-- **Multi-layer memory.** Firestore holds the last N turns of conversation
-  (short-term) plus a verbatim recall index (long-term, indexed in Chroma
-  via [MemPalace](https://pypi.org/project/mempalace/)).
-- **One Chroma HTTP server per project** (Cloud Run, `min=max=1`, GCS FUSE
-  for persistence) — shared across every agent in the project.
+A Python CLI (`expert`) handles scaffolding, validation, sync, ad-hoc queries, multi-agent workspace management, and a ready-to-run Robot Framework E2E kit.
 
 ---
 
 ## Quick start
 
-### 1. Bootstrap a GCP project (one-time)
+### Install
 
 ```bash
-PROJECT_ID=my-agents-prod
-REGION=us-central1
+# uv (recommended — installs `expert` as an isolated tool on your PATH)
+uv tool install "git+https://github.com/feliperbroering/expert-agent.git"
 
+# or pipx
+pipx install "git+https://github.com/feliperbroering/expert-agent.git"
+```
+
+Verify:
+
+```bash
+expert --version
+```
+
+### Scaffold your first agent
+
+```bash
+expert init my-expert
+cd my-expert
+$EDITOR prompts/identity.md         # define behaviour
+cp ~/your-corpus/*.pdf docs/        # drop in your reference material
+expert validate                     # schema contract check
+expert count-tokens                 # size the context cache
+```
+
+### Deploy to Google Cloud
+
+One-time project bootstrap:
+
+```bash
+PROJECT_ID=my-agents-prod REGION=us-central1
 gcloud auth login
 gcloud auth application-default login
 gcloud config set project "$PROJECT_ID"
 
-# Enables APIs, creates tfstate bucket, Artifact Registry, Firestore,
-# and the empty `gemini-api-key` secret. Idempotent.
 ./scripts/bootstrap-project.sh "$PROJECT_ID" "$REGION"
 
-# Inject your Gemini API key (get one at https://aistudio.google.com/apikey).
 echo -n "YOUR_GEMINI_KEY" | \
   gcloud secrets versions add gemini-api-key --data-file=- --project="$PROJECT_ID"
 ```
 
-### 2. Apply the shared platform stacks
+Apply the shared infra (run once per project, not once per agent):
 
 ```bash
-cd infra/platform
-tofu init -backend-config="bucket=${PROJECT_ID}-tfstate"
-tofu apply -var="project_id=${PROJECT_ID}" -var="region=${REGION}"
-cd ../chroma
-tofu init -backend-config="bucket=${PROJECT_ID}-tfstate"
-tofu apply -var="project_id=${PROJECT_ID}" -var="region=${REGION}"
+(cd infra/platform && tofu init -backend-config="bucket=${PROJECT_ID}-tfstate" && tofu apply -var="project_id=${PROJECT_ID}" -var="region=${REGION}")
+(cd infra/chroma   && tofu init -backend-config="bucket=${PROJECT_ID}-tfstate" && tofu apply -var="project_id=${PROJECT_ID}" -var="region=${REGION}")
 ```
 
-### 3. Build & push the backend image
+Build + ship the backend image:
 
 ```bash
 gcloud builds submit . \
@@ -118,20 +117,9 @@ gcloud builds submit . \
   --substitutions=_IMAGE="${REGION}-docker.pkg.dev/${PROJECT_ID}/expert-agent/backend:v0.1.0"
 ```
 
-### 4. Scaffold and deploy your first agent
+Provision this agent's Cloud Run service:
 
 ```bash
-# Install the CLI (uv tool style — single root pyproject.toml)
-uv tool install "git+https://github.com/feliperbroering/expert-agent.git"
-
-# Scaffold an agent locally
-expert init my-expert
-cd my-expert
-$EDITOR prompts/identity.md            # define behaviour
-cp ~/papers/*.pdf docs/                # drop in your corpus
-expert validate --schema ./agent_schema.yaml
-
-# Provision Cloud Run + IAM + secrets for this agent
 cd ../infra/agent
 tofu init -reconfigure \
   -backend-config="bucket=${PROJECT_ID}-tfstate" \
@@ -141,25 +129,63 @@ tofu apply \
   -var="region=${REGION}" \
   -var="agent_id=my-expert" \
   -var="image=${REGION}-docker.pkg.dev/${PROJECT_ID}/expert-agent/backend:v0.1.0"
+```
+
+Seed the admin key, push docs, ask:
 
-# Seed the per-agent admin key (one-time)
+```bash
 ADMIN_KEY=$(python -c 'import secrets; print(secrets.token_urlsafe(32))')
 echo -n "$ADMIN_KEY" | \
   gcloud secrets versions add admin-key-my-expert --data-file=- --project="${PROJECT_ID}"
 
-# Push docs + create the Context Cache
 expert sync \
-  --schema ./agent_schema.yaml \
-  --endpoint "$(gcloud run services describe agent-my-expert \
-                  --region="${REGION}" --format='value(status.url)')" \
+  --endpoint "$(gcloud run services describe agent-my-expert --region="${REGION}" --format='value(status.url)')" \
   --api-key "$ADMIN_KEY"
 
-# Ask something
-expert ask "What does my corpus say about X?" \
-  --endpoint <SERVICE_URL> --api-key "$ADMIN_KEY"
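+expert ask "what does my corpus say about X?" --endpoint "$(gcloud run services describe agent-my-expert --region="${REGION}" --format='value(status.url)')" --api-key "$ADMIN_KEY"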
 ```
 
-> See [`infra/README.md`](./infra/README.md) for the full per-stack reference.
+See [`docs/PRIVATE_AGENT_REPO.md`](./docs/PRIVATE_AGENT_REPO.md) for the full private-repo checklist (one agent) and [`infra/README.md`](./infra/README.md) for the per-stack reference.
+
+---
+
+## Multi-agent workspaces
+
+A single repo can host many agents. `expert` auto-detects them and offers three equivalent ways to pick which one a command targets:
+
+```bash
+expert agents                           # list everything the workspace knows about
+
+# pick per-invocation
+expert ask --agent derm "..."           # explicit flag
+expert @derm ask "..."                  # @alias positional shortcut
+EXPERT_AGENT=derm expert ask "..."      # env var
+
+# pin for a session
+expert use derm                         # write .expert/state.json
+expert ask "..."                        # uses derm from now on
+expert which                            # which agent would run?
+```
+
+Declare agents explicitly via `expert.toml` for full control:
+
+```toml
+default_agent = "derm"
+
+[agents.derm]
+schema = "derm-expert/agent_schema.yaml"
+endpoint_env = "DERM_AGENT_ENDPOINT"
+api_key_env = "DERM_AGENT_API_KEY"
+
+[agents.my-expert]
+schema = "my-expert/agent_schema.yaml"
+endpoint_env = "MY_EXPERT_ENDPOINT"
+api_key_env = "MY_EXPERT_API_KEY"
+```
+
+…or skip the file entirely — any sibling directory with an `agent_schema.yaml` is discovered automatically.
+
+**Resolution precedence** (first match wins): `--agent` flag → `@alias` → `EXPERT_AGENT` env var → `expert use` pin → `expert.toml` default → single-agent short-circuit.
 
 ---
 
@@ -177,8 +203,8 @@ metadata:
 
 spec:
   model:
-    provider: gemini             # or `gemini-vertex` (optional `[vertex]` extra)
-    name: gemini-2.5-pro          # any Pro tier with Context Caching support
+    provider: gemini                    # or `gemini-vertex`
+    name: gemini-2.5-pro                # any tier with Context Caching
     temperature: 0.2
     max_output_tokens: 8192
 
@@ -192,7 +218,7 @@ spec:
 
   context_cache:
     enabled: true
-    ttl_seconds: 3600                  # 1 h is the AI Studio sweet spot
+    ttl_seconds: 3600                   # 1 h — the AI Studio sweet spot
     refresh_before_expiry_seconds: 300
 
   memory:
@@ -201,47 +227,48 @@ spec:
                   persistence: { type: chroma-http } }
 
   grounding:
-    # AI Studio rejects `tools=GoogleSearch` together with `cachedContent`.
-    # Vertex supports both — flip this on if you migrate.
-    enabled: false
+    enabled: false                      # AI Studio rejects `tools=GoogleSearch` + `cachedContent`
     max_citations: 10
 
   rate_limit: { requests_per_minute: 30, tokens_per_day: 1000000 }
 ```
 
-A full annotated example lives in [`example-schema/`](./example-schema/).
+Full annotated example: [`example-schema/`](./example-schema/).
 
 ---
 
 ## CLI reference
 
 ```text
-expert init <name>           Scaffold a new agent project
-expert validate              Validate agent_schema.yaml against the contract
-expert count-tokens          Estimate corpus tokens (Context Cache budgeting)
-expert sync                  Push docs + rebuild Context Cache
-expert ask "<question>"      Stream answer from a deployed agent
-expert sessions list/delete  Manage user sessions (LGPD)
-expert test                  Run the packaged E2E Robot Framework kit
+expert init <name>                  scaffold a new agent project
+expert agents                       list agents in this workspace
+expert use <name>                   pin an agent as active
+expert which                        show which agent a bare command targets
+expert validate                     validate agent_schema.yaml
+expert count-tokens                 estimate corpus tokens (cache budgeting)
+expert sync                         push docs + rebuild Context Cache
+expert ask "<question>"             stream answer from the deployed agent
+expert sessions list/show/delete    manage user sessions (LGPD)
+expert test                         run the packaged Robot Framework E2E kit
+expert --version                    show the brand + version
 ```
 
-Every command supports `--help` for full options.
+Every command supports `--help`, `--agent <name>` (or `@alias`), `--endpoint`, `--api-key`.
 
 ---
 
 ## End-to-end testing
 
-A ready-made Robot Framework kit ships with the CLI. Install with the
-`[test]` extra and run against any agent:
+A ready-made Robot Framework kit ships with the CLI. Install with the `[test]` extra:
 
 ```bash
 uv tool install 'expert-agent[test] @ git+https://github.com/feliperbroering/expert-agent.git'
 export EXPERT_AGENT_ENDPOINT=https://my-agent-xxxx.a.run.app
-export EXPERT_AGENT_API_KEY=$(gcloud secrets versions access latest --secret=my-agent-api-key)
+export EXPERT_AGENT_API_KEY=$(gcloud secrets versions access latest --secret=admin-key-my-expert)
 
-expert test --schema ./agent_schema.yaml      # all suites
-expert test --suite 05_ask_latency            # single suite
-expert test --list                            # discover suites
+expert test                         # all suites
+expert test --suite 05_ask_latency  # single suite
+expert test --list                  # discover suites
 ```
 
 Suites shipped:
@@ -250,33 +277,82 @@ Suites shipped:
 |-------------------|:--------:|---------------------------------------------------------|
 | `01_validate`     | yes      | `expert validate` succeeds on the agent schema          |
 | `02_create`       | yes      | `expert init --yes` scaffolds + validates out of the box|
-| `03_update`       | yes      | Edit → validate loop preserves schema integrity         |
+| `03_update`       | yes      | edit → validate loop preserves schema integrity         |
 | `04_deploy`       | no       | `/health`, `/ready` respond 200; unauth calls get 401   |
-| `05_ask_latency`  | no       | Warmup + steady-state TTFT budgets + cache hit signal   |
+| `05_ask_latency`  | no       | warmup + steady-state TTFT budgets + cache-hit signal   |
 | `06_sessions`     | no       | LGPD: create → list → delete round-trip                 |
 
 ### Reusable GitHub Actions workflow
 
-Private agent repos inherit the same suites via a reusable workflow — no
-submodules or copy-paste. See
-[`.github/workflows/expert-e2e.yml`](.github/workflows/expert-e2e.yml):
+Private agent repos inherit the same suites via a reusable workflow — no submodules, no copy-paste. See [`.github/workflows/expert-e2e.yml`](.github/workflows/expert-e2e.yml):
 
 ```yaml
 jobs:
   e2e:
     uses: feliperbroering/expert-agent/.github/workflows/expert-e2e.yml@main
     with:
-      schema: ecg-expert/agent_schema.yaml
-      suite: 05_ask_latency                  # optional — omit to run all
+      schema: my-expert/agent_schema.yaml
+      suite: 05_ask_latency               # optional — omit to run everything
     secrets:
       endpoint: ${{ secrets.EXPERT_AGENT_ENDPOINT }}
       api-key:  ${{ secrets.EXPERT_AGENT_API_KEY }}
 ```
 
-> **Wiring this into a private repo for the first time?** Follow
-> [`docs/AGENT_E2E_SETUP.md`](docs/AGENT_E2E_SETUP.md) — a copy-pasteable,
-> agent-friendly checklist that takes you from "empty repo with a schema" to
-> "green nightly E2E job" in five steps.
+For monorepos hosting multiple agents, use a matrix strategy (see [`docs/AGENT_E2E_SETUP.md`](docs/AGENT_E2E_SETUP.md)).
+
+---
+
+## Architecture
+
+```
+   client (CLI / HTTP)
+          │
+          ▼
+   ┌──────────────────────────────┐
+   │  agent  (Cloud Run, FastAPI) │ ◀── reads agent_schema.yaml
+   │  ├─ /ask         (SSE)       │     from gs://docs-bucket/<agent>/schema/
+   │  ├─ /docs/sync               │
+   │  ├─ /sessions /memory        │
+   │  └─ /health /ready           │
+   └────┬───────────┬─────────┬───┘
+        │           │         │
+        ▼           ▼         ▼
+   Gemini API   Firestore   Chroma HTTP (Cloud Run, min=1)
+   (Context     (sessions   ├─ shared per project
+    Cache)       + state)   └─ persisted via GCS FUSE → gs://memory/chroma
+        ▲
+        │ File API mirror
+        │
+   GCS (durable source of truth)
+   ├─ gs://docs/<agent>/<sha>/<file>          knowledge base
+   ├─ gs://docs/<agent>/_state/sync_manifest.json
+   ├─ gs://docs/<agent>/schema/...            schema + prompts
+   └─ gs://memory/<agent>/                    long-term memory snapshots
+```
+
+**Key design choices** ([deeper notes in `infra/README.md`](./infra/README.md)):
+
+- **Stateless agent containers.** All state lives in GCS or Firestore. Cloud Run can scale to zero and back without losing context.
+- **Context Cache as the grounding source.** Documents go into a Gemini Context Cache built once per knowledge-base SHA; subsequent `/ask` calls reuse it (`cached_tokens ≈ input_tokens` in steady state).
+- **Multi-layer memory.** Firestore holds the last N turns (short-term) plus a verbatim recall index (long-term, indexed in Chroma via [MemPalace](https://pypi.org/project/mempalace/)).
+- **One Chroma HTTP server per project** (Cloud Run, `min=max=1`, GCS FUSE for persistence) — shared across every agent in the project.
+
+---
+
+## vs other ways to ship a RAG agent
+
+|                                         | expert-agent             | NotebookLM         | OpenAI Assistants        | Bring-your-own RAG stack |
+|-----------------------------------------|:------------------------:|:------------------:|:------------------------:|:------------------------:|
+| **API you own**                         | ✓ (your Cloud Run)       | ✗ (Google UI only) | ✓ (OpenAI hosted)        | ✓                        |
+| **Grounded in your corpus**             | ✓ (Context Cache)        | ✓                  | ✓ (file_search)          | ✓ (you wire it up)       |
+| **Long-context native** (100k+ tokens)  | ✓ (Gemini 2.5 Pro)       | ✓                  | partial (chunked)        | depends                  |
+| **Declarative YAML spec**               | ✓ (`agent_schema.yaml`)  | ✗                  | ✗                        | ✗                        |
+| **Multi-agent in one repo**             | ✓ (`expert.toml` + `@`)  | n/a                | ✗                        | DIY                      |
+| **Persistent conversation memory**      | ✓ (Firestore + MemPalace)| partial            | ✓                        | DIY                      |
+| **E2E test kit** (Robot Framework)      | ✓ (reusable workflow)    | ✗                  | ✗                        | DIY                      |
+| **LGPD/GDPR session delete**            | ✓ (`/sessions/:id`)      | ✗                  | partial                  | DIY                      |
+| **Self-hosted**                         | ✓ (your GCP project)     | ✗                  | ✗                        | ✓                        |
+| **Open source**                         | ✓ (MIT)                  | ✗                  | ✗                        | varies                   |
 
 ---
 
@@ -284,55 +360,47 @@ jobs:
 
 Cloud Run uses **two layers of bearer auth**, intentionally:
 
-| Header                          | Audience                | Required for                   |
-|---------------------------------|-------------------------|--------------------------------|
-| `X-Serverless-Authorization`    | Cloud Run IAM (ID token)| Reaching the service at all    |
-| `Authorization: Bearer <KEY>`   | App layer (admin key)   | `/ask`, `/docs/sync`, `/memory`|
+| Header                          | Audience                  | Required for                   |
+|---------------------------------|---------------------------|--------------------------------|
+| `X-Serverless-Authorization`    | Cloud Run IAM (ID token)  | reaching the service at all    |
+| `Authorization: Bearer <KEY>`   | App layer (admin key)     | `/ask`, `/docs/sync`, `/memory`|
 
-The split avoids the well-known collision where Cloud Run's IAM strips
-`Authorization` before the app sees it. Public endpoints (`/health`,
-`/ready`) only need the ID token.
-
-For local dev you can run with `APP_ENV=development` and disable the
-admin-key check entirely (see `backend/app/auth.py`).
+The split avoids the well-known collision where Cloud Run's IAM strips `Authorization` before the app sees it. Public endpoints (`/health`, `/ready`) only need the ID token. For local dev, set `APP_ENV=development` to skip the admin-key check (see `backend/app/auth.py`).
 
 ---
 
 ## Repository layout
 
 ```
-backend/        FastAPI app (`app.main:app`) + tests
-  app/llm/      LLMClient protocol + Gemini AI Studio / Vertex implementations
-  app/cache/    Context Cache manager + background refresher
-  app/docs/     Manifest model + DocsSyncService (incremental SHA diff)
-  app/memory/   Short-term (Firestore) + long-term (MemPalace/Chroma) + orchestrator
-  app/routes/   /ask /docs/sync /sessions /memory /health
-cli/            `expert` (Typer + Rich)
-example-schema/ Annotated AgentSchema + prompt template
-infra/          OpenTofu stacks: platform, chroma, agent (per agent)
-scripts/        bootstrap-project.sh, bootstrap_docs_to_gcs.py
-.github/workflows/  ci.yml, release-please.yml, deploy.yml
+backend/            FastAPI app (`app.main:app`) + tests
+  app/llm/          LLMClient protocol + Gemini AI Studio / Vertex implementations
+  app/cache/        Context Cache manager + background refresher
+  app/docs/         Manifest + DocsSyncService (incremental SHA diff)
+  app/memory/       Short-term (Firestore) + long-term (MemPalace/Chroma) + orchestrator
+  app/routes/       /ask /docs/sync /sessions /memory /health
+cli/                `expert` CLI (Typer + Rich) + Robot Framework testkit
+example-schema/     annotated AgentSchema + prompt template
+infra/              OpenTofu stacks: platform, chroma, agent (per agent)
+scripts/            bootstrap-project.sh, bootstrap_docs_to_gcs.py
+docs/               PRIVATE_AGENT_REPO.md, AGENT_E2E_SETUP.md
+.github/workflows/  ci.yml, release-please.yml, deploy.yml, expert-e2e.yml
 ```
 
 ---
 
 ## Cost ballpark
 
-For a single project hosting one or more agents on `us-central1` (or
-similar), idling on Cloud Run scale-to-zero:
+For a single project hosting one or more agents on `us-central1`, idling on Cloud Run scale-to-zero:
 
 | Component                            | Idle           | Notes                                  |
 |--------------------------------------|----------------|----------------------------------------|
-| Chroma server (Cloud Run, min=max=1) | **~$40 / mo**  | Always-on, shared across all agents    |
-| Each agent (Cloud Run, min=0)        | **~$0**        | Pay only on request                    |
-| Firestore                            | **~$0**        | Free tier covers low-QPS use           |
+| Chroma server (Cloud Run, min=max=1) | **~$40 / mo**  | always-on, shared across all agents    |
+| Each agent (Cloud Run, min=0)        | **~$0**        | pay only on request                    |
+| Firestore                            | **~$0**        | free tier covers low-QPS use           |
 | Gemini Pro requests                  | **variable**   | `cached_tokens` are heavily discounted |
-| GCS storage                          | **~$0.02/GiB** | Docs + memory snapshots                |
+| GCS storage                          | **~$0.02/GiB** | docs + memory snapshots                |
 
-Headline efficiency win: with Context Caching on, a typical `/ask` against
-a ~800 k-token corpus shows `cached_tokens / input_tokens ≈ 0.999`, i.e.
-the prompt portion of the cost is essentially flat regardless of how big
-your corpus is.
+Headline efficiency win: with Context Caching on, a typical `/ask` against a ~800 k-token corpus shows `cached_tokens / input_tokens ≈ 0.999` — the prompt portion of the cost is essentially flat regardless of how big your corpus is.
 
 ---
 
@@ -344,15 +412,13 @@ your corpus is.
 - [ ] Multi-tenant agent (per-tenant memory + cache) for SaaS use cases.
 - [ ] Web UI / playground for non-technical curators.
 - [ ] `release-please`-driven versioned container tags pushed to GHCR.
+- [ ] PyPI release (`pip install expert-agent`) + Homebrew tap.
 
 ---
 
 ## Contributing
 
-Issues and PRs are welcome. The project follows
-[Conventional Commits](https://www.conventionalcommits.org/) and uses
-[release-please](https://github.com/googleapis/release-please) for SemVer
-automation. Run the full check suite with:
+Issues and PRs welcome. See [CONTRIBUTING.md](./CONTRIBUTING.md) for dev setup, style, and conventions. AI coding agents: read [AGENTS.md](./AGENTS.md) first. Please report security issues privately via [SECURITY.md](./SECURITY.md). We follow the [Contributor Covenant v2.1](./CODE_OF_CONDUCT.md).
 
 ```bash
 uv sync --extra dev --extra vertex --extra otel
@@ -363,6 +429,12 @@ uv run pytest
 
 ---
 
+## Acknowledgements
+
+`expert-agent` stands on the shoulders of giants: [Gemini](https://ai.google.dev/), [FastAPI](https://fastapi.tiangolo.com/), [Typer](https://typer.tiangolo.com/) + [Rich](https://rich.readthedocs.io/), [Chroma](https://www.trychroma.com/), [MemPalace](https://pypi.org/project/mempalace/), [Robot Framework](https://robotframework.org/), [OpenTofu](https://opentofu.org/).
+
+---
+
 ## License
 
-Apache-2.0 — see [LICENSE](./LICENSE).
+[MIT](./LICENSE) — do what you want, just don't sue us.
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..de384fb
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,66 @@
+# Security Policy
+
+## Supported versions
+
+`expert-agent` is in alpha. Security fixes are shipped against the latest `main` and the most recent tagged release. Older releases are not patched — please upgrade.
+
+| Version  | Supported          |
+|----------|--------------------|
+| `main`   | ✓ (latest fixes)   |
+| `0.1.x`  | ✓ (latest tag)     |
+| `< 0.1`  | ✗                  |
+
+## Reporting a vulnerability
+
+**Please do not open a public GitHub issue.**
+
+Report vulnerabilities privately through one of:
+
+1. **GitHub private advisory** — [new advisory](https://github.com/feliperbroering/expert-agent/security/advisories/new) (preferred — keeps the timeline tied to the repo).
+2. **Email** — [hi@felipe.run](mailto:hi@felipe.run) with subject `[expert-agent security]`. Please include:
+   - A description of the issue and its impact.
+   - Steps to reproduce (or a proof-of-concept).
+   - The commit SHA or version you tested against.
+   - Your preferred contact method for the follow-up.
+
+You'll get an acknowledgement within **72 hours** and a triage update within **7 days**.
+
+## Disclosure timeline
+
+1. **Day 0** — you report privately.
+2. **Day ≤ 3** — we acknowledge and start triage.
+3. **Day ≤ 30** — we ship a fix on `main` and cut a patch release. For critical issues we aim for ≤ 7 days.
+4. **Day ≤ 60** — we publish a GitHub Security Advisory crediting you (unless you opt out).
+
+If a fix cannot land in 60 days (e.g. requires upstream changes in Gemini, Chroma, or FastAPI), we'll coordinate the disclosure window with you.
+
+## Scope
+
+In scope:
+
+- `backend/` — FastAPI app, auth middleware, data-handling paths.
+- `cli/` — command-injection, credential handling, file-write paths.
+- `infra/` — IAM bindings, Cloud Run config, Secret Manager usage.
+- Supply chain — pinned dependencies, Docker base image, CI workflow secrets.
+
+Out of scope (please *don't* report these):
+
+- Misconfiguration of **your** GCP project or leaked API keys you created.
+- Volumetric DoS against a self-hosted deploy (Cloud Run throttling is your config).
+- Vulnerabilities in Gemini, Firestore, Chroma, or other upstream services — report those to the upstream maintainer.
+- Social engineering of maintainers.
+
+## Hardening checklist for operators
+
+If you self-host an expert agent, here's the short audit we suggest running:
+
+- **Rotate admin keys** regularly (`admin-key-<agent>` secret version bump).
+- Keep **`APP_ENV=production`** — development mode disables the bearer-key check.
+- Scope the **Cloud Run service account** to exactly the secrets + GCS prefixes it needs.
+- Use **CMEK** (customer-managed encryption keys) on GCS + Firestore if your data sensitivity requires it.
+- Pin the backend image by **digest**, not by tag, in your `tofu apply`.
+- Enable **Cloud Run request logging** + **VPC Service Controls** if you're in a regulated environment.
+
+## Credits
+
+Security contributors will be listed in release notes and (if desired) in a `SECURITY_HALL_OF_FAME.md`. Thanks for keeping this project safe.
diff --git a/backend/app/llm/gemini_ai_studio.py b/backend/app/llm/gemini_ai_studio.py
index 2e4d8fe..ad0c01b 100644
--- a/backend/app/llm/gemini_ai_studio.py
+++ b/backend/app/llm/gemini_ai_studio.py
@@ -36,7 +36,7 @@
 # Conservative default for unit tests / smoke runs. Production agents pin a
 # real Pro tier (e.g. ``gemini-2.5-pro``) via ``spec.model.name`` in their
 # AgentSchema; bump together with the SDK version when a newer Pro ships.
-DEFAULT_MODEL = "gemini-2.0-flash-exp"
+DEFAULT_MODEL = "gemini-2.5-flash"
 
 _TRANSIENT_EXC: tuple[type[BaseException], ...] = (TimeoutError, ConnectionError)
 
@@ -73,7 +73,7 @@ class GeminiAIStudioClient:
         Gemini AI Studio API key (secret).
     model:
         Model identifier, e.g. ``"gemini-2.5-pro"`` or
-        ``"gemini-2.0-flash-exp"`` for cheaper smoke tests.
+        ``"gemini-2.5-flash"`` for cheaper smoke tests.
     max_citations:
         Upper bound on citations surfaced per generation chunk.
     """
diff --git a/cli/expert/__init__.py b/cli/expert/__init__.py
index 657073e..f87e932 100644
--- a/cli/expert/__init__.py
+++ b/cli/expert/__init__.py
@@ -1,3 +1,8 @@
 """expert — CLI for expert-agent (init, validate, sync, ask, sessions)."""
 
-__version__ = "0.1.0"
+from importlib.metadata import PackageNotFoundError, version
+
+try:
+    __version__ = version("expert-agent")
+except PackageNotFoundError:
+    __version__ = "0.0.0+local"
diff --git a/cli/expert/brand.py b/cli/expert/brand.py
new file mode 100644
index 0000000..d5f8c07
--- /dev/null
+++ b/cli/expert/brand.py
@@ -0,0 +1,85 @@
+"""ASCII brand + helpers — shared visual identity across the CLI.
+
+The logo uses the classic *ANSI Shadow* figlet font for the word ``EXPERT``
+paired with a small knowledge glyph box on the right (`[ ≡ ]`, three stacked
+lines = a book/corpus). This mirrors the design language of the author's
+other CLI tools (see ``feliperbroering/eai``) while keeping a distinct
+accent so they read as a family.
+
+The brand renders with zero emoji characters — visual cues come from
+Unicode box-drawing, Rich colors, and restrained accent tokens.
+"""
+
+from __future__ import annotations
+
+from rich.console import Console
+from rich.text import Text
+
+from . import __version__
+
+# The logo is split into (a) the ANSI-shadow wordmark and (b) a small
+# knowledge-glyph box rendered to the right. Rendering them as two columns
+# keeps them in sync regardless of terminal width and lets us tint them
+# independently.
+_WORDMARK = (
+    "███████╗██╗  ██╗██████╗ ███████╗██████╗ ████████╗",
+    "██╔════╝╚██╗██╔╝██╔══██╗██╔════╝██╔══██╗╚══██╔══╝",
+    "█████╗   ╚███╔╝ ██████╔╝█████╗  ██████╔╝   ██║   ",
+    "██╔══╝   ██╔██╗ ██╔═══╝ ██╔══╝  ██╔══██╗   ██║   ",
+    "███████╗██╔╝ ██╗██║     ███████╗██║  ██║   ██║   ",
+    "╚══════╝╚═╝  ╚═╝╚═╝     ╚══════╝╚═╝  ╚═╝   ╚═╝   ",
+)
+
+_GLYPH = (
+    "         ",
+    "  ╭───╮  ",
+    "  │ ≡ │  ",
+    "  ╰───╯  ",
+    "         ",
+    "         ",
+)
+
+TAGLINE = "ground a model on your docs. ship it as an API."
+SUBTITLE = "declarative ultra-specialist agents on Cloud Run — Gemini long-context, Context Cache, persistent memory."
+
+# Accent colors — picked to read well on both dark and light terminals and
+# to stay distinct from `eai` (which leans green/cyan).
+_ACCENT = "bright_cyan"
+_DIM = "grey50"
+_HEADLINE = "bold white"
+
+
+def render_brand(console: Console, *, include_version: bool = True) -> None:
+    """Print the brand block: logo rows, tagline, subtitle, optional version.
+
+    Every printed line starts with a single space acting as a left margin.
+    """
+    # strict=True: zip raises if the wordmark and glyph ever differ in height.
+    for wordmark_row, glyph_row in zip(_WORDMARK, _GLYPH, strict=True):
+        logo_row = Text().append(" ")
+        logo_row.append(wordmark_row, style=_ACCENT)
+        logo_row.append(glyph_row, style=_DIM)
+        console.print(logo_row)
+
+    console.print()
+    console.print(Text().append(" ").append(TAGLINE, style=_HEADLINE))
+    console.print(Text().append(" ").append(SUBTITLE, style=_DIM))
+
+    if not include_version:
+        return
+
+    console.print()
+    version_line = Text().append(" ")
+    for fragment, fragment_style in (
+        ("expert ", _ACCENT),
+        (f"v{__version__}", _DIM),
+        ("   ", None),
+        ("MIT", _DIM),
+        ("   ", None),
+        ("github.com/feliperbroering/expert-agent", _DIM),
+    ):
+        version_line.append(fragment, style=fragment_style)
+    console.print(version_line)
+
+
+__all__ = ["SUBTITLE", "TAGLINE", "render_brand"]
diff --git a/cli/expert/commands/agents.py b/cli/expert/commands/agents.py
new file mode 100644
index 0000000..406b539
--- /dev/null
+++ b/cli/expert/commands/agents.py
@@ -0,0 +1,131 @@
+"""Workspace-aware agent management commands: ``agents``, ``use``, ``which``.
+
+These are the only commands that *never* need to resolve to a single agent —
+they inspect, select, or describe the workspace itself.
+"""
+
+from __future__ import annotations
+
+from typing import Annotated
+
+import typer
+from rich.table import Table
+
+from ..ui import console, print_error, print_info, print_success
+from ..workspace import (
+    AgentNotFoundError,
+    AmbiguousAgentError,
+    Workspace,
+    WorkspaceError,
+)
+
+
+def agents_cmd(
+    verbose: Annotated[
+        bool,
+        typer.Option("--verbose", "-v", help="Show schema paths and endpoints."),
+    ] = False,
+) -> None:
+    """Print a table of the workspace's agents, ticking the active one."""
+    ws = Workspace.discover()
+    known = ws.agents()
+    if not known:
+        print_info(
+            "No agents found. Scaffold one with `expert init <name>` or create "
+            "an `expert.toml` workspace file."
+        )
+        return
+
+    pinned = ws.active()
+    table = Table(title=f"Agents — workspace: {ws.root}")
+    table.add_column("Active", width=6, justify="center")
+    table.add_column("Name", style="bold")
+    table.add_column("Source", style="dim")
+    if verbose:
+        # The two extra columns sit between Source and Description.
+        table.add_column("Schema")
+        table.add_column("Endpoint")
+    table.add_column("Description", overflow="fold")
+
+    # One table row per agent, cells in the same order as the columns above.
+    for entry in known:
+        cells = ["✓" if entry.name == pinned else "", entry.name, entry.source]
+        if verbose:
+            # Prefer a workspace-relative schema path; fall back to the path
+            # as stored when relative_to() raises ValueError.
+            try:
+                schema_cell = str(entry.schema_path.relative_to(ws.root))
+            except ValueError:
+                schema_cell = str(entry.schema_path)
+            cells += [schema_cell, entry.endpoint or "—"]
+        cells.append(entry.description or "")
+        table.add_row(*cells)
+
+    console.print(table)
+    # Footer: the expert.toml default first, then the locally pinned agent.
+    if ws.default_agent:
+        print_info(f"default (expert.toml): [cyan]{ws.default_agent}[/cyan]")
+    if pinned:
+        print_info(f"active (expert use): [cyan]{pinned}[/cyan]")
+
+
+def use_cmd(
+    name: Annotated[
+        str | None,
+        typer.Argument(
+            help="Agent name to pin as active. Omit to clear the pin.",
+        ),
+    ] = None,
+    clear: Annotated[
+        bool,
+        typer.Option("--clear", help="Remove the active-agent pointer."),
+    ] = False,
+) -> None:
+    """Pin an agent as active for this workspace; no name or ``--clear`` unpins."""
+    ws = Workspace.discover()
+
+    if name is None or clear:
+        if not ws.state_file.is_file():
+            print_info("No active agent set.")
+        else:
+            ws.clear_active()
+            print_success("Cleared active agent pointer.")
+        return
+
+    try:
+        # Matching lets `expert use derm` select a declared `derm-expert`.
+        pinned = ws._match(name)
+        ws.set_active(pinned)
+    except (AgentNotFoundError, AmbiguousAgentError, WorkspaceError) as exc:
+        print_error(str(exc))
+        raise typer.Exit(code=1) from exc
+
+    print_success(f"Active agent set to [cyan]{pinned}[/cyan].")
+    print_info(f"State stored in {ws.state_file}")
+
+
+def which_cmd(
+    agent: Annotated[
+        str | None,
+        typer.Option(
+            "--agent",
+            "-a",
+            help="Preview resolution for the given selector without running anything.",
+        ),
+    ] = None,
+) -> None:
+    """Report the agent that resolves (optionally for ``--agent``) and its settings."""
+    ws = Workspace.discover()
+    try:
+        hit = ws.resolve(selector=agent)
+    except (AgentNotFoundError, AmbiguousAgentError, WorkspaceError) as exc:
+        print_error(str(exc))
+        raise typer.Exit(code=1) from exc
+
+    print_info(f"Active agent: [bold cyan]{hit.name}[/bold cyan]  (source: {hit.selector_source})")
+    print_info(f"  schema:   {hit.schema_path}")
+    print_info(f"  endpoint: {hit.endpoint or '—'}")
+    print_info(f"  api key:  {'set' if hit.api_key else '—'}")
+
+
+__all__ = ["agents_cmd", "use_cmd", "which_cmd"]
diff --git a/cli/expert/commands/ask.py b/cli/expert/commands/ask.py
index b204443..0d18356 100644
--- a/cli/expert/commands/ask.py
+++ b/cli/expert/commands/ask.py
@@ -44,6 +44,7 @@
 from rich.text import Text
 
 from ..config import make_http_client
+from ..context import resolve as resolve_context
 from ..ui import console, print_error, print_info, print_success
 
 _USER_ID = "cli"
@@ -56,22 +57,26 @@
 
 def cmd(
     question: Annotated[str, typer.Argument(help="Question to send to the agent.")],
+    agent: Annotated[
+        str | None,
+        typer.Option("--agent", "-a", help="Agent name from the workspace."),
+    ] = None,
     endpoint: Annotated[
-        str,
+        str | None,
         typer.Option(
             "--endpoint",
             envvar="EXPERT_AGENT_ENDPOINT",
-            help="Base URL of the running agent.",
+            help="Override the agent's endpoint.",
         ),
-    ],
+    ] = None,
     api_key: Annotated[
-        str,
+        str | None,
         typer.Option(
             "--api-key",
             envvar="EXPERT_AGENT_API_KEY",
-            help="Admin bearer token.",
+            help="Override the agent's admin bearer token.",
         ),
-    ],
+    ] = None,
     session: Annotated[
         str | None,
         typer.Option(
@@ -88,6 +93,15 @@ def cmd(
     ] = True,
 ) -> None:
     """Ask the agent a question."""
+    ctx = resolve_context(
+        agent=agent,
+        endpoint=endpoint,
+        api_key=api_key,
+        require_remote=True,
+    )
+    endpoint, api_key = ctx.require_remote()
+    if ctx.selector_source not in ("single", "schema-flag"):
+        print_info(f"→ [cyan]{ctx.name}[/cyan] ({ctx.selector_source})")
     if session is None:
         session = str(uuid.uuid4())
         print_info(f"Starting new session [cyan]{session}[/cyan].")
diff --git a/cli/expert/commands/count_tokens.py b/cli/expert/commands/count_tokens.py
index b71b1e8..55aed8e 100644
--- a/cli/expert/commands/count_tokens.py
+++ b/cli/expert/commands/count_tokens.py
@@ -15,6 +15,7 @@
 import typer
 from app.schema import AgentSchema
 
+from ..context import resolve as resolve_context
 from ..ui import console, print_error, print_info, print_success, print_warning
 
 if TYPE_CHECKING:
@@ -140,20 +141,31 @@ def cmd(
             help="API key for google-genai token counting.",
         ),
     ],
+    agent: Annotated[
+        str | None,
+        typer.Option("--agent", "-a", help="Agent name from the workspace."),
+    ] = None,
     schema_path: Annotated[
-        Path,
-        typer.Option("--schema", "-s", help="Path to agent_schema.yaml."),
-    ] = Path("./agent_schema.yaml"),
+        Path | None,
+        typer.Option(
+            "--schema",
+            "-s",
+            help="Explicit path to agent_schema.yaml (bypasses workspace resolution).",
+        ),
+    ] = None,
     model: Annotated[
         str,
         typer.Option("--model", help="Model used for the count_tokens API call."),
-    ] = "gemini-2.0-flash-exp",
+    ] = "gemini-2.5-flash",
 ) -> None:
     """Walk the knowledge base and sum the estimated token count per file."""
-    schema_path = schema_path.resolve()
+    ctx = resolve_context(agent=agent, schema=schema_path)
+    schema_path = ctx.schema_path
     if not schema_path.is_file():
         print_error(f"schema file not found: {schema_path}")
         raise typer.Exit(code=1)
+    if ctx.selector_source not in ("single", "schema-flag"):
+        print_info(f"agent [cyan]{ctx.name}[/cyan] ({ctx.selector_source})")
 
     try:
         schema = AgentSchema.from_yaml(schema_path)
diff --git a/cli/expert/commands/init.py b/cli/expert/commands/init.py
index 600ab68..aea98fd 100644
--- a/cli/expert/commands/init.py
+++ b/cli/expert/commands/init.py
@@ -217,5 +217,24 @@ def cmd(
         raise typer.Exit(code=1) from exc
 
     print_success(f"Created new agent at [cyan]{path}[/cyan].")
+    _print_workspace_hint(path, name)
     print_info("Next step: [bold]expert validate --schema ./agent_schema.yaml[/bold]")
     console.print()
+
+
+def _print_workspace_hint(path: Path, name: str) -> None:
+    """Point at the multi-agent commands when ``path`` sits in a shared workspace."""
+    from ..workspace import Workspace
+
+    search_root = path.parent
+    try:
+        workspace = Workspace.discover(cwd=search_root)
+    except Exception:  # pragma: no cover - discovery is best-effort here
+        return
+
+    if len(workspace.agents_by_name) < 2:  # fewer than two agents: no hint
+        return
+    print_info(
+        f"Detected multi-agent workspace at [cyan]{workspace.root}[/cyan]. "
+        f"Use [bold]expert agents[/bold] to list, or [bold]expert @{name} <cmd>[/bold]."
+    )
diff --git a/cli/expert/commands/sessions.py b/cli/expert/commands/sessions.py
index 6f37c58..74b231a 100644
--- a/cli/expert/commands/sessions.py
+++ b/cli/expert/commands/sessions.py
@@ -11,6 +11,7 @@
 from rich.table import Table
 
 from ..config import make_http_client
+from ..context import resolve as resolve_context
 from ..ui import console, print_error, print_info, print_success, print_warning
 
 app = typer.Typer(
@@ -20,6 +21,23 @@
 )
 
 
+def _remote(
+    agent: str | None,
+    endpoint_override: str | None,
+    api_key_override: str | None,
+) -> tuple[str, str]:
+    """Resolve ``(endpoint, api_key)`` for a session command; endpoint has no trailing ``/``."""
+    ctx = resolve_context(
+        agent=agent, endpoint=endpoint_override, api_key=api_key_override, require_remote=True
+    )
+    if ctx.selector_source not in ("single", "schema-flag"):
+        print_info(f"→ [cyan]{ctx.name}[/cyan] ({ctx.selector_source})")
+    # Strip trailing slashes once here: the session commands append paths such
+    # as `/sessions` and previously each applied `endpoint.rstrip("/")` itself.
+    endpoint, api_key = ctx.require_remote()
+    return endpoint.rstrip("/"), api_key
+
+
 async def _get_json(endpoint: str, api_key: str, path: str) -> Any:
     async with make_http_client(endpoint=endpoint, api_key=api_key) as client:
         response = await client.get(path)
@@ -51,30 +69,44 @@ def _run(coro: Any) -> Any:
         raise typer.Exit(code=2) from exc
 
 
+_AgentOpt = Annotated[
+    str | None,
+    typer.Option("--agent", "-a", help="Agent name from the workspace."),
+]
 _EndpointOpt = Annotated[
-    str,
-    typer.Option("--endpoint", envvar="EXPERT_AGENT_ENDPOINT", help="Base URL of the agent."),
+    str | None,
+    typer.Option(
+        "--endpoint",
+        envvar="EXPERT_AGENT_ENDPOINT",
+        help="Override the agent's endpoint.",
+    ),
 ]
 _ApiKeyOpt = Annotated[
-    str,
-    typer.Option("--api-key", envvar="EXPERT_AGENT_API_KEY", help="Admin bearer token."),
+    str | None,
+    typer.Option(
+        "--api-key",
+        envvar="EXPERT_AGENT_API_KEY",
+        help="Override the agent's admin bearer token.",
+    ),
 ]
 
 
 @app.command("list")
 def list_cmd(
-    endpoint: _EndpointOpt,
-    api_key: _ApiKeyOpt,
+    agent: _AgentOpt = None,
+    endpoint: _EndpointOpt = None,
+    api_key: _ApiKeyOpt = None,
     user: Annotated[
         str | None,
         typer.Option("--user", help="Filter sessions by user_id."),
     ] = None,
 ) -> None:
     """List active sessions."""
+    endpoint_resolved, api_key_resolved = _remote(agent, endpoint, api_key)
     path = "/sessions"
     if user:
         path = f"/sessions?user_id={user}"
-    body = _run(_get_json(endpoint.rstrip("/"), api_key, path))
+    body = _run(_get_json(endpoint_resolved, api_key_resolved, path))
     items: list[dict[str, Any]]
     if isinstance(body, list):
         items = [x for x in body if isinstance(x, dict)]
@@ -105,11 +137,13 @@ def list_cmd(
 @app.command("show")
 def show_cmd(
     session_id: Annotated[str, typer.Argument(help="Session ID.")],
-    endpoint: _EndpointOpt,
-    api_key: _ApiKeyOpt,
+    agent: _AgentOpt = None,
+    endpoint: _EndpointOpt = None,
+    api_key: _ApiKeyOpt = None,
 ) -> None:
     """Show the message history of a single session."""
-    body = _run(_get_json(endpoint.rstrip("/"), api_key, f"/sessions/{session_id}"))
+    endpoint_resolved, api_key_resolved = _remote(agent, endpoint, api_key)
+    body = _run(_get_json(endpoint_resolved, api_key_resolved, f"/sessions/{session_id}"))
     if not isinstance(body, dict):
         print_error("unexpected response shape.")
         raise typer.Exit(code=2)
@@ -132,14 +166,16 @@ def show_cmd(
 @app.command("delete")
 def delete_cmd(
     session_id: Annotated[str, typer.Argument(help="Session ID to delete.")],
-    endpoint: _EndpointOpt,
-    api_key: _ApiKeyOpt,
+    agent: _AgentOpt = None,
+    endpoint: _EndpointOpt = None,
+    api_key: _ApiKeyOpt = None,
     yes: Annotated[
         bool,
         typer.Option("--yes", "-y", help="Skip the confirmation prompt."),
     ] = False,
 ) -> None:
     """Delete a session and its message history (LGPD right-to-erasure)."""
+    endpoint_resolved, api_key_resolved = _remote(agent, endpoint, api_key)
     if not yes:
         confirmed = typer.confirm(
             f"Delete session {session_id}? This action is irreversible.",
@@ -149,5 +185,5 @@ def delete_cmd(
             print_warning("Aborted.")
             raise typer.Exit(code=0)
 
-    _run(_delete(endpoint.rstrip("/"), api_key, f"/sessions/{session_id}"))
+    _run(_delete(endpoint_resolved, api_key_resolved, f"/sessions/{session_id}"))
     print_success(f"Session [cyan]{session_id}[/cyan] deleted.")
diff --git a/cli/expert/commands/sync.py b/cli/expert/commands/sync.py
index 92bbcdb..0396323 100644
--- a/cli/expert/commands/sync.py
+++ b/cli/expert/commands/sync.py
@@ -17,6 +17,7 @@
 from app.schema import AgentSchema
 
 from ..config import make_http_client
+from ..context import resolve as resolve_context
 from ..ui import console, print_diff_table, print_error, print_info, print_success
 
 
@@ -85,32 +86,46 @@ async def _post_sync(
 
 
 def cmd(
+    agent: Annotated[
+        str | None,
+        typer.Option("--agent", "-a", help="Agent name from the workspace."),
+    ] = None,
     endpoint: Annotated[
-        str,
+        str | None,
         typer.Option(
             "--endpoint",
             envvar="EXPERT_AGENT_ENDPOINT",
-            help="Base URL of the running agent.",
+            help="Override the agent's endpoint (defaults to workspace/env value).",
         ),
-    ],
+    ] = None,
     api_key: Annotated[
-        str,
+        str | None,
         typer.Option(
             "--api-key",
             envvar="EXPERT_AGENT_API_KEY",
-            help="Admin bearer token.",
+            help="Override the agent's admin bearer token.",
         ),
-    ],
+    ] = None,
     schema_path: Annotated[
-        Path,
-        typer.Option("--schema", "-s", help="Path to agent_schema.yaml."),
-    ] = Path("./agent_schema.yaml"),
+        Path | None,
+        typer.Option("--schema", "-s", help="Explicit path to agent_schema.yaml."),
+    ] = None,
 ) -> None:
     """Upload the local knowledge base and trigger a Context Cache rebuild."""
-    schema_path = schema_path.resolve()
+    ctx = resolve_context(
+        agent=agent,
+        schema=schema_path,
+        endpoint=endpoint,
+        api_key=api_key,
+        require_remote=True,
+    )
+    schema_path = ctx.schema_path
+    endpoint, api_key = ctx.require_remote()
     if not schema_path.is_file():
         print_error(f"schema file not found: {schema_path}")
         raise typer.Exit(code=1)
+    if ctx.selector_source not in ("single", "schema-flag"):
+        print_info(f"agent [cyan]{ctx.name}[/cyan] ({ctx.selector_source})")
 
     try:
         schema = AgentSchema.from_yaml(schema_path)
diff --git a/cli/expert/commands/test.py b/cli/expert/commands/test.py
index fbc84d6..d305e2c 100644
--- a/cli/expert/commands/test.py
+++ b/cli/expert/commands/test.py
@@ -18,6 +18,7 @@
 
 import typer
 
+from ..context import resolve as resolve_context
 from ..ui import console, print_error, print_info, print_success
 
 # Canonical order of the packaged suites. The numeric prefixes keep `robot`
@@ -33,11 +34,19 @@
 
 
 def cmd(
+    agent: Annotated[
+        str | None,
+        typer.Option(
+            "--agent",
+            "-a",
+            help="Agent name from the workspace. Resolved via `expert agents`.",
+        ),
+    ] = None,
     suite: Annotated[
         list[str] | None,
         typer.Option(
             "--suite",
-            "-s",
+            "-S",
             help=(
                 "Run only the given suite(s) by stem (e.g. '05_ask_latency'). "
                 "Can be passed multiple times. Default: all."
@@ -72,16 +81,17 @@ def cmd(
         Path | None,
         typer.Option(
             "--schema",
-            help="Path to agent_schema.yaml (defaults to env EXPERT_AGENT_SCHEMA).",
+            "-s",
+            help="Explicit path to agent_schema.yaml (bypasses workspace resolution).",
         ),
     ] = None,
     endpoint: Annotated[
         str | None,
-        typer.Option("--endpoint", help="Override EXPERT_AGENT_ENDPOINT."),
+        typer.Option("--endpoint", help="Override the agent's endpoint."),
     ] = None,
     api_key: Annotated[
         str | None,
-        typer.Option("--api-key", help="Override EXPERT_AGENT_API_KEY."),
+        typer.Option("--api-key", help="Override the agent's admin bearer token."),
     ] = None,
     dry_run: Annotated[
         bool,
@@ -118,15 +128,25 @@ def cmd(
         print_error(f"No suites matched selection {suite!r}. Available: {available}")
         raise typer.Exit(code=2)
 
-    # Propagate overrides to the environment so ExpertLibrary's defaults pick
-    # them up without needing --var boilerplate in simple cases.
-    env_overrides: dict[str, str] = {}
-    if endpoint:
-        env_overrides["EXPERT_AGENT_ENDPOINT"] = endpoint
-    if api_key:
-        env_overrides["EXPERT_AGENT_API_KEY"] = api_key
-    if schema:
-        env_overrides["EXPERT_AGENT_SCHEMA"] = str(schema)
+    # Resolve the agent context (supports --agent / @alias / `expert use`)
+    # so that the packaged Robot suites see fully-populated env vars even
+    # in multi-agent workspaces without requiring --var or --endpoint.
+    # We fall back to a bare resolve (schema-only) so that the offline
+    # suites still work when endpoint/api_key are not configured.
+    ctx = resolve_context(
+        agent=agent,
+        schema=schema,
+        endpoint=endpoint,
+        api_key=api_key,
+    )
+    if ctx.selector_source not in ("single", "schema-flag"):
+        print_info(f"→ [cyan]{ctx.name}[/cyan] ({ctx.selector_source})")
+
+    env_overrides: dict[str, str] = {"EXPERT_AGENT_SCHEMA": str(ctx.schema_path)}
+    if ctx.endpoint:
+        env_overrides["EXPERT_AGENT_ENDPOINT"] = ctx.endpoint
+    if ctx.api_key:
+        env_overrides["EXPERT_AGENT_API_KEY"] = ctx.api_key
     for key, value in env_overrides.items():
         os.environ[key] = value
 
diff --git a/cli/expert/commands/validate.py b/cli/expert/commands/validate.py
index 7f97500..216f5f6 100644
--- a/cli/expert/commands/validate.py
+++ b/cli/expert/commands/validate.py
@@ -9,7 +9,8 @@
 from app.schema import AgentSchema
 from pydantic import ValidationError
 
-from ..ui import print_error, print_schema, print_success, print_warning
+from ..context import resolve as resolve_context
+from ..ui import print_error, print_info, print_schema, print_success, print_warning
 
 
 def _iter_matching_files(
@@ -30,16 +31,31 @@ def _iter_matching_files(
 
 
 def cmd(
+    agent: Annotated[
+        str | None,
+        typer.Option(
+            "--agent",
+            "-a",
+            help="Agent name (from expert.toml or sibling dirs). See `expert agents`.",
+        ),
+    ] = None,
     schema_path: Annotated[
-        Path,
-        typer.Option("--schema", "-s", help="Path to agent_schema.yaml."),
-    ] = Path("./agent_schema.yaml"),
+        Path | None,
+        typer.Option(
+            "--schema",
+            "-s",
+            help="Explicit path to agent_schema.yaml (bypasses workspace resolution).",
+        ),
+    ] = None,
 ) -> None:
     """Validate an agent schema and its referenced filesystem layout."""
-    schema_path = schema_path.resolve()
+    ctx = resolve_context(agent=agent, schema=schema_path)
+    schema_path = ctx.schema_path
     if not schema_path.is_file():
         print_error(f"schema file not found: {schema_path}")
         raise typer.Exit(code=1)
+    if ctx.selector_source not in ("single", "schema-flag"):
+        print_info(f"agent [cyan]{ctx.name}[/cyan] ({ctx.selector_source})")
 
     try:
         schema = AgentSchema.from_yaml(schema_path)
diff --git a/cli/expert/context.py b/cli/expert/context.py
new file mode 100644
index 0000000..ce56787
--- /dev/null
+++ b/cli/expert/context.py
@@ -0,0 +1,70 @@
+"""Shared helpers used by every command that needs an :class:`AgentContext`.
+
+Commands should call :func:`resolve` at their very top, forward flag-overrides
+in, and then read ``ctx.schema_path`` / ``ctx.endpoint`` / ``ctx.api_key``.
+
+This keeps the multi-agent resolution logic in one place — if we ever change
+precedence rules, every command picks it up automatically.
+"""
+
+from __future__ import annotations
+
+from dataclasses import replace
+from pathlib import Path
+
+import typer
+
+from .ui import print_error
+from .workspace import (
+    AgentContext,
+    AgentNotFoundError,
+    AmbiguousAgentError,
+    Workspace,
+    WorkspaceError,
+)
+
+
+def resolve(
+    *,
+    agent: str | None = None,
+    schema: Path | None = None,
+    endpoint: str | None = None,
+    api_key: str | None = None,
+    require_remote: bool = False,
+) -> AgentContext:
+    """Resolve an :class:`AgentContext` or abort the CLI with a helpful message.
+
+    ``endpoint`` / ``api_key`` are per-run overrides: a non-empty value replaces
+    what the workspace resolved, so scripts / CI can force one without editing
+    ``expert.toml``. With ``require_remote`` set, a context that still lacks an
+    endpoint or API key ends the run with exit code 2 instead of being returned.
+
+    Raises:
+        typer.Exit: code 1 if workspace resolution fails, code 2 per ``require_remote``.
+    """
+    try:
+        # Discovery is guarded too, in case it raises a workspace error itself.
+        ctx = Workspace.discover().resolve(selector=agent, schema_override=schema)
+    except (AgentNotFoundError, AmbiguousAgentError, WorkspaceError) as exc:
+        print_error(str(exc))
+        raise typer.Exit(code=1) from exc
+
+    # Caller overrides beat workspace values; empty strings count as "not given".
+    if endpoint or api_key:
+        ctx = replace(
+            ctx,
+            endpoint=endpoint or ctx.endpoint,
+            api_key=api_key or ctx.api_key,
+        )
+
+    if require_remote:
+        try:
+            ctx.require_remote()
+        except WorkspaceError as exc:
+            print_error(str(exc))
+            raise typer.Exit(code=2) from exc
+
+    return ctx
+
+
+__all__ = ["resolve"]
diff --git a/cli/expert/main.py b/cli/expert/main.py
index dc1779d..1f8e00c 100644
--- a/cli/expert/main.py
+++ b/cli/expert/main.py
@@ -1,18 +1,34 @@
-"""Top-level `typer` app for `expert`."""
+"""Top-level `typer` app for `expert`.
+
+The CLI is aware of *multi-agent workspaces*: a repo can host several
+`agent_schema.yaml` files and the user can target them individually via:
+
+- Explicit flag: ``expert ask --agent derm "..."``
+- Active pointer: ``expert use derm`` then ``expert ask "..."``
+- Positional shortcut: ``expert @derm ask "..."``
+
+The ``@alias`` form is handled **here** in the entrypoint via a small
+argv rewriter that runs before Typer parses its arguments. The rewriter
+turns ``expert @<name> <command> ...`` into
+``expert <command> --agent <name> ...`` so downstream commands just need
+to accept the standard ``--agent`` flag.
+"""
 
 from __future__ import annotations
 
+import sys
 from typing import Annotated
 
 import typer
 
-from . import __version__
+from .brand import render_brand
+from .commands import agents as agents_commands
 from .commands import ask, count_tokens, init, sessions, sync, test, validate
 from .ui import console
 
 app = typer.Typer(
     name="expert",
-    help="CLI for the **expert-agent** framework — scaffold, validate, sync, ask.",
+    help="ground a model on your docs. ship it as an API.",
     no_args_is_help=True,
     rich_markup_mode="markdown",
     add_completion=True,
@@ -21,10 +37,15 @@
 
 def _version_callback(value: bool) -> None:
     if value:
-        console.print(f"expert {__version__}")
+        render_brand(console, include_version=True)
         raise typer.Exit(code=0)
 
 
+def _brand_cmd() -> None:
+    """Print the brand block via ``render_brand``, version line included."""
+    render_brand(console, include_version=True)
+
+
 @app.callback()
 def _root(
     version: Annotated[
@@ -41,6 +62,55 @@ def _root(
     _ = version
 
 
+# Subcommands that accept `--agent`. Used by the @alias rewriter so that
+# nonsense like `expert @derm use my-expert` falls through to a useful error
+# instead of silently rewriting into `expert use my-expert --agent derm`.
+_AGENT_AWARE: frozenset[str] = frozenset(
+    {"ask", "validate", "count-tokens", "sync", "test", "sessions", "which"}
+)
+
+
+def _rewrite_at_alias(argv: list[str]) -> list[str]:
+    """Turn a leading ``@<name>`` token into an ``--agent <name>`` option.
+
+    Examples::
+
+        expert @my-expert ask "hi"  → expert ask "hi" --agent my-expert
+        expert @derm sessions list  → expert sessions list --agent derm
+        expert @derm ask -- "-1?"   → expert ask --agent derm -- "-1?"
+
+    Returned unchanged (Typer then reports the problem itself):
+
+    - a bare ``@`` or ``@-``;
+    - ``@<name>`` with no subcommand after it;
+    - ``@<name>`` before a command outside ``_AGENT_AWARE``, so
+      ``expert @derm use foo`` is *not* silently rewritten;
+    - any ``@`` beyond ``argv[1]`` (``--foo=@bar``), which is never inspected.
+
+    Args:
+        argv: Full argument vector in ``sys.argv`` shape (program name first).
+    """
+    if len(argv) < 2 or not argv[1].startswith("@") or argv[1] in ("@", "@-"):
+        return argv
+    alias, rest = argv[1][1:], argv[2:]
+
+    # The first token that does not look like an option is the subcommand.
+    sub_idx = next((i for i, tok in enumerate(rest) if not tok.startswith("-")), None)
+    if sub_idx is None or rest[sub_idx] not in _AGENT_AWARE:
+        # No agent-aware subcommand present: leave argv alone so Typer can
+        # render a useful error instead of rewriting into a wrong shape.
+        return argv
+
+    # `--agent <alias>` goes after everything else so it reaches the leaf
+    # command both for `ask` and for a sub-Typer such as `sessions list`.
+    # A `--` separator after the subcommand ends option parsing, though, so
+    # the flag has to be inserted in front of it rather than appended.
+    cut = len(rest)
+    if "--" in rest[sub_idx + 1 :]:
+        cut = rest.index("--", sub_idx + 1)
+    return [argv[0], *rest[:cut], "--agent", alias, *rest[cut:]]
+
+
 app.command(name="init", help="Scaffold a new agent project.")(init.cmd)
 app.command(name="validate", help="Validate an agent_schema.yaml locally.")(validate.cmd)
 app.command(
@@ -54,7 +124,30 @@ def _root(
     name="test",
     help="Run the packaged Robot Framework E2E kit against the current agent.",
 )(test.cmd)
+app.command(
+    name="agents",
+    help="List agents known to this workspace.",
+)(agents_commands.agents_cmd)
+app.command(
+    name="use",
+    help="Pin an agent as the active one for this workspace.",
+)(agents_commands.use_cmd)
+app.command(
+    name="which",
+    help="Print which agent a bare command would resolve to.",
+)(agents_commands.which_cmd)
+app.command(
+    name="brand",
+    help="Print the expert wordmark + version (fun, mostly).",
+    hidden=True,
+)(_brand_cmd)
 
 
-if __name__ == "__main__":
+def main() -> None:
+    """Console entry point: expand a leading ``@alias`` in ``sys.argv``, then run Typer."""
+    sys.argv = _rewrite_at_alias(sys.argv)
     app()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cli/expert/testkit/ExpertLibrary.py b/cli/expert/testkit/ExpertLibrary.py
index 807f9c9..c15ca17 100644
--- a/cli/expert/testkit/ExpertLibrary.py
+++ b/cli/expert/testkit/ExpertLibrary.py
@@ -57,11 +57,15 @@ def __init__(
     # ------------------------------------------------------------------
 
     @keyword("Run Expert CLI")
-    def run_cli(self, *args: str, expect_rc: int = 0, cwd: str | None = None) -> dict[str, Any]:
+    def run_cli(
+        self, *args: str, expect_rc: int | None = 0, cwd: str | None = None
+    ) -> dict[str, Any]:
         """Execute `expert <args>` and return `{rc, stdout, stderr, elapsed_ms}`.
 
         Fails if the exit code differs from ``expect_rc`` (use ``expect_rc=None``
-        to skip the check entirely).
+        to skip the check entirely). ``expect_rc`` is typed ``int | None`` so
+        Robot Framework's dynamic-argument converter accepts ``${None}`` from
+        suite files without trying to coerce it into ``int`` (which fails).
         """
         binary = shutil.which("expert")
         if binary is None:
diff --git a/cli/expert/ui.py b/cli/expert/ui.py
index f7fa89f..e4a58f6 100644
--- a/cli/expert/ui.py
+++ b/cli/expert/ui.py
@@ -1,8 +1,13 @@
 """Shared Rich helpers used across commands.
 
-The CLI follows a strict no-emoji policy in source code — visual cues come
-exclusively from Rich colors, icons (drawn via Unicode box/arrow characters
-that Rich supports) and markdown glyphs. No emoji characters are used.
+The CLI follows the visual identity shared with the author's other open-source
+tools (see ``feliperbroering/eai``):
+
+- No emoji characters. Visual cues come from Unicode box-drawing, arrows,
+  and restrained accent glyphs (``>``, ``✓``, ``✗``, ``⚠``, ``▶``).
+- Success / error / warning lines are prefixed with a single colored glyph,
+  not a shouted word in caps. Screen real estate is precious.
+- Rich colors are the accent; plain monospace is the norm.
 """
 
 from __future__ import annotations
@@ -21,65 +26,78 @@
 
 
 def print_error(msg: str) -> None:
-    """Render an error line in bold red prefixed with `ERROR`."""
-    console.print(f"[bold red]ERROR[/bold red] {msg}")
+    """Render an error line: red cross + message."""
+    console.print(f"[red]✗[/red] {msg}", highlight=False)
 
 
 def print_success(msg: str) -> None:
-    """Render a success line in green prefixed with a check mark."""
-    console.print(f"[bold green]OK[/bold green] {msg}")
+    """Render a success line: green check + message."""
+    console.print(f"[green]✓[/green] {msg}", highlight=False)
 
 
 def print_warning(msg: str) -> None:
-    """Render a warning line in yellow prefixed with `WARN`."""
-    console.print(f"[bold yellow]WARN[/bold yellow] {msg}")
+    """Render a warning line: yellow warning glyph + message."""
+    console.print(f"[yellow]⚠[/yellow] {msg}", highlight=False)
 
 
 def print_info(msg: str) -> None:
-    """Render a neutral informational line."""
-    console.print(f"[bold cyan]INFO[/bold cyan] {msg}")
+    """Render a neutral informational line prefixed with a subtle chevron."""
+    console.print(f"[dim cyan]>[/dim cyan] {msg}", highlight=False)
+
+
+def print_hint(cmd: str, *, label: str = "try") -> None:
+    """Render a cyan-accented hint pointing the user at a command."""
+    console.print(f"  [dim]{label}:[/dim] [bold cyan]{cmd}[/bold cyan]", highlight=False)
+
+
+def print_step(current: int, total: int, msg: str) -> None:
+    """Render a numbered step in the classic ``[n/N]`` style."""
+    console.print(f"  [dim]\\[{current}/{total}][/dim] {msg}", highlight=False)
+
+
+def print_kv(label: str, value: str, *, width: int = 12) -> None:
+    """Render a dim ``label: value`` pair with consistent column alignment."""
+    console.print(f"  [dim]{label:<{width}}[/dim] {value}", highlight=False)
 
 
 def print_diff_table(diff: dict[str, Any]) -> None:
     """Render a sync diff using a Rich `Table`.
 
-    The expected input is a mapping such as:
+    Expected input::
 
-    ```python
-    {
-        "added":   [{"path": "docs/a.md", "sha": "abc1234", "size": 1024}, ...],
-        "updated": [...],
-        "removed": [{"path": "docs/old.md", "sha": "def4567", "size": 512}, ...],
-    }
-    ```
+        {
+            "added":   [{"path": "docs/a.md", "sha": "abc1234", "size": 1024}, ...],
+            "updated": [...],
+            "removed": [{"path": "docs/old.md", "sha": "def4567", "size": 512}, ...],
+        }
     """
-    table = Table(title="Sync diff", show_lines=False)
-    table.add_column("Action", style="bold", no_wrap=True)
+    table = Table(title="Sync diff", show_lines=False, border_style="dim")
+    table.add_column("", width=1, no_wrap=True)
     table.add_column("Path", overflow="fold")
-    table.add_column("SHA", no_wrap=True)
-    table.add_column("Size", justify="right", no_wrap=True)
+    table.add_column("SHA", no_wrap=True, style="dim")
+    table.add_column("Size", justify="right", no_wrap=True, style="dim")
 
-    actions: list[tuple[str, str, list[dict[str, Any]]]] = [
-        ("added", "green", list(diff.get("added", []) or [])),
-        ("updated", "yellow", list(diff.get("updated", []) or [])),
-        ("removed", "red", list(diff.get("removed", []) or [])),
+    actions: list[tuple[str, str, str, list[dict[str, Any]]]] = [
+        ("+", "green", "added", list(diff.get("added", []) or [])),
+        ("~", "yellow", "updated", list(diff.get("updated", []) or [])),
+        ("-", "red", "removed", list(diff.get("removed", []) or [])),
     ]
     total = 0
-    for action, color, entries in actions:
+    for glyph, color, _name, entries in actions:
         for entry in entries:
             total += 1
             sha = str(entry.get("sha", ""))[:12]
             size = entry.get("size")
             size_str = _fmt_size(size) if isinstance(size, int) else "-"
             table.add_row(
-                f"[{color}]{action}[/{color}]",
+                f"[{color}]{glyph}[/{color}]",
                 str(entry.get("path", "")),
                 sha,
                 size_str,
             )
 
     if total == 0:
-        console.print("[dim]No changes — remote is in sync with local.[/dim]")
+        console.print("[dim]  no changes — remote is in sync with local[/dim]")
         return
     console.print(table)
 
@@ -90,44 +108,44 @@ def print_schema(schema: AgentSchema) -> None:
     spec = schema.spec
     tree = Tree(f"[bold]{meta.name}[/bold] [dim]v{meta.version}[/dim]")
     if meta.description:
-        tree.add(f"[italic]{meta.description}[/italic]")
+        tree.add(f"[italic dim]{meta.description}[/italic dim]")
 
     model = tree.add("[bold]model[/bold]")
-    model.add(f"provider: [cyan]{spec.model.provider}[/cyan]")
-    model.add(f"name: [cyan]{spec.model.name}[/cyan]")
-    model.add(f"temperature: {spec.model.temperature}")
-    model.add(f"max_output_tokens: {spec.model.max_output_tokens}")
+    model.add(f"[dim]provider[/dim]  {spec.model.provider}")
+    model.add(f"[dim]name[/dim]      {spec.model.name}")
+    model.add(f"[dim]temp[/dim]      {spec.model.temperature}")
+    model.add(f"[dim]max_out[/dim]   {spec.model.max_output_tokens}")
 
     identity = tree.add("[bold]identity[/bold]")
     if spec.identity.system_prompt_file is not None:
-        identity.add(f"system_prompt_file: [cyan]{spec.identity.system_prompt_file}[/cyan]")
+        identity.add(f"[dim]file[/dim]      {spec.identity.system_prompt_file}")
     if spec.identity.system_prompt is not None:
         preview = spec.identity.system_prompt[:60].replace("\n", " ")
-        identity.add(f"system_prompt: [cyan]{preview}...[/cyan]")
+        identity.add(f"[dim]inline[/dim]    {preview}…")
 
     knowledge = tree.add("[bold]knowledge[/bold]")
-    knowledge.add(f"reference_docs_dir: [cyan]{spec.knowledge.reference_docs_dir}[/cyan]")
-    knowledge.add(f"include_patterns: {spec.knowledge.include_patterns}")
-    knowledge.add(f"exclude_patterns: {spec.knowledge.exclude_patterns}")
+    knowledge.add(f"[dim]docs_dir[/dim]  {spec.knowledge.reference_docs_dir}")
+    knowledge.add(f"[dim]include[/dim]   {spec.knowledge.include_patterns}")
+    knowledge.add(f"[dim]exclude[/dim]   {spec.knowledge.exclude_patterns}")
 
     cache = tree.add("[bold]context_cache[/bold]")
-    cache.add(f"enabled: {spec.context_cache.enabled}")
-    cache.add(f"ttl_seconds: {spec.context_cache.ttl_seconds}")
+    cache.add(f"[dim]enabled[/dim]   {spec.context_cache.enabled}")
+    cache.add(f"[dim]ttl[/dim]       {spec.context_cache.ttl_seconds}s")
 
     memory = tree.add("[bold]memory[/bold]")
-    memory.add(f"short_term.buffer_size: {spec.memory.short_term.buffer_size}")
-    memory.add(f"long_term.enabled: {spec.memory.long_term.enabled}")
-    memory.add(f"long_term.persistence.type: {spec.memory.long_term.persistence.type}")
+    memory.add(f"[dim]short_buf[/dim] {spec.memory.short_term.buffer_size}")
+    memory.add(f"[dim]long_on[/dim]   {spec.memory.long_term.enabled}")
+    memory.add(f"[dim]store[/dim]     {spec.memory.long_term.persistence.type}")
 
     grounding = tree.add("[bold]grounding[/bold]")
-    grounding.add(f"enabled: {spec.grounding.enabled}")
-    grounding.add(f"max_citations: {spec.grounding.max_citations}")
+    grounding.add(f"[dim]enabled[/dim]   {spec.grounding.enabled}")
+    grounding.add(f"[dim]max_cite[/dim]  {spec.grounding.max_citations}")
 
     rate = tree.add("[bold]rate_limit[/bold]")
-    rate.add(f"requests_per_minute: {spec.rate_limit.requests_per_minute}")
-    rate.add(f"tokens_per_day: {spec.rate_limit.tokens_per_day}")
+    rate.add(f"[dim]rpm[/dim]       {spec.rate_limit.requests_per_minute}")
+    rate.add(f"[dim]tpd[/dim]       {spec.rate_limit.tokens_per_day}")
 
-    console.print(Panel(tree, title="Agent schema", border_style="cyan"))
+    console.print(Panel(tree, title="agent schema", border_style="cyan", title_align="left"))
 
 
 def _fmt_size(num: int) -> str:
@@ -143,8 +161,11 @@ def _fmt_size(num: int) -> str:
     "console",
     "print_diff_table",
     "print_error",
+    "print_hint",
     "print_info",
+    "print_kv",
     "print_schema",
+    "print_step",
     "print_success",
     "print_warning",
 ]
diff --git a/cli/expert/workspace.py b/cli/expert/workspace.py
new file mode 100644
index 0000000..2b48511
--- /dev/null
+++ b/cli/expert/workspace.py
@@ -0,0 +1,460 @@
+"""Multi-agent workspace: discovery, `expert.toml`, and active-agent state.
+
+A *workspace* is the repository (or subtree) that hosts one or more agent
+schemas. The CLI supports three equivalent ways of pointing a command at a
+specific agent inside a multi-agent workspace:
+
+1. **Explicit flag** — ``expert ask --agent derm "hi"``.
+2. **Positional `@alias`** — ``expert @derm ask "hi"`` (intercepted in
+   ``main.py`` and rewritten into the flag above, transparently).
+3. **Active pointer** — ``expert use derm`` persists a pointer in
+   ``.expert/state.json`` so subsequent ``expert ask "..."`` calls in that
+   cwd stay on ``derm`` until the user runs ``expert use`` again.
+
+When none of these narrows the choice down to a single agent, commands raise
+:class:`AmbiguousAgentError` with a helpful message listing the candidates.
+
+## Discovery
+
+Workspace detection walks up from ``cwd`` (at most three parent levels)
+looking for the first directory that contains either of these markers:
+
+- ``expert.toml`` (explicit, strongest signal — anchors the workspace).
+- ``.expert/state.json`` (previously `expert use`-d directory).
+
+If neither is found, the root is ``cwd`` itself. Immediate child directories
+of the root holding an ``agent_schema.yaml`` are then auto-discovered as
+agents; if no agent is known by then, the workspace falls back to
+*single-agent* mode on ``./agent_schema.yaml`` (the historical behaviour).
+
+## ``expert.toml`` schema
+
+```toml
+# Optional per-workspace defaults.
+[defaults]
+agent = "my-expert"  # Default agent when no flag / active pointer is set.
+
+# One section per agent. The key becomes the canonical name.
+[agents.my-expert]
+schema      = "my-expert/agent_schema.yaml"      # Required. Relative to this file.
+endpoint    = "https://my-expert-xxx.a.run.app"  # Optional override.
+api_key_env = "MY_EXPERT_ADMIN_KEY"              # Optional. Takes precedence over api_key.
+api_key     = "..."                              # Optional, discouraged (use env).
+description = "Primary expert agent (free-form)."  # Optional.
+
+[agents.derm]
+schema = "derm-expert/agent_schema.yaml"
+```
+
+Any agent that is **auto-discovered** via ``*/agent_schema.yaml`` but not
+explicitly declared in ``expert.toml`` is still selectable by its directory
+name, and inherits endpoint/api_key from the global ``EXPERT_AGENT_*`` env
+vars.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import tomllib
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+_STATE_DIR = ".expert"
+_STATE_FILE = "state.json"
+_WORKSPACE_FILE = "expert.toml"
+_SCHEMA_FILENAME = "agent_schema.yaml"
+_DISCOVERY_MAX_DEPTH = 3
+_ENV_ACTIVE_AGENT = "EXPERT_AGENT"
+
+
+class WorkspaceError(RuntimeError):
+    """Base for workspace-related errors. Carries an exit-code hint."""
+
+    exit_code: int = 1
+
+
+class AgentNotFoundError(WorkspaceError):
+    """Raised when the caller names an agent that doesn't exist in the workspace."""
+
+
+class AmbiguousAgentError(WorkspaceError):
+    """Raised when agent selection cannot settle on exactly one agent.
+
+    ``candidates`` lists the options to show the user: every known agent when
+    nothing selected one, or only the agents sharing an ambiguous prefix.
+    """
+
+    def __init__(self, message: str, *, candidates: list[AgentInfo]) -> None:
+        super().__init__(message)
+        self.candidates = candidates
+
+
+@dataclass(frozen=True)
+class AgentInfo:
+    """Metadata about an agent known to the workspace (pre-resolution)."""
+
+    name: str
+    schema_path: Path
+    endpoint: str | None = None
+    api_key: str | None = None
+    description: str | None = None
+    # "toml" — expert.toml; "auto" — sibling dir by convention; "single" — legacy cwd schema.
+    source: str = "auto"
+
+
+@dataclass(frozen=True)
+class AgentContext:
+    """Fully-resolved agent context a command can rely on.
+
+    ``api_key`` / ``endpoint`` may still be ``None`` if the agent is offline
+    (e.g. for ``expert validate`` which only needs the schema). Commands that
+    require remote access should call :meth:`require_remote` instead of
+    reading the fields directly.
+    """
+
+    name: str
+    schema_path: Path
+    endpoint: str | None
+    api_key: str | None
+    description: str | None
+    selector_source: str  # "schema-flag", "flag", "env", "active", "default", "auto", "single"
+
+    def require_remote(self) -> tuple[str, str]:
+        """Return ``(endpoint, api_key)``, raising a user-friendly error if missing."""
+        if not self.endpoint or not self.api_key:
+            raise WorkspaceError(
+                f"Agent '{self.name}' has no endpoint/api_key configured. "
+                "Set EXPERT_AGENT_ENDPOINT + EXPERT_AGENT_API_KEY, or declare "
+                "them in expert.toml under [agents."
+                f"{self.name}]."
+            )
+        return self.endpoint.rstrip("/"), self.api_key
+
+
+@dataclass
+class Workspace:
+    """Discovered multi-agent workspace rooted at ``root``."""
+
+    root: Path
+    agents_by_name: dict[str, AgentInfo] = field(default_factory=dict)
+    default_agent: str | None = None
+    # True when no expert.toml AND no sibling schemas found — legacy single-agent mode.
+    single_agent_mode: bool = False
+
+    @classmethod
+    def discover(cls, *, cwd: Path | None = None) -> Workspace:
+        """Discover the workspace rooted at (or above) ``cwd``."""
+        start = (cwd or Path.cwd()).resolve()
+        root, toml_path = _find_workspace_root(start)
+        ws = cls(root=root)
+
+        if toml_path is not None:
+            ws._load_toml(toml_path)
+
+        # Auto-discover siblings regardless of whether a TOML exists — the TOML
+        # only adds aliases/metadata, it doesn't preclude extra agents shipped
+        # in sibling dirs.
+        ws._discover_siblings()
+
+        if not ws.agents_by_name:
+            # Legacy single-agent mode: one schema next to the user's cwd.
+            local = start / _SCHEMA_FILENAME
+            if local.is_file():
+                ws.agents_by_name["."] = AgentInfo(
+                    name=".",
+                    schema_path=local,
+                    source="single",
+                )
+                ws.single_agent_mode = True
+
+        return ws
+
+    # --------------------------- TOML loading --------------------------- #
+
+    def _load_toml(self, path: Path) -> None:
+        try:
+            with path.open("rb") as fh:
+                raw = tomllib.load(fh)
+        except (OSError, tomllib.TOMLDecodeError) as exc:  # pragma: no cover - rare
+            raise WorkspaceError(f"failed to parse {path}: {exc}") from exc
+
+        defaults = raw.get("defaults") if isinstance(raw.get("defaults"), dict) else {}
+        default_name = defaults.get("agent") if isinstance(defaults, dict) else None
+        if isinstance(default_name, str):
+            self.default_agent = default_name
+
+        agents_section = raw.get("agents") if isinstance(raw.get("agents"), dict) else {}
+        if not isinstance(agents_section, dict):
+            return
+
+        for name, body in agents_section.items():
+            if not isinstance(name, str) or not isinstance(body, dict):
+                continue
+            schema_rel = body.get("schema")
+            if not isinstance(schema_rel, str) or not schema_rel:
+                raise WorkspaceError(f"expert.toml: agent '{name}' is missing a 'schema' field.")
+            schema_abs = (path.parent / schema_rel).resolve()
+            api_key = _resolve_api_key(body)
+            self.agents_by_name[name] = AgentInfo(
+                name=name,
+                schema_path=schema_abs,
+                endpoint=_opt_str(body.get("endpoint")),
+                api_key=api_key,
+                description=_opt_str(body.get("description")),
+                source="toml",
+            )
+
+    # --------------------------- Auto-discovery ------------------------- #
+
+    def _discover_siblings(self) -> None:
+        """Walk immediate children of ``root`` for ``*/agent_schema.yaml``."""
+        if not self.root.is_dir():
+            return
+        for child in sorted(self.root.iterdir()):
+            if not child.is_dir() or child.name.startswith("."):
+                continue
+            schema = child / _SCHEMA_FILENAME
+            if not schema.is_file():
+                continue
+            # Skip schemas already declared via TOML. TOML paths are stored
+            # resolved, so also compare the resolved path (symlinked dirs).
+            resolved = schema.resolve()
+            if any(a.schema_path in (schema, resolved) for a in self.agents_by_name.values()):
+                continue
+            # A directory name that collides with a declared TOML name loses.
+            if child.name in self.agents_by_name:
+                continue
+            self.agents_by_name[child.name] = AgentInfo(
+                name=child.name,
+                schema_path=schema,
+                source="auto",
+            )
+
+    # --------------------------- State file ----------------------------- #
+
+    @property
+    def state_file(self) -> Path:
+        return self.root / _STATE_DIR / _STATE_FILE
+
+    def active(self) -> str | None:
+        """Return the agent name pinned via ``expert use``, or ``None`` if unset/unreadable."""
+        path = self.state_file
+        if not path.is_file():
+            return None
+        try:
+            data = json.loads(path.read_text(encoding="utf-8"))
+        except (OSError, ValueError):  # ValueError covers bad JSON and bad UTF-8 bytes
+            return None
+        name = data.get("agent") if isinstance(data, dict) else None
+        return name if isinstance(name, str) else None
+
+    def set_active(self, name: str) -> None:
+        if name not in self.agents_by_name:
+            raise AgentNotFoundError(
+                f"Unknown agent '{name}'. Run `expert agents` to list candidates."
+            )
+        path = self.state_file
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(json.dumps({"agent": name}, indent=2) + "\n")
+
+    def clear_active(self) -> None:
+        path = self.state_file
+        if path.is_file():
+            path.unlink()
+
+    # --------------------------- Listing -------------------------------- #
+
+    def agents(self) -> list[AgentInfo]:
+        return sorted(self.agents_by_name.values(), key=lambda a: a.name)
+
+    # --------------------------- Resolution ----------------------------- #
+
+    def resolve(
+        self,
+        selector: str | None = None,
+        *,
+        env: dict[str, str] | None = None,
+        schema_override: Path | None = None,
+    ) -> AgentContext:
+        """Return a fully-resolved :class:`AgentContext`.
+
+        Resolution order (first match wins):
+
+        1. ``schema_override`` without a ``selector`` — bypasses the workspace.
+        2. Explicit ``selector`` (from ``--agent`` or ``@alias``).
+        3. ``EXPERT_AGENT`` env var.
+        4. ``.expert/state.json`` (set by ``expert use``).
+        5. ``[defaults] agent = "..."`` in ``expert.toml``.
+        6. Exactly-one-agent short-circuit.
+
+        Fails with :class:`AmbiguousAgentError` otherwise.
+        """
+        env = env if env is not None else dict(os.environ)
+        source: str
+        name: str | None = None
+
+        # An explicit --schema path short-circuits resolution entirely:
+        # the caller is telling us "use this file, don't touch the
+        # workspace". This mirrors the pre-multi-agent CLI behaviour.
+        if schema_override is not None and selector is None:
+            return AgentContext(
+                name=schema_override.parent.name or ".",
+                schema_path=schema_override.resolve(),
+                endpoint=env.get("EXPERT_AGENT_ENDPOINT"),
+                api_key=env.get("EXPERT_AGENT_API_KEY"),
+                description=None,
+                selector_source="schema-flag",
+            )
+
+        if selector:
+            name, source = self._match(selector), "flag"
+        elif env.get(_ENV_ACTIVE_AGENT):
+            name, source = self._match(env[_ENV_ACTIVE_AGENT]), "env"
+        elif (pinned := self.active()) is not None:
+            name, source = self._match(pinned), "active"
+        elif self.default_agent is not None:
+            name, source = self._match(self.default_agent), "default"
+        elif len(self.agents_by_name) == 1:
+            name, source = (
+                next(iter(self.agents_by_name)),
+                ("single" if self.single_agent_mode else "auto"),
+            )
+
+        if name is None:
+            raise AmbiguousAgentError(
+                self._ambiguity_message(selector, env),
+                candidates=self.agents(),
+            )
+
+        info = self.agents_by_name[name]
+        endpoint = info.endpoint or env.get("EXPERT_AGENT_ENDPOINT")
+        api_key = info.api_key or env.get("EXPERT_AGENT_API_KEY")
+        schema_path = schema_override.resolve() if schema_override else info.schema_path
+        return AgentContext(
+            name=info.name,
+            schema_path=schema_path,
+            endpoint=endpoint,
+            api_key=api_key,
+            description=info.description,
+            selector_source=source,
+        )
+
+    # --------------------------- Internals ------------------------------ #
+
+    def _match(self, selector: str) -> str:
+        """Resolve an agent selector (exact name or unique prefix).
+
+        A leading ``@`` is stripped, so ``--agent derm`` and ``@derm`` behave
+        alike. An empty selector (including a bare ``@``) is rejected.
+        """
+        needle = selector.lstrip("@") if selector else ""
+        if not needle:  # also rejects a bare "@", which would prefix-match every agent
+            raise AgentNotFoundError("empty agent selector")
+        if needle in self.agents_by_name:
+            return needle
+        matches = [n for n in self.agents_by_name if n.startswith(needle)]
+        if len(matches) == 1:
+            return matches[0]
+        if not matches:
+            raise AgentNotFoundError(
+                f"No agent named '{needle}'. "
+                f"Available: {', '.join(sorted(self.agents_by_name)) or '(none)'}."
+            )
+        raise AmbiguousAgentError(
+            f"Prefix '{needle}' is ambiguous — matches: {', '.join(sorted(matches))}. "
+            "Use the full name or a longer prefix.",
+            candidates=[self.agents_by_name[m] for m in matches],
+        )
+
+    def _ambiguity_message(self, selector: str | None, env: dict[str, str]) -> str:
+        if not self.agents_by_name:
+            return (
+                "No agent_schema.yaml found in this workspace. "
+                "Run `expert init <name>` to scaffold one, or pass "
+                "--schema explicitly."
+            )
+        lines = [
+            "Multiple agents found in this workspace and no selector was given.",
+            "",
+            "Candidates:",
+        ]
+        for info in self.agents():
+            rel = _safe_relpath(info.schema_path, self.root)
+            badge = "[toml]" if info.source == "toml" else "[auto]"
+            lines.append(f"  • {info.name:<20} {rel}  {badge}")
+        lines.extend(
+            [
+                "",
+                "Pick one, in order of preference:",
+                "  expert @<name> <command>          # one-off shortcut",
+                "  expert <command> --agent <name>   # explicit flag (CI-friendly)",
+                "  expert use <name>                 # pin for this workspace",
+            ]
+        )
+        _ = selector, env
+        return "\n".join(lines)
+
+
+# ------------------------------------------------------------------------- #
+# Helpers
+# ------------------------------------------------------------------------- #
+
+
+def _find_workspace_root(start: Path) -> tuple[Path, Path | None]:
+    """Walk up from ``start`` to find a workspace root + optional TOML path.
+
+    Returns ``(root, toml_path)`` where ``toml_path`` may be ``None``. At most
+    ``_DISCOVERY_MAX_DEPTH`` parents above ``start`` are inspected, and the
+    nearest directory holding a marker wins:
+
+    - ``expert.toml`` — checked first at each level; returned as ``toml_path``.
+    - ``.expert/state.json`` (previously pinned via ``expert use``) — the
+      directory becomes ``root`` with no TOML path.
+    - Else ``start`` itself. Sibling schemas are not a marker here — we never
+      silently promote an unrelated ancestor to ``root`` just because it
+      happens to have other agent directories lying around.
+    """
+    current = start
+    for _ in range(_DISCOVERY_MAX_DEPTH + 1):
+        toml = current / _WORKSPACE_FILE
+        if toml.is_file():
+            return current, toml
+        if (current / _STATE_DIR / _STATE_FILE).is_file():
+            return current, None
+        if current.parent == current:
+            break
+        current = current.parent
+    return start, None
+
+
+def _opt_str(value: Any) -> str | None:
+    return value if isinstance(value, str) and value else None
+
+
+def _resolve_api_key(body: dict[str, Any]) -> str | None:
+    env_var = body.get("api_key_env")
+    if isinstance(env_var, str) and env_var:
+        env_value = os.environ.get(env_var)
+        if env_value:
+            return env_value
+    raw = body.get("api_key")
+    return raw if isinstance(raw, str) and raw else None
+
+
+def _safe_relpath(path: Path, base: Path) -> str:
+    try:
+        return str(path.relative_to(base))
+    except ValueError:
+        return str(path)
+
+
+__all__ = [
+    "AgentContext",
+    "AgentInfo",
+    "AgentNotFoundError",
+    "AmbiguousAgentError",
+    "Workspace",
+    "WorkspaceError",
+]
diff --git a/cli/tests/test_brand.py b/cli/tests/test_brand.py
new file mode 100644
index 0000000..2737fe0
--- /dev/null
+++ b/cli/tests/test_brand.py
@@ -0,0 +1,34 @@
+"""Smoke tests for the ASCII brand + the ``expert brand`` / ``--version`` paths."""
+
+from expert.main import app
+from typer.testing import CliRunner
+
+
+def test_brand_command_prints_wordmark_and_tagline() -> None:
+    runner = CliRunner()
+    result = runner.invoke(app, ["brand"])
+    assert result.exit_code == 0, result.output
+    # Wordmark: one row of the ANSI-shadow figlet should always be present.
+    assert "███████╗" in result.output
+    # Tagline + knowledge glyph box.
+    assert "ground a model on your docs" in result.output
+    assert "╭───╮" in result.output
+    # Version footer.
+    assert "MIT" in result.output
+    assert "github.com/feliperbroering/expert-agent" in result.output
+
+
+def test_version_flag_renders_brand() -> None:
+    runner = CliRunner()
+    result = runner.invoke(app, ["--version"])
+    assert result.exit_code == 0, result.output
+    assert "expert" in result.output
+    assert "███████╗" in result.output
+
+
+def test_brand_command_is_hidden_in_help() -> None:
+    runner = CliRunner()
+    result = runner.invoke(app, ["--help"])
+    assert result.exit_code == 0, result.output
+    # `brand` is a hidden easter-egg command; it must not pollute --help output.
+    assert "brand" not in result.output.split("Commands")[-1]
diff --git a/cli/tests/test_count_tokens.py b/cli/tests/test_count_tokens.py
index 2f6fd72..cda7208 100644
--- a/cli/tests/test_count_tokens.py
+++ b/cli/tests/test_count_tokens.py
@@ -64,7 +64,7 @@ async def fake_count(*, model: str, contents: Any) -> Any:
             "--gemini-api-key",
             "test-key",
             "--model",
-            "gemini-2.0-flash-exp",
+            "gemini-2.5-flash",
         ],
     )
     assert result.exit_code == 0, result.output
diff --git a/cli/tests/test_init.py b/cli/tests/test_init.py
index ef112be..77e8fa3 100644
--- a/cli/tests/test_init.py
+++ b/cli/tests/test_init.py
@@ -59,6 +59,9 @@ def test_init_rejects_invalid_name(tmp_path: Path) -> None:
         input="Invalid_Name\nvalid-name\nDescription.\n",
     )
     assert result.exit_code == 0, result.output
-    assert "ERROR" in result.output
+    # Invalid name is rejected with a visible error before the retry accepts
+    # the valid one. We check for the (stable) regex contract rather than the
+    # transient glyph/color-coded prefix.
+    assert "name must match" in result.output
     schema = AgentSchema.from_yaml(dest / "agent_schema.yaml")
     assert schema.metadata.name == "valid-name"
diff --git a/cli/tests/test_main_alias.py b/cli/tests/test_main_alias.py
new file mode 100644
index 0000000..6ea12f4
--- /dev/null
+++ b/cli/tests/test_main_alias.py
@@ -0,0 +1,183 @@
+"""Tests for the `@alias` argv rewriter and workspace-aware commands."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+from expert.main import _rewrite_at_alias, app
+from typer.testing import CliRunner
+
+
+def test_rewrite_at_alias_for_agent_aware_subcommand() -> None:
+    argv = ["expert", "@my-expert", "ask", "hi", "--no-stream"]
+    assert _rewrite_at_alias(argv) == [
+        "expert",
+        "ask",
+        "hi",
+        "--no-stream",
+        "--agent",
+        "my-expert",
+    ]
+
+
+def test_rewrite_at_alias_preserves_nested_subcommand() -> None:
+    argv = ["expert", "@derm", "sessions", "list"]
+    assert _rewrite_at_alias(argv) == [
+        "expert",
+        "sessions",
+        "list",
+        "--agent",
+        "derm",
+    ]
+
+
+def test_rewrite_at_alias_no_rewrite_for_non_agent_command() -> None:
+    """`use`/`agents` aren't in the allow-list; argv is returned unchanged."""
+    argv = ["expert", "@my-expert", "use", "my-expert"]
+    assert _rewrite_at_alias(argv) == argv
+
+
+def test_rewrite_at_alias_no_arg_after() -> None:
+    """`expert @my-expert` with nothing else is left alone (Typer will show help)."""
+    argv = ["expert", "@my-expert"]
+    assert _rewrite_at_alias(argv) == argv
+
+
+def test_rewrite_ignores_dashed_tokens_between_alias_and_subcommand() -> None:
+    argv = ["expert", "@my-expert", "--verbose", "validate"]
+    assert _rewrite_at_alias(argv) == [
+        "expert",
+        "--verbose",
+        "validate",
+        "--agent",
+        "my-expert",
+    ]
+
+
+def test_rewrite_appends_agent_at_end_for_sessions_list() -> None:
+    """Appending at the end routes the flag to the deepest sub-Typer."""
+    argv = ["expert", "@derm", "sessions", "list", "--user", "u1"]
+    assert _rewrite_at_alias(argv) == [
+        "expert",
+        "sessions",
+        "list",
+        "--user",
+        "u1",
+        "--agent",
+        "derm",
+    ]
+
+
+def test_rewrite_handles_empty_alias() -> None:
+    argv = ["expert", "@", "ask", "hi"]
+    # A bare `@` carries no agent name — expect a no-op rather than a misparse.
+    assert _rewrite_at_alias(argv) == argv
+
+
+# ------------------------------------------------------------------------- #
+# Integration: workspace-aware commands
+# ------------------------------------------------------------------------- #
+
+
+def _seed(tmp_path: Path) -> Path:
+    (tmp_path / "my-expert").mkdir()
+    (tmp_path / "derm").mkdir()
+    (tmp_path / "my-expert" / "agent_schema.yaml").write_text("x")
+    (tmp_path / "derm" / "agent_schema.yaml").write_text("x")
+    (tmp_path / "expert.toml").write_text(
+        '[defaults]\nagent = "my-expert"\n\n'
+        '[agents.my-expert]\nschema = "my-expert/agent_schema.yaml"\n'
+        'endpoint = "https://my-expert.example"\napi_key = "sk-test"\n\n'
+        '[agents.derm]\nschema = "derm/agent_schema.yaml"\n',
+    )
+    return tmp_path
+
+
+def test_agents_command_lists(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+    _seed(tmp_path)
+    monkeypatch.chdir(tmp_path)
+    runner = CliRunner()
+    result = runner.invoke(app, ["agents"])
+    assert result.exit_code == 0, result.output
+    assert "my-expert" in result.output
+    assert "derm" in result.output
+
+
+def test_which_uses_toml_default(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+    _seed(tmp_path)
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.delenv("EXPERT_AGENT", raising=False)
+    monkeypatch.delenv("EXPERT_AGENT_ENDPOINT", raising=False)
+    runner = CliRunner()
+    result = runner.invoke(app, ["which"])
+    assert result.exit_code == 0, result.output
+    assert "my-expert" in result.output
+    assert "default" in result.output
+
+
+def test_use_then_which(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+    _seed(tmp_path)
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.delenv("EXPERT_AGENT", raising=False)
+    runner = CliRunner()
+
+    res = runner.invoke(app, ["use", "derm"])
+    assert res.exit_code == 0, res.output
+
+    res = runner.invoke(app, ["which"])
+    assert res.exit_code == 0, res.output
+    assert "derm" in res.output
+    assert "active" in res.output
+
+
+def test_which_with_agent_flag_overrides_pin(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """An explicit `--agent` flag must win over an agent pinned via `use`."""
+    monkeypatch.chdir(_seed(tmp_path))
+    monkeypatch.delenv("EXPERT_AGENT", raising=False)
+    runner = CliRunner()
+    assert runner.invoke(app, ["use", "derm"]).exit_code == 0, "pin setup failed"
+    res = runner.invoke(app, ["which", "--agent", "my-expert"])
+    assert res.exit_code == 0, res.output
+    assert "my-expert" in res.output
+    assert "flag" in res.output
+
+
+def test_use_clear(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+    _seed(tmp_path)
+    monkeypatch.chdir(tmp_path)
+    runner = CliRunner()
+    runner.invoke(app, ["use", "derm"])
+    assert (tmp_path / ".expert" / "state.json").is_file()
+
+    res = runner.invoke(app, ["use", "--clear"])
+    assert res.exit_code == 0, res.output
+    assert not (tmp_path / ".expert" / "state.json").is_file()
+
+
+def test_use_unknown_agent(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+    _seed(tmp_path)
+    monkeypatch.chdir(tmp_path)
+    runner = CliRunner()
+    res = runner.invoke(app, ["use", "does-not-exist"])
+    assert res.exit_code != 0
+
+
+def test_ambiguous_workspace_shows_helpful_error(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    """Two auto-discovered agents, no selector → helpful multi-line error."""
+    (tmp_path / "a").mkdir()
+    (tmp_path / "b").mkdir()
+    (tmp_path / "a" / "agent_schema.yaml").write_text("x")
+    (tmp_path / "b" / "agent_schema.yaml").write_text("x")
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.delenv("EXPERT_AGENT", raising=False)
+
+    runner = CliRunner()
+    res = runner.invoke(app, ["which"])
+    assert res.exit_code != 0
+    assert "expert @" in res.output or "@<name>" in res.output
+    assert "--agent" in res.output
diff --git a/cli/tests/test_workspace.py b/cli/tests/test_workspace.py
new file mode 100644
index 0000000..daa7e04
--- /dev/null
+++ b/cli/tests/test_workspace.py
@@ -0,0 +1,267 @@
+"""Tests for multi-agent workspace discovery and resolution."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+from expert.workspace import (
+    AgentNotFoundError,
+    AmbiguousAgentError,
+    Workspace,
+    WorkspaceError,
+)
+
+# ------------------------------------------------------------------------- #
+# Fixtures
+# ------------------------------------------------------------------------- #
+
+
+def _mk_schema(dir_: Path, name: str = "a") -> Path:
+    dir_.mkdir(parents=True, exist_ok=True)
+    f = dir_ / "agent_schema.yaml"
+    f.write_text(f"# dummy schema for {name}\n")
+    return f
+
+
+def _mk_workspace(
+    root: Path,
+    *,
+    agents: dict[str, dict[str, object]] | None = None,
+    default: str | None = None,
+) -> None:
+    """Write ``expert.toml`` and stub schemas under ``root`` (no-op if ``agents`` is None).
+
+    ``agents`` maps names to string fields (``schema``, ``endpoint``, ``api_key``, ...);
+    ``schema`` defaults to ``<name>/agent_schema.yaml``. ``default`` sets ``[defaults] agent``.
+    """
+    if agents is None:
+        return
+    lines: list[str] = []
+    if default:
+        lines.extend(["[defaults]", f'agent = "{default}"', ""])
+    for name, body in agents.items():
+        schema_rel = body.get("schema") or f"{name}/agent_schema.yaml"
+        assert isinstance(schema_rel, str)
+        _mk_schema(root / Path(schema_rel).parent, name=name)
+        lines.append(f"[agents.{name}]")
+        lines.append(f'schema = "{schema_rel}"')
+        for key in ("endpoint", "api_key", "api_key_env", "description"):
+            value = body.get(key)
+            if isinstance(value, str):
+                lines.append(f'{key} = "{value}"')
+        lines.append("")
+    (root / "expert.toml").write_text("\n".join(lines))
+
+
+# ------------------------------------------------------------------------- #
+# Discovery
+# ------------------------------------------------------------------------- #
+
+
+def test_single_agent_mode(tmp_path: Path) -> None:
+    _mk_schema(tmp_path)
+    ws = Workspace.discover(cwd=tmp_path)
+    assert ws.single_agent_mode is True
+    assert list(ws.agents_by_name) == ["."]
+
+    ctx = ws.resolve()
+    assert ctx.name == "."
+    assert ctx.selector_source == "single"
+
+
+def test_auto_discover_siblings(tmp_path: Path) -> None:
+    _mk_schema(tmp_path / "my-expert")
+    _mk_schema(tmp_path / "derm")
+    ws = Workspace.discover(cwd=tmp_path)
+    assert ws.single_agent_mode is False
+    assert set(ws.agents_by_name) == {"my-expert", "derm"}
+    assert all(info.source == "auto" for info in ws.agents())
+
+
+def test_toml_overrides_auto(tmp_path: Path) -> None:
+    _mk_workspace(
+        tmp_path,
+        agents={
+            "my-expert": {"schema": "my-expert/agent_schema.yaml", "endpoint": "https://my-expert"},
+            "derm": {"schema": "derm/agent_schema.yaml"},
+        },
+        default="my-expert",
+    )
+    ws = Workspace.discover(cwd=tmp_path)
+    assert ws.default_agent == "my-expert"
+    assert ws.agents_by_name["my-expert"].source == "toml"
+    assert ws.agents_by_name["my-expert"].endpoint == "https://my-expert"
+
+
+def test_toml_plus_sibling_not_declared(tmp_path: Path) -> None:
+    """Declared agents + undeclared siblings should coexist."""
+    _mk_workspace(
+        tmp_path,
+        agents={"my-expert": {"schema": "my-expert/agent_schema.yaml"}},
+    )
+    _mk_schema(tmp_path / "derm")
+    ws = Workspace.discover(cwd=tmp_path)
+    assert set(ws.agents_by_name) == {"my-expert", "derm"}
+    assert ws.agents_by_name["my-expert"].source == "toml"
+    assert ws.agents_by_name["derm"].source == "auto"
+
+
+# ------------------------------------------------------------------------- #
+# Resolution precedence
+# ------------------------------------------------------------------------- #
+
+
+def test_resolve_explicit_selector_wins(tmp_path: Path) -> None:
+    _mk_workspace(
+        tmp_path,
+        agents={"my-expert": {}, "derm": {}},
+        default="my-expert",
+    )
+    ws = Workspace.discover(cwd=tmp_path)
+    ws.set_active("my-expert")
+    ctx = ws.resolve(selector="derm")
+    assert ctx.name == "derm"
+    assert ctx.selector_source == "flag"
+
+
+def test_resolve_env_var(tmp_path: Path) -> None:
+    _mk_workspace(tmp_path, agents={"my-expert": {}, "derm": {}})
+    ws = Workspace.discover(cwd=tmp_path)
+    ctx = ws.resolve(env={"EXPERT_AGENT": "derm"})
+    assert ctx.name == "derm"
+    assert ctx.selector_source == "env"
+
+
+def test_resolve_active_pin(tmp_path: Path) -> None:
+    _mk_workspace(tmp_path, agents={"my-expert": {}, "derm": {}})
+    ws = Workspace.discover(cwd=tmp_path)
+    ws.set_active("derm")
+    ctx = ws.resolve(env={})
+    assert ctx.name == "derm"
+    assert ctx.selector_source == "active"
+
+
+def test_resolve_default_from_toml(tmp_path: Path) -> None:
+    _mk_workspace(tmp_path, agents={"my-expert": {}, "derm": {}}, default="my-expert")
+    ws = Workspace.discover(cwd=tmp_path)
+    ctx = ws.resolve(env={})
+    assert ctx.name == "my-expert"
+    assert ctx.selector_source == "default"
+
+
+def test_resolve_ambiguous(tmp_path: Path) -> None:
+    _mk_workspace(tmp_path, agents={"my-expert": {}, "derm": {}})
+    ws = Workspace.discover(cwd=tmp_path)
+    with pytest.raises(AmbiguousAgentError) as exc_info:
+        ws.resolve(env={})
+    assert "Multiple agents" in str(exc_info.value)
+    names = {c.name for c in exc_info.value.candidates}
+    assert names == {"my-expert", "derm"}
+
+
+def test_resolve_unique_prefix(tmp_path: Path) -> None:
+    _mk_workspace(tmp_path, agents={"my-expert": {}, "derm": {}})
+    ws = Workspace.discover(cwd=tmp_path)
+    ctx = ws.resolve(selector="my")
+    assert ctx.name == "my-expert"
+
+
+def test_resolve_ambiguous_prefix(tmp_path: Path) -> None:
+    _mk_workspace(tmp_path, agents={"my-expert": {}, "my-trainer": {}})
+    ws = Workspace.discover(cwd=tmp_path)
+    with pytest.raises(AmbiguousAgentError):
+        ws.resolve(selector="my")
+
+
+def test_resolve_unknown_selector(tmp_path: Path) -> None:
+    _mk_workspace(tmp_path, agents={"my-expert": {}})
+    ws = Workspace.discover(cwd=tmp_path)
+    with pytest.raises(AgentNotFoundError):
+        ws.resolve(selector="nope")
+
+
+def test_resolve_at_alias_prefix_strip(tmp_path: Path) -> None:
+    _mk_workspace(tmp_path, agents={"my-expert": {}, "derm": {}})
+    ws = Workspace.discover(cwd=tmp_path)
+    ctx = ws.resolve(selector="@my-expert")
+    assert ctx.name == "my-expert"
+
+
+def test_resolve_schema_override_bypasses_workspace(tmp_path: Path) -> None:
+    standalone = tmp_path / "orphan"
+    schema = _mk_schema(standalone)
+    # No workspace here — ensure the flag-based fallback works.
+    ws = Workspace.discover(cwd=tmp_path)
+    ctx = ws.resolve(schema_override=schema, env={})
+    assert ctx.schema_path == schema
+    assert ctx.selector_source == "schema-flag"
+
+
+# ------------------------------------------------------------------------- #
+# API key resolution
+# ------------------------------------------------------------------------- #
+
+
+def test_api_key_from_env_via_api_key_env(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setenv("MY_EXPERT_KEY", "sk-from-env")
+    _mk_workspace(
+        tmp_path,
+        agents={
+            "my-expert": {
+                "schema": "my-expert/agent_schema.yaml",
+                "api_key_env": "MY_EXPERT_KEY",
+            }
+        },
+    )
+    ws = Workspace.discover(cwd=tmp_path)
+    assert ws.agents_by_name["my-expert"].api_key == "sk-from-env"
+
+
+def test_env_endpoint_fills_when_toml_missing(tmp_path: Path) -> None:
+    _mk_workspace(tmp_path, agents={"my-expert": {}})
+    ws = Workspace.discover(cwd=tmp_path)
+    ctx = ws.resolve(env={"EXPERT_AGENT_ENDPOINT": "https://x", "EXPERT_AGENT_API_KEY": "k"})
+    assert ctx.endpoint == "https://x"
+    assert ctx.api_key == "k"
+
+
+def test_require_remote_raises_when_incomplete(tmp_path: Path) -> None:
+    _mk_workspace(tmp_path, agents={"my-expert": {}})
+    ws = Workspace.discover(cwd=tmp_path)
+    ctx = ws.resolve(env={})
+    with pytest.raises(WorkspaceError):
+        ctx.require_remote()
+
+
+# ------------------------------------------------------------------------- #
+# Pin state file
+# ------------------------------------------------------------------------- #
+
+
+def test_set_active_writes_state(tmp_path: Path) -> None:
+    _mk_workspace(tmp_path, agents={"my-expert": {}, "derm": {}})
+    ws = Workspace.discover(cwd=tmp_path)
+    ws.set_active("derm")
+    state = json.loads(ws.state_file.read_text())
+    assert state == {"agent": "derm"}
+    assert ws.active() == "derm"
+
+
+def test_clear_active(tmp_path: Path) -> None:
+    _mk_workspace(tmp_path, agents={"my-expert": {}})
+    ws = Workspace.discover(cwd=tmp_path)
+    ws.set_active("my-expert")
+    ws.clear_active()
+    assert ws.active() is None
+
+
+def test_set_active_rejects_unknown(tmp_path: Path) -> None:
+    _mk_workspace(tmp_path, agents={"my-expert": {}})
+    ws = Workspace.discover(cwd=tmp_path)
+    with pytest.raises(AgentNotFoundError):
+        ws.set_active("nope")
diff --git a/docs/AGENT_E2E_SETUP.md b/docs/AGENT_E2E_SETUP.md
index e161815..9f07fea 100644
--- a/docs/AGENT_E2E_SETUP.md
+++ b/docs/AGENT_E2E_SETUP.md
@@ -45,8 +45,16 @@ first or warn the user.
 - [ ] Repo settings → *Actions → General → Workflow permissions* allow
       reading from public actions (default).
 
-If the repo hosts **multiple agents** (a monorepo), each agent gets its own
-workflow file pointing at its own schema.
+If the repo hosts **multiple agents** (a monorepo), you have two options:
+
+1. **One workflow per agent** — each file pins a different `schema:` and a
+   different set of secrets. Recommended when the agents are owned by
+   different teams or deployed to different projects.
+2. **One workflow, matrix-over-agents** — declare an `expert.toml` at the
+   repo root and let `expert test` resolve each agent by name. See the
+   "matrix" snippet in [§6. Customising for your agent](#6-customising-for-your-agent).
+
+Both integrations share the same reusable workflow; only the caller changes.
 
 ---
 
@@ -128,9 +136,9 @@ jobs:
 
 | Placeholder                            | Concrete example                                                              | Notes                                                                                                              |
 |----------------------------------------|-------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------|
-| `<<agent-id>>`                         | `ecg-expert`                                                                  | Just for the workflow filename + display name.                                                                     |
-| `<<path/to/agent_schema.yaml>>`        | `ecg-expert/agent_schema.yaml`                                                | **Must be relative to the repo root.** Validated by the reusable workflow before running.                          |
-| `<<a question your agent should answer well>>` | `"Qual fórmula de correção do QTc a AHA recomenda como padrão?"` | Used by `05_ask_latency`. Pick something representative of real traffic.                                           |
+| `<<agent-id>>`                         | `my-expert`                                                                   | Just for the workflow filename + display name.                                                                     |
+| `<<path/to/agent_schema.yaml>>`        | `my-expert/agent_schema.yaml`                                                 | **Must be relative to the repo root.** Validated by the reusable workflow before running.                          |
+| `<<a question your agent should answer well>>` | `"What's a representative question for your agent?"`                | Used by `05_ask_latency`. Pick something representative of real traffic.                                           |
 | `<<ref>>`                              | `main`, `v0.1.1`, `v0.2.0`                                                    | Pin to a tag for stable runs (e.g. `v0.1.1`); use `main` only if you want to live on the bleeding edge.            |
 
 ### File naming convention
@@ -138,7 +146,7 @@ jobs:
 Use `e2e-<agent-id>.yml`. Examples:
 
 ```
-.github/workflows/e2e-ecg-expert.yml
+.github/workflows/e2e-my-expert.yml
 .github/workflows/e2e-derm-expert.yml
 .github/workflows/e2e-pharma-expert.yml
 ```
@@ -209,6 +217,42 @@ You almost never need to fork the suites. Knobs available out of the box:
 | Run only one suite                     | Trigger with the `suite:` choice input (`gh workflow run … -f suite=05_ask_latency`).                |
 | Pin to a stable upstream version       | Replace `@main` with `@v0.1.1` everywhere (both `uses:` and `cli-ref:`).                             |
 | Add a per-deploy smoke check           | Call the reusable workflow from your `deploy.yml` after the Cloud Run rollout finishes.              |
+| Test N agents in one monorepo          | See "matrix" snippet below, or keep one workflow-per-agent for clearer blame.                        |
+
+### Matrix over agents (monorepo)
+
+If `expert.toml` at the repo root declares several agents, the CLI already
+understands `expert test --agent <name>`. You can call the reusable workflow
+once per agent via a matrix:
+
+```yaml
+jobs:
+  e2e:
+    strategy:
+      fail-fast: false
+      matrix:
+        agent:
+          - { name: my-expert, schema: my-expert/agent_schema.yaml, endpoint_secret: MY_EXPERT_ENDPOINT, key_secret: MY_EXPERT_API_KEY }
+          - { name: derm,      schema: derm-expert/agent_schema.yaml, endpoint_secret: DERM_ENDPOINT,    key_secret: DERM_API_KEY }
+    uses: feliperbroering/expert-agent/.github/workflows/expert-e2e.yml@<<ref>>
+    with:
+      schema: ${{ matrix.agent.schema }}
+      sample-question: "ping"
+      cli-ref: <<ref>>
+    secrets:
+      endpoint: ${{ secrets[matrix.agent.endpoint_secret] }}
+      api-key:  ${{ secrets[matrix.agent.key_secret] }}
+```
+
+Locally, the same layout lets you do:
+
+```bash
+expert agents                   # list all known agents
+expert use my-expert            # pin my-expert for this workspace (.expert/state.json)
+expert ask "..."                # routes to my-expert
+expert @derm ask "..."          # one-off hop to derm
+expert test --agent derm        # run the packaged E2E kit against derm
+```
 
 If you genuinely need a *new* assertion the upstream suites don't cover,
 contribute it back to `expert-agent` rather than vendoring locally — the kit
diff --git a/docs/PRIVATE_AGENT_REPO.md b/docs/PRIVATE_AGENT_REPO.md
new file mode 100644
index 0000000..364f062
--- /dev/null
+++ b/docs/PRIVATE_AGENT_REPO.md
@@ -0,0 +1,365 @@
+# Private agent repo guide
+
+This guide shows the cleanest way to create a **private repo for your own specialist agents** while reusing the open-source `expert-agent` framework.
+
+Use it when you want:
+
+- private prompts and docs
+- your own deploy cadence
+- one repo with one agent, or one repo with many agents
+- the same `expert` CLI and Robot Framework E2E kit from the public repo
+
+---
+
+## Recommended repo shapes
+
+### Option A — one repo, one agent
+
+Best when each agent has its own owner, deploy cadence, and secrets.
+
+```text
+my-private-agent/
+├─ agent_schema.yaml
+├─ prompts/
+│  └─ identity.md
+├─ docs/
+│  ├─ paper-1.pdf
+│  └─ protocol.md
+├─ expert.toml                 # optional in single-agent repos
+└─ .github/workflows/
+   └─ e2e.yml
+```
+
+This gives you the nicest UX:
+
+```bash
+expert validate
+expert count-tokens
+expert sync
+expert ask "..."
+```
+
+### Option B — one repo, many agents
+
+Best when the agents share docs, ownership, or infra.
+
+```text
+my-private-agents/
+├─ expert.toml
+├─ cardiology/
+│  ├─ agent_schema.yaml
+│  ├─ prompts/
+│  └─ docs/
+├─ dermatology/
+│  ├─ agent_schema.yaml
+│  ├─ prompts/
+│  └─ docs/
+└─ oncology/
+   ├─ agent_schema.yaml
+   ├─ prompts/
+   └─ docs/
+```
+
+Then use the workspace-aware CLI:
+
+```bash
+expert agents
+expert @cardiology validate
+expert @dermatology ask "..."
+expert use oncology
+expert which
+```
+
+---
+
+## Step 1 — install the CLI
+
+On your machine:
+
+```bash
+uv tool install "git+https://github.com/feliperbroering/expert-agent.git"
+expert --version
+```
+
+If you also want the packaged Robot Framework kit:
+
+```bash
+uv tool install "expert-agent[test] @ git+https://github.com/feliperbroering/expert-agent.git"
+```
+
+---
+
+## Step 2 — scaffold the repo
+
+### Single-agent
+
+```bash
+mkdir my-private-agent && cd my-private-agent
+expert init .
+```
+
+### Multi-agent
+
+```bash
+mkdir my-private-agents && cd my-private-agents
+expert init cardiology
+expert init dermatology
+expert init oncology
+```
+
+Then add `expert.toml`:
+
+```toml
+defaults.agent = "cardiology"
+
+[agents.cardiology]
+schema = "cardiology/agent_schema.yaml"
+endpoint_env = "CARDIOLOGY_AGENT_ENDPOINT"
+api_key_env = "CARDIOLOGY_AGENT_API_KEY"
+
+[agents.dermatology]
+schema = "dermatology/agent_schema.yaml"
+endpoint_env = "DERM_AGENT_ENDPOINT"
+api_key_env = "DERM_AGENT_API_KEY"
+
+[agents.oncology]
+schema = "oncology/agent_schema.yaml"
+endpoint_env = "ONCO_AGENT_ENDPOINT"
+api_key_env = "ONCO_AGENT_API_KEY"
+```
+
+`expert.toml` is optional but recommended in private multi-agent repos because it:
+
+- makes endpoints and secret env vars explicit
+- avoids ambiguity when names overlap
+- gives you a default agent
+
+---
+
+## Step 3 — add your private knowledge base
+
+For each agent:
+
+1. Edit `prompts/identity.md`
+2. Replace the placeholder file in `docs/`
+3. Keep sensitive source material **out of git** unless your repo policy allows it
+
+Recommended patterns:
+
+- Commit curated Markdown summaries and public PDFs
+- Keep raw source dumps, exports, and OCR artifacts in a private storage bucket
+- Add `_drafts/` to the schema's `exclude_patterns`
+
+Validate locally:
+
+```bash
+expert validate
+expert count-tokens
+```
+
+Or, in a multi-agent repo:
+
+```bash
+expert @cardiology validate
+expert @cardiology count-tokens
+```
+
+---
+
+## Step 4 — deploy
+
+The easiest mental model is:
+
+- `infra/platform` = once per GCP project
+- `infra/chroma` = once per GCP project
+- `infra/agent` = once per agent
+
+If your private repo only contains the agent specs, you still have two clean options:
+
+### Option A — central infra repo
+
+Keep OpenTofu in a separate infra repo and point it at the backend image + agent IDs. This is the cleanest setup for teams.
+
+### Option B — vendor/copy the `infra/` folder
+
+Copy `infra/` into your private repo and own it there. This is simpler if you're a solo maintainer and want one repo to rule everything.
+
+If you're bootstrapping from scratch, start with the public repo's `infra/` folder and [`infra/README.md`](../infra/README.md).
+
+---
+
+## Step 5 — wire local defaults
+
+After deploy, export endpoint + API key:
+
+```bash
+export EXPERT_AGENT_ENDPOINT="https://my-agent-xxxx.a.run.app"
+export EXPERT_AGENT_API_KEY="$(gcloud secrets versions access latest --secret=admin-key-my-agent)"
+```
+
+Now the bare commands work:
+
+```bash
+expert sync
+expert ask "..."
+```
+
+For multi-agent repos, prefer per-agent env vars referenced by `expert.toml`:
+
+```bash
+export CARDIOLOGY_AGENT_ENDPOINT="https://cardiology-xxxx.a.run.app"
+export CARDIOLOGY_AGENT_API_KEY="..."
+
+export DERM_AGENT_ENDPOINT="https://derm-xxxx.a.run.app"
+export DERM_AGENT_API_KEY="..."
+```
+
+Then:
+
+```bash
+expert @cardiology ask "..."
+expert @dermatology sync
+```
+
+---
+
+## Step 6 — CI with the reusable E2E workflow
+
+Create `.github/workflows/e2e.yml` in your private repo.
+
+### Single-agent repo
+
+```yaml
+name: expert-e2e
+
+on:
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  e2e:
+    uses: feliperbroering/expert-agent/.github/workflows/expert-e2e.yml@main
+    with:
+      schema: agent_schema.yaml
+    secrets:
+      endpoint: ${{ secrets.EXPERT_AGENT_ENDPOINT }}
+      api-key: ${{ secrets.EXPERT_AGENT_API_KEY }}
+```
+
+### Multi-agent repo
+
+```yaml
+name: expert-e2e
+
+on:
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  e2e:
+    strategy:
+      fail-fast: false
+      matrix:
+        agent:
+          - name: cardiology
+            schema: cardiology/agent_schema.yaml
+            endpoint_secret: CARDIOLOGY_AGENT_ENDPOINT
+            api_key_secret: CARDIOLOGY_AGENT_API_KEY
+          - name: dermatology
+            schema: dermatology/agent_schema.yaml
+            endpoint_secret: DERM_AGENT_ENDPOINT
+            api_key_secret: DERM_AGENT_API_KEY
+    uses: feliperbroering/expert-agent/.github/workflows/expert-e2e.yml@main
+    with:
+      agent: ${{ matrix.agent.name }}
+      schema: ${{ matrix.agent.schema }}
+    secrets:
+      endpoint: ${{ secrets[matrix.agent.endpoint_secret] }}
+      api-key: ${{ secrets[matrix.agent.api_key_secret] }}
+```
+
+More detail: [`docs/AGENT_E2E_SETUP.md`](./AGENT_E2E_SETUP.md).
+
+---
+
+## Suggested repo extras
+
+If you're making the private repo pleasant for future-you or for teammates, add:
+
+- `README.md` with the repo's purpose + the list of hosted agents
+- `expert.toml` even in single-agent repos if you want explicit endpoint wiring
+- `.gitignore` covering raw source dumps, exports, `.env`, OpenTofu state, and generated reports
+- `docs/OPERATIONS.md` with deploy / rotate-key / rollback steps
+- `.github/CODEOWNERS` if multiple specialists own different agents
+
+Nice next step:
+
+- add a tiny `Makefile` or `justfile` with `validate`, `sync`, `ask`, `e2e`
+
+---
+
+## Suggested `.gitignore`
+
+```gitignore
+.env
+.venv/
+.expert/
+report.html
+log.html
+output.xml
+*.tfstate
+*.tfstate.*
+*.tfplan
+docs/_raw/
+docs/_exports/
+```
+
+---
+
+## Common workflows
+
+### Single-agent daily loop
+
+```bash
+expert validate
+expert count-tokens
+expert sync
+expert ask "what changed in the 2025 guideline?"
+```
+
+### Multi-agent daily loop
+
+```bash
+expert agents
+expert @cardiology validate
+expert @cardiology sync
+expert @cardiology ask "..."
+expert @dermatology ask "..."
+```
+
+### Pin one agent for the day
+
+```bash
+expert use cardiology
+expert ask "..."            # targets cardiology
+expert which
+expert use --clear
+```
+
+---
+
+## Decision guide
+
+Choose **one repo per agent** when:
+
+- each agent has its own deploy cadence
+- prompts/docs are highly sensitive
+- different teams own different agents
+
+Choose **one repo with many agents** when:
+
+- the same team curates all agents
+- the agents share domain docs or infra
+- you want one CI surface and one CLI workspace
+
+If you're unsure, start with **one repo per agent**. You can always merge into a multi-agent workspace later with `expert.toml`.
diff --git a/pyproject.toml b/pyproject.toml
index f90db24..fad7dc9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,13 +4,13 @@ version = "0.1.1"
 description = "Ultra-specialist AI agents as a service — NotebookLM-style, powered by Gemini long-context + Context Caching."
 readme = "README.md"
 requires-python = ">=3.12"
-license = { text = "Apache-2.0" }
+license = { text = "MIT" }
 authors = [{ name = "Felipe Broering", email = "hi@felipe.run" }]
 keywords = ["ai", "agents", "gemini", "llm", "notebooklm", "cloud-run", "fastapi"]
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Intended Audience :: Developers",
-    "License :: OSI Approved :: Apache Software License",
+    "License :: OSI Approved :: MIT License",
     "Programming Language :: Python :: 3 :: Only",
     "Programming Language :: Python :: 3.12",
 ]
@@ -70,7 +70,7 @@ test = [
 ]
 
 [project.scripts]
-expert = "expert.main:app"
+expert = "expert.main:main"
 expert-agent-backend = "app.main:run"
 
 [project.urls]
diff --git a/uv.lock b/uv.lock
index 1f039f9..5b248d7 100644
--- a/uv.lock
+++ b/uv.lock
@@ -544,7 +544,7 @@ wheels = [
 
 [[package]]
 name = "expert-agent"
-version = "0.1.0"
+version = "0.1.1"
 source = { editable = "." }
 dependencies = [
     { name = "bcrypt" },