diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 59afe96..b7684a9 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "orq", - "version": "0.0.2", + "version": "0.1.0", "description": "Agent skills for building, deploying, evaluating, and monitoring LLM pipelines on the orq.ai platform.", "author": { "name": "orq.ai", diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json index 519d7c3..38a41b4 100644 --- a/.codex-plugin/plugin.json +++ b/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "orq", - "version": "0.0.2", + "version": "0.1.0", "description": "Agent skills for building, deploying, evaluating, and monitoring LLM pipelines on the orq.ai platform.", "author": { "name": "orq.ai", diff --git a/.cursor-plugin/plugin.json b/.cursor-plugin/plugin.json index 3a175c1..99c7f64 100644 --- a/.cursor-plugin/plugin.json +++ b/.cursor-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "orq", "displayName": "orq.ai", - "version": "0.0.2", + "version": "0.1.0", "description": "Agent skills for building, deploying, evaluating, and monitoring LLM pipelines on the orq.ai platform.", "author": { "name": "orq.ai", diff --git a/CHANGELOG.md b/CHANGELOG.md index de563f9..d0932e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.0] - 2026-05-14 + +### Added +- `manage-skills` skill — CRUD workflow for the orq.ai Skills entity (formerly Prompt Snippets), backed by `/v2/skills`. Covers list, get, create, update, soft-disable (`enabled: false`), and delete via the `*_skill` MCP tools. 
Includes authoring guidance (`display_name`, `description`, `tags`, `project_id`, `path`, `enabled`) and disambiguates the platform Skill entity from this repo's code-assistant Orq Skills and from the unrelated A2A `AgentCard.skills` array. +- `manage-skills`: documents the `{{snippet.}}` template placeholder as the only mechanism for consuming Skills inside prompts and agent instructions (the `snippet.` prefix is a backwards-compat holdover from the rename — there is no `{{skill.<...>}}` syntax). +- `manage-skills`: reference-scan-before-delete workflow — paginates `search_entities`, fetches each candidate's body with `get_deployment` / `get_agent` / `get_skill`, and substring-matches `{{snippet.}}` to surface consumers before any destructive operation. Defaults to `enabled: false` (soft disable) when references are found. +- `manage-skills`: rename-breaks-references warning on `display_name` updates — runs the same reference scan before any rename and offers to fan out updates in the same session. +- `manage-skills`: documents `GET /v2/skills` cursor pagination (`limit` / `starting_after` / `ending_before`) and the lack of server-side filters; pushes `project_id` / `tags` / `display_name` filtering to the client. +- `manage-skills`: anti-pattern guidance against `+NEVER+` / "you MUST refuse" prose constraints in `instructions` — recommends MCP tool gates for hard guardrails. +- `manage-skills`: error-handling guidance for `create_skill` `AlreadyExists` (offers either a renamed create or `update_skill` against the existing Skill). +- `/manage-skills` slash command — routes to list / get / create / update / disable / delete phases. + ## [0.0.2] - 2026-04-21 ### Added diff --git a/README.md b/README.md index d6e4b17..d2b4ab6 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,7 @@ Skills are triggered by describing what you need. 
Claude picks the right skill a | **compare-agents** | Run cross-framework agent comparisons using evaluatorq — compare orq.ai, LangGraph, CrewAI, OpenAI Agents SDK, and others | [SKILL.md](skills/compare-agents/SKILL.md) | | **generate-synthetic-dataset** | Generate and curate evaluation datasets — structured generation, quick from description, expansion, and dataset maintenance | [SKILL.md](skills/generate-synthetic-dataset/SKILL.md) | | **optimize-prompt** | Analyze and optimize system prompts using a structured prompting guidelines framework | [SKILL.md](skills/optimize-prompt/SKILL.md) | +| **manage-skills** | Manage orq.ai Skills (the platform entity) — list/get/create/update/delete, authoring guidance, governance (`agent.skills[]` wiring), and platform-caveat workarounds | [SKILL.md](skills/manage-skills/SKILL.md) | --- diff --git a/agents/AGENTS.md b/agents/AGENTS.md index e31b6da..fc59c64 100644 --- a/agents/AGENTS.md +++ b/agents/AGENTS.md @@ -14,6 +14,7 @@ These skills are: - compare-agents -> "skills/compare-agents/SKILL.md" - generate-synthetic-dataset -> "skills/generate-synthetic-dataset/SKILL.md" - invoke-deployment -> "skills/invoke-deployment/SKILL.md" + - manage-skills -> "skills/manage-skills/SKILL.md" - optimize-prompt -> "skills/optimize-prompt/SKILL.md" - run-experiment -> "skills/run-experiment/SKILL.md" - setup-observability -> "skills/setup-observability/SKILL.md" @@ -40,6 +41,8 @@ compare-agents: `Run cross-framework agent comparisons using evaluatorq — comp setup-observability: `Set up orq.ai observability for LLM applications — AI Router proxy, OpenTelemetry, tracing setup, and trace enrichment. Use when setting up tracing, adding the AI Router proxy, integrating OpenTelemetry, auditing existing instrumentation, or enriching traces with metadata. 
Do NOT use when traces already exist and you need to debug failures (use analyze-trace-failures).` +manage-skills: `Manage orq.ai Skills (the platform entity, formerly Snippets — distinct from this repo's code-assistant skills) end-to-end: list, get, create, update, enable/disable, and delete Skills via the /v2/skills API. Covers authoring guidance (display_name, description, tags, project_id, path, enabled), how Skills get consumed via the {{snippet.}} placeholder in prompts and agent instructions, the reference-scan-before-delete workflow, the rename-breaks-references warning, and the +NEVER+ prose anti-pattern. Use when the user wants to create, audit, edit, soft-disable, or retire orq.ai Skills.` + Paths referenced within SKILL folders are relative to that SKILL. For example the build-evaluator `resources/judge-prompt-template.md` would be referenced as `skills/build-evaluator/resources/judge-prompt-template.md`. diff --git a/commands/manage-skills.md b/commands/manage-skills.md new file mode 100644 index 0000000..277dc2a --- /dev/null +++ b/commands/manage-skills.md @@ -0,0 +1,45 @@ +--- +name: manage-skills +description: Manage orq.ai Skills — list, get, create, update, disable, or delete Skills (the platform entity, formerly Snippets) and find the prompts/agents that reference them +argument-hint: [list|get|create|update|disable|delete] [name-or-id] +allowed-tools: AskUserQuestion, mcp__orq-workspace__list_skills, mcp__orq-workspace__get_skill, mcp__orq-workspace__create_skill, mcp__orq-workspace__update_skill, mcp__orq-workspace__delete_skill, mcp__orq-workspace__search_entities, mcp__orq-workspace__get_deployment, mcp__orq-workspace__get_agent +--- + +# Manage Skills + +Quick entry point into the `manage-skills` skill. Routes to the right phase based on the first argument, or asks if no argument is given. + +## Instructions + +### 1. 
Parse arguments + +`$ARGUMENTS` may contain an action and optionally a Skill `display_name` or `skill_id`: + +- `list` — Phase 1 (list / audit) +- `get <name-or-id>` — Phase 2 (inspect a Skill) +- `create` — Phase 3 (create a new Skill) +- `update <name-or-id>` — Phase 4 (edit, including `enabled` / `display_name` / `instructions`) +- `disable <name-or-id>` — Phase 4 shortcut: flip `enabled: false` (soft-retire) +- `delete <name-or-id>` — Phase 5 (reference scan + delete) + +If `$ARGUMENTS` is empty, ask the user which action they want via `AskUserQuestion` and offer the six choices above. + +If `$ARGUMENTS` contains an action that requires a name/id but none was provided (e.g., `get`, `update`, `disable`, `delete`), call `list_skills` first and ask the user to pick. + +### 2. Delegate to `manage-skills` + +Read `skills/manage-skills/SKILL.md` and execute the matching phase. Pass the parsed name/id along. + +### 3. Safety rails + +- **Never** auto-execute `delete_skill` from this command — always route through Phase 5's reference-scan + warn-then-confirm flow. +- **Always** offer `enabled: false` (soft disable) as the default first step when the reference scan finds consumers. +- **Always** confirm project scope before `create_skill`. +- **Always** warn before sending a `display_name` rename — it silently breaks every `{{snippet.<display_name>}}` reference. + +### 4. Error handling + +- **Auth errors** — "Authentication failed. Check that your `ORQ_API_KEY` is valid." +- **`AlreadyExists` on create** — surface the conflicting Skill (paginate `list_skills`, find by `display_name`) and offer either a renamed create or `update_skill` against the existing one. +- **Skill-tool unavailable** — "The orq MCP server doesn't expose `*_skill` tools in this workspace. Falling back to REST `/v2/skills` — confirm before proceeding." +- **MCP unreachable** — "Could not reach the orq.ai MCP server. 
Make sure it's configured: `claude mcp add --transport http orq-workspace https://my.orq.ai/v2/mcp --header 'Authorization: Bearer ${ORQ_API_KEY}'`" diff --git a/skills/manage-skills/SKILL.md b/skills/manage-skills/SKILL.md new file mode 100644 index 0000000..3d4442a --- /dev/null +++ b/skills/manage-skills/SKILL.md @@ -0,0 +1,245 @@ +--- +name: manage-skills +description: > + Manage orq.ai Skills (the platform entity, formerly called Snippets) end-to-end — + list, get, create, update, enable/disable, and delete Skills, plus authoring + guidance (display name, description, tags, project scoping, path placement), + and how Skills get consumed (the `{{snippet.}}` template placeholder + inside prompts and agent instructions). Use when the user wants to create, + audit, edit, retire, or hook up orq.ai Skills. +allowed-tools: Bash, Read, Write, Edit, Grep, Glob, WebFetch, Task, AskUserQuestion, mcp__orq-workspace__list_skills, mcp__orq-workspace__get_skill, mcp__orq-workspace__create_skill, mcp__orq-workspace__update_skill, mcp__orq-workspace__delete_skill, mcp__orq-workspace__search_entities, mcp__orq-workspace__get_deployment, mcp__orq-workspace__get_agent +--- + +# Manage Skills + +You are an **orq.ai Skills lifecycle specialist**. Your job is the full CRUD workflow for the **Skills entity on the orq.ai platform** — historically called *Prompt Snippets* and renamed to *Skills* in the platform-api / Studio. Skills are modular, reusable instruction blocks intended to be inlined into prompts and agent instructions via the `{{snippet.}}` template placeholder. (The placeholder kept the legacy `snippet.` prefix for backwards compatibility.) + +## Production-readiness notice (verify before relying on this skill) + +The Skills entity (`/v2/skills` REST + `*_skill` MCP tools) is delivered on a backend feature branch and may not be available in every workspace yet. **Run the preflight check** in [Prerequisites](#prerequisites) before any phase. 
If the MCP `*_skill` tools or the REST endpoints are missing, fall back to managing Prompt Snippets via the legacy `/v2/prompts/snippets` endpoints — the *entity is the same*, just under the older name. + +Two known wiring gaps to surface to the user when relevant: + +1. **Snippet→Skill migration is one-way and asynchronous.** Existing Prompt Snippets are migrated to Skills via a backend cronjob; Skills created via the new API are *not* back-propagated to the snippet representation. +2. **Renderer wiring may lag.** The `{{snippet.}}` template resolver reads from a Redis cache that has historically been populated by the legacy snippet handlers. Whether Skills created through the new API land in that cache depends on whether the entity-event subscriber that bridges them is live in the user's workspace. **Always verify in a test prompt that a newly created Skill actually renders before promoting it to production.** + +## Disambiguation: which "Skill" are we talking about? + +This skill manages the **platform Skill entity on orq.ai** (`/v2/skills`, surfaced as Skills in the Studio, formerly Prompt Snippets). It is *not*: + +- **Orq Skills (this repo):** code-assistant skills like `manage-skills` itself, distributed via the `assistant-plugins` marketplace and documented at . Those live in `skills//SKILL.md` files in this repo. +- **Anthropic / Agent Skills standard:** the cross-vendor SKILL.md format (same shape as the repo skills above; unrelated to the platform entity). +- **The A2A `AgentCard.skills` array on agents:** that field is AI-generated capability metadata, not a list of platform-Skill references. Deleting a platform Skill does **not** orphan anything in `AgentCard.skills`. + +When the user says "create a Skill" without context, ask which one they mean. The rest of this document is exclusively about the platform entity. 
+ +## When to use + +- "List the Skills in my workspace" / "audit my Skills" +- "Create a Skill called X" / "make a snippet for Y" +- "Update / rename / re-tag this Skill" +- "Disable this Skill" (soft retire) or "delete this Skill" +- "How do I reference a Skill from a prompt or agent instruction?" +- "I deleted a Skill — what breaks?" + +## When NOT to use + +- **Build the agent itself?** → `build-agent` +- **Invoke a deployment or agent?** → `invoke-deployment` +- **Evaluate an agent that uses the Skill?** → `run-experiment` +- **Improve the prose inside `instructions`?** → `optimize-prompt` is tuned for system prompts; reuse its clarity heuristics but apply judgment — Skill `instructions` are typically shorter and more capability-scoped. +- **Debug why a referenced Skill isn't rendering?** → `analyze-trace-failures` + +## Companion Skills + +- `build-agent` — author the agents whose instructions reference these Skills via `{{snippet.<display_name>}}` +- `optimize-prompt` — review prose quality for `instructions` +- `run-experiment` — verify a Skill change improves downstream behavior +- `analyze-trace-failures` — diagnose Skills that aren't producing the expected output in production + +## Constraints + +- **ALWAYS** confirm the project scope (`project_id` set vs. workspace-wide) before `create_skill`. Default to project-scoped unless the user is explicit. +- **ALWAYS** read the current Skill with `get_skill` before `update_skill` — never blind-overwrite tags, description, or instructions. +- **ALWAYS** before `delete_skill`, find places that may reference the Skill via `{{snippet.<display_name>}}` (other Skills' `instructions`, deployment prompts, agent instructions) and warn the user — those references will silently render to empty/missing content after the Skill is gone. +- **ALWAYS** offer `enabled: false` (soft disable) as an alternative to `delete_skill`. A disabled Skill is preserved and can be re-enabled at any time, which makes it a safer first step when you're not sure who depends on it; how its references render while it is disabled depends on workspace renderer wiring, so verify that before relying on it. 
+- **NEVER** rely on `+NEVER+` (or any prose negation) inside `instructions` as a hard guardrail. Skill instructions are *soft* hints to the model; hard constraints belong in **MCP tool gates** (refuse the call at the tool layer). See [resources/known-caveats.md](resources/known-caveats.md). + +## orq.ai Documentation + +> **Snippets (the entity, now also called Skills) overview:** +> **Using snippets in agent instructions:** (see the Snippets section) +> **Code-assistant Orq Skills (disambiguation):** + +### orq MCP Tools + +| Tool | Purpose | +|------|---------| +| `list_skills` | List Skills in the workspace; cursor-paginated, **no server-side filters beyond pagination** — see Pagination & Filtering below | +| `get_skill` | Fetch a single Skill by `skill_id` (returns full Skill object) | +| `create_skill` | Create a new Skill (`display_name`, `description`, `tags`, `path`, `project_id`, `instructions`, `enabled`). Returns `AlreadyExists` if the `display_name` is taken in the workspace — handle that error rather than pre-checking. | +| `update_skill` | Patch an existing Skill by `skill_id` (any of: `display_name`, `description`, `tags`, `path`, `instructions`, `enabled`). PATCH semantics — only sent fields change. | +| `delete_skill` | Permanently delete a Skill by `skill_id`. Does not scrub references in prompts/agent instructions — see Phase 5. | +| `search_entities` | Used to find deployments/agents that may inline the Skill via `{{snippet.}}`; combine with `get_deployment` / `get_agent` for the actual reference scan. | + +> **Tool discovery:** Before the first run, list the connected MCP server's tools (`/mcp` in Claude Code, or inspect via the client) and confirm the `*_skill` tools above exist. Tool names sometimes vary by workspace or MCP server version. 
+> +> **REST fallback:** All five tools are backed by `/v2/skills` REST endpoints — `GET /v2/skills` (list, cursor-paginated), `GET /v2/skills/{skill_id}`, `POST /v2/skills`, `PATCH /v2/skills/{skill_id}`, `DELETE /v2/skills/{skill_id}`. Use these directly with `Authorization: Bearer ${ORQ_API_KEY}` if the MCP tools aren't exposed. + +### Pagination & Filtering + +`GET /v2/skills` (and the `list_skills` MCP tool) accepts **only** cursor-pagination parameters: `limit` (default 10, max 200), `starting_after`, `ending_before`. **There is no server-side filter for `project_id`, `tags`, `display_name`, or free text.** Filter by those facets **client-side** after pagination, or use `search_entities` if it indexes Skills. + +**Pagination loop (pseudocode):** + +```text +cursor = None +all_skills = [] +while True: + page = list_skills(limit=200, starting_after=cursor) + all_skills.extend(page.data) + if not page.has_more: + break + cursor = page.data[-1].id # the response uses "id", not "skill_id" (it's the same value) +``` + +After collecting all Skills, filter in memory: + +```text +project_skills = [s for s in all_skills if s.project_id == target_project_id] +tagged_skills = [s for s in all_skills if "policy" in s.tags] +``` + +### Field reference + +| Field | Direction | Notes | +|------|------|------| +| `display_name` | create / update / read | Human-facing label and the **lookup key** used by `{{snippet.}}`. Regex: `^[A-Za-z0-9]+(?:[_-][A-Za-z0-9]+)*$`, max 255 chars. Must be unique within the workspace; `create_skill` returns `AlreadyExists` on conflict. | +| `description` | create / update / read | Short explanation of what the Skill does. Surfaces in the Studio's Skill picker. | +| `tags` | create / update / read | Array of strings. Filtering is client-side (see above). | +| `path` | create / update / read | Finder-style location, e.g. `Default/Skills` or `cs/policies`. Defaults to project's default skill folder. 
| +| `project_id` | create / update / read | Optional — omit for workspace-wide. | +| `instructions` | create / update / read | The actual Skill body — modular markdown that gets inlined wherever the Skill is referenced. | +| `enabled` | create / update / read | Boolean (default `true`). Whether `{{snippet.}}` references for a disabled Skill render to empty/pass-through depends on workspace renderer wiring (verified at the time of writing: the resolver reads from the legacy snippet KV cache, which has no notion of `enabled`; behavior may change). Treat `enabled: false` as a soft-disable signal in the API and audit log; verify the actual render effect before relying on it. | +| `skill_id` | read / update / delete | Server-generated id. **The list/get response surfaces it as `id`** but the update/delete inputs take it as `skill_id`. Same value. | +| `workspace_id` | read only | Audit. | +| `created_at`, `updated_at`, `created_by_id`, `updated_by_id` | read only | Audit metadata. | + +> **Note on versioning:** The Skill object does **not** carry a `version` field. The platform records a semantic-version *activity log entry* on each create/update (visible in the Skill's history in the Studio), but you cannot read or set a version on the Skill itself. Don't ask the user "is this a major/minor/patch change?" — there's no field to write it to. + +> **`{{snippet.}}` template placeholder:** The primary way Skills get consumed is by referencing them inside any prompt template or agent instruction with `{{snippet.}}`. At render time the placeholder is replaced with the Skill's `instructions`. The `snippet.` prefix is a backwards-compatibility holdover from when the entity was called Prompt Snippets — there is no `{{skill.<...>}}` equivalent. Keep this in mind when authoring user-facing copy. 
+ +## Resources + +- **Authoring guide** (display name, description, tags, project scoping, path): See [resources/authoring-guide.md](resources/authoring-guide.md) +- **Governance** (consumption patterns, ownership, lifecycle): See [resources/governance-guide.md](resources/governance-guide.md) +- **Known caveats** (template-reference scrubbing on delete, prose-negation anti-pattern): See [resources/known-caveats.md](resources/known-caveats.md) + +## Prerequisites + +- The orq.ai MCP server is connected (run the `quickstart` skill / `/orq:quickstart` to verify in Claude Code, or the equivalent onboarding flow in your assistant). +- `ORQ_API_KEY` is set. +- The user knows which **project** the Skill belongs to (run `search_directories` if not). +- **Preflight: confirm the Skills API is available.** Try `list_skills` once at session start. If the tool is unknown OR returns "method not found" against `/v2/skills`, the workspace's backend doesn't expose the new entity yet. Two options: + 1. Tell the user and fall back to managing the entity under its legacy name (Prompt Snippet via `/v2/prompts/snippets`). + 2. Ask the user whether to proceed anyway against any partial endpoints they have. + +--- + +## Workflow + +Pick the phase that matches the user's intent. Most sessions are a single phase; the **delete** phase always pairs with a reference scan. + +### Phase 1: List / audit + +Use when the user wants visibility into existing Skills. + +1. Call `list_skills` and **paginate to completion** (see Pagination & Filtering above). Default `limit=200` to minimize round-trips. +2. **Apply user filters client-side** — `list_skills` does not accept `project_id` / `tags` / `q` / `display_name` filters. Examples: + - "Skills in the `cs` project" → filter `project_id == ` (resolve project key → id via `search_directories` first if needed). + - "Skills tagged `policy`" → filter `"policy" in s.tags`. + - "Skills whose name contains `refund`" → substring match on `display_name`. +3. 
Present a scannable table: + ``` + Skills (12) + - customer-support-tone (cs, [tone, voice], path: cs/style) — enabled + - extract-receipt-fields (finance, [extraction], path: Default/Skills) — enabled + - refund-policy (workspace-wide, [policy, cs], path: Default/Skills) — DISABLED + ... + ``` +4. For each Skill, surface: `display_name`, project (or "workspace-wide"), `tags`, `path`, `enabled` state. **Reference counts are expensive** — they require text-searching prompts/agent instructions for `{{snippet.}}`. Compute them lazily on user request, not for every row. (See Phase 5 for the reference-scan pattern.) + +### Phase 2: Get / inspect + +Use before any update or delete, and whenever the user asks "what does Skill X do?" + +1. Call `get_skill(skill_id=...)`. +2. Display: `display_name`, `description`, `tags`, `project_id` (or "workspace-wide"), `path`, `enabled`, `instructions` (truncated). Mention how it's likely consumed: `{{snippet.}}` inside prompts or agent instructions. +3. If the user asks "where is this used?", run a reference scan (see Phase 5 step 1). + +### Phase 3: Create + +Use when the user wants a new Skill. + +1. **Gather inputs** via `AskUserQuestion`: + - **`display_name`** — short, descriptive, regex `^[A-Za-z0-9]+(?:[_-][A-Za-z0-9]+)*$`, ≤255 chars on the platform. This repo's recommended convention is kebab-case ≤50 chars (recommend rather than enforce). See [authoring-guide](resources/authoring-guide.md). + - **`description`** — one sentence describing *when to apply the Skill*. Used by humans (Studio picker); not a runtime trigger. + - **`tags`** — at least one functional tag; reuse existing tags where possible (paginate `list_skills` first). + - **`project_id`** — the target project's id, OR omit for workspace-wide. Default to **project-scoped**; confirm before going workspace-wide. If the user gives a project key, resolve it to an id via `search_directories`. + - **`path`** — finder location for the Skill, e.g. 
`Default/Skills` or `policies/refunds`. Default to the project's standard Skill folder. + - **`instructions`** — the actual content that will be inlined wherever the Skill is referenced. Keep it focused on one capability. + - **`enabled`** — defaults to `true` on create. Ask only if the user wants to seed a disabled Skill. +2. **Validate** before submitting: + - Description starts with "Use when…" or describes a trigger condition. + - `instructions` does NOT rely on `+NEVER+` / "always refuse" prose for hard guardrails — link the user to [known-caveats](resources/known-caveats.md) and recommend an MCP tool gate instead. +3. Call `create_skill` with the validated payload. + - **Error: `AlreadyExists`** — the `display_name` is already taken in the workspace. Show the conflicting Skill (paginate `list_skills`, find by `display_name`) and offer either a renamed create or an `update_skill` against the existing one. + - **Error: project / path validation failure** — the API will return a `CodeInvalidArgument`. Re-ask for `project_id` / `path` and retry. +4. Echo back the new Skill's `id`, `path`, and a one-line summary. Tell the user how to consume it: `{{snippet.}}` inside any prompt template or agent instruction. + +### Phase 4: Update + +Use when the user wants to edit an existing Skill. + +1. **Always `get_skill` first.** Show the current state and confirm the diff the user is about to apply. +2. **Patch fields explicitly.** Only send the fields being changed (`update_skill` is a patch — don't echo back unchanged tags or `instructions`). +3. **`display_name` rename — DANGER.** The `display_name` IS the lookup key for `{{snippet.}}`. Renaming it silently breaks every prompt or agent instruction that references the old name. Before sending a rename, run the reference scan from Phase 5 step 1 and warn the user. Offer to update the references in the same session. +4. **`enabled: false`** — flipping a Skill to disabled is the soft-retirement path. 
Existing references stop resolving (verify in your workspace what they render to — empty string, missing, or pass-through). Recommend this as the default first step when retiring; reserve `delete_skill` for actual cleanup. +5. **`instructions` changes:** if the user is rewriting the body, run a clarity pass first — reuse `optimize-prompt`'s heuristics (clarity, structure, no soft-constraint anti-patterns) but adapt; Skill `instructions` are typically shorter and capability-scoped, not full system prompts. +6. **Verify** by calling `get_skill` post-update and confirming the change landed. + +### Phase 5: Delete (with reference scan) + +Use when the user wants to permanently retire a Skill. **`delete_skill` is irreversible** and does not scrub `{{snippet.}}` references elsewhere — those references silently fail to resolve after delete. Always offer `enabled: false` first (Phase 4 step 4) and only proceed to delete when the user is sure. + +1. **Reference scan.** Find places that may reference the Skill by its `display_name`: + - Run `search_entities` to enumerate prompts, deployments, agents, and other Skills in the workspace. + - For each candidate, fetch its full body (`get_deployment` for deployments; `get_agent` for agents; `get_skill` for other Skills' `instructions`) and grep the body for `{{snippet.}}` (case-sensitive — match the Skill's exact `display_name`). + - Note: this scan can be expensive in large workspaces. Cache results within the session. + - If the user has a faster way to grep their workspace (e.g., a synced repo of prompts), prefer that. +2. **Warn and confirm.** Show the user: + - The Skill's `display_name`, `id`, project scope, and `enabled` state. + - The list of references found (or "no references found in scanned entities — but the scan only covers prompts/agents/Skills surfaced via `search_entities`; manual checks may be needed"). 
+ - **The two-option choice:** *"(a) Soft-disable now (`enabled: false`) and revisit in N days, or (b) hard-delete and accept that any reference I missed will silently fail to render?"* Default to (a) when the scan found references; default to (b) only when the scan was comprehensive AND empty AND the user has confirmed. +3. **If the user picks delete:** call `delete_skill(skill_id=...)`. Confirm that the API call succeeded. +4. **Report.** Summarize: Skill deleted (or disabled); references that the user should manually check or update; recommended follow-up if any. + +See [resources/known-caveats.md](resources/known-caveats.md) for the full caveat context. + +--- + +## Done When + +- The user's intent (list / get / create / update / disable / delete) is fully resolved. +- Any `delete_skill` was preceded by a reference scan AND an explicit choice to delete-rather-than-disable. +- `display_name` renames are gated behind a reference scan and the user understands the breakage risk. +- `instructions` changes were sanity-checked for clarity and for prose-negation anti-patterns before save. +- New or updated Skills have a non-empty `description`, at least one tag, an explicit project scope, and a sensible `path`. +- The user has a clear pointer to how the Skill is (or will be) consumed: `{{snippet.<display_name>}}` inside prompts or agent instructions. + +## Open in orq.ai + +- **Skills index:** [my.orq.ai](https://my.orq.ai/) → Skills +- **Studio:** [my.orq.ai](https://my.orq.ai/) → Studio (Skills appear in the snippet/skill picker when authoring prompts and agents) + +When this skill conflicts with live API responses or docs.orq.ai, trust the API. 
diff --git a/skills/manage-skills/resources/authoring-guide.md b/skills/manage-skills/resources/authoring-guide.md new file mode 100644 index 0000000..27fa052 --- /dev/null +++ b/skills/manage-skills/resources/authoring-guide.md @@ -0,0 +1,129 @@ +# Authoring Guide: Display Name, Description, Tags, Project Scope, Path + +How to author an orq.ai Skill so it's discoverable, scoped correctly, and renders cleanly wherever it's referenced. + +--- + +## `display_name` (the lookup key) + +`display_name` is both the human-facing label AND the lookup key used by `{{snippet.}}` placeholders. Pick it carefully — renaming it after consumers exist silently breaks every reference. See [known-caveats.md](known-caveats.md). + +**Platform constraints (enforced):** +- Regex: `^[A-Za-z0-9]+(?:[_-][A-Za-z0-9]+)*$` (alphanumeric with optional single dash/underscore separators) +- Max 255 characters +- Must be unique within the workspace — `create_skill` returns `AlreadyExists` on conflict + +**This repo's recommended convention** (a stricter subset that keeps lists scannable and placeholders readable): +- **kebab-case**, lowercase, ASCII only — e.g., `extract-receipt-fields` +- **≤50 characters** — long names get truncated in Studio tables and bloat placeholders +- **Verb-noun preferred** — `summarize-ticket`, `classify-intent`, `extract-pii` +- **Avoid generic verbs alone** — `handle-thing`, `do-task`, `process` say nothing +- **No version suffixes** — `summarize-ticket-v2` is an anti-pattern; treat the Skill itself as the unit of change and rely on the activity log for history + +These are recommendations, not enforced by the API. Diverge if a stronger convention already exists in the workspace, but stay consistent. 
+ +**Good (recommended convention):** +- `extract-invoice-line-items` → referenced as `{{snippet.extract-invoice-line-items}}` +- `redact-pii-from-transcript` +- `format-currency-eur` + +**Bad:** +- `helper` (too vague) +- `the-skill-that-handles-customer-support-emails-with-tone-checking` (too long; ugly in placeholders) +- `summarize-ticket-v2` (version belongs in the activity log) + +--- + +## `description` + +`description` is human-facing copy shown in the Studio's Skill picker and audit views. **It is not a runtime trigger** — Skills are inlined wherever a `{{snippet.}}` placeholder exists in a prompt/agent instruction; the model doesn't pick them based on description. + +**Rules:** +- **One sentence.** Keep it scannable. +- **Lead with what the Skill does**, not how. Implementation detail belongs in `instructions`. +- **Mention the intended consumer** if it's not obvious from the name — e.g., "Reusable PII redaction block for customer-support agents." +- **Avoid "always" / "never" / "must"** — those are constraints, not descriptions. Hard rules belong in tool gates, not in description text. + +**Good:** +> Reusable receipt-extraction snippet — extracts merchant, total, tax, and line items into structured JSON. Inline in any prompt that processes receipt images or PDFs. + +**Bad:** +> This skill is a powerful tool that helps you handle receipts in many different formats using OCR. +> *(no concrete output, marketing voice, implementation leak)* + +--- + +## `tags` + +Tags group Skills in the Studio and let callers narrow `list_skills` output **client-side** (`GET /v2/skills` does not accept a `tags` filter — paginate, then filter in memory). Good tagging makes a workspace navigable; bad tagging makes Skills invisible. + +**Rules:** +- **At least one tag.** Untagged Skills are easy to lose in long lists. +- **Reuse existing tags.** Paginate `list_skills` and see which tags are already in use before inventing a new one. 
Tag sprawl is the silent killer of Skill discoverability. +- **Two axes of tagging are usually enough:** + - **Functional** — what the Skill *does*: `extraction`, `summarization`, `classification`, `formatting`, `tone`, `policy` + - **Domain** — where it applies: `finance`, `cs` (customer support), `legal`, `internal` +- **Avoid consumer-specific tags.** A tag like `used-by-checkout-agent` becomes wrong the moment a second consumer adopts the Skill — use the reference scan in [governance-guide.md](governance-guide.md#finding-the-consumers-of-a-skill) to find consumers on demand. +- **Lowercase, kebab-case** for consistency. + +**Recommended tag count:** 1–4 tags per Skill. More than 4 tags usually means the Skill is doing too many things. + +--- + +## `project_id` (project scoping) + +Every Skill is either **project-scoped** (`project_id` set to a project's id) or **workspace-wide** (`project_id` omitted). Workspace-wide Skills are visible to every consumer across the workspace. + +**Default to project-scoped.** Workspace-wide Skills are shared infrastructure — every workspace member can see them, every prompt can reference them, and a bad edit affects everyone. + +**When project-scoped is right:** +- The Skill encodes project-specific business logic (e.g., a refund policy that only applies to the EU project) +- The Skill is still being iterated on and shouldn't be discoverable across teams yet +- Different projects need different versions of the same idea (e.g., `extract-receipt-fields` per region) + +**When workspace-wide is right:** +- The Skill is genuinely reusable across teams and projects (e.g., `redact-pii`, `format-currency`) +- The Skill has stabilized — no recent breaking changes, used by ≥2 consumers +- Ownership is clear (named owner in the description or `owner:` tag) + +**How to choose:** + +1. Start project-scoped (set `project_id`). +2. 
After the Skill has been stable for ≥2 weeks and used by ≥2 consumers in the same project, ask: "would another project benefit from this?" +3. If yes, **create a copy** with `project_id` omitted (workspace-wide) under a new `display_name` — names must be unique within the workspace, so the copy cannot reuse the original's. Don't move — existing references still point at the project-scoped `display_name`. Sunset the original after consumers are re-pointed. + +> **Resolving project keys → ids:** if the user gives you a project key/name, run `search_directories` to convert it to the `project_id` value the API expects. + +--- + +## `path` + +`path` is the finder-style location of the Skill inside its project (e.g., `Default/Skills`, `cs/policies`, `finance/extraction`). It controls where the Skill appears in the Studio's folder tree. + +**Rules:** +- **Default to the project's standard Skill folder** (often `Default/Skills`) unless the team has an explicit folder convention. +- **Mirror existing folders.** Paginate `list_skills` and reuse paths already in the target project — divergent paths fragment the Studio. +- **Use slashes, not backslashes**, and keep segment names short and descriptive. +- **Group by purpose, not by owner.** Folder-by-team becomes wrong the moment a Skill moves teams; folder-by-purpose ages better. + +--- + +## `enabled` + +`enabled` is a boolean that defaults to `true` on create. When `false`, the Skill is preserved in the workspace but `{{snippet.<display_name>}}` references stop resolving (verify the exact render behavior in your workspace — empty, pass-through, or skip). + +**When to seed `enabled: false`:** +- You're staging the Skill for review before any consumer points at it. +- You're setting up parallel versions for a controlled cutover. + +In practice you almost always create with `enabled: true` (the default) and use `enabled` later as the soft-retirement lever (see [governance-guide.md](governance-guide.md#retire)). + +--- + +## `instructions` (the Skill body) + +`instructions` is the actual content that gets inlined wherever the Skill is referenced. 
Keep it: +- **Focused on one capability.** If you find yourself writing "and also…", split into two Skills. +- **Specific.** Include 1–2 input/output examples. +- **Free of hard constraints expressed as prose.** Don't write "NEVER do X" or "you MUST refuse Y" — those are soft hints, not enforcement. See [known-caveats.md](known-caveats.md#anti-pattern-never-prose-constraints-in-instructions). +- **Sanity-checked before save.** Reuse `optimize-prompt`'s clarity heuristics, but apply judgment — Skill `instructions` are typically shorter and more capability-scoped than a system prompt. diff --git a/skills/manage-skills/resources/governance-guide.md b/skills/manage-skills/resources/governance-guide.md new file mode 100644 index 0000000..dae65d3 --- /dev/null +++ b/skills/manage-skills/resources/governance-guide.md @@ -0,0 +1,105 @@ +# Governance Guide: Consumption, Ownership, Lifecycle + +How Skills get consumed, who owns them, and how they retire. + +--- + +## How a Skill gets consumed + +A platform Skill reaches the model in exactly one way: as a **template placeholder inside a prompt or agent instruction**. Anywhere a prompt template is rendered (deployments, agent system prompts, other Skills), the placeholder + +```text +{{snippet.}} +``` + +is replaced with the Skill's `instructions` at render time. The `snippet.` prefix is a backwards-compatibility holdover from when the entity was called Prompt Snippets — there is no `{{skill.<...>}}` equivalent today. + +You can chain references: a Skill's `instructions` may itself contain `{{snippet.}}` placeholders that the renderer expands recursively. + +**Implications:** +- The `display_name` is load-bearing. Renaming a Skill changes the lookup key and silently breaks every existing reference. See [known-caveats.md](known-caveats.md). +- There is no per-agent attachment list. 
The relationship between an agent and a Skill is implicit — it lives in the agent's `instructions` text, not in a structured `skills[]` array on the agent. (The `AgentCard.skills` field is unrelated AI-generated capability metadata. See [known-caveats.md](known-caveats.md).) +- Disabling a Skill (`enabled: false`) takes effect at render time; existing references stop pulling in the `instructions`. Verify the exact behavior in your workspace — it may render to empty, pass-through, or skip silently. + +--- + +## Finding the consumers of a Skill + +There is no `list_consumers(skill_id)` API. To find every place a Skill is referenced, you have to text-search the unrendered template bodies of every candidate entity (after rendering, the placeholder has already been replaced and can no longer be matched): + +```text +1. Enumerate candidates with search_entities (deployments, prompts, agents, other Skills). +2. For each candidate, fetch its full body with the appropriate get_* tool. +3. Substring-match {{snippet.<display_name>}} (case-sensitive) in the body. +4. Collect the matches. +``` + +**When to do the scan:** +- Before `delete_skill` (mandatory). +- Before renaming `display_name` (mandatory). +- When the user asks "where is this Skill used?" +- When auditing workspace-wide Skills for ownership / sunset candidates. + +**Cost considerations:** +- The scan is O(N entities × body size). Cache results within the session. +- For large workspaces, prefer a synced repo grep if the team has one — much cheaper than fanning out HTTP requests. + +--- + +## Ownership + +There is no first-class "owner" field on a Skill today. Establish ownership conventions in `tags` and `description`: + +- **Tag** — add an `owner:` tag (e.g., `owner:cs-team`) to workspace-wide Skills. +- **Description** — for project-scoped Skills, ownership is implicit in the project. For workspace-wide Skills, mention the owning team in the description's trailing context if it matters for incident response. 
+ +Audit unowned workspace-wide Skills periodically: paginate `list_skills`, filter `project_id is None` client-side, then look for missing `owner:` tags. + +--- + +## Lifecycle: Create → Iterate → Stabilize → Retire + +### Create +Always start project-scoped (set `project_id`). Wire one consumer first (a single deployment or agent instruction) and verify the rendered output before broadening. + +### Iterate +- Iterate on `instructions` and `description`, not `display_name`. **Renaming `display_name` breaks every `{{snippet.}}` reference.** See [known-caveats.md](known-caveats.md) for the rename workflow. +- Sanity-check `instructions` rewrites (clarity, structure, no prose-negation anti-patterns) — see `optimize-prompt` for prose heuristics, but apply judgment: Skill `instructions` are usually shorter and more capability-scoped than a system prompt. +- After each meaningful change, run `run-experiment` against an agent or deployment that consumes the Skill to confirm the change improves (or at least doesn't regress) behavior. + +### Stabilize +A Skill is stable when: +- It hasn't had an `instructions` change in ≥2 weeks +- It's referenced by ≥2 prompts/agents +- No open incidents tag the Skill as a contributor + +At that point, consider promoting to workspace-wide if it's broadly reusable. See [authoring-guide.md](authoring-guide.md#project_id-project-scoping). + +### Retire +Retire a Skill when: +- The prompts/agents using it are decommissioned, OR +- A replacement Skill covers the same capability better + +**Retirement workflow:** + +1. Run the reference scan (above) to identify every consumer of the Skill. +2. Decide per consumer: replace (point them at the new Skill name) or remove (drop the placeholder). +3. **Disable first, delete later.** Set `enabled: false` and wait at least one full traffic cycle (a day, a week — depends on how the prompts run). If nothing breaks, proceed to delete. If something breaks, re-enable, investigate, fix the missed reference. 
+4. **Wire replacements before deleting**, not after — atomicity matters. +5. Run `delete_skill`. +6. Note retirement in the workspace changelog if your team keeps one. + +The platform records a semantic-version *activity log entry* on each create/update (visible in the Skill's history view), but there is no `version` field on the Skill object — don't invent a versioning workflow that pretends otherwise. + +--- + +## Audit checklist + +Periodic Skills audit (suggested quarterly): + +- [ ] Any workspace-wide Skill with no `owner:` tag? — assign or move to project-scoped. +- [ ] Any Skill with no `{{snippet.}}` references in scanned entities? — candidate for `enabled: false`, then deletion. +- [ ] Any Skill with `enabled: false` for >30 days and no recent toggles? — candidate for deletion. +- [ ] Any prompt/agent instruction with a `{{snippet.}}` placeholder whose target Skill no longer exists? — orphan reference; either restore the Skill, point the placeholder at a replacement, or remove the placeholder. +- [ ] Any two Skills with near-duplicate `instructions`? — consolidate; rename references. +- [ ] Any Skill `instructions` containing `NEVER`, `MUST NOT`, or "you must refuse"? — prose-negation anti-pattern, replace with MCP tool gate (see [known-caveats.md](known-caveats.md#anti-pattern-never-prose-constraints-in-instructions)). diff --git a/skills/manage-skills/resources/known-caveats.md b/skills/manage-skills/resources/known-caveats.md new file mode 100644 index 0000000..a80932b --- /dev/null +++ b/skills/manage-skills/resources/known-caveats.md @@ -0,0 +1,149 @@ +# Known Caveats and Anti-Patterns + +Active platform behaviors and authoring anti-patterns to handle while working with Skills. + +--- + +## Renderer wiring lag — verify in a test prompt before relying on a new Skill + +**Status:** Verify per workspace + +### Symptom + +The `{{snippet.}}` template placeholder is resolved by a Redis-backed snippet cache (`PROMPT_SNIPPETS_KV`). 
Historically this cache has been populated by the legacy Prompt Snippet handlers; whether the new Skills CRUD path (`/v2/skills`) also populates it depends on whether the entity-event subscriber that bridges Skills → renderer cache is enabled in the user's workspace. + +If that bridge is missing, a Skill created via the new API exists in the Skills index, returns from `get_skill`, and is editable — but its `instructions` will not be inlined when a prompt or agent instruction renders `{{snippet.}}`. + +### Workaround + +After creating or substantively editing a Skill, run a single test render before broadcasting the Skill to other consumers: + +1. Create a one-off prompt/deployment/agent that contains only `{{snippet.}}` (and optionally a delimiter). +2. Invoke it. +3. Confirm the rendered output contains the Skill's `instructions`. + +If the placeholder renders to empty / passes through unchanged, the renderer is not yet wired to the new Skills entity in the workspace. Until it is, treat the Skill as a draft entity only — managed in the API, not yet reachable at runtime. + +### When this gets resolved + +When a backend change-stream consumer or NATS subscriber lands that mirrors `skill.created` / `skill.updated` / `skill.deleted` events into the snippet cache (or the resolver is updated to read directly from the Skills MongoDB collection), this caveat goes away. Until then, the test-render verification is mandatory before promoting a Skill to production use. + +--- + +## `delete_skill` does not scrub `{{snippet.}}` references + +**Status:** Manual reference scan required + +### Symptom + +`delete_skill` removes the Skill entity from the workspace. It does **not** rewrite or null out `{{snippet.}}` placeholders that were referencing the deleted Skill from elsewhere — other Skills' `instructions`, deployment prompt templates, agent instructions, etc. 
+ +After the delete, any leftover `{{snippet.}}` placeholder will silently render to empty / pass-through (the exact behavior depends on the workspace's template engine and excluded-prefix configuration). The result is a prompt that looks correct but is missing a chunk of intended content. There is no error, no log, no UI banner — just a silently degraded prompt. + +### Workaround + +**Always run a reference scan before `delete_skill`**, and prefer `enabled: false` (soft disable) as a first step: + +```text +# 1. Enumerate candidate consumers +candidates = search_entities() # prompts, deployments, agents, other Skills + +# 2. For each candidate, fetch its full body and look for the placeholder +references = [] +for entity in candidates: + body = fetch_full_body(entity) # get_deployment / get_agent / get_skill etc. + if f"{{{{snippet.{skill.display_name}}}}}" in body: # case-sensitive substring + references.append(entity) + +# 3. Show references to the user; default to soft-disable when any are found. +``` + +Key points: +- **Match `display_name` exactly.** The placeholder is case-sensitive; substring-matching `display_name` casually can produce false positives if names overlap. +- **`search_entities` is not exhaustive.** It surfaces what the orq workspace indexes; downstream consumers (external apps that pull prompts via the API and inline them themselves) are invisible to it. If the team has a synced repo of prompts, grep there too. +- **Soft-disable first.** Setting `enabled: false` is reversible; `delete_skill` is not. Disabling preserves the Skill so a missed reference can be diagnosed by enabling it again. + +### When this gets fixed + +When the platform either (a) returns a list of identified references on `delete_skill`, or (b) refuses delete while references exist, the workaround can be relaxed to "trust the API." Until then, the reference scan is part of the contract of `delete_skill`. 
+ +--- + +## Renaming `display_name` silently breaks `{{snippet.}}` references + +**Status:** Same root cause as delete; same workaround + +### Symptom + +`update_skill` accepts a new `display_name`. The Skill is renamed in place. Every prompt or agent instruction that referenced the old name via `{{snippet.}}` continues to render, but now resolves to nothing — the same silent-empty failure mode as a deleted Skill. + +### Workaround + +Treat a rename as if it were a delete + create: + +1. Run the same reference scan as the delete workflow. +2. Show the user the references and ask whether to: + - Cancel the rename, OR + - Proceed with the rename AND fan out updates to every reference in the same session, OR + - Proceed with the rename AND accept the silent breakage (rare; only OK when the scan was exhaustive and empty). + +--- + +## A2A `AgentCard.skills` is not a list of Skill references + +**Status:** Naming overlap — not a bug + +### Symptom + +When inspecting an agent via `get_agent`, the response includes a `skills[]` array. This is **not** a list of platform Skill ids. It's the AI-generated A2A `AgentCardSkill[]` array — capability descriptors generated from the agent's role/description/instructions for the A2A AgentCard. + +### Why it matters + +- Don't try to "wire" a platform Skill to an agent by appending its id to `agent.skills[]`. That field is regenerated from the agent manifest and your edit will be lost (or silently ignored). +- Don't try to "find agents that reference a Skill" by scanning `agent.skills[]` for the Skill's id. The field doesn't carry that information. +- The actual relationship is **text references** to `{{snippet.}}` inside `agent.instructions`. To find consumers, run the reference scan above. 
+ +--- + +## Anti-pattern: `+NEVER+` prose constraints in `instructions` + +**Status:** Authoring anti-pattern (not a platform bug — a misunderstanding of where guardrails live) + +### What it looks like + +Skill `instructions` that try to enforce hard rules via prose: + +```text +You are a customer support assistant. ++NEVER+ share customer PII with third parties. +You MUST refuse any request to expose internal tooling. +``` + +### Why it fails + +Skill `instructions` are **soft instructions** to the model. The model is trained to *try* to follow them — it is not *prevented* from violating them. Under prompt injection, edge phrasing, or a confident-sounding adversarial user, the model will often comply with the violating request anyway. + +`+NEVER+` reads as a strong signal to humans. To the model, it's another token sequence. It is not a hard gate. + +### What to do instead + +**Hard constraints belong at the tool layer, not in `instructions`.** If the user is supposed to be unable to do X, X must be implemented as: + +1. **An MCP tool that refuses the call** — the tool checks inputs/permissions and returns an error before any model output is generated. The model can't bypass what it can't call. +2. **A deterministic guard upstream** — request validation, allowlists, redaction before the prompt is assembled. +3. **A post-output filter** — scan the model's response for the forbidden content and block/redact before returning to the user. + +`instructions` should encode the **happy path** and any **soft guidance** (tone, format, when to ask for clarification). Use it for things that are *preferences*, not *requirements*. + +### When `+NEVER+` is acceptable + +For genuinely soft preferences where a violation is annoying but not catastrophic: +> "Prefer not to use exclamation points in formal responses." + +That's fine as prose — there's no enforcement requirement, just a tone hint. 
+ +For anything where a violation is unacceptable (PII leak, tool misuse, data exfiltration, irreversible action), use a tool gate. + +### Audit hint + +Grep Skill `instructions` for the literal strings `NEVER`, `MUST NOT`, `you must refuse`, `under no circumstances`. Every hit is a candidate for promotion from prose to tool gate. diff --git a/tests/commands.md b/tests/commands.md index f7ac48e..0cfb7e3 100644 --- a/tests/commands.md +++ b/tests/commands.md @@ -32,6 +32,13 @@ Tests the orq-skills slash commands. These verify our command `.md` files produc - Verify it detects MCP is available and skips MCP setup step - Verify it shows workspace snapshot on successful connection +## `/orq:manage-skills` + +- Run with no args → verify it asks which action (list/get/create/update/disable/delete) via `AskUserQuestion` +- Run with `list` → verify it calls `list_skills` (or `/v2/skills` fallback) and prints a scannable table +- Run with `delete <skill>` → verify it routes to Phase 5 (lists referencing prompts, deployments, agents, and other Skills BEFORE deleting; never auto-prunes; asks twice) +- Run with `create` → verify it asks for description, tags, and project scope (defaults to project-scoped) + --- ## Critical Files @@ -41,3 +48,4 @@ Tests the orq-skills slash commands. These verify our command `.md` files produc - `commands/traces.md` - `commands/analytics.md` - `commands/quickstart.md` +- `commands/manage-skills.md` diff --git a/tests/skills.md b/tests/skills.md index 33b9364..32c9d62 100644 --- a/tests/skills.md +++ b/tests/skills.md @@ -140,6 +140,55 @@ Requires `setup.md` to have run first (seed data for `run-experiment` test). 
- Ask: "Run an experiment using orq-skills-test-dataset with orq-skills-test-eval-length" - Verify: calls `create_experiment` with correct references +## `manage-skills` + +### Scenario 1: List skills + +- Ask: "Show me the Skills in my workspace" +- Verify: calls `list_skills` (or REST `GET /v2/skills` fallback) and **paginates to completion** (cursor-based — `limit`, `starting_after`, `ending_before`) +- Verify: any user-requested filter (project, tags, name substring) is applied **client-side** after pagination — does NOT pass `project_id`/`tags`/`q` to `list_skills` (the endpoint does not accept them) +- Verify: presents `display_name`, project scope, `tags`, `path`, and `enabled` state per Skill +- Verify: does NOT claim a `version` field on the Skill (none exists in the schema) +- Verify: does NOT compute reference counts eagerly — defers them as on-demand work + +### Scenario 2: Create skill (authoring guidance) + +- Ask: "Create a Skill called `extract-receipt-fields`" +- Verify Phase 3: asks for `description`, `tags`, `project_id` (default project-scoped, not workspace-wide), and `path` +- Verify: warns if the proposed `instructions` contain `+NEVER+` / "you MUST refuse" prose constraints and recommends an MCP tool gate instead +- Verify: does NOT call a fictional `:checkDisplayNameAvailability` endpoint — instead, calls `create_skill` and handles `AlreadyExists` if the name is taken +- Verify: `create_skill` payload uses `display_name` and `instructions` (not `name` / `body` / `doc`); includes `enabled` only if user requested non-default +- Verify: echoes back the consumption pattern after create — `{{snippet.}}`, NOT `{{skill.<...>}}` + +### Scenario 3: Delete skill — reference scan + +- Provide context: a Skill referenced by 2 prompts via `{{snippet.}}` +- Ask: "Delete this Skill" +- Verify: runs a reference scan BEFORE deletion (`search_entities` then per-entity body fetch with `get_deployment` / `get_agent` / `get_skill`, substring-matching 
`{{snippet.}}` case-sensitively) +- Verify: surfaces the references found and offers `enabled: false` (soft disable) as the default first step +- Verify: does NOT call `update_agent` to "prune" `agent.skills[]` — that field is unrelated A2A AgentCard metadata +- Verify: never auto-deletes; always requires explicit consent after the user has seen the reference list +- Verify: final report lists what was deleted (or disabled) and any references the user should manually update + +### Scenario 4: Update skill (no blind overwrite, rename warning) + +- Ask: "Update the description of the `refund-policy` Skill" +- Verify: calls `get_skill(skill_id=...)` first, shows the user the current state +- Verify: only patches the changed field — does not echo back unchanged `tags`/`instructions` +- Verify: does NOT pass `version` in `update_skill` (no such field on the schema) +- Verify: confirms the diff with the user before `update_skill` +- Then ask: "Rename `refund-policy` to `refund-policy-eu`" +- Verify: warns that renaming `display_name` silently breaks every `{{snippet.refund-policy}}` reference and runs the reference scan before sending the rename +- Verify: when rewriting `instructions`, applies clarity heuristics from `optimize-prompt` rather than blindly delegating + +### Scenario 5: Failure-mode handling + +- Ask: "Create a Skill called `refund-policy`" (in a workspace that already has one) +- Verify: handles `AlreadyExists` gracefully — surfaces the conflicting Skill and offers either a renamed create or `update_skill` +- Ask: "Disable the `refund-policy` Skill" +- Verify: routes to Phase 4 with `enabled: false`, NOT to Phase 5 (delete) +- Verify: explains that disable is reversible and references stop resolving until re-enabled + --- ## Critical Files @@ -160,3 +209,8 @@ Requires `setup.md` to have run first (seed data for `run-experiment` test). 
- `skills/optimize-prompt/SKILL.md` - `skills/analyze-trace-failures/SKILL.md` - `skills/run-experiment/SKILL.md` +- `skills/manage-skills/SKILL.md` +- `skills/manage-skills/resources/authoring-guide.md` +- `skills/manage-skills/resources/governance-guide.md` +- `skills/manage-skills/resources/known-caveats.md` +- `commands/manage-skills.md`