diff --git a/internal/scaffold/fullsend-repo/agents/review.md b/internal/scaffold/fullsend-repo/agents/review.md index dc286129b..f5277f11a 100644 --- a/internal/scaffold/fullsend-repo/agents/review.md +++ b/internal/scaffold/fullsend-repo/agents/review.md @@ -278,6 +278,8 @@ fields such as `outcome`, `summary`, `prior_review_sha`, or | `description` | string | yes | Finding description (min 1 char) | | `remediation` | string | no | Suggested fix | | `actionable` | boolean | no | When true on low/info findings in an `approve` result, marks the finding for future follow-up issue creation (temporarily disabled; see #1137) | +| `verified_variables` | array | yes | Variables confirmed as having the security control applied. Use `[]` for non-security findings. | +| `unchecked_variables` | array | yes | Variables in the same context not confirmed as having the security control. Use `[]` for non-security findings. | Schema validation failures trigger a harness retry iteration. The jq examples below show the exact JSON shape for each action. @@ -305,7 +307,7 @@ jq -n \ --arg repo "" \ --arg head_sha "" \ --arg body "" \ - --argjson findings '' \ + --argjson findings '[{"severity":"low","category":"","file":"","description":"","actionable":true,"verified_variables":[],"unchecked_variables":[]}]' \ '{action: $action, pr_number: $pr_number, repo: $repo, head_sha: $head_sha, body: $body, findings: $findings}' \ > "$FULLSEND_OUTPUT_DIR/agent-result.json" @@ -320,7 +322,7 @@ jq -n \ --arg repo "" \ --arg head_sha "" \ --arg body "" \ - --argjson findings '' \ + --argjson findings '[{"severity":"high","category":"","file":"","description":"","verified_variables":[""],"unchecked_variables":[""]}]' \ '{action: $action, pr_number: $pr_number, repo: $repo, head_sha: $head_sha, body: $body, findings: $findings}' \ > "$FULLSEND_OUTPUT_DIR/agent-result.json" diff --git a/internal/scaffold/fullsend-repo/schemas/review-result.schema.json b/internal/scaffold/fullsend-repo/schemas/review-result.schema.json index 4c4227a89..13ba8fdc2 100644 --- a/internal/scaffold/fullsend-repo/schemas/review-result.schema.json +++ b/internal/scaffold/fullsend-repo/schemas/review-result.schema.json @@ -53,7 +53,7 @@ "$defs": { "finding": { "type": "object", - "required": ["severity", "category", "file", "description"], + "required": ["severity", "category", "file", "description", "verified_variables", "unchecked_variables"], "properties": { "severity": { "type": "string", "enum": ["critical", "high", "medium", "low", "info"] }, "category": { "type": "string", "minLength": 1 }, @@ -64,6 +64,16 @@ "actionable": { "type": "boolean", "description": "True when this non-blocking finding should be tracked as a follow-up issue if the review approves." + }, + "verified_variables": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "description": "Variables in the security-sensitive context that were verified as having the security control applied." + }, + "unchecked_variables": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "description": "Variables in the security-sensitive context that were not verified as having the security control applied." } }, "additionalProperties": false diff --git a/internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh b/internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh index 44bd813ac..884e28774 100755 --- a/internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh +++ b/internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh @@ -162,13 +162,37 @@ run_test_custom_filename "custom-output-file-invalid" \ "false" run_test_custom_filename "review-approve-actionable-finding-valid" \ - '{"action":"approve","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Approved with follow-ups.","findings":[{"severity":"low","category":"docs","file":"README.md","line":3,"description":"Document the flag.","remediation":"Add a short usage note.","actionable":true}]}' \ + '{"action":"approve","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Approved with follow-ups.","findings":[{"severity":"low","category":"docs","file":"README.md","line":3,"description":"Document the flag.","remediation":"Add a short usage note.","actionable":true,"verified_variables":[],"unchecked_variables":[]}]}' \ "agent-result.json" \ "${REVIEW_SCHEMA}" \ "true" run_test_custom_filename "review-finding-additional-property-rejected" \ - '{"action":"approve","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Approved.","findings":[{"severity":"low","category":"docs","file":"README.md","description":"Document the flag.","unexpected":true}]}' \ + '{"action":"approve","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Approved.","findings":[{"severity":"low","category":"docs","file":"README.md","description":"Document the flag.","unexpected":true,"verified_variables":[],"unchecked_variables":[]}]}' \ + "agent-result.json" \ + "${REVIEW_SCHEMA}" \ + "false" + +run_test_custom_filename "review-finding-with-verified-variables-valid" \ + '{"action":"request-changes","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Sanitization gap.","findings":[{"severity":"high","category":"gha-injection","file":"action.yml","description":"Partial sanitization.","verified_variables":["message"],"unchecked_variables":["source","subtype"]}]}' \ + "agent-result.json" \ + "${REVIEW_SCHEMA}" \ + "true" + +run_test_custom_filename "review-finding-all-variables-verified-valid" \ + '{"action":"request-changes","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"All checked.","findings":[{"severity":"low","category":"gha-injection","file":"action.yml","description":"Full coverage.","verified_variables":["message","source","subtype"],"unchecked_variables":[]}]}' \ + "agent-result.json" \ + "${REVIEW_SCHEMA}" \ + "true" + +run_test_custom_filename "review-finding-missing-required-variables-rejected" \ + '{"action":"request-changes","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Missing fields.","findings":[{"severity":"high","category":"gha-injection","file":"action.yml","description":"No variable arrays."}]}' \ + "agent-result.json" \ + "${REVIEW_SCHEMA}" \ + "false" + +run_test_custom_filename "review-finding-verified-variables-empty-string-rejected" \ + '{"action":"request-changes","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Bad var.","findings":[{"severity":"high","category":"gha-injection","file":"action.yml","description":"Bad.","verified_variables":[""],"unchecked_variables":["source"]}]}' \ "agent-result.json" \ "${REVIEW_SCHEMA}" \ "false" @@ -215,11 +239,11 @@ run_test_custom_filename_output() { } run_test_custom_filename_output "nested-additional-property-shows-allowed" \ - '{"action":"approve","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Approved.","findings":[{"severity":"low","category":"docs","file":"README.md","description":"Document the flag.","unexpected":true}]}' \ + '{"action":"approve","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Approved.","findings":[{"severity":"low","category":"docs","file":"README.md","description":"Document the flag.","unexpected":true,"verified_variables":[],"unchecked_variables":[]}]}' \ "agent-result.json" \ "${REVIEW_SCHEMA}" \ "false" \ - "allowed properties: actionable, category, description, file, line, remediation, severity" + "allowed properties: actionable, category, description, file, line, remediation, severity, unchecked_variables, verified_variables" # --- Structural failures --- @@ -347,7 +371,7 @@ run_test_custom_filename "path-traversal-stripped" \ REVIEW_SCHEMA="${SCRIPT_DIR}/../schemas/review-result.schema.json" run_test_custom_filename "review-reject-valid" \ - '{"action":"reject","pr_number":1,"repo":"org/repo","head_sha":"abc1234","body":"Wrong approach.","findings":[{"severity":"high","category":"intent-alignment","file":"main.go","description":"Wrong design."}]}' \ + '{"action":"reject","pr_number":1,"repo":"org/repo","head_sha":"abc1234","body":"Wrong approach.","findings":[{"severity":"high","category":"intent-alignment","file":"main.go","description":"Wrong design.","verified_variables":[],"unchecked_variables":[]}]}' \ "agent-result.json" \ "${REVIEW_SCHEMA}" \ "true" @@ -359,7 +383,7 @@ run_test_custom_filename "review-reject-missing-findings" \ "false" run_test_custom_filename "review-reject-missing-body" \ - '{"action":"reject","pr_number":1,"repo":"org/repo","head_sha":"abc1234","findings":[{"severity":"high","category":"intent-alignment","file":"main.go","description":"Wrong design."}]}' \ + '{"action":"reject","pr_number":1,"repo":"org/repo","head_sha":"abc1234","findings":[{"severity":"high","category":"intent-alignment","file":"main.go","description":"Wrong design.","verified_variables":[],"unchecked_variables":[]}]}' \ "agent-result.json" \ "${REVIEW_SCHEMA}" \ "false" diff --git a/internal/scaffold/fullsend-repo/skills/code-review/SKILL.md b/internal/scaffold/fullsend-repo/skills/code-review/SKILL.md index f67c35a17..ce60af042 100644 --- a/internal/scaffold/fullsend-repo/skills/code-review/SKILL.md +++ b/internal/scaffold/fullsend-repo/skills/code-review/SKILL.md @@ -118,11 +118,13 @@ dimension carry over to another — each requires its own scrutiny. covers all attack surfaces based on verifying a subset. When you find a security-relevant function applied to one variable, explicitly enumerate ALL other variables in the same context and verify each one - individually. In your findings, state which inputs you verified as - protected and which you could not confirm. If any input lacks the - control, raise a finding even if the unprotected input appears - low-risk — the risk assessment belongs in the finding's severity, not - in a decision to omit the finding. + individually. In your findings, populate `verified_variables` with the + names of inputs you confirmed are protected, and `unchecked_variables` + with inputs you could not confirm. Both arrays are required on every + finding (use `[]` for non-security findings). + If any input lacks the control, raise a finding even if the unprotected + input appears low-risk — the risk assessment belongs in the finding's + severity, not in a decision to omit the finding. - Content security: does the change affect how user-supplied content is handled or rendered? Are there sandboxing gaps? - **Permission manifest changes:** If the diff modifies any file that diff --git a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md index 3380a91e3..7dcc60463 100644 --- a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md +++ b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md @@ -50,10 +50,12 @@ When evaluating any security control, follow this procedure: 2. **Verify each independently.** For each enumerated input, confirm whether the security control is applied. Do not assume that applying the control to one input means others are covered. -3. **Report coverage explicitly.** In your findings, state which - inputs you verified as protected and which you could not confirm. - A finding that says "sanitization is handled" without listing the - verified inputs is incomplete. +3. **Report coverage explicitly.** In your findings, populate the + `verified_variables` array with the names of inputs you confirmed + are protected, and the `unchecked_variables` array with inputs you + could not confirm. Both arrays are required on every finding (use + `[]` for non-security findings). A finding that says "sanitization + is handled" without listing the verified inputs is incomplete. 4. **Flag gaps, don't dismiss them.** If any input lacks the security control, raise a finding — even if the unprotected input appears low-risk. The risk assessment belongs in the finding's severity,