From ba05c9ef308944fc52e2a1e778b522ec76887630 Mon Sep 17 00:00:00 2001 From: fullsend-code <278716306+fullsend-ai-coder[bot]@users.noreply.github.com> Date: Tue, 16 Jun 2026 20:02:55 +0000 Subject: [PATCH 1/2] feat(#2095): add verified/unchecked variable fields to review findings schema Add optional `verified_variables` and `unchecked_variables` array fields to the finding object in review-result.schema.json. These fields force the review agent's security sub-agent to structurally enumerate which variables it verified as sanitized and which it did not, preventing blanket safety claims from partial verification. Changes: - review-result.schema.json: add both arrays (string items, minLength 1) to the finding definition under additionalProperties: false - security.md (sub-agent): update verification methodology step 3 to require populating the new structured fields instead of prose - code-review/SKILL.md: update exhaustive security-control verification guidance to reference the new fields - agents/review.md: add new fields to the finding object documentation table - validate-output-schema-test.sh: add three test cases (partial coverage valid, full coverage valid, empty-string variable rejected) and update the allowed-properties assertion Pre-commit could not run (shellcheck install failed due to network restrictions in the sandbox). The post-script runs pre-commit authoritatively. Closes #2095 --- .../scaffold/fullsend-repo/agents/review.md | 2 ++ .../schemas/review-result.schema.json | 10 ++++++++++ .../scripts/validate-output-schema-test.sh | 20 ++++++++++++++++++- .../fullsend-repo/skills/code-review/SKILL.md | 12 ++++++----- .../skills/pr-review/sub-agents/security.md | 12 +++++++---- 5 files changed, 46 insertions(+), 10 deletions(-) diff --git a/internal/scaffold/fullsend-repo/agents/review.md b/internal/scaffold/fullsend-repo/agents/review.md index dc286129b..c4704b630 100644 --- a/internal/scaffold/fullsend-repo/agents/review.md +++ b/internal/scaffold/fullsend-repo/agents/review.md @@ -278,6 +278,8 @@ fields such as `outcome`, `summary`, `prior_review_sha`, or | `description` | string | yes | Finding description (min 1 char) | | `remediation` | string | no | Suggested fix | | `actionable` | boolean | no | When true on low/info findings in an `approve` result, marks the finding for future follow-up issue creation (temporarily disabled; see #1137) | +| `verified_variables` | array | no | Variables confirmed as having the security control applied. Required in findings that identify a sanitization or security control function. | +| `unchecked_variables` | array | no | Variables in the same context not confirmed as having the security control. Required in findings that identify a sanitization or security control function. | Schema validation failures trigger a harness retry iteration. The jq examples below show the exact JSON shape for each action. diff --git a/internal/scaffold/fullsend-repo/schemas/review-result.schema.json b/internal/scaffold/fullsend-repo/schemas/review-result.schema.json index 4c4227a89..e6ff7281d 100644 --- a/internal/scaffold/fullsend-repo/schemas/review-result.schema.json +++ b/internal/scaffold/fullsend-repo/schemas/review-result.schema.json @@ -64,6 +64,16 @@ "actionable": { "type": "boolean", "description": "True when this non-blocking finding should be tracked as a follow-up issue if the review approves." + }, + "verified_variables": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "description": "Variables in the security-sensitive context that were verified as having the security control applied." + }, + "unchecked_variables": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "description": "Variables in the security-sensitive context that were not verified as having the security control applied." } }, "additionalProperties": false diff --git a/internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh b/internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh index 44bd813ac..ab5751666 100755 --- a/internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh +++ b/internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh @@ -173,6 +173,24 @@ run_test_custom_filename "review-finding-additional-property-rejected" \ "${REVIEW_SCHEMA}" \ "false" +run_test_custom_filename "review-finding-with-verified-variables-valid" \ + '{"action":"request-changes","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Sanitization gap.","findings":[{"severity":"high","category":"gha-injection","file":"action.yml","description":"Partial sanitization.","verified_variables":["message"],"unchecked_variables":["source","subtype"]}]}' \ + "agent-result.json" \ + "${REVIEW_SCHEMA}" \ + "true" + +run_test_custom_filename "review-finding-all-variables-verified-valid" \ + '{"action":"request-changes","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"All checked.","findings":[{"severity":"low","category":"gha-injection","file":"action.yml","description":"Full coverage.","verified_variables":["message","source","subtype"],"unchecked_variables":[]}]}' \ + "agent-result.json" \ + "${REVIEW_SCHEMA}" \ + "true" + +run_test_custom_filename "review-finding-verified-variables-empty-string-rejected" \ + '{"action":"request-changes","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Bad var.","findings":[{"severity":"high","category":"gha-injection","file":"action.yml","description":"Bad.","verified_variables":[""],"unchecked_variables":["source"]}]}' \ + "agent-result.json" \ + "${REVIEW_SCHEMA}" \ + "false" + # Helper for custom-filename tests that also assert output content. run_test_custom_filename_output() { local test_name="$1" @@ -219,7 +237,7 @@ run_test_custom_filename_output "nested-additional-property-shows-allowed" \ "agent-result.json" \ "${REVIEW_SCHEMA}" \ "false" \ - "allowed properties: actionable, category, description, file, line, remediation, severity" + "allowed properties: actionable, category, description, file, line, remediation, severity, unchecked_variables, verified_variables" # --- Structural failures --- diff --git a/internal/scaffold/fullsend-repo/skills/code-review/SKILL.md b/internal/scaffold/fullsend-repo/skills/code-review/SKILL.md index f67c35a17..5221bf22a 100644 --- a/internal/scaffold/fullsend-repo/skills/code-review/SKILL.md +++ b/internal/scaffold/fullsend-repo/skills/code-review/SKILL.md @@ -118,11 +118,13 @@ dimension carry over to another — each requires its own scrutiny. covers all attack surfaces based on verifying a subset. When you find a security-relevant function applied to one variable, explicitly enumerate ALL other variables in the same context and verify each one - individually. In your findings, state which inputs you verified as - protected and which you could not confirm. If any input lacks the - control, raise a finding even if the unprotected input appears - low-risk — the risk assessment belongs in the finding's severity, not - in a decision to omit the finding. + individually. In your findings, populate `verified_variables` with the + names of inputs you confirmed are protected, and `unchecked_variables` + with inputs you could not confirm. Both arrays must be present whenever + a sanitization or security control function is identified in a finding. + If any input lacks the control, raise a finding even if the unprotected + input appears low-risk — the risk assessment belongs in the finding's + severity, not in a decision to omit the finding. - Content security: does the change affect how user-supplied content is handled or rendered? Are there sandboxing gaps? - **Permission manifest changes:** If the diff modifies any file that diff --git a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md index 3380a91e3..f548e4083 100644 --- a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md +++ b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md @@ -50,10 +50,14 @@ When evaluating any security control, follow this procedure: 2. **Verify each independently.** For each enumerated input, confirm whether the security control is applied. Do not assume that applying the control to one input means others are covered. -3. **Report coverage explicitly.** In your findings, state which - inputs you verified as protected and which you could not confirm. - A finding that says "sanitization is handled" without listing the - verified inputs is incomplete. +3. **Report coverage explicitly.** In your findings, populate the + `verified_variables` array with the names of inputs you confirmed + are protected, and the `unchecked_variables` array with inputs you + could not confirm. A finding that says "sanitization is handled" + without listing the verified inputs is incomplete. Both arrays must + be present whenever a sanitization or security control function is + identified in a finding — omitting them is equivalent to claiming + unverified coverage. 4. **Flag gaps, don't dismiss them.** If any input lacks the security control, raise a finding — even if the unprotected input appears low-risk. The risk assessment belongs in the finding's severity, From f80108104fea123375e31464ec748e1a580da286 Mon Sep 17 00:00:00 2001 From: fullsend-fix <278716306+fullsend-ai-coder[bot]@users.noreply.github.com> Date: Thu, 18 Jun 2026 15:38:37 +0000 Subject: [PATCH 2/2] fix: make verified/unchecked variable arrays required in finding schema Make verified_variables and unchecked_variables unconditionally required in the finding schema so agents must always consciously populate them (using [] for non-security findings). Update jq templates in review.md to include the fields, preventing invalid JSON that would burn retry iterations. Add test for missing required variable arrays. Addresses review feedback on #2363 --- internal/scaffold/fullsend-repo/agents/review.md | 8 ++++---- .../schemas/review-result.schema.json | 2 +- .../scripts/validate-output-schema-test.sh | 16 +++++++++++----- .../fullsend-repo/skills/code-review/SKILL.md | 4 ++-- .../skills/pr-review/sub-agents/security.md | 8 +++----- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/internal/scaffold/fullsend-repo/agents/review.md b/internal/scaffold/fullsend-repo/agents/review.md index c4704b630..f5277f11a 100644 --- a/internal/scaffold/fullsend-repo/agents/review.md +++ b/internal/scaffold/fullsend-repo/agents/review.md @@ -278,8 +278,8 @@ fields such as `outcome`, `summary`, `prior_review_sha`, or | `description` | string | yes | Finding description (min 1 char) | | `remediation` | string | no | Suggested fix | | `actionable` | boolean | no | When true on low/info findings in an `approve` result, marks the finding for future follow-up issue creation (temporarily disabled; see #1137) | -| `verified_variables` | array | no | Variables confirmed as having the security control applied. Required in findings that identify a sanitization or security control function. | -| `unchecked_variables` | array | no | Variables in the same context not confirmed as having the security control. Required in findings that identify a sanitization or security control function. | +| `verified_variables` | array | yes | Variables confirmed as having the security control applied. Use `[]` for non-security findings. | +| `unchecked_variables` | array | yes | Variables in the same context not confirmed as having the security control. Use `[]` for non-security findings. | Schema validation failures trigger a harness retry iteration. The jq examples below show the exact JSON shape for each action. @@ -307,7 +307,7 @@ jq -n \ --arg repo "" \ --arg head_sha "" \ --arg body "" \ - --argjson findings '' \ + --argjson findings '[{"severity":"low","category":"","file":"","description":"","actionable":true,"verified_variables":[],"unchecked_variables":[]}]' \ '{action: $action, pr_number: $pr_number, repo: $repo, head_sha: $head_sha, body: $body, findings: $findings}' \ > "$FULLSEND_OUTPUT_DIR/agent-result.json" @@ -322,7 +322,7 @@ jq -n \ --arg repo "" \ --arg head_sha "" \ --arg body "" \ - --argjson findings '' \ + --argjson findings '[{"severity":"high","category":"","file":"","description":"","verified_variables":[""],"unchecked_variables":[""]}]' \ '{action: $action, pr_number: $pr_number, repo: $repo, head_sha: $head_sha, body: $body, findings: $findings}' \ > "$FULLSEND_OUTPUT_DIR/agent-result.json" diff --git a/internal/scaffold/fullsend-repo/schemas/review-result.schema.json b/internal/scaffold/fullsend-repo/schemas/review-result.schema.json index e6ff7281d..13ba8fdc2 100644 --- a/internal/scaffold/fullsend-repo/schemas/review-result.schema.json +++ b/internal/scaffold/fullsend-repo/schemas/review-result.schema.json @@ -53,7 +53,7 @@ "$defs": { "finding": { "type": "object", - "required": ["severity", "category", "file", "description"], + "required": ["severity", "category", "file", "description", "verified_variables", "unchecked_variables"], "properties": { "severity": { "type": "string", "enum": ["critical", "high", "medium", "low", "info"] }, "category": { "type": "string", "minLength": 1 }, diff --git a/internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh b/internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh index ab5751666..884e28774 100755 --- a/internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh +++ b/internal/scaffold/fullsend-repo/scripts/validate-output-schema-test.sh @@ -162,13 +162,13 @@ run_test_custom_filename "custom-output-file-invalid" \ "false" run_test_custom_filename "review-approve-actionable-finding-valid" \ - '{"action":"approve","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Approved with follow-ups.","findings":[{"severity":"low","category":"docs","file":"README.md","line":3,"description":"Document the flag.","remediation":"Add a short usage note.","actionable":true}]}' \ + '{"action":"approve","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Approved with follow-ups.","findings":[{"severity":"low","category":"docs","file":"README.md","line":3,"description":"Document the flag.","remediation":"Add a short usage note.","actionable":true,"verified_variables":[],"unchecked_variables":[]}]}' \ "agent-result.json" \ "${REVIEW_SCHEMA}" \ "true" run_test_custom_filename "review-finding-additional-property-rejected" \ - '{"action":"approve","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Approved.","findings":[{"severity":"low","category":"docs","file":"README.md","description":"Document the flag.","unexpected":true}]}' \ + '{"action":"approve","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Approved.","findings":[{"severity":"low","category":"docs","file":"README.md","description":"Document the flag.","unexpected":true,"verified_variables":[],"unchecked_variables":[]}]}' \ "agent-result.json" \ "${REVIEW_SCHEMA}" \ "false" @@ -185,6 +185,12 @@ run_test_custom_filename "review-finding-all-variables-verified-valid" \ "${REVIEW_SCHEMA}" \ "true" +run_test_custom_filename "review-finding-missing-required-variables-rejected" \ + '{"action":"request-changes","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Missing fields.","findings":[{"severity":"high","category":"gha-injection","file":"action.yml","description":"No variable arrays."}]}' \ + "agent-result.json" \ + "${REVIEW_SCHEMA}" \ + "false" + run_test_custom_filename "review-finding-verified-variables-empty-string-rejected" \ '{"action":"request-changes","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Bad var.","findings":[{"severity":"high","category":"gha-injection","file":"action.yml","description":"Bad.","verified_variables":[""],"unchecked_variables":["source"]}]}' \ "agent-result.json" \ @@ -233,7 +239,7 @@ run_test_custom_filename_output() { } run_test_custom_filename_output "nested-additional-property-shows-allowed" \ - '{"action":"approve","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Approved.","findings":[{"severity":"low","category":"docs","file":"README.md","description":"Document the flag.","unexpected":true}]}' \ + '{"action":"approve","pr_number":42,"repo":"owner/repo","head_sha":"abcdef0123456789abcdef0123456789abcdef01","body":"Approved.","findings":[{"severity":"low","category":"docs","file":"README.md","description":"Document the flag.","unexpected":true,"verified_variables":[],"unchecked_variables":[]}]}' \ "agent-result.json" \ "${REVIEW_SCHEMA}" \ "false" \ @@ -365,7 +371,7 @@ run_test_custom_filename "path-traversal-stripped" \ REVIEW_SCHEMA="${SCRIPT_DIR}/../schemas/review-result.schema.json" run_test_custom_filename "review-reject-valid" \ - '{"action":"reject","pr_number":1,"repo":"org/repo","head_sha":"abc1234","body":"Wrong approach.","findings":[{"severity":"high","category":"intent-alignment","file":"main.go","description":"Wrong design."}]}' \ + '{"action":"reject","pr_number":1,"repo":"org/repo","head_sha":"abc1234","body":"Wrong approach.","findings":[{"severity":"high","category":"intent-alignment","file":"main.go","description":"Wrong design.","verified_variables":[],"unchecked_variables":[]}]}' \ "agent-result.json" \ "${REVIEW_SCHEMA}" \ "true" @@ -377,7 +383,7 @@ run_test_custom_filename "review-reject-missing-findings" \ "false" run_test_custom_filename "review-reject-missing-body" \ - '{"action":"reject","pr_number":1,"repo":"org/repo","head_sha":"abc1234","findings":[{"severity":"high","category":"intent-alignment","file":"main.go","description":"Wrong design."}]}' \ + '{"action":"reject","pr_number":1,"repo":"org/repo","head_sha":"abc1234","findings":[{"severity":"high","category":"intent-alignment","file":"main.go","description":"Wrong design.","verified_variables":[],"unchecked_variables":[]}]}' \ "agent-result.json" \ "${REVIEW_SCHEMA}" \ "false" diff --git a/internal/scaffold/fullsend-repo/skills/code-review/SKILL.md b/internal/scaffold/fullsend-repo/skills/code-review/SKILL.md index 5221bf22a..ce60af042 100644 --- a/internal/scaffold/fullsend-repo/skills/code-review/SKILL.md +++ b/internal/scaffold/fullsend-repo/skills/code-review/SKILL.md @@ -120,8 +120,8 @@ dimension carry over to another — each requires its own scrutiny. enumerate ALL other variables in the same context and verify each one individually. In your findings, populate `verified_variables` with the names of inputs you confirmed are protected, and `unchecked_variables` - with inputs you could not confirm. Both arrays must be present whenever - a sanitization or security control function is identified in a finding. + with inputs you could not confirm. Both arrays are required on every + finding (use `[]` for non-security findings). If any input lacks the control, raise a finding even if the unprotected input appears low-risk — the risk assessment belongs in the finding's severity, not in a decision to omit the finding. diff --git a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md index f548e4083..7dcc60463 100644 --- a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md +++ b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md @@ -53,11 +53,9 @@ When evaluating any security control, follow this procedure: 3. **Report coverage explicitly.** In your findings, populate the `verified_variables` array with the names of inputs you confirmed are protected, and the `unchecked_variables` array with inputs you - could not confirm. A finding that says "sanitization is handled" - without listing the verified inputs is incomplete. Both arrays must - be present whenever a sanitization or security control function is - identified in a finding — omitting them is equivalent to claiming - unverified coverage. + could not confirm. Both arrays are required on every finding (use + `[]` for non-security findings). A finding that says "sanitization + is handled" without listing the verified inputs is incomplete. 4. **Flag gaps, don't dismiss them.** If any input lacks the security control, raise a finding — even if the unprotected input appears low-risk. The risk assessment belongs in the finding's severity,