diff --git a/CHANGELOG.md b/CHANGELOG.md index 029833b..191cf9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,24 @@ All notable changes to cc-taskrunner will be documented in this file. Format follows [Keep a Changelog](https://keepachangelog.com/). +## [1.6.0] — 2026-04-11 + +### Added +- **Ratchet mode — measure-before-after validation for autonomous improvements** (#16). The runner now captures a baseline snapshot of `npm run typecheck` + `npm test` pass/fail on `main` before creating the task branch, re-runs the same checks on the branch after the task commits, and automatically reverts the task (delete branch, skip push/PR, mark failed) when a check transitioned `pass → fail`. Gates regressions from reaching origin. + + **Opt-in paths:** + - `"ratchet": true` in the task JSON (explicit per-task) + - Category defaults: `refactor` and `bugfix` ratchet automatically + - `CC_RATCHET=1` environment override (force-enable every task) + + **Never ratcheted:** `docs`, `tests`, `research`, `deploy` — no regression surface or outcomes aren't code-level. + + **Decision rule:** only `pass → fail` transitions revert. `fail → fail` (unchanged broken surface) and `skip → fail` (first-time check on pre-existing breakage) are both `keep`. `fail → pass` is `keep` (improvement). + + **Env knobs:** `CC_RATCHET=1|0`, `CC_RATCHET_TIMEOUT=<seconds>` (default 180), `CC_DISABLE_RATCHET=1` (legacy alias). + + Applied symmetrically to `taskrunner.sh` and `plugin/taskrunner.sh`. Pure bash + python3 — zero new dependencies. Degrades to no-op when the repo has no `typecheck` or `test` script. Only runs on branch-isolated tasks (operator-authority tasks skip ratchet entirely). + ## [1.5.0] — 2026-04-09 ### Added diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..f9027e1 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,45 @@ +# Security + +For the full Stackbilt security policy, see https://docs.stackbilt.dev/security/. 
+ +## Reporting a Vulnerability + +**Do not open a public GitHub issue for security vulnerabilities.** + +### How to report + +- **Primary channel:** email `admin@stackbilt.dev` with "SECURITY:" in the subject line +- **GitHub Security Advisory:** https://github.com/Stackbilt-dev/cc-taskrunner/security/advisories/new +- Include: vulnerability description, reproduction steps, potential impact, and any suggested mitigation + +### Response targets + +| Severity | Acknowledgement | Fix target | +|---|---|---| +| Critical — active exploitation, data exposure | 24 hours | 7 days | +| High — exploitable with effort | 48 hours | 14 days | +| Medium / Low | 5 business days | Next release cycle | + +These are targets, not contractual SLAs. Stackbilt is a solo-founder operation and response times reflect that reality honestly. Critical issues affecting user data are prioritized above everything else. + +### Scope + +This policy covers all software published in this repository. For the full policy covering the entire Stackbilt-dev organization, see the [canonical security policy](https://docs.stackbilt.dev/security/). + +### Out of scope + +- Denial of service against free-tier services (Cloudflare handles DDoS) +- Rate limiting bypass on non-authenticated endpoints (unless it enables data access) +- Missing security headers on non-production deployments +- Vulnerabilities in third-party dependencies where this repo is not the upstream maintainer + +### Disclosure + +- Stackbilt practices **coordinated disclosure** with a minimum 90-day window (30 days for critical). +- Reporters are credited in release notes unless anonymity is requested. +- Good-faith security research within this policy will not face legal action. 
# ─── Ratchet mode (#16) ─────────────────────────────────────
# Measure-before-after validation. See taskrunner.sh for the full doc —
# this is the parallel plugin copy. Keep them in sync.

# ratchet_enabled_for_task TASK_JSON
#
# Decides whether the ratchet gate applies to a task.
#
# Arguments:
#   $1 - the task as a JSON object string
# Environment:
#   CC_DISABLE_RATCHET=1  force-disable (checked first)
#   CC_RATCHET=0|1        force-disable / force-enable
# Returns:
#   0 when the task should be ratcheted, 1 otherwise.
ratchet_enabled_for_task() {
  local task_json="$1"

  # Environment force-overrides win over anything in the task JSON.
  if [[ "${CC_DISABLE_RATCHET:-0}" = "1" ]]; then return 1; fi
  case "${CC_RATCHET:-}" in
    0) return 1 ;;
    1) return 0 ;;
  esac

  # One python3 pass extracts both fields: line 1 is the lower-cased
  # `ratchet` value ("" when absent), line 2 is the category. When python3
  # is missing or the JSON does not parse, both stay empty and the task is
  # treated as not ratcheted.
  local explicit="" category=""
  {
    IFS= read -r explicit
    IFS= read -r category
  } < <(printf '%s' "$task_json" | python3 -c '
import json, sys
task = json.load(sys.stdin)
flag = task.get("ratchet")
print("" if flag is None else str(flag).lower())
print(task.get("category", ""))
' 2>/dev/null) || true

  # An explicit per-task flag beats the category default.
  case "$explicit" in
    true) return 0 ;;
    false) return 1 ;;
  esac

  # Category defaults: only refactor and bugfix ratchet. Everything else —
  # docs, tests, research, deploy, and unknown or missing categories —
  # does not.
  case "$category" in
    refactor|bugfix) return 0 ;;
    *) return 1 ;;
  esac
}

# _ratchet_run_check REPO_PATH TIMEOUT_SECS NPM_ARGS...
#
# Runs `npm NPM_ARGS...` inside REPO_PATH under a time limit and prints
# `pass` (exit 0) or `fail` (non-zero exit, including a timeout) on stdout.
# Prints `skip` plus a warning on stderr when neither `timeout` nor
# `gtimeout` is on PATH: without one of them the check cannot be bounded,
# and invoking a missing `timeout` would make every check look like `fail`.
_ratchet_run_check() {
  local repo_path="$1" timeout_secs="$2"
  shift 2

  local timeout_bin
  if command -v timeout >/dev/null 2>&1; then
    timeout_bin=timeout
  elif command -v gtimeout >/dev/null 2>&1; then
    timeout_bin=gtimeout
  else
    printf 'ratchet: no timeout/gtimeout on PATH; skipping npm %s\n' "$*" >&2
    echo skip
    return 0
  fi

  # Subshell keeps the caller's working directory untouched.
  if ( cd "$repo_path" && "$timeout_bin" "$timeout_secs" npm "$@" >/dev/null 2>&1 ); then
    echo pass
  else
    echo fail
  fi
}

# ratchet_snapshot REPO_PATH LABEL
#
# Runs the `typecheck` and `test` npm scripts that REPO_PATH/package.json
# declares and prints a one-line JSON snapshot on stdout (no trailing
# newline):
#   {"label":"LABEL","typecheck":"pass|fail|skip","test":"pass|fail|skip"}
#
# A check stays `skip` when package.json is absent, python3 is unavailable,
# package.json cannot be parsed, or the script is not declared.
#
# Arguments:
#   $1 - repository path
#   $2 - snapshot label; interpolated verbatim into the JSON, so pass a
#        plain identifier such as `baseline` or `post`
# Environment:
#   CC_RATCHET_TIMEOUT - per-check limit handed to `timeout` (default 180)
ratchet_snapshot() {
  local repo_path="$1" label="$2"
  local timeout_secs="${CC_RATCHET_TIMEOUT:-180}"
  local tc_status="skip" test_status="skip"

  if [[ -f "${repo_path}/package.json" ]] && command -v python3 >/dev/null 2>&1; then
    # Single parse of package.json: prints the subset of
    # {typecheck, test} that appears under "scripts", space-separated.
    local scripts
    scripts=$(python3 -c '
import json, sys
with open(sys.argv[1]) as fh:
    declared = json.load(fh).get("scripts", {})
print(" ".join(name for name in ("typecheck", "test") if name in declared))
' "${repo_path}/package.json" 2>/dev/null) || scripts=""

    if [[ " ${scripts} " == *" typecheck "* ]]; then
      tc_status=$(_ratchet_run_check "$repo_path" "$timeout_secs" run typecheck)
    fi
    if [[ " ${scripts} " == *" test "* ]]; then
      test_status=$(_ratchet_run_check "$repo_path" "$timeout_secs" test)
    fi
  fi

  printf '{"label":"%s","typecheck":"%s","test":"%s"}' "$label" "$tc_status" "$test_status"
}

# _ratchet_json_field JSON KEY
#
# Prints the value stored under KEY in the JSON object, or `skip` when the
# key is absent, the JSON does not parse, or python3 is unavailable.
_ratchet_json_field() {
  printf '%s' "$1" \
    | python3 -c 'import json,sys; print(json.load(sys.stdin).get(sys.argv[1],"skip"))' "$2" 2>/dev/null \
    || echo skip
}

# ratchet_decision BASELINE_JSON POST_JSON
#
# Compares two ratchet_snapshot outputs. Only a `pass` → `fail` transition
# of a check counts as a regression; every other combination is kept.
#
# Outputs (stdout):
#   "revert: <reason>[, <reason>]"  when at least one check regressed
#   "keep: baseline=tc:..,test:.. post=tc:..,test:.."  otherwise
# Returns:
#   1 on revert, 0 on keep.
ratchet_decision() {
  local baseline="$1" post="$2"
  local bt pt bx px
  bt=$(_ratchet_json_field "$baseline" typecheck)
  pt=$(_ratchet_json_field "$post" typecheck)
  bx=$(_ratchet_json_field "$baseline" test)
  px=$(_ratchet_json_field "$post" test)

  local reverts=()
  if [[ "$bt" = "pass" && "$pt" = "fail" ]]; then
    reverts+=("typecheck regression (pass → fail)")
  fi
  if [[ "$bx" = "pass" && "$px" = "fail" ]]; then
    reverts+=("test regression (pass → fail)")
  fi

  if [[ ${#reverts[@]} -gt 0 ]]; then
    # "${reverts[*]}" joins on the FIRST character of IFS only, so
    # IFS=', ' would yield "a,b". printf repeats its format once per
    # element; the trailing separator is trimmed afterwards.
    local reason
    printf -v reason '%s, ' "${reverts[@]}"
    reason=${reason%, }
    echo "revert: $reason"
    return 1
  fi

  echo "keep: baseline=tc:${bt},test:${bx} post=tc:${pt},test:${px}"
  return 0
}
+ local RATCHET_ENABLED=false + local RATCHET_BASELINE="" cd "$repo_path" # Non-operator tasks get their own branch @@ -407,6 +482,14 @@ execute_task() { git checkout main 2>/dev/null || git checkout master 2>/dev/null git pull --ff-only 2>/dev/null || true + # ── Ratchet baseline capture (#16) ──────────────────────── + if ratchet_enabled_for_task "$task_json"; then + RATCHET_ENABLED=true + log "│ Ratchet: capturing baseline on main…" + RATCHET_BASELINE="$(ratchet_snapshot "$repo_path" baseline)" + log "│ Ratchet baseline: ${RATCHET_BASELINE}" + fi + # Create or reset task branch if git rev-parse --verify "$branch" >/dev/null 2>&1; then git checkout "$branch" @@ -543,6 +626,36 @@ Task: ${title}" 2>/dev/null || true commit_count=$((commit_count + 1)) fi + # ── Ratchet post-validation (#16) ───────────────────────── + local ratchet_verdict="" + if [[ "$RATCHET_ENABLED" = "true" && "$commit_count" -gt 0 ]]; then + log "│ Ratchet: validating branch…" + local ratchet_post + ratchet_post="$(ratchet_snapshot "$repo_path" post)" + log "│ Ratchet post: ${ratchet_post}" + ratchet_verdict="$(ratchet_decision "$RATCHET_BASELINE" "$ratchet_post")" + local ratchet_rc=$? + log "│ Ratchet verdict: ${ratchet_verdict}" + + if [[ $ratchet_rc -ne 0 ]]; then + log "│ Ratchet REVERT — dropping branch and skipping PR" + git checkout main 2>/dev/null || git checkout master 2>/dev/null + git branch -D "$branch" 2>/dev/null || true + branch="" + commit_count=0 + if [[ "$stashed" == "true" ]]; then + git stash pop 2>/dev/null && log "│ Restored stashed changes" || true + stashed=false + fi + result_text="[ratchet_revert] ${ratchet_verdict} + +${result_text}" + update_task_status "$task_id" "failed" "ratchet revert: ${ratchet_verdict}" + log "└─ Task reverted by ratchet gate" + return 1 + fi + fi + # Push and create PR if there are commits if [[ "$commit_count" -gt 0 ]]; then log "│ Pushing ${commit_count} commit(s) to ${branch}..." 
diff --git a/taskrunner.sh b/taskrunner.sh index 192c7f6..ad682d0 100644 --- a/taskrunner.sh +++ b/taskrunner.sh @@ -196,6 +196,126 @@ print("\n".join(lines)) ' } +# ─── Ratchet mode (#16) ───────────────────────────────────── +# Measure-before-after validation for autonomous improvements. +# Captures baseline metrics on main BEFORE the branch exists, re-runs the +# same checks on the branch AFTER the task commits, and reverts the task +# (deletes branch, skips push/PR) if metrics regressed. +# +# Opt-in paths: +# 1. Explicit per-task field: `"ratchet": true` in the task JSON +# 2. Category default: `refactor` and `bugfix` tasks ratchet automatically +# 3. Environment override: `CC_RATCHET=1` enables for every task +# (override with `CC_RATCHET=0`) +# +# Categories NEVER ratcheted (signal noise > signal value): +# - docs, tests — no regression surface +# - research, deploy — outcomes aren't code-level +# +# Checks captured in the snapshot (each is independent and degrades to +# `skip` when not applicable to the target repo): +# - typecheck: `npm run typecheck` exit code +# - tests: `npm test` exit code +# +# Regression rule: if a check transitioned from `pass` → `fail`, the +# branch is reverted. `skip → fail` is NOT a regression (first time the +# check ran, pre-existing breakage). `fail → fail` is NOT a regression +# either — the task wasn't expected to fix that surface. 
#
# Environment knobs:
#   CC_RATCHET=1|0               — force-enable or force-disable (overrides task fields)
#   CC_RATCHET_TIMEOUT=<seconds> — per-check timeout (default: 180)
#   CC_DISABLE_RATCHET=1         — legacy alias for CC_RATCHET=0

# ratchet_enabled_for_task TASK_JSON
#
# Decides whether the ratchet gate applies to a task.
#
# Arguments:
#   $1 - the task as a JSON object string
# Returns:
#   0 when the task should be ratcheted, 1 otherwise.
ratchet_enabled_for_task() {
  local task_json="$1"

  # Environment force-overrides win over anything in the task JSON.
  if [[ "${CC_DISABLE_RATCHET:-0}" = "1" ]]; then return 1; fi
  case "${CC_RATCHET:-}" in
    0) return 1 ;;
    1) return 0 ;;
  esac

  # One python3 pass extracts both fields: line 1 is the lower-cased
  # `ratchet` value ("" when absent), line 2 is the category. When python3
  # is missing or the JSON does not parse, both stay empty and the task is
  # treated as not ratcheted.
  local explicit="" category=""
  {
    IFS= read -r explicit
    IFS= read -r category
  } < <(printf '%s' "$task_json" | python3 -c '
import json, sys
task = json.load(sys.stdin)
flag = task.get("ratchet")
print("" if flag is None else str(flag).lower())
print(task.get("category", ""))
' 2>/dev/null) || true

  # Explicit per-task flag wins over category defaults.
  case "$explicit" in
    true) return 0 ;;
    false) return 1 ;;
  esac

  # Category defaults: only refactor and bugfix ratchet. Everything else —
  # docs, tests, research, deploy, and unknown or missing categories —
  # does not.
  case "$category" in
    refactor|bugfix) return 0 ;;
    *) return 1 ;;
  esac
}

# ratchet_snapshot