JFK · JFK · Jun 13, 2026 · Jun 13, 2026 · Jun 13, 2026 · Jun 13, 2026
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,9 @@
+# Keep command prompt files and the token-baseline snapshot LF-normalized so the
+# size census (tests/token-baseline.sh, issue #87) is byte-for-byte reproducible
+# across Windows/WSL (CRLF risk) and Linux CI (LF).
+commands/*.md text eol=lf
+tests/fixtures/token-baseline.txt text eol=lf
+
+# Shell scripts must be LF so they execute on Linux CI regardless of a
+# contributor's core.autocrlf setting (a CRLF shebang/line breaks `bash`).
+*.sh text eol=lf
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -63,3 +63,11 @@ jobs:
 
       - name: Verify exit_reason and hitl_decision enums stay in sync across config.md / ship.md / review.md
         run: bash tests/enum-sync-check.sh
+
+      # Census first: the self-test below exercises `--update` against the
+      # snapshot file, so drift has to be measured before it runs.
+      - name: Command-file token-baseline census (informational — prints table + drift)
+        run: bash tests/token-baseline.sh --check
+
+      - name: Self-test the command-file token-baseline census tool
+        run: bash tests/token-baseline-test.sh
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -38,6 +38,11 @@ test "$(jq -r .version .claude-plugin/plugin.json)" \
 
 # Frontmatter parses (matches CI)
 python3 .github/workflows/check-frontmatter.py
+
+# Command-file size census — informational table + drift vs snapshot (never fails)
+bash tests/token-baseline.sh --check
+# Self-test for the census tool itself
+bash tests/token-baseline-test.sh
 ```
 
 To try the plugin in a sandbox without publishing:
@@ -98,6 +103,20 @@ The fixture itself is markdown-only, so the measurement is not affected by netwo
 
 Only the four token-efficiency flags (and their config equivalents) carry this measurement obligation. PRs that don't change cascade gating or skill invocation paths can skip the table — `rtk gain` is for verifying claims about token impact, not a universal PR requirement.
 
+## Static command-file size baseline (`tests/token-baseline.sh`)
+
+`rtk gain` above measures *runtime* token consumption. This is the complementary *static* measure: a deterministic census of the command prompt files themselves (`commands/*.md`), used to prove the per-command token reductions in the v0.14.0 optimization milestone (#87) and to catch accidental bloat.
+
+```bash
+bash tests/token-baseline.sh --check    # print the per-file table + drift vs snapshot (always exits 0)
+bash tests/token-baseline.sh --update   # refresh the committed snapshot after an intended change
+```
+
+- `~tokens` is an **approximation** (`bytes / 4`) — no tokenizer is involved; a byte census is enough to track reductions.
+- The committed snapshot lives at `tests/fixtures/token-baseline.txt`. A compression PR is expected to change it — run `--update` and commit the refreshed snapshot as part of the PR so the diff shows the reduction.
+- Byte counts are reproducible across Windows/WSL and Linux CI: `commands/*.md` are pinned to LF via `.gitattributes`, and the script strips `CR` before counting.
+- `--check` is **informational only** (it never hard-fails on growth or shrinkage). A bloat hard-fail guard is intentionally deferred until after the compression milestone.
+
 ## Design principles
 
 A few load-bearing principles that shape what gets accepted into `commands/`:

diff --git a/tests/fixtures/token-baseline.txt b/tests/fixtures/token-baseline.txt
@@ -0,0 +1,13 @@
+# token-baseline snapshot — commands/*.md size census (issue #87)
+# ~tokens is APPROXIMATE (bytes/4); byte counts assume LF (CR stripped, see .gitattributes)
+# columns: file<TAB>lines<TAB>bytes<TAB>~tokens   (regenerate: bash tests/token-baseline.sh --update)
+config.md	379	30846	7711
+doctor.md	471	27637	6909
+goal.md	241	32295	8073
+propose.md	552	24730	6182
+review.md	543	33737	8434
+ship.md	663	48858	12214
+start.md	1107	85109	21277
+status.md	192	11918	2979
+tag.md	425	18580	4645
+TOTAL	4573	313710	78424
diff --git a/tests/token-baseline-test.sh b/tests/token-baseline-test.sh
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+# tests/token-baseline-test.sh
+#
+# Self-test for tests/token-baseline.sh (issue #87, v0.14.0 milestone).
+# Verifies the size-census TOOL behaves to contract — it does NOT assert the
+# snapshot is in sync (the snapshot churns intentionally as commands/*.md are
+# compressed in #89-#92). What it guards:
+#   - the tool exists and is executable
+#   - --check is informational (exits 0) and prints a per-file table + TOTAL
+#   - ~tokens is the documented bytes/4 approximation, on every file row
+#   - --update is deterministic (idempotent), so snapshot diffs show only real
+#     size changes
+#   - right after --update, --check reports an exact match
+#   - on snapshot drift, --check still exits 0 and prints a warning
+#
+# SIDE EFFECTS: none that persist. The test runs --update and injects drift
+# into the real snapshot file, so it saves the snapshot first and puts it back
+# on every exit path (pass or fail).
+#
+# Run from anywhere: bash tests/token-baseline-test.sh
+
+set -euo pipefail
+
+SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
+REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
+TOOL="$REPO_ROOT/tests/token-baseline.sh"
+CMD_DIR="$REPO_ROOT/commands"
+SNAPSHOT="$REPO_ROOT/tests/fixtures/token-baseline.txt"
+
+fail() { echo "FAIL: $1"; exit 1; }
+
+[ -x "$TOOL" ] || fail "tests/token-baseline.sh missing or not executable"
+
+# Save the snapshot as found and restore it on exit. Without this the test
+# would leave the snapshot refreshed (hiding real drift from a later --check)
+# or, after a mid-test failure, with the drift sentinel still appended.
+WORK=$(mktemp -d)
+HAD_SNAPSHOT=0
+if [ -f "$SNAPSHOT" ]; then
+  cp "$SNAPSHOT" "$WORK/snapshot.orig"
+  HAD_SNAPSHOT=1
+fi
+restore_snapshot() {
+  if [ "$HAD_SNAPSHOT" -eq 1 ]; then
+    cat "$WORK/snapshot.orig" > "$SNAPSHOT"
+  else
+    rm -f "$SNAPSHOT"
+  fi
+  rm -rf "$WORK"
+}
+trap restore_snapshot EXIT
+
+# --check must be informational: exit 0 even if the snapshot drifts.
+OUT=$(bash "$TOOL" --check) || fail "--check exited non-zero (must be informational/exit 0)"
+
+# Column header present. Here-strings (not `printf | grep -q`) so an early
+# grep exit can never surface as a pipefail error.
+grep -q 'file' <<<"$OUT" || fail "no column header line containing 'file'"
+
+# One data row per command file, plus a TOTAL row. Count files with a glob
+# (not `ls`), and use awk with a TAB field separator so we never embed literal
+# tabs in this test.
+NFILES=0
+for f in "$CMD_DIR"/*.md; do
+  [ -e "$f" ] || continue
+  NFILES=$((NFILES + 1))
+done
+[ "$NFILES" -gt 0 ] || fail "no command files found under commands/"
+NROWS=$(awk -F'\t' 'NF==4 && $1 ~ /\.md$/ {c++} END{print c+0}' <<<"$OUT")
+[ "$NROWS" -eq "$NFILES" ] || fail "expected $NFILES file rows, got $NROWS"
+awk -F'\t' '$1=="TOTAL" && NF==4 {found=1} END{exit found?0:1}' <<<"$OUT" \
+  || fail "no TOTAL row with 4 tab-separated columns"
+
+# ~tokens is bytes/4 (integer) on every file row; report the first offender.
+BAD=$(awk -F'\t' 'NF==4 && $1 ~ /\.md$/ && $4 != int($3 / 4) {print $1 " (bytes=" $3 " tokens=" $4 ")"; exit}' <<<"$OUT")
+[ -z "$BAD" ] || fail "~tokens != bytes/4 for $BAD"
+
+# --update is deterministic: two consecutive runs write identical bytes.
+bash "$TOOL" --update >/dev/null
+cp "$SNAPSHOT" "$WORK/first-update"
+bash "$TOOL" --update >/dev/null
+cmp -s "$WORK/first-update" "$SNAPSHOT" \
+  || fail "--update is not deterministic (snapshot changed between runs)"
+
+# Right after --update, --check must report an exact match.
+MATCH_OUT=$(bash "$TOOL" --check) || fail "--check exited non-zero right after --update"
+grep -q 'OK: matches snapshot' <<<"$MATCH_OUT" \
+  || fail "--check should report 'OK: matches snapshot' immediately after --update"
+
+# Drift case — the contract the tool exists to surface (AC #3): when the snapshot
+# drifts, --check must STILL exit 0 (informational, never hard-fails) AND warn.
+# The EXIT trap removes the sentinel again by restoring the saved snapshot.
+printf 'drift-sentinel\t1\t1\t0\n' >> "$SNAPSHOT"
+DRIFT_OUT=$(bash "$TOOL" --check 2>&1) || fail "--check must exit 0 even on snapshot drift (AC #3)"
+grep -q 'WARN: size drift' <<<"$DRIFT_OUT" \
+  || fail "--check should warn on drift (WARN goes to stderr)"
+
+echo "PASS: token-baseline.sh ($NFILES command files measured)"
diff --git a/tests/token-baseline.sh b/tests/token-baseline.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+# tests/token-baseline.sh
+#
+# Deterministic size census of commands/*.md (issue #87, v0.14.0 milestone).
+# Measures per-command lines / bytes / approximate tokens, prints a table, and
+# compares against the committed snapshot tests/fixtures/token-baseline.txt.
+#
+# WHY: before compressing the command prompt files (#89-#92) we need a
+# deterministic baseline to prove the reduction and catch accidental bloat.
+# Every file is measured (a census, not a sample) — no LLM/tokenizer needed.
+#
+# ~tokens is an APPROXIMATION (bytes / 4). There is no real tokenizer here; a
+# byte census is enough to track the compression milestone's reductions.
+#
+# REPRODUCIBILITY: byte counts depend on line endings. This plugin is developed
+# on Windows/WSL (CRLF risk) and CI runs on Linux (LF). Three defenses keep the
+# output identical everywhere: (1) commands/*.md are pinned to LF via
+# .gitattributes (text eol=lf); (2) this script strips CR before measuring, so
+# a CRLF working-tree checkout still yields the same bytes as LF; (3) rows are
+# sorted under LC_ALL=C, so their order does not depend on the caller's locale.
+#
+# MODES:
+#   (default) / --check : measure, print the table, diff vs the snapshot, print
+#                         any drift as a WARNING. ALWAYS exits 0 — informational
+#                         only, never hard-fails on reduction or growth. (A
+#                         bloat hard-fail guard is intentionally deferred to a
+#                         follow-up after the compression milestone; see #87.)
+#   --update            : (re)generate the snapshot file in place.
+#
+# EXIT STATUS: 0 for --check (match or drift) and --update; 2 for an
+# unrecognized argument. Under `set -e` a failed measurement aborts non-zero.
+#
+# Run from anywhere: bash tests/token-baseline.sh [--check|--update]
+
+set -euo pipefail
+
+SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
+REPO_ROOT=$(cd "$SCRIPT_DIR/.." && pwd)
+SNAPSHOT="$REPO_ROOT/tests/fixtures/token-baseline.txt"
+CMD_DIR="$REPO_ROOT/commands"
+
+MODE="check"
+case "${1:-}" in
+  --update)      MODE="update" ;;
+  --check | "")  MODE="check" ;;
+  *) echo "usage: token-baseline.sh [--check|--update]" >&2; exit 2 ;;
+esac
+
+# Emit the full snapshot text (header comments + sorted per-file rows + TOTAL)
+# on stdout. Reads every $CMD_DIR/*.md; writes nothing else.
+# Deterministic: CR is stripped before counting and rows are byte-sorted by
+# filename (LC_ALL=C), independent of glob order and the caller's locale.
+# TOTAL ~tokens is the sum of the per-file (floored) values, so it can be a
+# few tokens below TOTAL bytes / 4.
+generate() {
+  local rows="" total_lines=0 total_bytes=0 total_tok=0
+  local f name lines bytes tok
+  for f in "$CMD_DIR"/*.md; do
+    [ -e "$f" ] || continue   # unmatched glob stays literal; skip it
+    name=$(basename "$f")
+    lines=$(tr -d '\r' < "$f" | wc -l | tr -d ' ')
+    bytes=$(tr -d '\r' < "$f" | wc -c | tr -d ' ')
+    tok=$(( bytes / 4 ))
+    total_lines=$(( total_lines + lines ))
+    total_bytes=$(( total_bytes + bytes ))
+    total_tok=$(( total_tok + tok ))
+    rows+=$(printf '%s\t%s\t%s\t%s' "$name" "$lines" "$bytes" "$tok")$'\n'
+  done
+  printf '# token-baseline snapshot — commands/*.md size census (issue #87)\n'
+  printf '# ~tokens is APPROXIMATE (bytes/4); byte counts assume LF (CR stripped, see .gitattributes)\n'
+  printf '# columns: file<TAB>lines<TAB>bytes<TAB>~tokens   (regenerate: bash tests/token-baseline.sh --update)\n'
+  printf '%s' "$rows" | LC_ALL=C sort
+  printf 'TOTAL\t%s\t%s\t%s\n' "$total_lines" "$total_bytes" "$total_tok"
+}
+
+# Take the census into a temp file first, for both modes: if measuring fails
+# part-way, `set -e` aborts here and the committed snapshot is left untouched.
+TMP=$(mktemp)
+trap 'rm -f "$TMP"' EXIT
+generate > "$TMP"
+
+if [ "$MODE" = "update" ]; then
+  mkdir -p "$(dirname "$SNAPSHOT")"
+  cat "$TMP" > "$SNAPSHOT"
+  echo "updated snapshot: tests/fixtures/token-baseline.txt"
+  exit 0
+fi
+
+# --check mode: print the table, then diff against the snapshot (informational).
+cat "$TMP"
+
+if [ ! -f "$SNAPSHOT" ]; then
+  echo "WARN: no snapshot yet — create it with: bash tests/token-baseline.sh --update" >&2
+  exit 0
+fi
+
+# Run diff once and keep its output, so the verdict and the printed drift
+# always come from the same comparison.
+if DRIFT=$(diff -u "$SNAPSHOT" "$TMP" 2>&1); then
+  echo "OK: matches snapshot"
+else
+  echo "WARN: size drift vs snapshot (informational — not a failure):" >&2
+  printf '%s\n' "$DRIFT" >&2
+  echo "      if this drift is intended (e.g. a compression PR), refresh with: bash tests/token-baseline.sh --update" >&2
+fi
+exit 0