Wenjix · Wenjix · Jun 13, 2026 · Jun 12, 2026 · Jun 13, 2026 · Jun 13, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,26 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+# Least privilege: every step below only reads the repository.
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          # Quoted so a later bump such as 22.10 is never re-typed as the float 22.1.
+          node-version: "22"
+          cache: npm
+      - run: npm ci
+      - run: npm run typecheck
+      - run: npm test
+      - run: npm run build:skill
diff --git a/.gitignore b/.gitignore
@@ -3,3 +3,5 @@ dist/
 .quorum/
 .DS_Store
 *.log
+__pycache__/
+*.pyc
diff --git a/package.json b/package.json
@@ -1,17 +1,28 @@
 {
   "name": "quorum",
   "version": "0.2.0",
-  "private": true,
   "type": "module",
   "description": "Quorum review synthesis plus Cursor Cloud exploration DAG runner.",
   "bin": {
     "quorum": "./dist/src/cli.js",
     "quorum-cloud": "./dist/src/cli.js"
   },
+  "files": [
+    "dist/src/",
+    "scripts/",
+    "references/",
+    "SKILL.md",
+    "quorum.skill",
+    "LICENSE",
+    "README.md"
+  ],
   "scripts": {
+    "prebuild": "rm -rf dist",
     "build": "tsc -p tsconfig.json",
+    "build:skill": "rm -rf .skill-build quorum.skill && mkdir -p .skill-build/quorum && cp SKILL.md .skill-build/quorum/ && cp -r scripts references .skill-build/quorum/ && (cd .skill-build && zip -r ../quorum.skill quorum/ -x '*/__pycache__/*' '*.pyc') && rm -rf .skill-build && echo 'quorum.skill rebuilt'",
     "test": "npm run build && node --test dist/test/*.test.js",
-    "typecheck": "tsc -p tsconfig.json --noEmit"
+    "typecheck": "tsc -p tsconfig.json --noEmit",
+    "prepublishOnly": "npm run build && npm run build:skill && npm test"
   },
   "engines": {
     "node": ">=22"

diff --git a/quorum.skill b/quorum.skill
diff --git a/references/clustering-rubric.md b/references/clustering-rubric.md
@@ -0,0 +1,91 @@
+# Clustering rubric
+
+You are deduplicating findings from automated code reviewers (Bugbot, Copilot, Devin, …) on a single pull request. Partition **all** findings into clusters, where each cluster represents exactly one underlying issue. You are the judge; read this file in full, then write `clusters.json`.
+
+Two role boundaries, non-negotiable:
+
+- **You match; you do not re-review.** Never drop, merge away, or down-rank a finding because you think it's wrong. Validity gets measured later, from fix data.
+- **You do not compute quorum.** That happens deterministically in `validate_partition.py`.
+
+## The test
+
+Two findings belong in the same cluster **if and only if a single code change would plausibly resolve both.** Same root cause, same fix → same cluster.
+
+### MERGE when findings describe the same defect even if they are:
+
+- anchored to different lines — one flags where the bug originates, another where it manifests downstream
+- worded very differently, or given different severities by their reviewers
+- at different abstraction levels: one general ("no error handling in this function"), one a specific instance ("this await can reject unhandled") → merge with `match_type: "general-specific"`
+- from the **same** reviewer — bots duplicate themselves across re-review passes → `match_type: "within-reviewer-dup"`
+
+### DO NOT MERGE when:
+
+- findings are the same *category* of bug at independent locations requiring independent fixes — two separate missing null checks are two clusters
+- different issues happen to anchor on the same line
+- one is functional and the other purely stylistic, even if co-located
+- **you are uncertain.** A wrong merge fabricates reviewer consensus, which is worse than leaving a duplicate. Only merge when the single-fix test clearly passes.
+
+Cross-file merges are allowed **only** when the root cause is literally shared (e.g. both findings trace to the same mutated shared constant). Set `"cross_file": true` and justify in `match_rationale`.
+
+Reviewer identity is irrelevant to whether two findings match. Use the `hunk` diff context to judge — comment prose alone is often too vague to tell whether two descriptions point at the same defect.
+
+## Output schema
+
+Write strict JSON to `clusters.json`. Hard constraints:
+
+- Every input finding `id` appears in **exactly one** cluster. No omissions, no duplicates, no invented ids.
+- Singleton clusters are expected and fine — most clusters will be singletons.
+- `canonical_title`: ≤ 80 chars, names the defect, not the symptom.
+- `canonical_description`: 1–3 sentences synthesizing the **union** of information across members — if one reviewer adds detail the others missed, keep it.
+- `category`: one of `logic | concurrency | security | performance | error-handling | data-integrity | api-contract | style | docs | test-gap | other`
+- `severity`: max across members, one of `critical | major | minor | nit`
+- `match_type`: `exact | same-root-cause | general-specific | within-reviewer-dup | singleton`
+- `match_confidence`: 0.0–1.0; use 1.0 for singletons. Multi-finding clusters below 0.7 will be split back into singletons by the validator — that gate is intentional, do not inflate confidence to dodge it.
+- `match_rationale`: one sentence, required for clusters of size > 1.
+
+```json
+{
+  "clusters": [
+    {
+      "cluster_id": "c1",
+      "member_ids": ["bugbot-3", "devin-1"],
+      "canonical_title": "string",
+      "canonical_description": "string",
+      "category": "logic",
+      "severity": "major",
+      "primary_location": {"file": "string", "start_line": 0, "end_line": 0},
+      "match_type": "same-root-cause",
+      "match_confidence": 0.9,
+      "match_rationale": "one sentence; required for size > 1",
+      "cross_file": false
+    }
+  ]
+}
+```
+
+`primary_location` is where a human should look first — usually the origin of the defect, not a downstream symptom.
+
+## Worked examples
+
+**A — merge across different lines (same-root-cause):**
+
+- `bugbot-2` @ `utils/options.ts:14` — "Object.assign(DEFAULT_OPTIONS, userOpts) mutates the shared default object; later callers inherit this user's prefs."
+- `devin-4` @ `routes/trip.ts:88` — "Route preferences appear to leak between requests; defaults polluted by prior calls."
+
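+→ One fix (clone before assign) resolves both. **MERGE** with `"cross_file": true` — the findings sit in different files but trace to the same mutated default — and put `primary_location` at the mutation site (`utils/options.ts:14`).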
+
+**B — do not merge (same category, independent instances):**
+
+- `copilot-1` @ `api/users.ts:42` — missing null check on `req.user`
+- `bugbot-5` @ `api/orders.ts:17` — missing null check on `order.customer`
+
+→ Independent fixes. **TWO clusters.**
+
+**C — merge general + specific:**
+
+- `devin-2` @ `services/sync.ts` (function-level) — "No error handling in syncAll; any failure leaves partial state."
+- `bugbot-7` @ `services/sync.ts:103` — "await push() can reject and is unhandled."
+
+→ The specific is an instance of the general. **MERGE**, `match_type: "general-specific"`; the description covers the broad gap and cites line 103 as a concrete instance.
+
+<!-- Add new worked examples here from dogfood logs: every bad merge you catch in the wild is the next example. -->
diff --git a/scripts/fetch_findings.sh b/scripts/fetch_findings.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+# fetch_findings.sh — pull line-anchored review comments from a PR, keep the
+# ones authored by review bots, and normalize them into Quorum finding records.
+#
+# usage: fetch_findings.sh OWNER/REPO PR_NUMBER [OUT=findings.json]
+# env:   QUORUM_BOTS  case-insensitive regex matched against author login
+#                     (default: 'cursor\[bot\]|copilot|devin')
+#
+# Output record:
+#   { id, reviewer, login, file, lines:[start,end], outdated,
+#     body, hunk, url, comment_id, node_id }
+#
+# ids are "<reviewer>-<n>", numbered per reviewer short name in (file, line)
+# order, so they stay unique even when one bot posts under several logins.
+# Diagnostics go to stderr; OUT receives only the JSON array of findings.
+set -euo pipefail
+
+usage() { echo "usage: fetch_findings.sh OWNER/REPO PR_NUMBER [out.json]" >&2; exit 2; }
+[[ $# -ge 2 ]] || usage
+
+REPO=$1
+PR=$2
+OUT=${3:-findings.json}
+BOTS_RE=${QUORUM_BOTS:-'cursor\[bot\]|copilot|devin'}
+
+command -v gh >/dev/null || { echo "error: gh CLI not found" >&2; exit 1; }
+command -v jq >/dev/null || { echo "error: jq not found" >&2; exit 1; }
+
+# The raw API response is staged in a temp file, removed on exit by the trap.
+RAW=$(mktemp)
+trap 'rm -f "$RAW"' EXIT
+
+# --paginate emits one JSON document per page; --jq '.[]' flattens to a
+# stream of comment objects, jq -s reassembles a single array.
+gh api "repos/$REPO/pulls/$PR/comments" --paginate --jq '.[]' | jq -s '.' > "$RAW"
+
+TOTAL=$(jq 'length' "$RAW")
+# Comments with no author login are left out of the author list.
+AUTHORS=$(jq -r '[.[] | .user.login // empty] | unique | join(", ")' "$RAW")
+
+jq --arg re "$BOTS_RE" '
+  # Map an author login to the short reviewer name used in ids and summaries.
+  def short(l): (l | ascii_downcase) as $x
+    | if   ($x | test("cursor"))  then "bugbot"
+      elif ($x | test("copilot")) then "copilot"
+      elif ($x | test("devin"))   then "devin"
+      else ($x | gsub("\\[bot\\]$"; "") | gsub("[^a-z0-9]+"; "-"))
+      end;
+
+  # Comments with no author login cannot be attributed to a bot, and test()
+  # raises an error on null input, so drop them before matching.
+  [ .[] | select(.user.login != null) | select(.user.login | test($re; "i")) ]
+  | sort_by(.path, (.line // .original_line // 0))
+  # Group by short name rather than raw login: two logins that map to the same
+  # short name would otherwise both start numbering at 1 and collide on ids.
+  | group_by(short(.user.login))
+  | map(
+      to_entries
+      | map(
+          .value as $c
+          | {
+              id:       (short($c.user.login) + "-" + ((.key + 1) | tostring)),
+              reviewer: short($c.user.login),
+              login:    $c.user.login,
+              file:     $c.path,
+              lines: [
+                ($c.start_line // $c.original_start_line // $c.line // $c.original_line),
+                ($c.line // $c.original_line // $c.start_line // $c.original_start_line)
+              ],
+              outdated: ($c.line == null),
+              body:     $c.body,
+              hunk:     ($c.diff_hunk // ""),
+              url:      $c.html_url,
+              comment_id: $c.id,
+              node_id:  $c.node_id
+            }
+        )
+    )
+  | add // []
+  | sort_by(.file, (.lines[0] // 0))
+' "$RAW" > "$OUT"
+
+N=$(jq 'length' "$OUT")
+echo "PR #$PR: $TOTAL review comment(s) total; authors seen: ${AUTHORS:-none}" >&2
+echo "Matched $N bot finding(s) -> $OUT" >&2
+jq -r 'group_by(.reviewer)[] | "  \(.[0].reviewer): \(length)"' "$OUT" >&2 || true
+
+if [[ "$N" -eq 0 ]]; then
+  echo "" >&2
+  echo "No findings matched filter '$BOTS_RE'." >&2
+  echo "If review bots did comment, set QUORUM_BOTS to match the author logins listed above and re-run." >&2
+fi