diff --git a/Makefile b/Makefile index cf9b49b4..08d06eed 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ DB ?= postgres # ATOMIC COMPONENTS - Infrastructure Management # ============================================================================ -.PHONY: start-database stop-database clean-database start-redis stop-redis clean-redis start-db stop-db start-nats stop-nats clean-nats +.PHONY: start-database stop-database clean-database start-redis stop-redis clean-redis start-db stop-db start-nats stop-nats clean-nats install-hooks start-database: @uv run scripts/manage-database.py start @@ -59,6 +59,12 @@ stop-nats: ## Stop the NATS JetStream container clean-nats: ## Remove the NATS JetStream container @uv run scripts/manage-nats.py clean +install-hooks: ## Install Git hooks (points core.hooksPath at scripts/hooks) + @git config core.hooksPath scripts/hooks + @chmod +x scripts/hooks/pre-commit scripts/hooks/post-commit + @echo "Git hooks installed (core.hooksPath -> scripts/hooks)" + @echo "Note: this supersedes any personal hooks in .git/hooks/ (Git ignores that dir while core.hooksPath is set)." + # Test Infrastructure - Ephemeral containers for integration tests (isolated from dev) .PHONY: start-test-database stop-test-database clean-test-database start-test-redis stop-test-redis clean-test-redis clean-test-infrastructure diff --git a/docs/superpowers/plans/2026-07-01-ci-security-scanners.md b/docs/superpowers/plans/2026-07-01-ci-security-scanners.md new file mode 100644 index 00000000..f42d4f6b --- /dev/null +++ b/docs/superpowers/plans/2026-07-01-ci-security-scanners.md @@ -0,0 +1,208 @@ +# CI Security Scanners Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Add a blocking `govulncheck` job and an informational standalone `gosec` job (results uploaded to the GitHub Security tab) to CI, without changing CodeQL or the `gosec` already embedded in `golangci-lint`. + +**Architecture:** Two new jobs appended to `.github/workflows/security.yml`. Both reuse the existing checkout + `setup-go` + tmi-clients replace-shim pattern already used by the `lint`/`test` jobs. `govulncheck` fails the build on findings; `gosec` runs with `-no-fail` and uploads SARIF via `github/codeql-action/upload-sarif`. + +**Tech Stack:** GitHub Actions, `golang.org/x/vuln/cmd/govulncheck`, `github.com/securego/gosec/v2`, `github/codeql-action/upload-sarif`. + +## Global Constraints + +- Tool versions are **pinned** by repo convention (cf. `vacuum` `0.29.7`, `golangci-lint` `v2.12.2`). Do not use `@latest` in the committed workflow. +- Every Go-running job must apply the tmi-clients replace-shim, or `go` commands fail: checkout `ericfitz/tmi-clients` into `.tmi-clients`, then `sed -i 's|=> ../tmi-clients/|=> ./.tmi-clients/|' go.mod`. +- `setup-go` uses `go-version-file: go.mod` with `cache: true` (match existing jobs). +- Generated `api/api.go` carries a `Code generated ... DO NOT EDIT` header — exclude it from `gosec` via `-exclude-generated`. +- The `gosec` job must never block the build (`-no-fail` + non-fatal upload). +- Triggers match the workflow: `pull_request` and `push` to `main` (already set at the `on:` level). + +--- + +### Task 1: Confirm the repo is currently govulncheck-clean (pre-flight) + +**Files:** none (local check only). + +**Interfaces:** +- Consumes: nothing. +- Produces: confirmation that making `govulncheck` a required gate won't immediately red-wall CI. If findings exist, they must be resolved (dependency bumps) BEFORE Task 2 — that is a separate dep-bump effort, not part of this plan. 
+ +- [ ] **Step 1: Run govulncheck locally against the whole module** + +Run: `govulncheck ./...` +Expected: `No vulnerabilities found.` (or a summary with zero *called* vulnerabilities). + +- [ ] **Step 2: If findings exist, stop and record them** + +If `govulncheck` reports called vulnerabilities, do NOT proceed to make the job blocking. Record the module/CVE and hand off to a dependency bump (`deps:bump` skill / Dependabot). Re-run Step 1 until clean. Only continue to Task 2 once the module is clean. + +--- + +### Task 2: Add the blocking `govulncheck` job + +**Files:** +- Modify: `.github/workflows/security.yml` (add a `govulncheck` job under `jobs:`) + +**Interfaces:** +- Consumes: the module being govulncheck-clean (Task 1). +- Produces: a required CI job named `govulncheck` that fails on findings. + +- [ ] **Step 1: Add the govulncheck job** + +Append this job to the `jobs:` map in `.github/workflows/security.yml` (sibling of `lint`, `build`, `test`): + +```yaml + govulncheck: + name: Vulnerability Scan (govulncheck) + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout repository + uses: actions/checkout@v7 + + - name: Setup Go + uses: actions/setup-go@v6 + with: + go-version-file: go.mod + cache: true + + - name: Provision generated client (tmi-clients) + uses: actions/checkout@v7 + with: + repository: ericfitz/tmi-clients + path: .tmi-clients + - name: Point go.mod replace at the checked-out client + run: sed -i 's|=> ../tmi-clients/|=> ./.tmi-clients/|' go.mod + + - name: Install govulncheck + # Pinned per repo convention. Verify this is the current release and + # bump if a newer stable tag exists. + run: go install golang.org/x/vuln/cmd/govulncheck@v1.1.4 + + - name: Run govulncheck + run: govulncheck ./... 
+``` + +- [ ] **Step 2: Validate the workflow YAML syntax** + +Run: `python3 -c "import yaml,sys; yaml.safe_load(open('.github/workflows/security.yml')); print('YAML OK')"` +Expected: `YAML OK` + +- [ ] **Step 3: (If actionlint is available) lint the workflow** + +Run: `if command -v actionlint >/dev/null; then actionlint .github/workflows/security.yml; else echo "actionlint not installed, skipping"; fi` +Expected: no errors (or the skip message when actionlint is not installed). A non-zero exit means actionlint found real problems — fix them before committing. + +- [ ] **Step 4: Commit** + +```bash +git add .github/workflows/security.yml +git commit -m "ci(security): add blocking govulncheck job" +``` + +--- + +### Task 3: Add the informational `gosec` job (SARIF -> Security tab) + +**Files:** +- Modify: `.github/workflows/security.yml` (add a `gosec` job under `jobs:`) + +**Interfaces:** +- Consumes: nothing from prior tasks. +- Produces: a non-blocking CI job named `gosec` whose SARIF appears under Security -> Code scanning. + +- [ ] **Step 1: Add the gosec job** + +Append this job to the `jobs:` map in `.github/workflows/security.yml`: + +```yaml + gosec: + name: Static Security Scan (gosec, informational) + runs-on: ubuntu-latest + permissions: + contents: read + security-events: write # required to upload SARIF to the Security tab + steps: + - name: Checkout repository + uses: actions/checkout@v7 + + - name: Setup Go + uses: actions/setup-go@v6 + with: + go-version-file: go.mod + cache: true + + - name: Provision generated client (tmi-clients) + uses: actions/checkout@v7 + with: + repository: ericfitz/tmi-clients + path: .tmi-clients + - name: Point go.mod replace at the checked-out client + run: sed -i 's|=> ../tmi-clients/|=> ./.tmi-clients/|' go.mod + + - name: Install gosec + # Pinned per repo convention. Verify current release and bump if needed. + run: go install github.com/securego/gosec/v2/cmd/gosec@v2.22.5 + + - name: Run gosec (never fails the build) + # -no-fail: exit 0 even on findings (informational job). + # -exclude-generated: skip oapi-codegen's api/api.go and other generated code.
+ # -exclude-dir: skip the checked-out client shim. + run: gosec -no-fail -exclude-generated -exclude-dir=.tmi-clients -fmt sarif -out gosec.sarif ./... + + - name: Upload SARIF to the Security tab + if: always() + continue-on-error: true # non-fatal upload: a failed upload must not turn this informational job red + uses: github/codeql-action/upload-sarif@v4 + with: + sarif_file: gosec.sarif + category: gosec +``` + +- [ ] **Step 2: Validate the workflow YAML syntax** + +Run: `python3 -c "import yaml,sys; yaml.safe_load(open('.github/workflows/security.yml')); print('YAML OK')"` +Expected: `YAML OK` + +- [ ] **Step 3: (Optional) smoke-test gosec locally** + +Run: `go install github.com/securego/gosec/v2/cmd/gosec@v2.22.5 && gosec -no-fail -exclude-generated -exclude-dir=.tmi-clients -fmt sarif -out /tmp/gosec.sarif ./... && python3 -c "import json; d=json.load(open('/tmp/gosec.sarif')); print('runs:', len(d['runs']))"` +Expected: gosec completes with exit 0 and a valid SARIF file (`runs: 1`). Findings are fine — this job is informational. + +- [ ] **Step 4: Commit** + +```bash +git add .github/workflows/security.yml +git commit -m "ci(security): add informational gosec job with SARIF upload" +``` + +--- + +### Task 4: Verify on a branch and mark govulncheck required + +**Files:** none (CI + GitHub settings). + +**Interfaces:** +- Consumes: Tasks 2 and 3. +- Produces: confirmed CI behavior; `govulncheck` added to required checks. + +- [ ] **Step 1: Push the branch and open a PR** + +Push the working branch and open a PR against `main`. Both new jobs run. + +- [ ] **Step 2: Confirm job behavior** + +Expected: +- `Vulnerability Scan (govulncheck)` runs and PASSES (module is clean per Task 1). +- `Static Security Scan (gosec, informational)` runs, completes green regardless of findings, and its results appear under **Security -> Code scanning** (category `gosec`). + +- [ ] **Step 3: Add govulncheck to required status checks** + +In the repo's branch-protection rules for `main`, add `Vulnerability Scan (govulncheck)` to the required checks so it blocks merge.
(This is a GitHub Settings action; do NOT add `gosec` — it is informational by design.) + +--- + +## Notes / follow-ups (not tasks) + +- The `gosec` linter already inside `golangci-lint` stays as-is (blocking, with `.golangci.yml` exclusions). The standalone job is broader-scope + Security-tab visibility; this duplication is intentional. +- If the informational `gosec` output proves high-signal after triage, a future change can tighten it to blocking (drop `-no-fail`). diff --git a/docs/superpowers/plans/2026-07-01-k3s-dev-target.md b/docs/superpowers/plans/2026-07-01-k3s-dev-target.md new file mode 100644 index 00000000..55fe63f1 --- /dev/null +++ b/docs/superpowers/plans/2026-07-01-k3s-dev-target.md @@ -0,0 +1,617 @@ +# k3s Dev Target (`CLUSTER=k3s`) Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add `CLUSTER=k3s` as a selectable dev-deployment target that deploys TMI to the existing remote `k3s-rp` cluster with an in-cluster registry, single-node in-cluster Postgres and Redis, and a `kubectl port-forward` that preserves the `localhost:8080` contract — while keeping `kind` the default, unchanged. + +**Architecture:** Mirror the existing `DB=oracle` selector with a new orthogonal `CLUSTER` selector (`kind` default) threaded Makefile → `scripts/devenv.py` → `scripts/lib/cluster.py` / `scripts/lib/deploy.py`. `CLUSTER=k3s` switches the kube-context to `k3s-rp` (never creating/deleting it), points image push/refs at an in-cluster registry (`rp2:30500`), selects a new `deployments/k8s/dev/k3s/` kustomize overlay that adds in-cluster Postgres, rewrites the DB URL host to the in-cluster `postgres` Service, and exposes the server via `kubectl port-forward svc/tmi-server 8080:8080`. 
+ +**Tech Stack:** k3s, kubectl, kustomize, Docker buildx, Python dev tooling (`uv`), Chainguard images, GORM AutoMigrate (schema-on-startup). + +## Global Constraints + +- **`kind` stays the default and must not regress.** `make dev-up` (no args) behaves exactly as today. +- `CLUSTER ?= kind`; only values `kind` and `k3s` are supported. `CLUSTER` is orthogonal to `DB` at the CLI level, but this plan implements and validates only `CLUSTER=k3s DB=postgres`; `CLUSTER=k3s DB=oracle` is out of scope because `CLUSTER=k3s` always selects the postgres overlay (see Task 4 Step 3). +- We do **not** own the `k3s-rp` cluster: never `create`/`delete` it. `dev-nuke CLUSTER=k3s` removes only the `tmi-platform` namespace + PVCs. +- Remote cluster API server: `https://rp2:6443`; nodes and Mac dev host are both **arm64**. +- The in-cluster registry is `rp2:30500` (NodePort 30500), plain-HTTP (insecure). Requires one-time Mac Docker `insecure-registries` config and one-time k3s node `registries.yaml` config. +- Schema is created by the server's **GORM `AutoMigrate` at startup** (same as kind) — no separate migration Job. *(This deviates from the spec's "in-cluster migration Job" note; it is a deliberate simplification to match the existing kind path. Confirm before implementing.)* +- Redis stays in-cluster (`deployments/k8s/dev/redis.yml`), reused unchanged. +- Use Make targets; Python is invoked via `uv run scripts/devenv.py`. +- Never use Go's std `log`; use `slogging` — N/A here (no Go changes expected). + +## Pre-flight environment facts to confirm (run once, before Task 1) + +- [ ] `kubectl config get-contexts` shows `k3s-rp`; `kubectl --context k3s-rp get nodes` returns Ready nodes. +- [ ] `rp2` resolves from the Mac: `ping -c1 rp2` (or `dscacheutil -q host -a name rp2`; macOS does not ship `getent`). If not, add it to `/etc/hosts` or use the node IP consistently in place of `rp2` throughout. +- [ ] Default StorageClass exists on k3s: `kubectl --context k3s-rp get storageclass` shows `local-path (default)`. PVCs in this plan omit `storageClassName` and rely on it.
+- [ ] Node arch is arm64: `kubectl --context k3s-rp get nodes -o wide` (or `kubectl ... get node -o jsonpath='{.items[*].status.nodeInfo.architecture}'`) → `arm64`. + +--- + +### Task 1: Thread the `CLUSTER` selector (Makefile + devenv.py), default `kind` + +**Files:** +- Modify: `Makefile` (add `CLUSTER ?= kind`; pass `--cluster $(CLUSTER)` to every `devenv.py` dev invocation, next to the existing `--db $(DB)`) +- Modify: `scripts/devenv.py` (`_add_global_options()` ~line 127-169; `cmd_up`/`cmd_down`/`cmd_status`/`cmd_nuke`/`cmd_reset` ~line 40+; `cmd_cluster` ~line 106) + +**Interfaces:** +- Consumes: nothing. +- Produces: `args.cluster` (`"kind"|"k3s"`, default `"kind"`) available to all dev commands and passed into `deploy.start(...)` / cluster lifecycle as a `cluster=` kwarg. Later tasks branch on it. + +- [ ] **Step 1: Add the CLUSTER default and plumb it in the Makefile** + +Near the top of `Makefile` (by `DB ?= postgres`, ~line 22), add: + +```makefile +# Default kube cluster target for dev environment (kind|k3s) +CLUSTER ?= kind +``` + +Then, for every dev target that invokes `devenv.py` with `--db $(DB)`, add `--cluster $(CLUSTER)`. Example (apply the same edit to `dev-up`, `dev-down`, `dev-status`, `dev-reset`, `dev-nuke`, and the cluster up/down targets): + +```makefile +# before: @uv run scripts/devenv.py up --db $(DB) +# after: + @uv run scripts/devenv.py up --db $(DB) --cluster $(CLUSTER) +``` + +- [ ] **Step 2: Add the `--cluster` global option in devenv.py** + +In `scripts/devenv.py`, inside `_add_global_options()` (where `--db` is defined), add: + +```python + parser.add_argument( + "--cluster", + choices=["kind", "k3s"], + default="kind", + help="Kube cluster target: 'kind' (local, default) or 'k3s' (remote k3s-rp).", + ) +``` + +- [ ] **Step 3: Pass `args.cluster` through the command dispatchers** + +In each `cmd_*` that calls into `deploy`/`cluster` (e.g. 
`cmd_up` calling `deploy.start(db=args.db, ...)`), thread the new kwarg, e.g.: + +```python + deploy.start(db=args.db, cluster=args.cluster, workers=..., ...) +``` + +Do the same for `cmd_down`, `cmd_status`, `cmd_reset`, `cmd_nuke`, and `cmd_cluster`. (Later tasks add the `cluster=` parameter to those functions; for now, adding it here with the functions still ignoring it is fine because default is `"kind"`.) + +- [ ] **Step 4: Verify no regression to the default path** + +Run: `make dev-status` +Expected: behaves exactly as before (reports kind `tmi-dev` status); no error about unknown `--cluster` arg. + +Run: `uv run scripts/devenv.py status --cluster k3s` (should parse cleanly even though later tasks implement the behavior) +Expected: argument accepted (no argparse error). Behavior may still be kind-oriented until later tasks land. + +- [ ] **Step 5: Commit** + +```bash +git add Makefile scripts/devenv.py +git commit -m "feat(dev): thread CLUSTER selector (default kind) through dev tooling" +``` + +--- + +### Task 2: Cluster-aware lifecycle & context guard in `cluster.py` + +**Files:** +- Modify: `scripts/lib/cluster.py` (constants ~line 16-31; `up()` ~119-136; `down()` ~138-142; `local_image_ref()` ~29-31; context-guard helper used by `deploy._guard_context()` ~188) + +**Interfaces:** +- Consumes: `cluster` string from Task 1. +- Produces: + - `K3S_CONTEXT = "k3s-rp"`, `K3S_REGISTRY = "rp2:30500"`. + - `registry_for(cluster) -> str` → `"localhost:5000"` (kind) or `"rp2:30500"` (k3s). + - `local_image_ref(name, tag="dev", *, cluster="kind")` → registry-correct image ref. + - `up(cluster="kind")` / `down(cluster="kind")` that no-op cluster creation/deletion for k3s and switch context instead. + - context guard accepts `k3s-rp` as a valid target when `cluster == "k3s"`. 
+ +- [ ] **Step 1: Add k3s constants and a registry selector** + +In `scripts/lib/cluster.py`, near the existing registry constants: + +```python +K3S_CONTEXT = "k3s-rp" +K3S_REGISTRY = "rp2:30500" # in-cluster registry exposed via NodePort 30500 + + +def registry_for(cluster: str) -> str: + """Return the image registry hostname for the given cluster target.""" + return K3S_REGISTRY if cluster == "k3s" else LOCAL_REGISTRY +``` + +- [ ] **Step 2: Make `local_image_ref` cluster-aware** + +Update the signature (currently `local_image_ref(name, tag="dev", registry=LOCAL_REGISTRY)` at ~line 29-31): + +```python +def local_image_ref(name: str, tag: str = "dev", *, cluster: str = "kind") -> str: + """Fully-qualified dev image ref for the given cluster's registry.""" + return f"{registry_for(cluster)}/{name}:{tag}" +``` + +Update existing callers that pass `registry=` to pass `cluster=` instead (search: `local_image_ref(`). + +- [ ] **Step 3: Branch `up()`/`down()` on cluster** + +At the top of `up()` (~line 119) and `down()` (~line 138): + +```python +def up(cluster: str = "kind") -> None: + if cluster == "k3s": + # We do not own k3s-rp: just select its context. No create, no local registry. + run(["kubectl", "config", "use-context", K3S_CONTEXT]) + return + # ... existing kind path unchanged ... + + +def down(cluster: str = "kind") -> None: + if cluster == "k3s": + # Never delete a cluster we don't own; namespace teardown is handled by deploy. + return + # ... existing kind path unchanged ... +``` + +- [ ] **Step 4: Accept `k3s-rp` in the context guard** + +`deploy._guard_context()` (deploy.py ~line 188) currently asserts the active kube-context is the local kind context. Make the allowed context depend on `cluster`. 
In `cluster.py`, expose: + +```python +def expected_context(cluster: str) -> str: + """The kube-context that must be active for the given cluster target.""" + return K3S_CONTEXT if cluster == "k3s" else f"kind-{CLUSTER_NAME}" +``` + +(Task 3/5 wire `deploy._guard_context(cluster)` to use this; for now just add the helper.) + +- [ ] **Step 5: Verify kind path unchanged** + +Run: `python3 -c "import sys; sys.path.insert(0,'scripts'); from lib import cluster; print(cluster.registry_for('kind'), cluster.registry_for('k3s')); print(cluster.local_image_ref('tmi-server', cluster='kind')); print(cluster.local_image_ref('tmi-server', cluster='k3s')); print(cluster.expected_context('kind'), cluster.expected_context('k3s'))"` +Expected: +``` +localhost:5000 rp2:30500 +localhost:5000/tmi-server:dev +rp2:30500/tmi-server:dev +kind-tmi-dev k3s-rp +``` + +- [ ] **Step 6: Commit** + +```bash +git add scripts/lib/cluster.py scripts/lib/deploy.py +git commit -m "feat(dev): cluster-aware registry, image refs, and context guard" +``` + +--- + +### Task 3: In-cluster registry + one-time insecure-registry config + cluster-aware build/push + +**Files:** +- Create: `deployments/k8s/dev/k3s/registry.yml` (registry Deployment + PVC + Service NodePort 30500) +- Modify: `scripts/lib/deploy.py` (`build_and_push(db)` ~300-330 → add `cluster` param; `ensure_registry` usage; push target) +- Create: `deployments/k8s/dev/k3s/README-node-setup.md` (documented one-time node config; optional helper) + +**Interfaces:** +- Consumes: `registry_for(cluster)` (Task 2). +- Produces: a running `registry` Service reachable at `rp2:30500`; `build_and_push(db, cluster="kind")` that tags/pushes to the correct registry and builds arm64 for k3s.
+ +- [ ] **Step 1: Write the in-cluster registry manifest** + +Create `deployments/k8s/dev/k3s/registry.yml`: + +```yaml +apiVersion: v1 +kind: Namespace +metadata: + name: tmi-platform +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: registry-data + namespace: tmi-platform +spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: registry + namespace: tmi-platform +spec: + replicas: 1 + selector: + matchLabels: { app: registry } + template: + metadata: + labels: { app: registry } + spec: + containers: + - name: registry + image: registry:2 + ports: + - containerPort: 5000 + volumeMounts: + - name: data + mountPath: /var/lib/registry + resources: + requests: { cpu: 50m, memory: 128Mi } + limits: { cpu: "1", memory: 512Mi } + volumes: + - name: data + persistentVolumeClaim: + claimName: registry-data +--- +apiVersion: v1 +kind: Service +metadata: + name: registry + namespace: tmi-platform +spec: + type: NodePort + selector: { app: registry } + ports: + - port: 5000 + targetPort: 5000 + nodePort: 30500 +``` + +- [ ] **Step 2: Deploy the registry to k3s-rp** + +Run: `kubectl --context k3s-rp apply -f deployments/k8s/dev/k3s/registry.yml` +Then: `kubectl --context k3s-rp -n tmi-platform rollout status deploy/registry --timeout=120s` +Expected: rollout succeeds; `kubectl --context k3s-rp -n tmi-platform get svc registry` shows NodePort 30500. + +- [ ] **Step 3: One-time Mac Docker insecure-registry config** + +Add `rp2:30500` to the Docker daemon's insecure registries so `docker push` works over plain HTTP. In Docker Desktop → Settings → Docker Engine, add: + +```json +{ "insecure-registries": ["rp2:30500"] } +``` + +Apply & restart Docker. Verify: `docker info --format '{{.RegistryConfig.InsecureRegistryCIDRs}} {{range .RegistryConfig.IndexConfigs}}{{.Name}} {{end}}'` (or `docker info | grep -A3 "Insecure Registries"`) lists `rp2:30500`. 
+ +- [ ] **Step 4: One-time k3s node registries.yaml config (documented)** + +Create `deployments/k8s/dev/k3s/README-node-setup.md` documenting the manual, per-node step (needs SSH/root on each Pi node — cannot be driven from the Mac): + +``` +On EACH k3s node, create/merge /etc/rancher/k3s/registries.yaml: + + mirrors: + "rp2:30500": + endpoint: + - "http://rp2:30500" + +Then restart k3s so containerd picks it up: + sudo systemctl restart k3s # on server nodes + sudo systemctl restart k3s-agent # on agent nodes + +Verify containerd can pull: + sudo k3s crictl pull rp2:30500/tmi-server:dev # after first push +``` + +*(Optional helper: a small script that SSHes to a provided node list and installs the file. Keep it out of the default `dev-up` path — it needs credentials we don't manage.)* + +- [ ] **Step 5: Make `build_and_push` cluster-aware (arm64 for k3s)** + +In `deploy.py`, add a `cluster` parameter to `build_and_push` (~line 300) and use `cluster.registry_for(cluster)` for tags/push. For k3s, build explicitly for arm64 via buildx (Mac is arm64, so `docker build` already yields arm64, but be explicit and push in one shot): + +```python +def build_and_push(db: str, cluster: str = "kind") -> None: + registry = cluster_lib.registry_for(cluster) + # ... stage tmi-client deps (unchanged) ... + for name, dockerfile, build_args in image_builds_for(db): + ref = f"{registry}/{name}:dev" + if cluster == "k3s": + # arm64 nodes; buildx --push tags+pushes to the in-cluster registry. + cmd = ["docker", "buildx", "build", "--platform", "linux/arm64", + "-f", dockerfile] + for k, v in build_args.items(): + cmd += ["--build-arg", f"{k}={v}"] + cmd += ["-t", ref, "--push", str(PROJECT_ROOT)] + run(cmd) + else: + # ... existing kind docker build + docker push to localhost:5000 ... 
+``` + +- [ ] **Step 6: Verify a build+push to the in-cluster registry** + +Run: `uv run scripts/devenv.py --help` (sanity: tooling imports cleanly), then trigger the k3s build path once Task 4/5 wiring exists. Standalone check now: +`docker buildx build --platform linux/arm64 -f Dockerfile.server --build-arg BUILD_TAGS=dev -t rp2:30500/tmi-server:dev --push .` +Then: `curl -s http://rp2:30500/v2/tmi-server/tags/list` +Expected: `{"name":"tmi-server","tags":["dev"]}` + +- [ ] **Step 7: Commit** + +```bash +git add deployments/k8s/dev/k3s/registry.yml deployments/k8s/dev/k3s/README-node-setup.md scripts/lib/deploy.py +git commit -m "feat(dev): in-cluster k3s registry and cluster-aware image build/push" +``` + +--- + +### Task 4: k3s kustomize overlay with in-cluster Postgres + DB-host rewrite + +**Files:** +- Create: `deployments/k8s/dev/k3s/postgres.yml` (StatefulSet + PVC + Service + Secret) +- Create: `deployments/k8s/dev/k3s/kustomization.yaml` (overlay: reuse server/redis/controller, remap images to `rp2:30500`, add postgres) +- Modify: `scripts/lib/deploy.py` (`overlay_dir_for` ~83-85 to be cluster-aware; DB-host rewrite ~117-125 & 354; `_no_workers_files` ~88-96) + +**Interfaces:** +- Consumes: registry from Task 3; server manifest `deployments/k8s/dev/server.yml` (image `localhost:5000/tmi-server`). +- Produces: an overlay applied for `CLUSTER=k3s DB=postgres` that stands up in-cluster Postgres (`postgres:5432`) and points the server's DB URL at it. + +- [ ] **Step 1: Write the in-cluster Postgres manifest** + +Create `deployments/k8s/dev/k3s/postgres.yml`. Credentials MUST match `config-development.yml`'s `database.url` (user/password/db). Confirm those values first: `rg 'url:|postgres' config-development.yml`. 
This template assumes `tmi_dev` / `dev123` / `tmi_dev` — adjust to match: + +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: tmi-postgres + namespace: tmi-platform +type: Opaque +stringData: + POSTGRES_USER: "tmi_dev" + POSTGRES_PASSWORD: "dev123" + POSTGRES_DB: "tmi_dev" +--- +apiVersion: v1 +kind: Service +metadata: + name: postgres + namespace: tmi-platform +spec: + selector: { app: postgres } + ports: + - port: 5432 + targetPort: 5432 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: postgres + namespace: tmi-platform +spec: + serviceName: postgres + replicas: 1 + selector: + matchLabels: { app: postgres } + template: + metadata: + labels: { app: postgres } + spec: + containers: + - name: postgres + image: cgr.dev/chainguard/postgres:latest + envFrom: + - secretRef: { name: tmi-postgres } + ports: + - containerPort: 5432 + volumeMounts: + - name: data + mountPath: /var/lib/postgresql/data + readinessProbe: + exec: { command: ["pg_isready", "-U", "tmi_dev"] } + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + requests: { cpu: 100m, memory: 256Mi } + limits: { cpu: "1", memory: 1Gi } + volumeClaimTemplates: + - metadata: + name: data + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi +``` + +- [ ] **Step 2: Write the k3s kustomize overlay** + +Create `deployments/k8s/dev/k3s/kustomization.yaml`. 
Reuse the base resources, add postgres, and remap every `localhost:5000` image to `rp2:30500` via the images transformer: + +```yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: tmi-platform +resources: + - postgres.yml + - ../controller.yml + - ../redis.yml + - ../server.yml + - ../../platform/components/tmi-extractor.yml + - ../../platform/components/tmi-chunk-embed.yml +images: + - name: localhost:5000/tmi-server + newName: rp2:30500/tmi-server + - name: localhost:5000/tmi-component-controller + newName: rp2:30500/tmi-component-controller + - name: localhost:5000/tmi-extractor + newName: rp2:30500/tmi-extractor + - name: localhost:5000/tmi-chunk-embed + newName: rp2:30500/tmi-chunk-embed +``` + +*(If the base applies image patches via `patches/extractor-image.yaml` that set `localhost:5000/...`, the `images:` transformer above still rewrites the final ref. Verify with the render in Step 5.)* + +- [ ] **Step 3: Make `overlay_dir_for` cluster-aware** + +`overlay_dir_for(db)` (deploy.py ~83-85) currently returns `dev/oracle` for oracle else `dev`. Extend to consider cluster: + +```python +def overlay_dir_for(db: str, cluster: str = "kind") -> str: + if cluster == "k3s": + return "deployments/k8s/dev/k3s" # postgres-in-cluster overlay + if db == "oracle": + return "deployments/k8s/dev/oracle" + return "deployments/k8s/dev" +``` + +*(Oracle-on-k3s is out of scope for this plan; `CLUSTER=k3s` implies the postgres overlay.)* + +- [ ] **Step 4: Rewrite the DB host to the in-cluster Service for k3s** + +`rewrite_db_host_for_incluster(config_text)` (deploy.py ~117-125) rewrites `localhost` → `host.docker.internal`. Make the target host cluster-aware: + +```python +def in_cluster_db_host(cluster: str) -> str: + # kind: Postgres is a container on the Mac, reached via host.docker.internal. + # k3s: Postgres runs in-cluster as the `postgres` Service. 
+ return "postgres" if cluster == "k3s" else "host.docker.internal" + + +def rewrite_db_host_for_incluster(config_text: str, cluster: str = "kind") -> str: + host = in_cluster_db_host(cluster) + # ... existing regex, substituting `host` for the hardcoded host.docker.internal ... +``` + +Update the caller in `deliver_config()` (~line 354) to pass `cluster`. + +- [ ] **Step 5: Verify the overlay renders with correct images and DB host** + +Run: `kubectl kustomize deployments/k8s/dev/k3s | rg 'image:|name: postgres|host'` +Expected: server/controller/extractor/chunk-embed images are `rp2:30500/...`; a `postgres` StatefulSet + Service are present. + +Run (DB-host rewrite unit check): +`python3 -c "import sys; sys.path.insert(0,'scripts'); from lib import deploy; print(deploy.rewrite_db_host_for_incluster('url: postgres://tmi_dev:dev123@localhost:5432/tmi_dev', cluster='k3s'))"` +Expected: the URL host becomes `postgres` (`postgres://tmi_dev:dev123@postgres:5432/tmi_dev`). + +- [ ] **Step 6: Commit** + +```bash +git add deployments/k8s/dev/k3s/postgres.yml deployments/k8s/dev/k3s/kustomization.yaml scripts/lib/deploy.py +git commit -m "feat(dev): k3s overlay with in-cluster postgres and DB-host rewrite" +``` + +--- + +### Task 5: Server exposure via port-forward + wire dev-up/down/status/nuke for k3s + +**Files:** +- Modify: `scripts/lib/deploy.py` (`start()` orchestration; add `start_server_port_forward()` mirroring `start_redis_port_forward()` ~495-509; `wait_for_server()` ~472-493; teardown/nuke path; `_guard_context()` ~188) + +**Interfaces:** +- Consumes: overlay + registry + context from Tasks 2-4; `expected_context(cluster)` (Task 2). +- Produces: `make dev-up CLUSTER=k3s` brings up the stack and serves `http://localhost:8080` via port-forward; `dev-down`/`dev-nuke CLUSTER=k3s` tear down cleanly without touching the cluster. 
+ +- [ ] **Step 1: Guard the correct context per cluster** + +Update `_guard_context()` (deploy.py ~188) to accept a `cluster` arg and assert the active context equals `cluster_lib.expected_context(cluster)` (kind-tmi-dev for kind, k3s-rp for k3s). Fail with a clear message if not. + +- [ ] **Step 2: Add a server port-forward for k3s** + +k3s has no kind `extraPortMappings`, so preserve `localhost:8080` with a background port-forward (mirror the existing `start_redis_port_forward()` at ~495-509): + +```python +def start_server_port_forward() -> None: + """Preserve the localhost:8080 contract on k3s via kubectl port-forward. + + NodePort rp2:30080 remains available for CATS/high-throughput runs where the + userspace port-forward proxy throttles (see #463).""" + _spawn_port_forward("svc/tmi-server", f"{HOST_PORT}:{HOST_PORT}") +``` + +Call it in `start()` only when `cluster == "k3s"` (after the server Deployment is Ready, before `wait_for_server()`). For kind, do nothing (the extraPortMappings path is unchanged). `wait_for_server()` already polls `http://localhost:8080`, so it works unchanged once the forward is up. + +- [ ] **Step 3: Branch `start()` orchestration on cluster** + +In `deploy.start(db, cluster="kind", ...)`: +- call `cluster_lib.up(cluster)` (switches context for k3s; creates kind otherwise), +- `_guard_context(cluster)`, +- `build_and_push(db, cluster)`, +- `ensure_namespace()` (unchanged; overlay also declares it), +- `deliver_config()` with `cluster` (DB-host rewrite), +- apply `overlay_dir_for(db, cluster)` via `kubectl apply -k`, +- for k3s: wait for the `postgres`, `redis`, and `tmi-server` rollouts (the port-forward needs a Ready server pod, per Step 2), then `start_server_port_forward()`, +- `wait_for_server()`. + +- [ ] **Step 4: Branch teardown / nuke on cluster** + +- `dev-down CLUSTER=k3s`: kill the port-forward(s), `cluster_lib.down(cluster)` (no-op for k3s). Leave namespace running (parity with kind `dev-down` keeping data).
+- `dev-nuke CLUSTER=k3s`: `kubectl --context k3s-rp delete namespace tmi-platform` (removes Deployments, StatefulSet, PVCs, registry). Do NOT delete the cluster. + +Implement `dev-nuke` k3s branch: + +```python +if cluster == "k3s": + run(["kubectl", "--context", cluster_lib.K3S_CONTEXT, + "delete", "namespace", "tmi-platform", "--ignore-not-found"]) + return +``` + +- [ ] **Step 5: Verify status reporting for k3s** + +Run: `make dev-status CLUSTER=k3s` +Expected: reports the `k3s-rp` context and `tmi-platform` workloads (no attempt to query a kind cluster). + +- [ ] **Step 6: Commit** + +```bash +git add scripts/lib/deploy.py +git commit -m "feat(dev): k3s server port-forward, lifecycle, and nuke wiring" +``` + +--- + +### Task 6: End-to-end verification (k3s up) + kind non-regression + +**Files:** none (validation). + +**Interfaces:** +- Consumes: Tasks 1-5 and the one-time pre-flight node/Mac config. +- Produces: a confirmed working `CLUSTER=k3s` path and a confirmed-unchanged `kind` default. + +- [ ] **Step 1: Bring up the k3s stack** + +Run: `make dev-up CLUSTER=k3s` +Expected: images build (arm64) and push to `rp2:30500`; nodes pull them; `postgres`, `redis`, `tmi-server` (and workers) become Ready; a port-forward binds `localhost:8080`. + +- [ ] **Step 2: Confirm the server is reachable and migrated** + +Run: `curl -s http://localhost:8080/ | head -c 400` +Expected: the root endpoint returns the running version JSON (proves the server started and GORM AutoMigrate created the schema against in-cluster Postgres). + +- [ ] **Step 3: Confirm DB tooling reaches in-cluster Postgres via port-forward** + +Run: `kubectl --context k3s-rp -n tmi-platform port-forward svc/postgres 5432:5432 &` then `PGPASSWORD=dev123 psql -h localhost -U tmi_dev -d tmi_dev -c '\dt' | head` +Expected: TMI tables are listed. Kill the port-forward afterward. 
+ +- [ ] **Step 4: Confirm teardown leaves the cluster intact** + +Run: `make dev-nuke CLUSTER=k3s` +Then: `kubectl --context k3s-rp get ns tmi-platform` → `NotFound`; `kubectl --context k3s-rp get nodes` → still Ready. +Expected: TMI namespace gone; cluster untouched. + +- [ ] **Step 5: Confirm the kind default did not regress** + +Run: `make dev-up` (no args), then `curl -s http://localhost:8080/ | head -c 200`, then `make dev-down`. +Expected: kind `tmi-dev` comes up and serves as before; no k3s involvement. + +- [ ] **Step 6: Final commit (docs/status only, if any)** + +```bash +git add -A +git commit -m "docs(dev): note CLUSTER=k3s usage" || echo "nothing to commit" +``` + +--- + +## Notes / follow-ups (not tasks) + +- **Migration mechanism deviation:** this plan relies on the server's startup `AutoMigrate` (matching kind), not a discrete migration Job as the spec's open note suggested. Confirm this is acceptable; if a Job is required, add it before Task 5's `wait_for_server`. +- **Oracle-on-k3s** is out of scope (`CLUSTER=k3s` ⇒ postgres overlay). Supporting `CLUSTER=k3s DB=oracle` would need a `dev/k3s-oracle` overlay + secret wiring. +- **CATS / high-throughput:** the port-forward proxy throttles under load (#463). Document `rp2:30080` (NodePort) as the escape hatch for CATS runs against k3s. +- **User onboarding** for the one-time Mac `insecure-registries` and node `registries.yaml` steps belongs in the GitHub Wiki (per docs policy). +- **rp2 name resolution:** if `rp2` doesn't resolve on the Mac, every `rp2:30500`/`rp2:30080` reference must use the node IP or an `/etc/hosts` entry consistently. 
diff --git a/docs/superpowers/plans/2026-07-01-precommit-hook.md b/docs/superpowers/plans/2026-07-01-precommit-hook.md new file mode 100644 index 00000000..5a457a36 --- /dev/null +++ b/docs/superpowers/plans/2026-07-01-precommit-hook.md @@ -0,0 +1,209 @@ +# Pre-commit Hook Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add a tracked, installable Git pre-commit hook that runs `gofmt`, `go vet`, and `golangci-lint run --fast-only` on staged Go files and blocks the commit on failure. + +**Architecture:** Move Git to a tracked hooks directory (`scripts/hooks/`) via `core.hooksPath`, relocate the existing untracked `post-commit` version-bump hook there so it keeps running, add a new `pre-commit` hook script, and provide `make install-hooks` to wire it up. + +**Tech Stack:** Bash, Git hooks, Go toolchain (`gofmt`, `go vet`), `golangci-lint` v2. + +## Global Constraints + +- Never use the standard `log` package / print logging in Go — N/A here (shell only). +- Use Make targets by convention; add the hook installer as a Make target. +- `golangci-lint` is v2.x; the fast-subset flag is `--fast-only` (NOT `--fast`). +- The existing `post-commit` hook (version bump on `main`) must keep working byte-for-byte. +- `gofmt` and `go vet` ship with the Go toolchain; `golangci-lint` is a separate install. +- Bypass mechanism is the standard `git commit --no-verify`. + +--- + +### Task 1: Create the tracked hooks directory (pre-commit + relocated post-commit) + +**Files:** +- Create: `scripts/hooks/pre-commit` +- Create: `scripts/hooks/post-commit` (copied verbatim from `.git/hooks/post-commit`) + +**Interfaces:** +- Consumes: nothing. +- Produces: two executable hook scripts under `scripts/hooks/`. Task 2 installs them via `core.hooksPath`. 
+ +- [ ] **Step 1: Copy the existing post-commit hook into the tracked directory** + +The current version-bump hook lives untracked at `.git/hooks/post-commit`. Copy it verbatim so it survives the switch to `core.hooksPath` (which makes Git ignore `.git/hooks/*`). + +```bash +mkdir -p scripts/hooks +cp .git/hooks/post-commit scripts/hooks/post-commit +chmod +x scripts/hooks/post-commit +``` + +- [ ] **Step 2: Verify the copy is byte-for-byte identical** + +Run: `diff .git/hooks/post-commit scripts/hooks/post-commit && echo IDENTICAL` +Expected: `IDENTICAL` (no diff output). + +- [ ] **Step 3: Write the pre-commit hook** + +Create `scripts/hooks/pre-commit`: + +```bash +#!/bin/bash +# pre-commit hook - lightweight static analysis on staged Go files. +# +# Runs gofmt, go vet, and golangci-lint --fast-only on the packages of the +# staged Go files, and blocks the commit on any failure. +# +# Bypass with: git commit --no-verify +set -uo pipefail + +# Staged Go files (Added/Copied/Modified), excluding deletions. Git runs hooks +# with the working directory at the repository root, so paths are repo-relative. +staged_go=$(git diff --cached --diff-filter=ACM --name-only -- '*.go') + +if [ -z "$staged_go" ]; then + # Nothing Go-related staged (docs/config commit) -> no-op. + exit 0 +fi + +fail=0 + +# 1. gofmt: list any staged file that is not gofmt-clean. +unformatted=$(gofmt -l $staged_go) +if [ -n "$unformatted" ]; then + echo "pre-commit: the following staged files are not gofmt-formatted:" >&2 + echo "$unformatted" | sed 's/^/ /' >&2 + echo " fix with: gofmt -w " >&2 + fail=1 +fi + +# Unique package directories of the staged files, as ./dir patterns. +pkg_dirs=$(echo "$staged_go" | xargs -n1 dirname | sort -u | sed 's,^,./,') + +# 2. go vet on just the affected packages (fast; whole-module vet is slow). +if ! go vet $pkg_dirs; then + echo "pre-commit: go vet reported problems (see above)." >&2 + fail=1 +fi + +# 3. 
golangci-lint --fast-only on the affected packages (honors .golangci.yml). +if ! command -v golangci-lint >/dev/null 2>&1; then + echo "pre-commit: golangci-lint is not installed." >&2 + echo " install: https://golangci-lint.run/welcome/install/ (or: brew install golangci-lint)" >&2 + fail=1 +elif ! golangci-lint run --fast-only $pkg_dirs; then + echo "pre-commit: golangci-lint reported problems (see above)." >&2 + fail=1 +fi + +if [ "$fail" -ne 0 ]; then + echo "pre-commit: checks failed; commit aborted (bypass with 'git commit --no-verify')." >&2 + exit 1 +fi + +exit 0 +``` + +- [ ] **Step 4: Make the pre-commit hook executable** + +Run: `chmod +x scripts/hooks/pre-commit` +Expected: no output; `test -x scripts/hooks/pre-commit && echo OK` prints `OK`. + +- [ ] **Step 5: Lint the shell scripts (if shellcheck is available)** + +Run: `command -v shellcheck >/dev/null && shellcheck scripts/hooks/pre-commit scripts/hooks/post-commit || echo "shellcheck not installed, skipping"` +Expected: no errors (warnings about intentional word-splitting of `$pkg_dirs`/`$staged_go` are acceptable — that splitting is deliberate). + +- [ ] **Step 6: Commit** + +```bash +git add scripts/hooks/pre-commit scripts/hooks/post-commit +git commit -m "build(hooks): add tracked pre-commit hook and relocate post-commit" +``` + +--- + +### Task 2: Add `make install-hooks` and verify end-to-end + +**Files:** +- Modify: `Makefile` (add `install-hooks` target + `.PHONY` entry) + +**Interfaces:** +- Consumes: `scripts/hooks/pre-commit`, `scripts/hooks/post-commit` from Task 1. +- Produces: `make install-hooks`, which sets `git config core.hooksPath scripts/hooks`. + +- [ ] **Step 1: Add the install-hooks target to the Makefile** + +Add near the other infrastructure/atomic-component targets (the section that begins with `start-database:`). 
Match the existing `##`-help-comment style: + +```makefile +.PHONY: install-hooks + +install-hooks: ## Install Git hooks (points core.hooksPath at scripts/hooks) + @git config core.hooksPath scripts/hooks + @chmod +x scripts/hooks/pre-commit scripts/hooks/post-commit + @echo "Git hooks installed (core.hooksPath -> scripts/hooks)" +``` + +- [ ] **Step 2: Install the hooks** + +Run: `make install-hooks` +Expected: prints `Git hooks installed (core.hooksPath -> scripts/hooks)`. + +- [ ] **Step 3: Verify core.hooksPath is set** + +Run: `git config core.hooksPath` +Expected: `scripts/hooks` + +- [ ] **Step 4: Verify the no-op path (no staged Go files)** + +```bash +echo "note" >> README.md +git add README.md +git commit -m "chore: no-op hook check" --dry-run 2>/dev/null; \ + git commit -m "test: docs-only commit runs no Go checks" +``` +Expected: commit succeeds immediately with no gofmt/vet/lint output. Then undo: `git reset --soft HEAD~1 && git restore --staged README.md && git checkout -- README.md`. + +- [ ] **Step 5: Verify the gofmt block** + +Create a deliberately misformatted Go file and try to commit it: + +```bash +printf 'package tmp\nfunc Bad( ){\n}\n' > internal/hooktmp.go +git add internal/hooktmp.go +git commit -m "should be blocked" +``` +Expected: commit is ABORTED; output lists `internal/hooktmp.go` as not gofmt-formatted and prints the `gofmt -w` hint and the abort message. + +- [ ] **Step 6: Verify the bypass and then clean up** + +```bash +git commit -m "test: bypass" --no-verify # should succeed despite bad formatting +git reset --hard HEAD~1 # drop the bypass commit +rm -f internal/hooktmp.go +git restore --staged internal/hooktmp.go 2>/dev/null || true +``` +Expected: the `--no-verify` commit succeeds (proves bypass works), then the reset/removal leaves the tree clean. + +- [ ] **Step 7: Verify the relocated post-commit still bumps the version on main** + +Confirm the relocated hook is active. 
On `main`, a commit touching production Go triggers the version bump (`scripts/update-version.sh`) and amends the commit. Do a safe check that the hook file is the one Git will run: + +Run: `git rev-parse --git-path hooks/post-commit; echo "active hooksPath: $(git config core.hooksPath)"` +Expected: `core.hooksPath` is `scripts/hooks`, so `scripts/hooks/post-commit` is the active hook. (Full behavior is exercised by any later real production-Go commit on `main`; no separate destructive test needed.) + +- [ ] **Step 8: Commit** + +```bash +git add Makefile +git commit -m "build(hooks): add make install-hooks target" +``` + +--- + +## Notes / follow-ups (not tasks) + +- Onboarding documentation for `make install-hooks` belongs in the GitHub Wiki (per project docs policy), not `docs/`. File a wiki update separately. +- `core.hooksPath` is per-clone local config; every developer runs `make install-hooks` once. Consider mentioning it in the repo's setup script if one exists. diff --git a/docs/superpowers/specs/2026-07-01-ci-security-scanners-design.md b/docs/superpowers/specs/2026-07-01-ci-security-scanners-design.md new file mode 100644 index 00000000..7a6d2c39 --- /dev/null +++ b/docs/superpowers/specs/2026-07-01-ci-security-scanners-design.md @@ -0,0 +1,62 @@ +# Design: CI security scanners (govulncheck + standalone gosec) + +**Date:** 2026-07-01 +**Status:** Approved (brainstorming), pending implementation plan +**Scope:** 2 of 3 — independent of the pre-commit-hook and k3s-dev-target specs. + +## Summary + +Add two jobs to CI: `govulncheck` (**blocking**) and a standalone `gosec` +(**informational**, results uploaded to the GitHub Security tab), alongside the existing +CodeQL workflow and the `gosec` already embedded in `golangci-lint`. + +## Context + +`gosec` **already runs today** as a blocking linter inside `golangci-lint` (enabled in +`.golangci.yml`, with a set of exclusions). 
The additions here are intentionally +**non-redundant**: + +- `govulncheck` finds *known, published* vulnerabilities (CVEs) in dependencies and the + standard library that actually reach the code — different from `gosec`'s heuristics. +- The standalone `gosec` runs with broader scope than the linter-embedded one and + publishes results to the **GitHub Security tab** for visibility, without blocking. + +CodeQL (`.github/workflows/codeql.yml`) is left unchanged. + +## Design + +New jobs added to `.github/workflows/security.yml`. + +### `govulncheck` job (blocking) + +- Standard checkout + `setup-go` + the tmi-clients replace-shim steps used by the other + jobs in this workflow (checkout `ericfitz/tmi-clients` into `.tmi-clients`, `sed` the + `go.mod` replace directive). +- Install `golang.org/x/vuln/cmd/govulncheck` at a **pinned** version (repo convention: + tool versions are pinned — cf. `vacuum` at `0.29.7`, `golangci-lint` at `v2.12.2`). +- Run `govulncheck ./...`; a non-zero exit **fails the job** (blocks merge). +- Triggers: `pull_request` + `push` to `main`. + +### `gosec` job (informational) + +- `permissions: security-events: write`. +- Run `gosec -no-fail -fmt sarif -out gosec.sarif -exclude-generated ./...` at a + **pinned** gosec version. `-no-fail` guarantees the job never blocks the build. +- Upload with `github/codeql-action/upload-sarif` so findings appear under + **Security → Code scanning**. Generated `api/api.go` is excluded to match the linter's + noise policy. + +## Verification + +- On a PR branch: the `govulncheck` job appears and is required; the `gosec` job runs and + its SARIF shows up under **Security → Code scanning**. +- Introduce a temporary known-vuln dependency → `govulncheck` fails the build; remove it + → passes. +- Confirm a `gosec`-style finding does **not** block the build. + +## Out of scope + +- Changing CodeQL configuration. +- Removing or reconfiguring the `gosec` linter already inside `golangci-lint`. 
+- Making the standalone `gosec` blocking (can be tightened later once its output is + triaged). diff --git a/docs/superpowers/specs/2026-07-01-k3s-dev-target-design.md b/docs/superpowers/specs/2026-07-01-k3s-dev-target-design.md new file mode 100644 index 00000000..f022d1ca --- /dev/null +++ b/docs/superpowers/specs/2026-07-01-k3s-dev-target-design.md @@ -0,0 +1,103 @@ +# Design: k3s dev deployment target (`CLUSTER=k3s`) + +**Date:** 2026-07-01 +**Status:** Approved (brainstorming), pending implementation plan +**Scope:** 3 of 3 — independent of the pre-commit-hook and CI-scanners specs. This is the +largest of the three and warrants its own implementation plan + PR. + +## Summary + +Add `CLUSTER=k3s` as a selectable dev deployment target that deploys to the existing +remote `k3s-rp` cluster, with an in-cluster image registry, single-node in-cluster +Postgres and Redis, and `kubectl port-forward` preserving the `localhost:8080` contract. +**kind remains the default and keeps working unchanged.** + +Mirrors the existing `DB=oracle` pattern: a `CLUSTER` selector (default `kind`) plumbed +through the Python dev tooling, plus a kustomize overlay describing the k3s topology. + +## Facts driving the design + +- The `k3s-rp` context already exists locally; its API server is `https://rp2:6443` — a + **remote LAN** cluster (Raspberry-Pi-class, arm64). The Mac dev host is also arm64. +- kind currently provides two things k3s must replace: + - a local registry mirror (`localhost:5000`), and + - `extraPortMappings` exposing NodePort 30080 as `localhost:8080` (a kind-only + feature). +- In the kind topology, Redis runs **in-cluster** (`deployments/k8s/dev/redis.yml`) and + Postgres runs as a **container on the Mac**, reached via `host.docker.internal` + (`IN_CLUSTER_DB_HOST` in `scripts/lib/deploy.py`). `host.docker.internal` does not + resolve from `rp2`. 
+ +## Decisions (from brainstorming) + +- **Selectable, not a replacement.** `CLUSTER=k3s` is opt-in; `CLUSTER=kind` is the + default. +- **Fully self-contained cluster:** in-cluster registry + single-node in-cluster + Postgres + single-node in-cluster Redis. No cross-LAN dependency on the Mac at runtime. +- **`localhost:8080` preserved** via `kubectl port-forward`; NodePort `rp2:30080` + documented as the high-throughput / CATS escape hatch. +- **Migrations** run as an **in-cluster Job**; Mac-side `dbtool`/`psql` reach Postgres + via `kubectl port-forward svc/postgres 5432:5432`. + +## Components + +1. **Cluster lifecycle** (`devenv.py`, `scripts/lib/cluster.py`, `scripts/lib/deploy.py`): + - `CLUSTER=kind` → existing kind create/delete flow. + - `CLUSTER=k3s` → **use** the existing `k3s-rp` context + (`kubectl config use-context k3s-rp`); never create or delete the cluster (we don't + own it). `dev-up`/`dev-down`/`dev-status`/`dev-nuke` branch on `CLUSTER`. + - `dev-nuke` under k3s tears down only the TMI namespace(s) + PVCs, not the cluster. + +2. **In-cluster image registry:** + - Deploy a registry (Deployment + PVC + Service, NodePort `30500`) into k3s-rp. + - Build tooling (`scripts/build-app-containers.py`) tags/pushes images to + `rp2:30500/tmi/...`; the k3s overlay manifests reference the same. + - **One-time manual node config:** add `rp2:30500` as an insecure (plain-HTTP) + registry to `/etc/rancher/k3s/registries.yaml` on each node and restart k3s. This + requires SSH/root on the Pi nodes and cannot be driven from the Mac; it will be + documented (optionally with a helper script that SSHes to the nodes). + +3. **In-cluster Postgres (single node):** + - New `postgres.yml` for the k3s overlay: StatefulSet + PVC + Service, credentials via + a Secret. Server reaches it by service DNS (`postgres:5432`). + - Schema applied by an in-cluster **migration Job**. + - Mac `dbtool`/`psql`/`make migrate` reach it via `kubectl port-forward`. + +4. 
**In-cluster Redis (single node):** reuse the existing `redis.yml` (already + in-cluster) — moves to k3s for free. + +5. **Server exposure:** + - `make dev-up CLUSTER=k3s` starts a background + `kubectl port-forward svc/tmi-server 8080:8080` so `localhost:8080` behaves exactly + as under kind (OAuth stub callbacks, integration tests, curl unchanged). + - NodePort `rp2:30080` documented for CATS / high-throughput runs (the port-forward + userspace proxy throttles under that load — the #463 problem). + +6. **Architecture / image builds:** build arm64 images (`buildx --platform linux/arm64`). + `CGO_ENABLED=0` static binaries + Chainguard multi-arch bases make this clean; both + Mac and Pi nodes are arm64. + +## Verification + +- `make dev-up` (no args) still brings up kind and serves `localhost:8080` — **no + regression to the default path.** +- `make dev-up CLUSTER=k3s`: + - pushes arm64 images to the in-cluster registry and the nodes pull them, + - brings up in-cluster Postgres + Redis, runs migrations, + - serves the API at `localhost:8080` via port-forward, + - `make dev-status CLUSTER=k3s` reports healthy, + - `make dev-down CLUSTER=k3s` / `dev-nuke CLUSTER=k3s` tear down TMI resources but + leave the `k3s-rp` cluster intact. +- `curl http://localhost:8080/` returns the running version. + +## Open implementation details (not blockers) + +- Exact StatefulSet vs Deployment for the single-node Postgres (PVC either way). +- Whether the registries.yaml node step is documented-only or scripted via SSH. +- Storage class / PVC sizing appropriate to the Pi nodes. + +## Out of scope + +- Replacing kind (kind stays the default). +- Multi-node HA for in-cluster Postgres/Redis (single node is intentional for dev). +- Migrating CI or production deployment targets — this only affects local dev. 
diff --git a/docs/superpowers/specs/2026-07-01-precommit-hook-design.md b/docs/superpowers/specs/2026-07-01-precommit-hook-design.md new file mode 100644 index 00000000..c5fccf39 --- /dev/null +++ b/docs/superpowers/specs/2026-07-01-precommit-hook-design.md @@ -0,0 +1,73 @@ +# Design: Pre-commit hook for lightweight static analysis + +**Date:** 2026-07-01 +**Status:** Approved (brainstorming), pending implementation plan +**Scope:** 1 of 3 — independent of the CI-scanners and k3s-dev-target specs. + +## Summary + +Add a tracked, installable Git **pre-commit** hook that runs `gofmt`, `go vet`, and +`golangci-lint run --fast-only` on staged Go files and **blocks** the commit on failure. + +## Problem + +There is no tracked hooks directory and no installer. `core.hooksPath` is at its default +(`.git/hooks`), and the existing version-bumping `post-commit` hook lives **untracked** +in `.git/hooks/post-commit`. Any shared pre-commit hook therefore needs both a tracked +home and an install mechanism. + +## Design + +### Layout and install + +- Create a **tracked** `scripts/hooks/` directory containing: + - `pre-commit` (new). + - `post-commit` (relocated from `.git/hooks/post-commit`, byte-for-byte behavior + preserved). +- Add a `make install-hooks` target that runs `git config core.hooksPath scripts/hooks`. + A single switch points Git at the tracked directory and versions both hooks. + +**Why the relocation is mandatory:** setting `core.hooksPath` makes Git ignore +`.git/hooks/*` entirely. If the versioning `post-commit` were left there it would +silently stop running. Moving it into `scripts/hooks/` keeps it active and brings it +under version control. + +### `pre-commit` behavior + +Operates on **staged Go files only**: + +``` +git diff --cached --diff-filter=ACM --name-only -- '*.go' +``` + +1. **gofmt** — `gofmt -l` on the staged files; fail if any are unformatted (report the + offending files and the `gofmt -w` fix). +2. 
**go vet** — scoped to the unique package directories of the staged files (not the + whole module, for speed). +3. **golangci-lint** — `golangci-lint run --fast-only` on those same package + directories. This automatically honors the existing `.golangci.yml` (including the + generated-code exclusions for `api/api.go`). + +### Control flow / edge cases + +- No staged Go files → exit 0 immediately (hook is a no-op for docs/config commits). +- `git commit --no-verify` bypasses the hook (standard Git behavior; documented). +- The versioning `post-commit` amends with `git commit --amend --no-verify`, which does + not re-trigger `pre-commit` — no loop. +- If `golangci-lint` is not installed, fail with a clear install hint. It is a + deliberate gate; silently skipping would defeat the purpose. (`gofmt` and `go vet` + ship with the Go toolchain and are always present.) + +## Verification + +- `make install-hooks`, then confirm `git config core.hooksPath` == `scripts/hooks`. +- Stage an unformatted file → commit blocked; `gofmt -w` → commit succeeds. +- Stage a `go vet` / lint violation → blocked; fix → succeeds. +- `--no-verify` bypasses. +- A commit that changes production Go still triggers the relocated `post-commit` + version bump on `main`. + +## Out of scope + +- Running the full (non-`--fast-only`) linter or tests in the hook — CI covers that. +- Any change to CI workflows (separate spec). diff --git a/scripts/hooks/post-commit b/scripts/hooks/post-commit new file mode 100755 index 00000000..2c01695c --- /dev/null +++ b/scripts/hooks/post-commit @@ -0,0 +1,48 @@ +#!/bin/bash +# post-commit hook - Automatically update version and amend commit +# +# This hook increments the build number and amends the commit with the updated version. 
+# Only triggers for changes to production code: +# - Go files (not *_test.go) in: api/, auth/, cmd/server/, internal/ +# - Any file in static/ + +# Only run on the main branch +CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD) +if [ "$CURRENT_BRANCH" != "main" ]; then + exit 0 +fi + +# Check if we're already in an amend operation to prevent infinite loop +if [ -f ".git/AMEND_IN_PROGRESS" ]; then + rm -f ".git/AMEND_IN_PROGRESS" + exit 0 +fi + +# Get files changed in this commit +CHANGED_FILES=$(git diff-tree --no-commit-id --name-only -r HEAD) + +# Check for production Go files (in specific directories, excluding tests) +PROD_GO=$(echo "$CHANGED_FILES" | grep -E "^(api|auth|cmd/server|internal)/" | grep '\.go$' | grep -v '_test\.go$' || true) + +# Check for static assets +STATIC=$(echo "$CHANGED_FILES" | grep "^static/" || true) + +# Exit early if no production files changed +if [ -z "$PROD_GO" ] && [ -z "$STATIC" ]; then + exit 0 +fi + +# Mark that we're starting an amend +touch ".git/AMEND_IN_PROGRESS" + +# Run the version update script in commit mode +./scripts/update-version.sh --commit + +# Amend the commit with the version files (no message change, no hooks) +git commit --amend --no-edit --no-verify + +# Clean up the marker file +rm -f ".git/AMEND_IN_PROGRESS" + +# Exit with success +exit 0 diff --git a/scripts/hooks/pre-commit b/scripts/hooks/pre-commit new file mode 100755 index 00000000..56bc657e --- /dev/null +++ b/scripts/hooks/pre-commit @@ -0,0 +1,54 @@ +#!/bin/bash +# pre-commit hook - lightweight static analysis on staged Go files. +# +# Runs gofmt, go vet, and golangci-lint --fast-only on the packages of the +# staged Go files, and blocks the commit on any failure. +# +# Bypass with: git commit --no-verify +set -uo pipefail + +# Staged Go files (Added/Copied/Modified), excluding deletions. Git runs hooks +# with the working directory at the repository root, so paths are repo-relative. 
# The list is read NUL-delimited (-z) into an array: with -z Git emits paths
# verbatim instead of C-quoting unusual names (core.quotePath), and array
# elements survive spaces and glob characters that unquoted expansion would
# split or expand. Plain `read` loops are used instead of mapfile so the hook
# still runs under the bash 3.2 that macOS ships as /bin/bash.
staged_go=()
while IFS= read -r -d '' staged_path; do
  staged_go+=("$staged_path")
done < <(git diff --cached --diff-filter=ACM --name-only -z -- '*.go')

if [ "${#staged_go[@]}" -eq 0 ]; then
  # Nothing Go-related staged (docs/config commit) -> no-op.
  exit 0
fi

fail=0

# 1. gofmt: list any staged file that is not gofmt-clean.
# NOTE: this inspects the working-tree copy of each staged path, so a partially
# staged file (git add -p) is judged by its on-disk content, not the index.
unformatted=$(gofmt -l -- "${staged_go[@]}")
gofmt_status=$?
if [ -n "$unformatted" ]; then
  echo "pre-commit: the following staged files are not gofmt-formatted:" >&2
  printf '%s\n' "$unformatted" | sed 's/^/  /' >&2
  echo "  fix with: gofmt -w FILE... (then 'git add' the reformatted files)" >&2
  fail=1
elif [ "$gofmt_status" -ne 0 ]; then
  # gofmt itself failed (e.g. a file does not parse); do not let that pass
  # silently as "everything is formatted".
  echo "pre-commit: gofmt failed (see above)." >&2
  fail=1
fi

# Unique package directories of the staged files, as ./dir patterns.
# staged_go is non-empty here, so pkg_dirs always gets at least one entry
# (expanding an empty array would trip `set -u` on bash < 4.4).
pkg_dirs=()
while IFS= read -r pkg_dir; do
  pkg_dirs+=("./$pkg_dir")
done < <(
  for staged_path in "${staged_go[@]}"; do
    dirname -- "$staged_path"
  done | sort -u
)

# 2. go vet on just the affected packages (fast; whole-module vet is slow).
if ! go vet "${pkg_dirs[@]}"; then
  echo "pre-commit: go vet reported problems (see above)." >&2
  fail=1
fi

# 3. golangci-lint --fast-only on the affected packages (honors .golangci.yml).
# A missing linter is a hard failure on purpose: silently skipping the gate
# would defeat it.
if ! command -v golangci-lint >/dev/null 2>&1; then
  echo "pre-commit: golangci-lint is not installed." >&2
  echo "  install: https://golangci-lint.run/welcome/install/ (or: brew install golangci-lint)" >&2
  fail=1
elif ! golangci-lint run --fast-only "${pkg_dirs[@]}"; then
  echo "pre-commit: golangci-lint reported problems (see above)." >&2
  fail=1
fi

# Every check runs before aborting so one attempt surfaces all the problems.
if [ "$fail" -ne 0 ]; then
  echo "pre-commit: checks failed; commit aborted (bypass with 'git commit --no-verify')." >&2
  exit 1
fi

exit 0