From 4bcdd57648c2b0a639f2bacd7a892d3a49d7ad42 Mon Sep 17 00:00:00 2001 From: chodaict Date: Tue, 16 Jun 2026 10:21:54 +0900 Subject: [PATCH 1/2] test: cover leftovers classification, multi-account routing, clean() and locale Add functional regression guards for the paths that were only ever verified by hand: lf_scan's dead/likely-orphan/kept buckets (a live or com.apple item is never flagged as junk), to_trash multi-account routing (each account's item lands in its own Trash), restore undoing a leftovers sweep, the sweep's clean() delete semantics (real empties but keeps the dir; --dry-run deletes nothing), and ss_resolve_lang locale mapping incl. the Simplified-Chinese -> English fallback. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 8 +++ scripts/func-test.sh | 147 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 308f5eb..fff8d14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,14 @@ project adheres to [Semantic Versioning](https://semver.org/). ## [Unreleased] +- **Wider functional test coverage** (no behaviour change): `lf_scan`'s + dead/likely-orphan/kept classification (a live or Apple item is never flagged), + multi-account Trash routing (each account's item lands in its *own* Trash), + `restore` undoing a `leftovers` sweep, the sweep's `clean()` delete semantics + (real empties-but-keeps-the-dir; `--dry-run` deletes nothing), and locale + resolution (Traditional Chinese maps through; Simplified + others fall back to + English). 
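+ ## [0.4.0] - New verb: **`sheersweep leftovers`** — finds orphaned startup/background items diff --git a/scripts/func-test.sh b/scripts/func-test.sh index 5a5eed8..e4343de 100755 --- a/scripts/func-test.sh +++ b/scripts/func-test.sh @@ -223,6 +223,153 @@ else fi teardown +# (L1) leftovers classification: lf_scan must sort launchd plists into DEAD (the +# launched binary is gone), REVIEW (interpreter that references a now-missing +# /Applications app), and KEPT (program exists, OR Apple's own job, OR an +# interpreter with no app refs). This is the honest brain — a working updater must +# never land in DEAD/REVIEW. We feed five synthetic plists and check the buckets. +setup +AG="$SBX/agents"; mkdir -p "$AG" +# a) DEAD — Program points to a binary that doesn't exist (note: $SBX expands). +cat > "$AG/com.test.dead.plist" <<PLIST +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"><dict> +<key>Label</key><string>com.test.dead</string> +<key>Program</key><string>$SBX/gone/nope-binary</string> +</dict></plist> +PLIST +# b) REVIEW — /bin/bash referencing a missing /Applications app. +cat > "$AG/com.test.review.plist" <<'PLIST' +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"><dict> +<key>Label</key><string>com.test.review</string> +<key>ProgramArguments</key> +<array><string>/bin/bash</string><string>-c</string> +<string>test -d "/Applications/Definitely Gone 9000.app"</string></array> +</dict></plist> +PLIST +# c) KEPT — Program exists (/bin/ls), so it's a live item, not junk. +cat > "$AG/com.test.live.plist" <<'PLIST' +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"><dict> +<key>Label</key><string>com.test.live</string> +<key>Program</key><string>/bin/ls</string> +</dict></plist> +PLIST +# d) KEPT — Apple's own job is skipped BEFORE the dead check, even with a missing +# program (proves com.apple.* is never flagged). +cat > "$AG/com.apple.something.plist" <<'PLIST' +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"><dict> +<key>Label</key><string>com.apple.something</string> +<key>Program</key><string>/nonexistent/apple/thing</string> +</dict></plist> +PLIST +# e) KEPT — interpreter with NO /Applications references → can't call it junk. 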
+cat > "$AG/com.test.quiet.plist" <<'PLIST' +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +<plist version="1.0"><dict> +<key>Label</key><string>com.test.quiet</string> +<key>ProgramArguments</key><array><string>/bin/bash</string><string>-c</string><string>echo hi</string></array> +</dict></plist> +PLIST +LF_DEAD_PATH=(); LF_DEAD_OWNER=(); LF_DEAD_INFO=() +LF_REVIEW_PATH=(); LF_REVIEW_OWNER=(); LF_REVIEW_INFO=() +LF_KEPT=0 +lf_scan "$AG" "$SBX/home" +if [ "${#LF_DEAD_PATH[@]}" -eq 1 ] && [ "${#LF_REVIEW_PATH[@]}" -eq 1 ] && [ "$LF_KEPT" -eq 3 ] \ + && [ "${LF_DEAD_PATH[0]}" = "$AG/com.test.dead.plist" ] \ + && [ "${LF_DEAD_OWNER[0]}" = "$SBX/home" ] \ + && [ "${LF_DEAD_INFO[0]}" = "$SBX/gone/nope-binary" ] \ + && [ "${LF_REVIEW_PATH[0]}" = "$AG/com.test.review.plist" ] \ + && [ "${LF_REVIEW_OWNER[0]}" = "$SBX/home" ] \ + && printf '%s' "${LF_REVIEW_INFO[0]}" | grep -q 'Definitely Gone 9000.app'; then + pass "lf_scan: dead / likely-orphan / kept classified correctly (live + apple never flagged)" +else + fail "lf_scan misclassified (dead=${#LF_DEAD_PATH[@]} review=${#LF_REVIEW_PATH[@]} kept=$LF_KEPT)" +fi +teardown + +# (M1) Multi-account routing: an item from account A must land in A's OWN Trash, +# and B's in B's — never crossed. This is the multi-account guarantee that can't +# be exercised with one real login, proven here at the routing level. +setup +A="$SBX/acctA"; B="$SBX/acctB"; mkdir -p "$A" "$B" "$SBX/src" +echo a > "$SBX/src/itemA"; echo b > "$SBX/src/itemB" +to_trash "$SBX/src/itemA" "$A"; da="$TRASH_DEST" +to_trash "$SBX/src/itemB" "$B"; db="$TRASH_DEST" +if [ -f "$A/.Trash/itemA" ] && [ -f "$B/.Trash/itemB" ] \ + && [ ! -e "$B/.Trash/itemA" ] && [ ! -e "$A/.Trash/itemB" ] \ + && [ "$da" = "$A/.Trash/itemA" ] && [ "$db" = "$B/.Trash/itemB" ]; then + pass "to_trash routes each item to its OWNER account's Trash (multi-account)" +else + fail "to_trash crossed accounts (da=$da db=$db)" +fi +teardown + +# (L2) restore must undo a LEFTOVERS sweep too: a leftovers receipt (app=leftovers) +# moves an orphaned plist back from the Trash to where it lived. Proves the two +# verbs share one undo. 
+setup +REAL_HOME="$SBX/home" +mkdir -p "$REAL_HOME/.sheersweep/uninstalls" "$REAL_HOME/.Trash" +echo plist > "$REAL_HOME/.Trash/com.ea.origin.ESHelper.plist" +orig="$SBX/Library/LaunchDaemons/com.ea.origin.ESHelper.plist" +rfile="$REAL_HOME/.sheersweep/uninstalls/20260616-000010-leftovers.tsv" +{ echo "# sheersweep leftovers receipt"; echo "# app=leftovers"; echo "# bid=leftovers" + echo "# date=20260616-000010"; echo "# format=nul-pairs"; printf '\0' + printf '%s\0%s\0' "$REAL_HOME/.Trash/com.ea.origin.ESHelper.plist" "$orig"; } > "$rfile" +printf 'y\n' | do_restore >/dev/null 2>&1 +if [ -f "$orig" ] && [ "$(cat "$orig")" = "plist" ] && [ -f "$rfile.restored" ]; then + pass "restore undoes a leftovers sweep (leftovers receipt round-trips)" +else + fail "restore failed to undo a leftovers sweep (orig exists: $([ -f "$orig" ] && echo y || echo n))" +fi +teardown + +# (C1) the sweep's clean(): a real run empties a dir's CONTENTS but KEEPS the dir; +# --dry-run reports and deletes nothing. This is the only delete path in the sweep. +setup +mkdir -p "$SBX/cache/sub" +dd if=/dev/zero of="$SBX/cache/blob" bs=1024 count=8 2>/dev/null +echo x > "$SBX/cache/sub/f" +DRY=0 +clean "$SBX/cache" >/dev/null 2>&1 +real_ok=0 +{ [ -d "$SBX/cache" ] && [ -z "$(find "$SBX/cache" -mindepth 1 2>/dev/null)" ]; } && real_ok=1 +mkdir -p "$SBX/cache2"; echo keep > "$SBX/cache2/file" +DRY=1 +clean "$SBX/cache2" >/dev/null 2>&1 +# shellcheck disable=SC2034 # DRY is read inside the sourced clean(), not in this file +DRY=0 +dry_ok=0 +[ -f "$SBX/cache2/file" ] && dry_ok=1 +if [ "$real_ok" -eq 1 ] && [ "$dry_ok" -eq 1 ]; then + pass "clean(): real run empties the dir but keeps it; --dry-run deletes nothing" +else + fail "clean() wrong (real_ok=$real_ok dry_ok=$dry_ok)" +fi +teardown + +# (i18n) locale resolution: ja and either Traditional-Chinese spelling map through; +# Simplified Chinese and any other locale fall back to en-US (the stated policy). 
+ja="$(SHEERSWEEP_LANG=ja_JP ss_resolve_lang)" +tw="$(SHEERSWEEP_LANG=zh_TW ss_resolve_lang)" +hant="$(SHEERSWEEP_LANG=zh-Hant ss_resolve_lang)" +cn="$(SHEERSWEEP_LANG=zh_CN ss_resolve_lang)" +fr="$(SHEERSWEEP_LANG=fr_FR ss_resolve_lang)" +if [ "$ja" = "ja-JP" ] && [ "$tw" = "zh-TW" ] && [ "$hant" = "zh-TW" ] \ + && [ "$cn" = "en-US" ] && [ "$fr" = "en-US" ]; then + pass "locale: ja / zh-Hant / zh-TW resolve; Simplified + others fall back to en-US" +else + fail "locale resolution wrong (ja=$ja tw=$tw hant=$hant cn=$cn fr=$fr)" +fi + echo "→ func-test done" [ "$fails" -eq 0 ] || { echo "❌ $fails functional test(s) failed"; exit 1; } echo "✅ func-test all green" From 61b785b08f589dffd73c447f868e96bbe759dfd6 Mon Sep 17 00:00:00 2001 From: chodaict Date: Sun, 21 Jun 2026 10:39:57 +0900 Subject: [PATCH 2/2] docs: add build-artifact reclaim spec + action tagline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add docs/reclaim-spec.md proposing a two-layer model for reclaiming build output: fold fixed-location caches into the existing sweep, and add an opt-in 'reclaim' verb for in-project build artifacts gated three ways (gitignored + build pattern + sibling manifest). All deletion stays on the move-to-Trash + receipt + restore rail, with the rebuild command recorded as the recommended undo — one consistent trust promise, no asterisk. Add the 'Scan, Confirm, Trash, the delete is yours' tagline to the README, encoding the trust model: sheersweep only ever stages to the Trash; the one irreversible step (emptying it) is always the user's. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- README.md | 2 + docs/reclaim-spec.md | 201 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 203 insertions(+) create mode 100644 docs/reclaim-spec.md diff --git a/README.md b/README.md index 92b8d52..b56fa57 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ [![CI](https://github.com/CVERInc/sheersweep/actions/workflows/ci.yml/badge.svg)](https://github.com/CVERInc/sheersweep/actions/workflows/ci.yml) > The Mac cleaner you can **read**. Open source · dry-run first · a hard never-touch list · sweeps every account. +> +> **Scan · Confirm · Trash — the delete is yours.** Most Mac cleaners are a black box that asks you to *trust* them while they delete things you can't see, behind a subscription, with a little fear in the marketing. diff --git a/docs/reclaim-spec.md b/docs/reclaim-spec.md new file mode 100644 index 0000000..f17168c --- /dev/null +++ b/docs/reclaim-spec.md @@ -0,0 +1,201 @@ +# Spec: reclaiming build artifacts + +**Status:** Proposed (design only — not yet implemented). +**Scope:** Add the ability to reclaim space from *build output* (regenerable +artifacts a build tool produces), without betraying the trust rules that make +sheersweep the opposite of a black-box cleaner. + +## The problem + +Build output is some of the heaviest junk on a developer's Mac — Swift +`.build`, `node_modules`, `target`, `dist`, `.next` — easily tens of GB across a +dozen repos. It is also genuinely regenerable: one command rebuilds it. So it +*looks* like a perfect fit for a cleaner. + +It is also exactly where every other cleaner loses trust: to find it, the tool +has to walk *into your project directories* and delete things. Get the +heuristic slightly wrong and it removes a folder named `dist` that was actually +your data. That is the CleanMyMac failure mode this project exists to refute. + +So the answer is not "add build dirs to the sweep." 
It is two layers, drawn +along the line the sweep already refuses to cross. + +## Two layers + +### Layer 1 — regenerable caches in fixed cache locations → fold into the sweep + +These live at well-known, unambiguous cache paths. Their meaning is not in +doubt and no project-directory walking is required. The sweep already does this +class (`~/.npm`, Xcode `DerivedData`, Cargo/Gradle caches). Additions in the +same spirit, low ceremony, no new verb: + +- `$TMPDIR/node-compile-cache` — Node's V8 compile cache (regenerated on next run) +- `~/Library/Caches/pytest`, `.pytest_cache` under cache roots +- `~/Library/Caches/Homebrew` (already covered by `brew cleanup`) +- any other tool cache that lives under a `Caches`/`.cache` root, not inside a repo + +These belong to the existing `clean()` path against fixed roots. Same dry-run, +same never-touch list, same multi-account pass. Nothing about Layer 1 changes +the trust story. + +### Layer 2 — build output *inside project directories* → a new, opt-in verb + +This is the valuable, dangerous part. It gets its own deliberate verb — +`reclaim` — exactly as `uninstall` and `leftovers` are separate verbs and are +never folded into the sweep. The default sweep must keep its promise from the +README ("never grow aggressive or deep modes that rummage through app data"). + +```bash +sheersweep reclaim # scan, preview grouped by repo, confirm +sheersweep reclaim --dry-run # preview only +sheersweep reclaim --stale 30d # only artifacts whose repo hasn't been touched in 30 days +``` + +## The three gates (what makes a directory eligible) + +A candidate is flagged **only when all three are true**. Any one alone is not +enough — the conjunction is what lets us say "this is regenerable output, not +your data": + +1. **Ignored by git.** The path is matched by the repo's `.gitignore` + (`git -C <repo> check-ignore <path>` succeeds). Build output is virtually + always ignored; your data virtually never is. +2. 
**Matches a known build pattern.** Directory name is one of a fixed, + readable allow-list: `.build`, `node_modules`, `target`, `dist`, `.next`, + `build`, `.gradle`, `Pods`, … (kept short and auditable, like the + never-touch list). +3. **A sibling manifest proves the build tool.** The parent directory contains + the corresponding manifest — `Package.swift`/`.xcodeproj` for `.build`, + `package.json` for `node_modules`/`dist`/`.next`, `Cargo.toml` for `target`, + `Podfile` for `Pods`. No manifest → not eligible, no matter the name. + +Gate 3 is the one that saves the user who has a content folder literally named +`dist`: without a `package.json` beside it, it is invisible to `reclaim`. + +The never-touch list still applies on top: anything under Photos/Documents/ +Desktop, any Obsidian vault, any cloud-sync folder is excluded even if it +somehow passed the gates. + +## Every candidate carries its provenance (shared data model) + +This is the unifying idea: `uninstall`, `leftovers`, and `reclaim` are all the +same primitive — *surface a removable set, each item with its provenance and +its undo, get consent, remove*. They differ only in their **finder**. Every +candidate, regardless of finder, has the same shape: + +``` +{ path, size, why_safe, undo, last_touched } +``` + +| finder | why_safe | undo | +|-------------|-----------------------------------|----------------------------| +| uninstall | "belongs to <App>, bundle id …" | restore from Trash | +| leftovers | "launches a binary that is gone" | restore from Trash | +| reclaim | "gitignored + build pattern + manifest" | rebuild (`swift build` / `npm install`) — *or* restore from Trash | + +The preview is the anti-CleanMyMac payload made concrete. CleanMyMac says +"Junk — 3.8 GB — Clean." 
sheersweep says: + +``` +snapsift/app/.build 773 MB rebuild: swift build (14 days untouched) +motifmint/node_modules 261 MB rebuild: npm install (31 days untouched) +``` + +It tells you what it is, how to undo it, and how stale it is — so *you* judge +alive vs dead. The machine never decides a repo is "done." + +## Delete policy: reclaim moves to the Trash, like every other verb + +`reclaim` removes through the Trash and writes a receipt, exactly as `uninstall` +and `leftovers` do. **One promise, no asterisk:** the only operations that +delete anything move files to the Trash, and `restore` puts them back. That +single sentence stays true and auditable — which is worth more than any +optimization that would force a footnote onto the headline trust claim. + +An earlier draft argued `reclaim` should use `rm` (the three gates prove the +bytes are regenerable, so the "real" undo is a rebuild, not file recovery). That +argument does not survive contact with how APFS actually works: + +- Each volume's `~/.Trash` sits on the **same APFS data volume** as the project. + Moving a build dir there is an **instant rename**, not a 690 MB copy — it does + not duplicate or temporarily double the space. +- Space is reclaimed when the Trash is emptied — the **same one-step model the + README already documents** for `uninstall` ("Emptying the Trash is what + finally reclaims the space"). No new mental model. +- Trashing ~50k files is a rename of the *top directory*, not a per-file move. + Emptying it later unlinks the inodes — exactly what `rm` would have done + anyway. There is no speed win to trade trust for. + +So the only real benefits `rm` offered (fast, doesn't hog space) are things a +same-volume Trash rename already gives for free. Spending the headline promise +to buy them would be paying for nothing. 
+ +**The regenerability still matters — it just shapes the undo, not the delete.** +The receipt records the **rebuild command as the recommended undo**, so after a +reclaim the user has two paths: + +- `npm install` / `swift build` / `cargo build` — rebuild a clean tree + (recommended; avoids any lockfile drift a Trashed tree might carry), and +- restore the files from the Trash (the safety net, identical to every other + verb). + +`sheersweep restore` after a reclaim restores the files like any other restore, +and additionally prints the rebuild commands as the cleaner alternative. Two +undos, one consistent promise. + +## Staleness filter + +`--stale <duration>` (e.g. `7d`, `30d`) shows only artifacts whose repo's most +recent source change is older than the threshold (newest mtime of tracked files, +or last commit). This turns `last_touched` from a displayed column into an +active filter and is the safest high-value mode: + +> "reclaim everything not touched in 30 days" + +is a sentence a user can trust, and it removes the need for the machine to guess +which repo is "mature" — it uses an objective signal (how long since you touched +it) and leaves the call with the user. + +## UX rules (non-negotiable) + +- **Never folded into the sweep.** A separate verb, invoked on purpose. +- **Dry-run honoured.** `reclaim --dry-run` deletes nothing. +- **Preview grouped by repo**, with per-item size, rebuild command, staleness, + and a grand total — then a typed confirmation, as `uninstall` does. +- **Per-item / per-category opt-in.** Selectable; never a single blind "reclaim + all." (This is why feelreef was kept while motifmint was cleared in the + session that motivated this spec — that call is the user's, not the tool's.) 
+- **Same multi-account, same never-touch list, same readable allow-lists.** + +## Architecture + +``` +finders: uninstall-finder leftovers-finder reclaim-finder (3 gates) + \ | / + v v v + candidate model { path, size, why_safe, undo, last_touched } + | + present -> consent -> remove + (grouped preview) (typed) (to_trash, all verbs) +``` + +`reclaim` reuses the existing preview/confirm/`to_trash()`/receipt/restore +machinery almost wholesale (`pick`-style selection, `receipt_*`, `do_restore`) +and adds only: + +- `reclaim_scan()` — walk candidate repos, apply the three gates, emit the model +- a receipt field carrying the **rebuild command**, so `restore` can offer + rebuild as the recommended alternative to restoring the files + +## Open questions + +- **Where to scan.** A configured roots list (e.g. `~/Developer`) vs walking all + of `$HOME` minus never-touch. Leaning toward an explicit roots list to keep it + fast and predictable. +- **Monorepo nesting.** Report the top-most eligible dir only, or each nested + `node_modules`? Probably top-most, with total size rolled up. +- **`restore` semantics after reclaim.** Restores files from the Trash like any + other verb, and additionally prints the rebuild commands as the recommended + alternative. Worth a distinct receipt type so `restore` knows to surface the + rebuild hint for a reclaim. +```