Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 198 additions & 0 deletions tests/skill/understand/test_parse_git_changes.test.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
import { describe, it, expect, afterEach } from 'vitest';
import { mkdtempSync, writeFileSync, readFileSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join, dirname, resolve } from 'node:path';
import { spawnSync } from 'node:child_process';
import { fileURLToPath } from 'node:url';

import { parseGitNameStatus }
from '../../../understand-anything-plugin/skills/understand/parse-git-changes.mjs';

const __dirname = dirname(fileURLToPath(import.meta.url));
const SCRIPT = resolve(
__dirname,
'../../../understand-anything-plugin/skills/understand/parse-git-changes.mjs',
);

describe('parseGitNameStatus — module API', () => {
it('returns empty sets for empty input', () => {
const { changedSet, pruneSet } = parseGitNameStatus('');
expect([...changedSet]).toEqual([]);
expect([...pruneSet]).toEqual([]);
});

it('returns empty sets for null/undefined input', () => {
expect(parseGitNameStatus(null).changedSet.size).toBe(0);
expect(parseGitNameStatus(undefined).pruneSet.size).toBe(0);
});

it('classifies M / A as changed-only', () => {
const diff = ['M\tsrc/a.ts', 'A\tsrc/b.ts'].join('\n');
const { changedSet, pruneSet } = parseGitNameStatus(diff);
expect([...changedSet].sort()).toEqual(['src/a.ts', 'src/b.ts']);
expect([...pruneSet]).toEqual([]);
});

it('classifies D as prune-only (no analyze)', () => {
const diff = 'D\tsrc/gone.ts';
const { changedSet, pruneSet } = parseGitNameStatus(diff);
expect([...changedSet]).toEqual([]);
expect([...pruneSet]).toEqual(['src/gone.ts']);
});

it('classifies R<score> as BOTH prune-old AND analyze-new (the Bug 1 fix)', () => {
// The exact repro from issue #366: `git mv docs/a.md docs/b.md`.
const diff = 'R100\tdocs/a.md\tdocs/b.md';
const { changedSet, pruneSet } = parseGitNameStatus(diff);
expect([...changedSet]).toEqual(['docs/b.md']);
expect([...pruneSet]).toEqual(['docs/a.md']);
});

it('classifies R with a non-100 similarity score (rename + small edit)', () => {
// Renamed AND modified — git emits R<score> with score < 100.
const diff = 'R087\tsrc/old/util.ts\tsrc/new/util.ts';
const { changedSet, pruneSet } = parseGitNameStatus(diff);
expect([...changedSet]).toEqual(['src/new/util.ts']);
expect([...pruneSet]).toEqual(['src/old/util.ts']);
});

it('classifies C<score> as analyze-new only (source file still exists)', () => {
// Copy detection: original survives, must NOT be pruned.
const diff = 'C50\tsrc/template.ts\tsrc/copy.ts';
const { changedSet, pruneSet } = parseGitNameStatus(diff);
expect([...changedSet]).toEqual(['src/copy.ts']);
expect([...pruneSet]).toEqual([]);
});

it('classifies T (type change) as changed-only', () => {
// T appears when a file flips between regular file / symlink / submodule.
const diff = 'T\tsrc/link.ts';
const { changedSet, pruneSet } = parseGitNameStatus(diff);
expect([...changedSet]).toEqual(['src/link.ts']);
expect([...pruneSet]).toEqual([]);
});

it('classifies U (unmerged) as changed so user must resolve it', () => {
const diff = 'U\tsrc/conflict.ts';
const { changedSet, pruneSet } = parseGitNameStatus(diff);
expect([...changedSet]).toEqual(['src/conflict.ts']);
expect([...pruneSet]).toEqual([]);
});

it('skips blank lines silently', () => {
const diff = ['', 'M\tsrc/a.ts', '', '', 'A\tsrc/b.ts', ''].join('\n');
const { changedSet, pruneSet } = parseGitNameStatus(diff);
expect([...changedSet].sort()).toEqual(['src/a.ts', 'src/b.ts']);
expect([...pruneSet]).toEqual([]);
});

it('handles a realistic mixed diff (M + A + D + R + C)', () => {
const diff = [
'M\tpackages/core/src/foo.ts',
'A\tpackages/core/src/new.ts',
'D\tpackages/core/src/dead.ts',
'R100\tdocs/a.md\tdocs/b.md',
'R087\tsrc/util.ts\tsrc/utils/index.ts',
'C75\tsrc/template.ts\tsrc/copy.ts',
].join('\n');
const { changedSet, pruneSet } = parseGitNameStatus(diff);
expect([...changedSet].sort()).toEqual([
'docs/b.md',
'packages/core/src/foo.ts',
'packages/core/src/new.ts',
'src/copy.ts',
'src/utils/index.ts',
]);
expect([...pruneSet].sort()).toEqual([
'docs/a.md',
'packages/core/src/dead.ts',
'src/util.ts',
]);
// No path may appear in BOTH sets — they MUST be disjoint, otherwise the
// prune step would delete the freshly-re-analyzed nodes.
for (const p of changedSet) expect(pruneSet.has(p)).toBe(false);
});

it('emits a warning to stderr for unknown status codes (does not throw)', () => {
// Use a status code git would never emit today (e.g. 'X' = unknown).
// We don't actually capture stderr from inside the test, but we can at
// least assert the parser is robust enough to skip and continue.
const diff = ['X\tsomething/weird.ts', 'M\tsrc/a.ts'].join('\n');
const { changedSet, pruneSet } = parseGitNameStatus(diff);
expect([...changedSet]).toEqual(['src/a.ts']);
expect([...pruneSet]).toEqual([]);
});
});

describe('parse-git-changes.mjs — CLI', () => {
let tmp;

afterEach(() => {
if (tmp) rmSync(tmp, { recursive: true, force: true });
});

it('writes sorted changed + prune sets to the two output files', () => {
tmp = mkdtempSync(join(tmpdir(), 'ua-pgc-'));
const diffPath = join(tmp, 'diff.txt');
const changedPath = join(tmp, 'changed.txt');
const prunePath = join(tmp, 'prune.txt');

// Mixed diff covering every branch the parser handles.
const diff = [
'M\tsrc/a.ts',
'A\tsrc/b.ts',
'D\tsrc/dead.ts',
'R100\tdocs/old.md\tdocs/new.md',
'C50\tsrc/template.ts\tsrc/copy.ts',
].join('\n');
writeFileSync(diffPath, diff);

const r = spawnSync('node', [SCRIPT, diffPath, changedPath, prunePath], {
encoding: 'utf-8',
});
expect(r.status).toBe(0);
// Stderr summary line is informational, not a Warning:
expect(r.stderr).toMatch(/parse-git-changes: 4 changed, 2 prune-only/);

// Outputs are sorted, newline-terminated.
expect(readFileSync(changedPath, 'utf-8')).toBe(
['docs/new.md', 'src/a.ts', 'src/b.ts', 'src/copy.ts'].join('\n') + '\n',
);
expect(readFileSync(prunePath, 'utf-8')).toBe(
['docs/old.md', 'src/dead.ts'].join('\n') + '\n',
);
});

it('CLI handles empty diff (no commits between base..HEAD)', () => {
tmp = mkdtempSync(join(tmpdir(), 'ua-pgc-empty-'));
const diffPath = join(tmp, 'diff.txt');
const changedPath = join(tmp, 'changed.txt');
const prunePath = join(tmp, 'prune.txt');
writeFileSync(diffPath, '');

const r = spawnSync('node', [SCRIPT, diffPath, changedPath, prunePath], {
encoding: 'utf-8',
});
expect(r.status).toBe(0);
// Empty outputs — empty strings, not "missing files".
expect(readFileSync(changedPath, 'utf-8')).toBe('');
expect(readFileSync(prunePath, 'utf-8')).toBe('');
});

it('CLI exits non-zero with no args', () => {
const r = spawnSync('node', [SCRIPT], { encoding: 'utf-8' });
expect(r.status).toBe(1);
expect(r.stderr).toMatch(/Usage: node parse-git-changes\.mjs/);
});

it('CLI exits non-zero when diff file is missing', () => {
tmp = mkdtempSync(join(tmpdir(), 'ua-pgc-miss-'));
const r = spawnSync(
'node',
[SCRIPT, join(tmp, 'no-such.txt'), join(tmp, 'c.txt'), join(tmp, 'p.txt')],
{ encoding: 'utf-8' },
);
expect(r.status).toBe(1);
expect(r.stderr).toMatch(/cannot read diff input/);
});
});
46 changes: 33 additions & 13 deletions understand-anything-plugin/skills/understand/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,25 @@ Determine whether to run a full analysis or incremental update.

**Review-only path:** Copy the existing `knowledge-graph.json` to `$PROJECT_ROOT/.understand-anything/intermediate/assembled-graph.json`, then jump directly to Phase 6 step 3.

For incremental updates, get the changed file list:
For incremental updates, get the changed file list using `--name-status -M` so renames are reported as **both** the old and new path (a plain `--name-only` only reports the new path, leaving the pre-rename node + every edge that referenced it orphaned in the graph — issue #366, Bug 1):

```bash
git diff <lastCommitHash>..HEAD --name-only
git diff <lastCommitHash>..HEAD --name-status -M \
> $PROJECT_ROOT/.understand-anything/tmp/git-changes.txt
node <SKILL_DIR>/parse-git-changes.mjs \
$PROJECT_ROOT/.understand-anything/tmp/git-changes.txt \
$PROJECT_ROOT/.understand-anything/tmp/changed-files.txt \
$PROJECT_ROOT/.understand-anything/tmp/prune-files.txt
```
If this returns no files, report "Graph is up to date" and STOP.

The parser splits each `--name-status` line into two disjoint sets:

- `changed-files.txt` — newline-delimited paths to **analyze** (statuses `M`, `A`, `T`, `U`, plus the new path of `R<score>` renames and `C<score>` copies).
- `prune-files.txt` — newline-delimited paths to **prune-only** (statuses `D`, plus the old path of `R<score>` renames). These are never re-analyzed; their nodes and edges are dropped from the existing graph so renamed/deleted files don't leave orphans.

Copies (`C<score>`) intentionally do **not** prune the source path because the source file still exists on disk.

If both files are empty, report "Graph is up to date" and STOP. (mkdir `$PROJECT_ROOT/.understand-anything/tmp` first if it does not already exist — Phase 0 step 3 creates it as part of normal flow.)

8. **Collect project context for subagent injection:**
- Read `README.md` (or `README.rst`, `readme.md`) from `$PROJECT_ROOT` if it exists. Store as `$README_CONTENT` (first 3000 characters).
Expand Down Expand Up @@ -362,12 +376,14 @@ Include the script's warnings in `$PHASE_WARNINGS` for the reviewer.

### Incremental update path

Write the changed-files list (one path per line) to a temp file:
```bash
git diff <lastCommitHash>..HEAD --name-only > $PROJECT_ROOT/.understand-anything/tmp/changed-files.txt
```
Reuse the two files produced by Phase 0 step 7's `parse-git-changes.mjs`:

- `$PROJECT_ROOT/.understand-anything/tmp/changed-files.txt` — paths to re-analyze (`M`, `A`, `T`, `U`, plus `R<score>` new paths and `C<score>` new paths).
- `$PROJECT_ROOT/.understand-anything/tmp/prune-files.txt` — paths to prune-only (`D` deletions plus `R<score>` old paths). These are NOT re-analyzed; their existing nodes and edges are removed from the graph so renamed/deleted files don't leave orphans.

If Phase 0 did not write these files yet (e.g. when this path is reached out of order), re-run the parser now using the same commands as Phase 0 step 7.

Run compute-batches with `--changed-files`:
Run compute-batches with `--changed-files` (only the changed-set — the prune-only set has nothing to analyze):
```bash
node <SKILL_DIR>/compute-batches.mjs $PROJECT_ROOT \
--changed-files=$PROJECT_ROOT/.understand-anything/tmp/changed-files.txt
Expand All @@ -377,15 +393,19 @@ This produces a `batches.json` that contains only batches with changed files, bu

Then dispatch file-analyzer subagents per the same template as the full path.

After batches complete:
1. Remove old nodes whose `filePath` matches any changed file from the existing graph
2. Remove old edges whose `source` or `target` references a removed node
3. Write the pruned existing nodes/edges as `batch-existing.json` in the intermediate directory
4. Run the same merge script — it will combine `batch-existing.json` with the fresh `batch-*.json` files:
After batches complete, **prune using the union of both sets** (`changed-files.txt` ∪ `prune-files.txt`) so renamed/deleted files no longer appear in the graph:

1. Build the prune-path set: read `$PROJECT_ROOT/.understand-anything/tmp/changed-files.txt` AND `$PROJECT_ROOT/.understand-anything/tmp/prune-files.txt`, take the union.
2. Remove old nodes whose `filePath` matches any path in the prune-path set from the existing graph.
3. Remove old edges whose `source` or `target` references a removed node.
4. Write the pruned existing nodes/edges as `batch-existing.json` in the intermediate directory.
5. Run the same merge script — it will combine `batch-existing.json` with the fresh `batch-*.json` files:
```bash
python <SKILL_DIR>/merge-batch-graphs.py $PROJECT_ROOT
```

**Why the union?** Re-analyzed files (`changed-files.txt`) need their old nodes/edges removed before the fresh batches add the new ones, otherwise stale function/class nodes survive alongside the rewritten file. Prune-only files (`prune-files.txt`) need removal without re-analysis — there is no replacement node to merge in.

---

## Phase 3 — ASSEMBLE REVIEW
Expand Down
Loading
Loading