Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 79 additions & 15 deletions packages/cli/src/__tests__/diff.routes.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import fs from "node:fs/promises";
import http from "node:http";
import os from "node:os";
import path from "node:path";
import type { DiffResponse } from "@stagereview/types/diff";
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { closeDb, getDb } from "../db/client.js";
import { chapterRun } from "../db/schema/index.js";
Expand Down Expand Up @@ -140,21 +141,30 @@ function rawRequest(port: number, requestPath: string): Promise<RawResponse> {
});
}

function parseDiffResponse(body: string): DiffResponse {
return JSON.parse(body) as DiffResponse;
}

describe("diff API", () => {
it("GET /api/runs/:runId/diff.patch streams the committed-scope unified diff", async () => {
it("GET /api/runs/:runId/diff.patch returns JSON with patch and fileContents", async () => {
const { baseSha, headSha } = await initRepoWithTwoCommits();
const runId = insertCommittedRun(baseSha, headSha);

const { port } = await startWithRoutes();
const res = await rawRequest(port, `/api/runs/${runId}/diff.patch`);

expect(res.status).toBe(200);
expect(res.headers["content-type"]).toMatch(/text\/plain/);
expect(res.headers["content-type"]).toMatch(/application\/json/);
expect(res.headers["cache-control"]).toBe("private, max-age=300");
expect(res.body).toContain("diff --git a/file.txt b/file.txt");
expect(res.body).toContain("+world");
// --no-color should suppress ANSI escapes (literal ESC character).
expect(res.body).not.toContain(`${String.fromCharCode(27)}[`);

const data = parseDiffResponse(res.body);
expect(data.patch).toContain("diff --git a/file.txt b/file.txt");
expect(data.patch).toContain("+world");
expect(data.patch).not.toContain(`${String.fromCharCode(27)}[`);

expect(data.fileContents["file.txt"]).toBeDefined();
expect(data.fileContents["file.txt"]?.oldContent).toBe("hello\n");
expect(data.fileContents["file.txt"]?.newContent).toBe("hello\nworld\n");
});

it("returns the unstaged diff for workingTree/unstaged runs", async () => {
Expand All @@ -167,14 +177,16 @@ describe("diff API", () => {

expect(res.status).toBe(200);
expect(res.headers["cache-control"]).toBe("no-store");
expect(res.body).toContain("+unstaged");

const data = parseDiffResponse(res.body);
expect(data.patch).toContain("+unstaged");
expect(data.fileContents["file.txt"]?.newContent).toBe("hello\nworld\nunstaged\n");
});

it("returns the staged diff for workingTree/staged runs", async () => {
await initRepoWithTwoCommits();
await fs.writeFile(path.join(repoRoot, "file.txt"), "hello\nworld\nstaged\n");
git("add", "file.txt");
// Add an extra unstaged change that must NOT appear in the staged diff.
await fs.writeFile(path.join(repoRoot, "file.txt"), "hello\nworld\nstaged\nleak\n");
const runId = insertWorkingTreeRun(WORKING_TREE_REF.STAGED);

Expand All @@ -183,26 +195,78 @@ describe("diff API", () => {

expect(res.status).toBe(200);
expect(res.headers["cache-control"]).toBe("no-store");
expect(res.body).toContain("+staged");
expect(res.body).not.toContain("+leak");

const data = parseDiffResponse(res.body);
expect(data.patch).toContain("+staged");
expect(data.patch).not.toContain("+leak");
expect(data.fileContents["file.txt"]?.newContent).toBe("hello\nworld\nstaged\n");
});

it("returns the combined diff vs HEAD for workingTree/work runs", async () => {
await initRepoWithTwoCommits();
// staged change
await fs.writeFile(path.join(repoRoot, "file.txt"), "hello\nworld\nstaged\n");
git("add", "file.txt");
// plus an unstaged change on top
await fs.writeFile(path.join(repoRoot, "file.txt"), "hello\nworld\nstaged\nunstaged\n");
const runId = insertWorkingTreeRun(WORKING_TREE_REF.WORK);

const { port } = await startWithRoutes();
const res = await rawRequest(port, `/api/runs/${runId}/diff.patch`);

expect(res.status).toBe(200);
// `git diff HEAD` includes both staged and unstaged changes.
expect(res.body).toContain("+staged");
expect(res.body).toContain("+unstaged");

const data = parseDiffResponse(res.body);
expect(data.patch).toContain("+staged");
expect(data.patch).toContain("+unstaged");
expect(data.fileContents["file.txt"]?.newContent).toBe("hello\nworld\nstaged\nunstaged\n");
});

it("returns null oldContent for added files", async () => {
git("init", "--initial-branch=main");
git("config", "user.email", "test@example.com");
git("config", "user.name", "Test");
git("config", "commit.gpgsign", "false");

await fs.writeFile(path.join(repoRoot, "initial.txt"), "init\n");
git("add", "initial.txt");
git("commit", "-m", "initial");
const baseSha = git("rev-parse", "HEAD").trim();

await fs.writeFile(path.join(repoRoot, "new-file.txt"), "brand new\n");
git("add", "new-file.txt");
git("commit", "-m", "add new file");
const headSha = git("rev-parse", "HEAD").trim();

const runId = insertCommittedRun(baseSha, headSha);
const { port } = await startWithRoutes();
const res = await rawRequest(port, `/api/runs/${runId}/diff.patch`);

const data = parseDiffResponse(res.body);
expect(data.fileContents["new-file.txt"]?.oldContent).toBeNull();
expect(data.fileContents["new-file.txt"]?.newContent).toBe("brand new\n");
});

it("returns null newContent for deleted files", async () => {
git("init", "--initial-branch=main");
git("config", "user.email", "test@example.com");
git("config", "user.name", "Test");
git("config", "commit.gpgsign", "false");

await fs.writeFile(path.join(repoRoot, "doomed.txt"), "goodbye\n");
git("add", "doomed.txt");
git("commit", "-m", "add file");
const baseSha = git("rev-parse", "HEAD").trim();

git("rm", "doomed.txt");
git("commit", "-m", "delete file");
const headSha = git("rev-parse", "HEAD").trim();

const runId = insertCommittedRun(baseSha, headSha);
const { port } = await startWithRoutes();
const res = await rawRequest(port, `/api/runs/${runId}/diff.patch`);

const data = parseDiffResponse(res.body);
expect(data.fileContents["doomed.txt"]?.oldContent).toBe("goodbye\n");
expect(data.fileContents["doomed.txt"]?.newContent).toBeNull();
});

it("returns 404 for unknown runId", async () => {
Expand Down
209 changes: 137 additions & 72 deletions packages/cli/src/routes/diff.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
import { spawn } from "node:child_process";
import type { ServerResponse } from "node:http";
import { execFile } from "node:child_process";
import fs from "node:fs/promises";
import path from "node:path";
import { promisify } from "node:util";
import type { DiffResponse, FileContentsMap } from "@stagereview/types/diff";
import { eq } from "drizzle-orm";
import type { StageDb } from "../db/client.js";
import type { ChapterRunRow } from "../db/schema/chapter-run.js";
import { chapterRun } from "../db/schema/index.js";
import { buildDiffArgs } from "../git.js";
import { SCOPE_KIND } from "../schema.js";
import { SCOPE_KIND, WORKING_TREE_REF } from "../schema.js";
import type { Route } from "../server.js";
import { writeJson } from "./json.js";

const execFileAsync = promisify(execFile);

const MAX_FILE_BYTES = 5 * 1024 * 1024;

export function diffRoutes(db: StageDb): Route[] {
return [
{
Expand All @@ -27,9 +34,6 @@ export function diffRoutes(db: StageDb): Route[] {
return;
}

// Defense in depth: repoRoot was validated at ingest, but the diff endpoint is
// a fresh boundary. Refuse non-absolute paths or any path containing `..`
// segments so we can't be tricked into spawning git against a traversal.
const repoRoot = run.repoRoot;
if (!path.isAbsolute(repoRoot) || repoRoot.split(path.sep).includes("..")) {
writeJson(res, 500, {
Expand All @@ -42,81 +46,142 @@ export function diffRoutes(db: StageDb): Route[] {
const cacheControl =
run.scopeKind === SCOPE_KIND.COMMITTED ? "private, max-age=300" : "no-store";

await streamGitDiff(res, repoRoot, args, cacheControl);
try {
const { stdout: patch } = await execFileAsync("git", args, {
cwd: repoRoot,
encoding: "utf8",
maxBuffer: 50 * 1024 * 1024,
});
const fileContents = await buildFileContents(run, repoRoot, patch);
const body: DiffResponse = { patch, fileContents };
res.writeHead(200, {
"Content-Type": "application/json; charset=utf-8",
"Cache-Control": cacheControl,
});
res.end(JSON.stringify(body));
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
writeJson(res, 500, { error: message });
}
},
},
];
}

function streamGitDiff(
res: ServerResponse,
const MINUS_RE = /^--- (?:a\/)?(.+)$/m;
const PLUS_RE = /^\+\+\+ (?:b\/)?(.+)$/m;
Comment thread
dastratakos marked this conversation as resolved.
const BINARY_RE = /^Binary files/m;

interface ParsedFilePaths {
oldPath: string | null;
newPath: string | null;
isBinary: boolean;
}

function parseFilePathsFromPatch(patch: string): ParsedFilePaths[] {
if (!patch.trim()) return [];

const segments = patch.split(/\ndiff --git /);
const results: ParsedFilePaths[] = [];

for (let i = 0; i < segments.length; i++) {
const segment = segments[i];
if (segment === undefined) continue;
const text = i === 0 ? segment : `diff --git ${segment}`;
if (!text.startsWith("diff --git ")) continue;

const isBinary = BINARY_RE.test(text);

const minus = text.match(MINUS_RE);
const plus = text.match(PLUS_RE);

const oldPath = minus?.[1] && minus[1] !== "/dev/null" ? minus[1] : null;
const newPath = plus?.[1] && plus[1] !== "/dev/null" ? plus[1] : null;

results.push({ oldPath, newPath, isBinary });
}

return results;
}

async function getGitFileContent(
cwd: string,
args: string[],
cacheControl: string,
): Promise<void> {
return new Promise((resolve) => {
const child = spawn("git", args, { cwd, stdio: ["ignore", "pipe", "pipe"] });

let stderr = "";
let settled = false;

const settle = () => {
if (settled) return;
settled = true;
resolve();
};

const writeSuccessHeaders = () => {
if (res.headersSent) return;
res.writeHead(200, {
"Content-Type": "text/plain; charset=utf-8",
"Cache-Control": cacheControl,
});
};

// If the client disconnects before git finishes, kill the child so we don't leak
// a long-running diff for a request nobody is reading.
res.once("close", () => {
if (!child.killed) child.kill("SIGTERM");
ref: string,
filePath: string,
): Promise<string | null> {
try {
const { stdout } = await execFileAsync("git", ["show", `${ref}:${filePath}`], {
cwd,
encoding: "utf8",
maxBuffer: MAX_FILE_BYTES,
});
return stdout;
} catch {
return null;
}
}

child.on("error", (err) => {
if (!res.headersSent) {
writeJson(res, 500, { error: `git failed: ${err.message}` });
} else if (!res.writableEnded) {
res.end();
}
settle();
});
async function readFileContent(repoRoot: string, filePath: string): Promise<string | null> {
const resolved = path.resolve(repoRoot, filePath);
const rel = path.relative(repoRoot, resolved);
if (rel.startsWith("..") || path.isAbsolute(rel)) return null;
try {
return await fs.readFile(resolved, "utf8");
Comment on lines +125 to +129
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Keep file-content reads inside repo after symlink resolution

readFileContent checks the lexical path with path.resolve/path.relative, but then reads through fs.readFile, which follows symlinks. In working-tree runs, a changed symlink inside the repo (for example, foo -> /etc/passwd) passes this check and causes /diff.patch to return data from outside repoRoot in fileContents. This introduces a local file disclosure path that did not exist before; resolve and validate the real path (or refuse symlinks) before reading.

Useful? React with 👍 / 👎.

} catch {
return null;
}
}
Comment thread
dastratakos marked this conversation as resolved.

child.stderr.on("data", (chunk: Buffer) => {
stderr += chunk.toString("utf8");
});
function getContentRefs(run: ChapterRunRow): { oldRef: string; newRef: string | "DISK" } {
if (run.scopeKind === SCOPE_KIND.COMMITTED) {
return { oldRef: run.baseSha, newRef: run.headSha };
}
switch (run.workingTreeRef) {
case WORKING_TREE_REF.UNSTAGED:
return { oldRef: "", newRef: "DISK" };
case WORKING_TREE_REF.STAGED:
return { oldRef: "HEAD", newRef: "" };
case WORKING_TREE_REF.WORK:
return { oldRef: "HEAD", newRef: "DISK" };
default:
return { oldRef: "HEAD", newRef: "HEAD" };
}
}

child.stdout.on("data", (chunk: Buffer) => {
writeSuccessHeaders();
if (!res.write(chunk)) {
child.stdout.pause();
res.once("drain", () => child.stdout.resume());
}
});
function fetchContent(
repoRoot: string,
ref: string | "DISK",
filePath: string,
): Promise<string | null> {
if (ref === "DISK") return readFileContent(repoRoot, filePath);
return getGitFileContent(repoRoot, ref, filePath);
}

child.on("close", (code) => {
if (settled) return;
if (code === 0) {
// Successful exit with no stdout (empty diff) still needs headers + end.
writeSuccessHeaders();
res.end();
} else if (!res.headersSent) {
const message = stderr.trim() || `git exited with code ${code}`;
writeJson(res, 500, { error: message });
} else {
// Headers were already sent, so we can't change the status. Log and terminate
// the response — the client will see a truncated patch.
process.stderr.write(`git diff failed mid-stream (exit ${code}): ${stderr}\n`);
if (!res.writableEnded) res.end();
}
settle();
});
});
async function buildFileContents(
run: ChapterRunRow,
repoRoot: string,
patch: string,
): Promise<FileContentsMap> {
const files = parseFilePathsFromPatch(patch);
const { oldRef, newRef } = getContentRefs(run);

const entries = await Promise.all(
Comment thread
dastratakos marked this conversation as resolved.
files.map(async ({ oldPath, newPath, isBinary }) => {
const key = newPath ?? oldPath;
if (!key || isBinary) return null;

const [oldContent, newContent] = await Promise.all([
oldPath ? fetchContent(repoRoot, oldRef, oldPath) : Promise.resolve(null),
newPath ? fetchContent(repoRoot, newRef, newPath) : Promise.resolve(null),
]);
Comment on lines +168 to +176
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Bound git-show fanout when building fileContents

buildFileContents uses nested Promise.all and can start up to two git show subprocesses per changed file at once. On large diffs, this unbounded fanout can exhaust process or file-descriptor limits (EAGAIN/EMFILE) and make the endpoint return 500 even though git diff succeeded. Use a bounded concurrency pool (or batching) for per-file content fetches.

Useful? React with 👍 / 👎.


return [key, { oldContent, newContent }] as const;
}),
);

const map: FileContentsMap = {};
for (const entry of entries) {
if (entry) map[entry[0]] = entry[1];
}
return map;
}
Loading
Loading