From fd8eb4b3a1220addda2df2beb8e5b0aff9099b8d Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Thu, 11 Jun 2026 18:04:45 +0800 Subject: [PATCH] feat(#550): git co-change ranking signal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git.zig gains parseCoChange — a pure parser over git log --name-only --pretty=format:%H output (commit boundaries detected as 40-hex lines, mega-commits and git-quoted names skipped) — plus buildCoChange (shell out via cio.runCapture, bounded: 500 commits, 32 files/commit, top 8 partners/file) and freeCoChange. Explorer builds the map lazily (ensureCoChange, own mutex, attempted flag so a non-repo never re-shells; root path captured in setRoot) and folds it into both ranking paths as coChangeBoost: files that historically change with the files DEFINING the queried symbol get a >=1 multiplier (noise floor 2 shared commits, saturating at 8 -> x1.25). Seeds are defining files only, so plain-word queries never trigger the one-time git shell-out. CODEDB_NO_COCHANGE opts out. The tier0 defines scan now shares the new fileDefinesSymbol helper. Suite: 813/813. Co-Authored-By: Claude Fable 5 --- src/explore.zig | 113 ++++++++++++++++++++++++++++++---- src/git.zig | 143 ++++++++++++++++++++++++++++++++++++++++++++ src/test_core.zig | 73 ++++++++++++---------- src/test_search.zig | 53 ++++++++++++++++ 4 files changed, 340 insertions(+), 42 deletions(-) diff --git a/src/explore.zig b/src/explore.zig index b9883327..30a20b82 100644 --- a/src/explore.zig +++ b/src/explore.zig @@ -10,6 +10,7 @@ const MmapTrigramIndex = idx.MmapTrigramIndex; const AnyTrigramIndex = idx.AnyTrigramIndex; const SparseNgramIndex = idx.SparseNgramIndex; const codegraph = @import("codegraph.zig"); +const git = @import("git.zig"); /// Fast hash context for u32-keyed maps on hot paths (ranked search aggregation). /// Zig's AutoHashMap runs the 4 key bytes through Wyhash even for an integer key; @@ -775,6 +776,16 @@ pub const Explorer = struct { call_graph: ?CallGraph = null, centrality_build_mu: cio.Mutex = .{}, root_dir: ?std.Io.Dir = null, + /// Absolute project root path (duped in setRoot) — needed to shell out to + /// git for the co-change map (#550). + root_path: ?[]const u8 = null, + /// file → strongest git co-change partners (#550). Built lazily from + /// `git log --name-only`; null until built (or unavailable: no root, not + /// a repo). Guarded by cochange_build_mu; attempted-flag stops re-shelling + /// when git is absent. + co_change: ?std.StringHashMap([]git.CoChangePartner) = null, + co_change_attempted: bool = false, + cochange_build_mu: cio.Mutex = .{}, io: ?std.Io = null, /// When non-null, append one JSON line per searchContent invocation /// to this path (v0 rerank-trace experiment). Borrowed; caller owns @@ -807,6 +818,8 @@ pub const Explorer = struct { pub fn setRoot(self: *Explorer, io: std.Io, root_path: []const u8) void { self.io = io; self.root_dir = std.Io.Dir.cwd().openDir(io, root_path, .{}) catch null; + if (self.root_path) |old| self.allocator.free(old); + self.root_path = self.allocator.dupe(u8, root_path) catch null; } pub fn init(allocator: std.mem.Allocator, content_cache_capacity: u32) Explorer { @@ -848,6 +861,8 @@ pub const Explorer = struct { self.contents.deinit(); if (self.call_centrality) |*c| c.deinit(); if (self.call_graph) |*cg| cg.deinit(self.allocator); + if (self.co_change) |*cc| git.freeCoChange(cc, self.allocator); + if (self.root_path) |p| self.allocator.free(p); self.word_index.deinit(); self.trigram_index.deinit(); @@ -2520,17 +2535,7 @@ pub const Explorer = struct { const gop = tier0_files_by_path.getOrPut(hit_path) catch continue; if (!gop.found_existing) { const is_doc = isDocLanguage(detectLanguage(hit_path)); - var defines = false; - if (!is_doc) { - if (self.outlines.get(hit_path)) |outline| { - for (outline.symbols.items) |sym| { - if (asciiEqlIgnoreCase(sym.name, query)) { - defines = true; - break; - } - } - } - } + const defines = !is_doc and self.fileDefinesSymbol(hit_path, query); gop.value_ptr.* = .{ .path = hit_path, .count = 0, @@ -3007,9 +3012,23 @@ pub const Explorer = struct { graph_dist = self.queryGraphDistances(&gd_terms, ga); } + // #550 signal 2: git co-change. Seeds are the result files that + // DEFINE the queried symbol, so plain word queries never trigger the + // one-time `git log` shell-out. + var cc_seeds = std.StringHashMap(void).init(allocator); + defer cc_seeds.deinit(); + if (cio.posixGetenv("CODEDB_NO_COCHANGE") == null) { + for (result_list.items) |r| { + if (cc_seeds.contains(r.path)) continue; + if (self.fileDefinesSymbol(r.path, query)) cc_seeds.put(r.path, {}) catch {}; + } + if (cc_seeds.count() > 0) self.ensureCoChange(); + } + for (result_list.items) |*r| { r.score = self.rerankSignalScore(r.*, query); r.score *= graphDistanceBoost(graph_dist, r.path); + r.score *= self.coChangeBoost(&cc_seeds, r.path); if (lfp.enabled) r.score *= lfp.multiplier(file_hit_counts.get(r.path) orelse 1, max_file_hits); if (sp.enabled) r.score *= sp.multiplier(self, r.path); } @@ -3308,6 +3327,54 @@ pub const Explorer = struct { return false; } + /// True when `path`'s outline defines a symbol named `name` (any kind, + /// case-insensitive). Outline-based so it works on snapshot fast-loads + /// where symbol_index is deferred (#564). + fn fileDefinesSymbol(self: *const Explorer, path: []const u8, name: []const u8) bool { + const outline = self.outlines.get(path) orelse return false; + for (outline.symbols.items) |sym| { + if (asciiEqlIgnoreCase(sym.name, name)) return true; + } + return false; + } + + /// Build the git co-change map once (#550): `git log --name-only` over + /// the last 500 commits, mega-commits (>32 files) skipped, top 8 partners + /// per file. Needs root_path (setRoot); silently unavailable outside a + /// git repo — the attempted flag stops re-shelling. Mirrors + /// ensureCallGraph: call while holding at least a shared lock on `mu`. + fn ensureCoChange(self: *Explorer) void { + if (self.co_change != null or self.co_change_attempted) return; + self.cochange_build_mu.lock(); + defer self.cochange_build_mu.unlock(); + if (self.co_change != null or self.co_change_attempted) return; + self.co_change_attempted = true; + const root = self.root_path orelse return; + self.co_change = git.buildCoChange(self.allocator, root, 500, 32, 8); + } + + /// Boost for files that historically change together with the files + /// defining the queried symbol — git co-change, the temporal sibling of + /// graphDistanceBoost (#550). Always ≥ 1, never a filter; 1.0 when the + /// map or seeds are absent. Two shared commits is the noise floor; + /// strength saturates at eight (×1.25). + fn coChangeBoost(self: *const Explorer, seeds: *const std.StringHashMap(void), path: []const u8) f32 { + const cc = self.co_change orelse return 1.0; + if (seeds.count() == 0) return 1.0; + if (seeds.contains(path)) return 1.0; + var best: u32 = 0; + var it = seeds.keyIterator(); + while (it.next()) |s| { + const partners = cc.get(s.*) orelse continue; + for (partners) |p| { + if (p.count > best and std.mem.eql(u8, p.path, path)) best = p.count; + } + } + if (best < 2) return 1.0; + const strength = @min(@as(f32, @floatFromInt(best)) / 8.0, 1.0); + return 1.0 + 0.25 * strength; + } + /// Public, lock-acquiring entry point for single-threaded callers (the /// index/scan path) to pre-build call_centrality before persisting a snapshot, /// so a later load can restore it instead of paying the lazy first-query build. @@ -3694,6 +3761,27 @@ pub const Explorer = struct { } if (per_doc.count() == 0) return try allocator.alloc(SearchResult, 0); + // #550 signal 2: git co-change. Seeds are the candidate files that + // DEFINE a query-named symbol; without seeds the one-time `git log` + // shell-out never happens. + var cc_seeds = std.StringHashMap(void).init(ta); + if (cio.posixGetenv("CODEDB_NO_COCHANGE") == null) { + var seed_iter = per_doc.iterator(); + while (seed_iter.next()) |entry| { + const doc_id = entry.key_ptr.*; + const p = if (doc_id < self.word_index.id_to_path.items.len) self.word_index.id_to_path.items[doc_id] else ""; + if (p.len == 0 or cc_seeds.contains(p)) continue; + var t_it = terms_set.keyIterator(); + while (t_it.next()) |t| { + if (self.fileDefinesSymbol(p, t.*)) { + cc_seeds.put(p, {}) catch {}; + break; + } + } + } + if (cc_seeds.count() > 0) self.ensureCoChange(); + } + const Cand = struct { doc_id: u32, score: f32, best_line: u32 }; var cands: std.ArrayList(Cand) = .empty; defer cands.deinit(ta); @@ -3707,7 +3795,8 @@ pub const Explorer = struct { .score = entry.value_ptr.score * pathRelevanceMultiplier(cand_path, &terms_set) * self.centralityBoost(cand_path) * - graphDistanceBoost(graph_dist, cand_path), + graphDistanceBoost(graph_dist, cand_path) * + self.coChangeBoost(&cc_seeds, cand_path), .best_line = entry.value_ptr.best_line, }); } diff --git a/src/git.zig b/src/git.zig index b8170385..829b5e2c 100644 --- a/src/git.zig +++ b/src/git.zig @@ -29,3 +29,146 @@ pub fn getGitHead(root: []const u8, allocator: std.mem.Allocator) !?[40]u8 { @memcpy(&out, trimmed[0..40]); return out; } + +pub const CoChangePartner = struct { + path: []const u8, + count: u32, +}; + +fn isCommitSha(line: []const u8) bool { + if (line.len != 40) return false; + for (line) |c| { + if (!std.ascii.isHex(c)) return false; + } + return true; +} + +/// Parse `git log --name-only --pretty=format:%H` output into a co-change +/// map: file → strongest co-change partners, by shared-commit count. Pure +/// over the log text. Commits touching more than max_files_per_commit files +/// are skipped (vendor drops and formatting sweeps are co-change noise), as +/// are git-quoted exotic filenames. Caller owns the returned map — free with +/// freeCoChange. +pub fn parseCoChange( + allocator: std.mem.Allocator, + log_text: []const u8, + max_files_per_commit: usize, + max_partners: usize, +) !std.StringHashMap([]CoChangePartner) { + var arena_state = std.heap.ArenaAllocator.init(allocator); + defer arena_state.deinit(); + const a = arena_state.allocator(); + + var pair_counts = std.StringHashMap(u32).init(a); + var commit_files: std.ArrayList([]const u8) = .empty; + + var lines = std.mem.splitScalar(u8, log_text, '\n'); + var done = false; + while (!done) { + const maybe = lines.next(); + if (maybe == null) done = true; + const line = std.mem.trimEnd(u8, maybe orelse "", "\r"); + if (done or isCommitSha(line)) { + if (commit_files.items.len >= 2 and commit_files.items.len <= max_files_per_commit) { + for (commit_files.items, 0..) |fa, i| { + for (commit_files.items[i + 1 ..]) |fb| { + if (std.mem.eql(u8, fa, fb)) continue; + const lo = if (std.mem.lessThan(u8, fa, fb)) fa else fb; + const hi = if (std.mem.lessThan(u8, fa, fb)) fb else fa; + const key = try std.fmt.allocPrint(a, "{s}\x00{s}", .{ lo, hi }); + const gop = try pair_counts.getOrPut(key); + if (!gop.found_existing) gop.value_ptr.* = 0; + gop.value_ptr.* += 1; + } + } + } + commit_files.clearRetainingCapacity(); + continue; + } + if (line.len == 0) continue; + if (line[0] == '"') continue; + try commit_files.append(a, line); + } + + var per_file = std.StringHashMap(std.ArrayList(CoChangePartner)).init(a); + var pc_it = pair_counts.iterator(); + while (pc_it.next()) |entry| { + const key = entry.key_ptr.*; + const sep = std.mem.indexOfScalar(u8, key, 0) orelse continue; + const pair = [2][]const u8{ key[0..sep], key[sep + 1 ..] }; + for (pair, 0..) |file, side| { + const gop = try per_file.getOrPut(file); + if (!gop.found_existing) gop.value_ptr.* = .empty; + try gop.value_ptr.append(a, .{ .path = pair[1 - side], .count = entry.value_ptr.* }); + } + } + + var out = std.StringHashMap([]CoChangePartner).init(allocator); + errdefer freeCoChange(&out, allocator); + var pf_it = per_file.iterator(); + while (pf_it.next()) |entry| { + std.mem.sort(CoChangePartner, entry.value_ptr.items, {}, struct { + fn lt(_: void, x: CoChangePartner, y: CoChangePartner) bool { + if (x.count != y.count) return x.count > y.count; + return std.mem.lessThan(u8, x.path, y.path); + } + }.lt); + const n = @min(entry.value_ptr.items.len, max_partners); + const slice = try allocator.alloc(CoChangePartner, n); + var filled: usize = 0; + errdefer { + for (slice[0..filled]) |p| allocator.free(p.path); + allocator.free(slice); + } + for (entry.value_ptr.items[0..n]) |src| { + slice[filled] = .{ .path = try allocator.dupe(u8, src.path), .count = src.count }; + filled += 1; + } + const owned_key = try allocator.dupe(u8, entry.key_ptr.*); + errdefer allocator.free(owned_key); + try out.put(owned_key, slice); + } + return out; +} + +pub fn freeCoChange(map: *std.StringHashMap([]CoChangePartner), allocator: std.mem.Allocator) void { + var it = map.iterator(); + while (it.next()) |entry| { + for (entry.value_ptr.*) |p| allocator.free(p.path); + allocator.free(entry.value_ptr.*); + allocator.free(entry.key_ptr.*); + } + map.deinit(); +} + +/// Shell out to git log in `root` and build the co-change map (#550). Null +/// on any failure: not a git repo, git missing, empty history. A shallow +/// clone just yields a sparser map. +pub fn buildCoChange( + allocator: std.mem.Allocator, + root: []const u8, + max_commits: u32, + max_files_per_commit: usize, + max_partners: usize, +) ?std.StringHashMap([]CoChangePartner) { + var nbuf: [16]u8 = undefined; + const nstr = std.fmt.bufPrint(&nbuf, "{d}", .{max_commits}) catch return null; + const result = cio.runCapture(.{ + .allocator = allocator, + .argv = &.{ "git", "log", "--name-only", "--no-merges", "--pretty=format:%H", "-n", nstr }, + .cwd = root, + .max_output_bytes = 8 * 1024 * 1024, + }) catch return null; + defer allocator.free(result.stdout); + defer allocator.free(result.stderr); + switch (result.term) { + .Exited => |code| if (code != 0) return null, + else => return null, + } + var map = parseCoChange(allocator, result.stdout, max_files_per_commit, max_partners) catch return null; + if (map.count() == 0) { + map.deinit(); + return null; + } + return map; +} diff --git a/src/test_core.zig b/src/test_core.zig index 16525382..a9bc2a6d 100644 --- a/src/test_core.zig +++ b/src/test_core.zig @@ -12,7 +12,7 @@ const Explorer = explore.Explorer; const linter = @import("linter.zig"); const linter_pref = @import("linter_pref.zig"); const ContentCache = @import("hot_cache.zig").ContentCache; - +const git = @import("git.zig"); test "store: record and retrieve snapshots" { var store = Store.init(testing.allocator); @@ -26,7 +26,6 @@ test "store: record and retrieve snapshots" { try testing.expect(store.currentSeq() == 2); } - test "store: getLatest returns most recent version" { var store = Store.init(testing.allocator); defer store.deinit(); @@ -40,7 +39,6 @@ test "store: getLatest returns most recent version" { try testing.expect(latest.hash == 0x222); } - test "store: getLatest returns null for unknown file" { var store = Store.init(testing.allocator); defer store.deinit(); @@ -48,7 +46,6 @@ test "store: getLatest returns null for unknown file" { try testing.expect(store.getLatest("nope.zig") == null); } - test "store: changesSince counts correctly" { var store = Store.init(testing.allocator); defer store.deinit(); @@ -62,7 +59,6 @@ test "store: changesSince counts correctly" { try testing.expect(store.changesSince(3) == 0); } - test "store: changesSinceDetailed" { var store = Store.init(testing.allocator); defer store.deinit(); @@ -77,7 +73,6 @@ test "store: changesSinceDetailed" { try testing.expect(changes.len == 2); // a.zig and b.zig both changed } - test "store: recordDelete creates tombstone" { var store = Store.init(testing.allocator); defer store.deinit(); @@ -90,7 +85,6 @@ test "store: recordDelete creates tombstone" { try testing.expect(latest.size == 0); } - test "store: getAtCursor" { var store = Store.init(testing.allocator); defer store.deinit(); @@ -109,7 +103,6 @@ test "store: getAtCursor" { try testing.expect(at3.size == 30); } - test "store: recordEdit persists diff data to data log" { var tmp_dir = testing.tmpDir(.{}); defer tmp_dir.cleanup(); @@ -142,7 +135,6 @@ test "store: recordEdit persists diff data to data log" { try testing.expectEqualStrings(diff, buf[0..diff.len]); } - test "agent: register and heartbeat" { var agents = AgentRegistry.init(testing.allocator); defer agents.deinit(); @@ -154,7 +146,6 @@ test "agent: register and heartbeat" { // No crash = success } - test "agent: register multiple agents" { var agents = AgentRegistry.init(testing.allocator); defer agents.deinit(); @@ -165,7 +156,6 @@ test "agent: register multiple agents" { try testing.expect(b == 2); } - test "agent: lock and unlock" { var agents = AgentRegistry.init(testing.allocator); defer agents.deinit(); @@ -178,7 +168,6 @@ test "agent: lock and unlock" { agents.releaseLock(id, "file.zig"); } - test "agent: lock contention between agents" { var agents = AgentRegistry.init(testing.allocator); defer agents.deinit(); @@ -202,7 +191,6 @@ test "agent: lock contention between agents" { try testing.expect(got_b2 == true); } - test "agent: same-agent relock does not duplicate lock key" { var agents = AgentRegistry.init(testing.allocator); defer agents.deinit(); @@ -219,7 +207,6 @@ test "agent: same-agent relock does not duplicate lock key" { try testing.expect(agent.locked_paths.count() == 0); } - test "agent: reapStale frees lock keys and clears map" { var agents = AgentRegistry.init(testing.allocator); defer agents.deinit(); @@ -236,7 +223,6 @@ test "agent: reapStale frees lock keys and clears map" { try testing.expect(agent.locked_paths.count() == 0); } - test "issue-411: tryLock grants new locks to a crashed agent" { var agents = AgentRegistry.init(testing.allocator); defer agents.deinit(); @@ -282,7 +268,6 @@ test "issue-528: each MCP session registers a distinct edit-lock owner (not shar try testing.expect(try agents.tryLock(session_b, "x.zig", 60_000)); } - test "issue-401: insert with after=null is a no-op but consumes seq and writes file" { var tmp = testing.tmpDir(.{}); defer tmp.cleanup(); @@ -319,7 +304,6 @@ test "issue-401: insert with after=null is a no-op but consumes seq and writes f } } - test "issue-404: applyEdit corrupts CRLF line endings into mixed LF/CRLF" { var tmp = testing.tmpDir(.{}); defer tmp.cleanup(); @@ -366,7 +350,6 @@ test "issue-404: applyEdit corrupts CRLF line endings into mixed LF/CRLF" { } } - test "issue-409: replacing whole file with empty content leaves a stray newline" { var tmp = testing.tmpDir(.{}); defer tmp.cleanup(); @@ -407,7 +390,6 @@ test "issue-409: replacing whole file with empty content leaves a stray newline" try testing.expectEqual(@as(u64, 0), result.new_size); } - // ── Post-edit syntax health (trial/graph-based-codedb) ──────────────────── test "edit-health: flags unmatched close from a mis-spliced import edit (httpx-style)" { @@ -582,7 +564,6 @@ test "edit-health: a name re-imported from another module is not flagged" { try testing.expect(msg == null); } - // ── Anchor-based str_replace (P2, trial/graph-based-codedb) ─────────────── test "edit-str_replace: anchored replace updates the unique occurrence exactly" { @@ -699,7 +680,6 @@ test "edit-str_replace: health check still runs on an anchored edit that breaks try testing.expect(std.mem.indexOf(u8, result.health.?, "never closed") != null); } - // ── op=create: author new files (trial/graph-based-codedb) ──────────────── test "edit-create: op=create authors a new file that did not exist" { @@ -758,7 +738,6 @@ test "edit-create: op=create refuses to clobber an existing file" { try testing.expectEqualStrings("keep = 1\n", after); } - // ── Tier-1 linter registry + session policy (trial/graph-based-codedb) ──── fn argsHaveFileToken(args: []const []const u8) bool { @@ -823,7 +802,6 @@ test "linter: toolOnPath returns false for a non-existent executable" { try testing.expect(!linter.toolOnPath(testing.allocator, "codedb_definitely_not_a_real_tool_zzz")); } - // ── Linter opt-in preference persistence (trial/graph-based-codedb) ─────── test "linter-pref: parseBody maps tokens to the three states" { @@ -857,7 +835,6 @@ test "linter-pref: write then read round-trips on/off; missing file is unset" { try testing.expectEqual(linter_pref.Pref.off, linter_pref.readAt(io, path)); } - // ── Linter execution + output parsing (trial/graph-based-codedb) ────────── test "linter: installFor returns installers for installable tools, null for toolchain langs" { @@ -933,7 +910,6 @@ test "linter: the interactive prompt entrypoint compiles (analysis guard)" { _ = &cio.readLine; } - // ── Diagnostics cache (trial/graph-based-codedb) ────────────────────────── test "diag-cache: store + appendIfFresh matches on (path,hash), misses otherwise" { @@ -994,7 +970,6 @@ test "diag-cache: eviction stays bounded and leak-free past MAX entries" { try testing.expect(c.appendLatest(testing.allocator, &out, last)); // newest retained } - test "issue-101: Store.max_versions is configurable (caps per-file history)" { // Default cap is 100. After setting max_versions = 3, writing 5 versions // of the same file must leave exactly 3 in-memory. @@ -1015,7 +990,6 @@ test "issue-101: Store.max_versions is configurable (caps per-file history)" { try testing.expectEqual(@as(u64, 0x555), entry.versions.items[2].hash); } - test "issue-102: Explorer.init capacity flows to ContentCache" { // Verifies that the capacity arg to Explorer.init actually sets the // ContentCache capacity — the bug that issue-102 was filed for. @@ -1025,7 +999,6 @@ test "issue-102: Explorer.init capacity flows to ContentCache" { try testing.expectEqual(@as(u32, 8), explorer.contents.capacity); } - test "issue-101+102: .codedbrc max_cached threads through to ContentCache capacity" { // End-to-end: parse a .codedbrc body, construct Explorer with the parsed // max_cached, verify the ContentCache capacity matches. @@ -1050,7 +1023,6 @@ test "issue-101+102: .codedbrc max_cached threads through to ContentCache capaci try testing.expectEqual(@as(u32, 32), explorer.contents.capacity); } - test "issue-584: ContentCache probe-window — overflow inserts, holes, and duplicate keys" { // putImpl's overflow path evicts via a global CLOCK hand, so the new entry // lands OUTSIDE the key's 4-slot probe window: get() can never find it. @@ -1173,7 +1145,6 @@ test "issue-597: data log compacts orphaned diff ranges and fixes offsets" { try testing.expectEqualStrings("DDDDD", &buf); } - test "issue-603: appendVersion failed key dupe leaves a poisoned files entry" { var failing = std.testing.FailingAllocator.init(testing.allocator, .{ .fail_index = 1 }); var store = Store.init(failing.allocator()); @@ -1182,3 +1153,45 @@ test "issue-603: appendVersion failed key dupe leaves a poisoned files entry" { try testing.expectEqual(@as(usize, 0), store.files.count()); store.deinit(); } + +test "issue-550: parseCoChange builds bounded per-file partner lists" { + const alloc = testing.allocator; + const log = + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n" ++ + "\n" ++ + "src/a.zig\n" ++ + "src/b.zig\n" ++ + "\n" ++ + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n" ++ + "\n" ++ + "src/a.zig\n" ++ + "src/b.zig\n" ++ + "src/c.zig\n" ++ + "\n" ++ + "cccccccccccccccccccccccccccccccccccccccc\n" ++ + "\n" ++ + "src/mega1.zig\n" ++ + "src/mega2.zig\n" ++ + "src/mega3.zig\n" ++ + "src/mega4.zig\n"; + + var map = try git.parseCoChange(alloc, log, 3, 8); + defer git.freeCoChange(&map, alloc); + + const a_partners = map.get("src/a.zig") orelse return testing.expect(false); + try testing.expectEqual(@as(usize, 2), a_partners.len); + try testing.expectEqualStrings("src/b.zig", a_partners[0].path); + try testing.expectEqual(@as(u32, 2), a_partners[0].count); + try testing.expectEqualStrings("src/c.zig", a_partners[1].path); + try testing.expectEqual(@as(u32, 1), a_partners[1].count); + + // The 4-file commit exceeds max_files_per_commit=3 — contributes nothing. + try testing.expect(map.get("src/mega1.zig") == null); + + // max_partners truncates after the count-descending sort. + var capped = try git.parseCoChange(alloc, log, 3, 1); + defer git.freeCoChange(&capped, alloc); + const a_capped = capped.get("src/a.zig") orelse return testing.expect(false); + try testing.expectEqual(@as(usize, 1), a_capped.len); + try testing.expectEqualStrings("src/b.zig", a_capped[0].path); +} diff --git a/src/test_search.zig b/src/test_search.zig index 3956b034..bae85775 100644 --- a/src/test_search.zig +++ b/src/test_search.zig @@ -10,6 +10,7 @@ const TrigramIndex = @import("index.zig").TrigramIndex; const SparseNgramIndex = @import("index.zig").SparseNgramIndex; const explore = @import("explore.zig"); const Language = explore.Language; +const git = @import("git.zig"); const SymbolKind = explore.SymbolKind; const DependencyGraph = explore.DependencyGraph; const SymbolLocation = explore.SymbolLocation; @@ -2245,3 +2246,55 @@ test "issue-550: call-graph distance ranks structurally-near files above equal-l try testing.expect(noise_score > 0); try testing.expect(helper_score > noise_score); } + +test "issue-550: co-change partner of the defining file outranks an unrelated equal-lexical file" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const aa = arena.allocator(); + var explorer = Explorer.init(aa, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + + try explorer.indexFile("src/def.zig", + \\pub fn frobnicate() void {} + ); + // partner and stranger mention the term identically; only the injected + // co-change history distinguishes them. + try explorer.indexFile("src/partner.zig", + \\pub fn helper() void { + \\ // frobnicate mention + \\} + ); + try explorer.indexFile("src/stranger.zig", + \\pub fn other() void { + \\ // frobnicate mention + \\} + ); + + const partners = try aa.alloc(git.CoChangePartner, 1); + partners[0] = .{ .path = "src/partner.zig", .count = 8 }; + var cc = std.StringHashMap([]git.CoChangePartner).init(aa); + try cc.put("src/def.zig", partners); + explorer.co_change = cc; + explorer.co_change_attempted = true; + + const results = try explorer.searchContent("frobnicate", testing.allocator, 8); + defer { + for (results) |r| { + testing.allocator.free(r.line_text); + testing.allocator.free(r.path); + } + testing.allocator.free(results); + } + + try testing.expect(results.len >= 3); + try testing.expectEqualStrings("src/def.zig", results[0].path); + + var partner_score: f32 = -1.0; + var stranger_score: f32 = -1.0; + for (results) |r| { + if (partner_score < 0 and std.mem.eql(u8, r.path, "src/partner.zig")) partner_score = r.score; + if (stranger_score < 0 and std.mem.eql(u8, r.path, "src/stranger.zig")) stranger_score = r.score; + } + try testing.expect(partner_score > 0); + try testing.expect(stranger_score > 0); + try testing.expect(partner_score > stranger_score); +}