diff --git a/src/benchmark.zig b/src/benchmark.zig index 2729dcd..6224a3b 100644 --- a/src/benchmark.zig +++ b/src/benchmark.zig @@ -310,12 +310,21 @@ pub fn main(init: std.process.Init.Minimal) !void { }; const index_ns = t0.read(); - // Queries + // Queries — the per-query rows measure UNCACHED searches so they stay + // comparable across versions; one extra row shows the result-cache hit + // latency for a representative query. + cio.posixSetenv("CODEDB_NO_SEARCH_CACHE", "1"); var qlist: std.ArrayList(QueryResult) = .empty; defer qlist.deinit(alloc); for ([_][]const u8{ "middleware", "authentication", "webhook", "database", "error" }) |q| (qlist.append(alloc, try benchSearch(&explorer, q, args.iterations, alloc)) catch {}); + cio.posixUnsetenv("CODEDB_NO_SEARCH_CACHE"); + if (benchSearch(&explorer, "error", args.iterations, alloc) catch null) |cached_qr| { + var qr = cached_qr; + qr.kind = "cached"; + qlist.append(alloc, qr) catch {}; + } for ([_][]const u8{ "request", "response", "config", "error" }) |q| (qlist.append(alloc, try benchWord(&explorer, q, args.iterations, alloc)) catch {}); for ([_][]const u8{ "init", "main", "handleRequest", "render" }) |q| diff --git a/src/explore.zig b/src/explore.zig index ee1226b..076752e 100644 --- a/src/explore.zig +++ b/src/explore.zig @@ -924,6 +924,327 @@ const LineOffsetCache = struct { return n; } }; + +/// Whole-query result cache for searchContent: agents re-issue identical +/// searches constantly, and a hit copies the cached results into the +/// caller's allocator instead of re-running the search tiers. An entry is +/// served only when BOTH its generation (Explorer.search_gen, bumped by +/// every mutation that can change results: indexing, removal, word-index +/// rebuilds, and the lazy symbol-index / call-graph / co-change builds) and +/// its env fingerprint (the ranking kill-switch variables) still match — so +/// a hit can never observe state a fresh search would not have produced. +/// CODEDB_NO_SEARCH_CACHE=1 disables it entirely (the repo benchmark sets +/// this so its per-query numbers keep measuring uncached searches). +const SearchResultCache = struct { + const CachedResult = struct { + path: []u8, + line_num: u32, + line_text: []u8, + score: f32, + }; + const Entry = struct { + query: []u8, + max_results: usize, + gen: u64, + env_fp: u64, + results: []CachedResult, + bytes: usize, + last_used: u64, + }; + + allocator: std.mem.Allocator, + entries: std.ArrayList(Entry) = .empty, + mu: cio.Mutex = .{}, + tick: u64 = 0, + total_bytes: usize = 0, + /// Test-visible counters; production code does not read them. + hits: u64 = 0, + misses: u64 = 0, + + const MAX_ENTRIES: usize = 64; + const MAX_BYTES: usize = 4 * 1024 * 1024; + /// One entry may not claim more than a quarter of the byte budget. + const MAX_ENTRY_BYTES: usize = MAX_BYTES / 4; + + fn init(allocator: std.mem.Allocator) SearchResultCache { + return .{ .allocator = allocator }; + } + + fn freeEntry(self: *SearchResultCache, e: *Entry) void { + self.allocator.free(e.query); + for (e.results) |r| { + self.allocator.free(r.path); + self.allocator.free(r.line_text); + } + self.allocator.free(e.results); + } + + fn deinit(self: *SearchResultCache) void { + for (self.entries.items) |*e| self.freeEntry(e); + self.entries.deinit(self.allocator); + } + + /// On hit, returns results duped into `out_allocator` (caller owns each + /// path/line_text and the slice, same contract as a fresh search). A + /// stale same-key entry is dropped so the slot can refill. + fn get(self: *SearchResultCache, query: []const u8, max_results: usize, gen: u64, env_fp: u64, out_allocator: std.mem.Allocator) ?[]const SearchResult { + self.mu.lock(); + defer self.mu.unlock(); + var i: usize = 0; + while (i < self.entries.items.len) : (i += 1) { + const e = &self.entries.items[i]; + if (e.max_results != max_results or !std.mem.eql(u8, e.query, query)) continue; + if (e.gen != gen or e.env_fp != env_fp) { + var dead = self.entries.swapRemove(i); + self.total_bytes -= dead.bytes; + self.freeEntry(&dead); + self.misses += 1; + return null; + } + const out = out_allocator.alloc(SearchResult, e.results.len) catch return null; + var n: usize = 0; + for (e.results) |r| { + const p = out_allocator.dupe(u8, r.path) catch break; + const t = out_allocator.dupe(u8, r.line_text) catch { + out_allocator.free(p); + break; + }; + out[n] = .{ .path = p, .line_num = r.line_num, .line_text = t, .score = r.score }; + n += 1; + } + if (n != e.results.len) { + // OOM mid-copy: release and fall through to a fresh search. + for (out[0..n]) |r| { + out_allocator.free(r.path); + out_allocator.free(r.line_text); + } + out_allocator.free(out); + return null; + } + self.tick += 1; + e.last_used = self.tick; + self.hits += 1; + return out; + } + self.misses += 1; + return null; + } + + /// Copies `results` into cache-owned memory. Any OOM just skips caching. + fn put(self: *SearchResultCache, query: []const u8, max_results: usize, gen: u64, env_fp: u64, results: []const SearchResult) void { + var bytes: usize = query.len + @sizeOf(Entry); + for (results) |r| bytes += r.path.len + r.line_text.len + @sizeOf(CachedResult); + if (bytes > MAX_ENTRY_BYTES) return; + + const owned = self.allocator.alloc(CachedResult, results.len) catch return; + var n: usize = 0; + for (results) |r| { + const p = self.allocator.dupe(u8, r.path) catch break; + const t = self.allocator.dupe(u8, r.line_text) catch { + self.allocator.free(p); + break; + }; + owned[n] = .{ .path = p, .line_num = r.line_num, .line_text = t, .score = r.score }; + n += 1; + } + const query_copy = if (n == results.len) self.allocator.dupe(u8, query) catch null else null; + if (query_copy == null) { + for (owned[0..n]) |r| { + self.allocator.free(r.path); + self.allocator.free(r.line_text); + } + self.allocator.free(owned); + return; + } + + self.mu.lock(); + defer self.mu.unlock(); + // Replace any existing same-key entry (it was a miss or stale). + var i: usize = 0; + while (i < self.entries.items.len) { + const e = &self.entries.items[i]; + if (e.max_results == max_results and std.mem.eql(u8, e.query, query)) { + var dead = self.entries.swapRemove(i); + self.total_bytes -= dead.bytes; + self.freeEntry(&dead); + continue; + } + i += 1; + } + // Evict least-recently-used entries until the new one fits. + while (self.entries.items.len >= MAX_ENTRIES or + (self.entries.items.len > 0 and self.total_bytes + bytes > MAX_BYTES)) + { + var lru: usize = 0; + for (self.entries.items, 0..) |e, j| { + if (e.last_used < self.entries.items[lru].last_used) lru = j; + } + var dead = self.entries.swapRemove(lru); + self.total_bytes -= dead.bytes; + self.freeEntry(&dead); + } + self.tick += 1; + self.entries.append(self.allocator, .{ + .query = query_copy.?, + .max_results = max_results, + .gen = gen, + .env_fp = env_fp, + .results = owned, + .bytes = bytes, + .last_used = self.tick, + }) catch { + self.allocator.free(query_copy.?); + for (owned) |r| { + self.allocator.free(r.path); + self.allocator.free(r.line_text); + } + self.allocator.free(owned); + return; + }; + self.total_bytes += bytes; + } +}; + +/// Rendered-output sibling of SearchResultCache for renderPlainSearch — the +/// MCP fast path renders straight to text and never reaches searchContent, +/// so it gets its own (query, max_results, paths_only) → bytes cache with +/// identical generation + env-fingerprint validation and LRU discipline. +const PlainRenderCache = struct { + const Entry = struct { + query: []u8, + max_results: usize, + paths_only: bool, + gen: u64, + env_fp: u64, + bytes: []u8, + last_used: u64, + }; + + allocator: std.mem.Allocator, + entries: std.ArrayList(Entry) = .empty, + mu: cio.Mutex = .{}, + tick: u64 = 0, + total_bytes: usize = 0, + /// Test-visible counters; production code does not read them. + hits: u64 = 0, + misses: u64 = 0, + + const MAX_ENTRIES: usize = 64; + const MAX_BYTES: usize = 4 * 1024 * 1024; + const MAX_ENTRY_BYTES: usize = MAX_BYTES / 4; + + fn init(allocator: std.mem.Allocator) PlainRenderCache { + return .{ .allocator = allocator }; + } + + fn freeEntry(self: *PlainRenderCache, e: *Entry) void { + self.allocator.free(e.query); + self.allocator.free(e.bytes); + } + + fn deinit(self: *PlainRenderCache) void { + for (self.entries.items) |*e| self.freeEntry(e); + self.entries.deinit(self.allocator); + } + + /// On hit, appends the cached rendering to `out` and returns true. + fn render(self: *PlainRenderCache, query: []const u8, max_results: usize, paths_only: bool, gen: u64, env_fp: u64, out_allocator: std.mem.Allocator, out: *std.ArrayList(u8)) bool { + self.mu.lock(); + defer self.mu.unlock(); + var i: usize = 0; + while (i < self.entries.items.len) : (i += 1) { + const e = &self.entries.items[i]; + if (e.max_results != max_results or e.paths_only != paths_only or !std.mem.eql(u8, e.query, query)) continue; + if (e.gen != gen or e.env_fp != env_fp) { + var dead = self.entries.swapRemove(i); + self.total_bytes -= dead.bytes.len; + self.freeEntry(&dead); + self.misses += 1; + return false; + } + out.appendSlice(out_allocator, e.bytes) catch return false; + self.tick += 1; + e.last_used = self.tick; + self.hits += 1; + return true; + } + self.misses += 1; + return false; + } + + /// Copies `bytes` into cache-owned memory. Any OOM just skips caching. + fn put(self: *PlainRenderCache, query: []const u8, max_results: usize, paths_only: bool, gen: u64, env_fp: u64, bytes: []const u8) void { + if (bytes.len > MAX_ENTRY_BYTES) return; + const bytes_copy = self.allocator.dupe(u8, bytes) catch return; + const query_copy = self.allocator.dupe(u8, query) catch { + self.allocator.free(bytes_copy); + return; + }; + + self.mu.lock(); + defer self.mu.unlock(); + var i: usize = 0; + while (i < self.entries.items.len) { + const e = &self.entries.items[i]; + if (e.max_results == max_results and e.paths_only == paths_only and std.mem.eql(u8, e.query, query)) { + var dead = self.entries.swapRemove(i); + self.total_bytes -= dead.bytes.len; + self.freeEntry(&dead); + continue; + } + i += 1; + } + while (self.entries.items.len >= MAX_ENTRIES or + (self.entries.items.len > 0 and self.total_bytes + bytes_copy.len > MAX_BYTES)) + { + var lru: usize = 0; + for (self.entries.items, 0..) |e, j| { + if (e.last_used < self.entries.items[lru].last_used) lru = j; + } + var dead = self.entries.swapRemove(lru); + self.total_bytes -= dead.bytes.len; + self.freeEntry(&dead); + } + self.tick += 1; + self.entries.append(self.allocator, .{ + .query = query_copy, + .max_results = max_results, + .paths_only = paths_only, + .gen = gen, + .env_fp = env_fp, + .bytes = bytes_copy, + .last_used = self.tick, + }) catch { + self.allocator.free(query_copy); + self.allocator.free(bytes_copy); + return; + }; + self.total_bytes += bytes_copy.len; + } +}; + +/// Fingerprint of the env kill-switches that change ranking/search output — +/// part of the SearchResultCache key so toggling one (tests do this +/// mid-process) can never serve results computed under the other setting. +fn rankingEnvFingerprint() u64 { + var h = std.hash.Wyhash.init(0x5eed); + inline for ([_][]const u8{ + "CODEDB_NO_CENTRALITY", + "CODEDB_NO_GRAPH_DISTANCE", + "CODEDB_NO_COCHANGE", + "CODEDB_LEX_FREQ_PENALTY", + "CODEDB_LEX_FREQ_AMP", + "CODEDB_RVSM_SIZE_PRIOR", + "CODEDB_RVSM_AMP", + "CODEDB_RVSM_K", + "CODEDB_IN_DEGREE_CENTRALITY", + }) |name| { + h.update(name); + if (cio.posixGetenv(name)) |v| h.update(v) else h.update(&[_]u8{0}); + } + return h.final(); +} + pub const Explorer = struct { outlines: std.StringHashMap(FileOutline), dep_graph: DependencyGraph, @@ -979,6 +1300,14 @@ pub const Explorer = struct { /// Production code does not read this field. search_tier5_count: u64 = 0, last_search_breakdown: SearchBreakdown = .{}, + /// Whole-query result cache for searchContent, validated against + /// search_gen + the ranking-env fingerprint (see SearchResultCache). + search_cache: SearchResultCache, + /// Rendered-output cache for renderPlainSearch (same validation). + plain_render_cache: PlainRenderCache, + /// Bumped (atomically — searches run under the SHARED lock) by every + /// mutation that can change search results; see bumpSearchGen callers. + search_gen: std.atomic.Value(u64) = std.atomic.Value(u64).init(0), /// Buffers adopted from snapshot loads (the raw outline_state section). /// Restored FileOutlines borrow their import/symbol strings as slices into /// these (see FileOutline.borrows_strings), so they must outlive every @@ -1015,6 +1344,8 @@ pub const Explorer = struct { .dep_graph = DependencyGraph.init(allocator), .contents = try ContentCache.initAlloc(allocator, content_cache_capacity), .line_offsets = LineOffsetCache.init(allocator), + .search_cache = SearchResultCache.init(allocator), + .plain_render_cache = PlainRenderCache.init(allocator), .symbol_index = std.StringHashMap(std.ArrayList(SymbolLocation)).init(allocator), .symbol_index_complete = true, .word_index = WordIndex.init(allocator), @@ -1043,6 +1374,8 @@ pub const Explorer = struct { self.contents.deinit(); self.line_offsets.deinit(); + self.search_cache.deinit(); + self.plain_render_cache.deinit(); if (self.call_centrality) |*c| c.deinit(); if (self.call_graph) |*cg| cg.deinit(self.allocator); if (self.co_change) |*cc| git.freeCoChange(cc, self.allocator); @@ -1118,6 +1451,7 @@ pub const Explorer = struct { } pub fn commitParsedFileOwnedOutline(self: *Explorer, path: []const u8, content: []const u8, outline: FileOutline, full_index: bool, skip_trigram: bool) !void { + self.bumpSearchGen(); var owned_outline = outline; // One deinit only: an errdefer here would stack with the defer below // and double-free the parsed outline on any post-clone error. @@ -1624,6 +1958,15 @@ pub const Explorer = struct { const parsed = try parseContentForIndexing(self.allocator, path, content); return self.commitParsedFileOwnedOutline(path, parsed.content, parsed.outline, full_index, skip_trigram); } + + /// Invalidate the searchContent result cache. Called by every mutation + /// that can change what a search returns — file commits/removals, index + /// rebuilds, and the one-shot lazy ranking-signal builds (symbol index, + /// call graph, co-change). Atomic because the lazy builds run under the + /// SHARED lock. + fn bumpSearchGen(self: *Explorer) void { + _ = self.search_gen.fetchAdd(1, .release); + } /// Rebuild trigram index from the stored file contents. /// Used after a cache hit to populate trigrams when they were skipped during the fast scan. pub fn rebuildTrigrams(self: *Explorer) !void { @@ -1646,6 +1989,7 @@ pub const Explorer = struct { /// by streaming source files from the project root when the content cache /// was capped during fast snapshot restore. pub fn rebuildWordIndex(self: *Explorer) !void { + self.bumpSearchGen(); const source_paths = blk: { self.mu.lockShared(); defer self.mu.unlockShared(); @@ -1741,6 +2085,7 @@ pub const Explorer = struct { self.mu.lock(); defer self.mu.unlock(); if (self.symbol_index_complete) return; + self.bumpSearchGen(); self.symbol_index_complete = true; var it = self.outlines.iterator(); while (it.next()) |entry| { @@ -1802,6 +2147,7 @@ pub const Explorer = struct { } pub fn removeFile(self: *Explorer, path: []const u8) void { + self.bumpSearchGen(); self.mu.lock(); defer self.mu.unlock(); if (!self.word_index_complete) { @@ -2654,6 +3000,25 @@ pub const Explorer = struct { } pub fn searchContent(self: *Explorer, query: []const u8, allocator: std.mem.Allocator, max_results: usize) ![]const SearchResult { + // Result-cache front door: a hit copies the cached results into the + // caller's allocator (same ownership contract as a fresh search). + // The generation is read BEFORE the search runs, so any concurrent + // mutation makes the stored entry stale immediately — conservative + // in the safe direction. Lazy builds during a query's FIRST run bump + // the generation mid-search; that entry just never hits and the next + // run stores a valid one. + const cacheable = max_results > 0 and query.len > 0 and query.len <= 1024 and + cio.posixGetenv("CODEDB_NO_SEARCH_CACHE") == null; + if (!cacheable) return self.searchContentUncached(query, allocator, max_results); + const gen = self.search_gen.load(.acquire); + const env_fp = rankingEnvFingerprint(); + if (self.search_cache.get(query, max_results, gen, env_fp, allocator)) |hit| return hit; + const res = try self.searchContentUncached(query, allocator, max_results); + self.search_cache.put(query, max_results, gen, env_fp, res); + return res; + } + + fn searchContentUncached(self: *Explorer, query: []const u8, allocator: std.mem.Allocator, max_results: usize) ![]const SearchResult { // #539: ensure the word index — Tier 0's recall source — is populated. // After a snapshot fast-load it is built lazily (see issue-220); without // this, searchContent's recall collapses to the trigram/skip_trigram tiers @@ -3072,6 +3437,26 @@ pub const Explorer = struct { } pub fn renderPlainSearch(self: *Explorer, query: []const u8, allocator: std.mem.Allocator, out: *std.ArrayList(u8), max_results: usize, paths_only: bool) !bool { + // Rendered-output cache front door — same generation discipline as + // searchContent's cache (see that wrapper for the staleness + // reasoning). Only successful renders (rendered == true) are stored; + // a false return falls through to searchContentAuto, which caches + // through searchContent itself. + const cacheable = max_results > 0 and query.len > 0 and query.len <= 1024 and + cio.posixGetenv("CODEDB_NO_SEARCH_CACHE") == null; + const cache_gen = self.search_gen.load(.acquire); + const cache_fp = if (cacheable) rankingEnvFingerprint() else 0; + if (cacheable and self.plain_render_cache.render(query, max_results, paths_only, cache_gen, cache_fp, allocator, out)) return true; + const out_start = out.items.len; + + const rendered = try self.renderPlainSearchUncached(query, allocator, out, max_results, paths_only); + if (rendered and cacheable) { + self.plain_render_cache.put(query, max_results, paths_only, cache_gen, cache_fp, out.items[out_start..]); + } + return rendered; + } + + fn renderPlainSearchUncached(self: *Explorer, query: []const u8, allocator: std.mem.Allocator, out: *std.ArrayList(u8), max_results: usize, paths_only: bool) !bool { self.mu.lockShared(); defer self.mu.unlockShared(); @@ -3849,6 +4234,9 @@ pub const Explorer = struct { self.co_change_attempted = true; const root = self.root_path orelse return; self.co_change = git.buildCoChange(self.allocator, root, 500, 32, 8); + // The co-change boost just became available: results cached before + // the build could differ from a fresh search. + if (self.co_change != null) self.bumpSearchGen(); } /// Boost for files that historically change together with the files @@ -4012,6 +4400,9 @@ pub const Explorer = struct { .node_name = nn, .node_line = nl, }; + // The graph-distance boost gate (`call_graph != null`) just flipped: + // results cached before the build could differ from a fresh search. + self.bumpSearchGen(); } /// Build `call_centrality` once (idempotent, mutex-guarded). Must be called diff --git a/src/test_search.zig b/src/test_search.zig index bae8577..d845fa7 100644 --- a/src/test_search.zig +++ b/src/test_search.zig @@ -2298,3 +2298,167 @@ test "issue-550: co-change partner of the defining file outranks an unrelated eq try testing.expect(stranger_score > 0); try testing.expect(partner_score > stranger_score); } + +// ── searchContent result cache ──────────────────────────────────────────────── + +fn freeSearchResults(results: []const SearchResult) void { + for (results) |r| { + testing.allocator.free(r.path); + testing.allocator.free(r.line_text); + } + testing.allocator.free(results); +} + +test "search-cache: repeated query is served from cache with identical caller-owned results" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + // Plain content mention (no symbol named like the query) so no lazy + // ranking build bumps the generation during the first search. + try explorer.indexFile("src/alpha.zig", "const a = 1; // zebraword here\nconst b = 2; // zebraword again\n"); + + const r1 = try explorer.searchContent("zebraword", testing.allocator, 10); + defer freeSearchResults(r1); + try testing.expect(r1.len >= 2); + try testing.expectEqual(@as(u64, 0), explorer.search_cache.hits); + + const r2 = try explorer.searchContent("zebraword", testing.allocator, 10); + defer freeSearchResults(r2); + try testing.expectEqual(@as(u64, 1), explorer.search_cache.hits); + try testing.expectEqual(r1.len, r2.len); + for (r1, r2) |a, b| { + try testing.expectEqualStrings(a.path, b.path); + try testing.expectEqual(a.line_num, b.line_num); + try testing.expectEqualStrings(a.line_text, b.line_text); + } +} + +test "search-cache: indexFile invalidates cached results" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/one.zig", "const a = 1; // quaggatok\n"); + + const r1 = try explorer.searchContent("quaggatok", testing.allocator, 10); + freeSearchResults(r1); + try explorer.indexFile("src/two.zig", "const b = 2; // quaggatok\n"); + + const r2 = try explorer.searchContent("quaggatok", testing.allocator, 10); + defer freeSearchResults(r2); + // The new file must appear — a stale hit would still show one result. + try testing.expectEqual(@as(u64, 0), explorer.search_cache.hits); + var saw_two = false; + for (r2) |r| { + if (std.mem.eql(u8, r.path, "src/two.zig")) saw_two = true; + } + try testing.expect(saw_two); +} + +test "search-cache: removeFile invalidates cached results" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/keep.zig", "const a = 1; // okapitok\n"); + try explorer.indexFile("src/gone.zig", "const b = 2; // okapitok\n"); + + const r1 = try explorer.searchContent("okapitok", testing.allocator, 10); + try testing.expect(r1.len == 2); + freeSearchResults(r1); + explorer.removeFile("src/gone.zig"); + + const r2 = try explorer.searchContent("okapitok", testing.allocator, 10); + defer freeSearchResults(r2); + try testing.expectEqual(@as(u64, 0), explorer.search_cache.hits); + for (r2) |r| { + try testing.expect(!std.mem.eql(u8, r.path, "src/gone.zig")); + } +} + +test "search-cache: ranking env toggle prevents stale hits" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/env.zig", "const a = 1; // gnutok\n"); + + const r1 = try explorer.searchContent("gnutok", testing.allocator, 10); + freeSearchResults(r1); + + // Different fingerprint -> the cached entry must NOT be served. + _ = setenv("CODEDB_RVSM_SIZE_PRIOR", "1", 1); + defer _ = unsetenv("CODEDB_RVSM_SIZE_PRIOR"); + const r2 = try explorer.searchContent("gnutok", testing.allocator, 10); + freeSearchResults(r2); + try testing.expectEqual(@as(u64, 0), explorer.search_cache.hits); + + // Same fingerprint as the stored entry -> now it hits. + const r3 = try explorer.searchContent("gnutok", testing.allocator, 10); + freeSearchResults(r3); + try testing.expectEqual(@as(u64, 1), explorer.search_cache.hits); +} + +test "search-cache: CODEDB_NO_SEARCH_CACHE disables caching entirely" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/off.zig", "const a = 1; // dingotok\n"); + + _ = setenv("CODEDB_NO_SEARCH_CACHE", "1", 1); + defer _ = unsetenv("CODEDB_NO_SEARCH_CACHE"); + const r1 = try explorer.searchContent("dingotok", testing.allocator, 10); + freeSearchResults(r1); + const r2 = try explorer.searchContent("dingotok", testing.allocator, 10); + freeSearchResults(r2); + try testing.expectEqual(@as(u64, 0), explorer.search_cache.hits); + try testing.expectEqual(@as(u64, 0), explorer.search_cache.misses); + try testing.expectEqual(@as(usize, 0), explorer.search_cache.entries.items.len); +} + +test "search-cache: entry count stays bounded under distinct queries" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/bound.zig", "const a = 1; // boundtok\n"); + + var i: usize = 0; + while (i < 100) : (i += 1) { + var qbuf: [32]u8 = undefined; + const q = try std.fmt.bufPrint(&qbuf, "boundquery{d}", .{i}); + const r = try explorer.searchContent(q, testing.allocator, 10); + freeSearchResults(r); + } + try testing.expect(explorer.search_cache.entries.items.len <= 64); +} + +test "search-cache: renderPlainSearch repeated query is served from the render cache" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/render.zig", "const a = 1; // ibextok\nconst b = 2; // ibextok\n"); + + var out1: std.ArrayList(u8) = .empty; + defer out1.deinit(testing.allocator); + // renderPlainSearch only renders when Tier 0 alone can satisfy + // max_results, so ask for exactly the two hits the file contains. + const rendered1 = try explorer.renderPlainSearch("ibextok", testing.allocator, &out1, 2, false); + try testing.expect(rendered1); + try testing.expectEqual(@as(u64, 0), explorer.plain_render_cache.hits); + + var out2: std.ArrayList(u8) = .empty; + defer out2.deinit(testing.allocator); + const rendered2 = try explorer.renderPlainSearch("ibextok", testing.allocator, &out2, 2, false); + try testing.expect(rendered2); + try testing.expectEqual(@as(u64, 1), explorer.plain_render_cache.hits); + try testing.expectEqualStrings(out1.items, out2.items); +} + +test "search-cache: renderPlainSearch cache is invalidated by indexFile" { + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + try explorer.indexFile("src/r1.zig", "const a = 1; // lemurtok\n"); + + var out1: std.ArrayList(u8) = .empty; + defer out1.deinit(testing.allocator); + const rendered1 = try explorer.renderPlainSearch("lemurtok", testing.allocator, &out1, 1, false); + try testing.expect(rendered1); + + try explorer.indexFile("src/r2.zig", "const b = 2; // lemurtok\n"); + + // The generation moved, so the cached rendering must NOT be served. + var out2: std.ArrayList(u8) = .empty; + defer out2.deinit(testing.allocator); + _ = try explorer.renderPlainSearch("lemurtok", testing.allocator, &out2, 1, false); + try testing.expectEqual(@as(u64, 0), explorer.plain_render_cache.hits); +}