diff --git a/src/snapshot.zig b/src/snapshot.zig index 95b7170..40d9333 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -1298,9 +1298,9 @@ fn parseJsonU64(json: []const u8, key: []const u8) ?u64 { } /// Returns true for secret/credential paths that must never be persisted to a -/// snapshot or live-indexed. Kept in lockstep with `watcher.isSensitivePath`; -/// the two are parity-tested in test_snapshot.zig ("issue-528: isSensitivePath -/// parity") so any future drift in this security filter fails CI. +/// snapshot or live-indexed. Single implementation of this security filter; +/// `watcher.isSensitivePath` delegates here (parity-tested in test_snapshot.zig +/// "issue-528: isSensitivePath parity"). pub fn isSensitivePath(path: []const u8) bool { const basename = if (std.mem.lastIndexOfScalar(u8, path, '/')) |sep| path[sep + 1 ..] else path; // Fast path: most source files have extensions like .zig, .ts, .py — none start with '.' diff --git a/src/test_bench.zig b/src/test_bench.zig index ebf29e2..356cded 100644 --- a/src/test_bench.zig +++ b/src/test_bench.zig @@ -32,7 +32,7 @@ test "bench: fuzzyScore throughput" { const elapsed: u64 = @intCast(cio.nanoTimestamp() - t0); const total_calls = iterations * queries.len; const per_call_ns = elapsed / total_calls; - std.debug.print("\n fuzzyScore: {d} calls in {d}ms ({d}ns/call)\n", .{ + if (cio.posixGetenv("CODEDB_BENCH_VERBOSE") != null) std.debug.print("\n fuzzyScore: {d} calls in {d}ms ({d}ns/call)\n", .{ total_calls, elapsed / std.time.ns_per_ms, per_call_ns, @@ -63,7 +63,7 @@ test "bench: detectLanguage + isDocLanguage" { } const elapsed: u64 = @intCast(cio.nanoTimestamp() - t0); - std.debug.print("\n detectLanguage+isDocLanguage: {d} calls in {d}ms ({d}ns/call, {d} docs)\n", .{ + if (cio.posixGetenv("CODEDB_BENCH_VERBOSE") != null) std.debug.print("\n detectLanguage+isDocLanguage: {d} calls in {d}ms ({d}ns/call, {d} docs)\n", .{ iterations, elapsed / std.time.ns_per_ms, elapsed / iterations, diff --git a/src/watcher.zig b/src/watcher.zig index 9052f9b..226da1e 100644 --- a/src/watcher.zig +++ b/src/watcher.zig @@ -1171,51 +1171,10 @@ fn shouldSkipFile(path: []const u8) bool { } /// Check if a path refers to a sensitive file (secrets, keys, credentials). -/// Replicates the filter from snapshot.zig so live indexing and snapshots -/// apply the same exclusion rules. Optimized: basename check + early exit. +/// Delegates to snapshot.zig so live indexing and snapshots apply the same +/// exclusion rules from a single implementation. pub fn isSensitivePath(path: []const u8) bool { - const basename = if (std.mem.lastIndexOfScalar(u8, path, '/')) |sep| path[sep + 1 ..] else path; - // Fast path: most source files have extensions like .zig, .ts, .py — none start with '.' - // or match sensitive patterns. Skip the full check for common cases. - if (basename.len == 0) return false; - const first = basename[0]; - // Only check sensitive names if basename starts with '.', 'c', 's', 'i' or has key/cert extension - if (first != '.' and first != 'c' and first != 's' and first != 'i') { - // Still need to check extensions and directory patterns - if (std.mem.endsWith(u8, basename, ".env") or - std.mem.endsWith(u8, basename, ".pem") or - std.mem.endsWith(u8, basename, ".key") or - std.mem.endsWith(u8, basename, ".p12") or - std.mem.endsWith(u8, basename, ".pfx") or - std.mem.endsWith(u8, basename, ".jks")) return true; - if (std.mem.indexOf(u8, path, ".ssh/") != null or - std.mem.indexOf(u8, path, ".gnupg/") != null or - std.mem.indexOf(u8, path, ".aws/") != null) return true; - return false; - } - // .env, .env.; do NOT match .envoy, .envrc, .environment, etc. - if (basename.len >= 4 and std.mem.eql(u8, basename[0..4], ".env") and - (basename.len == 4 or basename[4] == '.' or basename[4] == '-' or basename[4] == '_')) return true; - // Exact matches - const sensitive_names = [_][]const u8{ - ".dev.vars", ".npmrc", ".pypirc", ".netrc", - "credentials.json", "service-account.json", "secrets.json", "secrets.yaml", - "secrets.yml", "id_rsa", "id_ed25519", ".git-credentials", - "id_ecdsa", "id_dsa", "id_ecdsa_sk", "id_ed25519_sk", - }; - for (sensitive_names) |name| { - if (std.mem.eql(u8, basename, name)) return true; - } - if (std.mem.endsWith(u8, basename, ".env") or - std.mem.endsWith(u8, basename, ".pem") or - std.mem.endsWith(u8, basename, ".key") or - std.mem.endsWith(u8, basename, ".p12") or - std.mem.endsWith(u8, basename, ".pfx") or - std.mem.endsWith(u8, basename, ".jks")) return true; - if (std.mem.indexOf(u8, path, ".ssh/") != null or - std.mem.indexOf(u8, path, ".gnupg/") != null or - std.mem.indexOf(u8, path, ".aws/") != null) return true; - return false; + return @import("snapshot.zig").isSensitivePath(path); } fn indexFileContent(io: std.Io, explorer: *Explorer, dir: std.Io.Dir, path: []const u8, allocator: std.mem.Allocator, skip_trigram: bool) !void {