Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/snapshot.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1298,9 +1298,9 @@ fn parseJsonU64(json: []const u8, key: []const u8) ?u64 {
}

/// Returns true for secret/credential paths that must never be persisted to a
/// snapshot or live-indexed. Kept in lockstep with `watcher.isSensitivePath`;
/// the two are parity-tested in test_snapshot.zig ("issue-528: isSensitivePath
/// parity") so any future drift in this security filter fails CI.
/// snapshot or live-indexed. Single implementation of this security filter;
/// `watcher.isSensitivePath` delegates here (parity-tested in test_snapshot.zig
/// "issue-528: isSensitivePath parity").
pub fn isSensitivePath(path: []const u8) bool {
const basename = if (std.mem.lastIndexOfScalar(u8, path, '/')) |sep| path[sep + 1 ..] else path;
// Fast path: most source files have extensions like .zig, .ts, .py — none start with '.'
Expand Down
4 changes: 2 additions & 2 deletions src/test_bench.zig
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ test "bench: fuzzyScore throughput" {
const elapsed: u64 = @intCast(cio.nanoTimestamp() - t0);
const total_calls = iterations * queries.len;
const per_call_ns = elapsed / total_calls;
std.debug.print("\n fuzzyScore: {d} calls in {d}ms ({d}ns/call)\n", .{
if (cio.posixGetenv("CODEDB_BENCH_VERBOSE") != null) std.debug.print("\n fuzzyScore: {d} calls in {d}ms ({d}ns/call)\n", .{
total_calls,
elapsed / std.time.ns_per_ms,
per_call_ns,
Expand Down Expand Up @@ -63,7 +63,7 @@ test "bench: detectLanguage + isDocLanguage" {
}

const elapsed: u64 = @intCast(cio.nanoTimestamp() - t0);
std.debug.print("\n detectLanguage+isDocLanguage: {d} calls in {d}ms ({d}ns/call, {d} docs)\n", .{
if (cio.posixGetenv("CODEDB_BENCH_VERBOSE") != null) std.debug.print("\n detectLanguage+isDocLanguage: {d} calls in {d}ms ({d}ns/call, {d} docs)\n", .{
iterations,
elapsed / std.time.ns_per_ms,
elapsed / iterations,
Expand Down
47 changes: 3 additions & 44 deletions src/watcher.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1171,51 +1171,10 @@ fn shouldSkipFile(path: []const u8) bool {
}

/// Check if a path refers to a sensitive file (secrets, keys, credentials).
/// Replicates the filter from snapshot.zig so live indexing and snapshots
/// apply the same exclusion rules. Optimized: basename check + early exit.
/// Delegates to snapshot.zig so live indexing and snapshots apply the same
/// exclusion rules from a single implementation.
pub fn isSensitivePath(path: []const u8) bool {
// NOTE(review): this body is shown as a flattened diff. The change summary for this file
// reports 3 additions & 44 deletions, so the inline checks below appear to be the removed
// side and the final delegating `return` the added side — confirm against the merged file.
//
// Basename is everything after the last '/', or the whole path when it contains no '/'.
const basename = if (std.mem.lastIndexOfScalar(u8, path, '/')) |sep| path[sep + 1 ..] else path;
// Fast path: most source files have extensions like .zig, .ts, .py — none start with '.'
// or match sensitive patterns. Skip the full check for common cases.
// An empty basename (empty path, or a path ending in '/') is reported as not sensitive;
// this returns before the directory-pattern checks below are reached.
if (basename.len == 0) return false;
const first = basename[0];
// Every entry in `sensitive_names` (and the ".env" prefix rule) starts with '.', 'c', 's'
// or 'i', so any other first byte can skip the prefix and exact-name checks.
if (first != '.' and first != 'c' and first != 's' and first != 'i') {
// The suffix and directory checks do not depend on the first byte, so they still run here.
if (std.mem.endsWith(u8, basename, ".env") or
std.mem.endsWith(u8, basename, ".pem") or
std.mem.endsWith(u8, basename, ".key") or
std.mem.endsWith(u8, basename, ".p12") or
std.mem.endsWith(u8, basename, ".pfx") or
std.mem.endsWith(u8, basename, ".jks")) return true;
// Substring match against the full path, not just the basename.
if (std.mem.indexOf(u8, path, ".ssh/") != null or
std.mem.indexOf(u8, path, ".gnupg/") != null or
std.mem.indexOf(u8, path, ".aws/") != null) return true;
return false;
}
// Matches ".env" exactly, or ".env" followed by '.', '-' or '_' (e.g. .env.local,
// .env-prod, .env_test); does NOT match .envoy, .envrc, .environment, etc.
if (basename.len >= 4 and std.mem.eql(u8, basename[0..4], ".env") and
(basename.len == 4 or basename[4] == '.' or basename[4] == '-' or basename[4] == '_')) return true;
// Exact basename matches (compared byte-for-byte, case-sensitive).
const sensitive_names = [_][]const u8{
".dev.vars", ".npmrc", ".pypirc", ".netrc",
"credentials.json", "service-account.json", "secrets.json", "secrets.yaml",
"secrets.yml", "id_rsa", "id_ed25519", ".git-credentials",
"id_ecdsa", "id_dsa", "id_ecdsa_sk", "id_ed25519_sk",
};
for (sensitive_names) |name| {
if (std.mem.eql(u8, basename, name)) return true;
}
// Same suffix and directory checks as in the fast path above.
if (std.mem.endsWith(u8, basename, ".env") or
std.mem.endsWith(u8, basename, ".pem") or
std.mem.endsWith(u8, basename, ".key") or
std.mem.endsWith(u8, basename, ".p12") or
std.mem.endsWith(u8, basename, ".pfx") or
std.mem.endsWith(u8, basename, ".jks")) return true;
if (std.mem.indexOf(u8, path, ".ssh/") != null or
std.mem.indexOf(u8, path, ".gnupg/") != null or
std.mem.indexOf(u8, path, ".aws/") != null) return true;
return false;
// NOTE(review): as rendered here this statement follows an unconditional `return false;`.
// It forwards `path` to `isSensitivePath` in snapshot.zig and presumably is the entire
// body after the change — verify in the merged file.
return @import("snapshot.zig").isSensitivePath(path);
}

fn indexFileContent(io: std.Io, explorer: *Explorer, dir: std.Io.Dir, path: []const u8, allocator: std.mem.Allocator, skip_trigram: bool) !void {
Expand Down
Loading