Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 121 additions & 18 deletions src/core/symbol-references.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ import path from 'path';
import { CODEBASE_CONTEXT_DIRNAME, KEYWORD_INDEX_FILENAME } from '../constants/codebase-context.js';
import { IndexCorruptedError } from '../errors/index.js';
import type { UsageLocation } from '../types/index.js';
import { detectLanguage } from '../utils/language-detection.js';
import { findIdentifierOccurrences } from '../utils/tree-sitter.js';

interface IndexedChunk {
content?: unknown;
Expand Down Expand Up @@ -59,6 +61,46 @@ function buildPreview(content: string, lineOffset: number): string {
return previewLines.join('\n').trim();
}

/**
 * Builds a short preview around a 1-based line number.
 *
 * The window covers the line before `line`, the line itself and the line
 * after it, clamped to the bounds of `lines`. The joined text is trimmed, so
 * leading indentation of the first line and trailing whitespace of the last
 * line are dropped.
 *
 * @param lines - File content split into lines.
 * @param line - 1-based line number of the occurrence.
 * @returns Up to three lines joined with `\n` and trimmed.
 */
function buildPreviewFromFileLines(lines: string[], line: number): string {
  // `line` is 1-based, so index `line - 2` is the preceding line.
  const windowStart = line - 2 < 0 ? 0 : line - 2;
  // Exclusive end index: one past the line following the occurrence.
  const windowEnd = line + 1 > lines.length ? lines.length : line + 1;

  const windowLines = lines.slice(windowStart, windowEnd);
  const joined = windowLines.join('\n');
  return joined.trim();
}

/**
 * Resolves the absolute on-disk path for an indexed chunk, refusing any path
 * that does not live strictly inside `rootPath`.
 *
 * `chunk.filePath` takes precedence when it is a non-blank string: an absolute
 * value is returned as-is (after the containment check), a relative value is
 * resolved against the root. `chunk.relativePath` is only consulted when
 * `filePath` is missing or blank.
 *
 * @param rootPath - Project root used as the containment boundary.
 * @param chunk - Indexed chunk carrying `filePath` and/or `relativePath`.
 * @returns The absolute path, or `null` when no usable path is present or the
 *   path escapes (or equals) the root.
 */
function resolveAbsoluteChunkPath(rootPath: string, chunk: IndexedChunk): string | null {
  const resolvedRoot = path.resolve(rootPath);

  const isWithinRoot = (candidate: string): boolean => {
    const relative = path.relative(resolvedRoot, path.resolve(candidate));
    // Empty => the root itself; absolute => a different drive/volume on Windows.
    if (!relative || path.isAbsolute(relative)) {
      return false;
    }
    // Only an actual ".." path segment escapes the root. A plain
    // `startsWith('..')` would also reject legitimate entries such as
    // "..cache/file.ts" or "..hidden.ts".
    return relative !== '..' && !relative.startsWith(`..${path.sep}`);
  };

  if (typeof chunk.filePath === 'string' && chunk.filePath.trim()) {
    const raw = chunk.filePath.trim();
    if (path.isAbsolute(raw)) {
      return isWithinRoot(raw) ? raw : null;
    }
    const resolved = path.resolve(resolvedRoot, raw);
    return isWithinRoot(resolved) ? resolved : null;
  }

  if (typeof chunk.relativePath === 'string' && chunk.relativePath.trim()) {
    const resolved = path.resolve(resolvedRoot, chunk.relativePath.trim());
    return isWithinRoot(resolved) ? resolved : null;
  }

  return null;
}

/**
 * Reports whether `targetPath` points at an existing regular file.
 *
 * Any failure while stat-ing the path (missing entry, permission error, ...)
 * is treated as "does not exist" rather than propagated.
 *
 * @param targetPath - Path to probe.
 * @returns `true` only when the stat call succeeds and reports a file.
 */
async function fileExists(targetPath: string): Promise<boolean> {
  let isRegularFile = false;
  try {
    isRegularFile = (await fs.stat(targetPath)).isFile();
  } catch {
    // Best-effort probe: an unreadable or missing path counts as absent.
    isRegularFile = false;
  }
  return isRegularFile;
}

export async function findSymbolReferences(
rootPath: string,
symbol: string,
Expand Down Expand Up @@ -110,34 +152,95 @@ export async function findSymbolReferences(
let usageCount = 0;

const escapedSymbol = escapeRegex(normalizedSymbol);
const prefilter = new RegExp(`\\b${escapedSymbol}\\b`);
const matcher = new RegExp(`\\b${escapedSymbol}\\b`, 'g');

// Prefilter candidate files from the keyword index. We do not trust chunk contents for
// exact reference counting when Tree-sitter is available; chunks only guide which files to scan.
const chunksByFile = new Map<
string,
{ relPath: string; absPath: string | null; chunks: IndexedChunk[] }
>();

for (const chunkRaw of chunks) {
const chunk = chunkRaw as IndexedChunk;
if (typeof chunk.content !== 'string') {
continue;
if (typeof chunk.content !== 'string') continue;
if (!prefilter.test(chunk.content)) continue;

const relPath = getUsageFile(rootPath, chunk);
const absPath = resolveAbsoluteChunkPath(rootPath, chunk);

const entry = chunksByFile.get(relPath);
if (entry) {
entry.chunks.push(chunk);
// Prefer a real absolute path when available
if (!entry.absPath && absPath) {
entry.absPath = absPath;
}
} else {
chunksByFile.set(relPath, { relPath, absPath, chunks: [chunk] });
}
}

const chunkContent = chunk.content;
const startLine = typeof chunk.startLine === 'number' ? chunk.startLine : 1;
matcher.lastIndex = 0;
for (const entry of chunksByFile.values()) {
const relPath = entry.relPath;
const absPath = entry.absPath;

// Preferred: Tree-sitter identifier walk on the real file content.
if (absPath && (await fileExists(absPath))) {
try {
const raw = await fs.readFile(absPath, 'utf-8');
const content = raw.replace(/\r\n/g, '\n');
const language = detectLanguage(absPath);
const occurrences = await findIdentifierOccurrences(content, language, normalizedSymbol);

if (occurrences) {
usageCount += occurrences.length;

if (usages.length < normalizedLimit && occurrences.length > 0) {
const lines = content.split('\n');
for (const occ of occurrences) {
if (usages.length >= normalizedLimit) break;
usages.push({
file: relPath,
line: occ.line,
preview: buildPreviewFromFileLines(lines, occ.line)
});
}
}

continue;
}
} catch {
// Fall through to chunk-regex fallback (missing grammar, parse failure, etc.)
}
}

let match: RegExpExecArray | null;
while ((match = matcher.exec(chunkContent)) !== null) {
usageCount += 1;
// Fallback: regex scan inside the matched chunks (legacy behavior).
for (const chunk of entry.chunks) {
if (typeof chunk.content !== 'string') continue;

if (usages.length >= normalizedLimit) {
continue;
}
const chunkContent = chunk.content;
const startLine = typeof chunk.startLine === 'number' ? chunk.startLine : 1;
matcher.lastIndex = 0;

const prefix = chunkContent.slice(0, match.index);
const lineOffset = prefix.split('\n').length - 1;
let match: RegExpExecArray | null;
while ((match = matcher.exec(chunkContent)) !== null) {
usageCount += 1;

usages.push({
file: getUsageFile(rootPath, chunk),
line: startLine + lineOffset,
preview: buildPreview(chunkContent, lineOffset)
});
if (usages.length >= normalizedLimit) {
continue;
}

const prefix = chunkContent.slice(0, match.index);
const lineOffset = prefix.split('\n').length - 1;

usages.push({
file: relPath,
line: startLine + lineOffset,
preview: buildPreview(chunkContent, lineOffset)
});
}
}
}

Expand Down
135 changes: 135 additions & 0 deletions src/utils/tree-sitter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -381,3 +381,138 @@ export async function extractTreeSitterSymbols(
return null;
}
}

/**
 * One identifier node whose text matched the requested symbol, as reported by
 * `findIdentifierOccurrences`.
 */
export interface IdentifierOccurrence {
/** 1-based line number (the node's start row plus one). */
line: number;
/** The matched node's `startIndex` within the parsed content. */
startIndex: number;
/** The matched node's `endIndex` within the parsed content. */
endIndex: number;
/** Tree-sitter node type of the match (for example `identifier`). */
nodeType: string;
}

/**
 * Tree-sitter node types collected as identifier candidates. The list is
 * handed to `descendantsOfType` on the root node; each returned node is then
 * compared against the requested symbol by its text.
 */
const IDENTIFIER_NODE_TYPES = [
'identifier',
'type_identifier',
'property_identifier',
'field_identifier',
'shorthand_property_identifier_pattern',
'shorthand_property_identifier',
'jsx_identifier',
'scoped_identifier'
] as const;

/**
 * Substrings that mark a node type as "not code". They are matched with
 * `String.prototype.includes` against the type of a node and of its
 * ancestors, so each entry also covers any longer type name containing it
 * (for example `'string'` already matches `template_string`).
 */
const NON_CODE_ANCESTOR_TYPE_FRAGMENTS = [
'comment',
'string',
'template_string',
'regex',
'jsx_text'
] as const;

/**
 * Node types that switch back to real code from inside a string literal, such
 * as a JavaScript/TypeScript `${expr}` substitution or an f-string `{expr}`
 * interpolation. Identifiers below one of these are genuine references even
 * though a string node sits further up the ancestor chain.
 */
const EMBEDDED_CODE_NODE_TYPES: ReadonlySet<string> = new Set([
  'template_substitution',
  'interpolation'
]);

/**
 * Decides whether `node` sits in a non-code context (comment, string, regex,
 * JSX text) by inspecting its own type and the types of its ancestors.
 *
 * The walk starts at `node` itself and climbs through `parent` links. It stops
 * with `false` as soon as it reaches an interpolation boundary, because the
 * expression inside `${...}` is executable code. It stops with `true` at the
 * first type containing one of `NON_CODE_ANCESTOR_TYPE_FRAGMENTS`. At most 40
 * nodes are inspected; running out of ancestors or hitting that cap yields
 * `false`.
 *
 * @param node - Tree-sitter node to classify.
 * @returns `true` when the node is enclosed by a non-code construct.
 */
function isInsideNonCodeContext(node: Node): boolean {
  let cursor: Node | null = node;
  for (let depth = 0; cursor && depth < 40; depth += 1) {
    const cursorType = cursor.type;

    // Must be checked before the fragment match: the enclosing string node is
    // further up and would otherwise classify interpolated code as text.
    if (EMBEDDED_CODE_NODE_TYPES.has(cursorType)) {
      return false;
    }

    if (NON_CODE_ANCESTOR_TYPE_FRAGMENTS.some((fragment) => cursorType.includes(fragment))) {
      return true;
    }

    cursor = cursor.parent;
  }
  return false;
}

/**
 * Find identifier occurrences of `symbol` in `content` using Tree-sitter.
 *
 * Only named identifier-like nodes (see `IDENTIFIER_NODE_TYPES`) whose text
 * equals the trimmed symbol are reported, and nodes classified as non-code by
 * `isInsideNonCodeContext` are skipped. Results are de-duplicated and sorted
 * by line, then by start index.
 *
 * @param content - Full source text to parse.
 * @param language - Language identifier used to select the parser.
 * @param symbol - Symbol name to look for; surrounding whitespace is ignored.
 * @returns The matching occurrences; an empty array when `symbol` is blank; or
 *   `null` when Tree-sitter cannot give a trustworthy answer (unsupported
 *   language, blank or oversized content, no parse tree, a tree containing
 *   errors, or any exception), so callers can fall back safely.
 */
export async function findIdentifierOccurrences(
  content: string,
  language: string,
  symbol: string
): Promise<IdentifierOccurrence[] | null> {
  const normalizedSymbol = symbol.trim();
  if (!normalizedSymbol) {
    return [];
  }

  if (!supportsTreeSitter(language) || !content.trim()) {
    return null;
  }

  if (Buffer.byteLength(content, 'utf8') > MAX_TREE_SITTER_PARSE_BYTES) {
    return null;
  }

  try {
    const parser = await getParserForLanguage(language);
    setParseTimeout(parser);

    let tree: ReturnType<Parser['parse']>;
    try {
      tree = parser.parse(content);
    } catch (error) {
      // A parser that threw may be in a bad state; drop it before rethrowing.
      evictParser(language, parser);
      throw error;
    }

    if (!tree) {
      evictParser(language, parser);
      return null;
    }

    try {
      const rootNode = tree.rootNode;
      // `hasError` is a boolean property in some binding versions and a method
      // in others. When it is a method it must be invoked on the node: calling
      // it detached loses `this`, which throws and forces a null result.
      const hasErrorValue = rootNode.hasError as unknown;
      const rootHasError =
        typeof hasErrorValue === 'function'
          ? Boolean((hasErrorValue as (this: unknown) => unknown).call(rootNode))
          : Boolean(hasErrorValue);

      if (rootHasError) {
        // A tree with syntax errors is not reliable enough for exact counting.
        return null;
      }

      const nodes = rootNode.descendantsOfType([...IDENTIFIER_NODE_TYPES]);
      const occurrences: IdentifierOccurrence[] = [];
      const seen = new Set<string>();

      for (const node of nodes) {
        if (!node || !node.isNamed) continue;
        if (node.text !== normalizedSymbol) continue;
        if (isInsideNonCodeContext(node)) continue;

        const occ: IdentifierOccurrence = {
          line: node.startPosition.row + 1,
          startIndex: node.startIndex,
          endIndex: node.endIndex,
          nodeType: node.type
        };
        // Guard against the same node being reported more than once.
        const key = `${occ.line}:${occ.startIndex}:${occ.endIndex}:${occ.nodeType}`;
        if (seen.has(key)) continue;
        seen.add(key);
        occurrences.push(occ);
      }

      occurrences.sort((a, b) => {
        if (a.line !== b.line) return a.line - b.line;
        return a.startIndex - b.startIndex;
      });

      return occurrences;
    } finally {
      // Always release the tree, whichever way the inner block exits.
      tree.delete();
    }
  } catch (error) {
    evictParser(language);

    if (isTreeSitterDebugEnabled()) {
      console.error(
        `[DEBUG] Tree-sitter identifier occurrence scan failed for '${language}':`,
        error instanceof Error ? error.message : String(error)
      );
    }
    return null;
  }
}
Loading
Loading