From e94b21dcd7c77e668da9e5c9cadc57ba67a80504 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 19:27:10 +0000 Subject: [PATCH 1/5] Initial plan From e1841852be209ed76ed7bf3d4dbe5c71e3f1f3ea Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 19:46:13 +0000 Subject: [PATCH 2/5] feat: implement AST-based semantic file hashing for change detection Use ast-grep to parse supported language files (25+ languages) and compute hashes from the canonical AST representation instead of raw file bytes. This ignores comments, whitespace, and formatting changes so that only genuine semantic modifications trigger re-indexing. Changes: - Add compute_semantic_file_hash() and helpers to discovery.py - Update DiscoveredFile.__init__, from_path, and file_hash property - Update indexing_service._process_discovery_batch to use semantic hashing - Add comprehensive unit tests (23 tests) for semantic hashing Co-authored-by: bashandbone <89049923+bashandbone@users.noreply.github.com> --- src/codeweaver/core/discovery.py | 116 ++++++- .../engine/services/indexing_service.py | 3 +- tests/unit/core/test_semantic_hashing.py | 285 ++++++++++++++++++ 3 files changed, 391 insertions(+), 13 deletions(-) create mode 100644 tests/unit/core/test_semantic_hashing.py diff --git a/src/codeweaver/core/discovery.py b/src/codeweaver/core/discovery.py index 3e80372d..0ae8d73f 100644 --- a/src/codeweaver/core/discovery.py +++ b/src/codeweaver/core/discovery.py @@ -29,9 +29,9 @@ from codeweaver.core import BasedModel, ResolvedProjectPathDep from codeweaver.core.chunks import CodeChunk from codeweaver.core.di import INJECTED -from codeweaver.core.language import is_semantic_config_ext +from codeweaver.core.language import SemanticSearchLanguage, is_semantic_config_ext from codeweaver.core.metadata import ExtCategory -from codeweaver.core.types import MISSING, BlakeHashKey, BlakeKey, Missing +from codeweaver.core.types import MISSING, BlakeHashKey, BlakeKey, FileExt, LiteralStringT, Missing from codeweaver.core.utils import ( get_blake_hash, get_git_branch, @@ -56,6 +56,89 @@ logger = logging.getLogger(__name__) +def _walk_ast_nodes(node: Any, parts: list[str]) -> None: + """Walk AST tree and collect semantic tokens, excluding comments. + + Traverses the full AST including both named and unnamed nodes to capture + all semantically meaningful content (identifiers, operators, keywords, etc.) + while skipping comment nodes. + """ + kind: str = node.kind() + if "comment" in kind.lower(): + return + children = node.children() + if not children: + parts.append(f"{kind}:{node.text()}") + else: + parts.append(kind) + for child in children: + _walk_ast_nodes(child, parts) + + +def _compute_ast_hash(content: str, language_name: str) -> BlakeHashKey | None: + """Compute a blake3 hash from the AST representation, excluding comments. + + Parse the content using ast-grep and build a canonical string from the AST + node kinds and leaf text values. Comment nodes are excluded so that comment-only + changes do not alter the hash. Whitespace and formatting are inherently normalized + by the AST since they are not represented as tree nodes. + + Return None if parsing fails or produces no meaningful nodes. + """ + try: + from ast_grep_py import SgRoot + + root = SgRoot(content, language_name) + node = root.root() + parts: list[str] = [] + _walk_ast_nodes(node, parts) + if not parts: + return None + canonical = "\n".join(parts) + return get_blake_hash(canonical) + except (KeyboardInterrupt, SystemExit): + raise + except BaseException: + return None + + +def _get_semantic_language( + file_path: Path, ext_category: ExtCategory | None = None +) -> SemanticSearchLanguage | None: + """Return the SemanticSearchLanguage for a file, or None if unsupported.""" + if ext_category and isinstance(ext_category.language, SemanticSearchLanguage): + return ext_category.language + return SemanticSearchLanguage.from_extension( + FileExt(cast(LiteralStringT, file_path.suffix or file_path.name)) + ) + + +def compute_semantic_file_hash( + content_bytes: bytes, + file_path: Path, + *, + ext_category: ExtCategory | None = None, +) -> BlakeHashKey: + """Compute a file hash using AST-based hashing for supported semantic languages. + + For files with a supported AST language (Python, JavaScript, etc.), parse the file + to an AST and hash the canonical tree representation. This ignores comments, + whitespace, and formatting changes so that only genuine semantic modifications + trigger a different hash. + + Fall back to a raw content blake3 hash for unsupported languages or when AST + parsing fails. + """ + if language := _get_semantic_language(file_path, ext_category): + try: + content_str = content_bytes.decode("utf-8", errors="replace") + if ast_hash := _compute_ast_hash(content_str, language.variable): + return ast_hash + except Exception: + logger.debug("AST hashing failed for %s, falling back to content hash", file_path) + return get_blake_hash(content_bytes) + + def _get_git_branch(path: Path) -> str | None: """Get the git branch for the given path, if available.""" try: @@ -84,7 +167,7 @@ class DiscoveredFile(BasedModel): _file_hash: Annotated[ BlakeHashKey | None, Field( - description="blake3 hash of the file contents. File hashes are from non-normalized content, so two files with different line endings, white spaces, unicode characters, etc. will have different hashes." + description="blake3 hash of the file. For files with a supported AST language, the hash is computed from the canonical AST representation, ignoring comments, whitespace, and formatting. For other files, the hash is computed from the raw content bytes." ), ] = None _git_branch: Annotated[ @@ -125,14 +208,17 @@ def __init__( """Initialize DiscoveredFile with optional file_hash and git_branch.""" object.__setattr__(self, "path", path) object.__setattr__(self, "project_path", project_path) - if ext_category: - object.__setattr__(self, "ext_category", ext_category) - else: - object.__setattr__(self, "ext_category", ExtCategory.from_file(path)) + resolved_ext = ext_category or ExtCategory.from_file(path) + object.__setattr__(self, "ext_category", resolved_ext) if file_hash: object.__setattr__(self, "_file_hash", file_hash) elif path.is_file(): - object.__setattr__(self, "_file_hash", get_blake_hash(path.read_bytes())) + content_bytes = path.read_bytes() + object.__setattr__( + self, + "_file_hash", + compute_semantic_file_hash(content_bytes, path, ext_category=resolved_ext), + ) else: object.__setattr__(self, "_file_hash", None) if git_branch and git_branch is not MISSING: @@ -179,7 +265,10 @@ def from_path( """Create a DiscoveredFile from a file path.""" branch = get_git_branch(path if path.is_dir() else path.parent) or "main" if ext_category := ExtCategory.from_file(path): - new_hash = get_blake_hash(path.read_bytes()) + content_bytes = path.read_bytes() + new_hash = compute_semantic_file_hash( + content_bytes, path, ext_category=ext_category + ) if file_hash and new_hash != file_hash: logger.warning( "Provided file_hash does not match computed hash for %s. Using computed hash.", @@ -240,11 +329,14 @@ def size(self) -> NonNegativeInt: @computed_field @property def file_hash(self) -> BlakeHashKey: - """Return the blake3 hash of the file contents, if available.""" + """Return the blake3 hash of the file, using AST-based hashing when supported.""" if self._file_hash is not None: return self._file_hash if self.path.exists() and self.path.is_file(): - content_hash = get_blake_hash(self.path.read_bytes()) + content_bytes = self.path.read_bytes() + content_hash = compute_semantic_file_hash( + content_bytes, self.path, ext_category=self.ext_category + ) with contextlib.suppress(Exception): object.__setattr__(self, "_file_hash", content_hash) return content_hash @@ -325,4 +417,4 @@ def normalize_content(content: str | bytes | bytearray) -> str: return sanitize_unicode(content) -__all__ = ("DiscoveredFile",) +__all__ = ("DiscoveredFile", "compute_semantic_file_hash") diff --git a/src/codeweaver/engine/services/indexing_service.py b/src/codeweaver/engine/services/indexing_service.py index 1b6f0368..6ba6d2ce 100644 --- a/src/codeweaver/engine/services/indexing_service.py +++ b/src/codeweaver/engine/services/indexing_service.py @@ -24,6 +24,7 @@ PRIMARY_SPARSE_VECTOR_NAME, ZERO, ) +from codeweaver.core.discovery import compute_semantic_file_hash from codeweaver.providers import EmbeddingRegistryDep @@ -312,7 +313,7 @@ def _hash_batch(paths: list[Path]) -> list[tuple[Path, bytes]]: continue seen_files.add(relative_path) - current_hash = get_blake_hash(content_bytes) + current_hash = compute_semantic_file_hash(content_bytes, path) if not self._file_manifest: self._file_manifest = self._manifest_manager.create_new() needs_reindex, _ = self._file_manifest.file_needs_reindexing( diff --git a/tests/unit/core/test_semantic_hashing.py b/tests/unit/core/test_semantic_hashing.py new file mode 100644 index 00000000..522e35f8 --- /dev/null +++ b/tests/unit/core/test_semantic_hashing.py @@ -0,0 +1,285 @@ +# SPDX-FileCopyrightText: 2026 Knitli Inc. +# +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Unit tests for AST-based semantic file hashing.""" + +from pathlib import Path + +import pytest + +from codeweaver.core.discovery import ( + DiscoveredFile, + _compute_ast_hash, + _get_semantic_language, + compute_semantic_file_hash, +) +from codeweaver.core.metadata import ExtCategory +from codeweaver.core.utils import get_blake_hash + + +pytestmark = [pytest.mark.unit] + + +@pytest.fixture +def temp_project(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + """Provide a temporary project directory with env var and CWD set.""" + project_dir = tmp_path / "project" + project_dir.mkdir() + monkeypatch.setenv("CODEWEAVER_PROJECT_PATH", str(project_dir)) + monkeypatch.chdir(project_dir) + return project_dir + + +# --------------------------------------------------------------------------- +# _compute_ast_hash +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +class TestComputeAstHash: + """Test the low-level _compute_ast_hash function.""" + + def test_returns_hash_for_valid_python(self) -> None: + """Produce a hash for syntactically valid Python code.""" + result = _compute_ast_hash("def foo(): pass", "python") + assert result is not None + assert len(result) == 64 # blake3 hex digest + + def test_returns_none_for_empty_content(self) -> None: + """Return None when parsing produces no meaningful nodes.""" + result = _compute_ast_hash("", "python") + # Empty source still produces a 'module' root node + # which is acceptable - the hash just captures the empty structure + assert result is None or len(result) == 64 + + def test_returns_none_for_unsupported_language(self) -> None: + """Return None when the language is not supported by ast-grep.""" + result = _compute_ast_hash("some content", "nonexistent_language_xyz") + assert result is None + + def test_comment_changes_same_hash(self) -> None: + """Produce identical hashes when only comments differ.""" + code_with_comment = "# This is a comment\ndef add(a, b):\n return a + b\n" + code_no_comment = "def add(a, b):\n return a + b\n" + code_different_comment = "# Different comment\ndef add(a, b):\n return a + b\n" + + hash1 = _compute_ast_hash(code_with_comment, "python") + hash2 = _compute_ast_hash(code_no_comment, "python") + hash3 = _compute_ast_hash(code_different_comment, "python") + + assert hash1 == hash2 + assert hash2 == hash3 + + def test_whitespace_changes_same_hash(self) -> None: + """Produce identical hashes when only whitespace/formatting differs.""" + code_compact = "def add(a,b):\n return a+b\n" + code_spaced = "def add(a, b):\n return a + b\n" + + hash1 = _compute_ast_hash(code_compact, "python") + hash2 = _compute_ast_hash(code_spaced, "python") + + assert hash1 == hash2 + + def test_semantic_change_different_hash(self) -> None: + """Produce different hashes when the code logic changes.""" + code_add = "def calc(a, b):\n return a + b\n" + code_mul = "def calc(a, b):\n return a * b\n" + + hash1 = _compute_ast_hash(code_add, "python") + hash2 = _compute_ast_hash(code_mul, "python") + + assert hash1 != hash2 + + def test_identifier_change_different_hash(self) -> None: + """Produce different hashes when an identifier is renamed.""" + code_a = "def foo(x):\n return x\n" + code_b = "def bar(x):\n return x\n" + + assert _compute_ast_hash(code_a, "python") != _compute_ast_hash(code_b, "python") + + def test_javascript_comment_changes_same_hash(self) -> None: + """Comment changes in JavaScript also produce the same hash.""" + js_with_comment = "// a comment\nfunction add(a, b) { return a + b; }\n" + js_no_comment = "function add(a, b) { return a + b; }\n" + js_block_comment = "/* block */\nfunction add(a, b) { return a + b; }\n" + + h1 = _compute_ast_hash(js_with_comment, "javascript") + h2 = _compute_ast_hash(js_no_comment, "javascript") + h3 = _compute_ast_hash(js_block_comment, "javascript") + + assert h1 == h2 + assert h2 == h3 + + def test_docstring_change_different_hash(self) -> None: + """Docstring changes DO alter the hash since docstrings are semantic content.""" + code_a = 'def foo():\n """Docstring A."""\n pass\n' + code_b = 'def foo():\n """Docstring B."""\n pass\n' + + assert _compute_ast_hash(code_a, "python") != _compute_ast_hash(code_b, "python") + + +# --------------------------------------------------------------------------- +# _get_semantic_language +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +class TestGetSemanticLanguage: + """Test language detection for semantic hashing.""" + + def test_python_file(self) -> None: + """Detect Python from a .py file path.""" + lang = _get_semantic_language(Path("foo.py")) + assert lang is not None + assert lang.variable == "python" + + def test_javascript_file(self) -> None: + """Detect JavaScript from a .js file path.""" + lang = _get_semantic_language(Path("app.js")) + assert lang is not None + assert lang.variable == "javascript" + + def test_non_semantic_file(self) -> None: + """Return None for a file without a supported AST language.""" + lang = _get_semantic_language(Path("readme.md")) + assert lang is None + + def test_ext_category_takes_precedence(self) -> None: + """Use the language from ext_category when provided.""" + from codeweaver.core.language import SemanticSearchLanguage + + ext_cat = ExtCategory(language=SemanticSearchLanguage.PYTHON, kind="code") + lang = _get_semantic_language(Path("file.txt"), ext_category=ext_cat) + assert lang is not None + assert lang.variable == "python" + + +# --------------------------------------------------------------------------- +# compute_semantic_file_hash +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +class TestComputeSemanticFileHash: + """Test the public compute_semantic_file_hash function.""" + + def test_python_file_uses_ast_hash(self) -> None: + """Use AST-based hashing for Python content.""" + code = b"def foo(): pass\n" + ast_hash = compute_semantic_file_hash(code, Path("test.py")) + content_hash = get_blake_hash(code) + # AST hash should differ from raw content hash + assert ast_hash != content_hash + + def test_non_semantic_file_uses_content_hash(self) -> None: + """Fall back to raw content hash for non-semantic files.""" + content = b"# just some markdown\n" + result = compute_semantic_file_hash(content, Path("readme.md")) + assert result == get_blake_hash(content) + + def test_comment_only_change_same_hash(self) -> None: + """Semantic hashing ignores comment-only changes in Python.""" + code_v1 = b"# version 1 comment\ndef foo(): pass\n" + code_v2 = b"# version 2 comment\ndef foo(): pass\n" + + assert compute_semantic_file_hash(code_v1, Path("x.py")) == compute_semantic_file_hash( + code_v2, Path("x.py") + ) + + def test_logic_change_different_hash(self) -> None: + """Semantic hashing detects logic changes.""" + code_v1 = b"def foo(a, b): return a + b\n" + code_v2 = b"def foo(a, b): return a - b\n" + + assert compute_semantic_file_hash(code_v1, Path("x.py")) != compute_semantic_file_hash( + code_v2, Path("x.py") + ) + + def test_ext_category_passed_through(self) -> None: + """Use ext_category when explicitly provided.""" + from codeweaver.core.language import SemanticSearchLanguage + + code = b"def bar(): pass\n" + ext = ExtCategory(language=SemanticSearchLanguage.PYTHON, kind="code") + # Should produce AST hash even though path has .txt extension + hash_with_ext = compute_semantic_file_hash(code, Path("file.txt"), ext_category=ext) + hash_without_ext = compute_semantic_file_hash(code, Path("file.txt")) + # Without ext_category, .txt falls back to content hash + assert hash_with_ext != hash_without_ext + + +# --------------------------------------------------------------------------- +# DiscoveredFile integration +# --------------------------------------------------------------------------- + + +@pytest.mark.unit +class TestDiscoveredFileSemanticHash: + """Test that DiscoveredFile uses AST-based hashing for semantic files.""" + + def test_python_file_hash_ignores_comments(self, temp_project: Path) -> None: + """DiscoveredFile produces same hash for Python files differing only in comments.""" + file_v1 = temp_project / "mod_v1.py" + file_v2 = temp_project / "mod_v2.py" + + file_v1.write_text("# old comment\ndef greet(): print('hi')\n") + file_v2.write_text("# new comment\ndef greet(): print('hi')\n") + + df1 = DiscoveredFile.from_path(file_v1) + df2 = DiscoveredFile.from_path(file_v2) + + assert df1 is not None + assert df2 is not None + assert df1.file_hash == df2.file_hash + + def test_python_file_hash_detects_logic_change(self, temp_project: Path) -> None: + """DiscoveredFile produces different hash when Python logic changes.""" + file_v1 = temp_project / "calc_v1.py" + file_v2 = temp_project / "calc_v2.py" + + file_v1.write_text("def add(a, b): return a + b\n") + file_v2.write_text("def add(a, b): return a - b\n") + + df1 = DiscoveredFile.from_path(file_v1) + df2 = DiscoveredFile.from_path(file_v2) + + assert df1 is not None + assert df2 is not None + assert df1.file_hash != df2.file_hash + + def test_non_semantic_file_uses_content_hash(self, temp_project: Path) -> None: + """DiscoveredFile uses raw content hash for non-semantic file types.""" + txt = temp_project / "notes.txt" + txt.write_text("hello world") + + df = DiscoveredFile.from_path(txt) + assert df is not None + assert df.file_hash == get_blake_hash(b"hello world") + + def test_is_same_with_semantic_hash(self, temp_project: Path) -> None: + """is_same returns True for files with same semantics but different comments.""" + file_a = temp_project / "a.py" + file_b = temp_project / "b.py" + + file_a.write_text("# comment A\ndef f(): pass\n") + file_b.write_text("# comment B\ndef f(): pass\n") + + df = DiscoveredFile.from_path(file_a) + assert df is not None + assert df.is_same(file_b) + + def test_formatting_only_change_same_hash(self, temp_project: Path) -> None: + """Formatting-only changes produce the same hash for Python files.""" + file_v1 = temp_project / "fmt_v1.py" + file_v2 = temp_project / "fmt_v2.py" + + file_v1.write_text("def add(a,b):\n return a+b\n") + file_v2.write_text("def add(a, b):\n return a + b\n") + + df1 = DiscoveredFile.from_path(file_v1) + df2 = DiscoveredFile.from_path(file_v2) + + assert df1 is not None + assert df2 is not None + assert df1.file_hash == df2.file_hash From 6092a9f318a165f7d790e198959d3cf2f5824b09 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 19:49:09 +0000 Subject: [PATCH 3/5] fix: add debug logging with exc_info to AST hashing fallback paths Co-authored-by: bashandbone <89049923+bashandbone@users.noreply.github.com> --- src/codeweaver/core/discovery.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/codeweaver/core/discovery.py b/src/codeweaver/core/discovery.py index 0ae8d73f..6c8ecbdc 100644 --- a/src/codeweaver/core/discovery.py +++ b/src/codeweaver/core/discovery.py @@ -99,6 +99,7 @@ def _compute_ast_hash(content: str, language_name: str) -> BlakeHashKey | None: except (KeyboardInterrupt, SystemExit): raise except BaseException: + logger.debug("AST parsing failed for language %s", language_name, exc_info=True) return None @@ -135,7 +136,11 @@ def compute_semantic_file_hash( if ast_hash := _compute_ast_hash(content_str, language.variable): return ast_hash except Exception: - logger.debug("AST hashing failed for %s, falling back to content hash", file_path) + logger.debug( + "AST hashing failed for %s, falling back to content hash", + file_path, + exc_info=True, + ) return get_blake_hash(content_bytes) From d2d313423036a6aa9169e738dbd55205f386c661 Mon Sep 17 00:00:00 2001 From: Adam Poulemanos <89049923+bashandbone@users.noreply.github.com> Date: Thu, 19 Mar 2026 19:52:17 -0400 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Adam Poulemanos <89049923+bashandbone@users.noreply.github.com> --- tests/unit/core/test_semantic_hashing.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/unit/core/test_semantic_hashing.py b/tests/unit/core/test_semantic_hashing.py index 522e35f8..1a8e9abb 100644 --- a/tests/unit/core/test_semantic_hashing.py +++ b/tests/unit/core/test_semantic_hashing.py @@ -46,12 +46,13 @@ def test_returns_hash_for_valid_python(self) -> None: assert result is not None assert len(result) == 64 # blake3 hex digest - def test_returns_none_for_empty_content(self) -> None: - """Return None when parsing produces no meaningful nodes.""" + def test_returns_hash_for_empty_content(self) -> None: + """Produce a hash even when parsing only yields an empty module root.""" result = _compute_ast_hash("", "python") - # Empty source still produces a 'module' root node - # which is acceptable - the hash just captures the empty structure - assert result is None or len(result) == 64 + # Empty source still produces a 'module' root node, which is acceptable; + # the hash just captures the empty structure. + assert result is not None + assert len(result) == 64 def test_returns_none_for_unsupported_language(self) -> None: """Return None when the language is not supported by ast-grep.""" From ed23aa2140b9d480741577c65080110ab4980ff1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 20 Mar 2026 13:03:39 +0000 Subject: [PATCH 5/5] fix: use absolute_path in file_hash and narrow pyo3 exception catch - file_hash property now uses self.absolute_path for existence checks and reads so relative paths resolve correctly regardless of CWD - _compute_ast_hash catches Exception for standard errors, then catches BaseException only for pyo3_runtime.PanicException identified by module name, re-raising all other non-recoverable errors Co-authored-by: bashandbone <89049923+bashandbone@users.noreply.github.com> --- src/codeweaver/core/discovery.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/codeweaver/core/discovery.py b/src/codeweaver/core/discovery.py index 6c8ecbdc..c1789597 100644 --- a/src/codeweaver/core/discovery.py +++ b/src/codeweaver/core/discovery.py @@ -96,11 +96,18 @@ def _compute_ast_hash(content: str, language_name: str) -> BlakeHashKey | None: return None canonical = "\n".join(parts) return get_blake_hash(canonical) - except (KeyboardInterrupt, SystemExit): - raise - except BaseException: + except Exception: logger.debug("AST parsing failed for language %s", language_name, exc_info=True) return None + except BaseException as exc: + # pyo3_runtime.PanicException (from ast-grep's Rust backend) inherits + # from BaseException, not Exception, and cannot be imported directly. + # Identify it by module name so we degrade gracefully without swallowing + # unrelated non-recoverable errors (GeneratorExit, etc.). + if getattr(type(exc), "__module__", None) == "pyo3_runtime": + logger.debug("AST parsing panicked for language %s", language_name, exc_info=True) + return None + raise def _get_semantic_language( @@ -337,10 +344,11 @@ def file_hash(self) -> BlakeHashKey: """Return the blake3 hash of the file, using AST-based hashing when supported.""" if self._file_hash is not None: return self._file_hash - if self.path.exists() and self.path.is_file(): - content_bytes = self.path.read_bytes() + abs_path = self.absolute_path + if abs_path.exists() and abs_path.is_file(): + content_bytes = abs_path.read_bytes() content_hash = compute_semantic_file_hash( - content_bytes, self.path, ext_category=self.ext_category + content_bytes, abs_path, ext_category=self.ext_category ) with contextlib.suppress(Exception): object.__setattr__(self, "_file_hash", content_hash)