-
Notifications
You must be signed in to change notification settings - Fork 67
fix: apply compilation.exclude patterns during primitive discovery #476
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Coolomina
wants to merge
1
commit into
microsoft:main
from
Coolomina:fix/exclude-primitive-discovery
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,10 @@ | ||
| """Discovery functionality for primitive files.""" | ||
|
|
||
| import fnmatch | ||
| import os | ||
| import glob | ||
| from pathlib import Path | ||
| from typing import List, Dict | ||
| from typing import List, Dict, Optional | ||
|
|
||
| from .models import PrimitiveCollection | ||
| from .parser import parse_primitive_file, parse_skill_file | ||
|
|
@@ -52,55 +53,60 @@ | |
| } | ||
|
|
||
|
|
||
| def discover_primitives(base_dir: str = ".") -> PrimitiveCollection: | ||
| def discover_primitives(base_dir: str = ".", exclude_patterns: Optional[List[str]] = None) -> PrimitiveCollection: | ||
| """Find all APM primitive files in the project. | ||
|
|
||
| Searches for .chatmode.md, .instructions.md, .context.md, .memory.md files | ||
| in both .apm/ and .github/ directory structures, plus SKILL.md at root. | ||
|
|
||
| Args: | ||
| base_dir (str): Base directory to search in. Defaults to current directory. | ||
|
|
||
| exclude_patterns: Glob patterns for paths to exclude from discovery. | ||
|
|
||
| Returns: | ||
| PrimitiveCollection: Collection of discovered and parsed primitives. | ||
| """ | ||
| collection = PrimitiveCollection() | ||
|
|
||
| base_path = Path(base_dir) | ||
|
|
||
| # Find and parse files for each primitive type | ||
| for primitive_type, patterns in LOCAL_PRIMITIVE_PATTERNS.items(): | ||
| files = find_primitive_files(base_dir, patterns) | ||
|
|
||
| for file_path in files: | ||
| if _matches_exclude_patterns(file_path, base_path, exclude_patterns): | ||
| continue | ||
| try: | ||
| primitive = parse_primitive_file(file_path, source="local") | ||
| collection.add_primitive(primitive) | ||
| except Exception as e: | ||
| print(f"Warning: Failed to parse {file_path}: {e}") | ||
|
|
||
| # Discover SKILL.md at project root | ||
| _discover_local_skill(base_dir, collection) | ||
|
|
||
| return collection | ||
|
|
||
|
|
||
| def discover_primitives_with_dependencies(base_dir: str = ".") -> PrimitiveCollection: | ||
| def discover_primitives_with_dependencies(base_dir: str = ".", exclude_patterns: Optional[List[str]] = None) -> PrimitiveCollection: | ||
| """Enhanced primitive discovery including dependency sources. | ||
|
|
||
| Priority Order: | ||
| 1. Local .apm/ (highest priority - always wins) | ||
| 2. Dependencies in declaration order (first declared wins) | ||
| 3. Plugins (lowest priority) | ||
|
|
||
| Args: | ||
| base_dir (str): Base directory to search in. Defaults to current directory. | ||
|
|
||
| exclude_patterns: Glob patterns for paths to exclude from discovery. | ||
|
|
||
| Returns: | ||
| PrimitiveCollection: Collection of discovered and parsed primitives with source tracking. | ||
| """ | ||
| collection = PrimitiveCollection() | ||
|
|
||
| # Phase 1: Local primitives (highest priority) | ||
| scan_local_primitives(base_dir, collection) | ||
| scan_local_primitives(base_dir, collection, exclude_patterns=exclude_patterns) | ||
|
|
||
| # Phase 1b: Local SKILL.md | ||
| _discover_local_skill(base_dir, collection) | ||
|
|
@@ -113,27 +119,30 @@ def discover_primitives_with_dependencies(base_dir: str = ".") -> PrimitiveColle | |
| return collection | ||
|
|
||
|
|
||
| def scan_local_primitives(base_dir: str, collection: PrimitiveCollection) -> None: | ||
| def scan_local_primitives(base_dir: str, collection: PrimitiveCollection, exclude_patterns: Optional[List[str]] = None) -> None: | ||
| """Scan local .apm/ directory for primitives. | ||
|
|
||
| Args: | ||
| base_dir (str): Base directory to search in. | ||
| collection (PrimitiveCollection): Collection to add primitives to. | ||
| exclude_patterns: Glob patterns for paths to exclude from discovery. | ||
| """ | ||
| # Find and parse files for each primitive type | ||
| for primitive_type, patterns in LOCAL_PRIMITIVE_PATTERNS.items(): | ||
| files = find_primitive_files(base_dir, patterns) | ||
|
|
||
| # Filter out files from apm_modules to avoid conflicts with dependency scanning | ||
| local_files = [] | ||
| base_path = Path(base_dir) | ||
| apm_modules_path = base_path / "apm_modules" | ||
|
|
||
| for file_path in files: | ||
| # Only include files that are NOT in apm_modules directory | ||
| if not _is_under_directory(file_path, apm_modules_path): | ||
| local_files.append(file_path) | ||
|
|
||
| if _is_under_directory(file_path, apm_modules_path): | ||
| continue | ||
| if _matches_exclude_patterns(file_path, base_path, exclude_patterns): | ||
| continue | ||
| local_files.append(file_path) | ||
|
|
||
| for file_path in local_files: | ||
| try: | ||
| primitive = parse_primitive_file(file_path, source="local") | ||
|
|
@@ -144,11 +153,11 @@ def scan_local_primitives(base_dir: str, collection: PrimitiveCollection) -> Non | |
|
|
||
| def _is_under_directory(file_path: Path, directory: Path) -> bool: | ||
| """Check if a file path is under a specific directory. | ||
|
|
||
| Args: | ||
| file_path (Path): Path to check. | ||
| directory (Path): Directory to check against. | ||
|
|
||
| Returns: | ||
| bool: True if file_path is under directory, False otherwise. | ||
| """ | ||
|
|
@@ -159,6 +168,105 @@ def _is_under_directory(file_path: Path, directory: Path) -> bool: | |
| return False | ||
|
|
||
|
|
||
| def _matches_exclude_patterns(file_path: Path, base_dir: Path, exclude_patterns: Optional[List[str]] = None) -> bool: | ||
| """Check if a file path matches any exclusion pattern. | ||
|
|
||
| Args: | ||
| file_path: Absolute path to check. | ||
| base_dir: Base directory for computing relative paths. | ||
| exclude_patterns: Glob patterns to match against. | ||
|
|
||
| Returns: | ||
| True if file should be excluded, False otherwise. | ||
| """ | ||
| if not exclude_patterns: | ||
| return False | ||
|
|
||
| try: | ||
| rel_path_str = str(file_path.resolve().relative_to(base_dir.resolve())).replace(os.sep, '/') | ||
| except ValueError: | ||
| return False | ||
|
|
||
| for pattern in exclude_patterns: | ||
| normalized = pattern.replace('\\', '/').replace(os.sep, '/') | ||
| if _matches_pattern(rel_path_str, normalized): | ||
| return True | ||
|
|
||
| return False | ||
|
|
||
|
|
||
| def _matches_pattern(rel_path_str: str, pattern: str) -> bool: | ||
| """Check if a relative path matches an exclusion pattern. | ||
|
|
||
| Mirrors ContextOptimizer._matches_pattern semantics so that | ||
| discovery and optimization exclude behavior stays consistent. | ||
|
|
||
| Args: | ||
| rel_path_str: Forward-slash-normalized path relative to base_dir. | ||
| pattern: Forward-slash-normalized exclusion pattern (glob syntax). | ||
|
|
||
| Returns: | ||
| True if path matches pattern, False otherwise. | ||
| """ | ||
| # Handle ** patterns with recursive matching | ||
| if '**' in pattern: | ||
| path_parts = rel_path_str.split('/') | ||
| pattern_parts = pattern.split('/') | ||
| # Check the full path and each parent directory, since the optimizer | ||
| # matches directories (which get pruned) while we match file paths | ||
| for i in range(len(path_parts), 0, -1): | ||
| if _match_glob_recursive(path_parts[:i], pattern_parts): | ||
| return True | ||
| return False | ||
|
|
||
| # Simple fnmatch for patterns without ** | ||
| if fnmatch.fnmatch(rel_path_str, pattern): | ||
| return True | ||
|
|
||
| # Directory-prefix matching (e.g. "tmp/" or "tmp" matching "tmp/foo/bar") | ||
| if pattern.endswith('/'): | ||
| if rel_path_str.startswith(pattern) or rel_path_str == pattern.rstrip('/'): | ||
| return True | ||
| else: | ||
| if rel_path_str.startswith(pattern + '/') or rel_path_str == pattern: | ||
| return True | ||
|
|
||
| return False | ||
|
|
||
|
|
||
| def _match_glob_recursive(path_parts: list, pattern_parts: list) -> bool: | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dupped, I know, but extracting it and refactor the code felt out of the scope of this bug. |
||
| """Recursively match path parts against pattern parts with ** support. | ||
|
|
||
| Mirrors ContextOptimizer._match_glob_recursive. | ||
|
|
||
| Args: | ||
| path_parts: List of path components. | ||
| pattern_parts: List of pattern components. | ||
|
|
||
| Returns: | ||
| True if path matches pattern, False otherwise. | ||
| """ | ||
| if not pattern_parts: | ||
| return not path_parts | ||
|
|
||
| if not path_parts: | ||
| return all(p == '**' or p == '' for p in pattern_parts) | ||
|
|
||
| pattern_part = pattern_parts[0] | ||
|
|
||
| if pattern_part == '**': | ||
| # ** matches zero or more directories | ||
| if _match_glob_recursive(path_parts, pattern_parts[1:]): | ||
| return True | ||
| if _match_glob_recursive(path_parts[1:], pattern_parts): | ||
| return True | ||
| return False | ||
| else: | ||
| if fnmatch.fnmatch(path_parts[0], pattern_part): | ||
| return _match_glob_recursive(path_parts[1:], pattern_parts[1:]) | ||
| return False | ||
|
|
||
|
|
||
| def scan_dependency_primitives(base_dir: str, collection: PrimitiveCollection) -> None: | ||
| """Scan all dependencies in apm_modules/ with priority handling. | ||
|
|
||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| """Tests for exclude pattern filtering during primitive discovery. | ||
|
|
||
| Mirrors TestDirectoryExclusion in test_context_optimizer.py to ensure | ||
| discovery and optimization exclude behavior stays consistent. | ||
| """ | ||
|
|
||
| import tempfile | ||
| from pathlib import Path | ||
|
|
||
| import pytest | ||
|
|
||
| from apm_cli.primitives.discovery import ( | ||
| _match_glob_recursive, | ||
| _matches_exclude_patterns, | ||
| _matches_pattern, | ||
| ) | ||
|
|
||
|
|
||
| class TestMatchesPattern: | ||
| """Test _matches_pattern against all pattern styles from ContextOptimizer.""" | ||
|
|
||
| def test_simple_directory_name(self): | ||
| assert _matches_pattern("tmp/foo/bar.md", "tmp") | ||
| assert _matches_pattern("tmp", "tmp") | ||
| assert not _matches_pattern("src/foo.md", "tmp") | ||
|
|
||
| def test_trailing_slash(self): | ||
| assert _matches_pattern("tmp/foo/bar.md", "tmp/") | ||
| assert _matches_pattern("tmp", "tmp/") | ||
| assert not _matches_pattern("src/foo.md", "tmp/") | ||
|
|
||
| def test_glob_star_star_suffix(self): | ||
| assert _matches_pattern("docs/labs/foo.md", "docs/**") | ||
| assert _matches_pattern("docs/a/b/c.md", "docs/**") | ||
| assert not _matches_pattern("src/docs/foo.md", "docs/**") | ||
|
|
||
| def test_glob_star_star_prefix(self): | ||
| assert _matches_pattern("test-fixtures/foo.md", "**/test-fixtures") | ||
| assert _matches_pattern("src/test-fixtures/foo.md", "**/test-fixtures") | ||
| assert not _matches_pattern("src/foo.md", "**/test-fixtures") | ||
|
|
||
| def test_glob_star_star_both(self): | ||
| assert _matches_pattern("node_modules/pkg/index.js", "**/node_modules/**") | ||
| assert _matches_pattern("src/node_modules/pkg.js", "**/node_modules/**") | ||
| assert not _matches_pattern("src/foo.js", "**/node_modules/**") | ||
|
|
||
| def test_nested_directory(self): | ||
| assert _matches_pattern("projects/packages/apm/src/foo.py", "projects/packages/apm") | ||
| assert _matches_pattern("projects/packages/apm", "projects/packages/apm") | ||
| assert not _matches_pattern("projects/packages/other/foo.py", "projects/packages/apm") | ||
|
|
||
| def test_fnmatch_wildcards(self): | ||
| assert _matches_pattern("tmp123/foo.md", "tmp*") | ||
| assert _matches_pattern("mycache/foo.md", "*cache*") | ||
| assert not _matches_pattern("src/foo.md", "tmp*") | ||
|
|
||
| def test_coverage_star_star_suffix(self): | ||
| assert _matches_pattern("coverage/report/index.html", "coverage/**") | ||
| assert _matches_pattern("coverage", "coverage/**") | ||
| assert not _matches_pattern("src/coverage.py", "coverage/**") | ||
|
|
||
|
|
||
| class TestMatchGlobRecursive: | ||
| """Test _match_glob_recursive edge cases.""" | ||
|
|
||
| def test_empty_pattern_empty_path(self): | ||
| assert _match_glob_recursive([], []) | ||
|
|
||
| def test_star_star_matches_zero_dirs(self): | ||
| assert _match_glob_recursive(["foo.md"], ["**", "foo.md"]) | ||
|
|
||
| def test_star_star_matches_multiple_dirs(self): | ||
| assert _match_glob_recursive( | ||
| ["a", "b", "c", "foo.md"], ["**", "foo.md"] | ||
| ) | ||
|
|
||
| def test_trailing_star_star(self): | ||
| assert _match_glob_recursive(["docs", "a", "b"], ["docs", "**"]) | ||
|
|
||
| def test_no_match(self): | ||
| assert not _match_glob_recursive(["src", "foo.py"], ["docs", "**"]) | ||
|
|
||
|
|
||
| class TestMatchesExcludePatterns: | ||
| """Test _matches_exclude_patterns integration with file paths.""" | ||
|
|
||
| def test_no_patterns(self): | ||
| assert not _matches_exclude_patterns(Path("/p/foo.md"), Path("/p"), None) | ||
| assert not _matches_exclude_patterns(Path("/p/foo.md"), Path("/p"), []) | ||
|
|
||
| def test_path_outside_base_dir(self): | ||
| assert not _matches_exclude_patterns( | ||
| Path("/other/docs/foo.md"), Path("/project"), ["docs/**"] | ||
| ) | ||
|
|
||
| def test_filters_with_real_files(self): | ||
| with tempfile.TemporaryDirectory() as tmpdir: | ||
| base = Path(tmpdir) | ||
| (base / "docs" / "labs").mkdir(parents=True) | ||
| (base / "src").mkdir() | ||
| doc_file = base / "docs" / "labs" / "example.instructions.md" | ||
| src_file = base / "src" / "real.instructions.md" | ||
| doc_file.touch() | ||
| src_file.touch() | ||
|
|
||
| patterns = ["docs/**"] | ||
| assert _matches_exclude_patterns(doc_file, base, patterns) | ||
| assert not _matches_exclude_patterns(src_file, base, patterns) | ||
|
|
||
| def test_multiple_patterns(self): | ||
| with tempfile.TemporaryDirectory() as tmpdir: | ||
| base = Path(tmpdir) | ||
| (base / "docs").mkdir() | ||
| (base / "tmp").mkdir() | ||
| (base / "src").mkdir() | ||
| doc = base / "docs" / "foo.md" | ||
| tmp = base / "tmp" / "bar.md" | ||
| src = base / "src" / "real.md" | ||
| doc.touch() | ||
| tmp.touch() | ||
| src.touch() | ||
|
|
||
| patterns = ["docs/**", "tmp"] | ||
| assert _matches_exclude_patterns(doc, base, patterns) | ||
| assert _matches_exclude_patterns(tmp, base, patterns) | ||
| assert not _matches_exclude_patterns(src, base, patterns) |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.