From d1f1abfe1ee1571f0036585f8dff7ed1d31c350b Mon Sep 17 00:00:00 2001 From: Alejandro Colomina Date: Fri, 27 Mar 2026 11:59:02 +0100 Subject: [PATCH] fix: apply compilation.exclude patterns during primitive discovery Propagate exclude patterns from apm.yml to the primitive discovery phase. Previously, patterns were only applied during context optimization, allowing files in excluded directories to leak into compiled output. Add exclude_patterns parameter to discover_primitives(), discover_primitives_with_dependencies(), and scan_local_primitives(). Add _matches_exclude_patterns() helper supporting dir/**, **/dir/**, and fnmatch-style glob patterns. Fixes #475 Co-Authored-By: Claude Opus 4.6 (1M context) --- src/apm_cli/compilation/agents_compiler.py | 4 +- src/apm_cli/primitives/discovery.py | 156 +++++++++++++++--- .../test_agents_compiler_coverage.py | 2 +- .../unit/primitives/test_exclude_patterns.py | 126 ++++++++++++++ 4 files changed, 261 insertions(+), 27 deletions(-) create mode 100644 tests/unit/primitives/test_exclude_patterns.py diff --git a/src/apm_cli/compilation/agents_compiler.py b/src/apm_cli/compilation/agents_compiler.py index a1619648..587cfb4c 100644 --- a/src/apm_cli/compilation/agents_compiler.py +++ b/src/apm_cli/compilation/agents_compiler.py @@ -187,11 +187,11 @@ def compile(self, config: CompilationConfig, primitives: Optional[PrimitiveColle if primitives is None: if config.local_only: # Use basic discovery for local-only mode - primitives = discover_primitives(str(self.base_dir)) + primitives = discover_primitives(str(self.base_dir), exclude_patterns=config.exclude) else: # Use enhanced discovery with dependencies (Task 4 integration) from ..primitives.discovery import discover_primitives_with_dependencies - primitives = discover_primitives_with_dependencies(str(self.base_dir)) + primitives = discover_primitives_with_dependencies(str(self.base_dir), exclude_patterns=config.exclude) # Route to targets based on config.target results: List[CompilationResult] = [] diff --git a/src/apm_cli/primitives/discovery.py b/src/apm_cli/primitives/discovery.py index bc9d97be..eabbd636 100644 --- a/src/apm_cli/primitives/discovery.py +++ b/src/apm_cli/primitives/discovery.py @@ -1,9 +1,10 @@ """Discovery functionality for primitive files.""" +import fnmatch import os import glob from pathlib import Path -from typing import List, Dict +from typing import List, Dict, Optional from .models import PrimitiveCollection from .parser import parse_primitive_file, parse_skill_file @@ -52,55 +53,60 @@ } -def discover_primitives(base_dir: str = ".") -> PrimitiveCollection: +def discover_primitives(base_dir: str = ".", exclude_patterns: Optional[List[str]] = None) -> PrimitiveCollection: """Find all APM primitive files in the project. - + Searches for .chatmode.md, .instructions.md, .context.md, .memory.md files in both .apm/ and .github/ directory structures, plus SKILL.md at root. - + Args: base_dir (str): Base directory to search in. Defaults to current directory. - + exclude_patterns: Glob patterns for paths to exclude from discovery. + Returns: PrimitiveCollection: Collection of discovered and parsed primitives. """ collection = PrimitiveCollection() - + base_path = Path(base_dir) + # Find and parse files for each primitive type for primitive_type, patterns in LOCAL_PRIMITIVE_PATTERNS.items(): files = find_primitive_files(base_dir, patterns) - + for file_path in files: + if _matches_exclude_patterns(file_path, base_path, exclude_patterns): + continue try: primitive = parse_primitive_file(file_path, source="local") collection.add_primitive(primitive) except Exception as e: print(f"Warning: Failed to parse {file_path}: {e}") - + # Discover SKILL.md at project root _discover_local_skill(base_dir, collection) - + return collection -def discover_primitives_with_dependencies(base_dir: str = ".") -> PrimitiveCollection: +def discover_primitives_with_dependencies(base_dir: str = ".", exclude_patterns: Optional[List[str]] = None) -> PrimitiveCollection: """Enhanced primitive discovery including dependency sources. - + Priority Order: 1. Local .apm/ (highest priority - always wins) 2. Dependencies in declaration order (first declared wins) 3. Plugins (lowest priority) - + Args: base_dir (str): Base directory to search in. Defaults to current directory. - + exclude_patterns: Glob patterns for paths to exclude from discovery. + Returns: PrimitiveCollection: Collection of discovered and parsed primitives with source tracking. """ collection = PrimitiveCollection() # Phase 1: Local primitives (highest priority) - scan_local_primitives(base_dir, collection) + scan_local_primitives(base_dir, collection, exclude_patterns=exclude_patterns) # Phase 1b: Local SKILL.md _discover_local_skill(base_dir, collection) @@ -113,27 +119,30 @@ def discover_primitives_with_dependencies(base_dir: str = ".") -> PrimitiveColle return collection -def scan_local_primitives(base_dir: str, collection: PrimitiveCollection) -> None: +def scan_local_primitives(base_dir: str, collection: PrimitiveCollection, exclude_patterns: Optional[List[str]] = None) -> None: """Scan local .apm/ directory for primitives. - + Args: base_dir (str): Base directory to search in. collection (PrimitiveCollection): Collection to add primitives to. + exclude_patterns: Glob patterns for paths to exclude from discovery. """ # Find and parse files for each primitive type for primitive_type, patterns in LOCAL_PRIMITIVE_PATTERNS.items(): files = find_primitive_files(base_dir, patterns) - + # Filter out files from apm_modules to avoid conflicts with dependency scanning local_files = [] base_path = Path(base_dir) apm_modules_path = base_path / "apm_modules" - + for file_path in files: - # Only include files that are NOT in apm_modules directory - if not _is_under_directory(file_path, apm_modules_path): - local_files.append(file_path) - + if _is_under_directory(file_path, apm_modules_path): + continue + if _matches_exclude_patterns(file_path, base_path, exclude_patterns): + continue + local_files.append(file_path) + for file_path in local_files: try: primitive = parse_primitive_file(file_path, source="local") @@ -144,11 +153,11 @@ def scan_local_primitives(base_dir: str, collection: PrimitiveCollection) -> Non def _is_under_directory(file_path: Path, directory: Path) -> bool: """Check if a file path is under a specific directory. - + Args: file_path (Path): Path to check. directory (Path): Directory to check against. - + Returns: bool: True if file_path is under directory, False otherwise. """ @@ -159,6 +168,105 @@ def _is_under_directory(file_path: Path, directory: Path) -> bool: return False +def _matches_exclude_patterns(file_path: Path, base_dir: Path, exclude_patterns: Optional[List[str]] = None) -> bool: + """Check if a file path matches any exclusion pattern. + + Args: + file_path: Absolute path to check. + base_dir: Base directory for computing relative paths. + exclude_patterns: Glob patterns to match against. + + Returns: + True if file should be excluded, False otherwise. + """ + if not exclude_patterns: + return False + + try: + rel_path_str = str(file_path.resolve().relative_to(base_dir.resolve())).replace(os.sep, '/') + except ValueError: + return False + + for pattern in exclude_patterns: + normalized = pattern.replace('\\', '/').replace(os.sep, '/') + if _matches_pattern(rel_path_str, normalized): + return True + + return False + + +def _matches_pattern(rel_path_str: str, pattern: str) -> bool: + """Check if a relative path matches an exclusion pattern. + + Mirrors ContextOptimizer._matches_pattern semantics so that + discovery and optimization exclude behavior stays consistent. + + Args: + rel_path_str: Forward-slash-normalized path relative to base_dir. + pattern: Forward-slash-normalized exclusion pattern (glob syntax). + + Returns: + True if path matches pattern, False otherwise. + """ + # Handle ** patterns with recursive matching + if '**' in pattern: + path_parts = rel_path_str.split('/') + pattern_parts = pattern.split('/') + # Check the full path and each parent directory, since the optimizer + # matches directories (which get pruned) while we match file paths + for i in range(len(path_parts), 0, -1): + if _match_glob_recursive(path_parts[:i], pattern_parts): + return True + return False + + # Simple fnmatch for patterns without ** + if fnmatch.fnmatch(rel_path_str, pattern): + return True + + # Directory-prefix matching (e.g. "tmp/" or "tmp" matching "tmp/foo/bar") + if pattern.endswith('/'): + if rel_path_str.startswith(pattern) or rel_path_str == pattern.rstrip('/'): + return True + else: + if rel_path_str.startswith(pattern + '/') or rel_path_str == pattern: + return True + + return False + + +def _match_glob_recursive(path_parts: list, pattern_parts: list) -> bool: + """Recursively match path parts against pattern parts with ** support. + + Mirrors ContextOptimizer._match_glob_recursive. + + Args: + path_parts: List of path components. + pattern_parts: List of pattern components. + + Returns: + True if path matches pattern, False otherwise. + """ + if not pattern_parts: + return not path_parts + + if not path_parts: + return all(p == '**' or p == '' for p in pattern_parts) + + pattern_part = pattern_parts[0] + + if pattern_part == '**': + # ** matches zero or more directories + if _match_glob_recursive(path_parts, pattern_parts[1:]): + return True + if _match_glob_recursive(path_parts[1:], pattern_parts): + return True + return False + else: + if fnmatch.fnmatch(path_parts[0], pattern_part): + return _match_glob_recursive(path_parts[1:], pattern_parts[1:]) + return False + + def scan_dependency_primitives(base_dir: str, collection: PrimitiveCollection) -> None: """Scan all dependencies in apm_modules/ with priority handling. diff --git a/tests/unit/compilation/test_agents_compiler_coverage.py b/tests/unit/compilation/test_agents_compiler_coverage.py index 9274fe6e..8c5832b9 100644 --- a/tests/unit/compilation/test_agents_compiler_coverage.py +++ b/tests/unit/compilation/test_agents_compiler_coverage.py @@ -205,7 +205,7 @@ def test_compile_local_only_calls_basic_discover(self): ) as mock_disc: result = compiler.compile(config) # no primitives passed → discovers - mock_disc.assert_called_once_with(str(compiler.base_dir)) + mock_disc.assert_called_once_with(str(compiler.base_dir), exclude_patterns=config.exclude) # --------------------------------------------------------------------------- diff --git a/tests/unit/primitives/test_exclude_patterns.py b/tests/unit/primitives/test_exclude_patterns.py new file mode 100644 index 00000000..019b8914 --- /dev/null +++ b/tests/unit/primitives/test_exclude_patterns.py @@ -0,0 +1,126 @@ +"""Tests for exclude pattern filtering during primitive discovery. + +Mirrors TestDirectoryExclusion in test_context_optimizer.py to ensure +discovery and optimization exclude behavior stays consistent. +""" + +import tempfile +from pathlib import Path + +import pytest + +from apm_cli.primitives.discovery import ( + _match_glob_recursive, + _matches_exclude_patterns, + _matches_pattern, +) + + +class TestMatchesPattern: + """Test _matches_pattern against all pattern styles from ContextOptimizer.""" + + def test_simple_directory_name(self): + assert _matches_pattern("tmp/foo/bar.md", "tmp") + assert _matches_pattern("tmp", "tmp") + assert not _matches_pattern("src/foo.md", "tmp") + + def test_trailing_slash(self): + assert _matches_pattern("tmp/foo/bar.md", "tmp/") + assert _matches_pattern("tmp", "tmp/") + assert not _matches_pattern("src/foo.md", "tmp/") + + def test_glob_star_star_suffix(self): + assert _matches_pattern("docs/labs/foo.md", "docs/**") + assert _matches_pattern("docs/a/b/c.md", "docs/**") + assert not _matches_pattern("src/docs/foo.md", "docs/**") + + def test_glob_star_star_prefix(self): + assert _matches_pattern("test-fixtures/foo.md", "**/test-fixtures") + assert _matches_pattern("src/test-fixtures/foo.md", "**/test-fixtures") + assert not _matches_pattern("src/foo.md", "**/test-fixtures") + + def test_glob_star_star_both(self): + assert _matches_pattern("node_modules/pkg/index.js", "**/node_modules/**") + assert _matches_pattern("src/node_modules/pkg.js", "**/node_modules/**") + assert not _matches_pattern("src/foo.js", "**/node_modules/**") + + def test_nested_directory(self): + assert _matches_pattern("projects/packages/apm/src/foo.py", "projects/packages/apm") + assert _matches_pattern("projects/packages/apm", "projects/packages/apm") + assert not _matches_pattern("projects/packages/other/foo.py", "projects/packages/apm") + + def test_fnmatch_wildcards(self): + assert _matches_pattern("tmp123/foo.md", "tmp*") + assert _matches_pattern("mycache/foo.md", "*cache*") + assert not _matches_pattern("src/foo.md", "tmp*") + + def test_coverage_star_star_suffix(self): + assert _matches_pattern("coverage/report/index.html", "coverage/**") + assert _matches_pattern("coverage", "coverage/**") + assert not _matches_pattern("src/coverage.py", "coverage/**") + + +class TestMatchGlobRecursive: + """Test _match_glob_recursive edge cases.""" + + def test_empty_pattern_empty_path(self): + assert _match_glob_recursive([], []) + + def test_star_star_matches_zero_dirs(self): + assert _match_glob_recursive(["foo.md"], ["**", "foo.md"]) + + def test_star_star_matches_multiple_dirs(self): + assert _match_glob_recursive( + ["a", "b", "c", "foo.md"], ["**", "foo.md"] + ) + + def test_trailing_star_star(self): + assert _match_glob_recursive(["docs", "a", "b"], ["docs", "**"]) + + def test_no_match(self): + assert not _match_glob_recursive(["src", "foo.py"], ["docs", "**"]) + + +class TestMatchesExcludePatterns: + """Test _matches_exclude_patterns integration with file paths.""" + + def test_no_patterns(self): + assert not _matches_exclude_patterns(Path("/p/foo.md"), Path("/p"), None) + assert not _matches_exclude_patterns(Path("/p/foo.md"), Path("/p"), []) + + def test_path_outside_base_dir(self): + assert not _matches_exclude_patterns( + Path("/other/docs/foo.md"), Path("/project"), ["docs/**"] + ) + + def test_filters_with_real_files(self): + with tempfile.TemporaryDirectory() as tmpdir: + base = Path(tmpdir) + (base / "docs" / "labs").mkdir(parents=True) + (base / "src").mkdir() + doc_file = base / "docs" / "labs" / "example.instructions.md" + src_file = base / "src" / "real.instructions.md" + doc_file.touch() + src_file.touch() + + patterns = ["docs/**"] + assert _matches_exclude_patterns(doc_file, base, patterns) + assert not _matches_exclude_patterns(src_file, base, patterns) + + def test_multiple_patterns(self): + with tempfile.TemporaryDirectory() as tmpdir: + base = Path(tmpdir) + (base / "docs").mkdir() + (base / "tmp").mkdir() + (base / "src").mkdir() + doc = base / "docs" / "foo.md" + tmp = base / "tmp" / "bar.md" + src = base / "src" / "real.md" + doc.touch() + tmp.touch() + src.touch() + + patterns = ["docs/**", "tmp"] + assert _matches_exclude_patterns(doc, base, patterns) + assert _matches_exclude_patterns(tmp, base, patterns) + assert not _matches_exclude_patterns(src, base, patterns)