diff --git a/src/apm_cli/compilation/agents_compiler.py b/src/apm_cli/compilation/agents_compiler.py index a1619648..587cfb4c 100644 --- a/src/apm_cli/compilation/agents_compiler.py +++ b/src/apm_cli/compilation/agents_compiler.py @@ -187,11 +187,11 @@ def compile(self, config: CompilationConfig, primitives: Optional[PrimitiveColle if primitives is None: if config.local_only: # Use basic discovery for local-only mode - primitives = discover_primitives(str(self.base_dir)) + primitives = discover_primitives(str(self.base_dir), exclude_patterns=config.exclude) else: # Use enhanced discovery with dependencies (Task 4 integration) from ..primitives.discovery import discover_primitives_with_dependencies - primitives = discover_primitives_with_dependencies(str(self.base_dir)) + primitives = discover_primitives_with_dependencies(str(self.base_dir), exclude_patterns=config.exclude) # Route to targets based on config.target results: List[CompilationResult] = [] diff --git a/src/apm_cli/primitives/discovery.py b/src/apm_cli/primitives/discovery.py index bc9d97be..eabbd636 100644 --- a/src/apm_cli/primitives/discovery.py +++ b/src/apm_cli/primitives/discovery.py @@ -1,9 +1,10 @@ """Discovery functionality for primitive files.""" +import fnmatch import os import glob from pathlib import Path -from typing import List, Dict +from typing import List, Dict, Optional from .models import PrimitiveCollection from .parser import parse_primitive_file, parse_skill_file @@ -52,55 +53,60 @@ } -def discover_primitives(base_dir: str = ".") -> PrimitiveCollection: +def discover_primitives(base_dir: str = ".", exclude_patterns: Optional[List[str]] = None) -> PrimitiveCollection: """Find all APM primitive files in the project. - + Searches for .chatmode.md, .instructions.md, .context.md, .memory.md files in both .apm/ and .github/ directory structures, plus SKILL.md at root. - + Args: base_dir (str): Base directory to search in. Defaults to current directory. - + exclude_patterns: Glob patterns for paths to exclude from discovery. + Returns: PrimitiveCollection: Collection of discovered and parsed primitives. """ collection = PrimitiveCollection() - + base_path = Path(base_dir) + # Find and parse files for each primitive type for primitive_type, patterns in LOCAL_PRIMITIVE_PATTERNS.items(): files = find_primitive_files(base_dir, patterns) - + for file_path in files: + if _matches_exclude_patterns(file_path, base_path, exclude_patterns): + continue try: primitive = parse_primitive_file(file_path, source="local") collection.add_primitive(primitive) except Exception as e: print(f"Warning: Failed to parse {file_path}: {e}") - + # Discover SKILL.md at project root _discover_local_skill(base_dir, collection) - + return collection -def discover_primitives_with_dependencies(base_dir: str = ".") -> PrimitiveCollection: +def discover_primitives_with_dependencies(base_dir: str = ".", exclude_patterns: Optional[List[str]] = None) -> PrimitiveCollection: """Enhanced primitive discovery including dependency sources. - + Priority Order: 1. Local .apm/ (highest priority - always wins) 2. Dependencies in declaration order (first declared wins) 3. Plugins (lowest priority) - + Args: base_dir (str): Base directory to search in. Defaults to current directory. - + exclude_patterns: Glob patterns for paths to exclude from discovery. + Returns: PrimitiveCollection: Collection of discovered and parsed primitives with source tracking. """ collection = PrimitiveCollection() # Phase 1: Local primitives (highest priority) - scan_local_primitives(base_dir, collection) + scan_local_primitives(base_dir, collection, exclude_patterns=exclude_patterns) # Phase 1b: Local SKILL.md _discover_local_skill(base_dir, collection) @@ -113,27 +119,30 @@ def discover_primitives_with_dependencies(base_dir: str = ".") -> PrimitiveColle return collection -def scan_local_primitives(base_dir: str, collection: PrimitiveCollection) -> None: +def scan_local_primitives(base_dir: str, collection: PrimitiveCollection, exclude_patterns: Optional[List[str]] = None) -> None: """Scan local .apm/ directory for primitives. - + Args: base_dir (str): Base directory to search in. collection (PrimitiveCollection): Collection to add primitives to. + exclude_patterns: Glob patterns for paths to exclude from discovery. """ # Find and parse files for each primitive type for primitive_type, patterns in LOCAL_PRIMITIVE_PATTERNS.items(): files = find_primitive_files(base_dir, patterns) - + # Filter out files from apm_modules to avoid conflicts with dependency scanning local_files = [] base_path = Path(base_dir) apm_modules_path = base_path / "apm_modules" - + for file_path in files: - # Only include files that are NOT in apm_modules directory - if not _is_under_directory(file_path, apm_modules_path): - local_files.append(file_path) - + if _is_under_directory(file_path, apm_modules_path): + continue + if _matches_exclude_patterns(file_path, base_path, exclude_patterns): + continue + local_files.append(file_path) + for file_path in local_files: try: primitive = parse_primitive_file(file_path, source="local") @@ -144,11 +153,11 @@ def scan_local_primitives(base_dir: str, collection: PrimitiveCollection) -> Non def _is_under_directory(file_path: Path, directory: Path) -> bool: """Check if a file path is under a specific directory. - + Args: file_path (Path): Path to check. directory (Path): Directory to check against. - + Returns: bool: True if file_path is under directory, False otherwise. """ @@ -159,6 +168,105 @@ def _is_under_directory(file_path: Path, directory: Path) -> bool: return False +def _matches_exclude_patterns(file_path: Path, base_dir: Path, exclude_patterns: Optional[List[str]] = None) -> bool: + """Check if a file path matches any exclusion pattern. + + Args: + file_path: Absolute path to check. + base_dir: Base directory for computing relative paths. + exclude_patterns: Glob patterns to match against. + + Returns: + True if file should be excluded, False otherwise. + """ + if not exclude_patterns: + return False + + try: + rel_path_str = str(file_path.resolve().relative_to(base_dir.resolve())).replace(os.sep, '/') + except ValueError: + return False + + for pattern in exclude_patterns: + normalized = pattern.replace('\\', '/').replace(os.sep, '/') + if _matches_pattern(rel_path_str, normalized): + return True + + return False + + +def _matches_pattern(rel_path_str: str, pattern: str) -> bool: + """Check if a relative path matches an exclusion pattern. + + Mirrors ContextOptimizer._matches_pattern semantics so that + discovery and optimization exclude behavior stays consistent. + + Args: + rel_path_str: Forward-slash-normalized path relative to base_dir. + pattern: Forward-slash-normalized exclusion pattern (glob syntax). + + Returns: + True if path matches pattern, False otherwise. + """ + # Handle ** patterns with recursive matching + if '**' in pattern: + path_parts = rel_path_str.split('/') + pattern_parts = pattern.split('/') + # Check the full path and each parent directory, since the optimizer + # matches directories (which get pruned) while we match file paths + for i in range(len(path_parts), 0, -1): + if _match_glob_recursive(path_parts[:i], pattern_parts): + return True + return False + + # Simple fnmatch for patterns without ** + if fnmatch.fnmatch(rel_path_str, pattern): + return True + + # Directory-prefix matching (e.g. "tmp/" or "tmp" matching "tmp/foo/bar") + if pattern.endswith('/'): + if rel_path_str.startswith(pattern) or rel_path_str == pattern.rstrip('/'): + return True + else: + if rel_path_str.startswith(pattern + '/') or rel_path_str == pattern: + return True + + return False + + +def _match_glob_recursive(path_parts: list, pattern_parts: list) -> bool: + """Recursively match path parts against pattern parts with ** support. + + Mirrors ContextOptimizer._match_glob_recursive. + + Args: + path_parts: List of path components. + pattern_parts: List of pattern components. + + Returns: + True if path matches pattern, False otherwise. + """ + if not pattern_parts: + return not path_parts + + if not path_parts: + return all(p == '**' or p == '' for p in pattern_parts) + + pattern_part = pattern_parts[0] + + if pattern_part == '**': + # ** matches zero or more directories + if _match_glob_recursive(path_parts, pattern_parts[1:]): + return True + if _match_glob_recursive(path_parts[1:], pattern_parts): + return True + return False + else: + if fnmatch.fnmatch(path_parts[0], pattern_part): + return _match_glob_recursive(path_parts[1:], pattern_parts[1:]) + return False + + def scan_dependency_primitives(base_dir: str, collection: PrimitiveCollection) -> None: """Scan all dependencies in apm_modules/ with priority handling. diff --git a/tests/unit/compilation/test_agents_compiler_coverage.py b/tests/unit/compilation/test_agents_compiler_coverage.py index 9274fe6e..8c5832b9 100644 --- a/tests/unit/compilation/test_agents_compiler_coverage.py +++ b/tests/unit/compilation/test_agents_compiler_coverage.py @@ -205,7 +205,7 @@ def test_compile_local_only_calls_basic_discover(self): ) as mock_disc: result = compiler.compile(config) # no primitives passed → discovers - mock_disc.assert_called_once_with(str(compiler.base_dir)) + mock_disc.assert_called_once_with(str(compiler.base_dir), exclude_patterns=config.exclude) # --------------------------------------------------------------------------- diff --git a/tests/unit/primitives/test_exclude_patterns.py b/tests/unit/primitives/test_exclude_patterns.py new file mode 100644 index 00000000..019b8914 --- /dev/null +++ b/tests/unit/primitives/test_exclude_patterns.py @@ -0,0 +1,126 @@ +"""Tests for exclude pattern filtering during primitive discovery. + +Mirrors TestDirectoryExclusion in test_context_optimizer.py to ensure +discovery and optimization exclude behavior stays consistent. +""" + +import tempfile +from pathlib import Path + +import pytest + +from apm_cli.primitives.discovery import ( + _match_glob_recursive, + _matches_exclude_patterns, + _matches_pattern, +) + + +class TestMatchesPattern: + """Test _matches_pattern against all pattern styles from ContextOptimizer.""" + + def test_simple_directory_name(self): + assert _matches_pattern("tmp/foo/bar.md", "tmp") + assert _matches_pattern("tmp", "tmp") + assert not _matches_pattern("src/foo.md", "tmp") + + def test_trailing_slash(self): + assert _matches_pattern("tmp/foo/bar.md", "tmp/") + assert _matches_pattern("tmp", "tmp/") + assert not _matches_pattern("src/foo.md", "tmp/") + + def test_glob_star_star_suffix(self): + assert _matches_pattern("docs/labs/foo.md", "docs/**") + assert _matches_pattern("docs/a/b/c.md", "docs/**") + assert not _matches_pattern("src/docs/foo.md", "docs/**") + + def test_glob_star_star_prefix(self): + assert _matches_pattern("test-fixtures/foo.md", "**/test-fixtures") + assert _matches_pattern("src/test-fixtures/foo.md", "**/test-fixtures") + assert not _matches_pattern("src/foo.md", "**/test-fixtures") + + def test_glob_star_star_both(self): + assert _matches_pattern("node_modules/pkg/index.js", "**/node_modules/**") + assert _matches_pattern("src/node_modules/pkg.js", "**/node_modules/**") + assert not _matches_pattern("src/foo.js", "**/node_modules/**") + + def test_nested_directory(self): + assert _matches_pattern("projects/packages/apm/src/foo.py", "projects/packages/apm") + assert _matches_pattern("projects/packages/apm", "projects/packages/apm") + assert not _matches_pattern("projects/packages/other/foo.py", "projects/packages/apm") + + def test_fnmatch_wildcards(self): + assert _matches_pattern("tmp123/foo.md", "tmp*") + assert _matches_pattern("mycache/foo.md", "*cache*") + assert not _matches_pattern("src/foo.md", "tmp*") + + def test_coverage_star_star_suffix(self): + assert _matches_pattern("coverage/report/index.html", "coverage/**") + assert _matches_pattern("coverage", "coverage/**") + assert not _matches_pattern("src/coverage.py", "coverage/**") + + +class TestMatchGlobRecursive: + """Test _match_glob_recursive edge cases.""" + + def test_empty_pattern_empty_path(self): + assert _match_glob_recursive([], []) + + def test_star_star_matches_zero_dirs(self): + assert _match_glob_recursive(["foo.md"], ["**", "foo.md"]) + + def test_star_star_matches_multiple_dirs(self): + assert _match_glob_recursive( + ["a", "b", "c", "foo.md"], ["**", "foo.md"] + ) + + def test_trailing_star_star(self): + assert _match_glob_recursive(["docs", "a", "b"], ["docs", "**"]) + + def test_no_match(self): + assert not _match_glob_recursive(["src", "foo.py"], ["docs", "**"]) + + +class TestMatchesExcludePatterns: + """Test _matches_exclude_patterns integration with file paths.""" + + def test_no_patterns(self): + assert not _matches_exclude_patterns(Path("/p/foo.md"), Path("/p"), None) + assert not _matches_exclude_patterns(Path("/p/foo.md"), Path("/p"), []) + + def test_path_outside_base_dir(self): + assert not _matches_exclude_patterns( + Path("/other/docs/foo.md"), Path("/project"), ["docs/**"] + ) + + def test_filters_with_real_files(self): + with tempfile.TemporaryDirectory() as tmpdir: + base = Path(tmpdir) + (base / "docs" / "labs").mkdir(parents=True) + (base / "src").mkdir() + doc_file = base / "docs" / "labs" / "example.instructions.md" + src_file = base / "src" / "real.instructions.md" + doc_file.touch() + src_file.touch() + + patterns = ["docs/**"] + assert _matches_exclude_patterns(doc_file, base, patterns) + assert not _matches_exclude_patterns(src_file, base, patterns) + + def test_multiple_patterns(self): + with tempfile.TemporaryDirectory() as tmpdir: + base = Path(tmpdir) + (base / "docs").mkdir() + (base / "tmp").mkdir() + (base / "src").mkdir() + doc = base / "docs" / "foo.md" + tmp = base / "tmp" / "bar.md" + src = base / "src" / "real.md" + doc.touch() + tmp.touch() + src.touch() + + patterns = ["docs/**", "tmp"] + assert _matches_exclude_patterns(doc, base, patterns) + assert _matches_exclude_patterns(tmp, base, patterns) + assert not _matches_exclude_patterns(src, base, patterns)