Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/apm_cli/compilation/agents_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,11 @@ def compile(self, config: CompilationConfig, primitives: Optional[PrimitiveColle
if primitives is None:
if config.local_only:
# Use basic discovery for local-only mode
primitives = discover_primitives(str(self.base_dir))
primitives = discover_primitives(str(self.base_dir), exclude_patterns=config.exclude)
else:
# Use enhanced discovery with dependencies (Task 4 integration)
from ..primitives.discovery import discover_primitives_with_dependencies
primitives = discover_primitives_with_dependencies(str(self.base_dir))
primitives = discover_primitives_with_dependencies(str(self.base_dir), exclude_patterns=config.exclude)

# Route to targets based on config.target
results: List[CompilationResult] = []
Expand Down
156 changes: 132 additions & 24 deletions src/apm_cli/primitives/discovery.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
"""Discovery functionality for primitive files."""

import fnmatch
import os
import glob
from pathlib import Path
from typing import List, Dict
from typing import List, Dict, Optional

from .models import PrimitiveCollection
from .parser import parse_primitive_file, parse_skill_file
Expand Down Expand Up @@ -52,55 +53,60 @@
}


def discover_primitives(base_dir: str = ".") -> PrimitiveCollection:
def discover_primitives(base_dir: str = ".", exclude_patterns: Optional[List[str]] = None) -> PrimitiveCollection:
"""Find all APM primitive files in the project.

Searches for .chatmode.md, .instructions.md, .context.md, .memory.md files
in both .apm/ and .github/ directory structures, plus SKILL.md at root.

Args:
base_dir (str): Base directory to search in. Defaults to current directory.

exclude_patterns: Glob patterns for paths to exclude from discovery.

Returns:
PrimitiveCollection: Collection of discovered and parsed primitives.
"""
collection = PrimitiveCollection()

base_path = Path(base_dir)

# Find and parse files for each primitive type
for primitive_type, patterns in LOCAL_PRIMITIVE_PATTERNS.items():
files = find_primitive_files(base_dir, patterns)

for file_path in files:
if _matches_exclude_patterns(file_path, base_path, exclude_patterns):
continue
try:
primitive = parse_primitive_file(file_path, source="local")
collection.add_primitive(primitive)
except Exception as e:
print(f"Warning: Failed to parse {file_path}: {e}")

# Discover SKILL.md at project root
_discover_local_skill(base_dir, collection)

return collection


def discover_primitives_with_dependencies(base_dir: str = ".") -> PrimitiveCollection:
def discover_primitives_with_dependencies(base_dir: str = ".", exclude_patterns: Optional[List[str]] = None) -> PrimitiveCollection:
"""Enhanced primitive discovery including dependency sources.

Priority Order:
1. Local .apm/ (highest priority - always wins)
2. Dependencies in declaration order (first declared wins)
3. Plugins (lowest priority)

Args:
base_dir (str): Base directory to search in. Defaults to current directory.

exclude_patterns: Glob patterns for paths to exclude from discovery.

Returns:
PrimitiveCollection: Collection of discovered and parsed primitives with source tracking.
"""
collection = PrimitiveCollection()

# Phase 1: Local primitives (highest priority)
scan_local_primitives(base_dir, collection)
scan_local_primitives(base_dir, collection, exclude_patterns=exclude_patterns)

# Phase 1b: Local SKILL.md
_discover_local_skill(base_dir, collection)
Expand All @@ -113,27 +119,30 @@ def discover_primitives_with_dependencies(base_dir: str = ".") -> PrimitiveColle
return collection


def scan_local_primitives(base_dir: str, collection: PrimitiveCollection) -> None:
def scan_local_primitives(base_dir: str, collection: PrimitiveCollection, exclude_patterns: Optional[List[str]] = None) -> None:
"""Scan local .apm/ directory for primitives.

Args:
base_dir (str): Base directory to search in.
collection (PrimitiveCollection): Collection to add primitives to.
exclude_patterns: Glob patterns for paths to exclude from discovery.
"""
# Find and parse files for each primitive type
for primitive_type, patterns in LOCAL_PRIMITIVE_PATTERNS.items():
files = find_primitive_files(base_dir, patterns)

# Filter out files from apm_modules to avoid conflicts with dependency scanning
local_files = []
base_path = Path(base_dir)
apm_modules_path = base_path / "apm_modules"

for file_path in files:
# Only include files that are NOT in apm_modules directory
if not _is_under_directory(file_path, apm_modules_path):
local_files.append(file_path)

if _is_under_directory(file_path, apm_modules_path):
continue
if _matches_exclude_patterns(file_path, base_path, exclude_patterns):
continue
local_files.append(file_path)

for file_path in local_files:
try:
primitive = parse_primitive_file(file_path, source="local")
Expand All @@ -144,11 +153,11 @@ def scan_local_primitives(base_dir: str, collection: PrimitiveCollection) -> Non

def _is_under_directory(file_path: Path, directory: Path) -> bool:
"""Check if a file path is under a specific directory.

Args:
file_path (Path): Path to check.
directory (Path): Directory to check against.

Returns:
bool: True if file_path is under directory, False otherwise.
"""
Expand All @@ -159,6 +168,105 @@ def _is_under_directory(file_path: Path, directory: Path) -> bool:
return False


def _matches_exclude_patterns(file_path: Path, base_dir: Path, exclude_patterns: Optional[List[str]] = None) -> bool:
"""Check if a file path matches any exclusion pattern.

Args:
file_path: Absolute path to check.
base_dir: Base directory for computing relative paths.
exclude_patterns: Glob patterns to match against.

Returns:
True if file should be excluded, False otherwise.
"""
if not exclude_patterns:
return False

try:
rel_path_str = str(file_path.resolve().relative_to(base_dir.resolve())).replace(os.sep, '/')
except ValueError:
return False

for pattern in exclude_patterns:
normalized = pattern.replace('\\', '/').replace(os.sep, '/')
if _matches_pattern(rel_path_str, normalized):
return True

return False


def _matches_pattern(rel_path_str: str, pattern: str) -> bool:
"""Check if a relative path matches an exclusion pattern.

Mirrors ContextOptimizer._matches_pattern semantics so that
discovery and optimization exclude behavior stays consistent.

Args:
rel_path_str: Forward-slash-normalized path relative to base_dir.
pattern: Forward-slash-normalized exclusion pattern (glob syntax).

Returns:
True if path matches pattern, False otherwise.
"""
# Handle ** patterns with recursive matching
if '**' in pattern:
path_parts = rel_path_str.split('/')
pattern_parts = pattern.split('/')
# Check the full path and each parent directory, since the optimizer
# matches directories (which get pruned) while we match file paths
for i in range(len(path_parts), 0, -1):
if _match_glob_recursive(path_parts[:i], pattern_parts):
return True
return False

# Simple fnmatch for patterns without **
if fnmatch.fnmatch(rel_path_str, pattern):
return True

# Directory-prefix matching (e.g. "tmp/" or "tmp" matching "tmp/foo/bar")
if pattern.endswith('/'):
if rel_path_str.startswith(pattern) or rel_path_str == pattern.rstrip('/'):
return True
else:
if rel_path_str.startswith(pattern + '/') or rel_path_str == pattern:
return True

return False


def _match_glob_recursive(path_parts: list, pattern_parts: list) -> bool:
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dupped, I know, but extracting it and refactor the code felt out of the scope of this bug.

"""Recursively match path parts against pattern parts with ** support.

Mirrors ContextOptimizer._match_glob_recursive.

Args:
path_parts: List of path components.
pattern_parts: List of pattern components.

Returns:
True if path matches pattern, False otherwise.
"""
if not pattern_parts:
return not path_parts

if not path_parts:
return all(p == '**' or p == '' for p in pattern_parts)

pattern_part = pattern_parts[0]

if pattern_part == '**':
# ** matches zero or more directories
if _match_glob_recursive(path_parts, pattern_parts[1:]):
return True
if _match_glob_recursive(path_parts[1:], pattern_parts):
return True
return False
else:
if fnmatch.fnmatch(path_parts[0], pattern_part):
return _match_glob_recursive(path_parts[1:], pattern_parts[1:])
return False


def scan_dependency_primitives(base_dir: str, collection: PrimitiveCollection) -> None:
"""Scan all dependencies in apm_modules/ with priority handling.

Expand Down
2 changes: 1 addition & 1 deletion tests/unit/compilation/test_agents_compiler_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def test_compile_local_only_calls_basic_discover(self):
) as mock_disc:
result = compiler.compile(config) # no primitives passed → discovers

mock_disc.assert_called_once_with(str(compiler.base_dir))
mock_disc.assert_called_once_with(str(compiler.base_dir), exclude_patterns=config.exclude)


# ---------------------------------------------------------------------------
Expand Down
126 changes: 126 additions & 0 deletions tests/unit/primitives/test_exclude_patterns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
"""Tests for exclude pattern filtering during primitive discovery.

Mirrors TestDirectoryExclusion in test_context_optimizer.py to ensure
discovery and optimization exclude behavior stays consistent.
"""

import tempfile
from pathlib import Path

import pytest

from apm_cli.primitives.discovery import (
_match_glob_recursive,
_matches_exclude_patterns,
_matches_pattern,
)


class TestMatchesPattern:
"""Test _matches_pattern against all pattern styles from ContextOptimizer."""

def test_simple_directory_name(self):
assert _matches_pattern("tmp/foo/bar.md", "tmp")
assert _matches_pattern("tmp", "tmp")
assert not _matches_pattern("src/foo.md", "tmp")

def test_trailing_slash(self):
assert _matches_pattern("tmp/foo/bar.md", "tmp/")
assert _matches_pattern("tmp", "tmp/")
assert not _matches_pattern("src/foo.md", "tmp/")

def test_glob_star_star_suffix(self):
assert _matches_pattern("docs/labs/foo.md", "docs/**")
assert _matches_pattern("docs/a/b/c.md", "docs/**")
assert not _matches_pattern("src/docs/foo.md", "docs/**")

def test_glob_star_star_prefix(self):
assert _matches_pattern("test-fixtures/foo.md", "**/test-fixtures")
assert _matches_pattern("src/test-fixtures/foo.md", "**/test-fixtures")
assert not _matches_pattern("src/foo.md", "**/test-fixtures")

def test_glob_star_star_both(self):
assert _matches_pattern("node_modules/pkg/index.js", "**/node_modules/**")
assert _matches_pattern("src/node_modules/pkg.js", "**/node_modules/**")
assert not _matches_pattern("src/foo.js", "**/node_modules/**")

def test_nested_directory(self):
assert _matches_pattern("projects/packages/apm/src/foo.py", "projects/packages/apm")
assert _matches_pattern("projects/packages/apm", "projects/packages/apm")
assert not _matches_pattern("projects/packages/other/foo.py", "projects/packages/apm")

def test_fnmatch_wildcards(self):
assert _matches_pattern("tmp123/foo.md", "tmp*")
assert _matches_pattern("mycache/foo.md", "*cache*")
assert not _matches_pattern("src/foo.md", "tmp*")

def test_coverage_star_star_suffix(self):
assert _matches_pattern("coverage/report/index.html", "coverage/**")
assert _matches_pattern("coverage", "coverage/**")
assert not _matches_pattern("src/coverage.py", "coverage/**")


class TestMatchGlobRecursive:
"""Test _match_glob_recursive edge cases."""

def test_empty_pattern_empty_path(self):
assert _match_glob_recursive([], [])

def test_star_star_matches_zero_dirs(self):
assert _match_glob_recursive(["foo.md"], ["**", "foo.md"])

def test_star_star_matches_multiple_dirs(self):
assert _match_glob_recursive(
["a", "b", "c", "foo.md"], ["**", "foo.md"]
)

def test_trailing_star_star(self):
assert _match_glob_recursive(["docs", "a", "b"], ["docs", "**"])

def test_no_match(self):
assert not _match_glob_recursive(["src", "foo.py"], ["docs", "**"])


class TestMatchesExcludePatterns:
"""Test _matches_exclude_patterns integration with file paths."""

def test_no_patterns(self):
assert not _matches_exclude_patterns(Path("/p/foo.md"), Path("/p"), None)
assert not _matches_exclude_patterns(Path("/p/foo.md"), Path("/p"), [])

def test_path_outside_base_dir(self):
assert not _matches_exclude_patterns(
Path("/other/docs/foo.md"), Path("/project"), ["docs/**"]
)

def test_filters_with_real_files(self):
with tempfile.TemporaryDirectory() as tmpdir:
base = Path(tmpdir)
(base / "docs" / "labs").mkdir(parents=True)
(base / "src").mkdir()
doc_file = base / "docs" / "labs" / "example.instructions.md"
src_file = base / "src" / "real.instructions.md"
doc_file.touch()
src_file.touch()

patterns = ["docs/**"]
assert _matches_exclude_patterns(doc_file, base, patterns)
assert not _matches_exclude_patterns(src_file, base, patterns)

def test_multiple_patterns(self):
with tempfile.TemporaryDirectory() as tmpdir:
base = Path(tmpdir)
(base / "docs").mkdir()
(base / "tmp").mkdir()
(base / "src").mkdir()
doc = base / "docs" / "foo.md"
tmp = base / "tmp" / "bar.md"
src = base / "src" / "real.md"
doc.touch()
tmp.touch()
src.touch()

patterns = ["docs/**", "tmp"]
assert _matches_exclude_patterns(doc, base, patterns)
assert _matches_exclude_patterns(tmp, base, patterns)
assert not _matches_exclude_patterns(src, base, patterns)