jaywang98
diff --git a/‎pyproject.toml‎
Lines changed: 3 additions & 2 deletions b/‎pyproject.toml‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎src/flatcode/core/ignore.py‎
Lines changed: 21 additions & 5 deletions b/‎src/flatcode/core/ignore.py‎
Lines changed: 21 additions & 5 deletions
diff --git a/‎src/flatcode/core/scanner.py‎
Lines changed: 73 additions & 29 deletions b/‎src/flatcode/core/scanner.py‎
Lines changed: 73 additions & 29 deletions
diff --git a/‎tests/test_cli.py‎
Lines changed: 0 additions & 93 deletions b/‎tests/test_cli.py‎
Lines changed: 0 additions & 93 deletions
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "flatcode"
-version = "0.1.2"
+version = "0.1.3"
 authors = [
     { name="jaywang98", email="cryptojayw@gmail.com" },
 ]
@@ -23,7 +23,8 @@ classifiers = [
     "Topic :: Utilities",
 ]
 dependencies = [
-    "tiktoken"
+    "tiktoken",
+    "pathspec>=0.11.0"
 ]
 
 [project.scripts]
 
@@ -18,8 +18,6 @@ def bootstrap_mergeignore(root_dir: Path, output_filename: str) -> Path:
     try:
         patterns_to_write = []
         if gitignore_file.exists():
-            # 注意：在重构中，input() 等副作用最好通过依赖注入处理，
-            # 但为了保持简单，这里暂时保留
             choice = input(f"> Found .gitignore. Copy rules to .mergeignore? (Y/n): ").strip().lower()
             if choice != 'n':
                 with open(gitignore_file, "r", encoding="utf-8") as f_git:
@@ -59,20 +57,38 @@ def load_ignore_rules(mergeignore_file: Path) -> List[Tuple[str, bool]]:
                 rules.append((line.strip(), False))
     return rules
 
-def is_path_ignored(rel_path: Path, rules: List[Tuple[str, bool]]) -> bool:
+def is_path_ignored(rel_path: Path, rules: List[Tuple[str, bool]], is_directory: bool = False) -> bool:
+    """
+    Checks if a path should be ignored.
+    :param is_directory: Hint to help match patterns ending in '/' against directory paths without the slash.
+    """
     rel_path_posix = rel_path.as_posix()
+    
+    # If checking a directory "venv" against "venv/", we append a slash to force matching logic
+    if is_directory and not rel_path_posix.endswith("/"):
+        check_path = rel_path_posix + "/"
+    else:
+        check_path = rel_path_posix
+
     ignored = False
 
     for pattern, is_inclusion in rules:
         match = False
+        
+        # 1. Directory-specific pattern (ends with /)
         if pattern.endswith('/'):
-            if rel_path_posix.startswith(pattern):
+            # If pattern is "venv/", matches "venv/" (directory) or "venv/lib/..."
+            if check_path.startswith(pattern) or check_path == pattern:
                 match = True
+        
+        # 2. General pattern (glob)
         else:
+            # Match full path or file name
+            # e.g. "*.log" matches "logs/app.log" (via name)
             if fnmatch.fnmatch(rel_path_posix, pattern) or fnmatch.fnmatch(rel_path.name, pattern):
                 match = True
 
         if match:
             ignored = not is_inclusion
 
-    return ignored
+    return ignored
@@ -1,5 +1,6 @@
 # src/flatcode/core/scanner.py
 import sys
+import os
 from pathlib import Path
 from typing import Iterator, Set, List, Tuple
 
@@ -14,35 +15,78 @@ def __init__(self, root_dir: Path, ignore_rules: List[Tuple[str, bool]], extensi
         self.extensions = extensions
         self.match_all = "*" in extensions
 
+    def _is_binary_file(self, path: Path) -> bool:
+        """
+        Reads the first 1024 bytes to check for null bytes.
+        Returns True if likely binary, False if likely text.
+        """
+        try:
+            with path.open("rb") as f:
+                chunk = f.read(1024)
+                return b'\0' in chunk
+        except Exception:
+            # If we can't read it (permission, etc), treat as unsafe/binary
+            return True
+
     def scan(self) -> Iterator[FileContext]:
-        """Yields FileContext objects for valid files."""
-        for path in self.root_dir.rglob("*"):
-            if not path.is_file():
-                continue
-            
-            rel_path = path.relative_to(self.root_dir)
+        """
+        Walks the directory tree, pruning ignored directories efficiently,
+        and yields FileContext objects for valid text files.
+        """
+        # os.walk lets us edit the dirs list in place, which stops the walk from descending into ignored directories (pruning)
+        for root, dirs, files in os.walk(self.root_dir):
+            root_path = Path(root)
 
-            # 1. Ignore Check
-            if is_path_ignored(rel_path, self.ignore_rules):
-                continue
-            
-            # 2. Extension Check (Skip if match_all is True)
-            if not self.match_all:
-                if not (path.suffix in self.extensions or path.name in self.extensions):
+            # --- 1. Prune Directories (In-place modification of dirs) ---
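+            # dirs is the list that os.walk consults to decide which subdirectories to enter next.
+            # We iterate over a copy (list(dirs)) so that removing entries from dirs itself is safe.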
+            for d in list(dirs):
+                dir_abs_path = root_path / d
+                try:
+                    dir_rel_path = dir_abs_path.relative_to(self.root_dir)
+                except ValueError:
+                    continue # Should not happen in standard walk
+
+                # Check if directory should be ignored
+                # We pass is_directory=True to handle "venv/" vs "venv" matching
+                if is_path_ignored(dir_rel_path, self.ignore_rules, is_directory=True):
+                    dirs.remove(d)
+                    # Optional: Debug output
+                    # print(f"  [Debug] Pruning directory: {dir_rel_path}")
+
+            # --- 2. Process Files ---
+            for f in files:
+                file_abs_path = root_path / f
+                try:
+                    rel_path = file_abs_path.relative_to(self.root_dir)
+                except ValueError:
                     continue
-            
-            # 3. Read & Tokenize
-            try:
-                content = path.read_text(encoding="utf-8")
-                tokens = Tokenizer.count(content)
-                yield FileContext(
-                    path=path,
-                    rel_path=rel_path.as_posix(),
-                    content=content,
-                    token_count=tokens
-                )
-            except UnicodeDecodeError:
-                # Silently skip binary files
-                continue
-            except Exception as e:
-                print(f"  > [Warning] Skipping {rel_path.as_posix()} (read error: {e})", file=sys.stderr)
+
+                # A. Ignore Check
+                if is_path_ignored(rel_path, self.ignore_rules, is_directory=False):
+                    continue
+
+                # B. Extension Check (Skip if match_all is True)
+                if not self.match_all:
+                    if not (file_abs_path.suffix in self.extensions or file_abs_path.name in self.extensions):
+                        continue
+
+                # C. Binary Check & Read
+                if self._is_binary_file(file_abs_path):
+                    # Silently skip binary files (or log in verbose mode)
+                    continue
+
+                try:
+                    content = file_abs_path.read_text(encoding="utf-8")
+                    tokens = Tokenizer.count(content)
+                    yield FileContext(
+                        path=file_abs_path,
+                        rel_path=rel_path.as_posix(),
+                        content=content,
+                        token_count=tokens
+                    )
+                except UnicodeDecodeError:
+                    # Double safety: mostly caught by _is_binary_file, but just in case
+                    continue
+                except Exception as e:
+                    print(f"  > [Warning] Skipping {rel_path.as_posix()} (read error: {e})", file=sys.stderr)
Original file line number	Diff line number	Diff line change
`@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"`
`6`	`6`
`7`	`7`	`[project]`
`8`	`8`	`name = "flatcode"`
`9`		`-version = "0.1.2"`
	`9`	`+version = "0.1.3"`
`10`	`10`	`authors = [`
`11`	`11`	`{ name="jaywang98", email="cryptojayw@gmail.com" },`
`12`	`12`	`]`
`@@ -23,7 +23,8 @@ classifiers = [`
`23`	`23`	`"Topic :: Utilities",`
`24`	`24`	`]`
`25`	`25`	`dependencies = [`
`26`		`- "tiktoken"`
	`26`	`+ "tiktoken",`
	`27`	`+ "pathspec>=0.11.0"`
`27`	`28`	`]`
`28`	`29`
`29`	`30`	`[project.scripts]`