Skip to content

Commit 36e0b23

Browse files
committed
fix: bugs
1 parent d833e19 commit 36e0b23

5 files changed

Lines changed: 205 additions & 253 deletions

File tree

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
66

77
[project]
88
name = "flatcode"
9-
version = "0.1.2"
9+
version = "0.1.3"
1010
authors = [
1111
{ name="jaywang98", email="cryptojayw@gmail.com" },
1212
]
@@ -23,7 +23,8 @@ classifiers = [
2323
"Topic :: Utilities",
2424
]
2525
dependencies = [
26-
"tiktoken"
26+
"tiktoken",
27+
"pathspec>=0.11.0"
2728
]
2829

2930
[project.scripts]

src/flatcode/core/ignore.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ def bootstrap_mergeignore(root_dir: Path, output_filename: str) -> Path:
1818
try:
1919
patterns_to_write = []
2020
if gitignore_file.exists():
21-
# 注意:在重构中,input() 等副作用最好通过依赖注入处理,
22-
# 但为了保持简单,这里暂时保留
2321
choice = input(f"> Found .gitignore. Copy rules to .mergeignore? (Y/n): ").strip().lower()
2422
if choice != 'n':
2523
with open(gitignore_file, "r", encoding="utf-8") as f_git:
@@ -59,20 +57,38 @@ def load_ignore_rules(mergeignore_file: Path) -> List[Tuple[str, bool]]:
5957
rules.append((line.strip(), False))
6058
return rules
6159

62-
def is_path_ignored(rel_path: Path, rules: List[Tuple[str, bool]]) -> bool:
60+
def is_path_ignored(rel_path: Path, rules: List[Tuple[str, bool]], is_directory: bool = False) -> bool:
61+
"""
62+
Checks if a path should be ignored.
63+
:param is_directory: Hint to help match patterns ending in '/' against directory paths without the slash.
64+
"""
6365
rel_path_posix = rel_path.as_posix()
66+
67+
# If checking a directory "venv" against "venv/", we append a slash to force matching logic
68+
if is_directory and not rel_path_posix.endswith("/"):
69+
check_path = rel_path_posix + "/"
70+
else:
71+
check_path = rel_path_posix
72+
6473
ignored = False
6574

6675
for pattern, is_inclusion in rules:
6776
match = False
77+
78+
# 1. Directory-specific pattern (ends with /)
6879
if pattern.endswith('/'):
69-
if rel_path_posix.startswith(pattern):
80+
# If pattern is "venv/", matches "venv/" (directory) or "venv/lib/..."
81+
if check_path.startswith(pattern) or check_path == pattern:
7082
match = True
83+
84+
# 2. General pattern (glob)
7185
else:
86+
# Match full path or file name
87+
# e.g. "*.log" matches "logs/app.log" (via name)
7288
if fnmatch.fnmatch(rel_path_posix, pattern) or fnmatch.fnmatch(rel_path.name, pattern):
7389
match = True
7490

7591
if match:
7692
ignored = not is_inclusion
7793

78-
return ignored
94+
return ignored

src/flatcode/core/scanner.py

Lines changed: 73 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# src/flatcode/core/scanner.py
22
import sys
3+
import os
34
from pathlib import Path
45
from typing import Iterator, Set, List, Tuple
56

@@ -14,35 +15,78 @@ def __init__(self, root_dir: Path, ignore_rules: List[Tuple[str, bool]], extensi
1415
self.extensions = extensions
1516
self.match_all = "*" in extensions
1617

18+
def _is_binary_file(self, path: Path) -> bool:
19+
"""
20+
Reads the first 1024 bytes to check for null bytes.
21+
Returns True if likely binary, False if likely text.
22+
"""
23+
try:
24+
with path.open("rb") as f:
25+
chunk = f.read(1024)
26+
return b'\0' in chunk
27+
except Exception:
28+
# If we can't read it (permission, etc), treat as unsafe/binary
29+
return True
30+
1731
def scan(self) -> Iterator[FileContext]:
18-
"""Yields FileContext objects for valid files."""
19-
for path in self.root_dir.rglob("*"):
20-
if not path.is_file():
21-
continue
22-
23-
rel_path = path.relative_to(self.root_dir)
32+
"""
33+
Walks the directory tree, pruning ignored directories efficiently,
34+
and yields FileContext objects for valid text files.
35+
"""
36+
# Using os.walk lets us modify the dirs list in place, which stops the walk from descending into ignored directories (pruning)
37+
for root, dirs, files in os.walk(self.root_dir):
38+
root_path = Path(root)
2439

25-
# 1. Ignore Check
26-
if is_path_ignored(rel_path, self.ignore_rules):
27-
continue
28-
29-
# 2. Extension Check (Skip if match_all is True)
30-
if not self.match_all:
31-
if not (path.suffix in self.extensions or path.name in self.extensions):
40+
# --- 1. Prune Directories (In-place modification of dirs) ---
41+
# dirs is a list; os.walk consults it to decide which subdirectories to enter next.
42+
# We iterate over a copy (list(dirs)) so entries can be removed from dirs safely during the loop.
43+
for d in list(dirs):
44+
dir_abs_path = root_path / d
45+
try:
46+
dir_rel_path = dir_abs_path.relative_to(self.root_dir)
47+
except ValueError:
48+
continue # Should not happen in standard walk
49+
50+
# Check if directory should be ignored
51+
# We pass is_directory=True to handle "venv/" vs "venv" matching
52+
if is_path_ignored(dir_rel_path, self.ignore_rules, is_directory=True):
53+
dirs.remove(d)
54+
# Optional: Debug output
55+
# print(f" [Debug] Pruning directory: {dir_rel_path}")
56+
57+
# --- 2. Process Files ---
58+
for f in files:
59+
file_abs_path = root_path / f
60+
try:
61+
rel_path = file_abs_path.relative_to(self.root_dir)
62+
except ValueError:
3263
continue
33-
34-
# 3. Read & Tokenize
35-
try:
36-
content = path.read_text(encoding="utf-8")
37-
tokens = Tokenizer.count(content)
38-
yield FileContext(
39-
path=path,
40-
rel_path=rel_path.as_posix(),
41-
content=content,
42-
token_count=tokens
43-
)
44-
except UnicodeDecodeError:
45-
# Silently skip binary files
46-
continue
47-
except Exception as e:
48-
print(f" > [Warning] Skipping {rel_path.as_posix()} (read error: {e})", file=sys.stderr)
64+
65+
# A. Ignore Check
66+
if is_path_ignored(rel_path, self.ignore_rules, is_directory=False):
67+
continue
68+
69+
# B. Extension Check (Skip if match_all is True)
70+
if not self.match_all:
71+
if not (file_abs_path.suffix in self.extensions or file_abs_path.name in self.extensions):
72+
continue
73+
74+
# C. Binary Check & Read
75+
if self._is_binary_file(file_abs_path):
76+
# Silently skip binary files (or log in verbose mode)
77+
continue
78+
79+
try:
80+
content = file_abs_path.read_text(encoding="utf-8")
81+
tokens = Tokenizer.count(content)
82+
yield FileContext(
83+
path=file_abs_path,
84+
rel_path=rel_path.as_posix(),
85+
content=content,
86+
token_count=tokens
87+
)
88+
except UnicodeDecodeError:
89+
# Double safety: mostly caught by _is_binary_file, but just in case
90+
continue
91+
except Exception as e:
92+
print(f" > [Warning] Skipping {rel_path.as_posix()} (read error: {e})", file=sys.stderr)

tests/test_cli.py

Lines changed: 0 additions & 93 deletions
This file was deleted.

0 commit comments

Comments
 (0)