Skip to content

Commit 0e1ab39

Browse files
committed
fix: stabilize mypy type checking and optional dependency handling
- add conditional tomli/tomllib import
- implement TYPE_CHECKING-safe optional dependency patterns
- remove unused type ignores and replace with typed coercion/validation
- add missing variable annotations
- correct return type handling for untyped third-party values
- configure mypy per-module overrides for external libraries
1 parent 8ad1074 commit 0e1ab39

9 files changed

Lines changed: 314 additions & 100 deletions

File tree

pyproject.toml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,21 @@ ignore = ["E501"]
8383

8484
[tool.mypy]
8585
python_version = "3.10"
86+
files = ["src"]
87+
warn_unused_ignores = true
8688
strict = true
87-
files = ["src/"]
89+
90+
[[tool.mypy.overrides]]
91+
module = [
92+
"tomllib",
93+
"openai",
94+
"anthropic",
95+
"lancedb",
96+
"sentence_transformers",
97+
"pygit2",
98+
"tomli",
99+
]
100+
ignore_missing_imports = true
88101

89102
[tool.pytest.ini_options]
90103
testpaths = ["tests"]

src/codectx/cache.py

Lines changed: 117 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import logging
88
from dataclasses import asdict
99
from pathlib import Path
10+
from typing import Any
1011

1112
from codectx.config.defaults import CACHE_DIR_NAME
1213
from codectx.parser.base import ParseResult, Symbol
@@ -53,18 +54,72 @@ def get_parse_result(self, path: Path, file_hash: str) -> ParseResult | None:
5354
return None
5455

5556
try:
56-
symbols = tuple(
57-
Symbol(**s)
58-
for s in entry.get("symbols", []) # type: ignore[arg-type]
59-
)
57+
raw_symbols = entry.get("symbols", [])
58+
if not isinstance(raw_symbols, list):
59+
return None
60+
61+
symbols_list: list[Symbol] = []
62+
for item in raw_symbols:
63+
if not isinstance(item, dict):
64+
continue
65+
name = item.get("name")
66+
kind = item.get("kind")
67+
signature = item.get("signature")
68+
docstring = item.get("docstring")
69+
start_line = item.get("start_line")
70+
end_line = item.get("end_line")
71+
children = item.get("children", ())
72+
if not isinstance(children, (list, tuple)):
73+
children = ()
74+
if not (
75+
isinstance(name, str)
76+
and isinstance(kind, str)
77+
and isinstance(signature, str)
78+
and isinstance(docstring, str)
79+
and isinstance(start_line, int)
80+
and isinstance(end_line, int)
81+
):
82+
continue
83+
symbols_list.append(
84+
Symbol(
85+
name=name,
86+
kind=kind,
87+
signature=signature,
88+
docstring=docstring,
89+
start_line=start_line,
90+
end_line=end_line,
91+
children=_decode_children(children),
92+
)
93+
)
94+
95+
path_value = entry.get("path")
96+
language_value = entry.get("language")
97+
imports_value = entry.get("imports", [])
98+
docstrings_value = entry.get("docstrings", [])
99+
raw_source_value = entry.get("raw_source", "")
100+
line_count_value = entry.get("line_count", 0)
101+
102+
if not isinstance(path_value, str) or not isinstance(language_value, str):
103+
return None
104+
if not isinstance(imports_value, list) or not isinstance(docstrings_value, list):
105+
return None
106+
if not isinstance(raw_source_value, str):
107+
raw_source_value = str(raw_source_value)
108+
109+
imports = tuple(str(v) for v in imports_value)
110+
docstrings = tuple(str(v) for v in docstrings_value)
111+
line_count = _coerce_int(line_count_value)
112+
if line_count is None:
113+
return None
114+
60115
return ParseResult(
61-
path=Path(entry["path"]), # type: ignore[arg-type]
62-
language=str(entry["language"]),
63-
imports=tuple(entry.get("imports", [])), # type: ignore[arg-type]
64-
symbols=symbols,
65-
docstrings=tuple(entry.get("docstrings", [])), # type: ignore[arg-type]
66-
raw_source=str(entry.get("raw_source", "")),
67-
line_count=int(entry.get("line_count", 0)), # type: ignore[arg-type]
116+
path=Path(path_value),
117+
language=language_value,
118+
imports=imports,
119+
symbols=tuple(symbols_list),
120+
docstrings=docstrings,
121+
raw_source=raw_source_value,
122+
line_count=line_count,
68123
partial_parse=bool(entry.get("partial_parse", False)),
69124
)
70125
except (KeyError, TypeError, ValueError) as exc:
@@ -91,7 +146,8 @@ def get_token_count(self, path: Path, file_hash: str) -> int | None:
91146
entry = self._data.get(key)
92147
if entry is None or entry.get("file_hash") != file_hash:
93148
return None
94-
return int(entry.get("count", 0)) # type: ignore[arg-type]
149+
count_value = entry.get("count", 0)
150+
return _coerce_int(count_value)
95151

96152
def put_token_count(self, path: Path, file_hash: str, count: int) -> None:
97153
"""Cache a token count."""
@@ -150,3 +206,52 @@ def file_hash(path: Path) -> str:
150206
return hashlib.md5(content).hexdigest() # noqa: S324
151207
except OSError:
152208
return ""
209+
210+
211+
def _decode_children(children: list[Any] | tuple[Any, ...]) -> tuple[Symbol, ...]:
212+
decoded: list[Symbol] = []
213+
for child in children:
214+
if not isinstance(child, dict):
215+
continue
216+
name = child.get("name")
217+
kind = child.get("kind")
218+
signature = child.get("signature")
219+
docstring = child.get("docstring")
220+
start_line = child.get("start_line")
221+
end_line = child.get("end_line")
222+
if not (
223+
isinstance(name, str)
224+
and isinstance(kind, str)
225+
and isinstance(signature, str)
226+
and isinstance(docstring, str)
227+
and isinstance(start_line, int)
228+
and isinstance(end_line, int)
229+
):
230+
continue
231+
decoded.append(
232+
Symbol(
233+
name=name,
234+
kind=kind,
235+
signature=signature,
236+
docstring=docstring,
237+
start_line=start_line,
238+
end_line=end_line,
239+
children=(),
240+
)
241+
)
242+
return tuple(decoded)
243+
244+
245+
def _coerce_int(value: object) -> int | None:
246+
if isinstance(value, bool):
247+
return int(value)
248+
if isinstance(value, int):
249+
return value
250+
if isinstance(value, float):
251+
return int(value)
252+
if isinstance(value, str):
253+
try:
254+
return int(value)
255+
except ValueError:
256+
return None
257+
return None

src/codectx/cli.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ def search(
311311

312312
from codectx.cache import Cache
313313
from codectx.config.loader import load_config
314+
from codectx.parser.base import ParseResult
314315
from codectx.parser.treesitter import parse_files
315316
from codectx.walker import walk
316317

@@ -319,7 +320,7 @@ def search(
319320

320321
# Parse files with cache
321322
cache = Cache(config.root)
322-
parse_results = {}
323+
parse_results: dict[Path, ParseResult] = {}
323324
uncached_files: list[Path] = []
324325
for f in files:
325326
try:
@@ -480,6 +481,7 @@ def _run_pipeline(config: object) -> PipelineMetrics:
480481
from codectx.config.loader import Config
481482
from codectx.graph.builder import build_dependency_graph
482483
from codectx.output.formatter import format_context, write_context_file
484+
from codectx.parser.base import ParseResult
483485
from codectx.parser.treesitter import parse_files
484486
from codectx.ranker.git_meta import collect_git_metadata, collect_recent_changes
485487
from codectx.ranker.scorer import score_files
@@ -523,7 +525,7 @@ def _run_pipeline(config: object) -> PipelineMetrics:
523525
progress.update(task, description="Parsing files...")
524526
cache = Cache(config.root)
525527

526-
parse_results = {}
528+
parse_results: dict[Path, ParseResult] = {}
527529
uncached_files: list[Path] = []
528530

529531
for f in files:
@@ -632,7 +634,7 @@ def _run_pipeline(config: object) -> PipelineMetrics:
632634

633635
from codectx.compressor.budget import count_tokens
634636

635-
original_tokens = sum(count_tokens(pr.raw_source) for pr in parse_results.values() if pr)
637+
original_tokens = sum(count_tokens(pr.raw_source) for pr in parse_results.values())
636638

637639
return PipelineMetrics(
638640
output_path=output_path,

src/codectx/compressor/summarizer.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,14 @@
99
import logging
1010
from concurrent.futures import ThreadPoolExecutor
1111
from pathlib import Path
12+
from typing import TYPE_CHECKING, Any
1213

1314
from codectx.parser.base import ParseResult
1415

16+
if TYPE_CHECKING:
17+
import anthropic as anthropic_module
18+
import openai as openai_module
19+
1520
logger = logging.getLogger(__name__)
1621

1722
_PROMPT_TEMPLATE = (
@@ -25,17 +30,21 @@
2530
# Track availability of LLM providers
2631
_HAS_OPENAI = False
2732
_HAS_ANTHROPIC = False
33+
openai: Any | None = None
34+
anthropic: Any | None = None
2835

2936
try:
30-
import openai # noqa: F401
37+
import openai as openai_module
3138

39+
openai = openai_module
3240
_HAS_OPENAI = True
3341
except ImportError:
3442
pass
3543

3644
try:
37-
import anthropic # noqa: F401
45+
import anthropic as anthropic_module
3846

47+
anthropic = anthropic_module
3948
_HAS_ANTHROPIC = True
4049
except ImportError:
4150
pass
@@ -113,32 +122,31 @@ def _do_one(pr: ParseResult) -> tuple[Path, str]:
113122

114123
def _summarize_openai(prompt: str, model: str) -> str:
115124
"""Call OpenAI API for summarization."""
116-
if not _HAS_OPENAI:
125+
if not _HAS_OPENAI or openai is None:
117126
raise ImportError("openai is not installed. Install with: pip install codectx[llm]")
118127

119-
import openai
120-
121128
client = openai.OpenAI()
122129
response = client.chat.completions.create(
123130
model=model,
124131
messages=[{"role": "user", "content": prompt}],
125132
max_tokens=100,
126133
temperature=0.0,
127134
)
128-
return (response.choices[0].message.content or "").strip()
135+
content = response.choices[0].message.content
136+
return str(content or "").strip()
129137

130138

131139
def _summarize_anthropic(prompt: str, model: str) -> str:
132140
"""Call Anthropic API for summarization."""
133-
if not _HAS_ANTHROPIC:
141+
if not _HAS_ANTHROPIC or anthropic is None:
134142
raise ImportError("anthropic is not installed. Install with: pip install codectx[llm]")
135143

136-
import anthropic
137-
138144
client = anthropic.Anthropic()
139145
response = client.messages.create(
140146
model=model,
141147
max_tokens=100,
142148
messages=[{"role": "user", "content": prompt}],
143149
)
144-
return response.content[0].text.strip()
150+
block = response.content[0] if response.content else None
151+
text = getattr(block, "text", "") if block is not None else ""
152+
return str(text).strip()

0 commit comments

Comments (0)