Skip to content

Commit 0e1ab39

Browse files
committed
fix: stabilize mypy type checking and optional dependency handling
- add conditional tomli/tomllib import
- implement TYPE_CHECKING-safe optional dependency patterns
- remove unused type ignores and replace with typed coercion/validation
- add missing variable annotations
- correct return type handling for untyped third-party values
- configure mypy per-module overrides for external libraries
1 parent 8ad1074 commit 0e1ab39

9 files changed

Lines changed: 314 additions & 100 deletions

File tree

pyproject.toml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,21 @@ ignore = ["E501"]
8383

8484
[tool.mypy]
8585
python_version = "3.10"
86+
files = ["src"]
87+
warn_unused_ignores = true
8688
strict = true
87-
files = ["src/"]
89+
90+
[[tool.mypy.overrides]]
91+
module = [
92+
"tomllib",
93+
"openai",
94+
"anthropic",
95+
"lancedb",
96+
"sentence_transformers",
97+
"pygit2",
98+
"tomli",
99+
]
100+
ignore_missing_imports = true
88101

89102
[tool.pytest.ini_options]
90103
testpaths = ["tests"]

src/codectx/cache.py

Lines changed: 117 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import logging
88
from dataclasses import asdict
99
from pathlib import Path
10+
from typing import Any
1011

1112
from codectx.config.defaults import CACHE_DIR_NAME
1213
from codectx.parser.base import ParseResult, Symbol
@@ -53,18 +54,72 @@ def get_parse_result(self, path: Path, file_hash: str) -> ParseResult | None:
5354
return None
5455

5556
try:
56-
symbols = tuple(
57-
Symbol(**s)
58-
for s in entry.get("symbols", []) # type: ignore[arg-type]
59-
)
57+
raw_symbols = entry.get("symbols", [])
58+
if not isinstance(raw_symbols, list):
59+
return None
60+
61+
symbols_list: list[Symbol] = []
62+
for item in raw_symbols:
63+
if not isinstance(item, dict):
64+
continue
65+
name = item.get("name")
66+
kind = item.get("kind")
67+
signature = item.get("signature")
68+
docstring = item.get("docstring")
69+
start_line = item.get("start_line")
70+
end_line = item.get("end_line")
71+
children = item.get("children", ())
72+
if not isinstance(children, (list, tuple)):
73+
children = ()
74+
if not (
75+
isinstance(name, str)
76+
and isinstance(kind, str)
77+
and isinstance(signature, str)
78+
and isinstance(docstring, str)
79+
and isinstance(start_line, int)
80+
and isinstance(end_line, int)
81+
):
82+
continue
83+
symbols_list.append(
84+
Symbol(
85+
name=name,
86+
kind=kind,
87+
signature=signature,
88+
docstring=docstring,
89+
start_line=start_line,
90+
end_line=end_line,
91+
children=_decode_children(children),
92+
)
93+
)
94+
95+
path_value = entry.get("path")
96+
language_value = entry.get("language")
97+
imports_value = entry.get("imports", [])
98+
docstrings_value = entry.get("docstrings", [])
99+
raw_source_value = entry.get("raw_source", "")
100+
line_count_value = entry.get("line_count", 0)
101+
102+
if not isinstance(path_value, str) or not isinstance(language_value, str):
103+
return None
104+
if not isinstance(imports_value, list) or not isinstance(docstrings_value, list):
105+
return None
106+
if not isinstance(raw_source_value, str):
107+
raw_source_value = str(raw_source_value)
108+
109+
imports = tuple(str(v) for v in imports_value)
110+
docstrings = tuple(str(v) for v in docstrings_value)
111+
line_count = _coerce_int(line_count_value)
112+
if line_count is None:
113+
return None
114+
60115
return ParseResult(
61-
path=Path(entry["path"]), # type: ignore[arg-type]
62-
language=str(entry["language"]),
63-
imports=tuple(entry.get("imports", [])), # type: ignore[arg-type]
64-
symbols=symbols,
65-
docstrings=tuple(entry.get("docstrings", [])), # type: ignore[arg-type]
66-
raw_source=str(entry.get("raw_source", "")),
67-
line_count=int(entry.get("line_count", 0)), # type: ignore[arg-type]
116+
path=Path(path_value),
117+
language=language_value,
118+
imports=imports,
119+
symbols=tuple(symbols_list),
120+
docstrings=docstrings,
121+
raw_source=raw_source_value,
122+
line_count=line_count,
68123
partial_parse=bool(entry.get("partial_parse", False)),
69124
)
70125
except (KeyError, TypeError, ValueError) as exc:
@@ -91,7 +146,8 @@ def get_token_count(self, path: Path, file_hash: str) -> int | None:
91146
entry = self._data.get(key)
92147
if entry is None or entry.get("file_hash") != file_hash:
93148
return None
94-
return int(entry.get("count", 0)) # type: ignore[arg-type]
149+
count_value = entry.get("count", 0)
150+
return _coerce_int(count_value)
95151

96152
def put_token_count(self, path: Path, file_hash: str, count: int) -> None:
97153
"""Cache a token count."""
@@ -150,3 +206,52 @@ def file_hash(path: Path) -> str:
150206
return hashlib.md5(content).hexdigest() # noqa: S324
151207
except OSError:
152208
return ""
209+
210+
211+
def _decode_children(children: list[Any] | tuple[Any, ...]) -> tuple[Symbol, ...]:
212+
decoded: list[Symbol] = []
213+
for child in children:
214+
if not isinstance(child, dict):
215+
continue
216+
name = child.get("name")
217+
kind = child.get("kind")
218+
signature = child.get("signature")
219+
docstring = child.get("docstring")
220+
start_line = child.get("start_line")
221+
end_line = child.get("end_line")
222+
if not (
223+
isinstance(name, str)
224+
and isinstance(kind, str)
225+
and isinstance(signature, str)
226+
and isinstance(docstring, str)
227+
and isinstance(start_line, int)
228+
and isinstance(end_line, int)
229+
):
230+
continue
231+
decoded.append(
232+
Symbol(
233+
name=name,
234+
kind=kind,
235+
signature=signature,
236+
docstring=docstring,
237+
start_line=start_line,
238+
end_line=end_line,
239+
children=(),
240+
)
241+
)
242+
return tuple(decoded)
243+
244+
245+
def _coerce_int(value: object) -> int | None:
246+
if isinstance(value, bool):
247+
return int(value)
248+
if isinstance(value, int):
249+
return value
250+
if isinstance(value, float):
251+
return int(value)
252+
if isinstance(value, str):
253+
try:
254+
return int(value)
255+
except ValueError:
256+
return None
257+
return None

src/codectx/cli.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ def search(
311311

312312
from codectx.cache import Cache
313313
from codectx.config.loader import load_config
314+
from codectx.parser.base import ParseResult
314315
from codectx.parser.treesitter import parse_files
315316
from codectx.walker import walk
316317

@@ -319,7 +320,7 @@ def search(
319320

320321
# Parse files with cache
321322
cache = Cache(config.root)
322-
parse_results = {}
323+
parse_results: dict[Path, ParseResult] = {}
323324
uncached_files: list[Path] = []
324325
for f in files:
325326
try:
@@ -480,6 +481,7 @@ def _run_pipeline(config: object) -> PipelineMetrics:
480481
from codectx.config.loader import Config
481482
from codectx.graph.builder import build_dependency_graph
482483
from codectx.output.formatter import format_context, write_context_file
484+
from codectx.parser.base import ParseResult
483485
from codectx.parser.treesitter import parse_files
484486
from codectx.ranker.git_meta import collect_git_metadata, collect_recent_changes
485487
from codectx.ranker.scorer import score_files
@@ -523,7 +525,7 @@ def _run_pipeline(config: object) -> PipelineMetrics:
523525
progress.update(task, description="Parsing files...")
524526
cache = Cache(config.root)
525527

526-
parse_results = {}
528+
parse_results: dict[Path, ParseResult] = {}
527529
uncached_files: list[Path] = []
528530

529531
for f in files:
@@ -632,7 +634,7 @@ def _run_pipeline(config: object) -> PipelineMetrics:
632634

633635
from codectx.compressor.budget import count_tokens
634636

635-
original_tokens = sum(count_tokens(pr.raw_source) for pr in parse_results.values() if pr)
637+
original_tokens = sum(count_tokens(pr.raw_source) for pr in parse_results.values())
636638

637639
return PipelineMetrics(
638640
output_path=output_path,

src/codectx/compressor/summarizer.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,14 @@
99
import logging
1010
from concurrent.futures import ThreadPoolExecutor
1111
from pathlib import Path
12+
from typing import TYPE_CHECKING, Any
1213

1314
from codectx.parser.base import ParseResult
1415

16+
if TYPE_CHECKING:
17+
import anthropic as anthropic_module
18+
import openai as openai_module
19+
1520
logger = logging.getLogger(__name__)
1621

1722
_PROMPT_TEMPLATE = (
@@ -25,17 +30,21 @@
2530
# Track availability of LLM providers
2631
_HAS_OPENAI = False
2732
_HAS_ANTHROPIC = False
33+
openai: Any | None = None
34+
anthropic: Any | None = None
2835

2936
try:
30-
import openai # noqa: F401
37+
import openai as openai_module
3138

39+
openai = openai_module
3240
_HAS_OPENAI = True
3341
except ImportError:
3442
pass
3543

3644
try:
37-
import anthropic # noqa: F401
45+
import anthropic as anthropic_module
3846

47+
anthropic = anthropic_module
3948
_HAS_ANTHROPIC = True
4049
except ImportError:
4150
pass
@@ -113,32 +122,31 @@ def _do_one(pr: ParseResult) -> tuple[Path, str]:
113122

114123
def _summarize_openai(prompt: str, model: str) -> str:
115124
"""Call OpenAI API for summarization."""
116-
if not _HAS_OPENAI:
125+
if not _HAS_OPENAI or openai is None:
117126
raise ImportError("openai is not installed. Install with: pip install codectx[llm]")
118127

119-
import openai
120-
121128
client = openai.OpenAI()
122129
response = client.chat.completions.create(
123130
model=model,
124131
messages=[{"role": "user", "content": prompt}],
125132
max_tokens=100,
126133
temperature=0.0,
127134
)
128-
return (response.choices[0].message.content or "").strip()
135+
content = response.choices[0].message.content
136+
return str(content or "").strip()
129137

130138

131139
def _summarize_anthropic(prompt: str, model: str) -> str:
132140
"""Call Anthropic API for summarization."""
133-
if not _HAS_ANTHROPIC:
141+
if not _HAS_ANTHROPIC or anthropic is None:
134142
raise ImportError("anthropic is not installed. Install with: pip install codectx[llm]")
135143

136-
import anthropic
137-
138144
client = anthropic.Anthropic()
139145
response = client.messages.create(
140146
model=model,
141147
max_tokens=100,
142148
messages=[{"role": "user", "content": prompt}],
143149
)
144-
return response.content[0].text.strip()
150+
block = response.content[0] if response.content else None
151+
text = getattr(block, "text", "") if block is not None else ""
152+
return str(text).strip()

0 commit comments

Comments (0)