diff --git a/README.md b/README.md index ef1dc89..4a11e1f 100644 --- a/README.md +++ b/README.md @@ -112,6 +112,19 @@ sia-code index --clean View config: `sia-code config show` +**Git worktrees:** by default, sia-code auto-detects worktrees and stores a single shared index in the git common dir. You can override with `SIA_CODE_INDEX_SCOPE` or set an explicit path with `SIA_CODE_INDEX_DIR`. + +```bash +# Force shared index even outside worktrees +export SIA_CODE_INDEX_SCOPE=shared + +# Or disable auto-detection (per-worktree index) +export SIA_CODE_INDEX_SCOPE=worktree + +sia-code init +sia-code index . +``` + **AI Summarization** (optional, enhances git changelogs): ```json diff --git a/docs/CLI_FEATURES.md b/docs/CLI_FEATURES.md index 7adc630..7a61124 100644 --- a/docs/CLI_FEATURES.md +++ b/docs/CLI_FEATURES.md @@ -79,6 +79,13 @@ sia-code init [OPTIONS] | `--path PATH` | Directory to initialize | `.` (current directory) | | `--dry-run` | Preview project analysis without creating index | `false` | +**Environment variables:** + +| Variable | Description | +|----------|-------------| +| `SIA_CODE_INDEX_DIR` | Override the index directory (absolute or project-relative) | +| `SIA_CODE_INDEX_SCOPE` | Set to `shared` to reuse one index across git worktrees | + **Examples:** ```bash @@ -100,7 +107,7 @@ sia-code init --dry-run - Finds documentation files - Determines recommended search strategy -2. **Creates `.sia-code/` directory:** +2. **Creates index directory:** - `config.json` - Configuration with auto-detected settings - `vectors.usearch` - HNSW vector index (created empty) - `index.db` - SQLite database with FTS5 (created empty) diff --git a/docs/MEMORY_FEATURES.md b/docs/MEMORY_FEATURES.md index 787a241..84e98d5 100644 --- a/docs/MEMORY_FEATURES.md +++ b/docs/MEMORY_FEATURES.md @@ -9,6 +9,7 @@ Memory features help teams: - **Understand history** - What changed and when? - **Share context** - Export/import decisions across teams - **Maintain documentation** - Auto-generate from git history +- **Preserve timing** - Record commit hashes/timestamps for traceability ## Features @@ -57,6 +58,8 @@ Auto-extracted from git commit history. - Breaking changes - Major refactors +Each event stores the source commit hash and timestamp for auditability. + **Example:** ```python # Auto-extracted from git log diff --git a/sia_code/cli.py b/sia_code/cli.py index d8878fa..528d9d9 100644 --- a/sia_code/cli.py +++ b/sia_code/cli.py @@ -1,7 +1,10 @@ """CLI entry point for Sia Code.""" +import os import sys import logging +import subprocess +from datetime import datetime from pathlib import Path import click @@ -128,6 +131,117 @@ def create_backend(index_path: Path, config: Config, valid_chunks=None): ) +def resolve_git_common_dir(base_dir: Path) -> Path | None: + """Return git common dir path if available for a repository.""" + try: + result = subprocess.run( + ["git", "rev-parse", "--git-common-dir"], + cwd=base_dir, + check=True, + capture_output=True, + text=True, + ) + except (OSError, subprocess.CalledProcessError): + return None + + output = result.stdout.strip() + if not output: + return None + + common_dir = Path(output) + if not common_dir.is_absolute(): + common_dir = (base_dir / common_dir).resolve() + return common_dir + + +def is_git_worktree(base_dir: Path) -> bool: + """Return True when base_dir is inside a git worktree. + + In a normal repo, `--git-dir` and `--git-common-dir` resolve to the same path. + In a linked worktree, the worktree's git dir differs from the common dir. + """ + + try: + git_dir_result = subprocess.run( + ["git", "rev-parse", "--git-dir"], + cwd=base_dir, + check=True, + capture_output=True, + text=True, + ) + common_dir_result = subprocess.run( + ["git", "rev-parse", "--git-common-dir"], + cwd=base_dir, + check=True, + capture_output=True, + text=True, + ) + except (OSError, subprocess.CalledProcessError): + return False + + git_dir_raw = git_dir_result.stdout.strip() + common_dir_raw = common_dir_result.stdout.strip() + if not git_dir_raw or not common_dir_raw: + return False + + git_dir = Path(git_dir_raw) + if not git_dir.is_absolute(): + git_dir = (base_dir / git_dir).resolve() + + common_dir = Path(common_dir_raw) + if not common_dir.is_absolute(): + common_dir = (base_dir / common_dir).resolve() + + return git_dir != common_dir + + +def get_git_commit_context(base_dir: Path) -> tuple[str | None, datetime | None]: + """Return the current git commit hash and commit time for a directory.""" + try: + commit_result = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=base_dir, + check=True, + capture_output=True, + text=True, + ) + time_result = subprocess.run( + ["git", "show", "-s", "--format=%cI", "HEAD"], + cwd=base_dir, + check=True, + capture_output=True, + text=True, + ) + except (OSError, subprocess.CalledProcessError): + return None, None + + commit_hash = commit_result.stdout.strip() or None + commit_time_raw = time_result.stdout.strip() + commit_time = datetime.fromisoformat(commit_time_raw) if commit_time_raw else None + return commit_hash, commit_time + + +def resolve_index_dir(project_dir: Path | None = None) -> Path: + """Resolve the index directory, honoring environment overrides.""" + base_dir = project_dir or Path(".") + override = os.environ.get("SIA_CODE_INDEX_DIR") + if override: + override_path = Path(override) + if override_path.is_absolute(): + return override_path + return base_dir / override_path + + scope = os.environ.get("SIA_CODE_INDEX_SCOPE") + if not scope or scope == "auto": + scope = "shared" if is_git_worktree(base_dir) else "worktree" + if scope == "shared": + common_dir = resolve_git_common_dir(base_dir) + if common_dir is not None: + return common_dir / "sia-code" + + return base_dir / ".sia-code" + + def require_initialized() -> tuple[Path, Config]: """Ensure Sia Code is initialized, return sia_dir and config. @@ -137,7 +251,7 @@ def require_initialized() -> tuple[Path, Config]: Raises: SystemExit: If .sia-code directory doesn't exist """ - sia_dir = Path(".sia-code") + sia_dir = resolve_index_dir() if not sia_dir.exists(): console.print("[red]Error: Sia Code not initialized. Run 'sia-code init' first.[/red]") sys.exit(1) @@ -164,7 +278,7 @@ def init(path: str, dry_run: bool): from .indexer.project_analyzer import ProjectAnalyzer project_dir = Path(path) - sia_dir = project_dir / ".sia-code" + sia_dir = resolve_index_dir(project_dir) if sia_dir.exists() and not dry_run: console.print(f"[yellow]Sia Code already initialized at {sia_dir}[/yellow]") @@ -190,7 +304,7 @@ def init(path: str, dry_run: bool): console.print("\n[yellow]Dry run complete. No index created.[/yellow]") return - # Create .sia-code directory + # Create index directory sia_dir.mkdir(parents=True, exist_ok=True) (sia_dir / "cache").mkdir(exist_ok=True) @@ -1413,12 +1527,16 @@ def memory_add_decision(title, description, reasoning, alternatives): session_id = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + commit_hash, commit_time = get_git_commit_context(Path(".")) + decision_id = backend.add_decision( session_id=session_id, title=title, description=description, reasoning=reasoning, alternatives=alt_list, + commit_hash=commit_hash, + commit_time=commit_time, ) console.print(f"[green]✓[/green] Created decision #{decision_id}: {title}") @@ -1821,9 +1939,7 @@ def memory_changelog(range, output_format, output): @memory.command(name="export") -@click.option( - "-o", "--output", type=click.Path(), default=".sia-code/memory.json", help="Output file" -) +@click.option("-o", "--output", type=click.Path(), default=None, help="Output file") def memory_export(output): """Export memory to JSON file. @@ -1834,6 +1950,8 @@ def memory_export(output): backend.open_index() try: + if output is None: + output = str(sia_dir / "memory.json") export_path = backend.export_memory(include_pending=True) # Copy to specified output location if different @@ -1855,7 +1973,7 @@ def memory_export(output): "--input", "input_file", type=click.Path(exists=True), - default=".sia-code/memory.json", + default=None, help="Input file", ) def memory_import(input_file): @@ -1868,6 +1986,8 @@ def memory_import(input_file): backend.open_index(writable=True) try: + if input_file is None: + input_file = str(sia_dir / "memory.json") result = backend.import_memory(input_file) console.print("[green]✓[/green] Import complete") diff --git a/sia_code/core/models.py b/sia_code/core/models.py index 171bb14..d4759c4 100644 --- a/sia_code/core/models.py +++ b/sia_code/core/models.py @@ -166,6 +166,8 @@ class Decision: alternatives: list[dict[str, Any]] = field(default_factory=list) status: str = "pending" # 'pending', 'approved', 'rejected' category: str | None = None # Set when approved + commit_hash: str | None = None + commit_time: datetime | None = None created_at: datetime | None = None approved_at: datetime | None = None @@ -180,6 +182,8 @@ def to_dict(self) -> dict[str, Any]: "alternatives": self.alternatives, "status": self.status, "category": self.category, + "commit_hash": self.commit_hash, + "commit_time": self.commit_time.isoformat() if self.commit_time else None, "created_at": self.created_at.isoformat() if self.created_at else None, "approved_at": self.approved_at.isoformat() if self.approved_at else None, } @@ -197,6 +201,8 @@ class TimelineEvent: files_changed: list[str] = field(default_factory=list) diff_stats: dict[str, Any] = field(default_factory=dict) importance: str = "medium" # 'high', 'medium', 'low' + commit_hash: str | None = None + commit_time: datetime | None = None created_at: datetime | None = None def to_dict(self) -> dict[str, Any]: @@ -210,6 +216,8 @@ def to_dict(self) -> dict[str, Any]: "files_changed": self.files_changed, "diff_stats": self.diff_stats, "importance": self.importance, + "commit_hash": self.commit_hash, + "commit_time": self.commit_time.isoformat() if self.commit_time else None, "created_at": self.created_at.isoformat() if self.created_at else None, } @@ -226,6 +234,8 @@ class ChangelogEntry: breaking_changes: list[str] = field(default_factory=list) features: list[str] = field(default_factory=list) fixes: list[str] = field(default_factory=list) + commit_hash: str | None = None + commit_time: datetime | None = None created_at: datetime | None = None def to_dict(self) -> dict[str, Any]: @@ -239,6 +249,8 @@ def to_dict(self) -> dict[str, Any]: "breaking_changes": self.breaking_changes, "features": self.features, "fixes": self.fixes, + "commit_hash": self.commit_hash, + "commit_time": self.commit_time.isoformat() if self.commit_time else None, "created_at": self.created_at.isoformat() if self.created_at else None, } diff --git a/sia_code/indexer/coordinator.py b/sia_code/indexer/coordinator.py index 40160ae..2c9f285 100644 --- a/sia_code/indexer/coordinator.py +++ b/sia_code/indexer/coordinator.py @@ -5,6 +5,7 @@ import time import os import shutil +import subprocess from concurrent.futures import ProcessPoolExecutor, as_completed from typing import Callable @@ -21,6 +22,42 @@ logger = logging.getLogger(__name__) +def _get_git_commit_context(directory: Path) -> dict[str, str]: + try: + commit_result = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=directory, + check=True, + capture_output=True, + text=True, + ) + time_result = subprocess.run( + ["git", "show", "-s", "--format=%cI", "HEAD"], + cwd=directory, + check=True, + capture_output=True, + text=True, + ) + except (OSError, subprocess.CalledProcessError): + return {} + + commit_hash = commit_result.stdout.strip() + commit_time = time_result.stdout.strip() + if not commit_hash: + return {} + + context = {"commit_hash": commit_hash} + if commit_time: + context["commit_time"] = commit_time + return context + + +def _attach_git_context(chunks: list, context: dict[str, str]) -> list: + if not context: + return chunks + return [chunk.with_metadata(context) for chunk in chunks] + + def _chunk_file_worker( file_path: Path, chunking_config: CASTConfig ) -> tuple[Path, list, str | None, int]: @@ -162,6 +199,8 @@ def index_directory( stats = self._create_index_stats(len(files)) + git_context = _get_git_commit_context(directory) + # Buffer chunks to reduce write overhead pending_chunks: list = [] batch_size = max(1, self.config.indexing.chunk_batch_size) @@ -205,7 +244,7 @@ def flush_chunks() -> None: pass # Buffer chunks and flush when threshold reached - pending_chunks.extend(chunks) + pending_chunks.extend(_attach_git_context(chunks, git_context)) if len(pending_chunks) >= batch_size: flush_chunks() stats["indexed_files"] += 1 @@ -300,6 +339,8 @@ def index_directory_parallel( embed_batch = self.backend._get_embed_batch_size() batch_size = min(batch_size, max(1, embed_batch * 8)) + git_context = _get_git_commit_context(directory) + def flush_chunks() -> None: if pending_chunks: self.backend.store_chunks_batch(pending_chunks) @@ -335,7 +376,7 @@ def flush_chunks() -> None: metrics.bytes_processed += file_size # Buffer chunks and flush when threshold reached - pending_chunks.extend(chunks) + pending_chunks.extend(_attach_git_context(chunks, git_context)) if len(pending_chunks) >= batch_size: flush_chunks() stats["indexed_files"] += 1 @@ -441,6 +482,7 @@ def index_directory_incremental_v2( # Add incremental-specific fields stats["changed_files"] = 0 stats["skipped_files"] = 0 + git_context = _get_git_commit_context(directory) for idx, file_path in enumerate(files, 1): # Update progress for checking phase @@ -481,6 +523,7 @@ def index_directory_incremental_v2( metrics.bytes_processed += file_stat.st_size # Store new chunks + chunks = _attach_git_context(chunks, git_context) chunk_ids = self.backend.store_chunks_batch(chunks) chunk_id_strs = [str(cid) for cid in chunk_ids] diff --git a/sia_code/memory/git_events.py b/sia_code/memory/git_events.py index 6da8435..b828a38 100644 --- a/sia_code/memory/git_events.py +++ b/sia_code/memory/git_events.py @@ -61,6 +61,8 @@ def scan_git_tags(self) -> list[dict[str, Any]]: "breaking_changes": self._extract_breaking_changes(tag_message), "features": self._extract_features(tag_message), "fixes": self._extract_fixes(tag_message), + "commit_hash": tag.commit.hexsha, + "commit_time": tag.commit.committed_datetime, } changelogs.append(changelog) @@ -125,6 +127,8 @@ def scan_merge_events(self, since: str | None = None, limit: int = 50) -> list[d "diff_stats": diff_stats, "importance": self._determine_importance(diff_stats), "created_at": commit.committed_datetime, + "commit_hash": commit.hexsha, + "commit_time": commit.committed_datetime, "merge_commit": commit, # Include for summarization } diff --git a/sia_code/memory/git_sync.py b/sia_code/memory/git_sync.py index 6b6074b..599abca 100644 --- a/sia_code/memory/git_sync.py +++ b/sia_code/memory/git_sync.py @@ -121,6 +121,8 @@ def sync( breaking_changes=changelog_data.get("breaking_changes", []), features=changelog_data.get("features", []), fixes=changelog_data.get("fixes", []), + commit_hash=changelog_data.get("commit_hash"), + commit_time=changelog_data.get("commit_time"), ) stats.changelogs_added += 1 @@ -176,6 +178,8 @@ def sync( files_changed=event_data.get("files_changed", []), diff_stats=event_data.get("diff_stats", {}), importance=event_importance, + commit_hash=event_data.get("commit_hash"), + commit_time=event_data.get("commit_time"), ) stats.timeline_added += 1 diff --git a/sia_code/storage/base.py b/sia_code/storage/base.py index ee6fa66..b35f050 100644 --- a/sia_code/storage/base.py +++ b/sia_code/storage/base.py @@ -1,6 +1,7 @@ """Abstract base class for storage backends.""" from abc import ABC, abstractmethod +from datetime import datetime from pathlib import Path from typing import Any @@ -150,6 +151,8 @@ def add_decision( description: str, reasoning: str | None = None, alternatives: list[dict[str, Any]] | None = None, + commit_hash: str | None = None, + commit_time: datetime | None = None, ) -> int: """Add a pending decision. @@ -161,6 +164,8 @@ def add_decision( description: Full decision context reasoning: Why this decision was made alternatives: Other options considered + commit_hash: Git commit hash at time of decision + commit_time: Commit timestamp for context Returns: Decision ID @@ -227,6 +232,8 @@ def add_timeline_event( files_changed: list[str] | None = None, diff_stats: dict[str, Any] | None = None, importance: str = "medium", + commit_hash: str | None = None, + commit_time: datetime | None = None, ) -> int: """Add a timeline event. @@ -238,6 +245,8 @@ def add_timeline_event( files_changed: List of affected files diff_stats: Statistics about the diff importance: 'high', 'medium', 'low' + commit_hash: Git commit hash for the event + commit_time: Git commit timestamp Returns: Timeline event ID @@ -253,6 +262,8 @@ def add_changelog( breaking_changes: list[str] | None = None, features: list[str] | None = None, fixes: list[str] | None = None, + commit_hash: str | None = None, + commit_time: datetime | None = None, ) -> int: """Add a changelog entry. @@ -263,6 +274,8 @@ def add_changelog( breaking_changes: List of breaking changes features: List of new features fixes: List of bug fixes + commit_hash: Git commit hash for the tag + commit_time: Git commit timestamp Returns: Changelog entry ID diff --git a/sia_code/storage/sqlite_vec_backend.py b/sia_code/storage/sqlite_vec_backend.py index 7435b68..fec85c9 100644 --- a/sia_code/storage/sqlite_vec_backend.py +++ b/sia_code/storage/sqlite_vec_backend.py @@ -553,6 +553,9 @@ def open_index(self, writable: bool = False) -> None: # Open SQLite database (check_same_thread=False for parallel search) self.conn = connect_sqlite(self.db_path, check_same_thread=False) self._vector_table_initialized = False + + # Ensure schema is up to date for older indexes + self._create_tables() if writable: self._ensure_vector_table() @@ -586,6 +589,12 @@ def _create_tables(self) -> None: cursor = self.conn.cursor() + def ensure_column(table: str, column: str, column_type: str) -> None: + cursor.execute(f"PRAGMA table_info({table})") + existing = {row["name"] for row in cursor.fetchall()} + if column not in existing: + cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} {column_type}") + # Code chunks table cursor.execute( """ @@ -657,6 +666,8 @@ def _create_tables(self) -> None: files_changed JSON, diff_stats JSON, importance TEXT DEFAULT 'medium', + commit_hash TEXT, + commit_time TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) """ @@ -674,6 +685,8 @@ def _create_tables(self) -> None: breaking_changes JSON, features JSON, fixes JSON, + commit_hash TEXT, + commit_time TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) """ @@ -691,12 +704,22 @@ def _create_tables(self) -> None: alternatives JSON, status TEXT DEFAULT 'pending', category TEXT, + commit_hash TEXT, + commit_time TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, approved_at TIMESTAMP ) """ ) + # Backward-compatible schema upgrades + ensure_column("timeline", "commit_hash", "TEXT") + ensure_column("timeline", "commit_time", "TIMESTAMP") + ensure_column("changelogs", "commit_hash", "TEXT") + ensure_column("changelogs", "commit_time", "TIMESTAMP") + ensure_column("decisions", "commit_hash", "TEXT") + ensure_column("decisions", "commit_time", "TIMESTAMP") + # FIFO trigger for decisions (delete oldest when >100 pending) cursor.execute( """ @@ -1397,6 +1420,8 @@ def add_decision( description: str, reasoning: str | None = None, alternatives: list[dict[str, Any]] | None = None, + commit_hash: str | None = None, + commit_time: datetime | None = None, ) -> int: """Add a pending decision (FIFO auto-cleanup when >100). @@ -1416,8 +1441,16 @@ def add_decision( cursor = self.conn.cursor() cursor.execute( """ - INSERT INTO decisions (session_id, title, description, reasoning, alternatives) - VALUES (?, ?, ?, ?, ?) + INSERT INTO decisions ( + session_id, + title, + description, + reasoning, + alternatives, + commit_hash, + commit_time + ) + VALUES (?, ?, ?, ?, ?, ?, ?) """, ( session_id, @@ -1425,6 +1458,8 @@ def add_decision( description, reasoning, json.dumps(alternatives or []), + commit_hash, + commit_time.isoformat() if commit_time else None, ), ) decision_id = cursor.lastrowid @@ -1530,7 +1565,7 @@ def list_pending_decisions(self, limit: int = 20) -> list[Decision]: cursor.execute( """ SELECT id, session_id, title, description, reasoning, alternatives, - status, category, created_at, approved_at + status, category, commit_hash, commit_time, created_at, approved_at FROM decisions WHERE status = 'pending' ORDER BY created_at ASC @@ -1551,6 +1586,10 @@ def list_pending_decisions(self, limit: int = 20) -> list[Decision]: alternatives=json.loads(row["alternatives"]) if row["alternatives"] else [], status=row["status"], category=row["category"], + commit_hash=row["commit_hash"], + commit_time=datetime.fromisoformat(row["commit_time"]) + if row["commit_time"] + else None, created_at=datetime.fromisoformat(row["created_at"]) if row["created_at"] else None, @@ -1578,7 +1617,7 @@ def get_decision(self, decision_id: int) -> Decision | None: cursor.execute( """ SELECT id, session_id, title, description, reasoning, alternatives, - status, category, created_at, approved_at + status, category, commit_hash, commit_time, created_at, approved_at FROM decisions WHERE id = ? """, @@ -1598,6 +1637,8 @@ def get_decision(self, decision_id: int) -> Decision | None: alternatives=json.loads(row["alternatives"]) if row["alternatives"] else [], status=row["status"], category=row["category"], + commit_hash=row["commit_hash"], + commit_time=datetime.fromisoformat(row["commit_time"]) if row["commit_time"] else None, created_at=datetime.fromisoformat(row["created_at"]) if row["created_at"] else None, approved_at=datetime.fromisoformat(row["approved_at"]) if row["approved_at"] else None, ) @@ -1615,6 +1656,8 @@ def add_timeline_event( files_changed: list[str] | None = None, diff_stats: dict[str, Any] | None = None, importance: str = "medium", + commit_hash: str | None = None, + commit_time: datetime | None = None, ) -> int: """Add a timeline event. @@ -1636,8 +1679,10 @@ def add_timeline_event( cursor = self.conn.cursor() cursor.execute( """ - INSERT INTO timeline (event_type, from_ref, to_ref, summary, files_changed, diff_stats, importance) - VALUES (?, ?, ?, ?, ?, ?, ?) + INSERT INTO timeline ( + event_type, from_ref, to_ref, summary, files_changed, diff_stats, importance, commit_hash, commit_time + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( event_type, @@ -1647,6 +1692,8 @@ def add_timeline_event( json.dumps(files_changed or []), json.dumps(diff_stats or {}), importance, + commit_hash, + commit_time.isoformat() if commit_time else None, ), ) timeline_id = cursor.lastrowid @@ -1668,6 +1715,8 @@ def add_changelog( breaking_changes: list[str] | None = None, features: list[str] | None = None, fixes: list[str] | None = None, + commit_hash: str | None = None, + commit_time: datetime | None = None, ) -> int: """Add a changelog entry. @@ -1688,8 +1737,10 @@ def add_changelog( cursor = self.conn.cursor() cursor.execute( """ - INSERT INTO changelogs (tag, version, summary, breaking_changes, features, fixes, date) - VALUES (?, ?, ?, ?, ?, ?, ?) + INSERT INTO changelogs ( + tag, version, summary, breaking_changes, features, fixes, date, commit_hash, commit_time + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( tag, @@ -1699,6 +1750,8 @@ def add_changelog( json.dumps(features or []), json.dumps(fixes or []), datetime.now().isoformat(), + commit_hash, + commit_time.isoformat() if commit_time else None, ), ) changelog_id = cursor.lastrowid @@ -1746,7 +1799,8 @@ def get_timeline_events( cursor.execute( f""" - SELECT id, event_type, from_ref, to_ref, summary, files_changed, diff_stats, importance, created_at + SELECT id, event_type, from_ref, to_ref, summary, files_changed, diff_stats, importance, + commit_hash, commit_time, created_at FROM timeline {where_clause} ORDER BY created_at DESC @@ -1767,6 +1821,10 @@ def get_timeline_events( files_changed=json.loads(row["files_changed"]) if row["files_changed"] else [], diff_stats=json.loads(row["diff_stats"]) if row["diff_stats"] else {}, importance=row["importance"], + commit_hash=row["commit_hash"], + commit_time=datetime.fromisoformat(row["commit_time"]) + if row["commit_time"] + else None, created_at=datetime.fromisoformat(row["created_at"]) if row["created_at"] else None, @@ -1790,7 +1848,8 @@ def get_changelogs(self, limit: int = 20) -> list[ChangelogEntry]: cursor = self.conn.cursor() cursor.execute( """ - SELECT id, tag, version, date, summary, breaking_changes, features, fixes, created_at + SELECT id, tag, version, date, summary, breaking_changes, features, fixes, + commit_hash, commit_time, created_at FROM changelogs ORDER BY date DESC LIMIT ? @@ -1812,6 +1871,10 @@ def get_changelogs(self, limit: int = 20) -> list[ChangelogEntry]: else [], features=json.loads(row["features"]) if row["features"] else [], fixes=json.loads(row["fixes"]) if row["fixes"] else [], + commit_hash=row["commit_hash"], + commit_time=datetime.fromisoformat(row["commit_time"]) + if row["commit_time"] + else None, created_at=datetime.fromisoformat(row["created_at"]) if row["created_at"] else None, @@ -2037,7 +2100,7 @@ def export_memory( cursor = self.conn.cursor() cursor.execute( """ - SELECT id, session_id, title, description, reasoning, category, approved_at + SELECT id, session_id, title, description, reasoning, category, commit_hash, commit_time, approved_at FROM decisions WHERE status = 'approved' ORDER BY approved_at DESC @@ -2052,6 +2115,8 @@ def export_memory( "description": row["description"], "reasoning": row["reasoning"], "category": row["category"], + "commit_hash": row["commit_hash"], + "commit_time": row["commit_time"], "approved_at": row["approved_at"], } ) @@ -2131,6 +2196,10 @@ def import_memory( files_changed=event_data.get("files_changed", []), diff_stats=event_data.get("diff_stats", {}), importance=event_data.get("importance", "medium"), + commit_hash=event_data.get("commit_hash"), + commit_time=datetime.fromisoformat(event_data["commit_time"]) + if event_data.get("commit_time") + else None, ) result.added += 1 @@ -2150,6 +2219,10 @@ def import_memory( breaking_changes=changelog_data.get("breaking_changes", []), features=changelog_data.get("features", []), fixes=changelog_data.get("fixes", []), + commit_hash=changelog_data.get("commit_hash"), + commit_time=datetime.fromisoformat(changelog_data["commit_time"]) + if changelog_data.get("commit_time") + else None, ) result.added += 1 @@ -2169,6 +2242,10 @@ def import_memory( title=decision_data["title"], description=decision_data["description"], reasoning=decision_data.get("reasoning"), + commit_hash=decision_data.get("commit_hash"), + commit_time=datetime.fromisoformat(decision_data["commit_time"]) + if decision_data.get("commit_time") + else None, ) # Immediately approve it self.approve_decision(decision_id, decision_data.get("category", "imported")) diff --git a/sia_code/storage/usearch_backend.py b/sia_code/storage/usearch_backend.py index 3aaa42a..8114582 100644 --- a/sia_code/storage/usearch_backend.py +++ b/sia_code/storage/usearch_backend.py @@ -456,6 +456,10 @@ def open_index(self, writable: bool = False) -> None: # Open SQLite database (check_same_thread=False for parallel search) self.conn = connect_sqlite(self.db_path, check_same_thread=False) + # Ensure schema migrations are applied before any writes + if writable: + self._create_tables() + def close(self) -> None: """Close the index and save changes.""" if self.vector_index is not None: @@ -499,6 +503,12 @@ def _create_tables(self) -> None: cursor = self.conn.cursor() + def ensure_column(table: str, column: str, column_type: str) -> None: + cursor.execute(f"PRAGMA table_info({table})") + existing = {row["name"] for row in cursor.fetchall()} + if column not in existing: + cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} {column_type}") + # Code chunks table cursor.execute( """ @@ -570,6 +580,8 @@ def _create_tables(self) -> None: files_changed JSON, diff_stats JSON, importance TEXT DEFAULT 'medium', + commit_hash TEXT, + commit_time TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) """ @@ -587,6 +599,8 @@ def _create_tables(self) -> None: breaking_changes JSON, features JSON, fixes JSON, + commit_hash TEXT, + commit_time TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) """ @@ -604,6 +618,8 @@ def _create_tables(self) -> None: alternatives JSON, status TEXT DEFAULT 'pending', category TEXT, + commit_hash TEXT, + commit_time TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, approved_at TIMESTAMP ) @@ -651,6 +667,13 @@ def _create_tables(self) -> None: """ ) + ensure_column("timeline", "commit_hash", "TEXT") + ensure_column("timeline", "commit_time", "TIMESTAMP") + ensure_column("changelogs", "commit_hash", "TEXT") + ensure_column("changelogs", "commit_time", "TIMESTAMP") + ensure_column("decisions", "commit_hash", "TEXT") + ensure_column("decisions", "commit_time", "TIMESTAMP") + self.conn.commit() # The rest of the methods will be added in subsequent parts... @@ -1320,6 +1343,8 @@ def add_decision( description: str, reasoning: str | None = None, alternatives: list[dict[str, Any]] | None = None, + commit_hash: str | None = None, + commit_time: datetime | None = None, ) -> int: """Add a pending decision (FIFO auto-cleanup when >100). @@ -1339,8 +1364,16 @@ def add_decision( cursor = self.conn.cursor() cursor.execute( """ - INSERT INTO decisions (session_id, title, description, reasoning, alternatives) - VALUES (?, ?, ?, ?, ?) + INSERT INTO decisions ( + session_id, + title, + description, + reasoning, + alternatives, + commit_hash, + commit_time + ) + VALUES (?, ?, ?, ?, ?, ?, ?) """, ( session_id, @@ -1348,6 +1381,8 @@ def add_decision( description, reasoning, json.dumps(alternatives or []), + commit_hash, + commit_time.isoformat() if commit_time else None, ), ) decision_id = cursor.lastrowid @@ -1452,8 +1487,8 @@ def list_pending_decisions(self, limit: int = 20) -> list[Decision]: cursor = self.conn.cursor() cursor.execute( """ - SELECT id, session_id, title, description, reasoning, alternatives, - status, category, created_at, approved_at + SELECT id, session_id, title, description, reasoning, alternatives, + status, category, commit_hash, commit_time, created_at, approved_at FROM decisions WHERE status = 'pending' ORDER BY created_at ASC @@ -1474,6 +1509,10 @@ def list_pending_decisions(self, limit: int = 20) -> list[Decision]: alternatives=json.loads(row["alternatives"]) if row["alternatives"] else [], status=row["status"], category=row["category"], + commit_hash=row["commit_hash"], + commit_time=datetime.fromisoformat(row["commit_time"]) + if row["commit_time"] + else None, created_at=datetime.fromisoformat(row["created_at"]) if row["created_at"] else None, @@ -1501,7 +1540,7 @@ def get_decision(self, decision_id: int) -> Decision | None: cursor.execute( """ SELECT id, session_id, title, description, reasoning, alternatives, - status, category, created_at, approved_at + status, category, commit_hash, commit_time, created_at, approved_at FROM decisions WHERE id = ? """, @@ -1521,6 +1560,8 @@ def get_decision(self, decision_id: int) -> Decision | None: alternatives=json.loads(row["alternatives"]) if row["alternatives"] else [], status=row["status"], category=row["category"], + commit_hash=row["commit_hash"], + commit_time=datetime.fromisoformat(row["commit_time"]) if row["commit_time"] else None, created_at=datetime.fromisoformat(row["created_at"]) if row["created_at"] else None, approved_at=datetime.fromisoformat(row["approved_at"]) if row["approved_at"] else None, ) @@ -1538,6 +1579,8 @@ def add_timeline_event( files_changed: list[str] | None = None, diff_stats: dict[str, Any] | None = None, importance: str = "medium", + commit_hash: str | None = None, + commit_time: datetime | None = None, ) -> int: """Add a timeline event. @@ -1559,8 +1602,18 @@ def add_timeline_event( cursor = self.conn.cursor() cursor.execute( """ - INSERT INTO timeline (event_type, from_ref, to_ref, summary, files_changed, diff_stats, importance) - VALUES (?, ?, ?, ?, ?, ?, ?) + INSERT INTO timeline ( + event_type, + from_ref, + to_ref, + summary, + files_changed, + diff_stats, + importance, + commit_hash, + commit_time + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( event_type, @@ -1570,6 +1623,8 @@ def add_timeline_event( json.dumps(files_changed or []), json.dumps(diff_stats or {}), importance, + commit_hash, + commit_time.isoformat() if commit_time else None, ), ) timeline_id = cursor.lastrowid @@ -1591,6 +1646,8 @@ def add_changelog( breaking_changes: list[str] | None = None, features: list[str] | None = None, fixes: list[str] | None = None, + commit_hash: str | None = None, + commit_time: datetime | None = None, ) -> int: """Add a changelog entry. @@ -1611,8 +1668,18 @@ def add_changelog( cursor = self.conn.cursor() cursor.execute( """ - INSERT INTO changelogs (tag, version, summary, breaking_changes, features, fixes, date) - VALUES (?, ?, ?, ?, ?, ?, ?) + INSERT INTO changelogs ( + tag, + version, + summary, + breaking_changes, + features, + fixes, + date, + commit_hash, + commit_time + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( tag, @@ -1622,6 +1689,8 @@ def add_changelog( json.dumps(features or []), json.dumps(fixes or []), datetime.now().isoformat(), + commit_hash, + commit_time.isoformat() if commit_time else None, ), ) changelog_id = cursor.lastrowid @@ -1669,7 +1738,8 @@ def get_timeline_events( cursor.execute( f""" - SELECT id, event_type, from_ref, to_ref, summary, files_changed, diff_stats, importance, created_at + SELECT id, event_type, from_ref, to_ref, summary, files_changed, diff_stats, importance, + commit_hash, commit_time, created_at FROM timeline {where_clause} ORDER BY created_at DESC @@ -1690,6 +1760,10 @@ def get_timeline_events( files_changed=json.loads(row["files_changed"]) if row["files_changed"] else [], diff_stats=json.loads(row["diff_stats"]) if row["diff_stats"] else {}, importance=row["importance"], + commit_hash=row["commit_hash"], + commit_time=datetime.fromisoformat(row["commit_time"]) + if row["commit_time"] + else None, created_at=datetime.fromisoformat(row["created_at"]) if row["created_at"] else None, @@ -1713,7 +1787,8 @@ def get_changelogs(self, limit: int = 20) -> list[ChangelogEntry]: cursor = self.conn.cursor() cursor.execute( """ - SELECT id, tag, version, date, summary, breaking_changes, features, fixes, created_at + SELECT id, tag, version, date, summary, breaking_changes, features, fixes, + commit_hash, commit_time, created_at FROM changelogs ORDER BY date DESC LIMIT ? @@ -1735,6 +1810,10 @@ def get_changelogs(self, limit: int = 20) -> list[ChangelogEntry]: else [], features=json.loads(row["features"]) if row["features"] else [], fixes=json.loads(row["fixes"]) if row["fixes"] else [], + commit_hash=row["commit_hash"], + commit_time=datetime.fromisoformat(row["commit_time"]) + if row["commit_time"] + else None, created_at=datetime.fromisoformat(row["created_at"]) if row["created_at"] else None, @@ -1960,7 +2039,7 @@ def export_memory( cursor = self.conn.cursor() cursor.execute( """ - SELECT id, session_id, title, description, reasoning, category, approved_at + SELECT id, session_id, title, description, reasoning, category, commit_hash, commit_time, approved_at FROM decisions WHERE status = 'approved' ORDER BY approved_at DESC @@ -1975,6 +2054,8 @@ def export_memory( "description": row["description"], "reasoning": row["reasoning"], "category": row["category"], + "commit_hash": row["commit_hash"], + "commit_time": row["commit_time"], "approved_at": row["approved_at"], } ) @@ -2054,6 +2135,10 @@ def import_memory( files_changed=event_data.get("files_changed", []), diff_stats=event_data.get("diff_stats", {}), importance=event_data.get("importance", "medium"), + commit_hash=event_data.get("commit_hash"), + commit_time=datetime.fromisoformat(event_data["commit_time"]) + if event_data.get("commit_time") + else None, ) result.added += 1 @@ -2073,6 +2158,10 @@ def import_memory( breaking_changes=changelog_data.get("breaking_changes", []), features=changelog_data.get("features", []), fixes=changelog_data.get("fixes", []), + commit_hash=changelog_data.get("commit_hash"), + commit_time=datetime.fromisoformat(changelog_data["commit_time"]) + if changelog_data.get("commit_time") + else None, ) result.added += 1 @@ -2092,6 +2181,10 @@ def import_memory( title=decision_data["title"], description=decision_data["description"], reasoning=decision_data.get("reasoning"), + commit_hash=decision_data.get("commit_hash"), + commit_time=datetime.fromisoformat(decision_data["commit_time"]) + if decision_data.get("commit_time") + else None, ) # Immediately approve it self.approve_decision(decision_id, decision_data.get("category", "imported")) diff --git a/tests/test_basic.py b/tests/test_basic.py index db7ef17..2e8ab53 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -3,7 +3,7 @@ import pytest from sia_code.core.models import Chunk from sia_code.core.types import ChunkType, Language, FilePath, LineNumber -from sia_code.storage.backend import UsearchSqliteBackend +from sia_code.storage.usearch_backend import UsearchSqliteBackend @pytest.fixture @@ -51,7 +51,7 @@ def search_semantic(self, query: str, k: int = 10): return self._convert_results(results)""", chunk_type=ChunkType.CLASS, language=Language.PYTHON, - file_path=FilePath("sia_code/storage/backend.py"), + file_path=FilePath("sia_code/storage/usearch_backend.py"), ), Chunk( symbol="Chunk", @@ -82,7 +82,8 @@ def test_create_index(self, tmp_path): test_path = tmp_path / "test.sia-code" backend = UsearchSqliteBackend(test_path, embedding_enabled=False) backend.create_index() - assert backend.mem is not None + assert backend.vector_index is not None + assert backend.conn is not None backend.close() def test_store_chunks(self, backend, sample_chunks): @@ -201,46 +202,6 @@ def test_chunk_empty_code(self): ) -class TestURIParsing: - """Test URI parsing functionality.""" - - def test_parse_valid_uri(self, tmp_path): - """Test parsing valid pci:// URI.""" - test_path = tmp_path / "test.sia-code" - backend = UsearchSqliteBackend(test_path, embedding_enabled=False) - backend.create_index() - - file_path, start, end = backend._parse_uri("pci:///home/user/file.py#42") - assert file_path == "/home/user/file.py" - assert start == 42 - assert end == 42 - backend.close() - - def test_parse_uri_no_line(self, tmp_path): - """Test parsing URI without line number.""" - test_path = tmp_path / "test.sia-code" - backend = UsearchSqliteBackend(test_path, embedding_enabled=False) - backend.create_index() - - file_path, start, end = backend._parse_uri("pci:///home/user/file.py") - assert file_path == "/home/user/file.py" - assert start == 1 - assert end == 1 - backend.close() - - def test_parse_invalid_uri(self, tmp_path): - """Test parsing invalid URI returns defaults.""" - test_path = tmp_path / "test.sia-code" - backend = UsearchSqliteBackend(test_path, embedding_enabled=False) - backend.create_index() - - file_path, start, end = backend._parse_uri("invalid://something") - assert file_path == "unknown" - assert start == 1 - assert end == 1 - backend.close() - - class TestLanguageDetection: """Test language detection from file extensions.""" diff --git a/tests/test_cli_integration.py b/tests/test_cli_integration.py index 6b44b4d..035c80c 100644 --- a/tests/test_cli_integration.py +++ b/tests/test_cli_integration.py @@ -1,6 +1,8 @@ """Integration test for Sia Code CLI.""" import pytest +import json +import os import subprocess import sys from pathlib import Path @@ -67,6 +69,16 @@ def run_cli(args: list, cwd: Path | None = None) -> subprocess.CompletedProcess: ) +def disable_embeddings(project_dir: Path) -> None: + config_path = project_dir / ".sia-code" / "config.json" + if not config_path.exists(): + return + config = json.loads(config_path.read_text()) + config.setdefault("embedding", {}) + config["embedding"]["enabled"] = False + config_path.write_text(json.dumps(config, indent=2)) + + class TestCLIInit: """Test 'sia-code init' command.""" @@ -83,6 +95,7 @@ def test_init_already_initialized(self, test_project): """Test init when already initialized.""" # First init run_cli(["init"], cwd=test_project) + disable_embeddings(test_project) # Second init should warn result = run_cli(["init"], cwd=test_project) @@ -102,6 +115,7 @@ def test_status_not_initialized(self, test_project): def test_status_after_init(self, test_project): """Test status after initialization.""" run_cli(["init"], cwd=test_project) + disable_embeddings(test_project) result = run_cli(["status"], cwd=test_project) assert result.returncode == 0 @@ -120,6 +134,7 @@ def test_index_not_initialized(self, test_project): def test_index_basic(self, test_project): """Test basic indexing.""" run_cli(["init"], cwd=test_project) + disable_embeddings(test_project) result = run_cli(["index", "."], cwd=test_project) assert result.returncode == 0 @@ -128,6 +143,7 @@ def test_index_basic(self, test_project): def test_index_clean(self, test_project): """Test clean indexing.""" run_cli(["init"], cwd=test_project) + disable_embeddings(test_project) run_cli(["index", "."], cwd=test_project) result = run_cli(["index", "--clean", "."], cwd=test_project) @@ -160,6 +176,7 @@ def test_search_not_initialized(self, test_project): def test_search_lexical(self, test_project): """Test lexical search.""" run_cli(["init"], cwd=test_project) + disable_embeddings(test_project) run_cli(["index", "."], cwd=test_project) result = run_cli(["search", "hello", "--regex", "--no-filter"], cwd=test_project) @@ -169,6 +186,7 @@ def test_search_lexical(self, test_project): def test_search_with_limit(self, test_project): """Test search with result limit.""" run_cli(["init"], cwd=test_project) + disable_embeddings(test_project) run_cli(["index", "."], cwd=test_project) result = run_cli( @@ -180,6 +198,7 @@ def test_search_with_limit(self, test_project): def test_search_json_format(self, test_project): """Test search with JSON output format.""" run_cli(["init"], cwd=test_project) + disable_embeddings(test_project) run_cli(["index", "."], cwd=test_project) result = run_cli( @@ -193,6 +212,7 @@ def test_search_json_format(self, test_project): def test_search_table_format(self, test_project): """Test search with table output format.""" run_cli(["init"], cwd=test_project) + disable_embeddings(test_project) run_cli(["index", "."], cwd=test_project) result = run_cli( @@ -208,6 +228,7 @@ class TestCLIConfig: def test_config_show(self, test_project): """Test config show command.""" run_cli(["init"], cwd=test_project) + disable_embeddings(test_project) result = run_cli(["config", "show"], cwd=test_project) assert result.returncode == 0 @@ -217,6 +238,7 @@ def test_config_show(self, test_project): def test_config_path(self, test_project): """Test config path command.""" run_cli(["init"], cwd=test_project) + disable_embeddings(test_project) result = run_cli(["config", "path"], cwd=test_project) assert result.returncode == 0 @@ -237,6 +259,7 @@ def test_compact_not_initialized(self, test_project): def test_compact_healthy_index(self, test_project): """Test compact on healthy index.""" run_cli(["init"], cwd=test_project) + disable_embeddings(test_project) run_cli(["index", "."], cwd=test_project) # Need to run incremental index first to create chunk_index.json diff --git a/tests/test_empty_code_fix.py b/tests/test_empty_code_fix.py index 76a2972..ccaf9e9 100644 --- a/tests/test_empty_code_fix.py +++ b/tests/test_empty_code_fix.py @@ -20,20 +20,17 @@ class TestEmptyCodeHandling: """Test that empty code fields are handled gracefully.""" def test_store_chunk_with_empty_text(self, backend): - """Test storing a chunk and searching doesn't crash with empty text in memvid.""" - # Store a chunk with minimal text to memvid (empty text causes embedding issues) - backend.mem.put( - title="empty_function", - label=ChunkType.FUNCTION.value, - metadata={ - "file_path": "test.py", - "start_line": 1, - "end_line": 1, - "language": Language.PYTHON.value, - }, - text="# placeholder", # Memvid needs some text - uri="test://test.py#1", + """Test storing a minimal chunk and searching doesn't crash.""" + chunk = Chunk( + symbol="empty_function", + start_line=LineNumber(1), + end_line=LineNumber(1), + code="# placeholder", + chunk_type=ChunkType.FUNCTION, + language=Language.PYTHON, + file_path=FilePath("test.py"), ) + backend.store_chunks_batch([chunk]) # Test that search completes without error results = backend.search_lexical("empty", k=5) @@ -41,19 +38,16 @@ def test_store_chunk_with_empty_text(self, backend): def test_search_result_fallback_code(self, backend): """Test that search results have fallback code when text is missing.""" - # Store a chunk - backend.mem.put( - title="test_func", - label=ChunkType.FUNCTION.value, - metadata={ - "file_path": "test.py", - "start_line": 10, - "end_line": 20, - "language": Language.PYTHON.value, - }, - text="def test_func(): pass", - uri="test://test.py#10", + chunk = Chunk( + symbol="test_func", + start_line=LineNumber(10), + end_line=LineNumber(20), + code="def test_func(): pass", + chunk_type=ChunkType.FUNCTION, + language=Language.PYTHON, + file_path=FilePath("test.py"), ) + backend.store_chunks_batch([chunk]) results = backend.search_lexical("test", k=5) diff --git a/tests/test_sqlite_vec_backend.py b/tests/test_sqlite_vec_backend.py index 4d9b06d..585827a 100644 --- a/tests/test_sqlite_vec_backend.py +++ b/tests/test_sqlite_vec_backend.py @@ -1,5 +1,7 @@ """Tests for sqlite-vec backend (FTS5 + sqlite-vec).""" +from datetime import datetime + import numpy as np import pytest import sqlite3 @@ -148,3 +150,135 @@ def create_tables_without_fts(self): assert first_id == second_id backend.close() + + +def test_add_changelog_stores_commit_context(backend): + commit_time = datetime(2024, 1, 1, 12, 0, 0) + backend.add_changelog( + tag="v1.0.0", + version="1.0.0", + summary="Release 1.0", + breaking_changes=[], + features=[], + fixes=[], + commit_hash="abc123", + commit_time=commit_time, + ) + + changelogs = backend.get_changelogs(limit=10) + assert changelogs + assert changelogs[0].commit_hash == "abc123" + assert changelogs[0].commit_time == commit_time + + +def test_add_timeline_event_stores_commit_context(backend): + commit_time = datetime(2024, 1, 2, 12, 0, 0) + backend.add_timeline_event( + event_type="merge", + from_ref="feature", + to_ref="main", + summary="Merge feature", + files_changed=[], + diff_stats={}, + importance="medium", + commit_hash="def456", + commit_time=commit_time, + ) + + events = backend.get_timeline_events(limit=10) + assert events + assert events[0].commit_hash == "def456" + assert events[0].commit_time == commit_time + + +def test_add_decision_stores_commit_context(backend): + commit_time = datetime(2024, 1, 3, 12, 0, 0) + decision_id = backend.add_decision( + session_id="sess-1", + title="Decision", + description="Do the thing", + reasoning="Because", + alternatives=[{"title": "Alt"}], + commit_hash="aaa111", + commit_time=commit_time, + ) + + decision = backend.get_decision(decision_id) + assert decision is not None + assert decision.commit_hash == "aaa111" + assert decision.commit_time == commit_time + + +def test_export_import_memory_preserves_commit_context(tmp_path): + backend1 = SqliteVecBackend(tmp_path / "backend1.sia-code", embedding_enabled=False, ndim=3) + backend1.create_index() + + commit_time = datetime(2024, 1, 1, 12, 0, 0) + + decision_id = backend1.add_decision( + session_id="export-test", + title="Decision export", + description="Export/import should preserve commit context", + commit_hash="d111", + commit_time=commit_time, + ) + backend1.approve_decision(decision_id, category="test") + + backend1.add_timeline_event( + event_type="merge", + from_ref="feature", + to_ref="main", + summary="Merged feature", + commit_hash="t111", + commit_time=commit_time, + ) + + backend1.add_changelog( + tag="v1.0.0", + version="1.0.0", + summary="Release", + commit_hash="c111", + commit_time=commit_time, + ) + + export_path = backend1.export_memory( + output_path=tmp_path / "memory.json", include_pending=False + ) + backend1.close() + + backend2 = SqliteVecBackend(tmp_path / "backend2.sia-code", embedding_enabled=False, ndim=3) + backend2.create_index() + + result = backend2.import_memory(export_path) + assert result.added > 0 + + imported_event = next( + ( + e + for e in backend2.get_timeline_events(limit=50) + if e.from_ref == "feature" and e.to_ref == "main" + ), + None, + ) + assert imported_event is not None + assert imported_event.commit_hash == "t111" + assert imported_event.commit_time == commit_time + + imported_changelog = next( + (c for c in backend2.get_changelogs(limit=50) if c.tag == "v1.0.0"), + None, + ) + assert imported_changelog is not None + assert imported_changelog.commit_hash == "c111" + assert imported_changelog.commit_time == commit_time + + cursor = backend2.conn.cursor() + cursor.execute("SELECT id FROM decisions WHERE title = ?", ("Decision export",)) + row = cursor.fetchone() + assert row is not None + imported_decision = backend2.get_decision(row["id"]) + assert imported_decision is not None + assert imported_decision.commit_hash == "d111" + assert imported_decision.commit_time == commit_time + + backend2.close() diff --git a/tests/test_usearch_backend.py b/tests/test_usearch_backend.py index 744a0ef..023209d 100644 --- a/tests/test_usearch_backend.py +++ b/tests/test_usearch_backend.py @@ -1,6 +1,7 @@ """Tests for UsearchSqliteBackend.""" import tempfile +from datetime import datetime from pathlib import Path import sqlite3 @@ -24,9 +25,7 @@ def backend(temp_index_dir, monkeypatch): """Create a test backend instance.""" backend = UsearchSqliteBackend( path=temp_index_dir, - embedding_model="all-MiniLM-L6-v2", # Small model for fast tests - ndim=384, - dtype="f16", + embedding_enabled=False, ) class DummyEmbedder: @@ -105,6 +104,8 @@ def test_store_and_retrieve_chunks(backend): def test_semantic_search(backend): """Test semantic vector search.""" + if not backend.embedding_enabled: + pytest.skip("Semantic search requires embeddings (disabled in tests).") # Store some chunks chunks = [ Chunk( @@ -217,11 +218,15 @@ def test_export_import_memory(backend, temp_index_dir): ) backend.approve_decision(decision_id, category="test") + commit_time = datetime(2024, 1, 1, 12, 0, 0) + backend.add_timeline_event( event_type="tag", from_ref="v1.0.0", to_ref="v2.0.0", summary="Major release", + commit_hash="abc123", + commit_time=commit_time, ) backend.add_changelog( @@ -229,6 +234,8 @@ def test_export_import_memory(backend, temp_index_dir): version="2.0.0", summary="Major version with breaking changes", breaking_changes=["Changed API signature"], + commit_hash="def456", + commit_time=commit_time, ) # Export memory @@ -248,12 +255,91 @@ def test_export_import_memory(backend, temp_index_dir): events = backend2.get_timeline_events() assert len(events) > 0 + imported_event = next( + (e for e in events if e.from_ref == "v1.0.0" and e.to_ref == "v2.0.0"), None + ) + assert imported_event is not None + assert imported_event.commit_hash == "abc123" + assert imported_event.commit_time == commit_time + changelogs = backend2.get_changelogs() assert len(changelogs) > 0 + imported_changelog = next((c for c in changelogs if c.tag == "v2.0.0"), None) + assert imported_changelog is not None + assert imported_changelog.commit_hash == "def456" + assert imported_changelog.commit_time == commit_time + backend2.close() +def test_open_index_applies_migrations_for_writes(temp_index_dir): + """Opening a legacy index in writable mode should apply schema migrations.""" + import sqlite3 + + legacy_dir = temp_index_dir / "legacy" + legacy_dir.mkdir(parents=True) + + # Minimal legacy schema without commit_hash/commit_time columns + db_path = legacy_dir / "index.db" + conn = sqlite3.connect(db_path) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS timeline ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + event_type TEXT, + from_ref TEXT, + to_ref TEXT, + summary TEXT, + files_changed JSON, + diff_stats JSON, + importance TEXT DEFAULT 'medium', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """ + ) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS changelogs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + tag TEXT UNIQUE, + version TEXT, + date TIMESTAMP, + summary TEXT, + breaking_changes JSON, + features JSON, + fixes JSON, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """ + ) + conn.commit() + conn.close() + + # usearch backend requires vector file to exist + (legacy_dir / "vectors.usearch").write_bytes(b"") + + backend = UsearchSqliteBackend(path=legacy_dir, embedding_enabled=False) + backend.open_index(writable=True) + + commit_time = datetime(2024, 1, 1, 12, 0, 0) + backend.add_changelog( + tag="v0.0.1", + version="0.0.1", + summary="legacy import", + commit_hash="abc123", + commit_time=commit_time, + ) + + changelogs = backend.get_changelogs(limit=10) + imported = next((c for c in changelogs if c.tag == "v0.0.1"), None) + assert imported is not None + assert imported.commit_hash == "abc123" + assert imported.commit_time == commit_time + + backend.close() + + def test_generate_context(backend): """Test LLM context generation.""" # Add test data diff --git a/tests/unit/test_git_sync.py b/tests/unit/test_git_sync.py index 82ce35e..35b73a8 100644 --- a/tests/unit/test_git_sync.py +++ b/tests/unit/test_git_sync.py @@ -1,6 +1,8 @@ """Unit tests for GitSyncService.""" +from datetime import datetime from unittest.mock import MagicMock, patch + import subprocess import pytest @@ -153,6 +155,59 @@ def test_importance_filtering(self, sync_service, mock_backend): # Should skip low importance assert stats["timeline_skipped"] >= 1 + def test_commit_context_passed_to_backend(self, sync_service, mock_backend): + """Ensure commit metadata is forwarded to backend writes.""" + commit_time = datetime(2024, 1, 1, 12, 0, 0) + tag_event = { + "tag": "v1.0.0", + "version": "1.0.0", + "summary": "Release 1.0", + "breaking_changes": [], + "features": [], + "fixes": [], + "commit_hash": "abc123", + "commit_time": commit_time, + } + merge_event = { + "event_type": "merge", + "from_ref": "feature", + "to_ref": "main", + "summary": "Merge feature", + "files_changed": [], + "diff_stats": {}, + "importance": "medium", + "commit_hash": "def456", + "commit_time": commit_time, + } + + with patch.object(sync_service.extractor, "scan_git_tags", return_value=[tag_event]): + with patch.object( + sync_service.extractor, "scan_merge_events", return_value=[merge_event] + ): + sync_service.sync() + + mock_backend.add_changelog.assert_called_with( + tag="v1.0.0", + version="1.0.0", + summary="Release 1.0", + breaking_changes=[], + features=[], + fixes=[], + commit_hash="abc123", + commit_time=commit_time, + ) + mock_backend.add_timeline_event.assert_called_with( + event_type="merge", + from_ref="feature", + to_ref="main", + summary="Merge feature", + files_changed=[], + diff_stats={}, + importance="medium", + commit_hash="def456", + commit_time=commit_time, + ) + def test_meets_importance_threshold(self, sync_service): """Test importance threshold logic.""" assert sync_service._meets_importance_threshold("high", "low") is True diff --git a/tests/unit/test_index_dir_resolution.py b/tests/unit/test_index_dir_resolution.py new file mode 100644 index 0000000..993ef37 --- /dev/null +++ b/tests/unit/test_index_dir_resolution.py @@ -0,0 +1,93 @@ +from pathlib import Path + +import pytest + +from sia_code.cli import resolve_index_dir + + +class _RunResult: + def __init__(self, stdout: str): + self.stdout = stdout + + +def _fake_run_factory(mapping: dict[tuple[str, ...], str]): + def fake_run(args, **kwargs): + key = tuple(args) + if key not in mapping: + raise AssertionError(f"Unexpected subprocess args: {args}") + return _RunResult(mapping[key]) + + return fake_run + + +def test_resolve_index_dir_prefers_env_override(tmp_path, monkeypatch): + override = tmp_path / "custom-index" + monkeypatch.setenv("SIA_CODE_INDEX_DIR", str(override)) + assert resolve_index_dir(tmp_path) == override + + +def test_default_scope_uses_worktree_local_index_when_not_worktree(tmp_path, monkeypatch): + monkeypatch.delenv("SIA_CODE_INDEX_DIR", raising=False) + monkeypatch.delenv("SIA_CODE_INDEX_SCOPE", raising=False) + + monkeypatch.setattr( + "sia_code.cli.subprocess.run", + _fake_run_factory( + { + ("git", "rev-parse", "--git-dir"): ".git\n", + ("git", "rev-parse", "--git-common-dir"): ".git\n", + } + ), + ) + + assert resolve_index_dir(tmp_path) == tmp_path / ".sia-code" + + +def test_default_scope_uses_shared_index_in_worktree(tmp_path, monkeypatch): + monkeypatch.delenv("SIA_CODE_INDEX_DIR", raising=False) + monkeypatch.delenv("SIA_CODE_INDEX_SCOPE", raising=False) + + common_dir = tmp_path / ".." / "repo" / ".git" + common_dir = common_dir.resolve() + + monkeypatch.setattr( + "sia_code.cli.subprocess.run", + _fake_run_factory( + { + ("git", "rev-parse", "--git-dir"): ".git/worktrees/branch\n", + ("git", "rev-parse", "--git-common-dir"): str(common_dir) + "\n", + } + ), + ) + + assert resolve_index_dir(tmp_path) == common_dir / "sia-code" + + +@pytest.mark.parametrize("scope", ["shared", "auto"]) +def test_explicit_scope_controls_resolution(tmp_path, monkeypatch, scope): + monkeypatch.delenv("SIA_CODE_INDEX_DIR", raising=False) + monkeypatch.setenv("SIA_CODE_INDEX_SCOPE", scope) + + common_dir = tmp_path / "common" / ".git" + common_dir.mkdir(parents=True) + + if scope == "shared": + mapping = { + ("git", "rev-parse", "--git-dir"): ".git\n", + ("git", "rev-parse", "--git-common-dir"): str(common_dir) + "\n", + } + expected = common_dir / "sia-code" + else: + # auto decides based on whether we're in a linked worktree + mapping = { + ("git", "rev-parse", "--git-dir"): ".git\n", + ("git", "rev-parse", "--git-common-dir"): ".git\n", + } + expected = tmp_path / ".sia-code" + + monkeypatch.setattr( + "sia_code.cli.subprocess.run", + _fake_run_factory(mapping), + ) + + assert resolve_index_dir(tmp_path) == expected