Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ Pick ──Enter──→ View ──Tab──→ Diff

Semantic-search threading: indexing and engine-load each run on their own thread, talk via `mpsc::channel` (`IndexMessage`, `EngineMessage`). `EngineMessage::Ready(Box<SearchEngine>)` hands the heap-allocated engine to the main thread. `with_silenced_stdio()` redirects stderr during model load to keep hf-hub progress bars out of the alternate screen — **Unix-only** (`libc::dup2`).

Index dir `.glc-index/` has `meta.toml` with `INDEX_VERSION` (currently 3), `head_oid`, per-component metadata. Mismatched version or oid forces rebuild.
Index dir `.glc-index/` has `meta.toml` with `INDEX_VERSION` (currently 5), `head_oid`, and per-component metadata. A mismatched version forces a full rebuild. A mismatched `head_oid` triggers an incremental update (BM25 `delete_term` + turbovec `remove` for stale docs, embedding only the delta) when the old `head_oid` is still reachable; otherwise it falls back to a full rebuild.

## Architecture gotchas

Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "gluck"
version = "0.8.2"
version = "0.9.0"
edition = "2021"

[[bin]]
Expand Down
28 changes: 26 additions & 2 deletions src/search/bm25.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::path::{Path, PathBuf};
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{
Field, IndexRecordOption, Schema, TextFieldIndexing, TextOptions, FAST, STORED, STRING,
Field, IndexRecordOption, Schema, TextFieldIndexing, TextOptions, FAST, INDEXED, STORED, STRING,
};
use tantivy::tokenizer::{LowerCaser, NgramTokenizer, TextAnalyzer};
use tantivy::{Index, IndexReader, IndexWriter, ReloadPolicy, TantivyError};
Expand Down Expand Up @@ -47,7 +47,7 @@ fn make_schema() -> (Schema, Bm25Fields) {
.set_index_option(IndexRecordOption::WithFreqs),
);

let id = builder.add_u64_field("id", FAST | STORED);
let id = builder.add_u64_field("id", FAST | STORED | INDEXED);
let kind = builder.add_text_field("kind", STRING | STORED);
let title = builder.add_text_field("title", text_opts);
let body = builder.add_text_field("body", body_opts);
Expand Down Expand Up @@ -139,6 +139,11 @@ impl Bm25Index {
Ok(())
}

/// Stages deletion of the documents whose `id` field equals `doc_id`.
///
/// The delete is only queued on `writer`; callers are expected to commit the
/// writer afterwards for the removal to show up in searches.
///
/// NOTE(review): term-based deletion presumably relies on the `id` field
/// being indexed in the schema — confirm against `make_schema`.
pub fn delete_doc(&self, writer: &mut IndexWriter, doc_id: u64) {
    // The value returned by `delete_term` is intentionally ignored.
    writer.delete_term(tantivy::Term::from_field_u64(self.fields.id, doc_id));
}

pub fn commit(&self, mut writer: IndexWriter) -> Result<(), TantivyError> {
writer.commit()?;
self.reader.reload()?;
Expand Down Expand Up @@ -387,6 +392,25 @@ mod tests {
assert_eq!(got.line_end, Some(20));
}

/// Deleting a document by id and committing must remove it from search results
/// while leaving the other matching document in place.
#[test]
fn test_delete_doc_removes_from_search() {
    let (_dir, index) = tmp_index();

    // Seed the index with two documents that both match the query "he".
    {
        let mut writer = index.writer().unwrap();
        index
            .add_doc(&mut writer, &commit_meta(1, "hello world"), "greeting")
            .unwrap();
        index
            .add_doc(&mut writer, &commit_meta(2, "hello again"), "second")
            .unwrap();
        index.commit(writer).unwrap();
    }
    let before = index.search("he", 10).unwrap();
    assert_eq!(before.len(), 2);

    // Remove document 1 with a fresh writer and commit the deletion.
    {
        let mut writer = index.writer().unwrap();
        index.delete_doc(&mut writer, 1);
        index.commit(writer).unwrap();
    }

    // Only document 2 should still be returned.
    let after = index.search("he", 10).unwrap();
    assert_eq!(after.len(), 1);
    assert_eq!(after[0].0, 2);
}

#[test]
fn test_path_field_exact_match_query() {
let (_dir, idx) = tmp_index();
Expand Down
147 changes: 147 additions & 0 deletions src/search/diff.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
use std::path::Path;

use git2::{DiffOptions, Oid};

use crate::git::commit::CommitInfo;
use crate::git::repo::GitRepo;
use crate::search::SearchError;

/// Repository-relative paths that differ between two commits, grouped by the
/// kind of change.
///
/// Filled in by `compute_file_changes`; the default value has all three lists
/// empty.
#[derive(Debug, Default, PartialEq, Eq)]
pub struct FileChanges {
    /// Paths present on the new side only (added or copied files, and the new
    /// path of a rename).
    pub added: Vec<String>,
    /// Paths whose delta status is `Modified`.
    pub modified: Vec<String>,
    /// Paths present on the old side only (deleted files, and the old path of
    /// a rename).
    pub deleted: Vec<String>,
}

pub fn compute_file_changes(
repo: &GitRepo,
old_oid: &str,
new_oid: &str,
) -> Result<FileChanges, SearchError> {
let r = repo.repository();
let old = Oid::from_str(old_oid).map_err(|e| SearchError::Git(e.to_string()))?;
let new = Oid::from_str(new_oid).map_err(|e| SearchError::Git(e.to_string()))?;
let old_tree = r
.find_commit(old)
.and_then(|c| c.tree())
.map_err(|e| SearchError::Git(e.to_string()))?;
let new_tree = r
.find_commit(new)
.and_then(|c| c.tree())
.map_err(|e| SearchError::Git(e.to_string()))?;

let mut opts = DiffOptions::new();
let diff = r
.diff_tree_to_tree(Some(&old_tree), Some(&new_tree), Some(&mut opts))
.map_err(|e| SearchError::Git(e.to_string()))?;

let mut out = FileChanges::default();
for delta in diff.deltas() {
match delta.status() {
git2::Delta::Added | git2::Delta::Copied => {
if let Some(p) = delta.new_file().path().and_then(Path::to_str) {
out.added.push(p.to_string());
}
}
git2::Delta::Modified => {
if let Some(p) = delta.new_file().path().and_then(Path::to_str) {
out.modified.push(p.to_string());
}
}
git2::Delta::Deleted => {
if let Some(p) = delta.old_file().path().and_then(Path::to_str) {
out.deleted.push(p.to_string());
}
}
git2::Delta::Renamed => {
if let Some(p) = delta.old_file().path().and_then(Path::to_str) {
out.deleted.push(p.to_string());
}
if let Some(p) = delta.new_file().path().and_then(Path::to_str) {
out.added.push(p.to_string());
}
}
_ => {}
}
}
Ok(out)
}

pub fn commits_since(
repo: &GitRepo,
old_oid: &str,
new_oid: &str,
) -> Result<Vec<CommitInfo>, SearchError> {
let r = repo.repository();
let new = Oid::from_str(new_oid).map_err(|e| SearchError::Git(e.to_string()))?;
let old = Oid::from_str(old_oid).map_err(|e| SearchError::Git(e.to_string()))?;
let mut revwalk = r.revwalk().map_err(|e| SearchError::Git(e.to_string()))?;
revwalk
.push(new)
.map_err(|e| SearchError::Git(e.to_string()))?;
// hide old_oid — old_oid에 도달 가능한 커밋은 결과에서 제외
revwalk
.hide(old)
.map_err(|e| SearchError::Git(e.to_string()))?;
let mut out = Vec::new();
for oid in revwalk.flatten() {
if let Ok(c) = r.find_commit(oid) {
out.push(CommitInfo::from_git_commit(&c));
}
}
Ok(out)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::repo::tests::{add_file_commit, init_test_repo};

    /// Walking from the third commit with the first one hidden must yield the
    /// second and third commits only.
    #[test]
    fn test_commits_since_excludes_old_and_includes_new() {
        let (dir, repo) = init_test_repo();
        let first = add_file_commit(&repo, "a.txt", b"1", "first").to_string();
        let second = add_file_commit(&repo, "b.txt", b"2", "second").to_string();
        let third = add_file_commit(&repo, "c.txt", b"3", "third").to_string();

        let git_repo = crate::git::repo::GitRepo::open(dir.path()).unwrap();
        let walked: Vec<String> = commits_since(&git_repo, &first, &third)
            .unwrap()
            .iter()
            .map(|commit| commit.id.to_string())
            .collect();

        // The old commit itself is excluded; only the two later ones remain.
        assert!(walked.contains(&second));
        assert!(walked.contains(&third));
        assert!(!walked.contains(&first));
    }

    /// A commit that modifies one file, deletes another, and adds a third must
    /// be classified into the matching three lists.
    #[test]
    fn test_added_modified_deleted_classified() {
        let (dir, repo) = init_test_repo();
        let root = dir.path();
        let _ = add_file_commit(&repo, "keep.txt", b"v1", "Add keep");
        let baseline = add_file_commit(&repo, "drop.txt", b"x", "Add drop").to_string();

        // Third commit: modify keep.txt, delete drop.txt, add new.txt.
        std::fs::write(root.join("keep.txt"), b"v2").unwrap();
        std::fs::remove_file(root.join("drop.txt")).unwrap();
        std::fs::write(root.join("new.txt"), b"hi").unwrap();

        let mut index = repo.index().unwrap();
        index.add_path(std::path::Path::new("keep.txt")).unwrap();
        index.add_path(std::path::Path::new("new.txt")).unwrap();
        index.remove_path(std::path::Path::new("drop.txt")).unwrap();
        index.write().unwrap();

        let tree_id = index.write_tree().unwrap();
        let tree = repo.find_tree(tree_id).unwrap();
        let signature = git2::Signature::now("t", "t@e").unwrap();
        let parent = repo.head().unwrap().peel_to_commit().unwrap();
        let head_after = repo
            .commit(Some("HEAD"), &signature, &signature, "c3", &tree, &[&parent])
            .unwrap()
            .to_string();

        let git_repo = crate::git::repo::GitRepo::open(root).unwrap();
        // The second commit is the baseline: the first commit's tree has no
        // drop.txt, so a diff from it could not report the deletion.
        let changes = compute_file_changes(&git_repo, &baseline, &head_after).unwrap();
        assert!(changes.added.iter().any(|path| path == "new.txt"));
        assert!(changes.modified.iter().any(|path| path == "keep.txt"));
        assert!(changes.deleted.iter().any(|path| path == "drop.txt"));
    }
}
Loading