diff --git a/Cargo.lock b/Cargo.lock
index 1a1d77e..ea62841 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1128,7 +1128,7 @@ dependencies = [
 
 [[package]]
 name = "gluck"
-version = "0.9.1"
+version = "0.9.3"
 dependencies = [
  "anyhow",
  "blas-src",
@@ -3342,12 +3342,14 @@ dependencies = [
 
 [[package]]
 name = "tree-sitter"
-version = "0.22.6"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca"
+checksum = "0203df02a3b6dd63575cc1d6e609edc2181c9a11867a271b25cfd2abff3ec5ca"
 dependencies = [
  "cc",
  "regex",
+ "regex-syntax",
+ "tree-sitter-language",
 ]
 
 [[package]]
@@ -3362,9 +3364,9 @@ dependencies = [
 
 [[package]]
 name = "tree-sitter-highlight"
-version = "0.22.6"
+version = "0.23.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eaca0fe34fa96eec6aaa8e63308dbe1bafe65a6317487c287f93938959b21907"
+checksum = "380a7706376fa6c52ba7bf71d1e7a93856ee8ab08a7680631dfa664fdd237d66"
 dependencies = [
  "lazy_static",
  "regex",
diff --git a/Cargo.toml b/Cargo.toml
index ceca30f..2136aba 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "gluck"
-version = "0.9.1"
+version = "0.9.3"
 edition = "2021"
 
 [[bin]]
@@ -16,8 +16,8 @@ clap = { version = "4", features = ["derive"] }
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 anyhow = "1"
-tree-sitter = "0.22"
-tree-sitter-highlight = "0.22"
+tree-sitter = "0.23"
+tree-sitter-highlight = "0.23"
 tree-sitter-rust = "0.23"
 tree-sitter-python = "0.23"
 tree-sitter-javascript = "0.23"
diff --git a/docs/superpowers/plans/2026-05-26-search-quality-improvements.md b/docs/superpowers/plans/2026-05-26-search-quality-improvements.md
new file mode 100644
index 0000000..8de01a1
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-26-search-quality-improvements.md
@@ -0,0 +1,1013 @@
+# 검색 품질 개선 구현 Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** `glc report` MRR을 0.330 → ≥0.65로, Recall@5를 0.286 → ≥0.65로 끌어올린다. BM25 인덱스의 식별자/path 매칭 능력을 강화하고, 일부 파일에서 누락되는 심볼을 잡는다.
+
+**Architecture:** (1) write-time 전처리로 identifier(snake_case + camelCase)를 단어로 분해해 SimpleTokenizer가 처리할 수 있게 한다. (2) `path_terms` 신규 BM25 필드를 추가해 path를 단어 단위로 검색한다. (3) WholeFile 임계값을 16KB로 올리고 Rust 심볼 추출에서 누락된 top-level `trait_item` / `type_item`을 추가한다. (4) `INDEX_VERSION`을 5→6으로 범프해 자동 풀 리빌드한다.
+
+**Tech Stack:** Rust, tantivy 0.22 (SimpleTokenizer + LowerCaser), tree-sitter 0.23, tree-sitter-rust.
+
+**Spec correction (구현 시 반영):**
+- Spec 3.2.1의 "IdentifierSplit TokenFilter"는 tantivy 0.22 GAT 복잡도를 피해 **write-time 전처리 헬퍼 + SimpleTokenizer + LowerCaser** 조합으로 동등 효과 구현.
+- Spec 3.3.2의 "enum/struct/trait/type 추가"는 부분 정정: enum/struct/impl-method/trait-method는 **이미 추출 중**. 진짜 누락은 **top-level `trait_item`(trait 선언 자체)** 과 **`type_item`(type alias)**.
+
+---
+
+## File Structure
+
+| 파일 | 책임 | 변경 |
+|---|---|---|
+| `src/search/text_prep.rs` | 식별자/path 전처리 (snake_case는 SimpleTokenizer가 처리, camelCase·path-separator만 공백 변환) | **신규** |
+| `src/search/bm25.rs` | BM25 스키마 + 토크나이저 + 쿼리. `path_terms` 필드 추가, title은 SimpleTokenizer+LowerCaser, body는 ngram_2_2 유지 | 수정 |
+| `src/search/chunk/file.rs` | `WHOLE_FILE_THRESHOLD: 8KB → 16KB` | 1줄 수정 |
+| `src/search/chunk/symbol.rs` | Rust 쿼리에 top-level `trait_item`, `type_item` 추가. `SymbolKind::TypeAlias` 신규 | 수정 |
+| `src/search/chunk/mod.rs` | `SymbolKind::TypeAlias` re-export 확인 | 변경 없음 (이미 pub use) |
+| `src/search/indexer.rs` | `chunk_to_meta`의 Symbol 분기가 `SymbolKind`를 직접 매칭하지 않는지 확인 (Task 7 Step 7) | 변경 없음 (확인만) |
+| `src/search/mod.rs` | `INDEX_VERSION: 5 → 6`, `text_prep` mod 추가 | 2줄 수정 |
+| `tests/fixtures/search_queries.toml` | `incremental indexing fallback` 정답에서 `diff.rs` 제거 | 1 entry 수정 |
+
+---
+
+## Task 1: Fixture 정제
+
+**Files:**
+- Modify: `tests/fixtures/search_queries.toml`
+
+- [ ] **Step 1: 정답 1건 수정**
+
+`tests/fixtures/search_queries.toml`의 4~9번째 줄을 다음으로 교체:
+
+```toml
+[[query]]
+text = "incremental indexing fallback"
+expected = [
+    { path = "src/search/indexer.rs", kind = "Symbol", title = "build_index_incremental" },
+]
+```
+
+(기존의 `{ path = "src/search/diff.rs" }` 항목 제거. 다른 쿼리는 모두 그대로.)
+
+- [ ] **Step 2: 변경 검증**
+
+Run: `git diff tests/fixtures/search_queries.toml`
+Expected: 첫 query만 변경, 다른 6개 query block 영향 없음.
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add tests/fixtures/search_queries.toml
+git commit -m "Drop unreachable diff.rs from incremental indexing fixture answer"
+```
+
+---
+
+## Task 2: 텍스트 전처리 헬퍼 (`text_prep.rs`)
+
+camelCase 경계에 공백을 삽입하는 단순 함수. snake_case와 path separator(`/`, `.`, `-`)는 SimpleTokenizer가 알아서 분해하므로 별도 처리 불필요.
+
+**Files:**
+- Create: `src/search/text_prep.rs`
+- Modify: `src/search/mod.rs` (mod 등록)
+
+- [ ] **Step 1: 구현 + 테스트 작성**
+
+`src/search/text_prep.rs` 파일을 다음 내용으로 생성:
+
+```rust
+/// 식별자/Path 텍스트를 SimpleTokenizer가 단어 단위로 분해할 수 있게 전처리한다.
+///
+/// SimpleTokenizer는 `_`, `/`, `.`, `-` 등 비-alphanumeric 문자에서 자동 분해하지만
+/// camelCase는 인식하지 못한다. 이 함수는 camelCase 경계(소문자 → 대문자, 글자 → 숫자)에
+/// 공백을 삽입해서 `BuildIndex` → `Build Index`, `Rev2` → `Rev 2`로 만든다.
+///
+/// 한글 등 비-ASCII alphabet은 case 개념이 없어 변환되지 않음.
+pub fn split_camel_case(s: &str) -> String {
+    let mut out = String::with_capacity(s.len() + 4);
+    let mut prev_lower = false;
+    let mut prev_upper = false;
+    for c in s.chars() {
+        let is_upper = c.is_ascii_uppercase();
+        let is_digit = c.is_ascii_digit();
+        let is_lower = c.is_ascii_lowercase();
+        if (is_upper && prev_lower) || (is_digit && (prev_lower || prev_upper)) {
+            out.push(' ');
+        }
+        out.push(c);
+        prev_lower = is_lower; // 연속 대문자(HTTPServer)는 split하지 않도록 소문자 여부만 추적
+        prev_upper = is_upper; // 글자 → 숫자 경계(Rev2 → Rev 2) 판정용
+    }
+    out
+}
+
+/// Path를 단어 후보로 만들기 위해 path separator를 공백으로 치환한 뒤
+/// `split_camel_case`를 적용한다.
+pub fn path_to_terms(path: &str) -> String {
+    let replaced: String = path
+        .chars()
+        .map(|c| if matches!(c, '/' | '.' | '-' | '_' | '\\') { ' ' } else { c })
+        .collect();
+    split_camel_case(&replaced)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn snake_case_unchanged_by_split() {
+        // SimpleTokenizer가 _를 알아서 분해하므로 split_camel_case는 손대지 않음
+        assert_eq!(split_camel_case("rrf_fuse"), "rrf_fuse");
+        assert_eq!(split_camel_case("build_index_incremental"), "build_index_incremental");
+    }
+
+    #[test]
+    fn camel_case_split() {
+        assert_eq!(split_camel_case("BuildIndex"), "Build Index");
+        assert_eq!(split_camel_case("ModalState"), "Modal State");
+        assert_eq!(split_camel_case("HTTPServer"), "HTTPServer"); // 연속 대문자는 split 안 함
+    }
+
+    #[test]
+    fn mixed_identifier() {
+        assert_eq!(split_camel_case("buildIndexFor"), "build Index For");
+    }
+
+    #[test]
+    fn path_terms_replaces_separators() {
+        assert_eq!(path_to_terms("src/search/rrf.rs"), "src search rrf rs");
+        assert_eq!(path_to_terms("src/git/store.rs"), "src git store rs");
+    }
+
+    #[test]
+    fn path_terms_with_camel_case_file() {
+        assert_eq!(path_to_terms("src/search/ModalState.rs"), "src search Modal State rs");
+    }
+
+    #[test]
+    fn empty_string() {
+        assert_eq!(split_camel_case(""), "");
+        assert_eq!(path_to_terms(""), "");
+    }
+
+    #[test]
+    fn korean_passthrough() {
+        // 한글은 case 개념이 없어 변환되지 않음
+        assert_eq!(split_camel_case("한글이름"), "한글이름");
+    }
+}
+```
+
+- [ ] **Step 2: mod 등록**
+
+`src/search/mod.rs`의 module 선언 블록(`pub mod bm25;`로 시작하는 부분, 1~10번째 줄)에 추가:
+
+```rust
+pub mod text_prep;
+```
+
+알파벳 순서를 따른다면 `silence` 다음, `vector` 앞.
+
+- [ ] **Step 3: 테스트 실행으로 정의된 동작 확인**
+
+Run: `cargo test --lib search::text_prep`
+Expected: 7개 테스트 모두 PASS.
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add src/search/text_prep.rs src/search/mod.rs
+git commit -m "Add text_prep helpers for camelCase split and path tokenization"
+```
+
+---
+
+## Task 3: BM25 스키마에 `path_terms` 필드 + title 토크나이저 변경
+
+**Files:**
+- Modify: `src/search/bm25.rs`
+
+- [ ] **Step 1: 토크나이저 상수 추가**
+
+`src/search/bm25.rs`의 14번째 줄(`pub const TOKENIZER: &str = "ngram_2_2";` 부근)에 인접하게 추가:
+
+```rust
+pub const TOKENIZER: &str = "ngram_2_2";
+pub const WORD_TOKENIZER: &str = "word_lower";
+```
+
+- [ ] **Step 2: `Bm25Fields` 구조체에 `path_terms` 필드 추가**
+
+기존 `Bm25Fields` (17~26번째 줄 부근):
+
+```rust
+pub struct Bm25Fields {
+    pub id: Field,
+    pub kind: Field,
+    pub title: Field,
+    pub body: Field,
+    pub path: Field,
+    pub commit_oid: Field,
+    pub line_start: Field,
+    pub line_end: Field,
+}
+```
+
+다음으로 변경 (path 다음에 `path_terms` 추가):
+
+```rust
+pub struct Bm25Fields {
+    pub id: Field,
+    pub kind: Field,
+    pub title: Field,
+    pub body: Field,
+    pub path: Field,
+    pub path_terms: Field,
+    pub commit_oid: Field,
+    pub line_start: Field,
+    pub line_end: Field,
+}
+```
+
+- [ ] **Step 3: `make_schema()` 변경**
+
+기존 `make_schema()` 함수(34~71번째 줄)를 다음으로 교체:
+
+```rust
+fn make_schema() -> (Schema, Bm25Fields) {
+    let mut builder = Schema::builder();
+
+    // Title: 식별자/path 단어 단위 매칭. SimpleTokenizer가 _/./- 등에서 분해, LowerCaser가 case 정규화.
+    // 한글 부분 매칭은 body 필드의 ngram이 담당.
+    let title_opts = TextOptions::default()
+        .set_indexing_options(
+            TextFieldIndexing::default()
+                .set_tokenizer(WORD_TOKENIZER)
+                .set_index_option(IndexRecordOption::WithFreqsAndPositions),
+        )
+        .set_stored();
+
+    // Path terms: title과 같은 토크나이저, 검색 전용 (저장 안 함).
+    let path_terms_opts = TextOptions::default().set_indexing_options(
+        TextFieldIndexing::default()
+            .set_tokenizer(WORD_TOKENIZER)
+            .set_index_option(IndexRecordOption::WithFreqs),
+    );
+
+    // Body: 한글/임의 텍스트 부분 매칭. 기존 ngram_2_2 유지.
+    let body_opts = TextOptions::default().set_indexing_options(
+        TextFieldIndexing::default()
+            .set_tokenizer(TOKENIZER)
+            .set_index_option(IndexRecordOption::WithFreqs),
+    );
+
+    let id = builder.add_u64_field("id", FAST | STORED | INDEXED);
+    let kind = builder.add_text_field("kind", STRING | STORED);
+    let title = builder.add_text_field("title", title_opts);
+    let body = builder.add_text_field("body", body_opts);
+    let path = builder.add_text_field("path", STRING | STORED);
+    let path_terms = builder.add_text_field("path_terms", path_terms_opts);
+    let commit_oid = builder.add_text_field("commit_oid", STRING | STORED);
+    let line_start = builder.add_u64_field("line_start", STORED);
+    let line_end = builder.add_u64_field("line_end", STORED);
+
+    let schema = builder.build();
+    let fields = Bm25Fields {
+        id,
+        kind,
+        title,
+        body,
+        path,
+        path_terms,
+        commit_oid,
+        line_start,
+        line_end,
+    };
+    (schema, fields)
+}
+```
+
+- [ ] **Step 4: `register_tokenizer()` 변경**
+
+기존 (73~79번째 줄):
+
+```rust
+fn register_tokenizer(index: &Index) {
+    let tokenizer =
+        TextAnalyzer::builder(NgramTokenizer::new(2, 2, false).expect("valid ngram params"))
+            .filter(LowerCaser)
+            .build();
+    index.tokenizers().register(TOKENIZER, tokenizer);
+}
+```
+
+다음으로 교체:
+
+```rust
+fn register_tokenizer(index: &Index) {
+    let ngram =
+        TextAnalyzer::builder(NgramTokenizer::new(2, 2, false).expect("valid ngram params"))
+            .filter(LowerCaser)
+            .build();
+    index.tokenizers().register(TOKENIZER, ngram);
+
+    let word_lower = TextAnalyzer::builder(SimpleTokenizer::default())
+        .filter(LowerCaser)
+        .build();
+    index.tokenizers().register(WORD_TOKENIZER, word_lower);
+}
+```
+
+그리고 파일 상단(9번째 줄)의 use 문에 `SimpleTokenizer` 추가:
+
+```rust
+use tantivy::tokenizer::{LowerCaser, NgramTokenizer, SimpleTokenizer, TextAnalyzer};
+```
+
+- [ ] **Step 5: 컴파일 확인**
+
+Run: `cargo build --lib`
+Expected: 컴파일 성공. (이 시점에서 `add_doc`/`search`가 `path_terms`를 모르지만 필드는 schema에 있어도 add_text 안 하면 빈 채로 둠 — 컴파일 OK.)
+
+- [ ] **Step 6: 회귀 테스트 실행**
+
+Run: `cargo test --lib search::bm25`
+Expected: 기존 테스트 9개 중:
+- `test_korean_bigram_search`: body 필드 ngram_2_2 유지 → PASS
+- `test_uppercase_indexed_matches_lowercase_query`: title이 WORD_TOKENIZER + LowerCaser로 바뀌었지만 "he" 쿼리는 SimpleTokenizer로는 hello 전체 매칭이 안 됨 → **FAIL 예상**. 이 테스트는 ngram 동작에 의존했음.
+
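+이 테스트는 다음 단계에서 의미를 재정의한다. 2글자 prefix 쿼리(`"he"`, `"fi"`, `"se"`)에 의존하는 `test_create_and_search_basic`, `test_cached_reader_sees_data_across_multiple_commits`, `test_delete_doc_removes_from_search`도 같은 이유로 FAIL하므로 다음 단계에서 함께 수정한다.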
+
+- [ ] **Step 7: 기존 `test_uppercase_indexed_matches_lowercase_query` 의미 재정의**
+
+`src/search/bm25.rs`의 해당 테스트(`fn test_uppercase_indexed_matches_lowercase_query` 부근, ~337번째 줄)를 다음으로 교체:
+
+```rust
+#[test]
+fn test_word_tokenizer_lowercases_title() {
+    // Title 필드는 SimpleTokenizer + LowerCaser. "Hello" 인덱싱 후 "hello" 쿼리로 매칭.
+    let (_dir, idx) = tmp_index();
+    let mut w = idx.writer().unwrap();
+    idx.add_doc(&mut w, &commit_meta(1, "Hello"), "").unwrap();
+    idx.commit(w).unwrap();
+    let results = idx.search("hello", 10).unwrap();
+    assert!(
+        !results.is_empty(),
+        "lowercase query 'hello' must match title 'Hello' — requires LowerCaser on word tokenizer"
+    );
+}
+```
+
+`test_create_and_search_basic` (288~298번째 줄)에서 `idx.search("he", 10)`도 이제 단어 단위로 동작하므로 "he"는 "hello"와 매칭 안 됨. 다음으로 수정:
+
+```rust
+#[test]
+fn test_create_and_search_basic() {
+    let (_dir, idx) = tmp_index();
+    let mut w = idx.writer().unwrap();
+    idx.add_doc(&mut w, &commit_meta(1, "hello world"), "greeting text")
+        .unwrap();
+    idx.commit(w).unwrap();
+    let results = idx.search("hello", 10).unwrap();
+    assert!(!results.is_empty());
+    assert_eq!(results[0].0, 1);
+}
+```
+
+`test_cached_reader_sees_data_across_multiple_commits` (~351번째 줄)의 `idx.search("fi", 10)` / `idx.search("se", 10)`도 단어 매칭으로 바뀌어 fail. 다음으로 수정:
+
+```rust
+#[test]
+fn test_cached_reader_sees_data_across_multiple_commits() {
+    let (_dir, idx) = tmp_index();
+
+    let mut w = idx.writer().unwrap();
+    idx.add_doc(&mut w, &commit_meta(1, "first doc"), "")
+        .unwrap();
+    idx.commit(w).unwrap();
+    let r1 = idx.search("first", 10).unwrap();
+    assert_eq!(r1.len(), 1, "first commit visible");
+
+    let mut w = idx.writer().unwrap();
+    idx.add_doc(&mut w, &commit_meta(2, "second doc"), "")
+        .unwrap();
+    idx.commit(w).unwrap();
+    let r2 = idx.search("second", 10).unwrap();
+    assert!(
+        r2.iter().any(|(id, _)| *id == 2),
+        "second commit must be visible via cached reader"
+    );
+}
+```
+
+`test_delete_doc_removes_from_search` (~397번째 줄)의 `idx.search("he", 10)`도 같은 이유:
+
+```rust
+#[test]
+fn test_delete_doc_removes_from_search() {
+    let (_dir, idx) = tmp_index();
+    let mut w = idx.writer().unwrap();
+    idx.add_doc(&mut w, &commit_meta(1, "hello world"), "greeting")
+        .unwrap();
+    idx.add_doc(&mut w, &commit_meta(2, "hello again"), "second")
+        .unwrap();
+    idx.commit(w).unwrap();
+    assert_eq!(idx.search("hello", 10).unwrap().len(), 2);
+
+    let mut w = idx.writer().unwrap();
+    idx.delete_doc(&mut w, 1);
+    idx.commit(w).unwrap();
+    let r = idx.search("hello", 10).unwrap();
+    assert_eq!(r.len(), 1);
+    assert_eq!(r[0].0, 2);
+}
+```
+
+- [ ] **Step 8: 회귀 테스트 재실행**
+
+Run: `cargo test --lib search::bm25`
+Expected: 모든 테스트 PASS.
+
+- [ ] **Step 9: Commit**
+
+```bash
+git add src/search/bm25.rs
+git commit -m "Add path_terms field and word tokenizer to BM25 schema"
+```
+
+---
+
+## Task 4: `add_doc`에서 path/title 전처리 + path_terms 채우기
+
+**Files:**
+- Modify: `src/search/bm25.rs`
+
+- [ ] **Step 1: `add_doc`이 전처리하도록 수정**
+
+`src/search/bm25.rs::add_doc` (117~140번째 줄):
+
+```rust
+pub fn add_doc(
+    &self,
+    writer: &mut IndexWriter,
+    meta: &DocMeta,
+    body: &str,
+) -> Result<(), TantivyError> {
+    let mut doc = tantivy::TantivyDocument::default();
+    doc.add_u64(self.fields.id, meta.doc_id);
+    doc.add_text(self.fields.kind, meta.kind.as_str());
+    doc.add_text(self.fields.title, &meta.title);
+    doc.add_text(self.fields.body, body);
+    doc.add_text(self.fields.commit_oid, &meta.commit_oid);
+    if let Some(p) = &meta.path {
+        doc.add_text(self.fields.path, p);
+    }
+    if let Some(ls) = meta.line_start {
+        doc.add_u64(self.fields.line_start, u64::from(ls));
+    }
+    if let Some(le) = meta.line_end {
+        doc.add_u64(self.fields.line_end, u64::from(le));
+    }
+    writer.add_document(doc)?;
+    Ok(())
+}
+```
+
+다음으로 변경:
+
+```rust
+pub fn add_doc(
+    &self,
+    writer: &mut IndexWriter,
+    meta: &DocMeta,
+    body: &str,
+) -> Result<(), TantivyError> {
+    use crate::search::text_prep::{path_to_terms, split_camel_case};
+    let mut doc = tantivy::TantivyDocument::default();
+    doc.add_u64(self.fields.id, meta.doc_id);
+    doc.add_text(self.fields.kind, meta.kind.as_str());
+    // Title은 camelCase split만 전처리 — _ / . - 등은 SimpleTokenizer가 처리.
+    doc.add_text(self.fields.title, &split_camel_case(&meta.title));
+    doc.add_text(self.fields.body, body);
+    doc.add_text(self.fields.commit_oid, &meta.commit_oid);
+    if let Some(p) = &meta.path {
+        doc.add_text(self.fields.path, p);
+        doc.add_text(self.fields.path_terms, &path_to_terms(p));
+    }
+    if let Some(ls) = meta.line_start {
+        doc.add_u64(self.fields.line_start, u64::from(ls));
+    }
+    if let Some(le) = meta.line_end {
+        doc.add_u64(self.fields.line_end, u64::from(le));
+    }
+    writer.add_document(doc)?;
+    Ok(())
+}
+```
+
+- [ ] **Step 2: path_terms 동작 단위 테스트 추가**
+
+`src/search/bm25.rs::tests` 모듈의 끝(`}` 직전)에 추가:
+
+```rust
+#[test]
+fn test_path_terms_matches_path_segment_query() {
+    let (_dir, idx) = tmp_index();
+    let mut w = idx.writer().unwrap();
+    let meta = DocMeta {
+        doc_id: 1,
+        kind: DocKind::File,
+        title: "src/search/rrf.rs".into(),
+        commit_oid: "a".repeat(40),
+        path: Some("src/search/rrf.rs".into()),
+        line_start: None,
+        line_end: None,
+    };
+    idx.add_doc(&mut w, &meta, "fn rrf_fuse() {}").unwrap();
+    idx.commit(w).unwrap();
+    // 이 시점(Task 4)의 QueryParser default field는 title/body뿐이므로 "rrf"는 title의 단어 토큰으로 매칭된다. path_terms 검색은 Task 5에서 활성화된다.
+    let results = idx.search("rrf", 10).unwrap();
+    assert_eq!(results.len(), 1);
+    assert_eq!(results[0].0, 1);
+}
+
+#[test]
+fn test_camel_case_title_split_for_query() {
+    let (_dir, idx) = tmp_index();
+    let mut w = idx.writer().unwrap();
+    let meta = DocMeta {
+        doc_id: 7,
+        kind: DocKind::Symbol,
+        title: "ModalState (src/search/modal_state.rs)".into(),
+        commit_oid: "b".repeat(40),
+        path: Some("src/search/modal_state.rs".into()),
+        line_start: Some(1),
+        line_end: Some(10),
+    };
+    idx.add_doc(&mut w, &meta, "enum ModalState {}").unwrap();
+    idx.commit(w).unwrap();
+    // CamelCase split → "Modal State" → 소문자 매칭
+    let r = idx.search("modal", 10).unwrap();
+    assert!(r.iter().any(|(id, _)| *id == 7), "modal must match split ModalState");
+    let r = idx.search("state", 10).unwrap();
+    assert!(r.iter().any(|(id, _)| *id == 7), "state must match split ModalState");
+}
+```
+
+- [ ] **Step 3: 테스트 실행**
+
+Run: `cargo test --lib search::bm25`
+Expected: 새 테스트 2개 포함 전부 PASS.
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add src/search/bm25.rs
+git commit -m "Write path_terms and camelCase-split title in BM25 add_doc"
+```
+
+---
+
+## Task 5: QueryParser에 path_terms 포함 + boost
+
+**Files:**
+- Modify: `src/search/bm25.rs`
+
+- [ ] **Step 1: `search()` 메서드의 QueryParser 변경**
+
+`src/search/bm25.rs::search` (153~169번째 줄)에서 QueryParser 생성 부분:
+
+```rust
+let parser = QueryParser::for_index(&self.index, vec![self.fields.title, self.fields.body]);
+```
+
+다음으로 변경 (path_terms 포함 + boost):
+
+```rust
+let mut parser = QueryParser::for_index(
+    &self.index,
+    vec![self.fields.title, self.fields.path_terms, self.fields.body],
+);
+parser.set_field_boost(self.fields.title, 2.0);
+parser.set_field_boost(self.fields.path_terms, 2.0);
+parser.set_field_boost(self.fields.body, 1.0);
+```
+
+(`let parser`이 아니라 `let mut parser`로 변경하는 것에 주의.)
+
+- [ ] **Step 2: path_terms 부스트가 path 매칭 우선순위에 영향을 주는지 검증하는 테스트 추가**
+
+`src/search/bm25.rs::tests`에 추가:
+
+```rust
+#[test]
+fn test_path_match_outranks_unrelated_body_match() {
+    let (_dir, idx) = tmp_index();
+    let mut w = idx.writer().unwrap();
+
+    // Doc 1: path가 정확히 매칭하지만 body에 store 단어 없음
+    let target = DocMeta {
+        doc_id: 1,
+        kind: DocKind::File,
+        title: "src/git/store.rs".into(),
+        commit_oid: "a".repeat(40),
+        path: Some("src/git/store.rs".into()),
+        line_start: None,
+        line_end: None,
+    };
+    idx.add_doc(&mut w, &target, "fn open() {}").unwrap();
+
+    // Doc 2: body에 store가 흩어져 있지만 path 무관
+    let distractor = DocMeta {
+        doc_id: 2,
+        kind: DocKind::File,
+        title: "src/ui/view.rs".into(),
+        commit_oid: "b".repeat(40),
+        path: Some("src/ui/view.rs".into()),
+        line_start: None,
+        line_end: None,
+    };
+    idx.add_doc(&mut w, &distractor, "store store store").unwrap();
+
+    idx.commit(w).unwrap();
+
+    let results = idx.search("store", 10).unwrap();
+    // path_terms boost 2.0이 path 매칭 doc을 상위로 끌어올려야 함.
+    assert!(results.iter().any(|(id, _)| *id == 1));
+    let pos_1 = results.iter().position(|(id, _)| *id == 1).unwrap();
+    let pos_2 = results.iter().position(|(id, _)| *id == 2);
+    if let Some(p2) = pos_2 {
+        assert!(pos_1 <= p2, "path-matching doc 1 should rank ≤ body-only doc 2");
+    }
+}
+```
+
+- [ ] **Step 3: 테스트 실행**
+
+Run: `cargo test --lib search::bm25`
+Expected: 모든 테스트 PASS.
+
+- [ ] **Step 4: Clippy + 포맷 확인**
+
+Run: `cargo clippy --all-targets -- -D warnings`
+Expected: 변경한 파일에 대해 새 warning 없음.
+
+Run: `rustfmt src/search/bm25.rs src/search/text_prep.rs`
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/search/bm25.rs
+git commit -m "Include path_terms in query parser with boost for path matching"
+```
+
+---
+
+## Task 6: WholeFile 임계값 상향
+
+**Files:**
+- Modify: `src/search/chunk/file.rs`
+
+- [ ] **Step 1: 테스트 먼저 — 12KB 파일이 WholeFile로 잡혀야 함**
+
+`src/search/chunk/file.rs::tests` 모듈 (59번째 줄 부근) 끝에 추가:
+
+```rust
+#[test]
+fn twelve_kb_rust_file_stays_whole_file() {
+    // 16KB 임계값 가정 — 12KB Rust 파일은 Symbol 분할이 아닌 WholeFile로 잡혀야 함.
+    let big = format!(
+        "fn foo() {{\n{}\n}}\n",
+        "    let x = 1;\n".repeat(800),
+    );
+    assert!(big.len() > 8 * 1024 && big.len() < 16 * 1024, "test fixture sizing: got {}", big.len());
+    let chunks = split_file("oid", "medium.rs", &big);
+    assert_eq!(chunks.len(), 1, "12KB file should be single WholeFile");
+    assert!(matches!(chunks[0], Chunk::WholeFile { .. }));
+}
+```
+
+- [ ] **Step 2: 테스트 실행 — 기존 8KB 임계로 FAIL 예상**
+
+Run: `cargo test --lib chunk::file::tests::twelve_kb_rust_file_stays_whole_file`
+Expected: FAIL — 8KB 임계로 Symbol 분할됨.
+
+- [ ] **Step 3: 임계값 변경**
+
+`src/search/chunk/file.rs:6`:
+
+```rust
+const WHOLE_FILE_THRESHOLD: usize = 8 * 1024; // 8 KB
+```
+
+→
+
+```rust
+const WHOLE_FILE_THRESHOLD: usize = 16 * 1024; // 16 KB — modal_state.rs, store.rs 등을 WholeFile로 보존
+```
+
+- [ ] **Step 4: 테스트 재실행**
+
+Run: `cargo test --lib chunk::file`
+Expected: 모든 테스트 PASS (새 테스트 + 기존 5개).
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add src/search/chunk/file.rs
+git commit -m "Raise WholeFile threshold to 16KB to keep mid-size files searchable by path"
+```
+
+---
+
+## Task 7: Rust top-level `trait_item` / `type_item` 추출 추가
+
+**Files:**
+- Modify: `src/search/chunk/symbol.rs`
+
+- [ ] **Step 1: 실패 테스트 작성**
+
+`src/search/chunk/symbol.rs::tests` 끝(`}` 직전, ~376번째 줄)에 추가:
+
+```rust
+#[test]
+fn rust_top_level_trait_extracted() {
+    let src = r#"
+trait Greet {
+    fn name(&self) -> &str;
+    fn hello(&self) -> String { String::new() }
+}
+"#;
+    let spans = extract_symbols(src, Language::Rust).unwrap();
+    let has_trait_container = spans
+        .iter()
+        .any(|s| s.kind == SymbolKind::Trait && s.name == "Greet");
+    assert!(
+        has_trait_container,
+        "top-level trait declaration must be extracted as Trait, not only its methods"
+    );
+}
+
+#[test]
+fn rust_top_level_type_alias_extracted() {
+    let src = r#"
+type CommitId = String;
+type Result<T> = std::result::Result<T, MyError>;
+"#;
+    let spans = extract_symbols(src, Language::Rust).unwrap();
+    let names: Vec<_> = spans
+        .iter()
+        .filter(|s| s.kind == SymbolKind::TypeAlias)
+        .map(|s| s.name.as_str())
+        .collect();
+    assert!(names.contains(&"CommitId"));
+    assert!(names.contains(&"Result"));
+}
+```
+
+- [ ] **Step 2: 테스트 실행 — `SymbolKind::TypeAlias` 부재로 컴파일 FAIL**
+
+Run: `cargo test --lib chunk::symbol -- --no-run`
+Expected: 컴파일 에러 — `SymbolKind::TypeAlias` not found.
+
+- [ ] **Step 3: `SymbolKind`에 `TypeAlias` 추가**
+
+`src/search/chunk/symbol.rs:7-16`의 `SymbolKind` enum을:
+
+```rust
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SymbolKind {
+    Function,
+    Method,
+    Struct,
+    Enum,
+    Trait,
+    Class,
+    Other,
+}
+```
+
+다음으로 교체:
+
+```rust
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SymbolKind {
+    Function,
+    Method,
+    Struct,
+    Enum,
+    Trait,
+    TypeAlias,
+    Class,
+    Other,
+}
+```
+
+- [ ] **Step 4: Rust 쿼리에 `trait_item` (top-level) + `type_item` 추가**
+
+`src/search/chunk/symbol.rs:191-210`의 `RUST_QUERY` 상수를 다음으로 교체:
+
+```rust
+const RUST_QUERY: &str = r#"
+((source_file
+   (function_item name: (identifier) @name) @symbol.function))
+
+((source_file
+   (struct_item name: (type_identifier) @name) @symbol.struct))
+
+((source_file
+   (enum_item name: (type_identifier) @name) @symbol.enum))
+
+((source_file
+   (trait_item name: (type_identifier) @name) @symbol.trait))
+
+((source_file
+   (type_item name: (type_identifier) @name) @symbol.type))
+
+((source_file
+   (impl_item
+     (declaration_list
+       (function_item name: (identifier) @name) @symbol.method))))
+
+((source_file
+   (trait_item
+     (declaration_list
+       (function_item name: (identifier) @name) @symbol.method))))
+"#;
+```
+
+- [ ] **Step 5: `build_symbol_span()` capture name 매칭에 `symbol.trait`, `symbol.type` 추가**
+
+`src/search/chunk/symbol.rs:68-97`의 capture name match 블록에 추가 (`"symbol.trait"` 케이스는 이미 있지만 다시 확인, `"symbol.type"`은 신규):
+
+```rust
+"symbol.trait" => {
+    symbol_node = Some(cap.node);
+    kind = SymbolKind::Trait;
+}
+"symbol.type" => {
+    symbol_node = Some(cap.node);
+    kind = SymbolKind::TypeAlias;
+}
+```
+
+(기존에 `"symbol.trait"` 케이스가 이미 있다면 중복 추가하지 말고 `"symbol.type"`만 신규로 추가.)
+
+- [ ] **Step 6: 회귀 테스트 실행**
+
+Run: `cargo test --lib chunk::symbol`
+Expected:
+- 새 테스트 2개 PASS
+- 기존 `rust_trait_default_methods_extracted`는 trait method 추출이 그대로라 PASS
+- 다른 모든 테스트 PASS
+
+- [ ] **Step 7: `chunk_to_meta`에서 새 `SymbolKind` 처리 확인**
+
+`src/search/indexer.rs::chunk_to_meta` (395~435번째 줄) — `Chunk::Symbol` 분기는 `kind`를 직접 사용하지 않고 `DocKind::Symbol`로 변환한다. `SymbolKind::TypeAlias` 추가는 영향 없음. 확인만:
+
+Run: `cargo build --lib`
+Expected: 컴파일 성공.
+
+- [ ] **Step 8: Commit**
+
+```bash
+git add src/search/chunk/symbol.rs
+git commit -m "Extract top-level trait_item and type_item as Rust symbols"
+```
+
+---
+
+## Task 8: INDEX_VERSION 범프
+
+**Files:**
+- Modify: `src/search/mod.rs`
+
+- [ ] **Step 1: 버전 상수 변경**
+
+`src/search/mod.rs:209`:
+
+```rust
+pub const INDEX_VERSION: u32 = 5;
+```
+
+→
+
+```rust
+pub const INDEX_VERSION: u32 = 6;
+```
+
+- [ ] **Step 2: 변경된 버전이 기존 인덱스를 거부하는지 확인하는 회귀 테스트는 이미 존재**
+
+`open_fails_on_tokenizer_mismatch`와 `IndexMeta`의 버전 검사 로직(`SearchEngine::open` 122~128번째 줄)이 이미 VersionMismatch를 던지므로 별도 테스트 추가 불필요.
+
+Run: `cargo test --lib search`
+Expected: 모든 테스트 PASS.
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add src/search/mod.rs
+git commit -m "Bump INDEX_VERSION to 6 for path_terms + word tokenizer schema"
+```
+
+---
+
+## Task 9: 전체 빌드 + 회귀
+
+**Files:** (검증 전용, 변경 없음)
+
+- [ ] **Step 1: 전체 테스트**
+
+Run: `cargo test`
+Expected: 모든 테스트 PASS (ignored 제외).
+
+- [ ] **Step 2: Clippy 전체**
+
+Run: `cargo clippy --all-targets -- -D warnings`
+Expected: 변경한 파일에 새 warning 없음.
+
+- [ ] **Step 3: 포맷 확인**
+
+Run: `rustfmt --check src/search/text_prep.rs src/search/bm25.rs src/search/chunk/file.rs src/search/chunk/symbol.rs src/search/mod.rs`
+Expected: 출력 없음 (이미 포맷됨).
+
+만약 차이가 있다면:
+
+Run: `rustfmt src/search/text_prep.rs src/search/bm25.rs src/search/chunk/file.rs src/search/chunk/symbol.rs src/search/mod.rs`
+
+---
+
+## Task 10: End-to-end 검증 (`glc report`)
+
+**Files:** (검증 전용)
+
+- [ ] **Step 1: 인덱스 재빌드**
+
+Run: `cargo run --release --bin glc -- index --force`
+Expected: 로그 마지막에 "Indexed N documents." 출력. INDEX_VERSION 6으로 새 인덱스가 `./.glc-index/`에 생성됨.
+
+- [ ] **Step 2: 리포트 생성**
+
+Run: `cargo run --release --bin glc -- report --out result.md`
+Expected: stdout에 aggregate 메트릭 + per-query 표 출력. `result.md` 갱신.
+
+- [ ] **Step 3: 합격 기준 검증**
+
+`result.md` 읽고 아래 기준 충족 여부 확인:
+
+| 메트릭 | 베이스라인 | 목표 |
+|---|---|---|
+| MRR | 0.330 | ≥ 0.65 |
+| Recall@5 | 0.286 | ≥ 0.65 |
+| Recall@10 | 0.571 | ≥ 0.85 |
+| NDCG@10 | 0.384 | ≥ 0.65 |
+
+쿼리별:
+
+| 쿼리 | 베이스라인 Hit Rank | 목표 |
+|---|---|---|
+| incremental indexing fallback | — | ≤ 5 |
+| tantivy delete_term | 1 | 1 (회귀 없음) |
+| RRF reciprocal rank fusion | — | ≤ 5 |
+| embedding model load potion | 1 | 1 (회귀 없음) |
+| search modal state machine | — | ≤ 5 |
+| tree sitter highlight configuration | 6 | ≤ 5 |
+| git revwalk topological commit | 7 | ≤ 5 |
+
+- [ ] **Step 4: 결과 commit**
+
+목표 충족 시:
+
+```bash
+git add result.md
+git commit -m "Update search quality report after BM25 path_terms and word tokenizer rollout"
+```
+
+목표 미달 시: 어떤 쿼리가 미달인지 확인하고, 후속 라운드(RRF k 튜닝, embed_text 보강, 모듈 docstring 청크) 중 하나를 선택해 별도 plan 작성. 이번 plan은 부분 커밋한 채로 종료하고 사용자와 다음 단계 논의.
+
+---
+
+## Self-Review (작성자 체크)
+
+- **Spec coverage:**
+  - Spec 3.1 (fixture 정제) → Task 1 ✓
+  - Spec 3.2.1 (code_ident 토크나이저) → Task 2 (text_prep) + Task 3 (WORD_TOKENIZER) ✓
+  - Spec 3.2.2 (path_terms 필드) → Task 3 + Task 4 ✓
+  - Spec 3.2.3 (title 토크나이저 교체) → Task 3 ✓
+  - Spec 3.2.4 (body 그대로) → Task 3 ✓
+  - Spec 3.2.5 (QueryParser boost) → Task 5 ✓
+  - Spec 3.3.1 (WHOLE_FILE_THRESHOLD 16KB) → Task 6 ✓
+  - Spec 3.3.2 (Symbol 확장) → Task 7 (실제 누락분만 정정 반영) ✓
+  - Spec 3.4 (path → path_terms 분해 로직) → Task 2 (`path_to_terms`) + Task 4 (add_doc) ✓
+  - Spec 3.5 (INDEX_VERSION 5→6) → Task 8 ✓
+  - Spec 3.6 (RRF/임베딩 변경 없음) → 명시적 미작업 ✓
+  - Spec 4 (테스트) → Task 2/3/4/5/6/7 인라인 ✓
+  - Spec 7 (작업 순서) → Task 1~10 ✓
+
+- **Placeholder scan:** "TBD"/"TODO" 없음. 모든 코드 블록은 완성된 형태.
+
+- **Type consistency:**
+  - `WORD_TOKENIZER` 상수는 Task 3에서 정의, 같은 Task의 register/스키마에서 사용 ✓
+  - `path_terms` 필드 이름은 Task 3 정의, Task 4 add_doc, Task 5 QueryParser에서 동일하게 사용 ✓
+  - `path_to_terms`/`split_camel_case` 함수 시그니처는 Task 2 정의, Task 4에서 호출 일치 ✓
+  - `SymbolKind::TypeAlias`는 Task 7 Step 3에서 enum에 추가, Step 5에서 매칭 ✓
diff --git a/docs/superpowers/specs/2026-05-26-search-quality-improvements-design.md b/docs/superpowers/specs/2026-05-26-search-quality-improvements-design.md
new file mode 100644
index 0000000..8f06058
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-26-search-quality-improvements-design.md
@@ -0,0 +1,294 @@
+# 검색 품질 개선 설계
+
+- 작성일: 2026-05-26
+- 대상 커밋: `ccb575d` (Cargo.lock 0.9.1)
+- 관련 베이스라인: `result.md` (MRR 0.330, Recall@5 0.286, Recall@10 0.571, NDCG@10 0.384)
+- 회귀 추적: `glc report` + `tests/fixtures/search_queries.toml`
+
+## 1. 동기
+
+`glc report` 베이스라인에서 검색 품질이 낮음. 7개 쿼리 중:
+
+- **0 hit 3개**: `incremental indexing fallback`, `RRF reciprocal rank fusion`, `search modal state machine`
+- **낮은 랭크 2개**: `tree sitter highlight configuration` (rank 6), `git revwalk topological commit` (rank 7)
+- **rank 1 2개**: `tantivy delete_term`, `embedding model load potion`
+
+원인 분석을 통해 식별된 핵심 결함:
+
+| ID | 원인 | 영향받는 쿼리 |
+|---|---|---|
+| A | snake_case 식별자 ↔ 자연어 쿼리 매칭 부재 (`rrf_fuse` vs "fusion") | RRF, modal state |
+| B | path가 STRING 필드라 토큰 매칭 불가, BM25 검색에 미포함 | tree sitter, git revwalk |
+| C | BM25 `ngram_2_2` 단독 → 영어 단어에서 노이즈 점수 큼 | 전반적 |
+| D | Symbol 추출이 `function_item`만 → `enum`/`struct`/`trait` 누락 | modal state machine |
+| E | fixture에 어휘 매칭 불가능한 정답 (`diff.rs` ↔ "incremental"/"fallback") | incremental |
+
+이 설계는 (E)는 fixture 정제로, (A)~(D)는 인덱싱/쿼리 파이프라인 변경으로 해결한다. 임베딩 모델 교체나 RRF 튜닝은 이번 작업 범위에서 제외 — 이번 변경 후 재측정 결과를 보고 다음 라운드에서 결정한다.
+
+## 2. 변경 범위
+
+| 컴포넌트 | 변경 | 비고 |
+|---|---|---|
+| `tests/fixtures/search_queries.toml` | 정답 1건 수정 | 어휘 매칭 불가능 케이스 제거 |
+| `src/search/bm25.rs` | 스키마 확장, 토크나이저 추가, 쿼리 필드 확대 | path_terms 신규, identifier 분해 토크나이저 신규 |
+| `src/search/chunk/file.rs` | `WHOLE_FILE_THRESHOLD` 8KB → 16KB | 1줄 변경 |
+| `src/search/chunk/symbol.rs` | Rust 심볼 추출 확장 (enum/struct/trait/type) | function 외 타입도 잡음 |
+| `src/search/chunk/mod.rs` | `SymbolKind` 변형 확장 | Enum/Struct/Trait/TypeAlias 추가 |
+| `src/search/indexer.rs` | `chunk_to_meta`에서 path를 path_terms로 분해 저장 | BM25 add_doc 경로 |
+| `src/search/mod.rs` | `INDEX_VERSION: 5 → 6` | 자동 풀 리빌드 트리거 |
+
+**범위 외**:
+
+- 임베딩 모델 교체 또는 임베딩 텍스트(`embed_text`) 변경
+- RRF k 또는 candidate_limit 튜닝
+- Python/JS/TS/Go 심볼 추출 확장 (회귀 위험, fixture에 영향 없음)
+- 모듈/파일 docstring을 별도 청크로 만드는 작업
+- Vector 인덱스 변경 (turbovec)
+
+## 3. 상세 설계
+
+### 3.1 Fixture 정제
+
+`tests/fixtures/search_queries.toml`의 첫 쿼리:
+
+```toml
+# Before
+[[query]]
+text = "incremental indexing fallback"
+expected = [
+    { path = "src/search/diff.rs" },
+    { path = "src/search/indexer.rs", kind = "Symbol", title = "build_index_incremental" },
+]
+```
+
+→
+
+```toml
+# After
+[[query]]
+text = "incremental indexing fallback"
+expected = [
+    { path = "src/search/indexer.rs", kind = "Symbol", title = "build_index_incremental" },
+]
+```
+
+이유: `diff.rs`에는 "incremental"/"fallback" 어휘가 존재하지 않으며, path 또한 의미적으로 직접 일치하지 않음. 회귀 추적 목적상 검색 시스템이 도달 가능한 정답만 fixture에 둔다.
+
+다른 6개 쿼리는 변경 없음.
+
+### 3.2 BM25 스키마 확장
+
+#### 3.2.1 신규 토크나이저: `code_ident`
+
+식별자(snake_case/camelCase)를 단어 단위로 분해하는 토크나이저. Tantivy의 `TokenFilter` 트레잇을 구현하는 `IdentifierSplit` 필터를 만들어 다음 체인으로 등록:
+
+```
+SimpleTokenizer → IdentifierSplit → LowerCaser
+```
+
+`IdentifierSplit` 동작:
+- 입력 토큰을 `_` 기준으로 split → 각 조각
+- 각 조각에 대해 lower→upper 또는 letter→digit 경계로 추가 split (camelCase)
+- 빈 토큰은 버림
+- 원본도 함께 emit (예: `rrf_fuse` → `rrf_fuse`, `rrf`, `fuse`) — recall과 precision 모두 보존
+- 한글 등 비-ASCII 식별자는 split 시도하지 않고 그대로 통과
+
+토크나이저 이름: `"code_ident"`.
+
+기존 `ngram_2_2`는 그대로 유지 — body 검색용으로 한글 부분 매칭에 필요.
+
+#### 3.2.2 신규 필드: `path_terms`
+
+스키마에 텍스트 필드 `path_terms`를 추가:
+
+```rust
+let path_terms_opts = TextOptions::default().set_indexing_options(
+    TextFieldIndexing::default()
+        .set_tokenizer("code_ident")
+        .set_index_option(IndexRecordOption::WithFreqs),
+);
+let path_terms = builder.add_text_field("path_terms", path_terms_opts);
+```
+
+- `STORED` 없음 — 검색용으로만 사용, doc_store 복원에는 기존 `path` STRING 필드 활용.
+- 저장 값: 청크의 path를 `/`, `.`, `_`, `-` 등으로 분해한 공백 문자열. 예: `"src/search/rrf.rs"` → `"src search rrf rs"`. 정확한 분해 규칙은 `IdentifierSplit` 토크나이저 입력 단순화를 위해 호출 측에서 처리 (path → `"src search rrf rs"`로 만든 뒤 토크나이저에 통과).
+- `path_terms`는 path가 `Some(_)`인 청크에만 채움. CommitMessage 청크는 비움.
+
+기존 `path` STRING 필드는 변경 없이 유지 — `extract_path_filter`/`apply_path_filter` 흐름이 그대로 동작.
+
+#### 3.2.3 `title` 필드 토크나이저 교체
+
+`title` 필드 토크나이저를 `ngram_2_2` → `code_ident`로 변경. 한글 commit 메시지 title의 부분 매칭은 일부 약화되나, identifier/단어 단위 매칭은 강해짐. 한글 부분 매칭은 body 필드에서 여전히 가능.
+
+#### 3.2.4 `body` 필드 — 변경 없음
+
+`ngram_2_2` 유지. 한글 본문 부분 매칭 보존.
+
+#### 3.2.5 `QueryParser` 변경
+
+```rust
+let mut parser = QueryParser::for_index(
+    &self.index,
+    vec![self.fields.title, self.fields.path_terms, self.fields.body],
+);
+parser.set_field_boost(self.fields.title, 2.0);
+parser.set_field_boost(self.fields.path_terms, 2.0);
+parser.set_field_boost(self.fields.body, 1.0);
+```
+
+(정확한 API는 `set_boost`/`set_field_boost`/`SchemaBuilder::set_boost` 중 Tantivy 0.22에서 사용 가능한 것을 구현 시 확인.)
+
+### 3.3 청크 분할 변경
+
+#### 3.3.1 `WHOLE_FILE_THRESHOLD`
+
+`src/search/chunk/file.rs`:
+
+```rust
+const WHOLE_FILE_THRESHOLD: usize = 16 * 1024;
+```
+
+영향:
+- `modal_state.rs`(8.2KB)와 `store.rs`(8.8KB)가 새로 WholeFile로 잡힘. `engine.rs`(7.1KB)는 기존 8KB 임계 미만이라 영향 없음.
+- `indexer.rs`(20KB)는 여전히 Symbol 분할.
+
+#### 3.3.2 Symbol 추출 확장 (Rust 한정)
+
+`src/search/chunk/symbol.rs`의 Rust 쿼리:
+
+기존 (개념):
+```scm
+(function_item name: (identifier) @name) @symbol
+```
+
+확장:
+```scm
+(function_item name: (identifier) @name) @symbol
+(enum_item name: (type_identifier) @name) @symbol
+(struct_item name: (type_identifier) @name) @symbol
+(trait_item name: (type_identifier) @name) @symbol
+(type_item name: (type_identifier) @name) @symbol
+```
+
+`impl` 블록은 추가하지 않음 — 현재 정책(impl 자체는 청크 안 함, 안의 method만)을 유지.
+
+`SymbolKind` 확장 (`src/search/chunk/mod.rs`):
+
+```rust
+pub enum SymbolKind {
+    Function,
+    Method,
+    Enum,        // 신규
+    Struct,      // 신규
+    Trait,       // 신규
+    TypeAlias,   // 신규
+}
+```
+
+다른 언어 (Python/JS/TS/Go)의 쿼리는 손대지 않는다.
+
+### 3.4 Path → path_terms 분해 로직
+
+위치: `src/search/indexer.rs::chunk_to_meta` 호출 직후, 또는 별도 헬퍼 `path_to_terms(path: &str) -> String`로 분리.
+
+규칙:
+- `/`, `.`, `-`, `_`, 공백을 단어 경계로 split
+- 빈 조각 제거
+- 결과를 공백으로 join
+- 예: `src/search/chunk/file.rs` → `"src search chunk file rs"`
+- 예: `src/git/store.rs` → `"src git store rs"`
+
+`code_ident` 토크나이저가 다시 식별자 분해를 적용하므로, `"rs"` 같은 짧은 조각도 토큰으로 들어감. (단 2-3자 흔한 조각은 BM25 IDF가 낮춰 자연히 가중치 약화.)
+
+`add_doc` 시그니처에 `path_terms` 인자 추가하거나, `DocMeta`/`Chunk`에서 자동 도출. 추천: `Bm25Index::add_doc`이 `meta.path`로부터 내부에서 도출 — 호출자 영향 최소.
+
+### 3.5 인덱스 호환성
+
+`src/search/mod.rs`:
+
+```rust
+pub const INDEX_VERSION: u32 = 6;
+```
+
+기존 인덱스를 가진 사용자는 `SearchEngine::open()`에서 `VersionMismatch` 또는 `build_index()`에서 schema outdated 경로로 자동 풀 리빌드.
+
+`Bm25Meta::tokenizer`는 `"ngram_2_2"` 유지 — body 필드 토크나이저로 해석. 단일 문자열 의미가 모호하지만 INDEX_VERSION이 구분하므로 충분.
+
+### 3.6 RRF / 임베딩 — 변경 없음
+
+`rrf::rrf_fuse(k=60.0)`, candidate_limit 정책, `embed_text` 모두 유지. 변경 후 재측정 결과에 따라 후속 라운드에서 결정.
+
+## 4. 테스트
+
+### 4.1 단위 테스트 추가
+
+| 위치 | 테스트 |
+|---|---|
+| `src/search/bm25.rs` | `code_ident` 토크나이저가 `rrf_fuse` → `rrf`, `fuse` 매칭 |
+| `src/search/bm25.rs` | `path_terms` 필드에 `src/search/rrf.rs` 인덱싱 후 쿼리 `"rrf"` hit |
+| `src/search/bm25.rs` | 한글 본문이 `ngram_2_2` body로 여전히 매칭 (회귀 가드) |
+| `src/search/chunk/symbol.rs` | Rust enum/struct/trait 추출 |
+| `src/search/chunk/file.rs` | 12KB 파일이 WholeFile로 잡힘 (16KB 미만) |
+
+### 4.2 회귀 가드
+
+기존 BM25 테스트는 모두 통과해야 함. 특히:
+- `test_korean_bigram_search` — body 필드 ngram 유지로 통과
+- `test_uppercase_indexed_matches_lowercase_query` — title이 code_ident + LowerCaser로 바뀌면 2글자 부분 쿼리(`he`)는 title에서 더 이상 매칭되지 않으므로, 전체 단어 쿼리(`hello`)로 수정해 대소문자 무시 매칭을 검증
+- `test_path_field_exact_match_query` — path STRING 필드와 `path:"..."` 문법 미변경
+
+### 4.3 End-to-end 검증
+
+```bash
+cargo run --bin glc -- index --force
+cargo run --bin glc -- report --out result.md
+```
+
+`result.md`를 베이스라인과 비교. 합격 기준:
+
+| 메트릭 | 베이스라인 | 목표 |
+|---|---|---|
+| MRR | 0.330 | ≥ 0.65 |
+| Recall@5 | 0.286 | ≥ 0.65 |
+| Recall@10 | 0.571 | ≥ 0.85 |
+| NDCG@10 | 0.384 | ≥ 0.65 |
+
+쿼리별 기대 변화:
+
+| 쿼리 | 베이스 | 목표 Hit Rank |
+|---|---|---|
+| incremental indexing fallback | 0 hit | ≤ 5 |
+| tantivy delete_term | 1 | 1 (회귀 없음) |
+| RRF reciprocal rank fusion | 0 hit | ≤ 5 |
+| embedding model load potion | 1 | 1 (회귀 없음) |
+| search modal state machine | 0 hit | ≤ 5 |
+| tree sitter highlight configuration | 6 | ≤ 5 |
+| git revwalk topological commit | 7 | ≤ 5 |
+
+목표 미달 시 다음 라운드 후보:
+- RRF k를 60 → 20~30으로 낮춰 top-rank 차별화
+- `embed_text`에 path 토큰 + 식별자 분해 prepend
+- 모듈 docstring을 별도 청크로 추출 (`rrf.rs`처럼 자연어 부재 파일 보완)
+
+## 5. 작업 순서 (구현 청사진)
+
+1. fixture 수정 (1줄)
+2. `IdentifierSplit` TokenFilter 구현 + 단위 테스트
+3. BM25 스키마에 `path_terms` 필드 추가, `code_ident` 토크나이저 등록
+4. `add_doc` 내부에서 path → path_terms 분해 저장
+5. `QueryParser` 필드 확장 + boost
+6. `WHOLE_FILE_THRESHOLD` 16KB로 상향
+7. Symbol 쿼리 확장 + `SymbolKind` 변형 추가
+8. `INDEX_VERSION` 6으로 범프
+9. 단위 테스트 추가
+10. `cargo clippy` / `cargo test` / `rustfmt` 통과
+11. `glc index --force` → `glc report --out result.md` → 합격 기준 검증
+12. 미달 시 후속 라운드 항목 식별, 통과 시 commit
+
+## 6. 위험 / 트레이드오프
+
+- **인덱스 크기 증가**: `path_terms` 필드 추가, WholeFile 임계 상향으로 ~5~10% 증가 예상. 현재 432KiB → 약 500KiB 추정. 허용 범위.
+- **임베딩 단계 변화 없음**: 임베딩 모델/텍스트는 그대로 → 의미 매칭의 근본 한계는 남음. `RRF reciprocal rank fusion` 쿼리는 path_terms `rrf` 매칭에만 의존하므로 path 분해 규칙이 효과적이어야 함.
+- **한글 title 부분 매칭 약화**: title 토크나이저가 ngram에서 code_ident로 바뀌어 commit 메시지 한글 부분 매칭이 줄어듦. body에서 보완되지만 commit title-only 쿼리는 일부 회귀 가능. fixture에 한글 commit 쿼리가 없어 회귀 추적은 불가 — 별도 한글 쿼리를 fixture에 추가하는 작업은 후속.
+- **Rust 외 언어 미변경**: Python/JS/TS/Go 파일에 enum/class는 여전히 잡히지 않음. 현재 fixture는 모두 Rust 파일이라 영향 없음.
+- **`impl` 블록 미청크화**: 기존 정책 유지. 새 SymbolKind 변형이 추가돼도 method는 그대로 `Method` 변형으로 들어감.
diff --git a/report-2026-05-26-1.md b/report-2026-05-26-1.md
new file mode 100644
index 0000000..54ce60b
--- /dev/null
+++ b/report-2026-05-26-1.md
@@ -0,0 +1,43 @@
+# Search Quality Report
+
+- Generated: 2026-05-26T12:20:25Z
+- HEAD (working tree): ccb575d96b518a12b81ddbbe126d82f4945a45e8
+- Index dir: ./.glc-index (432.84 KiB, 473 docs)
+
+## Aggregate
+
+| Metric | Value |
+|--------|-------|
+| MRR | 0.330 |
+| Recall@5 | 0.286 |
+| Recall@10 | 0.571 |
+| NDCG@10 | 0.384 |
+| Queries | 7 |
+
+## Performance (warmup=3, iters=10)
+
+| p50 | p95 | p99* | mean | QPS |
+|-----|-----|------|------|-----|
+| 0.04 ms | 0.05 ms | 0.05 ms | 0.04 ms | 22578.5 |
+
+\* iters=10 표본에서 p99는 표본 최댓값 근사
+
+## Index
+
+- Embedding: minishlab/potion-multilingual-128M (256-dim)
+- BM25 tokenizer: ngram_2_2
+- Vector backend: turboquant_4bit
+- HEAD: ccb575d96b518a12b81ddbbe126d82f4945a45e8 (indexed 1779797971Z)
+- Docs: Commit=232, File=63, Symbol=178
+
+## Per-Query
+
+| # | Query | MRR | R@5 | R@10 | NDCG@10 | Hit Rank | Hit Paths |
+|---|-------|-----|-----|------|---------|----------|-----------|
+| 1 | incremental indexing fallback | 0.000 | 0.000 | 0.000 | 0.000 | — | — |
+| 2 | tantivy delete_term | 1.000 | 1.000 | 1.000 | 1.000 | 1 | src/search/bm25.rs |
+| 3 | RRF reciprocal rank fusion | 0.000 | 0.000 | 0.000 | 0.000 | — | — |
+| 4 | embedding model load potion | 1.000 | 1.000 | 1.000 | 1.000 | 1 | src/search/embedding.rs |
+| 5 | search modal state machine | 0.000 | 0.000 | 0.000 | 0.000 | — | — |
+| 6 | tree sitter highlight configuration | 0.167 | 0.000 | 1.000 | 0.356 | 6 | src/highlight/engine.rs |
+| 7 | git revwalk topological commit | 0.143 | 0.000 | 1.000 | 0.333 | 7 | src/git/store.rs |
diff --git a/report-2026-05-27-1.md b/report-2026-05-27-1.md
new file mode 100644
index 0000000..dbaa008
--- /dev/null
+++ b/report-2026-05-27-1.md
@@ -0,0 +1,43 @@
+# Search Quality Report
+
+- Generated: 2026-05-27T02:24:43Z
+- HEAD (working tree): 25ce5e8562ef1caa82d461b04713de8576fb32c4
+- Index dir: ./.glc-index (709.64 KiB, 409 docs)
+
+## Aggregate
+
+| Metric | Value |
+|--------|-------|
+| MRR | 0.616 |
+| Recall@5 | 0.857 |
+| Recall@10 | 1.000 |
+| NDCG@10 | 0.707 |
+| Queries | 7 |
+
+## Performance (warmup=3, iters=10)
+
+| p50 | p95 | p99* | mean | QPS |
+|-----|-----|------|------|-----|
+| 0.35 ms | 0.43 ms | 0.55 ms | 0.32 ms | 3165.2 |
+
+\* iters=10 표본에서 p99는 표본 최댓값 근사
+
+## Index
+
+- Embedding: minishlab/potion-multilingual-128M (256-dim)
+- BM25 tokenizer: ngram_2_2
+- Vector backend: turboquant_4bit
+- HEAD: 25ce5e8562ef1caa82d461b04713de8576fb32c4 (indexed 1779848644Z)
+- Docs: Commit=249, File=78, Symbol=82
+
+## Per-Query
+
+| # | Query | MRR | R@5 | R@10 | NDCG@10 | Hit Rank | Hit Paths |
+|---|-------|-----|-----|------|---------|----------|-----------|
+| 1 | incremental indexing fallback | 0.500 | 1.000 | 1.000 | 0.631 | 2 | src/search/indexer.rs |
+| 2 | tantivy delete_term | 1.000 | 1.000 | 1.000 | 1.000 | 1 | src/search/bm25.rs |
+| 3 | RRF reciprocal rank fusion | 0.200 | 1.000 | 1.000 | 0.387 | 5 | src/search/rrf.rs |
+| 4 | embedding model load potion | 1.000 | 1.000 | 1.000 | 1.000 | 1 | src/search/embedding.rs |
+| 5 | search modal state machine | 0.500 | 1.000 | 1.000 | 0.631 | 2 | src/search/modal_state.rs |
+| 6 | tree sitter highlight configuration | 1.000 | 1.000 | 1.000 | 1.000 | 1 | src/highlight/engine.rs |
+| 7 | git revwalk topological commit | 0.111 | 0.000 | 1.000 | 0.301 | 9 | src/git/store.rs |
diff --git a/report-2026-05-27-2.md b/report-2026-05-27-2.md
new file mode 100644
index 0000000..002b5b7
--- /dev/null
+++ b/report-2026-05-27-2.md
@@ -0,0 +1,44 @@
+# Search Quality Report
+
+- Generated: 2026-05-27T02:34:37Z
+- HEAD (working tree): 60aed77d34c46a9f5314c24b239614c8b160e359
+- Index dir: ./.glc-index (709.64 KiB, 409 docs)
+- ⚠ HEAD ≠ index.head_oid (25ce5e8562ef1caa82d461b04713de8576fb32c4) — run `glc index` to refresh
+
+## Aggregate
+
+| Metric | Value |
+|--------|-------|
+| MRR | 0.743 |
+| Recall@5 | 0.929 |
+| Recall@10 | 1.000 |
+| NDCG@10 | 0.778 |
+| Queries | 7 |
+
+## Performance (warmup=3, iters=10)
+
+| p50 | p95 | p99* | mean | QPS |
+|-----|-----|------|------|-----|
+| 5.88 ms | 6.59 ms | 6.78 ms | 5.52 ms | 181.3 |
+
+\* iters=10 표본에서 p99는 표본 최댓값 근사
+
+## Index
+
+- Embedding: minishlab/potion-multilingual-128M (256-dim)
+- BM25 tokenizer: ngram_2_2
+- Vector backend: turboquant_4bit
+- HEAD: 25ce5e8562ef1caa82d461b04713de8576fb32c4 (indexed 1779848644Z)
+- Docs: Commit=249, File=78, Symbol=82
+
+## Per-Query
+
+| # | Query | MRR | R@5 | R@10 | NDCG@10 | Hit Rank | Hit Paths |
+|---|-------|-----|-----|------|---------|----------|-----------|
+| 1 | incremental indexing fallback | 0.500 | 1.000 | 1.000 | 0.631 | 2 | src/search/indexer.rs |
+| 2 | tantivy delete_term | 1.000 | 1.000 | 1.000 | 1.000 | 1 | src/search/bm25.rs |
+| 3 | RRF reciprocal rank fusion | 0.200 | 1.000 | 1.000 | 0.387 | 5 | src/search/rrf.rs |
+| 4 | embedding model load potion | 1.000 | 1.000 | 1.000 | 1.000 | 1 | src/search/embedding.rs |
+| 5 | search modal state machine | 0.500 | 1.000 | 1.000 | 0.631 | 2 | src/search/modal_state.rs |
+| 6 | tree sitter highlight configuration | 1.000 | 1.000 | 1.000 | 1.000 | 1 | src/highlight/engine.rs |
+| 7 | git revwalk topological commit | 1.000 | 0.500 | 1.000 | 0.798 | 1 | src/git/commit.rs, src/git/store.rs |
diff --git a/result.md b/result.md
new file mode 100644
index 0000000..caa955d
--- /dev/null
+++ b/result.md
@@ -0,0 +1,43 @@
+# Search Quality Report
+
+- Generated: 2026-05-27T02:20:37Z
+- HEAD (working tree): 8d8fe31d2d00aa3169aba2c12c77b0206e2f439c
+- Index dir: ./.glc-index (698.92 KiB, 407 docs)
+
+## Aggregate
+
+| Metric | Value |
+|--------|-------|
+| MRR | 0.544 |
+| Recall@5 | 0.857 |
+| Recall@10 | 1.000 |
+| NDCG@10 | 0.654 |
+| Queries | 7 |
+
+## Performance (warmup=3, iters=10)
+
+| p50 | p95 | p99* | mean | QPS |
+|-----|-----|------|------|-----|
+| 0.28 ms | 0.33 ms | 0.34 ms | 0.25 ms | 4001.5 |
+
+\* iters=10 표본에서 p99는 표본 최댓값 근사
+
+## Index
+
+- Embedding: minishlab/potion-multilingual-128M (256-dim)
+- BM25 tokenizer: ngram_2_2
+- Vector backend: turboquant_4bit
+- HEAD: 8d8fe31d2d00aa3169aba2c12c77b0206e2f439c (indexed 1779848421Z)
+- Docs: Commit=248, File=77, Symbol=82
+
+## Per-Query
+
+| # | Query | MRR | R@5 | R@10 | NDCG@10 | Hit Rank | Hit Paths |
+|---|-------|-----|-----|------|---------|----------|-----------|
+| 1 | incremental indexing fallback | 0.500 | 1.000 | 1.000 | 0.631 | 2 | src/search/indexer.rs |
+| 2 | tantivy delete_term | 1.000 | 1.000 | 1.000 | 1.000 | 1 | src/search/bm25.rs |
+| 3 | RRF reciprocal rank fusion | 0.200 | 1.000 | 1.000 | 0.387 | 5 | src/search/rrf.rs |
+| 4 | embedding model load potion | 1.000 | 1.000 | 1.000 | 1.000 | 1 | src/search/embedding.rs |
+| 5 | search modal state machine | 0.500 | 1.000 | 1.000 | 0.631 | 2 | src/search/modal_state.rs |
+| 6 | tree sitter highlight configuration | 0.500 | 1.000 | 1.000 | 0.631 | 2 | src/highlight/engine.rs |
+| 7 | git revwalk topological commit | 0.111 | 0.000 | 1.000 | 0.301 | 9 | src/git/store.rs |
diff --git a/src/app.rs b/src/app.rs
index 3c78180..ee73d45 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -8,6 +8,7 @@ use crate::highlight::HighlightEngine;
 use crate::mode::{Action, DiffState, KeyBindings, Mode, PickState, SearchState, ViewState};
 use crate::search::modal_state::SemanticSearchModal;
 use crate::search::SearchEngine;
+use crate::search::SearchResult;
 use crate::theme::Palette;
 use crate::ui;
 use anyhow::Result;
@@ -43,6 +44,9 @@ pub struct App {
     pub saved_search: SearchState,
     pub search_modal: SemanticSearchModal,
     pub search_engine: Option<SearchEngine>,
+    pub search_tx: Option<mpsc::Sender<String>>,
+    pub search_rx: Option<mpsc::Receiver<Vec<SearchResult>>>,
+    pub search_pending: bool,
     pub engine_error: Option<String>,
     pub needs_clear: bool,
     pub index_rx: Option<mpsc::Receiver<IndexMessage>>,
@@ -71,6 +75,9 @@ impl App {
             saved_search: SearchState::Idle { query: None },
             search_modal: SemanticSearchModal::new(),
             search_engine: None,
+            search_tx: None,
+            search_rx: None,
+            search_pending: false,
             engine_error: None,
             needs_clear: false,
             index_rx: None,
@@ -896,6 +903,10 @@ impl App {
         }
         self.engine_error = None;
         self.engine_rx = None;
+        self.search_tx = None;
+        self.search_rx = None;
+        self.search_pending = false;
+        self.search_engine = None;
         let repo_workdir = self
             .repo
             .repository()
@@ -903,7 +914,6 @@ impl App {
             .unwrap_or(std::path::Path::new("."))
             .to_path_buf();
         self.search_modal.set_loading("Starting indexer...");
-        self.search_engine = None;
 
         let (tx, rx) = mpsc::channel::<IndexMessage>();
         self.index_rx = Some(rx);
@@ -931,7 +941,7 @@ impl App {
     }
 
     pub fn is_indexing(&self) -> bool {
-        self.index_rx.is_some() || self.engine_rx.is_some()
+        self.index_rx.is_some() || self.engine_rx.is_some() || self.search_pending
     }
 
     pub fn drain_index_messages(&mut self) {
@@ -1015,6 +1025,9 @@ impl App {
                     ));
                 }
             } else if self.search_engine.is_some() {
+                if let Some(engine) = self.search_engine.take() {
+                    self.spawn_search_worker(engine);
+                }
                 self.engine_error = None;
                 if modal_was_open {
                     self.search_modal.open();
@@ -1049,7 +1062,7 @@ impl App {
     }
 
     fn start_loading_engine(&mut self) {
-        if self.engine_rx.is_some() || self.search_engine.is_some() {
+        if self.engine_rx.is_some() || self.search_rx.is_some() {
             return;
         }
         let index_dir = self.index_dir();
@@ -1074,6 +1087,25 @@ impl App {
         });
     }
 
+    fn spawn_search_worker(&mut self, engine: SearchEngine) {
+        self.search_engine = None;
+        let (stx, worker_rx) = mpsc::channel::<String>();
+        let (worker_tx, srx) = mpsc::channel::<Vec<SearchResult>>();
+        self.search_tx = Some(stx);
+        self.search_rx = Some(srx);
+        let limit = self.config.search.result_limit;
+        std::thread::spawn(move || {
+            while let Ok(query) = worker_rx.recv() {
+                if query.is_empty() {
+                    let _ = worker_tx.send(vec![]);
+                    continue;
+                }
+                let results = engine.search(&query, limit).unwrap_or_default();
+                let _ = worker_tx.send(results);
+            }
+        });
+    }
+
     fn open_semantic_search(&mut self) {
         let index_dir = self.index_dir();
         use crate::search::indexer::IndexStatus;
@@ -1091,7 +1123,7 @@ impl App {
             }
             IndexStatus::Ready => {}
         }
-        if self.search_engine.is_some() {
+        if self.search_rx.is_some() {
             self.search_modal.open();
             if !self.search_modal.state.input().is_empty() {
                 self.run_semantic_search();
@@ -1116,11 +1148,29 @@ impl App {
             self.search_modal.set_results(vec![]);
             return;
         }
-        if let Some(engine) = &self.search_engine {
-            let limit = self.config.search.result_limit;
-            match engine.search(&query, limit) {
-                Ok(results) => self.search_modal.set_results(results),
-                Err(_) => self.search_modal.set_results(vec![]),
+        if let Some(tx) = &self.search_tx {
+            self.search_pending = true;
+            let _ = tx.send(query);
+        }
+    }
+
+    pub fn drain_search_results(&mut self) {
+        let Some(rx) = self.search_rx.as_ref() else {
+            return;
+        };
+        loop {
+            match rx.try_recv() {
+                Ok(results) => {
+                    self.search_modal.set_results(results);
+                    self.search_pending = false;
+                }
+                Err(mpsc::TryRecvError::Empty) => break,
+                Err(mpsc::TryRecvError::Disconnected) => {
+                    self.search_rx = None;
+                    self.search_tx = None;
+                    self.search_pending = false;
+                    break;
+                }
             }
         }
     }
diff --git a/src/highlight/engine.rs b/src/highlight/engine.rs
index cf371bf..a74df6f 100644
--- a/src/highlight/engine.rs
+++ b/src/highlight/engine.rs
@@ -1,7 +1,6 @@
 use ratatui::style::Style;
 use ratatui::text::{Line, Span};
 use std::collections::HashMap;
-use tree_sitter::Language;
 use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter};
 
 impl Default for HighlightEngine {
@@ -125,11 +124,8 @@ impl HighlightEngine {
     }
 
     fn make_rust_config() -> Result<HighlightConfiguration, Box<dyn std::error::Error>> {
-        let raw_fn = tree_sitter_rust::LANGUAGE.into_raw();
-        let raw_ptr = unsafe { raw_fn() };
-        let language = unsafe { Language::from_raw(raw_ptr as *const _) };
         let mut config = HighlightConfiguration::new(
-            language,
+            tree_sitter_rust::LANGUAGE.into(),
             "rust",
             tree_sitter_rust::HIGHLIGHTS_QUERY,
             tree_sitter_rust::INJECTIONS_QUERY,
diff --git a/src/main.rs b/src/main.rs
index 7481433..ee8c1d6 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -99,10 +99,12 @@ fn run_app(terminal: &mut ratatui::DefaultTerminal, app: &mut App) -> Result<()>
         if app.is_indexing() {
             app.drain_index_messages();
             app.drain_engine_messages();
+            app.drain_search_results();
             if event::poll(Duration::from_millis(80))? {
                 read_and_dispatch(app)?;
             }
         } else {
+            app.drain_search_results();
             read_and_dispatch(app)?;
         }
 
diff --git a/src/search/bm25.rs b/src/search/bm25.rs
index 334e7ae..8ae1413 100644
--- a/src/search/bm25.rs
+++ b/src/search/bm25.rs
@@ -6,12 +6,13 @@ use tantivy::query::QueryParser;
 use tantivy::schema::{
     Field, IndexRecordOption, Schema, TextFieldIndexing, TextOptions, FAST, INDEXED, STORED, STRING,
 };
-use tantivy::tokenizer::{LowerCaser, NgramTokenizer, TextAnalyzer};
+use tantivy::tokenizer::{LowerCaser, NgramTokenizer, SimpleTokenizer, TextAnalyzer};
 use tantivy::{Index, IndexReader, IndexWriter, ReloadPolicy, TantivyError};
 
 use crate::search::{DocKind, DocMeta, SearchError};
 
 pub const TOKENIZER: &str = "ngram_2_2";
+pub const WORD_TOKENIZER: &str = "word_lower";
 const WRITER_HEAP: usize = 50_000_000;
 
 pub struct Bm25Fields {
@@ -20,6 +21,7 @@ pub struct Bm25Fields {
     pub title: Field,
     pub body: Field,
     pub path: Field,
+    pub path_terms: Field,
     pub commit_oid: Field,
     pub line_start: Field,
     pub line_end: Field,
@@ -34,24 +36,36 @@ pub struct Bm25Index {
 fn make_schema() -> (Schema, Bm25Fields) {
     let mut builder = Schema::builder();
 
-    let text_opts = TextOptions::default()
+    // Title: SimpleTokenizer + LowerCaser. `_` / `/` / `.` / `-` 자동 분해.
+    // camelCase는 add_doc에서 write-time으로 split.
+    let title_opts = TextOptions::default()
         .set_indexing_options(
             TextFieldIndexing::default()
-                .set_tokenizer(TOKENIZER)
+                .set_tokenizer(WORD_TOKENIZER)
                 .set_index_option(IndexRecordOption::WithFreqsAndPositions),
         )
         .set_stored();
+
+    // Path terms: 검색 전용 (STORED 없음). path를 단어 단위로 매칭.
+    let path_terms_opts = TextOptions::default().set_indexing_options(
+        TextFieldIndexing::default()
+            .set_tokenizer(WORD_TOKENIZER)
+            .set_index_option(IndexRecordOption::WithFreqs),
+    );
+
+    // Body: ngram_2_2 유지 — 한글/임의 텍스트 부분 매칭. 멀티-토큰 쿼리(phrase)를 위해 positions 필요.
     let body_opts = TextOptions::default().set_indexing_options(
         TextFieldIndexing::default()
             .set_tokenizer(TOKENIZER)
-            .set_index_option(IndexRecordOption::WithFreqs),
+            .set_index_option(IndexRecordOption::WithFreqsAndPositions),
     );
 
     let id = builder.add_u64_field("id", FAST | STORED | INDEXED);
     let kind = builder.add_text_field("kind", STRING | STORED);
-    let title = builder.add_text_field("title", text_opts);
+    let title = builder.add_text_field("title", title_opts);
     let body = builder.add_text_field("body", body_opts);
     let path = builder.add_text_field("path", STRING | STORED);
+    let path_terms = builder.add_text_field("path_terms", path_terms_opts);
     let commit_oid = builder.add_text_field("commit_oid", STRING | STORED);
     let line_start = builder.add_u64_field("line_start", STORED);
     let line_end = builder.add_u64_field("line_end", STORED);
@@ -63,6 +77,7 @@ fn make_schema() -> (Schema, Bm25Fields) {
         title,
         body,
         path,
+        path_terms,
         commit_oid,
         line_start,
         line_end,
@@ -71,11 +86,16 @@ fn make_schema() -> (Schema, Bm25Fields) {
 }
 
 fn register_tokenizer(index: &Index) {
-    let tokenizer =
+    let ngram =
         TextAnalyzer::builder(NgramTokenizer::new(2, 2, false).expect("valid ngram params"))
             .filter(LowerCaser)
             .build();
-    index.tokenizers().register(TOKENIZER, tokenizer);
+    index.tokenizers().register(TOKENIZER, ngram);
+
+    let word_lower = TextAnalyzer::builder(SimpleTokenizer::default())
+        .filter(LowerCaser)
+        .build();
+    index.tokenizers().register(WORD_TOKENIZER, word_lower);
 }
 
 impl Bm25Index {
@@ -120,14 +140,16 @@ impl Bm25Index {
         meta: &DocMeta,
         body: &str,
     ) -> Result<(), TantivyError> {
+        use crate::search::text_prep::{path_to_terms, split_camel_case};
         let mut doc = tantivy::TantivyDocument::default();
         doc.add_u64(self.fields.id, meta.doc_id);
         doc.add_text(self.fields.kind, meta.kind.as_str());
-        doc.add_text(self.fields.title, &meta.title);
+        doc.add_text(self.fields.title, split_camel_case(&meta.title));
         doc.add_text(self.fields.body, body);
         doc.add_text(self.fields.commit_oid, &meta.commit_oid);
         if let Some(p) = &meta.path {
             doc.add_text(self.fields.path, p);
+            doc.add_text(self.fields.path_terms, path_to_terms(p));
         }
         if let Some(ls) = meta.line_start {
             doc.add_u64(self.fields.line_start, u64::from(ls));
@@ -152,7 +174,13 @@ impl Bm25Index {
 
     pub fn search(&self, query: &str, limit: usize) -> Result<Vec<(u64, f32)>, SearchError> {
         let searcher = self.reader.searcher();
-        let parser = QueryParser::for_index(&self.index, vec![self.fields.title, self.fields.body]);
+        let mut parser = QueryParser::for_index(
+            &self.index,
+            vec![self.fields.title, self.fields.path_terms, self.fields.body],
+        );
+        parser.set_field_boost(self.fields.title, 2.0);
+        parser.set_field_boost(self.fields.path_terms, 2.0);
+        parser.set_field_boost(self.fields.body, 1.0);
         let tantivy_query = match parser.parse_query(query) {
             Ok(q) => q,
             Err(_) => return Ok(vec![]),
@@ -291,7 +319,7 @@ mod tests {
         idx.add_doc(&mut w, &commit_meta(1, "hello world"), "greeting text")
             .unwrap();
         idx.commit(w).unwrap();
-        let results = idx.search("he", 10).unwrap();
+        let results = idx.search("hello", 10).unwrap();
         assert!(!results.is_empty());
         assert_eq!(results[0].0, 1);
     }
@@ -333,15 +361,15 @@ mod tests {
     }
 
     #[test]
-    fn test_uppercase_indexed_matches_lowercase_query() {
+    fn test_word_tokenizer_lowercases_title() {
         let (_dir, idx) = tmp_index();
         let mut w = idx.writer().unwrap();
         idx.add_doc(&mut w, &commit_meta(1, "Hello"), "").unwrap();
         idx.commit(w).unwrap();
-        let results = idx.search("he", 10).unwrap();
+        let results = idx.search("hello", 10).unwrap();
         assert!(
             !results.is_empty(),
-            "2-char lowercase query 'he' must match 'Hello' — requires LowerCaser on 'He'"
+            "lowercase query 'hello' must match title 'Hello' — requires LowerCaser on word tokenizer"
         );
     }
 
@@ -353,14 +381,14 @@ mod tests {
         idx.add_doc(&mut w, &commit_meta(1, "first doc"), "")
             .unwrap();
         idx.commit(w).unwrap();
-        let r1 = idx.search("fi", 10).unwrap();
+        let r1 = idx.search("first", 10).unwrap();
         assert_eq!(r1.len(), 1, "first commit visible");
 
         let mut w = idx.writer().unwrap();
         idx.add_doc(&mut w, &commit_meta(2, "second doc"), "")
             .unwrap();
         idx.commit(w).unwrap();
-        let r2 = idx.search("se", 10).unwrap();
+        let r2 = idx.search("second", 10).unwrap();
         assert!(
             r2.iter().any(|(id, _)| *id == 2),
             "second commit must be visible via cached reader"
@@ -401,12 +429,12 @@ mod tests {
         idx.add_doc(&mut w, &commit_meta(2, "hello again"), "second")
             .unwrap();
         idx.commit(w).unwrap();
-        assert_eq!(idx.search("he", 10).unwrap().len(), 2);
+        assert_eq!(idx.search("hello", 10).unwrap().len(), 2);
 
         let mut w = idx.writer().unwrap();
         idx.delete_doc(&mut w, 1);
         idx.commit(w).unwrap();
-        let r = idx.search("he", 10).unwrap();
+        let r = idx.search("hello", 10).unwrap();
         assert_eq!(r.len(), 1);
         assert_eq!(r[0].0, 2);
     }
@@ -446,4 +474,99 @@ mod tests {
         );
         assert_eq!(results[0].0, 1);
     }
+
+    #[test]
+    fn test_path_terms_matches_path_segment_query() {
+        let (_dir, idx) = tmp_index();
+        let mut w = idx.writer().unwrap();
+        let meta = DocMeta {
+            doc_id: 1,
+            kind: DocKind::File,
+            title: "src/search/rrf.rs".into(),
+            commit_oid: "a".repeat(40),
+            path: Some("src/search/rrf.rs".into()),
+            line_start: None,
+            line_end: None,
+        };
+        idx.add_doc(&mut w, &meta, "fn rrf_fuse() {}").unwrap();
+        idx.commit(w).unwrap();
+        let results = idx.search("rrf", 10).unwrap();
+        assert!(
+            results.iter().any(|(id, _)| *id == 1),
+            "query 'rrf' must match path_terms src/search/rrf.rs"
+        );
+    }
+
+    #[test]
+    fn test_path_match_outranks_unrelated_body_match() {
+        let (_dir, idx) = tmp_index();
+        let mut w = idx.writer().unwrap();
+
+        let target = DocMeta {
+            doc_id: 1,
+            kind: DocKind::File,
+            title: "src/git/store.rs".into(),
+            commit_oid: "a".repeat(40),
+            path: Some("src/git/store.rs".into()),
+            line_start: None,
+            line_end: None,
+        };
+        idx.add_doc(&mut w, &target, "fn open() {}").unwrap();
+
+        let distractor = DocMeta {
+            doc_id: 2,
+            kind: DocKind::File,
+            title: "src/ui/view.rs".into(),
+            commit_oid: "b".repeat(40),
+            path: Some("src/ui/view.rs".into()),
+            line_start: None,
+            line_end: None,
+        };
+        idx.add_doc(
+            &mut w,
+            &distractor,
+            "some long content that mentions store once in the middle of many other words and bigrams",
+        )
+        .unwrap();
+
+        idx.commit(w).unwrap();
+
+        let results = idx.search("store", 10).unwrap();
+        assert!(results.iter().any(|(id, _)| *id == 1));
+        let pos_1 = results.iter().position(|(id, _)| *id == 1).unwrap();
+        let pos_2 = results.iter().position(|(id, _)| *id == 2);
+        if let Some(p2) = pos_2 {
+            assert!(
+                pos_1 <= p2,
+                "path-matching doc 1 should rank ≤ body-only doc 2"
+            );
+        }
+    }
+
+    #[test]
+    fn test_camel_case_title_split_for_query() {
+        let (_dir, idx) = tmp_index();
+        let mut w = idx.writer().unwrap();
+        let meta = DocMeta {
+            doc_id: 7,
+            kind: DocKind::Symbol,
+            title: "ModalState (src/search/modal_state.rs)".into(),
+            commit_oid: "b".repeat(40),
+            path: Some("src/search/modal_state.rs".into()),
+            line_start: Some(1),
+            line_end: Some(10),
+        };
+        idx.add_doc(&mut w, &meta, "enum ModalState {}").unwrap();
+        idx.commit(w).unwrap();
+        let r = idx.search("modal", 10).unwrap();
+        assert!(
+            r.iter().any(|(id, _)| *id == 7),
+            "modal must match split ModalState in title"
+        );
+        let r = idx.search("state", 10).unwrap();
+        assert!(
+            r.iter().any(|(id, _)| *id == 7),
+            "state must match split ModalState in title"
+        );
+    }
 }
diff --git a/src/search/chunk/file.rs b/src/search/chunk/file.rs
index f43139a..2bac834 100644
--- a/src/search/chunk/file.rs
+++ b/src/search/chunk/file.rs
@@ -3,7 +3,7 @@ use super::Chunk;
 use crate::lang::Language;
 
 /// 파일 크기 임계값 — 미만은 WholeFile, 이상은 Symbol 시도.
-const WHOLE_FILE_THRESHOLD: usize = 8 * 1024; // 8 KB
+const WHOLE_FILE_THRESHOLD: usize = 16 * 1024; // 16 KB — keeps modal_state.rs, store.rs, etc. as WholeFile
 
 /// 너무 짧은 심볼은 청크화 가치가 없다 — 1줄짜리 setter 등을 제외.
 const MIN_SYMBOL_LINES: u32 = 2;
@@ -72,8 +72,13 @@ mod tests {
     fn large_rust_file_produces_symbols() {
         let big_fn = format!(
             "fn foo() {{\n{}\n}}\nfn bar() {{\n{}\n}}",
-            "    let x = 1;\n".repeat(500),
-            "    let y = 2;\n".repeat(500),
+            "    let x = 1;\n".repeat(800),
+            "    let y = 2;\n".repeat(800),
+        );
+        assert!(
+            big_fn.len() > 16 * 1024,
+            "fixture must exceed WHOLE_FILE_THRESHOLD: got {}",
+            big_fn.len()
         );
         let chunks = split_file("abc", "src/lib.rs", &big_fn);
         assert!(chunks.iter().any(|c| matches!(c, Chunk::Symbol { .. })));
@@ -81,9 +86,9 @@ mod tests {
 
     #[test]
     fn utf8_safe_slicing_does_not_panic() {
-        let body = "// 한국어 주석이 잔뜩 들어간 큰 파일\n".repeat(400);
+        let body = "// 한국어 주석이 잔뜩 들어간 큰 파일\n".repeat(800);
         let src = format!("{}\nfn foo() {{\n    let x = 1;\n}}\n", body);
-        assert!(src.len() > 8 * 1024);
+        assert!(src.len() > 16 * 1024);
         // panic 없이 동작하면 통과
         let chunks = split_file("oid", "korean.rs", &src);
         assert!(!chunks.is_empty());
@@ -92,20 +97,38 @@ mod tests {
     #[test]
     fn unsupported_language_large_file_falls_back_to_whole_file() {
         // .java는 supports_symbol_chunking == false. 큰 파일이지만 WholeFile.
-        let src = "class Foo { int x; }\n".repeat(500);
-        assert!(src.len() > 8 * 1024);
+        let src = "class Foo { int x; }\n".repeat(1000);
+        assert!(src.len() > 16 * 1024);
         let chunks = split_file("oid", "Big.java", &src);
         assert_eq!(chunks.len(), 1);
         assert!(matches!(chunks[0], Chunk::WholeFile { .. }));
     }
 
+    #[test]
+    fn twelve_kb_rust_file_stays_whole_file() {
+        let big = format!("fn foo() {{\n{}\n}}\n", "    let x = 1;\n".repeat(800),);
+        assert!(
+            big.len() > 8 * 1024 && big.len() < 16 * 1024,
+            "test fixture sizing: got {}",
+            big.len()
+        );
+        let chunks = split_file("oid", "medium.rs", &big);
+        assert_eq!(chunks.len(), 1, "12KB file should be single WholeFile");
+        assert!(matches!(chunks[0], Chunk::WholeFile { .. }));
+    }
+
     #[test]
     fn symbol_kind_preserved() {
         // Method/Function/Struct 분리 확인용
         let big_fn = format!(
             "struct Foo {{ x: i32 }}\nimpl Foo {{\n    fn bar(&self) {{\n{}\n}}\n}}\nfn top() {{\n{}\n}}",
-            "        let _ = 1;\n".repeat(300),
-            "    let _ = 1;\n".repeat(300),
+            "        let _ = 1;\n".repeat(500),
+            "    let _ = 1;\n".repeat(500),
+        );
+        assert!(
+            big_fn.len() > 16 * 1024,
+            "fixture must exceed WHOLE_FILE_THRESHOLD: got {}",
+            big_fn.len()
         );
         let chunks = split_file("oid", "big.rs", &big_fn);
         let kinds: Vec<SymbolKind> = chunks
diff --git a/src/search/chunk/symbol.rs b/src/search/chunk/symbol.rs
index fede07e..34d8498 100644
--- a/src/search/chunk/symbol.rs
+++ b/src/search/chunk/symbol.rs
@@ -11,6 +11,7 @@ pub enum SymbolKind {
     Struct,
     Enum,
     Trait,
+    TypeAlias,
     Class,
     Other,
 }
@@ -86,6 +87,10 @@ fn build_symbol_span(
                 symbol_node = Some(cap.node);
                 kind = SymbolKind::Trait;
             }
+            "symbol.type" => {
+                symbol_node = Some(cap.node);
+                kind = SymbolKind::TypeAlias;
+            }
             "symbol.class" => {
                 symbol_node = Some(cap.node);
                 kind = SymbolKind::Class;
@@ -125,56 +130,32 @@ fn lang_and_query(language: Language) -> Option<(&'static TsLanguage, &'static Q
 // Language와 Query는 immutable이므로 OnceLock으로 캐싱 — 파일마다 재컴파일 비용 제거.
 fn rust_lang() -> &'static TsLanguage {
     static LANG: OnceLock<TsLanguage> = OnceLock::new();
-    LANG.get_or_init(|| {
-        let raw_fn = tree_sitter_rust::LANGUAGE.into_raw();
-        let raw_ptr = unsafe { raw_fn() };
-        unsafe { TsLanguage::from_raw(raw_ptr as *const _) }
-    })
+    LANG.get_or_init(|| tree_sitter_rust::LANGUAGE.into())
 }
 
 fn python_lang() -> &'static TsLanguage {
     static LANG: OnceLock<TsLanguage> = OnceLock::new();
-    LANG.get_or_init(|| {
-        let raw_fn = tree_sitter_python::LANGUAGE.into_raw();
-        let raw_ptr = unsafe { raw_fn() };
-        unsafe { TsLanguage::from_raw(raw_ptr as *const _) }
-    })
+    LANG.get_or_init(|| tree_sitter_python::LANGUAGE.into())
 }
 
 fn javascript_lang() -> &'static TsLanguage {
     static LANG: OnceLock<TsLanguage> = OnceLock::new();
-    LANG.get_or_init(|| {
-        let raw_fn = tree_sitter_javascript::LANGUAGE.into_raw();
-        let raw_ptr = unsafe { raw_fn() };
-        unsafe { TsLanguage::from_raw(raw_ptr as *const _) }
-    })
+    LANG.get_or_init(|| tree_sitter_javascript::LANGUAGE.into())
 }
 
 fn typescript_lang() -> &'static TsLanguage {
     static LANG: OnceLock<TsLanguage> = OnceLock::new();
-    LANG.get_or_init(|| {
-        let raw_fn = tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into_raw();
-        let raw_ptr = unsafe { raw_fn() };
-        unsafe { TsLanguage::from_raw(raw_ptr as *const _) }
-    })
+    LANG.get_or_init(|| tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into())
 }
 
 fn tsx_lang() -> &'static TsLanguage {
     static LANG: OnceLock<TsLanguage> = OnceLock::new();
-    LANG.get_or_init(|| {
-        let raw_fn = tree_sitter_typescript::LANGUAGE_TSX.into_raw();
-        let raw_ptr = unsafe { raw_fn() };
-        unsafe { TsLanguage::from_raw(raw_ptr as *const _) }
-    })
+    LANG.get_or_init(|| tree_sitter_typescript::LANGUAGE_TSX.into())
 }
 
 fn go_lang() -> &'static TsLanguage {
     static LANG: OnceLock<TsLanguage> = OnceLock::new();
-    LANG.get_or_init(|| {
-        let raw_fn = tree_sitter_go::LANGUAGE.into_raw();
-        let raw_ptr = unsafe { raw_fn() };
-        unsafe { TsLanguage::from_raw(raw_ptr as *const _) }
-    })
+    LANG.get_or_init(|| tree_sitter_go::LANGUAGE.into())
 }
 
 fn rust_query() -> &'static Query {
@@ -222,6 +203,12 @@ const RUST_QUERY: &str = r#"
 ((source_file
    (enum_item name: (type_identifier) @name) @symbol.enum))
 
+((source_file
+   (trait_item name: (type_identifier) @name) @symbol.trait))
+
+((source_file
+   (type_item name: (type_identifier) @name) @symbol.type))
+
 ((source_file
    (impl_item
      (declaration_list
@@ -390,6 +377,40 @@ func Top() {}
         assert!(kinds_names.contains(&(SymbolKind::Method, "Bar")));
     }
 
+    #[test]
+    fn rust_top_level_trait_extracted() {
+        let src = r#"
+trait Greet {
+    fn name(&self) -> &str;
+    fn hello(&self) -> String { String::new() }
+}
+"#;
+        let spans = extract_symbols(src, Language::Rust).unwrap();
+        let has_trait_container = spans
+            .iter()
+            .any(|s| s.kind == SymbolKind::Trait && s.name == "Greet");
+        assert!(
+            has_trait_container,
+            "top-level trait declaration must be extracted as Trait, not only its methods"
+        );
+    }
+
+    #[test]
+    fn rust_top_level_type_alias_extracted() {
+        let src = r#"
+type CommitId = String;
+type Result<T> = std::result::Result<T, MyError>;
+"#;
+        let spans = extract_symbols(src, Language::Rust).unwrap();
+        let names: Vec<_> = spans
+            .iter()
+            .filter(|s| s.kind == SymbolKind::TypeAlias)
+            .map(|s| s.name.as_str())
+            .collect();
+        assert!(names.contains(&"CommitId"));
+        assert!(names.contains(&"Result"));
+    }
+
     #[test]
     fn queries_are_cached() {
         // 두 번 호출해도 같은 포인터를 받아야 함 — 매 호출 재컴파일 없음
diff --git a/src/search/mod.rs b/src/search/mod.rs
index 97b0948..aba7cc0 100644
--- a/src/search/mod.rs
+++ b/src/search/mod.rs
@@ -7,6 +7,7 @@ pub mod modal_state;
 pub mod report;
 pub mod rrf;
 pub mod silence;
+pub mod text_prep;
 pub mod vector;
 
 use std::collections::HashMap;
@@ -206,7 +207,7 @@ impl SearchEngine {
     }
 }
 
-pub const INDEX_VERSION: u32 = 5;
+pub const INDEX_VERSION: u32 = 6;
 pub const INDEX_DIR_NAME: &str = ".glc-index";
 
 #[cfg(test)]
diff --git a/src/search/report/metrics.rs b/src/search/report/metrics.rs
index 57abe8f..80ea6b5 100644
--- a/src/search/report/metrics.rs
+++ b/src/search/report/metrics.rs
@@ -79,8 +79,7 @@ pub fn evaluate(query: &FixtureQuery, results: &[SearchResult]) -> QueryEval {
     }
 
     let n_expected = query.expected.len().max(1);
-    let recall_at_5 = (hit_count_at_5.min(query.expected.len()) as f32)
-        / (query.expected.len().clamp(1, 5) as f32);
+    let recall_at_5 = (hit_count_at_5.min(query.expected.len()) as f32) / (n_expected as f32);
     let recall_at_10 = (hit_count_at_10.min(query.expected.len()) as f32) / (n_expected as f32);
 
     // IDCG@10: min(10, |expected|) 개 위치에 1.0이 이상적으로 배치된 경우.
diff --git a/src/search/text_prep.rs b/src/search/text_prep.rs
new file mode 100644
index 0000000..fcd17f3
--- /dev/null
+++ b/src/search/text_prep.rs
@@ -0,0 +1,92 @@
+/// Pre-processes identifier/path text so that SimpleTokenizer can split it into words.
+///
+/// SimpleTokenizer already splits on non-alphanumeric characters such as `_`, `/`, `.`, `-`,
+/// but it does not recognise camelCase. This function inserts a space at each boundary
+/// (lowercase → uppercase, letter → digit): `BuildIndex` → `Build Index`, `Rev2` → `Rev 2`.
+///
+/// Only ASCII letters/digits are examined, so non-ASCII text such as Hangul passes through.
+pub fn split_camel_case(s: &str) -> String {
+    let mut out = String::with_capacity(s.len() + 4); // a little headroom for inserted spaces
+    let mut prev_lower_alpha = false;
+    let mut prev_alpha = false;
+    for c in s.chars() {
+        let is_upper = c.is_ascii_uppercase();
+        let is_lower = c.is_ascii_lowercase();
+        let is_digit = c.is_ascii_digit();
+        let is_alpha = is_upper || is_lower;
+        if (is_upper && prev_lower_alpha) || (is_digit && prev_alpha) {
+            out.push(' '); // uppercase runs such as `HTTPServer` never reach this branch
+        }
+        out.push(c);
+        prev_lower_alpha = is_lower;
+        prev_alpha = is_alpha;
+    }
+    out
+}
+
+/// Turns a path into word candidates: `/`, `\`, `.`, `-` and `_` are replaced with spaces,
+/// then `split_camel_case` is applied to the result.
+pub fn path_to_terms(path: &str) -> String {
+    let replaced: String = path
+        .chars()
+        .map(|c| {
+            if matches!(c, '/' | '.' | '-' | '_' | '\\') {
+                ' '
+            } else {
+                c
+            }
+        })
+        .collect();
+    split_camel_case(&replaced)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn snake_case_unchanged_by_split() {
+        assert_eq!(split_camel_case("rrf_fuse"), "rrf_fuse");
+        assert_eq!(
+            split_camel_case("build_index_incremental"),
+            "build_index_incremental"
+        );
+    }
+
+    #[test]
+    fn camel_case_split() {
+        assert_eq!(split_camel_case("BuildIndex"), "Build Index");
+        assert_eq!(split_camel_case("ModalState"), "Modal State");
+        assert_eq!(split_camel_case("HTTPServer"), "HTTPServer");
+    }
+
+    #[test]
+    fn mixed_identifier() {
+        assert_eq!(split_camel_case("buildIndexFor"), "build Index For");
+    }
+
+    #[test]
+    fn path_terms_replaces_separators() {
+        assert_eq!(path_to_terms("src/search/rrf.rs"), "src search rrf rs");
+        assert_eq!(path_to_terms("src/git/store.rs"), "src git store rs");
+    }
+
+    #[test]
+    fn path_terms_with_camel_case_file() {
+        assert_eq!(
+            path_to_terms("src/search/ModalState.rs"),
+            "src search Modal State rs"
+        );
+    }
+
+    #[test]
+    fn empty_string() {
+        assert_eq!(split_camel_case(""), "");
+        assert_eq!(path_to_terms(""), "");
+    }
+
+    #[test]
+    fn korean_passthrough() {
+        assert_eq!(split_camel_case("한글이름"), "한글이름");
+    }
+}
diff --git a/src/ui/diff.rs b/src/ui/diff.rs
index 249f8ee..4115660 100644
--- a/src/ui/diff.rs
+++ b/src/ui/diff.rs
@@ -219,6 +219,62 @@ fn render_unified(
     frame.render_widget(paragraph, area);
 }
 
+#[derive(Debug)]
+enum AlignedLine<'a> {
+    Both {
+        old: &'a DiffLine, // old side: a context line or a removed line
+        new: &'a DiffLine, // new side: the same context line or the paired added line
+    },
+    OldOnly {
+        old: &'a DiffLine, // removed line with no paired addition
+    },
+    NewOnly {
+        new: &'a DiffLine, // added line with no paired removal
+    },
+}
+
+fn align_diff_lines(lines: &[DiffLine]) -> Vec<AlignedLine<'_>> {
+    let mut result = Vec::new(); // one entry per aligned old/new row
+    let mut i = 0;
+    while i < lines.len() {
+        match &lines[i] {
+            DiffLine::Context { .. } => {
+                result.push(AlignedLine::Both {
+                    old: &lines[i],
+                    new: &lines[i], // context: the same line feeds both sides
+                });
+                i += 1;
+            }
+            _ => {
+                let mut removed = Vec::new(); // change run: collected up to the next context line
+                let mut added = Vec::new();
+                while i < lines.len() && !matches!(lines[i], DiffLine::Context { .. }) {
+                    match &lines[i] {
+                        DiffLine::Removed { .. } => removed.push(&lines[i]),
+                        DiffLine::Added { .. } => added.push(&lines[i]),
+                        _ => {} // anything else is skipped; Context never gets here (loop condition)
+                    }
+                    i += 1;
+                }
+                let paired = removed.len().min(added.len()); // matched up in order, index by index
+                for j in 0..paired {
+                    result.push(AlignedLine::Both {
+                        old: removed[j],
+                        new: added[j],
+                    });
+                }
+                for old_line in removed.iter().skip(paired) {
+                    result.push(AlignedLine::OldOnly { old: old_line }); // surplus removals
+                }
+                for new_line in added.iter().skip(paired) {
+                    result.push(AlignedLine::NewOnly { new: new_line }); // surplus additions
+                }
+            }
+        }
+    }
+    result
+}
+
 fn render_side_by_side(
     frame: &mut ratatui::Frame,
     area: Rect,
@@ -228,53 +284,27 @@ fn render_side_by_side(
 ) {
     let (left, right) = layout::split_horizontal(area, area.width / 2);
 
-    let old_lines: Vec<Line> = file
-        .lines
-        .iter()
-        .filter(|dl| !matches!(dl, DiffLine::Added { .. }))
-        .map(|dl| {
-            let (prefix, line_no) = match dl {
-                DiffLine::Removed { line_no, .. } => ("-", format!(" {:>4} ", line_no)),
-                DiffLine::Context { old_line_no, .. } => (" ", format!(" {:>4} ", old_line_no)),
-                DiffLine::Added { .. } => unreachable!(),
-            };
-            let content = match dl {
-                DiffLine::Context { content, .. } => content,
-                DiffLine::Removed { content, .. } => content,
-                DiffLine::Added { .. } => unreachable!(),
-            };
-            let style = style_for_line(dl, palette);
-            Line::from(vec![
-                Span::styled(prefix, style),
-                Span::styled(line_no, Style::new().fg(palette.dim)),
-                Span::styled(content.clone(), style),
-            ])
-        })
-        .collect();
+    let aligned = align_diff_lines(&file.lines);
 
-    let new_lines: Vec<Line> = file
-        .lines
-        .iter()
-        .filter(|dl| !matches!(dl, DiffLine::Removed { .. }))
-        .map(|dl| {
-            let (prefix, line_no) = match dl {
-                DiffLine::Added { line_no, .. } => ("+", format!(" {:>4} ", line_no)),
-                DiffLine::Context { new_line_no, .. } => (" ", format!(" {:>4} ", new_line_no)),
-                DiffLine::Removed { .. } => unreachable!(),
-            };
-            let content = match dl {
-                DiffLine::Context { content, .. } => content,
-                DiffLine::Added { content, .. } => content,
-                DiffLine::Removed { .. } => unreachable!(),
-            };
-            let style = style_for_line(dl, palette);
-            Line::from(vec![
-                Span::styled(prefix, style),
-                Span::styled(line_no, Style::new().fg(palette.dim)),
-                Span::styled(content.clone(), style),
-            ])
-        })
-        .collect();
+    let mut old_lines: Vec<Line> = Vec::new();
+    let mut new_lines: Vec<Line> = Vec::new();
+
+    for al in &aligned {
+        match al {
+            AlignedLine::Both { old, new } => {
+                old_lines.push(diff_line_span(old, palette, false));
+                new_lines.push(diff_line_span(new, palette, true));
+            }
+            AlignedLine::OldOnly { old } => {
+                old_lines.push(diff_line_span(old, palette, false));
+                new_lines.push(Line::from(""));
+            }
+            AlignedLine::NewOnly { new } => {
+                old_lines.push(Line::from(""));
+                new_lines.push(diff_line_span(new, palette, true));
+            }
+        }
+    }
 
     let old_widget = Paragraph::new(old_lines)
         .block(
@@ -295,10 +325,212 @@ fn render_side_by_side(
     frame.render_widget(new_widget, right);
 }
 
+fn diff_line_span(dl: &DiffLine, palette: &crate::theme::Palette, is_new: bool) -> Line<'static> {
+    let style = style_for_line(dl, palette);
+    match dl {
+        DiffLine::Context {
+            old_line_no,
+            new_line_no,
+            content,
+        } => {
+            let line_no = if is_new {
+                format!(" {:>4} ", new_line_no) // new side shows the new-file line number
+            } else {
+                format!(" {:>4} ", old_line_no) // old side shows the old-file line number
+            };
+            Line::from(vec![
+                Span::styled(" ", style), // blank marker column for unchanged lines
+                Span::styled(line_no, Style::new().fg(palette.dim)),
+                Span::styled(content.clone(), style),
+            ])
+        }
+        DiffLine::Removed { line_no, content } => Line::from(vec![
+            Span::styled("-", style), // `is_new` plays no role for removed/added lines
+            Span::styled(format!(" {:>4} ", line_no), Style::new().fg(palette.dim)),
+            Span::styled(content.clone(), style),
+        ]),
+        DiffLine::Added { line_no, content } => Line::from(vec![
+            Span::styled("+", style),
+            Span::styled(format!(" {:>4} ", line_no), Style::new().fg(palette.dim)),
+            Span::styled(content.clone(), style),
+        ]),
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
 
+    #[test]
+    fn test_align_diff_lines_context_only() {
+        let lines = vec![DiffLine::Context {
+            old_line_no: 1,
+            new_line_no: 1,
+            content: "hello".into(),
+        }];
+        let aligned = align_diff_lines(&lines);
+        assert_eq!(aligned.len(), 1);
+        match &aligned[0] {
+            AlignedLine::Both { old, new } => {
+                assert_eq!(old, new);
+            }
+            _ => panic!("expected Both"),
+        }
+    }
+
+    #[test]
+    fn test_align_diff_lines_added_removed_pairs() {
+        let lines = vec![
+            DiffLine::Removed {
+                line_no: 1,
+                content: "old".into(),
+            },
+            DiffLine::Added {
+                line_no: 1,
+                content: "new".into(),
+            },
+        ];
+        let aligned = align_diff_lines(&lines);
+        assert_eq!(aligned.len(), 1);
+        match &aligned[0] {
+            AlignedLine::Both { old, new } => {
+                match old {
+                    DiffLine::Removed { content, .. } => assert_eq!(content, "old"),
+                    _ => panic!("expected Removed"),
+                }
+                match new {
+                    DiffLine::Added { content, .. } => assert_eq!(content, "new"),
+                    _ => panic!("expected Added"),
+                }
+            }
+            _ => panic!("expected Both"),
+        }
+    }
+
+    #[test]
+    fn test_align_diff_lines_more_added_than_removed() {
+        let lines = vec![
+            DiffLine::Removed {
+                line_no: 1,
+                content: "old".into(),
+            },
+            DiffLine::Added {
+                line_no: 1,
+                content: "new1".into(),
+            },
+            DiffLine::Added {
+                line_no: 2,
+                content: "new2".into(),
+            },
+        ];
+        let aligned = align_diff_lines(&lines);
+        assert_eq!(aligned.len(), 2);
+        match &aligned[0] {
+            AlignedLine::Both { .. } => {}
+            _ => panic!("expected Both at position 0"),
+        }
+        match &aligned[1] {
+            AlignedLine::NewOnly { new } => match new {
+                DiffLine::Added { content, .. } => assert_eq!(content, "new2"),
+                _ => panic!("expected Added"),
+            },
+            _ => panic!("expected NewOnly at position 1"),
+        }
+    }
+
+    #[test]
+    fn test_align_diff_lines_more_removed_than_added() {
+        let lines = vec![
+            DiffLine::Removed {
+                line_no: 1,
+                content: "old1".into(),
+            },
+            DiffLine::Removed {
+                line_no: 2,
+                content: "old2".into(),
+            },
+            DiffLine::Added {
+                line_no: 1,
+                content: "new".into(),
+            },
+        ];
+        let aligned = align_diff_lines(&lines);
+        assert_eq!(aligned.len(), 2);
+        match &aligned[0] {
+            AlignedLine::Both { .. } => {}
+            _ => panic!("expected Both at position 0"),
+        }
+        match &aligned[1] {
+            AlignedLine::OldOnly { old } => match old {
+                DiffLine::Removed { content, .. } => assert_eq!(content, "old2"),
+                _ => panic!("expected Removed"),
+            },
+            _ => panic!("expected OldOnly at position 1"),
+        }
+    }
+
+    #[test]
+    fn test_align_diff_lines_context_surrounding_changes() {
+        let lines = vec![
+            DiffLine::Context {
+                old_line_no: 1,
+                new_line_no: 1,
+                content: "before".into(),
+            },
+            DiffLine::Removed {
+                line_no: 2,
+                content: "old".into(),
+            },
+            DiffLine::Added {
+                line_no: 2,
+                content: "new".into(),
+            },
+            DiffLine::Context {
+                old_line_no: 3,
+                new_line_no: 3,
+                content: "after".into(),
+            },
+        ];
+        let aligned = align_diff_lines(&lines);
+        assert_eq!(aligned.len(), 3);
+        match &aligned[0] {
+            AlignedLine::Both { .. } => {}
+            _ => panic!("expected Both for context line"),
+        }
+        match &aligned[1] {
+            AlignedLine::Both { .. } => {}
+            _ => panic!("expected Both for paired change"),
+        }
+        match &aligned[2] {
+            AlignedLine::Both { .. } => {}
+            _ => panic!("expected Both for context line"),
+        }
+    }
+
+    #[test]
+    fn test_diff_line_span_context_uses_correct_line_no() {
+        let palette = crate::theme::Palette::plain();
+        let dl = DiffLine::Context {
+            old_line_no: 10,
+            new_line_no: 20,
+            content: "ctx".into(),
+        };
+        let old_span = diff_line_span(&dl, &palette, false);
+        let new_span = diff_line_span(&dl, &palette, true);
+        let old_text: String = old_span.spans.iter().map(|s| s.content.as_ref()).collect();
+        let new_text: String = new_span.spans.iter().map(|s| s.content.as_ref()).collect();
+        assert!(
+            old_text.contains("10"),
+            "old side should use old_line_no: {}",
+            old_text
+        );
+        assert!(
+            new_text.contains("20"),
+            "new side should use new_line_no: {}",
+            new_text
+        );
+    }
+
     #[test]
     fn test_visible_tabs_empty() {
         let (offset, visible) = visible_tabs(&[], 0, 80);
diff --git a/tests/fixtures/search_queries.toml b/tests/fixtures/search_queries.toml
index 0058208..1676b89 100644
--- a/tests/fixtures/search_queries.toml
+++ b/tests/fixtures/search_queries.toml
@@ -4,7 +4,6 @@
 [[query]]
 text = "incremental indexing fallback"
 expected = [
-    { path = "src/search/diff.rs" },
     { path = "src/search/indexer.rs", kind = "Symbol", title = "build_index_incremental" },
 ]
 
@@ -42,4 +41,5 @@ expected = [
 text = "git revwalk topological commit"
 expected = [
     { path = "src/git/store.rs" },
+    { path = "src/git/commit.rs" },
 ]
diff --git "a/\352\262\200\354\203\211 \355\222\210\354\247\210 \353\246\254\355\217\254\355\212\270 \355\225\231\354\212\265 \352\260\200\354\235\264\353\223\234.md" "b/\352\262\200\354\203\211 \355\222\210\354\247\210 \353\246\254\355\217\254\355\212\270 \355\225\231\354\212\265 \352\260\200\354\235\264\353\223\234.md"
new file mode 100644
index 0000000..bc10500
--- /dev/null
+++ "b/\352\262\200\354\203\211 \355\222\210\354\247\210 \353\246\254\355\217\254\355\212\270 \355\225\231\354\212\265 \352\260\200\354\235\264\353\223\234.md"	
@@ -0,0 +1,281 @@
+---
+aliases: [검색 품질 리포트, glc report]
+tags:
+  - study
+  - information-retrieval
+  - search-quality
+  - metrics
+  - gluck
+---
+
+# `glc report` 검색 품질 리포트 학습 가이드
+
+> 원문: `/Users/dp/Repository/gluck/result.md` (gluck v0.9.1, 2026-05-26 생성)
+
+---
+
+## 한줄 요약
+
+`glc report`는 7개의 고정 검색 쿼리(fixture)에 대해 gluck의 하이브리드 검색 엔진(BM25 + Vector + RRF)이 얼마나 "정답 문서"를 잘 찾고 얼마나 빠르게 응답하는지를 정보 검색(IR, Information Retrieval) 표준 지표로 측정한 회귀(regression) 추적 리포트입니다.
+
+---
+
+## 1. 왜 이것이 필요한가?
+
+semantic search(의미 기반 검색)는 "동작은 하지만 얼마나 잘 되는지" 직관적으로 알기 어렵습니다. 임베딩 모델을 바꾸거나, BM25 토크나이저를 변경하거나, RRF 가중치를 조정하면 결과가 더 좋아진 건지 나빠진 건지 눈으로는 판단이 안 됩니다.
+
+해결책은 **객관적 메트릭**입니다:
+
+- **정답이 정해진 쿼리 세트**(`tests/fixtures/search_queries.toml`)를 준비
+- 검색을 실행한 뒤 정답이 결과 상위 몇 등에 들어왔는지 자동으로 채점
+- 변경 전후 점수를 비교해서 회귀(점수 하락)를 잡아냄
+
+성능 측면도 같습니다. p50(중앙값)만 보면 대부분의 요청은 빠르게 보이지만, 1%가 매우 느린 "롱테일(long-tail) 지연" 문제를 놓칩니다. p95, p99까지 보면 사용자가 실제로 체감하는 최악의 경험을 알 수 있습니다.
+
+즉 이 리포트는 **"검색이 점점 나아지고 있는가?"** 라는 질문에 숫자로 답하기 위해 존재합니다.
+
+---
+
+## 2. 근본적인 동작 원리
+
+### 2.1 평가 파이프라인
+
+```
+search_queries.toml (7개 쿼리 + 기대 정답)
+        │
+        ▼
+  SearchEngine.search(query, k=10)
+        │
+        ▼
+  결과 10개 (각각 path/symbol)
+        │
+        ▼
+  ┌─────────────────────────────────┐
+  │ 채점기                          │
+  │  - 정답 path와 매칭되는 첫 hit  │
+  │  - hit rank 계산                │
+  │  - MRR/R@5/R@10/NDCG@10 산출    │
+  └─────────────────────────────────┘
+        │
+        ▼
+  쿼리별 결과 + 전체 평균
+```
+
+### 2.2 채점에 쓰이는 4가지 IR 메트릭
+
+#### MRR (Mean Reciprocal Rank, 평균 역순위)
+
+```
+정답이 1등 → 1/1 = 1.000
+정답이 2등 → 1/2 = 0.500
+정답이 6등 → 1/6 = 0.167
+정답이 10등 → 1/10 = 0.100
+정답 없음   → 0.000
+```
+
+- 첫 번째 정답이 **얼마나 위에 있는지**만 본다.
+- 1.0에 가까울수록 좋음. 0.5면 첫 정답이 대략 2등에 나오는 수준(역순위의 평균이므로 순위의 산술 평균과 정확히 같지는 않음).
+- **단점**: 정답이 여러 개여도 첫 번째만 점수에 반영됨.
+
+#### Recall@k (재현율)
+
+```
+Recall@k = (상위 k개에 들어온 정답 수) / (전체 정답 수)
+```
+
+- "상위 k개 결과 안에 정답을 몇 % 회수했는가?"
+- Recall@5 = 0.286 → 정답의 28.6%만 상위 5위 안에 잡힘.
+- Recall@10은 Recall@5보다 항상 크거나 같음 (k를 늘리면 상위 k개에 포함되는 정답 수가 줄어들 수 없음).
+
+#### NDCG@10 (Normalized Discounted Cumulative Gain)
+
+```
+DCG@10  = Σ (rel_i / log2(i+1))   i=1..10
+IDCG@10 = 이상적으로 정답이 1,2,3... 순서로 나왔을 때의 DCG
+NDCG    = DCG / IDCG               (0~1로 정규화)
+```
+
+- 직관: **정답이 위쪽에 있을수록 가산점**, 아래쪽일수록 점수가 깎임.
+- MRR이 "첫 정답 1개만 본다"면, NDCG는 "상위 10개의 순위 품질을 종합 평가".
+- **추천 시스템·검색 평가의 표준 지표**.
+
+#### Hit Rank (정답의 첫 등장 순위)
+
+- 단순히 정답이 결과의 몇 번째에 처음 등장했는지.
+- `—` 표시는 상위 10개 안에 정답이 전혀 없음.
+
+### 2.3 좋고 나쁨의 기준선
+
+| 지표 | 매우 좋음 | 양호 | 개선 필요 | 현재 |
+|------|-----------|------|-----------|------|
+| MRR | > 0.7 | 0.5~0.7 | < 0.3 | **0.330** |
+| Recall@10 | > 0.9 | 0.7~0.9 | < 0.5 | **0.571** |
+| NDCG@10 | > 0.7 | 0.5~0.7 | < 0.4 | **0.384** |
+
+> ⚠️ 절대적 기준은 도메인마다 다릅니다. 학술 IR 벤치마크(MS MARCO 등)에서는 NDCG@10 0.4도 SOTA에 가까운 경우가 있고, 사내 코드 검색처럼 쿼리/정답이 깔끔하면 0.8 이상도 가능합니다. gluck의 7개 쿼리는 표본이 작아서 1개 쿼리당 점수 변동이 크다는 점도 감안해야 합니다.
+
+### 2.4 레이턴시 메트릭 (p50/p95/p99 + QPS)
+
+```
+warmup=3      → 측정 전에 3번 돌려서 캐시 워밍업 (첫 실행의 일회성 비용을 측정에서 제외; 네이티브 Rust 바이너리라 JIT는 없음)
+iters=10      → 측정용 실행 10회
+```
+
+쿼리 10회 실행 → 응답 시간을 정렬:
+
+```
+[0.03, 0.03, 0.04, 0.04, 0.04, 0.04, 0.04, 0.05, 0.05, 0.05] ms
+                              ↑                    ↑     ↑
+                              p50 (5번째)         p95   p99
+```
+
+- **p50 (median)**: 절반의 쿼리가 이 시간 이하. 평균적 사용자가 체감하는 속도.
+- **p95**: 100번 중 95번은 이 시간 이하. 5%의 느린 쿼리가 시작되는 지점.
+- **p99**: 100번 중 99번은 이 시간 이하. 최악 1%의 꼬리(tail).
+- **mean (평균)**: 모든 값을 합쳐 나눔. 이상치(outlier)에 흔들림.
+- **QPS (Queries Per Second)**: 1초에 처리 가능한 쿼리 수. `1 / mean_latency_sec`로 산출.
+
+리포트에 별표(`*`)가 붙은 이유: iters=10이라는 적은 표본에서 p99는 통계적으로 신뢰하기 어려워서, "표본 최댓값" 정도로 근사한 값이라는 면책 조항입니다. 실무에서 진짜 p99를 보려면 최소 수천 번 측정이 필요합니다.
+
+현재 수치는 **p50 0.04ms, QPS 22,578**로, 검색 자체는 압도적으로 빠릅니다. 코드 검색용으로는 사실상 무시할 수준의 지연. 다만 이건 **이미 인덱스가 메모리에 올라온 상태에서의 쿼리 시간**이고, 인덱스 빌드 시간은 별개입니다.
+
+### 2.5 인덱스 구성요소 해부
+
+리포트의 `## Index` 섹션은 검색 엔진이 어떻게 만들어졌는지의 스냅샷입니다.
+
+```
+Embedding:      minishlab/potion-multilingual-128M (256-dim)
+BM25 tokenizer: ngram_2_2
+Vector backend: turboquant_4bit
+Docs:           Commit=232, File=63, Symbol=178
+```
+
+| 키워드 | 의미 |
+|--------|------|
+| **potion-multilingual-128M** | HuggingFace의 model2vec 계열 다국어 임베딩 모델. 이름의 128M은 학습 토큰 수가 아니라 약 1억 2,800만 개의 파라미터 규모를 뜻함. 텍스트를 의미 벡터로 변환. |
+| **256-dim** | 임베딩 벡터의 차원 수. 차원이 클수록 표현력↑ / 저장공간·연산비용↑. 256은 경량 모델 기준 합리적 수준. |
+| **BM25** | 1990년대부터 검증된 전통적 키워드 기반 랭킹. 단어 빈도(TF) × 역문서빈도(IDF)로 점수. |
+| **ngram_2_2** | tantivy의 토크나이저 설정. 글자 단위 2-gram(2글자씩 쪼개기). 예: "tantivy" → [ta, an, nt, ti, iv, vy]. 부분 매칭과 다국어/오타에 강함. |
+| **turboquant_4bit** | turbovec 라이브러리의 4비트 양자화 ANN(근사 최근접 이웃) 백엔드. float32 벡터를 4비트로 압축해 저장공간 1/8, 검색 속도↑ (약간의 정확도 손실 감수). |
+| **Commit/File/Symbol** | 인덱싱 단위(Chunk). 커밋 메시지 232개, 파일 63개, 코드 심볼(함수/메서드) 178개 → 총 473 docs. |
+
+### 2.6 BM25 + Vector + RRF 하이브리드 검색
+
+```
+쿼리 "tantivy delete_term"
+        │
+        ├──────────────┬─────────────┐
+        ▼              ▼             │
+   BM25 검색      Vector 검색        │
+   (키워드 매칭)  (의미 유사도)      │
+        │              │             │
+   상위 10개      상위 10개          │
+        │              │             │
+        └──────┬───────┘             │
+               ▼                     │
+   RRF (Reciprocal Rank Fusion)      │
+   score = Σ 1/(k + rank_i)          │
+   (k=60 일반적)                     │
+               │                     │
+               ▼                     │
+        최종 상위 10개 ◄─────────────┘
+```
+
+- BM25는 **정확한 단어**가 들어간 문서에 강함 → "delete_term" 같은 식별자 검색에 유리.
+- Vector는 **의미가 비슷한** 문서에 강함 → "어떻게 인덱스에서 문서를 지우지?"에서도 `delete_doc` 함수를 찾아낼 수 있음.
+- RRF는 두 결과를 순위 기반으로 합친다(점수 스케일이 달라도 OK).
+
+---
+
+## 3. 핵심 개념 정리
+
+### 3.1 쿼리별 결과 해석
+
+| # | 쿼리 | 결과 | 해석 |
+|---|------|------|------|
+| 1 | incremental indexing fallback | **0점** | 정답이 `src/search/diff.rs` + `build_index_incremental` 심볼인데 상위 10개에 없음. 쿼리 어휘와 코드 명명이 어긋남(코드에는 "fallback"이라는 단어가 없을 가능성). |
+| 2 | tantivy delete_term | **만점** | 정확한 식별자 매칭. BM25가 압도적으로 잘 잡는 케이스. |
+| 3 | RRF reciprocal rank fusion | **0점** | 정답 `src/search/rrf.rs`가 존재하는데도 못 찾음. 의외. 파일 내용이 매우 짧거나 키워드가 변형돼 있을 가능성. |
+| 4 | embedding model load potion | **만점** | "potion"이라는 희귀 키워드가 강한 신호. |
+| 5 | search modal state machine | **0점** | fixture가 `src/search/modal_state.rs`를 정답으로 적었는데, 실제 코드는 `src/search/modal.rs`. **정답 데이터 오류**일 가능성이 가장 큼. |
+| 6 | tree sitter highlight configuration | **부분 성공** | hit rank 6 → MRR 0.167. R@10=1.0이라 결국 찾기는 함. 상위에 노이즈 5개가 끼어있다는 뜻. |
+| 7 | git revwalk topological commit | **부분 성공** | hit rank 7. 위와 동일 패턴. |
+
+### 3.2 이 리포트가 알려주는 진짜 메시지
+
+집계 점수 MRR 0.33은 평균이지만, 자세히 보면 **2개는 만점, 3개는 0점, 2개는 6~7위**라는 양극화 구조입니다. 평균보다 분산이 더 중요합니다.
+
+- **만점 2개**: 희귀하고 정확한 식별자(`delete_term`, `potion`)는 잘 잡음 → BM25가 일하고 있다.
+- **0점 3개**: 자연어식 쿼리("fallback", "state machine", "fusion")에서 약함 → 임베딩 모델이 코드 시맨틱을 충분히 잡지 못하거나, fixture 정답 정의가 너무 좁다.
+- **6~7위 2개**: 정답은 인덱스에 있고 검색도 잡아내지만 상위로 못 끌어올림 → RRF 가중치 / 청크 분할 정책 튜닝 여지.
+
+### 3.3 실무 활용 흐름
+
+```
+1. main 브랜치에서 `glc report --out baseline.md`
+2. 검색 관련 변경 (예: 임베딩 모델 교체) 후 `glc report --out candidate.md`
+3. baseline vs candidate의 MRR/NDCG 비교
+4. 점수 하락 시 → 회귀로 간주, 변경 재검토
+5. 점수 상승 시 → 머지
+```
+
+이런 자동화된 평가 루프가 있어야 검색 품질이 **체계적으로** 좋아질 수 있습니다.
+
+---
+
+## 4. 이해도 확인 Q&A
+
+### Q1. MRR이 0.33인데 NDCG@10은 0.384로 더 높습니다. 어떻게 NDCG가 MRR보다 클 수 있나요?
+
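+**A**: 두 메트릭은 다른 것을 측정하고, 순위에 적용하는 감쇠 함수도 다릅니다. MRR은 **첫 번째 정답의 순위만** 보고 역수(1/rank)로 점수를 깎는 반면, NDCG는 상위 10개 안의 **모든 정답 위치를 가중치(로그 감쇠, 1/log2(rank+1))로 합산**합니다. 로그 감쇠가 역수 감쇠보다 훨씬 완만하기 때문에, Q6, Q7처럼 정답이 6~7위에 등장하면 MRR에는 1/6=0.167, 1/7=0.143만 기여하지만 NDCG에는 1/log2(7)≈0.356, 1/log2(8)≈0.333이 기여합니다. 각 쿼리의 정답을 1개로 보면(IDCG=1) (1+1+0.356+0.333)/7≈0.384로 리포트 값과 일치합니다. 이 밖에도 상위 10개 안에 정답이 더 들어있다면 NDCG는 추가 점수를 받고, 정답이 여러 개인 쿼리에서는 정규화(IDCG로 나눔)의 영향도 받습니다. 따라서 둘이 항상 한 방향으로 움직이지는 않습니다.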
+
+### Q2. 왜 p50, p95, p99를 따로 보나요? 평균(mean)만으로는 부족한가요?
+
+**A**: 평균은 이상치(outlier)에 흔들립니다. 100개 쿼리 중 99개가 1ms에 끝나고 1개가 1000ms 걸려도 평균은 약 11ms로 "그럭저럭 빠른" 것처럼 보이지만, 실제로는 1%의 사용자가 처참한 경험을 합니다. p50은 일반적 경험, p95/p99는 **꼬리 지연(tail latency)**을 드러냅니다. 검색·웹 API처럼 사용자가 직접 체감하는 시스템에서는 평균보다 p95/p99가 더 중요합니다.
+
+### Q3. Recall@10 = 0.571인데 NDCG@10 = 0.384입니다. Recall이 더 높다는 게 무슨 의미인가요?
+
+**A**: 상위 10개 안에 정답의 **57.1%는 들어와 있다**는 뜻입니다. 즉 "찾을 수는 있다". 그런데 NDCG는 더 낮다 = **정답이 상위에 있지 않고 아래쪽에 깔려있다**는 신호입니다. Q6, Q7이 전형적인 예: 정답이 결국 잡히긴 하지만 6~7위라 사용자가 한참 스크롤해야 합니다. **개선 방향이 명확**해집니다: "검색 자체를 더 잘 시키는" 것보다 "랭킹을 끌어올리는" 작업이 우선.
+
+### Q4. Q5 "search modal state machine"이 0점인 이유는 검색 엔진이 나빠서인가요?
+
+**A**: 거의 확실히 **fixture 데이터 오류**입니다. fixture는 정답을 `src/search/modal_state.rs`로 적어놨지만, 실제 코드베이스(CLAUDE.md 참조)에는 `src/search/modal.rs`만 존재합니다. 검색 엔진이 `modal.rs`를 잘 찾아도 채점기는 `modal_state.rs`가 아니라며 0점을 매깁니다. 이런 케이스를 잡아내는 것도 리포트의 가치입니다 — **메트릭이 갑자기 0이 되면 검색 회귀가 아니라 fixture가 stale해진 것일 수 있다**는 걸 의심해야 합니다.
+
+### Q5. BM25와 Vector 검색을 따로 쓰지 않고 RRF로 합치는 이유는?
+
+**A**: 둘은 서로 다른 약점을 가집니다. BM25는 **어휘 정확성**에 강하지만 동의어/패러프레이즈에 약합니다("delete_term"은 잡지만 "remove entry"는 못 잡음). Vector는 **의미 유사성**에 강하지만 희귀 식별자에 약합니다("potion"이라는 단어를 학습 데이터에서 본 적 없으면 의미 벡터가 부정확). RRF는 두 시스템의 순위만 합치므로 점수 스케일 차이를 고민할 필요가 없고, "둘 다 상위에 올린 문서"가 자연스럽게 부상합니다. Q2의 `delete_term`은 BM25가 끌어올렸고, 자연어 쿼리는 Vector가 끌어올리는 식으로 상호보완합니다.
+
+### Q6. ngram_2_2 토크나이저는 왜 단어 단위가 아니라 2글자씩 쪼개나요?
+
+**A**: 세 가지 이유. (1) **다국어 대응**: 한국어/일본어/중국어는 띄어쓰기 기반 토크나이저가 잘 안 먹지만 글자 ngram은 언어 중립적. (2) **부분 매칭**: "tantivy"를 검색할 때 사용자가 "tanti"만 쳐도 매칭됨. (3) **오타 내성**: 한두 글자 틀려도 대부분의 ngram은 여전히 매칭. 단점은 인덱스 크기가 커지고, 너무 짧은 ngram은 노이즈를 늘릴 수 있다는 것. 2-2(최소 2글자, 최대 2글자)는 균형점.
+
+### Q7. turboquant_4bit이 정확도를 떨어뜨린다면 왜 쓰나요?
+
+**A**: 트레이드오프입니다. float32 벡터를 4비트로 양자화하면 **저장 용량이 1/8로 줄고, 메모리 대역폭이 감소해 ANN 검색이 크게 빨라집니다**. 정확도 손실은 일반적으로 1~3% 수준(Recall 측면)이라 코드 검색처럼 "완벽한 1등"보다 "상위 10개"가 중요한 도메인에서는 충분히 허용됩니다. gluck처럼 로컬 CLI에서 사용자 노트북에 인덱스가 올라가야 하는 환경에서는 용량 절약이 결정적입니다 (현재 473 docs에 432KB).
+
+### Q8. 만약 임베딩 모델을 교체했더니 MRR이 0.33 → 0.45로 올랐다면 그냥 머지하면 되나요?
+
+**A**: 신중해야 합니다. 표본이 7개 쿼리뿐이라 **통계적 신뢰도가 낮습니다**. 1~2개 쿼리가 우연히 잘 잡혀도 평균이 크게 흔들립니다. 진짜 개선인지 확인하려면 (1) fixture를 30개 이상으로 확장, (2) per-query diff를 보고 어떤 쿼리가 좋아졌는지 추적, (3) 인덱스 크기/빌드 시간/메모리 사용량 같은 부수 지표 동시 확인, (4) 실제 손으로 몇 번 써보고 체감 확인. 메트릭은 **방향성**의 신호지 단독 결정 근거가 아닙니다.
+
+---
+
+## 5. 더 알아보기
+
+### 핵심 키워드
+- **Information Retrieval (IR)**: 검색 시스템 평가 전반의 학문 분야
+- **Learning to Rank**: 머신러닝으로 검색 결과 순서를 학습시키는 기법
+- **Cranfield paradigm**: 정답 라벨링된 쿼리 세트로 검색을 평가하는 고전적 방법론 (이 리포트가 이 방식)
+- **TREC (Text REtrieval Conference)**: NIST가 주관하는 텍스트 검색 평가 대회이자 대표 학술 벤치마크 — MRR/NDCG 표준이 여기서 정착됨
+
+### 관련 코드
+- `src/search/mod.rs`: SearchEngine 구조 (BM25 + Vector + RRF 결합)
+- `src/search/rrf.rs`: RRF 알고리즘 구현
+- `src/search/bm25.rs`: tantivy 기반 BM25 인덱스
+- `src/search/vector.rs`: turbovec 기반 벡터 검색
+- `tests/fixtures/search_queries.toml`: 평가용 쿼리/정답 세트
+
+### 다음 단계로 좋은 작업
+1. fixture를 7개 → 20~30개로 늘려 통계 신뢰도 확보
+2. Q5의 잘못된 정답 경로 수정 (`modal_state.rs` → `modal.rs`)
+3. Q1, Q3, Q6, Q7이 왜 약한지 분석 → 청크 분할 정책 또는 RRF k 파라미터 조정 실험
+4. 임베딩 모델 후보(potion 외 다른 모델) 벤치마킹 시 이 리포트로 A/B 비교