Skip to content

Commit cdd346c

Browse files
SonAIengine and claude
committed
fix: 검색 품질 개선 — kind soft boost, phrase 필터, evidence threshold 조정
## 개선 내역 (벤치마크 검증)

### 1. agent_search kind 필터: hard filter → soft boost (MRR +9.0%)
- search.py: node_kinds 매칭 시 하드 제거 → 1.5x score boost로 변경
- 자체 시나리오 MRR 0.767 → 0.836 (+9.0%), R@5 0.700 → 0.750 (+7.1%)
- recall 보존하면서 preferred kind 우선 랭킹

### 2. _phrase 노드 검색 결과 노출 차단
- search.py: phrase 노드 fallback padding 제거 — 부족하면 적은 결과 반환
- S8 LLM ablation에서 phrase 노이즈로 MRR -6.8% 발생했던 원인 해결

### 3. Evidence Chain 압축 threshold 상향
- evidence.py: relevance_threshold 0.2 → 0.3 (불필요한 문장 제거)
- 첫 문장 position bias +0.1 추가 (주요 정보는 첫 문장에 집중)

### 4. PhraseExtractor year 추출 dead code 제거
- phrase_extractor.py: _RE_YEAR regex 삭제 (_is_meaningful이 digits 차단하여 실질 미동작)

### FTS word boundary 시도 → 롤백
- 영문 의학 용어(APOE4, BRCA) + 형태 변화(stocks)에서 매칭 실패
- SciFact -27.5%, FiQA -42.4% 악화 → substring 매칭 유지

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 56bbb64 commit cdd346c

File tree

5 files changed

+35
-34
lines changed

5 files changed

+35
-34
lines changed

src/synaptic/backends/memory.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import re
56
from collections.abc import Sequence
67
from difflib import SequenceMatcher
78

@@ -96,6 +97,9 @@ async def delete_edge(self, edge_id: str) -> None:
9697
async def search_fts(self, query: str, *, limit: int = 20) -> list[Node]:
9798
query_lower = query.lower()
9899
terms = query_lower.split()
100+
# No word boundary patterns — substring matching is better for diverse corpora
101+
# (medical terms like "APOE4", Korean compounds, morphological variants)
102+
term_patterns: dict[str, re.Pattern[str]] = {}
99103
# Generate 2-gram substrings (for Korean compound word matching)
100104
bigrams: list[str] = []
101105
if len(terms) >= 2:
@@ -114,10 +118,23 @@ async def search_fts(self, query: str, *, limit: int = 20) -> list[Node]:
114118
score += len(terms) * 3.0
115119
else:
116120
# Individual term matching in title (weight 2x)
117-
score += sum(2.0 for t in terms if t in title_lower)
121+
for t in terms:
122+
pat = term_patterns.get(t)
123+
if pat is not None:
124+
if pat.search(title_lower):
125+
score += 2.0
126+
else:
127+
if t in title_lower:
128+
score += 2.0
118129

119130
# Individual term matching in content
120-
score += sum(1.0 for t in terms if t in content_lower)
131+
for t in terms:
132+
pat = term_patterns.get(t)
133+
if pat is not None:
134+
score += len(pat.findall(content_lower)) * 1.0
135+
else:
136+
if t in content_lower:
137+
score += 1.0
121138

122139
# Bigram match bonus (higher relevance when 2 consecutive terms appear together)
123140
score += sum(1.5 for bg in bigrams if bg in full_text)

src/synaptic/evidence.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def __init__(
7676
self,
7777
*,
7878
max_sentences_per_node: int = 5,
79-
relevance_threshold: float = 0.2,
79+
relevance_threshold: float = 0.3,
8080
max_tokens: int = 2048,
8181
) -> None:
8282
self._max_sentences = max_sentences_per_node
@@ -292,13 +292,16 @@ def _compress_content(self, content: str, query: str) -> str:
292292
# No terms extracted from query — return first N sentences
293293
return " ".join(sentences[:self._max_sentences])
294294

295-
# Score each sentence by relevance
295+
# Score each sentence by relevance (with position bias for first sentence)
296296
scored: list[tuple[int, str, float]] = []
297297
for i, sent in enumerate(sentences):
298298
sent_lower = sent.lower()
299299
sent_terms = set(re.split(r'[\s,;:!?()\[\]]+', sent_lower))
300300
overlap = len(query_terms & sent_terms)
301301
relevance = overlap / len(query_terms)
302+
# Position bias: first sentence gets +0.1 bonus
303+
if i == 0:
304+
relevance += 0.1
302305
scored.append((i, sent, relevance))
303306

304307
# Select sentences above threshold

src/synaptic/extensions/phrase_extractor.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,6 @@
4848
r"\((?:주|사|재|학|재단|사단)\)([\w]+)"
4949
)
5050

51-
# Years: 4-digit numbers (1000~2999)
52-
_RE_YEAR = re.compile(
53-
r"\b([12]\d{3})\b"
54-
)
55-
5651
# Common English stop words (phrases containing only these are not recognized as phrases)
5752
_STOP_WORDS = frozenset({
5853
"the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
@@ -79,13 +74,13 @@ def _is_meaningful(phrase: str) -> bool:
7974
8075
Exclusion criteria:
8176
- Phrases composed only of stop words
82-
- Phrases composed only of digits (years excluded — handled by separate regex)
77+
- Phrases composed only of digits
8378
- Single-character phrases
8479
"""
8580
stripped = phrase.strip()
8681
if len(stripped) < 2:
8782
return False
88-
# Digits only (years are already handled by _RE_YEAR, so excluded here)
83+
# Digits only
8984
if stripped.isdigit():
9085
return False
9186
words = phrase.lower().split()
@@ -262,8 +257,4 @@ def _add(phrase: str) -> None:
262257
for m in _RE_KO_PARENS.finditer(text):
263258
_add(m.group(1))
264259

265-
# 5. Years
266-
for m in _RE_YEAR.finditer(text):
267-
_add(m.group(1))
268-
269260
return phrases[: self._max_phrases]

src/synaptic/search.py

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -164,14 +164,12 @@ async def search(
164164
if boosted > existing[1]:
165165
all_nodes[node_id] = (existing[0], boosted)
166166

167-
# Filter by node_kinds if specified
167+
# Soft boost for preferred node_kinds (instead of hard filtering)
168168
if node_kinds:
169169
kind_set = set(node_kinds)
170-
all_nodes = {
171-
nid: (node, score)
172-
for nid, (node, score) in all_nodes.items()
173-
if node.kind in kind_set
174-
}
170+
for nid, (node, score) in all_nodes.items():
171+
if node.kind in kind_set:
172+
all_nodes[nid] = (node, min(1.0, score * 1.5))
175173

176174
# Kind-intent boost: boost kinds matching query keywords
177175
preferred_kinds: set[NodeKind] = set()
@@ -213,16 +211,9 @@ async def search(
213211
# Filter out internal phrase nodes (_phrase tag) from final results.
214212
# Phrase nodes serve as PPR bridge nodes but should not appear in
215213
# user-facing search results — they carry no passage content.
216-
final: list[ActivatedNode] = []
217-
fallback: list[ActivatedNode] = []
218-
for a in activated:
219-
if "_phrase" in (a.node.tags or []):
220-
fallback.append(a) # keep as last resort
221-
else:
222-
final.append(a)
223-
# If filtering removed too many, pad back with phrase nodes
224-
if len(final) < limit and fallback:
225-
final.extend(fallback[: limit - len(final)])
214+
final: list[ActivatedNode] = [
215+
a for a in activated if "_phrase" not in (a.node.tags or [])
216+
]
226217

227218
elapsed_ms = (time() - start) * 1000
228219
return SearchResult(

tests/test_phrase_extractor.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,13 @@ def test_korean_parens(self) -> None:
9090
assert any("플래티어" in p for p in lowered)
9191
assert any("한국재단" in p for p in lowered)
9292

93-
def test_year_extraction_filtered_by_meaningful(self) -> None:
94-
"""Years (digits only) are filtered out by _is_meaningful — this is intentional."""
93+
def test_no_year_extraction(self) -> None:
94+
"""Years are not extracted — year regex was removed as dead code."""
9595
extractor = PhraseExtractor(max_phrases_per_node=20)
9696
phrases = extractor._extract_phrases(
9797
"History",
9898
"The university was established in 1755 and expanded in 2024.",
9999
)
100-
# Pure digit years are excluded by _is_meaningful (digits-only check)
101100
assert "1755" not in phrases
102101
assert "2024" not in phrases
103102

0 commit comments

Comments (0)