Skip to content

Commit cdd346c

Browse files
SonAIengine and claude
committed
fix: 검색 품질 개선 — kind soft boost, phrase 필터, evidence threshold 조정
## 개선 내역 (벤치마크 검증)

### 1. agent_search kind 필터: hard filter → soft boost (MRR +9.0%)
- search.py: node_kinds 매칭 시 하드 제거 → 1.5x score boost로 변경
- 자체 시나리오 MRR 0.767 → 0.836 (+9.0%), R@5 0.700 → 0.750 (+7.1%)
- recall 보존하면서 preferred kind 우선 랭킹

### 2. _phrase 노드 검색 결과 노출 차단
- search.py: phrase 노드 fallback padding 제거 — 부족하면 적은 결과 반환
- S8 LLM ablation에서 phrase 노이즈로 MRR -6.8% 발생했던 원인 해결

### 3. Evidence Chain 압축 threshold 상향
- evidence.py: relevance_threshold 0.2 → 0.3 (불필요한 문장 제거)
- 첫 문장 position bias +0.1 추가 (주요 정보는 첫 문장에 집중)

### 4. PhraseExtractor year 추출 dead code 제거
- phrase_extractor.py: _RE_YEAR regex 삭제 (_is_meaningful이 digits 차단하여 실질 미동작)

### FTS word boundary 시도 → 롤백
- 영문 의학 용어(APOE4, BRCA) + 형태 변화(stocks)에서 매칭 실패
- SciFact -27.5%, FiQA -42.4% 악화 → substring 매칭 유지

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 56bbb64 commit cdd346c

File tree

5 files changed

+35
-34
lines changed

5 files changed

+35
-34
lines changed

src/synaptic/backends/memory.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import re
56
from collections.abc import Sequence
67
from difflib import SequenceMatcher
78

@@ -96,6 +97,9 @@ async def delete_edge(self, edge_id: str) -> None:
9697
async def search_fts(self, query: str, *, limit: int = 20) -> list[Node]:
9798
query_lower = query.lower()
9899
terms = query_lower.split()
100+
# No word boundary patterns — substring matching is better for diverse corpora
101+
# (medical terms like "APOE4", Korean compounds, morphological variants)
102+
term_patterns: dict[str, re.Pattern[str]] = {}
99103
# Generate 2-gram substrings (for Korean compound word matching)
100104
bigrams: list[str] = []
101105
if len(terms) >= 2:
@@ -114,10 +118,23 @@ async def search_fts(self, query: str, *, limit: int = 20) -> list[Node]:
114118
score += len(terms) * 3.0
115119
else:
116120
# Individual term matching in title (weight 2x)
117-
score += sum(2.0 for t in terms if t in title_lower)
121+
for t in terms:
122+
pat = term_patterns.get(t)
123+
if pat is not None:
124+
if pat.search(title_lower):
125+
score += 2.0
126+
else:
127+
if t in title_lower:
128+
score += 2.0
118129

119130
# Individual term matching in content
120-
score += sum(1.0 for t in terms if t in content_lower)
131+
for t in terms:
132+
pat = term_patterns.get(t)
133+
if pat is not None:
134+
score += len(pat.findall(content_lower)) * 1.0
135+
else:
136+
if t in content_lower:
137+
score += 1.0
121138

122139
# Bigram match bonus (higher relevance when 2 consecutive terms appear together)
123140
score += sum(1.5 for bg in bigrams if bg in full_text)

src/synaptic/evidence.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def __init__(
7676
self,
7777
*,
7878
max_sentences_per_node: int = 5,
79-
relevance_threshold: float = 0.2,
79+
relevance_threshold: float = 0.3,
8080
max_tokens: int = 2048,
8181
) -> None:
8282
self._max_sentences = max_sentences_per_node
@@ -292,13 +292,16 @@ def _compress_content(self, content: str, query: str) -> str:
292292
# No terms extracted from query — return first N sentences
293293
return " ".join(sentences[:self._max_sentences])
294294

295-
# Score each sentence by relevance
295+
# Score each sentence by relevance (with position bias for first sentence)
296296
scored: list[tuple[int, str, float]] = []
297297
for i, sent in enumerate(sentences):
298298
sent_lower = sent.lower()
299299
sent_terms = set(re.split(r'[\s,;:!?()\[\]]+', sent_lower))
300300
overlap = len(query_terms & sent_terms)
301301
relevance = overlap / len(query_terms)
302+
# Position bias: first sentence gets +0.1 bonus
303+
if i == 0:
304+
relevance += 0.1
302305
scored.append((i, sent, relevance))
303306

304307
# Select sentences above threshold

src/synaptic/extensions/phrase_extractor.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,6 @@
4848
r"\((?:주|사|재|학|재단|사단)\)([\w]+)"
4949
)
5050

51-
# Years: 4-digit numbers (1000~2999)
52-
_RE_YEAR = re.compile(
53-
r"\b([12]\d{3})\b"
54-
)
55-
5651
# Common English stop words (phrases containing only these are not recognized as phrases)
5752
_STOP_WORDS = frozenset({
5853
"the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
@@ -79,13 +74,13 @@ def _is_meaningful(phrase: str) -> bool:
7974
8075
Exclusion criteria:
8176
- Phrases composed only of stop words
82-
- Phrases composed only of digits (years excluded — handled by separate regex)
77+
- Phrases composed only of digits
8378
- Single-character phrases
8479
"""
8580
stripped = phrase.strip()
8681
if len(stripped) < 2:
8782
return False
88-
# Digits only (years are already handled by _RE_YEAR, so excluded here)
83+
# Digits only
8984
if stripped.isdigit():
9085
return False
9186
words = phrase.lower().split()
@@ -262,8 +257,4 @@ def _add(phrase: str) -> None:
262257
for m in _RE_KO_PARENS.finditer(text):
263258
_add(m.group(1))
264259

265-
# 5. Years
266-
for m in _RE_YEAR.finditer(text):
267-
_add(m.group(1))
268-
269260
return phrases[: self._max_phrases]

src/synaptic/search.py

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -164,14 +164,12 @@ async def search(
164164
if boosted > existing[1]:
165165
all_nodes[node_id] = (existing[0], boosted)
166166

167-
# Filter by node_kinds if specified
167+
# Soft boost for preferred node_kinds (instead of hard filtering)
168168
if node_kinds:
169169
kind_set = set(node_kinds)
170-
all_nodes = {
171-
nid: (node, score)
172-
for nid, (node, score) in all_nodes.items()
173-
if node.kind in kind_set
174-
}
170+
for nid, (node, score) in all_nodes.items():
171+
if node.kind in kind_set:
172+
all_nodes[nid] = (node, min(1.0, score * 1.5))
175173

176174
# Kind-intent boost: boost kinds matching query keywords
177175
preferred_kinds: set[NodeKind] = set()
@@ -213,16 +211,9 @@ async def search(
213211
# Filter out internal phrase nodes (_phrase tag) from final results.
214212
# Phrase nodes serve as PPR bridge nodes but should not appear in
215213
# user-facing search results — they carry no passage content.
216-
final: list[ActivatedNode] = []
217-
fallback: list[ActivatedNode] = []
218-
for a in activated:
219-
if "_phrase" in (a.node.tags or []):
220-
fallback.append(a) # keep as last resort
221-
else:
222-
final.append(a)
223-
# If filtering removed too many, pad back with phrase nodes
224-
if len(final) < limit and fallback:
225-
final.extend(fallback[: limit - len(final)])
214+
final: list[ActivatedNode] = [
215+
a for a in activated if "_phrase" not in (a.node.tags or [])
216+
]
226217

227218
elapsed_ms = (time() - start) * 1000
228219
return SearchResult(

tests/test_phrase_extractor.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,13 @@ def test_korean_parens(self) -> None:
9090
assert any("플래티어" in p for p in lowered)
9191
assert any("한국재단" in p for p in lowered)
9292

93-
def test_year_extraction_filtered_by_meaningful(self) -> None:
94-
"""Years (digits only) are filtered out by _is_meaningful — this is intentional."""
93+
def test_no_year_extraction(self) -> None:
94+
"""Years are not extracted — year regex was removed as dead code."""
9595
extractor = PhraseExtractor(max_phrases_per_node=20)
9696
phrases = extractor._extract_phrases(
9797
"History",
9898
"The university was established in 1755 and expanded in 2024.",
9999
)
100-
# Pure digit years are excluded by _is_meaningful (digits-only check)
101100
assert "1755" not in phrases
102101
assert "2024" not in phrases
103102

0 commit comments

Comments (0)