From d4fab576941dcdc2bbb3d6d4ff1716085c130b0b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 14 Apr 2026 14:12:01 +0000 Subject: [PATCH 1/2] feat: extract and persist Findings from ToolResults (closes #157) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 of the FactStore: discrete Finding records are now extracted from successful ToolResult.data dicts and persisted alongside theses, enabling ticker-based recall of past insights without relying on free-text search. - FactStore schema gains a `findings` table + index on entity, plus `save_finding()` / `get_findings()` CRUD (confidence is clamped to [0,1] at the store boundary so noisy extractor inputs are tolerated). - New `qracer/memory/finding_extractor.py` parses `trade_thesis`, `news`, and `fundamentals` ToolResults into `FindingDraft` records. Zero LLM cost — structured data only. Failed results, unknown tools, and extractor exceptions all yield [] so a bad payload never breaks the persistence pipeline. - `ConversationEngine._persist_facts` loops over analysis.results and persists every extracted draft; per-draft exceptions are isolated so one bad tool doesn't block findings from siblings. - Tests: 6 new CRUD tests on FactStore, 14 extractor tests covering failure paths, sentiment-weighted confidence, article cap, and partial-fundamentals fallback. 
--- qracer/conversation/engine.py | 34 ++++- qracer/memory/fact_store.py | 84 ++++++++++- qracer/memory/finding_extractor.py | 154 ++++++++++++++++++++ tests/memory/test_fact_store.py | 102 ++++++++++++++ tests/memory/test_finding_extractor.py | 186 +++++++++++++++++++++++++ 5 files changed, 554 insertions(+), 6 deletions(-) create mode 100644 qracer/memory/finding_extractor.py create mode 100644 tests/memory/test_finding_extractor.py diff --git a/qracer/conversation/engine.py b/qracer/conversation/engine.py index 2d546e5..7cf0efe 100644 --- a/qracer/conversation/engine.py +++ b/qracer/conversation/engine.py @@ -36,6 +36,7 @@ from qracer.data.registry import DataRegistry from qracer.llm.registry import LLMRegistry from qracer.memory.fact_store import FactStore +from qracer.memory.finding_extractor import extract_findings from qracer.memory.memory_searcher import MemorySearcher from qracer.memory.session_compactor import SessionCompactor from qracer.memory.session_logger import SessionLogger, TurnRecord @@ -330,9 +331,32 @@ async def query(self, user_input: str) -> EngineResponse: def _persist_facts(self, analysis: AnalysisResult) -> None: """Extract and persist structured facts from analysis results.""" - if self._fact_store is None or analysis.trade_thesis is None: + if self._fact_store is None: return - try: - self._fact_store.save_thesis(analysis.trade_thesis, self._session_id) - except Exception: - logger.warning("Failed to persist thesis to fact store", exc_info=True) + if analysis.trade_thesis is not None: + try: + self._fact_store.save_thesis(analysis.trade_thesis, self._session_id) + except Exception: + logger.warning("Failed to persist thesis to fact store", exc_info=True) + + # Extract and persist discrete findings from every successful tool + # result. Each tool-level failure is isolated so a bad payload for + # one tool never prevents findings from other tools being saved. 
+ for result in analysis.results: + for draft in extract_findings(result): + try: + self._fact_store.save_finding( + entity=draft.entity, + statement=draft.statement, + confidence=draft.confidence, + source_tool=draft.source_tool, + session_id=self._session_id, + event_date=draft.event_date, + ) + except Exception: + logger.warning( + "Failed to persist finding (tool=%s entity=%s)", + draft.source_tool, + draft.entity, + exc_info=True, + ) diff --git a/qracer/memory/fact_store.py b/qracer/memory/fact_store.py index 1ab52e1..fb5dbff 100644 --- a/qracer/memory/fact_store.py +++ b/qracer/memory/fact_store.py @@ -17,7 +17,7 @@ import duckdb -from qracer.memory.fact_models import PersistedThesis, ThesisStatus +from qracer.memory.fact_models import Finding, PersistedThesis, ThesisStatus from qracer.models.base import TradeThesis logger = logging.getLogger(__name__) @@ -43,6 +43,21 @@ updated_at TIMESTAMP NOT NULL, superseded_by INTEGER ); + +CREATE SEQUENCE IF NOT EXISTS finding_id_seq START 1; + +CREATE TABLE IF NOT EXISTS findings ( + id INTEGER PRIMARY KEY DEFAULT nextval('finding_id_seq'), + entity VARCHAR NOT NULL, + statement VARCHAR NOT NULL, + confidence DOUBLE NOT NULL, + source_tool VARCHAR NOT NULL, + session_id VARCHAR NOT NULL, + event_date VARCHAR, + created_at TIMESTAMP NOT NULL +); + +CREATE INDEX IF NOT EXISTS idx_findings_entity ON findings(entity); """ @@ -242,6 +257,73 @@ def update_thesis_status( [status.value, superseded_by, datetime.now(), thesis_id], ) + # ------------------------------------------------------------------ + # Finding CRUD + # ------------------------------------------------------------------ + + def save_finding( + self, + *, + entity: str, + statement: str, + confidence: float, + source_tool: str, + session_id: str, + event_date: str | None = None, + ) -> int: + """Persist a Finding and return its new id. 
+ + Confidence is clamped to the ``[0.0, 1.0]`` range; upstream extractors + may emit out-of-range values when parsing noisy inputs. + """ + clamped_confidence = max(0.0, min(1.0, float(confidence))) + self._conn.execute( + """ + INSERT INTO findings ( + entity, statement, confidence, source_tool, + session_id, event_date, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?) + """, + [ + entity, + statement, + clamped_confidence, + source_tool, + session_id, + event_date, + datetime.now(), + ], + ) + new_id: int = self._conn.execute("SELECT currval('finding_id_seq')").fetchone()[0] # type: ignore[index] + return new_id + + def get_findings(self, entity: str, limit: int = 20) -> list[Finding]: + """Return findings for *entity* ordered most-recent first.""" + rows = self._conn.execute( + """ + SELECT id, entity, statement, confidence, source_tool, + session_id, event_date, created_at + FROM findings + WHERE entity = ? + ORDER BY created_at DESC + LIMIT ? + """, + [entity, limit], + ).fetchall() + return [ + Finding( + id=row[0], + entity=row[1], + statement=row[2], + confidence=row[3], + source_tool=row[4], + session_id=row[5], + event_date=row[6], + created_at=row[7], + ) + for row in rows + ] + # ------------------------------------------------------------------ # Lifecycle # ------------------------------------------------------------------ diff --git a/qracer/memory/finding_extractor.py b/qracer/memory/finding_extractor.py new file mode 100644 index 0000000..5d50be3 --- /dev/null +++ b/qracer/memory/finding_extractor.py @@ -0,0 +1,154 @@ +"""Zero-LLM-cost extraction of Finding drafts from ToolResult data. + +Called from :meth:`ConversationEngine._persist_facts` for each successful +tool result. Parses structured ``ToolResult.data`` dicts — no LLM calls, +no expensive post-processing. + +Extractors are registered per ``ToolResult.tool`` name. Tools without a +registered extractor yield no findings (graceful degradation). 
+""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass +from typing import Any + +from qracer.models import ToolResult + + +@dataclass +class FindingDraft: + """A Finding value ready to be persisted (no db id yet).""" + + entity: str + statement: str + confidence: float + source_tool: str + event_date: str | None = None + + +# Sentiment-string → confidence weight. Directional sentiment (positive / +# negative) carries stronger signal than neutral / unlabeled news. +_SENTIMENT_CONFIDENCE: dict[str, float] = { + "positive": 0.7, + "negative": 0.7, + "neutral": 0.4, +} + +_DEFAULT_NEWS_CONFIDENCE = 0.5 +_DEFAULT_FUNDAMENTALS_CONFIDENCE = 0.9 +_MAX_NEWS_FINDINGS = 3 + + +def _extract_thesis(data: dict[str, Any]) -> list[FindingDraft]: + thesis = data.get("thesis") or {} + ticker = thesis.get("ticker") + catalyst = thesis.get("catalyst") + conviction = thesis.get("conviction") + if not ticker or not catalyst or conviction is None: + return [] + statement = ( + f"{ticker}: {catalyst} — target ${thesis.get('target_price')}, " + f"stop ${thesis.get('stop_loss')}, R/R {thesis.get('risk_reward_ratio')}x, " + f"conviction {conviction}/10" + ) + return [ + FindingDraft( + entity=ticker, + statement=statement, + confidence=max(0.0, min(1.0, float(conviction) / 10.0)), + source_tool="trade_thesis", + event_date=thesis.get("catalyst_date"), + ) + ] + + +def _extract_news(data: dict[str, Any]) -> list[FindingDraft]: + ticker = data.get("ticker") + articles = data.get("articles") or [] + if not ticker or not articles: + return [] + drafts: list[FindingDraft] = [] + for art in articles[:_MAX_NEWS_FINDINGS]: + title = art.get("title") + if not title: + continue + raw_sentiment = art.get("sentiment") + sentiment = (raw_sentiment or "").strip().lower() + confidence = _SENTIMENT_CONFIDENCE.get(sentiment, _DEFAULT_NEWS_CONFIDENCE) + source = art.get("source") or "unknown" + label = sentiment or "news" + drafts.append( + 
FindingDraft( + entity=ticker, + statement=f"[{label}] {title} ({source})", + confidence=confidence, + source_tool="news", + event_date=art.get("published_at"), + ) + ) + return drafts + + +def _format_ratio(value: float) -> str: + return f"{value:.2f}" + + +def _format_money(value: float) -> str: + return f"${value:,.0f}" + + +def _format_percent(value: float) -> str: + return f"{value:.2%}" + + +def _extract_fundamentals(data: dict[str, Any]) -> list[FindingDraft]: + ticker = data.get("ticker") + if not ticker: + return [] + parts: list[str] = [] + if (pe := data.get("pe_ratio")) is not None: + parts.append(f"P/E {_format_ratio(float(pe))}") + if (mc := data.get("market_cap")) is not None: + parts.append(f"market cap {_format_money(float(mc))}") + if (rev := data.get("revenue")) is not None: + parts.append(f"revenue {_format_money(float(rev))}") + if (eps := data.get("earnings")) is not None: + parts.append(f"earnings {_format_money(float(eps))}") + if (dy := data.get("dividend_yield")) is not None: + parts.append(f"dividend yield {_format_percent(float(dy))}") + if not parts: + return [] + return [ + FindingDraft( + entity=ticker, + statement=f"{ticker} fundamentals: " + ", ".join(parts), + confidence=_DEFAULT_FUNDAMENTALS_CONFIDENCE, + source_tool="fundamentals", + ) + ] + + +_EXTRACTORS: dict[str, Callable[[dict[str, Any]], list[FindingDraft]]] = { + "trade_thesis": _extract_thesis, + "news": _extract_news, + "fundamentals": _extract_fundamentals, +} + + +def extract_findings(tool_result: ToolResult) -> list[FindingDraft]: + """Return zero-or-more FindingDrafts for a single ToolResult. + + Failed results, unknown tools, and extractor exceptions all yield an + empty list so a bad payload never breaks the persistence pipeline. 
+ """ + if not tool_result.success: + return [] + extractor = _EXTRACTORS.get(tool_result.tool) + if extractor is None: + return [] + try: + return extractor(tool_result.data or {}) + except Exception: # pragma: no cover - defensive guard + return [] diff --git a/tests/memory/test_fact_store.py b/tests/memory/test_fact_store.py index ad2312b..926eec2 100644 --- a/tests/memory/test_fact_store.py +++ b/tests/memory/test_fact_store.py @@ -172,6 +172,108 @@ def test_get_upcoming_catalysts(self, fact_store: FactStore) -> None: assert upcoming[0].ticker == "NEAR" +# --------------------------------------------------------------------------- +# Finding CRUD +# --------------------------------------------------------------------------- + + +class TestFindingCRUD: + def test_save_and_get_finding(self, fact_store: FactStore) -> None: + fid = fact_store.save_finding( + entity="AAPL", + statement="iPhone sales up 12% YoY", + confidence=0.8, + source_tool="fundamentals", + session_id="sess_001", + ) + assert fid >= 1 + + findings = fact_store.get_findings("AAPL") + assert len(findings) == 1 + f = findings[0] + assert f.entity == "AAPL" + assert f.statement == "iPhone sales up 12% YoY" + assert f.confidence == 0.8 + assert f.source_tool == "fundamentals" + assert f.session_id == "sess_001" + assert f.event_date is None + + def test_save_finding_clamps_confidence(self, fact_store: FactStore) -> None: + fact_store.save_finding( + entity="AAPL", + statement="noisy", + confidence=2.5, + source_tool="news", + session_id="s1", + ) + fact_store.save_finding( + entity="AAPL", + statement="also noisy", + confidence=-0.3, + source_tool="news", + session_id="s1", + ) + confidences = sorted(f.confidence for f in fact_store.get_findings("AAPL")) + assert confidences == [0.0, 1.0] + + def test_get_findings_filters_by_entity(self, fact_store: FactStore) -> None: + fact_store.save_finding( + entity="AAPL", + statement="a1", + confidence=0.5, + source_tool="news", + session_id="s1", + ) + 
fact_store.save_finding( + entity="MSFT", + statement="m1", + confidence=0.5, + source_tool="news", + session_id="s1", + ) + aapl = fact_store.get_findings("AAPL") + msft = fact_store.get_findings("MSFT") + tsla = fact_store.get_findings("TSLA") + assert [f.statement for f in aapl] == ["a1"] + assert [f.statement for f in msft] == ["m1"] + assert tsla == [] + + def test_get_findings_ordered_desc(self, fact_store: FactStore) -> None: + for label in ("first", "second", "third"): + fact_store.save_finding( + entity="AAPL", + statement=label, + confidence=0.5, + source_tool="news", + session_id="s1", + ) + findings = fact_store.get_findings("AAPL") + assert [f.statement for f in findings] == ["third", "second", "first"] + + def test_get_findings_respects_limit(self, fact_store: FactStore) -> None: + for i in range(5): + fact_store.save_finding( + entity="AAPL", + statement=f"n{i}", + confidence=0.5, + source_tool="news", + session_id="s1", + ) + assert len(fact_store.get_findings("AAPL", limit=2)) == 2 + + def test_save_finding_preserves_event_date(self, fact_store: FactStore) -> None: + fact_store.save_finding( + entity="AAPL", + statement="earnings announced", + confidence=0.7, + source_tool="news", + session_id="s1", + event_date="2026-05-01", + ) + [finding] = fact_store.get_findings("AAPL") + assert finding.event_date == "2026-05-01" + + # --------------------------------------------------------------------------- # Context manager # --------------------------------------------------------------------------- diff --git a/tests/memory/test_finding_extractor.py b/tests/memory/test_finding_extractor.py new file mode 100644 index 0000000..70aad7b --- /dev/null +++ b/tests/memory/test_finding_extractor.py @@ -0,0 +1,186 @@ +"""Tests for Finding extraction from ToolResult data.""" + +from __future__ import annotations + +from qracer.memory.finding_extractor import FindingDraft, extract_findings +from qracer.models import ToolResult + + +def _ok(tool: str, data: dict) -> 
ToolResult: + return ToolResult(tool=tool, success=True, data=data, source="test") + + +def _fail(tool: str) -> ToolResult: + return ToolResult(tool=tool, success=False, data={}, source="test", error="boom") + + +class TestFailureAndUnknown: + def test_failed_tool_result_yields_nothing(self) -> None: + assert extract_findings(_fail("news")) == [] + + def test_unknown_tool_yields_nothing(self) -> None: + assert extract_findings(_ok("price_event", {"ticker": "AAPL"})) == [] + + +class TestTradeThesisExtraction: + def _thesis(self, **overrides) -> dict: + thesis = { + "ticker": "AAPL", + "entry_zone": [170.0, 175.0], + "target_price": 200.0, + "stop_loss": 160.0, + "risk_reward_ratio": 2.2, + "catalyst": "Q2 earnings beat", + "catalyst_date": "2026-05-01", + "conviction": 8, + "summary": "Long AAPL.", + } + thesis.update(overrides) + return {"thesis": thesis} + + def test_extracts_basic_thesis(self) -> None: + drafts = extract_findings(_ok("trade_thesis", self._thesis())) + assert len(drafts) == 1 + draft = drafts[0] + assert isinstance(draft, FindingDraft) + assert draft.entity == "AAPL" + assert draft.source_tool == "trade_thesis" + assert draft.event_date == "2026-05-01" + assert "Q2 earnings beat" in draft.statement + assert "conviction 8/10" in draft.statement + assert draft.confidence == 0.8 + + def test_missing_catalyst_skips(self) -> None: + drafts = extract_findings( + _ok("trade_thesis", self._thesis(catalyst="")) + ) + assert drafts == [] + + def test_conviction_clamped_to_unit_interval(self) -> None: + drafts = extract_findings(_ok("trade_thesis", self._thesis(conviction=15))) + assert drafts[0].confidence == 1.0 + + +class TestNewsExtraction: + def _news(self, articles: list[dict]) -> dict: + return {"ticker": "AAPL", "count": len(articles), "articles": articles} + + def test_extracts_article_findings_with_sentiment_confidence(self) -> None: + data = self._news( + [ + { + "title": "Earnings beat expectations", + "source": "Reuters", + "published_at": 
"2026-05-01T10:00:00", + "sentiment": "positive", + "summary": "", + "url": "https://example.com/1", + }, + { + "title": "Supply chain warning", + "source": "Bloomberg", + "published_at": "2026-05-02T10:00:00", + "sentiment": "negative", + "summary": "", + "url": "https://example.com/2", + }, + { + "title": "Analyst day recap", + "source": "WSJ", + "published_at": "2026-05-03T10:00:00", + "sentiment": "neutral", + "summary": "", + "url": "https://example.com/3", + }, + ] + ) + drafts = extract_findings(_ok("news", data)) + assert len(drafts) == 3 + assert all(d.entity == "AAPL" and d.source_tool == "news" for d in drafts) + assert drafts[0].confidence == 0.7 # positive + assert drafts[1].confidence == 0.7 # negative + assert drafts[2].confidence == 0.4 # neutral + assert "Earnings beat" in drafts[0].statement + assert "(Reuters)" in drafts[0].statement + assert drafts[0].event_date == "2026-05-01T10:00:00" + + def test_caps_at_three_articles(self) -> None: + articles = [ + { + "title": f"t{i}", + "source": "src", + "published_at": None, + "sentiment": None, + "summary": "", + "url": "", + } + for i in range(10) + ] + drafts = extract_findings(_ok("news", self._news(articles))) + assert len(drafts) == 3 + + def test_unlabeled_sentiment_uses_default_confidence(self) -> None: + data = self._news( + [ + { + "title": "title", + "source": "src", + "published_at": None, + "sentiment": None, + "summary": "", + "url": "", + } + ] + ) + drafts = extract_findings(_ok("news", data)) + assert drafts[0].confidence == 0.5 + assert "[news]" in drafts[0].statement + + def test_empty_articles_yields_nothing(self) -> None: + assert extract_findings(_ok("news", self._news([]))) == [] + + def test_missing_title_is_skipped(self) -> None: + data = self._news( + [ + {"title": None, "source": "src", "sentiment": "positive"}, + {"title": "real", "source": "src", "sentiment": "positive"}, + ] + ) + drafts = extract_findings(_ok("news", data)) + assert len(drafts) == 1 + assert "real" in 
drafts[0].statement + + +class TestFundamentalsExtraction: + def test_extracts_summary_finding(self) -> None: + data = { + "ticker": "AAPL", + "pe_ratio": 29.5, + "market_cap": 3_000_000_000_000, + "revenue": 400_000_000_000, + "earnings": 100_000_000_000, + "dividend_yield": 0.005, + } + drafts = extract_findings(_ok("fundamentals", data)) + assert len(drafts) == 1 + draft = drafts[0] + assert draft.entity == "AAPL" + assert draft.source_tool == "fundamentals" + assert draft.confidence == 0.9 + assert "P/E 29.50" in draft.statement + assert "dividend yield 0.50%" in draft.statement + + def test_partial_fundamentals_still_produces_finding(self) -> None: + data = {"ticker": "AAPL", "pe_ratio": 29.5} + drafts = extract_findings(_ok("fundamentals", data)) + assert len(drafts) == 1 + assert "P/E 29.50" in drafts[0].statement + assert "market cap" not in drafts[0].statement + + def test_empty_fundamentals_yields_nothing(self) -> None: + drafts = extract_findings(_ok("fundamentals", {"ticker": "AAPL"})) + assert drafts == [] + + def test_missing_ticker_yields_nothing(self) -> None: + drafts = extract_findings(_ok("fundamentals", {"pe_ratio": 10})) + assert drafts == [] From 5dbf09a9fc0e29406834a1dfff4042673b0e95a3 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 14 Apr 2026 14:14:14 +0000 Subject: [PATCH 2/2] style: apply ruff format to new finding_extractor tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes CI code-quality check failure — ruff format wanted to collapse a three-line call in test_missing_catalyst_skips onto one line. 
--- tests/memory/test_finding_extractor.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/memory/test_finding_extractor.py b/tests/memory/test_finding_extractor.py index 70aad7b..5706d08 100644 --- a/tests/memory/test_finding_extractor.py +++ b/tests/memory/test_finding_extractor.py @@ -51,9 +51,7 @@ def test_extracts_basic_thesis(self) -> None: assert draft.confidence == 0.8 def test_missing_catalyst_skips(self) -> None: - drafts = extract_findings( - _ok("trade_thesis", self._thesis(catalyst="")) - ) + drafts = extract_findings(_ok("trade_thesis", self._thesis(catalyst=""))) assert drafts == [] def test_conviction_clamped_to_unit_interval(self) -> None: