Skip to content

Commit cb539ea

Browse files
penny-team[bot], jaredlockhart, and claude
authored
Fix free thinking: add context, dedup, and reorganize tests (jaredlockhart#837)
* Fix free thinking: add context, dedup, and reorganize tests

  Free thinking cycles were missing context (profile, previous thoughts, dislikes) and skipping embedding dedup entirely. This caused Penny to fixate on the same topics (quantum computing, exoplanets) across dozens of free thinking cycles with no dedup catching the repetition.

  The root cause was special-casing free thinking as a distinct codepath rather than treating preference_id=None as a valid filter value. Now all thinking modes (seeded, free, news browse) follow the same codepath — the only difference is the initial prompt.

  Changes:
  - ThoughtStore.get_recent_by_preference accepts preference_id: int | None
  - ThinkingAgent removes _free_thinking flag and all branching on it
  - get_context, _build_thought_context, _is_duplicate_thought, and handle_text_step all use the same codepath regardless of mode
  - Tests reorganized: comprehensive integration tests first (seeded + free), then special cases, then error cases, then unit tests

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Fix flaky news browsing test: force non-free-thinking path

  The test had no monkeypatch on random.random, so it sometimes hit the free-thinking path instead of the news browse fallback, causing the assertion on "news" in the prompt to fail on CI.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Jared Lockhart <119884+jaredlockhart@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent ffb7043 commit cb539ea

File tree

3 files changed

+303
-496
lines changed

3 files changed

+303
-496
lines changed

penny/penny/agents/thinking.py

Lines changed: 14 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
logger = logging.getLogger(__name__)
2020

21-
# Probability of a free-thinking cycle (no seed, no context, just vibes)
21+
# Probability of a free-thinking cycle (no seed topic — Penny picks her own)
2222
FREE_THINKING_PROBABILITY = 1 / 3
2323

2424
# Minimum word count for a thought to be stored (filters model planning text)
@@ -31,15 +31,9 @@ class ThinkingAgent(Agent):
3131
Each cycle picks ONE random seed topic from history
3232
to focus on, keeping thinking rotating across interests.
3333
34-
Context matrix — each mode gets tailored context:
35-
36-
Mode | Thoughts | Dislikes | Tools | Steps
37-
---------- | -------- | -------- | ----------- | -----
38-
Seeded | 10 | yes | search+news | 5
39-
Browse News| 10 | yes | search+news | 5
40-
Free Think | - | - | search+news | 5
41-
42-
All modes include profile (user name) except free think.
34+
All modes get the same context: profile, recent thoughts
35+
(scoped by preference_id), and dislikes. The only difference
36+
is the initial prompt (seed topic vs free exploration).
4337
4438
Thinking loop::
4539
@@ -66,7 +60,6 @@ def __init__(self, **kwargs: object) -> None:
6660
super().__init__(**kwargs) # type: ignore[arg-type]
6761
self.max_steps = int(self.config.runtime.INNER_MONOLOGUE_MAX_STEPS)
6862
self._inner_monologue: list[str] = []
69-
self._free_thinking: bool = False
7063
self._seed_topic: str | None = None
7164
self._seed_pref_id: int | None = None
7265

@@ -75,13 +68,11 @@ def __init__(self, **kwargs: object) -> None:
7568
async def get_prompt(self, user: str) -> str | None:
7669
"""Pick a seed topic or let Penny free-think (~1/3 of the time)."""
7770
self._inner_monologue = []
78-
self._free_thinking = False
7971
self._seed_topic = None
8072
self._seed_pref_id = None
8173

8274
if random.random() < FREE_THINKING_PROBABILITY:
8375
logger.info("Free thinking cycle for %s", user)
84-
self._free_thinking = True
8576
return Prompt.THINKING_FREE
8677

8778
threshold = int(self.config.runtime.PREFERENCE_MENTION_THRESHOLD)
@@ -97,14 +88,11 @@ async def get_prompt(self, user: str) -> str | None:
9788
return Prompt.THINKING_SEED.format(seed=pref.content)
9889

9990
async def get_context(self, user: str) -> str:
100-
"""Slim context — profile, entities (seed-anchored), thoughts, and dislikes.
91+
"""Slim context — profile, thoughts, and dislikes.
10192
102-
Free-thinking cycles get no context so Penny explores freely.
103-
Browse news skips entities (no meaningful anchor).
104-
Seeded cycles anchor entities to the seed topic.
93+
All modes (seeded, free, browse news) get the same context shape:
94+
profile, recent thoughts scoped to the mode, and dislikes.
10595
"""
106-
if self._free_thinking:
107-
return ""
10896
sections: list[str | None] = [
10997
self._build_profile_context(user, None),
11098
self._build_thought_context(user),
@@ -113,22 +101,19 @@ async def get_context(self, user: str) -> str:
113101
return "\n\n".join(s for s in sections if s)
114102

115103
def _build_thought_context(self, sender: str) -> str | None:
116-
"""Build thought context scoped to the current seed preference.
104+
"""Build thought context scoped to the current preference_id.
117105
118-
Only prior thoughts about the same preference are relevant —
119-
they show what Penny already found so she can dig deeper or
120-
find new angles instead of repeating herself.
106+
Shows what Penny already explored so she avoids repeating herself.
107+
Works for both seeded (preference_id=<int>) and free (preference_id=None).
121108
"""
122-
if not self._seed_pref_id:
123-
return None
124109
try:
125110
thoughts = self.db.thoughts.get_recent_by_preference(
126111
sender, self._seed_pref_id, limit=self.THOUGHT_CONTEXT_LIMIT
127112
)
128113
if not thoughts:
129114
return None
130115
lines = [t.content for t in thoughts]
131-
logger.debug("Built preference-scoped thought context (%d thoughts)", len(thoughts))
116+
logger.debug("Built thought context (%d thoughts)", len(thoughts))
132117
return "## Recent Background Thinking\n" + "\n\n".join(lines)
133118
except Exception:
134119
logger.warning("Thought context retrieval failed, proceeding without")
@@ -202,8 +187,8 @@ async def _summarize_with_url_validation(self, combined: str) -> str:
202187
return report
203188

204189
async def _is_duplicate_thought(self, user: str, report: str) -> bool:
205-
"""Check if report is too similar to a same-preference thought via embedding similarity."""
206-
if not self._embedding_model_client or not self._seed_pref_id:
190+
"""Check if report is too similar to a same-scope thought via embedding similarity."""
191+
if not self._embedding_model_client:
207192
return False
208193
threshold = float(self.config.runtime.THOUGHT_DEDUP_EMBEDDING_THRESHOLD)
209194
report_vec = await embed_text(self._embedding_model_client, report)
@@ -248,8 +233,7 @@ async def handle_text_step(
248233
return True
249234
content = response.content.strip()
250235
messages.append(ChatMessage(role=MessageRole.ASSISTANT, content=content).to_dict())
251-
if not self._free_thinking:
252-
await self._rebuild_system_prompt(messages)
236+
await self._rebuild_system_prompt(messages)
253237
nudge = "dig deeper into what you just found"
254238
messages.append(ChatMessage(role=MessageRole.USER, content=nudge).to_dict())
255239
return True

penny/penny/database/thought_store.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,16 @@ def get_recent(self, user: str, limit: int = 50) -> list[Thought]:
5353
return thoughts
5454

5555
def get_recent_by_preference(
56-
self, user: str, preference_id: int, limit: int | None = None
56+
self, user: str, preference_id: int | None, limit: int | None = None
5757
) -> list[Thought]:
58-
"""Get thoughts for a user seeded by a specific preference, oldest first."""
58+
"""Get thoughts for a user scoped by preference_id, oldest first.
59+
60+
Works for both seeded (preference_id=<int>) and free (preference_id=None) thoughts.
61+
"""
5962
with self._session() as session:
6063
query = (
6164
select(Thought)
62-
.where(Thought.user == user, Thought.preference_id == preference_id)
65+
.where(Thought.user == user, Thought.preference_id == preference_id) # noqa: E711
6366
.order_by(Thought.created_at.desc()) # type: ignore[unresolved-attribute]
6467
)
6568
if limit is not None:

0 commit comments

Comments
 (0)