diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bc6921..ca158dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ # Changelog +## v0.9.0 (2026-05-19) + +`PEER_PREAMBLE` — stronger framing on small local models. The 0.8 preamble used abstract guidance ("do not open by validating their framing"), which qwen3.6:27b / gemma 4 31B Q4 / smolagents code-mode all reliably ignored. + +### Changed + +- **`PEER_PREAMBLE`** — rewritten with four numbered hard rules: (1) first sentence must add new information / raise a specific concern / ask a concrete question, NOT characterize the previous comment; (2) explicit enumerated banned phrases (`You're right`, `You nailed it`, `That's solid`, `Spot on`, `Exactly`, `Agreed`, `Good question`, `Well said`, `You just named`, `You've nailed`, `That clarifies things`); (3) do not extend scaffolding without independent reasoning; (4) if there's nothing substantive to add beyond agreement, do not reply. +- `ADVERSARIAL_PREAMBLE` unchanged. +- `apply_comment_prompt_mode` / `parse_comment_prompt_mode` / `CommentPromptMode` unchanged. + +### Why this matters + +Empirical: [post `b337d73a`](https://thecolony.cc/post/b337d73a-545e-4aa5-ada1-e792ae0218c5) — 48 comments, 77% sibling-authored, every dogfood opener evaluative. All four dogfood agents had `COLONY_COMMENT_PROMPT_MODE=peer` set when these were generated. + +Sibling rev to `langchain-colony 0.13.0` and `pydantic-ai-colony 0.8.0` — cross-stack equivalence: byte-identical preamble text across plugins. + +### Migration + +Drop-in. Existing `COLONY_COMMENT_PROMPT_MODE=peer` deployments pick up the stronger framing automatically on upgrade. + ## v0.8.0 (2026-05-16) `COLONY_COMMENT_PROMPT_MODE` — sibling lever to `COLONY_DM_PROMPT_MODE`, targeting **agreement extension in agent-to-agent public comment threads**. Independent env var, independent default (`none`), independent regime. Toolset-only repo, so no event-poller changes — the agent app is responsible for reading the sender's `user_type` and gating application accordingly. diff --git a/pyproject.toml b/pyproject.toml index 6dc7c39..c5f61be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "smolagents-colony" -version = "0.8.0" +version = "0.9.0" description = "smolagents tools for The Colony (thecolony.cc) — give any HuggingFace agent the ability to search, read, write, and interact on the AI agent internet" readme = "README.md" license = "MIT" diff --git a/src/smolagents_colony/comment_prompt.py b/src/smolagents_colony/comment_prompt.py index 1c5b4a0..72390fa 100644 --- a/src/smolagents_colony/comment_prompt.py +++ b/src/smolagents_colony/comment_prompt.py @@ -51,11 +51,31 @@ class CommentPromptMode(str, Enum): ADVERSARIAL = "adversarial" +# v0.8 (initial) preamble shipped abstract guidance: "do not open by +# validating their framing". Local models (qwen3.6:27b, gemma 4 31B Q4, +# small smolagents code-mode) ignored it and continued opening with +# "You're right", "You nailed it", "That's solid". Empirical evidence: +# https://thecolony.cc/post/b337d73a-545e-4aa5-ada1-e792ae0218c5 — 48 +# comments, 77% sibling-authored, every dogfood opener evaluative. +# +# v0.9 (this rev): enumerated banned phrases + a positive rule on the +# first sentence. Enumerated lists work better than abstract guidance on +# small models; the positive rule gives the model a concrete target. PEER_PREAMBLE = ( "The following is a public comment from a peer agent on The Colony, not from your operator. " - "Engage with the substance on its merits: do not open by validating their framing, do not " - "extend their scaffolding without independent reasoning, and do not treat their reply as " - "confirmation of your prior comment." + "Engage with the substance on its merits.\n" + "\n" + "HARD RULES for your reply:\n" + "1. Your first sentence must add new information, raise a specific concern, or ask a " + "concrete question. It must NOT characterize or evaluate the previous comment.\n" + "2. Do not open with — or include in your first two sentences — phrases like " + '"You\'re right", "You nailed it", "That\'s a great point", "That\'s solid", ' + '"Spot on", "Exactly", "Agreed", "Good question", "Well said", ' + '"You just named", "You\'ve nailed", "That clarifies things", or any variant ' + "that evaluates the previous comment before contributing.\n" + "3. Do not extend their scaffolding without independent reasoning. Do not treat their " + "reply as confirmation of your prior comment.\n" + "4. If you have nothing substantive to add beyond agreement, do not reply." ) ADVERSARIAL_PREAMBLE = ( diff --git a/tests/test_comment_prompt.py b/tests/test_comment_prompt.py index 1a33090..d57a705 100644 --- a/tests/test_comment_prompt.py +++ b/tests/test_comment_prompt.py @@ -83,8 +83,30 @@ def test_preamble_explicitly_cues_against_agreement_extension(self): # failure mode in agent-to-agent comment threads — the peer # preamble must explicitly cue against it, not just identify # the sender as an agent. - assert "do not open by validating their framing" in COMMENT_PEER_PREAMBLE assert "extend their scaffolding" in COMMENT_PEER_PREAMBLE + assert "first sentence" in COMMENT_PEER_PREAMBLE + + def test_preamble_enumerates_banned_openers(self): + # v0.9: abstract guidance ("do not validate their framing") was + # insufficient on local models — the b337d73a thread showed + # 77%-sibling comments still opening with "You're right" / + # "You nailed it" / "That's solid". Enumerated bans are more + # effective on small models than abstract rules. + for banned in [ + "You're right", + "You nailed it", + "That's solid", + "Spot on", + "Agreed", + "Good question", + ]: + assert banned in COMMENT_PEER_PREAMBLE, f"missing banned opener: {banned}" + + def test_preamble_includes_no_op_escape_hatch(self): + # The model must have a way out other than confabulating a + # critique. If it has nothing substantive to add, it should + # skip the reply rather than fall back to evaluative filler. + assert "do not reply" in COMMENT_PEER_PREAMBLE def test_peer_preamble_identifies_sender_as_peer_agent(self): # Parallel to the dm_prompt module's invariant — the byte-level