Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 160 additions & 0 deletions agents/agenda_relevance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
"""Agenda direction guardrails: prompt constraint block + deterministic scope gate.

PR #41 scoped what the Tier 1 / Tier 2 agents *read* (signal queries restricted to
the agenda's taxonomy subgraph), but nothing constrained what they *write*: the
generation prompts never mentioned the agenda, so off-topic candidates passed
through whenever the taxonomy match was loose or fell back to the global scan.

This module adds the missing two pieces, both rule-based (no extra LLM calls):

1. ``agenda_constraint_block(agenda)`` — a prompt section appended to every
generation prompt when an agenda is present, stating the user's direction
verbatim plus the scope keywords, and instructing the model to stay inside.
2. ``insight_in_scope(insight, agenda)`` — a post-generation keyword gate.
Generated insights whose text matches none of the agenda's scope terms are
dropped before storage and reported as ``dropped_out_of_scope``.

The gate is intentionally lenient: by default one term hit is enough to keep
an insight (configurable via AGENDA_SCOPE_MIN_TERM_HITS). Its job is to catch
clearly unrelated output, not to rank borderline cases — prompt steering does
the fine-grained work, the gate is the deterministic backstop.
"""

from __future__ import annotations

import re
from typing import Any, Iterable

from config import AGENDA_SCOPE_MIN_TERM_HITS

# Words so common across ML research directions that a hit on one of them says
# nothing about scope. Tokenization of free text drops them. Explicit keyword
# phrases (focus / prefer.keywords) are never filtered through this set and
# are kept verbatim.
_GENERIC_TOKENS = set(
    """
    and are based between data deep for from into
    learning machine method methods model models new
    novel over paper papers research task tasks that
    the this toward towards under use using via
    with
    """.split()
)

# A token starts with a lowercase letter and continues with at least two more
# characters drawn from lowercase letters, digits, or hyphens.
_TOKEN_RE = re.compile(r"[a-z][a-z0-9\-]{2,}")

# Insight fields whose text says what the insight is about. The union covers
# both tiers: Tier 2 ideas use title/problem_statement/proposed_method, Tier 1
# paradigm insights use title/formal_structure/transformation.
_SCOPE_TEXT_FIELDS = (
    "title", "problem_statement", "proposed_method",
    "formal_structure", "transformation",
)


def _tokens(text: Any) -> list[str]:
    """Split *text* into lowercase scope tokens, discarding generic words.

    Falsy input (``None``, ``""``, ...) yields an empty list. Anything else is
    stringified and lowercased before ``_TOKEN_RE`` is applied, and every match
    found in ``_GENERIC_TOKENS`` is left out. Order and duplicates of the
    remaining matches are preserved.
    """
    lowered = str(text or "").lower()
    kept: list[str] = []
    for candidate in _TOKEN_RE.findall(lowered):
        if candidate in _GENERIC_TOKENS:
            continue
        kept.append(candidate)
    return kept


def agenda_match_terms(agenda) -> list[str]:
    """Return the agenda's lowercased scope terms for the relevance gate.

    The result is deduplicated and keeps first-seen order. Terms are gathered
    from three sources:

    - every phrase returned by ``agenda_scope_keywords(agenda)``, stripped and
      lowercased but otherwise verbatim;
    - the individual tokens of each such phrase, added right after the phrase
      (so "outlier rejection" also contributes "outlier" and "rejection");
    - the tokens of the agenda's ``description`` attribute, which picks up
      words the user wrote in the free-text direction but did not list as
      keywords.
    """
    from agents.agenda_selector import agenda_scope_keywords

    # A dict keeps insertion order, so it serves as an ordered set here.
    ordered: dict[str, None] = {}

    def _remember(raw: str) -> None:
        cleaned = raw.strip().lower()
        if cleaned:
            ordered.setdefault(cleaned, None)

    for phrase in agenda_scope_keywords(agenda):
        _remember(phrase)
        for piece in _tokens(phrase):
            _remember(piece)

    for piece in _tokens(getattr(agenda, "description", "")):
        _remember(piece)

    return list(ordered)


def agenda_constraint_block(agenda) -> str:
    """Render the prompt section that pins generation to the agenda.

    The block opens with an empty line, states the direction (the agenda's
    stripped ``description``, or its stripped ``name`` when the description is
    empty), optionally lists the scope keywords reported by
    ``agenda_scope_keywords(agenda)``, and ends with the stay-in-scope rules.
    Lines are joined with newlines.
    """
    from agents.agenda_selector import agenda_scope_keywords

    # Prefer the free-text description; fall back to the agenda's name.
    direction = (
        str(getattr(agenda, "description", "") or "").strip()
        or str(getattr(agenda, "name", "") or "").strip()
    )

    header = [
        "",
        "# RESEARCH DIRECTION CONSTRAINT (hard requirement)",
        "",
        "All output must stay inside this user-defined research direction:",
        "",
        f"Direction: {direction}",
    ]

    keywords = agenda_scope_keywords(agenda)
    # The keyword line is omitted entirely when the agenda has no keywords.
    keyword_line = [f"Scope keywords: {', '.join(keywords)}"] if keywords else []

    rules = [
        "",
        "Rules:",
        "- Only propose problems, insights, and methods that fall inside this direction.",
        "- Ignore signals and evidence unrelated to the direction, even if they look promising.",
        "- If little of the evidence fits the direction, return fewer items rather than drifting off-topic.",
    ]

    return "\n".join(header + keyword_line + rules)


def insight_scope_text(insight: dict) -> str:
    """Build the lowercased text blob the scope gate matches against.

    Reads each field named in ``_SCOPE_TEXT_FIELDS`` from *insight*, skips the
    ones that are missing or falsy, stringifies the rest, and joins them with
    single spaces in field order before lowercasing.
    """
    values = (insight.get(name) for name in _SCOPE_TEXT_FIELDS)
    return " ".join(str(value) for value in values if value).lower()


def count_term_hits(text: str, terms: Iterable[str]) -> int:
    """Count how many of *terms* occur in *text* as substrings.

    Matching is a plain, case-sensitive substring test, so callers are expected
    to pass already-lowercased text and terms. Each term contributes at most
    one hit no matter how often it occurs.

    Blank terms (empty or whitespace-only) are ignored. The empty string is a
    substring of every text, and a lone space matches any multi-word text, so
    counting them would hand out hits that carry no scope evidence.

    Args:
        text: The text to search.
        terms: Candidate scope terms; any iterable, consumed once.

    Returns:
        The number of non-blank terms found in *text*.
    """
    return sum(
        1
        for term in terms
        if term and not term.isspace() and term in text
    )


def insight_in_scope(insight: dict, agenda, *, min_hits: int | None = None) -> bool:
    """Decide, without any LLM call, whether *insight* fits the agenda's scope.

    The gate is lenient and switches itself off (returns ``True``) when:

    - *agenda* is ``None``;
    - the effective threshold is zero or negative (*min_hits*, or
      ``AGENDA_SCOPE_MIN_TERM_HITS`` when *min_hits* is ``None``);
    - none of the agenda's terms contains an ASCII lowercase letter or digit,
      e.g. a direction written entirely in Chinese, which cannot be matched
      against English insight text. Dropping every insight in that case would
      be worse than dropping none.

    Otherwise the insight is kept when at least ``threshold`` agenda terms
    occur in its scope text.
    """
    if agenda is None:
        return True

    threshold = AGENDA_SCOPE_MIN_TERM_HITS if min_hits is None else min_hits
    if threshold <= 0:
        return True

    scope_terms = agenda_match_terms(agenda)
    for candidate in scope_terms:
        if re.search(r"[a-z0-9]", candidate):
            break
    else:
        # No term is ASCII-matchable: the gate cannot judge, so let it through.
        return True

    hits = count_term_hits(insight_scope_text(insight), scope_terms)
    return hits >= threshold
75 changes: 53 additions & 22 deletions agents/paper_idea_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"""
import json
from agents.agenda_budget import AgendaBudgetExceededError
from agents.agenda_relevance import agenda_constraint_block, insight_in_scope
from agents.discovery_metadata import build_evidence_packet, enrich_deep_insight
from agents.insight_validation import get_evosci_input_issue
from agents.llm_client import call_llm_json, is_llm_auth_error, is_llm_provider_unavailable_error
Expand Down Expand Up @@ -243,8 +244,12 @@
}"""


def _build_problem_prompt(signals: dict) -> str:
"""Build evidence prompt for Call 1 (Problem Sharpening)."""
def _build_problem_prompt(signals: dict, agenda=None) -> str:
"""Build evidence prompt for Call 1 (Problem Sharpening).

With an agenda, the user's research direction is appended as a hard
constraint; without one the prompt is built exactly as before.
"""
sections = ["# EVIDENCE FROM 10,000+ ML PAPERS\n"]

# Contradiction clusters
Expand Down Expand Up @@ -337,12 +342,15 @@ def _build_problem_prompt(signals: dict) -> str:
for row in rows[:6]:
sections.append(f"- {json.dumps(row, ensure_ascii=True, default=str)[:260]}")

if agenda is not None:
sections.append(agenda_constraint_block(agenda))

return "\n".join(sections)


def _build_method_prompt(problem: dict) -> str:
def _build_method_prompt(problem: dict, agenda=None) -> str:
"""Build prompt for Call 2 (Method Invention)."""
return f"""# RESEARCH PROBLEM
prompt = f"""# RESEARCH PROBLEM

## Title: {problem['title']}

Expand All @@ -365,11 +373,14 @@ def _build_method_prompt(problem: dict) -> str:

Design a NEW method that addresses this specific failure mode.
The method must be technically novel — not "apply [existing technique] to [this domain]"."""
if agenda is not None:
prompt += "\n" + agenda_constraint_block(agenda)
return prompt


def _build_experiment_prompt(problem: dict, method: dict) -> str:
def _build_experiment_prompt(problem: dict, method: dict, agenda=None) -> str:
"""Build prompt for Call 3 (Experimental Design)."""
return f"""# PROPOSED RESEARCH
prompt = f"""# PROPOSED RESEARCH

## Problem
Title: {problem['title']}
Expand All @@ -387,6 +398,9 @@ def _build_experiment_prompt(problem: dict, method: dict) -> str:

Design a complete experimental plan for validating this method.
Be specific: exact model names, dataset names, metric names, compute estimates."""
if agenda is not None:
prompt += "\n" + agenda_constraint_block(agenda)
return prompt


def _llm_temporarily_unavailable(exc: Exception) -> bool:
Expand All @@ -400,23 +414,31 @@ def discover_paper_ideas(
tier2_plateau_limit: int = 20,
tier2_limitation_nodes: int = 15,
agenda=None,
) -> list[dict]:
) -> dict:
"""Run the 3-stage paper idea discovery pipeline.

Returns list of deep_insight dicts ready for storage.
Returns {"insights": [...], "dropped_out_of_scope": n} where insights are
deep_insight dicts ready for storage.
If max_papers is None, every sharpened problem (up to max_problems) is expanded.

With an agenda (contracts.agenda.ResearchAgenda), the signal scan is
circled to the matching taxonomy subgraph and produced ideas are tagged
with agenda_id. Budget exhaustion stops the loop cleanly, returning the
ideas accepted so far.
circled to the matching taxonomy subgraph, every generation prompt carries
the agenda's direction as a hard constraint, ideas whose text matches none
of the agenda's scope terms are dropped (counted in dropped_out_of_scope),
and produced ideas are tagged with agenda_id. Budget exhaustion stops the
loop cleanly, returning the ideas accepted so far.
"""
if max_papers is None:
max_papers = max_problems

print(f"[PAPER_IDEA] Starting Tier 2 discovery...", flush=True)
total_tokens = 0
total_calls = 0
deep_insights: list[dict] = []
dropped_out_of_scope = 0

def _result() -> dict:
return {"insights": deep_insights, "dropped_out_of_scope": dropped_out_of_scope}

# Stage 0: Gather signals (scoped to the agenda's subgraph when known)
scope_node_ids = None
Expand Down Expand Up @@ -451,29 +473,29 @@ def discover_paper_ideas(
)
if not has_signals:
print("[PAPER_IDEA] No signals available. Run signal_harvester first.", flush=True)
return []
return _result()

# Stage 1: Problem Sharpening
print("[PAPER_IDEA] Call 1/3: Problem Sharpening...", flush=True)
problem_prompt = _build_problem_prompt(signals)
problem_prompt = _build_problem_prompt(signals, agenda=agenda)
try:
result1, tokens1 = call_llm_json(PROBLEM_SHARPENING_SYSTEM, problem_prompt)
total_tokens += tokens1
total_calls += 1
except AgendaBudgetExceededError as e:
print(f"[PAPER_IDEA] Stopped before problem sharpening: {e}", flush=True)
return []
return _result()
except Exception as e:
if _llm_temporarily_unavailable(e):
print(f"[PAPER_IDEA] Problem sharpening skipped: LLM unavailable ({e})", flush=True)
return []
return _result()
print(f"[PAPER_IDEA] Problem sharpening failed: {e}", flush=True)
return []
return _result()

problems = result1.get("problems", [])
if not problems:
print("[PAPER_IDEA] No problems extracted", flush=True)
return []
return _result()

problem_budget = min(len(problems), max_problems + max(2, max_papers // 2))
problems = problems[:problem_budget]
Expand All @@ -483,7 +505,6 @@ def discover_paper_ideas(
)

# Stage 2 + 3: Method Invention + Experiment Design for top problems
deep_insights = []
for i, problem in enumerate(problems):
if len(deep_insights) >= max_papers:
break
Expand All @@ -493,7 +514,7 @@ def discover_paper_ideas(

# Stage 2: Method Invention
print(f"[PAPER_IDEA] Call 2/3: Inventing method for '{title[:50]}'...", flush=True)
method_prompt = _build_method_prompt(problem)
method_prompt = _build_method_prompt(problem, agenda=agenda)
try:
result2, tokens2 = call_llm_json(METHOD_INVENTION_SYSTEM, method_prompt)
total_tokens += tokens2
Expand Down Expand Up @@ -521,7 +542,7 @@ def discover_paper_ideas(

# Stage 3: Experimental Design
print(f"[PAPER_IDEA] Call 3/3: Designing experiments for '{method['name']}'...", flush=True)
exp_prompt = _build_experiment_prompt(problem, method)
exp_prompt = _build_experiment_prompt(problem, method, agenda=agenda)
try:
result3, tokens3 = call_llm_json(EXPERIMENT_DESIGN_SYSTEM, exp_prompt)
total_tokens += tokens3
Expand Down Expand Up @@ -604,6 +625,15 @@ def discover_paper_ideas(
"agenda_id": agenda.agenda_id if agenda is not None else None,
}

if agenda is not None and not insight_in_scope(deep_insight, agenda):
dropped_out_of_scope += 1
print(
f"[PAPER_IDEA] Dropped out-of-scope idea for agenda "
f"'{agenda.name}': {deep_insight['title'][:80]}",
flush=True,
)
continue

input_issue = get_evosci_input_issue(deep_insight, mode="verification")
if input_issue:
missing = ", ".join(input_issue.get("missing_fields") or [])
Expand All @@ -616,6 +646,7 @@ def discover_paper_ideas(
deep_insights.append(enrich_deep_insight(deep_insight))
print(f"[PAPER_IDEA] Accepted: {method['name']} — {title[:60]}", flush=True)

print(f"[PAPER_IDEA] Done: {len(deep_insights)} paper ideas from {len(problems)} problems. "
print(f"[PAPER_IDEA] Done: {len(deep_insights)} paper ideas from {len(problems)} problems "
f"({dropped_out_of_scope} dropped as out of agenda scope). "
f"Tokens: {total_tokens}, LLM calls: {total_calls}", flush=True)
return deep_insights
return _result()
Loading
Loading