Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 13 additions & 15 deletions apps/backend/tests/test_planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,6 @@
from langchain_core.messages import HumanMessage


class FailingPlannerLLM:
    """Planner LLM stub that fails loudly if it is ever invoked.

    It mimics the two methods this test module's LLM stubs expose
    (``with_structured_output`` and ``ainvoke``) so it can be handed to a
    planner factory in place of a real chat model.
    """

    def with_structured_output(self, schema):
        """Return this stub unchanged; ``schema`` is ignored."""
        return self

    async def ainvoke(self, messages):
        """Raise ``AssertionError`` unconditionally; ``messages`` is ignored."""
        raise AssertionError(
            "LLM planner should not run for lightweight research requests"
        )


class RecordingPlannerLLM:
def __init__(self, plan: str):
self.plan = plan
Expand All @@ -26,6 +16,7 @@ def with_structured_output(self, schema):

async def ainvoke(self, messages):
self.called = True

class Result:
def __init__(self, plan: str):
self.plan = plan
Expand All @@ -34,8 +25,17 @@ def __init__(self, plan: str):


@pytest.mark.asyncio
async def test_planner_uses_lightweight_plan_for_simple_research_query():
planner = make_planner_node(FailingPlannerLLM()) # type: ignore
async def test_planner_always_invokes_llm_for_research_query():
"""LLM-driven 정책: research 쿼리도 휴리스틱이 아닌 LLM planner가 plan을 만든다.

플래너 안에 키워드 사전(`_build_simple_research_plan` 같은) 휴리스틱이
부활하면 이 테스트가 깨진다 — `RecordingPlannerLLM.called`가 False가 되기
때문. CLAUDE.md §"Supervisor → Sub-agent Handoff 정책" P1 위반 회귀 잠금.
"""
llm = RecordingPlannerLLM(
"1. [research_team] RoPE 알고리즘 자료를 조사한다.\n2. 최종 답변을 작성한다."
)
planner = make_planner_node(llm) # type: ignore[arg-type]

state = cast(
BaseAgentState,
Expand All @@ -50,11 +50,9 @@ async def test_planner_uses_lightweight_plan_for_simple_research_query():

command = await planner(state)

assert llm.called is True
assert command.goto == "head_supervisor"
assert command.update["task_plan"].count("\n") == 1
assert "[research_team]" in command.update["task_plan"]
assert "[writing_team]" not in command.update["task_plan"]
assert "최종 답변" in command.update["task_plan"]


@pytest.mark.asyncio
Expand Down
16 changes: 16 additions & 0 deletions apps/backend/tests/test_router_safeguards.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,19 @@ def test_router_decision_default_values_are_safe() -> None:
assert decision.reason == ""
assert decision.request_review is False
assert decision.team_finished is False


def test_public_safeguard_surface_is_limited_to_policy_functions() -> None:
    """Pin the set of policy helpers exported by ``agent_core.safeguards``.

    Only names in ``safeguards.__all__`` that start with ``reject_``,
    ``enforce_`` or ``fallback_`` are compared; other exported names are
    not constrained by this test.
    """
    import agent_core.safeguards as safeguards

    # str.startswith accepts a tuple, so one call covers all three prefixes.
    policy_prefixes = ("reject_", "enforce_", "fallback_")
    public_functions = {
        name for name in safeguards.__all__ if name.startswith(policy_prefixes)
    }

    assert public_functions == {
        "reject_invalid_goto",
        "enforce_team_redirect_limit",
        "enforce_dispatch_limit",
        "fallback_decision_on_parse_failure",
    }
59 changes: 59 additions & 0 deletions apps/backend/tests/test_routing_prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from prompt_kit.prompts import (
CODING_TEAM_SUPERVISOR_PROMPT,
DATA_SCIENCE_TEAM_SUPERVISOR_PROMPT,
RESEARCH_TEAM_SUPERVISOR_PROMPT,
SYSTEM_SUPERVISOR_PROMPT,
TEAM_SUPERVISOR_PROMPT,
)


def test_head_prompt_contains_required_first_route_contracts() -> None:
    """Check that the head supervisor prompt still contains every routing fragment.

    The test is a plain substring check against
    ``SYSTEM_SUPERVISOR_PROMPT.template``; it does not verify ordering or
    that the fragments appear in any particular section.
    """
    prompt = SYSTEM_SUPERVISOR_PROMPT.template

    required_fragments = [
        "# REQUIRED FIRST ROUTES",
        "Data attachment",
        "`data_science_team`",
        "`data_engineer`",
        "`data_analyst`",
        "Image attachment",
        "`vision_team`",
        "`vision_analyst`",
        "Current events, news, or \"latest\"",
        "`research_team`",
        "`search`",
        "`web_scraper`",
        "Bound repository plus code",
        "`coding_team`",
        "`codebase_explorer`",
        "`implementation_engineer`",
        "`runtime_verifier`",
        "Explicit report",
        "`writing_team`",
        "`note_taker`",
        "`doc_writer`",
        "Simple greetings",
        "`FINISH`",
        "`content`",
    ]

    # Collect every miss so a single failing run lists all missing fragments
    # instead of stopping at the first one.
    missing = [fragment for fragment in required_fragments if fragment not in prompt]
    assert not missing, f"SYSTEM_SUPERVISOR_PROMPT is missing fragments: {missing}"


def test_team_prompt_contains_generic_worker_handoff_contracts() -> None:
    """Check that the generic team prompt still contains the handoff fragments.

    Each entry is matched as a plain substring of
    ``TEAM_SUPERVISOR_PROMPT.template``.
    """
    prompt = TEAM_SUPERVISOR_PROMPT.template

    required_fragments = (
        "# DATA SCIENCE TEAM HANDOFF",
        "next worker is ALWAYS `data_analyst`",
        "# WRITING TEAM HANDOFF",
        "Start a new report",
        "route to `doc_writer`",
        "# VISION TEAM HANDOFF",
        "Start image-attachment requests with `vision_analyst`",
    )

    # Report all missing fragments together rather than only the first.
    missing = [fragment for fragment in required_fragments if fragment not in prompt]
    assert not missing, f"TEAM_SUPERVISOR_PROMPT is missing fragments: {missing}"


def test_dedicated_team_prompts_pin_first_workers() -> None:
    """Check that each dedicated team prompt names its first worker.

    Each check is a plain substring match on the prompt's ``template``.
    """
    expectations = (
        (
            "RESEARCH_TEAM_SUPERVISOR_PROMPT",
            RESEARCH_TEAM_SUPERVISOR_PROMPT,
            "Start with `search`",
        ),
        (
            "DATA_SCIENCE_TEAM_SUPERVISOR_PROMPT",
            DATA_SCIENCE_TEAM_SUPERVISOR_PROMPT,
            "Start with `data_engineer`",
        ),
        (
            "CODING_TEAM_SUPERVISOR_PROMPT",
            CODING_TEAM_SUPERVISOR_PROMPT,
            "Start with `codebase_explorer`",
        ),
    )

    for prompt_name, prompt, fragment in expectations:
        # The message names the prompt so a failure is attributable at a glance.
        assert fragment in prompt.template, f"{prompt_name} is missing {fragment!r}"
33 changes: 33 additions & 0 deletions apps/backend/tests/test_supervisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ async def ainvoke(self, messages):
return {"next": self.target_node}


class CountingRouterLLM(FakeRouterLLM):
    """Router LLM stub that counts how many times ``ainvoke`` is awaited.

    NOTE(review): relies on ``FakeRouterLLM.__init__`` storing its argument
    as ``self.target_node`` - the base class is not fully visible here;
    confirm against its definition.
    """

    def __init__(self, target_node: str):
        super().__init__(target_node)
        # Number of ainvoke() calls observed so far.
        self.calls = 0

    async def ainvoke(self, messages):
        """Record one call and return a routing dict for ``self.target_node``."""
        self.calls += 1
        return {"next": self.target_node, "reason": "LLM chose next worker"}


class ApprovalAwareLLM:
def with_structured_output(self, schema):
return self
Expand Down Expand Up @@ -67,6 +77,29 @@ async def test_supervisor_routes_to_worker():
assert command.update["route_history"][0]["layer"] == "team"


@pytest.mark.asyncio
async def test_team_dispatch_limit_runs_after_llm_decision():
    """Dispatch limit is a post-decision safeguard, not a pre-LLM branch.

    With ``max_team_dispatches=0`` the supervisor is expected to still call
    the LLM exactly once, then end the run and record a route-history entry
    whose reasoning starts with ``"safeguard:"``.
    """
    fake_llm = CountingRouterLLM("search_agent")
    supervisor_func = make_supervisor_node(
        fake_llm,  # type: ignore
        ["search_agent", "web_scraper"],
        layer="team",
        team_name="ResearchTeam",
        max_team_dispatches=0,
    )

    state = cast(
        BaseAgentState,
        {"messages": [HumanMessage(content="Find me something")], "next": ""},
    )
    command = await supervisor_func(state)

    assert fake_llm.calls == 1, "LLM must be consulted before the safeguard applies"
    assert command.goto == "__end__"
    first_route = command.update["route_history"][0]
    assert first_route["reasoning"].startswith("safeguard:")


@pytest.mark.asyncio
async def test_supervisor_routes_to_finish():
"""FINISH at head layer must clear active_team/worker and terminate streaming."""
Expand Down
89 changes: 0 additions & 89 deletions packages/agent-core/src/agent_core/nodes/planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,77 +14,6 @@ class TaskPlan(BaseModel):
)


def _extract_latest_user_text(messages: list) -> str:
for message in reversed(messages):
if isinstance(message, tuple) and len(message) >= 2 and message[0] == "user":
return str(message[1])

message_type = getattr(message, "type", None)
if message_type == "human":
content = getattr(message, "content", "")
if isinstance(content, str):
return content
if isinstance(content, list):
text_parts: list[str] = []
for item in content:
if isinstance(item, dict) and item.get("type") == "text":
text_parts.append(str(item.get("text", "")))
return " ".join(part for part in text_parts if part)

return ""


def _build_simple_research_plan(user_text: str) -> str | None:
normalized = user_text.lower()
research_markers = (
"웹검색",
"웹 검색",
"검색",
"조사",
"찾아",
"알아봐",
"search",
"research",
"look up",
"web",
)
answer_markers = (
"설명",
"요약",
"정리",
"답변",
"explain",
"summary",
"summarize",
)
complex_markers = (
"보고서",
"report",
"table",
"표",
"비교",
"compare",
"slide",
"발표",
"코드",
"파일",
"문서",
"article",
)

if not any(marker in normalized for marker in research_markers):
return None
if not any(marker in normalized for marker in answer_markers):
return None
if any(marker in normalized for marker in complex_markers):
return None

return (
"1. [research_team] 사용자 요청을 답할 만큼만 신뢰할 수 있는 최신 자료를 조사한다.\n"
"2. 조사 결과를 바탕으로 최종 답변을 완성한다."
)


def make_planner_node(llm: BaseChatModel) -> Callable:
"""
Creates a planner node that executes immediately after user input.
Expand All @@ -95,27 +24,10 @@ def make_planner_node(llm: BaseChatModel) -> Callable:
async def planner_node(state: BaseAgentState) -> Command:
print("[Planner] Analyzing request and creating plan...", flush=True)

# If there's already a plan and we are just looping, we don't recreate it unless explicitly asked.
# But usually Planner is only called once at START, or we can check if it's the first turn.
if state.get("task_plan"):
print("[Planner] Plan already exists. Skipping.", flush=True)
return Command(goto="head_supervisor")

latest_user_text = _extract_latest_user_text(state.get("messages", []))
simple_research_plan = _build_simple_research_plan(latest_user_text)
if simple_research_plan:
print(
f"[Planner] Using lightweight plan:\n{simple_research_plan}", flush=True
)
plan_message = AIMessage(
content=f"**[Planner] Proposed Execution Plan:**\n{simple_research_plan}",
name="planner",
)
return Command(
update={"task_plan": simple_research_plan, "messages": [plan_message]},
goto="head_supervisor",
)

messages = [{"role": "system", "content": system_prompt}] + state.get(
"messages", []
)
Expand All @@ -135,7 +47,6 @@ async def planner_node(state: BaseAgentState) -> Command:

print(f"[Planner] Generated Plan:\n{plan}", flush=True)

# Save the plan to state and notify the user/supervisor via message
plan_message = AIMessage(
content=f"**[Planner] Proposed Execution Plan:**\n{plan}",
name="planner",
Expand Down
9 changes: 4 additions & 5 deletions packages/agent-core/src/agent_core/router_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
- ``reason``: short human-readable explanation, exposed to the UI via
the ``route`` SSE event (plan §4.0 P4).
- ``request_review``: ``True`` if the supervisor wants to interrupt for
HITL approval before continuing. Replaces the rule-based
``_should_force_approval`` heuristic.
HITL approval before continuing. The LLM sets this from prompt policy.
- ``team_finished``: team supervisor asserts the team has nothing more
to do this turn; head supervisor uses this to decide between another
team dispatch and a finalizer call.
Expand All @@ -26,8 +25,8 @@
direct-FINISH turns would emit an empty AI message (regression seen
after the head/team split in Phase 2.4).

This schema lives in agent_core so that both supervisor.py (today's
rule-based logic) and the upcoming ``LLMRouter`` class can share it.
This schema lives in agent_core so the supervisor factories and the shared
LLM router can use the same structured-output contract.
"""

from __future__ import annotations
Expand All @@ -53,7 +52,7 @@ class RouterDecision(BaseModel):
default=False,
description=(
"Set True when the supervisor wants to pause for human approval "
"before continuing (replaces _should_force_approval heuristic)."
"before continuing according to the prompt policy."
),
)
team_finished: bool = Field(
Expand Down
2 changes: 1 addition & 1 deletion packages/agent-core/src/agent_core/safeguards.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Phase 2.6 of the codebase-wide refactor. Plan §4.0 P3 says the supervisor
should never **override** the LLM's routing decision; it can only **block**
or **re-request** it. These helpers implement exactly that policy.
or **re-request** it. These four helpers implement exactly that policy.

All functions are intentionally pure:

Expand Down
20 changes: 2 additions & 18 deletions packages/agent-core/src/agent_core/supervisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,6 @@

- ``layer="head"`` → :func:`agent_core.supervisors.make_head_supervisor_node`
- ``layer="team"`` → :func:`agent_core.supervisors.make_team_supervisor_node`

The historical helpers (``_extract_message_text``,
``_latest_user_request_text``, ``_orchagent_identity_response``) now live
inside ``agent_core.supervisors.head_supervisor`` where they are actually
used. They are re-exported here only for any external test that imported
them directly.
"""

from __future__ import annotations
Expand All @@ -25,12 +19,7 @@

from langchain_core.language_models.chat_models import BaseChatModel

from agent_core.supervisors.head_supervisor import (
_extract_message_text,
_latest_user_request_text,
_orchagent_identity_response,
make_head_supervisor_node,
)
from agent_core.supervisors.head_supervisor import make_head_supervisor_node
from agent_core.supervisors.team_supervisor import make_team_supervisor_node


Expand Down Expand Up @@ -67,9 +56,4 @@ def make_supervisor_node(
)


__all__ = [
"_extract_message_text",
"_latest_user_request_text",
"_orchagent_identity_response",
"make_supervisor_node",
]
__all__ = ["make_supervisor_node"]
Loading
Loading