DONGRYEOLLEE1 · DONGRYEOLLEE1 · May 22, 2026 · May 22, 2026 · May 22, 2026
diff --git a/apps/backend/tests/test_planner.py b/apps/backend/tests/test_planner.py
@@ -6,16 +6,6 @@
 from langchain_core.messages import HumanMessage
 
 
-class FailingPlannerLLM:
-    def with_structured_output(self, schema):
-        return self
-
-    async def ainvoke(self, messages):
-        raise AssertionError(
-            "LLM planner should not run for lightweight research requests"
-        )
-
-
 class RecordingPlannerLLM:
     def __init__(self, plan: str):
         self.plan = plan
@@ -26,6 +16,7 @@ def with_structured_output(self, schema):
 
     async def ainvoke(self, messages):
         self.called = True
+
         class Result:
             def __init__(self, plan: str):
                 self.plan = plan
@@ -34,8 +25,17 @@ def __init__(self, plan: str):
 
 
 @pytest.mark.asyncio
-async def test_planner_uses_lightweight_plan_for_simple_research_query():
-    planner = make_planner_node(FailingPlannerLLM())  # type: ignore
+async def test_planner_always_invokes_llm_for_research_query():
+    """LLM-driven policy: the LLM planner, not a heuristic, plans research queries.
+
+    If a keyword-dictionary heuristic (e.g. `_build_simple_research_plan`) is revived
+    in the planner, this test breaks because `RecordingPlannerLLM.called` becomes
+    False. Guards against regressing CLAUDE.md §"Supervisor → Sub-agent Handoff 정책" P1.
+    """
+    llm = RecordingPlannerLLM(
+        "1. [research_team] RoPE 알고리즘 자료를 조사한다.\n2. 최종 답변을 작성한다."
+    )
+    planner = make_planner_node(llm)  # type: ignore[arg-type]
 
     state = cast(
         BaseAgentState,
@@ -50,11 +50,9 @@ async def test_planner_uses_lightweight_plan_for_simple_research_query():
 
     command = await planner(state)
 
+    assert llm.called is True
     assert command.goto == "head_supervisor"
-    assert command.update["task_plan"].count("\n") == 1
     assert "[research_team]" in command.update["task_plan"]
-    assert "[writing_team]" not in command.update["task_plan"]
-    assert "최종 답변" in command.update["task_plan"]
 
 
 @pytest.mark.asyncio

diff --git a/apps/backend/tests/test_router_safeguards.py b/apps/backend/tests/test_router_safeguards.py
@@ -110,3 +110,19 @@ def test_router_decision_default_values_are_safe() -> None:
     assert decision.reason == ""
     assert decision.request_review is False
     assert decision.team_finished is False
+
+
+def test_public_safeguard_surface_is_limited_to_policy_functions() -> None:
+    import agent_core.safeguards as safeguards
+
+    public_functions = {
+        name
+        for name in safeguards.__all__
+        if name.startswith(("reject_", "enforce_", "fallback_"))
+    }
+    assert public_functions == {
+        "reject_invalid_goto",
+        "enforce_team_redirect_limit",
+        "enforce_dispatch_limit",
+        "fallback_decision_on_parse_failure",
+    }
diff --git a/apps/backend/tests/test_routing_prompts.py b/apps/backend/tests/test_routing_prompts.py
@@ -0,0 +1,59 @@
+from prompt_kit.prompts import (
+    CODING_TEAM_SUPERVISOR_PROMPT,
+    DATA_SCIENCE_TEAM_SUPERVISOR_PROMPT,
+    RESEARCH_TEAM_SUPERVISOR_PROMPT,
+    SYSTEM_SUPERVISOR_PROMPT,
+    TEAM_SUPERVISOR_PROMPT,
+)
+
+
+def test_head_prompt_contains_required_first_route_contracts() -> None:
+    prompt = SYSTEM_SUPERVISOR_PROMPT.template
+
+    required_fragments = [
+        "# REQUIRED FIRST ROUTES",
+        "Data attachment",
+        "`data_science_team`",
+        "`data_engineer`",
+        "`data_analyst`",
+        "Image attachment",
+        "`vision_team`",
+        "`vision_analyst`",
+        "Current events, news, or \"latest\"",
+        "`research_team`",
+        "`search`",
+        "`web_scraper`",
+        "Bound repository plus code",
+        "`coding_team`",
+        "`codebase_explorer`",
+        "`implementation_engineer`",
+        "`runtime_verifier`",
+        "Explicit report",
+        "`writing_team`",
+        "`note_taker`",
+        "`doc_writer`",
+        "Simple greetings",
+        "`FINISH`",
+        "`content`",
+    ]
+
+    for fragment in required_fragments:
+        assert fragment in prompt
+
+
+def test_team_prompt_contains_generic_worker_handoff_contracts() -> None:
+    prompt = TEAM_SUPERVISOR_PROMPT.template
+
+    assert "# DATA SCIENCE TEAM HANDOFF" in prompt
+    assert "next worker is ALWAYS `data_analyst`" in prompt
+    assert "# WRITING TEAM HANDOFF" in prompt
+    assert "Start a new report" in prompt
+    assert "route to `doc_writer`" in prompt
+    assert "# VISION TEAM HANDOFF" in prompt
+    assert "Start image-attachment requests with `vision_analyst`" in prompt
+
+
+def test_dedicated_team_prompts_pin_first_workers() -> None:
+    assert "Start with `search`" in RESEARCH_TEAM_SUPERVISOR_PROMPT.template
+    assert "Start with `data_engineer`" in DATA_SCIENCE_TEAM_SUPERVISOR_PROMPT.template
+    assert "Start with `codebase_explorer`" in CODING_TEAM_SUPERVISOR_PROMPT.template
diff --git a/apps/backend/tests/test_supervisor.py b/apps/backend/tests/test_supervisor.py
@@ -18,6 +18,16 @@ async def ainvoke(self, messages):
         return {"next": self.target_node}
 
 
+class CountingRouterLLM(FakeRouterLLM):
+    def __init__(self, target_node: str):
+        super().__init__(target_node)
+        self.calls = 0
+
+    async def ainvoke(self, messages):
+        self.calls += 1
+        return {"next": self.target_node, "reason": "LLM chose next worker"}
+
+
 class ApprovalAwareLLM:
     def with_structured_output(self, schema):
         return self
@@ -67,6 +77,29 @@ async def test_supervisor_routes_to_worker():
     assert command.update["route_history"][0]["layer"] == "team"
 
 
+@pytest.mark.asyncio
+async def test_team_dispatch_limit_runs_after_llm_decision():
+    """Dispatch limit is a post-decision safeguard, not a pre-LLM branch."""
+    fake_llm = CountingRouterLLM("search_agent")
+    supervisor_func = make_supervisor_node(
+        fake_llm,  # type: ignore
+        ["search_agent", "web_scraper"],
+        layer="team",
+        team_name="ResearchTeam",
+        max_team_dispatches=0,
+    )
+
+    state = cast(
+        BaseAgentState,
+        {"messages": [HumanMessage(content="Find me something")], "next": ""},
+    )
+    command = await supervisor_func(state)
+
+    assert fake_llm.calls == 1
+    assert command.goto == "__end__"
+    assert command.update["route_history"][0]["reasoning"].startswith("safeguard:")
+
+
 @pytest.mark.asyncio
 async def test_supervisor_routes_to_finish():
     """FINISH at head layer must clear active_team/worker and terminate streaming."""

diff --git a/packages/agent-core/src/agent_core/nodes/planner.py b/packages/agent-core/src/agent_core/nodes/planner.py
@@ -14,77 +14,6 @@ class TaskPlan(BaseModel):
     )
 
 
-def _extract_latest_user_text(messages: list) -> str:
-    for message in reversed(messages):
-        if isinstance(message, tuple) and len(message) >= 2 and message[0] == "user":
-            return str(message[1])
-
-        message_type = getattr(message, "type", None)
-        if message_type == "human":
-            content = getattr(message, "content", "")
-            if isinstance(content, str):
-                return content
-            if isinstance(content, list):
-                text_parts: list[str] = []
-                for item in content:
-                    if isinstance(item, dict) and item.get("type") == "text":
-                        text_parts.append(str(item.get("text", "")))
-                return " ".join(part for part in text_parts if part)
-
-    return ""
-
-
-def _build_simple_research_plan(user_text: str) -> str | None:
-    normalized = user_text.lower()
-    research_markers = (
-        "웹검색",
-        "웹 검색",
-        "검색",
-        "조사",
-        "찾아",
-        "알아봐",
-        "search",
-        "research",
-        "look up",
-        "web",
-    )
-    answer_markers = (
-        "설명",
-        "요약",
-        "정리",
-        "답변",
-        "explain",
-        "summary",
-        "summarize",
-    )
-    complex_markers = (
-        "보고서",
-        "report",
-        "table",
-        "표",
-        "비교",
-        "compare",
-        "slide",
-        "발표",
-        "코드",
-        "파일",
-        "문서",
-        "article",
-    )
-
-    if not any(marker in normalized for marker in research_markers):
-        return None
-    if not any(marker in normalized for marker in answer_markers):
-        return None
-    if any(marker in normalized for marker in complex_markers):
-        return None
-
-    return (
-        "1. [research_team] 사용자 요청을 답할 만큼만 신뢰할 수 있는 최신 자료를 조사한다.\n"
-        "2. 조사 결과를 바탕으로 최종 답변을 완성한다."
-    )
-
-
 def make_planner_node(llm: BaseChatModel) -> Callable:
     """
     Creates a planner node that executes immediately after user input.
@@ -95,27 +24,10 @@ def make_planner_node(llm: BaseChatModel) -> Callable:
     async def planner_node(state: BaseAgentState) -> Command:
         print("[Planner] Analyzing request and creating plan...", flush=True)
 
-        # If there's already a plan and we are just looping, we don't recreate it unless explicitly asked.
-        # But usually Planner is only called once at START, or we can check if it's the first turn.
         if state.get("task_plan"):
             print("[Planner] Plan already exists. Skipping.", flush=True)
             return Command(goto="head_supervisor")
 
-        latest_user_text = _extract_latest_user_text(state.get("messages", []))
-        simple_research_plan = _build_simple_research_plan(latest_user_text)
-        if simple_research_plan:
-            print(
-                f"[Planner] Using lightweight plan:\n{simple_research_plan}", flush=True
-            )
-            plan_message = AIMessage(
-                content=f"**[Planner] Proposed Execution Plan:**\n{simple_research_plan}",
-                name="planner",
-            )
-            return Command(
-                update={"task_plan": simple_research_plan, "messages": [plan_message]},
-                goto="head_supervisor",
-            )
-
         messages = [{"role": "system", "content": system_prompt}] + state.get(
             "messages", []
         )
@@ -135,7 +47,6 @@ async def planner_node(state: BaseAgentState) -> Command:
 
             print(f"[Planner] Generated Plan:\n{plan}", flush=True)
 
-            # Save the plan to state and notify the user/supervisor via message
             plan_message = AIMessage(
                 content=f"**[Planner] Proposed Execution Plan:**\n{plan}",
                 name="planner",

diff --git a/packages/agent-core/src/agent_core/router_schema.py b/packages/agent-core/src/agent_core/router_schema.py
@@ -13,8 +13,7 @@
 - ``reason``: short human-readable explanation, exposed to the UI via
   the ``route`` SSE event (plan §4.0 P4).
 - ``request_review``: ``True`` if the supervisor wants to interrupt for
-  HITL approval before continuing. Replaces the rule-based
-  ``_should_force_approval`` heuristic.
+  HITL approval before continuing. The LLM sets this from prompt policy.
 - ``team_finished``: team supervisor asserts the team has nothing more
   to do this turn; head supervisor uses this to decide between another
   team dispatch and a finalizer call.
@@ -26,8 +25,8 @@
   direct-FINISH turns would emit an empty AI message (regression seen
   after the head/team split in Phase 2.4).
 
-This schema lives in agent_core so that both supervisor.py (today's
-rule-based logic) and the upcoming ``LLMRouter`` class can share it.
+This schema lives in agent_core so the supervisor factories and the shared
+LLM router can use the same structured-output contract.
 """
 
 from __future__ import annotations
@@ -53,7 +52,7 @@ class RouterDecision(BaseModel):
         default=False,
         description=(
             "Set True when the supervisor wants to pause for human approval "
-            "before continuing (replaces _should_force_approval heuristic)."
+            "before continuing according to the prompt policy."
         ),
     )
     team_finished: bool = Field(

diff --git a/packages/agent-core/src/agent_core/safeguards.py b/packages/agent-core/src/agent_core/safeguards.py
@@ -2,7 +2,7 @@
 
 Phase 2.6 of the codebase-wide refactor. Plan §4.0 P3 says the supervisor
 should never **override** the LLM's routing decision; it can only **block**
-or **re-request** it. These helpers implement exactly that policy.
+or **re-request** it. These four helpers implement exactly that policy.
 
 All functions are intentionally pure:
 

diff --git a/packages/agent-core/src/agent_core/supervisor.py b/packages/agent-core/src/agent_core/supervisor.py
@@ -11,12 +11,6 @@
 
 - ``layer="head"`` → :func:`agent_core.supervisors.make_head_supervisor_node`
 - ``layer="team"`` → :func:`agent_core.supervisors.make_team_supervisor_node`
-
-The historical helpers (``_extract_message_text``,
-``_latest_user_request_text``, ``_orchagent_identity_response``) now live
-inside ``agent_core.supervisors.head_supervisor`` where they are actually
-used. They are re-exported here only for any external test that imported
-them directly.
 """
 
 from __future__ import annotations
@@ -25,12 +19,7 @@
 
 from langchain_core.language_models.chat_models import BaseChatModel
 
-from agent_core.supervisors.head_supervisor import (
-    _extract_message_text,
-    _latest_user_request_text,
-    _orchagent_identity_response,
-    make_head_supervisor_node,
-)
+from agent_core.supervisors.head_supervisor import make_head_supervisor_node
 from agent_core.supervisors.team_supervisor import make_team_supervisor_node
 
 
@@ -67,9 +56,4 @@ def make_supervisor_node(
     )
 
 
-__all__ = [
-    "_extract_message_text",
-    "_latest_user_request_text",
-    "_orchagent_identity_response",
-    "make_supervisor_node",
-]
+__all__ = ["make_supervisor_node"]