From d53f24cd7fc27e0a344d8529a16c3d663f1f7dcb Mon Sep 17 00:00:00 2001 From: poshinchen Date: Thu, 30 Apr 2026 12:20:56 -0400 Subject: [PATCH 1/6] feat(simulator): structured_output of simulator --- src/strands_evals/simulation/__init__.py | 3 + .../simulation/actor_simulator.py | 136 ++++++++- .../types/simulation/__init__.py | 8 +- src/strands_evals/types/simulation/actor.py | 70 ++++- .../simulation/test_actor_simulator.py | 262 +++++++++++++++++- 5 files changed, 469 insertions(+), 10 deletions(-) diff --git a/src/strands_evals/simulation/__init__.py b/src/strands_evals/simulation/__init__.py index 3097b0d6..e05a2b9e 100644 --- a/src/strands_evals/simulation/__init__.py +++ b/src/strands_evals/simulation/__init__.py @@ -1,3 +1,5 @@ +from strands_evals.types.simulation import SimulatorResult + from .actor_simulator import ActorSimulator from .tool_simulator import ToolSimulator @@ -8,4 +10,5 @@ "ActorSimulator", "UserSimulator", "ToolSimulator", + "SimulatorResult", ] diff --git a/src/strands_evals/simulation/actor_simulator.py b/src/strands_evals/simulation/actor_simulator.py index fb1d9c36..12a43c39 100644 --- a/src/strands_evals/simulation/actor_simulator.py +++ b/src/strands_evals/simulation/actor_simulator.py @@ -1,6 +1,7 @@ import logging import random +from pydantic import BaseModel, Field, create_model from strands import Agent from strands.agent.agent_result import AgentResult from strands.types.content import Message @@ -11,7 +12,7 @@ from strands_evals.simulation.prompt_templates.actor_profile_extraction import ACTOR_PROFILE_PROMPT_TEMPLATE from strands_evals.simulation.prompt_templates.actor_system_prompt import DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE from strands_evals.simulation.tools.goal_completion import get_conversation_goal_completion -from strands_evals.types.simulation import ActorProfile, ActorResponse +from strands_evals.types.simulation import ActorProfile, ActorResponse, SimulatorResult logger = logging.getLogger(__name__) @@ -138,10 +139,12 @@ def __init__( self, actor_profile: ActorProfile, initial_query: str, - system_prompt_template: str, + system_prompt_template: str = DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE, tools: list | None = None, model: str | None = None, max_turns: int = 10, + *, + input_type: type[BaseModel] | None = None, ): """ Initialize an ActorSimulator with profile and goal. @@ -152,10 +155,27 @@ def __init__( Args: actor_profile: ActorProfile object containing traits, context, and actor_goal. initial_query: The actor's first query or message. - system_prompt_template: Template string for system prompt. Must include {actor_profile} placeholder. + system_prompt_template: System prompt for the actor. Accepts two shapes: + + - A template containing the `{actor_profile}` placeholder, which + is rendered via `str.format(actor_profile=...)` against the + actor's profile (legacy behavior, the default). + - An already-rendered system prompt string with no + `{actor_profile}` placeholder, which is used verbatim. + + Defaults to the built-in `DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE`. tools: Additional tools available to the actor. Defaults to goal completion tool only. model: Model identifier for the underlying agent. Uses Strands default if None. max_turns: Maximum number of conversation turns before stopping (default: 10). + input_type: Pydantic model class describing the agent-under-test's expected + input payload. Only affects :meth:`act_structured` — the LLM's + structured-output schema is narrowed so `message` is produced as an + `input_type` instance. :meth:`act` is unaffected and always uses the + legacy :class:`ActorResponse` schema. + + Must describe what the agent under test accepts — not a simulator + type. Passing :class:`SimulatorResult` (or a subclass) raises + `ValueError` at construction time. Example: ```python @@ -188,8 +208,13 @@ def __init__( self._turn_count = 0 self._last_message = "" self._max_turns = max_turns + self._input_type = input_type + self._structured_model = self._build_structured_model(input_type) - system_prompt = system_prompt_template.format(actor_profile=actor_profile.model_dump()) + if "{actor_profile}" in system_prompt_template: + system_prompt = system_prompt_template.format(actor_profile=actor_profile.model_dump()) + else: + system_prompt = system_prompt_template # already rendered # Combine tools all_tools = [get_conversation_goal_completion] @@ -207,6 +232,46 @@ def __init__( callback_handler=None, ) + def _build_structured_model(self, input_type: type[BaseModel] | None) -> type[SimulatorResult]: + """Return the :class:`SimulatorResult` subclass used by :meth:`act_structured`. + + When `input_type` is `None`, returns :class:`SimulatorResult` itself — + `message` stays typed as `Any` so the LLM is free to produce a string. + When `input_type` is set, returns a dynamic subclass that narrows + `message` to `input_type | None` so the LLM's tool-use schema enforces + the caller's agent-input shape. + + `input_type` is rejected if it is :class:`SimulatorResult` or a subclass + of it — that's the one nesting case where the outer simulator envelope + (`reasoning`, `stop`, `message`) is duplicated inside the payload + and the LLM's schema becomes ambiguous. + """ + if input_type is None: + return SimulatorResult + + if isinstance(input_type, type) and issubclass(input_type, SimulatorResult): + raise ValueError( + "input_type must describe the agent-under-test's input schema, not " + "SimulatorResult (or a subclass). SimulatorResult is the simulator's " + "return envelope and cannot be used as an agent payload." + ) + + return create_model( + "SimulatorResult", + __base__=SimulatorResult, + message=( + input_type | None, + Field( + None, + description=( + f"Structured message matching the agent's input schema " + f"({input_type.__name__}). Provide when stop=false. Set " + "to null when stop=true." + ), + ), + ), + ) + def _initialize_conversation(self): """ Initialize the conversation history with a greeting and initial query. @@ -232,13 +297,18 @@ def act(self, agent_message: str) -> AgentResult: profile and goal. The response includes reasoning about the actor's thought process and the actual message to send. + This method uses the legacy :class:`ActorResponse` schema and returns the + raw Strands :class:`AgentResult`. It is preserved for backwards compatibility + with existing callers. The `input_type` kwarg on `__init__` does **not** + affect this method — use :meth:`act_structured` to consume `input_type`. + Args: agent_message: The agent's response to react to (required). Returns: AgentResult containing the actor's structured response with: - structured_output.reasoning: Actor's internal reasoning - - structured_output.message: Actor's response message + - structured_output.message: Actor's response message (str) Example: ```python @@ -261,6 +331,62 @@ def act(self, agent_message: str) -> AgentResult: self._last_message = str(cast(ActorResponse, response.structured_output).message) return response + def act_structured(self, agent_message: str) -> SimulatorResult: + """ + Generate the next actor message and return a typed :class:`SimulatorResult`. + + The underlying Strands call uses a :class:`SimulatorResult` subclass as + its structured-output schema (narrowing `message` to `input_type` when + configured). The LLM produces `reasoning`, `stop`, and `message` + directly; the simulator populates `stop_reason` after the call based on + whether the actor signalled stop itself or the `max_turns` backstop + tripped. + + This method also keeps :attr:`_last_message` in sync with the returned + message so :meth:`has_next` works alongside :meth:`act_structured` in the + same conversation. + + Args: + agent_message: The agent's response to react to (required). + + Returns: + A :class:`SimulatorResult` with `message`, `reasoning`, `stop`, + and `stop_reason` populated. + + Example: + ```python + result = user_sim.act_structured(str(agent_response)) + if result.stop: + break + next_message = result.message + ``` + """ + response = self.agent(agent_message.strip(), structured_output_model=self._structured_model) + self._turn_count += 1 + + result = cast(SimulatorResult, response.structured_output) + + hit_max_turns = self._turn_count >= self._max_turns + if result.stop: + result.stop_reason = "goal_completed" + elif hit_max_turns: + result.stop = True + result.stop_reason = "max_turns" + elif result.message is None and self._input_type is not None: + # Guard: structured path, actor signalled continue but produced no + # message — treat as implicit goal_completed to avoid feeding None + # back to the agent under test. + logger.warning( + "Actor produced null message when stop=False; treating as goal_completed " + "(input_type=%s)", + self._input_type.__name__, + ) + result.stop = True + result.stop_reason = "goal_completed" + + self._last_message = str(result.message) if result.message is not None else "" + return result + def has_next(self) -> bool: """ Check if the conversation should continue. diff --git a/src/strands_evals/types/simulation/__init__.py b/src/strands_evals/types/simulation/__init__.py index 13a94b00..4d858836 100644 --- a/src/strands_evals/types/simulation/__init__.py +++ b/src/strands_evals/types/simulation/__init__.py @@ -1,5 +1,9 @@ """Data models for actor simulation.""" -from .actor import ActorProfile, ActorResponse +from .actor import ActorProfile, ActorResponse, SimulatorResult -__all__ = ["ActorProfile", "ActorResponse"] +__all__ = [ + "ActorProfile", + "ActorResponse", + "SimulatorResult", +] diff --git a/src/strands_evals/types/simulation/actor.py b/src/strands_evals/types/simulation/actor.py index d30be945..4e85d9c2 100644 --- a/src/strands_evals/types/simulation/actor.py +++ b/src/strands_evals/types/simulation/actor.py @@ -1,5 +1,6 @@ -from pydantic import BaseModel, Field -from typing_extensions import Any +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field class ActorProfile(BaseModel): @@ -25,6 +26,11 @@ class ActorResponse(BaseModel): """ Structured response from an actor. + Used by :meth:`ActorSimulator.act` as the LLM's structured-output schema. The + simulator keeps ``act()`` on this legacy shape so existing callers continue + to see ``message: str`` as a required field. New code should prefer + :meth:`ActorSimulator.act_structured` and :class:`SimulatorResult`. + Attributes: reasoning: Internal reasoning process for the response. message: The actual message content from the actor. @@ -32,3 +38,63 @@ class ActorResponse(BaseModel): reasoning: str = Field(..., description="Reasoning for the actor's response") message: str = Field(..., description="Message from the actor") + + +class SimulatorResult(BaseModel): + """ + Typed return value from :meth:`ActorSimulator.act_structured`. + + Used in two roles: + + - **As the LLM structured-output schema.** ``reasoning``, ``stop``, and + ``message`` are produced by the LLM via Strands' tool-use contract. + ``stop_reason`` is not part of the schema — the simulator fills it in + after the call, but it is kept on this model so the public return type + stays a single class. + + When ``ActorSimulator`` is given an ``input_type``, a dynamic subclass of + this model is built that narrows ``message`` to ``input_type | None`` so + the LLM's tool-use schema enforces the caller's agent-input shape. + + - **As the caller-facing result.** Callers of ``act_structured()`` receive + an instance of this class (or its dynamic subclass) with all four fields + populated. + + Attributes: + message: The actor's next message. An ``input_type`` instance when the + simulator was constructed with ``input_type``; a plain string or + ``None`` otherwise. ``None`` is expected when ``stop=True``. + reasoning: The actor's internal reasoning for this response. + stop: ``True`` when the actor signals the conversation should end + (either the goal was completed or ``max_turns`` was reached). + stop_reason: Why the conversation ended: ``"goal_completed"``, + ``"max_turns"``, or ``None`` while the conversation is still ongoing. + Populated by the simulator after the LLM call; not part of the + LLM-facing schema semantics even though the field exists on the + model. + """ + + model_config = ConfigDict(arbitrary_types_allowed=True) + + reasoning: str = Field(..., description="Reasoning for the actor's response") + stop: bool = Field( + False, + description=( + "Set to true when the conversation goal is met or the " + "conversation should end." + ), + ) + message: Any = Field( + None, + description=( + "The actor's next message to the agent. Provide when stop=false; " + "set to null when stop=true." + ), + ) + stop_reason: str | None = Field( + None, + description=( + "Populated by the simulator after the call. One of " + '"goal_completed", "max_turns", or None.' + ), + ) diff --git a/tests/strands_evals/simulation/test_actor_simulator.py b/tests/strands_evals/simulation/test_actor_simulator.py index c491a6f5..eb4ecbbe 100644 --- a/tests/strands_evals/simulation/test_actor_simulator.py +++ b/tests/strands_evals/simulation/test_actor_simulator.py @@ -3,11 +3,12 @@ from unittest.mock import MagicMock, patch import pytest +from pydantic import BaseModel from strands.agent.agent_result import AgentResult from strands_evals import Case from strands_evals.simulation import ActorSimulator -from strands_evals.types.simulation import ActorProfile, ActorResponse +from strands_evals.types.simulation import ActorProfile, ActorResponse, SimulatorResult @pytest.fixture @@ -211,3 +212,262 @@ def test_has_next_detects_stop_token(sample_actor_profile): # After act with stop token, has_next should return False simulator.act("Test message") assert simulator.has_next() is False + + +# --------------------------------------------------------------------------- +# input_type + act_structured() +# --------------------------------------------------------------------------- + + +class _AgentInput(BaseModel): + """Sample input schema for input_type tests.""" + + query: str + urgency: str = "normal" + + +def test_init_without_input_type_uses_simulator_result(sample_actor_profile): + """Without input_type, act_structured() hands SimulatorResult to the underlying agent.""" + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + ) + + mock_response = MagicMock(spec=AgentResult) + mock_response.structured_output = SimulatorResult(reasoning="r", stop=False, message="hi") + simulator.agent = MagicMock(return_value=mock_response) + + simulator.act_structured("agent reply") + + assert simulator.agent.call_args[1]["structured_output_model"] is SimulatorResult + + +def test_init_with_input_type_narrows_message_schema(sample_actor_profile): + """With input_type, act_structured() hands a SimulatorResult subclass whose message is typed.""" + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + input_type=_AgentInput, + ) + + typed_message = _AgentInput(query="hi") + captured_model: list = [] + + def _capture(agent_message, *, structured_output_model): + captured_model.append(structured_output_model) + mock_response = MagicMock(spec=AgentResult) + mock_response.structured_output = structured_output_model( + reasoning="r", stop=False, message=typed_message + ) + return mock_response + + simulator.agent = MagicMock(side_effect=_capture) + + simulator.act_structured("agent reply") + + (model_used,) = captured_model + # It must be a SimulatorResult subclass (so act_structured can return SimulatorResult). + assert issubclass(model_used, SimulatorResult) + # And it must schema-accept input_type instances on `message`. + msg_field = model_used.model_fields["message"] + assert msg_field.annotation == _AgentInput | None + + +def test_act_structured_passes_structured_model_to_agent(sample_actor_profile): + """act_structured() reuses the same structured model across turns (cached at construction).""" + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + input_type=_AgentInput, + ) + + seen_models: list = [] + + def _capture(agent_message, *, structured_output_model): + seen_models.append(structured_output_model) + mock_response = MagicMock(spec=AgentResult) + mock_response.structured_output = structured_output_model( + reasoning="r", stop=False, message=_AgentInput(query="x") + ) + return mock_response + + simulator.agent = MagicMock(side_effect=_capture) + + simulator.act_structured("turn 1") + simulator.act_structured("turn 2") + + assert len(seen_models) == 2 + assert seen_models[0] is seen_models[1] # same cached class across turns + + +def test_act_structured_continuing_turn(sample_actor_profile): + """act_structured() returns SimulatorResult with stop=False for normal continuing turns.""" + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + ) + + mock_response = MagicMock(spec=AgentResult) + mock_response.structured_output = SimulatorResult( + reasoning="thinking", stop=False, message="keep going" + ) + simulator.agent = MagicMock(return_value=mock_response) + + result = simulator.act_structured("agent reply") + + assert isinstance(result, SimulatorResult) + assert result.message == "keep going" + assert result.reasoning == "thinking" + assert result.stop is False + assert result.stop_reason is None + + +def test_act_structured_explicit_stop(sample_actor_profile): + """act_structured() records stop_reason='goal_completed' when the LLM sets stop=True.""" + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + ) + + mock_response = MagicMock(spec=AgentResult) + mock_response.structured_output = SimulatorResult( + reasoning="done", stop=True, message=None + ) + simulator.agent = MagicMock(return_value=mock_response) + + result = simulator.act_structured("agent reply") + + assert result.stop is True + assert result.stop_reason == "goal_completed" + + +def test_act_structured_hits_max_turns(sample_actor_profile): + """act_structured() reports max_turns when the cap trips while LLM said stop=False.""" + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + max_turns=1, + ) + + mock_response = MagicMock(spec=AgentResult) + mock_response.structured_output = SimulatorResult( + reasoning="r", stop=False, message="more please" + ) + simulator.agent = MagicMock(return_value=mock_response) + + result = simulator.act_structured("agent reply") + + assert result.stop is True + assert result.stop_reason == "max_turns" + + +def test_act_structured_input_type_returns_typed_message(sample_actor_profile): + """act_structured() with input_type returns the typed message instance.""" + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + input_type=_AgentInput, + ) + + typed_message = _AgentInput(query="ship it", urgency="high") + mock_response = MagicMock(spec=AgentResult) + mock_response.structured_output = simulator._structured_model( + reasoning="r", stop=False, message=typed_message + ) + simulator.agent = MagicMock(return_value=mock_response) + + result = simulator.act_structured("agent reply") + + assert result.stop is False + assert result.stop_reason is None + assert isinstance(result.message, _AgentInput) + assert result.message.query == "ship it" + + +def test_act_structured_input_type_null_message_becomes_implicit_stop(sample_actor_profile): + """act_structured() with input_type treats null message + stop=False as implicit goal_completed.""" + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + input_type=_AgentInput, + ) + + mock_response = MagicMock(spec=AgentResult) + mock_response.structured_output = simulator._structured_model( + reasoning="r", stop=False, message=None + ) + simulator.agent = MagicMock(return_value=mock_response) + + result = simulator.act_structured("agent reply") + + assert result.stop is True + assert result.stop_reason == "goal_completed" + assert result.message is None + + +def test_act_does_not_use_input_type(sample_actor_profile): + """act() ignores input_type and uses the legacy ActorResponse schema.""" + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + input_type=_AgentInput, + ) + + mock_response = MagicMock(spec=AgentResult) + mock_response.structured_output = ActorResponse(reasoning="r", message="m") + simulator.agent = MagicMock(return_value=mock_response) + + simulator.act("Test message") + + assert simulator.agent.call_args[1]["structured_output_model"] == ActorResponse + + +def test_rejects_simulator_result_as_input_type(sample_actor_profile): + """Passing SimulatorResult (or a subclass) as input_type raises at __init__.""" + with pytest.raises(ValueError, match="SimulatorResult"): + ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + input_type=SimulatorResult, + ) + + + +def test_system_prompt_template_none_uses_default(sample_actor_profile): + """When system_prompt_template is None, the default template is rendered with the profile.""" + from strands_evals.simulation.prompt_templates.actor_system_prompt import ( + DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE, + ) + + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + ) + + expected = DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE.format( + actor_profile=sample_actor_profile.model_dump() + ) + assert simulator.agent.system_prompt == expected + + +def test_system_prompt_template_prerendered_passes_through(sample_actor_profile): + """A template with no {actor_profile} placeholder is passed through verbatim.""" + prerendered = "You are simulating Alice, a beginner user. Keep replies short." + + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template=prerendered, + ) + + assert simulator.agent.system_prompt == prerendered From f50bcb7320df6f82bc36d8aed6c0a45a1c31ba6a Mon Sep 17 00:00:00 2001 From: poshinchen Date: Thu, 30 Apr 2026 14:23:12 -0400 Subject: [PATCH 2/6] Refactor the prompt usage for structured_output --- src/strands_evals/simulation/__init__.py | 10 +- .../simulation/actor_simulator.py | 106 +++++++++++------- .../prompt_templates/actor_system_prompt.py | 46 +++++++- .../types/simulation/__init__.py | 4 +- src/strands_evals/types/simulation/actor.py | 65 +++++------ .../simulation/test_actor_simulator.py | 82 ++++++++------ 6 files changed, 192 insertions(+), 121 deletions(-) diff --git a/src/strands_evals/simulation/__init__.py b/src/strands_evals/simulation/__init__.py index e05a2b9e..ccf21982 100644 --- a/src/strands_evals/simulation/__init__.py +++ b/src/strands_evals/simulation/__init__.py @@ -1,6 +1,10 @@ -from strands_evals.types.simulation import SimulatorResult +from strands_evals.types.simulation import ActorStructuredResponse from .actor_simulator import ActorSimulator +from .prompt_templates.actor_system_prompt import ( + DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE, + STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE, +) from .tool_simulator import ToolSimulator # Alias for backward compatibility @@ -10,5 +14,7 @@ "ActorSimulator", "UserSimulator", "ToolSimulator", - "SimulatorResult", + "ActorStructuredResponse", + "DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE", + "STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE", ] diff --git a/src/strands_evals/simulation/actor_simulator.py b/src/strands_evals/simulation/actor_simulator.py index 12a43c39..2d1b6b3a 100644 --- a/src/strands_evals/simulation/actor_simulator.py +++ b/src/strands_evals/simulation/actor_simulator.py @@ -10,9 +10,12 @@ from strands_evals.case import Case from strands_evals.simulation.profiles.actor_profile import DEFAULT_USER_PROFILE_SCHEMA from strands_evals.simulation.prompt_templates.actor_profile_extraction import ACTOR_PROFILE_PROMPT_TEMPLATE -from strands_evals.simulation.prompt_templates.actor_system_prompt import DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE +from strands_evals.simulation.prompt_templates.actor_system_prompt import ( + DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE, + STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE, +) from strands_evals.simulation.tools.goal_completion import get_conversation_goal_completion -from strands_evals.types.simulation import ActorProfile, ActorResponse, SimulatorResult +from strands_evals.types.simulation import ActorProfile, ActorResponse, ActorStructuredResponse logger = logging.getLogger(__name__) @@ -139,7 +142,7 @@ def __init__( self, actor_profile: ActorProfile, initial_query: str, - system_prompt_template: str = DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE, + system_prompt_template: str | None = None, tools: list | None = None, model: str | None = None, max_turns: int = 10, @@ -159,22 +162,31 @@ def __init__( - A template containing the `{actor_profile}` placeholder, which is rendered via `str.format(actor_profile=...)` against the - actor's profile (legacy behavior, the default). + actor's profile. - An already-rendered system prompt string with no `{actor_profile}` placeholder, which is used verbatim. - Defaults to the built-in `DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE`. + When `None` (the default), the simulator picks one of two + built-in defaults. If `input_type` is set, uses + `STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE`: the actor signals + end-of-conversation by setting `stop=true` on the structured + response. Otherwise uses `DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE`: + the actor signals end-of-conversation by emitting the + `` sentinel in the message text, which `has_next` + inspects. + + Pass an explicit template to override the auto-selection. tools: Additional tools available to the actor. Defaults to goal completion tool only. model: Model identifier for the underlying agent. Uses Strands default if None. max_turns: Maximum number of conversation turns before stopping (default: 10). input_type: Pydantic model class describing the agent-under-test's expected - input payload. Only affects :meth:`act_structured` — the LLM's + input payload. Only affects `act_structured`. The LLM's structured-output schema is narrowed so `message` is produced as an - `input_type` instance. :meth:`act` is unaffected and always uses the - legacy :class:`ActorResponse` schema. + `input_type` instance. `act` is unaffected and always uses the + `ActorResponse` schema. - Must describe what the agent under test accepts — not a simulator - type. Passing :class:`SimulatorResult` (or a subclass) raises + Must describe what the agent under test accepts, not a + simulator type. Passing `ActorStructuredResponse` (or a subclass) raises `ValueError` at construction time. Example: @@ -211,6 +223,19 @@ def __init__( self._input_type = input_type self._structured_model = self._build_structured_model(input_type) + # Auto-select the default template when the caller didn't provide one. + # A set `input_type` signals the actor produces structured messages; the + # structured template instructs the LLM to end the conversation via + # `stop=true` on the structured response, matching `act_structured`'s + # read path. Without `input_type`, the default template uses the + # `` sentinel that `has_next` inspects. + if system_prompt_template is None: + system_prompt_template = ( + STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE + if input_type is not None + else DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE + ) + if "{actor_profile}" in system_prompt_template: system_prompt = system_prompt_template.format(actor_profile=actor_profile.model_dump()) else: @@ -232,33 +257,33 @@ def __init__( callback_handler=None, ) - def _build_structured_model(self, input_type: type[BaseModel] | None) -> type[SimulatorResult]: - """Return the :class:`SimulatorResult` subclass used by :meth:`act_structured`. + def _build_structured_model(self, input_type: type[BaseModel] | None) -> type[ActorStructuredResponse]: + """Return the `ActorStructuredResponse` subclass used by `act_structured`. - When `input_type` is `None`, returns :class:`SimulatorResult` itself — - `message` stays typed as `Any` so the LLM is free to produce a string. + When `input_type` is `None`, returns `ActorStructuredResponse` itself so + `message` stays typed as `Any` and the LLM is free to produce a string. When `input_type` is set, returns a dynamic subclass that narrows `message` to `input_type | None` so the LLM's tool-use schema enforces the caller's agent-input shape. - `input_type` is rejected if it is :class:`SimulatorResult` or a subclass - of it — that's the one nesting case where the outer simulator envelope - (`reasoning`, `stop`, `message`) is duplicated inside the payload - and the LLM's schema becomes ambiguous. + `input_type` is rejected if it is `ActorStructuredResponse` or a subclass of + it. That's the one nesting case where the outer simulator envelope + (`reasoning`, `stop`, `message`) is duplicated inside the payload and + the LLM's schema becomes ambiguous. """ if input_type is None: - return SimulatorResult + return ActorStructuredResponse - if isinstance(input_type, type) and issubclass(input_type, SimulatorResult): + if isinstance(input_type, type) and issubclass(input_type, ActorStructuredResponse): raise ValueError( "input_type must describe the agent-under-test's input schema, not " - "SimulatorResult (or a subclass). SimulatorResult is the simulator's " + "ActorStructuredResponse (or a subclass). ActorStructuredResponse is the simulator's " "return envelope and cannot be used as an agent payload." ) return create_model( - "SimulatorResult", - __base__=SimulatorResult, + "ActorStructuredResponse", + __base__=ActorStructuredResponse, message=( input_type | None, Field( @@ -297,10 +322,12 @@ def act(self, agent_message: str) -> AgentResult: profile and goal. The response includes reasoning about the actor's thought process and the actual message to send. - This method uses the legacy :class:`ActorResponse` schema and returns the - raw Strands :class:`AgentResult`. It is preserved for backwards compatibility - with existing callers. The `input_type` kwarg on `__init__` does **not** - affect this method — use :meth:`act_structured` to consume `input_type`. + Uses `ActorResponse` as the structured-output schema and returns the + raw Strands `AgentResult`. End-of-conversation is signalled by the + `` sentinel embedded in the message text and inspected by + `has_next`. The `input_type` kwarg on `__init__` does not affect this + method. Use `act_structured` when typed messages or a structured + `stop` field are needed. Args: agent_message: The agent's response to react to (required). @@ -331,27 +358,27 @@ def act(self, agent_message: str) -> AgentResult: self._last_message = str(cast(ActorResponse, response.structured_output).message) return response - def act_structured(self, agent_message: str) -> SimulatorResult: + def act_structured(self, agent_message: str) -> ActorStructuredResponse: """ - Generate the next actor message and return a typed :class:`SimulatorResult`. + Generate the next actor message and return a typed `ActorStructuredResponse`. - The underlying Strands call uses a :class:`SimulatorResult` subclass as - its structured-output schema (narrowing `message` to `input_type` when + The underlying Strands call uses a `ActorStructuredResponse` subclass as its + structured-output schema (narrowing `message` to `input_type` when configured). The LLM produces `reasoning`, `stop`, and `message` directly; the simulator populates `stop_reason` after the call based on whether the actor signalled stop itself or the `max_turns` backstop tripped. - This method also keeps :attr:`_last_message` in sync with the returned - message so :meth:`has_next` works alongside :meth:`act_structured` in the - same conversation. + This method also keeps `_last_message` in sync with the returned + message so `has_next` works alongside `act_structured` in the same + conversation. Args: agent_message: The agent's response to react to (required). Returns: - A :class:`SimulatorResult` with `message`, `reasoning`, `stop`, - and `stop_reason` populated. + A `ActorStructuredResponse` with `message`, `reasoning`, `stop`, and + `stop_reason` populated. Example: ```python @@ -364,7 +391,7 @@ def act_structured(self, agent_message: str) -> SimulatorResult: response = self.agent(agent_message.strip(), structured_output_model=self._structured_model) self._turn_count += 1 - result = cast(SimulatorResult, response.structured_output) + result = cast(ActorStructuredResponse, response.structured_output) hit_max_turns = self._turn_count >= self._max_turns if result.stop: @@ -374,11 +401,10 @@ def act_structured(self, agent_message: str) -> SimulatorResult: result.stop_reason = "max_turns" elif result.message is None and self._input_type is not None: # Guard: structured path, actor signalled continue but produced no - # message — treat as implicit goal_completed to avoid feeding None + # message. Treat as implicit goal_completed to avoid feeding None # back to the agent under test. logger.warning( - "Actor produced null message when stop=False; treating as goal_completed " - "(input_type=%s)", + "Actor produced null message when stop=False; treating as goal_completed (input_type=%s)", self._input_type.__name__, ) result.stop = True diff --git a/src/strands_evals/simulation/prompt_templates/actor_system_prompt.py b/src/strands_evals/simulation/prompt_templates/actor_system_prompt.py index 2b863fd0..4e138098 100644 --- a/src/strands_evals/simulation/prompt_templates/actor_system_prompt.py +++ b/src/strands_evals/simulation/prompt_templates/actor_system_prompt.py @@ -1,13 +1,24 @@ """ -Default system prompt for actor simulation. +Default system prompts for actor simulation. -This module contains the default system prompt that configures the actor's behavior, -communication style, and response protocols for realistic conversation simulation. +Two variants are provided, sharing the majority of their body. + +`DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE` is used when the actor signals +end-of-conversation with the `` sentinel in the message text. Paired +with `ActorSimulator.act` and `ActorSimulator.has_next`. + +`STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE` is used when the actor signals +end-of-conversation by setting `stop=true` on the structured response. Paired +with `ActorSimulator.act_structured`. + +Both templates contain a single `{actor_profile}` placeholder. The simulator +renders them with `str.format(actor_profile=...)`. """ from textwrap import dedent -DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE = dedent("""## User Simulation +# Shared head of the prompt. Ends right before the Exit Conditions section. +_BODY_HEAD = dedent("""## User Simulation Core Identity: - You are simulating a user seeking assistance from an AI assistant @@ -24,7 +35,7 @@ - Maximum 2-3 sentences When assistant provides solutions/answers: - - Ask follow-ups, seek clarification, or express satisfaction. Do no deviate from the User Goal. + - Ask follow-ups, seek clarification, or express satisfaction. Do not deviate from the User Goal. - While following up, do not increase the conversation scope beyond your User Goal. Communication Rules: @@ -44,7 +55,10 @@ - Based on my user goal, I need to ... 9. Use the Exit Conditions strictly to stick to User Goal. 10. Use all relevant tools first to ground your responses, and then respond +""") +# Exit conditions + constraints + response format for the `` sentinel path. +_TOKEN_TAIL = dedent(""" Exit Conditions: 1. Use get_conversation_goal_completion tool to check if your User Goal is met. When your User Goal is met: - Just generate "" to terminate conversation @@ -62,3 +76,25 @@ Response Format: Generate ONLY the next SHORT message (1-3 sentences). No explanations, no solutions, no comprehensive information.""") + +# Exit conditions + constraints + response format for the structured `stop=true` path. +_STRUCTURED_TAIL = dedent(""" +Exit Conditions: +1. Use get_conversation_goal_completion tool to check if your User Goal is met. When your + User Goal is met, set stop=true in your structured response to end the conversation. +2. If conversation becomes unproductive or unsafe: + - Naturally steer back towards your User Goal + - If this becomes impossible, set stop=true in your structured response to end the conversation + +CRITICAL BEHAVIORAL CONSTRAINTS: +- You are ONLY a user seeking assistance, NEVER the one providing assistance. +- NEVER generate comprehensive responses, detailed plans, or extensive information. +- NEVER solve problems yourself - that's the assistant's job. Under no circumstances, + you can use your tools to solve your user goal/sub goals. +- If you find yourself writing more than 3 sentences, you're doing it wrong. + +Response Format: +Generate ONLY the next SHORT message (1-3 sentences). No explanations, no solutions, no comprehensive information.""") + +DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE = _BODY_HEAD + _TOKEN_TAIL +STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE = _BODY_HEAD + _STRUCTURED_TAIL diff --git a/src/strands_evals/types/simulation/__init__.py b/src/strands_evals/types/simulation/__init__.py index 4d858836..58598c4f 100644 --- a/src/strands_evals/types/simulation/__init__.py +++ b/src/strands_evals/types/simulation/__init__.py @@ -1,9 +1,9 @@ """Data models for actor simulation.""" -from .actor import ActorProfile, ActorResponse, SimulatorResult +from .actor import ActorProfile, ActorResponse, ActorStructuredResponse __all__ = [ "ActorProfile", "ActorResponse", - "SimulatorResult", + "ActorStructuredResponse", ] diff --git a/src/strands_evals/types/simulation/actor.py b/src/strands_evals/types/simulation/actor.py index 4e85d9c2..24fd1e6f 100644 --- a/src/strands_evals/types/simulation/actor.py +++ b/src/strands_evals/types/simulation/actor.py @@ -24,12 +24,11 @@ class ActorProfile(BaseModel): class ActorResponse(BaseModel): """ - Structured response from an actor. + Structured response schema used by `ActorSimulator.act`. - Used by :meth:`ActorSimulator.act` as the LLM's structured-output schema. The - simulator keeps ``act()`` on this legacy shape so existing callers continue - to see ``message: str`` as a required field. New code should prefer - :meth:`ActorSimulator.act_structured` and :class:`SimulatorResult`. + The LLM fills in `reasoning` and `message`. The simulator inspects the + returned `message` for the `` sentinel to decide whether to + continue the conversation. Attributes: reasoning: Internal reasoning process for the response. @@ -40,35 +39,34 @@ class ActorResponse(BaseModel): message: str = Field(..., description="Message from the actor") -class SimulatorResult(BaseModel): +class ActorStructuredResponse(BaseModel): """ - Typed return value from :meth:`ActorSimulator.act_structured`. + Typed return value from `ActorSimulator.act_structured`. - Used in two roles: + Used in two roles. - - **As the LLM structured-output schema.** ``reasoning``, ``stop``, and - ``message`` are produced by the LLM via Strands' tool-use contract. - ``stop_reason`` is not part of the schema — the simulator fills it in - after the call, but it is kept on this model so the public return type - stays a single class. + As the LLM structured-output schema: `reasoning`, `stop`, and `message` + are produced by the LLM via Strands' tool-use contract. `stop_reason` is + not part of the schema. The simulator fills it in after the call, but it + is kept on this model so the public return type stays a single class. - When ``ActorSimulator`` is given an ``input_type``, a dynamic subclass of - this model is built that narrows ``message`` to ``input_type | None`` so - the LLM's tool-use schema enforces the caller's agent-input shape. + When `ActorSimulator` is given an `input_type`, a dynamic subclass of + this model is built that narrows `message` to `input_type | None`, so the + LLM's tool-use schema enforces the caller's agent-input shape. - - **As the caller-facing result.** Callers of ``act_structured()`` receive - an instance of this class (or its dynamic subclass) with all four fields - populated. + As the caller-facing result: callers of `act_structured()` receive an + instance of this class (or its dynamic subclass) with all four fields + populated. Attributes: - message: The actor's next message. An ``input_type`` instance when the - simulator was constructed with ``input_type``; a plain string or - ``None`` otherwise. ``None`` is expected when ``stop=True``. + message: The actor's next message. An `input_type` instance when the + simulator was constructed with `input_type`. A plain string or + `None` otherwise. `None` is expected when `stop=True`. reasoning: The actor's internal reasoning for this response. - stop: ``True`` when the actor signals the conversation should end - (either the goal was completed or ``max_turns`` was reached). - stop_reason: Why the conversation ended: ``"goal_completed"``, - ``"max_turns"``, or ``None`` while the conversation is still ongoing. + stop: `True` when the actor signals the conversation should end + (either the goal was completed or `max_turns` was reached). + stop_reason: Why the conversation ended. One of `"goal_completed"`, + `"max_turns"`, or `None` while the conversation is still ongoing. Populated by the simulator after the LLM call; not part of the LLM-facing schema semantics even though the field exists on the model. @@ -79,22 +77,13 @@ class SimulatorResult(BaseModel): reasoning: str = Field(..., description="Reasoning for the actor's response") stop: bool = Field( False, - description=( - "Set to true when the conversation goal is met or the " - "conversation should end." - ), + description=("Set to true when the conversation goal is met or the conversation should end."), ) message: Any = Field( None, - description=( - "The actor's next message to the agent. Provide when stop=false; " - "set to null when stop=true." - ), + description=("The actor's next message to the agent. Provide when stop=false; set to null when stop=true."), ) stop_reason: str | None = Field( None, - description=( - "Populated by the simulator after the call. One of " - '"goal_completed", "max_turns", or None.' - ), + description=('Populated by the simulator after the call. One of "goal_completed", "max_turns", or None.'), ) diff --git a/tests/strands_evals/simulation/test_actor_simulator.py b/tests/strands_evals/simulation/test_actor_simulator.py index eb4ecbbe..79247773 100644 --- a/tests/strands_evals/simulation/test_actor_simulator.py +++ b/tests/strands_evals/simulation/test_actor_simulator.py @@ -8,7 +8,7 @@ from strands_evals import Case from strands_evals.simulation import ActorSimulator -from strands_evals.types.simulation import ActorProfile, ActorResponse, SimulatorResult +from strands_evals.types.simulation import ActorProfile, ActorResponse, ActorStructuredResponse @pytest.fixture @@ -227,7 +227,7 @@ class _AgentInput(BaseModel): def test_init_without_input_type_uses_simulator_result(sample_actor_profile): - """Without input_type, act_structured() hands SimulatorResult to the underlying agent.""" + """Without input_type, act_structured() hands ActorStructuredResponse to the underlying agent.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", @@ -235,16 +235,16 @@ def test_init_without_input_type_uses_simulator_result(sample_actor_profile): ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = SimulatorResult(reasoning="r", stop=False, message="hi") + mock_response.structured_output = ActorStructuredResponse(reasoning="r", stop=False, message="hi") simulator.agent = MagicMock(return_value=mock_response) simulator.act_structured("agent reply") - assert simulator.agent.call_args[1]["structured_output_model"] is SimulatorResult + assert simulator.agent.call_args[1]["structured_output_model"] is ActorStructuredResponse def test_init_with_input_type_narrows_message_schema(sample_actor_profile): - """With input_type, act_structured() hands a SimulatorResult subclass whose message is typed.""" + """With input_type, act_structured() hands a ActorStructuredResponse subclass whose message is typed.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", @@ -258,9 +258,7 @@ def test_init_with_input_type_narrows_message_schema(sample_actor_profile): def _capture(agent_message, *, structured_output_model): captured_model.append(structured_output_model) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = structured_output_model( - reasoning="r", stop=False, message=typed_message - ) + mock_response.structured_output = structured_output_model(reasoning="r", stop=False, message=typed_message) return mock_response simulator.agent = MagicMock(side_effect=_capture) @@ -268,8 +266,8 @@ def _capture(agent_message, *, structured_output_model): simulator.act_structured("agent reply") (model_used,) = captured_model - # It must be a SimulatorResult subclass (so act_structured can return SimulatorResult). - assert issubclass(model_used, SimulatorResult) + # It must be a ActorStructuredResponse subclass (so act_structured can return ActorStructuredResponse). + assert issubclass(model_used, ActorStructuredResponse) # And it must schema-accept input_type instances on `message`. msg_field = model_used.model_fields["message"] assert msg_field.annotation == _AgentInput | None @@ -304,7 +302,7 @@ def _capture(agent_message, *, structured_output_model): def test_act_structured_continuing_turn(sample_actor_profile): - """act_structured() returns SimulatorResult with stop=False for normal continuing turns.""" + """act_structured() returns ActorStructuredResponse with stop=False for normal continuing turns.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", @@ -312,14 +310,12 @@ def test_act_structured_continuing_turn(sample_actor_profile): ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = SimulatorResult( - reasoning="thinking", stop=False, message="keep going" - ) + mock_response.structured_output = ActorStructuredResponse(reasoning="thinking", stop=False, message="keep going") simulator.agent = MagicMock(return_value=mock_response) result = simulator.act_structured("agent reply") - assert isinstance(result, SimulatorResult) + assert isinstance(result, ActorStructuredResponse) assert result.message == "keep going" assert result.reasoning == "thinking" assert result.stop is False @@ -335,9 +331,7 @@ def test_act_structured_explicit_stop(sample_actor_profile): ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = SimulatorResult( - reasoning="done", stop=True, message=None - ) + mock_response.structured_output = ActorStructuredResponse(reasoning="done", stop=True, message=None) simulator.agent = MagicMock(return_value=mock_response) result = simulator.act_structured("agent reply") @@ -356,9 +350,7 @@ def test_act_structured_hits_max_turns(sample_actor_profile): ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = SimulatorResult( - reasoning="r", stop=False, message="more please" - ) + mock_response.structured_output = ActorStructuredResponse(reasoning="r", stop=False, message="more please") simulator.agent = MagicMock(return_value=mock_response) result = simulator.act_structured("agent reply") @@ -378,9 +370,7 @@ def test_act_structured_input_type_returns_typed_message(sample_actor_profile): typed_message = _AgentInput(query="ship it", urgency="high") mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = simulator._structured_model( - reasoning="r", stop=False, message=typed_message - ) + mock_response.structured_output = simulator._structured_model(reasoning="r", stop=False, message=typed_message) simulator.agent = MagicMock(return_value=mock_response) result = simulator.act_structured("agent reply") @@ -401,9 +391,7 @@ def test_act_structured_input_type_null_message_becomes_implicit_stop(sample_act ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = simulator._structured_model( - reasoning="r", stop=False, message=None - ) + mock_response.structured_output = simulator._structured_model(reasoning="r", stop=False, message=None) simulator.agent = MagicMock(return_value=mock_response) result = simulator.act_structured("agent reply") @@ -432,17 +420,16 @@ def test_act_does_not_use_input_type(sample_actor_profile): def test_rejects_simulator_result_as_input_type(sample_actor_profile): - """Passing SimulatorResult (or a subclass) as input_type raises at __init__.""" - with pytest.raises(ValueError, match="SimulatorResult"): + """Passing ActorStructuredResponse (or a subclass) as input_type raises at __init__.""" + with pytest.raises(ValueError, match="ActorStructuredResponse"): ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", system_prompt_template="Test: {actor_profile}", - input_type=SimulatorResult, + input_type=ActorStructuredResponse, ) - def test_system_prompt_template_none_uses_default(sample_actor_profile): """When system_prompt_template is None, the default template is rendered with the profile.""" from strands_evals.simulation.prompt_templates.actor_system_prompt import ( @@ -454,9 +441,7 @@ def test_system_prompt_template_none_uses_default(sample_actor_profile): initial_query="Hello", ) - expected = DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE.format( - actor_profile=sample_actor_profile.model_dump() - ) + expected = DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE.format(actor_profile=sample_actor_profile.model_dump()) assert simulator.agent.system_prompt == expected @@ -471,3 +456,32 @@ def test_system_prompt_template_prerendered_passes_through(sample_actor_profile) ) assert simulator.agent.system_prompt == prerendered + + +def test_system_prompt_template_autopicks_structured_when_input_type_set(sample_actor_profile): + """When input_type is set and no template is given, the structured-stop template is used.""" + from strands_evals.simulation.prompt_templates.actor_system_prompt import ( + STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE, + ) + + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + input_type=_AgentInput, + ) + + expected = STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE.format(actor_profile=sample_actor_profile.model_dump()) + assert simulator.agent.system_prompt == expected + + +def test_explicit_template_overrides_autopick(sample_actor_profile): + """Explicit system_prompt_template wins even when input_type is set.""" + custom = "Custom prompt for {actor_profile}" + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template=custom, + input_type=_AgentInput, + ) + + assert simulator.agent.system_prompt == custom.format(actor_profile=sample_actor_profile.model_dump()) From 55bf5e195d5ed107a6b2e9bbf49b56ea6abbd439 Mon Sep 17 00:00:00 2001 From: poshinchen Date: Thu, 7 May 2026 14:53:52 -0400 Subject: [PATCH 3/6] breaking change to merge the function --- src/strands_evals/simulation/__init__.py | 10 +- .../simulation/actor_simulator.py | 276 ++++-------------- .../prompt_templates/actor_system_prompt.py | 47 +-- src/strands_evals/types/__init__.py | 1 + .../types/simulation/__init__.py | 4 +- src/strands_evals/types/simulation/actor.py | 71 ++--- .../simulation/test_actor_simulator.py | 271 +++++++---------- 7 files changed, 203 insertions(+), 477 deletions(-) diff --git a/src/strands_evals/simulation/__init__.py b/src/strands_evals/simulation/__init__.py index ccf21982..84a8ae68 100644 --- a/src/strands_evals/simulation/__init__.py +++ b/src/strands_evals/simulation/__init__.py @@ -1,20 +1,16 @@ -from strands_evals.types.simulation import ActorStructuredResponse +from strands_evals.types.simulation import ActorOutputBase from .actor_simulator import ActorSimulator -from .prompt_templates.actor_system_prompt import ( - DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE, - STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE, -) +from .prompt_templates.actor_system_prompt import DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE from .tool_simulator import ToolSimulator # Alias for backward compatibility UserSimulator = ActorSimulator __all__ = [ + "ActorOutputBase", "ActorSimulator", "UserSimulator", "ToolSimulator", - "ActorStructuredResponse", "DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE", - "STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE", ] diff --git a/src/strands_evals/simulation/actor_simulator.py b/src/strands_evals/simulation/actor_simulator.py index 2d1b6b3a..6c2955fb 100644 --- a/src/strands_evals/simulation/actor_simulator.py +++ b/src/strands_evals/simulation/actor_simulator.py @@ -1,28 +1,22 @@ import logging import random -from pydantic import BaseModel, Field, create_model from strands import Agent from strands.agent.agent_result import AgentResult from strands.types.content import Message -from typing_extensions import cast from strands_evals.case import Case from strands_evals.simulation.profiles.actor_profile import DEFAULT_USER_PROFILE_SCHEMA from strands_evals.simulation.prompt_templates.actor_profile_extraction import ACTOR_PROFILE_PROMPT_TEMPLATE -from strands_evals.simulation.prompt_templates.actor_system_prompt import ( - DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE, - STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE, -) +from strands_evals.simulation.prompt_templates.actor_system_prompt import DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE from strands_evals.simulation.tools.goal_completion import get_conversation_goal_completion -from strands_evals.types.simulation import ActorProfile, ActorResponse, ActorStructuredResponse +from strands_evals.types.simulation import ActorOutputBase, ActorProfile, ActorResponse logger = logging.getLogger(__name__) class ActorSimulator: - """ - Simulates an actor in multi-turn conversations for agent evaluation. + """Simulates an actor in multi-turn conversations for agent evaluation. ActorSimulator wraps a Strands Agent configured to behave as a specific actor (typically a user) in conversation scenarios. It maintains conversation history, @@ -53,8 +47,7 @@ def from_case_for_user_simulator( model: str | None = None, max_turns: int = 10, ) -> "ActorSimulator": - """ - Create an ActorSimulator configured as a user simulator from a test case. + """Create an ActorSimulator configured as a user simulator from a test case. Generates a realistic user profile and goal from case.input and optionally case.metadata["task_description"], then configures the simulator with @@ -75,22 +68,14 @@ def from_case_for_user_simulator( from strands_evals import Case, ActorSimulator from strands import Agent - # Create test case case = Case( input="I need to book a flight to Paris", metadata={"task_description": "Flight booking confirmed"} ) - # Create user simulator - user_sim = ActorSimulator.from_case_for_user_simulator( - case=case, - max_turns=5 - ) - - # Create target agent to evaluate + user_sim = ActorSimulator.from_case_for_user_simulator(case=case, max_turns=5) agent = Agent(system_prompt="You are a travel assistant.") - # Run conversation user_message = case.input while user_sim.has_next(): agent_response = agent(user_message) @@ -100,9 +85,6 @@ def from_case_for_user_simulator( """ actor_profile = cls._generate_profile_from_case(case) - if system_prompt_template is None: - system_prompt_template = DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE - return cls( actor_profile=actor_profile, initial_query=case.input, @@ -114,10 +96,8 @@ def from_case_for_user_simulator( @staticmethod def _generate_profile_from_case(case: Case) -> ActorProfile: - """ - Generate user profile from case. + """Generate user profile from case. - Private helper for from_case_for_user_simulator factory method. Uses case.input and optionally case.metadata["task_description"] if present. Args: @@ -146,11 +126,8 @@ def __init__( tools: list | None = None, model: str | None = None, max_turns: int = 10, - *, - input_type: type[BaseModel] | None = None, ): - """ - Initialize an ActorSimulator with profile and goal. + """Initialize an ActorSimulator with profile and goal. Use this constructor when you have a pre-defined ActorProfile. For automatic profile generation from test cases, use from_case_for_user_simulator() instead. @@ -166,45 +143,27 @@ def __init__( - An already-rendered system prompt string with no `{actor_profile}` placeholder, which is used verbatim. - When `None` (the default), the simulator picks one of two - built-in defaults. If `input_type` is set, uses - `STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE`: the actor signals - end-of-conversation by setting `stop=true` on the structured - response. Otherwise uses `DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE`: - the actor signals end-of-conversation by emitting the - `` sentinel in the message text, which `has_next` - inspects. + When `None` (the default), uses + `DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE` which instructs the LLM + to set `stop=true` on the structured response when the + conversation goal is met. - Pass an explicit template to override the auto-selection. + Pass an explicit template to override. tools: Additional tools available to the actor. Defaults to goal completion tool only. model: Model identifier for the underlying agent. Uses Strands default if None. max_turns: Maximum number of conversation turns before stopping (default: 10). - input_type: Pydantic model class describing the agent-under-test's expected - input payload. Only affects `act_structured`. The LLM's - structured-output schema is narrowed so `message` is produced as an - `input_type` instance. `act` is unaffected and always uses the - `ActorResponse` schema. - - Must describe what the agent under test accepts, not a - simulator type. Passing `ActorStructuredResponse` (or a subclass) raises - `ValueError` at construction time. Example: ```python from strands_evals.simulation import ActorSimulator from strands_evals.types.simulation import ActorProfile - # Define custom actor profile profile = ActorProfile( - traits={ - "expertise_level": "expert", - "communication_style": "technical" - }, + traits={"expertise_level": "expert", "communication_style": "technical"}, context="A software engineer debugging a production issue.", actor_goal="Identify and resolve the memory leak." ) - # Create simulator with custom profile simulator = ActorSimulator( actor_profile=profile, initial_query="Our service is experiencing high memory usage.", @@ -217,38 +176,24 @@ def __init__( self.initial_query = initial_query self.conversation_history: list[Message] = [] self.model_id = model + self.stop = False self._turn_count = 0 - self._last_message = "" self._max_turns = max_turns - self._input_type = input_type - self._structured_model = self._build_structured_model(input_type) - - # Auto-select the default template when the caller didn't provide one. - # A set `input_type` signals the actor produces structured messages; the - # structured template instructs the LLM to end the conversation via - # `stop=true` on the structured response, matching `act_structured`'s - # read path. Without `input_type`, the default template uses the - # `` sentinel that `has_next` inspects. + if system_prompt_template is None: - system_prompt_template = ( - STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE - if input_type is not None - else DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE - ) + system_prompt_template = DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE if "{actor_profile}" in system_prompt_template: system_prompt = system_prompt_template.format(actor_profile=actor_profile.model_dump()) else: - system_prompt = system_prompt_template # already rendered + system_prompt = system_prompt_template - # Combine tools all_tools = [get_conversation_goal_completion] if tools: all_tools.extend(tools) self._initialize_conversation() - # Create agent self.agent = Agent( system_prompt=system_prompt, messages=self.conversation_history, @@ -257,55 +202,8 @@ def __init__( callback_handler=None, ) - def _build_structured_model(self, input_type: type[BaseModel] | None) -> type[ActorStructuredResponse]: - """Return the `ActorStructuredResponse` subclass used by `act_structured`. - - When `input_type` is `None`, returns `ActorStructuredResponse` itself so - `message` stays typed as `Any` and the LLM is free to produce a string. - When `input_type` is set, returns a dynamic subclass that narrows - `message` to `input_type | None` so the LLM's tool-use schema enforces - the caller's agent-input shape. - - `input_type` is rejected if it is `ActorStructuredResponse` or a subclass of - it. That's the one nesting case where the outer simulator envelope - (`reasoning`, `stop`, `message`) is duplicated inside the payload and - the LLM's schema becomes ambiguous. - """ - if input_type is None: - return ActorStructuredResponse - - if isinstance(input_type, type) and issubclass(input_type, ActorStructuredResponse): - raise ValueError( - "input_type must describe the agent-under-test's input schema, not " - "ActorStructuredResponse (or a subclass). ActorStructuredResponse is the simulator's " - "return envelope and cannot be used as an agent payload." - ) - - return create_model( - "ActorStructuredResponse", - __base__=ActorStructuredResponse, - message=( - input_type | None, - Field( - None, - description=( - f"Structured message matching the agent's input schema " - f"({input_type.__name__}). Provide when stop=false. Set " - "to null when stop=true." - ), - ), - ), - ) - def _initialize_conversation(self): - """ - Initialize the conversation history with a greeting and initial query. - - Sets up the conversation with a random greeting from the assistant followed - by the actor's initial query. This establishes the conversation context. - - Note: This is a private method called during initialization. - """ + """Initialize the conversation history with a greeting and initial query.""" selected_greeting = random.choice(self.INITIAL_GREETINGS) greeting_message = {"role": "user", "content": [{"text": selected_greeting}]} self.conversation_history.append(greeting_message) @@ -313,113 +211,75 @@ def _initialize_conversation(self): initial_query_message = {"role": "assistant", "content": [{"text": self.initial_query.strip()}]} self.conversation_history.append(initial_query_message) - def act(self, agent_message: str) -> AgentResult: - """ - Generate the next actor message in the conversation. + def act( + self, + agent_message: str, + *, + structured_output_model: type[ActorOutputBase] | None = None, + ) -> AgentResult: + """Generate the next actor message in the conversation. Processes the agent's message and generates a contextually appropriate - response from the actor's perspective, maintaining consistency with the actor's - profile and goal. The response includes reasoning about the actor's thought - process and the actual message to send. + response from the actor's perspective. The response is returned as an + `AgentResult` whose `structured_output` is an `ActorResponse` (or the + caller-provided `structured_output_model`). - Uses `ActorResponse` as the structured-output schema and returns the - raw Strands `AgentResult`. End-of-conversation is signalled by the - `` sentinel embedded in the message text and inspected by - `has_next`. The `input_type` kwarg on `__init__` does not affect this - method. Use `act_structured` when typed messages or a structured - `stop` field are needed. + The provided model must subclass `ActorOutputBase` and have a `message` + field. A `TypeError` is raised if not a subclass, and `ValueError` if + `message` is missing. Args: - agent_message: The agent's response to react to (required). + agent_message: The agent's response to react to. + structured_output_model: Optional Pydantic model to use instead of + `ActorResponse`. Must subclass `ActorOutputBase` and include a + `message` field. Returns: - AgentResult containing the actor's structured response with: - - structured_output.reasoning: Actor's internal reasoning - - structured_output.message: Actor's response message (str) + AgentResult with `structured_output` set to either `ActorResponse` + or the caller-provided model instance. Example: ```python - # Agent responds to user - agent_response = agent("I need help booking a flight") - - # User simulator generates next message - user_result = user_sim.act(str(agent_response)) - - # Access the response - print(user_result.structured_output.reasoning) # Why the actor responded this way - print(user_result.structured_output.message) # The actual message - - # Continue conversation - next_message = str(user_result.structured_output.message) + # Default usage + result = simulator.act(str(agent_response)) + response = result.structured_output # ActorResponse + print(response.message) + + # Custom structured output + result = simulator.act(str(agent_response), structured_output_model=MySchema) + my_output = result.structured_output # MySchema instance ``` """ - response = self.agent(agent_message.strip(), structured_output_model=ActorResponse) - self._turn_count += 1 - self._last_message = str(cast(ActorResponse, response.structured_output).message) - return response - - def act_structured(self, agent_message: str) -> ActorStructuredResponse: - """ - Generate the next actor message and return a typed `ActorStructuredResponse`. - - The underlying Strands call uses a `ActorStructuredResponse` subclass as its - structured-output schema (narrowing `message` to `input_type` when - configured). The LLM produces `reasoning`, `stop`, and `message` - directly; the simulator populates `stop_reason` after the call based on - whether the actor signalled stop itself or the `max_turns` backstop - tripped. - - This method also keeps `_last_message` in sync with the returned - message so `has_next` works alongside `act_structured` in the same - conversation. + model = structured_output_model or ActorResponse - Args: - agent_message: The agent's response to react to (required). + if not issubclass(model, ActorOutputBase): + raise TypeError(f"structured_output_model must be a subclass of ActorOutputBase, got {model.__name__}.") - Returns: - A `ActorStructuredResponse` with `message`, `reasoning`, `stop`, and - `stop_reason` populated. + if "message" not in model.model_fields: + raise ValueError(f"structured_output_model {model.__name__} must have a 'message' field.") - Example: - ```python - result = user_sim.act_structured(str(agent_response)) - if result.stop: - break - next_message = result.message - ``` - """ - response = self.agent(agent_message.strip(), structured_output_model=self._structured_model) + response = self.agent(agent_message.strip(), structured_output_model=model) self._turn_count += 1 - result = cast(ActorStructuredResponse, response.structured_output) + result = response.structured_output - hit_max_turns = self._turn_count >= self._max_turns if result.stop: - result.stop_reason = "goal_completed" - elif hit_max_turns: + self.stop = True + if hasattr(result, "stop_reason"): + result.stop_reason = "goal_completed" + elif self._turn_count >= self._max_turns: result.stop = True - result.stop_reason = "max_turns" - elif result.message is None and self._input_type is not None: - # Guard: structured path, actor signalled continue but produced no - # message. Treat as implicit goal_completed to avoid feeding None - # back to the agent under test. - logger.warning( - "Actor produced null message when stop=False; treating as goal_completed (input_type=%s)", - self._input_type.__name__, - ) - result.stop = True - result.stop_reason = "goal_completed" + self.stop = True + if hasattr(result, "stop_reason"): + result.stop_reason = "max_turns" - self._last_message = str(result.message) if result.message is not None else "" - return result + return response def has_next(self) -> bool: - """ - Check if the conversation should continue. + """Check if the conversation should continue. - Returns False if the stop token () is present in the last message or if - the maximum number of turns has been reached. Use this in a loop to control - multi-turn conversations. + Returns False if the actor signalled stop or if the maximum number of + turns has been reached. Returns: True if the conversation should continue, False otherwise. @@ -427,18 +287,10 @@ def has_next(self) -> bool: Example: ```python user_message = case.input - - # Continue conversation until completion while user_sim.has_next(): agent_response = agent(user_message) user_result = user_sim.act(str(agent_response)) user_message = str(user_result.structured_output.message) - - # Conversation ended either by: - # - Actor including token in message - # - Reaching max_turns limit ``` """ - if self._turn_count >= self._max_turns: - return False - return "" not in self._last_message + return not self.stop diff --git a/src/strands_evals/simulation/prompt_templates/actor_system_prompt.py b/src/strands_evals/simulation/prompt_templates/actor_system_prompt.py index 4e138098..dfc62ad2 100644 --- a/src/strands_evals/simulation/prompt_templates/actor_system_prompt.py +++ b/src/strands_evals/simulation/prompt_templates/actor_system_prompt.py @@ -1,24 +1,15 @@ -""" -Default system prompts for actor simulation. - -Two variants are provided, sharing the majority of their body. - -`DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE` is used when the actor signals -end-of-conversation with the `` sentinel in the message text. Paired -with `ActorSimulator.act` and `ActorSimulator.has_next`. +"""Default system prompt for actor simulation. -`STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE` is used when the actor signals -end-of-conversation by setting `stop=true` on the structured response. Paired -with `ActorSimulator.act_structured`. +The template instructs the actor to signal end-of-conversation by setting +`stop=true` on the structured response. -Both templates contain a single `{actor_profile}` placeholder. The simulator -renders them with `str.format(actor_profile=...)`. +The template contains a single `{actor_profile}` placeholder. The simulator +renders it with `str.format(actor_profile=...)`. """ from textwrap import dedent -# Shared head of the prompt. Ends right before the Exit Conditions section. -_BODY_HEAD = dedent("""## User Simulation +DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE = dedent("""## User Simulation Core Identity: - You are simulating a user seeking assistance from an AI assistant @@ -55,30 +46,7 @@ - Based on my user goal, I need to ... 9. Use the Exit Conditions strictly to stick to User Goal. 10. Use all relevant tools first to ground your responses, and then respond -""") - -# Exit conditions + constraints + response format for the `` sentinel path. -_TOKEN_TAIL = dedent(""" -Exit Conditions: -1. Use get_conversation_goal_completion tool to check if your User Goal is met. When your User Goal is met: - - Just generate "" to terminate conversation -2. If conversation becomes unproductive or unsafe: - - Naturally steer back towards your User Goal - - If this becomes impossible, just generate: "" to terminate conversation - -CRITICAL BEHAVIORAL CONSTRAINTS: -- You are ONLY a user seeking assistance, NEVER the one providing assistance. -- NEVER generate comprehensive responses, detailed plans, or extensive information. -- NEVER solve problems yourself - that's the assistant's job. Under no circumstances, - you can use your tools to solve your user goal/sub goals. -- If you find yourself writing more than 3 sentences, you're doing it wrong. -- Generate only "" to terminate conversation -Response Format: -Generate ONLY the next SHORT message (1-3 sentences). No explanations, no solutions, no comprehensive information.""") - -# Exit conditions + constraints + response format for the structured `stop=true` path. -_STRUCTURED_TAIL = dedent(""" Exit Conditions: 1. Use get_conversation_goal_completion tool to check if your User Goal is met. When your User Goal is met, set stop=true in your structured response to end the conversation. @@ -95,6 +63,3 @@ Response Format: Generate ONLY the next SHORT message (1-3 sentences). No explanations, no solutions, no comprehensive information.""") - -DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE = _BODY_HEAD + _TOKEN_TAIL -STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE = _BODY_HEAD + _STRUCTURED_TAIL diff --git a/src/strands_evals/types/__init__.py b/src/strands_evals/types/__init__.py index 8d38173a..b9c29068 100644 --- a/src/strands_evals/types/__init__.py +++ b/src/strands_evals/types/__init__.py @@ -17,6 +17,7 @@ "TaskOutput", "EvaluationData", "EvaluationOutput", + "ActorOutputBase", "ActorProfile", "ActorResponse", "InputT", diff --git a/src/strands_evals/types/simulation/__init__.py b/src/strands_evals/types/simulation/__init__.py index 58598c4f..8a815e70 100644 --- a/src/strands_evals/types/simulation/__init__.py +++ b/src/strands_evals/types/simulation/__init__.py @@ -1,9 +1,9 @@ """Data models for actor simulation.""" -from .actor import ActorProfile, ActorResponse, ActorStructuredResponse +from .actor import ActorOutputBase, ActorProfile, ActorResponse __all__ = [ + "ActorOutputBase", "ActorProfile", "ActorResponse", - "ActorStructuredResponse", ] diff --git a/src/strands_evals/types/simulation/actor.py b/src/strands_evals/types/simulation/actor.py index 24fd1e6f..33e19fc7 100644 --- a/src/strands_evals/types/simulation/actor.py +++ b/src/strands_evals/types/simulation/actor.py @@ -4,8 +4,7 @@ class ActorProfile(BaseModel): - """ - Profile for actor simulation. + """Profile for actor simulation. Attributes: traits: Dictionary of actor characteristics and attributes. @@ -22,68 +21,50 @@ class ActorProfile(BaseModel): ) -class ActorResponse(BaseModel): - """ - Structured response schema used by `ActorSimulator.act`. +class ActorOutputBase(BaseModel): + """Base class for actor simulator structured output models. - The LLM fills in `reasoning` and `message`. The simulator inspects the - returned `message` for the `` sentinel to decide whether to - continue the conversation. + Any model passed as `structured_output_model` to `ActorSimulator.act()` + must subclass this. The simulator reads `stop` and `reasoning` from the + result to manage conversation state. + + Subclasses must also define a `message` field (of any type) — this is + validated at runtime by the simulator rather than enforced here, so + subclasses are free to type `message` however they need. Attributes: reasoning: Internal reasoning process for the response. - message: The actual message content from the actor. + stop: `True` when the actor signals the conversation should end. """ reasoning: str = Field(..., description="Reasoning for the actor's response") - message: str = Field(..., description="Message from the actor") - - -class ActorStructuredResponse(BaseModel): - """ - Typed return value from `ActorSimulator.act_structured`. - - Used in two roles. + stop: bool = Field( + False, + description="Set to true when the conversation goal is met or the conversation should end.", + ) - As the LLM structured-output schema: `reasoning`, `stop`, and `message` - are produced by the LLM via Strands' tool-use contract. `stop_reason` is - not part of the schema. The simulator fills it in after the call, but it - is kept on this model so the public return type stays a single class. - When `ActorSimulator` is given an `input_type`, a dynamic subclass of - this model is built that narrows `message` to `input_type | None`, so the - LLM's tool-use schema enforces the caller's agent-input shape. +class ActorResponse(ActorOutputBase): + """Default structured response from the actor simulator. - As the caller-facing result: callers of `act_structured()` receive an - instance of this class (or its dynamic subclass) with all four fields - populated. + Used as the LLM structured-output schema for `ActorSimulator.act` when no + custom `structured_output_model` is provided. The LLM fills `reasoning`, + `stop`, and `message`. The simulator fills `stop_reason` after the LLM call. Attributes: - message: The actor's next message. An `input_type` instance when the - simulator was constructed with `input_type`. A plain string or - `None` otherwise. `None` is expected when `stop=True`. - reasoning: The actor's internal reasoning for this response. - stop: `True` when the actor signals the conversation should end - (either the goal was completed or `max_turns` was reached). + message: The actual message content from the actor. `None` when `stop=True`. stop_reason: Why the conversation ended. One of `"goal_completed"`, - `"max_turns"`, or `None` while the conversation is still ongoing. - Populated by the simulator after the LLM call; not part of the - LLM-facing schema semantics even though the field exists on the - model. + `"max_turns"`, or `None` while ongoing. Populated by the simulator + after the LLM call. """ model_config = ConfigDict(arbitrary_types_allowed=True) - reasoning: str = Field(..., description="Reasoning for the actor's response") - stop: bool = Field( - False, - description=("Set to true when the conversation goal is met or the conversation should end."), - ) - message: Any = Field( + message: str | None = Field( None, - description=("The actor's next message to the agent. Provide when stop=false; set to null when stop=true."), + description="The actor's next message to the agent. Provide when stop=false; set to null when stop=true.", ) stop_reason: str | None = Field( None, - description=('Populated by the simulator after the call. One of "goal_completed", "max_turns", or None.'), + description='Populated by the simulator after the call. One of "goal_completed", "max_turns", or None.', ) diff --git a/tests/strands_evals/simulation/test_actor_simulator.py b/tests/strands_evals/simulation/test_actor_simulator.py index 79247773..c1cf3299 100644 --- a/tests/strands_evals/simulation/test_actor_simulator.py +++ b/tests/strands_evals/simulation/test_actor_simulator.py @@ -8,7 +8,7 @@ from strands_evals import Case from strands_evals.simulation import ActorSimulator -from strands_evals.types.simulation import ActorProfile, ActorResponse, ActorStructuredResponse +from strands_evals.types.simulation import ActorOutputBase, ActorProfile, ActorResponse @pytest.fixture @@ -69,7 +69,6 @@ def test_initialize_conversation(sample_actor_profile): @patch("strands_evals.simulation.actor_simulator.Agent") def test_from_case_for_user_simulator(mock_agent_class, sample_case): """Test factory method creates simulator from case.""" - # Mock the profile generation agent mock_profile_agent = MagicMock() mock_profile = ActorProfile( traits={"test": "trait"}, @@ -80,10 +79,8 @@ def test_from_case_for_user_simulator(mock_agent_class, sample_case): mock_result.structured_output = mock_profile mock_profile_agent.return_value = mock_result - # Mock the main simulator agent mock_simulator_agent = MagicMock() - # Configure mock to return different instances mock_agent_class.side_effect = [mock_profile_agent, mock_simulator_agent] simulator = ActorSimulator.from_case_for_user_simulator(case=sample_case) @@ -111,7 +108,6 @@ def test_generate_profile_from_case(mock_agent_class, sample_case): assert profile == mock_profile assert mock_agent.called - # Verify structured_output_model was passed call_args = mock_agent.call_args assert call_args[1]["structured_output_model"] == ActorProfile @@ -124,11 +120,11 @@ def test_act_generates_response(sample_actor_profile): system_prompt_template="Test: {actor_profile}", ) - # Mock the agent's response mock_response = MagicMock(spec=AgentResult) mock_actor_response = ActorResponse( reasoning="Test reasoning", message="Test response message", + stop=False, ) mock_response.structured_output = mock_actor_response simulator.agent = MagicMock(return_value=mock_response) @@ -140,8 +136,8 @@ def test_act_generates_response(sample_actor_profile): simulator.agent.assert_called_once() -def test_act_uses_structured_output(sample_actor_profile): - """Test act method requests structured output.""" +def test_act_uses_actor_response_by_default(sample_actor_profile): + """Test act method uses ActorResponse as default structured output model.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", @@ -149,160 +145,111 @@ def test_act_uses_structured_output(sample_actor_profile): ) mock_response = MagicMock(spec=AgentResult) - mock_actor_response = ActorResponse(reasoning="Test", message="Test message") + mock_actor_response = ActorResponse(reasoning="Test", message="Test message", stop=False) mock_response.structured_output = mock_actor_response simulator.agent = MagicMock(return_value=mock_response) simulator.act("Test message") - # Verify structured_output_model parameter call_kwargs = simulator.agent.call_args[1] assert call_kwargs["structured_output_model"] == ActorResponse -def test_has_next_returns_true_initially(sample_actor_profile): - """Test has_next returns True before any turns.""" - simulator = ActorSimulator( - actor_profile=sample_actor_profile, - initial_query="Hello", - system_prompt_template="Test: {actor_profile}", - ) +def test_act_with_custom_structured_output_model(sample_actor_profile): + """Test act passes custom structured_output_model to the agent.""" - assert simulator.has_next() is True + class CustomOutput(ActorOutputBase): + answer: str + confidence: float + message: str | None = None - -def test_has_next_respects_max_turns(sample_actor_profile): - """Test has_next returns False after max_turns reached.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", system_prompt_template="Test: {actor_profile}", - max_turns=3, ) - # Mock responses mock_response = MagicMock(spec=AgentResult) - mock_actor_response = ActorResponse(reasoning="Test", message="Continue") - mock_response.structured_output = mock_actor_response + mock_response.structured_output = CustomOutput( + reasoning="r", answer="test", confidence=0.9, stop=False, message="hi" + ) simulator.agent = MagicMock(return_value=mock_response) - # Simulate 3 turns with max_turns=3 - for _ in range(3): - assert simulator.has_next() is True - simulator.act("Test message") + result = simulator.act("Test message", structured_output_model=CustomOutput) - # After 3 turns, should return False - assert simulator.has_next() is False + call_kwargs = simulator.agent.call_args[1] + assert call_kwargs["structured_output_model"] == CustomOutput + assert result.structured_output.answer == "test" -def test_has_next_detects_stop_token(sample_actor_profile): - """Test has_next returns False when stop token is present.""" +def test_act_custom_model_not_subclass_raises(sample_actor_profile): + """Test act raises TypeError if custom model is not a subclass of ActorOutputBase.""" + + class BadModel(BaseModel): + answer: str + stop: bool = False + message: str | None = None + simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", system_prompt_template="Test: {actor_profile}", ) - # Mock response with stop token - mock_response = MagicMock(spec=AgentResult) - mock_actor_response = ActorResponse(reasoning="Done", message="Thanks! ") - mock_response.structured_output = mock_actor_response - simulator.agent = MagicMock(return_value=mock_response) - - # After act with stop token, has_next should return False - simulator.act("Test message") - assert simulator.has_next() is False - + with pytest.raises(TypeError, match="must be a subclass of ActorOutputBase"): + simulator.act("Test message", structured_output_model=BadModel) -# --------------------------------------------------------------------------- -# input_type + act_structured() -# --------------------------------------------------------------------------- +def test_act_custom_model_without_message_raises(sample_actor_profile): + """Test act raises ValueError if custom model has no message field.""" -class _AgentInput(BaseModel): - """Sample input schema for input_type tests.""" + class NoMessageModel(ActorOutputBase): + answer: str - query: str - urgency: str = "normal" - - -def test_init_without_input_type_uses_simulator_result(sample_actor_profile): - """Without input_type, act_structured() hands ActorStructuredResponse to the underlying agent.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", system_prompt_template="Test: {actor_profile}", ) - mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = ActorStructuredResponse(reasoning="r", stop=False, message="hi") - simulator.agent = MagicMock(return_value=mock_response) - - simulator.act_structured("agent reply") + with pytest.raises(ValueError, match="must have a 'message' field"): + simulator.act("Test message", structured_output_model=NoMessageModel) - assert simulator.agent.call_args[1]["structured_output_model"] is ActorStructuredResponse - -def test_init_with_input_type_narrows_message_schema(sample_actor_profile): - """With input_type, act_structured() hands a ActorStructuredResponse subclass whose message is typed.""" +def test_has_next_returns_true_initially(sample_actor_profile): + """Test has_next returns True before any turns.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", system_prompt_template="Test: {actor_profile}", - input_type=_AgentInput, ) - typed_message = _AgentInput(query="hi") - captured_model: list = [] - - def _capture(agent_message, *, structured_output_model): - captured_model.append(structured_output_model) - mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = structured_output_model(reasoning="r", stop=False, message=typed_message) - return mock_response - - simulator.agent = MagicMock(side_effect=_capture) - - simulator.act_structured("agent reply") - - (model_used,) = captured_model - # It must be a ActorStructuredResponse subclass (so act_structured can return ActorStructuredResponse). - assert issubclass(model_used, ActorStructuredResponse) - # And it must schema-accept input_type instances on `message`. - msg_field = model_used.model_fields["message"] - assert msg_field.annotation == _AgentInput | None + assert simulator.has_next() is True -def test_act_structured_passes_structured_model_to_agent(sample_actor_profile): - """act_structured() reuses the same structured model across turns (cached at construction).""" +def test_has_next_respects_max_turns(sample_actor_profile): + """Test has_next returns False after max_turns reached.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", system_prompt_template="Test: {actor_profile}", - input_type=_AgentInput, + max_turns=3, ) - seen_models: list = [] - - def _capture(agent_message, *, structured_output_model): - seen_models.append(structured_output_model) - mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = structured_output_model( - reasoning="r", stop=False, message=_AgentInput(query="x") - ) - return mock_response - - simulator.agent = MagicMock(side_effect=_capture) + mock_response = MagicMock(spec=AgentResult) + mock_actor_response = ActorResponse(reasoning="Test", message="Continue", stop=False) + mock_response.structured_output = mock_actor_response + simulator.agent = MagicMock(return_value=mock_response) - simulator.act_structured("turn 1") - simulator.act_structured("turn 2") + for _ in range(3): + assert simulator.has_next() is True + simulator.act("Test message") - assert len(seen_models) == 2 - assert seen_models[0] is seen_models[1] # same cached class across turns + assert simulator.has_next() is False -def test_act_structured_continuing_turn(sample_actor_profile): - """act_structured() returns ActorStructuredResponse with stop=False for normal continuing turns.""" +def test_has_next_detects_stop(sample_actor_profile): + """Test has_next returns False when actor signals stop.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", @@ -310,20 +257,16 @@ def test_act_structured_continuing_turn(sample_actor_profile): ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = ActorStructuredResponse(reasoning="thinking", stop=False, message="keep going") + mock_actor_response = ActorResponse(reasoning="Done", message=None, stop=True) + mock_response.structured_output = mock_actor_response simulator.agent = MagicMock(return_value=mock_response) - result = simulator.act_structured("agent reply") - - assert isinstance(result, ActorStructuredResponse) - assert result.message == "keep going" - assert result.reasoning == "thinking" - assert result.stop is False - assert result.stop_reason is None + simulator.act("Test message") + assert simulator.has_next() is False -def test_act_structured_explicit_stop(sample_actor_profile): - """act_structured() records stop_reason='goal_completed' when the LLM sets stop=True.""" +def test_act_sets_stop_reason_goal_completed(sample_actor_profile): + """Test act sets stop_reason to 'goal_completed' when actor signals stop.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", @@ -331,17 +274,17 @@ def test_act_structured_explicit_stop(sample_actor_profile): ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = ActorStructuredResponse(reasoning="done", stop=True, message=None) + mock_actor_response = ActorResponse(reasoning="Done", message=None, stop=True) + mock_response.structured_output = mock_actor_response simulator.agent = MagicMock(return_value=mock_response) - result = simulator.act_structured("agent reply") + result = simulator.act("Test message") - assert result.stop is True - assert result.stop_reason == "goal_completed" + assert result.structured_output.stop_reason == "goal_completed" -def test_act_structured_hits_max_turns(sample_actor_profile): - """act_structured() reports max_turns when the cap trips while LLM said stop=False.""" +def test_act_sets_stop_reason_max_turns(sample_actor_profile): + """Test act sets stop_reason to 'max_turns' when turn cap is reached.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", @@ -350,84 +293,79 @@ def test_act_structured_hits_max_turns(sample_actor_profile): ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = ActorStructuredResponse(reasoning="r", stop=False, message="more please") + mock_actor_response = ActorResponse(reasoning="r", message="more please", stop=False) + mock_response.structured_output = mock_actor_response simulator.agent = MagicMock(return_value=mock_response) - result = simulator.act_structured("agent reply") + result = simulator.act("agent reply") - assert result.stop is True - assert result.stop_reason == "max_turns" + assert result.structured_output.stop is True + assert result.structured_output.stop_reason == "max_turns" -def test_act_structured_input_type_returns_typed_message(sample_actor_profile): - """act_structured() with input_type returns the typed message instance.""" +def test_act_continuing_turn_no_stop_reason(sample_actor_profile): + """Test act leaves stop_reason as None for normal continuing turns.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", system_prompt_template="Test: {actor_profile}", - input_type=_AgentInput, ) - typed_message = _AgentInput(query="ship it", urgency="high") mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = simulator._structured_model(reasoning="r", stop=False, message=typed_message) + mock_actor_response = ActorResponse(reasoning="thinking", message="keep going", stop=False) + mock_response.structured_output = mock_actor_response simulator.agent = MagicMock(return_value=mock_response) - result = simulator.act_structured("agent reply") + result = simulator.act("agent reply") + + assert result.structured_output.stop is False + assert result.structured_output.stop_reason is None + assert result.structured_output.message == "keep going" - assert result.stop is False - assert result.stop_reason is None - assert isinstance(result.message, _AgentInput) - assert result.message.query == "ship it" +def test_act_custom_model_manages_stop(sample_actor_profile): + """When structured_output_model is provided, act() still manages stop via the stop field.""" + + class CustomOutput(ActorOutputBase): + message: str | None = None -def test_act_structured_input_type_null_message_becomes_implicit_stop(sample_actor_profile): - """act_structured() with input_type treats null message + stop=False as implicit goal_completed.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", system_prompt_template="Test: {actor_profile}", - input_type=_AgentInput, ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = simulator._structured_model(reasoning="r", stop=False, message=None) + mock_response.structured_output = CustomOutput(reasoning="r", message="done", stop=True) simulator.agent = MagicMock(return_value=mock_response) - result = simulator.act_structured("agent reply") + simulator.act("agent reply", structured_output_model=CustomOutput) + + assert simulator.stop is True + assert simulator.has_next() is False + - assert result.stop is True - assert result.stop_reason == "goal_completed" - assert result.message is None +def test_act_custom_model_max_turns(sample_actor_profile): + """Custom model path still enforces max_turns.""" + class CustomOutput(ActorOutputBase): + message: str | None = None -def test_act_does_not_use_input_type(sample_actor_profile): - """act() ignores input_type and uses the legacy ActorResponse schema.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", system_prompt_template="Test: {actor_profile}", - input_type=_AgentInput, + max_turns=1, ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = ActorResponse(reasoning="r", message="m") + mock_response.structured_output = CustomOutput(reasoning="r", message="hi", stop=False) simulator.agent = MagicMock(return_value=mock_response) - simulator.act("Test message") - - assert simulator.agent.call_args[1]["structured_output_model"] == ActorResponse + simulator.act("agent reply", structured_output_model=CustomOutput) - -def test_rejects_simulator_result_as_input_type(sample_actor_profile): - """Passing ActorStructuredResponse (or a subclass) as input_type raises at __init__.""" - with pytest.raises(ValueError, match="ActorStructuredResponse"): - ActorSimulator( - actor_profile=sample_actor_profile, - initial_query="Hello", - system_prompt_template="Test: {actor_profile}", - input_type=ActorStructuredResponse, - ) + assert simulator.stop is True + assert simulator.has_next() is False def test_system_prompt_template_none_uses_default(sample_actor_profile): @@ -458,30 +396,23 @@ def test_system_prompt_template_prerendered_passes_through(sample_actor_profile) assert simulator.agent.system_prompt == prerendered -def test_system_prompt_template_autopicks_structured_when_input_type_set(sample_actor_profile): - """When input_type is set and no template is given, the structured-stop template is used.""" - from strands_evals.simulation.prompt_templates.actor_system_prompt import ( - STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE, - ) - +def test_system_prompt_contains_stop_instruction(sample_actor_profile): + """Default prompt instructs the actor to set stop=true.""" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", - input_type=_AgentInput, ) - expected = STRUCTURED_USER_SIMULATOR_PROMPT_TEMPLATE.format(actor_profile=sample_actor_profile.model_dump()) - assert simulator.agent.system_prompt == expected + assert "stop=true" in simulator.agent.system_prompt -def test_explicit_template_overrides_autopick(sample_actor_profile): - """Explicit system_prompt_template wins even when input_type is set.""" +def test_explicit_template_overrides_default(sample_actor_profile): + """Explicit system_prompt_template is used instead of the default.""" custom = "Custom prompt for {actor_profile}" simulator = ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", system_prompt_template=custom, - input_type=_AgentInput, ) assert simulator.agent.system_prompt == custom.format(actor_profile=sample_actor_profile.model_dump()) From 47b9700d392bc07749b36ebf2d5f282adc73ee69 Mon Sep 17 00:00:00 2001 From: poshinchen Date: Thu, 7 May 2026 15:13:29 -0400 Subject: [PATCH 4/6] feat(simulator): add structured_output_model to ActorSimulator.__init__ Allow users to set the structured output model once at construction time instead of passing it on every act() call. The init-level model is used as the default for act() and can still be overridden per-call. Validates at init time: must subclass ActorOutputBase and have a 'message' field. Co-Authored-By: Claude Opus 4.6 --- .../simulation/actor_simulator.py | 33 +++++++- .../simulation/test_actor_simulator.py | 82 +++++++++++++++++++ 2 files changed, 111 insertions(+), 4 deletions(-) diff --git a/src/strands_evals/simulation/actor_simulator.py b/src/strands_evals/simulation/actor_simulator.py index 6c2955fb..4c4eed1b 100644 --- a/src/strands_evals/simulation/actor_simulator.py +++ b/src/strands_evals/simulation/actor_simulator.py @@ -126,6 +126,8 @@ def __init__( tools: list | None = None, model: str | None = None, max_turns: int = 10, + *, + structured_output_model: type[ActorOutputBase] | None = None, ): """Initialize an ActorSimulator with profile and goal. @@ -152,11 +154,19 @@ def __init__( tools: Additional tools available to the actor. Defaults to goal completion tool only. model: Model identifier for the underlying agent. Uses Strands default if None. max_turns: Maximum number of conversation turns before stopping (default: 10). + structured_output_model: Optional Pydantic model to use for all `act()` calls. + Must subclass `ActorOutputBase` and include a `message` field. + When set, `act()` uses this model by default instead of `ActorResponse`. + Can still be overridden per-call via `act(structured_output_model=...)`. Example: ```python from strands_evals.simulation import ActorSimulator - from strands_evals.types.simulation import ActorProfile + from strands_evals.types.simulation import ActorOutputBase, ActorProfile + + class AgentInput(ActorOutputBase): + message: str | None = None + urgency: str = "normal" profile = ActorProfile( traits={"expertise_level": "expert", "communication_style": "technical"}, @@ -167,9 +177,13 @@ def __init__( simulator = ActorSimulator( actor_profile=profile, initial_query="Our service is experiencing high memory usage.", - system_prompt_template="You are simulating: {actor_profile}", - max_turns=15 + structured_output_model=AgentInput, + max_turns=15, ) + + # act() uses AgentInput automatically + result = simulator.act(str(agent_response)) + result.structured_output # AgentInput instance ``` """ self.actor_profile = actor_profile @@ -179,6 +193,17 @@ def __init__( self.stop = False self._turn_count = 0 self._max_turns = max_turns + self._structured_output_model = structured_output_model + + if structured_output_model is not None: + if not issubclass(structured_output_model, ActorOutputBase): + raise TypeError( + f"structured_output_model must be a subclass of ActorOutputBase, got {structured_output_model.__name__}." + ) + if "message" not in structured_output_model.model_fields: + raise ValueError( + f"structured_output_model {structured_output_model.__name__} must have a 'message' field." + ) if system_prompt_template is None: system_prompt_template = DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE @@ -250,7 +275,7 @@ def act( my_output = result.structured_output # MySchema instance ``` """ - model = structured_output_model or ActorResponse + model = structured_output_model or self._structured_output_model or ActorResponse if not issubclass(model, ActorOutputBase): raise TypeError(f"structured_output_model must be a subclass of ActorOutputBase, got {model.__name__}.") diff --git a/tests/strands_evals/simulation/test_actor_simulator.py b/tests/strands_evals/simulation/test_actor_simulator.py index c1cf3299..03b28af4 100644 --- a/tests/strands_evals/simulation/test_actor_simulator.py +++ b/tests/strands_evals/simulation/test_actor_simulator.py @@ -416,3 +416,85 @@ def test_explicit_template_overrides_default(sample_actor_profile): ) assert simulator.agent.system_prompt == custom.format(actor_profile=sample_actor_profile.model_dump()) + + +def test_init_structured_output_model_used_by_act(sample_actor_profile): + """structured_output_model set at init is used as default for act().""" + + class CustomOutput(ActorOutputBase): + message: str | None = None + extra: str = "default" + + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + structured_output_model=CustomOutput, + ) + + mock_response = MagicMock(spec=AgentResult) + mock_response.structured_output = CustomOutput(reasoning="r", message="hi", stop=False) + simulator.agent = MagicMock(return_value=mock_response) + + simulator.act("agent reply") + + call_kwargs = simulator.agent.call_args[1] + assert call_kwargs["structured_output_model"] == CustomOutput + + +def test_init_structured_output_model_overridden_per_call(sample_actor_profile): + """Per-call structured_output_model overrides the init-level default.""" + + class InitModel(ActorOutputBase): + message: str | None = None + + class CallModel(ActorOutputBase): + message: str | None = None + priority: int = 0 + + simulator = ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + structured_output_model=InitModel, + ) + + mock_response = MagicMock(spec=AgentResult) + mock_response.structured_output = CallModel(reasoning="r", message="hi", stop=False) + simulator.agent = MagicMock(return_value=mock_response) + + simulator.act("agent reply", structured_output_model=CallModel) + + call_kwargs = simulator.agent.call_args[1] + assert call_kwargs["structured_output_model"] == CallModel + + +def test_init_structured_output_model_validates_subclass(sample_actor_profile): + """Init raises TypeError if structured_output_model is not a subclass of ActorOutputBase.""" + + class BadModel(BaseModel): + message: str | None = None + stop: bool = False + + with pytest.raises(TypeError, match="must be a subclass of ActorOutputBase"): + ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + structured_output_model=BadModel, + ) + + +def test_init_structured_output_model_validates_message_field(sample_actor_profile): + """Init raises ValueError if structured_output_model has no message field.""" + + class NoMessageModel(ActorOutputBase): + answer: str = "" + + with pytest.raises(ValueError, match="must have a 'message' field"): + ActorSimulator( + actor_profile=sample_actor_profile, + initial_query="Hello", + system_prompt_template="Test: {actor_profile}", + structured_output_model=NoMessageModel, + ) From dc690ab30370a6d41282e75da53d5043294fdcb7 Mon Sep 17 00:00:00 2001 From: poshinchen Date: Thu, 7 May 2026 15:20:09 -0400 Subject: [PATCH 5/6] refactor(simulator): extract _validate_output_model as static method Co-Authored-By: Claude Opus 4.6 --- .../simulation/actor_simulator.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/strands_evals/simulation/actor_simulator.py b/src/strands_evals/simulation/actor_simulator.py index 4c4eed1b..b1a48dcb 100644 --- a/src/strands_evals/simulation/actor_simulator.py +++ b/src/strands_evals/simulation/actor_simulator.py @@ -196,14 +196,7 @@ class AgentInput(ActorOutputBase): self._structured_output_model = structured_output_model if structured_output_model is not None: - if not issubclass(structured_output_model, ActorOutputBase): - raise TypeError( - f"structured_output_model must be a subclass of ActorOutputBase, got {structured_output_model.__name__}." - ) - if "message" not in structured_output_model.model_fields: - raise ValueError( - f"structured_output_model {structured_output_model.__name__} must have a 'message' field." - ) + self._validate_output_model(structured_output_model) if system_prompt_template is None: system_prompt_template = DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE @@ -236,6 +229,18 @@ def _initialize_conversation(self): initial_query_message = {"role": "assistant", "content": [{"text": self.initial_query.strip()}]} self.conversation_history.append(initial_query_message) + @staticmethod + def _validate_output_model(model: type[ActorOutputBase]) -> None: + """Validate that a structured output model is compatible with the simulator.""" + if not issubclass(model, ActorOutputBase): + raise TypeError( + f"structured_output_model must be a subclass of ActorOutputBase, got {model.__name__}." + ) + if "message" not in model.model_fields: + raise ValueError( + f"structured_output_model {model.__name__} must have a 'message' field." + ) + def act( self, agent_message: str, @@ -276,12 +281,7 @@ def act( ``` """ model = structured_output_model or self._structured_output_model or ActorResponse - - if not issubclass(model, ActorOutputBase): - raise TypeError(f"structured_output_model must be a subclass of ActorOutputBase, got {model.__name__}.") - - if "message" not in model.model_fields: - raise ValueError(f"structured_output_model {model.__name__} must have a 'message' field.") + self._validate_output_model(model) response = self.agent(agent_message.strip(), structured_output_model=model) self._turn_count += 1 From 3c477f74071c2f571533fcfb4077c8fe7d6b4275 Mon Sep 17 00:00:00 2001 From: poshinchen Date: Thu, 7 May 2026 15:22:30 -0400 Subject: [PATCH 6/6] refactor(simulator): default _structured_output_model to ActorResponse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplifies the fallback chain in act() — no need for a triple-or since the instance attribute always holds a valid model. Co-Authored-By: Claude Opus 4.6 --- src/strands_evals/simulation/__init__.py | 3 - .../simulation/actor_simulator.py | 51 ++++++++------- src/strands_evals/types/__init__.py | 1 - .../types/simulation/__init__.py | 3 +- src/strands_evals/types/simulation/actor.py | 32 +++------- .../simulation/test_actor_simulator.py | 63 ++++++++++--------- 6 files changed, 67 insertions(+), 86 deletions(-) diff --git a/src/strands_evals/simulation/__init__.py b/src/strands_evals/simulation/__init__.py index 84a8ae68..b41e3140 100644 --- a/src/strands_evals/simulation/__init__.py +++ b/src/strands_evals/simulation/__init__.py @@ -1,5 +1,3 @@ -from strands_evals.types.simulation import ActorOutputBase - from .actor_simulator import ActorSimulator from .prompt_templates.actor_system_prompt import DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE from .tool_simulator import ToolSimulator @@ -8,7 +6,6 @@ UserSimulator = ActorSimulator __all__ = [ - "ActorOutputBase", "ActorSimulator", "UserSimulator", "ToolSimulator", diff --git a/src/strands_evals/simulation/actor_simulator.py b/src/strands_evals/simulation/actor_simulator.py index b1a48dcb..eb686b19 100644 --- a/src/strands_evals/simulation/actor_simulator.py +++ b/src/strands_evals/simulation/actor_simulator.py @@ -1,6 +1,8 @@ import logging import random +from typing import Any, cast +from pydantic import BaseModel from strands import Agent from strands.agent.agent_result import AgentResult from strands.types.content import Message @@ -10,7 +12,7 @@ from strands_evals.simulation.prompt_templates.actor_profile_extraction import ACTOR_PROFILE_PROMPT_TEMPLATE from strands_evals.simulation.prompt_templates.actor_system_prompt import DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE from strands_evals.simulation.tools.goal_completion import get_conversation_goal_completion -from strands_evals.types.simulation import ActorOutputBase, ActorProfile, ActorResponse +from strands_evals.types.simulation import ActorProfile, ActorResponse logger = logging.getLogger(__name__) @@ -127,7 +129,7 @@ def __init__( model: str | None = None, max_turns: int = 10, *, - structured_output_model: type[ActorOutputBase] | None = None, + structured_output_model: type[BaseModel] | None = None, ): """Initialize an ActorSimulator with profile and goal. @@ -155,16 +157,19 @@ def __init__( model: Model identifier for the underlying agent. Uses Strands default if None. max_turns: Maximum number of conversation turns before stopping (default: 10). structured_output_model: Optional Pydantic model to use for all `act()` calls. - Must subclass `ActorOutputBase` and include a `message` field. + Must have `message` and `stop` fields. When set, `act()` uses this model by default instead of `ActorResponse`. Can still be overridden per-call via `act(structured_output_model=...)`. Example: ```python from strands_evals.simulation import ActorSimulator - from strands_evals.types.simulation import ActorOutputBase, ActorProfile + from pydantic import BaseModel + from strands_evals.types.simulation import ActorProfile - class AgentInput(ActorOutputBase): + class SimulatorResult(BaseModel): + reasoning: str = "" + stop: bool = False message: str | None = None urgency: str = "normal" @@ -177,13 +182,13 @@ class AgentInput(ActorOutputBase): simulator = ActorSimulator( actor_profile=profile, initial_query="Our service is experiencing high memory usage.", - structured_output_model=AgentInput, + structured_output_model=SimulatorResult, max_turns=15, ) - # act() uses AgentInput automatically + # act() uses SimulatorResult automatically result = simulator.act(str(agent_response)) - result.structured_output # AgentInput instance + result.structured_output # SimulatorResult instance ``` """ self.actor_profile = actor_profile @@ -193,7 +198,7 @@ class AgentInput(ActorOutputBase): self.stop = False self._turn_count = 0 self._max_turns = max_turns - self._structured_output_model = structured_output_model + self._structured_output_model = structured_output_model or ActorResponse if structured_output_model is not None: self._validate_output_model(structured_output_model) @@ -230,22 +235,18 @@ def _initialize_conversation(self): self.conversation_history.append(initial_query_message) @staticmethod - def _validate_output_model(model: type[ActorOutputBase]) -> None: - """Validate that a structured output model is compatible with the simulator.""" - if not issubclass(model, ActorOutputBase): - raise TypeError( - f"structured_output_model must be a subclass of ActorOutputBase, got {model.__name__}." - ) + def _validate_output_model(model: type) -> None: + """Validate that a structured output model has the required fields for the simulator.""" if "message" not in model.model_fields: - raise ValueError( - f"structured_output_model {model.__name__} must have a 'message' field." - ) + raise ValueError(f"structured_output_model {model.__name__} must have a 'message' field.") + if "stop" not in model.model_fields: + raise ValueError(f"structured_output_model {model.__name__} must have a 'stop' field.") def act( self, agent_message: str, *, - structured_output_model: type[ActorOutputBase] | None = None, + structured_output_model: type[BaseModel] | None = None, ) -> AgentResult: """Generate the next actor message in the conversation. @@ -254,15 +255,13 @@ def act( `AgentResult` whose `structured_output` is an `ActorResponse` (or the caller-provided `structured_output_model`). - The provided model must subclass `ActorOutputBase` and have a `message` - field. A `TypeError` is raised if not a subclass, and `ValueError` if - `message` is missing. + The provided model must have `message` and `stop` fields. + A `ValueError` is raised if either is missing. Args: agent_message: The agent's response to react to. structured_output_model: Optional Pydantic model to use instead of - `ActorResponse`. Must subclass `ActorOutputBase` and include a - `message` field. + `ActorResponse`. Must have `message` and `stop` fields. Returns: AgentResult with `structured_output` set to either `ActorResponse` @@ -280,13 +279,13 @@ def act( my_output = result.structured_output # MySchema instance ``` """ - model = structured_output_model or self._structured_output_model or ActorResponse + model = structured_output_model or self._structured_output_model self._validate_output_model(model) response = self.agent(agent_message.strip(), structured_output_model=model) self._turn_count += 1 - result = response.structured_output + result = cast(Any, response.structured_output) if result.stop: self.stop = True diff --git a/src/strands_evals/types/__init__.py b/src/strands_evals/types/__init__.py index b9c29068..8d38173a 100644 --- a/src/strands_evals/types/__init__.py +++ b/src/strands_evals/types/__init__.py @@ -17,7 +17,6 @@ "TaskOutput", "EvaluationData", "EvaluationOutput", - "ActorOutputBase", "ActorProfile", "ActorResponse", "InputT", diff --git a/src/strands_evals/types/simulation/__init__.py b/src/strands_evals/types/simulation/__init__.py index 8a815e70..d53fe2ba 100644 --- a/src/strands_evals/types/simulation/__init__.py +++ b/src/strands_evals/types/simulation/__init__.py @@ -1,9 +1,8 @@ """Data models for actor simulation.""" -from .actor import ActorOutputBase, ActorProfile, ActorResponse +from .actor import ActorProfile, ActorResponse __all__ = [ - "ActorOutputBase", "ActorProfile", "ActorResponse", ] diff --git a/src/strands_evals/types/simulation/actor.py b/src/strands_evals/types/simulation/actor.py index 33e19fc7..a1a3cc19 100644 --- a/src/strands_evals/types/simulation/actor.py +++ b/src/strands_evals/types/simulation/actor.py @@ -21,30 +21,7 @@ class ActorProfile(BaseModel): ) -class ActorOutputBase(BaseModel): - """Base class for actor simulator structured output models. - - Any model passed as `structured_output_model` to `ActorSimulator.act()` - must subclass this. The simulator reads `stop` and `reasoning` from the - result to manage conversation state. - - Subclasses must also define a `message` field (of any type) — this is - validated at runtime by the simulator rather than enforced here, so - subclasses are free to type `message` however they need. - - Attributes: - reasoning: Internal reasoning process for the response. - stop: `True` when the actor signals the conversation should end. - """ - - reasoning: str = Field(..., description="Reasoning for the actor's response") - stop: bool = Field( - False, - description="Set to true when the conversation goal is met or the conversation should end.", - ) - - -class ActorResponse(ActorOutputBase): +class ActorResponse(BaseModel): """Default structured response from the actor simulator. Used as the LLM structured-output schema for `ActorSimulator.act` when no @@ -52,6 +29,8 @@ class ActorResponse(ActorOutputBase): `stop`, and `message`. The simulator fills `stop_reason` after the LLM call. Attributes: + reasoning: Internal reasoning process for the response. + stop: `True` when the actor signals the conversation should end. message: The actual message content from the actor. `None` when `stop=True`. stop_reason: Why the conversation ended. One of `"goal_completed"`, `"max_turns"`, or `None` while ongoing. Populated by the simulator @@ -60,6 +39,11 @@ class ActorResponse(ActorOutputBase): model_config = ConfigDict(arbitrary_types_allowed=True) + reasoning: str = Field(..., description="Reasoning for the actor's response") + stop: bool = Field( + False, + description="Set to true when the conversation goal is met or the conversation should end.", + ) message: str | None = Field( None, description="The actor's next message to the agent. Provide when stop=false; set to null when stop=true.", diff --git a/tests/strands_evals/simulation/test_actor_simulator.py b/tests/strands_evals/simulation/test_actor_simulator.py index 03b28af4..59135088 100644 --- a/tests/strands_evals/simulation/test_actor_simulator.py +++ b/tests/strands_evals/simulation/test_actor_simulator.py @@ -8,7 +8,7 @@ from strands_evals import Case from strands_evals.simulation import ActorSimulator -from strands_evals.types.simulation import ActorOutputBase, ActorProfile, ActorResponse +from strands_evals.types.simulation import ActorProfile, ActorResponse @pytest.fixture @@ -158,9 +158,10 @@ def test_act_uses_actor_response_by_default(sample_actor_profile): def test_act_with_custom_structured_output_model(sample_actor_profile): """Test act passes custom structured_output_model to the agent.""" - class CustomOutput(ActorOutputBase): + class CustomOutput(BaseModel): answer: str confidence: float + stop: bool = False message: str | None = None simulator = ActorSimulator( @@ -170,9 +171,7 @@ class CustomOutput(ActorOutputBase): ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = CustomOutput( - reasoning="r", answer="test", confidence=0.9, stop=False, message="hi" - ) + mock_response.structured_output = CustomOutput(answer="test", confidence=0.9, stop=False, message="hi") simulator.agent = MagicMock(return_value=mock_response) result = simulator.act("Test message", structured_output_model=CustomOutput) @@ -182,12 +181,10 @@ class CustomOutput(ActorOutputBase): assert result.structured_output.answer == "test" -def test_act_custom_model_not_subclass_raises(sample_actor_profile): - """Test act raises TypeError if custom model is not a subclass of ActorOutputBase.""" +def test_act_custom_model_without_stop_raises(sample_actor_profile): + """Test act raises ValueError if custom model has no stop field.""" - class BadModel(BaseModel): - answer: str - stop: bool = False + class NoStopModel(BaseModel): message: str | None = None simulator = ActorSimulator( @@ -196,15 +193,16 @@ class BadModel(BaseModel): system_prompt_template="Test: {actor_profile}", ) - with pytest.raises(TypeError, match="must be a subclass of ActorOutputBase"): - simulator.act("Test message", structured_output_model=BadModel) + with pytest.raises(ValueError, match="must have a 'stop' field"): + simulator.act("Test message", structured_output_model=NoStopModel) def test_act_custom_model_without_message_raises(sample_actor_profile): """Test act raises ValueError if custom model has no message field.""" - class NoMessageModel(ActorOutputBase): - answer: str + class NoMessageModel(BaseModel): + answer: str = "" + stop: bool = False simulator = ActorSimulator( actor_profile=sample_actor_profile, @@ -326,7 +324,8 @@ def test_act_continuing_turn_no_stop_reason(sample_actor_profile): def test_act_custom_model_manages_stop(sample_actor_profile): """When structured_output_model is provided, act() still manages stop via the stop field.""" - class CustomOutput(ActorOutputBase): + class CustomOutput(BaseModel): + stop: bool = False message: str | None = None simulator = ActorSimulator( @@ -336,7 +335,7 @@ class CustomOutput(ActorOutputBase): ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = CustomOutput(reasoning="r", message="done", stop=True) + mock_response.structured_output = CustomOutput(message="done", stop=True) simulator.agent = MagicMock(return_value=mock_response) simulator.act("agent reply", structured_output_model=CustomOutput) @@ -348,7 +347,8 @@ class CustomOutput(ActorOutputBase): def test_act_custom_model_max_turns(sample_actor_profile): """Custom model path still enforces max_turns.""" - class CustomOutput(ActorOutputBase): + class CustomOutput(BaseModel): + stop: bool = False message: str | None = None simulator = ActorSimulator( @@ -359,7 +359,7 @@ class CustomOutput(ActorOutputBase): ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = CustomOutput(reasoning="r", message="hi", stop=False) + mock_response.structured_output = CustomOutput(message="hi", stop=False) simulator.agent = MagicMock(return_value=mock_response) simulator.act("agent reply", structured_output_model=CustomOutput) @@ -421,7 +421,8 @@ def test_explicit_template_overrides_default(sample_actor_profile): def test_init_structured_output_model_used_by_act(sample_actor_profile): """structured_output_model set at init is used as default for act().""" - class CustomOutput(ActorOutputBase): + class CustomOutput(BaseModel): + stop: bool = False message: str | None = None extra: str = "default" @@ -433,7 +434,7 @@ class CustomOutput(ActorOutputBase): ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = CustomOutput(reasoning="r", message="hi", stop=False) + mock_response.structured_output = CustomOutput(message="hi", stop=False) simulator.agent = MagicMock(return_value=mock_response) simulator.act("agent reply") @@ -445,10 +446,12 @@ class CustomOutput(ActorOutputBase): def test_init_structured_output_model_overridden_per_call(sample_actor_profile): """Per-call structured_output_model overrides the init-level default.""" - class InitModel(ActorOutputBase): + class InitModel(BaseModel): + stop: bool = False message: str | None = None - class CallModel(ActorOutputBase): + class CallModel(BaseModel): + stop: bool = False message: str | None = None priority: int = 0 @@ -460,7 +463,7 @@ class CallModel(ActorOutputBase): ) mock_response = MagicMock(spec=AgentResult) - mock_response.structured_output = CallModel(reasoning="r", message="hi", stop=False) + mock_response.structured_output = CallModel(message="hi", stop=False) simulator.agent = MagicMock(return_value=mock_response) simulator.act("agent reply", structured_output_model=CallModel) @@ -469,27 +472,27 @@ class CallModel(ActorOutputBase): assert call_kwargs["structured_output_model"] == CallModel -def test_init_structured_output_model_validates_subclass(sample_actor_profile): - """Init raises TypeError if structured_output_model is not a subclass of ActorOutputBase.""" +def test_init_structured_output_model_validates_stop_field(sample_actor_profile): + """Init raises ValueError if structured_output_model has no stop field.""" - class BadModel(BaseModel): + class NoStopModel(BaseModel): message: str | None = None - stop: bool = False - with pytest.raises(TypeError, match="must be a subclass of ActorOutputBase"): + with pytest.raises(ValueError, match="must have a 'stop' field"): ActorSimulator( actor_profile=sample_actor_profile, initial_query="Hello", system_prompt_template="Test: {actor_profile}", - structured_output_model=BadModel, + structured_output_model=NoStopModel, ) def test_init_structured_output_model_validates_message_field(sample_actor_profile): """Init raises ValueError if structured_output_model has no message field.""" - class NoMessageModel(ActorOutputBase): + class NoMessageModel(BaseModel): answer: str = "" + stop: bool = False with pytest.raises(ValueError, match="must have a 'message' field"): ActorSimulator(