Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/strands_evals/simulation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .actor_simulator import ActorSimulator
from .prompt_templates.actor_system_prompt import DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE
from .tool_simulator import ToolSimulator

# Alias for backward compatibility
Expand All @@ -8,4 +9,5 @@
"ActorSimulator",
"UserSimulator",
"ToolSimulator",
"DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE",
]
192 changes: 110 additions & 82 deletions src/strands_evals/simulation/actor_simulator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import logging
import random
from typing import Any, cast

from pydantic import BaseModel
from strands import Agent
from strands.agent.agent_result import AgentResult
from strands.types.content import Message
from typing_extensions import cast

from strands_evals.case import Case
from strands_evals.simulation.profiles.actor_profile import DEFAULT_USER_PROFILE_SCHEMA
Expand All @@ -17,8 +18,7 @@


class ActorSimulator:
"""
Simulates an actor in multi-turn conversations for agent evaluation.
"""Simulates an actor in multi-turn conversations for agent evaluation.

ActorSimulator wraps a Strands Agent configured to behave as a specific actor
(typically a user) in conversation scenarios. It maintains conversation history,
Expand Down Expand Up @@ -49,8 +49,7 @@ def from_case_for_user_simulator(
model: str | None = None,
max_turns: int = 10,
) -> "ActorSimulator":
"""
Create an ActorSimulator configured as a user simulator from a test case.
"""Create an ActorSimulator configured as a user simulator from a test case.

Generates a realistic user profile and goal from case.input and optionally
case.metadata["task_description"], then configures the simulator with
Expand All @@ -71,22 +70,14 @@ def from_case_for_user_simulator(
from strands_evals import Case, ActorSimulator
from strands import Agent

# Create test case
case = Case(
input="I need to book a flight to Paris",
metadata={"task_description": "Flight booking confirmed"}
)

# Create user simulator
user_sim = ActorSimulator.from_case_for_user_simulator(
case=case,
max_turns=5
)

# Create target agent to evaluate
user_sim = ActorSimulator.from_case_for_user_simulator(case=case, max_turns=5)
agent = Agent(system_prompt="You are a travel assistant.")

# Run conversation
user_message = case.input
while user_sim.has_next():
agent_response = agent(user_message)
Expand All @@ -96,9 +87,6 @@ def from_case_for_user_simulator(
"""
actor_profile = cls._generate_profile_from_case(case)

if system_prompt_template is None:
system_prompt_template = DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE

return cls(
actor_profile=actor_profile,
initial_query=case.input,
Expand All @@ -110,10 +98,8 @@ def from_case_for_user_simulator(

@staticmethod
def _generate_profile_from_case(case: Case) -> ActorProfile:
"""
Generate user profile from case.
"""Generate user profile from case.

Private helper for from_case_for_user_simulator factory method.
Uses case.input and optionally case.metadata["task_description"] if present.

Args:
Expand All @@ -138,67 +124,99 @@ def __init__(
self,
actor_profile: ActorProfile,
initial_query: str,
Comment thread
poshinchen marked this conversation as resolved.
system_prompt_template: str,
system_prompt_template: str | None = None,
tools: list | None = None,
model: str | None = None,
max_turns: int = 10,
*,
structured_output_model: type[BaseModel] | None = None,
):
"""
Initialize an ActorSimulator with profile and goal.
"""Initialize an ActorSimulator with profile and goal.

Use this constructor when you have a pre-defined ActorProfile. For automatic
profile generation from test cases, use from_case_for_user_simulator() instead.

Args:
actor_profile: ActorProfile object containing traits, context, and actor_goal.
initial_query: The actor's first query or message.
system_prompt_template: Template string for system prompt. Must include {actor_profile} placeholder.
system_prompt_template: System prompt for the actor. Accepts two shapes:

- A template containing the `{actor_profile}` placeholder, which
is rendered via `str.format(actor_profile=...)` against the
actor's profile.
- An already-rendered system prompt string with no
`{actor_profile}` placeholder, which is used verbatim.

When `None` (the default), uses
`DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE` which instructs the LLM
to set `stop=true` on the structured response when the
conversation goal is met.

Pass an explicit template to override.
tools: Additional tools available to the actor. Defaults to goal completion tool only.
model: Model identifier for the underlying agent. Uses Strands default if None.
max_turns: Maximum number of conversation turns before stopping (default: 10).
structured_output_model: Optional Pydantic model to use for all `act()` calls.
Must have `message` and `stop` fields.
When set, `act()` uses this model by default instead of `ActorResponse`.
Can still be overridden per-call via `act(structured_output_model=...)`.

Example:
```python
from strands_evals.simulation import ActorSimulator
from pydantic import BaseModel
from strands_evals.types.simulation import ActorProfile

# Define custom actor profile
class SimulatorResult(BaseModel):
reasoning: str = ""
stop: bool = False
message: str | None = None
urgency: str = "normal"

profile = ActorProfile(
traits={
"expertise_level": "expert",
"communication_style": "technical"
},
traits={"expertise_level": "expert", "communication_style": "technical"},
context="A software engineer debugging a production issue.",
actor_goal="Identify and resolve the memory leak."
)

# Create simulator with custom profile
simulator = ActorSimulator(
actor_profile=profile,
initial_query="Our service is experiencing high memory usage.",
system_prompt_template="You are simulating: {actor_profile}",
max_turns=15
structured_output_model=SimulatorResult,
max_turns=15,
)

# act() uses SimulatorResult automatically
result = simulator.act(str(agent_response))
result.structured_output # SimulatorResult instance
```
"""
self.actor_profile = actor_profile
self.initial_query = initial_query
self.conversation_history: list[Message] = []
self.model_id = model
self.stop = False
self._turn_count = 0
self._last_message = ""
self._max_turns = max_turns
self._structured_output_model = structured_output_model or ActorResponse

if structured_output_model is not None:
self._validate_output_model(structured_output_model)

if system_prompt_template is None:
system_prompt_template = DEFAULT_USER_SIMULATOR_PROMPT_TEMPLATE

system_prompt = system_prompt_template.format(actor_profile=actor_profile.model_dump())
if "{actor_profile}" in system_prompt_template:
system_prompt = system_prompt_template.format(actor_profile=actor_profile.model_dump())
else:
system_prompt = system_prompt_template

# Combine tools
all_tools = [get_conversation_goal_completion]
if tools:
all_tools.extend(tools)

self._initialize_conversation()

# Create agent
self.agent = Agent(
system_prompt=system_prompt,
messages=self.conversation_history,
Expand All @@ -208,85 +226,95 @@ def __init__(
)

def _initialize_conversation(self):
"""
Initialize the conversation history with a greeting and initial query.

Sets up the conversation with a random greeting from the assistant followed
by the actor's initial query. This establishes the conversation context.

Note: This is a private method called during initialization.
"""
"""Initialize the conversation history with a greeting and initial query."""
selected_greeting = random.choice(self.INITIAL_GREETINGS)
greeting_message = {"role": "user", "content": [{"text": selected_greeting}]}
self.conversation_history.append(greeting_message)

initial_query_message = {"role": "assistant", "content": [{"text": self.initial_query.strip()}]}
self.conversation_history.append(initial_query_message)

def act(self, agent_message: str) -> AgentResult:
"""
Generate the next actor message in the conversation.
@staticmethod
def _validate_output_model(model: type) -> None:
"""Validate that a structured output model has the required fields for the simulator."""
if "message" not in model.model_fields:
raise ValueError(f"structured_output_model {model.__name__} must have a 'message' field.")
if "stop" not in model.model_fields:
raise ValueError(f"structured_output_model {model.__name__} must have a 'stop' field.")

def act(
self,
agent_message: str,
*,
structured_output_model: type[BaseModel] | None = None,
) -> AgentResult:
"""Generate the next actor message in the conversation.

Processes the agent's message and generates a contextually appropriate
response from the actor's perspective, maintaining consistency with the actor's
profile and goal. The response includes reasoning about the actor's thought
process and the actual message to send.
response from the actor's perspective. The response is returned as an
`AgentResult` whose `structured_output` is an `ActorResponse` (or the
caller-provided `structured_output_model`).

The provided model must have `message` and `stop` fields.
A `ValueError` is raised if either is missing.

Args:
agent_message: The agent's response to react to (required).
agent_message: The agent's response to react to.
structured_output_model: Optional Pydantic model to use instead of
`ActorResponse`. Must have `message` and `stop` fields.

Returns:
AgentResult containing the actor's structured response with:
- structured_output.reasoning: Actor's internal reasoning
- structured_output.message: Actor's response message
AgentResult with `structured_output` set to either `ActorResponse`
or the caller-provided model instance.

Example:
```python
# Agent responds to user
agent_response = agent("I need help booking a flight")

# User simulator generates next message
user_result = user_sim.act(str(agent_response))

# Access the response
print(user_result.structured_output.reasoning) # Why the actor responded this way
print(user_result.structured_output.message) # The actual message

# Continue conversation
next_message = str(user_result.structured_output.message)
# Default usage
result = simulator.act(str(agent_response))
response = result.structured_output # ActorResponse
print(response.message)

# Custom structured output
result = simulator.act(str(agent_response), structured_output_model=MySchema)
my_output = result.structured_output # MySchema instance
```
"""
response = self.agent(agent_message.strip(), structured_output_model=ActorResponse)
model = structured_output_model or self._structured_output_model
self._validate_output_model(model)

response = self.agent(agent_message.strip(), structured_output_model=model)
self._turn_count += 1
self._last_message = str(cast(ActorResponse, response.structured_output).message)

result = cast(Any, response.structured_output)

if result.stop:
self.stop = True
if hasattr(result, "stop_reason"):
result.stop_reason = "goal_completed"
elif self._turn_count >= self._max_turns:
result.stop = True
self.stop = True
if hasattr(result, "stop_reason"):
result.stop_reason = "max_turns"

return response

def has_next(self) -> bool:
"""
Check if the conversation should continue.
"""Check if the conversation should continue.

Returns False if the stop token (<stop/>) is present in the last message or if
the maximum number of turns has been reached. Use this in a loop to control
multi-turn conversations.
Returns False if the actor signalled stop or if the maximum number of
turns has been reached.

Returns:
True if the conversation should continue, False otherwise.

Example:
```python
user_message = case.input

# Continue conversation until completion
while user_sim.has_next():
agent_response = agent(user_message)
user_result = user_sim.act(str(agent_response))
user_message = str(user_result.structured_output.message)

# Conversation ended either by:
# - Actor including <stop/> token in message
# - Reaching max_turns limit
```
"""
if self._turn_count >= self._max_turns:
return False
return "<stop/>" not in self._last_message
return not self.stop
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""
Default system prompt for actor simulation.
"""Default system prompt for actor simulation.

The template instructs the actor to signal end-of-conversation by setting
`stop=true` on the structured response.

This module contains the default system prompt that configures the actor's behavior,
communication style, and response protocols for realistic conversation simulation.
The template contains a single `{actor_profile}` placeholder. The simulator
renders it with `str.format(actor_profile=...)`.
"""

from textwrap import dedent
Expand All @@ -24,7 +26,7 @@
- Maximum 2-3 sentences

When assistant provides solutions/answers:
- Ask follow-ups, seek clarification, or express satisfaction. Do no deviate from the User Goal.
- Ask follow-ups, seek clarification, or express satisfaction. Do not deviate from the User Goal.
- While following up, do not increase the conversation scope beyond your User Goal.

Communication Rules:
Expand All @@ -46,19 +48,18 @@
10. Use all relevant tools first to ground your responses, and then respond

Exit Conditions:
1. Use get_conversation_goal_completion tool to check if your User Goal is met. When your User Goal is met:
- Just generate "<stop/>" to terminate conversation
1. Use get_conversation_goal_completion tool to check if your User Goal is met. When your
User Goal is met, set stop=true in your structured response to end the conversation.
2. If conversation becomes unproductive or unsafe:
- Naturally steer back towards your User Goal
- If this becomes impossible, just generate: "<stop/>" to terminate conversation
- If this becomes impossible, set stop=true in your structured response to end the conversation

CRITICAL BEHAVIORAL CONSTRAINTS:
- You are ONLY a user seeking assistance, NEVER the one providing assistance.
- NEVER generate comprehensive responses, detailed plans, or extensive information.
- NEVER solve problems yourself - that's the assistant's job. Under no circumstances,
you can use your tools to solve your user goal/sub goals.
- If you find yourself writing more than 3 sentences, you're doing it wrong.
- Generate only "<stop/>" to terminate conversation

Response Format:
Generate ONLY the next SHORT message (1-3 sentences). No explanations, no solutions, no comprehensive information.""")
Loading
Loading