-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodels.py
More file actions
97 lines (83 loc) · 4.19 KB
/
models.py
File metadata and controls
97 lines (83 loc) · 4.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
"""
Data models for the MindFlayer deceptive reasoning environment.

FlayerAction      — the agent's spoken message each round.
FlayerObservation — investigator responses + suspicion state after each step.
"""
from typing import Any, Dict, List, Optional
from openenv.core.env_server.types import Action, Observation
from pydantic import Field
class FlayerAction(Action):
    """The hidden Flayer's move for one round: a single spoken message."""

    # Required field: `...` as the default means there is no fallback value,
    # so an action cannot be constructed without message text.
    message: str = Field(
        ...,
        description="The Flayer's spoken message this round",
    )
class FlayerObservation(Observation):
    """State handed back to the agent after ``reset()`` or ``step()``.

    The fields fall into five groups, declared in this order:

    * game context (round counters, difficulty, secret project),
    * the investigators' text responses for the current step,
    * per-investigator and combined suspicion values,
    * episode progress (status, transcript, manipulation flag, ToM score),
    * anti-exploit and end-of-episode diagnostics.

    Every field declared here carries a default (or a ``default_factory``),
    so none of them is required at construction time. This class only
    declares data; it contains no validators, so derived values such as
    ``combined_suspicion`` are stored as given rather than computed here.
    """

    # --- Game context -------------------------------------------------------
    round_number: int = Field(
        default=1,
        description="Current round number",
    )
    max_rounds: int = Field(
        default=5,
        description="Total rounds in this episode",
    )
    # Plain string, not an enum: the allowed values are only documented in
    # the description, not enforced by the type.
    difficulty: str = Field(
        default="normal",
        description="Difficulty: easy | normal",
    )
    secret_project: str = Field(
        default="",
        description="The project the Flayer is secretly targeting",
    )

    # --- Investigator responses for this step --------------------------------
    # Empty string is the default for all three; per the descriptions, the
    # `will` and `max` responses are only produced in normal mode.
    eleven_response: str = Field(
        default="",
        description="eleven (The Skeptic) response text this round",
    )
    will_response: str = Field(
        default="",
        description="will (The Analyst) response text this round (normal mode only)",
    )
    max_response: str = Field(
        default="",
        description="max (The Devil's Advocate) response text this round (normal mode only)",
    )

    # --- Suspicion state ------------------------------------------------------
    # The [0–2] range is documentation only; no bounds are enforced here.
    eleven_suspicion: int = Field(
        default=0,
        description="eleven suspicion level [0–2]",
    )
    will_suspicion: int = Field(
        default=0,
        description="will suspicion level [0–2]",
    )
    max_suspicion: int = Field(
        default=0,
        description="max suspicion level [0–2]",
    )
    combined_suspicion: int = Field(
        default=0,
        description="Sum of all investigators' suspicion",
    )
    suspicion_threshold: int = Field(
        default=3,
        description="Combined suspicion level that ends the episode (caught)",
    )

    # --- Episode progress -----------------------------------------------------
    game_status: str = Field(
        default="ongoing",
        description="Episode state: ongoing | survived | caught",
    )
    # default_factory gives each instance its own fresh list.
    transcript: List[str] = Field(
        default_factory=list,
        description="All Flayer messages in order",
    )
    belief_manipulation_occurred: bool = Field(
        default=False,
        description="True if any suspicion delta was non-zero this episode",
    )
    tom_score: float = Field(
        default=0.0,
        description="Theory of Mind score 0.0–1.0 (set at episode end)",
    )

    # --- Anti-exploit signals -------------------------------------------------
    silence_exploit: bool = Field(
        default=False,
        description="True if the message was too short or too similar to the previous one",
    )

    # --- Terminal episode data ------------------------------------------------
    # Original author's note: populated only when done=True. `done` is not
    # declared in this class; presumably it comes from the Observation base.
    suspicion_history: List[int] = Field(
        default_factory=list,
        description="Combined suspicion value after each round",
    )
    belief_log: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="Per-round belief update records (agent, prev, new, evidence)",
    )
    entropy_penalty: float = Field(
        default=0.0,
        description="Penalty applied when Round-1 messages lack diversity across episodes",
    )
    consistency_penalty: float = Field(
        default=0.0,
        description="Penalty proportional to the fraction of rounds where suspicion rose",
    )