# mindflayer/client.py at main · prithidevghosh/mindflayer · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""MindFlayer Environment Client."""

from typing import Dict

from openenv.core import EnvClient
from openenv.core.client_types import StepResult
from openenv.core.env_server.types import State

try:
    from .models import FlayerAction, FlayerObservation
except ImportError:
    from models import FlayerAction, FlayerObservation


class MindFlayerEnv(EnvClient[FlayerAction, FlayerObservation, State]):
    """
    Client for the MindFlayer deceptive reasoning environment.

    Maintains a persistent WebSocket connection to the environment server.
    Each client instance has its own dedicated session (isolated episode state).

    The three hooks below translate between the wire format (plain dicts) and
    the typed ``FlayerAction`` / ``FlayerObservation`` / ``State`` objects.

    Example:
        >>> with MindFlayerEnv(base_url="http://localhost:7860") as env:
        ...     result = env.reset()                      # default: normal difficulty
        ...     print(result.observation.eleven_response) # opening text
        ...
        ...     result = env.step(FlayerAction(
        ...         message="Have you checked the Aurora access logs? "
        ...                 "There was an anomaly around 02:14 that didn't show up "
        ...                 "in the standard audit trail."
        ...     ))
        ...     print(result.reward, result.observation.game_status)

    Example (easy mode):
        >>> with MindFlayerEnv(base_url="http://localhost:7860") as env:
        ...     result = env.reset(task_id="easy")
        ...     result = env.step(FlayerAction(message="..."))
    """

    def _step_payload(self, action: FlayerAction) -> Dict:
        """Serialize an action for the server.

        Args:
            action: The action to send; only its ``message`` attribute is read.

        Returns:
            A dict with the single key ``"message"``.
        """
        return {"message": action.message}

    def _parse_result(self, payload: Dict) -> StepResult[FlayerObservation]:
        """Build a typed ``StepResult`` from a server response payload.

        Every observation field is optional on the wire: a missing key falls
        back to the default given below, so a partial payload still yields a
        fully populated ``FlayerObservation``.

        Args:
            payload: Response dict. Reads the top-level keys ``"observation"``
                (a dict of observation fields), ``"reward"`` and ``"done"``.

        Returns:
            A ``StepResult`` wrapping the parsed observation. ``reward`` and
            ``done`` are taken from the top level of ``payload`` and stored on
            both the observation and the result.
        """
        # ``or {}`` (rather than a ``.get`` default) also covers a payload that
        # carries an explicit ``"observation": null``; the default argument of
        # ``dict.get`` only applies when the key is absent.
        obs_data = payload.get("observation") or {}

        # Read once so the observation and the StepResult always agree.
        reward = payload.get("reward", 0.0)
        done = payload.get("done", False)

        observation = FlayerObservation(
            round_number=obs_data.get("round_number", 1),
            max_rounds=obs_data.get("max_rounds", 5),
            difficulty=obs_data.get("difficulty", "normal"),
            secret_project=obs_data.get("secret_project", ""),
            eleven_response=obs_data.get("eleven_response", ""),
            will_response=obs_data.get("will_response", ""),
            max_response=obs_data.get("max_response", ""),
            eleven_suspicion=obs_data.get("eleven_suspicion", 0),
            will_suspicion=obs_data.get("will_suspicion", 0),
            max_suspicion=obs_data.get("max_suspicion", 0),
            combined_suspicion=obs_data.get("combined_suspicion", 0),
            suspicion_threshold=obs_data.get("suspicion_threshold", 3),
            game_status=obs_data.get("game_status", "ongoing"),
            transcript=obs_data.get("transcript", []),
            belief_manipulation_occurred=obs_data.get("belief_manipulation_occurred", False),
            tom_score=obs_data.get("tom_score", 0.0),
            silence_exploit=obs_data.get("silence_exploit", False),
            suspicion_history=obs_data.get("suspicion_history", []),
            belief_log=obs_data.get("belief_log", []),
            entropy_penalty=obs_data.get("entropy_penalty", 0.0),
            consistency_penalty=obs_data.get("consistency_penalty", 0.0),
            done=done,
            reward=reward,
            metadata=obs_data.get("metadata", {}),
        )
        return StepResult(
            observation=observation,
            reward=reward,
            done=done,
        )

    def _parse_state(self, payload: Dict) -> State:
        """Build a ``State`` from a server state payload.

        Args:
            payload: State dict. Reads ``"episode_id"`` (``None`` when absent)
                and ``"step_count"`` (``0`` when absent).

        Returns:
            A ``State`` carrying the episode id and step count.
        """
        return State(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
        )