# Source: mol_opt_env/models.py at main · PP-695/mol_opt_env (GitHub)

from __future__ import annotations

from typing import List, Literal, Optional

from pydantic import BaseModel, Field, field_validator


class MoleculeAction(BaseModel):
    # Action payload: a fixed action tag plus one proposed SMILES string.

    # "modify_molecule" is the only accepted value and also the default.
    action_type: Literal["modify_molecule"] = "modify_molecule"
    # Required. min_length=1 rejects the empty string; whitespace-only input
    # is rejected by the validator below.
    new_smiles: str = Field(min_length=1, description="Candidate molecule as a SMILES string.")

    @field_validator("new_smiles")
    @classmethod
    def strip_smiles(cls, value: str) -> str:
        """Return ``value`` with surrounding whitespace removed.

        Raises:
            ValueError: If nothing remains after stripping.
        """
        trimmed = value.strip()
        if trimmed:
            return trimmed
        raise ValueError("SMILES must not be empty.")


class MoleculeProperties(BaseModel):
    # Property record for a single SMILES string. Every field is required.
    # NOTE(review): the abbreviated names look like the usual cheminformatics
    # descriptors, but their exact definitions are set by whatever code
    # populates this model -- confirm there.

    smiles: str
    qed: float = Field(ge=0.0, le=1.0)  # constrained to [0, 1]
    logp: float  # no bounds enforced
    molecular_weight: float = Field(ge=0.0)  # non-negative
    hbd: int = Field(ge=0)  # presumably hydrogen-bond donor count -- confirm
    hba: int = Field(ge=0)  # presumably hydrogen-bond acceptor count -- confirm
    tpsa: float = Field(ge=0.0)  # non-negative
    rotatable_bonds: int = Field(ge=0)  # non-negative
    sa_score: float = Field(ge=1.0, le=10.0)  # constrained to [1, 10]
    lipinski_violations: int = Field(ge=0)  # non-negative


class RewardModel(BaseModel):
    # Reward breakdown; `value`, `objective_score` and `reason` are required.

    value: float = Field(ge=-1.0, le=1.0)  # constrained to [-1, 1]
    objective_score: float = Field(ge=0.0, le=1.0)  # constrained to [0, 1]
    progress_delta: float = Field(0.0, ge=-1.0, le=1.0)  # defaults to 0.0
    penalty: float = Field(0.0, ge=-1.0, le=0.0)  # never positive; defaults to 0.0
    reason: str  # free-text explanation supplied by the producer


class TaskSpec(BaseModel):
    # Static description of one task. Every field is required.

    name: str
    description: str
    start_smiles: str  # not validated here beyond being a string
    max_steps: int = Field(ge=1)  # at least one step
    difficulty: Literal["easy", "medium", "hard"]
    success_threshold: float = Field(ge=0.0, le=1.0)  # constrained to [0, 1]


class EpisodeState(BaseModel):
    # Mutable bookkeeping for an episode.
    # No cross-field check ties step_count to max_steps in this model.

    task_name: str
    current_smiles: str
    step_count: int = Field(ge=0)  # required, non-negative
    max_steps: int = Field(ge=1)  # required, at least one
    done: bool = False
    last_action_error: Optional[str] = None  # None when no error is recorded
    # default_factory gives each instance its own fresh list.
    visited_smiles: List[str] = Field(default_factory=list)


class MolOptObservation(BaseModel):
    # Observation bundle: task metadata, step counters, molecule properties,
    # the reward breakdown and a message. Only the last two fields are optional.

    task_name: str
    difficulty: Literal["easy", "medium", "hard"]
    step: int = Field(ge=0)  # non-negative
    steps_remaining: int = Field(ge=0)  # non-negative
    done: bool  # required here (no default)
    properties: MoleculeProperties
    reward: RewardModel
    message: str
    last_action_error: Optional[str] = None
    # When provided, must lie in [0, 1]; defaults to None.
    final_score: Optional[float] = Field(None, ge=0.0, le=1.0)