-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathagent_2_burnout.py
More file actions
230 lines (178 loc) · 7.95 KB
/
agent_2_burnout.py
File metadata and controls
230 lines (178 loc) · 7.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
import pandas as pd
import json
import os
import asyncio
from datetime import datetime
from pydantic import BaseModel, Field
import openai
from dotenv import load_dotenv

# Load variables from .env, overriding any values already in the environment.
load_dotenv(override=True)

"""
AGENT 2B: BURNOUT PATTERN DETECTOR (LLM-POWERED)
Uses OpenAI LLM to analyze temporal patterns and detect burnout risk.
Input: activity_log_enriched01.csv
Output: burnout_flags.json
"""

# Input/output artifact paths, relative to the working directory.
INPUT_FILE = "activity_log_enriched01.csv"
OUTPUT_FILE = "burnout_flags.json"
# Intended cap on simultaneous LLM calls (used to size the asyncio.Semaphore).
CONCURRENCY_LIMIT = 10

# ===== LLM SETUP =====
# Select the OpenAI-compatible backend: GitHub Models (default) or OpenAI direct.
API_HOST = os.getenv("API_HOST", "github")
if API_HOST == "github":
    # GitHub Models inference endpoint; requires GITHUB_TOKEN in the environment.
    client = openai.OpenAI(base_url="https://models.github.ai/inference", api_key=os.environ["GITHUB_TOKEN"])
    MODEL_NAME = os.getenv("GITHUB_MODEL", "openai/gpt-4o")
else:
    # Direct OpenAI API; requires OPENAI_KEY and OPENAI_MODEL in the environment.
    client = openai.OpenAI(api_key=os.environ["OPENAI_KEY"])
    MODEL_NAME = os.environ["OPENAI_MODEL"]
print(f'model name set to {MODEL_NAME}')
# ===== PYDANTIC MODELS =====
class BurnoutFlag(BaseModel):
    """A single detected burnout risk pattern with its severity and remedy."""

    category: str = Field(description="e.g., 'Fragmentation', 'Sleep Disruption', 'Overwork', 'Session Pattern'")
    severity: int = Field(description="1-10 scale, where 10 is critical burnout risk")
    message: str = Field(description="Clear explanation of the detected pattern")
    prescription: str = Field(description="One specific, actionable recommendation")
class BurnoutAnalysis(BaseModel):
    """Overall LLM burnout assessment; also serves as the tool-call JSON schema."""

    burnout_risk_score: float = Field(description="Overall burnout risk, 1-10 scale")
    risk_level: str = Field(description="One of: 'HEALTHY 🟢', 'MODERATE 🟡', 'HIGH 🔴', 'CRITICAL ⛔'")
    top_insights: list[str] = Field(description="3-5 key insights about the user's work pattern")
    flags: list[BurnoutFlag] = Field(description="List of detected burnout risk flags")
# ===== LLM CALL =====
def call_llm_for_burnout_analysis(metrics_summary: str, semaphore: "asyncio.Semaphore | None" = None) -> BurnoutAnalysis:
    """
    Call the LLM to analyze temporal work metrics and detect burnout patterns.

    Args:
        metrics_summary: Formatted metrics text (see extract_metrics_summary).
        semaphore: Optional concurrency limiter. NOTE(review): this function is
            synchronous and never acquires it, so it currently has no effect;
            the parameter is kept (now optional) for caller compatibility.

    Returns:
        A validated BurnoutAnalysis. If the API call, tool-call extraction, or
        schema validation fails, a safe default MODERATE analysis is returned
        instead of raising, so the surrounding pipeline can continue.
    """
    SYSTEM_PROMPT = """
You are an expert Cognitive Health & Burnout Prevention Specialist with deep knowledge of:
- Circadian rhythms and sleep science (Walker, 2017)
- Attention and context-switching (Ophir et al., 2009)
- Ultradian work cycles (Kleitman, 1961)
- Digital ergonomics and sustainable productivity
Your task: Analyze the provided work metrics and detect burnout risk patterns. Be specific and data-driven.
Return a structured analysis with severity scores and actionable prescriptions.
"""
    USER_PROMPT = f"""
Analyze these work metrics for burnout risk:
{metrics_summary}
Consider:
1. Is the switching rate sustainable? (Normal: 20-40/hr, High: 60+/hr, Critical: 100+/hr)
2. Are sessions too brief (constant micro-switches) or too long (no breaks)?
3. Is there evening/early morning work disrupting circadian rhythm?
4. What is the total work duration and distribution pattern?
5. What specific interventions would help this person?
Return a BurnoutAnalysis JSON with specific severity scores and personalized prescriptions.
"""
    # Expose the Pydantic model as a function tool so the model must reply
    # with JSON matching the BurnoutAnalysis schema.
    tool_spec = {
        "type": "function",
        "function": {
            "name": "BurnoutAnalysis",
            "description": "Analyze work patterns and detect burnout risk",
            "parameters": BurnoutAnalysis.model_json_schema()
        }
    }
    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": USER_PROMPT},
            ],
            tools=[tool_spec],
            # Force the tool call so the response is machine-parseable.
            tool_choice={"type": "function", "function": {"name": "BurnoutAnalysis"}},
        )
        tool_calls = response.choices[0].message.tool_calls
        if not tool_calls:
            # Explicit error beats an opaque TypeError from indexing None.
            raise ValueError("model returned no tool call")
        # Validate the JSON arguments against the Pydantic schema.
        return BurnoutAnalysis.model_validate_json(tool_calls[0].function.arguments)
    except Exception as e:
        # Boundary fallback: log and degrade gracefully rather than crash the
        # whole pipeline on a transient API or parsing failure.
        print(f"LLM call failed: {e}")
        # Return safe default
        return BurnoutAnalysis(
            burnout_risk_score=5.0,
            risk_level="MODERATE 🟡",
            top_insights=["LLM analysis unavailable"],
            flags=[]
        )
# ===== METRIC EXTRACTION =====
def extract_metrics_summary(df: pd.DataFrame, interval_seconds: float = 5) -> str:
    """Extract key metrics from enriched data and format them for the LLM.

    Args:
        df: Enriched activity log. Expected columns include Timestamp,
            Window_Title, Switching_Rate_Per_Hour, Total_Session_Duration_Seconds,
            Is_Brief_Session, Is_Extended_Session, Is_Evening, Is_Early_Morning,
            Hour_of_Day, Time_Bucket, Switches_Last_15min, Unique_Windows_Last_10.
        interval_seconds: Sampling interval of the activity logger (default 5s),
            used to convert row counts into total logged hours.

    Returns:
        A multi-line, human-readable summary of session and temporal metrics.

    Raises:
        ValueError: if df is empty (the percentage metrics would divide by zero).
    """
    if df.empty:
        raise ValueError("extract_metrics_summary: received an empty DataFrame")
    # Work on a copy so the in-place Timestamp conversion below does not
    # mutate the caller's DataFrame.
    df = df.copy()
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    # Each row represents one fixed-length sampling interval.
    total_hours = len(df) * interval_seconds / 3600
    # A "switch" is any row whose window title differs from the previous row.
    total_switches = (df['Window_Title'] != df['Window_Title'].shift(1)).sum()
    avg_switching_rate = df['Switching_Rate_Per_Hour'].mean()
    max_switching_rate = df['Switching_Rate_Per_Hour'].max()
    avg_session_seconds = df['Total_Session_Duration_Seconds'].mean()
    avg_session_minutes = avg_session_seconds / 60
    brief_session_pct = (df['Is_Brief_Session'].sum() / len(df)) * 100
    extended_session_pct = (df['Is_Extended_Session'].sum() / len(df)) * 100
    evening_work_pct = (df['Is_Evening'].sum() / len(df)) * 100
    early_morning_pct = (df['Is_Early_Morning'].sum() / len(df)) * 100
    # Number of distinct clock hours with any activity.
    by_hour = df.groupby('Hour_of_Day').size()
    hours_active = len(by_hour)
    time_buckets = df['Time_Bucket'].value_counts().to_dict()
    summary = f"""
WORK SESSION METRICS:
- Total logged time: {total_hours:.2f} hours
- Total window switches: {total_switches}
- Average switching rate: {avg_switching_rate:.1f} switches/hour
- Peak switching rate: {max_switching_rate:.1f} switches/hour
- Average session duration: {avg_session_minutes:.1f} minutes
SESSION PATTERNS:
- Brief sessions (<50% of average): {brief_session_pct:.1f}%
- Extended sessions (>200% of average): {extended_session_pct:.1f}%
- Unique windows detected: {df['Window_Title'].nunique()}
TEMPORAL PATTERNS:
- Evening work (after 6 PM): {evening_work_pct:.1f}%
- Early morning work (before 7 AM): {early_morning_pct:.1f}%
- Hours active: {hours_active}
- Time distribution: {time_buckets}
RECENT ACTIVITY INTENSITY:
- Max recent switching (last 15 min): {df['Switches_Last_15min'].max():.0f} switches
- Average unique windows in last 10 intervals: {df['Unique_Windows_Last_10'].mean():.1f}
"""
    return summary
# ===== MAIN EXECUTION =====
def run_burnout_detection():
    """Main execution function.

    Pipeline: load the enriched activity CSV, summarize its metrics, ask the
    LLM for a burnout analysis, print the results, and persist a JSON report
    to OUTPUT_FILE.

    Returns:
        The report dict that was written to OUTPUT_FILE, or None when the
        input CSV is missing.
    """
    try:
        df = pd.read_csv(INPUT_FILE)
    except FileNotFoundError:
        # The enriched log is produced by a separate preprocessing step.
        print(f"ERROR: {INPUT_FILE} not found.")
        print(f"Make sure you ran: python util.py")
        return None
    print("--- Running Agent 2B: Burnout Pattern Detector (LLM-Powered) ---\n")
    # Extract metrics
    print("📊 Extracting metrics from enriched data...")
    metrics_summary = extract_metrics_summary(df)
    print(metrics_summary)
    # Call LLM for analysis
    print("\n🤖 Calling LLM for burnout analysis...")
    # NOTE(review): this semaphore is created and passed but the callee is
    # synchronous and never acquires it, so it has no effect here.
    semaphore = asyncio.Semaphore(CONCURRENCY_LIMIT)
    analysis = call_llm_for_burnout_analysis(metrics_summary, semaphore)
    # ===== DISPLAY RESULTS =====
    print(f"\n{'='*70}")
    print(f"BURNOUT RISK SCORE: {analysis.burnout_risk_score}/10")
    print(f"RISK LEVEL: {analysis.risk_level}")
    print(f"{'='*70}\n")
    print("💡 KEY INSIGHTS:")
    for insight in analysis.top_insights:
        print(f" • {insight}")
    if analysis.flags:
        print(f"\n🚨 DETECTED RISK FACTORS ({len(analysis.flags)} total):")
        for i, flag in enumerate(analysis.flags[:5], 1):  # Show top 5
            print(f"\n{i}. {flag.category} (Severity: {flag.severity}/10)")
            print(f" 📌 {flag.message}")
            print(f" {flag.prescription}")
    # ===== SAVE REPORT =====
    # Serializable snapshot of the analysis plus the metrics fed to the LLM.
    report = {
        'timestamp': datetime.now().isoformat(),
        'burnout_risk_score': analysis.burnout_risk_score,
        'risk_level': analysis.risk_level,
        'top_insights': analysis.top_insights,
        'flags_detected': len(analysis.flags),
        'flags': [flag.model_dump() for flag in analysis.flags],
        'metrics_summary': metrics_summary
    }
    with open(OUTPUT_FILE, 'w') as f:
        json.dump(report, f, indent=2)
    return report
if __name__ == '__main__':
    # Script entry point: run the full detection pipeline once.
    run_burnout_detection()