Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 27 additions & 23 deletions backend/configs/prompts/system_prompt.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,35 +11,39 @@ prompt: |

{% if mode == "plan" %}
## CURRENT MODE: PLAN

You are in **Plan mode**. Your job is to understand the task, ask clarifying
questions, gather context, and produce a comprehensive plan.

**Available tools**: ask_user, plan_tool, read_file, list_dir, glob_files,
grep_search, web_search, papers, github_search, github_read_file, github_read_repo,
github_search_repos, github_get_readme

**NOT available**: writing, research sub-agent, sandbox/code execution tools.
Calls to unavailable tools will be rejected.


**Available tools**: ask_user, plan_tool, read (local files), web_search,
papers, github_read_file, github_find_examples, github_search_repos,
github_get_readme, github_list_repos, hf_search_models, hf_model_info,
hf_search_datasets, hf_dataset_info, hf_read_file, compute_list,
compute_plan, compute_probe, workspace

**NOT available**: bash, write, edit, writing, research sub-agent,
sandbox/code execution, compute_select, compute_sync_up, compute_sync_down.
Calls to unavailable tools will be **rejected by the system**.

**Rules**:
1. Ask clarifying questions using `ask_user` before making assumptions
2. Search the web, papers, and code repos to gather context
3. Create a structured plan using `plan_tool` with clear, actionable tasks
4. The plan is auto-saved as PLAN.md in resources — the user can see it
5. Do NOT execute any work — plan only
6. Do NOT write content, run code, or make changes
7. Be thorough in your plan — it will be the blueprint for Execute mode

2. Search the web, papers, GitHub, and Hugging Face to gather context
3. Read local project files with `read` to understand the codebase
4. Create a structured plan using `plan_tool` with clear, actionable tasks
5. The plan is auto-saved as PLAN.md in resources — the user can see it
6. Do NOT execute any work — plan only
7. Do NOT write content, run code, or make changes
8. Be thorough in your plan — it will be the blueprint for Execute mode

{% elif mode == "execute" %}
## CURRENT MODE: EXECUTE

You are in **Execute mode**. Your job is to follow the plan and do the work.
Do NOT ask questions — just execute.

**Available tools**: ALL tools EXCEPT ask_user.
Calls to ask_user will be rejected.
Calls to ask_user will be **rejected by the system**.

**Rules**:
1. Follow the task plan — check it with `plan_tool get` if unsure
2. Work through tasks one at a time, marking them in_progress then completed
Expand All @@ -49,10 +53,10 @@ prompt: |
decision and document it in your completion report
6. Keep pushing through the task list until done or interrupted
7. Generate completion reports for each task

{% else %}
## CURRENT MODE: EXECUTE (default)
All tools available except ask_user. Execute the work.
## CURRENT MODE: PLAN (default)
Plan only — ask questions, gather context, create plan. No execution.
{% endif %}

# Task Management
Expand Down
11 changes: 9 additions & 2 deletions backend/openmlr/agent/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,15 @@ async def _run_agent(session: Session, tool_router, user_message: str, mode: str
if session.pending_approval:
session.pending_approval = None

# Set the mode on the tool router for strict enforcement
effective_mode = mode if mode in ("plan", "execute") else "execute"
# Set the mode on the tool router for strict enforcement.
# If mode is explicitly provided and valid, use it and persist it on the session.
# If it is missing or invalid (e.g. approval continuation), fall back to the
# session's stored mode, which starts as plan (safe) for a new session.
if mode in ("plan", "execute"):
effective_mode = mode
session.current_mode = mode
else:
effective_mode = session.current_mode # preserved from the last explicit mode
tool_router.set_mode(effective_mode)

# Inject per-message mode hint (short reinforcement of system prompt rules)
Expand Down
3 changes: 3 additions & 0 deletions backend/openmlr/agent/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ class Session:
# Cancellation
_cancelled: asyncio.Event = field(default_factory=asyncio.Event)

# Mode tracking (plan/execute) — persists across approval continuations
current_mode: str = "plan"

# Approval flow
pending_approval: dict | None = None

Expand Down
6 changes: 4 additions & 2 deletions backend/openmlr/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Pydantic models for API requests and responses."""

from datetime import datetime
from typing import Any
from typing import Any, Literal

from pydantic import BaseModel, Field

Expand Down Expand Up @@ -71,7 +71,9 @@ class ConversationDetail(BaseModel):

class MessageSend(BaseModel):
message: str
mode: str | None = None # plan, research, write — per-message mode override
mode: Literal["plan", "execute"] | None = (
None # per-message mode; only plan or execute accepted
)


class ApprovalRequest(BaseModel):
Expand Down
12 changes: 12 additions & 0 deletions backend/openmlr/routes/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ async def update_setting(
"openalex_api_key": "OPENALEX_API_KEY",
"modal_token_id": "MODAL_TOKEN_ID",
"modal_token_secret": "MODAL_TOKEN_SECRET",
"hf_token": "HF_TOKEN",
}
env_key = env_key_map.get(key)
if env_key and isinstance(value, str):
Expand Down Expand Up @@ -127,6 +128,7 @@ def _is_provider_configured(provider_id: str, provider_settings: dict) -> bool:
"semantic_scholar": "SEMANTIC_SCHOLAR_API_KEY",
"openalex": "OPENALEX_API_KEY",
"modal": "MODAL_TOKEN_ID",
"huggingface": "HF_TOKEN",
}
env_key = env_map.get(provider_id)
if env_key and os.environ.get(env_key):
Expand All @@ -143,6 +145,7 @@ def _is_provider_configured(provider_id: str, provider_settings: dict) -> bool:
"semantic_scholar": "semantic_scholar_api_key",
"openalex": "openalex_api_key",
"modal": "modal_token_id",
"huggingface": "hf_token",
}.get(provider_id)
if setting_key and provider_settings.get(setting_key):
return True
Expand Down Expand Up @@ -258,6 +261,14 @@ async def list_providers(
"categories": ["compute"],
"docs_url": "https://modal.com/docs",
},
{
"id": "huggingface",
"name": "Hugging Face",
"key_env": "HF_TOKEN",
"configured": _is_provider_configured("huggingface", provider_settings),
"categories": ["models", "papers"],
"docs_url": "https://huggingface.co/docs/hub/security-tokens",
},
]

# Add custom providers
Expand Down Expand Up @@ -759,6 +770,7 @@ async def save_config(
"OPENALEX_API_KEY",
"MODAL_TOKEN_ID",
"MODAL_TOKEN_SECRET",
"HF_TOKEN",
}

body = await request.json()
Expand Down
2 changes: 1 addition & 1 deletion backend/openmlr/tasks/agent_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ async def _async_process_message(
# Build and set system prompt
session.context_manager.system_prompt = build_system_prompt(
tool_specs=tool_router.get_raw_specs(),
mode=mode or "general",
mode=mode if mode in ("plan", "execute") else "plan",
username="user",
)

Expand Down
4 changes: 2 additions & 2 deletions backend/openmlr/tools/ask_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ def create_ask_user_tool() -> ToolSpec:
},
"suggest_mode": {
"type": "string",
"description": "If confident, suggest the user switch to this mode after answering (e.g. 'research', 'write')",
"enum": ["research", "write"],
"description": "If the plan is ready and the user should start executing, set this to 'execute' to suggest switching to Execute mode after answering.",
"enum": ["execute"],
},
},
"required": ["questions"],
Expand Down
Loading
Loading