Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions eval/agent-eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ import {
} from "../src/memory/magic-docs.ts";
import { SessionStore } from "../src/sessions/store.ts";
import { CronStore } from "../src/cron/store.ts";
import { curateConsumerTasks } from "../src/cron/task-view.ts";
import {
createDraft,
getDraft,
Expand Down Expand Up @@ -1086,6 +1087,64 @@ async function runCron(): Promise<void> {
if (!KEEP) await store.deleteJob(jobId); // cron_runs cascade via FK
}

/**
 * Eval for the scheduled-tasks feature: seeds one agent-scheduled reminder and one
 * system infra loop under the same owner, then asserts that curateConsumerTasks
 * surfaces the former and hides the latter.
 *
 * Cleanup runs in a `finally` and only covers rows that were actually created, so a
 * failure part-way through (second createJob, listJobs, ...) does not strand rows.
 * Under KEEP (--audit) nothing is deleted, exactly as before.
 */
async function runTasks(): Promise<void> {
  // scheduled-tasks: the consumer Tasks surface (curateConsumerTasks, served behind
  // BOTH NomosAgent.ListTasks and MobileApi.ListTasks). A reminder the assistant
  // scheduled via schedule_task (source='agent') must surface; the instance's
  // always-on system/bundled infra loops -- which collapse onto the owner's user_id
  // in power-user mode -- must be filtered OUT by curateConsumerTasks' source guard.
  // Deterministic (no LLM): the live agent->schedule_task->Tasks path is covered by
  // the iOS XCUITest; here we guard the durable effect + the curation/filter.
  const store = new CronStore();
  const owner = "eval-tasks-a";

  // One deleter per row that was successfully inserted, in creation order.
  const cleanups: Array<() => Promise<void>> = [];

  try {
    // cron_jobs.name is globally UNIQUE (idx_cron_name), so namespace both rows to avoid
    // colliding with seeded loops in nomos_eval.
    const reminderId = await store.createJob({
      userId: owner,
      name: "eval-tasks-call-dentist",
      schedule: "2026-06-18T09:00:00",
      scheduleType: "at",
      sessionTarget: "isolated",
      deliveryMode: "none",
      prompt: "Remind the user to call the dentist",
      enabled: true,
      errorCount: 0,
      source: "agent", // what schedule_task stamps for a user-owned task
    });
    cleanups.push(async () => {
      await store.deleteJob(reminderId);
    });

    const infraId = await store.createJob({
      userId: owner, // same user_id as the reminder (power-user collapses system onto the owner)
      name: "eval-tasks-infra-loop",
      schedule: "6h",
      scheduleType: "every",
      sessionTarget: "isolated",
      deliveryMode: "none",
      prompt: "consolidate",
      enabled: true,
      errorCount: 0,
      source: "system", // an infra loop -- must NOT appear on Tasks
    });
    cleanups.push(async () => {
      await store.deleteJob(infraId);
    });

    const tasks = curateConsumerTasks(await store.listJobs({ userId: owner }));
    check(
      "[tasks] a schedule_task reminder (source=agent) surfaces on the consumer Tasks view",
      tasks.some((t) => t.id === reminderId && t.source === "agent" && t.scheduleType === "at"),
      `tasks=${tasks.map((t) => `${t.name}:${t.source}`).join(", ")}`,
    );
    check(
      "[tasks] system/bundled infra loops are filtered out of Tasks (curateConsumerTasks source guard)",
      !tasks.some((t) => t.id === infraId) &&
        tasks.every((t) => t.source !== "system" && t.source !== "bundled"),
    );
  } finally {
    // Under --audit (KEEP) the rows are intentionally left in nomos_eval so the
    // scheduled-tasks effect SQL (source IN ('agent','user')) is exercised, not just
    // declared. A plain run cleans both up -- even when a step above threw.
    // NOTE(review): rows kept under KEEP retain their globally-unique names; presumably a
    // later run against the same database relies on it being reset -- confirm.
    if (!KEEP) {
      for (const cleanup of cleanups) {
        await cleanup();
      }
    }
  }
}

async function runDrafts(): Promise<void> {
// draft_messages: consent-aware outgoing drafts. Per-user at the LIST layer;
// status state machine is pending -> approved -> sent, or pending -> rejected.
Expand Down Expand Up @@ -3207,6 +3266,7 @@ async function runEval(): Promise<void> {
await runCommitments();
await runSessionResume();
await runCron();
await runTasks();
await runDrafts();
await runAutoLinkerGuard();
await runRelationshipStats();
Expand Down
50 changes: 44 additions & 6 deletions eval/feature-manifest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -819,15 +819,19 @@ export const FEATURES: FeatureSpec[] = [
{
id: "multi-agent-teams",
summary:
"Coordinator/worker orchestration via the /team prefix; parallel workers, synthesized result.",
"Coordinator/worker orchestration: a coordinator decomposes a task into parallel workers and synthesizes one result. Triggered EITHER by the `/team` prefix (fast path) OR by the in-loop `delegate_to_team` tool (buildTeamMcpServer) the agent calls when the user asks in natural language ('research X from three angles', 'spin up a team') — both work in hosted + power-user modes (both converge on AgentRuntime.runAgent). Gated on teamMode. Workers receive only the BASE mcp set (no nomos-team), so they can never recurse into delegation.",
trigger: { kind: "turn", gate: "teamMode" },
entry: ["stripTeamPrefix", "TeamRuntime"],
entry: ["stripTeamPrefix", "TeamRuntime", "buildTeamMcpServer"],
effects: [
{
claim: "spawns parallel workers + synthesizes (transient, no durable DB state)",
notExercised: true,
},
],
invariants: [
"invokable without the /team prefix via delegate_to_team, in both hosted + power-user modes",
"workers get only the base mcp set, so a worker can never spawn a nested team",
],
},

// ── Self-improvement (the learning loop) ──
Expand Down Expand Up @@ -983,25 +987,59 @@ export const FEATURES: FeatureSpec[] = [
{
id: "scheduled-tasks",
summary:
"Consumer Tasks surface (MobileApi.ListTasks/UpdateTask/DeleteTask). ListTasks returns the user's own scheduled cron_jobs (one-off 'at' reminders + recurring jobs created via schedule_task/loop_create), owner-scoped by user_id so the instance's system-owned background loops never appear. UpdateTask reschedules/renames/edits the instruction/enables; DeleteTask removes one. Both assert ownership before mutating.",
"Consumer Tasks surface, served by BOTH NomosAgent.ListTasks/UpdateTask/DeleteTask (grpc-server, local power-user) and MobileApi.ListTasks/UpdateTask/DeleteTask (hosted, auth-gated); GetToday reuses curateConsumerTasks for its task strip. A 'task' is any cron_jobs row the user/assistant scheduled (one-off 'at' reminders + recurring 'every'/'cron' jobs created via schedule_task/loop_create). curateConsumerTasks(jobs) filters out INFRA_SOURCES (source in {system,bundled}) so the instance's always-on system loops + bundled templates never appear on Tasks even in power-user mode, where systemTenant() collapses onto the owner so they share the owner's user_id; sorts enabled-first then alphabetical. toConsumerTask shapes each row + prettifies the schedule. UpdateTask reschedules/renames/edits the instruction/enables; DeleteTask removes one; both assert ownership before mutating.",
trigger: { kind: "turn" },
entry: ["curateConsumerTasks", "toConsumerTask"],
effects: [
{
claim: "the user's scheduled tasks are stored as owner-scoped cron_jobs",
// Exercised by runTasks: a schedule_task-style reminder (source='agent')
// is created + survives under --audit (KEEP), so the count is nonzero.
claim:
"user/assistant-scheduled tasks are stored as owner-scoped cron_jobs (source='agent')",
sql: {
query: "SELECT count(*) FROM cron_jobs WHERE source IN ('agent','user')",
expect: "nonzero",
},
notExercised: true,
},
{
// The complement: infra loops genuinely EXIST in cron_jobs (so "Tasks hides
// them" is a non-vacuous claim); curateConsumerTasks filters them out of the
// view (asserted by runTasks' check() + the task-view unit test).
claim:
"infra loops (system/bundled) exist as cron_jobs but are filtered out of the Tasks view",
sql: {
query: "SELECT count(*) FROM cron_jobs WHERE source IN ('system','bundled')",
expect: "nonzero",
},
},
],
invariants: [
"Tasks are owner-scoped (user_id); managed/system loops never appear on this surface",
"Tasks are owner-scoped (user_id); system/bundled infra loops are filtered out by INFRA_SOURCES and never appear on this surface, even when they share the owner's user_id in power-user mode",
"served by both NomosAgent (local) and MobileApi (hosted) ListTasks/UpdateTask/DeleteTask off the same curateConsumerTasks view",
"UpdateTask/DeleteTask assert job.userId === the resolved owner before mutating",
"schedule_task stamps source='agent' (a user-owned task, not infra)",
],
},
{
id: "ask-user-elicitation",
summary:
"MCP-native ask_user round-trip: an in-process tool raises an elicitation/create request; the SDK relays it to AgentRuntime's onElicitation callback (handleElicitation), which renders the question on the user's active channel. Slack gets Block Kit buttons, any text channel matches a numbered/label reply, and channel-less clients (mobile/terminal) get an 'ask' AgentEvent over the open stream via a per-source registered emitter (registerEmitter/unregisterEmitter), with the answer returned OUT-OF-BAND through the AnswerQuestion RPC (NomosAgent + MobileApi) -> resolveById. Answering out-of-band (not as a new chat turn) avoids deadlocking the per-session FIFO queue behind the suspended turn. A pending entry is keyed by elicitation id with a TTL auto-decline.",
trigger: { kind: "turn" },
entry: ["handleElicitation", "registerEmitter", "unregisterEmitter", "resolveById"],
effects: [
{
claim:
"ask_user renders the question on the active channel / over the open stream and resolves the agent's suspended promise from the out-of-band answer (in-memory pending map + transient 'ask' stream event; no durable table)",
notExercised: true,
},
],
invariants: [
"the elicitation manager is created at gateway boot and handed to both the runtime and the gRPC server (setElicitationManager)",
"AnswerQuestion is served by BOTH NomosAgent (grpc-server) and MobileApi (auth-gated) and both resolve via resolveById",
"mobile/terminal sources (no channel adapter) register a per-source emitter that pushes an 'ask' AgentEvent and is torn down when the turn ends",
"the answer arrives out-of-band (a dedicated RPC), never as a new chat message, so the suspended turn never deadlocks the session queue",
],
},
{
id: "brain-overview",
summary:
Expand Down
19 changes: 19 additions & 0 deletions proto/nomos.proto
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ service NomosAgent {
rpc SetLoopEnabled (SetLoopEnabledRequest) returns (LoopActionResponse);
rpc DeleteLoop (LoopDeleteRequest) returns (LoopActionResponse);

// Scheduled tasks (the local owner's cron_jobs) — mirrors MobileApi for local mode.
rpc ListTasks (Empty) returns (MTasksResponse);
rpc UpdateTask (MTaskUpdateRequest) returns (MAck);
rpc DeleteTask (MTaskDeleteRequest) returns (MAck);

// Answer a pending ask_user elicitation out-of-band (does not enqueue a turn).
rpc AnswerQuestion (MAnswerRequest) returns (MAck);

// Health check
rpc Ping (Empty) returns (PongResponse);
}
Expand Down Expand Up @@ -208,6 +216,9 @@ service MobileApi {
rpc UpdateTask (MTaskUpdateRequest) returns (MAck);
rpc DeleteTask (MTaskDeleteRequest) returns (MAck);

// Answer a pending ask_user elicitation out-of-band (does not enqueue a turn).
rpc AnswerQuestion (MAnswerRequest) returns (MAck);

// Brain tab (the user's knowledge graph + learned facts, for the feed + map)
rpc GetBrain (Empty) returns (MBrainResponse);

Expand Down Expand Up @@ -362,6 +373,14 @@ message MAck {
string message = 2;
}

// Answer to a pending ask_user elicitation, delivered out-of-band so it does not
// queue behind the suspended turn that is awaiting it.
// Request type of the AnswerQuestion RPC (declared on both NomosAgent and MobileApi);
// the RPC replies with MAck.
message MAnswerRequest {
// NOTE(review): presumably identifies the session whose turn is suspended — confirm against the handler.
string session_key = 1;
// NOTE(review): presumably the id of the pending elicitation being answered — confirm against the handler.
string question_id = 2;
// The user's reply. NOTE(review): accepted formats (option label vs. free text) are not visible here — confirm.
string answer = 3;
}

// Vault (long-term memory / knowledge base)
message MVaultListRequest {
string prefix = 1;
Expand Down
10 changes: 7 additions & 3 deletions src/cron/loop-view.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,16 @@ describe("prettifySchedule", () => {
expect(prettifySchedule("15m", "every")).toBe("Every 15 minutes");
});

it("renders daily cron expressions", () => {
it("renders daily + weekday/weekly/monthly cron expressions", () => {
expect(prettifySchedule("0 8 * * *", "cron")).toBe("Daily at 8:00 AM");
expect(prettifySchedule("30 17 * * *", "cron")).toBe("Daily at 5:30 PM");
expect(prettifySchedule("0 9 * * 1-5", "cron")).toBe("Weekdays at 9:00 AM");
expect(prettifySchedule("0 9 * * 1", "cron")).toBe("Weekly on Mon at 9:00 AM");
expect(prettifySchedule("0 8 15 * *", "cron")).toBe("Monthly on day 15 at 8:00 AM");
});

it("falls back to the raw string for unrecognized shapes", () => {
expect(prettifySchedule("0 9 * * 1", "cron")).toBe("0 9 * * 1");
it("falls back to the raw string for genuinely unrecognized shapes", () => {
expect(prettifySchedule("*/5 * * * *", "cron")).toBe("*/5 * * * *");
expect(prettifySchedule("0 9 1 1 *", "cron")).toBe("0 9 1 1 *"); // specific month, not modeled
});
});
34 changes: 34 additions & 0 deletions src/cron/schedule-format.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { describe, it, expect } from "vitest";
import { prettifySchedule } from "./schedule-format.ts";

describe("prettifySchedule", () => {
  /** Asserts each [schedule, expected label] pair, in order, for one schedule type. */
  function expectLabels(
    scheduleType: string,
    pairs: ReadonlyArray<readonly [string, string]>,
  ): void {
    for (const [schedule, label] of pairs) {
      expect(prettifySchedule(schedule, scheduleType)).toBe(label);
    }
  }

  it("renders interval ('every') forms", () => {
    expectLabels("every", [
      ["1h", "Hourly"],
      ["24h", "Daily"],
      ["15m", "Every 15 minutes"],
      ["2d", "Every 2 days"],
    ]);
  });

  it("renders the friendly cron forms the consumer editor produces", () => {
    expectLabels("cron", [
      ["0 9 * * *", "Daily at 9:00 AM"],
      ["30 17 * * 1-5", "Weekdays at 5:30 PM"],
      ["0 18 * * 1", "Weekly on Mon at 6:00 PM"],
      ["0 18 * * 1,3,5", "Weekly on Mon, Wed, Fri at 6:00 PM"],
      ["0 8 15 * *", "Monthly on day 15 at 8:00 AM"],
    ]);
  });

  it("treats Sunday as 0 or 7", () => {
    // Both spellings of Sunday must produce the same label.
    expectLabels("cron", [
      ["0 9 * * 0", "Weekly on Sun at 9:00 AM"],
      ["0 9 * * 7", "Weekly on Sun at 9:00 AM"],
    ]);
  });

  it("falls back to the raw expression for unmappable cron", () => {
    expectLabels("cron", [
      ["*/5 * * * *", "*/5 * * * *"],
      ["0 9 1 1 *", "0 9 1 1 *"], // specific month, not handled
    ]);
  });

  it("renders one-off 'at' times", () => {
    // Only the prefix is pinned for a valid timestamp; the rest of the label is not asserted.
    const rendered = prettifySchedule("2026-06-18T09:00:00Z", "at");
    expect(rendered.startsWith("Once,")).toBe(true);
    expectLabels("at", [["not-a-date", "not-a-date"]]);
  });
});
18 changes: 15 additions & 3 deletions src/cron/schedule-format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
* and toggle/delete key off the id/name); this is display-only.
*/

const DOW = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];

export function prettifySchedule(schedule: string, scheduleType: string): string {
const s = schedule.trim();

Expand All @@ -28,9 +30,19 @@ export function prettifySchedule(schedule: string, scheduleType: string): string
if (scheduleType === "cron") {
const parts = s.split(/\s+/);
if (parts.length === 5) {
const [min, hour, dom, , dow] = parts;
if (/^\d+$/.test(min) && /^\d+$/.test(hour) && dom === "*" && dow === "*") {
return `Daily at ${formatClock(Number(hour), min)}`;
const [min, hour, dom, mon, dow] = parts;
if (/^\d+$/.test(min) && /^\d+$/.test(hour) && mon === "*") {
const clock = formatClock(Number(hour), min);
if (dom === "*" && dow === "*") return `Daily at ${clock}`;
if (dom === "*" && dow === "1-5") return `Weekdays at ${clock}`;
if (dom === "*" && /^[0-7](,[0-7])*$/.test(dow)) {
const names = dow
.split(",")
.map((d) => DOW[Number(d) === 7 ? 0 : Number(d)])
.join(", ");
return `Weekly on ${names} at ${clock}`;
}
if (dow === "*" && /^\d+$/.test(dom)) return `Monthly on day ${dom} at ${clock}`;
}
}
return s;
Expand Down
37 changes: 33 additions & 4 deletions src/cron/task-view.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, it, expect } from "vitest";
import { curateConsumerTasks, toConsumerTask } from "./task-view.ts";
import { curateConsumerTasks, prettifyTaskName, toConsumerTask } from "./task-view.ts";
import { prettifySchedule } from "./schedule-format.ts";
import type { CronJob } from "./types.ts";

Expand Down Expand Up @@ -36,20 +36,49 @@ describe("toConsumerTask", () => {
});
});

describe("prettifyTaskName", () => {
  it("humanizes kebab/snake/camel slugs to Title Case", () => {
    const slugCases: ReadonlyArray<readonly [string, string]> = [
      ["call-dentist", "Call Dentist"],
      ["water_plants", "Water Plants"],
      ["checkUrgentEmails", "Check Urgent Emails"],
    ];
    for (const [slug, friendly] of slugCases) {
      expect(prettifyTaskName(slug)).toBe(friendly);
    }
  });

  it("leaves real prose alone (only capitalizes the first letter)", () => {
    const proseCases: ReadonlyArray<readonly [string, string]> = [
      ["Check my inbox", "Check my inbox"],
      ["review the PR diff", "Review the PR diff"],
    ];
    for (const [prose, expected] of proseCases) {
      expect(prettifyTaskName(prose)).toBe(expected);
    }
  });

  it("is applied by toConsumerTask so both transports show a friendly name", () => {
    // The shaped task's name is the prettified form of the stored slug.
    const shaped = toConsumerTask(job({ name: "call-dentist" }));
    expect(shaped.name).toBe("Call Dentist");
  });
});

describe("curateConsumerTasks", () => {
it("sorts enabled first, then alphabetical", () => {
const out = curateConsumerTasks([
job({ name: "zebra", enabled: true }),
job({ name: "apple", enabled: false }),
job({ name: "mango", enabled: true }),
]);
expect(out.map((t) => t.name)).toEqual(["mango", "zebra", "apple"]);
expect(out.map((t) => t.name)).toEqual(["Mango", "Zebra", "Apple"]);
});

it("passes through every owned job (filtering is done by the per-user query)", () => {
const out = curateConsumerTasks([job({ name: "a" }), job({ name: "b" })]);
it("passes through user/agent-scheduled jobs", () => {
const out = curateConsumerTasks([
job({ name: "a", source: "agent" }),
job({ name: "b", source: "user" }),
]);
expect(out).toHaveLength(2);
});

it("hides infra loops (system/bundled) that share the owner's user_id in power-user mode", () => {
const out = curateConsumerTasks([
job({ name: "call-dentist", source: "agent" }),
job({ name: "auto-dream", source: "system" }),
job({ name: "calendar-prep", source: "bundled" }),
]);
expect(out.map((t) => t.name)).toEqual(["Call Dentist"]);
});
});

describe("prettifySchedule (shared)", () => {
Expand Down
Loading
Loading