def cmd_compress(args):
    """Compress ``args.text`` with TokenJuice and print ``{"compressed": ...}``."""
    payload = {"compressed": TokenJuice().compress(args.text, args.tool)}
    print(json.dumps(payload))


def cmd_classify_command(args):
    """Classify the JSON-encoded argv in ``args.args`` and print ``{"class": ...}``.

    The sandbox falls back to the current working directory when
    ``args.action_dir`` is empty.
    """
    sandbox = args.action_dir or str(Path.cwd())
    gate = AutonomySecurity(sandbox)
    verdict = gate.classify_command(json.loads(args.args))
    print(json.dumps({"class": verdict.value}))


def cmd_is_path_allowed(args):
    """Print ``{"allowed": bool}`` for ``args.path`` checked against ``args.action_dir``."""
    gate = AutonomySecurity(args.action_dir)
    print(json.dumps({"allowed": gate.is_path_allowed(args.path)}))


def cmd_evaluate_execution(args):
    """Run the combined path/command/tier evaluation and print the decision.

    ``args.args`` is a JSON array of command arguments, ``args.tier`` an
    ``AutonomyTier`` value and ``args.paths`` an optional JSON array of paths.
    """
    gate = AutonomySecurity(args.action_dir)
    argv = json.loads(args.args)
    tier = AutonomyTier(args.tier)
    if args.paths:
        paths = json.loads(args.paths)
    else:
        paths = None

    decision, reason = gate.evaluate_command_execution(argv, tier, paths)
    report = {"decision": decision.value, "reason": reason}
    print(json.dumps(report))


def cmd_memory_save(args):
    """Save a vault page from ``args.id``, JSON ``args.meta`` and ``args.body``."""
    vault = MemoryTreeManager(args.vault)
    vault.save_page(args.id, json.loads(args.meta), args.body)
    print(json.dumps({"status": "success"}))


def cmd_memory_search(args):
    """Search the vault for ``args.query`` and print ``{"results": [{id, score}]}``."""
    hits = MemoryTreeManager(args.vault).search(args.query)
    rows = []
    for hit in hits:
        rows.append({"id": hit[0], "score": hit[1]})
    print(json.dumps({"results": rows}))


def cmd_memory_graph(args):
    """Print the vault's outgoing-link and backlink maps as JSON."""
    outgoing, backlinks = MemoryTreeManager(args.vault).build_link_graph()

    def as_lists(graph):
        # Sets are not JSON serialisable, so each link set becomes a list.
        return {page: list(links) for page, links in graph.items()}

    print(json.dumps({
        "outgoing": as_lists(outgoing),
        "backlinks": as_lists(backlinks),
    }))
def resolve_db_path():
    """Return the SQLite state database path for the active Hermes profile.

    The Hermes home directory comes from the ``HERMES_HOME`` environment
    variable and falls back to ``~/.hermes``.  The profile name is read from
    the ``active_profile`` file inside it; a missing, empty or unreadable file
    selects the ``default`` profile.

    Returns:
        ``<home>/state.db`` for the default profile, otherwise
        ``<home>/profiles/<name>/state.db``, as a string.
    """
    import os

    hermes_home = Path(os.environ.get("HERMES_HOME") or Path.home() / ".hermes")

    profile_name = "default"
    active_profile_file = hermes_home / "active_profile"
    if active_profile_file.exists():
        try:
            profile_name = active_profile_file.read_text(encoding="utf-8").strip() or "default"
        except (OSError, UnicodeError):
            # Best effort: an unreadable marker file means the default profile.
            pass

    if profile_name != "default":
        return str(hermes_home / "profiles" / profile_name / "state.db")
    return str(hermes_home / "state.db")


def cmd_lookup_skill(args):
    """Search the ``skills_registry`` table and print ``{"results": [...]}``.

    Every whitespace-separated word of ``args.query`` must occur
    (case-insensitively) in the name, description or keywords of a skill.  An
    empty query lists up to 10 skills, a word query returns up to 5.  A missing
    database or table yields an empty result list; any other failure is printed
    as ``{"error": ...}``.
    """
    import sqlite3
    from contextlib import closing

    db_path = resolve_db_path()
    if not Path(db_path).exists():
        print(json.dumps({"results": []}))
        return

    columns = ("name", "description", "keywords", "status", "entrypoint", "dependencies")
    select = f"SELECT {', '.join(columns)} FROM skills_registry"
    # '%' and '_' typed by the user are escaped so they match literally.
    like = "LIKE ? ESCAPE '\\'"
    clause = f"(LOWER(name) {like} OR LOWER(description) {like} OR LOWER(keywords) {like})"

    try:
        # closing() guarantees the connection is released on every exit path.
        with closing(sqlite3.connect(db_path)) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='skills_registry'"
            )
            if not cursor.fetchone():
                print(json.dumps({"results": []}))
                return

            words = args.query.lower().split()
            if not words:
                cursor.execute(f"{select} LIMIT 10")
            else:
                params = []
                for word in words:
                    escaped = word.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
                    params.extend([f"%{escaped}%"] * 3)
                where = " AND ".join([clause] * len(words))
                cursor.execute(f"{select} WHERE {where} LIMIT 5", params)
            rows = cursor.fetchall()

        print(json.dumps({"results": [dict(zip(columns, row)) for row in rows]}))
    except Exception as e:
        print(json.dumps({"error": str(e)}))


def cmd_scaffold_skill(args):
    """Create a custom skill on disk and register it in ``skills_registry``.

    Writes ``SKILL.md``, ``main.py`` and (when ``args.deps`` is given)
    ``requirements.txt`` into ``<profile home>/skills/custom/<slug>``, attempts
    a best-effort ``pip install`` of the dependencies, then upserts the skill
    row.  Prints ``{"status": "success", "path", "entrypoint"}`` on success,
    plus ``"deps_installed"`` when dependencies were requested, or
    ``{"error": ...}`` on failure.
    """
    import os
    import sqlite3
    import subprocess
    from contextlib import closing

    keywords = "custom, autopoietic, generated"

    # The profile home is the directory that holds the profile's state.db.
    db_path = resolve_db_path()
    profile_home = Path(db_path).parent

    slug = args.name.lower().replace(" ", "-")
    slug = "".join(c for c in slug if c.isalnum() or c == "-")

    try:
        if not slug.strip("-"):
            # An empty slug would write the skill files into the shared root.
            raise ValueError(f"Cannot derive a directory name from skill name {args.name!r}")

        skill_dir = profile_home / "skills" / "custom" / slug
        entrypoint = str(skill_dir / "main.py")
        skill_dir.mkdir(parents=True, exist_ok=True)

        # json.dumps yields a valid double-quoted YAML scalar even when the
        # text contains quotes, backslashes or newlines.
        skill_md = (
            "---\n"
            f"name: {json.dumps(args.name, ensure_ascii=False)}\n"
            f"description: {json.dumps(args.desc, ensure_ascii=False)}\n"
            f"keywords: \"{keywords}\"\n"
            "---\n\n"
            f"# {args.name}\n\n"
            f"{args.desc}\n"
        )
        (skill_dir / "SKILL.md").write_text(skill_md, encoding="utf-8")
        (skill_dir / "main.py").write_text(args.code, encoding="utf-8")

        deps_list = [d.strip() for d in args.deps.split(",") if d.strip()] if args.deps else []
        deps_installed = None
        if deps_list:
            (skill_dir / "requirements.txt").write_text("\n".join(deps_list), encoding="utf-8")

            # Installation is best effort: a failure is reported, not fatal.
            python_bin = os.environ.get("HERMES_PYTHON", sys.executable)
            try:
                completed = subprocess.run(
                    [python_bin, "-m", "pip", "install"] + deps_list,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    timeout=30,
                )
                deps_installed = completed.returncode == 0
            except (subprocess.TimeoutExpired, OSError):
                deps_installed = False

        with closing(sqlite3.connect(db_path)) as conn:
            cursor = conn.cursor()
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS skills_registry (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    name TEXT UNIQUE,
                    description TEXT,
                    keywords TEXT,
                    status TEXT DEFAULT 'active',
                    entrypoint TEXT,
                    dependencies TEXT,
                    created_at DATETIME DEFAULT CURRENT_TIMESTAMP
                )
            """)
            cursor.execute("""
                INSERT INTO skills_registry (name, description, keywords, status, entrypoint, dependencies)
                VALUES (?, ?, ?, ?, ?, ?)
                ON CONFLICT(name) DO UPDATE SET
                    description = excluded.description,
                    keywords = excluded.keywords,
                    entrypoint = excluded.entrypoint,
                    dependencies = excluded.dependencies,
                    status = excluded.status
            """, (
                args.name,
                args.desc,
                keywords,
                "active",
                entrypoint,
                json.dumps(deps_list),
            ))
            conn.commit()

        result = {"status": "success", "path": str(skill_dir), "entrypoint": entrypoint}
        if deps_installed is not None:
            result["deps_installed"] = deps_installed
        print(json.dumps(result))
    except Exception as e:
        print(json.dumps({"error": str(e)}))
def main():
    """Parse the command line and dispatch to the matching ``cmd_*`` handler.

    Any exception raised by a handler is printed as ``{"error": ...}`` and the
    process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="Hermes Agent Python Core Bridge CLI")
    subparsers = parser.add_subparsers(dest="command", required=True)

    def register(name, handler, *arguments):
        # One sub-command: its flags in declaration order, then its handler.
        sub = subparsers.add_parser(name)
        for flag, options in arguments:
            sub.add_argument(flag, **options)
        sub.set_defaults(func=handler)

    register(
        "compress", cmd_compress,
        ("--text", {"required": True, "help": "Text to compress"}),
        ("--tool", {"help": "Tool name context"}),
    )
    register(
        "classify-command", cmd_classify_command,
        ("--args", {"required": True, "help": "JSON array of command arguments"}),
        ("--action-dir", {"help": "Sandbox action directory"}),
    )
    register(
        "is-path-allowed", cmd_is_path_allowed,
        ("--path", {"required": True, "help": "Path to validate"}),
        ("--action-dir", {"required": True, "help": "Sandbox action directory"}),
    )
    register(
        "evaluate-execution", cmd_evaluate_execution,
        ("--args", {"required": True, "help": "JSON array of command arguments"}),
        ("--tier", {
            "required": True,
            "choices": ["readonly", "supervised", "full"],
            "help": "Autonomy security tier",
        }),
        ("--paths", {"help": "JSON array of paths to validate"}),
        ("--action-dir", {"required": True, "help": "Sandbox action directory"}),
    )
    register(
        "memory-save", cmd_memory_save,
        ("--vault", {"required": True, "help": "Path to markdown vault directory"}),
        ("--id", {"required": True, "help": "Page ID/slug"}),
        ("--meta", {"required": True, "help": "JSON object representing frontmatter metadata"}),
        ("--body", {"required": True, "help": "Markdown body text"}),
    )
    register(
        "memory-search", cmd_memory_search,
        ("--vault", {"required": True, "help": "Path to markdown vault directory"}),
        ("--query", {"required": True, "help": "Search query"}),
    )
    register(
        "memory-graph", cmd_memory_graph,
        ("--vault", {"required": True, "help": "Path to markdown vault directory"}),
    )
    register(
        "lookup-skill", cmd_lookup_skill,
        ("--query", {"required": True, "help": "Search terms for registry"}),
    )
    register(
        "scaffold-skill", cmd_scaffold_skill,
        ("--name", {"required": True, "help": "Name of the skill"}),
        ("--desc", {"required": True, "help": "Description of the skill"}),
        ("--code", {"required": True, "help": "Python code for the main.py entrypoint"}),
        ("--deps", {"help": "Comma-separated pip dependencies"}),
    )

    args = parser.parse_args()
    try:
        args.func(args)
    except Exception as e:
        print(json.dumps({"error": str(e)}))
        sys.exit(1)
class CommandClass(Enum):
    """Risk category assigned to a shell command."""

    READ = "READ"
    WRITE = "WRITE"
    NETWORK = "NETWORK"
    INSTALL = "INSTALL"
    DESTRUCTIVE = "DESTRUCTIVE"


class AutonomyTier(Enum):
    """How much the agent may do without asking the user."""

    READONLY = "readonly"
    SUPERVISED = "supervised"
    FULL = "full"


class Decision(Enum):
    """Outcome of gating a command."""

    ALLOW = "ALLOW"
    PROMPT = "PROMPT"
    BLOCK = "BLOCK"


class AutonomySecurity:
    """
    Deterministic security gateway that validates file paths and classifies/gates shell commands.
    Does not rely on LLM prompts; executes deterministic checks in Python.
    """

    _READ_COMMANDS = frozenset({
        "cat", "ls", "grep", "find", "pwd", "echo", "diff",
        "git status", "git log", "git diff", "git show", "git branch",
        "du", "df", "file", "head", "tail", "wc",
    })
    _INSTALL_COMMANDS = frozenset({
        "npm", "yarn", "pnpm", "pip", "pip3", "cargo", "brew", "apt", "apt-get", "gem",
    })
    _NETWORK_COMMANDS = frozenset({
        "curl", "wget", "ping", "ssh", "scp", "ftp", "telnet", "nc", "netstat",
    })
    _WRITE_COMMANDS = frozenset({
        "mkdir", "touch", "cp", "mv", "rm", "tee", "chmod", "chown", "tar", "zip", "unzip",
    })
    _DESTRUCTIVE_WORDS = frozenset({"mkfs", "dd", "format", "reboot", "shutdown", "nuke"})
    # find(1) actions that run programs or write files.
    _FIND_WRITE_ACTIONS = frozenset({
        "-exec", "-execdir", "-ok", "-okdir", "-fprint", "-fprint0", "-fprintf", "-fls",
    })
    _GIT_BRANCH_LIST_FLAGS = frozenset({
        "-a", "--all", "-r", "--remotes", "-l", "--list", "-v", "-vv", "--verbose", "--show-current",
    })
    _GIT_CONFIG_READ_FLAGS = frozenset({
        "--get", "--get-all", "--get-regexp", "--get-urlmatch", "--list", "-l",
    })
    _GIT_CONFIG_WRITE_FLAGS = frozenset({
        "--add", "--unset", "--unset-all", "--replace-all",
        "--rename-section", "--remove-section", "--edit", "-e",
    })
    _GIT_CONFIG_WRITE_SUBCOMMANDS = frozenset({
        "set", "unset", "rename-section", "remove-section", "edit",
    })

    def __init__(self, action_dir: str, forbidden_roots: Optional[List[str]] = None):
        """
        Args:
            action_dir: Sandbox directory; only paths inside it are allowed.
            forbidden_roots: Extra directories to block in addition to the
                built-in system and user-sensitive ones.
        """
        self.action_dir = Path(action_dir).resolve()

        # Default forbidden system directories to prevent traversal or critical access
        self.forbidden_roots = [
            Path(root).resolve()
            for root in ("/etc", "/bin", "/sbin", "/usr/bin", "/usr/sbin", "/var", "/System", "/Library")
        ]
        if forbidden_roots:
            for r in forbidden_roots:
                try:
                    self.forbidden_roots.append(Path(r).resolve())
                except (OSError, RuntimeError, TypeError, ValueError):
                    # Best effort: an entry that cannot be resolved is ignored.
                    pass

        # Add user-sensitive folders to forbidden roots
        home = Path.home().resolve()
        self.forbidden_roots.extend(
            home / name for name in (".ssh", ".aws", ".config", ".hermes", ".gemini")
        )

    def is_path_allowed(self, target_path: "str | Path") -> bool:
        """
        Validates target path against action_dir scope and forbidden system roots.

        The path is canonicalized first: ``~`` is expanded, symlinks are
        followed and ``..`` components are collapsed, also for paths that do
        not exist yet.  Returns False on any resolution error (fail closed).
        """
        try:
            # Non-strict resolve() also normalises the non-existent tail of a
            # path, so "sandbox/missing/../.." cannot pass as "inside sandbox".
            resolved_target = Path(target_path).expanduser().resolve()

            for forbidden in self.forbidden_roots:
                if forbidden == resolved_target or forbidden in resolved_target.parents:
                    return False

            return resolved_target == self.action_dir or self.action_dir in resolved_target.parents
        except Exception:
            # Fail closed on any resolution or validation error
            return False

    @classmethod
    def _classify_git(cls, sub: str, sub_args: List[str]) -> Optional[CommandClass]:
        """Classify a git sub-command; None means "not recognised here"."""
        if sub in {"status", "log", "diff", "show"}:
            return CommandClass.READ
        if sub == "branch":
            # Only pure listing is read-only; creating, deleting or renaming
            # branches all take a positional argument or another flag.
            if all(arg in cls._GIT_BRANCH_LIST_FLAGS for arg in sub_args):
                return CommandClass.READ
            return CommandClass.WRITE
        if sub == "config":
            flags = [arg.split("=", 1)[0] for arg in sub_args if arg.startswith("-")]
            positionals = [arg for arg in sub_args if not arg.startswith("-")]
            if any(flag in cls._GIT_CONFIG_WRITE_FLAGS for flag in flags):
                return CommandClass.WRITE
            if positionals and positionals[0] in cls._GIT_CONFIG_WRITE_SUBCOMMANDS:
                return CommandClass.WRITE
            if any(flag in cls._GIT_CONFIG_READ_FLAGS for flag in flags):
                return CommandClass.READ
            if positionals and positionals[0] in {"get", "list"}:
                return CommandClass.READ
            # "git config <key>" prints a value; "git config <key> <value>" sets it.
            if len(positionals) == 1:
                return CommandClass.READ
            return CommandClass.WRITE
        if sub in {"clone", "fetch", "pull", "push"}:
            # push also has write implications, but is gated as NETWORK.
            return CommandClass.NETWORK
        if sub in {"add", "commit", "checkout", "reset", "revert", "merge", "rebase", "rm"}:
            return CommandClass.WRITE
        return None

    @staticmethod
    def _rm_is_recursive_force(args: List[str]) -> bool:
        """Return True when rm flags request both recursive and forced removal."""
        recursive = force = False
        for arg in args:
            if arg == "--":
                break  # everything after "--" is a file name
            if arg.startswith("--"):
                recursive = recursive or arg == "--recursive"
                force = force or arg == "--force"
            elif arg.startswith("-") and len(arg) > 1:
                letters = arg[1:]
                recursive = recursive or "r" in letters or "R" in letters
                force = force or "f" in letters
        return recursive and force

    def classify_command(self, cmd_args: List[str]) -> CommandClass:
        """
        Classifies a list of command arguments into a CommandClass.
        Fails closed by defaulting unrecognized commands to CommandClass.WRITE.
        """
        if not cmd_args:
            return CommandClass.WRITE

        cmd = cmd_args[0].strip().lower()
        args = cmd_args[1:]

        # If calling git, look at the subcommand
        if cmd == "git" and args:
            git_class = self._classify_git(args[0].strip().lower(), args[1:])
            if git_class is not None:
                return git_class

        if cmd in self._READ_COMMANDS:
            # find is only read-only without actions that delete, execute or write.
            if cmd == "find":
                if "-delete" in args:
                    return CommandClass.DESTRUCTIVE
                if any(arg in self._FIND_WRITE_ACTIONS for arg in args):
                    return CommandClass.WRITE
            # Reading a forbidden file is stopped by the path checks, not here.
            return CommandClass.READ

        # Installers / package managers
        if cmd in self._INSTALL_COMMANDS:
            args_str = " ".join(args).lower()
            if any(term in args_str for term in ("install", "ci", "add", "update", "upgrade")):
                return CommandClass.INSTALL
            return CommandClass.WRITE  # default package manager actions to WRITE

        # Network requests
        if cmd in self._NETWORK_COMMANDS:
            return CommandClass.NETWORK

        # Destructive tools.  Whole words are matched (also inside a quoted
        # shell string such as ``sh -c "dd if=..."``) so that harmless
        # arguments like "address" or "--format=json" are not flagged.
        full_cmd_str = " ".join(cmd_args).lower()
        if "rm -rf" in full_cmd_str:
            return CommandClass.DESTRUCTIVE
        for token in full_cmd_str.split():
            name = token.rsplit("/", 1)[-1]
            if name in self._DESTRUCTIVE_WORDS or name.startswith("mkfs."):
                return CommandClass.DESTRUCTIVE
        if cmd == "rm" and self._rm_is_recursive_force(args):
            return CommandClass.DESTRUCTIVE

        # Common file write/manipulation commands
        if cmd in self._WRITE_COMMANDS:
            return CommandClass.WRITE

        # Default fallback (fail-closed to WRITE, never READ)
        return CommandClass.WRITE

    def gate_decision(self, cmd_class: CommandClass, tier: AutonomyTier) -> Tuple[Decision, str]:
        """
        Determines the gating action (ALLOW, PROMPT, BLOCK) for a given CommandClass under a selected AutonomyTier.

        Returns:
            A ``(decision, reason)`` tuple; unknown tiers are blocked.
        """
        if tier == AutonomyTier.READONLY:
            if cmd_class == CommandClass.READ:
                return Decision.ALLOW, "Read-only command allowed under READONLY tier."
            return Decision.BLOCK, f"Command classified as {cmd_class.value} is blocked under READONLY tier."

        elif tier == AutonomyTier.SUPERVISED:
            if cmd_class == CommandClass.READ:
                return Decision.ALLOW, "Read-only command allowed under SUPERVISED tier."
            elif cmd_class == CommandClass.DESTRUCTIVE:
                return Decision.BLOCK, "Destructive commands are strictly blocked under SUPERVISED tier."
            else:
                return Decision.PROMPT, f"Command classified as {cmd_class.value} requires explicit user approval under SUPERVISED tier."

        elif tier == AutonomyTier.FULL:
            if cmd_class == CommandClass.DESTRUCTIVE:
                return Decision.PROMPT, "Destructive command requires confirmation even under FULL tier."
            return Decision.ALLOW, "Command allowed under FULL tier."

        return Decision.BLOCK, "Unknown autonomy tier. Defaulting to BLOCK."

    def evaluate_command_execution(self, cmd_args: List[str], tier: AutonomyTier, paths_to_check: Optional[List[str]] = None) -> Tuple[Decision, str]:
        """
        Evaluates command classification, checks paths, and yields a unified security decision.

        Any supplied path that fails ``is_path_allowed`` blocks the command
        before it is classified.
        """
        # Step 1: Check path bounds first if paths were supplied
        if paths_to_check:
            for p in paths_to_check:
                if not self.is_path_allowed(p):
                    return Decision.BLOCK, f"Access blocked: Path '{p}' is outside the action sandbox or in a forbidden system directory."

        # Step 2: Classify the CLI command
        cmd_class = self.classify_command(cmd_args)

        # Step 3: Check decision against the tier
        return self.gate_decision(cmd_class, tier)
class MemoryTreeManager:
    """
    Obsidian-style local memory vault manager.
    Parses frontmatter, body content, wikilinks, and maintains a local link graph.
    """

    def __init__(self, vault_dir: str):
        """Open the vault at ``vault_dir``, creating the directory if needed."""
        self.vault_dir = Path(vault_dir).resolve()
        self.vault_dir.mkdir(parents=True, exist_ok=True)
        self.wikilink_pattern = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]")
        # The first alternative matches an empty frontmatter block
        # ("---\n---\n"), which save_page writes for pages without metadata.
        self.frontmatter_pattern = re.compile(r"^---\s*\n(?:---\s*\n|([\s\S]*?)\n---\s*\n)")

    def _get_page_path(self, page_id: str) -> Path:
        """Returns the absolute file path for a given page_id (slugified or direct)."""
        # Path separators and other reserved characters become "_" so a page
        # id can never point outside the vault directory.
        safe_name = re.sub(r"[\\/:*?\"<>|]", "_", page_id)
        if not safe_name.endswith(".md"):
            safe_name += ".md"
        return (self.vault_dir / safe_name).resolve()

    def parse_page(self, page_id: str) -> Tuple[Dict[str, Any], str]:
        """
        Parses an on-disk markdown page.

        Returns:
            metadata: Dict of frontmatter keys; values are strings, or lists of
                strings for ``[a, b]`` style values.
            body: Remaining markdown body.
            A page that does not exist yields ``({}, "")``.
        """
        path = self._get_page_path(page_id)
        if not path.is_file():
            return {}, ""

        content = path.read_text(encoding="utf-8")

        frontmatter: Dict[str, Any] = {}
        match = self.frontmatter_pattern.match(content)
        if not match:
            return frontmatter, content

        body = content[match.end():]
        fm_text = match.group(1) or ""

        # Simple YAML-like key-value parser for basic frontmatter
        for line in fm_text.split("\n"):
            if ":" not in line:
                continue
            key, _, value = line.partition(":")
            key = key.strip()
            value = value.strip()

            # Strip one pair of matching surrounding quotes
            if value[:1] == value[-1:] and value[:1] in ('"', "'"):
                value = value[1:-1]

            # Parse lists e.g. [tag1, tag2]
            if value.startswith("[") and value.endswith("]"):
                frontmatter[key] = [
                    item.strip().strip('"').strip("'")
                    for item in value[1:-1].split(",")
                    if item.strip()
                ]
            else:
                frontmatter[key] = value

        return frontmatter, body

    @staticmethod
    def _format_value(value: Any) -> str:
        """Render one metadata value as the right-hand side of a frontmatter line."""
        if isinstance(value, list):
            return "[" + ", ".join(f'"{item}"' for item in value) + "]"
        if isinstance(value, str):
            # The parser is line based, so a line break inside a value would
            # corrupt the block (or end it early); collapse it to a space.
            value = re.sub(r"[\r\n]+", " ", value)
            # Quote strings that contain characters special to YAML
            if ":" in value or "-" in value or "#" in value:
                return f'"{value}"'
        return f"{value}"

    def save_page(self, page_id: str, metadata: Dict[str, Any], body: str):
        """
        Saves a page to disk with structured frontmatter and markdown body.

        An existing page with the same id is overwritten.
        """
        path = self._get_page_path(page_id)

        fm_lines = ["---"]
        fm_lines.extend(f"{key}: {self._format_value(value)}" for key, value in metadata.items())
        fm_lines.append("---")

        content = "\n".join(fm_lines) + f"\n{body}"
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)

    def extract_links(self, body: str) -> List[str]:
        """Extracts target link names from [[wikilink]] style links in the body."""
        return [match.strip() for match in self.wikilink_pattern.findall(body)]

    def delete_page(self, page_id: str, use_trash: bool = True):
        """Deletes a page, either permanently or by archiving it into a .trash subfolder.

        A page that does not exist is ignored.  When the trash already holds a
        page of the same name, the new one gets a numeric suffix instead of
        overwriting it.
        """
        path = self._get_page_path(page_id)
        if not path.exists():
            return

        if not use_trash:
            path.unlink()
            return

        trash_dir = self.vault_dir / ".trash"
        trash_dir.mkdir(exist_ok=True)
        destination = trash_dir / path.name
        counter = 1
        while destination.exists():
            destination = trash_dir / f"{path.stem}.{counter}{path.suffix}"
            counter += 1
        path.rename(destination)

    def _iter_pages(self) -> List[str]:
        """Return the ids of all visible top-level pages, in sorted order."""
        return sorted(
            item.stem for item in self.vault_dir.glob("*.md") if not item.name.startswith(".")
        )

    def build_link_graph(self) -> Tuple[Dict[str, Set[str]], Dict[str, Set[str]]]:
        """
        Scans all files in the vault to build a bi-directional link and backlink graph.

        Returns:
            outgoing: Dict mapping page_id -> set of linked page_ids
            backlinks: Dict mapping page_id -> set of pages linking back to it
        """
        outgoing: Dict[str, Set[str]] = {}
        backlinks: Dict[str, Set[str]] = {}

        for page_id in self._iter_pages():
            _, body = self.parse_page(page_id)
            links = self.extract_links(body)

            outgoing[page_id] = set(links)
            for link in links:
                backlinks.setdefault(link, set()).add(page_id)

        return outgoing, backlinks

    def search(self, query: str) -> List[Tuple[str, int]]:
        """
        Performs full-text search in all vault pages.

        Each title match scores 5, each matching tag 3 and each body match 1;
        matching is case-insensitive.  Returns ``(page_id, score)`` tuples
        sorted by descending score, ties ordered by page id.
        """
        results: List[Tuple[str, int]] = []
        if not query:
            return results

        query_pat = re.compile(re.escape(query), re.IGNORECASE)
        needle = query.lower()

        for page_id in self._iter_pages():
            metadata, body = self.parse_page(page_id)

            # A title written as "[a, b]" is parsed into a list; flatten it so
            # the regex search always receives a string.
            title = metadata.get("title", page_id)
            if isinstance(title, list):
                title = " ".join(str(part) for part in title)
            score = len(query_pat.findall(str(title))) * 5

            # A single scalar tag ("tags: music") counts like a one-item list.
            tags = metadata.get("tags", [])
            if isinstance(tags, str):
                tags = [tags]
            if isinstance(tags, list):
                score += 3 * sum(1 for tag in tags if needle in str(tag).lower())

            score += len(query_pat.findall(body))

            if score > 0:
                results.append((page_id, score))

        results.sort(key=lambda hit: (-hit[1], hit[0]))
        return results
+from autonomy_security import AutonomySecurity, CommandClass, AutonomyTier, Decision +from memory_tree import MemoryTreeManager + +class TestTokenJuice(unittest.TestCase): + def setUp(self): + self.compressor = TokenJuice() + + def test_terminal_stripping(self): + text = "\x1B[31mError:\x1B[0m Execution failed\rProgress: 10%\rProgress: 100%" + compressed = self.compressor.compress(text) + self.assertEqual(compressed, "Error: Execution failed Progress: 100%") + + def test_url_shortening(self): + long_url = "https://example.com/some/extremely/long/path/name/to/document?param1=value1¶m2=value2¶m3=value3" + compressed = self.compressor.compress(f"Check link: {long_url}") + self.assertIn("https://example.com/some/extremely/long/path/name/to/document?...", compressed) + + def test_html_cleaning(self): + html = """ + + + +

Main Title

+

This is a bold paragraph with a link.

+ + + + """ + compressed = self.compressor.compress(html) + self.assertEqual( + compressed, + "# Main Title\nThis is a **bold** paragraph with a [link](http://google.com) .\n* Item 1\n* Item 2" + ) + + def test_git_status_compression(self): + git_status = """On branch main +Your branch is up to date with 'origin/main'. + +Changes not staged for commit: + (use "git add ..." to update what will be committed) + (use "git restore ..." to discard changes in working directory) + modified: src/main.py + deleted: src/utils.py + +Untracked files: + (use "git add ..." to include in what will be committed) + tests/test_main.py + +nothing added to commit but untracked files present (use "git add" to track)""" + + compressed = self.compressor.compress(git_status, tool_name="git") + expected_parts = [ + "Unstaged:", + "M src/main.py", + "D src/utils.py", + "Untracked:", + "tests/test_main.py" + ] + for part in expected_parts: + self.assertIn(part, compressed) + self.assertNotIn("On branch main", compressed) + self.assertNotIn("use \"git restore", compressed) + + +class TestAutonomySecurity(unittest.TestCase): + def setUp(self): + # Use local workspace .tmp directory to avoid macOS temp /var path blocks + self.test_dir = Path(__file__).parent.parent.parent / ".tmp" / "test_sandbox" + self.action_path = self.test_dir / "sandbox" + + if self.test_dir.exists(): + shutil.rmtree(self.test_dir) + self.action_path.mkdir(parents=True) + + self.security = AutonomySecurity(str(self.action_path)) + + def tearDown(self): + if self.test_dir.exists(): + shutil.rmtree(self.test_dir) + + def test_path_hardening_sandbox(self): + # Paths inside sandbox should be allowed + inside_file = self.action_path / "project.txt" + self.assertTrue(self.security.is_path_allowed(inside_file)) + + # Traversal attempts should be blocked + traversal_file = self.action_path / "../outside.txt" + self.assertFalse(self.security.is_path_allowed(traversal_file)) + + # System forbidden paths should be blocked + 
class TestAutonomySecurity(unittest.TestCase):
    """Checks sandbox path validation, command classification and tier gating."""

    def setUp(self):
        # Use local workspace .tmp directory to avoid macOS temp /var path blocks
        self.test_dir = Path(__file__).parent.parent.parent / ".tmp" / "test_sandbox"
        self.action_path = self.test_dir / "sandbox"

        if self.test_dir.exists():
            shutil.rmtree(self.test_dir)
        self.action_path.mkdir(parents=True)

        self.security = AutonomySecurity(str(self.action_path))

    def tearDown(self):
        if self.test_dir.exists():
            shutil.rmtree(self.test_dir)

    def test_path_hardening_sandbox(self):
        # Paths inside sandbox should be allowed
        inside_file = self.action_path / "project.txt"
        self.assertTrue(self.security.is_path_allowed(inside_file))

        # Traversal attempts should be blocked
        traversal_file = self.action_path / "../outside.txt"
        self.assertFalse(self.security.is_path_allowed(traversal_file))

        # System forbidden paths should be blocked
        self.assertFalse(self.security.is_path_allowed("/etc/passwd"))
        self.assertFalse(self.security.is_path_allowed("~/.ssh/id_rsa"))

    def test_symlink_resolutions(self):
        # Create a file outside the sandbox
        outside_file = self.test_dir / "secret.key"
        outside_file.write_text("secret_value")

        # Create a symlink inside the sandbox pointing to the secret file outside
        link_path = self.action_path / "link.key"
        try:
            os.symlink(outside_file, link_path)
        except (OSError, NotImplementedError):
            # Report the test as skipped instead of silently passing when the
            # OS lacks symlink permissions (e.g. non-admin Windows).
            self.skipTest("symlinks cannot be created on this platform")

        # The path checker should canonicalize the symlink and block access to the resolved file
        self.assertFalse(self.security.is_path_allowed(link_path))

    def test_command_classification(self):
        expectations = [
            (["cat", "test.txt"], CommandClass.READ),
            (["git", "status"], CommandClass.READ),
            (["git", "checkout", "main"], CommandClass.WRITE),
            (["curl", "http://api.com"], CommandClass.NETWORK),
            (["npm", "install", "lodash"], CommandClass.INSTALL),
            # Destructive checks
            (["rm", "-rf", "node_modules"], CommandClass.DESTRUCTIVE),
            (["dd", "if=/dev/zero", "of=/dev/sda"], CommandClass.DESTRUCTIVE),
            # Fallback checks (fail-closed to WRITE)
            (["unknown_command", "--arg"], CommandClass.WRITE),
        ]
        for argv, expected in expectations:
            with self.subTest(argv=argv):
                self.assertEqual(self.security.classify_command(argv), expected)

    def test_autonomy_tier_gating(self):
        # READONLY tier
        dec_read, _ = self.security.gate_decision(CommandClass.READ, AutonomyTier.READONLY)
        dec_write, _ = self.security.gate_decision(CommandClass.WRITE, AutonomyTier.READONLY)
        self.assertEqual(dec_read, Decision.ALLOW)
        self.assertEqual(dec_write, Decision.BLOCK)

        # SUPERVISED tier
        dec_read_sup, _ = self.security.gate_decision(CommandClass.READ, AutonomyTier.SUPERVISED)
        dec_write_sup, _ = self.security.gate_decision(CommandClass.WRITE, AutonomyTier.SUPERVISED)
        dec_dest_sup, _ = self.security.gate_decision(CommandClass.DESTRUCTIVE, AutonomyTier.SUPERVISED)
        self.assertEqual(dec_read_sup, Decision.ALLOW)
        self.assertEqual(dec_write_sup, Decision.PROMPT)
        self.assertEqual(dec_dest_sup, Decision.BLOCK)

        # FULL tier
        dec_write_full, _ = self.security.gate_decision(CommandClass.WRITE, AutonomyTier.FULL)
        dec_dest_full, _ = self.security.gate_decision(CommandClass.DESTRUCTIVE, AutonomyTier.FULL)
        self.assertEqual(dec_write_full, Decision.ALLOW)
        self.assertEqual(dec_dest_full, Decision.PROMPT)
class TestMemoryTreeManager(unittest.TestCase):
    """Round-trip, link-graph, deletion and search checks on a temporary vault."""

    def setUp(self):
        self.temp_dir = tempfile.TemporaryDirectory()
        self.manager = MemoryTreeManager(self.temp_dir.name)

    def tearDown(self):
        self.temp_dir.cleanup()

    def test_save_and_parse_page(self):
        self.manager.save_page(
            "test_page",
            {"title": "Test Page", "tags": ["test", "pedagogy"]},
            "This is a memory about [[Jazz Education]] and [[The Shed]].",
        )

        loaded_meta, loaded_body = self.manager.parse_page("test_page")
        self.assertEqual(loaded_meta["title"], "Test Page")
        self.assertEqual(loaded_meta["tags"], ["test", "pedagogy"])
        self.assertIn("This is a memory about", loaded_body)

    def test_extract_links(self):
        found = self.manager.extract_links(
            "References to [[Miles Davis]] and [[Giant Steps|Coltrane's Steps]]."
        )
        self.assertEqual(found, ["Miles Davis", "Giant Steps"])

    def test_link_graph(self):
        # A links to B and C, B links to C, C links to nothing.
        pages = [
            ("PageA", {"title": "Page A"}, "Links to [[PageB]] and [[PageC]]."),
            ("PageB", {"title": "Page B"}, "Links to [[PageC]]."),
            ("PageC", {"title": "Page C"}, "No links here."),
        ]
        for page_id, meta, text in pages:
            self.manager.save_page(page_id, meta, text)

        outgoing, backlinks = self.manager.build_link_graph()
        self.assertEqual(outgoing["PageA"], {"PageB", "PageC"})
        self.assertEqual(outgoing["PageB"], {"PageC"})

        self.assertEqual(backlinks["PageB"], {"PageA"})
        self.assertEqual(backlinks["PageC"], {"PageA", "PageB"})

    def test_page_deletion(self):
        vault_root = Path(self.temp_dir.name)
        live_file = vault_root / "PageToDel.md"

        self.manager.save_page("PageToDel", {}, "Content")
        self.assertTrue(live_file.exists())

        self.manager.delete_page("PageToDel", use_trash=True)
        self.assertFalse(live_file.exists())
        self.assertTrue((vault_root / ".trash" / "PageToDel.md").exists())

    def test_full_text_search(self):
        self.manager.save_page(
            "JazzMod",
            {"title": "Jazz Improv", "tags": ["music"]},
            "Learning tritone substitution on Green Dolphin Street.",
        )
        self.manager.save_page(
            "LogicMod",
            {"title": "Logic Puzzles"},
            "Studying first-order logic and reasoning.",
        )

        results = self.manager.search("tritone")
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0][0], "JazzMod")

        results_multi = self.manager.search("Logic")
        self.assertEqual(len(results_multi), 1)
        self.assertEqual(results_multi[0][0], "LogicMod")
+ +class TokenJuice: + """ + TokenJuice is a smart token compression layer designed to reduce verbose agent outputs + (shell logs, HTML scrapes, tool responses) before passing them to an LLM context. + """ + + def __init__(self, rules_path: Optional[str] = None): + self.rules: List[Dict[str, str]] = [] + self._load_builtin_rules() + if rules_path: + self.load_custom_rules(rules_path) + + def _load_builtin_rules(self): + """Initializes default built-in rules for common tools (git, bash, npm, cargo).""" + # Git status compression rules + self.git_status_rules = [ + (r"(?i)On branch \S+", ""), + (r"(?i)Your branch is up to date with .*", ""), + (r"(?i)nothing to commit, working tree clean", "Status: Clean"), + (r"(?i)Changes not staged for commit:[\s\S]*?\(use \"git add[\s\S]*?\)", "Unstaged:"), + (r"(?i)Changes to be committed:[\s\S]*?\(use \"git restore[\s\S]*?\)", "Staged:"), + (r"(?i)Untracked files:[\s\S]*?\(use \"git add[\s\S]*?\)", "Untracked:"), + (r"^\s*\(use \"git[\s\S]*?\n", "", re.MULTILINE), + (r"^\s*modified:\s*(.+)$", r"M \1", re.MULTILINE), + (r"^\s*deleted:\s*(.+)$", r"D \1", re.MULTILINE), + (r"^\s*new file:\s*(.+)$", r"A \1", re.MULTILINE), + (r"^\s*renamed:\s*(.+) -> (.+)$", r"R \1 -> \2", re.MULTILINE), + (r"\n{2,}", "\n"), # Collapse multiple newlines + ] + + # General Terminal output noise reduction + self.terminal_rules = [ + (r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", ""), # Strip ANSI escape codes (color codes) + (r"^[. 
]*\r", "", re.MULTILINE), # Remove carriage returns / progress dots + (r"Progress: \d+%.*?\r", "", re.MULTILINE), + (r"Downloading.*?\r", "", re.MULTILINE), + (r"Extracting.*?\r", "", re.MULTILINE), + ] + + # URL shortening rules + self.url_pattern = re.compile(r"https?://\S+") + + def load_custom_rules(self, rules_path: str): + """Loads custom regex replacement rules from a JSON file.""" + if not os.path.exists(rules_path): + return + try: + with open(rules_path, "r", encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, list): + self.rules.extend(data) + except Exception as e: + print(f"Error loading TokenJuice rules from {rules_path}: {e}") + + def shorten_urls(self, text: str) -> str: + """Truncates long URLs and strips excessive query parameters to save tokens.""" + def repl(match): + url = match.group(0) + if len(url) <= 40: + return url + parts = url.split("?", 1) + base_url = parts[0] + if len(parts) > 1: + return base_url + "?..." + if len(base_url) > 60: + return base_url[:57] + "..." + return base_url + + return self.url_pattern.sub(repl, text) + + def clean_html(self, html_content: str) -> str: + """Converts raw HTML strings into semantic, condensed Markdown.""" + # 1. Strip scripts, styles, header, footer, head + html = re.sub(r"(?is)]*>([\s\S]*?)", "", html_content) + html = re.sub(r"(?is)]*>([\s\S]*?)", "", html) + html = re.sub(r"(?is)]*>([\s\S]*?)", "", html) + html = re.sub(r"(?is)", "", html) # Strip HTML comments + + # 2. Structural tags to Markdown equivalents + html = re.sub(r"(?i)]*>(.*?)", r"\n# \1\n", html) + html = re.sub(r"(?i)]*>(.*?)", r"\n## \1\n", html) + html = re.sub(r"(?i)]*>(.*?)

", r"\n\1\n", html) + html = re.sub(r"(?i)]*>(.*?)", r"\n* \1", html) + html = re.sub(r"(?i)]*>(.*?)|]*>(.*?)", r" **\1\2** ", html) + html = re.sub(r"(?i)]*>(.*?)|]*>(.*?)", r" *\1\2* ", html) + html = re.sub(r"(?i)]*href=\"([^\"]*)\"[^>]*>(.*?)", r" [\2](\1) ", html) + html = re.sub(r"(?i)", "\n", html) + + # 3. Strip all remaining HTML tags + html = re.sub(r"<[^>]+>", "", html) + + # 4. Collapse extra whitespace and empty lines + lines = [line.strip() for line in html.split("\n")] + non_empty_lines = [] + for line in lines: + if line: + # Replace multiple spaces with a single space + line = re.sub(r"[ \t]+", " ", line) + non_empty_lines.append(line) + + return "\n".join(non_empty_lines) + + def compress(self, text: str, tool_name: Optional[str] = None) -> str: + """ + Compresses the input text by applying terminal, url, html and tool-specific rules. + """ + if not text: + return "" + + # Step 1: Strip escape codes and clean general terminal noise + for pattern, replacement, *flags in self.terminal_rules: + flag_val = flags[0] if flags else 0 + text = re.sub(pattern, replacement, text, flags=flag_val) + + # Step 2: Apply custom rules if loaded + for rule in self.rules: + pattern = rule.get("pattern", "") + replacement = rule.get("replacement", "") + if pattern: + text = re.sub(pattern, replacement, text) + + # Step 3: Apply tool-specific compression rules + normalized_tool = (tool_name or "").lower() + if "git" in normalized_tool or (not tool_name and "On branch" in text): + for pattern, replacement, *flags in self.git_status_rules: + flag_val = flags[0] if flags else 0 + text = re.sub(pattern, replacement, text, flags=flag_val) + + # Step 4: Check if HTML and clean it + if " str: + compressor = TokenJuice() + return compressor.compress(text, tool_name) diff --git a/.gitignore b/.gitignore index 0d91fa141..0fde35663 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ pnpm-debug.log* # Electron packaging artifacts release/ .claude/worktrees +.worktrees/ # 
Tauri **/src-tauri/target/ @@ -36,4 +37,25 @@ release/ # Local launcher scripts (user-specific, do not commit) /*.bat -/*.ps1 \ No newline at end of file +/*.ps1 +graphify-out/ +src/renderer/public/tesseract/ + +# Bundled OpenAlex MCP server (built by `npm run build:mcp`) +resources/openalex-mcp.cjs + +# Scratch / local experiment dirs (never commit) +scratch/ + +# Python bytecode and caches +__pycache__/ +*.pyc +*.pyo +*.pyd +.pytest_cache/ +.venv/ +venv/ + +# transient verify-script bundles (esbuild output, removed by trap) +.verify-ni.cjs +.verify-ec.cjs diff --git a/.husky/pre-commit b/.husky/pre-commit old mode 100644 new mode 100755 index ede3f59af..c07457adb --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -1,3 +1,5 @@ +#!/usr/bin/env bash +# Uses bash-only [[ ]]; the shebang guarantees bash even where /bin/sh is dash. BRANCH=$(git symbolic-ref --short HEAD 2>/dev/null) if [[ "$BRANCH" != "release" && "$BRANCH" != release/* ]]; then exit 0 diff --git a/.husky/pre-push b/.husky/pre-push old mode 100644 new mode 100755 index 6d9323f77..0fe9a8125 --- a/.husky/pre-push +++ b/.husky/pre-push @@ -1,3 +1,5 @@ +#!/usr/bin/env bash +# Uses bash-only [[ ]]; the shebang guarantees bash even where /bin/sh is dash. BRANCH=$(git symbolic-ref --short HEAD 2>/dev/null) if [[ "$BRANCH" != "release" && "$BRANCH" != release/* ]]; then exit 0 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..0aed38cda --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,113 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## What this is + +**Hermes Desktop** — an Electron desktop GUI for installing, configuring, and chatting with +[Hermes Agent](https://github.com/NousResearch/hermes-agent), a self-improving Python AI agent. 
The +desktop app is a front-end: it drives the official Hermes install script, stores Hermes under +`~/.hermes` (`HERMES_HOME`), spawns/manages the Hermes **gateway** (an OpenAI-compatible server, +local on `127.0.0.1:8642` or remote/SSH), and gives a GUI for chat, providers, profiles, memory, +skills, tools, scheduling, and messaging gateways. + +The current product surface is **SPS Agent** — a Notion-style workspace (docs + tasks + AI +co-author) that is _the_ app; the Hermes admin screens (Providers/Gateway/Settings/…) open on demand +as an overlay (gear button or ⌘,). + +## Commands + +```bash +npm run dev # run the app (electron-vite dev). dev:fresh uses a throwaway HERMES_HOME +npm run build # typecheck + electron-vite build (REQUIRED before the smoke harness) +npm run lint # eslint (cached) +npm run typecheck # BOTH projects: typecheck:node (main+preload) + typecheck:web (renderer) +npm test # vitest run (jsdom). test:watch for watch mode +npx vitest run path/to/file.test.ts # single test file +npx vitest run -t "name substring" # single test by name +npm run verify:note-index # native-module index proof (see caveat below) +node scripts/sps-smoke.mjs # Playwright-Electron UI smoke (build first) +``` + +Build installers: `build:mac` / `build:win` / `build:linux` / `build:rpm` (electron-builder). + +## Architecture + +Three processes (electron-vite builds each separately — `electron.vite.config.ts`): + +- **`src/main/`** — Electron main process. Owns all privileged work: IPC handlers (`index.ts` is the + hub), the Hermes installer (`installer.ts`), gateway lifecycle + chat streaming (`hermes.ts`, + spawns the Python server, parses SSE via `sse-parser.ts`), SSH tunnels (`ssh-tunnel.ts`), config, + profiles, models/providers, memory, skills, cronjobs. `better-sqlite3` is marked `external` here. +- **`src/preload/`** — the secure renderer bridge (`contextBridge`). 
The renderer never touches + Node/Electron directly; it calls `window.hermesAPI.*` which `ipcRenderer.invoke`s the main handlers. +- **`src/renderer/src/`** — React 19 + Zustand app. `App.tsx` is a state-machine over screens + (`loading → welcome → installing → setup → main`). `screens/` holds each surface; `screens/SpsAgent/` + is the workspace (its own Zustand store, editor, sidebar, panels, graph). +- **`src/shared/`** — types/helpers imported by both main and renderer (attachments, usage, i18n, …). + +### Connection modes + +Hermes runs **local** (managed subprocess), **remote** (URL + API key to a remote Hermes server), or +**ssh** (tunnel to a remote box). `getConnectionConfig()` drives this; the renderer adapts screens to it. + +### SPS Agent storage substrate (read `docs/STORAGE.md` before touching it) + +The one rule: **markdown on disk is the only source of truth; SQLite is a rebuildable index. Writes go +file-first.** Per-profile layout under `<profile>/sps-agent/`: `workspace.json` (the blob), +`vault/<pageId>.md` (one page per file, frontmatter + blocks), `vault/<dbId>/<rowId>.md` +(folder-backed query-DB rows), `_manifest.json` (tree/trash/comments), `.note-index.db` (derived +`better-sqlite3` FTS5 + wikilink-graph index, rebuilt from disk by `src/main/note-index.ts`). +Two `storageMode`s: `blob` (default; vault is an additive mirror) and `vault` (markdown authoritative). +Migration goes through `lib/storageActions.ts` and is gated by a **parity** round-trip that refuses +lossy cutover. Markdown↔block serializers live in `screens/SpsAgent/editor/` (Tier 1 clean / Tier 2 +lossless-fallback `<!-- … -->` comments) and have golden byte-for-byte tests. + +### The SPS design system + +The SPS look is **not** re-derived in Tailwind — the prototype stylesheets are carried over verbatim +into `screens/SpsAgent/styles/` and confined to a `.sps-scope` container by `scripts/scope-sps-css.mjs` +(so its global `:root`/`body`/`*` rules don't leak into the Hermes renderer). 
Theme/layout switches are +pure attribute swaps on the scope element. The standalone `sps-agent/` (runnable Vite app) and +`sps-agent-prototype/` (the canonical design + interaction spec) are the upstream sources for this code. + +## Conventions that bite if missed + +- **Preload API parity is enforced.** Every method must appear in BOTH `src/preload/index.ts` and + `src/preload/index.d.ts`, or `tests/preload-api-surface.test.ts` fails. +- **`better-sqlite3` is compiled for Electron's node ABI, not vitest's.** Any code that _opens_ the + index cannot run under vitest. Split: pure logic + IPC-mocked hooks/components → vitest (jsdom); + anything that opens the index → `npm run verify:note-index` (runs under `ELECTRON_RUN_AS_NODE=1`); + renderer UI → the Playwright-Electron smoke harness `scripts/sps-smoke.mjs`. +- **Two TS projects, two typechecks.** `tsconfig.node.json` (main+preload) and `tsconfig.web.json` + (renderer) — run `npm run typecheck` (both) before claiming type safety. +- **SSRF hardening is load-bearing** in `src/main/sps-agent.ts` (link unfurl pins the validated IP and + re-validates every redirect hop). Don't loosen the IP-pinning lookup when editing unfurl/fetch code. +- **Full verification gate** (from `docs/STORAGE.md`) before shipping substrate changes: both + typechecks → eslint touched files → `vitest run` → `verify:note-index` → `npm run build`. +- **Keep PRs small and single-purpose** (CONTRIBUTING.md); don't bundle formatting sweeps with logic. +- Husky `pre-commit`/`pre-push` only run lint+tests on `release`/`release/*` branches — feature + branches are NOT gated by the hook, so run checks manually. +- **Each git worktree needs its OWN `node_modules` — run `bash scripts/setup-worktree.sh` (or + `npm ci`) after creating one. 
Do NOT symlink `node_modules` to another tree:** native modules + (`better-sqlite3`) plus a concurrent session's `npm install` in the other tree will corrupt your + build mid-flight and surface as phantom "cannot find module" typecheck errors in files you never + touched. + +## Related directories + +- `obsidian-bridge/` — a separate Obsidian plugin (localhost bridge so Hermes can call Obsidian). +- `scripts/` — `repro-*.js` / `verify-*.js` / `probe-*.js` are Playwright/node repro harnesses for + specific issues (named by bug); `sps-smoke.mjs` is the UI smoke; `scope-sps-css.mjs` rescopes SPS CSS. +- **External Context Bridge** (`src/main/external-context/`, `src/mcp/external-context-server.ts`) — a + local-first, opt-in, **redacted** index of OTHER AI tools' transcripts (Claude Code / Codex / Gemini + / Grok) so Hermes is the cross-tool continuity layer. Source roots are env-overridable + (`HERMES_EC_{CLAUDE,CODEX,GEMINI,GROK}_ROOT`) for the verify/smoke harnesses. Two load-bearing, + structurally-enforced invariants: (1) **index-time redaction** — `applyFragments` in `db.ts` is the + single writer and redacts every message before INSERT (verify asserts a seeded key never reaches + `messages`/`messages_fts`); (2) **untrusted fencing** — every UI/Save-to-KB/MCP surface wraps + excerpts in an untrusted banner + fence and never auto-injects them into a chat turn. The index DB is + machine-global + rebuildable (`HERMES_HOME/external-context.db`). Harnesses: + `npm run verify:external-context` (index + redaction + MCP roundtrip, under Electron's node) and + `node scripts/external-context-smoke.mjs` (Playwright UI; build first). +- `docs/superpowers/plans/` and `docs/superpowers/specs/` — design plans/specs for in-flight work. 
diff --git a/README.ja-JP.md b/README.ja-JP.md index ff790f81a..ba24dc86d 100644 --- a/README.ja-JP.md +++ b/README.ja-JP.md @@ -119,38 +119,38 @@ sudo dnf install ./hermes-desktop-.rpm ## 画面構成 -| 画面 | 説明 | -| ------------- | ------------------------------------------------------------------------------------------ | -| **Chat** | ストリーミング会話 UI。スラッシュコマンド、ツール進捗、トークントラッキングに対応 | -| **Sessions** | 過去の会話の閲覧、検索、再開 | -| **Agents** | Hermes プロファイルの作成、削除、切り替え | -| **Skills** | バンドル済み / インストール済みスキルの閲覧、インストール、管理 | -| **Models** | プロバイダごとに保存されたモデル設定の管理 | -| **Memory** | メモリエントリとユーザープロファイルの閲覧 / 編集、メモリプロバイダの設定 | -| **Soul** | アクティブなプロファイルのペルソナ (SOUL.md) を編集 | -| **Tools** | 個別のツールセットを有効化 / 無効化 | -| **Schedules** | 配信先付きの cron ジョブを作成・管理 | -| **Gateway** | メッセージングプラットフォーム統合の設定と制御 | -| **Office** | Claw3d ビジュアルインターフェースのセットアップと管理 | +| 画面 | 説明 | +| ------------- | --------------------------------------------------------------------------------------------- | +| **Chat** | ストリーミング会話 UI。スラッシュコマンド、ツール進捗、トークントラッキングに対応 | +| **Sessions** | 過去の会話の閲覧、検索、再開 | +| **Agents** | Hermes プロファイルの作成、削除、切り替え | +| **Skills** | バンドル済み / インストール済みスキルの閲覧、インストール、管理 | +| **Models** | プロバイダごとに保存されたモデル設定の管理 | +| **Memory** | メモリエントリとユーザープロファイルの閲覧 / 編集、メモリプロバイダの設定 | +| **Soul** | アクティブなプロファイルのペルソナ (SOUL.md) を編集 | +| **Tools** | 個別のツールセットを有効化 / 無効化 | +| **Schedules** | 配信先付きの cron ジョブを作成・管理 | +| **Gateway** | メッセージングプラットフォーム統合の設定と制御 | +| **Office** | Claw3d ビジュアルインターフェースのセットアップと管理 | | **Settings** | プロバイダ設定、認証情報プール、バックアップ / インポート、ログビューア、ネットワーク、テーマ | ## 対応プロバイダ ### LLM プロバイダ -| プロバイダ | 備考 | -| ------------------- | ----------------------------------------------- | +| プロバイダ | 備考 | +| ------------------- | ---------------------------------------------- | | **OpenRouter** | 単一 API で 200 以上のモデルを利用可能(推奨) | -| **Anthropic** | Claude に直接アクセス | -| **OpenAI** | GPT に直接アクセス | -| **Google (Gemini)** | Google AI Studio | -| **xAI (Grok)** | Grok モデル | -| **Nous Portal** | 無料枠あり | -| **Qwen** | QwenAI モデル | -| **MiniMax** | 
グローバル / 中国向けエンドポイント | -| **Hugging Face** | HF Inference 経由で 20 以上のオープンモデル | -| **Groq** | 高速推論 (Voice/STT) | -| **Local/Custom** | 任意の OpenAI 互換エンドポイント | +| **Anthropic** | Claude に直接アクセス | +| **OpenAI** | GPT に直接アクセス | +| **Google (Gemini)** | Google AI Studio | +| **xAI (Grok)** | Grok モデル | +| **Nous Portal** | 無料枠あり | +| **Qwen** | QwenAI モデル | +| **MiniMax** | グローバル / 中国向けエンドポイント | +| **Hugging Face** | HF Inference 経由で 20 以上のオープンモデル | +| **Groq** | 高速推論 (Voice/STT) | +| **Local/Custom** | 任意の OpenAI 互換エンドポイント | LM Studio、Ollama、vLLM、llama.cpp 用のローカルプリセットが付属しています。 diff --git a/README.md b/README.md index b5e21af14..858b4f45c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -HERMES DESKTOP +SPS

@@ -22,9 +22,9 @@ - 简体中文: `README.zh-CN.md` - 日本語: `README.ja-JP.md` -Hermes Desktop is a native desktop app for installing, configuring, and chatting with [Hermes Agent](https://github.com/NousResearch/hermes-agent) — a self-improving AI assistant with tool use, multi-platform messaging, and a closed learning loop. +SPS is a native desktop workspace for notes, tasks, chat, memory, scheduled work, and My Assistant. Under the hood it installs and runs the upstream [Hermes Agent](https://github.com/NousResearch/hermes-agent) engine, but the day-to-day product surface is SPS. -Instead of managing the CLI by hand, the app walks through install, provider setup, and day-to-day usage in one place. It uses the official Hermes install script, stores Hermes in `~/.hermes`, and gives you a GUI for chat, sessions, profiles, memory, skills, tools, scheduling, messaging gateways, and more. +Instead of managing the CLI by hand, SPS walks through install, provider setup, and day-to-day usage in one place. It uses the official Hermes install script, stores the local engine in `~/.hermes`, and gives you a GUI for chat, sessions, profiles, memory, skills, tools, scheduled work, messaging connections, and more. ## Install @@ -69,7 +69,7 @@ sudo dnf install ./hermes-desktop-.rpm Schedules
Schedules -Gateway
Gateway +Connections
Connections Persona
Persona @@ -83,8 +83,8 @@ sudo dnf install ./hermes-desktop-.rpm ## Features -- **Guided first-run install** for Hermes Agent with progress tracking and dependency resolution -- **Local or remote backend** — run Hermes locally on `127.0.0.1:8642`, or connect the desktop app to a remote Hermes API server with URL + API key +- **Guided first-run install** for SPS with progress tracking and dependency resolution +- **Local or remote backend** — run SPS locally on `127.0.0.1:8642`, or connect the desktop app to a remote SPS service with URL + API key - **Multi-provider support** — OpenRouter, Anthropic, OpenAI, Google (Gemini), xAI (Grok), Nous Portal, Qwen, MiniMax, Hugging Face, Groq, and local OpenAI-compatible endpoints (LM Studio, Atomic Chat, Ollama, vLLM, llama.cpp) - **Streaming chat UI** with SSE streaming, tool progress indicators, markdown rendering, and syntax highlighting - **Token usage tracking** — live prompt/completion token counts and cost display in the chat footer, plus a `/usage` slash command @@ -93,13 +93,14 @@ sudo dnf install ./hermes-desktop-.rpm - **Profile switching** — create, delete, and switch between separate Hermes environments with isolated config - **14 toolsets** — web, browser, terminal, file, code execution, vision, image gen, TTS, skills, memory, session search, clarify, delegation, MoA, and task planning - **Memory system** — view/edit memory entries, user profile memory, capacity tracking, and discoverable memory providers (Honcho, Hindsight, Mem0, RetainDB, Supermemory, ByteRover) -- **Persona editor** — edit and reset your agent's SOUL.md personality +- **Research Reach (optional)** — detect Agent-Reach-style local source tooling so My Research, Learn This, and scheduled research can use richer internet sources such as GitHub, YouTube, RSS, Reddit, and Twitter/X when those local backends are installed and reviewed +- **Persona editor** — edit and reset My Assistant's SOUL.md personality - **Saved models** — CRUD management for 
model configurations across providers -- **Scheduled tasks** — cron job builder (minutes, hourly, daily, weekly, custom cron) with 15 delivery targets -- **16 messaging gateways** — Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Mattermost, Email (IMAP/SMTP), SMS (Twilio/Vonage), iMessage (BlueBubbles), DingTalk, Feishu/Lark, WeCom, WeChat (iLink Bot), Webhooks, Home Assistant +- **Scheduled work** — schedule builder (minutes, hourly, daily, weekly, custom schedule) with 15 delivery targets +- **16 messaging connections** — Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Mattermost, Email (IMAP/SMTP), SMS (Twilio/Vonage), iMessage (BlueBubbles), DingTalk, Feishu/Lark, WeCom, WeChat (iLink Bot), Webhooks, Home Assistant - **Hermes Office (Claw3d)** — visual 3D interface with dev server and adapter management - **Backup, import & debug dump** — full data backup/restore and system diagnostics from Settings -- **Log viewer** — view gateway and agent logs directly from the Settings screen +- **Log viewer** — view connection-service and assistant logs directly from the Settings screen - **Auto-updater** — check for and install updates via electron-updater - **i18n ready** — internationalization framework with English locale covering all screens, ready for community translations - **Test suite** — SSE parser, IPC handlers, preload API surface, installer utilities, and constants validation with Vitest @@ -108,31 +109,31 @@ sudo dnf install ./hermes-desktop-.rpm On first launch, the app: -1. Asks whether you want to run Hermes **locally** or connect to a **remote** Hermes API server. -2. **Local mode:** checks whether Hermes is already installed in `~/.hermes`; if not, runs the official Hermes installer with dependency resolution (Git, uv, Python 3.11+). +1. Asks whether you want to run SPS **locally** or connect to a **remote** SPS service. +2. 
**Local mode:** checks whether the local engine is already installed in `~/.hermes`; if not, runs the official installer with dependency resolution (Git, uv, Python 3.11+). 3. **Remote mode:** prompts for the remote API URL and API key, validates the connection, and skips local install. 4. Prompts for an API provider or local model endpoint. -5. Saves provider config and API keys through Hermes config files. +5. Saves provider config and API keys through SPS config files. 6. Launches the main workspace once setup is complete. In local mode, chat requests go through `http://127.0.0.1:8642` with SSE streaming. In remote mode, the app talks to your configured remote URL with the same streaming protocol. The desktop app parses the stream in real time, rendering tool progress, markdown content, and token usage as it arrives. ## Screens -| Screen | Description | -| ------------- | ------------------------------------------------------------------------------------- | -| **Chat** | Streaming conversation UI with slash commands, tool progress, and token tracking | -| **Sessions** | Browse, search, and resume past conversations | -| **Agents** | Create, delete, and switch between Hermes profiles | -| **Skills** | Browse, install, and manage bundled and installed skills | -| **Models** | Manage saved model configurations per provider | -| **Memory** | View/edit memory entries, user profile, and configure memory providers | -| **Soul** | Edit the active profile's persona (SOUL.md) | -| **Tools** | Enable or disable individual toolsets | -| **Schedules** | Create and manage cron jobs with delivery targets | -| **Gateway** | Configure and control messaging platform integrations | -| **Office** | Claw3d visual interface setup and management | -| **Settings** | Provider config, credential pools, backup/import, log viewer, network settings, theme | +| Screen | Description | +| --------------- | ------------------------------------------------------------------------------------- | 
+| **Chat** | Streaming conversation UI with slash commands, tool progress, and token tracking | +| **Sessions** | Browse, search, and resume past conversations | +| **Assistants** | Create, delete, and switch between SPS profiles | +| **Skills** | Browse, install, and manage bundled and installed skills | +| **Models** | Manage saved model configurations per provider | +| **Memory** | View/edit memory entries, user profile, and configure memory providers | +| **Soul** | Edit the active profile's persona (SOUL.md) | +| **Tools** | Enable or disable individual toolsets | +| **Schedules** | Create and manage scheduled work with delivery targets | +| **Connections** | Configure and control messaging platform integrations | +| **Office** | Claw3d visual interface setup and management | +| **Settings** | Provider config, credential pools, backup/import, log viewer, network settings, theme | ## Supported Providers @@ -230,7 +231,7 @@ Local presets are included for: - vLLM - llama.cpp -Hermes files are managed in: +SPS engine files are managed in: - `~/.hermes` - `~/.hermes/.env` @@ -238,7 +239,7 @@ Hermes files are managed in: - `~/.hermes/hermes-agent` - `~/.hermes/profiles/` — named profile directories - `~/.hermes/state.db` — session history database -- `~/.hermes/cron/jobs.json` — scheduled tasks +- `~/.hermes/cron/jobs.json` — scheduled work ## Tech Stack @@ -253,7 +254,7 @@ Hermes files are managed in: ## Notes -- The desktop app depends on the upstream Hermes Agent project for agent behavior and tool execution. +- SPS depends on the upstream Hermes Agent project for assistant behavior and tool execution. - The built-in installer runs the official Hermes install script with `--skip-setup`, then completes provider configuration in the GUI. - Local model providers do not require an API key, but the compatible server must already be running. - Alternative npm registry routes are supported for environments with restricted network access. 
@@ -264,6 +265,6 @@ Contributions are welcome! Check out the [Contributing Guide](CONTRIBUTING.md) t ## Related Project -For the core agent, docs, and CLI workflows, see the main Hermes Agent repository: +For the core engine, docs, and CLI workflows, see the main Hermes Agent repository: - https://github.com/NousResearch/hermes-agent diff --git a/README.zh-CN.md b/README.zh-CN.md index 8122dd577..2937b6eaf 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -119,38 +119,38 @@ sudo dnf install ./hermes-desktop-.rpm ## 界面说明 -| 界面 (Screen) | 描述 (Description) | -| ------------- | ------------------------------------------------------------------------------------- | -| **聊天 (Chat)** | 支持斜杠命令、工具进度展示和 token 跟踪的流式对话界面 | -| **会话 (Sessions)** | 浏览、搜索并恢复过去的对话 | -| **代理 (Agents)** | 创建、删除和在不同的 Hermes 配置 (Profile) 之间切换 | -| **技能 (Skills)** | 浏览、安装并管理内置及已安装的技能 | -| **模型 (Models)** | 管理并保存各个提供商的模型配置 | -| **记忆 (Memory)** | 查看/编辑记忆条目、用户配置,并配置记忆提供商 | -| **灵魂 (Soul)** | 编辑当前活动配置的代理人格设定 (`SOUL.md`) | -| **工具 (Tools)** | 启用或禁用特定的工具集 | -| **计划 (Schedules)** | 创建并管理定时任务及推送目标 | -| **网关 (Gateway)** | 配置和控制各类消息平台集成 | -| **办公室 (Office)** | Claw3d 可视化界面设置及管理 | +| 界面 (Screen) | 描述 (Description) | +| -------------------- | --------------------------------------------------------- | +| **聊天 (Chat)** | 支持斜杠命令、工具进度展示和 token 跟踪的流式对话界面 | +| **会话 (Sessions)** | 浏览、搜索并恢复过去的对话 | +| **代理 (Agents)** | 创建、删除和在不同的 Hermes 配置 (Profile) 之间切换 | +| **技能 (Skills)** | 浏览、安装并管理内置及已安装的技能 | +| **模型 (Models)** | 管理并保存各个提供商的模型配置 | +| **记忆 (Memory)** | 查看/编辑记忆条目、用户配置,并配置记忆提供商 | +| **灵魂 (Soul)** | 编辑当前活动配置的代理人格设定 (`SOUL.md`) | +| **工具 (Tools)** | 启用或禁用特定的工具集 | +| **计划 (Schedules)** | 创建并管理定时任务及推送目标 | +| **网关 (Gateway)** | 配置和控制各类消息平台集成 | +| **办公室 (Office)** | Claw3d 可视化界面设置及管理 | | **设置 (Settings)** | 提供商配置、凭证池、备份/导入、日志查看器、网络设置、主题 | ## 支持的提供商 ### 大语言模型 (LLM) 提供商 -| 提供商 (Provider) | 备注说明 (Notes) | -| ------------------- | ---------------------------------------- | -| **OpenRouter** | 通过单一 API 访问 200+ 种模型 (推荐使用) | -| 
**Anthropic** | 直接访问 Claude 模型 | -| **OpenAI** | 直接访问 GPT 模型 | -| **Google (Gemini)** | Google AI Studio | -| **xAI (Grok)** | Grok 模型 | -| **Nous Portal** | 提供免费额度 | -| **Qwen (通义千问)** | QwenAI 模型 | -| **MiniMax** | 包含全球与中国区端点 | -| **Hugging Face** | 通过 HF Inference 访问 20+ 开源模型 | -| **Groq** | 快速推理 (支持语音/STT) | -| **本地/自定义 (Local/Custom)** | 任何兼容 OpenAI 格式的端点 | +| 提供商 (Provider) | 备注说明 (Notes) | +| ------------------------------ | ---------------------------------------- | +| **OpenRouter** | 通过单一 API 访问 200+ 种模型 (推荐使用) | +| **Anthropic** | 直接访问 Claude 模型 | +| **OpenAI** | 直接访问 GPT 模型 | +| **Google (Gemini)** | Google AI Studio | +| **xAI (Grok)** | Grok 模型 | +| **Nous Portal** | 提供免费额度 | +| **Qwen (通义千问)** | QwenAI 模型 | +| **MiniMax** | 包含全球与中国区端点 | +| **Hugging Face** | 通过 HF Inference 访问 20+ 开源模型 | +| **Groq** | 快速推理 (支持语音/STT) | +| **本地/自定义 (Local/Custom)** | 任何兼容 OpenAI 格式的端点 | 内置以下本地模型预设:LM Studio, Ollama, vLLM, llama.cpp。 diff --git a/build/icon.icns b/build/icon.icns index 399f36df0..ea35079cd 100644 Binary files a/build/icon.icns and b/build/icon.icns differ diff --git a/build/icon.png b/build/icon.png index 8b5521e85..b578f0ab7 100644 Binary files a/build/icon.png and b/build/icon.png differ diff --git a/changelogs/0.5.0.md b/changelogs/0.5.0.md index 53b00f24d..8f157d35f 100644 --- a/changelogs/0.5.0.md +++ b/changelogs/0.5.0.md @@ -3,25 +3,30 @@ ## Features ### Chat + - **Per-conversation context folder** — pin a local folder to a conversation; its files are available as context for that session _([@pmos69](https://github.com/pmos69))_ - **Tool calls, tool output & reasoning in history** — collapsible sections in chat history show tool invocations, their results, and model reasoning steps _([@jason-edstrom](https://github.com/jason-edstrom))_ - **Right-click context menu** — copy / paste / select-all and copy-entire-chat from a native context menu in the chat view _([@pmos69](https://github.com/pmos69))_ ### Providers + - **In-app OAuth sign-in** — 
"Subscription / OAuth Plans" section on the Providers screen; sign in to ChatGPT (Codex), xAI Grok, Qwen, Gemini CLI, and MiniMax without leaving the app. The Codex device-code flow auto-opens the browser and copies the code to the clipboard _([@leejamesss](https://github.com/leejamesss))_ - **Model autocomplete for OAuth / subscription providers** — live `/v1/models` discovery now works for OAuth-backed providers _([@pmos69](https://github.com/pmos69))_ - **Live model-discovery autocomplete** — model name field autocompletes from the provider's live model list for all OpenAI-compatible endpoints _([@pmos69](https://github.com/pmos69))_ ### Kanban + - **Claw3D HQ read-only board** — surfaces the Claw3D HQ board as a second read-only Kanban view alongside the local board _([@andreab67](https://github.com/andreab67))_ ### Build & CI + - **Portable Windows build target** — new `win-portable` artifact in the release matrix _([@pmos69](https://github.com/pmos69))_ - **GitHub Actions CI** — typecheck + full test suite now run on every push and PR _([@pmos69](https://github.com/pmos69))_ ## Bug Fixes ### Chat & Sessions + - Sessions screen reads the active profile's `state.db`, not the root one _([@pmos69](https://github.com/pmos69))_ - Sessions tab now auto-refreshes while it stays open _([@pmos69](https://github.com/pmos69))_ - Gateway session ID synced on session switch so context is not lost _([@pmos69](https://github.com/pmos69))_ @@ -29,27 +34,33 @@ - IPC chat-send callbacks guarded against destroyed renderer sender _([@Ricardo-M-L](https://github.com/Ricardo-M-L))_ ### Providers & Config + - `model.api_key` auto-populated for known-host custom providers _([@pmos69](https://github.com/pmos69))_ - OpenAI provider correctly routed through custom + explicit `base_url` in setup _([@pmos69](https://github.com/pmos69))_ ### Tools + - `setToolsetEnabled` no longer drops the `cli` section when a trailing platform block is present _([@ytfh44](https://github.com/ytfh44))_ ### 
Skills + - Install failures surfaced when the CLI exits 0 silently instead of disappearing _([@pmos69](https://github.com/pmos69))_ ### Installer & Updater + - OpenClaw migration detector no longer false-positives on empty directory stubs _([@andreab67](https://github.com/andreab67))_ - `getHermesVersion` wait capped so a stuck version flag can't leak polling intervals _([@Ricardo-M-L](https://github.com/Ricardo-M-L))_ - Install confirmation prompt before writing + option to adopt an existing install _([@pmos69](https://github.com/pmos69))_ - Auto-updater lifecycle logged to a file for post-mortem debugging _([@pmos69](https://github.com/pmos69))_ ### SSH & Office + - SSH-mode gateway operations routed through `systemd` when `hermes.service` exists _([@pmos69](https://github.com/pmos69))_ - Kanban SSH errors surface real messages instead of a misleading "unsupported mode" screen _([@pmos69](https://github.com/pmos69))_ - Office (Claw3D) services authenticated to the gateway _([@pmos69](https://github.com/pmos69))_ ### Tests + - Three failing tests on `main` repaired _([@pmos69](https://github.com/pmos69))_ ## New Contributors diff --git a/docs/BACKLOG.md b/docs/BACKLOG.md new file mode 100644 index 000000000..2d0548c57 --- /dev/null +++ b/docs/BACKLOG.md @@ -0,0 +1,196 @@ +# Hermes Desktop — Roadmap / Deferred Backlog + +**Last updated:** 2026-06-05 · **Default branch:** `main` (local commit-to-main; remote `origin` = github.com/saxster/hermes-desktop) + +This is the **remaining work** for the SPS-Agent knowledgebase / agent-aware / skills surface — the deferred items behind the features already shipped. Each item records _current state_ (with file pointers), _what's left_, and the _rationale / gate_ so any contributor can pick one up cold. Items are independent; ship one per worktree + commit. 
+ +--- + +## Already shipped (context — read the commits, don't redo) + +| Commit | Feature | +| ------------------------------- | ------------------------------------------------------------------------------------- | +| `96bcc67`, `0dd8783`, `6096df3` | KB Phase 0+1: PDF→markdown→vault ingestion + grounded chat (FTS5 OR-mode retrieval) | +| `d069868` | Agent-aware templates: `"button"` block fires the grounded co-author; 7 templates | +| `bf87b8e` | Skills management: author / browse-registry / edit / enable-disable / harvest-local | +| `07aede0` | Generate a skill from a repo (bounded digest → one gateway completion → review modal) | +| `904ec3d` | Untracked generated `graphify-out/` (now gitignored) | + +Read those commit messages + diffs for the shipped design; this doc references files but doesn't re-explain them. Note: the original "Upstream Templates And Skills Import Plan" was **deliberately reframed** — the copy/provenance/manifest machinery was rejected in favour of original content; nothing is owed there. + +--- + +## Deferred items (the backlog) + +Ordered roughly by value. + +### 1. KB Phase 2 — RLM (agentic navigation), NOT vector RAG ⟵ the big one, GATED + +> **⚠️ UPDATE 2026-06-05 — the gate ran (item 9) and inverted the premise. Read +> [`docs/kb-phase2-dogfood.md`](kb-phase2-dogfood.md) before acting on this item.** +> Dogfooding showed **depth is already solved in production**: the agentic gateway +> reads the _full file_ via its file tool when the excerpt is insufficient (proven by +> a controlled bogus-path experiment), so the "spsAssistant doesn't run a tool loop" +> premise below is true of the _client_ but false of the _system_ — the server runs the +> loop. Every depth probe answered correctly; the only live failures were **recall** +> (right doc never retrieved — the one thing a file tool can't fix). 
**Revised +> direction:** don't build a depth read-loop (it exists); attack **recall** via agentic +> re-search (a `vault_search`/`follow_wikilink` tool the agent can re-call), embeddings +> only if a measured recall gap survives. Caveats (model-dependence, scale reliability, +> latency) are in the findings doc. The original analysis below is preserved as the +> pre-evidence reasoning. +> +> **UPDATE 2026-06-05 (b) — recall measured (see the findings doc's recall addendum).** An +> 8-doc experiment with engineered synonym misses (`scripts/kb-dogfood/recall-experiment.ts`) +> confirms the recall gap is real (baseline **0%**) and that the **cheapest** fix — pointing the +> agent at the vault directory so it can self-navigate — lifts it to **80% but is stochastic** +> (the agentic gateway navigates inconsistently: 100% on an obvious reformulation, 60% on a +> harder one, 0/2 on one whole run). So the **reliable** recall fix is **app-side query +> expansion** (broaden the FTS query so the gold doc enters the candidate set → its path is +> handed over → the agent reads it ~deterministically), which lives in this repo's +> "app-selects-candidates" role and needs no upstream change. +> +> **UPDATE 2026-06-05 (c) — query expansion BUILT & shipped.** `buildRetrievalSystemMessage` now +> generates synonym query variants (one cheap completion) and fuses them with the original +> retrieval by reciprocal rank (`parseQueryVariants` / `fuseRankings` / `expandQueryVariants` in +> `src/main/hermes.ts`, unit-tested). Measured: **clean synonym misses 0 → 80%**, controls +> unaffected, but a **residual on hard semantic gaps** (RM-keys "safe"→"key cabinet": 0 → 20%) — +> keyword expansion can't bridge a concept leap. **That residual is now the measured trigger for +> embeddings** (the gate's condition is met for the hard-gap case). 
Remaining recall work: local +> embeddings as one more search tool, **only** for semantic (non-keyword) gaps; cost note: query +> expansion adds one model call per grounded question (no cheap "expand-only-when-needed" signal, +> since a recall miss looks like successful retrieval). See the findings doc's expansion section. + +**Direction decided:** do **RLM** — let the co-author _navigate_ the vault (search → read → re-search → recurse → synthesize) — instead of a vector-RAG pipeline. Embeddings are **demoted to an optional tool**, added only if a measured recall gap demands it. Short version: RLM reuses what's already built, dodges the entire vector tax, and wins exactly where top-k stuffing fails (multi-hop, whole-doc, follow-the-thread). + +**Framing that must not be lost:** vector RAG is a _retrieval architecture_; RLM is an _inference/control strategy_. They sit on different axes — (A) **control:** one-shot retrieve-and-stuff vs. agentic navigate-and-recurse; (B) **search primitive:** keyword (FTS5) vs. vector vs. both. The decision is **agentic (RLM) on axis A**, keeping **keyword on axis B for now**. "RLM instead of vectors" really means: pick agentic control; don't build the vector subsystem yet. + +**Status today:** retrieval is FTS5 keyword, **one-shot**. `NoteIndex.search(text, limit, mode)` (`src/main/note-index.ts`, `"any"`/OR mode); `groundingTerms()` + `buildRetrievalSystemMessage()` (`src/main/hermes.ts`) inject a single system message; the SPS path `spsAssistant()` (`src/main/sps-agent.ts`) is `stream:false` and expects a **structured JSON** result — it does **not** run a tool loop. + +**What "do RLM" actually means here (the real work):** + +- **Expose the vault as a first-class navigable toolset to the Hermes agent** — `vault_search` (the existing FTS5 index), `vault_read_page`, `vault_follow_wikilink` — and let the agent iterate. 
On-thesis: Hermes is already agentic (`file`, `session_search`, `delegation`/`moa` toolsets in `src/main/tools.ts`); KB-as-agent-skill belongs in Hermes, not as an embedding ETL in Electron. +- **The architectural cost is the agentic control loop + latency, not vectors.** The catch: the SPS co-author is one-shot/non-agentic today. RLM means either (a) re-architect `spsAssistant` to run a tool-calling loop, or (b) **route KB questions through the full agentic chat path** (which already can call tools) instead of the one-shot assistant. Decide this routing before coding — it's the bigger lift, bigger than "add a vector index." + +**Why RLM over vector RAG (the case):** reuses the vault + FTS5 + wikilink graph + file tools (no new subsystem); **no embedder at all** → privacy gets _easier_, and no `sqlite-vec`/`better-sqlite3`-ABI/chunking/re-embed-on-edit/model-drift tax; and it beats top-k stuffing on multi-hop / whole-doc / follow-the-thread queries (cf. _Recursive Language Models_, Zhang et al., MIT, late 2025 — context-as-REPL; doesn't degrade as context grows). + +**Honest counter-case (budget for these):** (1) **latency/cost** — multi-turn = several model calls per question; matters for an interactive co-author. (2) **wander/reliability** — agentic nav can miss a file or loop; top-k is bounded. (3) **keyword recall** — RLM's search primitive is still FTS5, so a pure-synonym miss is the one place vectors keep an edge (mitigated, not erased, by the agent reformulating queries). + +**Embeddings — only if earned:** add local embeddings **as one more search tool the RLM can call** (never the foundation, never cloud) **iff** dogfooding shows recall — not depth — is the bottleneck. If so: `sqlite-vec` (mind the `better-sqlite3` ABI: index-opening code can't run under vitest → `verify:note-index` harness) or a Python embedding subprocess in Hermes. 
+ +**GATE — do not build until evidenced (and the diagnosis picks the tool):** dogfood one-shot keyword grounding first (item 9). Then fork on _why_ it failed: + +- wrong/no chunk came back (synonym miss) → **recall** problem → consider embeddings-as-a-tool; +- model needed to read more / follow a thread / read a whole doc → **depth** problem → that's **RLM**, and vectors wouldn't have helped. + +Bet (to verify, not assert): with SOP/contract corpora (shared vocab, cross-refs) most failures are _depth_ → RLM. And don't skip the cheap win: one-shot FTS5 grounding may already suffice for many questions — build the loop only when one-shot demonstrably can't look far enough. + +### 2. OCR for scanned PDFs — ✅ DONE 2026-06-06 + +> **Shipped in three phases (plan: [`docs/superpowers/plans/kb-ocr.md`](superpowers/plans/kb-ocr.md)).** A PDF with no usable text layer — scanned image OR a broken/unmappable font (item 10's `reason:"unreadable"`, which renders fine so OCR rescues it too) — is now OCR'd instead of refused. **P1:** offline `tesseract.js` (WASM) in the renderer renders each page with pdf.js to a canvas and recognizes it in a worker (off the UI thread); assets bundled under `/tesseract/` (no CDN, documents never leave the machine), engine lazy code-split. **P2:** a persistent localStorage queue drains sequentially in the background; multiple scans batch; an interrupted job resumes on next launch; result filed under "Sources" with an `OcrStatus` progress pill. **P3:** an optional "Overnight" defer (configurable time) + "Run now", scheduled in-renderer (fires only while the app is open — no OS daemon, by design). Proven by `scripts/sps-ocr-smoke.mjs` (image-only PDF → text under Sources) + `ocr-probe.mjs`; queue/schedule logic unit-tested. **Non-goal:** handwriting (tesseract is poor at it). Original analysis below. 
+ +**Status:** `extractPdfToMarkdown()` (`src/main/pdf-extract.ts`) detects a missing text layer via `hasUsableTextLayer()` and the UI flags "needs OCR, not imported" (`importPdf` in `screens/SpsAgent/store/slices/workspace.ts`). **Its sibling — the _unreadable_ (present-but-garbage) text-layer case — is now handled by [item 10](#10-ingestion-intelligibility-check----hasusabletextlayer-false-positives-on-garbage-text) (✅ fixed 2026-06-05):** `extractPdfToMarkdown` returns `reason: "missing" | "unreadable"`, so this item is now scoped to **`reason: "missing"` only** (true scanned/image-only PDFs with no text layer), with the rejection plumbing and distinct messaging already in place to hang OCR off. +**Left:** add an OCR path (tesseract.js, or a native/Python OCR) so scanned books actually ingest. Larger effort; isolate it (extraction quality, language packs). Wire it into the existing `reason: "missing"` branch in `importPdf` (swap the "needs OCR" flash for an OCR attempt). + +### 3. Remote / SSH grounding + +**Status:** grounding is **local-mode only** by construction — `buildRetrievalSystemMessage` is called only when `!isRemoteMode()` in both `src/main/hermes.ts` (chat) and `spsAssistant()` (`src/main/sps-agent.ts`). +**Left:** in remote/SSH mode the vault lives on the desktop and the remote agent can't read those paths. Either inline retrieved excerpts into the request (no path handoff) or run retrieval on the desktop and ship results. Decide the transport before coding. + +### 4. "Sources" folder + scanned-PDF UX — ✅ DONE 2026-06-05 + +Ingested PDFs now land in a dedicated **"Sources"** folder (`ensureSourcesFolder`, +find-or-create by title at root — no persisted marker, so the markdown serializers +are untouched); `importPdf` routes there and no longer takes a parent. Refused +imports (scanned / unreadable) now show a **persistent warn toast** (8 s, amber flag +icon) instead of a 2.2 s confirmation flash. 
Unit-tested (`tests/sps-sources-folder.test.ts`) +and verified end-to-end by `scripts/sps-import-smoke.mjs` (asserts the ingested page +nests under Sources). The OCR CTA is still N/A until item 2 exists. + +Bonus fixes found while verifying: the "New page" templates modal wasn't scrollable, +so the **Import PDF card (last in the grid) was unreachable** once the template list +grew — made `.modal-body` scrollable. Also isolated the import smoke's Electron +`userData` so it runs with a developer's app open (single-instance lock). + +### 5. Generate-from-repo — large-repo quality + +**Status & finding (from a real dogfood on a medium repo via the live gateway + an x.ai model):** medium repos produce an accurate `SKILL.md`. But `buildRepoDigest()` (`src/main/skills.ts`) is bounded (~40 KB, walk-order file selection), so **large repos go tree-heavy / source-light** — hermes-desktop itself inlined only ~4 file bodies (tree + README + manifests ate the budget). +**Left:** a smarter file-selection heuristic — prioritise entrypoints/exports/most-imported files over walk order. Optionally a second mode that hands the repo path to the agent's file tools (agentic, streaming) for depth instead of the one-shot digest. + +### 6. Per-button grounding + general-purpose buttons + +**Status:** the agent-action `"button"` block (`screens/SpsAgent/types.ts`, `editor/ButtonBlock.tsx`) inherits the single global grounding setting and only does `agentPrompt`. +**Left:** optional per-button "ground on/off"; non-agent button actions (e.g., insert template, run a non-LLM action). + +### 7. Grounding toggle in the SPS Assistant panel — ✅ DONE 2026-06-05 + +A `database`-icon toggle now sits in the SPS co-author composer row +(`screens/SpsAgent/assistant/AgentBody.tsx`), reading/writing the shared +`getGroundInWorkspace`/`setGroundInWorkspace` — the **same** persisted preference +the Chat header controls (so the two stay in sync). Verified by the SPS smoke. + +### 8. 
Grounding refactors (tech debt — low risk, do alongside item 7) + +- **Cross-screen import smell — ✅ DONE 2026-06-05.** The grounding preference is + hoisted to `src/renderer/src/lib/grounding.ts`; `BridgeAssistant` (SPS), + `ChatHeader`, and `useChatActions` (Chat) all import from there, and the legacy + `screens/Chat/lib/grounding.ts` is deleted. SPS no longer imports from Chat. + Unit-tested in `tests/grounding-setting.test.ts`. +- **`sps-agent → hermes` coupling — ⏸ DEFERRED (no longer cheap).** The "Optional" + plan was to extract `buildRetrievalSystemMessage`/`formatRetrievalSystemMessage`/ + `groundingTerms` into `src/main/grounding.ts` and re-export from `hermes.ts`. Since + item 1's query expansion, `buildRetrievalSystemMessage` now depends on + `chatCompletionOnce` (the gateway HTTP plumbing, in `hermes.ts`), so a `grounding.ts` + would import back from `hermes.ts` — a cycle that does NOT achieve the goal + (`sps-agent` → `grounding` → `hermes` still pulls the heavy graph). Doing it right + now means also extracting the HTTP layer — a larger cross-cutting refactor, out of + scope for a "low-risk tech-debt" item. Revisit only if the heavy-graph import + becomes a measured problem. + +### 9. KB dogfooding — the Phase-2 trigger evaluation ⟵ ✅ DONE 2026-06-05 + +> **Ran 2026-06-05 — see [`docs/kb-phase2-dogfood.md`](kb-phase2-dogfood.md).** A +> designed security-guarding corpus + reproducible harness (`scripts/kb-dogfood/`) +> drove the real grounding pipeline against the live gateway (grok-4.3). Outcome: +> 9/11 correct, **0 depth failures, 2 recall failures** — depth is already handled by +> the agentic file tool (proven), recall is the residual gap. This rewrote item 1 +> (above). A re-run against _real_ business docs is still worthwhile to confirm the +> recall base-rate in the wild; the harness accepts any vault. + +Ingest several real business docs (SOPs, a contract, a handbook) and ask the co-author real questions with grounding on. 
**Don't just judge pass/fail — classify each failure** so the diagnosis picks the tool (see item 1's gate): + +- **recall** failure (the right source existed but a synonym/paraphrase query missed it) → points at embeddings-as-a-tool; +- **depth** failure (the right source came back but the model needed to read more, follow a thread, or read a whole doc) → points at **RLM** (agentic navigation), which is the decided direction. + +Until one-shot keyword grounding demonstrably fails, **build nothing** — it may already suffice. When it fails, the failure _type_ tells you whether to build the RLM loop (depth) or also add a vector tool (recall). + +### 10. Ingestion intelligibility check — `hasUsableTextLayer` false-positives on garbage text — ✅ FIXED 2026-06-05 + +> **Fixed 2026-06-05.** Added `looksIntelligible()` / `commonWordRatio()` (`src/main/pdf-extract.ts`): a text layer that clears the char-count floor must also read like real English prose (≥5% of word tokens are common English words, judged only on a ≥40-word sample). Garbage scores ~0 (the Buffett PDF: 0.011) vs real docs (Coase 0.49, Google 0.33). `extractPdfToMarkdown` now returns `reason: "missing" | "unreadable"`, and the importer (`workspace.ts`) shows a distinct "Unreadable text (broken font encoding) — not imported" flash. Tests in `tests/pdf-extract.test.ts`; verified end-to-end (Buffett refused, Coase/Google still ingest). Known limitation, documented in code: assumes Latin-script English; non-English docs would need language detection. + +**Found 2026-06-05 during the real-PDF dogfood (see [`docs/kb-phase2-dogfood.md`](kb-phase2-dogfood.md)).** `extractPdfToMarkdown()` (`src/main/pdf-extract.ts`) gates on `hasUsableTextLayer()`, which only counts **non-whitespace characters per page**. A PDF with an embedded custom font but **no ToUnicode cmap** extracts a text layer that is the right _length_ but semantic **garbage** (`!" #$%#& "'()…`) — and passes the check. 
That garbage then gets indexed and can ground answers on nonsense. This is **distinct from item 2** (scanned/image-only PDFs with _no_ text layer → OCR): here the text layer _exists_ but is unmappable. + +**Repro:** `/Users/amar/Downloads/Buffett_Revisited.pdf` ingests "successfully" as gibberish (1 page, 4669 garbage chars, `hasTextLayer=true`). +**Left:** add an _intelligibility_ heuristic alongside the char-count — e.g. ratio of recognizable dictionary words / proportion of ASCII-letter tokens / a low share of Private-Use-Area or non-mappable code points — and flag a doc that has a text layer but fails it as "unreadable encoding, not imported" (reuse the same "needs OCR" UX surface, distinct message). Low-risk, isolated to the extractor; pure-function `hasUsableTextLayer` is unit-testable. **Severity note:** in the dogfood the garbage was inert (gibberish tokens never matched real queries, so it was never retrieved), so this is correctness-hygiene, not a live failure — but on a vault where such a doc ranked, it would silently poison grounding. + +--- + +## Process / environment (project rules) + +- **Worktree-per-task is mandatory.** Never edit/commit in the primary tree. Per feature: create a worktree off `main` → `npm install` + `npx electron-builder install-app-deps` (fresh worktrees need a native rebuild for Electron's ABI) → implement → gate → commit → fast-forward `main` (`git fetch . <branch>:main`) → `git push origin main:main` → clean the worktree. +- **Full verification gate** (see `docs/STORAGE.md`): `npm run typecheck` (BOTH projects) → `eslint` touched files → `npx vitest run` → `npm run verify:note-index` (only if the SQLite substrate is touched) → `npm run build` → a Playwright-Electron smoke for UI changes (`scripts/sps-smoke.mjs`, `scripts/sps-import-smoke.mjs`, `scripts/skills-smoke.mjs` are the patterns; smokes stub the OS dialog / gateway `fetch` via `app.evaluate`). 
+- **better-sqlite3 ABI split:** anything that _opens_ the note index can't run under vitest — pure logic + IPC-mocked hooks → vitest; index-opening code → `verify:note-index` (`ELECTRON_RUN_AS_NODE=1`); renderer UI → the smoke harnesses. +- **Preload parity is enforced** — every IPC method must be in BOTH `src/preload/index.ts` and `index.d.ts` or `tests/preload-api-surface.test.ts` fails. +- **Keep commits scoped**; never stage `graphify-out/` (gitignored) or `out/`. +- **Dogfooding the gateway path:** there's no auto-running gateway. Start it (e.g. run the Hermes agent in a terminal): `API_SERVER_ENABLED=true API_SERVER_PORT=8642 API_SERVER_KEY=<key> hermes gateway`. A harness can run the REAL functions and redirect only the `/v1/chat/completions` transport to that port with `Authorization: Bearer <key>` — this is how generate-from-repo was dogfooded end-to-end. + +## Suggested skills + +- **`rlm-strategies`** — the decided direction for item 1 (chunk / sample / filter / parallelize / delegate; "treat large context as an environment to explore, not data to consume"). Read it before designing the vault-navigation toolset. +- **`brainstorming`** — before designing item 1 or the large-repo heuristic (item 5); the solution space is wide. +- **`adversarial-review`** — stress-test the RLM plan (latency/UX, agentic wander, and the recall-vs-depth call are easy to get wrong). +- **`test-driven-development`** — pure functions (digest heuristics, retrieval) are well-suited to test-first. + +## Pointers + +- Substrate rules: `docs/STORAGE.md`. Project guidance: `CLAUDE.md` (repo root). diff --git a/docs/ONTOLOGY.md b/docs/ONTOLOGY.md new file mode 100644 index 000000000..5a2d90867 --- /dev/null +++ b/docs/ONTOLOGY.md @@ -0,0 +1,348 @@ +# Hermes Pragmatic Ontology Specification + +This document specifies a **Pragmatic Ontology** for the Hermes ecosystem, covering both the **Hermes CLI (`~/.hermes/`)** and the **Hermes Desktop (SPS Agent)**. 
+ +Rather than adopting heavy Semantic Web standards (like RDF/OWL), this specification establishes a lightweight, developer-friendly schema-based knowledge model using **JSON Schema** for CLI assets, **Markdown Frontmatter** for the desktop workspace, and **SQL Table Extensions** for the local SQLite index. + +--- + +## 1. Conceptual Architecture (Pedagogy) + +The Hermes Ontology organizes the system into a **Property Graph** consisting of **Entities (Nodes)**, **Attributes (Properties)**, and **Relations (Edges)**. + +```mermaid +classDiagram + class Profile { + +string name + +string active_gateway + +list credential_types + } + class Skill { + +string name + +string description + +list inputs + +list outputs + +list credentials_required + } + class Note { + +string path + +string title + +string type + +list tags + } + class Task { + +string status + +string due_date + +string assignee + } + + Note <|-- Task : Inheritance + Profile "1" --> "*" Skill : owns + Skill "1" --> "*" Note : reads/writes + Note "*" --> "*" Note : relates_to (typed) + Skill "*" --> "*" Profile : demands credentials from +``` + +### Core Entities + +1. **Profile**: A user context containing configurations, active models, and credentials. +2. **Skill**: An executable action (CLI tool or Python subprocess) that performs a specific utility. +3. **Note (Page)**: A raw file in the markdown database. +4. **Task (Subclass of Note)**: A note representing an action item with status, dates, and executors. + +--- + +## 2. CLI Skill Manifest Specification (`manifest.json`) + +To enable the agent to dynamically discover, validate, and chain skills, every skill directory in `~/.hermes/skills/<category>/<skill-name>/` should declare a `manifest.json` file. + +### A. 
JSON Schema for Skill Manifests + +```json +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "HermesSkillManifest", + "type": "object", + "required": ["name", "description", "category", "input", "output"], + "properties": { + "name": { + "type": "string", + "pattern": "^[a-z0-9-]+$", + "description": "Kebab-case unique identifier for the skill." + }, + "displayName": { + "type": "string", + "description": "Human-readable name for the UI." + }, + "description": { + "type": "string", + "description": "Clear explanation of what the skill does and when the agent should invoke it." + }, + "category": { + "type": "string", + "enum": [ + "communication", + "productivity", + "system", + "development", + "multimedia", + "custom" + ], + "description": "High-level classification group." + }, + "credentials": { + "type": "array", + "items": { "type": "string" }, + "description": "Environment variables or config keys required for execution (e.g., GMAIL_TOKEN)." + }, + "input": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["null", "string", "array", "object"], + "description": "The primitive format of the input." + }, + "schema": { + "type": "object", + "description": "Valid JSON Schema defining parameter properties if the input type is an object." + } + } + }, + "output": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["null", "string", "array", "object"], + "description": "The primitive format of the output." + }, + "schema": { + "type": "object", + "description": "Valid JSON Schema defining return value fields if the output type is an object." + } + } + } + } +} +``` + +### B. 
Concrete Example: Gmail Skill Manifest + +Located at `~/.hermes/skills/communication/gmail/manifest.json`: + +```json +{ + "name": "gmail-send-email", + "displayName": "Send Gmail Email", + "description": "Sends an email to a recipient via the Gmail API.", + "category": "communication", + "credentials": ["GMAIL_OAUTH_TOKEN"], + "input": { + "type": "object", + "schema": { + "type": "object", + "required": ["to", "subject", "body"], + "properties": { + "to": { "type": "string", "format": "email" }, + "subject": { "type": "string" }, + "body": { "type": "string" } + } + } + }, + "output": { + "type": "object", + "schema": { + "type": "object", + "required": ["messageId", "status"], + "properties": { + "messageId": { "type": "string" }, + "status": { "type": "string" } + } + } + } +} +``` + +--- + +## 3. SPS Agent Note Frontmatter Specification + +SPS Agent stores documents as Markdown files. We extend the frontmatter schema to classify the page type and specify typed relationships to other pages. + +### A. Core Page Types (`type` field) + +- `concept`: A mental model, definition, or educational topic (e.g., _Tritone Substitution_). +- `task`: An actionable item containing workflow metadata. +- `project`: A collection of related tasks and reference materials. +- `decision`: An architectural or project design decision block. +- `reference`: An index of static documentation, APIs, or files. + +### B. 
Document Schema Examples + +#### Example 1: A Project Page (`project`) + +Saved in `vault/hermes-desktop-release.md`: + +```markdown +--- +title: "Hermes Desktop v1.0 Release" +type: "project" +tags: ["release", "milestone"] +status: "active" +priority: "high" +relations: + - type: "has_subtask" + target: "vault/tasks/implement-manifest-loader.md" + - type: "has_subtask" + target: "vault/tasks/design-ontology-migration.md" + - type: "references" + target: "vault/docs/ontology-spec.md" +--- + +# Hermes Desktop v1.0 Release + +We are tracking the integration of the pragmatic ontology for the v1.0 release. +``` + +#### Example 2: A Task Page (`task`) + +Saved in `vault/tasks/implement-manifest-loader.md`: + +```markdown +--- +title: "Implement CLI Skill Manifest Loader" +type: "task" +tags: ["development", "cli"] +status: "todo" +due_date: "2026-06-15" +assignee: "Louis" +relations: + - type: "blocked_by" + target: "vault/docs/ontology-spec.md" +--- + +# Implement CLI Skill Manifest Loader + +We need to add a parser to `src/main/skills.ts` that reads `manifest.json` files from active skill directories. +``` + +--- + +## 4. SQLite Database Index Extensions + +The note index (`.note-index.db`) manages links, files, and FTS search. Currently, the `links` table is simple: + +```sql +CREATE TABLE IF NOT EXISTS links ( + source TEXT NOT NULL, + target_norm TEXT NOT NULL +); +``` + +We extend this schema to add **semantically typed edges** and index frontmatter types cleanly for fast query interfaces. + +### A. 
Extended Table Specifications + +```sql +-- Represents typed edges in our local Knowledge Graph +CREATE TABLE IF NOT EXISTS semantic_relations ( + source TEXT NOT NULL, + target_norm TEXT NOT NULL, + relation_type TEXT NOT NULL, + PRIMARY KEY (source, target_norm, relation_type), + FOREIGN KEY (source) REFERENCES notes(path) ON DELETE CASCADE +); + +CREATE INDEX IF NOT EXISTS idx_semantic_source ON semantic_relations(source); +CREATE INDEX IF NOT EXISTS idx_semantic_target ON semantic_relations(target_norm); +CREATE INDEX IF NOT EXISTS idx_semantic_type ON semantic_relations(relation_type); +``` + +--- + +## 5. Main Process Extractor Code Draft (`src/main/note-index.ts`) + +To populate the schema, the parser inside `NoteIndex` should inspect frontmatter properties. Here is how the node parser integrates: + +```typescript +// Draft extension for note-index.ts parsing flow +interface SemanticRelation { + type: string; + target: string; +} + +function extractSemanticRelations( + props: Record<string, unknown>, +): SemanticRelation[] { + const relations: SemanticRelation[] = []; + if (Array.isArray(props.relations)) { + for (const rel of props.relations) { + if ( + rel && + typeof rel === "object" && + typeof rel.type === "string" && + typeof rel.target === "string" + ) { + relations.push({ + type: rel.type.trim().toLowerCase(), + target: rel.target.trim(), + }); + } + } + } + return relations; +} + +// Inside the db.transaction block in NoteIndex.upsert(relPath, raw, mtime): +// ------------------------------------------------------------------------- +// 1. Delete old relations for this source +// this.db.prepare(`DELETE FROM semantic_relations WHERE source = ?`).run(relPath); +// +// 2. 
Insert new typed relations +// const insSemantic = this.db.prepare( +// `INSERT OR IGNORE INTO semantic_relations(source, target_norm, relation_type) VALUES(?,?,?)` +// ); +// const sRelations = extractSemanticRelations(props); +// for (const rel of sRelations) { +// insSemantic.run(relPath, normalizeName(rel.target), rel.type); +// } +``` + +--- + +## 6. Real-World Execution Scenario (AI Reasoning Loop) + +Using this ontology, the AI agent can parse natural language queries into semantic database lookups. + +### Query + +> "Show me all high-priority project releases that are blocked by another document." + +### AI Resolution Path + +1. **Identify Node Constraints**: Look for files in `.note-index.db` where `type = 'project'` and `json_extract(props, '$.priority') = 'high'`. +2. **Follow Edge Traversal**: Query `semantic_relations` where `relation_type = 'blocked_by'`. +3. **Assemble Results**: Join results to yield the project name, the task name, and the specific blocking page. + +```sql +SELECT + n1.title AS project_title, + n2.title AS blocked_by_title +FROM notes n1 +JOIN semantic_relations r ON n1.path = r.source +JOIN notes n2 ON r.target_norm = n2.path -- simplified name resolution +WHERE json_extract(n1.props, '$.type') = 'project' + AND json_extract(n1.props, '$.priority') = 'high' + AND r.relation_type = 'blocked_by'; +``` + +--- + +## 7. Migration & Rollout Plan + +1. **Phase 1 (Documentation & CLI Auditing)**: Roll out `manifest.json` templates to the standard active skills in `~/.hermes/skills/` (such as the Gmail, filesystem, and internet research tools). +2. **Phase 2 (Database Updates)**: Deploy the SQLite migration schema inside `src/main/note-index.ts` to add the `semantic_relations` table. +3. **Phase 3 (Editor Enhancements)**: Modify the SPS Agent editor component (`src/renderer/src/screens/SpsAgent/editor/`) to support visual relationship linkers when typing (e.g., autocomplete for relations). 
diff --git a/docs/SECURITY-AUDIT.md b/docs/SECURITY-AUDIT.md new file mode 100644 index 000000000..3cc9afa39 --- /dev/null +++ b/docs/SECURITY-AUDIT.md @@ -0,0 +1,93 @@ +# Security Audit — npm advisory baseline + +This document records the project's `npm audit` posture: what was remediated, and the +**intentionally-accepted residual** with the rationale for each. It exists so that a non-empty +`npm audit` is an interpretable, reviewable baseline rather than ambient noise. + +Last reviewed: 2026-06-06 (Hermes Desktop 0.5.4). + +## Summary + +| | Low | Moderate | High | Total | +| ------ | --- | -------- | ---- | -------------------------------- | +| Before | 1 | 13 | 5 | 19 | +| After | 1 | 9 | 3 | **13 (all accepted, see below)** | + +Remediation was deliberately **surgical**: scoped `overrides` for the cleanly-patchable +dev/build-time advisories, no semver-major bumps, and **no** `@excalidraw/excalidraw` downgrade +(its only npm-known "fix" is 0.17.6, which requires React 17/18 and is incompatible with this +React 19 app). `npm audit fix --force` is therefore unsafe here and must not be run. 
+ +## Remediated (via `overrides` in `package.json`) + +All dev/build-time, pinned forward within the same semver major (low risk): + +| Package | Fixed to | Owner | Advisory | +| ----------------------- | --------- | ----------------------------------------- | ----------------------------------------------- | +| `@xmldom/xmldom` | `^0.8.13` | electron-builder → plist | DoS / XML injection (was `<0.8.13`) | +| `tmp` | `^0.2.6` | electron-builder → flatpak-bundler | path traversal (was `<0.2.6`) | +| `ip-address` | `^10.2.0` | electron-builder → node-gyp | XSS in Address6 (was `<=10.1.0`) | +| `ws` | `^8.21.0` | vitest → jsdom | uninitialized memory disclosure (was `<8.20.1`) | +| `postcss` | `^8.5.15` | vite | XSS via unescaped `</style>` (was `<8.5.10`) | +| `nanoid` (3.x) | `^3.3.12` | `@excalidraw/excalidraw` (pinned `3.3.3`) | predictable IDs (was `<3.3.8`) | +| `brace-expansion` (5.x) | `5.0.6` | minimatch@10 (eslint toolchain) | ReDoS via numeric range (was `>=5.0.0 <5.0.6`) | + +Notes on scoping: + +- `nanoid` and `brace-expansion` are **version/parent-scoped** overrides, not global. A global + `nanoid` override would clobber excalidraw's transitive `nanoid@4.x`; a global `brace-expansion` + override would force CommonJS consumers of 1.x/2.x onto the **ESM-only** 5.x line. The overrides + target only the vulnerable instances (`nanoid@3.3.3`, and `brace-expansion` under `minimatch@^10`). + +## Accepted residual (13) — not fixable without breaking the app + +### 1. `lodash` (high) + `lodash-es` (high) — no patched 4.x exists + +GHSA-r5fr-rjxr-66jc (`_.template` code injection), GHSA-xxjr-mmjv-4gpg / GHSA-f23m-r3pf-42rh +(`_.unset`/`_.omit` prototype pollution). The advisories' vulnerable range is `<=4.17.23` with **no +fixed 4.x published** — the only "fix" is a hypothetical lodash 5 that does not exist as a stable +release. There is no override that clears these. 
+ +- `lodash` is **dev-only** (electron-winstaller → electron-builder; runs at Windows-installer build time). +- `lodash-es` **ships**, but is reachable only through Mermaid diagram parsing + (`mermaid`/`@excalidraw` → `@mermaid-js/parser` → `langium` → `chevrotain`). chevrotain uses lodash + internally for **parser construction**, not to evaluate user-controlled template strings or + `_.unset`/`_.omit` array paths. Real exploitability against our usage is low. Diagram source is + authored locally by the user, not remote/multi-tenant input. + +### 2. Excalidraw / Mermaid subtree (moderate ×7) — pinned by `@excalidraw/excalidraw@0.18.1` + +`@excalidraw/excalidraw`, `@excalidraw/mermaid-to-excalidraw`, `@mermaid-js/parser`, `langium`, +`chevrotain`, `@chevrotain/gast`, `@chevrotain/cst-dts-gen`, and `nanoid@4.x` +(GHSA-mwcw-c2x4-8c55, "predictable results when given non-integer values"). +These are transitively pinned by excalidraw's own dependency tree. npm's only proposed fix is to +**downgrade `@excalidraw/excalidraw` to 0.17.6**, which declares a React 17/18 peer and is +**incompatible with this React 19 app** — it would break the build. The `nanoid@4.x` advisory in +particular requires jumping to the **ESM-only `nanoid@5.0.9`**, which `mermaid-to-excalidraw` does +not support; the practical impact is nil because excalidraw never calls `nanoid()` with a +non-integer size. Re-evaluate when a future `@excalidraw/excalidraw` release moves these forward. + +### 3. `vite` (high/moderate/low) + `esbuild` (moderate) — dead build-time deps of `@wesbos/code-icons` + +GHSA-c27g-q93r-2cwf (launch-editor command injection on Windows), GHSA-67mh-4wv8-2f99 (esbuild dev +server), and related path-traversal advisories. **Our own** toolchain `vite` (7.3.5) and `esbuild` +(0.27.x) are patched. The residual comes entirely from `@wesbos/code-icons@1.2.4`, which lists +`vite@^4` + `vite-plugin-dts@1.7.3` as regular `dependencies` (a packaging mistake on their part). 
+code-icons ships **pre-built icon assets**; that bundled `vite@4.5.14` / `esbuild@0.18.20` is never +executed by our dev server, build, or runtime. Vite 4.x has no in-major fix, and code-icons' +`vite-plugin-dts@1.7.3` cannot move to vite 7, so forcing it would break nothing real but is not +worth the churn. Accepted as non-executing dead weight. + +### 4. `@wesbos/code-icons` (low) — fix is a semver-major + +GHSA chain via the bundled vite above; the only "fix" is a major bump of code-icons. Deferred with +the rest of the code-icons build-tooling residual. + +## Operational notes + +- **Do not run `npm audit fix --force`** — it downgrades `@excalidraw/excalidraw` to a React-17/18 + release and breaks the React 19 build. +- The remediation is fully reversible: delete the `overrides` block in `package.json` and + `npm install` to revert. +- There is currently **no CI `npm audit` gate**; if one is added, allowlist the advisory IDs above + (e.g. via `audit-ci` / `.nsprc`) so the gate fails only on _new_ advisories. diff --git a/docs/STORAGE.md b/docs/STORAGE.md new file mode 100644 index 000000000..05eed233a --- /dev/null +++ b/docs/STORAGE.md @@ -0,0 +1,159 @@ +# SPS Agent storage substrate + +How the **SPS Agent** workspace persists its content. The design goal: match +Notion's databases / wikis / forms / backlinks affordances while keeping +**plain markdown on disk as the single source of truth**, with SQLite as a +purely derived, rebuildable query index. + +## The one rule + +**Markdown on disk is the only source of truth. SQLite is a rebuildable index. +Writes go file‑first.** Delete the index and it rebuilds from the markdown with +no data loss. Nothing reads the index back as authoritative. 
+ +## On‑disk layout + +Everything lives under the active profile's home directory +(`<home>` — `HERMES_HOME` for the `default` profile, else +`<HERMES_HOME>/profiles/<name>/`): + +``` +<home>/sps-agent/ + workspace.json # the JSON blob (see "Storage modes") + vault/ + <pageId>.md # one markdown file per page (frontmatter + blocks) + <source>/<rowId>.md # rows of a folder-backed query database (S4) + _manifest.json # structure the page files can't hold (see below) + .note-index.db # the derived better-sqlite3 index (rebuildable) +``` + +- **Page files** (`<pageId>.md`): YAML‑style frontmatter (`title` / `icon` / + `cover`) + the block body. The page's basename **is** its `pageId`; that's how + `[[wikilinks]]` resolve in the graph. +- **Query‑database rows** (`<source>/<rowId>.md`): frontmatter holds the row's + properties (`title` / `status` / `prio` / …); the body is an optional note. A + database block opts into this folder‑backed mode by carrying a `source` field. +- **`_manifest.json`**: the page tree, trash, comments, and current page — the + structure that individual page files can't represent on their own. + +## Storage modes (the `storageMode` flag) + +`lib/storageMode.ts`, persisted in `localStorage` under +`sps-agent-storage-mode-v1`. Default **`blob`** — nothing changes until the user +explicitly migrates. + +- **`blob`** (default): `workspace.json` is authoritative. The vault is an + **additive mirror** — SPS edits are also written to `vault/<pageId>.md` so the + substrate and its index materialize, but the mirror is never read back as + truth (`lib/persistence.ts` → `mirrorPage`). +- **`vault`**: the markdown vault (page files + `_manifest.json`) is + authoritative; the editor loads from it and the blob is kept as a backup + (`lib/vaultStore.ts`). + +Both modes are always present and reversible — migrating never rips out the blob +or the embedded data. 
+ +## Serializers (round‑trip markdown ↔ blocks) + +- `editor/blockMarkdown.ts` — block‑tree ↔ markdown, two tiers: + - **Tier 1** (clean, Obsidian‑compatible): `p`, `h1‑3`, `li`, `numli`, `todo`, + `quote`, `code`, `divider`, `image` with only markdown‑expressible inline + marks. + - **Tier 2** (lossless fallback): callout, toggle, bookmark, page, database, + and anything carrying colour/bg or inline HTML markdown can't express → + a single `` metadata comment that reconstructs the block. + - `id` is a runtime handle, **not** content — normally dropped on serialize + and regenerated on parse. **Exception (F2):** a block an open comment is + anchored to keeps a stable id across the round‑trip — an Obsidian‑style + trailing ` ^` on inline tier‑1 blocks, or the id retained inside the + tier‑2 meta. Non‑anchored output stays byte‑identical (the golden tests pin + this). +- `editor/pageMarkdown.ts` — page (frontmatter + blocks) ↔ markdown file. +- `editor/rowMarkdown.ts` — a query‑database row's properties ↔ a markdown file. +- `editor/workspaceVault.ts` — whole workspace ↔ vault snapshot (page files + + manifest), plus the **parity gate** (below). + +## The note index (derived, rebuildable) + +`src/main/note-index.ts` — a `better-sqlite3` database over the markdown: + +- **FTS5** full‑text search over page bodies. +- **Wikilink graph**: `backlinks(relPath)` (who links to this page) and + `links()` (all resolved `{source, target}` edges, used by the graph view). +- **Property queries**: filter/sort/scope over frontmatter (JSON column). +- A `chokidar` watcher keeps it live as files change; `rebuild()` rebuilds it + from disk (proving markdown is the sole truth). + +Renderer access is via IPC‑backed hooks in `hooks/useNoteIndex.ts` +(`useVaultQuery`, `useVaultBacklinks`, `useVaultSearch`, `useVaultGraph`) — all +best‑effort (empty when the index/gateway is unavailable). 
+ +## Migrate / rollback / backup + +`lib/storageActions.ts` (`toggleStorageMode`) is the single safe path, shared by +the command palette and the **Storage** section of the Tweaks panel: + +- **Migrate (blob → vault)**: runs the **parity gate** first + (`workspaceParity`) and **refuses** if content/structure wouldn't round‑trip + losslessly. Then it timestamp‑backs‑up `workspace.json` + (`workspace.json.bak-<timestamp>`, surfaced as the "last backup" path) before + writing the vault. +- **Rollback (vault → blob)**: reconstructs the blob from the vault and makes it + authoritative again. The blob is never deleted. + +**Parity (`workspaceParity`)** round‑trips a live workspace through the vault and +reports `ok` = content + metadata + structure all survive. It also reports +`blockAnchorsOk` (F2): every comment anchored to a _real_ source block keeps its +id through the round‑trip. Dangling anchors (a `blockId` with no matching block) +are pre‑existing breakage and don't gate cutover. + +## Orphan cleanup (F3) + +`deletePage` moves a page to **trash** (restorable, including across reload — its +`vault/<pageId>.md` is intentionally retained, and `_manifest.json` scoping stops +it resurrecting). When pages leave the workspace entirely (e.g. _Reset workspace +to sample_), `resetWorkspace` removes their now‑orphaned vault files via +`spsDeletePage` / `sps-vault.ts:deletePageIn` (id‑validated, traversal‑safe, +best‑effort). + +## Surfaces + +- **Graph view** (F4): a dependency‑light radial SVG of the wikilink graph + (`graph/GraphView.tsx`); nodes are pages, edges from the index, clicking a node + opens it. +- **Storage settings** (F5): the Tweaks panel's **Storage** section shows the + current mode, a live parity readout, the migrate/rollback control, and the last + backup path. +- **Folder‑backed query databases** (F1): the same board/table/list/gallery/ + calendar views as the embedded `TasksDB`, with inline edits written back to the + row files. 
+ +## Testing — the native‑module caveat + +`better-sqlite3` is compiled for **Electron's** node ABI, not vitest's, so any +code that opens the index **cannot run under vitest**. The split: + +- **Pure logic + IPC‑mocked hooks/components** → vitest (jsdom). Renderer + serializer code may use the DOM (available in jsdom and the renderer). +- **Anything that opens the index** → proven by the electron‑node script + `scripts/verify-note-index.ts`, run via **`npm run verify:note-index`** + (`ELECTRON_RUN_AS_NODE=1` so the Electron‑ABI binary loads). This covers + `query` / `search` / `backlinks` / `links` / `rebuild`. +- **The renderer UI** (never exercised by the unit suite) → the Playwright‑ + Electron smoke harness `scripts/sps-smoke.mjs` (`npm run build` first), which + boots the built app against a throwaway seeded profile and screenshots the key + surfaces. + +## Full verification gate + +```bash +npx tsc --noEmit -p tsconfig.node.json --composite false # main + preload +npx tsc --noEmit -p tsconfig.web.json --composite false # renderer +npx eslint +npx vitest run # full unit suite +npm run verify:note-index # electron-node index proof +npm run build # typecheck + bundle +``` + +Every new preload method must appear in **both** `src/preload/index.ts` and +`src/preload/index.d.ts`, or `tests/preload-api-surface.test.ts` fails. diff --git a/docs/chat-merge-dogfood-2026-06-09.md b/docs/chat-merge-dogfood-2026-06-09.md new file mode 100644 index 000000000..7763602d3 --- /dev/null +++ b/docs/chat-merge-dogfood-2026-06-09.md @@ -0,0 +1,99 @@ +# Dogfood — merged Chat surface + Developer-mode toggle + +**Date:** 2026-06-09 +**Scope:** commits `0e0ac52` (surface merge), `634db48` (Developer mode), `1c093b0` +(three-tier ChatHeader + "⋯" overflow) on `main`. +**Method:** black-box, Playwright-Electron against the built app, throwaway +`HERMES_HOME` (`/tmp/hermes-dogfood`) seeded with a 2-message chat session in +`state.db`. No real data. 
The Hermes gateway does NOT run in this harness (fake +venv markers), so live message send / approvals / diffs could not be exercised — +see Coverage gaps. + +## What passed (clean) + +- **Rail IA** — the AI Assistant section shows exactly `Chat`, `Search workspace`, + `You`. The old `Advanced (Developer)` entry is gone. No empty/dead surface. +- **Persistence + resume (the core of the merge)** — the seeded session appears in + Recents as "Dogfood test chat"; clicking it loads the full 2-message history into + the single Chat surface. Confirms all chats now route through the session-backed + path. +- **Overflow "⋯" menu — correct items by state:** + - empty chat → `[Fast Mode]` + - populated chat → `[Fast Mode, Compress context, Clear chat]` + - populated + Developer mode ON → `[Fast Mode, Compress context, Checkpoints (/rollback), Clear chat]` + - No dev-only item ever leaks when Developer mode is OFF. +- **Overflow a11y** — trigger has `aria-haspopup="menu"`, `aria-expanded` flips on + open; `role="menu"`/`menuitem` present; Esc closes; focus returns to the trigger + after Esc; ArrowDown roves focus (`Fast Mode → Compress context`). +- **Clear chat** — opens the themed `ConfirmDialog` ("Clear this conversation? This + cannot be undone." with Cancel + danger-red Clear chat), NOT a native dialog. +- **Developer mode** — default OFF (checkbox unchecked, localStorage null); toggling + persists (`hermes-developer-mode-v1="true"`); gating is reactive live — Checkpoints + appears/disappears in the open-able overflow as the flag flips, no reload needed. + Control is well-placed in Settings → Advanced with a clear hint. +- **Regression** — Graph / You / Home navigation still works; workspace boots fine. + +## Issues + +### 1. 
First-run checklist overlaps the chat input — Medium + +- **What:** The "Get started · 0/5" onboarding card (from D7) is fixed at + bottom-left and overlaps the bottom of the Chat surface — it sits over the + left portion of the chat input row / "Choose a model in Models →" banner. +- **Repro:** Fresh profile (checklist not yet dismissed) → open Chat → the card + covers part of the input area. +- **Why it matters:** Obscures chat controls for exactly the new users the checklist + targets. Not from the chat-merge commits — a D7 placement issue surfaced here. +- **Suggested fix:** Anchor the checklist clear of the chat input (e.g. above the + input, or hide it on the chat surface), or make it dismiss-on-first-chat. +- **Severity:** Medium. + +### 2. Chat header shows the session-id suffix, not the human title — Low (pre-existing) + +- **What:** With the "Dogfood test chat" session open, the header reads + "Session food-1" (last 6 chars of `sess-dogfood-1`) while Recents shows the real + title "Dogfood test chat". +- **Repro:** Open any titled session from Recents → compare header vs Recents label. +- **Why it matters:** Mild inconsistency; the title is the more useful label. The + merge didn't cause this (the `` component only receives `sessionId`, not the + title) but now that all chats route through this path it's more visible. +- **Suggested fix:** Pass the session title into ``/`ChatHeader` and prefer it + over the id suffix when present. +- **Severity:** Low. + +### 3. OpenClaw migration banner shows on every Settings tab — Low / Nice-to-have (pre-existing) + +- **What:** The "OpenClaw Installation Detected" banner renders on the Advanced tab + (and every other tab) because it is intentionally not section-gated. +- **Repro:** Settings → any tab → banner present. +- **Why it matters:** Minor visual repetition; by design from the B3 tab split. +- **Suggested fix:** Gate the banner to the General (or Connection) tab only. +- **Severity:** Nice-to-have. 
+ +## Coverage gaps (not defects — could not be exercised in this harness) + +- **Live chat round-trip** — sending a message, approvals/diffs rendering, and a + brand-new chat appearing in Recents (the documented once-on-mount freshness gap) + all need a running Hermes gateway, which the seeded harness does not start. +- **Worktree control** — gating it ON was verified for Checkpoints; the worktree + toggle additionally requires a bound context folder, which needs the native folder + picker (not drivable headless). The gating _logic_ is shared with Checkpoints + (verified), but the worktree control itself was not visually exercised. + +## Summary + +- **Tested:** rail IA, session persistence + resume, overflow item sets across + empty/populated/dev-on states, overflow keyboard a11y, themed clear-confirm, + Developer-mode default/persist/reactive-gating, light regression. +- **Issues:** 0 Critical · 0 High · 1 Medium (checklist overlap) · 2 Low/Nice-to-have. +- **Confidence:** High for the merge's IA, the overflow menu, and the Developer-mode + behavior (all directly observed, including persistence-resume from a seeded + session). Medium overall, because gateway-dependent flows (live send, approvals, + diffs, new-chat Recents freshness, worktree-with-folder) were not exercised. +- **Recommendation:** The chat-merge + Developer-mode work is solid and ship-ready as + landed. The one actionable item from this run is the **Medium** first-run-checklist + overlap (a D7 regression, not a chat-merge defect). The two Low items are + pre-existing polish. + +> Hard stop — report only. To fix any of the above, start a new turn with +> `/remediate` (by severity/id), `bugfix`, or `diagnose`. 
diff --git a/docs/external-digest-dogfood-2026-06-10.md b/docs/external-digest-dogfood-2026-06-10.md new file mode 100644 index 000000000..fa88e3db4 --- /dev/null +++ b/docs/external-digest-dogfood-2026-06-10.md @@ -0,0 +1,76 @@ +# Weekly External-Sessions Digest — Dogfood Report (2026-06-10) + +**Feature under test:** the weekly digest (`3bb3eb59` and the 4 commits before it) — +a recurring, review-first job that smart-merges the period's external AI-tool +sessions into a living KB page. + +**Method:** black-box end-to-end against a **throwaway `HERMES_HOME`** + a seeded +fake Claude Code source, with the app pointed (remote mode) at a **local stub +gateway** so the only mocked piece is the external LLM. Every line of the digest +feature ran for real: backfill → `+ Weekly digest` → Run now → `runDigest` → +`listConversationsSince` → source assembly → `mergeBriefAndQueue` → pending → +Apply → page render. Harness: `scripts/digest-dogfood.mjs` (3 modes). The real +`~/.hermes` and real transcripts were never touched. 
+ +## What I tested + +| Journey | Result | +| ------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | +| **Happy path** — enable Claude Code → `+ Weekly digest` → Run now → Apply | ✅ Digest page "External Sessions Digest" created with `## Highlights / ## Decisions / ## Sources (provenance, no URL) / ## Updates` | +| Schedule presentation | ✅ "Digest" chip + "External sessions" label + "Weekly · Mon 08:00 · today · **app-open only**" (no paired cron — correct) | +| Review-first | ✅ Run now produced a **Pending update** ("Weekly digest — 1 external session") with Apply/Dismiss; nothing written to the KB until Apply | +| **Empty period** — session 14 days old (outside the weekly window) | ✅ No pending, no page, no crash; digest short-circuits **before** the LLM call (0 synthesis calls) | +| **Gateway failure** — stub returns 500 | ✅ No pending, no crash, schedule persists; error handled in `runDigest`'s catch | + +Screenshots in `/tmp/digest-dogfood`, `/tmp/digest-empty`, `/tmp/digest-fail`. + +## Issues found + +### 1. Research-flavored copy leaks into the digest UX — **Severity: Medium** + +- **What:** The management modal is titled **"Scheduled research"**, the create row reads **"Research this topic on a schedule…"**, and digest run outcomes flash research wording — empty period → **"No new info this run"**, failure → **"Run failed"**. A digest doesn't research a topic, so this reads as the wrong feature. +- **Repro:** Create a digest via `+ Weekly digest`; the Scheduled modal opens titled "Scheduled research". Run now on an empty period → toast "No new info this run". +- **Why it matters:** User-facing clarity. A user managing a digest sees "research" everywhere and may not trust they're in the right place. Pure copy, no functional impact. 
+- **Suggested fix (one line):** Neutralize the modal title to "Scheduled" and branch run-outcome toasts on `item.kind` (digest → "No external sessions this period"). + +### 2. Empty-period outcome is indistinguishable from no-material-change — **Severity: Low** + +- **What:** Both "no sessions in the window" and "sessions present but nothing material to merge" surface as the same `no-change` → "No new info this run". The user can't tell whether the digest had nothing to look at vs. looked and found nothing new. +- **Repro:** Run a digest in a week with no sessions vs. re-run after an applied digest (dedupe) — identical toast. +- **Why it matters:** Minor diagnosability; a user wondering "did it even see my sessions?" gets no signal. +- **Suggested fix:** `runDigest` already returns "No external sessions this period" for the empty case — surface that summary in the toast instead of the generic `no-change` string. + +### 3. Digest pending/apply is logged as WikiLog op "research" — **Severity: Low** + +- **What:** `onApply` calls `spsAppendWikiLog("research", …)` and the digest reuses the "research" WikiLogOp (a deliberate v1 choice to avoid widening the union). The wiki evolution log therefore labels a digest commit as "research". +- **Why it matters:** Provenance accuracy in the wiki log only; no functional effect. Already documented as a v1 trade-off. +- **Suggested fix:** Add a "digest" WikiLogOp when the union is next touched. + +### 4. Digest creation + scope are under-exposed in the management modal — **Severity: Low / Nice-to-have** + +- **What:** A digest can only be _created_ from the External Sessions modal's `+ Weekly digest` (good discovery), but the Scheduled modal itself offers only the research topic input — no "new digest" there. Cadence is hardcoded weekly and the `scope` (source/project) plumbed through the types is not exposed in any UI. +- **Why it matters:** Discoverability + flexibility. 
Acceptable for v1 (documented), but a user who wants a daily digest or a Codex-only digest can't get one from the UI. +- **Suggested fix:** Offer cadence/scope on the `+ Weekly digest` action (or a small digest-create row in the Scheduled modal). + +### 5. Observation (not a bug): the digest skips the LLM when there are no sessions + +- In the empty-period run the stub received **0 synthesis calls** — `runDigest` returns before `mergeBriefAndQueue`. This is correct and cost-saving (no wasted gateway call on an empty week). Noted as a positive. + +## Architectural / invariant check + +- **Review-first (pending → Apply)** preserved — correct for the default `blob` storage mode where direct vault writes aren't read back. ✅ +- **Untrusted fencing** — the digest source is wrapped in `` with the never-follow-instructions preamble (verified in code + ingest test). ✅ +- **Provenance-only `## Sources` (no URLs)** — confirmed in the rendered page ("Claude Code · project: proj"). ✅ +- **App-open-only digests (no paired cron)** — confirmed in the schedule label. ✅ +- No invariant weakened; the change reuses the Scheduled-Research pending/merge path rather than adding a parallel one. + +## Summary + +- **Tested:** happy path (create → run → review → apply → page), empty-period, gateway-failure, schedule presentation, review-first gating. +- **Issues:** 0 Critical, 0 High, 1 Medium (research-flavored copy), 3 Low, 1 positive observation. +- **Confidence: High** — the full pipeline works as designed and both failure modes degrade gracefully; the only mocked component was the LLM itself. +- **Recommendation:** Ship as-is functionally; the Medium copy issue (#1) is the one worth a quick follow-up for clarity. To fix any of these, start a new turn with `/remediate` (or `bugfix` for #1). 
+ +--- + +_Report only — no code was changed in this run._ diff --git a/docs/kb-phase2-dogfood.md b/docs/kb-phase2-dogfood.md new file mode 100644 index 000000000..047f67c96 --- /dev/null +++ b/docs/kb-phase2-dogfood.md @@ -0,0 +1,261 @@ +# KB Phase-2 Dogfood — the trigger evaluation (BACKLOG item 9) + +**Date:** 2026-06-05 · **Model under test:** xai-oauth / grok-4.3, via the live local +Hermes gateway (`127.0.0.1:8642`) · **Harness:** `scripts/kb-dogfood/` (run with +`run.sh`; reproducible). + +## TL;DR — the gate verdict inverted + +The backlog's bet (item 1) was: _most KB failures will be **depth** (the right doc is +retrieved but the model can't read far enough) → build an RLM navigate-and-read loop; +recall is secondary._ The dogfood shows the opposite of the actionable half: + +- **Depth is already solved in production.** The agentic gateway reads the _full file_ + via its file tool when the injected excerpt is insufficient — using the absolute paths + the grounding message already exposes. Proven by a controlled experiment, not inferred. + Every depth question (answer buried past the 1500-char excerpt clamp) was answered + **correctly**. +- **Recall is the residual gap** — and it is exactly the failure the file tool _cannot_ + fix, because you can't read a file retrieval never surfaced. The only two live failures + were recall misses. + +So the targeted next step is **not** a depth-oriented read loop (that exists). It is +**agentic re-search** — let the agent reformulate and re-query the vault (and follow +wikilinks) so the right doc enters the candidate set in the first place. Embeddings stay +demoted: a possible backing for that search tool, only if FTS reformulation leaves a +_measured_ recall gap. 
+ +## Method + +A designed 5-doc security-guarding corpus (`scripts/kb-dogfood/corpus/`) — an SOP, a +guard handbook, a master service agreement, site post orders, and an incident-response +matrix — with deliberate cross-references, shared vocabulary, long sections (to bury +facts past the excerpt clamp), and planted synonym mismatches (to force recall misses). +11 questions (`questions.json`), each tagged with the failure mode it was designed to +probe and a distinctive answer fragment. + +The harness drives the **real** pipeline verbatim: `groundingTerms` → `getSpsNoteIndex() +.search(terms, 5, "any")` → `buildRetrievalSystemMessage` → `buildSpsAssistantMessages`, +then posts the exact SPS payload (`model: "hermes-agent"`, `stream: false`) to the live +gateway. A throwaway `HERMES_HOME` isolates the corpus from the real `~/.hermes`. + +- **Phase A (offline):** for each question, is the answer-bearing doc in the top-5 FTS5 + hits, and is the answer inside the 1500-char excerpt? → a mechanical recall-vs-depth + prediction. +- **Phase B (live):** send the grounded request to grok-4.3, grade the answer. 
+ +## Results + +| Question | designed | Phase A predicted | Phase B live | doc retrieved | answer offset | in excerpt | +| ----------------- | --------- | ----------------- | ------------ | ------------- | ------------- | ---------- | +| pass-checkcall | pass | pass | ✅ correct | yes | 1196/3408 | yes | +| pass-uniform | pass | pass | ✅ correct | yes | 480/3097 | yes | +| pass-coderesponse | pass | pass | ✅ correct | yes | 981/2670 | yes | +| depth-codered | depth | depth-clamp | ✅ correct | yes | **2797**/3408 | **no** | +| depth-notice | depth | depth-clamp | ✅ correct | yes | **2717**/3097 | **no** | +| depth-creditcap | depth | depth-clamp | ✅ correct | yes | **1724**/2670 | **no** | +| depth-liability | depth | depth-clamp | ✅ correct | yes | **2360**/2670 | **no** | +| multihop-medical | multi-hop | pass | ✅ correct | yes | 725/1737 | yes | +| multihop-penalty | multi-hop | depth-clamp | ✅ correct | yes | **1572**/2670 | **no** | +| recall-holiday | recall | recall | ❌ wrong | **no** | n/a | n/a | +| recall-aed | recall | recall | ❌ wrong\* | **no** | n/a | n/a | + +**9/11 correct. 0 depth failures. 2 recall failures.** Phase A had predicted 5 depth +failures — every one was rescued live. + +\* recall-aed is a _partial_: the agent named the defibrillator by pulling it from the +incident-response-matrix (which _was_ retrieved and mentions it in passing), but lost the +location detail ("outside the management suite") that lived only in the non-retrieved post +orders. In a cross-referential corpus, some recall misses are partially masked by +incidental mentions in retrieved neighbours; doc-specific details are still lost. + +## The mechanism proof (`verify-mechanism.ts`) + +The depth-question successes had one of two explanations: (a) the agent read the full file +via the file tool, or (b) the excerpt wasn't really truncated. 
A single-variable +experiment settled it — hold the grounding excerpt constant, flip only the absolute path +validity: + +- Excerpt contains the buried tail (`within 5 minutes` / `flash report`): **false** + (confirmed truncated; 5 paths exposed). +- **Valid** paths → answer includes the buried tail, verbatim, citing "section 6.1". +- **Bogus** paths (`/nonexistent/missing.md`), identical excerpt → _"the specific steps + and minute-by-minute details are not provided in the excerpts."_ + +Only the path changed. **The gateway reads the full file via the file tool.** The +grounding message's instruction — "If an excerpt is insufficient, read the full file at +its absolute path with the file tool" — is _live behaviour_, not a dead string. The +`spsAssistant` client is one-shot (`stream:false`), but the **server** (the Hermes agent) +runs a tool loop before returning the final completion. + +This is why item 1's premise — "the SPS path does not run a tool loop, so depth needs +building" — is wrong about the system even though it's right about the client. + +## What this does and does not establish (caveats — read before acting) + +- **Model-dependent.** Proven for grok-4.3, which reliably escalates to a file read. A + weaker configured model (e.g. a small local Ollama) may not call the tool, which would + reintroduce the depth gap. The capability rides on the model choosing to use the tool. +- **Reliability at scale unmeasured.** 5/5 depth successes + one causal proof is enough to + show the mechanism _exists and fires_, not that it's robust across a large vault or + never wanders/loops (the backlog's reliability counter-case stands, untested here). +- **Recall base-rate is not 2/11 in the wild.** Both recall misses used _deliberately_ + disjoint vocabulary ("vacation/yearly" vs _holiday/annum_; "life-saving device" vs + _defibrillator_). Natural phrasing usually shares enough keywords to retrieve the doc. 
+ The finding is the failure **type** that survives the file tool, not its frequency. +- **Latency not quantified.** Depth answers visibly took longer (extra tool round-trips). + For an interactive co-author this is the real cost of the existing depth mechanism — + worth measuring before leaning on it harder. +- **Local mode only.** Grounding is gated `!isRemoteMode()`, and the file tool reads + local paths — consistent: in the only mode that grounds, the paths are readable. + Remote/SSH grounding (backlog item 3) is a separate, still-open transport problem. + +## Recommendation for item 1 + +1. **Do not build a depth-oriented RLM read loop.** It already exists (agentic gateway + + file tool + absolute paths in grounding) and worked on every depth probe. Building it + would be re-implementing a shipped capability. +2. **Attack recall instead.** The residual failure is "right doc never retrieved." Cheapest + first moves, in order: (a) measure whether query _reformulation_ by the agent closes it + — i.e. give the agent a `vault_search` tool it can re-call with rephrased terms (this is + the "navigate" half of RLM and also handles multi-hop _discovery_ of non-retrieved, + wikilinked docs); (b) only if a measured recall gap remains after reformulation, add + local embeddings as one more search tool the agent can call. +3. **Harden the existing depth path** for weaker models / scale before relying on it: + confirm the file-tool escalation fires on the models users actually run, and quantify + the latency it adds. + +The corpus, harness, question bank, and mechanism test are committed under +`scripts/kb-dogfood/` and are re-runnable against any gateway. + +--- + +## Addendum — real-PDF run (2026-06-05) + +The synthetic run used hand-written 3 KB markdown. 
To stress the depth mechanism at +real-document scale and through the **real ingestion path**, the harness was re-run over +three real PDFs via the product's own `extractPdfToMarkdown` (new step: +`scripts/kb-dogfood/ingest-pdfs.ts`): Coase, _The Nature of the Firm_ (20 pp, 57 KB text), +Google, _Anatomy of a Personal Health Agent_ (148 pp, **355 KB text**), and a Buffett +article. 9 questions, graded live against the same gateway (grok-4.3), then **source-verified +by hand**. + +**Headline: depth holds at scale. 9/9 substantively correct, 0 depth failures, 0 +hallucinations.** The agentic file-read recovered exact figures from deep inside the 355 KB +file — e.g. the DE-agent differential-diagnosis "top-1 accuracy of 46.1% vs DDx 41.4%" at +offset ~253k, and the precise effort breakdown "559 + 561 = 1,120 hours" (deeper and _more_ +precise than the abstract's rounded "1,100"). Two answers initially looked wrong but were +the model reading **deeper, more-specific real data** than the anchor fragment — both +confirmed present in the source (lines 158–159, 784–786). Several answers explicitly cited +`[full file: …]`. So the file-tool depth mechanism does not just read the start of a long +file — it locates specific facts ~100 pages in, without confabulating. + +**Three new findings the synthetic run could not surface:** + +1. **An ingestion-quality bug (not OCR).** The Buffett PDF has a custom font with no + ToUnicode cmap, so `extractPdfToMarkdown` produced **garbage text** (`!" #$%#& "'()…`) — + yet `hasUsableTextLayer` returned **true** (it only counts non-space chars), so the + garbage would silently enter the KB and ground answers on nonsense. This is distinct from + the scanned-PDF/OCR case (item 2): a text layer that _exists_ but is unmappable. The + detector needs a real "is this text intelligible" check (e.g. dictionary-word ratio), not + a char count. 
In this run the garbage was inert (its tokens never matched English queries, + so it was never retrieved) — but on a vault where it _did_ rank, it would poison grounding. + **Recommend a new backlog item.** + +2. **Recall is not testable below the retrieval cap.** With only 3 docs (< `GROUNDING_HITS` + = 5), every on-topic query retrieves the relevant doc; both deliberately-synonym recall + probes still retrieved their gold doc. Confirming the recall gap (the residual failure + from the synthetic run) requires a corpus **larger than 5 docs**. The real test of item + 1's recommended direction (agentic re-search) still needs a bigger corpus. + +3. **Answer specificity on figure-dense docs.** When a long doc reports many similar numbers + (the Google paper has dozens of accuracy figures), an under-specified question + ("what accuracy did the DE agent achieve?") gets _a_ correct figure, not necessarily + _the_ one the user meant. Not a grounding failure — a UX consideration for the co-author + on quantitative source material. + +Net: the depth-already-solved conclusion **strengthens** — it survives a 355 KB / 148-page +document with deep, figure-dense content and no hallucination. The open work is unchanged: +attack **recall** (needs a >5-doc corpus to even measure), harden ingestion (the garbage-text +bug), and quantify latency. Real-run corpus/questions/results were kept out of the repo; only +the reusable `ingest-pdfs.ts` step is committed. + +--- + +## Addendum — recall experiment (2026-06-05): cheapest fix measured + +The recall direction from item 1 was finally **measured**, not assumed. 
Harness: +`scripts/kb-dogfood/recall-experiment.ts` over an 8-doc corpus +(`scripts/kb-dogfood/recall-corpus/`, > the top-5 retrieval cap) with two engineered +**keyword-recall misses** — the gold doc shares no salient term with the question, so it +falls outside the top-5 and is never handed to the agent (RM-holiday: "vacation/yearly" +vs the handbook's "holiday/annum"; RM-keys: "safe/access code" vs the policy's +"cabinet/combination") — plus two controls. Two arms, 5 trials each, live (grok-4.3): + +- **baseline** — current grounding (top-5 excerpts + paths). +- **vault-nav** — the cheapest possible fix: append one paragraph naming the vault + **directory** so the agent can list/read other files to discover the missed doc with its + existing file tools. (Kept entirely in the harness — production grounding is unchanged.) + +| question | baseline | vault-nav | +| ------------- | -------- | ----------------- | +| RM-holiday | 0/5 | 5/5 (100%) | +| RM-keys | 0/5 | 3/5 (60%) | +| controls (×2) | 5/5 | 5/5 (no breakage) | + +**Findings:** + +1. **The recall gap is real and the file tool cannot close it.** Baseline 0% on both misses: + with the gold doc absent from the top-5, no path is handed over, and (unlike depth) the + agent has nothing to read. This is the residual failure the depth mechanism leaves behind. +2. **The cheapest fix helps a lot — but is stochastic.** vault-nav lifts recall 0 → 80% mean. + But the **agentic gateway is non-deterministic**: across runs the agent navigated the vault + on an obvious reformulation (vacation→holiday, 100%) and only _sometimes_ on a harder one + (safe→cabinet, 60%) — and an earlier single-shot run closed **0/2**. (This itself is a + methodology lesson: n=1 per arm flipped between "ship it" and "useless" on consecutive + runs; agent behaviour must be measured as a **rate**.) +3. **Controls are unaffected** — the hint never broke an easy retrieval. 
+ +**Conclusion / next step.** vault-nav is a cheap, strictly-positive, but **unreliable** +mitigation — it can't be the primary fix because you can't promise a co-author it will find a +synonym-phrased fact. The **reliable** lever is **app-side query expansion**: broaden the FTS +query (synonyms / term variants) so the gold doc enters the candidate set and its path is +handed over — which the agent reads ~deterministically (controls 100%, and the proven depth +mechanism). That sits squarely in this repo's "app selects candidates" role, needs no upstream +change, and is testable with this same harness. **Embeddings remain unjustified** until FTS +query expansion is shown to leave a residual synonym gap. The vault-nav hint may later be added +as a cheap belt-and-braces, but only on top of the deterministic fix, not instead of it. + +### Query expansion — built & measured (2026-06-05) + +Implemented the reliable fix: `buildRetrievalSystemMessage` now asks the model for a few +**synonym-rephrased keyword queries** (`expandQueryVariants` → `parseQueryVariants`), searches +each, and **fuses the ranked lists by reciprocal rank** (`fuseRankings`) so a doc surfaced by +any variant enters the top-K and its path is handed to the agent. Best-effort and bounded by a +12 s timeout — any failure (no gateway, timeout) degrades to the original-query behaviour. 
+Re-measuring with the same harness (`recall-experiment.ts`, now `expand=false` vs `expand=true`, +5 trials/arm): + +| question | no-expansion | query-expansion | +| ------------------------------------------------------------- | ------------ | ----------------- | +| RM-holiday (vacation→holiday, clean synonym) | 0/5 | 4/5 (80%) | +| RM-keys (safe/access-code→cabinet/combination, semantic leap) | 0/5 | 1/5 (20%) | +| controls (×2) | 5/5 | 5/5 (no breakage) | + +**It reliably closes clean synonym misses** (0 → 80%; the answers cite the gold doc the original +query missed) and **leaves a residual on hard semantic gaps** (RM-keys: "safe" → "key cabinet" is +a concept leap, not a synonym — keyword expansion can't reliably bridge it). That residual is the +**measured justification for embeddings** as the gated next tier — exactly the gate's condition. +Controls are unaffected. **Shipped on** (default, within the already-opt-in grounding path). + +Honest costs / limits, recorded: + +- **+1 model call per grounded question** (serial, before retrieval). We accept it because + recall misses are indistinguishable from successful retrieval (a miss returns a full set of + _wrong_ docs), so there's no cheap "expand only when needed" signal — documented as a future + optimisation hook. +- **Expansion quality is itself stochastic** (the variant model may not produce the bridging + vocabulary), so this is a strong-mitigation, not a guarantee — embeddings are the path to a + semantic (non-keyword) guarantee. +- Pure logic (`parseQueryVariants`, `fuseRankings`) is unit-tested in + `tests/workspace-grounding.test.ts`; end-to-end efficacy is the harness measurement above. 
diff --git a/docs/research-reach.md b/docs/research-reach.md new file mode 100644 index 000000000..6ed041f60 --- /dev/null +++ b/docs/research-reach.md @@ -0,0 +1,48 @@ +# Research Reach + +Research Reach lets SPS detect local open-source source tools and use them to broaden My Assistant's coverage for research and learning. + +It can help with: + +- Web pages and RSS +- Public webpage extraction through optional Crawl4AI CLI setup +- GitHub repositories, issues, and profiles +- YouTube metadata and transcripts +- Reddit, Twitter/X, and other social sources when a working login-backed backend is configured + +Research Reach is not a production scraping system. Platform access can break, rate-limit, or require login state. SPS always treats fetched web content as untrusted and refuses to save research briefs that do not include real sources. + +## Sources Flow + +Open the SPS RSS reader and use **Sources** for source intake. + +- Find: public Substack discovery stays in Substack Radar. +- Add URL: public Substack and RSS URLs are resolved through feed discovery; other public HTTPS pages use Crawl4AI when it is installed and healthy. +- Review: extracted source text is previewed before it is saved to the Knowledge Base. + +Crawl4AI is optional. SPS does not bundle it, silently install it, start its Docker API server, import cookies, reuse a logged-in browser profile, configure proxies, or enable hooks. If Crawl4AI is unavailable, generic public URLs fall back to the existing safe link preview path. + +## Setup + +Open Settings -> Application Health -> Research Reach. + +Use: + +- Check status: inspect available channels. +- Show setup: see safe install commands. +- Run safe setup: ask Agent-Reach what is needed without making system changes. +- Import skill: let My Assistant learn Agent-Reach routing commands after review. 
+ +Optional Crawl4AI setup: + +```bash +pipx install crawl4ai +crawl4ai-setup +crawl4ai-doctor +``` + +SPS does not silently import cookies, install global packages, or enable MCP servers. + +## Attribution Note + +Crawl4AI's license file includes Apache 2.0 terms plus an additional attribution requirement for distributions, publications, public uses, derivative works, websites, and command-line tools. Before bundling Crawl4AI or advertising it as an included dependency, add the required NOTICE/About/Credits attribution and run a legal/compliance review. diff --git a/docs/substack-radar.md b/docs/substack-radar.md new file mode 100644 index 000000000..f1ddf93e4 --- /dev/null +++ b/docs/substack-radar.md @@ -0,0 +1,59 @@ +# Substack Radar + +Substack Radar helps Hermes find public Substack publications from categories or keywords, then turns user-approved publications into normal RSS subscriptions. + +## Product Flow + +1. The user enters one or more categories or keywords, such as `AI agents`, `markets`, or `longevity`. +2. Hermes opens controlled browser discovery pages and looks for public, visible Substack publication cards. +3. Hermes shows candidate publications with visible page signals, source page URLs, scores, and discovery timestamps. +4. The user approves or rejects each candidate. Nothing is added to RSS feeds until the user approves it. +5. Approved candidates are validated through public Substack RSS discovery. +6. Validated publications are added to the existing RSS feed list. +7. Ongoing post ingestion uses RSS sync. Hermes does not keep re-scraping browser discovery pages for posts after a source is added. + +Browser discovery is a discovery aid, not the durable content ingestion path. RSS remains the source used for ongoing post sync. + +The unified SPS **Sources** flow can also preview a selected public Substack URL. It still tries public RSS first. 
If a public feed is not available and optional Crawl4AI extraction is installed, Hermes may use Crawl4AI to enrich that one public URL for review and Knowledge Base saving. This does not change the durable sync path: subscribed Substack sources remain RSS feeds. + +## Safety Boundary + +Substack Radar is limited to public Substack discovery. + +- No Twitter/X, Reddit, Facebook, or other social platforms are included in this feature. +- No saved browser profile cookies, login credentials, private posts, subscriber-only posts, paywalled content, or account automation are used. Browser automation runs in an isolated public-page session. +- Browser automation is public-page discovery only. +- Crawl4AI enrichment, when available, is public HTTPS URL extraction only. +- Discovery results are heuristic. They come from visible page text and signals, so they can miss relevant sources, duplicate sources, or score a source imperfectly. +- Approval is explicit. A discovered candidate does not become a feed until the user approves it and RSS discovery validates it. + +## Data And Storage + +Discovery runs are stored as profile-local JSON: + +```txt +<profile-dir>/sps-agent/substack-radar/discovery-runs.json +``` + +The discovery run file stores candidates, visible signals, scores, timestamps, source URLs, and user review decisions. It is separate from subscribed RSS feeds. + +Subscribed feeds live in the existing `rss_feeds` table. Feed URL uniqueness means approving a duplicate Substack resolves to the existing feed instead of creating a second subscription. + +Candidate statuses: + +- `new`: discovered and waiting for user review. +- `approved`: approved by the user, but not yet added to RSS feeds. +- `rejected`: rejected by the user. +- `added`: validated through RSS discovery and added to RSS feeds, or resolved to an existing matching feed. 
+ +## Operational Notes + +Focused implementation and test coverage lives around these areas: + +- Shared scoring and candidate helpers: `src/shared/substack-radar.ts` and `src/shared/substack-radar.test.ts`. +- Browser extraction helpers: `src/main/substack-radar-browser.ts` and `src/main/substack-radar-browser.test.ts`. +- Main IPC and storage flow: `src/main/ipc/substack-radar.ts`. +- Preload API parity: `src/preload/bridges/substack-radar.ts`, `src/preload/index.d.ts`, and `tests/preload-api-surface.test.ts`. +- Renderer review panel: `src/renderer/src/screens/SpsAgent/research/SubstackRadarPanel.tsx` and its focused renderer test. + +Tests do not require live Substack network access. Browser extraction tests use static HTML and helper functions so the discovery parser can be validated deterministically. diff --git a/docs/superpowers/plans/2026-06-02-desktop-notion-affordance-closeout.md b/docs/superpowers/plans/2026-06-02-desktop-notion-affordance-closeout.md new file mode 100644 index 000000000..6272361d0 --- /dev/null +++ b/docs/superpowers/plans/2026-06-02-desktop-notion-affordance-closeout.md @@ -0,0 +1,96 @@ +# Desktop Notion Affordance Closeout Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Finish the remaining Notion-style workspace affordance gaps with tested renderer interactions on top of the existing local-first workspace APIs. + +**Architecture:** Keep the already-added page graph, database, template, synced-block, search, history, and proposal APIs. Add focused renderer components and small main-process extensions only where the UI needs durable state, especially hunk-level proposal actions and comments/reminders. Avoid Hermes Agent engine changes. + +**Tech Stack:** Electron, React, TypeScript, TipTap, YAML, Vitest, Testing Library. 
+ +--- + +## Files + +- Modify `src/renderer/src/screens/Workspace/WorkspaceEditor.tsx`: use block IDs, block controls, inline menus. +- Create `src/renderer/src/screens/Workspace/BlockHandleBar.tsx`: duplicate/delete/turn/color/move controls for Markdown blocks. +- Create `src/renderer/src/screens/Workspace/PageMentionMenu.tsx`: searchable `@` and `[[` insertion menu. +- Modify `src/renderer/src/screens/Workspace/DatabaseBlock.tsx`: database search, settings panel, row open actions. +- Create `src/renderer/src/screens/Workspace/DatabaseSettingsPanel.tsx`: filter/sort/group/property visibility/open mode controls. +- Create `src/renderer/src/screens/Workspace/DatabaseRowPeek.tsx`: side peek row page editor. +- Modify `src/main/workspace.ts`: hunk-level proposal accept/reject helpers. +- Modify `src/main/index.ts`, `src/preload/index.ts`, `src/preload/index.d.ts`: hunk action IPC/preload surface. +- Modify `src/renderer/src/screens/Workspace/AgentReviewPanel.tsx`: per-hunk accept/reject/apply-all controls. +- Create `src/main/workspace-comments.ts`: comments and reminders store. +- Create `src/renderer/src/screens/Workspace/WorkspaceCommentsPanel.tsx`: comments/reminders UI. +- Create `src/renderer/src/screens/Workspace/WorkspaceSyncedBlocksPanel.tsx`: synced block creation/list/unsync UI. +- Modify `src/renderer/src/screens/Workspace/PageCreateDialog.tsx`: template picker. +- Create `src/renderer/src/screens/Workspace/WorkspaceOfflinePanel.tsx`: local status panel. +- Modify `src/renderer/src/screens/Workspace/CommandPalette.tsx`: filters, command execution, open in tab/window actions. +- Add tests beside components plus `tests/workspace-comments.test.ts` and proposal hunk tests. + +## Tasks + +### Task 1: Block Controls And Inline Menus + +- [x] Write tests for `BlockHandleBar` duplicate/delete/turn/color/move callbacks. +- [x] Write tests for `PageMentionMenu` filtering and insert callbacks. +- [x] Implement `BlockHandleBar.tsx`. 
+- [x] Implement `PageMentionMenu.tsx`. +- [x] Integrate both into `WorkspaceEditor.tsx`. +- [x] Validate with `npm test -- src/renderer/src/screens/Workspace/BlockHandleBar.test.tsx src/renderer/src/screens/Workspace/PageMentionMenu.test.tsx tests/workspace-blocks.test.ts`. + +### Task 2: Database Settings And Row Peek + +- [x] Write renderer tests for database search, filtered display, row side peek, row page edit, and open mode controls. +- [x] Implement `DatabaseSettingsPanel.tsx`. +- [x] Implement `DatabaseRowPeek.tsx`. +- [x] Update `DatabaseBlock.tsx` to show settings/search/row open buttons. +- [x] Validate with `npm test -- src/renderer/src/screens/Workspace/DatabaseBlock.test.tsx tests/workspace-database.test.ts`. + +### Task 3: Hunk-Level Agent Review + +- [x] Extend proposal tests for `acceptAgentWorkspaceProposalHunk` and `rejectAgentWorkspaceProposalHunk`. +- [x] Implement hunk-level helpers in `src/main/workspace.ts`. +- [x] Wire IPC/preload/types. +- [x] Update `AgentReviewPanel.tsx` with hunk accept/reject/apply-all. +- [x] Validate with `npm test -- tests/workspace-meta.test.ts tests/ipc-handlers.test.ts tests/preload-api-surface.test.ts`. + +### Task 4: Templates, Synced Blocks, Comments, Reminders, Status + +- [x] Add comments/reminders main-process tests. +- [x] Implement `workspace-comments.ts` and IPC/preload/types. +- [x] Add template picker to page creation. +- [x] Add `WorkspaceSyncedBlocksPanel.tsx`. +- [x] Add `WorkspaceCommentsPanel.tsx`. +- [x] Add `WorkspaceOfflinePanel.tsx`. +- [x] Validate with component tests plus `npm test -- tests/workspace-templates.test.ts tests/workspace-synced-blocks.test.ts tests/workspace-comments.test.ts`. + +### Task 5: Command Palette Actions + +- [x] Write tests for command result execution, search scopes, copy link, and open-in-tab/window callbacks. +- [x] Add filter controls and action buttons to `CommandPalette.tsx`. +- [x] Preserve keyboard navigation. 
+- [x] Validate with `npm test -- src/renderer/src/screens/Workspace/CommandPalette.test.tsx`. + +### Task 6: Final Validation + +- [x] Run `npm test`. +- [x] Run `npm run typecheck`. +- [x] Run `npm run lint`. +- [x] Run `npm run build`. +- [x] Commit all closeout slices. + +## Acceptance Criteria + +- Every previously listed gap has at least one implemented UI surface or durable state API. +- New behavior is covered by focused tests. +- Existing workspace APIs remain backward compatible. +- Full validation passes, with only existing lint warnings if any. + +## Deliberate Simplifications + +- “True Notion parity” is interpreted as local-first functional affordances, not pixel-perfect Notion behavior. +- Block drag/reorder is implemented through explicit move controls unless a full drag implementation is already low-risk. +- Calendar/timeline remain local renderers based on date fields; no external calendar sync. +- Comments/reminders are local workspace metadata, not system notifications or automations. diff --git a/docs/superpowers/plans/2026-06-02-desktop-notion-affordances.md b/docs/superpowers/plans/2026-06-02-desktop-notion-affordances.md new file mode 100644 index 000000000..cace71039 --- /dev/null +++ b/docs/superpowers/plans/2026-06-02-desktop-notion-affordances.md @@ -0,0 +1,927 @@ +# Desktop Notion Affordances Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Upgrade the Electron desktop workspace from a local Markdown/database foundation into a Notion-style, agent-first workspace with real page, block, database, search, history, and agent review affordances. + +**Architecture:** Keep the app local-first. Main-process workspace modules own filesystem state, metadata, history, page graph, templates, proposals, and database migrations. 
Renderer components own interaction affordances and call preload APIs only; Hermes Agent engine changes are out of scope except for consuming local workspace context and reviewable proposals. + +**Tech Stack:** Electron, React, TypeScript, TipTap, YAML, chokidar, Vitest, Testing Library, existing preload IPC boundary. + +--- + +## Scope And Constraints + +- This plan is for `/Users/amar/Desktop/MyCode/fathah_hermes`, the Electron desktop app that connects to Hermes Agent. +- Do not add Notion API sync, cloud collaboration, remote SSH sync, or production Hermes Agent engine changes. +- Preserve existing file APIs for backward compatibility. +- Keep local workspace files under the resolved Hermes profile workspace. +- Implement small, verified vertical slices. Commit after each task. +- Reference basis: Notion docs for sidebar/subpages, slash commands, search, database views/properties, version history, offline pages, synced blocks, buttons/automations, comments/reminders, suggested edits, and Notion Agent. + +## File Structure + +- Modify `src/main/workspace.ts`: keep path safety and file operations; delegate page graph, history, proposals, templates, synced blocks, and database-specific work to focused modules. +- Create `src/main/workspace-page-graph.ts`: page IDs, parent/child order, backlinks, aliases, recents, favorites, trash, sidebar collapsed state. +- Create `src/main/workspace-history.ts`: snapshots, restore, version metadata, content diff summaries. +- Create `src/main/workspace-proposals.ts`: block-level agent proposal queue and accept/reject/apply-all operations. +- Create `src/main/workspace-templates.ts`: page templates, database row templates, button/workflow templates. +- Create `src/main/workspace-synced-blocks.ts`: synced block registry, references, update propagation. +- Create `src/main/workspace-database.ts`: YAML database migration, validation, view settings, filters, sorts, row pages. +- Modify `src/main/index.ts`: register new IPC handlers. 
+- Modify `src/preload/index.ts` and `src/preload/index.d.ts`: expose typed APIs. +- Modify `src/renderer/src/screens/Workspace/Workspace.tsx`: orchestrate page state, tabs/history, agent review, templates, offline/status panels. +- Modify `src/renderer/src/screens/Workspace/WorkspaceEditor.tsx`: replace static snippets with block-aware commands and menus. +- Create `src/renderer/src/screens/Workspace/blockExtensions.ts`: TipTap extensions for block IDs, handles, slash commands, synced blocks, links, comments, and suggestions. +- Create `src/renderer/src/screens/Workspace/BlockCommandMenu.tsx`: `/`, inline `+`, turn-into, color, duplicate/delete/move commands. +- Create `src/renderer/src/screens/Workspace/PageMentionMenu.tsx`: `@` and `[[` page/person/date/reminder/link insertion. +- Modify `src/renderer/src/screens/Workspace/WorkspaceTree.tsx`: polished page tree, subpage creation, reorder, collapsible sections, resize/collapse. +- Modify `src/renderer/src/screens/Workspace/WorkspaceHeader.tsx`: breadcrumbs, page menu, template, history, offline, publish/export actions. +- Modify `src/renderer/src/screens/Workspace/CommandPalette.tsx`: ranked search, filters, quick actions, command execution, open tab/window. +- Modify `src/renderer/src/screens/Workspace/DatabaseBlock.tsx`: render true table/board/list/gallery/calendar/timeline and settings. +- Create `src/renderer/src/screens/Workspace/DatabaseSettingsPanel.tsx`: view settings, filters, sorts, grouping, property visibility, open mode. +- Create `src/renderer/src/screens/Workspace/DatabaseRowPeek.tsx`: side/center/full row page editing. +- Create `src/renderer/src/screens/Workspace/AgentSuggestedEditsPanel.tsx`: block-level suggested edits, comments, accept/reject. +- Create `src/renderer/src/screens/Workspace/WorkspaceActivityPanel.tsx`: activity/provenance timeline. +- Create `src/renderer/src/screens/Workspace/WorkspaceTemplatesPanel.tsx`: page/database template picker. 
+- Create or update tests under `tests/` and renderer tests under the existing renderer test location. + +--- + +## Task 1: Strengthen Workspace Metadata Into A Page Graph + +**Files:** + +- Create: `src/main/workspace-page-graph.ts` +- Modify: `src/main/workspace.ts` +- Modify: `src/main/index.ts` +- Modify: `src/preload/index.ts` +- Modify: `src/preload/index.d.ts` +- Test: `tests/workspace-page-graph.test.ts` +- Test: `tests/ipc-handlers.test.ts` +- Test: `tests/preload-api-surface.test.ts` + +- [ ] **Step 1: Write failing page graph tests** + +Cover migration from current `.workspace-meta.json`, stable page IDs, parent-child order, favorites, trash, recent visits, backlinks, collapsed sidebar sections, path safety, duplicate-with-children, and move/reorder. + +```ts +expect(graph.pages["docs/prd.md"].id).toMatch(/^page_/); +expect(graph.pages["docs/prd.md"].parentPath).toBe("docs"); +expect(graph.childOrder["docs"]).toEqual(["docs/prd.md", "docs/spec.md"]); +expect(graph.backlinks["docs/spec.md"]).toContain("docs/prd.md"); +``` + +- [ ] **Step 2: Run tests to verify failure** + +Run: `npm test -- tests/workspace-page-graph.test.ts` + +Expected: FAIL because `workspace-page-graph.ts` does not exist. 
+ +- [ ] **Step 3: Implement page graph module** + +Define these exported types: + +```ts +export interface WorkspacePageGraph { + version: 2; + pages: Record; + rootOrder: string[]; + childOrder: Record; + favorites: string[]; + recentVisits: Array<{ path: string; visitedAt: number }>; + backlinks: Record; + sidebar: { + collapsedSections: string[]; + width: number; + collapsed: boolean; + }; +} +``` + +Add functions: + +```ts +loadPageGraph(root: string): Promise +savePageGraph(root: string, graph: WorkspacePageGraph): Promise +syncPageGraph(root: string): Promise +recordVisit(root: string, path: string): Promise +movePage(root: string, path: string, parentPath: string | null, beforePath?: string): Promise +duplicatePageTree(root: string, path: string): Promise +extractBacklinks(content: string): string[] +``` + +- [ ] **Step 4: Wire IPC and preload** + +Add APIs: + +```ts +getWorkspacePageGraph(profile?: string): Promise; +updateWorkspacePageOrder(path: string, parentPath: string | null, beforePath?: string, profile?: string): Promise; +updateWorkspaceSidebarState(state: Partial, profile?: string): Promise; +getWorkspaceBacklinks(path: string, profile?: string): Promise; +``` + +- [ ] **Step 5: Run validation** + +Run: + +```bash +npm test -- tests/workspace-page-graph.test.ts tests/ipc-handlers.test.ts tests/preload-api-surface.test.ts +npm run typecheck +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/main/workspace-page-graph.ts src/main/workspace.ts src/main/index.ts src/preload/index.ts src/preload/index.d.ts tests/workspace-page-graph.test.ts tests/ipc-handlers.test.ts tests/preload-api-surface.test.ts +git commit -m "feat: add workspace page graph" +``` + +--- + +## Task 2: Build Real Sidebar And Page Management Affordances + +**Files:** + +- Modify: `src/renderer/src/screens/Workspace/WorkspaceTree.tsx` +- Modify: `src/renderer/src/screens/Workspace/Workspace.tsx` +- Modify: `src/renderer/src/screens/Workspace/WorkspaceHeader.tsx` +- Create: `src/renderer/src/screens/Workspace/PageCreateDialog.tsx` +- Create: `src/renderer/src/screens/Workspace/PageMenu.tsx` +- Test: renderer tests for sidebar operations + +- [ ] **Step 1: Write failing renderer tests** + +Cover create without `window.prompt`, inline rename, duplicate, trash/restore, favorite, drag reorder before/after, drag nest/un-nest, collapsed section persistence, sidebar resize, breadcrumb ancestor navigation. + +- [ ] **Step 2: Replace prompt-based flows** + +Remove `window.prompt` usage from page create/rename handlers in `Workspace.tsx`. Use `PageCreateDialog` and inline rename state in `WorkspaceTree`. + +- [ ] **Step 3: Implement sidebar sections** + +Render: + +```txt +Favorites +Recent +Workspace +Trash +Agent Control Center +``` + +Each section must support collapse/expand, keyboard focus, and empty state. + +- [ ] **Step 4: Implement page menu** + +Actions: + +```txt +Rename +Duplicate +Copy link +Favorite / Unfavorite +Move to +Turn into synced block source +Export +Move to trash +``` + +- [ ] **Step 5: Implement drag reorder** + +Use current backend `movePage` support. Add drop targets for before, after, and inside page. Preserve child order in metadata. + +- [ ] **Step 6: Validate** + +Run: + +```bash +npm test -- --run WorkspaceTree +npm run typecheck +npm run lint +``` + +Expected: PASS. 
+ +- [ ] **Step 7: Commit** + +```bash +git add src/renderer/src/screens/Workspace +git commit -m "feat: improve workspace page management" +``` + +--- + +## Task 3: Replace Static Markdown Snippets With Block Controls + +**Files:** + +- Modify: `src/renderer/src/screens/Workspace/WorkspaceEditor.tsx` +- Create: `src/renderer/src/screens/Workspace/blockExtensions.ts` +- Create: `src/renderer/src/screens/Workspace/BlockCommandMenu.tsx` +- Create: `src/renderer/src/screens/Workspace/PageMentionMenu.tsx` +- Test: renderer editor tests + +- [ ] **Step 1: Write failing editor tests** + +Cover block IDs, slash command insertion, inline `+`, keyboard navigation, block duplicate/delete, turn-into heading/todo/toggle/callout/code/quote/divider/database, block color, page link search, and Markdown serialization. + +- [ ] **Step 2: Add block ID extension** + +Every top-level block gets a stable `data-block-id`. Markdown serialization preserves IDs in a low-noise compatible form, preferably YAML frontmatter or HTML comments only when needed. + +- [ ] **Step 3: Add block handle UI** + +On hover/focus, show drag handle and `+` button. Handle menu includes: + +```txt +Turn into +Duplicate +Delete +Move up +Move down +Copy link to block +Color +Ask Hermes about this block +Suggest edit with Hermes +``` + +- [ ] **Step 4: Add slash command menu** + +Commands: + +```txt +/page +/todo +/toggle +/callout +/quote +/code +/divider +/image +/file +/database +/button +/synced +/template +/color +/turn +``` + +The menu must be searchable, keyboard-navigable, and dismissible with Escape. + +- [ ] **Step 5: Add `@` and `[[` menus** + +`@` inserts page, date, reminder, or agent mention. `[[` inserts page links and creates backlinks through the page graph. + +- [ ] **Step 6: Validate** + +Run: + +```bash +npm test -- --run WorkspaceEditor +npm run typecheck +npm run lint +``` + +Expected: PASS. 
+ +- [ ] **Step 7: Commit** + +```bash +git add src/renderer/src/screens/Workspace/WorkspaceEditor.tsx src/renderer/src/screens/Workspace/blockExtensions.ts src/renderer/src/screens/Workspace/BlockCommandMenu.tsx src/renderer/src/screens/Workspace/PageMentionMenu.tsx +git commit -m "feat: add workspace block controls" +``` + +--- + +## Task 4: Add Templates, Buttons, And Local Agent Workflow Blocks + +**Files:** + +- Create: `src/main/workspace-templates.ts` +- Modify: `src/main/index.ts` +- Modify: `src/preload/index.ts` +- Modify: `src/preload/index.d.ts` +- Create: `src/renderer/src/screens/Workspace/WorkspaceTemplatesPanel.tsx` +- Modify: `src/renderer/src/screens/Workspace/BlockCommandMenu.tsx` +- Test: `tests/workspace-templates.test.ts` + +- [ ] **Step 1: Write failing template tests** + +Cover built-in templates, custom templates, database row templates, recurring template metadata, and path safety. + +- [ ] **Step 2: Implement template storage** + +Store templates under workspace internal metadata, not as visible user pages: + +```ts +export interface WorkspaceTemplate { + id: string; + kind: "page" | "database-row" | "button"; + title: string; + content: string; + properties?: Record; + createdAt: number; + updatedAt: number; +} +``` + +- [ ] **Step 3: Add page template picker** + +On new page, show templates: + +```txt +Blank +PRD +Meeting notes +Bug report +Research note +Sprint plan +Agent runbook +Decision log +``` + +- [ ] **Step 4: Add button blocks** + +Button block schema: + +```yaml +hermesType: button +label: Summarize this page +actions: + - type: agentPrompt + prompt: Summarize this page and extract action items. +``` + +Renderer should execute local actions only after user click. + +- [ ] **Step 5: Validate** + +Run: + +```bash +npm test -- tests/workspace-templates.test.ts +npm run typecheck +``` + +Expected: PASS. 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/main/workspace-templates.ts src/main/index.ts src/preload/index.ts src/preload/index.d.ts src/renderer/src/screens/Workspace tests/workspace-templates.test.ts +git commit -m "feat: add workspace templates and buttons" +``` + +--- + +## Task 5: Upgrade Database Model And View Settings + +**Files:** + +- Create: `src/main/workspace-database.ts` +- Modify: `src/renderer/src/screens/Workspace/database.ts` +- Modify: `src/renderer/src/screens/Workspace/DatabaseBlock.tsx` +- Create: `src/renderer/src/screens/Workspace/DatabaseSettingsPanel.tsx` +- Test: `tests/workspace-database.test.ts` + +- [ ] **Step 1: Write failing database tests** + +Cover migration from current YAML, stable row IDs, filter/sort/group/subgroup, hidden properties, open mode, property editing, malformed YAML recovery, and stringify preserving row pages. + +- [ ] **Step 2: Define versioned schema** + +```ts +export interface WorkspaceDatabase { + hermesType: "database"; + version: 2; + id: string; + title: string; + properties: Record; + views: WorkspaceDatabaseView[]; + items: WorkspaceDatabaseItem[]; + rowPages: Record; + templates: WorkspaceDatabaseTemplate[]; +} +``` + +Property types: + +```txt +title, text, number, select, multi_select, status, date, checkbox, url, email, phone, relation, rollup, formula, files, button, unique_id +``` + +- [ ] **Step 3: Implement view settings** + +Each view supports: + +```ts +filters: WorkspaceDatabaseFilterGroup; +sorts: Array<{ property: string; direction: "asc" | "desc" }>; +groupBy?: string; +subGroupBy?: string; +hiddenProperties: string[]; +openMode: "side" | "center" | "full"; +conditionalColors: WorkspaceConditionalColor[]; +``` + +- [ ] **Step 4: Validate** + +Run: + +```bash +npm test -- tests/workspace-database.test.ts +npm run typecheck +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/main/workspace-database.ts src/renderer/src/screens/Workspace/database.ts src/renderer/src/screens/Workspace/DatabaseBlock.tsx src/renderer/src/screens/Workspace/DatabaseSettingsPanel.tsx tests/workspace-database.test.ts +git commit -m "feat: upgrade workspace database model" +``` + +--- + +## Task 6: Build True Database Views And Row Pages + +**Files:** + +- Modify: `src/renderer/src/screens/Workspace/DatabaseBlock.tsx` +- Create: `src/renderer/src/screens/Workspace/DatabaseRowPeek.tsx` +- Create: `src/renderer/src/screens/Workspace/databaseViews.tsx` +- Test: renderer database tests + +- [ ] **Step 1: Write failing renderer tests** + +Cover table, board, list, gallery, calendar, timeline, database search, filter application, sort application, property visibility, row side peek, center peek, full-page open, row template creation. + +- [ ] **Step 2: Implement real table view** + +Features: sticky first column, editable typed cells, add column, hide column, freeze column, property menu. + +- [ ] **Step 3: Implement real board view** + +Features: grouped columns, card drag between groups, empty group, typed select/status updates. + +- [ ] **Step 4: Implement real calendar/timeline** + +Calendar requires date property. Timeline requires date or date range. If missing, show a user-facing empty/error state with a one-click “Add date property.” + +- [ ] **Step 5: Implement row peek** + +Side peek remains interactive with database visible behind it. Center peek opens modal. Full page opens row page as workspace page-like view. + +- [ ] **Step 6: Validate** + +Run: + +```bash +npm test -- --run DatabaseBlock +npm run typecheck +npm run lint +``` + +Expected: PASS. 
+ +- [ ] **Step 7: Commit** + +```bash +git add src/renderer/src/screens/Workspace/DatabaseBlock.tsx src/renderer/src/screens/Workspace/DatabaseRowPeek.tsx src/renderer/src/screens/Workspace/databaseViews.tsx +git commit -m "feat: add rich database views" +``` + +--- + +## Task 7: Add Synced Blocks, Backlinks, Comments, And Reminders + +**Files:** + +- Create: `src/main/workspace-synced-blocks.ts` +- Modify: `src/main/workspace-page-graph.ts` +- Modify: `src/renderer/src/screens/Workspace/blockExtensions.ts` +- Create: `src/renderer/src/screens/Workspace/WorkspaceCommentsPanel.tsx` +- Test: `tests/workspace-synced-blocks.test.ts` + +- [ ] **Step 1: Write failing tests** + +Cover creating synced block source, pasting synced reference, editing source updates references, unsync single, unsync all, backlink extraction, comments anchored to block IDs, reminders parsed from `@remind`. + +- [ ] **Step 2: Implement synced block registry** + +```ts +export interface WorkspaceSyncedBlock { + id: string; + sourcePath: string; + sourceBlockId: string; + content: string; + references: Array<{ path: string; blockId: string }>; + updatedAt: number; +} +``` + +- [ ] **Step 3: Implement comments and reminders metadata** + +```ts +export interface WorkspaceComment { + id: string; + path: string; + blockId?: string; + body: string; + status: "open" | "resolved"; + createdAt: number; +} +``` + +Reminders are local metadata only. They should not create external automations. + +- [ ] **Step 4: Validate** + +Run: + +```bash +npm test -- tests/workspace-synced-blocks.test.ts +npm run typecheck +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/main/workspace-synced-blocks.ts src/main/workspace-page-graph.ts src/renderer/src/screens/Workspace/blockExtensions.ts src/renderer/src/screens/Workspace/WorkspaceCommentsPanel.tsx tests/workspace-synced-blocks.test.ts +git commit -m "feat: add synced blocks and comments" +``` + +--- + +## Task 8: Upgrade Search, Command Palette, Tabs, And Quick Actions + +**Files:** + +- Modify: `src/main/workspace.ts` +- Create: `src/main/workspace-search.ts` +- Modify: `src/renderer/src/screens/Workspace/CommandPalette.tsx` +- Modify: `src/renderer/src/screens/Workspace/Workspace.tsx` +- Test: `tests/workspace-search.test.ts` +- Test: renderer command palette tests + +- [ ] **Step 1: Write failing search tests** + +Cover recent pages before typing, favorites boost, exact phrase matching, workspace/session/admin/command filters, database row hits, backlinks, comments, and snippets. + +- [ ] **Step 2: Implement ranking** + +Ranking order: + +```txt +exact title match +favorite exact match +recent exact match +title prefix +backlink/page mention +database row title/property +body snippet +session result +admin/command result +``` + +- [ ] **Step 3: Implement command palette actions** + +Actions: + +```txt +Open +Open in new tab +Open in new window +Copy link +Reveal in sidebar +Run command +Create page +Search only pages +Search only sessions +Search only databases +``` + +- [ ] **Step 4: Validate** + +Run: + +```bash +npm test -- tests/workspace-search.test.ts +npm test -- --run CommandPalette +npm run typecheck +``` + +Expected: PASS. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/main/workspace-search.ts src/main/workspace.ts src/renderer/src/screens/Workspace/CommandPalette.tsx src/renderer/src/screens/Workspace/Workspace.tsx tests/workspace-search.test.ts +git commit -m "feat: improve workspace search and commands" +``` + +--- + +## Task 9: Implement Agent Suggested Edits And Activity Provenance + +**Files:** + +- Create: `src/main/workspace-proposals.ts` +- Modify: `src/main/workspace.ts` +- Modify: `src/main/index.ts` +- Modify: `src/preload/index.ts` +- Modify: `src/preload/index.d.ts` +- Create: `src/renderer/src/screens/Workspace/AgentSuggestedEditsPanel.tsx` +- Create: `src/renderer/src/screens/Workspace/WorkspaceActivityPanel.tsx` +- Modify: `src/renderer/src/screens/Workspace/AgentReviewPanel.tsx` +- Test: `tests/workspace-proposals.test.ts` + +- [ ] **Step 1: Write failing proposal tests** + +Cover external write becomes proposal, block-level diff, accept one block, reject one block, apply all, dirty user edit never overwritten, activity entries created, restore after rejected proposal. + +- [ ] **Step 2: Define proposal model** + +```ts +export interface AgentWorkspaceProposal { + id: string; + path: string; + baseContent: string; + proposedContent: string; + hunks: AgentWorkspaceProposalHunk[]; + createdAt: number; + status: "pending" | "accepted" | "rejected"; +} + +export interface AgentWorkspaceProposalHunk { + id: string; + blockId?: string; + before: string; + after: string; + status: "pending" | "accepted" | "rejected"; +} +``` + +- [ ] **Step 3: Add suggested edit UI** + +Render side rail with per-hunk diff, comment box, accept, reject, apply all, reject all, restore original. 
+ +- [ ] **Step 4: Add activity panel** + +Activity entries: + +```txt +page created +page renamed +user saved +agent proposed edit +agent edit accepted +agent edit rejected +version restored +database row changed +synced block updated +``` + +- [ ] **Step 5: Validate** + +Run: + +```bash +npm test -- tests/workspace-proposals.test.ts +npm test -- --run AgentSuggestedEditsPanel +npm run typecheck +``` + +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add src/main/workspace-proposals.ts src/main/workspace.ts src/main/index.ts src/preload/index.ts src/preload/index.d.ts src/renderer/src/screens/Workspace tests/workspace-proposals.test.ts +git commit -m "feat: add agent suggested edits" +``` + +--- + +## Task 10: Improve History, Offline Status, Recovery, Export + +**Files:** + +- Create: `src/main/workspace-history.ts` +- Modify: `src/main/workspace.ts` +- Modify: `src/renderer/src/screens/Workspace/WorkspaceHeader.tsx` +- Modify: `src/renderer/src/screens/Workspace/Workspace.tsx` +- Create: `src/renderer/src/screens/Workspace/WorkspaceHistoryPanel.tsx` +- Create: `src/renderer/src/screens/Workspace/WorkspaceOfflinePanel.tsx` +- Test: `tests/workspace-history.test.ts` + +- [ ] **Step 1: Write failing history tests** + +Cover snapshot before user save, page operation, database edit, proposal apply, restore version, diff summary, partial block recovery metadata, trash restore with history, export Markdown/HTML bundle. + +- [ ] **Step 2: Implement history module** + +```ts +export interface WorkspaceHistoryEntry { + id: string; + pageId: string; + path: string; + createdAt: number; + reason: + | "user-save" + | "page-operation" + | "database-edit" + | "agent-proposal" + | "restore"; + content: string; + summary: Array<{ kind: "added" | "removed" | "changed"; text: string }>; +} +``` + +- [ ] **Step 3: Implement history UI** + +Show version list, changed highlights, restore full version, copy block from previous version, and activity linkage. 
+ +- [ ] **Step 4: Implement offline/status UI** + +Because storage is local, status should mean: + +```txt +Local workspace ready +File watcher active +Unsaved edits +Conflict pending +Agent proposal pending +Last saved timestamp +``` + +Favorites and recents should be marked “available locally” by default. + +- [ ] **Step 5: Implement export** + +Export current page or workspace as: + +```txt +Markdown bundle +HTML bundle +YAML database CSV files +``` + +- [ ] **Step 6: Validate** + +Run: + +```bash +npm test -- tests/workspace-history.test.ts +npm run typecheck +npm run lint +npm run build +``` + +Expected: PASS. + +- [ ] **Step 7: Commit** + +```bash +git add src/main/workspace-history.ts src/main/workspace.ts src/renderer/src/screens/Workspace tests/workspace-history.test.ts +git commit -m "feat: add workspace history and recovery" +``` + +--- + +## Task 11: Visual Polish, Accessibility, And Desktop Smoke + +**Files:** + +- Modify: `src/renderer/src/screens/Workspace/Workspace.css` or the existing workspace stylesheet +- Modify: relevant Workspace components +- Test: renderer accessibility tests if present + +- [ ] **Step 1: Audit states** + +Check every workspace surface for: + +```txt +loading +empty +error +disabled +dirty +conflict +pending proposal +keyboard focus +narrow width +dark theme +light theme +``` + +- [ ] **Step 2: Fix nested interactive controls** + +Replace nested clickable spans inside buttons in tabs and command results with accessible sibling buttons or menu actions. + +- [ ] **Step 3: Verify responsive desktop layouts** + +Check: + +```txt +split mode +canvas-only mode +chat-only mode +sidebar collapsed +sidebar resized +database side peek +command palette +history panel +agent suggested edits panel +``` + +- [ ] **Step 4: Run full validation** + +Run: + +```bash +npm test +npm run typecheck +npm run lint +npm run build +``` + +Expected: PASS. Existing lint warnings may remain only if already present and unrelated. 
+ +- [ ] **Step 5: Manual dev smoke** + +Run: `npm run dev` + +Verify: + +```txt +Create/rename/move/trash/restore page +Use slash menu and block handles +Create page link and see backlink +Create database and switch views +Filter/sort/search database +Open row side peek +Trigger agent proposal and accept/reject a hunk +Restore previous version +Use command palette quick actions +Switch light/dark themes +Resize/collapse sidebar +``` + +- [ ] **Step 6: Commit** + +```bash +git add src/renderer/src/screens/Workspace +git commit -m "chore: polish workspace accessibility" +``` + +--- + +## Acceptance Criteria + +- Page tree supports polished create, rename, duplicate, favorite, trash, restore, drag nesting, drag reorder, recents, favorites, breadcrumbs, sidebar resize/collapse, and section persistence. +- Editor supports block IDs, block handles, drag/reorder, duplicate/delete, turn-into, color, slash commands, inline add, page links, backlinks, comments, reminders, synced blocks, templates, and local action buttons. +- Databases support versioned YAML migration, typed properties, filters, sorts, grouping, hidden properties, property editing, table/board/list/gallery/calendar/timeline, row pages, side/center/full open modes, row templates, database search, and malformed YAML recovery. +- Command palette supports recents, ranking, exact phrase search, filters, keyboard navigation, copy link, open in tab/window, reveal in sidebar, command execution, admin/app commands, sessions, pages, and database rows. +- Agent review supports block-level suggested edits, accept/reject/apply all, activity provenance, conflict-safe behavior, and undo/history integration. +- History supports snapshots before meaningful writes, restore version, visible change summaries, partial block recovery, and trash restore with history. +- Offline/status UI accurately describes local workspace state, file watcher state, unsaved edits, conflicts, proposals, and last save time. 
+- Validation passes: + +```bash +npm test +npm run typecheck +npm run lint +npm run build +``` + +## Deliberately Out Of Scope + +- Notion API sync. +- Cloud multi-user collaboration. +- Remote SSH workspace sync. +- Changing Hermes Agent engine internals beyond existing chat/context/proposal integration. +- Production permissions and sharing controls. +- Browser/mobile Notion parity. + +## Execution Recommendation + +Implement in this order: + +1. Page graph and sidebar. +2. Block editor controls. +3. Database model and views. +4. Search/navigation. +5. Agent suggested edits and activity. +6. History/offline/export. +7. Accessibility and visual polish. + +This order keeps each milestone useful on its own and avoids building UI controls before the main-process state model can support them. diff --git a/docs/superpowers/plans/2026-06-10-homebase-transformation.md b/docs/superpowers/plans/2026-06-10-homebase-transformation.md new file mode 100644 index 000000000..105999f5d --- /dev/null +++ b/docs/superpowers/plans/2026-06-10-homebase-transformation.md @@ -0,0 +1,428 @@ +# Hermes Desktop → "The Home Base": Phased Transformation Plan + +> Owner-approved plan (2026-06-10). This is the canonical reference for the +> multi-phase transformation. Live progress + gotchas are tracked in project +> memory (`homebase-transformation.md`), which points here. When executing an +> item, **reality-check its premise against current `main` first** — the audit +> already found several stale premises; more will appear. + +## Context + +**Vision (owner's words):** "A home from where I can undertake all of my work — a +unifier of all my scattered conversations and code with various LLMs, notes etc. +Simple yet profound and transformative." + +**Shape of the plan:** harden the platform → ruthlessly simplify → widen the intake +(all LLM conversations) → make one search hit everything → close the capture loop. +Not "add more features." 
The documented #1 killer of second-brain tools is friction, +feature sprawl, and the system doing nothing with your inputs. + +**Owner decisions (locked):** Ruthless consolidation · Stability first · New sources: +ChatGPT, Claude.ai, Grok (x.ai), Gemini (Takeout), Perplexity-if-possible exports + +Telegram quick-capture (WhatsApp via "forward to Telegram bot" — no WhatsApp API). + +**Executor:** Claude Opus 4.8. One worktree-isolated, single-purpose branch per +numbered item (`git worktree add … && bash scripts/setup-worktree.sh` — never symlink +node_modules). Sizing: S < ½ day, M = ½–2 days, L = 2–5 days. + +--- + +## Non-negotiable operating constraints (every item obeys these) + +1. **Reality-check gate on every item.** Before executing, verify the item's premise + against current `main` with file/commit citations. Already-done or inverted ⇒ close + the item with a note, never build it. +2. **Data-inventory + deprecate-before-delete.** Every Phase 2 deletion starts with a + short committed inventory: what the surface reads/writes on disk, which IPC handlers + mutate state, which background processes write through it. No surface is deleted while + a background writer still depends on it — port the management UI first. Deletions hide + behind the existing Developer-mode flag for a one-week owner trial before the removal + commit. IPC/file formats outlive their UI by one release. +3. **Phase 2 is serialized** (one worktree at a time — all deletions converge on + Layout.tsx, preload index.ts/index.d.ts, ipc/\*, i18n trees). Parallel worktrees + allowed only in Phase 3 (disjoint adapter modules). A deletion commit removes renderer + callsites + preload method + index.d.ts entry + main handler + i18n keys in one commit, + verified by grepping the IPC channel name to zero. +4.
**All ingestion is worker-threaded, single-writer, idempotent, redaction-proven.** + Imports parse in a worker_thread with progress/cancel; DB writes only via `applyFragments` + (`src/main/external-context/db.ts` — structural invariant); double-import ⇒ identical row + counts; seeded-secret-inside-export-fixture never reaches messages/messages_fts + (verify:external-context extended); save-chat-to-KB and Telegram captures pass + `src/main/redactor.ts` before any vault write and keep untrusted fencing. Test fixtures + use structurally invalid fake keys (GitHub push-protection blocked real-looking ones before). +5. **Workspace write-safety (1.5) lands before any new background writer.** Serialized write + queue + generation check + rolling backups is a prerequisite gate for every Phase 3+ + feature that writes to the workspace. + +**Standard verification gate (every PR):** `npm run typecheck` (both projects) → +`npx eslint <changed files>` → `npx vitest run` → `npm run verify:note-index` → `npm run build`. +Plus per-phase: `node scripts/sps-smoke.mjs`, `npm run verify:external-context`, +`node scripts/external-context-smoke.mjs` where relevant. Preload changes always land in +BOTH `src/preload/index.ts` and `src/preload/index.d.ts` (a parity test enforces this). Run the +gate — don't attest it. + +--- + +## Phase 0 — Baseline (S, do first) ✅ DONE + +- `git push` main to origin. Fetch/rebase per the standing integration rule if rejected. +- Capture baseline: green test counts, lint warning count, sps-smoke pass. +- **Result:** main was already pushed (stale premise #1). Baseline @ 62331cde: typecheck + clean, vitest 1628 pass, lint 63err/230warn (pre-existing prettier noise). + +--- + +## Phase 1 — Stability (the platform stops eating data and hanging) + +Pure hardening, zero UX change, independently shippable.
+ +### 1.1 Gateway supervision: permanent health loop + auto-recovery (M) ✅ DONE (64a9254d) + +**Where:** `src/main/hermes/gateway-process.ts`, `hermes.ts`/`sse-parser.ts`, `chat-orchestrator.ts`. +**What:** Replace the self-cancelling poll with a permanent 30s supervisor (local mode only, +while a profile's gateway is started): re-run the HTTP health probe; on 3 consecutive failures +kill + `startGateway()` with exponential backoff (max 3 attempts, then a persistent visible +"gateway down" state — never silent-restart during an open interactive stream). Emit +`gateway-health-changed` to the renderer (new preload listener). Add a stream-stall timeout: +no bytes for 120s ⇒ abort. +**Accept:** kill -9 the gateway mid-session ⇒ detected ≤60s, restarted, chat works without app +restart; crash-looping gateway surfaces "down" after 3 attempts; stalled stream errors out. No +restart in remote/SSH. +**Outcome:** Done as pure state machine (`gateway-supervisor.ts`, 9 vitest cases) + effects. +Stream-stall half was ALREADY satisfied (req.setTimeout socket-inactivity timer) — not rebuilt. + +### 1.2 Scheduler locks: PID-liveness + TTL + visible skip counter (M) ✅ DONE (e90d3c29) + +**Where:** `src/main/scheduler.ts` (lock at ~line 186: `/tmp/hermes-routine-<jobId>.lock`, +bare existsSync — a crash mid-job kills that job forever silently). +**What:** Locks move to `<data-dir>/locks/<jobId>.lock` as JSON `{pid, startedAt}`. Acquisition +steals if PID dead (`process.kill(pid,0)` throws) or `startedAt` exceeds job timeout (default +15 min). Wrap job runs in a timeout race that releases the lock. Every lock-skip increments a +counter; a job that hasn't run within 2× its cadence surfaces a warning in the scheduled-research UI. +**Accept:** vitest units on extracted pure stale-lock logic; dead-PID lock self-heals; never-resolving +job reaped; skip counter visible. +**Outcome:** Done. Pure `scheduler-lock.ts` (9 cases) + reap timer + skip telemetry via +`get-scheduler-skips` IPC.
Skip-counter UI surfacing deferred to 2.2. + +### 1.3 IPC error envelope + last-resort logging (M, may split into 2–3 PRs) ⏳ NEXT + +**Where:** new `src/main/ipc/safe-handle.ts`; mechanical adoption across `src/main/ipc/*.ts` +(16 files) and handlers still in `src/main/index.ts` (~234 sites). +**What:** `safeHandle(channel, fn)` wraps `ipcMain.handle`: catch → redact the message via +`src/main/redactor.ts` → structured log `{channel, message, stack}` (via 1.6 logger — +`log.error("ipc", {...})`) → rethrow a clean serializable Error. **Do not change return shapes** +(renderer hooks depend on current contracts). Add `process.on('unhandledRejection'|'uncaughtException')` +loggers in `index.ts`. + +```ts +export function safeHandle(channel: string, fn: Handler): void { + ipcMain.handle(channel, async (event, ...args) => { + try { + return await fn(event, ...args); + } catch (err) { + const message = redactText( + err instanceof Error ? err.message : String(err), + ); + log.error("ipc", { + channel, + message, + stack: err instanceof Error ? err.stack : undefined, + }); + throw new Error(message); + } + }); +} +``` + +**Accept:** grep shows zero raw `ipcMain.handle(` outside safe-handle.ts; a deliberately-throwing +handler yields a structured log line + clean renderer rejection. +**Note:** Watch `registerDualHandler` (ipc/utility.ts) + `event.sender.send` patterns. `err.stack` +starts with the unredacted error message, so pass it through `redactText` as well before logging — +otherwise a secret in the message still reaches the log via `stack`. 1.6's logger +is live, so the dependency is satisfied. + +### 1.4 SSH auth-key cache lifecycle (S) ✅ DONE (0478307b) + +`clearSshRemoteApiKey()` (clears key + apiServerAvailable=null) on connection-mode change, new SSH +target, tunnel teardown. vitest unit mocks ../config. 
+ +### 1.5 Workspace write-safety (M/L) — prerequisite for all Phase 3+ writers ⏳ AFTER 1.3 + +**Where:** `src/main/sps-agent.ts` (spsSave ~1086 — already atomic via safeWriteFileAsync, but +swallows every error into `return false`, whole-blob last-write-wins, backups only at migration), +`ipc/sps.ts`, renderer save path (`screens/SpsAgent/lib/persistence.ts`, store). +**What:** (a) Save IPC returns `{ok, error?, bytes}`; persistent save-failure toast + status indicator +in the SPS shell. (b) Single serialized write queue in main with a workspace generation/version number: +a save whose base generation is stale triggers reload-merge, never blind overwrite. (c) Rolling backups +`workspace.json.bak-` (first save per session + every 50 saves, prune to 5), reusing the +`spsBackupWorkspace` convention. (d) One-time advisory when the blob exceeds 25 MB pointing at vault +migration. Background writers prefer the file-first vault path over blob rewrites where possible. +**Accept:** simulated EACCES ⇒ visible warning; two writers with interleaved stale bases ⇒ no lost +pages (test via extracted pure merge logic); backups rotate; sps-smoke green. + +### 1.6 Structured logging + log rotation (M) ✅ DONE (624c488c) + +`src/main/log.ts`: dependency-free JSON-lines logger → `/logs/desktop.log` (5MB, keep 3); +pure `formatLogLine`/`shouldRotate` (electron-free, 5 vitest cases); `rotateGatewayStderrIfLarge` +(10MB, keep 2) at gateway start; adopted in gateway supervisor + scheduler. + +### 1.7 Note-index rebuild event + vault-mirror failure surfacing (S) ✅ DONE (f13e8419) [rebuild half] + +`sps-index-rebuilt {profile, status}` broadcast + preload `onSpsIndexRebuilt` + `useIndexRebuildVersion()` +into all 4 useNoteIndex hooks → search/graph/backlinks refetch on rebuild. **Mirror-write-failure COUNT +half deferred to 2.6** (which owns the Storage settings surface that displays it). 
+ +**Phase 1 gate:** standard + sps-smoke + verify:external-context + manual kill-the-gateway recovery check. + +--- + +## Phase 2 — Ruthless consolidation (serialized; data-inventory + deprecation flow per item) + +**End-state:** the SPS workspace IS the app; the admin overlay is connectivity only (Providers, +Models, Gateway, Settings/Diagnostics); one chat system (SPS Chat) + the page co-author (a distinct +editor capability, NOT a chat). + +### 2.1 Port Personalization into SPS You, then delete the admin screen (M) — highest-risk, inventory first + +**Inventory (verified):** `src/main/personalization.ts` edits `~/.hermes/agent-hooks/focus.md` — +injected into every chat turn via a `pre_llm_call` hook in config.yaml — plus the hook consent allowlist +(shell-hooks-allowlist.json) and USER.md/MEMORY.md. +**What:** SPS You surface (`screens/SpsAgent/you/YouSurface.tsx`) already covers USER.md/rules/focus. +Port the missing pieces: hook enable/disable + allowlist management UI into You (existing IPC kept). +Then deprecate the admin Personalization screen behind Developer mode for a week; delete after sign-off. +Without the port, the owner gets a permanently stale "Current focus:" string poisoning every conversation +with no UI to fix it. +**Accept:** every capability of the admin screen demonstrably reachable in You before the delete commit; +focus-hook editable + disableable from SPS. + +### 2.2 Port Schedules management into the SPS Scheduled modal, then delete the Schedules screen (M) + +**Inventory:** the main-process scheduler keeps firing headless (digests, scheduled research, Telegram +sends) regardless of UI. Deleting the management UI without a replacement = app keeps doing invisible +things the owner can't see or stop. +**What:** The SPS Scheduled modal already manages research/digest schedules. Add: an "all scheduled jobs" +view (including cron jobs via existing cronjobs.ts IPC), last-run/next-run/skip-counter (from 1.2), +pause/delete per job. 
Then deprecate + delete `screens/Schedules/`. Keep `src/main/cronjobs.ts` and +scheduler IPC. +**Accept:** every running job visible + stoppable from SPS; skip warnings surface here. + +### 2.3 SPS Chat absorbs session history; delete admin Chat + Sessions screens (L) + +**Inventory:** `ipc/sessions.ts` owns search/resume/title-edit (titles in `$HERMES_HOME/desktop/sessions.json`) +and delete-session (deletes rows in the agent's `state.db`). SPS ChatSurface already wraps the same chat +component as the admin Chat screen (chat merge shipped 2026-06-09) — this is history parity, not a chat merge. +**What:** Add a History affordance inside SPS ChatSurface backed by the same hooks (list/search/resume/ +rename/delete). Then deprecate + delete `screens/Chat/` and `screens/Sessions/` + Layout wiring + i18n. +Keep all ipc/sessions.ts / ipc/chat.ts handlers. **The doc co-author (`assistant/AgentBody.tsx`) is +explicitly out of scope** — keep its changeset flow and MED-2 key-scrub untouched. +**Accept:** session list/search/resume/rename/delete all work from SPS; old nav gone; +`scripts/verify-admin-overlay.mjs` updated. + +### 2.4 Delete Kanban + remaining admin screens with relocations (L, can split) + +**Kanban inventory:** boards live in the Python agent (`hermes kanban` CLI) and the agent can create tasks +— the screen is the only oversight window over agent-created work. Default = add a read-only "Agent tasks" +view in SPS via the existing kanban IPC read path, then delete the screen + write-path UI. Inventory decides. 
+**Other deletions** (each with inventory + grep-to-zero): `screens/Agents/`, `screens/Skills/` (active-skills +toggles move into Workspace Settings 2.6; keep ipc/skills.ts), `screens/Memory/` (read/manage pane added to +You; keep ipc/memory.ts), `screens/Tools/`, `screens/CapabilityReview/` (port its credential/filesystem +capability summary as a card in admin Settings → Diagnostics — we're adding Telegram inbound in Phase 5; +keep the security oversight), `screens/Insights/`, `screens/Soul/`. Main-side modules survive wherever another +consumer exists (e.g., tools.ts feeds chat tool execution). IPC handlers whose only consumer was a deleted +screen are deleted with their preload bridge + d.ts entries in the same commit. +**Accept:** admin overlay shows exactly 4 tabs; parity test green; channel-name greps to zero; i18n keys pruned. + +### 2.5 Remove SPS sidebar stubs (S) + +`sidebar/SidebarStubs.tsx` + Meetings/Shared/Apps sections in Sidebar.tsx (~267/284/343) deleted. Smoke green. + +### 2.6 One Workspace Settings surface (M/L) + +Evolve `tweaks/TweaksPanel.tsx` into the single SPS settings surface: Storage (mode/parity/migrate + +mirror-failure count from 1.7 + backup status from 1.5), Inbox/curator settings as real form fields +(replacing raw-JSON editing in InboxSurface — same persisted shape), active-skills toggles, capture +settings (placeholder for 5.1). Command-palette storage toggle now opens this surface. +**Accept:** no raw-JSON settings editing remains; curator settings round-trip; one discoverable entry point. + +### 2.7 Modal chrome unification (M) + +Extract one `SpsModal` shell (header/close/esc/backdrop/footer slots) in scoped SPS styles (add classes to +`screens/SpsAgent/styles/` and re-run `node scripts/scope-sps-css.mjs` — never hand-edit scoped output); +convert Research/Scheduled/ExternalSessions/Templates/Trash/TaskDrawer mechanically. +**Accept:** identical behavior; smoke screenshots. 
+ +### 2.8 First-run seeded workspace + guided first loop (M) + +Onboarding ends by seeding a starter workspace ("Start here" page wiki-linked to Tasks + an Inbox explainer, +demonstrating wikilinks/graph/Ask) instead of a blank tree; dismissible 3-step checklist (capture → ingest → +search). `dev:fresh` proves it. + +### 2.9 Discoverability pass (S) + +⌘K entries + shortcut hints + tooltips for Save-to-wiki, Ask pane, Vault health, Telos audit. + +**Phase 2 gate per item:** standard + sps-smoke + verify-admin-overlay + parity test + channel grep-to-zero. +Serialized. + +--- + +## Phase 3 — External conversation imports (the unifier's intake) + +**Architecture (one decision for all items):** imports reuse the existing scan pipeline. Import flow copies +the export payload to `/external-context-imports//-` (content-hash +path ⇒ idempotent re-import); each source is a normal `SourceAdapter` +(`src/main/external-context/adapters/types.ts` — pure node, no electron/sqlite, vitest-testable) whose +`roots()` returns the import dir, `strategy: "replace"` (db.ts already implements DELETE-then-INSERT per +conversation). Parsing of large files runs in a worker_thread with progress IPC + cancel. Unparseable +conversations are quarantined + counted in the UI, never silently dropped. All writes via `applyFragments`. +Adapters can run in parallel worktrees (disjoint files). + +### 3.1 Source-type plumbing (M) + +Extend `ExternalSource` union in `src/shared/external-context.ts` (~line 11) with +`"chatgpt" | "claude-ai" | "grok-export" | "gemini-takeout"`; labels, default-OFF config, exhaustiveness via +typecheck; `importRootFor(source)` helper + copy-with-hash util. Existing four sources untouched +(verify:external-context green). + +### 3.2 ChatGPT export adapter (M) + +New `adapters/chatgpt.ts` + fixtures. 
`conversations.json` is a mapping node graph: walk from `current_node` +via `parent` for the canonical branch; map roles, epoch-seconds timestamps, content.parts; skip tool/system +nodes; seq = branch index. Schema-tolerant (formats drift): unknown shapes ⇒ quarantine, never throw. +**Accept:** branched-mapping fixture test; seeded-secret-in-fixture redaction assertion in verify:external-context. + +### 3.3 Claude.ai + Grok export adapters (M) + +`adapters/claude-ai.ts`: export conversations.json is linear (`{uuid, name, chat_messages: [{sender, text|content[], created_at}]}`). +`adapters/grok-export.ts` (named to avoid the existing live grok.ts): pin the real export shape from an actual +export at execution time; encode in fixtures; tolerant parsing. + +### 3.4 Gemini Takeout adapter (M) + +`adapters/gemini-takeout.ts`: Takeout MyActivity.json (JSON format only — the import UI documents "choose JSON +in Takeout"); group records into pseudo-conversations by >30 min time-gap (Takeout has no conversation ids); +provenance label "Gemini (Takeout)". + +### 3.5 Perplexity: descoped to paste-capture (S, timeboxed ½ day) + +Premortem finding: Perplexity has no official export — an adapter is fiction. Timeboxed feasibility note in +`docs/superpowers/specs/`; supported path is manual paste → Inbox capture (Phase 5 territory). Go/no-go +documented; no adapter built absent a real artifact. + +### 3.6 Import IPC + drop-zone UI (M) + +New `external-context-import-file` handler beside scan/rebuild in `ipc/external-context.ts`: accepts +`{source, filePath}`; ZIP extraction via adm-zip (main-process-safe, small) feeding the worker_thread parser; +copies payload to import root; runs the standard scan; returns counts; progress events. UI: Import tab/drop-zone +in ExternalSessionsModal with per-source export instructions. 
+**Accept:** real ChatGPT ZIP end-to-end ⇒ searchable, fenced; same ZIP twice ⇒ identical row counts; UI never +freezes on a large ZIP; external-context-smoke extended. + +**Phase 3 gate:** standard + verify:external-context (extended) + external-context-smoke. Dogfood doc per source +— each imported corpus must show up in digests/search (closed loop, not a museum). + +--- + +## Phase 4 — Unified search + chat↔KB linkage (the unifier's payoff) + +### 4.1 Federated search IPC (M) + +New `src/main/federated-search.ts` + `ipc/search.ts`: fan out to the three existing seams — note-index FTS +(`getSpsNoteIndex(profile).search`), sessions (`sessions.ts searchSessions` ~225), external-context FTS — +normalize to `{kind: page|session|external, id, title, snippet, ts, provenance}`, interleave with +recency-boosted rank. **ABI constraint:** merger/ranker are pure functions in a vitest-tested module; the +index-opening integration is proven by a new `verify:federated-search` electron-node script (pattern: +scripts/verify-note-index). +**Accept:** seeded profile returns merged three-corpus results; ranker unit-tested; parity test green. + +### 4.2 Federated search in ⌘K (M) + +CommandPalette queries 4.1 debounced; provenance chips (page / chat / ChatGPT / Claude Code …); Enter routes: +page → editor, session → SPS Chat resume, external → fenced viewer (external hits always render through the +existing untrusted-fence components); scope prefixes `pages:` `chats:` `external:`. +**Accept:** smoke — a term seeded in all three corpora returns three provenance-distinct hits; keyboard-only flow. + +### 4.3 Save-chat-to-KB (M) + +"Save to KB" on a chat session: page built from the transcript (frontmatter `source: chat-session, sessionId`; +body = cleaned transcript or LLM summary, user choice), committed through the same changeset path as ingest +(correct in both storage modes, auto-indexed/mirrored). 
**Passes redactor.ts before any vault write** (transcripts +contain pasted keys; vault markdown is permanent + indexed). Page links back via sessionId chip. +**Accept:** page in vault + index + graph; parity round-trip; redaction regression test. + +### 4.4 Related-pages suggestions in chat (M, optional) + +After a turn completes, pure matcher over turn text vs note-index titles/wikilink targets ⇒ "Related pages" +chips. Suggestions only — never auto-injected into the prompt (same invariant as external context). + +**Phase 4 gate:** standard + verify:note-index + verify:federated-search + sps-smoke palette scenario + +verify:external-context. + +--- + +## Phase 5 — Live capture + live streaming (close the loop) + +### 5.1 Telegram quick-capture → SPS Inbox (L) + +**Where:** new `src/main/telegram-capture.ts`; settings in Workspace Settings (2.6); writes via existing Inbox +conventions (`sps-ingest.ts`: `vault/_inbox/`, RawCapture shape); reuses +`getTelegramTarget`/`getTelegramAvailability` (scheduled-research.ts ~92–145, platforms config). +**Constraint discovered:** the gateway's Hermes agent already long-polls the bot when the Telegram platform is +enabled — a second `getUpdates` consumer on the same token conflicts. **Default mode: dedicated capture bot** +(second token configured in settings); main process long-polls with persisted offset; accepts messages only +from the paired chat id; rate/size-capped; each message lands as a raw capture in `_inbox/` (source: "telegram"), +redacted + untrusted-fenced, never auto-injected; "Captured ✓" ack. WhatsApp path = forward to this bot +(documented; no WhatsApp code). +**Accept:** message → Inbox within poll interval; foreign chat ids ignored; poller survives network loss/app +restart; ingest turns captures into pages as today. 
+ +### 5.2 WS5 step 1 — streaming spike (S, timeboxed 1 day) + +Per `docs/superpowers/specs/ws5-streaming-spike.md`: throwaway `scripts/probe-*` against +`/api/sessions/{id}/chat/stream`; answer the open question (does `tool.completed` carry result +bodies/attachments?); written go/no-go. + +### 5.3 WS5 steps 2–4 — live streaming transport, retire the post-stream state.db merge (L, gated on 5.2) + +Feature-detect `/api/sessions/*` via `/v1/capabilities` (remote/SSH may lack it — keep OpenAI-compat fallback); +map `assistant.delta`/`tool.*` onto the existing ChatMessage union behind the SAME IPC events; correlate by +message_id; keep state.db for history load only; golden parity check (recorded turn renders identically +old-vs-new) before deleting the `onChatDone → getSessionMessages` merge; 1.1's stall timeout applies to the +new path. Net negative LOC expected. +**Accept:** live reasoning/tool rows during a turn; automatic fallback on incapable gateways; merge code deleted. + +**Phase 5 gate:** standard + sps-smoke + manual capture-bot end-to-end + golden parity transcript (5.3). + +--- + +## Explicitly NOT doing (scope discipline — the "museum" antidote) + +- No Perplexity adapter (no export exists), no WhatsApp API, no Cursor indexing (owner deselected). +- No multiplayer/Yjs, no published/shared pages, no mobile. +- No merging the doc co-author into Chat (distinct capability; key-scrub is load-bearing). +- No embeddings/vector search (query expansion shipped; embeddings stay gated on measured recall failure per BACKLOG.md). +- No new cockpit widgets, no Equity changes (it works; leave it). +- **A unifier feature the owner hasn't touched during its trial window gets reverted, not kept.** + +## Dependency graph + +P0 → P1 (1.5 gates all later writers) → P2 (serialized) → P3 (parallel-safe) → P4 → P5. +5.2/5.3 are droppable without affecting anything else. Each phase independently shippable; stop-anywhere is safe. 
+ +## End-to-end verification of the whole transformation + +1. Full mechanical gate green at every merge (typecheck×2, lint, vitest, verify:note-index, verify:external-context, + build, smoke harnesses). +2. Manual resilience drill: kill gateway mid-chat; crash app mid-save; dead-PID scheduler lock — all recover visibly. +3. Import drill: real ChatGPT + Claude.ai + Gemini exports in; ⌘K returns provenance-mixed results; digests + reference imported corpora. +4. Capture drill: Telegram message → Inbox → ingest → page → findable in ⌘K (the full unifier loop in one pass). +5. Owner dogfood doc per phase, with the trial-window revert rule applied. + +--- + +## Status (as of 2026-06-10, origin/main @ a3e2e278) + +- **Done & merged:** P0; P1.1 (64a9254d); P1.2 (e90d3c29); P1.4 (0478307b); P1.6 (624c488c); + P1.7 rebuild-event half (f13e8419); lint-zero cleanup (a3e2e278). +- **Next:** P1.3 (IPC error envelope) → P1.5 (workspace write-safety) → then Phase 2 (serialized). +- Live progress + gotchas: project memory `homebase-transformation.md`. diff --git a/docs/superpowers/plans/2026-06-11-homebase-remaining-plan.md b/docs/superpowers/plans/2026-06-11-homebase-remaining-plan.md new file mode 100644 index 000000000..38ea84616 --- /dev/null +++ b/docs/superpowers/plans/2026-06-11-homebase-remaining-plan.md @@ -0,0 +1,199 @@ +# Home Base — Remaining Work Plan + +**As of 2026-06-11. `origin/main` @ `80736113`. Phases 1 + 2 + 3 COMPLETE.** + +This is the durable, actionable plan for everything still pending in the "Home Base" +transformation. It supersedes the forward-looking sections of the two older docs for +_remaining_ work; those stay authoritative for history: + +- **Living tracker (authoritative):** auto-memory `homebase-transformation.md` (loads each + session via its `MEMORY.md` index line). +- **In-repo handoff:** `docs/superpowers/plans/HANDOFF-homebase-transformation.md`. 
+- **Canonical original plan (P0–P5):** `docs/superpowers/plans/2026-06-10-homebase-transformation.md`. + +## The thesis (why the remaining work matters) + +The Home Base unifies scattered LLM conversations / code / notes into one place. We've built +**stability** (P1), **a single SPS workspace + thin admin** (P2), and **intake** — import +ChatGPT/Claude.ai/Grok/Gemini exports into a redacted, fenced, searchable index (P3). The payoff +the user actually _feels_ is **P4 federated search**: one query that reaches vault notes, imported +transcripts, and Hermes sessions at once. Until then the imports sit in their own modal — built, +but not yet woven into daily flow. + +## Recommended order + +1. **P1.7 vault-mirror failure COUNT** — small, owed since Phase 1, standalone. Clears the debt. +2. **P4 federated search** — the keystone. Design-first (reality-check + brainstorm), then implement. +3. **P5 live capture / streaming** — later; 5.2/5.3 droppable. +4. **(Optional)** import worker_thread offload — deferred hardening, only if large-export main-thread + stalls become a real complaint. + +--- + +## Working conventions (LOCKED — apply to every item below) + +- **Reality-check the premise vs current `main` FIRST.** Repeatedly through P1–P3, plan premises were + stale or inverted (the "blank" thing already seeded, the "missing" surface already live, the + worker_thread "freeze" not a renderer freeze). Close stale items; don't build them. Delegate the + reality-check sweep to an Explore subagent (Sonnet) — conclusions to the main thread, not file dumps. +- **One worktree, serial branches.** Reuse `.claude/worktrees/p1.1-gateway-supervision`. Per item: + `git checkout -b worktree-pX origin/main` (keeps `node_modules` — do NOT `npm ci`/symlink). Integrate: + `git fetch origin` → `git merge-base --is-ancestor origin/main HEAD` ff-check → `git push origin HEAD:main`. 
+- **Per-item gate:** `npm run typecheck` (×2: node + web) → `npx eslint ` → `npx vitest run` + → `npm run verify:note-index` → `npm run build` → plus the item's relevant probe + (`verify:external-context` / `external-context-smoke` / `sps-smoke` / `verify-admin-overlay` / + `verify:firstrun-seed`). **Build BEFORE any Electron UI probe** — they drive the built `out/`. +- **All external/ingestion writes go through `applyFragments`** (the single writer; index-time redaction + is a structural invariant — `verify:external-context` asserts a seeded key never reaches + `messages`/`messages_fts`). Adapters are pure node (no electron/sqlite), vitest-testable. +- **Keep PRs small + single-purpose.** Merge-as-you-go. +- **Known flakes (NOT regressions):** `verify-admin-overlay` a1/a2 (cold-start GROUPS=0; a3/a4/a5 pass); + `sps-smoke` 02b/02c/03 (fresh-seed collapsed nav; 01/02 pass); `verify:note-index` prints a + SemanticIndex "helper not running" stderr line (checks still pass). +- **adm-zip + vitest:** any test touching adm-zip must be pinned `// @vitest-environment node` at the top + — jsdom breaks adm-zip's zlib deflate (test-env only; real node + electron fine). + +--- + +## ITEM 1 — P1.7 vault-mirror failure COUNT (owed; small; do first) + +**Goal:** observability for the load-bearing vault write path — surface how many times a vault-mirror +write FAILED so a user/operator knows their markdown-on-disk source-of-truth is silently diverging. + +**Reality-check first (delegate):** + +- Where does the vault mirror actually write, and where can it fail? Expected: `src/main/sps-vault.ts` + and the `sps-export-page` handler in `src/main/ipc/notes.ts`. Confirm the exact failure points + (catch blocks that currently swallow) and whether a counter already exists. +- Confirm `storageMode` semantics: the mirror is the additive vault write in `blob` mode; in `vault` + mode markdown is authoritative. The counter should cover the path that can silently drop. 
+ +**Build:** + +- A persistent counter (process-lifetime or persisted to a small JSON under `HERMES_HOME` — prefer + persisted so it survives restarts and is meaningful) incremented at each mirror-write failure, with + the last error message + timestamp. +- New IPC `spsGetMirrorFailCount` → `{ count, lastError?, lastAt? }`. Preload bridge + `index.d.ts` + parity (`tests/preload-api-surface.test.ts` enforces two-way). +- Surface in `TweaksPanel.tsx` **Storage** section (the "Workspace settings" panel) — a small line that + only shows when count > 0 (e.g. "⚠ N vault-mirror writes failed — last: …"). + +**Why held until now:** deliberately kept out of every UI commit to avoid touching the storage +substrate in a mixed change. It gets its OWN small commit. + +**Acceptance:** a forced mirror failure (inject a write error in a test/probe) increments the counter; +`spsGetMirrorFailCount` returns it; TweaksPanel shows the warning. Storage round-trip + parity tests green. + +**Gate:** standard + `verify:note-index` (storage substrate) + `sps-smoke` (TweaksPanel renders). + +--- + +## ITEM 2 — P4 federated search (the keystone) + +**Goal:** one query reaches **all** the user's knowledge at once — vault notes, imported/scanned +external transcripts, and Hermes chat sessions — merged into a single ranked result surface, each hit +routing to the right place on click. + +### 4.1 Reality-check + design (DO THIS BEFORE ANY CODE — delegate the sweep, then brainstorm) + +Map the search surfaces that exist today and their result shapes: + +- **Vault/notes:** `src/main/note-index.ts` (`.note-index.db`, FTS5) — consumed by `useNoteIndex` + hooks, the **Ask pane**, `SidebarRecents` search. (KB pages are vault pages, so KB ⊂ this.) +- **External transcripts:** `external-context-search` IPC over `external-context.db` (FTS5) — + `ExternalSessionsModal`. Already redacted + fenced. +- **Hermes sessions:** `searchSessions` (session cache) — `SidebarRecents`, `AskPane`. 
+ +Open design questions to resolve in brainstorming (present strongest case for each, recommend one): + +1. **Aggregation: fan-out-and-merge vs unified index.** Strong recommendation = **fan-out-and-merge** + (parallel-call the 3 existing IPCs, normalize, merge, rank). The indices are rebuildable and separate + for good reasons; a unified index is migration + sync cost for little gain. Reject unified unless the + reality-check surfaces a blocker. +2. **Ranking across heterogeneous FTS scores.** Normalize each source's score to [0,1], then apply a + recency boost. Keep the ranking function PURE (vitest-testable). Decide tie-breaks + per-source caps + so one chatty source can't drown the others. +3. **Which surface hosts it.** Candidates: the **Ask pane** (already the "search your workspace" entry), + **⌘K**, or a dedicated "Search everything" surface. Reality-check what the Ask pane does today — + extending it is likely lowest-friction and most discoverable. Avoid a new top-level surface if an + existing one fits (P2 lesson: discoverability > new screens). +4. **Untrusted-content boundary.** External transcripts are UNTRUSTED (prompt-injection highway). In a + merged list they MUST keep the provenance label + fence treatment; never auto-inject a transcript hit + into a chat turn. This invariant rides along into the federated surface. + +### 4.2 Aggregator (main-side, pure ranking) + +- New `src/main/federated-search.ts` (or similar): `federatedSearch(query, opts)` parallel-calls the 3 + source searches, normalizes each hit to a common shape: + ``` + FederatedHit { kind: 'note' | 'transcript' | 'session', title, snippet, source?, ts, ref, score } + ``` + merges, ranks (normalized score + recency boost), applies per-source + total caps. The merge/rank + logic is PURE → vitest-tested with synthetic per-source results (no sqlite). +- New IPC `federated-search` ({ query, opts }) wiring the aggregator. Preload bridge + `index.d.ts` + parity. 
+ +### 4.3 UI surface + +- Host federated results in the chosen surface (likely the Ask pane). Render grouped or interleaved with + a **type chip** per hit (Note / Transcript / Session) + provenance for transcripts. Clicking routes: + note → open vault page; transcript → open the untrusted `ConversationViewer`; session → resume. +- Keep the untrusted banner/fence on transcript hits. + +### 4.4 Dogfood + smoke + +- Seed a term that exists in a vault note, an imported transcript, AND a Hermes session; one query + returns all three kinds in one ranked list; each routes correctly; transcript hit stays fenced + no + secret leak. +- Extend `external-context-smoke` (or a new `federated-search-smoke`) to assert the merged surface. + +**Gate:** standard + `verify:note-index` + `verify:external-context` + the federated smoke. + +--- + +## ITEM 3 — P5 live capture / streaming (later; 5.2/5.3 droppable) + +**Goal:** close the loop from a one-time intake to a _living_ Home Base — capture conversations as they +happen and stream responses. + +Sub-items (reality-check each premise first — formats/ABIs drift): + +- **5.1 paste-capture** — paste a raw conversation (incl. **Perplexity**, which has no export — this is + where descoped 3.5 lands) → parse heuristically → stage → index through the same `applyFragments` path. + Reuse the import pipeline; the only new bit is a paste → normalized-payload parser + a paste UI in the + Import surface. +- **5.2 Telegram gateway** (droppable) — messaging intake/outtake so Hermes is reachable from Telegram. + Builds on the existing gateway lifecycle (`src/main/hermes/`). Larger; only if the owner wants it. +- **5.3 streaming** (droppable) — streaming response surfacing improvements. Only if a concrete gap is felt. + +**Gate:** standard + whatever probe matches the sub-item (paste-capture → `verify:external-context` + +smoke). 
+ +--- + +## ITEM 4 — Deferred hardening: import worker_thread offload (optional) + +The P3.6 import parses on the main thread (matches the live `gemini` whole-file source). The renderer +can't freeze (async IPC); only the main process could stall on a pathologically-large `JSON.parse`. If +that becomes a real complaint, offload the parse to a `worker_thread`: + +- No existing worker pattern in the repo — `electron.vite.config.ts` main has a single default entry + (`src/main/index.ts`), no worker input. Adding a worker means emitting a worker entry to `out/` and + resolving it at runtime; budget for that build integration. +- The worker runs the PURE adapter parser (no electron/sqlite), returns `{ conversations, messages, +skipped }`; the main thread calls `applyFragments` (still the single writer). Idempotency + the file + cursor are unchanged. +- Only worth it for truly huge exports; document what was measured before building. + +--- + +## Quick status table + +| Item | State | Size | Risk | Notes | +| ----------------------- | ---------- | ---- | ----------------------- | -------------------------------- | +| P1.7 vault-mirror count | owed | S | low (storage substrate) | do first; own commit | +| P4 federated search | next major | L | med (design-heavy) | design-first; fan-out-and-merge | +| P5.1 paste-capture | pending | M | low | Perplexity lands here | +| P5.2 Telegram | droppable | L | — | only if owner wants | +| P5.3 streaming | droppable | M | — | only if a gap is felt | +| Import worker_thread | optional | M | build-integration | only if large-export stalls felt | diff --git a/docs/superpowers/plans/2026-06-12-sps-active-work-cockpit.md b/docs/superpowers/plans/2026-06-12-sps-active-work-cockpit.md new file mode 100644 index 000000000..0533d6354 --- /dev/null +++ b/docs/superpowers/plans/2026-06-12-sps-active-work-cockpit.md @@ -0,0 +1,1642 @@ +# SPS Active Work Cockpit Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use 
superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Make long-running SPS work visible as goals plus a task board: active goal, acceptance criteria, worker/task status, heartbeat, blocker reason, artifacts, and resume/stop controls. + +**Architecture:** Keep Hermes/Kanban as the backend engine and add a thin SPS product layer around it. Reuse existing `sendMessage`, `spsGetWorkSession`, `kanbanListTasks`, and `kanbanGetTask`; add only a small profile-scoped sidecar for SPS-owned active runs and minimal missing IPC/preload surface. Do not rename `window.hermesAPI`, Hermes internals, storage paths, or upstream CLI concepts. + +**Tech Stack:** Electron main/preload IPC, React 19, Zustand SPS store, TypeScript shared types, Vitest/jsdom, existing Hermes Kanban CLI wrapper. + +--- + +## Product Decisions + +- Employee-facing surface name: **Active Work**. +- Sidebar placement: under **My Assistant**, near **Learn This**. +- Existing **Cockpit** remains the home dashboard. Do not overload it. +- Existing `AgentTasksModal` should either be replaced by the Active Work surface or reduced to a compatibility wrapper that opens the new surface. +- Use employee copy: **My Assistant**, **work**, **task board**, **goal**, **blocked**, **resume**, **stop**. Avoid new user-facing “agent” language. +- Internal names remain unchanged: `window.hermesAPI`, `hermes-agent`, `sps-agent`, `kanban*` IPC names, and Hermes session ids. + +## V1 Scope + +Build one useful cockpit, not the whole upstream dashboard: + +- Show SPS-owned active runs from a profile sidecar. +- Track `/work` plan runs automatically. +- Add an optional “Start goal” flow that starts a `/goal` run and records it as active work. +- Show Kanban board columns using existing tasks. 
+- On task selection, fetch full `KanbanTaskDetail` and show comments, events, runs, latest summary, parent/child ids, heartbeat timestamp, result, and blocker-like event payloads. +- Add resume and stop for SPS-owned runs. +- Add Kanban task create support for upstream goal-mode flags, but keep advanced run termination/log streaming out of v1 unless already available through the current CLI wrapper. + +## Out Of Scope For V1 + +- No direct SQLite reads from `~/.hermes/kanban.db` in Electron. +- No new Hermes backend implementation. +- No deletion of Kanban task files/rows from SPS. +- No attempt to parse all upstream `/goal status` text into a perfect structured state. +- No mobile/WhatsApp/Telegram/Gmail attachment ingestion in this slice. +- No task attachment schema unless upstream already returns it in `kanban show --json`. + +## File Structure + +Create: + +- `src/shared/active-work.ts` + Shared `ActiveWorkRun`, status enums, criteria/artifact types, and IPC input shapes. + +- `src/main/active-work-runs.ts` + Profile-local JSON sidecar stored at `sps-agent/active-work-runs.json` inside the profile home directory (resolved with `profileHome(...)`). Pure fs/path logic only, no Electron native modules. + +- `tests/active-work-runs.test.ts` + Unit tests for empty store, create/update/list, criteria updates, finish/error, and defensive handling of corrupt files. + +- `src/renderer/src/screens/SpsAgent/activeWork/ActiveWorkSurface.tsx` + New SPS surface with active run list, goal starter, Kanban board, and detail panel. + +- `src/renderer/src/screens/SpsAgent/activeWork/ActiveWorkSurface.test.tsx` + Renderer tests for surface render, run controls, task detail expansion, and goal draft behavior. + +Modify: + +- `src/shared/kanban.ts` + Add optional `goalMode`, `goalMaxTurns`, and `maxRuntimeSeconds` to `KanbanCreateTaskInput` only. + +- `src/main/kanban.ts` + Pass new create-task flags to `hermes kanban create` when present. Keep existing behavior unchanged when omitted. 
+ +- `src/main/ipc/kanban.ts` + No new handlers required for v1 unless `kanbanGetTask` needs stricter profile forwarding. Verify `kanbanListTasks(filters || {})` still carries `filters.profile`. + +- `src/main/ipc/sps.ts` + Register active-work sidecar handlers. + +- `src/preload/bridges/sps.ts` and `src/preload/index.d.ts` + Expose active-work sidecar methods. Also allow `abortChat(sessionIdOrRunId?: string)` to pass an optional id through preload. + +- `src/preload/bridges/config.ts` + Change `abortChat` bridge to forward the optional run/session id. + +- `src/renderer/src/screens/SpsAgent/App.tsx` + Render `ActiveWorkSurface` when `surface === "activeWork"`. + +- `src/renderer/src/screens/SpsAgent/sidebar/Sidebar.tsx` + Add **Active Work** under **My Assistant**. Consider removing the old **Assistant tasks** modal nav item once the surface is live. + +- `src/renderer/src/screens/SpsAgent/store/storeTypes.ts` + Add `"activeWork"` to `Surface`. + +- `src/renderer/src/screens/SpsAgent/store/slices/assistant.ts` + Wrap `runWork` with active-work sidecar create/update calls, store the `clientRunId`, record tool progress, session id, completion, and error. + +- `src/renderer/src/screens/SpsAgent/modals/AgentTasksModal.tsx` + Either leave as-is for one release or reduce to a small redirect/compatibility component after `ActiveWorkSurface` renders the same board. + +- `src/renderer/src/screens/SpsAgent/sidebar/Sidebar.test.tsx` + Extend expectation for **Active Work** under **My Assistant**. 
+ +## Shared Types + +Add this to `src/shared/active-work.ts`: + +```ts +export type ActiveWorkStatus = + | "running" + | "paused" + | "blocked" + | "completed" + | "stopped" + | "failed"; + +export interface ActiveWorkCriterion { + id: string; + text: string; + done: boolean; +} + +export interface ActiveWorkArtifact { + id: string; + kind: "page" | "session" | "task" | "file" | "text"; + label: string; + ref?: string; + createdAt: number; +} + +export interface ActiveWorkRun { + id: string; + source: "sps-work" | "goal" | "kanban"; + status: ActiveWorkStatus; + title: string; + goal: string; + pageId?: string; + pageTitle?: string; + sessionId?: string; + clientRunId?: string; + taskId?: string; + criteria: ActiveWorkCriterion[]; + artifacts: ActiveWorkArtifact[]; + lastTool?: string; + lastHeartbeatAt?: number; + blockerReason?: string; + summary?: string; + error?: string; + createdAt: number; + updatedAt: number; + completedAt?: number; +} + +export interface ActiveWorkCreateInput { + source: ActiveWorkRun["source"]; + title: string; + goal: string; + pageId?: string; + pageTitle?: string; + sessionId?: string; + clientRunId?: string; + taskId?: string; + criteria?: Array<{ text: string; done?: boolean }>; +} + +export interface ActiveWorkPatch { + status?: ActiveWorkStatus; + sessionId?: string; + clientRunId?: string; + taskId?: string; + criteria?: ActiveWorkCriterion[]; + artifacts?: ActiveWorkArtifact[]; + lastTool?: string | null; + lastHeartbeatAt?: number; + blockerReason?: string | null; + summary?: string | null; + error?: string | null; + completedAt?: number; +} +``` + +Extend `src/shared/kanban.ts`: + +```ts +export interface KanbanCreateTaskInput { + title: string; + body?: string; + assignee?: string; + priority?: number; + tenant?: string; + workspace?: string; + triage?: boolean; + skills?: string[]; + maxRetries?: number; + goalMode?: boolean; + goalMaxTurns?: number; + maxRuntimeSeconds?: number; +} +``` + +## Task 1: Active Work Sidecar 
Tests + +**Files:** + +- Create: `tests/active-work-runs.test.ts` +- Create later: `src/shared/active-work.ts` +- Create later: `src/main/active-work-runs.ts` + +- [ ] **Step 1: Write failing tests** + +Create `tests/active-work-runs.test.ts`: + +```ts +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { mkdtempSync, rmSync, writeFileSync } from "fs"; +import { tmpdir } from "os"; +import { join } from "path"; +import { + listActiveWorkRuns, + createActiveWorkRun, + updateActiveWorkRun, + getActiveWorkRun, +} from "../src/main/active-work-runs"; + +let home: string; +const PROFILE = "default"; + +beforeEach(() => { + home = mkdtempSync(join(tmpdir(), "sps-active-work-")); + process.env.HERMES_HOME = home; +}); + +afterEach(() => { + rmSync(home, { recursive: true, force: true }); +}); + +describe("active work runs sidecar", () => { + it("returns [] when no sidecar exists", async () => { + expect(await listActiveWorkRuns(PROFILE)).toEqual([]); + }); + + it("creates a running SPS work record with criteria", async () => { + const run = await createActiveWorkRun( + { + source: "sps-work", + title: "Work: launch plan", + goal: "Execute the launch plan", + pageId: "page-1", + pageTitle: "Launch plan", + clientRunId: "run-1", + criteria: [ + { text: "Build it", done: false }, + { text: "Verify it", done: true }, + ], + }, + PROFILE, + ); + + expect(run.status).toBe("running"); + expect(run.criteria).toHaveLength(2); + expect(run.createdAt).toBeGreaterThan(0); + expect(await listActiveWorkRuns(PROFILE)).toEqual([run]); + }); + + it("updates status, session id, tool, and completion fields", async () => { + const run = await createActiveWorkRun( + { + source: "goal", + title: "Goal: fix tests", + goal: "Fix failing tests", + clientRunId: "run-2", + }, + PROFILE, + ); + + const updated = await updateActiveWorkRun( + run.id, + { + sessionId: "sess-2", + lastTool: "terminal", + status: "completed", + summary: "Tests pass", + completedAt: 123, + }, + 
PROFILE, + ); + + expect(updated?.sessionId).toBe("sess-2"); + expect(updated?.lastTool).toBe("terminal"); + expect(updated?.status).toBe("completed"); + expect(updated?.summary).toBe("Tests pass"); + expect(updated?.completedAt).toBe(123); + }); + + it("returns null when updating a missing run", async () => { + expect( + await updateActiveWorkRun("missing", { status: "stopped" }, PROFILE), + ).toBeNull(); + }); + + it("treats corrupt JSON as empty instead of crashing", async () => { + // Seed a valid run first: this creates the sidecar directory (writeFileSync + // does not create parent directories) and proves the corrupt file is read. + // NOTE(review): confirm this path matches profileHome(PROFILE). + await createActiveWorkRun( + { source: "goal", title: "Seed", goal: "Seed run" }, + PROFILE, + ); + const p = join( + home, + "profiles", + PROFILE, + "sps-agent", + "active-work-runs.json", + ); + writeFileSync(p, "{not json", "utf-8"); + expect(await listActiveWorkRuns(PROFILE)).toEqual([]); + }); + + it("gets a run by id", async () => { + const run = await createActiveWorkRun( + { source: "kanban", title: "Task", goal: "Do task", taskId: "t_123" }, + PROFILE, + ); + expect(await getActiveWorkRun(run.id, PROFILE)).toEqual(run); + expect(await getActiveWorkRun("nope", PROFILE)).toBeNull(); + }); +}); +``` + +- [ ] **Step 2: Run the tests to verify failure** + +Run: + +```bash +npx vitest run tests/active-work-runs.test.ts +``` + +Expected: FAIL because `src/main/active-work-runs.ts` does not exist. + +## Task 2: Active Work Shared Types And Store + +**Files:** + +- Create: `src/shared/active-work.ts` +- Create: `src/main/active-work-runs.ts` +- Test: `tests/active-work-runs.test.ts` + +- [ ] **Step 1: Add shared types** + +Create `src/shared/active-work.ts` with the exact shared types from the **Shared Types** section. 
+ +- [ ] **Step 2: Implement the sidecar** + +Create `src/main/active-work-runs.ts`: + +```ts +import { promises as fs } from "fs"; +import { dirname, join } from "path"; +import { profileHome, getActiveProfileNameSync } from "./utils"; +import type { + ActiveWorkCreateInput, + ActiveWorkCriterion, + ActiveWorkPatch, + ActiveWorkRun, +} from "../shared/active-work"; + +function activeWorkPath(profile?: string): string { + return join( + profileHome(profile || getActiveProfileNameSync()), + "sps-agent", + "active-work-runs.json", + ); +} + +function id(prefix: string): string { + return `${prefix}-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`; +} + +function normalizeCriteria( + criteria: ActiveWorkCreateInput["criteria"] = [], +): ActiveWorkCriterion[] { + return criteria.map((c) => ({ + id: id("crit"), + text: c.text, + done: Boolean(c.done), + })); +} + +async function readRuns(profile?: string): Promise<ActiveWorkRun[]> { + try { + const raw = await fs.readFile(activeWorkPath(profile), "utf-8"); + const parsed = JSON.parse(raw); + return Array.isArray(parsed) ? (parsed as ActiveWorkRun[]) : []; + } catch { + return []; + } +} + +async function writeRuns( + runs: ActiveWorkRun[], + profile?: string, +): Promise<void> { + const p = activeWorkPath(profile); + await fs.mkdir(dirname(p), { recursive: true }); + await fs.writeFile(p, JSON.stringify(runs, null, 2), "utf-8"); +} + +export async function listActiveWorkRuns( + profile?: string, +): Promise<ActiveWorkRun[]> { + const runs = await readRuns(profile); + return runs.sort((a, b) => b.updatedAt - a.updatedAt); +} + +export async function getActiveWorkRun( + runId: string, + profile?: string, +): Promise<ActiveWorkRun | null> { + const runs = await readRuns(profile); + return runs.find((r) => r.id === runId) ?? 
null; +} + +export async function createActiveWorkRun( + input: ActiveWorkCreateInput, + profile?: string, +): Promise<ActiveWorkRun> { + const now = Date.now(); + const run: ActiveWorkRun = { + id: id("work"), + source: input.source, + status: "running", + title: input.title, + goal: input.goal, + pageId: input.pageId, + pageTitle: input.pageTitle, + sessionId: input.sessionId, + clientRunId: input.clientRunId, + taskId: input.taskId, + criteria: normalizeCriteria(input.criteria), + artifacts: [], + createdAt: now, + updatedAt: now, + }; + const runs = await readRuns(profile); + await writeRuns([run, ...runs], profile); + return run; +} + +export async function updateActiveWorkRun( + runId: string, + patch: ActiveWorkPatch, + profile?: string, +): Promise<ActiveWorkRun | null> { + const runs = await readRuns(profile); + const idx = runs.findIndex((r) => r.id === runId); + if (idx < 0) return null; + const current = runs[idx]; + const next: ActiveWorkRun = { + ...current, + ...patch, + lastTool: + patch.lastTool === null + ? undefined + : (patch.lastTool ?? current.lastTool), + blockerReason: + patch.blockerReason === null + ? undefined + : (patch.blockerReason ?? current.blockerReason), + summary: + patch.summary === null ? undefined : (patch.summary ?? current.summary), + error: patch.error === null ? undefined : (patch.error ?? current.error), + updatedAt: Date.now(), + }; + runs[idx] = next; + await writeRuns(runs, profile); + return next; +} +``` + +- [ ] **Step 3: Run sidecar tests** + +Run: + +```bash +npx vitest run tests/active-work-runs.test.ts +``` + +Expected: PASS. 
+ +## Task 3: Active Work IPC And Preload + +**Files:** + +- Modify: `src/main/ipc/sps.ts` +- Modify: `src/preload/bridges/sps.ts` +- Modify: `src/preload/bridges/config.ts` +- Modify: `src/preload/index.d.ts` + +- [ ] **Step 1: Register active-work IPC** + +In `src/main/ipc/sps.ts`, import: + +```ts +import { + listActiveWorkRuns, + getActiveWorkRun, + createActiveWorkRun, + updateActiveWorkRun, +} from "../active-work-runs"; +import type { + ActiveWorkCreateInput, + ActiveWorkPatch, +} from "../../shared/active-work"; +``` + +Inside the existing `registerSpsIpc()` function, add handlers near `spsGetWorkSession` / `spsSetWorkSession`: + +```ts +safeHandle("sps-active-work-list", (_event, profile?: string) => + listActiveWorkRuns(profile), +); +safeHandle("sps-active-work-get", (_event, runId: string, profile?: string) => + getActiveWorkRun(runId, profile), +); +safeHandle( + "sps-active-work-create", + (_event, input: ActiveWorkCreateInput, profile?: string) => + createActiveWorkRun(input, profile), +); +safeHandle( + "sps-active-work-update", + (_event, runId: string, patch: ActiveWorkPatch, profile?: string) => + updateActiveWorkRun(runId, patch, profile), +); +``` + +- [ ] **Step 2: Expose active-work methods in preload** + +In `src/preload/bridges/sps.ts`, add: + +```ts +spsListActiveWorkRuns: (profile?: string) => + ipcRenderer.invoke("sps-active-work-list", profile), +spsGetActiveWorkRun: (runId: string, profile?: string) => + ipcRenderer.invoke("sps-active-work-get", runId, profile), +spsCreateActiveWorkRun: (input: ActiveWorkCreateInput, profile?: string) => + ipcRenderer.invoke("sps-active-work-create", input, profile), +spsUpdateActiveWorkRun: ( + runId: string, + patch: ActiveWorkPatch, + profile?: string, +) => ipcRenderer.invoke("sps-active-work-update", runId, patch, profile), +``` + +Add imports at the top: + +```ts +import type { + ActiveWorkCreateInput, + ActiveWorkPatch, +} from "../../shared/active-work"; +``` + +- [ ] **Step 3: Allow scoped 
abort from preload** + +In `src/preload/bridges/config.ts`, change: + +```ts +abortChat: (): Promise => ipcRenderer.invoke("abort-chat"), +``` + +to: + +```ts +abortChat: (sessionIdOrRunId?: string): Promise => + ipcRenderer.invoke("abort-chat", sessionIdOrRunId), +``` + +- [ ] **Step 4: Update `index.d.ts`** + +Import active-work types: + +```ts +import type { + ActiveWorkCreateInput, + ActiveWorkPatch, + ActiveWorkRun, +} from "../shared/active-work"; +``` + +Change: + +```ts +abortChat: () => Promise; +``` + +to: + +```ts +abortChat: (sessionIdOrRunId?: string) => Promise; +``` + +Add: + +```ts +spsListActiveWorkRuns: (profile?: string) => Promise<ActiveWorkRun[]>; +spsGetActiveWorkRun: (runId: string, profile?: string) => + Promise<ActiveWorkRun | null>; +spsCreateActiveWorkRun: (input: ActiveWorkCreateInput, profile?: string) => + Promise<ActiveWorkRun>; +spsUpdateActiveWorkRun: ( + runId: string, + patch: ActiveWorkPatch, + profile?: string, +) => Promise<ActiveWorkRun | null>; +``` + +- [ ] **Step 5: Validate preload parity and type surface** + +Run: + +```bash +npx vitest run tests/preload-api-surface.test.ts tests/active-work-runs.test.ts +npm run typecheck +``` + +Expected: PASS. + +## Task 4: Kanban Goal-Mode Create Flags + +**Files:** + +- Modify: `src/shared/kanban.ts` +- Modify: `src/main/kanban.ts` +- Modify: `src/preload/bridges/kanban.ts` +- Modify: `src/preload/index.d.ts` +- Test: add coverage only if a Kanban wrapper test already exists (`tests/skills-management.test.ts` is unrelated). If none exists, rely on typecheck and changed-file ESLint for this narrow CLI-argument change. 
+ +- [ ] **Step 1: Extend create input type** + +Add these optional fields to every `KanbanCreateTaskInput` shape: + +```ts +goalMode?: boolean; +goalMaxTurns?: number; +maxRuntimeSeconds?: number; +``` + +- [ ] **Step 2: Pass flags in `src/main/kanban.ts`** + +Inside `createTask`, after `maxRetries` handling and before skills: + +```ts +if (input.goalMode) args.push("--goal"); +if (input.goalMaxTurns !== undefined) + args.push("--goal-max-turns", String(input.goalMaxTurns)); +if (input.maxRuntimeSeconds !== undefined) + args.push("--max-runtime", String(input.maxRuntimeSeconds)); +``` + +- [ ] **Step 3: Update preload inline input shape** + +In `src/preload/bridges/kanban.ts`, add the same optional fields to the inline `input` type for `kanbanCreateTask`. + +- [ ] **Step 4: Validate** + +Run: + +```bash +npm run typecheck +npx eslint --quiet src/shared/kanban.ts src/main/kanban.ts src/preload/bridges/kanban.ts src/preload/index.d.ts +``` + +Expected: PASS. + +## Task 5: Add Active Work Surface Shell + +**Files:** + +- Create: `src/renderer/src/screens/SpsAgent/activeWork/ActiveWorkSurface.tsx` +- Modify: `src/renderer/src/screens/SpsAgent/store/storeTypes.ts` +- Modify: `src/renderer/src/screens/SpsAgent/App.tsx` +- Modify: `src/renderer/src/screens/SpsAgent/sidebar/Sidebar.tsx` +- Modify: `src/renderer/src/screens/SpsAgent/sidebar/Sidebar.test.tsx` + +- [ ] **Step 1: Add surface type** + +In `src/renderer/src/screens/SpsAgent/store/storeTypes.ts`, add `"activeWork"` to the existing `Surface` union. 
+ +- [ ] **Step 2: Create minimal surface** + +Create `src/renderer/src/screens/SpsAgent/activeWork/ActiveWorkSurface.tsx`: + +```tsx +import { useEffect, useMemo, useState } from "react"; +import { Icon } from "../components/Icon"; +import { useStore } from "../store"; +import type { ActiveWorkRun } from "../../../../../shared/active-work"; +import type { + KanbanBoard, + KanbanTask, + KanbanTaskDetail, +} from "../../../../../shared/kanban"; + +const COLUMNS = ["triage", "todo", "ready", "running", "blocked", "done"]; + +function timeAgo(ms?: number | null): string { + if (!ms) return "never"; + const age = Date.now() - ms; + const minutes = Math.floor(age / 60000); + if (minutes < 1) return "just now"; + if (minutes < 60) return `${minutes}m ago`; + const hours = Math.floor(minutes / 60); + if (hours < 24) return `${hours}h ago`; + return `${Math.floor(hours / 24)}d ago`; +} + +export function ActiveWorkSurface() { + const [runs, setRuns] = useState([]); + const [boards, setBoards] = useState([]); + const [tasks, setTasks] = useState([]); + const [selectedTask, setSelectedTask] = useState(null); + const [taskDetail, setTaskDetail] = useState(null); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(""); + const selectPage = useStore((s) => s.selectPage); + const setSurface = useStore((s) => s.setSurface); + const runWork = useStore((s) => s.runWork); + + async function refresh(): Promise { + setLoading(true); + try { + const [activeRuns, boardsRes, tasksRes] = await Promise.all([ + window.hermesAPI.spsListActiveWorkRuns(), + window.hermesAPI.kanbanListBoards(false), + window.hermesAPI.kanbanListTasks({ includeArchived: false }), + ]); + setRuns(activeRuns); + setBoards(boardsRes.success ? boardsRes.data || [] : []); + setTasks(tasksRes.success ? tasksRes.data || [] : []); + setError(boardsRes.error || tasksRes.error || ""); + } catch (err) { + setError( + err instanceof Error ? 
err.message : "Could not load active work.", + ); + } finally { + setLoading(false); + } + } + + useEffect(() => { + void refresh(); + }, []); + + useEffect(() => { + if (!selectedTask) { + setTaskDetail(null); + return; + } + let cancelled = false; + window.hermesAPI.kanbanGetTask(selectedTask).then((res) => { + if (!cancelled && res.success) setTaskDetail(res.data || null); + }); + return () => { + cancelled = true; + }; + }, [selectedTask]); + + const byColumn = useMemo(() => { + const map: Record = {}; + for (const col of COLUMNS) map[col] = []; + for (const task of tasks) { + (map[task.status] ??= []).push(task); + } + return map; + }, [tasks]); + + const currentBoard = boards.find((b) => b.is_current) ?? boards[0] ?? null; + + async function stopRun(run: ActiveWorkRun): Promise { + await window.hermesAPI.abortChat(run.sessionId || run.clientRunId); + await window.hermesAPI.spsUpdateActiveWorkRun(run.id, { + status: "stopped", + completedAt: Date.now(), + lastTool: null, + }); + await refresh(); + } + + async function resumeRun(run: ActiveWorkRun): Promise { + if (run.pageId) { + selectPage(run.pageId); + setSurface("doc"); + await runWork(); + } + } + + return ( +

+
+
+

Active Work

+

Goals, running work, and My Assistant's task board.

+
+ +
+ + {error && ( +
+ {error} +
+ )} + +
+

Active Runs

+ {runs.length === 0 ? ( +
+ No active work yet. Run a plan with /work or start a goal. +
+ ) : ( +
+ {runs.map((run) => ( +
+
+ {run.title} + {run.goal} + + {run.status} · updated {timeAgo(run.updatedAt)} + {run.lastTool ? ` · running ${run.lastTool}` : ""} + +
+
+ {run.pageId && ( + + )} + {run.status === "running" && ( + + )} +
+
+ ))} +
+ )} +
+ +
+

{currentBoard ? currentBoard.name : "Task Board"}

+
+ {COLUMNS.map((col) => ( +
+
+ {col} ({byColumn[col]?.length || 0}) +
+ {(byColumn[col] || []).map((task) => ( + + ))} +
+ ))} +
+
+ + {taskDetail && ( +
+

{taskDetail.task.title}

+

{taskDetail.task.body}

+
+ Status: {taskDetail.task.status} + Runs: {taskDetail.runs.length} + + Last heartbeat:{" "} + {timeAgo( + taskDetail.runs[0]?.last_heartbeat_at + ? taskDetail.runs[0].last_heartbeat_at * 1000 + : null, + )} + +
+ {taskDetail.latest_summary &&

{taskDetail.latest_summary}

} + {taskDetail.comments.length > 0 && ( +
+

Comments

+ {taskDetail.comments.map((c) => ( +
+ {c.author || "worker"} + {c.body} +
+ ))} +
+ )} +
+ )} +
+ ); +} +``` + +If `Icon` has no `"refresh"` icon, use the existing `"return"` or `"clock"` icon rather than adding a new icon in this task. + +- [ ] **Step 3: Render the surface** + +In `src/renderer/src/screens/SpsAgent/App.tsx`, import `ActiveWorkSurface` and add: + +```tsx +{ + surface === "activeWork" && ; +} +``` + +- [ ] **Step 4: Add sidebar nav** + +In `src/renderer/src/screens/SpsAgent/sidebar/Sidebar.tsx`, add under the **My Assistant** section: + +```tsx + +``` + +- [ ] **Step 5: Update sidebar test** + +In `src/renderer/src/screens/SpsAgent/sidebar/Sidebar.test.tsx`, assert: + +```ts +expect(screen.getByText("Active Work")).toBeInTheDocument(); +``` + +- [ ] **Step 6: Validate shell** + +Run: + +```bash +npx vitest run src/renderer/src/screens/SpsAgent/sidebar/Sidebar.test.tsx +npm run typecheck +``` + +Expected: PASS. + +## Task 6: Track SPS `/work` Runs + +**Files:** + +- Modify: `src/renderer/src/screens/SpsAgent/store/slices/assistant.ts` +- Test: add focused tests only if an assistant slice test already exists. If none exists, cover through `ActiveWorkSurface.test.tsx` with mocked APIs. 
+ +- [ ] **Step 1: Extract criteria from page todos** + +Add a helper near the `serializePlanBlocks` usage in `assistant.ts`: + +```ts +function activeCriteriaFromBlocks( + blocks: Block[], +): Array<{ text: string; done: boolean }> { + return blocks + .filter((b) => b.type === "todo" && b.text.trim()) + .map((b) => ({ text: b.text.trim(), done: Boolean(b.done) })); +} +``` + +- [ ] **Step 2: Create active run at `/work` start** + +Inside `runWork`, after `runId` and before `sendMessage`, create a sidecar record: + +```ts +let activeWorkId: string | null = null; +try { + const active = await window.hermesAPI.spsCreateActiveWorkRun({ + source: "sps-work", + title: `Work: ${meta.title}`, + goal: `Execute the plan "${meta.title}"`, + pageId, + pageTitle: meta.title, + sessionId: resumeId, + clientRunId: runId, + criteria: activeCriteriaFromBlocks(blocks), + }); + activeWorkId = active.id; +} catch { + activeWorkId = null; +} +``` + +- [ ] **Step 3: Update last tool during tool progress** + +Inside the `onChatToolProgress` callback, after `tool = t;`: + +```ts +if (activeWorkId) { + void window.hermesAPI.spsUpdateActiveWorkRun(activeWorkId, { + lastTool: t, + lastHeartbeatAt: Date.now(), + }); +} +``` + +- [ ] **Step 4: Record session id and completion** + +Inside the existing `if (result.sessionId)` block (so it only runs when a session id was returned), add: + +```ts +if (activeWorkId) { + void window.hermesAPI.spsUpdateActiveWorkRun(activeWorkId, { + sessionId: result.sessionId, + status: "completed", + summary: acc.slice(0, 500), + completedAt: Date.now(), + lastTool: null, + artifacts: [ + { + id: uid("artifact"), + kind: "page", + label: meta.title, + ref: pageId, + createdAt: Date.now(), + }, + { + id: uid("artifact"), + kind: "session", + label: "Assistant session", + ref: result.sessionId, + createdAt: Date.now(), + }, + ], + }); +} +``` + +If there is no `result.sessionId`, still mark completed with the page artifact: + +```ts +if (activeWorkId && !result.sessionId) { + void window.hermesAPI.spsUpdateActiveWorkRun(activeWorkId, { + 
status: "completed", + summary: acc.slice(0, 500), + completedAt: Date.now(), + lastTool: null, + artifacts: [ + { + id: uid("artifact"), + kind: "page", + label: meta.title, + ref: pageId, + createdAt: Date.now(), + }, + ], + }); +} +``` + +- [ ] **Step 5: Record failure** + +Inside `catch`, after `render()`: + +```ts +if (activeWorkId) { + void window.hermesAPI.spsUpdateActiveWorkRun(activeWorkId, { + status: "failed", + error: err instanceof Error ? err.message : "work failed", + completedAt: Date.now(), + lastTool: null, + }); +} +``` + +- [ ] **Step 6: Validate** + +Run: + +```bash +npm run typecheck +npx eslint --quiet src/renderer/src/screens/SpsAgent/store/slices/assistant.ts +``` + +Expected: PASS. + +## Task 7: Start Goal Flow + +**Files:** + +- Modify: `src/renderer/src/screens/SpsAgent/activeWork/ActiveWorkSurface.tsx` +- Test: `src/renderer/src/screens/SpsAgent/activeWork/ActiveWorkSurface.test.tsx` + +- [ ] **Step 1: Add test for pending goal creation** + +Create `ActiveWorkSurface.test.tsx` with mocks: + +```tsx +import { render, screen, fireEvent, waitFor } from "@testing-library/react"; +import { describe, it, expect, vi, beforeEach } from "vitest"; +import { ActiveWorkSurface } from "./ActiveWorkSurface"; + +beforeEach(() => { + (globalThis as any).window = { + hermesAPI: { + spsListActiveWorkRuns: vi.fn().mockResolvedValue([]), + spsCreateActiveWorkRun: vi.fn().mockResolvedValue({ + id: "work-1", + source: "goal", + status: "running", + title: "Goal: Fix reports", + goal: "Fix reports", + criteria: [], + artifacts: [], + createdAt: 1, + updatedAt: 1, + }), + spsUpdateActiveWorkRun: vi.fn().mockResolvedValue(null), + kanbanListBoards: vi.fn().mockResolvedValue({ success: true, data: [] }), + kanbanListTasks: vi.fn().mockResolvedValue({ success: true, data: [] }), + kanbanGetTask: vi.fn(), + sendMessage: vi.fn().mockResolvedValue({ + response: "Done", + sessionId: "sess-1", + }), + abortChat: vi.fn().mockResolvedValue(undefined), + }, + }; +}); + +describe("ActiveWorkSurface", () => { + it("starts a goal through /goal and records it as active work", async () => { + render(<ActiveWorkSurface />); 
+ fireEvent.change(await screen.findByLabelText("Goal"), { + target: { value: "Fix reports" }, + }); + fireEvent.click(screen.getByRole("button", { name: "Start goal" })); + + await waitFor(() => { + expect(window.hermesAPI.sendMessage).toHaveBeenCalledWith( + "/goal Fix reports", + undefined, + undefined, + undefined, + undefined, + undefined, + undefined, + expect.stringMatching(/^goal-/), + ); + }); + expect(window.hermesAPI.spsCreateActiveWorkRun).toHaveBeenCalledWith( + expect.objectContaining({ + source: "goal", + goal: "Fix reports", + }), + ); + }); +}); +``` + +If existing test setup already defines `window.hermesAPI`, use that project pattern instead of overwriting `window`. + +- [ ] **Step 2: Run test to verify failure** + +Run: + +```bash +npx vitest run src/renderer/src/screens/SpsAgent/activeWork/ActiveWorkSurface.test.tsx +``` + +Expected: FAIL because the form is not implemented yet. + +- [ ] **Step 3: Add form and send logic** + +In `ActiveWorkSurface`, add state: + +```tsx +const [goalText, setGoalText] = useState(""); +const [startingGoal, setStartingGoal] = useState(false); +``` + +Add function: + +```tsx +async function startGoal(): Promise { + const goal = goalText.trim(); + if (!goal) return; + setStartingGoal(true); + const clientRunId = `goal-${Date.now().toString(36)}`; + let activeId: string | null = null; + try { + const active = await window.hermesAPI.spsCreateActiveWorkRun({ + source: "goal", + title: `Goal: ${goal.length > 60 ? 
`${goal.slice(0, 60)}...` : goal}`, + goal, + clientRunId, + }); + activeId = active.id; + const result = await window.hermesAPI.sendMessage( + `/goal ${goal}`, + undefined, + undefined, + undefined, + undefined, + undefined, + undefined, + clientRunId, + ); + await window.hermesAPI.spsUpdateActiveWorkRun(active.id, { + status: "completed", + sessionId: result.sessionId, + summary: result.response?.slice(0, 500), + completedAt: Date.now(), + }); + setGoalText(""); + await refresh(); + } catch (err) { + if (activeId) { + await window.hermesAPI.spsUpdateActiveWorkRun(activeId, { + status: "failed", + error: err instanceof Error ? err.message : "Goal failed", + completedAt: Date.now(), + }); + } + } finally { + setStartingGoal(false); + } +} +``` + +Add JSX above Active Runs: + +```tsx +
+

Start Goal

+
+