diff --git a/crates/tui/src/commands/groups/memory/memory.rs b/crates/tui/src/commands/groups/memory/memory.rs index 0c9af71a6..fcd350ef9 100644 --- a/crates/tui/src/commands/groups/memory/memory.rs +++ b/crates/tui/src/commands/groups/memory/memory.rs @@ -23,7 +23,7 @@ use std::path::Path; use super::CommandResult; use crate::tui::app::App; -const MEMORY_USAGE: &str = "/memory [show|path|clear|edit|help]"; +const MEMORY_USAGE: &str = "/memory [show|path|clear|edit|tags|search |search --tag |help]"; fn memory_help(path: &Path) -> String { format!( @@ -31,18 +31,60 @@ fn memory_help(path: &Path) -> String { Usage: {MEMORY_USAGE}\n\n\ Current path: {}\n\n\ Subcommands:\n\ - /memory Show the resolved path and current contents\n\ - /memory show Alias for the no-arg form\n\ - /memory path Print just the resolved path\n\ - /memory clear Replace the file contents with an empty marker\n\ - /memory edit Print the editor command for this file\n\ - /memory help Show this help\n\n\ + /memory Show the resolved path and current contents\n\ + /memory show Alias for the no-arg form\n\ + /memory path Print just the resolved path\n\ + /memory clear Replace the file contents with an empty marker\n\ + /memory edit Print the editor command for this file\n\ + /memory tags List all tags with occurrence counts\n\ + /memory search Search memory by text (body + tags)\n\ + /memory search --tag Search memory by tag (exact match)\n\ + /memory help Show this help\n\n\ Quick capture: type `# foo` in the composer to append a timestamped\n\ bullet without firing a turn.", path.display() ) } +/// Split the argument into subcommand and remaining args. +fn split_subcommand(arg: Option<&str>) -> (&str, Option<&str>) { + match arg { + Some(a) => { + let trimmed = a.trim(); + match trimmed.find(char::is_whitespace) { + Some(pos) => (&trimmed[..pos], Some(trimmed[pos + 1..].trim_start())), + None => (trimmed, None), + } + } + None => ("show", None), + } +} + +fn render_entries(entries: &[&crate::memory::MemoryEntry], prefix: &str) -> String { + let mut lines = String::new(); + for entry in entries { + let _ = std::fmt::Write::write_fmt( + &mut lines, + format_args!("\n{prefix}- ({}) {}", entry.timestamp, entry.body), + ); + if !entry.tags.is_empty() { + let _ = std::fmt::Write::write_fmt( + &mut lines, + format_args!( + " {}", + entry + .tags + .iter() + .map(|t| format!("#{t}")) + .collect::>() + .join(" ") + ), + ); + } + } + lines +} + pub fn memory(app: &mut App, arg: Option<&str>) -> CommandResult { if !app.use_memory { return CommandResult::error( @@ -51,7 +93,7 @@ pub fn memory(app: &mut App, arg: Option<&str>) -> CommandResult { } let path = app.memory_path.clone(); - let sub = arg.unwrap_or("show").trim(); + let (sub, rest) = split_subcommand(arg); match sub { "" | "show" => { @@ -69,6 +111,65 @@ pub fn memory(app: &mut App, arg: Option<&str>) -> CommandResult { CommandResult::message(body) } "path" => CommandResult::message(path.display().to_string()), + "tags" => match fs::read_to_string(&path) { + Ok(content) => { + let tags = crate::memory::list_tags(&content); + if tags.is_empty() { + CommandResult::message("no tags found in memory file") + } else { + let mut lines = format!("Tags in {}:\n", path.display()); + for (i, (tag, count)) in tags.iter().enumerate() { + let _ = std::fmt::Write::write_fmt( + &mut lines, + format_args!("\n {}. #{} ({})", i + 1, tag, count), + ); + } + CommandResult::message(lines) + } + } + Err(_) => CommandResult::message(format!( + "{}\n(file does not exist yet)", + path.display() + )), + }, + "search" => { + let Some(query) = rest.filter(|r| !r.is_empty()) else { + return CommandResult::error( + "Usage: /memory search or /memory search --tag ", + ); + }; + let content = match fs::read_to_string(&path) { + Ok(c) => c, + Err(_) => { + return CommandResult::message(format!( + "memory file does not exist yet at {}", + path.display() + )); + } + }; + let entries = crate::memory::parse_all(&content); + + // Check for --tag flag + let results: Vec<&crate::memory::MemoryEntry> = if query.starts_with("--tag ") { + let tag = query.trim_start_matches("--tag ").trim(); + crate::memory::search_by_tags(&entries, &[tag]) + } else { + crate::memory::search_text(&entries, query) + }; + + if results.is_empty() { + CommandResult::message(format!( + "no memory entries matching \"{query}\"" + )) + } else { + let body = render_entries(&results, ""); + CommandResult::message(format!( + "{} matching entry(ies) for \"{query}\":{}", + results.len(), + body + )) + } + } "clear" => match fs::write(&path, "") { Ok(()) => CommandResult::message(format!("memory cleared: {}", path.display())), Err(err) => CommandResult::error(format!("failed to clear {}: {err}", path.display())), @@ -123,7 +224,7 @@ mod tests { let mut app = create_test_app_with_memory(&tmpdir, true); let result = memory(&mut app, Some("help")); let msg = result.message.expect("help should return text"); - assert!(msg.contains("Usage: /memory [show|path|clear|edit|help]")); + assert!(msg.contains("Usage: /memory [show|path|clear|edit|tags|search |search --tag |help]")); assert!(msg.contains("/memory edit")); assert!(msg.contains(app.memory_path.to_string_lossy().as_ref())); } diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index d55b9211a..bff53adfb 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -2917,6 +2917,44 @@ impl Engine { .working_set .pinned_message_indices(&self.session.messages, &self.session.workspace); + // Build memory context from user's memory file for enriched seams. + // Limit injected entries to prevent seam bloat. + const MAX_MEMORY_CONTEXT_ENTRIES: usize = 20; + let memory_context: Option = if self.config.memory_enabled { + let path = &self.config.memory_path; + std::fs::read_to_string(path).ok().map(|content| { + let index = crate::memory_index::MemoryIndex::from_content(&content); + if index.is_empty() { + return String::new(); + } + // Extract topic tags from the messages to be summarized + let recent_msgs: Vec<&crate::models::Message> = (0..msg_range_end) + .filter_map(|i| self.session.messages.get(i)) + .collect(); + let topics = crate::seam_manager::SeamManager::extract_topic_tags(&recent_msgs); + + let matched: Vec<&crate::memory::MemoryEntry> = if topics.is_empty() { + // No specific topics — include recent memory entries as general context + index.entries().iter().rev().take(MAX_MEMORY_CONTEXT_ENTRIES).collect() + } else { + let topic_refs: Vec<&str> = topics.iter().map(String::as_str).collect(); + let by_tag = index.search_by_tags(&topic_refs); + if by_tag.is_empty() { + index.entries().iter().rev().take(MAX_MEMORY_CONTEXT_ENTRIES).collect() + } else { + by_tag.into_iter().take(MAX_MEMORY_CONTEXT_ENTRIES).collect() + } + }; + matched + .iter() + .map(|e| format!("- ({}) {} #{}", e.timestamp, e.body, e.tags.join(" #"))) + .collect::>() + .join("\n") + }) + } else { + None + }; + let _ = self .tx_event .send(Event::status(format!( @@ -2935,6 +2973,7 @@ impl Engine { msg_range_end, Some(&self.session.workspace), &pinned, + memory_context.as_deref(), ) .await { @@ -2949,7 +2988,14 @@ impl Engine { .filter_map(|i| self.session.messages.get(i)) .collect(); match seam_mgr - .recompact(&existing_seams, &recent, level, 0, msg_range_end) + .recompact( + &existing_seams, + &recent, + level, + 0, + msg_range_end, + memory_context.as_deref(), + ) .await { Ok(text) => text, diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 6805a11f2..6dfde8732 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -52,6 +52,7 @@ mod lsp; mod mcp; mod mcp_server; mod memory; +mod memory_index; mod model_catalog; mod model_inventory; mod model_registry; diff --git a/crates/tui/src/memory.rs b/crates/tui/src/memory.rs index 65533bd92..df43508c9 100644 --- a/crates/tui/src/memory.rs +++ b/crates/tui/src/memory.rs @@ -21,6 +21,7 @@ //! That keeps existing users on zero-overhead behavior and makes the //! feature explicit. +use std::collections::HashMap; use std::fs; use std::io::{self, Write}; use std::path::Path; @@ -113,11 +114,35 @@ pub fn compose_block(enabled: bool, path: &Path) -> Option { as_system_block(&content, path) } +/// Parse `#tag` hashtags from a text string, returning them in order of +/// appearance. Duplicates are preserved as-is; the caller should deduplicate +/// if needed. +pub fn extract_tags(text: &str) -> Vec<&str> { + text.split_whitespace() + .filter(|w| w.starts_with('#') && w.len() > 1 && !w[1..].starts_with('#')) + .collect() +} + +/// Remove `#tag` hashtags from a text string, returning the cleaned text. +/// This is used to separate tags from the note body before storage. +fn strip_tags(text: &str) -> String { + text.split_whitespace() + .filter(|w| !(w.starts_with('#') && w.len() > 1 && !w[1..].starts_with('#'))) + .collect::>() + .join(" ") +} + /// Append `entry` to the memory file at `path`, creating it (and its /// parent directory) if needed. The entry is timestamped so the user can /// later see when each note was added. The leading `#` from a `# foo` /// quick-add is stripped so the file stays as readable Markdown. -pub fn append_entry(path: &Path, entry: &str) -> io::Result<()> { +/// +/// Tags are extracted from two sources: +/// 1. `#tag` hashtags found inline in the entry text +/// 2. The explicit `extra_tags` parameter +/// +/// All tags are deduplicated and appended as `#tag` suffixes on the bullet. +pub fn append_entry(path: &Path, entry: &str, extra_tags: &[&str]) -> io::Result<()> { let trimmed = entry.trim_start_matches('#').trim(); if trimmed.is_empty() { return Err(io::Error::new( @@ -126,6 +151,26 @@ pub fn append_entry(path: &Path, entry: &str) -> io::Result<()> { )); } + // Extract inline tags from the entry, then strip them from the body + let inline_tags = extract_tags(trimmed); + let body = strip_tags(trimmed); + let body = body.trim(); + if body.is_empty() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "memory entry has only tags, no content", + )); + } + + // Merge and deduplicate tags + let mut all_tags: Vec<&str> = Vec::new(); + for t in inline_tags.into_iter().chain(extra_tags.iter().copied()) { + let tag = t.trim_start_matches('#'); + if !tag.is_empty() && !all_tags.contains(&tag) { + all_tags.push(tag); + } + } + if let Some(parent) = path.parent() && !parent.as_os_str().is_empty() { @@ -133,14 +178,154 @@ pub fn append_entry(path: &Path, entry: &str) -> io::Result<()> { } let timestamp = Utc::now().format("%Y-%m-%d %H:%M UTC"); + let tag_str = if all_tags.is_empty() { + String::new() + } else { + format!(" {}", all_tags.iter().map(|t| format!("#{t}")).collect::>().join(" ")) + }; let mut file = fs::OpenOptions::new() .create(true) .append(true) .open(path)?; - writeln!(file, "- ({timestamp}) {trimmed}")?; + writeln!(file, "- ({timestamp}) {body}{tag_str}")?; Ok(()) } +/// A parsed memory entry with structured fields: timestamp, body text, +/// and a deduplicated list of tags (without leading `#`). +#[derive(Debug, Clone)] +pub struct MemoryEntry { + pub timestamp: String, + pub body: String, + pub tags: Vec, + #[allow(dead_code)] + pub raw: String, +} + +/// Parse a single memory line into structured components. +/// +/// Format: `- (2026-06-22 10:30 UTC) body text #tag1 #tag2` +/// +/// Returns `None` for lines that don't match the expected format (blank +/// lines, non-bullet text, free-form markdown, etc.). +pub fn parse_entry(line: &str) -> Option { + let line = line.trim(); + if !line.starts_with("- (") { + return None; + } + let close_paren = line.find(')')?; + let timestamp = line[3..close_paren].to_string(); + let rest = line[close_paren + 1..].trim(); + if rest.is_empty() { + return None; + } + let tag_strs = extract_tags(rest); + let body = strip_tags(rest); + let body = body.trim(); + if body.is_empty() { + return None; + } + let mut seen = Vec::new(); + let tags: Vec = tag_strs + .iter() + .map(|t| t.trim_start_matches('#').to_string()) + .filter(|t| { + if seen.contains(t) { + false + } else { + seen.push(t.clone()); + true + } + }) + .collect(); + Some(MemoryEntry { + timestamp, + body: body.to_string(), + tags, + raw: line.to_string(), + }) +} + +/// Parse all bullet entries from memory file content. Non-bullet lines +/// (blank lines, free-form markdown) are silently skipped. +pub fn parse_all(content: &str) -> Vec { + content.lines().filter_map(parse_entry).collect() +} + +/// List all unique tags with their occurrence counts, sorted by frequency +/// (most frequent first). Tags are returned without the leading `#`. +pub fn list_tags(content: &str) -> Vec<(String, usize)> { + let entries = parse_all(content); + let mut counts: HashMap = HashMap::new(); + for entry in &entries { + for tag in &entry.tags { + *counts.entry(tag.clone()).or_insert(0) += 1; + } + } + let mut result: Vec<_> = counts.into_iter().collect(); + result.sort_by(|a, b| b.1.cmp(&a.1)); + result +} + +/// Filter entries that match any of the given tags (OR logic). Tag +/// matching is case-insensitive and supports both `#tag` and `tag` forms. +pub fn search_by_tags<'a>(entries: &'a [MemoryEntry], tags: &[&str]) -> Vec<&'a MemoryEntry> { + if tags.is_empty() { + return entries.iter().collect(); + } + let normalized: Vec = tags + .iter() + .map(|t| t.trim_start_matches('#').to_lowercase()) + .collect(); + entries + .iter() + .filter(|e| normalized.iter().any(|t| e.tags.iter().any(|et| et.to_lowercase() == *t))) + .collect() +} + +/// Search entries by text content (case-insensitive substring match against +/// both body and tags). +pub fn search_text<'a>(entries: &'a [MemoryEntry], query: &str) -> Vec<&'a MemoryEntry> { + let q = query.to_lowercase(); + entries + .iter() + .filter(|e| { + e.body.to_lowercase().contains(&q) + || e.tags.iter().any(|t| t.to_lowercase().contains(&q)) + }) + .collect() +} + +/// Simple auto-tagging for entries that have no explicit tags. Extracts +/// capitalized words (potential proper nouns / technical terms) and +/// words containing special characters (camelCase, snake_case, etc.) +/// as candidate tags. Returns at most `max_tags` tags, sorted by quality. +pub fn auto_tag(text: &str, max_tags: usize) -> Vec { + let mut candidates: Vec = Vec::new(); + let mut seen = std::collections::HashSet::new(); + + for word in text.split_whitespace() { + let clean = word.trim_matches(|c: char| c.is_ascii_punctuation()); + if clean.len() < 3 || clean.chars().all(|c| c.is_ascii_digit()) { + continue; + } + // Capitalized words (proper nouns / technical terms) + if clean.starts_with(|c: char| c.is_uppercase()) { + let tag = clean.to_lowercase(); + if seen.insert(tag.clone()) { + candidates.push(tag); + } + } else if clean.contains(|c: char| !c.is_alphanumeric() && c != '\'') { + let tag = clean.to_lowercase(); + if seen.insert(tag.clone()) { + candidates.push(tag); + } + } + } + candidates.truncate(max_tags); + candidates +} + #[cfg(test)] mod tests { use super::*; @@ -249,7 +434,7 @@ mod tests { fn append_entry_creates_file_and_writes_one_bullet() { let tmp = tempdir().unwrap(); let path = tmp.path().join("memory.md"); - append_entry(&path, "# remember the milk").unwrap(); + append_entry(&path, "# remember the milk", &[]).unwrap(); let body = fs::read_to_string(&path).unwrap(); assert!(body.contains("remember the milk"), "{body}"); @@ -258,14 +443,16 @@ mod tests { "should start with bullet + date: {body}" ); assert!(body.trim_end().ends_with("remember the milk")); + // No tags appended + assert!(!body.contains('#'), "no tags expected: {body}"); } #[test] fn append_entry_appends_subsequent_lines() { let tmp = tempdir().unwrap(); let path = tmp.path().join("memory.md"); - append_entry(&path, "# first").unwrap(); - append_entry(&path, "second").unwrap(); + append_entry(&path, "# first", &[]).unwrap(); + append_entry(&path, "second", &[]).unwrap(); let body = fs::read_to_string(&path).unwrap(); assert!(body.contains("first")); assert!(body.contains("second")); @@ -277,7 +464,257 @@ mod tests { fn append_entry_rejects_empty_after_strip() { let tmp = tempdir().unwrap(); let path = tmp.path().join("memory.md"); - let err = append_entry(&path, "###").unwrap_err(); + let err = append_entry(&path, "###", &[]).unwrap_err(); assert_eq!(err.kind(), io::ErrorKind::InvalidInput); } + + #[test] + fn append_entry_stores_inline_tags() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + append_entry(&path, "# use 4 spaces #indentation #rust", &[]).unwrap(); + let body = fs::read_to_string(&path).unwrap(); + assert!(body.contains("use 4 spaces"), "{body}"); + assert!(body.contains("#indentation"), "{body}"); + assert!(body.contains("#rust"), "{body}"); + // Tags appear as suffix after body, not inline within the body text + assert!( + body.contains("use 4 spaces #indentation"), + "tags should be appended as suffix: {body}" + ); + } + + #[test] + fn append_entry_merges_extra_tags_with_inline_tags() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + append_entry(&path, "use tabs #preference", &["editor", "preference"]).unwrap(); + let body = fs::read_to_string(&path).unwrap(); + assert!(body.contains("use tabs"), "{body}"); + assert!(body.contains("#preference"), "{body}"); + assert!(body.contains("#editor"), "{body}"); + } + + #[test] + fn append_entry_deduplicates_tags() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + append_entry(&path, "note #dupe", &["dupe", "unique"]).unwrap(); + let body = fs::read_to_string(&path).unwrap(); + // "#dupe" should appear only once + assert_eq!(body.matches("#dupe").count(), 1, "duplicate tag: {body}"); + assert!(body.contains("#unique"), "{body}"); + } + + #[test] + fn append_entry_rejects_only_tags_no_body() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + let err = append_entry(&path, "# #tag #only", &[]).unwrap_err(); + assert_eq!(err.kind(), io::ErrorKind::InvalidInput); + } + + #[test] + fn extract_tags_parses_hashtags() { + let tags = extract_tags("hello #world this #is #a test"); + assert_eq!(tags, vec!["#world", "#is", "#a"]); + } + + #[test] + fn extract_tags_ignores_double_hash() { + let tags = extract_tags("hello ##world #valid"); + assert_eq!(tags, vec!["#valid"]); + } + + #[test] + fn extract_tags_returns_empty_for_no_tags() { + let tags = extract_tags("hello world"); + assert!(tags.is_empty()); + } + + // === parse_entry / parse_all === + + #[test] + fn parse_entry_parses_standard_bullet() { + let entry = parse_entry("- (2026-06-22 10:30 UTC) remember the milk #chore").unwrap(); + assert_eq!(entry.timestamp, "2026-06-22 10:30 UTC"); + assert_eq!(entry.body, "remember the milk"); + assert_eq!(entry.tags, vec!["chore"]); + } + + #[test] + fn parse_entry_returns_none_for_non_bullet() { + assert!(parse_entry("free form text").is_none()); + assert!(parse_entry("").is_none()); + assert!(parse_entry(" ").is_none()); + } + + #[test] + fn parse_entry_handles_multi_tag() { + let entry = + parse_entry("- (2026-06-22 10:30 UTC) use 4 spaces #indentation #rust #style").unwrap(); + assert_eq!(entry.body, "use 4 spaces"); + assert_eq!(entry.tags, vec!["indentation", "rust", "style"]); + } + + #[test] + fn parse_entry_deduplicates_tags() { + let entry = + parse_entry("- (2026-06-22 10:30 UTC) note #dupe #unique #dupe").unwrap(); + assert_eq!(entry.tags, vec!["dupe", "unique"]); + } + + #[test] + fn parse_entry_handles_no_tags() { + let entry = parse_entry("- (2026-06-22 10:30 UTC) plain note").unwrap(); + assert_eq!(entry.body, "plain note"); + assert!(entry.tags.is_empty()); + } + + #[test] + fn parse_all_skips_non_bullet_lines() { + let content = "\ +- (2026-06-22 10:30 UTC) first #tag1 +some free text +- (2026-06-22 11:00 UTC) second #tag2 + +- (2026-06-22 12:00 UTC) third #tag3"; + let entries = parse_all(content); + assert_eq!(entries.len(), 3); + assert_eq!(entries[0].body, "first"); + assert_eq!(entries[1].body, "second"); + assert_eq!(entries[2].body, "third"); + } + + #[test] + fn parse_all_returns_empty_for_empty_content() { + assert!(parse_all("").is_empty()); + assert!(parse_all(" \n\n ").is_empty()); + } + + // === list_tags === + + #[test] + fn list_tags_returns_sorted_counts() { + let content = "\ +- (2026-06-22 10:00 UTC) a #rust #cli +- (2026-06-22 11:00 UTC) b #rust #web +- (2026-06-22 12:00 UTC) c #cli"; + let tags = list_tags(content); + assert_eq!(tags.len(), 3); + // Most frequent first + assert!(tags[0].0 == "rust" || tags[0].0 == "cli"); + assert_eq!(tags.iter().find(|(t, _)| t == "rust").unwrap().1, 2); + assert_eq!(tags.iter().find(|(t, _)| t == "cli").unwrap().1, 2); + assert_eq!(tags.iter().find(|(t, _)| t == "web").unwrap().1, 1); + } + + #[test] + fn list_tags_returns_empty_when_no_entries() { + assert!(list_tags("").is_empty()); + } + + // === search_by_tags === + + #[test] + fn search_by_tags_finds_matching_entries() { + let entries = parse_all( + "\ +- (2026-06-22 10:00 UTC) first #rust +- (2026-06-22 11:00 UTC) second #python +- (2026-06-22 12:00 UTC) third #rust #web", + ); + let results = search_by_tags(&entries, &["rust"]); + assert_eq!(results.len(), 2); + assert!(results.iter().any(|e| e.body == "first")); + assert!(results.iter().any(|e| e.body == "third")); + } + + #[test] + fn search_by_tags_accepts_hash_prefix() { + let entries = parse_all("- (2026-06-22 10:00 UTC) note #mytag"); + let results = search_by_tags(&entries, &["#mytag"]); + assert_eq!(results.len(), 1); + } + + #[test] + fn search_by_tags_or_logic() { + let entries = parse_all( + "\ +- (2026-06-22 10:00 UTC) first #rust +- (2026-06-22 11:00 UTC) second #python", + ); + let results = search_by_tags(&entries, &["rust", "python"]); + assert_eq!(results.len(), 2); + } + + #[test] + fn search_by_tags_returns_all_when_empty() { + let entries = parse_all( + "\ +- (2026-06-22 10:00 UTC) first #rust +- (2026-06-22 11:00 UTC) second #python", + ); + let results = search_by_tags(&entries, &[]); + assert_eq!(results.len(), 2); + } + + #[test] + fn search_by_tags_no_match() { + let entries = parse_all("- (2026-06-22 10:00 UTC) note #rust"); + let results = search_by_tags(&entries, &["nonexistent"]); + assert!(results.is_empty()); + } + + // === search_text === + + #[test] + fn search_text_case_insensitive() { + let entries = parse_all("- (2026-06-22 10:00 UTC) Use Four Spaces"); + let results = search_text(&entries, "four"); + assert_eq!(results.len(), 1); + let results = search_text(&entries, "FOUR"); + assert_eq!(results.len(), 1); + } + + #[test] + fn search_text_matches_tags() { + let entries = parse_all("- (2026-06-22 10:00 UTC) note #indentation"); + let results = search_text(&entries, "indentation"); + assert_eq!(results.len(), 1); + } + + #[test] + fn search_text_no_match() { + let entries = parse_all("- (2026-06-22 10:00 UTC) note #rust"); + let results = search_text(&entries, "python"); + assert!(results.is_empty()); + } + + // === auto_tag === + + #[test] + fn auto_tag_extracts_capitalized_words() { + let tags = auto_tag("use DeepSeek V4 in CodeWhale", 5); + assert!(tags.contains(&"deepseek".to_string())); + assert!(tags.contains(&"codewhale".to_string())); + } + + #[test] + fn auto_tag_handles_snake_case() { + let tags = auto_tag("check the memory_manager config", 5); + assert!(tags.contains(&"memory_manager".to_string())); + } + + #[test] + fn auto_tag_respects_max_tags() { + let tags = auto_tag("Foo Bar Baz Qux Quux", 3); + assert_eq!(tags.len(), 3); + } + + #[test] + fn auto_tag_returns_empty_for_no_candidates() { + let tags = auto_tag("a be in it", 5); + assert!(tags.is_empty()); + } } diff --git a/crates/tui/src/memory_index.rs b/crates/tui/src/memory_index.rs new file mode 100644 index 000000000..3613ee5e5 --- /dev/null +++ b/crates/tui/src/memory_index.rs @@ -0,0 +1,360 @@ +use std::collections::{HashMap, HashSet}; + +use crate::memory::MemoryEntry; + +/// Lightweight inverted index over memory entries. +/// +/// Maintains two indices: +/// - **Tag index**: maps each tag (lowercased, without `#`) to the set of +/// entry indices that carry it. +/// - **Full-text index**: maps each word (lowercased) to the set of entry +/// indices whose body or tags contain it. +/// +/// The index is rebuilt from scratch each time the memory file changes, +/// keeping the implementation simple and avoiding stale-entry bugs. +pub struct MemoryIndex { + /// Entries in display order (oldest first). + entries: Vec, + /// Inverted index: tag → entry indices. + tag_index: HashMap>, + /// Full-text index: word → entry indices. + text_index: HashMap>, +} + +impl MemoryIndex { + /// Build an index from parsed memory entries. + #[must_use] + pub fn build(entries: Vec) -> Self { + let mut tag_index: HashMap> = HashMap::new(); + let mut text_index: HashMap> = HashMap::new(); + + for (i, entry) in entries.iter().enumerate() { + // Index tags + for tag in &entry.tags { + let key = tag.to_lowercase(); + tag_index.entry(key).or_default().push(i); + } + // Index body words + for word in entry.body.split_whitespace() { + let clean: String = word + .chars() + .filter(|c| c.is_alphanumeric() || *c == '_' || *c == '-') + .collect(); + if clean.len() >= 2 { + text_index.entry(clean.to_lowercase()).or_default().push(i); + } + } + // Index tag strings as text too + for tag in &entry.tags { + for word in tag.split(|c: char| !c.is_alphanumeric()) { + if word.len() >= 2 { + text_index + .entry(word.to_lowercase()) + .or_default() + .push(i); + } + } + } + } + + // Deduplicate index entries (same entry may contribute a word multiple times) + for indices in tag_index.values_mut() { + indices.sort_unstable(); + indices.dedup(); + } + for indices in text_index.values_mut() { + indices.sort_unstable(); + indices.dedup(); + } + + Self { + entries, + tag_index, + text_index, + } + } + + /// Rebuild the index from memory file content. + #[must_use] + pub fn from_content(content: &str) -> Self { + Self::build(crate::memory::parse_all(content)) + } + + /// Return a reference to the underlying entries. + #[must_use] + pub fn entries(&self) -> &[MemoryEntry] { + &self.entries + } + + /// Search by tags (OR logic — any matching tag). Returns matching + /// entries in display order. + #[must_use] + pub fn search_by_tags(&self, tags: &[&str]) -> Vec<&MemoryEntry> { + if tags.is_empty() { + return self.entries.iter().collect(); + } + let mut matched = HashSet::new(); + for tag in tags { + let key = tag.trim_start_matches('#').to_lowercase(); + if let Some(indices) = self.tag_index.get(&key) { + for &i in indices { + matched.insert(i); + } + } + } + let mut indices: Vec = matched.into_iter().collect(); + indices.sort_unstable(); + indices.iter().map(|&i| &self.entries[i]).collect() + } + + /// Full-text search (AND logic — all query words must match). Returns + /// matching entries in display order. + #[must_use] + pub fn search_text(&self, query: &str) -> Vec<&MemoryEntry> { + let words: Vec = query + .split_whitespace() + .filter_map(|w| { + let clean: String = w + .chars() + .filter(|c| c.is_alphanumeric() || *c == '_' || *c == '-') + .collect(); + if clean.len() >= 2 { + Some(clean.to_lowercase()) + } else { + None + } + }) + .collect(); + + if words.is_empty() { + return self.entries.iter().collect(); + } + + // Find intersection of all word matches + let mut result: Option> = None; + for word in &words { + if let Some(indices) = self.text_index.get(word) { + let set: HashSet = indices.iter().copied().collect(); + result = match result { + Some(existing) => Some(existing.intersection(&set).copied().collect()), + None => Some(set), + }; + } else { + // A required word has no matches → empty result + return Vec::new(); + } + } + + let mut indices: Vec = result.unwrap_or_default().into_iter().collect(); + indices.sort_unstable(); + indices.iter().map(|&i| &self.entries[i]).collect() + } + + /// Combined search: filter by tags (OR) and text (AND). + /// Returns entries that match both criteria. + #[must_use] + pub fn search(&self, tags: &[&str], text: Option<&str>) -> Vec<&MemoryEntry> { + if tags.is_empty() && text.is_none() { + return self.entries.iter().collect(); + } + + let entry_set: HashSet = (0..self.entries.len()).collect(); + + let tag_indices: HashSet = if tags.is_empty() { + entry_set.clone() + } else { + let mut s = HashSet::new(); + for tag in tags { + let key = tag.trim_start_matches('#').to_lowercase(); + if let Some(indices) = self.tag_index.get(&key) { + for &i in indices { + s.insert(i); + } + } + } + s + }; + + let text_indices: HashSet = if let Some(query) = text { + let words: Vec = query + .split_whitespace() + .filter_map(|w| { + let clean: String = w + .chars() + .filter(|c| c.is_alphanumeric() || *c == '_' || *c == '-') + .collect(); + if clean.len() >= 2 { + Some(clean.to_lowercase()) + } else { + None + } + }) + .collect(); + if words.is_empty() { + entry_set + } else { + let mut result: Option> = None; + for word in &words { + if let Some(indices) = self.text_index.get(word) { + let set: HashSet = indices.iter().copied().collect(); + result = Some(match result { + Some(existing) => existing.intersection(&set).copied().collect(), + None => set, + }); + } else { + return Vec::new(); + } + } + result.unwrap_or_default() + } + } else { + entry_set + }; + + let mut indices: Vec = tag_indices + .intersection(&text_indices) + .copied() + .collect(); + indices.sort_unstable(); + indices.iter().map(|&i| &self.entries[i]).collect() + } + + /// Get all unique tags with their occurrence counts, sorted by + /// frequency (most frequent first). + #[must_use] + pub fn all_tags(&self) -> Vec<(String, usize)> { + let mut counts: HashMap = HashMap::new(); + for (_tag, indices) in &self.tag_index { + counts.insert(_tag.clone(), indices.len()); + } + let mut result: Vec<_> = counts.into_iter().collect(); + result.sort_by(|a, b| b.1.cmp(&a.1)); + result + } + + /// Number of entries in the index. + #[must_use] + pub fn len(&self) -> usize { + self.entries.len() + } + + /// Whether the index is empty. + #[must_use] + pub fn is_empty(&self) -> bool { + self.entries.is_empty() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_entries() -> Vec { + crate::memory::parse_all( + "\ +- (2026-06-22 10:00 UTC) first entry about Rust #rust +- (2026-06-22 11:00 UTC) python web framework #python #web +- (2026-06-22 12:00 UTC) rust cli tooling #rust #cli +- (2026-06-22 13:00 UTC) web design patterns #web", + ) + } + + #[test] + fn index_builds_from_entries() { + let index = MemoryIndex::build(sample_entries()); + assert_eq!(index.len(), 4); + assert!(!index.is_empty()); + } + + #[test] + fn index_from_content_parses_and_indexes() { + let index = MemoryIndex::from_content( + "- (2026-06-22 10:00 UTC) test entry #test", + ); + assert_eq!(index.len(), 1); + let tags = index.all_tags(); + assert_eq!(tags.len(), 1); + assert_eq!(tags[0].0, "test"); + } + + #[test] + fn search_by_tags_or() { + let index = MemoryIndex::build(sample_entries()); + let results = index.search_by_tags(&["rust"]); + assert_eq!(results.len(), 2); + assert!(results.iter().any(|e| e.body.contains("first entry"))); + assert!(results.iter().any(|e| e.body.contains("cli tooling"))); + } + + #[test] + fn search_by_tags_multiple() { + let index = MemoryIndex::build(sample_entries()); + let results = index.search_by_tags(&["python", "cli"]); + assert_eq!(results.len(), 2); + } + + #[test] + fn search_by_tags_empty_returns_all() { + let index = MemoryIndex::build(sample_entries()); + let results = index.search_by_tags(&[]); + assert_eq!(results.len(), 4); + } + + #[test] + fn search_text_and() { + let index = MemoryIndex::build(sample_entries()); + // "rust framework" → intersection: entry 0 has "rust", entry 1 has "framework" + // None should have both + let results = index.search_text("rust framework"); + assert!(results.is_empty()); + } + + #[test] + fn search_text_single_word() { + let index = MemoryIndex::build(sample_entries()); + let results = index.search_text("python"); + assert_eq!(results.len(), 1); + } + + #[test] + fn search_text_case_insensitive() { + let index = MemoryIndex::build(sample_entries()); + let results = index.search_text("RUST"); + assert_eq!(results.len(), 2); + } + + #[test] + fn search_combined() { + let index = MemoryIndex::build(sample_entries()); + let results = index.search(&["web"], Some("patterns")); + assert_eq!(results.len(), 1); + assert!(results[0].body.contains("design patterns")); + } + + #[test] + fn search_no_match() { + let index = MemoryIndex::build(sample_entries()); + let results = index.search(&["nonexistent"], None); + assert!(results.is_empty()); + } + + #[test] + fn all_tags_sorted_by_frequency() { + let index = MemoryIndex::build(sample_entries()); + let tags = index.all_tags(); + assert!(tags.iter().any(|(t, _)| t == "rust")); + assert!(tags.iter().any(|(t, _)| t == "web")); + assert!(tags.iter().any(|(t, _)| t == "python")); + assert!(tags.iter().any(|(t, _)| t == "cli")); + } + + #[test] + fn empty_index() { + let index = MemoryIndex::build(vec![]); + assert!(index.is_empty()); + assert_eq!(index.len(), 0); + assert!(index.all_tags().is_empty()); + assert!(index.search_by_tags(&["anything"]).is_empty()); + assert!(index.search_text("anything").is_empty()); + } +} diff --git a/crates/tui/src/seam_manager.rs b/crates/tui/src/seam_manager.rs index e59575321..6952dfcbc 100644 --- a/crates/tui/src/seam_manager.rs +++ b/crates/tui/src/seam_manager.rs @@ -104,6 +104,10 @@ pub struct SeamMetadata { /// Model that produced it. #[allow(dead_code)] pub model: String, + /// Tags inferred from the summarized conversation segment. + /// Used to cross-reference with user memory for enriched summaries. + #[allow(dead_code)] + pub tags: Vec, } /// The Flash seam manager — produces `` blocks. @@ -156,8 +160,44 @@ impl SeamManager { message_count.saturating_sub(verbatim_messages) } + /// Extract topic tags from a set of messages. + /// Scans message content for capitalized technical terms, file paths, + /// and common topic patterns. Used to cross-reference with user memory. + #[must_use] + pub fn extract_topic_tags(messages: &[&Message]) -> Vec { + let mut candidates: Vec = Vec::new(); + let mut seen = std::collections::HashSet::new(); + for msg in messages { + for block in &msg.content { + if let ContentBlock::Text { text, .. } = block { + for word in text.split_whitespace() { + let clean = word.trim_matches(|c: char| c.is_ascii_punctuation()); + if clean.len() < 4 { + continue; + } + // Capitalized words (technologies, frameworks, languages) + if clean.starts_with(|c: char| c.is_uppercase()) + && !clean.starts_with(|c: char| c.is_ascii_digit()) + { + let lower = clean.to_lowercase(); + if seen.insert(lower.clone()) { + candidates.push(lower); + } + } + } + } + } + } + candidates.truncate(10); + candidates + } + /// Produce a soft seam for the given message range and level. /// + /// `memory_context` is optional — when provided, it is included in the + /// summarization prompt so the seam can reference user memory preferences + /// relevant to the conversation segment. + /// /// Returns the `` XML block as a string, ready to /// be appended as an assistant message. pub async fn produce_soft_seam( @@ -168,6 +208,7 @@ impl SeamManager { end_idx: usize, workspace: Option<&Path>, pinned_indices: &[usize], + memory_context: Option<&str>, ) -> Result { if messages.is_empty() || start_idx >= end_idx { return Ok(String::new()); @@ -203,8 +244,11 @@ impl SeamManager { return Ok(String::new()); } + // Extract topic tags for seam metadata + let topic_tags = Self::extract_topic_tags(&to_summarize); + let summary = self - .summarize_messages(&to_summarize, level, start_idx, end_idx) + .summarize_messages(&to_summarize, level, start_idx, end_idx, memory_context) .await?; let density_label = match level { @@ -227,6 +271,7 @@ impl SeamManager { token_estimate, timestamp, model: self.config.seam_model.clone(), + tags: topic_tags, }); } @@ -243,6 +288,9 @@ impl SeamManager { /// Re-compact existing seams into a higher-level block. Consumes prior /// `` content and fuses it with new messages. + /// + /// `memory_context` is optional user-memory context to include in the + /// recompaction prompt for enriched seam quality. pub async fn recompact( &self, existing_seams: &[String], @@ -250,6 +298,7 @@ impl SeamManager { level: u8, start_idx: usize, end_idx: usize, + memory_context: Option<&str>, ) -> Result { let mut input = String::from( "## Prior Context Summaries\n\n\ @@ -279,6 +328,11 @@ impl SeamManager { _ => (L3_MAX_TOKENS, 400), }; + let memory_section = memory_context + .filter(|c| !c.trim().is_empty()) + .map(|c| format!("\n\n## Relevant User Memory\n\n{c}\n")) + .unwrap_or_default(); + let request = MessageRequest { model: self.config.seam_model.clone(), messages: vec![Message { @@ -289,7 +343,7 @@ impl SeamManager { Preserve: decisions made, file paths, error messages, \ constraints, hypotheses, open questions, and task state. \ Drop: greeting, filler, repeated information. \ - Keep it under {word_limit} words.\n\n{input}" + Keep it under {word_limit} words.\n\n{input}{memory_section}" ), cache_control: None, }], @@ -329,6 +383,9 @@ impl SeamManager { let token_estimate = summary.len() / 4; let timestamp = Utc::now(); + // Extract topic tags from recompacted messages + let topic_tags = Self::extract_topic_tags(new_messages); + // Record this recompacted seam. { let mut seams = self.active_seams.lock().await; @@ -339,6 +396,7 @@ impl SeamManager { token_estimate, timestamp, model: self.config.seam_model.clone(), + tags: topic_tags, }); } @@ -359,6 +417,7 @@ impl SeamManager { level: u8, start_idx: usize, end_idx: usize, + memory_context: Option<&str>, ) -> Result { let mut conversation = String::new(); @@ -392,6 +451,11 @@ impl SeamManager { } } + let memory_section = memory_context + .filter(|c| !c.trim().is_empty()) + .map(|c| format!("\n\n## Relevant User Memory\n\nConsider these user preferences and conventions from the user's memory file (they may be relevant to the conversation segment):\n\n{c}\n")) + .unwrap_or_default(); + let (max_tokens, word_limit) = match level { 1 => (L1_MAX_TOKENS, 800), 2 => (L2_MAX_TOKENS, 600), @@ -410,7 +474,7 @@ impl SeamManager { command invocations, error messages, tool-result facts, constraints \ discovered, hypotheses being tested, and open questions. \ Drop: greetings, filler, repeated information, and thinking blocks. \ - Keep it under {word_limit} words.\n\n---\n\n{conversation}" + Keep it under {word_limit} words.{memory_section}\n\n---\n\n{conversation}" ), cache_control: None, }], diff --git a/crates/tui/src/tools/remember.rs b/crates/tui/src/tools/remember.rs index 05b6ff5dd..612b05300 100644 --- a/crates/tui/src/tools/remember.rs +++ b/crates/tui/src/tools/remember.rs @@ -43,6 +43,11 @@ impl ToolSpec for RememberTool { "note": { "type": "string", "description": "The single-sentence durable note to remember." + }, + "tags": { + "type": "array", + "items": { "type": "string", "description": "A hashtag (with or without leading #)" }, + "description": "Optional tags to attach to this entry for future retrieval. Use tags like \"project:codewhale\", \"type:preference\", or \"scope:config\"." } }, "required": ["note"] @@ -69,13 +74,49 @@ impl ToolSpec for RememberTool { ) })?; - crate::memory::append_entry(path, note).map_err(|err| { + // Extract optional tags, normalizing leading # + let tags: Vec = input + .get("tags") + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str()) + .map(|t| { + let trimmed = t.trim().trim_start_matches('#'); + if trimmed.is_empty() { String::new() } else { trimmed.to_string() } + }) + .filter(|t| !t.is_empty()) + .collect() + }) + .unwrap_or_default(); + + // Auto-tag when the model didn't provide explicit tags + let final_tags: Vec = if tags.is_empty() { + crate::memory::auto_tag(note, 5) + } else { + tags + }; + let tag_refs: Vec<&str> = final_tags.iter().map(String::as_str).collect(); + crate::memory::append_entry(path, note, &tag_refs).map_err(|err| { ToolError::execution_failed(format!("failed to append to {}: {err}", path.display())) })?; + let tag_msg = if final_tags.is_empty() { + String::new() + } else { + format!( + " [{}]", + final_tags + .iter() + .map(|t| format!("#{t}")) + .collect::>() + .join(" ") + ) + }; Ok(ToolResult::success(format!( - "remembered: {}", - note.trim_start_matches('#').trim() + "remembered: {}{}", + note.trim_start_matches('#').trim(), + tag_msg ))) } } @@ -135,4 +176,101 @@ mod tests { let err = tool.execute(json!({}), &ctx).await.unwrap_err(); assert!(err.to_string().to_lowercase().contains("note"), "{err}"); } + + #[tokio::test] + async fn appends_with_tags() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + let ctx = ctx_with_memory(path.clone()); + + let tool = RememberTool; + let result = tool + .execute(json!({"note": "use 4 spaces", "tags": ["indentation", "rust"]}), &ctx) + .await + .expect("ok"); + assert!(result.success); + + let body = std::fs::read_to_string(&path).expect("read"); + assert!(body.contains("use 4 spaces"), "{body}"); + assert!(body.contains("#indentation"), "{body}"); + assert!(body.contains("#rust"), "{body}"); + } + + #[tokio::test] + async fn appends_with_tags_normalizes_leading_hash() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + let ctx = ctx_with_memory(path.clone()); + + let tool = RememberTool; + let result = tool + .execute(json!({"note": "prefer tabs", "tags": ["#indentation", " #spacing"]}), &ctx) + .await + .expect("ok"); + assert!(result.success); + + let body = std::fs::read_to_string(&path).expect("read"); + assert!(body.contains("#indentation"), "{body}"); + assert!(body.contains("#spacing"), "{body}"); + } + + #[tokio::test] + async fn appends_with_empty_tags_skips() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + let ctx = ctx_with_memory(path.clone()); + + let tool = RememberTool; + let result = tool + .execute(json!({"note": "bare note", "tags": []}), &ctx) + .await + .expect("ok"); + assert!(result.success); + + let body = std::fs::read_to_string(&path).expect("read"); + assert!(body.contains("bare note"), "{body}"); + assert!(!body.contains('#'), "no tag char expected: {body}"); + } + + #[tokio::test] + async fn auto_tags_when_no_tags_provided() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + let ctx = ctx_with_memory(path.clone()); + + let tool = RememberTool; + let result = tool + .execute(json!({"note": "Use CodeWhale with DeepSeek V4"}), &ctx) + .await + .expect("ok"); + assert!(result.success); + // auto_tag should extract "codewhale" and "deepseek" from capitalized words + assert!(result.content.contains("#codewhale"), "result: {}", result.content); + assert!(result.content.contains("#deepseek"), "result: {}", result.content); + + let body = std::fs::read_to_string(&path).expect("read"); + assert!(body.contains("Use CodeWhale with DeepSeek V4"), "{body}"); + assert!(body.contains("#codewhale"), "{body}"); + assert!(body.contains("#deepseek"), "{body}"); + } + + #[tokio::test] + async fn explicit_tags_override_auto_tag() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + let ctx = ctx_with_memory(path.clone()); + + let tool = RememberTool; + let result = tool + .execute(json!({"note": "Use CodeWhale", "tags": ["manual"]}), &ctx) + .await + .expect("ok"); + assert!(result.success); + // Should NOT auto-tag since explicit tags were provided + assert!(result.content.contains("#manual"), "result: {}", result.content); + assert!(!result.content.contains("#codewhale"), "should not auto-tag: {}", result.content); + + let body = std::fs::read_to_string(&path).expect("read"); + assert!(body.contains("#manual"), "{body}"); + } } diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 8e24d8e8b..2cd722c10 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -1097,9 +1097,24 @@ fn is_memory_quick_add(input: &str) -> bool { /// memory directory becomes visible without crashing the composer. fn handle_memory_quick_add(app: &mut App, input: &str, config: &Config) { let path = config.memory_path(); - match crate::memory::append_entry(&path, input) { + let inline_tags = crate::memory::extract_tags(input); + match crate::memory::append_entry(&path, input, &[]) { Ok(()) => { - app.status_message = Some(format!("memory: appended to {}", path.display())); + let tag_hint = if inline_tags.is_empty() { + let note = input.trim_start_matches('#').trim(); + let auto_tags = crate::memory::auto_tag(note, 3); + if auto_tags.is_empty() { + String::new() + } else { + format!( + " [auto: {}]", + auto_tags.iter().map(|t| format!("#{t}")).collect::>().join(" ") + ) + } + } else { + format!(" [{}]", inline_tags.join(" ")) + }; + app.status_message = Some(format!("memory: appended to {}{}", path.display(), tag_hint)); } Err(err) => { app.status_message = Some(format!( diff --git a/integrations/wecom-bridge/.env.example b/integrations/wecom-bridge/.env.example new file mode 100644 index 000000000..cb3d774ae --- /dev/null +++ b/integrations/wecom-bridge/.env.example @@ -0,0 +1,38 @@ +# ============================================================================= +# WeCom (企业微信) Bridge — 环境变量模板 +# ============================================================================= +# +# 此 bridge 使用企业微信智能机器人长连接模式(WebSocket), +# 无需公网 IP。在企业微信管理后台创建"智能机器人", +# 获取 BotID 和 Secret 后填入。 + +# CodeWhale Runtime 连接 +CODEWHALE_RUNTIME_URL=http://127.0.0.1:7878 +CODEWHALE_RUNTIME_TOKEN=replace-with-long-random-token +CODEWHALE_WORKSPACE=/opt/codewhale +CODEWHALE_MODEL=auto +CODEWHALE_MODE=agent +CODEWHALE_ALLOW_SHELL=true +CODEWHALE_TRUST_MODE=false +CODEWHALE_AUTO_APPROVE=false + +# 企业微信智能机器人凭证(必填) +# 在企业微信管理后台 -> 应用管理 -> 智能机器人 -> 创建/编辑 -> 查看 BotID 和 Secret +WECOM_BOT_ID=your-bot-id +WECOM_BOT_SECRET=your-bot-secret + +# 逗号分隔的允许用户 UserID(可在企业微信通讯录中查看) +# 首次配对时设为空并用 WECOM_ALLOW_UNLISTED=true +WECOM_CHAT_ALLOWLIST= +WECOM_ALLOW_UNLISTED=false + +# 状态持久化路径 +WECOM_STATE_DIR=/var/lib/codewhale-wecom-bridge +WECOM_THREAD_MAP_PATH=/var/lib/codewhale-wecom-bridge/thread-map.json + +# 消息配置 +WECOM_MAX_REPLY_CHARS=3500 +CODEWHALE_TURN_TIMEOUT_MS=900000 + +# 企业微信 API 基础地址(一般不需要修改) +WECOM_API_BASE_URL=https://qyapi.weixin.qq.com diff --git a/integrations/wecom-bridge/README.md b/integrations/wecom-bridge/README.md new file mode 100644 index 000000000..a2396fa53 --- /dev/null +++ b/integrations/wecom-bridge/README.md @@ -0,0 +1,100 @@ +# WeCom (企业微信) Bridge + +此 bridge 让**企业微信**用户通过智能机器人长连接控制本地 `codewhale serve --http` runtime。 +使用企业微信智能机器人 API(长连接/WebSocket 模式),无需公网 IP。 + +与 `integrations/weixin-bridge`(个人微信 iLink Bot 协议)不同,此 bridge 面向企业微信组织内部使用, +通过 BotID + Secret 认证,支持企业通讯录权限管理。 + +## 安全模型 + +- `codewhale serve --http` 绑定于 `127.0.0.1`。 +- `/v1/*` runtime 调用使用 `CODEWHALE_RUNTIME_TOKEN`。 +- 企业微信用户必须加入白名单(`WECOM_CHAT_ALLOWLIST`),除非首次配对时设置 `WECOM_ALLOW_UNLISTED=true`。 +- 支持私聊和群聊(群聊需要前缀 `/ds`)。 +- 工具审批通过文本命令:`/allow ` 或 `/deny `。 +- 长连接模式无需公网端口。 + +## 前提 + +1. 拥有企业微信管理员权限 +2. 在企业微信管理后台创建一个**智能机器人**(工作台 → 智能机器人 → 创建机器人) +3. 选择 **API 模式**,获取 BotID 和 Secret +4. (可选)配置机器人接收消息的格式 + +## 设置 + +```bash +cd /opt/codewhale/wecom-bridge +npm install --omit=dev +cp .env.example /etc/codewhale/wecom-bridge.env +sudoedit /etc/codewhale/wecom-bridge.env +node src/index.mjs +``` + +启动后 bridge 会自动建立 WebSocket 长连接,无需额外配置。 + +## 命令 + +| 命令 | 说明 | +|------|------| +| `/help` | 显示帮助 | +| `/status` | runtime 和工作区状态 | +| `/threads` | 最近的 runtime 线程 | +| `/new` | 为此聊天创建新线程 | +| `/resume ` | 绑定到此聊天的现有线程 | +| `/model ` | 设置或重置聊天模型 | +| `/interrupt` | 中断活动 turn | +| `/compact` | 压缩当前线程 | +| `/allow [remember]` | 批准待处理的工具调用 | +| `/deny ` | 拒绝待处理的工具调用 | + +其他所有内容均作为 CodeWhale 提示发送。群聊中需要在消息前加 `/ds` 前缀。 + +## 首次配对 + +1. 设置 `WECOM_ALLOW_UNLISTED=true` 启动 bridge。 +2. 在企业微信中向机器人发送 `/status`。 +3. Bridge 会拒绝并返回你的 `user_id`(或 `chat_id`)。 +4. 将 `user_id` 加入 `WECOM_CHAT_ALLOWLIST`。 +5. 将 `WECOM_ALLOW_UNLISTED` 改回 `false` 并重启 bridge。 + +## 环境变量 + +| 变量 | 必填 | 说明 | +|------|------|------| +| `CODEWHALE_RUNTIME_URL` | 否 | Runtime HTTP 地址(默认 `http://127.0.0.1:7878`) | +| `CODEWHALE_RUNTIME_TOKEN` | **是** | Runtime Bearer 令牌 | +| `CODEWHALE_WORKSPACE` | 否 | 工作区路径(默认 cwd) | +| `CODEWHALE_MODEL` | 否 | 模型名称(默认 `auto`) | +| `CODEWHALE_MODE` | 否 | 运行模式(默认 `agent`) | +| `WECOM_BOT_ID` | **是** | 企业微信智能机器人 BotID | +| `WECOM_BOT_SECRET` | **是** | 企业微信智能机器人 Secret | +| `WECOM_CHAT_ALLOWLIST` | 否 | 逗号分隔的允许用户 UserID | +| `WECOM_ALLOW_UNLISTED` | 否 | 首次配对模式(默认 `false`) | +| `WECOM_STATE_DIR` | 否 | 状态持久化目录 | +| `WECOM_THREAD_MAP_PATH` | 否 | 线程映射文件路径 | +| `WECOM_MAX_REPLY_CHARS` | 否 | 单条回复最大字符数(默认 `3500`) | +| `CODEWHALE_TURN_TIMEOUT_MS` | 否 | Turn 超时(默认 `900000`) | + +## 架构 + +``` +企业微信客户端 → 智能机器人长连接(WebSocket) → WeCom Bridge ──HTTP──→ codewhale serve --http + ◀── aibot_respond_msg ◀── (127.0.0.1:7878) +``` + +Bridge 使用 BotID + Secret 获取 access_token,建立 WebSocket 长连接。 +接收 `aibot_msg_callback` 事件,通过 `aibot_respond_msg` 命令回复消息。 +所有消息处理与 CodeWhale Runtime API 交互,与 Feishu/Telegram bridge 共享相同逻辑。 + +## 与 weixin-bridge 的区别 + +| 特性 | weixin-bridge | wecom-bridge | +|------|---------------|--------------| +| 账号类型 | 个人微信 | 企业微信 | +| 登录方式 | 扫码登录 | BotID + Secret | +| 消息协议 | iLink Bot 长轮询 | 智能机器人 WebSocket | +| 认证方式 | 扫码获取 bot_token | API 获取 access_token | +| 组织管理 | 无 | 支持企业通讯录 | +| 公网需求 | 不需要 | 不需要 | diff --git a/integrations/wecom-bridge/package-lock.json b/integrations/wecom-bridge/package-lock.json new file mode 100644 index 000000000..b2d93d241 --- /dev/null +++ b/integrations/wecom-bridge/package-lock.json @@ -0,0 +1,387 @@ +{ + "name": "@codewhale/wecom-bridge", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "@codewhale/wecom-bridge", + "version": "0.1.0", + "dependencies": { + "@wecom/aibot-node-sdk": "^1.0.7" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@wecom/aibot-node-sdk": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/@wecom/aibot-node-sdk/-/aibot-node-sdk-1.0.7.tgz", + "integrity": "sha512-51w+sTqunry6GD3HFvmuh0gArMSJDFE418vyvR1wMJHj1N6DaFuGD3HuaY2fazZs3mb9FWeQFX3+vU5t0Qhwmw==", + "license": "MIT", + "dependencies": { + "axios": "^1.6.7", + "eventemitter3": "^5.0.1", + "ws": "^8.16.0" + } + }, + "node_modules/agent-base": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", + "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", + "license": "MIT", + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", + "license": "MIT" + }, + "node_modules/axios": { + "version": "1.18.0", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.18.0.tgz", + "integrity": "sha512-E32NzpYKp++W7XRe52rHiXV2ehxmh3wbdgO7MHeFM+vqxLBYHzt0ElkiImtOBxtOmyp0yoC8C6uESVV84Y2/hw==", + "license": "MIT", + "dependencies": { + "follow-redirects": "^1.16.0", + "form-data": "^4.0.5", + "https-proxy-agent": "^5.0.1", + "proxy-from-env": "^2.1.0" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "license": "MIT", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "license": "MIT", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-define-property": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.2.tgz", + "integrity": "sha512-HWcBoN6NileqtSydK2FqHbS/LoDd2pqrnQHLyJzBj4kOp/ky2MWMN694xOfkK8/SnUsW2DH7EfyVlydKCsm1Zw==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-set-tostringtag": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", + "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", + "license": "MIT", + "dependencies": { + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.6", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/eventemitter3": { + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-5.0.4.tgz", + "integrity": "sha512-mlsTRyGaPBjPedk6Bvw+aqbsXDtoAyAzm5MO7JgU+yVRyMQ5O8bD4Kcci7BS85f93veegeCPkL8R4GLClnjLFw==", + "license": "MIT" + }, + "node_modules/follow-redirects": { + "version": "1.16.0", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.16.0.tgz", + "integrity": "sha512-y5rN/uOsadFT/JfYwhxRS5R7Qce+g3zG97+JrtFZlC9klX/W5hD7iiLzScI4nZqUS7DNUdhPgw4xI8W2LuXlUw==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "license": "MIT", + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.6.tgz", + "integrity": "sha512-vKatAh4SlVfgbv+YtmhiRjhEMJsYpsG1Y2rMQtR+SVSbytsSD1YGzDIcrAJmdFec88u/+VoGmxnl+80gL1tRCQ==", + "license": "MIT", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "es-set-tostringtag": "^2.1.0", + "hasown": "^2.0.4", + "mime-types": "^2.1.35" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "license": "MIT", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "license": "MIT", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-tostringtag": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", + "license": "MIT", + "dependencies": { + "has-symbols": "^1.0.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.4.tgz", + "integrity": "sha512-T2UbfbBEF32wiepXIsMlTW9+dDYC6wMh/t/vYA4tuOMKqWz/n3vr1NFSxQiyP+zk2mXsoMA/i/7qV6LKut1t1A==", + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "license": "MIT", + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "license": "MIT", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "license": "MIT" + }, + "node_modules/proxy-from-env": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-2.1.0.tgz", + "integrity": "sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA==", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, + "node_modules/ws": { + "version": "8.21.0", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.21.0.tgz", + "integrity": "sha512-Vsp28b7DRcimFQvrqu2Wek3z1iYxDCWqHYB8Qsnk/S4RfaCQzPGPyBNuVjJV3cd6UiKtUtp6sNM77gWvzcCH+g==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + } + } +} diff --git a/integrations/wecom-bridge/package.json b/integrations/wecom-bridge/package.json new file mode 100644 index 000000000..89d7dadb0 --- /dev/null +++ b/integrations/wecom-bridge/package.json @@ -0,0 +1,19 @@ +{ + "name": "@codewhale/wecom-bridge", + "version": "0.1.0", + "private": true, + "type": "module", + "description": "企业微信(WeCom)智能机器人长连接bridge,用于从手机控制codewhale serve --http runtime", + "main": "src/index.mjs", + "scripts": { + "start": "node src/index.mjs", + "check": "node --check src/index.mjs && node --check src/lib.mjs", + "test": "node --test test/*.test.mjs" + }, + "dependencies": { + "@wecom/aibot-node-sdk": "^1.0.7" + }, + "engines": { + "node": ">=18" + } +} diff --git a/integrations/wecom-bridge/src/index.mjs b/integrations/wecom-bridge/src/index.mjs new file mode 100644 index 000000000..b52939d43 --- /dev/null +++ b/integrations/wecom-bridge/src/index.mjs @@ -0,0 +1,497 @@ +import { WSClient, generateReqId } from "@wecom/aibot-node-sdk"; + +import { + activeTurnBlock, + commandAction, + compactRuntimeError, + helpText, + incomingIdentity, + isAllowed, + latestRunningTurn, + pairingRefusalText, + parseBool, + parseCommand, + parseList, + parseApprovalDecisionArgs, + preservedChatStateFields, + requiredEnv, + splitMessage, + stripGroupPrefix, + ThreadStore +} from "./lib.mjs"; + +const config = { + botId: requiredEnv("WECOM_BOT_ID"), + botSecret: requiredEnv("WECOM_BOT_SECRET"), + runtimeUrl: (process.env.CODEWHALE_RUNTIME_URL || "http://127.0.0.1:7878").replace(/\/+$/, ""), + runtimeToken: requiredEnv("CODEWHALE_RUNTIME_TOKEN"), + workspace: process.env.CODEWHALE_WORKSPACE || process.cwd(), + model: process.env.CODEWHALE_MODEL || "auto", + mode: process.env.CODEWHALE_MODE || "agent", + allowShell: parseBool(process.env.CODEWHALE_ALLOW_SHELL, true), + trustMode: parseBool(process.env.CODEWHALE_TRUST_MODE, false), + autoApprove: parseBool(process.env.CODEWHALE_AUTO_APPROVE, false), + allowlist: parseList(process.env.WECOM_CHAT_ALLOWLIST), + allowUnlisted: parseBool(process.env.WECOM_ALLOW_UNLISTED, false), + threadMapPath: process.env.WECOM_THREAD_MAP_PATH || "/var/lib/codewhale-wecom-bridge/thread-map.json", + maxReplyChars: Number(process.env.WECOM_MAX_REPLY_CHARS || 3500), + turnTimeoutMs: Number(process.env.CODEWHALE_TURN_TIMEOUT_MS || 900000) +}; + +const threadStore = await ThreadStore.open(config.threadMapPath); + +const client = new WSClient({ + botId: config.botId, + secret: config.botSecret +}); + +client.on("message", async (frame) => { + try { + await handleIncomingMessage(frame); + } catch (error) { + console.error("Failed to handle WeCom message", error); + } +}); + +client.on("event", async (frame) => { + try { + await handleEvent(frame); + } catch (error) { + console.error("Failed to handle WeCom event", error); + } +}); + +client.on("error", (error) => { + console.error("WeCom client error:", error); +}); + +console.log("Starting CodeWhale WeCom bridge"); +console.log(`Runtime: ${config.runtimeUrl}`); +console.log(`Workspace: ${config.workspace}`); +if (!config.allowlist.length && !config.allowUnlisted) { + console.log("No allowlist configured. Incoming chats will receive their IDs and be refused."); +} + +client.connect(); + +function replyText(frame, text) { + const streamId = generateReqId("stream"); + return client.replyStream(frame, streamId, text, true); +} + +async function handleIncomingMessage(frame) { + const body = frame.body || {}; + const identity = incomingIdentity(body); + console.log(`Incoming message: chatId=${identity.chatId} userId=${identity.userId} chatType=${identity.chatType}`); + if (!identity.chatId || !identity.messageId) return; + + if (body.msgtype && body.msgtype !== "text") { + await replyText(frame, "目前仅支持文本消息。"); + return; + } + + const textContent = body.text?.content || ""; + const scoped = stripGroupPrefix(textContent, { + chatType: identity.chatType, + requirePrefix: identity.chatType === "group", + prefix: "/ds" + }); + if (!scoped.accepted) return; + + if (!isAllowed(identity, config.allowlist, config.allowUnlisted)) { + await replyText(frame, pairingRefusalText(identity)); + return; + } + + const command = parseCommand(scoped.text); + await handleCommand(identity.chatId, command, frame); +} + +async function handleEvent(frame) { + const body = frame.body || {}; + const eventType = body.event?.eventtype || ""; + if (eventType === "enter_chat") { + const chatId = body.chatid; + if (chatId) { + await client.replyWelcome(frame, { msgtype: "text", text: { content: "欢迎使用 CodeWhale!发送 /help 查看可用命令。" } }); + } + } +} + +async function handleCommand(chatId, command, frame) { + const action = commandAction(command); + switch (action.kind) { + case "help": + await replyText(frame, helpText()); + return; + case "status": + await sendStatus(chatId, frame); + return; + case "threads": + await sendThreads(chatId, frame); + return; + case "new_thread": { + const state = await ensureThread(chatId); + await replyText(frame, `Created thread ${state.threadId}`); + return; + } + case "resume": + await resumeThread(chatId, action.threadId, frame); + return; + case "interrupt": + await interruptActiveTurn(chatId, frame); + return; + case "compact": + await compactThread(chatId, frame); + return; + case "approval": + await decideApproval(chatId, action, frame); + return; + case "set_model": + await setChatModel(chatId, action.modelName, frame); + return; + case "prompt": + await runPrompt(chatId, action.prompt, frame); + return; + default: + await replyText(frame, helpText()); + } +} + +async function ensureThread(chatId, { forceNew = false } = {}) { + const existing = await threadStore.getChat(chatId); + if (existing?.threadId && !forceNew) return existing; + + const effectiveModel = existing?.model || config.model; + + const thread = await runtimeJson("/v1/threads", { + method: "POST", + body: { + model: effectiveModel, + workspace: config.workspace, + mode: config.mode, + allow_shell: config.allowShell, + trust_mode: config.trustMode, + auto_approve: config.autoApprove, + archived: false, + system_prompt: + "You are being controlled from a WeCom (企业微信) phone chat. Keep status updates concise. Ask for tool approvals when needed; do not assume mobile messages imply blanket approval." + } + }); + + const state = { + ...preservedChatStateFields(existing), + threadId: thread.id, + lastSeq: 0, + activeTurnId: null, + updatedAt: new Date().toISOString() + }; + await threadStore.setChat(chatId, state); + return state; +} + +async function runPrompt(chatId, prompt, frame) { + if (!prompt.trim()) { + await replyText(frame, helpText()); + return; + } + const state = await ensureThread(chatId); + const effectiveModel = state?.model || config.model; + const detail = await runtimeJson(`/v1/threads/${encodeURIComponent(state.threadId)}`); + const activeBlock = activeTurnBlock(detail, state); + if (activeBlock) { + await threadStore.patchChat(chatId, { + activeTurnId: activeBlock.turnId, + updatedAt: new Date().toISOString() + }); + await replyText(frame, activeBlock.message); + return; + } + if (state.activeTurnId) { + await threadStore.patchChat(chatId, { activeTurnId: null }); + } + const sinceSeq = Number(detail.latest_seq || state.lastSeq || 0); + + const turnResponse = await runtimeJson( + `/v1/threads/${encodeURIComponent(state.threadId)}/turns`, + { + method: "POST", + body: { + prompt, + input_summary: prompt.slice(0, 200), + model: effectiveModel, + mode: config.mode, + allow_shell: config.allowShell, + trust_mode: config.trustMode, + auto_approve: config.autoApprove + } + } + ); + + const turnId = turnResponse.turn?.id; + await threadStore.patchChat(chatId, { + activeTurnId: turnId || null, + lastSeq: sinceSeq, + updatedAt: new Date().toISOString() + }); + await replyText(frame, `Started turn ${turnId || "(unknown)"}`); + + try { + await streamTurnEvents(chatId, frame, state.threadId, turnId, sinceSeq); + } finally { + await threadStore.patchChat(chatId, { + activeTurnId: null, + updatedAt: new Date().toISOString() + }); + } +} + +async function streamTurnEvents(chatId, frame, threadId, turnId, sinceSeq) { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), config.turnTimeoutMs); + const streamId = generateReqId("stream"); + let responseText = ""; + let latestSeq = sinceSeq; + let sentProgressAt = Date.now(); + + try { + const response = await fetch( + `${config.runtimeUrl}/v1/threads/${encodeURIComponent(threadId)}/events?since_seq=${sinceSeq}`, + { + headers: authHeaders(), + signal: controller.signal + } + ); + if (!response.ok) { + const body = await readJsonSafe(response); + throw new Error(compactRuntimeError(response.status, body)); + } + + for await (const event of readSse(response)) { + if (!event.data) continue; + const record = JSON.parse(event.data); + latestSeq = Math.max(latestSeq, Number(record.seq || 0)); + await threadStore.patchChat(chatId, { lastSeq: latestSeq }); + + if (turnId && record.turn_id && record.turn_id !== turnId) continue; + + if (record.event === "item.delta" && record.payload?.kind === "agent_message") { + responseText += record.payload.delta || ""; + await client.replyStream(frame, streamId, responseText, false); + } + + if (record.event === "approval.required") { + const approval = record.payload || {}; + await replyText( + frame, + [ + "审批请求", + `tool=${approval.tool_name || "unknown"}`, + `approval_id=${approval.approval_id || approval.id}`, + approval.description || "", + "", + `回复 /allow ${approval.approval_id || approval.id}`, + `回复 /deny ${approval.approval_id || approval.id}` + ] + .filter(Boolean) + .join("\n") + ); + } + + if (record.event === "turn.completed") { + const turn = record.payload?.turn || {}; + const status = turn.status || "completed"; + const error = turn.error ? `\n${turn.error}` : ""; + await client.replyStream(frame, streamId, responseText.trim() || "Turn completed.", true); + return; + } + + if (record.event === "turn.lifecycle") { + const status = record.payload?.turn?.status || record.payload?.status; + if (["failed", "canceled", "interrupted"].includes(status)) { + await client.replyStream(frame, streamId, `Turn ${status}.`, true); + return; + } + } + } + } catch (error) { + if (error.name === "AbortError") { + await replyText(frame, `Turn timed out after ${Math.round(config.turnTimeoutMs / 1000)}s.`); + return; + } + throw error; + } finally { + clearTimeout(timeout); + } +} + +async function sendStatus(chatId, frame) { + const [health, runtimeInfo, workspace] = await Promise.all([ + runtimeJson("/health", { auth: false }), + runtimeJson("/v1/runtime/info"), + runtimeJson("/v1/workspace/status") + ]); + await replyText( + frame, + [ + `runtime=${health.status || "unknown"}`, + `version=${runtimeInfo.version || "unknown"}`, + `bind=${runtimeInfo.bind_host}:${runtimeInfo.port}`, + `auth_required=${runtimeInfo.auth_required}`, + `workspace=${workspace.workspace}`, + `git_repo=${workspace.git_repo}`, + workspace.branch ? `branch=${workspace.branch}` : "", + `staged=${workspace.staged} unstaged=${workspace.unstaged} untracked=${workspace.untracked}` + ] + .filter(Boolean) + .join("\n") + ); +} + +async function sendThreads(chatId, frame) { + const threads = await runtimeJson("/v1/threads/summary?limit=8&include_archived=true"); + if (!threads.length) { + await replyText(frame, "No runtime threads yet."); + return; + } + await replyText( + frame, + threads + .map((thread) => { + const status = thread.latest_turn_status || "none"; + return `${thread.id} [${status}] ${thread.title || thread.preview || ""}`; + }) + .join("\n") + ); +} + +async function resumeThread(chatId, args, frame) { + const threadId = args.trim(); + if (!threadId) { + await replyText(frame, "Usage: /resume "); + return; + } + const detail = await runtimeJson(`/v1/threads/${encodeURIComponent(threadId)}`); + const existing = await threadStore.getChat(chatId); + await threadStore.setChat(chatId, { + ...preservedChatStateFields(existing), + threadId, + lastSeq: Number(detail.latest_seq || 0), + activeTurnId: null, + updatedAt: new Date().toISOString() + }); + await replyText(frame, `Resumed thread ${threadId}`); +} + +async function interruptActiveTurn(chatId, frame) { + const state = await threadStore.getChat(chatId); + if (!state?.threadId) { + await replyText(frame, "No runtime thread recorded for this chat."); + return; + } + const detail = await runtimeJson(`/v1/threads/${encodeURIComponent(state.threadId)}`); + const runningTurn = latestRunningTurn(detail); + const turnId = state.activeTurnId || runningTurn?.id; + if (!turnId) { + await replyText(frame, "No active turn recorded for this chat."); + return; + } + await runtimeJson( + `/v1/threads/${encodeURIComponent(state.threadId)}/turns/${encodeURIComponent(turnId)}/interrupt`, + { method: "POST" } + ); + await threadStore.patchChat(chatId, { + activeTurnId: turnId, + updatedAt: new Date().toISOString() + }); + await replyText(frame, `Interrupt requested for ${turnId}`); +} + +async function compactThread(chatId, frame) { + const state = await ensureThread(chatId); + const result = await runtimeJson(`/v1/threads/${encodeURIComponent(state.threadId)}/compact`, { + method: "POST", + body: { reason: "phone bridge request" } + }); + await replyText(frame, `Compaction started: ${result.turn?.id || "unknown turn"}`); +} + +async function decideApproval(chatId, action, frame) { + const decision = action.decision; + const { approvalId, remember } = + action.approvalId != null ? action : parseApprovalDecisionArgs(action.args); + if (!approvalId) { + await replyText(frame, `Usage: /${decision} ${decision === "allow" ? " [remember]" : ""}`); + return; + } + await runtimeJson(`/v1/approvals/${encodeURIComponent(approvalId)}`, { + method: "POST", + body: { decision, remember } + }); + await replyText(frame, `Approval ${approvalId}: ${decision}${remember ? " and remember" : ""}`); +} + +async function setChatModel(chatId, modelName, frame) { + if (!modelName || modelName === "default") { + await threadStore.patchChat(chatId, { + model: null, + updatedAt: new Date().toISOString() + }); + await replyText(frame, `Reset per-chat model. Using bridge default: ${config.model}`); + return; + } + await threadStore.patchChat(chatId, { + model: modelName, + updatedAt: new Date().toISOString() + }); + await replyText(frame, `Per-chat model set to: ${modelName}`); +} + +async function runtimeJson(route, options = {}) { + const response = await fetch(`${config.runtimeUrl}${route}`, { + method: options.method || "GET", + headers: { + ...(options.auth === false ? {} : authHeaders()), + ...(options.body ? { "content-type": "application/json" } : {}) + }, + body: options.body ? JSON.stringify(options.body) : undefined + }); + const body = await readJsonSafe(response); + if (!response.ok) { + throw new Error(compactRuntimeError(response.status, body)); + } + return body; +} + +function authHeaders() { + return { authorization: `Bearer ${config.runtimeToken}` }; +} + +async function readJsonSafe(response) { + const text = await response.text(); + if (!text) return {}; + try { + return JSON.parse(text); + } catch { + return text; + } +} + +async function* readSse(response) { + const decoder = new TextDecoder(); + let buffer = ""; + for await (const chunk of response.body) { + buffer += decoder.decode(chunk, { stream: true }); + let boundary; + while ((boundary = buffer.indexOf("\n\n")) >= 0) { + const raw = buffer.slice(0, boundary).replace(/\r/g, ""); + buffer = buffer.slice(boundary + 2); + const event = { event: "", data: "" }; + for (const line of raw.split("\n")) { + if (line.startsWith("event:")) event.event = line.slice(6).trim(); + if (line.startsWith("data:")) event.data += line.slice(5).trim(); + } + yield event; + } + } +} diff --git a/integrations/wecom-bridge/src/lib.mjs b/integrations/wecom-bridge/src/lib.mjs new file mode 100644 index 000000000..0d422894b --- /dev/null +++ b/integrations/wecom-bridge/src/lib.mjs @@ -0,0 +1,311 @@ +import { readFile, writeFile, mkdir, rename } from "node:fs/promises"; +import path from "node:path"; + +export function parseList(raw) { + return String(raw || "") + .split(",") + .map((item) => item.trim()) + .filter(Boolean); +} + +export function parseBool(raw, fallback = false) { + if (raw == null || raw === "") return fallback; + return ["1", "true", "yes", "on"].includes(String(raw).trim().toLowerCase()); +} + +export function cleanEnvValue(value) { + return String(value ?? "").trim(); +} + +export function isPlaceholderValue(value) { + const normalized = cleanEnvValue(value).toLowerCase(); + return ( + !normalized || + normalized.includes("replace-with") || + normalized.includes("xxxxxxxx") || + normalized === "changeme" + ); +} + +export function requiredEnv(name) { + const value = process.env[name]; + if (!value || !value.trim()) { + throw new Error(`${name} is required`); + } + return value.trim(); +} + +export function parseTextContent(content) { + if (typeof content !== "string") return ""; + try { + const parsed = JSON.parse(content); + if (typeof parsed.text === "string") return parsed.text; + } catch { + return content; + } + return content; +} + +export function incomingIdentity(body) { + const from = body?.from || {}; + const chatId = body.chatid || (body.chattype === "single" && from.userid ? `single:${from.userid}` : ""); + return { + chatId, + messageId: body.msgid || "", + chatType: body.chattype || "single", + userId: from.userid || "", + aibotId: body.aibotid || "" + }; +} + +export function isAllowed(identity, allowlist, allowUnlisted = false) { + if (allowUnlisted) return true; + const allowed = new Set(allowlist); + return [identity.chatId, identity.userId].filter(Boolean).some((id) => allowed.has(id)); +} + +export function pairingRefusalText(identity) { + return [ + "This chat is not in WECOM_CHAT_ALLOWLIST.", + `chat_id=${identity.chatId}`, + identity.userId ? `user_id=${identity.userId}` : "" + ] + .filter(Boolean) + .join("\n"); +} + +export function stripGroupPrefix(text, { chatType, requirePrefix, prefix }) { + const trimmed = String(text || "").trim(); + if (!trimmed) return { accepted: false, text: "" }; + if (!requirePrefix || chatType === "single") { + return { accepted: true, text: trimmed }; + } + const marker = prefix || "/ds"; + if (trimmed === marker) return { accepted: true, text: "/help" }; + if (trimmed.startsWith(`${marker} `)) { + return { accepted: true, text: trimmed.slice(marker.length).trim() }; + } + return { accepted: false, text: "" }; +} + +export function parseCommand(text) { + const trimmed = String(text || "").trim(); + if (!trimmed.startsWith("/")) return { name: "prompt", args: trimmed }; + const [head, ...rest] = trimmed.split(/\s+/); + return { + name: head.slice(1).toLowerCase(), + args: rest.join(" ").trim() + }; +} + +export function parseApprovalDecisionArgs(args) { + const parts = String(args || "") + .split(/\s+/) + .filter(Boolean); + return { + approvalId: parts[0] || "", + remember: parts.slice(1).includes("remember") + }; +} + +export function commandAction(command) { + switch (command.name) { + case "help": + return { kind: "help" }; + case "status": + return { kind: "status" }; + case "threads": + return { kind: "threads" }; + case "new": + return { kind: "new_thread" }; + case "resume": + return { kind: "resume", threadId: command.args }; + case "interrupt": + return { kind: "interrupt" }; + case "compact": + return { kind: "compact" }; + case "model": + return { kind: "set_model", modelName: command.args }; + case "allow": + return { kind: "approval", decision: "allow", ...parseApprovalDecisionArgs(command.args) }; + case "deny": + return { kind: "approval", decision: "deny", ...parseApprovalDecisionArgs(command.args) }; + default: + return { + kind: "prompt", + prompt: `/${command.name}${command.args ? ` ${command.args}` : ""}` + }; + } +} + +export function preservedChatStateFields(state = {}) { + const preserved = {}; + if (Object.prototype.hasOwnProperty.call(state || {}, "model")) { + preserved.model = state.model || null; + } + return preserved; +} + +export function splitMessage(text, maxChars = 3500) { + const value = String(text || ""); + const chars = Array.from(value); + if (chars.length <= maxChars) return value ? [value] : []; + const chunks = []; + let cursor = 0; + while (cursor < chars.length) { + chunks.push(chars.slice(cursor, cursor + maxChars).join("")); + cursor += maxChars; + } + return chunks; +} + +export function compactRuntimeError(status, body) { + const message = + body?.error?.message || + body?.message || + (typeof body === "string" ? body : JSON.stringify(body)); + return `Runtime API request failed (${status}): ${message}`; +} + +export function latestRunningTurn(detail) { + const turns = Array.isArray(detail?.turns) ? detail.turns : []; + for (let index = turns.length - 1; index >= 0; index -= 1) { + const turn = turns[index]; + if (["queued", "in_progress"].includes(turn?.status)) return turn; + } + return null; +} + +export function activeTurnBlock(detail, state = {}) { + const runningTurn = latestRunningTurn(detail); + if (!runningTurn) return null; + return { + turnId: runningTurn.id || state.activeTurnId || "", + message: `Thread already has active turn ${ + runningTurn.id || state.activeTurnId || "(unknown)" + }. Wait for it to finish or send /interrupt.` + }; +} + +export function helpText() { + return [ + "CodeWhale 企业微信桥接命令:", + "/help - 显示帮助", + "/status - runtime 和工作区状态", + "/threads - 最近的 runtime 线程", + "/new - 为此聊天创建新线程", + "/resume - 绑定到此聊天的现有线程", + "/model - 设置或重置聊天模型", + "/interrupt - 中断活动 turn", + "/compact - 压缩当前线程", + "/allow [remember] - 批准待处理的工具调用", + "/deny - 拒绝待处理的工具调用", + "", + "其他所有内容均作为 CodeWhale 提示发送。" + ].join("\n"); +} + +export class ThreadStore { + static async open(filePath) { + const store = new ThreadStore(filePath); + await store.load(); + return store; + } + + constructor(filePath) { + this.filePath = filePath; + this.data = { chats: {} }; + } + + async load() { + try { + const raw = await readFile(this.filePath, "utf8"); + this.data = JSON.parse(raw); + if (!this.data.chats) this.data.chats = {}; + } catch (error) { + if (error.code !== "ENOENT") throw error; + } + } + + async getChat(chatId) { + return this.data.chats[chatId] || null; + } + + listChats() { + return Object.entries(this.data.chats || {}); + } + + async setChat(chatId, state) { + this.data.chats[chatId] = state; + await this.save(); + return state; + } + + async patchChat(chatId, patch) { + const current = this.data.chats[chatId] || {}; + this.data.chats[chatId] = { ...current, ...patch }; + await this.save(); + return this.data.chats[chatId]; + } + + async save() { + const dir = path.dirname(this.filePath); + await mkdir(dir, { recursive: true }); + const tmp = `${this.filePath}.tmp`; + await writeFile(tmp, `${JSON.stringify(this.data, null, 2)}\n`); + await rename(tmp, this.filePath); + } +} + +export function validateBridgeConfig(env) { + const errors = []; + const warnings = []; + const info = []; + const add = (list, code, message) => list.push({ code, message }); + + for (const key of ["WECOM_BOT_ID", "WECOM_BOT_SECRET"]) { + const value = cleanEnvValue(env[key]); + if (!value) { + add(errors, "missing_required", `${key} is required`); + } else if (isPlaceholderValue(value)) { + add(errors, "placeholder_value", `${key} still contains a placeholder value`); + } + } + + const runtimeUrl = cleanEnvValue(env.CODEWHALE_RUNTIME_URL || "http://127.0.0.1:7878"); + try { + const parsed = new URL(runtimeUrl); + if (!["http:", "https:"].includes(parsed.protocol)) { + add(errors, "invalid_runtime_url", "CODEWHALE_RUNTIME_URL must use http or https"); + } + } catch { + add(errors, "invalid_runtime_url", "CODEWHALE_RUNTIME_URL is not a valid URL"); + } + + const runtimeToken = cleanEnvValue(env.CODEWHALE_RUNTIME_TOKEN); + if (!runtimeToken) { + add(errors, "missing_runtime_token", "CODEWHALE_RUNTIME_TOKEN is required"); + } else if (isPlaceholderValue(runtimeToken)) { + add(errors, "placeholder_runtime_token", "CODEWHALE_RUNTIME_TOKEN is still a placeholder"); + } + + const allowUnlisted = parseBool(env.WECOM_ALLOW_UNLISTED, false); + const allowlist = parseList(env.WECOM_CHAT_ALLOWLIST); + + if (!allowlist.length && allowUnlisted) { + add(warnings, "pairing_mode_open", "WECOM_ALLOW_UNLISTED=true leaves first-pairing mode open"); + } else if (!allowlist.length) { + add(warnings, "not_paired", "WECOM_CHAT_ALLOWLIST is empty; all chats will be refused"); + } + + return { ok: errors.length === 0, errors, warnings, info }; +} + +export function formatValidationReport(result) { + const lines = ["WeCom bridge config validation"]; + for (const item of result.errors) lines.push(`[fail] ${item.message}`); + for (const item of result.warnings) lines.push(`[warn] ${item.message}`); + for (const item of result.info) lines.push(`[info] ${item.message}`); + if (result.ok) lines.push("[ok] No blocking config errors found"); + return lines.join("\n"); +}