From 97e749e0b366a32108b25442788715861dd8f260 Mon Sep 17 00:00:00 2001 From: laopan <147567034@qq.com> Date: Mon, 22 Jun 2026 13:55:29 +0800 Subject: [PATCH 1/3] feat(memory): add tag-based memory indexing, search, and seam enrichment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Structured memory with tag support: - MemoryEntry parser for timestamped bullets with #tag extraction - Tag indexing via MemoryIndex (inverted index for tags + full-text) - auto_tag() for automatic suggestion from capitalized terms - append_entry() extracts inline tags and accepts extra_tags parameter New CLI commands: - /memory tags — list tags with occurrence counts - /memory search — full-text search across body and tags - /memory search --tag — exact tag filter Tool integration: - Remember tool accepts optional tags parameter - Auto-tag fallback when model provides no explicit tags Seam enrichment: - extract_topic_tags() scans messages for capitalized technical terms - MemoryIndex context injected into layered_context_checkpoint (max 20 entries) - memory_context section added to soft-seam and recompact prompts - SeamMetadata.tags populated for cross-reference --- .../tui/src/commands/groups/memory/memory.rs | 119 ++++- crates/tui/src/core/engine.rs | 48 +- crates/tui/src/main.rs | 1 + crates/tui/src/memory.rs | 451 +++++++++++++++++- crates/tui/src/memory_index.rs | 361 ++++++++++++++ crates/tui/src/seam_manager.rs | 70 ++- crates/tui/src/tools/remember.rs | 144 +++++- crates/tui/src/tui/ui.rs | 19 +- 8 files changed, 1189 insertions(+), 24 deletions(-) create mode 100644 crates/tui/src/memory_index.rs diff --git a/crates/tui/src/commands/groups/memory/memory.rs b/crates/tui/src/commands/groups/memory/memory.rs index 0c9af71a67..fcd350ef91 100644 --- a/crates/tui/src/commands/groups/memory/memory.rs +++ b/crates/tui/src/commands/groups/memory/memory.rs @@ -23,7 +23,7 @@ use std::path::Path; use super::CommandResult; use crate::tui::app::App; 
-const MEMORY_USAGE: &str = "/memory [show|path|clear|edit|help]"; +const MEMORY_USAGE: &str = "/memory [show|path|clear|edit|tags|search |search --tag |help]"; fn memory_help(path: &Path) -> String { format!( @@ -31,18 +31,60 @@ fn memory_help(path: &Path) -> String { Usage: {MEMORY_USAGE}\n\n\ Current path: {}\n\n\ Subcommands:\n\ - /memory Show the resolved path and current contents\n\ - /memory show Alias for the no-arg form\n\ - /memory path Print just the resolved path\n\ - /memory clear Replace the file contents with an empty marker\n\ - /memory edit Print the editor command for this file\n\ - /memory help Show this help\n\n\ + /memory Show the resolved path and current contents\n\ + /memory show Alias for the no-arg form\n\ + /memory path Print just the resolved path\n\ + /memory clear Replace the file contents with an empty marker\n\ + /memory edit Print the editor command for this file\n\ + /memory tags List all tags with occurrence counts\n\ + /memory search Search memory by text (body + tags)\n\ + /memory search --tag Search memory by tag (exact match)\n\ + /memory help Show this help\n\n\ Quick capture: type `# foo` in the composer to append a timestamped\n\ bullet without firing a turn.", path.display() ) } +/// Split the argument into subcommand and remaining args. 
+fn split_subcommand(arg: Option<&str>) -> (&str, Option<&str>) { + match arg { + Some(a) => { + let trimmed = a.trim(); + match trimmed.find(char::is_whitespace) { + Some(pos) => (&trimmed[..pos], Some(trimmed[pos + 1..].trim_start())), + None => (trimmed, None), + } + } + None => ("show", None), + } +} + +fn render_entries(entries: &[&crate::memory::MemoryEntry], prefix: &str) -> String { + let mut lines = String::new(); + for entry in entries { + let _ = std::fmt::Write::write_fmt( + &mut lines, + format_args!("\n{prefix}- ({}) {}", entry.timestamp, entry.body), + ); + if !entry.tags.is_empty() { + let _ = std::fmt::Write::write_fmt( + &mut lines, + format_args!( + " {}", + entry + .tags + .iter() + .map(|t| format!("#{t}")) + .collect::>() + .join(" ") + ), + ); + } + } + lines +} + pub fn memory(app: &mut App, arg: Option<&str>) -> CommandResult { if !app.use_memory { return CommandResult::error( @@ -51,7 +93,7 @@ pub fn memory(app: &mut App, arg: Option<&str>) -> CommandResult { } let path = app.memory_path.clone(); - let sub = arg.unwrap_or("show").trim(); + let (sub, rest) = split_subcommand(arg); match sub { "" | "show" => { @@ -69,6 +111,65 @@ pub fn memory(app: &mut App, arg: Option<&str>) -> CommandResult { CommandResult::message(body) } "path" => CommandResult::message(path.display().to_string()), + "tags" => match fs::read_to_string(&path) { + Ok(content) => { + let tags = crate::memory::list_tags(&content); + if tags.is_empty() { + CommandResult::message("no tags found in memory file") + } else { + let mut lines = format!("Tags in {}:\n", path.display()); + for (i, (tag, count)) in tags.iter().enumerate() { + let _ = std::fmt::Write::write_fmt( + &mut lines, + format_args!("\n {}. 
#{} ({})", i + 1, tag, count), + ); + } + CommandResult::message(lines) + } + } + Err(_) => CommandResult::message(format!( + "{}\n(file does not exist yet)", + path.display() + )), + }, + "search" => { + let Some(query) = rest.filter(|r| !r.is_empty()) else { + return CommandResult::error( + "Usage: /memory search or /memory search --tag ", + ); + }; + let content = match fs::read_to_string(&path) { + Ok(c) => c, + Err(_) => { + return CommandResult::message(format!( + "memory file does not exist yet at {}", + path.display() + )); + } + }; + let entries = crate::memory::parse_all(&content); + + // Check for --tag flag + let results: Vec<&crate::memory::MemoryEntry> = if query.starts_with("--tag ") { + let tag = query.trim_start_matches("--tag ").trim(); + crate::memory::search_by_tags(&entries, &[tag]) + } else { + crate::memory::search_text(&entries, query) + }; + + if results.is_empty() { + CommandResult::message(format!( + "no memory entries matching \"{query}\"" + )) + } else { + let body = render_entries(&results, ""); + CommandResult::message(format!( + "{} matching entry(ies) for \"{query}\":{}", + results.len(), + body + )) + } + } "clear" => match fs::write(&path, "") { Ok(()) => CommandResult::message(format!("memory cleared: {}", path.display())), Err(err) => CommandResult::error(format!("failed to clear {}: {err}", path.display())), @@ -123,7 +224,7 @@ mod tests { let mut app = create_test_app_with_memory(&tmpdir, true); let result = memory(&mut app, Some("help")); let msg = result.message.expect("help should return text"); - assert!(msg.contains("Usage: /memory [show|path|clear|edit|help]")); + assert!(msg.contains("Usage: /memory [show|path|clear|edit|tags|search |search --tag |help]")); assert!(msg.contains("/memory edit")); assert!(msg.contains(app.memory_path.to_string_lossy().as_ref())); } diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 2893bebd08..0aa206f675 100644 --- a/crates/tui/src/core/engine.rs +++ 
b/crates/tui/src/core/engine.rs @@ -2784,6 +2784,44 @@ impl Engine { .working_set .pinned_message_indices(&self.session.messages, &self.session.workspace); + // Build memory context from user's memory file for enriched seams. + // Limit injected entries to prevent seam bloat. + const MAX_MEMORY_CONTEXT_ENTRIES: usize = 20; + let memory_context: Option = if self.config.memory_enabled { + let path = &self.config.memory_path; + std::fs::read_to_string(path).ok().map(|content| { + let index = crate::memory_index::MemoryIndex::from_content(&content); + if index.is_empty() { + return String::new(); + } + // Extract topic tags from the messages to be summarized + let recent_msgs: Vec<&crate::models::Message> = (0..msg_range_end) + .filter_map(|i| self.session.messages.get(i)) + .collect(); + let topics = crate::seam_manager::SeamManager::extract_topic_tags(&recent_msgs); + + let matched: Vec<&crate::memory::MemoryEntry> = if topics.is_empty() { + // No specific topics — include recent memory entries as general context + index.entries().iter().rev().take(MAX_MEMORY_CONTEXT_ENTRIES).collect() + } else { + let topic_refs: Vec<&str> = topics.iter().map(String::as_str).collect(); + let by_tag = index.search_by_tags(&topic_refs); + if by_tag.is_empty() { + index.entries().iter().rev().take(MAX_MEMORY_CONTEXT_ENTRIES).collect() + } else { + by_tag.into_iter().take(MAX_MEMORY_CONTEXT_ENTRIES).collect() + } + }; + matched + .iter() + .map(|e| format!("- ({}) {} #{}", e.timestamp, e.body, e.tags.join(" #"))) + .collect::>() + .join("\n") + }) + } else { + None + }; + let _ = self .tx_event .send(Event::status(format!( @@ -2802,6 +2840,7 @@ impl Engine { msg_range_end, Some(&self.session.workspace), &pinned, + memory_context.as_deref(), ) .await { @@ -2816,7 +2855,14 @@ impl Engine { .filter_map(|i| self.session.messages.get(i)) .collect(); match seam_mgr - .recompact(&existing_seams, &recent, level, 0, msg_range_end) + .recompact( + &existing_seams, + &recent, + level, + 0, + 
msg_range_end, + memory_context.as_deref(), + ) .await { Ok(text) => text, diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index b5103cd702..92cec9aab1 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -50,6 +50,7 @@ mod lsp; mod mcp; mod mcp_server; mod memory; +mod memory_index; mod model_catalog; mod model_inventory; mod model_registry; diff --git a/crates/tui/src/memory.rs b/crates/tui/src/memory.rs index 65533bd92e..d9df1ec1ca 100644 --- a/crates/tui/src/memory.rs +++ b/crates/tui/src/memory.rs @@ -21,6 +21,7 @@ //! That keeps existing users on zero-overhead behavior and makes the //! feature explicit. +use std::collections::HashMap; use std::fs; use std::io::{self, Write}; use std::path::Path; @@ -113,11 +114,35 @@ pub fn compose_block(enabled: bool, path: &Path) -> Option { as_system_block(&content, path) } +/// Parse `#tag` hashtags from a text string, returning them in order of +/// appearance. Duplicates are preserved as-is; the caller should deduplicate +/// if needed. +pub fn extract_tags(text: &str) -> Vec<&str> { + text.split_whitespace() + .filter(|w| w.starts_with('#') && w.len() > 1 && !w[1..].starts_with('#')) + .collect() +} + +/// Remove `#tag` hashtags from a text string, returning the cleaned text. +/// This is used to separate tags from the note body before storage. +fn strip_tags(text: &str) -> String { + text.split_whitespace() + .filter(|w| !(w.starts_with('#') && w.len() > 1 && !w[1..].starts_with('#'))) + .collect::>() + .join(" ") +} + /// Append `entry` to the memory file at `path`, creating it (and its /// parent directory) if needed. The entry is timestamped so the user can /// later see when each note was added. The leading `#` from a `# foo` /// quick-add is stripped so the file stays as readable Markdown. -pub fn append_entry(path: &Path, entry: &str) -> io::Result<()> { +/// +/// Tags are extracted from two sources: +/// 1. `#tag` hashtags found inline in the entry text +/// 2. 
The explicit `extra_tags` parameter +/// +/// All tags are deduplicated and appended as `#tag` suffixes on the bullet. +pub fn append_entry(path: &Path, entry: &str, extra_tags: &[&str]) -> io::Result<()> { let trimmed = entry.trim_start_matches('#').trim(); if trimmed.is_empty() { return Err(io::Error::new( @@ -126,6 +151,26 @@ pub fn append_entry(path: &Path, entry: &str) -> io::Result<()> { )); } + // Extract inline tags from the entry, then strip them from the body + let inline_tags = extract_tags(trimmed); + let body = strip_tags(trimmed); + let body = body.trim(); + if body.is_empty() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "memory entry has only tags, no content", + )); + } + + // Merge and deduplicate tags + let mut all_tags: Vec<&str> = Vec::new(); + for t in inline_tags.into_iter().chain(extra_tags.iter().copied()) { + let tag = t.trim_start_matches('#'); + if !tag.is_empty() && !all_tags.contains(&tag) { + all_tags.push(tag); + } + } + if let Some(parent) = path.parent() && !parent.as_os_str().is_empty() { @@ -133,14 +178,156 @@ pub fn append_entry(path: &Path, entry: &str) -> io::Result<()> { } let timestamp = Utc::now().format("%Y-%m-%d %H:%M UTC"); + let tag_str = if all_tags.is_empty() { + String::new() + } else { + format!(" {}", all_tags.iter().map(|t| format!("#{t}")).collect::>().join(" ")) + }; let mut file = fs::OpenOptions::new() .create(true) .append(true) .open(path)?; - writeln!(file, "- ({timestamp}) {trimmed}")?; + writeln!(file, "- ({timestamp}) {body}{tag_str}")?; Ok(()) } +/// A parsed memory entry with structured fields: timestamp, body text, +/// and a deduplicated list of tags (without leading `#`). +#[derive(Debug, Clone)] +pub struct MemoryEntry { + pub timestamp: String, + pub body: String, + pub tags: Vec, + #[allow(dead_code)] + pub raw: String, +} + +/// Parse a single memory line into structured components. 
+/// +/// Format: `- (2026-06-22 10:30 UTC) body text #tag1 #tag2` +/// +/// Returns `None` for lines that don't match the expected format (blank +/// lines, non-bullet text, free-form markdown, etc.). +pub fn parse_entry(line: &str) -> Option { + let line = line.trim(); + if !line.starts_with("- (") { + return None; + } + let close_paren = line.find(')')?; + let timestamp = line[3..close_paren].to_string(); + let rest = line[close_paren + 1..].trim(); + if rest.is_empty() { + return None; + } + let tag_strs = extract_tags(rest); + let body = strip_tags(rest); + let body = body.trim(); + if body.is_empty() { + return None; + } + let mut seen = Vec::new(); + let tags: Vec = tag_strs + .iter() + .map(|t| t.trim_start_matches('#').to_string()) + .filter(|t| { + if seen.contains(t) { + false + } else { + seen.push(t.clone()); + true + } + }) + .collect(); + Some(MemoryEntry { + timestamp, + body: body.to_string(), + tags, + raw: line.to_string(), + }) +} + +/// Parse all bullet entries from memory file content. Non-bullet lines +/// (blank lines, free-form markdown) are silently skipped. +pub fn parse_all(content: &str) -> Vec { + content.lines().filter_map(parse_entry).collect() +} + +/// List all unique tags with their occurrence counts, sorted by frequency +/// (most frequent first). Tags are returned without the leading `#`. +pub fn list_tags(content: &str) -> Vec<(String, usize)> { + let entries = parse_all(content); + let mut counts: HashMap = HashMap::new(); + for entry in &entries { + for tag in &entry.tags { + *counts.entry(tag.clone()).or_insert(0) += 1; + } + } + let mut result: Vec<_> = counts.into_iter().collect(); + result.sort_by(|a, b| b.1.cmp(&a.1)); + result +} + +/// Filter entries that match any of the given tags (OR logic). Tag +/// matching is case-sensitive and supports both `#tag` and `tag` forms. 
+pub fn search_by_tags<'a>(entries: &'a [MemoryEntry], tags: &[&str]) -> Vec<&'a MemoryEntry> { + if tags.is_empty() { + return entries.iter().collect(); + } + let normalized: Vec = tags + .iter() + .map(|t| t.trim_start_matches('#').to_string()) + .collect(); + entries + .iter() + .filter(|e| normalized.iter().any(|t| e.tags.iter().any(|et| et == t))) + .collect() +} + +/// Search entries by text content (case-insensitive substring match against +/// both body and tags). +pub fn search_text<'a>(entries: &'a [MemoryEntry], query: &str) -> Vec<&'a MemoryEntry> { + let q = query.to_lowercase(); + entries + .iter() + .filter(|e| { + e.body.to_lowercase().contains(&q) + || e.tags.iter().any(|t| t.to_lowercase().contains(&q)) + }) + .collect() +} + +/// Simple auto-tagging for entries that have no explicit tags. Extracts +/// capitalized words (potential proper nouns / technical terms) and +/// words containing special characters (camelCase, snake_case, etc.) +/// as candidate tags. Returns at most `max_tags` tags, sorted by quality. 
+pub fn auto_tag(text: &str, max_tags: usize) -> Vec { + let mut candidates: Vec = Vec::new(); + let mut seen = std::collections::HashSet::new(); + + for word in text.split_whitespace() { + let clean = word.trim_matches(|c: char| c.is_ascii_punctuation()); + if clean.len() < 3 || clean.chars().all(|c| c.is_ascii_digit()) { + continue; + } + // Capitalized words (proper nouns / technical terms) + if clean.starts_with(|c: char| c.is_uppercase()) { + let tag = clean.to_lowercase(); + if seen.insert(tag.clone()) { + candidates.push(tag); + } + } + // Words with non-alphanumeric chars (camelCase, snake_case, namespaced) + if clean.contains(|c: char| !c.is_alphanumeric() && c != '\'') { + let tag = clean.to_lowercase(); + if seen.insert(tag.clone()) { + candidates.push(tag); + } + } + } + candidates.truncate(max_tags); + candidates +} + #[cfg(test)] mod tests { use super::*; @@ -249,7 +436,7 @@ mod tests { fn append_entry_creates_file_and_writes_one_bullet() { let tmp = tempdir().unwrap(); let path = tmp.path().join("memory.md"); - append_entry(&path, "# remember the milk").unwrap(); + append_entry(&path, "# remember the milk", &[]).unwrap(); let body = fs::read_to_string(&path).unwrap(); assert!(body.contains("remember the milk"), "{body}"); @@ -258,14 +445,16 @@ mod tests { "should start with bullet + date: {body}" ); assert!(body.trim_end().ends_with("remember the milk")); + // No tags appended + assert!(!body.contains('#'), "no tags expected: {body}"); } #[test] fn append_entry_appends_subsequent_lines() { let tmp = tempdir().unwrap(); let path = tmp.path().join("memory.md"); - append_entry(&path, "# first").unwrap(); - append_entry(&path, "second").unwrap(); + append_entry(&path, "# first", &[]).unwrap(); + append_entry(&path, "second", &[]).unwrap(); let body = fs::read_to_string(&path).unwrap(); assert!(body.contains("first")); assert!(body.contains("second")); @@ -277,7 +466,257 @@ mod tests { fn append_entry_rejects_empty_after_strip() { let tmp = 
tempdir().unwrap(); let path = tmp.path().join("memory.md"); - let err = append_entry(&path, "###").unwrap_err(); + let err = append_entry(&path, "###", &[]).unwrap_err(); + assert_eq!(err.kind(), io::ErrorKind::InvalidInput); + } + + #[test] + fn append_entry_stores_inline_tags() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + append_entry(&path, "# use 4 spaces #indentation #rust", &[]).unwrap(); + let body = fs::read_to_string(&path).unwrap(); + assert!(body.contains("use 4 spaces"), "{body}"); + assert!(body.contains("#indentation"), "{body}"); + assert!(body.contains("#rust"), "{body}"); + // Tags appear as suffix after body, not inline within the body text + assert!( + body.contains("use 4 spaces #indentation"), + "tags should be appended as suffix: {body}" + ); + } + + #[test] + fn append_entry_merges_extra_tags_with_inline_tags() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + append_entry(&path, "use tabs #preference", &["editor", "preference"]).unwrap(); + let body = fs::read_to_string(&path).unwrap(); + assert!(body.contains("use tabs"), "{body}"); + assert!(body.contains("#preference"), "{body}"); + assert!(body.contains("#editor"), "{body}"); + } + + #[test] + fn append_entry_deduplicates_tags() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + append_entry(&path, "note #dupe", &["dupe", "unique"]).unwrap(); + let body = fs::read_to_string(&path).unwrap(); + // "#dupe" should appear only once + assert_eq!(body.matches("#dupe").count(), 1, "duplicate tag: {body}"); + assert!(body.contains("#unique"), "{body}"); + } + + #[test] + fn append_entry_rejects_only_tags_no_body() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + let err = append_entry(&path, "# #tag #only", &[]).unwrap_err(); assert_eq!(err.kind(), io::ErrorKind::InvalidInput); } + + #[test] + fn extract_tags_parses_hashtags() { + let tags = extract_tags("hello #world this #is 
#a test"); + assert_eq!(tags, vec!["#world", "#is", "#a"]); + } + + #[test] + fn extract_tags_ignores_double_hash() { + let tags = extract_tags("hello ##world #valid"); + assert_eq!(tags, vec!["#valid"]); + } + + #[test] + fn extract_tags_returns_empty_for_no_tags() { + let tags = extract_tags("hello world"); + assert!(tags.is_empty()); + } + + // === parse_entry / parse_all === + + #[test] + fn parse_entry_parses_standard_bullet() { + let entry = parse_entry("- (2026-06-22 10:30 UTC) remember the milk #chore").unwrap(); + assert_eq!(entry.timestamp, "2026-06-22 10:30 UTC"); + assert_eq!(entry.body, "remember the milk"); + assert_eq!(entry.tags, vec!["chore"]); + } + + #[test] + fn parse_entry_returns_none_for_non_bullet() { + assert!(parse_entry("free form text").is_none()); + assert!(parse_entry("").is_none()); + assert!(parse_entry(" ").is_none()); + } + + #[test] + fn parse_entry_handles_multi_tag() { + let entry = + parse_entry("- (2026-06-22 10:30 UTC) use 4 spaces #indentation #rust #style").unwrap(); + assert_eq!(entry.body, "use 4 spaces"); + assert_eq!(entry.tags, vec!["indentation", "rust", "style"]); + } + + #[test] + fn parse_entry_deduplicates_tags() { + let entry = + parse_entry("- (2026-06-22 10:30 UTC) note #dupe #unique #dupe").unwrap(); + assert_eq!(entry.tags, vec!["dupe", "unique"]); + } + + #[test] + fn parse_entry_handles_no_tags() { + let entry = parse_entry("- (2026-06-22 10:30 UTC) plain note").unwrap(); + assert_eq!(entry.body, "plain note"); + assert!(entry.tags.is_empty()); + } + + #[test] + fn parse_all_skips_non_bullet_lines() { + let content = "\ +- (2026-06-22 10:30 UTC) first #tag1 +some free text +- (2026-06-22 11:00 UTC) second #tag2 + +- (2026-06-22 12:00 UTC) third #tag3"; + let entries = parse_all(content); + assert_eq!(entries.len(), 3); + assert_eq!(entries[0].body, "first"); + assert_eq!(entries[1].body, "second"); + assert_eq!(entries[2].body, "third"); + } + + #[test] + fn parse_all_returns_empty_for_empty_content() { + 
assert!(parse_all("").is_empty()); + assert!(parse_all(" \n\n ").is_empty()); + } + + // === list_tags === + + #[test] + fn list_tags_returns_sorted_counts() { + let content = "\ +- (2026-06-22 10:00 UTC) a #rust #cli +- (2026-06-22 11:00 UTC) b #rust #web +- (2026-06-22 12:00 UTC) c #cli"; + let tags = list_tags(content); + assert_eq!(tags.len(), 3); + // Most frequent first + assert!(tags[0].0 == "rust" || tags[0].0 == "cli"); + assert_eq!(tags.iter().find(|(t, _)| t == "rust").unwrap().1, 2); + assert_eq!(tags.iter().find(|(t, _)| t == "cli").unwrap().1, 2); + assert_eq!(tags.iter().find(|(t, _)| t == "web").unwrap().1, 1); + } + + #[test] + fn list_tags_returns_empty_when_no_entries() { + assert!(list_tags("").is_empty()); + } + + // === search_by_tags === + + #[test] + fn search_by_tags_finds_matching_entries() { + let entries = parse_all( + "\ +- (2026-06-22 10:00 UTC) first #rust +- (2026-06-22 11:00 UTC) second #python +- (2026-06-22 12:00 UTC) third #rust #web", + ); + let results = search_by_tags(&entries, &["rust"]); + assert_eq!(results.len(), 2); + assert!(results.iter().any(|e| e.body == "first")); + assert!(results.iter().any(|e| e.body == "third")); + } + + #[test] + fn search_by_tags_accepts_hash_prefix() { + let entries = parse_all("- (2026-06-22 10:00 UTC) note #mytag"); + let results = search_by_tags(&entries, &["#mytag"]); + assert_eq!(results.len(), 1); + } + + #[test] + fn search_by_tags_or_logic() { + let entries = parse_all( + "\ +- (2026-06-22 10:00 UTC) first #rust +- (2026-06-22 11:00 UTC) second #python", + ); + let results = search_by_tags(&entries, &["rust", "python"]); + assert_eq!(results.len(), 2); + } + + #[test] + fn search_by_tags_returns_all_when_empty() { + let entries = parse_all( + "\ +- (2026-06-22 10:00 UTC) first #rust +- (2026-06-22 11:00 UTC) second #python", + ); + let results = search_by_tags(&entries, &[]); + assert_eq!(results.len(), 2); + } + + #[test] + fn search_by_tags_no_match() { + let entries = parse_all("- 
(2026-06-22 10:00 UTC) note #rust"); + let results = search_by_tags(&entries, &["nonexistent"]); + assert!(results.is_empty()); + } + + // === search_text === + + #[test] + fn search_text_case_insensitive() { + let entries = parse_all("- (2026-06-22 10:00 UTC) Use Four Spaces"); + let results = search_text(&entries, "four"); + assert_eq!(results.len(), 1); + let results = search_text(&entries, "FOUR"); + assert_eq!(results.len(), 1); + } + + #[test] + fn search_text_matches_tags() { + let entries = parse_all("- (2026-06-22 10:00 UTC) note #indentation"); + let results = search_text(&entries, "indentation"); + assert_eq!(results.len(), 1); + } + + #[test] + fn search_text_no_match() { + let entries = parse_all("- (2026-06-22 10:00 UTC) note #rust"); + let results = search_text(&entries, "python"); + assert!(results.is_empty()); + } + + // === auto_tag === + + #[test] + fn auto_tag_extracts_capitalized_words() { + let tags = auto_tag("use DeepSeek V4 in CodeWhale", 5); + assert!(tags.contains(&"deepseek".to_string())); + assert!(tags.contains(&"codewhale".to_string())); + } + + #[test] + fn auto_tag_handles_snake_case() { + let tags = auto_tag("check the memory_manager config", 5); + assert!(tags.contains(&"memory_manager".to_string())); + } + + #[test] + fn auto_tag_respects_max_tags() { + let tags = auto_tag("Foo Bar Baz Qux Quux", 3); + assert_eq!(tags.len(), 3); + } + + #[test] + fn auto_tag_returns_empty_for_no_candidates() { + let tags = auto_tag("a be in it", 5); + assert!(tags.is_empty()); + } } diff --git a/crates/tui/src/memory_index.rs b/crates/tui/src/memory_index.rs new file mode 100644 index 0000000000..5fb4f84117 --- /dev/null +++ b/crates/tui/src/memory_index.rs @@ -0,0 +1,361 @@ +use std::collections::{HashMap, HashSet}; + +use crate::memory::MemoryEntry; + +/// Lightweight inverted index over memory entries. +/// +/// Maintains two indices: +/// - **Tag index**: maps each tag (lowercased, without `#`) to the set of +/// entry indices that carry it. 
+/// - **Full-text index**: maps each word (lowercased) to the set of entry +/// indices whose body or tags contain it. +/// +/// The index is rebuilt from scratch each time the memory file changes, +/// keeping the implementation simple and avoiding stale-entry bugs. +pub struct MemoryIndex { + /// Entries in display order (oldest first). + entries: Vec, + /// Inverted index: tag → entry indices. + tag_index: HashMap>, + /// Full-text index: word → entry indices. + text_index: HashMap>, +} + +impl MemoryIndex { + /// Build an index from parsed memory entries. + #[must_use] + pub fn build(entries: Vec) -> Self { + let mut tag_index: HashMap> = HashMap::new(); + let mut text_index: HashMap> = HashMap::new(); + + for (i, entry) in entries.iter().enumerate() { + // Index tags + for tag in &entry.tags { + let key = tag.to_lowercase(); + tag_index.entry(key).or_default().push(i); + } + // Index body words + for word in entry.body.split_whitespace() { + let clean: String = word + .chars() + .filter(|c| c.is_alphanumeric() || *c == '_' || *c == '-') + .collect(); + if clean.len() >= 2 { + text_index.entry(clean.to_lowercase()).or_default().push(i); + } + } + // Index tag strings as text too + for tag in &entry.tags { + for word in tag.split(|c: char| !c.is_alphanumeric()) { + if word.len() >= 2 { + text_index + .entry(word.to_lowercase()) + .or_default() + .push(i); + } + } + } + } + + // Deduplicate index entries (same entry may contribute a word multiple times) + for indices in tag_index.values_mut() { + indices.sort_unstable(); + indices.dedup(); + } + for indices in text_index.values_mut() { + indices.sort_unstable(); + indices.dedup(); + } + + Self { + entries, + tag_index, + text_index, + } + } + + /// Rebuild the index from memory file content. + #[must_use] + pub fn from_content(content: &str) -> Self { + Self::build(crate::memory::parse_all(content)) + } + + /// Return a reference to the underlying entries. 
+ #[must_use] + pub fn entries(&self) -> &[MemoryEntry] { + &self.entries + } + + /// Search by tags (OR logic — any matching tag). Returns matching + /// entries in display order. + #[allow(dead_code)] + #[must_use] + pub fn search_by_tags(&self, tags: &[&str]) -> Vec<&MemoryEntry> { + if tags.is_empty() { + return self.entries.iter().collect(); + } + let mut matched = HashSet::new(); + for tag in tags { + let key = tag.trim_start_matches('#').to_lowercase(); + if let Some(indices) = self.tag_index.get(&key) { + for &i in indices { + matched.insert(i); + } + } + } + let mut indices: Vec = matched.into_iter().collect(); + indices.sort_unstable(); + indices.iter().map(|&i| &self.entries[i]).collect() + } + + /// Full-text search (AND logic — all query words must match). Returns + /// matching entries in display order. + #[must_use] + pub fn search_text(&self, query: &str) -> Vec<&MemoryEntry> { + let words: Vec = query + .split_whitespace() + .filter_map(|w| { + let clean: String = w + .chars() + .filter(|c| c.is_alphanumeric() || *c == '_' || *c == '-') + .collect(); + if clean.len() >= 2 { + Some(clean.to_lowercase()) + } else { + None + } + }) + .collect(); + + if words.is_empty() { + return self.entries.iter().collect(); + } + + // Find intersection of all word matches + let mut result: Option> = None; + for word in &words { + if let Some(indices) = self.text_index.get(word) { + let set: HashSet = indices.iter().copied().collect(); + result = match result { + Some(existing) => Some(existing.intersection(&set).copied().collect()), + None => Some(set), + }; + } else { + // A required word has no matches → empty result + return Vec::new(); + } + } + + let mut indices: Vec = result.unwrap_or_default().into_iter().collect(); + indices.sort_unstable(); + indices.iter().map(|&i| &self.entries[i]).collect() + } + + /// Combined search: filter by tags (OR) and text (AND). + /// Returns entries that match both criteria. 
+ #[must_use] + pub fn search(&self, tags: &[&str], text: Option<&str>) -> Vec<&MemoryEntry> { + if tags.is_empty() && text.is_none() { + return self.entries.iter().collect(); + } + + let entry_set: HashSet = (0..self.entries.len()).collect(); + + let tag_indices: HashSet = if tags.is_empty() { + entry_set.clone() + } else { + let mut s = HashSet::new(); + for tag in tags { + let key = tag.trim_start_matches('#').to_lowercase(); + if let Some(indices) = self.tag_index.get(&key) { + for &i in indices { + s.insert(i); + } + } + } + s + }; + + let text_indices: HashSet = if let Some(query) = text { + let words: Vec = query + .split_whitespace() + .filter_map(|w| { + let clean: String = w + .chars() + .filter(|c| c.is_alphanumeric() || *c == '_' || *c == '-') + .collect(); + if clean.len() >= 2 { + Some(clean.to_lowercase()) + } else { + None + } + }) + .collect(); + if words.is_empty() { + entry_set + } else { + let mut result: Option> = None; + for word in &words { + if let Some(indices) = self.text_index.get(word) { + let set: HashSet = indices.iter().copied().collect(); + result = Some(match result { + Some(existing) => existing.intersection(&set).copied().collect(), + None => set, + }); + } else { + return Vec::new(); + } + } + result.unwrap_or_default() + } + } else { + entry_set + }; + + let mut indices: Vec = tag_indices + .intersection(&text_indices) + .copied() + .collect(); + indices.sort_unstable(); + indices.iter().map(|&i| &self.entries[i]).collect() + } + + /// Get all unique tags with their occurrence counts, sorted by + /// frequency (most frequent first). + #[must_use] + pub fn all_tags(&self) -> Vec<(String, usize)> { + let mut counts: HashMap = HashMap::new(); + for (_tag, indices) in &self.tag_index { + counts.insert(_tag.clone(), indices.len()); + } + let mut result: Vec<_> = counts.into_iter().collect(); + result.sort_by(|a, b| b.1.cmp(&a.1)); + result + } + + /// Number of entries in the index. 
+ #[must_use] + pub fn len(&self) -> usize { + self.entries.len() + } + + /// Whether the index is empty. + #[must_use] + pub fn is_empty(&self) -> bool { + self.entries.is_empty() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_entries() -> Vec { + crate::memory::parse_all( + "\ +- (2026-06-22 10:00 UTC) first entry about Rust #rust +- (2026-06-22 11:00 UTC) python web framework #python #web +- (2026-06-22 12:00 UTC) rust cli tooling #rust #cli +- (2026-06-22 13:00 UTC) web design patterns #web", + ) + } + + #[test] + fn index_builds_from_entries() { + let index = MemoryIndex::build(sample_entries()); + assert_eq!(index.len(), 4); + assert!(!index.is_empty()); + } + + #[test] + fn index_from_content_parses_and_indexes() { + let index = MemoryIndex::from_content( + "- (2026-06-22 10:00 UTC) test entry #test", + ); + assert_eq!(index.len(), 1); + let tags = index.all_tags(); + assert_eq!(tags.len(), 1); + assert_eq!(tags[0].0, "test"); + } + + #[test] + fn search_by_tags_or() { + let index = MemoryIndex::build(sample_entries()); + let results = index.search_by_tags(&["rust"]); + assert_eq!(results.len(), 2); + assert!(results.iter().any(|e| e.body.contains("first entry"))); + assert!(results.iter().any(|e| e.body.contains("cli tooling"))); + } + + #[test] + fn search_by_tags_multiple() { + let index = MemoryIndex::build(sample_entries()); + let results = index.search_by_tags(&["python", "cli"]); + assert_eq!(results.len(), 2); + } + + #[test] + fn search_by_tags_empty_returns_all() { + let index = MemoryIndex::build(sample_entries()); + let results = index.search_by_tags(&[]); + assert_eq!(results.len(), 4); + } + + #[test] + fn search_text_and() { + let index = MemoryIndex::build(sample_entries()); + // "rust framework" → intersection: entry 0 has "rust", entry 1 has "framework" + // None should have both + let results = index.search_text("rust framework"); + assert!(results.is_empty()); + } + + #[test] + fn search_text_single_word() { + let index 
= MemoryIndex::build(sample_entries()); + let results = index.search_text("python"); + assert_eq!(results.len(), 1); + } + + #[test] + fn search_text_case_insensitive() { + let index = MemoryIndex::build(sample_entries()); + let results = index.search_text("RUST"); + assert_eq!(results.len(), 2); + } + + #[test] + fn search_combined() { + let index = MemoryIndex::build(sample_entries()); + let results = index.search(&["web"], Some("patterns")); + assert_eq!(results.len(), 1); + assert!(results[0].body.contains("design patterns")); + } + + #[test] + fn search_no_match() { + let index = MemoryIndex::build(sample_entries()); + let results = index.search(&["nonexistent"], None); + assert!(results.is_empty()); + } + + #[test] + fn all_tags_sorted_by_frequency() { + let index = MemoryIndex::build(sample_entries()); + let tags = index.all_tags(); + assert!(tags.iter().any(|(t, _)| t == "rust")); + assert!(tags.iter().any(|(t, _)| t == "web")); + assert!(tags.iter().any(|(t, _)| t == "python")); + assert!(tags.iter().any(|(t, _)| t == "cli")); + } + + #[test] + fn empty_index() { + let index = MemoryIndex::build(vec![]); + assert!(index.is_empty()); + assert_eq!(index.len(), 0); + assert!(index.all_tags().is_empty()); + assert!(index.search_by_tags(&["anything"]).is_empty()); + assert!(index.search_text("anything").is_empty()); + } +} diff --git a/crates/tui/src/seam_manager.rs b/crates/tui/src/seam_manager.rs index e59575321c..6952dfcbcf 100644 --- a/crates/tui/src/seam_manager.rs +++ b/crates/tui/src/seam_manager.rs @@ -104,6 +104,10 @@ pub struct SeamMetadata { /// Model that produced it. #[allow(dead_code)] pub model: String, + /// Tags inferred from the summarized conversation segment. + /// Used to cross-reference with user memory for enriched summaries. + #[allow(dead_code)] + pub tags: Vec, } /// The Flash seam manager — produces `` blocks. 
@@ -156,8 +160,44 @@ impl SeamManager { message_count.saturating_sub(verbatim_messages) } + /// Extract topic tags from a set of messages. + /// Scans message content for capitalized technical terms, file paths, + /// and common topic patterns. Used to cross-reference with user memory. + #[must_use] + pub fn extract_topic_tags(messages: &[&Message]) -> Vec { + let mut candidates: Vec = Vec::new(); + let mut seen = std::collections::HashSet::new(); + for msg in messages { + for block in &msg.content { + if let ContentBlock::Text { text, .. } = block { + for word in text.split_whitespace() { + let clean = word.trim_matches(|c: char| c.is_ascii_punctuation()); + if clean.len() < 4 { + continue; + } + // Capitalized words (technologies, frameworks, languages) + if clean.starts_with(|c: char| c.is_uppercase()) + && !clean.starts_with(|c: char| c.is_ascii_digit()) + { + let lower = clean.to_lowercase(); + if seen.insert(lower.clone()) { + candidates.push(lower); + } + } + } + } + } + } + candidates.truncate(10); + candidates + } + /// Produce a soft seam for the given message range and level. /// + /// `memory_context` is optional — when provided, it is included in the + /// summarization prompt so the seam can reference user memory preferences + /// relevant to the conversation segment. + /// /// Returns the `` XML block as a string, ready to /// be appended as an assistant message. 
pub async fn produce_soft_seam( @@ -168,6 +208,7 @@ impl SeamManager { end_idx: usize, workspace: Option<&Path>, pinned_indices: &[usize], + memory_context: Option<&str>, ) -> Result { if messages.is_empty() || start_idx >= end_idx { return Ok(String::new()); @@ -203,8 +244,11 @@ impl SeamManager { return Ok(String::new()); } + // Extract topic tags for seam metadata + let topic_tags = Self::extract_topic_tags(&to_summarize); + let summary = self - .summarize_messages(&to_summarize, level, start_idx, end_idx) + .summarize_messages(&to_summarize, level, start_idx, end_idx, memory_context) .await?; let density_label = match level { @@ -227,6 +271,7 @@ impl SeamManager { token_estimate, timestamp, model: self.config.seam_model.clone(), + tags: topic_tags, }); } @@ -243,6 +288,9 @@ impl SeamManager { /// Re-compact existing seams into a higher-level block. Consumes prior /// `` content and fuses it with new messages. + /// + /// `memory_context` is optional user-memory context to include in the + /// recompaction prompt for enriched seam quality. pub async fn recompact( &self, existing_seams: &[String], @@ -250,6 +298,7 @@ impl SeamManager { level: u8, start_idx: usize, end_idx: usize, + memory_context: Option<&str>, ) -> Result { let mut input = String::from( "## Prior Context Summaries\n\n\ @@ -279,6 +328,11 @@ impl SeamManager { _ => (L3_MAX_TOKENS, 400), }; + let memory_section = memory_context + .filter(|c| !c.trim().is_empty()) + .map(|c| format!("\n\n## Relevant User Memory\n\n{c}\n")) + .unwrap_or_default(); + let request = MessageRequest { model: self.config.seam_model.clone(), messages: vec![Message { @@ -289,7 +343,7 @@ impl SeamManager { Preserve: decisions made, file paths, error messages, \ constraints, hypotheses, open questions, and task state. \ Drop: greeting, filler, repeated information. 
\ - Keep it under {word_limit} words.\n\n{input}" + Keep it under {word_limit} words.\n\n{input}{memory_section}" ), cache_control: None, }], @@ -329,6 +383,9 @@ impl SeamManager { let token_estimate = summary.len() / 4; let timestamp = Utc::now(); + // Extract topic tags from recompacted messages + let topic_tags = Self::extract_topic_tags(new_messages); + // Record this recompacted seam. { let mut seams = self.active_seams.lock().await; @@ -339,6 +396,7 @@ impl SeamManager { token_estimate, timestamp, model: self.config.seam_model.clone(), + tags: topic_tags, }); } @@ -359,6 +417,7 @@ impl SeamManager { level: u8, start_idx: usize, end_idx: usize, + memory_context: Option<&str>, ) -> Result { let mut conversation = String::new(); @@ -392,6 +451,11 @@ impl SeamManager { } } + let memory_section = memory_context + .filter(|c| !c.trim().is_empty()) + .map(|c| format!("\n\n## Relevant User Memory\n\nConsider these user preferences and conventions from the user's memory file (they may be relevant to the conversation segment):\n\n{c}\n")) + .unwrap_or_default(); + let (max_tokens, word_limit) = match level { 1 => (L1_MAX_TOKENS, 800), 2 => (L2_MAX_TOKENS, 600), @@ -410,7 +474,7 @@ impl SeamManager { command invocations, error messages, tool-result facts, constraints \ discovered, hypotheses being tested, and open questions. \ Drop: greetings, filler, repeated information, and thinking blocks. \ - Keep it under {word_limit} words.\n\n---\n\n{conversation}" + Keep it under {word_limit} words.{memory_section}\n\n---\n\n{conversation}" ), cache_control: None, }], diff --git a/crates/tui/src/tools/remember.rs b/crates/tui/src/tools/remember.rs index 05b6ff5dd2..612b053003 100644 --- a/crates/tui/src/tools/remember.rs +++ b/crates/tui/src/tools/remember.rs @@ -43,6 +43,11 @@ impl ToolSpec for RememberTool { "note": { "type": "string", "description": "The single-sentence durable note to remember." 
+ }, + "tags": { + "type": "array", + "items": { "type": "string", "description": "A hashtag (with or without leading #)" }, + "description": "Optional tags to attach to this entry for future retrieval. Use tags like \"project:codewhale\", \"type:preference\", or \"scope:config\"." } }, "required": ["note"] @@ -69,13 +74,49 @@ impl ToolSpec for RememberTool { ) })?; - crate::memory::append_entry(path, note).map_err(|err| { + // Extract optional tags, normalizing leading # + let tags: Vec = input + .get("tags") + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str()) + .map(|t| { + let trimmed = t.trim().trim_start_matches('#'); + if trimmed.is_empty() { String::new() } else { trimmed.to_string() } + }) + .filter(|t| !t.is_empty()) + .collect() + }) + .unwrap_or_default(); + + // Auto-tag when the model didn't provide explicit tags + let final_tags: Vec = if tags.is_empty() { + crate::memory::auto_tag(note, 5) + } else { + tags + }; + let tag_refs: Vec<&str> = final_tags.iter().map(String::as_str).collect(); + crate::memory::append_entry(path, note, &tag_refs).map_err(|err| { ToolError::execution_failed(format!("failed to append to {}: {err}", path.display())) })?; + let tag_msg = if final_tags.is_empty() { + String::new() + } else { + format!( + " [{}]", + final_tags + .iter() + .map(|t| format!("#{t}")) + .collect::>() + .join(" ") + ) + }; Ok(ToolResult::success(format!( - "remembered: {}", - note.trim_start_matches('#').trim() + "remembered: {}{}", + note.trim_start_matches('#').trim(), + tag_msg ))) } } @@ -135,4 +176,101 @@ mod tests { let err = tool.execute(json!({}), &ctx).await.unwrap_err(); assert!(err.to_string().to_lowercase().contains("note"), "{err}"); } + + #[tokio::test] + async fn appends_with_tags() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + let ctx = ctx_with_memory(path.clone()); + + let tool = RememberTool; + let result = tool + .execute(json!({"note": "use 4 spaces", "tags": 
["indentation", "rust"]}), &ctx) + .await + .expect("ok"); + assert!(result.success); + + let body = std::fs::read_to_string(&path).expect("read"); + assert!(body.contains("use 4 spaces"), "{body}"); + assert!(body.contains("#indentation"), "{body}"); + assert!(body.contains("#rust"), "{body}"); + } + + #[tokio::test] + async fn appends_with_tags_normalizes_leading_hash() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + let ctx = ctx_with_memory(path.clone()); + + let tool = RememberTool; + let result = tool + .execute(json!({"note": "prefer tabs", "tags": ["#indentation", " #spacing"]}), &ctx) + .await + .expect("ok"); + assert!(result.success); + + let body = std::fs::read_to_string(&path).expect("read"); + assert!(body.contains("#indentation"), "{body}"); + assert!(body.contains("#spacing"), "{body}"); + } + + #[tokio::test] + async fn appends_with_empty_tags_skips() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + let ctx = ctx_with_memory(path.clone()); + + let tool = RememberTool; + let result = tool + .execute(json!({"note": "bare note", "tags": []}), &ctx) + .await + .expect("ok"); + assert!(result.success); + + let body = std::fs::read_to_string(&path).expect("read"); + assert!(body.contains("bare note"), "{body}"); + assert!(!body.contains('#'), "no tag char expected: {body}"); + } + + #[tokio::test] + async fn auto_tags_when_no_tags_provided() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + let ctx = ctx_with_memory(path.clone()); + + let tool = RememberTool; + let result = tool + .execute(json!({"note": "Use CodeWhale with DeepSeek V4"}), &ctx) + .await + .expect("ok"); + assert!(result.success); + // auto_tag should extract "codewhale" and "deepseek" from capitalized words + assert!(result.content.contains("#codewhale"), "result: {}", result.content); + assert!(result.content.contains("#deepseek"), "result: {}", result.content); + + let body = 
std::fs::read_to_string(&path).expect("read"); + assert!(body.contains("Use CodeWhale with DeepSeek V4"), "{body}"); + assert!(body.contains("#codewhale"), "{body}"); + assert!(body.contains("#deepseek"), "{body}"); + } + + #[tokio::test] + async fn explicit_tags_override_auto_tag() { + let tmp = tempdir().unwrap(); + let path = tmp.path().join("memory.md"); + let ctx = ctx_with_memory(path.clone()); + + let tool = RememberTool; + let result = tool + .execute(json!({"note": "Use CodeWhale", "tags": ["manual"]}), &ctx) + .await + .expect("ok"); + assert!(result.success); + // Should NOT auto-tag since explicit tags were provided + assert!(result.content.contains("#manual"), "result: {}", result.content); + assert!(!result.content.contains("#codewhale"), "should not auto-tag: {}", result.content); + + let body = std::fs::read_to_string(&path).expect("read"); + assert!(body.contains("#manual"), "{body}"); + } } diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 51a2e87380..7d2090f4b2 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -1044,9 +1044,24 @@ fn is_memory_quick_add(input: &str) -> bool { /// memory directory becomes visible without crashing the composer. 
fn handle_memory_quick_add(app: &mut App, input: &str, config: &Config) { let path = config.memory_path(); - match crate::memory::append_entry(&path, input) { + let inline_tags = crate::memory::extract_tags(input); + match crate::memory::append_entry(&path, input, &[]) { Ok(()) => { - app.status_message = Some(format!("memory: appended to {}", path.display())); + let tag_hint = if inline_tags.is_empty() { + let note = input.trim_start_matches('#').trim(); + let auto_tags = crate::memory::auto_tag(note, 3); + if auto_tags.is_empty() { + String::new() + } else { + format!( + " [auto: {}]", + auto_tags.iter().map(|t| format!("#{t}")).collect::>().join(" ") + ) + } + } else { + format!(" [{}]", inline_tags.join(" ")) + }; + app.status_message = Some(format!("memory: appended to {}{}", path.display(), tag_hint)); } Err(err) => { app.status_message = Some(format!( From 6d1c487069e3e65ae8d4b1a5baee9ed3f9416d25 Mon Sep 17 00:00:00 2001 From: laopan <147567034@qq.com> Date: Mon, 22 Jun 2026 14:31:41 +0800 Subject: [PATCH 2/3] fix(memory): case-insensitive tag search, auto_tag dedup, remove stale dead_code - search_by_tags: case-insensitive matching (was case-sensitive in memory.rs, inconsistent with memory_index.rs) - auto_tag: change redundant if/if to if/else for capitalized+special words - memory_index: remove stale #[allow(dead_code)] on search_by_tags (used by engine.rs) --- crates/tui/src/memory.rs | 10 ++++------ crates/tui/src/memory_index.rs | 1 - 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/crates/tui/src/memory.rs b/crates/tui/src/memory.rs index d9df1ec1ca..df43508c9b 100644 --- a/crates/tui/src/memory.rs +++ b/crates/tui/src/memory.rs @@ -268,18 +268,18 @@ pub fn list_tags(content: &str) -> Vec<(String, usize)> { } /// Filter entries that match any of the given tags (OR logic). Tag -/// matching is case-sensitive and supports both `#tag` and `tag` forms. +/// matching is case-insensitive and supports both `#tag` and `tag` forms. 
pub fn search_by_tags<'a>(entries: &'a [MemoryEntry], tags: &[&str]) -> Vec<&'a MemoryEntry> { if tags.is_empty() { return entries.iter().collect(); } let normalized: Vec = tags .iter() - .map(|t| t.trim_start_matches('#').to_string()) + .map(|t| t.trim_start_matches('#').to_lowercase()) .collect(); entries .iter() - .filter(|e| normalized.iter().any(|t| e.tags.iter().any(|et| et == t))) + .filter(|e| normalized.iter().any(|t| e.tags.iter().any(|et| et.to_lowercase() == *t))) .collect() } @@ -315,9 +315,7 @@ pub fn auto_tag(text: &str, max_tags: usize) -> Vec { if seen.insert(tag.clone()) { candidates.push(tag); } - } - // Words with non-alphanumeric chars (camelCase, snake_case, namespaced) - if clean.contains(|c: char| !c.is_alphanumeric() && c != '\'') { + } else if clean.contains(|c: char| !c.is_alphanumeric() && c != '\'') { let tag = clean.to_lowercase(); if seen.insert(tag.clone()) { candidates.push(tag); diff --git a/crates/tui/src/memory_index.rs b/crates/tui/src/memory_index.rs index 5fb4f84117..3613ee5e54 100644 --- a/crates/tui/src/memory_index.rs +++ b/crates/tui/src/memory_index.rs @@ -88,7 +88,6 @@ impl MemoryIndex { /// Search by tags (OR logic — any matching tag). Returns matching /// entries in display order. 
- #[allow(dead_code)] #[must_use] pub fn search_by_tags(&self, tags: &[&str]) -> Vec<&MemoryEntry> { if tags.is_empty() { From a95901db482e1c8cdc0ad563e4c24b74487434aa Mon Sep 17 00:00:00 2001 From: laopan <147567034@qq.com> Date: Mon, 22 Jun 2026 15:00:25 +0800 Subject: [PATCH 3/3] =?UTF-8?q?fix:=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20trailing=20#,=20multi-byte=20whitespace,=20empty=20?= =?UTF-8?q?tag,=20case-sensitive=20dedup,=20sorted=20intersection?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - engine.rs: avoid trailing '#' when tags list is empty - memory.rs (commands): fix split_subcommand panic on multi-byte whitespace - memory.rs (commands): validate --tag is not empty before searching - memory.rs (core): case-insensitive tag dedup in append_entry - memory_index.rs: replace HashSet intersections with sorted slice O(m+n) - memory_index.rs: remove HashSet import --- .../tui/src/commands/groups/memory/memory.rs | 15 +- crates/tui/src/core/engine.rs | 8 +- crates/tui/src/memory.rs | 4 +- crates/tui/src/memory_index.rs | 140 +++++++++++------- 4 files changed, 107 insertions(+), 60 deletions(-) diff --git a/crates/tui/src/commands/groups/memory/memory.rs b/crates/tui/src/commands/groups/memory/memory.rs index fcd350ef91..c4c5e30f84 100644 --- a/crates/tui/src/commands/groups/memory/memory.rs +++ b/crates/tui/src/commands/groups/memory/memory.rs @@ -52,7 +52,15 @@ fn split_subcommand(arg: Option<&str>) -> (&str, Option<&str>) { Some(a) => { let trimmed = a.trim(); match trimmed.find(char::is_whitespace) { - Some(pos) => (&trimmed[..pos], Some(trimmed[pos + 1..].trim_start())), + Some(pos) => { + let sub = &trimmed[..pos]; + let rest = trimmed[pos..].trim_start(); + if rest.is_empty() { + (sub, None) + } else { + (sub, Some(rest)) + } + } None => (trimmed, None), } } @@ -152,6 +160,11 @@ pub fn memory(app: &mut App, arg: Option<&str>) -> CommandResult { // Check for --tag flag let results: 
Vec<&crate::memory::MemoryEntry> = if query.starts_with("--tag ") { let tag = query.trim_start_matches("--tag ").trim(); + if tag.is_empty() { + return CommandResult::error( + "Usage: /memory search --tag (tag must not be empty)", + ); + } crate::memory::search_by_tags(&entries, &[tag]) } else { crate::memory::search_text(&entries, query) diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 0aa206f675..a91cac04e2 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -2814,7 +2814,13 @@ impl Engine { }; matched .iter() - .map(|e| format!("- ({}) {} #{}", e.timestamp, e.body, e.tags.join(" #"))) + .map(|e| { + if e.tags.is_empty() { + format!("- ({}) {}", e.timestamp, e.body) + } else { + format!("- ({}) {} #{}", e.timestamp, e.body, e.tags.join(" #")) + } + }) .collect::>() .join("\n") }) diff --git a/crates/tui/src/memory.rs b/crates/tui/src/memory.rs index df43508c9b..947d5c57cf 100644 --- a/crates/tui/src/memory.rs +++ b/crates/tui/src/memory.rs @@ -162,11 +162,11 @@ pub fn append_entry(path: &Path, entry: &str, extra_tags: &[&str]) -> io::Result )); } - // Merge and deduplicate tags + // Merge and deduplicate tags (case-insensitive to avoid e.g. 
#Rust / #rust) let mut all_tags: Vec<&str> = Vec::new(); for t in inline_tags.into_iter().chain(extra_tags.iter().copied()) { let tag = t.trim_start_matches('#'); - if !tag.is_empty() && !all_tags.contains(&tag) { + if !tag.is_empty() && !all_tags.iter().any(|existing| existing.eq_ignore_ascii_case(tag)) { all_tags.push(tag); } } diff --git a/crates/tui/src/memory_index.rs b/crates/tui/src/memory_index.rs index 3613ee5e54..026bec7b2a 100644 --- a/crates/tui/src/memory_index.rs +++ b/crates/tui/src/memory_index.rs @@ -1,4 +1,4 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::HashMap; use crate::memory::MemoryEntry; @@ -86,6 +86,26 @@ impl MemoryIndex { &self.entries } + /// Intersect two sorted, deduped slices and return the intersection + /// in sorted order (two-pointer merge, O(m+n)). + fn intersect_sorted(a: &[usize], b: &[usize]) -> Vec { + let mut result = Vec::new(); + let mut i = 0; + let mut j = 0; + while i < a.len() && j < b.len() { + if a[i] < b[j] { + i += 1; + } else if a[i] > b[j] { + j += 1; + } else { + result.push(a[i]); + i += 1; + j += 1; + } + } + result + } + /// Search by tags (OR logic — any matching tag). Returns matching /// entries in display order. #[must_use] @@ -93,18 +113,34 @@ impl MemoryIndex { if tags.is_empty() { return self.entries.iter().collect(); } - let mut matched = HashSet::new(); + let mut matched = Vec::new(); for tag in tags { let key = tag.trim_start_matches('#').to_lowercase(); if let Some(indices) = self.tag_index.get(&key) { for &i in indices { - matched.insert(i); + if !matched.contains(&i) { + matched.push(i); + } } } } - let mut indices: Vec = matched.into_iter().collect(); - indices.sort_unstable(); - indices.iter().map(|&i| &self.entries[i]).collect() + matched.sort_unstable(); + matched.iter().map(|&i| &self.entries[i]).collect() + } + + /// Union of multiple sorted, deduped slices. Each input is sorted + /// and deduped; the result is sorted and deduped (O(N) merge). 
/// Union of several index slices, returned sorted and without duplicates.
///
/// All elements are concatenated, then sorted and deduplicated, which costs
/// O(N log N) for N total elements. This is simpler than an n-way merge and
/// does not require the inputs to be sorted.
fn union_sorted(slices: &[&[usize]]) -> Vec<usize> {
    let total: usize = slices.iter().map(|s| s.len()).sum();
    // A zero capacity does not allocate, so empty input needs no special case.
    let mut all: Vec<usize> = Vec::with_capacity(total);
    for s in slices {
        all.extend_from_slice(s);
    }
    all.sort_unstable();
    all.dedup();
    all
}
@@ -158,24 +191,23 @@ impl MemoryIndex { return self.entries.iter().collect(); } - let entry_set: HashSet = (0..self.entries.len()).collect(); - - let tag_indices: HashSet = if tags.is_empty() { - entry_set.clone() + let tag_indices: Vec = if tags.is_empty() { + (0..self.entries.len()).collect() } else { - let mut s = HashSet::new(); - for tag in tags { - let key = tag.trim_start_matches('#').to_lowercase(); - if let Some(indices) = self.tag_index.get(&key) { - for &i in indices { - s.insert(i); - } - } + let matched: Vec<&[usize]> = tags + .iter() + .filter_map(|tag| { + let key = tag.trim_start_matches('#').to_lowercase(); + self.tag_index.get(&key).map(|v| v.as_slice()) + }) + .collect(); + if matched.is_empty() { + return Vec::new(); } - s + Self::union_sorted(&matched) }; - let text_indices: HashSet = if let Some(query) = text { + let text_indices: Vec = if let Some(query) = text { let words: Vec = query .split_whitespace() .filter_map(|w| { @@ -191,32 +223,28 @@ impl MemoryIndex { }) .collect(); if words.is_empty() { - entry_set + (0..self.entries.len()).collect() } else { - let mut result: Option> = None; - for word in &words { - if let Some(indices) = self.text_index.get(word) { - let set: HashSet = indices.iter().copied().collect(); - result = Some(match result { - Some(existing) => existing.intersection(&set).copied().collect(), - None => set, - }); - } else { - return Vec::new(); + let mut iter = words.iter(); + let first = match iter.next().and_then(|w| self.text_index.get(w)) { + Some(v) => v.as_slice().to_vec(), + None => return Vec::new(), + }; + iter.fold(first, |acc, word| { + match self.text_index.get(word) { + Some(indices) => Self::intersect_sorted(&acc, indices.as_slice()), + None => Vec::new(), } - } - result.unwrap_or_default() + }) } } else { - entry_set + (0..self.entries.len()).collect() }; - let mut indices: Vec = tag_indices - .intersection(&text_indices) - .copied() - .collect(); - indices.sort_unstable(); - indices.iter().map(|&i| 
&self.entries[i]).collect() + Self::intersect_sorted(&tag_indices, &text_indices) + .iter() + .map(|&i| &self.entries[i]) + .collect() } /// Get all unique tags with their occurrence counts, sorted by