improve caching

PeterM45 · PeterM45 · commit ffcd81cfa8d5 · 2025-03-29T23:21:04.000-04:00
diff --git a/src/core/config.zig b/src/core/config.zig
@@ -12,6 +12,18 @@ pub const UI = struct {
     pub const TEXT_COLOR = 0x00303030;
     /// Selected item text color (white)
     pub const SELECTED_TEXT_COLOR = 0x00FFFFFF;
+    /// Average character width as a proportion of font height (for width estimation)
+    pub const AVG_CHAR_WIDTH_RATIO = 0.6;
+    /// Default popup width (pixels)
+    pub const DEFAULT_POPUP_WIDTH = 200;
+    /// Default popup height (pixels)
+    pub const DEFAULT_POPUP_HEIGHT = 150;
+    /// Screen edge padding (pixels)
+    pub const SCREEN_EDGE_PADDING = 10;
+    /// Base DPI value for scaling calculations
+    pub const BASE_DPI = 96.0;
+    /// Vertical offset below caret (pixels)
+    pub const CARET_VERTICAL_OFFSET = 20;
 };
 
 /// Text handling configuration
@@ -33,3 +45,23 @@ pub const BEHAVIOR = struct {
     /// Maximum edit distance for spelling corrections
     pub const MAX_EDIT_DISTANCE = 2;
 };
+
+/// Performance configuration: caching switches and scan limits
+pub const PERFORMANCE = struct {
+    /// Position cache lifetime in milliseconds (how long to use cached positions)
+    pub const POSITION_CACHE_LIFETIME_MS = 200;
+    /// Whether to use caching for suggestions (when false, cache lookups never match)
+    pub const USE_SUGGESTION_CACHE = true;
+    /// Whether to use caching for positions
+    pub const USE_POSITION_CACHE = true;
+    /// Maximum number of user-word entries scanned per suggestion lookup
+    pub const MAX_USER_WORDS_TO_CHECK = 1000;
+};
+
+/// Window class specific adjustments
+pub const WINDOW_CLASS_ADJUSTMENTS = struct {
+    /// Vertical offset adjustment for Edit controls (presumably pixels; TODO confirm at use site)
+    pub const EDIT_CONTROL_OFFSET = 5;
+    /// Vertical offset adjustment for RichEdit controls (presumably pixels; TODO confirm at use site)
+    pub const RICHEDIT_CONTROL_OFFSET = -5;
+};
diff --git a/src/text/autocomplete.zig b/src/text/autocomplete.zig
@@ -4,10 +4,62 @@ const sysinput = @import("root").sysinput;
 const dict = sysinput.text.dictionary;
 const insertion = sysinput.text.insertion;
 const config = sysinput.core.config;
+const debug = sysinput.core.debug;
 
 /// Maximum number of suggestions to generate
 const MAX_SUGGESTIONS = config.TEXT.MAX_SUGGESTIONS;
 
+/// Recent suggestion cache to avoid recalculating; owns copies of what it stores
+const SuggestionCache = struct {
+    /// The input word that generated these suggestions
+    input: [config.TEXT.MAX_SUGGESTION_LEN]u8,
+    /// Input length (since input might contain garbage past this)
+    input_len: usize,
+    /// Private bytes backing `suggestions` (the caller's strings get freed)
+    storage: [MAX_SUGGESTIONS][config.TEXT.MAX_SUGGESTION_LEN]u8 = undefined,
+    /// Cached suggestions as views into `storage`; do not move the cache while valid
+    suggestions: [MAX_SUGGESTIONS][]const u8,
+    /// Number of valid suggestions in the cache
+    count: usize,
+    /// Whether the cache is valid
+    valid: bool,
+
+    pub fn init() SuggestionCache {
+        return .{
+            .input = [_]u8{0} ** config.TEXT.MAX_SUGGESTION_LEN,
+            .input_len = 0,
+            .suggestions = [_][]const u8{""} ** MAX_SUGGESTIONS,
+            .count = 0,
+            .valid = false,
+        };
+    }
+
+    pub fn isMatch(self: *const SuggestionCache, input: []const u8) bool {
+        if (!config.PERFORMANCE.USE_SUGGESTION_CACHE) return false;
+        if (!self.valid or input.len != self.input_len) return false;
+        return std.mem.eql(u8, input, self.input[0..self.input_len]);
+    }
+
+    /// Copy `input` and up to MAX_SUGGESTIONS suggestions in; oversized text drops the cache
+    pub fn update(self: *SuggestionCache, input: []const u8, new_suggestions: [][]const u8) void {
+        if (input.len >= self.input.len) return; // Too long for our cache
+        self.count = @min(new_suggestions.len, MAX_SUGGESTIONS);
+        for (new_suggestions[0..self.count], 0..) |text, i| {
+            if (text.len > self.storage[i].len) return self.invalidate();
+            @memcpy(self.storage[i][0..text.len], text);
+            self.suggestions[i] = self.storage[i][0..text.len];
+        }
+        @memcpy(self.input[0..input.len], input);
+        self.input_len = input.len;
+        self.valid = true;
+    }
+
+    pub fn invalidate(self: *SuggestionCache) void {
+        self.valid = false;
+        self.count = 0;
+    }
+};
+
 /// Autocompletion engine structure
 pub const AutocompleteEngine = struct {
     /// Dictionary for base vocabulary
@@ -18,6 +70,8 @@ pub const AutocompleteEngine = struct {
     allocator: std.mem.Allocator,
     /// Current partial word being typed
     current_word: []const u8,
+    /// Suggestion cache for performance
+    cache: SuggestionCache,
 
     /// Initialize a new autocompletion engine
     pub fn init(allocator: std.mem.Allocator, dictionary: *dict.Dictionary) !AutocompleteEngine {
@@ -26,6 +80,7 @@ pub const AutocompleteEngine = struct {
             .user_words = std.StringHashMap(u32).init(allocator),
             .allocator = allocator,
             .current_word = "",
+            .cache = SuggestionCache.init(),
         };
     }
 
@@ -40,11 +95,11 @@ pub const AutocompleteEngine = struct {
 
     /// Add a word to the user's vocabulary
     pub fn addWord(self: *AutocompleteEngine, word: []const u8) !void {
-        // Don't add short words or empty strings
-        if (word.len < 2) return;
+        // Don't add words that are too short (using config value)
+        if (word.len < config.BEHAVIOR.MIN_TRIGGER_LEN) return;
 
         // Convert to lowercase
-        var buf: [256]u8 = undefined;
+        var buf: [config.TEXT.MAX_SUGGESTION_LEN]u8 = undefined;
         if (word.len > buf.len) return;
 
         var i: usize = 0;
@@ -64,66 +119,159 @@ pub const AutocompleteEngine = struct {
             // Add it to the user's words with a count of 1
             try self.user_words.put(owned_word, 1);
         }
+
+        // Invalidate suggestion cache whenever vocabulary changes
+        self.cache.invalidate();
     }
 
     /// Set the current partial word being typed
     pub fn setCurrentWord(self: *AutocompleteEngine, word: []const u8) void {
-        self.current_word = word;
+        // Always adopt the caller's slice, even when the text is unchanged:
+        // equal bytes may live in a different buffer, and comparing first costs
+        // more than the assignment while risking a stale `current_word` pointer.
+        self.current_word = word;
+    }
+
+    /// Check if a string starts with a prefix (ASCII case-insensitive).
+    /// Used to match the typed partial word against candidate words.
+    ///
+    /// - `haystack`: text to inspect.
+    /// - `needle`: prefix to look for; an empty prefix always matches.
+    ///
+    /// Returns false when `needle` is longer than `haystack`. Only ASCII
+    /// letters are case-folded; every other byte must match exactly.
+    fn startsWithInsensitive(haystack: []const u8, needle: []const u8) bool {
+        // The standard library helper does the same length check and
+        // per-byte lowercase comparison as a hand-written loop would.
+        return std.ascii.startsWithIgnoreCase(haystack, needle);
     }
 
     /// Get suggestions based on the current partial word
     pub fn getSuggestions(self: *AutocompleteEngine, results: *std.ArrayList([]const u8)) !void {
-        // Don't provide suggestions for very short partial words
-        if (self.current_word.len < 1) return;
+        // Free and clear suggestions left from a previous call (results owns its strings)
+        for (results.items) |item| {
+            self.allocator.free(item);
+        }
+        results.clearRetainingCapacity();
+
+        // Don't provide suggestions for very short partial words (using config)
+        if (self.current_word.len < config.BEHAVIOR.MIN_TRIGGER_LEN) return;
+
+        // Check cache first
+        if (self.cache.isMatch(self.current_word)) {
+            debug.debugPrint("Using cached suggestions for '{s}'\n", .{self.current_word});
+
+            // Copy cached suggestions to results
+            for (self.cache.suggestions[0..self.cache.count]) |cached_suggestion| {
+                const owned_suggestion = try self.allocator.dupe(u8, cached_suggestion);
+                errdefer self.allocator.free(owned_suggestion); // don't leak the copy if append fails
+                try results.append(owned_suggestion);
+            }
+            return;
+        }
 
         // Convert to lowercase for matching
-        var buf: [256]u8 = undefined;
+        var buf: [config.TEXT.MAX_SUGGESTION_LEN]u8 = undefined;
         if (self.current_word.len > buf.len) return;
 
         var i: usize = 0;
         while (i < self.current_word.len) : (i += 1) {
             buf[i] = std.ascii.toLower(self.current_word[i]);
         }
+        const lower_word = buf[0..self.current_word.len];
+
+        // Pre-allocate a buffer to track highest frequency user words
+        var top_user_words = std.BoundedArray(struct { word: []const u8, freq: u32 }, MAX_SUGGESTIONS).init(0) catch unreachable;
 
-        // First try user's words
-        var user_words_added: usize = 0;
+        // First scan user's words - We'll collect top N by frequency instead of just taking the first N
         var it = self.user_words.iterator();
+        var words_checked: usize = 0;
+        const max_words_to_check = config.PERFORMANCE.MAX_USER_WORDS_TO_CHECK;
+
         while (it.next()) |entry| {
             const word = entry.key_ptr.*;
+            const freq = entry.value_ptr.*;
 
-            // If the word starts with the current partial word
-            if (std.mem.startsWith(u8, word, buf[0..self.current_word.len]) and
-                !std.mem.eql(u8, word, buf[0..self.current_word.len]))
-            {
-                // Create a copy of the word that WE OWN
-                const owned_suggestion = try self.allocator.dupe(u8, word);
-                errdefer self.allocator.free(owned_suggestion);
-
-                try results.append(owned_suggestion);
+            // Limit the number of words we check for performance
+            words_checked += 1;
+            if (words_checked > max_words_to_check) break;
 
-                user_words_added += 1;
-                if (user_words_added >= MAX_SUGGESTIONS) break;
+            // If the word starts with the current partial word (and isn't exactly the same)
+            if (startsWithInsensitive(word, lower_word) and
+                !std.mem.eql(u8, word, lower_word))
+            {
+                // Add to our bounded array, maintaining sorting by frequency
+                if (top_user_words.len < MAX_SUGGESTIONS) {
+                    // Just add it if we have room
+                    try top_user_words.append(.{ .word = word, .freq = freq });
+                    // Insertion sort to keep it sorted by frequency (highest first)
+                    var j = top_user_words.len - 1;
+                    while (j > 0 and top_user_words.get(j).freq > top_user_words.get(j - 1).freq) {
+                        const temp = top_user_words.get(j);
+                        top_user_words.set(j, top_user_words.get(j - 1));
+                        top_user_words.set(j - 1, temp);
+                        j -= 1;
+                    }
+                } else if (freq > top_user_words.get(top_user_words.len - 1).freq) {
+                    // Replace lowest frequency word if this one is higher
+                    top_user_words.set(top_user_words.len - 1, .{ .word = word, .freq = freq });
+                    // Bubble up to maintain sorting
+                    var j = top_user_words.len - 1;
+                    while (j > 0 and top_user_words.get(j).freq > top_user_words.get(j - 1).freq) {
+                        const temp = top_user_words.get(j);
+                        top_user_words.set(j, top_user_words.get(j - 1));
+                        top_user_words.set(j - 1, temp);
+                        j -= 1;
+                    }
+                }
             }
         }
 
+        // Add top user words to results (already ordered highest frequency first)
+        for (top_user_words.constSlice()) |top| {
+            const owned_suggestion = try self.allocator.dupe(u8, top.word);
+            errdefer self.allocator.free(owned_suggestion); // don't leak the copy if append fails
+            try results.append(owned_suggestion);
+        }
+
         // If we don't have enough user words, add suggestions from dictionary
-        if (user_words_added < MAX_SUGGESTIONS) {
+        if (top_user_words.len < MAX_SUGGESTIONS) {
+            // Track how many more suggestions we need
+            const needed = MAX_SUGGESTIONS - top_user_words.len;
+
             var dict_iter = self.dictionary.word_map.keyIterator();
+            var dict_count: usize = 0;
+
             while (dict_iter.next()) |dict_word| {
                 // If the dictionary word starts with the current partial word
-                if (std.mem.startsWith(u8, dict_word.*, buf[0..self.current_word.len]) and
-                    !std.mem.eql(u8, dict_word.*, buf[0..self.current_word.len]))
+                if (startsWithInsensitive(dict_word.*, lower_word) and
+                    !std.mem.eql(u8, dict_word.*, lower_word))
                 {
-                    // Create a copy of the word that WE OWN
-                    const owned_suggestion = try self.allocator.dupe(u8, dict_word.*);
-                    errdefer self.allocator.free(owned_suggestion);
+                    // Skip if this word is already in our results (from user words)
+                    var skip = false;
+                    for (0..top_user_words.len) |j| {
+                        if (std.mem.eql(u8, dict_word.*, top_user_words.get(j).word)) {
+                            skip = true;
+                            break;
+                        }
+                    }
 
-                    try results.append(owned_suggestion);
+                    if (!skip) {
+                        const owned_suggestion = try self.allocator.dupe(u8, dict_word.*);
+                        errdefer self.allocator.free(owned_suggestion); // don't leak the copy if append fails
+                        try results.append(owned_suggestion);
 
-                    if (results.items.len >= MAX_SUGGESTIONS) break;
+                        dict_count += 1;
+                        if (dict_count >= needed) break;
+                    }
                 }
             }
         }
+
+        // Update cache with these new suggestions
+        if (results.items.len > 0) {
+            self.cache.update(self.current_word, results.items);
+        }
     }
 
     /// Process text to extract and learn words
@@ -154,5 +302,8 @@ pub const AutocompleteEngine = struct {
     pub fn completeWord(self: *AutocompleteEngine, word: []const u8) !void {
         try self.addWord(word);
         self.current_word = "";
+
+        // Reset cache here too: addWord can return early without invalidating it
+        self.cache.invalidate();
     }
 };
diff --git a/src/text/edit_distance.zig b/src/text/edit_distance.zig
@@ -28,48 +28,31 @@ pub fn compareByScore(context: void, a: Suggestion, b: Suggestion) bool {
 
 /// Calculate Levenshtein edit distance between two strings with enhancements
 pub fn enhancedEditDistance(a: []const u8, b: []const u8) usize {
-    // Handle edge cases
+    // Handle simple cases immediately
     if (a.len == 0) return b.len;
     if (b.len == 0) return a.len;
+    if (std.mem.eql(u8, a, b)) return 0; // Identical strings
 
-    // Early exit for identical strings
-    if (std.mem.eql(u8, a, b)) return 0;
-
-    // More aggressive early termination for very different lengths
+    // Early termination for strings with very different lengths
     const len_diff = if (a.len > b.len) a.len - b.len else b.len - a.len;
     if (len_diff > MAX_EDIT_DISTANCE) {
-        return len_diff; // Will exceed max distance, so return early
-    }
-
-    // Quick check: if first and last characters don't match, that's already 2 operations
-    if (a.len > 1 and b.len > 1) {
-        var different_chars: usize = 0;
-        if (a[0] != b[0]) different_chars += 1;
-        if (a[a.len - 1] != b[b.len - 1]) different_chars += 1;
-
-        // If both ends differ and strings are long, we can often skip full calculation
-        if (different_chars == 2 and a.len > 5 and b.len > 5) {
-            // If first 2 chars also differ, this is likely not a close match
-            if (a.len > 2 and b.len > 2 and a[1] != b[1]) {
-                return MAX_EDIT_DISTANCE + 1; // Return a value above our threshold
-            }
-        }
+        return len_diff; // Will exceed max distance anyway, so return early
     }
 
-    // If first two characters don't match and words are long enough,
-    // that's another signal they might be quite different
-    if (a.len > 1 and b.len > 1 and a[0] != b[0] and a[1] != b[1]) {
-        // Count additional differences in the first 4 chars
-        var initial_diff_count: usize = 0;
-        const check_len = @min(4, @min(a.len, b.len));
+    // Quick check of first few characters
+    if (a.len > 2 and b.len > 2) {
+        var diff_count: usize = 0;
+        const check_len = @min(3, @min(a.len, b.len));
 
         for (0..check_len) |i| {
-            if (a[i] != b[i]) initial_diff_count += 1;
+            if (a[i] != b[i]) {
+                diff_count += 1;
+            }
         }
 
-        // If we have 3+ differences in the first 4 chars, likely exceeds threshold
-        if (initial_diff_count > 2 and a.len > 5 and b.len > 5) {
-            return MAX_EDIT_DISTANCE + 1;
+        // If first few chars are very different, likely a poor match
+        if (diff_count >= 2) {
+            return MAX_EDIT_DISTANCE + 1; // Return value that exceeds our threshold
         }
     }
 
diff --git a/src/ui/position.zig b/src/ui/position.zig