CGFixIT · CGFixIT · Jun 20, 2026 · Jun 20, 2026
diff --git a/retrieval/embeddings.py b/retrieval/embeddings.py
@@ -42,10 +42,34 @@ def _embeddings_cfg(config_path: str) -> tuple:
         emb_cfg = yaml.safe_load(f)["models"]["embeddings"]
     return emb_cfg["model"], emb_cfg.get("cache_dir", "")
 
-def get_embedding(text: str, config_path: str = "config.yaml") -> List[float]:
-    model_name, cache_dir = _embeddings_cfg(config_path)
+@lru_cache(maxsize=2048)
+def _cached_embedding(text: str, model_name: str, cache_dir: str) -> tuple:
+    """Memoize query embeddings keyed on (text, model_name, cache_dir).
+
+    Encoding a query is a full SentenceTransformer forward pass -- the most
+    expensive step on the retrieval hot path -- so identical queries reuse the
+    stored vector instead of re-running the model. The key is the resolved
+    model name and cache dir rather than the config path, so a config that
+    resolves to a different model gets its own entries and is never served
+    vectors produced by another one. The cached value is an immutable tuple
+    so it can be safely shared across callers.
+    """
     model = _load_model(model_name, cache_dir)
-    return model.encode(text, normalize_embeddings=True).tolist()
+    return tuple(model.encode(text, normalize_embeddings=True).tolist())
+
+def get_embedding(text: str, config_path: str = "config.yaml") -> List[float]:
+    """Return the normalized embedding of ``text`` as a fresh list of floats.
+
+    The model is resolved from ``config_path`` on every call; only the encode
+    step is memoized. A new list is built per call so callers may mutate the
+    result without corrupting the cache.
+    """
+    model_name, cache_dir = _embeddings_cfg(config_path)
+    return list(_cached_embedding(text, model_name, cache_dir))
+
+def reset_embedding_cache() -> None:
+    """Clear the memoized query-embedding cache (e.g. after a model swap)."""
+    _cached_embedding.cache_clear()
 
 def get_embeddings_batch(texts: List[str], config_path: str = "config.yaml") -> List[List[float]]:
     model_name, cache_dir = _embeddings_cfg(config_path)

diff --git a/utils/health.py b/utils/health.py
@@ -5,16 +5,32 @@
 """
 
+import os
 import time
+from functools import lru_cache
 from typing import List
 
 import httpx
 import yaml
 
 from .errors import HealthStatus, LLMServiceError
 
-def check_all(config_path: str = "config.yaml") -> List[HealthStatus]:
+@lru_cache(maxsize=8)
+def _health_cfg(config_path: str, mtime_ns: int) -> dict:
+    """Parse the config once per (path, modification time).
+
+    check_all runs on every /health request, so re-reading and re-parsing the
+    YAML each time was avoidable disk + parse I/O. ``mtime_ns`` is unused in
+    the body; it only extends the cache key so that an edited config file is
+    re-parsed instead of being served stale. The returned dict is shared
+    between calls and must be treated as read-only.
+    """
     with open(config_path, encoding="utf-8") as f:
-        cfg = yaml.safe_load(f)
+        return yaml.safe_load(f)
+
+def check_all(config_path: str = "config.yaml") -> List[HealthStatus]:
+    # One stat() per request is far cheaper than a YAML parse and keeps the
+    # health check pointed at the endpoints currently configured on disk.
+    cfg = _health_cfg(config_path, os.stat(config_path).st_mtime_ns)
     results = []
     llm_base = cfg["models"]["local_llm"]["base_url"]
     results.append(_ping(f"{llm_base}/models", "lm_studio"))

diff --git a/utils/logger.py b/utils/logger.py
@@ -11,8 +11,9 @@
 import logging
 import re
 from datetime import datetime, timezone
+from functools import lru_cache
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Tuple
 
 import yaml
 
@@ -65,19 +66,52 @@ def reset_config_cache() -> None:
 def hash_query(query: str) -> str:
     return hashlib.sha256(query.encode("utf-8")).hexdigest()
 
+@lru_cache(maxsize=8)
+def _compiled_redactors(
+    redact_emails: bool, redact_ips: bool, secret_patterns: Tuple[str, ...]
+) -> Tuple[Tuple[re.Pattern, str], ...]:
+    """Build the active (pattern, replacement) pairs once per privacy config.
+
+    redact_sensitive runs on every audited field of every query, so the
+    pattern set is assembled once and reused rather than rebuilt per call.
+    Keyed on the (hashable) privacy settings so a config change still
+    produces a fresh pattern set.
+
+    An invalid ``redact_secrets_like`` entry is skipped with a warning rather
+    than raising: a silently dropped pattern would leave the secret it was
+    meant to catch unredacted with no trace of why. The result is cached, so
+    the warning is emitted once per cached configuration, not once per call.
+    """
+    compiled = []
+    if redact_emails:
+        compiled.append((re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'),
+                         '[REDACTED_EMAIL]'))
+    if redact_ips:
+        compiled.append((re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'),
+                         '[REDACTED_IP]'))
+    for pattern in secret_patterns:
+        try:
+            compiled.append((re.compile(pattern), '[REDACTED_SECRET]'))
+        except re.error as exc:
+            logging.getLogger(__name__).warning(
+                "Skipping invalid redact_secrets_like pattern %r: %s", pattern, exc
+            )
+    return tuple(compiled)
+
 def redact_sensitive(text: str, cfg: Optional[dict] = None) -> str:
+    """Return ``text`` with the configured e-mail, IP and secret patterns masked."""
     if cfg is None:
         cfg = _get_config()
     privacy = cfg.get("policy", {}).get("privacy", {})
-    if privacy.get("redact_emails", False):
-        text = re.sub(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', '[REDACTED_EMAIL]', text)
-    if privacy.get("redact_ips", False):
-        text = re.sub(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', '[REDACTED_IP]', text)
-    for pattern in privacy.get("redact_secrets_like", []):
-        try:
-            text = re.sub(pattern, '[REDACTED_SECRET]', text)
-        except re.error:
-            pass
+    redactors = _compiled_redactors(
+        # bool() keeps the cache key hashable and canonical whatever truthy
+        # value the YAML supplies, matching the old truthiness checks.
+        bool(privacy.get("redact_emails", False)),
+        bool(privacy.get("redact_ips", False)),
+        tuple(privacy.get("redact_secrets_like", []) or []),
+    )
+    for pattern, replacement in redactors:
+        text = pattern.sub(replacement, text)
     return text
 
 def audit_log(event: dict, config_path: str = "config.yaml") -> None:

diff --git a/utils/personality.py b/utils/personality.py
@@ -44,12 +44,6 @@
     "INSERT INTO soul_versions (sha256, content, reason, timestamp) VALUES (?, ?, ?, ?)"  # DevSkim: ignore DS197836
 )
 
-# SQL stores the content's SHA-256 digest alongside a UTC timestamp as metadata —
-# the hash is of *file content*, not of the time value.
-_SQL_INSERT_SOUL_VERSION = (
-    "INSERT INTO soul_versions (sha256, content, reason, timestamp) VALUES (?, ?, ?, ?)"  # DevSkim: ignore DS197836
-)
-
 
 class PersonalityManager:
     def __init__(self, cfg: dict):