diff --git a/retrieval/embeddings.py b/retrieval/embeddings.py
index b2ac45d..6000f7f 100644
--- a/retrieval/embeddings.py
+++ b/retrieval/embeddings.py
@@ -42,10 +42,25 @@ def _embeddings_cfg(config_path: str) -> tuple:
         emb_cfg = yaml.safe_load(f)["models"]["embeddings"]
     return emb_cfg["model"], emb_cfg.get("cache_dir", "")
 
-def get_embedding(text: str, config_path: str = "config.yaml") -> List[float]:
+@lru_cache(maxsize=2048)
+def _cached_embedding(text: str, config_path: str) -> tuple:
+    """Memoize query embeddings keyed on (text, config_path).
+
+    Encoding a query is a full SentenceTransformer forward pass -- the most
+    expensive step on the retrieval hot path. Identical queries (common in
+    practice) previously re-ran the model every time. The cached value is an
+    immutable tuple so it can be safely shared across callers.
+    """
     model_name, cache_dir = _embeddings_cfg(config_path)
     model = _load_model(model_name, cache_dir)
-    return model.encode(text, normalize_embeddings=True).tolist()
+    return tuple(model.encode(text, normalize_embeddings=True).tolist())
+
+def get_embedding(text: str, config_path: str = "config.yaml") -> List[float]:
+    return list(_cached_embedding(text, config_path))
+
+def reset_embedding_cache() -> None:
+    """Clear the memoized query-embedding cache (e.g. after a model swap)."""
+    _cached_embedding.cache_clear()
 
 def get_embeddings_batch(texts: List[str], config_path: str = "config.yaml") -> List[List[float]]:
     model_name, cache_dir = _embeddings_cfg(config_path)
diff --git a/utils/health.py b/utils/health.py
index 63da357..8d0c970 100644
--- a/utils/health.py
+++ b/utils/health.py
@@ -5,6 +5,7 @@
 """
 
 import time
+from functools import lru_cache
 from typing import List
 
 import httpx
@@ -12,9 +13,16 @@
 
 from .errors import HealthStatus, LLMServiceError
 
-def check_all(config_path: str = "config.yaml") -> List[HealthStatus]:
+@lru_cache(maxsize=8)
+def _health_cfg(config_path: str) -> dict:
+    """Parse config once per path. check_all runs on every /health request, so
+    re-reading and re-parsing the YAML each time was avoidable disk + parse I/O.
+    """
     with open(config_path, encoding="utf-8") as f:
-        cfg = yaml.safe_load(f)
+        return yaml.safe_load(f)
+
+def check_all(config_path: str = "config.yaml") -> List[HealthStatus]:
+    cfg = _health_cfg(config_path)
     results = []
     llm_base = cfg["models"]["local_llm"]["base_url"]
     results.append(_ping(f"{llm_base}/models", "lm_studio"))
diff --git a/utils/logger.py b/utils/logger.py
index 558ada2..40083fa 100644
--- a/utils/logger.py
+++ b/utils/logger.py
@@ -11,8 +11,9 @@
 import logging
 import re
 from datetime import datetime, timezone
+from functools import lru_cache
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Tuple
 
 import yaml
 
@@ -65,19 +66,41 @@ def reset_config_cache() -> None:
 def hash_query(query: str) -> str:
     return hashlib.sha256(query.encode("utf-8")).hexdigest()
 
+@lru_cache(maxsize=8)
+def _compiled_redactors(
+    redact_emails: bool, redact_ips: bool, secret_patterns: Tuple[str, ...]
+) -> Tuple[Tuple[re.Pattern, str], ...]:
+    """Compile the active redaction patterns once per privacy configuration.
+
+    redact_sensitive runs on every audited field of every query; recompiling
+    these regexes each call was pure overhead. Keyed on the (hashable) privacy
+    settings so a config change still produces a fresh pattern set.
+    """
+    compiled = []
+    if redact_emails:
+        compiled.append((re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'),
+                         '[REDACTED_EMAIL]'))
+    if redact_ips:
+        compiled.append((re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'),
+                         '[REDACTED_IP]'))
+    for pattern in secret_patterns:
+        try:
+            compiled.append((re.compile(pattern), '[REDACTED_SECRET]'))
+        except re.error:
+            pass
+    return tuple(compiled)
+
 def redact_sensitive(text: str, cfg: Optional[dict] = None) -> str:
     if cfg is None:
         cfg = _get_config()
     privacy = cfg.get("policy", {}).get("privacy", {})
-    if privacy.get("redact_emails", False):
-        text = re.sub(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', '[REDACTED_EMAIL]', text)
-    if privacy.get("redact_ips", False):
-        text = re.sub(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', '[REDACTED_IP]', text)
-    for pattern in privacy.get("redact_secrets_like", []):
-        try:
-            text = re.sub(pattern, '[REDACTED_SECRET]', text)
-        except re.error:
-            pass
+    redactors = _compiled_redactors(
+        privacy.get("redact_emails", False),
+        privacy.get("redact_ips", False),
+        tuple(privacy.get("redact_secrets_like", []) or []),
+    )
+    for pattern, replacement in redactors:
+        text = pattern.sub(replacement, text)
     return text
 
 def audit_log(event: dict, config_path: str = "config.yaml") -> None:
diff --git a/utils/personality.py b/utils/personality.py
index 23e27bc..7cd2eed 100644
--- a/utils/personality.py
+++ b/utils/personality.py
@@ -44,12 +44,6 @@
     "INSERT INTO soul_versions (sha256, content, reason, timestamp) VALUES (?, ?, ?, ?)" # DevSkim: ignore DS197836
 )
 
-# SQL stores the content's SHA-256 digest alongside a UTC timestamp as metadata —
-# the hash is of *file content*, not of the time value.
-_SQL_INSERT_SOUL_VERSION = (
-    "INSERT INTO soul_versions (sha256, content, reason, timestamp) VALUES (?, ?, ?, ?)" # DevSkim: ignore DS197836
-)
-
 
 class PersonalityManager:
     def __init__(self, cfg: dict):