Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions retrieval/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,25 @@ def _embeddings_cfg(config_path: str) -> tuple:
emb_cfg = yaml.safe_load(f)["models"]["embeddings"]
return emb_cfg["model"], emb_cfg.get("cache_dir", "")

def get_embedding(text: str, config_path: str = "config.yaml") -> List[float]:
@lru_cache(maxsize=2048)
def _cached_embedding(text: str, config_path: str) -> tuple:
    """Return the normalized embedding of ``text`` as an immutable tuple.

    Results are memoized on ``(text, config_path)`` with an LRU bound of 2048
    entries. Encoding a query is a full model forward pass -- the most
    expensive step on the retrieval hot path -- so identical queries are
    served from the cache instead of re-running the model.

    Args:
        text: The query string to embed.
        config_path: Path of the config file that selects the embedding model.

    Returns:
        The embedding as a tuple of floats. A tuple (not a list) is cached so
        no caller can mutate the value shared through the cache.
    """
    model_name, cache_dir = _embeddings_cfg(config_path)
    model = _load_model(model_name, cache_dir)
    vector = model.encode(text, normalize_embeddings=True)
    # Freeze the result before it enters the cache: returning the bare list
    # here would hand every caller the same mutable object.
    return tuple(vector.tolist())

def get_embedding(text: str, config_path: str = "config.yaml") -> List[float]:
    """Return the embedding of ``text`` as a fresh list.

    The vector itself comes from the memoized ``_cached_embedding``; it is
    copied into a new list on every call so callers may mutate the result
    without affecting the cached value.
    """
    cached_vector = _cached_embedding(text, config_path)
    return [*cached_vector]

def reset_embedding_cache() -> None:
    """Drop every memoized query embedding (e.g. after a model swap)."""
    clear_cache = _cached_embedding.cache_clear
    clear_cache()

def get_embeddings_batch(texts: List[str], config_path: str = "config.yaml") -> List[List[float]]:
model_name, cache_dir = _embeddings_cfg(config_path)
Expand Down
12 changes: 10 additions & 2 deletions utils/health.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,24 @@
"""

import time
from functools import lru_cache
from typing import List

import httpx
import yaml

from .errors import HealthStatus, LLMServiceError

def check_all(config_path: str = "config.yaml") -> List[HealthStatus]:
@lru_cache(maxsize=8)
def _health_cfg(config_path: str) -> dict:
    """Load and parse the YAML config at ``config_path``, memoized per path.

    check_all runs on every /health request, so re-reading and re-parsing the
    YAML each time was avoidable disk + parse I/O. Up to 8 distinct paths are
    kept in the LRU cache.

    Args:
        config_path: Path of the YAML config file to read (as UTF-8).

    Returns:
        The parsed document. The same object is returned to every caller for
        a given path, so treat it as read-only.
    """
    with open(config_path, encoding="utf-8") as f:
        # Parse exactly once and return that result: a second safe_load on
        # the same handle reads an already-exhausted stream and yields None.
        return yaml.safe_load(f)

def check_all(config_path: str = "config.yaml") -> List[HealthStatus]:
cfg = _health_cfg(config_path)
results = []
llm_base = cfg["models"]["local_llm"]["base_url"]
results.append(_ping(f"{llm_base}/models", "lm_studio"))
Expand Down
43 changes: 33 additions & 10 deletions utils/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
import logging
import re
from datetime import datetime, timezone
from functools import lru_cache
from pathlib import Path
from typing import Optional
from typing import Optional, Tuple

import yaml

Expand Down Expand Up @@ -65,19 +66,41 @@ def reset_config_cache() -> None:
def hash_query(query: str) -> str:
    """Return the hex-encoded SHA-256 digest of ``query`` encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(query.encode("utf-8"))
    return digest.hexdigest()

@lru_cache(maxsize=8)
def _compiled_redactors(
redact_emails: bool, redact_ips: bool, secret_patterns: Tuple[str, ...]
) -> Tuple[Tuple[re.Pattern, str], ...]:
"""Compile the active redaction patterns once per privacy configuration.

redact_sensitive runs on every audited field of every query; recompiling
these regexes each call was pure overhead. Keyed on the (hashable) privacy
settings so a config change still produces a fresh pattern set.
"""
compiled = []
if redact_emails:
compiled.append((re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'),
'[REDACTED_EMAIL]'))
if redact_ips:
compiled.append((re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'),
'[REDACTED_IP]'))
for pattern in secret_patterns:
try:
compiled.append((re.compile(pattern), '[REDACTED_SECRET]'))
except re.error:
pass
return tuple(compiled)

def redact_sensitive(text: str, cfg: Optional[dict] = None) -> str:
    """Return ``text`` with configured sensitive substrings replaced.

    The active rules come from ``cfg["policy"]["privacy"]``: the
    ``redact_emails`` and ``redact_ips`` flags plus the regex sources listed
    under ``redact_secrets_like``. Patterns are obtained from
    ``_compiled_redactors`` so they are compiled once per privacy
    configuration rather than on every call.

    Args:
        text: The string to scrub.
        cfg: Parsed configuration; when ``None`` it is fetched with
            ``_get_config()``.

    Returns:
        The redacted string (unchanged when no rule is enabled).
    """
    if cfg is None:
        cfg = _get_config()
    # ``or {}`` also covers keys that are present but null in the config,
    # mirroring the ``or []`` guard used for the secrets list below.
    privacy = (cfg.get("policy") or {}).get("privacy") or {}
    redactors = _compiled_redactors(
        bool(privacy.get("redact_emails", False)),
        bool(privacy.get("redact_ips", False)),
        tuple(privacy.get("redact_secrets_like", []) or []),
    )
    # Single pass over the precompiled rules; the former inline re.sub calls
    # duplicated exactly this work and are intentionally gone.
    for pattern, replacement in redactors:
        text = pattern.sub(replacement, text)
    return text

def audit_log(event: dict, config_path: str = "config.yaml") -> None:
Expand Down
6 changes: 0 additions & 6 deletions utils/personality.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,6 @@
"INSERT INTO soul_versions (sha256, content, reason, timestamp) VALUES (?, ?, ?, ?)" # DevSkim: ignore DS197836
)

# Parameterized INSERT for one soul_versions row: the four "?" placeholders
# bind, in order, sha256, content, reason and timestamp.
# The row stores the content's SHA-256 digest alongside a UTC timestamp as
# metadata — the hash is of *file content*, not of the time value.
# The trailing DevSkim directive must stay on the same line as the SQL literal.
_SQL_INSERT_SOUL_VERSION = (
"INSERT INTO soul_versions (sha256, content, reason, timestamp) VALUES (?, ?, ?, ?)" # DevSkim: ignore DS197836
)


class PersonalityManager:
def __init__(self, cfg: dict):
Expand Down
Loading