diff --git a/.cursor/scratchpad.md b/.cursor/scratchpad.md index 6a2843e..33b4846 100644 --- a/.cursor/scratchpad.md +++ b/.cursor/scratchpad.md @@ -1,5 +1,5 @@ # Scratchpad -Index status: `count_documents` на всех vector backends + `index_coverage.get_index_coverage`, админка `/admin/graph-search/index-status/`, команда `search_index_status` с таблицей покрытия. Searcher: если у `graph.invoke()` нет ключа `final_results`, вызывается `postprocess_results_node` (LangGraph + dict state). +Release 0.3.3: setup.cfg, CHANGELOG, README, RELEASE_NOTES_0.3.3.md, dist/ built. -DONE — полный pytest 87 passed, 1 skipped; ruff на изменённых файлах — ok. +DONE — pytest 117 passed, 1 skipped; python -m build OK. diff --git a/.pylintrc b/.pylintrc index 1361b45..702a583 100644 --- a/.pylintrc +++ b/.pylintrc @@ -16,7 +16,6 @@ disable= invalid-name, too-few-public-methods, too-many-arguments, - too-many-positional-arguments, too-many-instance-attributes, too-many-locals, broad-exception-caught, diff --git a/CHANGELOG.md b/CHANGELOG.md index c3aceb8..3e0e222 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,44 @@ project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +## [0.3.3] — 2026-05-19 + +Stable **0.3** release (replaces pre-releases `0.3.0a1` and `0.3.1a1`). + +```bash +pip install django-graph-search==0.3.3 +``` + +### Added +- **REST search:** each hit includes `score` (0.0–1.0) and `text`; optional `min_score` filters weak matches; response may include `min_score_applied`. +- **Model weights:** `weight_fields` is always parsed (including with `fields: "__all__"`); weight `0.0` excludes a field from indexed text. +- **Async indexing:** `ASYNC_INDEXING` (Celery / daemon `thread` / django-q) plus `django_graph_search.tasks` so `AUTO_INDEX` signals can avoid blocking requests. +- **Non-blocking auto-index (default):** with local SentenceTransformer embeddings, `AUTO_INDEX_NON_BLOCKING` runs indexing in a daemon thread without enabling `ASYNC_INDEXING`. +- **Skip noisy saves:** global `AUTO_INDEX_SKIP_UPDATE_FIELDS` (default `last_login`) and per-model `skip_update_fields` skip re-index when only those fields change (`update_fields` or full save with no other diffs). +- **Pgvector backend:** `django_graph_search.backends.PgvectorBackend` (extra `[pgvector]`). +- **Cloud embeddings:** `OpenAIEmbeddingBackend` and `CohereEmbeddingBackend` (extras `[openai]`, `[cohere]`). +- **Admin index coverage:** `/admin/graph-search/index-status/` shows DB row counts vs vector-store document counts per model, overall percentage, and static progress bars. Sidebar entries **Поиск** and **Статус индексации** via unmanaged models `GraphSearch` / `GraphSearchIndexStatus`. +- **`count_documents(filters)`** on ChromaDB, FAISS, Qdrant, and pgvector backends; used by coverage UI and `search_index_status` management command. +- **Admin search:** optional `min_score` query parameter on the Graph Search admin page (same semantics as REST). +- **Component registry:** vector store, embedding backend, and `GraphResolver` are cached per worker configuration (shared by `Searcher`, `Indexer`, signals). + +### Changed +- **Vector scores:** ChromaDB / FAISS / Qdrant normalize distances to similarity scores in 0–1; ChromaDB reads the collection’s effective HNSW `space` and maps L2 / cosine / inner-product distances accordingly. +- **Factory / signals:** indexing and search reuse `get_shared_components()` from `component_registry`. + +### Security +- **REST API access control:** optional `GRAPH_SEARCH["API"]` (`PERMISSION_CLASSES`, `THROTTLE_CLASSES`, `THROTTLE_RATES`, `REQUIRE_AUTHENTICATION`) via `django_graph_search.permissions`. +- **Safe integer parsing for `limit`:** invalid or negative values return HTTP 400; values above 1000 are clamped with a log warning. + +### Fixed +- **LangGraph + `graph.invoke()`:** when the compiled graph omits `final_results`, `Searcher` runs `postprocess_results_node` so results are not empty. +- **ChromaDB:** cosine collections use `hnsw:space=cosine`; query distances mapped to similarity per metric. +- **File delta cache TTL:** `FileDeltaCache` enforces expiry on read; `purge_expired(dry_run=)` and `purge_search_cache` management command. +- **Conversational memory registry:** per-process backends with a lock; `RuntimeWarning` when `inmemory` + conversational enabled + `DEBUG` is false. + +### Tests +- **117** tests passing (+59 vs 0.2.0): admin sidebar, Chroma score mapping, component registry, non-blocking signals, `skip_update_fields`. + ## [0.3.1a1] — 2026-05-19 **Pre-release** of the **0.3.1** line. Install for smoke tests: @@ -202,6 +240,7 @@ and signal handlers behave exactly as before. - REST endpoints `/api/search/` and `/api/search/similar///`. - `build_search_index` management command. +[0.3.3]: https://github.com/svalench/django_graph_search/releases/tag/v0.3.3 [0.3.1a1]: https://github.com/svalench/django_graph_search/releases/tag/v0.3.1a1 [0.3.0a1]: https://github.com/svalench/django_graph_search/releases/tag/v0.3.0a1 [0.2.0]: https://github.com/svalench/django_graph_search/releases/tag/v0.2.0 diff --git a/README.md b/README.md index 1c7ce70..690e93d 100644 --- a/README.md +++ b/README.md @@ -45,12 +45,12 @@ pip install django-graph-search[cohere] pip install django-graph-search[all] ``` -## What's new in 0.3 (pre-release **0.3.1a1**) +## What's new in **0.3.3** -This line is a **pre-release** for smoke-testing packaging and integrations. Install with: +Stable **0.3** line. Install with: ```bash -pip install --pre django-graph-search==0.3.1a1 +pip install django-graph-search==0.3.3 ``` Highlights vs **0.2.0** (full detail in [CHANGELOG.md](CHANGELOG.md)): @@ -59,12 +59,13 @@ Highlights vs **0.2.0** (full detail in [CHANGELOG.md](CHANGELOG.md)): |------|--------| | **REST hits** | Each result includes `score` (0.0–1.0) and `text`. Optional `min_score` query parameter filters weak matches; responses may include `min_score_applied`. | | **Indexing** | `weight_fields` is always honored, including with `fields: "__all__"`; weight `0.0` drops a field from indexed text. | -| **Async signals** | `ASYNC_INDEXING` (Celery, `thread`, or django-q) plus `django_graph_search.tasks` so `AUTO_INDEX` can avoid blocking the request thread. | -| **Backends / embeddings** | **Pgvector** backend (`[pgvector]`). **OpenAI** / **Cohere** embedding backends (`[openai]`, `[cohere]`). | -| **Scores** | ChromaDB / FAISS / Qdrant normalize distances to similarity scores in 0–1 for consistent API output. | -| **Security / API** | Optional `GRAPH_SEARCH["API"]`: `PERMISSION_CLASSES`, `THROTTLE_CLASSES`, `THROTTLE_RATES`, `REQUIRE_AUTHENTICATION` via `django_graph_search.permissions` (defaults keep behaviour open). | -| **Validation** | Invalid or negative `limit` on search, streaming, conversational, and similar endpoints returns **400** (not 500); values above 1000 are clamped with a log warning. | -| **Fixes** | ChromaDB cosine metadata and distance mapping; file delta cache TTL and `purge_search_cache`; conversational in-memory registry + `RuntimeWarning` when `DEBUG` is false. | +| **Async / non-blocking signals** | `ASYNC_INDEXING` (Celery, `thread`, django-q) or default `AUTO_INDEX_NON_BLOCKING` (daemon thread for local SentenceTransformer). `AUTO_INDEX_SKIP_UPDATE_FIELDS` / per-model `skip_update_fields` skip noisy saves (`last_login`, etc.). | +| **Admin** | Sidebar **Поиск** and **Статус индексации**; index coverage page; `min_score` on admin search. | +| **Backends / embeddings** | **Pgvector** (`[pgvector]`). **OpenAI** / **Cohere** (`[openai]`, `[cohere]`). Shared component registry per worker. | +| **Scores** | ChromaDB / FAISS / Qdrant normalize distances to 0–1; Chroma respects collection metric (L2 / cosine / IP). | +| **Security / API** | Optional `GRAPH_SEARCH["API"]`: permissions, throttling, `REQUIRE_AUTHENTICATION` (defaults stay open). | +| **Validation** | Invalid `limit` → **400**; values above 1000 clamped with a log warning. | +| **Fixes** | LangGraph empty `final_results`; Chroma metadata; delta cache TTL; conversational memory registry warning. | ## Quick Start (5 minutes) @@ -128,6 +129,8 @@ GRAPH_SEARCH = { To restrict access to the main search, streaming, and conversational HTTP endpoints, add an `"API"` block as described [below](#securing-the-rest-api-optional). +**Search relevance (semantic noise).** Vector search scores the full string built for indexing (all configured fields plus related rows when `follow_relations` is true). If results feel noisy or scores look flat, narrow `fields` to the attributes users actually query (e.g. `username`, `email`), set `follow_relations` / `relation_depth` lower, then rebuild the index. Admin Graph Search shows a **text preview** of indexed text per hit and supports optional **`min_score`** (same semantics as the REST API). + ### 3. Add URLs ```python @@ -253,6 +256,45 @@ When ``AUTO_INDEX`` is on, saves can block on large graphs. Enable ``ASYNC_INDEX With ``thread``, indexing runs in a daemon thread (no retries). With ``celery``, install Celery and register tasks; if Celery is missing, the task module falls back to synchronous execution with a warning. +### Production: zero impact on unrelated requests + +``AUTO_INDEX`` hooks **every** ``post_save`` for models listed in ``MODELS``. A login that updates +``auth.User.last_login``, or any frequent save on an indexed model, can load a local +**sentence-transformers** model and block the request thread for seconds. + +Recommended for production web workers: + +| Setting | Recommendation | +|---------|----------------| +| ``AUTO_INDEX`` | ``False`` if you rebuild with ``build_search_index`` or a Celery beat job | +| ``ASYNC_INDEXING`` | ``ENABLED: True`` with ``thread`` or ``celery`` when ``AUTO_INDEX`` stays on | +| ``MODELS`` | Do **not** index ``auth.User`` (or similar) unless you need user search; login saves are noisy | +| ``EMBEDDINGS`` | Prefer ``OpenAIEmbeddingBackend`` / ``CohereEmbeddingBackend`` in Gunicorn workers to avoid PyTorch in-process | +| ``AUTO_INDEX_SKIP_UPDATE_FIELDS`` | Default ``["last_login"]`` — skips indexing when ``save(update_fields=...)`` touches only those fields | +| ``AUTO_INDEX_NON_BLOCKING`` | Default ``True`` — with local **SentenceTransformer**, signal indexing runs in a **daemon thread** so login/API are not blocked (model may still load in background) | + +Example minimal fix for login latency: + +```python +GRAPH_SEARCH = { + "AUTO_INDEX": False, + # or keep AUTO_INDEX and offload: + # "ASYNC_INDEXING": {"ENABLED": True, "BACKEND": "thread"}, + # "AUTO_INDEX_SKIP_UPDATE_FIELDS": ["last_login"], + "MODELS": [ + # avoid auth.User unless required + {"model": "shop.Product", "fields": ["name", "description"]}, + ], +} +``` + +Heavy components (vector store client, embedding backend, graph resolver) are **cached once per +worker process** after the first search or index operation. Restart workers after changing +``GRAPH_SEARCH`` backends or embedding models. + +For local **sentence-transformers**, run indexing in a dedicated Celery worker if web workers +must stay lean. + ### Securing the REST API (optional) **Scope:** Settings under `GRAPH_SEARCH["API"]` apply only to **`GET /api/search/`**, @@ -300,7 +342,16 @@ python manage.py purge_search_cache --dry-run # Count expired entries wit ## Admin UI -After installation, navigate to `/admin/graph-search/` for a semantic search interface directly in Django Admin — useful for content managers and debugging. +With `django.contrib.admin` installed, the app adds a **Django Graph Search** section on the admin index (`/admin/`) with **Поиск** and **Статус индексации** entries. The legacy URL `/admin/graph-search/` still works for bookmarks and docs. + +Disable the admin section and custom URLs with: + +```python +GRAPH_SEARCH = { + # ... + "ADMIN_SEARCH_ENABLED": False, +} +``` ## Supported Backends diff --git a/RELEASE_NOTES_0.3.3.md b/RELEASE_NOTES_0.3.3.md new file mode 100644 index 0000000..486de2e --- /dev/null +++ b/RELEASE_NOTES_0.3.3.md @@ -0,0 +1,66 @@ +# django-graph-search 0.3.3 + +**Release date:** 2026-05-19 +**Type:** Stable (0.3 line — replaces pre-releases `0.3.0a1`, `0.3.1a1`) + +```bash +pip install django-graph-search==0.3.3 +# optional extras unchanged, e.g.: +pip install django-graph-search[pgvector,openai,all] +``` + +## Summary + +First **stable** 0.3 release: REST scores and `min_score`, smarter indexing weights, async/non-blocking auto-index, pgvector + cloud embeddings, hardened REST API settings, admin index coverage with sidebar navigation, and ChromaDB score fixes aligned with collection metrics. + +Upgrading from **0.2.x** is backward-compatible — new settings default to safe/off or sensible production defaults (`AUTO_INDEX_NON_BLOCKING=True` only affects local SentenceTransformer profiles). + +## Highlights + +### Search & API +- Result objects include **`score`** (0.0–1.0) and indexed **`text`** +- Query param **`?min_score=`** on REST and admin search +- Optional **`GRAPH_SEARCH["API"]`**: DRF-style permission/throttle hooks, `REQUIRE_AUTHENTICATION` +- Invalid **`limit`** → HTTP 400; values above 1000 clamped with warning + +### Indexing & signals +- **`weight_fields`** always applied (`fields: "__all__"` supported; `0.0` = exclude field) +- **`ASYNC_INDEXING`**: Celery, `thread`, or django-q via `django_graph_search.tasks` +- **`AUTO_INDEX_NON_BLOCKING`** (default **on**): daemon-thread indexing for local ST without Celery +- **`AUTO_INDEX_SKIP_UPDATE_FIELDS`** / per-model **`skip_update_fields`**: skip re-index on `last_login`-only updates +- **`component_registry`**: one vector store + embedder + resolver per worker config + +### Backends & embeddings +- **Pgvector** (`pip install django-graph-search[pgvector]`) +- **OpenAI** / **Cohere** embedding backends +- Normalized **0–1 similarity** across ChromaDB, FAISS, Qdrant; Chroma reads effective HNSW metric + +### Admin +- Sidebar: **Поиск**, **Статус индексации** +- **`/admin/graph-search/index-status/`** — DB vs vector store coverage (static snapshot) +- Legacy URLs **`/admin/graph-search/`** preserved + +### Fixes +- LangGraph: empty results when `final_results` missing from invoke output +- Chroma cosine / L2 / IP distance mapping +- File delta cache TTL + `purge_search_cache` command +- Conversational in-memory backend warning in production multi-worker setups + +## Upgrade notes + +| From | Action | +|------|--------| +| `0.2.x` | `pip install -U django-graph-search==0.3.3` — no mandatory settings changes | +| `0.3.0a1` / `0.3.1a1` | Drop `--pre`; pin `==0.3.3`. Behaviour matches pre-releases plus admin sidebar, skip-fields, non-blocking default, component registry | + +Re-indexing is **not** required unless you change embedding model or smart-indexing templates. + +## Tests + +117 passed, 1 skipped (pytest suite in CI). + +## Links + +- [CHANGELOG.md](CHANGELOG.md) — full categorized list +- [PyPI](https://pypi.org/project/django-graph-search/) +- [Documentation](https://github.com/svalench/django_graph_search#readme) diff --git a/setup.cfg b/setup.cfg index 625047d..7c2a6dd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = django-graph-search -version = 0.3.1a1 +version = 0.3.3 description = Vector search for Django models with graph relations, optional LangGraph pipeline, conversational search, smart indexing and streaming. long_description = file: README.md long_description_content_type = text/markdown diff --git a/src/django_graph_search/admin.py b/src/django_graph_search/admin.py index 1994afb..6655503 100644 --- a/src/django_graph_search/admin.py +++ b/src/django_graph_search/admin.py @@ -5,43 +5,92 @@ from django.urls import path from .index_coverage import get_index_coverage +from .models import GraphSearch, GraphSearchIndexStatus from .searcher import Searcher from .settings import get_settings +from .views import _parse_float_param +_admin_site_configured: set[int] = set() -def graph_search_view(request): + +def graph_search_view(request, admin_site=None): + site = admin_site if admin_site is not None else admin.site config = get_settings() query = request.GET.get("q", "").strip() models = request.GET.get("models") model_list = [m.strip() for m in models.split(",")] if models else None + min_score, min_score_err = _parse_float_param( + request.GET.get("min_score"), + "min_score", + default=None, + min_value=0.0, + max_value=1.0, + ) + min_score_error = None + if min_score_err is not None: + min_score = None + min_score_error = "Параметр min_score: число от 0.0 до 1.0." + results = [] - if query: + if query and min_score_error is None: searcher = Searcher(config=config) results = searcher.search(query, models=model_list, limit=config.default_results_limit) + if min_score is not None: + results = [r for r in results if float(r.get("score") or 0) >= min_score] context = dict( - admin.site.each_context(request), + site.each_context(request), title="Graph Search", query=query, results=results, model_list=models or "", available_models=[cfg.model for cfg in config.models], + min_score=request.GET.get("min_score", "").strip(), + min_score_applied=min_score, + min_score_error=min_score_error, ) return TemplateResponse(request, "django_graph_search/admin/search.html", context) -def graph_search_index_status_view(request): +def graph_search_index_status_view(request, admin_site=None): """Статичный снимок покрытия индекса (без автообновления).""" + site = admin_site if admin_site is not None else admin.site report = get_index_coverage() context = dict( - admin.site.each_context(request), + site.each_context(request), title="Статус индексации", report=report, ) return TemplateResponse(request, "django_graph_search/admin/index_status.html", context) +class _GraphSearchMenuAdmin(admin.ModelAdmin): + """Базовый ModelAdmin для пунктов меню без CRUD.""" + + def has_add_permission(self, request): + return False + + def has_change_permission(self, request, obj=None): + return False + + def has_delete_permission(self, request, obj=None): + return False + + +class GraphSearchAdmin(_GraphSearchMenuAdmin): + def changelist_view(self, request, extra_context=None): + return graph_search_view(request, admin_site=self.admin_site) + + +class GraphSearchIndexStatusAdmin(_GraphSearchMenuAdmin): + def changelist_view(self, request, extra_context=None): + return graph_search_index_status_view(request, admin_site=self.admin_site) + + def _inject_admin_urls(admin_site): + if getattr(admin_site, "_graph_search_urls_injected", False): + return + original_get_urls = admin_site.get_urls def get_urls(): @@ -61,7 +110,27 @@ def get_urls(): return custom + urls admin_site.get_urls = get_urls + admin_site._graph_search_urls_injected = True + + +def _register_menu_models(admin_site): + if not admin_site.is_registered(GraphSearch): + admin_site.register(GraphSearch, GraphSearchAdmin) + if not admin_site.is_registered(GraphSearchIndexStatus): + admin_site.register(GraphSearchIndexStatus, GraphSearchIndexStatusAdmin) + +def setup_admin_site(admin_site=None): + """Регистрация раздела админки и legacy-URL (идемпотентно).""" + site = admin_site if admin_site is not None else admin.site + site_id = id(site) + if site_id in _admin_site_configured: + return -_inject_admin_urls(admin.site) + if not get_settings().admin_search_enabled: + _admin_site_configured.add(site_id) + return + _register_menu_models(site) + _inject_admin_urls(site) + _admin_site_configured.add(site_id) diff --git a/src/django_graph_search/apps.py b/src/django_graph_search/apps.py index 6edef22..447d7e2 100644 --- a/src/django_graph_search/apps.py +++ b/src/django_graph_search/apps.py @@ -12,4 +12,8 @@ def ready(self) -> None: from . import signals # noqa: WPS433,F401 get_settings() + if get_settings().admin_search_enabled: + from . import admin # noqa: WPS433 + + admin.setup_admin_site() diff --git a/src/django_graph_search/backends/chromadb.py b/src/django_graph_search/backends/chromadb.py index 9240cbc..b7633ce 100644 --- a/src/django_graph_search/backends/chromadb.py +++ b/src/django_graph_search/backends/chromadb.py @@ -1,10 +1,85 @@ from __future__ import annotations -from typing import Any, Dict, Iterable, List, Optional +import logging +from typing import Any, Dict, Iterable, List, Literal, Optional, cast from ..exceptions import BackendError from .base import BaseVectorStore, Document, SearchResult +log = logging.getLogger(__name__) + + +_ChromaHnswSpace = Literal["cosine", "l2", "ip"] + + +def _requested_chroma_space(distance_metric: str) -> _ChromaHnswSpace: + """Соответствие опции бэкенда ключу ``space`` в конфигурации HNSW Chroma.""" + m = (distance_metric or "cosine").lower() + if m in ("l2", "euclidean"): + return "l2" + if m in ("ip", "inner_product"): + return "ip" + return "cosine" + + +def _space_from_hnsw_block(hnsw: Any) -> Optional[str]: + """Извлечь ключ space из dict или из объекта конфигурации HNSW (разные версии Chroma).""" + if hnsw is None: + return None + if isinstance(hnsw, dict): + raw = hnsw.get("space") + else: + raw = getattr(hnsw, "space", None) + if raw is None: + return None + space = str(raw).strip().lower() + if space in ("cosine", "l2", "ip"): + return space + return None + + +def _effective_space_from_collection(collection: Any, fallback: str) -> str: + """Фактическая метрика индекса (после get_or_create она может отличаться от запрошенной).""" + cfg = getattr(collection, "configuration", None) + if cfg is not None: + if isinstance(cfg, dict): + hnsw = cfg.get("hnsw") or {} + space = _space_from_hnsw_block(hnsw) + if space: + return space + top = cfg.get("space") + if isinstance(top, str) and top.strip().lower() in ("cosine", "l2", "ip"): + return top.strip().lower() + else: + hnsw = getattr(cfg, "hnsw", None) + space = _space_from_hnsw_block(hnsw) + if space: + return space + + meta = getattr(collection, "metadata", None) or {} + legacy = (meta.get("hnsw:space") or meta.get("hnsw_space") or "").strip().lower() + if legacy in ("cosine", "l2", "ip"): + return legacy + + resolved = _requested_chroma_space(fallback) + log.info( + "ChromaDB: не удалось определить HNSW space коллекции; " + "distance→score по fallback из настроек: %s", + resolved, + ) + return resolved + + +def chroma_distance_to_similarity(effective_space: str, distance: Any) -> float: + """Преобразование raw distance из Chroma в score [0, 1] (юнит-тесты без клиента).""" + if distance is None: + return 0.0 + d = float(distance) + space = (effective_space or "cosine").strip().lower() + if space in ("cosine", "ip"): + return max(0.0, min(1.0, 1.0 - d)) + return max(0.0, min(1.0, 1.0 / (1.0 + d))) + class ChromaDBBackend(BaseVectorStore): def __init__( @@ -20,18 +95,65 @@ def __init__( raise BackendError("chromadb is not installed.") from exc self.distance_metric = (distance_metric or "cosine").lower() + self._requested_space = _requested_chroma_space(self.distance_metric) + if persist_directory: client = chromadb.PersistentClient(path=persist_directory, **options) else: client = chromadb.Client(**options) - collection_metadata = None - if self.distance_metric == "cosine": - collection_metadata = {"hnsw:space": "cosine"} + self.collection = self._open_collection( + client, + collection_name=collection_name, + requested_space=self._requested_space, + ) + # Реальная метрика коллекции (уже существующая L2 не станет cosine). + self._effective_space = _effective_space_from_collection( + self.collection, + fallback=self.distance_metric, + ) + if self._effective_space != self._requested_space: + log.info( + "ChromaDB: фактическая метрика коллекции %s (запрошена %s); " + "маппинг distance→score использует фактическую.", + self._effective_space, + self._requested_space, + ) + + def _open_collection(self, client: Any, *, collection_name: str, requested_space: str) -> Any: + """get_or_create с configuration (Chroma >= 0.5) и fallback на legacy-metadata.""" + legacy_meta: Optional[Dict[str, Any]] = None + if requested_space == "cosine": + legacy_meta = {"hnsw:space": "cosine"} - self.collection = client.get_or_create_collection( + coll_cfg: Any = None + try: + from chromadb.api.collection_configuration import ( + CreateCollectionConfiguration, + CreateHNSWConfiguration, + ) + + coll_cfg = CreateCollectionConfiguration( + hnsw=CreateHNSWConfiguration( + space=cast(_ChromaHnswSpace, requested_space), + ) + ) + except Exception: # pragma: no cover - старая версия chromadb + coll_cfg = None + + if coll_cfg is not None: + try: + return client.get_or_create_collection( + name=collection_name, + configuration=coll_cfg, + metadata=legacy_meta, + ) + except TypeError: + pass + + return client.get_or_create_collection( name=collection_name, - metadata=collection_metadata, + metadata=legacy_meta, ) def add_documents(self, documents: Iterable[Document]) -> None: @@ -55,6 +177,7 @@ def search( query_embeddings=[query_vector], n_results=limit, where=filters, + include=["distances", "metadatas", "documents"], ) ids = response.get("ids", [[]])[0] distances = response.get("distances", [[]])[0] @@ -67,19 +190,19 @@ def search( meta = dict(metadata or {}) if doc_text and "text" not in meta: meta["text"] = doc_text + if distance is not None: + try: + # Сырой distance для tie-break сортировки при равных score. + meta["vector_distance"] = float(distance) + except (TypeError, ValueError): + pass score = self._distance_to_similarity(distance) results.append(SearchResult(id=doc_id, score=score, metadata=meta)) return results def _distance_to_similarity(self, distance: Any) -> float: """Привести метрику Chroma к сходству в диапазоне [0, 1].""" - if distance is None: - return 0.0 - d = float(distance) - if self.distance_metric == "cosine": - return max(0.0, min(1.0, 1.0 - d)) - # l2 и прочее: монотонное сжатие дистанции в (0, 1] - return max(0.0, min(1.0, 1.0 / (1.0 + d))) + return chroma_distance_to_similarity(self._effective_space, distance) def delete(self, doc_ids: Iterable[str]) -> None: ids = list(doc_ids) @@ -96,4 +219,3 @@ def count_documents(self, filters: Optional[Dict[str, Any]] = None) -> int: ids = data.get("ids") or [] return len(ids) return int(self.collection.count()) - diff --git a/src/django_graph_search/backends/faiss.py b/src/django_graph_search/backends/faiss.py index 764ad6e..3b0f7ca 100644 --- a/src/django_graph_search/backends/faiss.py +++ b/src/django_graph_search/backends/faiss.py @@ -53,13 +53,15 @@ def search( for idx, dist in zip(indices[0], distances[0]): if idx < 0 or idx >= len(self._ids): continue - metadata = self._metas[idx] + metadata = dict(self._metas[idx]) if filters and not self._match_filters(metadata, filters): continue + fdist = float(dist) + metadata["vector_distance"] = fdist results.append( SearchResult( id=self._ids[idx], - score=max(0.0, min(1.0, 1.0 / (1.0 + float(dist)))), + score=max(0.0, min(1.0, 1.0 / (1.0 + fdist))), metadata=metadata, ) ) diff --git a/src/django_graph_search/component_registry.py b/src/django_graph_search/component_registry.py new file mode 100644 index 0000000..751ced6 --- /dev/null +++ b/src/django_graph_search/component_registry.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import json +import threading +from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple + +from django.utils.module_loading import import_string + +if TYPE_CHECKING: + from .settings import GraphSearchConfig + +# Один vector store + embedding + resolver на процесс (как memory backend в views). +_registry_lock = threading.Lock() +_component_registry: Dict[Tuple[Any, ...], Tuple[Any, Any, Any]] = {} + + +def _freeze_options(options: Dict[str, Any]) -> str: + return json.dumps(options or {}, sort_keys=True, default=str) + + +def _component_cache_key( + config: "GraphSearchConfig", + embedding_profile: Optional[str], +) -> Tuple[Any, ...]: + profile_name = embedding_profile or config.default_embedding + profile = config.embeddings[profile_name] + return ( + config.vector_store.backend, + _freeze_options(config.vector_store.options), + profile_name, + profile.backend, + profile.model_name, + _freeze_options(profile.options), + ) + + +def get_shared_components( + config: Optional["GraphSearchConfig"] = None, + embedding_profile: Optional[str] = None, +) -> Tuple["GraphSearchConfig", object, object, Any]: + """Тяжёлые компоненты поиска/индексации — singleton на воркер.""" + from .graph_resolver import GraphResolver + from .settings import get_settings + + config = config or get_settings() + key = _component_cache_key(config, embedding_profile) + with _registry_lock: + cached = _component_registry.get(key) + if cached is not None: + vector_store, embedding_backend, resolver = cached + return config, vector_store, embedding_backend, resolver + + backend_cls = import_string(config.vector_store.backend) + vector_store = backend_cls(**config.vector_store.options) + profile_name = embedding_profile or config.default_embedding + profile = config.embeddings[profile_name] + embed_cls = import_string(profile.backend) + embedding_backend = embed_cls( + model_name=profile.model_name, + **profile.options, + ) + resolver = GraphResolver() + entry = (vector_store, embedding_backend, resolver) + with _registry_lock: + _component_registry[key] = entry + return config, vector_store, embedding_backend, resolver + + +def clear_component_registry() -> None: + with _registry_lock: + _component_registry.clear() diff --git a/src/django_graph_search/factory.py b/src/django_graph_search/factory.py index d8b8a87..78cb795 100644 --- a/src/django_graph_search/factory.py +++ b/src/django_graph_search/factory.py @@ -4,6 +4,7 @@ from django.utils.module_loading import import_string +from .component_registry import get_shared_components from .graph_resolver import GraphResolver from .settings import GraphSearchConfig, get_settings @@ -16,6 +17,8 @@ def build_components( embedding_profile: Optional[str], ) -> Tuple[GraphSearchConfig, object, object, GraphResolver]: config = config or get_settings() + if vector_store is None and embedding_backend is None and resolver is None: + return get_shared_components(config, embedding_profile) if vector_store is None: backend_cls = import_string(config.vector_store.backend) vector_store = backend_cls(**config.vector_store.options) diff --git a/src/django_graph_search/graph_resolver.py b/src/django_graph_search/graph_resolver.py index 6e1545e..81686a7 100644 --- a/src/django_graph_search/graph_resolver.py +++ b/src/django_graph_search/graph_resolver.py @@ -1,10 +1,11 @@ from __future__ import annotations -from typing import Any, Iterable, List, Optional, Set, Tuple +from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Set, Tuple from django.db import models -from .settings import ModelConfig +if TYPE_CHECKING: + from .settings import ModelConfig class GraphResolver: @@ -12,7 +13,7 @@ def resolve(self, instance: models.Model, depth: int = 2) -> dict: visited: Set[Tuple[str, Any]] = set() return self._resolve_instance(instance, depth, visited) - def build_searchable_text(self, instance: models.Model, config: ModelConfig) -> str: + def build_searchable_text(self, instance: models.Model, config: "ModelConfig") -> str: parts: List[str] = [] if config.fields == ["__all__"]: field_dict = self._collect_fields(instance) diff --git a/src/django_graph_search/indexer.py b/src/django_graph_search/indexer.py index e95c929..c3b3d0d 100644 --- a/src/django_graph_search/indexer.py +++ b/src/django_graph_search/indexer.py @@ -49,6 +49,7 @@ class Indexer(ComponentMixin): def __init__( self, config: Optional[GraphSearchConfig] = None, + *, vector_store=None, embedding_backend=None, resolver: Optional[GraphResolver] = None, diff --git a/src/django_graph_search/langgraph_agent.py b/src/django_graph_search/langgraph_agent.py index eec64c8..fa4ab91 100644 --- a/src/django_graph_search/langgraph_agent.py +++ b/src/django_graph_search/langgraph_agent.py @@ -144,7 +144,7 @@ def vector_search_node( for hit in hits: key = _doc_key(hit) existing = merged.get(key) - if existing is None or _score_value(hit) > _score_value(existing): + if existing is None or _is_hit_preferred_over(hit, existing): merged[key] = hit results = list(merged.values()) @@ -154,8 +154,8 @@ def vector_search_node( allowed = set(models_filter) results = [item for item in results if item.metadata.get("model") in allowed] - # Stable order: best score first. - results.sort(key=_score_value, reverse=True) + # Score убыв., при равенстве — меньший vector_distance (сырая метрика стора). + results = sort_vector_hits(results) state["raw_results"] = results state["merged_results"] = results @@ -181,7 +181,7 @@ def rerank_results_node( rerank_inputs = [ RerankCandidate( id=_doc_key(item), - text=getattr(item, "text", "") or "", + text=_candidate_rerank_text(item), score=_score_value(item), metadata=dict(item.metadata or {}), ) @@ -403,6 +403,51 @@ def _score_value(item: Any) -> float: return 0.0 +def _raw_vector_distance(item: Any) -> float: + """Меньше — ближе по метрике индекса (если бэкенд положил vector_distance в metadata).""" + md = getattr(item, "metadata", None) or {} + d = md.get("vector_distance") + if d is None: + return float("inf") + try: + return float(d) + except (TypeError, ValueError): + return float("inf") + + +def _vector_hit_sort_key(item: Any) -> tuple: + return (-_score_value(item), _raw_vector_distance(item), str(getattr(item, "id", ""))) + + +def sort_vector_hits(items: List[Any]) -> List[Any]: + """Сортировка выдачи векторного поиска: score ↓, vector_distance ↑, id.""" + return sorted(items, key=_vector_hit_sort_key) + + +def _is_hit_preferred_over(new: Any, old: Any) -> bool: + """Выбрать лучший hit при слиянии нескольких запросов (в т.ч. при равном score).""" + sn, so = _score_value(new), _score_value(old) + if sn > so: + return True + if sn < so: + return False + dn, do = _raw_vector_distance(new), _raw_vector_distance(old) + if dn < do: + return True + if dn > do: + return False + return False + + +def _candidate_rerank_text(item: Any) -> str: + """Текст документа для rerank: из metadata (SearchResult) или поле .text у тестовых заглушек.""" + md = getattr(item, "metadata", None) or {} + t = md.get("text") or "" + if not t and hasattr(item, "text"): + t = item.text or "" + return t + + def resolve_graph_factory(dotted_path: str) -> Callable[..., Any]: """Lazily import a graph factory (used by the searcher).""" from django.utils.module_loading import import_string @@ -419,4 +464,5 @@ def resolve_graph_factory(dotted_path: str) -> Callable[..., Any]: "postprocess_results_node", "build_search_graph", "resolve_graph_factory", + "sort_vector_hits", ] diff --git a/src/django_graph_search/langgraph_indexer.py b/src/django_graph_search/langgraph_indexer.py index 28b7c8c..430f22f 100644 --- a/src/django_graph_search/langgraph_indexer.py +++ b/src/django_graph_search/langgraph_indexer.py @@ -250,6 +250,7 @@ class SmartIndexer(ComponentMixin): def __init__( self, config: Optional[GraphSearchConfig] = None, + *, vector_store=None, embedding_backend=None, resolver: Optional[GraphResolver] = None, diff --git a/src/django_graph_search/models.py b/src/django_graph_search/models.py new file mode 100644 index 0000000..054c5a8 --- /dev/null +++ b/src/django_graph_search/models.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from django.db import models + + +class GraphSearch(models.Model): + """Пункт меню админки: семантический поиск (без таблицы в БД).""" + + class Meta: + managed = False + verbose_name = "Поиск" + verbose_name_plural = "Поиск" + + +class GraphSearchIndexStatus(models.Model): + """Пункт меню админки: снимок покрытия индекса (без таблицы в БД).""" + + class Meta: + managed = False + verbose_name = "Статус индексации" + verbose_name_plural = "Статус индексации" diff --git a/src/django_graph_search/searcher.py b/src/django_graph_search/searcher.py index bde8212..435ea84 100644 --- a/src/django_graph_search/searcher.py +++ b/src/django_graph_search/searcher.py @@ -11,6 +11,7 @@ from .components import ComponentMixin from .events import EventHub from .graph_resolver import GraphResolver +from .langgraph_agent import sort_vector_hits from .llm import BaseLLMBackend, build_llm_backend from .settings import GraphSearchConfig, ModelConfig @@ -31,6 +32,7 @@ class Searcher(ComponentMixin): def __init__( self, config: Optional[GraphSearchConfig] = None, + *, vector_store=None, embedding_backend=None, resolver: Optional[GraphResolver] = None, @@ -96,6 +98,7 @@ def find_similar( limit=limit, filters={"model": instance._meta.label}, ) + results = sort_vector_hits(results) return [self._format_result(item) for item in results] # ----------------------------------------------------------- legacy path @@ -113,6 +116,7 @@ def _search_linear( if models: allowed = set(models) results = [item for item in results if item.metadata.get("model") in allowed] + results = sort_vector_hits(results) return [self._format_result(item) for item in results] # ---------------------------------------------------------- LangGraph path @@ -175,11 +179,13 @@ def _format_result(self, item) -> dict: score = float(raw_score) if raw_score is not None else 0.0 score = max(0.0, min(1.0, score)) text = item.metadata.get("text") or "" + preview = f"{text[:200]}…" if len(text) > 200 else (text or None) data = { "model": model_label, "pk": pk, "score": score, "text": text, + "text_preview": preview, } if model_label and pk is not None: model_cls = self._get_model_class(model_label) diff --git a/src/django_graph_search/settings.py b/src/django_graph_search/settings.py index 0a21271..55e96fc 100644 --- a/src/django_graph_search/settings.py +++ b/src/django_graph_search/settings.py @@ -2,7 +2,7 @@ from dataclasses import dataclass, field from functools import lru_cache -from typing import Any, Dict, Iterable, List, Optional +from typing import Any, Dict, Iterable, List, Optional, Tuple from django.conf import settings as django_settings from django.utils.module_loading import import_string @@ -27,6 +27,9 @@ "API_URL_PREFIX": "api/search/", "ADMIN_SEARCH_ENABLED": True, "AUTO_INDEX": True, + "AUTO_INDEX_SKIP_UPDATE_FIELDS": ["last_login"], + # Локальный sentence-transformers не блокирует HTTP: индексация в daemon thread. + "AUTO_INDEX_NON_BLOCKING": True, "DEFAULT_RESULTS_LIMIT": 20, "RELATION_DEPTH_DEFAULT": 2, "DELTA_INDEXING": False, @@ -99,6 +102,8 @@ class ModelConfig: follow_relations: bool = True relation_depth: int = 2 weight_fields: Dict[str, float] = field(default_factory=dict) + # save(update_fields=...): только эти поля — post_save не индексирует (+ глобальный список). + skip_update_fields: Tuple[str, ...] = field(default_factory=tuple) @dataclass(frozen=True) @@ -203,6 +208,8 @@ class GraphSearchConfig: default_results_limit: int delta_indexing: bool cache: CacheConfig + auto_index_skip_update_fields: Tuple[str, ...] = ("last_login",) + auto_index_non_blocking: bool = True langgraph: LangGraphConfig = field(default_factory=LangGraphConfig) conversational: ConversationalConfig = field(default_factory=ConversationalConfig) smart_indexing: SmartIndexingConfig = field(default_factory=SmartIndexingConfig) @@ -261,6 +268,12 @@ def _validate_models(models: Iterable[Dict[str, Any]], depth_default: int) -> Li relation_depth = int(item.get("relation_depth", depth_default)) # Веса парсятся всегда, даже для fields == ["__all__"] (известные имена полей). weight_fields = _normalize_weight_fields(item.get("weight_fields", {})) + skip_raw = item.get("skip_update_fields") + skip_update_fields: Tuple[str, ...] = () + if skip_raw is not None: + if not isinstance(skip_raw, (list, tuple)): + raise ConfigurationError("'skip_update_fields' must be a list of field names.") + skip_update_fields = tuple(str(f) for f in skip_raw) normalized.append( ModelConfig( model=model, @@ -268,6 +281,7 @@ def _validate_models(models: Iterable[Dict[str, Any]], depth_default: int) -> Li follow_relations=follow_relations, relation_depth=relation_depth, weight_fields=weight_fields, + skip_update_fields=skip_update_fields, ) ) return normalized @@ -327,6 +341,13 @@ def get_settings() -> GraphSearchConfig: streaming_cfg = _build_streaming_config(merged.get("STREAMING") or {}) api_cfg = _build_api_config(merged.get("API") or {}) async_indexing_cfg = _build_async_indexing_config(merged.get("ASYNC_INDEXING") or {}) + skip_update_raw = merged.get("AUTO_INDEX_SKIP_UPDATE_FIELDS") + if skip_update_raw is None: + skip_update_fields: Tuple[str, ...] = ("last_login",) + elif not isinstance(skip_update_raw, (list, tuple)): + raise ConfigurationError("AUTO_INDEX_SKIP_UPDATE_FIELDS must be a list of field names.") + else: + skip_update_fields = tuple(str(f) for f in skip_update_raw) # Validate backend paths early _load_backend(vector_store.backend) @@ -341,6 +362,8 @@ def get_settings() -> GraphSearchConfig: api_url_prefix=merged["API_URL_PREFIX"], admin_search_enabled=bool(merged["ADMIN_SEARCH_ENABLED"]), auto_index=bool(merged["AUTO_INDEX"]), + auto_index_skip_update_fields=skip_update_fields, + auto_index_non_blocking=bool(merged.get("AUTO_INDEX_NON_BLOCKING", True)), default_results_limit=int(merged["DEFAULT_RESULTS_LIMIT"]), delta_indexing=bool(merged.get("DELTA_INDEXING", False)), cache=cache_cfg, @@ -516,3 +539,11 @@ def _build_conversational_config(payload: Dict[str, Any]) -> ConversationalConfi ), ) + +def clear_graph_search_caches() -> None: + """Сброс кэша настроек и реестра тяжёлых компонентов (для тестов и reload).""" + get_settings.cache_clear() + from .component_registry import clear_component_registry + + clear_component_registry() + diff --git a/src/django_graph_search/signals.py b/src/django_graph_search/signals.py index 9b26e3c..5f287d3 100644 --- a/src/django_graph_search/signals.py +++ b/src/django_graph_search/signals.py @@ -2,16 +2,20 @@ import logging import threading +from typing import Set +from django.contrib.auth import get_user_model from django.db.models.signals import post_delete, post_save from django.dispatch import receiver from django.utils.module_loading import import_string from .indexer import get_indexer -from .settings import get_settings +from .settings import GraphSearchConfig, get_settings log = logging.getLogger(__name__) +_LOCAL_EMBEDDING_BACKEND_MARKER = "SentenceTransformerBackend" + def _get_model_config(model_label: str): config = get_settings() @@ -21,6 +25,75 @@ def _get_model_config(model_label: str): return None +def _skip_field_names(config: GraphSearchConfig, model_cfg) -> Set[str]: + skip = set(config.auto_index_skip_update_fields) + if model_cfg.skip_update_fields: + skip.update(model_cfg.skip_update_fields) + return skip + + +def _should_skip_auto_index_on_update_fields(model_cfg, config, **kwargs) -> bool: + """Не индексировать save(update_fields=...), если затронуты только «шумные» поля.""" + update_fields = kwargs.get("update_fields") + if not update_fields: + return False + skip = _skip_field_names(config, model_cfg) + touched = {str(f) for f in update_fields} + return bool(touched) and touched <= skip + + +def _only_skip_fields_changed_on_instance(instance, skip: Set[str]) -> bool: + """ + Полный save() без update_fields: пропуск, если в БД отличаются только поля из skip. + + Типичный login: user.last_login обновлён, остальное без изменений. + """ + if not skip or instance.pk is None: + return False + model = instance.__class__ + old = model.objects.filter(pk=instance.pk).first() + if old is None: + return False + for field in model._meta.concrete_fields: + name = field.name + if name in skip or name in ("id", "pk"): + continue + if getattr(instance, name) != getattr(old, name): + return False + return True + + +def _should_skip_auth_user_noise(instance, model_cfg, config, **kwargs) -> bool: + if kwargs.get("update_fields") is not None: + return False + try: + user_model = get_user_model() + except Exception: # pragma: no cover + return False + if not isinstance(instance, user_model): + return False + if instance._meta.label != model_cfg.model: + return False + return _only_skip_fields_changed_on_instance(instance, _skip_field_names(config, model_cfg)) + + +def _uses_local_sentence_transformer(config: GraphSearchConfig) -> bool: + profile = config.embeddings[config.default_embedding] + return _LOCAL_EMBEDDING_BACKEND_MARKER in profile.backend + + +def _should_index_in_background(config: GraphSearchConfig) -> bool: + if config.async_indexing.enabled: + return True + return config.auto_index_non_blocking and _uses_local_sentence_transformer(config) + + +def _indexing_backend_name(config: GraphSearchConfig) -> str: + if config.async_indexing.enabled: + return config.async_indexing.backend.lower() + return "thread" + + def _sync_index(instance) -> None: model_cfg = _get_model_config(instance._meta.label) if model_cfg is None: @@ -36,23 +109,31 @@ def _sync_delete(instance) -> None: indexer.delete_instance(instance._meta.label, instance.pk) +def _run_index_in_thread(app_label: str, model_name: str, pk) -> None: + from .tasks import index_instance_task_fn + + thread = threading.Thread( + target=index_instance_task_fn, + args=[app_label, model_name, pk], + daemon=True, + name=f"dgs-index-{model_name}-{pk}", + ) + thread.start() + + def _dispatch_index(instance) -> None: """ - Индексация: синхронно или асинхронно по ASYNC_INDEXING. - - Celery — в очередь из настроек; thread — daemon; django-q — async_task; - иначе или при ошибке — синхронный путь. + Индексация: синхронно или асинхронно (ASYNC_INDEXING / AUTO_INDEX_NON_BLOCKING). """ cfg = get_settings() - if not cfg.async_indexing.enabled: + if not _should_index_in_background(cfg): _sync_index(instance) return app_label = instance._meta.app_label model_name = instance._meta.model_name pk = instance.pk - - backend = cfg.async_indexing.backend.lower() + backend = _indexing_backend_name(cfg) if backend == "celery": task = import_string(cfg.async_indexing.celery_task_path) @@ -68,15 +149,7 @@ def _dispatch_index(instance) -> None: ) _sync_index(instance) elif backend == "thread": - from .tasks import index_instance_task_fn - - thread = threading.Thread( - target=index_instance_task_fn, - args=[app_label, model_name, pk], - daemon=True, - name=f"dgs-index-{model_name}-{pk}", - ) - thread.start() + _run_index_in_thread(app_label, model_name, pk) elif backend == "django_q": try: from django_q.tasks import async_task @@ -149,7 +222,12 @@ def on_model_save(sender, instance, **kwargs): config = get_settings() if not config.auto_index: return - if _get_model_config(instance._meta.label) is None: + model_cfg = _get_model_config(instance._meta.label) + if model_cfg is None: + return + if _should_skip_auto_index_on_update_fields(model_cfg, config, **kwargs): + return + if _should_skip_auth_user_noise(instance, model_cfg, config, **kwargs): return _dispatch_index(instance) diff --git a/src/django_graph_search/static/django_graph_search/css/search.css b/src/django_graph_search/static/django_graph_search/css/search.css index 2aa93e9..a943740 100644 --- a/src/django_graph_search/static/django_graph_search/css/search.css +++ b/src/django_graph_search/static/django_graph_search/css/search.css @@ -47,3 +47,11 @@ color: #666; } +.graph-search__snippet { + max-width: 420px; + white-space: pre-wrap; + word-break: break-word; + font-size: 12px; + margin: 0; +} + diff --git a/src/django_graph_search/templates/django_graph_search/admin/search.html b/src/django_graph_search/templates/django_graph_search/admin/search.html index 42b072a..9a156ee 100644 --- a/src/django_graph_search/templates/django_graph_search/admin/search.html +++ b/src/django_graph_search/templates/django_graph_search/admin/search.html @@ -16,6 +16,11 @@

Graph Search

+
+ + +
Graph Search
+ {% if min_score_error %} +

{{ min_score_error }}

+ {% endif %} + {% if available_models %}
Доступные модели: {{ available_models|join:", " }} @@ -39,6 +48,7 @@

Graph Search

Модель ID Score + Индексируемый текст Данные @@ -54,6 +64,15 @@

Graph Search

{% endif %} {{ item.score }} + + {% if item.text_preview %} +
{{ item.text_preview }}
+ {% elif item.text %} +
{{ item.text }}
+ {% else %} + — + {% endif %} + {% if item.data %}
{{ item.data }}
diff --git a/tests/conftest.py b/tests/conftest.py index 0866867..fb708a9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,10 +11,32 @@ def pytest_configure(): INSTALLED_APPS=[ "django.contrib.contenttypes", "django.contrib.auth", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.admin", + "django_graph_search", "tests.test_app", ], DATABASES={"default": {"ENGINE": "django.db.backends.sqlite3", "NAME": ":memory:"}}, - MIDDLEWARE=[], + MIDDLEWARE=[ + "django.contrib.sessions.middleware.SessionMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + ], + TEMPLATES=[ + { + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": [], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", + ], + }, + }, + ], ROOT_URLCONF="django_graph_search.urls", SECRET_KEY="test", USE_TZ=True, diff --git a/tests/test_admin_sidebar.py b/tests/test_admin_sidebar.py new file mode 100644 index 0000000..4c0857a --- /dev/null +++ b/tests/test_admin_sidebar.py @@ -0,0 +1,118 @@ +"""Раздел django_graph_search в сайдбаре Django Admin.""" +# pylint: disable=redefined-outer-name +from __future__ import annotations + +from types import ModuleType +from typing import Any, Dict + +import pytest +from django.conf import settings as django_settings +from django.contrib.admin.sites import AdminSite +from django.contrib.auth import get_user_model +from django.test import Client, override_settings + +from django_graph_search.admin import setup_admin_site +from django_graph_search.models import GraphSearch, GraphSearchIndexStatus +from django_graph_search.settings import clear_graph_search_caches, get_settings + + +def _minimal_graph_search(extra: Dict[str, Any] | None = None) -> Dict[str, Any]: + base: Dict[str, Any] = { + "MODELS": [], + "VECTOR_STORE": {"BACKEND": "django_graph_search.backends.ChromaDBBackend"}, + "EMBEDDINGS": { + "default": { + "BACKEND": "tests.dummy_embedding_backend.DummyEmbeddingBackend", + "MODEL_NAME": "x", + } + }, + } + if extra: + base.update(extra) + return base + + +@pytest.fixture(name="apply_admin_graph_search_settings") +def _apply_admin_graph_search_settings_fixture(): + original = getattr(django_settings, "GRAPH_SEARCH", None) + clear_graph_search_caches() + + def _apply(payload: Dict[str, Any]): + django_settings.GRAPH_SEARCH = payload + clear_graph_search_caches() + return get_settings() + + yield _apply + + if original is None and hasattr(django_settings, "GRAPH_SEARCH"): + delattr(django_settings, "GRAPH_SEARCH") + elif original is not None: + django_settings.GRAPH_SEARCH = original + clear_graph_search_caches() + + +@pytest.fixture +def staff_client(db): + user_model = get_user_model() + user = user_model.objects.create_user( + username="admin_sidebar", + password="secret", + is_staff=True, + is_superuser=True, + ) + client = Client() + client.force_login(user) + return client + + +@override_settings(ROOT_URLCONF="tests.urls_admin") +def test_admin_index_shows_graph_search_section(staff_client, apply_admin_graph_search_settings): + apply_admin_graph_search_settings(_minimal_graph_search({"ADMIN_SEARCH_ENABLED": True})) + setup_admin_site() + + response = staff_client.get("/admin/") + assert response.status_code == 200 + content = response.content.decode() + assert "Django Graph Search" in content + assert "Поиск" in content + assert "Статус индексации" in content + assert "django_graph_search/graphsearch/" in content + assert "django_graph_search/graphsearchindexstatus/" in content + + +@override_settings(ROOT_URLCONF="tests.urls_admin") +def test_graph_search_legacy_url_works(staff_client, apply_admin_graph_search_settings): + apply_admin_graph_search_settings(_minimal_graph_search({"ADMIN_SEARCH_ENABLED": True})) + setup_admin_site() + + response = staff_client.get("/admin/graph-search/") + assert response.status_code == 200 + assert "Graph Search" in response.content.decode() + + +def test_setup_skips_when_admin_search_disabled(apply_admin_graph_search_settings): + apply_admin_graph_search_settings(_minimal_graph_search({"ADMIN_SEARCH_ENABLED": False})) + site = AdminSite(name="disabled_admin_test") + + setup_admin_site(site) + + assert not site.is_registered(GraphSearch) + assert not site.is_registered(GraphSearchIndexStatus) + url_names = [p.name for p in site.get_urls() if hasattr(p, "name") and p.name] + assert "graph-search" not in url_names + + +def test_graph_search_legacy_url_404_when_disabled(staff_client, apply_admin_graph_search_settings): + apply_admin_graph_search_settings(_minimal_graph_search({"ADMIN_SEARCH_ENABLED": False})) + + site = AdminSite(name="disabled_graph_search_admin") + setup_admin_site(site) + + url_module = ModuleType("test_urls_graph_search_disabled") + from django.urls import path + + url_module.urlpatterns = [path("admin/", site.urls)] + + with override_settings(ROOT_URLCONF=url_module): + response = staff_client.get("/admin/graph-search/") + assert response.status_code == 404 diff --git a/tests/test_chroma_score.py b/tests/test_chroma_score.py new file mode 100644 index 0000000..8dd9576 --- /dev/null +++ b/tests/test_chroma_score.py @@ -0,0 +1,122 @@ +"""Тесты маппинга distance → score для ChromaDB (метрика коллекции vs формула).""" + +from types import SimpleNamespace + +import pytest + +from django_graph_search.backends.chromadb import ( + ChromaDBBackend, + _effective_space_from_collection, + _requested_chroma_space, + chroma_distance_to_similarity, +) + + +@pytest.mark.parametrize( + ("space", "distance", "expected"), + [ + ("l2", 4.0, 0.2), + ("l2", 0.0, 1.0), + ("cosine", 0.1, 0.9), + ("cosine", 0.0, 1.0), + ("cosine", 1.5, 0.0), + ("ip", 0.4, 0.6), + ("ip", None, 0.0), + ], +) +def test_chroma_distance_to_similarity(space, distance, expected): + assert chroma_distance_to_similarity(space, distance) == pytest.approx(expected) + + +def test_requested_chroma_space_aliases(): + assert _requested_chroma_space("inner_product") == "ip" + assert _requested_chroma_space("euclidean") == "l2" + assert _requested_chroma_space("COSINE") == "cosine" + + +def test_effective_space_from_collection_configuration(): + col = SimpleNamespace( + configuration={"hnsw": {"space": "l2"}}, + metadata=None, + ) + assert _effective_space_from_collection(col, fallback="cosine") == "l2" + + +def test_effective_space_from_collection_legacy_metadata(): + col = SimpleNamespace( + configuration={}, + metadata={"hnsw:space": "cosine"}, + ) + assert _effective_space_from_collection(col, fallback="l2") == "cosine" + + +def test_effective_space_from_collection_object_configuration(): + """Chroma >= 0.5 может отдавать configuration как объект с атрибутом hnsw.""" + hnsw = SimpleNamespace(space="ip") + cfg = SimpleNamespace(hnsw=hnsw) + col = SimpleNamespace(configuration=cfg, metadata={}) + assert _effective_space_from_collection(col, fallback="cosine") == "ip" + + +def test_effective_space_from_collection_top_level_space_key(): + col = SimpleNamespace( + configuration={"space": "l2"}, + metadata={}, + ) + assert _effective_space_from_collection(col, fallback="cosine") == "l2" + + +def test_effective_space_fallback(): + col = SimpleNamespace(configuration={}, metadata={}) + assert _effective_space_from_collection(col, fallback="inner_product") == "ip" + + +def test_chroma_backend_l2_collection_nonzero_score_for_large_distance(tmp_path): + """Раньше при L2-коллекции и формуле (1-d) все score обнулялись; с l2 — нет.""" + pytest.importorskip("chromadb") + import chromadb + + name = "score_test_col" + client = chromadb.PersistentClient(path=str(tmp_path)) + client.get_or_create_collection(name) + + backend = ChromaDBBackend( + persist_directory=str(tmp_path), + collection_name=name, + distance_metric="cosine", + ) + assert backend._effective_space == "l2" + + backend.collection.add( + ids=["row1"], + embeddings=[[1.0, 0.0, 0.0]], + documents=["alpha"], + metadatas=[{"model": "app.Model", "pk": 1}], + ) + hits = backend.search([0.0, 1.0, 0.0], limit=1, filters=None) + assert hits + assert hits[0].score > 0.0 + assert hits[0].metadata.get("vector_distance") is not None + + +def test_chroma_backend_search_uses_explicit_include(): + captured = {} + + class _Coll: + def query(self, **kwargs): + captured.update(kwargs) + return { + "ids": [["id1"]], + "distances": [[0.0]], + "metadatas": [[{"model": "m", "pk": 1}]], + "documents": [["hello"]], + } + + backend = object.__new__(ChromaDBBackend) + backend.collection = _Coll() + backend._effective_space = "cosine" + + hits = ChromaDBBackend.search(backend, [0.1, 0.2], limit=3, filters=None) + assert captured.get("include") == ["distances", "metadatas", "documents"] + assert hits[0].score == 1.0 + assert hits[0].metadata.get("vector_distance") == 0.0 diff --git a/tests/test_component_registry.py b/tests/test_component_registry.py new file mode 100644 index 0000000..e96c477 --- /dev/null +++ b/tests/test_component_registry.py @@ -0,0 +1,82 @@ +"""Singleton тяжёлых компонентов на процесс.""" +from __future__ import annotations + +from django_graph_search.component_registry import ( + clear_component_registry, + get_shared_components, +) +from django_graph_search.factory import build_components +from django_graph_search.indexer import get_indexer +from django_graph_search.settings import ( + CacheConfig, + EmbeddingProfile, + GraphSearchConfig, + VectorStoreConfig, +) + +from tests.dummy_embedding_backend import DummyEmbeddingBackend +from tests.dummy_vector_backend import DummyVectorBackend + + +def _minimal_config() -> GraphSearchConfig: + return GraphSearchConfig( + models=[], + vector_store=VectorStoreConfig( + backend="tests.dummy_vector_backend.DummyVectorBackend", + options={}, + ), + embeddings={ + "default": EmbeddingProfile( + backend="tests.dummy_embedding_backend.DummyEmbeddingBackend", + model_name="x", + ), + }, + default_embedding="default", + api_url_prefix="api/search/", + admin_search_enabled=False, + auto_index=False, + default_results_limit=10, + delta_indexing=False, + cache=CacheConfig(backend="file"), + ) + + +def test_get_shared_components_returns_same_instances(): + clear_component_registry() + cfg = _minimal_config() + _, vs1, emb1, res1 = get_shared_components(cfg) + _, vs2, emb2, res2 = get_shared_components(cfg) + assert vs1 is vs2 + assert emb1 is emb2 + assert res1 is res2 + + +def test_build_components_uses_registry_when_all_none(): + clear_component_registry() + cfg = _minimal_config() + _, vs1, emb1, _ = build_components(cfg, None, None, None, None) + _, vs2, emb2, _ = build_components(cfg, None, None, None, None) + assert vs1 is vs2 + assert emb1 is emb2 + + +def test_get_indexer_reuses_embedding_backend(): + clear_component_registry() + cfg = _minimal_config() + idx1 = get_indexer(config=cfg) + idx2 = get_indexer(config=cfg) + assert idx1.embedding_backend is idx2.embedding_backend + assert idx1.vector_store is idx2.vector_store + + +def test_explicit_backends_bypass_registry(): + clear_component_registry() + cfg = _minimal_config() + vs = DummyVectorBackend() + emb = DummyEmbeddingBackend(model_name="x") + _, vs1, emb1, _ = build_components(cfg, vs, emb, None, None) + _, vs2, emb2, _ = build_components(cfg, vs, emb, None, None) + assert vs1 is vs + assert emb1 is emb + assert vs2 is vs + assert emb2 is emb diff --git a/tests/test_conversational.py b/tests/test_conversational.py index 76e1648..3a706c9 100644 --- a/tests/test_conversational.py +++ b/tests/test_conversational.py @@ -10,7 +10,7 @@ from django.conf import settings as django_settings from django.test import RequestFactory -from django_graph_search.settings import get_settings +from django_graph_search.settings import clear_graph_search_caches, get_settings from django_graph_search.views import ConversationalSearchAPIView, _memory_backend_registry @@ -38,13 +38,13 @@ def _minimal_graph_search(extra: Dict[str, Any] | None = None) -> Dict[str, Any] def _apply_conv_settings_fixture(): original = getattr(django_settings, "GRAPH_SEARCH", None) original_debug = django_settings.DEBUG - get_settings.cache_clear() + clear_graph_search_caches() _memory_backend_registry.clear() def _apply(payload: Dict[str, Any], *, debug: bool): django_settings.GRAPH_SEARCH = payload django_settings.DEBUG = debug - get_settings.cache_clear() + clear_graph_search_caches() yield _apply @@ -53,7 +53,7 @@ def _apply(payload: Dict[str, Any], *, debug: bool): delattr(django_settings, "GRAPH_SEARCH") elif original is not None: django_settings.GRAPH_SEARCH = original - get_settings.cache_clear() + clear_graph_search_caches() _memory_backend_registry.clear() diff --git a/tests/test_conversational_search.py b/tests/test_conversational_search.py index 3970e71..bea473a 100644 --- a/tests/test_conversational_search.py +++ b/tests/test_conversational_search.py @@ -22,7 +22,7 @@ InMemoryBackend, build_memory_backend, ) -from django_graph_search.settings import get_settings +from django_graph_search.settings import clear_graph_search_caches, get_settings from django_graph_search.views import ConversationalSearchAPIView, _memory_backend_registry @@ -34,13 +34,13 @@ @pytest.fixture def graph_search_settings(): original = getattr(django_settings, "GRAPH_SEARCH", None) - get_settings.cache_clear() + clear_graph_search_caches() # Сброс singleton бэкендов памяти между тестами _memory_backend_registry.clear() def _apply(payload): django_settings.GRAPH_SEARCH = payload - get_settings.cache_clear() + clear_graph_search_caches() return get_settings() yield _apply @@ -49,7 +49,7 @@ def _apply(payload): delattr(django_settings, "GRAPH_SEARCH") elif original is not None: django_settings.GRAPH_SEARCH = original - get_settings.cache_clear() + clear_graph_search_caches() _memory_backend_registry.clear() diff --git a/tests/test_events_streaming.py b/tests/test_events_streaming.py index 2430652..6f584b7 100644 --- a/tests/test_events_streaming.py +++ b/tests/test_events_streaming.py @@ -22,6 +22,7 @@ SmartIndexingConfig, ConversationalConfig, VectorStoreConfig, + clear_graph_search_caches, get_settings, ) from django_graph_search.views import StreamingSearchAPIView @@ -141,11 +142,11 @@ def test_fallback_graph_no_hub_runs_silently(self): @pytest.fixture def graph_search_settings(): original = getattr(django_settings, "GRAPH_SEARCH", None) - get_settings.cache_clear() + clear_graph_search_caches() def _apply(payload): django_settings.GRAPH_SEARCH = payload - get_settings.cache_clear() + clear_graph_search_caches() return get_settings() yield _apply @@ -155,7 +156,7 @@ def _apply(payload): delattr(django_settings, "GRAPH_SEARCH") else: django_settings.GRAPH_SEARCH = original - get_settings.cache_clear() + clear_graph_search_caches() def _drain(response) -> list: diff --git a/tests/test_graph_resolver.py b/tests/test_graph_resolver.py index c33f51f..7e59dae 100644 --- a/tests/test_graph_resolver.py +++ b/tests/test_graph_resolver.py @@ -5,7 +5,7 @@ from django.test import TestCase from django_graph_search.graph_resolver import GraphResolver -from django_graph_search.settings import ModelConfig, get_settings +from django_graph_search.settings import ModelConfig, clear_graph_search_caches, get_settings from .test_app.models import Category, Product @@ -43,7 +43,7 @@ def test_weight_fields_with_all_fields(self): def test_weight_fields_parsed_for_all_fields_in_settings(): """GRAPH_SEARCH: weight_fields нормализуются при fields='__all__'.""" original = getattr(django_settings, "GRAPH_SEARCH", None) - get_settings.cache_clear() + clear_graph_search_caches() django_settings.GRAPH_SEARCH = { "MODELS": [ { @@ -70,4 +70,4 @@ def test_weight_fields_parsed_for_all_fields_in_settings(): delattr(django_settings, "GRAPH_SEARCH") else: django_settings.GRAPH_SEARCH = original - get_settings.cache_clear() + clear_graph_search_caches() diff --git a/tests/test_indexer.py b/tests/test_indexer.py index a48ae01..be0a596 100644 --- a/tests/test_indexer.py +++ b/tests/test_indexer.py @@ -1,3 +1,5 @@ +from unittest import mock + from django.test import TestCase from django_graph_search.indexer import Indexer @@ -101,8 +103,14 @@ def test_delta_indexing_skips_unchanged(self): delta_cache=delta_cache, ) product = Product.objects.first() - indexer.index_instance(product, config.models[0]) - indexer.index_instance(product, config.models[0]) + with mock.patch.object( + embedding_backend, + "embed_batch", + wraps=embedding_backend.embed_batch, + ) as embed_batch: + indexer.index_instance(product, config.models[0]) + indexer.index_instance(product, config.models[0]) + assert embed_batch.call_count == 1 self.assertEqual(len(vector_store.docs), 1) diff --git a/tests/test_langgraph_search.py b/tests/test_langgraph_search.py index 406e461..3fd8002 100644 --- a/tests/test_langgraph_search.py +++ b/tests/test_langgraph_search.py @@ -9,31 +9,32 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Sequence -from unittest import mock +from typing import Any, Dict, List import pytest from django.conf import settings as django_settings +from django_graph_search.backends.base import SearchResult from django_graph_search.langgraph_agent import ( SearchState, analyze_query_node, expand_query_node, postprocess_results_node, rerank_results_node, + sort_vector_hits, vector_search_node, ) from django_graph_search.llm import DummyLLMBackend -from django_graph_search.llm.base import BaseLLMBackend, RerankCandidate +from django_graph_search.llm.base import BaseLLMBackend from django_graph_search.searcher import Searcher from django_graph_search.settings import ( CacheConfig, EmbeddingProfile, GraphSearchConfig, LangGraphConfig, - LLMConfig, VectorStoreConfig, + clear_graph_search_caches, get_settings, ) @@ -47,11 +48,11 @@ def graph_search_settings(): before and after to stay isolated. """ original = getattr(django_settings, "GRAPH_SEARCH", None) - get_settings.cache_clear() + clear_graph_search_caches() def _apply(payload): django_settings.GRAPH_SEARCH = payload - get_settings.cache_clear() + clear_graph_search_caches() return get_settings() yield _apply @@ -61,7 +62,7 @@ def _apply(payload): delattr(django_settings, "GRAPH_SEARCH") else: django_settings.GRAPH_SEARCH = original - get_settings.cache_clear() + clear_graph_search_caches() # --------------------------------------------------------------------------- @@ -211,6 +212,73 @@ def test_vector_search_node_merges_and_dedupes(): assert pytest.approx([h.score for h in out["raw_results"]][0]) == 0.9 +def test_vector_search_merge_prefers_lower_distance_when_scores_equal(): + """При одинаковом score оставляем hit с меньшим vector_distance.""" + embed = StubEmbeddingBackend(["a", "b"]) + store = StubVectorStore({ + "a": [ + FakeHit( + "test_app.Product:1", + 0.5, + {"model": "test_app.Product", "pk": 1, "vector_distance": 2.0}, + ), + ], + "b": [ + FakeHit( + "test_app.Product:1", + 0.5, + {"model": "test_app.Product", "pk": 1, "vector_distance": 0.25}, + ), + ], + }) + state: SearchState = { + "expanded_queries": ["a", "b"], + "limit": 10, + "models": None, + } + out = vector_search_node(state, embedding_backend=embed, vector_store=store) + hit = next(h for h in out["raw_results"] if h.id == "test_app.Product:1") + assert hit.metadata["vector_distance"] == 0.25 + + +def test_sort_vector_hits_breaks_score_ties_by_distance(): + hits = [ + FakeHit("b", 0.5, {"vector_distance": 1.0}), + FakeHit("a", 0.5, {"vector_distance": 0.5}), + ] + ordered = sort_vector_hits(hits) + assert [h.id for h in ordered] == ["a", "b"] + + +def test_rerank_passes_indexed_text_from_metadata(): + """SearchResult хранит текст в metadata['text'], не в атрибуте .text.""" + + class CaptureLLM(DummyLLMBackend): + def __init__(self) -> None: + super().__init__() + self.seen_texts: List[str] = [] + + def rerank(self, query, candidates, top_k=None): + self.seen_texts = [c.text for c in candidates] + return super().rerank(query, candidates, top_k=top_k) + + llm = CaptureLLM() + candidates = [ + SearchResult( + id="m::1", + score=0.5, + metadata={"model": "m", "pk": 1, "text": "indexed body"}, + ), + ] + state: SearchState = { + "merged_results": candidates, + "normalized_query": "q", + } + cfg = _make_config(langgraph=LangGraphConfig(reranking=True, rerank_top_k=5)) + rerank_results_node(state, config=cfg, llm=llm) + assert llm.seen_texts == ["indexed body"] + + def test_vector_search_node_filters_by_models(): embed = StubEmbeddingBackend(["q"]) store = StubVectorStore({ diff --git a/tests/test_permissions.py b/tests/test_permissions.py index 60c1965..d5100d7 100644 --- a/tests/test_permissions.py +++ b/tests/test_permissions.py @@ -15,7 +15,7 @@ check_permissions, check_throttle, ) -from django_graph_search.settings import GraphSearchConfig, get_settings +from django_graph_search.settings import GraphSearchConfig, clear_graph_search_caches, get_settings def _minimal_graph_search(extra: Dict[str, Any] | None = None) -> Dict[str, Any]: @@ -37,12 +37,12 @@ def _minimal_graph_search(extra: Dict[str, Any] | None = None) -> Dict[str, Any] @pytest.fixture(name="apply_api_settings") def _apply_api_settings_fixture(): original = getattr(django_settings, "GRAPH_SEARCH", None) - get_settings.cache_clear() + clear_graph_search_caches() SimpleScopedRateThrottle._windows.clear() def _apply(payload: Dict[str, Any]) -> GraphSearchConfig: django_settings.GRAPH_SEARCH = payload - get_settings.cache_clear() + clear_graph_search_caches() return get_settings() yield _apply @@ -51,7 +51,7 @@ def _apply(payload: Dict[str, Any]) -> GraphSearchConfig: delattr(django_settings, "GRAPH_SEARCH") elif original is not None: django_settings.GRAPH_SEARCH = original - get_settings.cache_clear() + clear_graph_search_caches() SimpleScopedRateThrottle._windows.clear() diff --git a/tests/test_signals_async.py b/tests/test_signals_async.py index a46b67c..fa4ca89 100644 --- a/tests/test_signals_async.py +++ b/tests/test_signals_async.py @@ -8,7 +8,7 @@ import pytest from django.conf import settings as django_settings -from django_graph_search.settings import get_settings +from django_graph_search.settings import clear_graph_search_caches, get_settings from .test_app.models import Category, Product @@ -16,11 +16,11 @@ @pytest.fixture(name="graph_search_signal_settings") def _graph_search_signal_settings_fixture(): original = getattr(django_settings, "GRAPH_SEARCH", None) - get_settings.cache_clear() + clear_graph_search_caches() def _apply(payload: Dict[str, Any]): django_settings.GRAPH_SEARCH = payload - get_settings.cache_clear() + clear_graph_search_caches() yield _apply @@ -28,7 +28,7 @@ def _apply(payload: Dict[str, Any]): delattr(django_settings, "GRAPH_SEARCH") elif original is not None: django_settings.GRAPH_SEARCH = original - get_settings.cache_clear() + clear_graph_search_caches() @pytest.mark.django_db diff --git a/tests/test_signals_non_blocking.py b/tests/test_signals_non_blocking.py new file mode 100644 index 0000000..ff5d81f --- /dev/null +++ b/tests/test_signals_non_blocking.py @@ -0,0 +1,91 @@ +"""AUTO_INDEX_NON_BLOCKING: локальный ST не блокирует поток запроса.""" +from __future__ import annotations + +import time +from typing import Any, Dict +from unittest import mock + +import pytest +from django.conf import settings as django_settings +from django.contrib.auth import get_user_model + +from django_graph_search.settings import clear_graph_search_caches + +from .test_app.models import Category, Product + + +@pytest.fixture(name="graph_search_nb_settings") +def _graph_search_nb_settings_fixture(): + original = getattr(django_settings, "GRAPH_SEARCH", None) + clear_graph_search_caches() + + def _apply(payload: Dict[str, Any]): + django_settings.GRAPH_SEARCH = payload + clear_graph_search_caches() + + yield _apply + + if original is None and hasattr(django_settings, "GRAPH_SEARCH"): + delattr(django_settings, "GRAPH_SEARCH") + elif original is not None: + django_settings.GRAPH_SEARCH = original + clear_graph_search_caches() + + +@pytest.mark.django_db +def test_non_blocking_auto_index_returns_before_slow_embed(graph_search_nb_settings): + graph_search_nb_settings( + { + "MODELS": [{"model": "test_app.Product", "fields": ["name"]}], + "VECTOR_STORE": {"BACKEND": "tests.dummy_vector_backend.DummyVectorBackend"}, + "EMBEDDINGS": { + "default": { + "BACKEND": "django_graph_search.embeddings.SentenceTransformerBackend", + "MODEL_NAME": "x", + } + }, + "AUTO_INDEX": True, + "AUTO_INDEX_NON_BLOCKING": True, + "ASYNC_INDEXING": {"ENABLED": False}, + } + ) + + def slow_index(*_a, **_kw): + time.sleep(1.2) + + cat = Category.objects.create(name="c") + with mock.patch("django_graph_search.tasks.index_instance_task_fn", side_effect=slow_index): + t0 = time.monotonic() + Product.objects.create(name="fast", category=cat) + elapsed = time.monotonic() - t0 + assert elapsed < 0.35 + + +@pytest.mark.django_db +def test_skip_full_save_when_only_last_login_changed(graph_search_nb_settings): + User = get_user_model() + label = User._meta.label + graph_search_nb_settings( + { + "MODELS": [{"model": label, "fields": ["username"]}], + "VECTOR_STORE": {"BACKEND": "tests.dummy_vector_backend.DummyVectorBackend"}, + "EMBEDDINGS": { + "default": { + "BACKEND": "tests.dummy_embedding_backend.DummyEmbeddingBackend", + "MODEL_NAME": "x", + } + }, + "AUTO_INDEX": True, + "AUTO_INDEX_SKIP_UPDATE_FIELDS": ["last_login"], + } + ) + user = User.objects.create_user(username="u1", password="x") + user.set_password("y") + user.save() + + with mock.patch("django_graph_search.signals._dispatch_index") as dispatch: + from django.utils import timezone + + user.last_login = timezone.now() + user.save() + dispatch.assert_not_called() diff --git a/tests/test_signals_skip_update_fields.py b/tests/test_signals_skip_update_fields.py new file mode 100644 index 0000000..1630739 --- /dev/null +++ b/tests/test_signals_skip_update_fields.py @@ -0,0 +1,90 @@ +"""Пропуск AUTO_INDEX при save(update_fields=...) с «шумными» полями.""" +from __future__ import annotations + +from typing import Any, Dict +from unittest import mock + +import pytest +from django.conf import settings as django_settings + +from django_graph_search.settings import clear_graph_search_caches + +from .test_app.models import Category, Product + + +@pytest.fixture(name="graph_search_skip_settings") +def _graph_search_skip_settings_fixture(): + original = getattr(django_settings, "GRAPH_SEARCH", None) + clear_graph_search_caches() + + def _apply(payload: Dict[str, Any]): + django_settings.GRAPH_SEARCH = payload + clear_graph_search_caches() + + yield _apply + + if original is None and hasattr(django_settings, "GRAPH_SEARCH"): + delattr(django_settings, "GRAPH_SEARCH") + elif original is not None: + django_settings.GRAPH_SEARCH = original + clear_graph_search_caches() + + +@pytest.mark.django_db +def test_skip_index_when_only_skip_update_fields_touched(graph_search_skip_settings): + graph_search_skip_settings( + { + "MODELS": [ + { + "model": "test_app.Product", + "fields": ["name"], + "skip_update_fields": ["category"], + } + ], + "VECTOR_STORE": {"BACKEND": "tests.dummy_vector_backend.DummyVectorBackend"}, + "EMBEDDINGS": { + "default": { + "BACKEND": "tests.dummy_embedding_backend.DummyEmbeddingBackend", + "MODEL_NAME": "x", + } + }, + "AUTO_INDEX": True, + } + ) + cat1 = Category.objects.create(name="c1") + cat2 = Category.objects.create(name="c2") + product = Product.objects.create(name="widget", category=cat1) + + with mock.patch("django_graph_search.indexer.Indexer._index_batch") as index_batch: + product.category = cat2 + product.save(update_fields=["category"]) + assert index_batch.call_count == 0 + + product.name = "gadget" + product.save(update_fields=["name"]) + assert index_batch.call_count == 1 + + +@pytest.mark.django_db +def test_global_auto_index_skip_update_fields(graph_search_skip_settings): + graph_search_skip_settings( + { + "MODELS": [{"model": "test_app.Product", "fields": ["name"]}], + "VECTOR_STORE": {"BACKEND": "tests.dummy_vector_backend.DummyVectorBackend"}, + "EMBEDDINGS": { + "default": { + "BACKEND": "tests.dummy_embedding_backend.DummyEmbeddingBackend", + "MODEL_NAME": "x", + } + }, + "AUTO_INDEX": True, + "AUTO_INDEX_SKIP_UPDATE_FIELDS": ["name"], + } + ) + cat = Category.objects.create(name="c") + product = Product.objects.create(name="a", category=cat) + + with mock.patch("django_graph_search.indexer.Indexer._index_batch") as index_batch: + product.name = "b" + product.save(update_fields=["name"]) + assert index_batch.call_count == 0 diff --git a/tests/test_views.py b/tests/test_views.py index b043253..e8801d2 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -9,7 +9,7 @@ from django.conf import settings as django_settings from django.test import RequestFactory -from django_graph_search.settings import get_settings +from django_graph_search.settings import clear_graph_search_caches, get_settings from django_graph_search.views import SearchAPIView, StreamingSearchAPIView @@ -32,11 +32,11 @@ def _minimal_graph_search(extra: Dict[str, Any] | None = None) -> Dict[str, Any] @pytest.fixture(name="apply_view_settings") def _apply_view_settings_fixture(): original = getattr(django_settings, "GRAPH_SEARCH", None) - get_settings.cache_clear() + clear_graph_search_caches() def _apply(payload: Dict[str, Any]): django_settings.GRAPH_SEARCH = payload - get_settings.cache_clear() + clear_graph_search_caches() yield _apply @@ -44,7 +44,7 @@ def _apply(payload: Dict[str, Any]): delattr(django_settings, "GRAPH_SEARCH") elif original is not None: django_settings.GRAPH_SEARCH = original - get_settings.cache_clear() + clear_graph_search_caches() @pytest.mark.django_db diff --git a/tests/urls_admin.py b/tests/urls_admin.py new file mode 100644 index 0000000..083932c --- /dev/null +++ b/tests/urls_admin.py @@ -0,0 +1,6 @@ +from django.contrib import admin +from django.urls import path + +urlpatterns = [ + path("admin/", admin.site.urls), +]