From 4bc45b4f1561a1d9951bb0e6011bc2893a1e72b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Tolga=20Erd=C3=B6nmez?= Date: Mon, 14 Jul 2025 12:49:34 +0300 Subject: [PATCH 1/8] WIP fixed catalog generation --- .../generator/implementation/ragas/config.py | 2 + .../implementation/ragas/generator.py | 256 +++++++++++------- backend/llm_eval/tasks.py | 13 +- .../model_settings/azure_ai_model_settings.py | 7 +- backend/poetry.lock | 97 ++++++- backend/pyproject.toml | 1 + .../implementation/ragas/test_generator.py | 38 +-- 7 files changed, 272 insertions(+), 142 deletions(-) diff --git a/backend/llm_eval/qa_catalog/generator/implementation/ragas/config.py b/backend/llm_eval/qa_catalog/generator/implementation/ragas/config.py index 7f43262..6b0bdda 100644 --- a/backend/llm_eval/qa_catalog/generator/implementation/ragas/config.py +++ b/backend/llm_eval/qa_catalog/generator/implementation/ragas/config.py @@ -29,6 +29,8 @@ class RagasQACatalogGeneratorConfig(QACatalogGeneratorConfig[RagasGeneratorType] query_distribution: dict[RagasQACatalogQuerySynthesizer, float] personas: list[RagasQACatalogGeneratorPersona] | None + use_existing_knowledge_graph: bool = True + class RagasQACatalogGeneratorModelConfigSchema( QACatalogGeneratorModelConfigSchema[RagasGeneratorType] diff --git a/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py b/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py index 9916e10..54eab69 100644 --- a/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py +++ b/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py @@ -1,15 +1,8 @@ +import inspect import math -import traceback -from copy import copy from hashlib import md5 from typing import Callable, Coroutine, get_args, override -from anyio import ( - CapacityLimiter, - create_memory_object_stream, - create_task_group, -) -from anyio.streams.memory import MemoryObjectSendStream from langchain_community.document_loaders import DirectoryLoader from langchain_core.documents import Document from langchain_text_splitters import RecursiveCharacterTextSplitter @@ -25,10 +18,23 @@ MultiHopSpecificQuerySynthesizer, SingleHopSpecificQuerySynthesizer, ) -from ragas.testset.synthesizers.generate import LangchainLLMWrapper +from ragas.testset.synthesizers.generate import ( + LangchainLLMWrapper, +) from ragas.testset.synthesizers.testset_schema import Testset -from ragas.testset.transforms import apply_transforms, default_transforms -from ragas.testset.transforms.default import num_tokens_from_string +from ragas.testset.transforms import ( + EmbeddingExtractor, + HeadlinesExtractor, + HeadlineSplitter, + KeyphrasesExtractor, + SummaryExtractor, + apply_transforms, +) +from ragas.testset.transforms.default import ( + NERExtractor, + num_tokens_from_string, +) +from typing_extensions import deprecated from llm_eval.qa_catalog.generator.implementation.QACatalogGeneratorTypes import ( # noqa: E501 QACatalogGeneratorType, @@ -48,7 +54,6 @@ QACatalogGeneratorDataSourceConfig, QACatalogGeneratorLocalModelConfig, ) -from llm_eval.qa_catalog.graph_utils import create_backup from llm_eval.qa_catalog.synthetic_qa_pair import SyntheticQAPair from llm_eval.settings import SETTINGS from llm_eval.utils.decorators import retry_on_error @@ -63,6 +68,19 @@ RagasQACatalogQuerySynthesizer.MULTI_HOP_ABSTRACT: MultiHopAbstractQuerySynthesizer, } +query_synthesizer_classes_reverse: dict[ + type[BaseSynthesizer], RagasQACatalogQuerySynthesizer +] = {v: k for k, v in query_synthesizer_classes.items()} + + +def has_parameter(cls: type, param_name: str) -> bool: + """Check if a class constructor has a specific parameter.""" + try: + signature = inspect.signature(cls.__init__) + return param_name in signature.parameters + except Exception: + return False + class RagasQACatalogGenerator( QACatalogGenerator[ @@ -100,7 +118,10 @@ def __init__( raise RuntimeError(_msg) self.embeddings = LangchainEmbeddingsWrapper(azure_settings.to_embeddings()) - if not self.config.knowledge_graph_location: + if ( + not self.config.knowledge_graph_location + and self.config.use_existing_knowledge_graph + ): self.config.knowledge_graph_location = ( SETTINGS.file_upload_temp_location / "knowledge_graph_ragas.json" ) @@ -138,10 +159,14 @@ def _load_and_process_documents(self) -> list[Document]: docs = loader.load() return docs - def _create_document_nodes(self, docs: list[Document]) -> list[Node]: + def _create_knowledge_graph_nodes( + self, + docs: list[Document], + node_type: NodeType = NodeType.DOCUMENT, + ) -> list[Node]: return [ Node( - type=NodeType.DOCUMENT, + type=node_type, properties={ "page_content": doc.page_content, "document_metadata": doc.metadata, @@ -185,20 +210,24 @@ def split_documents( Therefore if the document is large enough, we split it into proper subdocuments. """ + + def _split(doc: Document, token_length: int) -> list[Document]: + chunk_size = math.ceil(token_length / 2) + chunk_overlap = math.ceil(chunk_size * 0.5) + splitter = RecursiveCharacterTextSplitter( + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + length_function=num_tokens_from_string, + ) + return splitter.split_documents([doc]) + _docs = [] split_occurred = False for doc in docs: token_length = num_tokens_from_string(doc.page_content) if token_length > 100_000: split_occurred = True - chunk_size = math.ceil(token_length / 2) - chunk_overlap = math.ceil(chunk_size * 0.5) - splitter = RecursiveCharacterTextSplitter( - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, - length_function=num_tokens_from_string, - ) - _docs.extend(splitter.split_documents([doc])) + _docs.extend(_split(doc, token_length)) else: _docs.append(doc) @@ -208,17 +237,36 @@ def split_documents( return _docs def apply_knowledge_graph_transformations( - self, kg: KnowledgeGraph, docs: list[Document] + self, + kg: KnowledgeGraph, ) -> None: - apply_transforms( - kg, - default_transforms( - self.split_documents(docs), - llm=self.llm, - embedding_model=self.embeddings, - ), + headline_extractor = HeadlinesExtractor(llm=self.llm, max_num=20) + headline_splitter = HeadlineSplitter(max_tokens=1500, min_tokens=100) + keyphrase_extractor = KeyphrasesExtractor(llm=self.llm) + + summary = SummaryExtractor(llm=self.llm) + summary_emb_extractor = EmbeddingExtractor( + embedding_model=self.embeddings, + property_name="summary_embedding", + embed_property_name="summary", + ) + + ner_extractor = NERExtractor( + llm=self.llm, + property_name="entities", ) + transforms = [ + headline_extractor, + headline_splitter, + keyphrase_extractor, + summary, + summary_emb_extractor, + ner_extractor, + ] + + apply_transforms(kg, transforms=transforms) + def create_knowledge_graph(self) -> KnowledgeGraph: """ Loads the knowledge graph if already exists @@ -231,41 +279,28 @@ def create_knowledge_graph(self) -> KnowledgeGraph: if len(docs) == 0: raise RuntimeError("No documents found") - loaded_knowledge_graph = self.load_knowledge_graph() - new_nodes = self._create_document_nodes(docs) - if loaded_knowledge_graph and self._has_same_nodes( - loaded_knowledge_graph.nodes, new_nodes - ): - logger.info("Using the existent knowledge graph") - kg = loaded_knowledge_graph - else: - logger.info("Generating a new knowledge graph") - kg = KnowledgeGraph(nodes=new_nodes) - self.apply_knowledge_graph_transformations(kg, docs) - if self.config.knowledge_graph_location: - create_backup( - self.config.knowledge_graph_location - ) # backup the old kg config - kg.save( - self.config.knowledge_graph_location - ) # save the new kg to system - logger.info( - f"Knowledge graph saved to {self.config.knowledge_graph_location}" - ) + chunks = self.split_documents(docs) + kg = KnowledgeGraph( + nodes=self._create_knowledge_graph_nodes(chunks), + ) + + self.apply_knowledge_graph_transformations(kg) return kg - def load_knowledge_graph(self) -> KnowledgeGraph | None: + @deprecated("Until we have a better way to handle knowledge graph caching") + def load_exiting_knowledge_graph( + self, + docs: list[Document], + ) -> KnowledgeGraph | None: if not self.config.knowledge_graph_location: return None - knowledge_graph: KnowledgeGraph | None = None - if not self.config.knowledge_graph_location.exists(): logger.info( f"No knowledge graph found at {self.config.knowledge_graph_location}" ) - return knowledge_graph + return None logger.info(f"Knowledge graph found at {self.config.knowledge_graph_location}") try: @@ -276,53 +311,71 @@ def load_knowledge_graph(self) -> KnowledgeGraph | None: self.config.knowledge_graph_location.absolute().with_suffix(".bak") ) - return knowledge_graph + new_nodes = self._create_knowledge_graph_nodes(docs) + if knowledge_graph and self._has_same_nodes(knowledge_graph.nodes, new_nodes): + return knowledge_graph + + return None + + def create_query_distribution( + self, + knowledge_graph: KnowledgeGraph, + sample_count: int, + ) -> list[tuple[BaseSynthesizer, float]]: + properties = ["headlines", "keyphrases", "entities"] + + selected_synthesizer_classes = ( + query_synthesizer_classes[q] for q in self.config.query_distribution.keys() + ) + + synthesizers = [] + for synthesizer_class in selected_synthesizer_classes: + if has_parameter(synthesizer_class, "property_name"): + for property_name in properties: + synthesizers.append( + synthesizer_class( + llm=self.llm, + property_name=property_name, # type: ignore + ) + ) + else: + synthesizers.append(synthesizer_class(llm=self.llm)) + + available_queries = [] + for query in synthesizers: + if query.get_node_clusters(knowledge_graph): + available_queries.append(query) + + return [(query, 1 / len(available_queries)) for query in available_queries] @retry_on_error((Exception,), 3) def generate_testset( self, generator: TestsetGenerator, - count: int, + sample_count: int, + query_distribution: list[tuple[BaseSynthesizer, float]], ) -> Testset: + if len(query_distribution) == 0: + raise ValueError("No query distribution provided") + return generator.generate( - count, - query_distribution=[ - (query_synthesizer_classes[synthesizer_type](llm=self.llm), weight) - for synthesizer_type, weight in self.config.query_distribution.items() - if weight > 0 - ], + sample_count, + query_distribution, ) - async def _generate_single_sample( - self, - generator: TestsetGenerator, - send_sample: MemoryObjectSendStream[SyntheticQAPair], - limiter: CapacityLimiter, - ) -> None: - try: - async with limiter: - testset = self.generate_testset(generator, 1) - if testset.samples: - async with send_sample: - for testset_sample in testset.samples: - sample = ragas_sample_to_synthetic_qa_pair(testset_sample) - await send_sample.send(sample) - else: - logger.error(f"empty testset {testset.samples} {testset.to_list()}") - except Exception as e: - logger.error( - f"Error generating sample: {e}\nTraceback: {traceback.format_exc()}" - ) - async def a_create_synthetic_qa( self, process_sample: Callable[[SyntheticQAPair], Coroutine], ) -> None: - limiter = CapacityLimiter(SETTINGS.ragas.parallel_generation_limit) - send_sample, receive_sample = create_memory_object_stream[SyntheticQAPair]() - kg = self.create_knowledge_graph() + if not self.personas: + from ragas.testset.persona import generate_personas_from_kg + + self.personas = generate_personas_from_kg(kg, self.llm) + if not self.personas: + raise ValueError("Failed to generate personas") + logger.info(f"Generating {self.config.sample_count} QA pairs") generator = TestsetGenerator( @@ -332,20 +385,21 @@ async def a_create_synthetic_qa( persona_list=self.personas, ) - async with create_task_group() as tg: - async with send_sample: - for _ in range(self.config.sample_count): - tg.start_soon( - self._generate_single_sample, - copy(generator), - send_sample.clone(), - limiter, - ) + query_distribution = self.create_query_distribution( + kg, self.config.sample_count + ) + + testset = self.generate_testset( + generator, + self.config.sample_count, + query_distribution, + ) + if not testset.samples: + raise ValueError("Empty testset") - async with receive_sample: - logger.info("Waiting for generated qa samples") - async for sample in receive_sample: - await process_sample(sample) + for testset_sample in testset.samples: + sample = ragas_sample_to_synthetic_qa_pair(testset_sample) + await process_sample(sample) logger.info(f"Generated {self.config.sample_count} QA sample sets") diff --git a/backend/llm_eval/tasks.py b/backend/llm_eval/tasks.py index 7d3f93a..bbb451e 100644 --- a/backend/llm_eval/tasks.py +++ b/backend/llm_eval/tasks.py @@ -1,4 +1,5 @@ from pathlib import Path +from typing import Any from celery import Celery from celery.bootsteps import StartStopStep @@ -18,12 +19,12 @@ class LivenessProbe(StartStopStep): requires = {"celery.worker.components:Timer"} - def __init__(self, parent: any, **kwargs: dict[str, any]) -> None: + def __init__(self, parent, **kwargs: dict[str, Any]) -> None: # noqa: ANN001 super().__init__(parent, **kwargs) self.requests = [] self.tref = None - def start(self, parent: any) -> None: + def start(self, parent) -> None: # noqa: ANN001 self.tref = parent.timer.call_repeatedly( 1.0, self._update_heartbeat_file, @@ -31,21 +32,21 @@ def start(self, parent: any) -> None: priority=10, ) - def stop(self, parent: any) -> None: + def stop(self, parent) -> None: # noqa: ANN001 HEARTBEAT_FILE.unlink(missing_ok=True) # noinspection PyMethodMayBeStatic - def _update_heartbeat_file(self, parent: any) -> None: + def _update_heartbeat_file(self, parent) -> None: # noqa: ANN001 HEARTBEAT_FILE.touch() @worker_ready.connect -def worker_ready(**_: dict[str, any]) -> None: +def _worker_ready(**_: dict[str, Any]) -> None: READINESS_FILE.touch() @worker_shutdown.connect -def worker_shutdown(**_: dict[str, any]) -> None: +def _worker_shutdown(**_: dict[str, Any]) -> None: READINESS_FILE.unlink(missing_ok=True) diff --git a/backend/llm_eval/utils/model_settings/azure_ai_model_settings.py b/backend/llm_eval/utils/model_settings/azure_ai_model_settings.py index 71fbccd..6ad7e42 100644 --- a/backend/llm_eval/utils/model_settings/azure_ai_model_settings.py +++ b/backend/llm_eval/utils/model_settings/azure_ai_model_settings.py @@ -11,13 +11,14 @@ class AzureAiModelSettings(BaseSettings, ModelSettings, prefix="AZURE_OPENAI_"): api_version: str = Field(default="2024-02-01") endpoint: str = Field(default="https://example.com") api_key: str = Field(default="api-key") - deployment: str = Field(default="text-embedding-3-large") + embedding_deployment: str = Field(default="text-embedding-3-large") + chat_deployment: str = Field(default="gpt-4.1") response_language: str = Field(default="german") def to_chat(self) -> BaseChatModel: return AzureChatOpenAI( api_version=self.api_version, - azure_deployment=self.deployment, + azure_deployment=self.chat_deployment, azure_endpoint=self.endpoint, api_key=SecretStr(self.api_key), ) @@ -25,7 +26,7 @@ def to_chat(self) -> BaseChatModel: def to_embeddings(self) -> Embeddings: return AzureOpenAIEmbeddings( api_version=self.api_version, - azure_deployment=self.deployment, + azure_deployment=self.embedding_deployment, azure_endpoint=self.endpoint, api_key=SecretStr(self.api_key), ) diff --git a/backend/poetry.lock b/backend/poetry.lock index 3638602..42fc9ae 100644 --- a/backend/poetry.lock +++ b/backend/poetry.lock @@ -180,7 +180,7 @@ version = "5.3.1" description = "Low-level AMQP client for Python (fork of amqplib)." optional = false python-versions = ">=3.6" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "amqp-5.3.1-py3-none-any.whl", hash = "sha256:43b3319e1b4e7d1251833a93d672b4af1e40f3d632d479b98661a95f117880a2"}, {file = "amqp-5.3.1.tar.gz", hash = "sha256:cddc00c725449522023bad949f70fff7b48f0b1ade74d170a6f10ab044739432"}, @@ -487,7 +487,7 @@ version = "4.2.1" description = "Python multiprocessing fork with improvements and bugfixes" optional = false python-versions = ">=3.7" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "billiard-4.2.1-py3-none-any.whl", hash = "sha256:40b59a4ac8806ba2c2369ea98d876bc6108b051c227baffd928c644d15d8f3cb"}, {file = "billiard-4.2.1.tar.gz", hash = "sha256:12b641b0c539073fc8d3f5b8b7be998956665c4233c7c1fcd66a7e677c4fb36f"}, @@ -580,7 +580,7 @@ version = "5.5.3" description = "Distributed Task Queue." optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "celery-5.5.3-py3-none-any.whl", hash = "sha256:0b5761a07057acee94694464ca482416b959568904c9dfa41ce8413a7d65d525"}, {file = "celery-5.5.3.tar.gz", hash = "sha256:6c972ae7968c2b5281227f01c3a3f984037d21c5129d07bf3550cc2afc6b10a5"}, @@ -946,7 +946,7 @@ version = "0.3.1" description = "Enables git-like *did-you-mean* feature in click" optional = false python-versions = ">=3.6.2" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "click_didyoumean-0.3.1-py3-none-any.whl", hash = "sha256:5c4bb6007cfea5f2fd6583a2fb6701a22a41eb98957e63d0fac41c10e7c3117c"}, {file = "click_didyoumean-0.3.1.tar.gz", hash = "sha256:4f82fdff0dbe64ef8ab2279bd6aa3f6a99c3b28c05aa09cbfc07c9d7fbb5a463"}, @@ -961,7 +961,7 @@ version = "1.1.1" description = "An extension module for click to enable registering CLI commands via setuptools entry-points." optional = false python-versions = "*" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "click-plugins-1.1.1.tar.gz", hash = "sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b"}, {file = "click_plugins-1.1.1-py2.py3-none-any.whl", hash = "sha256:5d262006d3222f5057fd81e1623d4443e41dcda5dc815c06b442aa3c02889fc8"}, @@ -979,7 +979,7 @@ version = "0.3.0" description = "REPL plugin for Click" optional = false python-versions = ">=3.6" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "click-repl-0.3.0.tar.gz", hash = "sha256:17849c23dba3d667247dc4defe1757fff98694e90fe37474f3feebb69ced26a9"}, {file = "click_repl-0.3.0-py3-none-any.whl", hash = "sha256:fb7e06deb8da8de86180a33a9da97ac316751c094c6899382da7feeeeb51b812"}, @@ -1574,6 +1574,25 @@ files = [ {file = "flatbuffers-25.2.10.tar.gz", hash = "sha256:97e451377a41262f8d9bd4295cc836133415cc03d8cb966410a4af92eb00d26e"}, ] +[[package]] +name = "flower" +version = "2.0.1" +description = "Celery Flower" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "flower-2.0.1-py2.py3-none-any.whl", hash = "sha256:9db2c621eeefbc844c8dd88be64aef61e84e2deb29b271e02ab2b5b9f01068e2"}, + {file = "flower-2.0.1.tar.gz", hash = "sha256:5ab717b979530770c16afb48b50d2a98d23c3e9fe39851dcf6bc4d01845a02a0"}, +] + +[package.dependencies] +celery = ">=5.0.5" +humanize = "*" +prometheus-client = ">=0.8.0" +pytz = "*" +tornado = ">=5.0.0,<7.0.0" + [[package]] name = "frozenlist" version = "1.7.0" @@ -2151,6 +2170,21 @@ files = [ [package.dependencies] pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_version >= \"3.8\""} +[[package]] +name = "humanize" +version = "4.12.3" +description = "Python humanize utilities" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "humanize-4.12.3-py3-none-any.whl", hash = "sha256:2cbf6370af06568fa6d2da77c86edb7886f3160ecd19ee1ffef07979efc597f6"}, + {file = "humanize-4.12.3.tar.gz", hash = "sha256:8430be3a615106fdfceb0b2c1b41c4c98c6b0fc5cc59663a5539b111dd325fb0"}, +] + +[package.extras] +tests = ["freezegun", "pytest", "pytest-cov"] + [[package]] name = "identify" version = "2.6.12" @@ -2530,7 +2564,7 @@ version = "5.5.4" description = "Messaging library for Python." optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "kombu-5.5.4-py3-none-any.whl", hash = "sha256:a12ed0557c238897d8e518f1d1fdf84bd1516c5e305af2dacd85c2015115feb8"}, {file = "kombu-5.5.4.tar.gz", hash = "sha256:886600168275ebeada93b888e831352fe578168342f0d1d5833d88ba0d847363"}, @@ -4352,13 +4386,28 @@ nodeenv = ">=0.11.1" pyyaml = ">=5.1" virtualenv = ">=20.10.0" +[[package]] +name = "prometheus-client" +version = "0.22.1" +description = "Python client for the Prometheus monitoring system." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "prometheus_client-0.22.1-py3-none-any.whl", hash = "sha256:cca895342e308174341b2cbf99a56bef291fbc0ef7b9e5412a0f26d653ba7094"}, + {file = "prometheus_client-0.22.1.tar.gz", hash = "sha256:190f1331e783cf21eb60bca559354e0a4d4378facecf78f5428c39b675d20d28"}, +] + +[package.extras] +twisted = ["twisted"] + [[package]] name = "prompt-toolkit" version = "3.0.51" description = "Library for building powerful interactive command lines in Python" optional = false python-versions = ">=3.8" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07"}, {file = "prompt_toolkit-3.0.51.tar.gz", hash = "sha256:931a162e3b27fc90c86f1b48bb1fb2c528c2761475e57c9c06de13311c7b54ed"}, @@ -5145,7 +5194,7 @@ version = "2025.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, @@ -6208,6 +6257,28 @@ dev = ["tokenizers[testing]"] docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"] +[[package]] +name = "tornado" +version = "6.5.1" +description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "tornado-6.5.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d50065ba7fd11d3bd41bcad0825227cc9a95154bad83239357094c36708001f7"}, + {file = "tornado-6.5.1-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9e9ca370f717997cb85606d074b0e5b247282cf5e2e1611568b8821afe0342d6"}, + {file = "tornado-6.5.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b77e9dfa7ed69754a54c89d82ef746398be82f749df69c4d3abe75c4d1ff4888"}, + {file = "tornado-6.5.1-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:253b76040ee3bab8bcf7ba9feb136436a3787208717a1fb9f2c16b744fba7331"}, + {file = "tornado-6.5.1-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:308473f4cc5a76227157cdf904de33ac268af770b2c5f05ca6c1161d82fdd95e"}, + {file = "tornado-6.5.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:caec6314ce8a81cf69bd89909f4b633b9f523834dc1a352021775d45e51d9401"}, + {file = "tornado-6.5.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:13ce6e3396c24e2808774741331638ee6c2f50b114b97a55c5b442df65fd9692"}, + {file = "tornado-6.5.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5cae6145f4cdf5ab24744526cc0f55a17d76f02c98f4cff9daa08ae9a217448a"}, + {file = "tornado-6.5.1-cp39-abi3-win32.whl", hash = "sha256:e0a36e1bc684dca10b1aa75a31df8bdfed656831489bc1e6a6ebed05dc1ec365"}, + {file = "tornado-6.5.1-cp39-abi3-win_amd64.whl", hash = "sha256:908e7d64567cecd4c2b458075589a775063453aeb1d2a1853eedb806922f568b"}, + {file = "tornado-6.5.1-cp39-abi3-win_arm64.whl", hash = "sha256:02420a0eb7bf617257b9935e2b754d1b63897525d8a289c9d65690d580b4dcf7"}, + {file = "tornado-6.5.1.tar.gz", hash = "sha256:84ceece391e8eb9b2b95578db65e920d2a61070260594819589609ba9bc6308c"}, +] + [[package]] name = "tqdm" version = "4.67.1" @@ -6320,7 +6391,7 @@ version = "2025.2" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, @@ -6429,7 +6500,7 @@ version = "5.1.0" description = "Python promises." optional = false python-versions = ">=3.6" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "vine-5.1.0-py3-none-any.whl", hash = "sha256:40fdf3c48b2cfe1c38a49e9ae2da6fda88e4794c810050a728bd7413811fb1dc"}, {file = "vine-5.1.0.tar.gz", hash = "sha256:8b62e981d35c41049211cf62a0a1242d8c1ee9bd15bb196ce38aefd6799e61e0"}, @@ -6581,7 +6652,7 @@ version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" optional = false python-versions = "*" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, @@ -7191,4 +7262,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "^3.12, <3.13" -content-hash = "dc93f47ad2ffea514b170c7ba65f07ba1bfa9911b86475fbf2b39f1ed0bd6702" +content-hash = "65ef31e6b64908593eb4138843da98db613db46bf22b69f832eab728bcaabf39" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 8626e39..b2c2621 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -45,6 +45,7 @@ ruff = "^0.11.2" langchain-chroma = "^0.1.2" # just needed for local query debugpy = "^1.8.8" black = "^24.10.0" +flower = "^2.0.1" [tool.poetry.group.test.dependencies] diff --git a/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py b/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py index 8bae4e4..58c391d 100644 --- a/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py +++ b/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py @@ -204,7 +204,7 @@ async def test_ragas_generator_create_document_nodes( filenames: list[str], ) -> None: with ragas_generator_factory() as generator: - nodes = generator._create_document_nodes(documents) + nodes = generator._create_knowledge_graph_nodes(documents) assert len(nodes) == len(documents) assert all( @@ -220,15 +220,15 @@ async def test_ragas_generator_has_same_nodes( ) -> None: with ragas_generator_factory() as generator: shuffle(documents) - nodes1 = generator._create_document_nodes(documents) + nodes1 = generator._create_knowledge_graph_nodes(documents) shuffle(documents) - nodes2 = generator._create_document_nodes(documents) + nodes2 = generator._create_knowledge_graph_nodes(documents) assert generator._has_same_nodes(nodes1, nodes2) shuffle(documents) documents[0].page_content = "modified content" - nodes3 = generator._create_document_nodes(documents) + nodes3 = generator._create_knowledge_graph_nodes(documents) assert not generator._has_same_nodes(nodes1, nodes3) @@ -245,8 +245,8 @@ async def test_ragas_generator_create_knowledge_graph_new_successfully( ) -> None: with ragas_generator_factory() as generator: generator._load_and_process_documents = MagicMock(return_value=documents) - generator._create_document_nodes = MagicMock(return_value=nodes) - generator.load_knowledge_graph = MagicMock(return_value=None) + generator._create_knowledge_graph_nodes = MagicMock(return_value=nodes) + generator.load_exiting_knowledge_graph = MagicMock(return_value=None) generator.apply_knowledge_graph_transformations = MagicMock() created_kg = mock_knowledge_graph(nodes=nodes) @@ -254,14 +254,14 @@ async def test_ragas_generator_create_knowledge_graph_new_successfully( kg = generator.create_knowledge_graph() - generator._create_document_nodes.assert_called_once_with(documents) - generator.load_knowledge_graph.assert_called_once() + generator._create_knowledge_graph_nodes.assert_called_once_with(documents) + generator.load_exiting_knowledge_graph.assert_called_once() mock_create_backup.assert_called_once_with( generator.config.knowledge_graph_location ) mock_knowledge_graph.assert_called_with(nodes=nodes) generator.apply_knowledge_graph_transformations.assert_called_once_with( - created_kg, documents + created_kg ) assert kg is not None @@ -276,11 +276,11 @@ async def test_ragas_generator_create_knowledge_graph_use_existing_graph_when_no ) -> None: with ragas_generator_factory() as generator: generator._load_and_process_documents = MagicMock(return_value=documents) - generator._create_document_nodes = MagicMock(return_value=nodes) + generator._create_knowledge_graph_nodes = MagicMock(return_value=nodes) generator.config.knowledge_graph_location = None generator.apply_knowledge_graph_transformations = MagicMock(return_value=None) existing_graph = MagicMock(name="existing_graph", nodes=nodes) - generator.load_knowledge_graph = MagicMock(return_value=existing_graph) + generator.load_exiting_knowledge_graph = MagicMock(return_value=existing_graph) mock_knowledge_graph.return_value = MagicMock(name="generated_graph") kg: MagicMock = generator.create_knowledge_graph() # type: ignore @@ -312,7 +312,7 @@ async def test_ragas_generator_load_knowledge_graph( generator.config.knowledge_graph_location.exists = MagicMock(return_value=True) mock_knowledge_graph.load.return_value = MagicMock(name="existing_graph") - kg = generator.load_knowledge_graph() + kg = generator.load_exiting_knowledge_graph() mock_knowledge_graph.load.assert_called_once_with( generator.config.knowledge_graph_location @@ -330,7 +330,7 @@ async def test_ragas_generator_load_knowledge_graph_fails_on_non_existent_file( generator.config.knowledge_graph_location = MagicMock() generator.config.knowledge_graph_location.exists = MagicMock(return_value=False) - kg = generator.load_knowledge_graph() + kg = generator.load_exiting_knowledge_graph() assert kg is None @@ -342,7 +342,7 @@ async def test_ragas_generator_load_knowledge_graph_fails_on_none_location( with ragas_generator_factory() as generator: generator.config.knowledge_graph_location = None - kg = generator.load_knowledge_graph() + kg = generator.load_exiting_knowledge_graph() assert kg is None @@ -362,7 +362,7 @@ async def test_ragas_generator_load_knowledge_graph_fails_on_load( generator.config.knowledge_graph_location.exists = MagicMock(return_value=True) mock_knowledge_graph.load.side_effect = RuntimeError("Load failed") - kg = generator.load_knowledge_graph() + kg = generator.load_exiting_knowledge_graph() mock_knowledge_graph.load.assert_called_once_with( generator.config.knowledge_graph_location @@ -403,7 +403,7 @@ async def test_ragas_generator_generate_single_sample( send_sample.send = AsyncMock() limiter = MagicMock() - await generator._generate_single_sample(MagicMock(), send_sample, limiter) + await generator._generate_samples(MagicMock(), send_sample, limiter) send_sample.send.assert_called_once_with("sample") @@ -421,7 +421,7 @@ async def test_ragas_generator_generate_single_sample_wont_send_when_fails( send_sample.send = AsyncMock() limiter = MagicMock() - await generator._generate_single_sample(MagicMock(), send_sample, limiter) + await generator._generate_samples(MagicMock(), send_sample, limiter) send_sample.send.assert_not_called() @@ -437,7 +437,7 @@ async def test_ragas_generator_generate_single_sample_does_nothing_on_empty_gene send_sample.send = AsyncMock() limiter = MagicMock() - await generator._generate_single_sample(MagicMock(), send_sample, limiter) + await generator._generate_samples(MagicMock(), send_sample, limiter) send_sample.send.assert_not_called() @@ -483,7 +483,7 @@ async def mock_generate( async with send_sample: await send_sample.send(sample) - generator._generate_single_sample = AsyncMock(side_effect=mock_generate) + generator._generate_samples = AsyncMock(side_effect=mock_generate) samples = [] From 672f53d59371712b71177ee8c57d0b28fce9c795 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Tolga=20Erd=C3=B6nmez?= Date: Mon, 14 Jul 2025 13:39:02 +0300 Subject: [PATCH 2/8] catalog generation and db connection fixed --- .../implementation/ragas/generator.py | 19 ++-- .../qa_catalog/generator/interface.py | 3 +- .../llm_eval/qa_catalog/logic/generation.py | 90 +++++++++---------- .../tasks/handle_generate_catalog_task.py | 7 +- .../qa_catalog/logic/test_generation.py | 5 +- .../implementation/ragas/test_generator.py | 6 +- frontend/messages/de.json | 3 +- frontend/messages/en.json | 3 +- 8 files changed, 70 insertions(+), 66 deletions(-) diff --git a/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py b/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py index 54eab69..30ebb50 100644 --- a/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py +++ b/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py @@ -320,7 +320,6 @@ def load_exiting_knowledge_graph( def create_query_distribution( self, knowledge_graph: KnowledgeGraph, - sample_count: int, ) -> list[tuple[BaseSynthesizer, float]]: properties = ["headlines", "keyphrases", "entities"] @@ -358,6 +357,7 @@ def generate_testset( if len(query_distribution) == 0: raise ValueError("No query distribution provided") + # Given sample count may not match the outputted sample count return generator.generate( sample_count, query_distribution, @@ -365,7 +365,7 @@ def generate_testset( async def a_create_synthetic_qa( self, - process_sample: Callable[[SyntheticQAPair], Coroutine], + collect_samples: Callable[[list[SyntheticQAPair]], Coroutine], ) -> None: kg = self.create_knowledge_graph() @@ -385,9 +385,7 @@ async def a_create_synthetic_qa( persona_list=self.personas, ) - query_distribution = self.create_query_distribution( - kg, self.config.sample_count - ) + query_distribution = self.create_query_distribution(kg) testset = self.generate_testset( generator, @@ -397,11 +395,14 @@ async def a_create_synthetic_qa( if not testset.samples: raise ValueError("Empty testset") - for testset_sample in testset.samples: - sample = ragas_sample_to_synthetic_qa_pair(testset_sample) - await process_sample(sample) + samples: list[SyntheticQAPair] = [ + ragas_sample_to_synthetic_qa_pair(testset_sample) + for testset_sample in testset.samples + ] + + await collect_samples(samples) - logger.info(f"Generated {self.config.sample_count} QA sample sets") + logger.info(f"Generated {len(samples)} QA sample sets") @override @staticmethod diff --git a/backend/llm_eval/qa_catalog/generator/interface.py b/backend/llm_eval/qa_catalog/generator/interface.py index a4ebf64..ff57a3d 100644 --- a/backend/llm_eval/qa_catalog/generator/interface.py +++ b/backend/llm_eval/qa_catalog/generator/interface.py @@ -121,5 +121,6 @@ class AsyncQACatalogGeneratorSupport(ABC): @abstractmethod async def a_create_synthetic_qa( - self, process_sample: Callable[[SyntheticQAPair], Coroutine] + self, + collect_samples: Callable[[list[SyntheticQAPair]], Coroutine], ) -> None: ... diff --git a/backend/llm_eval/qa_catalog/logic/generation.py b/backend/llm_eval/qa_catalog/logic/generation.py index c4b9709..a27618c 100644 --- a/backend/llm_eval/qa_catalog/logic/generation.py +++ b/backend/llm_eval/qa_catalog/logic/generation.py @@ -8,7 +8,6 @@ QACatalogGeneratorTempDataSourceConfig, QACatalogStatus, ) -from llm_eval.db import AsyncSessionLocal from llm_eval.llm_endpoints.db.find_llm_endpoint import find_llm_endpoint from llm_eval.qa_catalog.db.crud_data_source_config import ( delete_data_source_config, @@ -69,23 +68,21 @@ async def _generate_catalog_from_config( if isinstance(generator, AsyncQACatalogGeneratorSupport): - async def process_sample(sample: SyntheticQAPair) -> None: - if sample: - async with AsyncSessionLocal.begin() as db: - store_qa_pairs( - db, - synthethic_qa_pairs_to_db_models([sample], qa_catalog.id), - ) + async def collect_samples(samples: list[SyntheticQAPair]) -> None: + if samples: + store_qa_pairs( + db, + synthethic_qa_pairs_to_db_models(samples, qa_catalog.id), + ) - await generator.a_create_synthetic_qa(process_sample) + await generator.a_create_synthetic_qa(collect_samples) elif isinstance(generator, SyncQACatalogGeneratorSupport): qa_pairs = generator.create_synthetic_qa() - async with AsyncSessionLocal.begin() as db: - store_qa_pairs( - db, - synthethic_qa_pairs_to_db_models(qa_pairs, qa_catalog.id), - ) + store_qa_pairs( + db, + synthethic_qa_pairs_to_db_models(qa_pairs, qa_catalog.id), + ) else: raise RuntimeError(f"{generator} doesnt support sync/async qa generation") @@ -109,38 +106,37 @@ async def generation_cleanup( await delete_data_source_config(db, temp_data_source_config) -async def generate_catalog(catalog_id: str, data: QACatalogGenerationData) -> None: - async with AsyncSessionLocal.begin() as db: - qa_catalog = await find_qa_catalog(db, catalog_id) - temp_data_source_config = await find_data_source_config( - db, data.data_source_config_id - ) +async def generate_catalog( + db: AsyncSession, catalog_id: str, data: QACatalogGenerationData +) -> None: + qa_catalog = await find_qa_catalog(db, catalog_id) + temp_data_source_config = await find_data_source_config( + db, data.data_source_config_id + ) - if qa_catalog: - if not temp_data_source_config: - qa_catalog.status = QACatalogStatus.FAILURE - qa_catalog.error = ( - "Given data source config with id" - f"{data.data_source_config_id} couldn't be found" + if qa_catalog: + if not temp_data_source_config: + qa_catalog.status = QACatalogStatus.FAILURE + qa_catalog.error = ( + "Given data source config with id" + f"{data.data_source_config_id} couldn't be found" + ) + else: + try: + data_source_config = QACatalogGeneratorDataSourceConfig.from_db_json( + temp_data_source_config.config ) - else: - try: - data_source_config = ( - QACatalogGeneratorDataSourceConfig.from_db_json( - temp_data_source_config.config - ) - ) - await _generate_catalog_from_config( - db, - qa_catalog, - data.config, - data.model_config_schema, - data_source_config, - ) - await generation_cleanup(db, temp_data_source_config) # noqa: F821 - except Exception as e: - logger.error( - f"Error generating qa pairs for {qa_catalog.id}\nError: {e}" - ) - qa_catalog.status = QACatalogStatus.FAILURE - qa_catalog.error = repr(e) + await _generate_catalog_from_config( + db, + qa_catalog, + data.config, + data.model_config_schema, + data_source_config, + ) + await generation_cleanup(db, temp_data_source_config) # noqa: F821 + except Exception as e: + logger.error( + f"Error generating qa pairs for {qa_catalog.id}\nError: {e}" + ) + qa_catalog.status = QACatalogStatus.FAILURE + qa_catalog.error = repr(e) diff --git a/backend/llm_eval/qa_catalog/tasks/handle_generate_catalog_task.py b/backend/llm_eval/qa_catalog/tasks/handle_generate_catalog_task.py index d657bd3..22bdbbb 100644 --- a/backend/llm_eval/qa_catalog/tasks/handle_generate_catalog_task.py +++ b/backend/llm_eval/qa_catalog/tasks/handle_generate_catalog_task.py @@ -1,8 +1,9 @@ from loguru import logger +from sqlalchemy.ext.asyncio import AsyncSession from llm_eval.qa_catalog.models import QACatalogGenerationData from llm_eval.tasks import app -from llm_eval.utils.task import async_task +from llm_eval.utils.task import async_task, with_session def submit_generate_catalog_task( @@ -20,7 +21,9 @@ def submit_generate_catalog_task( max_retries=10, ) @async_task +@with_session async def execute_generate_catalog_task( + session: AsyncSession, catalog_id: str, json_data: str, ) -> None: @@ -32,4 +35,4 @@ async def execute_generate_catalog_task( ) from llm_eval.qa_catalog.logic.generation import generate_catalog - await generate_catalog(catalog_id, data) + await generate_catalog(session, catalog_id, data) diff --git a/backend/tests/unit/backend/qa_catalog/logic/test_generation.py b/backend/tests/unit/backend/qa_catalog/logic/test_generation.py index 8cd4d69..3319995 100644 --- a/backend/tests/unit/backend/qa_catalog/logic/test_generation.py +++ b/backend/tests/unit/backend/qa_catalog/logic/test_generation.py @@ -308,9 +308,10 @@ def create_model_configuration_from_kwargs( return kwargs async def a_create_synthetic_qa( - self, process_sample: Callable[[SyntheticQAPair], Awaitable[Any]] + self, + process_samples: Callable[[list[SyntheticQAPair]], Awaitable[Any]], ) -> None: - await process_sample(dummy_pair) + await process_samples([dummy_pair]) def create_synthetic_qa(self) -> list[SyntheticQAPair]: return [dummy_pair] diff --git a/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py b/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py index 58c391d..2a3f7b6 100644 --- a/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py +++ b/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py @@ -487,10 +487,10 @@ async def mock_generate( samples = [] - async def process_sample(sample: SyntheticQAPair) -> None: - samples.append(sample) + async def process_samples_fn(samples_batch: list[SyntheticQAPair]) -> None: + samples.extend(samples_batch) - await generator.a_create_synthetic_qa(process_sample) + await generator.a_create_synthetic_qa(process_samples_fn) assert len(samples) == generator.config.sample_count diff --git a/frontend/messages/de.json b/frontend/messages/de.json index 4433843..9c09060 100644 --- a/frontend/messages/de.json +++ b/frontend/messages/de.json @@ -693,7 +693,8 @@ "action": { "delete": "Löschen", "edit": "Bearbeiten", - "undo": "Rückgängig" + "undo": "Rückgängig", + "view": "Details" }, "add": "Hinzufügen", "addContext": "Kontext hinzufügen", diff --git a/frontend/messages/en.json b/frontend/messages/en.json index fa379ce..210562c 100644 --- a/frontend/messages/en.json +++ b/frontend/messages/en.json @@ -692,7 +692,8 @@ "action": { "delete": "Delete", "edit": "Edit", - "undo": "Undo" + "undo": "Undo", + "view": "Details" }, "add": "Add", "addContext": "add context", From 28f5d1382d1835efa4a0f47d80d939153ecb0a6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Tolga=20Erd=C3=B6nmez?= Date: Mon, 14 Jul 2025 14:17:36 +0300 Subject: [PATCH 3/8] catalog generation unit tests fixed --- .../implementation/ragas/generator.py | 1 + .../qa_catalog/logic/test_generation.py | 12 +- .../implementation/ragas/test_generator.py | 197 ++++++++---------- 3 files changed, 87 insertions(+), 123 deletions(-) diff --git a/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py b/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py index 30ebb50..a10d911 100644 --- a/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py +++ b/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py @@ -303,6 +303,7 @@ def load_exiting_knowledge_graph( return None logger.info(f"Knowledge graph found at {self.config.knowledge_graph_location}") + knowledge_graph = None try: knowledge_graph = KnowledgeGraph.load(self.config.knowledge_graph_location) except Exception as e: diff --git a/backend/tests/unit/backend/qa_catalog/logic/test_generation.py b/backend/tests/unit/backend/qa_catalog/logic/test_generation.py index 3319995..8a20095 100644 --- a/backend/tests/unit/backend/qa_catalog/logic/test_generation.py +++ b/backend/tests/unit/backend/qa_catalog/logic/test_generation.py @@ -124,12 +124,8 @@ def catalog_generation_data( def session_local() -> MockSessionLocal: @contextmanager def _func(): # noqa: ANN202 - with patch( - "llm_eval.qa_catalog.logic.generation.AsyncSessionLocal", - ) as session_local: - mock_session = AsyncMock() - session_local.begin.return_value.__aenter__.return_value = mock_session - yield mock_session + mock_session = AsyncMock() + yield mock_session return _func @@ -153,7 +149,7 @@ async def test_generate_catalog_task_happy_path( mock_find_qa_catalog.return_value = qa_catalog mock_find_data_source_config.return_value = temp_data_source_config - await generate_catalog(qa_catalog.id, catalog_generation_data) + await generate_catalog(mock_session, qa_catalog.id, catalog_generation_data) mock_find_qa_catalog.assert_called_once_with(mock_session, qa_catalog.id) mock_find_data_source_config.assert_called_once_with( @@ -194,7 +190,7 @@ async def test_generate_catalog_task_temp_data_source_config_not_found( mock_find_qa_catalog.return_value = qa_catalog mock_find_data_source_config.return_value = None - await generate_catalog(qa_catalog.id, catalog_generation_data) + await generate_catalog(mock_session, qa_catalog.id, catalog_generation_data) mock_find_qa_catalog.assert_called_once_with(mock_session, qa_catalog.id) mock_find_data_source_config.assert_called_once_with( diff --git a/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py b/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py index 2a3f7b6..6d42009 100644 --- a/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py +++ b/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py @@ -2,11 +2,9 @@ from pathlib import Path from random import shuffle from typing import Callable, ContextManager, Generator -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import MagicMock, patch import pytest -from anyio import CapacityLimiter -from anyio.streams.memory import MemoryObjectSendStream from langchain_core.documents import Document from ragas.testset.graph import Node, NodeType @@ -234,11 +232,9 @@ async def test_ragas_generator_has_same_nodes( @pytest.mark.asyncio -@patch("llm_eval.qa_catalog.generator.implementation.ragas.generator.create_backup") @patch("llm_eval.qa_catalog.generator.implementation.ragas.generator.KnowledgeGraph") async def test_ragas_generator_create_knowledge_graph_new_successfully( mock_knowledge_graph: MagicMock, - mock_create_backup: MagicMock, ragas_generator_factory: RagasGeneratorFactory, documents: list[Document], nodes: list[Node], @@ -246,7 +242,7 @@ async def test_ragas_generator_create_knowledge_graph_new_successfully( with ragas_generator_factory() as generator: generator._load_and_process_documents = MagicMock(return_value=documents) generator._create_knowledge_graph_nodes = MagicMock(return_value=nodes) - generator.load_exiting_knowledge_graph = MagicMock(return_value=None) + generator.split_documents = MagicMock(return_value=documents) generator.apply_knowledge_graph_transformations = MagicMock() created_kg = mock_knowledge_graph(nodes=nodes) @@ -254,11 +250,9 @@ async def test_ragas_generator_create_knowledge_graph_new_successfully( kg = generator.create_knowledge_graph() + generator._load_and_process_documents.assert_called_once() + generator.split_documents.assert_called_once_with(documents) generator._create_knowledge_graph_nodes.assert_called_once_with(documents) - generator.load_exiting_knowledge_graph.assert_called_once() - mock_create_backup.assert_called_once_with( - generator.config.knowledge_graph_location - ) mock_knowledge_graph.assert_called_with(nodes=nodes) generator.apply_knowledge_graph_transformations.assert_called_once_with( created_kg @@ -277,16 +271,16 @@ async def test_ragas_generator_create_knowledge_graph_use_existing_graph_when_no with ragas_generator_factory() as generator: generator._load_and_process_documents = MagicMock(return_value=documents) generator._create_knowledge_graph_nodes = MagicMock(return_value=nodes) + generator.split_documents = MagicMock(return_value=documents) generator.config.knowledge_graph_location = None generator.apply_knowledge_graph_transformations = MagicMock(return_value=None) - existing_graph = MagicMock(name="existing_graph", nodes=nodes) - generator.load_exiting_knowledge_graph = MagicMock(return_value=existing_graph) - mock_knowledge_graph.return_value = MagicMock(name="generated_graph") + generated_graph = MagicMock(name="generated_graph") + mock_knowledge_graph.return_value = generated_graph kg: MagicMock = generator.create_knowledge_graph() # type: ignore assert kg is not None - assert kg == existing_graph + assert kg == generated_graph @pytest.mark.asyncio @@ -310,14 +304,24 @@ async def test_ragas_generator_load_knowledge_graph( with ragas_generator_factory() as generator: generator.config.knowledge_graph_location = MagicMock() generator.config.knowledge_graph_location.exists = MagicMock(return_value=True) - mock_knowledge_graph.load.return_value = MagicMock(name="existing_graph") + loaded_graph = MagicMock(name="existing_graph") + mock_knowledge_graph.load.return_value = loaded_graph + + # Create proper mock documents with required attributes + mock_doc = MagicMock() + mock_doc.page_content = "test content" + mock_doc.metadata = {"source": "test.txt"} + docs = [mock_doc] - kg = generator.load_exiting_knowledge_graph() + # Mock _has_same_nodes to return True so the loaded graph is returned + generator._has_same_nodes = MagicMock(return_value=True) + + kg = generator.load_exiting_knowledge_graph(docs) mock_knowledge_graph.load.assert_called_once_with( generator.config.knowledge_graph_location ) - assert kg is not None + assert kg is loaded_graph @pytest.mark.asyncio @@ -330,7 +334,8 @@ async def test_ragas_generator_load_knowledge_graph_fails_on_non_existent_file( generator.config.knowledge_graph_location = MagicMock() generator.config.knowledge_graph_location.exists = MagicMock(return_value=False) - kg = generator.load_exiting_knowledge_graph() + docs = [MagicMock()] # Mock documents + kg = generator.load_exiting_knowledge_graph(docs) assert kg is None @@ -342,7 +347,8 @@ async def test_ragas_generator_load_knowledge_graph_fails_on_none_location( with ragas_generator_factory() as generator: generator.config.knowledge_graph_location = None - kg = generator.load_exiting_knowledge_graph() + docs = [MagicMock()] # Mock documents + kg = generator.load_exiting_knowledge_graph(docs) assert kg is None @@ -362,7 +368,13 @@ async def test_ragas_generator_load_knowledge_graph_fails_on_load( generator.config.knowledge_graph_location.exists = MagicMock(return_value=True) mock_knowledge_graph.load.side_effect = RuntimeError("Load failed") - kg = generator.load_exiting_knowledge_graph() + # Create proper mock documents with required attributes + mock_doc = MagicMock() + mock_doc.page_content = "test content" + mock_doc.metadata = {"source": "test.txt"} + docs = [mock_doc] + + kg = generator.load_exiting_knowledge_graph(docs) mock_knowledge_graph.load.assert_called_once_with( generator.config.knowledge_graph_location @@ -382,117 +394,72 @@ async def test_ragas_generator_generate_testset( mock_testset_generator = MagicMock() mock_testset_generator.generate.return_value = ["generated query"] - testset = generator.generate_testset(mock_testset_generator, 1) - - assert testset[0] == "generated query" # type: ignore - - -@pytest.mark.asyncio -@patch( - "llm_eval.qa_catalog.generator.implementation.ragas.generator.ragas_sample_to_synthetic_qa_pair", -) -async def test_ragas_generator_generate_single_sample( - mock_from_ragas: MagicMock, - ragas_generator_factory: RagasGeneratorFactory, -) -> None: - with ragas_generator_factory() as generator: - generator.generate_testset = MagicMock(return_value=MagicMock(samples=[1])) - mock_from_ragas.return_value = "sample" - - send_sample = MagicMock() - send_sample.send = AsyncMock() - limiter = MagicMock() - - await generator._generate_samples(MagicMock(), send_sample, limiter) - send_sample.send.assert_called_once_with("sample") + # Provide a non-empty query distribution + mock_synthesizer = MagicMock() + query_distribution = [(mock_synthesizer, 1.0)] - -@pytest.mark.asyncio -async def test_ragas_generator_generate_single_sample_wont_send_when_fails( - ragas_generator_factory: RagasGeneratorFactory, -) -> None: - with ragas_generator_factory() as generator: - generator.generate_testset = MagicMock( - side_effect=RuntimeError("sample generation failed") + testset = generator.generate_testset( + mock_testset_generator, 1, query_distribution ) - send_sample = MagicMock() - send_sample.send = AsyncMock() - limiter = MagicMock() - - await generator._generate_samples(MagicMock(), send_sample, limiter) - - send_sample.send.assert_not_called() - - -@pytest.mark.asyncio -async def test_ragas_generator_generate_single_sample_does_nothing_on_empty_generated_testset( # noqa: E501 - ragas_generator_factory: RagasGeneratorFactory, -) -> None: - with ragas_generator_factory() as generator: - generator.generate_testset = MagicMock(return_value=MagicMock(samples=[])) - - send_sample = MagicMock() - send_sample.send = AsyncMock() - limiter = MagicMock() - - await generator._generate_samples(MagicMock(), send_sample, limiter) - - send_sample.send.assert_not_called() + assert testset[0] == "generated query" # type: ignore @pytest.mark.asyncio @patch( - "llm_eval.qa_catalog.generator.implementation.ragas.generator.TestsetGenerator", - new_callable=AsyncMock, -) -@patch( - "llm_eval.qa_catalog.generator.implementation.ragas.generator.RagasQACatalogGenerator.load_chat_model", - new_callable=AsyncMock, -) -@patch( - "llm_eval.qa_catalog.generator.implementation.ragas.generator.copy", - new_callable=AsyncMock, + "llm_eval.qa_catalog.generator.implementation.ragas.generator.ragas_sample_to_synthetic_qa_pair" ) +@patch("ragas.testset.persona.generate_personas_from_kg") async def test_ragas_generator_a_create_synthetic_qa_successfull( - mock_copy: AsyncMock, - mock_load_chat_model: AsyncMock, - mock_testset_generator: AsyncMock, - config: RagasQACatalogGeneratorConfig, - ragas_model_config: RagasQACatalogGeneratorModelConfig, - data_source_config: QACatalogGeneratorDataSourceConfig, + mock_generate_personas: MagicMock, + mock_ragas_sample_to_qa_pair: MagicMock, + ragas_generator_factory: RagasGeneratorFactory, ) -> None: - generator = RagasQACatalogGenerator(config, data_source_config, ragas_model_config) - generator.create_knowledge_graph = AsyncMock() - - sample = SyntheticQAPair( - id="1", - question="question", - expected_output="expected_output", - contexts=[], - meta_data={}, - ) - - async def mock_generate( - _, # noqa: ANN001 - send_sample: MemoryObjectSendStream[SyntheticQAPair | None], - limiter: CapacityLimiter, - ) -> None: - async with limiter: - async with send_sample: - await send_sample.send(sample) + with ragas_generator_factory() as generator: + # Mock knowledge graph + mock_kg = MagicMock() + mock_kg.nodes = [] + generator.create_knowledge_graph = MagicMock(return_value=mock_kg) + + # Mock personas + mock_persona = MagicMock() + mock_generate_personas.return_value = [mock_persona] + + # Mock query distribution + mock_synthesizer = MagicMock() + generator.create_query_distribution = MagicMock( + return_value=[(mock_synthesizer, 1.0)] + ) - generator._generate_samples = AsyncMock(side_effect=mock_generate) + # Mock testset generation + mock_testset = MagicMock() + mock_sample = MagicMock() + mock_testset.samples = [mock_sample] + generator.generate_testset = MagicMock(return_value=mock_testset) + + # Mock conversion to SyntheticQAPair + synthetic_qa_pair = SyntheticQAPair( + id="1", + question="test question", + expected_output="test answer", + contexts=[], + meta_data={}, + ) + mock_ragas_sample_to_qa_pair.return_value = synthetic_qa_pair - samples = [] + # Collect samples + collected_samples = [] - async def process_samples_fn(samples_batch: list[SyntheticQAPair]) -> None: - samples.extend(samples_batch) + async def collect_samples_fn(samples: list[SyntheticQAPair]) -> None: + collected_samples.extend(samples) - await generator.a_create_synthetic_qa(process_samples_fn) + # Test the method + await generator.a_create_synthetic_qa(collect_samples_fn) - assert len(samples) == generator.config.sample_count + # Verify results + assert len(collected_samples) == 1 + assert collected_samples[0] == synthetic_qa_pair @pytest.mark.asyncio From ae86f90cc33d3f7c2f797bcb8145f7cb8f362b80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Tolga=20Erd=C3=B6nmez?= Date: Mon, 14 Jul 2025 15:40:27 +0300 Subject: [PATCH 4/8] synthesizer ui updated --- .../generator/implementation/ragas/config.py | 2 +- .../implementation/ragas/generator.py | 7 +- .../qa-catalogs/generate/page.test.tsx | 80 +++++++++---- .../plugins/implementations/ragas.tsx | 110 +++++++----------- frontend/app/client/types.gen.ts | 63 ++++------ 5 files changed, 125 insertions(+), 137 deletions(-) diff --git a/backend/llm_eval/qa_catalog/generator/implementation/ragas/config.py b/backend/llm_eval/qa_catalog/generator/implementation/ragas/config.py index 6b0bdda..609cea7 100644 --- a/backend/llm_eval/qa_catalog/generator/implementation/ragas/config.py +++ b/backend/llm_eval/qa_catalog/generator/implementation/ragas/config.py @@ -26,7 +26,7 @@ class RagasQACatalogGeneratorPersona(ApiModel): class RagasQACatalogGeneratorConfig(QACatalogGeneratorConfig[RagasGeneratorType]): knowledge_graph_location: Path | None sample_count: int - query_distribution: dict[RagasQACatalogQuerySynthesizer, float] + query_distribution: list[RagasQACatalogQuerySynthesizer] personas: list[RagasQACatalogGeneratorPersona] | None use_existing_knowledge_graph: bool = True diff --git a/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py b/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py index a10d911..95a94b1 100644 --- a/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py +++ b/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py @@ -138,10 +138,9 @@ def __init__( ) def validate_config(self) -> None: - if sum(self.config.query_distribution.values()) != 1: + if not self.config.query_distribution: raise ValueError( - "Given query distribution for the generation is invalid, " - "distribution weights should sum up to 1" + "At least one query synthesizer must be selected for QA generation" ) def _load_and_process_documents(self) -> list[Document]: @@ -325,7 +324,7 @@ def create_query_distribution( properties = ["headlines", "keyphrases", "entities"] selected_synthesizer_classes = ( - query_synthesizer_classes[q] for q in self.config.query_distribution.keys() + query_synthesizer_classes[q] for q in self.config.query_distribution ) synthesizers = [] diff --git a/frontend/app/[locale]/(authenticated)/qa-catalogs/generate/page.test.tsx b/frontend/app/[locale]/(authenticated)/qa-catalogs/generate/page.test.tsx index 4b5a860..b4a01c8 100644 --- a/frontend/app/[locale]/(authenticated)/qa-catalogs/generate/page.test.tsx +++ b/frontend/app/[locale]/(authenticated)/qa-catalogs/generate/page.test.tsx @@ -1,6 +1,6 @@ +import "@/app/test-utils/mock-intl"; import "@/app/test-utils/mock-router"; import "@/app/test-utils/mock-toast"; -import "@/app/test-utils/mock-intl"; import { addToast } from "@heroui/react"; import { @@ -24,6 +24,7 @@ import { QaCatalogGenerationConfig, QaCatalogGenerationModelConfigurationSchema, qaCatalogGetGeneratorTypes, + RagasQaCatalogQuerySynthesizer, } from "@/app/client"; import { clearComboBox, @@ -105,7 +106,7 @@ describe("Synthetic QA Catalog Generation Page", () => { await clearComboBox(user, ragasLabel("llmEndpointId")); - await inputDistributionsForRagas(testCase.configuration, true); + await inputDistributionsForRagas(testCase.configuration); } }; @@ -143,13 +144,13 @@ describe("Synthetic QA Catalog Generation Page", () => { configuration: { type: "RAGAS", personas: [], - queryDistribution: { - MULTI_HOP_ABSTRACT: 0, - MULTI_HOP_SPECIFIC: 0.5, - SINGLE_HOP_SPECIFIC: 0.5, - }, + queryDistribution: [ + RagasQaCatalogQuerySynthesizer.MULTI_HOP_SPECIFIC, + RagasQaCatalogQuerySynthesizer.SINGLE_HOP_SPECIFIC, + ], sampleCount: 5, knowledgeGraphLocation: null, + useExistingKnowledgeGraph: true, }, modelConfigSchema: { type: "RAGAS", @@ -168,20 +169,21 @@ describe("Synthetic QA Catalog Generation Page", () => { { name: "p-1", description: "description-1" }, { name: "p-2", description: "description-2" }, ], - queryDistribution: { - MULTI_HOP_ABSTRACT: 0.2, - MULTI_HOP_SPECIFIC: 0.4, - SINGLE_HOP_SPECIFIC: 0.4, - }, + queryDistribution: [ + RagasQaCatalogQuerySynthesizer.MULTI_HOP_ABSTRACT, + RagasQaCatalogQuerySynthesizer.MULTI_HOP_SPECIFIC, + RagasQaCatalogQuerySynthesizer.SINGLE_HOP_SPECIFIC, + ], sampleCount: 5, knowledgeGraphLocation: null, + useExistingKnowledgeGraph: true, }, modelConfigSchema: { type: "RAGAS", llmEndpoint: "llm-1", }, dataSourceConfigId: "data-source-config-2", - files: createFiles(["file-1", "file-2"]), + files: createFiles(["file-3", "file-4"]), }, ]; @@ -396,20 +398,50 @@ describe("Synthetic QA Catalog Generation Page", () => { const inputDistributionsForRagas = async ( configuration: QaCatalogGenerationConfig, - clear: boolean = false, ) => { if (configuration.type == "RAGAS") { - const distributions = configuration.queryDistribution; - - for (const [synth, weight] of Object.entries(distributions)) { - const slider = screen.getByTestId(`queryDistributionSlider-${synth}`); - const sliderInput = within(slider).getByRole("slider", { - hidden: true, - }); + const selectedSynthesizers = configuration.queryDistribution; + + // First, uncheck all checkboxes to start from clean state + const allSynthesizers = [ + RagasQaCatalogQuerySynthesizer.MULTI_HOP_ABSTRACT, + RagasQaCatalogQuerySynthesizer.MULTI_HOP_SPECIFIC, + RagasQaCatalogQuerySynthesizer.SINGLE_HOP_SPECIFIC, + ]; + + for (const synth of allSynthesizers) { + const checkbox = screen.queryByTestId( + `queryDistributionCheckbox-${synth}`, + ); + if (checkbox) { + // Check if checkbox is currently selected (either checked attribute or data-selected attribute) + const isChecked = + checkbox.getAttribute("checked") !== null || + checkbox.getAttribute("data-selected") === "true"; + + if (isChecked) { + fireEvent.click(checkbox); + // Wait a bit for the state to update + await new Promise((resolve) => setTimeout(resolve, 10)); + } + } + } - fireEvent.change(sliderInput, { - target: { value: clear ? 0 : weight }, - }); + // Then check the ones we want + for (const synth of selectedSynthesizers) { + const checkbox = screen.getByTestId( + `queryDistributionCheckbox-${synth}`, + ); + // Check if checkbox is currently unselected + const isChecked = + checkbox.getAttribute("checked") !== null || + checkbox.getAttribute("data-selected") === "true"; + + if (!isChecked) { + fireEvent.click(checkbox); + // Wait a bit for the state to update + await new Promise((resolve) => setTimeout(resolve, 10)); + } } } }; diff --git a/frontend/app/[locale]/(authenticated)/qa-catalogs/plugins/implementations/ragas.tsx b/frontend/app/[locale]/(authenticated)/qa-catalogs/plugins/implementations/ragas.tsx index 71d244e..541c8a0 100644 --- a/frontend/app/[locale]/(authenticated)/qa-catalogs/plugins/implementations/ragas.tsx +++ b/frontend/app/[locale]/(authenticated)/qa-catalogs/plugins/implementations/ragas.tsx @@ -2,7 +2,8 @@ import { Accordion, AccordionItem, Button, - Slider, + Checkbox, + CheckboxGroup, Textarea, } from "@heroui/react"; import { cx } from "classix"; @@ -30,16 +31,9 @@ const synthesizerTypes = Object.values(RagasQaCatalogQuerySynthesizer) as [ const synthesizerTypeEnum = z.enum(synthesizerTypes); -const getDefaultSynthesizerValues = (): { - [key: string]: number; -} => - synthesizerTypes.reduce( - (acc, t) => { - acc[t] = 0; - return acc; - }, - {} as { [key: string]: number }, - ); +const getDefaultSynthesizerValues = (): RagasQaCatalogQuerySynthesizer[] => [ + RagasQaCatalogQuerySynthesizer.SINGLE_HOP_SPECIFIC, +]; const ragasGeneratorConfigurationShape = { config: z.object({ @@ -50,18 +44,9 @@ const ragasGeneratorConfigurationShape = { }) .int({ message: formErrors.int }) .min(1, { message: formErrors.required }), - queryDistribution: z.record(synthesizerTypeEnum, z.number()).refine( - (distribution) => { - const sum = Object.values(distribution).reduce( - (acc, val) => acc + (val == undefined ? 0 : val), - 0, - ); - return Math.abs(sum - 1) < Number.EPSILON; - }, - { - message: "The sum of all values in queryDistribution must equal 1", - }, - ), + queryDistribution: z + .array(synthesizerTypeEnum) + .min(1, { message: "At least one query synthesizer must be selected" }), personas: z .array( z.object({ @@ -220,46 +205,38 @@ const QueryDistributionForm = ({
- {synthesizerTypes.map((synth) => ( - ( -
- field.onChange(v)} - color={distributionError ? "danger" : "primary"} - data-testid={`queryDistributionSlider-${synth}`} - /> - - {t( - `RagasQACatalogGeneratorConfigurationForm.field.queryDistribution.values.${synth}.description`, - )} - - {errors.config?.queryDistribution?.[synth] && ( - - {errors.config.queryDistribution[synth].message} + ( + + {synthesizerTypes.map((synth) => ( +
+ + {t( + `RagasQACatalogGeneratorConfigurationForm.field.queryDistribution.values.${synth}.title`, + )} + + + {t( + `RagasQACatalogGeneratorConfigurationForm.field.queryDistribution.values.${synth}.description`, + )} - )} -
- )} - /> - ))} - {distributionError && ( - - {distributionError.root?.message} - - )} +
+ ))} + + )} + />
); @@ -324,11 +301,12 @@ export const ragasGeneratorPlugin = createQACatalogGenerationPlugin({ configurationForm: RagasGeneratorConfigurationForm, getDefaults: () => ({ config: { + type: "RAGAS" as const, sampleCount: 5, - queryDistribution: { - ...getDefaultSynthesizerValues(), - SINGLE_HOP_SPECIFIC: 1, - }, + queryDistribution: getDefaultSynthesizerValues(), + personas: [], + knowledgeGraphLocation: null, + useExistingKnowledgeGraph: true, }, modelConfig: { llmEndpoint: null }, }), diff --git a/frontend/app/client/types.gen.ts b/frontend/app/client/types.gen.ts index 3435b33..a0bb463 100644 --- a/frontend/app/client/types.gen.ts +++ b/frontend/app/client/types.gen.ts @@ -1,7 +1,7 @@ // This file is auto-generated by @hey-api/openapi-ts export type ActiveQaCatalogGeneratorType = { - type: "RAGAS" | "DEEPEVAL"; + type: "RAGAS"; }; export type AnswerRelevancyMetricConfigurationCreate = { @@ -69,7 +69,7 @@ export type AzureOpenAillmEndpointConfigurationUpdate = { export type Base64DownloadUrl = string; export type BodyCreateDataSourceConfigV1QaCatalogGeneratorUploadPost = { - generator_type: "RAGAS" | "DEEPEVAL"; + generator_type: "RAGAS"; files: Array; }; @@ -142,18 +142,6 @@ export type DashboardStatistics = { numberOfEvaluations: number; }; -export type DeepevalQaCatalogGeneratorConfig = { - type: "DEEPEVAL"; - goldensPerDoc: number; - numEvolutions: number; - parallelQueries: number; -}; - -export type DeepevalQaCatalogGeneratorModelConfigSchema = { - type: "DEEPEVAL"; - llmEndpoint: string; -}; - export type DeleteCatalogResult = { previousRevisionId: string | null; }; @@ -553,21 +541,27 @@ export type QaCatalog = { error?: string | null; }; -export type QaCatalogGenerationConfig = - | RagasQaCatalogGeneratorConfig - | DeepevalQaCatalogGeneratorConfig; +export type QaCatalogGenerationConfig = { + type: "RAGAS"; + knowledgeGraphLocation: string | null; + sampleCount: number; + queryDistribution: Array; + personas: Array | null; + useExistingKnowledgeGraph?: boolean; +}; export type QaCatalogGenerationData = { - type: "RAGAS" | "DEEPEVAL"; + type: "RAGAS"; name: string; config: QaCatalogGenerationConfig; dataSourceConfigId: string; modelConfigSchema: QaCatalogGenerationModelConfigurationSchema; }; -export type QaCatalogGenerationModelConfigurationSchema = - | RagasQaCatalogGeneratorModelConfigSchema - | DeepevalQaCatalogGeneratorModelConfigSchema; +export type QaCatalogGenerationModelConfigurationSchema = { + type: "RAGAS"; + llmEndpoint: string; +}; export type QaCatalogGenerationResult = { catalogId: string; @@ -620,21 +614,6 @@ export type QaCatalogEvaluationResult = { name: string; }; -export type RagasQaCatalogGeneratorConfig = { - type: "RAGAS"; - knowledgeGraphLocation: string | null; - sampleCount: number; - queryDistribution: { - [key: string]: number; - }; - personas: Array | null; -}; - -export type RagasQaCatalogGeneratorModelConfigSchema = { - type: "RAGAS"; - llmEndpoint: string; -}; - export type RagasQaCatalogGeneratorPersona = { name: string; description: string; @@ -702,7 +681,7 @@ export type ValidationError = { type: string; }; -export type RagEvalBackendEvalEvaluateResultsRouterEvaluationResult = { +export type LlmEvalEvalEvaluateResultsRouterEvaluationResult = { id: string; configurationId: string | null; configurationName: string | null; @@ -718,7 +697,7 @@ export type RagEvalBackendEvalEvaluateResultsRouterEvaluationResult = { error: string | null; }; -export type RagEvalBackendEvalEvaluationsModelsEvaluationResult = { +export type LlmEvalEvalEvaluationsModelsEvaluationResult = { id: string; name: string; createdAt: string; @@ -794,7 +773,7 @@ export type EvaluationsPostResponses = { /** * Successful Response */ - 200: RagEvalBackendEvalEvaluationsModelsEvaluationResult; + 200: LlmEvalEvalEvaluationsModelsEvaluationResult; }; export type EvaluationsPostResponse = @@ -849,7 +828,7 @@ export type EvaluationsGetResponses = { /** * Successful Response */ - 200: RagEvalBackendEvalEvaluationsModelsEvaluationResult; + 200: LlmEvalEvalEvaluationsModelsEvaluationResult; }; export type EvaluationsGetResponse = @@ -882,7 +861,7 @@ export type EvaluationsPatchResponses = { /** * Successful Response */ - 200: RagEvalBackendEvalEvaluationsModelsEvaluationResult; + 200: LlmEvalEvalEvaluationsModelsEvaluationResult; }; export type EvaluationsPatchResponse = @@ -1005,7 +984,7 @@ export type EvaluationResultsGetResponses = { /** * Successful Response */ - 200: RagEvalBackendEvalEvaluateResultsRouterEvaluationResult; + 200: LlmEvalEvalEvaluateResultsRouterEvaluationResult; }; export type EvaluationResultsGetResponse = From 6e52f146184602cf483632754df669ae0b6f783c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Tolga=20Erd=C3=B6nmez?= Date: Mon, 14 Jul 2025 15:50:19 +0300 Subject: [PATCH 5/8] catalog gen ui tests fixed --- .../[locale]/(authenticated)/qa-catalogs/generate/page.test.tsx | 2 -- .../qa-catalogs/plugins/implementations/ragas.tsx | 1 - 2 files changed, 3 deletions(-) diff --git a/frontend/app/[locale]/(authenticated)/qa-catalogs/generate/page.test.tsx b/frontend/app/[locale]/(authenticated)/qa-catalogs/generate/page.test.tsx index b4a01c8..7175ff6 100644 --- a/frontend/app/[locale]/(authenticated)/qa-catalogs/generate/page.test.tsx +++ b/frontend/app/[locale]/(authenticated)/qa-catalogs/generate/page.test.tsx @@ -150,7 +150,6 @@ describe("Synthetic QA Catalog Generation Page", () => { ], sampleCount: 5, knowledgeGraphLocation: null, - useExistingKnowledgeGraph: true, }, modelConfigSchema: { type: "RAGAS", @@ -176,7 +175,6 @@ describe("Synthetic QA Catalog Generation Page", () => { ], sampleCount: 5, knowledgeGraphLocation: null, - useExistingKnowledgeGraph: true, }, modelConfigSchema: { type: "RAGAS", diff --git a/frontend/app/[locale]/(authenticated)/qa-catalogs/plugins/implementations/ragas.tsx b/frontend/app/[locale]/(authenticated)/qa-catalogs/plugins/implementations/ragas.tsx index 541c8a0..1a32809 100644 --- a/frontend/app/[locale]/(authenticated)/qa-catalogs/plugins/implementations/ragas.tsx +++ b/frontend/app/[locale]/(authenticated)/qa-catalogs/plugins/implementations/ragas.tsx @@ -306,7 +306,6 @@ export const ragasGeneratorPlugin = createQACatalogGenerationPlugin({ queryDistribution: getDefaultSynthesizerValues(), personas: [], knowledgeGraphLocation: null, - useExistingKnowledgeGraph: true, }, modelConfig: { llmEndpoint: null }, }), From 0087a26bbf7e835879d510616e9bba79afae90a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Tolga=20Erd=C3=B6nmez?= Date: Fri, 8 Aug 2025 14:47:34 +0200 Subject: [PATCH 6/8] WIP fixing wrong imports on frontend --- .../evaluation-edit-form-wizard.tsx | 4 +- .../eval/[evaluationId]/edit/page.test.tsx | 4 +- .../components/evaluation-result-details.tsx | 2 +- .../[evaluationResultId]/page.test.tsx | 177 +++++++++--------- .../(authenticated)/eval/new/page.test.tsx | 4 +- .../qa-catalog-generation-form-wizard.tsx | 4 +- 6 files changed, 96 insertions(+), 99 deletions(-) diff --git a/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/edit/components/evaluation-edit-form-wizard.tsx b/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/edit/components/evaluation-edit-form-wizard.tsx index 54d76f5..f59eabc 100644 --- a/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/edit/components/evaluation-edit-form-wizard.tsx +++ b/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/edit/components/evaluation-edit-form-wizard.tsx @@ -16,7 +16,7 @@ import { FormWizard, SubmitData } from "@/app/[locale]/components/form-wizard"; import { evaluationsPatch, EvaluationUpdate, - RagEvalBackendEvalEvaluationsModelsEvaluationResult, + LlmEvalEvalEvaluationsModelsEvaluationResult, } from "@/app/client"; import { EditOrigin } from "@/app/types/edit-origin"; import { callApi } from "@/app/utils/backend-client/client"; @@ -32,7 +32,7 @@ const editSchema = z.object({ const schemas = { default: editSchema }; export type EvaluationEditFormWizardProps = { - evaluation: RagEvalBackendEvalEvaluationsModelsEvaluationResult; + evaluation: LlmEvalEvalEvaluationsModelsEvaluationResult; origin?: EditOrigin; }; diff --git a/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/edit/page.test.tsx b/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/edit/page.test.tsx index c69f652..9439420 100644 --- a/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/edit/page.test.tsx +++ b/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/edit/page.test.tsx @@ -12,7 +12,7 @@ import { evaluationsGet, evaluationsPatch, EvaluationStatus, - RagEvalBackendEvalEvaluationsModelsEvaluationResult, + LlmEvalEvalEvaluationsModelsEvaluationResult, } from "@/app/client"; import { expectInputError, @@ -29,7 +29,7 @@ vi.mock("@/app/client"); describe("Edit Evaluation Page", () => { const evaluationId = "123"; - const testEvaluation: RagEvalBackendEvalEvaluationsModelsEvaluationResult = { + const testEvaluation: LlmEvalEvalEvaluationsModelsEvaluationResult = { id: evaluationId, name: "Evaluation Name", createdAt: "2022-01-01T00:00:00Z", diff --git a/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/results/[evaluationResultId]/components/evaluation-result-details.tsx b/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/results/[evaluationResultId]/components/evaluation-result-details.tsx index 6147a63..cfb82e2 100644 --- a/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/results/[evaluationResultId]/components/evaluation-result-details.tsx +++ b/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/results/[evaluationResultId]/components/evaluation-result-details.tsx @@ -8,7 +8,7 @@ import { PropertyList, PropertyListItem, } from "@/app/[locale]/components/property-list"; -import { RagEvalBackendEvalEvaluateResultsRouterEvaluationResult as EvaluationResult } from "@/app/client"; +import { LlmEvalEvalEvaluateResultsRouterEvaluationResult as EvaluationResult } from "@/app/client"; const TextValue = ({ children }: PropsWithChildren) => { return
{children}
; diff --git a/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/results/[evaluationResultId]/page.test.tsx b/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/results/[evaluationResultId]/page.test.tsx index b09fb5f..0bc8f66 100644 --- a/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/results/[evaluationResultId]/page.test.tsx +++ b/frontend/app/[locale]/(authenticated)/eval/[evaluationId]/results/[evaluationResultId]/page.test.tsx @@ -6,8 +6,8 @@ import { evaluationResultsGet, evaluationsGet, EvaluationStatus, - RagEvalBackendEvalEvaluateResultsRouterEvaluationResult, - RagEvalBackendEvalEvaluationsModelsEvaluationResult, + LlmEvalEvalEvaluateResultsRouterEvaluationResult, + LlmEvalEvalEvaluationsModelsEvaluationResult, TestCaseStatus, } from "@/app/client"; import { expectValue } from "@/app/test-utils/details-page"; @@ -19,92 +19,89 @@ import Page from "./page"; vi.mock("@/app/client"); describe("Evaluation Result Page", () => { - const fullTestCase: RagEvalBackendEvalEvaluateResultsRouterEvaluationResult = - { - id: "tc1", - actualOutput: "actual", - expectedOutput: "expected", - status: TestCaseStatus.SUCCESS, - configurationId: "configId1", - configurationName: "configName1", - configurationVersion: "configVersion1", - context: ["context"], - input: "input", - metaData: {}, - metricsData: [ - { - id: "m1", - name: "metric1", - score: 0.5, - error: null, - evaluationModel: "evalModel1", - reason: "reason1", - success: true, - threshold: 0.3, - strictMode: true, - }, - { - id: "m2", - name: "metric2", - score: 0.6, - error: "", - evaluationModel: "", - reason: "", - success: false, - threshold: 0.7, - strictMode: false, - }, - { - id: "m3", - name: "metric3", - score: null, - error: "error3", - evaluationModel: null, - reason: null, - success: false, - threshold: 0.7, - strictMode: false, - }, - ], - retrievalContext: ["retrievalContext"], - error: null, - }; - - const minimalTestCase: RagEvalBackendEvalEvaluateResultsRouterEvaluationResult = - { - id: "tc1", - expectedOutput: "expected", - actualOutput: null, - status: TestCaseStatus.PENDING, - configurationId: null, - configurationName: null, - configurationVersion: null, - context: null, - input: "input", - metaData: null, - metricsData: [], - retrievalContext: null, - error: null, - }; - - const emptyTestCase: RagEvalBackendEvalEvaluateResultsRouterEvaluationResult = - { - id: "tc1", - expectedOutput: "expected", - actualOutput: "", - status: TestCaseStatus.PENDING, - configurationId: null, - configurationName: null, - configurationVersion: null, - context: [], - input: "input", - metaData: {}, - metricsData: [], - retrievalContext: [], - error: null, - }; - - const evaluation: RagEvalBackendEvalEvaluationsModelsEvaluationResult = { + const fullTestCase: LlmEvalEvalEvaluateResultsRouterEvaluationResult = { + id: "tc1", + actualOutput: "actual", + expectedOutput: "expected", + status: TestCaseStatus.SUCCESS, + configurationId: "configId1", + configurationName: "configName1", + configurationVersion: "configVersion1", + context: ["context"], + input: "input", + metaData: {}, + metricsData: [ + { + id: "m1", + name: "metric1", + score: 0.5, + error: null, + evaluationModel: "evalModel1", + reason: "reason1", + success: true, + threshold: 0.3, + strictMode: true, + }, + { + id: "m2", + name: "metric2", + score: 0.6, + error: "", + evaluationModel: "", + reason: "", + success: false, + threshold: 0.7, + strictMode: false, + }, + { + id: "m3", + name: "metric3", + score: null, + error: "error3", + evaluationModel: null, + reason: null, + success: false, + threshold: 0.7, + strictMode: false, + }, + ], + retrievalContext: ["retrievalContext"], + error: null, + }; + + const minimalTestCase: LlmEvalEvalEvaluateResultsRouterEvaluationResult = { + id: "tc1", + expectedOutput: "expected", + actualOutput: null, + status: TestCaseStatus.PENDING, + configurationId: null, + configurationName: null, + configurationVersion: null, + context: null, + input: "input", + metaData: null, + metricsData: [], + retrievalContext: null, + error: null, + }; + + const emptyTestCase: LlmEvalEvalEvaluateResultsRouterEvaluationResult = { + id: "tc1", + expectedOutput: "expected", + actualOutput: "", + status: TestCaseStatus.PENDING, + configurationId: null, + configurationName: null, + configurationVersion: null, + context: [], + input: "input", + metaData: {}, + metricsData: [], + retrievalContext: [], + error: null, + }; + + const evaluation: LlmEvalEvalEvaluationsModelsEvaluationResult = { id: "eval1", name: "evaluation", createdAt: "2021-01-01T00:00:00Z", @@ -161,7 +158,7 @@ describe("Evaluation Result Page", () => { }); const expectTestCase = async ( - testCase: RagEvalBackendEvalEvaluateResultsRouterEvaluationResult, + testCase: LlmEvalEvalEvaluateResultsRouterEvaluationResult, ) => { await expectValue("EvaluationResultDetails.input", testCase.input); await expectValue( @@ -213,7 +210,7 @@ describe("Evaluation Result Page", () => { }; const mockApi = ( - testCase: RagEvalBackendEvalEvaluateResultsRouterEvaluationResult, + testCase: LlmEvalEvalEvaluateResultsRouterEvaluationResult, ) => { vi.mocked(evaluationsGet).mockResolvedValue( successfulServiceResponse(evaluation), diff --git a/frontend/app/[locale]/(authenticated)/eval/new/page.test.tsx b/frontend/app/[locale]/(authenticated)/eval/new/page.test.tsx index 4b3c3ad..7271baf 100644 --- a/frontend/app/[locale]/(authenticated)/eval/new/page.test.tsx +++ b/frontend/app/[locale]/(authenticated)/eval/new/page.test.tsx @@ -16,6 +16,7 @@ import { LlmEndpoint, llmEndpointsGet, llmEndpointsGetAll, + LlmEvalEvalEvaluationsModelsEvaluationResult, Metric, MetricConfigurationRead, metricsGetAll, @@ -25,7 +26,6 @@ import { qaCatalogGetAll, QaCatalogPreview, QaCatalogStatus, - RagEvalBackendEvalEvaluationsModelsEvaluationResult, } from "@/app/client"; import { expectComboBoxError, @@ -278,7 +278,7 @@ describe("New Evaluation Page", () => { }; const mockEvaluationCreate = () => { - const evaluation: RagEvalBackendEvalEvaluationsModelsEvaluationResult = { + const evaluation: LlmEvalEvalEvaluationsModelsEvaluationResult = { id: "1", name: "Test Execution", createdAt: "2021-09-01T00:00:00Z", diff --git a/frontend/app/[locale]/(authenticated)/qa-catalogs/generate/components/qa-catalog-generation-form-wizard.tsx b/frontend/app/[locale]/(authenticated)/qa-catalogs/generate/components/qa-catalog-generation-form-wizard.tsx index 7b98f6b..7246f6b 100644 --- a/frontend/app/[locale]/(authenticated)/qa-catalogs/generate/components/qa-catalog-generation-form-wizard.tsx +++ b/frontend/app/[locale]/(authenticated)/qa-catalogs/generate/components/qa-catalog-generation-form-wizard.tsx @@ -17,8 +17,8 @@ import { ActiveQaCatalogGeneratorType, qaCatalogCreateDataSourceConfig, qaCatalogGenerate, + QaCatalogGenerationConfig, QaCatalogGenerationData, - RagasQaCatalogGeneratorConfig, } from "@/app/client"; import { callApi } from "@/app/utils/backend-client/client"; import { useRouter } from "@/i18n/routing"; @@ -109,7 +109,7 @@ export const QACatalogGeneratorFormWizard = ({ generatorType: data.configuration.type, }); - const config: RagasQaCatalogGeneratorConfig = { + const config: QaCatalogGenerationConfig = { ...data.configuration.config, type: data.configuration.type, knowledgeGraphLocation: null, From 2c54103928e77b7931f4e3365fee7fc97620f37a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Tolga=20Erd=C3=B6nmez?= Date: Fri, 8 Aug 2025 14:59:33 +0200 Subject: [PATCH 7/8] backend unit tests fixed according to new ragas generation config --- .../qa_catalog/logic/test_generation.py | 6 ++--- .../unit/backend/qa_catalog/test_router.py | 6 ++--- .../implementation/ragas/test_generator.py | 24 +++++++------------ 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/backend/tests/unit/backend/qa_catalog/logic/test_generation.py b/backend/tests/unit/backend/qa_catalog/logic/test_generation.py index 8a20095..5ba80d2 100644 --- a/backend/tests/unit/backend/qa_catalog/logic/test_generation.py +++ b/backend/tests/unit/backend/qa_catalog/logic/test_generation.py @@ -87,9 +87,9 @@ def catalog_generator_config( type=generator_type, sample_count=10, knowledge_graph_location=None, - query_distribution={ - RagasQACatalogQuerySynthesizer.SINGLE_HOP_SPECIFIC: 1.0, - }, + query_distribution=[ + RagasQACatalogQuerySynthesizer.SINGLE_HOP_SPECIFIC, + ], personas=[ RagasQACatalogGeneratorPersona( name="test-persona", description="test description" diff --git a/backend/tests/unit/backend/qa_catalog/test_router.py b/backend/tests/unit/backend/qa_catalog/test_router.py index be026af..b489d07 100644 --- a/backend/tests/unit/backend/qa_catalog/test_router.py +++ b/backend/tests/unit/backend/qa_catalog/test_router.py @@ -300,9 +300,9 @@ async def test_generate( "type": "RAGAS", "knowledge_graph_location": "/tmp/somefile.pdf", "sample_count": 5, - "query_distribution": { - "SINGLE_HOP_SPECIFIC": 1.5, - }, + "query_distribution": [ + "SINGLE_HOP_SPECIFIC", + ], "personas": [ {"name": "person1", "description": "Beschreibung Persona 1"} ], diff --git a/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py b/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py index 6d42009..5cddca2 100644 --- a/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py +++ b/backend/tests/unit/qa_catalog/generator/implementation/ragas/test_generator.py @@ -30,11 +30,11 @@ def config() -> RagasQACatalogGeneratorConfig: type="RAGAS", knowledge_graph_location=None, sample_count=10, - query_distribution={ - RagasQACatalogQuerySynthesizer.SINGLE_HOP_SPECIFIC: 0.5, - RagasQACatalogQuerySynthesizer.MULTI_HOP_SPECIFIC: 0.3, - RagasQACatalogQuerySynthesizer.MULTI_HOP_ABSTRACT: 0.2, - }, + query_distribution=[ + RagasQACatalogQuerySynthesizer.SINGLE_HOP_SPECIFIC, + RagasQACatalogQuerySynthesizer.MULTI_HOP_SPECIFIC, + RagasQACatalogQuerySynthesizer.MULTI_HOP_ABSTRACT, + ], personas=None, ) @@ -173,25 +173,17 @@ async def test_ragas_generator__load_and_process_documents_correctly( @pytest.mark.asyncio -@pytest.mark.parametrize("weights", [[0.5, 0.3, 0.0], [0.5, 1, 0.2]]) @patch( "llm_eval.qa_catalog.generator.implementation.ragas.generator.RagasQACatalogGenerator.load_chat_model" ) -async def test_init_ragas_generator_fails_for_invalid_query_synthesizer( +async def test_init_ragas_generator_fails_for_empty_query_distribution( _: MagicMock, config: RagasQACatalogGeneratorConfig, data_source_config: QACatalogGeneratorDataSourceConfig, ragas_model_config: RagasQACatalogGeneratorModelConfig, - weights: list[float], ) -> None: - invalid_query_synthesizer: dict = { - RagasQACatalogQuerySynthesizer.SINGLE_HOP_SPECIFIC: weights[0], - RagasQACatalogQuerySynthesizer.MULTI_HOP_ABSTRACT: weights[1], - RagasQACatalogQuerySynthesizer.MULTI_HOP_SPECIFIC: weights[2], - } - - with pytest.raises(ValueError, match="distribution weights should sum up to 1"): - config.query_distribution = invalid_query_synthesizer + with pytest.raises(ValueError, match="At least one query synthesizer"): + config.query_distribution = [] RagasQACatalogGenerator(config, data_source_config, ragas_model_config) From fb2adeaef781fca315a0a3cf1ac1b166ce915390 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahmet=20Tolga=20Erd=C3=B6nmez?= Date: Fri, 8 Aug 2025 16:29:52 +0200 Subject: [PATCH 8/8] rename kg to knowledge graph and refactor on persona import from ragas --- .../implementation/ragas/generator.py | 36 ++--- backend/poetry.lock | 131 ++++++++---------- backend/pyproject.toml | 2 +- 3 files changed, 78 insertions(+), 91 deletions(-) diff --git a/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py b/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py index 95a94b1..6cc6318 100644 --- a/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py +++ b/backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py @@ -10,17 +10,15 @@ from pydantic import ValidationError from ragas.embeddings import LangchainEmbeddingsWrapper from ragas.testset import TestsetGenerator +from ragas.testset import persona as ragas_persona from ragas.testset.graph import KnowledgeGraph, Node, NodeType -from ragas.testset.persona import Persona from ragas.testset.synthesizers import ( BaseSynthesizer, MultiHopAbstractQuerySynthesizer, MultiHopSpecificQuerySynthesizer, SingleHopSpecificQuerySynthesizer, ) -from ragas.testset.synthesizers.generate import ( - LangchainLLMWrapper, -) +from ragas.testset.synthesizers.generate import LangchainLLMWrapper from ragas.testset.synthesizers.testset_schema import Testset from ragas.testset.transforms import ( EmbeddingExtractor, @@ -130,7 +128,7 @@ def __init__( self.personas = ( [ - Persona(name=p.name, role_description=p.description) + ragas_persona.Persona(name=p.name, role_description=p.description) for p in self.config.personas ] if self.config.personas @@ -237,7 +235,7 @@ def _split(doc: Document, token_length: int) -> list[Document]: def apply_knowledge_graph_transformations( self, - kg: KnowledgeGraph, + knowledge_graph: KnowledgeGraph, ) -> None: headline_extractor = HeadlinesExtractor(llm=self.llm, max_num=20) headline_splitter = HeadlineSplitter(max_tokens=1500, min_tokens=100) @@ -264,14 +262,15 @@ def apply_knowledge_graph_transformations( ner_extractor, ] - apply_transforms(kg, transforms=transforms) + apply_transforms(knowledge_graph, transforms=transforms) def create_knowledge_graph(self) -> KnowledgeGraph: """ Loads the knowledge graph if already exists and compares it's nodes with the current documents. - If the existent graph has these documents already it uses the existent kg - otherwise create a new kg out ouf the documents + If the existent graph has these documents + already it uses the existent knowledge graph + otherwise create a new knowledge graph out ouf the documents """ docs = self._load_and_process_documents() # chunks of documents @@ -279,13 +278,13 @@ def create_knowledge_graph(self) -> KnowledgeGraph: raise RuntimeError("No documents found") chunks = self.split_documents(docs) - kg = KnowledgeGraph( + knowledge_graph = KnowledgeGraph( nodes=self._create_knowledge_graph_nodes(chunks), ) - self.apply_knowledge_graph_transformations(kg) + self.apply_knowledge_graph_transformations(knowledge_graph) - return kg + return knowledge_graph @deprecated("Until we have a better way to handle knowledge graph caching") def load_exiting_knowledge_graph( @@ -367,12 +366,13 @@ async def a_create_synthetic_qa( self, collect_samples: Callable[[list[SyntheticQAPair]], Coroutine], ) -> None: - kg = self.create_knowledge_graph() + knowledge_graph = self.create_knowledge_graph() if not self.personas: - from ragas.testset.persona import generate_personas_from_kg - - self.personas = generate_personas_from_kg(kg, self.llm) + self.personas = ragas_persona.generate_personas_from_kg( + knowledge_graph, + self.llm, + ) if not self.personas: raise ValueError("Failed to generate personas") @@ -381,11 +381,11 @@ async def a_create_synthetic_qa( generator = TestsetGenerator( llm=self.llm, embedding_model=self.embeddings, - knowledge_graph=kg, + knowledge_graph=knowledge_graph, persona_list=self.personas, ) - query_distribution = self.create_query_distribution(kg) + query_distribution = self.create_query_distribution(knowledge_graph) testset = self.generate_testset( generator, diff --git a/backend/poetry.lock b/backend/poetry.lock index 42fc9ae..be79e2e 100644 --- a/backend/poetry.lock +++ b/backend/poetry.lock @@ -1181,14 +1181,14 @@ typing-inspect = ">=0.4.0,<1" [[package]] name = "datasets" -version = "3.6.0" +version = "4.0.0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.9.0" groups = ["main"] files = [ - {file = "datasets-3.6.0-py3-none-any.whl", hash = "sha256:25000c4a2c0873a710df127d08a202a06eab7bf42441a6bc278b499c2f72cd1b"}, - {file = "datasets-3.6.0.tar.gz", hash = "sha256:1b2bf43b19776e2787e181cfd329cb0ca1a358ea014780c3581e0f276375e041"}, + {file = "datasets-4.0.0-py3-none-any.whl", hash = "sha256:7ef95e62025fd122882dbce6cb904c8cd3fbc829de6669a5eb939c77d50e203d"}, + {file = "datasets-4.0.0.tar.gz", hash = "sha256:9657e7140a9050db13443ba21cb5de185af8af944479b00e7ff1e00a61c8dbf1"}, ] [package.dependencies] @@ -1207,18 +1207,17 @@ tqdm = ">=4.66.3" xxhash = "*" [package.extras] -audio = ["librosa", "soundfile (>=0.12.1)", "soxr (>=0.4.0)"] +audio = ["soundfile (>=0.12.1)", "torch (>=2.7.0)", "torchcodec (>=0.4.0)"] benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] -dev = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyav", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\"", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0) ; python_version < \"3.10\"", "tiktoken", "torch", "torch (>=2.0.0)", "torchdata", "torchvision", "transformers", "transformers (>=4.42.0)", "zstandard"] -docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] +dev = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "numba (>=0.56.4)", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\" and sys_platform != \"win32\"", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0) ; python_version < \"3.10\" and sys_platform != \"win32\"", "tiktoken", "torch", "torch (>=2.0.0)", "torchcodec (>=0.4.0) ; sys_platform != \"win32\"", "torchdata", "transformers", "transformers (>=4.42.0)", "zstandard"] +docs = ["tensorflow (>=2.6.0)", "torch", "transformers"] jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] pdfs = ["pdfplumber (>=0.11.4)"] quality = ["ruff (>=0.3.0)"] -s3 = ["s3fs"] tensorflow = ["tensorflow (>=2.6.0)"] tensorflow-gpu = ["tensorflow (>=2.6.0)"] -tests = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyav", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\"", "tensorflow (>=2.6.0) ; python_version < \"3.10\"", "tiktoken", "torch (>=2.0.0)", "torchdata", "torchvision", "transformers (>=4.42.0)", "zstandard"] -tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyav", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "torchdata", "torchvision", "transformers (>=4.42.0)", "zstandard"] +tests = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "numba (>=0.56.4)", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\" and sys_platform != \"win32\"", "tensorflow (>=2.6.0) ; python_version < \"3.10\" and sys_platform != \"win32\"", "tiktoken", "torch (>=2.0.0)", "torchcodec (>=0.4.0) ; sys_platform != \"win32\"", "torchdata", "transformers (>=4.42.0)", "zstandard"] +tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elasticsearch (>=7.17.12,<8.0.0)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "numba (>=0.56.4)", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "soundfile (>=0.12.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "torchcodec (>=0.4.0) ; sys_platform != \"win32\"", "torchdata", "transformers (>=4.42.0)", "zstandard"] torch = ["torch"] vision = ["Pillow (>=9.4.0)"] @@ -4625,67 +4624,55 @@ files = [ [[package]] name = "pyarrow" -version = "20.0.0" +version = "21.0.0" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "pyarrow-20.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:c7dd06fd7d7b410ca5dc839cc9d485d2bc4ae5240851bcd45d85105cc90a47d7"}, - {file = "pyarrow-20.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:d5382de8dc34c943249b01c19110783d0d64b207167c728461add1ecc2db88e4"}, - {file = "pyarrow-20.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6415a0d0174487456ddc9beaead703d0ded5966129fa4fd3114d76b5d1c5ceae"}, - {file = "pyarrow-20.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15aa1b3b2587e74328a730457068dc6c89e6dcbf438d4369f572af9d320a25ee"}, - {file = "pyarrow-20.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:5605919fbe67a7948c1f03b9f3727d82846c053cd2ce9303ace791855923fd20"}, - {file = "pyarrow-20.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a5704f29a74b81673d266e5ec1fe376f060627c2e42c5c7651288ed4b0db29e9"}, - {file = "pyarrow-20.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:00138f79ee1b5aca81e2bdedb91e3739b987245e11fa3c826f9e57c5d102fb75"}, - {file = "pyarrow-20.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f2d67ac28f57a362f1a2c1e6fa98bfe2f03230f7e15927aecd067433b1e70ce8"}, - {file = "pyarrow-20.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:4a8b029a07956b8d7bd742ffca25374dd3f634b35e46cc7a7c3fa4c75b297191"}, - {file = "pyarrow-20.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:24ca380585444cb2a31324c546a9a56abbe87e26069189e14bdba19c86c049f0"}, - {file = "pyarrow-20.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:95b330059ddfdc591a3225f2d272123be26c8fa76e8c9ee1a77aad507361cfdb"}, - {file = "pyarrow-20.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f0fb1041267e9968c6d0d2ce3ff92e3928b243e2b6d11eeb84d9ac547308232"}, - {file = "pyarrow-20.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8ff87cc837601532cc8242d2f7e09b4e02404de1b797aee747dd4ba4bd6313f"}, - {file = "pyarrow-20.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:7a3a5dcf54286e6141d5114522cf31dd67a9e7c9133d150799f30ee302a7a1ab"}, - {file = "pyarrow-20.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a6ad3e7758ecf559900261a4df985662df54fb7fdb55e8e3b3aa99b23d526b62"}, - {file = "pyarrow-20.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6bb830757103a6cb300a04610e08d9636f0cd223d32f388418ea893a3e655f1c"}, - {file = "pyarrow-20.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:96e37f0766ecb4514a899d9a3554fadda770fb57ddf42b63d80f14bc20aa7db3"}, - {file = "pyarrow-20.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:3346babb516f4b6fd790da99b98bed9708e3f02e734c84971faccb20736848dc"}, - {file = "pyarrow-20.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:75a51a5b0eef32727a247707d4755322cb970be7e935172b6a3a9f9ae98404ba"}, - {file = "pyarrow-20.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:211d5e84cecc640c7a3ab900f930aaff5cd2702177e0d562d426fb7c4f737781"}, - {file = "pyarrow-20.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ba3cf4182828be7a896cbd232aa8dd6a31bd1f9e32776cc3796c012855e1199"}, - {file = "pyarrow-20.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c3a01f313ffe27ac4126f4c2e5ea0f36a5fc6ab51f8726cf41fee4b256680bd"}, - {file = "pyarrow-20.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:a2791f69ad72addd33510fec7bb14ee06c2a448e06b649e264c094c5b5f7ce28"}, - {file = "pyarrow-20.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4250e28a22302ce8692d3a0e8ec9d9dde54ec00d237cff4dfa9c1fbf79e472a8"}, - {file = "pyarrow-20.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:89e030dc58fc760e4010148e6ff164d2f44441490280ef1e97a542375e41058e"}, - {file = "pyarrow-20.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6102b4864d77102dbbb72965618e204e550135a940c2534711d5ffa787df2a5a"}, - {file = "pyarrow-20.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:96d6a0a37d9c98be08f5ed6a10831d88d52cac7b13f5287f1e0f625a0de8062b"}, - {file = "pyarrow-20.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a15532e77b94c61efadde86d10957950392999503b3616b2ffcef7621a002893"}, - {file = "pyarrow-20.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:dd43f58037443af715f34f1322c782ec463a3c8a94a85fdb2d987ceb5658e061"}, - {file = "pyarrow-20.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa0d288143a8585806e3cc7c39566407aab646fb9ece164609dac1cfff45f6ae"}, - {file = "pyarrow-20.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6953f0114f8d6f3d905d98e987d0924dabce59c3cda380bdfaa25a6201563b4"}, - {file = "pyarrow-20.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:991f85b48a8a5e839b2128590ce07611fae48a904cae6cab1f089c5955b57eb5"}, - {file = "pyarrow-20.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:97c8dc984ed09cb07d618d57d8d4b67a5100a30c3818c2fb0b04599f0da2de7b"}, - {file = "pyarrow-20.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9b71daf534f4745818f96c214dbc1e6124d7daf059167330b610fc69b6f3d3e3"}, - {file = "pyarrow-20.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8b88758f9303fa5a83d6c90e176714b2fd3852e776fc2d7e42a22dd6c2fb368"}, - {file = "pyarrow-20.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:30b3051b7975801c1e1d387e17c588d8ab05ced9b1e14eec57915f79869b5031"}, - {file = "pyarrow-20.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ca151afa4f9b7bc45bcc791eb9a89e90a9eb2772767d0b1e5389609c7d03db63"}, - {file = "pyarrow-20.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:4680f01ecd86e0dd63e39eb5cd59ef9ff24a9d166db328679e36c108dc993d4c"}, - {file = "pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f4c8534e2ff059765647aa69b75d6543f9fef59e2cd4c6d18015192565d2b70"}, - {file = "pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e1f8a47f4b4ae4c69c4d702cfbdfe4d41e18e5c7ef6f1bb1c50918c1e81c57b"}, - {file = "pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a1f60dc14658efaa927f8214734f6a01a806d7690be4b3232ba526836d216122"}, - {file = "pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:204a846dca751428991346976b914d6d2a82ae5b8316a6ed99789ebf976551e6"}, - {file = "pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f3b117b922af5e4c6b9a9115825726cac7d8b1421c37c2b5e24fbacc8930612c"}, - {file = "pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e724a3fd23ae5b9c010e7be857f4405ed5e679db5c93e66204db1a69f733936a"}, - {file = "pyarrow-20.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:82f1ee5133bd8f49d31be1299dc07f585136679666b502540db854968576faf9"}, - {file = "pyarrow-20.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:1bcbe471ef3349be7714261dea28fe280db574f9d0f77eeccc195a2d161fd861"}, - {file = "pyarrow-20.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:a18a14baef7d7ae49247e75641fd8bcbb39f44ed49a9fc4ec2f65d5031aa3b96"}, - {file = "pyarrow-20.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb497649e505dc36542d0e68eca1a3c94ecbe9799cb67b578b55f2441a247fbc"}, - {file = "pyarrow-20.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11529a2283cb1f6271d7c23e4a8f9f8b7fd173f7360776b668e509d712a02eec"}, - {file = "pyarrow-20.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:6fc1499ed3b4b57ee4e090e1cea6eb3584793fe3d1b4297bbf53f09b434991a5"}, - {file = "pyarrow-20.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:db53390eaf8a4dab4dbd6d93c85c5cf002db24902dbff0ca7d988beb5c9dd15b"}, - {file = "pyarrow-20.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:851c6a8260ad387caf82d2bbf54759130534723e37083111d4ed481cb253cc0d"}, - {file = "pyarrow-20.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e22f80b97a271f0a7d9cd07394a7d348f80d3ac63ed7cc38b6d1b696ab3b2619"}, - {file = "pyarrow-20.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:9965a050048ab02409fb7cbbefeedba04d3d67f2cc899eff505cc084345959ca"}, - {file = "pyarrow-20.0.0.tar.gz", hash = "sha256:febc4a913592573c8d5805091a6c2b5064c8bd6e002131f01061797d91c783c1"}, + {file = "pyarrow-21.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e563271e2c5ff4d4a4cbeb2c83d5cf0d4938b891518e676025f7268c6fe5fe26"}, + {file = "pyarrow-21.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fee33b0ca46f4c85443d6c450357101e47d53e6c3f008d658c27a2d020d44c79"}, + {file = "pyarrow-21.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:7be45519b830f7c24b21d630a31d48bcebfd5d4d7f9d3bdb49da9cdf6d764edb"}, + {file = "pyarrow-21.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:26bfd95f6bff443ceae63c65dc7e048670b7e98bc892210acba7e4995d3d4b51"}, + {file = "pyarrow-21.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bd04ec08f7f8bd113c55868bd3fc442a9db67c27af098c5f814a3091e71cc61a"}, + {file = "pyarrow-21.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9b0b14b49ac10654332a805aedfc0147fb3469cbf8ea951b3d040dab12372594"}, + {file = "pyarrow-21.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:9d9f8bcb4c3be7738add259738abdeddc363de1b80e3310e04067aa1ca596634"}, + {file = "pyarrow-21.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c077f48aab61738c237802836fc3844f85409a46015635198761b0d6a688f87b"}, + {file = "pyarrow-21.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:689f448066781856237eca8d1975b98cace19b8dd2ab6145bf49475478bcaa10"}, + {file = "pyarrow-21.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:479ee41399fcddc46159a551705b89c05f11e8b8cb8e968f7fec64f62d91985e"}, + {file = "pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:40ebfcb54a4f11bcde86bc586cbd0272bac0d516cfa539c799c2453768477569"}, + {file = "pyarrow-21.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8d58d8497814274d3d20214fbb24abcad2f7e351474357d552a8d53bce70c70e"}, + {file = "pyarrow-21.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:585e7224f21124dd57836b1530ac8f2df2afc43c861d7bf3d58a4870c42ae36c"}, + {file = "pyarrow-21.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:555ca6935b2cbca2c0e932bedd853e9bc523098c39636de9ad4693b5b1df86d6"}, + {file = "pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd"}, + {file = "pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876"}, + {file = "pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d"}, + {file = "pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e"}, + {file = "pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82"}, + {file = "pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623"}, + {file = "pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18"}, + {file = "pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a"}, + {file = "pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe"}, + {file = "pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd"}, + {file = "pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61"}, + {file = "pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d"}, + {file = "pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99"}, + {file = "pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636"}, + {file = "pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da"}, + {file = "pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7"}, + {file = "pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6"}, + {file = "pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8"}, + {file = "pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503"}, + {file = "pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79"}, + {file = "pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10"}, + {file = "pyarrow-21.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a7f6524e3747e35f80744537c78e7302cd41deee8baa668d56d55f77d9c464b3"}, + {file = "pyarrow-21.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:203003786c9fd253ebcafa44b03c06983c9c8d06c3145e37f1b76a1f317aeae1"}, + {file = "pyarrow-21.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3b4d97e297741796fead24867a8dabf86c87e4584ccc03167e4a811f50fdf74d"}, + {file = "pyarrow-21.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:898afce396b80fdda05e3086b4256f8677c671f7b1d27a6976fa011d3fd0a86e"}, + {file = "pyarrow-21.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:067c66ca29aaedae08218569a114e413b26e742171f526e828e1064fcdec13f4"}, + {file = "pyarrow-21.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0c4e75d13eb76295a49e0ea056eb18dbd87d81450bfeb8afa19a7e5a75ae2ad7"}, + {file = "pyarrow-21.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdc4c17afda4dab2a9c0b79148a43a7f4e1094916b3e18d8975bfd6d6d52241f"}, + {file = "pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc"}, ] [package.extras] @@ -5305,14 +5292,14 @@ files = [ [[package]] name = "ragas" -version = "0.2.15" +version = "0.3.0" description = "" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "ragas-0.2.15-py3-none-any.whl", hash = "sha256:298cd3d1fe3bd21ca4d31023a55079740d7bdd27a8c915bb371cec3c50cde608"}, - {file = "ragas-0.2.15.tar.gz", hash = "sha256:2d0cd77b315a9c9c02ceb0a19ca8a48e82e1d02416587a2944ea51e6e327cd7b"}, + {file = "ragas-0.3.0-py3-none-any.whl", hash = "sha256:9e7bdbcf79c939d695ec10dbb7d587971f489145e0f18d33f40df085116c1c79"}, + {file = "ragas-0.3.0.tar.gz", hash = "sha256:d45f174c665a544b5749d3b2034a207d79ff001fe57fc81f0cd425004b4906a2"}, ] [package.dependencies] @@ -5330,10 +5317,10 @@ pydantic = ">=2" tiktoken = "*" [package.extras] -all = ["datacompy", "llama_index", "nltk", "pandas", "r2r", "rapidfuzz", "rouge_score", "sentence-transformers", "transformers"] -dev = ["black[jupyter]", "datacompy", "fastembed", "graphene", "haystack-ai", "isort", "llama_index", "nltk", "notebook", "pandas", "pyright", "r2r", "rapidfuzz", "rich", "rouge_score", "ruff", "sacrebleu", "sentence-transformers", "sphinx-autobuild", "transformers"] +all = ["datacompy", "llama_index", "nltk", "pandas", "r2r", "ragas-experimental", "rapidfuzz", "rouge_score", "sentence-transformers", "transformers"] +dev = ["black[jupyter]", "datacompy", "fastembed", "graphene", "haystack-ai", "llama_index", "nbmake", "nltk", "notebook", "pandas", "pyright", "pytest", "pytest-asyncio", "pytest-xdist[psutil]", "r2r", "rapidfuzz", "rich", "rouge_score", "ruff", "sacrebleu", "sentence-transformers", "sphinx-autobuild", "transformers"] docs = ["mkdocs (>=1.6.1)", "mkdocs-autorefs", "mkdocs-gen-files", "mkdocs-git-committers-plugin-2", "mkdocs-git-revision-date-localized-plugin", "mkdocs-glightbox", "mkdocs-literate-nav", "mkdocs-material", "mkdocs-material[imaging]", "mkdocs-section-index", "mkdocstrings[python]"] -test = ["llama_index", "nbmake", "pytest", "pytest-asyncio", "pytest-xdist[psutil]"] +experimental = ["ragas-experimental"] [[package]] name = "rapidfuzz" @@ -7262,4 +7249,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "^3.12, <3.13" -content-hash = "65ef31e6b64908593eb4138843da98db613db46bf22b69f832eab728bcaabf39" +content-hash = "0b1cc7cfd0bfb7a9726dbc8be16774f9df3a33d309a8a7973b57ef7569b990d0" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index b2c2621..93ec2fe 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -34,9 +34,9 @@ cryptography = "^44.0.2" pyjwt = "^2.10.1" openpyxl = "^3.1.5" celery = { extras = ["redis"], version = "^5.4.0" } -ragas = "^0.2.14" pydantic-settings = "^2.9.1" rapidfuzz = "^3.13.0" +ragas = "^0.3.0" [tool.poetry.group.dev.dependencies] psycopg2-binary = "^2.9.9"