From a6a003a20acc1868f1ea2f4a49563f6ec3f2cd16 Mon Sep 17 00:00:00 2001 From: CRI USER Date: Wed, 23 Apr 2025 11:12:43 +0200 Subject: [PATCH 1/2] feat(tutor): handle no results --- src/app/api/api_v1/endpoints/tutor.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/app/api/api_v1/endpoints/tutor.py b/src/app/api/api_v1/endpoints/tutor.py index bbb0c99..b84c54c 100644 --- a/src/app/api/api_v1/endpoints/tutor.py +++ b/src/app/api/api_v1/endpoints/tutor.py @@ -5,6 +5,7 @@ from src.app.api.dependencies import get_settings from src.app.services.abst_chat import AbstractChat, ChatFactory +from src.app.services.exceptions import NoResultsError from src.app.services.search import SearchService from src.app.services.search_helpers import search_multi_inputs from src.app.services.tutor.models import ( @@ -93,14 +94,23 @@ async def tutor_search( inputs = [doc.summary for doc in themes_extracted.extracts] # type: ignore - search_results = await search_multi_inputs( - response=response, - inputs=inputs, - nb_results=5, - sdg_filter=None, - collections=None, - callback_function=sp.search, - ) + try: + search_results = await search_multi_inputs( + response=response, + inputs=inputs, + nb_results=5, + sdg_filter=None, + collections=None, + callback_function=sp.search, + ) + except NoResultsError as e: + response.status_code = 404 + logger.error(f"No results found: {e}") + return TutorSearchResponse( + extracts=themes_extracted.extracts, + nb_results=0, + documents=[], + ) if not search_results: return TutorSearchResponse( From cf0e223591281e005f6f43bfa6ccafc72dbe2457 Mon Sep 17 00:00:00 2001 From: CRI USER Date: Wed, 23 Apr 2025 15:38:00 +0200 Subject: [PATCH 2/2] adds tests --- src/app/api/api_v1/endpoints/tutor.py | 23 ++++++-------- src/app/services/tutor/utils.py | 20 ++++++++++++ src/app/tests/api/api_v1/test_search.py | 1 - src/app/tests/api/api_v1/test_tutor.py | 42 +++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 15 deletions(-) create mode 100644 src/app/tests/api/api_v1/test_tutor.py diff --git a/src/app/api/api_v1/endpoints/tutor.py b/src/app/api/api_v1/endpoints/tutor.py index b84c54c..a2dd8a5 100644 --- a/src/app/api/api_v1/endpoints/tutor.py +++ b/src/app/api/api_v1/endpoints/tutor.py @@ -1,7 +1,6 @@ from typing import Annotated -from fastapi import APIRouter, File, Response, UploadFile -from pypdf import PdfReader +from fastapi import APIRouter, File, HTTPException, Response, UploadFile from src.app.api.dependencies import get_settings from src.app.services.abst_chat import AbstractChat, ChatFactory @@ -14,6 +13,7 @@ TutorSearchResponse, ) from src.app.services.tutor.tutor import tutor_manager +from src.app.services.tutor.utils import get_file_content from src.app.utils.logger import logger as utils_logger logger = utils_logger(__name__) @@ -42,19 +42,14 @@ async def tutor_search( response: Response, ): files_content: list[bytes] = [] + for file in files: - if ( - file.content_type == "application/pdf" - or file.content_type == "application/x-pdf" - ): - file_content = "" - reader = PdfReader(file.file) - for page in reader.pages: - file_content += page.extract_text() - files_content.append(file_content.encode("utf-8", errors="ignore")) - else: - file_content = await file.read() - files_content.append(file_content) + file_content = await get_file_content(file) + + if not file_content: + raise HTTPException(status_code=400, detail="added files are empty") + + files_content.append(file_content) doc_list_to_string = "Document {doc_nb}: {content}" diff --git a/src/app/services/tutor/utils.py b/src/app/services/tutor/utils.py index fc31ae4..abdf3f4 100644 --- a/src/app/services/tutor/utils.py +++ b/src/app/services/tutor/utils.py @@ -1,3 +1,6 @@ +from fastapi import UploadFile +from pypdf import PdfReader + from src.app.models.documents import Document @@ -34,3 +37,20 @@ def extract_doc_info(documents: list[Document]) -> list[dict]: } for doc in documents ] + + +async def get_file_content(file: UploadFile): + if ( + file.content_type == "application/pdf" + or file.content_type == "application/x-pdf" + ): + reader = PdfReader(file.file) + pages_content = "" + for page in reader.pages: + pages_content += page.extract_text() + + file_content = pages_content.encode("utf-8", errors="ignore") + else: + file_content = await file.read() + + return file_content diff --git a/src/app/tests/api/api_v1/test_search.py b/src/app/tests/api/api_v1/test_search.py index 8b6c55f..e70173c 100644 --- a/src/app/tests/api/api_v1/test_search.py +++ b/src/app/tests/api/api_v1/test_search.py @@ -265,7 +265,6 @@ async def test_search_all_slices_ok(self, *mocks): }, headers={"X-API-Key": "test"}, ) - print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>", response.json()) self.assertEqual(response.status_code, 200) diff --git a/src/app/tests/api/api_v1/test_tutor.py b/src/app/tests/api/api_v1/test_tutor.py new file mode 100644 index 0000000..531d03f --- /dev/null +++ b/src/app/tests/api/api_v1/test_tutor.py @@ -0,0 +1,42 @@ +import io +from unittest import IsolatedAsyncioTestCase, mock + +from fastapi.testclient import TestClient + +from src.app.core.config import settings +from src.main import app + +client = TestClient(app) + + +@mock.patch("src.app.services.sql_db.session_maker") +@mock.patch( + "src.app.services.security.check_api_key", new=mock.MagicMock(return_value=True) +) +class TutorTests(IsolatedAsyncioTestCase): + def test_tutor_no_files(self, *mocks): + + reponse = client.post( + f"{settings.API_V1_STR}/tutor/search", + files={}, + headers={"x-API-Key": "test"}, + ) + assert reponse.status_code == 422 + + def test_tutor_empty_file(self, *mocks): + file = io.BytesIO(b"") + reponse = client.post( + f"{settings.API_V1_STR}/tutor/search", + files={"files": ("test.txt", file)}, + headers={"x-API-Key": "test"}, + ) + assert reponse.status_code == 400 + + def test_tutor_file(self, *mocks): + file = io.BytesIO(b"this is a test file") + reponse = client.post( + f"{settings.API_V1_STR}/tutor/search", + files={"files": ("test.txt", file)}, + headers={"x-API-Key": "test"}, + ) + assert reponse.status_code == 200