diff --git a/src/app/api/api_v1/endpoints/tutor.py b/src/app/api/api_v1/endpoints/tutor.py
index bbb0c99..a2dd8a5 100644
--- a/src/app/api/api_v1/endpoints/tutor.py
+++ b/src/app/api/api_v1/endpoints/tutor.py
@@ -1,10 +1,10 @@
 from typing import Annotated
 
-from fastapi import APIRouter, File, Response, UploadFile
-from pypdf import PdfReader
+from fastapi import APIRouter, File, HTTPException, Response, UploadFile
 
 from src.app.api.dependencies import get_settings
 from src.app.services.abst_chat import AbstractChat, ChatFactory
+from src.app.services.exceptions import NoResultsError
 from src.app.services.search import SearchService
 from src.app.services.search_helpers import search_multi_inputs
 from src.app.services.tutor.models import (
@@ -13,6 +13,7 @@
     TutorSearchResponse,
 )
 from src.app.services.tutor.tutor import tutor_manager
+from src.app.services.tutor.utils import get_file_content
 from src.app.utils.logger import logger as utils_logger
 
 logger = utils_logger(__name__)
@@ -41,19 +42,14 @@ async def tutor_search(
     response: Response,
 ):
     files_content: list[bytes] = []
+
     for file in files:
-        if (
-            file.content_type == "application/pdf"
-            or file.content_type == "application/x-pdf"
-        ):
-            file_content = ""
-            reader = PdfReader(file.file)
-            for page in reader.pages:
-                file_content += page.extract_text()
-            files_content.append(file_content.encode("utf-8", errors="ignore"))
-        else:
-            file_content = await file.read()
-            files_content.append(file_content)
+        file_content = await get_file_content(file)
+
+        if not file_content:
+            raise HTTPException(status_code=400, detail="added files are empty")
+
+        files_content.append(file_content)
 
     doc_list_to_string = "Document {doc_nb}: {content}"
 
@@ -93,14 +89,23 @@ async def tutor_search(
 
     inputs = [doc.summary for doc in themes_extracted.extracts]  # type: ignore
 
-    search_results = await search_multi_inputs(
-        response=response,
-        inputs=inputs,
-        nb_results=5,
-        sdg_filter=None,
-        collections=None,
-        callback_function=sp.search,
-    )
+    try:
+        search_results = await search_multi_inputs(
+            response=response,
+            inputs=inputs,
+            nb_results=5,
+            sdg_filter=None,
+            collections=None,
+            callback_function=sp.search,
+        )
+    except NoResultsError as e:
+        response.status_code = 404
+        logger.error(f"No results found: {e}")
+        return TutorSearchResponse(
+            extracts=themes_extracted.extracts,
+            nb_results=0,
+            documents=[],
+        )
 
     if not search_results:
         return TutorSearchResponse(
diff --git a/src/app/services/tutor/utils.py b/src/app/services/tutor/utils.py
index fc31ae4..abdf3f4 100644
--- a/src/app/services/tutor/utils.py
+++ b/src/app/services/tutor/utils.py
@@ -1,3 +1,6 @@
+from fastapi import UploadFile
+from pypdf import PdfReader
+
 from src.app.models.documents import Document
 
 
@@ -34,3 +37,20 @@ def extract_doc_info(documents: list[Document]) -> list[dict]:
         }
         for doc in documents
     ]
+
+
+async def get_file_content(file: UploadFile) -> bytes:
+    """Return the content of an uploaded file as bytes.
+
+    PDF uploads (``application/pdf`` / ``application/x-pdf``) are parsed with
+    pypdf and the text of every page is concatenated, then UTF-8 encoded
+    (characters that cannot be encoded are dropped). Any other upload is
+    returned as its raw bytes.
+    """
+    if file.content_type in ("application/pdf", "application/x-pdf"):
+        reader = PdfReader(file.file)
+        # Join once instead of repeated ``+=`` to avoid quadratic copying.
+        pages_content = "".join(page.extract_text() for page in reader.pages)
+        return pages_content.encode("utf-8", errors="ignore")
+
+    return await file.read()
diff --git a/src/app/tests/api/api_v1/test_search.py b/src/app/tests/api/api_v1/test_search.py
index 8b6c55f..e70173c 100644
--- a/src/app/tests/api/api_v1/test_search.py
+++ b/src/app/tests/api/api_v1/test_search.py
@@ -265,7 +265,6 @@ async def test_search_all_slices_ok(self, *mocks):
             },
             headers={"X-API-Key": "test"},
         )
-        print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>", response.json())
 
         self.assertEqual(response.status_code, 200)
 
diff --git a/src/app/tests/api/api_v1/test_tutor.py b/src/app/tests/api/api_v1/test_tutor.py
new file mode 100644
index 0000000..531d03f
--- /dev/null
+++ b/src/app/tests/api/api_v1/test_tutor.py
@@ -0,0 +1,43 @@
+import io
+from unittest import IsolatedAsyncioTestCase, mock
+
+from fastapi.testclient import TestClient
+
+from src.app.core.config import settings
+from src.main import app
+
+client = TestClient(app)
+
+
+@mock.patch("src.app.services.sql_db.session_maker")
+@mock.patch(
+    "src.app.services.security.check_api_key", new=mock.MagicMock(return_value=True)
+)
+class TutorTests(IsolatedAsyncioTestCase):
+    def test_tutor_no_files(self, *mocks):
+        response = client.post(
+            f"{settings.API_V1_STR}/tutor/search",
+            files={},
+            headers={"X-API-Key": "test"},
+        )
+        assert response.status_code == 422
+
+    def test_tutor_empty_file(self, *mocks):
+        file = io.BytesIO(b"")
+        response = client.post(
+            f"{settings.API_V1_STR}/tutor/search",
+            files={"files": ("test.txt", file)},
+            headers={"X-API-Key": "test"},
+        )
+        assert response.status_code == 400
+
+    def test_tutor_file(self, *mocks):
+        # NOTE(review): no chat/search backend is mocked in this file, so this
+        # test presumably depends on live services -- confirm or mock them.
+        file = io.BytesIO(b"this is a test file")
+        response = client.post(
+            f"{settings.API_V1_STR}/tutor/search",
+            files={"files": ("test.txt", file)},
+            headers={"X-API-Key": "test"},
+        )
+        assert response.status_code == 200