Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 27 additions & 22 deletions src/app/api/api_v1/endpoints/tutor.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from typing import Annotated

from fastapi import APIRouter, File, Response, UploadFile
from pypdf import PdfReader
from fastapi import APIRouter, File, HTTPException, Response, UploadFile

from src.app.api.dependencies import get_settings
from src.app.services.abst_chat import AbstractChat, ChatFactory
from src.app.services.exceptions import NoResultsError
from src.app.services.search import SearchService
from src.app.services.search_helpers import search_multi_inputs
from src.app.services.tutor.models import (
Expand All @@ -13,6 +13,7 @@
TutorSearchResponse,
)
from src.app.services.tutor.tutor import tutor_manager
from src.app.services.tutor.utils import get_file_content
from src.app.utils.logger import logger as utils_logger

logger = utils_logger(__name__)
Expand Down Expand Up @@ -41,19 +42,14 @@ async def tutor_search(
response: Response,
):
files_content: list[bytes] = []

for file in files:
if (
file.content_type == "application/pdf"
or file.content_type == "application/x-pdf"
):
file_content = ""
reader = PdfReader(file.file)
for page in reader.pages:
file_content += page.extract_text()
files_content.append(file_content.encode("utf-8", errors="ignore"))
else:
file_content = await file.read()
files_content.append(file_content)
file_content = await get_file_content(file)

if not file_content:
raise HTTPException(status_code=400, detail="added files are empty")

files_content.append(file_content)

doc_list_to_string = "Document {doc_nb}: {content}"

Expand Down Expand Up @@ -93,14 +89,23 @@ async def tutor_search(

inputs = [doc.summary for doc in themes_extracted.extracts] # type: ignore

search_results = await search_multi_inputs(
response=response,
inputs=inputs,
nb_results=5,
sdg_filter=None,
collections=None,
callback_function=sp.search,
)
try:
search_results = await search_multi_inputs(
response=response,
inputs=inputs,
nb_results=5,
sdg_filter=None,
collections=None,
callback_function=sp.search,
)
except NoResultsError as e:
response.status_code = 404
logger.error(f"No results found: {e}")
return TutorSearchResponse(
extracts=themes_extracted.extracts,
nb_results=0,
documents=[],
)

if not search_results:
return TutorSearchResponse(
Expand Down
20 changes: 20 additions & 0 deletions src/app/services/tutor/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from fastapi import UploadFile
from pypdf import PdfReader

from src.app.models.documents import Document


Expand Down Expand Up @@ -34,3 +37,20 @@ def extract_doc_info(documents: list[Document]) -> list[dict]:
}
for doc in documents
]


async def get_file_content(file: UploadFile) -> bytes:
    """Return the content of an uploaded file as bytes.

    Uploads whose content type is ``application/pdf`` or
    ``application/x-pdf`` are parsed with ``PdfReader``; the text extracted
    from every page is concatenated and encoded as UTF-8. Any other upload
    is returned as read by ``file.read()``.

    Args:
        file: The uploaded file to read.

    Returns:
        The UTF-8 encoded extracted text for PDF uploads, otherwise the
        result of ``await file.read()``.
    """
    if file.content_type in ("application/pdf", "application/x-pdf"):
        reader = PdfReader(file.file)
        # Build the text in a single join rather than growing a string page
        # by page, which degrades quadratically on large documents.
        pages_content = "".join(page.extract_text() for page in reader.pages)
        # errors="ignore" drops characters that cannot be encoded as UTF-8
        # instead of raising on them.
        return pages_content.encode("utf-8", errors="ignore")

    return await file.read()
1 change: 0 additions & 1 deletion src/app/tests/api/api_v1/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,6 @@ async def test_search_all_slices_ok(self, *mocks):
},
headers={"X-API-Key": "test"},
)
print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>", response.json())

self.assertEqual(response.status_code, 200)

Expand Down
42 changes: 42 additions & 0 deletions src/app/tests/api/api_v1/test_tutor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import io
from unittest import IsolatedAsyncioTestCase, mock

from fastapi.testclient import TestClient

from src.app.core.config import settings
from src.main import app

# Module-level test client wrapping the FastAPI app; the tests below send
# their requests through it.
client = TestClient(app)


@mock.patch("src.app.services.sql_db.session_maker")
@mock.patch(
    "src.app.services.security.check_api_key", new=mock.MagicMock(return_value=True)
)
class TutorTests(IsolatedAsyncioTestCase):
    """Tests for the ``POST {API_V1_STR}/tutor/search`` endpoint.

    The class-level patches replace the SQL session maker with a mock (passed
    to each test method, hence ``*mocks``) and replace ``check_api_key`` with
    a mock that always returns ``True``.
    """

    def _post_search(self, files):
        """Post *files* to the tutor search endpoint and return the response."""
        return client.post(
            f"{settings.API_V1_STR}/tutor/search",
            files=files,
            headers={"x-API-Key": "test"},
        )

    def test_tutor_no_files(self, *mocks):
        """A request without any file is expected to be answered with 422."""
        response = self._post_search({})
        self.assertEqual(response.status_code, 422)

    def test_tutor_empty_file(self, *mocks):
        """An uploaded file with no content is expected to yield 400."""
        file = io.BytesIO(b"")
        response = self._post_search({"files": ("test.txt", file)})
        self.assertEqual(response.status_code, 400)

    def test_tutor_file(self, *mocks):
        """A non-empty text file is expected to be processed with 200."""
        file = io.BytesIO(b"this is a test file")
        response = self._post_search({"files": ("test.txt", file)})
        self.assertEqual(response.status_code, 200)