From 326ce8c466e121c41981cf6c3212b6ead374e2a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o?= Date: Thu, 28 Aug 2025 12:46:18 +0200 Subject: [PATCH 1/3] handle case with only one page --- src/refinedoc/refined_document.py | 12 +++++-- tests/test_refined_document.py | 54 +++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/refinedoc/refined_document.py b/src/refinedoc/refined_document.py index b095d92..c1e3418 100644 --- a/src/refinedoc/refined_document.py +++ b/src/refinedoc/refined_document.py @@ -43,8 +43,16 @@ def __init__(self, content: list[list[str]], ratio_speed: int = 1, win: int = 8) raise ValueError(f"Speed must be between 1 and 3: {ratio_speed}") self._processed_body: list[list[str]] = content # Initialize body field - self._processed_headers: list[list[str]] | None = None - self._processed_footers: list[list[str]] | None = None + + if len(content) == 1: + logger.warning( + "The content provided is empty. Headers and footers will be set to empty lists." 
+ ) + self._processed_headers = [[]] + self._processed_footers = [[]] + else: + self._processed_headers: list[list[str]] | None = None + self._processed_footers: list[list[str]] | None = None self.win = win diff --git a/tests/test_refined_document.py b/tests/test_refined_document.py index 49f9cfc..a1b148a 100644 --- a/tests/test_refined_document.py +++ b/tests/test_refined_document.py @@ -404,3 +404,57 @@ def test_various_qty_lines2(self): self.assertListEqual(h_dr, h_ref) self.assertListEqual(f_dr, f_ref) self.assertListEqual(b_dr, b_ref) + + def test_empty_document(self): + document = [] + + rd = RefinedDocument(content=document) + + h_dr = rd.headers + f_dr = rd.footers + b_dr = rd.body + + h_ref = [] + + f_ref = [] + + b_ref = [] + + self.assertListEqual(h_dr, h_ref) + self.assertListEqual(f_dr, f_ref) + self.assertListEqual(b_dr, b_ref) + + def test_single_page_document(self): + document = [ + [ + "header 1", + "subheader 1", + "lorem ipsum dolor sit amet", + "consectetur adipiscing elit", + "footer 1", + ] + ] + + rd = RefinedDocument(content=document) + + h_dr = rd.headers + f_dr = rd.footers + b_dr = rd.body + + h_ref = [[]] + + f_ref = [[]] + + b_ref = [ + [ + "header 1", + "subheader 1", + "lorem ipsum dolor sit amet", + "consectetur adipiscing elit", + "footer 1", + ] + ] + + self.assertListEqual(h_dr, h_ref) + self.assertListEqual(f_dr, f_ref) + self.assertListEqual(b_dr, b_ref) From 49cacbfd75b2c737160692094723cd819073f02f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o?= Date: Thu, 28 Aug 2025 12:47:24 +0200 Subject: [PATCH 2/3] change version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 210fba6..41f2ef9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "refinedoc" -version = "1.0.0" +version = "1.0.1" authors = [ { name="Théo NARDIN", email="theo.nardin@learningplanetinstitute.org" }, ] From 
2dcfb7da81e1395bb653aae7744f4cc6ec9597d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o?= <133012334+lpi-tn@users.noreply.github.com> Date: Thu, 28 Aug 2025 14:55:21 +0200 Subject: [PATCH 3/3] Update src/refinedoc/refined_document.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/refinedoc/refined_document.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/refinedoc/refined_document.py b/src/refinedoc/refined_document.py index c1e3418..d304ff0 100644 --- a/src/refinedoc/refined_document.py +++ b/src/refinedoc/refined_document.py @@ -44,9 +44,9 @@ def __init__(self, content: list[list[str]], ratio_speed: int = 1, win: int = 8) self._processed_body: list[list[str]] = content # Initialize body field if len(content) == 1: logger.warning( - "The content provided is empty. Headers and footers will be set to empty lists." + "The content provided has only one page. Headers and footers will be set to empty lists." ) self._processed_headers = [[]] self._processed_footers = [[]]