From d204bd3b61033ad2956085304b79c3241c3a71c0 Mon Sep 17 00:00:00 2001 From: fabgpt-coder <276921870+fabgpt-coder@users.noreply.github.com> Date: Sat, 6 Jun 2026 21:34:45 +0200 Subject: [PATCH] Guard against empty input in chunk_markdown() The function currently proceeds to split an empty string into sections, which results in a single empty chunk that is then filtered out by the `min_chars` check. This triggers unnecessary regex compilation and list processing for no gain. Adding an early return for empty or whitespace-only input improves performance and avoids building intermediate section and chunk lists that would be discarded anyway. --- agent/rag/chunker.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/agent/rag/chunker.py b/agent/rag/chunker.py index 75c524f..36d5020 100644 --- a/agent/rag/chunker.py +++ b/agent/rag/chunker.py @@ -75,6 +75,9 @@ def chunk_markdown( min_chars: int = 200, ) -> list[Chunk]: """Heading-aware chunker. Returns chunks in document order.""" + if not text or not text.strip(): + return [] + sections = _split_sections(text) chunks: list[Chunk] = [] stack: list[str] = [""] * 7 # index by heading level