From 660434b0466d23bcd9e9eb47df20aa4d8efc4744 Mon Sep 17 00:00:00 2001 From: Alister Lewis-Bowen Date: Tue, 14 Apr 2026 08:59:02 -0400 Subject: [PATCH] Fix corrupt PDF crash and --global-index field parsing for auto-generated COLLECTION.md - Wrap convert_publication() call in try/except so corrupt PDFs log a warning and are skipped rather than aborting the entire run - Update --global-index regexes to match both hand-crafted bold format (**Period**, **Pages**) and auto-generated plain format (Date range, Total pages) so period/pages columns populate correctly for collections built with --write-collection-md Fixes surfaced in ali5ter/electronics-publications-library commit f8f8403. Co-Authored-By: Claude Sonnet 4.6 --- convert.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/convert.py b/convert.py index a043213..fbeb2fb 100644 --- a/convert.py +++ b/convert.py @@ -552,10 +552,10 @@ def write_global_index(collections_root: Path, output_path: Path) -> None: m = re.search(r"^# (.+)$", text, re.MULTILINE) if m: name = m.group(1) - m = re.search(r"\|\s*\*\*Period\*\*\s*\|\s*(.+?)\s*\|", text) + m = re.search(r"\|\s*(?:\*\*Period\*\*|Date range)\s*\|\s*(.+?)\s*\|", text) if m: period = m.group(1).strip() - m = re.search(r"\|\s*\*\*Pages\*\*\s*\|\s*(~?[\d,]+)\s*\|", text) + m = re.search(r"\|\s*(?:\*\*Pages\*\*|Total pages)\s*\|\s*(~?[\d,]+)\s*\|", text) if m: pages = m.group(1).strip() @@ -634,7 +634,11 @@ def main() -> None: base_slug, _ = parse_slug(pdf_path.name) resolved = slug_map[pdf_path] override = resolved if resolved != base_slug else None - info = convert_publication(pdf_path, args.output_dir, args.dpi, args.force, slug_override=override) + try: + info = convert_publication(pdf_path, args.output_dir, args.dpi, args.force, slug_override=override) + except Exception as exc: + print(f" WARNING: skipping {pdf_path.name} — {exc}") + continue if info.get("slug"): write_publication_index(info, args.output_dir) all_pubs.append(info)