From 660434b0466d23bcd9e9eb47df20aa4d8efc4744 Mon Sep 17 00:00:00 2001
From: Alister Lewis-Bowen <alister@lewis-bowen.org>
Date: Tue, 14 Apr 2026 08:59:02 -0400
Subject: [PATCH] Fix corrupt PDF crash and --global-index field parsing for
 auto-generated COLLECTION.md

- Wrap convert_publication() call in try/except Exception so any PDF
  that fails to convert (e.g. a corrupt file) prints a warning and is
  skipped rather than aborting the entire run
- Update --global-index regexes to match both hand-crafted bold format
  (**Period**, **Pages**) and auto-generated plain format (Date range,
  Total pages) so period/pages columns populate correctly for
  collections built with --write-collection-md

Fixes surfaced in ali5ter/electronics-publications-library commit f8f8403.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 convert.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/convert.py b/convert.py
index a043213..fbeb2fb 100644
--- a/convert.py
+++ b/convert.py
@@ -552,10 +552,10 @@ def write_global_index(collections_root: Path, output_path: Path) -> None:
             m = re.search(r"^# (.+)$", text, re.MULTILINE)
             if m:
                 name = m.group(1)
-            m = re.search(r"\|\s*\*\*Period\*\*\s*\|\s*(.+?)\s*\|", text)
+            m = re.search(r"\|\s*(?:\*\*Period\*\*|Date range)\s*\|\s*(.+?)\s*\|", text)
             if m:
                 period = m.group(1).strip()
-            m = re.search(r"\|\s*\*\*Pages\*\*\s*\|\s*(~?[\d,]+)\s*\|", text)
+            m = re.search(r"\|\s*(?:\*\*Pages\*\*|Total pages)\s*\|\s*(~?[\d,]+)\s*\|", text)
             if m:
                 pages = m.group(1).strip()
 
@@ -634,7 +634,11 @@ def main() -> None:
         base_slug, _ = parse_slug(pdf_path.name)
         resolved = slug_map[pdf_path]
         override = resolved if resolved != base_slug else None
-        info = convert_publication(pdf_path, args.output_dir, args.dpi, args.force, slug_override=override)
+        try:
+            info = convert_publication(pdf_path, args.output_dir, args.dpi, args.force, slug_override=override)
+        except Exception as exc:
+            print(f"  WARNING: skipping {pdf_path.name} — {exc}")
+            continue
         if info.get("slug"):
             write_publication_index(info, args.output_dir)
             all_pubs.append(info)