From 3720f15408e4f0cdacdf6255915e1eeaa57ac949 Mon Sep 17 00:00:00 2001
From: Xitee1 <59659167+Xitee1@users.noreply.github.com>
Date: Tue, 12 May 2026 18:18:59 +0200
Subject: [PATCH 1/7] feat(create): generation-aware naming + new volume label
 format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 2 of incremental-archives. Internal dar archive name is now
<name>-gen<N> (every new full is Gen 1; Phase 3 derives higher N from
--base). Volume labels switch to <truncated_name>_G<NN>_<NNNN> — the
gen suffix lives in the label, the human-meaningful name truncates to
23 chars if longer. Filenames inside the ISO keep the full name.

Pre-Phase-2 (legacy) Gen 1 archives are unaffected: their old labels
and naming stay on the burned discs. New archives produced from this
phase onward carry the new scheme.

Why the truncation tradeoff: physically distinguishing Gen 1 Disc 1
from Gen 2 Disc 1 of the same chain is more useful than seeing the
last few characters of an already-known archive name. The archive
name acts as the chain identity (see project README, updated in a
later phase); the user enforces this by keeping `-n` constant across
generations.

README on disc gains a Generation line and a CHAIN: hint explaining
the name-consistency rule.

Manual e2e verified: phase2test_G01_0001 / phase2test_G01_0002 labels
on a 2-disc set; phase2test-gen1.NNNN.dar slices on the discs via UDF;
phase2test-gen1-catalog.0001.dar persisted to output_dir.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/bd_archive/archive/config.py  | 19 +++++++++++++++--
 src/bd_archive/commands/create.py | 35 ++++++++++++++++++++++---------
 src/bd_archive/constants.py       |  8 +++++--
 3 files changed, 48 insertions(+), 14 deletions(-)
diff --git a/src/bd_archive/archive/config.py b/src/bd_archive/archive/config.py
index 7a63232..d245c31 100644
--- a/src/bd_archive/archive/config.py
+++ b/src/bd_archive/archive/config.py
@@ -12,23 +12,38 @@ class ArchiveConfig:
     redundancy: int
     compression: str
     comp_level: str | None
+    generation: int = 1
 
     @property
     def comp_str(self) -> str:
         return self.compression + (f" ({self.comp_level})" if self.comp_level else "")
 
+    @property
+    def dar_name(self) -> str:
+        """Internal dar archive name including generation suffix.
+
+        File naming uses `<name>-gen<N>` so slices from different
+        generations of the same chain coexist in one staging dir during
+        extract. The user-facing `name` (from `-n`) is the chain
+        identity — see project README for the rule that name must stay
+        identical across all generations of one chain.
+        """
+        return f"{self.name}-gen{self.generation}"
+
 
 def write_readme(
     readme_path: Path, cfg: ArchiveConfig, disc_num: int, total_discs: int, slice_name: str
 ):
     ts = datetime.now().strftime("%Y-%m-%d %H:%M")
     readme_path.write_text(
-        f"BD-ARCHIVE | {cfg.name} | Disc {disc_num}/{total_discs}"
+        f"BD-ARCHIVE | {cfg.name} | Gen {cfg.generation} | Disc {disc_num}/{total_discs}"
         f" | {ts} | Capacity {human_bytes(cfg.disc_bytes)}"
         f" | PAR2 {cfg.redundancy}% | {cfg.comp_str}\n\n"
-        f"RESTORE:  dar -x {cfg.name} -R /target\n"
+        f"RESTORE:  dar -x {cfg.dar_name} -R /target\n"
         f"VERIFY:   sha512sum -c {slice_name}.sha512\n"
         f"          par2 verify {slice_name}.par2\n"
         f"REPAIR:   par2 repair {slice_name}.par2\n"
         f"DEPENDS:  pacman -S dar par2cmdline  |  apt install dar par2\n"
+        f"\nCHAIN:    Name '{cfg.name}' identifies this archive chain.\n"
+        f"          Future incremental generations must use the same name.\n"
     )
diff --git a/src/bd_archive/commands/create.py b/src/bd_archive/commands/create.py
index 8127932..2a5a61a 100644
--- a/src/bd_archive/commands/create.py
+++ b/src/bd_archive/commands/create.py
@@ -11,6 +11,7 @@
 from bd_archive.archive.source_scan import scan_source
 from bd_archive.constants import (
     DISC_END_MARGIN,
+    ISO9660_LABEL_NAME_MAX,
     ISO9660_VOLUME_LABEL_MAX,
     PAR2_AND_MISC_OVERHEAD,
 )
@@ -26,14 +27,23 @@
 def cmd_create(args):
     check_deps("dar", "par2", "mkisofs", "dvd+rw-mediainfo")
 
-    max_name_len = ISO9660_VOLUME_LABEL_MAX - 5  # "_NNNN" suffix
-    if len(args.name) > max_name_len:
+    # Hard cap matches the pre-Phase-2 label format (32 - 5) so existing
+    # archive names that lived right up against the old limit still work.
+    # Names longer than ISO9660_LABEL_NAME_MAX (23) get truncated in the
+    # volume label only; filenames inside the ISO keep the full name.
+    legacy_max_name_len = ISO9660_VOLUME_LABEL_MAX - 5
+    if len(args.name) > legacy_max_name_len:
         log.error(
             f"--name '{args.name}' is {len(args.name)} chars; "
-            f"max {max_name_len} (ISO9660 volume label limit "
-            f"{ISO9660_VOLUME_LABEL_MAX} minus 5-char disc suffix)"
+            f"max {legacy_max_name_len}"
         )
         sys.exit(1)
+    if len(args.name) > ISO9660_LABEL_NAME_MAX:
+        log.warn(
+            f"--name '{args.name}' is {len(args.name)} chars; "
+            f"volume labels will be truncated to {ISO9660_LABEL_NAME_MAX} chars "
+            f"('{args.name[:ISO9660_LABEL_NAME_MAX]}'). Filenames on disc keep the full name."
+        )
 
     source = Path(args.source).resolve()
     if not source.is_dir():
@@ -109,12 +119,15 @@ def cmd_create(args):
     last_disc_free_raw = int(last_disc_free / max(ratio, 0.001))
 
     par2_est = slice_bytes * args.redundancy // 100
+    # Phase 2: every archive starts as Gen 1. Phase 3 lets `--base`
+    # derive higher generation numbers from a predecessor catalog.
     cfg = ArchiveConfig(
         name=args.name,
         disc_bytes=raw_capacity,
         redundancy=args.redundancy,
         compression=args.compression,
         comp_level=args.level,
+        generation=1,
     )
 
     log.step("Source")
@@ -156,7 +169,7 @@ def cmd_create(args):
             output_dir.rmdir()
         sys.exit(0)
 
-    dar_archive = DarArchive(cfg.name, work_dir)
+    dar_archive = DarArchive(cfg.dar_name, work_dir)
     tmp_dir = dar_archive.tmp_dir
 
     # ── Create dar archive ──────────────────────────────────────────────
@@ -238,8 +251,10 @@ def cmd_create(args):
         sources.extend(par2_files)
         sources.append(readme_path)
 
-        # Build ISO directly from in-place files (no staging copies)
-        volume_label = f"{cfg.name}_{i:04d}"
+        # Build ISO directly from in-place files (no staging copies).
+        # Label is "<truncated_name>_G<NN>_<NNNN>" — name truncated to
+        # ISO9660_LABEL_NAME_MAX (23) so gen + disc suffix always fit.
+        volume_label = f"{cfg.name[:ISO9660_LABEL_NAME_MAX]}_G{cfg.generation:02d}_{i:04d}"
         iso_path = images_dir / f"disc_{i:04d}.iso"
         log.info(f"  building {iso_path.name}...")
         mkisofs.build(iso_path, sources, volume_label, publisher)
@@ -281,9 +296,9 @@ def cmd_create(args):
         cat_hash = Path(str(cat) + ".sha512")
         if cat_hash.exists():
             shutil.copy2(cat_hash, output_dir / cat_hash.name)
-    catalog_persisted = sorted(output_dir.glob(f"{cfg.name}-catalog.*.dar"))
+    catalog_persisted = sorted(output_dir.glob(f"{cfg.dar_name}-catalog.*.dar"))
     if catalog_persisted:
-        log.info(f"Catalog persisted: {catalog_persisted[0].parent}/{cfg.name}-catalog.*.dar")
+        log.info(f"Catalog persisted: {catalog_persisted[0].parent}/{cfg.dar_name}-catalog.*.dar")
 
     # Final cleanup: drop the entire tmp/ tree (catalog, dar internals).
     # If workdir is the default hidden one, also remove it — the only
@@ -305,6 +320,6 @@ def cmd_create(args):
     print(f"  PAR2:         {cfg.redundancy}% per disc")
     print(f"  Compression:  {cfg.comp_str}")
     print(f"  Images:       {images_dir}")
-    print(f"  Catalog:      {output_dir}/{cfg.name}-catalog.*.dar")
+    print(f"  Catalog:      {output_dir}/{cfg.dar_name}-catalog.*.dar")
     print(f"\n  Next step:    bd-archive burn -i {output_dir}")
     print(f"  Cleanup:      rm -rf {output_dir}\n")
diff --git a/src/bd_archive/constants.py b/src/bd_archive/constants.py
index 05e33a2..6f36c33 100644
--- a/src/bd_archive/constants.py
+++ b/src/bd_archive/constants.py
@@ -25,9 +25,13 @@
 
 # ISO9660 caps the Primary Volume Descriptor's Volume Identifier at 32
 # bytes. mkisofs/growisofs reject longer labels outright. Volume labels
-# here are "<archive_name>_NNNN", so archive_name must leave room for
-# the 5-char disc suffix.
+# here are "<archive_name>_G<NN>_<NNNN>" — 9 fixed chars for the gen +
+# disc suffixes, leaving 23 chars for the (possibly truncated) name.
+# Filenames *inside* the ISO keep the untruncated archive name, so the
+# label is purely a human hint, not a technical identifier.
 ISO9660_VOLUME_LABEL_MAX = 32
+ISO9660_LABEL_SUFFIX_LEN = 9  # "_G<NN>_<NNNN>"
+ISO9660_LABEL_NAME_MAX = ISO9660_VOLUME_LABEL_MAX - ISO9660_LABEL_SUFFIX_LEN  # 23
 
 # PAR2 recovery volumes are named "<base>.volNNN+NN.par2"; the index file
 # is plain "<base>.par2". This pattern matches recovery volumes only.

From aeebee871f0092c4adf33034c81de008120610f8 Mon Sep 17 00:00:00 2001
From: Xitee1 <59659167+Xitee1@users.noreply.github.com>
Date: Tue, 12 May 2026 18:23:39 +0200
Subject: [PATCH 2/7] feat(create): incremental archives via --base
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 3 of incremental-archives. New `--base <catalog.dar>` flag on
`bd-archive create` makes the run produce an incremental archive
against the supplied isolated catalog. dar's `-A` flag does the
actual work; this commit wires it up end-to-end.

The base catalog filename encodes the predecessor generation
(`<name>-gen<N>-catalog.NNNN.dar`), so the new gen number is derived
without any sidecar metadata file. Legacy catalogs (pre-Phase-2,
filename `<name>-catalog.NNNN.dar`) are treated as Gen 1; the new
gen becomes Gen 2.

The pre-archive preview is now base-aware: when --base is given, the
estimated archive size reflects only files that are new or modified
since the base catalog (tools.dar.list_catalog_paths parses `dar -l`
output; mtime > catalog-mtime catches modifications heuristically).
Disc-count and last-disc-fill estimates use this delta, not the full
source — without this, the preview for an incremental would massively
overstate the archive size and disc count.

Chain identity is the archive name: a --base catalog whose embedded
archive name disagrees with -n fails with a clear error pointing at
the mismatch. Keeping the same name across generations is the user's
responsibility.

archive/dar_archive.py gains parse_dar_filename(), a single regex
that handles both Phase-2+ generational filenames and legacy ones.
Used here for --base validation and reusable by Phase 5's chain
detection in extract.

Manual e2e: Gen 1 full of 50 MiB source → 2 discs. Adding 15 MiB
of new files and running Gen 2 with --base produced a single-disc
incremental containing only the delta (phase3test-gen2.0001.dar of
15 MiB, plus its catalog).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/bd_archive/archive/dar_archive.py | 39 +++++++++++-
 src/bd_archive/archive/source_scan.py | 25 ++++++++
 src/bd_archive/cli.py                 |  9 +++
 src/bd_archive/commands/create.py     | 85 ++++++++++++++++++++++++---
 src/bd_archive/tools/dar.py           | 33 +++++++++++
 5 files changed, 182 insertions(+), 9 deletions(-)

diff --git a/src/bd_archive/archive/dar_archive.py b/src/bd_archive/archive/dar_archive.py
index c460422..e5a0c09 100644
--- a/src/bd_archive/archive/dar_archive.py
+++ b/src/bd_archive/archive/dar_archive.py
@@ -1,7 +1,37 @@
+import re
 from pathlib import Path
 
 from bd_archive.tools import dar
 
+# Matches both Phase-2+ generational filenames and legacy ones:
+#   photos-gen3.0001.dar          → ('photos', 3, False)
+#   photos-gen3-catalog.0001.dar  → ('photos', 3, True)
+#   photos.0001.dar               → ('photos', 1, False)  [legacy]
+#   photos-catalog.0001.dar       → ('photos', 1, True)   [legacy]
+# The non-greedy archive-name group keeps `-gen<N>` and `-catalog`
+# detection deterministic when the archive name itself contains
+# hyphens.
+_DAR_FILENAME_RE = re.compile(
+    r"^(?P<name>.+?)(?:-gen(?P<gen>\d+))?(?P<catalog>-catalog)?\.\d+\.dar$"
+)
+
+
+def parse_dar_filename(filename: str) -> tuple[str, int, bool] | None:
+    """Parse a dar slice or catalog filename.
+
+    Returns ``(archive_name, generation, is_catalog)`` or ``None`` if the
+    name does not look like a dar slice/catalog file. Generation
+    defaults to 1 for legacy (pre-Phase-2) filenames that lack the
+    ``-gen<N>`` segment.
+    """
+    m = _DAR_FILENAME_RE.match(filename)
+    if not m:
+        return None
+    name = m.group("name")
+    gen = int(m.group("gen")) if m.group("gen") else 1
+    is_catalog = m.group("catalog") is not None
+    return name, gen, is_catalog
+
 
 class DarArchive:
     def __init__(self, name: str, work_dir: Path):
@@ -27,9 +57,16 @@ def create(
         compression: str,
         comp_level: str | None,
         par2_hook: str | None = None,
+        ref_catalog: Path | None = None,
     ):
         dar.create_sliced(
-            self.base_path, source, slice_bytes, compression, comp_level, execute_hook=par2_hook
+            self.base_path,
+            source,
+            slice_bytes,
+            compression,
+            comp_level,
+            execute_hook=par2_hook,
+            ref_catalog=ref_catalog,
         )
 
     def isolate_catalog(self):
diff --git a/src/bd_archive/archive/source_scan.py b/src/bd_archive/archive/source_scan.py
index 814260a..7f3ec29 100644
--- a/src/bd_archive/archive/source_scan.py
+++ b/src/bd_archive/archive/source_scan.py
@@ -31,3 +31,28 @@ def scan_source(source: Path) -> SourceScan:
         except (OSError, ValueError):
             catalog += PER_ENTRY + 256
     return SourceScan(total_bytes=total, entry_count=count, catalog_est=catalog)
+
+
+def scan_delta_bytes(source: Path, known_paths: set[str], base_mtime: float) -> int:
+    """Sum sizes of files that are either new or modified vs. a base catalog.
+
+    Approximates the data payload size of an incremental archive for
+    preview purposes. A file is counted when either its relative path
+    is not in known_paths (truly new) or its mtime exceeds base_mtime
+    (likely modified since base). mtime is a heuristic — dar's actual
+    diff uses ctime/size/hash and may include or exclude slightly
+    different files; the estimate is good enough for disc-count
+    planning.
+    """
+    total = 0
+    for p in source.rglob("*"):
+        try:
+            if not p.is_file() or p.is_symlink():
+                continue
+            rel = p.relative_to(source).as_posix()
+            st = p.stat()
+            if rel not in known_paths or st.st_mtime > base_mtime:
+                total += st.st_size
+        except (OSError, ValueError):
+            pass
+    return total
diff --git a/src/bd_archive/cli.py b/src/bd_archive/cli.py
index c6ab118..d0de7a4 100644
--- a/src/bd_archive/cli.py
+++ b/src/bd_archive/cli.py
@@ -58,6 +58,15 @@ def build_parser() -> argparse.ArgumentParser:
         help="Compression algorithm (default: zstd)",
     )
     cr.add_argument("-l", "--level", help="Compression level")
+    cr.add_argument(
+        "--base",
+        default=None,
+        help="Path to the isolated catalog of a previous generation "
+        "(e.g. <prev-output>/<name>-gen<N>-catalog.0001.dar). When set, "
+        "this run produces an incremental archive (Gen N+1) containing "
+        "only files new or changed since that catalog. Archive name "
+        "(-n) must match the predecessor — chain identity is the name.",
+    )
     ratio_group = cr.add_mutually_exclusive_group()
     ratio_group.add_argument(
         "--ratio",
diff --git a/src/bd_archive/commands/create.py b/src/bd_archive/commands/create.py
index 2a5a61a..340dffc 100644
--- a/src/bd_archive/commands/create.py
+++ b/src/bd_archive/commands/create.py
@@ -1,4 +1,5 @@
 import contextlib
+import re
 import shlex
 import shutil
 import sys
@@ -6,9 +7,9 @@
 
 from bd_archive import __version__
 from bd_archive.archive.config import ArchiveConfig, write_readme
-from bd_archive.archive.dar_archive import DarArchive
+from bd_archive.archive.dar_archive import DarArchive, parse_dar_filename
 from bd_archive.archive.sizing import compute_slice_bytes, measure_compression_ratio
-from bd_archive.archive.source_scan import scan_source
+from bd_archive.archive.source_scan import scan_delta_bytes, scan_source
 from bd_archive.constants import (
     DISC_END_MARGIN,
     ISO9660_LABEL_NAME_MAX,
@@ -18,11 +19,47 @@
 from bd_archive.shell.deps import check_deps
 from bd_archive.shell.format import human_bytes
 from bd_archive.tools import mkisofs
+from bd_archive.tools.dar import list_catalog_paths
 from bd_archive.tools.mediainfo import detect_disc_capacity
 from bd_archive.tools.optical import resolve_device
 from bd_archive.ui.logger import log
 from bd_archive.ui.prompts import prompt_yn
 
+# Catalog slice files end in ".NNNN.dar"; strip that to get the dar
+# basename suitable for `-A`. dar resolves the actual slice file(s)
+# from the basename, so we never hand it the raw filename.
+_CATALOG_SLICE_SUFFIX_RE = re.compile(r"\.\d+\.dar$")
+
+
+def _resolve_base(base_arg: str, archive_name: str) -> tuple[Path, int]:
+    """Validate and unpack a --base argument.
+
+    Returns ``(catalog_basename_path, base_generation)``. Raises
+    SystemExit with a user-readable error if the path is missing, the
+    filename doesn't look like a dar catalog slice, or the embedded
+    archive name disagrees with ``-n``.
+    """
+    base_path = Path(base_arg).resolve()
+    if not base_path.is_file():
+        log.error(f"--base path does not exist: {base_path}")
+        sys.exit(1)
+    parsed = parse_dar_filename(base_path.name)
+    if parsed is None or not parsed[2]:
+        log.error(
+            f"--base must point to a dar catalog slice "
+            f"(<name>[-gen<N>]-catalog.NNNN.dar); got '{base_path.name}'"
+        )
+        sys.exit(1)
+    base_name, base_gen, _ = parsed
+    if base_name != archive_name:
+        log.error(
+            f"--base belongs to archive '{base_name}' but -n is '{archive_name}'. "
+            f"Chain identity is the archive name; keep it consistent across generations."
+        )
+        sys.exit(1)
+    base_stem = _CATALOG_SLICE_SUFFIX_RE.sub("", base_path.name)
+    return base_path.parent / base_stem, base_gen
+
 
 def cmd_create(args):
     check_deps("dar", "par2", "mkisofs", "dvd+rw-mediainfo")
@@ -45,6 +82,15 @@ def cmd_create(args):
             f"('{args.name[:ISO9660_LABEL_NAME_MAX]}'). Filenames on disc keep the full name."
         )
 
+    # --base: parse and validate. Sets `ref_catalog` (dar -A argument)
+    # and `generation` (current run's gen number = base_gen + 1).
+    ref_catalog: Path | None = None
+    generation = 1
+    if args.base is not None:
+        ref_catalog, base_gen = _resolve_base(args.base, args.name)
+        generation = base_gen + 1
+        log.info(f"Incremental against: {ref_catalog.name} (Gen {base_gen}) → new Gen {generation}")
+
     source = Path(args.source).resolve()
     if not source.is_dir():
         log.error(f"Does not exist: {source}")
@@ -107,7 +153,21 @@ def cmd_create(args):
         ratio = 1.0
         ratio_source = "default (no compression assumed)"
 
-    archive_est = int(scan.total_bytes * ratio)
+    # For an incremental, the data payload is only new/changed files;
+    # estimating against the full source overstates disc count and
+    # last-disc fill. Re-scan the source against the base catalog to
+    # get a delta-aware payload size. mtime is a heuristic — see
+    # scan_delta_bytes for why it's good enough for previews.
+    if ref_catalog is not None:
+        base_paths = list_catalog_paths(ref_catalog)
+        # Stat the user-supplied catalog slice file directly — its mtime
+        # is the timestamp dar wrote the catalog at, which we use as the
+        # cutoff for "modified since base".
+        base_mtime = Path(args.base).resolve().stat().st_mtime
+        delta_bytes = scan_delta_bytes(source, base_paths, base_mtime)
+        archive_est = int(delta_bytes * ratio)
+    else:
+        archive_est = int(scan.total_bytes * ratio)
     n_discs = max(1, (archive_est + slice_bytes - 1) // slice_bytes)
     last_slice = archive_est - (n_discs - 1) * slice_bytes
     if last_slice == 0:
@@ -119,15 +179,13 @@ def cmd_create(args):
     last_disc_free_raw = int(last_disc_free / max(ratio, 0.001))
 
     par2_est = slice_bytes * args.redundancy // 100
-    # Phase 2: every archive starts as Gen 1. Phase 3 lets `--base`
-    # derive higher generation numbers from a predecessor catalog.
     cfg = ArchiveConfig(
         name=args.name,
         disc_bytes=raw_capacity,
         redundancy=args.redundancy,
         compression=args.compression,
         comp_level=args.level,
-        generation=1,
+        generation=generation,
     )
 
     log.step("Source")
@@ -140,7 +198,8 @@ def cmd_create(args):
     log.info(f"Slice size:       {human_bytes(slice_bytes)}")
     log.info(f"PAR2 redundancy:  {cfg.redundancy}% (~{human_bytes(par2_est)})")
     log.info(f"Compression:      {cfg.comp_str} (ratio {ratio:.3f}, {ratio_source})")
-    log.info(f"Estimated archive: {human_bytes(archive_est)}")
+    archive_kind = "delta vs base" if ref_catalog is not None else "full source"
+    log.info(f"Estimated archive: {human_bytes(archive_est)} ({archive_kind})")
 
     log.step("Estimate")
     fill_pct = last_disc_content * 100 // sizing_target
@@ -157,6 +216,9 @@ def cmd_create(args):
     log.info(f"Source:        {source}")
     log.info(f"Output:        {output_dir}")
     log.info(f"Workdir:       {work_dir}{' (default)' if workdir_is_default else ' (custom)'}")
+    log.info(f"Generation:    {cfg.generation} ({'incremental' if ref_catalog else 'full'})")
+    if ref_catalog is not None:
+        log.info(f"Base catalog:  {args.base}")
 
     if not args.yes and not prompt_yn("Proceed with creation?"):
         log.warn("Cancelled by user")
@@ -184,7 +246,14 @@ def cmd_create(args):
     par2_hook = (
         f'{shlex.quote(sys.executable)} -m bd_archive._par2_helper "%p" "%b" %N {cfg.redundancy}'
     )
-    dar_archive.create(source, slice_bytes, cfg.compression, cfg.comp_level, par2_hook=par2_hook)
+    dar_archive.create(
+        source,
+        slice_bytes,
+        cfg.compression,
+        cfg.comp_level,
+        par2_hook=par2_hook,
+        ref_catalog=ref_catalog,
+    )
 
     slices = dar_archive.slices
     slice_count = len(slices)
diff --git a/src/bd_archive/tools/dar.py b/src/bd_archive/tools/dar.py
index 303c624..6d16b07 100644
--- a/src/bd_archive/tools/dar.py
+++ b/src/bd_archive/tools/dar.py
@@ -20,6 +20,7 @@ def create_sliced(
     compression: str,
     comp_level: str | None,
     execute_hook: str | None = None,
+    ref_catalog: Path | None = None,
 ):
     """Create a sliced dar archive with sha512 hashes.
 
@@ -27,6 +28,12 @@ def create_sliced(
     been completed (verified against dar 2.7.17). This is used by
     cmd_create to run par2 on each slice while its bytes are still in
     the OS page cache.
+
+    If ref_catalog is set, dar runs in incremental mode (`-A <ref>`):
+    only files new or changed relative to that reference catalog are
+    archived. Pass the basename of the catalog without the
+    ``.NNNN.dar`` suffix (dar accepts the catalog basename and finds
+    the slice files itself).
     """
     cmd = [
         "dar",
@@ -47,11 +54,37 @@ def create_sliced(
         if comp_level:
             flag += f":{comp_level}"
         cmd += [flag, "-am"]
+    if ref_catalog is not None:
+        cmd += ["-A", str(ref_catalog)]
     if execute_hook is not None:
         cmd += ["-E", execute_hook]
     run(cmd, label="dar")
 
 
+def list_catalog_paths(catalog_base: Path) -> set[str]:
+    """Return the set of relative paths stored in a dar catalog.
+
+    Runs ``dar -l <catalog_base> -as`` and parses the listing. dar's
+    entry lines use tab separators between the user, group, size, date,
+    and filename columns — the filename is always the last tab-separated
+    field. Header and separator lines lack tabs entirely, so the
+    "contains a tab" filter is sufficient to discard them.
+
+    Directories are included; the consumer (auto-defer pool filter)
+    treats the set as "anything dar already knows about", which keeps
+    the filter conservative.
+    """
+    r = run(["dar", "-l", str(catalog_base), "-as", "-Q"], capture=True, check=True)
+    paths: set[str] = set()
+    for line in r.stdout.splitlines():
+        if "\t" not in line:
+            continue
+        path = line.split("\t")[-1].rstrip()
+        if path:
+            paths.add(path)
+    return paths
+
+
 def isolate_catalog(base_path: Path):
     """Isolate the catalog into a separate dar archive with sha512 hashes."""
     run(

From 180d9ed76561ec4935db4b089bd894987aa879bf Mon Sep 17 00:00:00 2001
From: Xitee1 <59659167+Xitee1@users.noreply.github.com>
Date: Tue, 12 May 2026 18:27:10 +0200
Subject: [PATCH 3/7] feat(create): --min-last-disc-fill auto-defers newest
 files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 4 of incremental-archives. When the projected last-disc fill is
below `--min-last-disc-fill PERCENT`, bd-archive automatically defers
the newest files until enough has been removed to either drop a disc
from the set or empty the candidate pool.

Pool selection is deliberately conservative:

- With `--base`: only files whose relative path is not in the base
  catalog (truly new). Determined via `dar -l <base>` parse, so
  files that have merely had their mtime touched on disk stay in
  the archive (no silent loss across generations).

- Without `--base` (Full): all files are candidates, with a loud
  warning that deferred files won't be archived until a future
  incremental run picks them up.

The preview block now shows what would be deferred (file count, byte
count, oldest mtime, sample paths) BEFORE the confirm prompt, so the
user can abort if the plan looks wrong.

When the threshold is unreachable (entire pool deferred without ever
crossing the fill threshold), the run still proceeds with the partial
deferral — the user gets a warning, not an abort. The only fatal case
is "deferring everything would archive zero bytes", which exits 1.

archive/source_scan.py grows a SourceFile dataclass and
list_source_files() walker — separate from scan_source's aggregate
view because the defer algorithm needs per-file rel_path/size/mtime.

tools/dar.py::create_sliced grows an `excludes` parameter that turns
each entry into a `-P <path>` flag, with dar -P being the
relative-subpath exclude operator.

Manual e2e: Gen 1 of 50 MiB, then 60 MiB delta. Without --min-last-
disc-fill: 2 discs, last disc 46%. With --min-last-disc-fill 50:
20 MiB deferred (2 files), single disc, last fill 94%.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/bd_archive/archive/dar_archive.py |   2 +
 src/bd_archive/archive/source_scan.py |  29 ++++++++
 src/bd_archive/cli.py                 |  11 +++
 src/bd_archive/commands/create.py     | 103 +++++++++++++++++++++++---
 src/bd_archive/tools/dar.py           |   8 ++
 5 files changed, 144 insertions(+), 9 deletions(-)

diff --git a/src/bd_archive/archive/dar_archive.py b/src/bd_archive/archive/dar_archive.py
index e5a0c09..23040d8 100644
--- a/src/bd_archive/archive/dar_archive.py
+++ b/src/bd_archive/archive/dar_archive.py
@@ -58,6 +58,7 @@ def create(
         comp_level: str | None,
         par2_hook: str | None = None,
         ref_catalog: Path | None = None,
+        excludes: list[str] | None = None,
     ):
         dar.create_sliced(
             self.base_path,
@@ -67,6 +68,7 @@ def create(
             comp_level,
             execute_hook=par2_hook,
             ref_catalog=ref_catalog,
+            excludes=excludes,
         )
 
     def isolate_catalog(self):
diff --git a/src/bd_archive/archive/source_scan.py b/src/bd_archive/archive/source_scan.py
index 7f3ec29..fbde050 100644
--- a/src/bd_archive/archive/source_scan.py
+++ b/src/bd_archive/archive/source_scan.py
@@ -9,6 +9,35 @@ class SourceScan:
     catalog_est: int  # estimated isolated dar catalog size
 
 
+@dataclass(frozen=True)
+class SourceFile:
+    """Per-file metadata used by the auto-defer pool."""
+
+    rel_path: str  # POSIX-style relative path from source root (matches dar)
+    size: int
+    mtime: float
+
+
+def list_source_files(source: Path) -> list[SourceFile]:
+    """Walk source, return regular-file entries with size + mtime.
+
+    Used by the auto-defer pool builder. Skips directories, symlinks,
+    and anything we can't stat. The rel_path uses POSIX separators so
+    it compares directly against dar's catalog path listing.
+    """
+    files: list[SourceFile] = []
+    for p in source.rglob("*"):
+        try:
+            if not p.is_file() or p.is_symlink():
+                continue
+            rel = p.relative_to(source).as_posix()
+            st = p.stat()
+            files.append(SourceFile(rel_path=rel, size=st.st_size, mtime=st.st_mtime))
+        except (OSError, ValueError):
+            pass
+    return files
+
+
 def scan_source(source: Path) -> SourceScan:
     """Walk source once; return size, entry count, and catalog estimate.
 
diff --git a/src/bd_archive/cli.py b/src/bd_archive/cli.py
index d0de7a4..cbffcdd 100644
--- a/src/bd_archive/cli.py
+++ b/src/bd_archive/cli.py
@@ -67,6 +67,17 @@ def build_parser() -> argparse.ArgumentParser:
         "only files new or changed since that catalog. Archive name "
         "(-n) must match the predecessor — chain identity is the name.",
     )
+    cr.add_argument(
+        "--min-last-disc-fill",
+        type=int,
+        default=0,
+        metavar="PERCENT",
+        help="Auto-defer newest files until the last disc of the set is "
+        "at least PERCENT full (0-100). With --base, defers only files "
+        "not already in the base catalog. Without --base (full archive), "
+        "defers any files — and they will NOT be archived until a future "
+        "incremental run picks them up. Default 0 = no deferral.",
+    )
     ratio_group = cr.add_mutually_exclusive_group()
     ratio_group.add_argument(
         "--ratio",
diff --git a/src/bd_archive/commands/create.py b/src/bd_archive/commands/create.py
index 340dffc..fe851f5 100644
--- a/src/bd_archive/commands/create.py
+++ b/src/bd_archive/commands/create.py
@@ -9,7 +9,12 @@
 from bd_archive.archive.config import ArchiveConfig, write_readme
 from bd_archive.archive.dar_archive import DarArchive, parse_dar_filename
 from bd_archive.archive.sizing import compute_slice_bytes, measure_compression_ratio
-from bd_archive.archive.source_scan import scan_delta_bytes, scan_source
+from bd_archive.archive.source_scan import (
+    SourceFile,
+    list_source_files,
+    scan_delta_bytes,
+    scan_source,
+)
 from bd_archive.constants import (
     DISC_END_MARGIN,
     ISO9660_LABEL_NAME_MAX,
@@ -158,6 +163,7 @@ def cmd_create(args):
     # last-disc fill. Re-scan the source against the base catalog to
     # get a delta-aware payload size. mtime is a heuristic — see
     # scan_delta_bytes for why it's good enough for previews.
+    base_paths: set[str] = set()
     if ref_catalog is not None:
         base_paths = list_catalog_paths(ref_catalog)
         # Stat the user-supplied catalog slice file directly — its mtime
@@ -168,13 +174,77 @@ def cmd_create(args):
         archive_est = int(delta_bytes * ratio)
     else:
         archive_est = int(scan.total_bytes * ratio)
-    n_discs = max(1, (archive_est + slice_bytes - 1) // slice_bytes)
-    last_slice = archive_est - (n_discs - 1) * slice_bytes
-    if last_slice == 0:
-        last_slice = slice_bytes
-    last_disc_content = (
-        last_slice + last_slice * args.redundancy // 100 + scan.catalog_est + PAR2_AND_MISC_OVERHEAD
-    )
+
+    def _layout(est: int) -> tuple[int, int, int]:
+        """(n_discs, last_disc_content, last_fill_pct) for a given archive size."""
+        n = max(1, (est + slice_bytes - 1) // slice_bytes)
+        last_sl = est - (n - 1) * slice_bytes
+        if last_sl == 0:
+            last_sl = slice_bytes
+        last_content = (
+            last_sl + last_sl * args.redundancy // 100
+            + scan.catalog_est + PAR2_AND_MISC_OVERHEAD
+        )
+        return n, last_content, last_content * 100 // sizing_target
+
+    n_discs, last_disc_content, fill_pct = _layout(archive_est)
+
+    # ── Auto-defer (--min-last-disc-fill) ───────────────────────────────
+    # When the last disc would be too empty, push newest files to a
+    # future generation so this set "rounds down" to fewer discs with
+    # higher fill. Pool is "files truly new vs. base catalog" when
+    # incremental, "all files" when full (with warning — those files
+    # won't be archived anywhere until a later incremental run picks
+    # them up).
+    deferred_files: list[SourceFile] = []
+    if args.min_last_disc_fill > 0 and fill_pct < args.min_last_disc_fill:
+        if ref_catalog is not None:
+            pool = [f for f in list_source_files(source) if f.rel_path not in base_paths]
+            pool_kind = "files not in base catalog"
+        else:
+            pool = list_source_files(source)
+            pool_kind = "all source files"
+            log.warn(
+                "--min-last-disc-fill on a Full archive defers files that will "
+                "NOT be archived until a future incremental run picks them up."
+            )
+        pool.sort(key=lambda f: f.mtime, reverse=True)
+
+        cum_size = 0
+        reached = False
+        for f in pool:
+            cum_size += f.size
+            new_est = max(0, archive_est - int(cum_size * ratio))
+            new_n, new_last, new_fill = _layout(new_est) if new_est > 0 else (0, 0, 0)
+            deferred_files.append(f)
+            if new_est == 0:
+                # Pool would empty the archive entirely — stop here.
+                break
+            if new_fill >= args.min_last_disc_fill:
+                archive_est, n_discs, last_disc_content, fill_pct = (
+                    new_est, new_n, new_last, new_fill
+                )
+                reached = True
+                break
+
+        if not reached:
+            log.warn(
+                f"--min-last-disc-fill {args.min_last_disc_fill}% not reachable; "
+                f"pool ({len(pool)} candidate file(s), {pool_kind}) exhausted "
+                f"after deferring {human_bytes(cum_size)}. Proceeding with "
+                f"what we have."
+            )
+            if archive_est - int(cum_size * ratio) > 0:
+                archive_est, n_discs, last_disc_content, fill_pct = (
+                    archive_est - int(cum_size * ratio), new_n, new_last, new_fill
+                )
+            else:
+                log.error(
+                    "Deferring all candidates would leave 0 bytes to archive. "
+                    "Lower --min-last-disc-fill or skip the run."
+                )
+                sys.exit(1)
+
     last_disc_free = max(0, sizing_target - last_disc_content)
     last_disc_free_raw = int(last_disc_free / max(ratio, 0.001))
 
@@ -202,7 +272,6 @@ def cmd_create(args):
     log.info(f"Estimated archive: {human_bytes(archive_est)} ({archive_kind})")
 
     log.step("Estimate")
-    fill_pct = last_disc_content * 100 // sizing_target
     log.info(f"Discs needed:     {n_discs}")
     log.info(
         f"Last disc fill:   {human_bytes(last_disc_content)} / "
@@ -212,6 +281,21 @@ def cmd_create(args):
     if abs(ratio - 1.0) > 0.001:
         log.info(f"                  ~{human_bytes(last_disc_free_raw)} raw (at ratio {ratio:.3f})")
 
+    if deferred_files:
+        defer_bytes = sum(f.size for f in deferred_files)
+        oldest_deferred = min(f.mtime for f in deferred_files)
+        from datetime import datetime as _dt
+
+        log.step(f"Auto-defer (--min-last-disc-fill {args.min_last_disc_fill}%)")
+        log.info(f"Files deferred:   {len(deferred_files)}")
+        log.info(f"Bytes deferred:   {human_bytes(defer_bytes)} (raw)")
+        log.info(f"Oldest deferred:  mtime {_dt.fromtimestamp(oldest_deferred):%Y-%m-%d %H:%M}")
+        sample = deferred_files[:3]
+        for f in sample:
+            log.info(f"  - {f.rel_path}")
+        if len(deferred_files) > len(sample):
+            log.info(f"  - ... and {len(deferred_files) - len(sample)} more")
+
     log.step("Configuration")
     log.info(f"Source:        {source}")
     log.info(f"Output:        {output_dir}")
@@ -253,6 +337,7 @@ def cmd_create(args):
         cfg.comp_level,
         par2_hook=par2_hook,
         ref_catalog=ref_catalog,
+        excludes=[f.rel_path for f in deferred_files] if deferred_files else None,
     )
 
     slices = dar_archive.slices
diff --git a/src/bd_archive/tools/dar.py b/src/bd_archive/tools/dar.py
index 6d16b07..1769d8c 100644
--- a/src/bd_archive/tools/dar.py
+++ b/src/bd_archive/tools/dar.py
@@ -21,6 +21,7 @@ def create_sliced(
     comp_level: str | None,
     execute_hook: str | None = None,
     ref_catalog: Path | None = None,
+    excludes: list[str] | None = None,
 ):
     """Create a sliced dar archive with sha512 hashes.
 
@@ -34,6 +35,10 @@ def create_sliced(
     archived. Pass the basename of the catalog without the
     ``.NNNN.dar`` suffix (dar accepts the catalog basename and finds
     the slice files itself).
+
+    If excludes is set, each entry is passed to dar as ``-P <path>``,
+    excluding that exact relative subpath from the archive. Used by
+    auto-defer to push specific files to a later generation.
     """
     cmd = [
         "dar",
@@ -56,6 +61,9 @@ def create_sliced(
         cmd += [flag, "-am"]
     if ref_catalog is not None:
         cmd += ["-A", str(ref_catalog)]
+    if excludes:
+        for path in excludes:
+            cmd += ["-P", path]
     if execute_hook is not None:
         cmd += ["-E", execute_hook]
     run(cmd, label="dar")

From df3c8d9172d740e70392039ade14aff70d270440 Mon Sep 17 00:00:00 2001
From: Xitee1 <59659167+Xitee1@users.noreply.github.com>
Date: Tue, 12 May 2026 18:31:00 +0200
Subject: [PATCH 4/7] feat(extract): chain-aware whole-restore in one run
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 5 of incremental-archives. `bd-archive extract` now restores
an entire incremental chain (Gen 1 + all subsequent gens) in a
single invocation. Previously it could only restore one archive set.

User flow:

- User runs `bd-archive extract -o ./restored`.
- Tool prompts for discs one at a time. Each disc's filenames are
  parsed (via archive.dar_archive.parse_dar_filename) to detect the
  chain name and which generation that disc belongs to. Order
  doesn't matter; discs from any gen, any order, all accepted.
- All slices land in one flat staging dir. Different generations
  have different dar basenames (photos-gen1, photos-gen2, …), so
  they coexist without collision.
- When the user says "no more discs", the tool runs `dar -x` once
  per generation in order. The first gen extracts into the clean
  output; later gens use dar's -wa flag to overwrite files that
  earlier gens already wrote (later gens carry the newer content).

The current archive_name variable is replaced with two pieces of
state: chain_name (the -n value, identical across all gens) and a
gen→dar_basename mapping (because legacy pre-Phase-2 gen 1 archives
have basename "photos" while new ones have "photos-gen1").

Per-generation catalog verification: each gen has its own catalog
file with a different basename, so the "verified" flag is now a
dict keyed by gen number rather than a single bool. A catalog slice
that fails its sha512 check is dropped from staging so the next
disc of the same gen can re-supply it — same convergence logic as
before, just generation-scoped.

The damage path (par2 repair) is unchanged in mechanics.

tools/dar.py::extract_sequential grows an `overwrite` parameter
that toggles dar's `-wa` flag. Required for chain extracts where
gen N's data replaces gen N-1's; no effect on the first gen which
extracts into an empty output dir.

Smoketest: built a 2-gen chain (5 original files + 1 sub-dir file,
then 1 modified + 1 new in gen 2), invoked extract_sequential for
each gen in order against staged slices. diff -rq between source
and restored output: byte-identical, no differences.

Disc-mounting flow (prompt/mount/copy/verify) is preserved from
the previous implementation; refactored to track per-gen state but
the per-disc UX is the same. A full e2e against a real optical
drive remains a manual user verification step.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/bd_archive/commands/extract.py | 219 +++++++++++++++++------------
 src/bd_archive/tools/dar.py        |   8 ++
 2 files changed, 138 insertions(+), 89 deletions(-)

diff --git a/src/bd_archive/commands/extract.py b/src/bd_archive/commands/extract.py
index 93fbba3..a776ea6 100644
--- a/src/bd_archive/commands/extract.py
+++ b/src/bd_archive/commands/extract.py
@@ -1,10 +1,12 @@
 import contextlib
+import re
 import shutil
 import sys
 import tempfile
 from pathlib import Path
 
 from bd_archive.archive.checksums import verify_slice
+from bd_archive.archive.dar_archive import parse_dar_filename
 from bd_archive.archive.disc import DiscIO
 from bd_archive.shell.deps import check_deps
 from bd_archive.shell.format import human_bytes
@@ -15,6 +17,17 @@
 from bd_archive.ui.progress import Progress, copy_with_progress
 from bd_archive.ui.prompts import prompt_disc, prompt_yn
 
+# A dar slice or catalog filename ends in ".NNNN.dar"; stripping that
+# off yields the dar archive basename (e.g. "photos-gen1" or, on legacy
+# pre-Phase-2 archives, just "photos"). That basename is what dar -x
+# wants as input, and what we use to group files by generation in
+# staging.
+_SLICE_SUFFIX_RE = re.compile(r"\.\d+\.dar$")
+
+
+def _dar_basename(filename: str) -> str:
+    return _SLICE_SUFFIX_RE.sub("", filename)
+
 
 def _mount_with_prompt(dio: DiscIO, mount_dir: Path, prompt_msg: str) -> Path | None:
     while True:
@@ -28,23 +41,29 @@ def _mount_with_prompt(dio: DiscIO, mount_dir: Path, prompt_msg: str) -> Path |
 
 
 def _copy_disc_data(
-    mounted: Path, archive_name: str, staging: Path, catalog_verified: bool
+    mounted: Path, disc_basename: str, staging: Path, catalog_verified: bool
 ) -> list[Path]:
-    """Copy slices + sha512 sidecars (and catalog if not yet verified) from
-    disc to staging. par2 files are NOT copied — fetched lazily on damage.
-    Returns list of slice paths in staging for this disc."""
+    """Copy slices + sha512 sidecars (and the catalog of this disc's
+    generation, if not yet verified) from disc to staging. par2 files
+    are NOT copied — fetched lazily on damage.
+
+    Returns the list of slice paths in staging that came from this disc.
+    """
+    catalog_basename = f"{disc_basename}-catalog"
     if not catalog_verified:
-        for cat in mounted.glob(f"{archive_name}-catalog.*.dar"):
+        for cat in mounted.glob(f"{catalog_basename}.*.dar"):
             dest = staging / cat.name
             if not dest.exists():
                 shutil.copy2(cat, dest)
-        for cat_hash in mounted.glob(f"{archive_name}-catalog.*.dar.sha512"):
+        for cat_hash in mounted.glob(f"{catalog_basename}.*.dar.sha512"):
             dest = staging / cat_hash.name
             if not dest.exists():
                 shutil.copy2(cat_hash, dest)
 
     slices = sorted(
-        p for p in mounted.glob(f"{archive_name}.[0-9]*.dar") if "-catalog" not in p.name
+        p
+        for p in mounted.glob(f"{disc_basename}.[0-9]*.dar")
+        if "-catalog" not in p.name
     )
     copied: list[Path] = []
     for sp in slices:
@@ -61,17 +80,16 @@ def _copy_disc_data(
     return copied
 
 
-def _verify_catalog_on_staging(staging: Path, archive_name: str) -> bool:
-    """Verify every catalog slice currently in staging. Drop any that
-    fail sha512. Return True only when all present slices verified.
+def _verify_catalog_on_staging(staging: Path, catalog_basename: str) -> bool:
+    """Verify every catalog slice currently in staging for one generation.
+    Drop any that fail sha512 so the next disc carrying them can refetch.
 
-    Iterates all slices (no early return) so multi-slice catalogs with
-    multiple failures get every corrupt slice flagged + deleted in a
-    single pass. The next disc's _copy_disc_data re-fetches anything
-    missing, so the loop converges in fewer disc-iterations than the
-    naive 'stop at first failure' variant.
+    Returns True only when every present slice verified. All slices are
+    checked in a single pass (no early return), so multi-slice catalogs
+    with several corrupt slices converge in fewer disc-iterations than
+    a 'stop at first failure' variant would.
     """
-    catalog_files = sorted(staging.glob(f"{archive_name}-catalog.*.dar"))
+    catalog_files = sorted(staging.glob(f"{catalog_basename}.*.dar"))
     if not catalog_files:
         return False
     all_ok = True
@@ -147,16 +165,18 @@ def cmd_extract(args):
     log.info(f"Device:   {device}")
     log.info(f"Output:   {output_dir}")
     log.info(f"Staging:  {staging}")
-
-    archive_name: str | None = None
-    catalog_verified = False
-    disc_num = 0
-    # Slices that sha512 + par2 both failed on. Files coming from them
-    # may end up corrupt in the output — we collect this so the final
-    # corrupted-files.txt explains which disc to blame even when dar's
-    # per-file error parser couldn't pinpoint individual files (e.g.
-    # archive-metadata corruption).
+    log.info("Insert discs from any generation, in any order. The tool")
+    log.info("detects generations from filenames and extracts the chain")
+    log.info("in order at the end.")
+
+    # Per-generation state. Catalog verification and dar basename live
+    # under each gen because the chain may mix legacy (gen 1 without
+    # -gen<N> suffix) and new-format generations.
+    chain_name: str | None = None
+    catalogs_verified: dict[int, bool] = {}
+    gen_basenames: dict[int, str] = {}
     unrepairable_slices: list[str] = []
+    disc_num = 0
 
     while True:
         target = disc_num + 1
@@ -169,19 +189,37 @@ def cmd_extract(args):
             sys.exit(1)
 
         try:
-            if archive_name is None:
-                dar_files = [p for p in mounted.glob("*.dar") if "-catalog" not in p.name]
-                if not dar_files:
-                    log.error("No dar files found on disc — try another")
-                    continue
-                archive_name = dar_files[0].stem.rsplit(".", 1)[0]
-                log.info(f"Archive detected: {archive_name}")
+            # Detect chain name + generation from any slice filename.
+            dar_files = [p for p in mounted.glob("*.dar") if "-catalog" not in p.name]
+            if not dar_files:
+                log.error("No dar files found on disc — try another")
+                continue
+            parsed = parse_dar_filename(dar_files[0].name)
+            if parsed is None:
+                log.error(f"Unrecognised dar filename: {dar_files[0].name}")
+                continue
+            disc_name, disc_gen, _ = parsed
+            disc_basename = _dar_basename(dar_files[0].name)
+
+            if chain_name is None:
+                chain_name = disc_name
+                log.info(f"Chain: {chain_name}")
+            elif disc_name != chain_name:
+                log.error(
+                    f"Disc belongs to chain '{disc_name}', but this run is for "
+                    f"chain '{chain_name}'. Eject and insert a matching disc."
+                )
+                continue
+
+            log.info(f"Disc {target}: Gen {disc_gen} ({disc_basename})")
+            gen_basenames.setdefault(disc_gen, disc_basename)
+            catalog_verified = catalogs_verified.get(disc_gen, False)
 
             disc_num = target
 
             # ── 2. Copy data (no par2) ────────────────────────────────────
             log.info(f"Copying disc {disc_num}...")
-            copied = _copy_disc_data(mounted, archive_name, staging, catalog_verified)
+            copied = _copy_disc_data(mounted, disc_basename, staging, catalog_verified)
             log.ok(f"  {len(copied)} slice(s) staged")
         finally:
             dio.umount(mounted)
@@ -189,11 +227,11 @@ def cmd_extract(args):
                 mount_dir.rmdir()
             dio.eject()
 
-        # ── 3. Verify catalog (only first time it lands intact) ──────────
-        if not catalog_verified:
-            log.info("Verifying catalog on staging...")
-            if _verify_catalog_on_staging(staging, archive_name):
-                catalog_verified = True
+        # ── 3. Verify catalog for this generation (first time it lands) ───
+        if not catalogs_verified.get(disc_gen, False):
+            log.info(f"Verifying Gen {disc_gen} catalog on staging...")
+            if _verify_catalog_on_staging(staging, f"{disc_basename}-catalog"):
+                catalogs_verified[disc_gen] = True
 
         # ── 4. Verify slices on staging via sha512 ───────────────────────
         log.info(f"Verifying disc {disc_num} slices on staging...")
@@ -222,9 +260,8 @@ def cmd_extract(args):
                         continue
                     log.error(f"  {sp.name}: unrecoverable damage")
                     log.warn(
-                        f"  {sp.name}: keeping as-is — files from "
-                        f"this slice may be corrupt; will be listed "
-                        f"in corrupted-files.txt"
+                        f"  {sp.name}: keeping as-is — files from this slice may "
+                        f"be corrupt; will be listed in corrupted-files.txt"
                     )
                     unrepairable_slices.append(sp.name)
             finally:
@@ -234,56 +271,61 @@ def cmd_extract(args):
                 dio.eject()
             _cleanup_par2(staging)
 
-        collected = sorted(staging.glob(f"{archive_name}.[0-9]*.dar"))
-        collected = [c for c in collected if "-catalog" not in c.name]
-        log.info(f"Collected: {len(collected)} slice(s)")
+        # Report current chain collection state.
+        gens_collected = sorted(gen_basenames)
+        log.info(f"Chain so far: Gen {gens_collected} ({disc_num} disc(s) total)")
 
         if not prompt_yn("Insert another disc?"):
             break
 
-    # ── Extract ─────────────────────────────────────────────────────────
-    log.step("Extracting archive")
-    collected = [
-        c for c in sorted(staging.glob(f"{archive_name}.[0-9]*.dar")) if "-catalog" not in c.name
-    ]
-    log.info(f"Slices: {len(collected)}")
-    log.info(f"Output: {output_dir}")
-
-    dar_base = staging / archive_name
-    catalog_base = staging / f"{archive_name}-catalog"
-    has_catalog = any(staging.glob(f"{archive_name}-catalog.*.dar"))
-
-    rc, corrupted_files = dar.extract_sequential(
-        dar_base,
-        output_dir,
-        catalog_base=catalog_base if has_catalog else None,
-    )
+    if chain_name is None:
+        log.error("No discs processed")
+        sys.exit(1)
 
-    if rc == 0 and not corrupted_files and not unrepairable_slices:
+    # ── Extract: one dar -x per generation in order ──────────────────────
+    log.step("Extracting archive chain")
+    sorted_gens = sorted(gen_basenames)
+    log.info(f"Chain: {chain_name}")
+    log.info(f"Generations: {sorted_gens}")
+
+    all_corrupted: list[str] = []
+    for i, gen in enumerate(sorted_gens):
+        basename = gen_basenames[gen]
+        log.info(f"Gen {gen}: dar -x {basename}")
+        catalog_basename = f"{basename}-catalog"
+        has_catalog = any(staging.glob(f"{catalog_basename}.*.dar"))
+        # Subsequent generations must overwrite earlier ones (later gens
+        # carry the newer file contents). The first extracted generation
+        # has nothing from this run to replace, so overwrite (-wa) is
+        # enabled only from the second one onward (i > 0).
+        rc, corrupted = dar.extract_sequential(
+            staging / basename,
+            output_dir,
+            catalog_base=staging / catalog_basename if has_catalog else None,
+            overwrite=i > 0,
+        )
+        all_corrupted.extend(corrupted)
+        if rc != 0:
+            log.error(f"Gen {gen} dar extract failed (exit {rc})")
+            log.info(f"Slices remain in: {staging}")
+            log.info(
+                f"Manual retry: dar -x {staging / basename} -R {output_dir} --sequential-read -wa"
+            )
+            sys.exit(1)
+
+    if not all_corrupted and not unrepairable_slices:
         log.ok("Extraction complete!")
-    elif rc == 0:
-        # dar exited cleanly but reported per-file CRC errors and/or
-        # we already know slices were unrepairable. Tell the user.
+    else:
         log.warn(
             f"Extraction finished with corruption: "
-            f"{len(corrupted_files)} file(s) reported by dar, "
+            f"{len(all_corrupted)} file(s) reported by dar, "
             f"{len(unrepairable_slices)} slice(s) unrepairable"
         )
-    else:
-        log.error(f"dar extraction failed (exit {rc})")
-        log.info(f"Slices are in: {staging}")
-        if has_catalog:
-            log.info(
-                f"Retry without rescue catalog: dar -x {dar_base} -R {output_dir} --sequential-read"
-            )
-        else:
-            log.info(f"Manual: dar -x {dar_base} -R {output_dir} --sequential-read")
-        sys.exit(1)
 
     # Write corrupted-files.txt manifest into output_dir (NOT into the
     # workdir, which may be auto-cleaned) when anything went sideways.
     manifest_path: Path | None = None
-    if corrupted_files or unrepairable_slices:
+    if all_corrupted or unrepairable_slices:
         manifest_path = output_dir / "corrupted-files.txt"
         lines = [
             "# bd-archive: corrupted-files manifest",
@@ -293,9 +335,9 @@ def cmd_extract(args):
             "# them with intact data if the par2 recovery succeeds.",
             "",
         ]
-        if corrupted_files:
-            lines.append(f"## {len(corrupted_files)} file(s) reported by dar with bad CRC:")
-            for fp in corrupted_files:
+        if all_corrupted:
+            lines.append(f"## {len(all_corrupted)} file(s) reported by dar with bad CRC:")
+            for fp in all_corrupted:
                 try:
                     rel = str(Path(fp).resolve().relative_to(output_dir.resolve()))
                 except ValueError:
@@ -325,19 +367,18 @@ def cmd_extract(args):
         shutil.rmtree(work_dir, ignore_errors=True)
 
     log.step("Restore complete")
-    print(f"\n  Archive: {archive_name}")
-    print(f"  Slices:  {len(collected)}")
-    print(f"  Discs:   {disc_num}")
-    print(f"  Output:  {output_dir}")
-    print(f"  Size:    {human_bytes(total)}")
+    print(f"\n  Chain:        {chain_name}")
+    print(f"  Generations:  {sorted_gens}")
+    print(f"  Discs:        {disc_num}")
+    print(f"  Output:       {output_dir}")
+    print(f"  Size:         {human_bytes(total)}")
     if manifest_path is not None:
-        print(f"  CORRUPT: {manifest_path}")
+        print(f"  CORRUPT:      {manifest_path}")
     if not workdir_is_default:
         print(f"\n  Cleanup staging: rm -rf {work_dir}")
     print()
 
     # Non-zero exit when corruption was detected so scripts know the
-    # restore was not fully clean. The output is still useful (best-
-    # effort restore), but callers should consult corrupted-files.txt.
-    if corrupted_files or unrepairable_slices:
+    # restore was not fully clean.
+    if all_corrupted or unrepairable_slices:
         sys.exit(1)
diff --git a/src/bd_archive/tools/dar.py b/src/bd_archive/tools/dar.py
index 1769d8c..75b1b28 100644
--- a/src/bd_archive/tools/dar.py
+++ b/src/bd_archive/tools/dar.py
@@ -128,6 +128,7 @@ def extract_sequential(
     base_path: Path,
     output_dir: Path,
     catalog_base: Path | None = None,
+    overwrite: bool = False,
 ) -> tuple[int, list[str]]:
     """Extract a dar archive with --sequential-read.
 
@@ -137,6 +138,11 @@ def extract_sequential(
     With a complete slice set, no prompts fire and the ESC stream
     goes unused.
 
+    Set overwrite=True to make dar replace existing files without
+    prompting (`-wa`). Required when extracting an incremental on
+    top of a previously-extracted generation, where later gens
+    update files that earlier gens already restored.
+
     Returns (exit_code, corrupted_files). corrupted_files contains
     the paths dar reported as "Bad CRC" during extract — these
     files were (partially) written to output and need attention.
@@ -144,6 +150,8 @@ def extract_sequential(
     the caller must check this list, not just the exit code.
     """
     cmd = ["dar", "-x", str(base_path), "-R", str(output_dir), "-O", "--sequential-read"]
+    if overwrite:
+        cmd.append("-wa")
     if catalog_base is not None:
         # -A uses the isolated catalog as rescue source — handles
         # corruption of the in-archive catalog (PAR2 covers slice

From a251eef797259b861f997b0f01de0bf668dd215e Mon Sep 17 00:00:00 2001
From: Xitee1 <59659167+Xitee1@users.noreply.github.com>
Date: Tue, 12 May 2026 18:34:18 +0200
Subject: [PATCH 5/7] docs: README + AGENTS sync with incremental-archives
 feature set

README gains an "Adding an incremental generation" section covering the
--base workflow and --min-last-disc-fill, and its extract section now
describes whole-chain restore. It also gains a "Chain identity =
archive name" callout near the top explaining the discipline of
keeping -n constant across generations.

The AGENTS.md create / extract architecture descriptions are rewritten
to cover the new naming scheme, --base flow, list_catalog_paths /
scan_delta_bytes, auto-defer pool semantics, per-gen catalog state,
and dar -x -wa chain restore. The layout section notes the new
constants (ISO9660_LABEL_NAME_MAX, ISO9660_LABEL_SUFFIX_LEN), new
helpers (parse_dar_filename, list_source_files / SourceFile,
scan_delta_bytes), and the extended dar wrapper surface.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 AGENTS.md | 18 +++++++++--------
 README.md | 59 ++++++++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 57 insertions(+), 20 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index b0c0ce2..baa784e 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -19,7 +19,7 @@ PYTHONPATH=src python3 -m bd_archive ...
 ```
 
 ```bash
-bd-archive create   -s <source> -n <name> -o <output> [-w <workdir>] [-D /dev/srN] [-b BYTES] [-r %] [-c zstd|lzma|...] [-l <level>] [--ratio <float> | --sample <path>] [-y]
+bd-archive create   -s <source> -n <name> -o <output> [-w <workdir>] [-D /dev/srN] [-b BYTES] [-r %] [-c zstd|lzma|...] [-l <level>] [--ratio <float> | --sample <path>] [--base <catalog.dar>] [--min-last-disc-fill PERCENT] [-y]
 bd-archive burn     -i <input> [-D /dev/srN] [--start N] [--no-verify] [--skip-fit-check] [-S <speed>]
 bd-archive verify   [<mountpoint|dir|/dev/srN|*.iso>]
 bd-archive extract  -o <output> [-D /dev/srN] [-w <workdir>]
@@ -47,11 +47,11 @@ src/bd_archive/
 ├── __main__.py         # entry point for `python -m bd_archive`
 ├── _par2_helper.py     # dar -E hook: invoked as `python -m bd_archive._par2_helper ...`
 ├── cli.py              # argparse + dispatch + top-level exception handling (uniform cancel/error output)
-├── constants.py        # MiB, DISC_OVERSIZE_TOLERANCE, PAR2_AND_MISC_OVERHEAD, DISC_END_MARGIN, POST_BURN_MOUNT_TIMEOUT, ISO9660_VOLUME_LABEL_MAX, PAR2_RECOVERY_RE
+├── constants.py        # MiB, DISC_OVERSIZE_TOLERANCE, PAR2_AND_MISC_OVERHEAD, DISC_END_MARGIN, POST_BURN_MOUNT_TIMEOUT, ISO9660_VOLUME_LABEL_MAX, ISO9660_LABEL_NAME_MAX, ISO9660_LABEL_SUFFIX_LEN, PAR2_RECOVERY_RE
 ├── ui/                 # logger, prompts (interactive), progress (byte-counted, TTY-aware)
 ├── shell/              # runner.py: run() (+ SIGINT handling); deps.py: check_deps(); format.py: human_bytes()
 ├── tools/              # one thin wrapper per external CLI
-│   ├── dar.py          # dar create_sliced/isolate_catalog/compress/extract_sequential (Bad-CRC parser)
+│   ├── dar.py          # dar create_sliced (incl. -A ref, -P excludes, -E hook) / isolate_catalog / compress / extract_sequential (-wa overwrite for chain restore, Bad-CRC parser) / list_catalog_paths (`dar -l` parse)
 │   ├── par2.py         # par2 create/verify/repair (+ VerifyResult, is_par2_index)
 │   ├── mkisofs.py      # ISO9660+UDF image build (`-iso-level 3 -udf -V -publisher -input-charset utf-8 -graft-points`)
 │   ├── growisofs.py    # burn (+ DeviceBusyError on sg lock, SIGINT double-press abort with BURN_ABORT_GRACE_S=5s)
@@ -63,11 +63,11 @@ src/bd_archive/
 │   └── lsof.py         # find_device_holders (optional — no-op if lsof absent)
 ├── archive/            # domain logic over tools/
 │   ├── checksums.py    # sha512 verify (verify_slice per-file, used by extract on staging)
-│   ├── config.py       # ArchiveConfig, write_readme
-│   ├── dar_archive.py  # DarArchive (slices, catalog, work-dir layout)
+│   ├── config.py       # ArchiveConfig (incl. generation, dar_name), write_readme
+│   ├── dar_archive.py  # DarArchive (slices, catalog, work-dir layout) + parse_dar_filename (chain/gen detection from filename)
 │   ├── disc.py         # DiscIO (mount/mount_with_retry/umount/eject/close_tray_if_open/burn) + find_sg_device
 │   ├── sizing.py       # compute_slice_bytes, measure_compression_ratio
-│   ├── source_scan.py  # SourceScan + scan_source
+│   ├── source_scan.py  # SourceScan + scan_source; SourceFile + list_source_files (auto-defer pool); scan_delta_bytes (incremental preview)
 │   └── verify.py       # verify_disc()
 └── commands/           # one file per subcommand
     ├── create.py
@@ -82,10 +82,12 @@ Layering: `commands/` → `archive/` → `tools/` → `shell/`. Lower layers nev
 
 Four subcommands form a pipeline. `create` previews disc count + last-disc fill before prompting for confirmation, so users can dry-run sizing without committing.
 
-1. **`create`** (`commands/create.py`) reads disc capacity via `tools.mediainfo.detect_disc_capacity` (or `args.bytes`), scans the source, and computes slice sizing plus a disc-count estimate (optionally measuring the compression ratio via `--sample`). The user confirms via `prompt_yn` before any heavy work begins (skip with `-y`). Then runs `tools.dar.create_sliced` with `--hash sha512 --min-digits 4 -Q` (plus `-z<algo>[:level] -am` when compression is enabled) to slice the source into per-disc-sized `.dar` files in `<workdir>/tmp/`. par2 is generated **inline** via dar's `-E` hook (`bd_archive._par2_helper`) — the hook fires after each slice is fully written, so par2 reads the slice while it is still hot in the OS page cache, eliminating most SSD read traffic of the create phase. After dar completes, the catalog is isolated. For each slice in order: regenerate `README.txt` with the right disc number and call `tools.mkisofs.build` (mkisofs `-iso-level 3 -udf -V <label> -publisher "bd-archive v<ver>" -input-charset utf-8 -graft-points`) to assemble `<output>/images/disc_NNNN.iso` directly from in-place files (no staging copies). The ISO file size is checked against the format-aware writable capacity as a hard limit. Phase 3 also asserts par2 files are present on disk — a missing file means the `-E` helper silently failed during dar create. After each ISO is built, the slice + par2 are deleted from `tmp/`; once all slices are processed, `tmp/` is wiped entirely. If `-w` was not supplied, the default `<output>/.bd-archive-work/` is also removed, so `<output>` ends up containing only `images/disc_*.iso`.
+1. **`create`** (`commands/create.py`) reads disc capacity via `tools.mediainfo.detect_disc_capacity` (or `args.bytes`), scans the source, and computes slice sizing plus a disc-count estimate (optionally measuring the compression ratio via `--sample`). The internal dar archive name is `<-n value>-gen<N>` where N is 1 for a full archive and `base_gen + 1` for an incremental against `--base <catalog.dar>` (base gen parsed from the catalog filename via `archive.dar_archive.parse_dar_filename`, which also handles legacy pre-`-gen<N>` filenames as gen 1). Volume labels are `<truncated_name>_G<NN>_<NNNN>` — names longer than `ISO9660_LABEL_NAME_MAX` (23) are truncated in the label only; filenames inside the ISO keep the full name. When `--base` is set, `tools.dar.list_catalog_paths` parses `dar -l` output to get the set of paths already in the predecessor, and `archive.source_scan.scan_delta_bytes` re-scans the source counting only new/modified files for the preview's archive-size estimate. The user confirms via `prompt_yn` before any heavy work begins (skip with `-y`). Then runs `tools.dar.create_sliced` with `--hash sha512 --min-digits 4 -Q` (plus `-z<algo>[:level] -am` when compression is enabled, `-A <ref_catalog>` for incrementals, `-P <path>` per excluded file from auto-defer) to slice the source into per-disc-sized `.dar` files in `<workdir>/tmp/`. par2 is generated **inline** via dar's `-E` hook (`bd_archive._par2_helper`) — the hook fires after each slice is fully written, so par2 reads the slice while it is still hot in the OS page cache, eliminating most SSD read traffic of the create phase. After dar completes, the catalog is isolated. For each slice in order: regenerate `README.txt` with the right disc number + generation and call `tools.mkisofs.build` (mkisofs `-iso-level 3 -udf -V <label> -publisher "bd-archive v<ver>" -input-charset utf-8 -graft-points`) to assemble `<output>/images/disc_NNNN.iso` directly from in-place files (no staging copies). 
**Catalog files go onto Disc 1 only** — discs 2..N carry only their slice + par2 + README; the dar slice on the last disc embeds the master catalog at its end (dar default), so every set still has two spatially separated catalog copies. After all ISOs are built, the isolated catalog is also copied to `<output>/<name>-gen<N>-catalog.*.dar` so the user can keep it in their regular backup and use it as `--base` for future generations. The ISO file size is checked against the format-aware writable capacity as a hard limit; a missing par2 file (helper silently failed) hard-errors. After each ISO is built, the slice + par2 are deleted from `tmp/`; once all slices are processed, `tmp/` is wiped entirely. If `-w` was not supplied, the default `<output>/.bd-archive-work/` is also removed, so `<output>` ends up containing only `images/disc_*.iso` and the persisted catalog.
+
+   **Auto-defer** (`--min-last-disc-fill PERCENT`): when the projected last-disc fill is below PERCENT, the newest-by-mtime files are pushed to a future generation until either the threshold is met or the candidate pool is exhausted. For incrementals (`--base` given), the pool is "files whose relative path is not in the base catalog" — strictly conservative, so an already-archived file whose mtime has drifted on disk is never lost. For full archives (no `--base`), the pool is "all source files" with a loud warning that deferred files won't be archived anywhere until a later incremental picks them up. Deferred files become `-P <relpath>` flags on dar. The preview block shows count, byte total, oldest mtime, and a sample of deferred paths before the confirm prompt.
 2. **`burn`** (`commands/burn.py`) iterates `<input>/images/disc_*.iso` lexically and burns each via `growisofs -use-the-force-luke=notray -dvd-compat -Z dev=image.iso` — a byte-for-byte ISO write, no on-the-fly mkisofs, so what's in the ISO file is exactly what ends up on disc. Volume label, publisher, file layout are all already in the file. Pre-burn fit check is **two-sided**: rejects too-small discs AND discs more than `DISC_OVERSIZE_TOLERANCE` (= 5%) larger than the ISO, guarding against wasting a 50 GB BD-DL on a 25 GB-sized archive. `--skip-fit-check` disables both directions. SIGINT is trapped during the burn itself: a first `Ctrl+C` warns and is ignored (cancelling mid-burn coasters the disc), a second within `BURN_ABORT_GRACE_S` (= 5 s) terminates growisofs and bubbles up as `KeyboardInterrupt`. growisofs runs in its own session (`start_new_session=True`) so the tty's SIGINT does not reach it directly. After burn, `DiscIO.close_tray_if_open` pulls the tray back in on auto-ejecting drives so the post-burn verify can mount. The post-burn verify runs `verify_disc` and loops on any mount/verify failure with a `Re-insert the disc … press Enter to retry` prompt, since some drives need a manual re-insert. Resumable via `--start N`; per-disc resume hints are logged on every cancel/error path. Catches `DeviceBusyError` from `tools/growisofs.py` (sg device locked) and offers an interactive retry — `tools.lsof.find_device_holders` is consulted to name the holding processes when available.
 3. **`verify`** (`commands/verify.py`) dispatches on target type: block device → mount; directory → check directly; **`.iso` file → loop-mount via `tools.udisks.loop_setup` + check + tear down**. The ISO branch makes pre-burn dry-run trivial: run `create`, then `verify images/disc_0001.iso` to confirm the image is internally consistent before touching media.
-4. **`extract`** (`commands/extract.py`) auto-detects the archive name from the first disc's `*.dar` filenames (no `-n` flag), then iterates discs interactively. For each disc: copy slice + sha512 sidecar (and the catalog on its first arrival) to staging in a single disc-read pass — par2 is **not** copied — then eject. The catalog is verified separately via `_verify_catalog_on_staging`: any failing catalog slice is deleted from staging so the **next** disc that carries it can re-fetch it (multi-disc catalogs converge in fewer disc-iterations than a stop-at-first-failure pass). Each slice is then verified via SHA-512 on the local copy. On corruption, the disc is re-mounted, just the par2 files for the affected slice are fetched, `par2 repair` runs in staging, and the slice is re-verified. If par2 cannot recover, the slice is kept as-is and recorded in `unrepairable_slices` — no prompt, no abort. After each disc the user is asked "Insert another disc?", allowing an early stop for partial restores (e.g. one disc lost). Once the user is done, `tools.dar.extract_sequential` does the final pass with a background thread feeding ESC bytes on stdin so dar's "missing slice" prompts auto-skip — a partial slice set still restores ~95% of files. dar 2.7 exits 0 even when per-file CRC errors occur, so the wrapper parses `Error while restoring <path> : Bad CRC` lines into a list. If `corrupted_files` OR `unrepairable_slices` is non-empty, `<output>/corrupted-files.txt` is written (listing both groups, with unrepairable slices flagged as "files originating from these may be corrupt even if dar didn't report them"), and `cmd_extract` exits with code 1 so scripts can detect a non-clean restore. The output dir still contains whatever dar managed to extract — best-effort, never silently corrupt.
+4. **`extract`** (`commands/extract.py`) is **chain-aware**: it restores all generations of a chain in a single invocation. Auto-detects the chain name from the first disc's filenames (via `parse_dar_filename`) plus per-disc generation. Discs from any gen, in any order, are accepted; the tool tracks per-generation state (`catalogs_verified: dict[int, bool]`, `gen_basenames: dict[int, str]`) because legacy pre-feature gen 1 archives have bare-name basenames (`<name>`) while new-format gens have `<name>-gen<N>`. For each disc: copy slice + sha512 sidecar (and that generation's catalog on its first intact arrival) to staging in a single disc-read pass — par2 is **not** copied — then eject. Per-gen catalog verification runs `_verify_catalog_on_staging` over slices matching `<gen_basename>-catalog.*.dar`; failing slices get deleted so the next disc of the same gen can refetch. Slice verification via SHA-512, par2 fetch + repair on damage (same per-slice logic as before). After each disc the user is asked "Insert another disc?". Once stopped, `tools.dar.extract_sequential` runs **once per generation in order**: Gen 1 into the empty output dir, Gen 2 with `overwrite=True` (passes `-wa` to dar) so its newer file versions replace Gen 1's, and so on. dar's chain-restore semantics handle deletions recorded in later generations. A background thread feeds ESC bytes on stdin so dar's "missing slice" prompts auto-skip — a partial slice set still restores ~95% of files. dar 2.7 exits 0 even when per-file CRC errors occur, so the wrapper parses `Error while restoring <path> : Bad CRC` lines into a list. If any gen's extract exits non-zero, `cmd_extract` aborts with a manual-retry hint. If `corrupted_files` OR `unrepairable_slices` is non-empty across all gens, `<output>/corrupted-files.txt` is written and `cmd_extract` exits 1. The output dir still contains whatever dar managed to extract — best-effort, never silently corrupt.
 
 **SSD-friendly tip:** pass `-w /dev/shm/bd-extract` (or any tmpfs path) to keep the staging copy in RAM. On a 25 GB-slice + 32 GB-RAM box this means **zero SSD writes for slice payload** during extract. Falls back to SSD staging automatically if `-w` is not given.
 
diff --git a/README.md b/README.md
index e1e950a..88f6d79 100644
--- a/README.md
+++ b/README.md
@@ -4,13 +4,17 @@ Archive data to Blu-ray discs with `dar` + `par2`.
 
 Four subcommands form a build-then-burn pipeline:
 
-- `create`   — Slice + compress source, build PAR2 recovery, assemble per-disc ISO images. No burning.
+- `create`   — Slice + compress source, build PAR2 recovery, assemble per-disc ISO images. Supports full archives and incrementals (via `--base`). No burning.
 - `burn`     — Burn pre-built ISO images to discs (resumable).
 - `verify`   — Check disc / directory / ISO integrity via PAR2. Exit code reflects state.
-- `extract`  — Restore archive from discs with auto-repair via PAR2.
+- `extract`  — Restore archive from discs with auto-repair via PAR2. Whole-chain mode: insert discs from any generation in any order; the tool walks the chain at the end.
 
 Optical drives are auto-detected from `/sys/block/sr*`: a single drive is used automatically, multiple drives trigger a picker. Pass `-D /dev/srN` to override.
 
+### Chain identity = archive name
+
+Incremental archives form a **chain**: a Full (Gen 1), then any number of incremental generations (Gen 2, 3, …) that record only what changed since the previous gen. The archive name from `-n` is the chain's identity — **use the same `-n` for every generation of the same chain**. Renaming between generations breaks chain detection at extract time. The volume label shows generation + disc number; the human-readable name in `-n` should be picked for the long term, even if its meaning drifts (an archive named `family-2024-batch1` can grow to hold years of new family photos — its name doesn't have to stay literally accurate, but it must stay literally the same).
+
 ## Installation
 
 ### System dependencies
@@ -161,17 +165,42 @@ If later (e.g. after some years) you want to verify a specific disc, just insert
 Exit codes: `0` OK, `1` repairable, `2` broken.
 
 
-### extract
-If you need the data back from the discs, execute:
-`bd-archive extract -o /path/to/output`
+### Adding an incremental generation
+
+Some time later you have a new batch of photos you want to add to the same archive. Rather than re-burning everything from scratch, build an **incremental** generation that contains only the delta:
+
+```bash
+bd-archive create \
+    -s /path/to/images \
+    -n "My_image_archive" \
+    --base /path/to/staging-dir/My_image_archive-gen1-catalog.0001.dar \
+    -o /path/to/gen2-staging-dir \
+    -c none
+```
+
+`--base` points at the previous generation's locally-persisted catalog (written into the previous output dir alongside `images/`). The tool diffs the current source against that catalog and archives only files that are new or changed. The new gen gets its own ISOs (typically far fewer discs than the full), its own catalog, its own output dir. Burn it like any other set:
+
+```bash
+bd-archive burn -i /path/to/gen2-staging-dir
+```
+
+You can chain as many generations as you want (`--base` always points at the most recent gen's catalog). The first thing the tool checks is **the archive name** — pass the same `-n` you used for Gen 1, otherwise `create` refuses to proceed with the given `--base`.
+
+#### Auto-defer with `--min-last-disc-fill`
 
----> TODO
+When the last disc of an incremental would be only sparsely filled (e.g. 1 GB on a 50 GB disc), you can tell bd-archive to push the newest files to a later generation so the current set rounds down to fewer discs:
 
-### add incremental
+```bash
+bd-archive create -s /path/to/images -n "My_image_archive" \
+    --base /path/to/gen1/My_image_archive-gen1-catalog.0001.dar \
+    -o /path/to/gen2 -c none --min-last-disc-fill 50
+```
 
-----> TODO
+`--min-last-disc-fill 50` says "the last disc must end up at least 50% full". The tool walks the files that are **not already in the base catalog**, newest mtime first, and defers them one by one until either the threshold is met or the candidate pool is exhausted. The deferred files stay in your source — they'll naturally appear as "new" the next time you create an incremental against this generation's catalog.
 
-### extract
+Without `--base` (i.e. on a Full archive), `--min-last-disc-fill` still works but defers files that **will not be archived until you do an incremental run later**. The tool warns loudly when you're in that mode.
+
+### extract — whole-chain restore
 
 ```bash
 bd-archive extract -o /path/to/output [options]
@@ -183,14 +212,20 @@ bd-archive extract -o /path/to/output [options]
 | `-D, --device`   | auto-detect                    | Optical drive. Auto-picks the only drive present; prompts if multiple. |
 | `-w, --workdir`  | `<output>/.bd-archive-work/`   | Staging dir for slices. Override to put scratch on tmpfs/RAM. Auto-removed on success when default. |
 
-The archive name is auto-detected from the first disc's filenames — there is no `-n` flag.
+The chain name is auto-detected from the first disc's filenames — there is no `-n` flag. Discs from multiple generations of the same chain may be inserted in any order; the tool detects each disc's generation from its filenames (`<name>-gen<N>.NNNN.dar`).
+
+Per-disc flow: copy slice + sha512 sidecar (and that generation's catalog, on its first intact arrival) to staging in a single read pass, eject, verify the staged slice via SHA-512. PAR2 files are **not** copied unless a slice fails verification — at which point the disc is re-mounted, just the par2 for the affected slice is fetched, and `par2 repair` runs in staging. If the catalog itself fails on a disc, the bad slice is dropped and re-fetched from the next disc that carries it.
 
-Per-disc flow: copy slice + sha512 sidecar (and the catalog, on its first intact arrival) to staging in a single read pass, eject, then verify the staged slice via SHA-512. PAR2 files are **not** copied unless a slice fails verification — at which point the disc is re-mounted, just the par2 for the affected slice is fetched, and `par2 repair` runs in staging. If the catalog itself fails on this disc, the bad slice is dropped and re-fetched from the next disc that carries it.
+After each disc, the tool asks whether to continue. Once you stop, it runs `dar -x` for each generation found in staging, in order: Gen 1 extracts into the (empty) output dir, then Gen 2 extracts on top with overwrite, and so on. Files modified in later generations replace the older versions; deletions recorded in later catalogs are honoured. Partial restores work too — losing all discs of one generation leaves a hole in the chain, but earlier and later gens still restore what they hold.
 
-After each disc, you are asked whether to continue — answer `n` for a partial restore (e.g. one disc lost). Once you stop, `dar --sequential-read` does the final extraction; dar's "missing slice" prompts are auto-skipped so a partial set still yields ~95% of files. Per-file `Bad CRC` lines from dar plus any slices that failed sha512 *and* par2 are recorded in `<output>/corrupted-files.txt`, and `extract` exits with code `1` so scripts can detect a non-clean restore. The output dir still contains whatever dar managed to extract — best-effort, never silently corrupt.
+Per-file `Bad CRC` lines from dar plus any slices that failed sha512 *and* par2 are recorded in `<output>/corrupted-files.txt`, and `extract` exits with code `1` so scripts can detect a non-clean restore. The output dir still contains whatever dar managed to extract — best-effort, never silently corrupt.
 
 For maximum throughput on SSD-hosted archives, point `-w` at a tmpfs path (`/dev/shm/bd-extract`) — a 25 GB slice fits in RAM and never hits disk during staging.
 
+#### Legacy (pre-incremental) archives
+
+Archive sets burned before the `-gen<N>` naming convention was introduced have slices named `<name>.NNNN.dar` (no `-gen<N>` segment). Extract handles them transparently as Gen 1. To extend an old set with an incremental: copy the isolated catalog off any of its discs (`<name>-catalog.NNNN.dar`) and pass that file to `--base` on a new `create` run — the new generation will be Gen 2 of the chain, with `<name>-gen2.NNNN.dar` filenames.
+
 ## Development
 
 ### Project structure

From 9e4fb7ae5401f9e3f6395577c946d8fe4a610ba3 Mon Sep 17 00:00:00 2001
From: Xitee1 <59659167+Xitee1@users.noreply.github.com>
Date: Tue, 12 May 2026 18:44:09 +0200
Subject: [PATCH 6/7] fix(create): review fixes for auto-defer + range
 validation + _layout

Addresses findings from the comprehensive code review of phases 2-5:

- Critical: NameError when the auto-defer pool is empty (incremental
  with --min-last-disc-fill where every source file is already in
  the base catalog). The fallthrough branch referenced new_n /
  new_last / new_fill which only existed if the loop body ran.
  Initialise them to the pre-defer layout before the loop, and
  split the "pool was empty" path from the "deferred everything to
  zero" path with distinct messages.

- Important: _layout(0) returned a nonsense 110% fill for the
  delta-empty incremental case (no new files; only catalog +
  par2 overhead on the disc). Add an early return for est==0
  computing fill from the fixed overhead so the preview makes sense.

- Minor: range-validate --min-last-disc-fill at the top of cmd_create
  so 150 surfaces "must be 0-100" rather than silently triggering
  the threshold-unreachable warning.

- Minor: hoist `from datetime import datetime` to the module-level
  imports (was inside the auto-defer print block) and split a long
  f-string to satisfy ruff's line-length rule.

Manual e2e for both regressions verified: empty-pool case now
produces a clean info message and proceeds with the original
layout, generating a tiny incremental that captures any deletions
(which is exactly what dar's incremental does in that scenario).
Range validation rejects 150 with a clear error.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/bd_archive/commands/create.py | 59 ++++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 16 deletions(-)

diff --git a/src/bd_archive/commands/create.py b/src/bd_archive/commands/create.py
index fe851f5..779e3be 100644
--- a/src/bd_archive/commands/create.py
+++ b/src/bd_archive/commands/create.py
@@ -3,6 +3,7 @@
 import shlex
 import shutil
 import sys
+from datetime import datetime
 from pathlib import Path
 
 from bd_archive import __version__
@@ -69,6 +70,12 @@ def _resolve_base(base_arg: str, archive_name: str) -> tuple[Path, int]:
 def cmd_create(args):
     check_deps("dar", "par2", "mkisofs", "dvd+rw-mediainfo")
 
+    if not 0 <= args.min_last_disc_fill <= 100:
+        log.error(
+            f"--min-last-disc-fill must be 0-100, got {args.min_last_disc_fill}"
+        )
+        sys.exit(1)
+
     # Hard cap matches the pre-Phase-2 label format (32 - 5) so existing
     # archive names that lived right up against the old limit still work.
     # Names longer than ISO9660_LABEL_NAME_MAX (23) get truncated in the
@@ -177,6 +184,11 @@ def cmd_create(args):
 
     def _layout(est: int) -> tuple[int, int, int]:
         """(n_discs, last_disc_content, last_fill_pct) for a given archive size."""
+        if est == 0:
+            # Incremental with no new file data — catalog + par2 overhead
+            # still take up a disc, but the data portion is empty.
+            overhead = scan.catalog_est + PAR2_AND_MISC_OVERHEAD
+            return 1, overhead, overhead * 100 // sizing_target
         n = max(1, (est + slice_bytes - 1) // slice_bytes)
         last_sl = est - (n - 1) * slice_bytes
         if last_sl == 0:
@@ -210,8 +222,13 @@ def _layout(est: int) -> tuple[int, int, int]:
             )
         pool.sort(key=lambda f: f.mtime, reverse=True)
 
+        # Initialise loop-mutated state to the pre-defer layout so the
+        # "pool exhausted / threshold unreachable" fallback below has
+        # values to read even when the pool is empty (all source files
+        # are already in the base catalog).
         cum_size = 0
         reached = False
+        new_n, new_last, new_fill = n_discs, last_disc_content, fill_pct
         for f in pool:
             cum_size += f.size
             new_est = max(0, archive_est - int(cum_size * ratio))
@@ -228,22 +245,33 @@ def _layout(est: int) -> tuple[int, int, int]:
                 break
 
         if not reached:
-            log.warn(
-                f"--min-last-disc-fill {args.min_last_disc_fill}% not reachable; "
-                f"pool ({len(pool)} candidate file(s), {pool_kind}) exhausted "
-                f"after deferring {human_bytes(cum_size)}. Proceeding with "
-                f"what we have."
-            )
-            if archive_est - int(cum_size * ratio) > 0:
-                archive_est, n_discs, last_disc_content, fill_pct = (
-                    archive_est - int(cum_size * ratio), new_n, new_last, new_fill
+            if not pool:
+                # Nothing was deferrable (incremental + base already
+                # contains every source file). Keep original layout
+                # and let dar handle the delta-empty run — its
+                # archive will contain only deletion markers if any.
+                log.info(
+                    "Auto-defer pool empty (nothing new vs base); "
+                    "proceeding with the original layout."
                 )
             else:
-                log.error(
-                    "Deferring all candidates would leave 0 bytes to archive. "
-                    "Lower --min-last-disc-fill or skip the run."
+                log.warn(
+                    f"--min-last-disc-fill {args.min_last_disc_fill}% not reachable; "
+                    f"pool ({len(pool)} candidate file(s), {pool_kind}) exhausted "
+                    f"after deferring {human_bytes(cum_size)}. Proceeding with "
+                    f"what we have."
                 )
-                sys.exit(1)
+                new_est = archive_est - int(cum_size * ratio)
+                if new_est > 0:
+                    archive_est, n_discs, last_disc_content, fill_pct = (
+                        new_est, new_n, new_last, new_fill
+                    )
+                else:
+                    log.error(
+                        "Deferring all candidates would leave 0 bytes to archive. "
+                        "Lower --min-last-disc-fill or skip the run."
+                    )
+                    sys.exit(1)
 
     last_disc_free = max(0, sizing_target - last_disc_content)
     last_disc_free_raw = int(last_disc_free / max(ratio, 0.001))
@@ -284,12 +312,11 @@ def _layout(est: int) -> tuple[int, int, int]:
     if deferred_files:
         defer_bytes = sum(f.size for f in deferred_files)
         oldest_deferred = min(f.mtime for f in deferred_files)
-        from datetime import datetime as _dt
-
         log.step(f"Auto-defer (--min-last-disc-fill {args.min_last_disc_fill}%)")
         log.info(f"Files deferred:   {len(deferred_files)}")
         log.info(f"Bytes deferred:   {human_bytes(defer_bytes)} (raw)")
-        log.info(f"Oldest deferred:  mtime {_dt.fromtimestamp(oldest_deferred):%Y-%m-%d %H:%M}")
+        oldest_dt = datetime.fromtimestamp(oldest_deferred)
+        log.info(f"Oldest deferred:  mtime {oldest_dt:%Y-%m-%d %H:%M}")
         sample = deferred_files[:3]
         for f in sample:
             log.info(f"  - {f.rel_path}")

From 62e8340e16ef7d123b4e321fb055e4d38d478b4e Mon Sep 17 00:00:00 2001
From: Xitee1 <59659167+Xitee1@users.noreply.github.com>
Date: Tue, 12 May 2026 20:05:08 +0200
Subject: [PATCH 7/7] lint

---
 src/bd_archive/commands/create.py  | 22 +++++++++++-----------
 src/bd_archive/commands/extract.py |  4 +---
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/src/bd_archive/commands/create.py b/src/bd_archive/commands/create.py
index 779e3be..329f28d 100644
--- a/src/bd_archive/commands/create.py
+++ b/src/bd_archive/commands/create.py
@@ -71,9 +71,7 @@ def cmd_create(args):
     check_deps("dar", "par2", "mkisofs", "dvd+rw-mediainfo")
 
     if not 0 <= args.min_last_disc_fill <= 100:
-        log.error(
-            f"--min-last-disc-fill must be 0-100, got {args.min_last_disc_fill}"
-        )
+        log.error(f"--min-last-disc-fill must be 0-100, got {args.min_last_disc_fill}")
         sys.exit(1)
 
     # Hard cap matches the pre-Phase-2 label format (32 - 5) so existing
@@ -82,10 +80,7 @@ def cmd_create(args):
     # volume label only; filenames inside the ISO keep the full name.
     legacy_max_name_len = ISO9660_VOLUME_LABEL_MAX - 5
     if len(args.name) > legacy_max_name_len:
-        log.error(
-            f"--name '{args.name}' is {len(args.name)} chars; "
-            f"max {legacy_max_name_len}"
-        )
+        log.error(f"--name '{args.name}' is {len(args.name)} chars; max {legacy_max_name_len}")
         sys.exit(1)
     if len(args.name) > ISO9660_LABEL_NAME_MAX:
         log.warn(
@@ -194,8 +189,7 @@ def _layout(est: int) -> tuple[int, int, int]:
         if last_sl == 0:
             last_sl = slice_bytes
         last_content = (
-            last_sl + last_sl * args.redundancy // 100
-            + scan.catalog_est + PAR2_AND_MISC_OVERHEAD
+            last_sl + last_sl * args.redundancy // 100 + scan.catalog_est + PAR2_AND_MISC_OVERHEAD
         )
         return n, last_content, last_content * 100 // sizing_target
 
@@ -239,7 +233,10 @@ def _layout(est: int) -> tuple[int, int, int]:
                 break
             if new_fill >= args.min_last_disc_fill:
                 archive_est, n_discs, last_disc_content, fill_pct = (
-                    new_est, new_n, new_last, new_fill
+                    new_est,
+                    new_n,
+                    new_last,
+                    new_fill,
                 )
                 reached = True
                 break
@@ -264,7 +261,10 @@ def _layout(est: int) -> tuple[int, int, int]:
                 new_est = archive_est - int(cum_size * ratio)
                 if new_est > 0:
                     archive_est, n_discs, last_disc_content, fill_pct = (
-                        new_est, new_n, new_last, new_fill
+                        new_est,
+                        new_n,
+                        new_last,
+                        new_fill,
                     )
                 else:
                     log.error(
diff --git a/src/bd_archive/commands/extract.py b/src/bd_archive/commands/extract.py
index a776ea6..4fc963a 100644
--- a/src/bd_archive/commands/extract.py
+++ b/src/bd_archive/commands/extract.py
@@ -61,9 +61,7 @@ def _copy_disc_data(
                 shutil.copy2(cat_hash, dest)
 
     slices = sorted(
-        p
-        for p in mounted.glob(f"{disc_basename}.[0-9]*.dar")
-        if "-catalog" not in p.name
+        p for p in mounted.glob(f"{disc_basename}.[0-9]*.dar") if "-catalog" not in p.name
     )
     copied: list[Path] = []
     for sp in slices: