From 29d1cd921514c3b498b45998cfb08ea747d9a094 Mon Sep 17 00:00:00 2001
From: Efraim Feinstein <efraim.feinstein@gmail.com>
Date: Wed, 27 May 2026 00:00:06 -0700
Subject: [PATCH 01/10] miqra al pi hamasorah download

---
 README.md                                     |   7 +
 .../importer/miqra_al_pi_hamasorah/README.md  |  65 +++++
 .../miqra_al_pi_hamasorah/__init__.py         |   0
 .../miqra_al_pi_hamasorah/download.py         | 223 ++++++++++++++++++
 opensiddur/importer/util/pages.py             |  15 ++
 opensiddur/tests/fixtures/miqra_minimal.xlsx  | Bin 0 -> 6007 bytes
 .../miqra_al_pi_hamasorah/__init__.py         |   0
 .../miqra_al_pi_hamasorah/test_download.py    | 106 +++++++++
 pyproject.toml                                |   1 +
 uv.lock                                       |  23 ++
 10 files changed, 440 insertions(+)
 create mode 100644 opensiddur/importer/miqra_al_pi_hamasorah/README.md
 create mode 100644 opensiddur/importer/miqra_al_pi_hamasorah/__init__.py
 create mode 100644 opensiddur/importer/miqra_al_pi_hamasorah/download.py
 create mode 100644 opensiddur/tests/fixtures/miqra_minimal.xlsx
 create mode 100644 opensiddur/tests/importer/miqra_al_pi_hamasorah/__init__.py
 create mode 100644 opensiddur/tests/importer/miqra_al_pi_hamasorah/test_download.py
diff --git a/README.md b/README.md
index b1b8132..c997a3a 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,13 @@ uv run python -m opensiddur.importer.jps1917.convert_wikisource \
   --project-dir ~/src/opensiddur-repos/opensiddur-projects/project/jps1917
 ```
 
+Example: download Miqra al pi ha-Masorah from Google Sheets into sourcetexts:
+
+```bash
+uv run python -m opensiddur.importer.miqra_al_pi_hamasorah.download \
+  --sourcetexts-root ~/src/opensiddur-repos/sourcetexts/sources
+```
+
 ## JLPTEI sources
 
 JLPTEI sources are compiled into the `project` directory.
diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/README.md b/opensiddur/importer/miqra_al_pi_hamasorah/README.md
new file mode 100644
index 0000000..19b3cfe
--- /dev/null
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/README.md
@@ -0,0 +1,65 @@
+# Miqra al pi ha-Masorah importer (download)
+
+Scripts to download [*Miqra according to the Masorah*](https://docs.google.com/spreadsheets/d/1mkQyj6by1AtBUabpbaxaZq9Z2X3pX8ZpwG91ZCSOEYs/edit) from its public Google Sheet and prepare per-tab TSV files for a future JLPTEI importer.
+
+## License
+
+The README tab of the source spreadsheet states that the text is prepared by Seth (Avi) Kadish, based on Hebrew Wikisource material, and is licensed **CC-BY-SA 4.0 International**, with attribution to Hebrew Wikisource. See the downloaded `sheets/readme.tsv` for the full Hebrew and English wording.
+
+## Download
+
+Prerequisites: clone [opensiddur/sourcetexts](https://github.com/opensiddur/sourcetexts) and pass its `sources` directory as `--sourcetexts-root` (if the option is omitted, the default is `<repo>/sources`).
+
+```bash
+uv run python -m opensiddur.importer.miqra_al_pi_hamasorah.download \
+  --sourcetexts-root ~/src/opensiddur-repos/sourcetexts/sources
+```
+
+Use `--dry-run` to log the export URL and the output paths without downloading or writing anything.
+
+Output layout:
+
+```
+<sourcetexts-root>/miqra_al_pi_hamasorah/
+  manifest.json
+  sheets/
+    torah.tsv
+    neviim_rishonim.tsv
+    …
+```
+
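+The script downloads the workbook once as XLSX, writes each known tab as a UTF-8 TSV file (blank rows are dropped; tabs not listed in the mapping below are skipped with a warning), writes `manifest.json` (per-sheet SHA-256 checksums plus row and column counts), and deletes the temporary workbook.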
+
+## Worksheet → file mapping
+
+| Tab | Output file |
+|-----|-------------|
+| שינויים changes | `changes.tsv` |
+| README | `readme.tsv` |
+| כתובים אחרונים | `ketuvim_aharonim.tsv` |
+| חמש מגילות | `chamisha_megillot.tsv` |
+| ספרי אמ"ת | `sifrei_emet.tsv` |
+| נביאים אחרונים | `neviim_acharonim.tsv` |
+| נביאים ראשונים | `neviim_rishonim.tsv` |
+| תורה | `torah.tsv` |
+| תבניות templates | `templates.tsv` |
+| מיוחד special | `special.tsv` |
+| AutoEdits | `auto_edits.tsv` |
+
+## Biblical text columns
+
+On the six biblical-book tabs (Torah, Nevi'im, Ketuvim, etc.), each data row uses:
+
+| Column | Role |
+|--------|------|
+| A | Page key (e.g. `ספר בראשית/א`) |
+| B | Row id (`0` = section header; Hebrew letters = verses) |
+| C | Navigation / header wikitext |
+| D | Verse scaffolding (`{{מ:פסוק|…}}`) |
+| E | Pointed Hebrew text and `{{נוסח|…}}` templates |
+
+Content is Hebrew Wikisource-style wikitext, related to the [JPS 1917](../jps1917/) importer pipeline.
+
+## Importer status
+
+Only the download step is implemented. A JLPTEI converter will read `sheets/*.tsv` in a later change.
diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/__init__.py b/opensiddur/importer/miqra_al_pi_hamasorah/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/download.py b/opensiddur/importer/miqra_al_pi_hamasorah/download.py
new file mode 100644
index 0000000..342ba83
--- /dev/null
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/download.py
@@ -0,0 +1,223 @@
+"""Download Miqra al pi ha-Masorah from Google Sheets into per-tab TSV files."""
+
+from __future__ import annotations
+
+import argparse
+import csv
+import hashlib
+import json
+import logging
+import sys
+import tempfile
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+import requests
+from openpyxl import load_workbook
+
+from opensiddur.importer.util.pages import (
+    default_sourcetexts_root,
+    miqra_al_pi_hamasorah_data_directory,
+    miqra_al_pi_hamasorah_sheets_directory,
+)
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+SPREADSHEET_ID = "1mkQyj6by1AtBUabpbaxaZq9Z2X3pX8ZpwG91ZCSOEYs"
+SOURCE_URL = (
+    f"https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/edit"
+)
+EXPORT_XLSX_URL = (
+    f"https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/export?format=xlsx"
+)
+USER_AGENT = (
+    "OpenSiddur-AI/1.0 (https://github.com/opensiddur/opensiddur-ai; "
+    "opensiddur@example.com)"
+)
+
+# Exact worksheet titles from the workbook → output slug (without .tsv).
+SHEET_SLUGS: dict[str, str] = {
+    "שינויים changes": "changes",
+    "README": "readme",
+    "כתובים אחרונים": "ketuvim_aharonim",
+    "חמש מגילות": "chamisha_megillot",
+    "ספרי אמ\"ת": "sifrei_emet",
+    "נביאים אחרונים": "neviim_acharonim",
+    "נביאים ראשונים": "neviim_rishonim",
+    "תורה": "torah",
+    "תבניות templates": "templates",
+    "מיוחד special": "special",
+    "AutoEdits": "auto_edits",
+}
+
+
+def _cell_value(value: object) -> str:
+    if value is None:
+        return ""
+    return str(value)
+
+
+def _worksheet_rows(worksheet: Any) -> tuple[list[list[str]], int, int]:
+    """Return (rows, row_count, max_columns) for a worksheet."""
+    rows: list[list[str]] = []
+    max_col = 0
+    for row in worksheet.iter_rows(values_only=True):
+        cells = [_cell_value(c) for c in row]
+        while cells and cells[-1] == "":
+            cells.pop()
+        if not any(cells):
+            continue
+        max_col = max(max_col, len(cells))
+        rows.append(cells)
+    if max_col == 0:
+        return [], 0, 0
+    padded = [cells + [""] * (max_col - len(cells)) for cells in rows]
+    return padded, len(padded), max_col
+
+
+def _write_tsv(path: Path, rows: list[list[str]]) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open("w", encoding="utf-8", newline="") as f:
+        writer = csv.writer(
+            f,
+            delimiter="\t",
+            lineterminator="\n",
+            quoting=csv.QUOTE_MINIMAL,
+        )
+        writer.writerows(rows)
+
+
+def _sha256_file(path: Path) -> str:
+    digest = hashlib.sha256()
+    with path.open("rb") as f:
+        for chunk in iter(lambda: f.read(65536), b""):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def _split_workbook(xlsx_path: Path, sheets_dir: Path) -> list[dict[str, Any]]:
+    sheet_entries: list[dict[str, Any]] = []
+    workbook = load_workbook(xlsx_path, read_only=True, data_only=True)
+    try:
+        for worksheet in workbook.worksheets:
+            title = worksheet.title
+            slug = SHEET_SLUGS.get(title)
+            if slug is None:
+                logger.warning("Skipping unknown worksheet: %r", title)
+                continue
+            rows, row_count, col_count = _worksheet_rows(worksheet)
+            out_path = sheets_dir / f"{slug}.tsv"
+            _write_tsv(out_path, rows)
+            rel_path = f"sheets/{slug}.tsv"
+            sheet_entries.append(
+                {
+                    "name": title,
+                    "slug": slug,
+                    "path": rel_path,
+                    "rows": row_count,
+                    "columns": col_count,
+                }
+            )
+            logger.info("Wrote %s (%d rows, %d columns)", out_path, row_count, col_count)
+    finally:
+        workbook.close()
+    return sheet_entries
+
+
+def download_miqra(
+    sourcetexts_root: Path | None = None,
+    *,
+    dry_run: bool = False,
+) -> None:
+    """Download the spreadsheet and write per-tab TSV files plus manifest.json."""
+    data_dir = miqra_al_pi_hamasorah_data_directory(sourcetexts_root)
+    sheets_dir = miqra_al_pi_hamasorah_sheets_directory(sourcetexts_root)
+    manifest_path = data_dir / "manifest.json"
+
+    if dry_run:
+        logger.info("Would download %s", EXPORT_XLSX_URL)
+        logger.info("Would write TSV files under %s", sheets_dir)
+        logger.info("Would write manifest to %s", manifest_path)
+        return
+
+    data_dir.mkdir(parents=True, exist_ok=True)
+    sheets_dir.mkdir(parents=True, exist_ok=True)
+
+    headers = {"User-Agent": USER_AGENT}
+    logger.info("Downloading %s ...", EXPORT_XLSX_URL)
+    response = requests.get(EXPORT_XLSX_URL, headers=headers, timeout=300)
+    response.raise_for_status()
+
+    tmp_path: Path | None = None
+    try:
+        with tempfile.NamedTemporaryFile(
+            suffix=".xlsx",
+            delete=False,
+            dir=data_dir,
+        ) as tmp:
+            tmp.write(response.content)
+            tmp_path = Path(tmp.name)
+
+        logger.info("Splitting workbook into TSV files ...")
+        sheet_entries = _split_workbook(tmp_path, sheets_dir)
+
+        for entry in sheet_entries:
+            tsv_path = data_dir / entry["path"]
+            entry["sha256"] = _sha256_file(tsv_path)
+
+        manifest = {
+            "spreadsheet_id": SPREADSHEET_ID,
+            "source_url": SOURCE_URL,
+            "export_url": EXPORT_XLSX_URL,
+            "downloaded_at": datetime.now(timezone.utc).isoformat(),
+            "sheets": sheet_entries,
+        }
+        manifest_path.write_text(
+            json.dumps(manifest, ensure_ascii=False, indent=2) + "\n",
+            encoding="utf-8",
+        )
+        logger.info("Wrote manifest to %s", manifest_path)
+    finally:
+        if tmp_path is not None and tmp_path.exists():
+            tmp_path.unlink()
+            logger.info("Removed temporary workbook %s", tmp_path)
+
+
+def _build_arg_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description=(
+            "Download Miqra al pi ha-Masorah from Google Sheets into per-tab TSV "
+            "files under <sourcetexts-root>/miqra_al_pi_hamasorah."
+        )
+    )
+    parser.add_argument(
+        "--sourcetexts-root",
+        type=Path,
+        default=default_sourcetexts_root(),
+        help=(
+            "Root of the sourcetexts tree; output is written under "
+            "<root>/miqra_al_pi_hamasorah (default: <repo>/sources)."
+        ),
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Log actions without downloading or writing files.",
+    )
+    return parser
+
+
+def main(argv: list[str] | None = None) -> int:
+    args = _build_arg_parser().parse_args(argv)
+    download_miqra(args.sourcetexts_root, dry_run=args.dry_run)
+    return 0
+
+
+if __name__ == "__main__":
+    try:
+        sys.exit(main())
+    except Exception as e:
+        logger.error("Error downloading Miqra al pi ha-Masorah: %s", e)
+        raise
diff --git a/opensiddur/importer/util/pages.py b/opensiddur/importer/util/pages.py
index 117d25a..6f33d4e 100644
--- a/opensiddur/importer/util/pages.py
+++ b/opensiddur/importer/util/pages.py
@@ -29,6 +29,21 @@ def jps1917_credits_directory(sourcetexts_root: Path | None = None) -> Path:
     return jps1917_data_directory(sourcetexts_root) / "credits"
 
 
+def miqra_al_pi_hamasorah_data_directory(sourcetexts_root: Path | None = None) -> Path:
+    """Miqra al pi ha-Masorah raw dumps: <sourcetexts-root>/miqra_al_pi_hamasorah."""
+    root = (
+        sourcetexts_root.resolve()
+        if sourcetexts_root is not None
+        else default_sourcetexts_root()
+    )
+    return root / "miqra_al_pi_hamasorah"
+
+
+def miqra_al_pi_hamasorah_sheets_directory(sourcetexts_root: Path | None = None) -> Path:
+    """Per-tab TSV files from the Google Sheet export."""
+    return miqra_al_pi_hamasorah_data_directory(sourcetexts_root) / "sheets"
+
+
 def get_page(page_number: str | int, sourcetexts_root: Path | None = None) -> Optional[Page]:
     """Return the wikitext of the given Page, or None if it does not exist."""
     page_num = int(page_number)
diff --git a/opensiddur/tests/fixtures/miqra_minimal.xlsx b/opensiddur/tests/fixtures/miqra_minimal.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..fbb6dd8bb4d6aebf56d3a13c391993062cb20d9d
GIT binary patch
literal 6007
zcmZ`-1yqxL-ySi#5tLC%Bcr=PV3g7&ARsVcbPEU)(kUGhBBcV-t&@;Z!hq2sNGl*I
z4Zg$YdtV><o@eLW_kDKGu515aU-wT-9Si#!006)PjKP~tAzk-WQqkY~(H9x|vUai3
za(8j{;C<-o%H!kYcn^7xu$_+xXa0kZcSB}0Lm`Qxbad`JB#&2EJ*QLX$<`*eth29g
zH|<NFC?>Z045b~OB_~0FE@HVc<`bE|-u&qNrN?jiXM<y_K%|9?3|Jj6oyLR0_=&_m
zEi_bq9pQo4dLh&9JgSz^X6slm5|XH5XDpqeBOqJ#W0i@aR2NU@0{)EA?W5AoUFmf>
zvj%>^ZN9GchY22o$jmMD(YY`I0P??^VCCWty|ke}sm;EFk0{(6dX&qdiXyuEtq-BW
zO=K`J(LZ7DK+TRAQ#m>CcqamNB&{qcn#+?3HePN7Ny(hH<+DHi;#~cJvC*B@o5`K-
z<||5Ln%#^wt35s+E>4+q@@zfE-AFAeB`j)HdsPY>ixp}n%5no!-xQ%ELbFofmP|mp
zg)xS=T~!GHYKBR<r}!whnc6vs2D0XK#Ph-|^7RS3h*!{qBwaprd%7-v(<0NE{LqHk
z1J?5AnKI8ds1W9Dt`d?nmFr96qaBMEQBiMRWtAD(j-JEqj+$4z&ShFIwp;M0<)1%2
z?m`&K54BgckeUDKolMhgW-=TAAe$WkphSDe$C2054(bH`>&}1apG{*Ek14ThPqr!s
zeQXb$2oWN|^|h29Fz@Nw%+O5@#xQcDI7eUW#L&PG$|7WaX~dgJ_nMzeVS~S(4k#(F
zu8H~Xedn=U3y)0M^|6@bogQmx;Qc&rD8NE4&qq^IvE52gbl0rT7zXGN3+SPw=$HwK
z!Bh9S);DH4Zvx_F9{W~1J!=pObIiY?Y;6%>2DXzM)YCS0F~3>6%qVz7)u$H+Ex@2O
zlxJ&-tld3!>^z(LECWS;m7o_0cIKlsx*0g&QnI9B9Qj4B{bQ@Wt*n7V*U+#^P)JWe
z3b0xApa4aMCqOJIkz+LA?SJrbGcUGx%2UV*G{3mxBUaxx9OFudJ+Ix8+n%%U_@Pi9
zzqSB2XgzT-^)|8iO`NVK>M&q#{kdH>L7a&u%Hh#0Dn1HPKIlUX-_eZdcpHT%9rRp>
zv9X`ObtwEwZFdK<xb8<mRktjMx(P6H^&a~)jFo1_dE%7<nxt=;l{eis`Ys+teu3!3
z9)k_jqzOUXbDjZ70HPm<?rQm0$k#{Dw<isoJ{v=F)@gz8vbuwRD2LqYoT)hfk>1i%
zDCn0HATi*O5(q2BDA{azj!ZuDSX;^~Ngls<XTbFx7=P!K+2mmayR#&vy9Ax>9e?D(
zL1GcB0CMY?U|mDXmeC-s@&OXLlpE{L+%kJ$6mX!a?94p=y0wOBJO#p{l-JGOyhJWm
zr%Eh9DT|jAo?)*=hgpcvpOznfREsDhpQj9Hfa7(~#YG;Lx@1Wk<{PO_g&vVXKnvL(
zD;`6Rr?^Fsp#%MQGX?2JR2b<u=7in|gFDB3Qd@V*v!&=MDf&|gapc3(S=cCEi$A%i
zLV3YeG<A`UlG{$<WjRqxOn}uemw2-EuL*+MAQiM;5q85;eiy*wY+(YrL|S@GTiP1P
ztx#CYHbVtVd{%ioIoBN-OBxZ7dmVLfz1!<h3u6kzkdMZ2E&18~GT~$yi|&p7M6ZFd
zL=y5OGg0(!2V$O)e_#j~Q_I?X>1Kp|o@>^j+O`(bXLH{8Bz#oR5gdQ0JaS!aiIlBG
z_N#ypZb8<4{P&>-_IYn2gk)YZ2oL~wniyo3{d@>S(-7~p-iAEJ_&GEs+Fp7T(kdRv
z6M!t*YTB31A8rt34zO)Q#h84CIGcPteak;JF2uuOW$51D-KAHN%sC;R*Sc|TCzHFg
zc{0foT9I|1o0<BVdT@<AOXFEg@AmDF_-ocCtoVI;D5z*OMeZG1=dZdy)KbTnE1EbO
zrA(H-R^)rifRV>i=Bu^+gn|kU?&{y_Q{Z)MVru(UpIkK<`iLZLrIP6DAHAR#K0SEg
zy~JF^r<<<O2)YBua%#?*kr2zxNNgfN={$4kIR4t{16mND#BK`6Fmzg-7#ccVZPTtS
z*pf=4?~2@gdFy+Acm}dVA;R~ud4_E&LQCkHlVceR7}8+TIKBhEHg9dg{Q~!Cg*N7{
zj=lxI2?C4j*-QVsu}W*MtLdiZi$h6^)5Xv_=L*h8G)_!9;_a{uO54)j&(NwieR`z6
z$!;awUb>b_19@@3ygQGGT!WgBLd7cOda>HeJsMs3Dwpa<eX$Qpg7p;XH{LC-bh&Bh
zHd1)klVS=qwTQp=gYw%J@6K@(;e$;g?^7(?d$2jW`aJusACvb<`lB|B-E-F-WzuEI
zRrCD^3U8kr%r2fdCrhf_&Gzji$M96bWKN~Sp}sD#r1d@N84e{ze@c(yHfD%*F<nr`
z3E}%)q9#tNfpQt~Kqc_ga1FCzR*GxUl|;Z>*WDMlwvz21#g3nVgPlV41guA|_bNoS
zNej9wOz5%hy`5qs44Yoi>u&+U*(}sM{G4;R*?&%(jY!vCmxk;WWX?0xM0h;+Yiw64
z`)M@Ba4aw<?m8{;8Ov;i60A`8q%9yiI*7c~65d>)dCZzES^Ib=_~!GovFXBt*?A)W
zky7I#Yc6tGbzFbOBpj<)lYJrcglm@N#Z~2qDSjf~Afr8^d>gU32&F8L;Al*lcpQO?
z{HWZr?DPn+-2t%>T_VtoKeJw>U`c~OYUW~#xU&6Y{Flv`4;X(AkTWueI1~l|0LKOZ
zZu~Vs9=W(Xc-TRqo*ulv9>1nYr{1)4HXqqJ)smWTY%q8uEM~+}r;aq$AZkhmVs@i7
zx^3jZ#)S3waB77~M}DM1rawD#CtDoL+ka(2+W^petX~6Rd!_Sc_93{2z@CZMmoRSN
zn(2rlj6)4<g2B=dVILkaPV9Ei?&<IZf`lpVSe1XORbFgYZ(Q*Vj)Oqr?+%hR7C?*c
z0!rM})f#bsluP5qZBMY42n!|cBFME^)(RS0e%yP{eWdXP%nr9op%V`jnpe-gIZlL4
zBmk8lbIP?Kjm?Dk#pVTQ3h87$)eT&`=&>I+j#q5Ytzi^0)DIEq?#aK&Ui|&&Sa9Dj
z#I2R~r3#4fiFfEwCL+^~sNrov+$8cng@lH1q8hJ^PcW_Gm}A+_S@)24-6YB2OBrA2
zkoRo9e(0%nY%29bzlE~ZcV-Kow$X<%SOQeS*&;NO%dXn?a=CYjKH$SQBkRuGa4#CM
ze#&ml(+`9(pB6Ebk_wKBs*fU(1__LtY|Rp>rJMSq&DY<bwe+UTh);}<XSzAhjXL-m
z$lKj(%&ZRoOys*}Tyya_rKuv0J@7-*q<ItvjF}!MjG_CzDQ-9Utkc(^h4KKJcvDU@
zlZIhZB0Sv}d%#tDz)E$zrojpBpNY`!qNFL0P6Sc3&ia=`5V({RUOMs`9eiYG(aX3a
zoH`i#8Z5@4^Fexbm;=t$s%2B3EnHMjA8*t~^#<#Dk1b~Acv&Z0kQoCk*&R%~ebq;}
z!|<Sl24OVb^9!nRpKnJGiZXdJ=n0!vLuR0p?<>_L&ARg&cEUa~(`;HS-WM}2J@W<P
zoMt1BCnmlX7mbjGZRlZ1zDRIhWTr?~E8=c^F~m>mHITQ`H`>aGmj^m=)vZafPViBg
zFYonD!@<O2Wd%~n%2wi7C27$up1$c}@AKTLXBagP-{9{v0~!?^4^!2N2QHO`7}FCw
zRRxAQ<aP?p*D&d;BuMXZ+TQBZW7+CP@od^E%N%$Ky<A`cDji}g`m$DBFz5YDQu3r>
z|1eRU3g?=$yPgp&5p7))0kpjkSOJoGcO-8l;%#cx??Btw??_dU!?0oMs1G`4n18w*
zckPu&Fxu_=|L%6dOScE;OrzaSbe?)!MJlPjthK(LVvz?UD<f#N%xgTdkTXj8%T`oN
zIf!E0nz2hp#yi^^0dE2Uo@$nZ(yKsKIj-NMLA9>R#bJmKdI<YqhAFB?<v{HNCF312
zBF#qX>W98lT`MA*{h(Aw3uml`G2@8}8V(c`Y}wM9Y&xofky}!3)P}j;lw*=`)T*OK
z9G6^-57wiROkk_Y*gXxsj-NBVhm~aw`$QYuB4Bk39}@_qc)GoU#Usbbv@7trA=}YT
zHSF{v{=7MF7Fm6bgeTT7m353p(n=mb)riYqV3yl)RTU>V9aAaS7zIpQ`Gg9g`?k4f
zt#Eq(#Sgcyv=5*7kg<O08F@Z5nQ@n*QyP@#JZP8YzJJ1zbKRtQ(%jQ9LG<uZ%yb^>
zOeqaZ=hMO?MpasJRnK=NP@(VI6bzc|A7CdjTV~W3vJZEkbHEz}n<H=^8dkdnhxB@c
zDVN|AZrF6u1{H5^!C+)@-)E=tSdG-Sw^I2VvGzMV@>l2n44eO6!HhOKWC!>F0P$aK
z)5Fu(5$bUnt678T=;_N|`GxdVa)z**>aLc8MLsuS3u%D9r8xQ8Lho+U{<<WyxOYwI
zeU5<Ad)uGra++vuQKzL8NZVUdEq2foC+`FJY4+;*4p?YQc2eVpobkHQ;frS7&yO^=
zi<OfMj_s>BJo<}zylDm3-XBOyPPH95)pzgC7T4L-gKO-93QP$PdPhY{o04$ZJMO<N
zYKH6)5G$9KSuyC4tKg{r<ZADG^)Rh~kvq=I?vqvM_FCI(J$p5aRHY7Ql-pLA;A1UJ
z_8W8Zjw$AE>8)Me&zeo8tp#sM-Ry2_hBUTF6u=s^U;dDD-StG2G(3oZacJ;#P~}N=
zcm$nutTv!QIe$bjeoguD>gjOx#CjLb^ry;^=Xm&WmY$SU`#(z>)=u+gt2N-pKcPYP
zcLxJW#$wpnjeYS?iry#4hLJ?gDCLzArCU7@=*rQpA@sgcxH}v>bzdZoZ_6I*FqhHd
z_10O(F{IXC>jH||FbNK%jHB}u&hcxnM(Ea@QQ0<vOZr2NhJ_ZGvi$UBzmh59U@Fq{
zvb<9|_@c&qjolK4&3~ID6{9l>e0$)h9x|(p-BQnaou!&h+#!kbK4&3`C>4AE>v=Up
zV|^v<G(F{YiA?!>Kgz*7=?9SIaH88JkdqcO6slNsX;J!x&B;)>R_gF`6@9fsP@?DD
zGj08^FkQE2^7n$L=wtKZpGu~#Tmx#P_Qv@=g%!Txz<$L4z5uG~S@Ka*n(-SqXY?C2
z=Ex#J4g!L<hD69KB0gLK&ggA=|2^qmm$}#e7bOj+#DC^qdUOk2D>~;W(fLP+eww>O
z9X)t?F7G)>P0r{pAs5&=n9GMdcq>v6x<ie9jA8JSbb<mHt(5bjVyejP86l0Yv=qe*
z159pHtgt@j^^J2f9hmWGBZf*nYXWQ_@y^>3FoST!u$<zwl|rR4SVoSA#-`!DHFb$c
zp<$%ycutY@G_R(-OHhTxUAH?h6?H+9?f&6Ffx|r_VWnj?=2R1q4{G_sdtP-YpUH?i
zOQ{F-lD;UAzU$x_>T87+Q>~xL+OeCR9zpz!6>F@UW$lG<NDt}86TqK_e)pZ@wn6ts
zc`N{c^sgej)FzfLE)JJvR+nV1(Jn`Zs2JYBaKvCu)S_U1#d2}$)jQtJ6pe{lnD=$9
zf&52&W|8Y+PN<2Gp-1f~Z}_DDd4Gf8OyUFHsIj)^28d3kL->u*e0BH1;T&y@a7mi`
z+|udZRL$)ncOQ_<*AcX6kC4_WGDA8L4mKU$@(dIS)<)nkVEe%NL7%|H%MRAt`&(77
z;8c-v1@du^RjkOPT9<2Em}4Fbn7vv3n50v+HWCLkO6XxcZVL5JR5#=_<5^9Tw;W2C
z;8R%l_bS*;|I`SCn;Oz;ioTMXsM>dRfa$)tpDH4yGyy8;%@<!M!#$AY*GYSbX{9Ct
zimmXdeW50|=nl@tiya}0={Wqdx9xSrp}Y`y7v(2~G%c^(05m-;fhqY&WEs|o)Cy+n
z#*{n`EqQF5d%R?S#s{m~kFVT+GwzTJ4kIEiOxRd7TK%DFBf^nzd-40R(rVJUb-{?R
zgXT)E^^~Jr&HDnzKSR>u_-&&Z9SRUSB%r@S@>lx(Jutsgu`^km<S8GSQbmYAAe>c|
z977*86-j8s2ezzQ&g5`CCF5w-i580NJ+V4IH;gS{_fDb})|$0*N5E-0sD*3bSXyRQ
zygmA$h~r|t;&drx-QYA|F0RnlJ!gvpqC~~V#mV<TKX1LMc69fDbvC-#D%-kkP+SB`
z3`<tkz14+@aHgdMM{EsPe<@S~-!(qM9H1ru=g<4+4^j}2USrCYDD1cCH#lEz^~;@G
z7=wTGvg7Ufu?`qBRFV19;^H$k6J~T0&Y=bJ<%Y#X!Nu7V>g;K*=j#gfcyO7B{VB-H
z*^DqR`sQUX5dIOPyaix55quplt_9LB+xdCzGeg5HTQH<kh^0};-Z4<ND=%qzT?W%8
zr-q^L-L2?D9m~-mU5-20$V3U2lE^?51fpMOkLAvvH`jC92U8RH19WVEj8`{h+?y-$
z8A9ow$zGXZOKN3_yzN0%Ae_n+-<LCP&e0isv(de~4WXFrc83Q^angMlq%hk@q9XvR
zu2b7DqqFT9P%C^7yoRE%^&|3fx;>abCBZ{}gOVeT7$D9C8JZNz^l9gN;_`NdR6tj5
zz_f%ckzU<Z{JWj1`2$7j5Jj7lNOmT5^8lH(*|8Ua-77orh!K(}FX+<NZO<kblP|&o
zz!$2F4S^F~CU~1J=1otiG-v{s`o26c2|yP%2Ie)ae_u?Z9sBDOg!c6RUsGO1U)>4*
zg#`eDFgpH@{_lqHD*Wow>^J-i`tSc?g?5$T>PyrA2!fgLei8iRE7Vn%tBU(K3m3sZ
zvi#EDS6QwK$lok4(Zd04%+<5}l9N|iuIicpv0&Us%NsPyZ*_AOdUYKAhK{0}_TPrn
zRp8b3{u>BK@2dar<o|8<SHV{s(QmLe#V_!`w56*&SIhf1j{>^d|Do8m)Pd-I>*eGn
O1oWaOaQbCZ1O5YC8*J48

literal 0
HcmV?d00001

diff --git a/opensiddur/tests/importer/miqra_al_pi_hamasorah/__init__.py b/opensiddur/tests/importer/miqra_al_pi_hamasorah/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_download.py b/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_download.py
new file mode 100644
index 0000000..6e4c498
--- /dev/null
+++ b/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_download.py
@@ -0,0 +1,106 @@
+import json
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+from opensiddur.importer.miqra_al_pi_hamasorah import download
+from opensiddur.importer.util.pages import (
+    miqra_al_pi_hamasorah_data_directory,
+    miqra_al_pi_hamasorah_sheets_directory,
+)
+
+FIXTURE_XLSX = (
+    Path(__file__).resolve().parents[2] / "fixtures" / "miqra_minimal.xlsx"
+)
+
+
+class TestDownloadMiqra(unittest.TestCase):
+    def setUp(self) -> None:
+        self.tmp = tempfile.TemporaryDirectory()
+        self.sourcetexts_root = Path(self.tmp.name)
+
+    def tearDown(self) -> None:
+        self.tmp.cleanup()
+
+    def _mock_response(self) -> MagicMock:
+        response = MagicMock()
+        response.raise_for_status = MagicMock()
+        response.content = FIXTURE_XLSX.read_bytes()
+        return response
+
+    @patch("opensiddur.importer.miqra_al_pi_hamasorah.download.requests.get")
+    def test_download_writes_tsv_and_manifest(self, mock_get: MagicMock) -> None:
+        mock_get.return_value = self._mock_response()
+
+        download.download_miqra(self.sourcetexts_root)
+
+        data_dir = miqra_al_pi_hamasorah_data_directory(self.sourcetexts_root)
+        sheets_dir = miqra_al_pi_hamasorah_sheets_directory(self.sourcetexts_root)
+
+        torah_tsv = sheets_dir / "torah.tsv"
+        readme_tsv = sheets_dir / "readme.tsv"
+        self.assertTrue(torah_tsv.is_file())
+        self.assertTrue(readme_tsv.is_file())
+        self.assertFalse((sheets_dir / "unknowntab.tsv").exists())
+
+        torah_lines = torah_tsv.read_text(encoding="utf-8").splitlines()
+        self.assertEqual(len(torah_lines), 2)
+        self.assertIn("בְּרֵאשִׁית", torah_lines[1])
+
+        manifest_path = data_dir / "manifest.json"
+        self.assertTrue(manifest_path.is_file())
+        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
+        self.assertEqual(manifest["spreadsheet_id"], download.SPREADSHEET_ID)
+        slugs = {s["slug"] for s in manifest["sheets"]}
+        self.assertIn("torah", slugs)
+        self.assertIn("readme", slugs)
+        for entry in manifest["sheets"]:
+            self.assertIn("sha256", entry)
+            self.assertEqual(len(entry["sha256"]), 64)
+
+        xlsx_files = list(data_dir.glob("*.xlsx"))
+        self.assertEqual(xlsx_files, [])
+
+        mock_get.assert_called_once()
+        call_kwargs = mock_get.call_args
+        self.assertEqual(call_kwargs[0][0], download.EXPORT_XLSX_URL)
+        self.assertIn("User-Agent", call_kwargs[1]["headers"])
+
+    @patch("opensiddur.importer.miqra_al_pi_hamasorah.download.requests.get")
+    def test_dry_run_writes_nothing(self, mock_get: MagicMock) -> None:
+        download.download_miqra(self.sourcetexts_root, dry_run=True)
+
+        data_dir = miqra_al_pi_hamasorah_data_directory(self.sourcetexts_root)
+        self.assertFalse(data_dir.exists())
+        mock_get.assert_not_called()
+
+    @patch("opensiddur.importer.miqra_al_pi_hamasorah.download.logger")
+    @patch("opensiddur.importer.miqra_al_pi_hamasorah.download.requests.get")
+    def test_unknown_sheet_logs_warning(
+        self, mock_get: MagicMock, mock_logger: MagicMock
+    ) -> None:
+        mock_get.return_value = self._mock_response()
+        download.download_miqra(self.sourcetexts_root)
+
+        warning_calls = [
+            c
+            for c in mock_logger.warning.call_args_list
+            if "UnknownTab" in str(c)
+        ]
+        self.assertEqual(len(warning_calls), 1)
+
+    def test_main_dry_run_exit_code(self) -> None:
+        with patch(
+            "opensiddur.importer.miqra_al_pi_hamasorah.download.download_miqra"
+        ) as mock_download:
+            code = download.main(
+                ["--dry-run", "--sourcetexts-root", str(self.sourcetexts_root)]
+            )
+        self.assertEqual(code, 0)
+        mock_download.assert_called_once()
+        self.assertTrue(mock_download.call_args.kwargs["dry_run"])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/pyproject.toml b/pyproject.toml
index 07982ea..8fdfecb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,6 +44,7 @@ dependencies = [
     "pyppeteer>=2.0.0",
     "diff-match-patch>=20241021",
     "pydantic>=2.11.7",
+    "openpyxl>=3.1.5",
 ]
 
 [project.urls]
diff --git a/uv.lock b/uv.lock
index 8f7acaa..60f0375 100644
--- a/uv.lock
+++ b/uv.lock
@@ -605,6 +605,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e1/5e/4b5aaaabddfacfe36ba7768817bd1f71a7a810a43705e531f3ae4c690767/emoji-2.15.0-py3-none-any.whl", hash = "sha256:205296793d66a89d88af4688fa57fd6496732eb48917a87175a023c8138995eb", size = 608433, upload-time = "2025-09-21T12:13:01.197Z" },
 ]
 
+[[package]]
+name = "et-xmlfile"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" },
+]
+
 [[package]]
 name = "executing"
 version = "2.2.1"
@@ -1840,6 +1849,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/7d/32/37734d769bc8b42e4938785313cc05aade6cb0fa72479d3220a0d61a4e78/openai-2.33.0-py3-none-any.whl", hash = "sha256:03ac37d70e8c9e3a8124214e3afa785e2cbc12e627fbd98177a086ef2fd87ad5", size = 1162695, upload-time = "2026-04-28T14:04:40.482Z" },
 ]
 
+[[package]]
+name = "openpyxl"
+version = "3.1.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "et-xmlfile" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" },
+]
+
 [[package]]
 name = "opensiddur-ai"
 version = "0.1.0"
@@ -1856,6 +1877,7 @@ dependencies = [
     { name = "markdown" },
     { name = "mwparserfromhell" },
     { name = "openai" },
+    { name = "openpyxl" },
     { name = "pydantic" },
     { name = "pyppeteer" },
     { name = "requests" },
@@ -1885,6 +1907,7 @@ requires-dist = [
     { name = "markdown", specifier = ">=3.9" },
     { name = "mwparserfromhell", specifier = ">=0.7.2" },
     { name = "openai", specifier = ">=1.101.0" },
+    { name = "openpyxl", specifier = ">=3.1.5" },
     { name = "pydantic", specifier = ">=2.11.7" },
     { name = "pyppeteer", specifier = ">=2.0.0" },
     { name = "requests", specifier = ">=2.32.4" },

From 623dc293134361702435af5e7ff37268e006debc Mon Sep 17 00:00:00 2001
From: Efraim Feinstein <efraim.feinstein@gmail.com>
Date: Wed, 27 May 2026 21:54:52 -0700
Subject: [PATCH 02/10] wip: miqra al pi hamasorah converter

---
 .../importer/jps1917/mediawiki_processor.py   | 832 +-----------------
 .../importer/miqra_al_pi_hamasorah/README.md  |   2 +-
 .../miqra_al_pi_hamasorah/convert_tsv.py      | 601 +++++++++++++
 .../miqra_al_pi_hamasorah/miqra_to_tei.xslt   | 243 +++++
 .../miqra_al_pi_hamasorah/miqra_wikitext.py   | 633 +++++++++++++
 .../importer/util/mediawiki_processor.py      | 515 +++++++++++
 .../miqra_al_pi_hamasorah/test_convert_tsv.py | 134 +++
 .../test_miqra_wikitext.py                    | 106 +++
 8 files changed, 2242 insertions(+), 824 deletions(-)
 create mode 100644 opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py
 create mode 100644 opensiddur/importer/miqra_al_pi_hamasorah/miqra_to_tei.xslt
 create mode 100644 opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py
 create mode 100644 opensiddur/importer/util/mediawiki_processor.py
 create mode 100644 opensiddur/tests/importer/miqra_al_pi_hamasorah/test_convert_tsv.py
 create mode 100644 opensiddur/tests/importer/miqra_al_pi_hamasorah/test_miqra_wikitext.py

diff --git a/opensiddur/importer/jps1917/mediawiki_processor.py b/opensiddur/importer/jps1917/mediawiki_processor.py
index 3d62bac..9894597 100644
--- a/opensiddur/importer/jps1917/mediawiki_processor.py
+++ b/opensiddur/importer/jps1917/mediawiki_processor.py
@@ -1,828 +1,14 @@
 """
-MediaWiki to XML Processor for JPS1917 Converter
+JPS1917 MediaWiki processor (compatibility wrapper).
 
-This module provides a modular framework for converting MediaWiki syntax to XML.
-Based on analysis of 1917 JPS Wikisource content, it handles templates and tags
-found in the source material.
-
-Analysis Results Summary:
-- Templates: 30+ types, 50,000+ instances (verse, sc, c, larger, etc.)
-- Tags: 11 types, 25,000+ instances (noinclude, dd, ref, table, etc.)
+The implementation lives in `opensiddur.importer.util.mediawiki_processor` so it
+can be reused by other importers.
 """
 
-import re
-import mwparserfromhell
-from typing import Dict, List, Any
-from dataclasses import dataclass
-from enum import Enum
-
-
-class ProcessingStage(Enum):
-    """Stages of MediaWiki processing"""
-    PREPROCESS = "preprocess"
-    TEMPLATES = "templates"
-    TAGS = "tags"
-    POSTPROCESS = "postprocess"
-
-
-@dataclass
-class ConversionResult:
-    """Result of a conversion operation"""
-    xml_content: str
-    metadata: Dict[str, Any]
-    warnings: List[str]
-    errors: List[str]
-    wikilinks: List[Dict[str, Any]]
-
-
-class MediaWikiProcessor:
-    """
-    Modular MediaWiki to XML processor for JPS1917 content.
-    
-    This processor handles the conversion of MediaWiki syntax to XML,
-    with separate modules for different types of templates and tags.
-    """
-    
-    def __init__(self):
-        self.template_handlers = {}
-        self.tag_handlers = {}
-        self.preprocessors = []
-        self.postprocessors = []
-        self.wikilinks = []  # Store captured wikilinks
-        self._initialize_handlers()
-    
-    def _initialize_handlers(self):
-        """Initialize all template and tag handlers"""
-        self._initialize_template_handlers()
-        self._initialize_tag_handlers()
-        self._initialize_preprocessors()
-        self._initialize_postprocessors()
-        self._initialize_wikilink_handlers()
-    
-    def _initialize_template_handlers(self):
-        """Initialize handlers for MediaWiki templates"""
-        
-        # Text Formatting Templates
-        self.template_handlers['sc'] = self._handle_small_caps
-        self.template_handlers['larger'] = self._handle_larger_text
-        self.template_handlers['x-larger'] = self._handle_x_larger_text
-        self.template_handlers['xx-larger'] = self._handle_xx_larger_text
-        self.template_handlers['xxx-larger'] = self._handle_xxx_larger_text
-        self.template_handlers['smaller'] = self._handle_smaller_text
-        
-        # Layout Templates
-        self.template_handlers['c'] = self._handle_center
-        self.template_handlers['right'] = self._handle_right_align
-        self.template_handlers['rule'] = self._handle_horizontal_rule
-        self.template_handlers['nop'] = self._handle_no_paragraph
-        
-        # Biblical Content Templates
-        self.template_handlers['verse'] = self._handle_verse
-        self.template_handlers['rh'] = self._handle_right_header
-        self.template_handlers['dropinitial'] = self._handle_drop_initial
-        self.template_handlers['dhr'] = self._handle_double_horizontal_rule
-        
-        # Navigation Templates
-        self.template_handlers['anchor'] = self._handle_anchor
-        self.template_handlers['anchor+'] = self._handle_anchor_plus
-        
-        # Language Templates
-        self.template_handlers['lang'] = self._handle_language
-        
-        # Reference Templates
-        self.template_handlers['smallrefs'] = self._handle_small_refs
-        
-        # Special Templates
-        self.template_handlers['hws'] = self._handle_hws
-        self.template_handlers['hwe'] = self._handle_hwe
-        self.template_handlers['***'] = self._handle_asterisks
-        self.template_handlers['reconstruct'] = self._handle_reconstruct
-        self.template_handlers['SIC'] = self._handle_sic
-        self.template_handlers['sic'] = self._handle_sic
-        self.template_handlers['sup'] = self._handle_superscript
-        self.template_handlers['bar'] = self._handle_bar
-        self.template_handlers['gap'] = self._handle_gap
-        self.template_handlers['overfloat left'] = self._handle_overfloat_left
-        self.template_handlers['float right'] = self._handle_float_right
-        self.template_handlers['smaller block/s'] = self._handle_smaller_block_start
-        self.template_handlers['smaller block/e'] = self._handle_smaller_block_end
-    
-    def _initialize_tag_handlers(self):
-        """Initialize handlers for HTML/XML tags"""
-        
-        # Structural Tags
-        self.tag_handlers['section'] = self._handle_section
-        self.tag_handlers['table'] = self._handle_table
-        self.tag_handlers['tr'] = self._handle_table_row
-        self.tag_handlers['td'] = self._handle_table_cell
-        
-        # Text Formatting Tags
-        self.tag_handlers['i'] = self._handle_italic
-        self.tag_handlers['br'] = self._handle_line_break
-        self.tag_handlers['span'] = self._handle_span
-        
-        # Content Tags
-        self.tag_handlers['dd'] = self._handle_definition_description
-        self.tag_handlers['ref'] = self._handle_reference
-        
-        # MediaWiki Specific Tags
-        self.tag_handlers['noinclude'] = self._handle_noinclude
-        self.tag_handlers['pagequality'] = self._handle_pagequality
-    
-    def _initialize_preprocessors(self):
-        """Initialize preprocessing functions"""
-        self.preprocessors = [
-            self._fix_noinclude_line_breaks,
-            self._convert_paragraph_breaks,
-            self._normalize_whitespace,
-            self._handle_special_characters,  # Enable special character processing
-            self._extract_metadata
-        ]
-    
-    def _initialize_postprocessors(self):
-        """Initialize postprocessing functions"""
-        self.postprocessors = [
-            self._validate_xml_structure,
-            self._finalize_metadata
-        ]
-    
-    def _initialize_wikilink_handlers(self):
-        """Initialize wikilink processing"""
-        # Wikilinks are processed during the main parsing loop
-        pass
-    
-    def _process_nested_content(self, content: str, depth: int = 0) -> str:
-        """Recursively process nested templates and other elements"""
-        # Prevent infinite recursion
-        if depth > 10:
-            return content
-            
-        # Parse the content to handle nested elements
-        parsed = mwparserfromhell.parse(content)
-        nodes_to_replace = []
-        
-        # Process nodes recursively
-        for node in parsed.nodes:
-            if hasattr(node, 'name'):  # Template
-                template_name = str(node.name).strip()
-                if template_name in self.template_handlers:
-                    try:
-                        # Process nested content within the template
-                        processed_node = self._process_template_with_nesting(node, depth + 1)
-                        replacement = self.template_handlers[template_name](processed_node)
-                        nodes_to_replace.append((node, replacement))
-                    except Exception as e:
-                        # If nested processing fails, try without nesting
-                        replacement = self.template_handlers[template_name](node)
-                        nodes_to_replace.append((node, replacement))
-                else:
-                    # Unknown template - process its content for nested elements
-                    processed_content = self._process_nested_content(str(node), depth + 1)
-                    nodes_to_replace.append((node, processed_content))
-            
-            elif hasattr(node, 'tag'):  # Tag
-                tag_name = str(node.tag).strip().lower()
-                if tag_name in self.tag_handlers:
-                    try:
-                        # Process nested content within the tag
-                        processed_node = self._process_tag_with_nesting(node, depth + 1)
-                        replacement = self.tag_handlers[tag_name](processed_node)
-                        nodes_to_replace.append((node, replacement))
-                    except Exception as e:
-                        # If nested processing fails, try without nesting
-                        replacement = self.tag_handlers[tag_name](node)
-                        nodes_to_replace.append((node, replacement))
-                else:
-                    # Unknown tag - process its content for nested elements
-                    processed_content = self._process_nested_content(str(node), depth + 1)
-                    nodes_to_replace.append((node, processed_content))
-            
-            elif hasattr(node, '__class__') and 'Wikilink' in str(node.__class__):  # Wikilink
-                try:
-                    replacement = self._handle_wikilink(node)
-                    nodes_to_replace.append((node, replacement))
-                except Exception as e:
-                    # If wikilink processing fails, keep original
-                    nodes_to_replace.append((node, str(node)))
-        
-        # Replace all nodes
-        for node, replacement in nodes_to_replace:
-            parsed.replace(node, replacement)
-        
-        return str(parsed)
-    
-    def _process_template_with_nesting(self, template, depth: int = 0) -> object:
-        """Process a template and its nested content"""
-        # Create a copy of the template to avoid modifying the original
-        import copy
-        processed_template = copy.deepcopy(template)
-        
-        # Process each parameter of the template
-        for param in processed_template.params:
-            if hasattr(param, 'value'):
-                # Process nested content in parameter values
-                processed_value = self._process_nested_content(str(param.value), depth + 1)
-                param.value = processed_value
-        
-        return processed_template
-    
-    def _process_tag_with_nesting(self, tag, depth: int = 0) -> object:
-        """Process a tag and its nested content"""
-        # Create a copy of the tag to avoid modifying the original
-        import copy
-        processed_tag = copy.deepcopy(tag)
-        
-        # Process nested content within the tag
-        if hasattr(processed_tag, 'contents') and processed_tag.contents:
-            processed_contents = self._process_nested_content(str(processed_tag.contents), depth + 1)
-            processed_tag.contents = processed_contents
-        
-        return processed_tag
-    
-    # ============================================================================
-    # TEMPLATE HANDLERS
-    # ============================================================================
-    
-    def _handle_small_caps(self, template) -> str:
-        """Convert {{sc|text}} to <sc>text</sc>"""
-        content = str(template.get(1, ''))
-        return f'<sc>{content}</sc>'
-    
-    def _handle_larger_text(self, template) -> str:
-        """Convert {{larger|text}} to <larger>text</larger>"""
-        content = str(template.get(1, ''))
-        return f'<larger>{content}</larger>'
-    
-    def _handle_x_larger_text(self, template) -> str:
-        """Convert {{x-larger|text}} to <x-larger>text</x-larger>"""
-        content = str(template.get(1, ''))
-        return f'<x-larger>{content}</x-larger>'
-    
-    def _handle_xx_larger_text(self, template) -> str:
-        """Convert {{xx-larger|text}} to <xx-larger>text</xx-larger>"""
-        content = str(template.get(1, ''))
-        return f'<xx-larger>{content}</xx-larger>'
-    
-    def _handle_xxx_larger_text(self, template) -> str:
-        """Convert {{xxx-larger|text}} to <xxx-larger>text</xxx-larger>"""
-        content = str(template.get(1, ''))
-        return f'<xxx-larger>{content}</xxx-larger>'
-    
-    def _handle_smaller_text(self, template) -> str:
-        """Convert {{smaller|text}} to <smaller>text</smaller>"""
-        content = str(template.get(1, ''))
-        return f'<smaller>{content}</smaller>'
-    
-    def _handle_center(self, template) -> str:
-        """Convert {{c|text}} to <c>text</c>"""
-        content = str(template.get(1, ''))
-        return f'<c>{content}</c>'
-    
-    def _handle_right_align(self, template) -> str:
-        """Convert {{right|text}} to <right>text</right>"""
-        content = str(template.get(1, ''))
-        return f'<right>{content}</right>'
-    
-    def _handle_horizontal_rule(self, template) -> str:
-        """Convert {{rule}} to <rule/>"""
-        return '<rule/>'
-    
-    def _handle_no_paragraph(self, template) -> str:
-        """Convert {{nop}} to <nop/>"""
-        return '<nop/>'
-    
-    def _handle_verse(self, template) -> str:
-        """Convert {{verse|chapter|verse|text}} to <verse chapter="..." verse="...">text</verse>"""
-        chapter = str(template.get('chapter', template.get(1, ''))).replace("chapter=", "")
-        verse = str(template.get('verse', template.get(2, ''))).replace("verse=", "")
-        text = str(template.get(3, template.get('text', '')))
-        chapter_attr = f' chapter="{chapter}"' if chapter else ''
-        verse_attr = f' verse="{verse}"' if verse else ''
-        if not chapter or not verse:
-            print(f"Invalid verse template: {template} {template.get(1, '')=} {template.get(2, '')=} {template.get(3, '')=}")
-            
-        return f'<verse{chapter_attr}{verse_attr}>{text}</verse>'
-    
-    def _handle_right_header(self, template) -> str:
-        """Convert {{rh|text}} to <rh>text</rh>"""
-        content = str(template.get(1, ''))
-        return f'<rh>{content}</rh>'
-    
-    def _handle_drop_initial(self, template) -> str:
-        """Convert {{dropinitial|letter}} to <dropinitial>letter</dropinitial>"""
-        letter = str(template.get(1, ''))
-        return f'<dropinitial>{letter}</dropinitial>'
-    
-    def _handle_double_horizontal_rule(self, template) -> str:
-        """Convert {{dhr}} to <dhr/>"""
-        value = str(template.get(1, ''))
-        if value:
-            value=f' value="{value}"'
-        else:
-            value=""
-        return f'<dhr{value}/>'
-    
-    def _handle_anchor(self, template) -> str:
-        """Convert {{anchor|name}} to <anchor name="name"/>"""
-        name = str(template.get(1, ''))
-        return f'<anchor name="{name}"/>'
-    
-    def _handle_anchor_plus(self, template) -> str:
-        """Convert {{anchor+|name|text}} to <anchor name="name">text</anchor>"""
-        name = str(template.get(1, ''))
-        text = str(template.get(2, ''))
-        return f'<anchor name="{name}">{text}</anchor>'
-    
-    def _handle_language(self, template) -> str:
-        """Convert {{lang|code|text}} to <lang code="code">text</lang>"""
-        code = str(template.get(1, ''))
-        text = str(template.get(2, ''))
-        return f'<lang code="{code}">{text}</lang>'
-    
-    def _handle_small_refs(self, template) -> str:
-        """Convert {{smallrefs}} to <smallrefs/>"""
-        return '<smallrefs/>'
-    
-    def _handle_hws(self, template) -> str:
-        """Convert {{hws|text}} to <hws>text</hws>"""
-        content = str(template.get(1, ''))
-        return f'<hws>{content}</hws>'
-    
-    def _handle_hwe(self, template) -> str:
-        """Convert {{hwe|text}} to <hwe>text</hwe>"""
-        content = str(template.get(1, ''))
-        return f'<hwe>{content}</hwe>'
-    
-    def _handle_asterisks(self, template) -> str:
-        """Convert {{***}} to <asterisks>***</asterisks>"""
-        n = str(template.get(1, '3'))
-        return f'<asterisks n="{n}">***</asterisks>'
-    
-    def _handle_reconstruct(self, template) -> str:
-        """Convert {{reconstruct|content|text}} to <reconstruct>text</reconstruct>"""
-        content = str(template.get(1, ''))
-        text = str(template.get(2, ''))
-        return f'<reconstruct><reg>{content}</reg><note>{text}</note></reconstruct>'
-    
-    def _handle_sic(self, template) -> str:
-        """Convert {{SIC|text}} to <sic>text</sic>"""
-        content = str(template.get(1, ''))
-        return f'<sic>{content}</sic>'
-    
-    def _handle_superscript(self, template) -> str:
-        """Convert {{sup|text}} to <sup>text</sup>"""
-        content = str(template.get(1, ''))
-        return f'<sup>{content}</sup>'
-    
-    def _handle_bar(self, template) -> str:
-        """Convert {{bar|length}} to <bar length="length"/>"""
-        length = str(template.get(1, '6'))
-        return f'<bar length="{length}"/>'
-    
-    def _handle_gap(self, template) -> str:
-        """Convert {{gap|length}} to <gap length="length"/>"""
-        length = str(template.get(1, ''))
-        if length:
-            return f'<gap length="{length}"/>'
-        else:
-            return '<gap/>'
-    
-    def _handle_overfloat_left(self, template) -> str:
-        """Convert {{overfloat left|align|padding|text}} to <overfloat_left align="..." padding="...">text</overfloat_left>"""
-        # Get parameters - can be positional or named
-        align = str(template.get('align', template.get(1, '')))
-        padding = str(template.get('padding', template.get(2, '')))
-        text = str(template.get('text', template.get(3, '')))
-        
-        # Clean up named parameters (remove parameter name prefixes)
-        align = align.replace('align=', '') if align.startswith('align=') else align
-        padding = padding.replace('padding=', '') if padding.startswith('padding=') else padding
-        text = text.replace('text=', '') if text.startswith('text=') else text
-        
-        # Build attributes
-        attributes = []
-        if align:
-            attributes.append(f'align="{align}"')
-        if padding:
-            attributes.append(f'padding="{padding}"')
-        
-        attr_str = ' ' + ' '.join(attributes) if attributes else ''
-        
-        return f'<overfloat_left{attr_str}>{text}</overfloat_left>'
-    
-    def _handle_float_right(self, template) -> str:
-        """Convert {{float right|text}} to <float_right>text</float_right>"""
-        text = str(template.get(1, ''))
-        return f'<float_right>{text}</float_right>'
-    
-    def _handle_smaller_block_start(self, template) -> str:
-        """Convert {{smaller block/s}} to <smaller_block>"""
-        return '<smaller_block>'
-    
-    def _handle_smaller_block_end(self, template) -> str:
-        """Convert {{smaller block/e}} to </smaller_block>"""
-        return '</smaller_block>'
-    
-    # ============================================================================
-    # WIKILINK HANDLERS
-    # ============================================================================
-    
-    def _handle_wikilink(self, wikilink) -> str:
-        """Process and capture wikilinks"""
-        # Extract wikilink information
-        title = str(wikilink.title) if hasattr(wikilink, 'title') and wikilink.title else ''
-        text = str(wikilink.text) if hasattr(wikilink, 'text') and wikilink.text else title
-        
-        # Process templates within the wikilink text
-        processed_text = self._process_nested_content(text)
-        
-        # Store wikilink information
-        wikilink_info = {
-            'title': title,
-            'text': processed_text,
-            'namespace': str(wikilink.namespace) if hasattr(wikilink, 'namespace') and wikilink.namespace else None,
-            'section': str(wikilink.section) if hasattr(wikilink, 'section') and wikilink.section else None,
-            'fragment': str(wikilink.fragment) if hasattr(wikilink, 'fragment') and wikilink.fragment else None
-        }
-        self.wikilinks.append(wikilink_info)
-        
-        # Convert to XML - use __link__ tag with attributes
-        attributes = []
-        if title:
-            attributes.append(f'title="{title}"')
-        if wikilink_info['namespace']:
-            attributes.append(f'namespace="{wikilink_info["namespace"]}"')
-        if wikilink_info['section']:
-            attributes.append(f'section="{wikilink_info["section"]}"')
-        if wikilink_info['fragment']:
-            attributes.append(f'fragment="{wikilink_info["fragment"]}"')
-        
-        attr_str = ' ' + ' '.join(attributes) if attributes else ''
-        return f'<__link__{attr_str}>{processed_text}</__link__>'
-    
-    # ============================================================================
-    # TAG HANDLERS
-    # ============================================================================
-    
-    def _handle_section(self, tag) -> str:
-        """Convert <section> to <section> with begin and end attributes"""
-        content = str(tag.contents) if tag.contents else ''
-        
-        # Extract begin and end attributes
-        attributes = []
-        if hasattr(tag, 'attributes') and tag.attributes:
-            for attr in tag.attributes:
-                if hasattr(attr, 'name') and hasattr(attr, 'value'):
-                    attr_name = str(attr.name)
-                    attr_value = str(attr.value)
-                    if attr_name in ['begin', 'end']:
-                        attributes.append(f'{attr_name}="{attr_value}"')
-        
-        # Add begin and end attributes if they exist
-        attr_str = ' ' + ' '.join(attributes) if attributes else ''
-        
-        return f'<section{attr_str}>{content}</section>'
-    
-    def _handle_table(self, tag) -> str:
-        """Convert <table> to <table>"""
-        content = str(tag.contents) if tag.contents else ''
-        attributes = self._extract_tag_attributes(tag)
-        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
-        return f'<table{attr_str}>{content}</table>'
-    
-    def _handle_table_row(self, tag) -> str:
-        """Convert <tr> to <tr>"""
-        content = str(tag.contents) if tag.contents else ''
-        attributes = self._extract_tag_attributes(tag)
-        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
-        return f'<tr{attr_str}>{content}</tr>'
-    
-    def _handle_table_cell(self, tag) -> str:
-        """Convert <td> to <td>"""
-        content = str(tag.contents) if tag.contents else ''
-        attributes = self._extract_tag_attributes(tag)
-        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
-        return f'<td{attr_str}>{content}</td>'
-    
-    def _handle_italic(self, tag) -> str:
-        """Convert <i> to <i>"""
-        content = str(tag.contents) if tag.contents else ''
-        attributes = self._extract_tag_attributes(tag)
-        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
-        return f'<i{attr_str}>{content}</i>'
-    
-    def _handle_line_break(self, tag) -> str:
-        """Convert <br> to <br>"""
-        attributes = self._extract_tag_attributes(tag)
-        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
-        return f'<br{attr_str}/>'
-    
-    def _handle_span(self, tag) -> str:
-        """Convert <span> to <span>"""
-        content = str(tag.contents) if tag.contents else ''
-        attributes = self._extract_tag_attributes(tag)
-        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
-        return f'<span{attr_str}>{content}</span>'
-    
-    def _handle_definition_description(self, tag) -> str:
-        """Convert <dd> to <dd>"""
-        content = str(tag.contents) if tag.contents else ''
-        attributes = self._extract_tag_attributes(tag)
-        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
-        return f'<dd{attr_str}>{content}</dd>'
-    
-    def _handle_reference(self, tag) -> str:
-        """Convert <ref> to <ref>"""
-        content = str(tag.contents) if tag.contents else ''
-        attributes = self._extract_tag_attributes(tag)
-        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
-        return f'<ref{attr_str}>{content}</ref>'
-    
-    def _handle_noinclude(self, tag) -> str:
-        """Convert <noinclude> to <noinclude>"""
-        content = str(tag.contents) if tag.contents else ''
-        attributes = self._extract_tag_attributes(tag)
-        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
-        return f'<noinclude{attr_str}>{content}</noinclude>'
-    
-    def _handle_pagequality(self, tag) -> str:
-        """Convert <pagequality> to <pagequality>"""
-        content = str(tag.contents) if tag.contents else ''
-        attributes = self._extract_tag_attributes(tag)
-        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
-        return f'<pagequality{attr_str}>{content}</pagequality>'
-    
-    def _extract_tag_attributes(self, tag) -> Dict[str, str]:
-        """Extract all attributes from a tag"""
-        attributes = {}
-        if hasattr(tag, 'attributes') and tag.attributes:
-            for attr in tag.attributes:
-                if hasattr(attr, 'name') and hasattr(attr, 'value'):
-                    attributes[str(attr.name)] = str(attr.value)
-        return attributes
-    
-    # ============================================================================
-    # PREPROCESSORS
-    # ============================================================================
-    
-    def _fix_noinclude_line_breaks(self, content: str) -> str:
-        """Insert a blank line after </noinclude> tags when followed by non-whitespace content"""
-        # Pattern to match </noinclude> followed by optional whitespace and any non-whitespace character
-        # This handles cases like: </noinclude>:text, </noinclude>text, </noinclude> {{template}}, etc.
-        pattern = r'(</noinclude>)\s*(\S)'
-        
-        def replace_noinclude_content(match):
-            noinclude_tag = match.group(1)
-            following_content = match.group(2)
-            # Insert a newline after </noinclude> and before the following content
-            return f'{noinclude_tag}\n{following_content}'
-        
-        # Apply the replacement
-        content = re.sub(pattern, replace_noinclude_content, content)
-        
-        return content
-    
-    def _normalize_whitespace(self, content: str) -> str:
-        """Normalize whitespace in content"""
-        # Normalize multiple spaces to single space
-        content = re.sub(r' +', ' ', content)
-        # Normalize line breaks, but preserve paragraph markers
-        content = re.sub(r'\n+', '\n', content)
-        return content.strip()
-    
-    def _convert_paragraph_breaks(self, content: str) -> str:
-        """Convert double newlines to paragraph indicators, but skip if {{nop}} is directly adjacent"""
-        
-        # First, protect {{nop}} markers and their immediate context
-        # Replace {{nop}} with a temporary marker
-        content = content.replace('{{nop}}', '___NOP_MARKER___')
-        
-        # Convert \n\n to <p/>\n paragraph indicators, but not if they're adjacent to ___NOP_MARKER___
-        # This regex matches \n\n that are NOT preceded or followed by ___NOP_MARKER___
-        content = re.sub(r'(?<!___NOP_MARKER___)\n\n(?!___NOP_MARKER___)', '<p/>\n', content)
-        
-        # Restore {{nop}} markers
-        content = content.replace('___NOP_MARKER___', '{{nop}}')
-        
-        return content
-    
-    def _handle_special_characters(self, content: str) -> str:
-        """Handle special characters and entities - escape ampersands not in XML/HTML entities"""
-        # More comprehensive regex to match XML/HTML entities
-        # This includes named entities like &amp;, &lt;, &gt;, &quot;, &apos;
-        # and numeric entities like &#123; and &#x1F;
-        entity_pattern = r'&(?:[a-zA-Z][a-zA-Z0-9]*|#[0-9]+|#x[0-9a-fA-F]+);'
-        
-        # Split content by entities to preserve them
-        parts = re.split(f'({entity_pattern})', content)
-        
-        # Process each part
-        result_parts = []
-        for part in parts:
-            if re.match(entity_pattern, part):
-                # This is an entity, keep it as-is
-                result_parts.append(part)
-            else:
-                # This is not an entity, escape standalone ampersands
-                escaped_part = part.replace('&', '&amp;')
-                result_parts.append(escaped_part)
-        
-        return ''.join(result_parts)
-    
-    def _extract_metadata(self, content: str) -> Dict[str, Any]:
-        """Extract metadata from content"""
-        metadata = {}
-        # Extract page quality information
-        # Extract language information
-        # Extract structural information
-        return metadata
-    
-    # ============================================================================
-    # POSTPROCESSORS
-    # ============================================================================
-    
-    def _validate_xml_structure(self, content: str) -> str:
-        """Validate and fix XML structure"""
-        # Ensure proper nesting
-        # Validate against schema
-        # Fix common issues
-        return content
-    
-    def _cleanup_empty_elements(self, content: str) -> str:
-        """Remove or fix empty elements"""
-        # Remove empty elements
-        content = re.sub(r'<(\w+)[^>]*></\1>', '', content)
-        return content
-    
-    def _finalize_metadata(self, content: str) -> str:
-        """Finalize metadata and add to content"""
-        # Add final metadata
-        # Ensure proper document structure
-        return content
-    
-    # ============================================================================
-    # MAIN PROCESSING METHODS
-    # ============================================================================
-    
-    def process_wikitext(self, wikitext: str) -> ConversionResult:
-        """
-        Main method to process MediaWiki wikitext to XML.
-        
-        Args:
-            wikitext: The MediaWiki content to convert
-            
-        Returns:
-            ConversionResult with XML content and metadata
-        """
-        warnings = []
-        errors = []
-        metadata = {}
-        
-        try:
-            # Preprocessing
-            content = wikitext
-            for preprocessor in self.preprocessors:
-                if preprocessor == self._extract_metadata:
-                    metadata.update(preprocessor(content))
-                else:
-                    content = preprocessor(content)
-            
-            # Parse MediaWiki content
-            parsed = mwparserfromhell.parse(content)
-            
-            # Process all nodes with nested content support
-            nodes_to_replace = []
-            
-            # Process nodes in the order they appear in the document
-            for node in parsed.nodes:
-                if hasattr(node, 'name'):  # Template
-                    template_name = str(node.name).strip()
-                    if template_name in self.template_handlers:
-                        try:
-                            # Process nested content within the template
-                            processed_node = self._process_template_with_nesting(node)
-                            replacement = self.template_handlers[template_name](processed_node)
-                            nodes_to_replace.append((node, replacement))
-                        except Exception as e:
-                            errors.append(f"Error processing template {template_name}: {str(e)}")
-                    else:
-                        warnings.append(f"Unknown template: {template_name}")
-                
-                elif hasattr(node, 'tag'):  # Tag
-                    tag_name = str(node.tag).strip().lower()
-                    if tag_name in self.tag_handlers:
-                        try:
-                            # Process nested content within the tag
-                            processed_node = self._process_tag_with_nesting(node)
-                            replacement = self.tag_handlers[tag_name](processed_node)
-                            nodes_to_replace.append((node, replacement))
-                        except Exception as e:
-                            errors.append(f"Error processing tag {tag_name}: {str(e)}")
-                    else:
-                        warnings.append(f"Unknown tag: {tag_name}")
-                
-                elif hasattr(node, '__class__') and 'Wikilink' in str(node.__class__):  # Wikilink
-                    try:
-                        replacement = self._handle_wikilink(node)
-                        nodes_to_replace.append((node, replacement))
-                    except Exception as e:
-                        errors.append(f"Error processing wikilink: {str(e)}")
-            
-            # Replace all nodes in order
-            for node, replacement in nodes_to_replace:
-                parsed.replace(node, replacement)
-            
-            # Get processed content
-            xml_content = str(parsed)
-            
-            # Postprocessing
-            for postprocessor in self.postprocessors:
-                xml_content = postprocessor(xml_content)
-            
-            # Wrap in mediawiki tag
-            xml_content = f'<mediawiki>{xml_content}</mediawiki>'
-            
-            return ConversionResult(
-                xml_content=xml_content,
-                metadata=metadata,
-                warnings=warnings,
-                errors=errors,
-                wikilinks=self.wikilinks.copy()
-            )
-            
-        except Exception as e:
-            errors.append(f"Fatal error in processing: {str(e)}")
-            return ConversionResult(
-                xml_content="<mediawiki></mediawiki>",
-                metadata={},
-                warnings=warnings,
-                errors=errors,
-                wikilinks=[]
-            )
-    
-    def add_template_handler(self, template_name: str, handler_func):
-        """Add a custom template handler"""
-        self.template_handlers[template_name] = handler_func
-    
-    def add_tag_handler(self, tag_name: str, handler_func):
-        """Add a custom tag handler"""
-        self.tag_handlers[tag_name] = handler_func
-    
-    def add_preprocessor(self, preprocessor_func):
-        """Add a custom preprocessor"""
-        self.preprocessors.append(preprocessor_func)
-    
-    def add_postprocessor(self, postprocessor_func):
-        """Add a custom postprocessor"""
-        self.postprocessors.append(postprocessor_func)
-    
-    def get_wikilinks(self) -> List[Dict[str, Any]]:
-        """Get all captured wikilinks"""
-        return self.wikilinks.copy()
-    
-    def clear_wikilinks(self):
-        """Clear all captured wikilinks"""
-        self.wikilinks.clear()
-
-
-# ============================================================================
-# CONVENIENCE FUNCTIONS
-# ============================================================================
-
-def create_processor() -> MediaWikiProcessor:
-    """Create a new MediaWiki processor instance"""
-    return MediaWikiProcessor()
-
-
-def process_page(page_content: str) -> ConversionResult:
-    """Process a single page of MediaWiki content"""
-    processor = create_processor()
-    return processor.process_wikitext(page_content)
-
-
-if __name__ == "__main__":
-    # Example usage
-    processor = create_processor()
-    
-    # Example MediaWiki content with nested templates
-    sample_wikitext = """
-    {{verse|1|1|In the beginning God created the heaven and the earth.}}
-
-    {{verse|1|2|And the earth was without form, and void; and darkness was upon the face of the deep.}}
+from opensiddur.importer.util.mediawiki_processor import (  # noqa: F401
+    ConversionResult,
+    MediaWikiProcessor,
+    create_processor,
+    process_page,
+)
 
-    {{sc|Genesis}} {{c|Chapter 1}}
-    {{larger|The Creation}}
-    <ref name="gen1">This is a reference</ref>
-    
-    See also [[Genesis]] and [[Creation myth]] for more information.
-    
-    Nested example: {{sc|{{larger|Bold Large Text}}}}
-    Complex nested: {{verse|1|3|{{sc|God}} said, {{larger|Let there be light}}}}
-    """
-    
-    result = processor.process_wikitext(sample_wikitext)
-    print("XML Output:")
-    print(result.xml_content)
-    print("\nWarnings:", result.warnings)
-    print("Errors:", result.errors)
-    print("Wikilinks:", result.wikilinks)
diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/README.md b/opensiddur/importer/miqra_al_pi_hamasorah/README.md
index 19b3cfe..827ef3e 100644
--- a/opensiddur/importer/miqra_al_pi_hamasorah/README.md
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/README.md
@@ -4,7 +4,7 @@ Scripts to download [*Miqra according to the Masorah*](https://docs.google.com/s
 
 ## License
 
-The README tab of the source spreadsheet states that the text is prepared by Sefer Avi Kadish, based on Hebrew Wikisource material, and is licensed **CC-BY-SA 4.0 International**, with attribution to Hebrew Wikisource. See the downloaded `sheets/readme.tsv` for the full Hebrew and English wording.
+The README tab of the source spreadsheet states that the text is prepared by Avi Kadish, based on Hebrew Wikisource material, and is licensed **CC-BY-SA 4.0 International**, with attribution to Hebrew Wikisource. See the downloaded `sheets/readme.tsv` for the full Hebrew and English wording.
 
 ## Download
 
diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py b/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py
new file mode 100644
index 0000000..43a3217
--- /dev/null
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py
@@ -0,0 +1,601 @@
+from __future__ import annotations
+
+import argparse
+import csv
+import logging
+import re
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Iterable, Optional
+
+import mwparserfromhell
+
+from opensiddur.common.constants import PROJECT_DIRECTORY
+from opensiddur.common.xslt import xslt_transform_string
+from opensiddur.importer.util.pages import (
+    default_sourcetexts_root,
+    miqra_al_pi_hamasorah_data_directory,
+    miqra_al_pi_hamasorah_sheets_directory,
+)
+from opensiddur.importer.util.prettify import prettify_xml
+from opensiddur.importer.util.validation import validate
+from opensiddur.importer.miqra_al_pi_hamasorah.miqra_wikitext import (
+    wikitext_to_intermediate_xml,
+)
+
+logger = logging.getLogger(__name__)
+
+MIQRA_TO_TEI_XSLT = Path(__file__).parent / "miqra_to_tei.xslt"
+
+# Biblical-book tabs only (5-column A–E schema). Do not ingest special/auto_edits/etc.
+BIBLICAL_TSV_SLUGS = frozenset(
+    {
+        "torah",
+        "neviim_rishonim",
+        "neviim_acharonim",
+        "sifrei_emet",
+        "chamisha_megillot",
+        "ketuvim_aharonim",
+    }
+)
+
+_NON_VERSE_ROW_IDS = frozenset({"0", "תתת"})
+
+
+def make_project_directory(project_dir: Path | None = None) -> Path:
+    directory = (
+        project_dir.resolve()
+        if project_dir is not None
+        else PROJECT_DIRECTORY / "miqra_al_pi_hamasorah"
+    )
+    directory.mkdir(parents=True, exist_ok=True)
+    return directory
+
+
+def _default_project_directory() -> Path:
+    return PROJECT_DIRECTORY / "miqra_al_pi_hamasorah"
+
+
+@dataclass(frozen=True)
+class Book:
+    book_name_he: str
+    book_name_en: str
+    file_name: str
+
+
+@dataclass(frozen=True)
+class Index:
+    index_title_en: str
+    index_title_he: Optional[str]
+    index_sub_en: Optional[str]
+    index_sub_he: Optional[str]
+    file_name: str
+    transclusions: list[Book | "Index"]
+
+
+TANAKH_INDEX: list[Index] = [
+    Index(
+        index_title_en="Miqra al pi ha-Masorah",
+        index_title_he="מקרא על פי המסורה",
+        index_sub_en=None,
+        index_sub_he=None,
+        file_name="index",
+        transclusions=[
+            Index(
+                index_title_en="The Law",
+                index_title_he="תורה",
+                index_sub_en=None,
+                index_sub_he=None,
+                file_name="the_law",
+                transclusions=[
+                    Book("בראשית", "Genesis", "genesis"),
+                    Book("שמות", "Exodus", "exodus"),
+                    Book("ויקרא", "Leviticus", "leviticus"),
+                    Book("במדבר", "Numbers", "numbers"),
+                    Book("דברים", "Deuteronomy", "deuteronomy"),
+                ],
+            ),
+            Index(
+                index_title_en="The Prophets",
+                index_title_he="נביאים",
+                index_sub_en=None,
+                index_sub_he=None,
+                file_name="the_prophets",
+                transclusions=[
+                    Book("יהושע", "Joshua", "joshua"),
+                    Book("שפטים", "Judges", "judges"),
+                    Book("שמואל א", "I Samuel", "samuel_1"),
+                    Book("שמואל ב", "II Samuel", "samuel_2"),
+                    Book("מלכים א", "I Kings", "kings_1"),
+                    Book("מלכים ב", "II Kings", "kings_2"),
+                    Book("ישעיה", "Isaiah", "isaiah"),
+                    Book("ירמיה", "Jeremiah", "jeremiah"),
+                    Book("יחזקאל", "Ezekiel", "ezekiel"),
+                    Index(
+                        index_title_en="The Twelve",
+                        index_title_he=None,
+                        index_sub_en=None,
+                        index_sub_he=None,
+                        file_name="the_twelve",
+                        transclusions=[
+                            Book("הושע", "Hosea", "hosea"),
+                            Book("יואל", "Joel", "joel"),
+                            Book("עמוס", "Amos", "amos"),
+                            Book("עובדיה", "Obadiah", "obadiah"),
+                            Book("יונה", "Jonah", "jonah"),
+                            Book("מיכה", "Micah", "micah"),
+                            Book("נחום", "Nahum", "nahum"),
+                            Book("חבקוק", "Habakkuk", "habakkuk"),
+                            Book("צפניה", "Zephaniah", "zephaniah"),
+                            Book("חגי", "Haggai", "haggai"),
+                            Book("זכריה", "Zechariah", "zechariah"),
+                            Book("מלאכי", "Malachi", "malachi"),
+                        ],
+                    ),
+                ],
+            ),
+            Index(
+                index_title_en="The Writings",
+                index_title_he="כתובים",
+                index_sub_en=None,
+                index_sub_he=None,
+                file_name="the_writings",
+                transclusions=[
+                    Book("תהלים", "Psalms", "psalms"),
+                    Book("משלי", "Proverbs", "proverbs"),
+                    Book("איוב", "Job", "job"),
+                    Book("שיר השירים", "Song of Songs", "song_of_songs"),
+                    Book("רות", "Ruth", "ruth"),
+                    Book("איכה", "Lamentations", "lamentations"),
+                    Book("קהלת", "Ecclesiastes", "ecclesiastes"),
+                    Book("אסתר", "Esther", "esther"),
+                    Book("דניאל", "Daniel", "daniel"),
+                    Book("עזרא", "Ezra", "ezra"),
+                    Book("נחמיה", "Nehemiah", "nehemiah"),
+                    Book("דברי הימים א", "I Chronicles", "chronicles_1"),
+                    Book("דברי הימים ב", "II Chronicles", "chronicles_2"),
+                ],
+            ),
+        ],
+    )
+]
+
+
+def _flatten_books(indices: Iterable[Index]) -> list[Book]:
+    books: list[Book] = []
+    for idx in indices:
+        for t in idx.transclusions:
+            if isinstance(t, Book):
+                books.append(t)
+            else:
+                books.extend(_flatten_books([t]))
+    return books
+
+
+def header(
+    title_he: Optional[str],
+    title_en: str,
+    *,
+    project_id: str = "miqra_al_pi_hamasorah",
+    namespace: str = "bible",
+    entrypoint: str = "tanakh",
+    qualifier: str = "",
+    license_url: str = "https://creativecommons.org/licenses/by-sa/4.0/",
+    license_name: str = "Creative Commons Attribution-ShareAlike 4.0 International",
+) -> str:
+    title_he_xml = (
+        f"""<tei:title type="alt" xml:lang="he">{title_he}</tei:title>""" if title_he else ""
+    )
+    return f"""<tei:teiHeader>
+  <tei:fileDesc>
+    <tei:titleStmt>
+      <tei:title type="main" xml:lang="en">{title_en}</tei:title>
+      {title_he_xml}
+    </tei:titleStmt>
+    <tei:publicationStmt>
+      <tei:distributor>
+        <tei:ref target="http://opensiddur.org">Open Siddur Project</tei:ref>
+      </tei:distributor>
+      <tei:idno type="urn">urn:x-opensiddur:text:{namespace}:{entrypoint}{qualifier}@{project_id}</tei:idno>
+      <tei:availability status="free">
+        <tei:licence target="{license_url}">{license_name}</tei:licence>
+      </tei:availability>
+    </tei:publicationStmt>
+    <tei:sourceDesc>
+      <tei:bibl>
+        <tei:title xml:lang="he">מקרא על פי המסורה</tei:title>
+        <tei:editor>Avi Kadish</tei:editor>
+        <tei:distributor>
+          <tei:ref target="https://he.wikisource.org/wiki/%D7%9E%D7%A7%D7%A8%D7%90_%D7%A2%D7%9C_%D7%A4%D7%99_%D7%94%D7%9E%D7%A1%D7%95%D7%A8%D7%94#%D7%A8%D7%90%D7%A9">Hebrew Wikisource</tei:ref>
+        </tei:distributor>
+        <tei:idno type="url">https://he.wikisource.org/wiki/%D7%9E%D7%A7%D7%A8%D7%90_%D7%A2%D7%9C_%D7%A4%D7%99_%D7%94%D7%9E%D7%A1%D7%95%D7%A8%D7%94#%D7%A8%D7%90%D7%A9</tei:idno>
+        <tei:note xml:lang="en">Prepared by Avi Kadish, based on Hebrew Wikisource material; distributed via a public Google Sheet.</tei:note>
+      </tei:bibl>
+    </tei:sourceDesc>
+  </tei:fileDesc>
+</tei:teiHeader>
+"""
+
+
+def tei_file(
+    header_xml: str,
+    *,
+    default_lang: str = "he",
+    front: str = "",
+    body: str = "",
+    back: str = "",
+    stand_off: str = "",
+) -> str:
+    return f"""<tei:TEI xml:lang="{default_lang}" xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:j="http://jewishliturgy.org/ns/jlptei/2">
+{header_xml}
+<tei:text>
+{front}
+{body}
+{back}
+</tei:text>
+{stand_off}
+</tei:TEI>
+"""
+
+
+def validate_and_write_tei_file(tei_content: str, file_name: str, project_dir: Path | None) -> Path:
+    directory = project_dir.resolve() if project_dir is not None else _default_project_directory()
+    out_path = directory / f"{file_name}.xml"
+    pretty_xml = prettify_xml(tei_content, remove_xml_declaration=True)
+    is_valid, errors = validate(pretty_xml)
+    if not is_valid:
+        raise Exception(f"Errors in {file_name}: {errors}")
+    out_path.write_text(pretty_xml, encoding="utf-8")
+    return out_path
+
+
+def _xml_escape(text: str) -> str:
+    return (
+        text.replace("&", "&amp;")
+        .replace("<", "&lt;")
+        .replace(">", "&gt;")
+        .replace('"', "&quot;")
+        .replace("'", "&apos;")
+    )
+
+
+_PAGE_KEY_RE = re.compile(r"^\s*(?:ספר\s+)?(?P<book>[^/]+)\s*/\s*(?P<chapter>[^/\s]+)\s*$")
+
+
+def _book_key_from_page_key(page_key: str) -> Optional[str]:
+    m = _PAGE_KEY_RE.match(page_key or "")
+    if not m:
+        return None
+    return m.group("book").strip()
+
+
+_HEBREW_NUM_RE = re.compile(r"^[\u05d0-\u05ea\"׳״\s]+$")
+
+
+def _hebrew_numeral_to_int(value: str) -> Optional[int]:
+    """
+    Very small Hebrew-numeral parser for verse/chapter labels.
+
+    Handles single-letter verse labels (א,ב,ג,...) and common gershayim/geresh marks.
+    For anything more complex, we return None and fall back to original.
+    """
+    s = (value or "").strip()
+    if not s:
+        return None
+    s = s.replace("״", "").replace("׳", "").replace("'", "").replace('"', "").strip()
+    if not s:
+        return None
+    if not _HEBREW_NUM_RE.match(s):
+        return None
+
+    # Simple gematria for Hebrew letters
+    mapping = {
+        "א": 1,
+        "ב": 2,
+        "ג": 3,
+        "ד": 4,
+        "ה": 5,
+        "ו": 6,
+        "ז": 7,
+        "ח": 8,
+        "ט": 9,
+        "י": 10,
+        "כ": 20,
+        "ך": 20,
+        "ל": 30,
+        "מ": 40,
+        "ם": 40,
+        "נ": 50,
+        "ן": 50,
+        "ס": 60,
+        "ע": 70,
+        "פ": 80,
+        "ף": 80,
+        "צ": 90,
+        "ץ": 90,
+        "ק": 100,
+        "ר": 200,
+        "ש": 300,
+        "ת": 400,
+    }
+
+    total = 0
+    for ch in s:
+        if ch.isspace():
+            continue
+        v = mapping.get(ch)
+        if v is None:
+            return None
+        total += v
+    return total if total > 0 else None
+
+
+def _normalize_to_arabic_numerals(value: str) -> str:
+    s = (value or "").strip()
+    if not s:
+        return ""
+    if s.isdigit():
+        return s
+    n = _hebrew_numeral_to_int(s)
+    if n is not None:
+        return str(n)
+    return ""
+
+
+def _valid_urn_segment(value: str) -> str:
+    """Return an Arabic numeral string suitable for URN path segments, or empty."""
+    normalized = _normalize_to_arabic_numerals(value)
+    return normalized if normalized.isdigit() else ""
+
+
+def _chapter_from_page_key(page_key: str) -> str:
+    m = _PAGE_KEY_RE.match(page_key or "")
+    if not m:
+        return ""
+    return _normalize_to_arabic_numerals(m.group("chapter").strip())
+
+
+def _extract_m_pasuk(scaffold_wikitext: str) -> tuple[str, str]:
+    """
+    Extract (chapter, verse) from {{מ:פסוק|...}} when present.
+    Expected: {{מ:פסוק|<book>|<chapter>|<verse>}}.
+    """
+    parsed = mwparserfromhell.parse(scaffold_wikitext or "")
+    # Top-level only: avoid nested {{מ:פסוק|...}} inside verse text in other columns.
+    for t in parsed.filter_templates(recursive=False):
+        if str(t.name).strip() != "מ:פסוק":
+            continue
+        ch_raw = str(t.get(2).value).strip() if t.has(2) else ""
+        v_raw = str(t.get(3).value).strip() if t.has(3) else ""
+        ch = _valid_urn_segment(ch_raw)
+        v = _valid_urn_segment(v_raw)
+        if ch and v:
+            return ch, v
+    return "", ""
+
+
+def _extract_chapter_verse_numbers(page_key: str, row_id: str, scaffold_wikitext: str) -> tuple[str, str]:
+    row_id = (row_id or "").strip()
+    if row_id in _NON_VERSE_ROW_IDS or len(row_id) > 8:
+        return "", ""
+
+    ch2, v2 = _extract_m_pasuk(scaffold_wikitext)
+    if ch2 and v2:
+        return ch2, v2
+
+    chapter = _valid_urn_segment(_chapter_from_page_key(page_key))
+    verse = _valid_urn_segment(row_id)
+    if chapter and verse:
+        return chapter, verse
+    return "", ""
+
+
+def _build_book_name_map() -> dict[str, Book]:
+    # Map Hebrew book title → Book
+    books = _flatten_books(TANAKH_INDEX)
+    return {b.book_name_he: b for b in books}
+
+
+def _iter_tsv_rows(tsv_path: Path) -> Iterable[list[str]]:
+    with tsv_path.open("r", encoding="utf-8", newline="") as f:
+        reader = csv.reader(f, delimiter="\t")
+        for row in reader:
+            yield row
+
+
+def _looks_like_header_row(row: list[str]) -> bool:
+    # Conservative heuristic: TSV export may include a header row with obvious labels.
+    joined = "\t".join(row).lower()
+    return any(k in joined for k in ("page", "row", "navigation", "scaffold", "text", "עמוד", "שורה"))
+
+
+def miqra_rows_to_intermediate(book: Book, sheets_dir: Path) -> str:
+    """
+    Build an intermediate XML document for a single book.
+
+    We scan all TSVs under sheets_dir and select rows whose page key identifies
+    the requested book.
+    """
+    he_to_book = _build_book_name_map()
+    target_he = book.book_name_he
+
+    rows_xml: list[str] = []
+    for tsv_path in sorted(sheets_dir.glob("*.tsv")):
+        slug = tsv_path.stem
+        if slug not in BIBLICAL_TSV_SLUGS:
+            continue
+
+        first = True
+        for row in _iter_tsv_rows(tsv_path):
+            if first and _looks_like_header_row(row):
+                first = False
+                continue
+            first = False
+
+            # Biblical tabs: require the 5-column A–E schema.
+            if len(row) < 5:
+                continue
+
+            page_key = row[0]
+            row_id = row[1]
+            nav = row[2]
+            scaffold = row[3]
+            text = row[4]
+
+            book_he = _book_key_from_page_key(page_key) or ""
+            resolved = he_to_book.get(book_he)
+            if resolved is None or resolved.book_name_he != target_he:
+                continue
+
+            chapter_n, verse_n = _extract_chapter_verse_numbers(page_key, row_id, scaffold)
+            if not chapter_n or not verse_n:
+                continue
+
+            rows_xml.append(
+                f"""<miqra:row source="{_xml_escape(slug)}" pageKey="{_xml_escape(page_key)}" rowId="{_xml_escape(row_id)}" chapter="{_xml_escape(chapter_n)}" verse="{_xml_escape(verse_n)}">
+  <miqra:nav>{wikitext_to_intermediate_xml(nav, column_c=True)}</miqra:nav>
+  <miqra:scaffold>{wikitext_to_intermediate_xml(scaffold)}</miqra:scaffold>
+  <miqra:text>{wikitext_to_intermediate_xml(text)}</miqra:text>
+</miqra:row>"""
+            )
+
+    rows_joined = "\n".join(rows_xml)
+    return f"""<miqra:book xmlns:miqra="urn:x-opensiddur:miqra:intermediate" xmlns:mw="urn:x-opensiddur:mw:intermediate" fileName="{_xml_escape(book.file_name)}" bookNameHe="{_xml_escape(book.book_name_he)}" bookNameEn="{_xml_escape(book.book_name_en)}">
+{rows_joined}
+</miqra:book>
+"""
+
+
+def intermediate_to_tei(intermediate_xml: str, *, xslt_params: Optional[dict[str, Any]] = None) -> dict[str, str]:
+    outputs = xslt_transform_string(
+        MIQRA_TO_TEI_XSLT,
+        intermediate_xml,
+        multiple_results=True,
+        xslt_params=xslt_params,
+    )
+    return {
+        "front": outputs.get("front", ""),
+        "body": outputs.get("body", outputs.get("", "")),
+        "stand_off": outputs.get("standoff", ""),
+    }
+
+
+def book_file(book: Book, *, sourcetexts_root: Path | None, project_dir: Path | None) -> None:
+    sheets_dir = miqra_al_pi_hamasorah_sheets_directory(sourcetexts_root)
+    if not sheets_dir.exists():
+        raise FileNotFoundError(f"Missing Miqra sheets directory: {sheets_dir} (run download first)")
+
+    intermediate = miqra_rows_to_intermediate(book, sheets_dir)
+    xml_dict = intermediate_to_tei(intermediate)
+    header_xml = header(book.book_name_he, book.book_name_en, qualifier=f":{book.file_name}")
+    tei_content = tei_file(header_xml, **xml_dict)
+    make_project_directory(project_dir)
+    validate_and_write_tei_file(tei_content, book.file_name, project_dir)
+
+
+def _readme_front_matter(sourcetexts_root: Path | None) -> str:
+    sheets_dir = miqra_al_pi_hamasorah_sheets_directory(sourcetexts_root)
+    readme = sheets_dir / "readme.tsv"
+    if not readme.exists():
+        return ""
+    lines: list[str] = []
+    for row in _iter_tsv_rows(readme):
+        # Preserve all cells; this is human prose.
+        line = " ".join(c for c in row if c).strip()
+        if line:
+            lines.append(line)
+    paras = "\n".join([f"<tei:p>{_xml_escape(l)}</tei:p>" for l in lines])
+    return f"<tei:front xmlns:tei=\"http://www.tei-c.org/ns/1.0\">{paras}</tei:front>"
+
+
+def index_file(idx: Index, *, sourcetexts_root: Path | None, project_dir: Path | None) -> None:
+    transclusion_str = "\n".join(
+        [
+            f"""<j:transclude target="urn:x-opensiddur:text:bible:{t.file_name}"/>"""
+            for t in idx.transclusions
+        ]
+    )
+    index_body = f"""<tei:body xmlns:tei="http://www.tei-c.org/ns/1.0" xmlns:j="http://jewishliturgy.org/ns/jlptei/2">
+  <tei:div corresp="urn:x-opensiddur:text:bible:{idx.file_name}">
+    <tei:head xml:lang="en">{_xml_escape(idx.index_title_en)}</tei:head>
+    {transclusion_str}
+  </tei:div>
+</tei:body>
+"""
+    front = _readme_front_matter(sourcetexts_root) if idx.file_name == "index" else ""
+    header_xml = header(idx.index_title_he, idx.index_title_en, qualifier=f":{idx.file_name}")
+    tei_content = tei_file(header_xml, front=front, body=index_body)
+    make_project_directory(project_dir)
+    validate_and_write_tei_file(tei_content, idx.file_name, project_dir)
+
+    for t in idx.transclusions:
+        if isinstance(t, Index):
+            index_file(t, sourcetexts_root=sourcetexts_root, project_dir=project_dir)
+        else:
+            book_file(t, sourcetexts_root=sourcetexts_root, project_dir=project_dir)
+
+
+def _build_arg_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description="Convert Miqra al pi ha-Masorah TSV sheets to JLPTEI."
+    )
+    parser.add_argument(
+        "--sourcetexts-root",
+        type=Path,
+        default=default_sourcetexts_root(),
+        help="Root of sourcetexts tree (default: <repo>/sources).",
+    )
+    parser.add_argument(
+        "--project-dir",
+        type=Path,
+        default=None,
+        help=(
+            "Output project directory (default: <repo>/project/miqra_al_pi_hamasorah)."
+        ),
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Print planned actions without writing files.",
+    )
+    parser.add_argument(
+        "--only-book",
+        type=str,
+        default=None,
+        help="Only generate a single book by file slug (e.g. genesis).",
+    )
+    return parser
+
+
+def main(argv: list[str] | None = None) -> int:
+    logging.basicConfig(level=logging.INFO)
+    args = _build_arg_parser().parse_args(argv)
+
+    data_dir = miqra_al_pi_hamasorah_data_directory(args.sourcetexts_root)
+    sheets_dir = miqra_al_pi_hamasorah_sheets_directory(args.sourcetexts_root)
+    out_dir = args.project_dir if args.project_dir is not None else _default_project_directory()
+
+    if args.dry_run:
+        logger.info("Would read Miqra TSVs from %s", sheets_dir)
+        logger.info("Would write project files to %s", out_dir)
+        if args.only_book:
+            logger.info("Would generate only book: %s", args.only_book)
+        return 0
+
+    if args.only_book:
+        all_books = {b.file_name: b for b in _flatten_books(TANAKH_INDEX)}
+        book = all_books.get(args.only_book)
+        if book is None:
+            raise ValueError(f"Unknown book slug: {args.only_book}")
+        book_file(book, sourcetexts_root=args.sourcetexts_root, project_dir=args.project_dir)
+        return 0
+
+    # Generate index + all transclusions recursively
+    index_file(TANAKH_INDEX[0], sourcetexts_root=args.sourcetexts_root, project_dir=args.project_dir)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
+
diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/miqra_to_tei.xslt b/opensiddur/importer/miqra_al_pi_hamasorah/miqra_to_tei.xslt
new file mode 100644
index 0000000..f080fd9
--- /dev/null
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/miqra_to_tei.xslt
@@ -0,0 +1,243 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xsl:stylesheet version="3.0"
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:tei="http://www.tei-c.org/ns/1.0"
+  xmlns:j="http://jewishliturgy.org/ns/jlptei/2"
+  xmlns:miqra="urn:x-opensiddur:miqra:intermediate"
+  xmlns:mw="urn:x-opensiddur:mw:intermediate"
+  exclude-result-prefixes="miqra mw">
+
+  <xsl:output method="xml" omit-xml-declaration="yes" indent="no"/>
+
+  <xsl:template match="/">
+    <xsl:result-document href="body">
+      <tei:body>
+        <tei:div type="book">
+          <xsl:attribute name="corresp">
+            <xsl:text>urn:x-opensiddur:text:bible:</xsl:text>
+            <xsl:value-of select="/miqra:book/@fileName"/>
+          </xsl:attribute>
+          <tei:head xml:lang="en">
+            <xsl:value-of select="/miqra:book/@bookNameEn"/>
+          </tei:head>
+          <xsl:apply-templates select="/miqra:book/miqra:row"/>
+        </tei:div>
+      </tei:body>
+    </xsl:result-document>
+    <xsl:if test="/miqra:book//miqra:note">
+      <xsl:result-document href="standoff">
+        <tei:standOff type="notes" xml:lang="he">
+          <xsl:apply-templates select="/miqra:book//miqra:note" mode="standoff"/>
+        </tei:standOff>
+      </xsl:result-document>
+    </xsl:if>
+  </xsl:template>
+
+  <xsl:template match="miqra:row">
+    <xsl:variable name="chapter" select="normalize-space(@chapter)"/>
+    <xsl:variable name="verse" select="normalize-space(@verse)"/>
+    <xsl:variable name="has-verse-ref"
+      select="$chapter != '' and $verse != '' and matches($chapter, '^[0-9]+$') and matches($verse, '^[0-9]+$')"/>
+
+    <xsl:if test="$has-verse-ref">
+      <tei:milestone unit="verse" n="{$verse}">
+        <xsl:attribute name="corresp">
+          <xsl:text>urn:x-opensiddur:text:bible:</xsl:text>
+          <xsl:value-of select="/miqra:book/@fileName"/>
+          <xsl:text>/</xsl:text>
+          <xsl:value-of select="$chapter"/>
+          <xsl:text>/</xsl:text>
+          <xsl:value-of select="$verse"/>
+        </xsl:attribute>
+      </tei:milestone>
+    </xsl:if>
+    <tei:ab>
+      <xsl:apply-templates select="miqra:text/node()"/>
+    </tei:ab>
+  </xsl:template>
+
+  <!-- Strip nav/scaffold from body output -->
+  <xsl:template match="miqra:nav | miqra:scaffold"/>
+
+  <!-- Variant documentation (נוסח) -->
+  <xsl:template match="miqra:variant">
+    <xsl:if test="@noteId">
+      <tei:anchor>
+        <xsl:attribute name="xml:id" select="concat(@noteId, '-ref')"/>
+      </tei:anchor>
+    </xsl:if>
+    <xsl:apply-templates select="miqra:display/node()"/>
+  </xsl:template>
+
+  <xsl:template match="miqra:note" mode="standoff">
+    <tei:note>
+      <xsl:copy-of select="@xml:id"/>
+      <xsl:apply-templates/>
+    </tei:note>
+  </xsl:template>
+
+  <xsl:template match="miqra:note"/>
+
+  <!-- Ketiv/qeri -->
+  <xsl:template match="miqra:kq">
+    <tei:choice>
+      <xsl:choose>
+        <xsl:when test="@order = 'qeri-first'">
+          <j:read>
+            <xsl:apply-templates select="miqra:qeri/node() | miqra:bracketed/node()"/>
+          </j:read>
+          <j:written>
+            <xsl:apply-templates select="miqra:ketiv/node()"/>
+          </j:written>
+        </xsl:when>
+        <xsl:otherwise>
+          <j:written>
+            <xsl:apply-templates select="miqra:ketiv/node()"/>
+          </j:written>
+          <j:read>
+            <xsl:apply-templates select="miqra:qeri/node() | miqra:bracketed/node()"/>
+          </j:read>
+        </xsl:otherwise>
+      </xsl:choose>
+    </tei:choice>
+  </xsl:template>
+
+  <xsl:template match="miqra:bracketed">
+    <xsl:text>[</xsl:text>
+    <xsl:apply-templates/>
+    <xsl:text>]</xsl:text>
+  </xsl:template>
+
+  <xsl:template match="miqra:kq-matres"/>
+
+  <xsl:template match="miqra:ketiv-only">
+    <tei:hi rend="ketiv-only">
+      <xsl:text>(</xsl:text>
+      <xsl:apply-templates/>
+      <xsl:text>)</xsl:text>
+    </tei:hi>
+  </xsl:template>
+
+  <xsl:template match="miqra:qeri-only">
+    <tei:hi rend="qeri-only">
+      <xsl:text>[</xsl:text>
+      <xsl:apply-templates/>
+      <xsl:text>]</xsl:text>
+    </tei:hi>
+  </xsl:template>
+
+  <!-- Parashah / poetic layout -->
+  <xsl:template match="miqra:parashah[@type = 'open']">
+    <tei:lb/>
+  </xsl:template>
+  <xsl:template match="miqra:parashah[@type = 'open-line']">
+    <tei:lb type="first"/>
+  </xsl:template>
+  <xsl:template match="miqra:parashah[@type = 'close']">
+    <tei:lb/>
+  </xsl:template>
+  <xsl:template match="miqra:parashah[@type = 'close-inline' or @type = 'close-narrow' or @type = 'shirah']">
+    <tei:lb/>
+  </xsl:template>
+
+  <xsl:template match="miqra:poetic">
+    <tei:lb>
+      <xsl:if test="@level != '0'">
+        <xsl:attribute name="type">indent</xsl:attribute>
+      </xsl:if>
+    </tei:lb>
+  </xsl:template>
+
+  <xsl:template match="miqra:lb">
+    <tei:lb/>
+  </xsl:template>
+
+  <xsl:template match="miqra:centered">
+    <tei:hi rend="centered">
+      <xsl:apply-templates/>
+    </tei:hi>
+  </xsl:template>
+
+  <!-- Letter formatting -->
+  <xsl:template match="miqra:hi">
+    <tei:hi>
+      <xsl:attribute name="rend" select="@rend"/>
+      <xsl:apply-templates/>
+    </tei:hi>
+  </xsl:template>
+
+  <xsl:template match="miqra:dotted">
+    <xsl:apply-templates/>
+  </xsl:template>
+
+  <xsl:template match="miqra:inverted-nun">
+    <xsl:apply-templates/>
+  </xsl:template>
+
+  <xsl:template match="miqra:yerushalem | miqra:yerushalema">
+    <xsl:value-of select="@vowel"/>
+    <xsl:value-of select="@accent"/>
+    <xsl:text>&#x034F;ִ</xsl:text>
+  </xsl:template>
+
+  <xsl:template match="miqra:accent">
+    <xsl:text> </xsl:text>
+  </xsl:template>
+
+  <xsl:template match="miqra:qupo-accent"/>
+
+  <xsl:template match="miqra:punct">
+    <xsl:value-of select="."/>
+  </xsl:template>
+
+  <xsl:template match="miqra:maqaf">
+    <xsl:value-of select="."/>
+  </xsl:template>
+
+  <xsl:template match="miqra:fn-mark">
+    <tei:hi rend="sup">*</tei:hi>
+  </xsl:template>
+
+  <xsl:template match="miqra:anchor">
+    <tei:anchor>
+      <xsl:copy-of select="@xml:id"/>
+    </tei:anchor>
+  </xsl:template>
+
+  <xsl:template match="miqra:line-anchor | miqra:segment | miqra:good-ending | miqra:dual-trope-link | miqra:dual-accent | miqra:strand"/>
+
+  <!-- Legacy mw elements -->
+  <xsl:template match="mw:hi">
+    <tei:hi>
+      <xsl:attribute name="rend" select="@rend"/>
+      <xsl:apply-templates/>
+    </tei:hi>
+  </xsl:template>
+
+  <xsl:template match="mw:link">
+    <xsl:choose>
+      <xsl:when test="normalize-space(.) != ''">
+        <tei:ref>
+          <xsl:attribute name="target" select="@target"/>
+          <xsl:apply-templates/>
+        </tei:ref>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:value-of select="@target"/>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <xsl:template match="mw:template">
+    <xsl:apply-templates select="mw:param/node()"/>
+  </xsl:template>
+
+  <xsl:template match="mw:param">
+    <xsl:value-of select="."/>
+  </xsl:template>
+
+  <xsl:template match="text()">
+    <xsl:value-of select="."/>
+  </xsl:template>
+
+</xsl:stylesheet>
diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py b/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py
new file mode 100644
index 0000000..6ca5e25
--- /dev/null
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py
@@ -0,0 +1,633 @@
+"""
+Convert Miqra al pi ha-Masorah wikitext (per templates.tsv) to intermediate XML.
+
+All templates documented in sources/miqra_al_pi_hamasorah/sheets/templates.tsv are
+handled here, including when nested inside verse text (e.g. {{נוסח|…}}).
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Callable, Optional
+from urllib.parse import quote
+
+import mwparserfromhell
+
+from opensiddur.importer.util.mediawiki_processor import MediaWikiProcessor
+
+# Namespace URIs for the two prefixes (miqra:, mw:) used in the intermediate XML.
+MIQRA_NS = "urn:x-opensiddur:miqra:intermediate"
+MW_NS = "urn:x-opensiddur:mw:intermediate"
+
+# Template names whose invocations are removed from the output entirely
+# (registered with the strip handler by MiqraWikiTextProcessor).
+_STRIP_TEMPLATES = frozenset(
+    {
+        "מ:פסוק",
+        "מ:פסוק-שירה",
+        "מ:שוליים",
+        "מ:שוליים-סוף",
+        "מ:טעמי המקרא",
+        "מ:טעמי המקרא-סוף",
+        "טעמי המקרא באינטרנט",
+        "תבנית:טעמי המקרא באינטרנט",
+        "מ:ספר חדש",
+        "מ:רווח בתרי עשר",
+        "רווח בתרי עשר",
+        "מ:רווח לספר בתהלים",
+        "רווח לספר בתהלים",
+        "מ:אין פרשה בתחילת פרק",
+        'מ:אין פרשה בתחילת פרק בספרי אמ"ת',
+        "מ:אין רווח של פרשה בתחילת פרשת השבוע",
+        "מ:יישור-בשני-הצדדים",
+        "מ:יישור-בשני-הצדדים-סוף",
+        "בסיס-משתמש",
+        'צורות כתיבה בספרי אמ"ת',
+        "documentation",
+        "name",
+        "template",
+        "תבנית",
+    }
+)
+
+# Wiki emphasis markup: five quotes = bold-italic, three = bold, two = italic.
+# NOTE(review): the three single-purpose patterns are not referenced elsewhere
+# in this module as shown; only _ANY_HI_RE is. Confirm before removing.
+_BOLD_ITALIC_RE = re.compile(r"'''''(.*?)'''''")
+_BOLD_RE = re.compile(r"'''(.*?)'''")
+_ITALIC_RE = re.compile(r"''(.*?)''")
+# Alternation tries the longest delimiter first; exactly one of groups 1-3
+# participates in a match (bold-italic, bold, italic respectively).
+_ANY_HI_RE = re.compile(r"'''''(.*?)'''''|'''(.*?)'''|''(.*?)''")
+# Opening or self-closing miqra:/mw: tag.
+# Groups: 1 = prefix, 2 = local name, 3 = attribute text, 4 = "/" when self-closing.
+_TAG_OPEN_RE = re.compile(r"<(miqra|mw):([a-zA-Z0-9-]+)([^>]*?)(/?)>")
+# Self-closing <קטע התחלה=NAME/> and <קטע סוף=NAME/> tags; group 1 is the raw
+# text after "=" up to the closing "/>" (it may include quotes or whitespace).
+_KETEG_START_RE = re.compile(r"<קטע\s+התחלה=([^/>]+)\s*/>", re.IGNORECASE)
+_KETEG_END_RE = re.compile(r"<קטע\s+סוף=([^/>]+)\s*/>", re.IGNORECASE)
+
+
+def normalize_template_name(name: str) -> str:
+    """Return a canonical form of a template name for handler lookup.
+
+    Surrounding whitespace and a leading ``תבנית:`` namespace are removed,
+    and quote variants are unified: a doubled apostrophe and the Hebrew
+    gershayim become ``"``; the Hebrew geresh becomes ``'``.
+    """
+    cleaned = str(name).strip()
+    if cleaned.lower().startswith("תבנית:"):
+        _, _, remainder = cleaned.partition(":")
+        cleaned = remainder.strip()
+    # Order matters: the doubled apostrophe is folded before geresh is mapped.
+    for variant, canonical in (("''", '"'), ("״", '"'), ("׳", "'")):
+        cleaned = cleaned.replace(variant, canonical)
+    return cleaned.strip()
+
+
+def link_target_to_uri(target: str) -> str:
+    """Turn a URL or Hebrew Wikisource page title into a valid URI for tei:ref/@target.
+
+    * Empty or ``None`` input gives ``""``.
+    * ``http://`` and ``https://`` URLs are returned unchanged.
+    * Protocol-relative URLs (``//host/...``) get an ``https:`` scheme.
+    * Anything else is treated as a page title, optionally followed by
+      ``#fragment``, and resolved against ``https://he.wikisource.org/wiki/``.
+      Spaces in both the title and the fragment become underscores, as
+      MediaWiki does, and remaining unsafe characters are percent-encoded.
+    """
+    t = (target or "").strip()
+    if not t:
+        return ""
+    if re.match(r"^https?://", t, re.I):
+        return t
+    if t.startswith("//"):
+        return "https:" + t
+    page, sep, frag = t.partition("#")
+    # Strip *before* replacing spaces; otherwise whitespace next to the "#"
+    # ("Page #sec") would survive as a stray underscore in the URL.
+    page = page.strip().replace(" ", "_")
+    uri = "https://he.wikisource.org/wiki/"
+    if page:
+        uri += quote(page, safe="/:%")
+    if sep:
+        # MediaWiki section anchors use underscores for spaces.
+        uri += "#" + quote(frag.strip().replace(" ", "_"), safe=":/%.-_")
+    return uri
+
+
+def _xml_escape(text: str) -> str:
+    """Escape the five XML special characters in *text* (``None`` gives ``""``)."""
+    escaped = text or ""
+    # "&" must be handled first so the entities added afterwards stay intact.
+    for raw, entity in (
+        ("&", "&amp;"),
+        ("<", "&lt;"),
+        (">", "&gt;"),
+        ('"', "&quot;"),
+        ("'", "&apos;"),
+    ):
+        escaped = escaped.replace(raw, entity)
+    return escaped
+
+
+def _wikitext_basic_markup_to_xml(text: str) -> str:
+    """Escape *text* for XML, turning wiki quote emphasis into ``mw:hi`` elements.
+
+    Runs delimited by five, three or two apostrophes become ``<mw:hi>`` with
+    ``rend`` set to ``bold-italic``, ``bold`` or ``italic``.  Both the
+    emphasised content and the text between matches are XML-escaped.
+    """
+    source = text or ""
+    # Index of the participating capture group in _ANY_HI_RE -> rend value.
+    rend_by_group = {1: "bold-italic", 2: "bold", 3: "italic"}
+    pieces: list[str] = []
+    cursor = 0
+    for match in _ANY_HI_RE.finditer(source):
+        group_no = match.lastindex
+        pieces.append(_xml_escape(source[cursor : match.start()]))
+        pieces.append(
+            f'<mw:hi rend="{rend_by_group[group_no]}">'
+            f"{_xml_escape(match.group(group_no))}</mw:hi>"
+        )
+        cursor = match.end()
+    pieces.append(_xml_escape(source[cursor:]))
+    return "".join(pieces)
+
+
+def _escape_outside_tags(fragment: str) -> str:
+    """Escape text nodes while preserving nested miqra:/mw: XML elements.
+
+    The fragment is scanned for miqra:/mw: opening tags.  Text between
+    elements goes through ``_wikitext_basic_markup_to_xml`` (escaping plus
+    quote-emphasis conversion).  Opening and closing tags are copied
+    verbatim, and the content of each element is processed recursively.  If
+    an opening tag has no matching close, the rest of the fragment,
+    including that tag, is treated as plain text.
+    """
+
+    out: list[str] = []
+    pos = 0
+    while pos < len(fragment):
+        m = _TAG_OPEN_RE.search(fragment, pos)
+        if not m:
+            # No more elements: everything left is text.
+            out.append(_wikitext_basic_markup_to_xml(fragment[pos:]))
+            break
+        out.append(_wikitext_basic_markup_to_xml(fragment[pos : m.start()]))
+        ns, local, _attrs, self_close = m.group(1), m.group(2), m.group(3), m.group(4)
+        if self_close == "/":
+            # Self-closing element: copy as-is, nothing to recurse into.
+            out.append(m.group(0))
+            pos = m.end()
+            continue
+        close = f"</{ns}:{local}>"
+        # Find the close tag that balances this open tag, counting nested
+        # elements of the same name.
+        depth = 1
+        search = m.end()
+        closed_at: Optional[int] = None
+        while depth > 0 and search <= len(fragment):
+            next_close = fragment.find(close, search)
+            if next_close == -1:
+                break
+            inner_open = _TAG_OPEN_RE.search(fragment, search, next_close)
+            if inner_open and inner_open.start() < next_close and inner_open.group(4) != "/":
+                inner_local = inner_open.group(2)
+                # Only a nested element with the same name deepens the
+                # nesting; other elements are just skipped over.
+                if inner_open.group(1) == ns and inner_local == local:
+                    depth += 1
+                search = inner_open.end()
+            else:
+                depth -= 1
+                if depth == 0:
+                    closed_at = next_close
+                else:
+                    search = next_close + len(close)
+        if closed_at is None:
+            # Unbalanced open tag: fall back to escaping the remainder as text.
+            out.append(_wikitext_basic_markup_to_xml(fragment[m.start() :]))
+            break
+        inner = fragment[m.end() : closed_at]
+        out.append(m.group(0))
+        out.append(_escape_outside_tags(inner))
+        out.append(close)
+        pos = closed_at + len(close)
+    return "".join(out)
+
+
+def _preprocess_column_c(wikitext: str) -> str:
+    """Column C markers from templates.tsv (not templates).
+
+    ``__`` becomes a single space, and ``//`` becomes ``<miqra:lb/>`` unless
+    it directly follows ``http:`` or ``https:``.
+    """
+    spaced = (wikitext or "").replace("__", " ")
+    return re.sub(r"(?<!https:)(?<!http:)//", "<miqra:lb/>", spaced)
+
+
+def _preprocess_miqra_tags(wikitext: str) -> str:
+    """Rewrite ``<קטע התחלה=…/>`` / ``<קטע סוף=…/>`` tags as ``miqra:segment``.
+
+    Start tags become ``<miqra:segment type="start" name="…"/>`` and end tags
+    ``<miqra:segment type="end" name="…"/>``.  The section name is trimmed,
+    stripped of surrounding quotes and XML-escaped, so a quoted name or one
+    containing ``&`` or ``"`` cannot produce a malformed ``name`` attribute.
+    """
+
+    def _segment(kind):
+        def _replace(match):
+            # The pattern captures everything up to "/>", which may include
+            # optional quotes and trailing whitespace.
+            name = match.group(1).strip().strip("\"'").strip()
+            return f'<miqra:segment type="{kind}" name="{_xml_escape(name)}"/>'
+
+        return _replace
+
+    s = wikitext or ""
+    s = _KETEG_START_RE.sub(_segment("start"), s)
+    s = _KETEG_END_RE.sub(_segment("end"), s)
+    return s
+
+
+class MiqraWikiTextProcessor(MediaWikiProcessor):
+    """MediaWiki processor with handlers for all Miqra templates.
+
+    Handlers return fragments of intermediate XML using ``miqra:`` and ``mw:``
+    prefixed elements.  Attribute values are XML-escaped by the handler that
+    builds them; element *content* is left unescaped because
+    ``wikitext_to_intermediate_xml`` escapes it afterwards with
+    ``_escape_outside_tags``.
+    """
+
+    def __init__(self) -> None:
+        # Set before super().__init__(): the base constructor calls
+        # _initialize_handlers(), so instance state must already exist.
+        self._note_seq = 0
+        super().__init__()
+
+    def _initialize_handlers(self) -> None:
+        """Install only the Miqra handlers instead of the base class defaults."""
+        self.template_handlers = {}
+        self.tag_handlers = {}
+        self.preprocessors = [_preprocess_miqra_tags]
+        self.postprocessors = []
+        self._register_template_handlers()
+        self._register_tag_handlers()
+
+    def _register_tag_handlers(self) -> None:
+        """Register tag handlers; only ``noinclude`` is handled (removed)."""
+        self.tag_handlers["noinclude"] = self._handle_strip_tag
+
+    def _handle_strip_tag(self, tag) -> str:
+        """Drop the tag together with its content."""
+        return ""
+
+    def _register_template_handlers(self) -> None:
+        """Register a handler for every known Miqra template name."""
+        h = self.add_template_handler
+        for name in _STRIP_TEMPLATES:
+            h(name, self._handle_strip)
+
+        # Textual variants and footnote marks
+        h("נוסח", self._handle_nosach)
+        h("ש", self._handle_footnote_mark)
+        h("שם", self._handle_strip)
+
+        # Parashah breaks
+        h("פפ", self._handle_parashah_open)
+        h("פפפ", self._handle_parashah_open_line)
+        h("רווח בסוף שורה", self._handle_strip)
+        h("סס", self._handle_parashah_close)
+        h("ססס", self._handle_parashah_close_inline)
+        h("סס2", self._handle_parashah_close_narrow)
+        h("מ:ששש", self._handle_shirah_break)
+
+        # Poetic layout
+        h("ר0", self._handle_poetic_space)
+        h("ר1", self._handle_poetic_indent1)
+        h("ר2", self._handle_poetic_indent2)
+        h("ר3", self._handle_poetic_line)
+        h("ר4", self._handle_poetic_verse)
+        h("פרשה-מרכז", self._handle_centered_title)
+
+        # Ketiv/qeri special cases (the generic families are matched by
+        # prefix in _lookup_handler)
+        h("כתיב ולא קרי", self._handle_ketiv_only)
+        h("קרי ולא כתיב", self._handle_qeri_only)
+        h('מ:קו"כ-אם-2', self._handle_qok_if_matres)
+        h('מ:קו"כ קרי שונה מהכתיב בשתי מילים', self._handle_qok_two_qeri_words)
+
+        # Letters, vowels, accents and punctuation
+        h("מ:אות-ג", self._handle_large_letter)
+        h("מ:אות-ק", self._handle_small_letter)
+        h("מ:אות תלויה", self._handle_raised_letter)
+        h("מ:אות מנוקדת", self._handle_dotted_letter)
+        h('מ:נו"ן הפוכה', self._handle_inverted_nun)
+        h("מ:ירושלם", self._handle_yerushalem)
+        h("מ:ירושלמה", self._handle_yerushalema)
+        h("ירח בן יומו", self._handle_accent_yerah)
+        h("גלגל", self._handle_accent_galgal)
+        h("אתנח הפוך", self._handle_accent_etnah)
+        h("מ:קמץ", self._handle_qamats)
+        h("מ:טעם ומתג באות אחת", self._handle_taam_meteg)
+        h("שני טעמים באות אחת", self._handle_two_taamim)
+        h(
+            "שני טעמים באות אחת קמץ-תחתון-פתח-עליון",
+            self._handle_two_taamim_qupo,
+        )
+        h("מ:טעם", self._handle_taam_dummy)
+        h("תבנית:מ:טעם", self._handle_taam_dummy)
+        h("מ:גרש ותלישא גדולה", self._handle_geresh_telisha)
+        h("מ:גרשיים ותלישא גדולה", self._handle_gershayim_telisha)
+        h("מ:כל קמץ קטן מרכא", self._handle_kol_qamats)
+        h("מ:לגרמיה-2", self._handle_legarmeh)
+        h("מ:פסק", self._handle_paseq)
+        h("מ:מקף אפור", self._handle_grey_maqaf)
+
+        # Notes, anchors and dual-accent passages
+        h("מ:הערה", self._handle_mam_note)
+        h("עוגן בשורה", self._handle_line_anchor)
+        h("מ:סיום בטוב", self._handle_good_ending)
+        h("קק", self._handle_dual_trope_link)
+        h("מ:כפול", self._handle_dual_accent)
+
+        # Links and emphasis inside notes
+        h("מ:קישור בהערה", self._handle_note_link)
+        h("מ:קישור פנימי בהערה", self._handle_note_link)
+        h("מודגש", self._handle_emphasis)
+
+    def _lookup_handler(self, name: str) -> Optional[Callable]:
+        """Return the handler for template *name*, or ``None`` if unknown.
+
+        The normalized name is looked up exactly first; otherwise the
+        ketiv/qeri template families are recognised by prefix.
+        """
+        n = normalize_template_name(name)
+        if n in self.template_handlers:
+            return self.template_handlers[n]
+        if n.startswith(('מ:כו"ק', 'כו"ק', "כו''ק")):
+            return self._handle_ketiv_qeri
+        if n.startswith(('מ:קו"כ', 'קו"כ', "קו''כ")):
+            return self._handle_qeri_ketiv
+        return None
+
+    def _process_nested_content(self, content: str, depth: int = 0) -> str:
+        """Convert templates, tags, wikilinks and headings found in *content*.
+
+        Only the top-level nodes of the parsed wikitext are visited.  Handled
+        nodes are replaced by their handler's output; once *depth* exceeds 12
+        the content is returned unchanged.
+        """
+        if depth > 12:
+            return content
+
+        parsed = mwparserfromhell.parse(content)
+        nodes_to_replace = []
+
+        for node in parsed.nodes:
+            if hasattr(node, "name"):
+                template_name = str(node.name).strip()
+                handler = self._lookup_handler(template_name)
+                if handler is None:
+                    n = normalize_template_name(template_name)
+                    if n in _STRIP_TEMPLATES:
+                        handler = self._handle_strip
+                    else:
+                        # Unknown template: its own text is re-processed at
+                        # the next depth level.
+                        processed = self._process_nested_content(str(node), depth + 1)
+                        nodes_to_replace.append((node, processed))
+                        continue
+                try:
+                    processed_node = self._process_template_with_nesting(node, depth + 1)
+                    replacement = handler(processed_node)
+                except Exception:
+                    # Best effort: retry the handler on the untouched node.
+                    replacement = handler(node)
+                nodes_to_replace.append((node, replacement))
+            elif hasattr(node, "tag"):
+                tag_name = str(node.tag).strip().lower()
+                if tag_name in self.tag_handlers:
+                    try:
+                        processed_node = self._process_tag_with_nesting(node, depth + 1)
+                        replacement = self.tag_handlers[tag_name](processed_node)
+                    except Exception:
+                        replacement = self.tag_handlers[tag_name](node)
+                    nodes_to_replace.append((node, replacement))
+                else:
+                    processed = self._process_nested_content(str(node), depth + 1)
+                    nodes_to_replace.append((node, processed))
+            elif "Wikilink" in str(node.__class__):
+                nodes_to_replace.append((node, self._handle_wikilink_miqra(node)))
+
+            elif node.__class__.__name__ == "Heading":
+                # Note text uses "=source=reading" notation; mwparser treats it as wikitext headings.
+                title = self._process_nested_content(str(node.title), depth + 1)
+                nodes_to_replace.append((node, "=" + title + "="))
+
+        for node, replacement in nodes_to_replace:
+            parsed.replace(node, replacement)
+
+        return str(parsed)
+
+    def _handle_wikilink_miqra(self, node) -> str:
+        """Convert a wikilink node into an ``mw:link`` element.
+
+        The target is resolved to a URI and escaped, since it is an attribute
+        value.  The link text is emitted unescaped, like every other
+        handler's element content: ``_escape_outside_tags`` escapes it later,
+        so escaping here as well would double-escape it (``'`` would end up
+        as ``&amp;apos;``).
+        """
+        raw_title = str(getattr(node, "title", "")).strip()
+        target = _xml_escape(link_target_to_uri(raw_title))
+        text = str(getattr(node, "text", "")).strip() if getattr(node, "text", None) else ""
+        if text:
+            return f'<mw:link target="{target}">{text}</mw:link>'
+        return f'<mw:link target="{target}"/>'
+
+    def _p(self, content: str) -> str:
+        """Shorthand: run nested processing on *content* (``None`` gives ``""``)."""
+        return self._process_nested_content(content or "")
+
+    def _param_value(self, template, key: str | int) -> str:
+        """Read a template parameter by name or 1-based index.
+
+        mwparserfromhell's ``template.get(1)`` returns ``'1=value'`` when the
+        wikitext uses explicit ``1=value`` syntax; iterating ``params`` is reliable.
+
+        Returns the stripped value of the first matching parameter, or ``""``
+        when no parameter matches.
+        """
+        key_s = str(key).strip()
+        for p in template.params:
+            pname = str(p.name).strip()
+            if pname == key_s:
+                return str(p.value).strip()
+            # Numeric names are compared by value, so "01" matches 1.
+            if pname.isdigit() and key_s.isdigit() and int(pname) == int(key_s):
+                return str(p.value).strip()
+        return ""
+
+    def _param(self, template, index: int) -> str:
+        """Return the positional parameter at 1-based *index*, or ``""``."""
+        return self._param_value(template, index)
+
+    def _named_param(self, template, name: str) -> str:
+        """Return the parameter called *name*, or ``""``."""
+        return self._param_value(template, name)
+
+    def _note_params(self, template) -> str:
+        """Concatenate the processed note parameters of a template.
+
+        A parameter counts as a note when it is positional with index 2 or
+        higher, or is named ``הערות``, ``הערה`` or ``notes``.
+        """
+        parts: list[str] = []
+        for p in template.params:
+            pname = str(p.name).strip()
+            is_positional_note = pname.isdigit() and int(pname) >= 2
+            if is_positional_note or pname in ("הערות", "הערה", "notes"):
+                parts.append(self._p(str(p.value)))
+        return "".join(parts)
+
+    def _mid_verse_attr(self, template) -> str:
+        """Return `` midVerse="true"`` if any parameter value carries the mid-verse marker."""
+        for p in template.params:
+            if "פסקא באמצע פסוק" in str(p.value):
+                return ' midVerse="true"'
+        return ""
+
+    def _next_note_id(self) -> str:
+        """Return the next note id; the counter is per processor instance."""
+        self._note_seq += 1
+        return f"miqra-note-{self._note_seq}"
+
+    # --- handlers ---
+
+    def _handle_strip(self, template) -> str:
+        """Remove the template from the output."""
+        return ""
+
+    def _handle_nosach(self, template) -> str:
+        """Emit the displayed reading, plus a linked note when notes are present."""
+        display = self._p(self._param(template, 1))
+        notes = self._note_params(template)
+        if not notes:
+            return display
+        note_id = self._next_note_id()
+        return (
+            f'<miqra:variant noteId="{note_id}">'
+            f"<miqra:display>{display}</miqra:display>"
+            f"</miqra:variant>"
+            f'<miqra:note xml:id="{note_id}">{notes}</miqra:note>'
+        )
+
+    def _handle_footnote_mark(self, template) -> str:
+        """Emit an empty footnote mark element."""
+        return "<miqra:fn-mark/>"
+
+    def _handle_ketiv_qeri(self, template) -> str:
+        """Emit a ketiv/qeri pair marked ``ketiv-first`` (param 1 = ketiv, 2 = qeri)."""
+        ketiv = self._p(self._param(template, 1))
+        qeri = self._p(self._param(template, 2))
+        return (
+            f'<miqra:kq order="ketiv-first">'
+            f"<miqra:ketiv>{ketiv}</miqra:ketiv>"
+            f"<miqra:qeri>{qeri}</miqra:qeri>"
+            f"</miqra:kq>"
+        )
+
+    def _handle_qeri_ketiv(self, template) -> str:
+        """Emit a ketiv/qeri pair marked ``qeri-first`` (param 1 = ketiv, 2 = qeri)."""
+        ketiv = self._p(self._param(template, 1))
+        qeri = self._p(self._param(template, 2))
+        return (
+            f'<miqra:kq order="qeri-first">'
+            f"<miqra:ketiv>{ketiv}</miqra:ketiv>"
+            f"<miqra:qeri>{qeri}</miqra:qeri>"
+            f"</miqra:kq>"
+        )
+
+    def _handle_qok_if_matres(self, template) -> str:
+        """Emit param 1 as text, followed by a ``kq-matres`` pair from params 2 and 3."""
+        display = self._p(self._param(template, 1))
+        ketiv = self._p(self._param(template, 2))
+        qeri = self._p(self._param(template, 3))
+        return (
+            f"{display}"
+            f'<miqra:kq-matres>'
+            f"<miqra:ketiv>{ketiv}</miqra:ketiv>"
+            f"<miqra:qeri>{qeri}</miqra:qeri>"
+            f"</miqra:kq-matres>"
+        )
+
+    def _handle_qok_two_qeri_words(self, template) -> str:
+        """Emit a ``split-qeri`` pair: params 2 and 3 are the qeri words, param 1 the ketiv."""
+        ketiv = self._p(self._param(template, 1))
+        q1 = self._p(self._param(template, 2))
+        q2 = self._p(self._param(template, 3))
+        return (
+            f'<miqra:kq order="qeri-first" type="split-qeri">'
+            f"<miqra:bracketed>{q1}</miqra:bracketed>"
+            f"<miqra:qeri>{q2}</miqra:qeri>"
+            f"<miqra:ketiv>{ketiv}</miqra:ketiv>"
+            f"</miqra:kq>"
+        )
+
+    def _handle_ketiv_only(self, template) -> str:
+        """Emit a ketiv without qeri, wrapped in parentheses."""
+        ketiv = self._p(self._param(template, 1))
+        return f'<miqra:ketiv-only>({ketiv})</miqra:ketiv-only>'
+
+    def _handle_qeri_only(self, template) -> str:
+        """Emit a qeri without ketiv, wrapped in square brackets."""
+        qeri = self._p(self._param(template, 1))
+        return f"<miqra:qeri-only>[{qeri}]</miqra:qeri-only>"
+
+    def _handle_parashah_open(self, template) -> str:
+        """Emit a parashah break of type ``open``."""
+        return f'<miqra:parashah type="open"{self._mid_verse_attr(template)}/>'
+
+    def _handle_parashah_open_line(self, template) -> str:
+        """Emit a parashah break of type ``open-line``."""
+        return f'<miqra:parashah type="open-line"{self._mid_verse_attr(template)}/>'
+
+    def _handle_parashah_close(self, template) -> str:
+        """Emit a parashah break of type ``close``."""
+        return f'<miqra:parashah type="close"{self._mid_verse_attr(template)}/>'
+
+    def _handle_parashah_close_inline(self, template) -> str:
+        """Emit a parashah break of type ``close-inline``."""
+        return f'<miqra:parashah type="close-inline"{self._mid_verse_attr(template)}/>'
+
+    def _handle_parashah_close_narrow(self, template) -> str:
+        """Emit a parashah break of type ``close-narrow``."""
+        return f'<miqra:parashah type="close-narrow"{self._mid_verse_attr(template)}/>'
+
+    def _handle_shirah_break(self, template) -> str:
+        """Emit a parashah break of type ``shirah``."""
+        return '<miqra:parashah type="shirah"/>'
+
+    def _handle_poetic_space(self, template) -> str:
+        """Emit a poetic layout marker, level 0."""
+        return '<miqra:poetic level="0"/>'
+
+    def _handle_poetic_indent1(self, template) -> str:
+        """Emit a poetic layout marker, level 1."""
+        return '<miqra:poetic level="1"/>'
+
+    def _handle_poetic_indent2(self, template) -> str:
+        """Emit a poetic layout marker, level 2."""
+        return '<miqra:poetic level="2"/>'
+
+    def _handle_poetic_line(self, template) -> str:
+        """Emit a poetic layout marker, level 3."""
+        return '<miqra:poetic level="3"/>'
+
+    def _handle_poetic_verse(self, template) -> str:
+        """Emit a poetic layout marker, level 4."""
+        return '<miqra:poetic level="4"/>'
+
+    def _handle_centered_title(self, template) -> str:
+        """Wrap param 1 in ``miqra:centered``."""
+        title = self._p(self._param(template, 1))
+        return f"<miqra:centered>{title}</miqra:centered>"
+
+    def _handle_large_letter(self, template) -> str:
+        """Wrap param 1 in ``miqra:hi`` with ``rend="large"``."""
+        letter = self._p(self._param(template, 1))
+        return f'<miqra:hi rend="large">{letter}</miqra:hi>'
+
+    def _handle_small_letter(self, template) -> str:
+        """Wrap param 1 in ``miqra:hi`` with ``rend="small"``."""
+        letter = self._p(self._param(template, 1))
+        return f'<miqra:hi rend="small">{letter}</miqra:hi>'
+
+    def _handle_raised_letter(self, template) -> str:
+        """Wrap param 1 in ``miqra:hi`` with ``rend="raised"``."""
+        letter = self._p(self._param(template, 1))
+        return f'<miqra:hi rend="raised">{letter}</miqra:hi>'
+
+    def _handle_dotted_letter(self, template) -> str:
+        """Wrap param 1 in ``miqra:dotted``."""
+        word = self._p(self._param(template, 1))
+        return f"<miqra:dotted>{word}</miqra:dotted>"
+
+    def _handle_inverted_nun(self, template) -> str:
+        """Wrap param 1 in ``miqra:inverted-nun``."""
+        sym = self._p(self._param(template, 1))
+        return f"<miqra:inverted-nun>{sym}</miqra:inverted-nun>"
+
+    def _handle_yerushalem(self, template) -> str:
+        """Emit ``miqra:yerushalem`` with params 1 and 2 as ``vowel`` and ``accent``."""
+        p1 = _xml_escape(self._param(template, 1))
+        p2 = _xml_escape(self._param(template, 2))
+        return f'<miqra:yerushalem vowel="{p1}" accent="{p2}"/>'
+
+    def _handle_yerushalema(self, template) -> str:
+        """Emit ``miqra:yerushalema`` with params 1 and 2 as ``vowel`` and ``accent``."""
+        p1 = _xml_escape(self._param(template, 1))
+        p2 = _xml_escape(self._param(template, 2))
+        return f'<miqra:yerushalema vowel="{p1}" accent="{p2}"/>'
+
+    def _handle_accent_yerah(self, template) -> str:
+        """Emit an accent marker of type ``yerah-ben-yomo``."""
+        return '<miqra:accent type="yerah-ben-yomo"/>'
+
+    def _handle_accent_galgal(self, template) -> str:
+        """Emit an accent marker of type ``galgal``."""
+        return '<miqra:accent type="galgal"/>'
+
+    def _handle_accent_etnah(self, template) -> str:
+        """Emit an accent marker of type ``etnah-hafukh``."""
+        return '<miqra:accent type="etnah-hafukh"/>'
+
+    def _handle_qamats(self, template) -> str:
+        """Return the processed text of ``ד``, else ``ס``, else positional param 1."""
+        d = self._named_param(template, "ד")
+        s = self._named_param(template, "ס")
+        text = d or s or self._param(template, 1)
+        return self._p(text)
+
+    def _handle_taam_meteg(self, template) -> str:
+        """Return the processed text of param 1 without any wrapper."""
+        return self._p(self._param(template, 1))
+
+    def _handle_two_taamim(self, template) -> str:
+        """Emit an accent marker; the template's parameters are ignored.
+
+        NOTE(review): the emitted type is the same ``geresh-telisha-gedola``
+        that ``_handle_geresh_telisha`` produces - confirm this is intended
+        for the generic "two accents on one letter" template.
+        """
+        return '<miqra:accent type="geresh-telisha-gedola"/>'
+
+    def _handle_two_taamim_qupo(self, template) -> str:
+        """Emit ``miqra:qupo-accent`` with ``above`` taken from ``עליו`` or param 1."""
+        above = self._p(self._named_param(template, "עליו") or self._param(template, 1))
+        return f'<miqra:qupo-accent above="{_xml_escape(above)}"/>'
+
+    def _handle_taam_dummy(self, template) -> str:
+        """Return param 1 with its first character removed, then processed."""
+        raw = self._param(template, 1)
+        return self._p(raw[1:] if raw else "")
+
+    def _handle_geresh_telisha(self, template) -> str:
+        """Emit an accent marker of type ``geresh-telisha-gedola``."""
+        return '<miqra:accent type="geresh-telisha-gedola"/>'
+
+    def _handle_gershayim_telisha(self, template) -> str:
+        """Emit an accent marker of type ``gershayim-telisha-gedola``."""
+        return '<miqra:accent type="gershayim-telisha-gedola"/>'
+
+    def _handle_kol_qamats(self, template) -> str:
+        """Return processed param 1, or a default word when it is empty."""
+        return self._p(self._param(template, 1)) or "כָּל"
+
+    def _handle_legarmeh(self, template) -> str:
+        """Emit a punctuation element of type ``legarmeh``."""
+        return '<miqra:punct type="legarmeh">׀</miqra:punct>'
+
+    def _handle_paseq(self, template) -> str:
+        """Emit a punctuation element of type ``paseq``."""
+        return '<miqra:punct type="paseq">׀</miqra:punct>'
+
+    def _handle_grey_maqaf(self, template) -> str:
+        """Emit a maqaf element with ``rend="grey"``."""
+        return '<miqra:maqaf rend="grey">־</miqra:maqaf>'
+
+    def _handle_mam_note(self, template) -> str:
+        """Emit an anchor (``<id>-ref``) followed by the note holding param 1."""
+        body = self._p(self._param(template, 1))
+        note_id = self._next_note_id()
+        return (
+            f'<miqra:anchor xml:id="{note_id}-ref"/>'
+            f'<miqra:note xml:id="{note_id}">{body}</miqra:note>'
+        )
+
+    def _handle_line_anchor(self, template) -> str:
+        """Emit ``miqra:line-anchor`` with param 1 as its ``target``."""
+        label = _xml_escape(self._param(template, 1))
+        return f'<miqra:line-anchor target="{label}"/>'
+
+    def _handle_good_ending(self, template) -> str:
+        """Wrap param 1 in ``miqra:good-ending``."""
+        text = self._p(self._param(template, 1))
+        return f"<miqra:good-ending>{text}</miqra:good-ending>"
+
+    def _handle_dual_trope_link(self, template) -> str:
+        """Wrap param 1 in ``miqra:dual-trope-link``."""
+        target = self._p(self._param(template, 1))
+        return f"<miqra:dual-trope-link>{target}</miqra:dual-trope-link>"
+
+    def _handle_dual_accent(self, template) -> str:
+        """Emit ``miqra:dual-accent`` with one strand per named parameter ``א`` and ``ב``."""
+        dual = self._p(self._named_param(template, "כפול"))
+        a = self._p(self._named_param(template, "א"))
+        b = self._p(self._named_param(template, "ב"))
+        return (
+            f'<miqra:dual-accent dual="{_xml_escape(dual)}">'
+            f"<miqra:strand role=\"א\">{a}</miqra:strand>"
+            f"<miqra:strand role=\"ב\">{b}</miqra:strand>"
+            f"</miqra:dual-accent>"
+        )
+
+    def _handle_note_link(self, template) -> str:
+        """Emit ``mw:link`` for a note link; the label defaults to the raw target."""
+        raw_target = self._named_param(template, "1") or self._param(template, 1)
+        label = self._named_param(template, "2") or self._param(template, 2)
+        if not label:
+            label = raw_target
+        target = _xml_escape(link_target_to_uri(raw_target))
+        return f'<mw:link target="{target}">{self._p(label)}</mw:link>'
+
+    def _handle_emphasis(self, template) -> str:
+        """Wrap param 1 in ``mw:hi`` with ``rend="bold"``."""
+        text = self._p(self._param(template, 1))
+        return f'<mw:hi rend="bold">{text}</mw:hi>'
+
+
+_processor: Optional[MiqraWikiTextProcessor] = None
+
+
+def _get_processor() -> MiqraWikiTextProcessor:
+    """Return the module-wide processor, creating it on first use."""
+    global _processor
+    instance = _processor
+    if instance is None:
+        instance = _processor = MiqraWikiTextProcessor()
+    return instance
+
+
+def wikitext_to_intermediate_xml(
+    wikitext: str, *, column_c: bool = False
+) -> str:
+    """Convert wikitext to an escaped intermediate XML fragment.
+
+    With ``column_c=True`` the column-C markers are rewritten first.  The
+    shared processor then converts templates, tags and links, and finally
+    all text outside miqra:/mw: tags is XML-escaped.
+    """
+    source = wikitext or ""
+    prepared = _preprocess_column_c(source) if column_c else source
+    converted = _get_processor().process_wikitext(prepared)
+    return _escape_outside_tags(converted.xml_content)
+
+
+def reset_processor() -> None:
+    """Reset the shared processor (for tests).
+
+    The cached instance is discarded, so the next ``_get_processor()`` call
+    builds a fresh one.
+    """
+    global _processor
+    _processor = None
diff --git a/opensiddur/importer/util/mediawiki_processor.py b/opensiddur/importer/util/mediawiki_processor.py
new file mode 100644
index 0000000..76120b3
--- /dev/null
+++ b/opensiddur/importer/util/mediawiki_processor.py
@@ -0,0 +1,515 @@
+"""
+MediaWiki/Wikitext to intermediate XML processor.
+
+This module contains the reusable MediaWiki processing framework originally built
+for the JPS1917 importer. Other importers (e.g. Miqra al pi ha‑Masorah) can reuse
+it by adding/overriding template and tag handlers.
+"""
+
+# NOTE: The initial implementation is intentionally a direct move of the existing
+# processor to provide a stable API surface (`MediaWikiProcessor`, `create_processor`)
+# for multiple importers. Importer-specific specializations should be layered on
+# top by registering handlers.
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any, Dict, List
+
+import mwparserfromhell
+
+
+class ProcessingStage(Enum):
+    """Stages of MediaWiki processing"""
+
+    # Each member's value is the lowercase form of its name.
+    PREPROCESS = "preprocess"
+    TEMPLATES = "templates"
+    TAGS = "tags"
+    POSTPROCESS = "postprocess"
+
+
+@dataclass
+class ConversionResult:
+    """Result of a conversion operation"""
+
+    # All five fields are required; none has a default value.
+    xml_content: str  # the converted output, as a string
+    metadata: Dict[str, Any]
+    warnings: List[str]
+    errors: List[str]
+    wikilinks: List[Dict[str, Any]]  # presumably one dict per captured wikilink; verify
+
+
+class MediaWikiProcessor:
+    """
+    Modular MediaWiki to XML processor.
+
+    Provides a modular framework for converting MediaWiki syntax to an
+    intermediate XML that can be transformed to TEI via XSLT.
+    """
+
+    def __init__(self):
+        """Create empty registries, then populate them via ``_initialize_handlers``."""
+        self.wikilinks = []  # Store captured wikilinks
+        self.preprocessors, self.postprocessors = [], []
+        self.template_handlers, self.tag_handlers = {}, {}
+        self._initialize_handlers()
+
+    def _initialize_handlers(self):
+        """Initialize all template and tag handlers.
+
+        The five ``_initialize_*`` hooks run in a fixed order: templates,
+        tags, preprocessors, postprocessors, wikilinks.
+        """
+        for initialize in (
+            self._initialize_template_handlers,
+            self._initialize_tag_handlers,
+            self._initialize_preprocessors,
+            self._initialize_postprocessors,
+            self._initialize_wikilink_handlers,
+        ):
+            initialize()
+
+    # -------------------------------------------------------------------------
+    # Default handler initialization
+    #
+    # These defaults match the original JPS1917 processor behavior. Other
+    # importers can clear/override and register their own handlers as needed.
+    # -------------------------------------------------------------------------
+
+    def _initialize_template_handlers(self):
+        """Initialize handlers for MediaWiki templates.
+
+        Adds the default name-to-handler entries to ``self.template_handlers``.
+        Both ``SIC`` and ``sic`` are registered because lookup is by exact name.
+        """
+        self.template_handlers.update(
+            {
+                # Text Formatting Templates
+                "sc": self._handle_small_caps,
+                "larger": self._handle_larger_text,
+                "x-larger": self._handle_x_larger_text,
+                "xx-larger": self._handle_xx_larger_text,
+                "xxx-larger": self._handle_xxx_larger_text,
+                "smaller": self._handle_smaller_text,
+                # Layout Templates
+                "c": self._handle_center,
+                "right": self._handle_right_align,
+                "rule": self._handle_horizontal_rule,
+                "nop": self._handle_no_paragraph,
+                # Biblical Content Templates
+                "verse": self._handle_verse,
+                "rh": self._handle_right_header,
+                "dropinitial": self._handle_drop_initial,
+                "dhr": self._handle_double_horizontal_rule,
+                # Navigation Templates
+                "anchor": self._handle_anchor,
+                "anchor+": self._handle_anchor_plus,
+                # Language Templates
+                "lang": self._handle_language,
+                # Reference Templates
+                "smallrefs": self._handle_small_refs,
+                # Special Templates
+                "hws": self._handle_hws,
+                "hwe": self._handle_hwe,
+                "***": self._handle_asterisks,
+                "reconstruct": self._handle_reconstruct,
+                "SIC": self._handle_sic,
+                "sic": self._handle_sic,
+                "sup": self._handle_superscript,
+                "bar": self._handle_bar,
+                "gap": self._handle_gap,
+                "overfloat left": self._handle_overfloat_left,
+                "float right": self._handle_float_right,
+                "smaller block/s": self._handle_smaller_block_start,
+                "smaller block/e": self._handle_smaller_block_end,
+            }
+        )
+
+    def _initialize_tag_handlers(self):
+        """Initialize handlers for HTML/XML tags.
+
+        Adds the default tag-to-handler entries to ``self.tag_handlers``.
+        Keys are lowercase because tag names are lowercased before lookup.
+        """
+        self.tag_handlers.update(
+            {
+                # Structural Tags
+                "section": self._handle_section,
+                "table": self._handle_table,
+                "tr": self._handle_table_row,
+                "td": self._handle_table_cell,
+                # Text Formatting Tags
+                "i": self._handle_italic,
+                "br": self._handle_line_break,
+                "span": self._handle_span,
+                # Content Tags
+                "dd": self._handle_definition_description,
+                "ref": self._handle_reference,
+                # MediaWiki Specific Tags
+                "noinclude": self._handle_noinclude,
+                "pagequality": self._handle_pagequality,
+            }
+        )
+
+    def _initialize_preprocessors(self):
+        """Initialize preprocessing functions.
+
+        Replaces ``self.preprocessors`` with the default list of five.
+        """
+        # NOTE(review): presumably applied in list order by the processing
+        # entry point, which is outside this excerpt - confirm.
+        self.preprocessors = [
+            self._fix_noinclude_line_breaks,
+            self._convert_paragraph_breaks,
+            self._normalize_whitespace,
+            self._handle_special_characters,
+            self._extract_metadata,
+        ]
+
+    def _initialize_postprocessors(self):
+        """Initialize postprocessing functions.
+
+        Replaces ``self.postprocessors`` with the default list of two.
+        """
+        self.postprocessors = [
+            self._validate_xml_structure,
+            self._finalize_metadata,
+        ]
+
+    def _initialize_wikilink_handlers(self):
+        """Initialize wikilink processing"""
+        # Intentionally a no-op here; called from _initialize_handlers().
+        pass
+
+    # -------------------------------------------------------------------------
+    # Core processing
+    # -------------------------------------------------------------------------
+
+    def _process_nested_content(self, content: str, depth: int = 0) -> str:
+        """Recursively process nested templates and other elements.
+
+        Only the top-level nodes of the parsed wikitext are visited.
+        Templates and tags with a registered handler are replaced by the
+        handler's output; unregistered ones are re-processed from their own
+        text.  Wikilinks go through ``_handle_wikilink``.  Once *depth*
+        exceeds 10 the content is returned unchanged.
+        """
+        if depth > 10:
+            return content
+
+        wikicode = mwparserfromhell.parse(content)
+        pending = []
+        next_depth = depth + 1
+
+        for node in wikicode.nodes:
+            if hasattr(node, "name"):  # Template
+                template_name = str(node.name).strip()
+                if template_name not in self.template_handlers:
+                    new_text = self._process_nested_content(str(node), next_depth)
+                else:
+                    try:
+                        nested = self._process_template_with_nesting(node, next_depth)
+                        new_text = self.template_handlers[template_name](nested)
+                    except Exception:
+                        # Best effort: retry the handler on the untouched node.
+                        new_text = self.template_handlers[template_name](node)
+
+            elif hasattr(node, "tag"):  # Tag
+                tag_name = str(node.tag).strip().lower()
+                if tag_name not in self.tag_handlers:
+                    new_text = self._process_nested_content(str(node), next_depth)
+                else:
+                    try:
+                        nested = self._process_tag_with_nesting(node, next_depth)
+                        new_text = self.tag_handlers[tag_name](nested)
+                    except Exception:
+                        new_text = self.tag_handlers[tag_name](node)
+
+            elif "Wikilink" in str(node.__class__):
+                try:
+                    new_text = self._handle_wikilink(node)
+                except Exception:
+                    # Keep the original wikitext if the link cannot be handled.
+                    new_text = str(node)
+
+            else:
+                continue
+
+            pending.append((node, new_text))
+
+        for node, new_text in pending:
+            wikicode.replace(node, new_text)
+
+        return str(wikicode)
+
+    def _process_template_with_nesting(self, template, depth: int = 0) -> object:
+        """Return a deep copy of *template* whose parameter values were converted recursively."""
+        import copy
+
+        clone = copy.deepcopy(template)
+        for param in clone.params:
+            if hasattr(param, "value"):
+                param.value = self._process_nested_content(str(param.value), depth + 1)
+        return clone
+
+    def _process_tag_with_nesting(self, tag, depth: int = 0) -> object:
+        """Return a deep copy of *tag* whose non-empty contents were converted recursively."""
+        import copy
+
+        clone = copy.deepcopy(tag)
+        inner = getattr(clone, "contents", None)
+        # Missing or falsy contents are left exactly as copied.
+        if inner:
+            clone.contents = self._process_nested_content(str(inner), depth + 1)
+        return clone
+
+    # -------------------------------------------------------------------------
+    # Template handlers (JPS1917 defaults)
+    # -------------------------------------------------------------------------
+
+    def _handle_small_caps(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<sc>``."""
+        return f"<sc>{template.get(1, '')!s}</sc>"
+
+    def _handle_larger_text(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<larger>``."""
+        return f"<larger>{template.get(1, '')!s}</larger>"
+
+    def _handle_x_larger_text(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<x-larger>``."""
+        return f"<x-larger>{template.get(1, '')!s}</x-larger>"
+
+    def _handle_xx_larger_text(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<xx-larger>``."""
+        return f"<xx-larger>{template.get(1, '')!s}</xx-larger>"
+
+    def _handle_xxx_larger_text(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<xxx-larger>``."""
+        return f"<xxx-larger>{template.get(1, '')!s}</xxx-larger>"
+
+    def _handle_smaller_text(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<smaller>``."""
+        return f"<smaller>{template.get(1, '')!s}</smaller>"
+
+    def _handle_center(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<c>``."""
+        return f"<c>{template.get(1, '')!s}</c>"
+
+    def _handle_right_align(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<right>``."""
+        return f"<right>{template.get(1, '')!s}</right>"
+
+    def _handle_horizontal_rule(self, template) -> str:
+        return "<rule/>"  # fixed marker; the template's arguments are not consulted
+
+    def _handle_double_horizontal_rule(self, template) -> str:
+        return "<dhr/>"  # fixed marker; the template's arguments are not consulted
+
+    def _handle_no_paragraph(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<nop>``."""
+        return f"<nop>{template.get(1, '')!s}</nop>"
+
+    def _handle_verse(self, template) -> str:
+        """Build ``<verse>`` from parameters 1 (chapter) and 2 (verse), both stripped, and 3 (content)."""
+        chapter, verse = (str(template.get(key, "")).strip() for key in (1, 2))
+        body = str(template.get(3, ""))
+        return f'<verse chapter="{chapter}" verse="{verse}">{body}</verse>'
+
+    def _handle_right_header(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<rh>``."""
+        return f"<rh>{template.get(1, '')!s}</rh>"
+
+    def _handle_drop_initial(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<dropinitial>``."""
+        return f"<dropinitial>{template.get(1, '')!s}</dropinitial>"
+
+    def _handle_anchor(self, template) -> str:
+        """Emit an empty ``<anchor>`` whose ``name`` is the stripped parameter 1."""
+        return f'<anchor name="{str(template.get(1, "")).strip()}"/>'
+
+    def _handle_anchor_plus(self, template) -> str:
+        """Emit an empty ``<anchor-plus>`` whose ``name`` is the stripped parameter 1."""
+        return f'<anchor-plus name="{str(template.get(1, "")).strip()}"/>'
+
+    def _handle_language(self, template) -> str:
+        """Wrap parameter 2 in ``<lang>`` whose ``code`` is the stripped parameter 1."""
+        lang_code, body = str(template.get(1, "")).strip(), str(template.get(2, ""))
+        return f'<lang code="{lang_code}">{body}</lang>'
+
+    def _handle_small_refs(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<smallrefs>``."""
+        return f"<smallrefs>{template.get(1, '')!s}</smallrefs>"
+
+    def _handle_hws(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<hws>``."""
+        return f"<hws>{template.get(1, '')!s}</hws>"
+
+    def _handle_hwe(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<hwe>``."""
+        return f"<hwe>{template.get(1, '')!s}</hwe>"
+
+    def _handle_asterisks(self, template) -> str:
+        return "<asterisks/>"  # fixed marker; the template's arguments are not consulted
+
+    def _handle_reconstruct(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<reconstruct>``."""
+        return f"<reconstruct>{template.get(1, '')!s}</reconstruct>"
+
+    def _handle_sic(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<sic>``."""
+        return f"<sic>{template.get(1, '')!s}</sic>"
+
+    def _handle_superscript(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<sup>``."""
+        return f"<sup>{template.get(1, '')!s}</sup>"
+
+    def _handle_bar(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<bar>``."""
+        return f"<bar>{template.get(1, '')!s}</bar>"
+
+    def _handle_gap(self, template) -> str:
+        return "<gap/>"  # fixed marker; the template's arguments are not consulted
+
+    def _handle_overfloat_left(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<overfloat_left>``."""
+        return f"<overfloat_left>{template.get(1, '')!s}</overfloat_left>"
+
+    def _handle_float_right(self, template) -> str:
+        """Render parameter 1 (empty when absent) inside ``<float_right>``."""
+        return f"<float_right>{template.get(1, '')!s}</float_right>"
+
+    def _handle_smaller_block_start(self, template) -> str:
+        return "<smaller_block_start/>"  # empty start marker; the template's arguments are not consulted
+
+    def _handle_smaller_block_end(self, template) -> str:
+        return "<smaller_block_end/>"  # empty end marker; the template's arguments are not consulted
+
+    # -------------------------------------------------------------------------
+    # Tag handlers (JPS1917 defaults)
+    # -------------------------------------------------------------------------
+
+    def _handle_section(self, tag) -> str:
+        """Emit an empty ``<section>``. NOTE(review): assumes ``tag.attributes`` is dict-like - confirm."""
+        return f'<section begin="{getattr(tag, "attributes", {}).get("begin", "")}"/>'
+
+    def _handle_table(self, tag) -> str:
+        """Wrap the tag's contents (empty when missing or falsy) in ``<table>``."""
+        return f"<table>{getattr(tag, 'contents', '') or ''}</table>"
+
+    def _handle_table_row(self, tag) -> str:
+        """Wrap the tag's contents (empty when missing or falsy) in ``<tr>``."""
+        return f"<tr>{getattr(tag, 'contents', '') or ''}</tr>"
+
+    def _handle_table_cell(self, tag) -> str:
+        """Wrap the tag's contents (empty when missing or falsy) in ``<td>``."""
+        return f"<td>{getattr(tag, 'contents', '') or ''}</td>"
+
+    def _handle_italic(self, tag) -> str:
+        """Wrap the tag's contents (empty when missing or falsy) in ``<i>``."""
+        return f"<i>{getattr(tag, 'contents', '') or ''}</i>"
+
+    def _handle_line_break(self, tag) -> str:
+        return "<br/>"  # always self-closed; the tag's attributes and contents are not consulted
+
+    def _handle_span(self, tag) -> str:
+        """Wrap the tag's contents (empty when missing or falsy) in a bare ``<span>``."""
+        return f"<span>{getattr(tag, 'contents', '') or ''}</span>"
+
+    def _handle_definition_description(self, tag) -> str:
+        """Wrap the tag's contents (empty when missing or falsy) in ``<dd>``."""
+        return f"<dd>{getattr(tag, 'contents', '') or ''}</dd>"
+
+    def _handle_reference(self, tag) -> str:
+        """Emit ``<ref>`` around the contents. NOTE(review): assumes ``tag.attributes`` is dict-like - confirm."""
+        ref_name = getattr(tag, "attributes", {}).get("name", "")
+        return f'<ref name="{ref_name}">{getattr(tag, "contents", "") or ""}</ref>'
+
+    def _handle_noinclude(self, tag) -> str:
+        """Wrap the tag's contents (empty when missing or falsy) in ``<noinclude>``."""
+        return f"<noinclude>{getattr(tag, 'contents', '') or ''}</noinclude>"
+
+    def _handle_pagequality(self, tag) -> str:
+        """Wrap the tag's contents (empty when missing or falsy) in ``<pagequality>``."""
+        return f"<pagequality>{getattr(tag, 'contents', '') or ''}</pagequality>"
+
+    # -------------------------------------------------------------------------
+    # Pre/post processing (JPS1917 defaults)
+    # -------------------------------------------------------------------------
+
+    def _fix_noinclude_line_breaks(self, text: str, metadata: Dict[str, Any]) -> str:
+        return re.sub(r"</noinclude>\n", "</noinclude>", text)  # drop the single newline right after each </noinclude>; metadata is unused
+
+    def _convert_paragraph_breaks(self, text: str, metadata: Dict[str, Any]) -> str:
+        return text.replace("\n\n", "<p/>")  # every pair of consecutive newlines becomes a <p/> marker; metadata is unused
+
+    def _normalize_whitespace(self, text: str, metadata: Dict[str, Any]) -> str:
+        return re.sub(r"[ \t]+", " ", text)  # collapse runs of spaces/tabs to one space; newlines are left alone
+
+    def _handle_special_characters(self, text: str, metadata: Dict[str, Any]) -> str:
+        """Return *text* unchanged; no escaping is applied at this stage."""
+        return text
+
+    def _extract_metadata(self, text: str, metadata: Dict[str, Any]) -> str:
+        metadata.setdefault("length", len(text))  # records the text length unless "length" is already set
+        return text
+
+    def _validate_xml_structure(self, xml_content: str, metadata: Dict[str, Any]) -> str:
+        """Return *xml_content* unchanged; no validation is performed in this method."""
+        return xml_content
+
+    def _finalize_metadata(self, xml_content: str, metadata: Dict[str, Any]) -> str:
+        metadata["processed"] = True  # flag set in place; the XML itself passes through unchanged
+        return xml_content
+
+    # -------------------------------------------------------------------------
+    # Wikilinks
+    # -------------------------------------------------------------------------
+
+    def _handle_wikilink(self, node) -> str:
+        """Record the link in ``self.wikilinks`` and return a ``<__link__>`` placeholder (``str(node)`` on failure)."""
+        try:
+            label = getattr(node, "text", None)
+            title, text = str(getattr(node, "title", "")).strip(), str(label).strip() if label else ""
+            self.wikilinks.append({"title": title, "text": text})
+            closing = f">{text}</__link__>" if text else "/>"
+            return f'<__link__ title="{title}"' + closing
+        except Exception:
+            return str(node)
+
+    # -------------------------------------------------------------------------
+    # Public API
+    # -------------------------------------------------------------------------
+
+    def process_wikitext(self, wikitext: str) -> ConversionResult:
+        """Convert *wikitext*; stage failures are collected in ``errors`` instead of being raised."""
+        metadata: Dict[str, Any] = {}
+        warnings: List[str] = []
+        errors: List[str] = []
+        text = wikitext or ""
+        for preprocess in self.preprocessors:
+            try:
+                text = preprocess(text, metadata)
+            except Exception as exc:
+                errors.append(str(exc))
+
+        try:
+            xml_content = self._process_nested_content(text)
+        except Exception as exc:
+            xml_content = text  # fall back to the preprocessed text
+            errors.append(str(exc))
+
+        for postprocess in self.postprocessors:
+            try:
+                xml_content = postprocess(xml_content, metadata)
+            except Exception as exc:
+                errors.append(str(exc))
+
+        return ConversionResult(
+            xml_content=xml_content,
+            metadata=metadata,
+            warnings=warnings,
+            errors=errors,
+            wikilinks=self.wikilinks.copy(),
+        )
+
+    def add_template_handler(self, template_name: str, handler_func) -> None:
+        self.template_handlers[template_name] = handler_func  # replaces any handler already stored under this name
+
+    def add_tag_handler(self, tag_name: str, handler_func) -> None:
+        self.tag_handlers[tag_name] = handler_func  # stored as given; lookups in _process_nested_content use the lower-cased tag name
+
+    def add_preprocessor(self, preprocessor_func) -> None:
+        self.preprocessors.append(preprocessor_func)  # appended, so it runs after the ones already registered
+
+    def add_postprocessor(self, postprocessor_func) -> None:
+        self.postprocessors.append(postprocessor_func)  # appended, so it runs after the ones already registered
+
+    def get_wikilinks(self) -> List[Dict[str, Any]]:
+        return self.wikilinks.copy()  # shallow copy: a new list holding the same entry dicts
+
+    def clear_wikilinks(self) -> None:
+        self.wikilinks.clear()  # empties the list in place
+
+
+def create_processor() -> MediaWikiProcessor:
+    return MediaWikiProcessor()  # new instance built with the constructor's defaults
+
+
+def process_page(page_content: str) -> ConversionResult:
+    """Convert *page_content* with a freshly created processor."""
+    return create_processor().process_wikitext(page_content)
+
diff --git a/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_convert_tsv.py b/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_convert_tsv.py
new file mode 100644
index 0000000..3c440fd
--- /dev/null
+++ b/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_convert_tsv.py
@@ -0,0 +1,134 @@
+import unittest
+from pathlib import Path
+from unittest.mock import patch
+import tempfile
+
+
+from opensiddur.importer.miqra_al_pi_hamasorah.convert_tsv import (
+    _extract_chapter_verse_numbers,
+    main,
+)
+
+
+class TestMiqraConvertTsv(unittest.TestCase):
+    @patch("opensiddur.importer.miqra_al_pi_hamasorah.convert_tsv.validate")
+    def test_only_book_writes_output(self, mock_validate):
+        mock_validate.return_value = (True, [])  # the patched validate() returns this fixed tuple
+
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            sourcetexts_root = tmp_path / "sources"
+            sheets_dir = sourcetexts_root / "miqra_al_pi_hamasorah" / "sheets"
+            sheets_dir.mkdir(parents=True, exist_ok=True)
+
+            # Minimal readme.tsv with two key/value rows (license and attribution)
+            (sheets_dir / "readme.tsv").write_text(
+                "License\tCC-BY-SA 4.0\nAttribution\tHebrew Wikisource\n",
+                encoding="utf-8",
+            )
+
+            # Minimal torah.tsv: five-column header plus one data row for Genesis 1:1
+            (sheets_dir / "torah.tsv").write_text(
+                "\t".join(["Page key", "Row id", "Nav", "Scaffold", "Text"])
+                + "\n"
+                + "\t".join(
+                    [
+                        "ספר בראשית/א",
+                        "א",
+                        "",
+                        "{{מ:פסוק|בראשית|1|1}}",
+                        '{{נוסח|{{מ:אות-ג|בְּ}}רֵאשִׁ֖ית|2=test note}}',
+                    ]
+                )
+                + "\n",
+                encoding="utf-8",
+            )
+
+            project_dir = tmp_path / "project"
+            rc = main(
+                [
+                    "--sourcetexts-root",
+                    str(sourcetexts_root),
+                    "--project-dir",
+                    str(project_dir),
+                    "--only-book",
+                    "genesis",
+                ]
+            )
+            self.assertEqual(rc, 0)
+
+            genesis_xml = project_dir / "genesis.xml"
+            self.assertTrue(genesis_xml.exists())
+            xml = genesis_xml.read_text(encoding="utf-8")
+            self.assertIn("<tei:TEI", xml)
+            self.assertIn('unit="verse"', xml)
+            self.assertIn('n="1"', xml)
+            self.assertIn("urn:x-opensiddur:text:bible:genesis/1/1", xml)
+            self.assertIn("<tei:ab>", xml)
+            self.assertIn('<tei:head xml:lang="en">', xml)
+            self.assertIn("Genesis", xml)
+            self.assertIn('rend="large"', xml)  # expected from the nested large-letter template in the Text cell
+            self.assertIn("בְּ", xml)
+            self.assertIn("tei:standOff", xml)
+            self.assertIn("test note", xml)  # the 2= argument of the variant template must survive
+
+    def test_special_tsv_row_does_not_produce_invalid_urn_segments(self):
+        # A special.tsv-style row (suffixed page key, no verse scaffold) must yield empty chapter and verse.
+        ch, v = _extract_chapter_verse_numbers(
+            "ספר שמות/טו תתת",
+            "<noinclude>{{#קטע:שירת הים/צורת השיר|צורת-השיר}}{{מ:טעמי",
+            "",
+        )
+        self.assertEqual(ch, "")
+        self.assertEqual(v, "")
+
+    @patch("opensiddur.importer.miqra_al_pi_hamasorah.convert_tsv.validate")
+    def test_special_tsv_not_merged_into_book(self, mock_validate):
+        mock_validate.return_value = (True, [])  # the patched validate() returns this fixed tuple
+
+        with tempfile.TemporaryDirectory() as tmp:
+            tmp_path = Path(tmp)
+            sourcetexts_root = tmp_path / "sources"
+            sheets_dir = sourcetexts_root / "miqra_al_pi_hamasorah" / "sheets"
+            sheets_dir.mkdir(parents=True, exist_ok=True)
+
+            (sheets_dir / "torah.tsv").write_text(
+                "\t".join(["Page key", "Row id", "Nav", "Scaffold", "Text"])
+                + "\n"
+                + "\t".join(
+                    [
+                        "ספר שמות/טו",
+                        "א",
+                        "",
+                        "{{מ:פסוק|שמות|15|1}}",
+                        "שירה",
+                    ]
+                )
+                + "\n",
+                encoding="utf-8",
+            )
+            (sheets_dir / "special.tsv").write_text(  # two-column row sharing the Exodus 15 page-key prefix
+                "ספר שמות/טו תתת\t{{#קטע:שירת הים/צורת השיר|צורת-השיר}}{{מ:טעמי\n",
+                encoding="utf-8",
+            )
+
+            project_dir = tmp_path / "project"
+            main(  # NOTE(review): unlike the first test, the return code is not asserted here
+                [
+                    "--sourcetexts-root",
+                    str(sourcetexts_root),
+                    "--project-dir",
+                    str(project_dir),
+                    "--only-book",
+                    "exodus",
+                ]
+            )
+            xml = (project_dir / "exodus.xml").read_text(encoding="utf-8")
+            self.assertIn("urn:x-opensiddur:text:bible:exodus/15/1", xml)
+            self.assertNotIn("צורת-השיר", xml)  # nothing from the special.tsv cell may leak into the book
+            self.assertNotIn("השיר|", xml)
+
+
+if __name__ == "__main__":
+    unittest.main()
+
diff --git a/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_miqra_wikitext.py b/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_miqra_wikitext.py
new file mode 100644
index 0000000..3a44834
--- /dev/null
+++ b/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_miqra_wikitext.py
@@ -0,0 +1,106 @@
+import unittest
+
+from opensiddur.importer.miqra_al_pi_hamasorah.miqra_wikitext import (
+    link_target_to_uri,
+    normalize_template_name,
+    reset_processor,
+    wikitext_to_intermediate_xml,
+)
+
+
+class TestMiqraWikitext(unittest.TestCase):
+    def setUp(self):
+        reset_processor()  # runs before every test method
+
+    def test_nosach_nested_large_letter(self):
+        frag = wikitext_to_intermediate_xml(
+            '{{נוסח|{{מ:אות-ג|בְּ}}רֵאשִׁ֖ית|2=note text}}'
+        )
+        self.assertIn("<miqra:variant", frag)
+        self.assertIn('<miqra:hi rend="large">', frag)
+        self.assertIn("בְּ", frag)
+        self.assertIn("<miqra:note", frag)
+        self.assertIn("note text", frag)
+
+    def test_ketiv_qeri(self):
+        frag = wikitext_to_intermediate_xml('{{כו"ק|כתיב|קְרִי}}')
+        self.assertIn('<miqra:kq order="ketiv-first">', frag)
+        self.assertIn("<miqra:ketiv>כתיב</miqra:ketiv>", frag)
+        self.assertIn("<miqra:qeri>קְרִי</miqra:qeri>", frag)
+
+    def test_qeri_ketiv(self):
+        frag = wikitext_to_intermediate_xml('{{קו"כ|כתיב|קְרִי}}')
+        self.assertIn('order="qeri-first"', frag)
+
+    def test_parashah_open(self):
+        frag = wikitext_to_intermediate_xml("{{פפ}}")
+        self.assertIn('<miqra:parashah type="open"', frag)
+
+    def test_strip_pasuk(self):
+        frag = wikitext_to_intermediate_xml("{{מ:פסוק|בראשית|1|1}}")
+        self.assertEqual(frag, "")  # the verse-scaffold template must leave no output
+
+    def test_note_link_named_numeric_params(self):
+        frag = wikitext_to_intermediate_xml(
+            "{{מ:קישור בהערה|1=http://www.example.com/doc.pdf|2=label}}"
+        )
+        self.assertIn('target="http://www.example.com/doc.pdf"', frag)
+        self.assertNotIn("1=http", frag)  # the explicit "1=" key must not leak into the target
+        self.assertIn("label", frag)
+
+    def test_internal_note_link_to_wikisource_uri(self):
+        frag = wikitext_to_intermediate_xml(
+            "{{מ:קישור פנימי בהערה|ויקיטקסט:מבוא|פרק שני}}"
+        )
+        self.assertIn('target="https://he.wikisource.org/wiki/', frag)
+        self.assertNotIn("ויקיטקסט:מבוא", frag)
+        self.assertIn("פרק שני", frag)
+
+    def test_link_target_to_uri(self):
+        self.assertEqual(
+            link_target_to_uri("http://example.com/x"),
+            "http://example.com/x",
+        )
+        uri = link_target_to_uri("ויקיטקסט:מבוא")
+        self.assertTrue(uri.startswith("https://he.wikisource.org/wiki/"))
+
+    def test_column_c_double_underscore(self):
+        frag = wikitext_to_intermediate_xml("word__word", column_c=True)
+        self.assertIn("word word", frag)
+
+    def test_all_templates_from_doc_have_handlers(self):
+        """Every template name in templates.tsv examples is recognized."""
+        from pathlib import Path
+        import csv
+        import re
+
+        path = Path(__file__).resolve().parents[4] / "sources" / "miqra_al_pi_hamasorah" / "sheets" / "templates.tsv"
+        if not path.exists():
+            self.skipTest("templates.tsv not in workspace")
+        names: set[str] = set()
+        with path.open(encoding="utf-8") as handle:  # close the sheet even if parsing raises
+            for row in csv.reader(handle, delimiter="\t"):
+                for cell in row:
+                    for m in re.finditer(r"\{\{([^}|#][^}|#]*?)(?:\|[^}]*)?\}\}", cell):
+                        n = normalize_template_name(m.group(1))
+                        if n and n not in ("documentation", "name", "template", "תבנית"):
+                            names.add(n)
+
+        from opensiddur.importer.miqra_al_pi_hamasorah.miqra_wikitext import (
+            MiqraWikiTextProcessor,
+            _STRIP_TEMPLATES,
+        )
+
+        proc = MiqraWikiTextProcessor()
+        missing = []
+        for n in sorted(names):
+            if n in _STRIP_TEMPLATES or n in proc.template_handlers:
+                continue
+            if proc._lookup_handler(n) is not None:
+                continue
+            missing.append(n)
+        self.assertEqual(missing, [], f"Unhandled templates: {missing}")
+
+
+if __name__ == "__main__":
+    unittest.main()

From b6e83609897871745f890259785496924a93d60b Mon Sep 17 00:00:00 2001
From: Efraim Feinstein <efraim.feinstein@gmail.com>
Date: Wed, 27 May 2026 22:33:51 -0700
Subject: [PATCH 03/10] wip: miqra with notes, still some issues on rendering

---
 opensiddur/exporter/tex/reledmac.xslt         |  38 ++-
 .../miqra_al_pi_hamasorah/miqra_to_tei.xslt   | 233 ++++++++++++------
 .../tests/exporter/test_reledmac_xslt.py      |  40 ++-
 .../miqra_al_pi_hamasorah/test_convert_tsv.py |  20 +-
 4 files changed, 241 insertions(+), 90 deletions(-)

diff --git a/opensiddur/exporter/tex/reledmac.xslt b/opensiddur/exporter/tex/reledmac.xslt
index 82d7980..a06bcd2 100644
--- a/opensiddur/exporter/tex/reledmac.xslt
+++ b/opensiddur/exporter/tex/reledmac.xslt
@@ -539,7 +539,7 @@
                             <xsl:text>\pend&#10;</xsl:text>
                         </xsl:if>
                         <xsl:text>\eledchapter{</xsl:text>
-                        <xsl:value-of select="f:escape-tex(string(@title))"/>
+                        <xsl:value-of select="f:format-section-title(string(@title), string(@xml:lang))"/>
                         <xsl:text>}&#10;</xsl:text>
                         <xsl:next-iteration>
                             <xsl:with-param name="in-pstart" select="false()"/>
@@ -550,7 +550,7 @@
                             <xsl:text>\pend&#10;</xsl:text>
                         </xsl:if>
                         <xsl:text>\eledsubsection{</xsl:text>
-                        <xsl:value-of select="f:escape-tex(string(@title))"/>
+                        <xsl:value-of select="f:format-section-title(string(@title), string(@xml:lang))"/>
                         <xsl:text>}&#10;</xsl:text>
                         <xsl:next-iteration>
                             <xsl:with-param name="in-pstart" select="false()"/>
@@ -659,8 +659,10 @@
 
     <xsl:template match="tei:body/tei:div" mode="leaves">
         <xsl:if test="tei:head">
+            <xsl:variable name="head" select="tei:head[1]"/>
             <xsl:element name="f:eledpart" namespace="urn:opensiddur:reledmac">
-                <xsl:attribute name="title" select="normalize-space(string-join(tei:head//text(), ''))"/>
+                <xsl:attribute name="title" select="normalize-space(string-join($head//text(), ''))"/>
+                <xsl:attribute name="xml:lang" select="f:section-title-lang($head)"/>
             </xsl:element>
         </xsl:if>
         <xsl:apply-templates select="node()[not(self::tei:head)]" mode="leaves"/>
@@ -668,8 +670,10 @@
 
     <xsl:template match="tei:div" mode="leaves" priority="-1">
         <xsl:if test="tei:head">
+            <xsl:variable name="head" select="tei:head[1]"/>
             <xsl:element name="f:eledsubsection" namespace="urn:opensiddur:reledmac">
-                <xsl:attribute name="title" select="normalize-space(string-join(tei:head//text(), ''))"/>
+                <xsl:attribute name="title" select="normalize-space(string-join($head//text(), ''))"/>
+                <xsl:attribute name="xml:lang" select="f:section-title-lang($head)"/>
             </xsl:element>
         </xsl:if>
         <xsl:apply-templates select="node()[not(self::tei:head)]" mode="leaves"/>
@@ -893,6 +897,32 @@
         <xsl:sequence select="count($ctx/preceding::tei:note[not(@type='instruction') and not(ancestor::tei:standOff)])"/>
     </xsl:function>
 
+    <!-- Language of a section title: the head's own @xml:lang, else the nearest ancestor tei:div's, else tei:TEI's; '' when none is declared. -->
+    <xsl:function name="f:section-title-lang" as="xs:string">
+        <xsl:param name="head" as="element(tei:head)"/>
+        <xsl:sequence select="string((
+            $head/@xml:lang,
+            $head/ancestor::tei:div[@xml:lang][1]/@xml:lang,
+            $head/ancestor::tei:TEI[@xml:lang][1]/@xml:lang
+        )[1])"/>
+    </xsl:function>
+
+    <!-- TeX-escape a section title. Hebrew ('he' or 'he-*') stays in the stream direction; every other
+         value, including an empty language, is wrapped in an explicit LTR group that selects English. -->
+    <xsl:function name="f:format-section-title" as="xs:string">
+        <xsl:param name="title" as="xs:string"/>
+        <xsl:param name="lang" as="xs:string"/>
+        <xsl:variable name="escaped" select="f:escape-tex($title)"/>
+        <xsl:choose>
+            <xsl:when test="$lang = 'he' or starts-with($lang, 'he-')">
+                <xsl:sequence select="$escaped"/>
+            </xsl:when>
+            <xsl:otherwise>
+                <xsl:sequence select="concat('{\textdir TLT\selectlanguage{english}', $escaped, '}')"/>
+            </xsl:otherwise>
+        </xsl:choose>
+    </xsl:function>
+
     <xsl:function name="f:escape-tex" as="xs:string">
         <xsl:param name="s" as="xs:string"/>
         <!-- Escape characters that have special meaning in LaTeX. Order matters:
diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/miqra_to_tei.xslt b/opensiddur/importer/miqra_al_pi_hamasorah/miqra_to_tei.xslt
index f080fd9..c57ec6c 100644
--- a/opensiddur/importer/miqra_al_pi_hamasorah/miqra_to_tei.xslt
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/miqra_to_tei.xslt
@@ -5,10 +5,37 @@
   xmlns:j="http://jewishliturgy.org/ns/jlptei/2"
   xmlns:miqra="urn:x-opensiddur:miqra:intermediate"
   xmlns:mw="urn:x-opensiddur:mw:intermediate"
-  exclude-result-prefixes="miqra mw">
+  xmlns:xs="http://www.w3.org/2001/XMLSchema"
+  exclude-result-prefixes="miqra mw xs">
 
   <xsl:output method="xml" omit-xml-declaration="yes" indent="no"/>
 
+  <xsl:function name="miqra:parashah-p-type" as="xs:string"><!-- Map a parashah @type to a paragraph type: open-line gives open-3, the close/shirah variants give closed-1, anything else (including empty) gives open-1. -->
+    <xsl:param name="type" as="xs:string?"/>
+    <xsl:variable name="t" select="normalize-space($type)"/>
+    <xsl:choose>
+      <xsl:when test="$t = 'open-line'">
+        <xsl:sequence select="'open-3'"/>
+      </xsl:when>
+      <xsl:when test="$t = ('close', 'close-inline', 'close-narrow', 'shirah')">
+        <xsl:sequence select="'closed-1'"/>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:sequence select="'open-1'"/>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:function>
+
+  <xsl:function name="miqra:has-verse-ref" as="xs:boolean"><!-- True only when chapter and verse are both non-empty strings of the digits 0-9. -->
+    <xsl:param name="chapter" as="xs:string"/>
+    <xsl:param name="verse" as="xs:string"/>
+    <xsl:sequence select="
+      $chapter != '' and $verse != ''
+      and matches($chapter, '^[0-9]+$')
+      and matches($verse, '^[0-9]+$')
+    "/>
+  </xsl:function>
+
   <xsl:template match="/">
     <xsl:result-document href="body">
       <tei:body>
@@ -20,7 +47,22 @@
           <tei:head xml:lang="en">
             <xsl:value-of select="/miqra:book/@bookNameEn"/>
           </tei:head>
-          <xsl:apply-templates select="/miqra:book/miqra:row"/>
+          <xsl:variable name="blocks" as="node()*">
+            <xsl:apply-templates select="/miqra:book/miqra:row" mode="flatten"/>
+          </xsl:variable>
+          <xsl:for-each-group select="$blocks" group-starting-with="miqra:parashah">
+            <tei:p>
+              <xsl:attribute name="type">
+                <xsl:choose>
+                  <xsl:when test="current-group()[1] instance of element(miqra:parashah)">
+                    <xsl:sequence select="miqra:parashah-p-type(string((current-group()[1]/@type)))"/>
+                  </xsl:when>
+                  <xsl:otherwise>open-1</xsl:otherwise>
+                </xsl:choose>
+              </xsl:attribute>
+              <xsl:apply-templates select="current-group()[not(self::miqra:parashah)]" mode="block"/>
+            </tei:p>
+          </xsl:for-each-group>
         </tei:div>
       </tei:body>
     </xsl:result-document>
@@ -33,17 +75,52 @@
     </xsl:if>
   </xsl:template>
 
-  <xsl:template match="miqra:row">
+  <!-- Flatten each TSV row into nav markers + verse runs. A row whose text holds inline parashah markers yields one miqra:verse per run, each carrying the row's chapter/verse. -->
+  <xsl:template match="miqra:row" mode="flatten">
+    <!-- Column C: only parashah markers structure paragraphs; // line breaks are cosmetic. Inside for-each-group the context item is the group's first node, so row attributes are reached through ancestor::miqra:row. -->
+    <xsl:apply-templates select="miqra:nav/miqra:parashah" mode="flatten"/>
+    <xsl:choose>
+      <xsl:when test="miqra:text/miqra:parashah">
+        <xsl:for-each-group select="miqra:text/node()" group-starting-with="miqra:parashah">
+          <xsl:apply-templates select="current-group()[self::miqra:parashah]" mode="flatten"/>
+          <xsl:if test="current-group()[not(self::miqra:parashah)]">
+            <miqra:verse chapter="{ancestor::miqra:row[1]/@chapter}" verse="{ancestor::miqra:row[1]/@verse}" fileName="{ancestor::miqra:book/@fileName}">
+              <xsl:copy-of select="current-group()[not(self::miqra:parashah or self::miqra:note)]"/>
+            </miqra:verse>
+          </xsl:if>
+        </xsl:for-each-group>
+      </xsl:when>
+      <xsl:otherwise>
+        <miqra:verse chapter="{@chapter}" verse="{@verse}" fileName="{ancestor::miqra:book/@fileName}">
+          <xsl:copy-of select="miqra:text/node()[not(self::miqra:note)]"/>
+        </miqra:verse>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <xsl:template match="miqra:parashah" mode="flatten"><!-- Parashah markers pass through unchanged. -->
+    <xsl:copy-of select="."/>
+  </xsl:template>
+
+  <xsl:template match="miqra:lb" mode="flatten"><!-- Copied unchanged. NOTE(review): the row template applies flatten mode only to miqra:parashah; confirm this rule is reachable. -->
+    <xsl:copy-of select="."/>
+  </xsl:template>
+
+  <xsl:template match="text()[normalize-space(.) = '']" mode="flatten"/> <!-- whitespace-only text produces nothing in flatten mode -->
+
+  <xsl:template match="node()" mode="flatten"><!-- Catch-all for flatten mode: deep-copy the node as is. -->
+    <xsl:copy-of select="."/>
+  </xsl:template>
+
+  <!-- Verse milestone + text (no tei:ab wrapper). -->
+  <xsl:template match="miqra:verse" mode="block">
     <xsl:variable name="chapter" select="normalize-space(@chapter)"/>
     <xsl:variable name="verse" select="normalize-space(@verse)"/>
-    <xsl:variable name="has-verse-ref"
-      select="$chapter != '' and $verse != '' and matches($chapter, '^[0-9]+$') and matches($verse, '^[0-9]+$')"/>
-
-    <xsl:if test="$has-verse-ref">
+    <xsl:if test="miqra:has-verse-ref($chapter, $verse)">
       <tei:milestone unit="verse" n="{$verse}">
         <xsl:attribute name="corresp">
           <xsl:text>urn:x-opensiddur:text:bible:</xsl:text>
-          <xsl:value-of select="/miqra:book/@fileName"/>
+          <xsl:value-of select="@fileName"/>
           <xsl:text>/</xsl:text>
           <xsl:value-of select="$chapter"/>
           <xsl:text>/</xsl:text>
@@ -51,96 +128,95 @@
         </xsl:attribute>
       </tei:milestone>
     </xsl:if>
-    <tei:ab>
-      <xsl:apply-templates select="miqra:text/node()"/>
-    </tei:ab>
+    <xsl:apply-templates select="node()" mode="inline"/>
   </xsl:template>
 
-  <!-- Strip nav/scaffold from body output -->
-  <xsl:template match="miqra:nav | miqra:scaffold"/>
+  <xsl:template match="miqra:lb" mode="block">
+    <tei:lb/>
+  </xsl:template>
+
+  <!-- Strip nav/scaffold/row from direct processing -->
+  <xsl:template match="miqra:nav | miqra:scaffold | miqra:row"/>
 
   <!-- Variant documentation (נוסח) -->
-  <xsl:template match="miqra:variant">
+  <xsl:template match="miqra:variant" mode="inline">
     <xsl:if test="@noteId">
-      <tei:anchor>
+      <!-- Use tei:seg instead of tei:anchor because the compiler inserts resolved
+           annotations as children of the referenced element; tei:anchor must be empty. -->
+      <tei:seg>
         <xsl:attribute name="xml:id" select="concat(@noteId, '-ref')"/>
-      </tei:anchor>
+      </tei:seg>
     </xsl:if>
-    <xsl:apply-templates select="miqra:display/node()"/>
+    <xsl:apply-templates select="miqra:display" mode="inline"/>
+  </xsl:template>
+
+  <xsl:template match="miqra:display" mode="inline">
+    <xsl:apply-templates mode="inline"/>
   </xsl:template>
 
   <xsl:template match="miqra:note" mode="standoff">
     <tei:note>
       <xsl:copy-of select="@xml:id"/>
-      <xsl:apply-templates/>
+      <!-- Link this standOff note to the in-text marker so the reference database
+           can index it and the compiler can inline it at the correct point. -->
+      <xsl:attribute name="target" select="concat('#', string(@xml:id), '-ref')"/>
+      <xsl:apply-templates mode="inline"/>
     </tei:note>
   </xsl:template>
 
+  <xsl:template match="miqra:note" mode="inline"/>
   <xsl:template match="miqra:note"/>
 
   <!-- Ketiv/qeri -->
-  <xsl:template match="miqra:kq">
+  <xsl:template match="miqra:kq" mode="inline">
     <tei:choice>
       <xsl:choose>
         <xsl:when test="@order = 'qeri-first'">
           <j:read>
-            <xsl:apply-templates select="miqra:qeri/node() | miqra:bracketed/node()"/>
+            <xsl:apply-templates select="miqra:qeri/node() | miqra:bracketed/node()" mode="inline"/>
           </j:read>
           <j:written>
-            <xsl:apply-templates select="miqra:ketiv/node()"/>
+            <xsl:apply-templates select="miqra:ketiv/node()" mode="inline"/>
           </j:written>
         </xsl:when>
         <xsl:otherwise>
           <j:written>
-            <xsl:apply-templates select="miqra:ketiv/node()"/>
+            <xsl:apply-templates select="miqra:ketiv/node()" mode="inline"/>
           </j:written>
           <j:read>
-            <xsl:apply-templates select="miqra:qeri/node() | miqra:bracketed/node()"/>
+            <xsl:apply-templates select="miqra:qeri/node() | miqra:bracketed/node()" mode="inline"/>
           </j:read>
         </xsl:otherwise>
       </xsl:choose>
     </tei:choice>
   </xsl:template>
 
-  <xsl:template match="miqra:bracketed">
+  <xsl:template match="miqra:bracketed" mode="inline">
     <xsl:text>[</xsl:text>
-    <xsl:apply-templates/>
+    <xsl:apply-templates mode="inline"/>
     <xsl:text>]</xsl:text>
   </xsl:template>
 
-  <xsl:template match="miqra:kq-matres"/>
+  <xsl:template match="miqra:kq-matres" mode="inline"/>
 
-  <xsl:template match="miqra:ketiv-only">
+  <xsl:template match="miqra:ketiv-only" mode="inline">
     <tei:hi rend="ketiv-only">
       <xsl:text>(</xsl:text>
-      <xsl:apply-templates/>
+      <xsl:apply-templates mode="inline"/>
       <xsl:text>)</xsl:text>
     </tei:hi>
   </xsl:template>
 
-  <xsl:template match="miqra:qeri-only">
+  <xsl:template match="miqra:qeri-only" mode="inline">
     <tei:hi rend="qeri-only">
       <xsl:text>[</xsl:text>
-      <xsl:apply-templates/>
+      <xsl:apply-templates mode="inline"/>
       <xsl:text>]</xsl:text>
     </tei:hi>
   </xsl:template>
 
-  <!-- Parashah / poetic layout -->
-  <xsl:template match="miqra:parashah[@type = 'open']">
-    <tei:lb/>
-  </xsl:template>
-  <xsl:template match="miqra:parashah[@type = 'open-line']">
-    <tei:lb type="first"/>
-  </xsl:template>
-  <xsl:template match="miqra:parashah[@type = 'close']">
-    <tei:lb/>
-  </xsl:template>
-  <xsl:template match="miqra:parashah[@type = 'close-inline' or @type = 'close-narrow' or @type = 'shirah']">
-    <tei:lb/>
-  </xsl:template>
-
-  <xsl:template match="miqra:poetic">
+  <!-- Poetic layout (within a paragraph) -->
+  <xsl:template match="miqra:poetic" mode="inline">
     <tei:lb>
       <xsl:if test="@level != '0'">
         <xsl:attribute name="type">indent</xsl:attribute>
@@ -148,78 +224,69 @@
     </tei:lb>
   </xsl:template>
 
-  <xsl:template match="miqra:lb">
-    <tei:lb/>
-  </xsl:template>
-
-  <xsl:template match="miqra:centered">
+  <xsl:template match="miqra:centered" mode="inline">
     <tei:hi rend="centered">
-      <xsl:apply-templates/>
+      <xsl:apply-templates mode="inline"/>
     </tei:hi>
   </xsl:template>
 
-  <!-- Letter formatting -->
-  <xsl:template match="miqra:hi">
+  <xsl:template match="miqra:hi" mode="inline">
     <tei:hi>
       <xsl:attribute name="rend" select="@rend"/>
-      <xsl:apply-templates/>
+      <xsl:apply-templates mode="inline"/>
     </tei:hi>
   </xsl:template>
 
-  <xsl:template match="miqra:dotted">
-    <xsl:apply-templates/>
-  </xsl:template>
-
-  <xsl:template match="miqra:inverted-nun">
-    <xsl:apply-templates/>
+  <xsl:template match="miqra:dotted | miqra:inverted-nun" mode="inline">
+    <xsl:apply-templates mode="inline"/>
   </xsl:template>
 
-  <xsl:template match="miqra:yerushalem | miqra:yerushalema">
+  <xsl:template match="miqra:yerushalem | miqra:yerushalema" mode="inline">
     <xsl:value-of select="@vowel"/>
     <xsl:value-of select="@accent"/>
     <xsl:text>&#x034F;ִ</xsl:text>
   </xsl:template>
 
-  <xsl:template match="miqra:accent">
+  <xsl:template match="miqra:accent" mode="inline">
     <xsl:text> </xsl:text>
   </xsl:template>
 
-  <xsl:template match="miqra:qupo-accent"/>
+  <xsl:template match="miqra:qupo-accent" mode="inline"/>
 
-  <xsl:template match="miqra:punct">
+  <xsl:template match="miqra:punct | miqra:maqaf" mode="inline">
     <xsl:value-of select="."/>
   </xsl:template>
 
-  <xsl:template match="miqra:maqaf">
-    <xsl:value-of select="."/>
-  </xsl:template>
-
-  <xsl:template match="miqra:fn-mark">
+  <xsl:template match="miqra:fn-mark" mode="inline">
     <tei:hi rend="sup">*</tei:hi>
   </xsl:template>
 
-  <xsl:template match="miqra:anchor">
-    <tei:anchor>
+  <xsl:template match="miqra:anchor" mode="inline">
+    <!-- Use tei:seg instead of tei:anchor because annotations get inserted as children. -->
+    <tei:seg>
       <xsl:copy-of select="@xml:id"/>
-    </tei:anchor>
+    </tei:seg>
   </xsl:template>
 
-  <xsl:template match="miqra:line-anchor | miqra:segment | miqra:good-ending | miqra:dual-trope-link | miqra:dual-accent | miqra:strand"/>
+  <xsl:template match="miqra:line-anchor | miqra:segment | miqra:good-ending | miqra:dual-trope-link | miqra:dual-accent | miqra:strand" mode="inline"/>
+
+  <xsl:template match="miqra:parashah" mode="block"/>
+  <xsl:template match="miqra:parashah" mode="inline"/>
 
   <!-- Legacy mw elements -->
-  <xsl:template match="mw:hi">
+  <xsl:template match="mw:hi" mode="inline">
     <tei:hi>
       <xsl:attribute name="rend" select="@rend"/>
-      <xsl:apply-templates/>
+      <xsl:apply-templates mode="inline"/>
     </tei:hi>
   </xsl:template>
 
-  <xsl:template match="mw:link">
+  <xsl:template match="mw:link" mode="inline">
     <xsl:choose>
       <xsl:when test="normalize-space(.) != ''">
         <tei:ref>
           <xsl:attribute name="target" select="@target"/>
-          <xsl:apply-templates/>
+          <xsl:apply-templates mode="inline"/>
         </tei:ref>
       </xsl:when>
       <xsl:otherwise>
@@ -228,15 +295,19 @@
     </xsl:choose>
   </xsl:template>
 
-  <xsl:template match="mw:template">
-    <xsl:apply-templates select="mw:param/node()"/>
+  <xsl:template match="mw:template" mode="inline">
+    <xsl:apply-templates select="mw:param/node()" mode="inline"/>
+  </xsl:template>
+
+  <xsl:template match="mw:param" mode="inline">
+    <xsl:value-of select="."/>
   </xsl:template>
 
-  <xsl:template match="mw:param">
+  <xsl:template match="text()" mode="inline">
     <xsl:value-of select="."/>
   </xsl:template>
 
-  <xsl:template match="text()">
+  <xsl:template match="text()" mode="block">
     <xsl:value-of select="."/>
   </xsl:template>
 
diff --git a/opensiddur/tests/exporter/test_reledmac_xslt.py b/opensiddur/tests/exporter/test_reledmac_xslt.py
index e67d5fe..18c7d2d 100644
--- a/opensiddur/tests/exporter/test_reledmac_xslt.py
+++ b/opensiddur/tests/exporter/test_reledmac_xslt.py
@@ -565,8 +565,44 @@ def test_div_head_emits_sectioning(self):
           </tei:body></tei:text>
         </tei:TEI>"""
         out = _transform(xml)
-        # Top-level body div with head → \eledchapter
-        self.assertIn(r"\eledchapter{Genesis}", out)
+        # Top-level body div with head → \eledchapter (LTR wrapper when not Hebrew)
+        self.assertIn(
+            r"\eledchapter{{\textdir TLT\selectlanguage{english}Genesis}}",
+            out,
+        )
+
+    def test_english_head_in_hebrew_document_uses_ltr_wrapper(self):
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+        <tei:TEI xmlns:tei="http://www.tei-c.org/ns/1.0" xml:lang="he">
+          <tei:text><tei:body>
+            <tei:div type="book">
+              <tei:head xml:lang="en">Genesis</tei:head>
+              <tei:p><tei:milestone unit="verse" n="1"/>בְּרֵאשִׁית</tei:p>
+            </tei:div>
+          </tei:body></tei:text>
+        </tei:TEI>"""
+        out = _transform(xml)
+        self.assertIn(
+            r"\eledchapter{{\textdir TLT\selectlanguage{english}Genesis}}",
+            out,
+        )
+
+    def test_hebrew_head_in_hebrew_document_has_no_ltr_wrapper(self):
+        xml = """<?xml version="1.0" encoding="UTF-8"?>
+        <tei:TEI xmlns:tei="http://www.tei-c.org/ns/1.0" xml:lang="he">
+          <tei:text><tei:body>
+            <tei:div type="book">
+              <tei:head>בראשית</tei:head>
+              <tei:p><tei:milestone unit="verse" n="1"/>בְּרֵאשִׁית</tei:p>
+            </tei:div>
+          </tei:body></tei:text>
+        </tei:TEI>"""
+        out = _transform(xml)
+        self.assertIn(r"\eledchapter{בראשית}", out)
+        self.assertNotIn(
+            r"\eledchapter{{\textdir TLT\selectlanguage{english}בראשית}}",
+            out,
+        )
 
 
 if __name__ == "__main__":
diff --git a/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_convert_tsv.py b/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_convert_tsv.py
index 3c440fd..a2be3e5 100644
--- a/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_convert_tsv.py
+++ b/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_convert_tsv.py
@@ -27,7 +27,7 @@ def test_only_book_writes_output(self, mock_validate):
                 encoding="utf-8",
             )
 
-            # Minimal Torah TSV: header + one data row for Genesis 1
+            # Torah TSV: parashah in nav + two verses in one paragraph
             (sheets_dir / "torah.tsv").write_text(
                 "\t".join(["Page key", "Row id", "Nav", "Scaffold", "Text"])
                 + "\n"
@@ -35,11 +35,21 @@ def test_only_book_writes_output(self, mock_validate):
                     [
                         "ספר בראשית/א",
                         "א",
-                        "",
+                        "//{{פפ}}//",
                         "{{מ:פסוק|בראשית|1|1}}",
                         '{{נוסח|{{מ:אות-ג|בְּ}}רֵאשִׁ֖ית|2=test note}}',
                     ]
                 )
+                + "\n"
+                + "\t".join(
+                    [
+                        "ספר בראשית/א",
+                        "ב",
+                        "",
+                        "{{מ:פסוק|בראשית|1|2}}",
+                        "וְהָאָ֗רֶץ הָיְתָ֥ה תֹ֙הוּ֙ וָבֹ֔הוּ",
+                    ]
+                )
                 + "\n",
                 encoding="utf-8",
             )
@@ -64,13 +74,17 @@ def test_only_book_writes_output(self, mock_validate):
             self.assertIn('unit="verse"', xml)
             self.assertIn('n="1"', xml)
             self.assertIn("urn:x-opensiddur:text:bible:genesis/1/1", xml)
-            self.assertIn("<tei:ab>", xml)
+            self.assertNotIn("<tei:ab>", xml)
+            self.assertIn('<tei:p type="open-1">', xml)
+            self.assertIn("וְהָאָ֗רֶץ", xml)
             self.assertIn('<tei:head xml:lang="en">', xml)
             self.assertIn("Genesis", xml)
             self.assertIn('rend="large"', xml)
             self.assertIn("בְּ", xml)
             self.assertIn("tei:standOff", xml)
             self.assertIn("test note", xml)
+            # Standoff notes must link to the in-text marker for annotation resolution
+            self.assertIn('target="#miqra-note-1-ref', xml)
 
     def test_special_tsv_row_does_not_produce_invalid_urn_segments(self):
         # special.tsv uses a 2-column schema; must not be merged into book output.

From 643d6439eedf9ebf8d5b43a96de0b39a1c8d03bc Mon Sep 17 00:00:00 2001
From: Efraim Feinstein <efraim.feinstein@gmail.com>
Date: Wed, 27 May 2026 22:50:09 -0700
Subject: [PATCH 04/10] wip: fix some rtl/ltr bugs in the tex renderer

---
 opensiddur/exporter/tex/bibtex.xslt     | 24 +++++++++++++++++++++++-
 opensiddur/tests/exporter/test_latex.py | 15 +++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/opensiddur/exporter/tex/bibtex.xslt b/opensiddur/exporter/tex/bibtex.xslt
index 6b4c83f..d9cbe8d 100644
--- a/opensiddur/exporter/tex/bibtex.xslt
+++ b/opensiddur/exporter/tex/bibtex.xslt
@@ -3,12 +3,34 @@
     xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
     xmlns:tei="http://www.tei-c.org/ns/1.0"
     xmlns:j="http://jewishliturgy.org/ns/jlptei/2"
+    xmlns:xs="http://www.w3.org/2001/XMLSchema"
     exclude-result-prefixes="tei j">
 
   <xsl:output method="text" encoding="UTF-8" indent="no"/>
   
   <xsl:strip-space elements="*"/>
 
+  <xsl:variable name="xml-ns" select="'http://www.w3.org/XML/1998/namespace'"/>
+
+  <xsl:function name="j:in-scope-lang" as="xs:string">
+    <xsl:param name="node" as="node()"/>
+    <xsl:sequence select="string(($node/ancestor-or-self::*[@xml:lang][1]/@xml:lang)[1])"/>
+  </xsl:function>
+
+  <xsl:function name="j:bibtex-field-value" as="xs:string">
+    <xsl:param name="node" as="element()"/>
+    <xsl:variable name="lang" select="j:in-scope-lang($node)"/>
+    <xsl:variable name="v" select="normalize-space(string($node))"/>
+    <xsl:choose>
+      <xsl:when test="$lang = 'he' or starts-with($lang, 'he-')">
+        <xsl:sequence select="concat('\texthebrew{', $v, '}')"/>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:sequence select="$v"/>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:function>
+
   <!-- Root template -->
   <xsl:template match="/">
     <xsl:apply-templates select="//tei:bibl"/>
@@ -215,7 +237,7 @@
       <xsl:text>  </xsl:text>
       <xsl:value-of select="$field-name"/>
       <xsl:text> = {</xsl:text>
-      <xsl:value-of select="normalize-space(.)"/>
+      <xsl:value-of select="j:bibtex-field-value(.)"/>
       <xsl:text>},&#10;</xsl:text>
     </xsl:if>
   </xsl:template>
diff --git a/opensiddur/tests/exporter/test_latex.py b/opensiddur/tests/exporter/test_latex.py
index aa9f537..ffd14b0 100644
--- a/opensiddur/tests/exporter/test_latex.py
+++ b/opensiddur/tests/exporter/test_latex.py
@@ -242,6 +242,21 @@ def test_dedupes_when_multiple_files_share_index(self):
         preamble, _ = extract_sources([f1, f2])
         self.assertEqual(preamble.count("@"), 1)
 
+    def test_bibtex_wraps_hebrew_fields_in_texthebrew(self):
+        index = """<?xml version="1.0"?>
+        <root xmlns:tei="http://www.tei-c.org/ns/1.0">
+          <tei:listBibl>
+            <tei:bibl>
+              <tei:title xml:lang="he">מקרא על פי המסורה</tei:title>
+              <tei:editor>Avi Kadish</tei:editor>
+            </tei:bibl>
+          </tei:listBibl>
+        </root>""".encode("utf-8")
+        doc = self._create("p", "doc.xml", b"<root/>")
+        self._create("p", "index.xml", index)
+        preamble, _ = extract_sources([doc])
+        self.assertIn(r"title = {\texthebrew{מקרא על פי המסורה}}", preamble)
+
 
 class TestGetFileReferences(unittest.TestCase):
 

From 62e647c482bfeff1e1435e8bf7f0af1ceef6ba0c Mon Sep 17 00:00:00 2001
From: Efraim Feinstein <efraim.feinstein@gmail.com>
Date: Thu, 28 May 2026 20:36:24 -0700
Subject: [PATCH 05/10] wip: make sure all templates are covered

---
 opensiddur/exporter/tex/reledmac.xslt         |  4 ++-
 .../miqra_al_pi_hamasorah/miqra_wikitext.py   | 20 +++++++++++++
 .../tests/exporter/test_reledmac_xslt.py      |  2 +-
 .../test_miqra_wikitext.py                    | 30 +++++++++++++++++++
 4 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/opensiddur/exporter/tex/reledmac.xslt b/opensiddur/exporter/tex/reledmac.xslt
index a06bcd2..c466758 100644
--- a/opensiddur/exporter/tex/reledmac.xslt
+++ b/opensiddur/exporter/tex/reledmac.xslt
@@ -729,7 +729,9 @@
         <!-- Ensure we're in horizontal mode before forcing a linebreak.
              This avoids \"There's no line here to end\" when lb occurs at the
              start of a paragraph/block. -->
-        <xsl:text>\leavevmode\\&#10;</xsl:text>
+        <!-- Add an empty brace group so a following `[` at the start of the next
+             line is not parsed as the optional length argument to `\\`. -->
+        <xsl:text>\leavevmode\\{}&#10;</xsl:text>
     </xsl:template>
 
     <!-- tei:anchor: linkage ids only; editorial notes are already inlined in the body. -->
diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py b/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py
index 6ca5e25..b220304 100644
--- a/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py
@@ -31,8 +31,12 @@
         "מ:ספר חדש",
         "מ:רווח בתרי עשר",
         "רווח בתרי עשר",
+        "מ:רווח בתרי עשר בפסוק הראשון",
         "מ:רווח לספר בתהלים",
         "רווח לספר בתהלים",
+        "מ:רווח לספר בתהלים בפסוק הראשון",
+        "ניווט טעמים",
+        "שם הדף המלא",
         "מ:אין פרשה בתחילת פרק",
         'מ:אין פרשה בתחילת פרק בספרי אמ"ת',
         "מ:אין רווח של פרשה בתחילת פרשת השבוע",
@@ -235,7 +239,9 @@ def _register_template_handlers(self) -> None:
         h("מ:ירושלם", self._handle_yerushalem)
         h("מ:ירושלמה", self._handle_yerushalema)
         h("ירח בן יומו", self._handle_accent_yerah)
+        h("ירח בן יומו-2", self._handle_accent_with_word)
         h("גלגל", self._handle_accent_galgal)
+        h("גלגל-2", self._handle_accent_with_word)
         h("אתנח הפוך", self._handle_accent_etnah)
         h("מ:קמץ", self._handle_qamats)
         h("מ:טעם ומתג באות אחת", self._handle_taam_meteg)
@@ -252,6 +258,8 @@ def _register_template_handlers(self) -> None:
         h("מ:לגרמיה-2", self._handle_legarmeh)
         h("מ:פסק", self._handle_paseq)
         h("מ:מקף אפור", self._handle_grey_maqaf)
+        h("מ:דחי", self._handle_dechi)
+        h("מ:צינור", self._handle_tzinor)
 
         h("מ:הערה", self._handle_mam_note)
         h("עוגן בשורה", self._handle_line_anchor)
@@ -521,6 +529,10 @@ def _handle_accent_yerah(self, template) -> str:
     def _handle_accent_galgal(self, template) -> str:
         return '<miqra:accent type="galgal"/>'
 
+    def _handle_accent_with_word(self, template) -> str:
+        # Word param already includes the accent (galgal / yerah ben yomo).
+        return self._p(self._param(template, 1))
+
     def _handle_accent_etnah(self, template) -> str:
         return '<miqra:accent type="etnah-hafukh"/>'
 
@@ -562,6 +574,14 @@ def _handle_paseq(self, template) -> str:
     def _handle_grey_maqaf(self, template) -> str:
         return '<miqra:maqaf rend="grey">־</miqra:maqaf>'
 
+    def _handle_dechi(self, template) -> str:
+        # Wikisource shows param 1; param 2 marks the dechi (offset accent) form.
+        return self._p(self._param(template, 1))
+
+    def _handle_tzinor(self, template) -> str:
+        # Wikisource shows param 1; param 2 marks the tzinor accent placement.
+        return self._p(self._param(template, 1))
+
     def _handle_mam_note(self, template) -> str:
         body = self._p(self._param(template, 1))
         note_id = self._next_note_id()
diff --git a/opensiddur/tests/exporter/test_reledmac_xslt.py b/opensiddur/tests/exporter/test_reledmac_xslt.py
index 18c7d2d..3aca2e8 100644
--- a/opensiddur/tests/exporter/test_reledmac_xslt.py
+++ b/opensiddur/tests/exporter/test_reledmac_xslt.py
@@ -533,7 +533,7 @@ def test_lb_emits_leavevmode_linebreak(self):
           </tei:p></tei:body></tei:text>
         </tei:TEI>"""
         out = _transform(xml)
-        self.assertIn(r"\leavevmode\\", out)
+        self.assertIn(r"\leavevmode\\{}", out)
 
 
 class TestStructuralElements(unittest.TestCase):
diff --git a/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_miqra_wikitext.py b/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_miqra_wikitext.py
index 3a44834..273bcf3 100644
--- a/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_miqra_wikitext.py
+++ b/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_miqra_wikitext.py
@@ -68,6 +68,36 @@ def test_column_c_double_underscore(self):
         frag = wikitext_to_intermediate_xml("word__word", column_c=True)
         self.assertIn("word word", frag)
 
+    def test_dechi_shows_first_parameter_only(self):
+        frag = wikitext_to_intermediate_xml(
+            "{{מ:דחי|חַ֭טָּאִים|חַ֭טָּאִ֭ים}}"
+        )
+        self.assertIn("חַ֭טָּאִים", frag)
+        self.assertNotIn("חַ֭טָּאִ֭ים", frag)
+        self.assertNotIn("{{מ:דחי", frag)
+
+    def test_tzinor_shows_first_parameter_only(self):
+        frag = wikitext_to_intermediate_xml(
+            "{{מ:צינור|בָטַחְתִּי֮|בָטַ֮חְתִּי֮}}"
+        )
+        self.assertIn("בָטַחְתִּי֮", frag)
+        self.assertNotIn("בָטַ֮חְתִּי֮", frag)
+        self.assertNotIn("{{מ:צינור", frag)
+
+    def test_galgal2_shows_first_parameter_only(self):
+        frag = wikitext_to_intermediate_xml("{{גלגל-2|אֵ֪ין|אֵ֪֪ין}}")
+        self.assertIn("אֵ֪ין", frag)
+        self.assertNotIn("אֵ֪֪ין", frag)
+        self.assertNotIn("{{גלגל-2", frag)
+
+    def test_yerah_ben_yomo2_shows_first_parameter_only(self):
+        frag = wikitext_to_intermediate_xml(
+            "{{ירח בן יומו-2|אַלְפַּ֪יִם|אַלְפַּ֪֪יִם}}"
+        )
+        self.assertIn("אַלְפַּ֪יִם", frag)
+        self.assertNotIn("אַלְפַּ֪֪יִם", frag)
+        self.assertNotIn("{{ירח בן יומו-2", frag)
+
     def test_all_templates_from_doc_have_handlers(self):
         """Every template name in templates.tsv examples is recognized."""
         from pathlib import Path

From e1713e522f3642943a319cdf55d6699520ba9f4f Mon Sep 17 00:00:00 2001
From: Efraim Feinstein <efraim.feinstein@gmail.com>
Date: Thu, 28 May 2026 22:21:47 -0700
Subject: [PATCH 06/10] restore 1917 jps processor so it won't fail

---
 .../miqra_al_pi_hamasorah/miqra_wikitext.py   |  32 +-
 .../importer/util/mediawiki_processor.py      | 957 ++++++++++++------
 2 files changed, 665 insertions(+), 324 deletions(-)

diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py b/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py
index b220304..1c2ab77 100644
--- a/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py
@@ -13,7 +13,10 @@
 
 import mwparserfromhell
 
-from opensiddur.importer.util.mediawiki_processor import MediaWikiProcessor
+from opensiddur.importer.util.mediawiki_processor import (
+    ConversionResult,
+    MediaWikiProcessor,
+)
 
 MIQRA_NS = "urn:x-opensiddur:miqra:intermediate"
 MW_NS = "urn:x-opensiddur:mw:intermediate"
@@ -196,6 +199,33 @@ def _initialize_handlers(self) -> None:
         self._register_template_handlers()
         self._register_tag_handlers()
 
+    def process_wikitext(self, wikitext: str) -> ConversionResult:
+        """Miqra uses recursive nested processing, not the JPS top-level loop."""
+        warnings: list[str] = []
+        errors: list[str] = []
+        metadata: dict = {}
+
+        text = wikitext or ""
+        for pre in self.preprocessors:
+            try:
+                text = pre(text)
+            except Exception as e:
+                errors.append(str(e))
+
+        try:
+            xml_content = self._process_nested_content(text)
+        except Exception as e:
+            xml_content = text
+            errors.append(str(e))
+
+        return ConversionResult(
+            xml_content=xml_content,
+            metadata=metadata,
+            warnings=warnings,
+            errors=errors,
+            wikilinks=self.wikilinks.copy(),
+        )
+
     def _register_tag_handlers(self) -> None:
         self.tag_handlers["noinclude"] = self._handle_strip_tag
 
diff --git a/opensiddur/importer/util/mediawiki_processor.py b/opensiddur/importer/util/mediawiki_processor.py
index 76120b3..faeccff 100644
--- a/opensiddur/importer/util/mediawiki_processor.py
+++ b/opensiddur/importer/util/mediawiki_processor.py
@@ -1,29 +1,22 @@
 """
 MediaWiki/Wikitext to intermediate XML processor.
 
-This module contains the reusable MediaWiki processing framework originally built
-for the JPS1917 importer. Other importers (e.g. Miqra al pi ha‑Masorah) can reuse
-it by adding/overriding template and tag handlers.
+Reusable framework originally built for the JPS1917 importer. Other importers
+(e.g. Miqra al pi ha-Masorah) subclass ``MediaWikiProcessor`` and register their
+own template/tag handlers.
 """
 
-# NOTE: The initial implementation is intentionally a direct move of the existing
-# processor to provide a stable API surface (`MediaWikiProcessor`, `create_processor`)
-# for multiple importers. Importer-specific specializations should be layered on
-# top by registering handlers.
-
 from __future__ import annotations
 
 import re
+import mwparserfromhell
 from dataclasses import dataclass
 from enum import Enum
 from typing import Any, Dict, List
 
-import mwparserfromhell
-
 
 class ProcessingStage(Enum):
     """Stages of MediaWiki processing"""
-
     PREPROCESS = "preprocess"
     TEMPLATES = "templates"
     TAGS = "tags"
@@ -33,7 +26,6 @@ class ProcessingStage(Enum):
 @dataclass
 class ConversionResult:
     """Result of a conversion operation"""
-
     xml_content: str
     metadata: Dict[str, Any]
     warnings: List[str]
@@ -43,12 +35,12 @@ class ConversionResult:
 
 class MediaWikiProcessor:
     """
-    Modular MediaWiki to XML processor.
-
-    Provides a modular framework for converting MediaWiki syntax to an
-    intermediate XML that can be transformed to TEI via XSLT.
+    Modular MediaWiki to XML processor for JPS1917 content.
+    
+    This processor handles the conversion of MediaWiki syntax to XML,
+    with separate modules for different types of templates and tags.
     """
-
+    
     def __init__(self):
         self.template_handlers = {}
         self.tag_handlers = {}
@@ -56,7 +48,7 @@ def __init__(self):
         self.postprocessors = []
         self.wikilinks = []  # Store captured wikilinks
         self._initialize_handlers()
-
+    
     def _initialize_handlers(self):
         """Initialize all template and tag handlers"""
         self._initialize_template_handlers()
@@ -64,452 +56,771 @@ def _initialize_handlers(self):
         self._initialize_preprocessors()
         self._initialize_postprocessors()
         self._initialize_wikilink_handlers()
-
-    # -------------------------------------------------------------------------
-    # Default handler initialization
-    #
-    # These defaults match the original JPS1917 processor behavior. Other
-    # importers can clear/override and register their own handlers as needed.
-    # -------------------------------------------------------------------------
-
+    
     def _initialize_template_handlers(self):
         """Initialize handlers for MediaWiki templates"""
-
+        
         # Text Formatting Templates
-        self.template_handlers["sc"] = self._handle_small_caps
-        self.template_handlers["larger"] = self._handle_larger_text
-        self.template_handlers["x-larger"] = self._handle_x_larger_text
-        self.template_handlers["xx-larger"] = self._handle_xx_larger_text
-        self.template_handlers["xxx-larger"] = self._handle_xxx_larger_text
-        self.template_handlers["smaller"] = self._handle_smaller_text
-
+        self.template_handlers['sc'] = self._handle_small_caps
+        self.template_handlers['larger'] = self._handle_larger_text
+        self.template_handlers['x-larger'] = self._handle_x_larger_text
+        self.template_handlers['xx-larger'] = self._handle_xx_larger_text
+        self.template_handlers['xxx-larger'] = self._handle_xxx_larger_text
+        self.template_handlers['smaller'] = self._handle_smaller_text
+        
         # Layout Templates
-        self.template_handlers["c"] = self._handle_center
-        self.template_handlers["right"] = self._handle_right_align
-        self.template_handlers["rule"] = self._handle_horizontal_rule
-        self.template_handlers["nop"] = self._handle_no_paragraph
-
+        self.template_handlers['c'] = self._handle_center
+        self.template_handlers['right'] = self._handle_right_align
+        self.template_handlers['rule'] = self._handle_horizontal_rule
+        self.template_handlers['nop'] = self._handle_no_paragraph
+        
         # Biblical Content Templates
-        self.template_handlers["verse"] = self._handle_verse
-        self.template_handlers["rh"] = self._handle_right_header
-        self.template_handlers["dropinitial"] = self._handle_drop_initial
-        self.template_handlers["dhr"] = self._handle_double_horizontal_rule
-
+        self.template_handlers['verse'] = self._handle_verse
+        self.template_handlers['rh'] = self._handle_right_header
+        self.template_handlers['dropinitial'] = self._handle_drop_initial
+        self.template_handlers['dhr'] = self._handle_double_horizontal_rule
+        
         # Navigation Templates
-        self.template_handlers["anchor"] = self._handle_anchor
-        self.template_handlers["anchor+"] = self._handle_anchor_plus
-
+        self.template_handlers['anchor'] = self._handle_anchor
+        self.template_handlers['anchor+'] = self._handle_anchor_plus
+        
         # Language Templates
-        self.template_handlers["lang"] = self._handle_language
-
+        self.template_handlers['lang'] = self._handle_language
+        
         # Reference Templates
-        self.template_handlers["smallrefs"] = self._handle_small_refs
-
+        self.template_handlers['smallrefs'] = self._handle_small_refs
+        
         # Special Templates
-        self.template_handlers["hws"] = self._handle_hws
-        self.template_handlers["hwe"] = self._handle_hwe
-        self.template_handlers["***"] = self._handle_asterisks
-        self.template_handlers["reconstruct"] = self._handle_reconstruct
-        self.template_handlers["SIC"] = self._handle_sic
-        self.template_handlers["sic"] = self._handle_sic
-        self.template_handlers["sup"] = self._handle_superscript
-        self.template_handlers["bar"] = self._handle_bar
-        self.template_handlers["gap"] = self._handle_gap
-        self.template_handlers["overfloat left"] = self._handle_overfloat_left
-        self.template_handlers["float right"] = self._handle_float_right
-        self.template_handlers["smaller block/s"] = self._handle_smaller_block_start
-        self.template_handlers["smaller block/e"] = self._handle_smaller_block_end
-
+        self.template_handlers['hws'] = self._handle_hws
+        self.template_handlers['hwe'] = self._handle_hwe
+        self.template_handlers['***'] = self._handle_asterisks
+        self.template_handlers['reconstruct'] = self._handle_reconstruct
+        self.template_handlers['SIC'] = self._handle_sic
+        self.template_handlers['sic'] = self._handle_sic
+        self.template_handlers['sup'] = self._handle_superscript
+        self.template_handlers['bar'] = self._handle_bar
+        self.template_handlers['gap'] = self._handle_gap
+        self.template_handlers['overfloat left'] = self._handle_overfloat_left
+        self.template_handlers['float right'] = self._handle_float_right
+        self.template_handlers['smaller block/s'] = self._handle_smaller_block_start
+        self.template_handlers['smaller block/e'] = self._handle_smaller_block_end
+    
     def _initialize_tag_handlers(self):
         """Initialize handlers for HTML/XML tags"""
-
+        
         # Structural Tags
-        self.tag_handlers["section"] = self._handle_section
-        self.tag_handlers["table"] = self._handle_table
-        self.tag_handlers["tr"] = self._handle_table_row
-        self.tag_handlers["td"] = self._handle_table_cell
-
+        self.tag_handlers['section'] = self._handle_section
+        self.tag_handlers['table'] = self._handle_table
+        self.tag_handlers['tr'] = self._handle_table_row
+        self.tag_handlers['td'] = self._handle_table_cell
+        
         # Text Formatting Tags
-        self.tag_handlers["i"] = self._handle_italic
-        self.tag_handlers["br"] = self._handle_line_break
-        self.tag_handlers["span"] = self._handle_span
-
+        self.tag_handlers['i'] = self._handle_italic
+        self.tag_handlers['br'] = self._handle_line_break
+        self.tag_handlers['span'] = self._handle_span
+        
         # Content Tags
-        self.tag_handlers["dd"] = self._handle_definition_description
-        self.tag_handlers["ref"] = self._handle_reference
-
+        self.tag_handlers['dd'] = self._handle_definition_description
+        self.tag_handlers['ref'] = self._handle_reference
+        
         # MediaWiki Specific Tags
-        self.tag_handlers["noinclude"] = self._handle_noinclude
-        self.tag_handlers["pagequality"] = self._handle_pagequality
-
+        self.tag_handlers['noinclude'] = self._handle_noinclude
+        self.tag_handlers['pagequality'] = self._handle_pagequality
+    
     def _initialize_preprocessors(self):
         """Initialize preprocessing functions"""
         self.preprocessors = [
             self._fix_noinclude_line_breaks,
             self._convert_paragraph_breaks,
             self._normalize_whitespace,
-            self._handle_special_characters,
-            self._extract_metadata,
+            self._handle_special_characters,  # Enable special character processing
+            self._extract_metadata
         ]
-
+    
     def _initialize_postprocessors(self):
         """Initialize postprocessing functions"""
         self.postprocessors = [
             self._validate_xml_structure,
-            self._finalize_metadata,
+            self._finalize_metadata
         ]
-
+    
     def _initialize_wikilink_handlers(self):
         """Initialize wikilink processing"""
+        # Wikilinks are processed during the main parsing loop
         pass
-
-    # -------------------------------------------------------------------------
-    # Core processing
-    # -------------------------------------------------------------------------
-
+    
     def _process_nested_content(self, content: str, depth: int = 0) -> str:
         """Recursively process nested templates and other elements"""
+        # Prevent infinite recursion
         if depth > 10:
             return content
-
+            
+        # Parse the content to handle nested elements
         parsed = mwparserfromhell.parse(content)
         nodes_to_replace = []
-
+        
+        # Process nodes recursively
         for node in parsed.nodes:
-            if hasattr(node, "name"):  # Template
+            if hasattr(node, 'name'):  # Template
                 template_name = str(node.name).strip()
                 if template_name in self.template_handlers:
                     try:
+                        # Process nested content within the template
                         processed_node = self._process_template_with_nesting(node, depth + 1)
                         replacement = self.template_handlers[template_name](processed_node)
                         nodes_to_replace.append((node, replacement))
-                    except Exception:
+                    except Exception as e:
+                        # If nested processing fails, try without nesting
                         replacement = self.template_handlers[template_name](node)
                         nodes_to_replace.append((node, replacement))
                 else:
+                    # Unknown template - process its content for nested elements
                     processed_content = self._process_nested_content(str(node), depth + 1)
                     nodes_to_replace.append((node, processed_content))
-
-            elif hasattr(node, "tag"):  # Tag
+            
+            elif hasattr(node, 'tag'):  # Tag
                 tag_name = str(node.tag).strip().lower()
                 if tag_name in self.tag_handlers:
                     try:
+                        # Process nested content within the tag
                         processed_node = self._process_tag_with_nesting(node, depth + 1)
                         replacement = self.tag_handlers[tag_name](processed_node)
                         nodes_to_replace.append((node, replacement))
-                    except Exception:
+                    except Exception as e:
+                        # If nested processing fails, try without nesting
                         replacement = self.tag_handlers[tag_name](node)
                         nodes_to_replace.append((node, replacement))
                 else:
+                    # Unknown tag - process its content for nested elements
                     processed_content = self._process_nested_content(str(node), depth + 1)
                     nodes_to_replace.append((node, processed_content))
-
-            elif hasattr(node, "__class__") and "Wikilink" in str(node.__class__):
+            
+            elif hasattr(node, '__class__') and 'Wikilink' in str(node.__class__):  # Wikilink
                 try:
                     replacement = self._handle_wikilink(node)
                     nodes_to_replace.append((node, replacement))
-                except Exception:
+                except Exception as e:
+                    # If wikilink processing fails, keep original
                     nodes_to_replace.append((node, str(node)))
-
+        
+        # Replace all nodes
         for node, replacement in nodes_to_replace:
             parsed.replace(node, replacement)
-
+        
         return str(parsed)
-
+    
     def _process_template_with_nesting(self, template, depth: int = 0) -> object:
+        """Process a template and its nested content"""
+        # Create a copy of the template to avoid modifying the original
         import copy
-
         processed_template = copy.deepcopy(template)
+        
+        # Process each parameter of the template
         for param in processed_template.params:
-            if hasattr(param, "value"):
+            if hasattr(param, 'value'):
+                # Process nested content in parameter values
                 processed_value = self._process_nested_content(str(param.value), depth + 1)
                 param.value = processed_value
+        
         return processed_template
-
+    
     def _process_tag_with_nesting(self, tag, depth: int = 0) -> object:
+        """Process a tag and its nested content"""
+        # Create a copy of the tag to avoid modifying the original
         import copy
-
         processed_tag = copy.deepcopy(tag)
-        if hasattr(processed_tag, "contents") and processed_tag.contents:
-            processed_contents = self._process_nested_content(
-                str(processed_tag.contents), depth + 1
-            )
+        
+        # Process nested content within the tag
+        if hasattr(processed_tag, 'contents') and processed_tag.contents:
+            processed_contents = self._process_nested_content(str(processed_tag.contents), depth + 1)
             processed_tag.contents = processed_contents
+        
         return processed_tag
-
-    # -------------------------------------------------------------------------
-    # Template handlers (JPS1917 defaults)
-    # -------------------------------------------------------------------------
-
+    
+    # ============================================================================
+    # TEMPLATE HANDLERS
+    # ============================================================================
+    
     def _handle_small_caps(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<sc>{content}</sc>"
-
+        """Convert {{sc|text}} to <sc>text</sc>"""
+        content = str(template.get(1, ''))
+        return f'<sc>{content}</sc>'
+    
     def _handle_larger_text(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<larger>{content}</larger>"
-
+        """Convert {{larger|text}} to <larger>text</larger>"""
+        content = str(template.get(1, ''))
+        return f'<larger>{content}</larger>'
+    
     def _handle_x_larger_text(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<x-larger>{content}</x-larger>"
-
+        """Convert {{x-larger|text}} to <x-larger>text</x-larger>"""
+        content = str(template.get(1, ''))
+        return f'<x-larger>{content}</x-larger>'
+    
     def _handle_xx_larger_text(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<xx-larger>{content}</xx-larger>"
-
+        """Convert {{xx-larger|text}} to <xx-larger>text</xx-larger>"""
+        content = str(template.get(1, ''))
+        return f'<xx-larger>{content}</xx-larger>'
+    
     def _handle_xxx_larger_text(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<xxx-larger>{content}</xxx-larger>"
-
+        """Convert {{xxx-larger|text}} to <xxx-larger>text</xxx-larger>"""
+        content = str(template.get(1, ''))
+        return f'<xxx-larger>{content}</xxx-larger>'
+    
     def _handle_smaller_text(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<smaller>{content}</smaller>"
-
+        """Convert {{smaller|text}} to <smaller>text</smaller>"""
+        content = str(template.get(1, ''))
+        return f'<smaller>{content}</smaller>'
+    
     def _handle_center(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<c>{content}</c>"
-
+        """Convert {{c|text}} to <c>text</c>"""
+        content = str(template.get(1, ''))
+        return f'<c>{content}</c>'
+    
     def _handle_right_align(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<right>{content}</right>"
-
+        """Convert {{right|text}} to <right>text</right>"""
+        content = str(template.get(1, ''))
+        return f'<right>{content}</right>'
+    
     def _handle_horizontal_rule(self, template) -> str:
-        return "<rule/>"
-
-    def _handle_double_horizontal_rule(self, template) -> str:
-        return "<dhr/>"
-
+        """Convert {{rule}} to <rule/>"""
+        return '<rule/>'
+    
     def _handle_no_paragraph(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<nop>{content}</nop>"
-
+        """Convert {{nop}} to <nop/>"""
+        return '<nop/>'
+    
     def _handle_verse(self, template) -> str:
-        chapter = str(template.get(1, "")).strip()
-        verse = str(template.get(2, "")).strip()
-        content = str(template.get(3, ""))
-        return f'<verse chapter="{chapter}" verse="{verse}">{content}</verse>'
-
+        """Convert {{verse|chapter|verse|text}} to <verse chapter="..." verse="...">text</verse>"""
+        chapter = str(template.get('chapter', template.get(1, ''))).replace("chapter=", "")
+        verse = str(template.get('verse', template.get(2, ''))).replace("verse=", "")
+        text = str(template.get(3, template.get('text', '')))
+        chapter_attr = f' chapter="{chapter}"' if chapter else ''
+        verse_attr = f' verse="{verse}"' if verse else ''
+        if not chapter or not verse:
+            print(f"Invalid verse template: {template} {template.get(1, '')=} {template.get(2, '')=} {template.get(3, '')=}")
+            
+        return f'<verse{chapter_attr}{verse_attr}>{text}</verse>'
+    
     def _handle_right_header(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<rh>{content}</rh>"
-
+        """Convert {{rh|text}} to <rh>text</rh>"""
+        content = str(template.get(1, ''))
+        return f'<rh>{content}</rh>'
+    
     def _handle_drop_initial(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<dropinitial>{content}</dropinitial>"
-
+        """Convert {{dropinitial|letter}} to <dropinitial>letter</dropinitial>"""
+        letter = str(template.get(1, ''))
+        return f'<dropinitial>{letter}</dropinitial>'
+    
+    def _handle_double_horizontal_rule(self, template) -> str:
+        """Convert {{dhr}} to <dhr/>, or {{dhr|value}} to <dhr value="value"/>"""
+        value = str(template.get(1, ''))
+        if value:
+            value=f' value="{value}"'
+        else:
+            value=""
+        return f'<dhr{value}/>'
+    
     def _handle_anchor(self, template) -> str:
-        name = str(template.get(1, "")).strip()
+        """Convert {{anchor|name}} to <anchor name="name"/>"""
+        name = str(template.get(1, ''))
         return f'<anchor name="{name}"/>'
-
+    
     def _handle_anchor_plus(self, template) -> str:
-        name = str(template.get(1, "")).strip()
-        return f'<anchor-plus name="{name}"/>'
-
+        """Convert {{anchor+|name|text}} to <anchor name="name">text</anchor>"""
+        name = str(template.get(1, ''))
+        text = str(template.get(2, ''))
+        return f'<anchor name="{name}">{text}</anchor>'
+    
     def _handle_language(self, template) -> str:
-        code = str(template.get(1, "")).strip()
-        content = str(template.get(2, ""))
-        return f'<lang code="{code}">{content}</lang>'
-
+        """Convert {{lang|code|text}} to <lang code="code">text</lang>"""
+        code = str(template.get(1, ''))
+        text = str(template.get(2, ''))
+        return f'<lang code="{code}">{text}</lang>'
+    
     def _handle_small_refs(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<smallrefs>{content}</smallrefs>"
-
+        """Convert {{smallrefs}} to <smallrefs/>"""
+        return '<smallrefs/>'
+    
     def _handle_hws(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<hws>{content}</hws>"
-
+        """Convert {{hws|text}} to <hws>text</hws>"""
+        content = str(template.get(1, ''))
+        return f'<hws>{content}</hws>'
+    
     def _handle_hwe(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<hwe>{content}</hwe>"
-
+        """Convert {{hwe|text}} to <hwe>text</hwe>"""
+        content = str(template.get(1, ''))
+        return f'<hwe>{content}</hwe>'
+    
     def _handle_asterisks(self, template) -> str:
-        return "<asterisks/>"
-
+        """Convert {{***}} or {{***|n}} to <asterisks n="n">***</asterisks> (n defaults to 3)"""
+        n = str(template.get(1, '3'))
+        return f'<asterisks n="{n}">***</asterisks>'
+    
     def _handle_reconstruct(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<reconstruct>{content}</reconstruct>"
-
+        """Convert {{reconstruct|content|text}} to <reconstruct><reg>content</reg><note>text</note></reconstruct>"""
+        content = str(template.get(1, ''))
+        text = str(template.get(2, ''))
+        return f'<reconstruct><reg>{content}</reg><note>{text}</note></reconstruct>'
+    
     def _handle_sic(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<sic>{content}</sic>"
-
+        """Convert {{SIC|text}} to <sic>text</sic>"""
+        content = str(template.get(1, ''))
+        return f'<sic>{content}</sic>'
+    
     def _handle_superscript(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<sup>{content}</sup>"
-
+        """Convert {{sup|text}} to <sup>text</sup>"""
+        content = str(template.get(1, ''))
+        return f'<sup>{content}</sup>'
+    
     def _handle_bar(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<bar>{content}</bar>"
-
+        """Convert {{bar|length}} to <bar length="length"/>"""
+        length = str(template.get(1, '6'))
+        return f'<bar length="{length}"/>'
+    
     def _handle_gap(self, template) -> str:
-        return "<gap/>"
-
+        """Convert {{gap|length}} to <gap length="length"/>"""
+        length = str(template.get(1, ''))
+        if length:
+            return f'<gap length="{length}"/>'
+        else:
+            return '<gap/>'
+    
     def _handle_overfloat_left(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<overfloat_left>{content}</overfloat_left>"
-
+        """Convert {{overfloat left|align|padding|text}} to <overfloat_left align="..." padding="...">text</overfloat_left>"""
+        # Get parameters - can be positional or named
+        align = str(template.get('align', template.get(1, '')))
+        padding = str(template.get('padding', template.get(2, '')))
+        text = str(template.get('text', template.get(3, '')))
+        
+        # Clean up named parameters (remove parameter name prefixes)
+        align = align.replace('align=', '') if align.startswith('align=') else align
+        padding = padding.replace('padding=', '') if padding.startswith('padding=') else padding
+        text = text.replace('text=', '') if text.startswith('text=') else text
+        
+        # Build attributes
+        attributes = []
+        if align:
+            attributes.append(f'align="{align}"')
+        if padding:
+            attributes.append(f'padding="{padding}"')
+        
+        attr_str = ' ' + ' '.join(attributes) if attributes else ''
+        
+        return f'<overfloat_left{attr_str}>{text}</overfloat_left>'
+    
     def _handle_float_right(self, template) -> str:
-        content = str(template.get(1, ""))
-        return f"<float_right>{content}</float_right>"
-
+        """Convert {{float right|text}} to <float_right>text</float_right>"""
+        text = str(template.get(1, ''))
+        return f'<float_right>{text}</float_right>'
+    
     def _handle_smaller_block_start(self, template) -> str:
-        return "<smaller_block_start/>"
-
+        """Convert {{smaller block/s}} to <smaller_block>"""
+        return '<smaller_block>'
+    
     def _handle_smaller_block_end(self, template) -> str:
-        return "<smaller_block_end/>"
-
-    # -------------------------------------------------------------------------
-    # Tag handlers (JPS1917 defaults)
-    # -------------------------------------------------------------------------
-
+        """Convert {{smaller block/e}} to </smaller_block>"""
+        return '</smaller_block>'
+    
+    # ============================================================================
+    # WIKILINK HANDLERS
+    # ============================================================================
+    
+    def _handle_wikilink(self, wikilink) -> str:
+        """Process and capture wikilinks"""
+        # Extract wikilink information
+        title = str(wikilink.title) if hasattr(wikilink, 'title') and wikilink.title else ''
+        text = str(wikilink.text) if hasattr(wikilink, 'text') and wikilink.text else title
+        
+        # Process templates within the wikilink text
+        processed_text = self._process_nested_content(text)
+        
+        # Store wikilink information
+        wikilink_info = {
+            'title': title,
+            'text': processed_text,
+            'namespace': str(wikilink.namespace) if hasattr(wikilink, 'namespace') and wikilink.namespace else None,
+            'section': str(wikilink.section) if hasattr(wikilink, 'section') and wikilink.section else None,
+            'fragment': str(wikilink.fragment) if hasattr(wikilink, 'fragment') and wikilink.fragment else None
+        }
+        self.wikilinks.append(wikilink_info)
+        
+        # Convert to XML - use __link__ tag with attributes
+        attributes = []
+        if title:
+            attributes.append(f'title="{title}"')
+        if wikilink_info['namespace']:
+            attributes.append(f'namespace="{wikilink_info["namespace"]}"')
+        if wikilink_info['section']:
+            attributes.append(f'section="{wikilink_info["section"]}"')
+        if wikilink_info['fragment']:
+            attributes.append(f'fragment="{wikilink_info["fragment"]}"')
+        
+        attr_str = ' ' + ' '.join(attributes) if attributes else ''
+        return f'<__link__{attr_str}>{processed_text}</__link__>'
+    
+    # ============================================================================
+    # TAG HANDLERS
+    # ============================================================================
+    
     def _handle_section(self, tag) -> str:
-        begin = getattr(tag, "attributes", {}).get("begin", "")
-        return f'<section begin="{begin}"/>'
-
+        """Convert <section> to <section> with begin and end attributes"""
+        content = str(tag.contents) if tag.contents else ''
+        
+        # Extract begin and end attributes
+        attributes = []
+        if hasattr(tag, 'attributes') and tag.attributes:
+            for attr in tag.attributes:
+                if hasattr(attr, 'name') and hasattr(attr, 'value'):
+                    attr_name = str(attr.name)
+                    attr_value = str(attr.value)
+                    if attr_name in ['begin', 'end']:
+                        attributes.append(f'{attr_name}="{attr_value}"')
+        
+        # Add begin and end attributes if they exist
+        attr_str = ' ' + ' '.join(attributes) if attributes else ''
+        
+        return f'<section{attr_str}>{content}</section>'
+    
     def _handle_table(self, tag) -> str:
-        contents = getattr(tag, "contents", "") or ""
-        return f"<table>{contents}</table>"
-
+        """Convert <table> to <table>"""
+        content = str(tag.contents) if tag.contents else ''
+        attributes = self._extract_tag_attributes(tag)
+        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
+        return f'<table{attr_str}>{content}</table>'
+    
     def _handle_table_row(self, tag) -> str:
-        contents = getattr(tag, "contents", "") or ""
-        return f"<tr>{contents}</tr>"
-
+        """Convert <tr> to <tr>"""
+        content = str(tag.contents) if tag.contents else ''
+        attributes = self._extract_tag_attributes(tag)
+        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
+        return f'<tr{attr_str}>{content}</tr>'
+    
     def _handle_table_cell(self, tag) -> str:
-        contents = getattr(tag, "contents", "") or ""
-        return f"<td>{contents}</td>"
-
+        """Convert <td> to <td>"""
+        content = str(tag.contents) if tag.contents else ''
+        attributes = self._extract_tag_attributes(tag)
+        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
+        return f'<td{attr_str}>{content}</td>'
+    
     def _handle_italic(self, tag) -> str:
-        contents = getattr(tag, "contents", "") or ""
-        return f"<i>{contents}</i>"
-
+        """Convert <i> to <i>"""
+        content = str(tag.contents) if tag.contents else ''
+        attributes = self._extract_tag_attributes(tag)
+        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
+        return f'<i{attr_str}>{content}</i>'
+    
     def _handle_line_break(self, tag) -> str:
-        return "<br/>"
-
+        """Convert <br> to a self-closing <br/>, preserving its attributes"""
+        attributes = self._extract_tag_attributes(tag)
+        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
+        return f'<br{attr_str}/>'
+    
     def _handle_span(self, tag) -> str:
-        contents = getattr(tag, "contents", "") or ""
-        return f"<span>{contents}</span>"
-
+        """Convert <span> to <span>"""
+        content = str(tag.contents) if tag.contents else ''
+        attributes = self._extract_tag_attributes(tag)
+        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
+        return f'<span{attr_str}>{content}</span>'
+    
     def _handle_definition_description(self, tag) -> str:
-        contents = getattr(tag, "contents", "") or ""
-        return f"<dd>{contents}</dd>"
-
+        """Convert <dd> to <dd>"""
+        content = str(tag.contents) if tag.contents else ''
+        attributes = self._extract_tag_attributes(tag)
+        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
+        return f'<dd{attr_str}>{content}</dd>'
+    
     def _handle_reference(self, tag) -> str:
-        name = getattr(tag, "attributes", {}).get("name", "")
-        contents = getattr(tag, "contents", "") or ""
-        return f'<ref name="{name}">{contents}</ref>'
-
+        """Convert <ref> to <ref>"""
+        content = str(tag.contents) if tag.contents else ''
+        attributes = self._extract_tag_attributes(tag)
+        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
+        return f'<ref{attr_str}>{content}</ref>'
+    
     def _handle_noinclude(self, tag) -> str:
-        contents = getattr(tag, "contents", "") or ""
-        return f"<noinclude>{contents}</noinclude>"
-
+        """Convert <noinclude> to <noinclude>"""
+        content = str(tag.contents) if tag.contents else ''
+        attributes = self._extract_tag_attributes(tag)
+        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
+        return f'<noinclude{attr_str}>{content}</noinclude>'
+    
     def _handle_pagequality(self, tag) -> str:
-        contents = getattr(tag, "contents", "") or ""
-        return f"<pagequality>{contents}</pagequality>"
-
-    # -------------------------------------------------------------------------
-    # Pre/post processing (JPS1917 defaults)
-    # -------------------------------------------------------------------------
-
-    def _fix_noinclude_line_breaks(self, text: str, metadata: Dict[str, Any]) -> str:
-        return re.sub(r"</noinclude>\n", "</noinclude>", text)
-
-    def _convert_paragraph_breaks(self, text: str, metadata: Dict[str, Any]) -> str:
-        return text.replace("\n\n", "<p/>")
-
-    def _normalize_whitespace(self, text: str, metadata: Dict[str, Any]) -> str:
-        return re.sub(r"[ \t]+", " ", text)
-
-    def _handle_special_characters(self, text: str, metadata: Dict[str, Any]) -> str:
-        # Preserve only minimal escaping at this stage.
-        return text
-
-    def _extract_metadata(self, text: str, metadata: Dict[str, Any]) -> str:
-        metadata.setdefault("length", len(text))
-        return text
-
-    def _validate_xml_structure(self, xml_content: str, metadata: Dict[str, Any]) -> str:
-        # Lightweight sanity check; TEI validation happens later.
-        return xml_content
-
-    def _finalize_metadata(self, xml_content: str, metadata: Dict[str, Any]) -> str:
-        metadata["processed"] = True
-        return xml_content
-
-    # -------------------------------------------------------------------------
-    # Wikilinks
-    # -------------------------------------------------------------------------
-
-    def _handle_wikilink(self, node) -> str:
-        try:
-            title = str(getattr(node, "title", "")).strip()
-            text = str(getattr(node, "text", "")).strip() if getattr(node, "text", None) else ""
-            self.wikilinks.append({"title": title, "text": text})
-            if text:
-                return f'<__link__ title="{title}">{text}</__link__>'
-            return f'<__link__ title="{title}"/>'
-        except Exception:
-            return str(node)
-
-    # -------------------------------------------------------------------------
-    # Public API
-    # -------------------------------------------------------------------------
-
+        """Convert <pagequality> to <pagequality>"""
+        content = str(tag.contents) if tag.contents else ''
+        attributes = self._extract_tag_attributes(tag)
+        attr_str = ' ' + ' '.join([f'{k}="{v}"' for k, v in attributes.items()]) if attributes else ''
+        return f'<pagequality{attr_str}>{content}</pagequality>'
+    
+    def _extract_tag_attributes(self, tag) -> Dict[str, str]:
+        """Extract all attributes from a tag"""
+        attributes = {}
+        if hasattr(tag, 'attributes') and tag.attributes:
+            for attr in tag.attributes:
+                if hasattr(attr, 'name') and hasattr(attr, 'value'):
+                    attributes[str(attr.name)] = str(attr.value)
+        return attributes
+    
+    # ============================================================================
+    # PREPROCESSORS
+    # ============================================================================
+    
+    def _fix_noinclude_line_breaks(self, content: str) -> str:
+        """Insert a blank line after </noinclude> tags when followed by non-whitespace content"""
+        # Pattern to match </noinclude> followed by optional whitespace and any non-whitespace character
+        # This handles cases like: </noinclude>:text, </noinclude>text, </noinclude> {{template}}, etc.
+        pattern = r'(</noinclude>)\s*(\S)'
+        
+        def replace_noinclude_content(match):
+            noinclude_tag = match.group(1)
+            following_content = match.group(2)
+            # Insert a newline after </noinclude> and before the following content
+            return f'{noinclude_tag}\n{following_content}'
+        
+        # Apply the replacement
+        content = re.sub(pattern, replace_noinclude_content, content)
+        
+        return content
+    
+    def _normalize_whitespace(self, content: str) -> str:
+        """Normalize whitespace in content"""
+        # Normalize multiple spaces to single space
+        content = re.sub(r' +', ' ', content)
+        # Normalize line breaks, but preserve paragraph markers
+        content = re.sub(r'\n+', '\n', content)
+        return content.strip()
+    
+    def _convert_paragraph_breaks(self, content: str) -> str:
+        """Convert double newlines to paragraph indicators, but skip if {{nop}} is directly adjacent"""
+        
+        # First, protect {{nop}} markers and their immediate context
+        # Replace {{nop}} with a temporary marker
+        content = content.replace('{{nop}}', '___NOP_MARKER___')
+        
+        # Convert \n\n to <p/>\n paragraph indicators, but not if they're adjacent to ___NOP_MARKER___
+        # This regex matches \n\n that are NOT preceded or followed by ___NOP_MARKER___
+        content = re.sub(r'(?<!___NOP_MARKER___)\n\n(?!___NOP_MARKER___)', '<p/>\n', content)
+        
+        # Restore {{nop}} markers
+        content = content.replace('___NOP_MARKER___', '{{nop}}')
+        
+        return content
+    
+    def _handle_special_characters(self, content: str) -> str:
+        """Handle special characters and entities - escape ampersands not in XML/HTML entities"""
+        # More comprehensive regex to match XML/HTML entities
+        # This includes named entities like &amp;, &lt;, &gt;, &quot;, &apos;
+        # and numeric entities like &#123; and &#x1F;
+        entity_pattern = r'&(?:[a-zA-Z][a-zA-Z0-9]*|#[0-9]+|#x[0-9a-fA-F]+);'
+        
+        # Split content by entities to preserve them
+        parts = re.split(f'({entity_pattern})', content)
+        
+        # Process each part
+        result_parts = []
+        for part in parts:
+            if re.match(entity_pattern, part):
+                # This is an entity, keep it as-is
+                result_parts.append(part)
+            else:
+                # This is not an entity, escape standalone ampersands
+                escaped_part = part.replace('&', '&amp;')
+                result_parts.append(escaped_part)
+        
+        return ''.join(result_parts)
+    
+    def _extract_metadata(self, content: str) -> Dict[str, Any]:
+        """Extract metadata from content"""
+        metadata = {}
+        # Extract page quality information
+        # Extract language information
+        # Extract structural information
+        return metadata
+    
+    # ============================================================================
+    # POSTPROCESSORS
+    # ============================================================================
+    
+    def _validate_xml_structure(self, content: str) -> str:
+        """Validate and fix XML structure"""
+        # Ensure proper nesting
+        # Validate against schema
+        # Fix common issues
+        return content
+    
+    def _cleanup_empty_elements(self, content: str) -> str:
+        """Remove or fix empty elements"""
+        # Remove empty elements
+        content = re.sub(r'<(\w+)[^>]*></\1>', '', content)
+        return content
+    
+    def _finalize_metadata(self, content: str) -> str:
+        """Finalize metadata and add to content"""
+        # Add final metadata
+        # Ensure proper document structure
+        return content
+    
+    # ============================================================================
+    # MAIN PROCESSING METHODS
+    # ============================================================================
+    
     def process_wikitext(self, wikitext: str) -> ConversionResult:
-        warnings: List[str] = []
-        errors: List[str] = []
-        metadata: Dict[str, Any] = {}
-
-        text = wikitext or ""
-        for pre in self.preprocessors:
-            try:
-                text = pre(text, metadata)
-            except Exception as e:
-                errors.append(str(e))
-
+        """
+        Main method to process MediaWiki wikitext to XML.
+        
+        Args:
+            wikitext: The MediaWiki content to convert
+            
+        Returns:
+            ConversionResult with XML content and metadata
+        """
+        warnings = []
+        errors = []
+        metadata = {}
+        
         try:
-            xml_content = self._process_nested_content(text)
+            # Preprocessing
+            content = wikitext
+            for preprocessor in self.preprocessors:
+                if preprocessor == self._extract_metadata:
+                    metadata.update(preprocessor(content))
+                else:
+                    content = preprocessor(content)
+            
+            # Parse MediaWiki content
+            parsed = mwparserfromhell.parse(content)
+            
+            # Process all nodes with nested content support
+            nodes_to_replace = []
+            
+            # Process nodes in the order they appear in the document
+            for node in parsed.nodes:
+                if hasattr(node, 'name'):  # Template
+                    template_name = str(node.name).strip()
+                    if template_name in self.template_handlers:
+                        try:
+                            # Process nested content within the template
+                            processed_node = self._process_template_with_nesting(node)
+                            replacement = self.template_handlers[template_name](processed_node)
+                            nodes_to_replace.append((node, replacement))
+                        except Exception as e:
+                            errors.append(f"Error processing template {template_name}: {str(e)}")
+                    else:
+                        warnings.append(f"Unknown template: {template_name}")
+                
+                elif hasattr(node, 'tag'):  # Tag
+                    tag_name = str(node.tag).strip().lower()
+                    if tag_name in self.tag_handlers:
+                        try:
+                            # Process nested content within the tag
+                            processed_node = self._process_tag_with_nesting(node)
+                            replacement = self.tag_handlers[tag_name](processed_node)
+                            nodes_to_replace.append((node, replacement))
+                        except Exception as e:
+                            errors.append(f"Error processing tag {tag_name}: {str(e)}")
+                    else:
+                        warnings.append(f"Unknown tag: {tag_name}")
+                
+                elif hasattr(node, '__class__') and 'Wikilink' in str(node.__class__):  # Wikilink
+                    try:
+                        replacement = self._handle_wikilink(node)
+                        nodes_to_replace.append((node, replacement))
+                    except Exception as e:
+                        errors.append(f"Error processing wikilink: {str(e)}")
+            
+            # Replace all nodes in order
+            for node, replacement in nodes_to_replace:
+                parsed.replace(node, replacement)
+            
+            # Get processed content
+            xml_content = str(parsed)
+            
+            # Postprocessing
+            for postprocessor in self.postprocessors:
+                xml_content = postprocessor(xml_content)
+            
+            # Wrap in mediawiki tag
+            xml_content = f'<mediawiki>{xml_content}</mediawiki>'
+            
+            return ConversionResult(
+                xml_content=xml_content,
+                metadata=metadata,
+                warnings=warnings,
+                errors=errors,
+                wikilinks=self.wikilinks.copy()
+            )
+            
         except Exception as e:
-            xml_content = text
-            errors.append(str(e))
-
-        for post in self.postprocessors:
-            try:
-                xml_content = post(xml_content, metadata)
-            except Exception as e:
-                errors.append(str(e))
-
-        return ConversionResult(
-            xml_content=xml_content,
-            metadata=metadata,
-            warnings=warnings,
-            errors=errors,
-            wikilinks=self.wikilinks.copy(),
-        )
-
+            errors.append(f"Fatal error in processing: {str(e)}")
+            return ConversionResult(
+                xml_content="<mediawiki></mediawiki>",
+                metadata={},
+                warnings=warnings,
+                errors=errors,
+                wikilinks=[]
+            )
+    
     def add_template_handler(self, template_name: str, handler_func):
+        """Add a custom template handler"""
         self.template_handlers[template_name] = handler_func
-
+    
     def add_tag_handler(self, tag_name: str, handler_func):
+        """Add a custom tag handler"""
         self.tag_handlers[tag_name] = handler_func
-
+    
     def add_preprocessor(self, preprocessor_func):
+        """Add a custom preprocessor"""
         self.preprocessors.append(preprocessor_func)
-
+    
     def add_postprocessor(self, postprocessor_func):
+        """Add a custom postprocessor"""
         self.postprocessors.append(postprocessor_func)
-
+    
     def get_wikilinks(self) -> List[Dict[str, Any]]:
+        """Get all captured wikilinks"""
         return self.wikilinks.copy()
-
+    
     def clear_wikilinks(self):
+        """Clear all captured wikilinks"""
         self.wikilinks.clear()
 
 
+# ============================================================================
+# CONVENIENCE FUNCTIONS
+# ============================================================================
+
 def create_processor() -> MediaWikiProcessor:
+    """Create a new MediaWiki processor instance"""
     return MediaWikiProcessor()
 
 
 def process_page(page_content: str) -> ConversionResult:
+    """Process a single page of MediaWiki content"""
     processor = create_processor()
     return processor.process_wikitext(page_content)
 
+
+if __name__ == "__main__":
+    # Example usage
+    processor = create_processor()
+    
+    # Example MediaWiki content with nested templates
+    sample_wikitext = """
+    {{verse|1|1|In the beginning God created the heaven and the earth.}}
+
+    {{verse|1|2|And the earth was without form, and void; and darkness was upon the face of the deep.}}
+
+    {{sc|Genesis}} {{c|Chapter 1}}
+    {{larger|The Creation}}
+    <ref name="gen1">This is a reference</ref>
+    
+    See also [[Genesis]] and [[Creation myth]] for more information.
+    
+    Nested example: {{sc|{{larger|Bold Large Text}}}}
+    Complex nested: {{verse|1|3|{{sc|God}} said, {{larger|Let there be light}}}}
+    """
+    
+    result = processor.process_wikitext(sample_wikitext)
+    print("XML Output:")
+    print(result.xml_content)
+    print("\nWarnings:", result.warnings)
+    print("Errors:", result.errors)
+    print("Wikilinks:", result.wikilinks)

From 0ae7ab2868a2b060f6828137f32efbc7967098d8 Mon Sep 17 00:00:00 2001
From: Efraim Feinstein <efraim.feinstein@gmail.com>
Date: Thu, 28 May 2026 22:26:22 -0700
Subject: [PATCH 07/10] Apply suggestions from code review

Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com>
---
 opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py    | 1 -
 opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py b/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py
index 43a3217..b3b2c31 100644
--- a/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py
@@ -572,7 +572,6 @@ def main(argv: list[str] | None = None) -> int:
     logging.basicConfig(level=logging.INFO)
     args = _build_arg_parser().parse_args(argv)
 
-    data_dir = miqra_al_pi_hamasorah_data_directory(args.sourcetexts_root)
     sheets_dir = miqra_al_pi_hamasorah_sheets_directory(args.sourcetexts_root)
     out_dir = args.project_dir if args.project_dir is not None else _default_project_directory()
 
diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py b/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py
index 1c2ab77..bf604c7 100644
--- a/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/miqra_wikitext.py
@@ -54,9 +54,6 @@
     }
 )
 
-_BOLD_ITALIC_RE = re.compile(r"'''''(.*?)'''''")
-_BOLD_RE = re.compile(r"'''(.*?)'''")
-_ITALIC_RE = re.compile(r"''(.*?)''")
 _ANY_HI_RE = re.compile(r"'''''(.*?)'''''|'''(.*?)'''|''(.*?)''")
 _TAG_OPEN_RE = re.compile(r"<(miqra|mw):([a-zA-Z0-9-]+)([^>]*?)(/?)>")
 _KETEG_START_RE = re.compile(r"<קטע\s+התחלה=([^/>]+)\s*/>", re.IGNORECASE)

From 9471772b0ff6d7d513dcc7029cbd1705dd247553 Mon Sep 17 00:00:00 2001
From: Efraim Feinstein <efraim.feinstein@gmail.com>
Date: Thu, 28 May 2026 22:31:52 -0700
Subject: [PATCH 08/10] remove unused import

---
 opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py b/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py
index b3b2c31..aeba957 100644
--- a/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py
@@ -15,7 +15,6 @@
 from opensiddur.common.xslt import xslt_transform_string
 from opensiddur.importer.util.pages import (
     default_sourcetexts_root,
-    miqra_al_pi_hamasorah_data_directory,
     miqra_al_pi_hamasorah_sheets_directory,
 )
 from opensiddur.importer.util.prettify import prettify_xml

From 94df6984d81af8a54fc6e00dc5ce8f88f8b469a9 Mon Sep 17 00:00:00 2001
From: Efraim Feinstein <efraim.feinstein@gmail.com>
Date: Thu, 28 May 2026 22:41:08 -0700
Subject: [PATCH 09/10] chore: try to increase code coverage

---
 opensiddur/exporter/pdf/pdf.py                |   2 +-
 opensiddur/exporter/tex/latex.py              |   2 +-
 .../exporter/validate_urn_references.py       |   2 +-
 .../importer/agent/checkpoint_example.py      |   9 +-
 opensiddur/importer/agent/example_usage.py    |   2 +-
 .../importer/jps1917/template_finder.py       |  27 +-
 opensiddur/importer/jps1917/test_processor.py |   4 +-
 opensiddur/importer/jps1917/wikisource.py     |   2 +-
 .../miqra_al_pi_hamasorah/convert_tsv.py      |   2 +-
 .../miqra_al_pi_hamasorah/download.py         |   6 +-
 .../importer/util/mediawiki_processor.py      |  14 +-
 opensiddur/importer/wlc/download_tanach.py    |   6 +-
 opensiddur/importer/wlc/wlc.py                |   2 +-
 .../test_miqra_wikitext.py                    | 253 ++++++++++++++++++
 14 files changed, 298 insertions(+), 35 deletions(-)

diff --git a/opensiddur/exporter/pdf/pdf.py b/opensiddur/exporter/pdf/pdf.py
index df1801d..cd8cbba 100755
--- a/opensiddur/exporter/pdf/pdf.py
+++ b/opensiddur/exporter/pdf/pdf.py
@@ -392,5 +392,5 @@ def main():  # pragma: no cover
         sys.exit(1)
 
 
-if __name__ == "__main__":
+if __name__ == "__main__":  # pragma: no cover
     main()
diff --git a/opensiddur/exporter/tex/latex.py b/opensiddur/exporter/tex/latex.py
index 6b7e4a8..5629d86 100644
--- a/opensiddur/exporter/tex/latex.py
+++ b/opensiddur/exporter/tex/latex.py
@@ -464,5 +464,5 @@ def main():  # pragma: no cover
     )
 
 
-if __name__ == "__main__":
+if __name__ == "__main__":  # pragma: no cover
     main()
diff --git a/opensiddur/exporter/validate_urn_references.py b/opensiddur/exporter/validate_urn_references.py
index 274acca..2a70d3d 100644
--- a/opensiddur/exporter/validate_urn_references.py
+++ b/opensiddur/exporter/validate_urn_references.py
@@ -178,6 +178,6 @@ def main(argv: Optional[list[str]] = None) -> int:
     return 0
 
 
-if __name__ == "__main__":
+if __name__ == "__main__":  # pragma: no cover
     raise SystemExit(main())
 
diff --git a/opensiddur/importer/agent/checkpoint_example.py b/opensiddur/importer/agent/checkpoint_example.py
index 6274622..55379eb 100644
--- a/opensiddur/importer/agent/checkpoint_example.py
+++ b/opensiddur/importer/agent/checkpoint_example.py
@@ -171,9 +171,10 @@ def demo_interrupted_session():
         print(f"\n🧹 Cleaned up {checkpoint_file}")
 
 
-if __name__ == "__main__":
-    # Run the checkpointing demo
+def _run_cli() -> None:  # pragma: no cover
     demo_checkpointing()
-    
-    # Run the interrupted session demo
     demo_interrupted_session()
+
+
+if __name__ == "__main__":  # pragma: no cover
+    _run_cli()
diff --git a/opensiddur/importer/agent/example_usage.py b/opensiddur/importer/agent/example_usage.py
index 7b472f2..a82367b 100644
--- a/opensiddur/importer/agent/example_usage.py
+++ b/opensiddur/importer/agent/example_usage.py
@@ -63,5 +63,5 @@ def main():
             print(f"  [{role}]: {message[:100]}{'...' if len(message) > 100 else ''}")
 
 
-if __name__ == "__main__":
+if __name__ == "__main__":  # pragma: no cover
     main()
diff --git a/opensiddur/importer/jps1917/template_finder.py b/opensiddur/importer/jps1917/template_finder.py
index da3af2e..2800641 100644
--- a/opensiddur/importer/jps1917/template_finder.py
+++ b/opensiddur/importer/jps1917/template_finder.py
@@ -414,7 +414,7 @@ def save_template_analysis(template_data: Dict, output_file: str = "template_ana
     print(f"Template analysis saved to {output_file}")
 
 
-if __name__ == "__main__": # pragma: no cover
+def _run_cli() -> None:  # pragma: no cover
     import argparse
 
     parser = argparse.ArgumentParser(description="Analyze JPS 1917 Wikisource MediaWiki templates and tags.")
@@ -427,27 +427,28 @@ def save_template_analysis(template_data: Dict, output_file: str = "template_ana
     args = parser.parse_args()
     root = args.sourcetexts_root
 
-    # Example usage
     print("Starting MediaWiki template and tag analysis...")
-    
-    # Find all templates
-    print("\n" + "="*50)
+
+    print("\n" + "=" * 50)
     print("ANALYZING TEMPLATES")
-    print("="*50)
+    print("=" * 50)
     template_data = find_all_templates(sourcetexts_root=root)
     print_template_summary(template_data)
     save_template_analysis(template_data, "jps1917_template_analysis.json")
-    
-    # Find all tags
-    print("\n" + "="*50)
+
+    print("\n" + "=" * 50)
     print("ANALYZING TAGS")
-    print("="*50)
+    print("=" * 50)
     tag_data = find_all_tags(sourcetexts_root=root)
     print_tag_summary(tag_data)
     save_tag_analysis(tag_data, "jps1917_tag_analysis.json")
-    
-    print("\n" + "="*50)
+
+    print("\n" + "=" * 50)
     print("ANALYSIS COMPLETE!")
-    print("="*50)
+    print("=" * 50)
     print("Template analysis saved to: jps1917_template_analysis.json")
     print("Tag analysis saved to: jps1917_tag_analysis.json")
+
+
+if __name__ == "__main__":  # pragma: no cover
+    _run_cli()
diff --git a/opensiddur/importer/jps1917/test_processor.py b/opensiddur/importer/jps1917/test_processor.py
index 0d0480d..75c0b75 100644
--- a/opensiddur/importer/jps1917/test_processor.py
+++ b/opensiddur/importer/jps1917/test_processor.py
@@ -122,7 +122,7 @@ def handle_custom_tag(tag):
         print(f"\nErrors: {result.errors}")
 
 
-def main():
+def main():  # pragma: no cover
     """Run all tests"""
     print("MediaWiki to XML Processor Test Suite")
     print("Based on 1917 JPS Wikisource Analysis")
@@ -143,5 +143,5 @@ def main():
         traceback.print_exc()
 
 
-if __name__ == "__main__":
+if __name__ == "__main__":  # pragma: no cover
     main()
diff --git a/opensiddur/importer/jps1917/wikisource.py b/opensiddur/importer/jps1917/wikisource.py
index 49a5825..c977389 100644
--- a/opensiddur/importer/jps1917/wikisource.py
+++ b/opensiddur/importer/jps1917/wikisource.py
@@ -131,5 +131,5 @@ def main(argv: list[str] | None = None) -> int:
     return 0
 
 
-if __name__ == "__main__":
+if __name__ == "__main__":  # pragma: no cover
     sys.exit(main())
diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py b/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py
index aeba957..d3c128e 100644
--- a/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/convert_tsv.py
@@ -594,6 +594,6 @@ def main(argv: list[str] | None = None) -> int:
     return 0
 
 
-if __name__ == "__main__":
+if __name__ == "__main__":  # pragma: no cover
     sys.exit(main())
 
diff --git a/opensiddur/importer/miqra_al_pi_hamasorah/download.py b/opensiddur/importer/miqra_al_pi_hamasorah/download.py
index 342ba83..1b1bc85 100644
--- a/opensiddur/importer/miqra_al_pi_hamasorah/download.py
+++ b/opensiddur/importer/miqra_al_pi_hamasorah/download.py
@@ -215,9 +215,13 @@ def main(argv: list[str] | None = None) -> int:
     return 0
 
 
-if __name__ == "__main__":
+def _run_cli() -> None:  # pragma: no cover
     try:
         sys.exit(main())
     except Exception as e:
         logger.error("Error downloading Miqra al pi ha-Masorah: %s", e)
         raise
+
+
+if __name__ == "__main__":  # pragma: no cover
+    _run_cli()
diff --git a/opensiddur/importer/util/mediawiki_processor.py b/opensiddur/importer/util/mediawiki_processor.py
index faeccff..208dfbf 100644
--- a/opensiddur/importer/util/mediawiki_processor.py
+++ b/opensiddur/importer/util/mediawiki_processor.py
@@ -798,11 +798,8 @@ def process_page(page_content: str) -> ConversionResult:
     return processor.process_wikitext(page_content)
 
 
-if __name__ == "__main__":
-    # Example usage
+def _demo_main() -> None:  # pragma: no cover
     processor = create_processor()
-    
-    # Example MediaWiki content with nested templates
     sample_wikitext = """
     {{verse|1|1|In the beginning God created the heaven and the earth.}}
 
@@ -811,16 +808,19 @@ def process_page(page_content: str) -> ConversionResult:
     {{sc|Genesis}} {{c|Chapter 1}}
     {{larger|The Creation}}
     <ref name="gen1">This is a reference</ref>
-    
+
     See also [[Genesis]] and [[Creation myth]] for more information.
-    
+
     Nested example: {{sc|{{larger|Bold Large Text}}}}
     Complex nested: {{verse|1|3|{{sc|God}} said, {{larger|Let there be light}}}}
     """
-    
     result = processor.process_wikitext(sample_wikitext)
     print("XML Output:")
     print(result.xml_content)
     print("\nWarnings:", result.warnings)
     print("Errors:", result.errors)
     print("Wikilinks:", result.wikilinks)
+
+
+if __name__ == "__main__":  # pragma: no cover
+    _demo_main()
diff --git a/opensiddur/importer/wlc/download_tanach.py b/opensiddur/importer/wlc/download_tanach.py
index f3f74e6..1790156 100644
--- a/opensiddur/importer/wlc/download_tanach.py
+++ b/opensiddur/importer/wlc/download_tanach.py
@@ -72,9 +72,13 @@ def main(argv: list[str] | None = None) -> int:
     return 0
 
 
-if __name__ == "__main__":
+def _run_cli() -> None:  # pragma: no cover
     try:
         sys.exit(main())
     except Exception as e:
         logger.error("Error downloading/unzipping Tanach: %s", e)
         raise
+
+
+if __name__ == "__main__":  # pragma: no cover
+    _run_cli()
diff --git a/opensiddur/importer/wlc/wlc.py b/opensiddur/importer/wlc/wlc.py
index bc69f1b..48f8019 100644
--- a/opensiddur/importer/wlc/wlc.py
+++ b/opensiddur/importer/wlc/wlc.py
@@ -93,5 +93,5 @@ def main(argv: list[str] | None = None) -> int:
     return 0
 
 
-if __name__ == "__main__":
+if __name__ == "__main__":  # pragma: no cover
     sys.exit(main())
diff --git a/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_miqra_wikitext.py b/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_miqra_wikitext.py
index 273bcf3..8fcc69a 100644
--- a/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_miqra_wikitext.py
+++ b/opensiddur/tests/importer/miqra_al_pi_hamasorah/test_miqra_wikitext.py
@@ -1,6 +1,11 @@
 import unittest
 
 from opensiddur.importer.miqra_al_pi_hamasorah.miqra_wikitext import (
+    _escape_outside_tags,
+    _preprocess_column_c,
+    _preprocess_miqra_tags,
+    _wikitext_basic_markup_to_xml,
+    _xml_escape,
     link_target_to_uri,
     normalize_template_name,
     reset_processor,
@@ -8,6 +13,94 @@
 )
 
 
+class TestNormalizeTemplateName(unittest.TestCase):
+    def test_strips_whitespace(self):
+        self.assertEqual(normalize_template_name("  פפ  "), "פפ")
+
+    def test_strips_tevnit_prefix(self):
+        self.assertEqual(normalize_template_name("תבנית:מ:טעם"), "מ:טעם")
+        self.assertEqual(normalize_template_name("תבנית:נוסח"), "נוסח")
+
+    def test_normalizes_quotes(self):
+        self.assertEqual(normalize_template_name("מ:כו''ק"), 'מ:כו"ק')
+        self.assertEqual(
+            normalize_template_name("מ:קו״כ"),
+            'מ:קו"כ',
+        )
+
+
+class TestLinkTargetToUri(unittest.TestCase):
+    def test_empty_target(self):
+        self.assertEqual(link_target_to_uri(""), "")
+        self.assertEqual(link_target_to_uri("   "), "")
+
+    def test_protocol_relative_url(self):
+        self.assertEqual(
+            link_target_to_uri("//cdn.example.com/x.pdf"),
+            "https://cdn.example.com/x.pdf",
+        )
+
+    def test_fragment_preserved(self):
+        uri = link_target_to_uri("דף#פרק")
+        self.assertIn("#", uri)
+        self.assertTrue(uri.startswith("https://he.wikisource.org/wiki/"))
+
+
+class TestPreprocessors(unittest.TestCase):
+    def test_column_c_double_underscore(self):
+        self.assertEqual(_preprocess_column_c("a__b"), "a b")
+
+    def test_column_c_line_break(self):
+        self.assertEqual(
+            _preprocess_column_c("http://host/path"),
+            "http://host/path",
+        )
+        self.assertEqual(
+            _preprocess_column_c("https://host/path"),
+            "https://host/path",
+        )
+        self.assertIn("<miqra:lb/>", _preprocess_column_c("שורה//המשך"))
+
+    def test_miqra_keteg_tags(self):
+        s = "<קטע התחלה=foo/>text<קטע סוף=foo/>"
+        out = _preprocess_miqra_tags(s)
+        self.assertIn('<miqra:segment type="start" name="foo"/>', out)
+        self.assertIn('<miqra:segment type="end" name="foo"/>', out)
+
+
+class TestMarkupAndEscape(unittest.TestCase):
+    def test_xml_escape(self):
+        self.assertEqual(
+            _xml_escape('a & b <c> "d" \'e\''),
+            "a &amp; b &lt;c&gt; &quot;d&quot; &apos;e&apos;",
+        )
+
+    def test_wikitext_bold_italic(self):
+        self.assertEqual(
+            _wikitext_basic_markup_to_xml("plain '''bold''' ''italic''"),
+            'plain <mw:hi rend="bold">bold</mw:hi> <mw:hi rend="italic">italic</mw:hi>',
+        )
+
+    def test_wikitext_bold_italic_combined(self):
+        self.assertIn(
+            'rend="bold-italic"',
+            _wikitext_basic_markup_to_xml("'''''both'''''"),
+        )
+
+    def test_escape_outside_tags_preserves_miqra_elements(self):
+        inner = _escape_outside_tags(
+            "plain <miqra:hi rend=\"large\">א</miqra:hi> '''bold'''"
+        )
+        self.assertIn("<miqra:hi rend=\"large\">", inner)
+        self.assertIn("א", inner)
+        self.assertIn('rend="bold"', inner)
+
+    def test_wikitext_markup_in_verse_via_integration(self):
+        frag = wikitext_to_intermediate_xml("'''דבר'''")
+        self.assertIn('<mw:hi rend="bold">', frag)
+        self.assertIn("דבר", frag)
+
+
 class TestMiqraWikitext(unittest.TestCase):
     def setUp(self):
         reset_processor()
@@ -98,6 +191,166 @@ def test_yerah_ben_yomo2_shows_first_parameter_only(self):
         self.assertNotIn("אַלְפַּ֪֪יִם", frag)
         self.assertNotIn("{{ירח בן יומו-2", frag)
 
+    def test_ketiv_only_and_qeri_only(self):  # ketiv -> "(...)", qeri -> "[...]"
+        ketiv_xml = wikitext_to_intermediate_xml("{{כתיב ולא קרי|כתיב}}")
+        self.assertIn("<miqra:ketiv-only>(כתיב)</miqra:ketiv-only>", ketiv_xml)
+        qeri_xml = wikitext_to_intermediate_xml("{{קרי ולא כתיב|קְרִי}}")
+        self.assertIn("<miqra:qeri-only>[קְרִי]</miqra:qeri-only>", qeri_xml)
+
+    def test_qok_if_matres(self):
+        """The output keeps the display text plus ketiv and qeri elements."""
+        template = '{{מ:קו"כ-אם-2|display|כתיב|קְרִי}}'
+        xml = wikitext_to_intermediate_xml(template)
+        self.assertIn("display", xml)
+        self.assertIn("<miqra:kq-matres>", xml)
+        self.assertIn("<miqra:ketiv>כתיב</miqra:ketiv>", xml)
+        self.assertIn("<miqra:qeri>קְרִי</miqra:qeri>", xml)
+
+    def test_qok_two_qeri_words(self):
+        """The output is marked ``split-qeri`` and carries all three words."""
+        template = '{{מ:קו"כ קרי שונה מהכתיב בשתי מילים|כתיב|ק1|ק2}}'
+        xml = wikitext_to_intermediate_xml(template)
+        self.assertIn('type="split-qeri"', xml)
+        self.assertIn("<miqra:bracketed>ק1</miqra:bracketed>", xml)
+        self.assertIn("<miqra:qeri>ק2</miqra:qeri>", xml)
+        self.assertIn("<miqra:ketiv>כתיב</miqra:ketiv>", xml)
+
+    def test_parashah_variants(self):  # one subtest per parashah template
+        expected_type = {
+            "{{פפפ}}": "open-line",
+            "{{סס}}": "close",
+            "{{ססס}}": "close-inline",
+            "{{סס2}}": "close-narrow",
+            "{{מ:ששש}}": "shirah",
+        }
+        for wikitext, kind in expected_type.items():
+            with self.subTest(wikitext=wikitext):
+                self.assertIn(f'type="{kind}"', wikitext_to_intermediate_xml(wikitext))
+
+    def test_parashah_mid_verse_attribute(self):
+        """The extra template argument yields ``midVerse="true"``."""
+        self.assertIn('midVerse="true"', wikitext_to_intermediate_xml("{{פפ|פסקא באמצע פסוק}}"))
+
+    def test_poetic_levels(self):  # five templates, expected levels 0..4
+        for level in range(5):
+            frag = wikitext_to_intermediate_xml("{{ר%d}}" % level)
+            self.assertIn('<miqra:poetic level="%d"/>' % level, frag)
+
+    def test_centered_title(self):  # argument ends up inside <miqra:centered>
+        centered = wikitext_to_intermediate_xml("{{פרשה-מרכז|כותרת}}")
+        self.assertIn("<miqra:centered>כותרת</miqra:centered>", centered)
+
+    def test_letter_formatting(self):
+        """Four letter templates give small, raised, dotted, inverted-nun markup."""
+        templates = "{{מ:אות-ק|ק}}{{מ:אות תלויה|ת}}{{מ:אות מנוקדת|מ}}{{מ:נו\"ן הפוכה|ן}}"
+        frag = wikitext_to_intermediate_xml(templates)
+        self.assertIn('rend="small"', frag)
+        self.assertIn('rend="raised"', frag)
+        self.assertIn("<miqra:dotted>", frag)
+        self.assertIn("<miqra:inverted-nun>", frag)
+
+    def test_yerushalem_variants(self):  # positional args -> vowel/accent attributes
+        yerushalem_xml = wikitext_to_intermediate_xml("{{מ:ירושלם|v|a}}")
+        self.assertIn('<miqra:yerushalem vowel="v" accent="a"/>', yerushalem_xml)
+        yerushalema_xml = wikitext_to_intermediate_xml("{{מ:ירושלמה|v|a}}")
+        self.assertIn('<miqra:yerushalema vowel="v" accent="a"/>', yerushalema_xml)
+
+    def test_standalone_accents(self):
+        """Each of the three accent templates emits its own ``type`` value."""
+        # All three templates are converted in a single call.
+        source = "{{ירח בן יומו}}{{גלגל}}{{אתנח הפוך}}"
+        frag = wikitext_to_intermediate_xml(source)
+        for accent in ("yerah-ben-yomo", "galgal", "etnah-hafukh"):
+            self.assertIn(f'type="{accent}"', frag)
+
+    def test_taam_handlers(self):  # four accent templates converted together
+        templates = (
+            "{{מ:טעם ומתג באות אחת|א}}",
+            "{{שני טעמים באות אחת}}",
+            "{{מ:גרש ותלישא גדולה}}",
+            "{{מ:גרשיים ותלישא גדולה}}",
+        )
+        frag = wikitext_to_intermediate_xml("".join(templates))
+        for needle in ("א", 'type="geresh-telisha-gedola"', 'type="gershayim-telisha-gedola"'):
+            self.assertIn(needle, frag)
+
+    def test_qamats_named_params(self):
+        """The value of the named parameter appears in the output."""
+        self.assertIn("דָּ", wikitext_to_intermediate_xml("{{מ:קמץ|ד=דָּ}}"))
+
+    def test_taam_dummy_strips_leading_marker(self):  # leading "X" must not survive
+        converted = wikitext_to_intermediate_xml("{{מ:טעם|Xאות}}")
+        self.assertIn("אות", converted)
+        self.assertNotIn("Xאות", converted)
+
+    def test_qupo_accent(self):
+        """The named parameter value is emitted as the ``above`` attribute."""
+        template = "{{שני טעמים באות אחת קמץ-תחתון-פתח-עליון|עליו=א}}"
+        frag = wikitext_to_intermediate_xml(template)
+        self.assertIn('<miqra:qupo-accent above="א"/>', frag)
+
+    def test_punctuation_and_maqaf(self):
+        """Checks the legarmeh and paseq types and the grey rendition."""
+        source = "{{מ:לגרמיה-2}}{{מ:פסק}}{{מ:מקף אפור}}"
+        frag = wikitext_to_intermediate_xml(source)
+        self.assertIn('type="legarmeh"', frag)
+        self.assertIn('type="paseq"', frag)
+        self.assertIn('rend="grey"', frag)
+
+    def test_kol_qamats_default(self):  # template is invoked without arguments
+        self.assertIn("כָּל", wikitext_to_intermediate_xml("{{מ:כל קמץ קטן מרכא}}"))
+
+    def test_notes_and_anchors(self):
+        """Note, line-anchor, and good-ending templates all produce markup."""
+        note_and_anchor = "{{מ:הערה|גוף הערה}}{{עוגן בשורה|label}}"
+        good_ending = "{{מ:סיום בטוב|סוף טוב}}"
+        frag = wikitext_to_intermediate_xml(note_and_anchor + good_ending)
+        self.assertIn("<miqra:note", frag)
+        self.assertIn("גוף הערה", frag)
+        self.assertIn('<miqra:line-anchor target="label"/>', frag)
+        self.assertIn("<miqra:good-ending>סוף טוב</miqra:good-ending>", frag)
+
+    def test_dual_trope_and_accent(self):
+        """A dual-trope link and a dual accent with two roles are emitted."""
+        link = "{{קק|target}}"
+        accent = "{{מ:כפול|כפול=ד|א=א|ב=ב}}"
+        frag = wikitext_to_intermediate_xml(link + accent)
+        self.assertIn("<miqra:dual-trope-link>target</miqra:dual-trope-link>", frag)
+        self.assertIn('<miqra:dual-accent dual="ד">', frag)
+        for role in ("א", "ב"):
+            self.assertIn(f'role="{role}"', frag)
+
+    def test_emphasis_and_footnote_mark(self):  # bold span plus empty fn-mark element
+        converted = wikitext_to_intermediate_xml("{{מודגש|חשוב}}{{ש}}")
+        self.assertIn('<mw:hi rend="bold">חשוב</mw:hi>', converted)
+        self.assertIn("<miqra:fn-mark/>", converted)
+
+    def test_wikilink(self):  # plain and piped links in one input
+        linked = wikitext_to_intermediate_xml("[[דף]] and [[דף|תווית]]")
+        self.assertIn('<mw:link target="https://he.wikisource.org/wiki/', linked)
+        self.assertIn("תווית", linked)
+
+    def test_noinclude_stripped(self):
+        """Text inside ``<noinclude>`` is removed; text around it is kept."""
+        frag = wikitext_to_intermediate_xml("visible<noinclude>hidden</noinclude>still")
+        for kept in ("visible", "still"):
+            self.assertIn(kept, frag)
+        # Only the wrapped word may disappear from the output.
+        self.assertNotIn("hidden", frag)
+
+    def test_keteg_segments_in_wikitext(self):  # segment start tag via the full pipeline
+        segment_xml = wikitext_to_intermediate_xml("<קטע התחלה=seg/>")
+        self.assertIn('<miqra:segment type="start" name="seg"/>', segment_xml)
+
+    def test_column_c_line_break_integration(self):
+        """With ``column_c=True``, ``//`` is converted to ``<miqra:lb/>``."""
+        self.assertIn("<miqra:lb/>", wikitext_to_intermediate_xml("א//ב", column_c=True))
+
+    def test_nosach_without_note(self):  # bare text out, no variant wrapper
+        plain = wikitext_to_intermediate_xml("{{נוסח|טקסט}}")
+        self.assertEqual(plain, "טקסט")
+        self.assertNotIn("<miqra:variant", plain)
+
     def test_all_templates_from_doc_have_handlers(self):
         """Every template name in templates.tsv examples is recognized."""
         from pathlib import Path

From 8f55b0caa87332d454db0b00f46d8a4b0cc755c8 Mon Sep 17 00:00:00 2001
From: Efraim Feinstein <efraim.feinstein@gmail.com>
Date: Thu, 28 May 2026 22:46:57 -0700
Subject: [PATCH 10/10] chore: set codecov to 85%

---
 codecov.yml | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 codecov.yml

diff --git a/codecov.yml b/codecov.yml
new file mode 100644
index 0000000..3b065ab
--- /dev/null
+++ b/codecov.yml
@@ -0,0 +1,11 @@
+# https://docs.codecov.com/docs/commit-status
+coverage:
+  status:
+    project:
+      default:
+        target: 85%
+        informational: false
+    patch:
+      default:
+        target: 85%
+        informational: false