diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6dcc935..68a840b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -9,21 +9,23 @@ on: jobs: test: runs-on: ubuntu-latest + env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true steps: - name: Install Jing (RelaxNG validator) run: sudo apt-get update && sudo apt-get install --no-install-recommends --no-install-suggests -y jing - name: Checkout code - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.13' - name: Install uv - uses: astral-sh/setup-uv@v6 + uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0 with: enable-cache: true @@ -40,8 +42,8 @@ jobs: uv run coverage xml - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v6 with: - file: ./coverage.xml + files: ./coverage.xml fail_ci_if_error: false token: ${{ secrets.CODECOV_TOKEN }} diff --git a/opensiddur/exporter/compiler.py b/opensiddur/exporter/compiler.py index 51f73ef..f0ddbcf 100644 --- a/opensiddur/exporter/compiler.py +++ b/opensiddur/exporter/compiler.py @@ -24,14 +24,12 @@ from lxml.etree import ElementBase from lxml import etree +from opensiddur.exporter.constants import JLPTEI_NAMESPACE, PROCESSING_NAMESPACE from opensiddur.exporter.linear import LinearData, get_linear_data from opensiddur.exporter.refdb import ReferenceDatabase from opensiddur.exporter.settings import load_default_settings, load_settings from opensiddur.exporter.urn import ResolvedUrnRange, UrnResolver -JLPTEI_NAMESPACE = 'http://jewishliturgy.org/ns/jlptei/2' -PROCESSING_NAMESPACE = 'http://jewishliturgy.org/ns/processing' - class _ProcessingCommand(Enum): """ Possible ways the compiler can process an element """ # copy the element and recurse into its children, copying its text content diff --git a/opensiddur/exporter/constants.py b/opensiddur/exporter/constants.py new file mode 100644 index 0000000..34f316e --- /dev/null +++ b/opensiddur/exporter/constants.py @@ -0,0 +1,18 @@ +"""Shared exporter constants (kept dependency-light to avoid circular imports).""" + +JLPTEI_NAMESPACE = "http://jewishliturgy.org/ns/jlptei/2" +PROCESSING_NAMESPACE = "http://jewishliturgy.org/ns/processing" + +TEI_NS = "http://www.tei-c.org/ns/1.0" +XML_NS = "http://www.w3.org/XML/1998/namespace" + +STRUCTURAL_BLOCKS = frozenset( + { + f"{{{TEI_NS}}}div", + f"{{{TEI_NS}}}p", + f"{{{TEI_NS}}}ab", + f"{{{TEI_NS}}}lg", + f"{{{TEI_NS}}}l", + } +) + diff --git a/opensiddur/exporter/external_compiler.py b/opensiddur/exporter/external_compiler.py index 0c1fd44..2e2e976 100644 --- a/opensiddur/exporter/external_compiler.py +++ b/opensiddur/exporter/external_compiler.py @@ -7,27 +7,26 @@ from opensiddur.exporter.compiler import ( CompilerProcessor, - JLPTEI_NAMESPACE, - PROCESSING_NAMESPACE, _ProcessingCommand, _ProcessingContext, _AnnotationCommand, ) +from opensiddur.exporter.constants import ( + JLPTEI_NAMESPACE, + PROCESSING_NAMESPACE, + STRUCTURAL_BLOCKS, + TEI_NS, + XML_NS, +) from opensiddur.exporter.linear import LinearData from opensiddur.exporter.refdb import ReferenceDatabase from opensiddur.exporter.urn import ResolvedUrnRange, UrnResolver from lxml import etree -TEI_NS = "http://www.tei-c.org/ns/1.0" -XML_NS = "http://www.w3.org/XML/1998/namespace" - -STRUCTURAL_BLOCKS = frozenset({ - f"{{{TEI_NS}}}div", - f"{{{TEI_NS}}}p", - f"{{{TEI_NS}}}ab", - f"{{{TEI_NS}}}lg", - f"{{{TEI_NS}}}l", -}) +from opensiddur.exporter.marker_reconstruct import ( + doc_needs_marker_reconstruction, + reconstruct_markered_document, +) def _attrs_structural_original(source: ElementBase) -> dict[str, str]: @@ -761,8 +760,14 @@ def process(self, root: Optional[ElementBase] = None) -> list[ElementBase]: # Root parallel trigger is_root = len(self.linear_data.processing_context) == 0 + def _reconstruct_if_needed(processed: list[ElementBase]) -> None: + if processed and doc_needs_marker_reconstruction(processed[0]): + reconstruct_markered_document(processed[0]) + if is_root and self.linear_data.parallel_projects and not self._in_parallel_compilation: - return self._process_parallel_root() + processed = self._process_parallel_root() + _reconstruct_if_needed(processed) + return processed # set the root language to the language of the deepest common ancestor if present, else root self.root_language = self._get_in_scope_language( @@ -790,4 +795,7 @@ def process(self, root: Optional[ElementBase] = None) -> list[ElementBase]: if self.from_start is None and processed: self._mark_file_source(processed[0]) + if is_root: + _reconstruct_if_needed(processed) + return processed diff --git a/opensiddur/exporter/inline_compiler.py b/opensiddur/exporter/inline_compiler.py index 40810ae..a6a433f 100644 --- a/opensiddur/exporter/inline_compiler.py +++ b/opensiddur/exporter/inline_compiler.py @@ -5,11 +5,11 @@ from opensiddur.exporter.compiler import ( CompilerProcessor, - PROCESSING_NAMESPACE, _ProcessingCommand, _ProcessingContext, _AnnotationCommand, ) +from opensiddur.exporter.constants import PROCESSING_NAMESPACE from opensiddur.exporter.external_compiler import ExternalCompilerProcessor from opensiddur.exporter.linear import LinearData from opensiddur.exporter.refdb import ReferenceDatabase diff --git a/opensiddur/exporter/tex/marker_reconstruct.py b/opensiddur/exporter/marker_reconstruct.py similarity index 97% rename from opensiddur/exporter/tex/marker_reconstruct.py rename to opensiddur/exporter/marker_reconstruct.py index 6bc4037..10c03fb 100644 --- a/opensiddur/exporter/tex/marker_reconstruct.py +++ b/opensiddur/exporter/marker_reconstruct.py @@ -11,7 +11,7 @@ from lxml import etree -from opensiddur.exporter.external_compiler import PROCESSING_NAMESPACE, STRUCTURAL_BLOCKS +from opensiddur.exporter.constants import PROCESSING_NAMESPACE, STRUCTURAL_BLOCKS _P_START = f"{{{PROCESSING_NAMESPACE}}}start" _P_END = f"{{{PROCESSING_NAMESPACE}}}end" @@ -70,11 +70,7 @@ def _absorb_marker_strings(frame: _Frame, el: etree.ElementBase) -> None: def _carrier_attrs_from_marker_el(el: etree.ElementBase) -> dict[str, str]: p_pref = f"{{{PROCESSING_NAMESPACE}}}" xml_id_key = "{http://www.w3.org/XML/1998/namespace}id" - return { - k: v - for k, v in el.attrib.items() - if k != xml_id_key and not k.startswith(p_pref) - } + return {k: v for k, v in el.attrib.items() if k != xml_id_key and not k.startswith(p_pref)} def _new_wrapped_segment( @@ -293,3 +289,4 @@ def reconstruct_markered_document(root: etree.ElementBase) -> None: header = root.find(".//{http://www.tei-c.org/ns/1.0}teiHeader") if header is not None: _strip_stray_processing_markers_under(header) + diff --git a/opensiddur/exporter/tex/xelatex.py b/opensiddur/exporter/tex/xelatex.py index 2862f54..2f609b2 100644 --- a/opensiddur/exporter/tex/xelatex.py +++ b/opensiddur/exporter/tex/xelatex.py @@ -20,10 +20,6 @@ sys.path.insert(0, str(project_root)) from opensiddur.common.xslt import xslt_transform, xslt_transform_string -from opensiddur.exporter.tex.marker_reconstruct import ( - doc_needs_marker_reconstruction, - reconstruct_markered_document, -) XSLT_FILE = Path(__file__).parent / "xelatex.xslt" @@ -324,10 +320,7 @@ def transform_xml_to_tex(input_file, xslt_file=XSLT_FILE, output_file=None): input_xml = input_fd.read() root = etree.fromstring(input_xml.encode("utf-8")) - if doc_needs_marker_reconstruction(root): - reconstruct_markered_document(root) - input_xml = etree.tostring(root, encoding="unicode", xml_declaration=False) - + file_references = get_file_references(input_file, projects_source_root) licenses = extract_licenses(file_references) diff --git a/opensiddur/tests/exporter/test_marker_reconstruct.py b/opensiddur/tests/exporter/test_marker_reconstruct.py index 2bea9fc..dbdb90f 100644 --- a/opensiddur/tests/exporter/test_marker_reconstruct.py +++ b/opensiddur/tests/exporter/test_marker_reconstruct.py @@ -1,4 +1,4 @@ -"""Tests for flattened parallel marker reconstruction (Pass 1 before XeLaTeX).""" +"""Tests for flattened parallel marker reconstruction (compiler output stage).""" import unittest import unittest.mock @@ -7,13 +7,13 @@ from opensiddur.common.xslt import xslt_transform_string from opensiddur.exporter.external_compiler import PROCESSING_NAMESPACE, TEI_NS -from opensiddur.exporter.tex.marker_reconstruct import ( +from opensiddur.exporter.marker_reconstruct import ( doc_needs_marker_reconstruction, reconstruct_markered_document, reconstruct_parallel_item, substantive_content, ) -from opensiddur.exporter.tex import marker_reconstruct as mr +from opensiddur.exporter import marker_reconstruct as mr from opensiddur.exporter.tex.xelatex import XSLT_FILE P_NS = PROCESSING_NAMESPACE diff --git a/opensiddur/tests/exporter/test_parallel_e2e.py b/opensiddur/tests/exporter/test_parallel_e2e.py index 6661c8c..562e141 100644 --- a/opensiddur/tests/exporter/test_parallel_e2e.py +++ b/opensiddur/tests/exporter/test_parallel_e2e.py @@ -216,34 +216,29 @@ def test_no_parallel_fallback_when_parallel_file_missing(self): class TestMarkerStructureE2E(_E2EBase): def test_structural_elements_with_milestones_get_markers(self): - """tei:p with active parallel milestones should produce p:start/p:end markers.""" + """Compiler output should be reconstructed (no raw p:start/p:end markers).""" result = self._compile_primary() all_xml = "".join(etree.tostring(el, encoding="unicode") for el in result) root = etree.fromstring(f"{all_xml}") - # Look for p:start attributes on tei:p elements - start_markers = root.findall(f".//{{{TEI_NS}}}p[@{{{P_NS}}}start]") - end_markers = root.findall(f".//{{{TEI_NS}}}p[@{{{P_NS}}}end]") - self.assertGreater(len(start_markers), 0, "Expected p:start markers on tei:p") - self.assertGreater(len(end_markers), 0, "Expected p:end markers on tei:p") + # Reconstruction happens in the compiler now, so markers should not survive + start_markers = root.findall(f".//*[@{{{P_NS}}}start]") + end_markers = root.findall(f".//*[@{{{P_NS}}}end]") + self.assertEqual(len(start_markers), 0, "p:start markers should be consumed by reconstruction") + self.assertEqual(len(end_markers), 0, "p:end markers should be consumed by reconstruction") + + # Still expect parallel structure to exist in compiled output + parallels = root.findall(f".//{{{P_NS}}}parallel") + self.assertGreater(len(parallels), 0) def test_start_end_marker_ids_match(self): - """Every p:start ID should have a corresponding p:end ID.""" + """Legacy marker pairing test: markers should not be present post-reconstruct.""" result = self._compile_primary() all_xml = "".join(etree.tostring(el, encoding="unicode") for el in result) root = etree.fromstring(f"{all_xml}") - start_ids = set() - end_ids = set() - for el in root.iter(): - sid = el.get(f"{{{P_NS}}}start") - eid = el.get(f"{{{P_NS}}}end") - if sid: - start_ids.add(sid) - if eid: - end_ids.add(eid) - - self.assertEqual(start_ids, end_ids, "Every p:start should have a matching p:end") + self.assertEqual(len(root.findall(f".//*[@{{{P_NS}}}start]")), 0) + self.assertEqual(len(root.findall(f".//*[@{{{P_NS}}}end]")), 0) def test_column_order_attribute(self): """p:parallel elements should have a column-order attribute."""