Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,23 @@ on:
jobs:
test:
runs-on: ubuntu-latest
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true

steps:
- name: Install Jing (RelaxNG validator)
run: sudo apt-get update && sudo apt-get install --no-install-recommends --no-install-suggests -y jing

- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@v5

- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: '3.13'

- name: Install uv
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
with:
enable-cache: true

Expand All @@ -40,8 +42,8 @@ jobs:
uv run coverage xml

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
uses: codecov/codecov-action@v6
with:
file: ./coverage.xml
files: ./coverage.xml
fail_ci_if_error: false
token: ${{ secrets.CODECOV_TOKEN }}
4 changes: 1 addition & 3 deletions opensiddur/exporter/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,12 @@
from lxml.etree import ElementBase
from lxml import etree

from opensiddur.exporter.constants import JLPTEI_NAMESPACE, PROCESSING_NAMESPACE
from opensiddur.exporter.linear import LinearData, get_linear_data
from opensiddur.exporter.refdb import ReferenceDatabase
from opensiddur.exporter.settings import load_default_settings, load_settings
from opensiddur.exporter.urn import ResolvedUrnRange, UrnResolver

JLPTEI_NAMESPACE = 'http://jewishliturgy.org/ns/jlptei/2'
PROCESSING_NAMESPACE = 'http://jewishliturgy.org/ns/processing'

class _ProcessingCommand(Enum):
""" Possible ways the compiler can process an element """
# copy the element and recurse into its children, copying its text content
Expand Down
18 changes: 18 additions & 0 deletions opensiddur/exporter/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Shared exporter constants (kept dependency-light to avoid circular imports)."""

JLPTEI_NAMESPACE = "http://jewishliturgy.org/ns/jlptei/2"
PROCESSING_NAMESPACE = "http://jewishliturgy.org/ns/processing"

TEI_NS = "http://www.tei-c.org/ns/1.0"
XML_NS = "http://www.w3.org/XML/1998/namespace"

STRUCTURAL_BLOCKS = frozenset(
{
f"{{{TEI_NS}}}div",
f"{{{TEI_NS}}}p",
f"{{{TEI_NS}}}ab",
f"{{{TEI_NS}}}lg",
f"{{{TEI_NS}}}l",
}
)

34 changes: 21 additions & 13 deletions opensiddur/exporter/external_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,26 @@

from opensiddur.exporter.compiler import (
CompilerProcessor,
JLPTEI_NAMESPACE,
PROCESSING_NAMESPACE,
_ProcessingCommand,
_ProcessingContext,
_AnnotationCommand,
)
from opensiddur.exporter.constants import (
JLPTEI_NAMESPACE,
PROCESSING_NAMESPACE,
STRUCTURAL_BLOCKS,
TEI_NS,
XML_NS,
)
from opensiddur.exporter.linear import LinearData
from opensiddur.exporter.refdb import ReferenceDatabase
from opensiddur.exporter.urn import ResolvedUrnRange, UrnResolver
from lxml import etree

TEI_NS = "http://www.tei-c.org/ns/1.0"
XML_NS = "http://www.w3.org/XML/1998/namespace"

STRUCTURAL_BLOCKS = frozenset({
f"{{{TEI_NS}}}div",
f"{{{TEI_NS}}}p",
f"{{{TEI_NS}}}ab",
f"{{{TEI_NS}}}lg",
f"{{{TEI_NS}}}l",
})
from opensiddur.exporter.marker_reconstruct import (
doc_needs_marker_reconstruction,
reconstruct_markered_document,
)


def _attrs_structural_original(source: ElementBase) -> dict[str, str]:
Expand Down Expand Up @@ -761,8 +760,14 @@ def process(self, root: Optional[ElementBase] = None) -> list[ElementBase]:

# Root parallel trigger
is_root = len(self.linear_data.processing_context) == 0
def _reconstruct_if_needed(processed: list[ElementBase]) -> None:
if processed and doc_needs_marker_reconstruction(processed[0]):
reconstruct_markered_document(processed[0])

if is_root and self.linear_data.parallel_projects and not self._in_parallel_compilation:
return self._process_parallel_root()
processed = self._process_parallel_root()
_reconstruct_if_needed(processed)
return processed

# set the root language to the language of the deepest common ancestor if present, else root
self.root_language = self._get_in_scope_language(
Expand Down Expand Up @@ -790,4 +795,7 @@ def process(self, root: Optional[ElementBase] = None) -> list[ElementBase]:
if self.from_start is None and processed:
self._mark_file_source(processed[0])

if is_root:
_reconstruct_if_needed(processed)

return processed
2 changes: 1 addition & 1 deletion opensiddur/exporter/inline_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@

from opensiddur.exporter.compiler import (
CompilerProcessor,
PROCESSING_NAMESPACE,
_ProcessingCommand,
_ProcessingContext,
_AnnotationCommand,
)
from opensiddur.exporter.constants import PROCESSING_NAMESPACE
from opensiddur.exporter.external_compiler import ExternalCompilerProcessor
from opensiddur.exporter.linear import LinearData
from opensiddur.exporter.refdb import ReferenceDatabase
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from lxml import etree

from opensiddur.exporter.external_compiler import PROCESSING_NAMESPACE, STRUCTURAL_BLOCKS
from opensiddur.exporter.constants import PROCESSING_NAMESPACE, STRUCTURAL_BLOCKS

_P_START = f"{{{PROCESSING_NAMESPACE}}}start"
_P_END = f"{{{PROCESSING_NAMESPACE}}}end"
Expand Down Expand Up @@ -70,11 +70,7 @@ def _absorb_marker_strings(frame: _Frame, el: etree.ElementBase) -> None:
def _carrier_attrs_from_marker_el(el: etree.ElementBase) -> dict[str, str]:
p_pref = f"{{{PROCESSING_NAMESPACE}}}"
xml_id_key = "{http://www.w3.org/XML/1998/namespace}id"
return {
k: v
for k, v in el.attrib.items()
if k != xml_id_key and not k.startswith(p_pref)
}
return {k: v for k, v in el.attrib.items() if k != xml_id_key and not k.startswith(p_pref)}


def _new_wrapped_segment(
Expand Down Expand Up @@ -293,3 +289,4 @@ def reconstruct_markered_document(root: etree.ElementBase) -> None:
header = root.find(".//{http://www.tei-c.org/ns/1.0}teiHeader")
if header is not None:
_strip_stray_processing_markers_under(header)

9 changes: 1 addition & 8 deletions opensiddur/exporter/tex/xelatex.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@
sys.path.insert(0, str(project_root))

from opensiddur.common.xslt import xslt_transform, xslt_transform_string
from opensiddur.exporter.tex.marker_reconstruct import (
doc_needs_marker_reconstruction,
reconstruct_markered_document,
)

XSLT_FILE = Path(__file__).parent / "xelatex.xslt"

Expand Down Expand Up @@ -324,10 +320,7 @@ def transform_xml_to_tex(input_file, xslt_file=XSLT_FILE, output_file=None):
input_xml = input_fd.read()

root = etree.fromstring(input_xml.encode("utf-8"))
if doc_needs_marker_reconstruction(root):
reconstruct_markered_document(root)
input_xml = etree.tostring(root, encoding="unicode", xml_declaration=False)


file_references = get_file_references(input_file, projects_source_root)

licenses = extract_licenses(file_references)
Expand Down
6 changes: 3 additions & 3 deletions opensiddur/tests/exporter/test_marker_reconstruct.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Tests for flattened parallel marker reconstruction (Pass 1 before XeLaTeX)."""
"""Tests for flattened parallel marker reconstruction (compiler output stage)."""

import unittest
import unittest.mock
Expand All @@ -7,13 +7,13 @@

from opensiddur.common.xslt import xslt_transform_string
from opensiddur.exporter.external_compiler import PROCESSING_NAMESPACE, TEI_NS
from opensiddur.exporter.tex.marker_reconstruct import (
from opensiddur.exporter.marker_reconstruct import (
doc_needs_marker_reconstruction,
reconstruct_markered_document,
reconstruct_parallel_item,
substantive_content,
)
from opensiddur.exporter.tex import marker_reconstruct as mr
from opensiddur.exporter import marker_reconstruct as mr
from opensiddur.exporter.tex.xelatex import XSLT_FILE

P_NS = PROCESSING_NAMESPACE
Expand Down
31 changes: 13 additions & 18 deletions opensiddur/tests/exporter/test_parallel_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,34 +216,29 @@ def test_no_parallel_fallback_when_parallel_file_missing(self):
class TestMarkerStructureE2E(_E2EBase):

def test_structural_elements_with_milestones_get_markers(self):
"""tei:p with active parallel milestones should produce p:start/p:end markers."""
"""Compiler output should be reconstructed (no raw p:start/p:end markers)."""
result = self._compile_primary()
all_xml = "".join(etree.tostring(el, encoding="unicode") for el in result)
root = etree.fromstring(f"<root>{all_xml}</root>")

# Look for p:start attributes on tei:p elements
start_markers = root.findall(f".//{{{TEI_NS}}}p[@{{{P_NS}}}start]")
end_markers = root.findall(f".//{{{TEI_NS}}}p[@{{{P_NS}}}end]")
self.assertGreater(len(start_markers), 0, "Expected p:start markers on tei:p")
self.assertGreater(len(end_markers), 0, "Expected p:end markers on tei:p")
# Reconstruction happens in the compiler now, so markers should not survive
start_markers = root.findall(f".//*[@{{{P_NS}}}start]")
end_markers = root.findall(f".//*[@{{{P_NS}}}end]")
self.assertEqual(len(start_markers), 0, "p:start markers should be consumed by reconstruction")
self.assertEqual(len(end_markers), 0, "p:end markers should be consumed by reconstruction")

# Still expect parallel structure to exist in compiled output
parallels = root.findall(f".//{{{P_NS}}}parallel")
self.assertGreater(len(parallels), 0)

def test_start_end_marker_ids_match(self):
"""Every p:start ID should have a corresponding p:end ID."""
"""Legacy marker pairing test: markers should not be present post-reconstruct."""
result = self._compile_primary()
all_xml = "".join(etree.tostring(el, encoding="unicode") for el in result)
root = etree.fromstring(f"<root>{all_xml}</root>")

start_ids = set()
end_ids = set()
for el in root.iter():
sid = el.get(f"{{{P_NS}}}start")
eid = el.get(f"{{{P_NS}}}end")
if sid:
start_ids.add(sid)
if eid:
end_ids.add(eid)

self.assertEqual(start_ids, end_ids, "Every p:start should have a matching p:end")
self.assertEqual(len(root.findall(f".//*[@{{{P_NS}}}start]")), 0)
self.assertEqual(len(root.findall(f".//*[@{{{P_NS}}}end]")), 0)

def test_column_order_attribute(self):
"""p:parallel elements should have a column-order attribute."""
Expand Down