|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +_scripts/inject_hashes.py |
| 4 | +
|
| 5 | +Post-processes every HTML file in _site/ to add a data-block-hash attribute |
| 6 | +to each <pre> element whose code block has a compiled WASM binary in |
| 7 | +assets/wasm/blocks/. |
| 8 | +
|
| 9 | +Run this AFTER `bundle exec jekyll build` and AFTER `compile_blocks.py`: |
| 10 | +
|
| 11 | + python _scripts/compile_blocks.py |
| 12 | + bundle exec jekyll build --baseurl /docs |
| 13 | + python _scripts/inject_hashes.py |
| 14 | +
|
| 15 | +The JS (main.js) then reads pre.dataset.blockHash directly instead of |
| 16 | +re-computing a hash from code.textContent, eliminating the fragile |
| 17 | +browser-side hash computation. |
| 18 | +""" |
| 19 | + |
| 20 | +import hashlib |
| 21 | +import html as html_mod |
| 22 | +import re |
| 23 | +from pathlib import Path |
| 24 | + |
# Input/output locations, both resolved relative to the current working
# directory: the built site to post-process and the compiled WASM binaries.
SITE_DIR = Path('_site')
BLOCKS_DIR = Path('assets/wasm/blocks')

# Language tags whose code blocks never receive a hash. The empty string
# covers <code> elements that carry no `language-…` class at all.
NON_EXECUTABLE = {
    # shells
    'bash', 'sh', 'shell', 'powershell', 'cmd',
    # scripting / markup
    'js', 'javascript', 'markdown', 'dockerfile',
    # data and plain-text formats
    'yaml', 'toml', 'json', 'plaintext', 'text', 'output',
    # other excluded languages
    'wat', 'rust',
    # no language detected
    '',
}

# Hashes with a compiled binary, taken from the file stems in BLOCKS_DIR.
# Collected once at import time so each lookup is a set-membership test.
_available = {wasm.stem for wasm in BLOCKS_DIR.glob('*.wasm')}
| 36 | + |
| 37 | + |
def _lang_from_attrs(attrs: str) -> str:
    """Return the lower-cased name following ``language-`` in *attrs*.

    Only the leading run of word characters after ``language-`` is taken.
    An empty string is returned when *attrs* is falsy or has no such token.
    """
    found = re.search(r'language-(\w+)', attrs or '')
    if found is None:
        return ''
    return found.group(1).lower()
| 41 | + |
| 42 | + |
def _text_content(html_str: str) -> str:
    """Approximate ``element.textContent`` for an HTML fragment.

    Anything that looks like a tag (``<...>``) is removed first, then the
    character references in what remains are decoded.
    """
    without_tags = re.sub(r'<[^>]+>', '', html_str)
    return html_mod.unescape(without_tags)
| 46 | + |
| 47 | + |
def _sha16(text: str) -> str:
    """Return the first 16 hex digits of the SHA-256 of *text* (UTF-8)."""
    digest = hashlib.sha256(text.encode('utf-8'))
    return digest.hexdigest()[:16]
| 50 | + |
| 51 | + |
# Pattern for a <pre> element that wraps a single <code> child.
# Capture groups:
#   1  '<pre'                      4  the full '<code ...>' opening tag
#   2  attributes of <pre>         5  attributes of <code>
#   3  '>' closing the <pre> tag   6  raw HTML inside <code>
# DOTALL lets group 6 span the line breaks inside a code block.
_PRE_RE = re.compile(
    r'(<pre\b)([^>]*)(>)\s*(<code\b([^>]*)>)(.*?)</code>\s*</pre>',
    flags=re.DOTALL,
)
| 57 | + |
| 58 | + |
def _inject(content: str) -> str:
    """Return *content* with ``data-block-hash`` added to eligible <pre> tags.

    A ``<pre><code>…</code></pre>`` block matched by ``_PRE_RE`` is eligible
    when all of the following hold:

    * its ``<pre>`` tag does not already carry ``data-block-hash``;
    * the language read from the ``<code>`` attributes is not in
      ``NON_EXECUTABLE``;
    * the stripped text content of the ``<code>`` body is non-empty;
    * the 16-hex-digit hash of that text is present in ``_available``.

    Ineligible blocks, and all text outside matched blocks, are returned
    unchanged. For eligible blocks only the attribute is inserted; every
    other character of the original markup is preserved.
    """
    def _replace(m: re.Match) -> str:
        whole = m.group(0)
        pre_attrs = m.group(2)   # everything between '<pre' and '>'
        code_attrs = m.group(5)  # attrs inside <code>
        code_body = m.group(6)   # raw HTML inside <code>

        # Skip if hash already injected (keeps the script idempotent).
        if 'data-block-hash' in pre_attrs:
            return whole

        if _lang_from_attrs(code_attrs) in NON_EXECUTABLE:
            return whole

        text = _text_content(code_body).strip()
        if not text:
            return whole

        h = _sha16(text)
        if h not in _available:
            return whole

        # Splice the attribute in just before the '>' that closes the <pre>
        # tag instead of rebuilding the markup from the captured groups:
        # rebuilding would drop any whitespace the pattern's `\s*` consumed
        # between <pre>/<code> and </code>/</pre>.
        cut = m.end(2) - m.start()
        return f'{whole[:cut]} data-block-hash="{h}"{whole[cut:]}'

    return _PRE_RE.sub(_replace, content)
| 87 | + |
| 88 | + |
def main() -> None:
    """Inject block hashes into every HTML file under ``SITE_DIR``.

    Each ``*.html`` file found recursively is read as UTF-8, passed through
    ``_inject`` and written back only if the result differs. A one-line
    summary is printed at the end.

    Raises:
        SystemExit: with status 1 when ``SITE_DIR`` does not exist.
    """
    if not SITE_DIR.exists():
        print(f'ERROR: {SITE_DIR} not found — run `bundle exec jekyll build` first.')
        raise SystemExit(1)
    if not BLOCKS_DIR.exists() or not _available:
        # Not fatal: the run simply leaves every file untouched.
        print(f'WARNING: no compiled WASM blocks found in {BLOCKS_DIR}.')

    modified = 0
    total = 0
    for html_file in SITE_DIR.rglob('*.html'):
        # A directory whose name ends in '.html' also matches the glob;
        # reading it would raise, so only regular files are processed.
        if not html_file.is_file():
            continue
        # Count while iterating rather than walking the tree a second time.
        total += 1
        original = html_file.read_text(encoding='utf-8')
        updated = _inject(original)
        if updated != original:
            html_file.write_text(updated, encoding='utf-8')
            modified += 1

    print(f'inject_hashes: {modified}/{total} HTML files updated '
          f'({len(_available)} WASM blocks available).')


if __name__ == '__main__':
    main()
0 commit comments