From e95a26d15699acb7621624b9f217f4ed2e8d291e Mon Sep 17 00:00:00 2001
From: Paddy Lee
Date: Fri, 27 Mar 2026 12:57:21 +0800
Subject: [PATCH] Add HTML report generator

---
 generate_html.py                          |   4 +
 src/utils/generate_html.py                | 635 ++++++++++++++++++++++
 templates/agents/session_instructions.txt |   7 +-
 3 files changed, 645 insertions(+), 1 deletion(-)
 create mode 100644 generate_html.py
 create mode 100644 src/utils/generate_html.py

diff --git a/generate_html.py b/generate_html.py
new file mode 100644
index 0000000..095e788
--- /dev/null
+++ b/generate_html.py
@@ -0,0 +1,4 @@
+"""Thin wrapper so you can run `python generate_html.py <input>` from the repo root."""
+from src.utils.generate_html import main
+if __name__ == "__main__":
+    main()
diff --git a/src/utils/generate_html.py b/src/utils/generate_html.py
new file mode 100644
index 0000000..ca87166
--- /dev/null
+++ b/src/utils/generate_html.py
@@ -0,0 +1,635 @@
+"""
+HTML Generator
+Converts REPORT.md + figures into a self-contained styled HTML blog post.
+
+Usage:
+    python generate_html.py <workspace_dir>
+    python generate_html.py <path/to/REPORT.md>
+    python generate_html.py <workspace_dir> --output REPORT.html --title "My Experiment"
+"""
+
+import argparse
+import base64
+import html
+import mimetypes
+import re
+import sys
+from datetime import datetime
+from pathlib import Path
+
+
+# Dependency check
+def _check_deps():
+    """Exit with status 1 and an install hint if markdown or pygments is missing.
+
+    Runs at import time (see the call right below) so the user gets a
+    readable message instead of an ImportError traceback from the
+    third-party imports that follow.
+    """
+    missing = []
+    try:
+        import markdown  # noqa: F401
+    except ImportError:
+        missing.append("markdown")
+    try:
+        import pygments  # noqa: F401
+    except ImportError:
+        missing.append("pygments")
+    if missing:
+        print(f"[ERROR] Missing dependencies: {', '.join(missing)}")
+        print(f"Install with: pip install {' '.join(missing)}")
+        sys.exit(1)
+
+_check_deps()
+
+import markdown
+from markdown.extensions.codehilite import CodeHiliteExtension
+from markdown.extensions.fenced_code import FencedCodeExtension
+from markdown.extensions.tables import TableExtension
+from markdown.extensions.toc import TocExtension
+from pygments.formatters import HtmlFormatter
+
+
+# Figure discovery
+IMAGE_SUFFIXES = {".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp"}
+FIGURE_DIRS = [".", "results", "figures", "paper_draft/figures", "plots", "output"]
+
+
+def _find_figure(name: str, base: Path) -> Path | None:
+    """Resolve an image reference relative to base, checking common figure dirs.
+
+    The reference is tried as written first, then by bare file name inside
+    each directory listed in FIGURE_DIRS. Returns the resolved path of the
+    first match, or None when nothing matches.
+
+    Only regular files whose suffix is in IMAGE_SUFFIXES are accepted: a
+    directory would make the later read_bytes() call fail, and a non-image
+    file should not be inlined into the report.
+    """
+    if Path(name).suffix.lower() not in IMAGE_SUFFIXES:
+        return None
+    candidates = [base / name]
+    candidates.extend(base / d / Path(name).name for d in FIGURE_DIRS)
+    for candidate in candidates:
+        candidate = candidate.resolve()
+        if candidate.is_file():
+            return candidate
+    return None
+
+
+def _embed_image(path: Path) -> str:
+    """Return a base64 data URI for the given image file.
+
+    The MIME type is guessed from the file name and falls back to
+    image/png when it cannot be determined.
+    """
+    mime, _ = mimetypes.guess_type(str(path))
+    mime = mime or "image/png"
+    data = base64.b64encode(path.read_bytes()).decode()
+    return f"data:{mime};base64,{data}"
+
+
+def _replace_image_refs(md_text: str, base: Path) -> str:
+    """Replace local markdown image references with base64 data URIs.
+
+    References that already point at http(s) or data: URLs, and references
+    that cannot be resolved to a file, are left exactly as written.
+    """
+    def replacer(m):
+        alt, src = m.group(1), m.group(2)
+        if src.startswith(("http://", "https://", "data:")):
+            return m.group(0)
+        resolved = _find_figure(src, base)
+        if resolved:
+            return f"![{alt}]({_embed_image(resolved)})"
+        return m.group(0)  # leave unchanged if not found
+
+    return re.sub(r"!\[([^\]]*)\]\(([^)]+)\)", replacer, md_text)
+
+
+# md -> HTML
+def _ensure_blank_line_before_tables(text: str) -> str:
+    """Insert a blank line before table rows that aren't already preceded by one.
+
+    The nl2br extension prevents the tables extension from detecting pipe-table
+    syntax unless the table block is separated from the preceding text by a blank
+    line. This pre-pass normalises the markdown without touching the source file.
+
+    Lines inside fenced code blocks (``` or ~~~) are never modified, so code
+    that happens to start a line with "|" is not split by a spurious blank line.
+    """
+    out = []
+    fence = None  # opening fence marker while inside a fenced code block
+    for line in text.splitlines():
+        marker = line.lstrip()[:3]
+        if fence is None:
+            if marker in ("```", "~~~"):
+                fence = marker
+            elif (
+                line.startswith("|")
+                and out
+                and out[-1].strip() != ""
+                and not out[-1].startswith("|")
+            ):
+                # Previous line is non-blank text, not a table row: separate them.
+                out.append("")
+        elif marker == fence:
+            fence = None
+        out.append(line)
+    return "\n".join(out)
+
+
+def _convert_markdown(text: str) -> tuple[str, str]:
+    """Return (body_html, toc_html) for the given markdown text."""
+    text = _ensure_blank_line_before_tables(text)
+    toc_ext = TocExtension(permalink=True, title="Contents")
+    hilite_ext = CodeHiliteExtension(guess_lang=False, linenums=False)
+    md = markdown.Markdown(
+        extensions=[
+            toc_ext,
+            hilite_ext,
+            FencedCodeExtension(),
+            TableExtension(),
+            "meta",
+            "nl2br",
+            "sane_lists",
+            "smarty",
+        ]
+    )
+    body = md.convert(text)
+    toc = md.toc  # type: ignore[attr-defined]
+    return body, toc
+
+# Metadata extraction
+def _extract_title(md_text: str, fallback: str) -> str:
+    """Return the text of the first "# " heading line, or fallback if none exists."""
+    m = re.search(r"^#\s+(.+)$", md_text, re.MULTILINE)
+    return m.group(1).strip() if m else fallback
+
+
+def _extract_meta(workspace: Path) -> dict:
+    """Pull experiment metadata from idea.yaml if present.
+
+    Reads <workspace>/.neurico/idea.yaml and returns a dict with the keys
+    "domain" and "author" when the file holds a mapping. Returns an empty
+    dict when the file is absent or cannot be read or parsed; metadata is
+    optional, so a failure is reported on stderr and never aborts the run.
+    """
+    meta: dict = {}
+    idea_path = workspace / ".neurico" / "idea.yaml"
+    if not idea_path.exists():
+        return meta
+    try:
+        import yaml
+        data = yaml.safe_load(idea_path.read_text(encoding="utf-8"))
+    except Exception as exc:
+        # Deliberately broad: PyYAML may be missing, so yaml.YAMLError cannot
+        # be named here. Report the problem rather than hiding it.
+        print(f"[WARN] Could not read metadata from {idea_path}: {exc}", file=sys.stderr)
+        return meta
+    if isinstance(data, dict):
+        meta["domain"] = data.get("domain", "")
+        meta["author"] = data.get("author", "")
+    return meta
+
+
+# CSS / HTML template
+def _pygments_css() -> str:
+    """Return the github-dark Pygments stylesheet scoped to .codehilite."""
+    return HtmlFormatter(style="github-dark").get_style_defs(".codehilite")
+
+
+# NOTE(review): in this copy of the patch the template's markup is not visible;
+# only the {title} and {body} placeholders survive, while generate() also passes
+# pygments_css, toc, meta_pills and generated_at. Confirm against the committed
+# file before applying.
+_HTML_TEMPLATE = """\
+
+
+
+
+
+ {title}
+
+
+
+
+
+
+
+
+
+
+
+ {body}
+
+
+
+
+
+
+
+
+
+"""
+
+
+# Main logic
+def _build_meta_pills(title: str, workspace: Path, extra_meta: dict, date_str: str) -> str:
+    """Return the newline-joined metadata entries shown with the report.
+
+    Entries are, in order: domain and author (each only when non-empty in
+    extra_meta), the date string, and the workspace directory name. Every
+    value is HTML-escaped because it comes from user-editable sources
+    (idea.yaml, directory names) and is placed into the page as markup.
+    The title parameter is accepted for interface compatibility and unused.
+    """
+    pills = []
+    if extra_meta.get("domain"):
+        pills.append(f'{html.escape(str(extra_meta["domain"]))}')
+    if extra_meta.get("author"):
+        pills.append(f'{html.escape(str(extra_meta["author"]))}')
+    pills.append(f'{html.escape(date_str)}')
+    if workspace.name:
+        pills.append(f'{html.escape(workspace.name)}')
+    return "\n ".join(pills)
+
+
+def generate(report_path: Path, output_path: Path | None = None, title_override: str | None = None):
+    """Render a markdown report to a self-contained HTML file.
+
+    Args:
+        report_path: Path to the markdown report, or a directory containing
+            REPORT.md.
+        output_path: Destination HTML file. Defaults to REPORT.html next to
+            the report. Missing parent directories are created.
+        title_override: Page title. Defaults to the first H1 of the report,
+            falling back to the report file's stem.
+
+    Returns:
+        The path the HTML was written to.
+
+    Exits the process with status 1 if the report file does not exist.
+    """
+    if report_path.is_dir():
+        report_path = report_path / "REPORT.md"
+
+    if not report_path.exists():
+        print(f"[ERROR] File not found: {report_path}")
+        sys.exit(1)
+
+    workspace = report_path.parent
+    md_text = report_path.read_text(encoding="utf-8")
+
+    # Embed images
+    md_text = _replace_image_refs(md_text, workspace)
+
+    # Determine title
+    title = title_override or _extract_title(md_text, report_path.stem)
+
+    # The leading H1 is intentionally left in the markdown so TocExtension can
+    # index it; the heading therefore also appears in the rendered body.
+
+    # Convert
+    body_html, toc_html = _convert_markdown(md_text)
+
+    # Metadata (sample the clock once so both timestamps agree)
+    extra_meta = _extract_meta(workspace)
+    now = datetime.now()
+    date_str = now.strftime("%B %d, %Y")
+    meta_pills = _build_meta_pills(title, workspace, extra_meta, date_str)
+
+    # Render; the title is plain text, so escape it for use inside markup
+    page = _HTML_TEMPLATE.format(
+        title=html.escape(title),
+        pygments_css=_pygments_css(),
+        toc=toc_html,
+        meta_pills=meta_pills,
+        body=body_html,
+        generated_at=now.strftime("%Y-%m-%d %H:%M"),
+    )
+
+    # Output
+    if output_path is None:
+        output_path = workspace / "REPORT.html"
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    output_path.write_text(page, encoding="utf-8")
+    print(f"[OK] Report written to: {output_path}")
+    return output_path
+
+
+# CLI
+def main():
+    """Parse command-line arguments and generate the HTML report."""
+    parser = argparse.ArgumentParser(
+        description="Convert NeuriCo REPORT.md + figures into a styled HTML blog post.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+    parser.add_argument(
+        "input",
+        help="Path to REPORT.md file or workspace directory containing it",
+    )
+    parser.add_argument(
+        "--output", "-o",
+        help="Output HTML file path (default: REPORT.html next to REPORT.md)",
+    )
+    parser.add_argument(
+        "--title", "-t",
+        help="Override the page title (default: first H1 in REPORT.md)",
+    )
+    args = parser.parse_args()
+
+    report_path = Path(args.input).expanduser().resolve()
+    output_path = Path(args.output).expanduser().resolve() if args.output else None
+
+    generate(report_path, output_path, args.title)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/templates/agents/session_instructions.txt b/templates/agents/session_instructions.txt
index 6609cc7..68fcf75 100644
--- a/templates/agents/session_instructions.txt
+++ b/templates/agents/session_instructions.txt
@@ -337,7 +337,12 @@ When you reach Phase 6, create these files with ACTUAL results from your experim
 
    Include key visualizations/tables inline (as markdown).
 
-2. README.md - Quick overview containing:
+2. REPORT.html - After writing REPORT.md, generate the HTML report by executing the following:
+
+   python3 /path/to/NeuriCo/src/utils/generate_html.py . --output REPORT.html
+
+   This produces a self-contained REPORT.html with embedded figures alongside REPORT.md.
+3. README.md - Quick overview containing:
    - Brief project description (2-3 sentences)
    - Key findings summary (bullet points, 3-5 main results from your experiments)
    - How to reproduce (environment setup, run instructions)