diff --git a/.gitignore b/.gitignore index 39c39c189743..829e0075be7c 100644 --- a/.gitignore +++ b/.gitignore @@ -107,6 +107,7 @@ result doc/unreleased.rst doc/refman-html doc/refman-pdf +doc/refman-indices doc/common/version.tex doc/faq/html/ doc/faq/axioms.eps diff --git a/Makefile b/Makefile index c604a4a5abd4..20d0bbadbd1f 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ # Dune Makefile for Rocq .PHONY: help help-install states world rocqide watch check # Main developer targets -.PHONY: refman-html refman-pdf corelib-html apidoc # Documentation targets +.PHONY: refman-html refman-pdf refman-indices corelib-html apidoc # Documentation targets .PHONY: test-suite dev-targets .PHONY: fmt ocheck obuild ireport clean # Maintenance targets .PHONY: dunestrap release install # Miscellaneous @@ -63,6 +63,7 @@ help: @echo "" @echo " - refman-html: build Rocq's reference manual [HTML version]" @echo " - refman-pdf: build Rocq's reference manual [PDF version]" + @echo " - refman-indices: build Rocq's JSON indices" @echo " - corelib-html: build Rocq's Corelib documentation [HTML version]" @echo " - apidoc: build ML API documentation" @echo "" @@ -192,6 +193,10 @@ refman-pdf: world doc/unreleased.rst +$(WITHPYPATH) $(WITHJOBS) dune exec -- sphinx-build -q -W -b latex doc/sphinx doc/refman-pdf $(MAKE) -C doc/refman-pdf LATEXMKOPTS=-silent +refman-indices: world doc/unreleased.rst + rm -rf doc/refman-indices + $(WITHPYPATH) dune exec -- sphinx-build -q -W -b indices doc/sphinx doc/refman-indices + corelib-html: dunestrap dune build @corelib-html diff --git a/doc/README.md b/doc/README.md index ee32e0e56cf9..913d791259f5 100644 --- a/doc/README.md +++ b/doc/README.md @@ -71,6 +71,71 @@ Or if you want to use less disk space: apt install texlive-latex-extra texlive-fonts-recommended texlive-xetex \ latexmk fonts-freefont-otf +### JSON indices + +The documentation generator can also produce computer-readable JSON files for individual indices +with `make refman-indices`. These files are meant to be useful for tool developers, for instance +to power autocompletion in editors. They output files are found in `doc/refman-indices/*.json`. + +Each JSON index has the following shape: + +```typescript +{ + // a key is the name of the index entry + [string]: { + // the refman documentation page path for that entry + "documentation_path": string. + // the anchor on that page that leads to the documentation entry + "documentation_anchor": string, + // the grammar description + "syntax": Syntax, + // a simpler, plain text documentation + "documentation": string, + } +} +``` + +Where `Syntax` can be used to match some text against the index entry or to propose the +user a code snippet. It is of the following shape: + +```ts +type Syntax = + | ["Literal", Literal] + | ["Reference", Reference] + | ["Alternative", Alternative] + | ["Repeat", Repeat] + +// the literal string +type Literal = { + value: string + // an optional subscript, for example "1" which should be rendered as "₁" + subscript: string | null +} + +// a reference to a different entry in the documentation +type Reference = { + value: string + // an optional subscript, for example "1" which should be rendered as "₁" + subscript: string | null +} + +// an alternative between a list of options +type Alternative = { + children: Syntax[] +} + +// repetition of a list of syntax nodes +type Repeat = { + // nodes have to repeat at least this many times + min: number + // nodes can repeat up to and including this many times + max: number | null + // each repetition has to be separated with this + separator: string | null + children: Syntax[] +} +``` + ### Setting the locale for Python Make sure that the locale is configured on your platform so that Python encodes @@ -106,6 +171,9 @@ The current documentation targets are: - `make corelib-html` Build Rocq core library documentation into `_build/default/doc/corelib/html` +- `make refman-indices` + Build the JSON indices into `doc/refman-indices` + - `make apidoc` Build the ML API's documentation into `_build/default/_doc/_html` diff --git a/doc/tools/rocqrst/notations/object.py b/doc/tools/rocqrst/notations/object.py new file mode 100644 index 000000000000..170bd54ceba0 --- /dev/null +++ b/doc/tools/rocqrst/notations/object.py @@ -0,0 +1,122 @@ +from dataclasses import asdict, dataclass +from typing import Self + +from .parsing import parse +from .TacticNotationsParser import TacticNotationsParser +from .TacticNotationsVisitor import TacticNotationsVisitor + + +class NotationObject: + def asdict(self): + return asdict(self) + +TaggedNotationObject = tuple[str, NotationObject] + +@dataclass +class Literal(NotationObject): + value: str + subscript: str | None + @classmethod + def new(cls, value: Self) -> tuple[str, Self]: + return ("Literal", value) + +@dataclass +class Reference(NotationObject): + value: str + subscript: str | None + @classmethod + def new(cls, value: Self) -> tuple[str, Self]: + return ("Reference", value) + +@dataclass +class Alternative(NotationObject): + children: list[TaggedNotationObject] + @classmethod + def new(cls, value: Self) -> tuple[str, Self]: + return ("Alternative", value) + +@dataclass +class Repeat(NotationObject): + min: int + max: int | None + separator: str | None + children: list[TaggedNotationObject] + @classmethod + def new(cls, value: Self) -> tuple[str, Self]: + return ("Repeat", value) + +class TacticNotationsToObjectVisitor(TacticNotationsVisitor): + def defaultResult(self): + return [] + + def aggregateResult(self, aggregate, nextResult): + # Flattening results into a single list of nodes + if nextResult: + if isinstance(nextResult, list): + aggregate.extend(nextResult) + else: + aggregate.append(nextResult) + return aggregate + + def visitAlternative(self, ctx: TacticNotationsParser.AlternativeContext): + return [Alternative.new(Alternative(children=self.visitChildren(ctx)))] + + def visitAltblock(self, ctx: TacticNotationsParser.AltblockContext): + return self.visitChildren(ctx) + + def visitRepeat(self, ctx: TacticNotationsParser.RepeatContext): + separator = ctx.ATOM() or ctx.PIPE() + # skip the '{' + repeat_marker = ctx.LGROUP().getText()[1] + + min_rep, max_rep = None, None + match repeat_marker: + case "?": + min_rep, max_rep = 0, 1 + case "+": + min_rep, max_rep = 1, None + case "*": + min_rep, max_rep = 0, None + case _: + raise ValueError(f"Unexpected repeat marker: {repeat_marker}") + + return [Repeat.new(Repeat( + min=min_rep, + max=max_rep, + separator=separator.getText() if separator else None, + children=self.visitChildren(ctx) + ))] + + def visitCurlies(self, ctx: TacticNotationsParser.CurliesContext): + return [ + Literal.new(Literal(value=ctx.LBRACE().getText(), subscript=None)), + *self.visitChildren(ctx), + Literal.new(Literal(value=ctx.RBRACE().getText(), subscript=None))] + + def visitAtomic(self, ctx: TacticNotationsParser.AtomicContext): + return [Literal.new(Literal( + value=ctx.ATOM().getText(), + # skip '__' + subscript=ctx.SUB().getText()[2:] if ctx.SUB() else None + ))] + + def visitHole(self, ctx: TacticNotationsParser.HoleContext): + return [Reference.new(Reference( + value=ctx.ID().getText()[1:], + # skip '__' + subscript=ctx.SUB().getText()[2:] if ctx.SUB() else None + ))] + + def visitEscaped(self, ctx: TacticNotationsParser.EscapedContext): + return [Literal.new(Literal( + value=ctx.ESCAPED().getText().replace("%", ""), + subscript=None + ))] + +def objectify(notation: str) -> list[TaggedNotationObject]: + """Translate a notation into an object. + + It is essentially a simplified and normalized version of the ANTLR AST. + """ + visitor = TacticNotationsToObjectVisitor() + return visitor.visit(parse(notation)) diff --git a/doc/tools/rocqrst/rocqdomain.py b/doc/tools/rocqrst/rocqdomain.py index 65051ffcce90..08e7c7225438 100644 --- a/doc/tools/rocqrst/rocqdomain.py +++ b/doc/tools/rocqrst/rocqdomain.py @@ -21,9 +21,11 @@ # pylint: disable=import-outside-toplevel, abstract-method, too-many-lines import os +import json import re import shlex from itertools import chain +from dataclasses import dataclass from collections import defaultdict from docutils import nodes, utils @@ -33,6 +35,7 @@ from docutils.parsers.rst.directives.admonitions import BaseAdmonition from sphinx import addnodes, version_info as sphinx_version +from sphinx.application import Sphinx from sphinx.directives import ObjectDescription from sphinx.domains import Domain, ObjType, Index from sphinx.errors import ExtensionError @@ -41,12 +44,14 @@ from sphinx.util.logging import getLogger, get_node_location from sphinx.util.nodes import set_source_info, set_role_source_info, make_refnode from sphinx.writers.latex import LaTeXTranslator +from sphinx.builders.text import TextBuilder from . import rocqdoc from .repl import ansicolors from .repl.rocqtop import RocqTop, RocqTopError from .notations.parsing import ParseError from .notations.sphinx import sphinxify +from .notations import object as object_notations from .notations.plain import stringify_with_ellipses # FIXME: Patch this in Sphinx @@ -84,6 +89,14 @@ def visit_desc_signature(self, node): Offending notation: {} Error message: {}""" +@dataclass +class SubdomainItemData: + """Data stored for each object in a subdomain""" + docname: str + objtype: str + targetid: str + syntax: list[object_notations.TaggedNotationObject] + def notation_to_sphinx(notation, source, line, rawtext=None): """Parse notation and wrap it in an inline node""" try: @@ -170,6 +183,14 @@ def _render_signature(self, signature, signode): """Render a signature, placing resulting nodes into signode.""" raise NotImplementedError(self) + def _signature_to_syntax(self, signature: str) -> list[object_notations.TaggedNotationObject]: + """Convert a signature into a syntax tree, used in generated plain JSON indices. + + By default, returns the signature as a literal string. + + """ + return [object_notations.Literal.new(object_notations.Literal(value=signature, subscript=None))] + option_spec = { # Explicit object naming 'name': directives.unchanged, @@ -179,7 +200,7 @@ def _render_signature(self, signature, signode): 'noindex': directives.flag } - def subdomain_data(self): + def subdomain_data(self) -> dict[str, SubdomainItemData]: if self.subdomain is None: raise ValueError() return self.env.domaindata['rocq']['objects'][self.subdomain] @@ -207,14 +228,14 @@ def handle_signature(self, signature, signode): names = [name] if name else None return names - def _warn_if_duplicate_name(self, objects, name, signode): + def _warn_if_duplicate_name(self, objects: dict[str, SubdomainItemData], name: str, signode): """Check that two objects in the same domain don't have the same name.""" if name in objects: MSG = 'Duplicate name {} (other is in {}) attached to {}' - msg = MSG.format(name, self.env.doc2path(objects[name][0]), signode) + msg = MSG.format(name, self.env.doc2path(objects[name].docname), signode) self.state_machine.reporter.warning(msg, line=self.lineno) - def _record_name(self, name, target_id, signode): + def _record_name(self, name, target_id, signature: str, signode): """Record a `name` in the current subdomain, mapping it to `target_id`. Warns if another object of the same name already exists; `signode` is @@ -222,19 +243,19 @@ def _record_name(self, name, target_id, signode): """ names_in_subdomain = self.subdomain_data() self._warn_if_duplicate_name(names_in_subdomain, name, signode) - names_in_subdomain[name] = (self.env.docname, self.objtype, target_id) + names_in_subdomain[name] = SubdomainItemData(self.env.docname, self.objtype, target_id, self._signature_to_syntax(signature)) def _target_id(self, name): return make_target(self.objtype, make_id(name)) - def _add_target(self, signode, name): + def _add_target(self, signature: str, signode, name): """Register a link target ‘name’, pointing to signode.""" targetid = self._target_id(name) if targetid not in self.state.document.ids: signode['ids'].append(targetid) signode['names'].append(name) signode['first'] = (not self.names) - self._record_name(name, targetid, signode) + self._record_name(name, targetid, signature, signode) else: # We don't warn for duplicates in the SSReflect chapter, because # it's the style of this chapter to repeat all the defined @@ -253,14 +274,14 @@ def _add_index_entry(self, name, target): index_text += " " + self.index_suffix self.indexnode['entries'].append(('single', index_text, target, '', None)) - def add_target_and_index(self, names, _, signode): + def add_target_and_index(self, names, signature: str, signode): """Attach a link target to `signode` and index entries for `names`. This is only called (from ``ObjectDescription.run``) if ``:noindex:`` isn't specified.""" if names: for name in names: if isinstance(name, str) and name.startswith('_'): continue - target = self._add_target(signode, name) + target = self._add_target(signature, signode, name) self._add_index_entry(name, target) self.state.document.note_explicit_target(signode) @@ -320,6 +341,9 @@ class NotationObject(DocumentableObject): Objects that inherit from this class can use the notation grammar (“{+ …}”, “@…”, etc.) in their signature. """ + def _signature_to_syntax(self, signature: str) -> list[object_notations.TaggedNotationObject]: + return object_notations.objectify(signature) + def _render_signature(self, signature, signode): position = self.state_machine.get_source_and_line(self.lineno) tacn_node = notation_to_sphinx(signature, *position) @@ -457,7 +481,7 @@ def _add_index_entry(self, name, target): def _target_id(self, name): return make_id('grammar-token-{}'.format(name[1])) - def _record_name(self, name, targetid, signode): + def _record_name(self, name, targetid, signature, signode): env = self.state.document.settings.env objects = env.domaindata['std']['objects'] self._warn_if_duplicate_name(objects, name, signode) @@ -975,12 +999,12 @@ def generate(self, docnames=None): items = chain(*(self.domain.data['objects'][subdomain].items() for subdomain in self.subdomains)) - for itemname, (docname, _, anchor) in sorted(items, key=lambda x: x[0].lower()): - if docnames and docname not in docnames: + for itemname, data in sorted(items, key=lambda x: x[0].lower()): + if docnames and data.docname not in docnames: continue entries = content[itemname[0].lower()] - entries.append([itemname, 0, docname, anchor, '', '', '']) + entries.append([itemname, 0, data.docname, data.targetid, '', '', '']) collapse = False content = sorted(content.items()) @@ -1158,7 +1182,7 @@ class RocqDomain(Domain): initial_data = { # Collect everything under a key that we control, since Sphinx adds # others, such as “version” - 'objects' : { # subdomain → name → docname, objtype, targetid + 'objects' : { # subdomain → name → SubdomainItemData 'cmd': {}, 'tacn': {}, 'opt': {}, @@ -1182,8 +1206,8 @@ def find_index_by_name(targetid): def get_objects(self): # Used for searching and object inventories (intersphinx) for _, objects in self.data['objects'].items(): - for name, (docname, objtype, targetid) in objects.items(): - yield (name, name, objtype, docname, targetid, self.object_types[objtype].attrs['searchprio']) + for name, data in objects.items(): + yield (name, name, data.objtype, data.docname, data.targetid, self.object_types[data.objtype].attrs['searchprio']) for index in self.indices: yield (index.name, index.localname, 'index', "rocq-" + index.name, '', -1) @@ -1191,11 +1215,11 @@ def merge_domaindata(self, docnames, otherdata): DUP = "Duplicate declaration: '{}' also defined in '{}'.\n" for subdomain, their_objects in otherdata['objects'].items(): our_objects = self.data['objects'][subdomain] - for name, (docname, objtype, targetid) in their_objects.items(): - if docname in docnames: + for name, data in their_objects.items(): + if data.docname in docnames: if name in our_objects: - self.env.warn(docname, DUP.format(name, our_objects[name][0])) - our_objects[name] = (docname, objtype, targetid) + self.env.warn(data.docname, DUP.format(name, our_objects[name].docname)) + our_objects[name] = data def resolve_xref(self, env, fromdocname, builder, role, targetname, node, contnode): # ‘target’ is the name that was written in the document @@ -1207,14 +1231,13 @@ def resolve_xref(self, env, fromdocname, builder, role, targetname, node, contno else: resolved = self.data['objects'][role].get(targetname) if resolved: - (todocname, _, targetid) = resolved - return make_refnode(builder, fromdocname, todocname, targetid, contnode, targetname) + return make_refnode(builder, fromdocname, resolved.docname, resolved.targetid, contnode, targetname) return None def clear_doc(self, docname_to_clear): for subdomain_objects in self.data['objects'].values(): - for name, (docname, _, _) in list(subdomain_objects.items()): - if docname == docname_to_clear: + for name, data in list(subdomain_objects.items()): + if data.docname == docname_to_clear: del subdomain_objects[name] def is_rocqtop_or_rocqdoc_block(node): @@ -1238,6 +1261,63 @@ def simplify_source_code_blocks_for_latex(app, doctree, fromdocname): # pylint: else: node.replace_self(nodes.literal_block(node.rawsource, node.rawsource, language="Coq")) + +class IndicesBuilder(TextBuilder): + """Custom sphinx builder to generate JSON files containing the content of our indices. + + It is based on the TextBuilder to be able to generate the content of the documentation for each object.""" + name = 'indices' + format = 'json' + allow_parallel = True + + def init(self): + super().init() + self.rendered: dict[str, str] = {} + + def write_doc(self, docname: str, doctree: nodes.document) -> None: + domain = self.env.get_domain('rocq') + # find all nodes that are handled by our rocq domain + for node in doctree.findall(lambda n: isinstance(n, nodes.Element) and n.get('objtype') in domain.object_types): + # all IDs will be associated to this content + all_target_ids = [] + for sig in node.traverse(addnodes.desc_signature): + all_target_ids.extend(sig.get('ids', [])) + + # the content is the next node + content_node = node.next_node(addnodes.desc_content) + + if all_target_ids and content_node: + # translate sphinx nodes to text and associate it to all IDs + translator = self.create_translator(doctree, self) + translator.visit_document(content_node) + content_node.walkabout(translator) + translator.depart_document(content_node) + text = translator.body.strip() + for tid in all_target_ids: + self.rendered[tid] = text + + def finish(self): + domain = self.env.get_domain('rocq') + + def write_json(index: RocqSubdomainsIndex): + items: list[tuple[str, SubdomainItemData]] = chain(*(domain.data['objects'][subdomain].items() + for subdomain in index.subdomains)) + + output_data = {} + for name, data in items: + output_data[name] = { + "documentation_path": data.docname, + "documentation_anchor": data.targetid, + "syntax": [[x[0], x[1].asdict()] for x in data.syntax], + "documentation": self.rendered.get(data.targetid, "") + } + + with open(self.outdir / f"{index.name}.json", "w") as f: + json.dump(output_data, f, indent=2) + + for index in RocqDomain.indices: + write_json(index) + ROCQ_ADDITIONAL_DIRECTIVES = [RocqtopDirective, RocqdocDirective, ExampleDirective, @@ -1260,6 +1340,7 @@ def setup(app): # Add domain, directives, and roles app.add_domain(RocqDomain) + app.add_builder(IndicesBuilder) app.add_index_to_domain('std', StdGlossaryIndex) for role in ROCQ_ADDITIONAL_ROLES: