From e06cf134e11d4c7cc704b0be5ff9745027a01327 Mon Sep 17 00:00:00 2001 From: "Kacper Kowalik (Xarthisius)" Date: Sun, 29 Mar 2026 09:21:20 -0500 Subject: [PATCH 1/6] Migrate to jb2 --- _config.yml | 41 -------------------- _toc.yml | 62 ------------------------------ docs/glossary.md | 99 +++++++++++++++++++++++------------------------- myst.yml | 77 +++++++++++++++++++++++++++++++++++++ requirements.txt | 2 +- 5 files changed, 126 insertions(+), 155 deletions(-) delete mode 100644 _config.yml delete mode 100644 _toc.yml create mode 100644 myst.yml diff --git a/_config.yml b/_config.yml deleted file mode 100644 index 8e359a8..0000000 --- a/_config.yml +++ /dev/null @@ -1,41 +0,0 @@ -# Book settings -# Learn more at https://jupyterbook.org/customize/config.html - -title: Transparency Certified -author: The TRACE Team -logo: logo.png -only_build_toc_files: true -exclude_patterns: [venv] - -# Force re-execution of notebooks on each build. -# See https://jupyterbook.org/content/execute.html -execute: - execute_notebooks: force - -# Define the name of the latex output file for PDF builds -latex: - latex_documents: - targetname: book.tex - -# Add a bibtex file so that we can create citations -bibtex_bibfiles: - - references.bib - -# Information about where the book exists on the web -repository: - url: https://github.com/transparency-certified/trace-specification - path_to_book: docs - branch: main - -# Add GitHub buttons to your book -html: - favicon: favicon.ico - use_issues_button: true - use_repository_button: true - -sphinx: - config: - html_css_files: - - custom.css - html_theme_options: - search_bar_text: "Search..." 
diff --git a/_toc.yml b/_toc.yml deleted file mode 100644 index b001929..0000000 --- a/_toc.yml +++ /dev/null @@ -1,62 +0,0 @@ -# Table of contents -# Learn more at https://jupyterbook.org/customize/toc.html - -format: jb-book -root: docs/intro -options: - numbered: false - -parts: -- caption: TRACE Overview - chapters: - - file: docs/motivation - - file: docs/overview - - file: docs/users - - file: docs/barriers - - file: docs/case-profiles - sections: - - file: docs/case-profiles/rdc - - file: docs/case-profiles/bplim - - file: docs/case-profiles/twitter - - file: docs/case-profiles/ipums - - file: docs/examples - - file: docs/glossary - - file: docs/about - - file: docs/acknowledgements - - file: docs/references -- caption: TRACE Concepts - chapters: - - file: docs/questions - - file: docs/dev/signing - title: Signing & Verification - - file: docs/elements - - file: docs/conceptual-model -- caption: TRACE Specification - chapters: - - file: docs/specifications - title: Overview - - file: docs/tro-declaration-format - title: Declaration Format - - file: docs/tro-declaration-design - title: Declaration Design - - file: docs/tro-packages - title: TRO Packages - - file: docs/trov-vocabulary - title: Vocabulary Reference - - file: docs/trov-extension-guide - title: Extension Guide - - file: docs/trov-versioning-and-roadmap - title: Versioning & Roadmap - - file: docs/trov-prerelease - title: Pre-Release Reference -- caption: TRACE Tools - chapters: - - file: docs/infrastructure - - file: docs/trace-prototype - - file: docs/trace-toolkit - - file: docs/slurm-plugin - - file: docs/sample-implementation -- caption: Project Infrastructure - chapters: - - file: docs/documentation-infrastructure - title: Documentation Infrastructure diff --git a/docs/glossary.md b/docs/glossary.md index b79d578..1dde24b 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -3,84 +3,81 @@ A glossary of common terms used in the TRACE project. 
```{glossary} - Reproducibility - Obtaining consistent results using the same input data, computational steps, - methods, and code, and conditions of analysis [16]. +: Obtaining consistent results using the same input data, computational steps, + methods, and code, and conditions of analysis [16]. Transparency - The reporting of materials and methods in a manner that provides enough - information for others to independently assess and/or reproduce findings - (adapted from FASEB [29]). Research object (RO): Standards-based method of - bundling and describing research artifacts, typically via linked-data - technologies [31]. +: The reporting of materials and methods in a manner that provides enough + information for others to independently assess and/or reproduce findings + (adapted from FASEB [29]). Research object (RO): Standards-based method of + bundling and describing research artifacts, typically via linked-data + technologies [31]. Computational artifacts - Inputs, code, outputs, documentation and relevant details of the computational - environment required to ensure the transparency and reproducibility of results - obtained through computational means. +: Inputs, code, outputs, documentation and relevant details of the computational + environment required to ensure the transparency and reproducibility of results + obtained through computational means. Input - any file, database, or stream that is used by a computational workflow (or - workflow step) to obtain reported results. Examples include data and - configuration files. +: Any file, database, or stream that is used by a computational workflow (or + workflow step) to obtain reported results. Examples include data and + configuration files. Code - Scripts and/or source code that are not part of an installed software package - and used to execute steps of a computational workflow used to obtain reported - results. 
This includes code for data preparation, analysis, modeling, plotting, - etc., and is considered separate from the operating system or any installed - software packages (see environment below). +: Scripts and/or source code that are not part of an installed software package + and used to execute steps of a computational workflow used to obtain reported + results. This includes code for data preparation, analysis, modeling, plotting, + etc., and is considered separate from the operating system or any installed + software packages (see environment below). Output - Information created by a computational workflow (or workflow step) used to - obtain reported results. Examples of outputs include: cleaned or otherwise - pre-processed datafiles used for analysis; analytical or simulation results; - tables, figures, and numerical results; log files and other program outputs, - messages, and errors (e.g., stderr, stdout, log files). +: Information created by a computational workflow (or workflow step) used to + obtain reported results. Examples of outputs include: cleaned or otherwise + pre-processed datafiles used for analysis; analytical or simulation results; + tables, figures, and numerical results; log files and other program outputs, + messages, and errors (e.g., stderr, stdout, log files). Environment - The specific set of operating system and dependent software versions used to - execute a computational workflow used to obtain reported results. The - computational environment may be described as a list of operating system and - software versions or as a virtual machine or container image. +: The specific set of operating system and dependent software versions used to + execute a computational workflow used to obtain reported results. The + computational environment may be described as a list of operating system and + software versions or as a virtual machine or container image. 
Documentation - Materials providing additional information required to reproduce reported - results including step-by-step instructions for obtaining or accessing data; - setting up or accessing the environment; and running the complete computational - workflow. +: Materials providing additional information required to reproduce reported + results including step-by-step instructions for obtaining or accessing data; + setting up or accessing the environment; and running the complete computational + workflow. TRACE workflow - The complete sequence of steps taken to create and release a TRO from - author-provided artifacts. TRACE workflows may be manual or automated and may - include actions such as disclosure avoidance measures. +: The complete sequence of steps taken to create and release a TRO from + author-provided artifacts. TRACE workflows may be manual or automated and may + include actions such as disclosure avoidance measures. Sensitive data - Information that may be regulated by law due to possible risk of harm due to - their disclosure. +: Information that may be regulated by law due to possible risk of harm due to + its disclosure. Proprietary data - Information for which the rights of ownership are restricted so that the - ability to freely distribute the data is limited. (USGS) +: Information for which the rights of ownership are restricted so that the + ability to freely distribute the data is limited. (USGS) Transient data - Information that is not persisted. +: Information that is not persisted. Ephemeral data - Information that change rapidly over time and may be lost if not collected - immediately. +: Information that changes rapidly over time and may be lost if not collected + immediately. Streaming data - Information that is generated continuously, generally in large volumes, and - must be processed sequentially and incrementally over time (adapted from - https://aws.amazon.com/streaming-data/). 
Examples include information from - social networks, financial trading floors, and telemetry from connected - devices. +: Information that is generated continuously, generally in large volumes, and + must be processed sequentially and incrementally over time (adapted from + https://aws.amazon.com/streaming-data/). Examples include information from + social networks, financial trading floors, and telemetry from connected + devices. Disclosure avoidance - Methods of protecting confidentiality that may require modification of data. - (https://www.census.gov/topics/research/disclosure-avoidance.html) - - +: Methods of protecting confidentiality that may require modification of data. + (https://www.census.gov/topics/research/disclosure-avoidance.html) ``` diff --git a/myst.yml b/myst.yml new file mode 100644 index 0000000..d003bcd --- /dev/null +++ b/myst.yml @@ -0,0 +1,77 @@ +version: 1 +project: + title: Transparency Certified + authors: + - name: The TRACE Team + exclude: + - venv + github: transparency-certified/trace-specification + bibliography: + - references.bib + exports: + - format: pdf + template: plain_latex_book + output: exports/book.pdf + toc: + - file: docs/intro.md + - title: TRACE Overview + children: + - file: docs/motivation.md + - file: docs/overview.md + - file: docs/users.md + - file: docs/barriers.md + - file: docs/case-profiles.md + children: + - file: docs/case-profiles/rdc.md + - file: docs/case-profiles/bplim.md + - file: docs/case-profiles/twitter.md + - file: docs/case-profiles/ipums.md + - file: docs/examples.md + - file: docs/glossary.md + - file: docs/about.md + - file: docs/acknowledgements.md + - file: docs/references.md + - title: TRACE Concepts + children: + - file: docs/questions.md + - file: docs/dev/signing.md + title: Signing & Verification + - file: docs/elements.md + - file: docs/conceptual-model.md + - title: TRACE Specification + children: + - file: docs/specifications.md + title: Overview + - file: 
docs/tro-declaration-format.md + title: Declaration Format + - file: docs/tro-declaration-design.md + title: Declaration Design + - file: docs/tro-packages.md + title: TRO Packages + - file: docs/trov-vocabulary.md + title: Vocabulary Reference + - file: docs/trov-extension-guide.md + title: Extension Guide + - file: docs/trov-versioning-and-roadmap.md + title: Versioning & Roadmap + - file: docs/trov-prerelease.md + title: Pre-Release Reference + - title: TRACE Tools + children: + - file: docs/infrastructure.md + - file: docs/trace-prototype.md + - file: docs/trace-toolkit.md + - file: docs/slurm-plugin.md + - file: docs/sample-implementation.md + - title: Project Infrastructure + children: + - file: docs/documentation-infrastructure.md + title: Documentation Infrastructure +site: + options: + logo: logo.png + favicon: favicon.ico + folders: true + template: book-theme + +myst_heading_anchors: 2 diff --git a/requirements.txt b/requirements.txt index 7e821e4..ee671b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -jupyter-book +jupyter-book>=2.0.0 matplotlib numpy From a461500f4324fd25f5bb9a88f00e05208c762dcc Mon Sep 17 00:00:00 2001 From: "Kacper Kowalik (Xarthisius)" Date: Sun, 29 Mar 2026 09:32:43 -0500 Subject: [PATCH 2/6] Fix all the warnings --- docs/barriers.md | 2 +- docs/case-profiles.md | 6 +++--- docs/case-profiles/bplim.md | 2 +- docs/case-profiles/rdc.md | 4 ++-- docs/case-profiles/twitter.md | 2 +- docs/dev/signing.md | 4 ++-- docs/documentation-infrastructure.md | 9 +++++++++ docs/elements.md | 12 ++++++------ docs/examples.md | 6 +++--- docs/motivation.md | 6 +++--- docs/overview.md | 2 +- docs/sample-implementation.md | 8 ++++---- docs/tro-declaration-design.md | 7 ++++++- docs/tro-declaration-format.md | 21 ++++++++++++++++----- docs/tro-packages.md | 12 +++++++++--- docs/trov-extension-guide.md | 9 +++++++-- docs/trov-prerelease.md | 17 +++++++++++++---- docs/trov-versioning-and-roadmap.md | 7 +++++++ docs/trov-vocabulary.md 
| 12 +++++++++++- myst.yml | 2 -- 20 files changed, 105 insertions(+), 45 deletions(-) diff --git a/docs/barriers.md b/docs/barriers.md index 40dfd00..d811984 100644 --- a/docs/barriers.md +++ b/docs/barriers.md @@ -28,7 +28,7 @@ within a specific system. Research in the social sciences often relies on access to {term}`sensitive ` or {term}`proprietary ` data that cannot be redistributed and, in many cases, is only accessible to authorized users on [access-controlled -resources](barriers-large-specialized-compute). This includes data collected by +resources](#barriers-large-specialized-compute). This includes data collected by researchers and stored on secure infrastructure at their institutions; confidential private-sector, school district, or government administrative data; as well as data from national statistical agencies. The results of research may diff --git a/docs/case-profiles.md b/docs/case-profiles.md index 85dc020..7cfe988 100644 --- a/docs/case-profiles.md +++ b/docs/case-profiles.md @@ -1,8 +1,8 @@ # Case Studies -* [Statistical Research Data Centers](caseprofile-rdc) -* [BPLIM](caseprofile-bplim) -* [IPUMS](caseprofile-ipums) +* [Statistical Research Data Centers](#caseprofile-rdc) +* [BPLIM](#caseprofile-bplim) +* [IPUMS](#caseprofile-ipums) * Research Cluster * Twitter * Peer-review journal verification processes diff --git a/docs/case-profiles/bplim.md b/docs/case-profiles/bplim.md index 55ecf8b..ad99bbd 100644 --- a/docs/case-profiles/bplim.md +++ b/docs/case-profiles/bplim.md @@ -47,7 +47,7 @@ More recently, they have been pushing researches to work with Singularity conta **The app could be augmented to generate a TRO. The app itself would be described as part of the TRS.** -> The initial TRO in the current usage is not useful to the researcher, since it only tells the BPLIM staff that the code runs cleanly. The confidential TRO generated by the app (TRS) on the confidential data needs vetting, similar to the [FSRDC case](caseprofile-rdc). 
+> The initial TRO in the current usage is not useful to the researcher, since it only tells the BPLIM staff that the code runs cleanly. The confidential TRO generated by the app (TRS) on the confidential data needs vetting, similar to the [FSRDC case](#caseprofile-rdc). ## Archiving diff --git a/docs/case-profiles/rdc.md b/docs/case-profiles/rdc.md index 6bbc470..335a6f6 100644 --- a/docs/case-profiles/rdc.md +++ b/docs/case-profiles/rdc.md @@ -1,7 +1,7 @@ (caseprofile-rdc)= # Federal Research Data Centers -Jump to [TRACE in the FSRDC](caseprofile-trace-in-the-rdc). +Jump to [TRACE in the FSRDC](#caseprofile-trace-in-the-rdc). [Federal Statistical Research Data Centers](https://www.census.gov/about/adrm/fsrdc.html) (FSRDC) provide secure @@ -64,7 +64,7 @@ not preserved or de-accessioned, a record of the when and why should be kept. ### TRACE System description -A TRACE System description should be published. The content should conform to the [TRACE System](element-trace-system), and could be made human-readable as a webpage that has +A TRACE System description should be published. The content should conform to the [TRACE System](#element-trace-system), and could be made human-readable as a webpage that has structured, machine-readable content. It is expected that a TRACE System description does not reveal sensitive information. It should include principles of disclosure review. In principle, a duly diff --git a/docs/case-profiles/twitter.md b/docs/case-profiles/twitter.md index 35b6189..b096ad0 100644 --- a/docs/case-profiles/twitter.md +++ b/docs/case-profiles/twitter.md @@ -55,4 +55,4 @@ whether a "re-hydrated" dataset matches the original. ### Examples -See [Twitter](example-twitter) examples. +See [Twitter](#example-twitter) examples. diff --git a/docs/dev/signing.md b/docs/dev/signing.md index dc4e48e..11c0a2a 100644 --- a/docs/dev/signing.md +++ b/docs/dev/signing.md @@ -7,8 +7,8 @@ non-repudiability (the signer cannot deny that they signed it). 
The signature mechanism determines how TROs are verified and TRACE is concerned with verifying the signature at the time of TRO creation even if the signing key/certificate has expired or been revoked/invalidated. We have considered initially using -[GPG](signing-gpg) with possible future support for [X.509 -certificates](signing-x509). +[GPG](#signing-gpg) with possible future support for [X.509 +certificates](#signing-x509). A key difference between these two approaches is the underlying trust model. In public key infrastructure (PKI), trust models define how the authenticity of diff --git a/docs/documentation-infrastructure.md b/docs/documentation-infrastructure.md index 16389cb..20ad37d 100644 --- a/docs/documentation-infrastructure.md +++ b/docs/documentation-infrastructure.md @@ -16,6 +16,7 @@ For the specification content itself, see [TRACE Specifications](specifications. --- +(repositories)= ## Repositories Four repositories under the [transparency-certified](https://github.com/transparency-certified) GitHub organization contribute to the public documentation: @@ -31,6 +32,7 @@ GitHub serves all of these independently under the same domain via its CDN. The --- +(how-the-pieces-connect)= ## How the Pieces Connect ### Public URL structure @@ -52,6 +54,7 @@ transparency-certified.github.io/ └── trov/prerelease/ Generated by Widoco from trov submodule ``` +(source-file-layout)= ### Source file layout To edit content, work with the source files on the `main` branch of each repository. The build pipelines (described below) generate and deploy the public site automatically. @@ -94,6 +97,7 @@ The `trace-specification` and `trace-vocabularies` repos each deploy to an orpha --- +(the-project-website)= ## The Project Website The **[transparency-certified.github.io](https://github.com/transparency-certified/transparency-certified.github.io)** repository is a standard [Jekyll](https://jekyllrb.com/) site. 
GitHub Pages builds it automatically from the `main` branch using its built-in Jekyll support — no GitHub Action or Docker image is needed. @@ -102,6 +106,7 @@ The site provides a landing page with project description and team information, --- +(the-specification-site)= ## The Specification Site The **[trace-specification](https://github.com/transparency-certified/trace-specification)** repository contains the specification documents as Markdown files in `docs/`. The build and deploy process works as follows: @@ -150,6 +155,7 @@ Each page on the built site offers Markdown and PDF downloads via the toolbar. T --- +(the-vocabulary-reference)= ## The Vocabulary Reference The **[trace-vocabularies](https://github.com/transparency-certified/trace-vocabularies)** repository is a build hub that aggregates vocabulary source repos as git submodules. It runs Widoco to generate HTML documentation and serialization files, then deploys everything to GitHub Pages. @@ -182,6 +188,7 @@ The action uses `peaceiris/actions-gh-pages@v4` to push the built output to an o A [vocabulary index page](https://transparency-certified.github.io/trace-vocabularies/) (`index.html` in the repo root) is copied into the build output during each deploy. It lists all published vocabularies and versions, and serves as the default landing page for unversioned requests via w3id.org. +(updating-after-vocabulary-changes)= ### Updating After Vocabulary Changes Pushing changes to a vocabulary repo (e.g. trov) does not automatically rebuild the site. The trace-vocabularies repo uses git submodules. Each submodule records a specific commit from the vocabulary repo. To pick up new vocabulary content, you advance the submodule pointer and push to trace-vocabularies, which triggers the build workflow. @@ -190,6 +197,7 @@ The submodule pointer is a deliberate decision, not an automatic sync. 
This mean For the specific git commands to advance submodule pointers, pin to a tagged release, or check submodule status, see the [trace-vocabularies README](https://github.com/transparency-certified/trace-vocabularies#updating-vocabulary-documentation). +(testing-the-w3id-org-redirect-rules)= ### w3id.org Namespace Resolution TROV namespace URIs (e.g. `https://w3id.org/trace/trov/0.1#ArtifactComposition`) resolve via redirect rules in a `.htaccess` file registered at [w3id.org](https://w3id.org/). The rules perform content negotiation: RDF tools requesting Turtle get `trov.ttl`, JSON-LD processors get `trov.jsonld`, and browsers get the HTML documentation page. The redirect targets are the GitHub Pages URLs served by trace-vocabularies. @@ -202,6 +210,7 @@ The w3id.org `.htaccess` rules redirect namespace URI requests to files on GitHu --- +(adding-a-page-to-the-specification)= ## Adding a Page to the Specification To add a new document to the specification site: diff --git a/docs/elements.md b/docs/elements.md index c51f2f0..1d69af4 100644 --- a/docs/elements.md +++ b/docs/elements.md @@ -21,24 +21,24 @@ TRACE System Overview ``` At the core of the TRACE model is the [Transparency-Certified -System](element-trace-system) or TRACE System. TRACE systems are responsible for +System](#element-trace-system) or TRACE System. TRACE systems are responsible for the execution of computational workflows on behalf of researchers where guaranteeing the transparency of results is important. TRACE system owners maintain a structured document, the [TRACE System -Certificate](element-trace-certificate), that specifies how transparency is +Certificate](#element-trace-certificate), that specifies how transparency is supported by the system and a signing key associated with the certificate. TRACE systems execute computational workflows, possibly collecting runtime and computational provenance information, and produce a [Transparent Research -Object](element-tro) (TRO). 
TROs are a collection of computational +Object](#element-tro) (TRO). TROs are a collection of computational artifacts (inputs, outputs, code, environment) along with records of their -execution with the system, [signed](element-signature) by the system within +execution with the system, [signed](#element-signature) by the system within which they were executed. (element-trace-system)= ## Transparency-Certified System (TRACE System) A TRACE system is any structured process that can be used to generate a -[TRO](element-tro) including a specific computational platform; a special queue +[TRO](#element-tro) including a specific computational platform; a special queue on a job scheduling system; or a person-centric documented workflow. The purpose of the TRACE system is to certify that a computational workflow was executed and to communicate details about how the system ensures the transparency of results. @@ -77,5 +77,5 @@ the affixed signature. A TRACE system owner maintains a signing key or certificate used to digitally sign TROs created by the system. The signature can be used to verify that a TRO -was generated by a specific TRACE system. See the [Signing](signing) for a +was generated by a specific TRACE system. See the [Signing](#signing) for a technical discussion of implementation options. diff --git a/docs/examples.md b/docs/examples.md index f3b0afd..fb8362a 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -20,14 +20,14 @@ package](https://doi.org/10.3886/E135041V1) Berger, David, Kyle Herkenhoff, and Simon Mongey. 2022. "Labor Market Power." American Economic Review 112(4): 1147–93. [Replication package](https://doi.org/10.3886/E154241V2) -* Uses confidential [Census data](caseprofile-rdc), passed disclosure avoidance review +* Uses confidential [Census data](#caseprofile-rdc), passed disclosure avoidance review Yeh, Chen, Claudia Macaluso, and Brad Hershbein. 2022. "Monopsony in the US Labor Market." American Economic Review 112(7): 2099–2138. 
[Paper](https://www.aeaweb.org/articles?id=10.1257/aer.20200025) [Replication package](https://doi.org/10.3886/E162581V1) -* Uses confidential [Census data](caseprofile-rdc), passed disclosure avoidance review +* Uses confidential [Census data](#caseprofile-rdc), passed disclosure avoidance review (example-specialized-compute)= @@ -101,7 +101,7 @@ Economics of Internal Migration: Advances and Policy Questions." Journal of Economic Literature. [Paper](https://www.aeaweb.org/articles?id=10.1257/jel.20211623) -* This paper uses [IPUMS USA](caseprofile-ipums) data accessed via API. +* This paper uses [IPUMS USA](#caseprofile-ipums) data accessed via API. * An [unofficial Github](https://github.com/AEADataEditor/JEL-2021-162) is available to demonstrate how the API is used to obtain extracts. A private repo provided by the authors has a copy of the extracted data. diff --git a/docs/motivation.md b/docs/motivation.md index 6439116..4aec48c 100644 --- a/docs/motivation.md +++ b/docs/motivation.md @@ -11,11 +11,11 @@ author-provided artifacts are complete or can actually be used to reproduced results. Particularly problematic are studies that employ [sensitive or proprietary -data](barriers-sensitive-proprietary) for which access and reuse are restricted; -[streaming, transient, or ephemeral data](barriers-streaming-transient-ephemeral) +data](#barriers-sensitive-proprietary) for which access and reuse are restricted; +[streaming, transient, or ephemeral data](#barriers-streaming-transient-ephemeral) that cannot be used to verify reproducibility due to their dynamic nature; or [very large-scale or specialized computational -resources](barriers-large-specialized-compute) available only to authorized +resources](#barriers-large-specialized-compute) available only to authorized users. In these cases, verification by repeating computations may not be possible. 
diff --git a/docs/overview.md b/docs/overview.md index 66e3eb8..7915896 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -3,7 +3,7 @@ By certifying the original execution of a computational workflow, consumers of research can trust results without necessarily repeating the computations. Instead of relying on researchers to -[self-certify](barriers-self-certification), TRACE requires that the operators +[self-certify](#barriers-self-certification), TRACE requires that the operators or owners of systems that researchers use adopt key practices to aid in the production of transparent and trustworthy artifacts. diff --git a/docs/sample-implementation.md b/docs/sample-implementation.md index 1c42808..6f0a2b6 100644 --- a/docs/sample-implementation.md +++ b/docs/sample-implementation.md @@ -135,7 +135,7 @@ export GPG_PASSPHRASE="s3cr3tkey" (trace-server-setup)= ## Setting up the TRACE Server Environment -The TRACE "server" environment is where the workflow is executed. This will depend greatly on the existing infrastructure, and is meant to be general. Your environment will need the tools to collect and sign the information for TROs, but can otherwise be quite variable. The section about [TRS Description](trs-description) will capture this environment in a formal manner. +The TRACE "server" environment is where the workflow is executed. This will depend greatly on the existing infrastructure, and is meant to be general. Your environment will need the tools to collect and sign the information for TROs, but can otherwise be quite variable. The section about [TRS Description](#trs-description) will capture this environment in a formal manner. @@ -203,11 +203,11 @@ ubuntu@b3fabd1971c7:/project# ### Define the TRACE Server capabilities TROs contain by default basic information about the TRS that was used to generate them. 
We therefore need to specify the [TRACE System -Certificate](element-trace-certificate), that specifies how transparency is -supported by the system and a signing key associated with the certificate. The current implementation relies on a JSON-LD representation of the capabilities of the system. By convention, this is stored in a file `trs.jsonld` or similar. There can be multiple such specifications in use at the same time. Those in use should be separately published (see [web server](web-server)), and preserved. +Certificate](#element-trace-certificate), that specifies how transparency is +supported by the system and a signing key associated with the certificate. The current implementation relies on a JSON-LD representation of the capabilities of the system. By convention, this is stored in a file `trs.jsonld` or similar. There can be multiple such specifications in use at the same time. Those in use should be separately published (see [web server](#web-server)), and preserved. The TRACE System Certificate is expressed in structured language that describe assertions about supported transparency levels and features (see - [transparency questions](questions)). + [transparency questions](#questions)). :::{margin} The **`trs.jsonld** here is very simple. 
diff --git a/docs/tro-declaration-design.md b/docs/tro-declaration-design.md index 8377055..a07d11e 100644 --- a/docs/tro-declaration-design.md +++ b/docs/tro-declaration-design.md @@ -12,11 +12,12 @@ Why TRO declarations use JSON-LD, what each audience gets from the format, and h | [The Core Idea](#the-core-idea) | One file, two perspectives: valid JSON and valid RDF simultaneously | | [What This Means in Practice](#what-this-means-in-practice) | What producers, JSON consumers, RDF consumers, and repositories each get | | [The JSON Schema Constraint](#the-json-schema-constraint) | How the fixed document structure enables both audiences | -| [The @context Is the Bridge](#the-context-is-the-bridge) | How the JSON-LD context connects the JSON and RDF perspectives | +| [The `@context` Is the Bridge](#the-context-is-the-bridge) | How the JSON-LD context connects the JSON and RDF perspectives | | [Summary](#summary) | Four-audience comparison table | --- +(the-core-idea)= ## The Core Idea A TRO declaration — the document that describes a Transparent Research Object — is a JSON-LD document. This means it is simultaneously: @@ -30,6 +31,7 @@ This dual nature is the central architectural decision for TRO declarations: one --- +(what-this-means-in-practice)= ## What This Means in Practice ### For TRO producers @@ -85,6 +87,7 @@ Both perspectives let repositories integrate TRO metadata with other vocabularie --- +(the-json-schema-constraint)= ## The JSON Schema Constraint The flexibility of RDF is powerful for interoperability but makes validation and tooling harder. If a TRO declaration were unconstrained RDF, a producer could express the same information in many structurally different ways, and consumers would need graph-pattern matching to find anything. @@ -99,6 +102,7 @@ The constraint is more restrictive than what RDF allows. 
You cannot rearrange th --- +(the-context-is-the-bridge)= ## The `@context` Is the Bridge The `@context` block is the mechanism that connects the two perspectives. For JSON producers, it is a fixed header that can be copied from the [TRO Declaration Format](tro-declaration-format.md) examples and modified only if adding a namespace prefix. For RDF consumers, it is the mapping that turns JSON property names into globally unique URIs. @@ -107,6 +111,7 @@ A TRO producer who follows the JSON Schema and preserves the `@context` is produ --- +(summary)= ## Summary | Perspective | Tools | What you see | What you get | diff --git a/docs/tro-declaration-format.md b/docs/tro-declaration-format.md index 4ad5239..b402acd 100644 --- a/docs/tro-declaration-format.md +++ b/docs/tro-declaration-format.md @@ -9,26 +9,27 @@ A **TRO Declaration** is a JSON-LD document that describes a Transparent Researc | Document Section | Description | |---------|-------------| -| [Overview](#overview) | What a TRO declaration contains at a high level | +| [Overview](#tro-format-overview) | What a TRO declaration contains at a high level | | [Document Structure](#document-structure) | The `@context`, `@graph`, and namespace prefixes | | [The TRO Object](#the-tro-object) | TRS, compositions, arrangements, artifacts, locations, performances | | [The Warrant Chain](#the-warrant-chain) | How performance (TRP) attributes link back to TRS capabilities | | [Identifier Conventions](#identifier-conventions) | `@id` patterns for TRS, performance, arrangement, and artifact nodes | | [Signing and Timestamping](#signing-and-timestamping) | GPG and X.509/CMS signing, timestamp authorities | -| [Verification](#verification) | What verifying a TRO declaration may involve | +| [Verification](#tro-format-verification) | What verifying a TRO declaration may involve | | [Complete Example](#complete-example) | A full annotated TRO declaration | -| [Notes](#notes) | JSON-LD conventions, design rationale, known 
limitations | +| [Notes](#tro-format-notes) | JSON-LD conventions, design rationale, known limitations | For definitions of the vocabulary terms used here, see the [TROV Vocabulary Reference](trov-vocabulary.md). For the conceptual background, see the [TRACE Conceptual Model](conceptual-model.md). For the design rationale behind the JSON-LD format, see [TRO Declaration Design](tro-declaration-design.md). --- +(tro-format-overview)= ## Overview A TRO declaration is a single JSON-LD file (conventionally `*.jsonld`) containing: 1. A **`@context`** block that maps short term names to full URIs. -2. A **`@graph`** array containing a single TRO object with all of its nested components. (See [Note 2](#notes) for why an array.) +2. A **`@graph`** array containing a single TRO object with all of its nested components. (See [Note 2](#tro-format-notes) for why an array.) The TRO object itself contains: @@ -46,6 +47,7 @@ Optionally, the TRO is accompanied by: --- +(document-structure)= ## Document Structure ### Top-Level Envelope @@ -57,8 +59,9 @@ Optionally, the TRO is accompanied by: } ``` -The `@context` maps short property names (like `trov:hash`) to full URIs. In 0.1, the `@graph` array contains a single object — the TRO declaration itself (see [Note 2](#notes) for why it is an array). +The `@context` maps short property names (like `trov:hash`) to full URIs. In 0.1, the `@graph` array contains a single object — the TRO declaration itself (see [Note 2](#tro-format-notes) for why it is an array). +(the-context-block)= ### The `@context` Block ```json @@ -87,6 +90,7 @@ The mandatory parts of a TRO declaration depend only on TROV and the foundationa --- +(the-tro-object)= ## The TRO Object In 0.1, the single object in the `@graph` array is the TRO itself. 
@@ -297,6 +301,7 @@ fingerprint = hashlib.sha256("".join(all_hashes).encode("utf-8")).hexdigest() --- +(artifact-arrangements-trovhasarrangement)= ### Artifact Arrangements (`trov:hasArrangement`) An arrangement captures where artifacts were located at a specific point in the workflow. A simple TRO might have just two arrangements — the artifacts present before and after a single computation — but multi-step workflows can have many, with intermediate arrangements shared between performances. @@ -457,6 +462,7 @@ See [TROV Vocabulary Reference — TRO Attribute Types](trov-vocabulary.md#prede --- +(the-warrant-chain)= ## The Warrant Chain The warrant chain is TROV's mechanism for accountability. TROV supports declaring part or all of the chain of warranting attributes and capabilities justifying a particular transparency claim. Downstream consumers — publishers, repositories, funding agencies — may impose their own requirements on the completeness of these chains. @@ -498,6 +504,7 @@ In this example, the TRO-level claim that all input data is included is warrante --- +(identifier-conventions)= ## Identifier Conventions All `@id` values are local to the document. They are used for cross-referencing between objects within the same TRO declaration. The conventions used by tro-utils are: @@ -521,6 +528,7 @@ All `@id` values are local to the document. They are used for cross-referencing --- +(signing-and-timestamping)= ## Signing and Timestamping A TRO package includes the declaration and one or more signing artifacts. The signing mechanism is not fixed — different TRS implementations use different approaches. What matters is that the signature can be verified against the public key or certificate recorded in the TRO declaration. @@ -536,6 +544,7 @@ The signature covers the declaration file byte-for-byte. 
In the reference implem --- +(tro-format-verification)= ## Verification Verification of a TRO declaration may include: @@ -548,6 +557,7 @@ Verification of a TRO declaration may include: --- +(complete-example)= ## Complete Example The following is a minimal but complete TRO declaration describing a data file and a script, a computation that reads them and produces an output, and a claim of Internet isolation. @@ -708,6 +718,7 @@ The following is a minimal but complete TRO declaration describing a data file a --- +(tro-format-notes)= ## Notes **Note 1: JSON-LD as JSON.** A TRO declaration is valid JSON. Producers can build it with any JSON library, no RDF tooling required. The `@context`, `@id`, `@type`, and `@graph` keys are the only JSON-LD-specific syntax. Everything else is standard JSON objects, arrays, and strings. diff --git a/docs/tro-packages.md b/docs/tro-packages.md index 58cd795..9581316 100644 --- a/docs/tro-packages.md +++ b/docs/tro-packages.md @@ -9,17 +9,18 @@ How Transparent Research Objects are packaged for distribution. TRACE and TROV a | Document Section | Description | |---------|-------------| -| [Overview](#overview) | What a TRO package contains | +| [Overview](#tro-packages-overview) | What a TRO package contains | | [Package Contents](#package-contents) | Declaration, signing artifacts, and optional research artifacts for each signing mechanism | | [Directory Layout](#directory-layout) | How files are organized inside ZIP archives | | [On-Demand Signing](#on-demand-signing) | Creating signed packages after execution | -| [Verification](#verification) | Package-level checks beyond declaration validation | +| [Verification](#tro-packages-verification) | Package-level checks beyond declaration validation | | [Open Questions](#open-questions) | Design decisions not yet resolved for 0.1 | For the structure of the JSON-LD declaration itself, see [TRO Declaration Format](tro-declaration-format.md). 
For the vocabulary terms, see [TROV Vocabulary Reference](trov-vocabulary.md). --- +(tro-packages-overview)= ## Overview A TRO package is a collection of files containing: @@ -47,6 +48,7 @@ Critically, in TRACE an artifact's identity is its content hash, not its filenam --- +(package-contents)= ## Package Contents Every TRO package contains at minimum a declaration file and a signature. The specific signing artifacts depend on the mechanism used. TROV is agnostic to the signing mechanism; the two current implementations demonstrate different but equally valid approaches. @@ -76,6 +78,7 @@ The package may include the research artifacts referenced by the declaration, or --- +(directory-layout)= ## Directory Layout TRACE places no requirements on the directory structure within a package. A flat archive with all files at the root is valid. So is any nested structure. The declaration identifies artifacts by content hash, not by path. @@ -109,6 +112,7 @@ Repositories like Zenodo and Dataverse can display the file listing of a ZIP arc --- +(on-demand-signing)= ## On-Demand Signing A TRS may produce unsigned packages during execution and sign them later on demand. This is valid as long as the declaration file is not modified between creation and signing. @@ -126,9 +130,10 @@ The critical constraint is chain of custody: the TRS must sign the declaration i --- +(tro-packages-verification)= ## Verification -The [TRO Declaration Format](tro-declaration-format.md#verification) describes verification of the declaration's internal consistency (fingerprint, arrangement references, warrant chain). At the package level, a verifier additionally checks: +The [TRO Declaration Format](tro-declaration-format.md#tro-format-verification) describes verification of the declaration's internal consistency (fingerprint, arrangement references, warrant chain). At the package level, a verifier additionally checks: 1. 
**Signature validity.** The signature on the declaration file matches the public key or certificate associated with the TRS. The key or certificate must be cryptographically bound to the signature — either by inclusion in the signed declaration itself (GPG: `trov:publicKey`) or by a certificate chain embedded in the signature file (X.509/CMS: `.p7s`). A public key provided separately from the signed content does not establish that the claimed TRS produced the signature. 2. **Timestamp validity.** The signed timestamp establishes that the declaration's signature was produced no later than the indicated time. For GPG, this is the `.tsr` file verified against the TSA's certificate. For X.509/CMS, the timestamp countersignature is embedded in the `.p7s`. @@ -148,6 +153,7 @@ In both cases, a verifier must consult a trustworthy source beyond the TRO packa --- +(open-questions)= ## Open Questions The following design decisions are not yet resolved for 0.1: diff --git a/docs/trov-extension-guide.md b/docs/trov-extension-guide.md index 1ee6f8c..a2f6bdc 100644 --- a/docs/trov-extension-guide.md +++ b/docs/trov-extension-guide.md @@ -9,20 +9,22 @@ How to extend the TRACE model with institution-specific terms in TRO declaration | Document Section | Description | |---------|-------------| -| [Overview](#overview) | The core principle and what this guide covers | -| [Step by Step](#step-by-step) | Choose a prefix, add to @context, use prefixed property names | +| [Overview](#extension-guide-overview) | The core principle and what this guide covers | +| [Step by Step](#step-by-step) | Choose a prefix, add to `@context`, use prefixed property names | | [JSON Schema Validation](#how-json-schema-validation-handles-extensions) | How the schema validates TROV terms while supporting extensions | | [Extending TROV with Custom Types](#extending-trov-with-custom-types) | Defining custom capabilities, attributes, signing mechanisms, and other typed terms | | [Aligning with Future 
Versions](#aligning-custom-terms-with-future-trov-versions) | Alignment when a custom term is later standardized in TROV | --- +(extension-guide-overview)= ## Overview Use your own namespace for institution-specific terms. TROV uses the prefix `trov:` for its terms; your system may define its own prefix for terms specific to your implementation. Custom terms coexist with TROV terms in the same declaration without conflict. The JSON Schema validates TROV terms while allowing namespaced extensions through. --- +(step-by-step)= ## Step by Step ### 1. Choose a namespace and prefix @@ -87,6 +89,7 @@ See [Extending TROV with Custom Types](#extending-trov-with-custom-types) for a --- +(how-json-schema-validation-handles-extensions)= ## How JSON Schema Validation Handles Extensions The TROV JSON Schema is designed to validate the TROV core while supporting namespaced extensions: @@ -98,6 +101,7 @@ The TROV JSON Schema is designed to validate the TROV core while supporting name --- +(extending-trov-with-custom-types)= ## Extending TROV with Custom Types The TROV vocabulary defines predefined values for capabilities, performance attributes, and other typed properties. Adopters whose TRS uses types not yet covered by TROV can define custom types in their own namespace. @@ -185,6 +189,7 @@ If a custom type proves broadly useful across multiple TRS implementations, it c --- +(aligning-custom-terms-with-future-trov-versions)= ## Aligning Custom Terms with Future TROV Versions If a concept you defined in your namespace is later standardized as a `trov:` term (for example, if a future version of TROV adds `trov:cpuArchitecture`, and this term fully captures the concept represented by your custom `mytrs:architecture` term), consider using the standard term in new declarations. Existing TRO declarations remain valid. 
diff --git a/docs/trov-prerelease.md b/docs/trov-prerelease.md index 7d60f1f..9e42123 100644 --- a/docs/trov-prerelease.md +++ b/docs/trov-prerelease.md @@ -9,16 +9,17 @@ Frozen snapshot of the pre-release vocabulary used by early TRACE implementation | Document Section | Description | |---------|-------------| -| [Vocabulary Metadata](#vocabulary-metadata) | Namespace, prefix, and status | +| [Vocabulary Metadata](#prerelease-vocabulary-metadata) | Namespace, prefix, and status | | [Scope](#scope) | What terms are included | -| [Classes](#classes) | Core entities, artifacts, arrangements, and locations | -| [Properties](#properties) | TRO, TRS, composition, arrangement, TRP, and warrant chain properties | +| [Classes](#prerelease-classes) | Core entities, artifacts, arrangements, and locations | +| [Properties](#prerelease-properties) | TRO, TRS, composition, arrangement, TRP, and warrant chain properties | | [TRS Capabilities](#trs-capability-types) | Capability types produced by pre-release implementations | | [TRP Attributes](#trp-attribute-types) | Attribute types and their warrant links | -| [External Vocabularies](#external-vocabularies-used) | RDF, RDFS, and schema.org terms used alongside TROV | +| [External Vocabularies](#prerelease-external-vocabularies-used) | RDF, RDFS, and schema.org terms used alongside TROV | | [Term Changes in TROV 0.1](#term-changes-in-trov-01) | Term renames | | [Migration](#migration-to-trov-01) | Steps to convert pre-release declarations to 0.1 | +(prerelease-vocabulary-metadata)= ## Vocabulary Metadata - Namespace: `https://w3id.org/trace/2023/05/trov#` @@ -29,12 +30,14 @@ Frozen snapshot of the pre-release vocabulary used by early TRACE implementation --- +(scope)= ## Scope This document records only the terms actually used by the pre-release implementations. Terms previously defined in the ontology file (`trov.ttl`) or in hand-written examples but not produced by early implementations are not listed here. 
For the full vocabulary including additional capability types and attribute types, see [TROV 0.1](trov-vocabulary.md). --- +(prerelease-classes)= ## Classes ### Core Entities @@ -62,6 +65,7 @@ This document records only the terms actually used by the pre-release implementa --- +(prerelease-properties)= ## Properties ### TRO Properties @@ -121,6 +125,7 @@ This document records only the terms actually used by the pre-release implementa --- +(trs-capability-types)= ## TRS Capability Types | Type | Description | @@ -130,6 +135,7 @@ This document records only the terms actually used by the pre-release implementa --- +(trp-attribute-types)= ## TRP Attribute Types | Type | Warranted by | Description | @@ -139,6 +145,7 @@ This document records only the terms actually used by the pre-release implementa --- +(prerelease-external-vocabularies-used)= ## External Vocabularies Used Both implementations use the following external vocabularies alongside TROV: @@ -153,6 +160,7 @@ The `schema:` terms appear on the TRO root node and on the TRS node. --- +(term-changes-in-trov-01)= ## Term Changes in TROV 0.1 | Pre-release | TROV 0.1 | @@ -165,6 +173,7 @@ The `schema:` terms appear on the TRO root node and on the TRS node. --- +(migration-to-trov-01)= ## Migration to TROV 0.1 Based on the current 0.1 draft, updating a pre-release TRO declaration to conform to TROV 0.1 involves the following changes. These steps may be revised as 0.1 is finalized. diff --git a/docs/trov-versioning-and-roadmap.md b/docs/trov-versioning-and-roadmap.md index 4987ee5..a1956df 100644 --- a/docs/trov-versioning-and-roadmap.md +++ b/docs/trov-versioning-and-roadmap.md @@ -21,6 +21,7 @@ For the vocabulary terms themselves, see the [TROV Vocabulary Reference](trov-vo --- +(design-goals)= ## Design Goals 1. **Grounded in practice.** TROV addresses transparency problems faced by real organizations, describes capabilities systems can feasibly implement, and answers verification questions reviewers commonly ask. 
@@ -35,6 +36,7 @@ For the vocabulary terms themselves, see the [TROV Vocabulary Reference](trov-vo --- +(version-numbering)= ## Version Numbering TROV uses **semantic versioning** with different stability guarantees before and after 1.0. @@ -113,6 +115,7 @@ trov/2# → 1.x frozen, new namespace, living docs describe latest 2.x --- +(what-constitutes-a-version)= ## What Constitutes a Version ### TROV 0.1 (Current Draft — Stabilization Target) @@ -163,6 +166,7 @@ trov/2# → 1.x frozen, new namespace, living docs describe latest 2.x --- +(extensibility-adopter-specific-terms)= ## Extensibility: Adopter-Specific Terms Adopters will need to include metadata specific to their systems — container runtime details, internal system versions, resource utilization metrics, and similar operational data. For a step-by-step walkthrough, see the [TROV Extension Guide](trov-extension-guide.md). @@ -198,6 +202,7 @@ Adopters will need to include metadata specific to their systems — container r --- +(breaking-vs-non-breaking-changes)= ## Breaking vs Non-Breaking Changes The following table describes how changes are classified **after 1.0** (pre-1.0, any of these changes may occur in any 0.x release). @@ -217,6 +222,7 @@ The following table describes how changes are classified **after 1.0** (pre-1.0, --- +(repository-structure-and-namespace-resolution)= ## Repository Structure and Namespace Resolution ### The trov repository @@ -318,6 +324,7 @@ transparency-certified.github.io/trov/ --- +(roadmap)= ## Roadmap ### Phase 1: Stabilize 0.1 (Now — June 2026) diff --git a/docs/trov-vocabulary.md b/docs/trov-vocabulary.md index fb171ce..f7113e2 100644 --- a/docs/trov-vocabulary.md +++ b/docs/trov-vocabulary.md @@ -18,12 +18,13 @@ Term reference for the Transparent Research Object Vocabulary (TROV). 
Lists ever | [Cardinality Constraints](#cardinality-constraints) | Required vs optional properties and their multiplicities | | [External Vocabularies](#external-vocabularies-used) | RDF, RDFS, and schema.org terms used alongside TROV | | [Known Limitations](#known-limitations) | Scope boundaries and open questions for 0.1 | -| [Notes](#notes) | JSON-LD context, design rationale, relationship to pre-release | +| [Notes](#trov-vocab-notes) | JSON-LD context, design rationale, relationship to pre-release | For the conceptual background motivating this vocabulary, see the [TRACE Conceptual Model](conceptual-model.md). For the JSON-LD format used to express TRO declarations, see [TRO Declaration Format](tro-declaration-format.md). For the design rationale behind the JSON-LD format, see [TRO Declaration Design](tro-declaration-design.md). --- +(vocabulary-metadata)= ## Vocabulary Metadata | | | @@ -37,6 +38,7 @@ For the conceptual background motivating this vocabulary, see the [TRACE Concept --- +(classes)= ## Classes ### Core Entities @@ -86,6 +88,7 @@ TRO attribute --- +(properties)= ## Properties ### TRO Properties @@ -148,6 +151,7 @@ TRO attribute --- +(predefined-values-trs-capability-types)= ## Predefined Values: TRS Capability Types The following capability types are currently defined. This list is designed to grow as new transparency conditions are identified by the research community. @@ -181,6 +185,7 @@ The following capability types are currently defined. This list is designed to g --- +(predefined-values-trp-attribute-types)= ## Predefined Values: TRP Attribute Types | Individual | Parent | Warranted By | Description | @@ -190,6 +195,7 @@ The following capability types are currently defined. This list is designed to g --- +(predefined-values-tro-attribute-types)= ## Predefined Values: TRO Attribute Types | Individual | Parent | Warranted By | Description | @@ -198,6 +204,7 @@ The following capability types are currently defined. 
This list is designed to g --- +(cardinality-constraints)= ## Cardinality Constraints The following constraints are defined in SHACL shapes and enforced during validation. @@ -223,6 +230,7 @@ The following constraints are defined in SHACL shapes and enforced during valida --- +(external-vocabularies-used)= ## External Vocabularies Used TROV uses terms from the following external vocabularies: @@ -235,6 +243,7 @@ TROV uses terms from the following external vocabularies: --- +(known-limitations)= ## Known Limitations | Limitation | Disposition | @@ -248,6 +257,7 @@ TROV uses terms from the following external vocabularies: --- +(trov-vocab-notes)= ## Notes **Namespace.** The 0.1 namespace is `https://w3id.org/trace/trov/0.1#`; the pre-release namespace `https://w3id.org/trace/2023/05/trov#` is retired. See [Pre-Release Vocabulary Reference](trov-prerelease.md) for migration guidance. diff --git a/myst.yml b/myst.yml index d003bcd..d4be736 100644 --- a/myst.yml +++ b/myst.yml @@ -73,5 +73,3 @@ site: favicon: favicon.ico folders: true template: book-theme - -myst_heading_anchors: 2 From 72fda3094ce13f6e1f05fb8476edb934331be32f Mon Sep 17 00:00:00 2001 From: "Kacper Kowalik (Xarthisius)" Date: Sun, 29 Mar 2026 09:37:41 -0500 Subject: [PATCH 3/6] Use upstream gha to deploy --- .github/workflows/deploy.yml | 60 ++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9a8f631..1308a3b 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -1,26 +1,48 @@ -name: deploy-docs +# This file was created automatically with `jupyter-book init --gh-pages` 🪄 💚 +# Ensure your GitHub Pages settings for this repository are set to deploy with **GitHub Actions**. 
-# Only run this when the master branch changes +name: Jupyter Book (via myst) GitHub Pages Deploy on: push: - branches: - - main + # Runs on pushes targeting the default branch + branches: [main] +env: + # `BASE_URL` determines, relative to the root of the domain, the URL that your site is served from. + # E.g., if your site lives at `https://mydomain.org/myproject`, set `BASE_URL=/myproject`. + # If, instead, your site lives at the root of the domain, at `https://mydomain.org`, set `BASE_URL=''`. + BASE_URL: /${{ github.event.repository.name }} -# Build the docs and publish to gh-pages +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write +# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. +# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. +concurrency: + group: 'pages' + cancel-in-progress: false jobs: - deploy-book: + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Build the jupyter-book - uses: addnab/docker-run-action@v3 - with: - image: craigwillis/jupyter-book:latest - options: -v ${{ github.workspace }}:/src - run: jupyter-book build --all . 
- # Push the book's HTML to github-pages - - name: GitHub Pages action - uses: peaceiris/actions-gh-pages@v3.6.1 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ./_build/html + - uses: actions/checkout@v4 + - name: Setup Pages + uses: actions/configure-pages@v3 + - uses: actions/setup-node@v4 + with: + node-version: 18.x + - name: Install Jupyter Book (via myst) + run: npm install -g jupyter-book + - name: Build HTML Assets + run: jupyter-book build --html + - name: Upload artifact + uses: actions/upload-pages-artifact@v3 + with: + path: './_build/html' + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 From 799cf3a298c99e9a5f4c1777815f903aa72e5e14 Mon Sep 17 00:00:00 2001 From: "Kacper Kowalik (Xarthisius)" Date: Sun, 29 Mar 2026 09:38:37 -0500 Subject: [PATCH 4/6] Add build validation as test. Fixes #28 --- .github/workflows/build.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..4613e68 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,30 @@ +name: Jupyter Book Build Check + +on: + push: + branches-ignore: [main] + pull_request: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: 18.x + + - name: Install Jupyter Book (via myst) + run: npm install -g jupyter-book + + - name: Build and check for warnings + run: | + jupyter-book build --site --strict 2>&1 | tee build.log + if grep -qE '^⚠️' build.log; then + echo "::error::Jupyter Book build produced warnings. See log for details." 
+ grep -E '^⚠️' build.log | while IFS= read -r line; do + echo "::warning::$line" + done + exit 1 + fi From 59d0ff6a13389b87cdb86fc6b9743444d58a9903 Mon Sep 17 00:00:00 2001 From: "Kacper Kowalik (Xarthisius)" Date: Sun, 29 Mar 2026 09:41:23 -0500 Subject: [PATCH 5/6] Errors during build should also be fatal, d'oh! --- .github/workflows/build.yml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4613e68..fd90e40 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -18,13 +18,22 @@ jobs: - name: Install Jupyter Book (via myst) run: npm install -g jupyter-book - - name: Build and check for warnings + - name: Build and check for warnings or errors run: | - jupyter-book build --site --strict 2>&1 | tee build.log + jupyter-book build --site --check-links --strict 2>&1 | tee build.log + FAILED=0 if grep -qE '^⚠️' build.log; then echo "::error::Jupyter Book build produced warnings. See log for details." grep -E '^⚠️' build.log | while IFS= read -r line; do echo "::warning::$line" done - exit 1 + FAILED=1 fi + if grep -qE '^⛔️' build.log; then + echo "::error::Jupyter Book build produced errors. See log for details." 
+ grep -E '^⛔️' build.log | while IFS= read -r line; do + echo "::error::$line" + done + FAILED=1 + fi + exit $FAILED From 3def1d124fa243424efc98ec197b33527adad189 Mon Sep 17 00:00:00 2001 From: "Kacper Kowalik (Xarthisius)" Date: Sun, 29 Mar 2026 09:53:38 -0500 Subject: [PATCH 6/6] Fix broken links --- docs/case-profiles/bplim.md | 2 +- docs/case-profiles/twitter.md | 2 +- docs/examples.md | 2 +- docs/infrastructure.md | 2 -- docs/sample-implementation.md | 4 ++-- docs/trace-prototype.md | 12 +++++++++--- docs/wholetale-integration.md | 2 -- 7 files changed, 14 insertions(+), 12 deletions(-) delete mode 100644 docs/wholetale-integration.md diff --git a/docs/case-profiles/bplim.md b/docs/case-profiles/bplim.md index ad99bbd..0bf1276 100644 --- a/docs/case-profiles/bplim.md +++ b/docs/case-profiles/bplim.md @@ -5,7 +5,7 @@ The [Banco de Portugal Microdata Research Laboratory](https://bplim.bportugal.pt/content/access-0) (BPLIM) provides access to datasets about the Portuguese economy. The following summary is based on the BPLIM [Guide for -Researchers](https://msites-dee-bplim-prd.azurewebsites.net/sites/default/files/guide_for_researchers_v202210.pdf) and [Guimarães (2023)](https://doi.org/10.1162/99608f92.54a00239). +Researchers](https://bplim.github.io/Manuals/Guides/01_Guide_for_Researchers/) and [Guimarães (2023)](https://doi.org/10.1162/99608f92.54a00239). ## Data access diff --git a/docs/case-profiles/twitter.md b/docs/case-profiles/twitter.md index b096ad0..1677ba0 100644 --- a/docs/case-profiles/twitter.md +++ b/docs/case-profiles/twitter.md @@ -4,7 +4,7 @@ Twitter provides access to data for [Academic Research](https://developer.twitter.com/en/products/twitter-api/academic-research). 
The Twitter [Developer Agreement and -Policy](https://developer.twitter.com/en/developer-terms/agreement-and-policy) +Policy](https://web.archive.org/web/20220307191453/https://developer.twitter.com/en/developer-terms/agreement-and-policy) includes the following content redistribution policy: > If you provide Twitter Content to third parties, including downloadable diff --git a/docs/examples.md b/docs/examples.md index fb8362a..0248c74 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -102,7 +102,7 @@ Economic Literature. [Paper](https://www.aeaweb.org/articles?id=10.1257/jel.20211623) * This paper uses [IPUMS USA](#caseprofile-ipums) data accessed via API. -* An [unofficial Github](https://github.com/AEADataEditor/JEL-2021-162) is +* An [unofficial Github](https://github.com/AEADataEditor/JEL-2021-1623) is available to demonstrate how the API is used to obtain extracts. A private repo provided by the authors has a copy of the extracted data. diff --git a/docs/infrastructure.md b/docs/infrastructure.md index 1b80938..a8eb548 100644 --- a/docs/infrastructure.md +++ b/docs/infrastructure.md @@ -9,7 +9,5 @@ development of a proof-of-concept plugin for a SLURM cluster. Infrastructure elements include: * [TRACE Toolkit](trace-toolkit): A set of reusable programs and libraries implementing the TRACE specification. -* [Whole Tale Integration](wt-integration): Application of the trace-toolkit to - TRACE-enable the Whole Tale platform. * [SLURM Plugin](slurm-plugin): Application of the trace-toolkit to TRACE-enable a SLURM cluster. diff --git a/docs/sample-implementation.md b/docs/sample-implementation.md index 6f0a2b6..4a76bf8 100644 --- a/docs/sample-implementation.md +++ b/docs/sample-implementation.md @@ -6,7 +6,7 @@ This document illustrates the "full package". It provides implementable examples This guide will help you set up a TRACE server and infrastructure. 
This involves: -- Having a way to sign the TROs (Trace Record Objects) that are generated by the TRACE server. TRACE allows for GPG and X.509. See [Signing](./signing.md) for more information. +- Having a way to sign the TROs (Trace Record Objects) that are generated by the TRACE server. TRACE allows for GPG and X.509. See [Signing](./dev/signing.md) for more information. - Having a way to display the TRS (Trace Record Server) capabilities and the TROs that are generated by the TRACE server, via a **web server**. - Reference tools are implemented using Python, but could be implemented in other languages. @@ -528,7 +528,7 @@ First, create the Zenodo metadata for the deposit as `zenodo.json`: } ``` -After [obtaining an API key](https://developers.zenodo.org/), we can use the provided [zenodo_upload.py](zenodo_upload.py) to create a draft deposit. Note that the script uses `sandbox.zenodo.org` by default: +After [obtaining an API key](https://developers.zenodo.org/), we can use the provided [zenodo_upload.py](./dev/zenodo_upload.py) to create a draft deposit. Note that the script uses `sandbox.zenodo.org` by default: ```bash export API_KEY=your_zenodo_api_key diff --git a/docs/trace-prototype.md b/docs/trace-prototype.md index 052b680..43086ec 100644 --- a/docs/trace-prototype.md +++ b/docs/trace-prototype.md @@ -1,17 +1,23 @@ (trace-prototype)= # TRACE Prototype +:::{attention} + +`server.trace-poc.xyz` has been discontinued as of November 2024. It has been superseded by <https://sivacor.org/>. +The following text is retained for archival purposes. + +::: The TRACE Prototype has been developed as an end-to-end implementation to facilitate requirements discussions.
It consists of the following: -* https://server.trace-poc.xyz: Running instance of the TRACE prototype server +* `https://server.trace-poc.xyz`: Running instance of the TRACE prototype server * [trace-prototype](https://github.com/transparency-certified/trace-prototype): contains source code for a proof-of-concept TRACE Server and associated commandline interface. * [sample-trace-workflow](https://github.com/transparency-certified/sample-trace-workflow/): Example workflow to demonstrate command-line integration including GitHub actions. -## server.trace-poc.xyz +## `server.trace-poc.xyz` This is a running instance of the [trace-prototype](https://github.com/transparency-certified/trace-prototype) server @@ -46,7 +52,7 @@ $ trace-poc submit --entrypoint "run.sh" --container-user rstudio \ --target-repo-dir "/home/rstudio" --trace-server https://server.trace-poc.xyz . ``` -This will create a new entry on the server at https://server.trace-poc.xyz/. +This will create a new entry on the server at `https://server.trace-poc.xyz/`. ## Inspecting the TRO diff --git a/docs/wholetale-integration.md b/docs/wholetale-integration.md deleted file mode 100644 index 934d759..0000000 --- a/docs/wholetale-integration.md +++ /dev/null @@ -1,2 +0,0 @@ -(wt-integration)= -# Whole Tale Integration