From 074bb6ef70cf8c6861db56c30fe7528db8416b54 Mon Sep 17 00:00:00 2001 From: Sunnyday Technologies Date: Thu, 4 Jun 2026 20:24:42 -0500 Subject: [PATCH] schema v1.7: aggregate conditioning + fidelity/units repairs Additive, backward-compatible schema update (v1.6 datasets remain valid), prepared to support the working-group review. New columns (water accounting): - aggregate_moisture_state, aggregate_absorption_pct, aggregate_moisture_content_pct (ASTM C127/C128, C566) -- recover effective (free) mix water when aggregates are batched off SSD; free moisture = total moisture - absorption (not a w/c, w/b duplicate). - aggregate_prewetted -- process flag for the common pre-wetting-to-damp practice. Tooling / fidelity repairs: - units.py: imperial-tonnage factors (lb_yd3, short_ton, long_ton, metric_ton); a bare "ton"/"t" is rejected as ambiguous (~12% short-vs-long spread). +5 tests. - fidelity.py: field_coverage no longer penalizes relational foreign keys/IDs (still preserved in the sidecar). - crosswalk: completed the test-method enum (four_point_bending -> ASTM_C78, ...). - Measured fidelity on a relational template: 78.7 -> 87.4 (B); UCI 96.7 (A) unchanged. Release hygiene: - v1.6 -> v1.7 across schema.md, sql, CHANGELOG [1.7.0], crosswalk, ingest tool (pyproject + __init__), landing, schema-reference, llms.txt, README, AGENTS, intake, examples, .well-known/mcp-manifest.json (was stale at v1.5), .zenodo.json (version added). - Term justification regenerated: 241 terms / 25 sections (coverage gate passes). - examples/ refreshed against v1.7 (UCI reproduce-and-diff byte-identical). - New scripts/check_version.py + version-consistency CI: one canonical version list that fails the build if any surface drifts (would have caught the stale v1.5 manifest). - check_examples.py / check_version.py made Windows-console safe. 18 ingest tests pass; examples, version, and term-coverage checks green. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/version-consistency.yml | 53 +++++++++++ .well-known/mcp-manifest.json | 2 +- .zenodo.json | 1 + AGENTS.md | 2 +- CHANGELOG.md | 30 +++++++ Open3DCP_SCHEMA.md | 24 ++++- Open3DCP_TERM_JUSTIFICATION.md | 13 ++- README.md | 4 +- crosswalk/open3dcp_to_relational.yaml | 10 ++- examples/index.html | 2 +- examples/rilem-tc304-ils-mech/index.html | 2 +- examples/uci-yeh-1998/index.html | 2 +- .../uci-yeh-1998/uci-yeh-1998.fidelity.json | 2 +- .../uci-yeh-1998/uci-yeh-1998.fidelity.md | 2 +- index.html | 6 +- intake/index.html | 2 +- llms.txt | 4 +- schema-reference/index.html | 6 +- scripts/check_examples.py | 5 ++ scripts/check_version.py | 87 +++++++++++++++++++ sql/create_tables.sql | 11 +++ tools/ingest/README.md | 4 +- tools/ingest/open3dcp_ingest/__init__.py | 4 +- tools/ingest/open3dcp_ingest/fidelity.py | 23 +++-- tools/ingest/open3dcp_ingest/units.py | 19 +++- tools/ingest/pyproject.toml | 4 +- tools/ingest/tests/test_units.py | 25 ++++++ 27 files changed, 311 insertions(+), 38 deletions(-) create mode 100644 .github/workflows/version-consistency.yml create mode 100644 scripts/check_version.py diff --git a/.github/workflows/version-consistency.yml b/.github/workflows/version-consistency.yml new file mode 100644 index 0000000..f07a397 --- /dev/null +++ b/.github/workflows/version-consistency.yml @@ -0,0 +1,53 @@ +name: Version consistency + +# Guards that every place stating the CURRENT schema version matches the top CHANGELOG.md +# entry. The canonical list of version-bearing files lives in scripts/check_version.py — a +# bump means editing those files, and this check fails the build if any one drifts. 
+on: + push: + paths: + - CHANGELOG.md + - Open3DCP_SCHEMA.md + - crosswalk/** + - tools/ingest/** + - index.html + - schema-reference/** + - llms.txt + - README.md + - AGENTS.md + - intake/** + - .well-known/** + - .zenodo.json + - examples/** + - scripts/check_version.py + pull_request: + paths: + - CHANGELOG.md + - Open3DCP_SCHEMA.md + - crosswalk/** + - tools/ingest/** + - index.html + - schema-reference/** + - llms.txt + - README.md + - AGENTS.md + - intake/** + - .well-known/** + - .zenodo.json + - examples/** + - scripts/check_version.py + workflow_dispatch: + +permissions: + contents: read + +jobs: + version: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Check version-label consistency + run: python scripts/check_version.py diff --git a/.well-known/mcp-manifest.json b/.well-known/mcp-manifest.json index 72d1efc..4d4ec7d 100644 --- a/.well-known/mcp-manifest.json +++ b/.well-known/mcp-manifest.json @@ -2,7 +2,7 @@ "schema_version": "0.1-draft", "_note": "Experimental site-level Model Context Protocol manifest. Schema is a community draft (see https://modelcontextprotocol.io). This site is a static, public, read-only documentation surface — listed resources are GET-only. Open3DCP is a schema definition project; it does not host mix-design data.", "name": "Open3DCP", - "description": "Open data standard / flat schema for 3D-printable concrete (3DCP) mix design and test records. Current public schema version: v1.5. Defines column names, units, types, and engineering context covering binders, aggregates, fibers, admixtures, fresh-state rheology, hardened mechanical properties, durability, and 3DCP process parameters.", + "description": "Open data standard / flat schema for 3D-printable concrete (3DCP) mix design and test records. Current public schema version: v1.7. 
Defines column names, units, types, and engineering context covering binders, aggregates, fibers, admixtures, fresh-state rheology, hardened mechanical properties, durability, and 3DCP process parameters.", "publisher": { "name": "Sunnyday Technologies LLC", "url": "https://sunn3d.com/" diff --git a/.zenodo.json b/.zenodo.json index 75ab48a..a8337ba 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,5 +1,6 @@ { "title": "Open3DCP: Open Data Standard for 3D Concrete Printing", + "version": "1.7.0", "upload_type": "software", "creators": [ { diff --git a/AGENTS.md b/AGENTS.md index e4fcf99..5bc28fe 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -16,7 +16,7 @@ Open3DCP is a **schema specification**: column names, types, units, and engineering context for binders, alkali activators, aggregates, fibers, admixtures, pigments, fresh-state rheology, hardened mechanical properties, durability indicators, 3DCP process parameters, and -interlayer bond. v1.6 defines the current public column vocabulary. +interlayer bond. v1.7 defines the current public column vocabulary. kg/m³-primary basis (mass-% derivable). Flat and analysis/ML-oriented. Open3DCP is **not**: diff --git a/CHANGELOG.md b/CHANGELOG.md index 2373157..54b05ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,36 @@ Schema versioning follows these rules: --- +## [1.7.0] - 2026-06-04 + +Additive, backward-compatible changes. Existing v1.6 datasets remain valid unchanged. + +### Added — aggregate conditioning (water accounting) +- `aggregate_moisture_state` -- as-batched aggregate condition: `oven_dry` | `air_dry` | `SSD` | `wet`. +- `aggregate_absorption_pct` -- 24-h aggregate absorption, % of oven-dry mass (ASTM C127/C128). +- `aggregate_moisture_content_pct` -- total as-batched aggregate moisture, % of oven-dry mass (ASTM C566). + Free moisture = `aggregate_moisture_content_pct` − `aggregate_absorption_pct`, so the effective + (free) mix water is recoverable when aggregates are batched off the SSD reference. 
The SSD-basis + `water` column plus these three make water accounting unambiguous without duplicating w/c, w/b. +- `aggregate_prewetted` -- process flag for pre-wetting aggregate to a damp condition before + batching (a common 3DCP practice). + +### Fixed — ingestion fidelity & crosswalk +- Fidelity `field_coverage` no longer penalizes relational foreign keys / identifiers (a flat row + carries none); they are excluded from the coverage denominator and still preserved in the + triage sidecar. +- Crosswalk test-method map completed (e.g. `four_point_bending` -> `ASTM_C78`), so standard test + methods canonicalize instead of passing through unmapped. + +### Fixed — unit converter (ingestion tool) +- Added imperial-tonnage factors: `lb_yd3` (US batch-ticket concentration unit), US `short_ton`, + UK `long_ton`, plus explicit `metric_ton`/`tonne`; a bare "ton"/"t" is now rejected as + ambiguous (short vs long ton differ by ~12%). + +### Notes +- Ingestion-tool MAJOR.MINOR bumped to 1.7 to track the schema (`TARGET_SCHEMA_VERSION`). +- Canonical column list remains `Open3DCP_SCHEMA.md` / `sql/create_tables.sql`. + ## [1.6.0] - 2026-06-03 ### Interoperability — basis, uncertainty, raw-data references diff --git a/Open3DCP_SCHEMA.md b/Open3DCP_SCHEMA.md index 1a92193..a2735ef 100644 --- a/Open3DCP_SCHEMA.md +++ b/Open3DCP_SCHEMA.md @@ -1,7 +1,9 @@ -# Open3DCP v1.6 +# Open3DCP v1.7 **Open Data Standard for 3D Concrete Printing** +> **v1.7 (2026-06-04):** **Aggregate-conditioning columns added** so effective (free) mix water is recoverable when aggregates are batched off SSD: `aggregate_moisture_state`, `aggregate_absorption_pct`, `aggregate_moisture_content_pct` (ASTM C127/C128, C566), plus a process flag `aggregate_prewetted` for the common practice of pre-wetting aggregate to a damp condition. 
Tooling/fidelity fixes: imperial-tonnage units (`lb_yd3`, US short ton, UK long ton) added and a bare "ton" rejected as ambiguous; ingestion fidelity refined so relational foreign keys no longer count against coverage; test-method crosswalk completed. Backward-compatible (additive); v1.6 datasets remain valid. +> > **v1.6 (2026-06-03):** **kg/m³ adopted as the primary reporting basis** (industry/field standard); mass-% retained as a derived secondary representation. New columns: `original_basis`, `mix_density_kg_m3`, `total_binder_kg_m3` (lossless basis conversion); `compressive_strength_stddev_mpa`, `flexural_strength_stddev_mpa`, `tensile_strength_stddev_mpa`, `elastic_modulus_stddev_gpa`, `interlayer_bond_stddev_mpa` (per-measurement uncertainty); `raw_data_doi`, `stress_strain_file`, `rheology_curve_file`, `microstructure_image`, `raw_data_file` (raw-data references). Backward-compatible (additive). Improves interoperability and ingestion fidelity for relational concrete datasets. > > **v1.5 (2026-04-16):** Pigment columns: `iron_oxide_pigment`, `titanium_dioxide_pigment`, `chromium_oxide_pigment`, `carbon_black_pigment`, `pigment_other`. Pigments are ultra-fine (~1 um), used at 1-5% in architectural 3DCP, with significant impact on packing, water demand, and microstructure. The canonical column list below is the source of truth for the public v1.5 schema. @@ -197,13 +199,28 @@ Specialized rheology modifiers for 3DCP thixotropy and shape retention. | Column | Type | Description | |--------|------|-------------| -| `water` | real | Total mix water (mass-% of total wet mix) | +| `water` | real | Free (added) mix water, aggregates at SSD basis (mass-% of total wet mix). See *Aggregate Conditioning* to recover effective water when batched off SSD. 
| | `w_c_ratio` | real | Water-to-cement ratio (water / cement only) | | `w_b_ratio` | real | Water-to-binder ratio (water / all cementitious materials) | | `a_b_ratio` | real | Aggregate-to-binder ratio | | `water_premix_pct` | real | % of water added during pre-mix phase | | `water_temperature_c` | real | Water temperature at mixing (C) | +### Aggregate Conditioning (v1.7) + +As-batched aggregate moisture relative to the **SSD** (saturated surface-dry) reference, so the +**effective (free) mix water** can be recovered when aggregates are batched off SSD — a common +3DCP practice (see `aggregate_prewetted` in *Mixing Process*). Recorded at mix level, not per +aggregate fraction. The free water an aggregate contributes (+) or absorbs (−) is +`aggregate_moisture_content_pct − aggregate_absorption_pct`; together with the SSD-basis `water` +column this makes the water accounting unambiguous without duplicating the w/c and w/b ratios. + +| Column | Type | Description | Test Method | +|--------|------|-------------|-------------| +| `aggregate_moisture_state` | varchar | As-batched condition: `oven_dry` / `air_dry` / `SSD` / `wet` | -- | +| `aggregate_absorption_pct` | real | 24-h aggregate absorption, % of oven-dry mass | ASTM C127 / C128 | +| `aggregate_moisture_content_pct` | real | Total as-batched aggregate moisture, % of oven-dry mass (free moisture = this − absorption) | ASTM C566 | + ### Mix Basis (v1.6) kg/m³ is the primary basis; the mass-% composition columns are a derived secondary representation. Record the source's native basis and the mix density so the two convert without any assumption. @@ -293,6 +310,7 @@ These columns capture the full extrusion printing process. 
Null for cast specime | `mixer_type` | varchar | Mixer type (pan, planetary, twin-shaft, continuous) | -- | | `shear_rate_per_s` | real | Applied shear rate during mixing | 1/s | | `admixture_addition_point` | varchar | When admixtures were added (dry, wet, delayed) | -- | +| `aggregate_prewetted` | boolean | Aggregate pre-wetted / pre-soaked before batching (common 3DCP practice; pairs with *Aggregate Conditioning*) | -- | ### Environmental Conditions @@ -529,5 +547,5 @@ If you use Open3DCP in your research, please cite: --- -*Open3DCP v1.6 -- Last updated: 2026-06-03* +*Open3DCP v1.7 -- Last updated: 2026-06-04* *Maintained by [Sunnyday Technologies](https://sunn3d.com), Wisconsin, USA* diff --git a/Open3DCP_TERM_JUSTIFICATION.md b/Open3DCP_TERM_JUSTIFICATION.md index 94cdf99..fbe5fa9 100644 --- a/Open3DCP_TERM_JUSTIFICATION.md +++ b/Open3DCP_TERM_JUSTIFICATION.md @@ -13,7 +13,7 @@ normalize to that canonical term. Per term, the table gives its governing standa where one exists — its relational-schema crosswalk. 3DCP-only terms are justified against RILEM TC 276-DFC / TC 304-ADC. -**Coverage:** 237 canonical `mix_designs` terms, grouped into 24 sections. +**Coverage:** 241 canonical `mix_designs` terms, grouped into 25 sections. --- @@ -145,7 +145,7 @@ RILEM TC 276-DFC / TC 304-ADC. | Term | Type | Definition | Standard | Justification & crosswalk | |---|---|---|---|---| -| `water` | real | Total mix water (mass-% of total wet mix) | — | Open3DCP-specific. | +| `water` | real | Free (added) mix water, aggregates at SSD basis (mass-% of total wet mix). See *Aggregate Conditioning* to recover effective water when batched off SSD. | — | Open3DCP-specific. | ## KEY RATIOS @@ -156,6 +156,14 @@ RILEM TC 276-DFC / TC 304-ADC. | `a_b_ratio` | real | Aggregate-to-binder ratio | — | Open3DCP-specific. | | `water_premix_pct` | real | % of water added during pre-mix phase | — | Open3DCP-specific. 
| | `water_temperature_c` | real | Water temperature at mixing (C) | — | Open3DCP-specific. | + +## AGGREGATE CONDITIONING + +| Term | Type | Definition | Standard | Justification & crosswalk | +|---|---|---|---|---| +| `aggregate_moisture_state` | varchar(20) | As-batched condition: `oven_dry` / `air_dry` / `SSD` / `wet` | — | Open3DCP-specific. | +| `aggregate_absorption_pct` | real | 24-h aggregate absorption, % of oven-dry mass | ASTM C127 / C128 | Open3DCP-specific. | +| `aggregate_moisture_content_pct` | real | Total as-batched aggregate moisture, % of oven-dry mass (free moisture = this − absorption) | ASTM C566 | Open3DCP-specific. | | `original_basis` | varchar(20) | Basis the source reported: `kg_m3` (primary), `mass_pct`, `volume`, or `lb_yd3` | — | Open3DCP-specific. | | `mix_density_kg_m3` | real | Total fresh wet-mix density (sum of kg/m³ constituents); enables exact mass-% ↔ kg/m³ conversion | — | Open3DCP-specific. | | `total_binder_kg_m3` | real | Total cementitious content (kg/m³); supports w/b and absolute back-conversion | — | Open3DCP-specific. | @@ -220,6 +228,7 @@ RILEM TC 276-DFC / TC 304-ADC. | `mixer_type` | varchar(50) | Mixer type (pan, planetary, twin-shaft, continuous) | — | Open3DCP-specific. | | `shear_rate_per_s` | real | Applied shear rate during mixing | — | Open3DCP-specific. | | `admixture_addition_point` | varchar(50) | When admixtures were added (dry, wet, delayed) | — | Open3DCP-specific. | +| `aggregate_prewetted` | boolean | Aggregate pre-wetted / pre-soaked before batching (common 3DCP practice; pairs with *Aggregate Conditioning*) | — | Open3DCP-specific. | ## ENVIRONMENTAL CONDITIONS diff --git a/README.md b/README.md index 584f178..7fb7a02 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ A typical 3DCP mix is 55-65% sand by total mass with little or no coarse aggrega ### Fibers -- The Reinforcement -Without formwork, printed concrete has no external confinement. 
Fibers provide ductility, crack control, and post-crack load carrying capacity. Open3DCP v1.6 tracks eight core fiber families by material, plus a cellulose compatibility column and industry-standard fiber characterization: +Without formwork, printed concrete has no external confinement. Fibers provide ductility, crack control, and post-crack load carrying capacity. Open3DCP v1.7 tracks eight core fiber families by material, plus a cellulose compatibility column and industry-standard fiber characterization: | Column | Material | Typical Use in 3DCP | |--------|----------|---------------------| @@ -321,7 +321,7 @@ See `Open3DCP_SCHEMA.md` for full disclaimer language. --- -## Test-method coverage (current schema v1.6) +## Test-method coverage (current schema v1.7) Open3DCP captures the materials, fresh-state, hardened-mechanical, interlayer, durability, and process data that 3DCP research and inter-laboratory studies routinely report. The table below lists representative test methods that researchers commonly cite when populating each column group; Open3DCP itself is method-neutral and accepts data from any equivalent test. 
diff --git a/crosswalk/open3dcp_to_relational.yaml b/crosswalk/open3dcp_to_relational.yaml index ffd27f8..aa66c23 100644 --- a/crosswalk/open3dcp_to_relational.yaml +++ b/crosswalk/open3dcp_to_relational.yaml @@ -14,7 +14,7 @@ # none - source field has no flat Open3DCP home -> triage sidecar meta: name: open3dcp_to_relational - open3dcp_version: "1.6" # MUST match the schema version in Open3DCP_SCHEMA.md / CHANGELOG.md + open3dcp_version: "1.7" # MUST match the schema version in Open3DCP_SCHEMA.md / CHANGELOG.md source_schema: "relational concrete database (15-tab spreadsheet template)" direction: bidirectional_where_possible notes: > @@ -107,7 +107,13 @@ mappings: - { open3dcp: curing_temperature_c, src: tests.initial_env_temperature_C, transform: identity, fidelity: exact } - { open3dcp: curing_humidity_pct, src: tests.initial_env_relative_humidity_percent, transform: identity, fidelity: exact } - { open3dcp: test_method_code, src: tests.test_type, transform: enum_map, fidelity: categorical, - notes: "The source test_type list (ASTM/EN designations) maps to Open3DCP free-text test_method_code." } + map: { + compression: ASTM_C39, compressive_strength: ASTM_C39, cube_compression: ASTM_C39, + cylinder_compression: ASTM_C39, three_point_bending: ASTM_C293, + four_point_bending: ASTM_C78, flexure: ASTM_C78, flexural_strength: ASTM_C78, + splitting_tensile: ASTM_C496, brazilian: ASTM_C496, direct_tension: ASTM_C307, + elastic_modulus: ASTM_C469, pull_off: ASTM_C1583, interlayer_bond: ASTM_C1583 }, + notes: "Source test_type members canonicalize to Open3DCP standard test codes (US/ASTM by default; four-point bending = third-point loading = ASTM C78). Unmapped members pass through and are flagged." 
} # data -> Open3DCP measured properties (value-keyed by quantity_reported) - { open3dcp: n_specimens, src: data.number_of_specimens, transform: identity, fidelity: exact } diff --git a/examples/index.html b/examples/index.html index 0d27257..1ec8edf 100644 --- a/examples/index.html +++ b/examples/index.html @@ -81,7 +81,7 @@

National map

open3dcp.org · Example entries - Open3DCP · schema v1.6 + Open3DCP · schema v1.7
diff --git a/examples/rilem-tc304-ils-mech/index.html b/examples/rilem-tc304-ils-mech/index.html index 5f277a4..a004eaf 100644 --- a/examples/rilem-tc304-ils-mech/index.html +++ b/examples/rilem-tc304-ils-mech/index.html @@ -163,7 +163,7 @@

4 · How this was built

← all examples · RILEM TC 304-ADC ILS-mech - Open3DCP · schema v1.6 + Open3DCP · schema v1.7
diff --git a/examples/uci-yeh-1998/index.html b/examples/uci-yeh-1998/index.html index d998e8d..9e0f022 100644 --- a/examples/uci-yeh-1998/index.html +++ b/examples/uci-yeh-1998/index.html @@ -177,7 +177,7 @@

4 · How this was built

← all examples · UCI Concrete Compressive Strength - Open3DCP · schema v1.6 + Open3DCP · schema v1.7
diff --git a/examples/uci-yeh-1998/uci-yeh-1998.fidelity.json b/examples/uci-yeh-1998/uci-yeh-1998.fidelity.json index 02f14a7..a94937f 100644 --- a/examples/uci-yeh-1998/uci-yeh-1998.fidelity.json +++ b/examples/uci-yeh-1998/uci-yeh-1998.fidelity.json @@ -9,7 +9,7 @@ "name": "field_coverage", "score": 100.0, "weight": 0.3, - "detail": "126 of 126 populated source fields mapped to Open3DCP columns (0 routed to triage sidecar).", + "detail": "126 of 126 mappable source fields mapped to Open3DCP columns (0 routed to triage sidecar).", "not_preserved_examples": [], "triage": "Sidecar fields are preserved in .unmapped.jsonl; review for schema extension." }, diff --git a/examples/uci-yeh-1998/uci-yeh-1998.fidelity.md b/examples/uci-yeh-1998/uci-yeh-1998.fidelity.md index 93e010b..3890c46 100644 --- a/examples/uci-yeh-1998/uci-yeh-1998.fidelity.md +++ b/examples/uci-yeh-1998/uci-yeh-1998.fidelity.md @@ -8,7 +8,7 @@ | Dimension | Score | Weight | Detail | |---|---:|---:|---| -| field_coverage | 100 | 0.30 | 126 of 126 populated source fields mapped to Open3DCP columns (0 routed to triage sidecar). | +| field_coverage | 100 | 0.30 | 126 of 126 mappable source fields mapped to Open3DCP columns (0 routed to triage sidecar). | | value_fidelity | 89 | 0.30 | 126 values written; 125 exact, 1 required an assumption. | | relational_integrity | 100 | 0.15 | 0 relational fields (reinforcement, geometry parametrization, devices, loading histories) had no flat home. | | file_data_capture | 100 | 0.15 | 0 curve/table/image/raw-file references cannot be held by the flat schema (pre-v1.6). | diff --git a/index.html b/index.html index b23cced..c6ff4b3 100644 --- a/index.html +++ b/index.html @@ -635,7 +635,7 @@ "https://doi.org/10.5281/zenodo.19647471" ], "identifier": "10.5281/zenodo.19647471", - "version": "1.6", + "version": "1.7", "license": "https://www.apache.org/licenses/LICENSE-2.0", "creator": { "@id": "https://sunn3d.com/#organization" @@ -851,7 +851,7 @@
-
Open Schema // 3D Concrete Printing // v1.6
+
Open Schema // 3D Concrete Printing // v1.7

Data.
Standard.
Open.

Open3DCP is an open schema for 3D-printable concrete mix design and test @@ -875,7 +875,7 @@

Data.
Standard.
Open.

Schema Version
-
v1.6current
+
v1.7current
Material Coverage
diff --git a/intake/index.html b/intake/index.html index b65ce73..0c67ff7 100644 --- a/intake/index.html +++ b/intake/index.html @@ -106,7 +106,7 @@

5Continue on GitHub

Open3DCP · Dataset intake - Sunnyday Technologies · schema v1.6 + Sunnyday Technologies · schema v1.7
diff --git a/llms.txt b/llms.txt index eab21a2..62fff35 100644 --- a/llms.txt +++ b/llms.txt @@ -15,7 +15,7 @@ This file is a high-density, plain-text summary intended for LLM retrieval and R - Maintainer: Sunnyday Technologies LLC (https://sunn3d.com) - Author: Nicholas Sonnentag (nick@sunn3d.com) - Contact: open3dcp@sunn3d.com -- Current public version: v1.6 (2026-06-03); canonical column list in Open3DCP_SCHEMA.md +- Current public version: v1.7 (2026-06-04); canonical column list in Open3DCP_SCHEMA.md ## What it is, what it isn't @@ -98,4 +98,4 @@ Open3DCP is the schema layer; CEMFORGE and M3-CRETE are companion projects rathe ## Status -Active. Public schema version v1.6 released 2026-06-03. New columns are added in minor versions as new test methods or material classes become relevant to 3DCP research. +Active. Public schema version v1.7 released 2026-06-04. New columns are added in minor versions as new test methods or material classes become relevant to 3DCP research. diff --git a/schema-reference/index.html b/schema-reference/index.html index 963aa3d..cefe297 100644 --- a/schema-reference/index.html +++ b/schema-reference/index.html @@ -486,7 +486,7 @@ "https://doi.org/10.5281/zenodo.19647471" ], "identifier": "10.5281/zenodo.19647471", - "version": "1.6", + "version": "1.7", "license": "https://www.apache.org/licenses/LICENSE-2.0", "creator": { "@id": "https://sunn3d.com/#organization" @@ -1074,12 +1074,12 @@

5. A worked example

Process columns capture nozzle, layer, speed, layer time gap, ambient conditions, and mix temperature. Test columns capture the spread, the static yield stress at the time of pumping, the orientation (Z, Y, or CAST), and the test method code.

The corresponding Open3DCP CSV row carries the key context needed to interpret the measurement. A consumer that has never seen this dataset can identify the print orientation, the test method, the lab, and every material composition entry from the row alone when those fields are populated. Where two compressive results come from the same mix but different orientations, they are recorded as separate rows that share a mix_id prefix and differ only in test_orientation_code; an ML pipeline can group or condition on orientation at training time. The lab_name and measurement_confidence fields enable downstream meta-analysis to weight or partition by lab and by data quality.

The same record is easier to extract for ML or statistical analysis because each feature is a column and each row is an observation. This is the central design property of the schema and the reason for the flat structure: no JSON traversal is required before a model or dataframe can consume the data.

-

The schema can be wrapped with dataset-level JSON-LD for deposit alongside Zenodo records. In the current public v1.6 release, the canonical machine-readable surfaces are the Markdown schema reference, the SQL DDL, and the Dataset JSON-LD embedded on this site. A full Open3DCP vocabulary namespace and example-deposit templates should be published only when they are complete enough for downstream users to rely on.

+

The schema can be wrapped with dataset-level JSON-LD for deposit alongside Zenodo records. In the current public v1.7 release, the canonical machine-readable surfaces are the Markdown schema reference, the SQL DDL, and the Dataset JSON-LD embedded on this site. A full Open3DCP vocabulary namespace and example-deposit templates should be published only when they are complete enough for downstream users to rely on.


6. Adopting Open3DCP in your lab

The schema is designed so that adoption does not require changing how your lab runs experiments. It requires only that the results are recorded in a uniform shape on the way out. The recommended workflow is four steps.

Step 1: Map your existing CSV columns to Open3DCP keys. Most laboratories already keep a mix-design spreadsheet with columns for each material, plus a results spreadsheet with columns for the tests. Mapping your column names to Open3DCP column names is usually a one-time step. Common synonyms from the literature normalize to the canonical term — e.g. "GGBFS," "ground granulated blast-furnace slag," and "slag" all map to slag; "C33 sand," "concrete sand," and "natural sand" all map to concrete_sand. Where you store densities and compute mass-percent on the fly, do that conversion once at export time so the deposited file is in the canonical shape.

-

Step 2: Add dataset-level metadata. A complete deposit usually includes the CSV file (one row per measurement), metadata describing authors, license, description, related identifiers, and the schema version reference. A formal JSON-LD context and reusable examples are planned, but should not be treated as public v1.6 contract files until they are published with the schema.

+

Step 2: Add dataset-level metadata. A complete deposit usually includes the CSV file (one row per measurement), metadata describing authors, license, description, related identifiers, and the schema version reference. A formal JSON-LD context and reusable examples are planned, but should not be treated as public v1.7 contract files until they are published with the schema.

Step 3: Validate with tooling that consumes Open3DCP-shaped reports. Where users maintain CAD, machine-log, formulation, or statistics tooling, an Open3DCP record can serve as the shared tabular record that those tools reference. This kind of cross-tool integration is the reason the Open3DCP schema is flat and stable: it lets independent tools share the same record with fewer bespoke adapters.

Step 4: Deposit on Zenodo, 4TU.ResearchData, or your institutional repository. Use a CC BY 4.0 license for the data itself (this is independent of the Apache 2.0 license on the schema). Add the Open3DCP @id reference to the metadata so search engines and citation tools can link your dataset to the schema. Mint a DOI. Cite both Open3DCP (using the Zenodo concept DOI, 10.5281/zenodo.19647471) and any test methods you used in your dataset description.

For a well-organized existing dataset, these steps may be a modest export and mapping exercise. The harder cases are legacy datasets where the experimental record is incomplete, and there the schema is honest: missing columns are simply null, and the measurement_confidence field flags the cells that were reconstructed from narrative prose. Open3DCP does not pretend that legacy data are as good as primary measurements, but it does let you publish them in a comparable shape with the limitations recorded explicitly.

diff --git a/scripts/check_examples.py b/scripts/check_examples.py index a4a4f7d..3bd685c 100644 --- a/scripts/check_examples.py +++ b/scripts/check_examples.py @@ -22,6 +22,11 @@ ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) EX = os.path.join(ROOT, "examples") + +try: # Windows consoles default to cp1252; keep the "·"/"✓" status glyphs from crashing + sys.stdout.reconfigure(encoding="utf-8") +except Exception: + pass REQUIRED = ["id", "dataset", "license", "license_url", "attribution", "classification"] ALLOWED_LICENSES = { "cc by 4.0", "cc0 1.0", "cc0", "us public domain", "nist open license", diff --git a/scripts/check_version.py b/scripts/check_version.py new file mode 100644 index 0000000..a2bf0ac --- /dev/null +++ b/scripts/check_version.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +"""Single-source-of-truth version guard for the Open3DCP schema release. + +The current schema version is read from the top entry of CHANGELOG.md (`## [X.Y.Z]`). +Every other place that must state the *current* version is then verified to match. +This is the canonical list of version-bearing locations: bumping the schema means +editing these files, and running this script confirms none was missed (it would have +caught, e.g., a stale `.well-known/mcp-manifest.json`). + +Historical references (changelog history, "added in vX.Y" notes, SQL section banners, +the examples' internal "(pre-vX.Y)" tool text) are deliberately NOT checked — they are +meant to name the version a feature first appeared in, not the current release. + +Run: python scripts/check_version.py # exit 0 if consistent, 1 otherwise +""" +import os +import re +import sys + +ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + +try: # Windows consoles default to cp1252; keep output (e.g. 
"→") from crashing the run + sys.stdout.reconfigure(encoding="utf-8") +except Exception: + pass + + +def canonical_version(): + """Full (X.Y.Z) and minor (X.Y) version from the first CHANGELOG entry.""" + text = open(os.path.join(ROOT, "CHANGELOG.md"), encoding="utf-8").read() + m = re.search(r"^##\s*\[(\d+)\.(\d+)\.(\d+)\]", text, re.M) + if not m: + print("FAIL: could not read a `## [X.Y.Z]` entry from CHANGELOG.md") + sys.exit(2) + major, minor, patch = m.groups() + return f"{major}.{minor}.{patch}", f"{major}.{minor}" + + +def checks(vv, v): + """(relative_path -> list of substrings that MUST be present) for version vv (X.Y.Z) / v (X.Y).""" + return { + "Open3DCP_SCHEMA.md": [f"# Open3DCP v{v}", f"*Open3DCP v{v} -- Last updated:"], + "crosswalk/open3dcp_to_relational.yaml": [f'open3dcp_version: "{v}"'], + "tools/ingest/pyproject.toml": [f'version = "{vv}"'], + "tools/ingest/open3dcp_ingest/__init__.py": [ + f'TARGET_SCHEMA_VERSION = "{v}"', f'__version__ = "{vv}"'], + "tools/ingest/README.md": [f"schema v{v} → tool"], + "index.html": [f'"version": "{v}"', f"// v{v}
", + f'>v{v}current'], + "schema-reference/index.html": [f'"version": "{v}"', f"current public v{v} release"], + "llms.txt": [f"Current public version: v{v}", f"Public schema version v{v}"], + "README.md": [f"current schema v{v}", f"Open3DCP v{v} tracks"], + "AGENTS.md": [f"v{v} defines the current public column vocabulary"], + "intake/index.html": [f"schema v{v}"], + ".well-known/mcp-manifest.json": [f"Current public schema version: v{v}"], + ".zenodo.json": [f'"version": "{vv}"'], + "examples/index.html": [f"schema v{v}"], + "examples/uci-yeh-1998/index.html": [f"schema v{v}"], + "examples/rilem-tc304-ils-mech/index.html": [f"schema v{v}"], + } + + +def main(): + vv, v = canonical_version() + print(f"canonical version (from CHANGELOG.md): {vv} (minor v{v})") + missing = [] + for rel, needles in checks(vv, v).items(): + path = os.path.join(ROOT, rel) + if not os.path.exists(path): + missing.append(f"{rel}: file not found") + continue + text = open(path, encoding="utf-8").read() + for needle in needles: + if needle not in text: + missing.append(f"{rel}: missing {needle!r}") + if missing: + print(f"\nFAIL — {len(missing)} version label(s) out of sync with v{vv}:") + for m in missing: + print(f" x {m}") + return 1 + print(f"OK — all {sum(len(n) for n in checks(vv, v).values())} version labels " + f"across {len(checks(vv, v))} files are consistent at v{vv}.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/sql/create_tables.sql b/sql/create_tables.sql index 1b889bf..8c0813a 100644 --- a/sql/create_tables.sql +++ b/sql/create_tables.sql @@ -144,6 +144,16 @@ CREATE TABLE IF NOT EXISTS mix_designs ( water_premix_pct REAL, -- % water added during pre-mix water_temperature_c REAL, -- Water temperature at mixing (C) + -- ----------------------------------------- + -- AGGREGATE CONDITIONING + -- (v1.7) As-batched aggregate moisture vs the SSD reference, so effective (free) mix water is + -- recoverable when aggregates are batched off SSD. 
Recorded at mix level (not per fraction). + -- free moisture = aggregate_moisture_content_pct - aggregate_absorption_pct + -- ----------------------------------------- + aggregate_moisture_state VARCHAR(20), -- As-batched condition: oven_dry | air_dry | SSD | wet + aggregate_absorption_pct REAL, -- 24-h aggregate absorption, % of oven-dry mass — ASTM C127/C128 + aggregate_moisture_content_pct REAL, -- Total as-batched aggregate moisture, % of oven-dry mass — ASTM C566 + -- ----------------------------------------- -- MIX BASIS (v1.6) -- kg/m3 is the PRIMARY reporting basis (industry standard: UCI/Yeh, RILEM, fib). @@ -210,6 +220,7 @@ CREATE TABLE IF NOT EXISTS mix_designs ( mixer_type VARCHAR(50), -- pan | planetary | twin-shaft | continuous shear_rate_per_s REAL, -- Applied shear rate (1/s) admixture_addition_point VARCHAR(50), -- dry | wet | delayed + aggregate_prewetted BOOLEAN, -- Aggregate pre-wetted/pre-soaked before batching (common 3DCP practice) -- ----------------------------------------- -- ENVIRONMENTAL CONDITIONS diff --git a/tools/ingest/README.md b/tools/ingest/README.md index ca70e99..4d3e669 100644 --- a/tools/ingest/README.md +++ b/tools/ingest/README.md @@ -12,8 +12,8 @@ triage sidecar**. Bridges relational concrete databases and the Open3DCP flat sc The ingestion tool and crosswalk are **versioned together with the Open3DCP schema** and must be updated every time the schema changes: -- The package version's **MAJOR.MINOR tracks the schema version** it targets (schema v1.6 → tool - `1.6.x`). Check with `open3dcp-ingest --version`. +- The package version's **MAJOR.MINOR tracks the schema version** it targets (schema v1.7 → tool + `1.7.x`). Check with `open3dcp-ingest --version`. - `TARGET_SCHEMA_VERSION` in [`open3dcp_ingest/__init__.py`](open3dcp_ingest/__init__.py) is the authoritative target; the crosswalk declares `meta.open3dcp_version`. 
- On every run, the tool **warns if the crosswalk's schema version ≠ the tool's target**, so a diff --git a/tools/ingest/open3dcp_ingest/__init__.py b/tools/ingest/open3dcp_ingest/__init__.py index 40aa3e4..9fa9cee 100644 --- a/tools/ingest/open3dcp_ingest/__init__.py +++ b/tools/ingest/open3dcp_ingest/__init__.py @@ -15,8 +15,8 @@ # Versioning policy: the tool's MAJOR.MINOR tracks the Open3DCP schema version it targets. # Bumping the schema (e.g. 1.6 -> 1.7) requires updating the crosswalk + this constant + tests. -TARGET_SCHEMA_VERSION = "1.6" -__version__ = "1.6.0" +TARGET_SCHEMA_VERSION = "1.7" +__version__ = "1.7.0" __all__ = ["convert", "IngestResult", "Crosswalk", "TARGET_SCHEMA_VERSION", "__version__"] diff --git a/tools/ingest/open3dcp_ingest/fidelity.py b/tools/ingest/open3dcp_ingest/fidelity.py index 66f1a35..38f1f56 100644 --- a/tools/ingest/open3dcp_ingest/fidelity.py +++ b/tools/ingest/open3dcp_ingest/fidelity.py @@ -58,6 +58,13 @@ def to_dict(self) -> dict[str, Any]: } +def _is_relational_key(source: str) -> bool: + """A foreign key / identifier (e.g. `specimens.batch_id`) has no place in a denormalized + flat row -- the join is implicit -- so it must not be penalized as a coverage failure.""" + leaf = source.rsplit(".", 1)[-1].lower() + return leaf == "id" or leaf.endswith("_id") + + def _grade(score: float) -> str: if score >= 90: return "A (high fidelity)" if score >= 75: return "B (good; review flagged items)" @@ -72,13 +79,19 @@ def score(result: IngestResult) -> FidelityReport: # 1. field coverage -------------------------------------------------------- src = result.n_source_fields mapped = result.n_mapped_fields - cov = (mapped / src * 100.0) if src else 100.0 - dropped = [u.source for u in result.unmapped] + # Relational plumbing (foreign keys / identifiers) is intentionally not carried by the + # flat row, so it is excluded from the coverage denominator rather than counted as a loss. 
+ keys = [u for u in result.unmapped if _is_relational_key(u.source)] + real_dropped = [u for u in result.unmapped if not _is_relational_key(u.source)] + eff_src = max(0, src - len(keys)) + cov = min(100.0, (mapped / eff_src * 100.0) if eff_src else 100.0) + key_note = (f" {len(keys)} relational keys/IDs excluded from coverage (a flat row needs none)." + if keys else "") dims.append(Dimension( "field_coverage", cov, - f"{mapped} of {src} populated source fields mapped to Open3DCP columns " - f"({len(result.unmapped)} routed to triage sidecar).", - not_preserved=sorted(set(dropped)), + f"{mapped} of {eff_src} mappable source fields mapped to Open3DCP columns " + f"({len(real_dropped)} routed to triage sidecar).{key_note}", + not_preserved=sorted(set(u.source for u in real_dropped)), triage="Sidecar fields are preserved in .unmapped.jsonl; review for schema extension.", )) diff --git a/tools/ingest/open3dcp_ingest/units.py b/tools/ingest/open3dcp_ingest/units.py index ac0152e..069ac0c 100644 --- a/tools/ingest/open3dcp_ingest/units.py +++ b/tools/ingest/open3dcp_ingest/units.py @@ -19,7 +19,12 @@ "m": ("m", 1.0), "in": ("m", 0.0254), "ft": ("m", 0.3048), # density (base: kg/m3) "kg_m3": ("kg_m3", 1.0), "g_cm3": ("kg_m3", 1000.0), "Mg_m3": ("kg_m3", 1000.0), - "lb_ft3": ("kg_m3", 16.018463), + "lb_ft3": ("kg_m3", 16.018463), "lb_yd3": ("kg_m3", 0.59327642), + # mass (base: kg) -- tonnage MUST be explicit; a bare "ton" is rejected (see convert()). + # short ton (2000 lb) and long ton (2240 lb) differ from the metric tonne by -9.3% / +1.6%. 
+ "kg": ("kg", 1.0), "g": ("kg", 1e-3), "lb": ("kg", 0.45359237), + "tonne": ("kg", 1000.0), "metric_ton": ("kg", 1000.0), "Mg": ("kg", 1000.0), + "short_ton": ("kg", 907.18474), "long_ton": ("kg", 1016.0469), # viscosity (base: Pa_s) "Pa_s": ("Pa_s", 1.0), "mPa_s": ("Pa_s", 1e-3), "cP": ("Pa_s", 1e-3), # dimensionless / fraction (base: fraction; "%" is /100) @@ -34,10 +39,14 @@ # Open3DCP target token per "base" so we can scale base -> the column's stored unit. _TARGET_FOR = { "MPa": ("Pa", 1e6), "GPa": ("Pa", 1e9), "Pa": ("Pa", 1.0), "kPa": ("Pa", 1e3), - "mm": ("m", 1e-3), "kg_m3": ("kg_m3", 1.0), "Pa_s": ("Pa_s", 1.0), + "mm": ("m", 1e-3), "kg_m3": ("kg_m3", 1.0), "kg": ("kg", 1.0), "Pa_s": ("Pa_s", 1.0), "%": ("frac", 0.01), "mm_sqrt_s": ("mm_sqrt_s", 1.0), "day": ("s", 86400.0), } +# Ambiguous tonnage tokens: a bare "ton"/"t" could be metric (1000 kg), US short (907 kg), +# or UK long (1016 kg) -- up to a ~12% spread. Reject rather than guess. +_AMBIGUOUS = {"ton", "tons", "Ton", "TON", "t", "T"} + class UnitError(ValueError): pass @@ -47,6 +56,12 @@ def convert(value, from_unit: str, to_unit: str): """Convert a numeric value between unit tokens. Raises UnitError on dimension mismatch.""" if value is None: return None + if from_unit in _AMBIGUOUS or to_unit in _AMBIGUOUS: + bad = from_unit if from_unit in _AMBIGUOUS else to_unit + raise UnitError( + f"ambiguous tonnage unit {bad!r}; specify metric_ton|short_ton|long_ton " + "(mass) or Mg_m3|lb_yd3 (density)" + ) if from_unit == to_unit: return value if from_unit not in _FACTORS: diff --git a/tools/ingest/pyproject.toml b/tools/ingest/pyproject.toml index 8411b2c..4d0e957 100644 --- a/tools/ingest/pyproject.toml +++ b/tools/ingest/pyproject.toml @@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta" [project] name = "open3dcp-ingest" -# Version MAJOR.MINOR tracks the Open3DCP schema version this tool targets (currently v1.6). 
-version = "1.6.0" +# Version MAJOR.MINOR tracks the Open3DCP schema version this tool targets (currently v1.7). +version = "1.7.0" description = "Public ingestion / conversion tool that translates external concrete datasets into the Open3DCP flat schema, with an honest fidelity score and a drop-nothing triage sidecar. Tool MAJOR.MINOR tracks the schema version." readme = "README.md" requires-python = ">=3.9" diff --git a/tools/ingest/tests/test_units.py b/tools/ingest/tests/test_units.py index 1d649b6..be39b71 100644 --- a/tools/ingest/tests/test_units.py +++ b/tools/ingest/tests/test_units.py @@ -20,3 +20,28 @@ def test_percent_to_fraction_blocked_target(): def test_dimension_mismatch_raises(): with pytest.raises(units.UnitError): units.convert(1.0, "mm", "MPa") + + +def test_lb_yd3_to_kg_m3(): + # the common US batch-ticket concentration unit + assert units.convert(1.0, "lb_yd3", "kg_m3") == pytest.approx(0.593276, rel=1e-4) + + +def test_short_ton_to_kg(): + assert units.convert(1.0, "short_ton", "kg") == pytest.approx(907.18474, rel=1e-6) + + +def test_metric_tonne_to_kg(): + assert units.convert(1.0, "metric_ton", "kg") == pytest.approx(1000.0, rel=1e-9) + + +def test_short_vs_long_ton_spread(): + short = units.convert(1.0, "short_ton", "kg") + long_ = units.convert(1.0, "long_ton", "kg") + assert (long_ - short) / short == pytest.approx(0.12, abs=0.01) + + +def test_ambiguous_ton_rejected(): + for tok in ("ton", "tons", "t", "T"): + with pytest.raises(units.UnitError): + units.convert(1.0, tok, "kg")