diff --git a/.github/workflows/run-codespell.yml b/.github/workflows/run-codespell.yml index 66d5159..787148c 100644 --- a/.github/workflows/run-codespell.yml +++ b/.github/workflows/run-codespell.yml @@ -2,10 +2,12 @@ name: Run Codespell on: push: - branches: [ "main" ] + branches: [ "main", "V2" ] pull_request: - branches: [ "main" ] + branches: [ "main", "V2" ] + + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.ref }} diff --git a/.github/workflows/test-code.yml b/.github/workflows/test-code.yml index 2eb5556..30fb99f 100644 --- a/.github/workflows/test-code.yml +++ b/.github/workflows/test-code.yml @@ -2,28 +2,80 @@ name: Test code on: push: - branches: main + branches: [main, V2] paths-ignore: - '*.md' - '.github/**' pull_request: - branches: main + branches: [main, V2] paths-ignore: - '*.md' - '.github/**' + workflow_dispatch: + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: test-code: + # Flat workflow modeled on NDI-matlab/.github/workflows/run-tests.yml. + # Order matters: check-code runs immediately after install-matbox, + # before any other step touches the MATLAB path. An earlier ordering + # that ran matbox.installRequirements() first made check-code fail + # with "codecheckToolbox: Too many input arguments" even on + # release: latest. installRequirements is moved after check-code + # since the tests need mksqlite + vhlab-toolbox-matlab but the + # static analysis does not. 
name: Analyse and test code - uses: ehennestad/matbox-actions/.github/workflows/test-code-workflow.yml@v1 - with: - matlab_release: R2021b - matlab_use_cache: true - matlab_products: Statistics_and_Machine_Learning_Toolbox - secrets: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up MATLAB + uses: matlab-actions/setup-matlab@v2 + with: + release: latest + cache: true + products: Statistics_and_Machine_Learning_Toolbox + + - name: Install MatBox + uses: ehennestad/matbox-actions/install-matbox@v1 + + - name: Check code + uses: ehennestad/matbox-actions/check-code@v1 + with: + source_directory: 'src' + + - name: Install repo dependencies (mksqlite, vhlab-toolbox-matlab) + uses: matlab-actions/run-command@v2 + if: always() + with: + command: | + addpath(genpath("src")); + addpath(genpath("tools")); + matbox.installRequirements(didtools.projectdir()); + + - name: Run tests + uses: matlab-actions/run-command@v2 + if: always() + with: + command: | + addpath(genpath("src")); + addpath(genpath("tests")); + import matlab.unittest.TestRunner; + import matlab.unittest.TestSuite; + runner = TestRunner.withTextOutput; + % did2 self-tests live under tests/+did2/+unittest/ now (the + % legacy +did/+unittest/ layout). TestSuite.fromPackage with + % IncludingSubpackages=true picks up the function-based test + % files and any future +abstract base classes. Mirrors the + % discovery pattern in test-symmetry.yml. + suite = TestSuite.fromPackage("did2.unittest", "IncludingSubpackages", true); + results = runner.run(suite); + disp(table(results)); + nFailed = sum([results.Failed]); + assert(nFailed == 0, sprintf("%d test(s) failed", nFailed)); diff --git a/docs/v2/PLAN.md b/docs/v2/PLAN.md index ff7038a..3d1c727 100644 --- a/docs/v2/PLAN.md +++ b/docs/v2/PLAN.md @@ -19,6 +19,8 @@ users. | 4 | Keep the `matlabdumbjsondb` backend. 
| Useful for tests, trivial deployments, and as a non-SQL reference implementation of the query model. | | 5 | Validate on insert by default; expose an `unsafe_insert` escape hatch for bulk loads; offer a `revalidate_all` maintenance op. | Schema files are the source of truth for what "valid" means. | | 6 | Plan lives at `docs/v2/PLAN.md` on the v2 development branch. | This file. | +| 7 | Provisional namespace: `+did2`. | Picked from §10 option A for the scaffold. Revisit before v2 reaches `main`. | +| 8 | Document instances use a top-level `document_class` header plus class-scoped property blocks (one block per class in the chain, keyed by `class_name` verbatim). | See §4.1. Matches V_gamma_SPEC.md "JSON Format: Document Instances" after the SPEC's two-step revision: (i) restore class-scoped blocks; (ii) drop the underscore prefix on all NDI-extension keys. Every key in the wire shape is a plain MATLAB identifier, so the in-memory MATLAB struct is the JSON shape verbatim. | Open questions are in §10. @@ -87,7 +89,7 @@ CREATE INDEX depends_on_name_value ON depends_on(name, value); Test 4 in the JSON1 probe confirmed that `STORED GENERATED ALWAYS AS (json_extract(body, '$.foo.bar'))` works with `mksqlite`. So for each scalar -`_queryable: true` path declared by the V_gamma schemas, we add a stored +`queryable: true` path declared by the V_gamma schemas, we add a stored generated column directly on `documents` plus an index on it. The set of paths is computed at database open by walking the loaded schemas: @@ -154,6 +156,51 @@ Validation timing: explicit, deferred. The database layer calls bulk loads. A `revalidate_all(db)` maintenance op exists for the case where schemas change. +### 4.1 In-memory document shape + +A `did2.document`'s `documentProperties` is a MATLAB struct that mirrors the +V_gamma JSON shape *as specified in V_gamma_SPEC.md, "JSON Format: Document +Instances"*, exactly. 
After V_gamma's "drop underscore prefixes" pass, +every key in the wire shape is a plain identifier with no leading +underscore, so the MATLAB struct field names match the JSON keys +one-to-one. `jsonencode` / `jsondecode` round-trip without any rewrite. + +Top-level keys populated by `did2.schema.cache.buildBlankDocument`: + +| Key | Type | Contents | +|------------------|--------------|----------| +| `document_class` | struct | `class_name` (concrete class), `class_version` (semver), `superclasses` (struct array; each entry has `class_name` + `class_version` — the document-instance form). | +| `depends_on` | struct array | Each entry: `name` (role) and `value` (the referenced document's id). Empty by default. | +| `base` | struct | Property block with the four base fields (`id`, `session_id`, `name`, `datestamp`). `id` auto-minted via `did.ido.unique_id`, `datestamp` set to current UTC millisecond ISO-8601 with trailing `Z`. | +| `<class_name>` | struct | One property block per class in the chain (root through concrete class). Each populated with `blank_value` for the fields *that class* declares. Empty `{}` if it declares none. | + +Field identity is `(declaring_class, name)`. Same-named fields in +different classes of the chain are distinct paths (`base.id` vs. +`<class_name>.id`), not an override. + +V_alpha → V_gamma at the document level: + +``` +V_alpha V_gamma +------- ------- +document_class.class_name document_class.class_name +document_class.class_version document_class.class_version +document_class.superclasses document_class.superclasses +document_class.property_list_name (gone; block key == class_name) +document_class.definition (gone; schema files own this) +document_class.validation (gone; schema files own this) +base.id, base.session_id, ... base.id, base.session_id, ... +<property_list_name>.<field> <class_name>.<field> 
+depends_on depends_on +``` + +The converter (§7) is now a thin per-document data migration: strip the +extra `document_class` sub-keys (`property_list_name`, `definition`, +`validation`); rename each property block whose `property_list_name` +differs from its `class_name` so the block key equals the class name; +done. NDI-matlab consumers that already speak the V_alpha class-scoped +layout need no source-code rewrites for the wire shape itself. + --- ## 5. Schema cache @@ -162,7 +209,7 @@ A `+did2/+schema/cache.m` (or similar) loads all V_gamma schema files once, resolves superclass chains, and pre-computes: - For each classname: the full inherited field list. -- The subset of fields with `_queryable: true`, split into scalar paths and +- The subset of fields with `queryable: true`, split into scalar paths and array-iteration paths. - The named composite type expansions (`duration` → `.seconds`, `.approximate`, `.source_unit`, `.source_value`). @@ -210,7 +257,7 @@ A `+did2/+convert/v1_to_v2.m` tool: the table lives next to the v2 schema package). 3. Renames top-level keys (`base.id` → `id`, etc.), rewrites collapsed fields on classes that bumped to `2.0.0` (`probe_location`, `treatment`, - `ontology_image`, `ontology_label`), and reshapes `_ontology` annotations + `ontology_image`, `ontology_label`), and reshapes `ontology` annotations to the V_gamma two-key form. 4. Validates against V_gamma. Successful docs insert into the new DB; failures land in a `quarantine` table with the original body and a reason string. @@ -293,3 +340,115 @@ no-op, so it no longer appears in `compile_options`. The functional tests Test 4 passing is the decisive simplification: queryable scalar paths can live as `STORED` generated columns on `documents` with their own indexes (§3.2), with no separate sidecar table for scalars. + +--- + +## 12. Progress log + +### 2026-05-11 — step 1 scaffold + +Started step 1 of §9 on branch `claude/start-v2-development-tA41P`. 
+ +Added: + +- `src/did/+did2/document.m` — V_gamma document object. API surface + in place (construct from JSON / struct / `(className, values)`, + `get` / `set` / `iterate`, `toJSON` / `toStruct`, `className` / + `classVersion`, `validate`, plus static `fromJSON` / `fromStruct` / + `blank`). Dot-path get/set is implemented in full. The `[*]` array + iterator is implemented via `iterate(arrayPath)`; the bare `get` + rejects paths containing `[*]` to keep the scalar/array distinction + honest. `validate` and `blank` delegate to the schema cache. +- `src/did/+did2/+schema/cache.m` — schema cache class. Singleton + bootstrap, schema-path resolution (env override + `DID_SCHEMA_PATH`, or sibling `did-schema/schemas/V_gamma` checkout), + `getClass`, and `superclasses` traversal are implemented; the + heavier methods (`fieldsFor`, `queryablePaths`, + `buildBlankDocument`, `validateDocument`) currently throw + `did2:notImplemented` and will be filled in next. +- `src/did/+did2/Contents.m` — package overview. +- `tests/+did2/testDocumentScaffold.m` — function-based unit tests + covering construction, dot-path get/set, iterate, round-trip JSON, + and the documented error IDs. Tests that depend on the schema cache + beyond what is implemented are deferred. + +Provisional decision (logged in §1 as #7): use `+did2` for the v2 +namespace during the scaffold, leaving the §10 rename-vs-parallel +question open for resolution before v2 reaches `main`. + +Next up: fill in `did2.schema.cache.fieldsFor`, +`queryablePaths`, and `buildBlankDocument`; then `validateDocument` +against the V_gamma meta-schema; then start the in-memory query +evaluator (step 2). + +### 2026-05-12 — class-scoped property blocks, then drop underscores + +Two upstream did-schema SPEC revisions landed back-to-back and both +required reworking the +did2 in-memory shape: + +1. **Class-scoped property blocks restored** (did-schema commit + `137f583`). 
V_gamma was amended to organise document instances + into per-class property blocks keyed by class name (one per class + in the chain), instead of the earlier flat namespace. Also moved + `class_name`/`class_version`/`superclasses` under a top-level + `document_class` header. +2. **Drop underscore prefixes** (did-schema commit `77c6363`). The + `_` convention for NDI-extension keys was replaced by plain + keys (`maturity_level`, `depends_on`, `file`, `fields`, + `mustBeNonEmpty`, `blank_value`, `ontology`, etc.). The + authoritative reserved-name list moved to upstream + `ndi_reserved_keys.json`. + +Combined, every key in a V_gamma wire shape is now a plain MATLAB +identifier, so the in-memory MATLAB struct is the JSON shape verbatim +— no `x_` aliasing, no `jsonencode`-time rewrite pass, no +`extractField` underscore-probe helper. Round-tripping a V_gamma +document is `jsondecode` then `jsonencode`. + +Implemented in `src/did/+did2/+schema/cache.m`: + +- `classChain(className)` — root-first list including the class itself + (e.g., `demoB -> {base, demoA, demoB}`). +- `ownFields(className)` — the `fields` list the class declares + directly (no inheritance), via direct `s.fields` access. +- `fieldsFor(className)` — merged inherited fields tagged with their + declaring class. Returns a struct array + `{declaringClass, fieldDef}`. +- `superclasses(className)` — walks + `s.document_class.superclasses[i].class_name` up the chain. +- `buildBlankDocument(className)` — class-scoped V_gamma document: + `doc.document_class.{class_name, class_version, superclasses}` + `doc.depends_on` — empty struct array of `{name, value}` + `doc.<class_name>` for each class in the chain + Base block has `id` auto-minted via `did.ido.unique_id()` and + `datestamp` set to current UTC ISO-8601. +- `validateDocument(docOrStruct)` — accepts a `did2.document` or its + underlying struct, walks the class chain, and validates each + class's `fields` against its property block. 
Error messages use + the qualified `<class>.<field>` form; new error IDs + `did2:validation:missingClassBlock` and `:badClassBlock`. +- `queryablePaths` stays a stub (belongs to steps 3 and 4). + +In `src/did/+did2/document.m`: + +- `className` / `classVersion` read + `documentProperties.document_class.class_name` / + `documentProperties.document_class.class_version`. +- `toJSON` is a bare `jsonencode` (no rewrite pass). The previous + `rewriteXUnderscoreKeys` helper is removed. + +Fixtures at `tests/+did2/fixtures/V_gamma/` (`base.json`, +`demoA.json`, `demoB.json`, `demoC.json`, `demoFile.json`, +`CURIE_lookups_meta.json`, `README.md`) rewritten to the +plain-key V_gamma shape. + +`tests/+did2/testSchemaCache.m` updated: 22 tests assert on the +plain-key shape (`doc.document_class.class_name`, `doc.depends_on`, +etc.) and check that a V_gamma document round-trips through +`toJSON`/`fromJSON` unchanged. + +Step 1 is complete to the level the rest of the plan needs. +`queryablePaths` is the only intentional stub left in the cache; +detailed per-named-composite validation and dependency-value checks +are deferred to focused follow-ups. Next up: step 2 — the in-memory +query evaluator over the class-qualified dot-paths. diff --git a/src/did/+did2/+schema/cache.m b/src/did/+did2/+schema/cache.m new file mode 100644 index 0000000..2d9c006 --- /dev/null +++ b/src/did/+did2/+schema/cache.m @@ -0,0 +1,548 @@ +classdef cache < handle + % did2.schema.cache V_gamma schema cache. + % + % Loads V_gamma schema files lazily, resolves superclass chains, + % builds blank documents in the V_gamma class-scoped wire shape, + % and validates documents against their class definitions. See + % docs/v2/PLAN.md §5. 
+ % + % Document shape (V_gamma "JSON Format: Document Instances"): + % document_class + % .class_name string concrete class + % .class_version string semver of the concrete class + % .superclasses array [{class_name, class_version}] + % depends_on array [{name, value}] + % <class_name> object one property block per class + % in the chain. Contains the + % field values that class + % declared (empty {} if it + % declares no fields). + % + % MATLAB representation: every key in the V_gamma wire shape is a + % valid MATLAB struct field name (no leading underscores anywhere + % after the V_gamma SPEC's "drop underscore prefixes" update), so + % the in-memory representation is the JSON shape verbatim. + % `jsondecode` returns a struct with the same field names, and + % `jsonencode` writes them back without any rename pass. + % + % did2.schema.cache Properties: + % schemaPath - filesystem path to a V_gamma schema dir. + % loadedClasses - containers.Map of classname -> raw schema. + % curieRegistry - parsed CURIE_lookups_meta.json contents. + % + % did2.schema.cache Static Methods: + % shared - return the process-wide singleton cache. + % setSchemaPath - rebuild the singleton at a new schema path. + % resetSingleton - drop the cached singleton (test helper). + % + % did2.schema.cache Methods: + % getClass - resolved class definition for a name. + % superclasses - ancestor chain (parent first, root last). + % classChain - root-first list including the class itself. + % ownFields - the `fields` list a class declares directly. + % fieldsFor - merged inherited fields tagged with the + % declaring class (struct array). + % queryablePaths - scalar and array-iteration paths (stub). + % buildBlankDocument - blank V_gamma document in the wire shape. + % validateDocument - validate a did2.document instance. + % + % See also: did2.document, docs/v2/PLAN.md. 
+ + properties (SetAccess = private) + schemaPath (1,:) char = '' + loadedClasses + curieRegistry struct = struct() + end + + methods (Access = private) + function obj = cache(schemaPath) + % Private constructor — use did2.schema.cache.shared(). + arguments + schemaPath (1,:) char = did2.schema.cache.defaultSchemaPath() + end + obj.schemaPath = schemaPath; + obj.loadedClasses = containers.Map('KeyType', 'char', 'ValueType', 'any'); + obj.loadRegistry(); + end + end + + methods + function s = getClass(obj, className) + % getClass - return the parsed schema struct for className. + arguments + obj + className (1,:) char + end + if obj.loadedClasses.isKey(className) + s = obj.loadedClasses(className); + return; + end + schemaFile = fullfile(obj.schemaPath, [className '.json']); + if ~isfile(schemaFile) + error('did2:schema:missingClass', ... + 'No schema file for class "%s" at %s.', className, schemaFile); + end + s = jsondecode(fileread(schemaFile)); + obj.loadedClasses(className) = s; + end + + function names = superclasses(obj, className) + % superclasses - ancestor chain (parent first, root last). + % Walks `document_class.superclasses[i].class_name` up the + % chain. For 'demoA' -> {'base'}. For 'base' -> {}. + arguments + obj + className (1,:) char + end + names = {}; + current = className; + visited = containers.Map('KeyType', 'char', 'ValueType', 'logical'); + while true + if visited.isKey(current) + error('did2:schema:cycle', ... + 'Superclass cycle detected starting at "%s".', className); + end + visited(current) = true; + s = obj.getClass(current); + if ~isstruct(s) || ~isfield(s, 'document_class') ... + || ~isstruct(s.document_class) ... + || ~isfield(s.document_class, 'superclasses') ... 
+ || isempty(s.document_class.superclasses) + break; + end + parent = obj.elementAt(s.document_class.superclasses, 1); + if ~isstruct(parent) || ~isfield(parent, 'class_name') + break; + end + parentName = char(parent.class_name); + names{end+1} = parentName; %#ok + current = parentName; + end + end + + function chain = classChain(obj, className) + % classChain - root-first list of class names including the + % class itself. For 'demoB' -> {'base', 'demoA', 'demoB'}. + arguments + obj + className (1,:) char + end + chain = [fliplr(obj.superclasses(className)), {className}]; + end + + function fields = ownFields(obj, className) + % ownFields - cell array of field defs the class declares + % directly (not inherited). + arguments + obj + className (1,:) char + end + s = obj.getClass(className); + if ~isstruct(s) || ~isfield(s, 'fields') || isempty(s.fields) + fields = {}; + return; + end + fields = obj.toCellArray(s.fields); + end + + function tagged = fieldsFor(obj, className) + % fieldsFor - merged inherited fields tagged with the + % declaring class. Returns a struct array with fields + % `declaringClass` (char) and `fieldDef` (the schema's + % `fields` entry). + arguments + obj + className (1,:) char + end + tagged = struct('declaringClass', {}, 'fieldDef', {}); + chain = obj.classChain(className); + for k = 1:numel(chain) + own = obj.ownFields(chain{k}); + for f = 1:numel(own) + tagged(end+1) = struct( ... + 'declaringClass', chain{k}, ... + 'fieldDef', own{f}); %#ok + end + end + end + + function paths = queryablePaths(~, ~) %#ok + % queryablePaths - planned for steps 3 & 4. Will return + % .scalar (cellstr of class-qualified dot-paths like + % 'daqsystem.sample_rate.hertz') and .array (cellstr of + % '[*]'-suffixed paths). Used by the SQL backend to drive + % generated columns (§3.2) and the queryable_array_elem + % sidecar (§3.3). + error('did2:notImplemented', ... 
+ 'did2.schema.cache.queryablePaths is not yet implemented (step 3/4).'); + end + + function doc = buildBlankDocument(obj, className) + % buildBlankDocument - blank V_gamma document in the + % class-scoped wire shape. Mints a fresh did_uid for + % base.id and the current UTC timestamp for base.datestamp. + arguments + obj + className (1,:) char + end + doc = struct(); + schema = obj.getClass(className); + schemaDC = schema.document_class; + + ancestors = obj.superclasses(className); + sc = struct('class_name', {}, 'class_version', {}); + for k = 1:numel(ancestors) + ancDC = obj.getClass(ancestors{k}).document_class; + sc(end+1) = struct( ... + 'class_name', char(ancDC.class_name), ... + 'class_version', char(ancDC.class_version)); %#ok + end + doc.document_class = struct( ... + 'class_name', char(schemaDC.class_name), ... + 'class_version', char(schemaDC.class_version), ... + 'superclasses', sc); + + doc.depends_on = struct('name', {}, 'value', {}); + + chain = obj.classChain(className); + for k = 1:numel(chain) + blockClass = chain{k}; + doc.(blockClass) = obj.buildBlockForClass(blockClass); + end + end + + function validateDocument(obj, docOrStruct) + % validateDocument - raise did2:validation:* on a + % non-conforming document. Accepts a did2.document or a + % plain struct. + arguments + obj + docOrStruct + end + if isa(docOrStruct, 'did2.document') + s = docOrStruct.toStruct(); + elseif isstruct(docOrStruct) + s = docOrStruct; + else + error('did2:validation:badInput', ... + 'validateDocument expects a did2.document or a struct, got %s.', ... + class(docOrStruct)); + end + if ~isfield(s, 'document_class') || ~isstruct(s.document_class) + error('did2:validation:missingClassName', ... + 'Document has no document_class header; cannot validate.'); + end + dc = s.document_class; + if ~isfield(dc, 'class_name') || isempty(dc.class_name) + error('did2:validation:missingClassName', ... 
+ 'Document has no document_class.class_name; cannot validate.'); + end + className = char(dc.class_name); + chain = obj.classChain(className); + for k = 1:numel(chain) + blockClass = chain{k}; + if ~isfield(s, blockClass) + error('did2:validation:missingClassBlock', ... + 'Document is missing the "%s" property block.', blockClass); + end + block = s.(blockClass); + if ~isstruct(block) + error('did2:validation:badClassBlock', ... + 'Property block "%s" must be a struct, got %s.', ... + blockClass, class(block)); + end + own = obj.ownFields(blockClass); + for f = 1:numel(own) + fieldDef = own{f}; + obj.validateField(block, fieldDef, blockClass, char(fieldDef.name)); + end + end + end + end + + methods (Static) + function obj = shared(varargin) + % shared - return the process-wide cache singleton. + persistent instance + if nargin == 1 && ischar(varargin{1}) && strcmp(varargin{1}, '-reset') + instance = []; + obj = []; + return; + end + if isempty(instance) || ~isvalid(instance) + if nargin >= 1 && ~isempty(varargin{1}) + schemaPath = varargin{1}; + else + schemaPath = did2.schema.cache.defaultSchemaPath(); + end + instance = did2.schema.cache(schemaPath); + end + obj = instance; + end + + function setSchemaPath(schemaPath) + % setSchemaPath - rebuild the singleton at a new schema path. + arguments + schemaPath (1,:) char + end + did2.schema.cache.shared('-reset'); + did2.schema.cache.shared(schemaPath); + end + + function resetSingleton() + % resetSingleton - drop the cached singleton. 
+ did2.schema.cache.shared('-reset'); + end + end + + methods (Static, Access = private) + function p = defaultSchemaPath() + envOverride = getenv('DID_SCHEMA_PATH'); + if ~isempty(envOverride) + p = envOverride; + return; + end + toolboxDir = did.toolboxdir(); + p = fullfile(toolboxDir, '..', '..', 'did-schema', 'schemas', 'V_gamma'); + end + + function ts = currentUTCTimestamp() + dt = datetime('now', 'TimeZone', 'UTC'); + dt.Format = 'yyyy-MM-dd''T''HH:mm:ss.SSS''Z'''; + ts = char(string(dt)); + end + + function len = stringLength(value) + if isstring(value) + len = strlength(value); + if numel(len) > 1 + len = max(len); + end + elseif ischar(value) + len = numel(value); + else + len = 0; + end + end + end + + methods (Access = private) + function loadRegistry(obj) + registryFile = fullfile(obj.schemaPath, 'CURIE_lookups_meta.json'); + if isfile(registryFile) + obj.curieRegistry = jsondecode(fileread(registryFile)); + end + end + + function out = toCellArray(~, raw) + if iscell(raw) + out = raw(:)'; + elseif isstruct(raw) + out = arrayfun(@(i) raw(i), 1:numel(raw), 'UniformOutput', false); + else + out = {raw}; + end + end + + function elem = elementAt(obj, raw, idx) + cells = obj.toCellArray(raw); + elem = cells{idx}; + end + + function block = buildBlockForClass(obj, className) + % buildBlockForClass - one property block populated with + % `blank_value` for every field the class declares + % directly. Base block also receives a fresh did_uid for + % `id` and the current UTC timestamp for `datestamp`. + block = struct(); + own = obj.ownFields(className); + for f = 1:numel(own) + fieldDef = own{f}; + fieldName = char(fieldDef.name); + blank = fieldDef.blank_value; + fieldType = char(fieldDef.type); + if strcmp(fieldType, 'structure') ... 
+ && (isempty(blank) || (isstruct(blank) && isempty(fieldnames(blank)))) + block.(fieldName) = obj.buildBlankStructure(fieldDef); + else + block.(fieldName) = blank; + end + end + if strcmp(className, 'base') + if isfield(block, 'id') + block.id = did.ido.unique_id(); + end + if isfield(block, 'datestamp') + block.datestamp = did2.schema.cache.currentUTCTimestamp(); + end + end + end + + function s = buildBlankStructure(obj, fieldDef) + s = struct(); + if ~isfield(fieldDef, 'fields') || isempty(fieldDef.fields) + return; + end + entries = obj.toCellArray(fieldDef.fields); + for k = 1:numel(entries) + subDef = entries{k}; + subName = char(subDef.name); + subBlank = subDef.blank_value; + subType = char(subDef.type); + if strcmp(subType, 'structure') ... + && (isempty(subBlank) || (isstruct(subBlank) && isempty(fieldnames(subBlank)))) + s.(subName) = obj.buildBlankStructure(subDef); + else + s.(subName) = subBlank; + end + end + end + + function validateField(obj, block, fieldDef, blockClass, fieldName) + % validateField - apply type, mustBe* flags, and + % constraints for one field against the property block. + % Skips absent fields unless the schema marks them + % mustBeNonEmpty. + mustBeNonEmpty = logical(fieldDef.mustBeNonEmpty); + if ~isfield(block, fieldName) + if mustBeNonEmpty + error('did2:validation:missingField', ... + 'Required field "%s.%s" is missing.', ... + blockClass, fieldName); + end + return; + end + value = block.(fieldName); + fieldType = char(fieldDef.type); + qualifiedName = sprintf('%s.%s', blockClass, fieldName); + obj.validateTypeShape(value, fieldType, qualifiedName); + + mustBeScalar = logical(fieldDef.mustBeScalar); + mustNotHaveNaN = logical(fieldDef.mustNotHaveNaN); + if mustBeNonEmpty && obj.isEmptyValue(value) + error('did2:validation:emptyField', ... + 'Field "%s" is required to be non-empty.', qualifiedName); + end + if mustBeScalar && ~obj.isScalarValue(value, fieldType) + error('did2:validation:notScalar', ... 
+ 'Field "%s" is required to be scalar.', qualifiedName); + end + if mustNotHaveNaN && isnumeric(value) && any(isnan(value(:))) + error('did2:validation:nanValue', ... + 'Field "%s" contains NaN.', qualifiedName); + end + constraints = fieldDef.constraints; + if isstruct(constraints) && ~isempty(fieldnames(constraints)) + obj.validateConstraints(value, constraints, fieldType, qualifiedName); + end + end + + function tf = isEmptyValue(~, value) + if isstring(value) + tf = all(strlength(value) == 0); + elseif ischar(value) + tf = isempty(value); + elseif isstruct(value) + tf = isempty(value) || isempty(fieldnames(value)); + else + tf = isempty(value); + end + end + + function tf = isScalarValue(~, value, fieldType) + switch fieldType + case {'char', 'string', 'did_uid', 'timestamp'} + tf = (ischar(value) && (isempty(value) || size(value,1) <= 1)) ... + || (isstring(value) && isscalar(value)); + otherwise + tf = isscalar(value); + end + end + + function validateTypeShape(~, value, fieldType, qualifiedName) + switch fieldType + case {'char', 'did_uid', 'timestamp'} + if ~(ischar(value) || (isstring(value) && isscalar(value))) + error('did2:validation:typeMismatch', ... + 'Field "%s" must be char/string (type %s).', qualifiedName, fieldType); + end + case 'string' + if ~(ischar(value) || isstring(value)) + error('did2:validation:typeMismatch', ... + 'Field "%s" must be string.', qualifiedName); + end + case 'boolean' + if ~(islogical(value) || (isnumeric(value) && all(value(:) == 0 | value(:) == 1))) + error('did2:validation:typeMismatch', ... + 'Field "%s" must be boolean.', qualifiedName); + end + case 'integer' + if ~isnumeric(value) || any(mod(value(:), 1) ~= 0) + error('did2:validation:typeMismatch', ... + 'Field "%s" must be integer.', qualifiedName); + end + case {'double', 'matrix'} + if ~isnumeric(value) + error('did2:validation:typeMismatch', ... 
+ 'Field "%s" must be numeric.', qualifiedName); + end + case 'structure' + if ~isstruct(value) + error('did2:validation:typeMismatch', ... + 'Field "%s" must be a struct.', qualifiedName); + end + case {'duration','volume','mass','length','voltage','current','frequency','ontology_term'} + if ~isstruct(value) + error('did2:validation:typeMismatch', ... + 'Field "%s" must be a struct (named composite type %s).', ... + qualifiedName, fieldType); + end + otherwise + % Unknown type - tolerated; the meta-schema's enum gates + % new types, so an unknown means tooling drift. + end + end + + function validateConstraints(~, value, constraints, ~, qualifiedName) + cnames = fieldnames(constraints); + for k = 1:numel(cnames) + cname = cnames{k}; + cval = constraints.(cname); + switch cname + case 'maxLength' + len = did2.schema.cache.stringLength(value); + if len > cval + error('did2:validation:maxLength', ... + 'Field "%s" exceeds maxLength %d (got %d).', qualifiedName, cval, len); + end + case 'minLength' + len = did2.schema.cache.stringLength(value); + if len < cval + error('did2:validation:minLength', ... + 'Field "%s" below minLength %d (got %d).', qualifiedName, cval, len); + end + case 'minimum' + if isnumeric(value) && any(value(:) < cval) + error('did2:validation:minimum', ... + 'Field "%s" below minimum %g.', qualifiedName, cval); + end + case 'maximum' + if isnumeric(value) && any(value(:) > cval) + error('did2:validation:maximum', ... + 'Field "%s" above maximum %g.', qualifiedName, cval); + end + case 'enum' + choices = string(cval); + v = string(value); + if ~any(strcmp(v, choices)) + error('did2:validation:enum', ... + 'Field "%s" value "%s" not in enum.', qualifiedName, v); + end + otherwise + % Unrecognised constraint keys are tolerated; + % `pattern` and similar can be added later. 
+ end + end + end + end +end diff --git a/src/did/+did2/Contents.m b/src/did/+did2/Contents.m new file mode 100644 index 0000000..71634bb --- /dev/null +++ b/src/did/+did2/Contents.m @@ -0,0 +1,30 @@ +% +did2 DID v2 (V_gamma) MATLAB toolbox — development scaffold. +% +% The +did2 package is the parallel-namespace v2 line of DID-matlab. +% It consumes the V_gamma schema set from the did-schema repository +% directly, without translating to the V_alpha base.* / +% document_class.* / <property_list_name>.* nesting that the legacy +did +% package uses. See docs/v2/PLAN.md for the full design and the +% step-by-step order of work. +% +% Files +% document - V_gamma document object (load / validate / +% serialise / dot-path access). +% Contents - this overview. +% +% Subpackages +% +schema - schema cache and validation entry points. +% +convert - (planned) v1-to-v2 conversion utilities. +% +query - (planned) query tree, in-memory evaluator, +% SQLite/JSON1 compiler. +% +% Conventions +% - New code uses camelCase identifiers and arguments-block input +% validation, per AGENTS.md. +% - Document data is the class-scoped V_gamma JSON shape (one property +% block per class in the chain; plain keys with no `_` prefix). +% - The schema cache is the single source of truth for what +% "valid" means; runtime reflection over values never substitutes +% for the schema. +% +% See also: did, docs/v2/PLAN.md. diff --git a/src/did/+did2/document.m b/src/did/+did2/document.m new file mode 100644 index 0000000..6727be1 --- /dev/null +++ b/src/did/+did2/document.m @@ -0,0 +1,290 @@ +classdef document < handle + % did2.document V_gamma document object. + % + % Holds a single V_gamma document in the class-scoped wire shape + % (see V_gamma_SPEC.md "JSON Format: Document Instances"), validates + % it against the V_gamma schema set, and serialises it back to JSON. + % + % In-memory representation. 
The V_gamma document shape carries a + % top-level `document_class` header (with sub-keys `class_name`, + % `class_version`, `superclasses`), plus a top-level `depends_on` + % array, plus one property block per class in the chain keyed by + % class name. After V_gamma's "drop underscore prefixes" pass, + % every key in the wire shape is a valid MATLAB struct field name, + % so the in-memory representation is the JSON shape verbatim. + % `jsonencode`/`jsondecode` round-trip without any name-rewrite + % pass. + % + % did2.document Properties: + % documentProperties - struct mirroring the V_gamma JSON shape. + % + % did2.document Methods: + % document - construct from JSON text, a struct, or + % (className, valueStruct). + % get - dot-path getter into documentProperties. + % set - dot-path setter into documentProperties. + % iterate - element iterator over an array-of-structure path. + % toJSON - serialise to V_gamma JSON text. + % toStruct - return the underlying struct. + % className - shorthand for document_class.class_name. + % classVersion - shorthand for document_class.class_version. + % validate - validate this document against its schema. + % + % did2.document Static Methods: + % fromJSON - construct from a JSON string. + % fromStruct - construct from a struct. + % blank - construct a blank instance of the named class. + % + % See also: did2.schema.cache, docs/v2/PLAN.md. + + properties + documentProperties (1,1) struct = struct() + end + + properties (Access = private) + schemaCacheHandle = [] + end + + methods + function obj = document(varargin) + % document - construct a did2.document. + % + % D = did2.document() creates an empty document. + % D = did2.document(jsonText) parses a JSON string. + % D = did2.document(s) wraps an existing struct. + % D = did2.document(className, valueStruct) builds a blank + % instance of className and overlays valueStruct. 
+ + if nargin == 0 + return; + end + + firstArg = varargin{1}; + if nargin == 1 && (ischar(firstArg) || (isstring(firstArg) && isscalar(firstArg))) + obj.documentProperties = did2.document.parseJSONText(firstArg); + elseif nargin == 1 && isstruct(firstArg) + obj.documentProperties = firstArg; + elseif nargin >= 1 && (ischar(firstArg) || (isstring(firstArg) && isscalar(firstArg))) ... + && nargin == 2 && isstruct(varargin{2}) + obj.documentProperties = did2.document.buildBlank(char(firstArg)); + obj.documentProperties = did2.document.mergeStruct( ... + obj.documentProperties, varargin{2}); + else + error('did2:document:badInput', ... + 'did2.document accepts (), (jsonText), (struct), or (className, valueStruct).'); + end + end + + function value = get(obj, fieldPath) + % get - read documentProperties at a dot-path. + % + % v = doc.get('base.id') returns the id from the base + % property block. `[*]` array iteration is handled by + % `iterate(arrayPath)`, not by this method. + arguments + obj + fieldPath (1,:) char + end + value = did2.document.dotPathGet(obj.documentProperties, fieldPath); + end + + function obj = set(obj, fieldPath, value) + % set - write a value at a dot-path inside documentProperties. + arguments + obj + fieldPath (1,:) char + value + end + obj.documentProperties = did2.document.dotPathSet( ... + obj.documentProperties, fieldPath, value); + end + + function elements = iterate(obj, arrayPath) + % iterate - return the element list at an array-of-structure path. + arguments + obj + arrayPath (1,:) char + end + elements = did2.document.dotPathGet(obj.documentProperties, arrayPath); + if isempty(elements) + elements = struct([]); + elseif ~isstruct(elements) + error('did2:document:notArrayOfStructure', ... + 'Path "%s" is not an array-of-structure field.', arrayPath); + end + end + + function jsonText = toJSON(obj, opts) + % toJSON - serialise documentProperties to V_gamma JSON text. 
+ arguments + obj + opts.PrettyPrint (1,1) logical = false + end + jsonText = jsonencode(obj.documentProperties, ... + 'PrettyPrint', opts.PrettyPrint); + end + + function s = toStruct(obj) + s = obj.documentProperties; + end + + function name = className(obj) + % className - the document's `document_class.class_name`. + if isfield(obj.documentProperties, 'document_class') ... + && isstruct(obj.documentProperties.document_class) ... + && isfield(obj.documentProperties.document_class, 'class_name') + name = char(obj.documentProperties.document_class.class_name); + else + error('did2:document:missingField', ... + 'Document has no document_class.class_name.'); + end + end + + function v = classVersion(obj) + % classVersion - the document's `document_class.class_version`. + if isfield(obj.documentProperties, 'document_class') ... + && isstruct(obj.documentProperties.document_class) ... + && isfield(obj.documentProperties.document_class, 'class_version') + v = char(obj.documentProperties.document_class.class_version); + else + error('did2:document:missingField', ... + 'Document has no document_class.class_version.'); + end + end + + function validate(obj, opts) + % validate - check this document against its V_gamma schema. + arguments + obj + opts.SchemaCache = [] + end + cache = obj.resolveSchemaCache(opts.SchemaCache); + cache.validateDocument(obj); + end + end + + methods (Static) + function obj = fromJSON(jsonText) + arguments + jsonText (1,:) char + end + obj = did2.document(jsonText); + end + + function obj = fromStruct(s) + arguments + s (1,1) struct + end + obj = did2.document(s); + end + + function obj = blank(className, opts) + % blank - construct a blank V_gamma document of the named class. 
+ arguments + className (1,:) char + opts.SchemaCache = [] + end + obj = did2.document(); + obj.documentProperties = did2.document.buildBlank(className, opts.SchemaCache); + end + + function value = dotPathGet(s, fieldPath) + arguments + s + fieldPath (1,:) char + end + if contains(fieldPath, '[*]') + error('did2:document:arrayPathHere', ... + ['"%s" contains [*]; use iterate() for ' ... + 'array-of-structure traversal.'], fieldPath); + end + parts = strsplit(fieldPath, '.'); + value = s; + for k = 1:numel(parts) + segment = parts{k}; + if ~isstruct(value) || ~isfield(value, segment) + error('did2:document:missingField', ... + 'Field "%s" not present while resolving "%s".', ... + segment, fieldPath); + end + value = value.(segment); + end + end + + function s = dotPathSet(s, fieldPath, value) + arguments + s (1,1) struct + fieldPath (1,:) char + value + end + parts = strsplit(fieldPath, '.'); + s = did2.document.assignNested(s, parts, value); + end + end + + methods (Static, Access = private) + function out = parseJSONText(jsonText) + text = char(jsonText); + out = jsondecode(text); + if ~isstruct(out) + error('did2:document:jsonNotObject', ... 
+ 'Top-level JSON value must be an object, got %s.', class(out)); + end + end + + function s = assignNested(s, parts, value) + head = parts{1}; + if isscalar(parts) + s.(head) = value; + return; + end + if isfield(s, head) && isstruct(s.(head)) + inner = s.(head); + else + inner = struct(); + end + s.(head) = did2.document.assignNested(inner, parts(2:end), value); + end + + function s = mergeStruct(base, overlay) + s = base; + if ~isstruct(overlay) + return; + end + f = fieldnames(overlay); + for k = 1:numel(f) + name = f{k}; + if isfield(s, name) && isstruct(s.(name)) && isstruct(overlay.(name)) + s.(name) = did2.document.mergeStruct(s.(name), overlay.(name)); + else + s.(name) = overlay.(name); + end + end + end + + function s = buildBlank(className, cacheOverride) + if nargin < 2 + cacheOverride = []; + end + if isempty(cacheOverride) + cache = did2.schema.cache.shared(); + else + cache = cacheOverride; + end + s = cache.buildBlankDocument(className); + end + end + + methods (Access = private) + function cache = resolveSchemaCache(obj, override) + if ~isempty(override) + cache = override; + return; + end + if isempty(obj.schemaCacheHandle) + obj.schemaCacheHandle = did2.schema.cache.shared(); + end + cache = obj.schemaCacheHandle; + end + end +end diff --git a/tests/+did2/+test/+fixture/.gitkeep b/tests/+did2/+test/+fixture/.gitkeep new file mode 100644 index 0000000..9a47dfc --- /dev/null +++ b/tests/+did2/+test/+fixture/.gitkeep @@ -0,0 +1,4 @@ +% Placeholder. did2 test fixture helpers go here, matching the +% legacy tests/+did/+test/+fixture/ slot. Static V_gamma JSON +% fixtures live as a sibling at tests/+did2/fixtures/V_gamma/ — +% this MATLAB package is for fixture-helper code, not data files. diff --git a/tests/+did2/+test/+helper/.gitkeep b/tests/+did2/+test/+helper/.gitkeep new file mode 100644 index 0000000..de4d847 --- /dev/null +++ b/tests/+did2/+test/+helper/.gitkeep @@ -0,0 +1,2 @@ +% Placeholder. 
did2 test helpers go here, matching the legacy +% tests/+did/+test/+helper/ slot. diff --git a/tests/+did2/+unittest/+abstract/.gitkeep b/tests/+did2/+unittest/+abstract/.gitkeep new file mode 100644 index 0000000..0c4787a --- /dev/null +++ b/tests/+did2/+unittest/+abstract/.gitkeep @@ -0,0 +1,2 @@ +% Placeholder. Abstract base test classes for did2.unittest go here, +% matching the legacy tests/+did/+unittest/+abstract/ slot. diff --git a/tests/+did2/+unittest/testDocumentScaffold.m b/tests/+did2/+unittest/testDocumentScaffold.m new file mode 100644 index 0000000..b5ab5cc --- /dev/null +++ b/tests/+did2/+unittest/testDocumentScaffold.m @@ -0,0 +1,65 @@ +function tests = testDocumentScaffold +% testDocumentScaffold - smoke tests for the did2.document scaffold. +% +% Run with: +% results = runtests('did2.unittest.testDocumentScaffold'); +% +% These tests exercise the surface API of did2.document that does not +% depend on the schema cache being fully implemented yet: +% construction from a struct or JSON, dot-path get/set, iterate(), +% and toJSON()/toStruct() round-trips. Tests that depend on the +% schema cache live in did2.unittest.testSchemaCache. 
+ +tests = functiontests(localfunctions); +end + +function testConstructFromStruct(testCase) +s = struct('id', 'abc', 'session_id', 'sess', 'name', 'unit-test'); +doc = did2.document(s); +verifyEqual(testCase, doc.get('id'), 'abc'); +verifyEqual(testCase, doc.get('name'), 'unit-test'); +end + +function testConstructFromJSON(testCase) +jsonText = '{"id":"abc","sample_rate":{"hertz":30000,"approximate":false}}'; +doc = did2.document(jsonText); +verifyEqual(testCase, doc.get('id'), 'abc'); +verifyEqual(testCase, doc.get('sample_rate.hertz'), 30000); +verifyFalse(testCase, doc.get('sample_rate.approximate')); +end + +function testSetCreatesNestedPath(testCase) +doc = did2.document(); +doc.set('app.app_name', 'ndi_app_spikeextractor'); +verifyEqual(testCase, doc.get('app.app_name'), 'ndi_app_spikeextractor'); +end + +function testToJSONRoundTrip(testCase) +s = struct('id', 'abc', 'datestamp', '2026-05-11T00:00:00.000Z'); +doc = did2.document(s); +jsonText = doc.toJSON(); +doc2 = did2.document(jsonText); +verifyEqual(testCase, doc2.toStruct(), s); +end + +function testIterateReturnsStructArray(testCase) +s = struct('axes', struct('name', {'x','y','z'}, 'unit', {'um','um','um'})); +doc = did2.document(s); +elements = doc.iterate('axes'); +verifyEqual(testCase, numel(elements), 3); +verifyEqual(testCase, elements(2).name, 'y'); +end + +function testGetMissingFieldErrors(testCase) +doc = did2.document(struct('id', 'abc')); +verifyError(testCase, @() doc.get('nope.missing'), 'did2:document:missingField'); +end + +function testGetRejectsArrayPath(testCase) +doc = did2.document(struct('axes', struct('name', {'x'}))); +verifyError(testCase, @() doc.get('axes[*].name'), 'did2:document:arrayPathHere'); +end + +function testBadConstructorInput(testCase) +verifyError(testCase, @() did2.document(42), 'did2:document:badInput'); +end diff --git a/tests/+did2/+unittest/testSchemaCache.m b/tests/+did2/+unittest/testSchemaCache.m new file mode 100644 index 0000000..c9b0177 --- /dev/null 
+++ b/tests/+did2/+unittest/testSchemaCache.m @@ -0,0 +1,237 @@ +function tests = testSchemaCache +% testSchemaCache - exercises did2.schema.cache against the in-repo +% V_gamma fixtures at tests/+did2/fixtures/V_gamma/. Also covers +% did2.document.blank() and did2.document.validate() end-to-end. +% +% Run with: +% results = runtests('did2.unittest.testSchemaCache'); +% +% Documents in V_gamma use a top-level `document_class` header plus +% class-scoped property blocks (see V_gamma_SPEC.md "JSON Format: +% Document Instances"). After V_gamma's "drop underscore prefixes" +% pass, every key in the wire shape is a valid MATLAB struct field +% name, so `jsonencode`/`jsondecode` round-trip without any rewrite. + +tests = functiontests(localfunctions); +end + +function setupOnce(testCase) +% Fixtures live at tests/+did2/fixtures/V_gamma/. This file is at +% tests/+did2/+unittest/testSchemaCache.m, so the fixture directory is +% two `fileparts` levels above mfilename's directory. +thisDir = fileparts(mfilename('fullpath')); +fixtureDir = fullfile(fileparts(thisDir), 'fixtures', 'V_gamma'); +did2.schema.cache.setSchemaPath(fixtureDir); +testCase.TestData.fixtureDir = fixtureDir; +testCase.TestData.cache = did2.schema.cache.shared(); +end + +function teardownOnce(~) +did2.schema.cache.resetSingleton(); +end + +% ---- schema-cache plumbing ---- + +function testSchemaPathPointsAtFixtures(testCase) +verifyTrue(testCase, isfolder(testCase.TestData.fixtureDir)); +verifyEqual(testCase, testCase.TestData.cache.schemaPath, testCase.TestData.fixtureDir); +end + +function testGetClassLoadsBase(testCase) +s = testCase.TestData.cache.getClass('base'); +verifyTrue(testCase, isstruct(s)); +verifyTrue(testCase, isfield(s, 'document_class')); +end + +function testGetClassMissingThrows(testCase) +verifyError(testCase, ... + @() testCase.TestData.cache.getClass('not_a_real_class'), ... 
+ 'did2:schema:missingClass'); +end + +function testCurieRegistryLoaded(testCase) +verifyTrue(testCase, isstruct(testCase.TestData.cache.curieRegistry)); +verifyFalse(testCase, isempty(fieldnames(testCase.TestData.cache.curieRegistry))); +end + +% ---- superclass chains ---- + +function testSuperclassesBaseIsRoot(testCase) +verifyEmpty(testCase, testCase.TestData.cache.superclasses('base')); +end + +function testSuperclassesDemoAExtendsBase(testCase) +verifyEqual(testCase, testCase.TestData.cache.superclasses('demoA'), {'base'}); +end + +function testSuperclassesDemoBChain(testCase) +verifyEqual(testCase, testCase.TestData.cache.superclasses('demoB'), {'demoA', 'base'}); +end + +function testClassChainRootFirst(testCase) +verifyEqual(testCase, testCase.TestData.cache.classChain('demoB'), ... + {'base', 'demoA', 'demoB'}); +end + +% ---- field-list resolution ---- + +function testOwnFieldsBaseHasFour(testCase) +own = testCase.TestData.cache.ownFields('base'); +verifyEqual(testCase, numel(own), 4); +end + +function testOwnFieldsDemoAHasOne(testCase) +own = testCase.TestData.cache.ownFields('demoA'); +verifyEqual(testCase, numel(own), 1); +end + +function testFieldsForTagsDeclaringClass(testCase) +tagged = testCase.TestData.cache.fieldsFor('demoB'); +% base(4) + demoA(1) + demoB(1) = 6 entries +verifyEqual(testCase, numel(tagged), 6); +verifyEqual(testCase, tagged(1).declaringClass, 'base'); +verifyEqual(testCase, tagged(4).declaringClass, 'base'); +verifyEqual(testCase, tagged(5).declaringClass, 'demoA'); +verifyEqual(testCase, tagged(6).declaringClass, 'demoB'); +end + +% ---- buildBlankDocument: document_class header ---- + +function testBuildBlankDocumentHeader(testCase) +doc = testCase.TestData.cache.buildBlankDocument('demoB'); +verifyTrue(testCase, isfield(doc, 'document_class')); +verifyEqual(testCase, doc.document_class.class_name, 'demoB'); +verifyEqual(testCase, doc.document_class.class_version, '1.0.0'); +verifyEqual(testCase, 
numel(doc.document_class.superclasses), 2); +verifyEqual(testCase, doc.document_class.superclasses(1).class_name, 'demoA'); +verifyEqual(testCase, doc.document_class.superclasses(2).class_name, 'base'); +end + +function testBuildBlankDocumentEmptyDependsOn(testCase) +doc = testCase.TestData.cache.buildBlankDocument('demoA'); +verifyTrue(testCase, isfield(doc, 'depends_on')); +verifyEmpty(testCase, doc.depends_on); +end + +% ---- buildBlankDocument: class-scoped blocks ---- + +function testBuildBlankDocumentHasBaseBlock(testCase) +doc = testCase.TestData.cache.buildBlankDocument('demoA'); +verifyTrue(testCase, isfield(doc, 'base')); +verifyTrue(testCase, isfield(doc.base, 'id')); +verifyTrue(testCase, isfield(doc.base, 'session_id')); +verifyTrue(testCase, isfield(doc.base, 'name')); +verifyTrue(testCase, isfield(doc.base, 'datestamp')); +end + +function testBuildBlankDocumentHasConcreteBlock(testCase) +doc = testCase.TestData.cache.buildBlankDocument('demoA'); +verifyTrue(testCase, isfield(doc, 'demoA')); +verifyTrue(testCase, isfield(doc.demoA, 'value')); +verifyEqual(testCase, doc.demoA.value, ''); +end + +function testBuildBlankDocumentAllChainBlocksPresent(testCase) +doc = testCase.TestData.cache.buildBlankDocument('demoB'); +verifyTrue(testCase, isfield(doc, 'base')); +verifyTrue(testCase, isfield(doc, 'demoA')); +verifyTrue(testCase, isfield(doc, 'demoB')); +verifyTrue(testCase, isfield(doc.demoA, 'value')); +verifyTrue(testCase, isfield(doc.demoB, 'value_b')); +end + +function testBuildBlankDocumentMintsIdInBaseBlock(testCase) +doc = testCase.TestData.cache.buildBlankDocument('demoA'); +verifyEqual(testCase, numel(doc.base.id), 33); % did_id format length +end + +function testBuildBlankDocumentSetsDatestampInBaseBlock(testCase) +doc = testCase.TestData.cache.buildBlankDocument('demoA'); +verifyEqual(testCase, doc.base.datestamp(1:2), '20'); +verifyEqual(testCase, doc.base.datestamp(end), 'Z'); +end + +% ---- validateDocument ---- + +function 
testValidateBlankDocFailsOnEmptySessionId(testCase) +doc = testCase.TestData.cache.buildBlankDocument('demoA'); +verifyError(testCase, ... + @() testCase.TestData.cache.validateDocument(doc), ... + 'did2:validation:emptyField'); +end + +function testValidatePassesAfterFillingSessionId(testCase) +doc = testCase.TestData.cache.buildBlankDocument('demoA'); +doc.base.session_id = did.ido.unique_id(); +testCase.TestData.cache.validateDocument(doc); +end + +function testValidateCatchesMaxLength(testCase) +doc = testCase.TestData.cache.buildBlankDocument('demoA'); +doc.base.session_id = did.ido.unique_id(); +doc.demoA.value = repmat('a', 1, 300); +verifyError(testCase, ... + @() testCase.TestData.cache.validateDocument(doc), ... + 'did2:validation:maxLength'); +end + +function testValidateAcceptsValueAtMaxLength(testCase) +doc = testCase.TestData.cache.buildBlankDocument('demoA'); +doc.base.session_id = did.ido.unique_id(); +doc.demoA.value = repmat('a', 1, 256); +testCase.TestData.cache.validateDocument(doc); +end + +function testValidateCatchesTypeMismatch(testCase) +doc = testCase.TestData.cache.buildBlankDocument('demoA'); +doc.base.session_id = did.ido.unique_id(); +doc.demoA.value = 12345; +verifyError(testCase, ... + @() testCase.TestData.cache.validateDocument(doc), ... + 'did2:validation:typeMismatch'); +end + +function testValidateMissingClassNameThrows(testCase) +doc = struct('base', struct('id', 'abc')); +verifyError(testCase, ... + @() testCase.TestData.cache.validateDocument(doc), ... + 'did2:validation:missingClassName'); +end + +function testValidateMissingClassBlockThrows(testCase) +doc = testCase.TestData.cache.buildBlankDocument('demoA'); +doc = rmfield(doc, 'base'); +verifyError(testCase, ... + @() testCase.TestData.cache.validateDocument(doc), ... 
+ 'did2:validation:missingClassBlock'); +end + +% ---- end-to-end through did2.document ---- + +function testDocumentBlankConvenience(testCase) +doc = did2.document.blank('demoA'); +verifyEqual(testCase, doc.className(), 'demoA'); +verifyEqual(testCase, doc.classVersion(), '1.0.0'); +verifyEqual(testCase, numel(doc.get('base.id')), 33); +end + +function testDocumentValidateRoundTrip(testCase) +doc = did2.document.blank('demoA'); +doc.set('base.session_id', did.ido.unique_id()); +doc.set('demoA.value', 'hello'); +doc.validate(); +end + +function testDocumentToJSONRoundTrip(testCase) +% V_gamma has no leading-underscore keys, so jsonencode/jsondecode is +% identity for any well-formed document. Confirm the wire shape uses +% the V_gamma key names and re-parses to an equivalent document. +doc = did2.document.blank('demoA'); +text = doc.toJSON(); +verifyTrue(testCase, contains(text, '"document_class"')); +verifyTrue(testCase, contains(text, '"class_name":"demoA"')); +doc2 = did2.document.fromJSON(text); +verifyEqual(testCase, doc2.className(), 'demoA'); +verifyTrue(testCase, isfield(doc2.toStruct(), 'base')); +verifyTrue(testCase, isfield(doc2.toStruct(), 'demoA')); +end diff --git a/tests/+did2/fixtures/V_gamma/CURIE_lookups_meta.json b/tests/+did2/fixtures/V_gamma/CURIE_lookups_meta.json new file mode 100644 index 0000000..93e5306 --- /dev/null +++ b/tests/+did2/fixtures/V_gamma/CURIE_lookups_meta.json @@ -0,0 +1,36 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://did-schema.example.org/meta/CURIE_lookups_meta.json", + "title": "CURIE Registry for DID/NDI V_gamma", + "description": "Registry mapping CURIE prefixes (used in ontology_term.node values and in field-level _ontology._node annotations) to their authoritative URI base and metadata. Consumer tooling uses this file to expand a CURIE such as 'uberon:0002436' into the full URI 'http://purl.obolibrary.org/obo/UBERON_0002436'. 
Prefixes are matched case-insensitively; by convention prefixes are written in lowercase inside documents and annotations.", + "_format_version": "1.0.3", + "_prefixes": { + "iao": { + "_label": "Information Artifact Ontology", + "_uri_base": "http://purl.obolibrary.org/obo/IAO_", + "_uri_style": "obo_underscore", + "_approximate": false, + "_documentation": "OBO Foundry ontology for information entities. Expansion rule: 'iao:NNNNNNN' -> 'http://purl.obolibrary.org/obo/IAO_NNNNNNN'." + }, + "schema": { + "_label": "Schema.org", + "_uri_base": "https://schema.org/", + "_uri_style": "direct", + "_approximate": false, + "_documentation": "General-purpose linked-data vocabulary. CURIE local part is the term name (e.g., 'schema:name' -> 'https://schema.org/name')." + }, + "placeholder": { + "_label": "Placeholder namespace for example and in-progress data", + "_uri_base": "", + "_uri_style": "local", + "_approximate": true, + "_documentation": "Reserved placeholder for documentation examples and for values that have not yet been mapped to a real ontology." + } + }, + "_uri_styles": { + "obo_underscore": "Concatenate _uri_base with the CURIE local part verbatim; underscore-separated form.", + "direct": "Concatenate _uri_base with the CURIE local part verbatim.", + "local": "No URI expansion; the CURIE is the authoritative identifier." + }, + "_documentation": "Test fixture subset of upstream CURIE_lookups_meta.json. The +did2 self-tests only need this registry to be parseable and non-empty." +} diff --git a/tests/+did2/fixtures/V_gamma/README.md b/tests/+did2/fixtures/V_gamma/README.md new file mode 100644 index 0000000..793e00a --- /dev/null +++ b/tests/+did2/fixtures/V_gamma/README.md @@ -0,0 +1,30 @@ +# V_gamma test fixtures + +Hermetic schema fixtures for the `+did2` MATLAB test suite. These exist +so unit tests do not depend on a sibling `did-schema` checkout or on +network access. 
+ +## Files + +| File | Origin | Purpose | +|---|---|---| +| `base.json` | copied from [`Waltham-Data-Science/did-schema/schemas/V_gamma/base.json`](https://github.com/Waltham-Data-Science/did-schema/blob/main/schemas/V_gamma/base.json) | The root V_gamma class. Carries `id`, `session_id`, `name`, `datestamp`. | +| `CURIE_lookups_meta.json` | trimmed subset of upstream | CURIE prefix registry. The cache loads this on construction. | +| `demoA.json` | V_gamma translation of v1's `src/did/example_schema/demo_schema1/database_documents/demoA.json` | Extends `base`. Adds one queryable `value` field. | +| `demoB.json` | V_gamma translation of v1's `demoB.json` | Extends `demoA`. Adds one `value_b` field. Tests multi-level inheritance (`demoB -> demoA -> base`). | +| `demoC.json` | V_gamma translation of v1's `demoC.json` | Extends `base`. Declares three `depends_on` entries (`item1`, `item2`, `item3`). Tests dependency declarations. | +| `demoFile.json` | V_gamma translation of v1's `demoFile.json` | Extends `base`. Declares two `file` attachments. Tests file-record declarations. | + +## Conventions + +The `schema` token in `superclasses` entries (e.g. +`$DIDSCHEMAPATH/base.json`) is illustrative only — the +did2 cache +resolves superclasses by classname, looking up sibling JSON files in +this directory. + +## Refreshing + +If `base.json` or `CURIE_lookups_meta.json` change upstream, re-copy +them from the same paths above. The `demo*.json` files are V_gamma-only +fixtures and have no upstream counterpart; they evolve with the +did2 +test suite. 
diff --git a/tests/+did2/fixtures/V_gamma/base.json b/tests/+did2/fixtures/V_gamma/base.json new file mode 100644 index 0000000..f8e73f0 --- /dev/null +++ b/tests/+did2/fixtures/V_gamma/base.json @@ -0,0 +1,78 @@ +{ + "document_class": { + "class_name": "base", + "class_version": "1.0.0", + "superclasses": [], + "maturity_level": "work_in_progress" + }, + "depends_on": [], + "file": [], + "fields": [ + { + "name": "id", + "type": "did_uid", + "blank_value": "", + "default_value": "", + "mustBeNonEmpty": true, + "mustBeScalar": true, + "mustNotHaveNaN": false, + "queryable": true, + "ontology": { + "node": "iao:0000578", + "name": "centrally registered identifier" + }, + "documentation": "Unique identifier for this document instance.", + "constraints": {} + }, + { + "name": "session_id", + "type": "did_uid", + "blank_value": "", + "default_value": "", + "mustBeNonEmpty": true, + "mustBeScalar": true, + "mustNotHaveNaN": false, + "queryable": true, + "ontology": { + "node": "ncit:C169028", + "name": "Study Unique Identifier" + }, + "documentation": "Unique identifier of the session this document belongs to.", + "constraints": {} + }, + { + "name": "name", + "type": "char", + "blank_value": "", + "default_value": "", + "mustBeNonEmpty": false, + "mustBeScalar": true, + "mustNotHaveNaN": false, + "queryable": true, + "ontology": { + "node": "schema:name", + "name": "name" + }, + "documentation": "Human-readable name for this document.", + "constraints": { + "maxLength": 256 + } + }, + { + "name": "datestamp", + "type": "timestamp", + "blank_value": "", + "default_value": "2018-12-05T18:36:47.241Z", + "mustBeNonEmpty": true, + "mustBeScalar": true, + "mustNotHaveNaN": false, + "queryable": true, + "ontology": { + "node": "schema:dateCreated", + "name": "dateCreated" + }, + "documentation": "UTC timestamp of document creation in ISO 8601 format.", + "constraints": {} + } + ] +} diff --git a/tests/+did2/fixtures/V_gamma/demoA.json b/tests/+did2/fixtures/V_gamma/demoA.json 
new file mode 100644 index 0000000..7233b69 --- /dev/null +++ b/tests/+did2/fixtures/V_gamma/demoA.json @@ -0,0 +1,32 @@ +{ + "document_class": { + "class_name": "demoA", + "class_version": "1.0.0", + "superclasses": [ + { + "class_name": "base", + "schema": "$DIDSCHEMAPATH/base.json" + } + ], + "maturity_level": "work_in_progress" + }, + "depends_on": [], + "file": [], + "fields": [ + { + "name": "value", + "type": "char", + "blank_value": "", + "default_value": "", + "mustBeNonEmpty": false, + "mustBeScalar": true, + "mustNotHaveNaN": false, + "queryable": true, + "ontology": null, + "documentation": "A demo scalar value used by the +did2 self-tests.", + "constraints": { + "maxLength": 256 + } + } + ] +} diff --git a/tests/+did2/fixtures/V_gamma/demoB.json b/tests/+did2/fixtures/V_gamma/demoB.json new file mode 100644 index 0000000..7e6c91e --- /dev/null +++ b/tests/+did2/fixtures/V_gamma/demoB.json @@ -0,0 +1,32 @@ +{ + "document_class": { + "class_name": "demoB", + "class_version": "1.0.0", + "superclasses": [ + { + "class_name": "demoA", + "schema": "$DIDSCHEMAPATH/demoA.json" + } + ], + "maturity_level": "work_in_progress" + }, + "depends_on": [], + "file": [], + "fields": [ + { + "name": "value_b", + "type": "char", + "blank_value": "", + "default_value": "", + "mustBeNonEmpty": false, + "mustBeScalar": true, + "mustNotHaveNaN": false, + "queryable": true, + "ontology": null, + "documentation": "A second demo scalar value, declared by demoB on top of demoA.", + "constraints": { + "maxLength": 256 + } + } + ] +} diff --git a/tests/+did2/fixtures/V_gamma/demoC.json b/tests/+did2/fixtures/V_gamma/demoC.json new file mode 100644 index 0000000..fed0eab --- /dev/null +++ b/tests/+did2/fixtures/V_gamma/demoC.json @@ -0,0 +1,51 @@ +{ + "document_class": { + "class_name": "demoC", + "class_version": "1.0.0", + "superclasses": [ + { + "class_name": "base", + "schema": "$DIDSCHEMAPATH/base.json" + } + ], + "maturity_level": "work_in_progress" + }, + "depends_on": [ + { 
+ "name": "item1", + "mustBeNonEmpty": true, + "documentation": "The first dependency item, by id.", + "must_refer_to_document_class": "" + }, + { + "name": "item2", + "mustBeNonEmpty": true, + "documentation": "The second dependency item, by id.", + "must_refer_to_document_class": "" + }, + { + "name": "item3", + "mustBeNonEmpty": false, + "documentation": "The third dependency item; optional.", + "must_refer_to_document_class": "" + } + ], + "file": [], + "fields": [ + { + "name": "value", + "type": "char", + "blank_value": "", + "default_value": "", + "mustBeNonEmpty": false, + "mustBeScalar": true, + "mustNotHaveNaN": false, + "queryable": true, + "ontology": null, + "documentation": "A demo scalar value carried alongside the dependencies.", + "constraints": { + "maxLength": 256 + } + } + ] +} diff --git a/tests/+did2/fixtures/V_gamma/demoFile.json b/tests/+did2/fixtures/V_gamma/demoFile.json new file mode 100644 index 0000000..cea0da6 --- /dev/null +++ b/tests/+did2/fixtures/V_gamma/demoFile.json @@ -0,0 +1,41 @@ +{ + "document_class": { + "class_name": "demoFile", + "class_version": "1.0.0", + "superclasses": [ + { + "class_name": "base", + "schema": "$DIDSCHEMAPATH/base.json" + } + ], + "maturity_level": "work_in_progress" + }, + "depends_on": [], + "file": [ + { + "name": "filename1.ext", + "documentation": "First demo binary file attached to the document." + }, + { + "name": "filename2.ext2", + "documentation": "Second demo binary file attached to the document." 
+ } + ], + "fields": [ + { + "name": "value", + "type": "char", + "blank_value": "", + "default_value": "", + "mustBeNonEmpty": false, + "mustBeScalar": true, + "mustNotHaveNaN": false, + "queryable": true, + "ontology": null, + "documentation": "A demo scalar value carried alongside the file attachments.", + "constraints": { + "maxLength": 256 + } + } + ] +} diff --git a/tests_symmetry/+did2/+symmetry/+makeArtifacts/.gitkeep b/tests_symmetry/+did2/+symmetry/+makeArtifacts/.gitkeep new file mode 100644 index 0000000..6056c39 --- /dev/null +++ b/tests_symmetry/+did2/+symmetry/+makeArtifacts/.gitkeep @@ -0,0 +1,3 @@ +% Placeholder. did2 symmetry-test makeArtifacts package goes here +% in step 2/3, matching the legacy +% tests_symmetry/+did/+symmetry/+makeArtifacts/ slot. diff --git a/tests_symmetry/+did2/+symmetry/+readArtifacts/.gitkeep b/tests_symmetry/+did2/+symmetry/+readArtifacts/.gitkeep new file mode 100644 index 0000000..9bd0b3f --- /dev/null +++ b/tests_symmetry/+did2/+symmetry/+readArtifacts/.gitkeep @@ -0,0 +1,3 @@ +% Placeholder. did2 symmetry-test readArtifacts package goes here +% in step 2/3, matching the legacy +% tests_symmetry/+did/+symmetry/+readArtifacts/ slot. diff --git a/tools/+didtools/codecheckToolbox.m b/tools/+didtools/codecheckToolbox.m new file mode 100644 index 0000000..6cc4163 --- /dev/null +++ b/tools/+didtools/codecheckToolbox.m @@ -0,0 +1,16 @@ +function codecheckToolbox() + % didtools.codecheckToolbox Developer convenience wrapper for + % matbox.tasks.codecheckToolbox: run the project code check + % against the DID-matlab repo root with badge writing disabled. + % + % Call as `didtools.codecheckToolbox` from the MATLAB prompt. + % + % This used to live at tools/tasks/codecheckToolbox.m (bare name) + % but check-code@v1 puts tools/ on the path and resolved to this + % zero-arg wrapper instead of matbox.tasks.codecheckToolbox's + % multi-arg version, breaking CI with "Too many input arguments". 
+ % Moving it into +didtools/ keeps the convenience without + % shadowing matbox. + projectRootDirectory = didtools.projectdir(); + matbox.tasks.codecheckToolbox(projectRootDirectory, "CreateBadge", false) +end diff --git a/tools/tasks/codecheckToolbox.m b/tools/tasks/codecheckToolbox.m deleted file mode 100644 index 07d140f..0000000 --- a/tools/tasks/codecheckToolbox.m +++ /dev/null @@ -1,4 +0,0 @@ -function codecheckToolbox() - projectRootDirectory = didtools.projectdir(); - matbox.tasks.codecheckToolbox(projectRootDirectory, "CreateBadge", false) -end