From c03d2ac66f0ce11d9334b8cf84b57f946775ab8e Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 22 Apr 2026 19:34:55 +0000 Subject: [PATCH] Consolidate treatment docs via minischema/profile mechanism Replace the per-kind treatment schemas (treatment_drug, virus_injection) with a single generic treatment document whose manipulation field's shape is delegated to a named profile (minischema). This removes schema-file proliferation for the treatment family while preserving rigor via ontology-anchored profiles and keeping search tractable via canonical units, promoted fields, and profile_id references. Meta-schema changes: - Add ontology, quantity, relative_quantity to the type enum. - Add _shape_from_minischema (boolean) on structure fields; when true, the nested _fields is not required. New: profile_meta.json validates profile files; type-specific _constraints rules (canonical_unit + canonical_unit_label on quantity; add reference on relative_quantity; descendant_of permitted on ontology). New: schemas/V_beta/profiles/virus_injection.json canonical profile. treatment.json rewritten as a generic document with a single manipulation field that delegates its shape via _shape_from_minischema. Version bumped to 2.0.0. Retired: treatment_drug.json, virus_injection.json. stimulus_bath intentionally retained (it's a stimulus approach, not a treatment). Tests: tests/test_minischema.py covers new types, _shape_from_minischema conditional, profile validation, canonical profile contents, and regression over all existing V_beta schemas against the updated meta-schema. 28 new tests, all passing. Follow-on work (drug_treatment profile, mammalian stereotaxic profile, cross-profile canonical-unit registry) captured in Ideas.md. 
--- Ideas.md | 27 ++ schemas/V_beta/did_schema_meta.json | 87 +++-- schemas/V_beta/profile_meta.json | 212 +++++++++++ schemas/V_beta/profiles/virus_injection.json | 155 ++++++++ schemas/V_beta/treatment.json | 66 +--- schemas/V_beta/treatment_drug.json | 80 ---- schemas/V_beta/virus_injection.json | 123 ------- schemas/V_beta_SPEC.md | 189 ++++++++-- schemas/V_beta_notes.md | 53 +++ tests/test_minischema.py | 361 +++++++++++++++++++ 10 files changed, 1044 insertions(+), 309 deletions(-) create mode 100644 schemas/V_beta/profile_meta.json create mode 100644 schemas/V_beta/profiles/virus_injection.json delete mode 100644 schemas/V_beta/treatment_drug.json delete mode 100644 schemas/V_beta/virus_injection.json create mode 100644 tests/test_minischema.py diff --git a/Ideas.md b/Ideas.md index 801c3d0..95a4f45 100644 --- a/Ideas.md +++ b/Ideas.md @@ -8,3 +8,30 @@ Running list of design ideas to consider for future brainstorms. together all of a given subject's digital records. - Allow globally immutable openMinds documents to be linked. + +## Follow-on work for treatment consolidation + +- Author a canonical `drug_treatment` profile under + `schemas/V_beta/profiles/` that supersedes the retired `treatment_drug.json` + schema. Required fields at minimum: `drug` (ontology), `dose` (quantity, + canonical unit TBD by domain expert — likely `mg_per_kg` or `mg`), `route` + (ontology). Optional: `onset` (relative_quantity, same shape as in + `virus_injection`). + +- Author a `mammalian_stereotaxic_virus_injection` profile that `extends: + "virus_injection"` and adds required stereotaxic coordinate fields + (`stereotaxic_ap`, `stereotaxic_ml`, `stereotaxic_dv` as `quantity` with + canonical unit `mm`) plus a `target_region` field constrained to + UBERON:brain descendants. + +- Consider applying the same profile mechanism to the stimulus family + (`stimulus_bath`, `stimulus_presentation`, etc.) once it is proven on + treatments. 
Different domain (stimulus-to-element rather than + treatment-to-subject) but the same `_shape_from_minischema` + `_minischema` + mechanism would apply unchanged. + +- Decide and document a canonical-unit convention for cross-profile + coherence: when two profiles measure the same physical quantity, they must + use the same canonical unit (e.g., all volumes in nL, all times in days or + seconds, not mixed). A small registry table in this repo (or the spec) + would help enforce this by review. diff --git a/schemas/V_beta/did_schema_meta.json b/schemas/V_beta/did_schema_meta.json index 0d449dd..eebde05 100644 --- a/schemas/V_beta/did_schema_meta.json +++ b/schemas/V_beta/did_schema_meta.json @@ -220,9 +220,12 @@ "matrix", "timestamp", "boolean", - "structure" + "structure", + "ontology", + "quantity", + "relative_quantity" ], - "description": "Data type of the field. Uses the standard JSON Schema keyword 'type'; values are NDI-specific." + "description": "Data type of the field. Uses the standard JSON Schema keyword 'type'; values are NDI-specific. The 'ontology' type holds an ontology_object value. The 'quantity' type holds an object {<canonical_unit_label>: double, source_value: double, source_unit: string}; the canonical-unit label is declared in _constraints.canonical_unit_label. The 'relative_quantity' type adds a 'reference' string to the quantity object and declares its reference anchor in _constraints.reference." }, "_blank_value": { "description": "Value in a freshly constructed blank document. May fail validation." @@ -271,31 +274,69 @@ "$ref": "#/$defs/field_definition" }, "description": "Nested field definitions for structure type fields." + }, + "_shape_from_minischema": { + "type": "boolean", + "description": "Only valid on type 'structure'. When true, the field's nested shape is delegated to a minischema carried on the document instance under the top-level _minischema key; the schema file does not need to list nested _fields. 
When omitted or false, a structure field must list its own _fields." } }, - "if": { - "properties": { - "type": { - "const": "structure" + "allOf": [ + { + "if": { + "properties": { + "type": { + "const": "structure" + }, + "_shape_from_minischema": { + "const": true + } + }, + "required": [ + "_shape_from_minischema" + ] + }, + "then": { + "required": [ + "_name", + "type", + "_blank_value", + "_default_value", + "_mustBeNonEmpty", + "_mustBeScalar", + "_mustNotHaveNaN", + "_queryable", + "_ontology", + "_documentation", + "_constraints" + ] + }, + "else": { + "if": { + "properties": { + "type": { + "const": "structure" + } + } + }, + "then": { + "required": [ + "_name", + "type", + "_blank_value", + "_default_value", + "_mustBeNonEmpty", + "_mustBeScalar", + "_mustNotHaveNaN", + "_queryable", + "_ontology", + "_documentation", + "_constraints", + "_fields" + ] + } } } - }, - "then": { - "required": [ - "_name", - "type", - "_blank_value", - "_default_value", - "_mustBeNonEmpty", - "_mustBeScalar", - "_mustNotHaveNaN", - "_queryable", - "_ontology", - "_documentation", - "_constraints", - "_fields" - ] - } + ] } } } diff --git a/schemas/V_beta/profile_meta.json b/schemas/V_beta/profile_meta.json new file mode 100644 index 0000000..19c8575 --- /dev/null +++ b/schemas/V_beta/profile_meta.json @@ -0,0 +1,212 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://did-schema.example.org/meta/profile_meta.json", + "title": "DID/NDI Profile Meta-Schema", + "description": "Validates the structure of DID/NDI profile (minischema) files. A profile is a named, versioned, ontology-anchored schema fragment that defines the shape of a structure field on another document type. Profiles live under schemas/V_beta/profiles/ (canonical) or as user-defined documents in a database. 
The inline _fields array reuses the field_definition shape from did_schema_meta.json.", + "type": "object", + "required": [ + "profile_name", + "profile_version", + "_maturity_level", + "extends", + "profile_ontology", + "_documentation", + "_fields", + "promoted_fields" + ], + "additionalProperties": false, + "properties": { + "profile_name": { + "type": "string", + "pattern": "^[a-z][a-z0-9_]*$", + "description": "Unique snake_case identifier for this profile." + }, + "profile_version": { + "type": "string", + "pattern": "^\\d+\\.\\d+\\.\\d+$", + "description": "Semantic version string MAJOR.MINOR.PATCH. Same semantics as _class_version on regular schemas: MAJOR bumps on breaking changes (removing required fields, changing canonical units, tightening constraints); MINOR on additive changes; PATCH on docs-only changes." + }, + "_maturity_level": { + "type": "string", + "enum": [ + "work_in_progress", + "mature" + ], + "description": "Maturity level of this profile." + }, + "extends": { + "type": "string", + "description": "profile_name of a parent profile that this profile extends, or empty string for no parent. When non-empty, consumer tooling flattens the parent's _fields into this profile's _fields (parent-first order) before validating a document's manipulation structure." + }, + "profile_ontology": { + "$ref": "#/$defs/ontology_object" + }, + "_documentation": { + "type": "string", + "description": "Human-readable description of what this profile represents." + }, + "_fields": { + "type": "array", + "items": { + "$ref": "#/$defs/field_definition" + }, + "description": "Array of field definitions describing the shape of each named entry inside the manipulation structure of a document conforming to this profile. Same shape as _fields on a regular schema file." 
+ }, + "promoted_fields": { + "type": "array", + "items": { + "type": "string", + "pattern": "^[a-z][a-z0-9_]*$" + }, + "description": "List of field _names from _fields that consumer database tooling should materialize as indexed columns for fast search. Purely an indexing hint; the manipulation structure remains the source of truth." + } + }, + "$defs": { + "ontology_object": { + "type": "object", + "required": [ + "_namespace", + "_term", + "_name", + "_uri" + ], + "additionalProperties": false, + "properties": { + "_namespace": { + "type": "string", + "description": "Ontology name (e.g., uberon, obi, pato, uo, ucum)." + }, + "_term": { + "type": "string", + "description": "Term identifier within the namespace." + }, + "_name": { + "type": "string", + "description": "Human-readable label of the ontology term." + }, + "_uri": { + "type": [ + "string", + "null" + ], + "description": "Full resolvable URI, or null if unavailable." + } + } + }, + "field_definition": { + "type": "object", + "required": [ + "_name", + "type", + "_blank_value", + "_default_value", + "_mustBeNonEmpty", + "_mustBeScalar", + "_mustNotHaveNaN", + "_queryable", + "_ontology", + "_documentation", + "_constraints" + ], + "additionalProperties": false, + "properties": { + "_name": { + "type": "string", + "pattern": "^[a-z][a-z0-9_]*$", + "description": "Field name (snake_case)." 
+ }, + "type": { + "type": "string", + "enum": [ + "did_uid", + "char", + "string", + "integer", + "double", + "matrix", + "timestamp", + "boolean", + "structure", + "ontology", + "quantity", + "relative_quantity" + ] + }, + "_blank_value": {}, + "_default_value": {}, + "_mustBeNonEmpty": { "type": "boolean" }, + "_mustBeScalar": { "type": "boolean" }, + "_mustNotHaveNaN": { "type": "boolean" }, + "_queryable": { "type": "boolean" }, + "_ontology": { + "oneOf": [ + { "$ref": "#/$defs/ontology_object" }, + { "type": "null" } + ] + }, + "_documentation": { "type": "string" }, + "_constraints": { + "type": "object", + "description": "JSON Schema validation keywords and NDI-specific constraint keys. Applicable keys depend on 'type'. For 'ontology': 'descendant_of' (ontology_object) optionally constrains values to a subtree. For 'quantity' and 'relative_quantity': 'canonical_unit' (ontology_object, required) names the canonical unit; 'canonical_unit_label' (string, required) is the snake_case field name used in the document to hold the canonical numeric value; 'minimum'/'maximum' optionally constrain that numeric value. 
For 'relative_quantity': 'reference' (string, required) names the temporal anchor (e.g., 'session_start', 'surgery', 'birth').", + "properties": { + "descendant_of": { "$ref": "#/$defs/ontology_object" }, + "canonical_unit": { "$ref": "#/$defs/ontology_object" }, + "canonical_unit_label": { + "type": "string", + "pattern": "^[a-z][a-z0-9_]*$" + }, + "reference": { "type": "string" }, + "minimum": { "type": "number" }, + "maximum": { "type": "number" }, + "minLength": { "type": "integer", "minimum": 0 }, + "maxLength": { "type": "integer", "minimum": 0 }, + "pattern": { "type": "string" }, + "enum": { "type": "array" } + } + }, + "_fields": { + "type": "array", + "items": { "$ref": "#/$defs/field_definition" } + } + }, + "allOf": [ + { + "if": { + "properties": { "type": { "const": "ontology" } } + }, + "then": { + "properties": { + "_constraints": { + "not": { "required": [ "canonical_unit" ] } + } + } + } + }, + { + "if": { + "properties": { "type": { "const": "quantity" } } + }, + "then": { + "properties": { + "_constraints": { + "required": [ "canonical_unit", "canonical_unit_label" ] + } + } + } + }, + { + "if": { + "properties": { "type": { "const": "relative_quantity" } } + }, + "then": { + "properties": { + "_constraints": { + "required": [ "canonical_unit", "canonical_unit_label", "reference" ] + } + } + } + } + ] + } + } +} diff --git a/schemas/V_beta/profiles/virus_injection.json b/schemas/V_beta/profiles/virus_injection.json new file mode 100644 index 0000000..4085cec --- /dev/null +++ b/schemas/V_beta/profiles/virus_injection.json @@ -0,0 +1,155 @@ +{ + "profile_name": "virus_injection", + "profile_version": "1.0.0", + "_maturity_level": "work_in_progress", + "extends": "", + "profile_ontology": { + "_namespace": "obi", + "_term": "0000412", + "_name": "viral vector administration", + "_uri": "http://purl.obolibrary.org/obo/OBI_0000412" + }, + "_documentation": "Administration of a viral vector to a subject. 
Species-agnostic base profile; profiles for specific organisms or delivery routes (e.g., mammalian stereotaxic, systemic via tail-vein, intracerebroventricular) should extend this one and add location or route fields as appropriate.", + "_fields": [ + { + "_name": "virus_construct", + "type": "ontology", + "_blank_value": null, + "_default_value": null, + "_mustBeNonEmpty": true, + "_mustBeScalar": true, + "_mustNotHaveNaN": false, + "_queryable": true, + "_ontology": null, + "_documentation": "The specific viral construct injected (e.g., Addgene plasmid 100842 'pAAV.Syn.GCaMP6f.WPRE.SV40'). Value ontology is unconstrained to allow Addgene and lab-local registries.", + "_constraints": {} + }, + { + "_name": "serotype", + "type": "ontology", + "_blank_value": null, + "_default_value": null, + "_mustBeNonEmpty": true, + "_mustBeScalar": true, + "_mustNotHaveNaN": false, + "_queryable": true, + "_ontology": null, + "_documentation": "Viral serotype (e.g., AAV1, AAV9, AAVretro). Constrained to descendants of Parvoviridae; profiles for lentivirus or rabies should extend this one and widen the constraint.", + "_constraints": { + "descendant_of": { + "_namespace": "ncbitaxon", + "_term": "10803", + "_name": "Parvoviridae", + "_uri": null + } + } + }, + { + "_name": "volume", + "type": "quantity", + "_blank_value": null, + "_default_value": null, + "_mustBeNonEmpty": true, + "_mustBeScalar": true, + "_mustNotHaveNaN": true, + "_queryable": true, + "_ontology": null, + "_documentation": "Total volume of viral stock delivered.", + "_constraints": { + "minimum": 0, + "canonical_unit": { + "_namespace": "uo", + "_term": "0000102", + "_name": "nanoliter", + "_uri": "http://purl.obolibrary.org/obo/UO_0000102" + }, + "canonical_unit_label": "nl" + } + }, + { + "_name": "titer", + "type": "quantity", + "_blank_value": null, + "_default_value": null, + "_mustBeNonEmpty": true, + "_mustBeScalar": true, + "_mustNotHaveNaN": true, + "_queryable": true, + "_ontology": null, + 
"_documentation": "Viral particle concentration of the injected stock. Tooling converts vg/mL, IU/mL, and TU/mL at insert using the standard vector-biology conversion table.", + "_constraints": { + "minimum": 0, + "canonical_unit": { + "_namespace": "ucum", + "_term": "[GC]/mL", + "_name": "genome copies per milliliter", + "_uri": null + }, + "canonical_unit_label": "gc_per_ml" + } + }, + { + "_name": "onset", + "type": "relative_quantity", + "_blank_value": null, + "_default_value": null, + "_mustBeNonEmpty": true, + "_mustBeScalar": true, + "_mustNotHaveNaN": true, + "_queryable": true, + "_ontology": null, + "_documentation": "Time of injection relative to session start. Negative = before session, positive = during or after.", + "_constraints": { + "canonical_unit": { + "_namespace": "uo", + "_term": "0000033", + "_name": "day", + "_uri": "http://purl.obolibrary.org/obo/UO_0000033" + }, + "canonical_unit_label": "day", + "reference": "session_start" + } + }, + { + "_name": "injection_rate", + "type": "quantity", + "_blank_value": null, + "_default_value": null, + "_mustBeNonEmpty": false, + "_mustBeScalar": true, + "_mustNotHaveNaN": true, + "_queryable": true, + "_ontology": null, + "_documentation": "Delivery speed. Optional; often not recorded for bolus injections.", + "_constraints": { + "minimum": 0, + "canonical_unit": { + "_namespace": "ucum", + "_term": "nL/min", + "_name": "nanoliters per minute", + "_uri": null + }, + "canonical_unit_label": "nl_per_min" + } + }, + { + "_name": "promoter", + "type": "ontology", + "_blank_value": null, + "_default_value": null, + "_mustBeNonEmpty": false, + "_mustBeScalar": true, + "_mustNotHaveNaN": false, + "_queryable": true, + "_ontology": null, + "_documentation": "Promoter driving transgene expression (e.g., CaMKII, hSyn, CAG, Ef1a). 
Often redundant with virus_construct but recorded separately by convention.", + "_constraints": {} + } + ], + "promoted_fields": [ + "serotype", + "volume", + "titer", + "onset" + ] +} diff --git a/schemas/V_beta/treatment.json b/schemas/V_beta/treatment.json index a018331..84ef265 100644 --- a/schemas/V_beta/treatment.json +++ b/schemas/V_beta/treatment.json @@ -1,11 +1,11 @@ { "_classname": "treatment", - "_class_version": "1.0.0", + "_class_version": "2.0.0", "_maturity_level": "work_in_progress", "_superclasses": [ { "_classname": "base", - "_schema": "$NDISCHEMAPATH/base/schema.json" + "_schema": "$NDISCHEMAPATH/base.json" } ], "_depends_on": [ @@ -14,72 +14,22 @@ "_mustBeNonEmpty": true, "_documentation": "The document ID of the subject that received this treatment.", "_must_refer_to_document_class": "" - }, - { - "_name": "manipulation_id", - "_mustBeNonEmpty": false, - "_documentation": "The document ID of the manipulation associated with this treatment.", - "_must_refer_to_document_class": "" - }, - { - "_name": "protocol_id", - "_mustBeNonEmpty": false, - "_documentation": "The document ID of the protocol associated with this treatment.", - "_must_refer_to_document_class": "" } ], "_file": [], "_fields": [ { - "_name": "ontology_name", - "type": "char", - "_blank_value": "", - "_default_value": "", - "_mustBeNonEmpty": false, - "_mustBeScalar": true, - "_mustNotHaveNaN": false, - "_queryable": true, - "_ontology": null, - "_documentation": "The ontology node of the treatment.", - "_constraints": {} - }, - { - "_name": "name", - "type": "char", - "_blank_value": "", - "_default_value": "", - "_mustBeNonEmpty": false, - "_mustBeScalar": true, - "_mustNotHaveNaN": false, - "_queryable": true, - "_ontology": null, - "_documentation": "The name of the treatment in the ontology node.", - "_constraints": {} - }, - { - "_name": "numeric_value", - "type": "matrix", - "_blank_value": "", - "_default_value": "", - "_mustBeNonEmpty": false, - "_mustBeScalar": false, 
- "_mustNotHaveNaN": false, - "_queryable": true, - "_ontology": null, - "_documentation": "The numeric value associated with the treatment.", - "_constraints": {} - }, - { - "_name": "string_value", - "type": "char", - "_blank_value": "", - "_default_value": "", + "_name": "manipulation", + "type": "structure", + "_shape_from_minischema": true, + "_blank_value": {}, + "_default_value": {}, "_mustBeNonEmpty": false, "_mustBeScalar": true, "_mustNotHaveNaN": false, "_queryable": true, "_ontology": null, - "_documentation": "The character string value associated with the treatment.", + "_documentation": "The set of name/value entries describing what was done to the subject. The shape of this structure is delegated to a minischema (profile) identified on each document instance via the top-level _minischema key. Each entry is a named slot whose type comes from the profile's _fields array (typically 'ontology', 'quantity', or 'relative_quantity').", "_constraints": {} } ] diff --git a/schemas/V_beta/treatment_drug.json b/schemas/V_beta/treatment_drug.json deleted file mode 100644 index a28ac4d..0000000 --- a/schemas/V_beta/treatment_drug.json +++ /dev/null @@ -1,80 +0,0 @@ -{ - "_classname": "treatment_drug", - "_class_version": "1.0.0", - "_maturity_level": "work_in_progress", - "_superclasses": [ - { - "_classname": "base", - "_schema": "$NDISCHEMAPATH/base/schema.json" - } - ], - "_depends_on": [ - { - "_name": "subject_id", - "_mustBeNonEmpty": true, - "_documentation": "The document ID of the subject that received this drug treatment.", - "_must_refer_to_document_class": "" - } - ], - "_file": [], - "_fields": [ - { - "_name": "drug_name", - "type": "char", - "_blank_value": "", - "_default_value": "", - "_mustBeNonEmpty": true, - "_mustBeScalar": true, - "_mustNotHaveNaN": false, - "_queryable": true, - "_ontology": null, - "_documentation": "The name of the drug administered.", - "_constraints": { - "max_length": 256 - } - }, - { - "_name": "dose", - "type": 
"double", - "_blank_value": 0.0, - "_default_value": 0.0, - "_mustBeNonEmpty": false, - "_mustBeScalar": true, - "_mustNotHaveNaN": false, - "_queryable": false, - "_ontology": null, - "_documentation": "The dose of the drug administered.", - "_constraints": {} - }, - { - "_name": "dose_units", - "type": "char", - "_blank_value": "", - "_default_value": "", - "_mustBeNonEmpty": false, - "_mustBeScalar": true, - "_mustNotHaveNaN": false, - "_queryable": true, - "_ontology": null, - "_documentation": "The units of the dose (e.g., 'mg/kg', 'uL', 'mM').", - "_constraints": { - "max_length": 32 - } - }, - { - "_name": "route", - "type": "char", - "_blank_value": "", - "_default_value": "", - "_mustBeNonEmpty": false, - "_mustBeScalar": true, - "_mustNotHaveNaN": false, - "_queryable": true, - "_ontology": null, - "_documentation": "The route of administration (e.g., 'ip', 'iv', 'sc', 'oral', 'topical').", - "_constraints": { - "max_length": 64 - } - } - ] -} diff --git a/schemas/V_beta/virus_injection.json b/schemas/V_beta/virus_injection.json deleted file mode 100644 index 306d5a3..0000000 --- a/schemas/V_beta/virus_injection.json +++ /dev/null @@ -1,123 +0,0 @@ -{ - "_classname": "virus_injection", - "_class_version": "1.0.0", - "_maturity_level": "work_in_progress", - "_superclasses": [ - { - "_classname": "base", - "_schema": "$NDISCHEMAPATH/base/schema.json" - } - ], - "_depends_on": [ - { - "_name": "subject_id", - "_mustBeNonEmpty": true, - "_documentation": "The document ID of the subject that received this virus injection.", - "_must_refer_to_document_class": "" - } - ], - "_file": [], - "_fields": [ - { - "_name": "virus_name", - "type": "char", - "_blank_value": "", - "_default_value": "", - "_mustBeNonEmpty": true, - "_mustBeScalar": true, - "_mustNotHaveNaN": false, - "_queryable": true, - "_ontology": null, - "_documentation": "The name of the virus construct (e.g., 'AAV2-CaMKII-GCaMP6f').", - "_constraints": { - "max_length": 512 - } - }, - { - "_name": 
"serotype", - "type": "char", - "_blank_value": "", - "_default_value": "", - "_mustBeNonEmpty": false, - "_mustBeScalar": true, - "_mustNotHaveNaN": false, - "_queryable": true, - "_ontology": null, - "_documentation": "The serotype of the viral vector (e.g., 'AAV2', 'AAV9', 'lentivirus').", - "_constraints": { - "max_length": 64 - } - }, - { - "_name": "titer", - "type": "double", - "_blank_value": 0.0, - "_default_value": 0.0, - "_mustBeNonEmpty": false, - "_mustBeScalar": true, - "_mustNotHaveNaN": false, - "_queryable": false, - "_ontology": null, - "_documentation": "The titer of the virus in titer_units.", - "_constraints": {} - }, - { - "_name": "titer_units", - "type": "char", - "_blank_value": "", - "_default_value": "gc/mL", - "_mustBeNonEmpty": false, - "_mustBeScalar": true, - "_mustNotHaveNaN": false, - "_queryable": false, - "_ontology": null, - "_documentation": "The units for the titer (e.g., 'gc/mL', 'vg/mL').", - "_constraints": { - "max_length": 32 - } - }, - { - "_name": "volume", - "type": "double", - "_blank_value": 0.0, - "_default_value": 0.0, - "_mustBeNonEmpty": false, - "_mustBeScalar": true, - "_mustNotHaveNaN": false, - "_queryable": false, - "_ontology": null, - "_documentation": "The volume injected in volume_units.", - "_constraints": {} - }, - { - "_name": "volume_units", - "type": "char", - "_blank_value": "", - "_default_value": "nL", - "_mustBeNonEmpty": false, - "_mustBeScalar": true, - "_mustNotHaveNaN": false, - "_queryable": false, - "_ontology": null, - "_documentation": "The units for the injected volume (e.g., 'nL', 'uL').", - "_constraints": { - "max_length": 16 - } - }, - { - "_name": "injection_site", - "type": "char", - "_blank_value": "", - "_default_value": "", - "_mustBeNonEmpty": false, - "_mustBeScalar": true, - "_mustNotHaveNaN": false, - "_queryable": true, - "_ontology": null, - "_documentation": "The anatomical location of the injection site.", - "_constraints": { - "max_length": 256 - } - } - ] -} diff --git 
a/schemas/V_beta_SPEC.md b/schemas/V_beta_SPEC.md index 34fc03e..7504ab5 100644 --- a/schemas/V_beta_SPEC.md +++ b/schemas/V_beta_SPEC.md @@ -49,10 +49,13 @@ did-schema/ │ ├── V_beta_notes.md ← V_beta status and V_alpha→V_beta renames │ └── V_beta/ ← flat directory of V_beta schemas │ ├── did_schema_meta.json ← meta-schema: validates schema files +│ ├── profile_meta.json ← meta-schema: validates profile (minischema) files │ ├── base.json ← schema for the base document type │ ├── directory.json ← schema for the directory document type │ ├── probe_location.json ← schema for probe_location document type -│ └── ... ← one JSON file per document type (all snake_case) +│ ├── ... ← one JSON file per document type (all snake_case) +│ └── profiles/ ← canonical profiles (minischemas) +│ └── virus_injection.json ← profile for virus injection treatments │ └── tests/ ├── conftest.py ← shared fixtures and helpers @@ -76,11 +79,16 @@ filename is also snake_case. There is exactly one schema file per document type; subdirectories and per-type directories are not used. The meta-schema (`did_schema_meta.json`) lives alongside the document-type -schemas in `schemas/V_beta/`. +schemas in `schemas/V_beta/`. A second meta-schema (`profile_meta.json`) sits +next to it and validates canonical profile (minischema) files under +`schemas/V_beta/profiles/`. See **Minischemas and the `_minischema` Document +Key** for the profile mechanism. Path references in schema files use the `$NDISCHEMAPATH` token, resolved at runtime by consumer tooling. Under the flat layout, a reference to another -schema resolves as `$NDISCHEMAPATH/<classname>.json`. Blank document +schema resolves as `$NDISCHEMAPATH/<classname>.json`; a reference to a +canonical profile resolves as +`$NDISCHEMAPATH/profiles/<profile_name>.json`. Blank document definitions (templates) are the responsibility of language-specific tooling repos, not this repo. 
@@ -412,29 +420,47 @@ The following ontology terms are relevant to DID/NDI schemas and may be used in ### Valid Types -| Type | Description | `_constraints` keys | Notes | -|-------------|-------------------------------------------------|------------------------------------------------------|-------| -| `did_uid` | NDI/DID unique identifier string | `{}` (none) | | -| `char` | Character array / string | `{ "maxLength": integer or null }` | `"string"` is accepted as an alias | -| `integer` | Single integer value | `{ "minimum": integer or null, "maximum": integer or null }` | | -| `double` | Single double-precision float | `{ "minimum": number or null, "maximum": number or null }` | | -| `matrix` | 2D array of doubles | `{ "rows": int or null, "cols": int or null, "minimum": number or null, "maximum": number or null }` | `_mustBeScalar` should be `false` | -| `timestamp` | ISO 8601 UTC timestamp string | `{}` (none) | Validator checks format | -| `boolean` | true/false | `{}` (none) | | -| `structure` | Nested sub-document (JSON object) | `{}` (none); use `"_fields"` key for nested fields | Recursive | +| Type | Description | `_constraints` keys | Notes | +|---------------------|-------------------------------------------------|------------------------------------------------------|-------| +| `did_uid` | NDI/DID unique identifier string | `{}` (none) | | +| `char` | Character array / string | `{ "maxLength": integer or null }` | `"string"` is accepted as an alias | +| `integer` | Single integer value | `{ "minimum": integer or null, "maximum": integer or null }` | | +| `double` | Single double-precision float | `{ "minimum": number or null, "maximum": number or null }` | | +| `matrix` | 2D array of doubles | `{ "rows": int or null, "cols": int or null, "minimum": number or null, "maximum": number or null }` | `_mustBeScalar` should be `false` | +| `timestamp` | ISO 8601 UTC timestamp string | `{}` (none) | Validator checks format | +| `boolean` | true/false | `{}` 
(none) | | +| `structure` | Nested sub-document (JSON object) | `{}` (none); use `"_fields"` key for nested fields, OR set `"_shape_from_minischema": true` to delegate the shape to a minischema at runtime | Recursive | +| `ontology` | An ontology node (`ontology_object`: `_namespace`, `_term`, `_name`, `_uri`) | `{ "descendant_of": ontology_object or absent }` — when present, the value must be a descendant of this node in its ontology | | +| `quantity` | A measured value with units, stored canonically: `{ "<canonical_unit_label>": double, "source_value": double, "source_unit": string }`. The canonical-unit-label field name is declared in `_constraints.canonical_unit_label`. | `{ "canonical_unit": ontology_object (required), "canonical_unit_label": snake_case string (required), "minimum": number or null, "maximum": number or null }` | Consumer tooling converts `source_value`/`source_unit` to the canonical unit at insert. `source_value` and `source_unit` are retained for provenance and display. | +| `relative_quantity` | A `quantity` with an explicit temporal (or other) anchor: adds a `"reference"` string to the document value. | `{ "canonical_unit", "canonical_unit_label", "reference" (string, required), "minimum", "maximum" }` | Same conversion and provenance semantics as `quantity`. The `reference` constraint names the anchor (e.g., `"session_start"`, `"surgery"`, `"birth"`). | #### Semantics of validation flags by type -| Type | `_mustBeNonEmpty` applies? | `_mustBeScalar` applies? | `_mustNotHaveNaN` applies?
| -|-------------|---------------------------|--------------------------|---------------------------| -| `did_uid` | yes (non-empty string) | yes | no — must be `false` | -| `char` | yes (non-empty string) | yes | no — must be `false` | -| `integer` | yes | yes | yes | -| `double` | yes | yes | yes | -| `matrix` | yes (non-empty array) | no — should be `false` | yes (element-wise) | -| `timestamp` | yes (non-empty string) | yes | no — must be `false` | -| `boolean` | yes | yes (implicitly) | no — must be `false` | -| `structure` | yes (non-empty object) | yes | no — must be `false` | +| Type | `_mustBeNonEmpty` applies? | `_mustBeScalar` applies? | `_mustNotHaveNaN` applies? | +|---------------------|---------------------------|--------------------------|---------------------------| +| `did_uid` | yes (non-empty string) | yes | no — must be `false` | +| `char` | yes (non-empty string) | yes | no — must be `false` | +| `integer` | yes | yes | yes | +| `double` | yes | yes | yes | +| `matrix` | yes (non-empty array) | no — should be `false` | yes (element-wise) | +| `timestamp` | yes (non-empty string) | yes | no — must be `false` | +| `boolean` | yes | yes (implicitly) | no — must be `false` | +| `structure` | yes (non-empty object) | yes | no — must be `false` | +| `ontology` | yes (object with non-empty `_namespace` and `_term`) | yes | no — must be `false` | +| `quantity` | yes (object with canonical value present) | yes | yes (on canonical numeric value) | +| `relative_quantity` | yes (object with canonical value and reference present) | yes | yes (on canonical numeric value) | + +#### `_shape_from_minischema` on `structure` fields + +A `structure` field may set `"_shape_from_minischema": true` to declare that its +nested shape is not fixed in the schema file but is delegated at runtime to a +profile (minischema) named on each document instance under the top-level +`_minischema` key (see **Minischemas and the `_minischema` Document Key** below). 
+ +When `_shape_from_minischema` is `true` on a `structure` field, the schema file +omits the nested `_fields` array (it is no longer required by the meta-schema). +When `_shape_from_minischema` is absent or `false`, a `structure` field must +include `_fields` as before. --- @@ -514,6 +540,104 @@ Phase 2 is specified here but enforced by consumer tooling (e.g., the database i path in `DID-matlab` or `DID-python`). This repo does not test Phase 2 because it has no database. +### Minischema resolution (Phase 2) + +When a schema declares a `structure` field with `"_shape_from_minischema": true`, +Phase 1 validates only the wrapper (type is object; all base-schema rules apply) +and does not descend into the field's contents. Phase 2 is responsible for: + +1. Reading the document's top-level `_minischema` key (see next section). +2. For each minischema entry, either fetching the referenced profile document (or + file, for canonical profiles) or reading the inline `_fields` array. +3. Validating the delegated `structure` field's contents against the resolved + `_fields` array using the standard Phase-1 field-validation pipeline. + +--- + +## Minischemas and the `_minischema` Document Key + +A **minischema** (profile) is a named, versioned, ontology-anchored schema +fragment that defines the shape of a `structure` field on a document. Profiles +enable a single document type (e.g., `treatment`) to host an open-ended family +of domain-specific shapes (virus injection, drug treatment, bath application, +etc.) without creating one document type per shape. + +Profiles come in two tiers: + +- **Canonical profiles** ship in this repository under + `schemas/V_beta/profiles/<profile_name>.json` and are governed by + `schemas/V_beta/profile_meta.json` (a standard JSON Schema Draft 7 file). + They are versioned with the same semver discipline as schema files. +- **User-defined profiles** live in a user's database as documents, created and + edited without changes to this repository.
They follow the same shape as + canonical profiles and are validated by the same `profile_meta.json`. + +### Profile file shape + +A profile file is a JSON object with the following required keys: + +| Key | Type | Description | +|--------------------|-------------------------|-------------| +| `profile_name` | string (snake_case) | Unique profile identifier. | +| `profile_version` | string (semver) | `MAJOR.MINOR.PATCH`; bump semantics match `_class_version`. | +| `_maturity_level` | enum | `"work_in_progress"` or `"mature"`. | +| `extends` | string | `profile_name` of a parent profile, or `""` for no parent. When non-empty, consumer tooling flattens the parent's `_fields` into this profile's (parent-first) before applying. | +| `profile_ontology` | ontology object | Semantic anchor for the profile as a whole. | +| `_documentation` | string | Human-readable description. | +| `_fields` | array of field defs | Same shape as `_fields` on a regular schema file. Each entry's `_name` becomes the key of a slot inside the delegated `structure` field on the document. | +| `promoted_fields` | array of strings | List of field names that consumer database tooling should materialize as indexed columns for fast search. Indexing hint only; the delegated structure remains the source of truth. | + +### Document-level `_minischema` key + +A document instance whose schema declares a `structure` field with +`"_shape_from_minischema": true` must carry a top-level `_minischema` key +mapping each such field name to a profile definition. Two forms are permitted +per entry: + +**Reference form** — point at a stored or canonical profile: + +```json +"_minischema": { + "manipulation": { "_ref": "virus_injection" } +} +``` + +The `_ref` value is either the `profile_name` of a canonical profile (resolved +against `schemas/V_beta/profiles/`) or a `did_uid` of a user-defined profile +document in the same database. 
+ +**Inline form** — carry the schema fragment directly, no profile document +needed: + +```json +"_minischema": { + "manipulation": { + "_fields": [ + { "_name": "observed_effect", "type": "ontology", + "_mustBeNonEmpty": true, + "_mustBeScalar": true, + "_mustNotHaveNaN": false, + "_queryable": true, + "_ontology": null, + "_documentation": "...", + "_blank_value": null, + "_default_value": null, + "_constraints": {} } + ] + } +} +``` + +Inline form is appropriate for one-off experimental treatments that don't +warrant a reusable profile. If a pattern recurs, lift the inline `_fields` +into a profile document (canonical or user-defined) and switch to `_ref`. + +The reference form does **not** duplicate into `_depends_on`. Phase 2 +resolution of `_minischema._ref` is itself the referential-integrity check; +writing the same reference twice adds no information and obscures the +distinction between data-to-data references (`_depends_on`) and +data-to-schema references (`_minischema`). + --- ## The Meta-Schema @@ -536,10 +660,19 @@ The meta-schema must enforce: - `_directory` (if present) is an array of directory record objects. - `_fields` is an array of field definition objects. - Each field definition object has all required keys with correct types. -- `type` is one of the valid type strings. +- `type` is one of the valid type strings (`did_uid`, `char`, `string`, `integer`, `double`, `matrix`, `timestamp`, `boolean`, `structure`, `ontology`, `quantity`, `relative_quantity`). - `_ontology` is either `null` or an object with `_namespace`, `_term`, `_uri`. - `_mustBeNonEmpty`, `_mustBeScalar`, `_mustNotHaveNaN`, `_queryable` are all booleans. -- For `type: "structure"`, the `_fields` key is present. +- For `type: "structure"` without `"_shape_from_minischema": true`, the `_fields` key is present. When `"_shape_from_minischema": true`, `_fields` may be omitted. 
+ +`schemas/V_beta/profile_meta.json` is a separate JSON Schema Draft 7 file that +validates profile (minischema) files. It reuses the `ontology_object` and +`field_definition` shapes from `did_schema_meta.json` and adds profile-specific +top-level keys (`profile_name`, `profile_version`, `extends`, `profile_ontology`, +`_documentation`, `_fields`, `promoted_fields`). Profile `_constraints` objects +may carry the profile-specific keys `canonical_unit`, `canonical_unit_label`, +`descendant_of`, and `reference` in addition to standard JSON Schema validation +keywords. --- @@ -743,3 +876,9 @@ pytest 13. **Directories are stored as separate documents, not inline metadata.** A directory's file listing is stored in a manifest file attached to a `directory` document, not in the JSON metadata of the parent document. This keeps document metadata small regardless of directory size (even for directories with hundreds of thousands of files). The directory tree structure is expressed through `_depends_on` references (`parent_doc_id` and `parent_directory_id`), enabling efficient tree queries. 14. **`open_binary_file` on a directory document resolves filenames from the manifest.** When called on a directory document, `open_binary_file(doc_id, name)` resolves `name` against the manifest entries, not `_file` slots. The `manifest_file` `_file` slot is internal infrastructure and is never accessible via `open_binary_file`; use `get_directory_manifest` instead. + +15. **Minischemas (profiles) are separate from `_depends_on`.** `_depends_on` expresses data-to-data relationships — this document references those documents as peer data entities. `_minischema` expresses data-to-schema-fragment relationships — the shape of this field is defined by that profile. Conflating the two would overload `_depends_on` with a categorically different concept. 
The `_ref` inside `_minischema` is its own referential-integrity check during Phase 2 resolution; it is not additionally written into `_depends_on`. + +16. **`quantity` and `relative_quantity` store a canonical numeric value plus the original source.** Each document value is an object `{ <canonical_unit_label>: double, source_value: double, source_unit: string }` (plus `reference` for `relative_quantity`). The profile declares the canonical unit and its short label once; the canonical value field is named for its unit (e.g., `volume.nl`, `titer.gc_per_ml`, `onset.day`) so JSON is self-documenting to a human reader. `source_value` and `source_unit` are retained on every document for provenance, audit, and display. This mirrors UCUM + LOINC practice in clinical data: the profile (LOINC) fixes the canonical unit, the value is stored in that unit, the original is retained in parallel fields. + +17. **Canonical profiles are versioned artifacts; user profiles are data.** Canonical profiles ship under `schemas/V_beta/profiles/`, follow the same semver discipline as schema files, and are PR-reviewed. User-defined profiles live in a database as documents and share the same `profile_meta.json` validator but are not governed by this repository. A user profile may extend a canonical one via the `extends` key, inheriting all of its `_fields` and adding or overriding where appropriate. diff --git a/schemas/V_beta_notes.md b/schemas/V_beta_notes.md index ae80393..89767ba 100644 --- a/schemas/V_beta_notes.md +++ b/schemas/V_beta_notes.md @@ -62,3 +62,56 @@ is complete, the `V_alpha/` directory will be removed. Do not add new document types to `V_beta/` without following the naming requirements in `V_beta_SPEC.md`. + +## Treatment consolidation and the minischema mechanism + +The V_beta treatment family has been consolidated around a single generic +`treatment` document type that delegates its `manipulation` field's shape to +a profile (minischema) named on each document instance.
+ +### Retired schemas + +| V_beta (retired) | Replaced by | +|---------------------------|----------------------------------------------------------------| +| `treatment_drug.json` | `treatment.json` + a `drug_treatment` profile (to be authored) | +| `virus_injection.json` | `treatment.json` + `profiles/virus_injection.json` (canonical) | + +`stimulus_bath.json` was **not** retired — it depends on `element_id`, not +`subject_id`, and is a stimulus-delivery approach rather than a subject +treatment. It remains in the flat `V_beta/` directory unchanged. If the +stimulus family develops its own proliferation problem, the profile +mechanism established here is a candidate pattern for it too. + +### New mechanism summary + +- **New types in `did_schema_meta.json`:** `ontology`, `quantity`, + `relative_quantity`. See the updated type table in `V_beta_SPEC.md`. +- **New field attribute:** `_shape_from_minischema` (boolean, optional) on + `structure` fields. When `true`, the schema file omits nested `_fields` and + the field's shape is supplied at runtime by a profile referenced on each + document instance. +- **New top-level document key:** `_minischema`, mapping each profile-delegated + field name to either `{ "_ref": "<profile_name>" }` or an inline `_fields` + fragment. +- **New meta-schema:** `profile_meta.json` validates profile files. Canonical + profiles live under `schemas/V_beta/profiles/`. + +### Migration guidance for existing treatment documents + +Existing `virus_injection` documents (if any) migrate by: + +1. Re-classing the document as `treatment` (classname change; `_class_version` + bumps to `2.0.0`). +2. Moving field values into a `manipulation` object whose keys match the + canonical `virus_injection` profile's `_fields` names: `virus_construct`, + `serotype`, `volume`, `titer`, `onset`, and optionally `injection_rate`, + `promoter`. +3.
Expanding numeric fields (`volume`, `titer`) into the `quantity` shape + with a canonical-unit label matching the profile (`nl`, `gc_per_ml`) plus + `source_value` and `source_unit` retained from the original record. +4. Adding a top-level `_minischema` key: + `{ "manipulation": { "_ref": "virus_injection" } }`. + +`treatment_drug` documents migrate similarly once a `drug_treatment` +canonical profile is authored. That is not done in this change; it is a +follow-on task tracked in `Ideas.md` / `todo.md`. diff --git a/tests/test_minischema.py b/tests/test_minischema.py new file mode 100644 index 0000000..0157d49 --- /dev/null +++ b/tests/test_minischema.py @@ -0,0 +1,361 @@ +"""Tests for the V_beta minischema/profile mechanism and new field types. + +These tests are self-contained: they resolve their own paths and do not rely +on the pre-existing fixtures infrastructure (which predates the V_beta flat +layout and is known-stale). + +Covered: +- did_schema_meta.json is a valid Draft 7 schema. +- profile_meta.json is a valid Draft 7 schema. +- The new treatment.json validates against did_schema_meta.json. +- The canonical virus_injection profile validates against profile_meta.json. +- The type enum accepts ontology, quantity, relative_quantity. +- _shape_from_minischema: true exempts a structure field from needing _fields. +- _shape_from_minischema: omitted or false still requires _fields on structure. +- Retired schemas (treatment_drug, virus_injection) are absent. +- profile_meta.json enforces canonical_unit and canonical_unit_label on quantity. +- profile_meta.json enforces reference on relative_quantity. +- profile_meta.json forbids canonical_unit on ontology type. 
+""" + +import json +import os + +import jsonschema +import pytest + + +REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +V_BETA = os.path.join(REPO_ROOT, "schemas", "V_beta") +PROFILES = os.path.join(V_BETA, "profiles") + + +def load(*parts): + with open(os.path.join(*parts)) as f: + return json.load(f) + + +@pytest.fixture +def did_meta(): + return load(V_BETA, "did_schema_meta.json") + + +@pytest.fixture +def profile_meta(): + return load(V_BETA, "profile_meta.json") + + +@pytest.fixture +def minimal_schema(): + """A minimal valid schema file with one char field; mutate for negative tests.""" + return { + "_classname": "mini_test", + "_class_version": "1.0.0", + "_maturity_level": "work_in_progress", + "_superclasses": [], + "_depends_on": [], + "_fields": [ + { + "_name": "note", + "type": "char", + "_blank_value": "", + "_default_value": "", + "_mustBeNonEmpty": False, + "_mustBeScalar": True, + "_mustNotHaveNaN": False, + "_queryable": False, + "_ontology": None, + "_documentation": "A note.", + "_constraints": {}, + } + ], + } + + +def _field(type_, **overrides): + """Build a minimal field definition with all required keys.""" + base = { + "_name": "f", + "type": type_, + "_blank_value": None, + "_default_value": None, + "_mustBeNonEmpty": False, + "_mustBeScalar": True, + "_mustNotHaveNaN": False, + "_queryable": False, + "_ontology": None, + "_documentation": "test", + "_constraints": {}, + } + base.update(overrides) + return base + + +class TestMetaSchemasAreValidDraft7: + def test_did_schema_meta_is_valid(self, did_meta): + jsonschema.Draft7Validator.check_schema(did_meta) + + def test_profile_meta_is_valid(self, profile_meta): + jsonschema.Draft7Validator.check_schema(profile_meta) + + +class TestTreatmentSchema: + def test_treatment_validates(self, did_meta): + treatment = load(V_BETA, "treatment.json") + jsonschema.validate(instance=treatment, schema=did_meta) + + def test_treatment_manipulation_delegates_shape(self): + 
treatment = load(V_BETA, "treatment.json") + manip = next( + f for f in treatment["_fields"] if f["_name"] == "manipulation" + ) + assert manip["type"] == "structure" + assert manip.get("_shape_from_minischema") is True + assert "_fields" not in manip # delegated; no inline shape + + +class TestRetiredSchemas: + def test_treatment_drug_removed(self): + assert not os.path.exists(os.path.join(V_BETA, "treatment_drug.json")) + + def test_virus_injection_removed(self): + assert not os.path.exists(os.path.join(V_BETA, "virus_injection.json")) + + def test_stimulus_bath_retained(self): + """stimulus_bath is not a treatment; it should remain untouched.""" + assert os.path.exists(os.path.join(V_BETA, "stimulus_bath.json")) + + +class TestVirusInjectionProfile: + def test_validates_against_profile_meta(self, profile_meta): + prof = load(PROFILES, "virus_injection.json") + jsonschema.validate(instance=prof, schema=profile_meta) + + def test_expected_required_fields_present(self): + prof = load(PROFILES, "virus_injection.json") + names = [f["_name"] for f in prof["_fields"]] + required_by_mustBeNonEmpty = [ + f["_name"] for f in prof["_fields"] if f["_mustBeNonEmpty"] + ] + assert "virus_construct" in required_by_mustBeNonEmpty + assert "serotype" in required_by_mustBeNonEmpty + assert "volume" in required_by_mustBeNonEmpty + assert "titer" in required_by_mustBeNonEmpty + assert "onset" in required_by_mustBeNonEmpty + assert "injection_rate" in names # optional + assert "promoter" in names # optional + + def test_volume_is_quantity_with_nl(self): + prof = load(PROFILES, "virus_injection.json") + volume = next(f for f in prof["_fields"] if f["_name"] == "volume") + assert volume["type"] == "quantity" + assert volume["_constraints"]["canonical_unit_label"] == "nl" + assert volume["_constraints"]["canonical_unit"]["_namespace"] == "uo" + + def test_titer_is_quantity_with_gc_per_ml(self): + prof = load(PROFILES, "virus_injection.json") + titer = next(f for f in prof["_fields"] if 
f["_name"] == "titer") + assert titer["type"] == "quantity" + assert titer["_constraints"]["canonical_unit_label"] == "gc_per_ml" + + def test_onset_is_relative_quantity_with_reference(self): + prof = load(PROFILES, "virus_injection.json") + onset = next(f for f in prof["_fields"] if f["_name"] == "onset") + assert onset["type"] == "relative_quantity" + assert onset["_constraints"]["canonical_unit_label"] == "day" + assert onset["_constraints"]["reference"] == "session_start" + + def test_serotype_has_descendant_of_constraint(self): + prof = load(PROFILES, "virus_injection.json") + serotype = next(f for f in prof["_fields"] if f["_name"] == "serotype") + assert serotype["type"] == "ontology" + assert serotype["_constraints"]["descendant_of"]["_name"] == "Parvoviridae" + + +class TestNewTypesAcceptedByMetaSchema: + """The did_schema_meta.json type enum accepts ontology, quantity, relative_quantity.""" + + def _wrap(self, field): + return { + "_classname": "wrap_test", + "_class_version": "1.0.0", + "_maturity_level": "work_in_progress", + "_superclasses": [], + "_depends_on": [], + "_fields": [field], + } + + def test_ontology_type_accepted(self, did_meta): + jsonschema.validate(self._wrap(_field("ontology")), did_meta) + + def test_quantity_type_accepted(self, did_meta): + jsonschema.validate(self._wrap(_field("quantity")), did_meta) + + def test_relative_quantity_type_accepted(self, did_meta): + jsonschema.validate(self._wrap(_field("relative_quantity")), did_meta) + + def test_nonexistent_type_rejected(self, did_meta): + with pytest.raises(jsonschema.ValidationError): + jsonschema.validate(self._wrap(_field("not_a_type")), did_meta) + + +class TestShapeFromMinischema: + """A structure field with _shape_from_minischema: true need not carry _fields.""" + + def _wrap(self, field): + return { + "_classname": "wrap_test", + "_class_version": "1.0.0", + "_maturity_level": "work_in_progress", + "_superclasses": [], + "_depends_on": [], + "_fields": [field], + } + + def 
test_structure_without_fields_and_without_flag_fails(self, did_meta): + f = _field("structure") + with pytest.raises(jsonschema.ValidationError): + jsonschema.validate(self._wrap(f), did_meta) + + def test_structure_with_flag_and_without_fields_passes(self, did_meta): + f = _field("structure", _shape_from_minischema=True) + jsonschema.validate(self._wrap(f), did_meta) + + def test_structure_with_fields_and_without_flag_passes(self, did_meta): + f = _field("structure", _fields=[]) + jsonschema.validate(self._wrap(f), did_meta) + + def test_flag_false_still_requires_fields(self, did_meta): + f = _field("structure", _shape_from_minischema=False) + with pytest.raises(jsonschema.ValidationError): + jsonschema.validate(self._wrap(f), did_meta) + + +class TestProfileMetaConstraintRules: + """profile_meta.json enforces type-specific _constraints requirements.""" + + def _wrap_profile(self, field): + return { + "profile_name": "test_profile", + "profile_version": "1.0.0", + "_maturity_level": "work_in_progress", + "extends": "", + "profile_ontology": { + "_namespace": "obi", + "_term": "0000412", + "_name": "test", + "_uri": None, + }, + "_documentation": "Test profile.", + "_fields": [field], + "promoted_fields": [], + } + + def test_quantity_requires_canonical_unit_and_label(self, profile_meta): + # Missing both required keys under _constraints + bad = _field("quantity", _name="x") + with pytest.raises(jsonschema.ValidationError): + jsonschema.validate(self._wrap_profile(bad), profile_meta) + + def test_quantity_with_canonical_unit_and_label_passes(self, profile_meta): + good = _field( + "quantity", + _name="x", + _constraints={ + "canonical_unit": { + "_namespace": "uo", + "_term": "0000102", + "_name": "nanoliter", + "_uri": None, + }, + "canonical_unit_label": "nl", + }, + ) + jsonschema.validate(self._wrap_profile(good), profile_meta) + + def test_relative_quantity_requires_reference(self, profile_meta): + # Has canonical_unit and label but no reference + bad = 
_field( + "relative_quantity", + _name="x", + _constraints={ + "canonical_unit": { + "_namespace": "uo", + "_term": "0000033", + "_name": "day", + "_uri": None, + }, + "canonical_unit_label": "day", + }, + ) + with pytest.raises(jsonschema.ValidationError): + jsonschema.validate(self._wrap_profile(bad), profile_meta) + + def test_relative_quantity_with_reference_passes(self, profile_meta): + good = _field( + "relative_quantity", + _name="x", + _constraints={ + "canonical_unit": { + "_namespace": "uo", + "_term": "0000033", + "_name": "day", + "_uri": None, + }, + "canonical_unit_label": "day", + "reference": "session_start", + }, + ) + jsonschema.validate(self._wrap_profile(good), profile_meta) + + def test_ontology_forbids_canonical_unit(self, profile_meta): + bad = _field( + "ontology", + _name="x", + _constraints={ + "canonical_unit": { + "_namespace": "uo", + "_term": "0000102", + "_name": "nanoliter", + "_uri": None, + }, + }, + ) + with pytest.raises(jsonschema.ValidationError): + jsonschema.validate(self._wrap_profile(bad), profile_meta) + + def test_ontology_with_descendant_of_passes(self, profile_meta): + good = _field( + "ontology", + _name="x", + _constraints={ + "descendant_of": { + "_namespace": "uberon", + "_term": "0000955", + "_name": "brain", + "_uri": None, + }, + }, + ) + jsonschema.validate(self._wrap_profile(good), profile_meta) + + +class TestAllVBetaSchemasStillValidate: + """Regression: all pre-existing V_beta schemas still validate against the updated meta-schema.""" + + def test_all_flat_schemas_pass(self, did_meta): + import glob + + files = sorted(glob.glob(os.path.join(V_BETA, "*.json"))) + # Exclude the meta-schemas themselves; they're not NDI schema files. 
+ files = [ + f for f in files + if os.path.basename(f) not in {"did_schema_meta.json", "profile_meta.json"} + ] + assert len(files) > 10, "sanity check: at least 10 schema files expected" + for path in files: + data = load(path) + jsonschema.validate( + instance=data, + schema=did_meta, + )