From 3fe09fb36fe76fc38d6fbda38621bc120e2b8150 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 21 May 2026 18:04:25 -0600 Subject: [PATCH 1/3] Filter set!(model, ::MetadataSet) by per-model accepted kwargs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit set!(model, mset) previously forwarded every glossary-mapped variable to set!(model; kwargs...), which fails for real Oceananigans/ClimaSeaIce models that throw ArgumentError on unknown kwargs. A shared 4-variable MetadataSet (T, S, h, ℵ) therefore broke both the ocean's set! (rejects h, ℵ) and the sea-ice set! (rejects T, S), as seen in the docs-build failure of examples/one_degree_simulation.jl. Add `accepted_metadata_names(model)` with model-specific methods for HydrostaticFreeSurfaceModel (introspect velocities/tracers/free_surface) and SeaIceModel (only :sea_ice_thickness, :sea_ice_concentration), and filter the routed kwargs through it. The fallback (`keys(variable_glossary)`) preserves existing StubModel test behavior. Add a RestrictiveStubModel regression test in test_metadata_set.jl that mimics the real models' kwarg-strictness. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/DataWrangling/metadata.jl | 39 +++++++++++++++++-- test/test_metadata_set.jl | 70 ++++++++++++++++++++++++++++++++++- 2 files changed, 104 insertions(+), 5 deletions(-) diff --git a/src/DataWrangling/metadata.jl b/src/DataWrangling/metadata.jl index 645c27f8..7a8da502 100644 --- a/src/DataWrangling/metadata.jl +++ b/src/DataWrangling/metadata.jl @@ -503,7 +503,7 @@ Set fields of `model` from the variables in `mset`, auto-routing verbose dataset variable names to short model field-names via the global [`variable_glossary`](@ref) registry. -Variables in `mset` that have no entry in `variable_glossary` are silently +Variables in `mset` that the target `model` does not consume are silently skipped — this lets partial application across coupled-model components work naturally. For example, a single 4-variable set can drive both an ocean and a sea-ice model: @@ -515,14 +515,45 @@ mset = MetadataSet(:temperature, :salinity, set!(ocean.model, mset) # consumes :temperature, :salinity → T, S set!(sea_ice.model, mset) # consumes :sea_ice_thickness, :sea_ice_concentration → h, ℵ ``` + +The per-model filter is controlled by [`accepted_metadata_names`](@ref). The +fallback accepts every key in `variable_glossary` (preserving prior behavior +for stub/ad-hoc models in tests). """ function Fields.set!(model, mset::MetadataSet) - names = getfield(mset, :names) - known = filter(n -> haskey(variable_glossary, n), names) - kwargs = NamedTuple{Tuple(variable_glossary[n] for n in known)}(Tuple(mset[n] for n in known)) + accepted = accepted_metadata_names(model) + names = filter(n -> n in accepted, getfield(mset, :names)) + isempty(names) && return model + kwargs = NamedTuple{Tuple(variable_glossary[n] for n in names)}(Tuple(mset[n] for n in names)) return set!(model; kwargs...) end +""" + accepted_metadata_names(model) -> Tuple{Symbol,...} + +Return the verbose `MetadataSet` variable names that `set!(model, mset)` is +allowed to route to `model`. Defaults to every key in `variable_glossary`; +model-specific methods narrow this to the subset that the underlying +`set!(model; kwargs...)` actually accepts (otherwise an unrelated variable in +a shared `MetadataSet` would trigger an `ArgumentError` from the unknown kwarg). +""" +accepted_metadata_names(model) = Tuple(keys(variable_glossary)) + +# Ocean: route any glossary key whose short name matches a property of +# velocities, tracers, or free_surface (the three places HydrostaticFreeSurfaceModel's +# `set!` looks up kwargs). +using Oceananigans.Models.HydrostaticFreeSurfaceModels: HydrostaticFreeSurfaceModel +function accepted_metadata_names(model::HydrostaticFreeSurfaceModel) + short = (propertynames(model.velocities)..., + propertynames(model.tracers)..., + propertynames(model.free_surface)...) + return Tuple(n for (n, s) in variable_glossary if s in short) +end + +# Sea ice: ClimaSeaIce's `set!(::SeaIceModel; h, ℵ)` only knows these two. +using ClimaSeaIce: SeaIceModel +accepted_metadata_names(::SeaIceModel) = (:sea_ice_thickness, :sea_ice_concentration) + """ download(mset::MetadataSet; kwargs...) diff --git a/test/test_metadata_set.jl b/test/test_metadata_set.jl index 9e5d62c5..c6940e0c 100644 --- a/test/test_metadata_set.jl +++ b/test/test_metadata_set.jl @@ -1,7 +1,8 @@ include("runtests_setup.jl") using NumericalEarth.DataWrangling: MetadataSet, Metadata, Metadatum, - BoundingBox, variable_glossary, metadata_path + BoundingBox, variable_glossary, metadata_path, + accepted_metadata_names # `MetadataSet` is a pure DataWrangling concept: no downloads, no field # construction. These tests exercise construction, accessors, iteration, @@ -211,6 +212,73 @@ end @test issubset(received_keys, values(NumericalEarth.DataWrangling.variable_glossary)) end +##### +##### Per-model accepted_metadata_names filter +##### +##### A real Oceananigans/ClimaSeaIce model throws ArgumentError on unknown +##### kwargs, so the generic dispatch must filter glossary keys down to those +##### the target model can consume. This stub mimics that strict behavior. + +mutable struct RestrictiveStubModel + accepts :: Tuple{Vararg{Symbol}} # short field names this model accepts + received :: Vector{Pair{Symbol, Any}} + RestrictiveStubModel(accepts...) = new(accepts, Pair{Symbol, Any}[]) +end + +function NumericalEarth.DataWrangling.set!(m::RestrictiveStubModel; kw...) + for (k, v) in kw + k in m.accepts || throw(ArgumentError("name $k not accepted by RestrictiveStubModel")) + push!(m.received, k => v) + end + return m +end + +# Narrow the verbose-name filter so only variables routable to `m.accepts` are +# delivered — same pattern as the HydrostaticFreeSurfaceModel/SeaIceModel +# methods. +function NumericalEarth.DataWrangling.accepted_metadata_names(m::RestrictiveStubModel) + return Tuple(n for (n, s) in variable_glossary if s in m.accepts) +end + +@testset "set!(model, mset) — per-model filter (regression for issue with 4-var sets)" begin + mset = MetadataSet(:temperature, :salinity, + :sea_ice_thickness, :sea_ice_concentration; + dataset = ECCO4Monthly(), + date = snapshot_date) + + # Ocean-like model: only :T, :S + ocean_like = RestrictiveStubModel(:T, :S) + set!(ocean_like, mset) + ocean_keys = first.(ocean_like.received) + @test :T in ocean_keys + @test :S in ocean_keys + @test !(:h in ocean_keys) + @test !(:ℵ in ocean_keys) + @test length(ocean_keys) == 2 + + # Sea-ice-like model: only :h, :ℵ + sea_ice_like = RestrictiveStubModel(:h, :ℵ) + set!(sea_ice_like, mset) + ice_keys = first.(sea_ice_like.received) + @test :h in ice_keys + @test :ℵ in ice_keys + @test !(:T in ice_keys) + @test !(:S in ice_keys) + @test length(ice_keys) == 2 + + # Model that accepts nothing relevant — call is a no-op, no error. + empty_model = RestrictiveStubModel(:foo) + set!(empty_model, mset) + @test isempty(empty_model.received) +end + +@testset "accepted_metadata_names — fallback covers all glossary keys" begin + # The fallback (used by ad-hoc/stub models) is every glossary key, so a + # bare StubModel sees every routed kwarg. Real-model methods narrow this + # — exercised end-to-end in test_ocean_sea_ice_model.jl. + @test Set(accepted_metadata_names(StubModel())) == Set(keys(variable_glossary)) +end + @testset "set!(::NamedTuple, mset) — explicit per-variable" begin mset = MetadataSet(:temperature, :salinity; dataset = ECCO4Monthly(), From 43e973d84a21c5776de8df46278d0f9f334e47a8 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 21 May 2026 18:36:49 -0600 Subject: [PATCH 2/3] Replace accepted_metadata_names hook with direct set!(::Model, ::MetadataSet) dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous commit introduced an `accepted_metadata_names` hook that the generic `set!(model, mset)` consulted to filter routed kwargs. Refactor to plain multiple dispatch: each model type overrides `set!(::Model, ::MetadataSet)` directly. The generic fallback delivers every glossary-mapped variable (useful for permissive stub models) and real models override to filter. Why this design is better: - Idiomatic Julia — no parallel hook layer, just multiple dispatch. - Per-model logic lives next to its dispatch, not behind a predicate. - Future models can do more than just filter (transform, validate, multi-pass) without having to extend a second function. A small `_set_glossary!` helper keeps the kwargs-construction code DRY across the dispatches. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/DataWrangling/metadata.jl | 61 +++++++++++++++++------------------ test/test_metadata_set.jl | 42 ++++++++++++------------ 2 files changed, 50 insertions(+), 53 deletions(-) diff --git a/src/DataWrangling/metadata.jl b/src/DataWrangling/metadata.jl index 7a8da502..37ed4683 100644 --- a/src/DataWrangling/metadata.jl +++ b/src/DataWrangling/metadata.jl @@ -499,14 +499,16 @@ end """ set!(model, mset::MetadataSet) -Set fields of `model` from the variables in `mset`, auto-routing verbose -dataset variable names to short model field-names via the global -[`variable_glossary`](@ref) registry. +Generic fallback: route every glossary-mapped variable in `mset` to +`set!(model; kwargs...)`. Variables not in [`variable_glossary`](@ref) are +silently skipped. -Variables in `mset` that the target `model` does not consume are silently -skipped — this lets partial application across coupled-model components work -naturally. For example, a single 4-variable set can drive both an ocean and a -sea-ice model: +This fallback works for permissive models that accept arbitrary kwargs but +will fail on models (e.g. `HydrostaticFreeSurfaceModel`, `SeaIceModel`) that +throw `ArgumentError` on unknown kwargs. Those models override this method to +filter the routed kwargs to those they actually consume — see the methods +below. The overrides let a single multi-component MetadataSet drive both an +ocean and a sea-ice model: ```julia mset = MetadataSet(:temperature, :salinity, @@ -515,44 +517,41 @@ mset = MetadataSet(:temperature, :salinity, set!(ocean.model, mset) # consumes :temperature, :salinity → T, S set!(sea_ice.model, mset) # consumes :sea_ice_thickness, :sea_ice_concentration → h, ℵ ``` - -The per-model filter is controlled by [`accepted_metadata_names`](@ref). The -fallback accepts every key in `variable_glossary` (preserving prior behavior -for stub/ad-hoc models in tests). """ function Fields.set!(model, mset::MetadataSet) - accepted = accepted_metadata_names(model) - names = filter(n -> n in accepted, getfield(mset, :names)) + names = filter(n -> haskey(variable_glossary, n), getfield(mset, :names)) + return _set_glossary!(model, mset, names) +end + +# Build kwargs from `mset` restricted to verbose `names` and forward to +# `set!(model; kwargs...)`. Internal helper for the per-model overrides. +function _set_glossary!(model, mset::MetadataSet, names) isempty(names) && return model kwargs = NamedTuple{Tuple(variable_glossary[n] for n in names)}(Tuple(mset[n] for n in names)) return set!(model; kwargs...) end -""" - accepted_metadata_names(model) -> Tuple{Symbol,...} - -Return the verbose `MetadataSet` variable names that `set!(model, mset)` is -allowed to route to `model`. Defaults to every key in `variable_glossary`; -model-specific methods narrow this to the subset that the underlying -`set!(model; kwargs...)` actually accepts (otherwise an unrelated variable in -a shared `MetadataSet` would trigger an `ArgumentError` from the unknown kwarg). -""" -accepted_metadata_names(model) = Tuple(keys(variable_glossary)) - -# Ocean: route any glossary key whose short name matches a property of -# velocities, tracers, or free_surface (the three places HydrostaticFreeSurfaceModel's -# `set!` looks up kwargs). +# Ocean: route only variables whose short name appears in velocities, +# tracers, or free_surface — the three places HydrostaticFreeSurfaceModel's +# `set!` looks up kwargs. using Oceananigans.Models.HydrostaticFreeSurfaceModels: HydrostaticFreeSurfaceModel -function accepted_metadata_names(model::HydrostaticFreeSurfaceModel) +function Fields.set!(model::HydrostaticFreeSurfaceModel, mset::MetadataSet) short = (propertynames(model.velocities)..., propertynames(model.tracers)..., propertynames(model.free_surface)...) - return Tuple(n for (n, s) in variable_glossary if s in short) + names = filter(getfield(mset, :names)) do n + haskey(variable_glossary, n) && variable_glossary[n] in short + end + return _set_glossary!(model, mset, names) end -# Sea ice: ClimaSeaIce's `set!(::SeaIceModel; h, ℵ)` only knows these two. +# Sea ice: ClimaSeaIce's `set!(::SeaIceModel; h, ℵ)` only accepts these two. using ClimaSeaIce: SeaIceModel -accepted_metadata_names(::SeaIceModel) = (:sea_ice_thickness, :sea_ice_concentration) +function Fields.set!(model::SeaIceModel, mset::MetadataSet) + sea_ice_names = (:sea_ice_thickness, :sea_ice_concentration) + names = filter(in(sea_ice_names), getfield(mset, :names)) + return _set_glossary!(model, mset, names) +end """ download(mset::MetadataSet; kwargs...) diff --git a/test/test_metadata_set.jl b/test/test_metadata_set.jl index c6940e0c..93c74f42 100644 --- a/test/test_metadata_set.jl +++ b/test/test_metadata_set.jl @@ -1,8 +1,7 @@ include("runtests_setup.jl") using NumericalEarth.DataWrangling: MetadataSet, Metadata, Metadatum, - BoundingBox, variable_glossary, metadata_path, - accepted_metadata_names + BoundingBox, variable_glossary, metadata_path # `MetadataSet` is a pure DataWrangling concept: no downloads, no field # construction. These tests exercise construction, accessors, iteration, @@ -213,11 +212,13 @@ end end ##### -##### Per-model accepted_metadata_names filter +##### Per-model overrides (regression for issue with shared multi-component sets) ##### -##### A real Oceananigans/ClimaSeaIce model throws ArgumentError on unknown -##### kwargs, so the generic dispatch must filter glossary keys down to those -##### the target model can consume. This stub mimics that strict behavior. +##### Real Oceananigans/ClimaSeaIce models throw ArgumentError on unknown +##### kwargs, so they override `set!(model, ::MetadataSet)` to filter down to +##### the variables they consume. This RestrictiveStubModel mimics that +##### strictness and demonstrates the override pattern used by the real +##### dispatches in src/DataWrangling/metadata.jl. mutable struct RestrictiveStubModel accepts :: Tuple{Vararg{Symbol}} # short field names this model accepts @@ -233,20 +234,24 @@ function NumericalEarth.DataWrangling.set!(m::RestrictiveStubModel; kw...) return m end -# Narrow the verbose-name filter so only variables routable to `m.accepts` are -# delivered — same pattern as the HydrostaticFreeSurfaceModel/SeaIceModel -# methods. -function NumericalEarth.DataWrangling.accepted_metadata_names(m::RestrictiveStubModel) - return Tuple(n for (n, s) in variable_glossary if s in m.accepts) +# Override the dispatch to filter to `m.accepts` — same shape as the real +# HydrostaticFreeSurfaceModel/SeaIceModel methods. +function Oceananigans.Fields.set!(m::RestrictiveStubModel, mset::MetadataSet) + names = filter(getfield(mset, :names)) do n + haskey(variable_glossary, n) && variable_glossary[n] in m.accepts + end + isempty(names) && return m + kwargs = NamedTuple{Tuple(variable_glossary[n] for n in names)}(Tuple(mset[n] for n in names)) + return set!(m; kwargs...) end -@testset "set!(model, mset) — per-model filter (regression for issue with 4-var sets)" begin +@testset "set!(model, mset) — per-model overrides filter a shared MetadataSet" begin mset = MetadataSet(:temperature, :salinity, :sea_ice_thickness, :sea_ice_concentration; dataset = ECCO4Monthly(), date = snapshot_date) - # Ocean-like model: only :T, :S + # Ocean-like model: override filters to (:T, :S). ocean_like = RestrictiveStubModel(:T, :S) set!(ocean_like, mset) ocean_keys = first.(ocean_like.received) @@ -256,7 +261,7 @@ end @test !(:ℵ in ocean_keys) @test length(ocean_keys) == 2 - # Sea-ice-like model: only :h, :ℵ + # Sea-ice-like model: override filters to (:h, :ℵ). sea_ice_like = RestrictiveStubModel(:h, :ℵ) set!(sea_ice_like, mset) ice_keys = first.(sea_ice_like.received) @@ -266,19 +271,12 @@ end @test !(:S in ice_keys) @test length(ice_keys) == 2 - # Model that accepts nothing relevant — call is a no-op, no error. + # No matching variables → no-op, no error. empty_model = RestrictiveStubModel(:foo) set!(empty_model, mset) @test isempty(empty_model.received) end -@testset "accepted_metadata_names — fallback covers all glossary keys" begin - # The fallback (used by ad-hoc/stub models) is every glossary key, so a - # bare StubModel sees every routed kwarg. Real-model methods narrow this - # — exercised end-to-end in test_ocean_sea_ice_model.jl. - @test Set(accepted_metadata_names(StubModel())) == Set(keys(variable_glossary)) -end - @testset "set!(::NamedTuple, mset) — explicit per-variable" begin mset = MetadataSet(:temperature, :salinity; dataset = ECCO4Monthly(), From f91425afc3514cadf4122aef67a2c371da6f765a Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 21 May 2026 18:46:06 -0600 Subject: [PATCH 3/3] Collapse set! and _set_glossary! into one function with optional names arg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the private `_set_glossary!` helper with a third optional argument `names` on `Fields.set!(model, mset::MetadataSet, names=keys(variable_glossary))`. Per-model overrides simply compute their narrowed `names` and call the 3-arg generic — no separate helper, no redundant `haskey(variable_glossary, ...)` check (the default already restricts to glossary keys). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/DataWrangling/metadata.jl | 42 +++++++++++++---------------------- test/test_metadata_set.jl | 13 +++++------ 2 files changed, 20 insertions(+), 35 deletions(-) diff --git a/src/DataWrangling/metadata.jl b/src/DataWrangling/metadata.jl index 37ed4683..c77ef783 100644 --- a/src/DataWrangling/metadata.jl +++ b/src/DataWrangling/metadata.jl @@ -497,18 +497,16 @@ function Fields.set!(fields::NamedTuple, mset::MetadataSet) end """ - set!(model, mset::MetadataSet) + set!(model, mset::MetadataSet, names=keys(variable_glossary)) -Generic fallback: route every glossary-mapped variable in `mset` to -`set!(model; kwargs...)`. Variables not in [`variable_glossary`](@ref) are -silently skipped. +Route variables from `mset` to `set!(model; kwargs...)`, translating verbose +dataset names to short model field-names via [`variable_glossary`](@ref). +Only the intersection of `names` and `mset.names` is forwarded. -This fallback works for permissive models that accept arbitrary kwargs but -will fail on models (e.g. `HydrostaticFreeSurfaceModel`, `SeaIceModel`) that -throw `ArgumentError` on unknown kwargs. Those models override this method to -filter the routed kwargs to those they actually consume — see the methods -below. The overrides let a single multi-component MetadataSet drive both an -ocean and a sea-ice model: +The default `names` is every glossary key — fine for permissive models. Models +that throw on unknown kwargs (`HydrostaticFreeSurfaceModel`, `SeaIceModel`) +override the 2-argument form to pass a narrower `names`, letting a single +multi-component MetadataSet drive both an ocean and a sea-ice model: ```julia mset = MetadataSet(:temperature, :salinity, @@ -518,16 +516,10 @@ set!(ocean.model, mset) # consumes :temperature, :salinity → T, S set!(sea_ice.model, mset) # consumes :sea_ice_thickness, :sea_ice_concentration → h, ℵ ``` """ -function Fields.set!(model, mset::MetadataSet) - names = filter(n -> haskey(variable_glossary, n), getfield(mset, :names)) - return _set_glossary!(model, mset, names) -end - -# Build kwargs from `mset` restricted to verbose `names` and forward to -# `set!(model; kwargs...)`. Internal helper for the per-model overrides. -function _set_glossary!(model, mset::MetadataSet, names) - isempty(names) && return model - kwargs = NamedTuple{Tuple(variable_glossary[n] for n in names)}(Tuple(mset[n] for n in names)) +function Fields.set!(model, mset::MetadataSet, names=keys(variable_glossary)) + routed = filter(in(names), getfield(mset, :names)) + isempty(routed) && return model + kwargs = NamedTuple{Tuple(variable_glossary[n] for n in routed)}(Tuple(mset[n] for n in routed)) return set!(model; kwargs...) end @@ -539,18 +531,14 @@ function Fields.set!(model::HydrostaticFreeSurfaceModel, mset::MetadataSet) short = (propertynames(model.velocities)..., propertynames(model.tracers)..., propertynames(model.free_surface)...) - names = filter(getfield(mset, :names)) do n - haskey(variable_glossary, n) && variable_glossary[n] in short - end - return _set_glossary!(model, mset, names) + names = Tuple(n for (n, s) in variable_glossary if s in short) + return set!(model, mset, names) end # Sea ice: ClimaSeaIce's `set!(::SeaIceModel; h, ℵ)` only accepts these two. using ClimaSeaIce: SeaIceModel function Fields.set!(model::SeaIceModel, mset::MetadataSet) - sea_ice_names = (:sea_ice_thickness, :sea_ice_concentration) - names = filter(in(sea_ice_names), getfield(mset, :names)) - return _set_glossary!(model, mset, names) + return set!(model, mset, (:sea_ice_thickness, :sea_ice_concentration)) end """ diff --git a/test/test_metadata_set.jl b/test/test_metadata_set.jl index 93c74f42..32c085d5 100644 --- a/test/test_metadata_set.jl +++ b/test/test_metadata_set.jl @@ -234,15 +234,12 @@ function NumericalEarth.DataWrangling.set!(m::RestrictiveStubModel; kw...) return m end -# Override the dispatch to filter to `m.accepts` — same shape as the real -# HydrostaticFreeSurfaceModel/SeaIceModel methods. +# Override the 2-arg dispatch to pass a narrowed `names` to the generic +# 3-arg form — same shape as the real HydrostaticFreeSurfaceModel/SeaIceModel +# methods. function Oceananigans.Fields.set!(m::RestrictiveStubModel, mset::MetadataSet) - names = filter(getfield(mset, :names)) do n - haskey(variable_glossary, n) && variable_glossary[n] in m.accepts - end - isempty(names) && return m - kwargs = NamedTuple{Tuple(variable_glossary[n] for n in names)}(Tuple(mset[n] for n in names)) - return set!(m; kwargs...) + names = Tuple(n for (n, s) in variable_glossary if s in m.accepts) + return set!(m, mset, names) end @testset "set!(model, mset) — per-model overrides filter a shared MetadataSet" begin