diff --git a/Project.toml b/Project.toml
index 98a0ba09..dbbf6d63 100644
--- a/Project.toml
+++ b/Project.toml
@@ -31,6 +31,7 @@ SeawaterPolynomials = "d496a93d-167e-4197-9f49-d3af4ff8fe40"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Thermodynamics = "b60c26fb-14c3-4610-9d3e-2d17fe7ff00c"
+TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
[weakdeps]
@@ -90,6 +91,7 @@ SpeedyWeather = "0.20"
StaticArrays = "1"
Statistics = "<0.0.1, 1"
Thermodynamics = "0.15.3"
+TOML = "<0.0.1, 1"
WorldOceanAtlasTools = "0.6"
ZipFile = "0.10"
julia = "1.10"
diff --git a/src/Bathymetry/Bathymetry.jl b/src/Bathymetry/Bathymetry.jl
index c44d4de0..0cb7222a 100644
--- a/src/Bathymetry/Bathymetry.jl
+++ b/src/Bathymetry/Bathymetry.jl
@@ -22,7 +22,7 @@ using Printf: Printf
using Scratch: Scratch, @get_scratch!
using ..DataWrangling: Metadatum, native_grid, metadata_path,
- dataset_variable_name, validate_dataset_coverage
+ dataset_variable_name, validate_dataset_coverage, download_dataset
using ..DataWrangling.ETOPO: ETOPO2022
include("regrid_bathymetry.jl")
diff --git a/src/Bathymetry/regrid_bathymetry.jl b/src/Bathymetry/regrid_bathymetry.jl
index 8e7a8195..daf6ae88 100644
--- a/src/Bathymetry/regrid_bathymetry.jl
+++ b/src/Bathymetry/regrid_bathymetry.jl
@@ -203,7 +203,7 @@ function regrid_bathymetry(target_grid, metadata;
end
end
- download(metadata)
+ download_dataset(metadata)
target_z = _regrid_bathymetry(target_grid, metadata;
height_above_water,
@@ -302,7 +302,7 @@ function regrid_bathymetry(target_grid::DistributedGrid, metadata;
interpolation_passes, major_basins)
# download uses @root internally; all ranks must call it
- download(metadata)
+ download_dataset(metadata)
# Only rank 0 performs cache lookup and computation to avoid OOM
bottom_height = if arch.local_rank == 0
diff --git a/src/DataWrangling/DataModes/DataModes.jl b/src/DataWrangling/DataModes/DataModes.jl
new file mode 100644
index 00000000..351f296c
--- /dev/null
+++ b/src/DataWrangling/DataModes/DataModes.jl
@@ -0,0 +1,91 @@
+"""
+    DataModes
+
+Three-mode download dispatch and a declarative `NumericalEarthDataManifest.toml` for NumericalEarth.
+Modes are selected by the `NUMERICALEARTH_DATA` environment variable:
+
+| Value               | Behavior                                                                    |
+|---------------------|-----------------------------------------------------------------------------|
+| `"auto"` (default)  | Download on demand (current behavior).                                      |
+| `"strict"`          | Error if any required file is missing. Never download.                      |
+| `"pregenerate"`     | Trace the running script; write the manifest to `pwd()`.                    |
+| `"pregenerate:DIR"` | Same as `"pregenerate"` but write to `DIR/NumericalEarthDataManifest.toml`. |
+
+The filename is fixed (`NumericalEarthDataManifest.toml`) so manifests don't collide with Pkg's
+`Project.toml` / `Manifest.toml` and there is one canonical manifest per directory.
+
+See [`NumericalEarth.DataWrangling.download_dataset`](@ref) for the dispatch and
+[`pregenerate_dataset_manifest`](@ref) for the trace entry point.
+"""
+module DataModes
+
+using DocStringExtensions: TYPEDSIGNATURES
+using TOML: TOML
+
+using ..DataWrangling: DataWrangling, AbstractMetadata, Metadata, Metadatum, MetadataSet, BoundingBox, Column, Linear, Nearest
+using ..DataWrangling: DatewiseFilename, metadata_path, default_download_directory, download_dataset
+
+export DryRunValue
+export pregenerate_dataset_manifest, download_datasets
+export register_dataset!
+
+# Active data mode (`:auto`, `:strict` or `:pregenerate`). Overwritten from the
+# `NUMERICALEARTH_DATA` environment variable in `__init__`.
+const DATA_MODE = Ref{Symbol}(:auto)
+
+include("dry_run_value.jl")
+include("data_manifest_wrangling.jl")
+include("parse_and_rewrite_script.jl")
+
+# In `:pregenerate` mode every observed `Metadata` / `MetadataSet` is recorded for the manifest;
+# in every other mode these hooks do nothing. Both always return `nothing`.
+DataWrangling.observe_metadata(m::Metadata) = (DATA_MODE[] === :pregenerate && record_for_manifest(m); nothing)
+DataWrangling.observe_metadata(m::MetadataSet) = (DATA_MODE[] === :pregenerate && record_for_manifest(m); nothing)
+
+"""
+    $(TYPEDSIGNATURES)
+
+Acquire every dataset listed in `metadata...` (varargs form) or in the manifest at
+`joinpath(dir, "NumericalEarthDataManifest.toml")` (zero-arg form). Each entry is routed through
+[`download_dataset`](@ref), so the current `NUMERICALEARTH_DATA` mode applies.
+
+For the manifest form, `dir` is the directory containing the manifest (defaults to `pwd()`). Pass
+`download_dir` to override the default download directory for every reconstructed entry (e.g. when
+login-node and compute-node filesystems differ).
+
+Returns `nothing`.
+"""
+function download_datasets(metadata::AbstractMetadata...)
+    foreach(download_dataset, metadata)
+    return nothing
+end
+
+function download_datasets(; dir::AbstractString = pwd(), download_dir = nothing)
+    foreach(download_dataset, read_manifest(; dir, download_dir))
+    return nothing
+end
+
+# Collect, as a `Vector{String}`, every path `metadata_path` reports for `m`. A `MetadataSet`
+# contributes the concatenated paths of each of its named members.
+function expected_paths(m::AbstractMetadata)
+    m isa MetadataSet &&
+        return reduce(vcat, expected_paths(m[n]) for n in m.names; init = String[])
+    p = metadata_path(m)
+    return p isa AbstractVector ? collect(String, p) : String[p]
+end
+
+"""
+    $(TYPEDSIGNATURES)
+
+Verify that every file required by `metadata` is already on disk. Raises a single error listing
+every missing file. Returns `nothing` on success.
+"""
+function check_files_exist(metadata::AbstractMetadata)
+    paths = expected_paths(metadata)
+    missing_paths = filter(p -> !isfile(p), paths)
+    isempty(missing_paths) && return nothing
+    list = join((" " * p for p in missing_paths), "\n")
+    error("NUMERICALEARTH_DATA=strict: $(length(missing_paths)) required file(s) missing:\n$list")
+end
+
+# Read `NUMERICALEARTH_DATA` once at module load. An unrecognized value makes
+# `parse_data_mode` throw an `ArgumentError`. When no directory is given the manifest
+# directory falls back to the working directory at load time.
+function __init__()
+    env = get(ENV, "NUMERICALEARTH_DATA", "auto")
+    mode, dir_from_env = parse_data_mode(env)
+    DATA_MODE[] = mode
+    MANIFEST_DIR[] = isempty(dir_from_env) ? pwd() : abspath(dir_from_env)
+    return nothing
+end
+
+end # module
diff --git a/src/DataWrangling/DataModes/data_manifest_wrangling.jl b/src/DataWrangling/DataModes/data_manifest_wrangling.jl
new file mode 100644
index 00000000..1575424a
--- /dev/null
+++ b/src/DataWrangling/DataModes/data_manifest_wrangling.jl
@@ -0,0 +1,319 @@
+# Fixed basename of the data manifest inside a manifest directory.
+const MANIFEST_FILENAME = "NumericalEarthDataManifest.toml"
+# Directory associated with the manifest; starts empty and is assigned in `DataModes.__init__`.
+const MANIFEST_DIR = Ref{String}("")
+# Metadata accumulated by `record_for_manifest` while tracing.
+const RECORDED = AbstractMetadata[]
+# Dataset name => constructor, filled by `register_dataset!` and read by the manifest loader.
+const DATASET_REGISTRY = Dict{String, Any}()
+
+"""
+    $(TYPEDSIGNATURES)
+
+Return the absolute path of the data manifest that lives in `dir`.
+
+The basename is always `NumericalEarthDataManifest.toml`: a fixed name cannot clash with Pkg's
+`Project.toml` / `Manifest.toml`, and it gives each directory exactly one canonical manifest,
+the same way a project directory has one `Project.toml`.
+"""
+function manifest_path_in(dir::AbstractString)
+    root = abspath(dir)
+    return joinpath(root, MANIFEST_FILENAME)
+end
+
+"""
+    $(TYPEDSIGNATURES)
+
+Parse a `NUMERICALEARTH_DATA` value into a `(mode, dir)` tuple. `dir` is the directory the manifest
+will be written to / read from; the filename is always `NumericalEarthDataManifest.toml`.
+
+Recognized values:
+- `""` or `"auto"` → `(:auto, "")`
+- `"strict"` → `(:strict, "")`
+- `"pregenerate"` → `(:pregenerate, "")` — no directory given; the caller chooses one
+- `"pregenerate:DIR"` → `(:pregenerate, "DIR")` — targets `DIR/NumericalEarthDataManifest.toml`
+
+Throws `ArgumentError` on any other value, including `"pregenerate:"` with nothing after the
+colon.
+"""
+function parse_data_mode(s::AbstractString)
+    (isempty(s) || s == "auto") && return (:auto, "")
+    s == "strict" && return (:strict, "")
+    s == "pregenerate" && return (:pregenerate, "")
+    prefix = "pregenerate:"
+    if startswith(s, prefix)
+        # Everything after the colon is the directory, taken verbatim.
+        dir = s[ncodeunits(prefix)+1:end]
+        isempty(dir) && throw(ArgumentError("`NUMERICALEARTH_DATA=pregenerate:` requires a non-empty directory"))
+        return (:pregenerate, dir)
+    end
+    throw(ArgumentError("Unrecognized NUMERICALEARTH_DATA value: $(repr(s)). Expected \"auto\", \"strict\", \"pregenerate\", or \"pregenerate:DIR\"."))
+end
+
+"""
+    $(TYPEDSIGNATURES)
+
+Append `metadata` to [`RECORDED`](@ref) unless an `==`-equal record is already there, so it can
+later be serialized to a `NumericalEarthDataManifest.toml`. This first dedup compares the
+metadata fieldwise (including `dir`); [`write_manifest`](@ref) applies a second, canonical dedup
+on the serialized dicts (which omit `dir`). Returns `nothing`.
+"""
+function record_for_manifest(metadata::AbstractMetadata)
+    already_recorded = any(==(metadata), RECORDED)
+    if !already_recorded
+        push!(RECORDED, metadata)
+    end
+    return nothing
+end
+
+"""
+    $(TYPEDSIGNATURES)
+
+Associate `constructor` with the string `name` in the dataset registry. The manifest writer uses
+the name as the serialized key (`dataset = "Name"`), and the loader rebuilds the dataset by
+calling `DATASET_REGISTRY[name]()`. Registering the same name again overwrites the entry, so the
+call is idempotent. Returns `nothing`.
+"""
+function register_dataset!(constructor, name::AbstractString)
+    key = String(name)
+    DATASET_REGISTRY[key] = constructor
+    return nothing
+end
+
+# Name under which the dataset object `d` is serialized: the registry key whose constructor is
+# exactly `typeof(d)`, or the bare type name when the type was never registered.
+function dataset_name(d)
+    T = typeof(d)
+    registered = findfirst(ctor -> ctor === T, DATASET_REGISTRY)
+    return registered === nothing ? string(nameof(T)) : registered
+end
+
+# Serialize a region to a TOML-ready dict; "no region" stays `nothing`.
+region_to_dict(::Nothing) = nothing
+
+# A `BoundingBox` stores only the bounds that are set, each as a vector.
+function region_to_dict(bb::BoundingBox)
+    d = Dict{String, Any}("kind" => "BoundingBox")
+    for (key, bounds) in ("longitude" => bb.longitude, "latitude" => bb.latitude, "z" => bb.z)
+        bounds === nothing && continue
+        d[key] = collect(bounds)
+    end
+    return d
+end
+
+# A `Column` always stores its coordinates and interpolation name; `z` only when set.
+function region_to_dict(col::Column)
+    d = Dict{String, Any}(
+        "kind" => "Column",
+        "longitude" => col.longitude,
+        "latitude" => col.latitude,
+        "interpolation" => (col.interpolation isa Nearest ? "Nearest" : "Linear"),
+    )
+    if col.z !== nothing
+        d["z"] = collect(col.z)
+    end
+    return d
+end
+
+# Convert a metadata `filename` field into a TOML-serializable value:
+# `nothing` stays `nothing`, a single name becomes a `String`, and a
+# `DatewiseFilename` becomes the vector of its `filenames`.
+filename_to_toml(::Nothing) = nothing
+filename_to_toml(s::AbstractString) = String(s)
+filename_to_toml(f::DatewiseFilename) = collect(String, f.filenames)
+
+# Defines the TOML schema of one manifest entry. [`from_toml`](@ref) is the inverse and the two
+# must stay symmetric. Keys written: `variable_names` (sets) or `variable_name`; `start_date` +
+# `end_date` (vector of dates) or `date` (single date); `region` and `filename` only when set.
+function metadata_to_dict(m::AbstractMetadata)
+    is_set = m isa MetadataSet
+    d = Dict{String, Any}()
+    if is_set
+        d["variable_names"] = [String(n) for n in m.names]
+    else
+        d["variable_name"] = String(m.name)
+    end
+
+    dates = m.dates
+    if dates isa AbstractVector
+        d["start_date"] = first(dates)
+        d["end_date"] = last(dates)
+    elseif dates !== nothing
+        d["date"] = dates
+    end
+
+    region = m.region
+    if region !== nothing
+        d["region"] = region_to_dict(region)
+    end
+
+    # `filename` is only read for non-set metadata.
+    if !is_set && m.filename !== nothing
+        d["filename"] = filename_to_toml(m.filename)
+    end
+    return d
+end
+
+"""
+    $(TYPEDSIGNATURES)
+
+Serialize `records` (a vector of `AbstractMetadata`) to `io` as a `NumericalEarthDataManifest.toml`,
+with one table array per dataset:
+
+```toml
+[[ETOPO2022]]
+variable_name = "bathymetry"
+
+[[JRA55RepeatYear]]
+variable_names = ["eastward_wind", "northward_wind", ...]
+start_date = "1990-01-01T00:00:00"
+end_date = "1990-12-31T18:00:00"
+
+[[GLORYSDaily]]
+variable_name = "temperature"
+date = "2020-06-15T00:00:00"
+```
+
+Entries of the same dataset that serialize to equal dicts are written once, and tables are
+printed in sorted key order.
+
+The download directory of each record is not stored. The loader uses each dataset's default
+directory unless overridden by `download_datasets(; download_dir=...)`.
+
+Returns `nothing`.
+"""
+function write_manifest(io::IO, records::AbstractVector)
+    grouped = Dict{String, Vector{Dict{String, Any}}}()
+    for r in records
+        entries = get!(() -> Dict{String, Any}[], grouped, dataset_name(r.dataset))
+        d = metadata_to_dict(r)
+        # Canonical dedup: compares the serialized form, which has no `dir`.
+        any(==(d), entries) || push!(entries, d)
+    end
+    TOML.print(io, grouped; sorted = true)
+    return nothing
+end
+
+# File-path convenience method: open `path` for writing (truncating it) and delegate to the
+# `IO` method. Returns `nothing`.
+function write_manifest(path::AbstractString, records::AbstractVector)
+    open(path, "w") do io
+        write_manifest(io, records)
+    end
+    return nothing
+end
+
+#####
+##### filename and `region` reconstruction
+#####
+
+# Inverse of `region_to_dict`: a missing region stays `nothing`.
+region_from_toml(::Nothing) = nothing
+
+# Rebuild a `BoundingBox` or `Column` from its serialized dict, selected by the `"kind"` key.
+# Optional bounds absent from the dict come back as `nothing`; an unknown kind is an
+# `ArgumentError`.
+function region_from_toml(d::AbstractDict)
+    kind = d["kind"]
+    optional_tuple(key) = haskey(d, key) ? Tuple(d[key]) : nothing
+
+    if kind == "BoundingBox"
+        longitude = optional_tuple("longitude")
+        latitude = optional_tuple("latitude")
+        z = optional_tuple("z")
+        return BoundingBox(; longitude, latitude, z)
+    end
+
+    if kind == "Column"
+        z = optional_tuple("z")
+        wants_nearest = get(d, "interpolation", "Linear") == "Nearest"
+        interpolation = wants_nearest ? Nearest() : Linear()
+        return Column(d["longitude"], d["latitude"]; z, interpolation)
+    end
+
+    throw(ArgumentError("Unknown region kind: $(repr(kind))"))
+end
+
+# Inverse of `filename_to_toml`: `nothing` stays `nothing`, a string becomes a `String`,
+# and a vector of names is wrapped back into a `DatewiseFilename`.
+filename_from_toml(::Nothing) = nothing
+filename_from_toml(s::AbstractString) = String(s)
+filename_from_toml(v::AbstractVector) = DatewiseFilename(collect(String, v))
+
+# Instantiate the dataset registered under `name` by calling its constructor with no arguments
+# (through `invokelatest`). An unregistered name is an `ArgumentError`.
+function lookup_dataset(name::AbstractString)
+    if !haskey(DATASET_REGISTRY, name)
+        throw(ArgumentError("Unknown dataset $(repr(name)). Did you `using` the dataset module so its __init__ runs and registers it?"))
+    end
+    constructor = DATASET_REGISTRY[name]
+    return Base.invokelatest(constructor)
+end
+
+#####
+##### AbstractMetadata reconstruction
+#####
+
+# Rebuild one manifest entry (inverse of `metadata_to_dict`):
+#   * `variable_names` present          -> `MetadataSet` (single `date`, else start/end dates)
+#   * `variable_name` with `start_date` -> `Metadata`
+#   * `variable_name` otherwise         -> `Metadatum` (with `date`, or `nothing` if absent)
+# `download_dir`, when given, replaces the dataset's default download directory.
+function from_toml(name::AbstractString, entry::AbstractDict; download_dir = nothing)
+    dataset = lookup_dataset(name)
+    region = region_from_toml(get(entry, "region", nothing))
+    filename = filename_from_toml(get(entry, "filename", nothing))
+    dir = download_dir === nothing ? default_download_directory(dataset) : String(download_dir)
+
+    if haskey(entry, "variable_names")
+        variable_names = Tuple(Symbol(n) for n in entry["variable_names"])
+        if haskey(entry, "date")
+            return MetadataSet(variable_names...; dataset, region, dir, date = entry["date"])
+        else
+            return MetadataSet(variable_names...; dataset, region, dir,
+                               start_date = entry["start_date"], end_date = entry["end_date"])
+        end
+    end
+
+    variable_name = Symbol(entry["variable_name"])
+    if haskey(entry, "start_date")
+        return Metadata(variable_name; dataset, region, filename, dir,
+                        start_date = entry["start_date"], end_date = entry["end_date"])
+    else
+        return Metadatum(variable_name; dataset, region, filename, dir,
+                         date = get(entry, "date", nothing))
+    end
+end
+
+"""
+    $(TYPEDSIGNATURES)
+
+Parse the manifest at `joinpath(dir, "NumericalEarthDataManifest.toml")` and rebuild each record
+as the matching `Metadatum`/`Metadata`/`MetadataSet`, looking datasets up by name in
+[`DATASET_REGISTRY`](@ref).
+
+`download_dir`, when given, replaces the download directory of every reconstructed record
+(useful when login-node and compute-node filesystems differ); otherwise
+`default_download_directory(dataset)` is used.
+"""
+function read_manifest(; dir::AbstractString = pwd(), download_dir = nothing)
+    manifest = manifest_path_in(dir)
+    return manifest_from_dict(TOML.parsefile(manifest); download_dir)
+end
+
+# Same as above, but reads the TOML text from an already-open stream.
+function read_manifest(io::IO; download_dir = nothing)
+    text = read(io, String)
+    return manifest_from_dict(TOML.parse(text); download_dir)
+end
+
+"""
+    $(TYPEDSIGNATURES)
+
+Rebuild every entry of a parsed manifest dict and return them as a vector. Top-level tables
+whose name is not currently in [`DATASET_REGISTRY`](@ref) are skipped and reported together in
+one warning, so a manifest that mentions a dataset module the session has not loaded (e.g.
+`JRA55` when running a `Bathymetry`-only script) does not abort the read.
+"""
+function manifest_from_dict(raw::AbstractDict; download_dir = nothing)
+    records = AbstractMetadata[]
+    unknown = String[]
+    for (name, entries) in raw
+        if haskey(DATASET_REGISTRY, name)
+            for entry in entries
+                record = Base.invokelatest(from_toml, name, entry; download_dir)
+                push!(records, record)
+            end
+        else
+            push!(unknown, name)
+        end
+    end
+    if !isempty(unknown)
+        @warn "Skipping manifest entries for unregistered datasets; load the relevant dataset modules to include them" datasets=sort(unknown)
+    end
+    return records
+end
+
+"""
+    $(TYPEDSIGNATURES)
+
+Trace `script` in pregenerate mode and write the resulting manifest to
+`joinpath(dir, "NumericalEarthDataManifest.toml")`. Returns the manifest path.
+
+The script's source is parsed with `Meta.parseall`, every statement is rewritten into a
+per-statement `try`/`catch` that rebinds failed assignments to [`DryRunValue`](@ref), and the
+rewritten code is evaluated in a fresh sandbox module with `DATA_MODE[] = :pregenerate`. In that
+mode every observed `Metadata` / `MetadataSet` is recorded into [`RECORDED`](@ref) by
+`observe_metadata`, while [`download_dataset`](@ref) does nothing. The accumulated records are
+then serialized via [`write_manifest`](@ref). The previous mode and previously recorded metadata
+are restored afterwards, even if the trace throws.
+
+When `overwrite_existing = false` and a manifest already exists at `dir`, the existing records are
+read first and kept in their original order, followed by the newly recorded ones that are not
+already present, so this call appends rather than replaces. Existing entries that
+[`read_manifest`](@ref) skips (datasets not currently registered) are not carried over. Defaults
+to `true` (replace).
+
+`quiet = true` (the default) swallows everything the traced script writes to stdout/stderr — most
+of which is noise (test-failure summaries, NetCDF "file not found" warnings, library `@warn`s)
+because pregenerate mode deliberately skips the downloads those tests depend on. Pass
+`quiet = false` to see all of it, e.g. when debugging an unexpected trace failure.
+"""
+function pregenerate_dataset_manifest(script::AbstractString;
+                                      dir::AbstractString = pwd(),
+                                      overwrite_existing::Bool = true,
+                                      quiet::Bool = true)
+    script_abs = abspath(script)
+    source = read(script_abs, String)
+    parsed = Meta.parseall(source; filename = script_abs)
+    basedir = dirname(script_abs)
+    rewritten = Expr(:toplevel, [rewrite_statement(a, basedir) for a in parsed.args]...)
+
+    # Start from a clean recording state; the caller's state is restored in `finally`.
+    saved_mode = DATA_MODE[]
+    saved_records = copy(RECORDED)
+    empty!(RECORDED)
+    DATA_MODE[] = :pregenerate
+
+    new_records = AbstractMetadata[]
+    try
+        sandbox = Module(:DataModesSandbox)
+        # Give the sandbox its own `eval` / `include`, as a regular module would have.
+        Core.eval(sandbox, :(eval(x) = Core.eval($sandbox, x)))
+        Core.eval(sandbox, :(include(p) = Base.include($sandbox, p)))
+        if quiet
+            redirect_stdout(devnull) do
+                redirect_stderr(devnull) do
+                    Core.eval(sandbox, rewritten)
+                end
+            end
+        else
+            Core.eval(sandbox, rewritten)
+        end
+        new_records = copy(RECORDED)
+    finally
+        DATA_MODE[] = saved_mode
+        empty!(RECORDED)
+        append!(RECORDED, saved_records)
+    end
+
+    manifest = manifest_path_in(dir)
+    if !overwrite_existing && isfile(manifest)
+        # Keep existing entries first and in file order so repeated appends do not
+        # reshuffle the manifest; only genuinely new records are added after them.
+        existing = read_manifest(; dir)
+        fresh = filter(r -> !any(==(r), existing), new_records)
+        new_records = vcat(existing, fresh)
+    end
+    write_manifest(manifest, new_records)
+    return manifest
+end
\ No newline at end of file
diff --git a/src/DataWrangling/DataModes/dry_run_value.jl b/src/DataWrangling/DataModes/dry_run_value.jl
new file mode 100644
index 00000000..3995cef9
--- /dev/null
+++ b/src/DataWrangling/DataModes/dry_run_value.jl
@@ -0,0 +1,87 @@
+"""
+    DryRunValue()
+
+Sentinel returned when a statement under [`pregenerate_dataset_manifest`](@ref) tracing either errors or
+stands in for a value that real data would have produced. The per-statement `try`/`catch` wrappers
+rebind any failed assignment to a `DryRunValue`, so the script continues running and downstream
+`download_dataset` calls still register their metadata.
+
+To maximise script reach without touching `src` outside this module, `DryRunValue` absorbs almost
+every common operation: calling it, property access, indexing, arithmetic, ordering comparisons
+and ranges all return another `DryRunValue`. It iterates as an empty collection, broadcasts as a
+scalar, and two `DryRunValue`s compare `==`. Operations that fall outside this set still throw and
+are caught by the surrounding per-statement wrapper.
+"""
+struct DryRunValue end
+
+# Text representations: always the literal constructor call.
+Base.show(io::IO, ::DryRunValue) = print(io, "DryRunValue()")
+Base.print(io::IO, ::DryRunValue) = print(io, "DryRunValue()")
+Base.string(::DryRunValue) = "DryRunValue()"
+
+# Property access: every property "exists" and reads or writes as another sentinel.
+Base.getproperty(::DryRunValue, ::Symbol) = DryRunValue()
+Base.setproperty!(::DryRunValue, ::Symbol, _) = DryRunValue()
+Base.propertynames(::DryRunValue, ::Bool = false) = ()
+Base.hasproperty(::DryRunValue, ::Symbol) = true
+
+# Calling the sentinel with any arguments yields another sentinel.
+(::DryRunValue)(args...; kwargs...) = DryRunValue()
+
+# Collection-style queries: the sentinel reports itself as empty.
+Base.length(::DryRunValue) = 0
+Base.size(::DryRunValue) = ()
+Base.size(::DryRunValue, ::Int) = 0
+Base.axes(::DryRunValue) = ()
+Base.axes(::DryRunValue, ::Int) = Base.OneTo(0)
+Base.eltype(::Type{DryRunValue}) = DryRunValue
+Base.ndims(::DryRunValue) = 0
+Base.ndims(::Type{DryRunValue}) = 0
+Base.isempty(::DryRunValue) = true
+Base.firstindex(::DryRunValue) = 1
+Base.lastindex(::DryRunValue) = 0
+Base.keys(::DryRunValue) = ()
+Base.values(::DryRunValue) = ()
+Base.pairs(::DryRunValue) = ()
+
+# Iteration: a `DryRunValue` yields no elements. The size trait is `HasLength` so that it
+# agrees with `length == 0` / `isempty == true`; a zero-dimensional shape would instead
+# advertise exactly one element, which `iterate` never produces.
+Base.iterate(::DryRunValue, state = nothing) = nothing
+Base.IteratorSize(::Type{DryRunValue}) = Base.HasLength()
+Base.IteratorEltype(::Type{DryRunValue}) = Base.HasEltype()
+
+# Broadcasting: wrapped in a `Ref`, so the sentinel takes part as a single scalar.
+Base.broadcastable(::DryRunValue) = Ref(DryRunValue())
+
+# Indexing and views: any index reads or writes as another sentinel.
+Base.getindex(::DryRunValue, args...) = DryRunValue()
+Base.setindex!(::DryRunValue, args...) = DryRunValue()
+Base.view(::DryRunValue, args...) = DryRunValue()
+
+# Array-like transformations and copies all return a sentinel.
+Base.adjoint(::DryRunValue) = DryRunValue()
+Base.transpose(::DryRunValue) = DryRunValue()
+Base.collect(::DryRunValue) = DryRunValue()
+Base.copy(::DryRunValue) = DryRunValue()
+Base.deepcopy(::DryRunValue) = DryRunValue()
+Base.similar(::DryRunValue, args...) = DryRunValue()
+
+# Promotion with any other type resolves to `DryRunValue`. Only the identity `convert` is
+# defined here.
+Base.convert(::Type{DryRunValue}, ::DryRunValue) = DryRunValue()
+Base.promote_rule(::Type{DryRunValue}, ::Type) = DryRunValue
+Base.promote_rule(::Type, ::Type{DryRunValue}) = DryRunValue
+
+# All sentinels are equal and share one hash, keeping `==`, `isequal` and `hash` consistent.
+Base.hash(::DryRunValue, h::UInt) = hash(DryRunValue, h)
+Base.:(==)(::DryRunValue, ::DryRunValue) = true
+Base.isequal(::DryRunValue, ::DryRunValue) = true
+
+# Binary arithmetic, bitwise, ordering and min/max operators: a sentinel on either side (or on
+# both, which also resolves the ambiguity between the first two methods) gives a sentinel.
+for binary_op in (:+, :-, :*, :/, :\, :^, :%, :÷, :&, :|, :⊻, :>>, :<<, :>>>,
+                  :<, :>, :<=, :>=, :min, :max)
+    @eval begin
+        Base.$binary_op(::DryRunValue, ::Any) = DryRunValue()
+        Base.$binary_op(::Any, ::DryRunValue) = DryRunValue()
+        Base.$binary_op(::DryRunValue, ::DryRunValue) = DryRunValue()
+    end
+end
+
+# Unary operators, elementary functions and numeric predicates: applied to a sentinel they all
+# return a sentinel (the predicates included, so they do not return a `Bool`).
+for unary_op in (:-, :+, :abs, :abs2, :sqrt, :cbrt, :exp, :exp2, :exp10, :expm1,
+                 :log, :log2, :log10, :log1p, :sin, :cos, :tan, :asin, :acos, :atan,
+                 :sinh, :cosh, :tanh, :floor, :ceil, :round, :real, :imag, :conj,
+                 :inv, :sign, :signbit, :one, :zero, :oneunit, :isnan, :isinf, :isfinite,
+                 :iszero, :isone, :isreal, :isinteger)
+    @eval begin
+        Base.$unary_op(::DryRunValue) = DryRunValue()
+    end
+end
+
+# Range construction (`a:b` and `a:s:b`) with a sentinel as an endpoint or step yields a
+# sentinel. The three-argument methods each cover a single sentinel position.
+Base.:(:)(::DryRunValue, ::Any) = DryRunValue()
+Base.:(:)(::Any, ::DryRunValue) = DryRunValue()
+Base.:(:)(::DryRunValue, ::DryRunValue) = DryRunValue()
+Base.:(:)(::DryRunValue, ::Any, ::Any) = DryRunValue()
+Base.:(:)(::Any, ::DryRunValue, ::Any) = DryRunValue()
+Base.:(:)(::Any, ::Any, ::DryRunValue) = DryRunValue()
diff --git a/src/DataWrangling/DataModes/parse_and_rewrite_script.jl b/src/DataWrangling/DataModes/parse_and_rewrite_script.jl
new file mode 100644
index 00000000..8b38b933
--- /dev/null
+++ b/src/DataWrangling/DataModes/parse_and_rewrite_script.jl
@@ -0,0 +1,121 @@
+# Expression heads that `rewrite_statement` returns untouched, i.e. without a `try`/`catch`
+# wrapper: imports/exports, module and type definitions, macro definitions and macro calls,
+# and `const` declarations.
+const PASSTHROUGH_HEADS = (:using, :import, :export, :module, :struct, :abstract, :primitive, :macro, :macrocall, :const)
+
+# True when `s` is a call expression whose callee is `include` or a qualified `X.include`.
+function is_include_call(s)
+    (s isa Expr && s.head === :call && !isempty(s.args)) || return false
+    callee = s.args[1]
+    if callee === :include
+        return true
+    elseif callee isa Expr
+        # Qualified form: `Expr(:., X, QuoteNode(:include))`.
+        return callee.head === :. && length(callee.args) == 2 &&
+               callee.args[2] === QuoteNode(:include)
+    end
+    return false
+end
+
+# True when `s` defines a function: either the long `function ... end` form or a short-form
+# assignment whose left-hand side is a call, optionally wrapped in `where` clauses or carrying a
+# return-type annotation.
+function is_function_def(s)
+    s isa Expr || return false
+    s.head === :function && return true
+    (s.head === :(=) && length(s.args) == 2) || return false
+
+    target, body = s.args
+    target isa Expr || return false
+    kind = target.head
+    if kind === :call
+        return true
+    elseif kind === :where
+        # Peel one `where` layer and test what it wraps.
+        return is_function_def(Expr(:(=), target.args[1], body))
+    elseif kind === :(::)
+        annotated = target.args[1]
+        return annotated isa Expr && annotated.head === :call
+    end
+    return false
+end
+
+# Guard an assignment so a failing right-hand side cannot stop the trace:
+#   * `x = rhs`      -> `x` is bound to `rhs`, or to `DryRunValue()` if `rhs` throws
+#   * `a, b = rhs`   -> as above, with a tuple of one `DryRunValue()` per target as fallback
+#   * anything else  -> the whole assignment runs inside a `try` with an empty `catch`
+# In the first two forms only `rhs` is inside the `try`; the binding itself happens outside.
+function wrap_assignment(lhs, rhs)
+    if lhs isa Symbol
+        guarded = :(try; $rhs; catch; $DryRunValue(); end)
+        return Expr(:(=), lhs, guarded)
+    end
+    if lhs isa Expr && lhs.head === :tuple
+        placeholders = [:($DryRunValue()) for _ in lhs.args]
+        fallback = Expr(:tuple, placeholders...)
+        return Expr(:(=), lhs, :(try; $rhs; catch; $fallback; end))
+    end
+    return :(try; $lhs = $rhs; catch; end)
+end
+
+# `return args...` guarded by a `try`; if evaluating it throws, `DryRunValue()` is returned.
+wrap_return(args::Vector) = Expr(:try, Expr(:return, args...), false, Expr(:return, :($DryRunValue())))
+# Run `expr` inside a `try` whose `catch` block is empty, so any error is discarded.
+wrap_bare(expr) = Expr(:try, expr, false, Expr(:block))
+
+# Rewrite a body: a `:block` / `:toplevel` container has each of its statements rewritten and
+# keeps its head; anything else is rewritten as a single statement.
+function rewrite_block(body, basedir::AbstractString)
+    is_container = body isa Expr && (body.head === :block || body.head === :toplevel)
+    is_container || return rewrite_statement(body, basedir)
+    statements = map(a -> rewrite_statement(a, basedir), body.args)
+    return Expr(body.head, statements...)
+end
+
+# Rewrite the branches of an `if` / `elseif` expression. The condition is kept as written; the
+# "then" body is rewritten as a block, and an alternative is either another link of the
+# `elseif` chain (handled recursively) or a plain `else` body.
+function rewrite_if(s, basedir::AbstractString)
+    condition = s.args[1]
+    then_branch = rewrite_block(s.args[2], basedir)
+    length(s.args) >= 3 || return Expr(s.head, condition, then_branch)
+
+    alternative = s.args[3]
+    chained = alternative isa Expr &&
+              (alternative.head === :elseif || alternative.head === :if)
+    else_branch = chained ? rewrite_if(alternative, basedir) :
+                            rewrite_block(alternative, basedir)
+    return Expr(s.head, condition, then_branch, else_branch)
+end
+
+# Rewrite the body of a function-like expression while leaving its signature untouched.
+# Handles long-form `function`, short-form `f(x) = ...`, anonymous `x -> ...`, and `do` blocks
+# (whose second argument is the anonymous function). Any other expression is returned as is.
+function rewrite_function_body(s, basedir::AbstractString)
+    if s.head === :function
+        # A forward declaration (`function f end`) carries only the name: nothing to rewrite.
+        length(s.args) < 2 && return s
+        return Expr(:function, s.args[1], rewrite_block(s.args[2], basedir))
+    elseif s.head === :(=)
+        return Expr(:(=), s.args[1], rewrite_block(s.args[2], basedir))
+    elseif s.head === :(->)
+        return Expr(:(->), s.args[1], rewrite_block(s.args[2], basedir))
+    elseif s.head === :do
+        anon = s.args[2]
+        new_anon = Expr(:(->), anon.args[1], rewrite_block(anon.args[2], basedir))
+        return Expr(:do, s.args[1], new_anon)
+    end
+    return s
+end
+
+# Replace `include("file.jl")` by the rewritten statements of that file, so the included code
+# gets the same per-statement protection. Relative paths are resolved against `basedir`, and
+# nested includes are resolved against the included file's own directory. When the call has no
+# argument, or the path is not a string literal, or the file does not exist, the original call
+# is kept and merely wrapped in a `try`.
+function inline_include(s, basedir::AbstractString)
+    # `include()` has no path argument; do not index past the end of `s.args`.
+    length(s.args) >= 2 || return wrap_bare(s)
+    path_arg = s.args[2]
+    path_arg isa AbstractString || return wrap_bare(s)
+    full_path = isabspath(path_arg) ? path_arg : joinpath(basedir, path_arg)
+    isfile(full_path) || return wrap_bare(s)
+    inner_source = read(full_path, String)
+    inner_parsed = Meta.parseall(inner_source; filename = full_path)
+    inner_basedir = dirname(abspath(full_path))
+    return Expr(:toplevel, [rewrite_statement(a, inner_basedir) for a in inner_parsed.args]...)
+end
+
+# Rewrite one statement of the traced script. The checks run in this order and the first match
+# wins:
+#   1. line-number nodes are kept; other non-`Expr` values are wrapped in a bare `try`
+#   2. heads listed in `PASSTHROUGH_HEADS` are kept verbatim
+#   3. `include(...)` calls are inlined
+#   4. function definitions and `->` lambdas get their bodies rewritten (no outer wrapper)
+#   5. `do` blocks get their body rewritten and the call wrapped
+#   6. assignments and `return` get their dedicated wrappers
+#   7. `for` / `while` / `let` and `if` / `elseif` get rewritten bodies plus an outer wrapper
+#   8. `:block` / `:toplevel` are rewritten statement by statement; `:quote` is kept
+#   9. everything else is wrapped in a bare `try`
+function rewrite_statement(s, basedir::AbstractString)
+    s isa LineNumberNode && return s
+    s isa Expr || return wrap_bare(s)
+
+    head = s.head
+    if head in PASSTHROUGH_HEADS
+        return s
+    elseif is_include_call(s)
+        return inline_include(s, basedir)
+    elseif is_function_def(s) || head === :(->)
+        return rewrite_function_body(s, basedir)
+    elseif head === :do
+        return wrap_bare(rewrite_function_body(s, basedir))
+    elseif head === :(=)
+        lhs, rhs = s.args
+        return wrap_assignment(lhs, rhs)
+    elseif head === :return
+        return wrap_return(s.args)
+    elseif head === :for || head === :while || head === :let
+        new_body = rewrite_block(s.args[2], basedir)
+        return wrap_bare(Expr(head, s.args[1], new_body))
+    elseif head === :if || head === :elseif
+        return wrap_bare(rewrite_if(s, basedir))
+    elseif head === :block || head === :toplevel
+        return Expr(head, [rewrite_statement(a, basedir) for a in s.args]...)
+    elseif head === :quote
+        return s
+    end
+    return wrap_bare(s)
+end
\ No newline at end of file
diff --git a/src/DataWrangling/DataWrangling.jl b/src/DataWrangling/DataWrangling.jl
index a1003d57..0a909c8b 100644
--- a/src/DataWrangling/DataWrangling.jl
+++ b/src/DataWrangling/DataWrangling.jl
@@ -4,7 +4,8 @@ restoring, or validation.
"""
module DataWrangling
-export Metadata, Metadatum, MetadataSet, DatewiseFilename, ECCOMetadatum, EN4Metadatum, all_dates, first_date, last_date
+export AbstractMetadata, Metadata, Metadatum, MetadataSet, DatewiseFilename, ECCOMetadatum, EN4Metadatum, all_dates, first_date, last_date
+export download_dataset
export validate_dataset_coverage, metadata_filename
export BoundingBox, Column, Linear, Nearest
export WOAClimatology, WOAAnnual, WOAMonthly
@@ -233,8 +234,27 @@ abstract type AbstractStaticBathymetry <: AbstractStaticDataset end
z_interfaces(::AbstractStaticBathymetry) = (0, 1)
Base.size(dataset::AbstractStaticBathymetry, variable) = size(dataset)
+"""
+ AbstractMetadata
+
+Common supertype for [`Metadata`](@ref), [`Metadatum`](@ref), and [`MetadataSet`](@ref).
+Used to dispatch [`download_dataset`](@ref) on the three concrete kinds with a single method.
+"""
+abstract type AbstractMetadata end
+
+"""
+    observe_metadata(metadata::AbstractMetadata)
+
+Hook called at the end of every `AbstractMetadata` inner constructor. The default is a no-op
+that returns `nothing`; [`NumericalEarth.DataWrangling.DataModes`](@ref) adds more-specific
+methods on `Metadata` and `MetadataSet` that record into the manifest in `:pregenerate` mode, so
+the trace captures Metadata constructed inside library functions too.
+"""
+observe_metadata(::AbstractMetadata) = nothing
+
# Fundamentals
include("metadata.jl")
+
+function download_dataset end
+
include("set_region_data.jl")
include("metadata_field.jl")
include("dataset_backend.jl")
@@ -242,6 +262,10 @@ include("metadata_field_time_series.jl")
include("inpainting.jl")
include("restoring.jl")
+# parse and verify what data is needed
+# download it all in one pass if needed
+include("DataModes/DataModes.jl")
+
function metadata_time_step end
function metadata_epoch end
@@ -345,4 +369,25 @@ function Downloads.download(metadata::Metadata)
error("No download method for $metadata is available (is the backend package loaded?)")
end
+"""
+    download_dataset(metadata::AbstractMetadata)
+
+Make the data referenced by `metadata` available, following the active
+`NUMERICALEARTH_DATA` mode (see [`DataModes`](@ref)):
+
+- `:auto` — delegate to `Downloads.download(metadata)`, the per-dataset method.
+- `:strict` — check that every required file is already on disk and error otherwise.
+- `:pregenerate` — do nothing; the metadata was already recorded for the manifest by
+  `observe_metadata` when it was constructed.
+
+Every code path that needs dataset files is meant to go through this single function.
+Per-dataset modules keep extending `Downloads.download`, which is used by the `:auto` branch only.
+"""
+function download_dataset(metadata::AbstractMetadata)
+    mode = DataModes.DATA_MODE[]
+    if mode === :auto
+        return Downloads.download(metadata)
+    elseif mode === :strict
+        return DataModes.check_files_exist(metadata)
+    elseif mode === :pregenerate
+        return nothing
+    else
+        error("Unknown NUMERICALEARTH_DATA mode: $(repr(mode))")
+    end
+end
+
end # module
diff --git a/src/DataWrangling/ECCO/ECCO.jl b/src/DataWrangling/ECCO/ECCO.jl
index fbb1b665..26e31105 100644
--- a/src/DataWrangling/ECCO/ECCO.jl
+++ b/src/DataWrangling/ECCO/ECCO.jl
@@ -55,6 +55,11 @@ import ..DataWrangling:
download_ECCO_cache::String = ""
function __init__()
global download_ECCO_cache = @get_scratch!("ECCO")
+ DataWrangling.DataModes.register_dataset!(ECCO2Monthly, "ECCO2Monthly")
+ DataWrangling.DataModes.register_dataset!(ECCO2Daily, "ECCO2Daily")
+ DataWrangling.DataModes.register_dataset!(ECCO4Monthly, "ECCO4Monthly")
+ DataWrangling.DataModes.register_dataset!(ECCO2DarwinMonthly, "ECCO2DarwinMonthly")
+ DataWrangling.DataModes.register_dataset!(ECCO4DarwinMonthly, "ECCO4DarwinMonthly")
end
# Datasets
diff --git a/src/DataWrangling/EN4/EN4.jl b/src/DataWrangling/EN4/EN4.jl
index c08e1189..1e7309a3 100644
--- a/src/DataWrangling/EN4/EN4.jl
+++ b/src/DataWrangling/EN4/EN4.jl
@@ -16,6 +16,7 @@ using ..DataWrangling: DataWrangling, Metadata, Metadatum, DownloadProgress, Kel
download_EN4_cache::String = ""
function __init__()
global download_EN4_cache = @get_scratch!("EN4")
+ DataWrangling.DataModes.register_dataset!(EN4Monthly, "EN4Monthly")
end
EN4_dataset_variable_names = Dict(
diff --git a/src/DataWrangling/ERA5/ERA5.jl b/src/DataWrangling/ERA5/ERA5.jl
index 3341b8b2..7b9c4872 100644
--- a/src/DataWrangling/ERA5/ERA5.jl
+++ b/src/DataWrangling/ERA5/ERA5.jl
@@ -17,7 +17,7 @@ using Printf: Printf, @sprintf
using Scratch: Scratch, @get_scratch!
using Statistics: Statistics, mean
-using ..DataWrangling: Metadata, Metadatum, metadata_path, native_grid,
+using ..DataWrangling: DataWrangling, Metadata, Metadatum, metadata_path, native_grid,
InverseGravity
using NumericalEarth.Grids: PressureLevelVerticalDiscretization
@@ -42,6 +42,10 @@ download_ERA5_cache::String = ""
function __init__()
global download_ERA5_cache = @get_scratch!("ERA5")
+ DataWrangling.DataModes.register_dataset!(ERA5HourlySingleLevel, "ERA5HourlySingleLevel")
+ DataWrangling.DataModes.register_dataset!(ERA5MonthlySingleLevel, "ERA5MonthlySingleLevel")
+ DataWrangling.DataModes.register_dataset!(ERA5HourlyPressureLevels, "ERA5HourlyPressureLevels")
+ DataWrangling.DataModes.register_dataset!(ERA5MonthlyPressureLevels, "ERA5MonthlyPressureLevels")
end
#####
diff --git a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl
index 84dbe000..3a6bb830 100644
--- a/src/DataWrangling/ERA5/ERA5_pressure_levels.jl
+++ b/src/DataWrangling/ERA5/ERA5_pressure_levels.jl
@@ -266,8 +266,8 @@ function per_column_geopotential_discretization(metadata::ERA5PressureMetadata)
ϕ_sl_meta = Metadata(:geopotential_height; dataset=sl_ds,
dates=metadata.dates, region=metadata.region, dir=metadata.dir)
- Downloads.download(ϕ_meta)
- Downloads.download(ϕ_sl_meta)
+ download_dataset(ϕ_meta)
+ download_dataset(ϕ_sl_meta)
Φ = Field(first(ϕ_meta)) # 3-D geopotential, m²/s²
Φ_sfc = Field(first(ϕ_sl_meta)) # 2-D surface geopotential, m²/s²
diff --git a/src/DataWrangling/ETOPO/ETOPO.jl b/src/DataWrangling/ETOPO/ETOPO.jl
index 010cbc50..838b1ae2 100644
--- a/src/DataWrangling/ETOPO/ETOPO.jl
+++ b/src/DataWrangling/ETOPO/ETOPO.jl
@@ -21,6 +21,7 @@ import ..DataWrangling:
download_ETOPO_cache::String = ""
function __init__()
global download_ETOPO_cache = @get_scratch!("ETOPO")
+ DataWrangling.DataModes.register_dataset!(ETOPO2022, "ETOPO2022")
end
ETOPO_bathymetry_variable_names = Dict(
diff --git a/src/DataWrangling/GEBCO/GEBCO.jl b/src/DataWrangling/GEBCO/GEBCO.jl
index 4b32b892..75396609 100644
--- a/src/DataWrangling/GEBCO/GEBCO.jl
+++ b/src/DataWrangling/GEBCO/GEBCO.jl
@@ -8,7 +8,7 @@ using Oceananigans.DistributedComputations: @root
using Scratch: Scratch, @get_scratch!
using ZipFile: ZipFile
-using ..DataWrangling: DownloadProgress, Metadatum, metadata_path, AbstractStaticBathymetry
+using ..DataWrangling: DataWrangling, DownloadProgress, Metadatum, metadata_path, AbstractStaticBathymetry
import ..DataWrangling:
metadata_filename,
@@ -21,6 +21,7 @@ import ..DataWrangling:
download_GEBCO_cache::String = ""
function __init__()
global download_GEBCO_cache = @get_scratch!("GEBCO")
+ DataWrangling.DataModes.register_dataset!(GEBCO2024, "GEBCO2024")
end
GEBCO_bathymetry_variable_names = Dict(
diff --git a/src/DataWrangling/GLORYS/GLORYS.jl b/src/DataWrangling/GLORYS/GLORYS.jl
index 30177671..9c4b74c5 100644
--- a/src/DataWrangling/GLORYS/GLORYS.jl
+++ b/src/DataWrangling/GLORYS/GLORYS.jl
@@ -8,7 +8,7 @@ using NCDatasets: NCDatasets, Dataset
using Printf: Printf, @sprintf
using Scratch: Scratch, @get_scratch!
-using ..DataWrangling: Metadata, Metadatum, metadata_path
+using ..DataWrangling: DataWrangling, Metadata, Metadatum, metadata_path
import ..DataWrangling:
all_dates,
@@ -27,6 +27,9 @@ import ..DataWrangling:
download_GLORYS_cache::String = ""
function __init__()
global download_GLORYS_cache = @get_scratch!("GLORYS")
+ DataWrangling.DataModes.register_dataset!(GLORYSStatic, "GLORYSStatic")
+ DataWrangling.DataModes.register_dataset!(GLORYSDaily, "GLORYSDaily")
+ DataWrangling.DataModes.register_dataset!(GLORYSMonthly, "GLORYSMonthly")
end
# Datasets
@@ -155,4 +158,3 @@ function z_interfaces(metadata::GLORYSMetadata)
end
end # module GLORYS
-
diff --git a/src/DataWrangling/IBCAO/IBCAO.jl b/src/DataWrangling/IBCAO/IBCAO.jl
index be9deb37..25030f5a 100644
--- a/src/DataWrangling/IBCAO/IBCAO.jl
+++ b/src/DataWrangling/IBCAO/IBCAO.jl
@@ -7,7 +7,7 @@ using Oceananigans: Oceananigans
using Oceananigans.DistributedComputations: @root
using Scratch: Scratch, @get_scratch!
-using ..DataWrangling: DownloadProgress, Metadatum, metadata_path, AbstractStaticBathymetry
+using ..DataWrangling: DataWrangling, DownloadProgress, Metadatum, metadata_path, AbstractStaticBathymetry
import ..DataWrangling:
metadata_filename,
@@ -22,6 +22,7 @@ import ..DataWrangling:
download_IBCAO_cache::String = ""
function __init__()
global download_IBCAO_cache = @get_scratch!("IBCAO")
+ DataWrangling.DataModes.register_dataset!(IBCAOv5, "IBCAOv5")
end
IBCAO_bathymetry_variable_names = Dict(
diff --git a/src/DataWrangling/IBCSO/IBCSO.jl b/src/DataWrangling/IBCSO/IBCSO.jl
index ec799a7a..dfafdf6e 100644
--- a/src/DataWrangling/IBCSO/IBCSO.jl
+++ b/src/DataWrangling/IBCSO/IBCSO.jl
@@ -22,6 +22,7 @@ import ..DataWrangling:
download_IBCSO_cache::String = ""
function __init__()
global download_IBCSO_cache = @get_scratch!("IBCSO")
+ DataWrangling.DataModes.register_dataset!(IBCSOv2, "IBCSOv2")
end
IBCSO_bathymetry_variable_names = Dict(
diff --git a/src/DataWrangling/JRA55/JRA55.jl b/src/DataWrangling/JRA55/JRA55.jl
index e347664a..ee3c796f 100644
--- a/src/DataWrangling/JRA55/JRA55.jl
+++ b/src/DataWrangling/JRA55/JRA55.jl
@@ -7,7 +7,6 @@ export JRA55PrescribedAtmosphere,
MultiYearJRA55
using Adapt: Adapt
-using CFTime: CFTime
using Dates: Dates, DateTime, Day, Hour
using Downloads: Downloads
using Oceananigans: Oceananigans
@@ -27,6 +26,8 @@ download_JRA55_cache::String = ""
function __init__()
global download_JRA55_cache = @get_scratch!("JRA55")
+ DataWrangling.DataModes.register_dataset!(RepeatYearJRA55, "RepeatYearJRA55")
+ DataWrangling.DataModes.register_dataset!(MultiYearJRA55, "MultiYearJRA55")
end
include("JRA55_metadata.jl")
diff --git a/src/DataWrangling/JRA55/JRA55_metadata.jl b/src/DataWrangling/JRA55/JRA55_metadata.jl
index cbf647a3..120a78ec 100644
--- a/src/DataWrangling/JRA55/JRA55_metadata.jl
+++ b/src/DataWrangling/JRA55/JRA55_metadata.jl
@@ -1,4 +1,3 @@
-using CFTime: CFTime
using Dates: Dates, DateTime, Day, Hour
using Downloads: Downloads
using Oceananigans.DistributedComputations
diff --git a/src/DataWrangling/ORCA/ORCA.jl b/src/DataWrangling/ORCA/ORCA.jl
index 910c722b..ec2feaf5 100644
--- a/src/DataWrangling/ORCA/ORCA.jl
+++ b/src/DataWrangling/ORCA/ORCA.jl
@@ -25,6 +25,8 @@ download_ORCA_cache::String = ""
function __init__()
global download_ORCA_cache = @get_scratch!("ORCA")
+ DataWrangling.DataModes.register_dataset!(ORCA1, "ORCA1")
+ DataWrangling.DataModes.register_dataset!(ORCA12, "ORCA12")
end
abstract type ORCADataset end
diff --git a/src/DataWrangling/OSPapa/OSPapa.jl b/src/DataWrangling/OSPapa/OSPapa.jl
index b51d2d43..2fe554a5 100644
--- a/src/DataWrangling/OSPapa/OSPapa.jl
+++ b/src/DataWrangling/OSPapa/OSPapa.jl
@@ -20,7 +20,7 @@ using NCDatasets: NCDatasets, NCDataset, defDim, defVar
using Scratch: Scratch, @get_scratch!
using Thermodynamics: q_vap_from_RH, Liquid
-using ..DataWrangling: DownloadProgress
+using ..DataWrangling: DataWrangling, DownloadProgress
using ...Atmospheres: PrescribedAtmosphere, PrescribedPrecipitationFlux, AtmosphereThermodynamicsParameters
using ..DataWrangling: Metadata, Metadatum, metadata_path, first_date, last_date,
@@ -57,6 +57,8 @@ download_OSPapa_cache::String = ""
function __init__()
global download_OSPapa_cache = @get_scratch!("OSPapa")
+ DataWrangling.DataModes.register_dataset!(OSPapaFluxHourly, "OSPapaFluxHourly")
+ DataWrangling.DataModes.register_dataset!(OSPapaHourly, "OSPapaHourly")
end
function download_ospapa_file(dir=download_OSPapa_cache)
diff --git a/src/DataWrangling/OSPapa/OSPapa_prescribed_atmosphere.jl b/src/DataWrangling/OSPapa/OSPapa_prescribed_atmosphere.jl
index f03a3d67..180d0c69 100644
--- a/src/DataWrangling/OSPapa/OSPapa_prescribed_atmosphere.jl
+++ b/src/DataWrangling/OSPapa/OSPapa_prescribed_atmosphere.jl
@@ -59,7 +59,7 @@ function OSPapaPrescribedAtmosphere(architecture = CPU(), FT = Float32;
function ospapa_fts(name)
md = Metadata(name; mdkw...)
- Downloads.download(md)
+ download_dataset(md)
fts = FieldTimeSeries(md, surface_grid; time_indices_in_memory = length(md))
fill_gaps!(fts; max_gap = max_gap_hours)
return fts
diff --git a/src/DataWrangling/OSPapa/OSPapa_prescribed_fluxes.jl b/src/DataWrangling/OSPapa/OSPapa_prescribed_fluxes.jl
index 9c525781..6547b460 100644
--- a/src/DataWrangling/OSPapa/OSPapa_prescribed_fluxes.jl
+++ b/src/DataWrangling/OSPapa/OSPapa_prescribed_fluxes.jl
@@ -44,7 +44,7 @@ function os_papa_prescribed_fluxes(architecture = CPU(), FT = Float64;
function flux_fts(name)
md = Metadata(name; mdkw...)
- Downloads.download(md)
+ download_dataset(md)
fts = FieldTimeSeries(md, surface_grid;
time_indices_in_memory = length(md),
time_indexing = Cyclical())
diff --git a/src/DataWrangling/OSPapa/OSPapa_prescribed_radiation.jl b/src/DataWrangling/OSPapa/OSPapa_prescribed_radiation.jl
index b8e26caf..a8cfe910 100644
--- a/src/DataWrangling/OSPapa/OSPapa_prescribed_radiation.jl
+++ b/src/DataWrangling/OSPapa/OSPapa_prescribed_radiation.jl
@@ -27,7 +27,7 @@ function OSPapaPrescribedRadiation(architecture = CPU(), FT = Float32;
function ospapa_fts(name)
md = Metadata(name; mdkw...)
- Downloads.download(md)
+ download_dataset(md)
fts = FieldTimeSeries(md, surface_grid; time_indices_in_memory = length(md))
fill_gaps!(fts; max_gap = max_gap_hours)
return fts
diff --git a/src/DataWrangling/WOA/WOA.jl b/src/DataWrangling/WOA/WOA.jl
index 70f1e964..1995ef23 100644
--- a/src/DataWrangling/WOA/WOA.jl
+++ b/src/DataWrangling/WOA/WOA.jl
@@ -12,6 +12,8 @@ using ..DataWrangling: DataWrangling, Metadata, Metadatum, metadata_path,
download_WOA_cache::String = ""
function __init__()
global download_WOA_cache = @get_scratch!("WOA")
+ DataWrangling.DataModes.register_dataset!(WOAAnnual, "WOAAnnual")
+ DataWrangling.DataModes.register_dataset!(WOAMonthly, "WOAMonthly")
end
WOA_variable_names = Dict(
diff --git a/src/DataWrangling/metadata.jl b/src/DataWrangling/metadata.jl
index c77ef783..acedc0f3 100644
--- a/src/DataWrangling/metadata.jl
+++ b/src/DataWrangling/metadata.jl
@@ -1,4 +1,3 @@
-using CFTime: AbstractCFDateTime, CFTime
using Dates: Dates, Date, DateTime
using Base: @propagate_inbounds
@@ -72,15 +71,21 @@ getfilename(f::DatewiseFilename, i) = f.filenames[i]
getfilename(f::String, i) = f
getfilename(::Nothing, i) = nothing
-struct Metadata{V, D, R, S, F}
+struct Metadata{V, D, R, S, F} <: AbstractMetadata
name :: S
dataset :: V
dates :: D
region :: R
dir :: String
filename :: F
+ function Metadata{V, D, R, S, F}(name, dataset, dates, region, dir, filename) where {V, D, R, S, F}
+ m = new{V, D, R, S, F}(name, dataset, dates, region, dir, filename)
+ observe_metadata(m)
+ return m
+ end
end
+Metadata(name::S, dataset::V, dates::D, region::R, dir::String, filename::F) where {V, D, R, S, F} = Metadata{V, D, R, S, F}(name, dataset, dates, region, dir, filename)
Metadata(name, dataset, dates, region, dir) = Metadata(name, dataset, dates, region, dir, nothing)
is_three_dimensional(::Metadata) = true
@@ -116,17 +121,17 @@ Keyword Arguments
- `dataset`: Supported datasets are `ETOPO2022()`, `ECCO2Monthly()`, `ECCO2Daily()`, `ECCO4Monthly()`, `EN4Monthly()`,
`GLORYSDaily()`, `GLORYSMonthly()`, `RepeatYearJRA55()`, and `MultiYearJRA55()`.
-- `dates`: The dates of the dataset (`Dates.AbstractDateTime` or `CFTime.AbstractCFDateTime`).
+- `dates`: The dates of the dataset (`Dates.AbstractDateTime`).
Note that `dates` can either be a range or a vector of dates, representing a time-series.
For a single date, use [`Metadatum`](@ref).
- `start_date`: If `dates = nothing`, we can prescribe the first date of metadata as a date
- (`Dates.AbstractDateTime` or `CFTime.AbstractCFDateTime`). If outside the
- date range of the dataset, the first allowable date is chosen. Default: nothing.
+ (`Dates.AbstractDateTime`). If outside the date range of the dataset, the first
+ allowable date is chosen. Default: nothing.
- `end_date`: If `dates = nothing`, we can prescribe the last date of metadata as a date
- (`Dates.AbstractDateTime` or `CFTime.AbstractCFDateTime`). If outside the
- date range of the dataset, the last allowable date is chosen. Default: nothing.
+ (`Dates.AbstractDateTime`). If outside the date range of the dataset, the last
+ allowable date is chosen. Default: nothing.
- `region`: Specifies the spatial region of the dataset. Can be a [`BoundingBox`](@ref)
for a rectangular region, a [`Column`](@ref) for a single horizontal location,
@@ -165,7 +170,7 @@ function Metadata(variable_name;
return Metadata(variable_name, dataset, dates, region, dir, filename)
end
-const AnyDateTime = Union{AbstractCFDateTime, Dates.AbstractDateTime}
+const AnyDateTime = Dates.AbstractDateTime
const Metadatum{V} = Metadata{V, <:Union{AnyDateTime, Nothing}} where V
function Base.size(metadata::Metadata)
@@ -201,7 +206,7 @@ function Metadatum(variable_name;
end
if !isnothing(date) && !(date isa AnyDateTime)
- msg = "`date` must be `nothing`, a `Dates.AbstractDateTime`, or `CFTime.AbstractCFDateTime`, received $(typeof(date))"
+ msg = "`date` must be `nothing` or a `Dates.AbstractDateTime`, received $(typeof(date))"
throw(ArgumentError(msg))
end
@@ -300,15 +305,23 @@ end
##### `download`, ...) keeps working unchanged on the elements.
#####
-struct MetadataSet{V, D, R, N, F}
+struct MetadataSet{V, D, R, N, F} <: AbstractMetadata
names :: N # NTuple{K, Symbol} — verbose dataset variable names
dataset :: V # shared
dates :: D # shared; scalar or AbstractVector
region :: R # shared
dir :: String # shared
filenames :: F # NamedTuple keyed by `names`, one entry per variable
+ function MetadataSet{V, D, R, N, F}(names, dataset, dates, region, dir, filenames) where {V, D, R, N, F}
+ m = new{V, D, R, N, F}(names, dataset, dates, region, dir, filenames)
+ observe_metadata(m)
+ return m
+ end
end
+MetadataSet(names::N, dataset::V, dates::D, region::R, dir::String, filenames::F) where {V, D, R, N, F} =
+ MetadataSet{V, D, R, N, F}(names, dataset, dates, region, dir, filenames)
+
"""
MetadataSet(variable_names::Symbol...;
dataset,
@@ -320,12 +333,11 @@ end
start_date = nothing,
end_date = nothing)
-A bundle of [`Metadata`](@ref) for many variables that share `dataset`, `dates`,
-`region`, and `dir` — differing only in variable name.
+A bundle of [`Metadata`](@ref) for many variables that share `dataset`, `dates`, `region`, and `dir`
+— differing only in variable name.
-Each element `mset[name]` (or equivalently `mset.name` or `mset[i]`) is itself a
-`Metadata` — or a `Metadatum` when `dates` is a single date. Iteration walks the
-variable axis, yielding one `Metadata` per variable.
+Each element `mset[name]` (or equivalently `mset.name` or `mset[i]`) is itself a `Metadata` — or a
+`Metadatum` when `dates` is a single date. Iteration walks the variable axis, yielding one `Metadata` per variable.
Arguments
=========
@@ -335,7 +347,7 @@ Arguments
Keyword Arguments
=================
- `dataset`: the shared dataset (e.g. `ECCO4Monthly()`, `ERA5HourlyPressureLevels()`).
-- `dates`: shared date axis. Either a single `AbstractDateTime`/`AbstractCFDateTime`
+- `dates`: shared date axis. Either a single `AbstractDateTime`
(yielding a [`MetadatumSet`](@ref)) or an `AbstractVector` of dates.
Defaults to `all_dates(dataset, first(variable_names))`.
- `date`: convenience scalar form; cannot be used together with `dates`.
@@ -357,8 +369,7 @@ function MetadataSet(variable_names::Symbol...;
start_date = nothing,
end_date = nothing)
- isempty(variable_names) &&
- throw(ArgumentError("MetadataSet requires at least one variable name"))
+ isempty(variable_names) && throw(ArgumentError("MetadataSet requires at least one variable name"))
if !isnothing(date) && !isnothing(dates)
throw(ArgumentError("Specify either `date` (scalar) or `dates` (vector), not both"))
@@ -374,7 +385,7 @@ function MetadataSet(variable_names::Symbol...;
end
if !isnothing(date) && !(effective_dates isa AnyDateTime)
- msg = "`date` must be a `Dates.AbstractDateTime` or `CFTime.AbstractCFDateTime`, received $(typeof(date))"
+ msg = "`date` must be a `Dates.AbstractDateTime`, received $(typeof(date))"
throw(ArgumentError(msg))
end
@@ -415,8 +426,7 @@ end
Base.propertynames(mset::MetadataSet) =
(getfield(mset, :names)..., fieldnames(MetadataSet)...)
-# Indexed access. We use `getfield` here so subsequent edits to `getproperty`
-# can't make these recursive.
+# Indexed access. We use `getfield` here so subsequent edits to `getproperty` can't make these recursive.
function Base.getindex(mset::MetadataSet, name::Symbol)
fname = getfield(mset, :filenames)[name]
return Metadata(name,
@@ -427,8 +437,7 @@ function Base.getindex(mset::MetadataSet, name::Symbol)
fname)
end
-@propagate_inbounds Base.getindex(mset::MetadataSet, i::Int) =
- getindex(mset, getfield(mset, :names)[i])
+@propagate_inbounds Base.getindex(mset::MetadataSet, i::Int) = getindex(mset, getfield(mset, :names)[i])
Base.length(mset::MetadataSet) = length(getfield(mset, :names))
Base.keys(mset::MetadataSet) = getfield(mset, :names)
@@ -441,8 +450,7 @@ Base.lastindex(mset::MetadataSet) = length(mset)
return mset[state], state + 1
end
-Base.NamedTuple(mset::MetadataSet) =
- NamedTuple{getfield(mset, :names)}(map(n -> mset[n], getfield(mset, :names)))
+Base.NamedTuple(mset::MetadataSet) = NamedTuple{getfield(mset, :names)}(map(n -> mset[n], getfield(mset, :names)))
"""
metadata_path(mset::MetadataSet)
@@ -554,7 +562,7 @@ results of each per-variable `download` call (typically the file path(s)).
"""
function Downloads.download(mset::MetadataSet; kwargs...)
names = getfield(mset, :names)
- return NamedTuple{names}(map(n -> Downloads.download(mset[n]; kwargs...), names))
+ return NamedTuple{names}(map(n -> download_dataset(mset[n]), names)) # NOTE(review): `kwargs` is still accepted by this method but is no longer forwarded; confirm dropping it is intended
end
"""
diff --git a/src/DataWrangling/metadata_field.jl b/src/DataWrangling/metadata_field.jl
index c90b3ef9..cde27106 100644
--- a/src/DataWrangling/metadata_field.jl
+++ b/src/DataWrangling/metadata_field.jl
@@ -197,7 +197,7 @@ function Oceananigans.Fields.Field(metadata::Metadatum, arch=CPU();
halo = (3, 3, 3),
cache_inpainted_data = true)
- Downloads.download(metadata)
+ download_dataset(metadata)
# Inpainting on a (Flat, Flat, *) column field is meaningless and the
# iterative algorithm doesn't terminate gracefully without horizontal
diff --git a/src/DataWrangling/metadata_field_time_series.jl b/src/DataWrangling/metadata_field_time_series.jl
index d25cc042..7b801e35 100644
--- a/src/DataWrangling/metadata_field_time_series.jl
+++ b/src/DataWrangling/metadata_field_time_series.jl
@@ -30,7 +30,7 @@ Keyword Arguments
Default: `true`.
"""
function Oceananigans.OutputReaders.FieldTimeSeries(metadata::Metadata, arch::AbstractArchitecture=CPU(); kw...)
- Downloads.download(metadata)
+ download_dataset(metadata)
grid = native_grid(metadata, arch)
return FieldTimeSeries(metadata, grid; kw...)
end
@@ -41,7 +41,7 @@ function Oceananigans.OutputReaders.FieldTimeSeries(metadata::Metadata, grid::Ab
inpainting = default_inpainting(metadata),
cache_inpainted_data = true)
- Downloads.download(metadata)
+ download_dataset(metadata)
times = native_times(metadata)
diff --git a/src/DataWrangling/restoring.jl b/src/DataWrangling/restoring.jl
index e85de0cf..ada59f9a 100644
--- a/src/DataWrangling/restoring.jl
+++ b/src/DataWrangling/restoring.jl
@@ -194,7 +194,7 @@ function DatasetRestoring(metadata::Metadata,
inpainting = NearestNeighborInpainting(Inf),
cache_inpainted_data = true)
- Downloads.download(metadata)
+ download_dataset(metadata)
fts = FieldTimeSeries(metadata, arch_or_grid;
time_indices_in_memory,
diff --git a/src/NumericalEarth.jl b/src/NumericalEarth.jl
index 2ef5330c..2d026eb5 100644
--- a/src/NumericalEarth.jl
+++ b/src/NumericalEarth.jl
@@ -177,6 +177,62 @@ using .DataWrangling.OSPapa
using PrecompileTools: @setup_workload, @compile_workload
+"""
+Process-level entry point that fires once after every submodule's `__init__` has run.
+
+- In `:auto` mode (the default), auto-downloads datasets listed in `NumericalEarthDataManifest.toml`
+  whenever a manifest sits next to the active project's `Project.toml`. Cached files are skipped by
+  each dataset's per-dataset `Downloads.download` method, so subsequent runs are cheap. A failed
+  auto-download is logged and the script continues without it.
+- In `:pregenerate` mode (`NUMERICALEARTH_DATA=pregenerate` or `pregenerate:`), traces
+  `Base.PROGRAM_FILE` via `pregenerate_dataset_manifest` and exits — the script's real
+  execution is skipped. The exit status is `0` when the manifest was written and `1` when the
+  trace threw, so shells and CI can detect a failed pregeneration. The trace runs silently
+  (`quiet = true`) so only the final `wrote manifest` log appears.
+
+Both paths are no-ops during precompilation, in `:strict` mode, and when no real `PROGRAM_FILE`
+is set (REPL / `julia -e ...`).
+"""
+function __init__()
+    # Nothing to do while precompiling, or without a real script file to act on.
+    ccall(:jl_generating_output, Cint, ()) == 1 && return nothing
+    (!isempty(Base.PROGRAM_FILE) && isfile(Base.PROGRAM_FILE)) || return nothing
+
+    mode = DataWrangling.DataModes.DATA_MODE[]
+    if mode === :pregenerate
+        script = abspath(Base.PROGRAM_FILE)
+        # `MANIFEST_DIR[]` is populated by `DataModes.__init__` from the env var; if that init
+        # somehow hasn't run (precompile workload edge case), fall back to the current directory.
+        dir = isempty(DataWrangling.DataModes.MANIFEST_DIR[]) ? pwd() : DataWrangling.DataModes.MANIFEST_DIR[]
+        try
+            manifest = DataWrangling.DataModes.pregenerate_dataset_manifest(script; dir)
+            @info "NUMERICALEARTH_DATA=pregenerate: wrote manifest via AST trace" manifest script
+        catch err
+            @error "NUMERICALEARTH_DATA=pregenerate: trace failed" dir script exception=(err, catch_backtrace())
+            # No manifest was produced: exit non-zero so the caller does not mistake this for success.
+            exit(1)
+        end
+        exit(0)
+    end
+
+    mode === :auto || return nothing
+
+    project = Base.active_project()
+    project === nothing && return nothing
+    project_dir = dirname(project)
+    manifest = joinpath(project_dir, DataWrangling.DataModes.MANIFEST_FILENAME)
+    isfile(manifest) || return nothing
+
+    @info "NumericalEarth: auto-downloading datasets from manifest" manifest
+    try
+        DataWrangling.DataModes.download_datasets(; dir = project_dir)
+    catch err
+        # Best effort: a failed auto-download is reported but must not abort the user's script.
+        @error "NumericalEarth: auto-download failed; continuing without it" manifest exception=(err, catch_backtrace())
+    end
+    return nothing
+end
+
@setup_workload begin
Nx, Ny, Nz = 32, 32, 10
@compile_workload begin
diff --git a/test/NumericalEarthDataManifest.toml b/test/NumericalEarthDataManifest.toml
new file mode 100644
index 00000000..cd92e034
--- /dev/null
+++ b/test/NumericalEarthDataManifest.toml
@@ -0,0 +1,280 @@
+[[ECCO2Daily]]
+date = 1993-01-05T00:00:00.000Z
+filename = "SALT.1440x720x50.19930105.nc"
+variable_name = "salinity"
+[[ECCO2Daily]]
+date = 1993-01-04T00:00:00.000Z
+filename = "SALT.1440x720x50.19930104.nc"
+variable_name = "salinity"
+[[ECCO2Daily]]
+date = 1993-01-03T00:00:00.000Z
+filename = "SALT.1440x720x50.19930103.nc"
+variable_name = "salinity"
+[[ECCO2Daily]]
+date = 1993-01-02T00:00:00.000Z
+filename = "SALT.1440x720x50.19930102.nc"
+variable_name = "salinity"
+[[ECCO2Daily]]
+end_date = 1993-01-05T00:00:00.000Z
+filename = ["SALT.1440x720x50.19930101.nc", "SALT.1440x720x50.19930102.nc", "SALT.1440x720x50.19930103.nc", "SALT.1440x720x50.19930104.nc", "SALT.1440x720x50.19930105.nc"]
+start_date = 1993-01-01T00:00:00.000Z
+variable_name = "salinity"
+[[ECCO2Daily]]
+date = 1993-01-01T00:00:00.000Z
+filename = "SALT.1440x720x50.19930101.nc"
+variable_name = "salinity"
+[[ECCO2Daily]]
+date = 1993-01-05T00:00:00.000Z
+filename = "THETA.1440x720x50.19930105.nc"
+variable_name = "temperature"
+[[ECCO2Daily]]
+date = 1993-01-04T00:00:00.000Z
+filename = "THETA.1440x720x50.19930104.nc"
+variable_name = "temperature"
+[[ECCO2Daily]]
+date = 1993-01-03T00:00:00.000Z
+filename = "THETA.1440x720x50.19930103.nc"
+variable_name = "temperature"
+[[ECCO2Daily]]
+date = 1993-01-02T00:00:00.000Z
+filename = "THETA.1440x720x50.19930102.nc"
+variable_name = "temperature"
+[[ECCO2Daily]]
+date = 1993-01-01T00:00:00.000Z
+filename = "THETA.1440x720x50.19930101.nc"
+variable_name = "temperature"
+[[ECCO2Daily]]
+end_date = 1993-01-05T00:00:00.000Z
+filename = ["THETA.1440x720x50.19930101.nc", "THETA.1440x720x50.19930102.nc", "THETA.1440x720x50.19930103.nc", "THETA.1440x720x50.19930104.nc", "THETA.1440x720x50.19930105.nc"]
+start_date = 1993-01-01T00:00:00.000Z
+variable_name = "temperature"
+
+[[ECCO4Monthly]]
+date = 1993-01-01T00:00:00.000Z
+filename = "SIarea_1993_01.nc"
+variable_name = "sea_ice_concentration"
+[[ECCO4Monthly]]
+date = 1993-01-01T00:00:00.000Z
+variable_names = ["temperature", "salinity", "sea_ice_thickness", "sea_ice_concentration"]
+[[ECCO4Monthly]]
+date = 1993-01-01T00:00:00.000Z
+filename = "SIheff_1993_01.nc"
+variable_name = "sea_ice_thickness"
+[[ECCO4Monthly]]
+date = 1993-01-01T00:00:00.000Z
+variable_names = ["temperature", "sea_ice_thickness"]
+[[ECCO4Monthly]]
+end_date = 1993-04-01T00:00:00.000Z
+filename = ["SALT_1993_01.nc", "SALT_1993_02.nc", "SALT_1993_03.nc", "SALT_1993_04.nc"]
+start_date = 1993-01-01T00:00:00.000Z
+variable_name = "salinity"
+[[ECCO4Monthly]]
+date = 1993-01-01T00:00:00.000Z
+filename = "SALT_1993_01.nc"
+variable_name = "salinity"
+[[ECCO4Monthly]]
+date = 1993-01-01T00:00:00.000Z
+variable_names = ["temperature", "salinity"]
+
+ [ECCO4Monthly.region]
+ kind = "BoundingBox"
+ latitude = [-10.0, 10.0]
+ longitude = [-20.0, 20.0]
+[[ECCO4Monthly]]
+end_date = 1993-04-01T00:00:00.000Z
+filename = ["THETA_1993_01.nc", "THETA_1993_02.nc", "THETA_1993_03.nc", "THETA_1993_04.nc"]
+start_date = 1993-01-01T00:00:00.000Z
+variable_name = "temperature"
+[[ECCO4Monthly]]
+end_date = 1993-04-01T00:00:00.000Z
+start_date = 1993-01-01T00:00:00.000Z
+variable_names = ["temperature", "salinity"]
+[[ECCO4Monthly]]
+date = 1993-01-01T00:00:00.000Z
+filename = "THETA_1993_01.nc"
+variable_name = "temperature"
+[[ECCO4Monthly]]
+date = 1993-01-01T00:00:00.000Z
+variable_names = ["temperature", "salinity"]
+[[ECCO4Monthly]]
+date = 1993-01-01T00:00:00.000Z
+filename = "NVEL_1993_01.nc"
+variable_name = "v_velocity"
+[[ECCO4Monthly]]
+date = 1993-02-01T00:00:00.000Z
+filename = "SALT_1993_02.nc"
+variable_name = "salinity"
+[[ECCO4Monthly]]
+date = 1993-02-01T00:00:00.000Z
+filename = "THETA_1993_02.nc"
+variable_name = "temperature"
+[[ECCO4Monthly]]
+end_date = 1993-02-01T00:00:00.000Z
+filename = ["SALT_1993_01.nc", "SALT_1993_02.nc"]
+start_date = 1993-01-01T00:00:00.000Z
+variable_name = "salinity"
+[[ECCO4Monthly]]
+end_date = 1993-02-01T00:00:00.000Z
+filename = ["THETA_1993_01.nc", "THETA_1993_02.nc"]
+start_date = 1993-01-01T00:00:00.000Z
+variable_name = "temperature"
+[[ECCO4Monthly]]
+date = 1992-01-01T00:00:00.000Z
+filename = "THETA_1992_01.nc"
+variable_name = "temperature"
+
+ [ECCO4Monthly.region]
+ interpolation = "Linear"
+ kind = "Column"
+ latitude = 50.1
+ longitude = 35.1
+[[ECCO4Monthly]]
+date = 1992-01-01T00:00:00.000Z
+filename = "EVEL_1992_01.nc"
+variable_name = "u_velocity"
+
+ [ECCO4Monthly.region]
+ interpolation = "Linear"
+ kind = "Column"
+ latitude = 50.1
+ longitude = 35.1
+[[ECCO4Monthly]]
+date = 1992-01-01T00:00:00.000Z
+filename = "SSH_1992_01.nc"
+variable_name = "free_surface"
+
+ [ECCO4Monthly.region]
+ interpolation = "Linear"
+ kind = "Column"
+ latitude = 50.1
+ longitude = 35.1
+[[ECCO4Monthly]]
+date = 1992-01-01T00:00:00.000Z
+filename = "EVEL_1992_01.nc"
+variable_name = "u_velocity"
+[[ECCO4Monthly]]
+date = 1992-01-01T00:00:00.000Z
+filename = "EVEL_1992_01.nc"
+variable_name = "u_velocity"
+
+ [ECCO4Monthly.region]
+ kind = "BoundingBox"
+ latitude = [0, 10]
+ longitude = [0, 10]
+[[ECCO4Monthly]]
+date = 1992-01-01T00:00:00.000Z
+filename = "THETA_1992_01.nc"
+variable_name = "temperature"
+[[ECCO4Monthly]]
+date = 1992-01-01T00:00:00.000Z
+filename = "THETA_1992_01.nc"
+variable_name = "temperature"
+
+ [ECCO4Monthly.region]
+ kind = "BoundingBox"
+ latitude = [0, 10]
+ longitude = [0, 10]
+[[ECCO4Monthly]]
+date = 1992-01-01T00:00:00.000Z
+filename = "THETA_1992_01.nc"
+variable_name = "temperature"
+
+ [ECCO4Monthly.region]
+ kind = "BoundingBox"
+ latitude = [-30, 30]
+ longitude = [-180, 180]
+[[ECCO4Monthly]]
+date = 1992-01-01T00:00:00.000Z
+filename = "THETA_1992_01.nc"
+variable_name = "temperature"
+
+ [ECCO4Monthly.region]
+ kind = "BoundingBox"
+ latitude = [-30, 30]
+[[ECCO4Monthly]]
+end_date = 1992-03-01T00:00:00.000Z
+filename = ["THETA_1992_01.nc", "THETA_1992_02.nc", "THETA_1992_03.nc"]
+start_date = 1992-01-01T00:00:00.000Z
+variable_name = "temperature"
+
+ [ECCO4Monthly.region]
+ interpolation = "Linear"
+ kind = "Column"
+ latitude = 50.1
+ longitude = 35.1
+[[ECCO4Monthly]]
+date = 1992-02-01T00:00:00.000Z
+filename = "THETA_1992_02.nc"
+variable_name = "temperature"
+
+ [ECCO4Monthly.region]
+ interpolation = "Linear"
+ kind = "Column"
+ latitude = 50.1
+ longitude = 35.1
+[[ECCO4Monthly]]
+date = 1992-03-01T00:00:00.000Z
+filename = "THETA_1992_03.nc"
+variable_name = "temperature"
+
+ [ECCO4Monthly.region]
+ interpolation = "Linear"
+ kind = "Column"
+ latitude = 50.1
+ longitude = 35.1
+
+[[ERA5HourlySingleLevel]]
+date = 2004-12-27T00:00:00.000Z
+filename = "2m_temperature_ERA5HourlySingleLevel_2004-12-27T00_-110.0_30.0_-25.0_35.0.nc"
+variable_name = "temperature"
+
+ [ERA5HourlySingleLevel.region]
+ kind = "BoundingBox"
+ latitude = [-25, 35]
+ longitude = [-110, 30]
+
+[[ETOPO2022]]
+filename = "ETOPO_2022_v1_60s_N90W180_surface.nc"
+variable_name = "bottom_height"
+
+[[GEBCO2024]]
+filename = "GEBCO_2024.nc"
+variable_name = "bottom_height"
+
+[[IBCAOv5]]
+filename = "ibcao_v5_wgs84_0p01deg.nc"
+variable_name = "bottom_height"
+
+[[IBCSOv2]]
+filename = "IBCSO_v2_bed_WGS84.nc"
+variable_name = "bottom_height"
+
+[[ORCA1]]
+filename = "eORCA1.2_mesh_mask.nc"
+variable_name = "mesh_mask"
+[[ORCA1]]
+filename = "eORCA_R1_bathy_meter_v2.2.nc"
+variable_name = "bottom_height"
+
+[[ORCA12]]
+filename = "bathy_eORCA12_noclosea_from_GEBCO2021_FillZero_S21TT_CloseaCopy.nc"
+variable_name = "bottom_height"
+[[ORCA12]]
+filename = "grid_mask_eORCA12-GO6.nc"
+variable_name = "mesh_mask"
+
+[[WOAAnnual]]
+filename = "woa_t_annual.nc"
+variable_name = "temperature"
+[[WOAAnnual]]
+filename = "woa_s_annual.nc"
+variable_name = "salinity"
+
+[[WOAMonthly]]
+date = 2018-01-01T00:00:00.000Z
+filename = "woa_t_monthly_01.nc"
+variable_name = "temperature"
+[[WOAMonthly]]
+date = 2018-01-01T00:00:00.000Z
+filename = "woa_s_monthly_01.nc"
+variable_name = "salinity"
diff --git a/test/download_utils.jl b/test/download_utils.jl
index 52928090..a7f6917e 100644
--- a/test/download_utils.jl
+++ b/test/download_utils.jl
@@ -10,6 +10,7 @@ function emit_ci_warning(title, message)
end
function download_from_artifacts(filepath::AbstractString; max_retries=3)
+ NumericalEarth.DataWrangling.DataModes.DATA_MODE[] === :pregenerate && return nothing
filename = basename(filepath)
fallback_url = ARTIFACTS_BASE_URL * filename
@info "Downloading $filename from NumericalEarthArtifacts fallback..."
@@ -41,9 +42,13 @@ end
Try `download_fn()`. If it throws, download the required files from
NumericalEarthArtifacts and retry. Emits a CI warning when the fallback is used.
+In `:pregenerate` mode the fallback is skipped — `download_fn()` runs unguarded and any error
+propagates to the script's per-statement wrapper, so the trace never reaches the network.
+
Returns the result of `download_fn()`.
"""
function download_dataset_with_fallback(download_fn, filepaths; dataset_name="dataset")
+ NumericalEarth.DataWrangling.DataModes.DATA_MODE[] === :pregenerate && return download_fn()
try
return download_fn()
catch e
diff --git a/test/runtests.jl b/test/runtests.jl
index aba5f4fa..7ea5736a 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -70,7 +70,7 @@ function __init__()
ETOPOmetadata = Metadatum(:bottom_height, dataset=NumericalEarth.ETOPO.ETOPO2022())
download_dataset_with_fallback(metadata_path(ETOPOmetadata); dataset_name="ETOPO2022") do
- download(ETOPOmetadata)
+ download_dataset(ETOPOmetadata)
end
#####
@@ -109,14 +109,14 @@ function __init__()
for md in ts_set
download_dataset_with_fallback(metadata_path(md); dataset_name="$(typeof(dataset)) $(md.name)") do
- download(md)
+ download_dataset(md)
end
end
if dataset isa Union{ECCO2DarwinMonthly, ECCO4DarwinMonthly}
PO₄_metadata = Metadata(:phosphate; dataset, dates)
download_dataset_with_fallback(metadata_path(PO₄_metadata); dataset_name="$(typeof(dataset)) phosphate") do
- download(PO₄_metadata)
+ download_dataset(PO₄_metadata)
end
end
end
diff --git a/test/runtests_setup.jl b/test/runtests_setup.jl
index 0b98b8ba..27c277c9 100644
--- a/test/runtests_setup.jl
+++ b/test/runtests_setup.jl
@@ -15,7 +15,6 @@ using NumericalEarth.WOA
using Oceananigans.Architectures: architecture, on_architecture
using Oceananigans.OutputReaders: interpolate!
-using CFTime
using Dates
using CUDA: @allowscalar
@@ -23,7 +22,7 @@ using CUDA: @allowscalar
gpu_test = parse(Bool, get(ENV, "GPU_TEST", "false"))
test_architectures = gpu_test ? [GPU()] : [CPU()]
-start_date = DateTimeProlepticGregorian(1993, 1, 1)
+start_date = DateTime(1993, 1, 1)
test_datasets = (ECCO2Monthly(),
ECCO2Daily(),
@@ -117,7 +116,7 @@ function test_ocean_metadata_utilities(arch, dataset, dates, inpainting;
metadata = Metadata(name; dates, dataset)
filepaths = [metadata_path(datum) for datum in metadata]
download_dataset_with_fallback(filepaths; dataset_name="$(typeof(dataset)) $name") do
- download(metadata)
+ download_dataset(metadata)
end
restoring = DatasetRestoring(metadata, arch; rate=1/1000, inpainting)
@@ -175,7 +174,7 @@ function test_dataset_restoring(arch, dataset, dates, inpainting;
metadata = Metadata(name; dates, dataset)
filepaths = [metadata_path(datum) for datum in metadata]
download_dataset_with_fallback(filepaths; dataset_name="$(typeof(dataset)) $name") do
- download(metadata)
+ download_dataset(metadata)
end
var_restoring = DatasetRestoring(metadata, arch; mask, inpainting, rate=1/1000)
@@ -216,7 +215,7 @@ function test_timestepping_with_dataset_restoring(arch, dataset, dates, inpainti
metadata = Metadata(varnames[end]; dates, dataset)
filepaths = [metadata_path(datum) for datum in metadata]
download_dataset_with_fallback(filepaths; dataset_name="$(typeof(dataset)) $(varnames[end])") do
- download(metadata)
+ download_dataset(metadata)
end
restoring = DatasetRestoring(metadata, arch; inpainting, rate=1/1000)
forcing = NamedTuple{tuple(fldnames[end])}(tuple(restoring))
diff --git a/test/test_bathymetry.jl b/test/test_bathymetry.jl
index bf714aa7..26df4a40 100644
--- a/test/test_bathymetry.jl
+++ b/test/test_bathymetry.jl
@@ -18,7 +18,7 @@ using Statistics
# Testing downloading
download_dataset_with_fallback(filepath; dataset_name="ETOPO2022") do
- download(ETOPOmetadata)
+ download_dataset(ETOPOmetadata)
end
@test isfile(filepath)
diff --git a/test/test_cds_downloading.jl b/test/test_cds_downloading.jl
index 4da3e63a..3a24a278 100644
--- a/test/test_cds_downloading.jl
+++ b/test/test_cds_downloading.jl
@@ -39,7 +39,7 @@ start_date = DateTime(2005, 2, 16, 12)
# Download the data (falls back to NumericalEarthArtifacts if CDS is unreachable)
download_dataset_with_fallback(filepath; dataset_name="ERA5Hourly $variable") do
- download(metadatum)
+ download_dataset(metadatum)
end
@test isfile(filepath)
@@ -299,7 +299,7 @@ start_date = DateTime(2005, 2, 16, 12)
# Download if not present (falls back to NumericalEarthArtifacts if CDS is unreachable)
filepath = metadata_path(metadatum)
isfile(filepath) || download_dataset_with_fallback(filepath; dataset_name="ERA5Hourly $variable") do
- download(metadatum)
+ download_dataset(metadatum)
end
# Create a Field from the downloaded data
@@ -325,7 +325,7 @@ start_date = DateTime(2005, 2, 16, 12)
# Download if not present (falls back to NumericalEarthArtifacts if CDS is unreachable)
filepath = metadata_path(metadatum)
isfile(filepath) || download_dataset_with_fallback(filepath; dataset_name="ERA5Hourly $variable") do
- download(metadatum)
+ download_dataset(metadatum)
end
# Create a target grid matching the bounding box region
@@ -361,7 +361,7 @@ start_date = DateTime(2005, 2, 16, 12)
filepath = metadata_path(meta)
isfile(filepath) && rm(filepath; force=true)
- download(meta)
+ download_dataset(meta)
@test isfile(filepath)
# Verify the NetCDF has a pressure_level dimension and the right variable
diff --git a/test/test_data_manifest_freshness.jl b/test/test_data_manifest_freshness.jl
new file mode 100644
index 00000000..c33cd00e
--- /dev/null
+++ b/test/test_data_manifest_freshness.jl
@@ -0,0 +1,60 @@
+include("runtests_setup.jl")
+
+using NumericalEarth.DataWrangling.DataModes: DataModes, pregenerate_dataset_manifest, manifest_path_in
+using TOML
+
+# Regenerate the test-folder `NumericalEarthDataManifest.toml` in `out_dir` by tracing every
+# `test_*.jl` (excluding self) in pregenerate mode, and return it parsed (an empty `Dict` if
+# no manifest was written). The testset below compares it against the committed manifest; if
+# they differ the manifest is stale: re-run the command that testset logs and commit the result.
+function regenerate_manifest_in(out_dir)
+    self = abspath(@__FILE__)
+    for f in sort(readdir(@__DIR__; join=true))
+        endswith(f, ".jl") && startswith(basename(f), "test_") || continue
+        abspath(f) == self && continue
+        try
+            pregenerate_dataset_manifest(f; dir = out_dir, overwrite_existing = false)
+        catch err  # best-effort: keep tracing the other files, but report what went wrong
+            @warn "Pregenerate trace failed; manifest may be incomplete" file=f exception=err
+        end
+    end
+    out = manifest_path_in(out_dir)
+    return isfile(out) ? TOML.parsefile(out) : Dict{String, Any}()
+end
+
+@testset "DataManifest freshness" begin
+    # This test self-invokes `pregenerate_dataset_manifest` on every other `test_*.jl`. If
+    # we're already running inside a pregenerate trace (i.e. some outer loop is tracing this
+    # very file), recursing here both wastes work and corrupts per-process state — most
+    # notably MPI, which gets re-initialised across nested sandbox boundaries.
+    if DataModes.DATA_MODE[] === :pregenerate
+        @info "Skipping DataManifest freshness test inside a pregenerate trace"
+        return
+    end
+    # The committed manifest is the one that lives next to this test file.
+    committed_path = manifest_path_in(@__DIR__)
+    @test isfile(committed_path)
+    # Trace into a throwaway directory; `mktempdir(f)` removes it once `f` returns.
+    committed = TOML.parsefile(committed_path)
+    regenerated = mktempdir(regenerate_manifest_in)
+    # On mismatch, log which datasets were added, dropped or changed, plus the fix command.
+    if committed != regenerated
+        added = sort(collect(setdiff(keys(regenerated), keys(committed))))
+        removed = sort(collect(setdiff(keys(committed), keys(regenerated))))
+        isempty(added) || @info "Datasets added to the regenerated manifest" datasets=added
+        isempty(removed) || @info "Datasets missing from the regenerated manifest" datasets=removed
+        for k in sort(collect(intersect(keys(committed), keys(regenerated))))
+            committed[k] == regenerated[k] && continue
+            @info "Entries differ for dataset" dataset=k committed=committed[k] regenerated=regenerated[k]
+        end
+        @info "Manifest is stale. To regenerate, run from the repo root:\n  " *
+              "julia --project -e 'using NumericalEarth.DataWrangling.DataModes: " *
+              "pregenerate_dataset_manifest, manifest_path_in; dir = abspath(\"test\"); " *
+              "rm(manifest_path_in(dir); force=true); for f in sort(readdir(dir; join=true)); " *
+              "endswith(f, \".jl\") && startswith(basename(f), \"test_\") && " *
+              "basename(f) != \"test_data_manifest_freshness.jl\" || continue; " *
+              "try; pregenerate_dataset_manifest(f; dir, overwrite_existing=false); catch; end; end'"
+    end
+    # The assertion itself: the committed manifest must equal the freshly traced one.
+    @test committed == regenerated
+end
diff --git a/test/test_data_modes.jl b/test/test_data_modes.jl
new file mode 100644
index 00000000..fdc727d1
--- /dev/null
+++ b/test/test_data_modes.jl
@@ -0,0 +1,315 @@
+include("runtests_setup.jl")
+
+using NumericalEarth.DataWrangling: AbstractMetadata, Metadatum, Metadata, MetadataSet,
+ BoundingBox, Column, Nearest, Linear, DatewiseFilename,
+ download_dataset
+using NumericalEarth.DataWrangling.DataModes: DataModes, parse_data_mode, register_dataset!,
+ write_manifest, read_manifest, download_datasets,
+ pregenerate_dataset_manifest, DryRunValue,
+ MANIFEST_FILENAME, manifest_path_in
+
+using Downloads: Downloads
+using Dates: DateTime
+using TOML: TOML
+
+struct FakeDataset end  # empty stand-in dataset type used throughout these tests
+# `DataWrangling` hooks for `FakeDataset`: twelve monthly dates in 2020 and `.nc` file names.
+NumericalEarth.DataWrangling.all_dates(::FakeDataset, ::Symbol) = [DateTime(2020, m, 1) for m in 1:12]
+NumericalEarth.DataWrangling.build_filename(::FakeDataset, name, dates::AbstractArray, region) = "$(name).nc"
+NumericalEarth.DataWrangling.build_filename(::FakeDataset, name, date, region) = "$(name).nc"
+NumericalEarth.DataWrangling.metadata_filename(::FakeDataset, name, date, region) = "$(name)_$(date).nc"
+NumericalEarth.DataWrangling.default_download_directory(::FakeDataset) = "/tmp/fake_dataset_test"
+NumericalEarth.DataWrangling.first_date(::FakeDataset, ::Symbol) = DateTime(2020, 1, 1)
+# Metadata stub that carries only a name; used to count calls routed to `Downloads.download`.
+struct MockMetadatum <: AbstractMetadata
+    name :: Symbol
+end
+# Incremented by the `Downloads.download(::MockMetadatum)` method below; tests reset it to 0.
+const MOCK_DOWNLOAD_CALLS = Ref(0)
+Downloads.download(::MockMetadatum) = (MOCK_DOWNLOAD_CALLS[] += 1; nothing)
+
+@testset "AbstractMetadata supertype" begin
+    for T in (Metadata, Metadatum, MetadataSet)  # each container must share the supertype
+        @test T <: AbstractMetadata
+    end
+end
+
+@testset "parse_data_mode" begin
+    accepted = ("auto" => (:auto, ""), "" => (:auto, ""), "strict" => (:strict, ""),
+                "pregenerate" => (:pregenerate, ""), "pregenerate:/tmp/m" => (:pregenerate, "/tmp/m"),
+                "pregenerate:relative/dir" => (:pregenerate, "relative/dir"))
+    for (spec, expected) in accepted  # every accepted spelling and the tuple it parses to
+        @test parse_data_mode(spec) == expected
+    end
+    for spec in ("pregenerate:", "garbage")  # specs that must be rejected
+        @test_throws ArgumentError parse_data_mode(spec)
+    end
+end
+
+@testset "check_files_exist" begin
+    mktempdir() do dir
+        # A file that is absent from `dir` must raise an `ErrorException`.
+        absent = Metadata(:t, nothing, nothing, nothing, dir, "missing_file.nc")
+        @test_throws ErrorException DataModes.check_files_exist(absent)
+
+        # Once the file is on disk the check returns `nothing`.
+        write(joinpath(dir, "present.nc"), "x")
+        existing = Metadata(:t, nothing, nothing, nothing, dir, "present.nc")
+        @test DataModes.check_files_exist(existing) === nothing
+
+        # With several files missing, the error text must name every one of them.
+        absent_many = Metadata(:t, nothing, [DateTime(2020, 1, 1), DateTime(2020, 1, 2)],
+                               nothing, dir, DatewiseFilename(["a.nc", "b.nc"]))
+        caught = try
+            DataModes.check_files_exist(absent_many); nothing
+        catch exc
+            exc
+        end
+        @test caught !== nothing
+        @test occursin("a.nc", sprint(showerror, caught))
+        @test occursin("b.nc", sprint(showerror, caught))
+    end
+end
+
+@testset "write_manifest groups by dataset" begin
+    register_dataset!(FakeDataset, "FakeDataset")
+    # One record of each shape: undated, single date, date range, bounding-box region, set.
+    md_um = Metadata(:bathymetry, FakeDataset(), nothing, nothing, "/tmp", "b.nc")
+    md_one = Metadata(:temperature, FakeDataset(), DateTime(2020, 1, 1), nothing, "/tmp", "t.nc")
+    dates = [DateTime(2020, 1, 1), DateTime(2020, 12, 31)]
+    md_range = Metadata(:salinity, FakeDataset(), dates, nothing, "/tmp",
+                        DatewiseFilename(["s1.nc", "s2.nc"]))
+    region = BoundingBox(longitude=(200.0, 220.0), latitude=(35.0, 55.0))
+    md_region = Metadata(:eastward_velocity, FakeDataset(), DateTime(2020, 1, 1), region, "/tmp", "u.nc")
+    mset = MetadataSet((:T, :S), FakeDataset(), dates, nothing, "/tmp",
+                       (T = DatewiseFilename(["T1.nc", "T2.nc"]), S = DatewiseFilename(["S1.nc", "S2.nc"])))
+    # Serialise all five records to an in-memory buffer and parse the TOML back.
+    records = AbstractMetadata[md_um, md_one, md_range, md_region, mset]
+    io = IOBuffer()
+    write_manifest(io, records)
+    parsed = TOML.parse(String(take!(io)))
+    # Everything sits under the registered dataset name; entries carry no "dataset" key.
+    @test collect(keys(parsed)) == ["FakeDataset"]
+    entries = parsed["FakeDataset"]
+    @test length(entries) == 5
+    @test all(!haskey(e, "dataset") for e in entries)
+    # Each record shape must show up with its own date / region fields.
+    @test any(e -> get(e, "variable_name", nothing) == "bathymetry" && !haskey(e, "date") && !haskey(e, "region"), entries)
+    @test any(e -> get(e, "variable_name", nothing) == "temperature" && get(e, "date", nothing) == DateTime(2020, 1, 1), entries)
+    @test any(e -> get(e, "variable_name", nothing) == "eastward_velocity"
+                   && haskey(e, "region") && e["region"]["kind"] == "BoundingBox"
+                   && e["region"]["longitude"] == [200.0, 220.0],
+              entries)
+    @test any(e -> get(e, "variable_name", nothing) == "salinity"
+                   && get(e, "start_date", nothing) == DateTime(2020, 1, 1)
+                   && get(e, "end_date", nothing) == DateTime(2020, 12, 31),
+              entries)
+    @test any(e -> get(e, "variable_names", nothing) == ["T", "S"]
+                   && get(e, "start_date", nothing) == DateTime(2020, 1, 1)
+                   && get(e, "end_date", nothing) == DateTime(2020, 12, 31),
+              entries)
+    # A `Column` region is written with its kind, coordinates and interpolation name.
+    col = Column(45.0, 30.0; z=(-400.0, 0.0), interpolation=Nearest())
+    md_col = Metadata(:temperature, FakeDataset(), DateTime(2020, 1, 1), col, "/tmp", "t.nc")
+    io2 = IOBuffer()
+    write_manifest(io2, AbstractMetadata[md_col])
+    parsed2 = TOML.parse(String(take!(io2)))
+    col_entry = parsed2["FakeDataset"][1]
+    @test col_entry["region"]["kind"] == "Column"
+    @test col_entry["region"]["longitude"] == 45.0
+    @test col_entry["region"]["latitude"] == 30.0
+    @test col_entry["region"]["interpolation"] == "Nearest"
+end
+
+@testset "read_manifest round-trip" begin
+    register_dataset!(FakeDataset, "FakeDataset")
+    # Four records: a single date, a date range, a bounding-box region, a two-variable set.
+    md_one = Metadatum(:temperature; dataset=FakeDataset(), date=DateTime(2020, 6, 1))
+    md_range = Metadata(:salinity; dataset=FakeDataset(),
+                        start_date=DateTime(2020, 3, 1), end_date=DateTime(2020, 8, 1))
+    region = BoundingBox(longitude=(200.0, 220.0), latitude=(35.0, 55.0))
+    md_region = Metadatum(:temperature; dataset=FakeDataset(), date=DateTime(2020, 6, 1), region=region)
+    mset = MetadataSet(:T, :S; dataset=FakeDataset(),
+                       start_date=DateTime(2020, 3, 1), end_date=DateTime(2020, 8, 1))
+    # Write them to a manifest in a temporary directory and read that manifest back.
+    mktempdir() do dir
+        path = manifest_path_in(dir)
+        write_manifest(path, AbstractMetadata[md_one, md_range, md_region, mset])
+        records = read_manifest(; dir)
+        @test length(records) == 4
+        # Single-date record without a region.
+        rt = first(r for r in records if r isa Metadatum && r.name == :temperature && r.region === nothing)
+        @test rt.dataset isa FakeDataset
+        @test rt.dates == DateTime(2020, 6, 1)
+        # Date-range record: expected back as the monthly dates March through August 2020.
+        rrange = first(r for r in records if r isa Metadata && !(r isa Metadatum) && r.name == :salinity)
+        @test rrange.dates == [DateTime(2020, m, 1) for m in 3:8]
+        # Region record: the bounding box keeps its longitude and latitude bounds.
+        rregion = first(r for r in records if r isa Metadatum && r.region !== nothing)
+        @test rregion.region isa BoundingBox
+        @test rregion.region.longitude == (200.0, 220.0)
+        @test rregion.region.latitude == (35.0, 55.0)
+        # Set record: both variable names and the same monthly date range.
+        rset = first(r for r in records if r isa MetadataSet)
+        @test rset.names == (:T, :S)
+        @test rset.dates == [DateTime(2020, m, 1) for m in 3:8]
+    end
+end
+
+@testset "download_datasets varargs and manifest dir" begin
+    register_dataset!(FakeDataset, "FakeDataset")
+    saved = DataModes.DATA_MODE[]  # restored in the `finally` below
+    REAL_DOWNLOAD_CALLS = Ref(0)  # bumped by the stub `download` method defined next
+    Downloads.download(::Metadata{<:FakeDataset}) = (REAL_DOWNLOAD_CALLS[] += 1; nothing)
+    try
+        DataModes.DATA_MODE[] = :auto
+        m1 = Metadatum(:temperature; dataset=FakeDataset(), date=DateTime(2020, 6, 1))
+        m2 = Metadatum(:salinity; dataset=FakeDataset(), date=DateTime(2020, 7, 1))
+        REAL_DOWNLOAD_CALLS[] = 0
+        download_datasets(m1, m2)  # varargs form: one stub call per metadatum expected
+        @test REAL_DOWNLOAD_CALLS[] == 2
+        # Same two records, this time written to a manifest in `dir` and requested via `dir`.
+        mktempdir() do dir
+            write_manifest(manifest_path_in(dir), AbstractMetadata[m1, m2])
+            REAL_DOWNLOAD_CALLS[] = 0
+            download_datasets(; dir)
+            @test REAL_DOWNLOAD_CALLS[] == 2
+        end
+    finally
+        DataModes.DATA_MODE[] = saved
+    end
+end
+
+@testset "observe_metadata hook fires inside library-style functions" begin
+    register_dataset!(FakeDataset, "FakeDataset")
+    saved = DataModes.DATA_MODE[]
+    try
+        DataModes.DATA_MODE[] = :pregenerate
+        empty!(DataModes.RECORDED)
+        # Build two metadata inside a helper function; no download is requested explicitly.
+        library_constructor() = (Metadatum(:temperature; dataset=FakeDataset(), date=DateTime(2020, 6, 1)),
+                                 Metadatum(:salinity; dataset=FakeDataset(), date=DateTime(2020, 6, 1)))
+        library_constructor()
+        # Constructing them in :pregenerate mode is expected to have recorded both.
+        @test length(DataModes.RECORDED) == 2
+        names = sort([String(r.name) for r in DataModes.RECORDED])
+        @test names == ["salinity", "temperature"]
+    finally
+        DataModes.DATA_MODE[] = saved
+        empty!(DataModes.RECORDED)
+    end
+end
+
+@testset "DryRunValue minimal stub" begin
+    stub = DryRunValue()  # property access is expected to hand back another stub
+    @test getproperty(stub, :anything) isa DryRunValue
+    @test getproperty(stub, :something_else) isa DryRunValue
+    @test repr(stub) == "DryRunValue()"
+end
+
+@testset "pregenerate_dataset_manifest append (overwrite_existing=false)" begin
+    mktempdir() do dir
+        manifest = manifest_path_in(dir)
+        # First trace (default keywords): the manifest gets one `AppendDataset` entry.
+        script_one = joinpath(dir, "one.jl")
+        write(script_one, """
+        using NumericalEarth
+        using NumericalEarth.DataWrangling: Metadatum, download_dataset
+        using NumericalEarth.DataWrangling.DataModes: register_dataset!
+        using Dates: DateTime
+
+        struct AppendDataset end
+        NumericalEarth.DataWrangling.metadata_filename(::AppendDataset, name, date, region) = string(name, ".nc")
+        NumericalEarth.DataWrangling.default_download_directory(::AppendDataset) = "/tmp"
+        register_dataset!(AppendDataset, "AppendDataset")
+
+        download_dataset(Metadatum(:temperature; dataset=AppendDataset(), date=DateTime(2020, 1, 1)))
+        """)
+        pregenerate_dataset_manifest(script_one; dir)
+        parsed_after_one = TOML.parsefile(manifest)
+        @test length(parsed_after_one["AppendDataset"]) == 1
+        # A second script requests a different variable of the same dataset.
+        script_two = joinpath(dir, "two.jl")
+        write(script_two, """
+        using NumericalEarth
+        using NumericalEarth.DataWrangling: Metadatum, download_dataset
+        using NumericalEarth.DataWrangling.DataModes: register_dataset!
+        using Dates: DateTime
+
+        struct AppendDataset end
+        NumericalEarth.DataWrangling.metadata_filename(::AppendDataset, name, date, region) = string(name, ".nc")
+        NumericalEarth.DataWrangling.default_download_directory(::AppendDataset) = "/tmp"
+        register_dataset!(AppendDataset, "AppendDataset")
+
+        download_dataset(Metadatum(:salinity; dataset=AppendDataset(), date=DateTime(2020, 1, 1)))
+        """)
+        # `overwrite_existing = false` keeps the earlier entry and adds the new one.
+        pregenerate_dataset_manifest(script_two; dir, overwrite_existing = false)
+        parsed_after_two = TOML.parsefile(manifest)
+        @test length(parsed_after_two["AppendDataset"]) == 2
+        @test sort([e["variable_name"] for e in parsed_after_two["AppendDataset"]]) == ["salinity", "temperature"]
+        # Re-tracing the same script must not duplicate its entry.
+        pregenerate_dataset_manifest(script_two; dir, overwrite_existing = false)
+        parsed_after_two_repeat = TOML.parsefile(manifest)
+        @test length(parsed_after_two_repeat["AppendDataset"]) == 2
+        # `overwrite_existing = true` replaces the manifest: only the salinity entry remains.
+        pregenerate_dataset_manifest(script_two; dir, overwrite_existing = true)
+        parsed_after_overwrite = TOML.parsefile(manifest)
+        @test length(parsed_after_overwrite["AppendDataset"]) == 1
+        @test parsed_after_overwrite["AppendDataset"][1]["variable_name"] == "salinity"
+    end
+end
+
+@testset "pregenerate_dataset_manifest end-to-end" begin
+    mktempdir() do dir
+        script = joinpath(dir, "demo.jl")  # calls an undefined function before its two downloads
+        write(script, """
+        using NumericalEarth
+        using NumericalEarth.DataWrangling: Metadatum, download_dataset
+        using NumericalEarth.DataWrangling.DataModes: register_dataset!
+        using Dates: DateTime
+
+        struct DemoDataset end
+        NumericalEarth.DataWrangling.metadata_filename(::DemoDataset, name, date, region) = string(name, ".nc")
+        NumericalEarth.DataWrangling.default_download_directory(::DemoDataset) = "/tmp"
+        register_dataset!(DemoDataset, "DemoDataset")
+
+        bad = something_undefined()
+
+        function helper()
+            x = bad.field
+            y = download_dataset(Metadatum(:T; dataset=DemoDataset(), date=DateTime(2020, 1, 1)))
+            z = download_dataset(Metadatum(:S; dataset=DemoDataset(), date=DateTime(2020, 1, 1)))
+            return z
+        end
+
+        helper()
+        """)
+        pregenerate_dataset_manifest(script; dir)  # must not throw despite `something_undefined()`
+        parsed = TOML.parsefile(manifest_path_in(dir))
+        @test length(get(parsed, "DemoDataset", [])) == 2  # both requests in `helper` recorded
+        @test sort([e["variable_name"] for e in parsed["DemoDataset"]]) == ["S", "T"]
+    end
+end
+
+@testset "download_dataset chokepoint" begin
+    md = MockMetadatum(:t)
+    saved_mode = DataModes.DATA_MODE[]
+    try
+        DataModes.DATA_MODE[] = :auto  # :auto must reach `Downloads.download` exactly once
+        MOCK_DOWNLOAD_CALLS[] = 0
+        download_dataset(md)
+        @test MOCK_DOWNLOAD_CALLS[] == 1
+        # In :pregenerate mode the call must not reach `Downloads.download`.
+        DataModes.DATA_MODE[] = :pregenerate
+        MOCK_DOWNLOAD_CALLS[] = 0
+        download_dataset(md)
+        @test MOCK_DOWNLOAD_CALLS[] == 0
+        # In :strict mode the call is expected to throw.
+        DataModes.DATA_MODE[] = :strict
+        @test_throws Exception download_dataset(md)
+    finally
+        DataModes.DATA_MODE[] = saved_mode
+        empty!(DataModes.RECORDED)
+    end
+end
diff --git a/test/test_diagnostics_1.jl b/test/test_diagnostics_1.jl
index 8e05ada0..42933d57 100644
--- a/test/test_diagnostics_1.jl
+++ b/test/test_diagnostics_1.jl
@@ -23,8 +23,8 @@ for arch in test_architectures, dataset in (ECCO4Monthly(),)
grid = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom_height))
- start = DateTimeProlepticGregorian(1993, 1, 1)
- stop = DateTimeProlepticGregorian(1993, 2, 1)
+ start = DateTime(1993, 1, 1)
+ stop = DateTime(1993, 2, 1)
dates = range(start; stop, step=Month(1))
Tmeta = Metadata(:temperature; dataset, dates)
diff --git a/test/test_distributed_utils.jl b/test/test_distributed_utils.jl
index 1f29e49a..eca34746 100644
--- a/test/test_distributed_utils.jl
+++ b/test/test_distributed_utils.jl
@@ -3,7 +3,6 @@ include("runtests_setup.jl")
using MPI
MPI.Init()
-using CFTime
using Dates
using NCDatasets
using NumericalEarth.DataWrangling: metadata_path
@@ -57,9 +56,9 @@ latitude_interfaces(::TrivalBathymetry) = (0, 50)
metadata_filename(::TrivalBathymetry, name, date, region) = "trivial_bathymetry.nc"
@testset "Distributed ECCO download" begin
- dates = DateTimeProlepticGregorian(1992, 1, 1) : Month(1) : DateTimeProlepticGregorian(1994, 4, 1)
+ dates = DateTime(1992, 1, 1) : Month(1) : DateTime(1994, 4, 1)
metadata = Metadata(:u_velocity; dataset=ECCO4Monthly(), dates)
- download(metadata)
+ download_dataset(metadata)
@root for metadatum in metadata
@test isfile(metadata_path(metadatum))
diff --git a/test/test_ecco2_daily.jl b/test/test_ecco2_daily.jl
index 587ade4c..c0eed3f6 100644
--- a/test/test_ecco2_daily.jl
+++ b/test/test_ecco2_daily.jl
@@ -36,7 +36,7 @@ for arch in test_architectures
# if the primary source is unreachable
filepaths = [metadata_path(datum) for datum in metadata]
download_dataset_with_fallback(filepaths; dataset_name="$D $name") do
- download(metadata)
+ download_dataset(metadata)
end
for datum in metadata
@test isfile(metadata_path(datum))
diff --git a/test/test_ecco2_monthly.jl b/test/test_ecco2_monthly.jl
index c36cf456..8a88bd48 100644
--- a/test/test_ecco2_monthly.jl
+++ b/test/test_ecco2_monthly.jl
@@ -42,7 +42,7 @@ for arch in test_architectures, dataset in test_ecco_datasets
# if the primary source is unreachable
filepaths = [metadata_path(datum) for datum in metadata]
download_dataset_with_fallback(filepaths; dataset_name="$D $name") do
- download(metadata)
+ download_dataset(metadata)
end
for datum in metadata
@test isfile(metadata_path(datum))
diff --git a/test/test_ecco4_en4.jl b/test/test_ecco4_en4.jl
index 6d209312..0a008613 100644
--- a/test/test_ecco4_en4.jl
+++ b/test/test_ecco4_en4.jl
@@ -39,7 +39,7 @@ for arch in test_architectures, dataset in test_ecco_en4_datasets
# if the primary source is unreachable
filepaths = [metadata_path(datum) for datum in metadata]
download_dataset_with_fallback(filepaths; dataset_name="$D $name") do
- download(metadata)
+ download_dataset(metadata)
end
for datum in metadata
@test isfile(metadata_path(datum))
diff --git a/test/test_ecco_atmosphere.jl b/test/test_ecco_atmosphere.jl
index 01293380..0009b679 100644
--- a/test/test_ecco_atmosphere.jl
+++ b/test/test_ecco_atmosphere.jl
@@ -15,7 +15,7 @@ let dates = DateTime(1992, 1, 1):Month(1):DateTime(1992, 3, 1)
for name in NumericalEarth.ECCO.ECCO_atmosphere_variables
md = Metadata(name; dataset=ECCO4Monthly(), dates)
download_dataset_with_fallback(metadata_path(md); dataset_name="ECCO4Monthly $name") do
- download(md)
+ download_dataset(md)
end
end
end
diff --git a/test/test_glorys_downloading.jl b/test/test_glorys_downloading.jl
index 6dcc23b3..a09995d9 100644
--- a/test/test_glorys_downloading.jl
+++ b/test/test_glorys_downloading.jl
@@ -19,7 +19,7 @@ using Oceananigans.Fields: location
metadatum = Metadatum(variable; dataset, region)
filepath = NumericalEarth.DataWrangling.metadata_path(metadatum)
isfile(filepath) && rm(filepath; force=true)
- download(metadatum)
+ download_dataset(metadatum)
@test isfile(filepath)
end
end
diff --git a/test/test_jra55_ecco_en4_etopo_downloading.jl b/test/test_jra55_ecco_en4_etopo_downloading.jl
index 5ea6ed3f..ee6760da 100644
--- a/test/test_jra55_ecco_en4_etopo_downloading.jl
+++ b/test/test_jra55_ecco_en4_etopo_downloading.jl
@@ -30,12 +30,12 @@ end
error("what am I supposed to download?")
for variable in variables
- metadata = Metadata(variable; dates=DateTimeProlepticGregorian(1993, 1, 1), dataset)
+ metadata = Metadata(variable; dates=DateTime(1993, 1, 1), dataset)
filepath = metadata_path(metadata)
isfile(filepath) && rm(filepath; force=true)
download_dataset_with_fallback(filepath; dataset_name="$(typeof(dataset)) $variable") do
- download(metadata)
+ download_dataset(metadata)
end
@test isfile(filepath)
rm(filepath; force=true)
@@ -50,7 +50,7 @@ end
isfile(filepath) && rm(filepath; force=true)
download_dataset_with_fallback(filepath; dataset_name="ETOPO2022") do
- download(metadata)
+ download_dataset(metadata)
end
@test isfile(filepath)
end
diff --git a/test/test_mangling.jl b/test/test_mangling.jl
index 540869c1..22a32c2f 100644
--- a/test/test_mangling.jl
+++ b/test/test_mangling.jl
@@ -34,7 +34,7 @@ end
@testset "ECCO v_velocity Field uses ShiftSouth mangling end-to-end" begin
md = Metadatum(:v_velocity; dataset=ECCO4Monthly(), date=start_date)
download_dataset_with_fallback([metadata_path(md)]; dataset_name="ECCO4Monthly v_velocity") do
- download(md)
+ download_dataset(md)
end
for arch in test_architectures
field = Field(md, arch)
diff --git a/test/test_ocean_sea_ice_model.jl b/test/test_ocean_sea_ice_model.jl
index 20d60e32..555888f8 100644
--- a/test/test_ocean_sea_ice_model.jl
+++ b/test/test_ocean_sea_ice_model.jl
@@ -33,9 +33,9 @@ using ClimaSeaIce.Rheologies
for dataset in [ECCO4Monthly(), EN4Monthly()]
@info "Testing timestepping with $(typeof(dataset)) on $A"
- start_date = DateTimeProlepticGregorian(1993, 1, 1)
+ start_date = DateTime(1993, 1, 1)
time_resolution = dataset isa ECCO2Daily ? Day(1) : Month(1)
- end_date = DateTimeProlepticGregorian(1993, 2, 1)
+ end_date = DateTime(1993, 2, 1)
dates = start_date : time_resolution : end_date
initial_state = MetadataSet(:temperature, :salinity;
diff --git a/test/test_orca_grid.jl b/test/test_orca_grid.jl
index b3d73552..60e0bf7b 100644
--- a/test/test_orca_grid.jl
+++ b/test/test_orca_grid.jl
@@ -16,7 +16,7 @@ using Test
for name in (:mesh_mask, :bottom_height)
md = Metadatum(name; dataset=ORCA1())
download_dataset_with_fallback(metadata_path(md); dataset_name="ORCA1 $name") do
- download(md)
+ download_dataset(md)
end
end
@@ -165,7 +165,7 @@ end
@testset "ORCA1 bathymetry retrieval" begin
bathy_md = Metadatum(:bottom_height; dataset=ORCA1())
- download(bathy_md)
+ download_dataset(bathy_md)
path = metadata_path(bathy_md)
@test isfile(path)
diff --git a/test/test_woa.jl b/test/test_woa.jl
index 7f9293ac..b3b6d607 100644
--- a/test/test_woa.jl
+++ b/test/test_woa.jl
@@ -16,7 +16,7 @@ inpainting = NearestNeighborInpainting(10)
function ensure_woa_file(metadatum; label)
filepath = metadata_path(metadatum)
download_dataset_with_fallback(filepath; dataset_name=label) do
- download(metadatum)
+ download_dataset(metadatum)
end
return filepath
end