Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ SeawaterPolynomials = "d496a93d-167e-4197-9f49-d3af4ff8fe40"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Thermodynamics = "b60c26fb-14c3-4610-9d3e-2d17fe7ff00c"
TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"

[weakdeps]
Expand Down Expand Up @@ -90,6 +91,7 @@ SpeedyWeather = "0.20"
StaticArrays = "1"
Statistics = "<0.0.1, 1"
Thermodynamics = "0.15.3"
TOML = "<0.0.1, 1"
WorldOceanAtlasTools = "0.6"
ZipFile = "0.10"
julia = "1.10"
2 changes: 1 addition & 1 deletion src/Bathymetry/Bathymetry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ using Printf: Printf
using Scratch: Scratch, @get_scratch!

using ..DataWrangling: Metadatum, native_grid, metadata_path,
dataset_variable_name, validate_dataset_coverage
dataset_variable_name, validate_dataset_coverage, download_dataset
using ..DataWrangling.ETOPO: ETOPO2022

include("regrid_bathymetry.jl")
Expand Down
4 changes: 2 additions & 2 deletions src/Bathymetry/regrid_bathymetry.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ function regrid_bathymetry(target_grid, metadata;
end
end

download(metadata)
download_dataset(metadata)

target_z = _regrid_bathymetry(target_grid, metadata;
height_above_water,
Expand Down Expand Up @@ -302,7 +302,7 @@ function regrid_bathymetry(target_grid::DistributedGrid, metadata;
interpolation_passes, major_basins)

# download uses @root internally; all ranks must call it
download(metadata)
download_dataset(metadata)

# Only rank 0 performs cache lookup and computation to avoid OOM
bottom_height = if arch.local_rank == 0
Expand Down
91 changes: 91 additions & 0 deletions src/DataWrangling/DataModes/DataModes.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
DataModes

Three-mode download dispatch and a declarative `NumericalEarthDataManifest.toml` for NumericalEarth.
Modes are selected by the `NUMERICALEARTH_DATA` environment variable:

| Value                 | Behavior                                                                      |
|-----------------------|-------------------------------------------------------------------------------|
| `"auto"` (default)    | Download on demand (current behavior).                                        |
| `"strict"`            | Error if any required file is missing. Never download.                        |
| `"pregenerate"`       | Trace the running script; write the manifest to `pwd()`.                      |
| `"pregenerate:<dir>"` | Same as `"pregenerate"` but write to `<dir>/NumericalEarthDataManifest.toml`. |

The filename is fixed (`NumericalEarthDataManifest.toml`) so manifests don't collide with Pkg's
`Project.toml` / `Manifest.toml` and there is one canonical manifest per directory.

See [`NumericalEarth.DataWrangling.download_dataset`](@ref) for the dispatch and
[`pregenerate_dataset_manifest`](@ref) for the trace entry point.
"""
module DataModes

using DocStringExtensions: TYPEDSIGNATURES
using TOML: TOML

using ..DataWrangling: DataWrangling, AbstractMetadata, Metadata, Metadatum, MetadataSet, BoundingBox, Column, Linear, Nearest
using ..DataWrangling: DatewiseFilename, metadata_path, default_download_directory, download_dataset

export DryRunValue
export pregenerate_dataset_manifest, download_datasets
export register_dataset!

# Active data mode for this session. Starts as `:auto` and is overwritten in
# `__init__` with the mode parsed from the `NUMERICALEARTH_DATA` environment
# variable. `observe_metadata` only records metadata when this is `:pregenerate`.
const DATA_MODE = Ref{Symbol}(:auto)

# NOTE(review): names used later in this module but not defined in this file
# (`MANIFEST_DIR`, `parse_data_mode`, `record_for_manifest`, `read_manifest`)
# presumably come from these includes — confirm before reordering them.
include("dry_run_value.jl")
include("data_manifest_wrangling.jl")
include("parse_and_rewrite_script.jl")

# Extend `DataWrangling.observe_metadata` for `Metadata`: when the data mode is
# `:pregenerate`, record `m` for the manifest; in every other mode do nothing.
# Always returns `nothing`.
function DataWrangling.observe_metadata(m::Metadata)
    if DATA_MODE[] === :pregenerate
        record_for_manifest(m)
    end
    return nothing
end

# Same hook for `MetadataSet`: record only in `:pregenerate` mode, return `nothing`.
function DataWrangling.observe_metadata(m::MetadataSet)
    if DATA_MODE[] === :pregenerate
        record_for_manifest(m)
    end
    return nothing
end

"""
$(TYPEDSIGNATURES)

Acquire a collection of datasets and return `nothing`.

Two call forms are available:

- `download_datasets(metadata...)`: acquire each metadata object passed positionally.
- `download_datasets(; dir, download_dir)`: acquire each entry read from the manifest at
  `joinpath(dir, "NumericalEarthDataManifest.toml")`.

In both forms every entry is handed to [`download_dataset`](@ref), so the current
`NUMERICALEARTH_DATA` mode applies.

# Keywords (manifest form)
- `dir`: directory containing the manifest. Defaults to `pwd()`.
- `download_dir`: when given, overrides the default download directory for every
  reconstructed entry (e.g. when login-node and compute-node filesystems differ).
"""
function download_datasets(metadata::AbstractMetadata...)
    for entry in metadata
        download_dataset(entry)
    end
    return nothing
end

function download_datasets(; dir::AbstractString = pwd(), download_dir = nothing)
    # Rebuild the entries from the manifest, then acquire them one by one.
    entries = read_manifest(; dir, download_dir)
    for entry in entries
        download_dataset(entry)
    end
    return nothing
end

# Return, as a `Vector{String}`, the file paths that `metadata_path` reports for `m`.
# For a `MetadataSet` the result is the concatenation of the paths of each member
# `m[name]`, visited in the order of `m.names`.
function expected_paths(m::AbstractMetadata)
    if m isa MetadataSet
        collected = String[]
        for name in m.names
            append!(collected, expected_paths(m[name]))
        end
        return collected
    end

    # `metadata_path` may return either a single path or a vector of paths;
    # normalise both to a `Vector{String}`.
    path = metadata_path(m)
    if path isa AbstractVector
        return collect(String, path)
    else
        return String[path]
    end
end

"""
$(TYPEDSIGNATURES)

Check that all files expected for `metadata` are present on disk.

Returns `nothing` when every expected path is an existing file. Otherwise throws one
error whose message gives the number of missing files followed by one missing path
per line.
"""
function check_files_exist(metadata::AbstractMetadata)
    missing_paths = filter(!isfile, expected_paths(metadata))

    if !isempty(missing_paths)
        # One path per line, each prefixed with a space, so the report stays readable.
        list = join(map(p -> " " * p, missing_paths), "\n")
        error("NUMERICALEARTH_DATA=strict: $(length(missing_paths)) required file(s) missing:\n$list")
    end

    return nothing
end

# Module initialiser: read `NUMERICALEARTH_DATA` (falling back to "auto"), parse it
# with `parse_data_mode`, and store the resulting mode and manifest directory.
function __init__()
    setting = get(ENV, "NUMERICALEARTH_DATA", "auto")
    mode, dir_from_env = parse_data_mode(setting)

    DATA_MODE[] = mode

    # An empty directory component means "use the current working directory";
    # otherwise the given directory is made absolute.
    if isempty(dir_from_env)
        MANIFEST_DIR[] = pwd()
    else
        MANIFEST_DIR[] = abspath(dir_from_env)
    end

    return nothing
end

end # module
Loading
Loading