diff --git a/amocatlas/data_sources/__init__.py b/amocatlas/data_sources/__init__.py index ad8eb01..a63d1f9 100644 --- a/amocatlas/data_sources/__init__.py +++ b/amocatlas/data_sources/__init__.py @@ -27,6 +27,7 @@ from .zheng2024 import read_zheng2024 from .wh41n import read_41n from .noac47n import read_47n +from .nac import read_nac __all__ = [ "read_rapid", @@ -43,4 +44,5 @@ "read_zheng2024", "read_41n", "read_47n", + "read_nac", ] diff --git a/amocatlas/data_sources/nac.py b/amocatlas/data_sources/nac.py new file mode 100644 index 0000000..bd561be --- /dev/null +++ b/amocatlas/data_sources/nac.py @@ -0,0 +1,188 @@ +"""North Atlantic Current (NAC) data reader for AMOCatlas. + +This module provides functions to read and process the North Atlantic current time series from satellite and float observations. +The NAC is a key component of the Atlantic Meridional Overturning Circulation, transporting warm, saline water from the tropics to the high northern latitudes. + +The dataset includes NAC transport estimates from satellite and float observations and an NAC estimation from satellite altimetry alone. 
+
+Key functions:
+- read_nac(): Main data loading interface for North Atlantic Current data
+
+Data source: Satellite and float observations
+Location: Between tip of Greenland and northern Spain
+"""
+
+from pathlib import Path
+from typing import Union
+
+import xarray as xr
+
+# Import the modules used
+from amocatlas import logger, utilities
+from amocatlas.logger import log_error, log_info, log_warning
+from amocatlas.utilities import apply_defaults
+from amocatlas.reader_utils import ReaderUtils
+
+log = logger.log  # Use the global logger
+
+# Datasource identifier for automatic standardization
+DATASOURCE_ID = "nac"
+
+# Default list of NAC data files
+NAC_DEFAULT_FILES = ["_2_1.nc"]
+NAC_TRANSPORT_FILES = ["_2_1.nc"]
+NAC_DEFAULT_SOURCE = "https://library.ucsd.edu/dc/object/bb6635909m"
+
+NAC_METADATA = {
+    "project": "North Atlantic Current Time Series from Satellite and Float Observations (1993-2025)",
+    "weblink": "https://library.ucsd.edu/dc/object/bb6635909m",
+    "comment": "Dataset accessed and processed via http://github.com/AMOCcommunity/amocatlas",
+}
+
+NAC_FILE_METADATA = {
+    "_2_1.nc": {
+        "data_product": "6-monthly estimates of NAC transport from satellite altimetry and float observations",
+    },
+}
+
+
+@apply_defaults(NAC_DEFAULT_SOURCE, NAC_DEFAULT_FILES)
+def read_nac(
+    source: Union[str, Path, None],
+    file_list: Union[str, list[str]],
+    transport_only: bool = True,
+    data_dir: Union[str, Path, None] = None,
+    redownload: bool = False,
+    track_added_attrs: bool = False,
+) -> Union[list[xr.Dataset], tuple[list[xr.Dataset], list]]:
+    """Load the NAC (North Atlantic Current) transport datasets into xarray Datasets.
+
+    Each requested ``.nc`` file is located via ``utilities.resolve_file_path``,
+    opened with ``ReaderUtils.safe_load_dataset`` and enriched with NAC metadata.
+
+    Parameters
+    ----------
+    source : str, Path or None
+        Base URL or local path of the data. When it is a valid URL (per
+        ``utilities.is_valid_url``), ``<source>/<file>`` is passed to
+        ``utilities.resolve_file_path`` as the download URL.
+
+    file_list : str or list of str
+        Filename or list of filenames to process. ``None`` falls back to
+        NAC_DEFAULT_FILES. Replaced by NAC_TRANSPORT_FILES while transport_only is True.
+
+    transport_only : bool, optional
+        If True (default), always load NAC_TRANSPORT_FILES, replacing any
+        ``file_list`` the caller supplied.
+
+    data_dir : str, Path or None, optional
+        Local directory for the data files; created if missing. Defaults to
+        ``utilities.get_default_data_dir()``.
+
+    redownload : bool, optional
+        If True, force redownload of the data.
+
+    track_added_attrs : bool, optional
+        If True, track which attributes were added during metadata enrichment.
+
+    Returns
+    -------
+    list of xr.Dataset
+        Loaded datasets with inline and file-specific metadata attached.
+
+    tuple of (list of xr.Dataset, list)
+        Returned instead when ``track_added_attrs`` is True: the datasets plus,
+        per dataset, the attribute changes reported by
+        ``ReaderUtils.attach_metadata_with_tracking``.
+
+    Raises
+    ------
+    FileNotFoundError
+        If no dataset was loaded, e.g. because every entry of ``file_list``
+        lacks the ``.nc`` extension and was skipped.
+
+    Notes
+    -----
+    Errors raised by ``utilities.resolve_file_path`` or
+    ``ReaderUtils.safe_load_dataset`` propagate to the caller unchanged.
+
+    """
+    log_info("Starting to read NAC dataset")
+
+    # Load YAML metadata with fallback
+    global_metadata, yaml_file_metadata = ReaderUtils.load_array_metadata_with_fallback(
+        DATASOURCE_ID, NAC_METADATA
+    )
+
+    # Normalise file_list. NOTE: transport_only=True (the default) replaces any
+    # caller-supplied list with NAC_TRANSPORT_FILES.
+    if file_list is None:
+        file_list = NAC_DEFAULT_FILES
+    if transport_only:
+        file_list = NAC_TRANSPORT_FILES
+    if isinstance(file_list, str):
+        file_list = [file_list]
+
+    # Determine the local storage path
+    local_data_dir = Path(data_dir) if data_dir else utilities.get_default_data_dir()
+    local_data_dir.mkdir(parents=True, exist_ok=True)
+
+    # Print information about files being loaded
+    ReaderUtils.print_loading_info(file_list, DATASOURCE_ID, NAC_FILE_METADATA)
+
+    datasets = []
+    added_attrs_per_dataset = [] if track_added_attrs else None
+
+    for file in file_list:
+        # Only NetCDF is supported; other entries are skipped with a warning
+        if not file.lower().endswith(".nc"):
+            log_warning("Skipping unsupported file type : %s", file)
+            continue
+
+        # Build a per-file URL only when source is a remote location
+        download_url = (
+            f"{source.rstrip('/')}/{file}" if utilities.is_valid_url(source) else None
+        )
+
+        file_path = utilities.resolve_file_path(
+            file_name=file,
+            source=source,
+            download_url=download_url,
+            local_data_dir=local_data_dir,
+            redownload=redownload,
+        )
+
+        # Use ReaderUtils for consistent dataset loading
+        ds = ReaderUtils.safe_load_dataset(file_path)
+
+        # Attach metadata. One call serves both modes: with tracking on the
+        # helper returns (dataset, changes), otherwise just the dataset.
+        enriched = ReaderUtils.attach_metadata_with_tracking(
+            ds,
+            file,
+            file_path,
+            global_metadata,
+            yaml_file_metadata,
+            NAC_FILE_METADATA,
+            DATASOURCE_ID,
+            track_added_attrs=track_added_attrs,
+        )
+        if track_added_attrs:
+            ds, attr_changes = enriched
+            added_attrs_per_dataset.append(attr_changes)
+        else:
+            ds = enriched
+
+        datasets.append(ds)
+
+    # Every entry was skipped (or the list was empty): nothing to return
+    if not datasets:
+        log_error("No valid NAC files in %s", file_list)
+        raise FileNotFoundError(f"No valid data files found in {file_list}")
+
+    log_info("Successfully loaded %d NAC dataset(s)", len(datasets))
+
+    # Return shape depends on track_added_attrs (see Returns)
+    if track_added_attrs:
+        return datasets, added_attrs_per_dataset
+    return datasets
diff --git a/amocatlas/metadata/nac.yml b/amocatlas/metadata/nac.yml
new file mode 100644
index 0000000..290af35
--- /dev/null
+++ b/amocatlas/metadata/nac.yml
@@ -0,0 +1,39 @@
+metadata:
+  program: "NAC"
+  description: "North Atlantic Current Time Series from Satellite and Float Observations (1993-2025)"
+  project: "Lankhorst, Matthias (2025). North Atlantic Current Time Series from Satellite and Float Observations (1993-2025). "
+  weblink: https://library.ucsd.edu/dc/object/bb6635909m
+  comment: Dataset accessed and processed via http://github.com/AMOCcommunity/amocatlas
+  acknowledgment: >
+    Earlier versions of this dataset were created with support from the European Commission through awards EVK2-CT-2000-00087 and EVR1-CT-2001-40014 (projects 'GYROSCOPE' and 'ANIMATE'). Updated versions were partially supported through award NA15OAR4320071 from U.S. NOAA OOMD.
+  citation: >
+    Lankhorst, Matthias (2025).
North Atlantic Current Time Series from Satellite and Float Observations (1993-2025). In North Atlantic Current Time Series from Satellite and Float Observations. UC San Diego Library Digital Collections. https://doi.org/10.6075/J0D79CCG + license: + featureType: timeSeries + time_coverage_start: '1993-01-01' + time_coverage_end: '2025-07-02' + +files: + _2_1.nc: + source_url: https://library.ucsd.edu/dc/object/bb6635909m/ + data_product: "6-monthly mean NAC transport time series (1993-2025) estimated from satellite and float observations" + variable_mapping: + "NAC": TRANS_NAC + "NAC_UNCERTAINTY": TRANS_NAC_UNCERTAINTY + "NAC_PROXY": TRANS_NAC_PROXY + original_variable_metadata: + NAC: + long_name: "NAC Transport" + description: "North Atlantic Current transport time series from satellite and float observations" + units: Sverdrup + standard_name: ocean_volume_transport_across_line + NAC_UNCERTAINTY: + long_name: "Uncertainty of values in NAC variable" + description: "Uncertainty of North Atlantic Current transport time series" + units: Sverdrup + standard_name: ocean_volume_transport_across_line_uncertainty + NAC_PROXY: + long_name: "NAC Transport Proxy" + description: "Proxy for North Atlantic Current transport time series from satellite altimetry" + units: Sverdrup + standard_name: ocean_volume_transport_across_line \ No newline at end of file diff --git a/amocatlas/plotters.py b/amocatlas/plotters.py index 8d1f7d2..f56e66e 100644 --- a/amocatlas/plotters.py +++ b/amocatlas/plotters.py @@ -585,14 +585,14 @@ def plot_amoc_timeseries( # Raw plot if plot_raw: - # Use black if no monthly resampling, grey otherwise - raw_color = "black" if not resample_monthly else "grey" + # Use grey if monthly resampling is enabled, otherwise the specified color + raw_color = "grey" if resample_monthly else color ax.plot( da[time_key], da, color=raw_color, - alpha=0.7 if not resample_monthly else 0.5, - linewidth=0.5, + alpha=0.85 if not resample_monthly else 0.5, + linewidth=1 if 
not resample_monthly else 0.5, label=f"{label} (raw)", ) diff --git a/amocatlas/read.py b/amocatlas/read.py index a8c174c..bc31264 100644 --- a/amocatlas/read.py +++ b/amocatlas/read.py @@ -50,6 +50,7 @@ read_47n, read_fbc, read_arcticgateway, + read_nac, ) # Import file constants for list_files() functionality @@ -66,6 +67,7 @@ from .data_sources.noac47n import NOAC47N_DEFAULT_FILES from .data_sources.fbc import FBC_DEFAULT_FILES from .data_sources.arcticgateway import ARCTIC_DEFAULT_FILES +from .data_sources.nac import NAC_DEFAULT_FILES # Import standardization functions from . import standardise @@ -85,6 +87,7 @@ "fbc", "calafat2025", "zheng2024", + "nac", } @@ -428,6 +431,9 @@ def list_files() -> List[str]: zheng2024 = _create_array_function( read_zheng2024, "Zheng et al. 2024", available_files=ZHENG2024_DEFAULT_FILES ) +nac = _create_array_function( + read_nac, "North Atlantic Current", available_files=NAC_DEFAULT_FILES +) # Define __all__ to control what's exported @@ -445,4 +451,5 @@ def list_files() -> List[str]: "fbc", "calafat2025", "zheng2024", + "nac" ] diff --git a/amocatlas/readers.py b/amocatlas/readers.py index e6c0c1b..a81e627 100644 --- a/amocatlas/readers.py +++ b/amocatlas/readers.py @@ -41,6 +41,7 @@ read_47n, read_fbc, read_arcticgateway, + read_nac, ) log = logger.log @@ -83,6 +84,7 @@ def _get_reader(array_name: str) -> Callable[..., List[xr.Dataset]]: "47n": read_47n, "fbc": read_fbc, "arcticgateway": read_arcticgateway, + "nac": read_nac, } try: return readers[array_name.lower()] @@ -175,6 +177,7 @@ def load_dataset( - '47n' : 47N array - 'fbc' : Faroe Bank Channel overflow array - 'arcticgateway' : ARCTIC Gateway array + - 'nac' : North Atlantic Current array source : str, optional URL or local path to the data source. If None, the reader-specific default source will be used. 
diff --git a/amocatlas/standardise.py b/amocatlas/standardise.py
index 51b5a33..943d603 100644
--- a/amocatlas/standardise.py
+++ b/amocatlas/standardise.py
@@ -1168,6 +1168,39 @@ def standardise_arcticgateway(ds: xr.Dataset, file_name: str) -> xr.Dataset:
     return standardise_array(ds, file_name)
 
 
+def standardise_nac(ds: xr.Dataset, file_name: str) -> xr.Dataset:
+    """Standardise a NAC dataset (deprecated wrapper).
+
+    Emits a ``DeprecationWarning`` and then delegates unchanged to
+    ``standardise_array(ds, file_name)``. New code should call
+    ``standardise_data()`` directly, as the warning message advises.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        Dataset to standardise; passed straight through to
+        ``standardise_array``.
+    file_name : str
+        Name of the source file; passed straight through to
+        ``standardise_array``.
+
+    Returns
+    -------
+    xr.Dataset
+        Whatever ``standardise_array(ds, file_name)`` returns.
+
+    """
+    warnings.warn(
+        "standardise_nac() is deprecated and will be removed in a future version. "
+        "Use standardise_data() instead.",
+        DeprecationWarning,
+        # stacklevel=2 attributes the warning to the caller's line, not this wrapper
+        stacklevel=2,
+    )
+
+    return standardise_array(ds, file_name)
+
+
 def standardise_data(ds: xr.Dataset, file_name: str) -> xr.Dataset:
     """Standardise a dataset using YAML-based metadata.
 
diff --git a/amocatlas/utilities.py b/amocatlas/utilities.py
index 3da8178..51826ab 100644
--- a/amocatlas/utilities.py
+++ b/amocatlas/utilities.py
@@ -20,6 +20,7 @@
 import pandas as pd
 import requests
 import xarray as xr
+import numpy as np
 
 from amocatlas import logger
 from amocatlas.logger import log_debug
@@ -879,6 +880,9 @@ def mask_invalid_values(ds: xr.Dataset) -> xr.Dataset:
             # Use xarray operations to preserve lazy evaluation
             invalid_mask = xr.zeros_like(var, dtype=bool)
 
+            if np.issubdtype(var.dtype, np.datetime64):
+                continue
+
             if valid_min is not None:
                 invalid_mask = invalid_mask | (var < valid_min)
 
diff --git a/data/_2_1.nc b/data/_2_1.nc
new file mode 100644
index 0000000..4e8e01c
Binary files /dev/null and b/data/_2_1.nc differ
diff --git a/notebooks/demo.ipynb b/notebooks/demo.ipynb
index fe59b88..cd1d4d9 100644
--- a/notebooks/demo.ipynb
+++ b/notebooks/demo.ipynb
@@ -520,6 +520,49 @@
     " )"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "56145b37",
+   "metadata": {},
+   "source": [
+    "### Load NAC timeseries\n",
+    "\n",
+    "Available for plotting is a transport estimate by satellite and float observations (TRANS_NAC) and a proxy only based on satellite altimetry (TRANS_NAC_PROXY).
Both are shown here.\n", + "\n", + "Dataset is already in 6-monthly resolution, which is why the raw data is shown in the plot. If you type `resample_monthly = False` then the raw data is shown in your desired color. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6a79ce1", + "metadata": {}, + "outputs": [], + "source": [ + "# Load dataset\n", + "standardNAC = read.nac()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f5125a3", + "metadata": {}, + "outputs": [], + "source": [ + "fig, ax = plotters.plot_amoc_timeseries(\n", + " data=[standardNAC,standardNAC],\n", + " varnames=[\"TRANS_NAC\", \"TRANS_NAC_PROXY\"],\n", + " labels=[\"Transport\", \"Proxy\"],\n", + " colors=[\"red\",\"darkblue\"],\n", + " resample_monthly=False,\n", + " plot_raw=True,\n", + " title=\"NAC - North Atlantic Current\",\n", + " ylim=(15, 35),\n", + ")\n", + "ax.legend(loc=\"lower right\") # Plot NAC transport and proxy timeseries" + ] + }, { "cell_type": "markdown", "id": "f1124d71", diff --git a/tests/test_nac.py b/tests/test_nac.py new file mode 100644 index 0000000..fe7cfec --- /dev/null +++ b/tests/test_nac.py @@ -0,0 +1,102 @@ +"""Tests for NAC (North Atlantic Current) data reader. + +Simple, targeted tests focusing on module structure and constants. +Tests the basic functionality without complex data loading mocks. 
+"""
+
+from amocatlas.data_sources import nac
+from amocatlas.logger import disable_logging
+
+# Keep tests quiet
+disable_logging()
+
+
+class TestNAC:
+    """Test basic NAC module functionality."""
+
+    def test_module_constants_defined(self):
+        """Test that all required module constants exist."""
+        assert hasattr(nac, "DATASOURCE_ID")
+        assert hasattr(nac, "NAC_DEFAULT_FILES")
+        assert hasattr(nac, "NAC_TRANSPORT_FILES")
+        assert hasattr(nac, "NAC_DEFAULT_SOURCE")
+        assert hasattr(nac, "NAC_METADATA")
+        assert hasattr(nac, "NAC_FILE_METADATA")
+
+        # Check values are sensible
+        assert nac.DATASOURCE_ID == "nac"
+        assert isinstance(nac.NAC_DEFAULT_FILES, list)
+        assert isinstance(nac.NAC_TRANSPORT_FILES, list)
+        assert isinstance(nac.NAC_METADATA, dict)
+        assert isinstance(nac.NAC_FILE_METADATA, dict)
+
+    def test_default_files_configuration(self):
+        """Test default files configuration is reasonable."""
+        # Both lists must be non-empty, contain the published file and be NetCDF
+        for files in (nac.NAC_DEFAULT_FILES, nac.NAC_TRANSPORT_FILES):
+            assert len(files) > 0
+            assert "_2_1.nc" in files
+            assert all(name.endswith(".nc") for name in files)
+
+    def test_function_exists_and_callable(self):
+        """Test that main function exists and is callable."""
+        assert hasattr(nac, "read_nac")
+        assert callable(nac.read_nac)
+
+        # Check function has documentation
+        func = nac.read_nac
+        assert func.__doc__ is not None
+        assert "North Atlantic Current" in func.__doc__ or "NAC" in func.__doc__
+
+    def test_module_docstring_informative(self):
+        """Test that module has informative docstring."""
+        assert nac.__doc__ is not None
+        assert "North Atlantic Current" in nac.__doc__
+        assert "NAC" in nac.__doc__
+
+    def test_module_imports_successfully(self):
+        """Test that the module imports without errors."""
+        # This test passes if the module loaded successfully
+        assert nac is not None
+
+    def test_nac_metadata_structure(self):
+        """Test that NAC metadata has expected structure."""
+        metadata = nac.NAC_METADATA
+
+        # Should have expected keys
+        assert "project" in metadata
+        assert "weblink" in metadata
+        assert "comment" in metadata
+
+        # Values should be non-empty strings
+        assert isinstance(metadata["project"], str)
+        assert len(metadata["project"]) > 0
+        assert isinstance(metadata["weblink"], str)
+        assert len(metadata["weblink"]) > 0
+        assert isinstance(metadata["comment"], str)
+        assert len(metadata["comment"]) > 0
+
+    def test_nac_file_metadata_structure(self):
+        """Test that NAC file metadata has expected structure."""
+        file_metadata = nac.NAC_FILE_METADATA
+
+        # Should have metadata for the default _2_1.nc file
+        assert "_2_1.nc" in file_metadata
+
+        # File metadata should have expected keys
+        file_meta = file_metadata["_2_1.nc"]
+        assert "data_product" in file_meta
+        assert isinstance(file_meta["data_product"], str)
+        assert len(file_meta["data_product"]) > 0
+
+    def test_default_source_url(self):
+        """Test that default source URL is properly configured."""
+        from urllib.parse import urlparse
+
+        source = nac.NAC_DEFAULT_SOURCE
+        assert isinstance(source, str)
+        # Require an absolute http(s) URL with a host. A substring check such as
+        # '"/" in source' would also accept meaningless values like "a/b".
+        parsed = urlparse(source)
+        assert parsed.scheme in ("http", "https")
+        assert parsed.netloc
diff --git a/tests/test_utilities.py b/tests/test_utilities.py
index 1959dbe..ff2b462 100644
--- a/tests/test_utilities.py
+++ b/tests/test_utilities.py
@@ -317,6 +317,11 @@ def test_mask_invalid_values(self):
                     [35.0, -99.0, 36.0, 37.0, 100.0],  # -99 and 100 are invalid
                     {"valid_min": 30.0, "valid_max": 40.0},
                 ),
+                "datetime_marker": (
+                    ["time"],
+                    pd.date_range("2020-01-01", periods=5),
+                    {"valid_min": 0.0, "valid_max": 50.0},
+                ),
                 "no_limits": (["time"], [1, 2, 3, 4, 5]),  # No valid_min/max
             },
             coords={"time": pd.date_range("2020-01-01", periods=5)},
@@ -340,6 +345,11 @@ def test_mask_invalid_values(self):
         # Variable without limits should be unchanged
         assert result["no_limits"].equals(ds["no_limits"])
 
+        # Datetime-valued data with numeric valid_min/valid_max should be left alone
+        assert
result["datetime_marker"].equals(ds["datetime_marker"]) + assert result["datetime_marker"].attrs["valid_min"] == 0.0 + assert result["datetime_marker"].attrs["valid_max"] == 50.0 + # Attributes should be preserved assert result["temperature"].attrs["valid_min"] == 0.0 assert result["temperature"].attrs["valid_max"] == 50.0