diff --git a/docs/joss/ExampleFigure.png b/docs/joss/ExampleFigure.png new file mode 100644 index 0000000..82bce92 Binary files /dev/null and b/docs/joss/ExampleFigure.png differ diff --git a/docs/joss/HDP_Notebook_Example.png b/docs/joss/HDP_Notebook_Example.png deleted file mode 100644 index adcff17..0000000 Binary files a/docs/joss/HDP_Notebook_Example.png and /dev/null differ diff --git a/docs/joss/paper.md b/docs/joss/paper.md index d27d779..bab0967 100644 --- a/docs/joss/paper.md +++ b/docs/joss/paper.md @@ -63,9 +63,9 @@ The `HDP` allows the user to test a range of parameter values: for example, heat ## Diagnostic Notebooks and Figures -The automatic workflow compiles a "figure deck" containing diagnostic plots for multiple heatwave parameters and input variables. To simplify this process, figure decks are serialized and stored in a single Jupyter Notebook separated into descriptive sections. Basic descriptions are included in markdown cells at the top of each figure. The `HDPNotebook` class in `hdp.graphics.notebook` is utilized to facilitate the generation of these Notebooks internally, but can be called through the API as well to build custom notebooks. An example of a Notebook of the standard figure deck is shown in Figure \ref{fig:notebook}. +The automatic workflow compiles a "figure deck" containing diagnostic plots for multiple heatwave parameters and input variables. To simplify this process, figure decks are serialized and stored in a single Jupyter Notebook separated into descriptive sections. Basic descriptions are included in markdown cells at the top of each figure. The `HDPNotebook` class in `hdp.graphics.notebook` is utilized to facilitate the generation of these Notebooks internally, but can be called through the API as well to build custom notebooks. An example figure of HWF from the sample figure deck is shown in Figure \ref{fig:notebook}. 
-![Example of an HDP standard figure deck \label{fig:notebook}](HDP_Notebook_Example.png) +![Example of an HDP standard figure deck \label{fig:notebook}](ExampleFigure.png) # Ongoing Work diff --git a/docs/joss/paper.pdf b/docs/joss/paper.pdf index ea9cb53..a0389ae 100644 Binary files a/docs/joss/paper.pdf and b/docs/joss/paper.pdf differ diff --git a/docs/sample_data/sample.py b/docs/sample_data/sample.py index 6f355af..b73aaff 100644 --- a/docs/sample_data/sample.py +++ b/docs/sample_data/sample.py @@ -22,8 +22,8 @@ def generate_sample_data(output_dir): if not isdir(output_dir): raise RuntimeError(f"Output directory '{output_dir}' does not exist!") - sample_control_temp = hdp.utils.generate_test_control_dataarray() - sample_warming_temp = hdp.utils.generate_test_warming_dataarray() + sample_control_temp = hdp.utils.generate_test_control_dataarray(add_noise=True) + sample_warming_temp = hdp.utils.generate_test_warming_dataarray(add_noise=True) baseline_measures = hdp.measure.format_standard_measures( temp_datasets=[sample_control_temp] diff --git a/docs/user.rst b/docs/user.rst index 3c531ba..cb2f74e 100644 --- a/docs/user.rst +++ b/docs/user.rst @@ -30,6 +30,9 @@ The HDP can be installed using PyPI. You can view the webpage `here ` code snippet. This example can be completely reproduced using tools installed with the HDP, no extra downloads required! + +First, we need a baseline temperature dataset to derive the threshold from and a test dataset to compare against. For unit testing, the HDP includes utility functions ``hdp.utils.generate_test_control_dataarray`` and ``hdp.utils.generate_test_warming_dataarray`` for generating mock datasets. + +.. code-block:: python + + import hdp + + sample_control_temp = hdp.utils.generate_test_control_dataarray(add_noise=True) + sample_warming_temp = hdp.utils.generate_test_warming_dataarray(add_noise=True) + +The sample control temperatures are generated using the following equation over a time series :math:`t`: + +.. 
math:: + + y_1(t) = 20 + 2\sin{\left(2\pi \left(\frac{\beta + t}{365}\right)\right)} + +Where :math:`\beta = 270` for the Northern hemisphere and :math:`\beta = 90` for the Southern hemisphere. A latitudinal gradient is then applied for latitudes :math:`l` + +.. math:: + y_2(t) = y_1 - 10\frac{|l|}{90} + +This essentially simulates a seasonally-varying temperature pattern that decreases with latitude. Optionally, a random noise filter can be applied to create variance across multiple definitions and percentiles. + +Before we can use the data with HDP functions, we must format the measures to convert units and variable names if necessary: + +.. code-block:: python + + baseline_measures = hdp.measure.format_standard_measures( + temp_datasets=[sample_control_temp] + ) + test_measures = hdp.measure.format_standard_measures( + temp_datasets=[sample_warming_temp] + ) + +Next, we define the range of percentiles to sample over and then derive the thresholds from the baseline (control) dataset: + +.. code-block:: python + + import numpy as np + + percentiles = np.arange(0.9, 1.0, 0.01) + + thresholds_dataset = hdp.threshold.compute_thresholds( + baseline_measures, + percentiles + ) + + +Once we have a threshold, we can specify the heatwave definitions to sample and then calculate the heatwave metrics across all thresholds and definitions: + +.. code-block:: python + + definitions = [[3,0,0], [3,1,1], [4,0,0], [4,1,1], [5,0,0], [5,1,1]] + + metrics_dataset = hdp.metric.compute_group_metrics(test_measures, thresholds_dataset, definitions, include_threshold=True) + +By setting ``include_threshold=True``, we ensure the output dataset stored in ``metrics_dataset`` contains the threshold dataset as well. This can result in very large arrays for larger datasets (particularly large ensembles), so this is an optional parameter. + +The output is a standard ``xarray.Dataset``, so we can save as a netCDF file just as any other ``xarray.Dataset``: + +.. 
code-block:: python + + output_dir = "." + metrics_dataset.to_netcdf(f"{output_dir}/sample_hw_metrics.nc", mode='w') + +Similarly, the input datasets (control and warming) are ``xarray.DataArray`` which can be converted to ``xarray.Dataset`` to save these as netCDF files for further inspection: + +.. code-block:: python + + sample_control_temp = sample_control_temp.to_dataset() + sample_control_temp.attrs["description"] = "Mock control temperature dataset generated by HDP for unit testing." + sample_control_temp.to_netcdf(f"{output_dir}/sample_control_temp.nc", mode='w') + + sample_warming_temp = sample_warming_temp.to_dataset() + sample_warming_temp.attrs["description"] = "Mock temperature dataset with warming trend generated by HDP for unit testing." + +Finally, we can generate a figure deck for this dataset and save the resulting summary figures to a notebook: + +.. code-block:: python + + figure_notebook = create_notebook(metrics_dataset) + figure_notebook.save_notebook(f"{output_dir}/sample_hw_summary_figures.ipynb") -Example: Generating Heatwave Diagnostics +Example: LENS2 Heatwave Diagnostics ------------------------------------------ -In this first example, we will produce heatwave metrics for one IPCC AR6 emission scenario, SSP3-7.0, run by the CESM2 climate model to produce a large ensemble called the "CESM2 Large Ensemble Community Project" or `LENS2 `_. We will explore the following set of heatwave parameters: +In this example, we will produce heatwave metrics for one IPCC AR6 emission scenario, SSP3-7.0, run by the CESM2 climate model to produce a large ensemble called the "CESM2 Large Ensemble Community Project" or `LENS2 `_. **The input data is not included in the HDP and LENS2 data must be downloaded to reproduce this section.** We will explore the following set of heatwave parameters: -.. list-table:: Example 1 Parameter Space +.. 
list-table:: LENS2 Example Parameter Space :widths: 50 50 :header-rows: 1 @@ -117,7 +206,7 @@ To fully utilize the performance enhancments offered by the HDP, we must first s client = Client(cluster) -Once a Dask cluster is initialized, we then need to organize our data into `xarray.DataArray `_ objects. The entire HDP is built around xarray data structures to ensure ease of use and remain agnostic to input file types. Since we are working with a large ensemble, we need to make sure to concatenate the ensemble members along a "member" dimension. If we weren't using a large ensemble (a single long-running simulation for example), we would just omit this step. To read data from disk, we can use the `xarray.open_mfdataset `_ function. Reading and post-processing data will look different from system to system, but the final format should be the same. Below is a list of `xarray.DataArrays` with the data structure for `baseline_tasmax` dataset visualized below: +Once a Dask cluster is initialized, we then need to organize our data into `xarray.DataArray `_ objects. The entire HDP is built around xarray data structures to ensure ease of use and remain agnostic to input file types. Since we are working with a large ensemble, we need to make sure to concatenate the ensemble members along a "member" dimension. If we weren't using a large ensemble (a single long-running simulation for example), we would just omit this step. To read data from disk, we can use the `xarray.open_mfdataset `_ function. Reading and post-processing data will look different from system to system, but the final format should be the same. Below is a list of ``xarray.DataArrays`` with the data structure for ``baseline_tasmax`` dataset visualized below: .. code-block:: python @@ -131,7 +220,7 @@ Once a Dask cluster is initialized, we then need to organize our data into `xarr .. 
image:: assets/tasmax_dataarray_example.png :width: 600 -The spatial coordinates for latitude and longitude should be named "lat" and "lon" respectively. The "time" coordinates should be decoded into `CFTime`` objects and a "member" dimension should be created if an ensemble is being used. +The spatial coordinates for latitude and longitude should be named "lat" and "lon" respectively. The "time" coordinates should be decoded into ``CFTime`` objects and a "member" dimension should be created if an ensemble is being used. To begin, we first need to format these measures so that they are in the correct units. This process will also compute heat index values using the relative humidity (rh) datasets. diff --git a/hdp/measure.py b/hdp/measure.py index 7a0876f..9d05537 100644 --- a/hdp/measure.py +++ b/hdp/measure.py @@ -101,7 +101,7 @@ def heat_index_map_wrapper(ds): dask="parallelized", input_core_dims=[[], []], output_core_dims=[[]], - output_dtypes=[float], + output_dtypes=[np.float32], dask_gufunc_kwargs={ 'allow_rechunk': False }) @@ -121,13 +121,11 @@ def apply_heat_index(temp: xarray.DataArray, rh: xarray.DataArray) -> xarray.Dat """ assert temp.attrs["units"] == "degF" assert rh.attrs["units"] == "%" - hi_da = xarray.map_blocks( - heat_index_map_wrapper, + hi_da = heat_index_map_wrapper( xarray.Dataset({ "temp": temp, "rh": rh }), - template=temp ) hi_da = hi_da.rename(f"{temp.name}_hi") hi_da.attrs["baseline_variable"] = hi_da.name diff --git a/hdp/sample.py b/hdp/sample.py deleted file mode 100644 index 6f355af..0000000 --- a/hdp/sample.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python -""" -hdp.py - -Heatwave Diagnostics Package (HDP) - -Entry point for package. 
- -Developer: Cameron Cummins -Contact: cameron.cummins@utexas.edu -""" -from hdp.graphics.notebook import create_notebook -from os.path import isdir -import hdp.measure -import hdp.threshold -import hdp.metric -import numpy as np -import sys - - -def generate_sample_data(output_dir): - if not isdir(output_dir): - raise RuntimeError(f"Output directory '{output_dir}' does not exist!") - - sample_control_temp = hdp.utils.generate_test_control_dataarray() - sample_warming_temp = hdp.utils.generate_test_warming_dataarray() - - baseline_measures = hdp.measure.format_standard_measures( - temp_datasets=[sample_control_temp] - ) - test_measures = hdp.measure.format_standard_measures( - temp_datasets=[sample_warming_temp] - ) - - percentiles = np.arange(0.9, 1.0, 0.01) - - thresholds_dataset = hdp.threshold.compute_thresholds( - baseline_measures, - percentiles - ) - - definitions = [[3,0,0], [3,1,1], [4,0,0], [4,1,1], [5,0,0], [5,1,1]] - - metrics_dataset = hdp.metric.compute_group_metrics(test_measures, thresholds_dataset, definitions, include_threshold=True) - metrics_dataset.to_netcdf(f"{output_dir}/sample_hw_metrics.nc", mode='w') - - figure_notebook = create_notebook(metrics_dataset) - figure_notebook.save_notebook(f"{output_dir}/sample_hw_summary_figures.ipynb") - - sample_control_temp = sample_control_temp.to_dataset() - sample_control_temp.attrs["description"] = "Mock control temperature dataset generated by HDP for unit testing." - sample_control_temp.to_netcdf(f"{output_dir}/sample_control_temp.nc", mode='w') - - sample_warming_temp = sample_warming_temp.to_dataset() - sample_warming_temp.attrs["description"] = "Mock temperature dataset with warming trend generated by HDP for unit testing." 
- sample_warming_temp.to_netcdf(f"{output_dir}/sample_warming_temp.nc", mode='w') - -if __name__ == "__main__": - print("Generating testing data and simulating a full data-to-figure workflow: ") - if len(sys.argv) != 2: - assert Exception("Please specifiy the path to output sample data and results to.") - generate_sample_data(sys.argv[1]) - print("Done!") \ No newline at end of file diff --git a/hdp/utils.py b/hdp/utils.py index 6e01082..fc46657 100644 --- a/hdp/utils.py +++ b/hdp/utils.py @@ -36,8 +36,8 @@ def get_func_description(func): return desc -def generate_test_warming_dataarray(start_date="2000-01-01", end_date="2049-12-31", grid_shape=(12, 10), warming_period=10): - base_data = generate_test_control_dataarray(start_date=start_date, end_date=end_date, grid_shape=grid_shape) +def generate_test_warming_dataarray(start_date="2000-01-01", end_date="2049-12-31", grid_shape=(12, 10), warming_period=100, add_noise=False): + base_data = generate_test_control_dataarray(start_date=start_date, end_date=end_date, grid_shape=grid_shape, add_noise=add_noise) base_data += xarray.DataArray(np.arange(base_data["time"].size) / (365*warming_period), dims=["time"], coords={"time": base_data["time"]}) return base_data @@ -50,7 +50,7 @@ def generate_test_rh_dataarray(start_date="2000-01-01", end_date="2049-12-31", g return base_data -def generate_test_control_dataarray(start_date="2000-01-01", end_date="2049-12-31", grid_shape=(12, 10)): +def generate_test_control_dataarray(start_date="1700-01-01", end_date="1749-12-31", grid_shape=(12, 10), add_noise=False, seed=0): time_values = xarray.date_range( start=start_date, end=end_date, @@ -58,13 +58,24 @@ def generate_test_control_dataarray(start_date="2000-01-01", end_date="2049-12-3 calendar="noleap", use_cftime=True ) - temperature_seasonal_ts = 20 + 15*np.sin(np.pi*np.arange(time_values.size, dtype=float) / 365) - temperature_seasonal_vals = np.broadcast_to(temperature_seasonal_ts, (grid_shape[0], grid_shape[1], 
temperature_seasonal_ts.size)) + north_seasonal_ts = 20+2*np.sin(2*np.pi*((270 + np.arange(time_values.size, dtype=float)) / 365)) + north_seasonal_vals = np.broadcast_to(north_seasonal_ts, (grid_shape[0], grid_shape[1], north_seasonal_ts.size)) + + south_seasonal_ts = 20+2*np.sin(2*np.pi*((90 + np.arange(time_values.size, dtype=float)) / 365)) + south_seasonal_vals = np.broadcast_to(south_seasonal_ts, (grid_shape[0], grid_shape[1], south_seasonal_ts.size)) + + temperature_seasonal_vals = np.zeros((grid_shape[0], grid_shape[1], north_seasonal_ts.size)) + temperature_seasonal_vals[:, grid_shape[1]//2:, :] = north_seasonal_vals[:, grid_shape[1]//2:, :] + temperature_seasonal_vals[:, :grid_shape[1]//2, :] = south_seasonal_vals[:, :grid_shape[1]//2, :] + if add_noise: + np.random.seed(seed) + temperature_seasonal_vals += np.random.random(temperature_seasonal_vals.shape)*(np.std(temperature_seasonal_vals) / 2) + lat_vals = np.linspace(-90, 90, grid_shape[1], dtype=float) - lat_grad = np.broadcast_to(np.abs(lat_vals) / 90 * 15, grid_shape) - temperature_seasonal_vals = temperature_seasonal_vals - np.broadcast_to(lat_grad[:, :, None], temperature_seasonal_vals.shape) + lat_grad = np.broadcast_to(np.abs(lat_vals) / 90, grid_shape) + temperature_seasonal_vals = temperature_seasonal_vals - 10*np.broadcast_to(lat_grad[:, :, None], temperature_seasonal_vals.shape) return xarray.DataArray( data=temperature_seasonal_vals, @@ -78,4 +89,4 @@ def generate_test_control_dataarray(start_date="2000-01-01", end_date="2049-12-3 attrs={ "units": "degC" } - ).chunk('auto') \ No newline at end of file + ).chunk(dict(time=-1, lat=2, lon=2)) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 7bd2794..fd2c7cc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ cartopy numpy numba nc_time_axis -dask +dask[distributed] zarr netCDF4 tqdm