From a0762bc7854d4a618e4c501d5fd839f37ba89846 Mon Sep 17 00:00:00 2001 From: Tim Reichelt Date: Mon, 28 Jul 2025 11:39:57 +0100 Subject: [PATCH 1/3] Use relative error bound for nitrogen dioxide --- .../compressor/scripts/create_error_bounds.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/climatebenchpress/compressor/scripts/create_error_bounds.py b/src/climatebenchpress/compressor/scripts/create_error_bounds.py index 057dfba..378bdb2 100644 --- a/src/climatebenchpress/compressor/scripts/create_error_bounds.py +++ b/src/climatebenchpress/compressor/scripts/create_error_bounds.py @@ -111,6 +111,19 @@ def create_error_bounds( low_error_bounds[v], mid_error_bounds[v], high_error_bounds[v] = ( get_agb_bound(datasets, percentiles=[1.00, 0.99, 0.95]) ) + elif v == "no2": + low_error_bounds[v] = { + ABS_ERROR: None, + REL_ERROR: 2 ** (-4), + } + mid_error_bounds[v] = { + ABS_ERROR: None, + REL_ERROR: 2 ** (-7), + } + high_error_bounds[v] = { + ABS_ERROR: None, + REL_ERROR: 2 ** (-10), + } else: data_range: float = (ds[v].max() - ds[v].min()).values.item() # type: ignore low_error_bounds[v] = { From 3b4c9fa4b67f8d7acc17b9328cd6a091f92df063 Mon Sep 17 00:00:00 2001 From: Tim Reichelt Date: Mon, 28 Jul 2025 14:54:18 +0100 Subject: [PATCH 2/3] Change in error bounds to balance relative and absolute errors --- .../compressor/scripts/create_error_bounds.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/climatebenchpress/compressor/scripts/create_error_bounds.py b/src/climatebenchpress/compressor/scripts/create_error_bounds.py index 378bdb2..947486c 100644 --- a/src/climatebenchpress/compressor/scripts/create_error_bounds.py +++ b/src/climatebenchpress/compressor/scripts/create_error_bounds.py @@ -60,15 +60,15 @@ ABS_ERROR = "abs_error" REL_ERROR = "rel_error" VAR_NAME_TO_ERROR_BOUND = { - "rlut": REL_ERROR, + "rlut": ABS_ERROR, "agb": REL_ERROR, "pr": REL_ERROR, "ta": ABS_ERROR, "tos": ABS_ERROR, - "10m_u_component_of_wind": REL_ERROR, - "10m_v_component_of_wind": REL_ERROR, + "10m_u_component_of_wind": ABS_ERROR, + "10m_v_component_of_wind": ABS_ERROR, "mean_sea_level_pressure": ABS_ERROR, - "no2": ABS_ERROR, + "no2": REL_ERROR, } From 98b2cd1a00d6859cc5ef94bafc4068036cea4829 Mon Sep 17 00:00:00 2001 From: Tim Reichelt Date: Tue, 29 Jul 2025 11:34:51 +0100 Subject: [PATCH 3/3] More principled derivation for no2 error bounds --- .../compressor/scripts/create_error_bounds.py | 85 ++++++++++++++++--- 1 file changed, 73 insertions(+), 12 deletions(-) diff --git a/src/climatebenchpress/compressor/scripts/create_error_bounds.py b/src/climatebenchpress/compressor/scripts/create_error_bounds.py index 947486c..cf9fb86 100644 --- a/src/climatebenchpress/compressor/scripts/create_error_bounds.py +++ b/src/climatebenchpress/compressor/scripts/create_error_bounds.py @@ -17,6 +17,49 @@ # derived from the ERA5 ensembles. ERROR_BOUNDS = "https://gist.githubusercontent.com/juntyr/bbe2780256e5f91d8f2cb2f606b7935f/raw/table-raw.csv" +# Bitwise real information for no2 data computed from the CAMS dataset. +# The numbers are taken from the supplementary material of: +# https://www.nature.com/articles/s43588-021-00156-2 +# "Compressing atmospheric data into its real information content" +# Milan Klöwer, Miha Razinger, Juan J. Dominguez, Peter D. Düben & Tim N. Palmer +# Exact data was extracted from the CSV file at: +# https://static-content.springer.com/esm/art%3A10.1038%2Fs43588-021-00156-2/MediaObjects/43588_2021_156_MOESM2_ESM.zip +# Accessed on: 29/08/2025. +NO2_REAL_INFORMATION = [ + 0.0, + 0.0, + 0.0, + 0.82609236, + 0.82609236, + 0.8401368, + 0.7781777, + 0.65409344, + 0.47173682, + 0.2915112, + 0.16627097, + 0.08410827, + 0.03733299, + 0.015123328, + 0.0058184885, + 0.0020212175, + 0.00053772517, + 9.423521e-5, + 8.782874e-6, + 5.8530753e-7, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, +] + VAR_NAME_TO_ERA5 = { # NextGEMS Icon Outgoing Longwave Radiation (OLR). @@ -112,18 +155,9 @@ def create_error_bounds( get_agb_bound(datasets, percentiles=[1.00, 0.99, 0.95]) ) elif v == "no2": - low_error_bounds[v] = { - ABS_ERROR: None, - REL_ERROR: 2 ** (-4), - } - mid_error_bounds[v] = { - ABS_ERROR: None, - REL_ERROR: 2 ** (-7), - } - high_error_bounds[v] = { - ABS_ERROR: None, - REL_ERROR: 2 ** (-10), - } + low_error_bounds[v], mid_error_bounds[v], high_error_bounds[v] = ( + get_no2_bounds(percentiles=[1.00, 0.99, 0.95]) + ) else: data_range: float = (ds[v].max() - ds[v].min()).values.item() # type: ignore low_error_bounds[v] = { @@ -190,6 +224,33 @@ def get_error_bounds( return var_ebs +def get_no2_bounds(percentiles=[1.00, 0.99, 0.95]) -> list[dict[str, Optional[float]]]: + # First we need to transform the bitwise real information into a cumulative + # distribution function. + real_information_dist = np.cumsum(NO2_REAL_INFORMATION) / np.sum( + NO2_REAL_INFORMATION + ) + no2_bounds = [] + for p in percentiles: + # Find the first position where cumulative distribution exceeds p. + # Add one for 1-based indexing. + keepbits = np.argmax(real_information_dist > p) + 1 + # There are 9 non-mantissa bits in the 32-bit floating point representation. + mantissa_keepbits = max(int(keepbits - 9), 0) + # 2^(-mantissa_keepbits) indicates the spacing between representable values. + # 2^(-mantissa_keepbits) / 2 = 2 ** (-mantissa_keepbits - 1) + # then is the maximum rounding error. + # See: https://en.wikipedia.org/wiki/Machine_epsilon + rel_error = 2 ** (-mantissa_keepbits - 1) + no2_bounds.append( + { + "abs_error": None, + "rel_error": float(rel_error), + } + ) + return no2_bounds + + def get_agb_bound( datasets: Path, percentiles=[1.00, 0.99, 0.95] ) -> list[dict[str, Optional[float]]]: