From becec7f890d13ca34119ff1dc5c385f4aa621be8 Mon Sep 17 00:00:00 2001
From: Juniper Tyree <50025784+juntyr@users.noreply.github.com>
Date: Thu, 17 Apr 2025 15:54:19 +0300
Subject: [PATCH 1/5] Draft JPEG2000 quantization fix

---
 .../compressor/compressors/jpeg2000.py        | 20 ++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/climatebenchpress/compressor/compressors/jpeg2000.py b/src/climatebenchpress/compressor/compressors/jpeg2000.py
index fd643fc..08187c5 100644
--- a/src/climatebenchpress/compressor/compressors/jpeg2000.py
+++ b/src/climatebenchpress/compressor/compressors/jpeg2000.py
@@ -17,28 +17,34 @@ class Jpeg2000(Compressor):
 
     @staticmethod
     def abs_bound_codec(dtype, error_bound):
-        # Currently, the input is transformed into the range
-        # round(min_pixel_val/ error_bound) <= x <= round(max_pixel_val / error_bound)
-        # This means any values outside this range will incur a larger error.
-        precision = error_bound
         max_pixel_val = 2**25 - 1  # maximum pixel value for our integer encoding.
 
+        data_range = float(np.amax(data)) - float(np.amin(data))
+
         # Here we use the formula for the PSNR (https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio)
         # to convert between the absolute error and the PSNR value.
         # The original PSNR formula uses the root mean square error (RMSE),
         # therefore JPEG does not guaruantee pointwise error bounds but only
         # average error bounds.
-        psnr = 20 * (math.log10(max_pixel_val) - math.log10(error_bound))
+        psnr = 20 * (math.log10(data_range) - math.log10(error_bound))
 
         return CodecStack(
+            # increase precision for better rounding during linear quantization
+            numcodecs.astype.AsType(
+                encode_dtype="float64",
+                decode_dtype="float32",
+            ),
+            # remap from [min, max] to [0, max_pixel_val]
             numcodecs_wasm_fixed_offset_scale.FixedOffsetScale(
-                offset=0,
-                scale=precision,
+                offset=float(np.amin(data)),
+                scale=data_range / max_pixel_val,
             ),
+            # round and truncate to integer values
             numcodecs_wasm_round.Round(precision=1),
             numcodecs.astype.AsType(
                 encode_dtype="int32",
                 decode_dtype="float32",
             ),
+            # apply the PSNR error bound
             numcodecs_wasm_jpeg2000.Jpeg2000(mode="psnr", psnr=psnr),
         )

From 6fe1c0c89e838f290bfecf1e8c0dda1b4bae1ee0 Mon Sep 17 00:00:00 2001
From: Tim Reichelt <treichelt6@gmail.com>
Date: Tue, 22 Apr 2025 12:26:57 +0200
Subject: [PATCH 2/5] Implementation of new interface to allow JPEG2000
 quantization

---
 .../compressor/compressors/abc.py             | 30 ++++++++++++++++---
 .../compressor/compressors/bitround.py        |  4 ++-
 .../compressor/compressors/bitround_pco.py    |  4 ++-
 .../compressor/compressors/jpeg2000.py        | 17 ++++++++---
 .../compressor/compressors/stochround.py      |  2 +-
 .../compressor/compressors/sz3.py             |  4 +--
 .../compressor/compressors/tthresh.py         |  4 +--
 .../compressor/compressors/zfp.py             |  2 +-
 8 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/src/climatebenchpress/compressor/compressors/abc.py b/src/climatebenchpress/compressor/compressors/abc.py
index 1a16813..b9ab020 100644
--- a/src/climatebenchpress/compressor/compressors/abc.py
+++ b/src/climatebenchpress/compressor/compressors/abc.py
@@ -57,12 +57,24 @@ class Compressor(ABC):
 
     @staticmethod
     @abstractmethod
-    def abs_bound_codec(dtype: np.dtype, error_bound: float) -> Codec:
+    def abs_bound_codec(
+        error_bound: float,
+        *,
+        dtype: Optional[np.dtype] = None,
+        data_abs_min: Optional[float] = None,
+        data_abs_max: Optional[float] = None,
+    ) -> Codec:
         pass
 
     @staticmethod
     @abstractmethod
-    def rel_bound_codec(dtype: np.dtype, error_bound: float) -> Codec:
+    def rel_bound_codec(
+        error_bound: float,
+        *,
+        dtype: Optional[np.dtype] = None,
+        data_abs_min: Optional[float] = None,
+        data_abs_max: Optional[float] = None,
+    ) -> Codec:
         pass
 
     @classmethod
@@ -116,9 +128,19 @@ def build(
             new_codecs: dict[VariableName, Codec] = dict()
             for var, eb in eb_per_var.items():
                 if eb.abs_error is not None and cls.has_abs_error_impl:
-                    new_codecs[var] = cls.abs_bound_codec(dtypes[var], eb.abs_error)
+                    new_codecs[var] = cls.abs_bound_codec(
+                        eb.abs_error,
+                        dtype=dtypes[var],
+                        data_abs_min=data_abs_min[var],
+                        data_abs_max=data_abs_max[var],
+                    )
                 elif eb.rel_error is not None and cls.has_rel_error_impl:
-                    new_codecs[var] = cls.rel_bound_codec(dtypes[var], eb.rel_error)
+                    new_codecs[var] = cls.rel_bound_codec(
+                        eb.rel_error,
+                        dtype=dtypes[var],
+                        data_abs_min=data_abs_min[var],
+                        data_abs_max=data_abs_max[var],
+                    )
                 else:
                     # This should never happen as we have already transformed the error bounds.
                     # If this happens, it means there is a bug in the implementation.
diff --git a/src/climatebenchpress/compressor/compressors/bitround.py b/src/climatebenchpress/compressor/compressors/bitround.py
index 599a7a9..836083a 100644
--- a/src/climatebenchpress/compressor/compressors/bitround.py
+++ b/src/climatebenchpress/compressor/compressors/bitround.py
@@ -13,7 +13,9 @@ class BitRound(Compressor):
     description = "Bit Rounding"
 
     @staticmethod
-    def rel_bound_codec(dtype, error_bound):
+    def rel_bound_codec(error_bound, *, dtype=None, **kwargs):
+        assert dtype is not None, "dtype must be provided"
+
         keepbits = compute_keepbits(dtype, error_bound)
         return CodecStack(
             numcodecs_wasm_bit_round.BitRound(keepbits=keepbits),
diff --git a/src/climatebenchpress/compressor/compressors/bitround_pco.py b/src/climatebenchpress/compressor/compressors/bitround_pco.py
index 5fda857..e1f34aa 100644
--- a/src/climatebenchpress/compressor/compressors/bitround_pco.py
+++ b/src/climatebenchpress/compressor/compressors/bitround_pco.py
@@ -14,7 +14,9 @@ class BitRoundPco(Compressor):
     description = "Bit Rounding + PCodec"
 
     @staticmethod
-    def rel_bound_codec(dtype, error_bound):
+    def rel_bound_codec(error_bound, *, dtype=None, **kwargs):
+        assert dtype is not None, "dtype must be provided"
+
         keepbits = compute_keepbits(dtype, error_bound)
         return CodecStack(
             numcodecs_wasm_bit_round.BitRound(keepbits=keepbits),
diff --git a/src/climatebenchpress/compressor/compressors/jpeg2000.py b/src/climatebenchpress/compressor/compressors/jpeg2000.py
index 08187c5..5faad74 100644
--- a/src/climatebenchpress/compressor/compressors/jpeg2000.py
+++ b/src/climatebenchpress/compressor/compressors/jpeg2000.py
@@ -16,10 +16,19 @@ class Jpeg2000(Compressor):
     description = "JPEG 2000"
 
     @staticmethod
-    def abs_bound_codec(dtype, error_bound):
+    def abs_bound_codec(
+        error_bound,
+        *,
+        data_abs_min=None,
+        data_abs_max=None,
+        **kwargs,
+    ):
+        assert data_abs_min is not None, "data_abs_min must be provided"
+        assert data_abs_max is not None, "data_abs_max must be provided"
+
         max_pixel_val = 2**25 - 1  # maximum pixel value for our integer encoding.
 
-        data_range = float(np.amax(data)) - float(np.amin(data))
+        data_range = data_abs_max - data_abs_min
 
         # Here we use the formula for the PSNR (https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio)
         # to convert between the absolute error and the PSNR value.
@@ -36,14 +45,14 @@ def abs_bound_codec(dtype, error_bound):
             ),
             # remap from [min, max] to [0, max_pixel_val]
             numcodecs_wasm_fixed_offset_scale.FixedOffsetScale(
-                offset=float(np.amin(data)),
+                offset=data_abs_min,
                 scale=data_range / max_pixel_val,
             ),
             # round and truncate to integer values
             numcodecs_wasm_round.Round(precision=1),
             numcodecs.astype.AsType(
                 encode_dtype="int32",
-                decode_dtype="float32",
+                decode_dtype="float64",
             ),
             # apply the PSNR error bound
             numcodecs_wasm_jpeg2000.Jpeg2000(mode="psnr", psnr=psnr),
diff --git a/src/climatebenchpress/compressor/compressors/stochround.py b/src/climatebenchpress/compressor/compressors/stochround.py
index bcdab95..c7a756a 100644
--- a/src/climatebenchpress/compressor/compressors/stochround.py
+++ b/src/climatebenchpress/compressor/compressors/stochround.py
@@ -13,7 +13,7 @@ class StochRound(Compressor):
     description = "Stochastic Rounding"
 
     @staticmethod
-    def abs_bound_codec(dtype, error_bound):
+    def abs_bound_codec(error_bound, **kwargs):
         precision = error_bound
         return CodecStack(
             numcodecs_wasm_uniform_noise.UniformNoise(scale=precision / 2, seed=42),
diff --git a/src/climatebenchpress/compressor/compressors/sz3.py b/src/climatebenchpress/compressor/compressors/sz3.py
index ddc148b..b4f6c08 100644
--- a/src/climatebenchpress/compressor/compressors/sz3.py
+++ b/src/climatebenchpress/compressor/compressors/sz3.py
@@ -10,11 +10,11 @@ class Sz3(Compressor):
     description = "SZ3"
 
     @staticmethod
-    def abs_bound_codec(dtype, error_bound):
+    def abs_bound_codec(error_bound, **kwargs):
         return numcodecs_wasm_sz3.Sz3(eb_mode="abs", eb_abs=error_bound)
 
     @staticmethod
-    def rel_bound_codec(dtype, error_bound):
+    def rel_bound_codec(error_bound, **kwargs):
         # SZ3 will not ensure that the relative error bound is strictly met.
         # Internally, SZ3 transforms the relative error bound to an absolute error bound
         # based on the range of the input data:
diff --git a/src/climatebenchpress/compressor/compressors/tthresh.py b/src/climatebenchpress/compressor/compressors/tthresh.py
index 68ba853..401186f 100644
--- a/src/climatebenchpress/compressor/compressors/tthresh.py
+++ b/src/climatebenchpress/compressor/compressors/tthresh.py
@@ -10,9 +10,9 @@ class Tthresh(Compressor):
     description = "tthresh"
 
     @staticmethod
-    def abs_bound_codec(dtype, error_bound):
+    def abs_bound_codec(error_bound, **kwargs):
         return numcodecs_wasm_tthresh.Tthresh(eb_mode="rmse", eb_rmse=error_bound)
 
     @staticmethod
-    def rel_bound_codec(dtype, error_bound):
+    def rel_bound_codec(error_bound, **kwargs):
         return numcodecs_wasm_tthresh.Tthresh(eb_mode="eps", eb_rmse=error_bound)
diff --git a/src/climatebenchpress/compressor/compressors/zfp.py b/src/climatebenchpress/compressor/compressors/zfp.py
index a6611b8..a09f8a5 100644
--- a/src/climatebenchpress/compressor/compressors/zfp.py
+++ b/src/climatebenchpress/compressor/compressors/zfp.py
@@ -18,5 +18,5 @@ class Zfp(Compressor):
     # See https://zfp.readthedocs.io/en/release1.0.1/faq.html#q-relerr for more details.
 
     @staticmethod
-    def abs_bound_codec(dtype, error_bound):
+    def abs_bound_codec(error_bound, **kwargs):
         return numcodecs_wasm_zfp.Zfp(mode="fixed-accuracy", tolerance=error_bound)

From 85e67ea251e0bf3d9b381a73d8497a5509dc9edf Mon Sep 17 00:00:00 2001
From: Tim Reichelt <treichelt6@gmail.com>
Date: Tue, 22 Apr 2025 18:35:29 +0200
Subject: [PATCH 3/5] Pass in data_min not data_abs_min to jpeg2000

---
 .../compressor/compressors/abc.py             | 22 ++++++++++++-------
 .../compressor/compressors/jpeg2000.py        | 12 +++++-----
 .../compressor/scripts/compress.py            | 20 +++++++++++------
 3 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/src/climatebenchpress/compressor/compressors/abc.py b/src/climatebenchpress/compressor/compressors/abc.py
index b9ab020..5156afe 100644
--- a/src/climatebenchpress/compressor/compressors/abc.py
+++ b/src/climatebenchpress/compressor/compressors/abc.py
@@ -61,8 +61,8 @@ def abs_bound_codec(
         error_bound: float,
         *,
         dtype: Optional[np.dtype] = None,
-        data_abs_min: Optional[float] = None,
-        data_abs_max: Optional[float] = None,
+        data_min: Optional[float] = None,
+        data_max: Optional[float] = None,
     ) -> Codec:
         pass
 
@@ -72,8 +72,8 @@ def rel_bound_codec(
         error_bound: float,
         *,
         dtype: Optional[np.dtype] = None,
-        data_abs_min: Optional[float] = None,
-        data_abs_max: Optional[float] = None,
+        data_min: Optional[float] = None,
+        data_max: Optional[float] = None,
     ) -> Codec:
         pass
 
@@ -83,6 +83,8 @@ def build(
         dtypes: dict[VariableName, np.dtype],
         data_abs_min: dict[VariableName, float],
         data_abs_max: dict[VariableName, float],
+        data_min: dict[VariableName, float],
+        data_max: dict[VariableName, float],
         error_bounds: list[dict[VariableName, ErrorBound]],
     ) -> dict[VariantName, list[NamedPerVariableCodec]]:
         """
@@ -102,6 +104,10 @@ def build(
             Dict mapping from variable name to minimum absolute value for the variable.
         data_abs_max : dict[VariableName, float]
             Dict mapping from variable name to maximum absolute value for the variable.
+        data_min : dict[VariableName, float]
+            Dict mapping from variable name to minimum value for the variable.
+        data_max : dict[VariableName, float]
+            Dict mapping from variable name to maximum value for the variable.
         error_bounds: list[ErrorBound]
             List of error bounds to use for the compressor.
 
@@ -131,15 +137,15 @@ def build(
                     new_codecs[var] = cls.abs_bound_codec(
                         eb.abs_error,
                         dtype=dtypes[var],
-                        data_abs_min=data_abs_min[var],
-                        data_abs_max=data_abs_max[var],
+                        data_min=data_min[var],
+                        data_max=data_max[var],
                     )
                 elif eb.rel_error is not None and cls.has_rel_error_impl:
                     new_codecs[var] = cls.rel_bound_codec(
                         eb.rel_error,
                         dtype=dtypes[var],
-                        data_abs_min=data_abs_min[var],
-                        data_abs_max=data_abs_max[var],
+                        data_min=data_min[var],
+                        data_max=data_max[var],
                     )
                 else:
                     # This should never happen as we have already transformed the error bounds.
diff --git a/src/climatebenchpress/compressor/compressors/jpeg2000.py b/src/climatebenchpress/compressor/compressors/jpeg2000.py
index 5faad74..2a74834 100644
--- a/src/climatebenchpress/compressor/compressors/jpeg2000.py
+++ b/src/climatebenchpress/compressor/compressors/jpeg2000.py
@@ -19,16 +19,16 @@ class Jpeg2000(Compressor):
     def abs_bound_codec(
         error_bound,
         *,
-        data_abs_min=None,
-        data_abs_max=None,
+        data_min=None,
+        data_max=None,
         **kwargs,
     ):
-        assert data_abs_min is not None, "data_abs_min must be provided"
-        assert data_abs_max is not None, "data_abs_max must be provided"
+        assert data_min is not None, "data_min must be provided"
+        assert data_max is not None, "data_max must be provided"
 
         max_pixel_val = 2**25 - 1  # maximum pixel value for our integer encoding.
 
-        data_range = data_abs_max - data_abs_min
+        data_range = data_max - data_min
 
         # Here we use the formula for the PSNR (https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio)
         # to convert between the absolute error and the PSNR value.
@@ -45,7 +45,7 @@ def abs_bound_codec(
             ),
             # remap from [min, max] to [0, max_pixel_val]
             numcodecs_wasm_fixed_offset_scale.FixedOffsetScale(
-                offset=data_abs_min,
+                offset=data_min,
                 scale=data_range / max_pixel_val,
             ),
             # round and truncate to integer values
diff --git a/src/climatebenchpress/compressor/scripts/compress.py b/src/climatebenchpress/compressor/scripts/compress.py
index 251f05d..bd039e2 100644
--- a/src/climatebenchpress/compressor/scripts/compress.py
+++ b/src/climatebenchpress/compressor/scripts/compress.py
@@ -15,11 +15,7 @@
 from numcodecs_observers.walltime import WalltimeObserver
 from numcodecs_wasm import WasmCodecInstructionCounterObserver
 
-from ..compressors.abc import (
-    Compressor,
-    ErrorBound,
-    NamedPerVariableCodec,
-)
+from ..compressors.abc import Compressor, ErrorBound, NamedPerVariableCodec
 from ..monitor import progress_bar
 
 
@@ -49,11 +45,19 @@ def compress(
             continue
 
         ds = xr.open_dataset(dataset, chunks=dict(), engine="zarr")
-        ds_dtypes, ds_abs_mins, ds_abs_maxs = dict(), dict(), dict()
+        ds_dtypes, ds_abs_mins, ds_abs_maxs, ds_mins, ds_maxs = (
+            dict(),
+            dict(),
+            dict(),
+            dict(),
+            dict(),
+        )
         for v in ds:
             abs_vals = xr.ufuncs.abs(ds[v])
             ds_abs_mins[v] = abs_vals.min().values.item()
             ds_abs_maxs[v] = abs_vals.max().values.item()
+            ds_mins[v] = ds[v].min().values.item()
+            ds_maxs[v] = ds[v].max().values.item()
             ds_dtypes[v] = ds[v].dtype
 
         error_bounds = get_error_bounds(datasets_error_bounds, dataset.parent.name)
@@ -64,7 +68,9 @@ def compress(
                 continue
 
             compressor_variants: dict[str, list[NamedPerVariableCodec]] = (
-                compressor.build(ds_dtypes, ds_abs_mins, ds_abs_maxs, error_bounds)
+                compressor.build(
+                    ds_dtypes, ds_abs_mins, ds_abs_maxs, ds_mins, ds_maxs, error_bounds
+                )
             )
 
             for compr_name, named_codecs in compressor_variants.items():

From 2373a7a613eb494cf559ce4bd65ea422ece5cad7 Mon Sep 17 00:00:00 2001
From: Tim Reichelt <treichelt6@gmail.com>
Date: Thu, 24 Apr 2025 12:56:26 +0200
Subject: [PATCH 4/5] Adjust max_pixel_value bound

---
 src/climatebenchpress/compressor/compressors/jpeg2000.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/climatebenchpress/compressor/compressors/jpeg2000.py b/src/climatebenchpress/compressor/compressors/jpeg2000.py
index 2a74834..2f0a6b9 100644
--- a/src/climatebenchpress/compressor/compressors/jpeg2000.py
+++ b/src/climatebenchpress/compressor/compressors/jpeg2000.py
@@ -26,7 +26,7 @@ def abs_bound_codec(
         assert data_min is not None, "data_min must be provided"
         assert data_max is not None, "data_max must be provided"
 
-        max_pixel_val = 2**25 - 1  # maximum pixel value for our integer encoding.
+        max_pixel_val = 2**24 - 1  # maximum pixel value for our integer encoding.
 
         data_range = data_max - data_min
 

From 44653bd0b4fbb613e8d30bd461f178550629df1c Mon Sep 17 00:00:00 2001
From: Tim Reichelt <treichelt6@gmail.com>
Date: Thu, 24 Apr 2025 13:41:59 +0200
Subject: [PATCH 5/5] Switch to unsigned integer

---
 src/climatebenchpress/compressor/compressors/jpeg2000.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/climatebenchpress/compressor/compressors/jpeg2000.py b/src/climatebenchpress/compressor/compressors/jpeg2000.py
index 2f0a6b9..1d73bba 100644
--- a/src/climatebenchpress/compressor/compressors/jpeg2000.py
+++ b/src/climatebenchpress/compressor/compressors/jpeg2000.py
@@ -26,7 +26,7 @@ def abs_bound_codec(
         assert data_min is not None, "data_min must be provided"
         assert data_max is not None, "data_max must be provided"
 
-        max_pixel_val = 2**24 - 1  # maximum pixel value for our integer encoding.
+        max_pixel_val = 2**25 - 1  # maximum pixel value for our integer encoding.
 
         data_range = data_max - data_min
 
@@ -51,7 +51,7 @@ def abs_bound_codec(
             # round and truncate to integer values
             numcodecs_wasm_round.Round(precision=1),
             numcodecs.astype.AsType(
-                encode_dtype="int32",
+                encode_dtype="uint32",
                 decode_dtype="float64",
             ),
             # apply the PSNR error bound