From e2fcdc79b0c685e859f5310bd0e8f6a35fc835e7 Mon Sep 17 00:00:00 2001
From: Tim Reichelt <treichelt6@gmail.com>
Date: Fri, 29 May 2026 15:25:01 +0100
Subject: [PATCH] Add IFS cloud ice water content dataset

---
 .../data_loader/datasets/all.py               |  1 +
 .../datasets/ifs_cloud_ice_water_content.py   | 80 +++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 src/climatebenchpress/data_loader/datasets/ifs_cloud_ice_water_content.py

diff --git a/src/climatebenchpress/data_loader/datasets/all.py b/src/climatebenchpress/data_loader/datasets/all.py
index 8b7361a..0f46b84 100644
--- a/src/climatebenchpress/data_loader/datasets/all.py
+++ b/src/climatebenchpress/data_loader/datasets/all.py
@@ -4,6 +4,7 @@
 from .cmip6.all import *
 from .era5 import *
 from .esa_biomass_cci import *
+from .ifs_cloud_ice_water_content import *
 from .ifs_humidity import *
 from .ifs_uncompressed import *
 from .nextgems import *
diff --git a/src/climatebenchpress/data_loader/datasets/ifs_cloud_ice_water_content.py b/src/climatebenchpress/data_loader/datasets/ifs_cloud_ice_water_content.py
new file mode 100644
index 0000000..8df4f9e
--- /dev/null
+++ b/src/climatebenchpress/data_loader/datasets/ifs_cloud_ice_water_content.py
@@ -0,0 +1,80 @@
+__all__ = ["IFSCloudIceWaterContentDataset"]
+
+import argparse
+from pathlib import Path
+
+import xarray as xr
+
+from .. import (
+    monitor,
+    open_downloaded_canonicalized_dataset,
+    open_downloaded_tiny_canonicalized_dataset,
+)
+from .abc import Dataset
+from .ifs_uncompressed import load_hplp_data, regrid_to_regular
+
+
+class IFSCloudIceWaterContentDataset(Dataset):
+    """Dataset for the cloud ice water content field of the uncompressed IFS data.
+
+    Contains data from the [hplp](https://apps.ecmwf.int/ifs-experiments/rd/hplp/)
+    experiment from the Integrated Forecasting System (IFS) model. Crucially,
+    this dataset contains uncompressed 64-bit floating point data.
+    """
+
+    name = "ifs-cloud-ice-water-content"
+
+    @staticmethod
+    def download(download_path: Path, progress: bool = True):
+        """Fetch ``ciwc``, regrid it from O400 to [0.25, 0.25] and store it as zarr.
+
+        Returns at once if ``download.done`` exists; creates it after the write.
+        """
+        donefile = download_path / "download.done"
+        if donefile.exists():
+            return
+
+        ds = load_hplp_data(leveltype="ml", gridtype="reduced_gg", step=0)[["ciwc"]]
+        in_grid, out_grid = {"grid": "O400"}, {"grid": [0.25, 0.25]}
+        ds_regridded = regrid_to_regular(ds, in_grid=in_grid, out_grid=out_grid)
+        downloadfile = download_path / "ifs_cloud_ice_water_content.zarr"
+        with monitor.progress_bar(progress):
+            ds_regridded.to_zarr(downloadfile, mode="w", compute=False).compute()
+        # Write the marker only after a successful write; without it the guard
+        # above never fires and every call would redo the whole download.
+        donefile.touch()
+
+    @staticmethod
+    def open(download_path: Path) -> xr.Dataset:
+        """Open the zarr store written by ``download`` and set CF axis attributes.
+
+        Keeps only the first time step; ``level`` ends up in at most two chunks.
+        """
+        store = download_path / "ifs_cloud_ice_water_content.zarr"
+        ds = xr.open_zarr(store).drop_encoding().isel(time=slice(0, 1))
+        level_chunk = (ds["level"].size // 2) + 1
+        chunks = {"latitude": -1, "longitude": -1, "time": -1, "level": level_chunk}
+        ds = ds.chunk(chunks)
+
+        # Needed to make the dataset CF-compliant.
+        ds.longitude.attrs["axis"] = "X"
+        ds.latitude.attrs["axis"] = "Y"
+        ds.level.attrs["axis"] = "Z"
+        ds.time.attrs["standard_name"] = "time"
+        return ds
+
+
+if __name__ == "__main__":
+    # Script entry point: open the canonicalized and the tiny canonicalized
+    # dataset under --basepath, then print each variable with its dimensions.
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--basepath", type=Path, default=Path())
+    basepath = cli.parse_args().basepath
+
+    dataset_cls = IFSCloudIceWaterContentDataset
+    canonical = open_downloaded_canonicalized_dataset(dataset_cls, basepath=basepath)
+    # The return value of the tiny variant is not needed here.
+    open_downloaded_tiny_canonicalized_dataset(dataset_cls, basepath=basepath)
+
+    for var_name, data_array in canonical.items():
+        print(f"- {var_name}: {data_array.dims}")