Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
d56b053
small fix in RADOLAN notebook (still using low-leve func), adding x_g…
cchwala Mar 27, 2026
29db954
Add high-level RADOLAN merging class. This might still need some adju…
cchwala Mar 27, 2026
a486bfe
First working version of the high-level RADOLAN class with working ex…
cchwala Mar 27, 2026
030c5cc
use random start index for audit station search in notebook and reran…
cchwala Mar 27, 2026
bc15ff1
need to adjust radolan_io test for 'cml' instead of 'cml_ericsson'
cchwala Apr 8, 2026
4ed47b7
Update MergeRADOLAN class to correctly pass allow_gauge_and_cml to rh…
cchwala Apr 8, 2026
55e299a
Add test for RADOLAN merge functionality using MergeRADOLAN class to …
cchwala Apr 8, 2026
899c255
Fix pylint warning by disabling dangerous-default-value for bogra_kwa…
cchwala Apr 8, 2026
2477d07
try again, fix pylint warning by disabling dangerous-default-value fo…
cchwala Apr 8, 2026
a35421a
force new test to fail to check that it works correctly
cchwala Apr 8, 2026
c706a50
make test green again
cchwala Apr 8, 2026
cdd7b88
fix transform_openmrg_data_for_old_radolan_code with pd.concat instea…
cchwala Apr 8, 2026
2696a39
force float dytpe in station interpolation to avoid problem when havi…
cchwala Apr 8, 2026
b117abe
avoid too strong requirment of having datetime64 as timestamp because…
cchwala Apr 8, 2026
07d11b7
add test for RADOLAN merge with gauge and CML datasets
cchwala Apr 8, 2026
bbac393
fix test
cchwala Apr 8, 2026
9c32ddc
add test for error handling in transform_openmrg_data_for_old_radolan…
cchwala Apr 8, 2026
5bdd29b
Add RADOLAN method to comparison in OpenMRG and OpenRainER high-leve…
cchwala Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 68 additions & 16 deletions docs/notebooks/OpenRainER_case_highlevel.ipynb

Large diffs are not rendered by default.

213 changes: 88 additions & 125 deletions docs/notebooks/openMRG_case_RADOLAN.ipynb

Large diffs are not rendered by default.

113 changes: 80 additions & 33 deletions docs/notebooks/openMRG_case_highlevel.ipynb

Large diffs are not rendered by default.

133 changes: 132 additions & 1 deletion src/mergeplg/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import poligrain as plg
import xarray as xr

from mergeplg import bk_functions, interpolate
from mergeplg import bk_functions, interpolate, radolan


class MergeBase:
Expand Down Expand Up @@ -819,3 +819,134 @@ def __call__(
da = xr.DataArray(data=adjusted, coords=self.grid_coords, dims=self.grid_dims)
da.coords["time"] = self._get_timestamp(da_cmls, da_gauges)
return da


class MergeRADOLAN(MergeBase):
"""Merge CML and radar using RADOLAN method

Merges the provided radar field in ds_rad to CML and rain gauge
observations by using the RADOLAN method, which is an inverse distance
weighting method with a specific weighting function.
"""

def __init__( # pylint: disable=dangerous-default-value
self,
ds_rad,
ds_cmls=None,
ds_gauges=None,
grid_location_radar="center",
radar_threshold=0.01,
nnear=8,
max_distance=60000,
fill_radar=True,
range_checks=None,
idw_method="radolan",
p=0.7,
bogra_kwargs={ # noqa: B006
"max_iterations": 100,
"max_allowed_relative_diff": 3,
},
):
"""Initialize merging object.

Parameters
----------
ds_rad: xarray.Dataset
Dataset providing the grid for merging. Must contain
projected x_grid and y_grid coordinates.
ds_cmls: xarray.Dataset
CML geometry. Must contain the projected midpoint
coordinates (x, y).
ds_gauges: xarray.Dataset
Gauge geometry. Must contain the coordinates projected
coordinates (x, y).
grid_location_radar: str
Position of radar.
radar_threshold: float
Radar values below this threshold are set to zero rainfall and
ignored in the adjustment.
idw_method : str
With the default ("radolan") an exponential decay with range, as in the
original RADOLAN implementation, is used for IDW. When set to "standard"
a normal IDW decay with 1/d**p is used.
nnear : int
Number of nearest neighbors to use for interpolation.
p : float
The exponent used for "standard" IDW with 1/d**p.
max_distance : int or float
The maximum distance around a station (or mid-point of a CML) to use
for interpolation.
bogra_kwargs : dict
Parameters for the BOGRA smoothing function, see
`adjust.bogra_like_smoothing` for details.

"""
MergeBase.__init__(self)
self.grid_location_radar = grid_location_radar
self.nnear = nnear
self._update_weights(ds_rad, ds_cmls, ds_gauges)
self.radar_threshold = radar_threshold
self.max_distance = max_distance
self.fill_radar = fill_radar
self.range_checks = {} if range_checks is None else range_checks
self.idw_method = idw_method
self.p = p
self.bogra_kwargs = bogra_kwargs

if ds_cmls is None:
self.allow_gauge_and_cml = False
else:
self.allow_gauge_and_cml = True

def __call__(
self,
da_rad,
da_cmls=None,
da_gauges=None,
start_index_in_relevant_stations="random",
):
"""Adjust radar field for one time step using RADOLAN method

Parameters
----------
da_rad: xarray.DataArray
Radar data. Must contain
projected x_grid and y_grid coordinates.
da_cmls: xarray.DataArray
CML observations. Must contain the projected coordinates (site_0_x,
site_1_x, site_0_y, site_1_y).
da_gauges: xarray.DataArray
Gauge observations. Must contain the projected
coordinates (x, y).
start_index_in_relevant_stations : str or int
If set to "random" (which is the default) a random station will be picked as
starting point for selecting the audit stations.

Returns
-------
da_field_out: xarray.DataArray
DataArray with the same structure as the ds_rad but with the
interpolated field.
"""
if da_cmls is not None:
da_cmls = da_cmls.expand_dims("time")
if da_gauges is not None:
da_gauges = da_gauges.expand_dims("time")
df_stations_t = radolan.io.transform_openmrg_data_for_old_radolan_code(
ds_cmls=da_cmls, ds_gauges=da_gauges
)

da_adjusted, _ = radolan.processing.rh_to_rw(
ds_radolan_t=da_rad.to_dataset(name="RH"),
df_stations_t=df_stations_t,
start_index_in_relevant_stations=start_index_in_relevant_stations,
idw_method=self.idw_method,
nnear=self.nnear,
p=self.p,
max_distance=self.max_distance,
bogra_kwargs=self.bogra_kwargs,
allow_gauge_and_cml=self.allow_gauge_and_cml,
intersect_weights=self.intersect_weights,
)

return da_adjusted
2 changes: 1 addition & 1 deletion src/mergeplg/radolan/adjust.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ def interpolate_station_values(
df_stations_temp.loc[~z.isna(), ["x", "y"]].values
)
if np.all(np.isnan(z)):
zi = np.empty_like(x_grid)
zi = np.empty_like(x_grid, dtype="float")
zi[:] = np.nan
else:
zi = idw_interpolator(
Expand Down
56 changes: 46 additions & 10 deletions src/mergeplg/radolan/io.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,67 @@
"""Function to get data in the specific format needed for RADOLAN code"""

import pandas as pd

# TODO: This module could be removed in case the RADOLAN code is updated
# to use xr.Dataset as input for gauge and CML data.


def transform_openmrg_data_for_old_radolan_code(ds_cmls):
def transform_openmrg_data_for_old_radolan_code(ds_cmls=None, ds_gauges=None):
"""Transform OpenMRG CML Dataset to DataFrame as needed by RADOLAN code.

The old RADOLAN code requires input from gauges and CML in one pandas.DataFrame.
This function creates such a DataFrame for the OpenMRG CML data based on the
xr.Dataset that we normally use for CML data.

The "sensor_type" column contains the values "cml" for CML data and "gauge"
for gauge data.

Parameters
----------
ds_cmls : xr.Dataset
The CML data in an xr.Dataset as returned
by `io.load_and_transform_openmrg_data`
ds_gauges : xr.Dataset
The gauge data in an xr.Dataset

Returns
-------
df_cmls : pd.DataFrame
The CML data as DataFrame

df_stations : pd.DataFrame
A DataFrame with columns "time", "station_id", "sensor_type", and
"rainfall_amount" containing the CML and gauge data in the format needed for
the old RADOLAN rh_to_rw function.
"""
df_cmls = ds_cmls.to_dataframe().sort_index(level=["time", "cml_id"])
df_cmls["station_id"] = df_cmls.index.get_level_values("cml_id")
df_cmls.index = df_cmls.index.droplevel("cml_id")
df_cmls["sensor_type"] = "cml_ericsson"
df_cmls["rainfall_amount"] = df_cmls.R
return df_cmls
if ds_cmls is not None:
df_cmls = ds_cmls.to_dataframe().sort_index(level=["time", "cml_id"])
df_cmls["station_id"] = df_cmls.index.get_level_values("cml_id")
df_cmls.index = df_cmls.index.droplevel("cml_id")
df_cmls["sensor_type"] = "cml"
df_cmls["rainfall_amount"] = df_cmls.R
if ds_gauges is not None:
df_gauges = ds_gauges.to_dataframe().sort_index(level=["time", "id"])
df_gauges["station_id"] = df_gauges.index.get_level_values("id")
df_gauges.index = df_gauges.index.droplevel("id")
df_gauges["sensor_type"] = "gauge"
df_gauges["rainfall_amount"] = df_gauges.R

if ds_cmls is not None and ds_gauges is not None:
df_stations = pd.concat(
[
df_cmls[["station_id", "sensor_type", "rainfall_amount", "x", "y"]],
df_gauges[["station_id", "sensor_type", "rainfall_amount", "x", "y"]],
],
ignore_index=True,
)
elif ds_cmls is not None:
df_stations = df_cmls[
["station_id", "sensor_type", "rainfall_amount", "x", "y"]
]
elif ds_gauges is not None:
df_stations = df_gauges[
["station_id", "sensor_type", "rainfall_amount", "x", "y"]
]
else:
msg = "At least one of ds_cmls and ds_gauges must be provided."
raise ValueError(msg)

return df_stations
6 changes: 3 additions & 3 deletions src/mergeplg/radolan/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,16 @@ def rh_to_rw(
# and not NxMx1. But we want to have the time stamp so that we can use it in
# this function (even though, I thing it is not yet used to check if the radar
# and gauge data are from the same time step).
if not isinstance(ds_radolan_t.time.values, np.datetime64):
if ds_radolan_t.time.values.ndim != 0:
msg = (
"`ds_radolan_t` must have a `time` variable but no time dimension, "
"i.e. there must only be one timestamp in `time`"
)
raise ValueError(msg)

if allow_gauge_and_cml:
sensor_is_dwd_gauge = df_stations_t.sensor_type == "gauge_dwd"
sensor_is_cml = df_stations_t.sensor_type == "cml_ericsson"
sensor_is_dwd_gauge = df_stations_t.sensor_type == "gauge"
sensor_is_cml = df_stations_t.sensor_type == "cml"
if intersect_weights is None:
msg = "You must pass `intersect_weights` if you allow CML data"
raise ValueError(msg)
Expand Down
Loading
Loading