Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Added support for running an ensemble. Default output is the ensemble median, but --full_ensemble can be specified [!17](https://github.com/dmidk/sunflow/pull/17), @KristianHMoller
- Added a check for the number of ensemble members, as the code currently supports only one [!13](https://github.com/dmidk/sunflow/pull/13), @KristianHMoller
- Subsetting to bounding box is now also done in the `s3` and `files` code paths [!11](https://github.com/dmidk/sunflow/pull/11), @JoachimKoenigslieb
- Subsetting to bounding box now correctly handles both ascending and descending lat/lon coordinates [!11](https://github.com/dmidk/sunflow/pull/11), @JoachimKoenigslieb
Expand Down
18 changes: 17 additions & 1 deletion sunflow/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ class NowcastConfig:

nowcast_directory: str
ens_members: int
alpha: float
beta: float
past_steps: int
future_steps: int
input_data_availability_delay_minutes: int
Expand All @@ -61,6 +63,8 @@ def from_env(cls) -> Self:

- NOWCAST_DIRECTORY (default: .)
- ENS_MEMBERS (default: 1)
- ALPHA (default: 0.0 for ENS_MEMBERS=1, 9.29 for ENS_MEMBERS>1)
- BETA (default: 0.0 for ENS_MEMBERS=1, 0.17 for ENS_MEMBERS>1)
- PAST_STEPS (default: 4)
- FUTURE_STEPS (default: 24)
- INPUT_DATA_AVAILABILITY_DELAY_MINUTES (default: 24)
Expand All @@ -69,9 +73,21 @@ def from_env(cls) -> Self:
- SATELLITE_DATA_DIRECTORY (default: .)
- MAX_CLEARSKY_FALLBACK_DAYS (default: 3)
"""

ens_members = int(os.getenv("ENS_MEMBERS", "1"))
# Reference for default noise values:
# A. Carpentieri, D. Folini, D. Nerini, S. Pulkkinen, M. Wild, A. Meyer,
# "Intraday probabilistic forecasts of surface solar radiation with cloud
# scale-dependent autoregressive advection,"
# Applied Energy, Volume 351, 2023
default_alpha = 0.0 if ens_members == 1 else 9.29
default_beta = 0.0 if ens_members == 1 else 0.17

return cls(
nowcast_directory=os.getenv("NOWCAST_DIRECTORY", "."),
ens_members=int(os.getenv("ENS_MEMBERS", "1")),
ens_members=ens_members,
alpha=float(os.getenv("ALPHA", str(default_alpha))),
beta=float(os.getenv("BETA", str(default_beta))),
past_steps=int(os.getenv("PAST_STEPS", "4")),
future_steps=int(os.getenv("FUTURE_STEPS", "24")),
input_data_availability_delay_minutes=int(
Expand Down
20 changes: 12 additions & 8 deletions sunflow/data_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,7 @@ def save_forecast(
dataset_name: str,
nowcast_config: NowcastConfig,
model_version: str,
output_mode: str,
run_mode: str = "files",
s3_config: S3Config | None = None,
) -> str:
Expand All @@ -453,29 +454,31 @@ def save_forecast(
numeric values (float64, minutes since the forecast reference time).
Args:
forecast: Forecast array, shape [time, lat, lon] or
[ensemble, time, lat, lon].
forecast: Forecast array, shape [ensemble, time, lat, lon].
time_step: Forecast reference time (start of the forecast window).
n_steps: Number of forecast time steps to write.
latitudes: 1-D array of latitude values (degrees).
longitudes: 1-D array of longitude values (degrees).
dataset_name: Name of the source dataset (options: KNMI, DWD).
nowcast_config: NowcastConfig object supplying output directory,
ensemble size, and input data frequency.
nowcast_config: NowcastConfig object supplying output directory
and input data frequency.
model_version: Model version string written as a global attribute.
output_mode: Output aggregation mode label written to global
NetCDF attrs (expected: 'deterministic', 'median',
or 'full_ensemble').
run_mode: One of 'files' (local) or 's3'. Defaults to 'files'.
s3_config: S3Config object; required when run_mode is 's3'.
Returns:
Filename (basename only) of the written NetCDF file.
"""
input_data_frequency_minutes = nowcast_config.input_data_frequency_minutes
ens_members = nowcast_config.ens_members
filename = f"SolarNowcast_{time_step.strftime('%Y%m%d%H%M')}.nc"

# Add ensemble dimension if needed (forecast should be [ensemble, time, lat, lon])
if forecast.ndim == 3:
forecast = forecast[np.newaxis, :, :, :] # Now [1, time, lat, lon]
if forecast.ndim != 4:
raise ValueError("forecast must have shape (ensemble, time, lat, lon).")

ens_members = forecast.shape[0]

# Build time coordinate (CF-convention: minutes since forecast reference time)

Expand Down Expand Up @@ -525,6 +528,7 @@ def save_forecast(
f"Simple Probabilistic Advection solar forecast "
f"using {dataset_name} data"
),
"output_mode": output_mode,
"history": (
f"Created "
f"{datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC"
Expand Down
116 changes: 93 additions & 23 deletions sunflow/forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,42 +61,44 @@ def preprocess_data(
)


def simple_advection_forecast(
ratio_data: np.ndarray, motion_field: np.ndarray, n_steps: int, ens_members: int
def probabilistic_advection_forecast(
ratio_data: np.ndarray,
motion_field: np.ndarray,
n_steps: int,
ens_members: int,
alpha: float,
beta: float,
) -> np.ndarray:
"""Run a deterministic advection forecast on solar irradiance ratios.
"""Run a probabilistic advection forecast on solar irradiance ratios.

Uses ProbabilisticAdvection with noise parameters alpha=0 and beta=0,
which disables Gaussian noise on the motion field norm and
von Mises noise on the direction, yielding a purely deterministic
advection result. The ensemble dimension added by the model is removed
before returning.
Uses ProbabilisticAdvection with configurable noise parameters:
alpha controls Gaussian noise on motion field norm and beta controls
von Mises noise on motion field direction.

Args:
ratio_data: Input array of shape (time, lat, lon) containing
SDS/SDS_CS ratios for the past timesteps.
motion_field: Optical flow field of shape (2, lat, lon) as
produced by dense_lucaskanade.
n_steps: Number of forecast timesteps to produce.
ens_members: Number of ensemble members.
alpha: Gaussian noise strength on motion field norm.
beta: von Mises noise strength on motion field angle.

Returns:
Forecast array of shape (n_steps, lat, lon).
"""

# Initialize ProbabilisticAdvection with NO noise (alpha=0, beta=0)
# Initialize ProbabilisticAdvection with configured noise settings.
pa = ProbabilisticAdvection(
alpha=0.0, # No Gaussian noise on motion field norm
beta=0.0, # No von Mises noise on motion field angle
alpha=alpha,
beta=beta,
return_motion_field=False,
ens_members=ens_members,
)
# Run probabilistic advection using the correct method name
forecast = pa.maps_forecast(n_steps, ratio_data, motion_field)

# Remove ensemble dimension if present (squeeze to get shape: [time, lat, lon])
if forecast.ndim == 4: # [ensemble, time, lat, lon]
forecast = forecast[0] # Take first (and only) ensemble member

return forecast


Expand All @@ -115,7 +117,7 @@ def multiply_clearsky(

Args:
ratio_forecast: Forecast array of shape (n_steps, lat, lon)
containing SDS/SDS_CS ratios.
or (ensemble, n_steps, lat, lon) containing SDS/SDS_CS ratios.
clearsky_data: xarray Dataset with a 'time' dimension containing
the clearsky variable for each forecast step.
previous_day_time_steps: List of datetimes (one per forecast step)
Expand All @@ -124,26 +126,94 @@ def multiply_clearsky(
NetCDF variable name in the datasets.

Returns:
Solar irradiance forecast array of shape (n_steps, lat, lon)
in W m⁻².
Solar irradiance forecast array with the same shape as
ratio_forecast, in W m⁻².

Raises:
RuntimeError: If clearsky data is missing for any forecast timestep.
"""
solar_forecast = np.zeros_like(ratio_forecast)
clearsky_steps: list[np.ndarray] = []

for i, time_step in enumerate(previous_day_time_steps):
for time_step in previous_day_time_steps:
try:
sds_cs = clearsky_data.sel(time=time_step.replace(tzinfo=None))[
nc_variable_names["sds_cs"]
].values

# Multiply ratio by clearsky
solar_forecast[i] = ratio_forecast[i] * sds_cs
clearsky_steps.append(sds_cs)
except KeyError:
raise RuntimeError(
f"No clearsky data for {time_step.strftime('%Y-%m-%dT%H:%M:%SZ')}, "
"cannot compute solar forecast for this step."
)

clearsky_stack = np.stack(clearsky_steps, axis=0)

if ratio_forecast.ndim == 3:
if ratio_forecast.shape[0] != clearsky_stack.shape[0]:
raise ValueError(
"ratio_forecast time dimension does not match clearsky timesteps "
f"({ratio_forecast.shape[0]} != {clearsky_stack.shape[0]})."
)
solar_forecast = ratio_forecast * clearsky_stack
return solar_forecast[
np.newaxis, :, :, :
] # Add ensemble dimension for consistency

if ratio_forecast.ndim == 4:
if ratio_forecast.shape[1] != clearsky_stack.shape[0]:
raise ValueError(
"ratio_forecast time dimension does not match clearsky timesteps "
f"({ratio_forecast.shape[1]} != {clearsky_stack.shape[0]})."
)
return ratio_forecast * clearsky_stack[np.newaxis, :, :, :]

raise ValueError(
"ratio_forecast must have shape (time, lat, lon) or "
"(ensemble, time, lat, lon)."
)


def prepend_t0(
clearsky_data: xr.Dataset,
ratio_data: np.ndarray,
solar_forecast: np.ndarray,
config: dict,
clearsky_t0_time: datetime,
) -> np.ndarray:
"""Prepend analysis timestep (t=0) to an ensemble solar forecast.

Computes the t=0 solar field as the latest observed ratio
(ratio_data[-1]) multiplied by clearsky irradiance at clearsky_t0_time,
then prepends that field to all ensemble members in solar_forecast.

Args:
clearsky_data: Dataset containing clearsky irradiance values on
a time axis.
ratio_data: Ratio history array with shape (time, lat, lon).
solar_forecast: Forecast array with shape
(ensemble, forecast_time, lat, lon).
config: Runtime configuration dict containing
config["nc_variable_names"]["sds_cs"].
clearsky_t0_time: Timestamp for the analysis clearsky field,
typically one day before the nowcast time.

Returns:
Array of shape (ensemble, forecast_time + 1, lat, lon)
with the analysis field inserted at index 0 along the time axis.
"""
# Prepend timestep 0: current observation (ratio_data[-1]) × clearsky at t=0
sds_cs_t0 = clearsky_data.sel(time=clearsky_t0_time.replace(tzinfo=None))[
config["nc_variable_names"]["sds_cs"]
].values
solar_t0 = ratio_data[-1] * sds_cs_t0

# Broadcast the same t=0 clearsky-based analysis field to all ensembles.
solar_t0_ens = np.broadcast_to(
solar_t0,
(solar_forecast.shape[0],) + solar_t0.shape,
)
solar_forecast = np.concatenate(
[solar_t0_ens[:, np.newaxis, :, :], solar_forecast],
axis=1,
)
return solar_forecast
Loading
Loading