benchopt · GeoffNN · May 29, 2026 · felixdivo · Jun 9, 2026 · felixdivo
diff --git a/benchmark_utils/adapters/base.py b/benchmark_utils/adapters/base.py
@@ -47,8 +47,21 @@ class BaseTSFMAdapter(ABC):
 
     Subclasses must implement ``predict``.  ``fit`` is optional (used by
     supervised adaptations such as linear probe or fine-tuning).
+
+    Attributes
+    ----------
+    covariate_capabilities : frozenset[str]
+        The *effective* covariate capabilities this adapter consumes for the
+        current run (a subset of
+        :data:`benchmark_utils.capabilities.COVARIATE_CAPABILITIES`). The
+        forecasting objective reads this and masks the covariate payload down
+        to it before calling :meth:`predict`, so an adapter only ever sees
+        covariates it both declares and has enabled. Defaults to empty —
+        univariate, no covariates — so a new adapter is safe by default.
     """
 
+    covariate_capabilities: frozenset = frozenset()
+
     def fit(self, X_train, y_train, **kwargs):
         """Optional supervised fitting step (called inside Solver.run())."""
         return self

diff --git a/benchmark_utils/capabilities.py b/benchmark_utils/capabilities.py
@@ -0,0 +1,75 @@
+"""Model capability flags and covariate masking.
+
+Vocabulary
+----------
+A forecasting solver declares a ``capabilities`` set drawn from:
+
+- :data:`MULTIVARIATE`      — the model treats target channels jointly.
+  *Declarative only*: targets are always passed whole (no channel
+  splitting), so there is no behavioural toggle for this yet — it exists to
+  describe the model until a multivariate-*target* dataset and the matching
+  masking land.
+- :data:`HIST_COVARIATES`   — the model consumes history-only (past) covariates.
+- :data:`FUTURE_COVARIATES` — the model consumes known-ahead (future) covariates.
+
+``univariate`` is deliberately **not** a flag — it is the floor every model
+gets. A model that declares (or has enabled) none of the covariate
+capabilities runs univariate.
+
+Deactivation / lift
+-------------------
+The covariate capabilities are independently switchable per run (exposed as
+benchopt parameters by the consuming solver), so the lift each one provides
+can be benchmarked. Enforcement is central: the objective masks the
+:class:`~benchmark_utils.covariates.Covariates` payload down to the adapter's
+*effective* active set (``BaseTSFMAdapter.covariate_capabilities``) via
+:func:`mask_covariates` before calling ``predict``. A model therefore only
+ever sees hist/future covariates it both declares and has enabled. Targets
+and ``static_covars`` (not in the vocabulary yet) are never masked.
+"""
+
+from benchmark_utils.covariates import Covariates
+
+MULTIVARIATE = "multivariate"
+HIST_COVARIATES = "hist_covariates"
+FUTURE_COVARIATES = "future_covariates"
+
+#: Capabilities whose covariate payload :func:`mask_covariates` acts on.
+COVARIATE_CAPABILITIES = frozenset({HIST_COVARIATES, FUTURE_COVARIATES})
+
+#: Every capability in the vocabulary.
+ALL_CAPABILITIES = frozenset({MULTIVARIATE, HIST_COVARIATES, FUTURE_COVARIATES})
+
+
+def mask_covariates(covariates: Covariates, active) -> Covariates:
+    """Build a masked copy of ``covariates`` for the given capability set.
+
+    A covariate kind survives only when its capability is in ``active``:
+    ``hist_covars`` needs :data:`HIST_COVARIATES`, ``future_covars`` needs
+    :data:`FUTURE_COVARIATES`; a kind that fails the check becomes ``[]``.
+    ``static_covars`` is forwarded as-is (no capability governs it yet), and
+    targets are not part of this payload, so they are never affected.
+
+    Parameters
+    ----------
+    covariates : Covariates
+        Payload to mask; it is only read, never modified.
+    active : Iterable[str]
+        Names of the capabilities that are switched on (typically an
+        adapter's ``covariate_capabilities``).
+
+    Returns
+    -------
+    Covariates
+        A newly constructed instance holding the surviving fields.
+    """
+    # Materialise once: ``active`` may be any iterable (even a one-shot
+    # generator) and is tested for membership twice below.
+    enabled = frozenset(active)
+    keep_hist = HIST_COVARIATES in enabled
+    keep_future = FUTURE_COVARIATES in enabled
+    return Covariates(
+        static_covars=covariates.static_covars,
+        hist_covars=covariates.hist_covars if keep_hist else [],
+        future_covars=covariates.future_covars if keep_future else [],
+    )
diff --git a/objective.py b/objective.py
@@ -117,13 +117,19 @@ def evaluate_result(self, model):
     # --- forecasting ---------------------------------------------------
 
     def _eval_forecasting(self, model):
+        from benchmark_utils.capabilities import mask_covariates
         from benchmark_utils.inputs import ForecastInput
 
+        # Mask the covariate payload down to what this model declares it can
+        # use and has enabled. A model that consumes no covariates (the
+        # default) thus runs univariate; masking a toggled-off capability
+        # here is what makes its lift measurable. Targets are never masked.
+        active = getattr(model, "covariate_capabilities", frozenset())
         forecast = model.predict(
             ForecastInput(
                 x=self.X_test,
                 cutoff_indexes=self.cutoff_indexes,
-                covariates=self.covariates,
+                covariates=mask_covariates(self.covariates, active),
             )
         ).flatten()  # canonical (M, Q, H, C) shape for metrics
 

diff --git a/solvers/chronos.py b/solvers/chronos.py
@@ -253,6 +253,9 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
+    # Chronos (v1) is univariate and consumes no covariates.
+    capabilities = frozenset()
+
     parameters = {
         "model_size": ["small"],
         "layer": [None],

diff --git a/solvers/chronos2.py b/solvers/chronos2.py
@@ -27,6 +27,7 @@
     UnpooledEncoder,
 )
 from benchmark_utils.adapters.forecast_residual import ForecastResidualAdapter
+from benchmark_utils.capabilities import MULTIVARIATE
 from benchmark_utils.outputs import ForecastOutput
 
 from .chronos import (
@@ -174,6 +175,11 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
+    # Chronos-2 models channels jointly. ``multivariate`` is declarative
+    # metadata only — targets are always passed whole, so there is no
+    # behavioural toggle yet. This solver does not consume covariates.
+    capabilities = frozenset({MULTIVARIATE})
+
     parameters = {
         "model_size": ["small"],
         "layer": [None],

diff --git a/solvers/naive.py b/solvers/naive.py
@@ -94,6 +94,9 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
+    # Per-channel univariate baseline; consumes no covariates.
+    capabilities = frozenset()
+
     parameters = {
         "seasonality": [1],
     }

diff --git a/solvers/seasonal_naive.py b/solvers/seasonal_naive.py
@@ -64,6 +64,9 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
+    # Per-channel univariate baseline; consumes no covariates.
+    capabilities = frozenset()
+
     parameters = {
         "season_length": [1, 7, 12, 24],
     }

diff --git a/solvers/tfc_api.py b/solvers/tfc_api.py
@@ -32,6 +32,12 @@
 from benchopt import BaseSolver
 
 from benchmark_utils.adapters.base import BaseTSFMAdapter
+from benchmark_utils.capabilities import (
+    FUTURE_COVARIATES,
+    HIST_COVARIATES,
+    MULTIVARIATE,
+)
+from benchmark_utils.covariates import Covariates
 from benchmark_utils.inputs import ForecastInput
 from benchmark_utils.outputs import ForecastOutput
 
@@ -69,6 +75,47 @@ def _shared_offsets_from_end(x, cutoff_indexes):
     return reference
 
 
+def _as_2d(arr) -> np.ndarray:
+    """Cast to float32; promote a 1-D cell to a ``(T, 1)`` column."""
+    cell = np.asarray(arr, dtype=np.float32)
+    return cell if cell.ndim != 1 else cell[:, None]
+
+
+def _covar_var_names(covariates: Covariates) -> tuple[list[str], list[str]]:
+    """Return ``(hist_names, future_names)`` column names for the SDK.
+
+    Names are ``hist_<j>`` / ``future_<j>``. Widths come from the first
+    series only, so all series are assumed to share one covariate width.
+    An empty covariate kind (e.g. one masked off upstream) yields ``[]``.
+    """
+    hist_cells = covariates.hist_covars
+    future_cells = covariates.future_covars
+    hist_width = _as_2d(hist_cells[0]).shape[1] if hist_cells else 0
+    future_width = _as_2d(future_cells[0]).shape[1] if future_cells else 0
+    return (
+        [f"hist_{j}" for j in range(hist_width)],
+        [f"future_{j}" for j in range(future_width)],
+    )
+
+
+def _attach_covars(frame, covariates: Covariates, series_idx: int):
+    """Write the covariates of series ``series_idx`` into ``frame``.
+
+    Columns are named ``hist_<j>`` / ``future_<j>``; an empty covariate
+    kind adds nothing. ``frame`` is modified in place and returned. Cells
+    are assumed to span the full series length, matching ``frame``'s rows.
+    """
+    def _add(cells, name_of):
+        if cells:
+            block = _as_2d(cells[series_idx])
+            for j in range(block.shape[1]):
+                frame[name_of(j)] = block[:, j]
+
+    _add(covariates.hist_covars, lambda j: f"hist_{j}")
+    _add(covariates.future_covars, lambda j: f"future_{j}")
+    return frame
+
+
 class _TFCAPIForecaster(BaseTSFMAdapter):
     """Adapter calling the TFC SDK.
 
@@ -106,24 +153,30 @@ def __init__(
         self.batch_size = batch_size
 
     def predict(self, x: ForecastInput) -> ForecastOutput:
-        # TODO: thread ``x.covariates`` (static/hist/future) through to the SDK
-        # once the benchmark datasets populate them. Monash currently
-        # carries none, so the dataclass arrives with empty sequences.
+        # ``x.covariates`` is already masked by the objective down to this
+        # adapter's ``covariate_capabilities`` — a deactivated (or
+        # undeclared) covariate kind arrives as an empty sequence, so the
+        # column/variable wiring below simply produces nothing for it.
         series_list, cutoff_indexes = x.x, x.cutoff_indexes
+        covariates = x.covariates
+        hist_names, future_names = _covar_var_names(covariates)
         pd_freq = _to_pandas_freq(self.freq)
 
         offsets = _shared_offsets_from_end(series_list, cutoff_indexes)
         if getattr(self.model, "supports_batching", False) and offsets is not None:
             per_series, levels = self._predict_batched(
-                series_list, cutoff_indexes, pd_freq, offsets
+                series_list, cutoff_indexes, pd_freq, offsets,
+                covariates, hist_names, future_names,
             )
         else:
             per_series, levels = self._predict_per_series(
-                series_list, cutoff_indexes, pd_freq
+                series_list, cutoff_indexes, pd_freq,
+                covariates, hist_names, future_names,
             )
         return ForecastOutput(quantiles=per_series, quantile_levels=levels)
 
-    def _predict_per_series(self, x, cutoff_indexes, pd_freq):
+    def _predict_per_series(self, x, cutoff_indexes, pd_freq,
+                            covariates, hist_names, future_names):
         per_series = []
         levels = None
         for series_idx, (series, cutoffs) in enumerate(zip(x, cutoff_indexes)):
@@ -134,11 +187,14 @@ def _predict_per_series(self, x, cutoff_indexes, pd_freq):
             index = pd.date_range("2000-01-01", periods=T, freq=pd_freq)
 
             frames = [
-                pd.DataFrame({
-                    "unique_id": f"s{series_idx}_c{c}",
-                    "ds": index,
-                    "target": series[:, c],
-                })
+                _attach_covars(
+                    pd.DataFrame({
+                        "unique_id": f"s{series_idx}_c{c}",
+                        "ds": index,
+                        "target": series[:, c],
+                    }),
+                    covariates, series_idx,
+                )
                 for c in range(C)
             ]
             train_df = pd.concat(frames, ignore_index=True)
@@ -155,6 +211,8 @@ def _predict_per_series(self, x, cutoff_indexes, pd_freq):
                 add_holidays=self.add_holidays,
                 add_events=self.add_events,
                 country_isocode=self.country_isocode,
+                historical_variables=hist_names or None,
+                future_variables=future_names or None,
                 batch_size=self.batch_size,
             )
 
@@ -165,7 +223,8 @@ def _predict_per_series(self, x, cutoff_indexes, pd_freq):
             levels = series_levels
         return per_series, (levels if levels is not None else (0.5,))
 
-    def _predict_batched(self, x, cutoff_indexes, pd_freq, offsets):
+    def _predict_batched(self, x, cutoff_indexes, pd_freq, offsets,
+                         covariates, hist_names, future_names):
         """One ``cross_validate`` call covering every series in ``x``.
 
         Series are aligned to share an end date so all cutoffs collapse to
@@ -183,11 +242,14 @@ def _predict_batched(self, x, cutoff_indexes, pd_freq, offsets):
             index = pd.date_range(end=end, periods=T, freq=pd_freq)
             for c in range(C):
                 frames.append(
-                    pd.DataFrame({
-                        "unique_id": f"s{series_idx}_c{c}",
-                        "ds": index,
-                        "target": series[:, c],
-                    })
+                    _attach_covars(
+                        pd.DataFrame({
+                            "unique_id": f"s{series_idx}_c{c}",
+                            "ds": index,
+                            "target": series[:, c],
+                        }),
+                        covariates, series_idx,
+                    )
                 )
             per_series_meta.append((series_idx, C, index, cutoffs))
 
@@ -209,6 +271,8 @@ def _predict_batched(self, x, cutoff_indexes, pd_freq, offsets):
             add_holidays=self.add_holidays,
             add_events=self.add_events,
             country_isocode=self.country_isocode,
+            historical_variables=hist_names or None,
+            future_variables=future_names or None,
             batch_size=self.batch_size,
         )
 
@@ -271,6 +335,12 @@ class Solver(BaseSolver):
         ``country_isocode`` to be set.
     country_isocode : str or None
         ISO country code (e.g. ``"US"``) used by the holiday/event lookup.
+    use_hist_covars, use_future_covars : bool
+        Whether to feed the dataset's historical / future covariates to the
+        model. Default ``True``; sweep over ``[True, False]`` to benchmark the
+        lift each covariate kind provides. Deactivating both runs the model
+        univariate. (The objective enforces this by masking the covariate
+        payload — see :mod:`benchmark_utils.capabilities`.)
     batch_size : int
         Series-per-batch for batching-enabled models (chronos-2, moirai-2).
     """
@@ -281,12 +351,20 @@ class Solver(BaseSolver):
 
     sampling_strategy = "run_once"
 
+    # Declared capabilities (metadata). ``multivariate`` is declarative only —
+    # targets are always passed whole, so there is no behavioural toggle for
+    # it yet. The two covariate capabilities are wired end-to-end and
+    # switchable via ``use_hist_covars`` / ``use_future_covars``.
+    capabilities = frozenset({MULTIVARIATE, HIST_COVARIATES, FUTURE_COVARIATES})
+
     parameters = {
         "model": ["chronos-2"],
         "context": [None],
         "add_holidays": [False],
         "add_events": [False],
         "country_isocode": [None],
+        "use_hist_covars": [True],
+        "use_future_covars": [True],
         "batch_size": [256],
     }
 
@@ -329,6 +407,15 @@ def run(self, _):
             country_isocode=self.country_isocode,
             batch_size=self.batch_size,
         )
+        # Effective active covariate capabilities for this run = the toggled-on
+        # ones, intersected with what the model declares. The objective reads
+        # this to mask the covariate payload before calling predict().
+        active = set()
+        if self.use_hist_covars:
+            active.add(HIST_COVARIATES)
+        if self.use_future_covars:
+            active.add(FUTURE_COVARIATES)
+        self._adapter.covariate_capabilities = frozenset(active & self.capabilities)
 
     def get_result(self):
         return {"model": self._adapter}