dalia-project · vincent-maillou · Jul 14, 2025 · Sep 9, 2025 · Sep 10, 2025 · Sep 11, 2025
diff --git a/src/dalia/configs/dalia_config.py b/src/dalia/configs/dalia_config.py
@@ -6,6 +6,12 @@
 
 from pydantic import BaseModel, ConfigDict, PositiveInt
 
+from dalia.configs.gradient_method_config import (
+    GradientMethodConfig,
+    SmartGradientConfig,
+    VanillaGradientConfig,
+)
+
 
 class SolverConfig(BaseModel):
     model_config = ConfigDict(extra="forbid")
@@ -20,15 +26,16 @@ class BFGSConfig(BaseModel):
 
     max_iter: PositiveInt = 100
     jac: bool = True
-
-    maxcor: PositiveInt = 10 # maximum number of past gradient vectors to store -> good default: dim(theta)
-    maxls: PositiveInt = 20 # maximum number of line search iterations
+
+    maxcor: PositiveInt = (
+        10  # maximum number of past gradient vectors to store -> good default: dim(theta)
+    )
+    maxls: PositiveInt = 20  # maximum number of line search iterations
 
     gtol: float = 1e-1
     # c1: float = 1e-4  # only relevant for BFGS not for L-BFGS-B
     # c2: float = 0.9  # only relevant for BFGS not for L-BFGS-B
     disp: bool = False
-
 
 
 class DaliaConfig(BaseModel):
@@ -37,11 +44,12 @@ class DaliaConfig(BaseModel):
     # --- Simulation parameters ------------------------------------------------
     solver: SolverConfig = SolverConfig()
     minimize: BFGSConfig = BFGSConfig()
+    gradient_method: GradientMethodConfig = SmartGradientConfig()
 
     # exit BFGS early if the reduction in the objective function is less than f_reduction_tol after f_reduction_lag iterations
     f_reduction_lag: int = 3
     f_reduction_tol: float = 1e-4
-    
+
     # exit BFGS early if the change in theta is less than theta_reduction_tol after theta_reduction_lag iterations
     theta_reduction_lag: int = 3
     theta_reduction_tol: float = 1e-4

diff --git a/src/dalia/configs/gradient_method_config.py b/src/dalia/configs/gradient_method_config.py
@@ -0,0 +1,26 @@
+from abc import ABC
+from typing import Literal
+
+from pydantic import BaseModel, ConfigDict
+
+
+class GradientMethodConfig(BaseModel, ABC):
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
+
+    # Input folder for this specific submodel
+    type: Literal["vanilla_gradient", "smart_gradient"] = None
+
+    finite_difference_epsilon: float = 1e-3
+
+
+class VanillaGradientConfig(GradientMethodConfig):
+    type: Literal["vanilla_gradient"] = "vanilla_gradient"
+
+
+class SmartGradientConfig(GradientMethodConfig):
+    type: Literal["smart_gradient"] = "smart_gradient"
+
+    # The diagonal noise ensure to avoid singularities in the QR decomposition
+    diagonal_noise: float = 1e-8
+    # Threshold below which scaling is not performed
+    scaling_threshold: float = 1e-12
diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py
@@ -9,6 +9,7 @@
 from dalia.configs.dalia_config import DaliaConfig
 from dalia.core.model import Model
 from dalia.solvers import DenseSolver, DistSerinvSolver, SerinvSolver, SparseSolver
+from dalia.gradient_methods import VanillaGradient, SmartGradient
 from dalia.utils import (
     DummyCommunicator,
     add_str_header,
@@ -190,9 +191,28 @@ def __init__(
                     nccl_comm=self.nccl_comm,
                 )
 
+        # --- Initialize Gradient Method
+        if self.config.gradient_method.type == "vanilla_gradient":
+            self.gradient_method = VanillaGradient(
+                basis_size=self.model.n_hyperparameters,
+                finite_difference_epsilon=self.config.gradient_method.finite_difference_epsilon,
+            )
+        elif self.config.gradient_method.type == "smart_gradient":
+            self.gradient_method = SmartGradient(
+                basis_size=self.model.n_hyperparameters,
+                finite_difference_epsilon=self.config.gradient_method.finite_difference_epsilon,
+                diagonal_noise=self.config.gradient_method.diagonal_noise,
+                scaling_threshold=self.config.gradient_method.scaling_threshold,
+            )
+        else:
+            raise ValueError(
+                f"Unknown gradient method type: {self.config.gradient_method.type}"
+            )
+
         # --- Set up recurrent variables
         self.gradient_f = xp.zeros(self.model.n_hyperparameters, dtype=xp.float64)
         self.f_values_i = xp.zeros(self.n_f_evaluations, dtype=xp.float64)
+        self.gradient_basis = xp.eye(self.model.n_hyperparameters, dtype=xp.float64)
-        self.gradient_basis = xp.eye(self.model.n_hyperparameters, dtype=xp.float64)
-        self.gradient_basis = xp.eye(self.model.n_hyperparameters, dtype=xp.float64)
         self.eps_mat = xp.zeros(
             (self.model.n_hyperparameters, self.model.n_hyperparameters),
             dtype=xp.float64,
@@ -336,7 +356,7 @@ def run(self) -> dict:
         }
         synchronize(comm=self.comm_world)
         toc = time.perf_counter()
-        print_msg(f"DALIA inference took: {toc - tic:0.4f} (s)", flush = True)
+        print_msg(f"DALIA inference took: {toc - tic:0.4f} (s)", flush=True)
         return results
 
     def minimize(self) -> optimize.OptimizeResult:
@@ -563,15 +583,9 @@ def _objective_function(
         for i in range(self.n_f_evaluations):
             task_mapping.append(i % n_feval_comm)
 
-        # Initialize central difference scheme matrix
-        self.eps_mat[:] = self.eps_gradient_f * xp.eye(self.model.n_hyperparameters)
-        self.theta_mat[:] = xp.repeat(
-            get_device(theta_i).reshape(-1, 1), self.n_f_evaluations, axis=1
+        self.gradient_method.get_evaluation_directions(
+            direction_matrix=self.theta_mat, theta=theta_i
         )
-        self.theta_mat[:, 1 : 1 + self.model.n_hyperparameters] += self.eps_mat
-        self.theta_mat[
-            :, self.model.n_hyperparameters + 1 : self.n_f_evaluations
-        ] -= self.eps_mat
 
         # Proceed to the parallel function evaluation
         for feval_i in range(self.n_f_evaluations - 1, -1, -1):
@@ -593,11 +607,9 @@ def _objective_function(
         synchronize(comm=self.comm_world)
 
         # Compute gradient using central difference scheme
-        for i in range(self.model.n_hyperparameters):
-            self.gradient_f[i] = (
-                self.f_values_i[i + 1]
-                - self.f_values_i[self.model.n_hyperparameters + i + 1]
-            ) / (2 * self.eps_gradient_f)
+        self.gradient_method.compute_gradient(
+            function_evaluations=self.f_values_i, gradient=self.gradient_f
+        )
 
         f_0 = get_host(self.f_values_i[0])
         grad_f = get_host(self.gradient_f)
@@ -794,11 +806,15 @@ def compute_covariance_hp(self, theta_i: NDArray) -> NDArray:
         #     flush=True,
         # )
         cov_theta = xp.linalg.inv(hess_theta)
-        synchronize(comm=self.comm_world)     
+        synchronize(comm=self.comm_world)
         toc = time.perf_counter()
         t_covariance_hp = toc - tic
-        print_msg("Time to compute covariance of hyperparameters:", t_covariance_hp, flush=True)
-
+        print_msg(
+            "Time to compute covariance of hyperparameters:",
+            t_covariance_hp,
+            flush=True,
+        )
+
         return cov_theta
 
     def _evaluate_hessian_f(

diff --git a/src/dalia/core/gradient_method.py b/src/dalia/core/gradient_method.py
@@ -0,0 +1,59 @@
+from abc import ABC, abstractmethod
+
+from dalia import xp
+
+
+class GradientMethod(ABC):
+    """Core class for gradient computation methods."""
+
+    def __init__(self, basis_size, finite_difference_epsilon) -> None:
+        """Initialize the gradient computation method.
+
+        Parameters
+        ----------
+        basis_size : int
+            The size of the basis for finite differences.
+        finite_difference_epsilon : float
+            The epsilon value for finite difference computations.
+
+        Returns
+        -------
+        None
+        """
+        self.basis_size = basis_size
+        self.basis = xp.identity(self.basis_size, dtype=xp.float64)
+        self.finite_difference_epsilon = finite_difference_epsilon
+
+    @abstractmethod
+    def get_evaluation_directions(self, direction_matrix, theta) -> None:
+        """Get the evaluation directions for the gradient computation.
+
+        Parameters
+        ----------
+        direction_matrix : xp.ndarray
+            The matrix to store the evaluation directions.
+        theta : xp.ndarray
+            The current (hyper)parameter values.
+
+        Returns
+        -------
+        None
+        """
+        ...
+
+    @abstractmethod
+    def compute_gradient(self, function_evaluations, gradient) -> None:
+        """Compute the gradient using finite differences.
+
+        Parameters
+        ----------
+        function_evaluations : xp.ndarray
+            The function evaluations at the current and perturbed points.
+        gradient : xp.ndarray
+            The array to store the computed gradient.
+
+        Returns
+        -------
+        None
+        """
+        ...
diff --git a/src/dalia/gradient_methods/__init__.py b/src/dalia/gradient_methods/__init__.py
@@ -0,0 +1,6 @@
+# Copyright 2024-2025 DALIA authors. All rights reserved.
+
+from dalia.gradient_methods.vanilla_gradient import VanillaGradient
+from dalia.gradient_methods.smart_gradient import SmartGradient
+
+__all__ = ["VanillaGradient", "SmartGradient"]