diff --git a/src/dalia/configs/dalia_config.py b/src/dalia/configs/dalia_config.py
index 0dbbbec0..c8d2a015 100644
--- a/src/dalia/configs/dalia_config.py
+++ b/src/dalia/configs/dalia_config.py
@@ -6,6 +6,12 @@
 
 from pydantic import BaseModel, ConfigDict, PositiveInt
 
+from dalia.configs.gradient_method_config import (
+    GradientMethodConfig,
+    SmartGradientConfig,
+    VanillaGradientConfig,
+)
+
 
 class SolverConfig(BaseModel):
     model_config = ConfigDict(extra="forbid")
@@ -20,15 +26,16 @@ class BFGSConfig(BaseModel):
 
     max_iter: PositiveInt = 100
     jac: bool = True
-    
-    maxcor: PositiveInt = 10 # maximum number of past gradient vectors to store -> good default: dim(theta)
-    maxls: PositiveInt = 20 # maximum number of line search iterations
+
+    maxcor: PositiveInt = (
+        10  # maximum number of past gradient vectors to store -> good default: dim(theta)
+    )
+    maxls: PositiveInt = 20  # maximum number of line search iterations
 
     gtol: float = 1e-1
     # c1: float = 1e-4  # only relevant for BFGS not for L-BFGS-B
     # c2: float = 0.9  # only relevant for BFGS not for L-BFGS-B
     disp: bool = False
-    
 
 
 class DaliaConfig(BaseModel):
@@ -37,11 +44,12 @@ class DaliaConfig(BaseModel):
     # --- Simulation parameters ------------------------------------------------
     solver: SolverConfig = SolverConfig()
     minimize: BFGSConfig = BFGSConfig()
+    gradient_method: GradientMethodConfig = SmartGradientConfig()
 
     # exit BFGS early if the reduction in the objective function is less than f_reduction_tol after f_reduction_lag iterations
     f_reduction_lag: int = 3
     f_reduction_tol: float = 1e-4
-    
+
     # exit BFGS early if the change in theta is less than theta_reduction_tol after theta_reduction_lag iterations
     theta_reduction_lag: int = 3
     theta_reduction_tol: float = 1e-4
diff --git a/src/dalia/configs/gradient_method_config.py b/src/dalia/configs/gradient_method_config.py
new file mode 100644
index 00000000..a02443cb
--- /dev/null
+++ b/src/dalia/configs/gradient_method_config.py
@@ -0,0 +1,26 @@
+from abc import ABC
+from typing import Literal
+
+from pydantic import BaseModel, ConfigDict
+
+
+class GradientMethodConfig(BaseModel, ABC):
+    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
+
+    # Input folder for this specific submodel
+    type: Literal["vanilla_gradient", "smart_gradient"] = None
+
+    finite_difference_epsilon: float = 1e-3
+
+
+class VanillaGradientConfig(GradientMethodConfig):
+    type: Literal["vanilla_gradient"] = "vanilla_gradient"
+
+
+class SmartGradientConfig(GradientMethodConfig):
+    type: Literal["smart_gradient"] = "smart_gradient"
+
+    # The diagonal noise ensure to avoid singularities in the QR decomposition
+    diagonal_noise: float = 1e-8
+    # Threshold below which scaling is not performed
+    scaling_threshold: float = 1e-12
diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py
index 5756d086..cc4a6182 100644
--- a/src/dalia/core/dalia.py
+++ b/src/dalia/core/dalia.py
@@ -9,6 +9,7 @@
 from dalia.configs.dalia_config import DaliaConfig
 from dalia.core.model import Model
 from dalia.solvers import DenseSolver, DistSerinvSolver, SerinvSolver, SparseSolver
+from dalia.gradient_methods import VanillaGradient, SmartGradient
 from dalia.utils import (
     DummyCommunicator,
     add_str_header,
@@ -190,9 +191,28 @@ def __init__(
                     nccl_comm=self.nccl_comm,
                 )
 
+        # --- Initialize Gradient Method
+        if self.config.gradient_method.type == "vanilla_gradient":
+            self.gradient_method = VanillaGradient(
+                basis_size=self.model.n_hyperparameters,
+                finite_difference_epsilon=self.config.gradient_method.finite_difference_epsilon,
+            )
+        elif self.config.gradient_method.type == "smart_gradient":
+            self.gradient_method = SmartGradient(
+                basis_size=self.model.n_hyperparameters,
+                finite_difference_epsilon=self.config.gradient_method.finite_difference_epsilon,
+                diagonal_noise=self.config.gradient_method.diagonal_noise,
+                scaling_threshold=self.config.gradient_method.scaling_threshold,
+            )
+        else:
+            raise ValueError(
+                f"Unknown gradient method type: {self.config.gradient_method.type}"
+            )
+
         # --- Set up recurrent variables
         self.gradient_f = xp.zeros(self.model.n_hyperparameters, dtype=xp.float64)
         self.f_values_i = xp.zeros(self.n_f_evaluations, dtype=xp.float64)
+        self.gradient_basis = xp.eye(self.model.n_hyperparameters, dtype=xp.float64)
         self.eps_mat = xp.zeros(
             (self.model.n_hyperparameters, self.model.n_hyperparameters),
             dtype=xp.float64,
@@ -336,7 +356,7 @@ def run(self) -> dict:
         }
         synchronize(comm=self.comm_world)
         toc = time.perf_counter()
-        print_msg(f"DALIA inference took: {toc - tic:0.4f} (s)", flush = True)
+        print_msg(f"DALIA inference took: {toc - tic:0.4f} (s)", flush=True)
         return results
 
     def minimize(self) -> optimize.OptimizeResult:
@@ -563,15 +583,9 @@ def _objective_function(
         for i in range(self.n_f_evaluations):
             task_mapping.append(i % n_feval_comm)
 
-        # Initialize central difference scheme matrix
-        self.eps_mat[:] = self.eps_gradient_f * xp.eye(self.model.n_hyperparameters)
-        self.theta_mat[:] = xp.repeat(
-            get_device(theta_i).reshape(-1, 1), self.n_f_evaluations, axis=1
+        self.gradient_method.get_evaluation_directions(
+            direction_matrix=self.theta_mat, theta=theta_i
         )
-        self.theta_mat[:, 1 : 1 + self.model.n_hyperparameters] += self.eps_mat
-        self.theta_mat[
-            :, self.model.n_hyperparameters + 1 : self.n_f_evaluations
-        ] -= self.eps_mat
 
         # Proceed to the parallel function evaluation
         for feval_i in range(self.n_f_evaluations - 1, -1, -1):
@@ -593,11 +607,9 @@ def _objective_function(
         synchronize(comm=self.comm_world)
 
         # Compute gradient using central difference scheme
-        for i in range(self.model.n_hyperparameters):
-            self.gradient_f[i] = (
-                self.f_values_i[i + 1]
-                - self.f_values_i[self.model.n_hyperparameters + i + 1]
-            ) / (2 * self.eps_gradient_f)
+        self.gradient_method.compute_gradient(
+            function_evaluations=self.f_values_i, gradient=self.gradient_f
+        )
 
         f_0 = get_host(self.f_values_i[0])
         grad_f = get_host(self.gradient_f)
@@ -794,11 +806,15 @@ def compute_covariance_hp(self, theta_i: NDArray) -> NDArray:
         #     flush=True,
         # )
         cov_theta = xp.linalg.inv(hess_theta)
-        synchronize(comm=self.comm_world)     
+        synchronize(comm=self.comm_world)
         toc = time.perf_counter()
         t_covariance_hp = toc - tic
-        print_msg("Time to compute covariance of hyperparameters:", t_covariance_hp, flush=True)
-        
+        print_msg(
+            "Time to compute covariance of hyperparameters:",
+            t_covariance_hp,
+            flush=True,
+        )
+
         return cov_theta
 
     def _evaluate_hessian_f(
diff --git a/src/dalia/core/gradient_method.py b/src/dalia/core/gradient_method.py
new file mode 100644
index 00000000..3bf30bf1
--- /dev/null
+++ b/src/dalia/core/gradient_method.py
@@ -0,0 +1,59 @@
+from abc import ABC, abstractmethod
+
+from dalia import xp
+
+
+class GradientMethod(ABC):
+    """Core class for gradient computation methods."""
+
+    def __init__(self, basis_size, finite_difference_epsilon) -> None:
+        """Initialize the gradient computation method.
+
+        Parameters
+        ----------
+        basis_size : int
+            The size of the basis for finite differences.
+        finite_difference_epsilon : float
+            The epsilon value for finite difference computations.
+
+        Returns
+        -------
+        None
+        """
+        self.basis_size = basis_size
+        self.basis = xp.identity(self.basis_size, dtype=xp.float64)
+        self.finite_difference_epsilon = finite_difference_epsilon
+
+    @abstractmethod
+    def get_evaluation_directions(self, direction_matrix, theta) -> None:
+        """Get the evaluation directions for the gradient computation.
+
+        Parameters
+        ----------
+        direction_matrix : xp.ndarray
+            The matrix to store the evaluation directions.
+        theta : xp.ndarray
+            The current (hyper)parameter values.
+
+        Returns
+        -------
+        None
+        """
+        ...
+
+    @abstractmethod
+    def compute_gradient(self, function_evaluations, gradient) -> None:
+        """Compute the gradient using finite differences.
+
+        Parameters
+        ----------
+        function_evaluations : xp.ndarray
+            The function evaluations at the current and perturbed points.
+        gradient : xp.ndarray
+            The array to store the computed gradient.
+
+        Returns
+        -------
+        None
+        """
+        ...
diff --git a/src/dalia/gradient_methods/__init__.py b/src/dalia/gradient_methods/__init__.py
new file mode 100644
index 00000000..4e807656
--- /dev/null
+++ b/src/dalia/gradient_methods/__init__.py
@@ -0,0 +1,6 @@
+# Copyright 2024-2025 DALIA authors. All rights reserved.
+
+from dalia.gradient_methods.vanilla_gradient import VanillaGradient
+from dalia.gradient_methods.smart_gradient import SmartGradient
+
+__all__ = ["VanillaGradient", "SmartGradient"]
diff --git a/src/dalia/gradient_methods/smart_gradient.py b/src/dalia/gradient_methods/smart_gradient.py
new file mode 100644
index 00000000..71845da6
--- /dev/null
+++ b/src/dalia/gradient_methods/smart_gradient.py
@@ -0,0 +1,181 @@
+from numpy import random as rand
+
+from dalia import xp
+
+from dalia.core.gradient_method import GradientMethod
+from dalia.utils import get_device
+
+
+class SmartGradient(GradientMethod):
+    """Smart gradient computation method.
+
+    This method adaptively updates the basis used for finite difference
+    gradient estimation based on the changes in the (hyper)parameter values.
+    It employs QR decomposition to maintain an orthogonal basis and scales
+    the updates to ensure numerical stability.
+
+    References
+    ----------
+    .. [1] Esmail Abdul Fattah, Janet Van Niekerk, Håvard Rue.
+        Smart Gradient - An adaptive technique for improving
+        gradient estimation. Foundations of Data Science, 2022,
+        4(1): 123-136. doi: 10.3934/fods.2021037
+    """
+
+    def __init__(
+        self,
+        basis_size,
+        finite_difference_epsilon,
+        diagonal_noise=1e-8,
+        scaling_threshold=1e-12,
+    ) -> None:
+        """Initialize the gradient computation method.
+
+        Parameters
+        ----------
+        basis_size : int
+            The size of the basis for finite differences.
+        finite_difference_epsilon : float
+            The epsilon value for finite difference computations.
+        diagonal_noise : float
+            The diagonal noise to avoid singularities in the QR decomposition.
+        scaling_threshold : float
+            The threshold below which scaling is not performed.
+
+        Returns
+        -------
+        None
+        """
+        super().__init__(
+            basis_size=basis_size, finite_difference_epsilon=finite_difference_epsilon
+        )
+        self.diagonal_noise = diagonal_noise
+        self.scaling_threshold = scaling_threshold
+        self.temp_basis = xp.identity(self.basis_size)
+        self.prev_theta = xp.zeros(self.basis_size)
+        self.curr_theta = xp.zeros(self.basis_size)
+        self.count = 0
+        self.rng = rand.default_rng()
+
+    def _scale(self, x) -> xp.ndarray:
+        """Scale the input vector.
+
+        Parameters
+        ----------
+        x : xp.ndarray
+            The input vector to scale.
+
+        Returns
+        -------
+        xp.ndarray
+            The scaled output vector.
+        """
+        mean = xp.mean(x)
+        std = xp.std(x, ddof=1)
+        if std < self.scaling_threshold:
+            return x - mean
+        return (x - mean) / std
+
+    def _update_basis(self, current_theta) -> None:
+        """Update the basis using the current theta.
+
+        Parameters
+        ----------
+        current_theta : xp.ndarray
+            The current (hyper)parameter values.
+
+        Returns
+        -------
+        None
+        """
+        self.curr_theta = current_theta
+        self.temp_basis = xp.roll(self.temp_basis, 1, axis=1)
+        xdiff = current_theta - self.prev_theta
+        xdiff += get_device(self.rng.normal(0.0, self.diagonal_noise, self.basis_size))
+        self.temp_basis[:, 0] = self._scale(xdiff)
+        self.basis = self.temp_basis
+
+    def _orthogonalize_basis(self) -> None:
+        """Orthogonalize the basis using QR decomposition.
+
+        Parameters
+        ----------
+        None
+
+        Returns
+        -------
+        None
+        """
+        try:
+            Q, R = xp.linalg.qr(self.basis)
+            self.basis = Q
+        except xp.linalg.LinAlgError:
+            print(
+                "Warning: QR decomposition failed. Resetting G to identity.", flush=True
+            )
+            self.basis = xp.identity(self.basis_size)
+
+    def get_evaluation_directions(self, direction_matrix, theta) -> None:
+        """Get the evaluation directions for the gradient computation.
+
+        Parameters
+        ----------
+        direction_matrix : xp.ndarray
+            The matrix to store the evaluation directions.
+        theta : xp.ndarray
+            The current (hyper)parameter values.
+
+        Returns
+        -------
+        None
+        """
+        theta_dev = get_device(theta)
+        direction_matrix.fill(0.0)
+
+        if self.count > 0:
+            self._update_basis(theta_dev)
+        else:
+            self.curr_theta = theta_dev  # Set the starting point
+
+        self._orthogonalize_basis()
+
+        self.prev_theta = xp.copy(theta_dev)
+        self.count += 1
+
+        # First column is the current theta
+        direction_matrix[:, 0] = theta_dev
+        # Next basis_size columns are + epsilon * e_i
+        direction_matrix[
+            :, 1 : 1 + self.basis_size
+        ] += self.finite_difference_epsilon * self.basis + xp.repeat(
+            theta_dev.reshape(-1, 1), self.basis_size, 1
+        )
+        # Next basis_size columns are - epsilon * e_i
+        direction_matrix[
+            :, self.basis_size + 1 :
+        ] -= self.finite_difference_epsilon * self.basis - xp.repeat(
+            theta_dev.reshape(-1, 1), self.basis_size, 1
+        )
+
+    def compute_gradient(self, function_evaluations, gradient) -> None:
+        """Compute the gradient using finite differences.
+
+        Parameters
+        ----------
+        function_evaluations : xp.ndarray
+            The function evaluations at the current and perturbed points.
+        gradient : xp.ndarray
+            The array to store the computed gradient.
+
+        Returns
+        -------
+        None
+        """
+        for i in range(self.basis_size):
+            gradient[i] = (
+                function_evaluations[i + 1]
+                - function_evaluations[self.basis_size + i + 1]
+            ) / (2.0 * self.finite_difference_epsilon)
+
+        # Transform back the gradient into the original basis
+        gradient[:] = xp.linalg.solve(self.basis.T, gradient)
diff --git a/src/dalia/gradient_methods/vanilla_gradient.py b/src/dalia/gradient_methods/vanilla_gradient.py
new file mode 100644
index 00000000..20bdabde
--- /dev/null
+++ b/src/dalia/gradient_methods/vanilla_gradient.py
@@ -0,0 +1,78 @@
+from dalia.core.gradient_method import GradientMethod
+from dalia import xp
+
+from dalia.utils import get_device
+
+
+class VanillaGradient(GradientMethod):
+    """Vanilla finite difference gradient computation method."""
+
+    def __init__(self, basis_size, finite_difference_epsilon) -> None:
+        """Initialize the gradient computation method.
+
+        Parameters
+        ----------
+        basis_size : int
+            The size of the basis for finite differences.
+        finite_difference_epsilon : float
+            The epsilon value for finite difference computations.
+
+        Returns
+        -------
+        None
+        """
+        super().__init__(
+            basis_size=basis_size, finite_difference_epsilon=finite_difference_epsilon
+        )
+
+    def get_evaluation_directions(self, direction_matrix, theta) -> None:
+        """Get the evaluation directions for the gradient computation.
+
+        Parameters
+        ----------
+        direction_matrix : xp.ndarray
+            The matrix to store the evaluation directions.
+        theta : xp.ndarray
+            The current (hyper)parameter values.
+
+        Returns
+        -------
+        None
+        """
+        theta_dev = get_device(theta)
+        direction_matrix.fill(0.0)
+
+        # First column is the current theta
+        direction_matrix[:, 0] = theta_dev
+        # Next basis_size columns are + epsilon * e_i
+        direction_matrix[
+            :, 1 : self.basis_size + 1
+        ] += self.finite_difference_epsilon * self.basis + xp.repeat(
+            theta_dev.reshape(-1, 1), self.basis_size, 1
+        )
+        # Next basis_size columns are - epsilon * e_i
+        direction_matrix[
+            :, 1 + self.basis_size :
+        ] -= self.finite_difference_epsilon * self.basis - xp.repeat(
+            theta_dev.reshape(-1, 1), self.basis_size, 1
+        )
+
+    def compute_gradient(self, function_evaluations, gradient) -> None:
+        """Compute the gradient using finite differences.
+
+        Parameters
+        ----------
+        function_evaluations : xp.ndarray
+            The function evaluations at the current and perturbed points.
+        gradient : xp.ndarray
+            The array to store the computed gradient.
+
+        Returns
+        -------
+        None
+        """
+        for i in range(self.basis_size):
+            gradient[i] = (
+                function_evaluations[i + 1]
+                - function_evaluations[self.basis_size + i + 1]
+            ) / (2.0 * self.finite_difference_epsilon)