Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions src/dalia/configs/dalia_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@

from pydantic import BaseModel, ConfigDict, PositiveInt

from dalia.configs.gradient_method_config import (
GradientMethodConfig,
SmartGradientConfig,
VanillaGradientConfig,
)


class SolverConfig(BaseModel):
model_config = ConfigDict(extra="forbid")
Expand All @@ -20,15 +26,16 @@ class BFGSConfig(BaseModel):

max_iter: PositiveInt = 100
jac: bool = True

maxcor: PositiveInt = 10 # maximum number of past gradient vectors to store -> good default: dim(theta)
maxls: PositiveInt = 20 # maximum number of line search iterations

maxcor: PositiveInt = (
10 # maximum number of past gradient vectors to store -> good default: dim(theta)
)
maxls: PositiveInt = 20 # maximum number of line search iterations

gtol: float = 1e-1
# c1: float = 1e-4 # only relevant for BFGS not for L-BFGS-B
# c2: float = 0.9 # only relevant for BFGS not for L-BFGS-B
disp: bool = False



class DaliaConfig(BaseModel):
Expand All @@ -37,11 +44,12 @@ class DaliaConfig(BaseModel):
# --- Simulation parameters ------------------------------------------------
solver: SolverConfig = SolverConfig()
minimize: BFGSConfig = BFGSConfig()
gradient_method: GradientMethodConfig = SmartGradientConfig()

# exit BFGS early if the reduction in the objective function is less than f_reduction_tol after f_reduction_lag iterations
f_reduction_lag: int = 3
f_reduction_tol: float = 1e-4

# exit BFGS early if the change in theta is less than theta_reduction_tol after theta_reduction_lag iterations
theta_reduction_lag: int = 3
theta_reduction_tol: float = 1e-4
Expand Down
26 changes: 26 additions & 0 deletions src/dalia/configs/gradient_method_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from abc import ABC
from typing import Literal

from pydantic import BaseModel, ConfigDict


class GradientMethodConfig(BaseModel, ABC):
    """Settings shared by every gradient method configuration.

    Concrete subclasses narrow ``type`` to a single literal tag that names
    the gradient method they configure, and may add method-specific fields.
    """

    # ``extra="forbid"`` makes unknown keys a validation error instead of
    # silently dropping them.
    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)

    # Tag naming the gradient method. ``None`` marks the untagged base
    # configuration, so it is listed among the accepted literals: the default
    # then agrees with the annotation, and a default-constructed model can be
    # dumped and validated again without being rejected.
    type: Literal["vanilla_gradient", "smart_gradient", None] = None

    # Epsilon value used for the finite-difference computations.
    finite_difference_epsilon: float = 1e-3


class VanillaGradientConfig(GradientMethodConfig):
    """Configuration selecting the ``vanilla_gradient`` method.

    Declares no fields beyond those inherited from ``GradientMethodConfig``;
    only the ``type`` tag is narrowed.
    """

    # Tag fixed to this method's identifier.
    type: Literal["vanilla_gradient"] = "vanilla_gradient"


class SmartGradientConfig(GradientMethodConfig):
    """Configuration selecting the ``smart_gradient`` method.

    Extends the inherited settings with the two numerical safeguards below.
    """

    # Tag fixed to this method's identifier.
    type: Literal["smart_gradient"] = "smart_gradient"

    # Diagonal noise that guards against singularities in the QR
    # decomposition.
    diagonal_noise: float = 1e-8
    # Threshold below which scaling is not performed.
    scaling_threshold: float = 1e-12
50 changes: 33 additions & 17 deletions src/dalia/core/dalia.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from dalia.configs.dalia_config import DaliaConfig
from dalia.core.model import Model
from dalia.solvers import DenseSolver, DistSerinvSolver, SerinvSolver, SparseSolver
from dalia.gradient_methods import VanillaGradient, SmartGradient
from dalia.utils import (
DummyCommunicator,
add_str_header,
Expand Down Expand Up @@ -190,9 +191,28 @@ def __init__(
nccl_comm=self.nccl_comm,
)

# --- Initialize Gradient Method
if self.config.gradient_method.type == "vanilla_gradient":
self.gradient_method = VanillaGradient(
basis_size=self.model.n_hyperparameters,
finite_difference_epsilon=self.config.gradient_method.finite_difference_epsilon,
)
elif self.config.gradient_method.type == "smart_gradient":
self.gradient_method = SmartGradient(
basis_size=self.model.n_hyperparameters,
finite_difference_epsilon=self.config.gradient_method.finite_difference_epsilon,
diagonal_noise=self.config.gradient_method.diagonal_noise,
scaling_threshold=self.config.gradient_method.scaling_threshold,
)
else:
raise ValueError(
f"Unknown gradient method type: {self.config.gradient_method.type}"
)

# --- Set up recurrent variables
self.gradient_f = xp.zeros(self.model.n_hyperparameters, dtype=xp.float64)
self.f_values_i = xp.zeros(self.n_f_evaluations, dtype=xp.float64)
self.gradient_basis = xp.eye(self.model.n_hyperparameters, dtype=xp.float64)

Copilot AI Sep 11, 2025

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The self.gradient_basis variable is defined but never used in the updated code. This appears to be leftover from the previous implementation and should be removed.

Suggested change
self.gradient_basis = xp.eye(self.model.n_hyperparameters, dtype=xp.float64)

Copilot uses AI. Check for mistakes.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

k = self.basis_size
eps = self.finite_difference_epsilon
deltas = eps * self.basis

direction_matrix[:, 0] = theta_dev
direction_matrix[:, 1:1+k] = deltas
direction_matrix[:, 1+k:1+2*k] = -deltas

for j in range(1, direction_matrix.shape[1]):
direction_matrix[:, j] = self._transformed_fun(phi=direction_matrix[:, j])

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think copilot might actually be right on this one. I didn't see it being used somewhere else either.

self.eps_mat = xp.zeros(
(self.model.n_hyperparameters, self.model.n_hyperparameters),
dtype=xp.float64,
Expand Down Expand Up @@ -336,7 +356,7 @@ def run(self) -> dict:
}
synchronize(comm=self.comm_world)
toc = time.perf_counter()
print_msg(f"DALIA inference took: {toc - tic:0.4f} (s)", flush = True)
print_msg(f"DALIA inference took: {toc - tic:0.4f} (s)", flush=True)
return results

def minimize(self) -> optimize.OptimizeResult:
Expand Down Expand Up @@ -563,15 +583,9 @@ def _objective_function(
for i in range(self.n_f_evaluations):
task_mapping.append(i % n_feval_comm)

# Initialize central difference scheme matrix
self.eps_mat[:] = self.eps_gradient_f * xp.eye(self.model.n_hyperparameters)
self.theta_mat[:] = xp.repeat(
get_device(theta_i).reshape(-1, 1), self.n_f_evaluations, axis=1
self.gradient_method.get_evaluation_directions(
direction_matrix=self.theta_mat, theta=theta_i
)
self.theta_mat[:, 1 : 1 + self.model.n_hyperparameters] += self.eps_mat
self.theta_mat[
:, self.model.n_hyperparameters + 1 : self.n_f_evaluations
] -= self.eps_mat

# Proceed to the parallel function evaluation
for feval_i in range(self.n_f_evaluations - 1, -1, -1):
Expand All @@ -593,11 +607,9 @@ def _objective_function(
synchronize(comm=self.comm_world)

# Compute gradient using central difference scheme
for i in range(self.model.n_hyperparameters):
self.gradient_f[i] = (
self.f_values_i[i + 1]
- self.f_values_i[self.model.n_hyperparameters + i + 1]
) / (2 * self.eps_gradient_f)
self.gradient_method.compute_gradient(
function_evaluations=self.f_values_i, gradient=self.gradient_f
)

f_0 = get_host(self.f_values_i[0])
grad_f = get_host(self.gradient_f)
Expand Down Expand Up @@ -794,11 +806,15 @@ def compute_covariance_hp(self, theta_i: NDArray) -> NDArray:
# flush=True,
# )
cov_theta = xp.linalg.inv(hess_theta)
synchronize(comm=self.comm_world)
synchronize(comm=self.comm_world)
toc = time.perf_counter()
t_covariance_hp = toc - tic
print_msg("Time to compute covariance of hyperparameters:", t_covariance_hp, flush=True)

print_msg(
"Time to compute covariance of hyperparameters:",
t_covariance_hp,
flush=True,
)

return cov_theta

def _evaluate_hessian_f(
Expand Down
59 changes: 59 additions & 0 deletions src/dalia/core/gradient_method.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from abc import ABC, abstractmethod

from dalia import xp


class GradientMethod(ABC):
    """Abstract interface for finite-difference gradient computation methods."""

    def __init__(self, basis_size: int, finite_difference_epsilon: float) -> None:
        """Store the finite-difference settings and create the initial basis.

        Parameters
        ----------
        basis_size : int
            The size of the basis for finite differences.
        finite_difference_epsilon : float
            The epsilon value for finite difference computations.
        """
        self.finite_difference_epsilon = finite_difference_epsilon
        self.basis_size = basis_size
        # The basis starts out as the float64 identity matrix of order
        # ``basis_size``.
        self.basis = xp.identity(basis_size, dtype=xp.float64)

    @abstractmethod
    def get_evaluation_directions(self, direction_matrix, theta) -> None:
        """Provide the evaluation directions needed for the gradient.

        Parameters
        ----------
        direction_matrix : xp.ndarray
            The matrix to store the evaluation directions.
        theta : xp.ndarray
            The current (hyper)parameter values.

        Returns
        -------
        None
        """

    @abstractmethod
    def compute_gradient(self, function_evaluations, gradient) -> None:
        """Turn function evaluations into a finite-difference gradient.

        Parameters
        ----------
        function_evaluations : xp.ndarray
            The function evaluations at the current and perturbed points.
        gradient : xp.ndarray
            The array to store the computed gradient.

        Returns
        -------
        None
        """
6 changes: 6 additions & 0 deletions src/dalia/gradient_methods/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright 2024-2025 DALIA authors. All rights reserved.

from dalia.gradient_methods.vanilla_gradient import VanillaGradient
from dalia.gradient_methods.smart_gradient import SmartGradient

__all__ = ["VanillaGradient", "SmartGradient"]
Loading