diff --git a/src/dalia/configs/likelihood_config.py b/src/dalia/configs/likelihood_config.py index 74f48ecd..c472ade7 100644 --- a/src/dalia/configs/likelihood_config.py +++ b/src/dalia/configs/likelihood_config.py @@ -18,6 +18,7 @@ class LikelihoodConfig(BaseModel, ABC): model_config = ConfigDict(extra="forbid") type: Literal["gaussian", "poisson", "binomial"] = None + method: Literal["exact", "finite_difference"] = "exact" # TODO: cleaner way to let user fix hyperparameters fix_hyperparameters: bool = False diff --git a/src/dalia/core/likelihood.py b/src/dalia/core/likelihood.py index 27e13adb..0f713aa6 100644 --- a/src/dalia/core/likelihood.py +++ b/src/dalia/core/likelihood.py @@ -19,13 +19,46 @@ def __init__( self.config = config self.n_observations = n_observations + def gradient_likelihood(self, eta, y, h=1e-4, **kwargs): + if self.config.method == "exact": + return self.evaluate_gradient_likelihood(eta, y, **kwargs) + elif self.config.method == "finite_difference": + grad = self.finite_difference_gradient_likelihood(eta, y, h, **kwargs) + return grad + else: + raise NotImplementedError(f"Method {self.config.method} not implemented.") + # ref = self.evaluate_gradient_likelihood(eta, y, **kwargs) + # grad = self.finite_difference_gradient_likelihood(eta, y, h, **kwargs) + # assert xp.allclose(ref, grad), f"Gradient mismatch: {ref} vs {grad}" + # return grad + + def hessian_likelihood(self, h: float = 1e-2, **kwargs): + if self.config.method == "exact": + return self.evaluate_hessian_likelihood(**kwargs) + elif self.config.method == "finite_difference": + kwargs = kwargs or {} + kwargs["h"] = h + hess = self.finite_difference_hessian_likelihood(**kwargs) + return hess + else: + raise NotImplementedError(f"Method {self.config.method} not implemented.") + # ref = self.evaluate_hessian_likelihood(**kwargs) + # ref_diag = ref.diagonal() + # kwargs = kwargs or {} + # kwargs["h"] = 1e-3 + # hess = self.finite_difference_hessian_likelihood(**kwargs) + # rel_error = xp.linalg.norm(ref_diag - hess) / xp.linalg.norm(ref_diag) + # if not xp.allclose(ref_diag, hess): + # print(f"Hessian mismatch: {rel_error}") + # return hess + @abstractmethod def evaluate_likelihood( self, eta: NDArray, y: NDArray, **kwargs, - ) -> float: + ) -> NDArray: """Evaluate the likelihood. Parameters @@ -45,7 +78,33 @@ def evaluate_likelihood( """ pass - @abstractmethod + def evaluate_sum_likelihood( + self, + eta: NDArray, + y: NDArray, + **kwargs, + ) -> float: + """Evaluate the sum of the likelihood over all observations. + + Parameters + ---------- + eta : NDArray + Vector of the linear predictor. + y : NDArray + Vector of the observations. + kwargs : + theta : float + Specific parameter for the likelihood calculation. + + Returns + ------- + sum_likelihood : float + Sum of the likelihood over all observations. + """ + likelihood = self.evaluate_likelihood(eta, y, **kwargs) + sum_likelihood = float(likelihood.sum()) + return sum_likelihood + def evaluate_gradient_likelihood( self, eta: NDArray, @@ -56,10 +115,10 @@ def evaluate_gradient_likelihood( Parameters ---------- - y : NDArray - Vector of the observations. eta : NDArray Vector of the linear predictor. + y : NDArray + Vector of the observations. **kwargs : optional Hyperparameters for likelihood. @@ -68,11 +127,50 @@ def evaluate_gradient_likelihood( gradient_likelihood : NDArray Gradient of the likelihood. """ - pass + self.finite_difference_gradient_likelihood(eta, y, **kwargs) + + def finite_difference_gradient_likelihood( + self, + eta: NDArray, + y: NDArray, + h: float = 1e-4, + **kwargs, + ) -> NDArray: + """Evaluate the finite difference gradient of the likelihood wrt to eta = Ax. + + Parameters + ---------- + eta : NDArray + Vector of the linear predictor. + y : NDArray + Vector of the observations. + h : float + Finite difference step size. + **kwargs : optional + Hyperparameters for likelihood. + + Returns + ------- + finite_difference_gradient : NDArray + Finite difference gradient of the likelihood. + + Notes + ----- + The Gradient of the likelihood is computed using a five-point stencil as follows: + + .. math:: \grad{f}=\frac{-f(x+2h) + 8f(x+h) - 8f(x-h) + f(x-2h)}{12h} + """ + f1 = self.evaluate_likelihood(eta + h, y, **kwargs) + f2 = self.evaluate_likelihood(eta + 2 * h, y, **kwargs) + b1 = self.evaluate_likelihood(eta - h, y, **kwargs) + b2 = self.evaluate_likelihood(eta - 2 * h, y, **kwargs) + grad = (-f2 + 8 * f1 - 8 * b1 + b2) / (12 * h) + return grad - @abstractmethod def evaluate_hessian_likelihood( self, + eta: NDArray, + y: NDArray, **kwargs, ) -> ArrayLike: """Evaluate the Hessian of the likelihood wrt to eta = Ax. @@ -86,10 +184,48 @@ def evaluate_hessian_likelihood( **kwargs : optional Hyperparameters for likelihood. - Returns ------- hessian_likelihood : ArrayLike Hessian of the likelihood. """ - pass + self.finite_difference_hessian_likelihood(eta, y, **kwargs) + + def finite_difference_hessian_likelihood( + self, + eta: NDArray, + y: NDArray, + h: float = 1e-2, + **kwargs, + ) -> NDArray: + """Evaluate the finite difference Hessian of the likelihood wrt to eta = Ax. + + Parameters + ---------- + eta : NDArray + Vector of the linear predictor. + y : NDArray + Vector of the observations. + h : float + Finite difference step size. + **kwargs : optional + Hyperparameters for likelihood. + + Returns + ------- + finite_difference_hessian : NDArray + Finite difference hessian of the likelihood. + + Notes + ----- + The Hessian of the likelihood is computed using a five-point stencil as follows: + + .. math:: \hess{f}=\frac{-f(x+2h) + 16f(x+h) - 30f(x) + 16f(x-h) - f(x-2h)}{12h^2} + """ + f1 = self.evaluate_likelihood(eta + h, y, **kwargs) + f2 = self.evaluate_likelihood(eta + 2 * h, y, **kwargs) + b1 = self.evaluate_likelihood(eta - h, y, **kwargs) + b2 = self.evaluate_likelihood(eta - 2 * h, y, **kwargs) + c = self.evaluate_likelihood(eta, y, **kwargs) + hessian = (-f2 + 16 * f1 - 30 * c + 16 * b1 - b2) / (12 * h * h) + return hessian diff --git a/src/dalia/core/model.py b/src/dalia/core/model.py index 91608d62..ede0e035 100644 --- a/src/dalia/core/model.py +++ b/src/dalia/core/model.py @@ -431,19 +431,30 @@ def construct_Q_conditional( d_matrix = self.submodels[0].evaluate_d_matrix(**kwargs) else: # General rules - d_matrix = self.likelihood.evaluate_hessian_likelihood(**kwargs) + kwargs["y"] = self.y + d_matrix = self.likelihood.hessian_likelihood(**kwargs) + + if d_matrix.ndim == 1: + d_matrix_diagonal_0 = d_matrix[0] + d_matrix = sp.sparse.diags(d_matrix) + elif d_matrix.ndim == 2: + d_matrix_diagonal_0 = d_matrix.diagonal()[0] + else: + raise ValueError("d_matrix must be 1D or 2D array.") # if self.a is sparse -> Q_conditional should be sparse, else dense if sp.sparse.issparse(self.a): if self.aTa is not None: - self.Q_conditional = self.Q_prior - d_matrix.diagonal()[0] * self.aTa + # self.Q_conditional = self.Q_prior - d_matrix.diagonal()[0] * self.aTa + self.Q_conditional = self.Q_prior - d_matrix_diagonal_0 * self.aTa else: self.Q_conditional = self.Q_prior - self.a.T @ d_matrix @ self.a # self.Q_conditional = self.Q_prior - self.a.T @ d_matrix @ self.a else: if self.aTa is not None: self.Q_conditional = ( - self.Q_prior.toarray() - d_matrix.diagonal()[0] * self.aTa + # self.Q_prior.toarray() - d_matrix.diagonal()[0] * self.aTa + self.Q_prior.toarray() - d_matrix_diagonal_0 * self.aTa ) else: self.Q_conditional = ( @@ -467,7 +478,7 @@ def construct_information_vector( ) else: - gradient_likelihood = self.likelihood.evaluate_gradient_likelihood( + gradient_likelihood = self.likelihood.gradient_likelihood( eta=eta, y=self.y, theta=self.theta[self.hyperparameters_idx[-1] :], @@ -540,7 +551,7 @@ def evaluate_likelihood(self, eta: NDArray, **kwargs) -> float: kwargs["h2"] = float(self.theta[0]) likelihood = self.submodels[0].evaluate_likelihood(eta, self.y, **kwargs) else: - likelihood = self.likelihood.evaluate_likelihood( + likelihood = self.likelihood.evaluate_sum_likelihood( eta, self.y, theta=self.theta[self.hyperparameters_idx[-1] :] ) diff --git a/src/dalia/likelihoods/binomial.py b/src/dalia/likelihoods/binomial.py index 53c2754d..9156352e 100644 --- a/src/dalia/likelihoods/binomial.py +++ b/src/dalia/likelihoods/binomial.py @@ -43,7 +43,7 @@ def evaluate_likelihood( eta: NDArray, y: NDArray, **kwargs, - ) -> float: + ) -> NDArray: """Evalutate the a binomial likelihood. Parameters @@ -64,9 +64,10 @@ def evaluate_likelihood( """ linkEta: NDArray = self.link_function(eta) - likelihood: float = xp.dot(y, xp.log(linkEta)) + xp.dot( - self.n_trials - y, xp.log(1 - linkEta) - ) + # likelihood: float = xp.dot(y, xp.log(linkEta)) + xp.dot( + # self.n_trials - y, xp.log(1 - linkEta) + # ) + likelihood = y * xp.log(linkEta) + (self.n_trials - y) * xp.log(1 - linkEta) return likelihood diff --git a/src/dalia/likelihoods/gaussian.py b/src/dalia/likelihoods/gaussian.py index ee69bd16..9997259a 100644 --- a/src/dalia/likelihoods/gaussian.py +++ b/src/dalia/likelihoods/gaussian.py @@ -21,7 +21,7 @@ def evaluate_likelihood( eta: NDArray, y: NDArray, **kwargs, - ) -> float: + ) -> NDArray: """Evaluate a Gaussian likelihood. Notes @@ -55,9 +55,8 @@ def evaluate_likelihood( yEta = eta - y # print("xp.exp(theta) in lh:", xp.exp(theta)) - likelihood: float = ( - 0.5 * theta * self.n_observations - 0.5 * xp.exp(theta) * yEta.T @ yEta - ) + likelihood = 0.5 * theta - 0.5 * xp.exp(theta) * yEta * yEta + return likelihood diff --git a/src/dalia/likelihoods/poisson.py b/src/dalia/likelihoods/poisson.py index 6fbea733..74e9679d 100644 --- a/src/dalia/likelihoods/poisson.py +++ b/src/dalia/likelihoods/poisson.py @@ -18,7 +18,7 @@ def __init__( config: PoissonLikelihoodConfig, ) -> None: """Initializes the Poisson likelihood.""" - super().__init__(config, n_observations) + super().__init__(n_observations, config) # Load the extra coeficients for Poisson likelihood try: @@ -37,8 +37,9 @@ def evaluate_likelihood( eta: NDArray, y: NDArray, **kwargs, - ) -> float: - likelihood: float = xp.dot(eta, y) - xp.sum(self.e * xp.exp(eta)) + ) -> NDArray: + # likelihood: float = xp.dot(eta, y) - xp.sum(self.e * xp.exp(eta)) + likelihood = eta * y - self.e * xp.exp(eta) return likelihood diff --git a/src/dalia/models/coregional_model.py b/src/dalia/models/coregional_model.py index 3f5717f7..3d9c5d33 100644 --- a/src/dalia/models/coregional_model.py +++ b/src/dalia/models/coregional_model.py @@ -630,9 +630,19 @@ def construct_Q_conditional( } # d_list[i] = model.likelihood.evaluate_hessian_likelihood(**kwargs) + kwargs["y"] = self.y[self.n_observations_idx[i] : self.n_observations_idx[i + 1]] + hessian = model.likelihood.hessian_likelihood(**kwargs) + if hessian.ndim == 1: + diag = hessian + elif hessian.ndim == 2: + diag = hessian.diagonal() + else: + raise ValueError( + "Hessian of the likelihood must be either 1D or 2D array." + ) d_vec[ self.n_observations_idx[i] : self.n_observations_idx[i + 1] - ] = model.likelihood.evaluate_hessian_likelihood(**kwargs).diagonal() + ] = diag self.Qconditional = self.custom_Q_ATDA( Q=self.Q_prior, @@ -653,7 +663,7 @@ def construct_information_vector( gradient_vector_list = [] for i, model in enumerate(self.models): - gradient_likelihood = model.likelihood.evaluate_gradient_likelihood( + gradient_likelihood = model.likelihood.gradient_likelihood( eta=eta[self.n_observations_idx[i] : self.n_observations_idx[i + 1]], y=self.y[self.n_observations_idx[i] : self.n_observations_idx[i + 1]], theta=float(self.theta[self.hyperparameters_idx[i + 1] - 1]), @@ -682,7 +692,7 @@ def evaluate_likelihood( ) -> float: likelihood: float = 0.0 for i, model in enumerate(self.models): - likelihood += model.likelihood.evaluate_likelihood( + likelihood += model.likelihood.evaluate_sum_likelihood( eta=eta[self.n_observations_idx[i] : self.n_observations_idx[i + 1]], y=self.y[self.n_observations_idx[i] : self.n_observations_idx[i + 1]], theta=float(self.theta[self.hyperparameters_idx[i + 1] - 1]), diff --git a/src/dalia/submodels/brainiac.py b/src/dalia/submodels/brainiac.py index 61e01bdf..b3154a61 100644 --- a/src/dalia/submodels/brainiac.py +++ b/src/dalia/submodels/brainiac.py @@ -98,6 +98,8 @@ def evaluate_likelihood(self, eta: NDArray, y: NDArray, **kwargs) -> float: return likelihood + # TODO/NOTE: Maybe specialize the Gaussian likelihood in its own class + # and have BrainiacSubModel use it as its likelihood? def evaluate_gradient_likelihood( self, eta: NDArray, y: NDArray, **kwargs ) -> NDArray: