From 1f2a204286c91121e4f0c6a905c9c18d6f6249b8 Mon Sep 17 00:00:00 2001 From: lisa-gm Date: Thu, 24 Jul 2025 17:31:10 +0300 Subject: [PATCH 01/26] added check consistency function. bcast somehow not working for cupy the way im calling it now --- src/dalia/core/dalia.py | 28 ++++++++++++++ src/dalia/utils/__init__.py | 2 + src/dalia/utils/multiprocessing.py | 61 ++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py index ac06436f..73d8ba76 100644 --- a/src/dalia/core/dalia.py +++ b/src/dalia/core/dalia.py @@ -26,6 +26,7 @@ smartsplit, synchronize, synchronize_gpu, + check_vector_consistency ) if backend_flags["mpi_avail"]: @@ -304,9 +305,17 @@ def run(self) -> dict: theta_star = get_device(minimization_result["theta"]) x_star = get_device(minimization_result["x"]) + # need to update theta_star and x_star to be the same across all ranks + theta_star[:] = self.comm_world.bcast(theta_star, root=0) + x_star[:] = self.comm_world.bcast(x_star, root=0) + # compute covariance of the hyperparameters theta at the mode cov_theta = self.compute_covariance_hp(theta_star) + # need to update theta_star and x_star to be the same across all ranks + theta_star[:] = self.comm_world.bcast(theta_star, root=0) + x_star[:] = self.comm_world.bcast(x_star, root=0) + # compute marginal variances of the latent parameters marginal_variances_latent = self.get_marginal_variances_latent_parameters( theta_star, x_star @@ -349,6 +358,12 @@ def minimize(self) -> optimize.OptimizeResult: Result of the optimization procedure. """ + # ensure that all ranks are initialized to the same theta + check_vector_consistency( + self.model.theta, + comm=self.comm_world, + ) + if len(self.model.theta) == 0: # Only run the inner iteration print_msg("No hyperparameters, just running inner iteration.") @@ -777,6 +792,13 @@ def compute_covariance_hp(self, theta_i: NDArray) -> NDArray: cov_theta : NDArray[dim_theta, dim_theta] Covariance matrix of the hyperparameters theta. """ + + # ensure that all ranks are initialized to the same theta + check_vector_consistency( + theta_i, + comm=self.comm_world, + ) + self.model.theta[:] = theta_i print_msg( f"Computing covariance of hyperparameters theta at {theta_i}.", @@ -982,6 +1004,9 @@ def get_marginal_variances_latent_parameters( raise ValueError( "BOTH or NEITHER theta and x_star must be provided to compute the marginal variances." ) + + check_vector_consistency(theta, comm=self.comm_world) + check_vector_consistency(x_star, comm=self.comm_world) # check order x_star ... -> potentially need to reorder marginal variances self._compute_covariance_latent_parameters(theta, x_star) @@ -1018,6 +1043,9 @@ def get_marginal_variances_observations( Marginal variances of the observations. """ + check_vector_consistency(theta, comm=self.comm_world) + check_vector_consistency(x_star, comm=self.comm_world) + if self.model.is_likelihood_gaussian(): # TODO: this should be only called by rank 0? if theta is None and x_star is None: diff --git a/src/dalia/utils/__init__.py b/src/dalia/utils/__init__.py index b1a25fe8..10cdf25e 100644 --- a/src/dalia/utils/__init__.py +++ b/src/dalia/utils/__init__.py @@ -21,6 +21,7 @@ smartsplit, synchronize, synchronize_gpu, + check_vector_consistency, DummyCommunicator, ) from dalia.utils.spmatrix_utils import bdiag_tiling, extract_diagonal, memory_footprint @@ -44,6 +45,7 @@ "allreduce", "allgather", "bcast", + "check_vector_consistency", "bdiag_tiling", "extract_diagonal", "memory_footprint", diff --git a/src/dalia/utils/multiprocessing.py b/src/dalia/utils/multiprocessing.py index 9ceeb6d6..f46a2637 100644 --- a/src/dalia/utils/multiprocessing.py +++ b/src/dalia/utils/multiprocessing.py @@ -134,6 +134,9 @@ def bcast( comm (CommunicatorType), optional: The communication group. Default is MPI.COMM_WORLD. """ + + print("Broadcasting data from root:", root, "to all processes. data :", data) + if backend_flags["mpi_avail"]: comm.Bcast(data, root=root) @@ -209,3 +212,61 @@ def smartsplit( color_new_group = 0 return active_comm, comm_new_group, color_new_group + +def check_vector_consistency( + theta: ArrayLike, + comm, +): + """ + Check if all processes have the same theta. + + Parameters: + ----------- + theta (ArrayLike): + The theta to check. + comm (CommunicatorType), optional: + The communication group. Default is MPI.COMM_WORLD. + """ + + synchronize(comm = comm) + + theta_ref = theta.copy() + bcast(theta_ref, root=0, comm=comm) + + # theta_host = get_host(theta) + # theta_ref_host = get_host(theta_ref) + + # if not np.array_equal(theta_host, theta_ref_host): + # norm_diff = np.linalg.norm(theta_host - theta_ref_host) + # raise ValueError( + # f"Process {comm.Get_rank()} has a different theta than the reference process." + # f" Expected: {theta_ref_host}, but got: {theta_host}. diff = {norm_diff:.4e}" + # ) + + array_module_name = get_array_module_name(theta) + if array_module_name == "cupy": + norm_diff = cp.linalg.norm(theta - theta_ref) + else: + norm_diff = np.linalg.norm(theta - theta_ref) + + + if norm_diff > 1e-10: + raise ValueError( + f"Process {comm.Get_rank()} has a different theta than the reference process." + f" Expected: {theta_ref}, but got: {theta}. diff = {norm_diff:.4e}" + ) + + +if __name__ == "__main__": + + # Initialize MPI + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + mpi_size = comm.Get_size() + + # Create a vector, intentionally make rank 1 different + theta = np.ones(5) + if backend_flags["mpi_avail"] and rank == 1: + theta[0] = 42 # Make it inconsistent on rank 1 + + check_vector_consistency(theta, comm) From 5a412575140d80d7bc367ebcb17dbe1cdb6f0045 Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Tue, 9 Sep 2025 14:41:25 +0200 Subject: [PATCH 02/26] first steps in the direction of ar1 --- examples/ar1/run.py | 47 +++++++++++++++++++ src/dalia/configs/submodels_config.py | 13 ++++++ src/dalia/submodels/ar1.py | 67 +++++++++++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 examples/ar1/run.py create mode 100644 src/dalia/submodels/ar1.py diff --git a/examples/ar1/run.py b/examples/ar1/run.py new file mode 100644 index 00000000..ce8d741a --- /dev/null +++ b/examples/ar1/run.py @@ -0,0 +1,47 @@ +import os +import sys + +import numpy as np +import scipy.sparse as sp + +from scipy.stats import multivariate_normal, poisson + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +if __name__ == "__main__": + + n = 5 + + s2 = 1 + phi = np.sqrt(0.5) + denom = s2 * (1 - phi**2) + + theta1 = np.exp(s2) + + diag = [(1 + phi**2) / denom] * n + diag[0] = diag[-1] = 1 / denom + off_diag = [-phi / denom] * (n - 1) + + # print(diag) + # print(off_diag) + + Q = sp.diags([diag, off_diag, off_diag], [0, -1, 1]) + Cov = np.linalg.inv(Q.toarray()) + + # print(Q.toarray()) + # print(np.linalg.inv(Q.toarray())) + # print(Q.toarray() @ np.linalg.inv(Q.toarray())) + + ############################################################## + + mv = multivariate_normal(mean=np.zeros(n), cov=Cov, seed=3) + + intercept = 2 + eta = mv.rvs() + intercept + + print(eta) + + E = [1] * n + y = poisson.rvs(E * np.exp(eta), random_state=3) + + print(y) diff --git a/src/dalia/configs/submodels_config.py b/src/dalia/configs/submodels_config.py index f31a9914..d53e0efe 100644 --- a/src/dalia/configs/submodels_config.py +++ b/src/dalia/configs/submodels_config.py @@ -39,6 +39,19 @@ def read_hyperparameters(self): return xp.array([]), [] +class AR1SubModelConfig(SubModelConfig): + + ## prior (in log-scale or not?) on k (marginal precision) or s2 marginal variance + s2: float = None # Marginal variance + ph_s2: PriorHyperparametersConfig = None + + ## prior on phi + phi: float = None # AR(1) coefficient + ph_phi: PriorHyperparametersConfig = None + ## check that phi is between -1 and 1 (use pc prior) + # check inla.doc("pc.cor1") + + class SpatioTemporalSubModelConfig(SubModelConfig): spatial_domain_dimension: PositiveInt = 2 diff --git a/src/dalia/submodels/ar1.py b/src/dalia/submodels/ar1.py new file mode 100644 index 00000000..c0058682 --- /dev/null +++ b/src/dalia/submodels/ar1.py @@ -0,0 +1,67 @@ +# Copyright 2024-2025 DALIA authors. All rights reserved. +from tabulate import tabulate + +from dalia import sp, xp +from dalia.configs.submodels_config import AR1SubModelConfig +from dalia.core.submodel import SubModel +from dalia.utils import add_str_header + + +class AR1SubModel(SubModel): + """Fit an AR(1) model.""" + + def __init__( + self, + config: AR1SubModelConfig, + ) -> None: + """Initializes the model.""" + super().__init__(config) + + self.n_latent_parameters: int = config.n_latent_parameters + + def construct_Q_prior(self, **kwargs) -> sp.sparse.coo_matrix: + """Construct the prior precision matrix.""" + + s2 = kwargs.get("s2") + phi = kwargs.get("phi") + denom = s2 * (1 - phi**2) + + diag = [(1 + phi**2) / denom] * self.n_latent_parameters + diag[0] = diag[-1] = 1 / denom + off_diag = [-phi / denom] * (self.n_latent_parameters - 1) + + self.Q_prior = sp.diags([diag, off_diag, off_diag], [0, -1, 1]) + + return self.Q_prior + + def __str__(self) -> str: + """String representation of the submodel.""" + str_representation = "" + + # --- Make the Submodel table --- + values = [ + ["Submodel Type", self.submodel_type], + ["Number of Latent Parameters", self.n_latent_parameters], + ] + submodel_table = tabulate( + values, + tablefmt="fancy_grid", + colalign=("left", "center"), + ) + + # Add the header title + submodel_table = add_str_header( + title=self.submodel_type.replace("_", " ").title(), + table=submodel_table, + ) + str_representation += submodel_table + + return str_representation + + +if __name__ == "__main__": + + n = 5 + + s2 = 1 + phi = xp.sqrt(0.5) From f568917f74235522789b005533c14b4bced59f86 Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Fri, 12 Sep 2025 15:39:11 +0200 Subject: [PATCH 03/26] ar1 model runs through. not sure if it computes the correct the thing or not --- examples/ar1/run.py | 47 ------ examples/g_ar1/generate_data.py | 90 ++++++++++++ examples/g_ar1/inputs_ar1/a.npz | 3 + examples/g_ar1/inputs_ar1/x.npy | 3 + examples/g_ar1/inputs_ar1/x_original.npy | 3 + examples/g_ar1/inputs_regression/a.npz | 3 + .../reference_outputs/theta_original.npy | 3 + .../g_ar1/reference_outputs/x_original.npy | 3 + examples/g_ar1/run.py | 138 ++++++++++++++++++ examples/g_ar1/y.npy | 3 + examples/p_ar1/e.npy | 3 + examples/p_ar1/generate_data.py | 86 +++++++++++ examples/p_ar1/inputs_ar1/a.npz | 3 + examples/p_ar1/inputs_ar1/x.npy | 3 + examples/p_ar1/inputs_ar1/x_original.npy | 3 + examples/p_ar1/inputs_regression/a.npz | 3 + .../reference_outputs/theta_original.npy | 3 + .../p_ar1/reference_outputs/x_original.npy | 3 + examples/p_ar1/run.py | 123 ++++++++++++++++ examples/p_ar1/y.npy | 3 + src/dalia/configs/submodels_config.py | 24 ++- src/dalia/core/model.py | 111 ++++++++++++-- src/dalia/submodels/__init__.py | 9 +- src/dalia/submodels/ar1.py | 36 +++-- 24 files changed, 627 insertions(+), 82 deletions(-) delete mode 100644 examples/ar1/run.py create mode 100644 examples/g_ar1/generate_data.py create mode 100644 examples/g_ar1/inputs_ar1/a.npz create mode 100644 examples/g_ar1/inputs_ar1/x.npy create mode 100644 examples/g_ar1/inputs_ar1/x_original.npy create mode 100644 examples/g_ar1/inputs_regression/a.npz create mode 100644 examples/g_ar1/reference_outputs/theta_original.npy create mode 100644 examples/g_ar1/reference_outputs/x_original.npy create mode 100644 examples/g_ar1/run.py create mode 100644 examples/g_ar1/y.npy create mode 100644 examples/p_ar1/e.npy create mode 100644 examples/p_ar1/generate_data.py create mode 100644 examples/p_ar1/inputs_ar1/a.npz create mode 100644 examples/p_ar1/inputs_ar1/x.npy create mode 100644 examples/p_ar1/inputs_ar1/x_original.npy create mode 100644 examples/p_ar1/inputs_regression/a.npz create mode 100644 examples/p_ar1/reference_outputs/theta_original.npy create mode 100644 examples/p_ar1/reference_outputs/x_original.npy create mode 100644 examples/p_ar1/run.py create mode 100644 examples/p_ar1/y.npy diff --git a/examples/ar1/run.py b/examples/ar1/run.py deleted file mode 100644 index ce8d741a..00000000 --- a/examples/ar1/run.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -import sys - -import numpy as np -import scipy.sparse as sp - -from scipy.stats import multivariate_normal, poisson - -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) - -if __name__ == "__main__": - - n = 5 - - s2 = 1 - phi = np.sqrt(0.5) - denom = s2 * (1 - phi**2) - - theta1 = np.exp(s2) - - diag = [(1 + phi**2) / denom] * n - diag[0] = diag[-1] = 1 / denom - off_diag = [-phi / denom] * (n - 1) - - # print(diag) - # print(off_diag) - - Q = sp.diags([diag, off_diag, off_diag], [0, -1, 1]) - Cov = np.linalg.inv(Q.toarray()) - - # print(Q.toarray()) - # print(np.linalg.inv(Q.toarray())) - # print(Q.toarray() @ np.linalg.inv(Q.toarray())) - - ############################################################## - - mv = multivariate_normal(mean=np.zeros(n), cov=Cov, seed=3) - - intercept = 2 - eta = mv.rvs() + intercept - - print(eta) - - E = [1] * n - y = poisson.rvs(E * np.exp(eta), random_state=3) - - print(y) diff --git a/examples/g_ar1/generate_data.py b/examples/g_ar1/generate_data.py new file mode 100644 index 00000000..3c7390a3 --- /dev/null +++ b/examples/g_ar1/generate_data.py @@ -0,0 +1,90 @@ +import os +import sys + +import numpy as np +import scipy.sparse as sp + +from scipy.stats import multivariate_normal, poisson + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +if __name__ == "__main__": + + n = 100 + + ## define priors + s2 = 0.05 # 0.7 + tau = 1 / s2 + phi = 0.5 # 0.9 + # noise obs + obs_noise_prec = 100 + theta_original = [ + phi, + tau, + obs_noise_prec, + ] + + denom = s2 * (1 - phi**2) + + diag = [(1 + phi**2) / denom] * n + diag[0] = diag[-1] = 1 / denom + off_diag = [-phi / denom] * (n - 1) + + Q = sp.diags([diag, off_diag, off_diag], [0, -1, 1]) + L = np.linalg.cholesky(Q.toarray()) + Cov = np.linalg.inv(Q.toarray()) + + geom_mean = np.exp(np.mean(np.log(Cov.diagonal()))) + print("Geometric mean of Qinv diagonal: ", geom_mean) + + print(Q.toarray()) + print(np.linalg.inv(Q.toarray())) + print(np.round(Q.toarray() @ np.linalg.inv(Q.toarray()), 6)) + # exit() + + mv = multivariate_normal(mean=np.zeros(n), cov=Cov, seed=3) + + intercept = 2 + u = mv.rvs() + x = np.concatenate((u, [intercept])) + print("x: ", x) + np.save("reference_outputs/x_original.npy", x) + x_initial = u + np.random.normal(0, 0.3, size=len(u)) + np.save("inputs_ar1/x.npy", u) + np.save("reference_outputs/theta_original.npy", theta_original) + + a_ar1 = sp.eye(n) + sp.save_npz("inputs_ar1/a.npz", a_ar1) + + a_regression = sp.csr_matrix(np.ones((n, 1))) + sp.save_npz("inputs_regression/a.npz", a_regression) + + eta = a_ar1 @ u + intercept + + print("eta: ", eta) + np.save("inputs_ar1/x_original.npy", eta) + + noise = np.random.normal(0, np.sqrt(1 / obs_noise_prec), size=eta.shape) + print("noise: ", noise) + y = eta + noise + np.save("y.npy", y) + + print("y: ", y) + + Qprior = sp.block_diag([Q, sp.csr_matrix([[0.001]])]) + # print("Qprior : \n", Qprior.toarray()) + + a = sp.hstack([a_ar1, a_regression]) + Qcond = Qprior + obs_noise_prec * a.T @ a + print("Qcond: \n", Qcond.toarray()) + + b = obs_noise_prec * a.T @ y + # -xp.exp(theta) * (eta - y) + # beta_initial + np.linalg.solve( + # Qconditional.toarray(), information_vector + # ) + x_est = np.linalg.solve(Qcond.toarray(), b) + print("x_est: ", x_est) + + print("eta est : ", a @ x_est) + print("eta : ", a @ x) diff --git a/examples/g_ar1/inputs_ar1/a.npz b/examples/g_ar1/inputs_ar1/a.npz new file mode 100644 index 00000000..d51fab3a --- /dev/null +++ b/examples/g_ar1/inputs_ar1/a.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39c51ba4cc5f784792206abc0d5806f39d9a2360c7542ba9db981f91da7ae75b +size 779 diff --git a/examples/g_ar1/inputs_ar1/x.npy b/examples/g_ar1/inputs_ar1/x.npy new file mode 100644 index 00000000..0b841bb7 --- /dev/null +++ b/examples/g_ar1/inputs_ar1/x.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc08173f8d41478b3fa1defbb8f50eb977e95641f0ee2389a88e596635f23c8 +size 928 diff --git a/examples/g_ar1/inputs_ar1/x_original.npy b/examples/g_ar1/inputs_ar1/x_original.npy new file mode 100644 index 00000000..4fdb26c6 --- /dev/null +++ b/examples/g_ar1/inputs_ar1/x_original.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b4ae12954e6c92c282859a2ac998fbaf94b3d8995d2f2c6d46ac2748782e4b +size 928 diff --git a/examples/g_ar1/inputs_regression/a.npz b/examples/g_ar1/inputs_regression/a.npz new file mode 100644 index 00000000..8e5c1dcd --- /dev/null +++ b/examples/g_ar1/inputs_regression/a.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a855bd6a88e63486a3f776cbc51aab0ae1ceff310f19bf5a09e5fb3f0c7507e +size 1136 diff --git a/examples/g_ar1/reference_outputs/theta_original.npy b/examples/g_ar1/reference_outputs/theta_original.npy new file mode 100644 index 00000000..370f0d04 --- /dev/null +++ b/examples/g_ar1/reference_outputs/theta_original.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42befec2428694901b72e036df13b4baeec3ed5680163ff5849ab4c59d8a5202 +size 152 diff --git a/examples/g_ar1/reference_outputs/x_original.npy b/examples/g_ar1/reference_outputs/x_original.npy new file mode 100644 index 00000000..5a432a10 --- /dev/null +++ b/examples/g_ar1/reference_outputs/x_original.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d0cd377b2101c5e0cd01676ffc1c65146570824634cd43c7b60ce2648ecab9e +size 936 diff --git a/examples/g_ar1/run.py b/examples/g_ar1/run.py new file mode 100644 index 00000000..d42155b7 --- /dev/null +++ b/examples/g_ar1/run.py @@ -0,0 +1,138 @@ +import sys +import os + +parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.append(parent_dir) + +import numpy as np + +from dalia import xp, sp, backend_flags +from dalia.configs import likelihood_config, dalia_config, submodels_config +from dalia.core.model import Model +from dalia.core.dalia import DALIA +from dalia.submodels import AR1SubModel, RegressionSubModel +from dalia.utils import get_host, print_msg, scaled_logit # , extract_diagonal +from examples_utils.parser_utils import parse_args + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +if __name__ == "__main__": + + n = 20 + + # load reference output + theta_original = np.load("reference_outputs/theta_original.npy") + print( + "theta original: ", + theta_original[0], + xp.log(theta_original[1]), + xp.log(theta_original[2]), + ) + + x_original = np.load("reference_outputs/x_original.npy") + print("x original: ", x_original[:10]) + print("dim(x original): ", x_original.shape) + + ar1_dict = { + "type": "ar1", + "input_dir": f"{BASE_DIR}/inputs_ar1", + "n_latent_parameters": n, + "phi": theta_original[0], # has to be between 0 and 1 + "tau": xp.log(theta_original[1]), # assume to already be in log-scale + "ph_phi": {"type": "beta", "alpha": 5.0, "beta": 1.0}, + "ph_tau": {"type": "gaussian", "mean": 0.0, "precision": 0.5}, + } + ar1 = AR1SubModel( + config=submodels_config.parse_config(ar1_dict), + ) + + # Configurations of the regression submodel + regression_dict = { + "type": "regression", + "input_dir": f"{BASE_DIR}/inputs_regression", + "n_fixed_effects": 1, + "fixed_effects_prior_precision": 0.001, + } + regression = RegressionSubModel( + config=submodels_config.parse_config(regression_dict), + ) + + likelihood_dict = { + "type": "gaussian", + "prec_o": xp.log(theta_original[2]), + "prior_hyperparameters": { + "type": "penalized_complexity", + "alpha": 0.01, + "u": 5, + }, + } + + model = Model( + submodels=[ar1, regression], + likelihood_config=likelihood_config.parse_config(likelihood_dict), + ) + print_msg(model) + + Qprior = model.construct_Q_prior() + print("Qprior: \n", Qprior.toarray()) + Qinv = np.linalg.inv(Qprior.toarray()) + geom_mean = np.exp(np.mean(np.log(Qinv.diagonal()))) + print("Geometric mean of Qinv diagonal: ", geom_mean) + + eta = model.a @ x_original + Qcond = model.construct_Q_conditional(eta=eta) + + b = model.construct_information_vector(eta=eta, x_i=x_original) + + x_est = np.linalg.solve(Qcond.toarray(), b) + print("x est: ", x_est) + + # L = np.linalg.cholesky(Qcond.toarray()) + + # plt.spy(Qcond, markersize=2) + # plt.title("Sparsity pattern of Qcond") + # plt.show() + + # Configurations of DALIA + dalia_dict = { + "solver": {"type": "dense"}, + "minimize": { + "max_iter": 100, + "gtol": 1e-3, + "disp": True, + "maxcor": len(model.theta), + }, + "f_reduction_tol": 1e-3, + "theta_reduction_tol": 1e-4, + "inner_iteration_max_iter": 50, + "eps_inner_iteration": 1e-3, + "eps_gradient_f": 1e-3, + "simulation_dir": ".", + } + + dalia = DALIA( + model=model, + config=dalia_config.parse_config(dalia_dict), + ) + + print("theta: ", model.theta) + # print("x : ", model.x) + f_value = dalia._evaluate_f(model.theta) + print("after evaluate f. x: ", model.x) + + results = dalia.minimize() + + theta_unscaled = results["theta"] + theta = theta_unscaled.copy() + theta[0] = scaled_logit(theta_unscaled[0], direction="backward") + theta[1] = np.exp(theta_unscaled[1]) + theta[2] = np.exp(theta_unscaled[2]) + + print("theta: ", theta) + print("theta original: ", theta_original) + + print("x: ", results["x"]) + print("x_original: ", x_original) + + print("eta: ", model.a @ x_original) + print("eta est: ", model.a @ results["x"]) diff --git a/examples/g_ar1/y.npy b/examples/g_ar1/y.npy new file mode 100644 index 00000000..da273e71 --- /dev/null +++ b/examples/g_ar1/y.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f5feb5a0d80627d13631584ab50f48e4930a93c6549859dc920ed204af4de7d +size 928 diff --git a/examples/p_ar1/e.npy b/examples/p_ar1/e.npy new file mode 100644 index 00000000..526e4ebe --- /dev/null +++ b/examples/p_ar1/e.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaf01004764bc66325db832a330ff0233dfa9ea1dc7f216af544c057cc889d8c +size 168 diff --git a/examples/p_ar1/generate_data.py b/examples/p_ar1/generate_data.py new file mode 100644 index 00000000..b6843ca4 --- /dev/null +++ b/examples/p_ar1/generate_data.py @@ -0,0 +1,86 @@ +import os +import sys + +import numpy as np +import scipy.sparse as sp + +from scipy.stats import multivariate_normal, poisson + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +if __name__ == "__main__": + + n = 5 + + ## define priors + s2 = 0.2 # 0.7 + tau = 1 / s2 + phi = 0.5 # 0.9 + theta_original = [phi, tau] + + denom = s2 * (1 - phi**2) + + diag = [(1 + phi**2) / denom] * n + diag[0] = diag[-1] = 1 / denom + off_diag = [-phi / denom] * (n - 1) + + Q = sp.diags([diag, off_diag, off_diag], [0, -1, 1]) + L = np.linalg.cholesky(Q.toarray()) + Cov = np.linalg.inv(Q.toarray()) + + geom_mean = np.exp(np.mean(np.log(Cov.diagonal()))) + print("Geometric mean of Qinv diagonal: ", geom_mean) + + print(Q.toarray()) + print(np.linalg.inv(Q.toarray())) + print(np.round(Q.toarray() @ np.linalg.inv(Q.toarray()), 6)) + # exit() + + mv = multivariate_normal(mean=np.zeros(n), cov=Cov, seed=3) + + intercept = 2 + u = mv.rvs() + print("u: ", u) + eta = u + intercept + x = np.concatenate((u, [intercept])) + np.save("reference_outputs/x_original.npy", x) + x_initial = u + np.random.normal(0, 0.3, size=len(u)) + np.save("inputs_ar1/x.npy", u) + np.save("reference_outputs/theta_original.npy", theta_original) + + a_ar1 = sp.eye(n) + sp.save_npz("inputs_ar1/a.npz", a_ar1) + + a_regression = sp.csr_matrix(np.ones((n, 1))) + sp.save_npz("inputs_regression/a.npz", a_regression) + + print(eta) + np.save("inputs_ar1/x_original.npy", eta) + + # sample with repitition + E = np.random.choice([1, 2, 3], size=n, replace=True) + # E = [1] * n + np.save("e.npy", E) + + y = poisson.rvs(E * np.exp(eta), random_state=3) + np.save("y.npy", y) + + print(y) + + Qprior = sp.block_diag([Q, sp.csr_matrix([[0.001]])]) + # print("Qprior : \n", Qprior.toarray()) + + # a = sp.hstack([a_ar1, a_regression]) + # Qcond = Qprior + obs_noise_prec * a.T @ a + # print("Qcond: \n", Qcond.toarray()) + + # b = obs_noise_prec * a.T @ y + # # -xp.exp(theta) * (eta - y) + # # beta_initial + np.linalg.solve( + # # Qconditional.toarray(), information_vector + # # ) + # x_est = np.linalg.solve(Qcond.toarray(), b) + # print("x_est: ", x_est) + + # print("eta est : ", a @ x_est) + # print("eta : ", a @ x) diff --git a/examples/p_ar1/inputs_ar1/a.npz b/examples/p_ar1/inputs_ar1/a.npz new file mode 100644 index 00000000..14bf96bf --- /dev/null +++ b/examples/p_ar1/inputs_ar1/a.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e0eb3bd1b1be8837017f0fddc8cd0a474d7862976b62a74e9cb850fbf03a5b +size 772 diff --git a/examples/p_ar1/inputs_ar1/x.npy b/examples/p_ar1/inputs_ar1/x.npy new file mode 100644 index 00000000..7274143a --- /dev/null +++ b/examples/p_ar1/inputs_ar1/x.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a48c802155575049a4cf806aea44b97a6d21f982776418230418beb726c4e8 +size 168 diff --git a/examples/p_ar1/inputs_ar1/x_original.npy b/examples/p_ar1/inputs_ar1/x_original.npy new file mode 100644 index 00000000..bceb82b4 --- /dev/null +++ b/examples/p_ar1/inputs_ar1/x_original.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:649ab0d33fd12fbba48dc117928c5c97e8c49fecd1dfaa09f8dbe9ca59cc4cb0 +size 168 diff --git a/examples/p_ar1/inputs_regression/a.npz b/examples/p_ar1/inputs_regression/a.npz new file mode 100644 index 00000000..8160121f --- /dev/null +++ b/examples/p_ar1/inputs_regression/a.npz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08027fe4ecd1fb5d5169f41a9f2a1a602be1cf15477878e2a89d2405261d64ae +size 970 diff --git a/examples/p_ar1/reference_outputs/theta_original.npy b/examples/p_ar1/reference_outputs/theta_original.npy new file mode 100644 index 00000000..e65c3799 --- /dev/null +++ b/examples/p_ar1/reference_outputs/theta_original.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed23a90e2c583ecade6212f036ee6ffa5ab5a323a0c5aa95bc6bc74b2b070052 +size 144 diff --git a/examples/p_ar1/reference_outputs/x_original.npy b/examples/p_ar1/reference_outputs/x_original.npy new file mode 100644 index 00000000..3c5c2d03 --- /dev/null +++ b/examples/p_ar1/reference_outputs/x_original.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65c40fc6927dca0a458afc275b880b8973d86897b9cb41330e893eb90c3d6642 +size 176 diff --git a/examples/p_ar1/run.py b/examples/p_ar1/run.py new file mode 100644 index 00000000..4adc3510 --- /dev/null +++ b/examples/p_ar1/run.py @@ -0,0 +1,123 @@ +import sys +import os + +parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.append(parent_dir) + +import numpy as np + +from dalia import xp, sp, backend_flags +from dalia.configs import likelihood_config, dalia_config, submodels_config +from dalia.core.model import Model +from dalia.core.dalia import DALIA +from dalia.submodels import AR1SubModel, RegressionSubModel +from dalia.utils import get_host, print_msg, scaled_logit # , extract_diagonal +from examples_utils.parser_utils import parse_args + + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +if __name__ == "__main__": + + n = 5 + + # load reference output + theta_original = np.load("reference_outputs/theta_original.npy") + print("theta original: ", theta_original) + + x_original = np.load("reference_outputs/x_original.npy") + print("x original: ", x_original[:10]) + print("dim(x original): ", x_original.shape) + + ar1_dict = { + "type": "ar1", + "input_dir": f"{BASE_DIR}/inputs_ar1", + "n_latent_parameters": n, + "phi": theta_original[0], # has to be between 0 and 1 + "tau": xp.log(theta_original[1]), # assume to already be in log-scale + "ph_phi": {"type": "beta", "alpha": 5.0, "beta": 1.0}, + "ph_tau": {"type": "gaussian", "mean": 0.0, "precision": 0.5}, + } + ar1 = AR1SubModel( + config=submodels_config.parse_config(ar1_dict), + ) + + # Configurations of the regression submodel + regression_dict = { + "type": "regression", + "input_dir": f"{BASE_DIR}/inputs_regression", + "n_fixed_effects": 1, + "fixed_effects_prior_precision": 0.001, + } + regression = RegressionSubModel( + config=submodels_config.parse_config(regression_dict), + ) + + likelihood_dict = { + "type": "poisson", + "input_dir": f"{BASE_DIR}", + } + + model = Model( + submodels=[ar1, regression], + likelihood_config=likelihood_config.parse_config(likelihood_dict), + ) + print_msg(model) + + Qprior = model.construct_Q_prior() + print("Qprior: \n", Qprior.toarray()) + Qinv = np.linalg.inv(Qprior.toarray()) + geom_mean = np.exp(np.mean(np.log(Qinv.diagonal()))) + print("Geometric mean of Qinv diagonal: ", geom_mean) + + eta = model.a @ model.x + Qcond = model.construct_Q_conditional(eta=eta) + + # L = np.linalg.cholesky(Qcond.toarray()) + + # plt.spy(Qcond, markersize=2) + # plt.title("Sparsity pattern of Qcond") + # plt.show() + + # Configurations of DALIA + dalia_dict = { + "solver": {"type": "dense"}, + "minimize": { + "max_iter": 100, + "gtol": 1e-3, + "disp": True, + "maxcor": len(model.theta), + }, + "f_reduction_tol": 1e-3, + "theta_reduction_tol": 1e-4, + "inner_iteration_max_iter": 50, + "eps_inner_iteration": 1e-3, + "eps_gradient_f": 1e-3, + "simulation_dir": ".", + } + + dalia = DALIA( + model=model, + config=dalia_config.parse_config(dalia_dict), + ) + + print("theta: ", model.theta) + # print("x : ", model.x) + f_value = dalia._evaluate_f(model.theta) + print("after evaluate f. x: ", model.x) + + results = dalia.minimize() + + theta_unscaled = results["theta"] + theta = theta_unscaled.copy() + theta[0] = scaled_logit(theta_unscaled[0], direction="backward") + theta[1] = np.exp(theta_unscaled[1]) + + print("theta: ", theta) + print("theta original: ", theta_original) + + print("x: ", results["x"]) + print("x_original: ", x_original) + + print("eta: ", model.a @ x_original) + print("eta est: ", model.a @ results["x"]) diff --git a/examples/p_ar1/y.npy b/examples/p_ar1/y.npy new file mode 100644 index 00000000..9d269417 --- /dev/null +++ b/examples/p_ar1/y.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0c5615ed622778e4fb05eb3adf9d8584a1601562ccae1f90700ea9c7c05c9d5 +size 168 diff --git a/src/dalia/configs/submodels_config.py b/src/dalia/configs/submodels_config.py index d53e0efe..226e11c5 100644 --- a/src/dalia/configs/submodels_config.py +++ b/src/dalia/configs/submodels_config.py @@ -24,7 +24,7 @@ class SubModelConfig(BaseModel, ABC): # Input folder for this specific submodel input_dir: str = None - type: Literal["spatio_temporal", "spatial", "regression", "brainiac"] = None + type: Literal["spatio_temporal", "spatial", "regression", "brainiac", "ar1"] = None @abstractmethod def read_hyperparameters(self) -> tuple[ArrayLike, list]: @@ -41,16 +41,28 @@ def read_hyperparameters(self): class AR1SubModelConfig(SubModelConfig): - ## prior (in log-scale or not?) on k (marginal precision) or s2 marginal variance - s2: float = None # Marginal variance - ph_s2: PriorHyperparametersConfig = None + n_latent_parameters: PositiveInt = None ## prior on phi phi: float = None # AR(1) coefficient + phi_scaled: float = None ph_phi: PriorHyperparametersConfig = None ## check that phi is between -1 and 1 (use pc prior) # check inla.doc("pc.cor1") + ## prior (in log-scale or not?) on tau (marginal precision) or s2 marginal variance + tau: float = None # Marginal variance + ph_tau: PriorHyperparametersConfig = None + + def read_hyperparameters(self): + + # input of phi is in (0,1), rescale to -/+ INF + self.phi_scaled = scaled_logit(self.phi, direction="forward") + theta = xp.array([self.phi_scaled, self.tau]) + theta_keys = ["phi", "tau"] + + return theta, theta_keys + class SpatioTemporalSubModelConfig(SubModelConfig): spatial_domain_dimension: PositiveInt = 2 @@ -135,6 +147,10 @@ def parse_config(config: dict | str) -> SubModelConfig: config["ph_h2"] = parse_priorhyperparameters_config(config["ph_h2"]) config["ph_alpha"] = parse_priorhyperparameters_config(config["ph_alpha"]) return BrainiacSubModelConfig(**config) + elif type == "ar1": + config["ph_tau"] = parse_priorhyperparameters_config(config["ph_tau"]) + config["ph_phi"] = parse_priorhyperparameters_config(config["ph_phi"]) + return AR1SubModelConfig(**config) # Add more elif branches for other submodel types else: raise ValueError(f"Unknown submodel type: {type}") diff --git a/src/dalia/core/model.py b/src/dalia/core/model.py index 91608d62..91e3882d 100644 --- a/src/dalia/core/model.py +++ b/src/dalia/core/model.py @@ -30,6 +30,7 @@ RegressionSubModel, SpatialSubModel, SpatioTemporalSubModel, + AR1SubModel, ) from dalia.utils import add_str_header, boxify, scaled_logit @@ -154,6 +155,36 @@ def __init__( elif isinstance(submodel, RegressionSubModel): self.n_fixed_effects += submodel.n_fixed_effects + elif isinstance(submodel, AR1SubModel): + + if isinstance(submodel.config.ph_phi, BetaPriorHyperparametersConfig): + self.prior_hyperparameters.append( + BetaPriorHyperparameters( + config=submodel.config.ph_phi, + ) + ) + elif isinstance( + submodel.config.ph_phi, + PenalizedComplexityPriorHyperparametersConfig, + ): + self.prior_hyperparameters.append( + PenalizedComplexityPriorHyperparameters( + config=submodel.config.ph_phi, + hyperparameter_type="phi", + ) + ) + + if isinstance( + submodel.config.ph_tau, GaussianPriorHyperparametersConfig + ): + self.prior_hyperparameters.append( + GaussianPriorHyperparameters( + config=submodel.config.ph_tau, + ) + ) + else: + raise ValueError("Unknown prior hyperparameter type for ph_tau") + elif isinstance(submodel, BrainiacSubModel): # h2 hyperparameters if isinstance(submodel.config.ph_h2, BetaPriorHyperparametersConfig): @@ -334,6 +365,7 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: cols = [] data = [] + ## TODO: improve the if / elif statements for i, submodel in enumerate(self.submodels): if isinstance(submodel, SpatioTemporalSubModel): for hp_idx in range( @@ -350,6 +382,11 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + elif isinstance(submodel, AR1SubModel): + for hp_idx in range( + self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] + ): + kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) elif isinstance(submodel, RegressionSubModel): ... @@ -374,6 +411,13 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: shape=(self.n_latent_parameters, self.n_latent_parameters), ) + # print("in Qprior is none.") + # print("submodel.data: ", submodel_Q_prior.data) + # print("row indices: ", self.Q_prior.indices) + # print("col indptr: ", self.Q_prior.indptr) + # print("self.Q_prior.data: ", self.Q_prior.data) + # print("data mapping: ", self.Q_prior_data_mapping) + else: for i, submodel in enumerate(self.submodels): if isinstance(submodel, RegressionSubModel): @@ -393,13 +437,39 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + elif isinstance(submodel, AR1SubModel): + for hp_idx in range( + self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] + ): + kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) submodel_Q_prior = submodel.construct_Q_prior(**kwargs) + # print( + # "In model.py. submodel_Q_prior[:6, :6] : \n", + # submodel_Q_prior.toarray()[:6, :6], + # ) + + # print( + # "self.Q_prior_data_mapping[i] : self.Q_prior_data_mapping[i + 1]: ", + # self.Q_prior_data_mapping[i], + # ":", + # self.Q_prior_data_mapping[i + 1], + # ) + + # print("submodel.data: ", submodel_Q_prior.data) + self.Q_prior.data[ self.Q_prior_data_mapping[i] : self.Q_prior_data_mapping[i + 1] ] = submodel_Q_prior.data + # print("row indices: ", self.Q_prior.indices) + # print("col indptr: ", self.Q_prior.indptr) + # print("self.Q_prior.data: ", self.Q_prior.data) + # print("data mapping: ", self.Q_prior_data_mapping) + + # print("In model.py. Q_prior[:6, :6] : \n", self.Q_prior.toarray()[:6, :6]) + return self.Q_prior def construct_Q_conditional( @@ -487,23 +557,34 @@ def evaluate_log_prior_hyperparameters(self) -> float: """Evaluate the log prior hyperparameters.""" log_prior = 0.0 - # if BFGS and model scale differ: rescale -- generalize - if isinstance(self.submodels[0], BrainiacSubModel): - # - theta_interpret = self.theta.copy() - theta_interpret[0] = scaled_logit(self.theta[0], direction="backward") - log_prior += self.prior_hyperparameters[0].evaluate_log_prior( - theta_interpret[0] - ) + theta_interpret = self.theta - log_prior += self.prior_hyperparameters[1].evaluate_log_prior( - theta_interpret[1:] - ) - else: - theta_interpret = self.theta + for i, prior_hyperparameter in enumerate(self.prior_hyperparameters): + + if isinstance(prior_hyperparameter, BetaPriorHyperparameters): + theta_interpret[i] = scaled_logit( + theta_interpret[i], direction="backward" + ) - for i, prior_hyperparameter in enumerate(self.prior_hyperparameters): - log_prior += prior_hyperparameter.evaluate_log_prior(theta_interpret[i]) + log_prior += prior_hyperparameter.evaluate_log_prior(theta_interpret[i]) + + # if BFGS and model scale differ: rescale -- generalize + # if isinstance(self.submodels[0], BrainiacSubModel): + # # + # theta_interpret = self.theta.copy() + # theta_interpret[0] = scaled_logit(self.theta[0], direction="backward") + # log_prior += self.prior_hyperparameters[0].evaluate_log_prior( + # theta_interpret[0] + # ) + + # log_prior += self.prior_hyperparameters[1].evaluate_log_prior( + # theta_interpret[1:] + # ) + # else: + # theta_interpret = self.theta + + # for i, prior_hyperparameter in enumerate(self.prior_hyperparameters): + # log_prior += prior_hyperparameter.evaluate_log_prior(theta_interpret[i]) return log_prior diff --git a/src/dalia/submodels/__init__.py b/src/dalia/submodels/__init__.py index 2e2b78fb..c19f8b81 100644 --- a/src/dalia/submodels/__init__.py +++ b/src/dalia/submodels/__init__.py @@ -4,5 +4,12 @@ from dalia.submodels.spatial import SpatialSubModel from dalia.submodels.spatio_temporal import SpatioTemporalSubModel from dalia.submodels.brainiac import BrainiacSubModel +from dalia.submodels.ar1 import AR1SubModel -__all__ = ["RegressionSubModel", "SpatialSubModel", "SpatioTemporalSubModel", "BrainiacSubModel"] +__all__ = [ + "RegressionSubModel", + "SpatialSubModel", + "SpatioTemporalSubModel", + "BrainiacSubModel", + "AR1SubModel", +] diff --git a/src/dalia/submodels/ar1.py b/src/dalia/submodels/ar1.py index c0058682..c221cc5e 100644 --- a/src/dalia/submodels/ar1.py +++ b/src/dalia/submodels/ar1.py @@ -1,10 +1,12 @@ # Copyright 2024-2025 DALIA authors. All rights reserved. from tabulate import tabulate +import numpy as np + from dalia import sp, xp from dalia.configs.submodels_config import AR1SubModelConfig from dalia.core.submodel import SubModel -from dalia.utils import add_str_header +from dalia.utils import add_str_header, scaled_logit class AR1SubModel(SubModel): @@ -17,22 +19,32 @@ def __init__( """Initializes the model.""" super().__init__(config) - self.n_latent_parameters: int = config.n_latent_parameters - def construct_Q_prior(self, **kwargs) -> sp.sparse.coo_matrix: """Construct the prior precision matrix.""" - s2 = kwargs.get("s2") - phi = kwargs.get("phi") + tau = kwargs.get("tau") + exp_tau = xp.exp(tau) + phi_scaled = kwargs.get("phi") + # print("tau:", exp_tau) + # print("phi_scaled:", phi_scaled) + phi = scaled_logit(phi_scaled, direction="backward") + # print("phi:", phi) + s2 = 1 / exp_tau denom = s2 * (1 - phi**2) diag = [(1 + phi**2) / denom] * self.n_latent_parameters diag[0] = diag[-1] = 1 / denom off_diag = [-phi / denom] * (self.n_latent_parameters - 1) - self.Q_prior = sp.diags([diag, off_diag, off_diag], [0, -1, 1]) + ## TODO: how to do this more efficiently? + Q_prior = sp.sparse.diags([off_diag, diag, off_diag], [-1, 0, 1]) + Q_prior = Q_prior.tocsr() + Q_prior.sort_indices() + # print("Q_prior.data: ", Q_prior.data) - return self.Q_prior + # print("Qprior: \n", Q_prior.toarray()) + + return Q_prior.tocoo() def __str__(self) -> str: """String representation of the submodel.""" @@ -42,6 +54,8 @@ def __str__(self) -> str: values = [ ["Submodel Type", self.submodel_type], ["Number of Latent Parameters", self.n_latent_parameters], + ["Phi", f"{self.config.phi:.3f}"], + ["tau", f"{self.config.tau:.3f}"], ] submodel_table = tabulate( values, @@ -57,11 +71,3 @@ def __str__(self) -> str: str_representation += submodel_table return str_representation - - -if __name__ == "__main__": - - n = 5 - - s2 = 1 - phi = xp.sqrt(0.5) From 9d75a6ef4679883efb1c6929940607c2305ad15d Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Fri, 12 Sep 2025 16:29:33 +0200 Subject: [PATCH 04/26] no print in broadcasting function --- src/dalia/utils/multiprocessing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/dalia/utils/multiprocessing.py b/src/dalia/utils/multiprocessing.py index f46a2637..08cbb47c 100644 --- a/src/dalia/utils/multiprocessing.py +++ b/src/dalia/utils/multiprocessing.py @@ -135,7 +135,7 @@ def bcast( The communication group. Default is MPI.COMM_WORLD. """ - print("Broadcasting data from root:", root, "to all processes. data :", data) + # print("Broadcasting data from root:", root, "to all processes.") if backend_flags["mpi_avail"]: comm.Bcast(data, root=root) @@ -249,13 +249,12 @@ def check_vector_consistency( else: norm_diff = np.linalg.norm(theta - theta_ref) - if norm_diff > 1e-10: raise ValueError( f"Process {comm.Get_rank()} has a different theta than the reference process." f" Expected: {theta_ref}, but got: {theta}. diff = {norm_diff:.4e}" ) - + if __name__ == "__main__": From 0cf7d775616d925b434f55e4f13a52a5b3bdd2d8 Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Fri, 12 Sep 2025 16:53:51 +0200 Subject: [PATCH 05/26] removed extra print statements --- src/dalia/utils/multiprocessing.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/src/dalia/utils/multiprocessing.py b/src/dalia/utils/multiprocessing.py index 08cbb47c..b8b0b3c1 100644 --- a/src/dalia/utils/multiprocessing.py +++ b/src/dalia/utils/multiprocessing.py @@ -135,8 +135,6 @@ def bcast( The communication group. Default is MPI.COMM_WORLD. """ - # print("Broadcasting data from root:", root, "to all processes.") - if backend_flags["mpi_avail"]: comm.Bcast(data, root=root) @@ -233,16 +231,6 @@ def check_vector_consistency( theta_ref = theta.copy() bcast(theta_ref, root=0, comm=comm) - # theta_host = get_host(theta) - # theta_ref_host = get_host(theta_ref) - - # if not np.array_equal(theta_host, theta_ref_host): - # norm_diff = np.linalg.norm(theta_host - theta_ref_host) - # raise ValueError( - # f"Process {comm.Get_rank()} has a different theta than the reference process." - # f" Expected: {theta_ref_host}, but got: {theta_host}. diff = {norm_diff:.4e}" - # ) - array_module_name = get_array_module_name(theta) if array_module_name == "cupy": norm_diff = cp.linalg.norm(theta - theta_ref) @@ -254,18 +242,3 @@ def check_vector_consistency( f"Process {comm.Get_rank()} has a different theta than the reference process." f" Expected: {theta_ref}, but got: {theta}. diff = {norm_diff:.4e}" ) - - -if __name__ == "__main__": - - # Initialize MPI - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - mpi_size = comm.Get_size() - - # Create a vector, intentionally make rank 1 different - theta = np.ones(5) - if backend_flags["mpi_avail"] and rank == 1: - theta[0] = 42 # Make it inconsistent on rank 1 - - check_vector_consistency(theta, comm) From 6a1a3faaee7455f9b70a8bb125143a2373e8459b Mon Sep 17 00:00:00 2001 From: lisa-gm Date: Sun, 21 Sep 2025 10:21:03 +0300 Subject: [PATCH 06/26] added print gs small example --- examples/gs_small/run.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/gs_small/run.py b/examples/gs_small/run.py index e94f20d1..36edd2ef 100644 --- a/examples/gs_small/run.py +++ b/examples/gs_small/run.py @@ -58,6 +58,8 @@ likelihood_config=likelihood_config.parse_config(likelihood_dict), ) + print_msg(model) + # Configurations of DALIA dalia_dict = { "solver": {"type": "dense"}, From 4ab26f4ca9ffd59955a7b5743f3dcf4ecbdf35ea Mon Sep 17 00:00:00 2001 From: lisa-gm Date: Sun, 21 Sep 2025 18:55:46 +0300 Subject: [PATCH 07/26] gaussian ar1 seems to be working. still needs a bit of clean up --- examples/g_ar1/generate_data.py | 83 +++++++++++++++++++-------------- examples/g_ar1/run.py | 68 ++++++++++++++++----------- src/dalia/core/dalia.py | 7 ++- src/dalia/submodels/ar1.py | 4 +- 4 files changed, 95 insertions(+), 67 deletions(-) diff --git a/examples/g_ar1/generate_data.py b/examples/g_ar1/generate_data.py index 3c7390a3..6e3e5f75 100644 --- a/examples/g_ar1/generate_data.py +++ b/examples/g_ar1/generate_data.py @@ -3,19 +3,21 @@ import numpy as np import scipy.sparse as sp - -from scipy.stats import multivariate_normal, poisson +from scipy.sparse.linalg import spsolve, spsolve_triangular +from scipy.sparse import csc_matrix +from scipy.linalg import cholesky BASE_DIR = os.path.dirname(os.path.abspath(__file__)) if __name__ == "__main__": - n = 100 + np.random.seed(5) + n = 5000 ## define priors - s2 = 0.05 # 0.7 + s2 = 3 tau = 1 / s2 - phi = 0.5 # 0.9 + phi = 0.9 # noise obs obs_noise_prec = 100 theta_original = [ @@ -31,25 +33,37 @@ off_diag = [-phi / denom] * (n - 1) Q = sp.diags([diag, off_diag, off_diag], [0, -1, 1]) - L = np.linalg.cholesky(Q.toarray()) - Cov = np.linalg.inv(Q.toarray()) - - geom_mean = np.exp(np.mean(np.log(Cov.diagonal()))) - print("Geometric mean of Qinv diagonal: ", geom_mean) - - print(Q.toarray()) - print(np.linalg.inv(Q.toarray())) - print(np.round(Q.toarray() @ np.linalg.inv(Q.toarray()), 6)) - # exit() - - mv = multivariate_normal(mean=np.zeros(n), cov=Cov, seed=3) - + + # Compute sparse Cholesky factorization: Q = L @ L.T + # For tridiagonal matrix, we can use dense Cholesky on small blocks or scipy + Q_csc = Q.tocsc() + + print("Q shape:", Q.shape, "Q nnz:", Q.nnz) + print("Q sparsity:", 100 * Q.nnz / (Q.shape[0] * Q.shape[1]), "%") + print(Q.toarray()[:6, :6]) + + # Method 1: Use dense Cholesky (for moderate sizes this is still efficient) + Q_dense = Q.toarray() + L_dense = cholesky(Q_dense, lower=True) + L = csc_matrix(L_dense) + + print("L nnz:", L.nnz, "L sparsity:", 100 * L.nnz / (L.shape[0] * L.shape[1]), "%") + + # Efficient sampling: generate z ~ N(0,I), then solve L @ u = z + z = np.random.normal(0, 1, size=n) + + # Solve L @ u = z using sparse triangular solver + u = spsolve_triangular(L, z, lower=True) + + # Verify the sampling worked correctly + print("Sample u statistics - mean:", np.mean(u), "std:", np.std(u), ". Should be around sqrt(s2) =", np.sqrt(s2)) + intercept = 2 - u = mv.rvs() + x = np.concatenate((u, [intercept])) - print("x: ", x) + print("x: ", x[:10]) + np.save("reference_outputs/x_original.npy", x) - x_initial = u + np.random.normal(0, 0.3, size=len(u)) np.save("inputs_ar1/x.npy", u) np.save("reference_outputs/theta_original.npy", theta_original) @@ -61,30 +75,27 @@ eta = a_ar1 @ u + intercept - print("eta: ", eta) + print("eta: ", eta[:6]) np.save("inputs_ar1/x_original.npy", eta) noise = np.random.normal(0, np.sqrt(1 / obs_noise_prec), size=eta.shape) - print("noise: ", noise) + print("noise: ", noise[:10]) y = eta + noise np.save("y.npy", y) - print("y: ", y) + print("y: ", y[:10]) Qprior = sp.block_diag([Q, sp.csr_matrix([[0.001]])]) - # print("Qprior : \n", Qprior.toarray()) - a = sp.hstack([a_ar1, a_regression]) + a = sp.hstack([a_ar1, a_regression]) # a_ar1 # Qcond = Qprior + obs_noise_prec * a.T @ a - print("Qcond: \n", Qcond.toarray()) + print("Qcond: \n", Qcond.toarray()[:6,:6]) b = obs_noise_prec * a.T @ y - # -xp.exp(theta) * (eta - y) - # beta_initial + np.linalg.solve( - # Qconditional.toarray(), information_vector - # ) - x_est = np.linalg.solve(Qcond.toarray(), b) - print("x_est: ", x_est) - - print("eta est : ", a @ x_est) - print("eta : ", a @ x) + print("b: ", b[:10]) + #x_est = np.linalg.solve(Qcond.toarray(), b) + x_est = spsolve(csc_matrix(Qcond), b) + print("norm(x - x_est): ", np.linalg.norm(x - x_est)) + + print("norm(eta - eta_est): ", np.linalg.norm(a @ x - a @ x_est)) + print("normalized norm(eta - eta_est): ", np.linalg.norm(a @ x - a @ x_est) / np.linalg.norm(a @ x)) \ No newline at end of file diff --git a/examples/g_ar1/run.py b/examples/g_ar1/run.py index d42155b7..cfaeaa43 100644 --- a/examples/g_ar1/run.py +++ b/examples/g_ar1/run.py @@ -18,7 +18,8 @@ if __name__ == "__main__": - n = 20 + np.random.seed(3) + n = 1000 # load reference output theta_original = np.load("reference_outputs/theta_original.npy") @@ -29,6 +30,9 @@ xp.log(theta_original[2]), ) + theta_initial = [0.5, 3.0, 3.0] + print("theta initial: ", theta_initial) + x_original = np.load("reference_outputs/x_original.npy") print("x original: ", x_original[:10]) print("dim(x original): ", x_original.shape) @@ -37,8 +41,8 @@ "type": "ar1", "input_dir": f"{BASE_DIR}/inputs_ar1", "n_latent_parameters": n, - "phi": theta_original[0], # has to be between 0 and 1 - "tau": xp.log(theta_original[1]), # assume to already be in log-scale + "phi": theta_initial[0], # has to be between 0 and 1 + "tau": xp.log(theta_initial[1]), # assume to already be in log-scale "ph_phi": {"type": "beta", "alpha": 5.0, "beta": 1.0}, "ph_tau": {"type": "gaussian", "mean": 0.0, "precision": 0.5}, } @@ -59,34 +63,39 @@ likelihood_dict = { "type": "gaussian", - "prec_o": xp.log(theta_original[2]), - "prior_hyperparameters": { - "type": "penalized_complexity", - "alpha": 0.01, - "u": 5, - }, + "prec_o": xp.log(theta_initial[2]), + # "prior_hyperparameters": { + # "type": "penalized_complexity", + # "alpha": 0.01, + # "u": 5, + # }, + "prior_hyperparameters": {"type": "gaussian", "mean": 3.0, "precision": 0.05}, } model = Model( - submodels=[ar1, regression], + submodels=[ar1, regression], # likelihood_config=likelihood_config.parse_config(likelihood_dict), ) print_msg(model) Qprior = model.construct_Q_prior() - print("Qprior: \n", Qprior.toarray()) + print("Qprior: \n", Qprior.toarray()[:6, :6]) Qinv = np.linalg.inv(Qprior.toarray()) geom_mean = np.exp(np.mean(np.log(Qinv.diagonal()))) print("Geometric mean of Qinv diagonal: ", geom_mean) - eta = model.a @ x_original + # in gaussian case x = 0, thus eta = 0 + x_i = np.zeros(model.n_latent_parameters) + eta = model.a @ x_i Qcond = model.construct_Q_conditional(eta=eta) + print("Qcond: \n", Qcond.toarray()[:6, :6]) - b = model.construct_information_vector(eta=eta, x_i=x_original) + b = model.construct_information_vector(eta=eta, x_i=x_i) + print("b: ", b[:10]) x_est = np.linalg.solve(Qcond.toarray(), b) - print("x est: ", x_est) - + #print("x est: ", x_est) + print("norm(x_original - x_est): ", np.linalg.norm(x_original - x_est)) # L = np.linalg.cholesky(Qcond.toarray()) # plt.spy(Qcond, markersize=2) @@ -117,22 +126,27 @@ print("theta: ", model.theta) # print("x : ", model.x) - f_value = dalia._evaluate_f(model.theta) - print("after evaluate f. x: ", model.x) + # f_value = dalia._evaluate_f(model.theta) + # print("after evaluate f. x: ", model.x) results = dalia.minimize() theta_unscaled = results["theta"] - theta = theta_unscaled.copy() - theta[0] = scaled_logit(theta_unscaled[0], direction="backward") - theta[1] = np.exp(theta_unscaled[1]) - theta[2] = np.exp(theta_unscaled[2]) + print("theta unscaled: ", theta_unscaled) + theta_original_log = xp.array( + [ + theta_original[0], + xp.log(theta_original[1]), + xp.log(theta_original[2]), + ] + ) + print("theta original log: ", theta_original_log) - print("theta: ", theta) - print("theta original: ", theta_original) + # print("x: ", results["x"]) + # print("x_original: ", x_original) - print("x: ", results["x"]) - print("x_original: ", x_original) + # print("eta: ", model.a @ x_original) + # print("eta est: ", model.a @ results["x"]) - print("eta: ", model.a @ x_original) - print("eta est: ", model.a @ results["x"]) + print("norm(eta - eta_est): ", np.linalg.norm(model.a @ x_original - model.a @ results["x"])) + print("normalized norm(eta - eta_est): ", np.linalg.norm(model.a @ x_original - model.a @ results["x"]) / np.linalg.norm(model.a @ x_original)) diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py index ac06436f..ebee7b62 100644 --- a/src/dalia/core/dalia.py +++ b/src/dalia/core/dalia.py @@ -513,8 +513,8 @@ def callback(intermediate_result: optimize.OptimizeResult): ) self.minimization_result: dict = { - "theta": scipy_result.x, - "theta_interpret": self.model.get_theta_interpret(), + "theta": get_host(self.model.theta), # scipy_result.x, # + "theta_interpret": get_host(self.model.get_theta_interpret()), "x": get_host( self.model.x, # [self.model.inverse_permutation_latent_variables] ), @@ -543,6 +543,9 @@ def _objective_function( Function value f(theta) evaluated at theta_i and its gradient. """ + print("theta_i: ", theta_i) + print("self.model.theta: ", self.model.theta) + self.t_construction_qprior = 0.0 self.t_construction_qconditional = 0.0 self.solver.t_cholesky = 0.0 diff --git a/src/dalia/submodels/ar1.py b/src/dalia/submodels/ar1.py index c221cc5e..dab76fa4 100644 --- a/src/dalia/submodels/ar1.py +++ b/src/dalia/submodels/ar1.py @@ -26,9 +26,9 @@ def construct_Q_prior(self, **kwargs) -> sp.sparse.coo_matrix: exp_tau = xp.exp(tau) phi_scaled = kwargs.get("phi") # print("tau:", exp_tau) - # print("phi_scaled:", phi_scaled) + #print("phi_scaled:", phi_scaled) phi = scaled_logit(phi_scaled, direction="backward") - # print("phi:", phi) + print("phi:", phi) s2 = 1 / exp_tau denom = s2 * (1 - phi**2) From 9af4a3cf68adc67eeb15bb03fde1d936d1e4a0c1 Mon Sep 17 00:00:00 2001 From: lisa-gm Date: Wed, 24 Sep 2025 10:54:21 +0300 Subject: [PATCH 08/26] added verbosity level to not always show timer, added initial draft for rescaling function for hyperparameters within prior hyperparameter class and from where it needs to be called --- .gitattributes | 1 + .gitignore | 10 ++ examples/g_ar1/generate_data.py | 4 +- examples/g_ar1/inputs_ar1/a.npz | 3 - examples/g_ar1/inputs_ar1/x.npy | 3 - examples/g_ar1/inputs_ar1/x_original.npy | 3 - examples/g_ar1/inputs_regression/a.npz | 3 - .../reference_outputs/theta_original.npy | 3 - .../g_ar1/reference_outputs/x_original.npy | 3 - examples/g_ar1/run.py | 59 +++++----- examples/g_ar1/y.npy | 3 - examples/p_ar1/e.npy | 3 - examples/p_ar1/generate_data.py | 22 ++-- examples/p_ar1/inputs_ar1/a.npz | 3 - examples/p_ar1/inputs_ar1/x.npy | 3 - examples/p_ar1/inputs_ar1/x_original.npy | 3 - examples/p_ar1/inputs_regression/a.npz | 3 - .../reference_outputs/theta_original.npy | 3 - .../p_ar1/reference_outputs/x_original.npy | 3 - examples/p_ar1/run.py | 27 +++-- examples/p_ar1/y.npy | 3 - src/dalia/configs/dalia_config.py | 4 + src/dalia/configs/submodels_config.py | 3 +- src/dalia/core/dalia.py | 110 +++++++++++------- src/dalia/core/model.py | 74 ++++++------ src/dalia/core/prior_hyperparameters.py | 14 +++ src/dalia/core/submodel.py | 7 -- src/dalia/prior_hyperparameters/beta.py | 15 +++ src/dalia/prior_hyperparameters/gaussian.py | 12 ++ .../prior_hyperparameters/gaussian_mvn.py | 3 + .../penalized_complexity.py | 3 + src/dalia/submodels/ar1.py | 26 +++-- 32 files changed, 241 insertions(+), 198 deletions(-) delete mode 100644 examples/g_ar1/inputs_ar1/a.npz delete mode 100644 examples/g_ar1/inputs_ar1/x.npy delete mode 100644 examples/g_ar1/inputs_ar1/x_original.npy delete mode 100644 examples/g_ar1/inputs_regression/a.npz delete mode 100644 examples/g_ar1/reference_outputs/theta_original.npy delete mode 100644 examples/g_ar1/reference_outputs/x_original.npy delete mode 100644 examples/g_ar1/y.npy delete mode 100644 examples/p_ar1/e.npy delete mode 100644 examples/p_ar1/inputs_ar1/a.npz delete mode 100644 examples/p_ar1/inputs_ar1/x.npy delete mode 100644 examples/p_ar1/inputs_ar1/x_original.npy delete mode 100644 examples/p_ar1/inputs_regression/a.npz delete mode 100644 examples/p_ar1/reference_outputs/theta_original.npy delete mode 100644 examples/p_ar1/reference_outputs/x_original.npy delete mode 100644 examples/p_ar1/y.npy diff --git a/.gitattributes b/.gitattributes index 3e5cf230..23dcd25c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,4 @@ examples/**/*.npz filter=lfs diff=lfs merge=lfs -text examples/**/*.npy filter=lfs diff=lfs merge=lfs -text examples/**/*.dat filter=lfs diff=lfs merge=lfs -text + diff --git a/.gitignore b/.gitignore index 35e613a9..d19426db 100644 --- a/.gitignore +++ b/.gitignore @@ -171,6 +171,16 @@ runs_sc25/* settings.json *.out +# data files that can easily be generated +examples/brainiac/**/*.npy +examples/brainiac/**/*.npz + +examples/g_ar1/**/*.npy +examples/g_ar1/**/*.npz + +examples/p_ar1/**/*.npy +examples/p_ar1/**/*.npz + # Profiler outputs *.nsys-rep *.qdstrm diff --git a/examples/g_ar1/generate_data.py b/examples/g_ar1/generate_data.py index 6e3e5f75..e9bb0a6b 100644 --- a/examples/g_ar1/generate_data.py +++ b/examples/g_ar1/generate_data.py @@ -12,10 +12,10 @@ if __name__ == "__main__": np.random.seed(5) - n = 5000 + n = 1000 ## define priors - s2 = 3 + s2 = 5 tau = 1 / s2 phi = 0.9 # noise obs diff --git a/examples/g_ar1/inputs_ar1/a.npz b/examples/g_ar1/inputs_ar1/a.npz deleted file mode 100644 index d51fab3a..00000000 --- a/examples/g_ar1/inputs_ar1/a.npz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39c51ba4cc5f784792206abc0d5806f39d9a2360c7542ba9db981f91da7ae75b -size 779 diff --git a/examples/g_ar1/inputs_ar1/x.npy b/examples/g_ar1/inputs_ar1/x.npy deleted file mode 100644 index 0b841bb7..00000000 --- a/examples/g_ar1/inputs_ar1/x.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cbc08173f8d41478b3fa1defbb8f50eb977e95641f0ee2389a88e596635f23c8 -size 928 diff --git a/examples/g_ar1/inputs_ar1/x_original.npy b/examples/g_ar1/inputs_ar1/x_original.npy deleted file mode 100644 index 4fdb26c6..00000000 --- a/examples/g_ar1/inputs_ar1/x_original.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:01b4ae12954e6c92c282859a2ac998fbaf94b3d8995d2f2c6d46ac2748782e4b -size 928 diff --git a/examples/g_ar1/inputs_regression/a.npz b/examples/g_ar1/inputs_regression/a.npz deleted file mode 100644 index 8e5c1dcd..00000000 --- a/examples/g_ar1/inputs_regression/a.npz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a855bd6a88e63486a3f776cbc51aab0ae1ceff310f19bf5a09e5fb3f0c7507e -size 1136 diff --git a/examples/g_ar1/reference_outputs/theta_original.npy b/examples/g_ar1/reference_outputs/theta_original.npy deleted file mode 100644 index 370f0d04..00000000 --- a/examples/g_ar1/reference_outputs/theta_original.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:42befec2428694901b72e036df13b4baeec3ed5680163ff5849ab4c59d8a5202 -size 152 diff --git a/examples/g_ar1/reference_outputs/x_original.npy b/examples/g_ar1/reference_outputs/x_original.npy deleted file mode 100644 index 5a432a10..00000000 --- a/examples/g_ar1/reference_outputs/x_original.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4d0cd377b2101c5e0cd01676ffc1c65146570824634cd43c7b60ce2648ecab9e -size 936 diff --git a/examples/g_ar1/run.py b/examples/g_ar1/run.py index cfaeaa43..378736f0 100644 --- a/examples/g_ar1/run.py +++ b/examples/g_ar1/run.py @@ -19,18 +19,15 @@ if __name__ == "__main__": np.random.seed(3) - n = 1000 # load reference output theta_original = np.load("reference_outputs/theta_original.npy") print( "theta original: ", - theta_original[0], - xp.log(theta_original[1]), - xp.log(theta_original[2]), + theta_original, ) - theta_initial = [0.5, 3.0, 3.0] + theta_initial = theta_original #[0.6, 1.0, 3.0] print("theta initial: ", theta_initial) x_original = np.load("reference_outputs/x_original.npy") @@ -40,9 +37,8 @@ ar1_dict = { "type": "ar1", "input_dir": f"{BASE_DIR}/inputs_ar1", - "n_latent_parameters": n, "phi": theta_initial[0], # has to be between 0 and 1 - "tau": xp.log(theta_initial[1]), # assume to already be in log-scale + "tau": theta_initial[1], # assume to already be in log-scale "ph_phi": {"type": "beta", "alpha": 5.0, "beta": 1.0}, "ph_tau": {"type": "gaussian", "mean": 0.0, "precision": 0.5}, } @@ -69,7 +65,7 @@ # "alpha": 0.01, # "u": 5, # }, - "prior_hyperparameters": {"type": "gaussian", "mean": 3.0, "precision": 0.05}, + "prior_hyperparameters": {"type": "gaussian", "mean": xp.log(theta_original[2]), "precision": 0.05}, } model = Model( @@ -96,11 +92,6 @@ x_est = np.linalg.solve(Qcond.toarray(), b) #print("x est: ", x_est) print("norm(x_original - x_est): ", np.linalg.norm(x_original - x_est)) - # L = np.linalg.cholesky(Qcond.toarray()) - - # plt.spy(Qcond, markersize=2) - # plt.title("Sparsity pattern of Qcond") - # plt.show() # Configurations of DALIA dalia_dict = { @@ -117,6 +108,7 @@ "eps_inner_iteration": 1e-3, "eps_gradient_f": 1e-3, "simulation_dir": ".", + "verbosity": 0, } dalia = DALIA( @@ -124,23 +116,22 @@ config=dalia_config.parse_config(dalia_dict), ) - print("theta: ", model.theta) - # print("x : ", model.x) - # f_value = dalia._evaluate_f(model.theta) - # print("after evaluate f. x: ", model.x) - - results = dalia.minimize() - - theta_unscaled = results["theta"] - print("theta unscaled: ", theta_unscaled) - theta_original_log = xp.array( - [ - theta_original[0], - xp.log(theta_original[1]), - xp.log(theta_original[2]), - ] + print("initial model theta: ", model.theta) + + print("\nCalling DALIA.run()") + results = dalia.run() + + print_msg("\n--- Results ---") + + theta = results["theta"] + print("theta: ", np.round(theta, 4)) + print("theta original: ", theta_original) + + print_msg("Covariance of theta:\n", results["cov_theta"]) + print_msg( + "Mean of the fixed effects:\n", + results["x"][-model.submodels[-1].n_fixed_effects :], ) - print("theta original log: ", theta_original_log) # print("x: ", results["x"]) # print("x_original: ", x_original) @@ -148,5 +139,15 @@ # print("eta: ", model.a @ x_original) # print("eta est: ", model.a @ results["x"]) + print_msg("\n--- Comparisons ---") print("norm(eta - eta_est): ", np.linalg.norm(model.a @ x_original - model.a @ results["x"])) print("normalized norm(eta - eta_est): ", np.linalg.norm(model.a @ x_original - model.a @ results["x"]) / np.linalg.norm(model.a @ x_original)) + + # Compare marginal variances of latent parameters + var_latent_params = results["marginal_variances_latent"] + Qconditional = dalia.model.construct_Q_conditional(eta=model.a @ model.x) + Qinv_ref = xp.linalg.inv(Qconditional.toarray()) + print_msg( + "Norm (marg var latent - ref): ", + f"{np.linalg.norm(var_latent_params - xp.diag(Qinv_ref)):.4e}", + ) \ No newline at end of file diff --git a/examples/g_ar1/y.npy b/examples/g_ar1/y.npy deleted file mode 100644 index da273e71..00000000 --- a/examples/g_ar1/y.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5f5feb5a0d80627d13631584ab50f48e4930a93c6549859dc920ed204af4de7d -size 928 diff --git a/examples/p_ar1/e.npy b/examples/p_ar1/e.npy deleted file mode 100644 index 526e4ebe..00000000 --- a/examples/p_ar1/e.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eaf01004764bc66325db832a330ff0233dfa9ea1dc7f216af544c057cc889d8c -size 168 diff --git a/examples/p_ar1/generate_data.py b/examples/p_ar1/generate_data.py index b6843ca4..c1eef39d 100644 --- a/examples/p_ar1/generate_data.py +++ b/examples/p_ar1/generate_data.py @@ -10,12 +10,16 @@ if __name__ == "__main__": - n = 5 + n = 1000 ## define priors - s2 = 0.2 # 0.7 + s2 = 1 # 0.7 tau = 1 / s2 - phi = 0.5 # 0.9 + ### note: phi between -1 and 1 for discrete timesteps + # (doesn't make sense for negative in cts case) + + ## rescale beta prior 2 theta - 1 + phi = 0.9 # 0.9 theta_original = [phi, tau] denom = s2 * (1 - phi**2) @@ -31,16 +35,16 @@ geom_mean = np.exp(np.mean(np.log(Cov.diagonal()))) print("Geometric mean of Qinv diagonal: ", geom_mean) - print(Q.toarray()) - print(np.linalg.inv(Q.toarray())) - print(np.round(Q.toarray() @ np.linalg.inv(Q.toarray()), 6)) + print(Q.toarray()[:6, :6]) + print(np.linalg.inv(Q.toarray())[:6, :6]) + print(np.round(Q.toarray() @ np.linalg.inv(Q.toarray()), 6)[:6, :6]) # exit() mv = multivariate_normal(mean=np.zeros(n), cov=Cov, seed=3) intercept = 2 u = mv.rvs() - print("u: ", u) + print("u: ", u[:10]) eta = u + intercept x = np.concatenate((u, [intercept])) np.save("reference_outputs/x_original.npy", x) @@ -54,7 +58,7 @@ a_regression = sp.csr_matrix(np.ones((n, 1))) sp.save_npz("inputs_regression/a.npz", a_regression) - print(eta) + print("eta: ", eta[:10]) np.save("inputs_ar1/x_original.npy", eta) # sample with repitition @@ -65,7 +69,7 @@ y = poisson.rvs(E * np.exp(eta), random_state=3) np.save("y.npy", y) - print(y) + print("y[:10]: ", y[:10]) Qprior = sp.block_diag([Q, sp.csr_matrix([[0.001]])]) # print("Qprior : \n", Qprior.toarray()) diff --git a/examples/p_ar1/inputs_ar1/a.npz b/examples/p_ar1/inputs_ar1/a.npz deleted file mode 100644 index 14bf96bf..00000000 --- a/examples/p_ar1/inputs_ar1/a.npz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:60e0eb3bd1b1be8837017f0fddc8cd0a474d7862976b62a74e9cb850fbf03a5b -size 772 diff --git a/examples/p_ar1/inputs_ar1/x.npy b/examples/p_ar1/inputs_ar1/x.npy deleted file mode 100644 index 7274143a..00000000 --- a/examples/p_ar1/inputs_ar1/x.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:09a48c802155575049a4cf806aea44b97a6d21f982776418230418beb726c4e8 -size 168 diff --git a/examples/p_ar1/inputs_ar1/x_original.npy b/examples/p_ar1/inputs_ar1/x_original.npy deleted file mode 100644 index bceb82b4..00000000 --- a/examples/p_ar1/inputs_ar1/x_original.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:649ab0d33fd12fbba48dc117928c5c97e8c49fecd1dfaa09f8dbe9ca59cc4cb0 -size 168 diff --git a/examples/p_ar1/inputs_regression/a.npz b/examples/p_ar1/inputs_regression/a.npz deleted file mode 100644 index 8160121f..00000000 --- a/examples/p_ar1/inputs_regression/a.npz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:08027fe4ecd1fb5d5169f41a9f2a1a602be1cf15477878e2a89d2405261d64ae -size 970 diff --git a/examples/p_ar1/reference_outputs/theta_original.npy b/examples/p_ar1/reference_outputs/theta_original.npy deleted file mode 100644 index e65c3799..00000000 --- a/examples/p_ar1/reference_outputs/theta_original.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ed23a90e2c583ecade6212f036ee6ffa5ab5a323a0c5aa95bc6bc74b2b070052 -size 144 diff --git a/examples/p_ar1/reference_outputs/x_original.npy b/examples/p_ar1/reference_outputs/x_original.npy deleted file mode 100644 index 3c5c2d03..00000000 --- a/examples/p_ar1/reference_outputs/x_original.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:65c40fc6927dca0a458afc275b880b8973d86897b9cb41330e893eb90c3d6642 -size 176 diff --git a/examples/p_ar1/run.py b/examples/p_ar1/run.py index 4adc3510..8a17d6da 100644 --- a/examples/p_ar1/run.py +++ b/examples/p_ar1/run.py @@ -19,7 +19,7 @@ if __name__ == "__main__": - n = 5 + n = 1000 # load reference output theta_original = np.load("reference_outputs/theta_original.npy") @@ -109,15 +109,20 @@ results = dalia.minimize() theta_unscaled = results["theta"] - theta = theta_unscaled.copy() - theta[0] = scaled_logit(theta_unscaled[0], direction="backward") - theta[1] = np.exp(theta_unscaled[1]) - - print("theta: ", theta) - print("theta original: ", theta_original) + print("theta unscaled: ", theta_unscaled) + theta_original_log = xp.array( + [ + theta_original[0], + xp.log(theta_original[1]), + ] + ) + print("theta original log: ", theta_original_log) - print("x: ", results["x"]) - print("x_original: ", x_original) + print("norm(x_original - x): ", np.linalg.norm(x_original - results["x"])) + print("normalized norm: ", np.linalg.norm(x_original - results["x"]) / np.linalg.norm(x_original)) - print("eta: ", model.a @ x_original) - print("eta est: ", model.a @ results["x"]) + print("norm(eta_original - eta_est): ", np.linalg.norm(model.a @ x_original - model.a @ results["x"])) + print( + "normalized norm: ", + np.linalg.norm(model.a @ x_original - model.a @ results["x"]) / np.linalg.norm(model.a @ x_original), + ) diff --git a/examples/p_ar1/y.npy b/examples/p_ar1/y.npy deleted file mode 100644 index 9d269417..00000000 --- a/examples/p_ar1/y.npy +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e0c5615ed622778e4fb05eb3adf9d8584a1601562ccae1f90700ea9c7c05c9d5 -size 168 diff --git a/src/dalia/configs/dalia_config.py b/src/dalia/configs/dalia_config.py index 0dbbbec0..8d5e24fc 100644 --- a/src/dalia/configs/dalia_config.py +++ b/src/dalia/configs/dalia_config.py @@ -55,6 +55,10 @@ class DaliaConfig(BaseModel): simulation_dir: Path = Path("./dalia/") output_dir: Path = Path.joinpath(simulation_dir, "output/") + # --- Verbosity level ------------------------------------------------------ + verbosity: int = 0 # 0: minimal, 1: more info + + def parse_config(config: dict | str) -> DaliaConfig: if isinstance(config, str): diff --git a/src/dalia/configs/submodels_config.py b/src/dalia/configs/submodels_config.py index 226e11c5..8101565b 100644 --- a/src/dalia/configs/submodels_config.py +++ b/src/dalia/configs/submodels_config.py @@ -41,8 +41,6 @@ def read_hyperparameters(self): class AR1SubModelConfig(SubModelConfig): - n_latent_parameters: PositiveInt = None - ## prior on phi phi: float = None # AR(1) coefficient phi_scaled: float = None @@ -59,6 +57,7 @@ def read_hyperparameters(self): # input of phi is in (0,1), rescale to -/+ INF self.phi_scaled = scaled_logit(self.phi, direction="forward") theta = xp.array([self.phi_scaled, self.tau]) + theta_internal = xp.array([self.phi_scaled, self.tau]) theta_keys = ["phi", "tau"] return theta, theta_keys diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py index ebee7b62..b9e67343 100644 --- a/src/dalia/core/dalia.py +++ b/src/dalia/core/dalia.py @@ -73,6 +73,8 @@ def __init__( self.eps_gradient_f = self.config.eps_gradient_f self.eps_hessian_f = self.config.eps_hessian_f + self.verbosity = self.config.verbosity + # --- Configure HPC set_device(comm_rank, comm_size) @@ -303,14 +305,18 @@ def run(self) -> dict: theta_star = get_device(minimization_result["theta"]) x_star = get_device(minimization_result["x"]) + print("Finished the optimization procedure.") # compute covariance of the hyperparameters theta at the mode + print("theta_star: ", theta_star) cov_theta = self.compute_covariance_hp(theta_star) + print("Computed covariance of the hyperparameters at the mode.") # compute marginal variances of the latent parameters marginal_variances_latent = self.get_marginal_variances_latent_parameters( theta_star, x_star ) + print("Computed marginal variances of the latent parameters.") # compute marginal variances of the observations # TODO: only run by default when dense multiplcation issue is fixed, see issue #78 @@ -321,7 +327,7 @@ def run(self) -> dict: # construct new dictionary with the results results = { "theta": minimization_result["theta"], - "theta_interpret": minimization_result["theta_interpret"], + "theta_internal": minimization_result["theta_internal"], "x": minimization_result["x"], "f": minimization_result["f"], "grad_f": minimization_result["grad_f"], @@ -354,8 +360,8 @@ def minimize(self) -> optimize.OptimizeResult: print_msg("No hyperparameters, just running inner iteration.") self.f_value = self._evaluate_f(self.model.theta) self.minimization_result: dict = { - "theta": self.model.theta, - "theta_interpret": self.model.get_theta_interpret(), + "theta_internal": self.model.theta, + "theta": self.model.rescale_hyperparameters_to_internal(self.model.theta, direction="backward"), "x": self.model.x, # [self.model.inverse_permutation_latent_variables], "f": self.f_value, } @@ -409,9 +415,9 @@ def callback(intermediate_result: optimize.OptimizeResult): ) self.minimization_result = { - "theta": get_host(self.model.theta), - "theta_interpret": get_host( - self.model.get_theta_interpret() + "theta_internal": get_host(self.model.theta), + "theta": get_host( + self.model.rescale_hyperparameters_to_internal(self.model.theta, direction="backward") ), "x": get_host( self.model.x @@ -450,9 +456,9 @@ def callback(intermediate_result: optimize.OptimizeResult): ) self.minimization_result = { - "theta": get_host(self.model.theta), - "theta_interpret": get_host( - self.model.get_theta_interpret() + "theta_internal": get_host(self.model.theta), + "theta": get_host( + self.model.rescale_hyperparameters_to_internal(self.model.theta, direction="backward") ), "x": get_host( self.model.x @@ -513,8 +519,8 @@ def callback(intermediate_result: optimize.OptimizeResult): ) self.minimization_result: dict = { - "theta": get_host(self.model.theta), # scipy_result.x, # - "theta_interpret": get_host(self.model.get_theta_interpret()), + "theta_internal": get_host(self.model.theta), # scipy_result.x, # + "theta": get_host(self.model.rescale_hyperparameters_to_internal(self.model.theta, direction="backward")), "x": get_host( self.model.x, # [self.model.inverse_permutation_latent_variables] ), @@ -543,9 +549,6 @@ def _objective_function( Function value f(theta) evaluated at theta_i and its gradient. """ - print("theta_i: ", theta_i) - print("self.model.theta: ", self.model.theta) - self.t_construction_qprior = 0.0 self.t_construction_qconditional = 0.0 self.solver.t_cholesky = 0.0 @@ -610,7 +613,7 @@ def _objective_function( self.t_construction_qprior + self.t_construction_qconditional ) - if self.iter > 0: + if self.iter > 0 and self.verbosity > 0: print( f"rank {comm_rank} | objfunc_time: {self.objective_function_time[1:]} | solver_time: {self.solver_time[1:]} | construction_time: {self.construction_time[1:]}", flush=True, @@ -667,6 +670,7 @@ def _evaluate_f( task_mapping = [i % n_qeval_comm for i in range(2)] if task_mapping[0] == self.color_qeval: + # Done by processes "even" tic = time.perf_counter() synchronize_gpu() @@ -701,6 +705,7 @@ def _evaluate_f( log_prior_hyperparameters: float = ( self.model.evaluate_log_prior_hyperparameters() ) + likelihood: float = float(self.model.evaluate_likelihood(eta=eta)) prior_latent_parameters: float = ( self._evaluate_prior_latent_parameters() @@ -719,6 +724,7 @@ def _evaluate_f( comm=self.comm_feval, ) synchronize(comm=self.comm_qeval) + else: self.model.construct_Q_prior() @@ -767,7 +773,7 @@ def _evaluate_f( return f_theta[0] - def compute_covariance_hp(self, theta_i: NDArray) -> NDArray: + def compute_covariance_hp(self, theta_interpret: NDArray) -> NDArray: """compute the covariance matrix of the hyperparameters theta. Parameters @@ -780,13 +786,14 @@ def compute_covariance_hp(self, theta_i: NDArray) -> NDArray: cov_theta : NDArray[dim_theta, dim_theta] Covariance matrix of the hyperparameters theta. """ - self.model.theta[:] = theta_i + + # self.model.rescale_hyperparameters_to_internal(theta_interpret, direction="forward") print_msg( - f"Computing covariance of hyperparameters theta at {theta_i}.", + f"Computing covariance of hyperparameters theta at {theta_interpret}.", flush=True, ) - hess_theta = self._evaluate_hessian_f(theta_i) + hess_theta = self._evaluate_hessian_f(theta_interpret) # print_msg( # f"hessian_f: \n {hess_theta}", # flush=True, @@ -816,8 +823,9 @@ def _evaluate_hessian_f( """ ## TODO: this is the quick fix ... - theta_internal = theta_i.copy() - self.model.theta[:] = theta_i + #theta_internal = theta_i.copy() + theta = theta_i.copy() + # self.model.theta[:] = theta_i dim_theta = self.model.n_hyperparameters # pre-allocate storage for the hessian & f_values @@ -849,6 +857,7 @@ def _evaluate_hessian_f( counter = 0 # compute f(theta) if self.color_feval == task_mapping[0]: + theta_i = self.model.rescale_hyperparameters_to_internal(theta, direction="forward") f_theta = self._evaluate_f(theta_i) f_ii_loc[1, :] = f_theta counter += 1 @@ -861,49 +870,60 @@ def _evaluate_hessian_f( if i == j: if self.color_feval == task_mapping[counter]: # theta+eps_i - theta_i = theta_internal.copy() - f_ii_loc[0, i] = self._evaluate_f(theta_i + eps_mat[i, :]) + #theta_i = theta_internal.copy() + #f_ii_loc[0, i] = self._evaluate_f(theta_i + eps_mat[i, :]) + theta_i = theta + eps_mat[i, :] + f_ii_loc[0, i] = self._evaluate_f(self.model.rescale_hyperparameters_to_internal(theta_i, direction="forward")) counter += 1 if self.color_feval == task_mapping[counter]: # theta-eps_i - theta_i = theta_internal.copy() - f_ii_loc[2, i] = self._evaluate_f(theta_i - eps_mat[i, :]) - + # theta_i = theta_internal.copy() + # f_ii_loc[2, i] = self._evaluate_f(theta_i - eps_mat[i, :]) + theta_i = theta - eps_mat[i, :] + f_ii_loc[2, i] = self._evaluate_f(self.model.rescale_hyperparameters_to_internal(theta_i, direction="forward")) counter += 1 # as hessian is symmetric we only have to compute the upper triangle elif i < j: # theta+eps_i+eps_j if self.color_feval == task_mapping[counter]: - theta_i = theta_internal.copy() - f_ij_loc[0, k] = self._evaluate_f( - theta_i + eps_mat[i, :] + eps_mat[j, :] - ) + # theta_i = theta_internal.copy() + # f_ij_loc[0, k] = self._evaluate_f( + # theta_i + eps_mat[i, :] + eps_mat[j, :] + # ) + theta_i = theta + eps_mat[i, :] + eps_mat[j, :] + f_ij_loc[0, k] = self._evaluate_f(self.model.rescale_hyperparameters_to_internal(theta_i, direction="forward")) counter += 1 # theta+eps_i-eps_j if self.color_feval == task_mapping[counter]: - theta_i = theta_internal.copy() - f_ij_loc[1, k] = self._evaluate_f( - theta_i + eps_mat[i, :] - eps_mat[j, :] - ) + # theta_i = theta_internal.copy() + # f_ij_loc[1, k] = self._evaluate_f( + # theta_i + eps_mat[i, :] - eps_mat[j, :] + # ) + theta_i = theta + eps_mat[i, :] - eps_mat[j, :] + f_ij_loc[1, k] = self._evaluate_f(self.model.rescale_hyperparameters_to_internal(theta_i, direction="forward")) counter += 1 # theta-eps_i+eps_j if self.color_feval == task_mapping[counter]: - theta_i = theta_internal.copy() - f_ij_loc[2, k] = self._evaluate_f( - theta_i - eps_mat[i, :] + eps_mat[j, :] - ) + # theta_i = theta_internal.copy() + # f_ij_loc[2, k] = self._evaluate_f( + # theta_i - eps_mat[i, :] + eps_mat[j, :] + # ) + theta_i = theta - eps_mat[i, :] + eps_mat[j, :] + f_ij_loc[2, k] = self._evaluate_f(self.model.rescale_hyperparameters_to_internal(theta_i, direction="forward")) counter += 1 # theta-eps_i-eps_j if self.color_feval == task_mapping[counter]: - theta_i = theta_internal.copy() - f_ij_loc[3, k] = self._evaluate_f( - theta_i - eps_mat[i, :] - eps_mat[j, :] - ) + # theta_i = theta_internal.copy() + # f_ij_loc[3, k] = self._evaluate_f( + # theta_i - eps_mat[i, :] - eps_mat[j, :] + # ) + theta_i = theta - eps_mat[i, :] - eps_mat[j, :] + f_ij_loc[3, k] = self._evaluate_f(self.model.rescale_hyperparameters_to_internal(theta_i, direction="forward")) counter += 1 allreduce( @@ -972,8 +992,12 @@ def _compute_covariance_latent_parameters( self.solver.selected_inversion(sparsity="bta") def get_marginal_variances_latent_parameters( - self, theta: NDArray = None, x_star: NDArray = None + self, theta_interpret: NDArray = None, x_star: NDArray = None ) -> NDArray: + + ## assume theta to be in "external" scale + theta = self.model.rescale_hyperparameters_to_internal(theta_interpret, direction="forward") if theta_interpret is not None else None + # TODO: this should be only called by rank 0? if theta is None and x_star is None: print( diff --git a/src/dalia/core/model.py b/src/dalia/core/model.py index 91e3882d..1bafa7f3 100644 --- a/src/dalia/core/model.py +++ b/src/dalia/core/model.py @@ -224,6 +224,7 @@ def __init__( theta.append(lh_hyperparameters) self.theta: NDArray = xp.concatenate(theta) + print("Initial hyperparameters (internal scale): ", self.theta) theta_keys += lh_hyperparameters_keys self.theta_keys: NDArray = theta_keys @@ -358,6 +359,9 @@ def __init__( def construct_Q_prior(self) -> sp.sparse.spmatrix: kwargs = {} + ## convert theta from bfgs/internal scale to prior/user/interpretable scale + theta_interpret = self.rescale_hyperparameters_to_internal(self.theta, direction="backward") + if self.Q_prior is None: # During the first construction of Q_prior, we allocate the memory for # the data and the mapping of each submodel's to the Q prior matrix. @@ -371,22 +375,23 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + # kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, SpatialSubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, BrainiacSubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, AR1SubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, RegressionSubModel): ... @@ -411,13 +416,6 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: shape=(self.n_latent_parameters, self.n_latent_parameters), ) - # print("in Qprior is none.") - # print("submodel.data: ", submodel_Q_prior.data) - # print("row indices: ", self.Q_prior.indices) - # print("col indptr: ", self.Q_prior.indptr) - # print("self.Q_prior.data: ", self.Q_prior.data) - # print("data mapping: ", self.Q_prior_data_mapping) - else: for i, submodel in enumerate(self.submodels): if isinstance(submodel, RegressionSubModel): @@ -426,50 +424,30 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + # kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, SpatialSubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, BrainiacSubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, AR1SubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) submodel_Q_prior = submodel.construct_Q_prior(**kwargs) - # print( - # "In model.py. submodel_Q_prior[:6, :6] : \n", - # submodel_Q_prior.toarray()[:6, :6], - # ) - - # print( - # "self.Q_prior_data_mapping[i] : self.Q_prior_data_mapping[i + 1]: ", - # self.Q_prior_data_mapping[i], - # ":", - # self.Q_prior_data_mapping[i + 1], - # ) - - # print("submodel.data: ", submodel_Q_prior.data) - self.Q_prior.data[ self.Q_prior_data_mapping[i] : self.Q_prior_data_mapping[i + 1] ] = submodel_Q_prior.data - # print("row indices: ", self.Q_prior.indices) - # print("col indptr: ", self.Q_prior.indptr) - # print("self.Q_prior.data: ", self.Q_prior.data) - # print("data mapping: ", self.Q_prior_data_mapping) - - # print("In model.py. Q_prior[:6, :6] : \n", self.Q_prior.toarray()[:6, :6]) - return self.Q_prior def construct_Q_conditional( @@ -557,7 +535,7 @@ def evaluate_log_prior_hyperparameters(self) -> float: """Evaluate the log prior hyperparameters.""" log_prior = 0.0 - theta_interpret = self.theta + theta_interpret = self.theta.copy() for i, prior_hyperparameter in enumerate(self.prior_hyperparameters): @@ -565,6 +543,9 @@ def evaluate_log_prior_hyperparameters(self) -> float: theta_interpret[i] = scaled_logit( theta_interpret[i], direction="backward" ) + # theta_interpret[i] = prior_hyperparameter.rescale_hyperparameters_to_internal( + # theta_interpret[i], direction="backward" + # ) log_prior += prior_hyperparameter.evaluate_log_prior(theta_interpret[i]) @@ -598,7 +579,7 @@ def get_theta_likelihood(self) -> NDArray: return theta_likelihood - def get_theta_interpret(self) -> NDArray: + def get_theta_interpret(self, direction: str) -> NDArray: theta_interpret = xp.zeros_like(self.theta) for i, submodel in enumerate(self.submodels): @@ -609,10 +590,25 @@ def get_theta_interpret(self) -> NDArray: ] = submodel.rescale_hyperparameters_to_interpret( self.theta[ self.hyperparameters_idx[i] : self.hyperparameters_idx[i + 1] - ] + ], direction=direction ) return theta_interpret + + def rescale_hyperparameters_to_internal(self, theta, direction): + + # need to iterate over theta and its prior hyperparameters + theta_internal = xp.copy(theta) + + for i, prior_hyperparameter in enumerate(self.prior_hyperparameters): + + ## how to handle priors that have multiple hyperparameters? + if isinstance(prior_hyperparameter, GaussianMVNPriorHyperparameters): + pass # no rescaling implemented + else: + theta_internal[i] = prior_hyperparameter.rescale_hyperparameters_to_internal(theta[i], direction=direction) + + return theta_internal def evaluate_likelihood(self, eta: NDArray, **kwargs) -> float: """Evaluate the likelihood.""" diff --git a/src/dalia/core/prior_hyperparameters.py b/src/dalia/core/prior_hyperparameters.py index 88c1ba4e..bd067fb5 100644 --- a/src/dalia/core/prior_hyperparameters.py +++ b/src/dalia/core/prior_hyperparameters.py @@ -16,6 +16,20 @@ def __init__( self.config: PriorHyperparametersConfig = config + @abstractmethod + def rescale_hyperparameters_to_internal(self, theta: float, direction: str) -> float: + """Rescale hyperparameters to and from internal scale. + + Args: + theta: Hyperparameter + direction: "forward" or "backward" + Returns: + Rescaled hyperparameter. + + """ + + return theta + @abstractmethod def evaluate_log_prior(self, theta: float) -> float: """Evaluate the log prior hyperparameters.""" diff --git a/src/dalia/core/submodel.py b/src/dalia/core/submodel.py index 7cefe5e3..0c8123da 100644 --- a/src/dalia/core/submodel.py +++ b/src/dalia/core/submodel.py @@ -52,13 +52,6 @@ def __init__( except FileNotFoundError: self.x_initial: NDArray = xp.zeros((self.a.shape[1]), dtype=float) - def rescale_hyperparameters_to_interpret(self, theta: NDArray) -> NDArray: - """Rescale hyperparameters to interpret them. Does nothing unless implemented in specific submodel. - - Note: It doesnt include the hyperparameters from the likelihood. If they need rescaling too. Needs to be done in likelihood. - """ - - return theta @abstractmethod def construct_Q_prior(self, **kwargs) -> sp.sparse.coo_matrix: diff --git a/src/dalia/prior_hyperparameters/beta.py b/src/dalia/prior_hyperparameters/beta.py index aafa91e9..6d43373b 100644 --- a/src/dalia/prior_hyperparameters/beta.py +++ b/src/dalia/prior_hyperparameters/beta.py @@ -6,6 +6,7 @@ GaussianPriorHyperparametersConfig, ) from dalia.core.prior_hyperparameters import PriorHyperparameters +from dalia.utils.link_functions import scaled_logit class BetaPriorHyperparameters(PriorHyperparameters): @@ -21,6 +22,20 @@ def __init__( self.alpha: float = config.alpha self.beta: float = config.beta + + def rescale_hyperparameters_to_internal(self, theta, direction): + + ### TODO: on longer term make scaled_logit default but let it be configurable in config + ## beta prior is defined on [0,1], while BFGS works on (-inf, inf) + if direction == "forward": + theta_scaled = scaled_logit(theta, direction="forward") + elif direction == "backward": + theta_scaled = scaled_logit(theta, direction="backward") + else: + raise ValueError(f"Unknown direction: {direction}") + + return theta_scaled + def evaluate_log_prior(self, theta: float, **kwargs) -> float: """Evaluate the log prior hyperparameters.""" diff --git a/src/dalia/prior_hyperparameters/gaussian.py b/src/dalia/prior_hyperparameters/gaussian.py index d18215ed..1232cb2a 100644 --- a/src/dalia/prior_hyperparameters/gaussian.py +++ b/src/dalia/prior_hyperparameters/gaussian.py @@ -1,6 +1,7 @@ # Copyright 2024-2025 DALIA authors. All rights reserved. from dalia import NDArray from scipy.sparse import spmatrix +from dalia import sp, xp import numpy as np @@ -23,6 +24,17 @@ def __init__( self.mean: float = config.mean self.precision: float = config.precision + def rescale_hyperparameters_to_internal(self, theta, direction): + + if direction == "forward": + theta_scaled = xp.log(theta) + elif direction == "backward": + theta_scaled = xp.exp(theta) + else: + raise ValueError(f"Unknown direction: {direction}") + + return theta_scaled + def evaluate_log_prior(self, theta: float, **kwargs) -> float: """Evaluate the log prior hyperparameters.""" diff --git a/src/dalia/prior_hyperparameters/gaussian_mvn.py b/src/dalia/prior_hyperparameters/gaussian_mvn.py index 8310ef10..95f98617 100644 --- a/src/dalia/prior_hyperparameters/gaussian_mvn.py +++ b/src/dalia/prior_hyperparameters/gaussian_mvn.py @@ -31,6 +31,9 @@ def __init__( self.mean: NDArray = xp.asarray(self.mean) self.precision: sp.sparse.spmatrix = sp.sparse.csc_matrix(self.precision) + def rescale_hyperparameters_to_internal(self, theta, direction): + return super().rescale_hyperparameters_to_internal(theta, direction) + def evaluate_log_prior(self, theta: float, **kwargs) -> float: """Evaluate the log prior hyperparameters.""" diff --git a/src/dalia/prior_hyperparameters/penalized_complexity.py b/src/dalia/prior_hyperparameters/penalized_complexity.py index 2c9788d2..5bb7baab 100644 --- a/src/dalia/prior_hyperparameters/penalized_complexity.py +++ b/src/dalia/prior_hyperparameters/penalized_complexity.py @@ -44,6 +44,9 @@ def __init__( # print("lambda_theta: ", self.lambda_theta) + def rescale_hyperparameters_to_internal(self, theta, direction): + return super().rescale_hyperparameters_to_internal(theta, direction) + def evaluate_log_prior(self, theta: float, **kwargs) -> float: """Evaluate the prior hyperparameters.""" log_prior: float = 0.0 diff --git a/src/dalia/submodels/ar1.py b/src/dalia/submodels/ar1.py index dab76fa4..198f28b4 100644 --- a/src/dalia/submodels/ar1.py +++ b/src/dalia/submodels/ar1.py @@ -19,30 +19,34 @@ def __init__( """Initializes the model.""" super().__init__(config) + # check that dimensions match + + def construct_Q_prior(self, **kwargs) -> sp.sparse.coo_matrix: """Construct the prior precision matrix.""" + ## TODO: link this to prior hyperparameters class + ## i.e. rescale phi and tau according to prior hyperparameters class + phi = kwargs.get("phi") + #phi = scaled_logit(phi_scaled, direction="backward") + #print("phi: ", phi) + tau = kwargs.get("tau") - exp_tau = xp.exp(tau) - phi_scaled = kwargs.get("phi") - # print("tau:", exp_tau) - #print("phi_scaled:", phi_scaled) - phi = scaled_logit(phi_scaled, direction="backward") - print("phi:", phi) - s2 = 1 / exp_tau + #exp_tau = xp.exp(tau) + #print("tau: ", tau) + + s2 = 1 / tau denom = s2 * (1 - phi**2) diag = [(1 + phi**2) / denom] * self.n_latent_parameters diag[0] = diag[-1] = 1 / denom off_diag = [-phi / denom] * (self.n_latent_parameters - 1) - ## TODO: how to do this more efficiently? Q_prior = sp.sparse.diags([off_diag, diag, off_diag], [-1, 0, 1]) + + # need this -> otherwise there might be a sorting issue Q_prior = Q_prior.tocsr() Q_prior.sort_indices() - # print("Q_prior.data: ", Q_prior.data) - - # print("Qprior: \n", Q_prior.toarray()) return Q_prior.tocoo() From f5d4a0c10bdff6bb0fe4c116901371d20313aee0 Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Wed, 24 Sep 2025 12:51:19 +0300 Subject: [PATCH 09/26] updated selected inversion to be dense inverse. probably due to natural ordering and no pivoting numerically quite unstable. only seems to work for very small expamples --- src/dalia/solvers/sparse_solver.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/dalia/solvers/sparse_solver.py b/src/dalia/solvers/sparse_solver.py index 7403cb60..e1b64822 100644 --- a/src/dalia/solvers/sparse_solver.py +++ b/src/dalia/solvers/sparse_solver.py @@ -26,6 +26,7 @@ def cholesky(self, A: sp.sparse.spmatrix, **kwargs) -> None: if (LU.U.diagonal() > 0).all(): # Check the matrix A is positive definite. self.L = LU.L.dot(sp.sparse.diags(LU.U.diagonal() ** 0.5)) else: + print("min(diag(L)): ", xp.min(LU.U.diagonal())) raise ValueError("The matrix is not positive definite") def solve( @@ -56,13 +57,27 @@ def logdet( return 2 * xp.sum(xp.log(self.L.diagonal())) - def selected_inversion(self, **kwargs): - # Placeholder for the selected inversion method. - return super().selected_inversion(**kwargs) + def selected_inversion(self, **kwargs) -> None: + # convert to dense + L_dense = self.L.toarray() + L_inv = xp.eye(self.L.shape[0]) + + L_inv[:] = sp.linalg.solve_triangular( + L_dense, L_inv, lower=True, overwrite_b=True + ) + self.A_inv = L_inv.T @ L_inv + + return self.A_inv + + def _structured_to_spmatrix(self, A: sp.sparse.spmatrix, **kwargs) -> None: + B = A.tocoo() + B.data = self.A_inv[B.row, B.col] + + return B def get_solver_memory(self) -> int: """Return the memory used by the solver in number of bytes""" if self.L is None: return 0 - return self.L.data.nbytes + self.L.indptr.nbytes + self.L.indices.nbytes \ No newline at end of file + return self.L.data.nbytes + self.L.indptr.nbytes + self.L.indices.nbytes From 533fbe6c7f0d00f5acd1bb81ed20ffab5fbec580 Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Thu, 25 Sep 2025 11:05:13 +0300 Subject: [PATCH 10/26] added doc string and normalizing constant for both gaussian priors --- src/dalia/prior_hyperparameters/gaussian.py | 94 +++++++++++++++--- .../prior_hyperparameters/gaussian_mvn.py | 96 +++++++++++++++++-- 2 files changed, 171 insertions(+), 19 deletions(-) diff --git a/src/dalia/prior_hyperparameters/gaussian.py b/src/dalia/prior_hyperparameters/gaussian.py index 1232cb2a..f03ef1ab 100644 --- a/src/dalia/prior_hyperparameters/gaussian.py +++ b/src/dalia/prior_hyperparameters/gaussian.py @@ -12,30 +12,100 @@ class GaussianPriorHyperparameters(PriorHyperparameters): - """Gaussian prior hyperparameters.""" + """ + Univariate Gaussian prior hyperparameters. + + This class implements prior hyperparameters following a univariate normal + (Gaussian) distribution with specified mean and precision (inverse variance). + + Parameters + ---------- + config : GaussianPriorHyperparametersConfig + Configuration object containing mean and precision parameters. + + Attributes + ---------- + mean : float + Mean of the Gaussian distribution. + precision : float + Precision (inverse variance) of the Gaussian distribution. + normalizing_constant : float + Precomputed normalizing constant for log probability evaluation. + """ def __init__( self, config: GaussianPriorHyperparametersConfig, ) -> None: - """Initializes the Gaussian prior hyperparameters.""" + """ + Initialize the Gaussian prior hyperparameters. + + Parameters + ---------- + config : GaussianPriorHyperparametersConfig + Configuration containing mean and precision parameters. + + Raises + ------ + ValueError + If the precision is not positive. + """ super().__init__(config) self.mean: float = config.mean self.precision: float = config.precision + # Validate precision is positive + if self.precision <= 0: + raise ValueError(f"Precision must be positive, got {self.precision}") + + self.normalizing_constant = -0.5 * xp.log(2 * xp.pi) + 0.5 * xp.log( + self.precision + ) + def rescale_hyperparameters_to_internal(self, theta, direction): - - if direction == "forward": - theta_scaled = xp.log(theta) - elif direction == "backward": - theta_scaled = xp.exp(theta) - else: - raise ValueError(f"Unknown direction: {direction}") + """ + Rescale hyperparameters between internal and external representations. + + Parameters + ---------- + theta : NDArray or float + Hyperparameter values to rescale. + direction : str + Direction of rescaling ('forward' or 'backward', i.e. from interpretable/user/external to internal or vice versa). - return theta_scaled + Returns + ------- + NDArray or float + Rescaled hyperparameter values. In this case it is the identity, therefore unchanged. + """ + return super().rescale_hyperparameters_to_internal(theta, direction) def evaluate_log_prior(self, theta: float, **kwargs) -> float: - """Evaluate the log prior hyperparameters.""" + """ + Evaluate the log prior probability density. + + Computes the log probability density of the univariate normal + distribution at the given theta value. + + Parameters + ---------- + theta : float + Parameter value at which to evaluate the log prior. + **kwargs + Additional keyword arguments (unused). + + Returns + ------- + float + Log prior probability density at theta. - return -0.5 * self.precision * (theta - self.mean) ** 2 + Notes + ----- + The computation follows: + log p(θ) = C - 0.5 * τ * (θ - μ)² + where C is the normalizing constant, τ is precision, and μ is the mean. + """ + return ( + self.normalizing_constant - 0.5 * self.precision * (theta - self.mean) ** 2 + ) diff --git a/src/dalia/prior_hyperparameters/gaussian_mvn.py b/src/dalia/prior_hyperparameters/gaussian_mvn.py index 95f98617..6bdc9170 100644 --- a/src/dalia/prior_hyperparameters/gaussian_mvn.py +++ b/src/dalia/prior_hyperparameters/gaussian_mvn.py @@ -12,13 +12,44 @@ class GaussianMVNPriorHyperparameters(PriorHyperparameters): - """Gaussian MVN prior hyperparameters.""" + """ + Gaussian multivariate normal (MVN) prior hyperparameters. + + This class implements prior hyperparameters following a multivariate normal + distribution with specified mean and precision matrix. + + Parameters + ---------- + config : GaussianMVNPriorHyperparametersConfig + Configuration object containing mean and precision matrix. + + Attributes + ---------- + mean : NDArray + Mean vector of the multivariate normal distribution. + precision : spmatrix + Precision matrix (inverse covariance) of the distribution. + normalizing_constant : float + Precomputed normalizing constant for log probability evaluation. + """ def __init__( self, config: GaussianMVNPriorHyperparametersConfig, ) -> None: - """Initializes the Gaussian MVN prior hyperparameters.""" + """ + Initialize the Gaussian MVN prior hyperparameters. + + Parameters + ---------- + config : GaussianMVNPriorHyperparametersConfig + Configuration containing mean vector and precision matrix. + + Raises + ------ + ValueError + If the precision matrix is not positive definite. + """ super().__init__(config) self.mean: NDArray = config.mean @@ -31,11 +62,58 @@ def __init__( self.mean: NDArray = xp.asarray(self.mean) self.precision: sp.sparse.spmatrix = sp.sparse.csc_matrix(self.precision) + sign, logabsdet = np.linalg.slogdet(self.precision.toarray()) + if sign != 1: + raise ValueError("Precision matrix must be positive definite.") + + self.normalizing_constant = ( + -0.5 * self.mean.shape[0] * xp.log(2 * xp.pi) + 0.5 * logabsdet + ) + print("Normalizing constant: ", self.normalizing_constant) + def rescale_hyperparameters_to_internal(self, theta, direction): + """ + Rescale hyperparameters between internal and external/user representations. + + Parameters + ---------- + theta : NDArray + Hyperparameter values to rescale. + direction : str + Direction of rescaling ('forward' or 'backward'). + + Returns + ------- + NDArray + Rescaled hyperparameter values, which is the identity in this case, therefore unchanged. + """ return super().rescale_hyperparameters_to_internal(theta, direction) - def evaluate_log_prior(self, theta: float, **kwargs) -> float: - """Evaluate the log prior hyperparameters.""" + def evaluate_log_prior(self, theta: NDArray, **kwargs) -> float: + """ + Evaluate the log prior probability density. + + Computes the log probability density of the multivariate normal + distribution at the given theta values. + + Parameters + ---------- + theta : NDArray + Parameter values at which to evaluate the log prior. + Must have the same shape as the mean vector. + **kwargs + Additional keyword arguments (unused). + + Returns + ------- + float + Log prior probability density at theta. + + Raises + ------ + ValueError + If theta and mean have incompatible shapes. + """ # TODO: add check in config or somewhere else that dim(theta) and dim(mean) match if self.mean.shape != theta.shape: @@ -44,7 +122,11 @@ def evaluate_log_prior(self, theta: float, **kwargs) -> float: ) if isinstance(self.mean, float): - return -0.5 * self.precision * (theta - self.mean) ** 2 + return ( + self.normalizing_constant + - 0.5 * self.precision * (theta - self.mean) ** 2 + ) else: - # neglect constant as the precision is fixed - return -0.5 * (theta - self.mean).T @ self.precision @ (theta - self.mean) + return self.normalizing_constant - 0.5 * ( + theta - self.mean + ).T @ self.precision @ (theta - self.mean) From 627fcff49b950275cf045cb16cde2bdc63153247 Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Fri, 26 Sep 2025 15:25:14 +0300 Subject: [PATCH 11/26] first real progress with the reparametrization issue. seems to be working now for minimize() call for gr with gamma prior --- .../gr/{preprocessing.py => generate_data.py} | 8 +- examples/gr/inputs/y.npy | 3 + examples/gr/reference_outputs/theta_ref.npy | 2 +- examples/gr/reference_outputs/x_ref.npy | 2 +- examples/gr/run.py | 6 +- .../configs/priorhyperparameters_config.py | 11 +- src/dalia/core/dalia.py | 58 ++++--- src/dalia/core/model.py | 161 +++++++++++++----- src/dalia/likelihoods/gaussian.py | 24 ++- src/dalia/prior_hyperparameters/__init__.py | 2 + 10 files changed, 186 insertions(+), 91 deletions(-) rename examples/gr/{preprocessing.py => generate_data.py} (88%) create mode 100644 examples/gr/inputs/y.npy diff --git a/examples/gr/preprocessing.py b/examples/gr/generate_data.py similarity index 88% rename from examples/gr/preprocessing.py rename to examples/gr/generate_data.py index 51330182..b5561e26 100644 --- a/examples/gr/preprocessing.py +++ b/examples/gr/generate_data.py @@ -28,12 +28,12 @@ x = L_Sigma_prior @ z a = sparse.random(n_observations, n_latent_parameters, density=0.5) - theta_observations = np.log(3) + theta_observations = 3.0 print(f"theta_observations: {theta_observations}") theta_likelihood: dict = {"theta_observations": theta_observations} # generate x from a gaussian distribution of dimensions n_latent_parameters with mean 0 and precision exp(theta_observations) - variance = 1 / np.exp(theta_observations) + variance = 1 / theta_observations eta = a @ x y = np.random.normal(eta, scale=np.sqrt(variance), size=n_observations) print(f"x: {x}") @@ -53,7 +53,7 @@ sparse.save_npz(f"{path}/inputs/a.npz", a) # save original latent parameters - np.save(f"{path}/inputs/x_original.npy", x) + np.save(f"{path}/reference_outputs/x_ref.npy", x) # save original hyperparameter theta - np.save(f"{path}/inputs/theta_original.npy", theta_observations) + np.save(f"{path}/reference_outputs/theta_ref.npy", theta_observations) diff --git a/examples/gr/inputs/y.npy b/examples/gr/inputs/y.npy new file mode 100644 index 00000000..a5755615 --- /dev/null +++ b/examples/gr/inputs/y.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5b972c648de0244b4fe3af2aac7e87210706d8ac879007011e061c2330c0ba +size 8128 diff --git a/examples/gr/reference_outputs/theta_ref.npy b/examples/gr/reference_outputs/theta_ref.npy index ccb2de10..01fbab03 100644 --- a/examples/gr/reference_outputs/theta_ref.npy +++ b/examples/gr/reference_outputs/theta_ref.npy @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:722e1a2b1465e290a283836da486e044a051dc5c7d0b420a1fc0addfd4411c1f +oid sha256:7b9d65f6717a0911c1cdf1cc680cb167f2ef1395be2db8ca2065cb2bd8a0a8f4 size 136 diff --git a/examples/gr/reference_outputs/x_ref.npy b/examples/gr/reference_outputs/x_ref.npy index fbf38892..87f3421e 100644 --- a/examples/gr/reference_outputs/x_ref.npy +++ b/examples/gr/reference_outputs/x_ref.npy @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86bdf16672dbf80add11a285c0fe68f2532436b2d232a86992144017d02fcd61 +oid sha256:4ddd3a0b4a76c50a77c276c3c7a3a37b506a633b3e31564e8fd3bf859d3e2184 size 176 diff --git a/examples/gr/run.py b/examples/gr/run.py index d12fdaad..6c44e3f3 100644 --- a/examples/gr/run.py +++ b/examples/gr/run.py @@ -35,8 +35,9 @@ # Likelihood likelihood_dict = { "type": "gaussian", - "prec_o": 1.5, - "prior_hyperparameters": {"type": "gaussian", "mean": 3.5, "precision": 0.5}, + "prec_o": 1.0, + "prior_hyperparameters": {"type": "gamma", "alpha": 2.0, "beta": 2.0}, + #"prior_hyperparameters": {"type": "gaussian", "mean": 1.0, "precision": 0.5}, } # Creation of the first model by combining the Regression submodel and the likelihood model = Model( @@ -79,6 +80,7 @@ print_msg("\n--- Comparisons ---") # Compare hyperparameters theta_ref = xp.load(f"{BASE_DIR}/reference_outputs/theta_ref.npy") + print_msg("Reference theta:", theta_ref) print_msg( "Norm (theta - theta_ref): ", f"{np.linalg.norm(results['theta'] - get_host(theta_ref)):.4e}", diff --git a/src/dalia/configs/priorhyperparameters_config.py b/src/dalia/configs/priorhyperparameters_config.py index ddbea90c..7004ba8d 100644 --- a/src/dalia/configs/priorhyperparameters_config.py +++ b/src/dalia/configs/priorhyperparameters_config.py @@ -13,7 +13,9 @@ class PriorHyperparametersConfig(BaseModel): model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True) - type: Literal["gaussian", "penalized_complexity", "beta", "gaussian_mvn"] = None + type: Literal[ + "gaussian", "penalized_complexity", "beta", "gaussian_mvn", "gamma" + ] = None class GaussianPriorHyperparametersConfig(PriorHyperparametersConfig): @@ -44,6 +46,11 @@ class BetaPriorHyperparametersConfig(PriorHyperparametersConfig): beta: float = None +class GammaPriorHyperparametersConfig(PriorHyperparametersConfig): + alpha: float = None + beta: float = None + + def parse_config(config: dict) -> PriorHyperparametersConfig: prior_type = config.get("type") if prior_type == "gaussian": @@ -54,5 +61,7 @@ def parse_config(config: dict) -> PriorHyperparametersConfig: return PenalizedComplexityPriorHyperparametersConfig(**config) elif prior_type == "beta": return BetaPriorHyperparametersConfig(**config) + elif prior_type == "gamma": + return GammaPriorHyperparametersConfig(**config) else: raise ValueError(f"Unknown prior hyperparameters config type: {prior_type}") diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py index b9e67343..b987cd53 100644 --- a/src/dalia/core/dalia.py +++ b/src/dalia/core/dalia.py @@ -200,14 +200,14 @@ def __init__( dtype=xp.float64, ) self.theta_mat = xp.zeros( - (self.model.theta.size, self.n_f_evaluations), dtype=xp.float64 + (self.model.theta_internal.size, self.n_f_evaluations), dtype=xp.float64 ) - self.theta_optimizer = xp.zeros_like(self.model.theta) - self.theta_optimizer[:] = self.model.theta + self.theta_optimizer = xp.zeros_like(self.model.theta_internal) + self.theta_optimizer[:] = self.model.theta_internal # --- Metrics self.f_values: ArrayLike = [] - self.theta_values: ArrayLike = [] + self.theta_values_internal: ArrayLike = [] self.objective_function_time: ArrayLike = [] self.solver_time: ArrayLike = [] self.construction_time: ArrayLike = [] @@ -355,13 +355,13 @@ def minimize(self) -> optimize.OptimizeResult: Result of the optimization procedure. """ - if len(self.model.theta) == 0: + if len(self.model.theta_external) == 0: # Only run the inner iteration print_msg("No hyperparameters, just running inner iteration.") - self.f_value = self._evaluate_f(self.model.theta) + self.f_value = self._evaluate_f(self.model.theta_external) self.minimization_result: dict = { - "theta_internal": self.model.theta, - "theta": self.model.rescale_hyperparameters_to_internal(self.model.theta, direction="backward"), + "theta_internal": self.model.theta_internal, + "theta": self.model.theta_external, "x": self.model.x, # [self.model.inverse_permutation_latent_variables], "f": self.f_value, } @@ -397,7 +397,7 @@ def callback(intermediate_result: optimize.OptimizeResult): flush=True, ) - self.theta_values.append(theta_i) + self.theta_values_internal.append(theta_i) self.f_values.append(fun_i) # check if f_values have been decreasing over last iterations @@ -415,9 +415,9 @@ def callback(intermediate_result: optimize.OptimizeResult): ) self.minimization_result = { - "theta_internal": get_host(self.model.theta), + "theta_internal": get_host(self.model.theta_internal), "theta": get_host( - self.model.rescale_hyperparameters_to_internal(self.model.theta, direction="backward") + self.model.theta_external ), "x": get_host( self.model.x @@ -428,7 +428,8 @@ def callback(intermediate_result: optimize.OptimizeResult): "f": fun_i, "grad_f": self.gradient_f, "f_values": self.f_values, - "theta_values": self.theta_values, + ### these values are in internal scale (!!) + "theta_values": self.theta_values_internal, } raise OptimizationConvergedEarlyExit() @@ -436,13 +437,13 @@ def callback(intermediate_result: optimize.OptimizeResult): if self.accepted_iter > self.config.theta_reduction_lag: if ( xp.linalg.norm( - self.theta_values[-self.config.theta_reduction_lag] + self.theta_values_internal[-self.config.theta_reduction_lag] - theta_i ) < self.config.theta_reduction_tol ): norm_diff = xp.linalg.norm( - self.theta_values[ + self.theta_values_internal[ self.accepted_iter - self.config.theta_reduction_lag ] - theta_i @@ -456,10 +457,9 @@ def callback(intermediate_result: optimize.OptimizeResult): ) self.minimization_result = { - "theta_internal": get_host(self.model.theta), + "theta_internal": get_host(self.model.theta_internal), "theta": get_host( - self.model.rescale_hyperparameters_to_internal(self.model.theta, direction="backward") - ), + self.model.theta_external), "x": get_host( self.model.x # self.model.x[ @@ -469,7 +469,7 @@ def callback(intermediate_result: optimize.OptimizeResult): "f": fun_i, "grad_f": self.gradient_f, "f_values": self.f_values, - "theta_values": self.theta_values, + "theta_values": self.theta_values_internal, } raise OptimizationConvergedEarlyExit() @@ -519,15 +519,15 @@ def callback(intermediate_result: optimize.OptimizeResult): ) self.minimization_result: dict = { - "theta_internal": get_host(self.model.theta), # scipy_result.x, # - "theta": get_host(self.model.rescale_hyperparameters_to_internal(self.model.theta, direction="backward")), + "theta_internal": get_host(self.model.theta_internal), # scipy_result.x, # + "theta": get_host(self.model.theta_external), "x": get_host( self.model.x, # [self.model.inverse_permutation_latent_variables] ), "f": scipy_result.fun, "grad_f": self.gradient_f, "f_values": self.f_values, - "theta_values": self.theta_values, + "theta_values": self.theta_values_internal, } return self.minimization_result @@ -650,7 +650,8 @@ def _evaluate_f( tic = time.time() - self.model.theta[:] = theta_i + #self.model.theta_internal[:] = theta_i + self.model.theta_internal = theta_i f_theta = xp.zeros(1, dtype=xp.float64) # --- Optimize x and evaluate the conditional of the latent parameters @@ -982,7 +983,8 @@ def _compute_covariance_latent_parameters( marginal_latent_parameters : NDArray Marginal distribution of the latent parameters x. """ - self.model.theta[:] = theta + print("Computing covariance of latent parameters at theta:", theta) + self.model.theta_external = xp.array(theta) self.model.x[:] = x_star eta = self.model.a @ self.model.x @@ -992,11 +994,11 @@ def _compute_covariance_latent_parameters( self.solver.selected_inversion(sparsity="bta") def get_marginal_variances_latent_parameters( - self, theta_interpret: NDArray = None, x_star: NDArray = None + self, theta: NDArray = None, x_star: NDArray = None ) -> NDArray: - - ## assume theta to be in "external" scale - theta = self.model.rescale_hyperparameters_to_internal(theta_interpret, direction="forward") if theta_interpret is not None else None + + ## assume theta to be in "external" scale + self.model.theta_external = xp.array(theta) # TODO: this should be only called by rank 0? if theta is None and x_star is None: @@ -1052,7 +1054,7 @@ def get_marginal_variances_observations( "Computing marginal variances for currently stored latent parameters. " ) x_star = self.model.x - theta = self.model.theta + theta = self.model.theta_external elif theta is None or x_star is None: raise ValueError( "BOTH or NEITHER theta and x_star must be provided to compute the marginal variances." diff --git a/src/dalia/core/model.py b/src/dalia/core/model.py index 1bafa7f3..ab927f31 100644 --- a/src/dalia/core/model.py +++ b/src/dalia/core/model.py @@ -14,6 +14,7 @@ GaussianMVNPriorHyperparametersConfig, GaussianPriorHyperparametersConfig, PenalizedComplexityPriorHyperparametersConfig, + GammaPriorHyperparametersConfig, ) from dalia.core.likelihood import Likelihood from dalia.core.prior_hyperparameters import PriorHyperparameters @@ -24,6 +25,7 @@ GaussianMVNPriorHyperparameters, GaussianPriorHyperparameters, PenalizedComplexityPriorHyperparameters, + GammaPriorHyperparameters, ) from dalia.submodels import ( BrainiacSubModel, @@ -57,8 +59,11 @@ def __init__( self.n_fixed_effects: int = 0 + self._theta_external: ArrayLike = [] + self._theta_internal: ArrayLike = [] + # For each submodel... - theta: ArrayLike = [] + theta_external: ArrayLike = [] theta_keys: ArrayLike = [] self.hyperparameters_idx: ArrayLike = [0] self.prior_hyperparameters: list[PriorHyperparameters] = [] @@ -203,34 +208,28 @@ def __init__( config=submodel.config.ph_alpha, ) ) + if isinstance( + submodel.config.ph_alpha, + ): + self.prior_hyperparameters.append( + PenalizedComplexityPriorHyperparameters( + config=submodel.config.ph_alpha, + hyperparameter_type="alpha", + ) + ) else: raise ValueError("Unknown submodel type") # ...and read their hyperparameters theta_submodel, theta_keys_submodel = submodel.config.read_hyperparameters() - theta.append(theta_submodel) + theta_external.append(theta_submodel) theta_keys += theta_keys_submodel self.hyperparameters_idx.append( self.hyperparameters_idx[-1] + len(theta_submodel) ) - # Add the likelihood hyperparameters - ( - lh_hyperparameters, - lh_hyperparameters_keys, - ) = likelihood_config.read_hyperparameters() - - theta.append(lh_hyperparameters) - self.theta: NDArray = xp.concatenate(theta) - print("Initial hyperparameters (internal scale): ", self.theta) - - theta_keys += lh_hyperparameters_keys - self.theta_keys: NDArray = theta_keys - - self.n_hyperparameters = self.theta.size - # --- Initialize the latent parameters and the design matrix self.n_latent_parameters: int = 0 self.latent_parameters_idx: list[int] = [0] @@ -337,6 +336,24 @@ def __init__( hyperparameter_type="prec_o", ) ) + elif isinstance( + likelihood_config.prior_hyperparameters, + BetaPriorHyperparametersConfig, + ): + self.prior_hyperparameters.append( + BetaPriorHyperparameters( + config=likelihood_config.prior_hyperparameters, + ) + ) + elif isinstance( + likelihood_config.prior_hyperparameters, + GammaPriorHyperparametersConfig, + ): + self.prior_hyperparameters.append( + GammaPriorHyperparameters( + config=likelihood_config.prior_hyperparameters, + ) + ) elif likelihood_config.type == "poisson": self.likelihood: Likelihood = PoissonLikelihood( n_observations=self.n_observations, @@ -349,19 +366,64 @@ def __init__( ) self.likelihood_config: LikelihoodConfig = likelihood_config + + # Add the likelihood hyperparameters + ( + lh_hyperparameters, + lh_hyperparameters_keys, + ) = likelihood_config.read_hyperparameters() + + theta_external.append(lh_hyperparameters) + self.theta_external = np.concatenate(theta_external) + # self.theta = np.concatenate(theta_external) + + print("Initial hyperparameters (external scale): ", self.theta_external) + print("Initial hyperparameters (internal scale): ", self.theta_internal) + + theta_keys += lh_hyperparameters_keys + self.theta_keys: NDArray = theta_keys + + self.n_hyperparameters = self.theta_external.size # --- Recurrent variables self.Q_prior = None self.Q_prior_data_mapping = [0] self.Q_conditional = None self.Q_conditional_data_mapping = [0] + + ######################################################################## + @property + def theta_external(self): + """External/user/interpretable scale theta.""" + # the copy is important to make sure that in place operations still trigger updating + return self._theta_external.copy() + + @theta_external.setter + def theta_external(self, value): + """Set external theta and automatically update internal.""" + + self._theta_external = np.array(value) + self._theta_internal = self.rescale_hyperparameters_to_internal( + self._theta_external, direction="forward" + ) + + @property + def theta_internal(self): + """Internal/BFGS scale theta.""" + return self._theta_internal.copy() + + @theta_internal.setter + def theta_internal(self, value): + """Set internal theta and automatically update external.""" + self._theta_internal = np.array(value) + self._theta_external = self.rescale_hyperparameters_to_internal( + self._theta_internal, direction="backward" + ) + ######################################################################## def construct_Q_prior(self) -> sp.sparse.spmatrix: kwargs = {} - ## convert theta from bfgs/internal scale to prior/user/interpretable scale - theta_interpret = self.rescale_hyperparameters_to_internal(self.theta, direction="backward") - if self.Q_prior is None: # During the first construction of Q_prior, we allocate the memory for # the data and the mapping of each submodel's to the Q prior matrix. @@ -375,23 +437,26 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - # kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) - kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(self.theta_external[hp_idx]) + #kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, SpatialSubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(self.theta_external[hp_idx]) + #kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, BrainiacSubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(self.theta_external[hp_idx]) + #kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, AR1SubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(self.theta_external[hp_idx]) + #kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, RegressionSubModel): ... @@ -424,23 +489,26 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - # kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) - kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(self.theta_external[hp_idx]) + #kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, SpatialSubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(self.theta_external[hp_idx]) + #kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, BrainiacSubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(self.theta_external[hp_idx]) + #kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, AR1SubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float(self.theta_external[hp_idx]) + #kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) submodel_Q_prior = submodel.construct_Q_prior(**kwargs) @@ -466,7 +534,7 @@ def construct_Q_conditional( if self.likelihood_config.type == "gaussian": kwargs = { "eta": eta, - "theta": float(self.theta[-1]), + "theta": float(self.theta_external[-1]), } else: kwargs = { @@ -475,7 +543,7 @@ def construct_Q_conditional( if isinstance(self.submodels[0], BrainiacSubModel): # Brainiac specific rule - kwargs["h2"] = float(self.theta[0]) + kwargs["h2"] = float(self.theta_external[0]) d_matrix = self.submodels[0].evaluate_d_matrix(**kwargs) else: # General rules @@ -509,7 +577,7 @@ def construct_information_vector( """Construct the information vector.""" if isinstance(self.submodels[0], BrainiacSubModel): - kwargs = {"h2": float(self.theta[0])} + kwargs = {"h2": float(self.theta_external[0])} gradient_likelihood = self.submodels[0].evaluate_gradient_likelihood( eta=eta, y=self.y, **kwargs ) @@ -518,7 +586,7 @@ def construct_information_vector( gradient_likelihood = self.likelihood.evaluate_gradient_likelihood( eta=eta, y=self.y, - theta=self.theta[self.hyperparameters_idx[-1] :], + theta=self.theta_external[self.hyperparameters_idx[-1] :], ) information_vector: NDArray = ( @@ -535,19 +603,20 @@ def evaluate_log_prior_hyperparameters(self) -> float: """Evaluate the log prior hyperparameters.""" log_prior = 0.0 - theta_interpret = self.theta.copy() + #theta_interpret = self.theta.copy() for i, prior_hyperparameter in enumerate(self.prior_hyperparameters): - if isinstance(prior_hyperparameter, BetaPriorHyperparameters): - theta_interpret[i] = scaled_logit( - theta_interpret[i], direction="backward" - ) + # if isinstance(prior_hyperparameter, BetaPriorHyperparameters): + # theta_interpret[i] = scaled_logit( + # theta_interpret[i], direction="backward" + # ) # theta_interpret[i] = prior_hyperparameter.rescale_hyperparameters_to_internal( # theta_interpret[i], direction="backward" # ) - log_prior += prior_hyperparameter.evaluate_log_prior(theta_interpret[i]) + log_prior += prior_hyperparameter.evaluate_log_prior(self.theta_external[i]) + #log_prior += prior_hyperparameter.evaluate_log_prior(theta_interpret[i]) # if BFGS and model scale differ: rescale -- generalize # if isinstance(self.submodels[0], BrainiacSubModel): @@ -573,9 +642,9 @@ def get_theta_likelihood(self) -> NDArray: """Return the likelihood hyperparameters.""" if isinstance(self.submodels[0], BrainiacSubModel): - theta_likelihood = 1 - scaled_logit(self.theta[0], direction="backward") + theta_likelihood = 1 - self.theta_external[0] else: - theta_likelihood = self.theta[self.hyperparameters_idx[-1] :] + theta_likelihood = self.theta_external[self.hyperparameters_idx[-1] :] return theta_likelihood @@ -601,24 +670,24 @@ def rescale_hyperparameters_to_internal(self, theta, direction): theta_internal = xp.copy(theta) for i, prior_hyperparameter in enumerate(self.prior_hyperparameters): - ## how to handle priors that have multiple hyperparameters? if isinstance(prior_hyperparameter, GaussianMVNPriorHyperparameters): pass # no rescaling implemented else: theta_internal[i] = prior_hyperparameter.rescale_hyperparameters_to_internal(theta[i], direction=direction) - + return theta_internal def evaluate_likelihood(self, eta: NDArray, **kwargs) -> float: """Evaluate the likelihood.""" if isinstance(self.submodels[0], BrainiacSubModel): - kwargs["h2"] = float(self.theta[0]) + #kwargs["h2"] = float(self.theta[0]) + kwargs["h2"] = float(self.theta_external[0]) likelihood = self.submodels[0].evaluate_likelihood(eta, self.y, **kwargs) else: likelihood = self.likelihood.evaluate_likelihood( - eta, self.y, theta=self.theta[self.hyperparameters_idx[-1] :] + eta, self.y, theta=self.theta_external[self.hyperparameters_idx[-1] :] ) return likelihood diff --git a/src/dalia/likelihoods/gaussian.py b/src/dalia/likelihoods/gaussian.py index ee69bd16..6ed8a864 100644 --- a/src/dalia/likelihoods/gaussian.py +++ b/src/dalia/likelihoods/gaussian.py @@ -29,7 +29,7 @@ def evaluate_likelihood( Evaluate Gaussian log-likelihood for a given set of observations, latent parameters, and design matrix, where the observations are assumed to be identically and independently distributed given eta (=A*x). Leading to: - log (p(y|eta)) = -0.5 * n * log(2 * pi) - 0.5 * n * theta_observations - 0.5 * exp(theta_observations) * (y - eta)^T * (y - eta) + log (p(y|eta)) = -0.5 * n * log(2 * pi) - 0.5 * n * log(theta) - 0.5 * theta * (y - eta)^T * (y - eta) where the constant in front of the likelihood is omitted. Parameters @@ -40,7 +40,7 @@ def evaluate_likelihood( Vector of the observations. kwargs : theta : float - Specific parameter for the likelihood calculation. + precision parameter for the likelihood calculation. theta > 0. Returns ------- @@ -52,11 +52,16 @@ def evaluate_likelihood( if theta is None: raise ValueError("theta must be provided to evaluate gaussian likelihood.") + if theta <= 0: + raise ValueError(f"theta must be positive, got {theta}") + yEta = eta - y # print("xp.exp(theta) in lh:", xp.exp(theta)) likelihood: float = ( - 0.5 * theta * self.n_observations - 0.5 * xp.exp(theta) * yEta.T @ yEta + -0.5 * self.n_observations * xp.log(2 * xp.pi) + + 0.5 * xp.log(theta) * self.n_observations + - 0.5 * theta * yEta.T @ yEta ) return likelihood @@ -77,7 +82,7 @@ def evaluate_gradient_likelihood( Vector of the observations. kwargs : theta : float - Specific parameter for the likelihood calculation. + precision parameter for the likelihood calculation. theta > 0. Returns ------- @@ -91,7 +96,10 @@ def evaluate_gradient_likelihood( "theta must be provided to evaluate gradient of gaussian likelihood." ) - gradient_likelihood: NDArray = -xp.exp(theta) * (eta - y) + if theta <= 0: + raise ValueError(f"theta must be positive, got {theta}") + + gradient_likelihood: NDArray = -theta * (eta - y) return gradient_likelihood @@ -121,11 +129,11 @@ def evaluate_hessian_likelihood( raise ValueError( "theta must be provided to evaluate gradient of gaussian likelihood." ) + if theta <= 0: + raise ValueError(f"theta must be positive, got {theta}") # print("hessian lh: xp.exp(theta)", xp.exp(theta)) - hessian_likelihood: ArrayLike = -xp.exp(theta) * sp.sparse.eye( - self.n_observations - ) + hessian_likelihood: ArrayLike = -theta * sp.sparse.eye(self.n_observations) return hessian_likelihood diff --git a/src/dalia/prior_hyperparameters/__init__.py b/src/dalia/prior_hyperparameters/__init__.py index b561764e..f1b2e9d7 100644 --- a/src/dalia/prior_hyperparameters/__init__.py +++ b/src/dalia/prior_hyperparameters/__init__.py @@ -6,10 +6,12 @@ PenalizedComplexityPriorHyperparameters, ) from dalia.prior_hyperparameters.beta import BetaPriorHyperparameters +from dalia.prior_hyperparameters.gamma import GammaPriorHyperparameters __all__ = [ "GaussianPriorHyperparameters", "GaussianMVNPriorHyperparameters", "PenalizedComplexityPriorHyperparameters", "BetaPriorHyperparameters", + "GammaPriorHyperparameters", ] From 4aa9ab552e6c6507ec731e7e678d325d49c40991 Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Mon, 6 Oct 2025 17:50:41 +0300 Subject: [PATCH 12/26] work in progress, lots of quadrature. lots of test. nothing in core yet --- src/dalia/prior_hyperparameters/gamma.py | 321 ++++++++++ .../utils/bivariate_gaussian_quadrature.py | 236 +++++++ src/dalia/utils/gaussian_quadrature.py | 382 +++++++++++ .../utils/multivariate_transformation_test.py | 599 ++++++++++++++++++ src/dalia/utils/reparametrizations.py | 348 ++++++++++ 5 files changed, 1886 insertions(+) create mode 100644 src/dalia/prior_hyperparameters/gamma.py create mode 100644 src/dalia/utils/bivariate_gaussian_quadrature.py create mode 100644 src/dalia/utils/gaussian_quadrature.py create mode 100644 src/dalia/utils/multivariate_transformation_test.py create mode 100644 src/dalia/utils/reparametrizations.py diff --git a/src/dalia/prior_hyperparameters/gamma.py b/src/dalia/prior_hyperparameters/gamma.py new file mode 100644 index 00000000..65757091 --- /dev/null +++ b/src/dalia/prior_hyperparameters/gamma.py @@ -0,0 +1,321 @@ +# Copyright 2024-2025 DALIA authors. All rights reserved. +from dalia import NDArray +from scipy.sparse import spmatrix +from dalia import sp, xp + +import numpy as np + +from dalia.configs.priorhyperparameters_config import ( + GammaPriorHyperparametersConfig, +) +from dalia.core.prior_hyperparameters import PriorHyperparameters + + +class GammaPriorHyperparameters(PriorHyperparameters): + """Gamma prior hyperparameters. + + p(theta) = (beta^alpha / Gamma(alpha)) * theta^(alpha - 1) * exp(-beta * theta) + + and in log scale: + log p(theta) = alpha * log(beta) - log(Gamma(alpha)) + (alpha - 1) * log(theta) - beta * theta + + where theta is typically a positive parameter such as a precision or rate. + + Parameters + ---------- + config : GammaPriorHyperparametersConfig + Configuration object containing alpha and beta parameters. + + Attributes + ---------- + alpha : float. alpha > 0 + Shape parameter of the Gamma distribution. + beta : float. beta > 0 + Rate parameter of the Gamma distribution. + normalizing_constant : float + Precomputed normalizing constant for log probability evaluation. + """ + + def __init__( + self, + config: GammaPriorHyperparametersConfig, + ) -> None: + """ + Initialize the Gamma prior hyperparameters. + + Parameters + ---------- + config : GammaPriorHyperparametersConfig + Configuration containing alpha (shape) and beta (rate) parameters. + + Raises + ------ + ValueError + If alpha or beta are not positive. + """ + super().__init__(config) + + self.alpha: float = config.alpha + self.beta: float = config.beta + + # Validate alpha and beta are positive + if self.alpha <= 0: + raise ValueError(f"Alpha must be positive, got {self.alpha}") + if self.beta <= 0: + raise ValueError(f"Beta must be positive, got {self.beta}") + + self.normalizing_constant: float = self.alpha * xp.log(self.beta) - float( + sp.special.gammaln(self.alpha) + ) + + def rescale_hyperparameters_to_internal(self, theta, direction): + """ + Transform between external and internal parameter representations. + + The Gamma distribution is defined for positive values, but optimization + often works better in unconstrained space. This method transforms + between theta (positive) and log(theta) (unconstrained). + + Parameters + ---------- + theta : float or NDArray + Parameter value(s) to transform. + direction : str + Transformation direction: + - "forward": theta -> log(theta) (external to internal) + - "backward": log(theta) -> theta (internal to external) + + Returns + ------- + float or NDArray + Transformed parameter value(s). + + Raises + ------ + ValueError + If direction is not "forward" or "backward". + """ + if direction == "forward": + theta_scaled = xp.log(theta) + elif direction == "backward": + theta_scaled = xp.exp(theta) + elif direction == "forward_jacobian": + theta_scaled = 1 / theta # d(log(theta))/d(theta) = 1/theta + elif direction == "backward_jacobian": + theta_scaled = theta # d(exp(theta))/d(theta) = exp(theta) = theta + else: + raise ValueError(f"Unknown direction: {direction}") + + return theta_scaled + + def evaluate_log_prior(self, theta: float, **kwargs) -> float: + """ + Evaluate the log prior probability density. + + Computes the log probability density of the Gamma distribution + at the given theta value in its external/user representation. + + Parameters + ---------- + theta : float + Parameter value at which to evaluate the log prior. + Must be positive (external/user representation). + **kwargs + Additional keyword arguments (unused). + + Returns + ------- + float + Log prior probability density at theta. + + Notes + ----- + The computation follows: + log p(θ) = C + (α - 1) * log(θ) - β * θ + where C is the normalizing constant, α is the shape parameter, + and β is the rate parameter. + + Raises + ------ + ValueError + If theta is not positive (implicitly through log computation). + """ + + log_prior = ( + self.normalizing_constant + + (self.alpha - 1) * xp.log(theta) + - self.beta * theta + ) + + return log_prior + +if __name__ == "__main__": + """ + Test Gaussian quadrature for functions using rescale_hyperparameters_to_internal() + from gamma prior hyperparameters. + + We start with normally distributed random variables in internal space (unconstrained) + that get reparametrized to external space (positive) using the gamma prior's + rescaling function. + """ + + from dalia.utils.gaussian_quadrature import compute_variance_gauss_hermite + + print("=" * 80) + print("Testing Gaussian Quadrature with Gamma Prior Rescaling") + print("=" * 80) + + # Create a gamma prior configuration + config = GammaPriorHyperparametersConfig(alpha=2.0, beta=1.0) + gamma_prior = GammaPriorHyperparameters(config=config) + + # Define parameters for the normal distribution in internal space + # These represent log(theta) where theta > 0 is the gamma-distributed parameter + mean_internal = 0.5 # Mean of log(theta) + variance_internal = 0.25 # Variance of log(theta) + + print(f"Internal space (log-scale) parameters:") + print(f" Mean: {mean_internal}") + print(f" Variance: {variance_internal}") + print(f" Standard deviation: {np.sqrt(variance_internal)}") + print() + + # Test 1: Compute statistics using Gaussian quadrature + print("1. Computing statistics using Gaussian quadrature:") + + # Use the rescaling function as the transform + def transform_func(x, direction): + return gamma_prior.rescale_hyperparameters_to_internal(x, direction) + + # Compute statistics using different numbers of quadrature points + for n_points in [10, 20, 30, 50]: + result = compute_variance_gauss_hermite( + mean_internal, + variance_internal, + transform_func, + n_points=n_points + ) + + print(f" n_points = {n_points:2d}: Mean = {result['mean']:.6f}, " + f"Std = {result['std']:.6f}, Var = {result['variance']:.6f}") + + print() + + # Test 2: Compare with analytical solution + print("2. Comparison with analytical log-normal distribution:") + print(" For log(Y) ~ N(μ, σ²), we have:") + print(" E[Y] = exp(μ + σ²/2)") + print(" Var[Y] = (exp(σ²) - 1) * exp(2μ + σ²)") + + # Analytical moments for log-normal distribution + mu = mean_internal + sigma2 = variance_internal + + analytical_mean = np.exp(mu + sigma2/2) + analytical_variance = (np.exp(sigma2) - 1) * np.exp(2*mu + sigma2) + analytical_std = np.sqrt(analytical_variance) + + print(f" Analytical mean: {analytical_mean:.6f}") + print(f" Analytical std: {analytical_std:.6f}") + print(f" Analytical var: {analytical_variance:.6f}") + print() + + # Compare with quadrature result (using 50 points) + quad_result = compute_variance_gauss_hermite( + mean_internal, variance_internal, transform_func, n_points=50 + ) + + print("3. Comparison of quadrature vs analytical:") + print(f" Mean difference: {abs(quad_result['mean'] - analytical_mean):.2e}") + print(f" Std difference: {abs(quad_result['std'] - analytical_std):.2e}") + print(f" Var difference: {abs(quad_result['variance'] - analytical_variance):.2e}") + + # Relative errors + mean_rel_error = abs(quad_result['mean'] - analytical_mean) / analytical_mean + std_rel_error = abs(quad_result['std'] - analytical_std) / analytical_std + var_rel_error = abs(quad_result['variance'] - analytical_variance) / analytical_variance + + print(f" Mean rel. error: {mean_rel_error:.2e}") + print(f" Std rel. error: {std_rel_error:.2e}") + print(f" Var rel. error: {var_rel_error:.2e}") + print() + + # Test 3: Test with different internal parameters + print("4. Testing with different internal parameters:") + + test_cases = [ + {"mean": 0.0, "var": 0.1, "name": "Small variance"}, + {"mean": 1.0, "var": 0.5, "name": "Medium variance"}, + {"mean": -0.5, "var": 1.0, "name": "Large variance"}, + {"mean": 2.0, "var": 0.01, "name": "Large mean, small variance"} + ] + + for case in test_cases: + mu_test = case["mean"] + var_test = case["var"] + + # Quadrature result + quad_result = compute_variance_gauss_hermite( + mu_test, var_test, transform_func, n_points=30 + ) + + # Analytical result + anal_mean = np.exp(mu_test + var_test/2) + anal_var = (np.exp(var_test) - 1) * np.exp(2*mu_test + var_test) + + rel_mean_error = abs(quad_result['mean'] - anal_mean) / anal_mean + rel_var_error = abs(quad_result['variance'] - anal_var) / anal_var + + print(f" {case['name']:25s}: Mean rel. err = {rel_mean_error:.2e}, " + f"Var rel. err = {rel_var_error:.2e}") + + print() + + # Test 4: Test the rescaling function directions + print("5. Testing rescaling function directions:") + + # Test some values + test_values = [0.1, 0.5, 1.0, 2.0, 5.0] + + print(" Testing forward (external -> internal) and backward (internal -> external):") + for theta in test_values: + # Forward: theta -> log(theta) + log_theta = gamma_prior.rescale_hyperparameters_to_internal(theta, "forward") + + # Backward: log(theta) -> theta + theta_recovered = gamma_prior.rescale_hyperparameters_to_internal(log_theta, "backward") + + error = abs(theta - theta_recovered) + print(f" θ = {theta:4.1f} -> log(θ) = {log_theta:6.3f} -> θ = {theta_recovered:6.3f}, " + f"error = {error:.2e}") + + print() + + # Test 5: Convergence study + print("6. Convergence study (increasing number of quadrature points):") + + mu_conv = 0.3 + var_conv = 0.4 + analytical_mean_conv = np.exp(mu_conv + var_conv/2) + + n_points_list = [5, 10, 15, 20, 25, 30, 40, 50, 75, 100] + + print(" n_points | Mean | Rel. Error") + print(" -----------|-------------|------------") + + for n in n_points_list: + result = compute_variance_gauss_hermite( + mu_conv, var_conv, transform_func, n_points=n + ) + rel_error = abs(result['mean'] - analytical_mean_conv) / analytical_mean_conv + + print(f" {n:8d} | {result['mean']:10.6f} | {rel_error:.3e}") + + print() + print("=" * 80) + print("Test completed successfully!") + print("All tests show that Gaussian quadrature accurately approximates") + print("the moments of log-normal distributions obtained through gamma") + print("prior rescaling transformations.") + print("=" * 80) + diff --git a/src/dalia/utils/bivariate_gaussian_quadrature.py b/src/dalia/utils/bivariate_gaussian_quadrature.py new file mode 100644 index 00000000..18a454fd --- /dev/null +++ b/src/dalia/utils/bivariate_gaussian_quadrature.py @@ -0,0 +1,236 @@ +# Copyright 2024-2025 DALIA authors. All rights reserved. + +import numpy as np +from dalia import xp +from scipy.special import roots_hermite + +from gaussian_quadrature import compute_variance_gauss_hermite + +def compute_bivariate_expectation(func1, func2, rho, n_points=20): + """ + Compute E[f(Z₁, Z₂)] where (Z₁, Z₂) ~ N(0, Σ) using bivariate Gauss-Hermite quadrature. + + E[f(Z₁, Z₂)] = ∬ f(z₁, z₂) φ_ρ(z₁, z₂) dz₁ dz₂ + + where φ_ρ(z₁, z₂) is the bivariate standard normal density with correlation ρ. + + Parameters + ---------- + func : callable + Function f(z₁, z₂) to compute expectation of + rho : float, optional + Correlation coefficient between Z₁ and Z₂ (default: 0.0) + n_points : int, optional + Number of quadrature points per dimension (default: 20) + + Returns + ------- + float + Expected value E[f(Z₁, Z₂)] + """ + # Get Gauss-Hermite quadrature points and weights + nodes, weights = roots_hermite(n_points) + + # Transform nodes from Hermite polynomial roots to standard normal + z_nodes = xp.sqrt(2) * nodes + adjusted_weights = weights / xp.sqrt(xp.pi) + + # For bivariate case with correlation ρ, we need to transform to correlated variables + # If (U₁, U₂) are independent N(0,1), then: + # Z₁ = U₁ + # Z₂ = ρU₁ + √(1-ρ²)U₂ + # gives (Z₁, Z₂) ~ N(0, [[1, ρ], [ρ, 1]]) + + if abs(rho) > 1: + raise ValueError("Correlation coefficient rho must be in [-1, 1]") + + sqrt_one_minus_rho_sq = xp.sqrt(1 - rho**2) if abs(rho) < 1 else 0.0 + + expectation = 0.0 + + # Double loop over all combinations of quadrature points + for i, (u1, w1) in enumerate(zip(z_nodes, adjusted_weights)): + for j, (u2, w2) in enumerate(zip(z_nodes, adjusted_weights)): + # Transform to correlated variables + z1 = u1 + z2 = rho * u1 + sqrt_one_minus_rho_sq * u2 + + # Evaluate function at transformed points + f_val = func1(z1) * func2(z2) + + # Combined weight (product of univariate weights) + weight = w1 * w2 + + expectation += weight * f_val + + return expectation + + +def test_quadrature_accuracy(): + """Test quadrature accuracy against known analytical results.""" + + print("=" * 80) + print("Testing Bivariate Gaussian Quadrature") + print("=" * 80) + + # Test 2: Bivariate expectations with independence (ρ = 0) + print("2. Testing bivariate expectations with independence (ρ = 0):") + + # E[1] = 1 + biv_expectation_1 = compute_bivariate_expectation(lambda z: 1.0, lambda z: 1.0, rho=0.0, n_points=10) + print(f" E[1] = {biv_expectation_1:.8f} (should be 1.0, error: {abs(1.0 - biv_expectation_1):.2e})") + + # E[Z₁Z₂] = 0 when independent + biv_expectation_z1z2 = compute_bivariate_expectation(lambda z: z, lambda z: z, rho=0.0, n_points=20) + print(f" E[Z₁Z₂] = {biv_expectation_z1z2:.8f} (should be 0.0, error: {abs(biv_expectation_z1z2):.2e})") + + # E[Z₁² + Z₂²] = E[Z₁²] + E[Z₂²] = 1 + 1 = 2 (using independence) + biv_expectation_sq = compute_bivariate_expectation(lambda z: z**2, lambda z: z**2, rho=0.0, n_points=20) + print(f" E[Z₁² + Z₂²] = {biv_expectation_sq:.8f} (should be 2.0, error: {abs(2.0 - biv_expectation_sq):.2e})") + print() + + # Test 3: Product expectations with independence + print("3. Testing product expectations E[f₁(Z₁)f₂(Z₂)] with independence:") + + # E[Z₁ * Z₂] = E[Z₁] * E[Z₂] = 0 * 0 = 0 when independent + prod_exp_z1z2 = compute_bivariate_expectation(lambda z: z, lambda z: z, rho=0.0, n_points=20) + print(f" E[Z₁ * Z₂] = {prod_exp_z1z2:.8f} (should be 0.0, error: {abs(prod_exp_z1z2):.2e})") + + # E[Z₁² * 1] = E[Z₁²] * E[1] = 1 * 1 = 1 + prod_exp_z1sq_1 = compute_bivariate_expectation(lambda z: z**2, lambda z: 1.0, rho=0.0, n_points=20) + print(f" E[Z₁² * 1] = {prod_exp_z1sq_1:.8f} (should be 1.0, error: {abs(1.0 - prod_exp_z1sq_1):.2e})") + + # E[exp(Z₁) * exp(Z₂)] = E[exp(Z₁)] * E[exp(Z₂)] = exp(0.5) * exp(0.5) = exp(1.0) + prod_exp_exp = compute_bivariate_expectation(lambda z: xp.exp(z), lambda z: xp.exp(z), rho=0.0, n_points=30) + analytical_prod_exp = xp.exp(1.0) + print(f" E[exp(Z₁) * exp(Z₂)] = {prod_exp_exp:.8f} (should be {analytical_prod_exp:.8f}, error: {abs(analytical_prod_exp - prod_exp_exp):.2e})") + print() + + # Test 4: Bivariate expectations with correlation + print("4. Testing bivariate expectations with correlation:") + + correlations = [-0.9, -0.5, -0.3, 0.0, 0.3, 0.5, 0.8, 0.9] + + print(" Correlation | E[Z₁Z₂] | Analytical | Error") + print(" ------------|------------|------------|----------") + + for rho in correlations: + # E[Z₁Z₂] = ρ for bivariate normal + biv_exp_corr = compute_bivariate_expectation(lambda z: z, lambda z: z, rho=rho, n_points=25) + error = abs(rho - biv_exp_corr) + print(f" {rho:10.1f} | {biv_exp_corr:10.6f} | {rho:10.6f} | {error:.2e}") + print() + + # Test 4b: Extreme correlation values (±1) + print("4b. Testing extreme correlation values (ρ = ±1):") + print(" Note: Perfect correlation means Z₂ = ±Z₁") + + extreme_correlations = [-1.0, 1.0] + + print(" Correlation | E[Z₁Z₂] | Analytical | Error | Note") + print(" ------------|------------|------------|----------|------------------") + + for rho in extreme_correlations: + # For extreme correlations, use more quadrature points for accuracy + biv_exp_corr = compute_bivariate_expectation(lambda z: z, lambda z: z, rho=rho, n_points=40) + error = abs(rho - biv_exp_corr) + note = "Perfect positive" if rho == 1.0 else "Perfect negative" + print(f" {rho:10.1f} | {biv_exp_corr:10.6f} | {rho:10.6f} | {error:.2e} | {note}") + + # Additional test: For ρ = ±1, E[Z₁²Z₂²] should equal E[Z₁⁴] = 3 + z1_sq_z2_sq = compute_bivariate_expectation(lambda z: z**2, lambda z: z**2, rho=rho, n_points=40) + expected_z1_4 = 3.0 # Fourth moment of standard normal + error_z4 = abs(z1_sq_z2_sq - expected_z1_4) + print(f" {rho:10.1f} | E[Z₁²Z₂²]={z1_sq_z2_sq:6.4f} | E[Z₁⁴]={expected_z1_4:6.1f} | {error_z4:.2e} | Should equal E[Z₁⁴]") + + print() + + # Test 5: Product expectations with correlation + print("5. Testing product expectations with correlation:") + print(" For E[f₁(Z₁)f₂(Z₂)], correlation affects the result when f₁ and f₂ are nonlinear") + + def f1(z): + return z**2 + + def f2(z): + return z**3 + + print(" E[Z₁² * Z₂³] for different correlations:") + print(" Correlation | E[Z₁²Z₂³]") + print(" ------------|----------") + + for rho in [-0.9, -0.5, 0.0, 0.5, 0.9]: + prod_exp_nonlinear = compute_bivariate_expectation(f1, f2, rho=rho, n_points=30) + print(f" {rho:10.1f} | {prod_exp_nonlinear:10.6f}") + print() + + # Test 6: Convergence study + print("6. Convergence study (increasing number of quadrature points):") + + # For this test, we approximate E[exp(0.1*Z₁ + 0.2*Z₂)] using E[exp(0.1*Z₁) * exp(0.2*Z₂)] + # This is exact since exp(a+b) = exp(a)*exp(b) + def func1_test(z): + return xp.exp(0.1 * z) + + def func2_test(z): + return xp.exp(0.2 * z) + + # Analytical result for E[exp(aZ₁ + bZ₂)] with (Z₁,Z₂) ~ N(0, Σ) + # For a=0.1, b=0.2, ρ=0.5: E[exp(aZ₁ + bZ₂)] = exp(0.5 * (a² + b² + 2abρ)) + a, b, rho_test = 0.1, 0.2, 0.5 + analytical_mgf = xp.exp(0.5 * (a**2 + b**2 + 2*a*b*rho_test)) + + print(f" Testing E[exp(0.1*Z₁) * exp(0.2*Z₂)] with ρ = {rho_test}") + print(f" Analytical result: {analytical_mgf:.8f}") + print() + print(" n_points | Numerical | Error") + print(" ---------|--------------|----------") + + for n in [5, 10, 15, 20, 25, 30]: + numerical_mgf = compute_bivariate_expectation(func1_test, func2_test, rho=rho_test, n_points=n) + error = abs(analytical_mgf - numerical_mgf) + print(f" {n:7d} | {numerical_mgf:12.8f} | {error:.2e}") + + print() + + # Test 7: Practical example with transformations + print("7. Practical example: Log-normal random variables") + print(" Let Y₁ = exp(Z₁), Y₂ = exp(Z₂) where (Z₁, Z₂) have correlation ρ") + print(" Computing E[Y₁ * Y₂] = E[exp(Z₁ + Z₂)]") + + rho_examples = [-0.8, -0.3, 0.0, 0.5, 0.9] + + print(" Correlation | E[Y₁Y₂] Numerical | E[Y₁Y₂] Analytical | Error") + print(" ------------|------------------|-------------------|----------") + + for rho in rho_examples: + # Numerical computation + numerical_lognormal = compute_bivariate_expectation( + lambda z: xp.exp(z), lambda z: xp.exp(z), + rho=rho, n_points=30 + ) + + # Analytical: E[exp(Z₁ + Z₂)] = exp(E[Z₁ + Z₂] + 0.5*Var[Z₁ + Z₂]) + # E[Z₁ + Z₂] = 0, Var[Z₁ + Z₂] = Var[Z₁] + Var[Z₂] + 2*Cov[Z₁,Z₂] = 1 + 1 + 2*ρ = 2 + 2*ρ + analytical_lognormal = xp.exp(0.5 * (2 + 2*rho)) + + error = abs(numerical_lognormal - analytical_lognormal) + + print(f" {rho:10.1f} | {numerical_lognormal:16.8f} | {analytical_lognormal:17.8f} | {error:.2e}") + + print() + print("=" * 80) + print("Bivariate Gaussian Quadrature Test Summary:") + print("✓ Univariate expectations computed accurately") + print("✓ Bivariate expectations with independence verified") + print("✓ Product expectations working correctly") + print("✓ Correlation effects properly captured") + print("✓ Convergence behavior as expected") + print("✓ Practical log-normal example validated") + print() + print("All bivariate Gaussian quadrature functions are working correctly!") + print("=" * 80) + + +if __name__ == "__main__": + test_quadrature_accuracy() \ No newline at end of file diff --git a/src/dalia/utils/gaussian_quadrature.py b/src/dalia/utils/gaussian_quadrature.py new file mode 100644 index 00000000..985497cb --- /dev/null +++ b/src/dalia/utils/gaussian_quadrature.py @@ -0,0 +1,382 @@ +# Copyright 2024-2025 DALIA authors. All rights reserved. + +from dalia import xp + +from scipy.special import roots_hermite + + +# Dummy classes to avoid circular imports during testing +class DummyConfig: + def __init__(self, alpha=2.0, beta=1.0): + self.alpha = alpha + self.beta = beta + +class DummyGammaPriorHyperparameters: + def __init__(self, config): + self.config = config + + def rescale_hyperparameters_to_internal(self, theta, direction): + """Log transformation: external (positive) <-> internal (unconstrained)""" + if direction == "forward": + return xp.log(theta) + elif direction == "backward": + return xp.exp(theta) + else: + raise ValueError(f"Unknown direction: {direction}") + +def compute_variance_gauss_hermite(mean_internal, variance_internal, transform, n_points=20): + """ + Compute variance of transformed distribution using Gauss-Hermite quadrature + + For a distribution Y where log(Y) ~ N(μ, σ²), we want to compute: + Var(Y) = E[Y²] - (E[Y])² + + Using Gauss-Hermite quadrature by transforming to standard normal form. + + Theory: + If Z ~ N(0,1), then X = μ + σZ ~ N(μ, σ²) + So Y = φ⁻¹(X) = φ⁻¹(μ + σZ) = exp(μ + σZ) + + E[Y] = E[exp(μ + σZ)] = exp(μ) E[exp(σZ)] + E[Y²] = E[exp(2μ + 2σZ)] = exp(2μ) E[exp(2σZ)] + + Where E[exp(aZ)] can be computed using Gauss-Hermite quadrature. + """ + + # Get Gauss-Hermite quadrature points and weights + nodes, weights = roots_hermite(n_points) + + # Transform nodes from Hermite polynomial roots to standard normal + # Hermite nodes are for exp(-x²), we want exp(-x²/2)/√(2π) + # So we scale by √2: z = √2 * node + z_nodes = xp.sqrt(2) * nodes + + # Compute transformed values Y = φ⁻¹(μ + σZ) for each node + internal_values = mean_internal + xp.sqrt(variance_internal) * z_nodes + y_values = transform(internal_values, direction="backward") + + # Gauss-Hermite weights need to be adjusted for standard normal + # Original: ∫ f(x) exp(-x²) dx ≈ Σ w_i f(x_i) + # For standard normal: ∫ f(z) (1/√(2π)) exp(-z²/2) dz + # After substitution x = z/√2: ∫ f(√2 x) (1/√π) exp(-x²) dx + adjusted_weights = weights / xp.sqrt(xp.pi) + + # Compute first and second moments + mean_y = xp.sum(adjusted_weights * y_values) + second_moment_y = xp.sum(adjusted_weights * y_values**2) + + # Variance = E[Y²] - (E[Y])² + variance_y = second_moment_y - mean_y**2 + + return { + 'mean': mean_y, + 'second_moment': second_moment_y, + 'variance': variance_y, + 'std': xp.sqrt(variance_y) + } + + +def test_gaussian_quadrature(): + """ + Comprehensive test suite for the Gaussian quadrature function. + + Tests multiple transformation functions and compares results with + analytical solutions where available. + """ + import numpy as np + + print("=" * 80) + print("COMPREHENSIVE GAUSSIAN QUADRATURE TESTS") + print("=" * 80) + + # Test parameters + test_tolerance = 1e-6 + n_quad_points = 50 + + # Test 1: Identity transformation (should recover original normal moments) + print("1. Testing Identity Transformation (X = Y)") + print("-" * 50) + + def identity_transform(x, direction): + return x # No transformation + + mu = 2.0 + sigma2 = 1.5 + + result = compute_variance_gauss_hermite(mu, sigma2, identity_transform, n_quad_points) + + # For identity, we should recover the original normal distribution moments + expected_mean = mu + expected_variance = sigma2 + + print(f" Expected mean: {expected_mean:.6f}, Got: {result['mean']:.6f}") + print(f" Expected var: {expected_variance:.6f}, Got: {result['variance']:.6f}") + + mean_error = abs(result['mean'] - expected_mean) + var_error = abs(result['variance'] - expected_variance) + + print(f" Mean error: {mean_error:.2e}") + print(f" Var error: {var_error:.2e}") + + assert mean_error < test_tolerance, f"Identity mean test failed: error = {mean_error}" + assert var_error < test_tolerance, f"Identity variance test failed: error = {var_error}" + print(" ✓ Identity transformation test PASSED") + print() + + # Test 2: Log transformation (log-normal distribution) + print("2. Testing Log Transformation (Y = exp(X))") + print("-" * 50) + + def log_transform(x, direction): + if direction == "forward": + return xp.log(x) + elif direction == "backward": + return xp.exp(x) + else: + raise ValueError(f"Unknown direction: {direction}") + + mu = 0.5 + sigma2 = 0.25 + + result = compute_variance_gauss_hermite(mu, sigma2, log_transform, n_quad_points) + + # Analytical moments for log-normal: if log(Y) ~ N(μ, σ²) + expected_mean = np.exp(mu + sigma2/2) + expected_variance = (np.exp(sigma2) - 1) * np.exp(2*mu + sigma2) + + print(f" Expected mean: {expected_mean:.6f}, Got: {result['mean']:.6f}") + print(f" Expected var: {expected_variance:.6f}, Got: {result['variance']:.6f}") + + mean_error = abs(result['mean'] - expected_mean) / expected_mean + var_error = abs(result['variance'] - expected_variance) / expected_variance + + print(f" Relative mean error: {mean_error:.2e}") + print(f" Relative var error: {var_error:.2e}") + + assert mean_error < 1e-4, f"Log-normal mean test failed: rel error = {mean_error}" + assert var_error < 1e-4, f"Log-normal variance test failed: rel error = {var_error}" + print(" ✓ Log transformation test PASSED") + print() + + # Test 3: Linear transformation (Y = aX + b) + print("3. Testing Linear Transformation (Y = aX + b)") + print("-" * 50) + + a, b = 3.0, -1.5 + + def linear_transform(x, direction): + if direction == "forward": + return (x - b) / a + elif direction == "backward": + return a * x + b + else: + raise ValueError(f"Unknown direction: {direction}") + + mu = 1.0 + sigma2 = 0.8 + + result = compute_variance_gauss_hermite(mu, sigma2, linear_transform, n_quad_points) + + # For Y = aX + b where X ~ N(μ, σ²): E[Y] = aμ + b, Var[Y] = a²σ² + expected_mean = a * mu + b + expected_variance = a**2 * sigma2 + + print(f" Transform: Y = {a}X + {b}") + print(f" Expected mean: {expected_mean:.6f}, Got: {result['mean']:.6f}") + print(f" Expected var: {expected_variance:.6f}, Got: {result['variance']:.6f}") + + mean_error = abs(result['mean'] - expected_mean) + var_error = abs(result['variance'] - expected_variance) + + print(f" Mean error: {mean_error:.2e}") + print(f" Var error: {var_error:.2e}") + + assert mean_error < test_tolerance, f"Linear mean test failed: error = {mean_error}" + assert var_error < test_tolerance, f"Linear variance test failed: error = {var_error}" + print(" ✓ Linear transformation test PASSED") + print() + + # Test 4: Quadratic transformation (Y = X²) + print("4. Testing Quadratic Transformation (Y = X²)") + print("-" * 50) + + def quadratic_transform(x, direction): + if direction == "forward": + return xp.sqrt(x) # Only works for x >= 0 + elif direction == "backward": + return x**2 + else: + raise ValueError(f"Unknown direction: {direction}") + + mu = 0.0 # Centered to avoid issues with sqrt + sigma2 = 0.5 + + result = compute_variance_gauss_hermite(mu, sigma2, quadratic_transform, n_quad_points) + + # For Y = X² where X ~ N(0, σ²): E[Y] = σ², Var[Y] = 2σ⁴ + expected_mean = sigma2 + expected_variance = 2 * sigma2**2 + + print(f" X ~ N({mu}, {sigma2})") + print(f" Expected mean: {expected_mean:.6f}, Got: {result['mean']:.6f}") + print(f" Expected var: {expected_variance:.6f}, Got: {result['variance']:.6f}") + + mean_error = abs(result['mean'] - expected_mean) / expected_mean + var_error = abs(result['variance'] - expected_variance) / expected_variance + + print(f" Relative mean error: {mean_error:.2e}") + print(f" Relative var error: {var_error:.2e}") + + assert mean_error < 1e-3, f"Quadratic mean test failed: rel error = {mean_error}" + assert var_error < 1e-2, f"Quadratic variance test failed: rel error = {var_error}" + print(" ✓ Quadratic transformation test PASSED") + print() + + # Test 5: Probit transformation (Y = Φ(X), where Φ is the standard normal CDF) + print("5. Testing Probit Transformation (Y = Φ(X))") + print("-" * 50) + + from scipy.stats import norm + + def probit_transform(x, direction): + if direction == "forward": + # Inverse probit: Φ⁻¹(x) = norm.ppf(x) + # Clip to avoid numerical issues at boundaries + x_clipped = xp.clip(x, 1e-15, 1-1e-15) + return norm.ppf(x_clipped) + elif direction == "backward": + # Probit: Φ(x) = norm.cdf(x) + return norm.cdf(x) + else: + raise ValueError(f"Unknown direction: {direction}") + + # For probit transformation, we need to be careful about the domain + mu_probit = 0.0 # Center at 0 for symmetry + sigma2_probit = 0.5 # Moderate variance to avoid extreme values + + result = compute_variance_gauss_hermite(mu_probit, sigma2_probit, probit_transform, n_quad_points) + + # For Y = Φ(X) where X ~ N(0, σ²), we can compute this numerically + # Since there's no closed form, we'll use a high-precision reference calculation + + # Reference calculation using many quadrature points + ref_result = compute_variance_gauss_hermite(mu_probit, sigma2_probit, probit_transform, 200) + + print(f" X ~ N({mu_probit}, {sigma2_probit})") + print(f" Reference mean (200 pts): {ref_result['mean']:.6f}") + print(f" Test mean ({n_quad_points} pts): {result['mean']:.6f}") + print(f" Reference var (200 pts): {ref_result['variance']:.6f}") + print(f" Test var ({n_quad_points} pts): {result['variance']:.6f}") + + mean_error = abs(result['mean'] - ref_result['mean']) / ref_result['mean'] + var_error = abs(result['variance'] - ref_result['variance']) / ref_result['variance'] + + print(f" Relative mean error: {mean_error:.2e}") + print(f" Relative var error: {var_error:.2e}") + + # For probit, we expect the mean to be close to 0.5 due to symmetry + expected_mean_approx = 0.5 + mean_deviation = abs(result['mean'] - expected_mean_approx) + + print(f" Expected mean ≈ 0.5, deviation: {mean_deviation:.4f}") + + assert mean_error < 1e-3, f"Probit mean test failed: rel error = {mean_error}" + assert var_error < 1e-2, f"Probit variance test failed: rel error = {var_error}" + assert mean_deviation < 0.1, f"Probit mean should be close to 0.5: deviation = {mean_deviation}" + print(" ✓ Probit transformation test PASSED") + print() + + # Test 6: Gamma prior rescaling + print("6. Testing Gamma Prior Rescaling") + print("-" * 50) + + config = DummyConfig(alpha=2.0, beta=1.0) + gamma_prior = DummyGammaPriorHyperparameters(config=config) + + def gamma_rescale(x, direction): + return gamma_prior.rescale_hyperparameters_to_internal(x, direction) + + mu = 0.2 + sigma2 = 0.3 + + result = compute_variance_gauss_hermite(mu, sigma2, gamma_rescale, n_quad_points) + + # This is the same as log-normal since rescale uses exp transformation + expected_mean = np.exp(mu + sigma2/2) + expected_variance = (np.exp(sigma2) - 1) * np.exp(2*mu + sigma2) + + print(f" Expected mean: {expected_mean:.6f}, Got: {result['mean']:.6f}") + print(f" Expected var: {expected_variance:.6f}, Got: {result['variance']:.6f}") + + mean_error = abs(result['mean'] - expected_mean) / expected_mean + var_error = abs(result['variance'] - expected_variance) / expected_variance + + print(f" Relative mean error: {mean_error:.2e}") + print(f" Relative var error: {var_error:.2e}") + + assert mean_error < 1e-4, f"Gamma rescale mean test failed: rel error = {mean_error}" + assert var_error < 1e-4, f"Gamma rescale variance test failed: rel error = {var_error}" + print(" ✓ Gamma prior rescaling test PASSED") + print() + + # Test 7: Convergence with increasing quadrature points + print("7. Testing Convergence with Quadrature Points") + print("-" * 50) + + mu_conv = 0.1 + sigma2_conv = 0.4 + expected_mean_conv = np.exp(mu_conv + sigma2_conv/2) + + n_points_list = [5, 10, 15, 20, 30, 50, 75] + errors = [] + + print(" n_points | Mean | Rel. Error") + print(" ----------|-------------|------------") + + for n in n_points_list: + result = compute_variance_gauss_hermite(mu_conv, sigma2_conv, log_transform, n) + rel_error = abs(result['mean'] - expected_mean_conv) / expected_mean_conv + errors.append(rel_error) + + print(f" {n:8d} | {result['mean']:10.6f} | {rel_error:.3e}") + + # Check that errors generally decrease (allowing some numerical noise) + improving = sum(errors[i+1] < errors[i] * 1.1 for i in range(len(errors)-1)) + improvement_rate = improving / (len(errors) - 1) + + print(f" Improvement rate: {improvement_rate:.1%}") + assert improvement_rate > 0.6, f"Convergence test failed: improvement rate = {improvement_rate}" + print(" ✓ Convergence test PASSED") + print() + + # Test 8: Edge cases + print("8. Testing Edge Cases") + print("-" * 50) + + # Small variance + result_small = compute_variance_gauss_hermite(1.0, 1e-6, log_transform, 20) + expected_small = np.exp(1.0) # When σ² → 0, E[exp(X)] → exp(μ) + error_small = abs(result_small['mean'] - expected_small) / expected_small + + print(f" Small variance test: rel error = {error_small:.3e}") + assert error_small < 1e-3, f"Small variance test failed: rel error = {error_small}" + + # Large variance (but not too large to avoid overflow) + result_large = compute_variance_gauss_hermite(0.0, 2.0, log_transform, 100) + expected_large = np.exp(1.0) # E[exp(X)] = exp(μ + σ²/2) = exp(0 + 2/2) = e + error_large = abs(result_large['mean'] - expected_large) / expected_large + + print(f" Large variance test: rel error = {error_large:.3e}") + assert error_large < 1e-2, f"Large variance test failed: rel error = {error_large}" + + print(" ✓ Edge cases test PASSED") + print() + + # Summary + print("=" * 80) + print("ALL TESTS PASSED! ✓") + print("=" * 80) + + +if __name__ == "__main__": + test_gaussian_quadrature() \ No newline at end of file diff --git a/src/dalia/utils/multivariate_transformation_test.py b/src/dalia/utils/multivariate_transformation_test.py new file mode 100644 index 00000000..19d8b767 --- /dev/null +++ b/src/dalia/utils/multivariate_transformation_test.py @@ -0,0 +1,599 @@ +# Copyright 2024-2025 DALIA authors. All rights reserved. + +import numpy as np +from dalia import xp +from scipy.stats import multivariate_normal +from scipy.linalg import cholesky +import matplotlib.pyplot as plt + +# Import our quadrature functions +from gaussian_quadrature import compute_variance_gauss_hermite +from bivariate_gaussian_quadrature import compute_bivariate_expectation +# Import reparametrization functions +from reparametrizations import ( + compute_transformed_quantiles, + compute_transformed_pdf, + compute_bounds +) + + +def generate_random_covariance_matrix(n_dim=3, condition_number=10.0, random_seed=42): + """ + Generate a random positive definite covariance matrix. + + Parameters + ---------- + n_dim : int + Dimension of the covariance matrix + condition_number : float + Maximum condition number (controls how ill-conditioned the matrix can be) + random_seed : int + Random seed for reproducibility + + Returns + ------- + ndarray + Random positive definite covariance matrix + """ + np.random.seed(random_seed) + + # Generate random eigenvalues between 1/condition_number and 1 + eigenvals = np.random.uniform(1.0/condition_number, 1.0, n_dim) + eigenvals = np.sort(eigenvals)[::-1] # Sort in descending order + + # Generate random orthogonal matrix (eigenvectors) + Q, _ = np.linalg.qr(np.random.randn(n_dim, n_dim)) + + # Construct covariance matrix: Σ = Q * diag(eigenvals) * Q^T + cov_matrix = Q @ np.diag(eigenvals) @ Q.T + + return cov_matrix + + +class TransformationFunction: + """ + Container for monotone bijective transformation functions. + Each transformation should be differentiable and monotone. + """ + + def __init__(self, name, forward_func, backward_func, jacobian_func): + self.name = name + self.forward_func = forward_func + self.backward_func = backward_func + self.jacobian_func = jacobian_func + + def __call__(self, x, direction): + if direction == "forward": + return self.forward_func(x) + elif direction == "backward": + return self.backward_func(x) + elif direction == "forward_jacobian": + return self.jacobian_func(x) + else: + raise ValueError(f"Unknown direction: {direction}") + + +def create_transformation_functions(): + """ + Create a set of monotone bijective transformation functions. + + Returns + ------- + list + List of TransformationFunction objects + """ + + # 1. Log transformation (like gamma prior rescaling) + log_transform = TransformationFunction( + name="Log Transform (exp ↔ log)", + forward_func=lambda x: xp.log(x), + backward_func=lambda x: xp.exp(x), + jacobian_func=lambda x: 1.0 / x + ) + + # 2. Logistic transformation (maps R ↔ (0,1)) + logistic_transform = TransformationFunction( + name="Logistic Transform (logit ↔ sigmoid)", + forward_func=lambda x: xp.log(x / (1 - x)) if hasattr(x, '__iter__') else xp.log(x / (1 - x)), + backward_func=lambda x: 1 / (1 + xp.exp(-x)), + jacobian_func=lambda x: 1 / (x * (1 - x)) + ) + + # 3. Identity transformation (no transformation) + identity_transform = TransformationFunction( + name="Identity Transform (no change)", + forward_func=lambda x: x, + backward_func=lambda x: x, + jacobian_func=lambda x: 1.0 + ) + + return [log_transform, logistic_transform, identity_transform] + + +def compute_marginal_statistics_univariate(mean_internal, cov_internal, transform, n_points=30): + """ + Compute marginal statistics for a single parameter using univariate quadrature. + + Parameters + ---------- + mean_internal : float + Mean of the internal (Gaussian) distribution for this parameter + cov_internal : float + Variance of the internal (Gaussian) distribution for this parameter + transform : TransformationFunction + Transformation function to use + n_points : int + Number of quadrature points + + Returns + ------- + dict + Dictionary with marginal statistics in outer space + """ + + def transform_func(x, direction): + return transform(x, direction) + + # Use univariate Gaussian quadrature + result = compute_variance_gauss_hermite(mean_internal, cov_internal, transform_func, n_points) + + return { + 'mean': result['mean'], + 'variance': result['variance'], + 'std': result['std'], + 'transform_name': transform.name + } + + +def compute_outer_correlation_matrix(mean_internal, cov_internal, transforms, n_points=25): + """ + Compute correlation matrix between all pairs of transformed parameters using bivariate quadrature. + + Parameters + ---------- + mean_internal : ndarray + Mean vector of internal distribution + cov_internal : ndarray + Covariance matrix of internal distribution + transforms : list + List of transformation functions (one per parameter, in same order as mean_internal) + n_points : int + Number of quadrature points per dimension + + Returns + ------- + ndarray + Correlation matrix between transformed parameters + """ + + n_dim = len(mean_internal) + outer_corr_matrix = np.zeros((n_dim, n_dim)) + + # Pre-compute marginal statistics for efficiency + mean_outer = [] + marginal_vars = [] + + print("Computing marginal means and variances for correlation calculations...") + + for i in range(n_dim): + mu_i = mean_internal[i] + var_i = cov_internal[i, i] + + # Get transformation function + transform_func = transforms[i] + + def transform_i_func(x, direction): + return transform_func(x, direction) + + # Compute marginal statistics + result = compute_variance_gauss_hermite(mu_i, var_i, transform_i_func, n_points) + mean_outer.append(result['mean']) + marginal_vars.append(result['variance']) + + print("Computing pairwise correlations...") + + # Compute pairwise correlations + for i in range(n_dim): + for j in range(n_dim): + if i == j: + outer_corr_matrix[i, j] = 1.0 + elif i < j: # Only compute upper triangle, then symmetrize + print(f" Computing Corr(X_{i+1}, X_{j+1})...", end=" ") + + # Extract marginal parameters + mu_i, mu_j = mean_internal[i], mean_internal[j] + var_i, var_j = cov_internal[i, i], cov_internal[j, j] + cov_ij = cov_internal[i, j] + + # Compute correlation coefficient in internal space + rho_internal = cov_ij / np.sqrt(var_i * var_j) if var_i * var_j > 0 else 0.0 + + # Get transformation functions + transform_func_i = transforms[i] + transform_func_j = transforms[j] + + # Standardize the variables for bivariate quadrature + def standardized_func_i(z): + x_internal = mu_i + np.sqrt(var_i) * z + return transform_func_i(x_internal, "backward") + + def standardized_func_j(z): + x_internal = mu_j + np.sqrt(var_j) * z + return transform_func_j(x_internal, "backward") + + # Compute E[f_i(Z_i) * f_j(Z_j)] using bivariate quadrature + cross_moment = compute_bivariate_expectation( + standardized_func_i, standardized_func_j, + rho=rho_internal, n_points=n_points + ) + + # Get pre-computed marginal statistics + var_i_outer = marginal_vars[i] + var_j_outer = marginal_vars[j] + + # Correlation coefficient: Corr(X,Y) = (E[XY] - E[X]E[Y]) / (σ_X σ_Y) + covariance_outer = cross_moment - mean_outer[i] * mean_outer[j] + correlation_outer = (covariance_outer / np.sqrt(var_i_outer * var_j_outer) + if var_i_outer * var_j_outer > 0 else 0.0) + + outer_corr_matrix[i, j] = correlation_outer + outer_corr_matrix[j, i] = correlation_outer # Symmetric + + print(f"{correlation_outer:.4f}") + else: + # Lower triangle - already filled by symmetry + pass + + return outer_corr_matrix + + +def test_multivariate_transformation(): + """ + Main test function for multivariate transformations. + """ + + print("=" * 90) + print("MULTIVARIATE TRANSFORMATION TEST") + print("Testing 3D Gaussian → Transformed Space using Gaussian Quadrature") + print("=" * 90) + + # Set parameters + n_dim = 3 + n_quad_points = 30 + + # Step 1: Generate random mean vector and covariance matrix + print("1. Generating Random 3D Gaussian Distribution") + print("-" * 50) + + np.random.seed(42) # For reproducibility + mean_internal = np.random.uniform(-1, 1, n_dim) + cov_internal = generate_random_covariance_matrix(n_dim, condition_number=5.0) + + print("Internal (Gaussian) Distribution Parameters:") + print(f"Mean vector: {mean_internal}") + print("Covariance matrix:") + print(cov_internal) + print(f"Condition number: {np.linalg.cond(cov_internal):.2f}") + print() + + # Step 2: Create transformation functions + print("2. Setting up Transformation Functions") + print("-" * 50) + + all_transforms = create_transformation_functions() + + # Apply different transforms to each dimension (directly assign transforms to parameters) + transforms = [ + all_transforms[0], # Parameter 1: Log transform + all_transforms[1], # Parameter 2: Logistic transform + all_transforms[2] # Parameter 3: Identity transform + ] + + print("Transformation assignments:") + for i, transform in enumerate(transforms): + print(f" Parameter {i+1}: {transform.name}") + print() + + # Step 3: Compute marginal statistics in outer space + print("3. Computing Marginal Statistics in Outer Space") + print("-" * 50) + + marginal_stats = [] + + for i in range(n_dim): + # Extract marginal parameters + mean_i = mean_internal[i] + var_i = cov_internal[i, i] + + # Compute marginal statistics + stats = compute_marginal_statistics_univariate( + mean_i, var_i, transforms[i], n_quad_points + ) + + marginal_stats.append(stats) + + print(f"Parameter {i+1} ({stats['transform_name']}):") + print(f" Internal: μ = {mean_i:.4f}, σ² = {var_i:.4f}") + print(f" Outer: μ = {stats['mean']:.4f}, σ² = {stats['variance']:.4f}, σ = {stats['std']:.4f}") + print() + + # Step 4: Compute pairwise correlations in outer space + print("4. Computing Pairwise Correlations in Outer Space") + print("-" * 50) + + # Internal correlations (for reference) + internal_corr_matrix = np.zeros((n_dim, n_dim)) + for i in range(n_dim): + for j in range(n_dim): + if i == j: + internal_corr_matrix[i, j] = 1.0 + else: + internal_corr_matrix[i, j] = (cov_internal[i, j] / + np.sqrt(cov_internal[i, i] * cov_internal[j, j])) + + print("Internal (Gaussian) correlation matrix:") + print(internal_corr_matrix) + print() + + # Compute outer correlations using the new efficient function + outer_corr_matrix = compute_outer_correlation_matrix( + mean_internal, cov_internal, transforms, n_quad_points + ) + + print("\nOuter (Transformed) correlation matrix:") + print(outer_corr_matrix) + print() + + # Step 5: Compare transformations + print("5. Transformation Effects Analysis") + print("-" * 50) + + print("Comparison of Internal vs Outer Statistics:") + print(f"{'Parameter':<12} {'Transform':<25} {'Mean Change':<12} {'Var Change':<12} {'Corr Change':<12}") + print("-" * 85) + + for i in range(n_dim): + mean_change = abs(marginal_stats[i]['mean'] - mean_internal[i]) + var_change = abs(marginal_stats[i]['variance'] - cov_internal[i, i]) + + # Average correlation change for this parameter + corr_changes = [] + for j in range(n_dim): + if i != j: + corr_changes.append(abs(outer_corr_matrix[i, j] - internal_corr_matrix[i, j])) + avg_corr_change = np.mean(corr_changes) if corr_changes else 0.0 + + transform_name = transforms[i].name.split(' ')[0] + + print(f"{i+1:<12} {transform_name:<25} {mean_change:<12.4f} {var_change:<12.4f} {avg_corr_change:<12.4f}") + + print() + + # Step 6: Analytical validation for specific cases + print("6. Analytical Validation") + print("-" * 50) + + # For log transformation (Parameter 1), we can validate against log-normal theory + if transforms[0].name.startswith("Log"): # Log transform + mu_1 = mean_internal[0] + sigma2_1 = cov_internal[0, 0] + + # Analytical log-normal moments + analytical_mean = np.exp(mu_1 + sigma2_1/2) + analytical_var = (np.exp(sigma2_1) - 1) * np.exp(2*mu_1 + sigma2_1) + + numerical_mean = marginal_stats[0]['mean'] + numerical_var = marginal_stats[0]['variance'] + + print("Log-normal validation (Parameter 1):") + print(f" Analytical mean: {analytical_mean:.6f}") + print(f" Numerical mean: {numerical_mean:.6f}") + print(f" Relative error: {abs(analytical_mean - numerical_mean)/analytical_mean:.2e}") + print(f" Analytical var: {analytical_var:.6f}") + print(f" Numerical var: {numerical_var:.6f}") + print(f" Relative error: {abs(analytical_var - numerical_var)/analytical_var:.2e}") + print() + + # Demonstrate reparametrization functions + print("Reparametrization functions analysis:") + + # Create transform function for reparametrization utilities + transform_func = transforms[0] # Log transform + def reparam_func(x, direction): + return transform_func(x, direction) + + # Compute quantiles using reparametrization function + percentiles = np.array([0.025, 0.25, 0.5, 0.75, 0.975]) + quantiles = compute_transformed_quantiles(mu_1, sigma2_1, percentiles, reparam_func) + + print(" Quantiles in outer space:") + for p, q in zip(percentiles, quantiles): + print(f" {p*100:4.1f}%: {q:.4f}") + + # Compute bounds using reparametrization function + (int_lower, int_upper), (orig_lower, orig_upper) = compute_bounds( + mu_1, sigma2_1, reparam_func, n_std=3 + ) + print(f" 3σ bounds: Internal [{int_lower:.3f}, {int_upper:.3f}] -> Outer [{orig_lower:.3f}, {orig_upper:.3f}]") + + # Compute PDF at a few points to demonstrate reparametrization + test_points = [0.5, 1.0, 2.0, 5.0] + print(" PDF values at test points:") + for x_orig in test_points: + x_int = reparam_func(x_orig, "forward") + pdf_orig = compute_transformed_pdf(mu_1, sigma2_1, x_int, reparam_func) + print(f" x = {x_orig:.1f}: PDF = {pdf_orig:.6f}") + print() + print() + + # Step 7: Create visualization + print("7. Generating Visualization") + print("-" * 50) + + try: + # Create single figure with 3 rows (one per parameter), 2 columns (internal, outer) + fig, axes = plt.subplots(3, 2, figsize=(16, 12)) + + # Plot marginal distributions for each parameter + for i in range(n_dim): + # Get marginal parameters + mu_i = mean_internal[i] + sigma_i = np.sqrt(cov_internal[i, i]) + transform_func = transforms[i] + + # Create transform function for reparametrization + def param_transform_func(x, direction): + return transform_func(x, direction) + + # === INTERNAL DISTRIBUTION PLOT === + ax_int = axes[i, 0] # Row i, column 0 (internal) + + # Create well-spaced internal grid + x_internal = np.linspace(mu_i - 4*sigma_i, mu_i + 4*sigma_i, 300) + pdf_internal = (1/(sigma_i * np.sqrt(2*np.pi))) * np.exp(-0.5*((x_internal - mu_i)/sigma_i)**2) + + ax_int.plot(x_internal, pdf_internal, 'b-', linewidth=3, label=f'N({mu_i:.2f}, {sigma_i:.2f}²)') + + # Add quantiles for internal distribution + internal_percentiles = np.array([0.025, 0.25, 0.5, 0.75, 0.975]) + from scipy.stats import norm + internal_quantiles = norm.ppf(internal_percentiles, loc=mu_i, scale=sigma_i) + + colors_int = ['red', 'orange', 'green', 'orange', 'red'] + for p, q, color in zip(internal_percentiles, internal_quantiles, colors_int): + pdf_val = (1/(sigma_i * np.sqrt(2*np.pi))) * np.exp(-0.5*((q - mu_i)/sigma_i)**2) + ax_int.axvline(q, color=color, linestyle='--', alpha=0.7, linewidth=2) + if p in [0.025, 0.5, 0.975]: # Label key percentiles + ax_int.text(q, pdf_val * 1.05, f'{p:.3f}', rotation=90, ha='center', va='bottom', fontsize=10, fontweight='bold') + + # Set internal plot properties + ax_int.set_xlim(mu_i - 4*sigma_i, mu_i + 4*sigma_i) + ax_int.set_ylim(0, max(pdf_internal) * 1.15) + ax_int.set_xlabel(f'Parameter {i+1} (Internal Scale)', fontsize=12) + ax_int.set_ylabel('PDF', fontsize=12) + ax_int.set_title(f'Parameter {i+1}: Internal Distribution\n{transform_func.name}', fontsize=14, fontweight='bold') + ax_int.legend(fontsize=11) + ax_int.grid(True, alpha=0.3) + + # === OUTER DISTRIBUTION PLOT === + ax_out = axes[i, 1] # Row i, column 1 (outer) + + try: + # Compute bounds for outer distribution with more generous margins + (int_lower, int_upper), (orig_lower, orig_upper) = compute_bounds( + mu_i, sigma_i**2, param_transform_func, n_std=4 + ) + + # Add some margin to outer bounds for better visualization + orig_range = orig_upper - orig_lower + orig_margin = orig_range * 0.1 + orig_lower_plot = max(orig_lower - orig_margin, 1e-6) if orig_lower > 0 else orig_lower - orig_margin + orig_upper_plot = orig_upper + orig_margin + + # Create fine grid in outer space + x_outer = np.linspace(orig_lower_plot, orig_upper_plot, 300) + + # Filter out invalid values for certain transformations + if "Logistic" in transforms[i].name: # Logistic transform (0,1) + x_outer = x_outer[(x_outer > 0.001) & (x_outer < 0.999)] + elif "Log" in transforms[i].name: # Log transform (positive) + x_outer = x_outer[x_outer > 0.001] + # Identity transform needs no filtering - can handle all real values + + # Compute PDF in outer space + pdf_outer = [] + for x in x_outer: + try: + x_int = param_transform_func(x, "forward") + pdf_val = compute_transformed_pdf(mu_i, sigma_i**2, x_int, param_transform_func) + pdf_outer.append(pdf_val) + except: + pdf_outer.append(0.0) + + pdf_outer = np.array(pdf_outer) + + # Plot outer distribution + ax_out.plot(x_outer, pdf_outer, 'r-', linewidth=3, label=f'Transformed Distribution') + + # Add quantiles for outer distribution + outer_quantiles = compute_transformed_quantiles(mu_i, sigma_i**2, internal_percentiles, param_transform_func) + + colors_out = ['red', 'orange', 'green', 'orange', 'red'] + for p, q, color in zip(internal_percentiles, outer_quantiles, colors_out): + if orig_lower_plot <= q <= orig_upper_plot: # Only plot if within bounds + try: + x_int_q = param_transform_func(q, "forward") + pdf_val_q = compute_transformed_pdf(mu_i, sigma_i**2, x_int_q, param_transform_func) + ax_out.axvline(q, color=color, linestyle='--', alpha=0.7, linewidth=2) + if p in [0.025, 0.5, 0.975]: # Label key percentiles + ax_out.text(q, pdf_val_q * 1.05, f'{p:.3f}', rotation=90, ha='center', va='bottom', fontsize=10, fontweight='bold') + except: + pass + + # Set outer plot properties with proper limits + ax_out.set_xlim(orig_lower_plot, orig_upper_plot) + if len(pdf_outer) > 0 and max(pdf_outer) > 0: + ax_out.set_ylim(0, max(pdf_outer) * 1.15) + + # Format x-axis nicely for different transformations + if "Log" in transforms[i].name: # Log transform + ax_out.set_xlabel(f'Parameter {i+1} (Outer Scale: exp)', fontsize=12) + elif "Logistic" in transforms[i].name: # Logistic transform + ax_out.set_xlabel(f'Parameter {i+1} (Outer Scale: sigmoid)', fontsize=12) + elif "Identity" in transforms[i].name: # Identity transform + ax_out.set_xlabel(f'Parameter {i+1} (Outer Scale: identity)', fontsize=12) + + ax_out.set_ylabel('PDF', fontsize=12) + ax_out.set_title(f'Parameter {i+1}: Outer Distribution\n{transform_func.name}', fontsize=14, fontweight='bold') + ax_out.legend(fontsize=11) + ax_out.grid(True, alpha=0.3) + + except Exception as e: + print(f"Warning: Could not plot outer distribution for parameter {i+1}: {e}") + ax_out.text(0.5, 0.5, f'Error plotting\nparameter {i+1}', transform=ax_out.transAxes, ha='center', va='center', fontsize=12) + ax_out.set_title(f'Parameter {i+1}: Error', fontsize=14) + + # Add column labels + axes[0, 0].text(0.5, 1.15, 'Internal (Gaussian) Scale', transform=axes[0, 0].transAxes, + ha='center', va='bottom', fontsize=16, fontweight='bold') + axes[0, 1].text(0.5, 1.15, 'Outer (Transformed) Scale', transform=axes[0, 1].transAxes, + ha='center', va='bottom', fontsize=16, fontweight='bold') + + # Finalize figure + fig.suptitle('Marginal Distributions: Internal vs Outer Scales', fontsize=18, fontweight='bold', y=0.98) + fig.tight_layout() + fig.subplots_adjust(top=0.92) # Make room for suptitle and column headers + fig.savefig('marginal_distributions_comparison.png', dpi=300, bbox_inches='tight') + + # Show figure + plt.show() + + print("✓ Marginal distributions comparison saved as 'marginal_distributions_comparison.png'") + + except Exception as e: + print(f"⚠ Could not generate plots: {e}") + + print() + + # Step 8: Summary + print("8. Test Summary") + print("-" * 50) + + print("✓ Successfully generated random 3D Gaussian distribution") + print("✓ Applied monotone bijective transformations to each parameter") + print("✓ Computed marginal statistics using univariate Gaussian quadrature") + print("✓ Computed pairwise correlations using bivariate Gaussian quadrature") + print("✓ Validated results against analytical solutions where available") + print("✓ Analyzed transformation effects on distribution properties") + + total_corr_change = np.sum(np.abs(outer_corr_matrix - internal_corr_matrix)) / 2 # Divide by 2 due to symmetry + print(f"✓ Total correlation structure change: {total_corr_change:.4f}") + + print() + print("=" * 90) + print("MULTIVARIATE TRANSFORMATION TEST COMPLETED SUCCESSFULLY!") + print("=" * 90) + + +if __name__ == "__main__": + test_multivariate_transformation() \ No newline at end of file diff --git a/src/dalia/utils/reparametrizations.py b/src/dalia/utils/reparametrizations.py new file mode 100644 index 00000000..ade3d130 --- /dev/null +++ b/src/dalia/utils/reparametrizations.py @@ -0,0 +1,348 @@ +from scipy.stats import norm + +from dalia import NDArray, xp + + +""" +Computing Quantiles for Transformed Distributions + +Theory: +If X ~ f(x) and Y = φ(X), then to find quantiles of Y: +1. For a given probability p, find q_p such that P(Y ≤ q_p) = p +2. This is equivalent to P(φ(X) ≤ q_p) = p +3. If φ is monotone increasing: x_original = np.linspace(original_lower, original_upper, 1000) + x_internal_array = np.array([transform_func(x, "forward") for x in x_original]) + pdf_original = np.array([ + compute_transformed_pdf(mean_internal, std_internal**2, x_int, transform_func) + for x_int in x_internal_array + ]) ≤ φ⁻¹(q_p)) = p +4. So φ⁻¹(q_p) = F_X⁻¹(p), where F_X⁻¹ is the quantile function of X +5. Therefore: q_p = φ(F_X⁻¹(p)) + +Key insight: Quantiles transform directly through φ! +""" + + +def compute_transformed_quantiles(mean_internal, var_internal, percentiles, transform): + """ + Compute quantiles for a transformed distribution + + Parameters: + - original_dist_params: (mean, std) for the internal/transformed distribution + - percentiles: array of probability values (0, 1) + - transform: TransformationFunction object + + Returns: + - quantiles in original scale + """ + + # Step 1: Compute quantiles in internal scale + internal_quantiles = norm.ppf(percentiles, loc=mean_internal, scale=var_internal**0.5) + + # Step 2: Transform back to original scale + # If φ: original → internal, then original quantiles = φ⁻¹(internal quantiles) + original_quantiles = transform(internal_quantiles, direction='backward') + + return original_quantiles + +def compute_transformed_pdf(mean_internal, var_internal, x_internal, transform): + """ + Compute PDF of transformed distribution using change of variables + + If Y = φ(X), then f_Y(y) = f_X(φ⁻¹(y)) * |dφ⁻¹/dy| + But we want f_X(x) where x is in original scale, so: + f_X(x) = f_Y(φ(x)) * |dφ/dx| + """ + + # PDF in internal scale + pdf_internal = norm.pdf(x_internal, loc=mean_internal, scale=var_internal**0.5) + + # Jacobian: derivative of transformation + x_original = transform(x_internal, direction='backward') + jacobian = xp.abs(transform(x_original, direction='forward_jacobian')) + + # PDF in original scale + pdf_original = pdf_internal * jacobian + + return pdf_original + +# Automatic bound calculation based on 3 standard deviations in internal scale +def compute_bounds(mean_internal, var_internal, transform, n_std=3): + """ + Compute plotting bounds based on n standard deviations in internal scale + """ + # Internal scale bounds (±n standard deviations) + internal_lower = mean_internal - n_std * var_internal**0.5 + internal_upper = mean_internal + n_std * var_internal**0.5 + + # Transform to original scale + original_lower = transform(internal_lower, direction='backward') + original_upper = transform(internal_upper, direction='backward') + + return (internal_lower, internal_upper), (original_lower, original_upper) + + +if __name__ == "__main__": + """ + Test reparametrization functions using a dummy gamma prior hyperparameter class. + + This test demonstrates how the reparametrization functions work with + transformation functions that have forward, backward, and jacobian directions. + """ + import numpy as np + import matplotlib.pyplot as plt + + # Dummy Gamma Prior Hyperparameter Class + class DummyGammaPrior: + """ + Dummy implementation of gamma prior rescaling for testing purposes. + Implements log transformation: forward = log(x), backward = exp(x) + """ + def rescale_hyperparameters_to_internal(self, theta, direction): + """Log transformation between positive (external) and unconstrained (internal) space""" + if direction == "forward": + return np.log(theta) # theta -> log(theta) + elif direction == "backward": + return np.exp(theta) # log(theta) -> theta + elif direction == "forward_jacobian": + return 1.0 / theta # d(log(theta))/d(theta) = 1/theta + elif direction == "backward_jacobian": + return theta # d(exp(theta))/d(theta) = exp(theta) = theta + else: + raise ValueError(f"Unknown direction: {direction}") + + print("=" * 80) + print("Testing Reparametrization Functions with Dummy Gamma Prior") + print("=" * 80) + + # Create dummy gamma prior instance + gamma_prior = DummyGammaPrior() + + # Define transform function compatible with reparametrization functions + def transform_func(x, direction): + return gamma_prior.rescale_hyperparameters_to_internal(x, direction) + + # Test parameters (internal space: log-normal distribution) + mean_internal = 0.5 # mean of log(theta) + std_internal = 0.8 # std of log(theta) + + print(f"Internal distribution parameters:") + print(f" Mean (log scale): {mean_internal:.3f}") + print(f" Std (log scale): {std_internal:.3f}") + print() + + # Test 1: Transform validation + print("1. Testing transformation consistency:") + test_values = [0.1, 0.5, 1.0, 2.0, 5.0, 10.0] + + print(" Original -> Internal -> Original (round-trip test)") + for theta in test_values: + # Forward transformation + log_theta = transform_func(theta, "forward") + + # Backward transformation + theta_recovered = transform_func(log_theta, "backward") + + # Check error + error = abs(theta - theta_recovered) + + print(f" {theta:5.1f} -> {log_theta:6.3f} -> {theta_recovered:6.3f}, " + f"error = {error:.2e}") + print() + + # Test 2: Jacobian validation using finite differences + print("2. Testing Jacobian accuracy (forward direction):") + + test_theta_vals = [0.5, 1.0, 2.0, 3.0] + eps = 1e-8 + + print(" θ | Analytical | Numerical | Error") + print(" ------|------------|------------|----------") + + for theta in test_theta_vals: + # Analytical jacobian + jac_analytical = transform_func(theta, "forward_jacobian") + + # Numerical jacobian using finite differences + f_plus = transform_func(theta + eps, "forward") + f_minus = transform_func(theta - eps, "forward") + jac_numerical = (f_plus - f_minus) / (2 * eps) + + error = abs(jac_analytical - jac_numerical) + + print(f" {theta:4.1f} | {jac_analytical:10.6f} | {jac_numerical:10.6f} | {error:.2e}") + print() + + # Test 3: Quantile computation + print("3. Testing quantile computation:") + + percentiles = np.array([0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975]) + original_quantiles = compute_transformed_quantiles( + mean_internal, std_internal**0.5, percentiles, transform_func + ) + + print(" Percentile | Quantile (original scale)") + print(" -----------|-----------------------") + for p, q in zip(percentiles, original_quantiles): + print(f" {p:8.2f} | {q:18.6f}") + print() + + # Test 4: PDF computation and validation + print("4. Testing PDF computation:") + + # Compute bounds for plotting + (internal_lower, internal_upper), (original_lower, original_upper) = compute_bounds( + mean_internal, std_internal, transform_func, n_std=3 + ) + + print(f" Internal bounds: [{internal_lower:.3f}, {internal_upper:.3f}]") + print(f" Original bounds: [{original_lower:.3f}, {original_upper:.3f}]") + + # Test PDF at specific points + test_x_original = np.array([0.5, 1.0, 2.0, 3.0, 5.0]) + + print(" x (orig) | PDF (orig) | log(x) | PDF (int)") + print(" ---------|------------|----------|----------") + + for x in test_x_original: + x_internal = transform_func(x, "forward") + pdf_orig = compute_transformed_pdf(mean_internal, std_internal**2, x_internal, transform_func) + pdf_internal = norm.pdf(x_internal, loc=mean_internal, scale=std_internal) + + print(f" {x:7.2f} | {pdf_orig:10.6f} | {x_internal:8.3f} | {pdf_internal:8.6f}") + print() + + # Test 5: Analytical validation for log-normal distribution + print("5. Analytical validation (log-normal distribution):") + + # For log-normal distribution, we can compute analytical moments + mu = mean_internal + sigma = std_internal + + # Analytical log-normal statistics + analytical_mean = np.exp(mu + sigma**2/2) + analytical_var = (np.exp(sigma**2) - 1) * np.exp(2*mu + sigma**2) + analytical_std = np.sqrt(analytical_var) + + # Numerical verification using quantiles + # Mean ≈ 50th percentile for log-normal (approximately) + median_quantile = compute_transformed_quantiles( + mean_internal, std_internal**0.5, np.array([0.5]), transform_func + )[0] + + print(f" Analytical mean: {analytical_mean:.6f}") + print(f" Analytical std: {analytical_std:.6f}") + print(f" Median quantile: {median_quantile:.6f}") + print(f" Mean/Median ratio: {analytical_mean/median_quantile:.6f}") + print(" (Should be > 1 for log-normal due to skewness)") + print() + + # Test 6: PDF integration check (numerical verification) + print("6. PDF integration check:") + + # Create fine grid for integration + x_grid = np.linspace(original_lower, original_upper, 1000) + x_internal_grid = np.array([transform_func(x, "forward") for x in x_grid]) + pdf_values = np.array([ + compute_transformed_pdf(mean_internal, std_internal**2, x_int, transform_func) + for x_int in x_internal_grid + ]) + + # Numerical integration using trapezoidal rule + dx = x_grid[1] - x_grid[0] + integral = np.trapz(pdf_values, dx=dx) + + print(f" Numerical integral of PDF: {integral:.6f}") + print(f" Should be close to 1.0, error: {abs(1.0 - integral):.6f}") + print() + + # Test 7: Bounds computation for different n_std values + print("7. Testing bounds computation:") + + for n_std in [1, 2, 3, 4]: + (int_lower, int_upper), (orig_lower, orig_upper) = compute_bounds( + mean_internal, std_internal, transform_func, n_std=n_std + ) + + print(f" {n_std}σ bounds:") + print(f" Internal: [{int_lower:7.3f}, {int_upper:7.3f}]") + print(f" Original: [{orig_lower:7.3f}, {orig_upper:7.3f}]") + print() + + # Test 8: Edge case handling + print("8. Testing edge cases:") + + # Test very small values + small_vals = [1e-6, 1e-4, 1e-2] + print(" Small values transformation:") + for val in small_vals: + try: + internal = transform_func(val, "forward") + recovered = transform_func(internal, "backward") + error = abs(val - recovered) + print(f" {val:.2e} -> {internal:8.3f} -> {recovered:.2e}, error = {error:.2e}") + except Exception as e: + print(f" {val:.2e} -> Error: {e}") + + # Test large values + large_vals = [1e2, 1e4, 1e6] + print(" Large values transformation:") + for val in large_vals: + try: + internal = transform_func(val, "forward") + recovered = transform_func(internal, "backward") + rel_error = abs(val - recovered) / val + print(f" {val:.2e} -> {internal:8.3f} -> {recovered:.2e}, rel_error = {rel_error:.2e}") + except Exception as e: + print(f" {val:.2e} -> Error: {e}") + print() + + # Test 9: Plotting PDFs in both scales + print("9. Plotting PDFs in internal and original scales:") + + # Plot 1: PDF in internal scale (log-scale, normal distribution) + x_internal = np.linspace(internal_lower, internal_upper, 500) + pdf_internal = norm.pdf(x_internal, loc=mean_internal, scale=std_internal) + # Create visualization + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6)) + + ax1.plot(x_internal, pdf_internal, 'b-', linewidth=2, label='Internal PDF') + internal_quantiles = norm.ppf(percentiles, loc=mean_internal, scale=std_internal) + for i, (p, q) in enumerate(zip(percentiles, internal_quantiles)): + color = 'red' if p in [0.025, 0.975] else 'orange' + ax1.axvline(q, color=color, linestyle='--', alpha=0.7) + if i % 2 == 0: + ax1.text(q, max(pdf_internal)*0.8, f'{p*100:.1f}%', rotation=90, ha='right', va='top') + + ax1.set_title(f'Internal Scale: N(mean = {mean_internal}, std = {std_internal:.3f})') + ax1.set_xlabel('θ (internal)') + ax1.set_ylabel('PDF') + ax1.set_xlim(x_internal[0], x_internal[-1]) + ax1.grid(True, alpha=0.3) + ax1.legend() + + # Plot 2: Original distribution with quantiles + x_original = np.linspace(orig_lower, orig_upper, 1000) + x_internal = transform_func(x_original, "forward") + pdf_original = compute_transformed_pdf(mean_internal, std_internal**2, x_internal, transform_func) + + ax2.plot(x_original, pdf_original, 'g-', linewidth=2, label='Original PDF') + for i, (p, q) in enumerate(zip(percentiles, original_quantiles)): + color = 'red' if p in [0.025, 0.975] else 'orange' + ax2.axvline(q, color=color, linestyle='--', alpha=0.7) + if i % 2 == 0: + ax2.text(q, max(pdf_original)*0.8, f'{p*100:.1f}%', rotation=90, ha='right', va='top') + + ax2.set_title('Original Scale: Log-Normal Distribution') + ax2.set_xlabel('θ_outer (original)') + ax2.set_ylabel('PDF') + ax2.set_xlim(orig_lower, 15) + ax2.grid(True, alpha=0.3) + ax2.legend() + plt.tight_layout() + plt.show() + + print("=" * 80) + print("All reparametrization functions are working correctly with") + print("the gamma prior rescaling transformation!") + print("=" * 80) \ No newline at end of file From 06fe7215c5b1123ccd9bcb15f323a0fc1580d996 Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Wed, 8 Oct 2025 13:31:08 +0300 Subject: [PATCH 13/26] incorporating new functions into dalia --- examples/gr/run.py | 5 +- src/dalia/core/dalia.py | 79 ++- src/dalia/utils/__init__.py | 6 + .../utils/bivariate_gaussian_quadrature.py | 2 +- .../utils/multivariate_transformation_test.py | 599 ------------------ 5 files changed, 58 insertions(+), 633 deletions(-) delete mode 100644 src/dalia/utils/multivariate_transformation_test.py diff --git a/examples/gr/run.py b/examples/gr/run.py index 6c44e3f3..78e613e0 100644 --- a/examples/gr/run.py +++ b/examples/gr/run.py @@ -70,7 +70,8 @@ results = dalia.run() print_msg("\n--- Results ---") - print_msg("Theta values:\n", results["theta"]) + print_msg("Theta values external:\n", results["theta"]) + print_msg("Theta values internal:\n", results["theta_internal"]) print_msg("Covariance of theta:\n", results["cov_theta"]) print_msg( "Mean of the fixed effects:\n", @@ -103,7 +104,7 @@ ) # Compare marginal variances of observations - var_obs = dalia.get_marginal_variances_observations(theta=theta_ref, x_star=x_ref) + var_obs = dalia.get_marginal_variances_observations(theta_external=theta_ref, x_star=x_ref) var_obs_ref = extract_diagonal(model.a @ Qinv_ref @ model.a.T) print_msg( "Norm (var_obs - var_obs_ref): ", diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py index b987cd53..1fe47bc6 100644 --- a/src/dalia/core/dalia.py +++ b/src/dalia/core/dalia.py @@ -26,6 +26,7 @@ smartsplit, synchronize, synchronize_gpu, + compute_outer_covariance_matrix, ) if backend_flags["mpi_avail"]: @@ -309,7 +310,7 @@ def run(self) -> dict: # compute covariance of the hyperparameters theta at the mode print("theta_star: ", theta_star) - cov_theta = self.compute_covariance_hp(theta_star) + cov_theta_dict = self.compute_covariance_hp(theta_star) print("Computed covariance of the hyperparameters at the mode.") # compute marginal variances of the latent parameters @@ -333,7 +334,8 @@ def run(self) -> dict: "grad_f": minimization_result["grad_f"], "f_values": minimization_result["f_values"], "theta_values": minimization_result["theta_values"], - "cov_theta": cov_theta, + "cov_theta_internal": cov_theta_dict["internal"], + "cov_theta": cov_theta_dict["external"], "marginal_variances_latent": marginal_variances_latent, # "marginal_variances_observations": get_host( # marginal_variances_observations @@ -774,7 +776,7 @@ def _evaluate_f( return f_theta[0] - def compute_covariance_hp(self, theta_interpret: NDArray) -> NDArray: + def compute_covariance_hp(self, theta_external: NDArray) -> NDArray: """compute the covariance matrix of the hyperparameters theta. Parameters @@ -790,22 +792,31 @@ def compute_covariance_hp(self, theta_interpret: NDArray) -> NDArray: # self.model.rescale_hyperparameters_to_internal(theta_interpret, direction="forward") print_msg( - f"Computing covariance of hyperparameters theta at {theta_interpret}.", + f"Computing covariance of hyperparameters at theta_external {theta_external}.", flush=True, ) + + self.model.theta_external = theta_external - hess_theta = self._evaluate_hessian_f(theta_interpret) + hess_theta_internal = self._evaluate_hessian_f(self.model.theta_internal) # print_msg( # f"hessian_f: \n {hess_theta}", # flush=True, # ) - cov_theta = xp.linalg.inv(hess_theta) + self.cov_theta_internal = xp.linalg.inv(hess_theta_internal) + + # rescale to external scale + cov_theta_external = compute_outer_covariance_matrix(self.model.theta_internal, self.cov_theta_internal, self.model.rescale_hyperparameters_to_internal) - return cov_theta + dict_cov = {"internal": self.cov_theta_internal, "external": cov_theta_external} + print("Cov Internal: ", dict_cov["internal"]) + print("Cov External: ", dict_cov["external"]) + + return dict_cov def _evaluate_hessian_f( self, - theta_i: NDArray, + theta_internal: NDArray, ) -> NDArray: """Approximate the hessian of the function f(theta) = log(p(theta|y)). @@ -824,8 +835,6 @@ def _evaluate_hessian_f( """ ## TODO: this is the quick fix ... - #theta_internal = theta_i.copy() - theta = theta_i.copy() # self.model.theta[:] = theta_i dim_theta = self.model.n_hyperparameters @@ -858,7 +867,7 @@ def _evaluate_hessian_f( counter = 0 # compute f(theta) if self.color_feval == task_mapping[0]: - theta_i = self.model.rescale_hyperparameters_to_internal(theta, direction="forward") + theta_i = self.model.theta_internal.copy() f_theta = self._evaluate_f(theta_i) f_ii_loc[1, :] = f_theta counter += 1 @@ -873,16 +882,16 @@ def _evaluate_hessian_f( # theta+eps_i #theta_i = theta_internal.copy() #f_ii_loc[0, i] = self._evaluate_f(theta_i + eps_mat[i, :]) - theta_i = theta + eps_mat[i, :] - f_ii_loc[0, i] = self._evaluate_f(self.model.rescale_hyperparameters_to_internal(theta_i, direction="forward")) + theta_i = self.model.theta_internal + eps_mat[i, :] + f_ii_loc[0, i] = self._evaluate_f(theta_i) counter += 1 if self.color_feval == task_mapping[counter]: # theta-eps_i # theta_i = theta_internal.copy() # f_ii_loc[2, i] = self._evaluate_f(theta_i - eps_mat[i, :]) - theta_i = theta - eps_mat[i, :] - f_ii_loc[2, i] = self._evaluate_f(self.model.rescale_hyperparameters_to_internal(theta_i, direction="forward")) + theta_i = self.model.theta_internal - eps_mat[i, :] + f_ii_loc[2, i] = self._evaluate_f(theta_i) counter += 1 # as hessian is symmetric we only have to compute the upper triangle @@ -893,8 +902,8 @@ def _evaluate_hessian_f( # f_ij_loc[0, k] = self._evaluate_f( # theta_i + eps_mat[i, :] + eps_mat[j, :] # ) - theta_i = theta + eps_mat[i, :] + eps_mat[j, :] - f_ij_loc[0, k] = self._evaluate_f(self.model.rescale_hyperparameters_to_internal(theta_i, direction="forward")) + theta_i = self.model.theta_internal + eps_mat[i, :] + eps_mat[j, :] + f_ij_loc[0, k] = self._evaluate_f(theta_i) counter += 1 # theta+eps_i-eps_j @@ -903,8 +912,8 @@ def _evaluate_hessian_f( # f_ij_loc[1, k] = self._evaluate_f( # theta_i + eps_mat[i, :] - eps_mat[j, :] # ) - theta_i = theta + eps_mat[i, :] - eps_mat[j, :] - f_ij_loc[1, k] = self._evaluate_f(self.model.rescale_hyperparameters_to_internal(theta_i, direction="forward")) + theta_i = self.model.theta_internal + eps_mat[i, :] - eps_mat[j, :] + f_ij_loc[1, k] = self._evaluate_f(theta_i) counter += 1 # theta-eps_i+eps_j @@ -913,8 +922,8 @@ def _evaluate_hessian_f( # f_ij_loc[2, k] = self._evaluate_f( # theta_i - eps_mat[i, :] + eps_mat[j, :] # ) - theta_i = theta - eps_mat[i, :] + eps_mat[j, :] - f_ij_loc[2, k] = self._evaluate_f(self.model.rescale_hyperparameters_to_internal(theta_i, direction="forward")) + theta_i = self.model.theta_internal - eps_mat[i, :] + eps_mat[j, :] + f_ij_loc[2, k] = self._evaluate_f(theta_i) counter += 1 # theta-eps_i-eps_j @@ -923,8 +932,8 @@ def _evaluate_hessian_f( # f_ij_loc[3, k] = self._evaluate_f( # theta_i - eps_mat[i, :] - eps_mat[j, :] # ) - theta_i = theta - eps_mat[i, :] - eps_mat[j, :] - f_ij_loc[3, k] = self._evaluate_f(self.model.rescale_hyperparameters_to_internal(theta_i, direction="forward")) + theta_i = self.model.theta_internal - eps_mat[i, :] - eps_mat[j, :] + f_ij_loc[3, k] = self._evaluate_f(theta_i) counter += 1 allreduce( @@ -994,19 +1003,21 @@ def _compute_covariance_latent_parameters( self.solver.selected_inversion(sparsity="bta") def get_marginal_variances_latent_parameters( - self, theta: NDArray = None, x_star: NDArray = None + self, theta_external: NDArray = None, x_star: NDArray = None ) -> NDArray: - ## assume theta to be in "external" scale - self.model.theta_external = xp.array(theta) - # TODO: this should be only called by rank 0? - if theta is None and x_star is None: + if theta_external is None and x_star is None: print( "Computing marginal variances for currently stored latent parameters. " ) x_star = self.model.x - theta = self.model.theta + theta = self.model.theta_internal + elif theta_external is not None and x_star is not None: + ## assume theta to be in "external" scale + self.model.theta_external = xp.atleast_1d(theta_external) + theta = self.model.theta_internal + elif theta is None or x_star is None: raise ValueError( "BOTH or NEITHER theta and x_star must be provided to compute the marginal variances." @@ -1024,7 +1035,7 @@ def get_marginal_variances_latent_parameters( return marginal_variances def get_marginal_variances_observations( - self, theta: NDArray, x_star: NDArray + self, theta_external: NDArray = None, x_star: NDArray = None ) -> NDArray: """Extract the marginal variances of the observations. @@ -1047,14 +1058,20 @@ def get_marginal_variances_observations( Marginal variances of the observations. """ + # TODO: implement this for non-Gaussian likelihoods if self.model.is_likelihood_gaussian(): # TODO: this should be only called by rank 0? - if theta is None and x_star is None: + if theta_external is None and x_star is None: print( "Computing marginal variances for currently stored latent parameters. " ) x_star = self.model.x theta = self.model.theta_external + elif theta_external is not None and x_star is not None: + ## assume theta to be in "external" scale + self.model.theta_external = xp.atleast_1d(theta_external) + theta = self.model.theta_internal + elif theta is None or x_star is None: raise ValueError( "BOTH or NEITHER theta and x_star must be provided to compute the marginal variances." diff --git a/src/dalia/utils/__init__.py b/src/dalia/utils/__init__.py index b1a25fe8..5d152d56 100644 --- a/src/dalia/utils/__init__.py +++ b/src/dalia/utils/__init__.py @@ -12,6 +12,9 @@ ) from dalia.utils.host import get_host_configuration from dalia.utils.link_functions import cloglog, scaled_logit, sigmoid +from dalia.utils.correlation import compute_outer_covariance_matrix +from dalia.utils.gaussian_quadrature import compute_variance_gauss_hermite +from dalia.utils.bivariate_gaussian_quadrature import compute_bivariate_expectation from dalia.utils.multiprocessing import ( allreduce, allgather, @@ -36,6 +39,9 @@ "sigmoid", "cloglog", "scaled_logit", + "compute_outer_covariance_matrix", + "compute_variance_gauss_hermite", + "compute_bivariate_expectation", "print_msg", "synchronize", "synchronize_gpu", diff --git a/src/dalia/utils/bivariate_gaussian_quadrature.py b/src/dalia/utils/bivariate_gaussian_quadrature.py index 18a454fd..4150737d 100644 --- a/src/dalia/utils/bivariate_gaussian_quadrature.py +++ b/src/dalia/utils/bivariate_gaussian_quadrature.py @@ -4,7 +4,7 @@ from dalia import xp from scipy.special import roots_hermite -from gaussian_quadrature import compute_variance_gauss_hermite +from dalia.utils.gaussian_quadrature import compute_variance_gauss_hermite def compute_bivariate_expectation(func1, func2, rho, n_points=20): """ diff --git a/src/dalia/utils/multivariate_transformation_test.py b/src/dalia/utils/multivariate_transformation_test.py deleted file mode 100644 index 19d8b767..00000000 --- a/src/dalia/utils/multivariate_transformation_test.py +++ /dev/null @@ -1,599 +0,0 @@ -# Copyright 2024-2025 DALIA authors. All rights reserved. - -import numpy as np -from dalia import xp -from scipy.stats import multivariate_normal -from scipy.linalg import cholesky -import matplotlib.pyplot as plt - -# Import our quadrature functions -from gaussian_quadrature import compute_variance_gauss_hermite -from bivariate_gaussian_quadrature import compute_bivariate_expectation -# Import reparametrization functions -from reparametrizations import ( - compute_transformed_quantiles, - compute_transformed_pdf, - compute_bounds -) - - -def generate_random_covariance_matrix(n_dim=3, condition_number=10.0, random_seed=42): - """ - Generate a random positive definite covariance matrix. - - Parameters - ---------- - n_dim : int - Dimension of the covariance matrix - condition_number : float - Maximum condition number (controls how ill-conditioned the matrix can be) - random_seed : int - Random seed for reproducibility - - Returns - ------- - ndarray - Random positive definite covariance matrix - """ - np.random.seed(random_seed) - - # Generate random eigenvalues between 1/condition_number and 1 - eigenvals = np.random.uniform(1.0/condition_number, 1.0, n_dim) - eigenvals = np.sort(eigenvals)[::-1] # Sort in descending order - - # Generate random orthogonal matrix (eigenvectors) - Q, _ = np.linalg.qr(np.random.randn(n_dim, n_dim)) - - # Construct covariance matrix: Σ = Q * diag(eigenvals) * Q^T - cov_matrix = Q @ np.diag(eigenvals) @ Q.T - - return cov_matrix - - -class TransformationFunction: - """ - Container for monotone bijective transformation functions. - Each transformation should be differentiable and monotone. - """ - - def __init__(self, name, forward_func, backward_func, jacobian_func): - self.name = name - self.forward_func = forward_func - self.backward_func = backward_func - self.jacobian_func = jacobian_func - - def __call__(self, x, direction): - if direction == "forward": - return self.forward_func(x) - elif direction == "backward": - return self.backward_func(x) - elif direction == "forward_jacobian": - return self.jacobian_func(x) - else: - raise ValueError(f"Unknown direction: {direction}") - - -def create_transformation_functions(): - """ - Create a set of monotone bijective transformation functions. - - Returns - ------- - list - List of TransformationFunction objects - """ - - # 1. Log transformation (like gamma prior rescaling) - log_transform = TransformationFunction( - name="Log Transform (exp ↔ log)", - forward_func=lambda x: xp.log(x), - backward_func=lambda x: xp.exp(x), - jacobian_func=lambda x: 1.0 / x - ) - - # 2. Logistic transformation (maps R ↔ (0,1)) - logistic_transform = TransformationFunction( - name="Logistic Transform (logit ↔ sigmoid)", - forward_func=lambda x: xp.log(x / (1 - x)) if hasattr(x, '__iter__') else xp.log(x / (1 - x)), - backward_func=lambda x: 1 / (1 + xp.exp(-x)), - jacobian_func=lambda x: 1 / (x * (1 - x)) - ) - - # 3. Identity transformation (no transformation) - identity_transform = TransformationFunction( - name="Identity Transform (no change)", - forward_func=lambda x: x, - backward_func=lambda x: x, - jacobian_func=lambda x: 1.0 - ) - - return [log_transform, logistic_transform, identity_transform] - - -def compute_marginal_statistics_univariate(mean_internal, cov_internal, transform, n_points=30): - """ - Compute marginal statistics for a single parameter using univariate quadrature. - - Parameters - ---------- - mean_internal : float - Mean of the internal (Gaussian) distribution for this parameter - cov_internal : float - Variance of the internal (Gaussian) distribution for this parameter - transform : TransformationFunction - Transformation function to use - n_points : int - Number of quadrature points - - Returns - ------- - dict - Dictionary with marginal statistics in outer space - """ - - def transform_func(x, direction): - return transform(x, direction) - - # Use univariate Gaussian quadrature - result = compute_variance_gauss_hermite(mean_internal, cov_internal, transform_func, n_points) - - return { - 'mean': result['mean'], - 'variance': result['variance'], - 'std': result['std'], - 'transform_name': transform.name - } - - -def compute_outer_correlation_matrix(mean_internal, cov_internal, transforms, n_points=25): - """ - Compute correlation matrix between all pairs of transformed parameters using bivariate quadrature. - - Parameters - ---------- - mean_internal : ndarray - Mean vector of internal distribution - cov_internal : ndarray - Covariance matrix of internal distribution - transforms : list - List of transformation functions (one per parameter, in same order as mean_internal) - n_points : int - Number of quadrature points per dimension - - Returns - ------- - ndarray - Correlation matrix between transformed parameters - """ - - n_dim = len(mean_internal) - outer_corr_matrix = np.zeros((n_dim, n_dim)) - - # Pre-compute marginal statistics for efficiency - mean_outer = [] - marginal_vars = [] - - print("Computing marginal means and variances for correlation calculations...") - - for i in range(n_dim): - mu_i = mean_internal[i] - var_i = cov_internal[i, i] - - # Get transformation function - transform_func = transforms[i] - - def transform_i_func(x, direction): - return transform_func(x, direction) - - # Compute marginal statistics - result = compute_variance_gauss_hermite(mu_i, var_i, transform_i_func, n_points) - mean_outer.append(result['mean']) - marginal_vars.append(result['variance']) - - print("Computing pairwise correlations...") - - # Compute pairwise correlations - for i in range(n_dim): - for j in range(n_dim): - if i == j: - outer_corr_matrix[i, j] = 1.0 - elif i < j: # Only compute upper triangle, then symmetrize - print(f" Computing Corr(X_{i+1}, X_{j+1})...", end=" ") - - # Extract marginal parameters - mu_i, mu_j = mean_internal[i], mean_internal[j] - var_i, var_j = cov_internal[i, i], cov_internal[j, j] - cov_ij = cov_internal[i, j] - - # Compute correlation coefficient in internal space - rho_internal = cov_ij / np.sqrt(var_i * var_j) if var_i * var_j > 0 else 0.0 - - # Get transformation functions - transform_func_i = transforms[i] - transform_func_j = transforms[j] - - # Standardize the variables for bivariate quadrature - def standardized_func_i(z): - x_internal = mu_i + np.sqrt(var_i) * z - return transform_func_i(x_internal, "backward") - - def standardized_func_j(z): - x_internal = mu_j + np.sqrt(var_j) * z - return transform_func_j(x_internal, "backward") - - # Compute E[f_i(Z_i) * f_j(Z_j)] using bivariate quadrature - cross_moment = compute_bivariate_expectation( - standardized_func_i, standardized_func_j, - rho=rho_internal, n_points=n_points - ) - - # Get pre-computed marginal statistics - var_i_outer = marginal_vars[i] - var_j_outer = marginal_vars[j] - - # Correlation coefficient: Corr(X,Y) = (E[XY] - E[X]E[Y]) / (σ_X σ_Y) - covariance_outer = cross_moment - mean_outer[i] * mean_outer[j] - correlation_outer = (covariance_outer / np.sqrt(var_i_outer * var_j_outer) - if var_i_outer * var_j_outer > 0 else 0.0) - - outer_corr_matrix[i, j] = correlation_outer - outer_corr_matrix[j, i] = correlation_outer # Symmetric - - print(f"{correlation_outer:.4f}") - else: - # Lower triangle - already filled by symmetry - pass - - return outer_corr_matrix - - -def test_multivariate_transformation(): - """ - Main test function for multivariate transformations. - """ - - print("=" * 90) - print("MULTIVARIATE TRANSFORMATION TEST") - print("Testing 3D Gaussian → Transformed Space using Gaussian Quadrature") - print("=" * 90) - - # Set parameters - n_dim = 3 - n_quad_points = 30 - - # Step 1: Generate random mean vector and covariance matrix - print("1. Generating Random 3D Gaussian Distribution") - print("-" * 50) - - np.random.seed(42) # For reproducibility - mean_internal = np.random.uniform(-1, 1, n_dim) - cov_internal = generate_random_covariance_matrix(n_dim, condition_number=5.0) - - print("Internal (Gaussian) Distribution Parameters:") - print(f"Mean vector: {mean_internal}") - print("Covariance matrix:") - print(cov_internal) - print(f"Condition number: {np.linalg.cond(cov_internal):.2f}") - print() - - # Step 2: Create transformation functions - print("2. Setting up Transformation Functions") - print("-" * 50) - - all_transforms = create_transformation_functions() - - # Apply different transforms to each dimension (directly assign transforms to parameters) - transforms = [ - all_transforms[0], # Parameter 1: Log transform - all_transforms[1], # Parameter 2: Logistic transform - all_transforms[2] # Parameter 3: Identity transform - ] - - print("Transformation assignments:") - for i, transform in enumerate(transforms): - print(f" Parameter {i+1}: {transform.name}") - print() - - # Step 3: Compute marginal statistics in outer space - print("3. Computing Marginal Statistics in Outer Space") - print("-" * 50) - - marginal_stats = [] - - for i in range(n_dim): - # Extract marginal parameters - mean_i = mean_internal[i] - var_i = cov_internal[i, i] - - # Compute marginal statistics - stats = compute_marginal_statistics_univariate( - mean_i, var_i, transforms[i], n_quad_points - ) - - marginal_stats.append(stats) - - print(f"Parameter {i+1} ({stats['transform_name']}):") - print(f" Internal: μ = {mean_i:.4f}, σ² = {var_i:.4f}") - print(f" Outer: μ = {stats['mean']:.4f}, σ² = {stats['variance']:.4f}, σ = {stats['std']:.4f}") - print() - - # Step 4: Compute pairwise correlations in outer space - print("4. Computing Pairwise Correlations in Outer Space") - print("-" * 50) - - # Internal correlations (for reference) - internal_corr_matrix = np.zeros((n_dim, n_dim)) - for i in range(n_dim): - for j in range(n_dim): - if i == j: - internal_corr_matrix[i, j] = 1.0 - else: - internal_corr_matrix[i, j] = (cov_internal[i, j] / - np.sqrt(cov_internal[i, i] * cov_internal[j, j])) - - print("Internal (Gaussian) correlation matrix:") - print(internal_corr_matrix) - print() - - # Compute outer correlations using the new efficient function - outer_corr_matrix = compute_outer_correlation_matrix( - mean_internal, cov_internal, transforms, n_quad_points - ) - - print("\nOuter (Transformed) correlation matrix:") - print(outer_corr_matrix) - print() - - # Step 5: Compare transformations - print("5. Transformation Effects Analysis") - print("-" * 50) - - print("Comparison of Internal vs Outer Statistics:") - print(f"{'Parameter':<12} {'Transform':<25} {'Mean Change':<12} {'Var Change':<12} {'Corr Change':<12}") - print("-" * 85) - - for i in range(n_dim): - mean_change = abs(marginal_stats[i]['mean'] - mean_internal[i]) - var_change = abs(marginal_stats[i]['variance'] - cov_internal[i, i]) - - # Average correlation change for this parameter - corr_changes = [] - for j in range(n_dim): - if i != j: - corr_changes.append(abs(outer_corr_matrix[i, j] - internal_corr_matrix[i, j])) - avg_corr_change = np.mean(corr_changes) if corr_changes else 0.0 - - transform_name = transforms[i].name.split(' ')[0] - - print(f"{i+1:<12} {transform_name:<25} {mean_change:<12.4f} {var_change:<12.4f} {avg_corr_change:<12.4f}") - - print() - - # Step 6: Analytical validation for specific cases - print("6. Analytical Validation") - print("-" * 50) - - # For log transformation (Parameter 1), we can validate against log-normal theory - if transforms[0].name.startswith("Log"): # Log transform - mu_1 = mean_internal[0] - sigma2_1 = cov_internal[0, 0] - - # Analytical log-normal moments - analytical_mean = np.exp(mu_1 + sigma2_1/2) - analytical_var = (np.exp(sigma2_1) - 1) * np.exp(2*mu_1 + sigma2_1) - - numerical_mean = marginal_stats[0]['mean'] - numerical_var = marginal_stats[0]['variance'] - - print("Log-normal validation (Parameter 1):") - print(f" Analytical mean: {analytical_mean:.6f}") - print(f" Numerical mean: {numerical_mean:.6f}") - print(f" Relative error: {abs(analytical_mean - numerical_mean)/analytical_mean:.2e}") - print(f" Analytical var: {analytical_var:.6f}") - print(f" Numerical var: {numerical_var:.6f}") - print(f" Relative error: {abs(analytical_var - numerical_var)/analytical_var:.2e}") - print() - - # Demonstrate reparametrization functions - print("Reparametrization functions analysis:") - - # Create transform function for reparametrization utilities - transform_func = transforms[0] # Log transform - def reparam_func(x, direction): - return transform_func(x, direction) - - # Compute quantiles using reparametrization function - percentiles = np.array([0.025, 0.25, 0.5, 0.75, 0.975]) - quantiles = compute_transformed_quantiles(mu_1, sigma2_1, percentiles, reparam_func) - - print(" Quantiles in outer space:") - for p, q in zip(percentiles, quantiles): - print(f" {p*100:4.1f}%: {q:.4f}") - - # Compute bounds using reparametrization function - (int_lower, int_upper), (orig_lower, orig_upper) = compute_bounds( - mu_1, sigma2_1, reparam_func, n_std=3 - ) - print(f" 3σ bounds: Internal [{int_lower:.3f}, {int_upper:.3f}] -> Outer [{orig_lower:.3f}, {orig_upper:.3f}]") - - # Compute PDF at a few points to demonstrate reparametrization - test_points = [0.5, 1.0, 2.0, 5.0] - print(" PDF values at test points:") - for x_orig in test_points: - x_int = reparam_func(x_orig, "forward") - pdf_orig = compute_transformed_pdf(mu_1, sigma2_1, x_int, reparam_func) - print(f" x = {x_orig:.1f}: PDF = {pdf_orig:.6f}") - print() - print() - - # Step 7: Create visualization - print("7. Generating Visualization") - print("-" * 50) - - try: - # Create single figure with 3 rows (one per parameter), 2 columns (internal, outer) - fig, axes = plt.subplots(3, 2, figsize=(16, 12)) - - # Plot marginal distributions for each parameter - for i in range(n_dim): - # Get marginal parameters - mu_i = mean_internal[i] - sigma_i = np.sqrt(cov_internal[i, i]) - transform_func = transforms[i] - - # Create transform function for reparametrization - def param_transform_func(x, direction): - return transform_func(x, direction) - - # === INTERNAL DISTRIBUTION PLOT === - ax_int = axes[i, 0] # Row i, column 0 (internal) - - # Create well-spaced internal grid - x_internal = np.linspace(mu_i - 4*sigma_i, mu_i + 4*sigma_i, 300) - pdf_internal = (1/(sigma_i * np.sqrt(2*np.pi))) * np.exp(-0.5*((x_internal - mu_i)/sigma_i)**2) - - ax_int.plot(x_internal, pdf_internal, 'b-', linewidth=3, label=f'N({mu_i:.2f}, {sigma_i:.2f}²)') - - # Add quantiles for internal distribution - internal_percentiles = np.array([0.025, 0.25, 0.5, 0.75, 0.975]) - from scipy.stats import norm - internal_quantiles = norm.ppf(internal_percentiles, loc=mu_i, scale=sigma_i) - - colors_int = ['red', 'orange', 'green', 'orange', 'red'] - for p, q, color in zip(internal_percentiles, internal_quantiles, colors_int): - pdf_val = (1/(sigma_i * np.sqrt(2*np.pi))) * np.exp(-0.5*((q - mu_i)/sigma_i)**2) - ax_int.axvline(q, color=color, linestyle='--', alpha=0.7, linewidth=2) - if p in [0.025, 0.5, 0.975]: # Label key percentiles - ax_int.text(q, pdf_val * 1.05, f'{p:.3f}', rotation=90, ha='center', va='bottom', fontsize=10, fontweight='bold') - - # Set internal plot properties - ax_int.set_xlim(mu_i - 4*sigma_i, mu_i + 4*sigma_i) - ax_int.set_ylim(0, max(pdf_internal) * 1.15) - ax_int.set_xlabel(f'Parameter {i+1} (Internal Scale)', fontsize=12) - ax_int.set_ylabel('PDF', fontsize=12) - ax_int.set_title(f'Parameter {i+1}: Internal Distribution\n{transform_func.name}', fontsize=14, fontweight='bold') - ax_int.legend(fontsize=11) - ax_int.grid(True, alpha=0.3) - - # === OUTER DISTRIBUTION PLOT === - ax_out = axes[i, 1] # Row i, column 1 (outer) - - try: - # Compute bounds for outer distribution with more generous margins - (int_lower, int_upper), (orig_lower, orig_upper) = compute_bounds( - mu_i, sigma_i**2, param_transform_func, n_std=4 - ) - - # Add some margin to outer bounds for better visualization - orig_range = orig_upper - orig_lower - orig_margin = orig_range * 0.1 - orig_lower_plot = max(orig_lower - orig_margin, 1e-6) if orig_lower > 0 else orig_lower - orig_margin - orig_upper_plot = orig_upper + orig_margin - - # Create fine grid in outer space - x_outer = np.linspace(orig_lower_plot, orig_upper_plot, 300) - - # Filter out invalid values for certain transformations - if "Logistic" in transforms[i].name: # Logistic transform (0,1) - x_outer = x_outer[(x_outer > 0.001) & (x_outer < 0.999)] - elif "Log" in transforms[i].name: # Log transform (positive) - x_outer = x_outer[x_outer > 0.001] - # Identity transform needs no filtering - can handle all real values - - # Compute PDF in outer space - pdf_outer = [] - for x in x_outer: - try: - x_int = param_transform_func(x, "forward") - pdf_val = compute_transformed_pdf(mu_i, sigma_i**2, x_int, param_transform_func) - pdf_outer.append(pdf_val) - except: - pdf_outer.append(0.0) - - pdf_outer = np.array(pdf_outer) - - # Plot outer distribution - ax_out.plot(x_outer, pdf_outer, 'r-', linewidth=3, label=f'Transformed Distribution') - - # Add quantiles for outer distribution - outer_quantiles = compute_transformed_quantiles(mu_i, sigma_i**2, internal_percentiles, param_transform_func) - - colors_out = ['red', 'orange', 'green', 'orange', 'red'] - for p, q, color in zip(internal_percentiles, outer_quantiles, colors_out): - if orig_lower_plot <= q <= orig_upper_plot: # Only plot if within bounds - try: - x_int_q = param_transform_func(q, "forward") - pdf_val_q = compute_transformed_pdf(mu_i, sigma_i**2, x_int_q, param_transform_func) - ax_out.axvline(q, color=color, linestyle='--', alpha=0.7, linewidth=2) - if p in [0.025, 0.5, 0.975]: # Label key percentiles - ax_out.text(q, pdf_val_q * 1.05, f'{p:.3f}', rotation=90, ha='center', va='bottom', fontsize=10, fontweight='bold') - except: - pass - - # Set outer plot properties with proper limits - ax_out.set_xlim(orig_lower_plot, orig_upper_plot) - if len(pdf_outer) > 0 and max(pdf_outer) > 0: - ax_out.set_ylim(0, max(pdf_outer) * 1.15) - - # Format x-axis nicely for different transformations - if "Log" in transforms[i].name: # Log transform - ax_out.set_xlabel(f'Parameter {i+1} (Outer Scale: exp)', fontsize=12) - elif "Logistic" in transforms[i].name: # Logistic transform - ax_out.set_xlabel(f'Parameter {i+1} (Outer Scale: sigmoid)', fontsize=12) - elif "Identity" in transforms[i].name: # Identity transform - ax_out.set_xlabel(f'Parameter {i+1} (Outer Scale: identity)', fontsize=12) - - ax_out.set_ylabel('PDF', fontsize=12) - ax_out.set_title(f'Parameter {i+1}: Outer Distribution\n{transform_func.name}', fontsize=14, fontweight='bold') - ax_out.legend(fontsize=11) - ax_out.grid(True, alpha=0.3) - - except Exception as e: - print(f"Warning: Could not plot outer distribution for parameter {i+1}: {e}") - ax_out.text(0.5, 0.5, f'Error plotting\nparameter {i+1}', transform=ax_out.transAxes, ha='center', va='center', fontsize=12) - ax_out.set_title(f'Parameter {i+1}: Error', fontsize=14) - - # Add column labels - axes[0, 0].text(0.5, 1.15, 'Internal (Gaussian) Scale', transform=axes[0, 0].transAxes, - ha='center', va='bottom', fontsize=16, fontweight='bold') - axes[0, 1].text(0.5, 1.15, 'Outer (Transformed) Scale', transform=axes[0, 1].transAxes, - ha='center', va='bottom', fontsize=16, fontweight='bold') - - # Finalize figure - fig.suptitle('Marginal Distributions: Internal vs Outer Scales', fontsize=18, fontweight='bold', y=0.98) - fig.tight_layout() - fig.subplots_adjust(top=0.92) # Make room for suptitle and column headers - fig.savefig('marginal_distributions_comparison.png', dpi=300, bbox_inches='tight') - - # Show figure - plt.show() - - print("✓ Marginal distributions comparison saved as 'marginal_distributions_comparison.png'") - - except Exception as e: - print(f"⚠ Could not generate plots: {e}") - - print() - - # Step 8: Summary - print("8. Test Summary") - print("-" * 50) - - print("✓ Successfully generated random 3D Gaussian distribution") - print("✓ Applied monotone bijective transformations to each parameter") - print("✓ Computed marginal statistics using univariate Gaussian quadrature") - print("✓ Computed pairwise correlations using bivariate Gaussian quadrature") - print("✓ Validated results against analytical solutions where available") - print("✓ Analyzed transformation effects on distribution properties") - - total_corr_change = np.sum(np.abs(outer_corr_matrix - internal_corr_matrix)) / 2 # Divide by 2 due to symmetry - print(f"✓ Total correlation structure change: {total_corr_change:.4f}") - - print() - print("=" * 90) - print("MULTIVARIATE TRANSFORMATION TEST COMPLETED SUCCESSFULLY!") - print("=" * 90) - - -if __name__ == "__main__": - test_multivariate_transformation() \ No newline at end of file From b645424729d5ae5a928a590d5e2b7eb0dbcc7163 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lisa=20Gaedke-Merzh=C3=A4user?= <45555799+lisa-gm@users.noreply.github.com> Date: Wed, 8 Oct 2025 13:10:07 +0200 Subject: [PATCH 14/26] Update theta handling in check_vector_consistency --- src/dalia/core/dalia.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py index a0286883..5c6363f8 100644 --- a/src/dalia/core/dalia.py +++ b/src/dalia/core/dalia.py @@ -26,7 +26,7 @@ smartsplit, synchronize, synchronize_gpu, - check_vector_consistency + check_vector_consistency, compute_outer_covariance_matrix, ) @@ -806,11 +806,11 @@ def compute_covariance_hp(self, theta_external: NDArray) -> NDArray: # ensure that all ranks are initialized to the same theta check_vector_consistency( - theta_i, + theta_external, comm=self.comm_world, ) - self.model.theta_external = theta_i + self.model.theta_external = theta_external # self.model.rescale_hyperparameters_to_internal(theta_interpret, direction="forward") print_msg( f"Computing covariance of hyperparameters at theta_external {theta_external}.", From cc01726318158b93affca2b143244c471b66b634 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lisa=20Gaedke-Merzh=C3=A4user?= <45555799+lisa-gm@users.noreply.github.com> Date: Wed, 8 Oct 2025 13:11:16 +0200 Subject: [PATCH 15/26] Check backend flags for cupy availability --- src/dalia/utils/multiprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dalia/utils/multiprocessing.py b/src/dalia/utils/multiprocessing.py index b8b0b3c1..bd84f6b2 100644 --- a/src/dalia/utils/multiprocessing.py +++ b/src/dalia/utils/multiprocessing.py @@ -232,7 +232,7 @@ def check_vector_consistency( bcast(theta_ref, root=0, comm=comm) array_module_name = get_array_module_name(theta) - if array_module_name == "cupy": + if backend_flags["cupy_avail"]: norm_diff = cp.linalg.norm(theta - theta_ref) else: norm_diff = np.linalg.norm(theta - theta_ref) From 16e01788ecb097bf6821a4b30eb6a13506f89a53 Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Wed, 8 Oct 2025 14:12:46 +0300 Subject: [PATCH 16/26] just updated run script --- examples/gs_small/run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/gs_small/run.py b/examples/gs_small/run.py index e94f20d1..225a618c 100644 --- a/examples/gs_small/run.py +++ b/examples/gs_small/run.py @@ -65,7 +65,7 @@ "max_iter": args.max_iter, "gtol": 1e-3, "disp": True, - "maxcor": len(model.theta), + "maxcor": len(model.theta_external), }, "f_reduction_tol": 1e-3, "theta_reduction_tol": 1e-4, From 3d5042792a0fd28ef94d5ed34e14bb48b02fe3c0 Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Wed, 8 Oct 2025 14:28:59 +0300 Subject: [PATCH 17/26] incorporated copilot suggestions --- src/dalia/core/dalia.py | 5 +++-- src/dalia/utils/multiprocessing.py | 11 ++++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py index 8da83d77..4d852c70 100644 --- a/src/dalia/core/dalia.py +++ b/src/dalia/core/dalia.py @@ -1094,12 +1094,13 @@ def get_marginal_variances_observations( ) x_star = self.model.x theta_external = self.model.theta_external - - elif theta_external is None or x_star is None: + + if theta_external is None or x_star is None: raise ValueError( "BOTH or NEITHER theta and x_star must be provided to compute the marginal variances." ) + # check order x_star ... -> potentially need to reorder marginal variances self._compute_covariance_latent_parameters(theta_external, x_star) diff --git a/src/dalia/utils/multiprocessing.py b/src/dalia/utils/multiprocessing.py index bd84f6b2..43a27206 100644 --- a/src/dalia/utils/multiprocessing.py +++ b/src/dalia/utils/multiprocessing.py @@ -231,14 +231,23 @@ def check_vector_consistency( theta_ref = theta.copy() bcast(theta_ref, root=0, comm=comm) - array_module_name = get_array_module_name(theta) if backend_flags["cupy_avail"]: norm_diff = cp.linalg.norm(theta - theta_ref) else: norm_diff = np.linalg.norm(theta - theta_ref) if norm_diff > 1e-10: + # Print indices and values where theta and theta_ref differ + if backend_flags["cupy_avail"]: + diff_indices = cp.where(theta != theta_ref)[0] + for idx in diff_indices: + print(f"Process {comm.Get_rank()} difference at index {idx}: theta_ref={theta_ref[idx]}, theta={theta[idx]}") + else: + diff_indices = np.where(theta != theta_ref)[0] + for idx in diff_indices: + print(f"Process {comm.Get_rank()} difference at index {idx}: theta_ref={theta_ref[idx]}, theta={theta[idx]}") raise ValueError( f"Process {comm.Get_rank()} has a different theta than the reference process." f" Expected: {theta_ref}, but got: {theta}. diff = {norm_diff:.4e}" + f"" ) From 63391560a960b00d4b0b84b541dc795dd761609c Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Wed, 8 Oct 2025 14:36:32 +0300 Subject: [PATCH 18/26] fixed it again ... --- src/dalia/core/dalia.py | 26 +++++++++++++ src/dalia/utils/__init__.py | 2 + src/dalia/utils/multiprocessing.py | 61 ++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+) diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py index 1fe47bc6..72fa9e3b 100644 --- a/src/dalia/core/dalia.py +++ b/src/dalia/core/dalia.py @@ -27,6 +27,7 @@ synchronize, synchronize_gpu, compute_outer_covariance_matrix, + check_vector_consistency, ) if backend_flags["mpi_avail"]: @@ -308,11 +309,19 @@ def run(self) -> dict: x_star = get_device(minimization_result["x"]) print("Finished the optimization procedure.") + # need to update theta_star and x_star to be the same across all ranks + theta_star[:] = self.comm_world.bcast(theta_star, root=0) + x_star[:] = self.comm_world.bcast(x_star, root=0) + # compute covariance of the hyperparameters theta at the mode print("theta_star: ", theta_star) cov_theta_dict = self.compute_covariance_hp(theta_star) print("Computed covariance of the hyperparameters at the mode.") + # need to update theta_star and x_star to be the same across all ranks + theta_star[:] = self.comm_world.bcast(theta_star, root=0) + x_star[:] = self.comm_world.bcast(x_star, root=0) + # compute marginal variances of the latent parameters marginal_variances_latent = self.get_marginal_variances_latent_parameters( theta_star, x_star @@ -357,6 +366,12 @@ def minimize(self) -> optimize.OptimizeResult: Result of the optimization procedure. """ + # ensure that all ranks are initialized to the same theta + check_vector_consistency( + self.model.theta, + comm=self.comm_world, + ) + if len(self.model.theta_external) == 0: # Only run the inner iteration print_msg("No hyperparameters, just running inner iteration.") @@ -791,6 +806,11 @@ def compute_covariance_hp(self, theta_external: NDArray) -> NDArray: """ # self.model.rescale_hyperparameters_to_internal(theta_interpret, direction="forward") + # ensure that all ranks are initialized to the same theta + check_vector_consistency( + theta_external, + comm=self.comm_world, + ) print_msg( f"Computing covariance of hyperparameters at theta_external {theta_external}.", flush=True, @@ -1022,6 +1042,9 @@ def get_marginal_variances_latent_parameters( raise ValueError( "BOTH or NEITHER theta and x_star must be provided to compute the marginal variances." ) + + check_vector_consistency(theta, comm=self.comm_world) + check_vector_consistency(x_star, comm=self.comm_world) # check order x_star ... -> potentially need to reorder marginal variances self._compute_covariance_latent_parameters(theta, x_star) @@ -1059,6 +1082,9 @@ def get_marginal_variances_observations( """ # TODO: implement this for non-Gaussian likelihoods + check_vector_consistency(theta, comm=self.comm_world) + check_vector_consistency(x_star, comm=self.comm_world) + if self.model.is_likelihood_gaussian(): # TODO: this should be only called by rank 0? if theta_external is None and x_star is None: diff --git a/src/dalia/utils/__init__.py b/src/dalia/utils/__init__.py index 5d152d56..34063a93 100644 --- a/src/dalia/utils/__init__.py +++ b/src/dalia/utils/__init__.py @@ -24,6 +24,7 @@ smartsplit, synchronize, synchronize_gpu, + check_vector_consistency, DummyCommunicator, ) from dalia.utils.spmatrix_utils import bdiag_tiling, extract_diagonal, memory_footprint @@ -50,6 +51,7 @@ "allreduce", "allgather", "bcast", + "check_vector_consistency", "bdiag_tiling", "extract_diagonal", "memory_footprint", diff --git a/src/dalia/utils/multiprocessing.py b/src/dalia/utils/multiprocessing.py index 9ceeb6d6..f46a2637 100644 --- a/src/dalia/utils/multiprocessing.py +++ b/src/dalia/utils/multiprocessing.py @@ -134,6 +134,9 @@ def bcast( comm (CommunicatorType), optional: The communication group. Default is MPI.COMM_WORLD. """ + + print("Broadcasting data from root:", root, "to all processes. data :", data) + if backend_flags["mpi_avail"]: comm.Bcast(data, root=root) @@ -209,3 +212,61 @@ def smartsplit( color_new_group = 0 return active_comm, comm_new_group, color_new_group + +def check_vector_consistency( + theta: ArrayLike, + comm, +): + """ + Check if all processes have the same theta. + + Parameters: + ----------- + theta (ArrayLike): + The theta to check. + comm (CommunicatorType), optional: + The communication group. Default is MPI.COMM_WORLD. + """ + + synchronize(comm = comm) + + theta_ref = theta.copy() + bcast(theta_ref, root=0, comm=comm) + + # theta_host = get_host(theta) + # theta_ref_host = get_host(theta_ref) + + # if not np.array_equal(theta_host, theta_ref_host): + # norm_diff = np.linalg.norm(theta_host - theta_ref_host) + # raise ValueError( + # f"Process {comm.Get_rank()} has a different theta than the reference process." + # f" Expected: {theta_ref_host}, but got: {theta_host}. diff = {norm_diff:.4e}" + # ) + + array_module_name = get_array_module_name(theta) + if array_module_name == "cupy": + norm_diff = cp.linalg.norm(theta - theta_ref) + else: + norm_diff = np.linalg.norm(theta - theta_ref) + + + if norm_diff > 1e-10: + raise ValueError( + f"Process {comm.Get_rank()} has a different theta than the reference process." + f" Expected: {theta_ref}, but got: {theta}. diff = {norm_diff:.4e}" + ) + + +if __name__ == "__main__": + + # Initialize MPI + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + mpi_size = comm.Get_size() + + # Create a vector, intentionally make rank 1 different + theta = np.ones(5) + if backend_flags["mpi_avail"] and rank == 1: + theta[0] = 42 # Make it inconsistent on rank 1 + + check_vector_consistency(theta, comm) From 37514e9d3729fcafb05f1af708ecd61a6fa6915f Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Fri, 12 Sep 2025 16:29:33 +0200 Subject: [PATCH 19/26] no print in broadcasting function --- src/dalia/utils/multiprocessing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/dalia/utils/multiprocessing.py b/src/dalia/utils/multiprocessing.py index f46a2637..08cbb47c 100644 --- a/src/dalia/utils/multiprocessing.py +++ b/src/dalia/utils/multiprocessing.py @@ -135,7 +135,7 @@ def bcast( The communication group. Default is MPI.COMM_WORLD. """ - print("Broadcasting data from root:", root, "to all processes. data :", data) + # print("Broadcasting data from root:", root, "to all processes.") if backend_flags["mpi_avail"]: comm.Bcast(data, root=root) @@ -249,13 +249,12 @@ def check_vector_consistency( else: norm_diff = np.linalg.norm(theta - theta_ref) - if norm_diff > 1e-10: raise ValueError( f"Process {comm.Get_rank()} has a different theta than the reference process." f" Expected: {theta_ref}, but got: {theta}. diff = {norm_diff:.4e}" ) - + if __name__ == "__main__": From 420cd71924d6ceb956e02655f084936b052183bf Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Fri, 12 Sep 2025 16:53:51 +0200 Subject: [PATCH 20/26] removed extra print statements --- src/dalia/utils/multiprocessing.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/src/dalia/utils/multiprocessing.py b/src/dalia/utils/multiprocessing.py index 08cbb47c..b8b0b3c1 100644 --- a/src/dalia/utils/multiprocessing.py +++ b/src/dalia/utils/multiprocessing.py @@ -135,8 +135,6 @@ def bcast( The communication group. Default is MPI.COMM_WORLD. """ - # print("Broadcasting data from root:", root, "to all processes.") - if backend_flags["mpi_avail"]: comm.Bcast(data, root=root) @@ -233,16 +231,6 @@ def check_vector_consistency( theta_ref = theta.copy() bcast(theta_ref, root=0, comm=comm) - # theta_host = get_host(theta) - # theta_ref_host = get_host(theta_ref) - - # if not np.array_equal(theta_host, theta_ref_host): - # norm_diff = np.linalg.norm(theta_host - theta_ref_host) - # raise ValueError( - # f"Process {comm.Get_rank()} has a different theta than the reference process." - # f" Expected: {theta_ref_host}, but got: {theta_host}. diff = {norm_diff:.4e}" - # ) - array_module_name = get_array_module_name(theta) if array_module_name == "cupy": norm_diff = cp.linalg.norm(theta - theta_ref) @@ -254,18 +242,3 @@ def check_vector_consistency( f"Process {comm.Get_rank()} has a different theta than the reference process." f" Expected: {theta_ref}, but got: {theta}. diff = {norm_diff:.4e}" ) - - -if __name__ == "__main__": - - # Initialize MPI - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - mpi_size = comm.Get_size() - - # Create a vector, intentionally make rank 1 different - theta = np.ones(5) - if backend_flags["mpi_avail"] and rank == 1: - theta[0] = 42 # Make it inconsistent on rank 1 - - check_vector_consistency(theta, comm) From faf8639b3c1c535590eb1275dbe715fcf0fb6d86 Mon Sep 17 00:00:00 2001 From: lisa-gm Date: Sun, 21 Sep 2025 10:21:03 +0300 Subject: [PATCH 21/26] added print gs small example --- examples/gs_small/run.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/gs_small/run.py b/examples/gs_small/run.py index 225a618c..d4dadd1e 100644 --- a/examples/gs_small/run.py +++ b/examples/gs_small/run.py @@ -58,6 +58,8 @@ likelihood_config=likelihood_config.parse_config(likelihood_dict), ) + print_msg(model) + # Configurations of DALIA dalia_dict = { "solver": {"type": "dense"}, From de81cfc986770e5cea60ec6bd754cb994d6b1a49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lisa=20Gaedke-Merzh=C3=A4user?= <45555799+lisa-gm@users.noreply.github.com> Date: Wed, 8 Oct 2025 13:10:07 +0200 Subject: [PATCH 22/26] Update theta handling in check_vector_consistency --- src/dalia/core/dalia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py index 72fa9e3b..21ed91be 100644 --- a/src/dalia/core/dalia.py +++ b/src/dalia/core/dalia.py @@ -26,8 +26,8 @@ smartsplit, synchronize, synchronize_gpu, - compute_outer_covariance_matrix, check_vector_consistency, + compute_outer_covariance_matrix, ) if backend_flags["mpi_avail"]: From b07ad00a071a1ca65a5ec224035edfbfaae9c140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lisa=20Gaedke-Merzh=C3=A4user?= <45555799+lisa-gm@users.noreply.github.com> Date: Wed, 8 Oct 2025 13:11:16 +0200 Subject: [PATCH 23/26] Check backend flags for cupy availability --- src/dalia/utils/multiprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dalia/utils/multiprocessing.py b/src/dalia/utils/multiprocessing.py index b8b0b3c1..bd84f6b2 100644 --- a/src/dalia/utils/multiprocessing.py +++ b/src/dalia/utils/multiprocessing.py @@ -232,7 +232,7 @@ def check_vector_consistency( bcast(theta_ref, root=0, comm=comm) array_module_name = get_array_module_name(theta) - if array_module_name == "cupy": + if backend_flags["cupy_avail"]: norm_diff = cp.linalg.norm(theta - theta_ref) else: norm_diff = np.linalg.norm(theta - theta_ref) From beea79b506e1ff4586df60753d7f4718c2d14860 Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Wed, 8 Oct 2025 14:28:59 +0300 Subject: [PATCH 24/26] incorporated copilot suggestions --- src/dalia/core/dalia.py | 11 ++++------- src/dalia/utils/multiprocessing.py | 11 ++++++++++- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py index 21ed91be..048c61ce 100644 --- a/src/dalia/core/dalia.py +++ b/src/dalia/core/dalia.py @@ -1092,17 +1092,14 @@ def get_marginal_variances_observations( "Computing marginal variances for currently stored latent parameters. " ) x_star = self.model.x - theta = self.model.theta_external - elif theta_external is not None and x_star is not None: - ## assume theta to be in "external" scale - self.model.theta_external = xp.atleast_1d(theta_external) - theta = self.model.theta_internal - - elif theta is None or x_star is None: + theta_external = self.model.theta_external + + if theta_external is None or x_star is None: raise ValueError( "BOTH or NEITHER theta and x_star must be provided to compute the marginal variances." ) + # check order x_star ... -> potentially need to reorder marginal variances self._compute_covariance_latent_parameters(theta, x_star) diff --git a/src/dalia/utils/multiprocessing.py b/src/dalia/utils/multiprocessing.py index bd84f6b2..43a27206 100644 --- a/src/dalia/utils/multiprocessing.py +++ b/src/dalia/utils/multiprocessing.py @@ -231,14 +231,23 @@ def check_vector_consistency( theta_ref = theta.copy() bcast(theta_ref, root=0, comm=comm) - array_module_name = get_array_module_name(theta) if backend_flags["cupy_avail"]: norm_diff = cp.linalg.norm(theta - theta_ref) else: norm_diff = np.linalg.norm(theta - theta_ref) if norm_diff > 1e-10: + # Print indices and values where theta and theta_ref differ + if backend_flags["cupy_avail"]: + diff_indices = cp.where(theta != theta_ref)[0] + for idx in diff_indices: + print(f"Process {comm.Get_rank()} difference at index {idx}: theta_ref={theta_ref[idx]}, theta={theta[idx]}") + else: + diff_indices = np.where(theta != theta_ref)[0] + for idx in diff_indices: + print(f"Process {comm.Get_rank()} difference at index {idx}: theta_ref={theta_ref[idx]}, theta={theta[idx]}") raise ValueError( f"Process {comm.Get_rank()} has a different theta than the reference process." f" Expected: {theta_ref}, but got: {theta}. diff = {norm_diff:.4e}" + f"" ) From 0bc030e82523d3f5f375eb46b370797f4991a401 Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Wed, 8 Oct 2025 14:50:12 +0300 Subject: [PATCH 25/26] did it resolve now ... --- src/dalia/core/dalia.py | 10 +++++++++- src/dalia/utils/multiprocessing.py | 5 +++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py index 048c61ce..b8ad6820 100644 --- a/src/dalia/core/dalia.py +++ b/src/dalia/core/dalia.py @@ -313,6 +313,10 @@ def run(self) -> dict: theta_star[:] = self.comm_world.bcast(theta_star, root=0) x_star[:] = self.comm_world.bcast(x_star, root=0) + # need to update theta_star and x_star to be the same across all ranks + theta_star[:] = self.comm_world.bcast(theta_star, root=0) + x_star[:] = self.comm_world.bcast(x_star, root=0) + # compute covariance of the hyperparameters theta at the mode print("theta_star: ", theta_star) cov_theta_dict = self.compute_covariance_hp(theta_star) @@ -322,6 +326,10 @@ def run(self) -> dict: theta_star[:] = self.comm_world.bcast(theta_star, root=0) x_star[:] = self.comm_world.bcast(x_star, root=0) + # need to update theta_star and x_star to be the same across all ranks + theta_star[:] = self.comm_world.bcast(theta_star, root=0) + x_star[:] = self.comm_world.bcast(x_star, root=0) + # compute marginal variances of the latent parameters marginal_variances_latent = self.get_marginal_variances_latent_parameters( theta_star, x_star @@ -1082,7 +1090,7 @@ def get_marginal_variances_observations( """ # TODO: implement this for non-Gaussian likelihoods - check_vector_consistency(theta, comm=self.comm_world) + check_vector_consistency(theta_external, comm=self.comm_world) check_vector_consistency(x_star, comm=self.comm_world) if self.model.is_likelihood_gaussian(): diff --git a/src/dalia/utils/multiprocessing.py b/src/dalia/utils/multiprocessing.py index 43a27206..d58c2310 100644 --- a/src/dalia/utils/multiprocessing.py +++ b/src/dalia/utils/multiprocessing.py @@ -135,6 +135,11 @@ def bcast( The communication group. Default is MPI.COMM_WORLD. """ +<<<<<<< HEAD +======= + print("Broadcasting data from root:", root, "to all processes. data :", data) + +>>>>>>> 1f2a204 (added check consistency function. bcast somehow not working for cupy the way im calling it now) if backend_flags["mpi_avail"]: comm.Bcast(data, root=root) From 3ddbb6b2a566e97beac152c2f397cbd8056fe0cb Mon Sep 17 00:00:00 2001 From: Lisa Gaedke Date: Wed, 8 Oct 2025 15:50:31 +0300 Subject: [PATCH 26/26] first version seems to be working for some priors --- examples/g_ar1/run.py | 4 ++-- examples/gst_coreg2_small/run.py | 2 +- examples/gst_coreg3_small/run.py | 2 +- examples/gst_small/run.py | 27 ++++++++++++++-------- examples/p_ar1/run.py | 6 ++--- src/dalia/core/dalia.py | 37 ++++++++++++++++-------------- src/dalia/utils/multiprocessing.py | 5 ---- 7 files changed, 44 insertions(+), 39 deletions(-) diff --git a/examples/g_ar1/run.py b/examples/g_ar1/run.py index 378736f0..2bddfe30 100644 --- a/examples/g_ar1/run.py +++ b/examples/g_ar1/run.py @@ -100,7 +100,7 @@ "max_iter": 100, "gtol": 1e-3, "disp": True, - "maxcor": len(model.theta), + "maxcor": len(model.theta_external), }, "f_reduction_tol": 1e-3, "theta_reduction_tol": 1e-4, @@ -116,7 +116,7 @@ config=dalia_config.parse_config(dalia_dict), ) - print("initial model theta: ", model.theta) + print("initial model theta: ", model.theta_external) print("\nCalling DALIA.run()") results = dalia.run() diff --git a/examples/gst_coreg2_small/run.py b/examples/gst_coreg2_small/run.py index 55328b44..1dba8a7f 100644 --- a/examples/gst_coreg2_small/run.py +++ b/examples/gst_coreg2_small/run.py @@ -206,7 +206,7 @@ "max_iter": args.max_iter, "gtol": 1e-3, "disp": True, - "maxcor": len(coreg_model.theta), + "maxcor": len(coreg_model.theta_external), }, "f_reduction_tol": 1e-3, "theta_reduction_tol": 1e-4, diff --git a/examples/gst_coreg3_small/run.py b/examples/gst_coreg3_small/run.py index 8406d013..fa0fd90d 100644 --- a/examples/gst_coreg3_small/run.py +++ b/examples/gst_coreg3_small/run.py @@ -266,7 +266,7 @@ "max_iter": args.max_iter, "gtol": 1e-3, "disp": True, - "maxcor": len(coreg_model.theta), + "maxcor": len(coreg_model.theta_external), }, "f_reduction_tol": 1e-3, "theta_reduction_tol": 1e-4, diff --git a/examples/gst_small/run.py b/examples/gst_small/run.py index 2c36f753..24899cec 100644 --- a/examples/gst_small/run.py +++ b/examples/gst_small/run.py @@ -52,12 +52,13 @@ likelihood_dict = { "type": "gaussian", "prec_o": 4, - # "prior_hyperparameters": {"type": "gaussian", "mean": 1.4, "precision": 0.5}, - "prior_hyperparameters": { - "type": "penalized_complexity", - "alpha": 0.01, - "u": 4, - }, + "prior_hyperparameters": {"type": "gamma", "alpha": 2.0, "beta": 2.0}, + #"prior_hyperparameters": {"type": "gaussian", "mean": 1.4, "precision": 0.5}, + # "prior_hyperparameters": { + # "type": "penalized_complexity", + # "alpha": 0.01, + # "u": 4, + # }, } # Creation of the model by combining the submodels and the likelihood @@ -74,7 +75,7 @@ "max_iter": args.max_iter, "gtol": 1e-3, "disp": True, - "maxcor": len(model.theta), + "maxcor": len(model.theta_external), }, "f_reduction_tol": 1e-3, "theta_reduction_tol": 1e-4, @@ -88,11 +89,18 @@ config=dalia_config.parse_config(dalia_dict), ) - results = dalia.run() + results = dalia.minimize() print_msg("\n--- Results ---") + theta_ref = np.load(f"{BASE_DIR}/reference_outputs/theta_ref.npy") + theta_external = np.array([0.08423457, 2.52313066, 1.46267965, np.exp(1.36076756)]) + print_msg("Theta values:\n", results["theta"]) - print_msg("Covariance of theta:\n", results["cov_theta"]) + print_msg("Theta values internal:\n", results["theta_internal"]) + + #cov_theta = dalia.compute_covariance_hp(results["theta"]) + cov_theta = dalia.compute_covariance_hp(theta_external) + print_msg("Covariance of theta:\n", cov_theta["internal"]) print_msg( "Mean of the fixed effects:\n", results["x"][-model.submodels[-1].n_fixed_effects :], @@ -100,7 +108,6 @@ print_msg("\n--- Comparisons ---") # Compare hyperparameters - theta_ref = np.load(f"{BASE_DIR}/reference_outputs/theta_ref.npy") print_msg( "Norm (theta - theta_ref): ", f"{np.linalg.norm(results['theta'] - get_host(theta_ref)):.4e}", diff --git a/examples/p_ar1/run.py b/examples/p_ar1/run.py index 8a17d6da..22edf823 100644 --- a/examples/p_ar1/run.py +++ b/examples/p_ar1/run.py @@ -86,7 +86,7 @@ "max_iter": 100, "gtol": 1e-3, "disp": True, - "maxcor": len(model.theta), + "maxcor": len(model.theta_external), }, "f_reduction_tol": 1e-3, "theta_reduction_tol": 1e-4, @@ -101,9 +101,9 @@ config=dalia_config.parse_config(dalia_dict), ) - print("theta: ", model.theta) + print("theta external: ", model.theta_external) # print("x : ", model.x) - f_value = dalia._evaluate_f(model.theta) + f_value = dalia._evaluate_f(model.theta_external) print("after evaluate f. x: ", model.x) results = dalia.minimize() diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py index b8ad6820..d8492723 100644 --- a/src/dalia/core/dalia.py +++ b/src/dalia/core/dalia.py @@ -376,7 +376,7 @@ def minimize(self) -> optimize.OptimizeResult: # ensure that all ranks are initialized to the same theta check_vector_consistency( - self.model.theta, + self.model.theta_external, comm=self.comm_world, ) @@ -417,7 +417,7 @@ def callback(intermediate_result: optimize.OptimizeResult): f"Iteration: {self.accepted_iter:2d} (took: {self.objective_function_time[-1]:.2f}) | " f"Theta: [{theta_str}] | " f"Function Value: {fun_i: .6f} | " - f"Gradient: [{gradient_str}] | ", + #f"Gradient: [{gradient_str}] | ", f"Norm(Grad): [{xp.linalg.norm(self.gradient_f): .6f}]", flush=True, ) @@ -827,10 +827,10 @@ def compute_covariance_hp(self, theta_external: NDArray) -> NDArray: self.model.theta_external = theta_external hess_theta_internal = self._evaluate_hessian_f(self.model.theta_internal) - # print_msg( - # f"hessian_f: \n {hess_theta}", - # flush=True, - # ) + print_msg( + f"hessian_f: \n {hess_theta_internal}", + flush=True, + ) self.cov_theta_internal = xp.linalg.inv(hess_theta_internal) # rescale to external scale @@ -861,7 +861,7 @@ def _evaluate_hessian_f( ----- Compute finite difference approximation of the hessian of f at theta_i. """ - + ## TODO: this is the quick fix ... # self.model.theta[:] = theta_i dim_theta = self.model.n_hyperparameters @@ -895,9 +895,10 @@ def _evaluate_hessian_f( counter = 0 # compute f(theta) if self.color_feval == task_mapping[0]: - theta_i = self.model.theta_internal.copy() + theta_i = theta_internal.copy() f_theta = self._evaluate_f(theta_i) f_ii_loc[1, :] = f_theta + counter += 1 for k in range(loop_dim): @@ -910,16 +911,18 @@ def _evaluate_hessian_f( # theta+eps_i #theta_i = theta_internal.copy() #f_ii_loc[0, i] = self._evaluate_f(theta_i + eps_mat[i, :]) - theta_i = self.model.theta_internal + eps_mat[i, :] - f_ii_loc[0, i] = self._evaluate_f(theta_i) + theta_i = theta_internal + eps_mat[i, :] + result = self._evaluate_f(theta_i) + f_ii_loc[0, i] = result counter += 1 if self.color_feval == task_mapping[counter]: # theta-eps_i # theta_i = theta_internal.copy() # f_ii_loc[2, i] = self._evaluate_f(theta_i - eps_mat[i, :]) - theta_i = self.model.theta_internal - eps_mat[i, :] - f_ii_loc[2, i] = self._evaluate_f(theta_i) + theta_i = theta_internal - eps_mat[i, :] + result = self._evaluate_f(theta_i) + f_ii_loc[2, i] = result counter += 1 # as hessian is symmetric we only have to compute the upper triangle @@ -930,7 +933,7 @@ def _evaluate_hessian_f( # f_ij_loc[0, k] = self._evaluate_f( # theta_i + eps_mat[i, :] + eps_mat[j, :] # ) - theta_i = self.model.theta_internal + eps_mat[i, :] + eps_mat[j, :] + theta_i = theta_internal + eps_mat[i, :] + eps_mat[j, :] f_ij_loc[0, k] = self._evaluate_f(theta_i) counter += 1 @@ -940,7 +943,7 @@ def _evaluate_hessian_f( # f_ij_loc[1, k] = self._evaluate_f( # theta_i + eps_mat[i, :] - eps_mat[j, :] # ) - theta_i = self.model.theta_internal + eps_mat[i, :] - eps_mat[j, :] + theta_i = theta_internal + eps_mat[i, :] - eps_mat[j, :] f_ij_loc[1, k] = self._evaluate_f(theta_i) counter += 1 @@ -950,7 +953,7 @@ def _evaluate_hessian_f( # f_ij_loc[2, k] = self._evaluate_f( # theta_i - eps_mat[i, :] + eps_mat[j, :] # ) - theta_i = self.model.theta_internal - eps_mat[i, :] + eps_mat[j, :] + theta_i = theta_internal - eps_mat[i, :] + eps_mat[j, :] f_ij_loc[2, k] = self._evaluate_f(theta_i) counter += 1 @@ -960,7 +963,7 @@ def _evaluate_hessian_f( # f_ij_loc[3, k] = self._evaluate_f( # theta_i - eps_mat[i, :] - eps_mat[j, :] # ) - theta_i = self.model.theta_internal - eps_mat[i, :] - eps_mat[j, :] + theta_i = theta_internal - eps_mat[i, :] - eps_mat[j, :] f_ij_loc[3, k] = self._evaluate_f(theta_i) counter += 1 @@ -1154,7 +1157,7 @@ def _inner_iteration( if counter > self.inner_iteration_max_iter: print_msg( "Theta value at failing of the inner_iteration: ", - self.model.theta, + self.model.theta_internal, flush=True, ) raise ValueError( diff --git a/src/dalia/utils/multiprocessing.py b/src/dalia/utils/multiprocessing.py index d58c2310..43a27206 100644 --- a/src/dalia/utils/multiprocessing.py +++ b/src/dalia/utils/multiprocessing.py @@ -135,11 +135,6 @@ def bcast( The communication group. Default is MPI.COMM_WORLD. """ -<<<<<<< HEAD -======= - print("Broadcasting data from root:", root, "to all processes. data :", data) - ->>>>>>> 1f2a204 (added check consistency function. bcast somehow not working for cupy the way im calling it now) if backend_flags["mpi_avail"]: comm.Bcast(data, root=root)