diff --git a/.gitattributes b/.gitattributes index 3e5cf230..23dcd25c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -2,3 +2,4 @@ examples/**/*.npz filter=lfs diff=lfs merge=lfs -text examples/**/*.npy filter=lfs diff=lfs merge=lfs -text examples/**/*.dat filter=lfs diff=lfs merge=lfs -text + diff --git a/.gitignore b/.gitignore index 35e613a9..d19426db 100644 --- a/.gitignore +++ b/.gitignore @@ -171,6 +171,16 @@ runs_sc25/* settings.json *.out +# data files that can easily be generated +examples/brainiac/**/*.npy +examples/brainiac/**/*.npz + +examples/g_ar1/**/*.npy +examples/g_ar1/**/*.npz + +examples/p_ar1/**/*.npy +examples/p_ar1/**/*.npz + # Profiler outputs *.nsys-rep *.qdstrm diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..5320725e --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,32 @@ +@inproceedings{10.1145/3712285.3759832, + author = {Gaedke-Merzh\"{a}user, Lisa and Maillou, Vincent and Rodriguez Avellaneda, Fernando and Schenk, Olaf and Moraga, Paula and Luisier, Mathieu and Ziogas, Alexandros Nikolaos and Rue, H\r{a}vard}, + title = {Accelerated Spatio-Temporal Bayesian Modeling for Multivariate Gaussian Processes}, + year = {2025}, + isbn = {9798400714665}, + publisher = {Association for Computing Machinery}, + address = {New York, NY, USA}, + url = {https://doi.org/10.1145/3712285.3759832}, + doi = {10.1145/3712285.3759832}, + booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis}, + pages = {949–972}, + numpages = {24}, + keywords = {Large-Scale Bayesian Inference, Spatio-Temporal Modeling, Distributed Memory Computing}, + location = {}, + series = {SC '25} +} + +@inproceedings{11186484, + author = {Maillou, Vincent and Gaedke-Merzhauser, Lisa and Ziogas, Alexandros Nikolaos and Schenk, Olaf and Luisier, Mathieu}, + booktitle = { 2025 IEEE International Conference on Cluster Computing (CLUSTER) }, + title = {{ Parallel Selected Inversion of Block-Tridiagonal with Arrowhead Matrices }}, + year = {2025}, + volume = {}, + ISSN = {}, + pages = {1-12}, + keywords = {Materials science and technology;Temperature distribution;Computational modeling;Graphics processing units;Linear algebra;Predictive models;Libraries;Supercomputers;Sparse matrices;Parallel algorithms}, + doi = {10.1109/CLUSTER59342.2025.11186484}, + url = {https://doi.ieeecomputersociety.org/10.1109/CLUSTER59342.2025.11186484}, + publisher = {IEEE Computer Society}, + address = {Los Alamitos, CA, USA}, + month =sep +} \ No newline at end of file diff --git a/examples/bee_genomics/generate_synthetic_data.py b/examples/bee_genomics/generate_synthetic_data.py new file mode 100644 index 00000000..709f339e --- /dev/null +++ b/examples/bee_genomics/generate_synthetic_data.py @@ -0,0 +1,123 @@ +import os +from pathlib import Path + +import numpy as np +from scipy import sparse + + +BASE_DIR = Path(__file__).resolve().parent / "synthetic_data" + + +def make_balanced_group_indices(n_obs: int, n_groups: int) -> np.ndarray: + """Create near-balanced group assignments and shuffle them.""" + base = np.repeat(np.arange(n_groups), n_obs // n_groups) + remainder = np.arange(n_obs % n_groups) + groups = np.concatenate([base, remainder]) + rng = np.random.default_rng(123) + rng.shuffle(groups) + return groups + + +def main() -> None: + rng = np.random.default_rng(41) + + # Keep model structure identical to run.py, but generate stable synthetic data. + n_obs = 4000 + n_fixed = 10 + n_iid = 40 + n_dense = 40 + + tau_iid_true = 3.0 + tau_dense_true = 10.0 + prec_o_true = 50.0 + + # Directories expected by run.py + inputs_iid_dir = BASE_DIR / "inputs_iid" + inputs_dense_dir = BASE_DIR / "inputs_queenGRMinv" + inputs_fixed_dir = BASE_DIR / "inputs_fixed_effects" + ref_dir = BASE_DIR / "reference_outputs" + + for p in [inputs_iid_dir, inputs_dense_dir, inputs_fixed_dir, ref_dir]: + p.mkdir(parents=True, exist_ok=True) + + # 1) IID generic component + group_idx = make_balanced_group_indices(n_obs, n_iid) + rows = np.arange(n_obs) + cols = group_idx + data = np.ones(n_obs, dtype=float) + a_iid = sparse.coo_matrix((data, (rows, cols)), shape=(n_obs, n_iid)).tocsc() + q_iid = sparse.identity(n_iid, format="csc") + + # True latent effects + u_iid = rng.normal(loc=0.0, scale=np.sqrt(1.0 / tau_iid_true), size=n_iid) + + # 2) Dense generic component: random design and SPD precision matrix. + a_dense = rng.normal(loc=0.0, scale=1.0 / np.sqrt(n_dense), size=(n_obs, n_dense)) + + m = rng.normal(loc=0.0, scale=0.2, size=(n_dense, n_dense)) + q_dense_np = m.T @ m + np.eye(n_dense) + q_dense = sparse.csc_matrix(q_dense_np) + + cov_dense = np.linalg.inv(tau_dense_true * q_dense_np) + l_dense = np.linalg.cholesky(cov_dense) + u_dense = l_dense @ rng.normal(size=n_dense) + + # 3) Regression component: intercept + near-orthonormal covariates. + x_raw = rng.normal(size=(n_obs, n_fixed - 1)) + q_cov, _ = np.linalg.qr(x_raw, mode="reduced") + a_fixed = np.column_stack([np.ones(n_obs), q_cov]) + + # Sparse signal setting: only a few fixed effects are nonzero. + beta_true = np.zeros(n_fixed) + beta_true[0] = 0.5 + n_nonzero = 8 + nonzero_idx = rng.choice(np.arange(1, n_fixed), size=n_nonzero, replace=False) + beta_true[nonzero_idx] = rng.normal(loc=0.0, scale=0.2, size=n_nonzero) + + eta = a_iid @ u_iid + a_dense @ u_dense + a_fixed @ beta_true + y = eta + rng.normal(loc=0.0, scale=np.sqrt(1.0 / prec_o_true), size=n_obs) + + # Save run.py inputs + np.save(BASE_DIR / "y.npy", y) + + sparse.save_npz(inputs_iid_dir / "a.npz", a_iid) + sparse.save_npz(inputs_iid_dir / "q.npz", q_iid) + + np.save(inputs_dense_dir / "a.npy", a_dense) + sparse.save_npz(inputs_dense_dir / "q.npz", q_dense) + + np.save(inputs_fixed_dir / "a.npy", a_fixed) + + # Save consistent reference outputs used by run.py debug checks. + theta_external_true = np.array([tau_iid_true, tau_dense_true, prec_o_true]) + theta_internal_true = np.log(theta_external_true) + x_true = np.concatenate([u_iid, u_dense, beta_true]) + + np.save(ref_dir / "theta_internal.npy", theta_internal_true) + np.save(ref_dir / "theta_external.npy", theta_external_true) + np.save(ref_dir / "x.npy", x_true) + + q_prior = sparse.block_diag( + [ + tau_iid_true * q_iid, + tau_dense_true * q_dense, + 0.001 * sparse.identity(n_fixed), + ], + format="csc", + ) + + a_full = sparse.hstack( + [a_iid, sparse.csc_matrix(a_dense), sparse.csc_matrix(a_fixed)], format="csc" + ) + q_cond = q_prior + prec_o_true * (a_full.T @ a_full) + + sparse.save_npz(ref_dir / "qprior.npz", q_prior) + sparse.save_npz(ref_dir / "qcond.npz", q_cond) + + print("Generated stable synthetic bee_genomics data.") + print(f"n_obs={n_obs}, n_fixed={n_fixed}, n_iid={n_iid}, n_dense={n_dense}") + print(f"Saved y to {BASE_DIR / 'y.npy'}") + + +if __name__ == "__main__": + main() diff --git a/examples/bee_genomics/run.py b/examples/bee_genomics/run.py new file mode 100644 index 00000000..51873d1a --- /dev/null +++ b/examples/bee_genomics/run.py @@ -0,0 +1,167 @@ +import os +import sys + +import numpy as np + +from pathlib import Path + + +from dalia import xp, sp +from dalia.configs import dalia_config, likelihood_config, submodels_config +from dalia.core.dalia import DALIA +from dalia.core.model import Model +from dalia.submodels import GenericSubModel, RegressionSubModel +from dalia.utils import ( + print_msg, +) + +parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.append(parent_dir) +from examples_utils.parser_utils import parse_args # noqa: E402 + +# BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +BASE_DIR = Path(__file__).resolve().parent / "synthetic_data" + +if __name__ == "__main__": + print_msg("--- Example: Bee Genomics ---") + # consists of 2 generic submodels (iid + dense component) and 1 regression submodel (covariates + covariates) with a Gaussian likelihood + + # Check for parsed parameters + args = parse_args() + + # Configurations of the generic submodel + generic_dict_iid = { + "type": "generic", + "input_dir": f"{BASE_DIR}/inputs_iid", + "tau": 4, # has to be positive + "ph_tau": {"type": "gamma", "alpha": 1.0, "beta": 1e-1}, + } + generic_iid = GenericSubModel( + config=submodels_config.parse_config(generic_dict_iid), + ) + + # Configurations of the generic submodel + generic_dict_queenGRMinv = { + "type": "generic", + "input_dir": f"{BASE_DIR}/inputs_queenGRMinv", + "ph_tau": {"type": "gamma", "alpha": 1.0, "beta": 1e-1}, + } + generic_queenGRMinv = GenericSubModel( + config=submodels_config.parse_config(generic_dict_queenGRMinv), + ) + + # fixed effects + intercept + regression_dict = { + "type": "regression", + "input_dir": f"{BASE_DIR}/inputs_fixed_effects", + } + regression = RegressionSubModel( + config=submodels_config.parse_config(regression_dict), + ) + + # Likelihood + likelihood_dict = { + "type": "gaussian", + "prior_hyperparameters": {"type": "gamma", "alpha": 1.0, "beta": 1e-1}, + } + # Creation of the first model by combining the Generic submodel and the likelihood + model = Model( + submodels=[generic_iid, generic_queenGRMinv, regression], + likelihood_config=likelihood_config.parse_config(likelihood_dict), + ) + print_msg(model) + + # Configurations of DALIA + dalia_dict = { + "solver": {"type": "dense"}, + } + dalia = DALIA( + model=model, + config=dalia_config.parse_config(dalia_dict), + ) + + results = dalia.run() + + # load theta reference and set theta_internal to reference values + theta_ref_internal = xp.load(f"{BASE_DIR}/reference_outputs/theta_internal.npy") + theta_ref_external = xp.load(f"{BASE_DIR}/reference_outputs/theta_external.npy") + x_ref = xp.load(f"{BASE_DIR}/reference_outputs/x.npy") + + print_msg("\n--- Results ---") + print_msg("Theta values external:\n", results["theta"]) + print_msg("Theta values internal:\n", results["theta_internal"]) + print_msg("Internal Covariance of theta:\n", results["cov_theta_internal"]) + + print_msg("\n--- Comparisons ---") + # Compare hyperparameters + print_msg("Theta reference external:\n", theta_ref_external) + print_msg("Theta values external:\n", results["theta"]) + print_msg( + "Norm (theta external - theta_ref_external): ", + f"{xp.linalg.norm(results['theta'] - theta_ref_external):.4e}", + ) + print_msg( + "Norm (theta internal - theta_ref_internal): ", + f"{xp.linalg.norm(results['theta_internal'] - theta_ref_internal):.4e}", + ) + + # Compare latent parameters + print_msg( + "Norm (x - x_ref)/Norm(x_ref): ", + f"{xp.linalg.norm(results['x'] - x_ref) / xp.linalg.norm(x_ref):.4e}", + ) + + # Compare marginal variances of latent parameters + var_latent_params = results["marginal_variances_latent"] + Qconditional = dalia.model.construct_Q_conditional(eta=model.a @ model.x) + Qinv_ref = xp.linalg.inv(Qconditional) + print_msg( + "Norm (marg var latent - ref): ", + f"{np.linalg.norm(var_latent_params - xp.diag(Qinv_ref)):.4e}", + ) + + print_msg("\n--- Marginal distributions of the hyperparameters ---") + marginals_hp = dalia.marginal_distributions_hp() + + prec_obs = marginals_hp["hyperparameters"]["prec_o"] + quantile_pairs = prec_obs["quantiles"]["external"]["pairs"] + + print("Quantile pairs of prec_o:") + for p, q in quantile_pairs: + print(f" {p:.3f} quantile: {q:.4f}") + + no_samples = 5000 + samples = dalia.sample_posterior_latent_parameters(n_samples=no_samples) + + n_iid_latent = generic_iid.n_latent_parameters + + # randomly sample subset of indices + no_sub_indices = 10 + iid_indices = np.sort( + np.random.choice(np.arange(n_iid_latent), no_sub_indices, replace=False) + ) + print_msg("Subindices: ", iid_indices) + + # compute generic indices corresponding to the same latent variables + # NOTE: check that the iid submodel is actually first in the model, then generic & that they have the same number + if generic_queenGRMinv.n_latent_parameters != n_iid_latent: + raise ValueError( + "The number of latent parameters in the generic submodel should be the same as in the iid submodel for this." + ) + generic_indices = iid_indices + n_iid_latent + print_msg("Randomly sampled iid indices: ", iid_indices) + print_msg("Corresponding generic indices: ", generic_indices) + + # extract subset of relevant samples from different latent components + relevant_latent_idd = results["x"][iid_indices] + relevant_latent_generic = results["x"][generic_indices] + samples_iid = samples[iid_indices, :] + samples_generic = samples[generic_indices, :] + + # check sample means are close to the relevant latent parameters + print_msg("Mean of samples (iid) : ", np.mean(samples_iid, axis=1)) + print_msg("Relevant latent parameters (iid): ", relevant_latent_idd) + print_msg("Mean of samples (generic) : ", np.mean(samples_generic, axis=1)) + print_msg("Relevant latent parameters (generic): ", relevant_latent_generic) + + print_msg("\n--- Finished ---") diff --git a/examples/brainiac/generate_data.py b/examples/brainiac/depreciated_generate_data.py similarity index 82% rename from examples/brainiac/generate_data.py rename to examples/brainiac/depreciated_generate_data.py index 25924d3e..4f7a3459 100644 --- a/examples/brainiac/generate_data.py +++ b/examples/brainiac/depreciated_generate_data.py @@ -17,7 +17,7 @@ # dim(\Phi) = (b,b) no = 1000 # number of observations - b = 2 # number of latent variables (number of features) + b = 20 # number of latent variables (number of features) m = 2 # number of annotations per feature # generate random Z -> needs to be loaded with the model @@ -28,24 +28,24 @@ # TODO: how to estimate alpha_beta and beta_beta? # alpha_beta = 5.0 # beta_beta = 1.0 - #h2 = np.random.beta(alpha_beta, beta_beta) + # h2 = np.random.beta(alpha_beta, beta_beta) h2 = 0.95 print("h2: ", h2) - + # \sigma_a^2: large and fixed sigma_a2 = 1 print("sigma_a2: ", sigma_a2) # sample alpha from N(0, \sigma_a^2 I) - alpha = np.random.normal(2, np.sqrt(sigma_a2), (m, 1)) - #alpha = np.ones((m, 1)) + alpha = np.random.normal(0, np.sqrt(sigma_a2), (m, 1)) + # alpha = np.ones((m, 1)) # print(alpha) theta_original = np.concatenate(([h2], alpha.flatten())) print(theta_original) # save original hyperparameters - np.save("inputs_brainiac/theta_original.npy", theta_original) + np.save("reference_outputs/theta_original.npy", theta_original) # \Phi = 1 / \sum_k=1^B exp(Z^k \alpha) * diag(exp(Z_1 \alpha), exp(Z_2 \alpha), ... ) print("z : ", z) @@ -62,21 +62,18 @@ Qprior = diags(1 / h2_phi) print("Qprior: \n", Qprior.toarray()) - # save Qprior as a sparse matrix - sp.save_npz("inputs_brainiac/Qprior_original.npz", Qprior) - # sample full model: Y = a \beta + \epsilon # X random covariates of dimension (no, b) a = np.random.rand(no, b) - # np.save("a.npy", a) + np.save("inputs_brainiac/a.npy", a) a_sp = sp.csc_matrix(a) - sp.save_npz("inputs_brainiac/a.npz", a_sp) + # sp.save_npz("inputs_brainiac/a.npz", a_sp) # beta ~ N(0, (h^2 \Phi)^-1) var = 1 / Qprior.diagonal() print(var) beta = np.random.normal(0, np.sqrt(var)).reshape(b, 1) - np.save("inputs_brainiac/beta_original.npy", beta.flatten()) + np.save("reference_outputs/beta_original.npy", beta.flatten()) # print(beta) # beta regression parameters with @@ -86,7 +83,6 @@ # construct Qconditional Qconditional = Qprior + 1 / (1 - h2) * a_sp.T @ a_sp - sp.save_npz("inputs_brainiac/Qconditional_original.npz", Qconditional) # recover beta # beta_initial = beta @@ -103,5 +99,3 @@ print("beta recovered: ", beta_recovered.flatten()) print("beta original : ", beta.flatten()) print("norm(diff) : ", np.linalg.norm(beta_recovered - beta)) - - diff --git a/examples/brainiac/depreciated_run.py b/examples/brainiac/depreciated_run.py new file mode 100644 index 00000000..e795d35e --- /dev/null +++ b/examples/brainiac/depreciated_run.py @@ -0,0 +1,125 @@ +import os +import time + +# import matplotlib.pyplot as plt +import numpy as np +import scipy.sparse as scsp + +from dalia import xp +from dalia.configs import dalia_config, likelihood_config, submodels_config +from dalia.core.dalia import DALIA +from dalia.core.model import Model +from dalia.submodels import BrainiacSubModel +from dalia.utils import plot_marginal_distributions_hp, print_msg + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +if __name__ == "__main__": + print_msg("--- Example: Brainiac Submodel ---") + + # base_dir_data = BASE_DIR + "/inputs_brainiac_cmPRS" + base_dir_data = BASE_DIR + + m = 2 # number of annotations per feature + b = 20 # number of latent variables / number of features + sigma_a2 = 1.0 / 1.0 + precision_mat = sigma_a2 * scsp.eye(m) + + theta_ref = xp.load(f"{base_dir_data}/reference_outputs/theta_original.npy") + x_ref = np.load(f"{base_dir_data}/reference_outputs/beta_original.npy") + + print("Reference theta: ", theta_ref) + + xp.random.seed(5) + # has to be between 0 and 1 + initial_h2 = theta_ref[0] - 0.1 + initial_alpha = theta_ref[1:] + 1.5 * xp.random.randn(m) + + brainiac_dict = { + "type": "brainiac", + "input_dir": f"{base_dir_data}/inputs_brainiac", + "h2": initial_h2, + "alpha": initial_alpha, + "ph_h2": {"type": "beta", "alpha": 5.0, "beta": 1.0}, + "ph_alpha": { + "type": "gaussian_mvn", + "mean": theta_ref[1:], + "precision": precision_mat, # sp.sparse.csc_matrix(precision_mat), + }, + } + brainiac = BrainiacSubModel( + config=submodels_config.parse_config(brainiac_dict), + ) + print(brainiac) + + print("SubModel initialized.") + + likelihood_dict = {"type": "gaussian", "fix_hyperparameters": True} + model = Model( + submodels=[brainiac], + likelihood_config=likelihood_config.parse_config(likelihood_dict), + ) + + print(model) + + print("Model initialized.") + + dalia_dict = { + "solver": {"type": "dense"}, + "minimize": { + "max_iter": 50, + "gtol": 1e-3, + "disp": True, + }, + "inner_iteration_max_iter": 50, + "eps_inner_iteration": 1e-3, + "eps_gradient_f": 1e-3, + "simulation_dir": ".", + } + dalia = DALIA( + model=model, + config=dalia_config.parse_config(dalia_dict), + ) + + tic = time.time() + result = dalia.run() + toc = time.time() + print("Elapsed time dalia.run(): ", toc - tic) + + print("\n------ Compare to reference solution ------\n") + print("theta_ref: ", theta_ref) + + theta = result["theta"] + print("theta dalia:", theta) + + x = result["x"] + print("norm(x_ref - x) = ", np.linalg.norm(x_ref - x)) + + # marginal variances latent parameters + var_latent_params = result["marginal_variances_latent"] + Qconditional = dalia.model.construct_Q_conditional(eta=model.a @ model.x) + + if scsp.issparse(Qconditional): + Q_inv_ref = xp.linalg.inv(Qconditional.toarray()) + else: + Q_inv_ref = xp.linalg.inv(Qconditional) + print_msg( + "Norm (marg var latent - ref): ", + f"{np.linalg.norm(var_latent_params - xp.diag(Q_inv_ref)):.4e}", + ) + + marginals_hyperparameters = dalia.marginal_distributions_hp() + + fig, axes = plot_marginal_distributions_hp(marginals_hyperparameters) + import matplotlib.pyplot as plt + + plt.savefig("marginal_distributions_hp_brainiac.png") + + h2 = marginals_hyperparameters["hyperparameters"]["h2"] + quantile_pairs = h2["quantiles"]["external"]["pairs"] + + print("Quantile pairs of phi:") + for p, q in quantile_pairs: + print(f" {p:.3f} quantile: {q:.4f}") + + print_msg("\n--- Finished ---") diff --git a/examples/brainiac/generate_synthetic_dataset.py b/examples/brainiac/generate_synthetic_dataset.py new file mode 100644 index 00000000..35862b8b --- /dev/null +++ b/examples/brainiac/generate_synthetic_dataset.py @@ -0,0 +1,107 @@ +from pathlib import Path +from typing import Literal + +import numpy as np +from scipy import sparse as sp + +np.random.seed(5) + +if __name__ == "__main__": + + # Study parameters + n_observations: int = 100 # keep: 100 + n_features: int = 20 # keep: 20 + n_annotations_per_features: int = 2 # keep: 2 + + # Model parameters + h2: float = 0.9 + + # General parameters + model_format: Literal["dense", "sparse"] = "dense" + density: float = 0.4 # only used if model_format is "sparse" + + path_script: Path = Path(__file__).parent + path_inputs: Path = path_script / "inputs_brainiac" + path_reference: Path = path_inputs / "reference" + + path_inputs.mkdir(parents=True, exist_ok=True) + path_reference.mkdir(parents=True, exist_ok=True) + + # 1. Generate random Z matrix + z = np.random.rand(n_features, n_annotations_per_features) + + # 2. Sample alpha(s) from N(0, 1) + alpha = np.random.normal(0, 1, (n_annotations_per_features, 1)) + print("alpha: ", alpha.flatten()) + + # 3. Construct reference hyperparameters vector + theta = np.concatenate(([h2], alpha.flatten())) + + # 4. Construct reference prior precision matrix + normalized_exp_Z_alpha = np.exp(z @ alpha) / np.sum(np.exp(z @ alpha)) + + h2_phi = h2 * normalized_exp_Z_alpha.flatten() + Q_prior = sp.diags(1 / h2_phi) + + # 5. Generate random projection matrix "a" + if model_format == "dense": + a = np.random.rand(n_observations, n_features) + elif model_format == "sparse": + a = sp.random( + n_observations, n_features, density=density, format="csc", dtype=np.float64 + ) + + # 6. Construct reference beta vector + var = 1 / Q_prior.diagonal() + beta = np.random.normal(0, np.sqrt(var), n_features) + + # 7. Generate observation vector: sample full model: Y = a beta + epsilon + if model_format == "dense": + y = a @ beta + elif model_format == "sparse": + y = a.dot(beta) + y += np.random.normal(0, np.sqrt(1 - h2), n_observations) + + # 8. Construct reference conditional precision matrix + if model_format == "dense": + Q_conditional = Q_prior.toarray() + 1 / (1 - h2) * (a.T @ a) + elif model_format == "sparse": + Q_conditional = Q_prior + 1 / (1 - h2) * (a.T @ a).tocsc() + + # Print Summary + print("Generated BRAINIAC synthetic dataset with the following parameters:") + print(f" - Number of observations: {n_observations}") + print(f" - Number of features (latent variables): {n_features}") + print(f" - Number of annotations per feature: {n_annotations_per_features}") + print(f" - Model format: {model_format}") + if model_format == "sparse": + print(f" - Projection matrix density: {density}") + print(f" - h2: {h2}") + print() + print("Saved the following files:") + print(f" - BRAINIAC inputs in {path_inputs.resolve()}") + print(f" - BRAINIAC reference outputs in {path_reference.resolve()}") + + # Save BRAINIAC inputs + np.save(path_inputs / "z.npy", z) + if model_format == "dense": + np.save(path_inputs / "a.npy", a) + elif model_format == "sparse": + sp.save_npz(path_inputs / "a.npz", a) + np.save(path_script / "y.npy", y) + model_params = { + "n_observations": n_observations, + "n_features": n_features, + "n_annotations_per_features": n_annotations_per_features, + "h2": h2, + } + np.save(path_inputs / "model_params.npy", model_params) + + # Save BRAINIAC references + np.save(path_reference / "theta.npy", theta) + sp.save_npz(path_reference / "Q_prior.npz", Q_prior) + np.save(path_reference / "beta.npy", beta) + if model_format == "dense": + np.save(path_reference / "Q_conditional.npy", Q_conditional) + elif model_format == "sparse": + sp.save_npz(path_reference / "Q_conditional.npz", Q_conditional) diff --git a/examples/brainiac/plotting.py b/examples/brainiac/plotting.py new file mode 100644 index 00000000..1b0e57fa --- /dev/null +++ b/examples/brainiac/plotting.py @@ -0,0 +1,242 @@ +import matplotlib.pyplot as plt +import numpy as np +from scipy.stats import norm + + +def plot_prior_hp(param_name, theta_interval, prior_hp, log=False): + """ + Plot prior distribution of a hyperparameter. + + All priors are in log-scale. Therefore exponeniate unless log=True. + + Parameters + ---------- + param_name : str + Name of the hyperparameter. + theta_interval: tuple of float + Interval (min, max) for plotting the prior. + prior_hp : PriorHyperparameters + Prior hyperparameter object. + log : bool, optional + Whether to plot in log-scale or original scale. Default is False. + + + Note + ---- + If log is True, the plot will be in log-scale. Otherwise, it will be in the original scale. + + + Returns + ------- + fig, ax : matplotlib Figure and Axes + The figure and axes objects containing the plot. + """ + + if theta_interval[0] == 0: + theta_interval = (1e-6, theta_interval[1]) + + theta_vals = np.linspace(theta_interval[0], theta_interval[1], 200) + prior_vals = np.array([prior_hp.evaluate_log_prior(theta) for theta in theta_vals]) + + if log: + xlabel = f"{param_name}" + ylabel = "Log Prior Density" + else: + prior_vals = np.exp(prior_vals) + xlabel = f"{param_name}" + ylabel = "Prior Density" + + fig, ax = plt.subplots(figsize=(8, 5)) + ax.plot(theta_vals, prior_vals, "b-", linewidth=2) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.set_title(f"Prior Distribution of {param_name}") + ax.grid(True, alpha=0.3) + + return fig, ax + + +def plot_marginal_distributions_hp(marginals_hp): + """Plot marginal distributions of hyperparameters in both internal and external parametrizations.""" + + # Get all hyperparameters + hyperparams = marginals_hp["hyperparameters"] + n_params = len(hyperparams) + + # Create subplot grid: n_params rows, 2 columns (internal left, external right) + fig, axes = plt.subplots(n_params, 2, figsize=(15, 5 * n_params)) + + # Handle case of single parameter + if n_params == 1: + axes = axes.reshape(1, -1) + + # Quantile colors and labels + colors = ["#DEB887", "#DEB887", "darkred", "#DEB887", "#DEB887"] + labels = ["2.5%", "25%", "50%", "75%", "97.5%"] + + for row, (param_name, param_data) in enumerate(hyperparams.items()): + # Get internal parameters + mean_internal = param_data["mean_internal"] + var_internal = param_data["variance_internal"] + std_internal = np.sqrt(var_internal) + + # Get external parameters + mean_external = param_data["mean_external"] + var_external = param_data["variance_external"] + theta_external, pdf_external = param_data["pdf_data"] + + # Get quantiles + quantile_pairs_internal = param_data["quantiles"]["internal"]["pairs"] + quantile_pairs_external = param_data["quantiles"]["external"]["pairs"] + + # ===== LEFT PLOT: INTERNAL PARAMETRIZATION ===== + ax_left = axes[row, 0] + + # Create internal distribution (Gaussian) + x_internal = np.linspace( + mean_internal - 4 * std_internal, mean_internal + 4 * std_internal, 100 + ) + pdf_internal = norm.pdf(x_internal, loc=mean_internal, scale=std_internal) + + # Plot internal PDF + ax_left.plot( + x_internal, pdf_internal, "b-", linewidth=2, label="PDF (Internal)" + ) + + # Mark internal mean + ax_left.axvline( + mean_internal, + color="red", + linestyle="--", + linewidth=2, + label=f"Mean = {mean_internal:.3f}", + ) + + # Mark internal quantiles + for i, (prob, q_val) in enumerate(quantile_pairs_internal): + if i < len(labels): + ax_left.axvline( + q_val, + color=colors[i], + linestyle=":", + linewidth=2, + label=f"{labels[i]} = {q_val:.3f}", + ) + + ax_left.set_xlabel(f"{param_name} (internal scale)") + ax_left.set_ylabel("PDF") + ax_left.set_title(f"{param_name}: Internal Distribution (Gaussian)") + ax_left.legend() + ax_left.grid(True, alpha=0.3) + + # ===== RIGHT PLOT: EXTERNAL PARAMETRIZATION ===== + ax_right = axes[row, 1] + + # Plot external PDF + ax_right.plot(theta_external, pdf_external, "b-", linewidth=2, label="PDF") + + # Mark external mean + ax_right.axvline( + mean_external, + color="red", + linestyle="--", + linewidth=2, + label=f"Mean = {mean_external:.3f}", + ) + + # Mark external quantiles + for i, (prob, q_val) in enumerate(quantile_pairs_external): + if i < len(labels): + ax_right.axvline( + q_val, + color=colors[i], + linestyle=":", + linewidth=2, + label=f"{labels[i]} = {q_val:.3f}", + ) + + ax_right.set_xlabel(f"{param_name} ") + ax_right.set_ylabel("PDF") + ax_right.set_title(f"{param_name}: Marginal Distribution") + ax_right.legend() + ax_right.grid(True, alpha=0.3) + + # plt.tight_layout() + # plt.show() + + return fig, axes + + +def plot_marginal_distributions_hp_external(marginals_hp, true_means=None): + """Plot marginal distributions of hyperparameters in both internal and external parametrizations.""" + + # Get all hyperparameters + hyperparams = marginals_hp["hyperparameters"] + n_params = len(hyperparams) + + # Create subplot grid: 1 row, n_params columns + fig, axes = plt.subplots(1, n_params, figsize=(5 * n_params, 5)) + + # Handle case of single parameter - make it iterable + if n_params == 1: + axes = [axes] + + # Quantile colors and labels + colors = ["#DEB887", "#DEB887", "darkblue", "#DEB887", "#DEB887"] + labels = ["2.5%", "25%", "50%", "75%", "97.5%"] + + for col, (param_name, param_data) in enumerate(hyperparams.items()): + # Get external parameters + mean_external = param_data["mean_external"] + var_external = param_data["variance_external"] + theta_external, pdf_external = param_data["pdf_data"] + + # Get quantiles + quantile_pairs_external = param_data["quantiles"]["external"]["pairs"] + + # ===== PLOT: EXTERNAL PARAMETRIZATION ===== + ax_right = axes[col] + + # Plot external PDF + ax_right.plot(theta_external, pdf_external, "b-", linewidth=2, label="PDF") + + # Mark external mean + ax_right.axvline( + mean_external, + color="darkgreen", + linestyle="--", + linewidth=2, + label=f"Mean = {mean_external:.3f}", + ) + + # Mark true mean if provided + if true_means is not None and col < len(true_means): + ax_right.axvline( + true_means[col], + color="red", + linestyle="-", + linewidth=2, + label=f"True Mean = {true_means[col]:.3f}", + ) + + # Mark external quantiles + for i, (prob, q_val) in enumerate(quantile_pairs_external): + if i < len(labels): + ax_right.axvline( + q_val, + color=colors[i], + linestyle=":", + linewidth=2, + label=f"{labels[i]} = {q_val:.3f}", + ) + + ax_right.set_xlabel(f"{param_name} ") + ax_right.set_ylabel("PDF") + ax_right.set_title(f"{param_name}: Marginal Distribution") + ax_right.legend() + ax_right.grid(True, alpha=0.3) + + # plt.tight_layout() + # plt.show() + + return fig, axes diff --git a/examples/brainiac/run.py b/examples/brainiac/run.py index 9b347ceb..1c3e82ec 100644 --- a/examples/brainiac/run.py +++ b/examples/brainiac/run.py @@ -1,9 +1,9 @@ -import os import time +from pathlib import Path -# import matplotlib.pyplot as plt +import matplotlib.pyplot as plt import numpy as np -import scipy.sparse as scsp +import scipy.sparse as sp from dalia import xp from dalia.configs import dalia_config, likelihood_config, submodels_config @@ -11,92 +11,120 @@ from dalia.core.model import Model from dalia.submodels import BrainiacSubModel from dalia.utils import print_msg - -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) +from plotting import plot_marginal_distributions_hp_external if __name__ == "__main__": - print_msg("--- Example: Brainiac Submodel ---") - - base_dir_data = BASE_DIR + "/inputs_brainiac_cmPRS" - - m = 2 # number of annotations per feature - b = 1000 # number of latent variables / number of features - sigma_a2 = 1.0 / 1.0 - precision_mat = sigma_a2 * scsp.eye(m) - - theta_ref = xp.load(f"{base_dir_data}/theta_original.npy") - x_ref = np.load(f"{base_dir_data}/beta_original.npy") - - xp.random.seed(5) - # has to be between 0 and 1 - initial_h2 = theta_ref[0] - 0.1 - initial_alpha = theta_ref[1:] + 0.5 * xp.random.randn(m - 1) - - brainiac_dict = { - "type": "brainiac", - "input_dir": f"{base_dir_data}/inputs_brainiac", - "h2": initial_h2, - "alpha": initial_alpha, - "ph_h2": {"type": "beta", "alpha": 5.0, "beta": 1.0}, - "ph_alpha": { - "type": "gaussian_mvn", - "mean": theta_ref[1:], - "precision": precision_mat, # sp.sparse.csc_matrix(precision_mat), - }, - } - brainiac = BrainiacSubModel( - config=submodels_config.parse_config(brainiac_dict), + print_msg(f"Running BRAINIAC model on synthetic dataset.") + + path_inputs: Path = Path(__file__).parent / "inputs_brainiac" + path_reference: Path = path_inputs / "reference" + + # 1. Load model parameters + model_params: dict = np.load( + path_inputs / "model_params.npy", allow_pickle=True + ).item() + n_observations: int = model_params["n_observations"] + n_features: int = model_params["n_features"] + n_annotations_per_features: int = model_params["n_annotations_per_features"] + h2: float = model_params["h2"] + sigma_a2: float = 5.0 + + # 2. Load references + theta_reference: np.ndarray = np.load(path_reference / "theta.npy") + x_reference: np.ndarray = np.load(path_reference / "beta.npy") + + # 3. Create starting values for DALIA + initial_h2: float = theta_reference[0] - 0.1 + initial_alpha: np.ndarray = theta_reference[1:] + 0.5 * np.random.randn( + n_annotations_per_features ) - print(brainiac) - print("SubModel initialized.") - - likelihood_dict = {"type": "gaussian", "fix_hyperparameters": True} + # 4. Initialize the Brainiac submodel and the DALIA model + brainiac = BrainiacSubModel( + config=submodels_config.parse_config( + { + "type": "brainiac", + "input_dir": str(path_inputs.resolve()), + "h2": initial_h2, + "alpha": initial_alpha, + "ph_h2": {"type": "beta", "alpha": 1.0, "beta": 1.0}, + "ph_alpha": { + "type": "gaussian_mvn", + "mean": xp.zeros(n_annotations_per_features), + "precision": (1.0 / sigma_a2) * sp.eye(n_annotations_per_features), + }, + } + ) + ) model = Model( submodels=[brainiac], - likelihood_config=likelihood_config.parse_config(likelihood_dict), + likelihood_config=likelihood_config.parse_config( + {"type": "gaussian", "fix_hyperparameters": True} + ), ) + print_msg(model) - print(model) - - print("Model initialized.") - - dalia_dict = { - "solver": {"type": "dense"}, - "minimize": { - "max_iter": 50, - "gtol": 1e-3, - "disp": True, - }, - "inner_iteration_max_iter": 50, - "eps_inner_iteration": 1e-3, - "eps_gradient_f": 1e-3, - "simulation_dir": ".", - } + # 5. Initialize DALIA dalia = DALIA( model=model, - config=dalia_config.parse_config(dalia_dict), + config=dalia_config.parse_config( + { + "solver": {"type": "dense"}, + "minimize": { + "max_iter": 50, + "gtol": 1e-3, + "disp": True, + }, + "inner_iteration_max_iter": 50, + "eps_inner_iteration": 1e-3, + "eps_gradient_f": 1e-3, + "simulation_dir": ".", + } + ), ) - tic = time.time() + # 6. Run inference + tic = time.perf_counter() result = dalia.run() - toc = time.time() - print("Elapsed time dalia.run(): ", toc - tic) - - print("\n------ Compare to reference solution ------\n") - print("theta_ref: ", theta_ref) + toc = time.perf_counter() + print_msg(f"DALIA finished in {toc - tic:.2f} seconds.") - theta = result["theta_interpret"] - print("theta_interpret:", theta) + # 7. Compare to reference solution + print_msg("\n------ Compare to reference solution ------\n") + print_msg("theta_reference: ", theta_reference) + print_msg("theta dalia:", result["theta"]) - x = result["x"] - print("norm(x_ref - x) = ", np.linalg.norm(x_ref - x)) + print_msg("norm(x_reference - x) = ", np.linalg.norm(xp.asarray(x_reference) - result["x"])) - # marginal variances latent parameters + # 8. Check marginal variances of latent parameters var_latent_params = result["marginal_variances_latent"] - Qconditional = dalia.model.construct_Q_conditional(eta=model.a @ model.x) - Qinv_ref = xp.linalg.inv(Qconditional) + Q_conditional = dalia.model.construct_Q_conditional(eta=model.a @ model.x) + + if sp.issparse(Q_conditional): + Q_inv_ref = xp.linalg.inv(Q_conditional.toarray()) + else: + Q_inv_ref = xp.linalg.inv(Q_conditional) print_msg( - "Norm (marg var latent - ref): ", - f"{np.linalg.norm(var_latent_params - xp.diag(Qinv_ref)):.4e}", + f"Norm (marginal variances of latent parameters - reference): {xp.linalg.norm(var_latent_params - xp.diag(Q_inv_ref)):.4e}", ) + + # 9. Compute marginal distributions of the hyperparameters + marginals_hyperparameters = dalia.marginal_distributions_hp() + + # 10. Plot marginal distributions of hyperparameters + # fig, axes = plot_marginal_distributions_hp(marginals_hyperparameters) + # plt.savefig("marginal_distributions_hyperparameters.png") + + fig, axes = plot_marginal_distributions_hp_external( + marginals_hyperparameters, theta_reference + ) + plt.savefig("marginal_distributions_hyperparameters.png") + + h2 = marginals_hyperparameters["hyperparameters"]["h2"] + quantile_pairs = h2["quantiles"]["external"]["pairs"] + + print("Quantile pairs of h2:") + for p, q in quantile_pairs: + print(f" {p:.3f} quantile: {q:.4f}") + + print_msg("\n--- Finished ---") diff --git a/examples/g_ar1/generate_data.py b/examples/g_ar1/generate_data.py new file mode 100644 index 00000000..cba3858e --- /dev/null +++ b/examples/g_ar1/generate_data.py @@ -0,0 +1,105 @@ +import os +from pathlib import Path + +import numpy as np +import scipy.sparse as sp +from scipy.sparse.linalg import spsolve, spsolve_triangular +from scipy.sparse import csc_matrix +from scipy.linalg import cholesky + +BASE_DIR: Path = Path(__file__).parent + +if __name__ == "__main__": + + np.random.seed(5) + n = 1000 + + ## define priors + s2 = 5 + tau = 1 / s2 + phi = 0.9 + # noise obs + obs_noise_prec = 100 + theta_original = [ + phi, + tau, + obs_noise_prec, + ] + + denom = s2 * (1 - phi**2) + + diag = [(1 + phi**2) / denom] * n + diag[0] = diag[-1] = 1 / denom + off_diag = [-phi / denom] * (n - 1) + + Q = sp.diags([diag, off_diag, off_diag], [0, -1, 1]) + + # Compute sparse Cholesky factorization: Q = L @ L.T + # For tridiagonal matrix, we can use dense Cholesky on small blocks or scipy + Q_csc = Q.tocsc() + + print("Q shape:", Q.shape, "Q nnz:", Q.nnz) + print("Q sparsity:", 100 * Q.nnz / (Q.shape[0] * Q.shape[1]), "%") + print(Q.toarray()[:6, :6]) + + # Method 1: Use dense Cholesky (for moderate sizes this is still efficient) + Q_dense = Q.toarray() + L_dense = cholesky(Q_dense, lower=True) + L = csc_matrix(L_dense) + + print("L nnz:", L.nnz, "L sparsity:", 100 * L.nnz / (L.shape[0] * L.shape[1]), "%") + + # Efficient sampling: generate z ~ N(0,I), then solve L @ u = z + z = np.random.normal(0, 1, size=n) + + # Solve L @ u = z using sparse triangular solver + u = spsolve_triangular(L, z, lower=True) + + # Verify the sampling worked correctly + print("Sample u statistics - mean:", np.mean(u), "std:", np.std(u), ". Should be around sqrt(s2) =", np.sqrt(s2)) + + intercept = 2 + + x = np.concatenate((u, [intercept])) + print("x: ", x[:10]) + + os.makedirs(BASE_DIR / "reference_outputs", exist_ok=True) + np.save(BASE_DIR / "reference_outputs" / "x_original.npy", x) + np.save(BASE_DIR / "reference_outputs" / "theta_original.npy", theta_original) + + os.makedirs(BASE_DIR / "inputs_ar1", exist_ok=True) + np.save(BASE_DIR / "inputs_ar1" / "x.npy", u) + + a_ar1 = sp.eye(n) + sp.save_npz(BASE_DIR / "inputs_ar1" / "a.npz", a_ar1) + + a_regression = sp.csr_matrix(np.ones((n, 1))) + os.makedirs(BASE_DIR / "inputs_regression", exist_ok=True) + sp.save_npz(BASE_DIR / "inputs_regression" / "a.npz", a_regression) + + eta = a_ar1 @ u + intercept + + print("eta: ", eta[:6]) + np.save(BASE_DIR / "inputs_ar1" / "x_original.npy", eta) + + noise = np.random.normal(0, np.sqrt(1 / obs_noise_prec), size=eta.shape) + print("noise: ", noise[:10]) + y = eta + noise + np.save(BASE_DIR / "y.npy", y) + + print("y: ", y[:10]) + + Qprior = sp.block_diag([Q, sp.csr_matrix([[0.001]])]) + + a = sp.hstack([a_ar1, a_regression]) # a_ar1 # + Qcond = Qprior + obs_noise_prec * a.T @ a + print("Qcond: \n", Qcond.toarray()[:6,:6]) + + b = obs_noise_prec * a.T @ y + print("b: ", b[:10]) + # x_est = np.linalg.solve(Qcond.toarray(), b) + x_est = spsolve(csc_matrix(Qcond), b) + print("norm(x - x_est): ", np.linalg.norm(x - x_est)) + + print("norm(eta - eta_est): ", np.linalg.norm(a @ x - a @ x_est)) + print("normalized norm(eta - eta_est): ", np.linalg.norm(a @ x - a @ x_est) / np.linalg.norm(a @ x)) diff --git a/examples/g_ar1/run.py b/examples/g_ar1/run.py new file mode 100644 index 00000000..ef42c8ea --- /dev/null +++ b/examples/g_ar1/run.py @@ -0,0 +1,181 @@ +from pathlib import Path + +import numpy as np + +from dalia import xp +from dalia.configs import likelihood_config, dalia_config, submodels_config +from dalia.core.model import Model +from dalia.core.dalia import DALIA +from dalia.submodels import AR1SubModel, RegressionSubModel +from dalia.utils import print_msg, plot_marginal_distributions_hp, plot_prior_hp # , extract_diagonal + +BASE_DIR: Path = Path(__file__).parent + +if __name__ == "__main__": + + np.random.seed(3) + + # load reference output + theta_original = np.load(BASE_DIR / "reference_outputs" / "theta_original.npy") + print( + "theta original: ", + theta_original, + ) + + theta_initial = theta_original #[0.6, 1.0, 3.0] + print("theta initial: ", theta_initial) + + x_original = np.load(BASE_DIR / "reference_outputs" / "x_original.npy") + print("x original: ", x_original[:10]) + print("dim(x original): ", x_original.shape) + + ar1_dict = { + "type": "ar1", + "input_dir": f"{BASE_DIR}/inputs_ar1", + "phi": 0.5, # has to be between 0 and 1 + "ph_phi": {"type": "beta", "alpha": 5.0, "beta": 1.0}, + # initial guess on the precision + "tau": 3, # has to be positive + "ph_tau": {"type": "gamma", "alpha": 2.0, "beta": 1.0}, + # initial guess on the variance + # "sigma2": 0.33, # has to be positive + # "ph_sigma2": {"type": "invgamma", "alpha": 2.0, "beta": 1.0}, + } + ar1 = AR1SubModel( + config=submodels_config.parse_config(ar1_dict), + ) + + # Configurations of the regression submodel + regression_dict = { + "type": "regression", + "input_dir": f"{BASE_DIR}/inputs_regression", + "n_fixed_effects": 1, + "fixed_effects_prior_precision": 0.001, + } + regression = RegressionSubModel( + config=submodels_config.parse_config(regression_dict), + ) + + likelihood_dict = { + "type": "gaussian", + "prec_o": 20, + # "prior_hyperparameters": { + # "type": "penalized_complexity", + # "alpha": 0.01, + # "u": 5, + # }, + "prior_hyperparameters": {"type": "gaussian", "mean": theta_original[2], "precision": 0.05}, + } + + model = Model( + submodels=[ar1, regression], # + likelihood_config=likelihood_config.parse_config(likelihood_dict), + ) + print_msg(model) + + # plot phi + # theta_interval = [0, 1] + # prior_hp = model.prior_hyperparameters[0] + # fig, ax = plot_prior_hp("phi", theta_interval, prior_hp) + + # plot tau + theta_interval = [0, 5] + prior_hp = model.prior_hyperparameters[1] + fig, ax = plot_prior_hp("tau", theta_interval, prior_hp) + + import matplotlib.pyplot as plt + plt.show() + + Qprior = model.construct_Q_prior() + print("Qprior: \n", Qprior.toarray()[:6, :6]) + Qinv = xp.linalg.inv(Qprior.toarray()) + geom_mean = xp.exp(xp.mean(xp.log(Qinv.diagonal()))) + print("Geometric mean of Qinv diagonal: ", geom_mean) + + # in gaussian case x = 0, thus eta = 0 + x_i = xp.zeros(model.n_latent_parameters) + eta = model.a @ x_i + Qcond = model.construct_Q_conditional(eta=eta) + print("Qcond: \n", Qcond.toarray()[:6, :6]) + + b = model.construct_information_vector(eta=eta, x_i=x_i) + print("b: ", b[:10]) + + x_est = xp.linalg.solve(Qcond.toarray(), b) + #print("x est: ", x_est) + print("norm(x_original - x_est): ", xp.linalg.norm(xp.asarray(x_original) - x_est)) + + # Configurations of DALIA + dalia_dict = { + "solver": {"type": "dense"}, + "minimize": { + "max_iter": 100, + "gtol": 1e-3, + "disp": True, + "maxcor": len(model.theta_external), + }, + "f_reduction_tol": 1e-3, + "theta_reduction_tol": 1e-4, + "inner_iteration_max_iter": 50, + "eps_inner_iteration": 1e-3, + "eps_gradient_f": 1e-3, + "simulation_dir": ".", + "verbosity": 0, + } + + dalia = DALIA( + model=model, + config=dalia_config.parse_config(dalia_dict), + ) + + print("initial model theta: ", model.theta_external) + + print("\nCalling DALIA.run()") + results = dalia.run() + + print_msg("\n--- Results ---") + + theta = results["theta"] + print("theta: ", np.round(theta, 4)) + print("theta original: ", theta_original) + + print_msg("Covariance of theta:\n", results["cov_theta_internal"]) + print_msg( + "Mean of the fixed effects:\n", + results["x"][-model.submodels[-1].n_fixed_effects :], + ) + + # print("x: ", results["x"]) + # print("x_original: ", x_original) + + # print("eta: ", model.a @ x_original) + # print("eta est: ", model.a @ results["x"]) + + print_msg("\n--- Comparisons ---") + print("norm(eta - eta_est): ", xp.linalg.norm(model.a @ xp.asarray(x_original) - model.a @ results["x"])) + print("normalized norm(eta - eta_est): ", xp.linalg.norm(model.a @ xp.asarray(x_original) - model.a @ results["x"]) / xp.linalg.norm(model.a @ xp.asarray(x_original))) + + # Compare marginal variances of latent parameters + var_latent_params = results["marginal_variances_latent"] + Qconditional = dalia.model.construct_Q_conditional(eta=model.a @ model.x) + Qinv_ref = xp.linalg.inv(Qconditional.toarray()) + print_msg( + "Norm (marg var latent - ref): ", + f"{xp.linalg.norm(var_latent_params - xp.diag(Qinv_ref)):.4e}", + ) + + print_msg("\n--- Marginal distributions of the hyperparameters ---") + marginals_hp = dalia.marginal_distributions_hp() + + fig, axes = plot_marginal_distributions_hp(marginals_hp) + import matplotlib.pyplot as plt + plt.show() + + phi = marginals_hp['hyperparameters']['phi'] + quantile_pairs = phi['quantiles']['external']['pairs'] + + print("Quantile pairs of phi:") + for p, q in quantile_pairs: + print(f" {p:.3f} quantile: {q:.4f}") + + print_msg("\n--- Finished ---") \ No newline at end of file diff --git a/examples/g_generic/generate_data.py b/examples/g_generic/generate_data.py new file mode 100644 index 00000000..918392cf --- /dev/null +++ b/examples/g_generic/generate_data.py @@ -0,0 +1,59 @@ +## generate synthetic data for the generic submodel + +import os +import numpy as np + +# add .. to the path +import sys + +import numpy as np +import scipy.sparse as sp + +sys.path.append("..") + +np.random.seed(44) + +path = os.path.dirname(__file__) + +if __name__ == "__main__": + n_latent_parameters = 300 + nrep = 1 + + q_temp = np.random.randn(n_latent_parameters, n_latent_parameters) + q = q_temp @ q_temp.T + + tau = 2.5 + s = 0.1 # sd, prec 100 + theta_ref = [tau, 1 / s**2] + + Q_scaled = tau * q + + L_Q_prior = np.linalg.cholesky(Q_scaled) + z = np.random.normal(size=n_latent_parameters) + x = np.linalg.solve(L_Q_prior.T, z) + + # construct projection matrix + a = sp.kron(np.ones((nrep, 1)), sp.eye(n_latent_parameters)) + + # y = Ax + noise + y = (a @ x) + np.random.normal(scale=s, size=n_latent_parameters * nrep) + + # save the synthetic data + np.save(f"{path}/y.npy", y) + + # create a subfolder called inputs + os.makedirs(f"{path}/inputs_generic", exist_ok=True) + q = sp.coo_matrix(q) + sp.save_npz(f"{path}/inputs_generic/q.npz", q) + + # save a as .npz + sp.save_npz(f"{path}/inputs_generic/a.npz", a) + # sparse.save_npz(f"{path}/inputs_generic/a.npz", a) + + # save original latent parameters + os.makedirs(f"{path}/reference_outputs", exist_ok=True) + + np.save(f"{path}/reference_outputs/x_ref.npy", x) + + # save original hyperparameter theta + np.save(f"{path}/reference_outputs/theta_ref.npy", theta_ref) diff --git a/examples/g_generic/run.py b/examples/g_generic/run.py new file mode 100644 index 00000000..e1951a08 --- /dev/null +++ b/examples/g_generic/run.py @@ -0,0 +1,154 @@ +import os +import sys + +import numpy as np + +from dalia import xp +from dalia.configs import dalia_config, likelihood_config, submodels_config +from dalia.core.dalia import DALIA +from dalia.core.model import Model +from dalia.submodels import GenericSubModel +from dalia.utils import ( + extract_diagonal, + get_host, + print_msg, + plot_marginal_distributions_hp, + plot_prior_hp, +) + +parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.append(parent_dir) +from examples_utils.parser_utils import parse_args # noqa: E402 + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +if __name__ == "__main__": + print_msg("--- Example: Gaussian Generic ---") + + # Check for parsed parameters + args = parse_args() + + # Configurations of the generic submodel + generic_dict = { + "type": "generic", + "input_dir": f"{BASE_DIR}/inputs_generic", + # initial guess on the precision + "tau": 4, # has to be positive + "ph_tau": {"type": "gamma", "alpha": 1.0, "beta": 1e-5}, + # "ph_tau": {"type": "gaussian", "mean": 5.0, "precision": 1.5}, + } + generic = GenericSubModel( + config=submodels_config.parse_config(generic_dict), + ) + + # Likelihood + likelihood_dict = { + "type": "gaussian", + "prec_o": 1.0, + "prior_hyperparameters": {"type": "gamma", "alpha": 1.0, "beta": 1e-5}, + # "prior_hyperparameters": {"type": "gaussian", "mean": 1.0, "precision": 0.05}, + # "prior_hyperparameters": { + # "type": "penalized_complexity", + # "alpha": 0.01, + # "u": 5, + # }, + } + # Creation of the first model by combining the Generic submodel and the likelihood + model = Model( + submodels=[generic], + likelihood_config=likelihood_config.parse_config(likelihood_dict), + ) + print_msg(model) + + ## Plot prior of hyperparameter -- identification by [0], [1], ... not amazing but works for now + theta_interval = [1e-6, 15] + prior_hp = model.prior_hyperparameters[0] + + # fig, ax = plot_prior_hp("tau", theta_interval, prior_hp) + # import matplotlib.pyplot as plt + # plt.show() + + # Configurations of DALIA + dalia_dict = { + "solver": {"type": "dense"}, + "minimize": { + "max_iter": 50, + "gtol": 1e-3, + "disp": True, + }, + "inner_iteration_max_iter": 50, + "eps_inner_iteration": 1e-3, + "eps_gradient_f": 1e-3, + "simulation_dir": ".", + } + dalia = DALIA( + model=model, + config=dalia_config.parse_config(dalia_dict), + ) + + theta_ref = xp.load(f"{BASE_DIR}/reference_outputs/theta_ref.npy") + x_ref = xp.load(f"{BASE_DIR}/reference_outputs/x_ref.npy") + + results = dalia.run() + + print_msg("\n--- Results ---") + print_msg("theta reference:\n", theta_ref) + print_msg("Theta values external:\n", results["theta"]) + print_msg("Theta values internal:\n", results["theta_internal"]) + print_msg("Internal Covariance of theta:\n", results["cov_theta_internal"]) + # print_msg( + # "Mean of the latent parameters:\n", + # results["x"], + # ) + + print_msg("\n--- Comparisons ---") + # Compare hyperparameters + theta_ref = xp.load(f"{BASE_DIR}/reference_outputs/theta_ref.npy") + print_msg("Reference theta:", theta_ref) + print_msg( + "Norm (theta - theta_ref): ", + f"{xp.linalg.norm(results['theta'] - theta_ref):.4e}", + ) + + # Compare latent parameters + x_ref = xp.load(f"{BASE_DIR}/reference_outputs/x_ref.npy") + print_msg( + "Norm (x - x_ref): ", + f"{xp.linalg.norm(results['x'] - x_ref):.4e}", + ) + + # Compare marginal variances of latent parameters + var_latent_params = results["marginal_variances_latent"] + Qconditional = dalia.model.construct_Q_conditional(eta=model.a @ model.x) + Qinv_ref = xp.linalg.inv(Qconditional.toarray()) + print_msg( + "Norm (marg var latent - ref): ", + f"{np.linalg.norm(var_latent_params - xp.diag(Qinv_ref)):.4e}", + ) + + # Compare marginal variances of observations + var_obs = dalia.get_marginal_variances_observations( + theta_external=theta_ref, x_star=x_ref + ) + var_obs_ref = extract_diagonal(model.a @ Qinv_ref @ model.a.T) + print_msg( + "Norm (var_obs - var_obs_ref): ", + f"{xp.linalg.norm(var_obs - var_obs_ref):.4e}", + ) + + print_msg("\n--- Marginal distributions of the hyperparameters ---") + marginals_hp = dalia.marginal_distributions_hp() + + fig, axes = plot_marginal_distributions_hp(marginals_hp) + import matplotlib.pyplot as plt + + plt.savefig(f"gr_marginal_distributions_hp.png") + + prec_obs = marginals_hp["hyperparameters"]["prec_o"] + quantile_pairs = prec_obs["quantiles"]["external"]["pairs"] + + print("Quantile pairs of prec_o:") + for p, q in quantile_pairs: + print(f" {p:.3f} quantile: {q:.4f}") + + print_msg("\n--- Finished ---") diff --git a/examples/gr/preprocessing.py b/examples/gr/generate_data.py similarity index 84% rename from examples/gr/preprocessing.py rename to examples/gr/generate_data.py index 51330182..daa4cbbf 100644 --- a/examples/gr/preprocessing.py +++ b/examples/gr/generate_data.py @@ -17,7 +17,7 @@ path = os.path.dirname(__file__) if __name__ == "__main__": - n_observations = 1000 + n_observations = 20 n_latent_parameters = 6 z = np.random.normal(size=n_latent_parameters) @@ -28,12 +28,12 @@ x = L_Sigma_prior @ z a = sparse.random(n_observations, n_latent_parameters, density=0.5) - theta_observations = np.log(3) + theta_observations = 2.0 print(f"theta_observations: {theta_observations}") theta_likelihood: dict = {"theta_observations": theta_observations} # generate x from a gaussian distribution of dimensions n_latent_parameters with mean 0 and precision exp(theta_observations) - variance = 1 / np.exp(theta_observations) + variance = 1 / theta_observations eta = a @ x y = np.random.normal(eta, scale=np.sqrt(variance), size=n_observations) print(f"x: {x}") @@ -48,12 +48,12 @@ os.makedirs(f"{path}/inputs", exist_ok=True) # save the synthetic data - np.save(f"{path}/inputs/y.npy", y) + np.save(f"{path}/y.npy", y) # save a as .npz sparse.save_npz(f"{path}/inputs/a.npz", a) # save original latent parameters - np.save(f"{path}/inputs/x_original.npy", x) + np.save(f"{path}/reference_outputs/x_ref.npy", x) # save original hyperparameter theta - np.save(f"{path}/inputs/theta_original.npy", theta_observations) + np.save(f"{path}/reference_outputs/theta_ref.npy", theta_observations) diff --git a/examples/gr/inputs/y.npy b/examples/gr/inputs/y.npy new file mode 100644 index 00000000..a5755615 --- /dev/null +++ b/examples/gr/inputs/y.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5b972c648de0244b4fe3af2aac7e87210706d8ac879007011e061c2330c0ba +size 8128 diff --git a/examples/gr/reference_outputs/theta_ref.npy b/examples/gr/reference_outputs/theta_ref.npy index ccb2de10..01fbab03 100644 --- a/examples/gr/reference_outputs/theta_ref.npy +++ b/examples/gr/reference_outputs/theta_ref.npy @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:722e1a2b1465e290a283836da486e044a051dc5c7d0b420a1fc0addfd4411c1f +oid sha256:7b9d65f6717a0911c1cdf1cc680cb167f2ef1395be2db8ca2065cb2bd8a0a8f4 size 136 diff --git a/examples/gr/reference_outputs/x_ref.npy b/examples/gr/reference_outputs/x_ref.npy index fbf38892..87f3421e 100644 --- a/examples/gr/reference_outputs/x_ref.npy +++ b/examples/gr/reference_outputs/x_ref.npy @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:86bdf16672dbf80add11a285c0fe68f2532436b2d232a86992144017d02fcd61 +oid sha256:4ddd3a0b4a76c50a77c276c3c7a3a37b506a633b3e31564e8fd3bf859d3e2184 size 176 diff --git a/examples/gr/run.py b/examples/gr/run.py index d12fdaad..246fe89e 100644 --- a/examples/gr/run.py +++ b/examples/gr/run.py @@ -8,7 +8,7 @@ from dalia.core.dalia import DALIA from dalia.core.model import Model from dalia.submodels import RegressionSubModel -from dalia.utils import extract_diagonal, get_host, print_msg +from dalia.utils import extract_diagonal, get_host, print_msg, plot_marginal_distributions_hp, plot_prior_hp parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) sys.path.append(parent_dir) @@ -32,11 +32,13 @@ regression = RegressionSubModel( config=submodels_config.parse_config(regression_dict), ) + # Likelihood likelihood_dict = { "type": "gaussian", - "prec_o": 1.5, - "prior_hyperparameters": {"type": "gaussian", "mean": 3.5, "precision": 0.5}, + "prec_o": 1.0, + "prior_hyperparameters": {"type": "gamma", "alpha": 2.0, "beta": 2.0}, + #"prior_hyperparameters": {"type": "gaussian", "mean": 1.0, "precision": 0.5}, } # Creation of the first model by combining the Regression submodel and the likelihood model = Model( @@ -45,6 +47,14 @@ ) print_msg(model) + ## Plot prior of hyperparameter -- identification by [0], [1], ... not amazing but works for now + theta_interval = [-5, 7] + prior_hp = model.prior_hyperparameters[0] + + fig, ax = plot_prior_hp("prec_o", theta_interval, prior_hp) + import matplotlib.pyplot as plt + plt.show() + # Configurations of DALIA dalia_dict = { "solver": {"type": "dense"}, @@ -69,8 +79,9 @@ results = dalia.run() print_msg("\n--- Results ---") - print_msg("Theta values:\n", results["theta"]) - print_msg("Covariance of theta:\n", results["cov_theta"]) + print_msg("Theta values external:\n", results["theta"]) + print_msg("Theta values internal:\n", results["theta_internal"]) + print_msg("Internal Covariance of theta:\n", results["cov_theta_internal"]) print_msg( "Mean of the fixed effects:\n", results["x"][-model.submodels[-1].n_fixed_effects :], @@ -79,16 +90,17 @@ print_msg("\n--- Comparisons ---") # Compare hyperparameters theta_ref = xp.load(f"{BASE_DIR}/reference_outputs/theta_ref.npy") + print_msg("Reference theta:", theta_ref) print_msg( "Norm (theta - theta_ref): ", - f"{np.linalg.norm(results['theta'] - get_host(theta_ref)):.4e}", + f"{xp.linalg.norm(results['theta'] - theta_ref):.4e}", ) # Compare latent parameters x_ref = xp.load(f"{BASE_DIR}/reference_outputs/x_ref.npy") print_msg( "Norm (x - x_ref): ", - f"{np.linalg.norm(results['x'] - get_host(x_ref)):.4e}", + f"{xp.linalg.norm(results['x'] - x_ref):.4e}", ) # Compare marginal variances of latent parameters @@ -101,11 +113,25 @@ ) # Compare marginal variances of observations - var_obs = dalia.get_marginal_variances_observations(theta=theta_ref, x_star=x_ref) + var_obs = dalia.get_marginal_variances_observations(theta_external=theta_ref, x_star=x_ref) var_obs_ref = extract_diagonal(model.a @ Qinv_ref @ model.a.T) print_msg( "Norm (var_obs - var_obs_ref): ", f"{xp.linalg.norm(var_obs - var_obs_ref):.4e}", ) + print_msg("\n--- Marginal distributions of the hyperparameters ---") + marginals_hp = dalia.marginal_distributions_hp() + + fig, axes = plot_marginal_distributions_hp(marginals_hp) + import matplotlib.pyplot as plt + plt.savefig(f"gr_marginal_distributions_hp.png") + + prec_obs = marginals_hp['hyperparameters']['prec_o'] + quantile_pairs = prec_obs['quantiles']['external']['pairs'] + + print("Quantile pairs of prec_o:") + for p, q in quantile_pairs: + print(f" {p:.3f} quantile: {q:.4f}") + print_msg("\n--- Finished ---") diff --git a/examples/gs_coreg2_small/run.py b/examples/gs_coreg2_small/run.py index 1bb6ba29..7ac07622 100644 --- a/examples/gs_coreg2_small/run.py +++ b/examples/gs_coreg2_small/run.py @@ -168,7 +168,7 @@ print_msg("results['theta']: ", results["theta"]) - print_msg("cov_theta: \n", results["cov_theta"]) + print_msg("Internal Covariance of theta:\n", results["cov_theta_internal"]) print_msg("mean of the fixed effects: ", results["x"][-nb:]) print_msg( "marginal variances of the fixed effects: ", diff --git a/examples/gs_coreg3_small/run.py b/examples/gs_coreg3_small/run.py index 65e1598d..9f056ba3 100644 --- a/examples/gs_coreg3_small/run.py +++ b/examples/gs_coreg3_small/run.py @@ -217,7 +217,7 @@ print_msg("results['theta']: ", results["theta"]) - print_msg("cov_theta: \n", results["cov_theta"]) + print_msg("Internal Covariance of theta:\n", results["cov_theta_internal"]) print_msg("mean of the fixed effects: ", results["x"][-nb:]) print_msg( "marginal variances of the fixed effects: ", diff --git a/examples/gs_small/run.py b/examples/gs_small/run.py index e94f20d1..ecdc73a0 100644 --- a/examples/gs_small/run.py +++ b/examples/gs_small/run.py @@ -47,25 +47,28 @@ likelihood_dict = { "type": "gaussian", "prec_o": 4, - "prior_hyperparameters": { - "type": "penalized_complexity", - "alpha": 0.01, - "u": 5, - }, + # "prior_hyperparameters": { + # "type": "penalized_complexity", + # "alpha": 0.01, + # "u": 5, + # }, + "prior_hyperparameters": {"type": "gamma", "alpha": 2.0, "beta": 2.0}, } model = Model( submodels=[spatial, regression], likelihood_config=likelihood_config.parse_config(likelihood_dict), ) + print_msg(model) + # Configurations of DALIA dalia_dict = { - "solver": {"type": "dense"}, + "solver": {"type": "scipy"}, "minimize": { "max_iter": args.max_iter, "gtol": 1e-3, "disp": True, - "maxcor": len(model.theta), + "maxcor": len(model.theta_external), }, "f_reduction_tol": 1e-3, "theta_reduction_tol": 1e-4, @@ -84,7 +87,7 @@ print_msg("\n--- Results ---") print_msg("Theta values:\n", results["theta"]) - print_msg("Covariance of theta:\n", results["cov_theta"]) + print_msg("Internal Covariance of theta:\n", results["cov_theta_internal"]) print_msg( "Mean of the fixed effects:\n", results["x"][-model.submodels[-1].n_fixed_effects :], @@ -95,7 +98,7 @@ theta_ref = np.load(f"{BASE_DIR}/reference_outputs/theta_ref.npy") print_msg( "Norm (theta - theta_ref): ", - f"{np.linalg.norm(results['theta'] - get_host(theta_ref)):.4e}", + f"{np.linalg.norm(results['theta_internal'] - get_host(theta_ref)):.4e}", ) # Compare latent parameters diff --git a/examples/gst_coreg2_small/run.py b/examples/gst_coreg2_small/run.py index 55328b44..79fec600 100644 --- a/examples/gst_coreg2_small/run.py +++ b/examples/gst_coreg2_small/run.py @@ -206,7 +206,7 @@ "max_iter": args.max_iter, "gtol": 1e-3, "disp": True, - "maxcor": len(coreg_model.theta), + "maxcor": len(coreg_model.theta_external), }, "f_reduction_tol": 1e-3, "theta_reduction_tol": 1e-4, @@ -228,7 +228,7 @@ print_msg("results['theta']: ", results["theta"]) - print_msg("cov_theta: \n", results["cov_theta"]) + print_msg("Internal Covariance of theta:\n", results["cov_theta_internal"]) print_msg("mean of the fixed effects: ", results["x"][-nb:]) print_msg( "marginal variances of the fixed effects: ", @@ -243,7 +243,7 @@ print_msg( "Norm (theta - theta_ref): ", - f"{np.linalg.norm(results['theta'] - get_host(theta_ref)):.4e}", + f"{np.linalg.norm(get_host(results['theta']) - get_host(theta_ref)):.4e}", ) x_ref = np.load(f"{BASE_DIR}/inputs_nv{nv}_ns{ns}_nt{nt}_nb{nb}/reference_outputs/x_ref.npy") @@ -254,7 +254,7 @@ # Compare latent parameters print_msg( "Norm (x - x_ref): ", - f"{np.linalg.norm(results['x'] - get_host(x_ref)):.4e}", + f"{np.linalg.norm(get_host(results['x']) - get_host(x_ref)):.4e}", ) # Compare marginal variances of latent parameters @@ -265,7 +265,7 @@ Qinv_ref = xp.linalg.inv(Qconditional.toarray()) print_msg( "Norm (marg var latent - ref): ", - f"{np.linalg.norm(var_latent_params - xp.diag(Qinv_ref)):.4e}", + f"{xp.linalg.norm(var_latent_params - xp.diag(Qinv_ref)):.4e}", ) # Compare marginal variances of observations diff --git a/examples/gst_coreg3_small/run.py b/examples/gst_coreg3_small/run.py index 8406d013..b17ac251 100644 --- a/examples/gst_coreg3_small/run.py +++ b/examples/gst_coreg3_small/run.py @@ -266,7 +266,7 @@ "max_iter": args.max_iter, "gtol": 1e-3, "disp": True, - "maxcor": len(coreg_model.theta), + "maxcor": len(coreg_model.theta_external), }, "f_reduction_tol": 1e-3, "theta_reduction_tol": 1e-4, @@ -288,7 +288,7 @@ print_msg("results['theta']: ", results["theta"]) - print_msg("cov_theta: \n", results["cov_theta"]) + print_msg("Internal Covariance of theta:\n", results["cov_theta_internal"]) print_msg("mean of the fixed effects: ", results["x"][-nb:]) print_msg( "marginal variances of the fixed effects: ", diff --git a/examples/gst_large/reference_outputs/theta_ref.npy b/examples/gst_large/reference_outputs/theta_ref.npy index 983e4c62..113200b7 100644 --- a/examples/gst_large/reference_outputs/theta_ref.npy +++ b/examples/gst_large/reference_outputs/theta_ref.npy @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:303281b0ded419b21216bd0cefa983b10ed2b7fbea6df2110df2706807f52ff9 +oid sha256:5ba7d2f753dbe60aca3f47b2220a826eb436a0e96cf83eb592eb925e9a96b444 size 160 diff --git a/examples/gst_large/run.py b/examples/gst_large/run.py index bb4b8ade..b377fec6 100644 --- a/examples/gst_large/run.py +++ b/examples/gst_large/run.py @@ -12,7 +12,7 @@ from dalia.core.dalia import DALIA from dalia.submodels import RegressionSubModel, SpatioTemporalSubModel from examples_utils.parser_utils import parse_args -from dalia.utils import print_msg, get_host +from dalia.utils import print_msg, get_host, plot_marginal_distributions_hp BASE_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -26,9 +26,9 @@ "type": "spatio_temporal", "input_dir": f"{BASE_DIR}/inputs_spatio_temporal", "spatial_domain_dimension": 2, - "r_s": -0.960279229160082, - "r_t": -0.3068528194400548, - "sigma_st": -2.112085713764618, + "r_s": -0.96, + "r_t": -0.31, + "sigma_st": -2.11, "manifold": "sphere", "ph_s": {"type": "penalized_complexity", "alpha": 0.01, "u": 0.5}, "ph_t": {"type": "penalized_complexity", "alpha": 0.01, "u": 5}, @@ -79,15 +79,11 @@ config=dalia_config.parse_config(dalia_dict), ) - # print_msg("\n--- References ---") - theta_ref = xp.array(np.load(f"{BASE_DIR}/reference_outputs/theta_ref.npy")) - x_ref = xp.array(np.load(f"{BASE_DIR}/reference_outputs/x_ref.npy")) - results = dalia.run() print_msg("\n--- Results ---") print_msg("Theta values:\n", results["theta"]) - print_msg("Covariance of theta:\n", results["cov_theta"]) + print_msg("Internal Covariance of theta:\n", results["cov_theta_internal"]) print_msg( "Mean of the fixed effects:\n", results["x"][-model.submodels[-1].n_fixed_effects:], @@ -96,12 +92,14 @@ print_msg("\n--- Comparisons ---") # Compare hyperparameters + theta_ref = np.array(np.load(f"{BASE_DIR}/reference_outputs/theta_ref.npy")) print_msg( "Norm (theta - theta_ref): ", f"{np.linalg.norm(results['theta'] - get_host(theta_ref)):.4e}", ) # Compare latent parameters + x_ref = np.array(np.load(f"{BASE_DIR}/reference_outputs/x_ref.npy")) print_msg( "Norm (x - x_ref): ", f"{np.linalg.norm(results['x'] - get_host(x_ref)):.4e}", diff --git a/examples/gst_medium/reference_outputs/theta_ref.npy b/examples/gst_medium/reference_outputs/theta_ref.npy index 983e4c62..113200b7 100644 --- a/examples/gst_medium/reference_outputs/theta_ref.npy +++ b/examples/gst_medium/reference_outputs/theta_ref.npy @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:303281b0ded419b21216bd0cefa983b10ed2b7fbea6df2110df2706807f52ff9 +oid sha256:5ba7d2f753dbe60aca3f47b2220a826eb436a0e96cf83eb592eb925e9a96b444 size 160 diff --git a/examples/gst_medium/run.py b/examples/gst_medium/run.py index 1e1cd1fd..9bf144c5 100644 --- a/examples/gst_medium/run.py +++ b/examples/gst_medium/run.py @@ -8,7 +8,7 @@ from dalia.configs import likelihood_config, dalia_config, submodels_config from dalia.core.model import Model from dalia.core.dalia import DALIA -from dalia.utils import print_msg, get_host +from dalia.utils import print_msg, get_host, plot_marginal_distributions_hp from dalia.submodels import RegressionSubModel, SpatioTemporalSubModel from examples_utils.parser_utils import parse_args from dalia import xp @@ -27,12 +27,12 @@ "input_dir": f"{BASE_DIR}/inputs_spatio_temporal", "spatial_domain_dimension": 2, "r_s": 0.0, - "r_t": 0.0, - "sigma_st": 0.0, - "manifold": "plane", - "ph_s": {"type": "gaussian", "mean": 0.03972077083991806, "precision": 0.5}, - "ph_t": {"type": "gaussian", "mean": 2.3931471805599456, "precision": 0.5}, - "ph_st": {"type": "gaussian", "mean": 1.4379142862353824, "precision": 0.5}, + "r_t": 2.2, + "sigma_st": 1.3, + "manifold": "sphere", + "ph_s": {"type": "penalized_complexity", "alpha": 0.01, "u": 0.5}, + "ph_t": {"type": "penalized_complexity", "alpha": 0.01, "u": 5}, + "ph_st": {"type": "penalized_complexity", "alpha": 0.01, "u": 3}, } spatio_temporal = SpatioTemporalSubModel( config=submodels_config.parse_config(spatio_temporal_dict), @@ -85,15 +85,11 @@ config=dalia_config.parse_config(dalia_dict), ) - # print_msg("\n--- References ---") - theta_ref = xp.array(np.load(f"{BASE_DIR}/reference_outputs/theta_ref.npy")) - x_ref = xp.array(np.load(f"{BASE_DIR}/reference_outputs/x_ref.npy")) - results = dalia.run() print_msg("\n--- Results ---") print_msg("Theta values:\n", results["theta"]) - print_msg("Covariance of theta:\n", results["cov_theta"]) + print_msg("Internal Covariance of theta:\n", results["cov_theta_internal"]) print_msg( "Mean of the fixed effects:\n", results["x"][-model.submodels[-1].n_fixed_effects:], @@ -102,15 +98,17 @@ print_msg("\n--- Comparisons ---") # Compare hyperparameters + theta_ref = np.array(np.load(f"{BASE_DIR}/reference_outputs/theta_ref.npy")) print_msg( "Norm (theta - theta_ref): ", - f"{np.linalg.norm(results['theta'] - get_host(theta_ref)):.4e}", + f"{np.linalg.norm(get_host(results['theta_internal']) - theta_ref):.4e}", ) # Compare latent parameters + x_ref = np.array(np.load(f"{BASE_DIR}/reference_outputs/x_ref.npy")) print_msg( "Norm (x - x_ref): ", - f"{np.linalg.norm(results['x'] - get_host(x_ref)):.4e}", + f"{np.linalg.norm(get_host(results['x']) - x_ref):.4e}", ) print_msg("\n--- Finished ---") diff --git a/examples/gst_small/reference_outputs/theta_ref.npy b/examples/gst_small/reference_outputs/theta_ref.npy index 983e4c62..113200b7 100644 --- a/examples/gst_small/reference_outputs/theta_ref.npy +++ b/examples/gst_small/reference_outputs/theta_ref.npy @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:303281b0ded419b21216bd0cefa983b10ed2b7fbea6df2110df2706807f52ff9 +oid sha256:5ba7d2f753dbe60aca3f47b2220a826eb436a0e96cf83eb592eb925e9a96b444 size 160 diff --git a/examples/gst_small/run.py b/examples/gst_small/run.py index 2c36f753..348e828b 100644 --- a/examples/gst_small/run.py +++ b/examples/gst_small/run.py @@ -11,7 +11,7 @@ from dalia.core.model import Model from dalia.core.dalia import DALIA from dalia.submodels import RegressionSubModel, SpatioTemporalSubModel -from dalia.utils import get_host, print_msg, extract_diagonal +from dalia.utils import get_host, print_msg, plot_marginal_distributions_hp from examples_utils.parser_utils import parse_args BASE_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -26,6 +26,7 @@ "type": "spatio_temporal", "input_dir": f"{BASE_DIR}/inputs_spatio_temporal", "spatial_domain_dimension": 2, + # These hyperparameters are in the internal scale (dalia.py/BFGS) "r_s": 0, "r_t": 0, "sigma_st": 0, @@ -52,12 +53,13 @@ likelihood_dict = { "type": "gaussian", "prec_o": 4, - # "prior_hyperparameters": {"type": "gaussian", "mean": 1.4, "precision": 0.5}, - "prior_hyperparameters": { - "type": "penalized_complexity", - "alpha": 0.01, - "u": 4, - }, + "prior_hyperparameters": {"type": "gamma", "alpha": 2.0, "beta": 2.0}, + #"prior_hyperparameters": {"type": "gaussian", "mean": 1.4, "precision": 0.5}, + # "prior_hyperparameters": { + # "type": "penalized_complexity", + # "alpha": 0.01, + # "u": 4, + # }, } # Creation of the model by combining the submodels and the likelihood @@ -74,7 +76,7 @@ "max_iter": args.max_iter, "gtol": 1e-3, "disp": True, - "maxcor": len(model.theta), + "maxcor": len(model.theta_external), }, "f_reduction_tol": 1e-3, "theta_reduction_tol": 1e-4, @@ -91,43 +93,48 @@ results = dalia.run() print_msg("\n--- Results ---") + theta_ref = np.load(f"{BASE_DIR}/reference_outputs/theta_ref.npy") + print_msg("Theta values:\n", results["theta"]) - print_msg("Covariance of theta:\n", results["cov_theta"]) - print_msg( - "Mean of the fixed effects:\n", - results["x"][-model.submodels[-1].n_fixed_effects :], - ) + print_msg("Theta values internal:\n", results["theta_internal"]) + print_msg("Covariance of theta:\n", results["cov_theta_internal"]) print_msg("\n--- Comparisons ---") # Compare hyperparameters - theta_ref = np.load(f"{BASE_DIR}/reference_outputs/theta_ref.npy") print_msg( "Norm (theta - theta_ref): ", - f"{np.linalg.norm(results['theta'] - get_host(theta_ref)):.4e}", + f"{np.linalg.norm(get_host(results["theta"]) - theta_ref):.4e}", ) # Compare latent parameters x_ref = np.load(f"{BASE_DIR}/reference_outputs/x_ref.npy") print_msg( "Norm (x - x_ref): ", - f"{np.linalg.norm(results['x'] - get_host(x_ref)):.4e}", + f"{np.linalg.norm(get_host(results['x']) - x_ref):.4e}", ) # Compare marginal variances of latent parameters - var_latent_params = results["marginal_variances_latent"] + var_latent_params = get_host(results["marginal_variances_latent"]) + dalia.model.theta_internal = results["theta_internal"] Qconditional = dalia.model.construct_Q_conditional(eta=model.a @ model.x) Qinv_ref = xp.linalg.inv(Qconditional.toarray()) print_msg( "Norm (marg var latent - ref): ", - f"{np.linalg.norm(var_latent_params - xp.diag(Qinv_ref)):.4e}", + f"{np.linalg.norm(var_latent_params - get_host(xp.diag(Qinv_ref))):.4e}", ) - # Compare marginal variances of observations - # var_obs = dalia.get_marginal_variances_observations(theta=theta_ref, x_star=x_ref) - # var_obs_ref = extract_diagonal(model.a @ Qinv_ref @ model.a.T) - # print_msg( - # "Norm (var_obs - var_obs_ref): ", - # f"{xp.linalg.norm(var_obs - var_obs_ref):.4e}", - # ) + print_msg("\n--- Marginal distributions of the hyperparameters ---") + marginals_hp = dalia.marginal_distributions_hp() + + fig, axes = plot_marginal_distributions_hp(marginals_hp) + import matplotlib.pyplot as plt + plt.savefig("gst_small_marginal_distributions_hp.png") + + prec_obs = marginals_hp['hyperparameters']['prec_o'] + quantile_pairs = prec_obs['quantiles']['external']['pairs'] + print("Quantile pairs of prec_o:") + for p, q in quantile_pairs: + print(f" {p:.3f} quantile: {q:.4f}") + print_msg("\n--- Finished ---") diff --git a/examples/p_ar1/generate_data.py b/examples/p_ar1/generate_data.py new file mode 100644 index 00000000..bdc62a92 --- /dev/null +++ b/examples/p_ar1/generate_data.py @@ -0,0 +1,92 @@ +import os +import sys + +import numpy as np +import scipy.sparse as sp + +from scipy.stats import multivariate_normal, poisson + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +if __name__ == "__main__": + + n = 1000 + + ## define priors + s2 = 1 # 0.7 + tau = 1 / s2 + ### note: phi between -1 and 1 for discrete timesteps + # (doesn't make sense for negative in cts case) + + ## rescale beta prior 2 theta - 1 + phi = 0.9 # 0.9 + theta_original = [phi, tau] + + denom = s2 * (1 - phi**2) + + diag = [(1 + phi**2) / denom] * n + diag[0] = diag[-1] = 1 / denom + off_diag = [-phi / denom] * (n - 1) + + Q = sp.diags([diag, off_diag, off_diag], [0, -1, 1]) + L = np.linalg.cholesky(Q.toarray()) + Cov = np.linalg.inv(Q.toarray()) + + print(Q.toarray()[:6, :6]) + print(np.linalg.inv(Q.toarray())[:6, :6]) + print(np.round(Q.toarray() @ np.linalg.inv(Q.toarray()), 6)[:6, :6]) + # exit() + + mv = multivariate_normal(mean=np.zeros(n), cov=Cov, seed=3) + + intercept = 2 + u = mv.rvs() + print("u: ", u[:10]) + eta = u + intercept + x = np.concatenate((u, [intercept])) + + os.makedirs("reference_outputs", exist_ok=True) + np.save("reference_outputs/x_original.npy", x) + np.save("reference_outputs/theta_original.npy", theta_original) + + x_initial = u + np.random.normal(0, 0.3, size=len(u)) + os.makedirs("inputs_ar1", exist_ok=True) + np.save("inputs_ar1/x.npy", u) + + a_ar1 = sp.eye(n) + sp.save_npz("inputs_ar1/a.npz", a_ar1) + + a_regression = sp.csr_matrix(np.ones((n, 1))) + os.makedirs("inputs_regression", exist_ok=True) + sp.save_npz("inputs_regression/a.npz", a_regression) + + print("eta: ", eta[:10]) + np.save("inputs_ar1/x_original.npy", eta) + + # sample with repitition + E = np.random.choice([1, 2, 3], size=n, replace=True) + # E = [1] * n + np.save("e.npy", E) + + y = poisson.rvs(E * np.exp(eta), random_state=3) + np.save("y.npy", y) + + print("y[:10]: ", y[:10]) + + Qprior = sp.block_diag([Q, sp.csr_matrix([[0.001]])]) + # print("Qprior : \n", Qprior.toarray()) + + # a = sp.hstack([a_ar1, a_regression]) + # Qcond = Qprior + obs_noise_prec * a.T @ a + # print("Qcond: \n", Qcond.toarray()) + + # b = obs_noise_prec * a.T @ y + # # -xp.exp(theta) * (eta - y) + # # beta_initial + np.linalg.solve( + # # Qconditional.toarray(), information_vector + # # ) + # x_est = np.linalg.solve(Qcond.toarray(), b) + # print("x_est: ", x_est) + + # print("eta est : ", a @ x_est) + # print("eta : ", a @ x) diff --git a/examples/p_ar1/run.py b/examples/p_ar1/run.py new file mode 100644 index 00000000..730c437a --- /dev/null +++ b/examples/p_ar1/run.py @@ -0,0 +1,109 @@ +import sys +import os + +parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.append(parent_dir) + +import numpy as np + +from dalia import xp +from dalia.configs import likelihood_config, dalia_config, submodels_config +from dalia.core.model import Model +from dalia.core.dalia import DALIA +from dalia.submodels import AR1SubModel, RegressionSubModel +from dalia.utils import get_host, print_msg # , extract_diagonal + + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +if __name__ == "__main__": + n = 1000 + + # load reference output + theta_original = np.load(f"{BASE_DIR}/reference_outputs/theta_original.npy") + print("theta original: ", theta_original) + + x_original = np.load(f"{BASE_DIR}/reference_outputs/x_original.npy") + print("x original: ", x_original[:10]) + print("dim(x original): ", x_original.shape) + + ar1_dict = { + "type": "ar1", + "input_dir": f"{BASE_DIR}/inputs_ar1", + "phi": 0.45, # has to be between 0 and 1 + "tau": 0.5, # precision + "ph_phi": {"type": "beta", "alpha": 5.0, "beta": 1.0}, + "ph_tau": {"type": "gamma", "alpha": 2.0, "beta": 0.5}, + } + ar1 = AR1SubModel( + config=submodels_config.parse_config(ar1_dict), + ) + + # Configurations of the regression submodel + regression_dict = { + "type": "regression", + "input_dir": f"{BASE_DIR}/inputs_regression", + "n_fixed_effects": 1, + "fixed_effects_prior_precision": 0.001, + } + regression = RegressionSubModel( + config=submodels_config.parse_config(regression_dict), + ) + + likelihood_dict = { + "type": "poisson", + "input_dir": f"{BASE_DIR}", + } + + model = Model( + submodels=[ar1, regression], + likelihood_config=likelihood_config.parse_config(likelihood_dict), + ) + print_msg(model) + + Qprior = model.construct_Q_prior() + print("Qprior: \n", Qprior.toarray()) + Qinv = np.linalg.inv(Qprior.toarray()) + geom_mean = np.exp(np.mean(np.log(Qinv.diagonal()))) + print("Geometric mean of Qinv diagonal: ", geom_mean) + + eta = model.a @ model.x + Qcond = model.construct_Q_conditional(eta=eta) + + # L = np.linalg.cholesky(Qcond.toarray()) + + # plt.spy(Qcond, markersize=2) + # plt.title("Sparsity pattern of Qcond") + # plt.show() + + # Configurations of DALIA + dalia_dict = { + "solver": {"type": "dense"}, + "minimize": { + "max_iter": 100, + "gtol": 1e-3, + "disp": True, + "maxcor": len(model.theta_external), + }, + "f_reduction_tol": 1e-3, + "theta_reduction_tol": 1e-4, + "inner_iteration_max_iter": 50, + "eps_inner_iteration": 1e-3, + "eps_gradient_f": 1e-3, + "simulation_dir": ".", + } + + dalia = DALIA( + model=model, + config=dalia_config.parse_config(dalia_dict), + ) + + results = dalia.minimize() + + theta_user = results["theta"] + print("theta_ref: ", theta_original) + print("theta user: ", theta_user) + print("norm(theta_original - theta_user): ", np.linalg.norm(theta_original - get_host(theta_user))) + print("norm(x_original - x): ", np.linalg.norm(x_original - get_host(results["x"]))) + print("normalized norm: ", np.linalg.norm(x_original - get_host(results["x"])) / np.linalg.norm(x_original)) + diff --git a/examples/run_example_alex_fau.sh b/examples/run_example_alex_fau.sh index 90f872c0..6450207a 100644 --- a/examples/run_example_alex_fau.sh +++ b/examples/run_example_alex_fau.sh @@ -38,34 +38,44 @@ source ../scripts/dalia_job_utils.sh && dalia_set_perfenv && dalia_print_job_con # solver. The default is 1. The maximum number of processes is # `--max_iter` : The maximum number of iterations of the minimization. -# --- Run Regression Example --- -echo "Regression Example..." -srun python ./gr/run.py --max_iter 100 +# --- Brainiac Example --- +srun python ./brainiac/run.py --max_iter 100 -# --- Run Spatial Examples --- -# echo "Spatial Example (small)..." +# --- Gaussian AR1 Example --- +# srun python ./g_ar1/run.py --max_iter 100 + +# --- Gaussian Regression Example --- +# srun python ./gr/run.py --max_iter 100 + +# --- Gaussian Spatial Coregional 2 Models (Small) Example --- +# srun python ./gs_coreg2_small/run.py --max_iter 100 + +# --- Gaussian Spatial Coregional 3 Models (Small) Example --- +# srun python ./gs_coreg3_small/run.py --max_iter 100 + +# --- Gaussian Spatial Model (Small) Example --- # srun python ./gs_small/run.py --max_iter 100 -# --- Run Spatio-temporal Examples --- -# echo "Spatio-temporal Example (small)..." -# srun python ./gst_small/run.py --solver_min_p 1 --max_iter 100 +# --- Gaussian Spatio-temporal Coregional 2 Models (Small) Example --- +# srun python ./gst_coreg2_small/run.py --solver_min_p 1 --max_iter 100 -# echo "Spatio-temporal Example (medium)..." -# srun python ./gst_medium/run.py --solver_min_p 1 --max_iter 100 +# --- Gaussian Spatio-temporal Coregional 3 Models (Small) Example --- +# srun python ./gst_coreg3_small/run.py --solver_min_p 1 --max_iter 100 -# echo "Spatio-temporal Example (large)..." +# --- Gaussian Spatio-temporal Model (Large) Example --- # srun python ./gst_large/run.py --solver_min_p 1 --max_iter 100 -# --- Run Coregional (Spatial) Examples --- -# echo "Coregional Spatial Example (2 models)..." -# srun python ./gs_coreg2_small/run.py --max_iter 100 +# --- Gaussian Spatio-temporal Model (Medium) Example --- +# srun python ./gst_medium/run.py --solver_min_p 1 --max_iter 100 -# echo "Coregional Spatial Example (3 models)..." -# srun python ./gs_coreg3_small/run.py --max_iter 100 +# --- Gaussian Spatio-temporal Model (Small) Example --- +# srun python ./gst_small/run.py --solver_min_p 1 --max_iter 100 -# --- Run Coregional (Spatio-temporal) Examples --- -# echo "Coregional Spatio-temporal Example (2 models)..." -# srun python ./gst_coreg2_small/run.py --solver_min_p 1 --max_iter 100 +# --- Poisson AR1 Example --- +# srun python ./p_ar1/run.py --max_iter 100 + +# --- Poisson Regression Example --- +# srun python ./pr/run.py --max_iter 100 -# echo "Coregional Spatio-temporal Example (3 models)..." -# srun python ./gst_coreg3_small/run.py --solver_min_p 1 --max_iter 100 \ No newline at end of file +# --- Poisson Spatio-temporal Model (Small) Example --- +# srun python ./pst_small/run.py --solver_min_p 1 --max_iter 100 \ No newline at end of file diff --git a/examples/run_example_daint_cscs.sh b/examples/run_example_daint_cscs.sh index 74fa6885..bbb47e96 100644 --- a/examples/run_example_daint_cscs.sh +++ b/examples/run_example_daint_cscs.sh @@ -40,34 +40,44 @@ source ../scripts/dalia_job_utils.sh && dalia_set_perfenv && dalia_print_job_con # solver. The default is 1. The maximum number of processes is # `--max_iter` : The maximum number of iterations of the minimization. -# --- Run Regression Example --- -echo "Regression Example..." -srun python ./gr/run.py --max_iter 100 +# --- Brainiac Example --- +srun python ./brainiac/run.py --max_iter 100 -# --- Run Spatial Examples --- -#echo "Spatial Example (small)..." -#srun python ./gs_small/run.py --max_iter 100 +# --- Gaussian AR1 Example --- +# srun python ./g_ar1/run.py --max_iter 100 -# --- Run Spatio-temporal Examples --- -# echo "Spatio-temporal Example (small)..." -# srun python ./gst_small/run.py --solver_min_p 1 --max_iter 100 +# --- Gaussian Regression Example --- +# srun python ./gr/run.py --max_iter 100 -# echo "Spatio-temporal Example (medium)..." -# srun python ./gst_medium/run.py --solver_min_p 1 --max_iter 100 +# --- Gaussian Spatial Coregional 2 Models (Small) Example --- +# srun python ./gs_coreg2_small/run.py --max_iter 100 + +# --- Gaussian Spatial Coregional 3 Models (Small) Example --- +# srun python ./gs_coreg3_small/run.py --max_iter 100 + +# --- Gaussian Spatial Model (Small) Example --- +# srun python ./gs_small/run.py --max_iter 100 + +# --- Gaussian Spatio-temporal Coregional 2 Models (Small) Example --- +# srun python ./gst_coreg2_small/run.py --solver_min_p 1 --max_iter 100 -#echo "Spatio-temporal Example (large)..." -#srun python ./gst_large/run.py --solver_min_p 1 --max_iter 100 +# --- Gaussian Spatio-temporal Coregional 3 Models (Small) Example --- +# srun python ./gst_coreg3_small/run.py --solver_min_p 1 --max_iter 100 -# --- Run Coregional (Spatial) Examples --- -#echo "Coregional Spatial Example (2 models)..." -#srun python ./gs_coreg2_small/run.py --max_iter 100 +# --- Gaussian Spatio-temporal Model (Large) Example --- +# srun python ./gst_large/run.py --solver_min_p 1 --max_iter 100 + +# --- Gaussian Spatio-temporal Model (Medium) Example --- +# srun python ./gst_medium/run.py --solver_min_p 1 --max_iter 100 + +# --- Gaussian Spatio-temporal Model (Small) Example --- +# srun python ./gst_small/run.py --solver_min_p 1 --max_iter 100 -#echo "Coregional Spatial Example (3 models)..." -#srun python ./gs_coreg3_small/run.py --max_iter 100 +# --- Poisson AR1 Example --- +# srun python ./p_ar1/run.py --max_iter 100 -# --- Run Coregional (Spatio-temporal) Examples --- -#echo "Coregional Spatio-temporal Example (2 models)..." -#srun python ./gst_coreg2_small/run.py --solver_min_p 1 --max_iter 100 +# --- Poisson Regression Example --- +# srun python ./pr/run.py --max_iter 100 -#echo "Coregional Spatio-temporal Example (3 models)..." -#srun python ./gst_coreg3_small/run.py --solver_min_p 1 --max_iter 100 +# --- Poisson Spatio-temporal Model (Small) Example --- +# srun python ./pst_small/run.py --solver_min_p 1 --max_iter 100 \ No newline at end of file diff --git a/examples/run_example_fritz_fau.sh b/examples/run_example_fritz_fau.sh index 07d4330a..fa1d414b 100644 --- a/examples/run_example_fritz_fau.sh +++ b/examples/run_example_fritz_fau.sh @@ -36,34 +36,44 @@ source ../scripts/dalia_job_utils.sh && dalia_set_perfenv && dalia_print_job_con # solver. The default is 1. The maximum number of processes is # `--max_iter` : The maximum number of iterations of the minimization. -# --- Run Regression Example --- -echo "Regression Example..." -srun python ./gr/run.py --max_iter 100 +# --- Brainiac Example --- +srun python ./brainiac/run.py --max_iter 100 -# --- Run Spatial Examples --- -# echo "Spatial Example (small)..." +# --- Gaussian AR1 Example --- +# srun python ./g_ar1/run.py --max_iter 100 + +# --- Gaussian Regression Example --- +# srun python ./gr/run.py --max_iter 100 + +# --- Gaussian Spatial Coregional 2 Models (Small) Example --- +# srun python ./gs_coreg2_small/run.py --max_iter 100 + +# --- Gaussian Spatial Coregional 3 Models (Small) Example --- +# srun python ./gs_coreg3_small/run.py --max_iter 100 + +# --- Gaussian Spatial Model (Small) Example --- # srun python ./gs_small/run.py --max_iter 100 -# --- Run Spatio-temporal Examples --- -# echo "Spatio-temporal Example (small)..." -# srun python ./gst_small/run.py --solver_min_p 1 --max_iter 100 +# --- Gaussian Spatio-temporal Coregional 2 Models (Small) Example --- +# srun python ./gst_coreg2_small/run.py --solver_min_p 1 --max_iter 100 -# echo "Spatio-temporal Example (medium)..." -# srun python ./gst_medium/run.py --solver_min_p 1 --max_iter 100 +# --- Gaussian Spatio-temporal Coregional 3 Models (Small) Example --- +# srun python ./gst_coreg3_small/run.py --solver_min_p 1 --max_iter 100 -# echo "Spatio-temporal Example (large)..." +# --- Gaussian Spatio-temporal Model (Large) Example --- # srun python ./gst_large/run.py --solver_min_p 1 --max_iter 100 -# --- Run Coregional (Spatial) Examples --- -# echo "Coregional Spatial Example (2 models)..." -# srun python ./gs_coreg2_small/run.py --max_iter 100 +# --- Gaussian Spatio-temporal Model (Medium) Example --- +# srun python ./gst_medium/run.py --solver_min_p 1 --max_iter 100 -# echo "Coregional Spatial Example (3 models)..." -# srun python ./gs_coreg3_small/run.py --max_iter 100 +# --- Gaussian Spatio-temporal Model (Small) Example --- +# srun python ./gst_small/run.py --solver_min_p 1 --max_iter 100 -# --- Run Coregional (Spatio-temporal) Examples --- -# echo "Coregional Spatio-temporal Example (2 models)..." -# srun python ./gst_coreg2_small/run.py --solver_min_p 1 --max_iter 100 +# --- Poisson AR1 Example --- +# srun python ./p_ar1/run.py --max_iter 100 + +# --- Poisson Regression Example --- +# srun python ./pr/run.py --max_iter 100 -# echo "Coregional Spatio-temporal Example (3 models)..." -# srun python ./gst_coreg3_small/run.py --solver_min_p 1 --max_iter 100 \ No newline at end of file +# --- Poisson Spatio-temporal Model (Small) Example --- +# srun python ./pst_small/run.py --solver_min_p 1 --max_iter 100 \ No newline at end of file diff --git a/examples/todo_utils_scripts/data_preprocessing_coreg.py b/examples/todo_utils_scripts/data_preprocessing_coreg.py deleted file mode 100644 index 2d4afae8..00000000 --- a/examples/todo_utils_scripts/data_preprocessing_coreg.py +++ /dev/null @@ -1,218 +0,0 @@ -# import math -import os - -# import matplotlib.pyplot as plt -import numpy as np -from matrix_utilities import ( # read_sym_CSC, - load_matrices_spatial_model_from_dat, - load_matrices_spatial_temporal_model_from_dat, - read_CSC, -) -from scipy.sparse import csc_matrix, save_npz # block_diag, - -# from dalia import sp, xp - -if __name__ == "__main__": - # get current path - path = os.path.dirname(__file__) - - num_vars = 2 - - type = "spatio-temporal" # "spatial" # - - ns = 354 - nt = 12 - - # add more shared fixed effects later - nb = num_vars - nb_per_var = 1 - - no1 = 2 * ns * nt - no2 = 2 * ns * nt - no3 = 0 # 2 * ns * nt - - dim_theta = 9 - - no_list = [no1, no2, no3] - total_obs = sum(no_list) - - n = num_vars * (ns * nt + nb_per_var) - - data_dir = f"../../../repositories/application/coregionalization_models/data/nv{num_vars}_ns{ns}_nt{nt}_nb{nb}" - - # load submatrices - if type == "spatial": - c0, g1, g2 = load_matrices_spatial_model_from_dat(ns, data_dir) - - elif type == "spatio-temporal": - c0, g1, g2, g3, M0, M1, M2 = load_matrices_spatial_temporal_model_from_dat( - ns, nt, data_dir - ) - else: - raise ValueError("Invalid model type") - - # load observation vectors - y1_file = f"{data_dir}/y1_{no1}_1.dat" - y1 = np.loadtxt(y1_file) - - y2_file = f"{data_dir}/y2_{no2}_1.dat" - y2 = np.loadtxt(y2_file) - - # load projection matrices - a1_file = f"{data_dir}/A1_{no1}_{ns*nt}.dat" - a1 = read_CSC(a1_file) - print("nnz(A1) = ", a1.nnz) - - # split a into random and fixed effects - a1_random = a1[:, : ns * nt] - a1_fixed = csc_matrix(np.ones((no_list[0], nb_per_var))) - - a2_file = f"{data_dir}/A2_{no1}_{ns*nt}.dat" - a2 = read_CSC(a2_file) - print("nnz(A2) = ", a1.nnz) - - a2_random = a2[:, : ns * nt] - a2_fixed = csc_matrix(np.ones((no_list[1], nb_per_var))) - - if num_vars == 3: - y3_file = f"{data_dir}/y3_{no3}_1.dat" - y3 = np.loadtxt(y3_file) - - a3_file = f"{data_dir}/A3_{no3}_{ns*nt}.dat" - a3 = read_CSC(a3_file) - - a3_random = a3[:, : ns * nt] - a3_fixed = csc_matrix(np.ones((no_list[2], nb_per_var))) - - # set path new data directory and create necessary folders - new_data_dir = f"{path}/inputs_nv{num_vars}_ns{ns}_nt{nt}_nb{nb}" - os.makedirs(new_data_dir, exist_ok=True) - print(f"Created directory {new_data_dir}") - - # save matrices - if type == "spatial": - for i in range(num_vars): - os.makedirs(f"{new_data_dir}/model_{i+1}/inputs_spatial", exist_ok=True) - save_npz(f"{new_data_dir}/model_{i+1}/inputs_spatial/c0.npz", c0) - save_npz(f"{new_data_dir}/model_{i+1}/inputs_spatial/g1.npz", g1) - save_npz(f"{new_data_dir}/model_{i+1}/inputs_spatial/g2.npz", g2) - - os.makedirs(f"{new_data_dir}/model_{i+1}/inputs_regression", exist_ok=True) - - save_npz(f"{new_data_dir}/model_1/inputs_spatial/a.npz", a1_random) - save_npz(f"{new_data_dir}/model_2/inputs_spatial/a.npz", a2_random) - - save_npz(f"{new_data_dir}/model_1/inputs_regression/a.npz", a1_fixed) - save_npz(f"{new_data_dir}/model_2/inputs_regression/a.npz", a2_fixed) - - if num_vars == 3: - save_npz(f"{new_data_dir}/model_3/inputs_spatial/a.npz", a3_random) - save_npz(f"{new_data_dir}/model_3/inputs_regression/a.npz", a3_fixed) - - elif type == "spatio-temporal": - for i in range(num_vars): - os.makedirs( - f"{new_data_dir}/model_{i+1}/inputs_spatio_temporal", exist_ok=True - ) - save_npz(f"{new_data_dir}/model_{i+1}/inputs_spatio_temporal/c0.npz", c0) - save_npz(f"{new_data_dir}/model_{i+1}/inputs_spatio_temporal/g1.npz", g1) - save_npz(f"{new_data_dir}/model_{i+1}/inputs_spatio_temporal/g2.npz", g2) - save_npz(f"{new_data_dir}/model_{i+1}/inputs_spatio_temporal/g3.npz", g3) - - save_npz(f"{new_data_dir}/model_{i+1}/inputs_spatio_temporal/m0.npz", M0) - save_npz(f"{new_data_dir}/model_{i+1}/inputs_spatio_temporal/m1.npz", M1) - save_npz(f"{new_data_dir}/model_{i+1}/inputs_spatio_temporal/m2.npz", M2) - - os.makedirs(f"{new_data_dir}/model_{i+1}/inputs_regression/", exist_ok=True) - - save_npz(f"{new_data_dir}/model_1/inputs_spatio_temporal/a.npz", a1_random) - save_npz(f"{new_data_dir}/model_2/inputs_spatio_temporal/a.npz", a2_random) - - save_npz(f"{new_data_dir}/model_1/inputs_regression/a.npz", a1_fixed) - save_npz(f"{new_data_dir}/model_2/inputs_regression/a.npz", a2_fixed) - - if num_vars == 3: - save_npz(f"{new_data_dir}/model_3/inputs_spatio_temporal/a.npz", a3_random) - save_npz(f"{new_data_dir}/model_3/inputs_regression/a.npz", a3_fixed) - - np.save(f"{new_data_dir}/model_1/y.npy", y1) - np.save(f"{new_data_dir}/model_2/y.npy", y2) - - if num_vars == 3: - np.save(f"{new_data_dir}/model_3/y.npy", y3) - - # reference output - theta_ref_file = f"{data_dir}/theta_interpretS_original_{dim_theta}_1.dat" - theta_ref = np.loadtxt(theta_ref_file) - print(f"Reference theta: {theta_ref}") - - theta_ref_py_file = ( - f"{data_dir}/theta_interpretS_original_DALIA_perm_{dim_theta}_1.dat" - ) - theta_ref_py = np.loadtxt(theta_ref_py_file) - print(f"Reference theta (python order): {theta_ref_py}") - - # load reference x - x_ref_file = f"{data_dir}/x_original_{n}_1.dat" - x_ref = np.loadtxt(x_ref_file) - print("x_ref[:10]: ", x_ref[:10]) - - os.makedirs(f"{new_data_dir}/reference_outputs", exist_ok=True) - - np.save(f"{new_data_dir}/reference_outputs/theta_ref.npy", theta_ref_py) - # np.savetxt( - # f"{new_data_dir}/reference_outputs/theta_interpretS_original_DALIA_perm_{dim_theta}_1.dat", - # theta_ref_py, - # ) - - # save reference x - # np.savetxt(f"{new_data_dir}/reference_outputs/x_original_{n}_1.dat", x_ref) - np.save(f"{new_data_dir}/reference_outputs/x_ref.npy", x_ref) - - print("num vars: ", num_vars) - print("type: ", type) - - # # reorder theta to python format - # if type == "spatial" and num_vars == 3: - # # old order ['sigma_1' , 'r_s1', 'sigma_2', 'r_s2', 'sigma_3', 'r_s3', 'lambda_0_1', 'lambda_0_2', 'lambda_1_2' 'prec_o1', 'prec_o2', 'prec_o3'] - # # new order ['r_s1', 'prec_o1', 'r_s2', 'prec_o2', 'r_s3', 'prec_o3', 'sigma_1', 'sigma_2', 'sigma_3', 'lambda_0_1', 'lambda_0_2', 'lambda_1_2'] - # theta_py = np.array([ - # theta_ref[1], # r_s1 - # theta_ref[9], # prec_o1 - # theta_ref[3], # r_s2 - # theta_ref[10], # prec_o2 - # theta_ref[5], # r_s3 - # theta_ref[11], # prec_o3 - # theta_ref[0], # sigma_1 (was sigma_0 in the previous mapping) - # theta_ref[2], # sigma_2 (was sigma_1 in the previous mapping) - # theta_ref[4], # sigma_3 - # theta_ref[6], # lambda_0_1 - # theta_ref[7], # lambda_0_2 - # theta_ref[8], # lambda_1_2 - # ]) - - # elif num_vars == 2 and type == "spatio-temporal": - # # old order ['sigma_1' , 'r_s1', 'r_t1', 'sigma_2', 'r_s2', 'r_t2', 'prec_o1', 'prec_o2', 'lambda_0_1'] - # # new order ['r_s1', 'r_t1', 'prec_o1', 'r_s2', 'r_t2', 'prec_o2', 'sigma_1', 'sigma_2', 'lambda_0_1'] - # theta_py = np.array([ - # theta_ref[1], # r_s1 - # theta_ref[2], # r_t1 - # theta_ref[6], # prec_o1 - # theta_ref[4], # r_s2 - # theta_ref[5], # r_t2 - # theta_ref[7], # prec_o2 - # theta_ref[0], # sigma_1 - # theta_ref[3], # sigma_2 - # theta_ref[8], # lambda_0_1 - # ]) - - # else: - # raise ValueError("Invalid model type") - - # print(f"theta R: {theta_ref}") - # print(f"Reordered theta: {theta_py}") - - # save reordered theta - # theta_py_file = f"{new_data_dir}/reference_outputs/theta_interpretS_original_DALIA_perm_{len(theta_py)}_1.dat" - # np.savetxt(theta_py_file, theta_py) - # print(f"Saved reordered theta to {theta_py_file}") diff --git a/scripts/daint_cscs_utils.sh b/scripts/daint_cscs_utils.sh index bcf9dd22..aeb10b1b 100644 --- a/scripts/daint_cscs_utils.sh +++ b/scripts/daint_cscs_utils.sh @@ -842,6 +842,10 @@ daint_create_conda_env() { if MPICC=$(which mpicc) python -m pip install --no-cache-dir --no-binary=mpi4py mpi4py; then echo " Successfully installed mpi4py in MPI-enhanced environment." + # Update the libcxx given Daint NCCL modules requirements + conda update libstdcxx-ng + conda install -c conda-forge libstdcxx-ng + # Try to import nccl to ensure it's available echo " Verifying NCCL installation in MPI-enhanced environment..." if python -c "from cupy.cuda import nccl; nccl.get_unique_id()"; then @@ -1037,7 +1041,7 @@ daint_set_perfenv() { export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK export CUDA_VISIBLE_DEVICES=$SLURM_LOCALID - export MPICH_GPU_SUPPORT_ENABLED=1 + export MPICH_GPU_SUPPORT_ENABLED=0 # NCCL Performance Configuration # More can be found: https://docs.cscs.ch/software/communication/nccl/#using-nccl @@ -1060,8 +1064,12 @@ daint_set_perfenv() { # performance on the Alps network across a wide range of applications. Specific # applications may perform better with other values. export FI_CXI_DEFAULT_CQ_SIZE=131072 - export FI_CXI_DEFAULT_TX_SIZE=32768 + export FI_CXI_DEFAULT_TX_SIZE=16384 export FI_CXI_DISABLE_HOST_REGISTER=1 export FI_CXI_RX_MATCH_MODE=software export FI_MR_CACHE_MONITOR=userfaultfd + + export FI_CXI_RDZV_GET_MIN=0 + export FI_CXI_RDZV_THRESHOLD=0 + export FI_CXI_RDZV_EAGER_SIZE=0 } \ No newline at end of file diff --git a/scripts/dalia_job_utils.sh b/scripts/dalia_job_utils.sh index eef792e8..070e5543 100644 --- a/scripts/dalia_job_utils.sh +++ b/scripts/dalia_job_utils.sh @@ -2,10 +2,10 @@ dalia_set_perfenv() { echo "dalia_set_perfenv: setting up DALIA performance environment variables." - export ARRAY_MODULE=cupy - export MPI_CUDA_AWARE=0 - export USE_NCCL=0 - export MPICH_GPU_SUPPORT_ENABLED=0 + export ARRAY_MODULE=numpy # numpy or cupy + export MPI_CUDA_AWARE=0 # 0 or 1 + export USE_NCCL=0 # 0 or 1 + export MPICH_GPU_SUPPORT_ENABLED=0 # 0 or 1 echo "DALIA Environment Configuration:" echo " - ARRAY_MODULE: ${ARRAY_MODULE}" echo " - MPI_CUDA_AWARE: ${MPI_CUDA_AWARE}" diff --git a/src/dalia/configs/dalia_config.py b/src/dalia/configs/dalia_config.py index bceb97bc..27f97aaf 100644 --- a/src/dalia/configs/dalia_config.py +++ b/src/dalia/configs/dalia_config.py @@ -56,6 +56,10 @@ class DaliaConfig(BaseModel): simulation_dir: Path = Path("./dalia/") output_dir: Path = Path.joinpath(simulation_dir, "output/") + # --- Verbosity level ------------------------------------------------------ + verbosity: int = 0 # 0: minimal, 1: more info + + def parse_config(config: dict | str) -> DaliaConfig: if isinstance(config, str): diff --git a/src/dalia/configs/likelihood_config.py b/src/dalia/configs/likelihood_config.py index 33abf64a..f55ef66c 100644 --- a/src/dalia/configs/likelihood_config.py +++ b/src/dalia/configs/likelihood_config.py @@ -4,7 +4,7 @@ from abc import ABC, abstractmethod from typing import Literal -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, PositiveFloat from dalia.__init__ import ArrayLike, xp from dalia.configs.priorhyperparameters_config import ( @@ -30,7 +30,9 @@ def read_hyperparameters(self) -> tuple[ArrayLike, list]: ... class GaussianLikelihoodConfig(LikelihoodConfig): - prec_o: float = None # Observation precision + prec_o: PositiveFloat = ( + 4.0 # Observation precision, has to be positive, offer initial guess + ) def read_hyperparameters(self): if self.fix_hyperparameters: diff --git a/src/dalia/configs/priorhyperparameters_config.py b/src/dalia/configs/priorhyperparameters_config.py index 7a58529c..b1ccd933 100644 --- a/src/dalia/configs/priorhyperparameters_config.py +++ b/src/dalia/configs/priorhyperparameters_config.py @@ -13,7 +13,9 @@ class PriorHyperparametersConfig(BaseModel): model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True) - type: Literal["gaussian", "penalized_complexity", "beta", "gaussian_mvn"] = None + type: Literal[ + "gaussian", "penalized_complexity", "beta", "gaussian_mvn", "gamma", "inverse_gamma" + ] = None class GaussianPriorHyperparametersConfig(PriorHyperparametersConfig): @@ -44,15 +46,27 @@ class BetaPriorHyperparametersConfig(PriorHyperparametersConfig): beta: float = None +class GammaPriorHyperparametersConfig(PriorHyperparametersConfig): + alpha: float = None + beta: float = None + +class InverseGammaPriorHyperparametersConfig(PriorHyperparametersConfig): + alpha: float = None + beta: float = None + + def parse_config(config: dict) -> PriorHyperparametersConfig: prior_type = config.get("type") if prior_type == "gaussian": return GaussianPriorHyperparametersConfig(**config) - elif prior_type == "gaussian_mvn": + if prior_type == "gaussian_mvn": return GaussianMVNPriorHyperparametersConfig(**config) - elif prior_type == "penalized_complexity": + if prior_type == "penalized_complexity": return PenalizedComplexityPriorHyperparametersConfig(**config) - elif prior_type == "beta": + if prior_type == "beta": return BetaPriorHyperparametersConfig(**config) - else: - raise ValueError(f"Unknown prior hyperparameters config type: {prior_type}") + if prior_type == "gamma": + return GammaPriorHyperparametersConfig(**config) + if prior_type == "inverse_gamma": + return InverseGammaPriorHyperparametersConfig(**config) + raise ValueError(f"Unknown prior hyperparameters config type: {prior_type}") diff --git a/src/dalia/configs/submodels_config.py b/src/dalia/configs/submodels_config.py index 038563c5..960480a4 100644 --- a/src/dalia/configs/submodels_config.py +++ b/src/dalia/configs/submodels_config.py @@ -4,7 +4,7 @@ from abc import ABC, abstractmethod from typing import Literal -from pydantic import BaseModel, ConfigDict, Field, PositiveInt +from pydantic import BaseModel, ConfigDict, Field, PositiveInt, PositiveFloat from typing_extensions import Annotated from dalia.__init__ import ArrayLike, xp @@ -16,7 +16,6 @@ from dalia.configs.priorhyperparameters_config import ( parse_config as parse_priorhyperparameters_config, ) -from dalia.utils import scaled_logit class SubModelConfig(BaseModel, ABC): @@ -24,20 +23,63 @@ class SubModelConfig(BaseModel, ABC): # Input folder for this specific submodel input_dir: str = None - type: Literal["spatio_temporal", "spatial", "regression", "brainiac"] = None + type: Literal[ + "spatio_temporal", "spatial", "regression", "brainiac", "ar1", "generic" + ] = None @abstractmethod def read_hyperparameters(self) -> tuple[ArrayLike, list]: ... class RegressionSubModelConfig(SubModelConfig): - n_fixed_effects: Annotated[int, Field(strict=True, ge=1)] = 1 + n_fixed_effects: Annotated[int | None, Field(strict=True, ge=1)] = None fixed_effects_prior_precision: float = 0.001 def read_hyperparameters(self): return xp.array([]), [] +class GenericSubModelConfig(SubModelConfig): + tau: PositiveFloat = ( + 4.0 # Precision of the Gaussian prior on the latent parameters, offer initial guess, has to be positive + ) + + ph_tau: PriorHyperparametersConfig = None + + def read_hyperparameters(self): + theta = xp.array([self.tau]) + theta_keys = ["tau"] + + return theta, theta_keys + + +class AR1SubModelConfig(SubModelConfig): + + ## prior on phi + phi: float = None # AR(1) coefficient + phi_scaled: float = None + ph_phi: PriorHyperparametersConfig = None + ## check that phi is between -1 and 1 (use pc prior) + # check inla.doc("pc.cor1") + + ## either define tau or sigma2 + tau: PositiveFloat = None # Precision + # sigma2: float = None # Marginal variance + + ph_tau: PriorHyperparametersConfig = None + # ph_sigma2: PriorHyperparametersConfig = None + + def read_hyperparameters(self): + + # input of phi is in (0,1), rescale to -/+ INF + # self.phi_scaled = scaled_logit(self.phi, direction="forward") + theta = xp.array([self.phi, self.tau]) + # theta_internal = xp.array([self.phi, self.tau]) + theta_keys = ["phi", "tau"] + + return theta, theta_keys + + class SpatioTemporalSubModelConfig(SubModelConfig): spatial_domain_dimension: PositiveInt = 2 @@ -90,10 +132,7 @@ class BrainiacSubModelConfig(SubModelConfig): ph_alpha: GaussianMVNPriorHyperparametersConfig = None def read_hyperparameters(self): - # input of h2 is in (0,1), rescale to -/+ INF - self.h2_scaled = scaled_logit(self.h2, direction="forward") - - theta = xp.concatenate([xp.array([self.h2_scaled]), xp.array(self.alpha)]) + theta = xp.concatenate([xp.array([self.h2]), xp.array(self.alpha)]) theta_keys = ["h2"] + [f"alpha_{i}" for i in range(len(self.alpha))] return theta, theta_keys @@ -103,23 +142,29 @@ def parse_config(config: dict | str) -> SubModelConfig: if isinstance(config, str): with open(config, "rb") as f: config = tomllib.load(f) - - type = config.get("type") - if type == "spatio_temporal": + model_type = config.get("type") + if model_type == "spatio_temporal": config["ph_s"] = parse_priorhyperparameters_config(config["ph_s"]) config["ph_t"] = parse_priorhyperparameters_config(config["ph_t"]) config["ph_st"] = parse_priorhyperparameters_config(config["ph_st"]) return SpatioTemporalSubModelConfig(**config) - elif type == "spatial": + if model_type == "spatial": config["ph_s"] = parse_priorhyperparameters_config(config["ph_s"]) config["ph_e"] = parse_priorhyperparameters_config(config["ph_e"]) return SpatialSubModelConfig(**config) - elif type == "regression": + if model_type == "regression": return RegressionSubModelConfig(**config) - elif type == "brainiac": + if model_type == "brainiac": config["ph_h2"] = parse_priorhyperparameters_config(config["ph_h2"]) config["ph_alpha"] = parse_priorhyperparameters_config(config["ph_alpha"]) return BrainiacSubModelConfig(**config) + if model_type == "ar1": + config["ph_tau"] = parse_priorhyperparameters_config(config["ph_tau"]) + config["ph_phi"] = parse_priorhyperparameters_config(config["ph_phi"]) + return AR1SubModelConfig(**config) + elif model_type == "generic": + config["ph_tau"] = parse_priorhyperparameters_config(config["ph_tau"]) + return GenericSubModelConfig(**config) # Add more elif branches for other submodel types else: - raise ValueError(f"Unknown submodel type: {type}") + raise ValueError(f"Unknown submodel type: {model_type}") diff --git a/src/dalia/core/dalia.py b/src/dalia/core/dalia.py index 761a7e00..4ff13c35 100644 --- a/src/dalia/core/dalia.py +++ b/src/dalia/core/dalia.py @@ -4,6 +4,7 @@ from scipy import optimize from tabulate import tabulate +import copy from dalia import ArrayLike, NDArray, backend_flags, comm_rank, comm_size, sp, xp from dalia.configs.dalia_config import DaliaConfig @@ -26,6 +27,8 @@ smartsplit, synchronize, synchronize_gpu, + check_vector_consistency, + bcast, ) if backend_flags["mpi_avail"]: @@ -75,6 +78,8 @@ def __init__( self.eps_gradient_f = self.config.eps_gradient_f self.eps_hessian_f = self.config.eps_hessian_f + self.verbosity = self.config.verbosity + # --- Configure HPC set_device(comm_rank, comm_size) @@ -200,17 +205,22 @@ def __init__( dtype=xp.float64, ) self.theta_mat = xp.zeros( - (self.model.theta.size, self.n_f_evaluations), dtype=xp.float64 + (self.model.theta_internal.size, self.n_f_evaluations), dtype=xp.float64 ) - self.theta_optimizer = xp.zeros_like(self.model.theta) - self.theta_optimizer[:] = self.model.theta + self.theta_optimizer = xp.zeros_like(self.model.theta_internal) + self.theta_optimizer[:] = self.model.theta_internal + self.theta_star = None # mode not yet computed + self.theta_star_internal = None + self.x_star = None # mode not yet computed + self.cov_theta_internal = None # covariance not yet computed # --- Metrics self.f_values: ArrayLike = [] - self.theta_values: ArrayLike = [] + self.theta_values_internal: ArrayLike = [] self.objective_function_time: ArrayLike = [] self.solver_time: ArrayLike = [] self.construction_time: ArrayLike = [] + self.accepted_iter = 0 # --- Timers self.t_construction_qprior = 0.0 @@ -304,16 +314,26 @@ def run(self) -> dict: # compute mode of the hyperparameters theta minimization_result = self.minimize() - theta_star = get_device(minimization_result["theta"]) - x_star = get_device(minimization_result["x"]) + self.theta_star = minimization_result["theta"] + self.theta_star_internal = minimization_result["theta_internal"] + self.x_star = minimization_result["x"] + + print("Finished the optimization procedure.") + + # need to update theta_star and x_star to be the same across all ranks + bcast(data=self.theta_star[:], root=0, comm=self.comm_world) + bcast(data=self.theta_star_internal[:], root=0, comm=self.comm_world) + bcast(data=self.x_star[:], root=0, comm=self.comm_world) # compute covariance of the hyperparameters theta at the mode - cov_theta = self.compute_covariance_hp(theta_star) + self.cov_theta_internal = self.compute_covariance_hp(self.theta_star) + print("Computed covariance of the hyperparameters at the mode.") # compute marginal variances of the latent parameters marginal_variances_latent = self.get_marginal_variances_latent_parameters( - theta_star, x_star + self.theta_star, self.x_star ) + print_msg("Computed marginal variances of the latent parameters.") # compute marginal variances of the observations # TODO: only run by default when dense multiplcation issue is fixed, see issue #78 @@ -324,14 +344,15 @@ def run(self) -> dict: # construct new dictionary with the results results = { "theta": minimization_result["theta"], - "theta_interpret": minimization_result["theta_interpret"], + "theta_internal": minimization_result["theta_internal"], "x": minimization_result["x"], "f": minimization_result["f"], "grad_f": minimization_result["grad_f"], "f_values": minimization_result["f_values"], "theta_values": minimization_result["theta_values"], - "cov_theta": cov_theta, + "cov_theta_internal": self.cov_theta_internal, "marginal_variances_latent": marginal_variances_latent, + "optimization_iterations": self.accepted_iter, # "marginal_variances_observations": get_host( # marginal_variances_observations # ), @@ -353,16 +374,27 @@ def minimize(self) -> optimize.OptimizeResult: minimization_result : scipy.optimize.OptimizeResult Result of the optimization procedure. """ + # Ensure that all ranks are initialized to the same theta + check_vector_consistency( + value=self.model.theta_external, + comm=self.comm_world, + flag="self.model.theta_external", + verbose="Full", + ) - if len(self.model.theta) == 0: + if len(self.model.theta_external) == 0: # Only run the inner iteration print_msg("No hyperparameters, just running inner iteration.") - self.f_value = self._evaluate_f(self.model.theta) + self.f_value = self._evaluate_f(self.model.theta_external) self.minimization_result: dict = { - "theta": self.model.theta, - "theta_interpret": self.model.get_theta_interpret(), - "x": self.model.x, # [self.model.inverse_permutation_latent_variables], - "f": self.f_value, + "theta_internal": copy.deepcopy(self.model.theta_internal), + "theta": copy.deepcopy(self.model.theta_external), + "x": copy.deepcopy(self.model.x), # [self.model.inverse_permutation_latent_variables], + "f": copy.deepcopy(self.f_value), + "grad_f": [], + "f_values": [], + ### these values are in internal scale (!!) + "theta_values": [], } else: @@ -391,12 +423,12 @@ def callback(intermediate_result: optimize.OptimizeResult): f"Iteration: {self.accepted_iter:2d} (took: {self.objective_function_time[-1]:.2f}) | " f"Theta: [{theta_str}] | " f"Function Value: {fun_i: .6f} | " - f"Gradient: [{gradient_str}] | ", + #f"Gradient: [{gradient_str}] | ", f"Norm(Grad): [{xp.linalg.norm(self.gradient_f): .6f}]", flush=True, ) - self.theta_values.append(theta_i) + self.theta_values_internal.append(theta_i) self.f_values.append(fun_i) # check if f_values have been decreasing over last iterations @@ -414,20 +446,15 @@ def callback(intermediate_result: optimize.OptimizeResult): ) self.minimization_result = { - "theta": get_host(self.model.theta), - "theta_interpret": get_host( - self.model.get_theta_interpret() - ), - "x": get_host( - self.model.x - # self.model.x[ - # self.model.inverse_permutation_latent_variables - # ] - ), - "f": fun_i, - "grad_f": self.gradient_f, + "theta_internal": copy.deepcopy(self.model.theta_internal), + "theta": + copy.deepcopy(self.model.theta_external), + "x": copy.deepcopy(self.model.x), + "f": copy.deepcopy(fun_i), + "grad_f": copy.deepcopy(self.gradient_f), "f_values": self.f_values, - "theta_values": self.theta_values, + ### these values are in internal scale (!!) + "theta_values": self.theta_values_internal, } raise OptimizationConvergedEarlyExit() @@ -435,13 +462,13 @@ def callback(intermediate_result: optimize.OptimizeResult): if self.accepted_iter > self.config.theta_reduction_lag: if ( xp.linalg.norm( - self.theta_values[-self.config.theta_reduction_lag] + self.theta_values_internal[-self.config.theta_reduction_lag] - theta_i ) < self.config.theta_reduction_tol ): norm_diff = xp.linalg.norm( - self.theta_values[ + self.theta_values_internal[ self.accepted_iter - self.config.theta_reduction_lag ] - theta_i @@ -455,20 +482,19 @@ def callback(intermediate_result: optimize.OptimizeResult): ) self.minimization_result = { - "theta": get_host(self.model.theta), - "theta_interpret": get_host( - self.model.get_theta_interpret() - ), + "theta_internal": get_host(self.model.theta_internal), + "theta": get_host( + self.model.theta_external), "x": get_host( self.model.x # self.model.x[ # self.model.inverse_permutation_latent_variables # ] ), - "f": fun_i, - "grad_f": self.gradient_f, - "f_values": self.f_values, - "theta_values": self.theta_values, + "f": copy.deepcopy(fun_i), + "grad_f": copy.deepcopy(self.gradient_f), + "f_values": copy.deepcopy(self.f_values), + "theta_values": copy.deepcopy(self.theta_values_internal), } raise OptimizationConvergedEarlyExit() @@ -518,15 +544,13 @@ def callback(intermediate_result: optimize.OptimizeResult): ) self.minimization_result: dict = { - "theta": scipy_result.x, - "theta_interpret": self.model.get_theta_interpret(), - "x": get_host( - self.model.x, # [self.model.inverse_permutation_latent_variables] - ), - "f": scipy_result.fun, - "grad_f": self.gradient_f, - "f_values": self.f_values, - "theta_values": self.theta_values, + "theta_internal": copy.deepcopy(self.model.theta_internal), # scipy_result.x, # + "theta": copy.deepcopy(self.model.theta_external), + "x": copy.deepcopy(self.model.x), # [self.model.inverse_permutation_latent_variables] + "f": copy.deepcopy(scipy_result.fun), + "grad_f": copy.deepcopy(self.gradient_f), + "f_values": copy.deepcopy(self.f_values), + "theta_values": copy.deepcopy(self.theta_values_internal), } return self.minimization_result @@ -612,7 +636,7 @@ def _objective_function( self.t_construction_qprior + self.t_construction_qconditional ) - if self.iter > 0: + if self.iter > 0 and self.verbosity > 0: print( f"rank {comm_rank} | objfunc_time: {self.objective_function_time[1:]} | solver_time: {self.solver_time[1:]} | construction_time: {self.construction_time[1:]}", flush=True, @@ -649,7 +673,8 @@ def _evaluate_f( tic = time.time() - self.model.theta[:] = theta_i + # self.model.theta_internal[:] = theta_i + self.model.theta_internal = theta_i f_theta = xp.zeros(1, dtype=xp.float64) # --- Optimize x and evaluate the conditional of the latent parameters @@ -669,6 +694,7 @@ def _evaluate_f( task_mapping = [i % n_qeval_comm for i in range(2)] if task_mapping[0] == self.color_qeval: + # Done by processes "even" synchronize_gpu() tic = time.perf_counter() @@ -722,6 +748,7 @@ def _evaluate_f( comm=self.comm_feval, ) synchronize(comm=self.comm_qeval) + else: synchronize_gpu() tic = time.perf_counter() @@ -775,7 +802,7 @@ def _evaluate_f( return f_theta[0] - def compute_covariance_hp(self, theta_i: NDArray) -> NDArray: + def compute_covariance_hp(self, theta_external: NDArray) -> NDArray: """compute the covariance matrix of the hyperparameters theta. Parameters @@ -788,34 +815,44 @@ def compute_covariance_hp(self, theta_i: NDArray) -> NDArray: cov_theta : NDArray[dim_theta, dim_theta] Covariance matrix of the hyperparameters theta. """ - self.model.theta[:] = theta_i + + # self.model.rescale_hyperparameters_to_internal(theta_interpret, direction="forward") + # ensure that all ranks are initialized to the same theta + check_vector_consistency( + theta_external, + comm=self.comm_world, + flag="theta_external", + verbose="Full", + ) print_msg( - f"Computing covariance of hyperparameters theta at {theta_i}.", + f"Computing covariance of hyperparameters at theta_external {theta_external}.", flush=True, ) synchronize(comm=self.comm_world) tic = time.perf_counter() - hess_theta = self._evaluate_hessian_f(theta_i) - # print_msg( - # f"hessian_f: \n {hess_theta}", - # flush=True, - # ) - cov_theta = xp.linalg.inv(hess_theta) + self.model.theta_external = theta_external + + hess_theta_internal = self._evaluate_hessian_f(self.model.theta_internal) + print_msg( + f"hessian_f: \n {hess_theta_internal}", + flush=True, + ) + cov_theta_internal = xp.linalg.inv(hess_theta_internal) + synchronize(comm=self.comm_world) toc = time.perf_counter() - t_covariance_hp = toc - tic print_msg( "Time to compute covariance of hyperparameters:", - t_covariance_hp, + toc - tic, flush=True, ) - return cov_theta + return cov_theta_internal def _evaluate_hessian_f( self, - theta_i: NDArray, + theta_internal: NDArray, ) -> NDArray: """Approximate the hessian of the function f(theta) = log(p(theta|y)). @@ -834,8 +871,7 @@ def _evaluate_hessian_f( """ ## TODO: this is the quick fix ... - theta_internal = theta_i.copy() - self.model.theta[:] = theta_i + # self.model.theta[:] = theta_i dim_theta = self.model.n_hyperparameters # pre-allocate storage for the hessian & f_values @@ -867,8 +903,10 @@ def _evaluate_hessian_f( counter = 0 # compute f(theta) if self.color_feval == task_mapping[0]: + theta_i = theta_internal.copy() f_theta = self._evaluate_f(theta_i) f_ii_loc[1, :] = f_theta + counter += 1 for k in range(loop_dim): @@ -879,49 +917,62 @@ def _evaluate_hessian_f( if i == j: if self.color_feval == task_mapping[counter]: # theta+eps_i - theta_i = theta_internal.copy() - f_ii_loc[0, i] = self._evaluate_f(theta_i + eps_mat[i, :]) + # theta_i = theta_internal.copy() + # f_ii_loc[0, i] = self._evaluate_f(theta_i + eps_mat[i, :]) + theta_i = theta_internal + eps_mat[i, :] + result = self._evaluate_f(theta_i) + f_ii_loc[0, i] = result counter += 1 if self.color_feval == task_mapping[counter]: # theta-eps_i - theta_i = theta_internal.copy() - f_ii_loc[2, i] = self._evaluate_f(theta_i - eps_mat[i, :]) - + # theta_i = theta_internal.copy() + # f_ii_loc[2, i] = self._evaluate_f(theta_i - eps_mat[i, :]) + theta_i = theta_internal - eps_mat[i, :] + result = self._evaluate_f(theta_i) + f_ii_loc[2, i] = result counter += 1 # as hessian is symmetric we only have to compute the upper triangle elif i < j: # theta+eps_i+eps_j if self.color_feval == task_mapping[counter]: - theta_i = theta_internal.copy() - f_ij_loc[0, k] = self._evaluate_f( - theta_i + eps_mat[i, :] + eps_mat[j, :] - ) + # theta_i = theta_internal.copy() + # f_ij_loc[0, k] = self._evaluate_f( + # theta_i + eps_mat[i, :] + eps_mat[j, :] + # ) + theta_i = theta_internal + eps_mat[i, :] + eps_mat[j, :] + f_ij_loc[0, k] = self._evaluate_f(theta_i) counter += 1 # theta+eps_i-eps_j if self.color_feval == task_mapping[counter]: - theta_i = theta_internal.copy() - f_ij_loc[1, k] = self._evaluate_f( - theta_i + eps_mat[i, :] - eps_mat[j, :] - ) + # theta_i = theta_internal.copy() + # f_ij_loc[1, k] = self._evaluate_f( + # theta_i + eps_mat[i, :] - eps_mat[j, :] + # ) + theta_i = theta_internal + eps_mat[i, :] - eps_mat[j, :] + f_ij_loc[1, k] = self._evaluate_f(theta_i) counter += 1 # theta-eps_i+eps_j if self.color_feval == task_mapping[counter]: - theta_i = theta_internal.copy() - f_ij_loc[2, k] = self._evaluate_f( - theta_i - eps_mat[i, :] + eps_mat[j, :] - ) + # theta_i = theta_internal.copy() + # f_ij_loc[2, k] = self._evaluate_f( + # theta_i - eps_mat[i, :] + eps_mat[j, :] + # ) + theta_i = theta_internal - eps_mat[i, :] + eps_mat[j, :] + f_ij_loc[2, k] = self._evaluate_f(theta_i) counter += 1 # theta-eps_i-eps_j if self.color_feval == task_mapping[counter]: - theta_i = theta_internal.copy() - f_ij_loc[3, k] = self._evaluate_f( - theta_i - eps_mat[i, :] - eps_mat[j, :] - ) + # theta_i = theta_internal.copy() + # f_ij_loc[3, k] = self._evaluate_f( + # theta_i - eps_mat[i, :] - eps_mat[j, :] + # ) + theta_i = theta_internal - eps_mat[i, :] - eps_mat[j, :] + f_ij_loc[3, k] = self._evaluate_f(theta_i) counter += 1 allreduce( @@ -963,8 +1014,178 @@ def _evaluate_hessian_f( return hess + def marginal_distributions_hp(self, + #quantiles: NDArray = xp.array([0.0001, 0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975, 0.9999]) + quantiles: NDArray = xp.array([0.025, 0.25, 0.5, 0.75, 0.975]) + ) -> dict: + """Compute the marginal distributions of the hyperparameters theta. + + Parameters + ---------- + quantiles : NDArray + Quantiles to compute. If not provided, default quantiles are used. If None, no quantiles are computed. + + Returns + ------- + dict + Dictionary containing the marginal distributions of the hyperparameters theta and possibly quantiles / percentiles. + + """ + + # check that theta_star and covariance matrix are computed + if self.theta_star is None or self.cov_theta_internal is None or self.x_star is None: + raise ValueError("theta_star, x_star and covariance matrix of the hyperparameters must be computed before calling marginal_distributions_hp(). Please run the full DALIA pipeline or set them manually.") + + # set up dictionary to store results + results = { + 'hyperparameters': {}, + 'summary': { + 'n_params': self.model.n_hyperparameters, + 'param_names': self.model.theta_keys, + 'quantile_levels': quantiles.tolist() if quantiles is not None else None + } + } + + # Import necessary functions + from dalia.utils.gaussian_quadrature import compute_variance_gauss_hermite + from dalia.utils.reparametrizations import compute_bounds, compute_transformed_pdf, compute_transformed_quantiles + from dalia.prior_hyperparameters import GaussianMVNPriorHyperparameters + + hp_offset = 0 + for i, prior in enumerate(self.model.prior_hyperparameters): + if isinstance(prior, GaussianMVNPriorHyperparameters): + n_hp_for_this_prior = prior.mean.shape[0] + else: + n_hp_for_this_prior = 1 + + for j in range(0, n_hp_for_this_prior, 1): + param_name = results['summary']['param_names'][i+hp_offset+j] + + # Extract marginal parameters for this hyperparameter + theta_internal_i = self.theta_star_internal[i+hp_offset+j] + marg_var_internal_i = self.cov_theta_internal[ + i + hp_offset + j, i + hp_offset + j + ] + + # compute external_mean and external_var using + # compute_variance_gauss_hermite(mean_internal, variance_internal, transform, n_points=20): from utils gaussian quadrature + gauss_hermite_result = compute_variance_gauss_hermite( + theta_internal_i, marg_var_internal_i, prior.rescale_hyperparameters_to_internal, n_points=30 + ) + + # compute bounds for theta intervals using compute_bounds() from utils + (theta_internal_lower, theta_internal_upper), (theta_external_lower, theta_external_upper) = compute_bounds( + theta_internal_i, marg_var_internal_i, prior.rescale_hyperparameters_to_internal, n_std=4 + ) + + # set theta_internal_interval + theta_internal_interval = xp.linspace(theta_internal_lower, theta_internal_upper, num=100) + + # Compute PDF values in external scale + theta_external_interval, pdf_external = compute_transformed_pdf(theta_internal_i, marg_var_internal_i, theta_internal_interval, prior.rescale_hyperparameters_to_internal) + + # Initialize parameter dictionary + param_dict = { + 'mean_internal': float(get_host(theta_internal_i)), + 'variance_internal': float(get_host(marg_var_internal_i)), + 'mean_external': float(get_host(gauss_hermite_result['mean'])), + 'variance_external': float(get_host(gauss_hermite_result['variance'])), + 'pdf_data': (get_host(theta_external_interval), get_host(pdf_external)) # tuple of xp arrays + } + + # if quantiles is not None, compute quantiles using compute_transformed_quantiles() + if quantiles is not None: + quantiles_external = compute_transformed_quantiles( + theta_internal_i, marg_var_internal_i, quantiles, prior.rescale_hyperparameters_to_internal + ) + + # Also compute internal quantiles for completeness + from scipy.stats import norm + quantiles_internal = get_device(norm.ppf(get_host(quantiles), loc=get_host(theta_internal_i), scale=get_host(xp.sqrt(marg_var_internal_i)))) + + param_dict['quantiles'] = { + 'levels': get_host(quantiles).tolist(), + 'internal': { + 'values': get_host(quantiles_internal).tolist(), + 'pairs': list(zip(get_host(quantiles).tolist(), get_host(quantiles_internal).tolist())) + }, + 'external': { + 'values': get_host(quantiles_external).tolist(), + 'pairs': list(zip(get_host(quantiles).tolist(), get_host(quantiles_external).tolist())) + } + } + + # Store in main results dictionary + results['hyperparameters'][param_name] = param_dict + + hp_offset += n_hp_for_this_prior-1 + + # Old code + if False: + # iterate over all hyperparameters and store outputs in a dictionary + for i in range(self.model.n_hyperparameters): + param_name = results['summary']['param_names'][i] + + # Extract marginal parameters for this hyperparameter + theta_internal_i = self.theta_star_internal[i] + marg_var_internal_i = self.cov_theta_internal[i, i] + + # compute external_mean and external_var using + # compute_variance_gauss_hermite(mean_internal, variance_internal, transform, n_points=20): from utils gaussian quadrature + gauss_hermite_result = compute_variance_gauss_hermite( + theta_internal_i, marg_var_internal_i, self.model.prior_hyperparameters[i].rescale_hyperparameters_to_internal, n_points=30 + ) + + # compute bounds for theta intervals using compute_bounds() from utils + (theta_internal_lower, theta_internal_upper), (theta_external_lower, theta_external_upper) = compute_bounds( + theta_internal_i, marg_var_internal_i, self.model.prior_hyperparameters[i].rescale_hyperparameters_to_internal, n_std=4 + ) + + # set theta_internal_interval + theta_internal_interval = xp.linspace(theta_internal_lower, theta_internal_upper, num=100) + + # Compute PDF values in external scale + theta_external_interval, pdf_external = compute_transformed_pdf(theta_internal_i, marg_var_internal_i, theta_internal_interval, self.model.prior_hyperparameters[i].rescale_hyperparameters_to_internal) + + # Initialize parameter dictionary + param_dict = { + 'mean_internal': float(get_host(theta_internal_i)), + 'variance_internal': float(get_host(marg_var_internal_i)), + 'mean_external': float(get_host(gauss_hermite_result['mean'])), + 'variance_external': float(get_host(gauss_hermite_result['variance'])), + 'pdf_data': (get_host(theta_external_interval), get_host(pdf_external)) # tuple of xp arrays + } + + # if quantiles is not None, compute quantiles using compute_transformed_quantiles() + if quantiles is not None: + quantiles_external = compute_transformed_quantiles( + theta_internal_i, marg_var_internal_i, quantiles, self.model.prior_hyperparameters[i].rescale_hyperparameters_to_internal + ) + + # Also compute internal quantiles for completeness + from scipy.stats import norm + quantiles_internal = get_device(norm.ppf(get_host(quantiles), loc=get_host(theta_internal_i), scale=get_host(xp.sqrt(marg_var_internal_i)))) + + param_dict['quantiles'] = { + 'levels': get_host(quantiles).tolist(), + 'internal': { + 'values': get_host(quantiles_internal).tolist(), + 'pairs': list(zip(get_host(quantiles).tolist(), get_host(quantiles_internal).tolist())) + }, + 'external': { + 'values': get_host(quantiles_external).tolist(), + 'pairs': list(zip(get_host(quantiles).tolist(), get_host(quantiles_external).tolist())) + } + } + + # Store in main results dictionary + results['hyperparameters'][param_name] = param_dict + + # return dictionary + return results + def _compute_covariance_latent_parameters( - self, theta: NDArray, x_star: NDArray + self, theta_internal: NDArray, x_star: NDArray ) -> None: """Compute the marginal distribution of the latent parameters x. @@ -980,7 +1201,8 @@ def _compute_covariance_latent_parameters( marginal_latent_parameters : NDArray Marginal distribution of the latent parameters x. """ - self.model.theta[:] = theta + + self.model.theta_internal = xp.atleast_1d(theta_internal) self.model.x[:] = x_star eta = self.model.a @ self.model.x @@ -996,20 +1218,29 @@ def _compute_covariance_latent_parameters( self.solver.selected_inversion(sparsity="bta") def get_marginal_variances_latent_parameters( - self, theta: NDArray = None, x_star: NDArray = None + self, theta_external: NDArray = None, x_star: NDArray = None ) -> NDArray: + # TODO: this should be only called by rank 0? - if theta is None and x_star is None: + if theta_external is None and x_star is None: print( "Computing marginal variances for currently stored latent parameters. " ) x_star = self.model.x - theta = self.model.theta + theta = self.model.theta_internal + elif theta_external is not None and x_star is not None: + ## assume theta to be in "external" scale + self.model.theta_external = xp.atleast_1d(theta_external) + theta = self.model.theta_internal + elif theta is None or x_star is None: raise ValueError( "BOTH or NEITHER theta and x_star must be provided to compute the marginal variances." ) + check_vector_consistency(theta, comm=self.comm_world, flag="theta", verbose="Full") + check_vector_consistency(x_star, comm=self.comm_world, flag="x_star", verbose="Minimal") + # check order x_star ... -> potentially need to reorder marginal variances self._compute_covariance_latent_parameters(theta, x_star) @@ -1022,7 +1253,7 @@ def get_marginal_variances_latent_parameters( return marginal_variances def get_marginal_variances_observations( - self, theta: NDArray, x_star: NDArray + self, theta_external: NDArray = None, x_star: NDArray = None ) -> NDArray: """Extract the marginal variances of the observations. @@ -1045,21 +1276,26 @@ def get_marginal_variances_observations( Marginal variances of the observations. """ + # TODO: implement this for non-Gaussian likelihoods + check_vector_consistency(theta_external, comm=self.comm_world, flag="theta_external", verbose="Full") + check_vector_consistency(x_star, comm=self.comm_world, flag="x_star", verbose="Minimal") + if self.model.is_likelihood_gaussian(): # TODO: this should be only called by rank 0? - if theta is None and x_star is None: + if theta_external is None and x_star is None: print( "Computing marginal variances for currently stored latent parameters. " ) x_star = self.model.x - theta = self.model.theta - elif theta is None or x_star is None: + theta_external = self.model.theta_external + + if theta_external is None or x_star is None: raise ValueError( "BOTH or NEITHER theta and x_star must be provided to compute the marginal variances." ) # check order x_star ... -> potentially need to reorder marginal variances - self._compute_covariance_latent_parameters(theta, x_star) + self._compute_covariance_latent_parameters(theta_external, x_star) # now only extract diagonal elements corresponding to marginal variances of the latent parameters variances_latent = self.solver._structured_to_spmatrix( @@ -1075,11 +1311,52 @@ def get_marginal_variances_observations( return marginal_variances_observations - else: - raise NotImplementedError( - "in compute marginals observations: Only Gaussian likelihood is currently supported." + raise NotImplementedError( + "in compute marginals observations: Only Gaussian likelihood is currently supported." + ) + + def sample_posterior_latent_parameters( + self, n_samples: int = 1000, random_seed: int = 33 + ) -> NDArray: + """Sample from the posterior distribution of the latent parameters x. + + Parameters + ---------- + n_samples : int + Number of samples to draw. + + Returns + ------- + samples : NDArray[n_latent_parameters, n_samples] + Samples from the posterior distribution of the latent parameters x. + + Notes + ----- + Based on the factorization of the conditional precision matrix Q_conditional at theta_star and x_star. + """ + + if self.theta_star is None or self.x_star is None: + raise ValueError( + "theta_star and x_star must be computed calling minimize() before calling sample_posterior_latent_parameters()." ) + # sample iid standard normal + rng = xp.random.default_rng(seed=random_seed) + iid_samples = rng.standard_normal( + size=(self.model.n_latent_parameters, n_samples) + ) + + # compute the corresponding samples in the original space using the covariance matrix + Q_conditional = self.model.construct_Q_conditional( + eta=self.model.a @ self.x_star + ) + self.solver.factorize(Q_conditional) + samples = xp.reshape(self.x_star, (-1, 1)) + sp.linalg.solve_triangular( + self.solver.L.T, iid_samples, lower=False + ) + + return samples + def _inner_iteration( self, ) -> float: @@ -1104,7 +1381,7 @@ def _inner_iteration( if counter > self.inner_iteration_max_iter: print_msg( "Theta value at failing of the inner_iteration: ", - self.model.theta, + self.model.theta_internal, flush=True, ) raise ValueError( diff --git a/src/dalia/core/model.py b/src/dalia/core/model.py index e7e7428e..bc366b86 100644 --- a/src/dalia/core/model.py +++ b/src/dalia/core/model.py @@ -14,6 +14,7 @@ GaussianMVNPriorHyperparametersConfig, GaussianPriorHyperparametersConfig, PenalizedComplexityPriorHyperparametersConfig, + GammaPriorHyperparametersConfig, ) from dalia.core.likelihood import Likelihood from dalia.core.prior_hyperparameters import PriorHyperparameters @@ -24,12 +25,15 @@ GaussianMVNPriorHyperparameters, GaussianPriorHyperparameters, PenalizedComplexityPriorHyperparameters, + GammaPriorHyperparameters, ) from dalia.submodels import ( BrainiacSubModel, RegressionSubModel, SpatialSubModel, SpatioTemporalSubModel, + AR1SubModel, + GenericSubModel, ) from dalia.utils import add_str_header, boxify, scaled_logit from dalia.utils.scalar_ndarray import ensure_scalar @@ -57,8 +61,11 @@ def __init__( self.n_fixed_effects: int = 0 + self._theta_external: ArrayLike = [] + self._theta_internal: ArrayLike = [] + # For each submodel... - theta: ArrayLike = [] + theta_external: ArrayLike = [] theta_keys: ArrayLike = [] self.hyperparameters_idx: ArrayLike = [0] self.prior_hyperparameters: list[PriorHyperparameters] = [] @@ -155,6 +162,73 @@ def __init__( elif isinstance(submodel, RegressionSubModel): self.n_fixed_effects += submodel.n_fixed_effects + elif isinstance(submodel, AR1SubModel): + + if isinstance(submodel.config.ph_phi, BetaPriorHyperparametersConfig): + self.prior_hyperparameters.append( + BetaPriorHyperparameters( + config=submodel.config.ph_phi, + ) + ) + elif isinstance( + submodel.config.ph_phi, + PenalizedComplexityPriorHyperparametersConfig, + ): + self.prior_hyperparameters.append( + PenalizedComplexityPriorHyperparameters( + config=submodel.config.ph_phi, + hyperparameter_type="phi", + ) + ) + + if isinstance( + submodel.config.ph_tau, GaussianPriorHyperparametersConfig + ): + self.prior_hyperparameters.append( + GaussianPriorHyperparameters( + config=submodel.config.ph_tau, + ) + ) + if isinstance(submodel.config.ph_tau, GammaPriorHyperparametersConfig): + self.prior_hyperparameters.append( + GammaPriorHyperparameters( + config=submodel.config.ph_tau, + ) + ) + + elif isinstance(submodel, GenericSubModel): + print( + "Generic submodel detected. Initializing prior hyperparameters for generic submodel." + ) + print(submodel.config.ph_tau) + if isinstance(submodel.config.ph_tau, GammaPriorHyperparametersConfig): + self.prior_hyperparameters.append( + GammaPriorHyperparameters( + config=submodel.config.ph_tau, + ) + ) + if isinstance( + submodel.config.ph_tau, + PenalizedComplexityPriorHyperparametersConfig, + ): + self.prior_hyperparameters.append( + PenalizedComplexityPriorHyperparameters( + config=submodel.config.ph_tau, + hyperparameter_type="tau", + ) + ) + + # doesn't really make sense to allow Gaussian prior on precision + # implement proper check to raise error later + if isinstance( + submodel.config.ph_tau, GaussianPriorHyperparametersConfig + ): + self.prior_hyperparameters.append( + GaussianPriorHyperparameters( + config=submodel.config.ph_tau, + ) + ) + elif isinstance(submodel, BrainiacSubModel): # h2 hyperparameters if isinstance(submodel.config.ph_h2, BetaPriorHyperparametersConfig): @@ -173,33 +247,29 @@ def __init__( config=submodel.config.ph_alpha, ) ) + if isinstance( + submodel.config.ph_alpha, + PenalizedComplexityPriorHyperparametersConfig, + ): + self.prior_hyperparameters.append( + PenalizedComplexityPriorHyperparameters( + config=submodel.config.ph_alpha, + hyperparameter_type="alpha", + ) + ) else: raise ValueError("Unknown submodel type") # ...and read their hyperparameters theta_submodel, theta_keys_submodel = submodel.config.read_hyperparameters() - theta.append(theta_submodel) + theta_external.append(theta_submodel) theta_keys += theta_keys_submodel self.hyperparameters_idx.append( self.hyperparameters_idx[-1] + len(theta_submodel) ) - # Add the likelihood hyperparameters - ( - lh_hyperparameters, - lh_hyperparameters_keys, - ) = likelihood_config.read_hyperparameters() - - theta.append(lh_hyperparameters) - self.theta: NDArray = xp.concatenate(theta) - - theta_keys += lh_hyperparameters_keys - self.theta_keys: NDArray = theta_keys - - self.n_hyperparameters = self.theta.size - # --- Initialize the latent parameters and the design matrix self.n_latent_parameters: int = 0 self.latent_parameters_idx: list[int] = [0] @@ -306,6 +376,24 @@ def __init__( hyperparameter_type="prec_o", ) ) + elif isinstance( + likelihood_config.prior_hyperparameters, + BetaPriorHyperparametersConfig, + ): + self.prior_hyperparameters.append( + BetaPriorHyperparameters( + config=likelihood_config.prior_hyperparameters, + ) + ) + elif isinstance( + likelihood_config.prior_hyperparameters, + GammaPriorHyperparametersConfig, + ): + self.prior_hyperparameters.append( + GammaPriorHyperparameters( + config=likelihood_config.prior_hyperparameters, + ) + ) elif likelihood_config.type == "poisson": self.likelihood: Likelihood = PoissonLikelihood( n_observations=self.n_observations, @@ -319,12 +407,64 @@ def __init__( self.likelihood_config: LikelihoodConfig = likelihood_config + # Add the likelihood hyperparameters + ( + lh_hyperparameters, + lh_hyperparameters_keys, + ) = likelihood_config.read_hyperparameters() + + theta_external.append(lh_hyperparameters) + self.theta_external = xp.concatenate(theta_external) + + print("Initial hyperparameters (external scale): ", self.theta_external) + print("Initial hyperparameters (internal scale): ", self.theta_internal) + + theta_keys += lh_hyperparameters_keys + self.theta_keys: NDArray = theta_keys + + self.n_hyperparameters = self.theta_external.size + # --- Recurrent variables self.Q_prior = None self.Q_prior_data_mapping = [0] self.Q_conditional = None self.Q_conditional_data_mapping = [0] + ######################################################################## + @property + def theta_external(self): + """External/user/interpretable scale theta.""" + # the copy is important to make sure that in place operations still trigger updating + return self._theta_external.copy() + + @theta_external.setter + def theta_external(self, value): + """Set external theta and automatically update internal. + + Notes + ----- + The re-scaling is implemented for all prios but PenalizedComplexity (identity but already in the correct "log" scale). + """ + self._theta_external = xp.array(value) + self._theta_internal = self.rescale_hyperparameters_to_internal( + self._theta_external, direction="forward" + ) + + @property + def theta_internal(self): + """Internal/BFGS scale theta.""" + return self._theta_internal.copy() + + @theta_internal.setter + def theta_internal(self, value): + """Set internal theta and automatically update external.""" + self._theta_internal = xp.array(value) + self._theta_external = self.rescale_hyperparameters_to_internal( + self._theta_internal, direction="backward" + ) + + ######################################################################## + def construct_Q_prior(self) -> sp.sparse.spmatrix: kwargs = {} @@ -335,22 +475,48 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: cols = [] data = [] + ## TODO: improve the if / elif statements for i, submodel in enumerate(self.submodels): if isinstance(submodel, SpatioTemporalSubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float( + self.theta_external[hp_idx] + ) + # kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, SpatialSubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float( + self.theta_external[hp_idx] + ) + # kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, BrainiacSubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float( + self.theta_external[hp_idx] + ) + # kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) + elif isinstance(submodel, AR1SubModel): + for hp_idx in range( + self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] + ): + kwargs[self.theta_keys[hp_idx]] = float( + self.theta_external[hp_idx] + ) + # kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) + elif isinstance(submodel, GenericSubModel): + for hp_idx in range( + self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] + ): + kwargs[self.theta_keys[hp_idx]] = float( + self.theta_external[hp_idx] + ) + elif isinstance(submodel, RegressionSubModel): ... @@ -383,17 +549,41 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float( + self.theta_external[hp_idx] + ) + # kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, SpatialSubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float( + self.theta_external[hp_idx] + ) + # kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) elif isinstance(submodel, BrainiacSubModel): for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs[self.theta_keys[hp_idx]] = float( + self.theta_external[hp_idx] + ) + # kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) + elif isinstance(submodel, AR1SubModel): + for hp_idx in range( + self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] + ): + kwargs[self.theta_keys[hp_idx]] = float( + self.theta_external[hp_idx] + ) + # kwargs[self.theta_keys[hp_idx]] = float(theta_interpret[hp_idx]) + elif isinstance(submodel, GenericSubModel): + for hp_idx in range( + self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] + ): + kwargs[self.theta_keys[hp_idx]] = float( + self.theta_external[hp_idx] + ) submodel_Q_prior = submodel.construct_Q_prior(**kwargs) @@ -419,7 +609,7 @@ def construct_Q_conditional( if self.likelihood_config.type == "gaussian": kwargs = { "eta": eta, - "theta": float(self.theta[-1]), + "theta": float(self.theta_external[-1]), } else: kwargs = { @@ -428,7 +618,7 @@ def construct_Q_conditional( if isinstance(self.submodels[0], BrainiacSubModel): # Brainiac specific rule - kwargs["h2"] = float(self.theta[0]) + kwargs["h2"] = float(self.theta_external[0]) d_matrix = self.submodels[0].evaluate_d_matrix(**kwargs) else: # General rules @@ -462,7 +652,7 @@ def construct_information_vector( """Construct the information vector.""" if isinstance(self.submodels[0], BrainiacSubModel): - kwargs = {"h2": float(self.theta[0])} + kwargs = {"h2": float(self.theta_external[0])} gradient_likelihood = self.submodels[0].evaluate_gradient_likelihood( eta=eta, y=self.y, **kwargs ) @@ -471,7 +661,7 @@ def construct_information_vector( gradient_likelihood = self.likelihood.evaluate_gradient_likelihood( eta=eta, y=self.y, - theta=self.theta[self.hyperparameters_idx[-1] :], + theta=self.theta_external[self.hyperparameters_idx[-1] :], ) information_vector: NDArray = ( @@ -488,23 +678,16 @@ def evaluate_log_prior_hyperparameters(self) -> float: """Evaluate the log prior hyperparameters.""" log_prior = 0.0 - # if BFGS and model scale differ: rescale -- generalize - if isinstance(self.submodels[0], BrainiacSubModel): - # - theta_interpret = self.theta.copy() - theta_interpret[0] = scaled_logit(self.theta[0], direction="backward") - log_prior += self.prior_hyperparameters[0].evaluate_log_prior( - theta_interpret[0] - ) - - log_prior += self.prior_hyperparameters[1].evaluate_log_prior( - theta_interpret[1:] - ) - else: - theta_interpret = self.theta - - for i, prior_hyperparameter in enumerate(self.prior_hyperparameters): - log_prior += prior_hyperparameter.evaluate_log_prior(theta_interpret[i]) + for i, prior_hyperparameter in enumerate(self.prior_hyperparameters): + if isinstance(prior_hyperparameter, GaussianMVNPriorHyperparameters): + # for MVN prior hyperparameters, we need to pass the full vector + log_prior += prior_hyperparameter.evaluate_log_prior( + self.theta_external[i : i + prior_hyperparameter.mean.shape[0]] + ) + else: + log_prior += prior_hyperparameter.evaluate_log_prior( + self.theta_external[i] + ) return log_prior @@ -512,27 +695,29 @@ def get_theta_likelihood(self) -> NDArray: """Return the likelihood hyperparameters.""" if isinstance(self.submodels[0], BrainiacSubModel): - theta_likelihood = 1 - scaled_logit(self.theta[0], direction="backward") + theta_likelihood = 1 - self.theta_external[0] else: - theta_likelihood = self.theta[self.hyperparameters_idx[-1] :] + theta_likelihood = self.theta_external[self.hyperparameters_idx[-1] :] return theta_likelihood - def get_theta_interpret(self) -> NDArray: - theta_interpret = xp.zeros_like(self.theta) + def rescale_hyperparameters_to_internal(self, theta, direction): - for i, submodel in enumerate(self.submodels): - # print("indices: ", self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1]) - # print("submodel theta in model: ", self.theta[self.hyperparameters_idx[i] : self.hyperparameters_idx[i + 1]]) - theta_interpret[ - self.hyperparameters_idx[i] : self.hyperparameters_idx[i + 1] - ] = submodel.rescale_hyperparameters_to_interpret( - self.theta[ - self.hyperparameters_idx[i] : self.hyperparameters_idx[i + 1] - ] - ) + # need to iterate over theta and its prior hyperparameters + theta_internal = xp.copy(theta) + + for i, prior_hyperparameter in enumerate(self.prior_hyperparameters): + ## how to handle priors that have multiple hyperparameters? + if isinstance(prior_hyperparameter, GaussianMVNPriorHyperparameters): + pass # no rescaling implemented + else: + theta_internal[i] = ( + prior_hyperparameter.rescale_hyperparameters_to_internal( + theta[i], direction=direction + ) + ) - return theta_interpret + return theta_internal def evaluate_likelihood(self, eta: NDArray, **kwargs) -> float: """Evaluate the likelihood. @@ -551,11 +736,12 @@ def evaluate_likelihood(self, eta: NDArray, **kwargs) -> float: """ if isinstance(self.submodels[0], BrainiacSubModel): - kwargs["h2"] = float(self.theta[0]) + # kwargs["h2"] = float(self.theta[0]) + kwargs["h2"] = float(self.theta_external[0]) likelihood = self.submodels[0].evaluate_likelihood(eta, self.y, **kwargs) else: likelihood = self.likelihood.evaluate_likelihood( - eta, self.y, theta=self.theta[self.hyperparameters_idx[-1] :] + eta, self.y, theta=self.theta_external[self.hyperparameters_idx[-1] :] ) return ensure_scalar(likelihood) diff --git a/src/dalia/core/prior_hyperparameters.py b/src/dalia/core/prior_hyperparameters.py index 88c1ba4e..c438b7f5 100644 --- a/src/dalia/core/prior_hyperparameters.py +++ b/src/dalia/core/prior_hyperparameters.py @@ -1,6 +1,7 @@ # Copyright 2024-2025 DALIA authors. All rights reserved. from abc import ABC, abstractmethod +from dalia import xp from dalia.configs.priorhyperparameters_config import PriorHyperparametersConfig @@ -16,6 +17,23 @@ def __init__( self.config: PriorHyperparametersConfig = config + @abstractmethod + def rescale_hyperparameters_to_internal(self, theta: float, direction: str) -> float: + """Rescale hyperparameters to and from internal scale. + + Args: + theta: Hyperparameter + direction: "forward", "backward", "forward_jacobian", "backward_jacobian" + Returns: + Rescaled hyperparameter. + + """ + + if direction == "forward" or direction == "backward": + return theta + elif direction == "forward_jacobian" or direction == "backward_jacobian": + return xp.ones_like(theta) + @abstractmethod def evaluate_log_prior(self, theta: float) -> float: """Evaluate the log prior hyperparameters.""" diff --git a/src/dalia/core/submodel.py b/src/dalia/core/submodel.py index 7cefe5e3..0c8123da 100644 --- a/src/dalia/core/submodel.py +++ b/src/dalia/core/submodel.py @@ -52,13 +52,6 @@ def __init__( except FileNotFoundError: self.x_initial: NDArray = xp.zeros((self.a.shape[1]), dtype=float) - def rescale_hyperparameters_to_interpret(self, theta: NDArray) -> NDArray: - """Rescale hyperparameters to interpret them. Does nothing unless implemented in specific submodel. - - Note: It doesnt include the hyperparameters from the likelihood. If they need rescaling too. Needs to be done in likelihood. - """ - - return theta @abstractmethod def construct_Q_prior(self, **kwargs) -> sp.sparse.coo_matrix: diff --git a/src/dalia/likelihoods/gaussian.py b/src/dalia/likelihoods/gaussian.py index ee69bd16..6ed8a864 100644 --- a/src/dalia/likelihoods/gaussian.py +++ b/src/dalia/likelihoods/gaussian.py @@ -29,7 +29,7 @@ def evaluate_likelihood( Evaluate Gaussian log-likelihood for a given set of observations, latent parameters, and design matrix, where the observations are assumed to be identically and independently distributed given eta (=A*x). Leading to: - log (p(y|eta)) = -0.5 * n * log(2 * pi) - 0.5 * n * theta_observations - 0.5 * exp(theta_observations) * (y - eta)^T * (y - eta) + log (p(y|eta)) = -0.5 * n * log(2 * pi) - 0.5 * n * log(theta) - 0.5 * theta * (y - eta)^T * (y - eta) where the constant in front of the likelihood is omitted. Parameters @@ -40,7 +40,7 @@ def evaluate_likelihood( Vector of the observations. kwargs : theta : float - Specific parameter for the likelihood calculation. + precision parameter for the likelihood calculation. theta > 0. Returns ------- @@ -52,11 +52,16 @@ def evaluate_likelihood( if theta is None: raise ValueError("theta must be provided to evaluate gaussian likelihood.") + if theta <= 0: + raise ValueError(f"theta must be positive, got {theta}") + yEta = eta - y # print("xp.exp(theta) in lh:", xp.exp(theta)) likelihood: float = ( - 0.5 * theta * self.n_observations - 0.5 * xp.exp(theta) * yEta.T @ yEta + -0.5 * self.n_observations * xp.log(2 * xp.pi) + + 0.5 * xp.log(theta) * self.n_observations + - 0.5 * theta * yEta.T @ yEta ) return likelihood @@ -77,7 +82,7 @@ def evaluate_gradient_likelihood( Vector of the observations. kwargs : theta : float - Specific parameter for the likelihood calculation. + precision parameter for the likelihood calculation. theta > 0. Returns ------- @@ -91,7 +96,10 @@ def evaluate_gradient_likelihood( "theta must be provided to evaluate gradient of gaussian likelihood." ) - gradient_likelihood: NDArray = -xp.exp(theta) * (eta - y) + if theta <= 0: + raise ValueError(f"theta must be positive, got {theta}") + + gradient_likelihood: NDArray = -theta * (eta - y) return gradient_likelihood @@ -121,11 +129,11 @@ def evaluate_hessian_likelihood( raise ValueError( "theta must be provided to evaluate gradient of gaussian likelihood." ) + if theta <= 0: + raise ValueError(f"theta must be positive, got {theta}") # print("hessian lh: xp.exp(theta)", xp.exp(theta)) - hessian_likelihood: ArrayLike = -xp.exp(theta) * sp.sparse.eye( - self.n_observations - ) + hessian_likelihood: ArrayLike = -theta * sp.sparse.eye(self.n_observations) return hessian_likelihood diff --git a/src/dalia/models/coregional_model.py b/src/dalia/models/coregional_model.py index e3acfa47..e1c14671 100644 --- a/src/dalia/models/coregional_model.py +++ b/src/dalia/models/coregional_model.py @@ -102,14 +102,18 @@ def __init__( f"Model {model} has a different number of fixed effects than the first model" ) + self._theta_external: ArrayLike = [] + self._theta_internal: ArrayLike = [] + + # For each model # Get Models() hyperparameters - theta: ArrayLike = [] + theta_external: ArrayLike = [] theta_keys: ArrayLike = [] self.hyperparameters_idx: ArrayLike = [0] self.prior_hyperparameters: list[PriorHyperparameters] = [] for model in self.models: - theta_model = model.theta + theta_model = model.theta_external theta_keys_model = model.theta_keys # remove the theta that correspond to the "sigma_xx" where x can be whatever @@ -125,7 +129,7 @@ def __init__( key for i, key in enumerate(theta_keys_model) if i not in sigma_indices ] - theta.append(xp.array(theta_model)) + theta_external.append(xp.array(theta_model)) theta_keys += theta_keys_model self.hyperparameters_idx.append( @@ -144,7 +148,7 @@ def __init__( theta_coregional_model, theta_keys_coregional_model, ) = coregional_model_config.read_hyperparameters() - theta.append(xp.array(theta_coregional_model)) + theta_external.append(xp.array(theta_coregional_model)) theta_keys += theta_keys_coregional_model self.hyperparameters_idx.append( @@ -152,8 +156,8 @@ def __init__( ) # Finalize the hyperparameters - self.theta: NDArray = xp.concatenate(theta) - self.n_hyperparameters = self.theta.size + self.theta_external: NDArray = xp.concatenate(theta_external) + self.n_hyperparameters = self.theta_external.size self.theta_keys: NDArray = theta_keys # Initialize the Coregional Prior Hyperparameters @@ -299,7 +303,7 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: for hp_idx in range( self.hyperparameters_idx[i], self.hyperparameters_idx[i + 1] ): - kwargs_st[self.theta_keys[hp_idx]] = float(self.theta[hp_idx]) + kwargs_st[self.theta_keys[hp_idx]] = float(self.theta_external[hp_idx]) Qu_list[i] = submodel_st.construct_Q_prior(**kwargs_st).tocsc() @@ -310,10 +314,10 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: kwargs_r = {} Q_r[i] = submodel_r.construct_Q_prior(**kwargs_r).tocsc() - sigma_0 = xp.exp(self.theta[self.theta_keys.index("sigma_0")]) - sigma_1 = xp.exp(self.theta[self.theta_keys.index("sigma_1")]) + sigma_0 = xp.exp(self.theta_external[self.theta_keys.index("sigma_0")]) + sigma_1 = xp.exp(self.theta_external[self.theta_keys.index("sigma_1")]) - lambda_0_1 = self.theta[self.theta_keys.index("lambda_0_1")] + lambda_0_1 = self.theta_external[self.theta_keys.index("lambda_0_1")] if self.n_models == 2: q11 = sp.sparse.coo_matrix( @@ -366,10 +370,10 @@ def construct_Q_prior(self) -> sp.sparse.spmatrix: # Qprior_st = sp.sparse.bmat([[q11, q12], [q21, q22]]).tocsc() elif self.n_models == 3: - sigma_2 = xp.exp(self.theta[self.theta_keys.index("sigma_2")]) + sigma_2 = xp.exp(self.theta_external[self.theta_keys.index("sigma_2")]) - lambda_0_2 = self.theta[self.theta_keys.index("lambda_0_2")] - lambda_1_2 = self.theta[self.theta_keys.index("lambda_1_2")] + lambda_0_2 = self.theta_external[self.theta_keys.index("lambda_0_2")] + lambda_1_2 = self.theta_external[self.theta_keys.index("lambda_1_2")] q11 = sp.sparse.coo_matrix( (1 / sigma_0**2) * Qu_list[0] @@ -621,7 +625,7 @@ def construct_Q_conditional( "eta": eta[ self.n_observations_idx[i] : self.n_observations_idx[i + 1] ], - "theta": float(self.theta[self.hyperparameters_idx[i + 1] - 1]), + "theta": float(self.theta_external[self.hyperparameters_idx[i + 1] - 1]), } else: kwargs = { @@ -657,7 +661,7 @@ def construct_information_vector( gradient_likelihood = model.likelihood.evaluate_gradient_likelihood( eta=eta[self.n_observations_idx[i] : self.n_observations_idx[i + 1]], y=self.y[self.n_observations_idx[i] : self.n_observations_idx[i + 1]], - theta=float(self.theta[self.hyperparameters_idx[i + 1] - 1]), + theta=float(self.theta_external[self.hyperparameters_idx[i + 1] - 1]), ) gradient_vector_list.append(gradient_likelihood) @@ -711,7 +715,7 @@ def evaluate_likelihood( likelihood += model.likelihood.evaluate_likelihood( eta=eta[self.n_observations_idx[i] : self.n_observations_idx[i + 1]], y=self.y[self.n_observations_idx[i] : self.n_observations_idx[i + 1]], - theta=float(self.theta[self.hyperparameters_idx[i + 1] - 1]), + theta=float(self.theta_external[self.hyperparameters_idx[i + 1] - 1]), ) return ensure_scalar(likelihood) @@ -720,19 +724,13 @@ def evaluate_log_prior_hyperparameters(self) -> float: """Evaluate the log prior hyperparameters.""" log_prior = 0.0 - theta_interpret = self.theta + theta_interpret = self.theta_external for i, prior_hyperparameter in enumerate(self.prior_hyperparameters): log_prior += prior_hyperparameter.evaluate_log_prior(theta_interpret[i]) return log_prior - def get_theta_interpret(self) -> NDArray: - # TODO: find long term fix - theta_interpret = self.theta.copy() - - return theta_interpret - def __str__(self) -> str: """String representation of the model.""" str_representation = "" diff --git a/src/dalia/prior_hyperparameters/__init__.py b/src/dalia/prior_hyperparameters/__init__.py index 8a8d8c9d..274d58cc 100644 --- a/src/dalia/prior_hyperparameters/__init__.py +++ b/src/dalia/prior_hyperparameters/__init__.py @@ -1,15 +1,19 @@ # Copyright 2024-2025 DALIA authors. All rights reserved. -from dalia.prior_hyperparameters.beta import BetaPriorHyperparameters from dalia.prior_hyperparameters.gaussian import GaussianPriorHyperparameters from dalia.prior_hyperparameters.gaussian_mvn import GaussianMVNPriorHyperparameters from dalia.prior_hyperparameters.penalized_complexity import ( PenalizedComplexityPriorHyperparameters, ) +from dalia.prior_hyperparameters.beta import BetaPriorHyperparameters +from dalia.prior_hyperparameters.gamma import GammaPriorHyperparameters +from dalia.prior_hyperparameters.inverse_gamma import InverseGammaPriorHyperparameters __all__ = [ "GaussianPriorHyperparameters", "GaussianMVNPriorHyperparameters", "PenalizedComplexityPriorHyperparameters", "BetaPriorHyperparameters", + "GammaPriorHyperparameters", + "InverseGammaPriorHyperparameters", ] diff --git a/src/dalia/prior_hyperparameters/beta.py b/src/dalia/prior_hyperparameters/beta.py index aafa91e9..ad70a253 100644 --- a/src/dalia/prior_hyperparameters/beta.py +++ b/src/dalia/prior_hyperparameters/beta.py @@ -6,6 +6,7 @@ GaussianPriorHyperparametersConfig, ) from dalia.core.prior_hyperparameters import PriorHyperparameters +from dalia.utils.link_functions import scaled_logit class BetaPriorHyperparameters(PriorHyperparameters): @@ -21,6 +22,24 @@ def __init__( self.alpha: float = config.alpha self.beta: float = config.beta + + def rescale_hyperparameters_to_internal(self, theta, direction): + + ### TODO: on longer term make scaled_logit default but let it be configurable in config + ## beta prior is defined on [0,1], while BFGS works on (-inf, inf) + if direction == "forward": + theta_scaled = scaled_logit(theta, direction="forward") + elif direction == "backward": + theta_scaled = scaled_logit(theta, direction="backward") + elif direction == "forward_jacobian": + theta_scaled = scaled_logit(theta, direction="forward_jacobian") + elif direction == "backward_jacobian": + theta_scaled = scaled_logit(theta, direction="backward_jacobian") + else: + raise ValueError(f"Unknown direction: {direction}") + + return theta_scaled + def evaluate_log_prior(self, theta: float, **kwargs) -> float: """Evaluate the log prior hyperparameters.""" diff --git a/src/dalia/prior_hyperparameters/gamma.py b/src/dalia/prior_hyperparameters/gamma.py new file mode 100644 index 00000000..61dc966c --- /dev/null +++ b/src/dalia/prior_hyperparameters/gamma.py @@ -0,0 +1,347 @@ +# Copyright 2024-2025 DALIA authors. All rights reserved. +from dalia import NDArray +from scipy.sparse import spmatrix +from dalia import sp, xp + +import numpy as np + +from dalia.configs.priorhyperparameters_config import ( + GammaPriorHyperparametersConfig, +) +from dalia.core.prior_hyperparameters import PriorHyperparameters + + +class GammaPriorHyperparameters(PriorHyperparameters): + """Gamma prior hyperparameters. + + p(theta) = (beta^alpha / Gamma(alpha)) * theta^(alpha - 1) * exp(-beta * theta) + + and in log scale: + log p(theta) = alpha * log(beta) - log(Gamma(alpha)) + (alpha - 1) * log(theta) - beta * theta + + where theta is typically a positive parameter such as a precision or rate. + + Parameters + ---------- + config : GammaPriorHyperparametersConfig + Configuration object containing alpha and beta parameters. + + Attributes + ---------- + alpha : float. alpha > 0 + Shape parameter of the Gamma distribution. + beta : float. beta > 0 + Rate parameter of the Gamma distribution. + normalizing_constant : float + Precomputed normalizing constant for log probability evaluation. + """ + + def __init__( + self, + config: GammaPriorHyperparametersConfig, + ) -> None: + """ + Initialize the Gamma prior hyperparameters. + + Parameters + ---------- + config : GammaPriorHyperparametersConfig + Configuration containing alpha (shape) and beta (rate) parameters. + + Raises + ------ + ValueError + If alpha or beta are not positive. + """ + super().__init__(config) + + self.alpha: float = config.alpha + self.beta: float = config.beta + + # Validate alpha and beta are positive + if self.alpha <= 0: + raise ValueError(f"Alpha must be positive, got {self.alpha}") + if self.beta <= 0: + raise ValueError(f"Beta must be positive, got {self.beta}") + + self.normalizing_constant: float = self.alpha * xp.log(self.beta) - float( + sp.special.gammaln(self.alpha) + ) + + def rescale_hyperparameters_to_internal(self, theta, direction): + """ + Transform between external and internal parameter representations. + + The Gamma distribution is defined for positive values, but optimization + often works better in unconstrained space. This method transforms + between theta (positive) and log(theta) (unconstrained). + + Parameters + ---------- + theta : float or NDArray + Parameter value(s) to transform. + direction : str + Transformation direction: + - "forward": theta -> log(theta) (external to internal) + - "backward": log(theta) -> theta (internal to external) + + Returns + ------- + float or NDArray + Transformed parameter value(s). + + Raises + ------ + ValueError + If direction is not "forward" or "backward". + """ + if direction == "forward": + theta_scaled = xp.log(theta) + elif direction == "backward": + theta_scaled = xp.exp(theta) + elif direction == "forward_jacobian": + theta_scaled = 1 / theta # d(log(theta))/d(theta) = 1/theta + elif direction == "backward_jacobian": + theta_scaled = theta # d(exp(theta))/d(theta) = exp(theta) = theta + else: + raise ValueError(f"Unknown direction: {direction}") + + return theta_scaled + + def evaluate_log_prior(self, theta: float, **kwargs) -> float: + """ + Evaluate the log prior probability density. + + Computes the log probability density of the Gamma distribution + at the given theta value in its external/user representation. + + Parameters + ---------- + theta : float + Parameter value at which to evaluate the log prior. + Must be positive (external/user representation). + **kwargs + Additional keyword arguments (unused). + + Returns + ------- + float + Log prior probability density at theta. + + Notes + ----- + The computation follows: + log p(θ) = C + (α - 1) * log(θ) - β * θ + where C is the normalizing constant, α is the shape parameter, + and β is the rate parameter. + + Raises + ------ + ValueError + If theta is not positive (implicitly through log computation). + """ + + log_prior = ( + self.normalizing_constant + + (self.alpha - 1) * xp.log(theta) + - self.beta * theta + ) + + return log_prior + +if __name__ == "__main__": + """ + Test Gaussian quadrature for functions using rescale_hyperparameters_to_internal() + from gamma prior hyperparameters. + + We start with normally distributed random variables in internal space (unconstrained) + that get reparametrized to external space (positive) using the gamma prior's + rescaling function. + """ + + from dalia.utils.gaussian_quadrature import compute_variance_gauss_hermite + + print("=" * 80) + print("Testing Gaussian Quadrature with Gamma Prior Rescaling") + print("=" * 80) + + # Create a inverse gamma prior configuration + alpha_values = [1.0, 3.0, 5.0] + beta_values = [0.5, 1.0, 2.0] + + for alpha, beta in zip(alpha_values, beta_values): + print(f"\nTesting alpha={alpha}, beta={beta}") + config = GammaPriorHyperparametersConfig(alpha=alpha, beta=beta) + gamma_prior = GammaPriorHyperparameters(config=config) + + ## compare against scipy implementation + from scipy.stats import gamma + + test_values = [0.1, 0.5, 1.0, 2.0, 5.0] + print("Comparing log prior evaluations with scipy.stats.gamma:") + for val in test_values: + logp_dalia = gamma_prior.evaluate_log_prior(val) + ## note: scipy's gamma takes scale = 1/beta + logp_scipy = gamma.logpdf(val, a=alpha, scale=1/beta) + print(f" θ = {val:4.1f}: DALIA logp = {logp_dalia:.6f}, " + f"scipy logp = {logp_scipy:.6f}, diff = {abs(logp_dalia - logp_scipy):.2e}") + if abs(logp_dalia - logp_scipy) > 1e-6: + raise ValueError("Log prior evaluation does not match scipy implementation.") + + print() + print("All tests passed!") + + # Create a gamma prior configuration + config = GammaPriorHyperparametersConfig(alpha=2.0, beta=1.0) + gamma_prior = GammaPriorHyperparameters(config=config) + + # Define parameters for the normal distribution in internal space + # These represent log(theta) where theta > 0 is the gamma-distributed parameter + mean_internal = 0.5 # Mean of log(theta) + variance_internal = 0.25 # Variance of log(theta) + + print(f"Internal space (log-scale) parameters:") + print(f" Mean: {mean_internal}") + print(f" Variance: {variance_internal}") + print(f" Standard deviation: {xp.sqrt(variance_internal)}") + print() + + # Test 1: Compute statistics using Gaussian quadrature + print("1. Computing statistics using Gaussian quadrature:") + + # Use the rescaling function as the transform + def transform_func(x, direction): + return gamma_prior.rescale_hyperparameters_to_internal(x, direction) + + # Compute statistics using different numbers of quadrature points + for n_points in [10, 20, 30, 50]: + result = compute_variance_gauss_hermite( + mean_internal, + variance_internal, + transform_func, + n_points=n_points + ) + + print(f" n_points = {n_points:2d}: Mean = {result['mean']:.6f}, " + f"Std = {result['std']:.6f}, Var = {result['variance']:.6f}") + + print() + + # Test 2: Compare with analytical solution + print("2. Comparison with analytical log-normal distribution:") + print(" For log(Y) ~ N(μ, σ²), we have:") + print(" E[Y] = exp(μ + σ²/2)") + print(" Var[Y] = (exp(σ²) - 1) * exp(2μ + σ²)") + + # Analytical moments for log-normal distribution + mu = mean_internal + sigma2 = variance_internal + + analytical_mean = xp.exp(mu + sigma2/2) + analytical_variance = (xp.exp(sigma2) - 1) * xp.exp(2*mu + sigma2) + analytical_std = xp.sqrt(analytical_variance) + + print(f" Analytical mean: {analytical_mean:.6f}") + print(f" Analytical std: {analytical_std:.6f}") + print(f" Analytical var: {analytical_variance:.6f}") + print() + + # Compare with quadrature result (using 50 points) + quad_result = compute_variance_gauss_hermite( + mean_internal, variance_internal, transform_func, n_points=50 + ) + + print("3. Comparison of quadrature vs analytical:") + print(f" Mean difference: {abs(quad_result['mean'] - analytical_mean):.2e}") + print(f" Std difference: {abs(quad_result['std'] - analytical_std):.2e}") + print(f" Var difference: {abs(quad_result['variance'] - analytical_variance):.2e}") + + # Relative errors + mean_rel_error = abs(quad_result['mean'] - analytical_mean) / analytical_mean + std_rel_error = abs(quad_result['std'] - analytical_std) / analytical_std + var_rel_error = abs(quad_result['variance'] - analytical_variance) / analytical_variance + + print(f" Mean rel. error: {mean_rel_error:.2e}") + print(f" Std rel. error: {std_rel_error:.2e}") + print(f" Var rel. error: {var_rel_error:.2e}") + print() + + # Test 3: Test with different internal parameters + print("4. Testing with different internal parameters:") + + test_cases = [ + {"mean": 0.0, "var": 0.1, "name": "Small variance"}, + {"mean": 1.0, "var": 0.5, "name": "Medium variance"}, + {"mean": -0.5, "var": 1.0, "name": "Large variance"}, + {"mean": 2.0, "var": 0.01, "name": "Large mean, small variance"} + ] + + for case in test_cases: + mu_test = case["mean"] + var_test = case["var"] + + # Quadrature result + quad_result = compute_variance_gauss_hermite( + mu_test, var_test, transform_func, n_points=30 + ) + + # Analytical result + anal_mean = xp.exp(mu_test + var_test/2) + anal_var = (xp.exp(var_test) - 1) * xp.exp(2*mu_test + var_test) + + rel_mean_error = abs(quad_result['mean'] - anal_mean) / anal_mean + rel_var_error = abs(quad_result['variance'] - anal_var) / anal_var + + print(f" {case['name']:25s}: Mean rel. err = {rel_mean_error:.2e}, " + f"Var rel. err = {rel_var_error:.2e}") + + print() + + # Test 4: Test the rescaling function directions + print("5. Testing rescaling function directions:") + + # Test some values + test_values = [0.1, 0.5, 1.0, 2.0, 5.0] + + print(" Testing forward (external -> internal) and backward (internal -> external):") + for theta in test_values: + # Forward: theta -> log(theta) + log_theta = gamma_prior.rescale_hyperparameters_to_internal(theta, "forward") + + # Backward: log(theta) -> theta + theta_recovered = gamma_prior.rescale_hyperparameters_to_internal(log_theta, "backward") + + error = abs(theta - theta_recovered) + print(f" θ = {theta:4.1f} -> log(θ) = {log_theta:6.3f} -> θ = {theta_recovered:6.3f}, " + f"error = {error:.2e}") + + print() + + # Test 5: Convergence study + print("6. Convergence study (increasing number of quadrature points):") + + mu_conv = 0.3 + var_conv = 0.4 + analytical_mean_conv = xp.exp(mu_conv + var_conv/2) + + n_points_list = [5, 10, 15, 20, 25, 30, 40, 50, 75, 100] + + print(" n_points | Mean | Rel. Error") + print(" -----------|-------------|------------") + + for n in n_points_list: + result = compute_variance_gauss_hermite( + mu_conv, var_conv, transform_func, n_points=n + ) + rel_error = abs(result['mean'] - analytical_mean_conv) / analytical_mean_conv + + print(f" {n:8d} | {result['mean']:10.6f} | {rel_error:.3e}") + + print() + print("=" * 80) + print("Test completed successfully!") + print("All tests show that Gaussian quadrature accurately approximates") + print("the moments of log-normal distributions obtained through gamma") + print("prior rescaling transformations.") + print("=" * 80) + diff --git a/src/dalia/prior_hyperparameters/gaussian.py b/src/dalia/prior_hyperparameters/gaussian.py index f6d7ab6e..f7e5b227 100644 --- a/src/dalia/prior_hyperparameters/gaussian.py +++ b/src/dalia/prior_hyperparameters/gaussian.py @@ -1,6 +1,7 @@ # Copyright 2024-2025 DALIA authors. All rights reserved. import numpy as np from scipy.sparse import spmatrix +from dalia import sp, xp from dalia import NDArray from dalia.configs.priorhyperparameters_config import ( @@ -10,19 +11,104 @@ class GaussianPriorHyperparameters(PriorHyperparameters): - """Gaussian prior hyperparameters.""" + """ + Univariate Gaussian prior hyperparameters. + + This class implements prior hyperparameters following a univariate normal + (Gaussian) distribution with specified mean and precision (inverse variance). + + Parameters + ---------- + config : GaussianPriorHyperparametersConfig + Configuration object containing mean and precision parameters. + + Attributes + ---------- + mean : float + Mean of the Gaussian distribution. + precision : float + Precision (inverse variance) of the Gaussian distribution. + normalizing_constant : float + Precomputed normalizing constant for log probability evaluation. + """ def __init__( self, config: GaussianPriorHyperparametersConfig, ) -> None: - """Initializes the Gaussian prior hyperparameters.""" + """ + Initialize the Gaussian prior hyperparameters. + + Parameters + ---------- + config : GaussianPriorHyperparametersConfig + Configuration containing mean and precision parameters. + + Raises + ------ + ValueError + If the precision is not positive. + """ super().__init__(config) self.mean: float = config.mean self.precision: float = config.precision + # Validate precision is positive + if self.precision <= 0: + raise ValueError(f"Precision must be positive, got {self.precision}") + + self.normalizing_constant = -0.5 * xp.log(2 * xp.pi) + 0.5 * xp.log( + self.precision + ) + + def rescale_hyperparameters_to_internal(self, theta, direction): + """ + Rescale hyperparameters between internal and external representations. + + Parameters + ---------- + theta : NDArray or float + Hyperparameter values to rescale. + direction : str + Direction of rescaling ('forward' or 'backward', i.e. from interpretable/user/external to internal or vice versa). + + Returns + ------- + NDArray or float + Rescaled hyperparameter values. In this case it is the identity, therefore unchanged. + + Notes + ----- + For Gaussian priors, the rescaling is the identity function since the internal and external representations are the same. + """ + return super().rescale_hyperparameters_to_internal(theta, direction) + def evaluate_log_prior(self, theta: float, **kwargs) -> float: - """Evaluate the log prior hyperparameters.""" + """ + Evaluate the log prior probability density. + + Computes the log probability density of the univariate normal + distribution at the given theta value. + + Parameters + ---------- + theta : float + Parameter value at which to evaluate the log prior. + **kwargs + Additional keyword arguments (unused). + + Returns + ------- + float + Log prior probability density at theta. - return -0.5 * self.precision * (theta - self.mean) ** 2 + Notes + ----- + The computation follows: + log p(θ) = C - 0.5 * τ * (θ - μ)² + where C is the normalizing constant, τ is precision, and μ is the mean. + """ + return ( + self.normalizing_constant - 0.5 * self.precision * (theta - self.mean) ** 2 + ) diff --git a/src/dalia/prior_hyperparameters/gaussian_mvn.py b/src/dalia/prior_hyperparameters/gaussian_mvn.py index 0a71e1e9..bbec0e6e 100644 --- a/src/dalia/prior_hyperparameters/gaussian_mvn.py +++ b/src/dalia/prior_hyperparameters/gaussian_mvn.py @@ -10,13 +10,44 @@ class GaussianMVNPriorHyperparameters(PriorHyperparameters): - """Gaussian MVN prior hyperparameters.""" + """ + Gaussian multivariate normal (MVN) prior hyperparameters. + + This class implements prior hyperparameters following a multivariate normal + distribution with specified mean and precision matrix. + + Parameters + ---------- + config : GaussianMVNPriorHyperparametersConfig + Configuration object containing mean and precision matrix. + + Attributes + ---------- + mean : NDArray + Mean vector of the multivariate normal distribution. + precision : spmatrix + Precision matrix (inverse covariance) of the distribution. + normalizing_constant : float + Precomputed normalizing constant for log probability evaluation. + """ def __init__( self, config: GaussianMVNPriorHyperparametersConfig, ) -> None: - """Initializes the Gaussian MVN prior hyperparameters.""" + """ + Initialize the Gaussian MVN prior hyperparameters. + + Parameters + ---------- + config : GaussianMVNPriorHyperparametersConfig + Configuration containing mean vector and precision matrix. + + Raises + ------ + ValueError + If the precision matrix is not positive definite. + """ super().__init__(config) self.mean: NDArray = config.mean @@ -29,8 +60,58 @@ def __init__( self.mean: NDArray = xp.asarray(self.mean) self.precision: sp.sparse.spmatrix = sp.sparse.csc_matrix(self.precision) - def evaluate_log_prior(self, theta: float, **kwargs) -> float: - """Evaluate the log prior hyperparameters.""" + sign, logabsdet = np.linalg.slogdet(self.precision.toarray()) + if sign != 1: + raise ValueError("Precision matrix must be positive definite.") + + self.normalizing_constant = ( + -0.5 * self.mean.shape[0] * xp.log(2 * xp.pi) + 0.5 * logabsdet + ) + print("Normalizing constant: ", self.normalizing_constant) + + def rescale_hyperparameters_to_internal(self, theta, direction): + """ + Rescale hyperparameters between internal and external/user representations. + + Parameters + ---------- + theta : NDArray + Hyperparameter values to rescale. + direction : str + Direction of rescaling ('forward' or 'backward'). + + Returns + ------- + NDArray + Rescaled hyperparameter values, which is the identity in this case, therefore unchanged. + """ + return super().rescale_hyperparameters_to_internal(theta, direction) + + def evaluate_log_prior(self, theta: NDArray, **kwargs) -> float: + """ + Evaluate the log prior probability density. + + Computes the log probability density of the multivariate normal + distribution at the given theta values. + + Parameters + ---------- + theta : NDArray + Parameter values at which to evaluate the log prior. + Must have the same shape as the mean vector. + **kwargs + Additional keyword arguments (unused). + + Returns + ------- + float + Log prior probability density at theta. + + Raises + ------ + ValueError + If theta and mean have incompatible shapes. + """ # TODO: add check in config or somewhere else that dim(theta) and dim(mean) match if self.mean.shape != theta.shape: @@ -39,7 +120,11 @@ def evaluate_log_prior(self, theta: float, **kwargs) -> float: ) if isinstance(self.mean, float): - return -0.5 * self.precision * (theta - self.mean) ** 2 + return ( + self.normalizing_constant + - 0.5 * self.precision * (theta - self.mean) ** 2 + ) else: - # neglect constant as the precision is fixed - return -0.5 * (theta - self.mean).T @ self.precision @ (theta - self.mean) + return self.normalizing_constant - 0.5 * ( + theta - self.mean + ).T @ self.precision @ (theta - self.mean) diff --git a/src/dalia/prior_hyperparameters/inverse_gamma.py b/src/dalia/prior_hyperparameters/inverse_gamma.py new file mode 100644 index 00000000..aa9bc403 --- /dev/null +++ b/src/dalia/prior_hyperparameters/inverse_gamma.py @@ -0,0 +1,192 @@ +# Copyright 2024-2025 DALIA authors. All rights reserved. +from dalia import sp, xp + +import numpy as np + +from dalia.configs.priorhyperparameters_config import ( + InverseGammaPriorHyperparametersConfig, +) +from dalia.core.prior_hyperparameters import PriorHyperparameters + + +class InverseGammaPriorHyperparameters(PriorHyperparameters): + """Inverse Gamma prior hyperparameters. + + p(theta) = (beta^alpha / Gamma(alpha)) * (1/theta)^(alpha + 1) * exp(-beta / theta) + + and in log scale: + log p(theta) = alpha * log(beta) - log(Gamma(alpha)) - (alpha + 1) * log(theta) - beta / theta + + where theta is typically a positive parameter such as a variance. + + Parameters + ---------- + config : InverseGammaPriorHyperparametersConfig + Configuration object containing alpha and beta parameters. + + Attributes + ---------- + alpha : float. alpha > 0 + Shape parameter of the Gamma distribution. + beta : float. beta > 0 + Rate parameter of the Gamma distribution. + normalizing_constant : float + Precomputed normalizing constant for log probability evaluation. + """ + + def __init__( + self, + config: InverseGammaPriorHyperparametersConfig, + ) -> None: + """ + Initialize the Inverse Gamma prior hyperparameters. + + Parameters + ---------- + config : InverseGammaPriorHyperparametersConfig + Configuration containing alpha (shape) and beta (rate) parameters. + + Raises + ------ + ValueError + If alpha or beta are not positive. + """ + super().__init__(config) + + self.alpha: float = config.alpha + self.beta: float = config.beta + + # Validate alpha and beta are positive + if self.alpha <= 0: + raise ValueError(f"Alpha must be positive, got {self.alpha}") + if self.beta <= 0: + raise ValueError(f"Beta must be positive, got {self.beta}") + + self.normalizing_constant: float = self.alpha * xp.log(self.beta) - float( + sp.special.gammaln(self.alpha) + ) + + def rescale_hyperparameters_to_internal(self, theta, direction): + """ + Transform between external and internal parameter representations. + + The Inverse Gamma distribution is defined for positive values, but the optimization + happens in an unconstrained space. This method transforms + between theta (positive) and log(theta) (unconstrained). + + Parameters + ---------- + theta : float or NDArray + Parameter value(s) to transform. + direction : str + Transformation direction: + - "forward": theta -> log(theta) (external to internal) + - "backward": log(theta) -> theta (internal to external) + - "forward jacobian": derivative of forward transformation + - "backward jacobian": 1 / derivative of forward transformation + + Returns + ------- + float or NDArray + Transformed parameter value(s). + + Raises + ------ + ValueError + If direction is not "forward" or "backward". + """ + if direction == "forward": + theta_scaled = xp.log(theta) + elif direction == "backward": + theta_scaled = xp.exp(theta) + elif direction == "forward_jacobian": + theta_scaled = 1 / theta # d(log(theta))/d(theta) = 1/theta + elif direction == "backward_jacobian": + theta_scaled = theta + else: + raise ValueError(f"Unknown direction: {direction}") + + return theta_scaled + + def evaluate_log_prior(self, theta: float, **kwargs) -> float: + """ + Evaluate the log prior probability density. + + Computes the log probability density of the Inverse Gamma distribution + at the given theta value in its external/user representation. + + Parameters + ---------- + theta : float + Parameter value at which to evaluate the log prior. + Must be positive (external/user representation). + **kwargs + Additional keyword arguments (unused). + + Returns + ------- + float + Log prior probability density at theta. + + Notes + ----- + The computation follows: + log p(θ) = C - (α + 1) * log(θ) - β / θ + where C is the normalizing constant, α is the shape parameter, + and β is the rate parameter. + + Raises + ------ + ValueError + If theta is not positive (implicitly through log computation). + """ + + log_prior = ( + self.normalizing_constant + - (self.alpha + 1) * xp.log(theta) + - self.beta / theta + ) + + return log_prior + +if __name__ == "__main__": + """ + Test Gaussian quadrature for functions using rescale_hyperparameters_to_internal() + from gamma prior hyperparameters. + + We start with normally distributed random variables in internal space (unconstrained) + that get reparametrized to external space (positive) using the gamma prior's + rescaling function. + """ + + from dalia.utils.gaussian_quadrature import compute_variance_gauss_hermite + + print("=" * 80) + print("Testing Gaussian Quadrature with Inverse Gamma Prior Rescaling") + print("=" * 80) + + # Create a inverse gamma prior configuration + alpha_values = [1.0, 3.0, 5.0] + beta_values = [0.5, 1.0, 2.0] + + for alpha, beta in zip(alpha_values, beta_values): + print(f"\nTesting alpha={alpha}, beta={beta}") + config = InverseGammaPriorHyperparametersConfig(alpha=alpha, beta=beta) + inverse_gamma_prior = InverseGammaPriorHyperparameters(config=config) + + ## compare against scipy implementation + from scipy.stats import invgamma + + test_values = [0.1, 0.5, 1.0, 2.0, 5.0] + print("Comparing log prior evaluations with scipy.stats.invgamma:") + for val in test_values: + logp_dalia = inverse_gamma_prior.evaluate_log_prior(val) + logp_scipy = invgamma.logpdf(val, a=alpha, scale=beta) + print(f" θ = {val:4.1f}: DALIA logp = {logp_dalia:.6f}, " + f"scipy logp = {logp_scipy:.6f}, diff = {abs(logp_dalia - logp_scipy):.2e}") + if abs(logp_dalia - logp_scipy) > 1e-6: + raise ValueError("Log prior evaluation does not match scipy implementation.") + + print() + print("All tests passed!") + diff --git a/src/dalia/prior_hyperparameters/penalized_complexity.py b/src/dalia/prior_hyperparameters/penalized_complexity.py index 2c9788d2..158562d0 100644 --- a/src/dalia/prior_hyperparameters/penalized_complexity.py +++ b/src/dalia/prior_hyperparameters/penalized_complexity.py @@ -44,6 +44,13 @@ def __init__( # print("lambda_theta: ", self.lambda_theta) + def rescale_hyperparameters_to_internal(self, theta, direction): + """Rescale hyperparameters to and from internal scale. + + TODO: Implement the re-scaling. + """ + return super().rescale_hyperparameters_to_internal(theta, direction) + def evaluate_log_prior(self, theta: float, **kwargs) -> float: """Evaluate the prior hyperparameters.""" log_prior: float = 0.0 diff --git a/src/dalia/solvers/distributed_structured_solver.py b/src/dalia/solvers/distributed_structured_solver.py index 487ee3b4..b9f3f4d9 100644 --- a/src/dalia/solvers/distributed_structured_solver.py +++ b/src/dalia/solvers/distributed_structured_solver.py @@ -279,6 +279,9 @@ def solve( synchronize(comm=self.comm) tic = time.perf_counter() + # Store the original shape of rhs to reshape the solution back after solving + in_rhs_shape = rhs.shape + # Ensure rhs is a 2D array if rhs.ndim == 1: rhs = rhs[:, None] @@ -331,6 +334,10 @@ def solve( toc = time.perf_counter() self.t_solve += toc - tic + # Reshape the solution back to the original shape if needed + if in_rhs_shape != rhs.shape: + rhs = rhs.reshape(in_rhs_shape) + return rhs def logdet( diff --git a/src/dalia/solvers/sparse_solver.py b/src/dalia/solvers/sparse_solver.py index 5e1ab78a..05de2145 100644 --- a/src/dalia/solvers/sparse_solver.py +++ b/src/dalia/solvers/sparse_solver.py @@ -25,6 +25,7 @@ def __init__( super().__init__(config) self.LU_factor = None # Store the LU factorization object + self.A_inv = None # Store the inverse of A if needed # Solver Metrics self.t_factorize = 0.0 @@ -130,24 +131,24 @@ def logdet( return float(log_det_U) - def selected_inversion(self, **kwargs): - """Compute selected inversion of input matrix using LU factorization. - - Raises: - ------ - NotImplementedError - Selected inversion is not implemented for SparseSolver. - """ - raise NotImplementedError( - "Selected inversion is not implemented for SparseSolver." + def selected_inversion(self, **kwargs) -> NDArray: + L_inv = sp.linalg.solve_triangular( + self.LU_factor.L, xp.eye(self.LU_factor.L.shape[0]), lower=True, overwrite_b=False ) + U_inv = sp.linalg.solve_triangular( + self.LU_factor.U, xp.eye(self.LU_factor.U.shape[0]), lower=False, overwrite_b=False + ) + self.A_inv = U_inv @ L_inv - def _structured_to_spmatrix(self, **kwargs) -> None: - """Convert structured matrix to sparse matrix. + return self.A_inv - For SparseSolver, this is a no-op since it works directly with sparse matrices. - """ - pass + def _structured_to_spmatrix( + self, A: sp.sparse.spmatrix, **kwargs + ) -> sp.sparse.spmatrix: + B = A.tocoo() + B.data = self.A_inv[B.row, B.col] + + return B def get_solver_memory(self) -> int: """Return the memory used by the solver in number of bytes""" @@ -165,4 +166,9 @@ def get_solver_memory(self) -> int: + self.LU_factor.U.indptr.nbytes + self.LU_factor.U.indices.nbytes ) + + if self.A_inv is not None: + A_inv_memory = self.A_inv.nbytes + return L_memory + U_memory + A_inv_memory + return L_memory + U_memory diff --git a/src/dalia/submodels/__init__.py b/src/dalia/submodels/__init__.py index 64e0216b..83614b94 100644 --- a/src/dalia/submodels/__init__.py +++ b/src/dalia/submodels/__init__.py @@ -4,10 +4,15 @@ from dalia.submodels.regression import RegressionSubModel from dalia.submodels.spatial import SpatialSubModel from dalia.submodels.spatio_temporal import SpatioTemporalSubModel +from dalia.submodels.brainiac import BrainiacSubModel +from dalia.submodels.ar1 import AR1SubModel +from dalia.submodels.generic import GenericSubModel __all__ = [ "RegressionSubModel", "SpatialSubModel", "SpatioTemporalSubModel", "BrainiacSubModel", + "AR1SubModel", + "GenericSubModel", ] diff --git a/src/dalia/submodels/ar1.py b/src/dalia/submodels/ar1.py new file mode 100644 index 00000000..41a5ce25 --- /dev/null +++ b/src/dalia/submodels/ar1.py @@ -0,0 +1,71 @@ +# Copyright 2024-2025 DALIA authors. All rights reserved. +from tabulate import tabulate + +import numpy as np + +from dalia import sp, xp +from dalia.configs.submodels_config import AR1SubModelConfig +from dalia.core.submodel import SubModel +from dalia.utils import add_str_header + + +class AR1SubModel(SubModel): + """Fit an AR(1) model.""" + + def __init__( + self, + config: AR1SubModelConfig, + ) -> None: + """Initializes the model.""" + super().__init__(config) + + # check that dimensions match + + + def construct_Q_prior(self, **kwargs) -> sp.sparse.coo_matrix: + """Construct the prior precision matrix.""" + + # kwargs expects hyperparameters in external scale + phi = kwargs.get("phi") + tau = kwargs.get("tau") + + s2 = 1 / tau + denom = s2 * (1 - phi**2) + + diag = [(1 + phi**2) / denom] * self.n_latent_parameters + diag[0] = diag[-1] = 1 / denom + off_diag = [-phi / denom] * (self.n_latent_parameters - 1) + + Q_prior = sp.sparse.diags([off_diag, diag, off_diag], [-1, 0, 1]) + + # need this -> otherwise there might be a sorting issue + Q_prior = Q_prior.tocsr() + Q_prior.sort_indices() + + return Q_prior.tocoo() + + def __str__(self) -> str: + """String representation of the submodel.""" + str_representation = "" + + # --- Make the Submodel table --- + values = [ + ["Submodel Type", self.submodel_type], + ["Number of Latent Parameters", self.n_latent_parameters], + ["Phi", f"{self.config.phi:.3f}"], + ["tau", f"{self.config.tau:.3f}"], + ] + submodel_table = tabulate( + values, + tablefmt="fancy_grid", + colalign=("left", "center"), + ) + + # Add the header title + submodel_table = add_str_header( + title=self.submodel_type.replace("_", " ").title(), + table=submodel_table, + ) + str_representation += submodel_table + + return str_representation diff --git a/src/dalia/submodels/brainiac.py b/src/dalia/submodels/brainiac.py index 61e01bdf..21100c1c 100644 --- a/src/dalia/submodels/brainiac.py +++ b/src/dalia/submodels/brainiac.py @@ -50,23 +50,13 @@ def _check_dimensions_matrices(self) -> None: self.z.shape[0] == self.a.shape[1] ), f"Numbers rows in z ({self.z.shape[0]}) must match number of columns in a ({self.a.shape[1]})." - def rescale_hyperparameters_to_interpret(self, theta: NDArray) -> NDArray: - h2_scaled = theta[0] - # rescale h2 to (0,1) as it's currently between -INF:+INF - h2 = scaled_logit(h2_scaled, direction="backward") - - theta_interpret = xp.concatenate((xp.array([h2]), theta[1:])) - return theta_interpret def construct_Q_prior(self, **kwargs) -> sp.sparse.coo_matrix: """Construct the prior precision matrix.""" # Extract all alpha_x values and put them into an array alpha_keys = sorted([key for key in kwargs if key.startswith("alpha_")]) alpha = xp.array([kwargs[key] for key in alpha_keys]) - h2_scaled = kwargs.get("h2") - - # rescale h2 to (0,1) as it's currently between -INF:+INF - h2 = scaled_logit(h2_scaled, direction="backward") + h2 = kwargs.get("h2") # \Phi = 1 / \sum_k=1^B exp(Z^k \alpha) * diag(exp(Z_1 \alpha), exp(Z_2 \alpha), ... ) exp_Z_alpha = xp.exp(self.z @ alpha) @@ -85,9 +75,7 @@ def construct_Q_prior(self, **kwargs) -> sp.sparse.coo_matrix: def evaluate_likelihood(self, eta: NDArray, y: NDArray, **kwargs) -> float: n_observations = y.shape[0] - h2_scaled = kwargs.get("h2") - # rescale h2 to (0,1) as it's currently between -INF:+INF - h2 = scaled_logit(h2_scaled, direction="backward") + h2 = kwargs.get("h2") if h2 == 1: raise ValueError("h2 is 1. Will lead to division by zero.") @@ -101,10 +89,8 @@ def evaluate_likelihood(self, eta: NDArray, y: NDArray, **kwargs) -> float: def evaluate_gradient_likelihood( self, eta: NDArray, y: NDArray, **kwargs ) -> NDArray: - h2_scaled = kwargs.get("h2") + h2 = kwargs.get("h2") - # rescale h2 to (0,1) as it's currently between -INF:+INF - h2 = scaled_logit(h2_scaled, direction="backward") if h2 == 1: raise ValueError("h2 is 1. Will lead to division by zero.") @@ -113,9 +99,7 @@ def evaluate_gradient_likelihood( return gradient def evaluate_d_matrix(self, **kwargs) -> NDArray: - h2_scaled = kwargs.get("h2") - # rescale h2 to (0,1) as it's currently between -INF:+INF - h2 = scaled_logit(h2_scaled, direction="backward") + h2 = kwargs.get("h2") if h2 == 1: raise ValueError("h2 is 1. Will lead to division by zero.") diff --git a/src/dalia/submodels/generic.py b/src/dalia/submodels/generic.py new file mode 100644 index 00000000..8340709d --- /dev/null +++ b/src/dalia/submodels/generic.py @@ -0,0 +1,82 @@ +# Copyright 2024-2026 DALIA authors. All rights reserved. +from tabulate import tabulate + +import numpy as np +from scipy.sparse import load_npz, spmatrix + +from dalia import sp, xp, NDArray +from dalia.configs.submodels_config import GenericSubModelConfig +from dalia.core.submodel import SubModel +from dalia.utils import add_str_header + + +class GenericSubModel(SubModel): + """Fit a generic model meaning the precision matrix of the latent parameters is directly specified by the user. The mean is assumed to be zero.""" + + def __init__( + self, + config: GenericSubModelConfig, + ) -> None: + """Initializes the model.""" + super().__init__(config) + + self.tau = self.config.tau + + # accept sparse or dense input for the precision matrix + try: + q: spmatrix = load_npz(self.input_path.joinpath("q.npz")) + self.q = sp.sparse.coo_matrix(q) + except FileNotFoundError: + # check if dense q matrix exists + try: + q: NDArray = np.load(self.input_path.joinpath("q.npy")) + if xp == np: + self.q: NDArray = q + else: + self.q: NDArray = xp.array(q) + except FileNotFoundError: + raise FileNotFoundError( + "No precision matrix found for generic model. Please provide a valid precision matrix." + ) + + # Check that q shape match number of latent parameters + assert ( + self.q.shape[0] == self.q.shape[1] == self.n_latent_parameters + ), f"Precision matrix has {self.q.shape[0]} rows and {self.q.shape[1]} columns, but expected {self.n_latent_parameters} rows and columns." + + print( + f"Successfully loaded precision matrix of shape {self.q.shape} for generic model." + ) + + def construct_Q_prior(self, **kwargs) -> sp.sparse.coo_matrix: + """Construct the prior precision matrix.""" + + tau = kwargs.get("tau") + self.Q_prior = tau * self.q + + return self.Q_prior + + def __str__(self) -> str: + """String representation of the submodel.""" + str_representation = "" + + # --- Make the Submodel table --- + values = [ + ["Submodel Type", self.submodel_type], + ["Number of Latent Effects", self.n_latent_parameters], + ["tau", f"{self.config.tau:.3f}"], + ] + submodel_table = tabulate( + values, + tablefmt="fancy_grid", + colalign=("left", "center"), + ) + + # Add the header title + submodel_table = add_str_header( + title=self.submodel_type.replace("_", " ").title(), + table=submodel_table, + ) + str_representation += submodel_table + + return str_representation diff --git a/src/dalia/submodels/regression.py b/src/dalia/submodels/regression.py index 986d05e2..e009359c 100644 --- a/src/dalia/submodels/regression.py +++ b/src/dalia/submodels/regression.py @@ -17,7 +17,10 @@ def __init__( """Initializes the model.""" super().__init__(config) - self.n_fixed_effects: int = config.n_fixed_effects + if config.n_fixed_effects is None: + self.n_fixed_effects = self.n_latent_parameters + else: + self.n_fixed_effects = config.n_fixed_effects self.fixed_effects_prior_precision: float = config.fixed_effects_prior_precision # Check that design_matrix shape match number of fixed effects diff --git a/src/dalia/utils/__init__.py b/src/dalia/utils/__init__.py index 9ce6d36f..fd63b5b7 100644 --- a/src/dalia/utils/__init__.py +++ b/src/dalia/utils/__init__.py @@ -12,6 +12,9 @@ ) from dalia.utils.host import get_host_configuration from dalia.utils.link_functions import cloglog, scaled_logit, sigmoid +from dalia.utils.correlation import compute_outer_covariance_matrix +from dalia.utils.gaussian_quadrature import compute_variance_gauss_hermite +from dalia.utils.bivariate_gaussian_quadrature import compute_bivariate_expectation from dalia.utils.multiprocessing import ( DummyCommunicator, allgather, @@ -22,6 +25,7 @@ smartsplit, synchronize, synchronize_gpu, + check_vector_consistency, ) from dalia.utils.print_utils import ( add_str_header, @@ -30,6 +34,8 @@ boxify, ) from dalia.utils.spmatrix_utils import bdiag_tiling, extract_diagonal, memory_footprint +from dalia.utils.print_utils import add_str_header, align_tables_side_by_side, boxify, ascii_logo +from dalia.utils.plotting import plot_marginal_distributions_hp, plot_prior_hp from .scalar_ndarray import ensure_scalar __all__ = [ @@ -42,6 +48,9 @@ "sigmoid", "cloglog", "scaled_logit", + "compute_outer_covariance_matrix", + "compute_variance_gauss_hermite", + "compute_bivariate_expectation", "print_msg", "synchronize", "synchronize_gpu", @@ -50,6 +59,7 @@ "allreduce", "allgather", "bcast", + "check_vector_consistency", "bdiag_tiling", "extract_diagonal", "memory_footprint", @@ -61,4 +71,6 @@ "memory_report", "format_size", "DummyCommunicator", + "plot_marginal_distributions_hp", + "plot_prior_hp", ] diff --git a/src/dalia/utils/bivariate_gaussian_quadrature.py b/src/dalia/utils/bivariate_gaussian_quadrature.py new file mode 100644 index 00000000..d9a113fb --- /dev/null +++ b/src/dalia/utils/bivariate_gaussian_quadrature.py @@ -0,0 +1,254 @@ +# Copyright 2024-2025 DALIA authors. All rights reserved. + +import numpy as np +from dalia import xp +from scipy.special import roots_hermite + + +def compute_bivariate_expectation(func1, func2, mu1, mu2, Sigma, n_points=20): + """ + Compute E[f(Z₁, Z₂)] where (Z₁, Z₂) ~ N(0, Σ) using bivariate Gauss-Hermite quadrature. + + E[f(Z₁, Z₂)] = ∬ f(z₁, z₂) φ_ρ(z₁, z₂) dz₁ dz₂ + + where φ_ρ(z₁, z₂) is the bivariate standard normal density with correlation ρ. + + Parameters + ---------- + func : callable + Function f(z₁, z₂) to compute expectation of + rho : float, optional + Correlation coefficient between Z₁ and Z₂ (default: 0.0) + n_points : int, optional + Number of quadrature points per dimension (default: 20) + + Returns + ------- + float + Expected value E[f(Z₁, Z₂)] + """ + # Get Gauss-Hermite quadrature points and weights + nodes, weights = roots_hermite(n_points) + + nodes = xp.array(nodes) + weights = xp.array(weights) + + # Transform nodes from Hermite polynomial roots to standard normal + z_nodes = xp.sqrt(2) * nodes + adjusted_weights = weights / xp.sqrt(xp.pi) + + rho = Sigma[0, 1] # Correlation coefficient + sigma1 = xp.sqrt(Sigma[0, 0]) + sigma2 = xp.sqrt(Sigma[1, 1]) + + # For bivariate case with correlation ρ and means μ₁, μ₂: + # If (U₁, U₂) are independent N(0,1), then: + # Z₁ = μ₁ + U₁ + # Z₂ = μ₂ + ρU₁ + √(1-ρ²)U₂ + # gives (Z₁, Z₂) ~ N([μ₁, μ₂], [[1, ρ], [ρ, 1]]) + + if xp.abs(rho) > 1: + raise ValueError("Correlation coefficient rho must be in [-1, 1]") + + sqrt_one_minus_rho_sq = xp.sqrt(1 - rho**2) + + expectation = 0.0 + + # Double loop over all combinations of quadrature points + for i, (u1, w1) in enumerate(zip(z_nodes, adjusted_weights)): + for j, (u2, w2) in enumerate(zip(z_nodes, adjusted_weights)): + # Transform to correlated variables + z1 = mu1 + sigma1 * u1 + z2 = mu2 + rho * sigma1 * u1 + sigma2 * sqrt_one_minus_rho_sq * u2 + + # Evaluate function at transformed points + f_val = func1(z1) * func2(z2) + + # Combined weight (product of univariate weights) + weight = w1 * w2 + + expectation += weight * f_val + + return expectation + + +def test_quadrature_accuracy(): + """Test quadrature accuracy against known analytical results.""" + + print("=" * 80) + print("Testing Bivariate Gaussian Quadrature") + print("=" * 80) + + # Test 2: Bivariate expectations with independence (ρ = 0) + print("2. Testing bivariate expectations with independence (ρ = 0):") + + # E[1] = 1 + biv_expectation_1 = compute_bivariate_expectation(lambda z: 1.0, lambda z: 1.0, rho=0.0, n_points=10) + print(f" E[1] = {biv_expectation_1:.8f} (should be 1.0, error: {abs(1.0 - biv_expectation_1):.2e})") + + # E[Z₁Z₂] = 0 when independent + biv_expectation_z1z2 = compute_bivariate_expectation(lambda z: z, lambda z: z, rho=0.0, n_points=20) + print(f" E[Z₁Z₂] = {biv_expectation_z1z2:.8f} (should be 0.0, error: {abs(biv_expectation_z1z2):.2e})") + + # E[Z₁² + Z₂²] = E[Z₁²] + E[Z₂²] = 1 + 1 = 2 (using independence) + biv_expectation_sq = compute_bivariate_expectation(lambda z: z**2, lambda z: z**2, rho=0.0, n_points=20) + print(f" E[Z₁² + Z₂²] = {biv_expectation_sq:.8f} (should be 2.0, error: {abs(2.0 - biv_expectation_sq):.2e})") + print() + + # Test 3: Product expectations with independence + print("3. Testing product expectations E[f₁(Z₁)f₂(Z₂)] with independence:") + + # E[Z₁ * Z₂] = E[Z₁] * E[Z₂] = 0 * 0 = 0 when independent + prod_exp_z1z2 = compute_bivariate_expectation(lambda z: z, lambda z: z, rho=0.0, n_points=20) + print(f" E[Z₁ * Z₂] = {prod_exp_z1z2:.8f} (should be 0.0, error: {abs(prod_exp_z1z2):.2e})") + + # E[Z₁² * 1] = E[Z₁²] * E[1] = 1 * 1 = 1 + prod_exp_z1sq_1 = compute_bivariate_expectation(lambda z: z**2, lambda z: 1.0, rho=0.0, n_points=20) + print(f" E[Z₁² * 1] = {prod_exp_z1sq_1:.8f} (should be 1.0, error: {abs(1.0 - prod_exp_z1sq_1):.2e})") + + # E[exp(Z₁) * exp(Z₂)] = E[exp(Z₁)] * E[exp(Z₂)] = exp(0.5) * exp(0.5) = exp(1.0) + prod_exp_exp = compute_bivariate_expectation(lambda z: xp.exp(z), lambda z: xp.exp(z), rho=0.0, n_points=30) + analytical_prod_exp = xp.exp(1.0) + print(f" E[exp(Z₁) * exp(Z₂)] = {prod_exp_exp:.8f} (should be {analytical_prod_exp:.8f}, error: {abs(analytical_prod_exp - prod_exp_exp):.2e})") + print() + + # Test 4: Bivariate expectations with correlation + print("4. Testing bivariate expectations with correlation:") + + correlations = [-0.9, -0.5, -0.3, 0.0, 0.3, 0.5, 0.8, 0.9] + + print(" Correlation | E[Z₁Z₂] | Analytical | Error") + print(" ------------|------------|------------|----------") + + for rho in correlations: + # E[Z₁Z₂] = ρ for bivariate normal + biv_exp_corr = compute_bivariate_expectation(lambda z: z, lambda z: z, rho=rho, n_points=25) + error = abs(rho - biv_exp_corr) + print(f" {rho:10.1f} | {biv_exp_corr:10.6f} | {rho:10.6f} | {error:.2e}") + print() + + # Test 4b: Extreme correlation values (±1) + print("4b. Testing extreme correlation values (ρ = ±1):") + print(" Note: Perfect correlation means Z₂ = ±Z₁") + + extreme_correlations = [-1.0, 1.0] + + print(" Correlation | E[Z₁Z₂] | Analytical | Error | Note") + print(" ------------|------------|------------|----------|------------------") + + for rho in extreme_correlations: + # For extreme correlations, use more quadrature points for accuracy + biv_exp_corr = compute_bivariate_expectation(lambda z: z, lambda z: z, rho=rho, n_points=40) + error = abs(rho - biv_exp_corr) + note = "Perfect positive" if rho == 1.0 else "Perfect negative" + print(f" {rho:10.1f} | {biv_exp_corr:10.6f} | {rho:10.6f} | {error:.2e} | {note}") + + # Additional test: For ρ = ±1, E[Z₁²Z₂²] should equal E[Z₁⁴] = 3 + z1_sq_z2_sq = compute_bivariate_expectation(lambda z: z**2, lambda z: z**2, rho=rho, n_points=40) + expected_z1_4 = 3.0 # Fourth moment of standard normal + error_z4 = abs(z1_sq_z2_sq - expected_z1_4) + print(f" {rho:10.1f} | E[Z₁²Z₂²]={z1_sq_z2_sq:6.4f} | E[Z₁⁴]={expected_z1_4:6.1f} | {error_z4:.2e} | Should equal E[Z₁⁴]") + + print() + + # Test 5: Product expectations with correlation + print("5. Testing product expectations with correlation:") + print(" For E[f₁(Z₁)f₂(Z₂)], correlation affects the result when f₁ and f₂ are nonlinear") + + def f1(z): + return z**2 + + def f2(z): + return z**3 + + def f_prod(z): + return f1(z[0]) * f2(z[1]) + + print(" E[Z₁² * Z₂³] for different correlations:") + # print(" Correlation | E[Z₁²Z₂³]") + # print(" ------------|----------") + print(" Correlation | E[Z₁²Z₂³] Numerical | E[Z₁²Z₂³] GHQ | Error") + + for rho in [-0.9, -0.5, 0.0, 0.5, 0.9]: + prod_exp_nonlinear = compute_bivariate_expectation(f1, f2, rho=rho, n_points=30) + + Sigma = xp.array([[1.0, rho], [rho, 1.0]]) + mu = xp.array([0.0, 0.0]) + import ghq + + prod_ref = ghq.multivariate(f_prod, mu, Sigma, n_points=30) + error = abs(prod_exp_nonlinear - prod_ref) + + print(f" {rho:10.1f} | {prod_exp_nonlinear:10.6f} | {prod_ref:10.6f} | {error:.2e}") + print() + + # Test 6: Convergence study + print("6. Convergence study (increasing number of quadrature points):") + + # For this test, we approximate E[exp(0.1*Z₁ + 0.2*Z₂)] using E[exp(0.1*Z₁) * exp(0.2*Z₂)] + # This is exact since exp(a+b) = exp(a)*exp(b) + def func1_test(z): + return xp.exp(0.1 * z) + + def func2_test(z): + return xp.exp(0.2 * z) + + # Analytical result for E[exp(aZ₁ + bZ₂)] with (Z₁,Z₂) ~ N(0, Σ) + # For a=0.1, b=0.2, ρ=0.5: E[exp(aZ₁ + bZ₂)] = exp(0.5 * (a² + b² + 2abρ)) + a, b, rho_test = 0.1, 0.2, 0.5 + analytical_mgf = xp.exp(0.5 * (a**2 + b**2 + 2*a*b*rho_test)) + + print(f" Testing E[exp(0.1*Z₁) * exp(0.2*Z₂)] with ρ = {rho_test}") + print(f" Analytical result: {analytical_mgf:.8f}") + print() + print(" n_points | Numerical | Error") + print(" ---------|--------------|----------") + + for n in [5, 10, 15, 20, 25, 30]: + numerical_mgf = compute_bivariate_expectation(func1_test, func2_test, rho=rho_test, n_points=n) + error = abs(analytical_mgf - numerical_mgf) + print(f" {n:7d} | {numerical_mgf:12.8f} | {error:.2e}") + + print() + + # Test 7: Practical example with transformations + print("7. Practical example: Log-normal random variables") + print(" Let Y₁ = exp(Z₁), Y₂ = exp(Z₂) where (Z₁, Z₂) have correlation ρ") + print(" Computing E[Y₁ * Y₂] = E[exp(Z₁ + Z₂)]") + + rho_examples = [-0.8, -0.3, 0.0, 0.5, 0.9] + + print(" Correlation | E[Y₁Y₂] Numerical | E[Y₁Y₂] Analytical | Error") + print(" ------------|------------------|-------------------|----------") + + for rho in rho_examples: + # Numerical computation + numerical_lognormal = compute_bivariate_expectation( + lambda z: xp.exp(z), lambda z: xp.exp(z), + rho=rho, n_points=30 + ) + + # Analytical: E[exp(Z₁ + Z₂)] = exp(E[Z₁ + Z₂] + 0.5*Var[Z₁ + Z₂]) + # E[Z₁ + Z₂] = 0, Var[Z₁ + Z₂] = Var[Z₁] + Var[Z₂] + 2*Cov[Z₁,Z₂] = 1 + 1 + 2*ρ = 2 + 2*ρ + analytical_lognormal = xp.exp(0.5 * (2 + 2*rho)) + + error = abs(numerical_lognormal - analytical_lognormal) + + print(f" {rho:10.1f} | {numerical_lognormal:16.8f} | {analytical_lognormal:17.8f} | {error:.2e}") + + print() + print("=" * 80) + print("Bivariate Gaussian Quadrature Test Summary:") + print("✓ Univariate expectations computed accurately") + print("✓ Bivariate expectations with independence verified") + print("✓ Product expectations working correctly") + print("✓ Correlation effects properly captured") + print("✓ Convergence behavior as expected") + print("✓ Practical log-normal example validated") + print() + print("All bivariate Gaussian quadrature functions are working correctly!") + print("=" * 80) + + +if __name__ == "__main__": + test_quadrature_accuracy() \ No newline at end of file diff --git a/src/dalia/utils/correlation.py b/src/dalia/utils/correlation.py new file mode 100644 index 00000000..80dc4c0c --- /dev/null +++ b/src/dalia/utils/correlation.py @@ -0,0 +1,594 @@ +# Copyright 2024-2025 DALIA authors. All rights reserved. + +import numpy as np +from dalia import xp +import matplotlib.pyplot as plt + +# Import our quadrature functions +from dalia.utils.gaussian_quadrature import compute_variance_gauss_hermite +from dalia.utils.bivariate_gaussian_quadrature import compute_bivariate_expectation +# Import reparametrization functions +from dalia.utils.reparametrizations import ( + compute_transformed_quantiles, + compute_transformed_pdf, + compute_bounds +) + +def compute_outer_covariance_matrix(mean_internal, cov_internal, transform_list, n_points=25): + """ + Compute covariance matrix between all pairs of transformed parameters using bivariate quadrature. + The results are to be considered with caution. Covariance matrices provide reliable information in + the Gaussian context, however, not necessarily in the transformed space. + + Parameters + ---------- + mean_internal : ndarray + Mean vector of internal distribution + cov_internal : ndarray + Covariance matrix of internal distribution + transform_func : function + Transformation function that takes (theta_vector, direction) and returns transformed vector + This should be the model's rescale_hyperparameters_to_internal method + n_points : int + Number of quadrature points per dimension + + Returns + ------- + ndarray + Covariance matrix between transformed parameters (outer space) + """ + + n_dim = len(mean_internal) + outer_cov_matrix = np.zeros((n_dim, n_dim)) + + mean_outer = [] + marginal_vars = [] + + print("Computing marginal means and variances for correlation calculations...") + + for i in range(n_dim): + mu_i = mean_internal[i] + var_i = cov_internal[i, i] + + # Compute marginal statistics + result = compute_variance_gauss_hermite(mu_i, var_i, transform_list[i], n_points) + mean_outer.append(result['mean']) + marginal_vars.append(result['variance']) + + outer_cov_matrix[i, i] = result['variance'] + + print("Computing pairwise covariances...") + + # Compute pairwise covariances + for i in range(n_dim): + for j in range(n_dim): + if i < j: # Only compute upper triangle, then symmetrize, i == j already filled + print(f" Computing Cov(X_{i+1}, X_{j+1})...", end=" ") + + # Extract marginal parameters + mu_i, mu_j = mean_internal[i], mean_internal[j] + var_i, var_j = cov_internal[i, i], cov_internal[j, j] + cov_ij = cov_internal[i, j] + + # Compute correlation coefficient in internal space + rho_internal = cov_ij / np.sqrt(var_i * var_j) + + # Create transformation functions for these parameters + transform_func_i = transform_list[i] + transform_func_j = transform_list[j] + + # Standardize the variables for bivariate quadrature + def standardized_func_i(z): + x_internal = mu_i + np.sqrt(var_i) * z + return transform_func_i(x_internal, "backward") + + def standardized_func_j(z): + x_internal = mu_j + np.sqrt(var_j) * z + return transform_func_j(x_internal, "backward") + + # Compute E[f_i(Z_i) * f_j(Z_j)] using bivariate quadrature + cross_moment = compute_bivariate_expectation( + standardized_func_i, standardized_func_j, + rho=rho_internal, n_points=n_points + ) + + # Covariance: Cov(X,Y) = E[XY] - E[X]E[Y] + covariance_outer = cross_moment - mean_outer[i] * mean_outer[j] + + # Store covariance directly + outer_cov_matrix[i, j] = covariance_outer + outer_cov_matrix[j, i] = covariance_outer # Symmetric + + print(f"{covariance_outer:.6f}") + + return outer_cov_matrix + + +if __name__ == "__main__": + + """ + Main test function for multivariate transformations. + """ + + # Dummy Prior Hyperparameter Class with Log Transform + class Prior_LogTransform: + """ + Dummy implementation of gamma prior rescaling for testing purposes. + Implements log transformation: forward = log(x), backward = exp(x) + """ + def rescale_hyperparameters_to_internal(self, theta, direction): + """Log transformation between positive (external) and unconstrained (internal) space""" + if direction == "forward": + return np.log(theta) # theta -> log(theta) + elif direction == "backward": + return np.exp(theta) # log(theta) -> theta + elif direction == "forward_jacobian": + return 1.0 / theta # d(log(theta))/d(theta) = 1/theta + elif direction == "backward_jacobian": + return theta # d(exp(theta))/d(theta) = exp(theta) = theta + else: + raise ValueError(f"Unknown direction: {direction}") + + class Prior_LogisticTransform: + """ + Dummy implementation of beta prior rescaling for testing purposes. + Implements logistic transformation: forward = logit(x), backward = sigmoid(x) + """ + + def rescale_hyperparameters_to_internal(self, theta, direction): + """Logistic transformation between (0,1) and unconstrained space""" + if direction == "forward": + return np.log(theta / (1 - theta)) # logit(theta) + elif direction == "backward": + return 1 / (1 + np.exp(-theta)) # sigmoid(theta) + elif direction == "forward_jacobian": + return 1.0 / (theta * (1 - theta)) # d(logit(theta))/d(theta) + elif direction == "backward_jacobian": + sig = 1 / (1 + np.exp(-theta)) + return sig * (1 - sig) # d(sigmoid(theta))/d(theta) + else: + raise ValueError(f"Unknown direction: {direction}") + + class Prior_IdentityTransform: + """ + Dummy implementation of identity prior rescaling for testing purposes. + Implements identity transformation: forward = x, backward = x + """ + def rescale_hyperparameters_to_internal(self, theta, direction): + """Identity transformation (no change)""" + if direction in ["forward", "backward"]: + return theta + elif direction in ["forward_jacobian", "backward_jacobian"]: + return 1.0 + else: + raise ValueError(f"Unknown direction: {direction}") + + def generate_random_covariance_matrix(n_dim=3, condition_number=10.0, random_seed=42): + """ + Generate a random positive definite covariance matrix. + + Parameters + ---------- + n_dim : int + Dimension of the covariance matrix + condition_number : float + Maximum condition number (controls how ill-conditioned the matrix can be) + random_seed : int + Random seed for reproducibility + + Returns + ------- + ndarray + Random positive definite covariance matrix + """ + np.random.seed(random_seed) + + # Generate random eigenvalues between 1/condition_number and 1 + eigenvals = np.random.uniform(1.0/condition_number, 1.0, n_dim) + eigenvals = np.sort(eigenvals)[::-1] # Sort in descending order + + # Generate random orthogonal matrix (eigenvectors) + Q, _ = np.linalg.qr(np.random.randn(n_dim, n_dim)) + + # Construct covariance matrix: Σ = Q * diag(eigenvals) * Q^T + cov_matrix = Q @ np.diag(eigenvals) @ Q.T + + return cov_matrix + + print("=" * 90) + print("MULTIVARIATE TRANSFORMATION TEST") + print("Testing 3D Gaussian → Transformed Space using Gaussian Quadrature") + print("=" * 90) + + # Set parameters + n_dim = 3 + n_quad_points = 30 + + # Step 1: Generate random mean vector and covariance matrix + print("1. Generating Random 3D Gaussian Distribution") + print("-" * 50) + + np.random.seed(42) # For reproducibility + mean_internal = np.random.uniform(-1, 1, n_dim) + cov_internal = generate_random_covariance_matrix(n_dim, condition_number=5.0) + + print("Internal (Gaussian) Distribution Parameters:") + print(f"Mean vector: {mean_internal}") + print("Covariance matrix:") + print(cov_internal) + print(f"Condition number: {np.linalg.cond(cov_internal):.2f}") + print() + + # Step 2: Create transformation functions + print("2. Setting up Transformation Functions") + print("-" * 50) + + prior_log_transform = Prior_LogTransform() + + def log_transform_func(x, direction): + return prior_log_transform.rescale_hyperparameters_to_internal(x, direction) + + prior_logistic_transform = Prior_LogisticTransform() + def logistic_transform_func(x, direction): + return prior_logistic_transform.rescale_hyperparameters_to_internal(x, direction) + + prior_identity_transform = Prior_IdentityTransform() + def identity_transform_func(x, direction): + return prior_identity_transform.rescale_hyperparameters_to_internal(x, direction) + + # Apply different transforms to each dimension (directly assign transforms to parameters) + transforms = [ + log_transform_func, # Parameter 1: Log transform + logistic_transform_func, # Parameter 2: Logistic transform + identity_transform_func # Parameter 3: Identity transform + ] + + # Step 3: Compute marginal statistics in outer space + print("3. Computing Marginal Statistics in Outer Space") + print("-" * 50) + + marginal_stats = [] + + for i in range(n_dim): + # Extract marginal parameters + mean_i = mean_internal[i] + var_i = cov_internal[i, i] + + # Compute marginal statistics + stats = compute_variance_gauss_hermite( + mean_i, var_i, transforms[i], n_quad_points + ) + + marginal_stats.append(stats) + + print(f"Parameter {i+1} ({stats['transform_name']}):") + print(f" Internal: μ = {mean_i:.4f}, σ² = {var_i:.4f}") + print(f" Outer: μ = {stats['mean']:.4f}, σ² = {stats['variance']:.4f}, σ = {stats['std']:.4f}") + print() + + # Step 4: Compute covariance matrix in outer space + print("4. Computing Covariance Matrix in Outer Space") + print("-" * 50) + + # Internal covariance matrix (for reference) + internal_cov_matrix = cov_internal.copy() + + print("Internal (Gaussian) covariance matrix:") + print(internal_cov_matrix) + print() + + # Create a mock transformation function that applies different transforms to each parameter + def mock_vectorized_transform(theta_vec, direction): + """Mock transformation function that applies different transforms to each parameter.""" + result = theta_vec.copy() + for i, transform_func in enumerate(transforms): + if i < len(result): + result[i] = transform_func(theta_vec[i], direction) + return result + + # Compute outer covariance matrix using the new function + outer_cov_matrix = compute_outer_covariance_matrix( + mean_internal, cov_internal, mock_vectorized_transform, n_quad_points + ) + + print("\nOuter (Transformed) covariance matrix:") + print(outer_cov_matrix) + print() + + # Convert to correlation matrices for comparison + # Internal correlations (for reference) + internal_corr_matrix = np.zeros((n_dim, n_dim)) + for i in range(n_dim): + for j in range(n_dim): + if i == j: + internal_corr_matrix[i, j] = 1.0 + else: + internal_corr_matrix[i, j] = (cov_internal[i, j] / + np.sqrt(cov_internal[i, i] * cov_internal[j, j])) + + # Outer correlations (derived from covariance matrix) + outer_corr_matrix = np.zeros((n_dim, n_dim)) + for i in range(n_dim): + for j in range(n_dim): + if i == j: + outer_corr_matrix[i, j] = 1.0 + else: + outer_corr_matrix[i, j] = (outer_cov_matrix[i, j] / + np.sqrt(outer_cov_matrix[i, i] * outer_cov_matrix[j, j])) + + print("Derived outer correlation matrix:") + print(outer_corr_matrix) + print() + + # Step 5: Compare transformations + print("5. Transformation Effects Analysis") + print("-" * 50) + + print("Comparison of Internal vs Outer Statistics:") + print(f"{'Parameter':<12} {'Transform':<25} {'Mean Change':<12} {'Var Change':<12} {'Cov Change':<12}") + print("-" * 85) + + for i in range(n_dim): + mean_change = abs(marginal_stats[i]['mean'] - mean_internal[i]) + var_change = abs(marginal_stats[i]['variance'] - cov_internal[i, i]) + + # Average covariance change for this parameter + cov_changes = [] + for j in range(n_dim): + if i != j: + cov_changes.append(abs(outer_cov_matrix[i, j] - internal_cov_matrix[i, j])) + avg_cov_change = np.mean(cov_changes) if cov_changes else 0.0 + + transform_name = transforms[i].name.split(' ')[0] + + print(f"{i+1:<12} {transform_name:<25} {mean_change:<12.4f} {var_change:<12.4f} {avg_cov_change:<12.4f}") + + print() + + # Additional covariance matrix validation + print("Covariance Matrix Validation:") + print("-" * 50) + + # Check if outer covariance matrix is positive semidefinite + eigenvals = np.linalg.eigvals(outer_cov_matrix) + is_pos_def = np.all(eigenvals >= -1e-10) # Allow small numerical errors + + print(f"Outer covariance matrix eigenvalues: {eigenvals}") + print(f"Is positive semidefinite: {is_pos_def}") + + # Check symmetry + is_symmetric = np.allclose(outer_cov_matrix, outer_cov_matrix.T) + print(f"Is symmetric: {is_symmetric}") + + # Check diagonal elements (should be positive variances) + diag_elements = np.diag(outer_cov_matrix) + all_positive_vars = np.all(diag_elements > 0) + print(f"All diagonal elements (variances) positive: {all_positive_vars}") + print(f"Outer variances: {diag_elements}") + + print() + + # Step 6: Analytical validation for specific cases + print("6. Analytical Validation") + print("-" * 50) + + # For log transformation (Parameter 1), we can validate against log-normal theory + if transforms[0].name.startswith("Log"): # Log transform + mu_1 = mean_internal[0] + sigma2_1 = cov_internal[0, 0] + + # Analytical log-normal moments + analytical_mean = np.exp(mu_1 + sigma2_1/2) + analytical_var = (np.exp(sigma2_1) - 1) * np.exp(2*mu_1 + sigma2_1) + + numerical_mean = marginal_stats[0]['mean'] + numerical_var = marginal_stats[0]['variance'] + + print("Log-normal validation (Parameter 1):") + print(f" Analytical mean: {analytical_mean:.6f}") + print(f" Numerical mean: {numerical_mean:.6f}") + print(f" Relative error: {abs(analytical_mean - numerical_mean)/analytical_mean:.2e}") + print(f" Analytical var: {analytical_var:.6f}") + print(f" Numerical var: {numerical_var:.6f}") + print(f" Relative error: {abs(analytical_var - numerical_var)/analytical_var:.2e}") + print() + + # Demonstrate reparametrization functions + print("Reparametrization functions analysis:") + + # Create transform function for reparametrization utilities + transform_func = transforms[0] # Log transform + def reparam_func(x, direction): + return transform_func(x, direction) + + # Compute quantiles using reparametrization function + percentiles = np.array([0.025, 0.25, 0.5, 0.75, 0.975]) + quantiles = compute_transformed_quantiles(mu_1, sigma2_1, percentiles, reparam_func) + + print(" Quantiles in outer space:") + for p, q in zip(percentiles, quantiles): + print(f" {p*100:4.1f}%: {q:.4f}") + + # Compute bounds using reparametrization function + (int_lower, int_upper), (orig_lower, orig_upper) = compute_bounds( + mu_1, sigma2_1, reparam_func, n_std=3 + ) + print(f" 3σ bounds: Internal [{int_lower:.3f}, {int_upper:.3f}] -> Outer [{orig_lower:.3f}, {orig_upper:.3f}]") + + # Compute PDF at a few points to demonstrate reparametrization + test_points = [0.5, 1.0, 2.0, 5.0] + print(" PDF values at test points:") + for x_orig in test_points: + x_int = reparam_func(x_orig, "forward") + pdf_orig = compute_transformed_pdf(mu_1, sigma2_1, x_int, reparam_func) + print(f" x = {x_orig:.1f}: PDF = {pdf_orig:.6f}") + print() + print() + + # Step 7: Create visualization + print("7. Generating Visualization") + print("-" * 50) + + try: + # Create single figure with 3 rows (one per parameter), 2 columns (internal, outer) + fig, axes = plt.subplots(3, 2, figsize=(16, 12)) + + # Plot marginal distributions for each parameter + for i in range(n_dim): + # Get marginal parameters + mu_i = mean_internal[i] + sigma_i = np.sqrt(cov_internal[i, i]) + transform_func = transforms[i] + + # Create transform function for reparametrization + def param_transform_func(x, direction): + return transform_func(x, direction) + + # === INTERNAL DISTRIBUTION PLOT === + ax_int = axes[i, 0] # Row i, column 0 (internal) + + # Create well-spaced internal grid + x_internal = np.linspace(mu_i - 4*sigma_i, mu_i + 4*sigma_i, 300) + pdf_internal = (1/(sigma_i * np.sqrt(2*np.pi))) * np.exp(-0.5*((x_internal - mu_i)/sigma_i)**2) + + ax_int.plot(x_internal, pdf_internal, 'b-', linewidth=3, label=f'N({mu_i:.2f}, {sigma_i:.2f}²)') + + # Add quantiles for internal distribution + internal_percentiles = np.array([0.025, 0.25, 0.5, 0.75, 0.975]) + from scipy.stats import norm + internal_quantiles = norm.ppf(internal_percentiles, loc=mu_i, scale=sigma_i) + + colors_int = ['red', 'orange', 'green', 'orange', 'red'] + for p, q, color in zip(internal_percentiles, internal_quantiles, colors_int): + pdf_val = (1/(sigma_i * np.sqrt(2*np.pi))) * np.exp(-0.5*((q - mu_i)/sigma_i)**2) + ax_int.axvline(q, color=color, linestyle='--', alpha=0.7, linewidth=2) + if p in [0.025, 0.5, 0.975]: # Label key percentiles + ax_int.text(q, pdf_val * 1.05, f'{p:.3f}', rotation=90, ha='center', va='bottom', fontsize=10, fontweight='bold') + + # Set internal plot properties + ax_int.set_xlim(mu_i - 4*sigma_i, mu_i + 4*sigma_i) + ax_int.set_ylim(0, max(pdf_internal) * 1.15) + ax_int.set_xlabel(f'Parameter {i+1} (Internal Scale)', fontsize=12) + ax_int.set_ylabel('PDF', fontsize=12) + ax_int.set_title(f'Parameter {i+1}: Internal Distribution\n{transform_func.name}', fontsize=14, fontweight='bold') + ax_int.legend(fontsize=11) + ax_int.grid(True, alpha=0.3) + + # === OUTER DISTRIBUTION PLOT === + ax_out = axes[i, 1] # Row i, column 1 (outer) + + try: + # Compute bounds for outer distribution with more generous margins + (int_lower, int_upper), (orig_lower, orig_upper) = compute_bounds( + mu_i, sigma_i**2, param_transform_func, n_std=4 + ) + + # Add some margin to outer bounds for better visualization + orig_range = orig_upper - orig_lower + orig_margin = orig_range * 0.1 + orig_lower_plot = max(orig_lower - orig_margin, 1e-6) if orig_lower > 0 else orig_lower - orig_margin + orig_upper_plot = orig_upper + orig_margin + + # Create fine grid in outer space + x_outer = np.linspace(orig_lower_plot, orig_upper_plot, 300) + + # Filter out invalid values for certain transformations + if "Logistic" in transforms[i].name: # Logistic transform (0,1) + x_outer = x_outer[(x_outer > 0.001) & (x_outer < 0.999)] + elif "Log" in transforms[i].name: # Log transform (positive) + x_outer = x_outer[x_outer > 0.001] + # Identity transform needs no filtering - can handle all real values + + # Compute PDF in outer space + pdf_outer = [] + for x in x_outer: + try: + x_int = param_transform_func(x, "forward") + pdf_val = compute_transformed_pdf(mu_i, sigma_i**2, x_int, param_transform_func) + pdf_outer.append(pdf_val) + except: + pdf_outer.append(0.0) + + pdf_outer = np.array(pdf_outer) + + # Plot outer distribution + ax_out.plot(x_outer, pdf_outer, 'r-', linewidth=3, label=f'Transformed Distribution') + + # Add quantiles for outer distribution + outer_quantiles = compute_transformed_quantiles(mu_i, sigma_i**2, internal_percentiles, param_transform_func) + + colors_out = ['red', 'orange', 'green', 'orange', 'red'] + for p, q, color in zip(internal_percentiles, outer_quantiles, colors_out): + if orig_lower_plot <= q <= orig_upper_plot: # Only plot if within bounds + try: + x_int_q = param_transform_func(q, "forward") + pdf_val_q = compute_transformed_pdf(mu_i, sigma_i**2, x_int_q, param_transform_func) + ax_out.axvline(q, color=color, linestyle='--', alpha=0.7, linewidth=2) + if p in [0.025, 0.5, 0.975]: # Label key percentiles + ax_out.text(q, pdf_val_q * 1.05, f'{p:.3f}', rotation=90, ha='center', va='bottom', fontsize=10, fontweight='bold') + except: + pass + + # Set outer plot properties with proper limits + ax_out.set_xlim(orig_lower_plot, orig_upper_plot) + if len(pdf_outer) > 0 and max(pdf_outer) > 0: + ax_out.set_ylim(0, max(pdf_outer) * 1.15) + + # Format x-axis nicely for different transformations + if "Log" in transforms[i].name: # Log transform + ax_out.set_xlabel(f'Parameter {i+1} (Outer Scale: exp)', fontsize=12) + elif "Logistic" in transforms[i].name: # Logistic transform + ax_out.set_xlabel(f'Parameter {i+1} (Outer Scale: sigmoid)', fontsize=12) + elif "Identity" in transforms[i].name: # Identity transform + ax_out.set_xlabel(f'Parameter {i+1} (Outer Scale: identity)', fontsize=12) + + ax_out.set_ylabel('PDF', fontsize=12) + ax_out.set_title(f'Parameter {i+1}: Outer Distribution\n{transform_func.name}', fontsize=14, fontweight='bold') + ax_out.legend(fontsize=11) + ax_out.grid(True, alpha=0.3) + + except Exception as e: + print(f"Warning: Could not plot outer distribution for parameter {i+1}: {e}") + ax_out.text(0.5, 0.5, f'Error plotting\nparameter {i+1}', transform=ax_out.transAxes, ha='center', va='center', fontsize=12) + ax_out.set_title(f'Parameter {i+1}: Error', fontsize=14) + + # Add column labels + axes[0, 0].text(0.5, 1.15, 'Internal (Gaussian) Scale', transform=axes[0, 0].transAxes, + ha='center', va='bottom', fontsize=16, fontweight='bold') + axes[0, 1].text(0.5, 1.15, 'Outer (Transformed) Scale', transform=axes[0, 1].transAxes, + ha='center', va='bottom', fontsize=16, fontweight='bold') + + # Finalize figure + fig.suptitle('Marginal Distributions: Internal vs Outer Scales', fontsize=18, fontweight='bold', y=0.98) + fig.tight_layout() + fig.subplots_adjust(top=0.92) # Make room for suptitle and column headers + fig.savefig('marginal_distributions_comparison.png', dpi=300, bbox_inches='tight') + + # Show figure + plt.show() + + print("✓ Marginal distributions comparison saved as 'marginal_distributions_comparison.png'") + + except Exception as e: + print(f"⚠ Could not generate plots: {e}") + + print() + + # Step 8: Summary + print("8. Summary") + print("-" * 50) + + # Calculate total changes in both covariance and correlation structures + total_cov_change = np.sum(np.abs(outer_cov_matrix - internal_cov_matrix)) / 2 # Divide by 2 due to symmetry + total_corr_change = np.sum(np.abs(outer_corr_matrix - internal_corr_matrix)) / 2 # Divide by 2 due to symmetry + print(f"✓ Total covariance structure change: {total_cov_change:.6f}") + print(f"✓ Total correlation structure change: {total_corr_change:.4f}") + + print() + print("=" * 90) + print("MULTIVARIATE TRANSFORMATION TEST COMPLETED!") + print("=" * 90) + + +if __name__ == "__main__": + test_multivariate_transformation() \ No newline at end of file diff --git a/src/dalia/utils/gaussian_quadrature.py b/src/dalia/utils/gaussian_quadrature.py new file mode 100644 index 00000000..25c2dab4 --- /dev/null +++ b/src/dalia/utils/gaussian_quadrature.py @@ -0,0 +1,273 @@ +# Copyright 2024-2025 DALIA authors. All rights reserved. + +from dalia import xp + +from scipy.special import roots_hermite + +def compute_variance_gauss_hermite(mean_internal, variance_internal, transform, n_points=20): + """ + Compute variance of transformed distribution using Gauss-Hermite quadrature + + For a distribution Y where transform(Y) ~ N(μ, σ²), we want to compute: + Var(Y) = E[Y²] - (E[Y])² + + Using Gauss-Hermite quadrature by transforming to standard normal form. + + Theory: + If Z ~ N(0,1), then X = μ + σZ ~ N(μ, σ²) + So Y = φ⁻¹(X) = φ⁻¹(μ + σZ) + + E[Y] = E[φ⁻¹(X)] = E[φ⁻¹(μ + σZ)] + E[Y²] = E[φ⁻¹(X)φ⁻¹(X)] = E[φ⁻¹(μ + σZ) φ⁻¹(μ + σZ)] + """ + + # Get Gauss-Hermite quadrature points and weights + nodes, weights = roots_hermite(n_points) + + # copy nodes and weight to device + nodes = xp.array(nodes) + weights = xp.array(weights) + + # Transform nodes from Hermite polynomial roots to standard normal + # Hermite nodes are for exp(-x²), we want exp(-x²/2)/√(2π) + # So we scale by √2: z = √2 * node + z_nodes = xp.sqrt(2) * nodes + + # Compute transformed values Y = φ⁻¹(μ + σZ) for each node + internal_values = mean_internal + xp.sqrt(variance_internal) * z_nodes + y_values = transform(internal_values, direction="backward") + + # Gauss-Hermite weights need to be adjusted for standard normal + # Original: ∫ f(x) exp(-x²) dx ≈ Σ w_i f(x_i) + # For standard normal: ∫ f(z) (1/√(2π)) exp(-z²/2) dz + # After substitution x = z/√2: ∫ f(√2 x) (1/√π) exp(-x²) dx + adjusted_weights = weights / xp.sqrt(xp.pi) + + # Compute first and second moments + mean_y = xp.sum(adjusted_weights * y_values) + second_moment_y = xp.sum(adjusted_weights * y_values**2) + + # Variance = E[Y²] - (E[Y])² + variance_y = second_moment_y - mean_y**2 + + return { + 'mean': mean_y, + 'second_moment': second_moment_y, + 'variance': variance_y, + 'std': xp.sqrt(variance_y) + } + + +################################################################################# +#### just for testing purposes below ########################################## +# Dummy classes to avoid circular imports during testing +class DummyConfig: + def __init__(self, alpha=2.0, beta=1.0): + self.alpha = alpha + self.beta = beta + +class DummyGammaPriorHyperparameters: + def __init__(self, config): + self.config = config + + def rescale_hyperparameters_to_internal(self, theta, direction): + """Log transformation: external (positive) <-> internal (unconstrained)""" + if direction == "forward": + return xp.log(theta) + elif direction == "backward": + return xp.exp(theta) + else: + raise ValueError(f"Unknown direction: {direction}") + + +def test_gaussian_quadrature(): + """ + Testing Gaussian quadrature function. + + Tests multiple transformation functions and compares results with + analytical solutions where available. + """ + + print("=" * 80) + print("GAUSSIAN QUADRATURE TESTS") + print("=" * 80) + + # Test parameters + test_tolerance = 1e-6 + n_quad_points = 20 + + # Test 1: Identity transformation (should recover original normal moments) + print("1. Testing Identity Transformation (X = Y)") + print("-" * 50) + + def identity_transform(x, direction): + return x # No transformation + + mu = 2.0 + sigma2 = 1.5 + + result = compute_variance_gauss_hermite(mu, sigma2, identity_transform, n_quad_points) + + # For identity, we should recover the original normal distribution moments + expected_mean = mu + expected_variance = sigma2 + + print(f" Expected mean: {expected_mean:.6f}, Got: {result['mean']:.6f}") + print(f" Expected var: {expected_variance:.6f}, Got: {result['variance']:.6f}") + + mean_error = abs(result['mean'] - expected_mean) + var_error = abs(result['variance'] - expected_variance) + + print(f" Mean error: {mean_error:.2e}") + print(f" Var error: {var_error:.2e}") + + assert mean_error < test_tolerance, f"Identity mean test failed: error = {mean_error}" + assert var_error < test_tolerance, f"Identity variance test failed: error = {var_error}" + print(" ✓ Identity transformation test PASSED") + print() + + # Test 2: Log transformation (log-normal distribution) + print("2. Testing Log Transformation (Y = exp(X))") + print("-" * 50) + + def log_transform(x, direction): + if direction == "forward": + return xp.log(x) + elif direction == "backward": + return xp.exp(x) + else: + raise ValueError(f"Unknown direction: {direction}") + + mu = 0.5 + sigma2 = 0.25 + + result = compute_variance_gauss_hermite(mu, sigma2, log_transform, n_quad_points) + + # Analytical moments for log-normal: if log(Y) ~ N(μ, σ²) + expected_mean = xp.exp(mu + sigma2/2) + expected_variance = (xp.exp(sigma2) - 1) * xp.exp(2*mu + sigma2) + + print(f" Expected mean: {expected_mean:.6f}, Got: {result['mean']:.6f}") + print(f" Expected var: {expected_variance:.6f}, Got: {result['variance']:.6f}") + + mean_error = abs(result['mean'] - expected_mean) / expected_mean + var_error = abs(result['variance'] - expected_variance) / expected_variance + + print(f" Relative mean error: {mean_error:.2e}") + print(f" Relative var error: {var_error:.2e}") + + assert mean_error < 1e-4, f"Log-normal mean test failed: rel error = {mean_error}" + assert var_error < 1e-4, f"Log-normal variance test failed: rel error = {var_error}" + print(" ✓ Log transformation test PASSED") + print() + + # Test 3: Linear transformation (Y = aX + b) + print("3. Testing Linear Transformation (Y = aX + b)") + print("-" * 50) + + a, b = 3.0, -1.5 + + def linear_transform(x, direction): + if direction == "forward": + return (x - b) / a + elif direction == "backward": + return a * x + b + else: + raise ValueError(f"Unknown direction: {direction}") + + mu = 1.0 + sigma2 = 0.8 + + result = compute_variance_gauss_hermite(mu, sigma2, linear_transform, n_quad_points) + + # For Y = aX + b where X ~ N(μ, σ²): E[Y] = aμ + b, Var[Y] = a²σ² + expected_mean = a * mu + b + expected_variance = a**2 * sigma2 + + print(f" Transform: Y = {a}X + {b}") + print(f" Expected mean: {expected_mean:.6f}, Got: {result['mean']:.6f}") + print(f" Expected var: {expected_variance:.6f}, Got: {result['variance']:.6f}") + + mean_error = abs(result['mean'] - expected_mean) + var_error = abs(result['variance'] - expected_variance) + + print(f" Mean error: {mean_error:.2e}") + print(f" Var error: {var_error:.2e}") + + assert mean_error < test_tolerance, f"Linear mean test failed: error = {mean_error}" + assert var_error < test_tolerance, f"Linear variance test failed: error = {var_error}" + print(" ✓ Linear transformation test PASSED") + print() + + # Test 4: Gamma prior rescaling (again log transformation) + print("4. Testing Gamma Prior Rescaling") + print("-" * 50) + + config = DummyConfig(alpha=2.0, beta=1.0) + gamma_prior = DummyGammaPriorHyperparameters(config=config) + + def gamma_rescale(x, direction): + return gamma_prior.rescale_hyperparameters_to_internal(x, direction) + + mu = 0.2 + sigma2 = 0.3 + + result = compute_variance_gauss_hermite(mu, sigma2, gamma_rescale, n_quad_points) + + # This is the same as log-normal since rescale uses exp transformation + expected_mean = xp.exp(mu + sigma2/2) + expected_variance = (xp.exp(sigma2) - 1) * xp.exp(2*mu + sigma2) + + print(f" Expected mean: {expected_mean:.6f}, Got: {result['mean']:.6f}") + print(f" Expected var: {expected_variance:.6f}, Got: {result['variance']:.6f}") + + mean_error = abs(result['mean'] - expected_mean) / expected_mean + var_error = abs(result['variance'] - expected_variance) / expected_variance + + print(f" Relative mean error: {mean_error:.2e}") + print(f" Relative var error: {var_error:.2e}") + + assert mean_error < 1e-4, f"Gamma rescale mean test failed: rel error = {mean_error}" + assert var_error < 1e-4, f"Gamma rescale variance test failed: rel error = {var_error}" + print(" ✓ Gamma prior rescaling test PASSED") + print() + + # Test 5: Convergence with increasing quadrature points + print("5. Testing Convergence with Quadrature Points") + print("-" * 50) + + mu_conv = 0.1 + sigma2_conv = 0.4 + expected_mean_conv = xp.exp(mu_conv + sigma2_conv/2) + + n_points_list = [5, 10, 15, 20, 30, 50] + errors_mean = [] + errors_var = [] + + print(" n_points | Mean | Rel. Mean Error | Rel. Var. Error") + print(" ----------|-------------|------------------|-----------------") + + for n in n_points_list: + result = compute_variance_gauss_hermite(mu_conv, sigma2_conv, log_transform, n) + rel_error_mean = abs(result['mean'] - expected_mean_conv) / expected_mean_conv + errors_mean.append(rel_error_mean) + + print(f" {n:8d} | {result['mean']:10.6f} | {rel_error_mean:5.3e} ") + + # Check that errors generally decrease (allowing some numerical noise) + improving = sum(errors_mean[i+1] < errors_mean[i] * 1.1 for i in range(len(errors_mean)-1)) + improvement_rate = improving / (len(errors_mean) - 1) + + print(f" Improvement rate: {improvement_rate:.1%}") + assert improvement_rate > 0.6, f"Convergence test failed: improvement rate = {improvement_rate}" + print(" ✓ Convergence test PASSED") + print() + + # Summary + print("=" * 80) + print("ALL TESTS PASSED! ✓") + print("=" * 80) + + +if __name__ == "__main__": + test_gaussian_quadrature() \ No newline at end of file diff --git a/src/dalia/utils/link_functions.py b/src/dalia/utils/link_functions.py index 9f335bba..97ddfe84 100644 --- a/src/dalia/utils/link_functions.py +++ b/src/dalia/utils/link_functions.py @@ -23,5 +23,9 @@ def scaled_logit(x: NDArray, direction: str) -> NDArray: return (1.0 / k) * xp.log(x / (1.0 - x)) elif direction == "backward": return 1 / (1 + xp.exp(-k * x)) + elif direction == "forward_jacobian": + return 1.0 / (k * x * (1.0 - x)) + elif direction == "backward_jacobian": ### should be 1 / forward_jacobian ... + return k * x * (1.0 - x) else: raise ValueError(f"Unknown direction: {direction}") diff --git a/src/dalia/utils/multiprocessing.py b/src/dalia/utils/multiprocessing.py index a7c0ee6a..beaf4344 100644 --- a/src/dalia/utils/multiprocessing.py +++ b/src/dalia/utils/multiprocessing.py @@ -1,9 +1,10 @@ # Copyright 2024-2025 DALIA authors. All rights reserved. +from typing import Literal + from dataclasses import dataclass -import numpy as np -from dalia import ArrayLike, backend_flags, comm_rank +from dalia import ArrayLike, backend_flags, comm_rank, xp from dalia.utils.gpu_utils import get_array_module_name, get_device, get_host if backend_flags["mpi_avail"]: @@ -139,9 +140,27 @@ def bcast( comm (CommunicatorType), optional: The communication group. Default is MPI.COMM_WORLD. """ + # Need to check data module and MPI capabilities + d2h2d_needed : bool = ( + backend_flags["mpi_avail"] and get_array_module_name(data) == "cupy" + ) + if backend_flags["mpi_avail"]: - comm.Bcast(data, root=root) + if d2h2d_needed: + data_comm = get_host(data) + else: + data_comm = data + if data.ndim == 0: + comm.Bcast(data_comm, root=root) + else: + comm.Bcast(data_comm[:], root=root) + + if d2h2d_needed: + if data.ndim == 0: + data[...] = get_device(data_comm) + else: + data[:] = get_device(data_comm) def get_active_comm( comm, @@ -214,3 +233,59 @@ def smartsplit( color_new_group = 0 return active_comm, comm_new_group, color_new_group + +def check_vector_consistency( + value: ArrayLike, + comm, + flag: str, + verbose: Literal["No", "Minimal", "Full"] = "No", + rtol: float = 1e-10, +): + """ Check if all processes have the same value. + + Parameters: + ----------- + value (ArrayLike): + The value to check. + comm (CommunicatorType), optional: + The communication group. + flag (str): + A string to identify the value being checked in the error message. + verbose (str): + The level of verbosity for the error message. Choose from 'No', 'Minimal', or 'Full'. Default is 'No'. + rtol (float): + The relative tolerance for the consistency check. Default is 1e-10. + + Raises: + ------- + ValueError: + If the value is not consistent across all processes. + """ + synchronize(comm = comm) + + value_ref = value.copy() + + bcast(data=value_ref[:], root=0, comm=comm) + + norm_diff = xp.linalg.norm(value - value_ref) + + if norm_diff > rtol: + # Print indices and values where value and value_ref differ + if verbose == "No": + raise ValueError( + f"Process {comm.Get_rank()} has a different {flag} than the reference process with a norm of the difference of {norm_diff:.4e}." + ) + + diff_indices = xp.where(value != value_ref)[0] + if verbose == "Minimal": + # Only print the first 5 differences, make sure it's 5 or the max number of differences + diff_indices = diff_indices[:min(5, len(diff_indices))] + elif verbose == "Full": + pass + else: + raise ValueError( + f"Invalid verbose option: {verbose}. Choose from 'No', 'Minimal', or 'Full'." + ) + + for idx in diff_indices: + print(f"Process {comm.Get_rank()} difference at index {idx}: {flag}={value_ref[idx]}, value={value[idx]}") diff --git a/src/dalia/utils/plotting.py b/src/dalia/utils/plotting.py new file mode 100644 index 00000000..c28b4212 --- /dev/null +++ b/src/dalia/utils/plotting.py @@ -0,0 +1,150 @@ +import matplotlib.pyplot as plt +import numpy as np +from scipy.stats import norm + +from dalia import xp +from dalia.utils import get_host + +def plot_prior_hp(param_name, theta_interval, prior_hp, log=False): + """ + Plot prior distribution of a hyperparameter. + + All priors are in log-scale. Therefore exponeniate unless log=True. + + Parameters + ---------- + param_name : str + Name of the hyperparameter. + theta_interval: tuple of float + Interval (min, max) for plotting the prior. + prior_hp : PriorHyperparameters + Prior hyperparameter object. + log : bool, optional + Whether to plot in log-scale or original scale. Default is False. + + + Note + ---- + If log is True, the plot will be in log-scale. Otherwise, it will be in the original scale. + + + Returns + ------- + fig, ax : matplotlib Figure and Axes + The figure and axes objects containing the plot. + """ + + if theta_interval[0] == 0: + theta_interval = (1e-6, theta_interval[1]) + + theta_vals = xp.linspace(theta_interval[0], theta_interval[1], 200) + prior_vals = xp.array([prior_hp.evaluate_log_prior(theta) for theta in theta_vals]) + + theta_vals = get_host(theta_vals) + prior_vals = get_host(prior_vals) + + if log: + xlabel = f"{param_name}" + ylabel = "Log Prior Density" + else: + prior_vals = np.exp(prior_vals) + xlabel = f"{param_name}" + ylabel = "Prior Density" + + fig, ax = plt.subplots(figsize=(8, 5)) + ax.plot(theta_vals, prior_vals, 'b-', linewidth=2) + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.set_title(f"Prior Distribution of {param_name}") + ax.grid(True, alpha=0.3) + + return fig, ax + + + + +def plot_marginal_distributions_hp(marginals_hp): + """Plot marginal distributions of hyperparameters in both internal and external parametrizations.""" + + # Get all hyperparameters + hyperparams = marginals_hp['hyperparameters'] + n_params = len(hyperparams) + + # Create subplot grid: n_params rows, 2 columns (internal left, external right) + fig, axes = plt.subplots(n_params, 2, figsize=(15, 5*n_params)) + + # Handle case of single parameter + if n_params == 1: + axes = axes.reshape(1, -1) + + # Quantile colors and labels + colors = ['#DEB887', '#DEB887','darkred', '#DEB887', '#DEB887'] + labels = ['2.5%', '25%', '50%', '75%','97.5%'] + + for row, (param_name, param_data) in enumerate(hyperparams.items()): + # Get internal parameters + mean_internal = param_data['mean_internal'] + var_internal = param_data['variance_internal'] + std_internal = np.sqrt(var_internal) + + # Get external parameters + mean_external = param_data['mean_external'] + var_external = param_data['variance_external'] + theta_external, pdf_external = param_data['pdf_data'] + + # Get quantiles + quantile_pairs_internal = param_data['quantiles']['internal']['pairs'] + quantile_pairs_external = param_data['quantiles']['external']['pairs'] + + # ===== LEFT PLOT: INTERNAL PARAMETRIZATION ===== + ax_left = axes[row, 0] + + # Create internal distribution (Gaussian) + x_internal = np.linspace(mean_internal - 4*std_internal, mean_internal + 4*std_internal, 100) + pdf_internal = norm.pdf(x_internal, loc=mean_internal, scale=std_internal) + + # Plot internal PDF + ax_left.plot(x_internal, pdf_internal, 'b-', linewidth=2, label='PDF (Internal)') + + # Mark internal mean + ax_left.axvline(mean_internal, color='red', linestyle='--', linewidth=2, + label=f'Mean = {mean_internal:.3f}') + + # Mark internal quantiles + for i, (prob, q_val) in enumerate(quantile_pairs_internal): + if i < len(labels): + ax_left.axvline(q_val, color=colors[i], linestyle=':', linewidth=2, + label=f'{labels[i]} = {q_val:.3f}') + + ax_left.set_xlabel(f'{param_name} (internal scale)') + ax_left.set_ylabel('PDF') + ax_left.set_title(f'{param_name}: Internal Distribution (Gaussian)') + ax_left.legend() + ax_left.grid(True, alpha=0.3) + + # ===== RIGHT PLOT: EXTERNAL PARAMETRIZATION ===== + ax_right = axes[row, 1] + + # Plot external PDF + ax_right.plot(theta_external, pdf_external, 'b-', linewidth=2, label='PDF') + + # Mark external mean + ax_right.axvline(mean_external, color='red', linestyle='--', linewidth=2, + label=f'Mean = {mean_external:.3f}') + + # Mark external quantiles + for i, (prob, q_val) in enumerate(quantile_pairs_external): + if i < len(labels): + ax_right.axvline(q_val, color=colors[i], linestyle=':', linewidth=2, + label=f'{labels[i]} = {q_val:.3f}') + + ax_right.set_xlabel(f'{param_name} ') + ax_right.set_ylabel('PDF') + ax_right.set_title(f'{param_name}: Marginal Distribution') + ax_right.legend() + ax_right.grid(True, alpha=0.3) + + # plt.tight_layout() + # plt.show() + + return fig, axes diff --git a/src/dalia/utils/reparametrizations.py b/src/dalia/utils/reparametrizations.py new file mode 100644 index 00000000..d5924b01 --- /dev/null +++ b/src/dalia/utils/reparametrizations.py @@ -0,0 +1,332 @@ +from scipy.stats import norm +import numpy as np + +from dalia import NDArray, xp +from dalia.utils import get_host, get_device + + +def compute_transformed_quantiles(mean_internal, var_internal, percentiles, transform): + """ + Compute quantiles for a transformed distribution + + Parameters: + - original_dist_params: (mean, std) for the internal/transformed distribution + - percentiles: array of probability values (0, 1) + - transform: TransformationFunction object + + Returns: + - quantiles in original scale + + Notes: + Computing Quantiles for Transformed Distributions + + Idea: + If X ~ f(x) and Y = φ(X), then find quantiles of Y: + 1. For a given probability p, find q_p such that P(Y ≤ q_p) = p + 2. This is equivalent to P(φ(X) ≤ q_p) = p + 3. We suppose φ is bijective and monotonely increasing. Then, if F_X is the CDF of X, we can write: + F_Y(q_p) = P(Y ≤ q_p) = P(φ(X) ≤ q_p) = P(X ≤ φ⁻¹(q_p)) = F_X(φ⁻¹(q_p)) + + 4. So φ⁻¹(q_p) = F_X⁻¹(p), where F_X⁻¹ is the quantile function of X + 5. Therefore: q_p = φ(F_X⁻¹(p)) + """ + + # Step 1: Compute quantiles in internal scale + quantiles_np = get_host(percentiles) + mean_internal_np = get_host(mean_internal) + var_internal_np = get_host(var_internal) + + internal_quantiles = norm.ppf(quantiles_np, loc=mean_internal_np, scale=var_internal_np**0.5) + + # copy qunatiles to device + internal_quantiles = get_device(internal_quantiles) + + # Step 2: Transform back to original scale + # If φ: original → internal, then original quantiles = φ⁻¹(internal quantiles) + original_quantiles = transform(internal_quantiles, direction='backward') + + return original_quantiles + +def compute_transformed_pdf(mean_internal, var_internal, x_internal, transform): + """ + Compute PDF of transformed distribution using change of variables + + If Y = φ(X), then f_Y(y) = f_X(φ⁻¹(y)) * |dφ⁻¹/dy| + But we want f_X(x) where x is in original scale, so: + f_X(x) = f_Y(φ(x)) * |dφ/dx| + """ + + # PDF in internal scale + x_internal_np = get_host(x_internal) + mean_internal_np = get_host(mean_internal) + var_internal_np = get_host(var_internal) + pdf_internal_np = norm.pdf(x_internal_np, loc=mean_internal_np, scale=var_internal_np**0.5) + + # copy pdf_internal to device + pdf_internal = get_device(pdf_internal_np) + + # Jacobian: derivative of transformation + # Ensure x_internal is treated as array for vectorized operations + x_original = transform(x_internal, direction='backward') + jacobian = xp.abs(transform(x_original, direction='forward_jacobian')) + + # PDF values in original scale + pdf_original = pdf_internal * jacobian + + return x_original, pdf_original + +# Automatic bound calculation based on 4 (default) standard deviations in internal scale +def compute_bounds(mean_internal, var_internal, transform, n_std=4): + """ + Compute plotting bounds based on n standard deviations in internal scale + """ + # Internal scale bounds (±n standard deviations) + internal_lower = mean_internal - n_std * var_internal**0.5 + internal_upper = mean_internal + n_std * var_internal**0.5 + + # Transform to original scale + original_lower = transform(internal_lower, direction='backward') + original_upper = transform(internal_upper, direction='backward') + + return (internal_lower, internal_upper), (original_lower, original_upper) + +###################################### TEST ###################################### +if __name__ == "__main__": + """ + Test reparametrization functions using a dummy gamma prior hyperparameter class. + + This test demonstrates how the reparametrization functions work with + transformation functions that have forward, backward, and jacobian directions. + """ + import numpy as np + import matplotlib.pyplot as plt + + # Dummy Gamma Prior Hyperparameter Class + class DummyGammaPrior: + """ + Dummy implementation of gamma prior rescaling for testing purposes. + Implements log transformation: forward = log(x), backward = exp(x) + """ + def rescale_hyperparameters_to_internal(self, theta, direction): + """Log transformation between positive (external) and unconstrained (internal) space""" + if direction == "forward": + return xp.log(theta) # theta -> log(theta) + elif direction == "backward": + return xp.exp(theta) # log(theta) -> theta + elif direction == "forward_jacobian": + return 1.0 / theta # d(log(theta))/d(theta) = 1/theta + elif direction == "backward_jacobian": + return theta # d(exp(theta))/d(theta) = exp(theta) = theta + else: + raise ValueError(f"Unknown direction: {direction}") + + print("=" * 80) + print("Testing Reparametrization Functions with Dummy Gamma Prior") + print("=" * 80) + + # Create dummy gamma prior instance + gamma_prior = DummyGammaPrior() + + # Define transform function compatible with reparametrization functions + def transform_func(x, direction): + return gamma_prior.rescale_hyperparameters_to_internal(x, direction) + + # Test parameters (internal space: log-normal distribution) + mean_internal = 0.5 # mean of log(theta) + std_internal = 0.8 # std of log(theta) + + print(f"Internal distribution parameters:") + print(f" Mean (log scale): {mean_internal:.3f}") + print(f" Std (log scale): {std_internal:.3f}") + print() + + # Test 1: Transform validation + print("1. Testing transformation consistency:") + test_values = [0.1, 0.5, 1.0, 2.0, 5.0, 10.0] + + print(" Original -> Internal -> Original (round-trip test)") + for theta in test_values: + # Forward transformation + log_theta = transform_func(theta, "forward") + + # Backward transformation + theta_recovered = transform_func(log_theta, "backward") + + # Check error + error = abs(theta - theta_recovered) + + print(f" {theta:5.1f} -> {log_theta:6.3f} -> {theta_recovered:6.3f}, " + f"error = {error:.2e}") + print() + + # Test 2: Jacobian validation using finite differences + print("2. Testing Jacobian accuracy (forward direction):") + + test_theta_vals = [0.5, 1.0, 2.0, 3.0] + eps = 1e-8 + + print(" θ | Analytical | Numerical | Error") + print(" ------|------------|------------|----------") + + for theta in test_theta_vals: + # Analytical jacobian + jac_analytical = transform_func(theta, "forward_jacobian") + + # Numerical jacobian using finite differences + f_plus = transform_func(theta + eps, "forward") + f_minus = transform_func(theta - eps, "forward") + jac_numerical = (f_plus - f_minus) / (2 * eps) + + error = abs(jac_analytical - jac_numerical) + + print(f" {theta:4.1f} | {jac_analytical:10.6f} | {jac_numerical:10.6f} | {error:.2e}") + print() + + # Test 3: Quantile computation + print("3. Testing quantile computation:") + + percentiles = xp.array([0.025, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.975]) + original_quantiles = compute_transformed_quantiles( + mean_internal, std_internal**0.5, percentiles, transform_func + ) + + print(" Percentile | Quantile (original scale)") + print(" -----------|-----------------------") + for p, q in zip(percentiles, original_quantiles): + print(f" {p:8.2f} | {q:18.6f}") + print() + + # Test 4: PDF computation and validation + print("4. Testing PDF computation:") + + # Compute bounds for plotting + (internal_lower, internal_upper), (original_lower, original_upper) = compute_bounds( + mean_internal, std_internal, transform_func, n_std=4 + ) + + print(f" Internal bounds: [{internal_lower:.3f}, {internal_upper:.3f}]") + print(f" Original bounds: [{original_lower:.3f}, {original_upper:.3f}]") + + # Test PDF at specific points + test_x_original = xp.array([0.5, 1.0, 2.0, 3.0, 5.0]) + + print(" x (orig) | PDF (orig) | log(x) | PDF (int)") + print(" ---------|------------|----------|----------") + + x_internal = transform_func(test_x_original, "forward") + x_original, pdf_orig = compute_transformed_pdf(mean_internal, std_internal**2, x_internal, transform_func) + pdf_internal = norm.pdf(x_internal, loc=mean_internal, scale=std_internal) + + for i in range(len(test_x_original)): + print(f" {test_x_original[i]:7.2f} | {pdf_orig[i]:10.6f} | {x_internal[i]:8.3f} | {pdf_internal[i]:8.6f}") + print() + + # Test 5: Analytical validation for log-normal distribution + print("5. Analytical validation (log-normal distribution):") + + # For log-normal distribution, we can compute analytical moments + mu = mean_internal + sigma = std_internal + + # Analytical log-normal statistics + analytical_mean = xp.exp(mu + sigma**2/2) + analytical_var = (xp.exp(sigma**2) - 1) * xp.exp(2*mu + sigma**2) + analytical_std = xp.sqrt(analytical_var) + + # Numerical verification using quantiles + # Mean ≈ 50th percentile for log-normal (approximately) + median_quantile = compute_transformed_quantiles( + mean_internal, std_internal**0.5, xp.array([0.5]), transform_func + )[0] + + print(f" Analytical mean: {analytical_mean:.6f}") + print(f" Analytical std: {analytical_std:.6f}") + print(f" Median quantile: {median_quantile:.6f}") + print(f" Mean/Median ratio: {analytical_mean/median_quantile:.6f}") + print(" (Should be > 1 for log-normal due to skewness)") + print() + + # Test 6: PDF integration check (numerical verification) + print("6. PDF integration check:") + + # Create fine grid for integration -> need to start in original scale for dx to be equidistant + x_original = xp.linspace(original_lower, original_upper, 1000) + x_internal = transform_func(x_original, "forward") + x_original, pdf_values = compute_transformed_pdf(mean_internal, std_internal**2, x_internal, transform_func) + + # Numerical integration using trapezoidal rule + dx = x_original[1] - x_original[0] + integral = np.trapezoid(pdf_values, dx=dx) + + print(f" Numerical integral of PDF: {integral:.6f}") + print(f" Should be close to 1.0, error: {abs(1.0 - integral):.6f}") + print() + + # repeat with non-equidistant grid in original scale but equidistant in internal scale + print(" Repeating PDF integration with equidistant grid in internal scale:") + x_internal = np.linspace(internal_lower, internal_upper, 1000) + x_original, pdf_values = compute_transformed_pdf(mean_internal, std_internal**2, x_internal, transform_func) + + integral = np.trapezoid(pdf_values, x=x_original) + print(f" Numerical integral of PDF: {integral:.6f}") + print(f" Should be close to 1.0, error: {abs(1.0 - integral):.6f}") + print() + + # Test 7: Bounds computation for different n_std values + print("7. Testing bounds computation:") + + for n_std in [1, 2, 3, 4]: + (int_lower, int_upper), (orig_lower, orig_upper) = compute_bounds( + mean_internal, std_internal, transform_func, n_std=n_std + ) + + print(f" {n_std}σ bounds:") + print(f" Internal: [{int_lower:7.3f}, {int_upper:7.3f}]") + print(f" Original: [{orig_lower:7.3f}, {orig_upper:7.3f}]") + print() + + # Test 8: Plotting PDFs in both scales + print("8. Plotting PDFs in internal and original scales:") + + # Plot 1: PDF in internal scale (log-scale, normal distribution) + x_internal = xp.linspace(internal_lower, internal_upper, 500) + pdf_internal = norm.pdf(x_internal, loc=mean_internal, scale=std_internal) + # Create visualization + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6)) + + ax1.plot(x_internal, pdf_internal, 'b-', linewidth=2, label='Internal PDF') + internal_quantiles = norm.ppf(percentiles, loc=mean_internal, scale=std_internal) + for i, (p, q) in enumerate(zip(percentiles, internal_quantiles)): + color = 'red' if p in [0.025, 0.975] else 'orange' + ax1.axvline(q, color=color, linestyle='--', alpha=0.7) + if i % 2 == 0: + ax1.text(q, max(pdf_internal)*0.8, f'{p*100:.1f}%', rotation=90, ha='right', va='top') + + ax1.set_title(f'Internal Scale: N(mean = {mean_internal}, std = {std_internal:.3f})') + ax1.set_xlabel('θ (internal)') + ax1.set_ylabel('PDF') + ax1.set_xlim(x_internal[0], x_internal[-1]) + ax1.grid(True, alpha=0.3) + ax1.legend() + + # Plot 2: Original distribution with quantiles + x_original = np.linspace(int_lower, int_upper, 1000) + x_original, pdf_original = compute_transformed_pdf(mean_internal, std_internal**2, x_internal, transform_func) + + ax2.plot(x_original, pdf_original, 'g-', linewidth=2, label='Original PDF') + for i, (p, q) in enumerate(zip(percentiles, original_quantiles)): + color = 'red' if p in [0.025, 0.975] else 'orange' + ax2.axvline(q, color=color, linestyle='--', alpha=0.7) + if i % 2 == 0: + ax2.text(q, max(pdf_original)*0.8, f'{p*100:.1f}%', rotation=90, ha='right', va='top') + + ax2.set_title('Original Scale: Log-Normal Distribution') + ax2.set_xlabel('θ_outer (original)') + ax2.set_ylabel('PDF') + ax2.set_xlim(orig_lower, 15) + ax2.grid(True, alpha=0.3) + ax2.legend() + plt.tight_layout() + plt.show() + + print("=" * 80) \ No newline at end of file diff --git a/tests/integration/gst/itest.py b/tests/integration/gst/itest.py new file mode 100644 index 00000000..a45fecde --- /dev/null +++ b/tests/integration/gst/itest.py @@ -0,0 +1,111 @@ +from pathlib import Path +import numpy as np + +from dalia.configs import likelihood_config, dalia_config, submodels_config +from dalia.core.model import Model +from dalia.core.dalia import DALIA +from dalia.utils import print_msg, get_host +from dalia.submodels import RegressionSubModel, SpatioTemporalSubModel + +SCRIPT_DIR = Path(__file__).resolve() +DALIA_DIR = SCRIPT_DIR.parent.parent.parent.parent +EXAMPLE_PATH = DALIA_DIR / "examples" / "gst_medium" + +X_TOL = 1e-1 +THETA_TOL = 1e1 +TYPICAL_N_ITER = 26 + +def gst_itest(): + spatio_temporal_dict = { + "type": "spatio_temporal", + "input_dir": f"{EXAMPLE_PATH}/inputs_spatio_temporal", + "spatial_domain_dimension": 2, + "r_s": 0.0, + "r_t": 2.2, + "sigma_st": 1.3, + "manifold": "sphere", + "ph_s": {"type": "penalized_complexity", "alpha": 0.01, "u": 0.5}, + "ph_t": {"type": "penalized_complexity", "alpha": 0.01, "u": 5}, + "ph_st": {"type": "penalized_complexity", "alpha": 0.01, "u": 3}, + } + spatio_temporal = SpatioTemporalSubModel( + config=submodels_config.parse_config(spatio_temporal_dict), + ) + # . Regression submodel + regression_dict = { + "type": "regression", + "input_dir": f"{EXAMPLE_PATH}/inputs_regression", + "n_fixed_effects": 6, + "fixed_effects_prior_precision": 0.001, + } + regression = RegressionSubModel( + config=submodels_config.parse_config(regression_dict), + ) + # Configurations of the likelihood + likelihood_dict = { + "type": "gaussian", + "prec_o": 4, + "prior_hyperparameters": {"type": "gaussian", "mean": 1.4, "precision": 0.5}, + } + # Creation of the model by combining the submodels and the likelihood + model = Model( + submodels=[regression, spatio_temporal], + likelihood_config=likelihood_config.parse_config(likelihood_dict), + ) + # Configurations of DALIA + dalia_dict = { + "solver": { + "type": "serinv", + "min_processes": 1, + }, + "minimize": { + "max_iter": 100, + "gtol": 1e-3, + "disp": True, + }, + "f_reduction_tol": 1e-4, + "theta_reduction_tol": 1e-4, + "inner_iteration_max_iter": 50, + "eps_inner_iteration": 1e-3, + "eps_gradient_f": 1e-3, + "simulation_dir": f"{EXAMPLE_PATH}", + } + dalia = DALIA( + model=model, + config=dalia_config.parse_config(dalia_dict), + ) + results = dalia.run() + + # Check iterations behavior + success_msg : str = "success" + if results["optimization_iterations"] > TYPICAL_N_ITER: + success_msg = "warning_more_iters_than_typical" + elif results["optimization_iterations"] < TYPICAL_N_ITER: + success_msg = "success_less_iters_than_typical" + + # Compare hyperparameters + theta_ref = np.array(np.load(f"{EXAMPLE_PATH}/reference_outputs/theta_ref.npy")) + print(f"theta_ref: {theta_ref}") + print(f"theta_dalia: {get_host(results['theta_internal'])}") + print_msg( + "Norm (theta - theta_ref): ", + f"{np.linalg.norm(get_host(results['theta_internal']) - theta_ref):.4e}", + ) + if np.linalg.norm(get_host(results["theta_internal"]) - theta_ref) > THETA_TOL: + return "theta_tol_exceeded" + + # Compare latent parameters + x_ref = np.array(np.load(f"{EXAMPLE_PATH}/reference_outputs/x_ref.npy")) + print(f"x_ref: {x_ref}") + print(f"x_dalia: {get_host(results['x'])}") + print_msg( + "Norm (x - x_ref): ", + f"{np.linalg.norm(get_host(results['x']) - x_ref):.4e}", + ) + if np.linalg.norm(get_host(results["x"]) - x_ref) > X_TOL: + return "x_tol_exceeded" + + return success_msg + +if __name__ == "__main__": + gst_itest() \ No newline at end of file diff --git a/tests/integration/gstcoreg2/itest.py b/tests/integration/gstcoreg2/itest.py new file mode 100644 index 00000000..d075a822 --- /dev/null +++ b/tests/integration/gstcoreg2/itest.py @@ -0,0 +1,234 @@ +from pathlib import Path +import numpy as np + +from dalia.configs import ( + likelihood_config, + models_config, + dalia_config, + submodels_config, +) +from dalia.core.model import Model +from dalia.core.dalia import DALIA +from dalia.models import CoregionalModel +from dalia.submodels import RegressionSubModel, SpatioTemporalSubModel +from dalia.utils import print_msg, get_host + +SCRIPT_DIR = Path(__file__).resolve() +DALIA_DIR = SCRIPT_DIR.parent.parent.parent.parent +EXAMPLE_PATH = DALIA_DIR / "examples" / "gst_coreg2_small" + +X_TOL = 1e3 # 1.5e+2 +THETA_TOL = 1e2 # 9.e+1 +TYPICAL_N_ITER = 80 # On Fritz: 86 + +def gstcoreg2_itest(): + nv = 2 + ns = 354 + nt = 12 + nb = 2 + + theta_ref_file = ( + f"{EXAMPLE_PATH}/inputs_nv{nv}_ns{ns}_nt{nt}_nb{nb}/reference_outputs/theta_ref.npy" + ) + theta_ref = np.load(theta_ref_file) + perturbation = [ + 0.18197867, + -0.12551227, + 0.19998896, + 0.17226796, + 0.14656176, + -0.11864931, + 0.17817371, + -0.13006157, + 0.19308036, + ] + theta_initial = theta_ref + np.array(perturbation) + + # Configurations of the submodels for the first model + # . Spatio-temporal submodel 1 + spatio_temporal_1_dict = { + "type": "spatio_temporal", + "input_dir": f"{EXAMPLE_PATH}/inputs_nv{nv}_ns{ns}_nt{nt}_nb{nb}/model_1/inputs_spatio_temporal", + "spatial_domain_dimension": 2, + "r_s": theta_initial[0], + "r_t": theta_initial[1], + "sigma_st": 0.0, + "manifold": "plane", + "ph_s": { + "type": "gaussian", + "mean": theta_ref[0], + "precision": 0.5, + }, + "ph_t": { + "type": "gaussian", + "mean": theta_ref[1], + "precision": 0.5, + }, + "ph_st": { + "type": "gaussian", + "mean": 0.0, + "precision": 0.5, + }, + } + spatio_temporal_1 = SpatioTemporalSubModel( + config=submodels_config.parse_config(spatio_temporal_1_dict), + ) + # . Regression submodel 1 + regression_1_dict = { + "type": "regression", + "input_dir": f"{EXAMPLE_PATH}/inputs_nv{nv}_ns{ns}_nt{nt}_nb{nb}/model_1/inputs_regression", + "n_fixed_effects": 1, + "fixed_effects_prior_precision": 0.001, + } + regression_1 = RegressionSubModel( + config=submodels_config.parse_config(regression_1_dict), + ) + # . Likelihood submodel 1 + likelihood_1_dict = { + "type": "gaussian", + "prec_o": theta_initial[2], + "prior_hyperparameters": { + "type": "gaussian", + "mean": theta_initial[2], + "precision": 0.5, + }, + } + # Creation of the first model by combining the submodels and the likelihood + model_1 = Model( + submodels=[regression_1, spatio_temporal_1], + likelihood_config=likelihood_config.parse_config(likelihood_1_dict), + ) + + # Configurations of the submodels for the second model + # . Spatio-temporal submodel 2 + spatio_temporal_2_dict = { + "type": "spatio_temporal", + "input_dir": f"{EXAMPLE_PATH}/inputs_nv{nv}_ns{ns}_nt{nt}_nb{nb}/model_2/inputs_spatio_temporal", + "spatial_domain_dimension": 2, + "r_s": theta_initial[3], + "r_t": theta_initial[4], + "sigma_st": 0.0, + "manifold": "plane", + "ph_s": { + "type": "gaussian", + "mean": theta_ref[3], + "precision": 0.5, + }, + "ph_t": { + "type": "gaussian", + "mean": theta_ref[4], + "precision": 0.5, + }, + "ph_st": { + "type": "gaussian", + "mean": 0.0, + "precision": 0.5, + }, + } + spatio_temporal_2 = SpatioTemporalSubModel( + config=submodels_config.parse_config(spatio_temporal_2_dict), + ) + # . Regression submodel 2 + regression_2_dict = { + "type": "regression", + "input_dir": f"{EXAMPLE_PATH}/inputs_nv{nv}_ns{ns}_nt{nt}_nb{nb}/model_2/inputs_regression", + "n_fixed_effects": 1, + "fixed_effects_prior_precision": 0.001, + } + regression_2 = RegressionSubModel( + config=submodels_config.parse_config(regression_2_dict), + ) + # . Likelihood submodel 2 + likelihood_2_dict = { + "type": "gaussian", + "prec_o": theta_initial[5], + "prior_hyperparameters": { + "type": "gaussian", + "mean": theta_ref[5], + "precision": 0.5, + }, + } + # Creation of the second model by combining the submodels and the likelihood + model_2 = Model( + submodels=[spatio_temporal_2, regression_2], + likelihood_config=likelihood_config.parse_config(likelihood_2_dict), + ) + # Creation of the coregional model by combining the models + coreg_dict = { + "type": "coregional", + "n_models": 2, + "sigmas": [theta_initial[6], theta_initial[7]], + "lambdas": [theta_initial[8]], + "ph_sigmas": [ + {"type": "gaussian", "mean": theta_ref[6], "precision": 0.5}, + {"type": "gaussian", "mean": theta_ref[7], "precision": 0.5}, + ], + "ph_lambdas": [ + {"type": "gaussian", "mean": 0.0, "precision": 0.5}, + ], + } + coreg_model = CoregionalModel( + models=[model_1, model_2], + coregional_model_config=models_config.parse_config(coreg_dict), + ) + # Configurations of DALIA + dalia_dict = { + "solver": { + "type": "serinv", + "min_processes": 1, + }, + "minimize": { + "max_iter": 100, + "gtol": 1e-3, + "disp": True, + "maxcor": len(coreg_model.theta_external), + }, + "f_reduction_tol": 1e-3, + "theta_reduction_tol": 1e-4, + "inner_iteration_max_iter": 50, + "eps_inner_iteration": 1e-3, + "eps_gradient_f": 1e-3, + "eps_hessian_f": 5 * 1e-3, + "simulation_dir": f"{EXAMPLE_PATH}", + } + dalia = DALIA( + model=coreg_model, + config=dalia_config.parse_config(dalia_dict), + ) + # Run the optimization + results = dalia.run() + + # Check iterations behavior + success_msg : str = "success" + if results["optimization_iterations"] > TYPICAL_N_ITER: + success_msg = "warning_more_iters_than_typical" + elif results["optimization_iterations"] < TYPICAL_N_ITER: + success_msg = "success_less_iters_than_typical" + + # Compare hyperparameters + theta_ref = np.load(f"{EXAMPLE_PATH}/inputs_nv{nv}_ns{ns}_nt{nt}_nb{nb}/reference_outputs/theta_ref.npy") + print(f"theta_ref: {theta_ref}") + print(f"theta_dalia: {get_host(results['theta_internal'])}") + print_msg( + "Norm (theta - theta_ref): ", + f"{np.linalg.norm(get_host(results['theta_internal']) - theta_ref):.4e}", + ) + if np.linalg.norm(get_host(results["theta_internal"]) - theta_ref) > THETA_TOL: + return "theta_tol_exceeded" + + # Compare latent parameters + x_ref = np.load(f"{EXAMPLE_PATH}/inputs_nv{nv}_ns{ns}_nt{nt}_nb{nb}/reference_outputs/x_ref.npy") + x_ref = x_ref[dalia.model.permutation_latent_variables] + print(f"x_ref: {x_ref}") + print(f"x_dalia: {get_host(results['x'])}") + print_msg( + "Norm (x - x_ref): ", + f"{np.linalg.norm(get_host(results['x']) - x_ref):.4e}", + ) + if np.linalg.norm(get_host(results["x"]) - x_ref) > X_TOL: + return "x_tol_exceeded" + + return success_msg + +if __name__ == "__main__": + gstcoreg2_itest() diff --git a/tests/integration/par1/itest.py b/tests/integration/par1/itest.py new file mode 100644 index 00000000..fb42f224 --- /dev/null +++ b/tests/integration/par1/itest.py @@ -0,0 +1,110 @@ +from pathlib import Path +import numpy as np + +from dalia import xp +from dalia.configs import likelihood_config, dalia_config, submodels_config +from dalia.core.model import Model +from dalia.core.dalia import DALIA +from dalia.utils import print_msg, get_host +from dalia.submodels import RegressionSubModel, AR1SubModel + +SCRIPT_DIR = Path(__file__).resolve() +DALIA_DIR = SCRIPT_DIR.parent.parent.parent.parent +EXAMPLE_PATH = DALIA_DIR / "examples" / "p_ar1" + +X_TOL = 1e2 +THETA_TOL = 2e-2 +TYPICAL_N_ITER = 12 + +def par1_itest(): + # load reference output + theta_original = np.load(f"{EXAMPLE_PATH}/reference_outputs/theta_original.npy") + x_original = np.load(f"{EXAMPLE_PATH}/reference_outputs/x_original.npy") + + ar1_dict = { + "type": "ar1", + "input_dir": f"{EXAMPLE_PATH}/inputs_ar1", + "phi": 0.45, # has to be between 0 and 1 + "tau": 0.5, # precision + "ph_phi": {"type": "beta", "alpha": 5.0, "beta": 1.0}, + "ph_tau": {"type": "gamma", "alpha": 2.0, "beta": 0.5}, + } + ar1 = AR1SubModel( + config=submodels_config.parse_config(ar1_dict), + ) + + # Configurations of the regression submodel + regression_dict = { + "type": "regression", + "input_dir": f"{EXAMPLE_PATH}/inputs_regression", + "n_fixed_effects": 1, + "fixed_effects_prior_precision": 0.001, + } + regression = RegressionSubModel( + config=submodels_config.parse_config(regression_dict), + ) + + likelihood_dict = { + "type": "poisson", + "input_dir": f"{EXAMPLE_PATH}", + } + + model = Model( + submodels=[ar1, regression], + likelihood_config=likelihood_config.parse_config(likelihood_dict), + ) + # Configurations of DALIA + dalia_dict = { + "solver": {"type": "dense"}, + "minimize": { + "max_iter": 100, + "gtol": 1e-3, + "disp": True, + "maxcor": len(model.theta_external), + }, + "f_reduction_tol": 1e-3, + "theta_reduction_tol": 1e-4, + "inner_iteration_max_iter": 50, + "eps_inner_iteration": 1e-3, + "eps_gradient_f": 1e-3, + "simulation_dir": f"{EXAMPLE_PATH}", + } + + dalia = DALIA( + model=model, + config=dalia_config.parse_config(dalia_dict), + ) + results = dalia.run() + + # Check iterations behavior + success_msg : str = "success" + if results["optimization_iterations"] > TYPICAL_N_ITER: + success_msg = "warning_more_iters_than_typical" + elif results["optimization_iterations"] < TYPICAL_N_ITER: + success_msg = "success_less_iters_than_typical" + + # Compare hyperparameters + theta_user = get_host(results["theta"]) + print("theta_ref: ", theta_original) + print("theta user: ", theta_user) + print_msg( + "Norm (theta - theta_ref): ", + f"{np.linalg.norm(theta_user - theta_original):.4e}", + ) + if np.linalg.norm(theta_user - theta_original) > THETA_TOL: + return "theta_tol_exceeded" + + # Compare latent parameters + print(f"x_ref: {x_original}") + print(f"x_dalia: {get_host(results['x'])}") + print_msg( + "Norm (x - x_ref): ", + f"{np.linalg.norm(get_host(results['x']) - x_original):.4e}", + ) + if np.linalg.norm(get_host(results["x"]) - x_original) > X_TOL: + return "x_tol_exceeded" + + return success_msg + +if __name__ == "__main__": + par1_itest() diff --git a/tests/integration/pr/itest.py b/tests/integration/pr/itest.py new file mode 100644 index 00000000..95d18902 --- /dev/null +++ b/tests/integration/pr/itest.py @@ -0,0 +1,69 @@ +from pathlib import Path +import numpy as np + +from dalia.configs import likelihood_config, dalia_config, submodels_config +from dalia.core.model import Model +from dalia.core.dalia import DALIA +from dalia.utils import print_msg, get_host +from dalia.submodels import RegressionSubModel + +SCRIPT_DIR = Path(__file__).resolve() +DALIA_DIR = SCRIPT_DIR.parent.parent.parent.parent +EXAMPLE_PATH = DALIA_DIR / "examples" / "pr" + +X_TOL = 1e-5 + +def pr_itest(): + # Configurations of the regression submodel + regression_dict = { + "type": "regression", + "input_dir": f"{EXAMPLE_PATH}/inputs", + "n_fixed_effects": 6, + "fixed_effects_prior_precision": 0.001, + } + regression = RegressionSubModel( + config=submodels_config.parse_config(regression_dict), + ) + # Likelihood + likelihood_dict = { + "type": "poisson", + "input_dir": f"{EXAMPLE_PATH}", + } + model = Model( + submodels=[regression], + likelihood_config=likelihood_config.parse_config(likelihood_dict), + ) + # Configurations of DALIA + dalia_dict = { + "solver": {"type": "dense"}, + "minimize": { + "max_iter": 100, + "gtol": 1e-1, + "disp": True, + }, + "inner_iteration_max_iter": 50, + "eps_inner_iteration": 1e-3, + "eps_gradient_f": 1e-3, + "simulation_dir": f"{EXAMPLE_PATH}", + } + dalia = DALIA( + model=model, + config=dalia_config.parse_config(dalia_dict), + ) + results = dalia.run() + + # Compare latent parameters + x_ref = np.load(f"{EXAMPLE_PATH}/reference_outputs/x_ref.npy") + print(f"x_ref: {x_ref}") + print(f"x_dalia: {get_host(results['x'])}") + print_msg( + "Norm (x - x_ref): ", + f"{np.linalg.norm(get_host(results['x']) - x_ref):.4e}", + ) + if np.linalg.norm(get_host(results["x"]) - x_ref) > X_TOL: + return "x_tol_exceeded" + + return "success" + +if __name__ == "__main__": + pr_itest() \ No newline at end of file diff --git a/tests/integration/readme.md b/tests/integration/readme.md new file mode 100644 index 00000000..76c7f5cc --- /dev/null +++ b/tests/integration/readme.md @@ -0,0 +1,25 @@ +# How to run the integration tests + +## On Daint +I recomend getting on an interactive session where you can directly run the integration tests for all configurtions (in particular types of parallelization). +```bash +srun --pty --partition=debug --account=xxxx bash +``` + +Then, you can run the integration tests sequentially using: +```bash +python runner.py +``` + +Further configurations are available in the `runner.py` file. + +## On Fritz +I recomend getting on an interactive session where you can directly run the integration tests for all configurtions (in particular types of parallelization). +```bash +salloc -N 1 --partition=spr2tb --time=00:30:00 +``` + +Then, you can run the integration tests sequentially using: +```bash +srun python runner.py +``` \ No newline at end of file diff --git a/tests/integration/runner.py b/tests/integration/runner.py new file mode 100644 index 00000000..84e4c506 --- /dev/null +++ b/tests/integration/runner.py @@ -0,0 +1,32 @@ +import os + +from gst.itest import gst_itest +from gstcoreg2.itest import gstcoreg2_itest +from par1.itest import par1_itest +from pr.itest import pr_itest + +# run_test_scripts = { +# "gst/itest.py": ["seq", "par_f", "par_s"], +# "gcoreg/itest.py": ["seq", "par_f", "par_s"], +# "par1/itest.py": ["seq", "par_f"], +# "pr/itest.py": ["seq"], +# } + +itest_calls = { + gst_itest: ["seq"], + gstcoreg2_itest: ["seq"], + par1_itest: ["seq"], + pr_itest: ["seq"], +} + +os.environ["ARRAY_MODULE"] = "cupy" # "numpy" or "cupy" + +if __name__ == "__main__": + + for itest, modes in itest_calls.items(): + print(f"{itest.__name__} in mode `{modes[0]}` returned: {itest()}") + + # for mode in modes: + # print(f"Running {itest.__name__} in {mode} mode...") + # command = f"ARRAY_MODULE={mode} python tests/integration/{script}" + # os.system(command) \ No newline at end of file