diff --git a/README.md b/README.md index d858a12..be0a236 100644 --- a/README.md +++ b/README.md @@ -244,7 +244,66 @@ predict_data( ``` -This example walks through initializing hardware, simulating spatial data, estimating model parameters, and making predictions using **ExaGeoStatCPP** in R. + +## R Example +Here is another example demonstrating how to use **ExaGeoStatCPP** with nugget in R: + +```r +# Load the ExaGeoStatCPP library +library(ExaGeoStatCPP) + +# Set parameters for the simulation +ncores <- 30 +ngpus <- 0 +problem_size <- 1600 +dts <- 320 +lts <- 0 +computation <- "exact" +dimension <- "2D" +kernel <- "UnivariateMaternNuggetsStationary" +initial_theta <- c(1,0.1,0.5,0.1) +lower_bound <- c(0.05,0.005,0.05,0.005) +upper_bound <- c(5,5,5,5) +acc <- 1e-9 +p <- 1 +q <- 1 +opt_itrs <- 100 + +# Initialize hardware configuration +hardware <- new(Hardware, computation, ncores, ngpus, p, q) + +# Simulate spatial data based on the specified kernel and parameters +exageostat_data <- simulate_data( + kernel = kernel, + initial_theta = initial_theta, + problem_size = problem_size, + dts = dts, + dimension = dimension +) + +# Estimate model parameters using MLE +estimated_theta <- model_data( + matrix=exageostat_data$m, + x=exageostat_data$x, + y=exageostat_data$y, + kernel=kernel, dts=dts, + dimension=dimension, + lb=lower_bound, + ub=upper_bound, + mle_itr=opt_itrs) + +# Perform spatial prediction using the estimated parameters +test_x <- c(0.2, 0.330) +test_y <- c(0.104, 0.14) +predict_data( + train_data=list(x=exageostat_data$x, y=exageostat_data$y, exageostat_data$m), + test_data=list(test_x, test_y), + kernel=kernel, + dts=dts, + estimated_theta=estimated_theta) +``` + +These two examples walk through initializing hardware, simulating spatial data, estimating model parameters, and making predictions using **ExaGeoStatCPP** in R. 
> **Note:** Please take a look at the end-to-end examples in the `examples/` directory as a reference for using all the operations. diff --git a/cmake/ImportNLOPT.cmake b/cmake/ImportNLOPT.cmake index 66972f8..f0022a1 100644 --- a/cmake/ImportNLOPT.cmake +++ b/cmake/ImportNLOPT.cmake @@ -13,12 +13,12 @@ # Configuration settings for the integration of the NLOPT library # 'name' is assigned to "NLOPT", serving as the identifier for this library within the script. set(name "NLOPT") -# 'tag' defines "v2.7.1" as the version tag of NLOPT, indicating the specific release to be utilized. -set(tag "v2.7.1") -# 'version' specifies "2.7.1" as the version of the NLOPT library, ensuring compatibility with the project's requirements. -set(version "2.7.1") -# 'flag' is intended for additional configuration options during the build process. Disable Python and SWIG to avoid Python compatibility issues. -set(flag "-DNLOPT_PYTHON=OFF -DNLOPT_SWIG=OFF") +# 'tag' defines "v2.8.0" as the version tag of NLOPT, indicating the specific release to be utilized. +set(tag "v2.8.0") +# 'version' specifies "2.8.0" as the version of the NLOPT library, ensuring compatibility with the project's requirements (Python 3.13+ compatible). +set(version "2.8.0") +# 'flag' is intended for additional configuration options during the build process. Disable ALL language bindings to avoid Python compatibility issues. +set(flag -DNLOPT_PYTHON=OFF \-DNLOPT_SWIG=OFF \-DNLOPT_OCTAVE=OFF \-DNLOPT_MATLAB=OFF \-DNLOPT_GUILE=OFF) # 'is_cmake' indicates that NLOPT uses CMake for its build system, which is set to ON. set(is_cmake ON) # 'is_git' denotes that the NLOPT source code is hosted in a Git repository, which is set to ON. @@ -35,4 +35,3 @@ ImportDependency(${name} ${tag} ${version} ${url} "${flag}" "" ${is_cmake} ${is_ # A status message is outputted to indicate the successful integration of the NLOPT library into the project. 
message(STATUS "${name} done") - diff --git a/src/configurations/Configurations.cpp b/src/configurations/Configurations.cpp index f7e03a9..e67a8ad 100644 --- a/src/configurations/Configurations.cpp +++ b/src/configurations/Configurations.cpp @@ -57,13 +57,9 @@ Configurations::Configurations() { SetAccuracy(0); SetIsNonGaussian(false); mIsThetaInit = false; - -#if !DEFAULT_RUNTIME - // Set default values for Hicma-Parsec params + #if !DEFAULT_RUNTIME + // Set default values for PaRSEC runtime params SetTolerance(0); - //TODO:currently,we support real data only in parsec.In the future,we should support synthetic and real data for both runtimes - SetIsSynthetic(false); - SetMeanTrendRemoval(false); #endif } @@ -93,6 +89,9 @@ void Configurations::ValidateConfiguration() { if (!GetDataPath().empty()) { SetIsSynthetic(false); } + if (GetMeanTrendRemoval()) { + SetIsSynthetic(false); + } if (GetIsMSPE() || GetIsMLOEMMOM() || GetIsIDW()) { if (GetUnknownObservationsNb() <= 1) { @@ -101,8 +100,9 @@ void Configurations::ValidateConfiguration() { } } + // Auto-enable logging if log path is provided if (!GetLoggerPath().empty() && !GetLogger()) { - throw domain_error("To enable logging, please utilize the '--log' option in order to specify a log file."); + SetLogger(true); } if (GetUnknownObservationsNb() >= GetProblemSize()) { @@ -130,29 +130,31 @@ void Configurations::ValidateConfiguration() { } #if DEFAULT_RUNTIME - // Throw Errors if any of these arguments aren't given by the user. 
+ // StarPU runtime: kernel always required if (GetKernelName().empty()) { throw domain_error("You need to set the Kernel, before starting"); } if (GetMaxRank() == -1) { SetMaxRank(1); } -//#else +#else + // PaRSEC runtime: kernel required for synthetic data or Mean Trend Removal + if (GetKernelName().empty() && (GetIsSynthetic() || GetMeanTrendRemoval())) { + throw domain_error("You need to set the Kernel, before starting"); + } if(GetMaxRank() == -1){ SetMaxRank(GetDenseTileSize() / 2); } if (mDictionary.find("tolerance") == mDictionary.end()) { SetTolerance(8); } - if (GetDataPath().empty()) { - throw domain_error("You need to set the data path, before starting"); - } -#else - if(GetMeanTrendRemoval() && GetKernelName().empty()){ - throw domain_error("You need to set the Kernel for Mean Trend Removal, before starting"); - } #endif + // Both runtimes: data_path required if not synthetic OR if Mean Trend Removal + if ((!GetIsSynthetic() || GetMeanTrendRemoval()) && GetDataPath().empty()) { + throw domain_error("You need to set the data path (use --data_path), before starting"); + } + size_t found = GetKernelName().find("NonGaussian"); // Check if the substring was found if (found != std::string::npos) { @@ -217,52 +219,76 @@ void Configurations::PrintUsage() { LOGGER("\n\t*** Available Arguments For ExaGeoStat Configurations ***") LOGGER("--N=value : Problem size.") LOGGER("--kernel=value : Used Kernel.") - LOGGER("--dimension=value : Used Dimension.") + LOGGER("--dimension=value : Used Dimension (2D, 3D, ST).") LOGGER("--p=value : Used P-Grid.") - LOGGER("--q=value : Used P-Grid.") + LOGGER("--q=value : Used Q-Grid.") LOGGER("--time_slot=value : Time slot value for ST.") - LOGGER("--computation=value : Used computation.") - LOGGER("--precision=value : Used precision.") + LOGGER("--computation=value : Used computation (exact, tlr, diagonal_approx).") + LOGGER("--precision=value : Used precision (single, double, mixed).") LOGGER("--cores=value : Used to set the 
number of cores.") LOGGER("--gpus=value : Used to set the number of GPUs.") LOGGER("--dts=value : Used to set the Dense Tile size.") LOGGER("--lts=value : Used to set the Low Tile size.") LOGGER("--band=value : Used to set the Tile diagonal thickness.") - LOGGER("--Zmiss=value : Used to set number of unknown observation to be predicted.") + LOGGER("--max_rank=value : Used to set the max rank value.") + LOGGER("--hnb=value : Used to set HNB value.") + LOGGER("--gen_max_rank=value : Used to set generation max rank.") + LOGGER("--comp_max_rank=value : Used to set computation max rank.") + LOGGER("--auto_band=value : Used to set auto band.") + LOGGER("--band_dense_sp=value : Used to set band dense single precision.") + LOGGER("--band_low_rank_dp=value : Used to set band low rank double precision.") LOGGER("--observations_file=PATH/TO/File : Used to pass the observations file path.") - LOGGER("--max_rank=value : Used to the max rank value.") - LOGGER("--initial_theta=value : Initial theta parameters for optimization.") - LOGGER("--estimated_theta=value : Estimated kernel parameters for optimization.") LOGGER("--seed=value : Seed value for random number generation.") - LOGGER("--verbose=value : Run mode whether quiet/standard/detailed.") + LOGGER("--verbose=value : Run mode (0=quiet, 1=standard, 2=detailed).") LOGGER("--log_path=value : Path to log file.") - LOGGER("--distance_metric=value : Used distance metric either eg or gcd.") - LOGGER("--max_mle_iterations=value : Maximum number of MLE iterations.") - LOGGER("--tolerance : MLE tolerance between two iterations.") - LOGGER("--data_path : Used to enter the path to the real data file.") - LOGGER("--mspe: Used to enable mean square prediction error.") - LOGGER("--fisher: Used to enable fisher tile prediction function.") - LOGGER("--idw: Used to IDW prediction auxiliary function.") - LOGGER("--mloe-mmom: Used to enable MLOE MMOM.") - LOGGER("--OOC : Used to enable Out of core technology.") - LOGGER("--approximation_mode 
: Used to enable Approximation mode.") LOGGER("--log : Enable logging.") - LOGGER("--accuracy : Used to set the accuracy when using tlr.") - LOGGER("--band_dense=value : Used to set the dense band double precision, Used with PaRSEC runtime only.") - LOGGER("--objects_number=value : Used to set the number of objects (number of viruses within a population), Used with PaRSEC runtime only.") - LOGGER("--adaptive_decision=value : Used to set the adaptive decision of each tile's format using norm approach, if enabled, otherwise 0, Used with PaRSEC runtime only.") - LOGGER("--add_diagonal=value : Used to add this number to diagonal elements to make the matrix positive definite in electrodynamics problem, Used with PaRSEC runtime only.") - LOGGER("--file_time_slot=value : Used to set time slot per file, Used with PaRSEC runtime only.") - LOGGER("--file_number=value : Used to set file number, Used with PaRSEC runtime only.") - LOGGER("--enable-inverse : Used to enable inverse spherical harmonics transform, Used with PaRSEC runtime only.") - LOGGER("--mpiio : Used to enable MPI IO, Used with PaRSEC runtime only.") - LOGGER("--log-file-path: Used to set path of file where events and results are logged.") - LOGGER("--start-year=value : Used to set the starting year for NetCDF data processing (MeanTrendRemoval).") - LOGGER("--end-year=value : Used to set the ending year for NetCDF data processing (MeanTrendRemoval).") - LOGGER("--lat=value : Used to set the latitude band index for MeanTrendRemoval climate data processing (required for MeanTrendRemoval).") - LOGGER("--lon=value : Used to set the longitude count for MeanTrendRemoval climate data processing (required for MeanTrendRemoval).") - LOGGER("--resultspath=PATH : Used to set the output directory path for MeanTrendRemoval results (required for MeanTrendRemoval).") - LOGGER("\n\n") + LOGGER("--initial_theta=value1,value2,... : Initial theta parameters for optimization.") + LOGGER("--estimated_theta=value1,value2,... 
: Estimated kernel parameters for optimization.") + LOGGER("--lb=value1,value2,... : Lower bounds for optimization.") + LOGGER("--ub=value1,value2,... : Upper bounds for optimization.") + LOGGER("--starting_theta=value1,value2,... : Starting theta parameters.") + LOGGER("--is_non_gaussian : Enable non-Gaussian mode.") + LOGGER("--OOC : Used to enable Out of Core (OOC) technology.") + LOGGER("--approximation_mode=value : Used to enable Approximation mode.") + LOGGER("--accuracy=value : Used to set the accuracy when using TLR.") + LOGGER("\t=== DATA GENERATION ARGUMENTS ===") + LOGGER("--data_path=PATH : Used to enter the path to the real data file.") + LOGGER("--is_synthetic : Use synthetic data generation.") + LOGGER("--resultspath=PATH : Used to set the output directory path for results.") + LOGGER("\t=== DATA MODELING ARGUMENTS ===") + LOGGER("--distance_metric=value : Used distance metric (eg=Euclidean, gcd=Great Circle Distance).") + LOGGER("--max_mle_iterations=value : Maximum number of MLE iterations.") + LOGGER("--tolerance=value : MLE tolerance between two iterations.") + LOGGER("--recovery_file=PATH : Path to recovery file.") + LOGGER("\t=== DATA PREDICTION ARGUMENTS ===") + LOGGER("--Zmiss=value : Used to set number of unknown observations to be predicted.") + LOGGER("--observation_number=value : Used to set the number of observations.") + LOGGER("--mspe : Used to enable Mean Square Prediction Error.") + LOGGER("--fisher : Used to enable Fisher tile prediction function.") + LOGGER("--idw : Used to enable IDW prediction auxiliary function.") + LOGGER("--mloe-mmom : Used to enable MLOE MMOM.") + LOGGER("\t=== PARSEC RUNTIME SPECIFIC ARGUMENTS ===") + LOGGER("--band_dense=value : Used to set the dense band double precision (PaRSEC only).") + LOGGER("--band_dense_dp=value : Used to set dense band double precision (PaRSEC only).") + LOGGER("--band_dense_hp=value : Used to set dense band high precision (PaRSEC only).") + LOGGER("--objects_number=value : Used to 
set the number of objects (PaRSEC only).") + LOGGER("--adaptive_decision=value : Used to set adaptive decision for tile format (PaRSEC only).") + LOGGER("--add_diagonal=value : Add value to diagonal elements (PaRSEC only).") + LOGGER("--file_time_slot=value : Used to set time slot per file (PaRSEC only).") + LOGGER("--file_number=value : Used to set file number (PaRSEC only).") + LOGGER("--enable-inverse : Enable inverse spherical harmonics transform (PaRSEC only).") + LOGGER("--mpiio : Enable MPI IO (PaRSEC only).") + LOGGER("--log-file-path=PATH : Path to file where events and results are logged (PaRSEC only).") + LOGGER("\t=== MEAN TREND REMOVAL / CLIMATE EMULATOR ARGUMENTS ===") + LOGGER("--mean_trend_removal : Enable Mean Trend Removal.") + LOGGER("--is_climate_emulator : Enable Climate Emulator mode.") + LOGGER("--forcing_data_path=PATH : Path to forcing data file.") + LOGGER("--netcdf_data_path=PATH : Path to NetCDF data file.") + LOGGER("--start-year=value : Starting year for NetCDF data processing.") + LOGGER("--end-year=value : Ending year for NetCDF data processing.") + LOGGER("--lat=value : Latitude band index for climate data processing (required for MeanTrendRemoval).") + LOGGER("--lon=value : Longitude count for climate data processing (required for MeanTrendRemoval).") + LOGGER("\n") exit(0); } diff --git a/src/configurations/Parser.cpp b/src/configurations/Parser.cpp index 7f8687b..8976986 100644 --- a/src/configurations/Parser.cpp +++ b/src/configurations/Parser.cpp @@ -16,6 +16,7 @@ #include #include +#include using namespace std; using namespace exageostat::configurations::parser; @@ -32,6 +33,12 @@ void Parser::ParseCLI(const int &aArgC, char **apArgV, unordered_map(argument.find('=')); argument_name = argument.substr(0, equal_sign_Idx);