Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 61 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,67 @@ predict_data(

```

This example walks through initializing hardware, simulating spatial data, estimating model parameters, and making predictions using **ExaGeoStatCPP** in R.

```
## R Example
Here is another example, demonstrating how to use **ExaGeoStatCPP** in R with a nugget kernel (`UnivariateMaternNuggetsStationary`):

```r
# End-to-end example: configure hardware, simulate data with a nugget kernel,
# fit the kernel parameters by MLE, then predict at two new locations.

# Load the ExaGeoStatCPP library
library(ExaGeoStatCPP)

# Set parameters for the simulation
ncores <- 30
ngpus <- 0
problem_size <- 1600
# dts is passed as `dts` to simulate_data, model_data and predict_data below.
dts <- 320
# NOTE(review): lts is assigned but not passed to any call in this example.
lts <- 0
computation <- "exact"
dimension <- "2D"
kernel <- "UnivariateMaternNuggetsStationary"
# initial_theta and the lower/upper bounds each hold four values, one per
# parameter of the kernel selected above.
initial_theta <- c(1,0.1,0.5,0.1)
lower_bound <- c(0.05,0.005,0.05,0.005)
upper_bound <- c(5,5,5,5)
# NOTE(review): acc is assigned but not passed to any call in this example.
acc <- 1e-9
p <- 1
q <- 1
opt_itrs <- 100

# Initialize hardware configuration
# NOTE(review): `hardware` is not referenced again below; presumably
# constructing it is what sets up the runtime -- confirm.
hardware <- new(Hardware, computation, ncores, ngpus, p, q)

# Simulate spatial data based on the specified kernel and parameters
# The result is read below through its $x, $y and $m components.
exageostat_data <- simulate_data(
kernel = kernel,
initial_theta = initial_theta,
problem_size = problem_size,
dts = dts,
dimension = dimension
)

# Estimate model parameters using MLE
# The bounds constrain the search; opt_itrs is passed as mle_itr.
estimated_theta <- model_data(
matrix=exageostat_data$m,
x=exageostat_data$x,
y=exageostat_data$y,
kernel=kernel, dts=dts,
dimension=dimension,
lb=lower_bound,
ub=upper_bound,
mle_itr=opt_itrs)

# Perform spatial prediction using the estimated parameters
# test_x / test_y each hold two values -- presumably the coordinates of two
# prediction locations; confirm against the predict_data documentation.
test_x <- c(0.2, 0.330)
test_y <- c(0.104, 0.14)
# NOTE(review): the third train_data element and both test_data elements are
# passed unnamed, while x and y in train_data are named.
predict_data(
train_data=list(x=exageostat_data$x, y=exageostat_data$y, exageostat_data$m),
test_data=list(test_x, test_y),
kernel=kernel,
dts=dts,
estimated_theta=estimated_theta)
```

These two examples walk through initializing hardware, simulating spatial data, estimating model parameters, and making predictions using **ExaGeoStatCPP** in R.

> **Note:** Please take a look at the end-to-end examples in the `examples/` directory as a reference for using all the operations.

Expand Down
13 changes: 6 additions & 7 deletions cmake/ImportNLOPT.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
# Configuration settings for the integration of the NLOPT library
# 'name' is assigned to "NLOPT", serving as the identifier for this library within the script.
set(name "NLOPT")
# 'tag' defines "v2.7.1" as the version tag of NLOPT, indicating the specific release to be utilized.
set(tag "v2.7.1")
# 'version' specifies "2.7.1" as the version of the NLOPT library, ensuring compatibility with the project's requirements.
set(version "2.7.1")
# 'flag' is intended for additional configuration options during the build process. Disable Python and SWIG to avoid Python compatibility issues.
set(flag "-DNLOPT_PYTHON=OFF -DNLOPT_SWIG=OFF")
# 'tag' defines "v2.8.0" as the version tag of NLOPT, indicating the specific release to be utilized.
set(tag "v2.8.0")
# 'version' specifies "2.8.0" as the NLOPT library version required by the project; this release is compatible with Python 3.13+.
set(version "2.8.0")
# 'flag' is intended for additional configuration options during the build process. Disable ALL language bindings to avoid Python compatibility issues.
set(flag -DNLOPT_PYTHON=OFF \-DNLOPT_SWIG=OFF \-DNLOPT_OCTAVE=OFF \-DNLOPT_MATLAB=OFF \-DNLOPT_GUILE=OFF)
# 'is_cmake' indicates that NLOPT uses CMake for its build system, which is set to ON.
set(is_cmake ON)
# 'is_git' denotes that the NLOPT source code is hosted in a Git repository, which is set to ON.
Expand All @@ -35,4 +35,3 @@ ImportDependency(${name} ${tag} ${version} ${url} "${flag}" "" ${is_cmake} ${is_

# A status message is outputted to indicate the successful integration of the NLOPT library into the project.
message(STATUS "${name} done")

128 changes: 77 additions & 51 deletions src/configurations/Configurations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,9 @@ Configurations::Configurations() {
SetAccuracy(0);
SetIsNonGaussian(false);
mIsThetaInit = false;

#if !DEFAULT_RUNTIME
// Set default values for Hicma-Parsec params
#if !DEFAULT_RUNTIME
// Set default values for PaRSEC runtime params
SetTolerance(0);
//TODO:currently,we support real data only in parsec.In the future,we should support synthetic and real data for both runtimes
SetIsSynthetic(false);
SetMeanTrendRemoval(false);
#endif
}

Expand Down Expand Up @@ -93,6 +89,9 @@ void Configurations::ValidateConfiguration() {
if (!GetDataPath().empty()) {
SetIsSynthetic(false);
}
if (GetMeanTrendRemoval()) {
SetIsSynthetic(false);
}

if (GetIsMSPE() || GetIsMLOEMMOM() || GetIsIDW()) {
if (GetUnknownObservationsNb() <= 1) {
Expand All @@ -101,8 +100,9 @@ void Configurations::ValidateConfiguration() {
}
}

// Auto-enable logging if log path is provided
if (!GetLoggerPath().empty() && !GetLogger()) {
throw domain_error("To enable logging, please utilize the '--log' option in order to specify a log file.");
SetLogger(true);
}

if (GetUnknownObservationsNb() >= GetProblemSize()) {
Expand Down Expand Up @@ -130,29 +130,31 @@ void Configurations::ValidateConfiguration() {
}

#if DEFAULT_RUNTIME
// Throw Errors if any of these arguments aren't given by the user.
// StarPU runtime: kernel always required
if (GetKernelName().empty()) {
throw domain_error("You need to set the Kernel, before starting");
}
if (GetMaxRank() == -1) {
SetMaxRank(1);
}
//#else
#else
// PaRSEC runtime: kernel required for synthetic data or Mean Trend Removal
if (GetKernelName().empty() && (GetIsSynthetic() || GetMeanTrendRemoval())) {
throw domain_error("You need to set the Kernel, before starting");
}
if(GetMaxRank() == -1){
SetMaxRank(GetDenseTileSize() / 2);
}
if (mDictionary.find("tolerance") == mDictionary.end()) {
SetTolerance(8);
}
if (GetDataPath().empty()) {
throw domain_error("You need to set the data path, before starting");
}
#else
if(GetMeanTrendRemoval() && GetKernelName().empty()){
throw domain_error("You need to set the Kernel for Mean Trend Removal, before starting");
}
#endif

// Both runtimes: data_path required if not synthetic OR if Mean Trend Removal
if ((!GetIsSynthetic() || GetMeanTrendRemoval()) && GetDataPath().empty()) {
throw domain_error("You need to set the data path (use --data_path), before starting");
}

size_t found = GetKernelName().find("NonGaussian");
// Check if the substring was found
if (found != std::string::npos) {
Expand Down Expand Up @@ -217,52 +219,76 @@ void Configurations::PrintUsage() {
LOGGER("\n\t*** Available Arguments For ExaGeoStat Configurations ***")
LOGGER("--N=value : Problem size.")
LOGGER("--kernel=value : Used Kernel.")
LOGGER("--dimension=value : Used Dimension.")
LOGGER("--dimension=value : Used Dimension (2D, 3D, ST).")
LOGGER("--p=value : Used P-Grid.")
LOGGER("--q=value : Used P-Grid.")
LOGGER("--q=value : Used Q-Grid.")
LOGGER("--time_slot=value : Time slot value for ST.")
LOGGER("--computation=value : Used computation.")
LOGGER("--precision=value : Used precision.")
LOGGER("--computation=value : Used computation (exact, tlr, diagonal_approx).")
LOGGER("--precision=value : Used precision (single, double, mixed).")
LOGGER("--cores=value : Used to set the number of cores.")
LOGGER("--gpus=value : Used to set the number of GPUs.")
LOGGER("--dts=value : Used to set the Dense Tile size.")
LOGGER("--lts=value : Used to set the Low Tile size.")
LOGGER("--band=value : Used to set the Tile diagonal thickness.")
LOGGER("--Zmiss=value : Used to set number of unknown observation to be predicted.")
LOGGER("--max_rank=value : Used to set the max rank value.")
LOGGER("--hnb=value : Used to set HNB value.")
LOGGER("--gen_max_rank=value : Used to set generation max rank.")
LOGGER("--comp_max_rank=value : Used to set computation max rank.")
LOGGER("--auto_band=value : Used to set auto band.")
LOGGER("--band_dense_sp=value : Used to set band dense single precision.")
LOGGER("--band_low_rank_dp=value : Used to set band low rank double precision.")
LOGGER("--observations_file=PATH/TO/File : Used to pass the observations file path.")
LOGGER("--max_rank=value : Used to the max rank value.")
LOGGER("--initial_theta=value : Initial theta parameters for optimization.")
LOGGER("--estimated_theta=value : Estimated kernel parameters for optimization.")
LOGGER("--seed=value : Seed value for random number generation.")
LOGGER("--verbose=value : Run mode whether quiet/standard/detailed.")
LOGGER("--verbose=value : Run mode (0=quiet, 1=standard, 2=detailed).")
LOGGER("--log_path=value : Path to log file.")
LOGGER("--distance_metric=value : Used distance metric either eg or gcd.")
LOGGER("--max_mle_iterations=value : Maximum number of MLE iterations.")
LOGGER("--tolerance : MLE tolerance between two iterations.")
LOGGER("--data_path : Used to enter the path to the real data file.")
LOGGER("--mspe: Used to enable mean square prediction error.")
LOGGER("--fisher: Used to enable fisher tile prediction function.")
LOGGER("--idw: Used to IDW prediction auxiliary function.")
LOGGER("--mloe-mmom: Used to enable MLOE MMOM.")
LOGGER("--OOC : Used to enable Out of core technology.")
LOGGER("--approximation_mode : Used to enable Approximation mode.")
LOGGER("--log : Enable logging.")
LOGGER("--accuracy : Used to set the accuracy when using tlr.")
LOGGER("--band_dense=value : Used to set the dense band double precision, Used with PaRSEC runtime only.")
LOGGER("--objects_number=value : Used to set the number of objects (number of viruses within a population), Used with PaRSEC runtime only.")
LOGGER("--adaptive_decision=value : Used to set the adaptive decision of each tile's format using norm approach, if enabled, otherwise 0, Used with PaRSEC runtime only.")
LOGGER("--add_diagonal=value : Used to add this number to diagonal elements to make the matrix positive definite in electrodynamics problem, Used with PaRSEC runtime only.")
LOGGER("--file_time_slot=value : Used to set time slot per file, Used with PaRSEC runtime only.")
LOGGER("--file_number=value : Used to set file number, Used with PaRSEC runtime only.")
LOGGER("--enable-inverse : Used to enable inverse spherical harmonics transform, Used with PaRSEC runtime only.")
LOGGER("--mpiio : Used to enable MPI IO, Used with PaRSEC runtime only.")
LOGGER("--log-file-path: Used to set path of file where events and results are logged.")
LOGGER("--start-year=value : Used to set the starting year for NetCDF data processing (MeanTrendRemoval).")
LOGGER("--end-year=value : Used to set the ending year for NetCDF data processing (MeanTrendRemoval).")
LOGGER("--lat=value : Used to set the latitude band index for MeanTrendRemoval climate data processing (required for MeanTrendRemoval).")
LOGGER("--lon=value : Used to set the longitude count for MeanTrendRemoval climate data processing (required for MeanTrendRemoval).")
LOGGER("--resultspath=PATH : Used to set the output directory path for MeanTrendRemoval results (required for MeanTrendRemoval).")
LOGGER("\n\n")
LOGGER("--initial_theta=value1,value2,... : Initial theta parameters for optimization.")
LOGGER("--estimated_theta=value1,value2,... : Estimated kernel parameters for optimization.")
LOGGER("--lb=value1,value2,... : Lower bounds for optimization.")
LOGGER("--ub=value1,value2,... : Upper bounds for optimization.")
LOGGER("--starting_theta=value1,value2,... : Starting theta parameters.")
LOGGER("--is_non_gaussian : Enable non-Gaussian mode.")
LOGGER("--OOC : Used to enable Out of Core (OOC) technology.")
LOGGER("--approximation_mode=value : Used to enable Approximation mode.")
LOGGER("--accuracy=value : Used to set the accuracy when using TLR.")
LOGGER("\t=== DATA GENERATION ARGUMENTS ===")
LOGGER("--data_path=PATH : Used to enter the path to the real data file.")
LOGGER("--is_synthetic : Use synthetic data generation.")
LOGGER("--resultspath=PATH : Used to set the output directory path for results.")
LOGGER("\t=== DATA MODELING ARGUMENTS ===")
LOGGER("--distance_metric=value : Used distance metric (eg=Euclidean, gcd=Great Circle Distance).")
LOGGER("--max_mle_iterations=value : Maximum number of MLE iterations.")
LOGGER("--tolerance=value : MLE tolerance between two iterations.")
LOGGER("--recovery_file=PATH : Path to recovery file.")
LOGGER("\t=== DATA PREDICTION ARGUMENTS ===")
LOGGER("--Zmiss=value : Used to set number of unknown observations to be predicted.")
LOGGER("--observation_number=value : Used to set the number of observations.")
LOGGER("--mspe : Used to enable Mean Square Prediction Error.")
LOGGER("--fisher : Used to enable Fisher tile prediction function.")
LOGGER("--idw : Used to enable IDW prediction auxiliary function.")
LOGGER("--mloe-mmom : Used to enable MLOE MMOM.")
LOGGER("\t=== PARSEC RUNTIME SPECIFIC ARGUMENTS ===")
LOGGER("--band_dense=value : Used to set the dense band double precision (PaRSEC only).")
LOGGER("--band_dense_dp=value : Used to set dense band double precision (PaRSEC only).")
LOGGER("--band_dense_hp=value : Used to set dense band high precision (PaRSEC only).")
LOGGER("--objects_number=value : Used to set the number of objects (PaRSEC only).")
LOGGER("--adaptive_decision=value : Used to set adaptive decision for tile format (PaRSEC only).")
LOGGER("--add_diagonal=value : Add value to diagonal elements (PaRSEC only).")
LOGGER("--file_time_slot=value : Used to set time slot per file (PaRSEC only).")
LOGGER("--file_number=value : Used to set file number (PaRSEC only).")
LOGGER("--enable-inverse : Enable inverse spherical harmonics transform (PaRSEC only).")
LOGGER("--mpiio : Enable MPI IO (PaRSEC only).")
LOGGER("--log-file-path=PATH : Path to file where events and results are logged (PaRSEC only).")
LOGGER("\t=== MEAN TREND REMOVAL / CLIMATE EMULATOR ARGUMENTS ===")
LOGGER("--mean_trend_removal : Enable Mean Trend Removal.")
LOGGER("--is_climate_emulator : Enable Climate Emulator mode.")
LOGGER("--forcing_data_path=PATH : Path to forcing data file.")
LOGGER("--netcdf_data_path=PATH : Path to NetCDF data file.")
LOGGER("--start-year=value : Starting year for NetCDF data processing.")
LOGGER("--end-year=value : Ending year for NetCDF data processing.")
LOGGER("--lat=value : Latitude band index for climate data processing (required for MeanTrendRemoval).")
LOGGER("--lon=value : Longitude count for climate data processing (required for MeanTrendRemoval).")
LOGGER("\n")

exit(0);
}
Expand Down
7 changes: 7 additions & 0 deletions src/configurations/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <nlohmann/json.hpp>

#include <configurations/Parser.hpp>
#include <configurations/Configurations.hpp>

using namespace std;
using namespace exageostat::configurations::parser;
Expand All @@ -32,6 +33,12 @@ void Parser::ParseCLI(const int &aArgC, char **apArgV, unordered_map<string, any

for (int i = 1; i < aArgC; ++i) {
argument = apArgV[i];

// Check for help flag before processing
if (argument == "--help" || argument == "-h") {
exageostat::configurations::Configurations::PrintUsage();
}

argument = argument.substr(2);
equal_sign_Idx = static_cast<int>(argument.find('='));
argument_name = argument.substr(0, equal_sign_Idx);
Expand Down