diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 00000000..91585aba --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,26 @@ +stages: + - compile + +variables: + GIT_STRATEGY: clone + GIT_SUBMODULE_STRATEGY: normal + ARTIFACTS_NAME: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG" + ARTIFACTS_PATH: bin.tar + +.compile-job: &compile-job + stage: compile + artifacts: + when: on_success + name: $ARTIFACTS_NAME + expire_in: 1h + paths: + - bin.tar + script: + - echo -e "Script Arguments:\t $CONFIG_ARGS" + +compile:cpp: + <<: *compile-job + image: acai-oneapi + tags: + - acai-cpu + diff --git a/CMakeLists.txt b/CMakeLists.txt index 18ce5b92..830ca84c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,8 +18,8 @@ cmake_minimum_required(VERSION 3.20 FATAL_ERROR) cmake_policy(SET CMP0048 NEW) # Set project options -option(USE_CUDA "Use Cuda, if available" false) -option(USE_MPI "Use MPI, if available" false) +option(USE_CUDA "Use CUDA, if available" OFF) +option(USE_MPI "Use MPI, if available" OFF) option(BUILD_TESTS "Option to enable building tests" OFF) option(BUILD_HEAVY_TESTS "Option to enable building heavy tests, This may take a lot of time" OFF) option(BUILD_EXAMPLES "Option to enable building examples" ON) @@ -27,15 +27,32 @@ option(BUILD_DOCS "Build documentation in docs directory" ON) option(USE_R "Enable the use of R and Rcpp in the project" OFF) option(CREATE_PACKAGE "Enable a packaging system for distribution" OFF) +# Declare the RUNTIME_TYPE variable with a default value +set(RUNTIME_TYPE "starpu" CACHE STRING "Specify the runtime type (e.g., starpu or parsec)") # Cmake Module Paths set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake;${CMAKE_MODULE_PATH}") - if (${BUILD_SHARED_LIBS}) set(BLA_STATIC OFF) else () set(BLA_STATIC ON) endif () +# Project Name and Version +project(ExaGeoStatCPP VERSION 2.0.0 DESCRIPTION "ExaGeoStatCPP is a parallel high performance unified framework for geostatistics on manycore systems.") +set(CMAKE_CXX_EXTENSIONS 
OFF) +string(TOUPPER "${RUNTIME_TYPE}" RUNTIME_TYPE) # quoted so an empty RUNTIME_TYPE cannot break the call + +# Default path for default configuration json file +add_definitions( + -DDEFAULT_CONFIGURATION_PATH="${PROJECT_SOURCE_DIR}/configurations/config.json" +) + +if(RUNTIME_TYPE STREQUAL "PARSEC") + message(STATUS "MPI is required to be enabled in order to use PaRSEC") + set(USE_MPI ON) + set(USE_HICMA OFF) +endif() + # Select toolchain based on whether CUDA is enabled or not if (USE_CUDA) message("") @@ -53,9 +70,6 @@ else () include(toolchains/GccToolchain) endif () -# Project Name and Version -project(ExaGeoStatCPP VERSION 1.0.0 DESCRIPTION "ExaGeoStatCPP is a parallel high performance unified framework for geostatistics on manycore systems.") - # Show the current version of CMake. message(STATUS "CMAKE VERSION: ${CMAKE_VERSION}") # Enable C++ language @@ -98,6 +112,11 @@ list(APPEND LIBS ${LAPACKE_LIBRARIES}) link_directories(${LAPACKE_LIBRARY_DIRS_DEP}) include_directories(${LAPACKE_INCLUDE_DIRS}) +if ("${BLA_VENDOR}" MATCHES "Intel") # quoted: safe when BLA_VENDOR is empty; "Intel" also matches Intel10_64lp and Intel10_64lp_seq + message(STATUS "Linked to MKL") + add_compile_definitions(USE_MKL) +endif() + # Add all dependencies for ExaGeoStatCPP #----------------------------- @@ -110,30 +129,48 @@ include(ImportHwloc) list(APPEND STARPU_COMPONENT_LIST "HWLOC") string(REPLACE ";" " " STARPU_COMPONENT_STRING "${STARPU_COMPONENT_LIST}") -# ExaGeoStatCPP depends on StarPU runtime +# ExaGeoStatCPP depends on NLOHMANN # ------------------------------- -include(ImportStarPu) - -# ExaGeoStatCPP depends on GSL -# ------------------------------- -include(ImportGSL) +include(ImportNlohmannJSON) # ExaGeoStatCPP depends on NLOPT # ------------------------------- include(ImportNLOPT) -# ExaGeoStatCPP depends on HiCMA +# ExaGeoStatCPP depends on GSL # ------------------------------- -if (USE_HICMA) - add_definitions(-DUSE_HICMA=TRUE) - include(ImportHCore) - include(ImportStarsH) - include(ImportHiCMA) -endif () +include(ImportGSL) -#
ExaGeoStatCPP depends on Chameleon -# ------------------------------- -include(ImportChameleon) +message("---------------------------------------- ${RUNTIME_TYPE}") +if(RUNTIME_TYPE STREQUAL "STARPU") + message(STATUS "Using StarPU as the runtime") + # ExaGeoStatCPP depends on StarPU runtime + # ------------------------------- + include(ImportStarPu) + include(ImportNetCDF) + # ExaGeoStatCPP depends on HiCMA + # ------------------------------- + if (USE_HICMA) + add_definitions(-DUSE_HICMA=TRUE) + include(ImportHCore) + include(ImportStarsH) + include(ImportHiCMA) + endif () + # ExaGeoStatCPP depends on Chameleon + # ------------------------------- + include(ImportChameleon) + +elseif(RUNTIME_TYPE STREQUAL "PARSEC") + message(STATUS "Using PaRSEC as the runtime") + include(ImportNetCDF) + # With PaRSEC, ExaGeoStatCPP depends on StarsH, HCore and HiCMA-X + # ------------------------------- + include(ImportStarsH) + include(ImportHCore) + include(ImportHiCMAX) +else() + message(FATAL_ERROR "Unknown RUNTIME_TYPE: ${RUNTIME_TYPE}. Supported values are 'STARPU' or 'PARSEC'.") +endif() # ExaGeoStatCPP depends on LAPACK/BLASPP # ------------------------------- @@ -164,14 +201,20 @@ if (USE_R) if (${R_FOUND}) message(STATUS "Using R technology") list(APPEND LIBS R) - add_definitions(-DUSING_R) + add_definitions(-DUSE_R) endif () endif () - # Add src Directory to expose added libraries add_subdirectory(src) +# Define DEFAULT_RUNTIME on the project target: 1 when the runtime is StarPU, 0 when it is PaRSEC +if(RUNTIME_TYPE STREQUAL "STARPU") + target_compile_definitions(${PROJECT_NAME} PUBLIC DEFAULT_RUNTIME=1) +elseif(RUNTIME_TYPE STREQUAL "PARSEC") + target_compile_definitions(${PROJECT_NAME} PUBLIC DEFAULT_RUNTIME=0) +endif() + # Creates a new INTERFACE library target named ${PROJECT_NAME}_INTERFACE. # The INTERFACE keyword specifies that this library will not be built, but instead will only be used for its properties.
add_library(${PROJECT_NAME}_INTERFACE INTERFACE) @@ -213,15 +256,22 @@ endif () message(" \n \t ** Configurations of ExaGeoStatCPP and installation of dependence is done successfully ** ") message("\t - Export the following line to avoid re-install dependencies each time. -") message("\t ----------------------------------------------------------------------------------------------------------------------------------- ") -message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/CHAMELEON/lib/pkgconfig:$PKG_CONFIG_PATH") -message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/STARPU/lib/pkgconfig:$PKG_CONFIG_PATH") +message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/NLOHMANN_JSON/share/pkgconfig:$PKG_CONFIG_PATH") message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/HWLOC/lib/pkgconfig:$PKG_CONFIG_PATH") -message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/GSL/lib/pkgconfig:$PKG_CONFIG_PATH") message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/NLOPT/lib/pkgconfig:${CMAKE_INSTALL_PREFIX}/NLOPT/lib64/pkgconfig:$PKG_CONFIG_PATH") -if(USE_HICMA) +message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/GSL/lib/pkgconfig:$PKG_CONFIG_PATH") +if(RUNTIME_TYPE STREQUAL "STARPU") + message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/STARPU/lib/pkgconfig:$PKG_CONFIG_PATH") + message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/CHAMELEON/lib/pkgconfig:$PKG_CONFIG_PATH") + if(USE_HICMA) + message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/STARSH/lib/pkgconfig:$PKG_CONFIG_PATH") + message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/HCORE/lib/pkgconfig:$PKG_CONFIG_PATH") + message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/HICMA/lib/pkgconfig:$PKG_CONFIG_PATH") + endif() +elseif(RUNTIME_TYPE STREQUAL "PARSEC") message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/STARSH/lib/pkgconfig:$PKG_CONFIG_PATH") message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/HCORE/lib/pkgconfig:$PKG_CONFIG_PATH") - 
message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/HICMA/lib/pkgconfig:$PKG_CONFIG_PATH") + message("\t export PKG_CONFIG_PATH=${CMAKE_INSTALL_PREFIX}/HICMA-X/lib/pkgconfig:${CMAKE_INSTALL_PREFIX}/HICMA-X/lib64/pkgconfig:${CMAKE_INSTALL_PREFIX}/HICMA-X/lib64:$PKG_CONFIG_PATH") endif() message("\t ----------------------------------------------------------------------------------------------------------------------------------- \n") diff --git a/README.md b/README.md index 6f6e83f1..a58e6281 100644 --- a/README.md +++ b/README.md @@ -55,35 +55,44 @@ statisticians with modest computational resources. ## Installation -> Note: Installation requires at least **CMake of version 3.2**. to build ExaGeoStatCPP. +### Requirements +To build and run this software, you will need: + +1. [CMake](https://cmake.org/download/) (version 3.20 or higher) +2. [wget](https://www.gnu.org/software/wget/) +3. **gcc** and **g++** compilers +4. **autoconf** and **automake** +5. [libtool](https://www.gnu.org/software/libtool/) +6. [R](https://cran.r-project.org/) (only if you plan on using the R functionality) ### C++ source code installation -To install the `ExaGeoStat` project locally, run the following commands in your terminal: +To install the `ExaGeoStatCPP` project locally (C++ version), run the following commands in your terminal: -1. Clone the project from the remote gitHub repository into your local machine using the following command +1. Clone the project repository to your local machine: ```bash git clone https://github.com/ecrc/ExaGeoStatCPP.git ``` -2. Change your current directory by getting into the `ExaGeoStatCPP` project directory +2. Navigate to the cloned directory: ```bash cd ExaGeoStatCPP ``` -3. Run `configure` script with the flag `-h` for help, to know the supported options and their corresponding flags. +3. Run the `configure` script (use the `-h` flag to list the supported options and their corresponding flags).
This step is **not required** when using R. ```bash - ./configure -h + ./configure -e ``` -4. Run `clean_build.sh` script with the flag `-h` for help, to know the needed arguments to run with your specific options. +4. Run `clean_build.sh` (use the `-h` flag to list the arguments needed for your specific options). This step is **not required** when using R. ```bash - ./clean_build.sh -h + ./clean_build.sh ``` 5. Export the installation paths of the dependencies to your `.bashrc` file, e.g. ```bash export PKG_CONFIG_PATH=$PWD/installdir/_deps/DEPENDENCY_NAME/lib/pkgconfig:$PKG_CONFIG_PATH ``` + or copy/paste the `export PKG_CONFIG_PATH=...` lines printed at the end of the configure step. Now, you can use the pkg-config executable to collect compiler and linker flags for ExaGeoStatCPP. @@ -91,11 +100,11 @@ ExaGeoStatCPP. ### R package installation 1. Open the R prompt window by simply running `R` command in the terminal, inside the prompt, we will install needed packages by running the following commands: ```R - install.packages(Rcpp) - install.packages("assert") + install.packages("Rcpp") + install.packages("assertthat") ``` -2. close the R prompt and return to the terminal. Run the following command, make sure your current path is the ExaGeoStat project directory +2. Close the R prompt and return to the terminal. Make sure your current path is the ExaGeoStatCPP project directory, then run the following command: ```commandline R CMD INSTALL . --configure-args="-r" @@ -103,6 +112,44 @@ ExaGeoStatCPP. > For more detailed information on installing ExaGeoStat with different configurations and enabling technologies such as CUDA, MPI, R, etc., please refer to the [User Manual](USER_MANUAL.md) +## Common Installation Errors and Solutions + +### 1. Missing CMake +The installation requires **CMake** version 3.20 or higher. Ensure it is installed on your system before proceeding with the installation of **ExaGeoStatCPP**. + +To install CMake, use: +```sh +sudo apt install cmake +``` + +### 2.
Missing Libtool +If you encounter the following error during installation: +``` +./autogen.sh: line 17: libtool: command not found +./autogen.sh: line 20: glibtool: command not found +``` +This indicates that **Libtool** is missing. You can install it using: +```sh +sudo apt install libtool libtool-bin +``` + +Alternatively, you can install **Libtool** locally: +```sh +wget http://ftpmirror.gnu.org/libtool/libtool-2.4.7.tar.gz +tar -xvzf libtool-2.4.7.tar.gz +cd libtool-2.4.7 +./configure --prefix=$HOME/local +make +make install +``` +Then, update your environment variables: +```sh +export PATH=$HOME/local/bin:$PATH +export LD_LIBRARY_PATH=$HOME/local/lib:$LD_LIBRARY_PATH +export PKG_CONFIG_PATH=$HOME/local/lib/pkgconfig:$PKG_CONFIG_PATH +``` +After this, restart your terminal or run `source ~/.bashrc` to apply the changes. + ## Usage #### C++ Example @@ -127,14 +174,94 @@ int main(int argc, char **argv) { return 0; } ``` -### R Example: -```R +## R Example +Here is an example demonstrating how to use **ExaGeoStatCPP** in R: + +```r +# Load the ExaGeoStatCPP library +library(ExaGeoStatCPP) + +# Set parameters for the simulation +ncores <- 30 +ngpus <- 0 +problem_size <- 1600 +dts <- 320 +lts <- 0 +computation <- "exact" +dimension <- "2D" +kernel <- "univariate_matern_stationary" +initial_theta <- c(1,0.1,0.5) +lower_bound <- c(0.1,0.1,0.1) +upper_bound <- c(5,5,5) +p <- 1 +q <- 1 +opt_itrs <- 100 + +# Initialize hardware configuration hardware <- new(Hardware, computation, ncores, ngpus, p, q) -exageostat_data <- simulate_data(kernel=kernel, initial_theta=initial_theta, problem_size=problem_size, dts=dts, dimension=dimension) -estimated_theta <- model_data(data=exageostat_data, kernel=kernel, dts=dts, dimension=dimension,lb=lower_bound, ub=upper_bound, mle_itr=10) -predict_data(train_data=list(x, y, z_measurement), test_data=list(test_x, test_y), kernel=kernel, dts=dts, estimated_theta=estimated_theta) + +# Simulate spatial data based on the specified kernel and 
parameters +exageostat_data <- simulate_data( + kernel = kernel, + initial_theta = initial_theta, + problem_size = problem_size, + dts = dts, + dimension = dimension +) + +# Estimate model parameters using MLE +estimated_theta <- model_data( + matrix=exageostat_data$m, + x=exageostat_data$x, + y=exageostat_data$y, + kernel=kernel, dts=dts, + dimension=dimension, + lb=lower_bound, + ub=upper_bound, + mle_itr=opt_itrs) + +# Perform spatial prediction using the estimated parameters +test_x <- c(0.2, 0.330) +test_y <- c(0.104, 0.14) +predict_data( + train_data=list(x=exageostat_data$x, y=exageostat_data$y, exageostat_data$m), + test_data=list(test_x, test_y), + kernel=kernel, + dts=dts, + estimated_theta=estimated_theta) + +``` + +This example walks through initializing hardware, simulating spatial data, estimating model parameters, and making predictions using **ExaGeoStatCPP** in R. + +### Stage Zero Example +Stage Zero is a preprocessing step for climate data that removes mean trends from time series data. Here's an example command to run the Stage Zero data generation: + +```bash +./bin/examples/stage-zero/Example_Stage_Zero \ + --kernel=trend_model \ + --data-path=/path/to/ERA_data/ \ + --forcing-data-path=/path/to/forcing_new.csv \ + --lts=200 \ + --lb=0.001 \ + --ub=0.95 \ + --starting-theta=0.9 \ + --stage-zero \ + --cores=50 \ + --gpus=0 \ + --dts=200 \ + --resultspath=/path/to/output/ \ + --startyear=2000 \ + --endyear=2002 \ + --numlocs=10 \ + --max-mle-iterations=30 \ + --tolerance=7 ``` +The output includes: +- `z_*.csv` files: Normalized residuals for each time slot +- `params.csv`: Optimized parameters for each location + > Please take a look at the end-to-end examples as a reference for using all the operations. 
## Contributing @@ -188,4 +315,4 @@ Find detailed information on how to contribute to ExaGeoStatCPP [here](CONTRIBUT [BSD 3-Clause](LICENSE) ## Handout -![ExaGeoStatCPP-handout.png](docs/ExaGeoStatCPP-handout.png) \ No newline at end of file +![ExaGeoStatCPP-handout.png](docs/ExaGeoStatCPP-handout.png) diff --git a/USER_MANUAL.md b/USER_MANUAL.md index 6b95b532..fb79d39f 100644 --- a/USER_MANUAL.md +++ b/USER_MANUAL.md @@ -80,6 +80,7 @@ * To enable packaging system for distribution, add `-p` disabled by default. * To enable showing code warnings, add `-w` disabled by default. * To manually set mkl as blas vendor, add `--use-mkl`. MKL is required as blas vendor and it's automatically detected but in some environments it need to be manually set. +* To enable PaRSEC as a runtime system, add `--use=parsec`, StarPU by default. ## Building @@ -98,7 +99,10 @@ ## Arguments -These are the arguments that you can specify when running any C++ example. +These are the arguments that you can specify when running any C++ example. +**Please note that the arguments are not case-sensitive**, and you can use variations such as `-`, `_`, or capitalized forms for the same argument name. +For example, `MaxRank`, `max-rank`, `max_rank`, `Max_Rank` ,`Maxrank` , `MaxrAnk` and `Max-Rank` are all considered equivalent. + * {Mandatory} To set the problem size (N) --N= @@ -152,13 +156,13 @@ These are the arguments that you can specify when running any C++ example. --oub= * {Optional} To set the initial theta - --itheta= + --initial_theta= * {Optional} To set the target theta --ttheta= * {Optional} To set the estimated theta - --etheta= + --estimated_theta= * {Optional} To set the seed value, the default is 0 --seed= @@ -223,10 +227,12 @@ These are the arguments that you can specify when running any C++ example. ### Provide Arguments To use any operations, you must initially supply the necessary arguments -to the operation via the Configurations module. 
There are two methods available +to the operation via the Configurations module. +This program is configured by default through a json file of default values. You can either change the configuration arguments +in the json file or provide them through command line. There are two methods available for setting your arguments: -1. Provide your arguments with the command line. +1. Provide your arguments with the command line, this overwrites the json configuration. ```c++ // Create a new configuration object. Configurations configurations; diff --git a/cmake/FindHICMA-X.cmake b/cmake/FindHICMA-X.cmake new file mode 100644 index 00000000..bb2a045e --- /dev/null +++ b/cmake/FindHICMA-X.cmake @@ -0,0 +1,109 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file FindHICMA-X.cmake +# @brief This is a CMakeLists file for finding HiCMA-X and link and include it's headers +# @version 2.0.0 +# @author Mahmoud ElKarargy +# @date 2024-09-28 + +# Include pkg-config +find_package(PkgConfig QUIET) + +# Try to find dplasma and parsec via pkg-config +if(PKG_CONFIG_FOUND) + pkg_check_modules(DPLASMA_PKG dplasma) + pkg_check_modules(PARSEC_PKG parsec) + if(DPLASMA_PKG_FOUND AND PARSEC_PKG_FOUND) + # Try to find the HICMA-X or hicma-x path in the library directories + string(FIND "${PARSEC_PKG_LIBRARY_DIRS}" "HICMA-X" HICMA_X_START) + if(HICMA_X_START EQUAL -1) + string(FIND "${PARSEC_PKG_LIBRARY_DIRS}" "hicma-x" HICMA_X_START) + endif() + if(HICMA_X_START GREATER -1) + # Extract the full path to HICMA-X or hicma-x and set the include directory + string(REGEX MATCH "([^;]*(HICMA-X|hicma-x)[^;]*/lib)" HICMA_X_LIB_PATH "${PARSEC_PKG_LIBRARY_DIRS}") + get_filename_component(HICMA_X_ROOT "${HICMA_X_LIB_PATH}" DIRECTORY) # Go one level up + set(HICMA-X_INCLUDE_DIRS "${HICMA_X_ROOT}/include") # Set the include path + endif() 
+ # TODO: This is not generalized for the case of hicma installed manually + set(HICMA_X_SRC_DIR ${HICMA_X_ROOT}/hicma-x-src) + set(HICMA_X_BIN_DIR ${HICMA_X_ROOT}/bin) + set(HICMA-X_FOUND TRUE) + set(HICMA-X_LIBRARIES ${DPLASMA_PKG_LIBRARIES} ${PARSEC_PKG_LIBRARIES}) + set(HICMA-X_LIBRARY_DIRS "${HICMA_X_LIB_PATH}") + # Add a search for lib64 directories and set HICMA-X_LIBRARY_DIRS_DEP + set(HICMA-X_LIBRARY_DIRS_DEP "${HICMA_X_LIB_PATH}64") + + find_library(HICMA_PARSEC_LIB hicma_parsec PATHS ${HICMA-X_LIBRARY_DIRS} ${HICMA-X_LIBRARY_DIRS_DEP}) # look in both the lib and lib64 candidates + + if(HICMA_PARSEC_LIB) + list(APPEND HICMA-X_LIBRARIES ${HICMA_PARSEC_LIB}) + else() + set(HICMA-X_FOUND FALSE) # libhicma_parsec not found here: do not abort, let the manual fallback search below run + endif() + + endif() +endif() + +# Fallback: Manual search if pkg-config fails or HICMA-X path isn't set +if(NOT HICMA-X_FOUND) + # Improved search to handle multiple possible paths and fallback for include directories + find_path(HICMA-X_INCLUDE_DIR + NAMES hicma.h + PATHS + ${CMAKE_CURRENT_LIST_DIR}/../hicma-x/include + /usr/local/include/hicma-x + /usr/local/include + /usr/include/hicma-x + /usr/include + DOC "Path to HICMA-X include directory" + ) + + # Search for the main HICMA-X library + find_library(HICMA-X_LIBRARY + NAMES hicma-x + PATHS + ${CMAKE_CURRENT_LIST_DIR}/../hicma-x/lib + /usr/local/lib + /usr/lib + DOC "Path to HICMA-X library" + ) + + # Search for the hicma_parsec library in the lib64 directory if it's not found in the standard lib + find_library(HICMA_PARSEC_LIB + NAMES hicma_parsec + PATHS + ${CMAKE_CURRENT_LIST_DIR}/../hicma-x/lib64 + /usr/local/lib64 + /usr/lib64 + DOC "Path to HICMA-Parsec library" + ) + + # Check if both the include directory and libraries were found + if(HICMA-X_INCLUDE_DIR AND HICMA-X_LIBRARY AND HICMA_PARSEC_LIB) + set(HICMA-X_FOUND TRUE) + # Combine the found libraries + set(HICMA-X_LIBRARIES ${HICMA-X_LIBRARY} ${HICMA_PARSEC_LIB}) + # Set the include directory + set(HICMA-X_INCLUDE_DIRS "${HICMA-X_INCLUDE_DIR}") + # Include both lib and lib64 directories + #
The library directories are derived from the locations of the libraries that were actually found. + get_filename_component(HICMA-X_LIBRARY_DIRS "${HICMA-X_LIBRARY}" DIRECTORY) # HICMA-X_LIBRARY is a file path, so take its parent directory + get_filename_component(HICMA-X_LIBRARY_DIRS_DEP "${HICMA_PARSEC_LIB}" DIRECTORY) # directory that holds libhicma_parsec + else() + set(HICMA-X_FOUND FALSE) + endif() +endif() + +# Mark the variables as advanced to keep the CMake GUI clean +mark_as_advanced(HICMA-X_INCLUDE_DIR HICMA-X_LIBRARY HICMA_PARSEC_LIB) + +# Provide feedback on whether the library was found +if(HICMA-X_FOUND) + message(STATUS "Found HICMA-X") +else() + message("Could not find HICMA-X or its dependencies (dplasma, parsec)") +endif() diff --git a/cmake/FindHWLOC.cmake b/cmake/FindHwloc.cmake similarity index 96% rename from cmake/FindHWLOC.cmake rename to cmake/FindHwloc.cmake index 2613be08..5b333b4b 100644 --- a/cmake/FindHWLOC.cmake +++ b/cmake/FindHwloc.cmake @@ -13,6 +13,7 @@ # Hwloc_FOUND - True if hwloc was found # Hwloc_INCLUDE_DIRS - include directories for hwloc # Hwloc_LIBRARIES - link against these libraries to use hwloc +# Hwloc_LIBRARY_DIRS - directories where hwloc libraries are found # Hwloc_VERSION - version # Hwloc_CFLAGS - include directories as compiler flags # Hwloc_LDLFAGS - link paths and libs as compiler flags @@ -171,6 +172,9 @@ else() list(GET Hwloc_VERSION_PARSED 1 Hwloc_VERSION_MINOR) set(Hwloc_VERSION_MINOR "${Hwloc_VERSION_MINOR}" CACHE STRING "Minor version of Hwloc") + # Capture the library directories (quoted so an empty or multi-entry value stays a single argument) + set(Hwloc_LIBRARY_DIRS "${Hwloc_LIBRARY_DIRS}" CACHE STRING "Directories where hwloc libraries are found") + include(FindPackageHandleStandardArgs) find_package_handle_standard_args(Hwloc DEFAULT_MSG Hwloc_LIBRARIES) @@ -184,5 +188,4 @@ else() endif() endif() - -endif() +endif() \ No newline at end of file diff --git a/cmake/FindNetCDF.cmake b/cmake/FindNetCDF.cmake new file mode 100644 index 00000000..70522ff1 --- /dev/null +++ b/cmake/FindNetCDF.cmake @@ -0,0 +1,133 @@ +#[==[ +Provides the following variables: + + * `NetCDF_FOUND`: Whether NetCDF was found or not.
+ * `NetCDF_INCLUDE_DIRS`: Include directories necessary to use NetCDF. + * `NetCDF_LIBRARIES`: Libraries necessary to use NetCDF. + * `NetCDF_VERSION`: The version of NetCDF found. + * `NetCDF::NetCDF`: A target to use with `target_link_libraries`. + * `NetCDF_HAS_PARALLEL`: Whether or not NetCDF was found with parallel IO support. +#]==] + +function(FindNetCDF_get_is_parallel_aware include_dir) + file(STRINGS "${include_dir}/netcdf_meta.h" _netcdf_lines + REGEX "#define[ \t]+NC_HAS_PARALLEL[ \t]") + string(REGEX REPLACE ".*NC_HAS_PARALLEL[ \t]*([0-1]+).*" "\\1" _netcdf_has_parallel "${_netcdf_lines}") + if (_netcdf_has_parallel) + set(NetCDF_HAS_PARALLEL TRUE PARENT_SCOPE) + else() + set(NetCDF_HAS_PARALLEL FALSE PARENT_SCOPE) + endif() +endfunction() + +find_package(PkgConfig QUIET) +if (PkgConfig_FOUND) + pkg_check_modules(_NetCDF QUIET netcdf IMPORTED_TARGET) + if (_NetCDF_FOUND) + # Forward the variables in a consistent way. + set(NetCDF_FOUND "${_NetCDF_FOUND}") + set(NetCDF_INCLUDE_DIRS "${_NetCDF_INCLUDE_DIRS}") + set(NetCDF_LIBRARIES "${_NetCDF_LIBRARIES}") + set(NetCDF_VERSION "${_NetCDF_VERSION}") + set(NetCDF_LIBRARY_DIRS "${_NetCDF_LIBRARY_DIRS}") + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(NetCDF + REQUIRED_VARS NetCDF_LIBRARIES + # This is not required because system-default include paths are not + # reported by `FindPkgConfig`, so this might be empty. Assume that if we + # have a library, the include directories are fine (if any) since + # PkgConfig reported that the package was found. + # NetCDF_INCLUDE_DIRS + VERSION_VAR NetCDF_VERSION) + + if (NOT TARGET NetCDF::NetCDF) + add_library(NetCDF::NetCDF INTERFACE IMPORTED) + set_target_properties(NetCDF::NetCDF PROPERTIES + INTERFACE_LINK_LIBRARIES "PkgConfig::_NetCDF") + endif () + + FindNetCDF_get_is_parallel_aware("${_NetCDF_INCLUDEDIR}") + # Skip the rest of the logic in this file. + return () + endif () +endif () + +# Try to find a CMake-built NetCDF. 
+find_package(netCDF CONFIG QUIET) +if (netCDF_FOUND) + # Forward the variables in a consistent way. + set(NetCDF_FOUND "${netCDF_FOUND}") + set(NetCDF_INCLUDE_DIRS "${netCDF_INCLUDE_DIR}") + set(NetCDF_LIBRARIES "${netCDF_LIBRARIES}") + set(NetCDF_LIBRARY_DIRS "${netCDF_LIBRARY_DIRS}") + set(NetCDF_VERSION "${NetCDFVersion}") + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(NetCDF + REQUIRED_VARS NetCDF_INCLUDE_DIRS NetCDF_LIBRARIES + VERSION_VAR NetCDF_VERSION) + + if (NOT TARGET NetCDF::NetCDF) + add_library(NetCDF::NetCDF INTERFACE IMPORTED) + if (TARGET "netCDF::netcdf") + # 4.7.3 + set_target_properties(NetCDF::NetCDF PROPERTIES + INTERFACE_LINK_LIBRARIES "netCDF::netcdf") + elseif (TARGET "netcdf") + set_target_properties(NetCDF::NetCDF PROPERTIES + INTERFACE_LINK_LIBRARIES "netcdf") + else () + set_target_properties(NetCDF::NetCDF PROPERTIES + INTERFACE_LINK_LIBRARIES "${netCDF_LIBRARIES}") + endif () + endif () + + FindNetCDF_get_is_parallel_aware("${NetCDF_INCLUDE_DIRS}") + # Skip the rest of the logic in this file. 
+ return () +endif () + +find_path(NetCDF_INCLUDE_DIR + NAMES netcdf.h + DOC "netcdf include directories") +mark_as_advanced(NetCDF_INCLUDE_DIR) + +find_library(NetCDF_LIBRARY + NAMES netcdf + DOC "netcdf library") +mark_as_advanced(NetCDF_LIBRARY) + +if (NetCDF_INCLUDE_DIR) + file(STRINGS "${NetCDF_INCLUDE_DIR}/netcdf_meta.h" _netcdf_version_lines + REGEX "#define[ \t]+NC_VERSION_(MAJOR|MINOR|PATCH|NOTE)") + string(REGEX REPLACE ".*NC_VERSION_MAJOR *\([0-9]*\).*" "\\1" _netcdf_version_major "${_netcdf_version_lines}") + string(REGEX REPLACE ".*NC_VERSION_MINOR *\([0-9]*\).*" "\\1" _netcdf_version_minor "${_netcdf_version_lines}") + string(REGEX REPLACE ".*NC_VERSION_PATCH *\([0-9]*\).*" "\\1" _netcdf_version_patch "${_netcdf_version_lines}") + string(REGEX REPLACE ".*NC_VERSION_NOTE *\"\([^\"]*\)\".*" "\\1" _netcdf_version_note "${_netcdf_version_lines}") + set(NetCDF_VERSION "${_netcdf_version_major}.${_netcdf_version_minor}.${_netcdf_version_patch}${_netcdf_version_note}") + unset(_netcdf_version_major) + unset(_netcdf_version_minor) + unset(_netcdf_version_patch) + unset(_netcdf_version_note) + unset(_netcdf_version_lines) + + FindNetCDF_get_is_parallel_aware("${NetCDF_INCLUDE_DIR}") +endif () + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(NetCDF + REQUIRED_VARS NetCDF_LIBRARY NetCDF_INCLUDE_DIR + VERSION_VAR NetCDF_VERSION) + +if (NetCDF_FOUND) + set(NetCDF_INCLUDE_DIRS "${NetCDF_INCLUDE_DIR}") + set(NetCDF_LIBRARIES "${NetCDF_LIBRARY}") + + if (NOT TARGET NetCDF::NetCDF) + add_library(NetCDF::NetCDF UNKNOWN IMPORTED) + set_target_properties(NetCDF::NetCDF PROPERTIES + IMPORTED_LOCATION "${NetCDF_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${NetCDF_INCLUDE_DIR}") + endif () +endif () diff --git a/cmake/FindPnetCDF.cmake b/cmake/FindPnetCDF.cmake new file mode 100644 index 00000000..47de7476 --- /dev/null +++ b/cmake/FindPnetCDF.cmake @@ -0,0 +1,74 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, 
+# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file FindPnetCDF.cmake +# @brief A CMake module to locate the Parallel NetCDF library using pkg-config if available. +# @version 2.0.0 +# @author Mahmoud ElKarargy +# @date 2024-11-23 + +# This module defines the following variables: +# - PnetCDF_FOUND : True if the library is found +# - PnetCDF_INCLUDE_DIRS : Path to the include directory +# - PnetCDF_LIBRARIES : The library to link against +# - PnetCDF_VERSION : Version of the library + +find_package(PkgConfig QUIET) + +if(PkgConfig_FOUND) + # Use pkg-config to locate PnetCDF + pkg_check_modules(PnetCDF QUIET pnetcdf) + if(PnetCDF_FOUND) + # Assign include directories and libraries from pkg-config + set(PnetCDF_INCLUDE_DIRS ${PnetCDF_INCLUDE_DIRS} ${PnetCDF_INCLUDEDIR}) + set(PnetCDF_LIBRARIES ${PnetCDF_LIBRARIES}) + set(PnetCDF_VERSION ${PnetCDF_VERSION}) + endif() +endif() + +# Fallback if pkg-config failed or did not provide include directories +if(NOT PnetCDF_INCLUDE_DIRS) + find_path(PnetCDF_INCLUDE_DIR + NAMES pnetcdf.h + HINTS ENV PNETCDF_DIR + PATH_SUFFIXES include + ) + set(PnetCDF_INCLUDE_DIRS ${PnetCDF_INCLUDE_DIR}) +endif() + +if(NOT PnetCDF_LIBRARIES) + find_library(PnetCDF_LIBRARY + NAMES pnetcdf + HINTS ENV PNETCDF_DIR + PATH_SUFFIXES lib + ) + set(PnetCDF_LIBRARIES ${PnetCDF_LIBRARY}) +endif() + +if(PnetCDF_INCLUDE_DIRS AND PnetCDF_LIBRARIES) + set(PnetCDF_FOUND TRUE) +else() + set(PnetCDF_FOUND FALSE) +endif() + +# Detect version from header file if not set +if(PnetCDF_FOUND AND NOT PnetCDF_VERSION) + file(READ "${PnetCDF_INCLUDE_DIRS}/pnetcdf.h" PNETCDF_HEADER_CONTENTS) + string(REGEX MATCH "#define PNETCDF_VERSION_MAJOR ([0-9]+)" _major_match "${PNETCDF_HEADER_CONTENTS}") + string(REGEX MATCH "#define PNETCDF_VERSION_MINOR ([0-9]+)" _minor_match "${PNETCDF_HEADER_CONTENTS}") + string(REGEX MATCH "#define PNETCDF_VERSION_(SUB|PATCH) ([0-9]+)" _patch_match
"${PNETCDF_HEADER_CONTENTS}") # pnetcdf.h names the third version component PNETCDF_VERSION_SUB; PATCH is still accepted + if(_major_match AND _minor_match AND _patch_match) + string(REGEX REPLACE ".* ([0-9]+).*" "\\1" PNETCDF_VERSION_MAJOR "${_major_match}") + string(REGEX REPLACE ".* ([0-9]+).*" "\\1" PNETCDF_VERSION_MINOR "${_minor_match}") + string(REGEX REPLACE ".* ([0-9]+).*" "\\1" PNETCDF_VERSION_PATCH "${_patch_match}") + set(PnetCDF_VERSION "${PNETCDF_VERSION_MAJOR}.${PNETCDF_VERSION_MINOR}.${PNETCDF_VERSION_PATCH}") + endif() +endif() + +# Validate the result through the standard find-module handler +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(PnetCDF REQUIRED_VARS PnetCDF_INCLUDE_DIRS PnetCDF_LIBRARIES VERSION_VAR PnetCDF_VERSION) + +mark_as_advanced(PnetCDF_INCLUDE_DIR PnetCDF_LIBRARY) \ No newline at end of file diff --git a/cmake/ImportHDF5.cmake b/cmake/ImportHDF5.cmake new file mode 100644 index 00000000..37b8430e --- /dev/null +++ b/cmake/ImportHDF5.cmake @@ -0,0 +1,44 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file ImportHDF5.cmake +# @brief Checks for the HDF5 library and includes it in the project if it is not already present. +# @version 2.0.0 +# @author Mahmoud ElKarargy +# @author Sameh Abdulah +# @date 2024-11-14 + +# Configuration settings for the integration of the HDF5 library +# 'name' is assigned to "HDF5", serving as the identifier for this library within the script. +set(name "HDF5") +# 'tag' defines "hdf5-1_12_0" as the version tag of HDF5, indicating the specific release to be utilized. +set(tag "hdf5-1_12_0") +# 'version' is set to "0"; the release itself is pinned by 'tag' above. NOTE(review): confirm how ImportDependency interprets a "0" version. +set(version "0") +# 'flag' holds the additional configuration option(s) forwarded to ImportDependency for the HDF5 build.
+set(flag \--enable-parallel-tests) +# 'is_cmake' indicates whether HDF5 is built with CMake; it is set to OFF here. +set(is_cmake OFF) +# 'is_git' denotes that the HDF5 source code is hosted in a Git repository, which is set to ON. +set(is_git ON) +# 'auto_gen' signals whether autogen scripts are required for the build process, which is set to OFF for HDF5. +set(auto_gen OFF) +# 'url' provides the location of the HDF5 source code repository on GitHub. +set(url "https://github.com/HDFGroup/hdf5") + +if (NOT DEFINED ${name}_ROOT OR ${name}_ROOT STREQUAL "") + set(${name}_ROOT "${CMAKE_INSTALL_PREFIX}/${name}") +endif() +# The 'ImportDependency' macro script, located in the 'macros' directory, is included for managing the import and setup of the HDF5 library. +include(macros/ImportDependency) +# The 'ImportDependency' macro is invoked with the above-defined parameters to handle the detection, fetching, and integration of HDF5 into the project. +ImportDependency(${name} ${tag} ${version} ${url} "${flag}" "" ${is_cmake} ${is_git} ${auto_gen}) + +set(ENV{LIBRARY_PATH} "${CMAKE_INSTALL_PREFIX}/${name}/lib:$ENV{LIBRARY_PATH}") +set(ENV{CPATH} "${CMAKE_INSTALL_PREFIX}/${name}/include:$ENV{CPATH}") + +message("${CMAKE_INSTALL_PREFIX}/${name}/lib") +# A status message is outputted to indicate the successful integration of the HDF5 library into the project. +message(STATUS "${name} done") diff --git a/cmake/ImportHiCMAX.cmake b/cmake/ImportHiCMAX.cmake new file mode 100644 index 00000000..6bdc2a79 --- /dev/null +++ b/cmake/ImportHiCMAX.cmake @@ -0,0 +1,42 @@ +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file ImportHiCMAX.cmake +# @brief Find and include HiCMA-X library as a dependency.
+# @version 2.0.0 +# @author Mahmoud ElKarargy +# @author Sameh Abdulah +# @date 2024-09-21 + +# Configuration settings for integrating the HICMA-X library into the project +# 'name' sets the identifier for the HICMA-X library within this script to "HICMA-X". +set(name "HICMA-X") +# Git ref of HiCMA-X that is passed to ImportDependency as the tag. +set(tag "FIX-package-installation-MK") +# Flags to configure the build for HiCMA-X: DPLASMA precisions ("s;d"), PaRSEC developer headers and distribution settings, +# the Fortran flags, the BLAS vendor (BLA_VENDOR), and GPU support (CUDA and HIP) switched OFF. +set(flags '-DDPLASMA_PRECISIONS="s;d"' \-DPARSEC_WITH_DEVEL_HEADERS=ON \-DCMAKE_Fortran_FLAGS="-Wno-main" + \-DPARSEC_GPU_WITH_HIP=OFF \-DPARSEC_GPU_WITH_CUDA=OFF \-DPARSEC_HAVE_CUDA=OFF \-DPARSEC_DIST_SHORT_LIMIT=0 + \-DPARSEC_DIST_COLLECTIVES=ON \-DPARSEC_HAVE_DEV_CUDA_SUPPORT=OFF \-DDPLASMA_HAVE_CUDA=OFF \-DBLA_VENDOR=${BLA_VENDOR}) +# Indicates that HiCMA-X uses CMake for its build system. +set(is_cmake ON) +# Indicates that HiCMA-X is hosted on a Git repository. +set(is_git ON) +# Indicates that autogen scripts are not required for HiCMA-X. +set(auto_gen OFF) +# Set the URL of the HiCMA-X GitHub repository. +set(url "https://github.com/SAbdulah/hicma-x-dev.git") +# Include the macro to import HiCMA-X as a dependency. +include(macros/ImportDependency) + +# Use the ImportDependency macro to handle fetching, detecting, and setting up HiCMA-X; the version argument is left empty. +ImportDependency(${name} ${tag} "" ${url} "${flags}" "" ${is_cmake} ${is_git} ${auto_gen}) + +# Add the HiCMA-X include directories (NOTE(review): HICMA_X_SRC_DIR is not set in this file; presumably ImportDependency defines it - confirm). +include_directories(${HICMA_X_SRC_DIR}) +include_directories(${HICMA_X_SRC_DIR}/dplasma/src) +include_directories(${HICMA_X_SRC_DIR}/hicma_parsec) +include_directories(${HICMA_X_SRC_DIR}/bin/dplasma/src) +# Display a status message indicating that HiCMA-X has been successfully included.
+message(STATUS "HiCMA-X done") diff --git a/cmake/ImportHwloc.cmake b/cmake/ImportHwloc.cmake index 5f632725..9607943f 100644 --- a/cmake/ImportHwloc.cmake +++ b/cmake/ImportHwloc.cmake @@ -12,7 +12,7 @@ # Configuration settings for integrating the HWLOC library # 'name' sets the identifier for the HWLOC library within this script to "HWLOC". -set(name "HWLOC") +set(name "Hwloc") # 'tag' specifies "hwloc-2.10.0" as the version tag, identifying a specific release of HWLOC to be used. set(tag "hwloc-2.10.0") # 'version' defines "2.10.0" as the version of HWLOC, ensuring it meets project compatibility requirements. diff --git a/cmake/ImportNetCDF.cmake b/cmake/ImportNetCDF.cmake new file mode 100644 index 00000000..d794db7f --- /dev/null +++ b/cmake/ImportNetCDF.cmake @@ -0,0 +1,40 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file ImportNetCDF.cmake +# @brief Checks for the NetCDF library and includes it in the project if it is not already present. +# @version 2.0.0 +# @author Mahmoud ElKarargy +# @author Sameh Abdulah +# @date 2024-11-14 + +include(ImportHDF5) +include(ImportPnetCDF) + +# Configuration settings for the integration of the NetCDF library +# 'name' is assigned to "NetCDF", serving as the identifier for this library within the script. +set(name "NetCDF") +# 'tag' defines "v4.7.4" as the version tag of NetCDF, indicating the specific release to be utilized. +set(tag "v4.7.4") +# 'version' specifies "4.7.4" as the version of the NetCDF library, ensuring compatibility with the project's requirements. +set(version "4.7.4") +# 'flag' holds additional configuration options for the build; here it is set to --enable-pnetcdf. +set(flag \--enable-pnetcdf ) +# 'is_cmake' indicates whether NetCDF is built with CMake; it is set to OFF here.
+set(is_cmake OFF) +# 'is_git' denotes that the NetCDF source code is hosted in a Git repository, which is set to ON. +set(is_git ON) +# 'auto_gen' signals whether autogen scripts are required for the build process, which is set to OFF for NetCDF. +set(auto_gen OFF) +# 'url' provides the location of the NetCDF source code repository on GitHub. +set(url "https://github.com/Unidata/netcdf-c") + +# The 'ImportDependency' macro script, located in the 'macros' directory, is included for managing the import and setup of the NetCDF library. +include(macros/ImportDependency) +# The 'ImportDependency' macro is invoked with the above-defined parameters to handle the detection, fetching, and integration of NetCDF into the project. +ImportDependency(${name} ${tag} ${version} ${url} "${flag}" "" ${is_cmake} ${is_git} ${auto_gen}) + +# A status message is outputted to indicate the successful integration of the NetCDF library into the project. +message(STATUS "${name} done") diff --git a/cmake/ImportNlohmannJSON.cmake b/cmake/ImportNlohmannJSON.cmake new file mode 100644 index 00000000..67227b9e --- /dev/null +++ b/cmake/ImportNlohmannJSON.cmake @@ -0,0 +1,39 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file ImportNlohmannJSON.cmake +# @brief Checks for the nlohmann library and includes it in the project if it is not already present. +# @version 1.1.0 +# @author Mahmoud ElKarargy +# @date 2024-12-25 + +# Configurations for integrating the nlohmann JSON library +# 'name' is assigned "nlohmann_json" to identify the nlohmann library within this script. +set(name "nlohmann_json") +# 'tag' specifies the version tag "v3.11.2" for the nlohmann library, indicating the exact version to be used. +set(tag "v3.11.2") +# 'version' sets "3.11.2" as the version of the nlohmann library, ensuring compatibility with project requirements.
+set(version "3.11.2") +# 'flag' is available for additional configuration options during build or installation, but remains empty here. +set(flag "") +# 'is_cmake' indicates whether nlohmann uses CMake for building. +set(is_cmake ON) +# 'is_git' denotes if nlohmann's source code is hosted in a Git repository. +set(is_git ON) +# 'auto_gen' signifies the need for autogen scripts in the build process. Here, it is set to OFF, indicating they are not needed. +set(auto_gen OFF) +# 'url' provides the download location for the nlohmann source code. +set(url "https://github.com/nlohmann/json.git") + +# Include the 'ImportDependency' macro, responsible for managing the nlohmann library's import and setup process. +include(macros/ImportDependency) +# Execute the 'ImportDependency' macro with the previously established parameters to handle the detection, downloading, and integration of nlohmann. +ImportDependency(${name} ${tag} ${version} ${url} "${flag}" "" ${is_cmake} ${is_git} ${auto_gen}) + +# Add nlohmann_json to the project's list of linked libraries, making its functionality accessible within the project. +list(APPEND LIBS nlohmann_json::nlohmann_json) + +# Output a message signaling the successful integration of the nlohmann library into the project. +message(STATUS "${name} done") diff --git a/cmake/ImportPnetCDF.cmake b/cmake/ImportPnetCDF.cmake new file mode 100644 index 00000000..1b588c31 --- /dev/null +++ b/cmake/ImportPnetCDF.cmake @@ -0,0 +1,45 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file ImportPnetCDF.cmake +# @brief Checks for the PnetCDF library and includes it in the project if it is not already present.
+# @version 2.0.0 +# @author Mahmoud ElKarargy +# @author Sameh Abdulah +# @date 2024-11-14 + +# Compute MPI installation root from the MPI C++ compiler path +if (DEFINED MPI_CXX_COMPILER) + get_filename_component(MPI_COMPILER_DIR "${MPI_CXX_COMPILER}" DIRECTORY) # .../bin + get_filename_component(MPI_ROOT "${MPI_COMPILER_DIR}" DIRECTORY) # parent of bin +else() + message(FATAL_ERROR "MPI_CXX_COMPILER not set; cannot locate MPI installation") +endif() + +# Configuration settings for the integration of the PnetCDF library +# 'name' is assigned to "PnetCDF", serving as the identifier for this library within the script. +set(name "PnetCDF") +# 'tag' defines "tag.v1.10.0" as the version tag of PnetCDF, indicating the specific release to be utilized. +set(tag "tag.v1.10.0") +# 'version' specifies "1.10.0" as the version of the PnetCDF library, ensuring compatibility with the project's requirements. +set(version "1.10.0") +# 'flag' holds additional configuration options for the build; here it enables shared libraries and passes the MPI root computed above via --with-mpi. +set(flag \--enable-shared \--with-mpi=${MPI_ROOT}) +# 'is_cmake' indicates whether PnetCDF is built with CMake; it is set to OFF here. +set(is_cmake OFF) +# 'is_git' denotes that the PnetCDF source code is hosted in a Git repository, which is set to ON. +set(is_git ON) +# 'auto_gen' signals whether autogen scripts are required for the build process, which is set to ON for PnetCDF. +set(auto_gen ON) +# 'url' provides the location of the PnetCDF source code repository on GitHub. +set(url "https://github.com/Parallel-NetCDF/PnetCDF") + +# The 'ImportDependency' macro script, located in the 'macros' directory, is included for managing the import and setup of the PnetCDF library. +include(macros/ImportDependency) +# The 'ImportDependency' macro is invoked with the above-defined parameters to handle the detection, fetching, and integration of PnetCDF into the project.
+ImportDependency(${name} ${tag} ${version} ${url} "${flag}" "" ${is_cmake} ${is_git} ${auto_gen}) + +# A status message is outputted to indicate the successful integration of the PnetCDF library into the project. +message(STATUS "${name} done") diff --git a/cmake/ImportStarsH.cmake b/cmake/ImportStarsH.cmake index 0fd9cde5..da715722 100644 --- a/cmake/ImportStarsH.cmake +++ b/cmake/ImportStarsH.cmake @@ -5,33 +5,45 @@ # @file CMakeLists.txt # @brief Find and include STARSH library as a dependency. -# @version 1.1.0 +# @version 2.0.0 # @author Mahmoud ElKarargy # @author Sameh Abdulah -# @date 2023-03-13 +# @date 2024-09-28 # Configuration parameters for integrating the STARSH library # 'name' is set to "STARSH" to identify the STARSH library within this script. set(name "STARSH") -# 'tag' specifies "v0.3.1" as the version tag for STARSH, denoting the exact release to be used. -set(tag "v0.3.1") -# 'version' sets "0.3.1" as the version of the STARSH library, ensuring it aligns with project requirements. -set(version "0.3.1") + +# Select the STARSH tag, version and URL from RUNTIME_TYPE; any other value is rejected, because empty STARSH_* values would shift the positional arguments of ImportDependency +if(RUNTIME_TYPE STREQUAL "STARPU") + # Default values for STARPU runtime + set(STARSH_TAG "v0.3.1") + set(STARSH_VERSION "0.3.1") + set(STARSH_URL "https://github.com/ecrc/stars-h.git") + message(STATUS "RUNTIME_TYPE is STARPU. Using default STARSH configuration.") + +elseif(RUNTIME_TYPE STREQUAL "PARSEC") + # Custom values for PARSEC runtime + set(STARSH_TAG "sabdulah/non-gaussian-kernel") + set(STARSH_VERSION "0") + set(STARSH_URL "https://github.com/SAbdulah/stars-h.git") + message(STATUS "RUNTIME_TYPE is PARSEC. Using custom STARSH configuration for PARSEC.") +else() + message(FATAL_ERROR "Unsupported RUNTIME_TYPE '${RUNTIME_TYPE}' for STARSH; expected STARPU or PARSEC.") +endif() + # 'flag' is used for additional build configuration options, specifically disabling StarPU and optionally enabling MPI.
-set(flag \-DSTARPU=OFF \-DMPI=${USE_MPI}) +set(flag \-DSTARPU=OFF \-DMPI=${USE_MPI} \-DBLA_VENDOR=${BLA_VENDOR}) # 'is_cmake' indicates that STARSH uses CMake as its build system, set to ON. set(is_cmake ON) # 'is_git' denotes that the source code for STARSH is hosted on a Git repository, set to ON. set(is_git ON) # 'auto_gen' signals whether autogen scripts are needed for the build process; it is set to OFF for STARSH. set(auto_gen OFF) -# 'url' provides the GitHub repository URL for STARSH, specifying the source code's location. -set(url "https://github.com/ecrc/stars-h.git") - # The 'ImportDependency' macro, located in the 'macros' directory, is included to manage the import and setup of the STARSH library. include(macros/ImportDependency) # The 'ImportDependency' macro is called with the configuration parameters set above to manage the detection, fetching, and setup of STARSH. -ImportDependency(${name} ${tag} ${version} ${url} "${flag}" "" ${is_cmake} ${is_git} ${auto_gen}) +ImportDependency(${name} ${STARSH_TAG} ${STARSH_VERSION} ${STARSH_URL} "${flag}" "" ${is_cmake} ${is_git} ${auto_gen}) # A message is output to indicate the successful integration of the STARSH library into the project. message(STATUS "${name} done") diff --git a/cmake/macros/BuildDependency.cmake b/cmake/macros/BuildDependency.cmake index 50f5c125..56531c0a 100644 --- a/cmake/macros/BuildDependency.cmake +++ b/cmake/macros/BuildDependency.cmake @@ -73,11 +73,24 @@ macro(BuildDependency raw_name url tag flags is_using_cmake is_using_git auto_ge else () # For non-CMake projects, run autogen.sh if auto_generation is true, then configure the project with specified flags. if (${auto_generation}) - execute_process(COMMAND ./autogen.sh - WORKING_DIRECTORY ${${name}_srcpath} - COMMAND_ERROR_IS_FATAL ANY) # Halt on error + if (EXISTS "${${name}_srcpath}/autogen.sh") + message(STATUS "autogen.sh found. 
Running ./autogen.sh.") + execute_process( + COMMAND ./autogen.sh + WORKING_DIRECTORY ${${name}_srcpath} + COMMAND_ERROR_IS_FATAL ANY + ) + else () + message(STATUS "autogen.sh not found. Running autoreconf -i.") + execute_process( + COMMAND autoreconf -i + WORKING_DIRECTORY ${${name}_srcpath} + COMMAND_ERROR_IS_FATAL ANY + ) + endif () endif () - execute_process(COMMAND ./configure --prefix=${CMAKE_INSTALL_PREFIX}/${capital_name} ${flags} + execute_process( + COMMAND ./configure --prefix=${CMAKE_INSTALL_PREFIX}/${capital_name} ${flags} CFLAGS=-fPIC WORKING_DIRECTORY ${${name}_srcpath} COMMAND_ERROR_IS_FATAL ANY) # Halt on error endif () @@ -85,6 +98,11 @@ macro(BuildDependency raw_name url tag flags is_using_cmake is_using_git auto_ge # Include the ProcessorCount module to determine the number of CPUs for parallel build and install commands. include(ProcessorCount) ProcessorCount(N) + # Subtract 5 from N, but keep at least 1 job: ProcessorCount may return 0 and 'make -j 0' is rejected by make + math(EXPR N "${N} - 5") + if (N LESS 1) + set(N 1) + endif() # Build the project using make, with parallel jobs based on processor count. This applies to both CMake and non-CMake projects.
if (${is_using_cmake}) execute_process(COMMAND make -j ${N} diff --git a/configurations/config.json b/configurations/config.json new file mode 100644 index 00000000..aeff9838 --- /dev/null +++ b/configurations/config.json @@ -0,0 +1,52 @@ +{ + "N": "16", + "Kernel": "UnivariateMaternStationary", + "P": "1", + "Q": "1", + "Timeslot": "1", + "Computation": "exact", + "Precision": "double", + "Cores": "1", + "Gpus": "0", + "Dts": "8", + "Lts": "0", + "Band": "0", + "MaxRank": "0", + "ObservationsFile": "./configurations/observations.dat", + "Seed": "0", + "LogPath": "configurations", + "Ooc": "false", + "ApproximationMode": "1", + "Log": "true", + "IsNonGaussian": "false", + "Verbose": "standard", + "BandDense": "0", + "ObjectsNumber": "0", + "AdaptiveDecision": "0", + "AddDiagonal": "0", + "FileTimeSlot": "1", + "FileNumber": "0", + "EnableInverse": "false", + "Mpiio": "true", + "Dimension": "2D", + "IsSynthetic": "true", + "DataPath": "", + "RecoveryFile": "", + "FileLogPath": "logs.log", + "FileLogName": "logs.log", + "DistanceMetric": "euclidean", + "MaxMleIterations": "1", + "Accuracy": "0", + "Tolerance": "1", + "ZMiss": "1", + "Mspe": "false", + "Idw": "false", + "MloeMmom": "false", + "Fisher": "false", + "ObservationNumber": "0", + "NetCDFDataPath": "", + "ForcingDataPath": "", + "StartYear": "2000", + "EndYear": "2001", + "StageZero": "false" +} diff --git a/configure b/configure index 5e6a92f2..6f95f233 100755 --- a/configure +++ b/configure @@ -17,6 +17,7 @@ BLUE='\033[0;34m' NC='\033[0m' INSTALL_PREFIX=$PWD/installdir/_deps +BASE_DIR=$PWD # Function to install CMake from source install_cmake() { @@ -36,12 +37,11 @@ install_cmake() { cd cmake-3.28.1 || exit 1 # Configure, build, and install CMake to the specified location - ./bootstrap --prefix="$INSTALL_PREFIX" --parallel=2 -- -DCMAKE_USE_OPENSSL=OFF + ./bootstrap --prefix="$INSTALL_PREFIX/CMAKE" --parallel=2 -- -DCMAKE_USE_OPENSSL=OFF make -j 2 - sudo make install - + make install # Clean up - cd 
"$temp_dir" || exit 1 + cd "$BASE_DIR" || exit 1 rm -rf "$temp_dir" } @@ -59,7 +59,7 @@ fi BUILDING_TESTS="OFF" BUILDING_HEAVY_TESTS="OFF" BUILDING_EXAMPLES="OFF" -USING_HiCMA="OFF" +USE_HiCMA="OFF" VERBOSE="OFF" USE_CUDA="OFF" USE_MPI="OFF" @@ -112,7 +112,7 @@ while getopts ":tevhHi:cmpTwr" opt; do ;; H) ##### Using HiCMA ##### echo "${GREEN}Using HiCMA.${NC}" - USING_HiCMA="ON" + USE_HiCMA="ON" ;; c) ##### Using cuda enabled ##### echo "${GREEN}Cuda enabled ${NC}" @@ -179,7 +179,7 @@ if [ -z "$BUILDING_EXAMPLES" ]; then echo "${RED}Building examples disabled.${NC}" fi -if [ -z "$USING_HiCMA" ]; then +if [ -z "$USE_HiCMA" ]; then echo "${RED}Using HiCMA is disabled.${NC}" fi @@ -226,10 +226,8 @@ elif [ -x "/Applications/CMake.app/Contents/bin/cmake" ]; then cmake_command_bin="${cmake_install_dir}/cmake" else echo "Installing CMake from source" - mkdir "${ABSOLUTE_PATH}/inst/_deps/" - install_dir="${ABSOLUTE_PATH}/inst/_deps/" - install_cmake "$install_dir" - cmake_command_bin="${ABSOLUTE_PATH}/inst/_deps/bin/cmake" + install_cmake + cmake_command_bin="${INSTALL_PREFIX}/CMAKE/bin/cmake" fi "$cmake_command_bin" "$DEVELOPER_WARNINGS" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ @@ -238,7 +236,7 @@ fi -DBUILD_TESTS="${BUILDING_TESTS}" \ -DBUILD_HEAVY_TESTS="${BUILDING_HEAVY_TESTS}" \ -DBUILD_EXAMPLES="${BUILDING_EXAMPLES}" \ - -DUSE_HICMA="${USING_HiCMA}" \ + -DUSE_HICMA="${USE_HiCMA}" \ -DCMAKE_VERBOSE_MAKEFILE:BOOL=${VERBOSE} \ -DUSE_CUDA="${USE_CUDA}" \ -DUSE_MPI="${USE_MPI}" \ diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 450c6d91..c55b728c 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -5,14 +5,21 @@ # @file CMakeLists.txt # @brief Includes subdirectories for different modules of the ExaGeoStat software package. -# @version 1.1.0 +# @version 2.0.0 # @author Mahmoud ElKarargy -# @date 2024-02-24 +# @date 2024-09-28 # Include subdirectories for end-to-end module, configurations module and data-generators module. 
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/configurations) -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/data-generators) -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/data-loader) -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/descriptors) -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/end-to-end) -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/hardware) +# Select the example subdirectories to build according to RUNTIME_TYPE (STARPU or PARSEC); no examples are added for any other value +if (RUNTIME_TYPE STREQUAL "STARPU") + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/hardware) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/configurations) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/data-loader) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/end-to-end) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/data-generators) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/descriptors) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/stage-zero) +elseif (RUNTIME_TYPE STREQUAL "PARSEC") + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/climate-emulator) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/stage-zero) +endif () diff --git a/examples/climate-emulator/CMakeLists.txt b/examples/climate-emulator/CMakeLists.txt new file mode 100644 index 00000000..01ee6898 --- /dev/null +++ b/examples/climate-emulator/CMakeLists.txt @@ -0,0 +1,16 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file CMakeLists.txt +# @brief Defines an executable and links it with the ExaGeoStat library and other libraries.
+# @version 2.0.0 +# @author Mahmoud ElKarargy +# @date 2024-09-23 + +# Define the target executable +add_executable(Example_Climate_Emulator ${CMAKE_CURRENT_SOURCE_DIR}/ClimateEmulator.cpp) + +# Link the target executable with the project and any additional libraries +target_link_libraries(Example_Climate_Emulator PUBLIC ${PROJECT_NAME}_INTERFACE) diff --git a/examples/climate-emulator/ClimateEmulator.cpp b/examples/climate-emulator/ClimateEmulator.cpp new file mode 100644 index 00000000..2ff03177 --- /dev/null +++ b/examples/climate-emulator/ClimateEmulator.cpp @@ -0,0 +1,38 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file ClimateEmulator.cpp + * @brief example of climate emulator. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @date 2024-09-23 +**/ + +#include +#include +#include + +using namespace exageostat::configurations; +using namespace exageostat::api; + +int main(int argc, char **argv) { + + // Create a new configurations object. + Configurations configurations; + // Initialize the arguments with the provided command line arguments + configurations.InitializeArguments(argc, argv); + // Initialize the ExaGeoStat Hardware + auto hardware = ExaGeoStatHardware(configurations); + // Create a unique pointer to hold the data. + std::unique_ptr> data; + // Load the data, either by reading from a file or generating synthetic data. + ExaGeoStat::ExaGeoStatLoadData(configurations, data); + // Perform data modeling. 
+ ExaGeoStat::ExaGeoStatDataModeling(configurations, data); + + return 0; +} diff --git a/examples/data-generators/SyntheticDataGeneration.cpp b/examples/data-generators/SyntheticDataGeneration.cpp index e4df4645..f5eae7b0 100644 --- a/examples/data-generators/SyntheticDataGeneration.cpp +++ b/examples/data-generators/SyntheticDataGeneration.cpp @@ -32,7 +32,6 @@ int main(int argc, char **argv) { // Create a new synthetic_data_configurations object with the provided command line arguments Configurations synthetic_data_configurations; synthetic_data_configurations.InitializeArguments(argc, argv); - synthetic_data_configurations.InitializeDataGenerationArguments(); // initialize ExaGeoStat Hardware. ExaGeoStatHardware hardware(synthetic_data_configurations.GetComputation(), diff --git a/examples/data-loader/CSVLoader.cpp b/examples/data-loader/CSVLoader.cpp index d3776fb6..cf61aeb6 100644 --- a/examples/data-loader/CSVLoader.cpp +++ b/examples/data-loader/CSVLoader.cpp @@ -40,7 +40,6 @@ int main(int argc, char **argv) { // Generate Data and Log it into file configurations.SetLogger(true); - configurations.InitializeDataGenerationArguments(); // Initialize ExaGeoStat Hardware and Kernel. auto hardware = ExaGeoStatHardware(configurations.GetComputation(), configurations.GetCoresNumber(), diff --git a/examples/stage-zero/CMakeLists.txt b/examples/stage-zero/CMakeLists.txt new file mode 100644 index 00000000..f76066d7 --- /dev/null +++ b/examples/stage-zero/CMakeLists.txt @@ -0,0 +1,16 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file CMakeLists.txt +# @brief Defines an executable and links it with the ExaGeoStat library and other libraries.
+# @version 2.0.0 +# @author Mahmoud ElKarargy +# @date 2024-09-23 + +# Define the target executable +add_executable(Example_Stage_Zero ${CMAKE_CURRENT_SOURCE_DIR}/StageZeroDataGenerator.cpp) + +# Link the target executable with the project and any additional libraries +target_link_libraries(Example_Stage_Zero PUBLIC ${PROJECT_NAME}_INTERFACE) diff --git a/examples/stage-zero/StageZeroDataGenerator.cpp b/examples/stage-zero/StageZeroDataGenerator.cpp new file mode 100644 index 00000000..8f14f610 --- /dev/null +++ b/examples/stage-zero/StageZeroDataGenerator.cpp @@ -0,0 +1,47 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file StageZeroDataGenerator.cpp + * @brief Example to run Stage Zero data generation (mean-trend pipeline). + * @version 2.0.0 + **/ + +#include +#include +#include + +using namespace exageostat::configurations; +using namespace exageostat::api; + +int main(int argc, char **argv) { + + // Create a configurations object. 
+ Configurations configurations; + // Initialize the arguments with the provided command line arguments + configurations.InitializeArguments(argc, argv); + + // Initialize the ExaGeoStat Hardware based on runtime type +#if DEFAULT_RUNTIME + // StarPU/CHAMELEON mode - use constructor that initializes CHAMELEON + auto hardware = ExaGeoStatHardware(configurations.GetComputation(), + configurations.GetCoresNumber(), + configurations.GetGPUsNumbers(), + configurations.GetPGrid(), + configurations.GetQGrid()); +#else + // PaRSEC mode - use constructor that initializes PaRSEC + auto hardware = ExaGeoStatHardware(configurations); +#endif + + std::unique_ptr> data; + // Generate Stage Zero mean-trend data + ExaGeoStat::ExaGeoStatGenerateMeanTrendData(configurations, data); + + // Finalize Hardware + hardware.FinalizeHardware(); + + return 0; +} diff --git a/forcing_new.csv b/forcing_new.csv new file mode 100644 index 00000000..b986dffc --- /dev/null +++ b/forcing_new.csv @@ -0,0 +1,751 @@ +0.0735333085974343 +0.0564078292360275 +0.0281704739642215 +-0.00681406843048154 +-0.040249321860721 +-0.0944424926883795 +-0.387764035983989 +-0.209454525613492 +-0.0554689921455058 +0.0346834160181229 +0.0737015108093417 +0.0740008937607664 +-0.986140239899765 +-0.701322098491405 +-0.292739996657424 +-0.140586008912452 +-0.539503263997613 +-0.536639478806497 +-0.182571503259008 +0.015252152983758 +-0.0987185246498932 +-0.021054021036017 +0.045138281572218 +0.0452083088233568 +0.0179329762638491 +-0.0107806607097613 +-0.0287782171932308 +-0.0214634979339537 +0.0251990166556861 +0.0983233605015185 +0.142928610406545 +0.145099098793241 +0.122670984757829 +-1.7428176346405 +-3.32279989921491 +-1.52814497172732 +-0.792884773251679 +-0.344365050491177 +-0.0503187060749755 +0.116971804028518 +0.177166662652701 +0.175619769859675 +0.163373766030212 +0.135486456364717 +0.10168715909582 +0.0662781459682924 +-0.162852381316567 +-0.315334484575428 +-0.182229243435798 +-0.0645471293791664 
+-0.000593144933324755 +0.0476445541712106 +0.0816977363097027 +0.103248949592118 +0.108905601681988 +0.102185196538713 +0.0904309469245979 +0.0725644863613421 +0.0540909684632587 +-3.5006762035251 +-2.56739943296779 +-1.01365684694915 +-0.357318490191441 +-0.0924670180024181 +0.0210579337376115 +-3.2679700948547 +-4.33884291543812 +-1.76312234120678 +-0.606896589781271 +-0.169703080571861 +-0.0177922037719317 +-0.0933299657385628 +-0.107173027964606 +-0.483947689691565 +-0.167737843711994 +0.00761066815646821 +0.0986167223150245 +0.14291955771193 +0.158072989868482 +0.152370349836954 +0.131548765151827 +-2.37260765554138 +-1.45587383493979 +-0.529206139799823 +-0.150974246638826 +-1.85936629478726 +-1.17775442111454 +-0.320667123954394 +0.0244921664181625 +0.135954279955787 +0.158208651897832 +0.142657657865581 +0.11244929651932 +0.0846236539262703 +0.0678941706490323 +0.0674790758523018 +-0.18540448180014 +-0.0819847477181374 +0.0434153397101125 +0.101013646570047 +0.129696616142984 +0.131964714518556 +0.126185533100305 +-0.116299407811571 +-0.143724245726592 +0.00589102615712744 +0.0189110830185275 +-0.169155783170765 +0.00382193363111591 +0.105335350671688 +0.148108920960814 +0.0674112621927254 +-0.790344557820372 +-0.371889400148492 +-0.042841120233549 +0.084585235518913 +0.127447115275609 +0.138679444588529 +0.157097257717767 +0.175926960499267 +0.180323351656727 +0.190298316258954 +0.177890105413739 +-0.121345099507027 +0.0414571094479849 +0.0441822655198852 +0.064915562952822 +0.116604740390051 +0.137428687028469 +0.142633824484082 +0.0961929476617062 +0.111560819854818 +0.162999191322291 +-0.396017989557001 +-1.94455939682261 +-0.784384733032967 +-0.183770505496569 +-0.126237111272539 +0.0700143623265346 +0.150089977283444 +0.00113201457425549 +-0.198146624228969 +0.0486100458146779 +0.2247403532897 +0.255169580598002 +0.231408170577555 +0.203325452488972 +0.183378112579253 +0.169795953046963 +0.14667542479648 +0.136359650277299 +0.12463366865421 
+0.00446080278802874 +-0.922701154275033 +-0.398613218302848 +-0.0152492876801434 +0.13535893946413 +-0.0457683964988274 +0.0736370395703204 +0.138628468906399 +0.12204771577585 +0.112463532348668 +-0.410540494265728 +-0.51859155146024 +-0.0773006179549918 +0.163121123284847 +0.211956620536955 +0.231602504124714 +0.252054909126777 +0.293662120634099 +0.234771035085209 +0.29030157343894 +0.123561879872062 +0.114388531341161 +0.22698105817062 +0.248274120771539 +0.276306922489814 +0.278395981465593 +0.236796205033435 +0.00494116729513297 +0.27237703203339 +0.317452470755623 +0.172580968547615 +0.291767969713245 +0.322956210496099 +0.328714227092424 +0.373687537078817 +0.358169524575507 +0.393847682618557 +0.376678260532022 +0.323550004824654 +0.299255972426473 +0.27990384178757 +0.262014005476918 +0.281847488134076 +0.38190659292483 +0.40765894746699 +0.40096987382174 +0.401388176007784 +0.408589787555987 +0.329701324598059 +0.265088121269562 +0.266966970256257 +0.25936925604085 +0.271789240369756 +0.264152020302313 +0.302515516975254 +0.350501649255739 +0.362609527765518 +0.316691433650802 +0.291299042769268 +0.172781032655988 +-0.0586213156167243 +-0.857414773670313 +-0.791914464060474 +-0.427350411566554 +0.105472551955637 +-0.0703594804614384 +0.0455190446001029 +0.154651605891782 +0.174163222032147 +0.0390203422780789 +0.179166672969898 +0.315589589567982 +0.283734934561143 +-0.0653392509197938 +0.229515277266285 +0.418519894553732 +0.548367971198338 +0.618645755832936 +0.614546311694787 +0.637389586850539 +0.37122283938293 +-0.167266064430766 +0.311129838290026 +0.593688019405194 +0.653565187647754 +0.785146857113679 +0.952838349965965 +1.10238075721802 +1.12934285234633 +0.57799352670705 +-0.753989765878911 +0.100370880847733 +0.937189096847845 +1.18929412242444 +1.30554921690475 +1.39747490914516 +1.5158249272712 +1.61698860355094 +1.66351109519782 +1.67273825244096 +1.73916287781751 +1.68268397531896 +1.66988734347366 +1.62366961536371 +1.61747608721877 
+1.6470728828526 +1.7163413635855 +1.73968601222201 +1.86757013504027 +1.89444819074678 +1.99958370770229 +2.08527227239531 +2.11457079559187 +2.29014673572101 +2.31542582059018 +2.36676683543442 +2.42403318217367 +2.48594370920299 +2.55491656036155 +2.64990795926218 +2.74900752922216 +2.83130475628934 +2.90033526073925 +2.93895234697681 +2.97518351180996 +3.01139610136313 +3.06318199331034 +3.10864193160321 +3.16250296290423 +3.21681304064662 +3.28223815713722 +3.36738695550688 +3.4689854230086 +3.53699086210268 +3.61767099165672 +3.66438450238893 +3.72781049856311 +3.76768064590623 +3.80926319043955 +3.85312181490019 +3.91185209354861 +3.97898145849728 +4.05602139371929 +4.15698556478939 +4.24654531145331 +4.33923114782817 +4.40729312032761 +4.4506546914661 +4.48198540345201 +4.53303052573636 +4.58558231934071 +4.64269843172109 +4.71549415336983 +4.80505613399776 +4.89972561409484 +4.96942692629377 +5.0428014814696 +5.11524031808973 +5.16308625516263 +5.21456445923415 +5.26943841814895 +5.33414504671089 +5.40233298827459 +5.47906180388394 +5.57806029476192 +5.67415435879534 +5.7589893484269 +5.82406911707569 +5.87921896298363 +5.92801072337236 +5.98854993913605 +6.05200708874083 +6.11187574028978 +6.17887692029714 +6.24597935430614 +6.32440178727353 +6.41446650034022 +6.50609669681013 +6.59206464013353 +6.66607594271513 +6.73732222052866 +6.79533276757332 +6.86115511544195 +6.91208940185968 +6.97280297672138 +7.04455310442392 +7.11904659503861 +7.20859460487598 +7.30926244550782 +7.40397660809063 +7.50488850300552 +7.56844378688965 +7.62081194804236 +7.67788433486626 +7.73209304540885 +7.80000726373986 +7.86852765931031 +7.95321250766698 +8.05037850863166 +8.15048982216197 +8.22239742623978 +8.29278025265854 +8.35122817634572 +8.39790705551816 +8.4521141158509 +8.50695158392537 +8.56922541506577 +8.63758600523246 +8.71889805526434 +8.82148186485643 +8.88157052009191 +8.94105276628684 +8.97206488868594 +9.01952166754427 +9.05209331067053 +9.09377515234435 
+9.13843222641117 +9.18949355762173 +9.25915450173458 +9.33894543620531 +9.41985106822212 +9.44841305292502 +9.49934129614928 +9.51828920377542 +9.53984520974457 +9.57219343524954 +9.60597592886569 +9.65127210449224 +9.71329406138085 +9.80623726334177 +9.85506748589743 +9.90485408098237 +9.92743757559975 +9.94066482212674 +9.94835405799236 +9.97660709912634 +10.0043886928732 +10.0363740440508 +10.0755358821554 +10.1313172371714 +10.1874147980763 +10.2244866795797 +10.2628477703564 +10.2923220305938 +10.3064034944071 +10.3363382054143 +10.3554215868798 +10.3796876149564 +10.4071104006363 +10.4375519433133 +10.4852834282478 +10.5492970201053 +10.6045866188063 +10.6317216056881 +10.6544925347347 +10.6653186060074 +10.6683060843486 +10.6751204524014 +10.6904806229535 +10.7173011301045 +10.7604215025133 +10.8213083697877 +10.8807191332847 +10.8935858254107 +10.9225518040488 +10.911929642219 +10.9106211831593 +10.9158205352107 +10.9281024099657 +10.9471723188006 +10.9787213312425 +11.0307661464439 +11.0695053233518 +11.1058201951807 +11.1263719397705 +11.1295684715422 +11.125387867106 +11.1300646120063 +11.1329154287833 +11.1416613596702 +11.1511665504716 +11.1652357731497 +11.1860510266948 +11.21682674283 +11.2662085366153 +11.2876424453899 +11.3129101577895 +11.3352544008041 +11.3276257813527 +11.3226251075762 +11.3241199370222 +11.3338711360022 +11.3457745715026 +11.3677552159473 +11.4106724334043 +11.4546704830249 +11.484944771653 +11.489773688374 +11.4872864496371 +11.4736057211223 +11.4746891196508 +11.4775107642773 +11.4746019382994 +11.4801534141148 +11.4844605092827 +11.5005867092908 +11.5286993480283 +11.5586266639517 +11.5832169188797 +11.5923224203854 +11.5990990072668 +11.5920455686653 +11.5916143816096 +11.5764954958129 +11.5710546406618 +11.5772773274359 +11.5867939882416 +11.6115460519395 +11.6457690174502 +11.6728371899232 +11.708903584911 +11.7019730885016 +11.6851766519983 +11.6712810280528 +11.6527684272305 +11.6516749269333 +11.6465593233072 
+11.6614943514003 +11.6908021048941 +11.7114793180298 +11.7192214261128 +11.7323186337916 +11.7236730661777 +11.7161818815425 +11.6955475483581 +11.6888430371265 +11.6852727606719 +11.6888264839104 +11.7033966656434 +11.735898047673 +11.7648833617096 +11.7698079891382 +11.7671668887794 +11.7474745544631 +11.7269726588513 +11.7184604312344 +11.7044684289953 +11.6957962944082 +11.6890452665221 +11.6796388005274 +11.6810603277753 +11.6937760673166 +11.709778137357 +11.7063098109613 +11.7053505213886 +11.6943628111551 +11.6723941789203 +11.6552934009583 +11.6281282327614 +11.6121217409934 +11.6072792392638 +11.6060208535241 +11.6219717348455 +11.6467646839049 +11.6645205252459 +11.6810382542954 +11.6533066605846 +11.6310214248735 +11.6028844183571 +11.5842209104449 +11.566368427695 +11.5620159326721 +11.5741131497649 +11.5935957818967 +11.6067153677667 +11.6023866683221 +11.5948677512777 +11.578229876855 +11.5514944421722 +11.5357322834895 +11.5274115210899 +11.5214031645005 +11.5293251735788 +11.5541041547924 +11.5805098693536 +11.5868930454046 +11.5825344410568 +11.5452316879559 +11.533956135378 +11.5123846531958 +11.4936997059757 +11.4817277343254 +11.4850662647116 +11.5079122987115 +11.5279555293444 +11.5496991696653 +11.5471188141682 +11.5248167776564 +11.5199350305606 +11.5166985335659 +11.5135242118571 +11.5103558000814 +11.5072003578307 +11.5040575012144 +11.5009199393838 +11.497792413993 +11.4947151996047 +11.4916062152226 +11.4885013734765 +11.4854513184532 +11.4823637006223 +11.4793230976588 +11.4762497743771 +11.4732231325134 +11.4702015952343 +11.4671465632998 +11.4641377529569 +11.4611336133668 +11.4581339589085 +11.4551415980495 +11.4521480538692 +11.4491618180696 +11.4461792841896 +11.4432442738681 +11.4402721752661 +11.437298250821 +11.4343765891569 +11.4314123508288 +11.4284949643136 +11.4255366960322 +11.4226230618748 +11.4197123103175 +11.4167579292375 +11.4138575407875 +11.4109522686211 +11.4080525337726 +11.4051571172591 +11.4022617087182 
+11.3994105436258 +11.3965192747343 +11.3936278720316 +11.3907382694542 +11.387897999548 +11.3850097566398 +11.3821725634888 +11.3792875465745 +11.3764515533121 +11.3736169145238 +11.3707784388312 +11.3679000546045 +11.3650671996071 +11.3622409334235 +11.3594103129933 +11.3565789732271 +11.3537930078132 +11.3509687138602 +11.3481380596451 +11.3453580678104 +11.3425344844092 +11.3397064511707 +11.3369267849204 +11.334149898738 +11.3313248850823 +11.3285472628565 +11.3257681126632 +11.3229895392013 +11.320216771897 +11.3174391229482 +11.3146650354874 +11.3118861507105 +11.3091130235386 +11.3063347926451 +11.3036049976857 +11.3008326389109 +11.2980583529332 +11.2953273419558 +11.2925535143148 +11.2898226593324 +11.2870920618544 +11.2843667515015 +11.281589261619 +11.2788639554954 +11.2761316295212 +11.2734044160306 +11.2706720568963 +11.2679428227838 +11.2652583952187 +11.2625289860887 +11.2597994486326 +11.2571151061334 +11.254383170837 +11.2516984647091 +11.2489715274171 +11.2462823220804 +11.2435950356806 +11.2409126769912 +11.2381827154141 +11.2354926688037 +11.23280743408 +11.2301200866242 +11.2274797651171 +11.2247916345017 +11.2221032811195 +11.2194146212976 +11.2167710040059 +11.2140814181718 +11.2114403423244 +11.2087480398445 +11.2061061957696 +11.2034586191759 +11.2007703812919 +11.1981197969674 +11.1954762283804 +11.1928301195676 +11.1901836519697 +11.187536611127 +11.1849327204008 +11.1822847469736 +11.1796340989619 +11.1769883057555 +11.174382460696 +11.1717304832429 +11.1691289195248 +11.1665215298399 +11.1638709809035 +11.1612657055473 +11.1586598991553 +11.1560483989376 +11.1534414459611 +11.150833861841 +11.1482237169678 +11.1456151046734 +11.1430089761297 +11.1404432181712 +11.1378307449588 +11.1352206441179 +11.1326529485141 +11.1300438789669 +11.1274728595104 +11.1249066002157 +11.1222933551917 +11.119720631694 +11.1171521997666 +11.114583282006 +11.1120117902847 +11.109441549497 +11.1068686584441 +11.1042951648719 +11.1017722973768 
+11.0991973621529 +11.0966218484914 +11.0940971458517 +11.0915181416875 +11.0889920954282 +11.0864632701786 +11.0838821032849 +11.0813520801894 +11.0788212880021 +11.0762898005516 +11.0737575545045 +11.0712225539992 +11.0686890398999 +11.0661526525729 +11.0636621097348 +11.0611243452742 +11.0585910872791 +11.0560985366659 +11.0535637151325 +11.0510697781005 +11.0485336576291 +11.0460361679099 +11.0435431687061 +11.0410516511123 +11.0385103343198 +11.036015184187 +11.0335141448226 +11.0310644658628 +11.0285723318335 +11.0260723169731 +11.0235734974625 +11.0210718099222 +11.0186185787304 +11.0161207713275 +11.0136639504786 +11.0111645634857 +11.0087062285003 +11.006252419884 +11.0037456820529 +11.0012904320412 +10.9988324252913 +10.9963757535223 +10.9939160421573 +10.9914526032147 +10.988991493057 +10.9865769076953 +10.9841196628365 +10.9816563491361 +10.9792396572172 +10.9780073371668 diff --git a/inst/include/Rcpp-adapters/FunctionsAdapter.hpp b/inst/include/Rcpp-adapters/FunctionsAdapter.hpp index 336b037f..7d65974f 100644 --- a/inst/include/Rcpp-adapters/FunctionsAdapter.hpp +++ b/inst/include/Rcpp-adapters/FunctionsAdapter.hpp @@ -65,6 +65,7 @@ namespace exageostat::adapters { * @param[in] aDimension Dimensionality of the problem ("2D" or "3D"). * @param[in] aBand Bandwidth for band matrices, applicable in certain computational kernels. * @param[in] aMaxRank Maximum rank for low-rank approximations. + * @param[in] aAccuracy Accuracy value for low-rank approximations, The final value is computed as 10^(-1.0 * aAccuracy) * @param[in] apData Pointer to ExaGeoStatData object to be modeled. * @param[in] aMeasurementsVector Optional vector of measurements to enhance modeling, can be nullable. * @param[in] aLocationsX Optional vector of X coordinates for locations, can be nullable. 
@@ -79,7 +80,7 @@ namespace exageostat::adapters { const std::vector &aUpperBound, const int &aTolerance, const int &aMleIterations, const int &aDenseTileSize, const int &aLowTileSize, const std::string &aDimension, const int &aBand, - const int &aMaxRank, SEXP apData, + const int &aMaxRank, const int &aAccuracy, SEXP apData, Rcpp::Nullable aMeasurementsVector = R_NilValue, Rcpp::Nullable aLocationsX = R_NilValue, Rcpp::Nullable aLocationsY = R_NilValue, diff --git a/inst/include/api/ExaGeoStat.hpp b/inst/include/api/ExaGeoStat.hpp index 54c61c16..623a9abb 100644 --- a/inst/include/api/ExaGeoStat.hpp +++ b/inst/include/api/ExaGeoStat.hpp @@ -14,8 +14,6 @@ #ifndef EXAGEOSTATCPP_EXAGEOSTAT_HPP #define EXAGEOSTATCPP_EXAGEOSTAT_HPP -#include - #include #include @@ -29,6 +27,16 @@ namespace exageostat::api { class ExaGeoStat { public: + /** + * @brief Generates Data using stage zero mean-trend + * @param[in] aConfigurations Reference to Configurations object containing user input data. + * @param[out] aData Reference to an ExaGeoStatData object where generated data will be stored. + * @return void + * + */ + static void ExaGeoStatGenerateMeanTrendData(configurations::Configurations &aConfigurations, + std::unique_ptr> &aData); + /** * @brief Generates Data whether it's synthetic data or real. * @param[in] aConfigurations Reference to Configurations object containing user input data. @@ -51,17 +59,6 @@ namespace exageostat::api { std::unique_ptr> &aData, T *apMeasurementsMatrix = nullptr); - - /** - * @brief Objective function used in optimization, and following the NLOPT objective function format. - * @param[in] aTheta An array of length n containing the current point in the parameter space. - * @param[in] aGrad An array of length n where you can optionally return the gradient of the objective function. - * @param[in] apInfo pointer containing needed configurations and data. - * @return double MLE results. 
- * - */ - static double ExaGeoStatMLETileAPI(const std::vector &aTheta, std::vector &aGrad, void *apInfo); - /** * @brief Predict missing measurements values. * @param[in] aConfigurations Reference to Configurations object containing user input data. @@ -76,7 +73,6 @@ namespace exageostat::api { ExaGeoStatPrediction(configurations::Configurations &aConfigurations, std::unique_ptr> &aData, T *apMeasurementsMatrix = nullptr, dataunits::Locations *apTrainLocations = nullptr, dataunits::Locations *apTestLocations = nullptr); - }; /** diff --git a/inst/include/common/Definitions.hpp b/inst/include/common/Definitions.hpp index 400e26df..65707b02 100644 --- a/inst/include/common/Definitions.hpp +++ b/inst/include/common/Definitions.hpp @@ -131,7 +131,8 @@ namespace exageostat::common { */ enum DescriptorType { CHAMELEON_DESCRIPTOR = 0, - HICMA_DESCRIPTOR = 1 + HICMA_DESCRIPTOR = 1, + PARSEC_DESCRIPTOR = 2 }; /** @@ -141,7 +142,8 @@ namespace exageostat::common { */ enum DataSourceType { SYNTHETIC = 0, - CSV_FILE = 1 + CSV_FILE = 1, + PARSEC_FILE = 2 }; /** @@ -205,6 +207,21 @@ namespace exageostat::common { DESCRIPTOR_SUM = 52, DESCRIPTOR_R = 53, DESCRIPTOR_R_COPY = 54, + DESCRIPTOR_F_DATA = 55, + DESCRIPTOR_ET1 = 56, + DESCRIPTOR_ET2 = 57, + DESCRIPTOR_EP = 58, + DESCRIPTOR_SLMN = 59, + DESCRIPTOR_IE = 60, + DESCRIPTOR_IO = 61, + DESCRIPTOR_P = 62, + DESCRIPTOR_D = 63, + DESCRIPTOR_FLMERA = 64, + DESCRIPTOR_ZLM = 65, + DESCRIPTOR_SC = 66, + DESCRIPTOR_F_SPATIAL = 67, + DESCRIPTOR_FLM = 68, + DESCRIPTOR_FLMT = 69 }; /** diff --git a/inst/include/configurations/Configurations.hpp b/inst/include/configurations/Configurations.hpp index 5e523cc8..d61c6a34 100644 --- a/inst/include/configurations/Configurations.hpp +++ b/inst/include/configurations/Configurations.hpp @@ -95,137 +95,97 @@ namespace exageostat::configurations { * */ void InitializeAllTheta(); - - /** - * @brief Initialize data generation arguments.. 
- * @return void - * - */ - void InitializeDataGenerationArguments(); - /** - * @brief Initialize data Modeling arguments. - * @return void - * - */ - void InitializeDataModelingArguments(); - - /** - * @brief Initialize data Prediction arguments. + * @brief Print the usage and accepted Arguments. * @return void * */ - void InitializeDataPredictionArguments(); + static void PrintUsage(); /** - * @brief Print the usage and accepted Arguments. + * @brief Validate the config through a set of if/else. + * @throw exception in case some if/else conditions are not met. * @return void * */ - static void PrintUsage(); + void ValidateConfiguration(); /** START OF THE COMMON ARGUMENTS BETWEEN ALL MODULES. **/ - CREATE_SETTER_FUNCTION(ProblemSize, int, aProblemSize, "ProblemSize") - - CREATE_GETTER_FUNCTION(ProblemSize, int, "ProblemSize") - - CREATE_SETTER_FUNCTION(KernelName, const std::string&, aKernel, "Kernel") - - CREATE_GETTER_FUNCTION(KernelName, const std::string&, "Kernel") - - CREATE_SETTER_FUNCTION(PGrid, int, aPGrid, "PGrid") - - CREATE_GETTER_FUNCTION(PGrid, int, "PGrid") - - CREATE_SETTER_FUNCTION(QGrid, int, aQGrid, "QGrid") - - CREATE_GETTER_FUNCTION(QGrid, int, "QGrid") - - CREATE_SETTER_FUNCTION(TimeSlot, int, aTimeSlot, "TimeSlot") + CREATE_SETTER_FUNCTION(ProblemSize, int, aProblemSize, "n") + CREATE_GETTER_FUNCTION(ProblemSize, int, "n") - CREATE_GETTER_FUNCTION(TimeSlot, int, "TimeSlot") + CREATE_SETTER_FUNCTION(KernelName, const std::string&, aKernel, "kernel") + CREATE_GETTER_FUNCTION(KernelName, const std::string&, "kernel") - CREATE_SETTER_FUNCTION(Computation, common::Computation, aComputation, "Computation") + CREATE_SETTER_FUNCTION(PGrid, int, aPGrid, "p") + CREATE_GETTER_FUNCTION(PGrid, int, "p") - CREATE_GETTER_FUNCTION(Computation, common::Computation, "Computation") + CREATE_SETTER_FUNCTION(QGrid, int, aQGrid, "q") + CREATE_GETTER_FUNCTION(QGrid, int, "q") - CREATE_SETTER_FUNCTION(Precision, common::Precision, aPrecision, "Precision") + 
CREATE_SETTER_FUNCTION(TimeSlot, int, aTimeSlot, "timeslot") + CREATE_GETTER_FUNCTION(TimeSlot, int, "timeslot") - CREATE_GETTER_FUNCTION(Precision, common::Precision, "Precision") + CREATE_SETTER_FUNCTION(Computation, common::Computation, aComputation, "computation") + CREATE_GETTER_FUNCTION(Computation, common::Computation, "computation") - CREATE_SETTER_FUNCTION(CoresNumber, int, aCoresNumbers, "CoresNumbers") + CREATE_SETTER_FUNCTION(Precision, common::Precision, aPrecision, "precision") + CREATE_GETTER_FUNCTION(Precision, common::Precision, "precision") - CREATE_GETTER_FUNCTION(CoresNumber, int, "CoresNumbers") + CREATE_SETTER_FUNCTION(CoresNumber, int, aCoresNumbers, "cores") + CREATE_GETTER_FUNCTION(CoresNumber, int, "cores") - CREATE_SETTER_FUNCTION(GPUsNumbers, int, aGPUsNumber, "GPUsNumbers") + CREATE_SETTER_FUNCTION(GPUsNumbers, int, aGPUsNumber, "gpus") + CREATE_GETTER_FUNCTION(GPUsNumbers, int, "gpus") - CREATE_GETTER_FUNCTION(GPUsNumbers, int, "GPUsNumbers") + CREATE_SETTER_FUNCTION(DenseTileSize, int, aTileSize, "dts") + CREATE_GETTER_FUNCTION(DenseTileSize, int, "dts") - CREATE_SETTER_FUNCTION(DenseTileSize, int, aTileSize, "DTS") + CREATE_SETTER_FUNCTION(LowTileSize, int, aTileSize, "lts") + CREATE_GETTER_FUNCTION(LowTileSize, int, "lts") - CREATE_GETTER_FUNCTION(DenseTileSize, int, "DTS") + CREATE_SETTER_FUNCTION(Band, int, aBand, "band") + CREATE_GETTER_FUNCTION(Band, int, "band") - CREATE_SETTER_FUNCTION(LowTileSize, int, aTileSize, "LTS") + CREATE_SETTER_FUNCTION(MaxRank, int, aMaxRank, "maxrank") + CREATE_GETTER_FUNCTION(MaxRank, int, "maxrank") - CREATE_GETTER_FUNCTION(LowTileSize, int, "LTS") + CREATE_SETTER_FUNCTION(ActualObservationsFilePath, const std::string&, aActualObservationsFilePath, "observationsfile") + CREATE_GETTER_FUNCTION(ActualObservationsFilePath, std::string, "observationsfile") - CREATE_SETTER_FUNCTION(Band, int, aBand, "Band") + CREATE_SETTER_FUNCTION(Seed, int, aSeed, "seed") + CREATE_GETTER_FUNCTION(Seed, int, "seed") - 
CREATE_GETTER_FUNCTION(Band, int, "Band") + CREATE_SETTER_FUNCTION(LoggerPath, const std::string&, aLoggerPath, "logpath") + CREATE_GETTER_FUNCTION(LoggerPath, std::string, "logpath") - CREATE_SETTER_FUNCTION(MaxRank, int, aMaxRank, "MaxRank") + CREATE_SETTER_FUNCTION(InitialTheta, const std::vector&, apTheta, "initialtheta") + CREATE_GETTER_FUNCTION(InitialTheta, std::vector&, "initialtheta") - CREATE_GETTER_FUNCTION(MaxRank, int, "MaxRank") + CREATE_SETTER_FUNCTION(IsOOC, bool, aIsOOC, "ooc") + CREATE_GETTER_FUNCTION(IsOOC, bool, "ooc") - CREATE_SETTER_FUNCTION(ActualObservationsFilePath, const std::string &, aActualObservationsFilePath, - "ActualObservationsFilePath") + CREATE_SETTER_FUNCTION(ApproximationMode, int, aApproximationMode, "approximationmode") + CREATE_GETTER_FUNCTION(ApproximationMode, int, "approximationmode") - CREATE_GETTER_FUNCTION(ActualObservationsFilePath, std::string, "ActualObservationsFilePath") + CREATE_SETTER_FUNCTION(Logger, bool, aLogger, "log") + CREATE_GETTER_FUNCTION(Logger, bool, "log") - CREATE_SETTER_FUNCTION(Seed, int, aSeed, "Seed") + CREATE_SETTER_FUNCTION(LowerBounds, const std::vector&, apTheta, "lb") + CREATE_GETTER_FUNCTION(LowerBounds, std::vector&, "lb") - CREATE_GETTER_FUNCTION(Seed, int, "Seed") + CREATE_SETTER_FUNCTION(UpperBounds, const std::vector&, apTheta, "ub") + CREATE_GETTER_FUNCTION(UpperBounds, std::vector&, "ub") - CREATE_SETTER_FUNCTION(LoggerPath, const std::string&, aLoggerPath, "LoggerPath") + CREATE_SETTER_FUNCTION(EstimatedTheta, const std::vector&, apTheta, "estimatedtheta") + CREATE_GETTER_FUNCTION(EstimatedTheta, std::vector&, "estimatedtheta") - CREATE_GETTER_FUNCTION(LoggerPath, std::string, "LoggerPath") + CREATE_SETTER_FUNCTION(StartingTheta, const std::vector&, apTheta, "startingtheta") + CREATE_GETTER_FUNCTION(StartingTheta, std::vector&, "startingtheta") - CREATE_SETTER_FUNCTION(InitialTheta, const std::vector &, apTheta, "InitialTheta") - - CREATE_GETTER_FUNCTION(InitialTheta, std::vector 
&, "InitialTheta") - - CREATE_SETTER_FUNCTION(IsOOC, bool, aIsOOC, "OOC") - - CREATE_GETTER_FUNCTION(IsOOC, bool, "OOC") - - CREATE_SETTER_FUNCTION(ApproximationMode, int, aApproximationMode, "ApproximationMode") - - CREATE_GETTER_FUNCTION(ApproximationMode, int, "ApproximationMode") - - CREATE_SETTER_FUNCTION(Logger, bool, aLogger, "Logger") - - CREATE_GETTER_FUNCTION(Logger, bool, "Logger") - - CREATE_SETTER_FUNCTION(LowerBounds, const std::vector &, apTheta, "LowerBounds") - - CREATE_GETTER_FUNCTION(LowerBounds, std::vector &, "LowerBounds") - - CREATE_SETTER_FUNCTION(UpperBounds, const std::vector &, apTheta, "UpperBounds") - - CREATE_GETTER_FUNCTION(UpperBounds, std::vector &, "UpperBounds") - - CREATE_SETTER_FUNCTION(EstimatedTheta, const std::vector &, apTheta, "EstimatedTheta") - - CREATE_GETTER_FUNCTION(EstimatedTheta, std::vector &, "EstimatedTheta") - - CREATE_SETTER_FUNCTION(StartingTheta, const std::vector &, apTheta, "StartingTheta") - - CREATE_GETTER_FUNCTION(StartingTheta, std::vector &, "StartingTheta") - - CREATE_SETTER_FUNCTION(IsNonGaussian, bool, aIsNonGaussian, "IsNonGaussian") - - CREATE_GETTER_FUNCTION(IsNonGaussian, bool, "IsNonGaussian") + CREATE_SETTER_FUNCTION(IsNonGaussian, bool, aIsNonGaussian, "isnongaussian") + CREATE_GETTER_FUNCTION(IsNonGaussian, bool, "isnongaussian") /** * @brief Getter for the verbosity. @@ -237,125 +197,113 @@ namespace exageostat::configurations { static void SetVerbosity(const common::Verbose &aVerbose); /** END OF THE COMMON ARGUMENTS BETWEEN ALL MODULES. **/ - /** START OF THE DATA GENERATION MODULES. **/ - CREATE_SETTER_FUNCTION(Dimension, exageostat::common::Dimension, aDimension, "Dimension") + /** START OF THE HICMA-PARSEC SPECIFIC ARGUMENTS. 
**/ - CREATE_GETTER_FUNCTION(Dimension, exageostat::common::Dimension, "Dimension") + CREATE_SETTER_FUNCTION(DenseBandDP, int, aDenseBandDP, "banddense") + CREATE_GETTER_FUNCTION(DenseBandDP, int, "banddense") - CREATE_SETTER_FUNCTION(IsSynthetic, bool, aIsSynthetic, "IsSynthetic") + CREATE_SETTER_FUNCTION(ObjectsNumber, int, aObjectsNumber, "objectsnumber") + CREATE_GETTER_FUNCTION(ObjectsNumber, int, "objectsnumber") - CREATE_GETTER_FUNCTION(IsSynthetic, bool, "IsSynthetic") + CREATE_SETTER_FUNCTION(AdaptiveDecision, int, aAdaptiveDecision, "adaptivedecision") + CREATE_GETTER_FUNCTION(AdaptiveDecision, int, "adaptivedecision") - CREATE_SETTER_FUNCTION(DataPath, const std::string&, aDataPath, "DataPath") + CREATE_SETTER_FUNCTION(DiagonalAddition, int, aDiagonalAddition, "adddiagonal") + CREATE_GETTER_FUNCTION(DiagonalAddition, int, "adddiagonal") - CREATE_GETTER_FUNCTION(DataPath, std::string, "DataPath") + CREATE_SETTER_FUNCTION(TimeSlotPerFile, int, aTimeSlotPerFile, "filetimeslot") + CREATE_GETTER_FUNCTION(TimeSlotPerFile, int, "filetimeslot") - /** END OF THE DATA GENERATION MODULES. **/ - /** START OF THE DATA MODELING MODULES. 
**/ + CREATE_SETTER_FUNCTION(FileNumber, int, aFileNumber, "filenumber") + CREATE_GETTER_FUNCTION(FileNumber, int, "filenumber") - CREATE_SETTER_FUNCTION(RecoveryFile, const std::string&, aRecoveryFile, "RecoveryFile") + CREATE_SETTER_FUNCTION(EnableInverse, bool, aEnableInverse, "enableinverse") + CREATE_GETTER_FUNCTION(EnableInverse, bool, "enableinverse") - CREATE_GETTER_FUNCTION(RecoveryFile, std::string, "RecoveryFile") + CREATE_SETTER_FUNCTION(MPIIO, bool, aMPIIO, "mpiio") + CREATE_GETTER_FUNCTION(MPIIO, bool, "mpiio") - CREATE_SETTER_FUNCTION(FileLogPath, FILE *, apFileLogPath, "FileLogPath") + CREATE_SETTER_FUNCTION(StageZero, bool, aIsEnabled, "stagezero") + CREATE_GETTER_FUNCTION(StageZero, bool, "stagezero") - CREATE_GETTER_FUNCTION(FileLogPath, FILE *, "FileLogPath") + CREATE_SETTER_FUNCTION(ForcingDataPath, const std::string&, aPath, "forcingdatapath") + CREATE_GETTER_FUNCTION(ForcingDataPath, std::string, "forcingdatapath") - CREATE_SETTER_FUNCTION(FileLogName, const std::string&, aFileLogName, "FileLogName") + CREATE_SETTER_FUNCTION(NetCDFDataPath, const std::string&, aPath, "netcdfdatapath") + CREATE_GETTER_FUNCTION(NetCDFDataPath, std::string, "netcdfdatapath") - CREATE_SETTER_FUNCTION(DistanceMetric, common::DistanceMetric, aDistanceMetric, "DistanceMetric") + CREATE_SETTER_FUNCTION(StartYear, int, aStartYear, "startyear") + CREATE_GETTER_FUNCTION(StartYear, int, "startyear") - CREATE_GETTER_FUNCTION(DistanceMetric, common::DistanceMetric, "DistanceMetric") + CREATE_SETTER_FUNCTION(EndYear, int, aEndYear, "endyear") + CREATE_GETTER_FUNCTION(EndYear, int, "endyear") - CREATE_SETTER_FUNCTION(MaxMleIterations, int, aMaxMleIterations, "MaxMleIterations") +/** END OF THE HICMA-PARSEC SPECIFIC ARGUMENTS. **/ +/** START OF THE DATA GENERATION MODULES. 
**/ - CREATE_GETTER_FUNCTION(MaxMleIterations, int, "MaxMleIterations") + CREATE_SETTER_FUNCTION(Dimension, exageostat::common::Dimension, aDimension, "dimension") + CREATE_GETTER_FUNCTION(Dimension, exageostat::common::Dimension, "dimension") - CREATE_SETTER_FUNCTION(Accuracy, int, aAccuracy, "Accuracy") + CREATE_SETTER_FUNCTION(IsSynthetic, bool, aIsSynthetic, "issynthetic") + CREATE_GETTER_FUNCTION(IsSynthetic, bool, "issynthetic") - CREATE_GETTER_FUNCTION(Accuracy, int, "Accuracy") + CREATE_SETTER_FUNCTION(DataPath, const std::string&, aDataPath, "datapath") + CREATE_GETTER_FUNCTION(DataPath, std::string, "datapath") - void SetTolerance(double aTolerance); - - CREATE_GETTER_FUNCTION(Tolerance, double, "Tolerance") - - /** END OF THE DATA MODELING MODULES. **/ - /** START OF THE DATA PREDICTION MODULES. **/ + // Results output directory path (for CSVs and parameters) + CREATE_SETTER_FUNCTION(ResultsPath, const std::string&, aResultsPath, "resultspath") + CREATE_GETTER_FUNCTION(ResultsPath, std::string, "resultspath") - CREATE_SETTER_FUNCTION(UnknownObservationsNb, int, aUnknownObservationsNumber, "UnknownObservationsNb") + // Number of locations for Stage Zero + CREATE_SETTER_FUNCTION(NumLocs, int, aNumLocs, "numlocs") + CREATE_GETTER_FUNCTION(NumLocs, int, "numlocs") - CREATE_GETTER_FUNCTION(UnknownObservationsNb, int, "UnknownObservationsNb") +/** END OF THE DATA GENERATION MODULES. **/ +/** START OF THE DATA MODELING MODULES. 
**/ - CREATE_SETTER_FUNCTION(IsMSPE, bool, aIsMSPE, "IsMSPE") + CREATE_SETTER_FUNCTION(RecoveryFile, const std::string&, aRecoveryFile, "recoveryfile") + CREATE_GETTER_FUNCTION(RecoveryFile, std::string, "recoveryfile") - CREATE_GETTER_FUNCTION(IsMSPE, bool, "IsMSPE") + CREATE_SETTER_FUNCTION(FileLogPath, FILE *, apFileLogPath, "filelogpath") + CREATE_GETTER_FUNCTION(FileLogPath, FILE *, "filelogpath") - CREATE_SETTER_FUNCTION(IsIDW, bool, aIsIDW, "IsIDW") + CREATE_SETTER_FUNCTION(FileLogName, const std::string&, aFileLogName, "filelogname") - CREATE_GETTER_FUNCTION(IsIDW, bool, "IsIDW") + CREATE_SETTER_FUNCTION(DistanceMetric, common::DistanceMetric, aDistanceMetric, "distancemetric") + CREATE_GETTER_FUNCTION(DistanceMetric, common::DistanceMetric, "distancemetric") - CREATE_SETTER_FUNCTION(IsMLOEMMOM, bool, aIsMLOEMMOM, "IsMLOEMMOM") + CREATE_SETTER_FUNCTION(MaxMleIterations, int, aMaxMleIterations, "maxmleiterations") + CREATE_GETTER_FUNCTION(MaxMleIterations, int, "maxmleiterations") - CREATE_GETTER_FUNCTION(IsMLOEMMOM, bool, "IsMLOEMMOM") + CREATE_SETTER_FUNCTION(Accuracy, int, aAccuracy, "accuracy") + CREATE_GETTER_FUNCTION(Accuracy, int, "accuracy") - CREATE_SETTER_FUNCTION(IsFisher, bool, aIsFisher, "IsFisher") - - CREATE_GETTER_FUNCTION(IsFisher, bool, "IsFisher") + void SetTolerance(double aTolerance); + CREATE_GETTER_FUNCTION(Tolerance, double, "tolerance") - CREATE_SETTER_FUNCTION(ObservationNumber, int, aObservationsNumber, "ObservationNumber") + /** END OF THE DATA MODELING MODULES. **/ + /** START OF THE DATA PREDICTION MODULES. **/ - CREATE_GETTER_FUNCTION(ObservationNumber, int, "ObservationNumber") + CREATE_SETTER_FUNCTION(UnknownObservationsNb, int, aUnknownObservationsNumber, "zmiss") + CREATE_GETTER_FUNCTION(UnknownObservationsNb, int, "zmiss") - /** END OF THE DATA PREDICTION MODULES. **/ + CREATE_SETTER_FUNCTION(IsMSPE, bool, aIsMSPE, "mspe") + CREATE_GETTER_FUNCTION(IsMSPE, bool, "mspe") - /** - * @brief Check if input value is numerical. 
- * @param[in] aValue The input from the user side. - * @return The int casted value. - * - */ - static int CheckNumericalValue(const std::string &aValue); - - /** - * @brief Checks the value of the dimension parameter. - * @param[in] aDimension A string represents the dimension. - * @return The corresponding dimension value. - * - */ - static exageostat::common::Dimension CheckDimensionValue(const std::string &aDimension); + CREATE_SETTER_FUNCTION(IsIDW, bool, aIsIDW, "idw") + CREATE_GETTER_FUNCTION(IsIDW, bool, "idw") - /** - * @brief Checks if the kernel value is valid. - * @param[in] aKernel The kernel to check. - * @return void - * - */ - void CheckKernelValue(const std::string &aKernel); + CREATE_SETTER_FUNCTION(IsMLOEMMOM, bool, aIsMLOEMMOM, "mloemmom") + CREATE_GETTER_FUNCTION(IsMLOEMMOM, bool, "mloemmom") - /** - * @brief Check input computation value. - * @param[in] aValue The input from the user side. - * @return Enum with the selected computation, Error if not exist. - * - */ - static common::Computation CheckComputationValue(const std::string &aValue); + CREATE_SETTER_FUNCTION(IsFisher, bool, aIsFisher, "fisher") + CREATE_GETTER_FUNCTION(IsFisher, bool, "fisher") - /** - * @brief Check input precision value. - * @param[in] aValue The input from the user side. - * @return Enum with the selected Precision, Error if not exist. - * - */ - static common::Precision CheckPrecisionValue(const std::string &aValue); + CREATE_SETTER_FUNCTION(ObservationNumber, int, aObservationsNumber, "observationnumber") + CREATE_GETTER_FUNCTION(ObservationNumber, int, "observationnumber") - /** - * @brief Checks the value of the unknown observations parameter. - * @param[in] aValue A string represents the number of unknown observations. - * @return The corresponding integer value. - * - */ - int CheckUnknownObservationsValue(const std::string &aValue); + /** END OF THE DATA PREDICTION MODULES. **/ /** * @brief Initialize a vector with a given size to contain zeros. 
@@ -380,42 +328,7 @@ namespace exageostat::configurations { */ int CalculateZObsNumber(); - /** - * @brief Parses a string of theta values and returns an array of doubles. - * @param[in] aInputValues The input string of theta values. - * @return A vector of parsed theta values. - * - */ - static std::vector ParseTheta(const std::string &aInputValues); - - - /** - * @brief parse user's input to distance metric. - * @param[in] aDistanceMetric string specifying the used distance metric. - * @return void - * - */ - void ParseDistanceMetric(const std::string &aDistanceMetric); - private: - - /** - * @brief Checks the run mode and sets the verbosity level. - * @param[in] aVerbosity A string represents the desired run mode ("verbose" or "standard"). - * @throws std::range_error if the input string is not "verbose" or "standard". - * @return void - * - */ - static void ParseVerbose(const std::string &aVerbosity); - - /** - * @brief Checks if a given string is in camel case format. - * @param[in] aString The string to check. - * @return true if the string is in camel case format, false otherwise. - * - */ - static bool IsCamelCase(const std::string &aString); - /// Used Dictionary std::unordered_map mDictionary; /// Used Argument counter diff --git a/inst/include/configurations/Parser.hpp b/inst/include/configurations/Parser.hpp new file mode 100644 index 00000000..0d465de6 --- /dev/null +++ b/inst/include/configurations/Parser.hpp @@ -0,0 +1,61 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** +* @file Parser.hpp +* @version 1.1.0 +* @brief Contains the declaration of the Parser class and its member functions for configuration parsing. +* @details Provides static methods to parse command-line arguments and JSON configuration files, as well as utility functions for string transformations. 
+* @author Mahmoud ElKarargy +* @date 2024-12-11 +**/ + +#ifndef EXAGEOSTAT_CPP_PARSER_HPP +#define EXAGEOSTAT_CPP_PARSER_HPP + +#include +#include + +#include + +namespace exageostat::configurations::parser { + + /** + * @class Parser + * @brief A class containing static methods for parsing configurations and utility functions. + */ + class Parser { + public: + + /** + * @brief Parses command-line arguments and extracts them into a key-value map. + * @param[in] aArgC The number of command-line arguments. + * @param[in] apArgV The array of command-line arguments. + * @param[in,out] apConfigurationMap The configuration map to fill. + * @return void. + * + */ + static void ParseCLI(const int &aArgC, char **apArgV, std::unordered_map &apConfigurationMap); + + /** + * @brief Parses a JSON configuration file and extracts its contents into a key-value map. + * @param[in] aJSONFile The path to the JSON file. + * @param[in,out] apConfigurationMap The configuration map to fill. + * @return void. + * + */ + static void ParseJSON(const std::string &aJSONFile, std::unordered_map &apConfigurationMap); + + /** + * @brief Converts a given string to all-lowercase format. + * @param[in] arg The input string to convert. + * @return The converted string. + */ + static std::string ProcessKeyString(const std::string &arg); + + }; +}// namespace exageostat::configurations::parser + +#endif // EXAGEOSTAT_CPP_PARSER_HPP diff --git a/inst/include/configurations/Validator.hpp b/inst/include/configurations/Validator.hpp new file mode 100644 index 00000000..3e649afa --- /dev/null +++ b/inst/include/configurations/Validator.hpp @@ -0,0 +1,141 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). 
+ +/** +* @file Validator.hpp +* @version 1.1.0 +* @brief Contains the declaration of the Validator class and its member functions for configuration validation. +* @details Provides a set of static methods to validate the input configuration parsed from CLI or JSON for the ExaGeoStat software package. +* @author Mahmoud ElKarargy +* @date 2024-12-11 +**/ + +#ifndef EXAGEOSTAT_CPP_VALIDATOR_HPP +#define EXAGEOSTAT_CPP_VALIDATOR_HPP + +#include + +namespace exageostat::configurations::validator { + + /** + * @class Validator + * @brief A class containing static methods for validating configuration parameters. + */ + class Validator { + public: + + /** + * @brief Validates the configuration parameters parsed from the CLI or JSON input. + * @param[in,out] apArgsMap A map containing configuration parameters and their values. + * @return void + * + */ + static void Validate(std::unordered_map &apArgsMap); + + /** + * @brief Parses a string of theta values and converts them to a vector of doubles. + * @param[in] aInputValues The input string containing theta values. + * @return A vector of parsed theta values as doubles. + * + */ + static std::vector CheckThetaValue(const std::string &aInputValues); + + /** + * @brief Validates and converts a string representing a tolerance value into a double. + * @param[in] aTolerance The input string for the tolerance value. + * @return The validated and converted tolerance value as a double. + * + */ + static double CheckToleranceValue(const std::string& aTolerance); + + /** + * @brief Validates a string representing a file name and returns file handler. + * @param[in] aFileLogPath The input string for the file name. + * @return The file handler of the validated file name. + * + */ + static FILE *CheckLogFileValue(const std::string &aFileLogPath); + + /** + * @brief Validates a string representing a kernel name. + * @param[in] aKernel The input string for the kernel name. + * @return The validated kernel name as a string. 
+ * + */ + static std::string CheckKernelValue(const std::string &aKernel); + + /** + * @brief Checks if the input string is in camel case format. + * @param[in] aString The input string to validate. + * @return True if the string is in camel case; otherwise, false. + * + */ + bool static IsCamelCase(const std::string &aString); + + /** + * @brief Validates and converts a verbosity level string into a Verbose enum. + * @param[in] aVerbosity The input string for the verbosity level. + * @return The validated Verbose enum value. + * + */ + static common::Verbose CheckVerboseValue(const std::string &aVerbosity); + + /** + * @brief Validates and converts a string into a Precision enum. + * @param[in] aValue The input string representing precision. + * @return The validated Precision enum value. + * + */ + static common::Precision CheckPrecisionValue(const std::string &aValue); + + /** + * @brief Validates and converts a string into a Computation enum. + * @param[in] aValue The input string representing computation type. + * @return The validated Computation enum value. + * + */ + static common::Computation CheckComputationValue(const std::string &aValue); + + /** + * @brief Validates and converts a string into a boolean value. + * @param[in] aBooleanValue The input string representing a boolean value. + * @return The validated boolean value. + * + */ + static bool CheckBoolValue(const std::string& aBooleanValue); + + /** + * @brief Validates and converts a string into a DistanceMetric enum. + * @param[in] aDistanceMetric The input string representing the distance metric. + * @return The validated DistanceMetric enum value. + * + */ + static common::DistanceMetric CheckDistanceMetricValue(const std::string &aDistanceMetric); + + /** + * @brief Validates and converts a string into an integer. + * @param[in] aValue The input string representing a numerical value. + * @return The validated integer value. 
+ * + */ + static int CheckNumericalValue(const std::string &aValue); + + /** + * @brief Validates and converts a string into a Dimension enum. + * @param[in] aDimension The input string representing the dimension. + * @return The validated Dimension enum value. + * + */ + static common::Dimension CheckDimensionValue(const std::string &aDimension); + + private: + /// A map of validation functions for different parameter types. + static const std::unordered_map> mCheckersMap; + /// A map linking arguments to their categories. + static const std::unordered_map mArgumentToCategoryMap; + }; +}// namespace exageostat::configurations::validator + +#endif // EXAGEOSTAT_CPP_VALIDATOR_HPP diff --git a/inst/include/data-analyzer/DataAnalyzer.hpp b/inst/include/data-analyzer/DataAnalyzer.hpp new file mode 100644 index 00000000..f9f50379 --- /dev/null +++ b/inst/include/data-analyzer/DataAnalyzer.hpp @@ -0,0 +1,65 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file DataAnalyzer.hpp + * @brief Contains the definition of the DataAnalyzer class. + * @version 1.1.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @date 2024-10-15 +**/ + +#ifndef EXAGEOSTATCPP_DATAANALYZER_HPP +#define EXAGEOSTATCPP_DATAANALYZER_HPP + +#include +#include + +namespace exageostat::analyzer{ + /** + * @brief Class represents the data analyzer for the Climate Emulator. + * @tparam T Data Type: float or double + */ + template + class DataAnalyzer { + + public: + + /** + * @brief Analyzes the given matrix data pre computation. + * @param[in, out] aData Reference to an ExaGeoStatData object that contains matrix to be analyzed. + * @return void + * + */ + static void PreAnalyzeMatrix(std::unique_ptr> &aData); + + /** + * @brief Analyzes the given matrix data post computation. 
+ * @param[in, out] aData Reference to an ExaGeoStatData object that contains matrix to be analyzed. + * @return void + * + */ + static void PostAnalyzeMatrix(std::unique_ptr> &aData); + + /** + * @brief Compares between two matrices by getting the difference. + * @param[in, out] aData Reference to an ExaGeoStatData object that contains matrix to be analyzed. + * @return the calculated MSE. + * + */ + static double CompareMatDifference(std::unique_ptr> &aData); + }; + + /** + * @brief Instantiates the DataAnalyzer class for float and double types. + * @tparam T Data Type: float or double + * + */ + EXAGEOSTAT_INSTANTIATE_CLASS(DataAnalyzer) + +}//namespace exageostat::analyzer + +#endif // EXAGEOSTATCPP_DATAANALYZER_HPP \ No newline at end of file diff --git a/inst/include/data-generators/DataGenerator.hpp b/inst/include/data-generators/DataGenerator.hpp index 2e5d4344..9f27e933 100644 --- a/inst/include/data-generators/DataGenerator.hpp +++ b/inst/include/data-generators/DataGenerator.hpp @@ -61,8 +61,8 @@ namespace exageostat::generators { protected: - /// Used enum for data generators types. - static common::DataSourceType aDataSourceType; + /// Used flag to determine if data generated is synthetic. + static bool aIsSynthetic; }; /** diff --git a/inst/include/data-generators/concrete/StageZeroGenerator.hpp b/inst/include/data-generators/concrete/StageZeroGenerator.hpp new file mode 100644 index 00000000..009aae3a --- /dev/null +++ b/inst/include/data-generators/concrete/StageZeroGenerator.hpp @@ -0,0 +1,226 @@ +#ifndef EXAGEOSTAT_STAGEZEROGENERATOR_HPP +#define EXAGEOSTAT_STAGEZEROGENERATOR_HPP + +#include "../DataGenerator.hpp" +#include +#include +#include +#include +#include + +// Forward declarations to avoid heavy includes +namespace exageostat { namespace kernels { template class Kernel; } } + +// Fallback define for template instantiation macro +#ifndef EXAGEOSTAT_INSTANTIATE_CLASS +#define EXAGEOSTAT_INSTANTIATE_CLASS(...) 
+#endif + +namespace exageostat::generators::stagezero { + + /** + * @brief Holds runtime configuration and state for Stage Zero. + */ + struct StageZeroArgs { + // Model/config + int mM = 10; // harmonics for mean-trend + int mT = 365 * 24; // period in hours (used to compute N) + int mNoYears = 751; // forcing length (does not change obs N) + int mNumParams = 1; // number of optimized parameters (theta) + size_t mN = 365 * 24 * 3; // observations count default (T * years) + int mNumLocs = 2; // number of locations + + // Optimization bounds and vectors + double *mStartingTheta = nullptr; + double *mTargetTheta = nullptr; + double *mInitialTheta = nullptr; + double *mLb = nullptr; + double *mUp = nullptr; + + // Forcing and input buffers + double *mForcing = nullptr; + double **mT2mHourlyPerYear = nullptr; + int *mT2mHourlyPerYearCount = nullptr; + + // Runtime grid and flags + int mPGrid = 1; + int mQGrid = 1; + int mZMiss = 0; + int mLog = 0; + exageostat::configurations::Configurations *mConfigs = nullptr; + int mNcid = 0; + int mAsync = 0; + int mDiagThick = 1; + int mCheck = 0; + int mHicmaMaxRank = 0; + + // CHAMELEON descriptors + void *mpDescZ = nullptr; // Z observations vector + void *mpX = nullptr; // Design matrix X + void *mpXtX = nullptr; // X^T * X matrix + void *mpDescPart1 = nullptr; // part1 scalar + void *mpDescPart2 = nullptr; // part2 scalar + void *mpPart2Vector = nullptr; // part2_vector + void *mpEstimatedMeanTrend = nullptr; // estimated mean trend + + // Scalars/counters + double mPart1 = 0.0; + double mPart2 = 0.0; + int mIterCount = 0; + int mCurrentLocation = 0; + }; + + /** + * @class StageZeroGenerator + * @brief Stage Zero pipeline: read inputs, build mean-trend, optimize, and write CSV outputs. + * @tparam T float or double + */ + template + class StageZeroGenerator : public DataGenerator { + + public: + + /** + * @brief Get a pointer to the singleton instance. + * @return Pointer to the `StageZeroGenerator` instance. 
+ */ + static StageZeroGenerator *GetInstance(); + + /** + * @brief Configure and run Stage Zero, returning the generated data container. + * @param aConfigurations Global configuration object. + * @param aKernel Kernel (unused by Stage Zero mean-trend; retained for API compatibility). + * @return Unique pointer to `ExaGeoStatData`. + */ + std::unique_ptr> + CreateData(configurations::Configurations &aConfigurations, + exageostat::kernels::Kernel &aKernel) override; + + /** + * @brief Release the singleton instance. + */ + static void ReleaseInstance(); + + /** + * @brief Core MLE objective (apObj != nullptr) and final compute+write path (apObj == nullptr). + * @param aThetaVec Current theta vector. + * @param aGrad Gradient vector (unused by BOBYQA). + * @param apObj When non-null, acts as NLopt objective; when null, runs final compute and writes outputs. + * @return Objective value (negative log-likelihood component in objective mode; -sigma^2 in final path). + */ + double MLEAlgorithm(const std::vector &aThetaVec, + std::vector &aGrad, void *apObj); + + private: + /** + * @brief NLopt objective callback. + * @param aN Number of parameters. + * @param aTheta Parameter vector. + * @param aGrad Gradient vector. + * @param aData Pointer to StageZeroGenerator instance. + * @return Objective value. + */ + static double StageZeroObjectiveCallback(unsigned aN, const double *aTheta, double *aGrad, void *aData); + + /** + * @brief Orchestrate Stage Zero pipeline (configure, allocate, load, optimize, cleanup). + * @param aConfigurations Global configuration object. + */ + void Runner(configurations::Configurations &aConfigurations); + + /** + * @brief Initialize internal arguments from configuration (years, N, locations, bounds...). + */ + void ConfigureGenerator(); + + /** + * @brief Read NetCDF observations for the configured year range into per-location buffers. + */ + void ReadNetCDFFiles(); + + /** + * @brief Load forcing series from file (length `mNoYears`). 
+ */ + void ReadForcingData(); + + /** + * @brief Allocate arrays and copy configuration vectors (bounds, theta, forcing, buffers). + */ + void Allocate(); + + /** + * @brief Execute mean-trend optimization per location and write CSV outputs. + */ + void RunMeanTrend(); + + /** + * @brief Leap year helper. + * @param aYear Year integer. + * @return true if leap year, else false. + */ + bool IsLeapYear(const int &aYear); + + /** + * @brief Read a text file of doubles (one per line) into a heap array. + * @param aFileName Path to input file. + * @param aNumLoc Number of values to read (capacity). + * @return Pointer to newly allocated array (caller owns). + */ + double * ReadObsFile(char *aFileName, const int &aNumLoc); + + /** + * @brief Free allocated arrays and CHAMELEON descriptors. + */ + void CleanUp(); + + /** + * @brief Create CHAMELEON descriptors for Z, X, XtX, part scalars and buffers. + */ + void SetupMLEComponents(); + + /** + * @brief Copy all locations to Z (batch path; per-location path preferred). + */ + void ConvertT2MToZ(); + + /** + * @brief Copy a single location's observations into the Z descriptor. + * @param location_index Index of the location to copy. + */ + void ConvertT2MToZForLocation(int location_index); + + /** + * @brief Generate design matrix X (column-major) for the mean-trend model. + * @param matrix LAPACK-style buffer to write (size m*n). + * @param m Number of rows (observations). + * @param n Number of columns (parameters). + * @param m0 Row offset. + * @param n0 Column offset. + * @param localtheta [theta, T, M, forcing...]. + */ + void GenerateDesignMatrixExact(double *matrix, int m, int n, int m0, int n0, double *localtheta); + + /** + * @brief Constructor for the StageZeroGenerator class. + * @return void + */ + StageZeroGenerator() = default; + + /** + * @brief Default destructor. 
+ */ + ~StageZeroGenerator() override = default; + + // Pointer to the singleton instance + static StageZeroGenerator *mpInstance; + + private: + + std::unique_ptr> mData; + StageZeroArgs mArgs; + + }; + +} // namespace exageostat + +#endif //EXAGEOSTAT_STAGEZEROGENERATOR_HPP diff --git a/inst/include/data-generators/concrete/StageZeroGeneratorParsec.hpp b/inst/include/data-generators/concrete/StageZeroGeneratorParsec.hpp new file mode 100644 index 00000000..91413d76 --- /dev/null +++ b/inst/include/data-generators/concrete/StageZeroGeneratorParsec.hpp @@ -0,0 +1,226 @@ +#ifndef EXAGEOSTAT_STAGEZEROGENERATORPARSEC_HPP +#define EXAGEOSTAT_STAGEZEROGENERATORPARSEC_HPP + +#include "../DataGenerator.hpp" +#include +#include +#include +#include +#include + +// Forward declarations to avoid heavy includes +namespace exageostat { + namespace kernels { template class Kernel; } + namespace configurations { class Configurations; } +} + +// Fallback define for template instantiation macro +#ifndef EXAGEOSTAT_INSTANTIATE_CLASS +#define EXAGEOSTAT_INSTANTIATE_CLASS(...) +#endif + +// PaRSEC includes +extern "C" { +#include +#include +#include +#include +} + +namespace exageostat::generators::stagezero { + + /** + * @brief Holds runtime configuration and state for Stage Zero with PaRSEC. 
+ */ + struct StageZeroArgsParsec { + // Model/config + int mM = 10; // harmonics for mean-trend + int mT = 365 * 24; // period in hours (used to compute N) + int mNoYears = 751; // forcing length (does not change obs N) + int mNumParams = 1; // number of optimized parameters (theta) + size_t mN = 365 * 24 * 3; // observations count default (T * years) + int mNumLocs = 2; // number of locations + + // Optimization bounds and vectors + double *mStartingTheta = nullptr; + double *mTargetTheta = nullptr; + double *mInitialTheta = nullptr; + double *mLb = nullptr; + double *mUp = nullptr; + + // Forcing and input buffers + double *mForcing = nullptr; + double **mT2mHourlyPerYear = nullptr; + int *mT2mHourlyPerYearCount = nullptr; + + // Runtime grid and flags + int mPGrid = 1; + int mQGrid = 1; + int mZMiss = 0; + int mLog = 0; + exageostat::configurations::Configurations *mConfigs = nullptr; + int mNcid = 0; + int mAsync = 0; + int mDiagThick = 1; + int mCheck = 0; + int mHicmaMaxRank = 0; + + // PaRSEC descriptors (replacing CHAMELEON descriptors) + parsec_matrix_block_cyclic_t *mpDescZ = nullptr; // Z observations vector + parsec_matrix_block_cyclic_t *mpX = nullptr; // X design matrix + parsec_matrix_block_cyclic_t *mpXtX = nullptr; // X^T * X matrix + parsec_matrix_block_cyclic_t *mpDescPart1 = nullptr; // part1 scalar + parsec_matrix_block_cyclic_t *mpDescPart2 = nullptr; // part2 scalar + parsec_matrix_block_cyclic_t *mpPart2Vector = nullptr; // part2 vector + parsec_matrix_block_cyclic_t *mpEstimatedMeanTrend = nullptr; // estimated mean trend + + // PaRSEC context + parsec_context_t *mpParsecContext = nullptr; + + // Iteration tracking + int mIterCount = 0; + int mCurrentLocation = 0; + }; + + /** + * @brief Stage Zero data generator using PaRSEC/DPLASMA for climate data preprocessing. + * @details This class implements mean trend removal from time series climate data + * using PaRSEC runtime and DPLASMA linear algebra operations. 
+ * @tparam T Data Type: float or double + */ + template + class StageZeroGeneratorParsec : public DataGenerator { + public: + /** + * @brief Creates data using the Stage Zero mean trend removal pipeline. + * @param[in] aConfigurations Reference to Configurations object. + * @param[in] aKernel Reference to Kernel object. + * @return Unique pointer to ExaGeoStatData object. + */ + std::unique_ptr> CreateData(exageostat::configurations::Configurations &aConfigurations, + exageostat::kernels::Kernel &aKernel) override; + + /** + * @brief Gets the singleton instance of StageZeroGeneratorParsec. + * @return Pointer to StageZeroGeneratorParsec instance. + */ + static StageZeroGeneratorParsec *GetInstance(); + + /** + * @brief Releases the singleton instance. + */ + static void ReleaseInstance(); + + private: + /// Singleton instance + static StageZeroGeneratorParsec *mpInstance; + + /// Runtime arguments and state + StageZeroArgsParsec mArgs; + + /// Data object (for compatibility with base class) + std::unique_ptr> mData; + + /** + * @brief Main execution pipeline for Stage Zero. + * @param[in] aConfigurations Reference to Configurations object. + */ + void Runner(exageostat::configurations::Configurations &aConfigurations); + + /** + * @brief Configures the generator with parameters. + */ + void ConfigureGenerator(); + + /** + * @brief Allocates memory for arrays and descriptors. + */ + void Allocate(); + + /** + * @brief Reads NetCDF files for climate data. + */ + void ReadNetCDFFiles(); + + /** + * @brief Reads forcing data from CSV file. + */ + void ReadForcingData(); + + /** + * @brief Runs the mean trend removal algorithm using PaRSEC/DPLASMA. + */ + void RunMeanTrend(); + + /** + * @brief Sets up PaRSEC components and descriptors. + */ + void SetupMLEComponents(); + + /** + * @brief Converts T2M data to Z vector for a specific location. + * @param[in] location_index Index of the location to process. 
+ */ + void ConvertT2MToZForLocation(int location_index); + + /** + * @brief Generates the design matrix for mean trend modeling. + * @param[in] matrix Pointer to matrix data. + * @param[in] m Number of rows. + * @param[in] n Number of columns. + * @param[in] m0 Starting row index. + * @param[in] n0 Starting column index. + * @param[in] localtheta Local theta parameters. + */ + void GenerateDesignMatrixExact(double *matrix, int m, int n, int m0, int n0, double *localtheta); + + /** + * @brief MLE algorithm implementation using PaRSEC/DPLASMA. + * @param[in] aThetaVec Vector of theta parameters. + * @param[out] aGrad Gradient vector (unused in this implementation). + * @param[in] apObj Pointer to objective function data. + * @return Objective function value. + */ + double MLEAlgorithm(const std::vector &aThetaVec, + std::vector &aGrad, void *apObj); + + /** + * @brief Objective function callback for NLOPT optimization. + * @param[in] aN Number of parameters. + * @param[in] aTheta Parameter vector. + * @param[out] aGrad Gradient vector. + * @param[in] aData Pointer to generator instance. + * @return Objective function value. + */ + static double StageZeroObjectiveCallback(unsigned aN, const double *aTheta, double *aGrad, void *aData); + + /** + * @brief Checks if a year is a leap year. + * @param[in] aYear Year to check. + * @return True if leap year, false otherwise. + */ + bool IsLeapYear(const int &aYear); + + /** + * @brief Reads observation data from file. + * @param[in] aFileName File name to read from. + * @param[in] aNumLoc Number of locations. + * @return Pointer to data array. + */ + double *ReadObsFile(char *aFileName, const int &aNumLoc); + + /** + * @brief Cleans up allocated memory and descriptors. + */ + void CleanUp(); + }; + + /** + * @brief Instantiates the StageZeroGeneratorParsec class for float and double types. 
+ * @tparam T Data Type: float or double + */ + EXAGEOSTAT_INSTANTIATE_CLASS(StageZeroGeneratorParsec) + +}//namespace exageostat::generators::stagezero + +#endif //EXAGEOSTAT_STAGEZEROGENERATORPARSEC_HPP diff --git a/inst/include/data-loader/DataLoader.hpp b/inst/include/data-loader/DataLoader.hpp index 5f530bd6..931cdec0 100644 --- a/inst/include/data-loader/DataLoader.hpp +++ b/inst/include/data-loader/DataLoader.hpp @@ -67,6 +67,35 @@ namespace exageostat::dataLoader { virtual void WriteData(const T &aMatrixPointer, const int &aProblemSize, const int &aP, std::string &aLoggerPath, exageostat::dataunits::Locations &aLocations) = 0; + + /** + * @brief Abstract method for loading data based on provided configurations and kernel. + * @param[in] aConfigurations Reference to the configurations object that contains parameters for loading data. + * @param[in] aKernel Reference to the kernel object that defines the operations to be applied while loading the data. + * @return A unique pointer to the loaded ExaGeoStatData object. + * + */ + virtual std::unique_ptr> + LoadData(configurations::Configurations &aConfigurations, exageostat::kernels::Kernel &aKernel) = 0; + + /** + * @brief Factory method for creating a DataLoader instance based on the given configurations. + * This method dynamically determines the type of data loader to instantiate based on compile-time conditions. + * @param[in] apConfigurations Reference to the configurations object that contains parameters for loading data. + * @return A unique pointer to a DataLoader instance configured as per the specified runtime conditions. + * + */ + static std::unique_ptr> + CreateDataLoader(exageostat::configurations::Configurations &apConfigurations); + + /** + * @brief Releases the singleton instance of the currently active DataLoader. + * This method ensures proper deallocation of the singleton instance of the data loader, + * depending on the selected runtime. 
+ * + */ + static void ReleaseDataLoader(); + }; /** diff --git a/inst/include/data-loader/concrete/CSVLoader.hpp b/inst/include/data-loader/concrete/CSVLoader.hpp index 52b185bd..153d8922 100644 --- a/inst/include/data-loader/concrete/CSVLoader.hpp +++ b/inst/include/data-loader/concrete/CSVLoader.hpp @@ -5,7 +5,7 @@ /** * @file CSVLoader.hpp - * @brief A class for generating synthetic data. + * @brief A class for loading csv format data. * @version 1.1.0 * @author Mahmoud ElKarargy * @author Sameh Abdulah @@ -54,6 +54,14 @@ namespace exageostat::dataLoader::csv { exageostat::dataunits::Locations &aLocations) override; /** + * @brief Loads data based on given configuration. + * @copydoc DataLoader::LoadData() + * + */ + std::unique_ptr> + LoadData(configurations::Configurations &aConfigurations, exageostat::kernels::Kernel &aKernel) override; + + /** * @brief Release the singleton instance of the CSVLoader class. * @return void * diff --git a/inst/include/data-loader/concrete/ParsecLoader.hpp b/inst/include/data-loader/concrete/ParsecLoader.hpp new file mode 100644 index 00000000..6d0a1b70 --- /dev/null +++ b/inst/include/data-loader/concrete/ParsecLoader.hpp @@ -0,0 +1,121 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file ParsecLoader.hpp + * @brief A class for loading PaRSEC format data. + * @version 1.1.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @date 2024-02-04 +**/ + +#ifndef EXAGEOSTAT_CPP_PARSECDATALOADER_HPP +#define EXAGEOSTAT_CPP_PARSECDATALOADER_HPP + +#include + +namespace exageostat::dataLoader::parsec { + + /** + * @class ParsecLoader + * @brief A class for creating data by reading PaRSEC files. 
+ * @tparam T Data Type: float or double + */ + template + class ParsecLoader : public DataLoader { + public: + + /** + * @brief Get a pointer to the singleton instance of the ParsecLoader class. + * @return A pointer to the instance of the ParsecLoader class. + * + */ + static ParsecLoader *GetInstance(); + + /** + * @brief Reads data from external sources into ExaGeoStat format. + * @copydoc DataLoader::ReadData() + * + */ + void ReadData(configurations::Configurations &aConfigurations, std::vector &aMeasurementsMatrix, + std::vector &aXLocations, std::vector &aYLocations, std::vector &aZLocations, + const int &aP) override; + + /** + * @brief Writes a matrix of vectors to disk. + * @copydoc DataLoader::WriteData() + * + */ + void + WriteData(const T &aMatrixPointer, const int &aProblemSize, const int &aP, std::string &aLoggerPath, + exageostat::dataunits::Locations &aLocations) override; + + /** + * @brief Loads data based on given configuration. + * @copydoc DataLoader::LoadData() + * + */ + std::unique_ptr> + LoadData(configurations::Configurations &aConfigurations, kernels::Kernel &aKernel) override; + + /** + * @brief Release the singleton instance of the ParsecLoader class. + * @return void + * + */ + static void ReleaseInstance(); + + /** + * @brief Reads data from a CSV file into a matrix. + * @param[in] apFilename Name of the CSV file. + * @param[out] apFileContent Pointer to an array where file contents will be stored. + * @param[in] aM Number of rows in the matrix. + * @param[in] aN Number of columns in the matrix. + * @return 0 on success, or a non-zero error code on failure. + * + */ + int ReadCSVFileHelper(const char* apFilename, double *apFileContent, int aM, int aN); + + /** + * @brief Adapter for the matrix compress operation. + * @param[in] aConfigurations Configurations object containing relevant settings. + * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. + * @return void. 
+ * + */ + void CompressMatrixHelper(configurations::Configurations &aConfigurations, std::unique_ptr> &aData); + + + private: + /** + * @brief Constructor for the ParsecLoader class. + * @return void + * + */ + ParsecLoader() = default; + + /** + * @brief Default destructor. + * + */ + ~ParsecLoader() override = default; + + /** + * @brief Pointer to the singleton instance of the ParsecLoader class. + * + */ + static ParsecLoader *mpInstance; + + }; + + /** + * @brief Instantiates the PaRSEC Data Generator class for float and double types. + * @tparam T Data Type: float or double + * + */ + EXAGEOSTAT_INSTANTIATE_CLASS(ParsecLoader) +} +#endif //EXAGEOSTAT_CPP_PARSECDATALOADER_HPP \ No newline at end of file diff --git a/inst/include/data-transformer/DataTransformer.hpp b/inst/include/data-transformer/DataTransformer.hpp new file mode 100644 index 00000000..9cf640cc --- /dev/null +++ b/inst/include/data-transformer/DataTransformer.hpp @@ -0,0 +1,60 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file DataTransformer.hpp + * @brief Contains the definition of the DataTransformer class. + * @version 1.1.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @date 2024-10-15 +**/ + +#ifndef EXAGEOSTATCPP_DATATRANSFORMER_HPP +#define EXAGEOSTATCPP_DATATRANSFORMER_HPP + +#include +#include + +namespace exageostat::transformers{ + + /** + * @brief Class represents the data transformer for the Climate Emulator. + * @tparam T Data Type: float or double + */ + template + class DataTransformer { + + public: + /** + * @brief Performs the forward spherical harmonics transform (SHT). + * @param[in] aLSize The size of tile size + * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. 
+ */ + static void ForwardSphericalHarmonicsTransform(const int &aLSize, std::unique_ptr> &aData); + + /** + * @brief Reshapes data during the forward phase of the simulation. + * @param[in] aConfigurations Configurations object containing relevant settings. + * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. + */ + static void ForwardReshape(configurations::Configurations &aConfigurations, std::unique_ptr> &aData); + + /** + * @brief Performs the inverse spherical harmonics transform (SHT). + * @param[in] aLSize The size of tile size + * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. + */ + static void InverseSphericalHarmonicsTransform(const int &aLSize, std::unique_ptr> &aData); + }; + + /** + * @brief Instantiates the DataTransformers class for float and double types. + * @tparam T Data Type: float or double + */ + EXAGEOSTAT_INSTANTIATE_CLASS(DataTransformer) +} // namespace exageostat + +#endif // EXAGEOSTATCPP_DATATRANSFORMER_HPP diff --git a/inst/include/data-units/DescriptorData.hpp b/inst/include/data-units/DescriptorData.hpp index 4c2b1d89..fe771445 100644 --- a/inst/include/data-units/DescriptorData.hpp +++ b/inst/include/data-units/DescriptorData.hpp @@ -28,9 +28,13 @@ namespace exageostat::dataunits { * */ union BaseDescriptor { +#if DEFAULT_RUNTIME CHAM_desc_t *chameleon_desc; -#ifdef USE_HICMA + #ifdef USE_HICMA HICMA_desc_t *hicma_desc; + #endif +#else + parsec_matrix_block_cyclic_t *parsec_desc; #endif }; @@ -136,10 +140,11 @@ namespace exageostat::dataunits { * */ void SetDescriptor(const common::DescriptorType &aDescriptorType, const common::DescriptorName &aDescriptorName, - const bool &aIsOOC, void *apMatrix, const common::FloatPoint &aFloatPoint, const int &aMB, - const int &aNB, const int &aSize, const int &aLM, const int &aLN, const int &aI, - const int &aJ, const int &aM, const int &aN, const int &aP, const int &aQ, - const bool &aValidOOC = true, const bool 
&aConverted = false); + const bool &aIsOOC = false, void *apMatrix = nullptr, + const common::FloatPoint &aFloatPoint = common::EXAGEOSTAT_REAL_DOUBLE, const int &aMB = 0, + const int &aNB = 0, const int &aSize = 0, const int &aLM = 0, const int &aLN = 0, + const int &aI = 0, const int &aJ = 0, const int &aM = 0, const int &aN = 0, + const int &aP = 0, const int &aQ = 0, const bool &aValidOOC = true, const bool &aConverted = false); /** * @brief Getter for the Descriptor matrix. diff --git a/inst/include/data-units/descriptor/ExaGeoStatDescriptor.hpp b/inst/include/data-units/descriptor/ExaGeoStatDescriptor.hpp index fbf1eb3d..c10bb8a8 100644 --- a/inst/include/data-units/descriptor/ExaGeoStatDescriptor.hpp +++ b/inst/include/data-units/descriptor/ExaGeoStatDescriptor.hpp @@ -15,8 +15,15 @@ #ifndef EXAGEOSTATCPP_EXAGEOSTATDESCRIPTOR_HPP #define EXAGEOSTATCPP_EXAGEOSTATDESCRIPTOR_HPP +#if DEFAULT_RUNTIME + #include #include + +#else +#include +#endif + #include /** diff --git a/inst/include/data-units/descriptor/concrete/ParsecDescriptor.hpp b/inst/include/data-units/descriptor/concrete/ParsecDescriptor.hpp new file mode 100644 index 00000000..074a4744 --- /dev/null +++ b/inst/include/data-units/descriptor/concrete/ParsecDescriptor.hpp @@ -0,0 +1,58 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file ParsecDescriptor.hpp + * @brief Defines the ParsecDescriptor class for creating matrix descriptors using the PaRSEC library. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @date 2024-10-18 +**/ + +#ifndef EXAGEOSTATCPP_ParsecDESCRIPTOR_HPP +#define EXAGEOSTATCPP_ParsecDESCRIPTOR_HPP + +#include +#include + +namespace exageostat::dataunits::descriptor { + + /** + * @brief ParsecDescriptor is a class for creating matrix descriptors by Parsec library. 
+ * @tparam T Data Type: float or double + * + */ + template + class ParsecDescriptor { + + public: + /** + * @brief Create a Parsec descriptor for a matrix with the given parameters. + * @param[in] apDescriptor A pointer to the existing parsec_matrix_block_cyclic_t descriptor. The new descriptor will be created based on this descriptor. + * @return A pointer to the newly created parsec_matrix_block_cyclic_t descriptor. + * + */ + static parsec_matrix_block_cyclic_t *CreateParsecDescriptor(void *apDescriptor); + + /** + * @brief destroys and finalize a descriptor + * @param[in] apDescriptor A pointer to the existing parsec_matrix_block_cyclic_t descriptor. + * @return An error code or success code. + * + */ + static int DestroyParsecDescriptor(void *apDescriptor); + }; + + /** + * @brief Instantiates the Parsec descriptor methods class for float and double types. + * @tparam T Data Type: float or double + * + */ + EXAGEOSTAT_INSTANTIATE_CLASS(ParsecDescriptor) + +}//namespace exageostat + +#endif //EXAGEOSTATCPP_ParsecDESCRIPTOR_HPP diff --git a/inst/include/hardware/ExaGeoStatHardware.hpp b/inst/include/hardware/ExaGeoStatHardware.hpp index 6e872091..8eb69570 100644 --- a/inst/include/hardware/ExaGeoStatHardware.hpp +++ b/inst/include/hardware/ExaGeoStatHardware.hpp @@ -16,6 +16,11 @@ #define EXAGEOSTATCPP_EXAGEOSTATHARDWARE_HPP #include +#include + +#if !DEFAULT_RUNTIME +#include +#endif /** * @brief Class represents the hardware configuration for the ExaGeoStat solver. @@ -24,6 +29,14 @@ class ExaGeoStatHardware { public: + + /** + * @brief Constructor for ExaGeoStatHardware. + * @param[in] aConfigurations The set of arguments from the configurations. + * + */ + explicit ExaGeoStatHardware(exageostat::configurations::Configurations &aConfigurations); + /** * @brief Constructor for ExaGeoStatHardware. * @param[in] aComputation The computation mode for the solver. 
@@ -89,6 +102,13 @@ class ExaGeoStatHardware { */ [[nodiscard]] static void *GetHicmaContext(); + /** + * @brief Get the PaRSEC hardware context. + * @return Pointer to the hardware context. + * + */ + [[nodiscard]] static void *GetParsecContext(); + /** * @brief Get the hardware context. * @param[in] aComputation Used computation to decide whether to use Hicma or Chameleon context. @@ -97,6 +117,21 @@ class ExaGeoStatHardware { */ [[nodiscard]] static void *GetContext(exageostat::common::Computation aComputation); + /** + * @brief Sets the rank of MPI for PaRSEC. + * @param[in] aRank The new value for the rank. + * @return void + * + **/ + static void SetParsecMPIRank(int aRank); + + /** + * @brief Retrieves the rank of MPI for PaRSEC. + * @return The current rank of MPI PaRSEC. + * + **/ + static int GetParsecMPIRank(); + /** * @brief Retrieves the P dimension of the grid. * @details This function returns the current setting of the P dimension of the grid, which is part of the grid configuration used in various computational processes. @@ -131,17 +166,65 @@ class ExaGeoStatHardware { **/ static void SetQGrid(int aQ); +#if !DEFAULT_RUNTIME + /** + * @brief Retrieves the HiCMA parameters. + * @details This function returns a pointer to the current HiCMA parameters used in the computational process. + * @return A pointer to the current HiCMA parameters of type `hicma_parsec_params_t`. + * + */ + static hicma_parsec_params_t* GetHicmaParams(); + + /** + * @brief Retrieves the STARSH parameters. + * @details This function returns a pointer to the current STARSH parameters used in the computational process. + * @return A pointer to the current STARSH parameters of type `starsh_params_t`. + * + */ + static starsh_params_t* GetParamsKernel(); + + /** + * @brief Retrieves the HiCMA data. + * @details This function returns a pointer to the current HiCMA data used in the computational process. + * @return A pointer to the current HiCMA data of type `hicma_parsec_data_t`. 
+ * + */ + static hicma_parsec_data_t* GetHicmaData(); + + /** + * @brief Retrieves the HiCMA matrix analysis. + * @details This function returns a pointer to the current HiCMA matrix analysis data used in the computational process. + * @return A pointer to the current HiCMA matrix analysis of type `hicma_parsec_matrix_analysis_t`. + * + */ + static hicma_parsec_matrix_analysis_t* GetAnalysis(); +#endif + private: //// Used Pointer to the Chameleon hardware context. static void *mpChameleonContext; //// Used Pointer to the Hicma hardware context. static void *mpHicmaContext; + //// Used Pointer to the PaRSEC hardware context. + static void *mpParsecContext; + //// Used MPI rank for PaRSEC + static int mParsecMPIRank; //// Used P-Grid static int mPGrid; //// Used Q-Grid static int mQGrid; //// Used boolean to avoid re-init mpi static bool mIsMPIInit; +#if !DEFAULT_RUNTIME + //// HiCMA-specific variables - hicma_parsec_params_t + static std::unique_ptr mpHicmaParams; + //// HiCMA-specific variables - starsh_params_t + static std::unique_ptr mpParamsKernel; + //// HiCMA-specific variables - hicma_parsec_data_t + static std::unique_ptr mpHicmaData; + //// HiCMA-specific variables - hicma_parsec_matrix_analysis_t + static std::unique_ptr mpAnalysis; +#endif }; #endif // EXAGEOSTATCPP_EXAGEOSTATHARDWARE_HPP \ No newline at end of file diff --git a/inst/include/kernels/Kernel.hpp b/inst/include/kernels/Kernel.hpp index 3d018d5c..2460c0f3 100644 --- a/inst/include/kernels/Kernel.hpp +++ b/inst/include/kernels/Kernel.hpp @@ -20,7 +20,9 @@ #include +#if DEFAULT_RUNTIME #include +#endif extern "C" { #include diff --git a/inst/include/kernels/concrete/TrendModel.hpp b/inst/include/kernels/concrete/TrendModel.hpp new file mode 100644 index 00000000..987ab9c9 --- /dev/null +++ b/inst/include/kernels/concrete/TrendModel.hpp @@ -0,0 +1,71 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved.
+// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file TrendModel.hpp + * @brief Defines the TrendModel class – a time–trend kernel that can be used as a mean/covariate model. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @date 2024-11-11 +**/ + +#ifndef EXAGEOSTATCPP_TRENDMODEL_HPP +#define EXAGEOSTATCPP_TRENDMODEL_HPP + +#include + +namespace exageostat::kernels { + + /** + * @class TrendModel + * @brief A simple temporal trend model that can be plugged into the unified kernel interface. + * + * The implementation replicates the time-series trend model that is originally implemented in the + * C version of ExaGeoStat. The class inherits from the abstract `Kernel` and therefore must + * provide an implementation for `GenerateCovarianceMatrix` in which the design matrix (or the + * corresponding covariance when used jointly with other kernels) is produced. + */ + template + class TrendModel : public Kernel { + public: + /** + * @brief Default constructor – sets the internal bookkeeping variables. + */ + TrendModel(); + + /** + * @brief Virtual destructor to allow calls to the correct concrete destructor. + */ + ~TrendModel() override = default; + + /** + * @brief Generates a covariance (or design) matrix block for the trend model. + * @copydoc Kernel::GenerateCovarianceMatrix() + */ + void GenerateCovarianceMatrix(T *apMatrixA, const int &aRowsNumber, const int &aColumnsNumber, + const int &aRowOffset, const int &aColumnOffset, dataunits::Locations &aLocation1, + dataunits::Locations &aLocation2, dataunits::Locations &aLocation3, + T *apLocalTheta, const int &aDistanceMetric) override; + + /** + * @brief Factory function required by the plugin registry mechanism. + * @return A newly allocated `TrendModel` object. 
+ */ + static Kernel *Create(); + + private: + //// Used plugin name for static registration + static bool plugin_name; + }; + + /** + * @brief Instantiate the TrendModel class for the supported precision types. + */ + EXAGEOSTAT_INSTANTIATE_CLASS(TrendModel) + +}// namespace exageostat::kernels + +#endif // EXAGEOSTATCPP_TRENDMODEL_HPP diff --git a/inst/include/linear-algebra-solvers/concrete/chameleon/ChameleonImplementation.hpp b/inst/include/linear-algebra-solvers/concrete/chameleon/ChameleonImplementation.hpp index 8653bfc5..b42c46c8 100644 --- a/inst/include/linear-algebra-solvers/concrete/chameleon/ChameleonImplementation.hpp +++ b/inst/include/linear-algebra-solvers/concrete/chameleon/ChameleonImplementation.hpp @@ -71,6 +71,11 @@ namespace exageostat::linearAlgebra { ExaGeoStatCreateSequence(void *apSequence) override; }; + /** + * @brief Instantiates the Chameleon Implementation class for float and double types. + * @tparam T Data Type: float or double + * + */ EXAGEOSTAT_INSTANTIATE_CLASS(ChameleonImplementation) }//namespace exageostat diff --git a/inst/include/results/Results.hpp b/inst/include/results/Results.hpp index 1f21c6b9..73c25020 100644 --- a/inst/include/results/Results.hpp +++ b/inst/include/results/Results.hpp @@ -16,6 +16,7 @@ #include #include +#include namespace exageostat::results { @@ -31,79 +32,78 @@ namespace exageostat::results { static Results *GetInstance(); /** - * @brief Set the flag indicating whether the results are synthetic or not. - * @param[in] aIsSynthetic True if the results are synthetic, false otherwise. - * + * @brief Set whether the dataset is synthetic or not. + * @param aIsSynthetic Boolean indicating if the dataset is synthetic. + * @param aKey Custom dictionary key (optional). */ - void SetIsSynthetic(bool aIsSynthetic); + void SetIsSynthetic(bool aIsSynthetic, const std::string &aKey = ""); /** * @brief Set the number of generated locations. - * @param[in] aNumLocations The number of generated locations. 
- * + * @param aNumLocations Integer representing the number of locations. + * @param aKey Custom dictionary key (optional). */ - void SetGeneratedLocationsNumber(int aNumLocations); + void SetGeneratedLocationsNumber(int aNumLocations, const std::string &aKey = ""); /** - * @brief Set the flag indicating whether the logger is active or not. - * @param[in] aIsLogger True if the logger is active, false otherwise. - * + * @brief Enable or disable logging. + * @param aIsLogger Boolean indicating if logging is enabled. + * @param aKey Custom dictionary key (optional). */ - void SetIsLogger(bool aIsLogger); + void SetIsLogger(bool aIsLogger, const std::string &aKey = ""); /** - * @brief Set the path for the logger. - * @param[in] aLoggerPath The path for the logger. - * + * @brief Set the logger's file path. + * @param aLoggerPath String representing the logger file path. + * @param aKey Custom dictionary key (optional). */ - void SetLoggerPath(const std::string &aLoggerPath); + void SetLoggerPath(const std::string &aLoggerPath, const std::string &aKey = ""); /** - * @brief Set the Total Data Generation execution time. - * @param[in] aTime The execution time. - * + * @brief Set the total data generation execution time. + * @param aTime Double representing the execution time. + * @param aKey Custom dictionary key (optional). */ - void SetTotalDataGenerationExecutionTime(double aTime); + void SetTotalDataGenerationExecutionTime(double aTime, const std::string &aKey = ""); /** - * @brief Set the Data Generation floating-point operations (FLOPs). - * @param[in] aFlops The number of FLOPs. - * + * @brief Set the total data generation FLOPS. + * @param aFlops Double representing the FLOPS. + * @param aKey Custom dictionary key (optional). */ - void SetTotalDataGenerationFlops(double aFlops); + void SetTotalDataGenerationFlops(double aFlops, const std::string &aKey = ""); /** * @brief Set the log-likelihood value. - * @param[in] aLogLikValue The log-likelihood value. 
- * + * @param aLogLikValue Double representing the log-likelihood value. + * @param aKey Custom dictionary key (optional). */ - void SetLogLikValue(double aLogLikValue); + void SetLogLikValue(double aLogLikValue, const std::string &aKey = ""); /** - * @brief Set the number of maximum likelihood estimation (MLE) iterations. - * @param[in] aIterationsNumber The number of MLE iterations. - * + * @brief Set the number of MLE iterations. + * @param aIterationsNumber Integer representing the number of iterations. + * @param aKey Custom dictionary key (optional). */ - void SetMLEIterations(int aIterationsNumber); + void SetMLEIterations(int aIterationsNumber, const std::string &aKey = ""); /** - * @brief Set the vector of maximum theta values. - * @param[in] aMaximumTheta The vector of maximum theta values. - * + * @brief Set the maximum theta vector. + * @param aMaximumTheta Vector of doubles representing the theta values. + * @param aKey Custom dictionary key (optional). */ - void SetMaximumTheta(const std::vector &aMaximumTheta); + void SetMaximumTheta(const std::vector &aMaximumTheta, const std::string &aKey = ""); /** * @brief Set the total modeling execution time. - * @param[in] aTime The total execution time for data modeling. - * + * @param aTime Double representing the execution time. + * @param aKey Custom dictionary key (optional). */ - void SetTotalModelingExecutionTime(double aTime); + void SetTotalModelingExecutionTime(double aTime, const std::string &aKey = ""); /** * @brief Get the total modeling execution time. - * @return The total execution time for data modeling. - * + * @return Double representing the total modeling execution time. */ [[nodiscard]] double GetTotalModelingExecutionTime() const; @@ -150,16 +150,15 @@ namespace exageostat::results { [[nodiscard]] std::vector GetPredictedMissedValues() const; /** - * @brief Set the total modeling FLOPs. - * @param[in] aTime The total number of FLOPs for data modeling. 
- * + * @brief Set the total modeling FLOPS. + * @param aTime Double representing the FLOPS. + * @param aKey Custom dictionary key (optional). */ - void SetTotalModelingFlops(double aTime); + void SetTotalModelingFlops(double aTime, const std::string &aKey = ""); /** - * @brief Get the total modeling FLOPs. - * @return The total number of FLOPs for data modeling. - * + * @brief Get the total modeling FLOPS. + * @return Double representing the total modeling FLOPS. */ [[nodiscard]] double GetTotalModelingFlops() const; @@ -182,105 +181,105 @@ namespace exageostat::results { * @param[in] aZMiss The value of ZMiss. * */ - void SetZMiss(int aZMiss); + void SetZMiss(int aZMiss, const std::string &aKey = ""); /** * @brief Set the value of MSPEError. * @param[in] aMSPEError The value of MSPEError. * */ - void SetMSPEError(double aMSPEError); + void SetMSPEError(double aMSPEError, const std::string &aKey = ""); /** * @brief Set the MSPE execution time. * @param[in] aTime The execution time. * */ - void SetMSPEExecutionTime(double aTime); + void SetMSPEExecutionTime(double aTime, const std::string &aKey = ""); /** * @brief Set the MSPE number of floating-point operations (FLOPs). * @param[in] aFlops The number of FLOPs. * */ - void SetMSPEFlops(double aFlops); + void SetMSPEFlops(double aFlops, const std::string &aKey = ""); /** * @brief Set the vector of IDW errors. * @param[in] aIDWError The vector of IDW errors. * */ - void SetIDWError(const std::vector &aIDWError); + void SetIDWError(const std::vector &aIDWError, const std::string &aKey = ""); /** * @brief Set the value of MLOE. * @param[in] aMLOE The value of MLOE. * */ - void SetMLOE(double aMLOE); + void SetMLOE(double aMLOE, const std::string &aKey = ""); /** * @brief Set the value of MMOM. * @param[in] aMMOM The value of MMOM. * */ - void SetMMOM(double aMMOM); + void SetMMOM(double aMMOM, const std::string &aKey = ""); /** * @brief Set the MLOE-MMOM execution time. * @param[in] aTime The execution time. 
* */ - void SetExecutionTimeMLOEMMOM(double aTime); + void SetExecutionTimeMLOEMMOM(double aTime, const std::string &aKey = ""); /** * @brief Set the MLOE-MMOM matrix generation time. * @param[in] aTime The execution time. * */ - void SetMatrixGenerationTimeMLOEMMOM(double aTime); + void SetMatrixGenerationTimeMLOEMMOM(double aTime, const std::string &aKey = ""); /** * @brief Set the MLOE-MMOM cholesky factorization time. * @param[in] aTime The execution time. * */ - void SetFactoTimeMLOEMMOM(double aTime); + void SetFactoTimeMLOEMMOM(double aTime, const std::string &aKey = ""); /** * @brief Set the MLOE-MMOM loop time. * @param[in] aTime The execution time. * */ - void SetLoopTimeMLOEMMOM(double aTime); + void SetLoopTimeMLOEMMOM(double aTime, const std::string &aKey = ""); /** * @brief Set the MLOE-MMOM number of floating-point operations (FLOPs). * @param[in] aFlops The number of FLOPs. * */ - void SetFlopsMLOEMMOM(double aFlops); + void SetFlopsMLOEMMOM(double aFlops, const std::string &aKey = ""); /** * @brief Set The total execution time of the fisher tile computation. * @param[in] aTime The total execution time for fisher tile computation. * */ - void SetTotalFisherTime(double aTime); + void SetTotalFisherTime(double aTime, const std::string &aKey = ""); /** * @brief Set the elements of the fisher matrix. * @param aFisherMatrix Elements of the fisher matrix. * */ - void SetFisherMatrix(std::vector aFisherMatrix); + void SetFisherMatrix(std::vector aFisherMatrix, const std::string &aKey = ""); /** * @brief Set the elements of the Z missed matrix. * @param aPredictedValues Elements of the Predicted Z missed matrix. * */ - void SetPredictedMissedValues(std::vector aPredictedValues); + void SetPredictedMissedValues(std::vector aPredictedValues, const std::string &aKey = ""); /** * @brief Print the end summary of the results. 
@@ -288,6 +287,15 @@ namespace exageostat::results { */ void PrintEndSummary(); + private: + /** + * @brief Add result to dictionary + * @param[in] aKey string used as title + * @param[in] aValue string value of result + * + */ + void UpdateDictionary(const std::string &aKey, const std::string &aValue); + private: /** * @brief Pointer to the singleton instance of the SyntheticGenerator class. @@ -345,6 +353,8 @@ namespace exageostat::results { std::vector mFisherMatrix; /// Z miss values std::vector mPredictedMissedValues; + /// Map that holds results + std::map mSummaryDictionary; }; }//namespace exageostat diff --git a/inst/include/runtime-solver/RuntimeSolverFactory.hpp b/inst/include/runtime-solver/RuntimeSolverFactory.hpp new file mode 100644 index 00000000..f8034fb8 --- /dev/null +++ b/inst/include/runtime-solver/RuntimeSolverFactory.hpp @@ -0,0 +1,52 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file RuntimeSolverFactory.hpp + * @brief Header file for the RuntimeSolverFactory class, which creates runtime solvers based on the configured runtime. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @date 2024-11-04 +**/ + +#ifndef EXAGEOSTATCPP_RUNTIMESOLVERFACTORY_HPP +#define EXAGEOSTATCPP_RUNTIMESOLVERFACTORY_HPP + +#include + +#include +#include +#include + +namespace exageostat::runtimesolver { + + /** + * @class RuntimeSolverFactory + * @brief A class that creates linear algebra solvers based on the input computation type. + * @tparam T Data Type: float or double. + * + */ + template + class RuntimeSolverFactory { + public: + + /** + * @brief Creates a linear algebra solver based on the input computation type. + * @return Pointer to the created linear algebra solver. 
+ * + */ + static std::unique_ptr> CreateRuntimeSolver(); + }; + + /** + * @brief Instantiates the Runtime Solver Factory class for float and double types. + * @tparam T Data Type: float or double + * + */ + EXAGEOSTAT_INSTANTIATE_CLASS(RuntimeSolverFactory) + +}//namespace exageostat + +#endif //EXAGEOSTATCPP_RUNTIMESOLVERFACTORY_HPP \ No newline at end of file diff --git a/inst/include/runtime-solver/RuntimeSolverMethods.hpp b/inst/include/runtime-solver/RuntimeSolverMethods.hpp new file mode 100644 index 00000000..acd87cf4 --- /dev/null +++ b/inst/include/runtime-solver/RuntimeSolverMethods.hpp @@ -0,0 +1,54 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file RuntimeSolverMethods.hpp + * @brief Header file for the RuntimeSolverMethods class, which defines the interface for runtime solvers. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @date 2024-11-04 +**/ + +#ifndef EXAGEOSTATCPP_RUNTIMESOLVERMETHODS_HPP +#define EXAGEOSTATCPP_RUNTIMESOLVERMETHODS_HPP + +#include +#include + +namespace exageostat::runtimesolver { + + /** + * @class RuntimeSolverMethods + * @brief A class that defines the interface for linear algebra solvers. + * @tparam T Data Type: float or double. + * + */ + template + class RuntimeSolverMethods { + public: + + /** + * @brief Virtual destructor to allow calls to the correct concrete destructor. + * + */ + virtual ~RuntimeSolverMethods() = default; + + /** + * @brief The Gateway for the Modeling Operation + * @param[in,out] aData DescriptorData object to be populated with descriptors and data. + * @param[in] aConfigurations Configurations object containing relevant settings. + * @param[in] apMeasurementsMatrix measurements matrix to be stored in DescZ. + * @param[in] aKernel Reference to the kernel object to use. 
+ * @return log likelihood value + * + */ + virtual T ModelingOperations(std::unique_ptr> &aData, configurations::Configurations &aConfigurations, + T *apMeasurementsMatrix, const kernels::Kernel &aKernel) = 0; + + }; +}//namespace exageostat + +#endif //EXAGEOSTATCPP_RUNTIMESOLVERMETHODS_HPP \ No newline at end of file diff --git a/inst/include/runtime-solver/concrete/ParsecRuntimeSolver.hpp b/inst/include/runtime-solver/concrete/ParsecRuntimeSolver.hpp new file mode 100644 index 00000000..0f88bb2a --- /dev/null +++ b/inst/include/runtime-solver/concrete/ParsecRuntimeSolver.hpp @@ -0,0 +1,83 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file ParsecRuntimeSolver.hpp + * @brief This file contains the declaration of ParsecRuntimeSolver class. + * @details ParsecRuntimeSolver is a concrete implementation of the RuntimeSolverMethods class. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @author Qinglei Cao + * @date 2024-11-04 +**/ + +#ifndef EXAGEOSTATCPP_PARSECRUNTIMESOLVER_HPP +#define EXAGEOSTATCPP_PARSECRUNTIMESOLVER_HPP + +#include + +namespace exageostat::runtimesolver { + + /** + * @brief ParsecRuntimeSolver is a concrete implementation of RuntimeSolverMethods class for dense or diagonal-super tile matrices. + * @tparam T Data Type: float or double + * + */ + template + class ParsecRuntimeSolver : public RuntimeSolverMethods { + public: + + /** + * @brief Calculates the log likelihood value of a given value theta. + * @copydoc RuntimeSolverMethods::ModelingOperations() + * + */ + T ModelingOperations(std::unique_ptr > &aData, configurations::Configurations &aConfigurations, + T *apMeasurementsMatrix, const kernels::Kernel &aKernel) override; + + /** + * @brief Performs a SYRK (symmetric rank-k update) operation on the matrix. 
+ * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. + * + */ + void ExaGeoStatSYRK(std::unique_ptr> &aData); + + /** + * @brief Performs TLR Cholesky operation on the matrix. + * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. + * + */ + void ExaGeoStatTLRCholesky(std::unique_ptr> &aData); + + /** + * @brief Calculates norm. + * @param[in] aConfigurations Configurations object containing relevant settings. + * @param[in,out] aData Descriptor Data object to be populated with descriptors and data. + * + */ + double ExaGeoStatNorm(configurations::Configurations &aConfigurations, std::unique_ptr> &aData); + + /** + * @brief Calculates the Mean Squared Error (MSE). + * @param[in] aConfigurations Reference to Configurations object containing needed parameters. + * @param[out] aData Reference to an ExaGeoStatData object that contains matrix to be analyzed. + * @return the calculated MSE. + * + */ + double CalculateMSE(configurations::Configurations &aConfigurations, + std::unique_ptr> &aData); + + }; + + /** + * @brief Instantiates the Parsec Runtime Solver class for float and double types. + * @tparam T Data Type: float or double + * + */ + EXAGEOSTAT_INSTANTIATE_CLASS(ParsecRuntimeSolver) +}//namespace exageostat + +#endif //EXAGEOSTATCPP_PARSECRUNTIMESOLVER_HPP \ No newline at end of file diff --git a/inst/include/runtime-solver/concrete/StarpuRuntimeSolver.hpp b/inst/include/runtime-solver/concrete/StarpuRuntimeSolver.hpp new file mode 100644 index 00000000..09c32b04 --- /dev/null +++ b/inst/include/runtime-solver/concrete/StarpuRuntimeSolver.hpp @@ -0,0 +1,62 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file StarpuRuntimeSolver.hpp + * @brief This file contains the declaration of StarpuRuntimeSolver class. 
+ * @details StarpuRuntimeSolver is a concrete implementation of the RuntimeSolverMethods class. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @date 2024-11-04 +**/ + +#ifndef EXAGEOSTATCPP_STARPURUNTIMESOLVER_HPP +#define EXAGEOSTATCPP_STARPURUNTIMESOLVER_HPP + +#include +#include + +namespace exageostat::runtimesolver { + + /** + * @brief StarpuRuntimeSolver is a concrete implementation of RuntimeSolverMethods class for dense or diagonal-super tile matrices. + * @tparam T Data Type: float or double + * + */ + template + class StarpuRuntimeSolver : public RuntimeSolverMethods { + public: + + /** + * @brief Calculates the log likelihood value of a given value theta. + * @copydoc RuntimeSolverMethods::ModelingOperations() + * + */ + T ModelingOperations(std::unique_ptr > &aData, + configurations::Configurations &aConfigurations, T *apMeasurementsMatrix, const kernels::Kernel &aKernel) override; + + + /** + * @brief Objective function used in optimization, and following the NLOPT objective function format. + * @param[in] aTheta An array of length n containing the current point in the parameter space. + * @param[in] aGrad An array of length n where you can optionally return the gradient of the objective function. + * @param[in] apInfo pointer containing needed configurations and data. + * @return double MLE results. + * + */ + static double DataModelingAPI(const std::vector &aTheta, std::vector &aGrad, void *apInfo); + + }; + + /** + * @brief Instantiates the Starpu Runtime Solver class for float and double types. 
+ * @tparam T Data Type: float or double + * + */ + EXAGEOSTAT_INSTANTIATE_CLASS(StarpuRuntimeSolver) +}//namespace exageostat + +#endif //EXAGEOSTATCPP_STARPURUNTIMESOLVER_HPP \ No newline at end of file diff --git a/inst/include/runtime/parsec/JDFHelperFunctions.h b/inst/include/runtime/parsec/JDFHelperFunctions.h new file mode 100644 index 00000000..bcba3377 --- /dev/null +++ b/inst/include/runtime/parsec/JDFHelperFunctions.h @@ -0,0 +1,76 @@ +/** + * @file JDFHelperFunctions.h + * @brief A header file for declarations of JDF helper functions. + * @details Contains function prototypes for JDF operations, including computations, + * transformations, file I/O, and MPI-related data handling for matrix and data structures. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @date 2024-10-20 +**/ + +#include +#include + +/** + * @brief Calculates a unique single index from given dimensions. + * @param[in] aN The row index. + * @param[in] aM The column index. + * @return The calculated single index. + * + */ +int CalculateSingleIndex(int aN, int aM); + +/** + * @brief Sums the elements of a double-precision data matrix. + * @param[in] apData Pointer to the data matrix. + * @param[in] aColumn The number of columns in the matrix. + * @param[in] aRow The number of rows in the matrix. + * @return The sum of matrix elements as a double. + * + */ +double SumDoubleData(double *apData, int aColumn, int aRow); + +/** + * @brief Sums the elements of a complex double-precision data matrix. + * @param[in] apData Pointer to the complex data matrix. + * @param[in] aColumn The number of columns in the matrix. + * @param[in] aRow The number of rows in the matrix. + * @return The sum of matrix elements as a complex double. + * + * + */ +complex double SumComplexData(complex double *apData, int aColumn, int aRow); + +/** + * @brief Performs forward Spherical Harmonic Transform (SHT) calculations. + * @param[in,out] apFlm Pointer to SHT coefficients.
+ * @param[in] apF_data Pointer to spatial data for transformation. + * @param[in] aFDataM Number of rows in spatial data. + * @param[in] aFDataN Number of columns in spatial data. + * @param[in] apEt1, apEt2, apEp, apSlmn, apIe, apIo, apP Arrays for intermediate calculations. + * @param[in] aEt1M, aEt2M, aEpM, aEpN, aSlmnN, aSlmnM, aIeM, aIeN, aIoM, aIoN Dimensions of respective arrays. + * @param[in] apD, apGmtheta_r, apFmnm, apTmp1, apTmp2 Additional intermediate matrices. + * @param[in] aL Maximum degree for the SHT. + * @return void + * + */ +void ForwardSHTHelper(double *apFlm, complex double *apF_data, int aFDataM, int aFDataN, + complex double *apEt1, int aEt1M, complex double *apEt2, int aEt2M, + complex double *apEp, int aEpM, int aEpN, complex double *apSlmn, + int aSlmnN, int aSlmnM, complex double *apIe, int aIeM, int aIeN, + complex double *apIo, int aIoM, int aIoN, complex double *apP, + int aPM, int aPN, complex double *apD, complex double *apGmtheta_r, + complex double *apFmnm, complex double *apTmp1, + complex double *apTmp2, int aL); + +/** + * @brief Performs inverse Spherical Harmonic Transform (SHT) calculations. + * @param[in] apFlm Pointer to coefficients from forward SHT. + * @param[out] apF_spatial Pointer to result spatial data. + * @param[in] apZlm, apSC, apSmt Arrays for intermediate calculations. + * @param[in] aL Maximum degree for the SHT. + * @return void + * + */ +void InverseSHTHelper(double *apFlm, double *apF_spatial, double *apZlm, double *apSC, + double *apSmt, int aL); diff --git a/inst/include/runtime/parsec/ParsecHeader.h b/inst/include/runtime/parsec/ParsecHeader.h new file mode 100644 index 00000000..37b367ce --- /dev/null +++ b/inst/include/runtime/parsec/ParsecHeader.h @@ -0,0 +1,16 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). 
+ +/** + * @file ParsecHeader.h + * @brief A header file to include hicma_parsec and undo str definition inside it. + * @details hicma_parsec.h sets a definition called str() that conflicts with the standard C++ str() function, so str is undefined right after the include. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @date 2024-10-08 +**/ + +#include +#undef str diff --git a/inst/include/runtime/parsec/jdf/JobDescriptionFormat.h b/inst/include/runtime/parsec/jdf/JobDescriptionFormat.h new file mode 100644 index 00000000..3312f713 --- /dev/null +++ b/inst/include/runtime/parsec/jdf/JobDescriptionFormat.h @@ -0,0 +1,266 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file JobDescriptionFormat.h + * @brief A header file for parsec generated functions from jdf files. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @date 2024-10-08 +**/ + +#include + +/** + * @brief Reads a CSV file into a matrix description. + * @details This function reads data from a CSV file and populates the specified matrix description. + * @param[in] apContext Pointer to the parsec context. + * @param[in, out] apDesc Pointer to the matrix block cyclic descriptor. + * @param[in] aMB Number of rows in the block. + * @param[in] aNB Number of columns in the block. + * @param[in] aNodes Number of nodes. + * @param[in] aTimeSlot Time slot for the data. + * @param[in] apFilename Filename of the CSV file. + * @param[in] aRank Rank of the current process. + * @param[in] aVerbose Verbosity level for output. + * @param[in] aGpus Number of GPUs available. + * @return 0 on success, negative value on error.
+ * + */ +int ReadCSV(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus); + +/** + * @brief Reads a specific time slot from a CSV file. + * @details This function extracts data from a specified time slot in a CSV file and updates the matrix description. + * @param[in] apContext Pointer to the parsec context. + * @param[in, out] apDesc Pointer to the matrix block cyclic descriptor. + * @param[in] aMB Number of rows in the block. + * @param[in] aNB Number of columns in the block. + * @param[in] aNodes Number of nodes. + * @param[in] aTimeSlot Time slot for the data. + * @param[in] apFilename Filename of the CSV file. + * @param[in] aRank Rank of the current process. + * @param[in] aVerbose Verbosity level for output. + * @param[in] aGpus Number of GPUs available. + * @return 0 on success, negative value on error. + * + */ +int ReadCSVTimeSlot(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, + int aNodes, int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus); + +/** + * @brief Reads a complex CSV file for a specific time slot. + * @details This function reads complex data from a CSV file corresponding to a specific time slot. + * @param[in] apContext Pointer to the parsec context. + * @param[in, out] apDesc Pointer to the matrix block cyclic descriptor. + * @param[in] aMB Number of rows in the block. + * @param[in] aNB Number of columns in the block. + * @param[in] aNodes Number of nodes. + * @param[in] aTimeSlot Time slot for the data. + * @param[in] apFilename Filename of the CSV file. + * @param[in] aRank Rank of the current process. + * @param[in] aVerbose Verbosity level for output. + * @param[in] aGpus Number of GPUs available. + * @return 0 on success, negative value on error. 
+ * + */ +int ReadCSVToComplexTimeSlot(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, + int aNB, int aNodes, int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus); + +/** + * @brief Reads a complex CSV file. + * @details This function reads complex data from a CSV file and updates the matrix description. + * @param[in] apContext Pointer to the parsec context. + * @param[in, out] apDesc Pointer to the matrix block cyclic descriptor. + * @param[in] aMB Number of rows in the block. + * @param[in] aNB Number of columns in the block. + * @param[in] aNodes Number of nodes. + * @param[in] aTimeSlot Time slot for the data. + * @param[in] apFilename Filename of the CSV file. + * @param[in] aRank Rank of the current process. + * @param[in] aVerbose Verbosity level for output. + * @param[in] aGpus Number of GPUs available. + * @return 0 on success, negative value on error. + * + */ +int ReadCSVComplex(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, + int aNodes, int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus); + +/** + * @brief Reads a complex CSV file into a matrix description. + * @details This function reads complex data from a CSV file and populates the specified matrix description. + * @param[in] apContext Pointer to the parsec context. + * @param[in, out] apDesc Pointer to the matrix block cyclic descriptor. + * @param[in] aMB Number of rows in the block. + * @param[in] aNB Number of columns in the block. + * @param[in] aNodes Number of nodes. + * @param[in] aTimeSlot Time slot for the data. + * @param[in] apFilename Filename of the CSV file. + * @param[in] aRank Rank of the current process. + * @param[in] aVerbose Verbosity level for output. + * @param[in] aGpus Number of GPUs available. + * @return 0 on success, negative value on error. 
+ * + */ +int ReadCSVToComplex(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, + int aNodes, int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus); + +/** + * @brief Performs forward spherical harmonic transform. + * @details This function computes the forward spherical harmonic transform using the provided data descriptors. + * @param[in] apContext Pointer to the parsec context. + * @param[in] apFDataDesc Pointer to the data descriptor for the forward transform. + * @param[in] apFLMDesc Pointer to the descriptor for the spherical harmonic coefficients. + * @param[in] apFLMTDesc Pointer to the descriptor for the transformed spherical harmonic coefficients. + * @param[in] apET1Desc Pointer to the first auxiliary descriptor. + * @param[in] apET2Desc Pointer to the second auxiliary descriptor. + * @param[in] apEPDesc Pointer to the endpoint descriptor. + * @param[in] apSLMNDesc Pointer to the descriptor for Spherical Harmonic Matrix. + * @param[in] apIEDesc Pointer to the input descriptor. + * @param[in] apIODesc Pointer to the output descriptor. + * @param[in] apPDesc Pointer to the parameter descriptor. + * @param[in] apDDesc Pointer to the descriptor for intermediate data. + * @param[in] aFDataM Number of rows in the forward data matrix. + * @param[in] aEPN Number of endpoints. + * @param[in] aET1M Number of rows in the first auxiliary matrix. + * @param[in] aET2M Number of rows in the second auxiliary matrix. + * @param[in] aPN Number of processes. + * @param[in] aFlmM Number of rows in the spherical harmonic coefficients matrix. + * @param[in] aFlmN Number of columns in the spherical harmonic coefficients matrix. + * @param[in] aLSize Size of the spherical harmonic basis. + * @return 0 on success, negative value on error. 
+ * + */ +int ForwardSHT(parsec_context_t *apContext, parsec_tiled_matrix_t *apFDataDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apFLMTDesc, parsec_tiled_matrix_t *apET1Desc, + parsec_tiled_matrix_t *apET2Desc, parsec_tiled_matrix_t *apEPDesc, + parsec_tiled_matrix_t *apSLMNDesc, parsec_tiled_matrix_t *apIEDesc, + parsec_tiled_matrix_t *apIODesc, parsec_tiled_matrix_t *apPDesc, + parsec_tiled_matrix_t *apDDesc, int aFDataM, int aEPN, int aET1M, + int aET2M, int aPN, int aFlmM, int aFlmN, int aLSize); + +/** + * @brief Computes the element-wise difference between two matrices. + * @details This function calculates the difference between corresponding elements + * of two matrices, `apDescA` and `apDescB`, which are described by a block-cyclic distribution. + * @param[in] apContext Pointer to the PaRSEC context (`parsec_context_t`) in which the computation will be performed. + * @param[in] apDescA Pointer to the descriptor of the first matrix. + * @param[in] apDescB Pointer to the descriptor of the second matrix. + * @return Returns 0 on successful completion or an error otherwise. + * + */ +int DifferenceDouble(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDescA, parsec_matrix_block_cyclic_t *apDescB); + +/** + * @brief Performs forward spherical harmonic transform with reshaping. + * @details This function computes the forward spherical harmonic transform and reshapes the resulting data. + * @param[in] apContext Pointer to the parsec context. + * @param[in] aRank Rank of the current process. + * @param[in] aVerbose Verbosity level for output. + * @param[in] apFDataDesc Pointer to the data descriptor for the forward transform. + * @param[in] apFLMDesc Pointer to the descriptor for the spherical harmonic coefficients. + * @param[in] apFLMTDesc Pointer to the descriptor for the transformed spherical harmonic coefficients. + * @param[in] apET1Desc Pointer to the first auxiliary descriptor. 
+ * @param[in] apET2Desc Pointer to the second auxiliary descriptor. + * @param[in] apEPDesc Pointer to the endpoint descriptor. + * @param[in] apSLMNDesc Pointer to the descriptor for Spherical Harmonic Matrix. + * @param[in] apIEDesc Pointer to the input descriptor. + * @param[in] apIODesc Pointer to the output descriptor. + * @param[in] apPDesc Pointer to the parameter descriptor. + * @param[in] apDDesc Pointer to the descriptor for intermediate data. + * @param[in] apADesc Pointer to the descriptor for A matrix. + * @param[in] aFDataM Number of rows in the forward data matrix. + * @param[in] aEPN Number of endpoints. + * @param[in] aET1M Number of rows in the first auxiliary matrix. + * @param[in] aET2M Number of rows in the second auxiliary matrix. + * @param[in] aPN Number of processes. + * @param[in] aFlmTNB Block size parameter specific to the reshaping of spherical harmonic coefficients. + * @param[in] aT Transform-specific parameter, often used to define temporal or frequency characteristics. + * @param[in] aLSize Size of the spherical harmonic basis, determining the resolution of the transformation. + * @param[out] apNormGlobal Pointer to an array storing global normalization values applied during the transform. + * @param[in] aNT Number of transformations applied, defining the iteration count for computation. + * @param[in] aUpperLower Specifies the range for the spherical harmonic transformation, either upper or lower spherical components. + * @return 0 on success, negative value on error. 
+ * + */ +int ForwardSHTReshape(parsec_context_t *apContext, int aRank, int aVerbose, parsec_tiled_matrix_t *apFDataDesc, + parsec_tiled_matrix_t *apFLMDesc, parsec_tiled_matrix_t *apFLMTDesc, + parsec_tiled_matrix_t *apET1Desc, parsec_tiled_matrix_t *apET2Desc, + parsec_tiled_matrix_t *apEPDesc, parsec_tiled_matrix_t *apSLMNDesc, + parsec_tiled_matrix_t *apIEDesc, parsec_tiled_matrix_t *apIODesc, + parsec_tiled_matrix_t *apPDesc, parsec_tiled_matrix_t *apDDesc, + parsec_tiled_matrix_t *apADesc, int aFDataM, int aEPN, int aET1M, + int aET2M, int aPN, int aFlmTNB, int aT, int aLSize, double *apNormGlobal, + int aNT, int aUpperLower); + +/** + * @brief Computes the mean squared error between data and spatial descriptors. + * @details This function calculates the mean squared error (MSE) between two matrix descriptors, which represent + * the observed data and the spatial data for a given size of spherical harmonic basis. + * @param[in] apContext Pointer to the parsec context. + * @param[in] apFDataDesc Pointer to the matrix descriptor for the observed data. + * @param[in] apFSpatialDesc Pointer to the matrix descriptor for the spatial data. + * @param[in] aLSize Size of the spherical harmonic basis. + * @return 0 on success, negative value on error. + * + */ +int MeanSquaredError(parsec_context_t *apContext, parsec_matrix_block_cyclic_t* apFDataDesc, + parsec_matrix_block_cyclic_t* apFSpatialDesc, int aLSize); + +/** + * @brief Computes the norm of a tiled matrix. + * @details This function calculates the norm of a matrix represented by the given tiled matrix descriptor. + * The result can be computed for the entire matrix or for specific tiles based on the provided parameters. + * @param[in] apContext Pointer to the parsec context, which holds information about the execution environment. + * @param[in] apNormGlobal The global norm value to be updated. + * @param[in] apADesc Pointer to the descriptor of the tiled matrix for which the norm is being computed. 
+ * @param[in] aNT The number of tiles in the tiled matrix. This indicates how many submatrices will be considered. + * @param[in] aIsSymmetric Flag to specify if the data matrix is symmetric or not. + * @param[in] aUpperLower Specifies whether to calculate the norm for the upper or lower triangular part of the matrix. + * @return void + * + */ +void GetMatrixNorm(parsec_context_t *apContext, double *apNormGlobal, parsec_tiled_matrix_t *apADesc, + int aNT, int aUpperLower, int aIsSymmetric); + +/** + * @brief Performs inverse spherical harmonic transform. + * @details This function computes the inverse spherical harmonic transform using the provided data descriptors. + * @param[in] apContext Pointer to the parsec context. + * @param[in] apFSpatialDesc Pointer to the descriptor for the spherical harmonic coefficients. + * @param[in] apFLMDesc Pointer to the descriptor for the transformed spherical harmonic coefficients. + * @param[in] apZLMDesc Pointer to the second auxiliary descriptor. + * @param[in] apSCDesc Pointer to the input descriptor. + * @param[in] aLSize Size of the spherical harmonic basis. + * @return 0 on success, negative value on error. + * + */ +int InverseSHT(parsec_context_t *apContext, parsec_tiled_matrix_t *apFSpatialDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apZLMDesc, parsec_tiled_matrix_t *apSCDesc, int aLSize); + +/** + * @brief Compresses a matrix using a specified compression strategy and parameters. + * @details This function applies matrix compression based on the parameters provided. + * @param[in] apContext Pointer to the Parsec context, which manages task execution and dataflow. + * @param[in] apNormGlobal The global norm value to be updated. + * @param[in] aUpperLower Integer flag indicating which triangular part of the matrix to compress. + * @param[in] aBandSizeDense Band size for the dense region of the matrix in the compression process. + * @param[in] aNT Number of tiles in one dimension of the matrix.
+ * @param[in] aMaxRank Maximum allowable rank for the compressed matrix. Controls the degree of compression. + * @param[in] aN Total number of columns in the matrix, used to define the matrix dimensions. + * @param[in] aAdaptiveDecision Flag indicating whether to enable adaptive compression. + * @param[in] aTolerance Compression tolerance value, used to decide the accuracy of the compressed matrix. + * @param[in] aSendFullTile Flag indicating whether to transmit full tiles as part of the compression. + * @param[in] aAutoBand Flag to enable automatic adjustment of the band size during compression. + * @param[in] aGpus Number of GPUs to utilize in the compression. + * @param[in] apHicmaData The HiCMA data struct of descriptors. + * @param[in] apParamsKernel The Starsh struct of kernels. + * @return void + * + */ +void MatrixCompress(parsec_context_t *apContext, double *apNormGlobal, int aUpperLower, int aBandSizeDense, int aNT, + int aMaxRank, int aN, int aAdaptiveDecision, int aTolerance, int aSendFullTile, int aAutoBand, + int aGpus, hicma_parsec_data_t *apHicmaData, starsh_params_t *apParamsKernel); \ No newline at end of file diff --git a/inst/include/utilities/EnumStringParser.hpp b/inst/include/utilities/EnumStringParser.hpp index ee7201bb..46bf13d3 100644 --- a/inst/include/utilities/EnumStringParser.hpp +++ b/inst/include/utilities/EnumStringParser.hpp @@ -14,6 +14,10 @@ #ifndef EXAGEOSTATCPP_ENUMSTRINGPARSER_HPP #define EXAGEOSTATCPP_ENUMSTRINGPARSER_HPP +#ifdef min +#undef min +#endif + #include #include diff --git a/man/get_Z_measurement_vector.Rd b/man/get_Z_measurement_vector.Rd deleted file mode 100644 index b9a456bf..00000000 --- a/man/get_Z_measurement_vector.Rd +++ /dev/null @@ -1,50 +0,0 @@ -% Copyright (c) 2017-2024 King Abdullah University of Science and Technology, -% All rights reserved. -% ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST).
- -% @file get_Z_measurement_vector.Rd -% @brief roxygen2 documentation for the R Interface of get_Z_measurment_vector function. -% @version 1.1.0 -% @author Mahmoud ElKarargy -% @date 2024-03-17 - -\name{get_Z_measurement_vector} -\alias{get_Z_measurement_vector} -\title{Get descriptive Z values function} - -\description{ -Retrieves descriptive Z values from ExaGeoStat data based on type. -} - -\usage{ -get_Z_measurement_vector(data,type) -} - -\arguments{ -\item{data}{A list of ExaGeoStatData that contains the locations.} -\item{type}{A string specifies the type of descriptor value to retrieve (e.g., "Chameleon", "HiCMA").} -} - -\value{ -A numeric vector of descriptive Z values. -} - -\examples{ -ncores <- 2 -ngpus <- 0 -computation <- "exact" -p <- 1 -q <- 1 -hardware <- new(Hardware, computation, ncores, ngpus, p, q) - -dimension = "3D" -problem_size <- 4 -empty_data <- new(Data, problem_size, dimension) - -dts <- 2 -kernel <- "univariate_matern_stationary" -initial_theta <- c(1,0.1,0.5) -exageostat_data <- simulate_data(kernel=kernel, initial_theta=initial_theta, -problem_size=problem_size, dts=dts, dimension=dimension) -Z <- get_Z_measurement_vector(data=exageostat_data, type="chameleon") -} diff --git a/man/get_locations.Rd b/man/get_locations.Rd deleted file mode 100644 index 42eb4153..00000000 --- a/man/get_locations.Rd +++ /dev/null @@ -1,49 +0,0 @@ -% Copyright (c) 2017-2024 King Abdullah University of Science and Technology, -% All rights reserved. -% ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). - -% @file get_locations.Rd -% @brief roxygen2 documentation for the R Interface of get_locations function for all coordinates. -% @version 1.1.0 -% @author Mahmoud ElKarargy -% @date 2024-03-17 - -\name{get_locations} -\alias{get_locations} -\title{Get Locations function} - -\description{ -Retrieves all the coordinates of locations from ExaGeoStatData object. 
-} - -\usage{ -get_locations(data) -} - -\arguments{ -\item{data}{A list of ExaGeoStatData that contains the locations.} -} - -\value{ -A numeric vector of locations. -} - -\examples{ -ncores <- 1 -ngpus <- 0 -computation <- "exact" -p <- 1 -q <- 1 -hardware <- new(Hardware, computation, ncores, ngpus, p, q) - -dimension = "2D" -problem_size <- 4 -empty_data <- new(Data, problem_size, dimension) - -dts <- 2 -kernel <- "univariate_matern_stationary" -initial_theta <- c(1,0.1,0.5) -exageostat_data <- simulate_data(kernel=kernel, initial_theta=initial_theta, -problem_size=problem_size, dts=dts, dimension=dimension) -locs <- get_locations(data=exageostat_data) -} diff --git a/man/model_data.Rd b/man/model_data.Rd index b49a3665..32e63a49 100644 --- a/man/model_data.Rd +++ b/man/model_data.Rd @@ -15,7 +15,7 @@ \usage{ model_data(computation = "exact", kernel, distance_matrix = "euclidean", lb, -ub, tol = 4, mle_itr, dts, lts = 0, dimension = "2D", band = 0, max_rank = 500, +ub, tol = 4, mle_itr, dts, lts = 0, dimension = "2D", band = 0, max_rank = 500, acc = 0, data = NULL, matrix = NULL, x = NULL, y = NULL, z = NULL) } @@ -54,6 +54,7 @@ data = NULL, matrix = NULL, x = NULL, y = NULL, z = NULL) \item{dimension}{A string specifies the data dimension, either "2D" or "3D". Default is "2D".} \item{band}{A numeric value Bandwidth for band matrices, applicable in certain computational kernels, Default is 0.} \item{max_rank}{A numeric value specifies the Maximum rank for low-rank approximations, Default is 500.} +\item{acc}{A numeric value specifies the accuracy for low-rank approximations, The final value is computed as 10^(-1.0 * acc). Default is 0.} \item{data}{A list of data vectors. Default is `R_NilValue`.} \item{matrix}{A matrix object. Default is `R_NilValue`.} \item{x}{A numeric vector. 
Default is `R_NilValue`.} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index beb9378f..d7bf24f7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,18 +11,26 @@ # @date 2024-02-04 # Add subdirectories for configurations, data-generators, data-units, and linear-algebra-solvers. -add_subdirectory(api) add_subdirectory(configurations) -add_subdirectory(data-generators) -add_subdirectory(data-loader) +add_subdirectory(kernels) +add_subdirectory(helpers) add_subdirectory(data-units) +add_subdirectory(runtime) +add_subdirectory(api) +add_subdirectory(results) add_subdirectory(hardware) -add_subdirectory(helpers) -add_subdirectory(kernels) -add_subdirectory(linear-algebra-solvers) +add_subdirectory(data-generators) +add_subdirectory(data-loader) add_subdirectory(prediction) -add_subdirectory(results) -add_subdirectory(runtime) +add_subdirectory(runtime-solver) + +# Check the value of RUNTIME_TYPE and configure chameleon/starpu accordingly +if (RUNTIME_TYPE STREQUAL "PARSEC") + add_subdirectory(data-analyzer) + add_subdirectory(data-transformer) +else() + add_subdirectory(linear-algebra-solvers) +endif () if (USE_R) add_subdirectory(Rcpp-adapters) @@ -31,7 +39,7 @@ endif () # Set the name of the library to be created. set(LIB_NAME ${PROJECT_NAME}) # Create the library with the specified source files and linking libraries. 
-add_library(${LIB_NAME} SHARED ${SOURCES}) +add_library(${LIB_NAME} SHARED ${SOURCES} ${JDF_GENERATED_SOURCES}) target_compile_definitions(${LIB_NAME} PUBLIC ${COMPILE_DEFINITIONS}) target_link_libraries(${LIB_NAME} ${LIBS}) diff --git a/src/Rcpp-adapters/FunctionsAdapter.cpp b/src/Rcpp-adapters/FunctionsAdapter.cpp index 99560eca..02d9f0af 100644 --- a/src/Rcpp-adapters/FunctionsAdapter.cpp +++ b/src/Rcpp-adapters/FunctionsAdapter.cpp @@ -14,6 +14,7 @@ #include #include #include +#include using namespace std; using namespace Rcpp; @@ -34,14 +35,14 @@ namespace exageostat::adapters { // manually set the configurations values with the user input from R. Configurations configurations; configurations.SetProblemSize(aProblemSize); - configurations.CheckKernelValue(aKernelName); - configurations.ParseDistanceMetric(aDistanceMatrix); + configurations.SetKernelName(validator::Validator::CheckKernelValue(aKernelName)); + configurations.SetDistanceMetric(validator::Validator::CheckDistanceMetricValue(aDistanceMatrix)); configurations.SetSeed(aSeed); configurations.SetDenseTileSize(aDenseTileSize); configurations.SetLowTileSize(aLowTileSize); configurations.SetComputation(EXACT_DENSE); configurations.SetInitialTheta(aInitialTheta); - configurations.SetDimension(Configurations::CheckDimensionValue(aDimension)); + configurations.SetDimension(validator::Validator::CheckDimensionValue(aDimension)); if (!aLogPath.empty()) { configurations.SetLogger(true); @@ -116,9 +117,10 @@ namespace exageostat::adapters { R_ExaGeoStatModelData(const string &aComputation, const string &aKernelName, const string &aDistanceMatrix, const vector &aLowerBound, const vector &aUpperBound, const int &aTolerance, const int &aMleIterations, const int &aDenseTileSize, const int &aLowTileSize, - const string &aDimension, const int &aBand, const int &aMaxRank, SEXP apData, - Nullable aMeasurementsVector, Nullable aLocationsX, - Nullable aLocationsY, Nullable aLocationsZ) { + const string &aDimension, const 
int &aBand, const int &aMaxRank, const int &aAccuracy, + SEXP apData, Nullable aMeasurementsVector, + Nullable aLocationsX, Nullable aLocationsY, + Nullable aLocationsZ) { Configurations configurations; bool is_initialized = ((SEXP) apData == R_NilValue); @@ -136,7 +138,7 @@ namespace exageostat::adapters { double *pMeasurementsVectorPtr = GetDataFromArguments(aMeasurementsVector, aLocationsX, aLocationsY, aLocationsZ, data, configurations, aKernelName, aDistanceMatrix, aDenseTileSize, aLowTileSize, aDimension, - Configurations::CheckComputationValue(aComputation)); + validator::Validator::CheckComputationValue(aComputation)); configurations.SetLowerBounds(aLowerBound); configurations.SetUpperBounds(aUpperBound); @@ -144,6 +146,7 @@ namespace exageostat::adapters { configurations.SetTolerance(aTolerance); configurations.SetBand(aBand); configurations.SetMaxRank(aMaxRank); + configurations.SetAccuracy(aAccuracy); ExaGeoStat::ExaGeoStatDataModeling(configurations, data, pMeasurementsVectorPtr); // Take back ownership, to avoid deleting apData when the unique_ptr goes out of scope. @@ -275,11 +278,11 @@ namespace exageostat::adapters { } // Set common configurations. aConfigurations.SetComputation(aComputation); - aConfigurations.CheckKernelValue(aKernelName); - aConfigurations.ParseDistanceMetric(aDistanceMatrix); + aConfigurations.SetKernelName(validator::Validator::CheckKernelValue(aKernelName)); + aConfigurations.SetDistanceMetric(validator::Validator::CheckDistanceMetricValue(aDistanceMatrix)); aConfigurations.SetDenseTileSize(aDenseTileSize); aConfigurations.SetLowTileSize(aLowTileSize); - aConfigurations.SetDimension(Configurations::CheckDimensionValue(aDimension)); + aConfigurations.SetDimension(validator::Validator::CheckDimensionValue(aDimension)); aConfigurations.SetProblemSize(aData->GetLocations()->GetSize()); return pMeasurementsVectorPtr; } @@ -352,11 +355,11 @@ namespace exageostat::adapters { } // Set common configurations. 
- aConfigurations.CheckKernelValue(aKernelName); - aConfigurations.ParseDistanceMetric(aDistanceMatrix); + aConfigurations.SetKernelName(validator::Validator::CheckKernelValue(aKernelName)); + aConfigurations.SetDistanceMetric(validator::Validator::CheckDistanceMetricValue(aDistanceMatrix)); aConfigurations.SetDenseTileSize(aDenseTileSize); aConfigurations.SetLowTileSize(aLowTileSize); - aConfigurations.SetDimension(Configurations::CheckDimensionValue(aDimension)); + aConfigurations.SetDimension(validator::Validator::CheckDimensionValue(aDimension)); aConfigurations.SetProblemSize(data->GetLocations()->GetSize()); aConfigurations.SetEstimatedTheta(aEstimatedTheta); diff --git a/src/Rcpp-adapters/RcppModules.cpp b/src/Rcpp-adapters/RcppModules.cpp index 17c9036d..55eb19ff 100644 --- a/src/Rcpp-adapters/RcppModules.cpp +++ b/src/Rcpp-adapters/RcppModules.cpp @@ -47,7 +47,7 @@ RCPP_MODULE(ExaGeoStatCPP) { function("model_data", &exageostat::adapters::R_ExaGeoStatModelData, List::create(_["computation"] = "exact", _["kernel"], _["distance_matrix"] = "euclidean", _["lb"], _["ub"], _["tol"] = 4, _["mle_itr"], _["dts"], _["lts"] = 0, _["dimension"] = "2D", _["band"] = 0, - _["max_rank"] = 500, _["data"] = R_NilValue, _["matrix"] = R_NilValue, _["x"] = R_NilValue, + _["max_rank"] = 500, _["acc"] = 0, _["data"] = R_NilValue, _["matrix"] = R_NilValue, _["x"] = R_NilValue, _["y"] = R_NilValue, _["z"] = R_NilValue)); function("predict_data", &exageostat::adapters::R_ExaGeoStatPredictData, diff --git a/src/api/ExaGeoStat.cpp b/src/api/ExaGeoStat.cpp index e1f1b1d1..91cc1712 100644 --- a/src/api/ExaGeoStat.cpp +++ b/src/api/ExaGeoStat.cpp @@ -13,32 +13,70 @@ #include #include -#include #include +#include +#include +#if !DEFAULT_RUNTIME +#include +#endif using namespace std; -using namespace nlopt; using namespace exageostat::api; using namespace exageostat::generators; using namespace exageostat::dataunits; using namespace exageostat::configurations; +using namespace 
exageostat::generators::stagezero; + + +template +void ExaGeoStat::ExaGeoStatGenerateMeanTrendData( + configurations::Configurations &aConfigurations, + std::unique_ptr > &aData) { + + int seed = 0; + std::srand(seed); + aConfigurations.PrintSummary(); + LOGGER("** ExaGeoStat stage zero data generation **") + // Register and create a kernel object + kernels::Kernel *pKernel = plugins::PluginRegistry>::Create(aConfigurations.GetKernelName(), + aConfigurations.GetTimeSlot()); + + // Create a unique pointer to a DataGenerator object + // Automatically select PaRSEC version when PaRSEC runtime is enabled + unique_ptr> data_generator; +#if DEFAULT_RUNTIME + // Use StarPU/CHAMELEON version + data_generator = unique_ptr>(StageZeroGenerator::GetInstance()); + LOGGER("Using StageZeroGenerator (StarPU/CHAMELEON runtime)") +#else + // Use PaRSEC version + data_generator = unique_ptr>(StageZeroGeneratorParsec::GetInstance()); + LOGGER("Using StageZeroGeneratorParsec (PaRSEC runtime)") +#endif + + aData = data_generator->CreateData(aConfigurations, *pKernel); + delete pKernel; + LOGGER("\t*Data generation finished*") + +} template void ExaGeoStat::ExaGeoStatLoadData(Configurations &aConfigurations, std::unique_ptr> &aData) { + int seed = 0; + std::srand(seed); aConfigurations.PrintSummary(); LOGGER("** ExaGeoStat data generation/loading **") // Register and create a kernel object kernels::Kernel *pKernel = plugins::PluginRegistry>::Create(aConfigurations.GetKernelName(), aConfigurations.GetTimeSlot()); - // Add the data generation arguments. 
- aConfigurations.InitializeDataGenerationArguments(); // Create a unique pointer to a DataGenerator object unique_ptr> data_generator = DataGenerator::CreateGenerator(aConfigurations); aData = data_generator->CreateData(aConfigurations, *pKernel); delete pKernel; LOGGER("\t*Data generation/loading finished*") + } template @@ -50,57 +88,14 @@ T ExaGeoStat::ExaGeoStatDataModeling(Configurations &aConfigurations, std::un // Register and create a kernel object kernels::Kernel *pKernel = plugins::PluginRegistry>::Create(aConfigurations.GetKernelName(), aConfigurations.GetTimeSlot()); - // Add the data modeling arguments. - aConfigurations.InitializeDataModelingArguments(); - - int parameters_number = pKernel->GetParametersNumbers(); - int max_number_of_iterations = aConfigurations.GetMaxMleIterations(); - // Setting struct of data to pass to the modeling. - auto modeling_data = new mModelingData(aData, aConfigurations, *apMeasurementsMatrix, *pKernel); - // Create nlopt - double opt_f; - opt optimizing_function(nlopt::LN_BOBYQA, parameters_number); - // Initialize problem's bound. - optimizing_function.set_lower_bounds(aConfigurations.GetLowerBounds()); - optimizing_function.set_upper_bounds(aConfigurations.GetUpperBounds()); - optimizing_function.set_ftol_abs(aConfigurations.GetTolerance()); - // Set max iterations value. - optimizing_function.set_maxeval(max_number_of_iterations); - optimizing_function.set_max_objective(ExaGeoStatMLETileAPI, (void *) modeling_data); - // Optimize mle using nlopt. 
- optimizing_function.optimize(aConfigurations.GetStartingTheta(), opt_f); - aConfigurations.SetEstimatedTheta(aConfigurations.GetStartingTheta()); - - auto theta = aConfigurations.GetStartingTheta(); - - LOGGER("--> Final Theta Values (", true) - for (int i = 0; i < parameters_number; i++) { - LOGGER_PRECISION(theta[i]) - if (i != parameters_number - 1) { - LOGGER_PRECISION(", ") - } - } - LOGGER_PRECISION(")") - LOGGER("") - - delete pKernel; - delete modeling_data; - return optimizing_function.last_optimum_value(); -} - -template -double -ExaGeoStat::ExaGeoStatMLETileAPI(const std::vector &aTheta, std::vector &aGrad, void *apInfo) { - - auto config = ((mModelingData *) apInfo)->mpConfiguration; - auto data = ((mModelingData *) apInfo)->mpData; - auto measurements = ((mModelingData *) apInfo)->mpMeasurementsMatrix; - auto kernel = ((mModelingData *) apInfo)->mpKernel; + // Initialize all theta: starting, estimated, lower and upper bounds. + aConfigurations.InitializeAllTheta(); // We do Date Modeling with any computation. - auto linear_algebra_solver = linearAlgebra::LinearAlgebraFactory::CreateLinearAlgebraSolver( - config->GetComputation()); - return linear_algebra_solver->ExaGeoStatMLETile(*data, *config, aTheta.data(), measurements, *kernel); + auto runtime_solver = runtimesolver::RuntimeSolverFactory::CreateRuntimeSolver(); + T result = runtime_solver->ModelingOperations(aData, aConfigurations, apMeasurementsMatrix, *pKernel); + delete pKernel; + return result; } @@ -114,9 +109,10 @@ void ExaGeoStat::ExaGeoStatPrediction(Configurations &aConfigurations, std::u // Register and create a kernel object kernels::Kernel *pKernel = plugins::PluginRegistry>::Create(aConfigurations.GetKernelName(), aConfigurations.GetTimeSlot()); - // Add the data prediction arguments. - aConfigurations.InitializeDataPredictionArguments(); + // Initialize all theta: starting, estimated, lower and upper bounds. 
+ aConfigurations.InitializeAllTheta(); prediction::Prediction::PredictMissingData(aData, aConfigurations, apMeasurementsMatrix, *pKernel, apTrainLocations, apTestLocations); delete pKernel; -} \ No newline at end of file +} + diff --git a/src/configurations/CMakeLists.txt b/src/configurations/CMakeLists.txt index 3575e1a3..02ffb480 100644 --- a/src/configurations/CMakeLists.txt +++ b/src/configurations/CMakeLists.txt @@ -13,6 +13,8 @@ # Add the Configurations.cpp file to the list of source files. set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Configurations.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Parser.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Validator.cpp ${SOURCES} PARENT_SCOPE ) \ No newline at end of file diff --git a/src/configurations/Configurations.cpp b/src/configurations/Configurations.cpp index 8da6a38f..52f40044 100644 --- a/src/configurations/Configurations.cpp +++ b/src/configurations/Configurations.cpp @@ -13,12 +13,16 @@ **/ #include -#include +#include +#include #include +#include using namespace std; using namespace exageostat::configurations; +using namespace exageostat::configurations::parser; +using namespace exageostat::configurations::validator; using namespace exageostat::common; Verbose Configurations::mVerbosity = Verbose::STANDARD_MODE; @@ -28,46 +32,25 @@ bool Configurations::mFirstInit = false; Configurations::Configurations() { - // Set default values for arguments! 
- SetComputation(EXACT_DENSE); - SetCoresNumber(1); - SetGPUsNumbers(0); - SetPGrid(1); - SetQGrid(1); - SetMaxRank(1); - SetIsOOC(false); - SetKernelName(""); - SetDimension(Dimension2D); - SetTimeSlot(1); - SetProblemSize(0); - SetDenseTileSize(0); -#ifdef USE_HICMA - SetLowTileSize(0); -#endif - SetBand(0); - SetLoggerPath(""); - SetIsSynthetic(true); + // Set default values from the json file + Parser::ParseJSON(DEFAULT_CONFIGURATION_PATH, this->mDictionary); + Validator::Validate(this->mDictionary); + + // initialize thetas with empty vectors vector theta; SetInitialTheta(theta); SetLowerBounds(theta); SetUpperBounds(theta); SetEstimatedTheta(theta); - SetSeed(0); - SetLogger(false); - SetUnknownObservationsNb(0); - SetApproximationMode(1); - SetActualObservationsFilePath(""); - SetRecoveryFile(""); - SetPrecision(DOUBLE); - SetIsMSPE(false); - SetIsFisher(false); - SetIsIDW(false); - SetIsMLOEMMOM(false); - SetDataPath(""); - SetDistanceMetric(EUCLIDEAN_DISTANCE); - SetAccuracy(0); - SetIsNonGaussian(false); mIsThetaInit = false; + +#if !DEFAULT_RUNTIME + // Set default values for Hicma-Parsec params + SetTolerance(0); + //TODO:currently,we support real data only in parsec.In the future,we should support synthetic and real data for both runtimes + SetIsSynthetic(false); + SetStageZero(false); +#endif } void Configurations::InitializeArguments(const int &aArgC, char **apArgV, const bool &aEnableR) { @@ -76,141 +59,102 @@ void Configurations::InitializeArguments(const int &aArgC, char **apArgV, const this->mpArgV = apArgV; mHeapAllocated = aEnableR; - // Get the example name - string example_name = apArgV[0]; - // Remove the './' - example_name.erase(0, 2); - string argument; - string argument_name; - string argument_value; - int equal_sign_Idx; - - // Loop through the arguments - for (int i = 1; i < aArgC; ++i) { - argument = apArgV[i]; - equal_sign_Idx = static_cast(argument.find('=')); - argument_name = argument.substr(0, equal_sign_Idx); - - // Check if 
argument has an equal sign. - if (equal_sign_Idx != string::npos) { - argument_value = argument.substr(equal_sign_Idx + 1); - - // Check the argument name and set the corresponding value - if (argument_name == "--N" || argument_name == "--n") { - SetProblemSize(CheckNumericalValue(argument_value)); - } else if (argument_name == "--Kernel" || argument_name == "--kernel") { - CheckKernelValue(argument_value); - } else if (argument_name == "--P" || argument_name == "--p") { - SetPGrid(max(CheckNumericalValue(argument_value), GetPGrid())); - } else if (argument_name == "--Q" || argument_name == "--q") { - SetQGrid(max(CheckNumericalValue(argument_value), GetQGrid())); - } else if (argument_name == "--Dimension" || argument_name == "--dimension" || argument_name == "--dim" || - argument_name == "--Dim") { - SetDimension(CheckDimensionValue(argument_value)); - } else if (argument_name == "--TimeSlot" || argument_name == "--timeslot" || - argument_name == "--time_slot") { - SetTimeSlot(CheckNumericalValue(argument_value)); - } else if (argument_name == "--Computation" || argument_name == "--computation") { - SetComputation(CheckComputationValue(argument_value)); - } else if (argument_name == "--precision" || argument_name == "--Precision") { - SetPrecision(CheckPrecisionValue(argument_value)); - } else if (argument_name == "--cores" || argument_name == "--coresNumber" || - argument_name == "--cores_number" || argument_name == "--ncores") { - SetCoresNumber(CheckNumericalValue(argument_value)); - } else if (argument_name == "--gpus" || argument_name == "--GPUsNumbers" || - argument_name == "--gpu_number" || argument_name == "--ngpus") { - SetGPUsNumbers(CheckNumericalValue(argument_value)); - } else if (argument_name == "--DTS" || argument_name == "--dts" || argument_name == "--Dts") { - SetDenseTileSize(CheckNumericalValue(argument_value)); - } else if (argument_name == "--LTS" || argument_name == "--lts" || argument_name == "--Lts") { - 
SetLowTileSize(CheckNumericalValue(argument_value)); - } else if (argument_name == "--maxRank" || argument_name == "--maxrank" || argument_name == "--max_rank") { - SetMaxRank(CheckNumericalValue(argument_value)); - } else if (argument_name == "--initial_theta" || argument_name == "--itheta" || - argument_name == "--iTheta") { - vector theta = ParseTheta(argument_value); - SetInitialTheta(theta); - } else if (argument_name == "--lb" || argument_name == "--olb" || argument_name == "--lower_bounds") { - vector theta = ParseTheta(argument_value); - SetLowerBounds(theta); - SetStartingTheta(theta); - } else if (argument_name == "--ub" || argument_name == "--oub" || argument_name == "--upper_bounds") { - vector theta = ParseTheta(argument_value); - SetUpperBounds(theta); - } else if (argument_name == "--estimated_theta" || argument_name == "--etheta" || - argument_name == "--eTheta") { - vector theta = ParseTheta(argument_value); - SetEstimatedTheta(theta); - } else if (argument_name == "--ObservationsFile" || argument_name == "--observationsfile" || - argument_name == "--observations_file") { - SetActualObservationsFilePath(argument_value); - } else if (argument_name == "--Seed" || argument_name == "--seed") { - SetSeed(CheckNumericalValue(argument_value)); - } else if (argument_name == "--verbose" || argument_name == "--Verbose") { - ParseVerbose(argument_value); - } else if (argument_name == "--distance_metric" || argument_name == "--distanceMetric") { - ParseDistanceMetric(argument_value); - } else if (argument_name == "--logpath" || argument_name == "--log_path" || argument_name == "--logPath") { - SetLoggerPath(argument_value); - } else { - if (!(argument_name == "--ZmissNumber" || argument_name == "--Zmiss" || - argument_name == "--ZMiss" || argument_name == "--predict" || argument_name == "--Predict" || - argument_name == "--iterations" || - argument_name == "--Iterations" || argument_name == "--max_mle_iterations" || - argument_name == "--maxMleIterations" || 
argument_name == "--opt_iters" || - argument_name == "--tolerance" || argument_name == "--opt_tol" || - argument_name == "--distanceMetric" || argument_name == "--distance_metric" || - argument_name == "--log_file_name" || argument_name == "--logFileName" || - argument_name == "--Band" || argument_name == "--band" || - argument_name == "--DataPath" || argument_name == "--dataPath" || - argument_name == "--data_path" || - argument_name == "--acc" || argument_name == "--Acc")) { - LOGGER("!! " << argument_name << " !!") - throw invalid_argument( - "This argument is undefined, Please use --help to print all available arguments"); - } - } - } else { - if (argument_name == "--help") { - PrintUsage(); - } - if (argument_name == "--OOC" || argument_name == "--ooc") { - SetIsOOC(true); - } else if (argument_name == "--ApproximationMode" || argument_name == "--approximationmode" || - argument_name == "--approximation_mode") { - SetApproximationMode(true); - } else if (argument_name == "--log" || argument_name == "--Log") { - SetLogger(true); - } else { - if (!(argument_name == "--mspe" || argument_name == "--MSPE" || - argument_name == "--idw" || argument_name == "--IDW" || - argument_name == "--mloe-mmom" || argument_name == "--mloe_mmom" || - argument_name == "--fisher" || argument_name == "--Fisher")) { - LOGGER("!! " << argument_name << " !!") - throw invalid_argument( - "This argument is undefined, Please use --help to print all available arguments"); - } - } - } - } + // the CLI arguments overwrite the arguments of the constructor + Parser::ParseCLI(aArgC, apArgV, this->mDictionary); + Validator::Validate(this->mDictionary); + ValidateConfiguration(); +} + +void Configurations::ValidateConfiguration() { // Throw Errors if any of these arguments aren't given by the user. 
if (GetProblemSize() == 0 && GetIsSynthetic()) { throw domain_error("You need to set the problem size, before starting"); } + if (GetDenseTileSize() == 0) { throw domain_error("You need to set the Dense tile size, before starting"); } + + if (!GetDataPath().empty()) { + SetIsSynthetic(false); + } + + if (GetIsMSPE() || GetIsMLOEMMOM() || GetIsIDW()) { + if (GetUnknownObservationsNb() <= 1) { + throw domain_error( + "You need to set ZMiss number, as the number of missing values should be bigger than one"); + } + } + + if (!GetLoggerPath().empty() && !GetLogger()) { + throw domain_error("To enable logging, please utilize the '--log' option in order to specify a log file."); + } + + if (GetUnknownObservationsNb() >= GetProblemSize()) { + throw range_error("Invalid value for ZmissNumber. Please make sure it's smaller than Problem size"); + } + + if (GetComputation() == DIAGONAL_APPROX) { + if (GetBand() == 0) { + throw domain_error("You need to set the tile band thickness, before starting"); + } + } + + if (GetStageZero()) { + if (GetResultsPath().empty()) { + throw domain_error("You need to set the results path (--resultspath) before starting"); + } + } + +#if DEFAULT_RUNTIME // Throw Errors if any of these arguments aren't given by the user. 
if (GetKernelName().empty()) { throw domain_error("You need to set the Kernel, before starting"); } + if (GetMaxRank() == -1) { + SetMaxRank(1); + } +//#else + if(GetMaxRank() == -1){ + SetMaxRank(GetDenseTileSize() / 2); + } + if (mDictionary.find("tolerance") == mDictionary.end()) { + SetTolerance(8); + } + if (GetDataPath().empty()) { + throw domain_error("You need to set the data path, before starting"); + } +#else + if(GetStageZero() && GetKernelName().empty()){ + throw domain_error("You need to set the Kernel for stage 0, before starting"); + } +#endif size_t found = GetKernelName().find("NonGaussian"); // Check if the substring was found if (found != std::string::npos) { SetIsNonGaussian(true); } + + if (GetDimension() != DimensionST) { + if (GetTimeSlot() != 1) { +#if DEFAULT_RUNTIME + throw std::runtime_error("Time Slot can only be greater than 1 if the dimensions are set to SpaceTime."); +#endif + } + } else if (GetTimeSlot() < 1) { + throw std::runtime_error("Time Slot must be at least 1 if the dimensions are set to SpaceTime."); + } + + + if (GetComputation() == TILE_LOW_RANK) { +#ifdef USE_HICMA + if (GetLowTileSize() == 0) { + throw domain_error("You need to set the Low tile size, before starting"); + } +#endif + } } void Configurations::InitializeAllTheta() { @@ -246,145 +190,8 @@ void Configurations::InitializeAllTheta() { } } -void Configurations::InitializeDataGenerationArguments() { - - this->InitializeAllTheta(); - string argument; - string argument_name; - string argument_value; - int equal_sign_Idx; - - // Loop through the arguments that are specific for data generation. - for (int i = 1; i < this->mArgC; ++i) { - argument = this->mpArgV[i]; - equal_sign_Idx = static_cast(argument.find('=')); - argument_name = argument.substr(0, equal_sign_Idx); - - // Check if argument has an equal sign. 
- if (equal_sign_Idx != string::npos) { - argument_value = argument.substr(equal_sign_Idx + 1); - - // Check the argument name and set the corresponding value - if (argument_name == "--DataPath" || argument_name == "--dataPath" || - argument_name == "--data_path") { - SetDataPath(argument_value); - SetIsSynthetic(false); - } - } - } - if (GetDimension() != DimensionST) { - if (GetTimeSlot() != 1) { - throw std::runtime_error("Time Slot can only be greater than 1 if the dimensions are set to SpaceTime."); - } - } else if (GetTimeSlot() < 1) { - throw std::runtime_error("Time Slot must be at least 1 if the dimensions are set to SpaceTime."); - } -} - -void Configurations::InitializeDataModelingArguments() { - - this->InitializeAllTheta(); - string argument; - string argument_name; - string argument_value; - int equal_sign_Idx; - - // Loop through the arguments that are specific for data modeling. - for (int i = 1; i < this->mArgC; ++i) { - argument = this->mpArgV[i]; - equal_sign_Idx = static_cast(argument.find('=')); - argument_name = argument.substr(0, equal_sign_Idx); - - // Check if argument has an equal sign. 
- if (equal_sign_Idx != string::npos) { - argument_value = argument.substr(equal_sign_Idx + 1); - - // Check the argument name and set the corresponding value - if (argument_name == "--max_mle_iterations" || argument_name == "--maxMleIterations" || - argument_name == "--opt_iters") { - SetMaxMleIterations(CheckNumericalValue(argument_value)); - } else if (argument_name == "--tolerance" || argument_name == "--opt_tol") { - SetTolerance(CheckNumericalValue(argument_value)); - } else if (argument_name == "--Band" || argument_name == "--band") { - SetBand(CheckNumericalValue(argument_value)); - } else if (argument_name == "--acc" || argument_name == "--Acc") { - SetAccuracy(CheckNumericalValue(argument_value)); - } else if (argument_name == "--log_file_name" || argument_name == "--logFileName") { - if (!GetLogger()) { - throw domain_error( - "To enable logging, please utilize the '--log' option in order to specify a log file."); - } - SetFileLogName(argument_value); - } - } - } - if (GetComputation() == DIAGONAL_APPROX) { - if (GetBand() == 0) { - throw domain_error("You need to set the tile band thickness, before starting"); - } - } - if (GetComputation() == TILE_LOW_RANK) { -#ifdef USE_HICMA - if (GetLowTileSize() == 0) { - throw domain_error("You need to set the Low tile size, before starting"); - } -#endif - } -} - -void Configurations::InitializeDataPredictionArguments() { - - this->InitializeAllTheta(); - string argument; - string argument_name; - string argument_value; - int equal_sign_Idx; - - for (int i = 1; i < this->mArgC; ++i) { - argument = this->mpArgV[i]; - equal_sign_Idx = static_cast(argument.find('=')); - argument_name = argument.substr(0, equal_sign_Idx); - if (equal_sign_Idx != string::npos) { - argument_value = argument.substr(equal_sign_Idx + 1); - if (argument_name == "--ZmissNumber" || argument_name == "--Zmiss" || argument_name == "--ZMiss" || - argument_name == "--predict" || argument_name == "--Predict") { - 
SetUnknownObservationsNb(CheckUnknownObservationsValue(argument_value)); - } - } - } - - // Loop through the arguments that are specific for Prediction. - for (int i = 1; i < this->mArgC; ++i) { - argument = this->mpArgV[i]; - equal_sign_Idx = static_cast(argument.find('=')); - argument_name = argument.substr(0, equal_sign_Idx); - - if (argument_name == "--mspe" || argument_name == "--MSPE") { - if (GetUnknownObservationsNb() <= 1) { - throw domain_error( - "You need to set ZMiss number, as the number of missing values should be bigger than one"); - } - SetIsMSPE(true); - } else if (argument_name == "--idw" || argument_name == "--IDW") { - if (GetUnknownObservationsNb() <= 1) { - throw domain_error( - "You need to set ZMiss number, as the number of missing values should be bigger than one"); - } - SetIsIDW(true); - } else if (argument_name == "--mloe-mmom" || argument_name == "--MLOE_MMOM" || argument_name == "--mloe_mmom") { - if (GetUnknownObservationsNb() <= 1) { - throw domain_error( - "You need to set ZMiss number, as the number of missing values should be bigger than one"); - } - SetIsMLOEMMOM(true); - } else if (argument_name == "--Fisher" || argument_name == "--fisher") { - //Fisher can be performed without zmiss. 
- SetIsFisher(true); - } - } -} - void Configurations::PrintUsage() { + LOGGER("\n\t*** Available Arguments For ExaGeoStat Configurations ***") LOGGER("--N=value : Problem size.") LOGGER("--kernel=value : Used Kernel.") @@ -402,10 +209,8 @@ void Configurations::PrintUsage() { LOGGER("--Zmiss=value : Used to set number of unknown observation to be predicted.") LOGGER("--observations_file=PATH/TO/File : Used to pass the observations file path.") LOGGER("--max_rank=value : Used to the max rank value.") - LOGGER("--olb=value : Lower bounds for optimization.") - LOGGER("--oub=value : Upper bounds for optimization.") - LOGGER("--itheta=value : Initial theta parameters for optimization.") - LOGGER("--etheta=value : Estimated kernel parameters for optimization.") + LOGGER("--initial_theta=value : Initial theta parameters for optimization.") + LOGGER("--estimated_theta=value : Estimated kernel parameters for optimization.") LOGGER("--seed=value : Seed value for random number generation.") LOGGER("--verbose=value : Run mode whether quiet/standard/detailed.") LOGGER("--log_path=value : Path to log file.") @@ -420,7 +225,18 @@ void Configurations::PrintUsage() { LOGGER("--OOC : Used to enable Out of core technology.") LOGGER("--approximation_mode : Used to enable Approximation mode.") LOGGER("--log : Enable logging.") - LOGGER("--acc : Used to set the accuracy when using tlr.") + LOGGER("--accuracy : Used to set the accuracy when using tlr.") + LOGGER("--band_dense=value : Used to set the dense band double precision, Used with PaRSEC runtime only.") + LOGGER("--objects_number=value : Used to set the number of objects (number of viruses within a population), Used with PaRSEC runtime only.") + LOGGER("--adaptive_decision=value : Used to set the adaptive decision of each tile's format using norm approach, if enabled, otherwise 0, Used with PaRSEC runtime only.") + LOGGER("--add_diagonal=value : Used to add this number to diagonal elements to make the matrix positive definite in 
electrodynamics problem, Used with PaRSEC runtime only.") + LOGGER("--file_time_slot=value : Used to set time slot per file, Used with PaRSEC runtime only.") + LOGGER("--file_number=value : Used to set file number, Used with PaRSEC runtime only.") + LOGGER("--enable-inverse : Used to enable inverse spherical harmonics transform, Used with PaRSEC runtime only.") + LOGGER("--mpiio : Used to enable MPI IO, Used with PaRSEC runtime only.") + LOGGER("--log-file-path: Used to set path of file where events and results are logged.") + LOGGER("--start-year=value : Used to set the starting year for NetCDF data processing (StageZero).") + LOGGER("--end-year=value : Used to set the ending year for NetCDF data processing (StageZero).") LOGGER("\n\n") exit(0); @@ -434,178 +250,6 @@ void Configurations::SetVerbosity(const Verbose &aVerbose) { Configurations::mVerbosity = aVerbose; } -int Configurations::CheckNumericalValue(const string &aValue) { - - int numericalValue; - try { - numericalValue = stoi(aValue); - } - catch (...) { - throw range_error("Invalid value. Please use Numerical values only."); - } - - if (numericalValue < 0) { - throw range_error("Invalid value. Please use positive values"); - } - return numericalValue; -} - -Computation Configurations::CheckComputationValue(const std::string &aValue) { - - if (aValue != "exact" and aValue != "Exact" and aValue != "Dense" and aValue != "dense" and - aValue != "diag_approx" and aValue != "diagonal_approx" and aValue != "lr_approx" and aValue != "tlr" and - aValue != "TLR") { - throw range_error("Invalid value for Computation. 
Please use Exact, diagonal_approx or TLR."); - } - if (aValue == "exact" or aValue == "Exact" or aValue == "Dense" or aValue == "dense") { - return EXACT_DENSE; - } else if (aValue == "diag_approx" or aValue == "diagonal_approx") { - return DIAGONAL_APPROX; - } - return TILE_LOW_RANK; -} - -Precision Configurations::CheckPrecisionValue(const std::string &aValue) { - - if (aValue != "single" and aValue != "Single" and aValue != "double" and aValue != "Double" and aValue != "mix" and - aValue != "Mix" and aValue != "Mixed" and aValue != "mixed") { - throw range_error("Invalid value for Computation. Please use Single, Double or Mixed."); - } - if (aValue == "single" or aValue == "Single") { - return SINGLE; - } else if (aValue == "double" or aValue == "Double") { - return DOUBLE; - } - return MIXED; -} - -void Configurations::ParseVerbose(const std::string &aVerbosity) { - if (aVerbosity == "quiet" || aVerbosity == "Quiet") { - mVerbosity = Verbose::QUIET_MODE; - } else if (aVerbosity == "standard" || aVerbosity == "Standard") { - mVerbosity = Verbose::STANDARD_MODE; - } else if (aVerbosity == "detailed" || aVerbosity == "Detailed" || aVerbosity == "detail") { - mVerbosity = Verbose::DETAILED_MODE; - } else { - LOGGER("Error: " << aVerbosity << " is not valid ") - throw range_error("Invalid value. Please use verbose or standard values only."); - } -} - - -void Configurations::CheckKernelValue(const string &aKernel) { - - // Check if the kernel name exists in the availableKernels set. - if (availableKernels.count(aKernel) <= 0) { - throw range_error("Invalid value for Kernel. 
Please check manual."); - } - // Check if the string is already in CamelCase format - if (IsCamelCase(aKernel)) { - this->SetKernelName(aKernel); - return; - } - string str = aKernel; - // Replace underscores with spaces and split the string into words - std::replace(str.begin(), str.end(), '_', ' '); - std::istringstream iss(str); - std::string word, result; - while (iss >> word) { - // Capitalize the first letter of each word and append it to the result - word[0] = static_cast(toupper(word[0])); - result += word; - } - this->SetKernelName(result); -} - -bool Configurations::IsCamelCase(const std::string &aString) { - // If the string contains an underscore, it is not in CamelCase format - if (aString.find('_') != std::string::npos) { - return false; - } - // If the string starts with a lowercase letter, it is not in CamelCase format - if (islower(aString[0])) { - return false; - } - // If none of the above conditions hold, the string is in CamelCase format - return true; -} - -vector Configurations::ParseTheta(const std::string &aInputValues) { - // Count the number of values in the string - int num_values = 1; - for (char aInputValue: aInputValues) { - if (aInputValue == ':') { - num_values++; - } - } - // Allocate memory for the array of doubles - vector theta; - - // Split the string into tokens using strtok() - const char *delim = ":"; - char *token = strtok((char *) aInputValues.c_str(), delim); - int i = 0; - while (token != nullptr) { - // Check if the token is a valid double or "?" - if (!strcmp(token, "?")) { - theta.push_back(-1); - } else { - try { - theta.push_back(stod(token)); - } - catch (...) { - LOGGER("Error: " << token << " is not a valid double or '?' ") - throw range_error("Invalid value. 
Please use Numerical values only."); - } - } - - // Get the next token - token = strtok(nullptr, delim); - i++; - } - - // Check if the number of values in the array is correct - if (i != num_values) { - throw range_error( - "Error: the number of values in the input string is invalid, please use this example format as a reference 1:?:0.1"); - } - - return theta; -} - -Dimension Configurations::CheckDimensionValue(const string &aDimension) { - - if (aDimension != "2D" and aDimension != "2d" and aDimension != "3D" and aDimension != "3d" and - aDimension != "st" and aDimension != "ST") { - throw range_error("Invalid value for Dimension. Please use 2D, 3D or ST."); - } - if (aDimension == "2D" or aDimension == "2d") { - return Dimension2D; - } else if (aDimension == "3D" or aDimension == "3d") { - return Dimension3D; - } - return DimensionST; -} - -int Configurations::CheckUnknownObservationsValue(const string &aValue) { - int value = CheckNumericalValue(aValue); - if (value >= GetProblemSize()) { - throw range_error("Invalid value for ZmissNumber. Please make sure it's smaller than Problem size"); - } - return value; -} - -void Configurations::ParseDistanceMetric(const std::string &aDistanceMetric) { - if (aDistanceMetric == "eg" || aDistanceMetric == "EG" || aDistanceMetric == "euclidean") { - SetDistanceMetric(EUCLIDEAN_DISTANCE); - } else if (aDistanceMetric == "gcd" || aDistanceMetric == "GCD" || aDistanceMetric == "great_circle") { - SetDistanceMetric(GREAT_CIRCLE_DISTANCE); - } else { - throw range_error("Invalid value. 
Please use eg or gcd values only."); - } -} - - void Configurations::InitTheta(vector &aTheta, const int &size) { // If null, this mean user have not passed the values arguments, Make values equal -1 @@ -623,13 +267,14 @@ void Configurations::InitTheta(vector &aTheta, const int &size) { void Configurations::PrintSummary() { -#ifndef USING_R - Verbose temp = this->GetVerbosity(); +#ifndef USE_R + Verbose temp = Configurations::GetVerbosity(); mVerbosity = STANDARD_MODE; if (!mFirstInit) { LOGGER("********************SUMMARY**********************") +#if DEFAULT_RUNTIME if (this->GetIsSynthetic()) { LOGGER("#Synthetic Data generation") } else { @@ -674,6 +319,16 @@ void Configurations::PrintSummary() { if (this->GetIsOOC()) { LOGGER("#Out Of Core (OOC) technology is enabled") } +#else + LOGGER("#L: " << this->GetDenseTileSize()) + LOGGER("#T: " << this->GetTimeSlot()) + LOGGER("#NB: " << this->GetDenseTileSize()) + LOGGER("#gpus: " << this->GetGPUsNumbers()) + LOGGER("#Nodes: " << this->GetCoresNumber()) + LOGGER("#Time slot per file: " << GetTimeSlotPerFile()) + LOGGER("#Number of files: " << this->GetFileNumber()) + LOGGER("#File per node: " << ((this->GetFileNumber()%this->GetCoresNumber())? this->GetFileNumber()/this->GetCoresNumber()+1 : this->GetFileNumber()/this->GetCoresNumber())) +#endif LOGGER("*************************************************") mFirstInit = true; } @@ -698,5 +353,5 @@ Configurations::~Configurations() { } void Configurations::SetTolerance(double aTolerance) { - mDictionary["Tolerance"] = pow(10, -1 * aTolerance); + mDictionary["tolerance"] = pow(10, -1 * aTolerance); } diff --git a/src/configurations/Parser.cpp b/src/configurations/Parser.cpp new file mode 100644 index 00000000..7f8687b3 --- /dev/null +++ b/src/configurations/Parser.cpp @@ -0,0 +1,81 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. 
+// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** +* @file Parser.cpp +* @version 1.1.0 +* @brief Contains the implementation of the Parser class and its member functions for configuration parsing. +* @details Provides static methods to parse command-line arguments and JSON configuration files, as well as utility functions for string transformations. +* @author Mahmoud ElKarargy +* @date 2024-12-11 +**/ + +#include +#include + +#include + +using namespace std; +using namespace exageostat::configurations::parser; + +void Parser::ParseCLI(const int &aArgC, char **apArgV, unordered_map &apConfigurationMap) { + + string example_name = apArgV[0]; + // Remove the './' + example_name.erase(0, 2); + string argument; + string argument_name; + string argument_value; + int equal_sign_Idx; + + for (int i = 1; i < aArgC; ++i) { + argument = apArgV[i]; + argument = argument.substr(2); + equal_sign_Idx = static_cast(argument.find('=')); + argument_name = argument.substr(0, equal_sign_Idx); + + string converted_name = ProcessKeyString(argument_name); + + if (equal_sign_Idx != string::npos) { + // Store value as a string + argument_value = argument.substr(equal_sign_Idx + 1); + apConfigurationMap[converted_name] = argument_value; + } else { + // For flags, store "true" as a string + apConfigurationMap[converted_name] = string("true"); + } + } +} + +void Parser::ParseJSON(const string &aJSONFile, unordered_map &apConfigurationMap) { + + const auto &config_path = aJSONFile; + ifstream json_file_stream(config_path); + if (!json_file_stream.is_open()) { + throw runtime_error("Could not open JSON configuration file."); + } + + nlohmann::json json_config; + json_file_stream >> json_config; + + for (auto &[key, value]: json_config.items()) { + auto converted_name = ProcessKeyString(key); + apConfigurationMap[converted_name] = value.get(); + + } +} + +string Parser::ProcessKeyString(const string &arg) { + + // Process the 
string to remove dashes/underscores and handle capitalization + std::string result; + for (char ch : arg) { + // Ignore hyphens, underscores, and other non-alphanumeric characters + if (std::isalnum(ch)) { // Keep only alphanumeric characters + result += static_cast(std::tolower(ch)); // Convert to lowercase + } + } + return result; +} diff --git a/src/configurations/Validator.cpp b/src/configurations/Validator.cpp new file mode 100644 index 00000000..b8aa4587 --- /dev/null +++ b/src/configurations/Validator.cpp @@ -0,0 +1,327 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file Validator.cpp + * @brief This file defines the Validator class which validates configuration string input. + * @version 1.1.0 + * @author Mahmoud ElKarargy + * @date 2024-12-11 +**/ + +#include + +#include +#include + +using namespace std; +using namespace exageostat::common; +using namespace exageostat::configurations::validator; + +const unordered_map> Validator::mCheckersMap = { + {"Theta", [](const string &value) -> any { return CheckThetaValue(value); }}, + {"Tolerance", [](const string &value) -> any { return CheckToleranceValue(value); }}, + {"FileHandler", [](const string &value) -> any { return CheckLogFileValue(value); }}, + {"Bool", [](const string &value) -> any { return CheckBoolValue(value); }}, + {"Numerical", [](const string &value) -> any { return CheckNumericalValue(value); }}, + {"Dimension", [](const string &value) -> any { return CheckDimensionValue(value); }}, + {"DistanceMetric", [](const string &value) -> any { return CheckDistanceMetricValue(value); }}, + {"Kernel", [](const string &value) -> any { return CheckKernelValue(value); }}, + {"Verbose", [](const string &value) -> any { return CheckVerboseValue(value); }}, + {"Precision", [](const string &value) -> any { return 
CheckPrecisionValue(value); }}, + {"Computation", [](const string &value) -> any { return CheckComputationValue(value); }} +}; + +const std::unordered_map Validator::mArgumentToCategoryMap = { + {"n", "Numerical"}, + {"kernel", "Kernel"}, + {"p", "Numerical"}, + {"q", "Numerical"}, + {"timeslot", "Numerical"}, + {"computation", "Computation"}, + {"precision", "Precision"}, + {"cores", "Numerical"}, + {"gpus", "Numerical"}, + {"dts", "Numerical"}, + {"lts", "Numerical"}, + {"band", "Numerical"}, + {"maxrank", "Numerical"}, + {"observationnumber", "Numerical"}, + {"observationsfile", "Path"}, + {"filelogname", "Path"}, + {"filelogpath", "FileHandler"}, + {"seed", "Numerical"}, + {"logpath", "Path"}, + {"initialtheta", "Theta"}, + {"ooc", "Bool"}, + {"approximationmode", "Numerical"}, + {"mspe", "Bool"}, + {"fisher", "Bool"}, + {"idw", "Bool"}, + {"log", "Bool"}, + {"lb", "Theta"}, + {"ub", "Theta"}, + {"estimatedtheta", "Theta"}, + {"startingtheta", "Theta"}, + {"isnongaussian", "Bool"}, + {"verbose", "Verbose"}, + {"banddense", "Numerical"}, + {"objectsnumber", "Numerical"}, + {"adaptivedecision", "Numerical"}, + {"adddiagonal", "Numerical"}, + {"filetimeslot", "Numerical"}, + {"filenumber", "Numerical"}, + {"enableinverse", "Bool"}, + {"mpiio", "Bool"}, + {"mloemmom", "Bool"}, + {"dimension", "Dimension"}, + {"stagezero", "Bool"}, + {"issynthetic", "Bool"}, + {"datapath", "Path"}, + {"resultspath", "Path"}, + {"numlocs", "Numerical"}, + {"netcdfdatapath", "Path"}, + {"forcingdatapath", "Path"}, + {"startyear", "Numerical"}, + {"endyear", "Numerical"}, + {"recoveryfile", "Path"}, + {"distancemetric", "DistanceMetric"}, + {"maxmleiterations", "Numerical"}, + {"accuracy", "Numerical"}, + {"tolerance", "Tolerance"}, + {"zmiss", "Numerical"}, + {"missingpattern", "Theta"}, +}; + +Computation Validator::CheckComputationValue(const string &aValue) { + + if (aValue != "exact" and aValue != "Exact" and aValue != "Dense" and aValue != "dense" and + aValue != "diag_approx" 
and aValue != "diagonal_approx" and aValue != "lr_approx" and aValue != "tlr" and + aValue != "TLR") { + throw range_error("Invalid value for Computation. Please use Exact, diagonal_approx or TLR."); + } + if (aValue == "exact" or aValue == "Exact" or aValue == "Dense" or aValue == "dense") { + return EXACT_DENSE; + } else if (aValue == "diag_approx" or aValue == "diagonal_approx") { + return DIAGONAL_APPROX; + } + return TILE_LOW_RANK; +} + +Precision Validator::CheckPrecisionValue(const string &aValue) { + + if (aValue != "single" and aValue != "Single" and aValue != "double" and aValue != "Double" and aValue != "mix" and + aValue != "Mix" and aValue != "Mixed" and aValue != "mixed") { + throw range_error("Invalid value for Computation. Please use Single, Double or Mixed."); + } + if (aValue == "single" or aValue == "Single") { + return SINGLE; + } else if (aValue == "double" or aValue == "Double") { + return DOUBLE; + } + return MIXED; +} + +string Validator::CheckKernelValue(const string &aKernel) { + + // Check if the kernel name exists in the availableKernels set. + if (availableKernels.count(aKernel) <= 0) { + throw range_error("Invalid value for Kernel. 
Please check manual."); + } + // Check if the string is already in CamelCase format + if (IsCamelCase(aKernel)) { + return aKernel; + } + string str = aKernel; + // Replace underscores with spaces and split the string into words + replace(str.begin(), str.end(), '_', ' '); + istringstream iss(str); + string word, result; + while (iss >> word) { + // Capitalize the first letter of each word and append it to the result + word[0] = static_cast(toupper(word[0])); + result += word; + } + return result; +} + +bool Validator::IsCamelCase(const string &aString) { + + // If the string contains an underscore, it is not in CamelCase format + if (aString.find('_') != string::npos) { + return false; + } + // If the string starts with a lowercase letter, it is not in CamelCase format + if (islower(aString[0])) { + return false; + } + // If none of the above conditions hold, the string is in CamelCase format + return true; +} + +Verbose Validator::CheckVerboseValue(const string &aVerbosity) { + + if (aVerbosity == "quiet" || aVerbosity == "Quiet") { + Configurations::SetVerbosity(Verbose::QUIET_MODE); + return Verbose::QUIET_MODE; + } else if (aVerbosity == "standard" || aVerbosity == "Standard") { + Configurations::SetVerbosity(Verbose::STANDARD_MODE); + return Verbose::STANDARD_MODE; + } else if (aVerbosity == "detailed" || aVerbosity == "Detailed" || aVerbosity == "detail") { + Configurations::SetVerbosity(Verbose::DETAILED_MODE); + return Verbose::DETAILED_MODE; + } else { + LOGGER("Error: " << aVerbosity << " is not valid ") + throw range_error("Invalid value. Please use verbose or standard values only."); + } +} + + +int Validator::CheckNumericalValue(const string &aValue) { + + int numerical_value; + try { + numerical_value = stoi(aValue); + } + catch (...) { + throw range_error("Invalid value. Please use Numerical values only."); + } + + if (numerical_value < 0) { + throw range_error("Invalid value. 
Please use positive values"); + } + return numerical_value; +} + + +vector Validator::CheckThetaValue(const string &aInputValues) { + + // Count the number of values in the string + int num_values = 1; + for (char input_value_char: aInputValues) { + if (input_value_char == ':') { + num_values++; + } + } + // Allocate memory for the array of doubles + vector theta; + + if (aInputValues.empty()) { + return theta; + } + + // Split the string into tokens using strtok() + const char *delim = ":"; + char *token = strtok((char *) aInputValues.c_str(), delim); + int i = 0; + while (token != nullptr) { + // Check if the token is a valid double or "?" + if (!strcmp(token, "?")) { + theta.push_back(-1); + } else { + try { + theta.push_back(stod(token)); + } + catch (...) { + LOGGER("Error: " << token << " is not a valid double or '?' ") + throw range_error("Invalid value. Please use Numerical values only."); + } + } + + // Get the next token + token = strtok(nullptr, delim); + i++; + } + + // Check if the number of values in the array is correct + if (i != num_values) { + throw range_error( + "Error: the number of values in the input string is invalid, please use this example format as a reference 1:?:0.1"); + } + + return theta; +} + +Dimension Validator::CheckDimensionValue(const string &aDimension) { + + if (aDimension != "2D" and aDimension != "2d" and aDimension != "3D" and aDimension != "3d" and + aDimension != "st" and aDimension != "ST") { + throw range_error("Invalid value for Dimension. 
Please use 2D, 3D or ST."); + } + if (aDimension == "2D" or aDimension == "2d") { + return Dimension2D; + } else if (aDimension == "3D" or aDimension == "3d") { + return Dimension3D; + } + return DimensionST; +} + +bool Validator::CheckBoolValue(const string &aBooleanValue) { + + // Convert input to lowercase for case-insensitive comparison + std::string lower_case_value = aBooleanValue; + std::transform(lower_case_value.begin(), lower_case_value.end(), lower_case_value.begin(), ::tolower); + + // Check if the value matches "true" in any case + if (lower_case_value == "true") + return true; + else + return false; +} + + +DistanceMetric Validator::CheckDistanceMetricValue(const string &aDistanceMetric) { + + if (aDistanceMetric == "eg" || aDistanceMetric == "EG" || aDistanceMetric == "euclidean") { + return EUCLIDEAN_DISTANCE; + } else if (aDistanceMetric == "gcd" || aDistanceMetric == "GCD" || aDistanceMetric == "great_circle") { + return GREAT_CIRCLE_DISTANCE; + } else { + throw range_error("Invalid value. 
Please use eg or gcd values only."); + } +} + +double Validator::CheckToleranceValue(const string &aTolerancePower) { + + auto val = Validator::CheckNumericalValue(aTolerancePower); + return pow(10, -1 * val); +} + +FILE *Validator::CheckLogFileValue(const std::string &apFileLogPath) { + + if (apFileLogPath.empty()) { + throw std::invalid_argument("Path cannot be empty."); + } + + auto file_handler = fopen(apFileLogPath.c_str(), "w"); + + if (file_handler == nullptr) { + throw std::domain_error("Failed to open file at path " + apFileLogPath + "."); + } + return file_handler; +} + +void Validator::Validate(unordered_map &apArgsMap) { + + for (auto &[key, value]: apArgsMap) { + // Find the appropriate checker based on key + auto category_map_key = mArgumentToCategoryMap.find(key); + if (category_map_key == mArgumentToCategoryMap.end()) { // throw exception if no category + throw invalid_argument("No category for the following argument name: " + key); + } else if (category_map_key->second != "Path") { + auto it = mCheckersMap.find(category_map_key->second); + if (it != mCheckersMap.end()) { + if (value.type() == typeid(std::string)) { + // Apply the checker function + auto checker_function = it->second; + auto checked_value = checker_function(any_cast(value)); + apArgsMap[key] = checked_value; + } + } else { + throw invalid_argument("Unknown key in the args map: " + key); + } + } + } +} diff --git a/src/data-analyzer/CMakeLists.txt b/src/data-analyzer/CMakeLists.txt new file mode 100644 index 00000000..c257f99f --- /dev/null +++ b/src/data-analyzer/CMakeLists.txt @@ -0,0 +1,17 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file CMakeLists.txt +# @brief This file contains the CMake configuration for the data-analyzer directory. 
+# @version 2.0.0 +# @author Mahmoud ElKarargy +# @author Sameh Abdulah +# @date 2024-10-15 + +set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/DataAnalyzer.cpp + ${SOURCES} + PARENT_SCOPE +) \ No newline at end of file diff --git a/src/data-analyzer/DataAnalyzer.cpp b/src/data-analyzer/DataAnalyzer.cpp new file mode 100644 index 00000000..f7e42149 --- /dev/null +++ b/src/data-analyzer/DataAnalyzer.cpp @@ -0,0 +1,61 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file DataAnalyzer.cpp + * @brief Contains the implementation of the DataAnalyzer class. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @author Qinglei Cao + * @date 2024-02-04 +**/ + +#include + +extern "C" { +#include +} + +using namespace exageostat::analyzer; +using namespace exageostat::common; + +template +void +DataAnalyzer::PreAnalyzeMatrix(std::unique_ptr> &aData){ + + auto *pParams = ExaGeoStatHardware::GetHicmaParams(); + auto *pHicma_data = ExaGeoStatHardware::GetHicmaData(); + auto *pAnalysis = ExaGeoStatHardware::GetAnalysis(); + auto *pStarsh_kernel = ExaGeoStatHardware::GetParamsKernel(); + auto *pContext = (parsec_context_t *) ExaGeoStatHardware::GetParsecContext(); + + hicma_parsec_matrix_pre_analysis(pContext, pHicma_data, pParams, pStarsh_kernel, pAnalysis); +} + +template +void +DataAnalyzer::PostAnalyzeMatrix(std::unique_ptr> &aData){ + + auto *pParams = ExaGeoStatHardware::GetHicmaParams(); + auto *pHicma_data = ExaGeoStatHardware::GetHicmaData(); + auto *pAnalysis = ExaGeoStatHardware::GetAnalysis(); + auto *pStarsh_kernel = ExaGeoStatHardware::GetParamsKernel(); + auto *pContext = (parsec_context_t *) ExaGeoStatHardware::GetParsecContext(); + + hicma_parsec_matrix_post_analysis(pContext, pHicma_data, pParams, pStarsh_kernel, pAnalysis); +} + +template +double 
+DataAnalyzer::CompareMatDifference(std::unique_ptr> &aData){ + + // Get parsec descriptors + auto flm_desc = aData->GetDescriptorData()->GetDescriptor(DescriptorType::PARSEC_DESCRIPTOR,DescriptorName::DESCRIPTOR_FLM).parsec_desc; + auto flm_era_desc = aData->GetDescriptorData()->GetDescriptor(DescriptorType::PARSEC_DESCRIPTOR,DescriptorName::DESCRIPTOR_FLMERA).parsec_desc; + + // Call jdf generated function + DifferenceDouble((parsec_context_t *)ExaGeoStatHardware::GetParsecContext(), flm_desc, flm_era_desc); +} diff --git a/src/data-generators/DataGenerator.cpp b/src/data-generators/DataGenerator.cpp index dd4bec8b..98b47810 100644 --- a/src/data-generators/DataGenerator.cpp +++ b/src/data-generators/DataGenerator.cpp @@ -12,45 +12,46 @@ **/ #include +#if DEFAULT_RUNTIME #include -#include +#endif +#include +using namespace exageostat::common; using namespace exageostat::generators; -using namespace exageostat::dataLoader::csv; +using namespace exageostat::dataLoader; +#if DEFAULT_RUNTIME using namespace exageostat::generators::synthetic; -using namespace exageostat::common; -using namespace exageostat::results; +#endif template std::unique_ptr> DataGenerator::CreateGenerator(configurations::Configurations &apConfigurations) { - - //// TODO: In case of other file support, Then we can create another layer for the factory creation depending on the file size. - // Check the used Data generation method, whether it's synthetic or real. - aDataSourceType = apConfigurations.GetIsSynthetic() ? 
SYNTHETIC : CSV_FILE; - - // Return DataGenerator unique pointer of Synthetic type - if (aDataSourceType == SYNTHETIC) { - Results::GetInstance()->SetIsSynthetic(true); +#if DEFAULT_RUNTIME + if (apConfigurations.GetIsSynthetic()){ + aIsSynthetic = true; return std::unique_ptr>(SyntheticGenerator::GetInstance()); - } else if (aDataSourceType == CSV_FILE) { - Results::GetInstance()->SetIsSynthetic(false); - return std::unique_ptr>(CSVLoader::GetInstance()); } else { - throw std::runtime_error("Data Loading for this file type is unsupported for now"); + aIsSynthetic = false; + return DataLoader::CreateDataLoader(apConfigurations); } +#else + // For PaRSEC runtime, the API directly calls StageZeroGeneratorParsec + // This path should not be used for PaRSEC Stage Zero + throw std::runtime_error("DataGenerator::CreateGenerator should not be called for PaRSEC runtime. Use --stage-zero flag instead."); +#endif } template DataGenerator::~DataGenerator() { - // Return DataGenerator unique pointer of Synthetic type - if (aDataSourceType == SYNTHETIC) { +#if DEFAULT_RUNTIME + if (aIsSynthetic) { SyntheticGenerator::GetInstance()->ReleaseInstance(); - } else if (aDataSourceType == CSV_FILE) { - CSVLoader::GetInstance()->ReleaseInstance(); } else { - std::cerr << "Data Loading for this file type is unsupported for now" << std::endl; - std::exit(1); + DataLoader::ReleaseDataLoader(); } +#else + // For PaRSEC runtime, nothing to clean up since we don't use DataLoader +#endif } -template DataSourceType DataGenerator::aDataSourceType = SYNTHETIC; +template bool DataGenerator::aIsSynthetic = true; diff --git a/src/data-generators/concrete/CMakeLists.txt b/src/data-generators/concrete/CMakeLists.txt index bca0dfdb..e90f587c 100644 --- a/src/data-generators/concrete/CMakeLists.txt +++ b/src/data-generators/concrete/CMakeLists.txt @@ -10,9 +10,17 @@ # @author Sameh Abdulah # @date 2023-02-14 -# Add source files to the parent scope -set(SOURCES - 
${CMAKE_CURRENT_SOURCE_DIR}/SyntheticGenerator.cpp - ${SOURCES} - PARENT_SCOPE - ) \ No newline at end of file +# Add source files to the parent scope - always include SyntheticGenerator +list(APPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/SyntheticGenerator.cpp) + +# Add StageZeroGenerator based on runtime type +if (RUNTIME_TYPE STREQUAL "STARPU") + # Use StarPU/CHAMELEON version + list(APPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/StageZeroGenerator.cpp) +elseif (RUNTIME_TYPE STREQUAL "PARSEC") + # Use PaRSEC version + list(APPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/StageZeroGeneratorParsec.cpp) +endif() + +# Set the final SOURCES to parent scope +set(SOURCES ${SOURCES} PARENT_SCOPE) diff --git a/src/data-generators/concrete/StageZeroGenerator.cpp b/src/data-generators/concrete/StageZeroGenerator.cpp new file mode 100644 index 00000000..7328d8d8 --- /dev/null +++ b/src/data-generators/concrete/StageZeroGenerator.cpp @@ -0,0 +1,831 @@ +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). 
+ +/** + * @file StageZeroGenerator.cpp + * @brief Implementation of the StageZeroGenerator class + * @version 1.1.0 +**/ + +#include +#include +#if !DEFAULT_RUNTIME +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#define START_TIMING(x) double _start_##x = MPI_Wtime(); +#define STOP_TIMING(x) x = MPI_Wtime() - _start_##x; + +namespace { +constexpr int kForcingOffset = 238; +} + +using namespace exageostat::generators::stagezero; +using namespace exageostat::common; +using namespace exageostat::configurations; +using namespace exageostat::results; + +template +StageZeroGenerator *StageZeroGenerator::GetInstance() { + if (mpInstance == nullptr) { + mpInstance = new StageZeroGenerator(); + } + return mpInstance; +} + +template +std::unique_ptr> +StageZeroGenerator::CreateData(Configurations &aConfigurations, + exageostat::kernels::Kernel &aKernel) { + this->Runner(aConfigurations); + return std::move(this->mData); +} + +template +void StageZeroGenerator::ReleaseInstance() { + if (mpInstance != nullptr) { + delete mpInstance; + mpInstance = nullptr; + } +} + +template +void StageZeroGenerator::Runner(Configurations &aConfigurations) { + mArgs.mConfigs = &aConfigurations; + this->ConfigureGenerator(); + this->Allocate(); + this->ReadForcingData(); + this->ReadNetCDFFiles(); + this->RunMeanTrend(); + + this->CleanUp(); +} + +template +void StageZeroGenerator::ConfigureGenerator() { + + // Values for the mean trend removal + mArgs.mM = 10; + mArgs.mT = 365*24; + mArgs.mNoYears = 751; + + // Only support trend_model for this workflow + if (mArgs.mConfigs->GetKernelName() == "TrendModel" || mArgs.mConfigs->GetKernelName() == "trend_model") { + mArgs.mNumParams = 1; + } else { + throw std::invalid_argument("Unsupported kernel for Stage Zero: only 'TrendModel' is 
supported"); + } + + // Derive observation years and N from configuration (keep forcing length separate) + int start_year = mArgs.mConfigs->GetStartYear(); + int end_year = mArgs.mConfigs->GetEndYear(); + if (end_year < start_year) { + throw std::runtime_error("EndYear must be >= StartYear"); + } + int obs_years = (end_year - start_year + 1); + mArgs.mN = static_cast(mArgs.mT) * static_cast(obs_years); + + // Number of locations (prefer explicit arg; fallback to configured ProblemSize) + try { mArgs.mNumLocs = mArgs.mConfigs->GetNumLocs(); } + catch (...) { mArgs.mNumLocs = mArgs.mConfigs->GetProblemSize(); } + + +} + +template +void StageZeroGenerator::ReadNetCDFFiles() { + + int ncid, retval; + MPI_Offset lat_len, lon_len, time_len; + int lon_varid, lat_varid, time_varid, t2m_varid; + int v = 365; + int rank, nprocs; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + int start_year = mArgs.mConfigs->GetStartYear(); + int end_year = mArgs.mConfigs->GetEndYear(); + + auto openFileNCmpi = [&](char *filename) { + int id, ret; + double start_time = MPI_Wtime(); + if ((ret = ncmpi_open(MPI_COMM_WORLD, filename, NC_NOWRITE, MPI_INFO_NULL, &id))) { + double end_time = MPI_Wtime(); + fprintf(stderr, "[StageZero] FAILED to open NetCDF file: %s (%.3f sec) - Error: %s\n", + filename, end_time - start_time, ncmpi_strerror(ret)); + throw std::runtime_error("Error opening NetCDF file: " + std::string(ncmpi_strerror(ret))); + } + double end_time = MPI_Wtime(); + fprintf(stderr, "[StageZero] SUCCESS opening NetCDF file: %s (%.3f sec)\n", filename, end_time - start_time); + return id; + }; + + auto closeFileNCmpi = [&](int id) { + int ret; + if ((ret = ncmpi_close(id))) { + throw std::runtime_error("Error closing NetCDF file: " + std::string(ncmpi_strerror(ret))); + } + }; + + + char path[256]; + std::string data_path = mArgs.mConfigs->GetDataPath(); + if (!data_path.empty() && data_path.back() != '/') data_path += '/'; + snprintf(path, 
sizeof(path), "%sdata_%d.nc", data_path.c_str(), start_year); + + fprintf(stderr, "[StageZero] Loading initial NetCDF file for dimension setup: %s\n", path); + ncid = openFileNCmpi(path); + + // Dimension IDs and lengths with error checks (exactly like reference) + if ((retval = ncmpi_inq_dimid(ncid, "longitude", &lon_varid))) + std::cerr << "Error: " << ncmpi_strerror(retval) << std::endl; + if ((retval = ncmpi_inq_dimlen(ncid, lon_varid, &lon_len))) + std::cerr << "Error: " << ncmpi_strerror(retval) << std::endl; + + if ((retval = ncmpi_inq_dimid(ncid, "latitude", &lat_varid))) + std::cerr << "Error: " << ncmpi_strerror(retval) << std::endl; + if ((retval = ncmpi_inq_dimlen(ncid, lat_varid, &lat_len))) + std::cerr << "Error: " << ncmpi_strerror(retval) << std::endl; + + if ((retval = ncmpi_inq_dimid(ncid, "time", &time_varid))) + std::cerr << "Error: " << ncmpi_strerror(retval) << std::endl; + + if ((retval = ncmpi_inq_varid(ncid, "t2m", &t2m_varid))) + std::cerr << "Error: " << ncmpi_strerror(retval) << std::endl; + + closeFileNCmpi(ncid); + + int min_loc = mArgs.mConfigs->GetLowTileSize(); + + for (int y = start_year; y <= end_year; y++) { + char path2[256]; + snprintf(path2, sizeof(path2), "%sdata_%d.nc", data_path.c_str(), y); + + fprintf(stderr, "[StageZero] Loading NetCDF data for year %d: %s\n", y, path2); + ncid = openFileNCmpi(path2); + + + double scaling_var = 1.0, offset_var = 0.0; + if ((retval = ncmpi_get_att_double(ncid, t2m_varid, "scale_factor", &scaling_var))) { + std::cerr << "Warning: missing scale_factor: " << ncmpi_strerror(retval) << std::endl; + } + if ((retval = ncmpi_get_att_double(ncid, t2m_varid, "add_offset", &offset_var))) { + std::cerr << "Warning: missing add_offset: " << ncmpi_strerror(retval) << std::endl; + } + + int *t2m = nullptr; + int *t2m_local = nullptr; + + if (IsLeapYear(y)) time_len = v * 24 + 24; + else time_len = v * 24; + + size_t total_elems = static_cast(time_len) * static_cast(mArgs.mNumLocs); + size_t local_elems 
= static_cast(time_len / nprocs) * static_cast(mArgs.mNumLocs); + t2m = (int *) malloc(total_elems * sizeof(int)); + t2m_local = (int *) malloc(local_elems * sizeof(int)); + + MPI_Offset index[] = {(MPI_Offset) (time_len / nprocs) * rank, + (MPI_Offset) min_loc, 0}; + MPI_Offset count[] = {(MPI_Offset) (time_len / nprocs), 1, (MPI_Offset) mArgs.mNumLocs}; + + double x_read = 0; + START_TIMING(x_read) + ncmpi_get_vara_int_all(ncid, t2m_varid, index, count, t2m_local); + STOP_TIMING(x_read) + + double gather_start = MPI_Wtime(); + MPI_Allgather(t2m_local, (int) local_elems, MPI_INT, t2m, + (int) local_elems, MPI_INT, MPI_COMM_WORLD); + double gather_time = MPI_Wtime() - gather_start; + + fprintf(stderr, "[StageZero] Year %d: Read time: %.3f sec, Gather time: %.3f sec\n", y, x_read, gather_time); + + for (int lu = 0; lu < mArgs.mNumLocs; lu++) { + int r = 0; + for (size_t k = lu; k < (size_t) time_len * mArgs.mNumLocs; k += mArgs.mNumLocs) { + if (!(IsLeapYear(y) && (r >= 1416 && r <= 1439))) { + mArgs.mT2mHourlyPerYear[lu][mArgs.mT2mHourlyPerYearCount[lu]++] = + (static_cast(t2m[k]) * scaling_var) + offset_var - 273.15; + } + r++; + } + } + closeFileNCmpi(ncid); + free(t2m); + free(t2m_local); + fprintf(stderr, "[StageZero] Completed processing year %d\n\n", y); + } + fprintf(stderr, "[StageZero] NetCDF loading completed for years %d-%d (%d files)\n\n", + start_year, end_year, end_year - start_year + 1); +} + +template +void StageZeroGenerator::ReadForcingData() { + std::string forcing_path = mArgs.mConfigs->GetForcingDataPath(); + if (!forcing_path.empty() && forcing_path.back() == '/') forcing_path.pop_back(); + + fprintf(stderr, "[StageZero] Loading forcing data from: %s\n", forcing_path.c_str()); + mArgs.mForcing = ReadObsFile((char *) forcing_path.c_str(), mArgs.mNoYears); + fprintf(stderr, "[StageZero] Successfully loaded forcing data (%d years)\n\n", mArgs.mNoYears); +} + +template +void StageZeroGenerator::Allocate() { + + mArgs.mStartingTheta = new 
double[mArgs.mNumParams]; + mArgs.mTargetTheta = new double[mArgs.mNumParams]; + mArgs.mInitialTheta = new double[mArgs.mNumParams]; + mArgs.mLb = new double[mArgs.mNumParams]; + mArgs.mUp = new double[mArgs.mNumParams]; + + // Copy configuration values to allocated arrays + auto lb_vec = mArgs.mConfigs->GetLowerBounds(); + auto up_vec = mArgs.mConfigs->GetUpperBounds(); + auto starting_theta_vec = mArgs.mConfigs->GetStartingTheta(); + + for(size_t i = 0; i < mArgs.mNumParams; ++i) { + mArgs.mLb[i] = lb_vec[i]; + mArgs.mUp[i] = up_vec[i]; + mArgs.mStartingTheta[i] = starting_theta_vec[i]; + } + + mArgs.mT2mHourlyPerYear = (double **) malloc(mArgs.mNumLocs * sizeof(double *)); + mArgs.mT2mHourlyPerYearCount = (int *) calloc(mArgs.mNumLocs, sizeof(int)); + for (int k = 0; k < mArgs.mNumLocs; k++) { + mArgs.mT2mHourlyPerYear[k] = (double *) malloc(mArgs.mN * sizeof(double)); + } +} + +template +void StageZeroGenerator::RunMeanTrend() { + const int lts = mArgs.mConfigs->GetLowTileSize(); + const int no_locs = mArgs.mNumLocs; + +#if defined(CHAMELEON_USE_MPI) + if (CHAMELEON_Comm_rank() == 0) { +#endif + const std::string kernel = mArgs.mConfigs->GetKernelName(); + const std::string data_path = mArgs.mConfigs->GetDataPath(); + const std::string forcing_path = mArgs.mConfigs->GetForcingDataPath(); + const std::string results_path = mArgs.mConfigs->GetResultsPath(); + const int start_year = mArgs.mConfigs->GetStartYear(); + const int end_year = mArgs.mConfigs->GetEndYear(); + const int years = mArgs.mNoYears; + const int period_hours = mArgs.mT; + const int M = mArgs.mM; + const size_t N = mArgs.mN; + const int num_locs = mArgs.mNumLocs; + const double tol = mArgs.mConfigs->GetTolerance(); + const int maxeval = mArgs.mConfigs->GetMaxMleIterations(); + const double st = mArgs.mStartingTheta[0]; + const double lb0 = mArgs.mLb[0]; + const double ub0 = mArgs.mUp[0]; + const int dts = mArgs.mConfigs->GetDenseTileSize(); + const int lts_val = mArgs.mConfigs->GetLowTileSize(); + 
fprintf(stderr, "----- StageZero Arguments -----\n"); + fprintf(stderr, "kernel: %s\n", kernel.c_str()); + fprintf(stderr, "data_path: %s\n", data_path.c_str()); + fprintf(stderr, "forcing_data_path: %s\n", forcing_path.c_str()); + fprintf(stderr, "results_path: %s\n", results_path.c_str()); + fprintf(stderr, "start_year: %d, end_year: %d, years: %d\n", start_year, end_year, end_year-start_year+1); + fprintf(stderr, "num_locs: %d\n", num_locs); + fprintf(stderr, "period_hours(T): %d, harmonics(M): %d\n", period_hours, M); + fprintf(stderr, "N (observations): %zu\n", N); + fprintf(stderr, "tolerance(ftol_abs): %.10e, maxeval: %d\n", tol, maxeval); + fprintf(stderr, "-------------------------------\n"); +#if defined(CHAMELEON_USE_MPI) + } +#endif + + // Key bounds info + fprintf(stderr, "Starting theta[0]: %f\n", mArgs.mStartingTheta[0]); + fprintf(stderr, "Lower bound: %f, Upper bound: %f\n", mArgs.mLb[0], mArgs.mUp[0]); + + // Initialize CHAMELEON and create descriptors ONCE + this->SetupMLEComponents(); + + // Set up NLOPT optimization ONCE + nlopt_opt opt = nlopt_create(NLOPT_LN_BOBYQA, mArgs.mNumParams); + nlopt_set_lower_bounds(opt, mArgs.mLb); + nlopt_set_upper_bounds(opt, mArgs.mUp); + nlopt_set_max_objective(opt, &StageZeroGenerator::StageZeroObjectiveCallback, (void *)this); + + // Use configured tolerance and max iterations (defaults preserved in config) + nlopt_set_ftol_abs(opt, mArgs.mConfigs->GetTolerance()); + nlopt_set_maxeval(opt, mArgs.mConfigs->GetMaxMleIterations()); + + // Working theta vector (mutable for NLopt) + std::vector theta(mArgs.mNumParams, 0.0); + + // Process locations + for (int l = 0; l < no_locs; ++l) { + mArgs.mIterCount = 0; // reset iteration counter + mArgs.mCurrentLocation = l; // Set current location for MLEAlgorithm + + // Fill Z for current location + this->ConvertT2MToZForLocation(l); + + // Optimization + fprintf(stderr, "[StageZero] Starting NLopt for location %d/%d\n", l + 1, no_locs); + // Respect configured starting 
theta + theta[0] = mArgs.mStartingTheta[0]; + double opt_f = 0.0; + + // Re-register the objective before each optimize call + nlopt_set_max_objective(opt, &StageZeroGenerator::StageZeroObjectiveCallback, (void *)this); + nlopt_result nlres = nlopt_optimize(opt, theta.data(), &opt_f); + fprintf(stderr, "[StageZero] NLopt finished (res=%d), theta=%0.10f, f=%0.10f\n\n", (int) nlres, theta[0], opt_f); + + // Recompute full pipeline once with optimal theta to generate CSV (apObj == nullptr) + { + std::vector grad; // unused + this->MLEAlgorithm(theta, grad, nullptr); + } + } + + // Cleanup + nlopt_destroy(opt); + +} + +template +void StageZeroGenerator::CleanUp() { + + delete[] mArgs.mStartingTheta; + delete[] mArgs.mTargetTheta; + delete[] mArgs.mInitialTheta; + delete[] mArgs.mLb; + delete[] mArgs.mUp; + delete[] mArgs.mForcing; + + for (int i = 0; i < mArgs.mNumLocs; ++i) { + free(mArgs.mT2mHourlyPerYear[i]); + } + free(mArgs.mT2mHourlyPerYear); + free(mArgs.mT2mHourlyPerYearCount); + + // Clean up CHAMELEON descriptors + if (mArgs.mpDescZ) { + CHAMELEON_Desc_Destroy((CHAM_desc_t**)&mArgs.mpDescZ); + mArgs.mpDescZ = nullptr; + } + if (mArgs.mpX) { + CHAMELEON_Desc_Destroy((CHAM_desc_t**)&mArgs.mpX); + mArgs.mpX = nullptr; + } + if (mArgs.mpXtX) { + CHAMELEON_Desc_Destroy((CHAM_desc_t**)&mArgs.mpXtX); + mArgs.mpXtX = nullptr; + } + if (mArgs.mpDescPart1) { + CHAMELEON_Desc_Destroy((CHAM_desc_t**)&mArgs.mpDescPart1); + mArgs.mpDescPart1 = nullptr; + } + if (mArgs.mpDescPart2) { + CHAMELEON_Desc_Destroy((CHAM_desc_t**)&mArgs.mpDescPart2); + mArgs.mpDescPart2 = nullptr; + } + if (mArgs.mpPart2Vector) { + CHAMELEON_Desc_Destroy((CHAM_desc_t**)&mArgs.mpPart2Vector); + mArgs.mpPart2Vector = nullptr; + } + if (mArgs.mpEstimatedMeanTrend) { + CHAMELEON_Desc_Destroy((CHAM_desc_t**)&mArgs.mpEstimatedMeanTrend); + mArgs.mpEstimatedMeanTrend = nullptr; + } +} + +template +void StageZeroGenerator::SetupMLEComponents() { + + int N = mArgs.mN; + int nparams = 3 + 2*mArgs.mM; + int 
dts = mArgs.mConfigs->GetDenseTileSize(); + int p_grid = 1, q_grid = 1; // Single process grid for now + + fprintf(stderr, "Creating descriptors: N=%d, nparams=%d, dts=%d\n", N, nparams, dts); + + // Initialize iteration counter + mArgs.mIterCount = 0; + + // Allocate CHAMELEON descriptors - Use UNIFORM tile size for compatibility + // Z vector (observations) - N x 1 + CHAMELEON_Desc_Create((CHAM_desc_t**)&mArgs.mpDescZ, NULL, ChamRealDouble, + dts, dts, dts*dts, N, 1, 0, 0, N, 1, p_grid, q_grid); + + // X matrix (design matrix) - N x nparams + CHAMELEON_Desc_Create((CHAM_desc_t**)&mArgs.mpX, NULL, ChamRealDouble, + dts, dts, dts*dts, N, nparams, 0, 0, N, nparams, p_grid, q_grid); + + // XtX matrix - nparams x nparams (use same tile size as others) + CHAMELEON_Desc_Create((CHAM_desc_t**)&mArgs.mpXtX, NULL, ChamRealDouble, + dts, dts, dts*dts, nparams, nparams, 0, 0, nparams, nparams, p_grid, q_grid); + + // part1 scalar - 1 x 1 (use same tile size pattern) + CHAMELEON_Desc_Create((CHAM_desc_t**)&mArgs.mpDescPart1, NULL, ChamRealDouble, + dts, dts, dts*dts, 1, 1, 0, 0, 1, 1, p_grid, q_grid); + + // part2 scalar - 1 x 1 (use same tile size pattern) + CHAMELEON_Desc_Create((CHAM_desc_t**)&mArgs.mpDescPart2, NULL, ChamRealDouble, + dts, dts, dts*dts, 1, 1, 0, 0, 1, 1, p_grid, q_grid); + + // part2_vector - nparams x 1 (use same tile size pattern) + CHAMELEON_Desc_Create((CHAM_desc_t**)&mArgs.mpPart2Vector, NULL, ChamRealDouble, + dts, dts, dts*dts, nparams, 1, 0, 0, nparams, 1, p_grid, q_grid); + + // estimated_mean_trend vector - N x 1 (for final mean trend calculation) + CHAMELEON_Desc_Create((CHAM_desc_t**)&mArgs.mpEstimatedMeanTrend, NULL, ChamRealDouble, + dts, dts, dts*dts, N, 1, 0, 0, N, 1, p_grid, q_grid); + + fprintf(stderr, "All CHAMELEON descriptors created successfully\n\n"); +} + +template +void StageZeroGenerator::ConvertT2MToZForLocation(int location_index) { + + auto *DESC_Z = static_cast(mArgs.mpDescZ); + int N = mArgs.mN; + + if (location_index < 0 || 
location_index >= mArgs.mNumLocs) { + throw std::runtime_error("ConvertT2MToZForLocation: invalid location index"); + } + + // Copy data from specified location to a LAPACK buffer, then tile it into Z + std::vector z_lap(static_cast(N), 0.0); + int count = std::min(N, mArgs.mT2mHourlyPerYearCount[location_index]); + for (int i = 0; i < count; i++) { + z_lap[static_cast(i)] = mArgs.mT2mHourlyPerYear[location_index][i]; + } + CHAMELEON_Lapack_to_Tile(static_cast(z_lap.data()), N, DESC_Z); +} + +template +void StageZeroGenerator::GenerateDesignMatrixExact(double *matrix, int m, int n, int m0, int n0, double *localtheta) { + // localtheta layout: [theta, T, M, forcing...] + const double theta = localtheta[0]; + const int T_val = static_cast(localtheta[1]); + const int M_val = static_cast(localtheta[2]); + double *forcing = &localtheta[3]; + + int i0 = m0; + int j0_start = n0; + double i_x = static_cast(i0) + 1.0; + + // Precompute the AR-term recursively: r[0] = 0; r[n+1] = theta*r[n] + (1-theta)*forcing[n] + std::vector ar_acc(static_cast(mArgs.mNoYears) + 1, 0.0); + for (int n = 0; n < mArgs.mNoYears; ++n) { + ar_acc[static_cast(n + 1)] = theta * ar_acc[static_cast(n)] + (1.0 - theta) * forcing[n]; + } + + for (int i = 0; i < m; i++) { + int j0 = j0_start; + for (int j = 0; j < n; j++) { + const int idx = i + j * m; // column-major + if (j0 == 0) { + // First column: constant 1 + matrix[idx] = 1.0; + } else if (j0 == 1) { + // Second column: forcing[(i0/T) + OFFSET] + const int ty = (i0 / T_val); + const int f_idx = ty + kForcingOffset; + matrix[idx] = forcing[std::min(f_idx, mArgs.mNoYears - 1)]; + } else if (j0 == 2) { + // Third column: use recursive accumulation r[n] + const int ty = (i0 / T_val); + const int n_idx = std::min(ty + kForcingOffset, mArgs.mNoYears); + matrix[idx] = ar_acc[static_cast(n_idx)]; + } else { + // Remaining 2*M columns: alternate sin/cos per C core (j even -> sin, j odd -> cos) + // frequency index = floor((j0-3)/2) + 1 in [1..M] + const 
int freq = static_cast(std::floor((j0 - 3.0) / 2.0)) + 1; + const double angle = 2.0 * M_PI * (i_x) * static_cast(freq) / static_cast(T_val); + if ((j % 2) == 0) { + matrix[idx] = std::sin(angle); + } else { + matrix[idx] = std::cos(angle); + } + } + j0++; + } + i0++; + i_x += 1.0; + } +} + + +template +double StageZeroGenerator::MLEAlgorithm(const std::vector &aThetaVec, + std::vector &aGrad, void *apObj) { + + // Get descriptors + auto *Zobs = static_cast(mArgs.mpDescZ); + auto *X = static_cast(mArgs.mpX); + auto *XtX = static_cast(mArgs.mpXtX); + auto *part1 = static_cast(mArgs.mpDescPart1); + auto *part2 = static_cast(mArgs.mpDescPart2); + auto *part2_vector = static_cast(mArgs.mpPart2Vector); + int N = X->m; + double value = 0.0; + const bool is_final_call = (apObj == nullptr); // final call after optimization + + // Null pointer checks + if (!X) { throw std::runtime_error("X descriptor is null"); } + if (!Zobs) { throw std::runtime_error("Zobs descriptor is null"); } + + + + // Compose local parameter vector + double* localtheta = (double *) malloc((3+mArgs.mNoYears) * sizeof(double)); + localtheta[0] = aThetaVec[0]; // theta[0] + localtheta[1] = mArgs.mT; // Direct access: 8760 + localtheta[2] = mArgs.mM; // Direct access: 10 + + for(int ii=0; iim; + int Ncols = X->n; + double *x_lap = (double*)calloc((size_t)Nrows * (size_t)Ncols, sizeof(double)); + if (!x_lap) { throw std::runtime_error("Allocation failed for design matrix buffer"); } + this->GenerateDesignMatrixExact(x_lap, Nrows, Ncols, 0, 0, localtheta); + CHAMELEON_Lapack_to_Tile((void*)x_lap, Nrows, X); + free(x_lap); + } + + // Ensure workspaces are clean before computations + CHAMELEON_dlaset_Tile(ChamUpperLower, 0.0, 0.0, part1); + CHAMELEON_dlaset_Tile(ChamUpperLower, 0.0, 0.0, part2); + CHAMELEON_dlaset_Tile(ChamUpperLower, 0.0, 0.0, part2_vector); + CHAMELEON_dlaset_Tile(ChamUpperLower, 0.0, 0.0, XtX); + + // Step 2: part1 = Z^T * Z + CHAMELEON_dgemm_Tile(ChamTrans, ChamNoTrans, 1.0, Zobs, Zobs, 
0.0, part1); + + // Step 3: part2_vector = X^T * Z + CHAMELEON_dgemm_Tile(ChamTrans, ChamNoTrans, 1.0, X, Zobs, 0.0, part2_vector); + + // Step 4: XtX = X^T * X + CHAMELEON_dgemm_Tile(ChamTrans, ChamNoTrans, 1.0, X, X, 0.0, XtX); + + // Step 5: Cholesky decomposition of XtX + { + int info = CHAMELEON_dpotrf_Tile(ChamLower, XtX); + if(info != 0) { + if (apObj) { free(localtheta); return -1e18; } + else { + fprintf(stderr, "[StageZero] ERROR: Cholesky(X^T X) failed in final pass. Aborting.\n"); + free(localtheta); + exit(1); + } + } + } + + // Step 6: First triangular solve: y = L^{-1} (X^T Z) + CHAMELEON_dtrsm_Tile(ChamLeft, ChamLower, ChamNoTrans, ChamNonUnit, 1.0, XtX, part2_vector); + // For the final pass compute beta = L^{-T} y before forming the trend + if (!apObj) { + CHAMELEON_dtrsm_Tile(ChamLeft, ChamLower, ChamTrans, ChamNonUnit, 1.0, XtX, part2_vector); + } + + // If this is an optimization callback, compute and return the C objective using y^T y + if (apObj) { + double part1_val_cb = 0.0; + if (part1 && part1->mat) part1_val_cb = static_cast(part1->mat)[0]; + // Compute part2 = y^T y + CHAMELEON_dlaset_Tile(ChamUpperLower, 0.0, 0.0, part2); + CHAMELEON_dgemm_Tile(ChamTrans, ChamNoTrans, 1.0, part2_vector, part2_vector, 0.0, part2); + double part2_val_cb = 0.0; + if (part2 && part2->mat) part2_val_cb = static_cast(part2->mat)[0]; + // If numerical issues cause part1 <= part2, penalize + if (part1_val_cb <= part2_val_cb) { + free(localtheta); + return -1e18; + } + value = (-1.0) * std::log(part1_val_cb - part2_val_cb); + mArgs.mIterCount++; + free(localtheta); + return value; + } + + // Optional: second triangular solve is only needed for final beta; handled below + + // Step 7: Final computation with CSV output + // Create estimated_mean_trend = X * beta (using part2_vector as beta) + auto *estimated_mean_trend = static_cast(mArgs.mpEstimatedMeanTrend); + CHAMELEON_dgemm_Tile(ChamNoTrans, ChamNoTrans, 1.0, X, part2_vector, 0.0, estimated_mean_trend); + + 
// Residuals = Z - trend (stored back into estimated_mean_trend) + CHAMELEON_dgeadd_Tile(ChamNoTrans, 1.0, Zobs, -1.0, estimated_mean_trend); + + // Compute sigma² = residuals^T * residuals + auto *sigma_desc = static_cast(mArgs.mpDescPart2); + CHAMELEON_dgemm_Tile(ChamTrans, ChamNoTrans, 1.0, estimated_mean_trend, estimated_mean_trend, 0.0, sigma_desc); + + // Get sigma² value and divide by N (like C version) + double sigma_squared_raw = static_cast(sigma_desc->mat)[0]; + double sigma_squared = sigma_squared_raw / N; + + // Pull residuals to LAPACK layout for correct order and normalization + double *emt_lap = (double*)calloc((size_t)N, sizeof(double)); + if (!emt_lap) { throw std::runtime_error("Allocation failed for emt_lap"); } + CHAMELEON_Tile_to_Lapack(estimated_mean_trend, emt_lap, N); + + // Standardize residuals: residuals = residuals / sqrt(sigma²) + double sqrt_sigma = std::sqrt(sigma_squared); + for (int i = 0; i < N; ++i) { + emt_lap[i] /= sqrt_sigma; + } + + // Set objective value (maximize -sigma^2 -> minimize sigma^2) + value = -sigma_squared; + + // Static global arrays for multi-location storage (like C version) + static std::vector> Z_new(mArgs.mNumLocs, std::vector(N)); + static std::vector> params(mArgs.mNumLocs, std::vector(3 + 2*mArgs.mM + 2)); + + // Store normalized residuals for this location + int location_index = mArgs.mCurrentLocation; // Current location being processed (0-based) + for (int i = 0; i < N; ++i) { + Z_new[location_index][i] = emt_lap[i]; + } + free(emt_lap); + + // Pull beta to LAPACK layout and store parameters + int vlen = part2_vector->m; + double *p2v_lap = (double*)calloc((size_t)vlen, sizeof(double)); + if (!p2v_lap) { throw std::runtime_error("Allocation failed for p2v_lap"); } + CHAMELEON_Tile_to_Lapack(part2_vector, p2v_lap, vlen); + params[location_index][0] = aThetaVec[0]; // optimized theta + params[location_index][1] = sigma_squared; // sigma² + for(int i = 0; i < 3 + 2*mArgs.mM; i++) { + 
params[location_index][i+2] = p2v_lap[i]; + } + free(p2v_lap); + + // Print debug info matching C version exactly + // remove noisy debug prints + + // Write CSV files only on the final call and when processing the last location + if (is_final_call && (location_index == mArgs.mNumLocs - 1)) { + std::string results_path; + try { + results_path = mArgs.mConfigs->GetResultsPath(); + } catch (...) { + results_path.clear(); + } + + if (results_path.empty()) { + fprintf(stderr, "[StageZero] ERROR: ResultsPath is required. Please set --resultspath.\n"); + throw std::runtime_error("ResultsPath is required. Please set --resultspath."); + } + + if (results_path.back() != '/') results_path += "/"; + + // Create results directory if it doesn't exist + try { + if (!std::filesystem::exists(results_path)) { + fprintf(stderr, "[StageZero] Creating results directory: %s\n", results_path.c_str()); + std::filesystem::create_directories(results_path); + } + } catch (const std::exception &e) { + fprintf(stderr, "[StageZero] ERROR: Failed to create output directory: %s (%s)\n", results_path.c_str(), e.what()); + throw std::runtime_error("Failed to create output directory: " + std::string(e.what())); + } + + fprintf(stderr, "[StageZero] Writing outputs to: %s\n", results_path.c_str()); + + // Write Z files for each time slot (replace mode instead of append) + #pragma omp parallel for + for (int time_slot = 0; time_slot < N; time_slot++) { + char file_path[256]; + std::snprintf(file_path, sizeof(file_path), "%sz_%d.csv", results_path.c_str(), time_slot); + + // Retry loop for file locking with replace mode + while (true) { + FILE *fp = std::fopen(file_path, "w"); // Replace mode instead of append + if (!fp) { + fprintf(stderr, "[StageZero] WARNING: Failed to open file %s, retrying...\n", file_path); + std::this_thread::sleep_for(std::chrono::seconds(1)); + continue; + } + + // Write all locations for this time slot + for (int loc = 0; loc < mArgs.mNumLocs; ++loc) { + std::fprintf(fp, 
"%.14f\n", Z_new[loc][time_slot]); + } + std::fclose(fp); + break; + } + } + + // Write params file (replace mode) + char params_file_path[256]; + std::snprintf(params_file_path, sizeof(params_file_path), "%sparams.csv", results_path.c_str()); + FILE* fp = std::fopen(params_file_path, "w"); // Replace mode + if(fp == NULL){ + fprintf(stderr, "[StageZero] ERROR: Failed to create params file: %s\n", params_file_path); + exit(1); + } + + for(int k = 0; k < mArgs.mNumLocs; k++) { + for(int i = 0; i < 3 + 2*mArgs.mM + 2; i++) { + fprintf(fp, "%.14f ", params[k][i]); + } + fprintf(fp, "\n"); + } + fclose(fp); + + fprintf(stderr, "[StageZero] Successfully wrote %d Z files and params.csv to %s\n", N, results_path.c_str()); + } + + mArgs.mIterCount++; + + } catch (const std::exception& e) { + free(localtheta); + throw; + } + free(localtheta); + return value; +} + +template +double StageZeroGenerator::StageZeroObjectiveCallback(unsigned aN, const double *aTheta, double *aGrad, void *aData) { + auto *generator = static_cast *>(aData); + std::vector theta_vec(aTheta, aTheta + aN); + std::vector grad_vec(aN); + double result = generator->MLEAlgorithm(theta_vec, grad_vec, aData); + return result; +} + +template +bool StageZeroGenerator::IsLeapYear(const int &aYear) { + if (aYear % 400 == 0) { + return true; + } else if (aYear % 100 == 0) { + return false; + } else if (aYear % 4 == 0) { + return true; + } else { + return false; + } +} + +template +double * StageZeroGenerator::ReadObsFile(char *aFileName, const int &aNumLoc) { + FILE *fp; + char *line = NULL; + size_t len = 0; + ssize_t read; + int count = 0; + double *z_vec = new double[aNumLoc]; + + double start_time = MPI_Wtime(); + fp = fopen(aFileName, "r"); + if (fp == NULL) { + double end_time = MPI_Wtime(); + fprintf(stderr, "[StageZero] FAILED to open file: %s (%.3f sec)\n", aFileName, end_time - start_time); + throw std::runtime_error("readObsFile:cannot open observations file: " + std::string(aFileName)); + } + double 
end_time = MPI_Wtime(); + fprintf(stderr, "[StageZero] SUCCESS opening file: %s (%.3f sec)\n", aFileName, end_time - start_time); + + while ((read = getline(&line, &len, fp)) != -1 && count < aNumLoc) { + z_vec[count++] = atof(line); + } + + fclose(fp); + if (line) free(line); + return z_vec; +} + +template StageZeroGenerator *StageZeroGenerator::mpInstance = nullptr; + +// Explicit instantiation of StageZeroGenerator for supported types +template class StageZeroGenerator; +template class StageZeroGenerator; diff --git a/src/data-generators/concrete/StageZeroGeneratorParsec.cpp b/src/data-generators/concrete/StageZeroGeneratorParsec.cpp new file mode 100644 index 00000000..2b948b8f --- /dev/null +++ b/src/data-generators/concrete/StageZeroGeneratorParsec.cpp @@ -0,0 +1,906 @@ +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). 
+ +/** + * @file StageZeroGeneratorParsec.cpp + * @brief Implementation of the StageZeroGeneratorParsec class using PaRSEC/DPLASMA + * @version 2.0.0 +**/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#define START_TIMING(x) double _start_##x = MPI_Wtime(); +#define STOP_TIMING(x) x = MPI_Wtime() - _start_##x; + +namespace { +constexpr int kForcingOffset = 238; +} + +using namespace exageostat::generators::stagezero; +using namespace exageostat::common; +using namespace exageostat::configurations; +using namespace exageostat::results; + +template +StageZeroGeneratorParsec *StageZeroGeneratorParsec::GetInstance() { + if (mpInstance == nullptr) { + // Ensure MPI is initialized before creating the instance + // int initialized; + // MPI_Initialized(&initialized); + // if (!initialized) { + // int argc = 0; + // char **argv = nullptr; + // MPI_Init(&argc, &argv); + // } + mpInstance = new StageZeroGeneratorParsec(); + } + return mpInstance; +} + +template +std::unique_ptr> +StageZeroGeneratorParsec::CreateData(exageostat::configurations::Configurations &aConfigurations, + exageostat::kernels::Kernel &aKernel) { + this->Runner(aConfigurations); + return std::move(this->mData); +} + +template +void StageZeroGeneratorParsec::ReleaseInstance() { + if (mpInstance != nullptr) { + delete mpInstance; + mpInstance = nullptr; + } +} + +template +void StageZeroGeneratorParsec::Runner(exageostat::configurations::Configurations &aConfigurations) { + int rank, nprocs; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + mArgs.mConfigs = &aConfigurations; + this->ConfigureGenerator(); + this->Allocate(); + this->ReadForcingData(); + this->ReadNetCDFFiles(); + + // After data gathering, only rank 0 continues with optimization + // Other ranks can exit since they're 
no longer needed + if (rank != 0) { + fprintf(stderr, "[StageZero PaRSEC] Rank %d: Data gathering complete, exiting (only rank 0 performs optimization)\n", rank); + this->CleanUp(); + return; + } + + fprintf(stderr, "[StageZero PaRSEC] Rank 0: Starting optimization phase\n"); + this->RunMeanTrend(); + + this->CleanUp(); +} + +template +void StageZeroGeneratorParsec::ConfigureGenerator() { + + // Values for the mean trend removal + mArgs.mM = 10; + mArgs.mT = 365*24; + mArgs.mNoYears = 751; + + // Only support trend_model for this workflow + if (mArgs.mConfigs->GetKernelName() == "TrendModel" || mArgs.mConfigs->GetKernelName() == "trend_model") { + mArgs.mNumParams = 1; + } else { + throw std::invalid_argument("Unsupported kernel for Stage Zero: only 'TrendModel' is supported"); + } + + // Derive observation years and N from configuration + int start_year = mArgs.mConfigs->GetStartYear(); + int end_year = mArgs.mConfigs->GetEndYear(); + if (end_year < start_year) { + throw std::runtime_error("EndYear must be >= StartYear"); + } + int obs_years = (end_year - start_year + 1); + mArgs.mN = static_cast(mArgs.mT) * static_cast(obs_years); + + // Number of locations + try { mArgs.mNumLocs = mArgs.mConfigs->GetNumLocs(); } + catch (...) 
{ mArgs.mNumLocs = mArgs.mConfigs->GetProblemSize(); }
+}
+
+/**
+ * @brief Loads the hourly t2m values of every configured year into mArgs.mT2mHourlyPerYear.
+ * @details For each year from StartYear to EndYear the file "<DataPath>data_<year>.nc" is opened
+ * collectively, the time axis is split over the MPI ranks, and the per-rank slabs are gathered on
+ * every rank. Each value is scaled with scale_factor/add_offset, shifted by -273.15 and appended
+ * to the series of its location; in leap years the time steps 1416..1439 are skipped.
+ * @throws std::runtime_error If a file cannot be opened or closed, if the t2m variable is missing
+ * from the first file, or if the read buffers cannot be allocated.
+ */
+template
+void StageZeroGeneratorParsec::ReadNetCDFFiles() {
+
+    int ncid, retval;
+    MPI_Offset lat_len = 0, lon_len = 0, time_len = 0;
+    int lon_varid = -1, lat_varid = -1, time_varid = -1, t2m_varid = -1;
+    int v = 365;
+    int rank, nprocs;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+
+    int start_year = mArgs.mConfigs->GetStartYear();
+    int end_year = mArgs.mConfigs->GetEndYear();
+
+    auto openFileNCmpi = [&](const char *filename) {
+        int id, ret;
+        if ((ret = ncmpi_open(MPI_COMM_WORLD, filename, NC_NOWRITE, MPI_INFO_NULL, &id))) {
+            throw std::runtime_error("Error opening NetCDF file: " + std::string(ncmpi_strerror(ret)));
+        }
+        return id;
+    };
+
+    auto closeFileNCmpi = [&](int id) {
+        int ret;
+        if ((ret = ncmpi_close(id))) {
+            throw std::runtime_error("Error closing NetCDF file: " + std::string(ncmpi_strerror(ret)));
+        }
+    };
+
+
+    // std::string instead of a fixed 256-byte buffer so a long data path is never truncated
+    const std::string path = mArgs.mConfigs->GetDataPath() + "data_" + std::to_string(start_year) + ".nc";
+
+    ncid = openFileNCmpi(path.c_str());
+
+    // Dimension IDs and lengths with error checks. These values are not read again in this
+    // function, so a failure is only reported; a length is queried only if its ID lookup succeeded.
+    if ((retval = ncmpi_inq_dimid(ncid, "longitude", &lon_varid)))
+        std::cerr << "Error: " << ncmpi_strerror(retval) << std::endl;
+    else if ((retval = ncmpi_inq_dimlen(ncid, lon_varid, &lon_len)))
+        std::cerr << "Error: " << ncmpi_strerror(retval) << std::endl;
+
+    if ((retval = ncmpi_inq_dimid(ncid, "latitude", &lat_varid)))
+        std::cerr << "Error: " << ncmpi_strerror(retval) << std::endl;
+    else if ((retval = ncmpi_inq_dimlen(ncid, lat_varid, &lat_len)))
+        std::cerr << "Error: " << ncmpi_strerror(retval) << std::endl;
+
+    if ((retval = ncmpi_inq_dimid(ncid, "time", &time_varid)))
+        std::cerr << "Error: " << ncmpi_strerror(retval) << std::endl;
+
+    // t2m_varid drives every read below, so a failed lookup cannot be ignored
+    if ((retval = ncmpi_inq_varid(ncid, "t2m", &t2m_varid))) {
+        ncmpi_close(ncid);
+        throw std::runtime_error("Error locating variable t2m in " + path + ": " +
+                                 std::string(ncmpi_strerror(retval)));
+    }
+
+    closeFileNCmpi(ncid);
+
+    int min_loc = mArgs.mConfigs->GetLowTileSize();
+
+    for (int y = start_year; y <= end_year; y++) {
+        const std::string path2 = mArgs.mConfigs->GetDataPath() + "data_" + std::to_string(y) + ".nc";
+
+        ncid = openFileNCmpi(path2.c_str());
+
+        double scaling_var = 1.0, offset_var = 0.0;
+        if ((retval = ncmpi_get_att_double(ncid, t2m_varid, "scale_factor", &scaling_var))) {
+            std::cerr << "Warning: missing scale_factor: " << ncmpi_strerror(retval) << std::endl;
+        }
+        if ((retval = ncmpi_get_att_double(ncid, t2m_varid, "add_offset", &offset_var))) {
+            std::cerr << "Warning: missing add_offset: " << ncmpi_strerror(retval) << std::endl;
+        }
+
+        int *t2m = nullptr;
+        int *t2m_local = nullptr;
+
+        if (IsLeapYear(y)) time_len = v * 24 + 24;
+        else time_len = v * 24;
+
+        // Split the time axis over the ranks. The first (time_len % nprocs) ranks take one extra
+        // step so that every time step is read even when time_len is not a multiple of nprocs.
+        const MPI_Offset base_steps = time_len / nprocs;
+        const MPI_Offset extra_steps = time_len % nprocs;
+        std::vector<int> recv_counts(nprocs), recv_displs(nprocs);
+        MPI_Offset my_first_step = 0, my_steps = 0, next_step = 0;
+        for (int p = 0; p < nprocs; ++p) {
+            const MPI_Offset steps = base_steps + (p < extra_steps ? 1 : 0);
+            recv_counts[p] = (int) (steps * mArgs.mNumLocs);
+            recv_displs[p] = (int) (next_step * mArgs.mNumLocs);
+            if (p == rank) {
+                my_first_step = next_step;
+                my_steps = steps;
+            }
+            next_step += steps;
+        }
+
+        size_t total_elems = static_cast(time_len) * static_cast(mArgs.mNumLocs);
+        size_t local_elems = (size_t) my_steps * (size_t) mArgs.mNumLocs;
+        t2m = (int *) malloc(total_elems * sizeof(int));
+        // Request at least one element so a rank that owns no time step still gets a valid buffer
+        t2m_local = (int *) malloc((local_elems > 0 ? local_elems : 1) * sizeof(int));
+        if (!t2m || !t2m_local) {
+            free(t2m);
+            free(t2m_local);
+            throw std::runtime_error("Allocation failed for t2m read buffers");
+        }
+
+        MPI_Offset index[] = {my_first_step,
+                              (MPI_Offset) min_loc, 0};
+        MPI_Offset count[] = {my_steps, 1, (MPI_Offset) mArgs.mNumLocs};
+
+        double x_read = 0;
+        START_TIMING(x_read)
+        retval = ncmpi_get_vara_int_all(ncid, t2m_varid, index, count, t2m_local);
+        STOP_TIMING(x_read)
+        if (retval) {
+            std::cerr << "Error: " << ncmpi_strerror(retval) << std::endl;
+        }
+
+        double gather_start = MPI_Wtime();
+        // Per-rank counts differ when time_len is not divisible by nprocs, hence Allgatherv
+        MPI_Allgatherv(t2m_local, (int) local_elems, MPI_INT, t2m,
+                       recv_counts.data(), recv_displs.data(), MPI_INT, MPI_COMM_WORLD);
+        double gather_time = MPI_Wtime() - gather_start;
+
+        fprintf(stderr, "[StageZero] Year %d: Read time: %.3f sec, Gather time: %.3f sec\n", y, x_read, gather_time);
+
+        for (int lu = 0; lu < mArgs.mNumLocs; lu++) {
+            int r = 0;
+            for (size_t k = lu; k < (size_t) time_len * mArgs.mNumLocs; k += mArgs.mNumLocs) {
+                if (!(IsLeapYear(y) && (r >= 1416 && r <= 1439))) {
+                    mArgs.mT2mHourlyPerYear[lu][mArgs.mT2mHourlyPerYearCount[lu]++] =
+                            (static_cast(t2m[k]) * scaling_var) + offset_var - 273.15;
+                }
+                r++;
+            }
+        }
+        closeFileNCmpi(ncid);
+        free(t2m);
+        free(t2m_local);
fprintf(stderr, "[StageZero] Completed processing year %d\n\n", y);
+    }
+    fprintf(stderr, "[StageZero] NetCDF loading completed for years %d-%d (%d files)\n\n",
+            start_year, end_year, end_year - start_year + 1);
+}
+
+/**
+ * @brief Loads mArgs.mNoYears forcing values from the configured forcing data file.
+ * @details A single trailing '/' is removed from the configured path; the same normalized path
+ * is both logged and opened. The values are stored in mArgs.mForcing.
+ */
+template
+void StageZeroGeneratorParsec::ReadForcingData() {
+    std::string forcing_path = mArgs.mConfigs->GetForcingDataPath();
+    if (!forcing_path.empty() && forcing_path.back() == '/') forcing_path.pop_back();
+
+    fprintf(stderr, "[StageZero] Loading forcing data from: %s\n", forcing_path.c_str());
+    // Open the normalized path that was just logged, not the raw configuration value
+    mArgs.mForcing = ReadObsFile((char *) forcing_path.c_str(), mArgs.mNoYears);
+    fprintf(stderr, "[StageZero] Successfully loaded forcing data (%d years)\n\n", mArgs.mNoYears);
+}
+
+/**
+ * @brief Allocates the parameter arrays and the per-location hourly t2m tables in mArgs.
+ * @details The bounds and the starting theta are copied from the configuration; the target and
+ * initial theta arrays are zero-filled. Each location gets room for mArgs.mN values.
+ * @throws std::runtime_error If one of the t2m tables cannot be allocated.
+ */
+template
+void StageZeroGeneratorParsec::Allocate() {
+
+    // Value-initialized so arrays that are not filled below never hold indeterminate values
+    mArgs.mStartingTheta = new double[mArgs.mNumParams]();
+    mArgs.mTargetTheta = new double[mArgs.mNumParams]();
+    mArgs.mInitialTheta = new double[mArgs.mNumParams]();
+    mArgs.mLb = new double[mArgs.mNumParams]();
+    mArgs.mUp = new double[mArgs.mNumParams]();
+
+    // Copy configuration values to allocated arrays
+    // NOTE(review): assumes each configured vector holds at least mNumParams entries -- confirm
+    auto lb_vec = mArgs.mConfigs->GetLowerBounds();
+    auto up_vec = mArgs.mConfigs->GetUpperBounds();
+    auto starting_theta_vec = mArgs.mConfigs->GetStartingTheta();
+
+    for(size_t i = 0; i < mArgs.mNumParams; ++i) {
+        mArgs.mLb[i] = lb_vec[i];
+        mArgs.mUp[i] = up_vec[i];
+        mArgs.mStartingTheta[i] = starting_theta_vec[i];
+    }
+
+    // calloc keeps every row pointer null until its row is allocated, so a partially built
+    // table can still be released row by row with free()
+    mArgs.mT2mHourlyPerYear = (double **) calloc(mArgs.mNumLocs, sizeof(double *));
+    mArgs.mT2mHourlyPerYearCount = (int *) calloc(mArgs.mNumLocs, sizeof(int));
+    if (mArgs.mNumLocs > 0 && (!mArgs.mT2mHourlyPerYear || !mArgs.mT2mHourlyPerYearCount)) {
+        throw std::runtime_error("Allocation failed for hourly t2m tables");
+    }
+    for (int k = 0; k < mArgs.mNumLocs; k++) {
+        mArgs.mT2mHourlyPerYear[k] = (double *) malloc(mArgs.mN * sizeof(double));
+        if (mArgs.mN > 0 && !mArgs.mT2mHourlyPerYear[k]) {
+            throw std::runtime_error("Allocation failed for hourly t2m series");
+        }
+    }
+}
+
+template
+void StageZeroGeneratorParsec::RunMeanTrend() {
+    const int no_locs = mArgs.mNumLocs;
+
+    // Print configuration summary
+    const std::string kernel = mArgs.mConfigs->GetKernelName();
+    const std::string data_path = mArgs.mConfigs->GetDataPath();
+    const std::string forcing_path =
mArgs.mConfigs->GetForcingDataPath(); + const std::string results_path = mArgs.mConfigs->GetResultsPath(); + const int start_year = mArgs.mConfigs->GetStartYear(); + const int end_year = mArgs.mConfigs->GetEndYear(); + const int years = mArgs.mNoYears; + const int period_hours = mArgs.mT; + const int M = mArgs.mM; + const size_t N = mArgs.mN; + const int num_locs = mArgs.mNumLocs; + const double tol = mArgs.mConfigs->GetTolerance(); + const int maxeval = mArgs.mConfigs->GetMaxMleIterations(); + const double st = mArgs.mStartingTheta[0]; + const double lb0 = mArgs.mLb[0]; + const double ub0 = mArgs.mUp[0]; + const int dts = mArgs.mConfigs->GetDenseTileSize(); + const int lts_val = mArgs.mConfigs->GetLowTileSize(); + + fprintf(stderr, "----- StageZero Arguments -----\n"); + fprintf(stderr, "kernel: %s\n", kernel.c_str()); + fprintf(stderr, "data_path: %s\n", data_path.c_str()); + fprintf(stderr, "forcing_data_path: %s\n", forcing_path.c_str()); + fprintf(stderr, "results_path: %s\n", results_path.c_str()); + fprintf(stderr, "start_year: %d, end_year: %d, years: %d\n", start_year, end_year, end_year-start_year+1); + fprintf(stderr, "num_locs: %d\n", num_locs); + fprintf(stderr, "period_hours(T): %d, harmonics(M): %d\n", period_hours, M); + fprintf(stderr, "N (observations): %zu\n", N); + fprintf(stderr, "tolerance(ftol_abs): %.10e, maxeval: %d\n", tol, maxeval); + fprintf(stderr, "-------------------------------\n"); + + // Key bounds info + fprintf(stderr, "Starting theta[0]: %f\n", mArgs.mStartingTheta[0]); + fprintf(stderr, "Lower bound: %f, Upper bound: %f\n", mArgs.mLb[0], mArgs.mUp[0]); + + this->SetupMLEComponents(); + + nlopt_opt opt = nlopt_create(NLOPT_LN_BOBYQA, mArgs.mNumParams); + nlopt_set_lower_bounds(opt, mArgs.mLb); + nlopt_set_upper_bounds(opt, mArgs.mUp); + nlopt_set_max_objective(opt, &StageZeroGeneratorParsec::StageZeroObjectiveCallback, (void *)this); + + nlopt_set_ftol_abs(opt, mArgs.mConfigs->GetTolerance()); + nlopt_set_maxeval(opt, 
mArgs.mConfigs->GetMaxMleIterations()); + + std::vector theta(mArgs.mNumParams, 0.0); + + for (int l = 0; l < no_locs; ++l) { + mArgs.mIterCount = 0; + mArgs.mCurrentLocation = l; + + this->ConvertT2MToZForLocation(l); + + fprintf(stderr, "[StageZero PaRSEC] Starting NLopt for location %d/%d\n", l + 1, no_locs); + theta[0] = mArgs.mStartingTheta[0]; + double opt_f = 0.0; + + nlopt_set_max_objective(opt, &StageZeroGeneratorParsec::StageZeroObjectiveCallback, (void *)this); + nlopt_result nlres = nlopt_optimize(opt, theta.data(), &opt_f); + fprintf(stderr, "[StageZero PaRSEC] NLopt finished (res=%d), theta=%0.10f, f=%0.10f\n", (int) nlres, theta[0], opt_f); + + { + std::vector grad; + this->MLEAlgorithm(theta, grad, nullptr); + } + } + + nlopt_destroy(opt); +} + +template +void StageZeroGeneratorParsec::CleanUp() { + + delete[] mArgs.mStartingTheta; + delete[] mArgs.mTargetTheta; + delete[] mArgs.mInitialTheta; + delete[] mArgs.mLb; + delete[] mArgs.mUp; + delete[] mArgs.mForcing; + + for (int i = 0; i < mArgs.mNumLocs; ++i) { + free(mArgs.mT2mHourlyPerYear[i]); + } + free(mArgs.mT2mHourlyPerYear); + free(mArgs.mT2mHourlyPerYearCount); + + // Clean up PaRSEC descriptors + if (mArgs.mpDescZ) { + parsec_data_free(mArgs.mpDescZ->mat); + free(mArgs.mpDescZ); + mArgs.mpDescZ = nullptr; + } + if (mArgs.mpX) { + parsec_data_free(mArgs.mpX->mat); + free(mArgs.mpX); + mArgs.mpX = nullptr; + } + if (mArgs.mpXtX) { + parsec_data_free(mArgs.mpXtX->mat); + free(mArgs.mpXtX); + mArgs.mpXtX = nullptr; + } + if (mArgs.mpDescPart1) { + parsec_data_free(mArgs.mpDescPart1->mat); + free(mArgs.mpDescPart1); + mArgs.mpDescPart1 = nullptr; + } + if (mArgs.mpDescPart2) { + parsec_data_free(mArgs.mpDescPart2->mat); + free(mArgs.mpDescPart2); + mArgs.mpDescPart2 = nullptr; + } + if (mArgs.mpPart2Vector) { + parsec_data_free(mArgs.mpPart2Vector->mat); + free(mArgs.mpPart2Vector); + mArgs.mpPart2Vector = nullptr; + } + if (mArgs.mpEstimatedMeanTrend) { + 
parsec_data_free(mArgs.mpEstimatedMeanTrend->mat); + free(mArgs.mpEstimatedMeanTrend); + mArgs.mpEstimatedMeanTrend = nullptr; + } +} + +template +void StageZeroGeneratorParsec::SetupMLEComponents() { + + int N = mArgs.mN; + int nparams = 3 + 2*mArgs.mM; + int dts = mArgs.mConfigs->GetDenseTileSize(); + int p_grid = 1, q_grid = 1; + + fprintf(stderr, "Creating PaRSEC descriptors: N=%d, nparams=%d, dts=%d\n", N, nparams, dts); + + mArgs.mIterCount = 0; + + // Get PaRSEC context from hardware layer + try { + mArgs.mpParsecContext = static_cast(ExaGeoStatHardware::GetParsecContext()); + if (mArgs.mpParsecContext) { + fprintf(stderr, "PaRSEC context obtained successfully\n"); + } else { + fprintf(stderr, "Warning: PaRSEC context is null, using fallback computations\n"); + } + } catch (const std::exception& e) { + fprintf(stderr, "Warning: Could not get PaRSEC context: %s, using fallback computations\n", e.what()); + mArgs.mpParsecContext = nullptr; + } + + // Create PaRSEC block-cyclic descriptors using proper initialization + int rank = 0; // Single process for now + int nodes = 1; // Single node for now + + // Use same tile size as CHAMELEON version for exact equivalence + int tile_size = dts; // Use full dense tile size like CHAMELEON + + // Z vector (observations) - N x 1 + mArgs.mpDescZ = (parsec_matrix_block_cyclic_t*)malloc(sizeof(parsec_matrix_block_cyclic_t)); + parsec_matrix_block_cyclic_init(mArgs.mpDescZ, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, rank, + tile_size, tile_size, N, 1, 0, 0, N, 1, 1, nodes, 1, 1, 0, 0); + mArgs.mpDescZ->mat = parsec_data_allocate((size_t)mArgs.mpDescZ->super.nb_local_tiles * + (size_t)mArgs.mpDescZ->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(mArgs.mpDescZ->super.mtype)); + + // X matrix (design matrix) - N x nparams + mArgs.mpX = (parsec_matrix_block_cyclic_t*)malloc(sizeof(parsec_matrix_block_cyclic_t)); + parsec_matrix_block_cyclic_init(mArgs.mpX, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, rank, + tile_size, 
tile_size, N, nparams, 0, 0, N, nparams, 1, nodes, 1, 1, 0, 0); + mArgs.mpX->mat = parsec_data_allocate((size_t)mArgs.mpX->super.nb_local_tiles * + (size_t)mArgs.mpX->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(mArgs.mpX->super.mtype)); + + // XtX matrix - nparams x nparams + mArgs.mpXtX = (parsec_matrix_block_cyclic_t*)malloc(sizeof(parsec_matrix_block_cyclic_t)); + parsec_matrix_block_cyclic_init(mArgs.mpXtX, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, rank, + tile_size, tile_size, nparams, nparams, 0, 0, nparams, nparams, 1, nodes, 1, 1, 0, 0); + mArgs.mpXtX->mat = parsec_data_allocate((size_t)mArgs.mpXtX->super.nb_local_tiles * + (size_t)mArgs.mpXtX->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(mArgs.mpXtX->super.mtype)); + + // part1 scalar - 1 x 1 + mArgs.mpDescPart1 = (parsec_matrix_block_cyclic_t*)malloc(sizeof(parsec_matrix_block_cyclic_t)); + parsec_matrix_block_cyclic_init(mArgs.mpDescPart1, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, rank, + tile_size, tile_size, 1, 1, 0, 0, 1, 1, 1, nodes, 1, 1, 0, 0); + mArgs.mpDescPart1->mat = parsec_data_allocate((size_t)mArgs.mpDescPart1->super.nb_local_tiles * + (size_t)mArgs.mpDescPart1->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(mArgs.mpDescPart1->super.mtype)); + + // part2 scalar - 1 x 1 + mArgs.mpDescPart2 = (parsec_matrix_block_cyclic_t*)malloc(sizeof(parsec_matrix_block_cyclic_t)); + parsec_matrix_block_cyclic_init(mArgs.mpDescPart2, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, rank, + tile_size, tile_size, 1, 1, 0, 0, 1, 1, 1, nodes, 1, 1, 0, 0); + mArgs.mpDescPart2->mat = parsec_data_allocate((size_t)mArgs.mpDescPart2->super.nb_local_tiles * + (size_t)mArgs.mpDescPart2->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(mArgs.mpDescPart2->super.mtype)); + + // part2_vector - nparams x 1 + mArgs.mpPart2Vector = (parsec_matrix_block_cyclic_t*)malloc(sizeof(parsec_matrix_block_cyclic_t)); + parsec_matrix_block_cyclic_init(mArgs.mpPart2Vector, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, 
rank, + tile_size, tile_size, nparams, 1, 0, 0, nparams, 1, 1, nodes, 1, 1, 0, 0); + mArgs.mpPart2Vector->mat = parsec_data_allocate((size_t)mArgs.mpPart2Vector->super.nb_local_tiles * + (size_t)mArgs.mpPart2Vector->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(mArgs.mpPart2Vector->super.mtype)); + + // estimated_mean_trend vector - N x 1 + mArgs.mpEstimatedMeanTrend = (parsec_matrix_block_cyclic_t*)malloc(sizeof(parsec_matrix_block_cyclic_t)); + parsec_matrix_block_cyclic_init(mArgs.mpEstimatedMeanTrend, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, rank, + tile_size, tile_size, N, 1, 0, 0, N, 1, 1, nodes, 1, 1, 0, 0); + mArgs.mpEstimatedMeanTrend->mat = parsec_data_allocate((size_t)mArgs.mpEstimatedMeanTrend->super.nb_local_tiles * + (size_t)mArgs.mpEstimatedMeanTrend->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(mArgs.mpEstimatedMeanTrend->super.mtype)); + + fprintf(stderr, "All PaRSEC descriptors created successfully\n"); +} + +template +void StageZeroGeneratorParsec::ConvertT2MToZForLocation(int location_index) { + + parsec_matrix_block_cyclic_t *DESC_Z = mArgs.mpDescZ; + int N = mArgs.mN; + + if (location_index < 0 || location_index >= mArgs.mNumLocs) { + throw std::runtime_error("ConvertT2MToZForLocation: invalid location index"); + } + + // Copy data directly to PaRSEC descriptor + double *z_data = (double*)DESC_Z->mat; + int count = std::min(N, mArgs.mT2mHourlyPerYearCount[location_index]); + int mb = DESC_Z->super.mb; + int nb = DESC_Z->super.nb; + int mt = (N + mb - 1) / mb; + int nt = (1 + nb - 1) / nb; + int bsiz = DESC_Z->super.bsiz; + for (int tj = 0; tj < nt; ++tj) { + for (int ti = 0; ti < mt; ++ti) { + double *tile_ptr = z_data + (tj * mt + ti) * bsiz; + int row_offset = ti * mb; + int col_offset = tj * nb; // vector has only one column + int rows = std::min(mb, N - row_offset); + int cols = std::min(nb, 1 - col_offset); + if (cols <= 0) continue; + for (int cj = 0; cj < cols; ++cj) { + for (int ri = 0; ri < rows; ++ri) { + int 
global_row = row_offset + ri;
+                    double val = (global_row < count) ? mArgs.mT2mHourlyPerYear[location_index][global_row] : 0.0;
+                    tile_ptr[ri + cj * mb] = val;
+                }
+            }
+        }
+    }
+}
+
+/**
+ * @brief Fills an m x n column-major block of the trend-model design matrix.
+ * @details The block's first row and column correspond to global row m0 and global column n0.
+ * The value written depends on the global column index j0:
+ *  - j0 == 0: the constant 1.0;
+ *  - j0 == 1: the forcing value at index (global_row / T) + kForcingOffset, capped at mNoYears - 1;
+ *  - j0 == 2: the recursive accumulation r at the same index, capped at mNoYears;
+ *  - otherwise: sin or cos of 2*pi*(global_row + 1)*freq/T with freq = floor((j0 - 3)/2) + 1.
+ * @param[out] matrix Destination buffer holding at least m * n doubles.
+ * @param[in] m Number of rows in the block.
+ * @param[in] n Number of columns in the block.
+ * @param[in] m0 Global index of the block's first row.
+ * @param[in] n0 Global index of the block's first column.
+ * @param[in] localtheta Packed parameters: [theta, T, M, forcing[0..mNoYears-1]].
+ */
+template
+void StageZeroGeneratorParsec::GenerateDesignMatrixExact(double *matrix, int m, int n, int m0, int n0, double *localtheta) {
+    // localtheta layout: [theta, T, M, forcing...]
+    const double theta = localtheta[0];
+    const int T_val = static_cast(localtheta[1]);
+    // M_val is read for completeness of the layout but is not used below
+    const int M_val = static_cast(localtheta[2]);
+    double *forcing = &localtheta[3];
+
+    // i0 / j0 track the global row / column; i_x is the 1-based global row used in the harmonics
+    int i0 = m0;
+    int j0_start = n0;
+    double i_x = static_cast(i0) + 1.0;
+
+    // Precompute the AR-term recursively: r[0] = 0; r[n+1] = theta*r[n] + (1-theta)*forcing[n]
+    std::vector ar_acc(static_cast(mArgs.mNoYears) + 1, 0.0);
+    // The loop variable n shadows the parameter n inside this loop only
+    for (int n = 0; n < mArgs.mNoYears; ++n) {
+        ar_acc[static_cast(n + 1)] = theta * ar_acc[static_cast(n)] + (1.0 - theta) * forcing[n];
+    }
+
+    for (int i = 0; i < m; i++) {
+        int j0 = j0_start;
+        for (int j = 0; j < n; j++) {
+            const int idx = i + j * m; // column-major
+            if (j0 == 0) {
+                // First column: constant 1
+                matrix[idx] = 1.0;
+            } else if (j0 == 1) {
+                // Second column: forcing[(i0/T) + OFFSET]
+                const int ty = (i0 / T_val);
+                const int f_idx = ty + kForcingOffset;
+                // Cap at the last forcing entry so the read stays inside the array
+                matrix[idx] = forcing[std::min(f_idx, mArgs.mNoYears - 1)];
+            } else if (j0 == 2) {
+                // Third column: use recursive accumulation r[n]
+                const int ty = (i0 / T_val);
+                // ar_acc has mNoYears + 1 entries, so mNoYears is its last valid index
+                const int n_idx = std::min(ty + kForcingOffset, mArgs.mNoYears);
+                matrix[idx] = ar_acc[static_cast(n_idx)];
+            } else {
+                // Remaining 2*M columns: alternate sin/cos per C core (j even -> sin, j odd -> cos)
+                // frequency index = floor((j0-3)/2) + 1 in [1..M]
+                const int freq = static_cast(std::floor((j0 - 3.0) / 2.0)) + 1;
+                const double angle = 2.0 * M_PI * (i_x) * static_cast(freq) / static_cast(T_val);
+                // NOTE(review): the parity test uses the local column j, not the global j0; the
+                // two agree only when n0 is even -- confirm this is intended for blocks with odd n0
+                if ((j % 2) == 0) {
+                    matrix[idx] = std::sin(angle);
+                } else {
+                    matrix[idx] = std::cos(angle);
+                }
+            }
+            j0++;
+        }
+        i0++;
+        i_x += 1.0;
+    }
+}
+
+template
+double
StageZeroGeneratorParsec::MLEAlgorithm(const std::vector &aThetaVec, + std::vector &aGrad, void *apObj) { + + parsec_matrix_block_cyclic_t *Zobs = mArgs.mpDescZ; + parsec_matrix_block_cyclic_t *X = mArgs.mpX; + parsec_matrix_block_cyclic_t *XtX = mArgs.mpXtX; + parsec_matrix_block_cyclic_t *part1 = mArgs.mpDescPart1; + parsec_matrix_block_cyclic_t *part2 = mArgs.mpDescPart2; + parsec_matrix_block_cyclic_t *part2_vector = mArgs.mpPart2Vector; + int N = static_cast(mArgs.mN); + double value = 0.0; + const bool is_final_call = (apObj == nullptr); + + if (!X) { throw std::runtime_error("X descriptor is null"); } + if (!Zobs) { throw std::runtime_error("Zobs descriptor is null"); } + + double* localtheta = (double *) malloc((3+mArgs.mNoYears) * sizeof(double)); + localtheta[0] = aThetaVec[0]; + localtheta[1] = mArgs.mT; + localtheta[2] = mArgs.mM; + + for(int ii=0; ii(mArgs.mN); + int Ncols = 3 + 2*mArgs.mM; + + // Create LAPACK buffer like CHAMELEON version + double *x_lap = (double*)calloc((size_t)Nrows * (size_t)Ncols, sizeof(double)); + if (!x_lap) { throw std::runtime_error("Allocation failed for design matrix buffer"); } + + this->GenerateDesignMatrixExact(x_lap, Nrows, Ncols, 0, 0, localtheta); + + // Tile-aware copy from LAPACK (column-major) to PaRSEC tiled layout + double *x_data = (double*)X->mat; + int mb = X->super.mb; + int nb = X->super.nb; + int mt = (Nrows + mb - 1) / mb; + int nt = (Ncols + nb - 1) / nb; + int bsiz = X->super.bsiz; + for (int tj = 0; tj < nt; ++tj) { + for (int ti = 0; ti < mt; ++ti) { + double *tile_ptr = x_data + (tj * mt + ti) * bsiz; + int row_offset = ti * mb; + int col_offset = tj * nb; + int rows = std::min(mb, Nrows - row_offset); + int cols = std::min(nb, Ncols - col_offset); + for (int cj = 0; cj < cols; ++cj) { + for (int ri = 0; ri < rows; ++ri) { + int global_row = row_offset + ri; + int global_col = col_offset + cj; + tile_ptr[ri + cj * mb] = x_lap[global_row + global_col * Nrows]; + } + } + } + } + + free(x_lap); + } + + 
// Ensure workspaces are clean before computations (equivalent to CHAMELEON_dlaset_Tile) + dplasma_dlaset(mArgs.mpParsecContext, dplasmaUpperLower, 0.0, 0.0, (parsec_tiled_matrix_t*)part1); + dplasma_dlaset(mArgs.mpParsecContext, dplasmaUpperLower, 0.0, 0.0, (parsec_tiled_matrix_t*)part2); + dplasma_dlaset(mArgs.mpParsecContext, dplasmaUpperLower, 0.0, 0.0, (parsec_tiled_matrix_t*)part2_vector); + dplasma_dlaset(mArgs.mpParsecContext, dplasmaUpperLower, 0.0, 0.0, (parsec_tiled_matrix_t*)XtX); + + // Step 2: part1 = Z^T * Z (using DPLASMA) + dplasma_dgemm(mArgs.mpParsecContext, dplasmaTrans, dplasmaNoTrans, + 1.0, (parsec_tiled_matrix_t*)Zobs, (parsec_tiled_matrix_t*)Zobs, 0.0, (parsec_tiled_matrix_t*)part1); + + // Optional debug removed + + // Step 3: part2_vector = X^T * Z (using DPLASMA) + dplasma_dgemm(mArgs.mpParsecContext, dplasmaTrans, dplasmaNoTrans, + 1.0, (parsec_tiled_matrix_t*)X, (parsec_tiled_matrix_t*)Zobs, 0.0, (parsec_tiled_matrix_t*)part2_vector); + + // Optional debug removed + + // Step 4: XtX = X^T * X (using DPLASMA) + dplasma_dgemm(mArgs.mpParsecContext, dplasmaTrans, dplasmaNoTrans, + 1.0, (parsec_tiled_matrix_t*)X, (parsec_tiled_matrix_t*)X, 0.0, (parsec_tiled_matrix_t*)XtX); + + // Step 5: Cholesky decomposition (using DPLASMA) + int info = dplasma_dpotrf(mArgs.mpParsecContext, dplasmaLower, (parsec_tiled_matrix_t*)XtX); + + if(info != 0) { + if (apObj) { free(localtheta); return -1e18; } + else { + fprintf(stderr, "[StageZero PaRSEC] ERROR: Cholesky failed. 
Aborting.\n"); + free(localtheta); + exit(1); + } + } + + // Step 6: Triangular solve + dplasma_dtrsm(mArgs.mpParsecContext, dplasmaLeft, dplasmaLower, dplasmaNoTrans, dplasmaNonUnit, + 1.0, (parsec_tiled_matrix_t*)XtX, (parsec_tiled_matrix_t*)part2_vector); + + if (!apObj) { + dplasma_dtrsm(mArgs.mpParsecContext, dplasmaLeft, dplasmaLower, dplasmaTrans, dplasmaNonUnit, + 1.0, (parsec_tiled_matrix_t*)XtX, (parsec_tiled_matrix_t*)part2_vector); + } + + // If this is an optimization callback, compute and return the C objective using y^T y + if (apObj) { + double part1_val_cb = 0.0; + if (part1 && part1->mat) part1_val_cb = static_cast(part1->mat)[0]; + // Compute part2 = y^T y (equivalent to CHAMELEON_dgemm_Tile) + dplasma_dlaset(mArgs.mpParsecContext, dplasmaUpperLower, 0.0, 0.0, (parsec_tiled_matrix_t*)part2); + dplasma_dgemm(mArgs.mpParsecContext, dplasmaTrans, dplasmaNoTrans, 1.0, + (parsec_tiled_matrix_t*)part2_vector, (parsec_tiled_matrix_t*)part2_vector, 0.0, (parsec_tiled_matrix_t*)part2); + double part2_val_cb = 0.0; + if (part2 && part2->mat) part2_val_cb = static_cast(part2->mat)[0]; + + // If numerical issues cause part1 <= part2, penalize + if (part1_val_cb <= part2_val_cb) { + free(localtheta); + return -1e18; + } + value = (-1.0) * std::log(part1_val_cb - part2_val_cb); + + mArgs.mIterCount++; + free(localtheta); + return value; + } + + // Step 7: Final computation with CSV output + // Create estimated_mean_trend = X * beta (using part2_vector as beta) + parsec_matrix_block_cyclic_t *estimated_mean_trend = mArgs.mpEstimatedMeanTrend; + + dplasma_dgemm(mArgs.mpParsecContext, dplasmaNoTrans, dplasmaNoTrans, + 1.0, (parsec_tiled_matrix_t*)X, (parsec_tiled_matrix_t*)part2_vector, 0.0, (parsec_tiled_matrix_t*)estimated_mean_trend); + + // Residuals = Z - trend (stored back into estimated_mean_trend) - equivalent to CHAMELEON_dgeadd_Tile + dplasma_dgeadd(mArgs.mpParsecContext, dplasmaNoTrans, 1.0, (parsec_tiled_matrix_t*)Zobs, -1.0, 
(parsec_tiled_matrix_t*)estimated_mean_trend); + + // Compute sigma² = residuals^T * residuals (equivalent to CHAMELEON_dgemm_Tile) + dplasma_dgemm(mArgs.mpParsecContext, dplasmaTrans, dplasmaNoTrans, 1.0, + (parsec_tiled_matrix_t*)estimated_mean_trend, (parsec_tiled_matrix_t*)estimated_mean_trend, 0.0, (parsec_tiled_matrix_t*)part2); + + // Get sigma² value and divide by N (like CHAMELEON version) + double sigma_squared_raw = static_cast(part2->mat)[0]; + double sigma_squared = sigma_squared_raw / N; + + // Standardize residuals: residuals = residuals / sqrt(sigma²) + double sqrt_sigma = std::sqrt(sigma_squared); + double *trend_data = (double*)estimated_mean_trend->mat; + for (int i = 0; i < N; ++i) { + trend_data[i] /= sqrt_sigma; + } + + // Set objective value (maximize -sigma^2 -> minimize sigma^2) + value = -sigma_squared; + + // Static global arrays for multi-location storage (like CHAMELEON version) + static std::vector> Z_new(mArgs.mNumLocs, std::vector(N)); + static std::vector> params(mArgs.mNumLocs, std::vector(3 + 2*mArgs.mM + 2)); + + // Store normalized residuals for this location + int location_index = mArgs.mCurrentLocation; // Current location being processed (0-based) + for (int i = 0; i < N; ++i) { + Z_new[location_index][i] = trend_data[i]; + } + + // Store parameters + params[location_index][0] = aThetaVec[0]; // optimized theta + params[location_index][1] = sigma_squared; // sigma² + double *part2_vector_data_local = (double*)part2_vector->mat; + for(int i = 0; i < 3 + 2*mArgs.mM; i++) { + params[location_index][i+2] = part2_vector_data_local[i]; + } + + // Write CSV files only on the final call and when processing the last location + if (is_final_call && (location_index == mArgs.mNumLocs - 1)) { + std::string results_path; + try { + results_path = mArgs.mConfigs->GetResultsPath(); + } catch (...) { + results_path.clear(); + } + + if (results_path.empty()) { + fprintf(stderr, "[StageZero PaRSEC] ERROR: ResultsPath is required. 
Please set --resultspath.\n"); + throw std::runtime_error("ResultsPath is required. Please set --resultspath."); + } + + if (results_path.back() != '/') results_path += "/"; + + // Create results directory if it doesn't exist + try { + if (!std::filesystem::exists(results_path)) { + fprintf(stderr, "[StageZero PaRSEC] Creating results directory: %s\n", results_path.c_str()); + std::filesystem::create_directories(results_path); + } + } catch (const std::exception &e) { + fprintf(stderr, "[StageZero PaRSEC] ERROR: Failed to create output directory: %s (%s)\n", results_path.c_str(), e.what()); + throw std::runtime_error("Failed to create output directory: " + std::string(e.what())); + } + + fprintf(stderr, "[StageZero PaRSEC] Writing outputs to: %s\n", results_path.c_str()); + + // Write Z files for each time slot (replace mode instead of append) + #pragma omp parallel for + for (int time_slot = 0; time_slot < N; time_slot++) { + char file_path[256]; + std::snprintf(file_path, sizeof(file_path), "%sz_%d.csv", results_path.c_str(), time_slot); + + // Retry loop for file locking with replace mode + while (true) { + FILE *fp = std::fopen(file_path, "w"); // Replace mode instead of append + if (!fp) { + fprintf(stderr, "[StageZero PaRSEC] WARNING: Failed to open file %s, retrying...\n", file_path); + std::this_thread::sleep_for(std::chrono::seconds(1)); + continue; + } + + // Write all locations for this time slot + for (int loc = 0; loc < mArgs.mNumLocs; ++loc) { + std::fprintf(fp, "%.14f\n", Z_new[loc][time_slot]); + } + std::fclose(fp); + break; + } + } + + // Write params file (replace mode) + char params_file_path[256]; + std::snprintf(params_file_path, sizeof(params_file_path), "%sparams.csv", results_path.c_str()); + FILE* fp = std::fopen(params_file_path, "w"); // Replace mode + if(fp == NULL){ + fprintf(stderr, "[StageZero PaRSEC] ERROR: Failed to create params file: %s\n", params_file_path); + exit(1); + } + + for(int k = 0; k < mArgs.mNumLocs; k++) { + for(int 
i = 0; i < 3 + 2*mArgs.mM + 2; i++) { + fprintf(fp, "%.14f ", params[k][i]); + } + fprintf(fp, "\n"); + } + fclose(fp); + + fprintf(stderr, "[StageZero PaRSEC] Successfully wrote %d Z files and params.csv to %s\n", N, results_path.c_str()); + } + + // Set summary information for final output + if (is_final_call) { + Results::GetInstance()->SetGeneratedLocationsNumber(mArgs.mNumLocs); + Results::GetInstance()->SetIsLogger(mArgs.mConfigs->GetLogger()); + Results::GetInstance()->SetLoggerPath(mArgs.mConfigs->GetLoggerPath()); + } + + mArgs.mIterCount++; + + } catch (const std::exception& e) { + free(localtheta); + throw; + } + free(localtheta); + return value; +} + +template +double StageZeroGeneratorParsec::StageZeroObjectiveCallback(unsigned aN, const double *aTheta, double *aGrad, void *aData) { + auto *generator = static_cast *>(aData); + std::vector theta_vec(aTheta, aTheta + aN); + std::vector grad_vec(aN); + double result = generator->MLEAlgorithm(theta_vec, grad_vec, aData); + return result; +} + +template +bool StageZeroGeneratorParsec::IsLeapYear(const int &aYear) { + if (aYear % 400 == 0) { + return true; + } else if (aYear % 100 == 0) { + return false; + } else if (aYear % 4 == 0) { + return true; + } else { + return false; + } +} + +template +double * StageZeroGeneratorParsec::ReadObsFile(char *aFileName, const int &aNumLoc) { + FILE *fp; + char *line = NULL; + size_t len = 0; + ssize_t read; + int count = 0; + double *z_vec = new double[aNumLoc]; + + fp = fopen(aFileName, "r"); + if (fp == NULL) { + throw std::runtime_error("readObsFile:cannot open observations file"); + } + + while ((read = getline(&line, &len, fp)) != -1 && count < aNumLoc) { + z_vec[count++] = atof(line); + } + + fclose(fp); + if (line) free(line); + return z_vec; +} + +template StageZeroGeneratorParsec *StageZeroGeneratorParsec::mpInstance = nullptr; diff --git a/src/data-generators/concrete/SyntheticGenerator.cpp b/src/data-generators/concrete/SyntheticGenerator.cpp index 
85cfa7ec..ef5b191b 100644 --- a/src/data-generators/concrete/SyntheticGenerator.cpp +++ b/src/data-generators/concrete/SyntheticGenerator.cpp @@ -14,8 +14,14 @@ #include #include +#if !DEFAULT_RUNTIME +#include +#else #include +#endif +//TODO: we need to make WriteData a function outside the csv, So it can be used whatever the runtime is. +// currently, it has an implementation for the CSVLoader and an empty body for the parsec loader using namespace exageostat::generators::synthetic; using namespace exageostat::common; using namespace exageostat::configurations; @@ -51,6 +57,8 @@ SyntheticGenerator::CreateData(Configurations &aConfigurations, *locations); data->SetLocations(*locations); + // TODO: May need to get refactored to avoid the if/else guards +#if DEFAULT_RUNTIME // Generate Descriptors phase auto linear_algebra_solver = linearAlgebra::LinearAlgebraFactory::CreateLinearAlgebraSolver(EXACT_DENSE); linear_algebra_solver->GenerateSyntheticData(aConfigurations, data, aKernel); @@ -81,6 +89,7 @@ SyntheticGenerator::CreateData(Configurations &aConfigurations, #endif VERBOSE("Done.") } +#endif Results::GetInstance()->SetGeneratedLocationsNumber(n); Results::GetInstance()->SetIsLogger(aConfigurations.GetLogger()); Results::GetInstance()->SetLoggerPath(aConfigurations.GetLoggerPath()); diff --git a/src/data-loader/DataLoader.cpp b/src/data-loader/DataLoader.cpp index fb7ca2fd..de566c2d 100644 --- a/src/data-loader/DataLoader.cpp +++ b/src/data-loader/DataLoader.cpp @@ -12,56 +12,41 @@ * @date 2023-02-14 **/ -#include +#if !DEFAULT_RUNTIME +#include +#else +#include +#endif using namespace std; using namespace exageostat::dataLoader; -using namespace exageostat::common; -using namespace exageostat::results; template std::unique_ptr> DataLoader::CreateData(configurations::Configurations &aConfigurations, exageostat::kernels::Kernel &aKernel) { - // create vectors that will be populated with read data. 
- vector measurements_vector; - vector x_locations; - vector y_locations; - vector z_locations; - - aKernel.SetPValue(aConfigurations.GetTimeSlot()); - int p = aKernel.GetVariablesNumber(); - - //Read the data out of the CSV file. - this->ReadData(aConfigurations, measurements_vector, x_locations, y_locations, z_locations, p); - - //create data object - auto data = std::make_unique>(aConfigurations.GetProblemSize() / p, - aConfigurations.GetDimension()); - - //Initialize the descriptors. - auto linear_algebra_solver = linearAlgebra::LinearAlgebraFactory::CreateLinearAlgebraSolver(EXACT_DENSE); - linear_algebra_solver->InitiateDescriptors(aConfigurations, *data->GetDescriptorData(), p); - linear_algebra_solver->ExaGeoStatLaSetTile(EXAGEOSTAT_UPPER_LOWER, 0, 0, - data->GetDescriptorData()->GetDescriptor(CHAMELEON_DESCRIPTOR, - DESCRIPTOR_C).chameleon_desc); - //populate data object with read data - for (int i = 0; i < aConfigurations.GetProblemSize() / p; i++) { - data->GetLocations()->GetLocationX()[i] = x_locations[i]; - data->GetLocations()->GetLocationY()[i] = y_locations[i]; - if (aConfigurations.GetDimension() != Dimension2D) { - data->GetLocations()->GetLocationZ()[i] = z_locations[i]; - } - } - for (int i = 0; i < aConfigurations.GetProblemSize(); i++) { - ((T *) data->GetDescriptorData()->GetDescriptor(CHAMELEON_DESCRIPTOR, - DESCRIPTOR_Z).chameleon_desc->mat)[i] = measurements_vector[i]; - } - - Results::GetInstance()->SetGeneratedLocationsNumber(aConfigurations.GetProblemSize() / p); - Results::GetInstance()->SetIsLogger(aConfigurations.GetLogger()); - Results::GetInstance()->SetLoggerPath(aConfigurations.GetLoggerPath()); - + auto data = this->LoadData(aConfigurations, aKernel); return data; } + +template +std::unique_ptr> +DataLoader::CreateDataLoader(exageostat::configurations::Configurations &apConfigurations){ + +#if DEFAULT_RUNTIME + return std::unique_ptr>(csv::CSVLoader::GetInstance()); +#else + return 
std::unique_ptr>(parsec::ParsecLoader::GetInstance()); +#endif +} + +template +void DataLoader::ReleaseDataLoader() { + +#if DEFAULT_RUNTIME + csv::CSVLoader::GetInstance()->ReleaseInstance(); +#else + parsec::ParsecLoader::GetInstance()->ReleaseInstance(); +#endif +} \ No newline at end of file diff --git a/src/data-loader/concrete/CMakeLists.txt b/src/data-loader/concrete/CMakeLists.txt index 88180c80..7792dea3 100644 --- a/src/data-loader/concrete/CMakeLists.txt +++ b/src/data-loader/concrete/CMakeLists.txt @@ -10,8 +10,16 @@ # @date 2023-02-14 # Add source files to the parent scope -set(SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/CSVLoader.cpp - ${SOURCES} - PARENT_SCOPE - ) \ No newline at end of file +if (RUNTIME_TYPE STREQUAL "STARPU") + set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/CSVLoader.cpp + ${SOURCES} + PARENT_SCOPE + ) +elseif (RUNTIME_TYPE STREQUAL "PARSEC") + set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/ParsecLoader.cpp + ${SOURCES} + PARENT_SCOPE + ) +endif() diff --git a/src/data-loader/concrete/CSVLoader.cpp b/src/data-loader/concrete/CSVLoader.cpp index bfd153cd..2c9266d8 100644 --- a/src/data-loader/concrete/CSVLoader.cpp +++ b/src/data-loader/concrete/CSVLoader.cpp @@ -4,8 +4,8 @@ // ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). 
/** - * @file CSVDataGenerator.cpp - * @brief Implementation of the CSVDataGenerator class + * @file CSVLoader.cpp + * @brief Implementation of the CSVLoader class * @version 1.1.0 * @author Mahmoud ElKarargy * @author Sameh Abdulah @@ -21,6 +21,8 @@ using namespace std; using namespace exageostat::configurations; using namespace exageostat::common; using namespace exageostat::dataLoader::csv; +using namespace exageostat::results; +using namespace exageostat::dataunits; template CSVLoader *CSVLoader::GetInstance() { @@ -231,6 +233,52 @@ void CSVLoader::WriteData(const T &aMatrixPointer, const int &aProblemSize, c p_file_synthetic.close(); } +template +std::unique_ptr> +CSVLoader::LoadData(configurations::Configurations &aConfigurations, exageostat::kernels::Kernel &aKernel) { + // create vectors that will be populated with read data. + vector measurements_vector; + vector x_locations; + vector y_locations; + vector z_locations; + + aKernel.SetPValue(aConfigurations.GetTimeSlot()); + int p = aKernel.GetVariablesNumber(); + + //Read the data out of the CSV file. + this->ReadData(aConfigurations, measurements_vector, x_locations, y_locations, z_locations, p); + + //create data object + auto data = std::make_unique>(aConfigurations.GetProblemSize() / p, + aConfigurations.GetDimension()); + + //Initialize the descriptors. 
+ auto linear_algebra_solver = linearAlgebra::LinearAlgebraFactory::CreateLinearAlgebraSolver(EXACT_DENSE); + + linear_algebra_solver->InitiateDescriptors(aConfigurations, *data->GetDescriptorData(), p); + linear_algebra_solver->ExaGeoStatLaSetTile(EXAGEOSTAT_UPPER_LOWER, 0, 0, + data->GetDescriptorData()->GetDescriptor(CHAMELEON_DESCRIPTOR, + DESCRIPTOR_C).chameleon_desc); + //populate data object with read data + for (int i = 0; i < aConfigurations.GetProblemSize() / p; i++) { + data->GetLocations()->GetLocationX()[i] = x_locations[i]; + data->GetLocations()->GetLocationY()[i] = y_locations[i]; + if (aConfigurations.GetDimension() != Dimension2D) { + data->GetLocations()->GetLocationZ()[i] = z_locations[i]; + } + } + for (int i = 0; i < aConfigurations.GetProblemSize(); i++) { + ((T *) data->GetDescriptorData()->GetDescriptor(CHAMELEON_DESCRIPTOR, + DESCRIPTOR_Z).chameleon_desc->mat)[i] = measurements_vector[i]; + } + + Results::GetInstance()->SetGeneratedLocationsNumber(aConfigurations.GetProblemSize() / p); + Results::GetInstance()->SetIsLogger(aConfigurations.GetLogger()); + Results::GetInstance()->SetLoggerPath(aConfigurations.GetLoggerPath()); + + return data; +} + template void CSVLoader::ReleaseInstance() { if (mpInstance != nullptr) { diff --git a/src/data-loader/concrete/ParsecLoader.cpp b/src/data-loader/concrete/ParsecLoader.cpp new file mode 100644 index 00000000..b5daa893 --- /dev/null +++ b/src/data-loader/concrete/ParsecLoader.cpp @@ -0,0 +1,285 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). 
+ +/** + * @file SyntheticGenerator.cpp + * @brief Implementation of the SyntheticGenerator class + * @version 1.1.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @author Qinglei Cao + * @date 2023-02-14 +**/ + +#include +#include +extern "C"{ +#include +} + +using namespace std; + +using namespace exageostat::dataLoader::parsec; +using namespace exageostat::common; +using namespace exageostat::configurations; +using namespace exageostat::dataunits; +using namespace exageostat::transformers; + +template +ParsecLoader *ParsecLoader::GetInstance() { + + if (mpInstance == nullptr) { + mpInstance = new ParsecLoader(); + } + return mpInstance; +} + +template +std::unique_ptr> +ParsecLoader::LoadData(configurations::Configurations &aConfigurations, exageostat::kernels::Kernel &aKernel) { + + SYNC_TIME_START(); + //create data object + auto data = std::make_unique>(aConfigurations.GetProblemSize() / 1, + aConfigurations.GetDimension()); + + // Initiate Descriptors + int L = aConfigurations.GetDenseTileSize(); + int MB; + int NB; + int t = aConfigurations.GetTimeSlot(); + int P = aConfigurations.GetPGrid(); + int nodes = aConfigurations.GetCoresNumber(); + int rank = ExaGeoStatHardware::GetParsecMPIRank(); + int verbose = configurations::Configurations::GetVerbosity() == DETAILED_MODE? 
1: 0; + int gpus = aConfigurations.GetGPUsNumbers(); + int tile_size = aConfigurations.GetDenseTileSize(); + string files_directory_path = aConfigurations.GetDataPath(); + int path_length = files_directory_path.length(); + char filename[path_length + 50]; + char directory_path[path_length]; + sprintf(directory_path, "%s", files_directory_path.c_str()); + + MB = L + 1; + NB = L * 2; + VERBOSE_PRINT(rank, verbose, ("Reading f_data\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_DATA); + parsec_matrix_block_cyclic_t *pF_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_DATA).parsec_desc; + ReadCSVToComplexTimeSlot((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pF_data_desc, MB, NB, nodes, t, directory_path, rank, verbose, gpus); + + MB = 2*L-1; + NB = L+1; + VERBOSE_PRINT(rank, verbose, ("Reading Et1\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET1); + parsec_matrix_block_cyclic_t *pEt1_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET1).parsec_desc; + sprintf(filename, "%s/%d%s", files_directory_path.c_str(), tile_size, "_Et1.csv"); + ReadCSVComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pEt1_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = 2*L-1; + NB = L-1; + VERBOSE_PRINT(rank, verbose, ("Reading Et2\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET2); + parsec_matrix_block_cyclic_t *pEt2_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET2).parsec_desc; + sprintf(filename, "%s/%d%s", files_directory_path.c_str(), tile_size, "_Et2.csv"); + ReadCSVComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pEt2_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = 2*L; + NB = 2*L-1; + VERBOSE_PRINT(rank, verbose, ("Reading Ep\n")); + 
data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_EP); + parsec_matrix_block_cyclic_t *pEp_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_EP).parsec_desc; + sprintf(filename, "%s/%d%s", files_directory_path.c_str(), tile_size, "_Ep.csv"); + ReadCSVComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pEp_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = (L*L+L)/2; + NB = L; + VERBOSE_PRINT(rank, verbose, ("Reading Slmn\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SLMN); + parsec_matrix_block_cyclic_t *pSlum_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SLMN).parsec_desc; + sprintf(filename, "%s/%d%s", files_directory_path.c_str(), tile_size, "_Slmn.csv"); + ReadCSVComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pSlum_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = L; + NB = 2*L-1; + VERBOSE_PRINT(rank, verbose, ("Reading Ie\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IE); + parsec_matrix_block_cyclic_t *pIe_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IE).parsec_desc; + sprintf(filename, "%s/%d%s", files_directory_path.c_str(), tile_size, "_Ie.csv"); + ReadCSVToComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pIe_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = L; + NB = 2*L-1; + VERBOSE_PRINT(rank, verbose, ("Reading Io\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IO); + parsec_matrix_block_cyclic_t *pIo_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IO).parsec_desc; + sprintf(filename, "%s/%d%s", files_directory_path.c_str(), tile_size, "_Io.csv"); + ReadCSVToComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pIo_data_desc, MB, NB, nodes, t, filename, rank, 
verbose, gpus); + + MB = L-1; + NB = L+1; + VERBOSE_PRINT(rank, verbose, ("Reading P\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_P); + parsec_matrix_block_cyclic_t *pP_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_P).parsec_desc; + sprintf(filename, "%s/%d%s", files_directory_path.c_str(), tile_size, "_P.csv"); + ReadCSVToComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pP_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = 2*L-1; + NB = 2*L-1; + VERBOSE_PRINT(rank, verbose, ("Reading D\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_D); + parsec_matrix_block_cyclic_t *pD_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_D).parsec_desc; + sprintf(filename, "%s/%d%s", files_directory_path.c_str(), tile_size, "_D.csv"); + ReadCSVToComplex((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pD_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = L; + NB = L; + VERBOSE_PRINT(rank, verbose, ("Reading flm\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLM); + parsec_matrix_block_cyclic_t *pFlm_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLM).parsec_desc; + ReadCSVTimeSlot((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pFlm_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + VERBOSE_PRINT(rank, verbose, ("Reading flmERA\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLMERA); + parsec_matrix_block_cyclic_t *pFlmera_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLMERA).parsec_desc; + sprintf(filename, "%s/%d%s", files_directory_path.c_str(), tile_size, "_flmERA.csv"); + ReadCSVTimeSlot((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pFlmera_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); 
+ + // Backward + if(aConfigurations.GetEnableInverse()){ + + MB = L+1; + NB = (L*L+L)/2; + VERBOSE_PRINT(rank, verbose, ("Reading Zlm\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ZLM); + parsec_matrix_block_cyclic_t *PZlm_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ZLM).parsec_desc; + sprintf(filename, "%s/%d%s", files_directory_path.c_str(), tile_size, "_Zlm.csv"); + ReadCSV((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), PZlm_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = 2*L-1; + NB = 2*L; + VERBOSE_PRINT(rank, verbose, ("Reading SC\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SC); + parsec_matrix_block_cyclic_t *pSc_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SC).parsec_desc; + sprintf(filename, "%s/%d%s", files_directory_path.c_str(), tile_size, "_SC.csv"); + ReadCSV((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pSc_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + + MB = L+1; + NB = 2*L; + VERBOSE_PRINT(rank, verbose, ("f_spatial\n")); + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_SPATIAL); + parsec_matrix_block_cyclic_t *pF_spatial_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_SPATIAL).parsec_desc; + ReadCSV((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pF_spatial_data_desc, MB, NB, nodes, t, filename, rank, verbose, gpus); + } + + // Init and allocate memory for desc_flmT + MB = L * L; + NB = t; + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLMT); + parsec_matrix_block_cyclic_t *pFlmt_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLMT).parsec_desc; + parsec_matrix_block_cyclic_init(pFlmt_data_desc, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, rank, L*((MB/nodes%L) ? 
MB/nodes/L+1 : MB/nodes/L), + NB, MB, NB, 0, 0, MB, NB, nodes, 1, 1, 1, 0, 0); + + pFlmt_data_desc->mat = parsec_data_allocate((size_t)pFlmt_data_desc->super.nb_local_tiles * + (size_t)pFlmt_data_desc->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(pFlmt_data_desc->super.mtype)); + parsec_data_collection_set_key((parsec_data_collection_t*)&desc_flmT, "desc_flmT"); + + // Init and allocate memory for pA_data_desc + MB = L * L; + NB = t; + data->GetDescriptorData()->SetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_A); + parsec_matrix_block_cyclic_t *pA_data_desc = data->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_A).parsec_desc; + parsec_matrix_block_cyclic_init(pA_data_desc, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, rank, L, L, MB, NB, 0, 0, + pFlmt_data_desc->super.mb, pFlmt_data_desc->super.nb, P, nodes/P, 1, 1, 0, 0); + pA_data_desc->mat = parsec_data_allocate((size_t)pA_data_desc->super.nb_local_tiles * + (size_t)pA_data_desc->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(pA_data_desc->super.mtype)); + parsec_data_collection_set_key((parsec_data_collection_t*)&pA_data_desc, "desc_A"); + + if(aConfigurations.GetEnableInverse()){ + int ts_test_M = 2000; + int ts_test_N = 1; + // Allocate memory + double *pFileContent = (double *)malloc(ts_test_M * ts_test_N * sizeof(double)); + sprintf(filename, "%s/%s", files_directory_path.c_str(), "ts_test.csv"); + ReadCSVFileHelper(filename, pFileContent, ts_test_M, ts_test_N); + } + + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(), ("Load Data\n")); + // Forward SHT + DataTransformer::ForwardSphericalHarmonicsTransform(L, data); + // Forward SHT Reshape + DataTransformer::ForwardReshape(aConfigurations, data); + // Generate matrix + CompressMatrixHelper(aConfigurations, data); + + return data; +} + +template +int ParsecLoader::ReadCSVFileHelper(const char* apFilename, double *apFileContent, int aM, int aN) { + + FILE *pFile = fopen(apFilename, "r"); + if (!pFile) { + printf("File opening 
failed: %s", apFilename); + return -1; + } + + int status = 0; + for (int i = 0; i < aM; i++) { + for (int j = 0; j < aN; j++) { + status = fscanf(pFile, "%lf,", &apFileContent[i * aN + j]); + if (status != 1) { + fprintf(stderr, "Error reading file at row %d, column %d\n", i, j); + fclose(pFile); + return 1; + } + } + } + fclose(pFile); + + return 0; +} + +template +void ParsecLoader::CompressMatrixHelper(configurations::Configurations &aConfigurations, std::unique_ptr> &aData) { + + int max_rank = aConfigurations.GetMaxRank(); + int n = aConfigurations.GetProblemSize(); + int adaptive_decision = aConfigurations.GetAdaptiveDecision(); + int tol = aConfigurations.GetTolerance(); + int send_full_tile = 0; + int auto_band = 0; + int gpus = aConfigurations.GetGPUsNumbers(); + double upper_lower = EXAGEOSTAT_LOWER; + int L = aConfigurations.GetDenseTileSize(); + int N = aConfigurations.GetProblemSize(); + int NT = (N % L == 0) ? (N/L) : (N/L + 1); + + SYNC_TIME_START(); + MatrixCompress((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), &ExaGeoStatHardware::GetHicmaParams()->norm_global, upper_lower, L, NT, max_rank, n, + adaptive_decision, tol, send_full_tile, auto_band, gpus, ExaGeoStatHardware::GetHicmaData(), ExaGeoStatHardware::GetParamsKernel()); + + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(), ("Matrix genneration Matrix norm: norm_global= %le\n", ExaGeoStatHardware::GetHicmaParams()->norm_global)); +} + +template +void ParsecLoader::ReadData(Configurations &aConfigurations, vector &aMeasurementsMatrix, vector &aXLocations, + vector &aYLocations, vector &aZLocations, const int &aP) { +} + +template +void ParsecLoader::WriteData(const T &aMatrixPointer, const int &aProblemSize, const int &aP, std::string &aLoggerPath, Locations &aLocations) { + +} + +template +void ParsecLoader::ReleaseInstance() { + if (mpInstance != nullptr) { + mpInstance = nullptr; + } +} + +template ParsecLoader *ParsecLoader::mpInstance = nullptr; diff --git 
a/src/data-transformer/CMakeLists.txt b/src/data-transformer/CMakeLists.txt new file mode 100644 index 00000000..273aeaf9 --- /dev/null +++ b/src/data-transformer/CMakeLists.txt @@ -0,0 +1,17 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +# @file CMakeLists.txt +# @brief CMake configuration file for data-transformer directory +# @version 2.0.0 +# @author Mahmoud ElKarargy +# @date 2024-10-15 + +# Add source files to the parent scope +set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/DataTransformer.cpp + ${SOURCES} + PARENT_SCOPE +) diff --git a/src/data-transformer/DataTransformer.cpp b/src/data-transformer/DataTransformer.cpp new file mode 100644 index 00000000..bbca781b --- /dev/null +++ b/src/data-transformer/DataTransformer.cpp @@ -0,0 +1,113 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file DataTransformer.cpp + * @brief Contains the implementation of the DataTransformer class. 
+ * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @author Qinglei Cao + * @date 2024-02-04 +**/ + +#include +extern "C"{ + #include +} + +using namespace exageostat::common; +using namespace exageostat::configurations; +using namespace exageostat::transformers; + +template void DataTransformer::ForwardSphericalHarmonicsTransform(const int &aLSize, std::unique_ptr> &aData){ + + SYNC_TIME_START(); + parsec_tiled_matrix_t *pFDataDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_DATA).parsec_desc; + parsec_tiled_matrix_t *pFLMDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLM).parsec_desc; + parsec_tiled_matrix_t *pFLMTDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLMT).parsec_desc; + parsec_tiled_matrix_t *pET1Desc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET1).parsec_desc; + parsec_tiled_matrix_t *pET2Desc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET2).parsec_desc; + parsec_tiled_matrix_t *pEPDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_EP).parsec_desc; + parsec_tiled_matrix_t *pSLMNDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SLMN).parsec_desc; + parsec_tiled_matrix_t *pIEDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IE).parsec_desc; + parsec_tiled_matrix_t *pIODesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IO).parsec_desc; + parsec_tiled_matrix_t *pPDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_P).parsec_desc; + parsec_tiled_matrix_t *pDDesc = 
(parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_D).parsec_desc; + + int f_desc_M = pFDataDesc->mb; + int ep_desc_N = pEPDesc->nb; + int et1_desc_M = pET1Desc->mb; + int et2_desc_M = pET2Desc->mb; + int p_desc_N = pPDesc->nb; + int flm_desc_M = pFLMDesc->mb; + int flm_desc_N = pFLMDesc->nb; + + ForwardSHT((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pFDataDesc, pFLMDesc, + pFLMTDesc, pET1Desc, pET2Desc, pEPDesc, pSLMNDesc, pIEDesc, pIODesc, pPDesc, + pDDesc, f_desc_M, ep_desc_N, et1_desc_M, et2_desc_M, p_desc_N, flm_desc_M, flm_desc_N, aLSize); + + int flops_forward = 2.0*(aLSize+1)*(2*aLSize-1)*(2*aLSize) // Gmtheta_r = f_data*Ep + + 2.0*(2*aLSize-1)*(2*aLSize-1)*(aLSize+1) // Fmnm = Et1*Gmtheta_r + + 2.0*(2*aLSize-1)*(aLSize-1)*(aLSize+1) // tmp1 = Et2*P + + 2.0*(2*aLSize-1)*(2*aLSize-1)*(aLSize+1) // tmp2 = tmp1 * Gmtheta_r + + 2.0*(2*aLSize-1)*(2*aLSize-1)*(2*aLSize-1) // Fmnm += tmp2 * D + + 2.0*aLSize*aLSize/2*(aLSize*(2*aLSize-1)+aLSize); // flmn_matrix(ell+1,m+1) = Slmn(climate_emulator_getSingleIndex(ell, m),:)*Ie*Fmnm(:,L+m) + + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(), ("Forward SHT: %.2lf Gflop/s\n", flops_forward/sync_time_elapsed/1.0e9)); + +} + +template void DataTransformer::ForwardReshape(Configurations &aConfigurations, std::unique_ptr> &aData){ + + SYNC_TIME_START(); + int rank = ExaGeoStatHardware::GetParsecMPIRank(); + int verbose = configurations::Configurations::GetVerbosity() == DETAILED_MODE? 
1: 0; + int L = aConfigurations.GetDenseTileSize(); + int t = aConfigurations.GetTimeSlot(); + + parsec_tiled_matrix_t *pFDataDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_DATA).parsec_desc; + parsec_tiled_matrix_t *pFLMDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLM).parsec_desc; + parsec_tiled_matrix_t *pFLMTDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLMT).parsec_desc; + parsec_tiled_matrix_t *pET1Desc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET1).parsec_desc; + parsec_tiled_matrix_t *pET2Desc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ET2).parsec_desc; + parsec_tiled_matrix_t *pEPDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_EP).parsec_desc; + parsec_tiled_matrix_t *pSLMNDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SLMN).parsec_desc; + parsec_tiled_matrix_t *pIEDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IE).parsec_desc; + parsec_tiled_matrix_t *pIODesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_IO).parsec_desc; + parsec_tiled_matrix_t *pPDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_P).parsec_desc; + parsec_tiled_matrix_t *pDDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_D).parsec_desc; + parsec_tiled_matrix_t *pADesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_A).parsec_desc; + + int f_desc_M = pFDataDesc->mb; + int ep_desc_N = pEPDesc->nb; + int et1_desc_M = 
pET1Desc->mb; + int et2_desc_M = pET2Desc->mb; + int p_desc_N = pPDesc->nb; + int flmt_desc_M = pFLMTDesc->mb; + int nodes = aConfigurations.GetCoresNumber(); + int flmt_desc_nb = (flmt_desc_M / nodes % L) ? flmt_desc_M / nodes / L + 1 : flmt_desc_M / nodes / L; + + int N = aConfigurations.GetProblemSize(); + int NT = (N % L == 0) ? (N/L) : (N/L + 1); + double upper_lower = EXAGEOSTAT_LOWER; + ForwardSHTReshape((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), rank, verbose, pFDataDesc, pFLMDesc, + pFLMTDesc, pET1Desc, pET2Desc, pEPDesc, pSLMNDesc, pIEDesc, pIODesc, pPDesc, + pDDesc, pADesc, f_desc_M, ep_desc_N, et1_desc_M, et2_desc_M, p_desc_N, flmt_desc_nb, t, L, &ExaGeoStatHardware::GetHicmaParams()->norm_global, NT, upper_lower); + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(), ("Forward SHT Reshape\n")); + +} + +template void DataTransformer::InverseSphericalHarmonicsTransform(const int &aLSize, std::unique_ptr> &aData){ + + SYNC_TIME_START(); + parsec_tiled_matrix_t *pFSpatialDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_F_SPATIAL).parsec_desc; + parsec_tiled_matrix_t *pFLMDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_FLM).parsec_desc; + parsec_tiled_matrix_t *pZLMDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_ZLM).parsec_desc; + parsec_tiled_matrix_t *pSCDesc = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(PARSEC_DESCRIPTOR, DESCRIPTOR_SC).parsec_desc; + + InverseSHT((parsec_context_t *) ExaGeoStatHardware::GetParsecContext(), pFSpatialDesc, pFLMDesc, pZLMDesc, pSCDesc, aLSize); + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(), ("Inverse SHT\n")); +} diff --git a/src/data-units/CMakeLists.txt b/src/data-units/CMakeLists.txt index 85e48392..d7de6d14 100644 --- a/src/data-units/CMakeLists.txt +++ b/src/data-units/CMakeLists.txt @@ -12,10 
+12,12 @@ # Include the concrete implementations of the ExaGeoStat Descriptor class add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/descriptor) +# Initialize SOURCES variable if not already initialized set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Locations.cpp ${CMAKE_CURRENT_SOURCE_DIR}/DescriptorData.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ExaGeoStatData.cpp ${SOURCES} PARENT_SCOPE - ) \ No newline at end of file + ) + diff --git a/src/data-units/DescriptorData.cpp b/src/data-units/DescriptorData.cpp index eff728d3..70f79b0b 100644 --- a/src/data-units/DescriptorData.cpp +++ b/src/data-units/DescriptorData.cpp @@ -21,13 +21,14 @@ using namespace exageostat::dataunits::descriptor; template DescriptorData::~DescriptorData() { - ExaGeoStatDescriptor exaGeoStatDescriptor; + ExaGeoStatDescriptor exageostat_descriptor; // Destroy descriptors. const std::string &chameleon = "_CHAMELEON"; for (const auto &pair: this->mDictionary) { const std::string &key = pair.first; +#if DEFAULT_RUNTIME if (key.find("CHAMELEON") != std::string::npos && pair.second != nullptr) { - exaGeoStatDescriptor.DestroyDescriptor(CHAMELEON_DESCRIPTOR, pair.second); + exageostat_descriptor.DestroyDescriptor(CHAMELEON_DESCRIPTOR, pair.second); #ifdef USE_HICMA // Since there are converted descriptors from Chameleon to Hicma, which have the same memory address. // So, by deleting the owner which is Chameleon, no need to delete hicma. Therefore, we remove the row of that descriptor. 
@@ -40,13 +41,24 @@ DescriptorData::~DescriptorData() { } #endif } else if (key.find("HICMA") != std::string::npos && pair.second != nullptr) { - exaGeoStatDescriptor.DestroyDescriptor(HICMA_DESCRIPTOR, pair.second); + exageostat_descriptor.DestroyDescriptor(HICMA_DESCRIPTOR, pair.second); } +#else + if (key.find("PARSEC") != std::string::npos && pair.second != nullptr) { + if(key == "DESCRIPTOR_FLMT_PARSEC") { + continue; + } + exageostat_descriptor.DestroyDescriptor(PARSEC_DESCRIPTOR, pair.second); + } +#endif + } this->mDictionary.clear(); +#if DEFAULT_RUNTIME if (this->mpSequence) { CHAMELEON_Sequence_Destroy((RUNTIME_sequence_t *) this->mpSequence); } +#endif } template @@ -91,6 +103,8 @@ BaseDescriptor DescriptorData::GetDescriptor(const DescriptorType &aDescriptorType, const DescriptorName &aDescriptorName) { BaseDescriptor descriptor{}; +#if DEFAULT_RUNTIME + if (aDescriptorType == CHAMELEON_DESCRIPTOR) { if (this->mDictionary.find(GetDescriptorName(aDescriptorName) + "_CHAMELEON") == this->mDictionary.end()) { descriptor.chameleon_desc = nullptr; @@ -119,6 +133,15 @@ DescriptorData::GetDescriptor(const DescriptorType &aDescriptorType, const De throw std::runtime_error("To use HiCMA descriptor you need to enable USE_HICMA!"); #endif } +#else + if (aDescriptorType == PARSEC_DESCRIPTOR) { + if (this->mDictionary.find(GetDescriptorName(aDescriptorName) + "_PARSEC") == this->mDictionary.end()) { + descriptor.parsec_desc = nullptr; + } + descriptor.parsec_desc = (parsec_matrix_block_cyclic_t *) this->mDictionary[GetDescriptorName(aDescriptorName) + + "_PARSEC"]; + } +#endif return descriptor; } @@ -131,16 +154,18 @@ void DescriptorData::SetDescriptor(const DescriptorType &aDescriptorType, con void *descriptor; std::string type; - ExaGeoStatDescriptor exaGeoStatDescriptor; + ExaGeoStatDescriptor exageostat_descriptor; +#if DEFAULT_RUNTIME + if (aDescriptorType == CHAMELEON_DESCRIPTOR) { - descriptor = exaGeoStatDescriptor.CreateDescriptor((CHAM_desc_t *) 
descriptor, aDescriptorType, aIsOOC, + descriptor = exageostat_descriptor.CreateDescriptor((CHAM_desc_t *) descriptor, aDescriptorType, aIsOOC, apMatrix, aFloatPoint, aMB, aNB, aSize, aLM, aLN, aI, aJ, aM, aN, aP, aQ, aValidOOC); type = "_CHAMELEON"; } else { #ifdef USE_HICMA - descriptor = exaGeoStatDescriptor.CreateDescriptor((HICMA_desc_t *) descriptor, aDescriptorType, aIsOOC, + descriptor = exageostat_descriptor.CreateDescriptor((HICMA_desc_t *) descriptor, aDescriptorType, aIsOOC, apMatrix, aFloatPoint, aMB, aNB, aSize, aLM, aLN, aI, aJ, aM, aN, aP, aQ, aValidOOC); type = "_HICMA"; @@ -148,17 +173,27 @@ void DescriptorData::SetDescriptor(const DescriptorType &aDescriptorType, con throw std::runtime_error("To create HiCMA descriptor you need to enable USE_HICMA!"); #endif } - if (aConverted) { type = "_CHAM_HIC"; } +#else + if (aDescriptorType == PARSEC_DESCRIPTOR) { + descriptor = exageostat_descriptor.CreateDescriptor((parsec_matrix_block_cyclic_t *) descriptor, aDescriptorType, aIsOOC, + apMatrix, aFloatPoint, aMB, aNB, aSize, aLM, aLN, aI, aJ, aM, + aN, aP, aQ, aValidOOC); + type = "_PARSEC"; + } + else { + throw std::runtime_error("While using PaRSEC as a runtime, only PaRSEC descriptors are enabled!"); + } +#endif this->mDictionary[GetDescriptorName(aDescriptorName) + type] = descriptor; - } template T * DescriptorData::GetDescriptorMatrix(const DescriptorType &aDescriptorType, const DescriptorName &aDescriptorName) { +#if DEFAULT_RUNTIME if (aDescriptorType == CHAMELEON_DESCRIPTOR) { return (T *) (this->GetDescriptor(CHAMELEON_DESCRIPTOR, aDescriptorName).chameleon_desc)->mat; } else { @@ -168,6 +203,7 @@ DescriptorData::GetDescriptorMatrix(const DescriptorType &aDescriptorType, co throw std::runtime_error("To use Hicma descriptor you need to enable USE_HICMA!"); #endif } +#endif } // Define a function that returns the name of a DescriptorName value as a string @@ -284,6 +320,36 @@ std::string DescriptorData::GetDescriptorName(const DescriptorName 
&aDescript return "DESCRIPTOR_R"; case DESCRIPTOR_R_COPY : return "DESCRIPTOR_R_COPY"; + case DESCRIPTOR_F_DATA: + return "DESCRIPTOR_F_DATA"; + case DESCRIPTOR_ET1: + return "DESCRIPTOR_ET1"; + case DESCRIPTOR_ET2: + return "DESCRIPTOR_ET2"; + case DESCRIPTOR_EP: + return "DESCRIPTOR_EP"; + case DESCRIPTOR_SLMN: + return "DESCRIPTOR_SLMN"; + case DESCRIPTOR_IE: + return "DESCRIPTOR_IE"; + case DESCRIPTOR_IO: + return "DESCRIPTOR_IO"; + case DESCRIPTOR_P: + return "DESCRIPTOR_P"; + case DESCRIPTOR_D: + return "DESCRIPTOR_D"; + case DESCRIPTOR_FLMERA: + return "DESCRIPTOR_FLMERA"; + case DESCRIPTOR_ZLM: + return "DESCRIPTOR_ZLM"; + case DESCRIPTOR_SC: + return "DESCRIPTOR_SC"; + case DESCRIPTOR_F_SPATIAL: + return "DESCRIPTOR_F_SPATIAL"; + case DESCRIPTOR_FLM: + return "DESCRIPTOR_FLM"; + case DESCRIPTOR_FLMT: + return "DESCRIPTOR_FLMT"; default: throw std::invalid_argument( "The name of descriptor you provided is undefined, Please read the user manual to know the available descriptors"); diff --git a/src/data-units/Locations.cpp b/src/data-units/Locations.cpp index abe0572c..701730e9 100644 --- a/src/data-units/Locations.cpp +++ b/src/data-units/Locations.cpp @@ -22,7 +22,7 @@ using namespace exageostat::common; template void Locations::SetLocationX(T &aLocationX, const int &aSize) { - if (aLocationX && aSize == this->mSize) { + if (&aLocationX != nullptr && aSize == this->mSize) { memcpy(this->mpLocationX, &aLocationX, this->mSize * sizeof(T)); } else { throw std::runtime_error("Invalid value for setting Locations X"); @@ -41,7 +41,7 @@ T *Locations::GetLocationX() { template void Locations::SetLocationY(T &aLocationY, const int &aSize) { - if (aLocationY && aSize == this->mSize) { + if (&aLocationY != nullptr && aSize == this->mSize) { memcpy(this->mpLocationY, &aLocationY, this->mSize * sizeof(T)); } else { throw std::runtime_error("Invalid value for setting Locations Y"); @@ -59,7 +59,7 @@ T *Locations::GetLocationY() { template void Locations::SetLocationZ(T 
&aLocationZ, const int &aSize) { - if (aLocationZ && aSize == this->mSize) { + if (&aLocationZ != nullptr && aSize == this->mSize) { memcpy(this->mpLocationZ, &aLocationZ, this->mSize * sizeof(T)); } else { throw std::runtime_error("Invalid value for setting Locations Z"); diff --git a/src/data-units/descriptor/ExaGeoStatDescriptor.cpp b/src/data-units/descriptor/ExaGeoStatDescriptor.cpp index 5d6cd730..829684b3 100644 --- a/src/data-units/descriptor/ExaGeoStatDescriptor.cpp +++ b/src/data-units/descriptor/ExaGeoStatDescriptor.cpp @@ -15,12 +15,16 @@ #include #include + +#if DEFAULT_RUNTIME + #include #ifdef USE_HICMA - #include - +#endif +#else +#include #endif using namespace exageostat::common; @@ -28,34 +32,49 @@ using namespace exageostat::dataunits::descriptor; template void * -ExaGeoStatDescriptor::CreateDescriptor(void *apDescriptor, const common::DescriptorType &aDescriptorType, - const bool &aIsOOC, void *apMatrix, const common::FloatPoint &aFloatPoint, +ExaGeoStatDescriptor::CreateDescriptor(void *apDescriptor, const DescriptorType &aDescriptorType, + const bool &aIsOOC, void *apMatrix, const FloatPoint &aFloatPoint, const int &aMB, const int &aNB, const int &aSize, const int &aLM, const int &aLN, const int &aI, const int &aJ, const int &aM, const int &aN, const int &aP, const int &aQ, const bool &aValidOOC) { +#if DEFAULT_RUNTIME if (aDescriptorType == CHAMELEON_DESCRIPTOR) { return ChameleonDescriptor::CreateChameleonDescriptor(apDescriptor, aIsOOC, apMatrix, aFloatPoint, aMB, aNB, aSize, aLM, aLN, aI, aJ, aM, aN, aP, aQ, aValidOOC); - } else if (aDescriptorType == HICMA_DESCRIPTOR) { + } #ifdef USE_HICMA + if (aDescriptorType == HICMA_DESCRIPTOR) { return HicmaDescriptor::CreateHicmaDescriptor(apDescriptor, aIsOOC, apMatrix, aFloatPoint, aMB, aNB, aSize, aLM, aLN, aI, aJ, aM, aN, aP, aQ, aValidOOC); + } #endif +#else + if (aDescriptorType == PARSEC_DESCRIPTOR) { + return ParsecDescriptor::CreateParsecDescriptor(apDescriptor); } +#endif std::cerr << 
"Error, please select the correct descriptor type!" << std::endl; return nullptr; } template int ExaGeoStatDescriptor::DestroyDescriptor(const DescriptorType &aDescriptorType, void *apDesc) { + +#if DEFAULT_RUNTIME if (aDescriptorType == CHAMELEON_DESCRIPTOR) { return ChameleonDescriptor::DestroyChameleonDescriptor(apDesc); - } else if (aDescriptorType == HICMA_DESCRIPTOR) { + } #ifdef USE_HICMA + if (aDescriptorType == HICMA_DESCRIPTOR) { return HicmaDescriptor::DestroyHicmaDescriptor(apDesc); + } #endif +#else + if (aDescriptorType == PARSEC_DESCRIPTOR) { + return ParsecDescriptor::DestroyParsecDescriptor(apDesc); } +#endif std::cerr << "Error, please select the correct descriptor type!" << std::endl; return -1; } \ No newline at end of file diff --git a/src/data-units/descriptor/concrete/CMakeLists.txt b/src/data-units/descriptor/concrete/CMakeLists.txt index 6bb197d4..961cd150 100644 --- a/src/data-units/descriptor/concrete/CMakeLists.txt +++ b/src/data-units/descriptor/concrete/CMakeLists.txt @@ -12,14 +12,20 @@ # @date 2023-03-20 # Include the concrete implementations of the Descriptor class based on the enabled libraries (HiCMA or Chameleon) -set(SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/ChameleonDescriptor.cpp - ${SOURCES} - ) - -if (USE_HICMA) - list(APPEND SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/HicmaDescriptor.cpp +if (RUNTIME_TYPE STREQUAL "STARPU") + set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/ChameleonDescriptor.cpp + ${SOURCES} + ) + if (USE_HICMA) + list(APPEND SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/HicmaDescriptor.cpp + ) + endif () +else () + set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/ParsecDescriptor.cpp + ${SOURCES} ) endif () diff --git a/src/data-units/descriptor/concrete/ParsecDescriptor.cpp b/src/data-units/descriptor/concrete/ParsecDescriptor.cpp new file mode 100644 index 00000000..63ac2283 --- /dev/null +++ b/src/data-units/descriptor/concrete/ParsecDescriptor.cpp @@ -0,0 +1,34 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science 
and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file ParsecDescriptor.cpp + * @brief Defines the ParsecDescriptor class for creating matrix descriptors using the PaRSEC library. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @author Qinglei Cao + * @date 2024-10-18 +**/ + +#include + +using namespace exageostat::dataunits::descriptor; + +template +parsec_matrix_block_cyclic_t * +ParsecDescriptor::CreateParsecDescriptor(void *apDescriptor) { + + parsec_matrix_block_cyclic_t *parsec_desc = new parsec_matrix_block_cyclic_t(); + return parsec_desc; +} + +template +int ParsecDescriptor::DestroyParsecDescriptor(void *apDesc) { + auto Parsec_desc = (parsec_matrix_block_cyclic_t *) apDesc; + parsec_data_free(Parsec_desc->mat); + parsec_tiled_matrix_destroy((parsec_tiled_matrix_t *) Parsec_desc); + return 0; +} \ No newline at end of file diff --git a/src/hardware/ExaGeoStatHardware.cpp b/src/hardware/ExaGeoStatHardware.cpp index 3e9f3c18..27f5c6a7 100644 --- a/src/hardware/ExaGeoStatHardware.cpp +++ b/src/hardware/ExaGeoStatHardware.cpp @@ -12,13 +12,19 @@ * @date 2024-02-04 **/ +#include + +#if DEFAULT_RUNTIME #ifdef USE_MPI #include #endif #include #include -#include +#else +#include +#endif + #include #include #include @@ -26,6 +32,67 @@ using namespace exageostat::common; using namespace exageostat::results; +using namespace std; + +ExaGeoStatHardware::ExaGeoStatHardware(exageostat::configurations::Configurations &aConfigurations){ + + // These variables are named according to HiCMA-X inputs + const int N = aConfigurations.GetProblemSize(); + const int t = aConfigurations.GetDenseTileSize(); + const int e = aConfigurations.GetAccuracy(); + const int a = aConfigurations.GetAdaptiveDecision(); + const int g = aConfigurations.GetGPUsNumbers(); + const int c = aConfigurations.GetCoresNumber(); + const int j = 
aConfigurations.GetDiagonalAddition(); + const int J = aConfigurations.GetTimeSlot(); + const int K = aConfigurations.GetObjectsNumber(); + const int I = aConfigurations.GetDenseBandDP(); + const int time_slot_per_file = aConfigurations.GetTimeSlotPerFile(); + const int num_file = aConfigurations.GetFileNumber(); + + int v = 0; + if (aConfigurations.GetVerbosity() == Verbose::DETAILED_MODE){ + v = 1; + } + + // Create a vector to store the arguments as strings + std::vector new_args = { + "-g", to_string(g), + "-NB", to_string(t), + "-K", to_string(t), + "-N", to_string(N), + "-v", to_string(v), + "-I", to_string(I), + "-a", to_string(a), + "-J", to_string(J), + "-c", to_string(c), + "-K", to_string(K), + "-j", to_string(j) + }; + + // Convert std::vector to char** for the new argv + int new_argc = new_args.size(); + char **new_argv = new char*[new_argc]; + + for (int i = 0; i < new_argc; ++i) { + new_argv[i] = new char[new_args[i].length() + 1]; + strcpy(new_argv[i], new_args[i].c_str()); + } + +#if !DEFAULT_RUNTIME + int iparam[IPARAM_SIZEOF] = {0}; + double dparam[DPARAM_SIZEOF]; + char *cparam[CPARAM_SIZEOF]; + this->mpHicmaParams = make_unique(); + this->mpParamsKernel = make_unique(); + this->mpHicmaData = make_unique(); + this->mpAnalysis = make_unique(); + + mpParsecContext = hicma_parsec_init(new_argc, new_argv, iparam, dparam, cparam, this->mpHicmaParams.get(), this->mpParamsKernel.get(), this->mpHicmaData.get()); + SetParsecMPIRank(this->mpHicmaParams->rank); +#endif + exageostat::helpers::CommunicatorMPI::GetInstance()->SetHardwareInitialization(); +} ExaGeoStatHardware::ExaGeoStatHardware(const Computation &aComputation, const int &aCoreNumber, const int &aGpuNumber, const int &aP, const int &aQ) { @@ -45,6 +112,7 @@ void ExaGeoStatHardware::InitHardware(const Computation &aComputation, const int SetQGrid(aQ); int tag_width = 31, tag_sep = 40; +#if DEFAULT_RUNTIME // Init hardware using Chameleon if (!mpChameleonContext) { #ifdef USE_MPI @@ -80,30 
+148,42 @@ void ExaGeoStatHardware::InitHardware(const Computation &aComputation, const int throw std::runtime_error("You need to enable HiCMA to use TLR computation!"); #endif } +#endif exageostat::helpers::CommunicatorMPI::GetInstance()->SetHardwareInitialization(); LOGGER("** Initialize ExaGeoStat hardware **") } void ExaGeoStatHardware::FinalizeHardware() { +#if DEFAULT_RUNTIME // finalize hardware using HiCMA -#ifdef USE_HICMA + #ifdef USE_HICMA if (mpHicmaContext) { HICMA_Finalize(); mpHicmaContext = nullptr; } -#endif + #endif // finalize hardware using Chameleon if (mpChameleonContext) { -#if defined(USE_MPI) && defined(USE_HICMA) + #if defined(USE_MPI) && defined(USE_HICMA) // Since already HiCMA do so, then no need to remove empty cache. starpu_mpi_cache_set(0); -#endif + #endif CHAMELEON_Finalize() mpChameleonContext = nullptr; } +#else + if (mpParsecContext) { + + int iparam[IPARAM_SIZEOF] = {0}; + double dparam[DPARAM_SIZEOF]; + char *cparam[CPARAM_SIZEOF]; + hicma_parsec_fini((parsec_context_t *) mpParsecContext, 0, NULL, iparam, dparam, cparam, this->mpHicmaParams.get(), this->mpParamsKernel.get(), this->mpHicmaData.get(), this->mpAnalysis.get()); + mpParsecContext = nullptr; + } +#endif exageostat::helpers::CommunicatorMPI::GetInstance()->RemoveHardwareInitialization(); } @@ -127,6 +207,13 @@ void *ExaGeoStatHardware::GetChameleonContext() { return mpChameleonContext; } +void *ExaGeoStatHardware::GetParsecContext() { + if (!mpParsecContext) { + throw std::runtime_error("PaRSEC Hardware is not initialized!"); + } + return mpParsecContext; +} + void *ExaGeoStatHardware::GetContext(Computation aComputation) { if (aComputation == EXACT_DENSE || aComputation == DIAGONAL_APPROX) { return GetChameleonContext(); @@ -137,6 +224,14 @@ void *ExaGeoStatHardware::GetContext(Computation aComputation) { return nullptr; } +void ExaGeoStatHardware::SetParsecMPIRank(int aRank){ + mParsecMPIRank = aRank; +} + +int ExaGeoStatHardware::GetParsecMPIRank() { + return 
mParsecMPIRank; +} + int ExaGeoStatHardware::GetPGrid() { return mPGrid; } @@ -153,8 +248,34 @@ void ExaGeoStatHardware::SetQGrid(int aQ) { mQGrid = aQ; } +#if !DEFAULT_RUNTIME +hicma_parsec_params_t* ExaGeoStatHardware::GetHicmaParams() { + return mpHicmaParams.get(); +} + +starsh_params_t* ExaGeoStatHardware::GetParamsKernel() { + return mpParamsKernel.get(); +} + +hicma_parsec_data_t* ExaGeoStatHardware::GetHicmaData() { + return mpHicmaData.get(); +} + +hicma_parsec_matrix_analysis_t* ExaGeoStatHardware::GetAnalysis() { + return mpAnalysis.get(); +} +#endif + void *ExaGeoStatHardware::mpChameleonContext = nullptr; void *ExaGeoStatHardware::mpHicmaContext = nullptr; +void *ExaGeoStatHardware::mpParsecContext = nullptr; +int ExaGeoStatHardware::mParsecMPIRank = 0; int ExaGeoStatHardware::mPGrid = 1; int ExaGeoStatHardware::mQGrid = 1; bool ExaGeoStatHardware::mIsMPIInit = false; +#if !DEFAULT_RUNTIME +unique_ptr ExaGeoStatHardware::mpHicmaParams = nullptr; +unique_ptr ExaGeoStatHardware::mpParamsKernel = nullptr; +unique_ptr ExaGeoStatHardware::mpHicmaData = nullptr; +unique_ptr ExaGeoStatHardware::mpAnalysis = nullptr; +#endif diff --git a/src/helpers/CommunicatorMPI.cpp b/src/helpers/CommunicatorMPI.cpp index 0abc6d1c..b2d6b891 100644 --- a/src/helpers/CommunicatorMPI.cpp +++ b/src/helpers/CommunicatorMPI.cpp @@ -10,9 +10,12 @@ * @author Sameh Abdulah * @date 2023-11-10 **/ - +#include #include +#if DEFAULT_RUNTIME #include +#endif +#include using namespace exageostat::helpers; @@ -27,12 +30,18 @@ int CommunicatorMPI::GetRank() const { #ifdef USE_MPI if (!mIsHardwareInitialized) { return 0; - } else { + } + #if DEFAULT_RUNTIME + else { return CHAMELEON_Comm_rank(); } -#else - return 0; + #else + else { + return ExaGeoStatHardware::GetParsecMPIRank(); + } + #endif #endif + return 0; } void CommunicatorMPI::SetHardwareInitialization() { diff --git a/src/kernels/concrete/TrendModel.cpp b/src/kernels/concrete/TrendModel.cpp new file mode 100644 index 
00000000..20b599a1 --- /dev/null +++ b/src/kernels/concrete/TrendModel.cpp @@ -0,0 +1,90 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file TrendModel.cpp + * @brief Implementation of the TrendModel kernel. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @date 2024-11-11 +**/ + +#include + + +using namespace exageostat::kernels; +using namespace exageostat::dataunits; +using namespace exageostat::helpers; + +template +TrendModel::TrendModel() { + this->mP = 1; + this->mParametersNumber = 1; +} + +template +Kernel *TrendModel::Create() { + KernelsConfigurations::GetParametersNumberKernelMap()["TrendModel"] = 1; + return new TrendModel(); +} + +namespace exageostat::kernels { + template bool TrendModel::plugin_name = plugins::PluginRegistry>::Add( + "TrendModel", TrendModel::Create); +} + +template +void +TrendModel::GenerateCovarianceMatrix(T *apMatrixA, const int &aRowsNumber, const int &aColumnsNumber, + const int &aRowOffset, const int &aColumnOffset, Locations &aLocation1, + Locations &aLocation2, Locations &aLocation3, T *aLocalTheta, + const int &aDistanceMetric) { + int i, j; + int row_offset = aRowOffset; + int column_offset = aColumnOffset; + T theta_1 = aLocalTheta[1]; + T theta_2 = aLocalTheta[2]; + T *forcing_theta = &aLocalTheta[3]; + + double i_x = row_offset + 1; + int ty; + double theta_pow = 1.0; + double sum = 0.0; + + for (i = 0; i < aRowsNumber; i++) { + column_offset = aColumnOffset; + for (j = 0; j < aColumnsNumber; j++) { + if (column_offset == 0) { + apMatrixA[i + j * aRowsNumber] = 1.0; + } + else if( column_offset==1 ) { + apMatrixA[i + j * aRowsNumber] = forcing_theta[(int) (row_offset / theta_1) + + 238]; // 190 for 1940 -- 238 for 1988 + } else if(column_offset==2) { + ty = (row_offset) / theta_1; + for (int k = 0; k < ty + 
238; k++) { // 190 for 1940 -- 238 for 1988 + for (int kk = k; kk < ty + 237; kk++) { // 189 for 1940 -- 237 for 1988 + theta_pow *= aLocalTheta[0]; + } + sum += theta_pow * forcing_theta[k]; + theta_pow = 1; + } + apMatrixA[i + j * aRowsNumber] = (1 - aLocalTheta[0]) * sum; + sum = 0; + theta_pow = 1; + } else { + if( j%2==0 ) { + apMatrixA[i + j * aRowsNumber]=sin(2.0 * PI * (i_x) * (floor((column_offset-3.0)/2.0)+1.0) / (theta_1)); + } else{ + apMatrixA[i + j * aRowsNumber]=cos(2.0 * PI * (i_x) * ((column_offset-3.0)/2.0+1.0) / (theta_1)); + } + } + column_offset++; + } + row_offset++; + i_x++; + } +} \ No newline at end of file diff --git a/src/linear-algebra-solvers/CMakeLists.txt b/src/linear-algebra-solvers/CMakeLists.txt index 8a9eba80..865936d7 100644 --- a/src/linear-algebra-solvers/CMakeLists.txt +++ b/src/linear-algebra-solvers/CMakeLists.txt @@ -16,7 +16,14 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/concrete) # Define the sources for the library set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/LinearAlgebraFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/LinearAlgebraMethods.cpp ${SOURCES} - PARENT_SCOPE - ) \ No newline at end of file + ) + +if (RUNTIME_TYPE STREQUAL "STARPU") + set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/LinearAlgebraMethods.cpp + ${SOURCES} + ) +endif () + +set(SOURCES ${SOURCES} PARENT_SCOPE) diff --git a/src/linear-algebra-solvers/concrete/CMakeLists.txt b/src/linear-algebra-solvers/concrete/CMakeLists.txt index 74df59f8..e0017fb7 100644 --- a/src/linear-algebra-solvers/concrete/CMakeLists.txt +++ b/src/linear-algebra-solvers/concrete/CMakeLists.txt @@ -12,18 +12,19 @@ # @date 2023-03-20 # Include the concrete implementations of the LinearAlgebraMethods class based on the enabled libraries (HiCMA or Chameleon) -set(SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/chameleon/dense/ChameleonDense.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/chameleon/dst/ChameleonDST.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/chameleon/ChameleonImplementation.cpp - ${SOURCES} - ) +if 
(RUNTIME_TYPE STREQUAL "STARPU") + set(SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/chameleon/dense/ChameleonDense.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/chameleon/dst/ChameleonDST.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/chameleon/ChameleonImplementation.cpp + ${SOURCES} + ) -if (USE_HICMA) - list(APPEND SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/tlr/HicmaImplementation.cpp - ) + if (USE_HICMA) + list(APPEND SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/tlr/HicmaImplementation.cpp + ) + endif () endif () - set(SOURCES ${SOURCES} PARENT_SCOPE) diff --git a/src/linear-algebra-solvers/concrete/chameleon/dense/ChameleonDense.cpp b/src/linear-algebra-solvers/concrete/chameleon/dense/ChameleonDense.cpp index f08c8c3b..4b822886 100644 --- a/src/linear-algebra-solvers/concrete/chameleon/dense/ChameleonDense.cpp +++ b/src/linear-algebra-solvers/concrete/chameleon/dense/ChameleonDense.cpp @@ -12,7 +12,9 @@ * @date 2023-03-20 **/ +#ifdef USE_MKL #include +#endif #include @@ -29,5 +31,7 @@ ChameleonDense::ExaGeoStatPotrfTile(const common::UpperLower &aUpperLower, vo throw std::runtime_error("CHAMELEON_dpotrf_Tile Failed, Matrix is not positive definite"); } // Due to a leak in dense mode in Chameleon, We had to free the buffer manually. 
+#ifdef USE_MKL mkl_free_buffers(); +#endif } \ No newline at end of file diff --git a/src/linear-algebra-solvers/concrete/tlr/HicmaImplementation.cpp b/src/linear-algebra-solvers/concrete/tlr/HicmaImplementation.cpp index c297bb01..da7e378e 100644 --- a/src/linear-algebra-solvers/concrete/tlr/HicmaImplementation.cpp +++ b/src/linear-algebra-solvers/concrete/tlr/HicmaImplementation.cpp @@ -232,6 +232,7 @@ T HicmaImplementation::ExaGeoStatMLETile(std::unique_ptr> & //Calculate Cholesky Factorization (C=LL-1) VERBOSE("LR: Cholesky factorization of Sigma...") START_TIMING(time_facto); + this->ExaGeoStatPotrfTile(EXAGEOSTAT_LOWER, HICMA_descCUV, 0, HICMA_descCD, HICMA_descCrk, max_rank, pow(10, -1.0 * acc)); diff --git a/src/prediction/Prediction.cpp b/src/prediction/Prediction.cpp index c5b625b7..f0e6a7df 100644 --- a/src/prediction/Prediction.cpp +++ b/src/prediction/Prediction.cpp @@ -13,7 +13,10 @@ **/ #include + +#ifdef USE_MKL #include +#endif #include #include @@ -32,6 +35,7 @@ void Prediction::PredictMissingData(unique_ptr> &aData, Con T *apMeasurementsMatrix, const kernels::Kernel &aKernel, Locations *apTrainLocations, Locations *apTestLocations) { +#if DEFAULT_RUNTIME int i, j; bool can_predict = true; int num_params = aKernel.GetParametersNumbers(); @@ -194,13 +198,16 @@ void Prediction::PredictMissingData(unique_ptr> &aData, Con // Due to a leak in Chameleon, exactly trsm We had to free the buffer manually. 
+#ifdef USE_MKL mkl_free_buffers(); +#endif delete[] z_obs; delete[] z_miss; delete[] z_actual; delete miss_locations; delete obs_locations; +#endif } template @@ -209,7 +216,7 @@ void Prediction::InitializePredictionArguments(Configurations &aConfiguration T *apZObs, T *apZActual, Locations &aMissLocation, Locations &aObsLocation, T *apMeasurementsMatrix, const int &aP, Locations *apTrainLocations, Locations *apTestLocations) { - +#if DEFAULT_RUNTIME int full_problem_size = aConfigurations.GetProblemSize() * aP; T *z = new T[full_problem_size]; @@ -231,4 +238,5 @@ void Prediction::InitializePredictionArguments(Configurations &aConfiguration memcpy(apZObs, apMeasurementsMatrix, aObsLocation.GetSize() * sizeof(T)); } delete[] z; +#endif } \ No newline at end of file diff --git a/src/results/Results.cpp b/src/results/Results.cpp index 3637fe66..82ccef22 100644 --- a/src/results/Results.cpp +++ b/src/results/Results.cpp @@ -21,6 +21,8 @@ using namespace exageostat::configurations; using namespace std; +Results *Results::mpInstance = nullptr; + Results *Results::GetInstance() { if (mpInstance == nullptr) { @@ -29,20 +31,28 @@ Results *Results::GetInstance() { return mpInstance; } -void Results::SetIsSynthetic(bool aIsSynthetic) { +void Results::UpdateDictionary(const string &key, const string &value) { + mSummaryDictionary[key] = value; +} + +void Results::SetIsSynthetic(bool aIsSynthetic, const std::string &aKey) { this->mIsSynthetic = aIsSynthetic; + UpdateDictionary(aKey.empty() ? "Data is Synthetic" : aKey, aIsSynthetic ? "Yes" : "No"); } -void Results::SetGeneratedLocationsNumber(int aNumLocations) { +void Results::SetGeneratedLocationsNumber(int aNumLocations, const std::string &aKey) { this->mGeneratedLocationsNumber = aNumLocations; + UpdateDictionary(aKey.empty() ? 
"Number of Locations" : aKey, to_string(aNumLocations)); } -void Results::SetIsLogger(bool aIsLogger) { +void Results::SetIsLogger(bool aIsLogger, const std::string &aKey) { this->mIsLogger = aIsLogger; + UpdateDictionary(aKey.empty() ? "Logger Enabled" : aKey, aIsLogger ? "Yes" : "No"); } -void Results::SetLoggerPath(const string &aLoggerPath) { +void Results::SetLoggerPath(const string &aLoggerPath, const std::string &aKey) { this->mLoggerPath = aLoggerPath; + UpdateDictionary(aKey.empty() ? "Logger Path" : aKey, aLoggerPath.empty() ? LOG_PATH : aLoggerPath); } void Results::PrintEndSummary() { @@ -50,112 +60,84 @@ void Results::PrintEndSummary() { Verbose temp = Configurations::GetVerbosity(); Configurations::SetVerbosity(STANDARD_MODE); LOGGER("********************SUMMARY**********************") - - auto locations_number = this->mGeneratedLocationsNumber; - if (locations_number > 0) { - LOGGER("#Number of Locations: " << locations_number) - if (this->mIsLogger && this->mIsSynthetic) { - LOGGER(" #Data is written to file (", true) - if (this->mLoggerPath.empty()) { - this->mLoggerPath = LOG_PATH; - } - LOGGER_PRECISION(this->mLoggerPath << ").") - LOGGER("") - } - VERBOSE("#Total Data Generation Execution Time: " << this->mExecutionTimeDataGeneration) - VERBOSE("#Total Data Generation Gflop/s: " << this->mFlopsDataGeneration) - } - if (this->mMLEIterations > 0) { - LOGGER("#Number of MLE Iterations: " << this->mMLEIterations) - LOGGER("#Found Maximum Theta at: ", true) - for (double i: this->mMaximumTheta) { - LOGGER_PRECISION(i << " ", 8) - } - LOGGER("") - LOGGER("#Final Log Likelihood value: " << this->mLogLikValue) - VERBOSE("#Average Time Modeling per Iteration: " << this->GetAverageModelingExecutionTime()) - VERBOSE("#Average Flops per Iteration: " << this->GetAverageModelingFlops()) - VERBOSE("#Total MLE Execution time: " << this->mTotalModelingExecutionTime) - VERBOSE("#Total MLE GFlop/s: " << this->mTotalModelingFlops) - } - - if (this->mZMiss > 0) { - 
LOGGER("#Number of Missing Observations: " << this->mZMiss) - if (this->mMSPEError > 0) { - VERBOSE("#MSPE Prediction Execution Time: " << this->mExecutionTimeMSPE) - VERBOSE("#MSPE Gflop/s: " << this->mFlopsMSPE) - LOGGER("#Mean Square Error MSPE: " << this->mMSPEError) - - } - if (!this->mIDWError.empty()) { - LOGGER("#IDW Error: ( ", true) - for (int i = 0; i < 3; i++) { - LOGGER_PRECISION(this->mIDWError[i] << " ", 8) - } - LOGGER_PRECISION(").") - LOGGER("") - } - if (this->mMLOE > 0 || this->mMMOM > 0) { - LOGGER("#MLOE: " << this->mMLOE << "\t\t#MMOM: " << this->mMMOM) - VERBOSE("#MLOE-MMOM Execution Time: " << this->mExecutionTimeMLOEMMOM) - VERBOSE("#MLOE-MMOM Matrix Generation Time: " << this->mGenerationTimeMLOEMMOM) - VERBOSE("#MLOE-MMOM Cholesky Factorization Time: " << this->mFactoTimeMLOEMMOM) - VERBOSE("#MLOE-MMOM Loop Time: " << this->mLoopTimeMLOEMMOM) - VERBOSE("#MLOE-MMOM Number of flops: " << this->mFlopsMLOEMMOM) - } - } - if (!this->mFisherMatrix.empty()) { - LOGGER("#Sd For Sigma2: " << this->mFisherMatrix[0]) - LOGGER("#Sd For Alpha: " << this->mFisherMatrix[1]) - LOGGER("#Sd For Nu: " << this->mFisherMatrix[2]) - VERBOSE("#Fisher Execution Time: " << this->mTotalFisherTime) + for (const auto &entry: mSummaryDictionary) { + LOGGER("#" << entry.first << ": " << entry.second) } LOGGER("*************************************************") Configurations::SetVerbosity(temp); } -void Results::SetMLEIterations(int aIterationsNumber) { +void Results::SetMLEIterations(int aIterationsNumber, const std::string& aKey) { this->mMLEIterations = aIterationsNumber; + UpdateDictionary(aKey.empty() ? 
"Number of MLE Iterations" : aKey, to_string(aIterationsNumber)); } -void Results::SetMaximumTheta(const vector &aMaximumTheta) { +void Results::SetMaximumTheta(const vector &aMaximumTheta, const std::string& aKey) { this->mMaximumTheta = aMaximumTheta; + + ostringstream oss; + oss << "[ "; + for (double val: aMaximumTheta) { + oss << fixed << setprecision(8) << val << " "; + } + oss << "]"; + UpdateDictionary(aKey.empty() ? "Found Maximum Theta at" : aKey, oss.str()); } -void Results::SetLogLikValue(double aLogLikValue) { +void Results::SetLogLikValue(double aLogLikValue, const std::string& aKey) { this->mLogLikValue = aLogLikValue; + UpdateDictionary(aKey.empty() ? "Final Log Likelihood Value" : aKey, to_string(aLogLikValue)); } -void Results::SetZMiss(int aZMiss) { +void Results::SetZMiss(int aZMiss, const std::string& aKey) { this->mZMiss = aZMiss; + UpdateDictionary(aKey.empty() ? "Number of Missing Observations" : aKey, to_string(aZMiss)); } -void Results::SetMSPEError(double aMSPEError) { +void Results::SetMSPEError(double aMSPEError, const std::string& aKey) { this->mMSPEError = aMSPEError; + UpdateDictionary(aKey.empty() ? "Mean Square Error MSPE" : aKey, to_string(aMSPEError)); } -void Results::SetIDWError(const vector &aIDWError) { +void Results::SetIDWError(const vector &aIDWError, const std::string& aKey) { this->mIDWError = aIDWError; + + ostringstream oss; + oss << "[ "; + for (double val: aIDWError) { + oss << fixed << setprecision(8) << val << " "; + } + oss << "]"; + UpdateDictionary(aKey.empty() ? "IDW Error" : aKey, oss.str()); } -void Results::SetMLOE(double aMLOE) { +void Results::SetMLOE(double aMLOE, const std::string& aKey) { this->mMLOE = aMLOE; + UpdateDictionary(aKey.empty() ? "MLOE" : aKey, to_string(aMLOE)); } -void Results::SetMMOM(double aMMOM) { +void Results::SetMMOM(double aMMOM, const std::string& aKey) { this->mMMOM = aMMOM; + UpdateDictionary(aKey.empty() ? 
"MMOM" : aKey, to_string(aMMOM)); } -void Results::SetMSPEExecutionTime(double aTime) { +void Results::SetMSPEExecutionTime(double aTime, const std::string& aKey) { this->mExecutionTimeMSPE = aTime; + UpdateDictionary(aKey.empty() ? "MSPE Prediction Execution Time" : aKey, to_string(aTime)); } -void Results::SetMSPEFlops(double aFlops) { +void Results::SetMSPEFlops(double aFlops, const std::string& aKey) { this->mFlopsMSPE = aFlops; + UpdateDictionary(aKey.empty() ? "MSPE Gflop/s" : aKey, to_string(aFlops)); } -void Results::SetTotalModelingExecutionTime(double aTime) { +void Results::SetTotalModelingExecutionTime(double aTime, const std::string& aKey) { this->mTotalModelingExecutionTime = aTime; + UpdateDictionary(aKey.empty() ? "Total Modeling Execution Time" : aKey, to_string(aTime)); + if (this->mMLEIterations) { + UpdateDictionary("Average Time Modeling per Iteration", to_string( + this->mTotalModelingExecutionTime / this->mMLEIterations)); + } } double Results::GetTotalModelingExecutionTime() const { @@ -176,54 +158,81 @@ double Results::GetAverageModelingFlops() const { throw runtime_error("Number of MLE Iterations is not set!"); } -void Results::SetTotalModelingFlops(double aTime) { +void Results::SetTotalModelingFlops(double aTime, const std::string& aKey) { this->mTotalModelingFlops = aTime; + UpdateDictionary(aKey.empty() ? "Total Modeling Flops" : aKey, to_string(aTime)); + if (this->mMLEIterations) { + UpdateDictionary("Average Flops per Iteration", to_string(this->mTotalModelingFlops / this->mMLEIterations)); + } } double Results::GetTotalModelingFlops() const { return this->mTotalModelingFlops; } -Results *Results::mpInstance = nullptr; - -void Results::SetExecutionTimeMLOEMMOM(double aTime) { +void Results::SetExecutionTimeMLOEMMOM(double aTime, const std::string& aKey) { this->mExecutionTimeMLOEMMOM = aTime; + UpdateDictionary(aKey.empty() ? 
"MLOE-MMOM Execution Time" : aKey, to_string(aTime)); } -void Results::SetMatrixGenerationTimeMLOEMMOM(double aTime) { +void Results::SetMatrixGenerationTimeMLOEMMOM(double aTime, const std::string& aKey) { this->mGenerationTimeMLOEMMOM = aTime; + UpdateDictionary(aKey.empty() ? "MLOE-MMOM Matrix Generation Time" : aKey, to_string(aTime)); + } -void Results::SetFactoTimeMLOEMMOM(double aTime) { +void Results::SetFactoTimeMLOEMMOM(double aTime, const std::string& aKey) { this->mFactoTimeMLOEMMOM = aTime; + UpdateDictionary(aKey.empty() ? "MLOE-MMOM Cholesky Factorization Time" : aKey, to_string(aTime)); } -void Results::SetLoopTimeMLOEMMOM(double aTime) { +void Results::SetLoopTimeMLOEMMOM(double aTime, const std::string& aKey) { this->mLoopTimeMLOEMMOM = aTime; + UpdateDictionary(aKey.empty() ? "MLOE-MMOM Loop Time" : aKey, to_string(aTime)); } -void Results::SetFlopsMLOEMMOM(double aFlops) { +void Results::SetFlopsMLOEMMOM(double aFlops, const std::string& aKey) { this->mFlopsMLOEMMOM = aFlops; + UpdateDictionary(aKey.empty() ? "MLOE-MMOM Number of flops" : aKey, to_string(aFlops)); } -void Results::SetTotalDataGenerationExecutionTime(double aTime) { +void Results::SetTotalDataGenerationExecutionTime(double aTime, const std::string& aKey) { this->mExecutionTimeDataGeneration = aTime; + UpdateDictionary(aKey.empty() ? "Total Data Generation Execution Time" : aKey, to_string(aTime)); } -void Results::SetTotalDataGenerationFlops(double aFlops) { +void Results::SetTotalDataGenerationFlops(double aFlops, const std::string& aKey) { this->mFlopsDataGeneration = aFlops; + UpdateDictionary(aKey.empty() ? "Total Data Generation Gflop/s" : aKey, to_string(aFlops)); } -void Results::SetTotalFisherTime(double aTime) { +void Results::SetTotalFisherTime(double aTime, const std::string& aKey) { this->mTotalFisherTime = aTime; + UpdateDictionary(aKey.empty() ? 
"Fisher Execution Time" : aKey, to_string(aTime)); } -void Results::SetFisherMatrix(vector aFisherMatrix) { +void Results::SetFisherMatrix(vector aFisherMatrix, const std::string& aKey) { this->mFisherMatrix = std::move(aFisherMatrix); + + ostringstream oss; + if (this->mFisherMatrix.size() >= 3) { + oss << "Sd For Sigma2: " << this->mFisherMatrix[0] << ", " + << "Sd For Alpha: " << this->mFisherMatrix[1] << ", " + << "Sd For Nu: " << this->mFisherMatrix[2]; + } + UpdateDictionary(aKey.empty() ? "Fisher Matrix" : aKey, oss.str()); } -void Results::SetPredictedMissedValues(vector aPredictedValues) { +void Results::SetPredictedMissedValues(vector aPredictedValues, const std::string& aKey) { this->mPredictedMissedValues = std::move(aPredictedValues); + + ostringstream oss; + if (this->mPredictedMissedValues.size() >= 3) { + oss << this->mPredictedMissedValues[0] << ", " + << this->mPredictedMissedValues[1] << ", " + << this->mPredictedMissedValues[2]; + } + UpdateDictionary(aKey.empty() ? "Predicted Values" : aKey, oss.str()); } double Results::GetMLOE() const { diff --git a/src/runtime-solver/CMakeLists.txt b/src/runtime-solver/CMakeLists.txt new file mode 100644 index 00000000..17e296d0 --- /dev/null +++ b/src/runtime-solver/CMakeLists.txt @@ -0,0 +1,19 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. +# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). 
+
+# @file CMakeLists.txt
+# @version 2.0.0
+# @brief CMake build script for the runtime-solvers library, which includes the RuntimeSolversMethods base class and the RuntimeSolversFactory
+# @author Mahmoud ElKarargy
+# @date 2024-11-04
+
+# Pull in the concrete RuntimeSolversMethods implementations first; that directory
+# exports its own additions to SOURCES into this scope.
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/concrete)
+
+# Put the factory in front of whatever has been collected so far ...
+list(PREPEND SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/RuntimeSolverFactory.cpp)
+# ... and hand the combined list back to the including directory.
+set(SOURCES ${SOURCES} PARENT_SCOPE)
diff --git a/src/runtime-solver/RuntimeSolverFactory.cpp b/src/runtime-solver/RuntimeSolverFactory.cpp
new file mode 100644
index 00000000..1eed2ddc
--- /dev/null
+++ b/src/runtime-solver/RuntimeSolverFactory.cpp
@@ -0,0 +1,36 @@
+
+// Copyright (c) 2017-2024 King Abdullah University of Science and Technology,
+// All rights reserved.
+// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST).
+
+/**
+ * @file RuntimeSolverFactory.cpp
+ * @brief Implementation of the RuntimeSolverFactory class for creating runtime solvers for different runtime systems using StarPU or PaRSEC libraries.
+ * The factory creates a unique pointer to a concrete implementation of the RuntimeSolverMethods class based on the runtime specified.
+ * @version 2.0.0
+ * @author Mahmoud ElKarargy
+ * @date 2024-11-04
+**/
+
+#include
+
+#if DEFAULT_RUNTIME
+#include
+#else
+#include
+#endif
+
+using namespace exageostat::runtimesolver;
+using namespace exageostat::common;
+
+template
+std::unique_ptr> RuntimeSolverFactory::CreateRuntimeSolver() {
+
+    // Check which Runtime is used
+#if DEFAULT_RUNTIME
+    return std::make_unique>();
+#else
+    return std::make_unique>();
+#endif
+
+}
diff --git a/src/runtime-solver/concrete/CMakeLists.txt b/src/runtime-solver/concrete/CMakeLists.txt
new file mode 100644
index 00000000..d3d75c77
--- /dev/null
+++ b/src/runtime-solver/concrete/CMakeLists.txt
@@ -0,0 +1,27 @@
+
+# Copyright (c) 2017-2024 King Abdullah University of Science and Technology,
+# All rights reserved.
+# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST).
+
+# @file CMakeLists.txt
+# @version 2.0.0
+# @brief CMake build script for the runtime-solvers library, which includes the concrete implementations of the
+# RuntimeSolversMethods class based on the enabled runtime (StarPU or PaRSEC).
+# @author Mahmoud ElKarargy
+# @author Sameh Abdulah
+# @date 2024-11-04
+
+# Include the concrete implementations of the RuntimeSolversMethods class based on the enabled runtime (StarPU or PaRSEC).
+# Exactly one solver translation unit is prepended to SOURCES, selected by RUNTIME_TYPE.
+if (RUNTIME_TYPE STREQUAL "STARPU")
+    set(SOURCES
+            ${CMAKE_CURRENT_SOURCE_DIR}/StarpuRuntimeSolver.cpp
+            ${SOURCES}
+            )
+elseif (RUNTIME_TYPE STREQUAL "PARSEC")
+    # No trailing ';' after the file name: it only injected an empty list element.
+    set(SOURCES
+            ${CMAKE_CURRENT_SOURCE_DIR}/ParsecRuntimeSolver.cpp
+            ${SOURCES}
+            )
+else ()
+    # Neither branch matched, so no concrete solver source is added; make that visible
+    # at configure time instead of leaving it to surface later in the build.
+    message(WARNING "Unknown RUNTIME_TYPE '${RUNTIME_TYPE}': no concrete runtime solver source added (expected STARPU or PARSEC)")
+endif ()
+# Export the (possibly extended) list to the including directory.
+set(SOURCES ${SOURCES} PARENT_SCOPE)
+
diff --git a/src/runtime-solver/concrete/ParsecRuntimeSolver.cpp b/src/runtime-solver/concrete/ParsecRuntimeSolver.cpp
new file mode 100644
index 00000000..75410f18
--- /dev/null
+++ b/src/runtime-solver/concrete/ParsecRuntimeSolver.cpp
@@ -0,0 +1,105 @@
+
+// Copyright (c) 2017-2024 King Abdullah University of Science and Technology,
+// All rights reserved.
+// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file ParsecRuntimeSolver.cpp + * @brief This file contains the implementation of ParsecRuntimeSolver class. + * @details ParsecRuntimeSolver is a concrete implementation of the RuntimeSolversMethods class. + * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @author Qinglei Cao + * @date 2024-11-04 +**/ + +#include +#include +#include + +extern "C"{ +#include +} + +using namespace exageostat::common; +using namespace exageostat::configurations; +using namespace exageostat::analyzer; +using namespace exageostat::runtimesolver; + +template +void ParsecRuntimeSolver::ExaGeoStatSYRK(std::unique_ptr> &aData){ + auto* pContext = (parsec_context_t *) ExaGeoStatHardware::GetParsecContext(); + auto* pDesc_A = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(DescriptorType::PARSEC_DESCRIPTOR, DescriptorName::DESCRIPTOR_A).parsec_desc; + + SYNC_TIME_START(); + dplasma_dsyrk(pContext, dplasmaLower, dplasmaNoTrans, 1.0, pDesc_A, 0.0, (parsec_tiled_matrix_t *) &ExaGeoStatHardware::GetHicmaData()->dcA); + SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(),("SYRK\n")); +} + +template +void ParsecRuntimeSolver::ExaGeoStatTLRCholesky(std::unique_ptr> &aData){ + + auto *pParams = ExaGeoStatHardware::GetHicmaParams(); + auto *pHicma_data = ExaGeoStatHardware::GetHicmaData(); + auto *pAnalysis = ExaGeoStatHardware::GetAnalysis(); + auto *pContext = (parsec_context_t *) ExaGeoStatHardware::GetParsecContext(); + for( int i= 0; i < pParams->nruns; i++ ) { + hicma_parsec_potrf(pContext, pHicma_data, pParams, pAnalysis); + } +} + +template +double ParsecRuntimeSolver::ExaGeoStatNorm(Configurations &aConfigurations, std::unique_ptr> &aData){ + + int L = aConfigurations.GetDenseTileSize(); + int N = aConfigurations.GetProblemSize(); + double aNT = (N % L == 0) ? 
(N/L) : (N/L + 1);
+    int aUpperLower = EXAGEOSTAT_LOWER;
+    auto* pContext = (parsec_context_t *) ExaGeoStatHardware::GetParsecContext();
+    auto* pDescA = (parsec_tiled_matrix_t *) aData->GetDescriptorData()->GetDescriptor(DescriptorType::PARSEC_DESCRIPTOR, DescriptorName::DESCRIPTOR_A).parsec_desc;
+
+    SYNC_TIME_START();
+    GetMatrixNorm(pContext, &ExaGeoStatHardware::GetHicmaParams()->norm_global, (parsec_tiled_matrix_t *) &ExaGeoStatHardware::GetHicmaData()->dcA, aNT, aUpperLower, 1);
+    SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(), ("Matrix norm: norm_global= %le\n", ExaGeoStatHardware::GetHicmaParams()->norm_global));
+    return ExaGeoStatHardware::GetHicmaParams()->norm_global;
+}
+
+template
+double ParsecRuntimeSolver::CalculateMSE(Configurations &aConfigurations, std::unique_ptr> &aData) {
+
+    auto* pContext = (parsec_context_t * )ExaGeoStatHardware::GetParsecContext();
+    auto* pDesc_f_data = aData->GetDescriptorData()->GetDescriptor(DescriptorType::PARSEC_DESCRIPTOR,
+                                                                   DescriptorName::DESCRIPTOR_F_DATA).parsec_desc;
+    auto* pDesc_f_spatial = aData->GetDescriptorData()->GetDescriptor(DescriptorType::PARSEC_DESCRIPTOR,
+                                                                      DescriptorName::DESCRIPTOR_F_SPATIAL).parsec_desc;
+
+    SYNC_TIME_START();
+    auto mse_result = MeanSquaredError(pContext, pDesc_f_data, pDesc_f_spatial, aConfigurations.GetDenseTileSize());
+    SYNC_TIME_PRINT(ExaGeoStatHardware::GetParsecMPIRank(),("mse\n"));
+    return mse_result;
+}
+
+/**
+ * Runs the PaRSEC modeling pipeline in a fixed order: SYRK, matrix norm, pre-Cholesky analysis,
+ * TLR Cholesky, post-Cholesky analysis and comparison, then (only when the inverse option is
+ * enabled in the configurations) the inverse spherical harmonics transform followed by the MSE.
+ * apMeasurementsMatrix and aKernel are not used by this implementation.
+ * @return Always zero: none of the steps above yields a value that is propagated to the caller.
+ */
+template
+T ParsecRuntimeSolver::ModelingOperations(std::unique_ptr> &aData, Configurations &aConfigurations,
+                                          T *apMeasurementsMatrix, const kernels::Kernel &aKernel) {
+
+    // SYRK
+    ExaGeoStatSYRK(aData);
+    // Calculate norm
+    ExaGeoStatNorm(aConfigurations, aData);
+    // Analyze matrix before Cholesky
+    DataAnalyzer::PreAnalyzeMatrix(aData);
+    // HiCMA Cholesky
+    ExaGeoStatTLRCholesky(aData);
+    // Analyze matrix after Cholesky
+    DataAnalyzer::PostAnalyzeMatrix(aData);
+    // Diff to matlab result
+    DataAnalyzer::CompareMatDifference(aData);
+
+    if(aConfigurations.GetEnableInverse()){
+        transformers::DataTransformer::InverseSphericalHarmonicsTransform(aConfigurations.GetDenseTileSize(), aData);
+        // TODO: results in a seg fault in C
+        CalculateMSE(aConfigurations, aData);
+    }
+
+    // The function is declared to return T; flowing off the end of a non-void function is
+    // undefined behaviour, so return an explicit, well-defined value.
+    return T(0);
+}
diff --git a/src/runtime-solver/concrete/StarpuRuntimeSolver.cpp b/src/runtime-solver/concrete/StarpuRuntimeSolver.cpp
new file mode 100644
index 00000000..39dae64c
--- /dev/null
+++ b/src/runtime-solver/concrete/StarpuRuntimeSolver.cpp
@@ -0,0 +1,79 @@
+
+// Copyright (c) 2017-2024 King Abdullah University of Science and Technology,
+// All rights reserved.
+// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST).
+
+/**
+ * @file StarpuRuntimeSolver.cpp
+ * @brief This file contains the implementation of StarpuRuntimeSolver class.
+ * @details StarpuRuntimeSolver is a concrete implementation of the RuntimeSolversMethods class.
+ * @version 2.0.0
+ * @author Mahmoud ElKarargy
+ * @author Sameh Abdulah
+ * @author Qinglei Cao
+ * @date 2024-11-04
+**/
+
+#include
+#include
+#include
+#include
+
+using namespace exageostat::common;
+using namespace exageostat::configurations;
+using namespace exageostat::runtimesolver;
+using namespace exageostat::dataunits;
+using namespace nlopt;
+
+template
+T StarpuRuntimeSolver::ModelingOperations(std::unique_ptr> &aData, Configurations &aConfigurations,
+                                          T *apMeasurementsMatrix, const kernels::Kernel &aKernel) {
+
+    int parameters_number = aKernel.GetParametersNumbers();
+    int max_number_of_iterations = aConfigurations.GetMaxMleIterations();
+    // Setting struct of data to pass to the modeling.
+    auto modeling_data = new mModelingData(aData, aConfigurations, *apMeasurementsMatrix, aKernel);
+    // Create nlopt
+    double opt_f;
+    opt optimizing_function(nlopt::LN_BOBYQA, parameters_number);
+    // Initialize problem's bound.
+ optimizing_function.set_lower_bounds(aConfigurations.GetLowerBounds()); + optimizing_function.set_upper_bounds(aConfigurations.GetUpperBounds()); + optimizing_function.set_ftol_abs(aConfigurations.GetTolerance()); + // Set max iterations value. + optimizing_function.set_maxeval(max_number_of_iterations); + optimizing_function.set_max_objective(DataModelingAPI, (void *) modeling_data); + + // Optimize mle using nlopt. + optimizing_function.optimize(aConfigurations.GetStartingTheta(), opt_f); + aConfigurations.SetEstimatedTheta(aConfigurations.GetStartingTheta()); + + auto theta = aConfigurations.GetStartingTheta(); + + LOGGER("--> Final Theta Values (", true) + for (int i = 0; i < parameters_number; i++) { + LOGGER_PRECISION(theta[i]) + if (i != parameters_number - 1) { + LOGGER_PRECISION(", ") + } + } + LOGGER_PRECISION(")") + LOGGER("") + + delete modeling_data; + return optimizing_function.last_optimum_value(); + +} + +template +double StarpuRuntimeSolver::DataModelingAPI(const std::vector &aTheta, std::vector &aGrad, void *apInfo) { + + auto config = ((mModelingData *) apInfo)->mpConfiguration; + auto data = ((mModelingData *) apInfo)->mpData; + auto measurements = ((mModelingData *) apInfo)->mpMeasurementsMatrix; + auto kernel = ((mModelingData *) apInfo)->mpKernel; + + // We do Date Modeling with any computation. + auto linear_algebra_solver = linearAlgebra::LinearAlgebraFactory::CreateLinearAlgebraSolver(config->GetComputation()); + return linear_algebra_solver->ExaGeoStatMLETile(*data, *config, aTheta.data(), measurements, *kernel); +} diff --git a/src/runtime/CMakeLists.txt b/src/runtime/CMakeLists.txt index 74d34df3..afeef659 100644 --- a/src/runtime/CMakeLists.txt +++ b/src/runtime/CMakeLists.txt @@ -10,8 +10,12 @@ # @date 2024-03-10 # Include runtime directory,based on runtime flag. 
-if ("${RUNTIME_TYPE}" STREQUAL "parsec") +if ("${RUNTIME_TYPE}" STREQUAL "PARSEC") add_subdirectory(parsec) + set(JDF_GENERATED_SOURCES + ${JDF_GENERATED_SOURCES} + PARENT_SCOPE + ) else () #by default use StarPu runtime. add_subdirectory(starpu) @@ -22,4 +26,3 @@ set(SOURCES ${SOURCES} PARENT_SCOPE ) - diff --git a/src/runtime/parsec/CMakeLists.txt b/src/runtime/parsec/CMakeLists.txt index f1b427db..8c8e25a9 100644 --- a/src/runtime/parsec/CMakeLists.txt +++ b/src/runtime/parsec/CMakeLists.txt @@ -5,13 +5,24 @@ # @file CMakeLists.txt # @version 1.1.0 -# @brief CMake build script for Parsec runtime +# @brief CMake build script for Parsec directory. # @author Mahmoud ElKarargy # @date 2024-03-10 -# Define the sources for the library +# Add subdirectory for JDF files +add_subdirectory(jdf) + +# Set source files for ClimateEmulator set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ParsecFunctions.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/JDFHelperFunctions.c ${SOURCES} PARENT_SCOPE - ) \ No newline at end of file +) + +# Set JDF generated source files for Climate Emulator +set(JDF_GENERATED_SOURCES + ${JDF_GENERATED_SOURCES} + PARENT_SCOPE +) + diff --git a/src/runtime/parsec/JDFHelperFunctions.c b/src/runtime/parsec/JDFHelperFunctions.c new file mode 100644 index 00000000..06b54135 --- /dev/null +++ b/src/runtime/parsec/JDFHelperFunctions.c @@ -0,0 +1,185 @@ + +// Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +// All rights reserved. +// ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST). + +/** + * @file JDFHelperFunctions.c + * @brief Implementation of JDF helper functions. 
+ * @version 2.0.0 + * @author Mahmoud ElKarargy + * @author Sameh Abdulah + * @author Qinglei Cao + * @date 2024-10-20 +**/ + +#include + +int CalculateSingleIndex(int aN, int aM) { + return aN * (aN + 1) / 2 + aM; +} + +double SumDoubleData(double *apData, int aColumn, int aRow) { + double sum = 0.0; + for (int j = 0; j < aRow; j++) { + for (int i = 0; i < aColumn; i++) { + sum += apData[j * aColumn + i]; + } + } + return sum; +} + +complex double SumComplexData(complex double *apData, int aColumn, int aRow) { + complex double sum = 0.0; + for (int j = 0; j < aRow; j++) { + for (int i = 0; i < aColumn; i++) { + sum += apData[j * aColumn + i]; + } + } + return sum; +} + +void ForwardSHTHelper(double *apFlm, complex double *apF_data, int aFDataM, int aFDataN, + complex double *apEt1, int aEt1M, complex double *apEt2, int aEt2M, + complex double *apEp, int aEpM, int aEpN, complex double *apSlmn, + int aSlmnM, int aSlmnN, complex double *apIe, int aIeM, int aIeN, + complex double *apIo, int aIoM, int aIoN, complex double *apP, + int aPM, int aPN, complex double *apD, complex double *apGmtheta_r, + complex double *apFmnm, complex double *apTmp1, + complex double *apTmp2, int aL){ + + complex double alpha_complex, beta_complex; + double alpha_double, beta_double; + + assert(aFDataN == aEpM); + alpha_complex = (complex double) 1.0; + beta_complex = (complex double) 0.0; + + int gmtheta_r_M = aFDataM; + int gmtheta_r_N = aEpN; + int gmtheta_r_K = aFDataN; + + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + gmtheta_r_M, gmtheta_r_N, gmtheta_r_K, + &alpha_complex, apF_data, gmtheta_r_M, + apEp, gmtheta_r_K, &beta_complex, + apGmtheta_r, gmtheta_r_M); + + int fmnm_M = aEt1M; + int fmnm_N = gmtheta_r_N; + int fmnm_K = gmtheta_r_M; + + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + fmnm_M, fmnm_N, fmnm_K, + &alpha_complex,apEt1, fmnm_M, + apGmtheta_r, fmnm_K, + &beta_complex, apFmnm, fmnm_M); + + int tmp1_M = aEt2M; + int tmp1_N = aPN; + int tmp1_K = aPM; + 
cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + tmp1_M, tmp1_N, tmp1_K, + &alpha_complex, apEt2, tmp1_M, + apP, tmp1_K, + &beta_complex, apTmp1, tmp1_M); + + assert(tmp1_N == gmtheta_r_M); + int tmp2_M = tmp1_M; + int tmp2_N = gmtheta_r_N; + int tmp2_K = gmtheta_r_M; + + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + tmp2_M, tmp2_N, tmp2_K, + &alpha_complex, apTmp1, tmp2_M, + apGmtheta_r, tmp2_K, + &beta_complex, apTmp2, tmp2_M); + + assert(fmnm_M == tmp2_M); + fmnm_K = tmp2_N; + beta_complex = (complex double) 1.0; + cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + fmnm_M, fmnm_N, fmnm_K, + &alpha_complex, apTmp2, fmnm_M, + apD, fmnm_K, + &beta_complex, apFmnm, fmnm_M); + + assert(aSlmnN == aIeM); + assert(aIeN == fmnm_M); + + int flmn_matrix_M = aL; + + complex double *pFlmn_matrix = apTmp1; + complex double *pFmnm_tmp; + complex double *pSlmn_tmp; + complex double *multipy_tmp = apTmp2 + fmnm_M + aSlmnN; + + for (int m = 0; m < aL; m++) { + pFmnm_tmp = apFmnm + (aL + m - 1) * fmnm_M; + if (0 == m % 2) { + for (int n = m; n < aL; n++) { + pSlmn_tmp = apSlmn + CalculateSingleIndex(n, m); + + alpha_complex = (complex double) 1.0; + beta_complex = (complex double) 0.0; + cblas_zgemv(CblasColMajor, CblasNoTrans, + aIeM, aIeN, + &alpha_complex, apIe, aIeM, + pFmnm_tmp, 1, + &beta_complex, multipy_tmp, 1); + cblas_zdotu_sub(aSlmnN, pSlmn_tmp, aSlmnM, multipy_tmp, 1, &pFlmn_matrix[m * flmn_matrix_M + n]); + } + } else { + for (int n = m; n < aL; n++) { + pSlmn_tmp = apSlmn + CalculateSingleIndex(n, m); + + alpha_complex = (complex double) 1.0; + beta_complex = (complex double) 0.0; + cblas_zgemv(CblasColMajor, CblasNoTrans, + aIoM, aIoN, + &alpha_complex, apIo, aIoM, + pFmnm_tmp, 1, + &beta_complex, multipy_tmp, 1); + + cblas_zdotu_sub(aSlmnN, pSlmn_tmp, aSlmnM, multipy_tmp, 1, &pFlmn_matrix[m * flmn_matrix_M + n]); + } + } + + } + for (int n = 0; n < aL; n++) { + for (int m = 0; m <= n; m++) { + apFlm[n * n + n + m] = creal(pFlmn_matrix[m * 
flmn_matrix_M + n]); + if (m != 0) { + apFlm[n * n + n - m] = cimag(pFlmn_matrix[m * flmn_matrix_M + n]); + } + } + } +} + +void InverseSHTHelper(double *apFlm, double *apFspatial, double *apZlm, double *apSC, double *apSmt, int aL) { + + int index_Zlm, index_flm; + int Smt_M = aL + 1; + int Smt_N = 2 * aL - 1; + + memset(apSmt, 0, Smt_M * Smt_N * sizeof(double)); + + for (int m = -(aL - 1); m < aL; m++) { + for (int n = abs(m); n < aL; n++) { + index_Zlm = CalculateSingleIndex(n, abs(m)); + index_flm = n * n + n + m; + cblas_daxpy(Smt_M, apFlm[index_flm], apZlm + index_Zlm * aL + 1, 1, apSmt + (m + aL - 1) * Smt_M, 1); + } + } + + int f_spatial_M = Smt_M; + int f_spatial_N = 2 * aL; + int f_spatial_K = Smt_N; + + cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, + f_spatial_M, f_spatial_N, f_spatial_K, + (double) 1.0, apSmt, f_spatial_M, + apSC, f_spatial_K, + (double) 0.0, apFspatial, f_spatial_M); + +} diff --git a/src/runtime/parsec/ParsecFunctions.cpp b/src/runtime/parsec/ParsecFunctions.cpp index 65b0073d..02e46b93 100644 --- a/src/runtime/parsec/ParsecFunctions.cpp +++ b/src/runtime/parsec/ParsecFunctions.cpp @@ -20,68 +20,92 @@ using namespace exageostat::dataunits; //TODO: implement parsec functions template -void RuntimeFunctions::CovarianceMatrix(DescriptorData &aDescriptorData, void *apDescriptor, - const int &aTriangularPart, Locations *apLocation1, - Locations *apLocation2, Locations *apLocation3, - T *apLocalTheta, const int &aDistanceMetric, - const kernels::Kernel *apKernel, void *apContext) {} +void RuntimeFunctions::CovarianceMatrix(dataunits::DescriptorData &aDescriptorData, void *apDescriptor, + const int &aTriangularPart, + dataunits::Locations *apLocation1, dataunits::Locations *apLocation2, + dataunits::Locations *apLocation3, T *apLocalTheta, + const int &aDistanceMetric, + const kernels::Kernel *apKernel) { +} + template -void RuntimeFunctions::ExaGeoStatMLETileAsyncMLOEMMOM(void *apDescExpr1, void *apDescExpr2, void *apDescExpr3, 
+void RuntimeFunctions::ExaGeoStatMLETileAsyncMLOEMMOM(void *apDescExpr2, void *apDescExpr3, void *apDescExpr4, void *apDescMLOE, void *apDescMMOM, void *apSequence, - void *apRequest, void *apContext) {} + void *apRequest) { + +} template void RuntimeFunctions::ExaGeoStatMLEMSPETileAsync(void *apDescZPredict, void *apDescZMiss, void *apDescError, - void *apSequence, void *apRequest, void *apContext) {} + void *apSequence, + void *apRequest) { + +} template -void RuntimeFunctions::CopyDescriptorZ(DescriptorData &aDescriptorData, void *apDescriptor, T *apDoubleVector, - void *apContext) {} +void RuntimeFunctions::CopyDescriptorZ(dataunits::DescriptorData &aDescriptorData, void *apDescriptor, + T *apDoubleVector) { + +} template -void -RuntimeFunctions::ExaGeoStatGaussianToNonTileAsync(DescriptorData &aDescriptorData, void *apDesc, T *apTheta, - void *apContext) {} +void RuntimeFunctions::ExaGeoStatGaussianToNonTileAsync(dataunits::DescriptorData &aDescriptorData, void *apDesc, + T *apTheta) { + +} template void -RuntimeFunctions::ExaGeoStatMeasureDetTileAsync(const Computation &aComputation, void *apDescA, void *apSequence, - void *apRequest, void *apDescDet, void *apContext) {} +RuntimeFunctions::ExaGeoStatMeasureDetTileAsync(const common::Computation &aComputation, void *apDescA, + void *apSequence, void *apRequest, + void *apDescDet) { +} template void RuntimeFunctions::ExaGeoStaStrideVectorTileAsync(void *apDescA, void *apDescB, void *apDescC, void *apSequence, void *apRequest, void *apContext) {} template -void RuntimeFunctions::ExaGeoStaStrideVectorTileAsync(void *apDescA, void *apDescB, void *apDescC, void *apDescD, - void *apSequence, void *apRequest, void *apContext) {} +void RuntimeFunctions::ExaGeoStaStrideVectorTileAsync(void *apDescA, void *apDescB, void *apDescC, void *apSequence, + void *apRequest) { +} template -void -RuntimeFunctions::ExaGeoStatMLETraceTileAsync(void *apDescA, void *apSequence, void *apRequest, void *apDescNum, - void *apDescTrace, 
void *apContext) {} +void RuntimeFunctions::ExaGeoStatMLETraceTileAsync(void *apDescA, void *apSequence, void *apRequest, void *apDescNum, + void *apDescTrace) { + +} template void -RuntimeFunctions::ExaGeoStatDoubleDotProduct(void *apDescA, void *apDescProduct, void *apSequence, - void *apRequest, - void *apContext) {} +RuntimeFunctions::ExaGeoStatDoubleDotProduct(void *apDescA, void *apDescProduct, void *apSequence, void *apRequest) { + + +} template -void -RuntimeFunctions::ExaGeoStatMLEMSPEBivariateTileAsync(void *apDescZPre, void *apDescZMiss, void *apDescsError1, - void *apDescsError2, void *apDescsError, void *apSequence, - void *apRequest, void *apContext) {} +void RuntimeFunctions::ExaGeoStatMLEMSPEBivariateTileAsync(void *apDescZPre, void *apDescZMiss, void *apDescError1, + void *apDescError2, + void *apDescError, void *apSequence, void *apRequest) { + +} template -void RuntimeFunctions::ExaGeoStatNonGaussianLogLikeTileAsync(const Computation &aComputation, void *apDescZ, - void *apDescSum, const T *apTheta, void *apSequence, - void *apRequest, void *apContext) {} +void RuntimeFunctions::ExaGeoStatNonGaussianLogLikeTileAsync(const common::Computation &aComputation, void *apDescZ, + void *apDescSum, + const T *apTheta, void *apSequence, void *apRequest) { + + + +} template void -RuntimeFunctions::ExaGeoStatNonGaussianTransformTileAsync(const Computation &aComputation, void *apDescZ, +RuntimeFunctions::ExaGeoStatNonGaussianTransformTileAsync(const common::Computation &aComputation, void *apDescZ, const T *apTheta, - void *apSequence, void *apRequest, void *apContext) {} + void *apSequence, void *apRequest) { + + +} diff --git a/src/runtime/parsec/jdf/CMakeLists.txt b/src/runtime/parsec/jdf/CMakeLists.txt new file mode 100644 index 00000000..1463fb93 --- /dev/null +++ b/src/runtime/parsec/jdf/CMakeLists.txt @@ -0,0 +1,50 @@ + +# Copyright (c) 2017-2024 King Abdullah University of Science and Technology, +# All rights reserved. 
+# ExaGeoStat is a software package, provided by King Abdullah University of Science and Technology (KAUST).
+
+# @file CMakeLists.txt
+# @version 1.1.0
+# @brief CMake build script for jdf directory
+# @author Mahmoud ElKarargy
+# @date 2024-10-17
+
+# PaRSEC .jdf files; each one is translated to <name>.c/<name>.h by parsec-ptgpp at configure time
+set(JDF_FILES
+    ${CMAKE_CURRENT_SOURCE_DIR}/ReadCSVTimeSlot.jdf
+    ${CMAKE_CURRENT_SOURCE_DIR}/ReadCSVComplex.jdf
+    ${CMAKE_CURRENT_SOURCE_DIR}/ReadCSVToComplex.jdf
+    ${CMAKE_CURRENT_SOURCE_DIR}/ReadCSVToComplexTimeSlot.jdf
+    ${CMAKE_CURRENT_SOURCE_DIR}/ReadCSV.jdf
+    ${CMAKE_CURRENT_SOURCE_DIR}/ForwardSHT.jdf
+    ${CMAKE_CURRENT_SOURCE_DIR}/MeanSquaredError.jdf
+    ${CMAKE_CURRENT_SOURCE_DIR}/DifferenceDouble.jdf
+    ${CMAKE_CURRENT_SOURCE_DIR}/ForwardSHTReshape.jdf
+    ${CMAKE_CURRENT_SOURCE_DIR}/GetMatrixNorm.jdf
+    ${CMAKE_CURRENT_SOURCE_DIR}/InverseSHT.jdf
+    ${CMAKE_CURRENT_SOURCE_DIR}/MatrixCompress.jdf
+)
+
+# Output directory for generated sources
+set(JDF_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated_jdf")
+
+# Make sure the output directory exists; parsec-ptgpp is run inside it so no files need to be moved afterwards
+file(MAKE_DIRECTORY "${JDF_OUTPUT_DIR}")
+
+foreach (jdf_file IN LISTS JDF_FILES)
+    get_filename_component(jdf_name "${jdf_file}" NAME_WE)
+    execute_process(
+        COMMAND "${HICMA_X_BIN_DIR}/parsec-ptgpp" -i "${jdf_file}" -o "${jdf_name}"
+        WORKING_DIRECTORY "${JDF_OUTPUT_DIR}"
+        RESULT_VARIABLE jdf_result ERROR_VARIABLE jdf_error
+    )
+    if (NOT jdf_result EQUAL 0)
+        message(FATAL_ERROR "parsec-ptgpp failed for ${jdf_file}: ${jdf_error}")
+    endif ()
+    list(APPEND JDF_GENERATED_SOURCES "${JDF_OUTPUT_DIR}/${jdf_name}.c")
+endforeach ()
+
+set(JDF_GENERATED_SOURCES
+    ${JDF_GENERATED_SOURCES}
+    PARENT_SCOPE
+)
diff --git a/src/runtime/parsec/jdf/DifferenceDouble.jdf b/src/runtime/parsec/jdf/DifferenceDouble.jdf
new file mode 100644
index 00000000..76e49e79
--- /dev/null
+++ b/src/runtime/parsec/jdf/DifferenceDouble.jdf
@@ -0,0 +1,92 @@
+extern "C" %{
+/*
+ * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST).
+ * @copyright (c) 2023 The University of Tennessee and The University of Tennessee Research Foundation.
+ * All rights reserved.
+ */
+
+#include
+#include
+
+static double DifferenceDouble_core(double *apDataA, double *apDataB, int aM, int aN) { /* largest |A-B| seen over aM*aN entries */
+    double result = 0.0;
+    for (int j = 0; j < aN; j++) {
+        for (int i = 0; i < aM; i++) {
+            result = fmax(result, fabs(apDataA[j*aM+i]-apDataB[j*aM+i]));
+            if( result > 1.0e-4 ) { /* early exit: the scan stops at the first difference above 1.0e-4 */
+                return result;
+            }
+        }
+    }
+    return result;
+}
+
+%}
+
+apDescA      [ type = "parsec_tiled_matrix_t*" ]
+apDescB      [ type = "parsec_tiled_matrix_t*" aligned = apDescA ]
+
+task(m, n) // one task per (m, n) tile, placed where apDescA(m, n) lives
+
+m = 0 .. apDescA->lmt-1
+n = 0 .. apDescA->lnt-1
+
+: apDescA(m, n)
+
+READ apDataA <- apDescA(m, n)
+READ apDataB <- apDescB(m, n)
+
+BODY
+{
+    double diff = DifferenceDouble_core(apDataA, apDataB, apDescA->mb, apDescA->nb); /* NOTE(review): diff is never stored or reported */
+}
+END
+
+extern "C" %{
+
+/** Creates the DifferenceDouble taskpool; both descriptors must share tile sizes (asserted).
+ * @return the parsec object to schedule.
+ */
+parsec_taskpool_t*
+DifferenceDoubleConstructor(parsec_matrix_block_cyclic_t *apDescA, parsec_matrix_block_cyclic_t *apDescB)
+{
+    assert(apDescA->super.mb == apDescB->super.mb);
+    assert(apDescA->super.nb == apDescB->super.nb);
+    parsec_DifferenceDouble_taskpool_t *taskpool = parsec_DifferenceDouble_new(&apDescA->super, &apDescB->super);
+    parsec_add2arena(&taskpool->arenas_datatypes[PARSEC_DifferenceDouble_DEFAULT_ADT_IDX],
+                     parsec_datatype_double_t, PARSEC_MATRIX_FULL,
+                     1, apDescA->super.mb, apDescA->super.nb, apDescA->super.mb,
+                     PARSEC_ARENA_ALIGNMENT_SSE, -1 );
+
+    return (parsec_taskpool_t*)taskpool;
+}
+
+/** Releases the default arena registered by the constructor, then frees the taskpool.
+ * @param [inout] the parsec object to destroy
+*/
+void DifferenceDouble_destructor(parsec_taskpool_t *pTaskpool)
+{
+    parsec_DifferenceDouble_taskpool_t *difference_double_taskpool = (parsec_DifferenceDouble_taskpool_t *)pTaskpool;
+    parsec_del2arena(&difference_double_taskpool->arenas_datatypes[PARSEC_DifferenceDouble_DEFAULT_ADT_IDX]);
+    parsec_taskpool_free(pTaskpool);
+}
+
+/** Builds the taskpool, runs it to completion on apContext and destroys it; always returns 0.
+ */
+int DifferenceDouble(parsec_context_t *apContext,
+                     parsec_matrix_block_cyclic_t *apDescA,
+                     parsec_matrix_block_cyclic_t *apDescB)
+{
+    parsec_taskpool_t *parsec_difference_double = NULL;
+    parsec_difference_double = DifferenceDoubleConstructor(apDescA, apDescB);
+    if(parsec_difference_double != NULL ){
+        parsec_context_add_taskpool(apContext, parsec_difference_double);
+        parsec_context_start(apContext);
+        parsec_context_wait(apContext);
+        DifferenceDouble_destructor(parsec_difference_double);
+    }
+
+    return 0;
+}
+
+%}
diff --git a/src/runtime/parsec/jdf/ForwardSHT.jdf b/src/runtime/parsec/jdf/ForwardSHT.jdf
new file mode 100644
index 00000000..26c30687
--- /dev/null
+++ b/src/runtime/parsec/jdf/ForwardSHT.jdf
@@ -0,0 +1,290 @@
+extern "C" %{
+/*
+ * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST).
+ * @copyright (c) 2023 The University of Tennessee and The University of Tennessee Research Foundation.
+ * All rights reserved.
+ */
+
+#include
+#include
+
+static void FlmToFlmT(double *apFlmT, double *apFlm, parsec_matrix_block_cyclic_t *apFLMTDesc, int aFlmM, int aFlmN, int aM, int aN) {
+    int flm_offset = aM * apFLMTDesc->super.mb;
+    int flmT_offset = aN * apFLMTDesc->super.mb;
+    int size = (aM == apFLMTDesc->super.lmt - 1) ?
(aFlmM * aFlmN) - flm_offset : apFLMTDesc->super.mb; + memcpy(apFlmT + flmT_offset, apFlm + flm_offset, size*sizeof(double)); +} + + +%} + +/* Globals + */ +apFDataDesc [ type = "parsec_tiled_matrix_t*" ] +apFLMDesc [ type = "parsec_tiled_matrix_t*" aligned = apFDataDesc] +apFLMTDesc [ type = "parsec_tiled_matrix_t*" ] +apET1Desc [ type = "parsec_tiled_matrix_t*" ] +apET2Desc [ type = "parsec_tiled_matrix_t*" ] +apEPDesc [ type = "parsec_tiled_matrix_t*" ] +apSLMNDesc [ type = "parsec_tiled_matrix_t*" ] +apIEDesc [ type = "parsec_tiled_matrix_t*" ] +apIODesc [ type = "parsec_tiled_matrix_t*" ] +apPDesc [ type = "parsec_tiled_matrix_t*" ] +apDDesc [ type = "parsec_tiled_matrix_t*" ] +aFlmM [ type = "int" ] +aFlmN [ type = "int" ] +aLSize [ type = "int" ] + +/* Temporary buffer used for convert */ +apGmtheta_r [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] +apFmnm [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] +apWork1 [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] +apWork2 [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] + +/* GPU workspace */ +ws_gpu [ type = "void *" hidden = on default = NULL ] + +/* GPU number and index */ +nb_cuda_devices [ type = "int" hidden = on default = 0 ] +cuda_device_index [ type = "int *" hidden = on default = "NULL"] + +bind_gpu(n) + +n = 0 .. apFDataDesc->lnt-1 + +: apFDataDesc(0, n) + +READ apFlm <- apFLMDesc(0, n) + -> apFlm task(n) [ type_remote = apFlm ] + +READ apF_data <- apFDataDesc(0, n) + -> apF_data task(n) [ type_remote = apF_data ] + + +BODY +{ +#if defined(USE_CUDA) + if( nb_cuda_devices > 0 ) { + int g = climate_emualtor_gpu_load_balance( n, gb->nodes, nb_cuda_devices ); + parsec_advise_data_on_device( _f_apFlm->original, + cuda_device_index[g], + PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE ); + parsec_advise_data_on_device( _f_apF_data->original, + cuda_device_index[g], + PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE ); + } +#endif +} +END + + + +task(n) + +n = 0 .. 
apFDataDesc->lnt-1 + +my_rank = %{ return apFDataDesc->super.rank_of(&apFDataDesc->super, 0, n); %} + +: apFDataDesc(0, n) + +// TODO: check whether this will evict apF_data first on GPU +READ apF_data <- apF_data bind_gpu(n) [ type_remote = apF_data ] +RW apFlm <- apFlm bind_gpu(n) [ type_remote = apFlm ] + -> apFlm task_apFlmT(0..apFLMTDesc->lmt-1, n) [ type_remote = apFlm ] + -> apFLMDesc(0, n) + +READ apEt1 <- apET1Desc(0, my_rank) +READ apEt2 <- apET2Desc(0, my_rank) +READ apEp <- apEPDesc(0, my_rank) +READ apSlmn <- apSLMNDesc(0, my_rank) +READ apIe <- apIEDesc(0, my_rank) +READ apIo <- apIODesc(0, my_rank) +READ apP <- apPDesc(0, my_rank) +READ apD <- apDDesc(0, my_rank) + + +BODY[type=CUDA] +{ +#if defined(USE_CUDA) + ForwardSHT_gpu_core(apFlm, apF_data, apEt1, apEt2, apEp, apSlmn, apIe, apIo, apP, apD, cuda_device, gpu_task, cuda_stream, gb); +#endif +} +END + +BODY[type=HIP] +{ +#if defined(USE_CUDA) + ForwardSHT_gpu_core(apFlm, apF_data, apEt1, apEt2, apEp, apSlmn, apIe, apIo, apP, apD, cuda_device, gpu_task, cuda_stream, gb); +#endif +} +END + +BODY +{ + complex double *pGmtheta_r = (complex double *) parsec_private_memory_pop(apGmtheta_r); + complex double *pFmnm = (complex double *) parsec_private_memory_pop(apFmnm); + complex double *pTmp1 = (complex double *) parsec_private_memory_pop(apWork1); + complex double *pTmp2 = (complex double *) parsec_private_memory_pop(apWork2); + + ForwardSHTHelper(apFlm, apF_data, apFDataDesc->mb, apFDataDesc->nb, apEt1, apET1Desc->mb, + apEt2, apET2Desc->mb, apEp, apEPDesc->mb, apEPDesc->nb, apSlmn, + apSLMNDesc->mb, apSLMNDesc->nb, apIe, apIEDesc->mb, apIEDesc->nb, apIo, + apIODesc->mb, apIODesc->nb, apP, apPDesc->mb, apPDesc->nb, apD, + pGmtheta_r, pFmnm, pTmp1, pTmp2, aLSize); + + parsec_private_memory_push(apGmtheta_r, pGmtheta_r); + parsec_private_memory_push(apFmnm, pFmnm); + parsec_private_memory_push(apWork1, pTmp1); + parsec_private_memory_push(apWork2, pTmp2); + +} +END + + +task_apFlmT(m, n) + +m = 0 .. 
apFLMTDesc->lmt-1 +n = 0 .. apFLMDesc->lnt-1 + +: apFLMTDesc(m, 0) + +READ apFlm <- apFlm task(n) [ type_remote = apFlm ] + +READ apFlmT <- apFLMTDesc(m, 0) + + +BODY +{ + FlmToFlmT(apFlmT, apFlm, (parsec_matrix_block_cyclic_t *) apFLMTDesc, aFlmM, aFlmN, m, n); +} +END + + + +extern "C" %{ + + +#if 0 +void *gb_forward_create_workspace(void *obj, void *user) +{ + parsec_device_module_t *mod = (parsec_device_module_t *)obj; + zone_malloc_t *memory = ((parsec_device_cuda_module_t*)mod)->super.memory; + parsec_ForwardSHT_taskpool_t *tp = (parsec_ForwardSHT_taskpool_t*)user; + gb_forward_workspace_t *wp = NULL; + int nb = tp->_g_descA->nb; + int workspace_size = tp->_g_gb->apF_data_M * tp->_g_gb->apEp_N + + tp->_g_gb->apEt1_M * tp->_g_gb->apEp_N + + tp->_g_gb->apEt2_M * tp->_g_gb->apP_N + + tp->_g_gb->apEt2_M * tp->_g_gb->apEp_N; + size_t elt_size = sizeof(complex double); + + wp = (gb_forward_workspace_t*)malloc(sizeof(gb_forward_workspace_t)); + wp->tmpmem = zone_malloc(memory, workspace_size * elt_size + sizeof(int)); + assert(NULL != wp->tmpmem); + wp->lwork = workspace_size; + wp->memory = memory; + + return wp; +} + +static void destroy_workspace(void *apWorkSpace, void *aN) +{ + gb_forward_workspace_t *ws = (gb_forward_workspace_t*) apWorkSpace; + zone_free((zone_malloc_t*)ws->memory, ws->tmpmem); + free(ws); + (void)aN; +} +#endif + + +/** + * @return the parsec object to schedule. 
+ */ +parsec_taskpool_t* +ForwardSHTConstructor(parsec_tiled_matrix_t *apFDataDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apFLMTDesc, parsec_tiled_matrix_t *apET1Desc, + parsec_tiled_matrix_t *apET2Desc, parsec_tiled_matrix_t *apEPDesc, + parsec_tiled_matrix_t *apSLMNDesc, parsec_tiled_matrix_t *apIEDesc, + parsec_tiled_matrix_t *apIODesc, parsec_tiled_matrix_t *apPDesc, + parsec_tiled_matrix_t *apDDesc, int aFDataM, int aEPN, int aET1M, + int aET2M, int aPN, int aFlmM, int aFlmN, int aLSize) +{ + + parsec_ForwardSHT_taskpool_t *pTaskpool = + parsec_ForwardSHT_new(apFDataDesc, apFLMDesc, apFLMTDesc, apET1Desc, apET2Desc, apEPDesc, + apSLMNDesc, apIEDesc,apIODesc, apPDesc, apDDesc, aFlmM, aFlmN, aLSize); + + pTaskpool->_g_apGmtheta_r = (parsec_memory_pool_t*) malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apGmtheta_r, aFDataM * aEPN * sizeof(complex double)); + + pTaskpool->_g_apFmnm = (parsec_memory_pool_t*) malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apFmnm, aET1M * aEPN * sizeof(complex double)); + + pTaskpool->_g_apWork1 = (parsec_memory_pool_t*) malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apWork1, aET2M * aPN * sizeof(complex double)); + + pTaskpool->_g_apWork2 = (parsec_memory_pool_t*)malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init( pTaskpool->_g_apWork2, aET2M * aEPN * sizeof(complex double)); + +#if defined(USE_CUDA) + int nb = 0, *dev_index; + + /** Find all CUDA devices */ + hicma_parsec_find_cuda_devices( parsec, &dev_index, &nb); + + pTaskpool->_g_ws_gpu = (void *)gb->ws; + pTaskpool->_g_nb_cuda_devices = nb; + pTaskpool->_g_cuda_device_index = dev_index; +#endif + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_ForwardSHT_apF_data_ADT_IDX], + parsec_datatype_double_complex_t, PARSEC_MATRIX_FULL, + 1, apFDataDesc->mb, apFDataDesc->nb, apFDataDesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + 
parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_ForwardSHT_apFlm_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, apFLMTDesc->mb, apFLMTDesc->nb, apFLMTDesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)pTaskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void ForwardSHTDestructor(parsec_taskpool_t *apTaskpool) +{ + parsec_ForwardSHT_taskpool_t *pTaskpool = (parsec_ForwardSHT_taskpool_t *)apTaskpool; + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_ForwardSHT_apF_data_ADT_IDX]); + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_ForwardSHT_apFlm_ADT_IDX]); + parsec_private_memory_fini(pTaskpool->_g_apGmtheta_r); + parsec_private_memory_fini(pTaskpool->_g_apFmnm); + parsec_private_memory_fini(pTaskpool->_g_apWork1); + parsec_private_memory_fini(pTaskpool->_g_apWork2); + parsec_taskpool_free(apTaskpool); +} + +/** + */ +int ForwardSHT(parsec_context_t *apContext, parsec_tiled_matrix_t *apFDataDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apFLMTDesc, parsec_tiled_matrix_t *apET1Desc, + parsec_tiled_matrix_t *apET2Desc, parsec_tiled_matrix_t *apEPDesc, + parsec_tiled_matrix_t *apSLMNDesc, parsec_tiled_matrix_t *apIEDesc, + parsec_tiled_matrix_t *apIODesc, parsec_tiled_matrix_t *apPDesc, + parsec_tiled_matrix_t *apDDesc, int aFDataM, int aEPN, int aET1M, + int aET2M, int aPN, int aFlmM, int aFlmN, int aLSize) { + + parsec_taskpool_t *pTaskpool = ForwardSHTConstructor(apFDataDesc, apFLMDesc, apFLMTDesc, apET1Desc, apET2Desc, + apEPDesc, apSLMNDesc, apIEDesc, apIODesc, apPDesc, apDDesc, + aFDataM, aEPN, aET1M, aET2M, aPN, aFlmM, aFlmN, aLSize); + if( pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + ForwardSHTDestructor(pTaskpool); + } + return 0; +} + +%} diff --git a/src/runtime/parsec/jdf/ForwardSHTReshape.jdf b/src/runtime/parsec/jdf/ForwardSHTReshape.jdf new file mode 100644 index 
00000000..35cef935 --- /dev/null +++ b/src/runtime/parsec/jdf/ForwardSHTReshape.jdf @@ -0,0 +1,236 @@ +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static void FlmTReshape(double *apFlmT, int aM, double *apTemp, parsec_matrix_block_cyclic_t *apFlmTDesc, int aT) { + + int flmT_offset = aM * apFlmTDesc->super.mb; + int M = (aM == apFlmTDesc->super.lmt - 1)? apFlmTDesc->super.mb - flmT_offset : apFlmTDesc->super.mb; + if( aT-3 <= 0 ) return; + int N = aT-3; + int lda_flmT = apFlmTDesc->super.mb; + + // N * 1 + double *Y = apTemp; + // N * 3 + double *X = apTemp + N; + // 3 * 3 + double *XtX = apTemp + 4 * N; + // 3 * 1 + double *XtY = apTemp + 4 * N + 10; + double *phi = apTemp + 4 * N + 14; + + for(int i = 0; i < M; i++) { + // Get Y + for(int j = 0; j < N; j++) { + Y[j] = apFlmT[(j+3) * lda_flmT + i]; + } + + // Get X + for(int j = 0; j < N; j++) { + X[3*j+0] = apFlmT[(j+2) * lda_flmT + i]; + X[3*j+1] = apFlmT[(j+1) * lda_flmT + i]; + X[3*j+2] = apFlmT[(j+0) * lda_flmT + i]; + } + + // X transpose times X + double alpha = 1.0; + double beta = 0.0; + + cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, + 3, 3, N, + alpha, X, 3, + X, 3, + beta, XtX, 3); + + // X transpose times y + cblas_dgemv(CblasRowMajor, CblasTrans, + N, 3, + alpha, X, 3, + Y, 1, + beta, XtY, 1); + + // Solve + LAPACKE_dposv(LAPACK_ROW_MAJOR, 'U', 3, 1, XtX, 3, XtY, 1); + + // Use phi to compute eps_out + cblas_dgemv(CblasRowMajor, CblasNoTrans, + N, 3, + -1.0, X, 3, + XtY, 1, + 1.0, &apFlmT[3*lda_flmT+i], lda_flmT); + } +} + + +%} + +/* Globals + */ +apFDataDesc [ type = "parsec_tiled_matrix_t*" ] +apFLMDesc [ type = "parsec_tiled_matrix_t*" aligned = apFDataDesc] +apFLMTDesc [ type = "parsec_tiled_matrix_t*" ] +apET1Desc [ type = "parsec_tiled_matrix_t*" ] +apET2Desc [ type = 
"parsec_tiled_matrix_t*" ] +apEPDesc [ type = "parsec_tiled_matrix_t*" ] +apSLMNDesc [ type = "parsec_tiled_matrix_t*" ] +apIEDesc [ type = "parsec_tiled_matrix_t*" ] +apIODesc [ type = "parsec_tiled_matrix_t*" ] +apPDesc [ type = "parsec_tiled_matrix_t*" ] +apDDesc [ type = "parsec_tiled_matrix_t*" ] +apADesc [ type = "parsec_tiled_matrix_t*" ] + +aFlmTNB [ type = "int" ] +aLSize [ type = "int" ] +aT [ type = "int" ] + +/* Temporary buffer used for convert */ +apGmtheta_r [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] +apFmnm [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] +apWork1 [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] +apWork2 [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] + +/* GPU workspace */ +ws_gpu [ type = "void *" hidden = on default = NULL ] + +/* GPU number and index */ +nb_cuda_devices [ type = "int" hidden = on default = 0 ] +cuda_device_index [ type = "int *" hidden = on default = "NULL"] + + + +task(m) + +m = 0 .. apFLMTDesc->lmt-1 +m_s = %{ return m*aFlmTNB; %} +m_e = %{ return parsec_imin((m+1)*aFlmTNB-1, apADesc->lmt-1); %} + +: apFLMTDesc(m, 0) + +RW apFlmT <- apFLMTDesc(m, 0) + -> apFLMTDesc(m, 0) + + +BODY +{ + + double *pTemp = (double *) parsec_private_memory_pop(apWork1); + FlmTReshape(apFlmT, m, pTemp, (parsec_matrix_block_cyclic_t *) apFLMTDesc, aT); + parsec_private_memory_push(apWork1, pTemp); +} +END + +extern "C" %{ + + +/** + * @return the parsec object to schedule. 
+ */ +parsec_taskpool_t* +ForwardSHTReshape_constructor(parsec_tiled_matrix_t *apFDataDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apFLMTDesc, parsec_tiled_matrix_t *apET1Desc, + parsec_tiled_matrix_t *apET2Desc, parsec_tiled_matrix_t *apEPDesc, + parsec_tiled_matrix_t *apSLMNDesc, parsec_tiled_matrix_t *apIEDesc, + parsec_tiled_matrix_t *apIODesc, parsec_tiled_matrix_t *apPDesc, + parsec_tiled_matrix_t *apDDesc, parsec_tiled_matrix_t *apADesc, + int aFDataM, int aEPN, int aET1M, int aET2M, int aPN, + int aFlmTNB, int aT, int aLSize) +{ + + parsec_ForwardSHTReshape_taskpool_t + *pTaskpool = parsec_ForwardSHTReshape_new(apFDataDesc, apFLMDesc, apFLMTDesc, apET1Desc, apET2Desc, apEPDesc, + apSLMNDesc, apIEDesc, apIODesc, apPDesc, apDDesc, apADesc, aFlmTNB, aLSize, aT); + + pTaskpool->_g_apGmtheta_r = (parsec_memory_pool_t*)malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apGmtheta_r, aFDataM * aEPN * sizeof(complex double) ); + + pTaskpool->_g_apFmnm = (parsec_memory_pool_t*)malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apFmnm, aET1M * aEPN * sizeof(complex double) ); + + pTaskpool->_g_apWork1 = (parsec_memory_pool_t*)malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apWork1, aET2M * aPN * sizeof(complex double) ); + + pTaskpool->_g_apWork2 = (parsec_memory_pool_t*)malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apWork2, aET2M * aEPN * sizeof(complex double) ); + +#if defined(USE_CUDA) + int nb = 0, *dev_index; + + /** Find all CUDA devices */ + hicma_parsec_find_cuda_devices( parsec, &dev_index, &nb); + + pTaskpool->_g_ws_gpu = (void *)gb->ws; + pTaskpool->_g_nb_cuda_devices = nb; + pTaskpool->_g_cuda_device_index = dev_index; +#endif + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_ForwardSHTReshape_DEFAULT_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, apFLMTDesc->mb, apFLMTDesc->nb, 
apFLMTDesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1); + + return (parsec_taskpool_t*) pTaskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void ForwardSHTReshape_destructor(parsec_taskpool_t *apTaskpool) +{ + parsec_ForwardSHTReshape_taskpool_t *pTaskpool = (parsec_ForwardSHTReshape_taskpool_t *)apTaskpool; + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_ForwardSHTReshape_DEFAULT_ADT_IDX]); + parsec_private_memory_fini(pTaskpool->_g_apGmtheta_r); + parsec_private_memory_fini(pTaskpool->_g_apFmnm); + parsec_private_memory_fini(pTaskpool->_g_apWork1); + parsec_private_memory_fini(pTaskpool->_g_apWork2); + parsec_taskpool_free(apTaskpool); +} + +/** + */ +int ForwardSHTReshape(parsec_context_t *apContext, int aRank, int aVerbose, parsec_tiled_matrix_t *apFDataDesc, + parsec_tiled_matrix_t *apFLMDesc, parsec_tiled_matrix_t *apFLMTDesc, parsec_tiled_matrix_t *apET1Desc, + parsec_tiled_matrix_t *apET2Desc, parsec_tiled_matrix_t *apEPDesc, parsec_tiled_matrix_t *apSLMNDesc, + parsec_tiled_matrix_t *apIEDesc, parsec_tiled_matrix_t *apIODesc, parsec_tiled_matrix_t *apPDesc, + parsec_tiled_matrix_t *apDDesc, parsec_tiled_matrix_t *apADesc, int aFDataM, int aEPN, int aET1M, int aET2M, + int aPN, int aFlmTNB, int aT, int aLSize, double *apNormGlobal, int aNT, int aUpperLower) +{ + parsec_taskpool_t *pTaskpool = ForwardSHTReshape_constructor(apFDataDesc, apFLMDesc, apFLMTDesc, apET1Desc, apET2Desc, + apEPDesc, apSLMNDesc, apIEDesc, apIODesc, apPDesc, apDDesc, + apADesc, aFDataM, aEPN, aET1M, aET2M, aPN, aFlmTNB, aT, aLSize); + + if( pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + ForwardSHTReshape_destructor(pTaskpool); + } + // Reshape + VERBOSE_PRINT(aRank, aVerbose, ("Redistribute apFLMTDesc -> desc_A\n")); + parsec_redistribute(apContext, apFLMTDesc, apADesc, apADesc->m, apADesc->n, 0, 0, 0, 0); + + double norm_flmT = 0.0; + GetMatrixNorm(apContext, 
apNormGlobal, apFLMTDesc, aNT, aUpperLower, 0); + norm_flmT = *apNormGlobal; + double norm_A = 0.0; + GetMatrixNorm(apContext, apNormGlobal, apADesc, aNT, aUpperLower, 0); + norm_A = *apNormGlobal; + + if( 0 == aRank ) { + fprintf(stderr, RED "norm_flmT %lf norm_A %lf\n" RESET, norm_flmT, norm_A); + } + + // Free memory + parsec_data_free(((parsec_matrix_block_cyclic_t *)apFLMTDesc)->mat); + parsec_tiled_matrix_destroy((parsec_tiled_matrix_t*)apFLMTDesc); + + return 0; +} + +%} diff --git a/src/runtime/parsec/jdf/GetMatrixNorm.jdf b/src/runtime/parsec/jdf/GetMatrixNorm.jdf new file mode 100644 index 00000000..705eaa40 --- /dev/null +++ b/src/runtime/parsec/jdf/GetMatrixNorm.jdf @@ -0,0 +1,139 @@ +extern "C" %{ +/** + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + **/ + +#include +#include + +%} + +/* Globals + */ +apADesc [ type = "parsec_tiled_matrix_t*" ] +aNT [ type = "int" ] +aUpperLower [ type = "int" ] +apNorm [ type = "double *" ] +apNormTile [ type = "double *" ] +aIsSymmetric [ type = "int" ] + +/************************************************** + * generate diagonal tiles * + **************************************************/ +task(m, n) + +// Execution space +m = 0 .. apADesc->mt-1 +n = 0 .. (aIsSymmetric ? m : apADesc->nt-1) // Adjust based on matrix symmetry + +// Parallel partitioning +:apADesc(m, n) + +// Parameters +READ D <- apADesc(m, n) + +BODY +{ + int ldd = BLKLDD(apADesc, m); + int tempmm = m == apADesc->mt-1 ? 
apADesc->m - m * apADesc->mb : apADesc->mb; + int tempnn = tempmm; + + /* Calcuate the global norm */ + int tid = es->th_id; + double current_value = 0.0; + apNormTile[n * aNT + m] = 0.0; + + // Diagonal modification for symmetric matrices + if (aIsSymmetric && m == n) { + for(int i = 0; i < apADesc->mb; i++) { + ((double *)D)[i * apADesc->mb + i] += 1.0e-6; + } + } + + // Norm calculation + for (int j = 0; j < apADesc->nb; j++) { + for (int i = 0; i < apADesc->mb; i++) { + current_value = ((double *) D)[j * apADesc->mb + i]; + apNormTile[n * aNT + m] += current_value * current_value; + } + } + + apNorm[tid] += apNormTile[n * aNT + m]; + apNormTile[n * aNT + m] = sqrt(apNormTile[n * aNT + m]); + + if(m - n >= aNT * PORTION_NORM ){ + apNorm[tid] = 0.0; + } +} +END + +extern "C" %{ + +/** + * Generate matrix + * @return the parsec object to schedule + */ +parsec_taskpool_t* +GetMatrixNormConstructor(parsec_tiled_matrix_t *apADesc, int aUpperLower, int aNT, + double *apNormTmp, double *apNormTile, int aIsSymmetric) +{ + + /* Check input arguments */ + if (aUpperLower != PlasmaLower) { + dplasma_error("STARSH_appr_New", "illegal value of uplo, should be PlasmaLower\n"); + return NULL; + } + + parsec_GetMatrixNorm_taskpool_t *pTaskpool = parsec_GetMatrixNorm_new(apADesc, aNT, aUpperLower, + apNormTmp, apNormTile, aIsSymmetric); + + return (parsec_taskpool_t*) pTaskpool; +} + +/* Destructor */ +void GetMatrixNormDestructor(parsec_taskpool_t *apTaskpool) +{ + parsec_taskpool_free(apTaskpool); +} + +/** + * Generate matrix + */ +void GetMatrixNorm(parsec_context_t *apContext, double *apNormGlobal, parsec_tiled_matrix_t *apADesc, + int aNT, int aUpperLower, int aIsSymmetric) +{ + + /* Only for 1 vp */ + assert(apContext->nb_vp == 1); + int nb_threads = apContext->virtual_processes[0]->nb_cores; + double *pNormTmp = (double *) calloc(nb_threads, sizeof(double)); + + /* Make sure norm_tile and norm_global is fresh */ + double* pNormTile = (double*) malloc(aNT * aNT * 
sizeof(double)); + memset(pNormTile, 0, aNT * aNT * sizeof(double)); + *apNormGlobal = 0.0; + parsec_taskpool_t *pTaskpool = GetMatrixNormConstructor(apADesc, aUpperLower, aNT, + pNormTmp, pNormTile, aIsSymmetric); + + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + GetMatrixNormDestructor(pTaskpool); + + /* Reduce to the global norm */ + double norm_process = 0.0; + for(int i = 0; i < nb_threads; i++) { + norm_process += pNormTmp[i]; + } + + MPI_Allreduce(MPI_IN_PLACE, pNormTile, aNT * aNT, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&norm_process, apNormGlobal, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + *apNormGlobal = sqrt(*apNormGlobal); + + free(pNormTile); + free(pNormTmp); +} + +%} diff --git a/src/runtime/parsec/jdf/InverseSHT.jdf b/src/runtime/parsec/jdf/InverseSHT.jdf new file mode 100644 index 00000000..86eb11c7 --- /dev/null +++ b/src/runtime/parsec/jdf/InverseSHT.jdf @@ -0,0 +1,162 @@ +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static void FlmToFlmT(double *apFlmT, double *apFlm, parsec_matrix_block_cyclic_t *apFLMTDesc, int aFlmM, int aFlmN, int aM, int aN) { + int flm_offset = aM * apFLMTDesc->super.mb; + int flmT_offset = aN * apFLMTDesc->super.mb; + int size = (aM == apFLMTDesc->super.lmt - 1) ? 
(aFlmM * aFlmN) - flm_offset : apFLMTDesc->super.mb; + memcpy(apFlmT + flmT_offset, apFlm + flm_offset, size*sizeof(double)); +} + + +%} + +/* Globals + */ +apFSpatialDesc [ type = "parsec_tiled_matrix_t*" ] +apFLMDesc [ type = "parsec_tiled_matrix_t*" aligned = apFDataDesc] +apZLMDesc [ type = "parsec_tiled_matrix_t*" ] +apSCDesc [ type = "parsec_tiled_matrix_t*" ] +aLSize [ type = "int" ] + +/* Temporary buffer used for convert */ +apWork [ type = "parsec_memory_pool_t *" hidden = on default = NULL ] + +/* GPU workspace */ +ws_gpu [ type = "void *" hidden = on default = NULL ] + +/* GPU number and index */ +nb_cuda_devices [ type = "int" hidden = on default = 0 ] +cuda_device_index [ type = "int *" hidden = on default = "NULL"] + +bind_gpu(n) + +n = 0 .. apFSpatialDesc->lnt-1 + +: apFSpatialDesc(0, n) + +READ flm <- apFLMDesc(0, n) + -> flm task(n) [ type_remote = flm ] + +READ f_spatial <- apFSpatialDesc(0, n) + -> f_spatial task(n) [ type_remote = f_spatial ] + + +BODY +{ +#if defined(USE_CUDA) + if( nb_cuda_devices > 0 ) { + int g = climate_emulator_gpu_load_balance( n, gb->nodes, nb_cuda_devices ); + parsec_advise_data_on_device( _f_flm->original, + cuda_device_index[g], + PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE ); + parsec_advise_data_on_device( _f_f_spatial->original, + cuda_device_index[g], + PARSEC_DEV_DATA_ADVICE_PREFERRED_DEVICE ); + } +#endif +} +END + + +task(n) + +n = 0 .. 
apFSpatialDesc->lnt-1 + +: apFSpatialDesc(0, n) + +// TODO: check whether this will evict f_spatial first on GPU +RW f_spatial <- f_spatial bind_gpu(n) [ type_remote = f_spatial ] + -> apFSpatialDesc(0, n) + +READ flm <- flm bind_gpu(n) [ type_remote = flm ] +READ Zlm <- apZLMDesc(0, %{ return apFSpatialDesc->super.rank_of(&apFSpatialDesc->super, 0, n); %}) +READ SC <- apSCDesc(0, %{ return apFSpatialDesc->super.rank_of(&apFSpatialDesc->super, 0, n); %}) + +BODY +{ + double *pSmt = (double *) parsec_private_memory_pop(apWork); + InverseSHTHelper(flm, f_spatial, Zlm, SC, pSmt, aLSize); + parsec_private_memory_push(apWork, pSmt); + +} +END + + +extern "C" %{ + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* +InverseSHTConstructor(parsec_tiled_matrix_t *apFSpatialDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apZLMDesc, parsec_tiled_matrix_t *apSCDesc, + int aLSize) +{ + + parsec_InverseSHT_taskpool_t *pTaskpool = parsec_InverseSHT_new(apFSpatialDesc, apFLMDesc, apZLMDesc, apSCDesc, aLSize); + + pTaskpool->_g_apWork = (parsec_memory_pool_t*) malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_apWork, (aLSize + 1) * (2 * aLSize - 1) * sizeof(double) ); + +#if defined(USE_CUDA) + int nb = 0, *dev_index; + + /** Find all CUDA devices */ + + hicma_parsec_find_cuda_devices( parsec, &dev_index, &nb); + + pTaskpool->_g_ws_gpu = (void *)gb->ws; + pTaskpool->_g_nb_cuda_devices = nb; + pTaskpool->_g_cuda_device_index = dev_index; +#endif + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_InverseSHT_f_spatial_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, apFSpatialDesc->mb, apFSpatialDesc->nb, apFSpatialDesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_InverseSHT_flm_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, apFLMDesc->mb, apFLMDesc->nb, apFLMDesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return 
(parsec_taskpool_t*)pTaskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void InverseSHTDestructor(parsec_taskpool_t *apTaskpool) +{ + parsec_InverseSHT_taskpool_t *pTaskpool = (parsec_InverseSHT_taskpool_t *) apTaskpool; + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_InverseSHT_f_spatial_ADT_IDX]); + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_InverseSHT_flm_ADT_IDX]); + parsec_private_memory_fini(pTaskpool->_g_apWork); + parsec_taskpool_free(apTaskpool); +} + +/** + */ +int InverseSHT(parsec_context_t *apContext, parsec_tiled_matrix_t *apFSpatialDesc, parsec_tiled_matrix_t *apFLMDesc, + parsec_tiled_matrix_t *apZLMDesc, parsec_tiled_matrix_t *apSCDesc, int aLSize) { + + parsec_taskpool_t *pTaskpool = InverseSHTConstructor(apFSpatialDesc, apFLMDesc, apZLMDesc, apSCDesc, aLSize); + + if( pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + InverseSHTDestructor(pTaskpool); + } + return 0; +} + +%} diff --git a/src/runtime/parsec/jdf/MatrixCompress.jdf b/src/runtime/parsec/jdf/MatrixCompress.jdf new file mode 100644 index 00000000..c64a7c93 --- /dev/null +++ b/src/runtime/parsec/jdf/MatrixCompress.jdf @@ -0,0 +1,409 @@ +extern "C" %{ +/** + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. 
/**************************************************
 *        generate diagonal tiles                 *
 *
 * One task per tile (m, n) inside the dense band, i.e. n in
 * max(m - aBandSizeDense + 1, 0) .. m. The body optionally attaches a freshly
 * allocated mb x mb buffer to the tile and then accumulates the squared
 * entries of that buffer into apNormTile / apNorm.
 **************************************************/
generate_band(m, n) [high_priority = on]

// Execution space
m = 0 .. apADesc->mt-1
n = %{ return parsec_imax(m-aBandSizeDense+1, 0); %} .. m

// Parallel partitioning
:apADesc(m, n)

// Parameters
// D is the band tile itself; D1 has no producer (NULL) and is not referenced by the body.
READ D <- apADesc(m, n)
READ D1 <- NULL [ type_remote = FULL ]

BODY
{
    /* NOTE(review): ldd, tempmm and tempnn are computed but never used below. */
    int ldd = BLKLDD(apADesc, m);
    int tempmm = m == apADesc->mt-1 ? apADesc->m - m * apADesc->mb : apADesc->mb;
    int tempnn = tempmm;

    /* New data_copy and allocate memory on band if not allocated */
#if !BAND_MEMORY_CONTIGUOUS
    if( aBandSizeDense < aNT || aAutoBand == 1 || !MEMORY_IN_CHOLEKSY_DP ) {
        this_task->data._f_D.data_out = parsec_data_copy_new(data_of_apADesc(m, n), 0, PARSEC_MatrixCompress_FULL_ADT->opaque_dtt, PARSEC_DATA_FLAG_PARSEC_MANAGED);
        if( aGpus > 0 ) {
            /* GPU runs: back the tile with host memory obtained from the device runtime.
             * NOTE(review): if neither CUDA nor HIP support is compiled in, nothing is
             * allocated on this path; unlike the calloc() path the buffer is not zeroed here. */
#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT)
            cudaMallocHost((void**)&this_task->data._f_D.data_out->device_private, apADesc->mb * apADesc->mb * sizeof(double));
#endif

#if defined(PARSEC_HAVE_DEV_HIP_SUPPORT)
            // TODO A better way
            hipHostMalloc((void**)&this_task->data._f_D.data_out->device_private, apADesc->mb * apADesc->mb * sizeof(double), hipHostMallocDefault);
#endif
        } else {
            /* CPU runs: zero-initialised mb x mb buffer. */
            this_task->data._f_D.data_out->device_private = calloc(apADesc->mb * apADesc->mb, sizeof(double));
        }
    }
#endif

    /* Calculate the global norm.
     * The `1 ||` makes this unconditional regardless of aAdaptiveDecision.
     * NOTE(review): no statement in this body writes matrix values into the tile before it
     * is summed; confirm the entries are produced elsewhere. When the allocation section
     * above is skipped, data_out is assumed to have been set by the runtime -- TODO confirm. */
    if( 1 || aAdaptiveDecision ) {
        int tid = es->th_id;
        double current_value = 0.0;
        apNormTile[n*aNT+m] = 0.0;

        /* Sum of squares over the full mb x nb tile, addressed with leading dimension mb. */
        for(int j = 0; j < apADesc->nb; j++) {
            for(int i = 0; i < apADesc->mb; i++) {
                current_value = ((double *)this_task->data._f_D.data_out->device_private)[j*apADesc->mb+i];
                apNormTile[n * aNT + m] += current_value * current_value;
            }
        }

        /* Per-thread accumulator gets the squared norm; the per-tile slot keeps the norm itself. */
        apNorm[tid] += apNormTile[n * aNT + m];
        apNormTile[n * aNT + m] = sqrt(apNormTile[n * aNT + m]);

        /* NOTE(review): this clears the whole per-thread accumulator, not just this tile's
         * contribution -- confirm that is intended. */
        if( m - n >= aNT * PORTION_NORM )
            apNorm[tid] = 0.0;
    }
}
END
/**************************************************
 * generate and approximate lower triangular part *
 *
 * One task per off-band tile (m, n), n <= m - aBandSizeDense.
 * The tile is generated into a scratch buffer, its squared norm is
 * accumulated, and it is compressed into U/V factors with
 * starsh_dense_dlrrsdd(). On success the rank is stored in R and
 * U followed by V is copied into a new buffer attached to A.
 **************************************************/
generate_approximate_L(m, n) [high_priority = on]

// Execution space
m = aBandSizeDense .. apADesc->mt-1
n = 0 .. m-aBandSizeDense

// Parallel partitioning
:apADesc(m, n)

// Parameters
// R carries the rank of the tile: it comes from READ_R and goes back through WRITE_R.
RW R <- R READ_R(m, n)         [ type_remote = AR ]
     -> R WRITE_R(m, n)        [ type_remote = AR ]

// A has no producer; its data copy is created inside the body.
READ A <- NULL                 [ type_remote = UV ]

BODY
{
    int size = 0;
    int ldU = BLKLDD(apADesc, m);
    int ldV = BLKLDD(apADesc, m);
    int tempmm = m == apADesc->mt-1 ? apADesc->m - m * apADesc->mb : apADesc->mb;
    /* Column extent must be derived from the column index n (was computed from m). */
    int tempnn = n == apADesc->mt-1 ? apADesc->m - n * apADesc->mb : apADesc->mb;
    void *U = parsec_private_memory_pop(aUv_work);
    void *tmp_D = parsec_private_memory_pop(aD_work);
    void *work = parsec_private_memory_pop(aRsvaD_work);
    void *iwork = parsec_private_memory_pop(aRsvd_iwork);
    int rank = -1;
    /* V is the second half of the U/V scratch buffer. Byte arithmetic goes through
     * char* because arithmetic on void* is a compiler extension. */
    void *V = (char *)U + (size_t)apADesc->mb * aMaxRank * sizeof(double);

#if GENERATE_RANDOM_DATA
    CORE_dplgsy(
        aN, tempmm, tempnn, tmp_D, ldU,
        apADesc->m, m*apADesc->mb, n*apADesc->nb, 3872 );
#else
    /* Fill tmp_D with the tempmm x tempnn tile, leading dimension tempmm. */
    params_kernel->kernel(tempmm, tempnn, params_kernel->index + m*apADesc->mb,
            params_kernel->index + n*apADesc->mb, params_kernel->data, params_kernel->data, tmp_D,
            tempmm);
#endif

    /* Calculate the global norm (the `1 ||` makes this unconditional). */
    if( 1 || aAdaptiveDecision ) {
        int tid = es->th_id;
        double current_value = 0.0;
        apNormTile[n*aNT+m] = 0.0;

        /* Visit exactly the generated entries, with the same leading dimension (tempmm)
         * that the compression call below uses for tmp_D. For full tiles this is the
         * original mb x nb traversal; for a partial tile it avoids reading stale
         * scratch memory. */
        for(int j = 0; j < tempnn; j++) {
            for(int i = 0; i < tempmm; i++) {
                current_value = ((double *)tmp_D)[j*tempmm+i];
                apNormTile[n*aNT+m] += current_value * current_value;
            }
        }

        apNorm[tid] += apNormTile[n*aNT+m];
        apNormTile[n*aNT+m] = sqrt(apNormTile[n*aNT+m]);

        /* NOTE(review): this clears the whole per-thread accumulator, not just this tile's
         * contribution -- confirm that is intended. */
        if( m - n >= aNT*PORTION_NORM )
            apNorm[tid] = 0.0;
    }

#if 1
    starsh_dense_dlrrsdd(tempmm, tempnn, tmp_D, tempmm, U, ldU, V, ldV, &rank,
            aMaxRank, aRsvd_oversample, aTolerance, work, aRsvd_lwork, iwork);
#else
    int maxrank_used = hicma_parsec_min(100, aMaxRank);
    while( 1 ) {
        starsh_dense_dlrrsdd(tempmm, tempnn, tmp_D, tempmm, U, ldU, V, ldV, &rank,
                maxrank_used, aRsvd_oversample, aTolerance, work, aRsvd_lwork, iwork);
        maxrank_used *= 2;
        maxrank_used = hicma_parsec_min( apADesc->nb / 2, maxrank_used );
        if( rank != -1 || maxrank_used > apADesc->nb / 2 ) break;

        params_kernel->kernel(tempmm, tempnn, params_kernel->index + m*apADesc->mb,
                params_kernel->index + n*apADesc->mb, params_kernel->data, params_kernel->data, tmp_D,
                tempmm);
    }
#endif

    if(rank == -1) {
        /* Compression did not produce a rank; R and A are left untouched. */
        printf("Tile(%d, %d) is dense, try increasing NB or aMaxRank \n", m, n);
    } else {
        /* Update R and size */
        *(int *)R = rank;

        if(aSendFullTile == 1){ /* Storage of UV tiles is MB by maxrank by 2 */
            size = apADesc->mb * aMaxRank * 2;
        } else {
            size = apADesc->mb * parsec_imin(aMaxRank, rank) * 2;
        }

        /* New data_copy and allocate memory for apADesc(m, n);
         * For off band, if send_full_tile, allocate mb * maxrank * 2,
         * else, size = mb * min(maxrank, rank) * 2
         */
        this_task->data._f_A.data_out = parsec_data_copy_new(data_of_apADesc(m, n), 0, PARSEC_MatrixCompress_UV_ADT->opaque_dtt, PARSEC_DATA_FLAG_PARSEC_MANAGED);
        this_task->data._f_A.data_out->device_private = calloc(size, sizeof(double));

        /* New nb_elts for data_of(m, n) */
        (data_of_apADesc(m, n))->nb_elts = size * sizeof(double);

        /* Bytes occupied by one factor (mb x rank doubles). */
        size_t factor_bytes = (size_t)apADesc->mb * rank * sizeof(double);

        /* Copy U to A */
        memcpy((void *)this_task->data._f_A.data_out->device_private,
                (void *)U, factor_bytes);

        /* Copy V to A, directly behind U */
        memcpy((char *)this_task->data._f_A.data_out->device_private + factor_bytes,
                (void *)V, factor_bytes);
    }

    parsec_private_memory_push(aUv_work, U);
    parsec_private_memory_push(aD_work, tmp_D);
    parsec_private_memory_push(aRsvaD_work, work);
    parsec_private_memory_push(aRsvd_iwork, iwork);
}
END
*)&data->dcAr; + + /* Check input arguments */ + if (aUpperLower != PlasmaLower) { + dplasma_error("STARSH_appr_New", "illegal value of uplo, should be PlasmaLower\n"); + return NULL; + } + + /* Check aBandSizeDense */ + if(aBandSizeDense < 1 ) { + if(0 == apADesc->super.myrank ) + fprintf(stderr, "\nERROR: band_size_dense should be not less that 1 : %d\n\n", aBandSizeDense); + exit(1); + } + + /* Calculate workspace */ + int rsvd_oversample = 10; + int mn = rsvd_oversample + aMaxRank; + if(mn > apADesc->mb) { + mn = apADesc->mb; + } + size_t rsvd_lwork = (4*mn+7) * mn; + + if(rsvd_lwork < apADesc->mb){ + rsvd_lwork = apADesc->mb; + } + rsvd_lwork += mn*(3*apADesc->mb+mn+1); + size_t rsvd_liwork = 8*mn; + + parsec_MatrixCompress_taskpool_t *pTaskpool = + parsec_MatrixCompress_new(apADesc, apArDesc, aBandSizeDense, aNT, aMaxRank, aN, + apNormTmp, apNormTile, aAdaptiveDecision, aTolerance, aSendFullTile, + aAutoBand, aGpus, params_kernel); + + pTaskpool->_g_apNorm = apNormTmp; + pTaskpool->_g_aRsvd_oversample = rsvd_oversample; + pTaskpool->_g_aRsvd_lwork = rsvd_lwork; + pTaskpool->_g_aRsvd_liwork = rsvd_liwork; + + /* Memery pool */ + pTaskpool->_g_aUv_work = malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_aUv_work, (apADesc->mb*aMaxRank*2)*sizeof(double)); + + pTaskpool->_g_aD_work = malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_aD_work, (apADesc->mb*apADesc->mb)*sizeof(double)); + + pTaskpool->_g_aRsvaD_work = malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_aRsvaD_work, rsvd_lwork*sizeof(double)); + + pTaskpool->_g_aRsvd_iwork = malloc(sizeof(parsec_memory_pool_t)); + parsec_private_memory_init(pTaskpool->_g_aRsvd_iwork, rsvd_liwork*sizeof(int)); + + /* Arena */ + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_MatrixCompress_FULL_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, apADesc->mb, apADesc->mb, apADesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, 
-1 ); + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_MatrixCompress_UV_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, apADesc->mb, aMaxRank*2, apADesc->mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_MatrixCompress_AR_ADT_IDX], + parsec_datatype_int_t, PARSEC_MATRIX_FULL, + 1, 1, 1, 1, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)pTaskpool; +} + +/* Destructor */ +void MatrixCompress_destructor(parsec_taskpool_t *apTaskpool) +{ + parsec_MatrixCompress_taskpool_t *pTaskpool = (parsec_MatrixCompress_taskpool_t *)apTaskpool; + + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_MatrixCompress_FULL_ADT_IDX]); + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_MatrixCompress_UV_ADT_IDX]); + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_MatrixCompress_AR_ADT_IDX]); + + parsec_private_memory_fini(pTaskpool->_g_aUv_work ); + parsec_private_memory_fini(pTaskpool->_g_aD_work ); + parsec_private_memory_fini(pTaskpool->_g_aRsvaD_work ); + parsec_private_memory_fini(pTaskpool->_g_aRsvd_iwork ); + + parsec_taskpool_free(apTaskpool); +} + +/** + * Generate matrix + */ +void MatrixCompress(parsec_context_t *apContext, double *apNormGlobal, int aUpperLower, int aBandSizeDense, int aNT, + int aMaxRank, int aN, int aAdaptiveDecision, int aTolerance, int aSendFullTile, int aAutoBand, + int aGpus, hicma_parsec_data_t *data, starsh_params_t *params_kernel) +{ + + /* Only for 1 vp */ + assert(apContext->nb_vp == 1); + int nb_threads = apContext->virtual_processes[0]->nb_cores; + double *pNormTmp = (double *) calloc(sizeof(double), nb_threads); + + /* Make sure norm_tile and norm_global is fresh */ + double* pNormTile = (double*) malloc(aNT * aNT * sizeof(double)); + memset(pNormTile, 0, aNT * aNT * sizeof(double)); + // Make sure norm_tile and norm_global is fresh + *apNormGlobal = 0.0; + parsec_taskpool_t *pTaskpool = + MatrixCompress_constructor(aUpperLower, aBandSizeDense, 
aNT, aMaxRank, aN, pNormTmp, pNormTile, + aAdaptiveDecision, aTolerance, aSendFullTile, aAutoBand, aGpus, data, params_kernel); + + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + MatrixCompress_destructor(pTaskpool); + + /* Reduce to the global norm */ + double norm_process = 0.0; + for( int i = 0; i < nb_threads; i++ ) { + norm_process += pNormTmp[i]; + } + + MPI_Allreduce(MPI_IN_PLACE, pNormTile, aNT * aNT, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&norm_process, apNormGlobal, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + *apNormGlobal = sqrt(*apNormGlobal); + + free(pNormTmp); + free(pNormTile); +} + +%} diff --git a/src/runtime/parsec/jdf/MeanSquaredError.jdf b/src/runtime/parsec/jdf/MeanSquaredError.jdf new file mode 100644 index 00000000..c925aff7 --- /dev/null +++ b/src/runtime/parsec/jdf/MeanSquaredError.jdf @@ -0,0 +1,96 @@ +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The Universiy of Tennessee and The Universiy of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static double NormCore(complex double *apDataA, double *apDataB, int aM, int aN, int aLSize) { + double result = 0.0, diff; + for(int j = 0; j < aN; j++) { + for(int i = 0; i < aM; i++) { + double diff = creal(apDataA[j*aM+i]) - apDataB[j*aM+i]; + result += diff * diff; + } + } + return sqrt(result) / (2*aLSize*(aLSize+1)); +} + +%} + +apFDataDesc [ type = "parsec_tiled_matrix_t*" ] +apFSpatialDesc [ type = "parsec_tiled_matrix_t*" aligned = apFDataDesc ] +aLSize [type="int"] + +task(n) + +n = 0 .. 
apFDataDesc->lnt-1 + +: apFDataDesc(0, n) + +READ f_data <- apFDataDesc(0, n) [ type = f_data ] +READ f_spatial <- apFSpatialDesc(0, n) [ type = f_spatial ] + +BODY +{ + double norm = NormCore(f_data, f_spatial, apFDataDesc->mb, apFDataDesc->nb, aLSize); +} +END + +extern "C" %{ + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* +MeanSquaredErrorConstructor( parsec_matrix_block_cyclic_t * apDataDesc, parsec_matrix_block_cyclic_t * apSpatialDesc, int aLSize) +{ + int mb = apDataDesc->super.mb; + int nb = apDataDesc->super.nb; + parsec_MeanSquaredError_taskpool_t *pTaskpool = parsec_MeanSquaredError_new(&apDataDesc->super, &apSpatialDesc->super, aLSize); + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_MeanSquaredError_f_data_ADT_IDX], + parsec_datatype_double_complex_t, PARSEC_MATRIX_FULL, + 1, mb, nb, mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + parsec_add2arena(&pTaskpool->arenas_datatypes[PARSEC_MeanSquaredError_f_spatial_ADT_IDX], + parsec_datatype_double_t, PARSEC_MATRIX_FULL, + 1, mb, nb, mb, + PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)pTaskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void MeanSquaredErrorDestructor(parsec_taskpool_t *apTaskpool) +{ + parsec_MeanSquaredError_taskpool_t *pTaskpool = (parsec_MeanSquaredError_taskpool_t *)apTaskpool; + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_MeanSquaredError_f_data_ADT_IDX]); + parsec_del2arena(&pTaskpool->arenas_datatypes[PARSEC_MeanSquaredError_f_spatial_ADT_IDX]); + parsec_taskpool_free(apTaskpool); +} + +/** + */ +int MeanSquaredError(parsec_context_t *apContext, parsec_matrix_block_cyclic_t* apDataDesc, + parsec_matrix_block_cyclic_t* apSpatialDesc, int aLSize) +{ + parsec_taskpool_t *pParsec_MeanSquaredError = NULL; + pParsec_MeanSquaredError = MeanSquaredErrorConstructor(apDataDesc, apSpatialDesc, aLSize); + if( pParsec_MeanSquaredError != NULL ){ + parsec_context_add_taskpool(apContext, pParsec_MeanSquaredError); + 
/**
 * Reads aM x aN comma-separated real values from a text file into a column-major tile.
 *
 * Values are consumed in file order with the row index in the outer loop and stored at
 * apData[j*aM + i]. Nothing is parsed when aGpus is non-zero (the file is still opened
 * and closed).
 *
 * @param[in]  apFilename Path of the file to read.
 * @param[out] apData     Destination buffer with room for aM*aN doubles.
 * @param[in]  aM         Number of rows (also the leading dimension of apData).
 * @param[in]  aN         Number of columns.
 * @param[in]  aGpus      GPU count; the file is parsed only when this is 0.
 * @return 0 on success, -1 if the file cannot be opened, 1 if a value cannot be parsed.
 */
static int ReadCSVCore(const char* apFilename, double *apData, int aM, int aN, int aGpus) {

    if (NULL == apFilename) {
        fprintf(stderr, "File opening failed: no file name given\n");
        return -1;
    }

    FILE *pFile = fopen(apFilename, "r");
    if (!pFile) {
        fprintf(stderr, "File opening failed: %s\n", apFilename);
        return -1;
    }

    if( 0 == aGpus ) {
        for (int i = 0; i < aM; i++) {
            for (int j = 0; j < aN; j++) {
                /* "%lf," reads one value and, if present, the comma that follows it.
                 * Once a conversion fails every later one fails too, so stop and
                 * report instead of silently leaving the rest of the tile unset. */
                if (fscanf(pFile, "%lf,", &apData[j*aM+i]) != 1) {
                    fprintf(stderr, "Error reading file at row %d, column %d\n", i, j);
                    fclose(pFile);
                    return 1;
                }
            }
        }
    }
    fclose(pFile);
    return 0;
}
+ */ +parsec_taskpool_t* ReadCSVConstructor(parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) { + + // Init and allocate memory + int kq = (aTimeSlot%aNodes)? aTimeSlot/aNodes+1 : aTimeSlot/aNodes; + parsec_matrix_block_cyclic_init(apDesc, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, aRank, aMB, aNB, aMB, + aNB*aNodes, 0, 0, aMB, aNB*aNodes, 1, aNodes, 1, kq, 0, 0); + + apDesc->mat = parsec_data_allocate((size_t)apDesc->super.nb_local_tiles * + (size_t)apDesc->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(apDesc->super.mtype)); + + if(NULL == apFilename) { + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, "desc"); + VERBOSE_PRINT(aRank, aVerbose, ("FileName is NULL\n")); + return NULL; + } + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, apFilename); + parsec_ReadCSV_taskpool_t *taskpool = parsec_ReadCSV_new(&apDesc->super, apFilename, aGpus); + + parsec_add2arena(&taskpool->arenas_datatypes[PARSEC_ReadCSV_DEFAULT_ADT_IDX], + parsec_datatype_double_complex_t, PARSEC_MATRIX_FULL, + 1, aMB, aNB, aMB, PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)taskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void ReadCSVDestructor(parsec_taskpool_t *pTaskpool) +{ + parsec_ReadCSV_taskpool_t *ReadCSV_taskpool = (parsec_ReadCSV_taskpool_t *)pTaskpool; + parsec_del2arena(&ReadCSV_taskpool->arenas_datatypes[PARSEC_ReadCSV_DEFAULT_ADT_IDX]); + parsec_taskpool_free(pTaskpool); +} + +int ReadCSV(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) +{ + VERBOSE_PRINT(aRank, aVerbose, ("Reading %s\n", apFilename)); + parsec_taskpool_t *pTaskpool = ReadCSVConstructor(apDesc, aMB, aNB, aNodes, aTimeSlot, + apFilename, aRank, aVerbose, aGpus); + + if(pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, 
/**
 * Reads aM x aN complex values, written as "<real><signed imag>i," (for example
 * "1.0+2.0i,"), from a text file into a column-major tile.
 *
 * Values are consumed in file order with the row index in the outer loop and stored at
 * index j*aM + i. With aGpus == 0 the tile is filled as `complex double`; otherwise it
 * is filled as cuDoubleComplex, but only when CUDA or HIP device support is compiled in.
 *
 * @param[in]  apFilename Path of the file to read.
 * @param[out] apData     Destination buffer with room for aM*aN complex values.
 * @param[in]  aM         Number of rows (also the leading dimension of apData).
 * @param[in]  aN         Number of columns.
 * @param[in]  aGpus      GPU count; selects the element representation.
 * @return 0 on success, -1 if the file cannot be opened, 1 if a value cannot be parsed.
 */
static int ReadCSVComplexCore(const char* apFilename, complex double *apData, int aM, int aN, int aGpus) {

    if (NULL == apFilename) {
        fprintf(stderr, "File opening failed: no file name given\n");
        return -1;
    }

    FILE *pFile = fopen(apFilename, "r");
    if (!pFile) {
        fprintf(stderr, "File opening failed: %s\n", apFilename);
        return -1;
    }

    double real = 0.0, imag = 0.0;

    if( 0 == aGpus ) {
        complex double *pData = (complex double *)apData;
        for (int i = 0; i < aM; i++) {
            for (int j = 0; j < aN; j++) {
                /* Both parts must convert; otherwise real/imag would hold stale values. */
                if (fscanf(pFile, "%lf%lfi,", &real, &imag) != 2) {
                    fprintf(stderr, "Error reading file at row %d, column %d\n", i, j);
                    fclose(pFile);
                    return 1;
                }
                pData[j*aM+i] = real + imag * I;
            }
        }
    } else {

#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT)
        cuDoubleComplex *pData = (cuDoubleComplex *)apData;
        for (int i = 0; i < aM; i++) {
            for (int j = 0; j < aN; j++) {
                if (fscanf(pFile, "%lf%lfi,", &real, &imag) != 2) {
                    fprintf(stderr, "Error reading file at row %d, column %d\n", i, j);
                    fclose(pFile);
                    return 1;
                }
                pData[j*aM+i] = make_cuDoubleComplex(real, imag);
            }
        }
#endif
    }

    fclose(pFile);
    return 0;
}
pDescA->lnt-1 + +: pDescA(m, n) + +RW A <- pDescA(m, n) + -> pDescA(m, n) + +BODY +{ + ReadCSVComplexCore(pFilename, A, pDescA->mb, pDescA->nb, nb_gpus); + if(0 == nb_gpus) SumComplexData(A, pDescA->mb, pDescA->nb); +} +END + +extern "C" %{ + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* ReadCSVComplexConstructor(parsec_matrix_block_cyclic_t *apDesc, int aMB, + int aNB, int aNodes, int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) { + + // Init and allocate memory + parsec_matrix_block_cyclic_init(apDesc, PARSEC_MATRIX_COMPLEX_DOUBLE, PARSEC_MATRIX_TILE, aRank, aMB, aNB, aMB, + aNB*aNodes, 0, 0, aMB, aNB*aNodes, 1, aNodes, 1, 1, 0, 0); + + apDesc->mat = parsec_data_allocate((size_t)apDesc->super.nb_local_tiles * + (size_t)apDesc->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(apDesc->super.mtype)); + + if(NULL == apFilename) { + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, "desc"); + VERBOSE_PRINT(aRank, aVerbose, ("FileName is NULL\n")); + return NULL; + } + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, apFilename); + parsec_ReadCSVComplex_taskpool_t *taskpool = parsec_ReadCSVComplex_new(&apDesc->super, apFilename, aGpus); + + parsec_add2arena(&taskpool->arenas_datatypes[PARSEC_ReadCSVComplex_DEFAULT_ADT_IDX], + parsec_datatype_double_complex_t, PARSEC_MATRIX_FULL, + 1, aMB, aNB, aMB, PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)taskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void ReadCSVComplexDestructor(parsec_taskpool_t *pTaskpool) +{ + parsec_ReadCSVComplex_taskpool_t *ReadCSVComplex_taskpool = (parsec_ReadCSVComplex_taskpool_t *)pTaskpool; + parsec_del2arena(&ReadCSVComplex_taskpool->arenas_datatypes[PARSEC_ReadCSVComplex_DEFAULT_ADT_IDX]); + parsec_taskpool_free(pTaskpool); +} + +int ReadCSVComplex(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char 
/**
 * Reads aM x aN comma-separated real values from a text file into a column-major tile.
 *
 * Values are consumed in file order with the row index in the outer loop and stored at
 * apData[j*aM + i]. Nothing is parsed when aGpus is non-zero (the file is still opened
 * and closed).
 *
 * @param[in]  apFilename Path of the file to read.
 * @param[out] apData     Destination buffer with room for aM*aN doubles.
 * @param[in]  aM         Number of rows (also the leading dimension of apData).
 * @param[in]  aN         Number of columns.
 * @param[in]  aGpus      GPU count; the file is parsed only when this is 0.
 * @return 0 on success, -1 if the file cannot be opened, 1 if a value cannot be parsed.
 */
static int ReadCSVTimeSlotCore(const char* apFilename, double *apData, int aM, int aN, int aGpus) {

    if (NULL == apFilename) {
        fprintf(stderr, "File opening failed: no file name given\n");
        return -1;
    }

    FILE *pFile = fopen(apFilename, "r");
    if (!pFile) {
        fprintf(stderr, "File opening failed: %s\n", apFilename);
        return -1;
    }

    if( 0 == aGpus ) {
        for (int i = 0; i < aM; i++) {
            for (int j = 0; j < aN; j++) {
                /* "%lf," reads one value and, if present, the comma that follows it.
                 * A failed conversion is reported instead of being silently ignored. */
                if (fscanf(pFile, "%lf,", &apData[j*aM+i]) != 1) {
                    fprintf(stderr, "Error reading file at row %d, column %d\n", i, j);
                    fclose(pFile);
                    return 1;
                }
            }
        }
    }
    fclose(pFile);
    return 0;
}
pDescA->lnt-1 + +: pDescA(m, n) + +RW A <- pDescA(m, n) + -> pDescA(m, n) + +BODY +{ + ReadCSVTimeSlotCore(pFilename, A, pDescA->mb, pDescA->nb, nb_gpus); + if(0 == nb_gpus) SumDoubleData(A, pDescA->mb, pDescA->nb); +} +END + +extern "C" %{ + +/** + * @return the parsec object to schedule. + */ +parsec_taskpool_t* ReadCSVTimeSlotConstructor(parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) { + + // Init and allocate memory + int kq = (aTimeSlot%aNodes)? aTimeSlot/aNodes+1 : aTimeSlot/aNodes; + parsec_matrix_block_cyclic_init(apDesc, PARSEC_MATRIX_DOUBLE, PARSEC_MATRIX_TILE, aRank, aMB, aNB, aMB, + aNB*aTimeSlot, 0, 0, aMB, aNB*aTimeSlot, 1, aNodes, 1, kq, 0, 0); + + apDesc->mat = parsec_data_allocate((size_t)apDesc->super.nb_local_tiles * + (size_t)apDesc->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(apDesc->super.mtype)); + + if(NULL == apFilename) { + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, "desc"); + VERBOSE_PRINT(aRank, aVerbose, ("FileName is NULL\n")); + return NULL; + } + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, apFilename); + parsec_ReadCSVTimeSlot_taskpool_t *taskpool = parsec_ReadCSVTimeSlot_new(&apDesc->super, apFilename, aGpus); + + parsec_add2arena(&taskpool->arenas_datatypes[PARSEC_ReadCSVTimeSlot_DEFAULT_ADT_IDX], + parsec_datatype_double_complex_t, PARSEC_MATRIX_FULL, + 1, aMB, aNB, aMB, PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)taskpool; +} + +/** + * @param [inout] the parsec object to destroy +*/ +void ReadCSVTimeSlotDestructor(parsec_taskpool_t *pTaskpool) +{ + parsec_ReadCSVTimeSlot_taskpool_t *ReadCSVTimeSlot_taskpool = (parsec_ReadCSVTimeSlot_taskpool_t *)pTaskpool; + parsec_del2arena(&ReadCSVTimeSlot_taskpool->arenas_datatypes[PARSEC_ReadCSVTimeSlot_DEFAULT_ADT_IDX]); + parsec_taskpool_free(pTaskpool); +} + +int ReadCSVTimeSlot(parsec_context_t *apContext, 
/**
 * Reads aM x aN comma-separated real values from a text file and stores each as a
 * complex number with zero imaginary part in a column-major tile.
 *
 * Values are consumed in file order with the row index in the outer loop and stored at
 * index j*aM + i. With aGpus == 0 the tile is filled as `complex double`; otherwise it
 * is filled as cuDoubleComplex, but only when CUDA or HIP device support is compiled in.
 *
 * @param[in]  apFilename Path of the file to read.
 * @param[out] apData     Destination buffer with room for aM*aN complex values.
 * @param[in]  aM         Number of rows (also the leading dimension of apData).
 * @param[in]  aN         Number of columns.
 * @param[in]  aGpus      GPU count; selects the element representation.
 * @return 0 on success, -1 if the file cannot be opened, 1 if a value cannot be parsed.
 */
static int ReadCSVToComplexCore(const char* apFilename, void *apData, int aM, int aN, int aGpus) {

    if (NULL == apFilename) {
        fprintf(stderr, "File opening failed: no file name given\n");
        return -1;
    }

    FILE *pFile = fopen(apFilename, "r");
    if (!pFile) {
        fprintf(stderr, "File opening failed: %s\n", apFilename);
        return -1;
    }

    double real = 0.0;
    if( 0 == aGpus ) {
        complex double *pData = (complex double *)apData;
        for (int i = 0; i < aM; i++) {
            for (int j = 0; j < aN; j++) {
                /* Never store `real` unless this conversion actually succeeded. */
                if (fscanf(pFile, "%lf,", &real) != 1) {
                    fprintf(stderr, "Error reading file at row %d, column %d\n", i, j);
                    fclose(pFile);
                    return 1;
                }
                pData[j*aM+i] = (complex double)real;
            }
        }
    } else {

#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT)
        cuDoubleComplex *pData = (cuDoubleComplex *)apData;
        for (int i = 0; i < aM; i++) {
            for (int j = 0; j < aN; j++) {
                // Assuming the CSV data is separated by commas,
                // fscanf can be used to read one value at a time.
                if (fscanf(pFile, "%lf,", &real) != 1) {
                    fprintf(stderr, "Error reading file at row %d, column %d\n", i, j);
                    fclose(pFile);
                    return 1;
                }
                pData[j*aM+i] = make_cuDoubleComplex(real, 0);
            }
        }
#endif
    }

    fclose(pFile);
    return 0;
}
(parsec_ReadCSVToComplex_taskpool_t *)pTaskpool; + parsec_del2arena(&ReadCSVToComplex_taskpool->arenas_datatypes[PARSEC_ReadCSVToComplex_DEFAULT_ADT_IDX]); + parsec_taskpool_free(pTaskpool); +} + +int ReadCSVToComplex(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) +{ /* Builds the ReadCSVToComplex taskpool, runs it on apContext until completion, then destroys it. */ + VERBOSE_PRINT(aRank, aVerbose, ("Reading %s\n", apFilename)); /* NOTE(review): executed before the constructor's NULL check on apFilename - confirm %s never receives NULL */ + parsec_taskpool_t *pTaskpool = ReadCSVToComplexConstructor(apDesc, aMB, aNB, aNodes, aTimeSlot, + apFilename, aRank, aVerbose, aGpus); + + if(pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext); + parsec_context_wait(apContext); + ReadCSVToComplexDestructor(pTaskpool); + } + return 0; /* returned unconditionally, also when no taskpool was created */ +} + +%} diff --git a/src/runtime/parsec/jdf/ReadCSVToComplexTimeSlot.jdf b/src/runtime/parsec/jdf/ReadCSVToComplexTimeSlot.jdf new file mode 100644 index 00000000..dc3cb4a3 --- /dev/null +++ b/src/runtime/parsec/jdf/ReadCSVToComplexTimeSlot.jdf @@ -0,0 +1,154 @@ + +extern "C" %{ +/* + * @copyright (c) 2023 King Abdullah University of Science and Technology (KAUST). + * @copyright (c) 2023 The University of Tennessee and The University of Tennessee Research Foundation. + * All rights reserved. + */ + +#include +#include + +static int ReadCSVToComplexTimeSlotCore(const char* apFilename, void *apData, int aM, int aN, int aGpus) { /* Fills the aM x aN tile apData with reals read from apFilename (imaginary part 0). Returns -1 if the file cannot be opened, 1 on a read error in DEBUG_INFO_GB24 builds, 0 otherwise. */ + + FILE *pFile = fopen(apFilename, "r"); + if (!pFile) { + printf("File opening failed: %s", apFilename); /* NOTE(review): written to stdout with no trailing newline */ + return -1; + } + + int status = 0; + double real; + if( 0 == aGpus ) { + complex double *pData = (complex double *)apData; + for (int i = 0; i < aM; i++) { + for (int j = 0; j < aN; j++) { + // Assuming the CSV data is separated by commas, + // fscanf can be used to read directly into the array.
+ status = fscanf(pFile, "%lf\n", &real); /* NOTE(review): "%lf\n" only skips whitespace separators, while the comment above and the GPU branch ("%lf,") assume commas - confirm the file format */ + pData[j*aM+i] = (complex double)real; +#if DEBUG_INFO_GB24 /* read errors are only detected in DEBUG_INFO_GB24 builds */ + if (status != 1) { + fprintf(stderr, "Error reading file at row %d, column %d\n", i, j); + fclose(pFile); + return 1; + } +#endif + } + } + +#if DEBUG_INFO_GB24 + climate_emulator_print_matrix_col_complex(pData, 10, 10, aM); +#endif + + + } else { + +#if defined(PARSEC_HAVE_DEV_CUDA_SUPPORT) || defined(PARSEC_HAVE_DEV_HIP_SUPPORT) + cuDoubleComplex *pData = (cuDoubleComplex *)apData; + for (int i = 0; i < aM; i++) { + for (int j = 0; j < aN; j++) { + // Assuming the CSV data is separated by commas, + // fscanf can be used to read directly into the array. + status = fscanf(pFile, "%lf,", &real); + pData[j*aM+i] = make_cuDoubleComplex(real, 0); +#if DEBUG_INFO_GB24 + if (status != 1) { + fprintf(stderr, "Error reading file at row %d, column %d\n", i, j); + fclose(pFile); + return 1; + } +#endif + } + } +#endif + + } + fclose(pFile); + return 0; +} + +%} + +pDescA [ type = "parsec_tiled_matrix_t*" ] +pFilename [ type = "char *" ] +nb_gpus [ type = "int" ] + +task(m, n) + +m = 0 .. pDescA->lmt-1 +n = 0 .. pDescA->lnt-1 + +: pDescA(m, n) + +RW A <- pDescA(m, n) + -> pDescA(m, n) + +BODY +{ + char pFileZ_data[200]; /* per-task path buffer kept on the stack: the former malloc'd buffer was never freed and its result never checked */ + snprintf(pFileZ_data, sizeof(pFileZ_data), "%s%s%d%s", pFilename,"/z_", n, ".csv"); /* <pFilename>/z_<n>.csv, truncated if it does not fit */ + ReadCSVToComplexTimeSlotCore(pFileZ_data, A, pDescA->mb, pDescA->nb, nb_gpus); /* return value is ignored */ + if(0 == nb_gpus) SumComplexData(A, pDescA->mb, pDescA->nb); +} +END + +extern "C" %{ + +/** + * @return the parsec object to schedule, or NULL when apFilename is NULL. + */ +parsec_taskpool_t* ReadCSVToComplexTimeSlotConstructor(parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) { + + // Init and allocate memory + int kq = (aTimeSlot%aNodes)?
aTimeSlot/aNodes+1 : aTimeSlot/aNodes; /* kq = aTimeSlot / aNodes, rounded up */ + parsec_matrix_block_cyclic_init(apDesc, PARSEC_MATRIX_COMPLEX_DOUBLE, PARSEC_MATRIX_TILE, aRank, aMB, aNB, aMB, + aNB*aTimeSlot, 0, 0, aMB, aNB*aTimeSlot, 1, aNodes, 1, kq, 0, 0); /* presumably an aMB x (aNB*aTimeSlot) matrix in aMB x aNB tiles over a 1 x aNodes grid - confirm against parsec_matrix_block_cyclic_init */ + + apDesc->mat = parsec_data_allocate((size_t)apDesc->super.nb_local_tiles * + (size_t)apDesc->super.bsiz * + (size_t)parsec_datadist_getsizeoftype(apDesc->super.mtype)); /* NOTE(review): the allocation result is not checked */ + + if(NULL == apFilename) { + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, "desc"); + VERBOSE_PRINT(aRank, aVerbose, ("FileName is NULL\n")); + return NULL; /* apDesc stays initialised and apDesc->mat stays allocated on this path */ + } + parsec_data_collection_set_key((parsec_data_collection_t*)apDesc, apFilename); + parsec_ReadCSVToComplexTimeSlot_taskpool_t *taskpool = parsec_ReadCSVToComplexTimeSlot_new(&apDesc->super, apFilename, aGpus); + + parsec_add2arena(&taskpool->arenas_datatypes[PARSEC_ReadCSVToComplexTimeSlot_DEFAULT_ADT_IDX], + parsec_datatype_double_complex_t, PARSEC_MATRIX_FULL, + 1, aMB, aNB, aMB, PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + + return (parsec_taskpool_t*)taskpool; +} + +/** + * @param [inout] pTaskpool the parsec object to destroy +*/ +void ReadCSVToComplexTimeSlotDestructor(parsec_taskpool_t *pTaskpool) +{ + parsec_ReadCSVToComplexTimeSlot_taskpool_t *ReadCSVToComplexTimeSlot_taskpool = (parsec_ReadCSVToComplexTimeSlot_taskpool_t *)pTaskpool; + parsec_del2arena(&ReadCSVToComplexTimeSlot_taskpool->arenas_datatypes[PARSEC_ReadCSVToComplexTimeSlot_DEFAULT_ADT_IDX]); + parsec_taskpool_free(pTaskpool); +} + +int ReadCSVToComplexTimeSlot(parsec_context_t *apContext, parsec_matrix_block_cyclic_t *apDesc, int aMB, int aNB, int aNodes, + int aTimeSlot, char *apFilename, int aRank, int aVerbose, int aGpus) +{ /* Builds the ReadCSVToComplexTimeSlot taskpool, runs it on apContext until completion, then destroys it. */ + VERBOSE_PRINT(aRank, aVerbose, ("Reading %s\n", apFilename)); /* NOTE(review): executed before the constructor's NULL check on apFilename - confirm %s never receives NULL */ + parsec_taskpool_t *pTaskpool = ReadCSVToComplexTimeSlotConstructor(apDesc, aMB, aNB, aNodes, aTimeSlot, + apFilename, aRank, aVerbose, aGpus); + + if(pTaskpool != NULL ){ + parsec_context_add_taskpool(apContext, pTaskpool); + parsec_context_start(apContext);
+ parsec_context_wait(apContext); + ReadCSVToComplexTimeSlotDestructor(pTaskpool); + } + return 0; /* returned unconditionally, also when no taskpool was created */ +} + +%} diff --git a/tests/R-tests/TestExaGeoStatAPI.R b/tests/R-tests/TestExaGeoStatAPI.R index 9d1328fe..fdb5c7be 100644 --- a/tests/R-tests/TestExaGeoStatAPI.R +++ b/tests/R-tests/TestExaGeoStatAPI.R @@ -18,7 +18,6 @@ ncores <- 1 ngpus <- 0 problem_size <- 16 dts <- 8 -lts <- 0 computation <- "exact" dimension = "2D" kernel <- "univariate_matern_stationary" @@ -75,3 +74,19 @@ test_x <- c(0.2, 0.330) test_y <- c(0.104, 0.14) predict_data(train_data=list(locations_x, locations_y, z_value), test_data=list(test_x, test_y), kernel=kernel, dts=dts, estimated_theta=estimated_theta) + +paste("---------------------------------------------------------------") +paste("ExaGeoStat with all Modules - tile low rank") + +problem_size <- 1600 +computation <- "tlr" +lts <- 400 +dts <- 400 +max_rank <- 200 +# acc is an exponent: the final value is computed as 10^(-1.0 * acc) +acc <- 5 + +hardware <- new(Hardware, computation, ncores, ngpus, p, q) +exageostat_data <- simulate_data(kernel=kernel, initial_theta=initial_theta, problem_size=problem_size, dts=dts, dimension=dimension) +estimated_theta <- model_data(matrix=exageostat_data$m, x=exageostat_data$x, y=exageostat_data$y, kernel=kernel, dts=dts, lts = lts, dimension=dimension,lb=lower_bound, ub=upper_bound, mle_itr=10, computation=computation, max_rank=max_rank, acc=acc) + diff --git a/tests/cpp-tests/CMakeLists.txt b/tests/cpp-tests/CMakeLists.txt index 75c33860..fcbb425e 100644 --- a/tests/cpp-tests/CMakeLists.txt +++ b/tests/cpp-tests/CMakeLists.txt @@ -8,19 +8,24 @@ # @author Mahmoud ElKarargy # @date 2024-01-24 -add_subdirectory(api) add_subdirectory(configurations) -add_subdirectory(data-generators) -add_subdirectory(hardware) -add_subdirectory(helpers) -add_subdirectory(kernels) -add_subdirectory(linear-algebra-solvers) -add_subdirectory(prediction) -add_subdirectory(results) -if (USE_HICMA) - add_subdirectory(data-units) -endif () +#
Check the value of RUNTIME_TYPE (upper-cased by the top-level CMakeLists.txt); "configurations" above is built for every runtime +if(RUNTIME_TYPE STREQUAL "STARPU") + add_subdirectory(api) + add_subdirectory(data-generators) + add_subdirectory(hardware) + add_subdirectory(helpers) + add_subdirectory(kernels) + add_subdirectory(linear-algebra-solvers) + add_subdirectory(prediction) + add_subdirectory(results) + if (USE_HICMA) + add_subdirectory(data-units) + endif () +elseif(RUNTIME_TYPE STREQUAL "PARSEC") + # Empty branch: no PaRSEC-specific test directories are added +endif() if (USE_R) add_subdirectory(Rcpp-adapters) diff --git a/tests/cpp-tests/configurations/TestConfigurations.cpp b/tests/cpp-tests/configurations/TestConfigurations.cpp index 6ea294f6..5c34397d 100644 --- a/tests/cpp-tests/configurations/TestConfigurations.cpp +++ b/tests/cpp-tests/configurations/TestConfigurations.cpp @@ -15,15 +15,15 @@ * @date 2023-01-31 **/ -#include - #include #include +#include using namespace std; using namespace exageostat::common; using namespace exageostat::configurations; +using namespace exageostat::configurations::validator; void TEST_ARGUMENT_INITIALIZATION() { @@ -39,7 +39,7 @@ void TEST_ARGUMENT_INITIALIZATION() { const_cast("--ub=5:5:5"), const_cast("--lb=0.1:0.1:0.1"), const_cast("--max_mle_iterations=5"), - const_cast("--tolerance=4"), + const_cast("--tolerance=8"), const_cast("--ZMiss=6"), const_cast("--mspe"), const_cast("--idw"), @@ -49,8 +49,6 @@ void TEST_ARGUMENT_INITIALIZATION() { }; Configurations configurations; - - // Initialize configuration dictionary with only common arguments configurations.InitializeArguments(argc, argv); REQUIRE(configurations.GetProblemSize() == 16); @@ -58,33 +56,10 @@ void TEST_ARGUMENT_INITIALIZATION() { REQUIRE(configurations.GetDenseTileSize() == 8); REQUIRE(configurations.GetPrecision() == DOUBLE); - // No data generation arguments initialized - REQUIRE(configurations.GetDataPath() == string("")); - - // No data modeling arguments initialized - REQUIRE_THROWS(configurations.GetMaxMleIterations()); - REQUIRE_THROWS(configurations.GetTolerance()); - - // No data prediction
arguments initialized - REQUIRE(configurations.GetIsMSPE() == false); - REQUIRE(configurations.GetIsIDW() == false); - REQUIRE(configurations.GetIsFisher() == false); - REQUIRE(configurations.GetIsMLOEMMOM() == false); - REQUIRE(configurations.GetUnknownObservationsNb() == 0); - - // Data generation arguments initialized - configurations.InitializeDataGenerationArguments(); - REQUIRE(configurations.GetDataPath() == string("./dummy-path")); - // Data modelling arguments initialized - configurations.InitializeDataModelingArguments(); - REQUIRE(configurations.GetMaxMleIterations() == 5); - REQUIRE(configurations.GetTolerance() == pow(10, -4)); - - // Data prediction arguments initialized - configurations.InitializeDataPredictionArguments(); + REQUIRE(configurations.GetTolerance() == pow(10, -8)); REQUIRE(configurations.GetIsMSPE() == true); REQUIRE(configurations.GetIsIDW() == true); @@ -94,6 +69,48 @@ void TEST_ARGUMENT_INITIALIZATION() { } + +void TEST_ARGUMENT_INITIALIZATION_PARSEC() { + + const int argc = 13; // number of entries in argv below + char *argv[] = { + const_cast("program_name"), + const_cast("--N=16"), + const_cast("--dts=8"), + const_cast("--precision=double"), + const_cast("--band_dense=100"), + const_cast("--objects-number=72"), + const_cast("--adaptive_decision=1"), + const_cast("--add_diagonal=10"), + const_cast("--file_time_slot=1"), + const_cast("--file-number=1"), + const_cast("--enable-inverse"), + const_cast("--mpiio"), + const_cast("--data_path=./dummy-path"), + }; + + Configurations configurations; + // Parse the common and the Hicma-Parsec specific arguments listed above + configurations.InitializeArguments(argc, argv); + + REQUIRE(configurations.GetProblemSize() == 16); + REQUIRE(configurations.GetDenseTileSize() == 8); + REQUIRE(configurations.GetPrecision() == DOUBLE); + + // Check Hicma-Parsec parameters + REQUIRE(configurations.GetDenseBandDP() == 100); + REQUIRE(configurations.GetObjectsNumber() == 72); + REQUIRE(configurations.GetAdaptiveDecision() == 1); +
REQUIRE(configurations.GetDiagonalAddition() == 10); + REQUIRE(configurations.GetTimeSlotPerFile() == 1); + REQUIRE(configurations.GetFileNumber() == 1); + REQUIRE(configurations.GetEnableInverse() == true); + REQUIRE(configurations.GetMPIIO() == true); + + REQUIRE(configurations.GetDataPath() == string("./dummy-path")); // value passed through --data_path +} + + void TEST_SYNTHETIC_CONFIGURATIONS() { Configurations synthetic_data_configurations; @@ -106,9 +123,9 @@ void TEST_SYNTHETIC_CONFIGURATIONS() { }SECTION("Dimensions value checker test") { REQUIRE_THROWS_WITH( - synthetic_data_configurations.CheckDimensionValue("4D"), + Validator::CheckDimensionValue("4D"), "Invalid value for Dimension. Please use 2D, 3D or ST."); - Configurations::CheckDimensionValue("2D"); + Validator::CheckDimensionValue("2D"); // accepted value: the call is expected not to throw } SECTION("P-GRID setter/getter test") @@ -119,42 +136,40 @@ void TEST_SYNTHETIC_CONFIGURATIONS() { }SECTION("P-GRID value checker test") { REQUIRE_THROWS_WITH( - synthetic_data_configurations.CheckNumericalValue("K"), + Validator::CheckNumericalValue("K"), "Invalid value. Please use Numerical values only."); REQUIRE_THROWS_WITH( - synthetic_data_configurations.CheckNumericalValue("-100"), + Validator::CheckNumericalValue("-100"), "Invalid value. Please use positive values"); - int test_nb = Configurations::CheckNumericalValue("512"); + int test_nb = Validator::CheckNumericalValue("512"); synthetic_data_configurations.SetPGrid(test_nb); REQUIRE(synthetic_data_configurations.GetPGrid() == 512); }SECTION("Kernel setter/getter test") { - REQUIRE(synthetic_data_configurations.GetKernelName().empty()); synthetic_data_configurations.SetKernelName("univariate_matern_stationary"); REQUIRE(synthetic_data_configurations.GetKernelName() == "univariate_matern_stationary"); }SECTION("Kernel checker value test") { REQUIRE_THROWS_WITH( - synthetic_data_configurations.CheckKernelValue("100"), + Validator::CheckKernelValue("100"), "Invalid value for Kernel.
Please check manual."); REQUIRE_THROWS_WITH( - synthetic_data_configurations.CheckKernelValue("univariate_matern_dnu%"), + Validator::CheckKernelValue("univariate_matern_dnu%"), "Invalid value for Kernel. Please check manual."); - synthetic_data_configurations.CheckKernelValue("univariate_matern_dnu"); + Validator::CheckKernelValue("univariate_matern_dnu"); // accepted value: the call is expected not to throw }SECTION("Problem size setter/getter test") { - REQUIRE(synthetic_data_configurations.GetProblemSize() == 0); synthetic_data_configurations.SetProblemSize(random_number); REQUIRE(synthetic_data_configurations.GetProblemSize() == random_number); }SECTION("Problem size checker value test") { REQUIRE_THROWS_WITH( - synthetic_data_configurations.CheckNumericalValue("K"), + Validator::CheckNumericalValue("K"), "Invalid value. Please use Numerical values only."); REQUIRE_THROWS_WITH( - synthetic_data_configurations.CheckNumericalValue("-100"), + Validator::CheckNumericalValue("-100"), "Invalid value. Please use positive values"); - int test_nb = Configurations::CheckNumericalValue("512"); + int test_nb = Validator::CheckNumericalValue("512"); synthetic_data_configurations.SetProblemSize(test_nb); REQUIRE(synthetic_data_configurations.GetProblemSize() == 512); } @@ -166,7 +181,7 @@ void TEST_COPY_CONSTRUCTOR() { Configurations synthetic_data_configurations; synthetic_data_configurations.SetProblemSize(10); synthetic_data_configurations.SetKernelName("BivariateSpacetimeMaternStationary"); - synthetic_data_configurations.SetPrecision(exageostat::common::MIXED); + synthetic_data_configurations.SetPrecision(MIXED); synthetic_data_configurations.SetLoggerPath("any/path"); vector lb{0.1, 0.1, 0.1}; synthetic_data_configurations.SetLowerBounds(lb); @@ -186,4 +201,7 @@ TEST_CASE("Configurations Tests") { TEST_SYNTHETIC_CONFIGURATIONS(); TEST_COPY_CONSTRUCTOR(); TEST_ARGUMENT_INITIALIZATION(); +#if !DEFAULT_RUNTIME // NOTE(review): DEFAULT_RUNTIME is not defined in this file - presumably a build-system definition; if it is ever left undefined the preprocessor evaluates it as 0 and the PaRSEC test is compiled in + TEST_ARGUMENT_INITIALIZATION_PARSEC(); +#endif } diff --git a/tests/cpp-tests/prediction/TestPrediction.cpp
b/tests/cpp-tests/prediction/TestPrediction.cpp index 49605ed7..f108887e 100644 --- a/tests/cpp-tests/prediction/TestPrediction.cpp +++ b/tests/cpp-tests/prediction/TestPrediction.cpp @@ -89,8 +89,6 @@ void TEST_PREDICTION_MISSING_DATA() { configurations.GetKernelName(), configurations.GetTimeSlot()); - // Add the data prediction arguments. - configurations.InitializeDataPredictionArguments(); Prediction::PredictMissingData(data, configurations, z_matrix, *pKernel); REQUIRE(Results::GetInstance()->GetMSPEError() == Catch::Approx(0.552448)); @@ -109,8 +107,6 @@ void TEST_PREDICTION_MISSING_DATA() { exageostat::kernels::Kernel *pKernel = exageostat::plugins::PluginRegistry>::Create( configurations.GetKernelName(), configurations.GetTimeSlot()); - // Add the data prediction arguments. - configurations.InitializeDataPredictionArguments(); Prediction::PredictMissingData(data, configurations, z_matrix, *pKernel); for (int i = 0; i < 3; i++) { REQUIRE(Results::GetInstance()->GetIDWError()[i] == Catch::Approx(idw_error[i])); @@ -129,8 +125,6 @@ void TEST_PREDICTION_MISSING_DATA() { exageostat::kernels::Kernel *pKernel = exageostat::plugins::PluginRegistry>::Create( configurations.GetKernelName(), configurations.GetTimeSlot()); - // Add the data prediction arguments. - configurations.InitializeDataPredictionArguments(); Prediction::PredictMissingData(data, configurations, z_matrix, *pKernel); REQUIRE(Results::GetInstance()->GetMLOE() == Catch::Approx(0.004467).margin(0.001)); REQUIRE(Results::GetInstance()->GetMMOM() == Catch::Approx(-0.0812376).margin(0.001)); @@ -149,8 +143,6 @@ void TEST_PREDICTION_MISSING_DATA() { configurations.GetKernelName(), configurations.GetTimeSlot()); - // Add the data prediction arguments. 
- configurations.InitializeDataPredictionArguments(); Prediction::PredictMissingData(data, configurations, z_matrix, *pKernel); REQUIRE(Results::GetInstance()->GetMLOE() == Catch::Approx(0).margin(0.001)); REQUIRE(Results::GetInstance()->GetMMOM() == Catch::Approx(0).margin(0.001)); @@ -169,8 +161,7 @@ void TEST_PREDICTION_MISSING_DATA() { exageostat::kernels::Kernel *pKernel = exageostat::plugins::PluginRegistry>::Create( configurations.GetKernelName(), configurations.GetTimeSlot()); - // Add the data prediction arguments. - configurations.InitializeDataPredictionArguments(); + Prediction::PredictMissingData(data, configurations, z_matrix, *pKernel); vector required_fisher = {0.1045891821, 0.0005116817, 0.0409307011, 0.0005116817, 0.1873553354,