From c78c8cecc6ffb87b12b411486a05f4e00486ffe0 Mon Sep 17 00:00:00 2001 From: jjwilke Date: Wed, 17 Apr 2019 12:57:58 -0700 Subject: [PATCH 001/530] major cmake overhaul: #1902, #1930, #2026, #2027, #2099 --- CMakeLists.txt | 141 ++-- Makefile.kokkos | 56 +- algorithms/CMakeLists.txt | 14 +- algorithms/src/CMakeLists.txt | 19 +- algorithms/unit_tests/CMakeLists.txt | 25 +- cmake/KokkosConfig.cmake.in | 28 +- cmake/deps/CUDA.cmake | 2 +- cmake/deps/CUSPARSE.cmake | 2 +- cmake/deps/HWLOC.cmake | 2 +- cmake/deps/Pthread.cmake | 4 +- cmake/deps/QTHREADS.cmake | 2 +- cmake/kokkos_build.cmake | 261 ------- cmake/kokkos_install.cmake | 73 ++ cmake/kokkos_tribits.cmake | 662 ++++++++++++++++++ cmake/tpls/FindTPLHWLOC.cmake | 2 +- cmake/tpls/FindTPLPthread.cmake | 2 +- cmake/tpls/FindTPLQTHREADS.cmake | 2 +- cmake/tribits.cmake | 531 -------------- containers/CMakeLists.txt | 13 +- containers/performance_tests/CMakeLists.txt | 27 +- containers/src/CMakeLists.txt | 50 +- containers/unit_tests/CMakeLists.txt | 38 +- core/CMakeLists.txt | 13 +- core/perf_test/CMakeLists.txt | 49 +- core/src/CMakeLists.txt | 131 +--- core/unit_test/CMakeLists.txt | 280 ++------ example/CMakeLists.txt | 24 +- example/feint/CMakeLists.txt | 5 +- example/fenl/CMakeLists.txt | 6 +- example/fixture/CMakeLists.txt | 4 +- example/global_2_local_ids/CMakeLists.txt | 11 +- example/grow_array/CMakeLists.txt | 5 +- .../CMakeLists.txt | 0 .../cmake_example.cpp | 0 example/{cmake_build => in_tree_build}/foo.f | 0 example/md_skeleton/CMakeLists.txt | 8 +- example/multi_fem/CMakeLists.txt | 7 +- example/query_device/CMakeLists.txt | 6 +- example/sort_array/CMakeLists.txt | 5 +- .../tutorial/01_hello_world/CMakeLists.txt | 5 +- .../01_hello_world_lambda/CMakeLists.txt | 13 +- .../tutorial/02_simple_reduce/CMakeLists.txt | 5 +- .../02_simple_reduce_lambda/CMakeLists.txt | 13 +- .../tutorial/03_simple_view/CMakeLists.txt | 5 +- .../03_simple_view_lambda/CMakeLists.txt | 2 +- 
.../04_simple_memoryspaces/CMakeLists.txt | 5 +- .../tutorial/05_simple_atomics/CMakeLists.txt | 6 +- .../06_simple_mdrangepolicy/CMakeLists.txt | 6 +- .../01_data_layouts/CMakeLists.txt | 5 +- .../02_memory_traits/CMakeLists.txt | 5 +- .../Advanced_Views/03_subviews/CMakeLists.txt | 5 +- .../04_dualviews/CMakeLists.txt | 5 +- .../05_NVIDIA_UVM/CMakeLists.txt | 12 +- .../tutorial/Advanced_Views/CMakeLists.txt | 10 +- .../01_thread_teams/CMakeLists.txt | 5 +- .../01_thread_teams_lambda/CMakeLists.txt | 13 +- .../02_nested_parallel_for/CMakeLists.txt | 5 +- .../03_vectorization/CMakeLists.txt | 10 +- .../04_team_scan/CMakeLists.txt | 6 +- .../Hierarchical_Parallelism/CMakeLists.txt | 10 +- example/tutorial/launch_bounds/CMakeLists.txt | 5 +- 61 files changed, 1098 insertions(+), 1568 deletions(-) delete mode 100644 cmake/kokkos_build.cmake create mode 100644 cmake/kokkos_install.cmake create mode 100644 cmake/kokkos_tribits.cmake delete mode 100644 cmake/tribits.cmake rename example/{cmake_build => in_tree_build}/CMakeLists.txt (100%) rename example/{cmake_build => in_tree_build}/cmake_example.cpp (100%) rename example/{cmake_build => in_tree_build}/foo.f (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 236f523aec2..59898e14632 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,75 +6,60 @@ ELSE() SET(KOKKOS_HAS_TRILINOS OFF CACHE BOOL "") ENDIF() +# Basic initialization (Used in KOKKOS_SETTINGS) +set(Kokkos_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) +set(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) +set(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) + IF(NOT KOKKOS_HAS_TRILINOS) cmake_minimum_required(VERSION 3.3 FATAL_ERROR) - - # Define Project Name if this is a standalone build IF(NOT DEFINED ${PROJECT_NAME}) - project(Kokkos CXX) + PROJECT(Kokkos CXX) ENDIF() + set (Kokkos_VERSION_MAJOR 2) + set (Kokkos_VERSION_MINOR 7) + set (Kokkos_VERSION_PATCH 4) +ENDIF() - # Basic initialization (Used in KOKKOS_SETTINGS) - 
set(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) - set(KOKKOS_PATH ${KOKKOS_SRC_PATH}) - - #------------ COMPILER AND FEATURE CHECKS ------------------------------------ - include(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake) - set_kokkos_cxx_compiler() - set_kokkos_cxx_standard() - - #------------ GET OPTIONS AND KOKKOS_SETTINGS -------------------------------- - # Add Kokkos' modules to CMake's module path. - set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/") - - set(KOKKOS_CMAKE_VERBOSE True) - include(${KOKKOS_SRC_PATH}/cmake/kokkos_options.cmake) - - include(${KOKKOS_SRC_PATH}/cmake/kokkos_settings.cmake) - - #------------ GENERATE HEADER AND SOURCE FILES ------------------------------- - execute_process( - COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} PREFIX=${CMAKE_INSTALL_PREFIX} generate_build_settings - WORKING_DIRECTORY "${Kokkos_BINARY_DIR}" - OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out - RESULT_VARIABLE GEN_SETTINGS_RESULT - ) - if (GEN_SETTINGS_RESULT) - message(FATAL_ERROR "Kokkos settings generation failed:\n" - "${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings") - endif() - include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake) - install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION lib/cmake/Kokkos) - install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION ${CMAKE_INSTALL_PREFIX}) - string(REPLACE " " ";" KOKKOS_TPL_INCLUDE_DIRS "${KOKKOS_GMAKE_TPL_INCLUDE_DIRS}") - string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_DIRS "${KOKKOS_GMAKE_TPL_LIBRARY_DIRS}") - string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_NAMES "${KOKKOS_GMAKE_TPL_LIBRARY_NAMES}") - list(REMOVE_ITEM KOKKOS_TPL_INCLUDE_DIRS "") - list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_DIRS "") - list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_NAMES "") - set_kokkos_srcs(KOKKOS_SRC 
${KOKKOS_SRC}) - - #------------ NOW BUILD ------------------------------------------------------ - include(${KOKKOS_SRC_PATH}/cmake/kokkos_build.cmake) - - #------------ Add in Fake Tribits Handling to allow unit test builds- -------- - - include(${KOKKOS_SRC_PATH}/cmake/tribits.cmake) - - TRIBITS_PACKAGE_DECL(Kokkos) - - ADD_SUBDIRECTORY(core) - ADD_SUBDIRECTORY(containers) - ADD_SUBDIRECTORY(algorithms) +INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake) +KOKKOS_SETUP_BUILD_ENVIRONMENT() + +SET_PROPERTY(GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) +SET_PROPERTY(GLOBAL PROPERTY KOKKOS_ALL_SOURCES) +SET_PROPERTY(GLOBAL PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES) +SET_PROPERTY(GLOBAL PROPERTY KOKKOS_COMPILE_DEFINITIONS) +IF (NOT KOKKOS_HAS_TRILINOS AND NOT KOKKOS_SEPARATE_LIBS) + SET_PROPERTY(GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES kokkos) +ENDIF() + +GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY) +IF (KOKKOS_HAS_TRILINOS) + SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) + SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR}) +ELSEIF(HAS_PARENT) + SET(KOKKOS_HEADER_DIR "include/kokkos") ELSE() + SET(KOKKOS_HEADER_DIR "include") +ENDIF() + + +#------------ NOW BUILD ------------------------------------------------------ +#include(${KOKKOS_SRC_PATH}/cmake/kokkos_build.cmake) + +#TRIBITS_PACKAGE_DECL(Kokkos) +#ADD_SUBDIRECTORY(core) +#ADD_SUBDIRECTORY(containers) +#ADD_SUBDIRECTORY(algorithms) + #------------------------------------------------------------------------------ # # A) Forward declare the package so that certain options are also defined for # subpackages # -TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS) +KOKKOS_PACKAGE_DECL() #------------------------------------------------------------------------------ @@ -87,21 +72,18 @@ TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS) # in the previously define Kokkos_GEN_DIR # We need to copy them over to the correct place and source the cmake file -if(NOT 
KOKKOS_LEGACY_TRIBITS) - set(Kokkos_GEN_DIR ${CMAKE_BINARY_DIR}) - file(COPY "${Kokkos_GEN_DIR}/KokkosCore_config.h" - DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS) - install(FILES "${Kokkos_GEN_DIR}/KokkosCore_config.h" - DESTINATION include) - file(COPY "${Kokkos_GEN_DIR}/kokkos_generated_settings.cmake" - DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS) - - include(${CMAKE_CURRENT_BINARY_DIR}/kokkos_generated_settings.cmake) - # Sources come from makefile-generated kokkos_generated_settings.cmake file - # Enable using the individual sources if needed - set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) -endif () +set(Kokkos_GEN_DIR ${CMAKE_BINARY_DIR}) +file(COPY "${Kokkos_GEN_DIR}/KokkosCore_config.h" + DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS) +install(FILES "${Kokkos_GEN_DIR}/KokkosCore_config.h" + DESTINATION include) +file(COPY "${Kokkos_GEN_DIR}/kokkos_generated_settings.cmake" + DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS) +include(${CMAKE_CURRENT_BINARY_DIR}/kokkos_generated_settings.cmake) +# Sources come from makefile-generated kokkos_generated_settings.cmake file +# Enable using the individual sources if needed +set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) #------------------------------------------------------------------------------ # @@ -119,10 +101,11 @@ INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION bin) #------------------------------------------------------------------------------ # -# D) Process the subpackages for Kokkos +# D) Process the subpackages (subdirectories) for Kokkos # -TRIBITS_PROCESS_SUBPACKAGES() + +KOKKOS_PROCESS_SUBPACKAGES() #------------------------------------------------------------------------------ @@ -130,10 +113,16 @@ TRIBITS_PROCESS_SUBPACKAGES() # E) If Kokkos itself is enabled, process the Kokkos package # -TRIBITS_PACKAGE_DEF() +KOKKOS_PACKAGE_DEF() +KOKKOS_EXCLUDE_AUTOTOOLS_FILES() +KOKKOS_PACKAGE_POSTPROCESS() + +IF 
(NOT KOKKOS_HAS_TRILINOS) + IF (NOT KOKKOS_SEPARATE_LIBS) + KOKKOS_MAKE_LIBKOKKOS() + ENDIF() + include(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake) +ENDIF() -TRIBITS_EXCLUDE_AUTOTOOLS_FILES() -TRIBITS_PACKAGE_POSTPROCESS() -ENDIF() diff --git a/Makefile.kokkos b/Makefile.kokkos index 45c307617da..6d75304e039 100644 --- a/Makefile.kokkos +++ b/Makefile.kokkos @@ -380,7 +380,7 @@ endif #CPPFLAGS is now unused KOKKOS_CPPFLAGS = ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_CXXFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH) + KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH) endif KOKKOS_TPL_INCLUDE_DIRS = KOKKOS_TPL_LIBRARY_DIRS = @@ -531,17 +531,17 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) - ifneq ($(HWLOC_PATH),) - ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_CXXFLAGS += -I$(HWLOC_PATH)/include + ifneq ($(KOKKOS_CMAKE), yes) + ifneq ($(HWLOC_PATH),) + KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include + KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib + KOKKOS_CXXLDFLAGS += -L$(HWLOC_PATH)/lib + KOKKOS_TPL_INCLUDE_DIRS += $(HWLOC_PATH)/include + KOKKOS_TPL_LIBRARY_DIRS += $(HWLOC_PATH)/lib endif - KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib - KOKKOS_CXXLDFLAGS += -L$(HWLOC_PATH)/lib - KOKKOS_TPL_INCLUDE_DIRS += $(HWLOC_PATH)/include - KOKKOS_TPL_LIBRARY_DIRS += $(HWLOC_PATH)/lib + KOKKOS_LIBS += -lhwloc + KOKKOS_TPL_LIBRARY_NAMES += hwloc endif - KOKKOS_LIBS += -lhwloc - KOKKOS_TPL_LIBRARY_NAMES += hwloc tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HWLOC") endif @@ -552,17 +552,17 @@ ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) - ifneq ($(MEMKIND_PATH),) - ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_CXXFLAGS += -I$(MEMKIND_PATH)/include + ifneq ($(KOKKOS_CMAKE), yes) + ifneq ($(MEMKIND_PATH),) + KOKKOS_CPPFLAGS += 
-I$(MEMKIND_PATH)/include + KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib + KOKKOS_CXXLDFLAGS += -L$(MEMKIND_PATH)/lib + KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include + KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib endif - KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib - KOKKOS_CXXLDFLAGS += -L$(MEMKIND_PATH)/lib - KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include - KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib + KOKKOS_LIBS += -lmemkind -lnuma + KOKKOS_TPL_LIBRARY_NAMES += memkind numa endif - KOKKOS_LIBS += -lmemkind -lnuma - KOKKOS_TPL_LIBRARY_NAMES += memkind numa tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HBWSPACE") endif @@ -1141,17 +1141,17 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp) - ifneq ($(QTHREADS_PATH),) - ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_CXXFLAGS += -I$(QTHREADS_PATH)/include + ifneq ($(KOKKOS_CMAKE), yes) + ifneq ($(QTHREADS_PATH),) + KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include + KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib + KOKKOS_CXXLDFLAGS += -L$(QTHREADS_PATH)/lib + KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include + KOKKOS_TPL_LIBRARY_DIRS += $(QTHREADS_PATH)/lib64 endif - KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib - KOKKOS_CXXLDFLAGS += -L$(QTHREADS_PATH)/lib - KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include - KOKKOS_TPL_LIBRARY_DIRS += $(QTHREADS_PATH)/lib64 + KOKKOS_LIBS += -lqthread + KOKKOS_TPL_LIBRARY_NAMES += qthread endif - KOKKOS_LIBS += -lqthread - KOKKOS_TPL_LIBRARY_NAMES += qthread endif ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) diff --git a/algorithms/CMakeLists.txt b/algorithms/CMakeLists.txt index 507c9f2fdb7..5a2ac408020 100644 --- a/algorithms/CMakeLists.txt +++ b/algorithms/CMakeLists.txt @@ -1,12 +1,12 @@ -TRIBITS_SUBPACKAGE(Algorithms) +KOKKOS_SUBPACKAGE(Algorithms) + +ADD_SUBDIRECTORY(src) + +KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) + +KOKKOS_SUBPACKAGE_POSTPROCESS() 
-IF(KOKKOS_HAS_TRILINOS) - ADD_SUBDIRECTORY(src) -ENDIF() -TRIBITS_ADD_TEST_DIRECTORIES(unit_tests) -#TRIBITS_ADD_TEST_DIRECTORIES(performance_tests) -TRIBITS_SUBPACKAGE_POSTPROCESS() diff --git a/algorithms/src/CMakeLists.txt b/algorithms/src/CMakeLists.txt index dfbf3323c2d..c57ee299e7e 100644 --- a/algorithms/src/CMakeLists.txt +++ b/algorithms/src/CMakeLists.txt @@ -1,6 +1,7 @@ -TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) +KOKKOS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) +#I have to leave these here for tribits INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) @@ -12,10 +13,14 @@ LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h) #----------------------------------------------------------------------------- -TRIBITS_ADD_LIBRARY( - kokkosalgorithms - HEADERS ${HEADERS} - SOURCES ${SOURCES} - DEPLIBS - ) +# We have to pass the sources in here for Tribits +# These will get ignored for standalone CMake and a true interface library made +KOKKOS_ADD_INTERFACE_LIBRARY( + kokkosalgorithms + HEADERS ${HEADERS} + SOURCES ${SOURCES} +) +KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES(kokkosalgorithms) + + diff --git a/algorithms/unit_tests/CMakeLists.txt b/algorithms/unit_tests/CMakeLists.txt index e238b37c8ec..eaf85c6773d 100644 --- a/algorithms/unit_tests/CMakeLists.txt +++ b/algorithms/unit_tests/CMakeLists.txt @@ -1,15 +1,9 @@ +#Leave these here for now - I don't need transitive deps anyway INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) -IF(NOT KOKKOS_HAS_TRILINOS) - IF(KOKKOS_SEPARATE_LIBS) - set(TEST_LINK_TARGETS kokkoscore) - ELSE() - set(TEST_LINK_TARGETS kokkos) - ENDIF() -ENDIF() SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest) INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) @@ -20,22 +14,19 @@ INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) # 
possible to build only (e.g.,) KokkosAlgorithms tests, without # building KokkosCore tests. -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0") -TRIBITS_ADD_LIBRARY( +KOKKOS_ADD_TEST_LIBRARY( kokkosalgorithms_gtest HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc - TESTONLY - ) +) +KOKKOS_TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0") SET(SOURCES UnitTestMain.cpp TestCuda.cpp ) -SET(LIBRARIES kokkoscore) - IF(Kokkos_ENABLE_OpenMP) LIST( APPEND SOURCES TestOpenMP.cpp @@ -60,11 +51,7 @@ IF(Kokkos_ENABLE_Pthread) ) ENDIF() -TRIBITS_ADD_EXECUTABLE_AND_TEST( +KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest SOURCES ${SOURCES} - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkosalgorithms_gtest ${TEST_LINK_TARGETS} - ) +) diff --git a/cmake/KokkosConfig.cmake.in b/cmake/KokkosConfig.cmake.in index fc099a494ce..0b4676b6ea0 100644 --- a/cmake/KokkosConfig.cmake.in +++ b/cmake/KokkosConfig.cmake.in @@ -4,15 +4,33 @@ # Kokkos_LIBRARIES - libraries to link against # Compute paths +@PACKAGE_INIT@ + GET_FILENAME_COMPONENT(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) SET(Kokkos_INCLUDE_DIRS "@CONF_INCLUDE_DIRS@") -# Our library dependencies (contains definitions for IMPORTED targets) -IF(NOT TARGET kokkos AND NOT Kokkos_BINARY_DIR) - INCLUDE("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake") -ENDIF() +INCLUDE("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake") + # These are IMPORTED targets created by KokkosTargets.cmake SET(Kokkos_LIBRARY_DIRS @INSTALL_LIB_DIR@) -SET(Kokkos_LIBRARIES @Kokkos_LIBRARIES_NAMES@) +SET(Kokkos_LIBRARIES @KOKKOS_LIBRARIES@) +SET(Kokkos_SEPARATE_LIBS @KOKKOS_SEPARATE_LIBS@) SET(Kokkos_TPL_LIBRARIES @KOKKOS_LIBS@) + +if (${Kokkos_FIND_VERSION_MINOR}) + if (${Kokkos_FIND_VERSION_MINOR} LESS ${Kokkos_VERSION_MINOR}) + FOREACH(LIB ${Kokkos_LIBRARIES}) + set_property(TARGET ${LIB} APPEND PROPERTY + INTERFACE_COMPILE_DEFINITIONS 
"KOKKOS_ENABLE_DEPRECATED_CODE" + ) + ENDFOREACH() + endif() +endif() + +FUNCTION(TARGET_LINK_KOKKOS TARGET) +FOREACH(LIB ${Kokkos_LIBRARIES}) + TARGET_LINK_LIBRARIES(${TARGET} ${ARGN} ${LIB}) +ENDFOREACH() +ENDFUNCTION(TARGET_LINK_KOKKOS) + diff --git a/cmake/deps/CUDA.cmake b/cmake/deps/CUDA.cmake index 801c20067b9..4876bca2590 100644 --- a/cmake/deps/CUDA.cmake +++ b/cmake/deps/CUDA.cmake @@ -73,7 +73,7 @@ IF(NOT _CUDA_FAILURE) GLOBAL_SET(TPL_CUDA_LIBRARY_DIRS) GLOBAL_SET(TPL_CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE}) GLOBAL_SET(TPL_CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_cufft_LIBRARY}) - TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) + KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) ELSE() SET(TPL_ENABLE_CUDA OFF) ENDIF() diff --git a/cmake/deps/CUSPARSE.cmake b/cmake/deps/CUSPARSE.cmake index 6f26d857c09..b2420d1168e 100644 --- a/cmake/deps/CUSPARSE.cmake +++ b/cmake/deps/CUSPARSE.cmake @@ -59,6 +59,6 @@ # GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) # GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) # GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) -# TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) +# KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) #ENDIF() diff --git a/cmake/deps/HWLOC.cmake b/cmake/deps/HWLOC.cmake index 275abd3a5d4..ed89c8c1e51 100644 --- a/cmake/deps/HWLOC.cmake +++ b/cmake/deps/HWLOC.cmake @@ -64,7 +64,7 @@ # Version: 1.3 # -TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC +KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC REQUIRED_HEADERS hwloc.h REQUIRED_LIBS_NAMES "hwloc" ) diff --git a/cmake/deps/Pthread.cmake b/cmake/deps/Pthread.cmake index 46d0a939cad..5f835fc3002 100644 --- a/cmake/deps/Pthread.cmake +++ b/cmake/deps/Pthread.cmake @@ -74,9 +74,9 @@ IF(USE_THREADS) SET(TPL_Pthread_INCLUDE_DIRS "") SET(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") SET(TPL_Pthread_LIBRARY_DIRS "") - TIBITS_CREATE_IMPORTED_TPL_LIBRARY(Pthread) + KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(Pthread) ELSE() - 
TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread + KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread REQUIRED_HEADERS pthread.h REQUIRED_LIBS_NAMES pthread ) diff --git a/cmake/deps/QTHREADS.cmake b/cmake/deps/QTHREADS.cmake index c312f2590bc..b3a36fb7c34 100644 --- a/cmake/deps/QTHREADS.cmake +++ b/cmake/deps/QTHREADS.cmake @@ -63,7 +63,7 @@ # Source: https://code.google.com/p/qthreads # -TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREADS +KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREADS REQUIRED_HEADERS qthread.h REQUIRED_LIBS_NAMES "qthread" ) diff --git a/cmake/kokkos_build.cmake b/cmake/kokkos_build.cmake deleted file mode 100644 index f9b995baae4..00000000000 --- a/cmake/kokkos_build.cmake +++ /dev/null @@ -1,261 +0,0 @@ -############################ Detect if submodule ############################### -# -# With thanks to StackOverflow: -# http://stackoverflow.com/questions/25199677/how-to-detect-if-current-scope-has-a-parent-in-cmake -# -get_directory_property(HAS_PARENT PARENT_DIRECTORY) -if(HAS_PARENT) - message(STATUS "Submodule build") - SET(KOKKOS_HEADER_DIR "include/kokkos") -else() - message(STATUS "Standalone build") - SET(KOKKOS_HEADER_DIR "include") -endif() - -################################ Handle the actual build ####################### - -SET(INSTALL_LIB_DIR lib CACHE PATH "Installation directory for libraries") -SET(INSTALL_BIN_DIR bin CACHE PATH "Installation directory for executables") -SET(INSTALL_INCLUDE_DIR ${KOKKOS_HEADER_DIR} CACHE PATH - "Installation directory for header files") -IF(WIN32 AND NOT CYGWIN) - SET(DEF_INSTALL_CMAKE_DIR CMake) -ELSE() - SET(DEF_INSTALL_CMAKE_DIR lib/CMake/Kokkos) -ENDIF() - -SET(INSTALL_CMAKE_DIR ${DEF_INSTALL_CMAKE_DIR} CACHE PATH - "Installation directory for CMake files") - -# Make relative paths absolute (needed later on) -FOREACH(p LIB BIN INCLUDE CMAKE) - SET(var INSTALL_${p}_DIR) - IF(NOT IS_ABSOLUTE "${${var}}") - SET(${var} "${CMAKE_INSTALL_PREFIX}/${${var}}") - ENDIF() -ENDFOREACH() 
- -# set up include-directories -SET (Kokkos_INCLUDE_DIRS - ${Kokkos_SOURCE_DIR}/core/src - ${Kokkos_SOURCE_DIR}/containers/src - ${Kokkos_SOURCE_DIR}/algorithms/src - ${Kokkos_BINARY_DIR} # to find KokkosCore_config.h - ${KOKKOS_INCLUDE_DIRS} -) - -# pass include dirs back to parent scope -if(HAS_PARENT) -SET(Kokkos_INCLUDE_DIRS_RET ${Kokkos_INCLUDE_DIRS} PARENT_SCOPE) -else() -SET(Kokkos_INCLUDE_DIRS_RET ${Kokkos_INCLUDE_DIRS}) -endif() - -INCLUDE_DIRECTORIES(${Kokkos_INCLUDE_DIRS}) - -IF(KOKKOS_SEPARATE_LIBS) - # Sources come from makefile-generated kokkos_generated_settings.cmake file - # Separate libs need to separate the sources - set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) - - # kokkoscore - ADD_LIBRARY( - kokkoscore - ${KOKKOS_CORE_SRCS} - ) - - target_compile_options( - kokkoscore - PUBLIC $<$:${KOKKOS_CXX_FLAGS}> - ) - - target_include_directories( - kokkoscore - PUBLIC - ${KOKKOS_TPL_INCLUDE_DIRS} - ) - - foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES) - if (("${lib}" STREQUAL "cuda") AND (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) - set(LIB_cuda "-lcuda") - elseif ("${lib}" STREQUAL "hpx") - find_package(HPX REQUIRED) - if(${HPX_FOUND}) - target_link_libraries(kokkoscore PUBLIC ${HPX_LIBRARIES}) - target_link_libraries(kokkoscontainers PUBLIC ${HPX_LIBRARIES}) - target_link_libraries(kokkosalgorithms PUBLIC ${HPX_LIBRARIES}) - target_include_directories(kokkoscore PUBLIC ${HPX_INCLUDE_DIRS}) - target_include_directories(kokkoscontainers PUBLIC ${HPX_INCLUDE_DIRS}) - target_include_directories(kokkosalgorithms PUBLIC ${HPX_INCLUDE_DIRS}) - else() - message(ERROR "HPX not found. 
Check the value of HPX_DIR (= ${HPX_DIR}) or CMAKE_PREFIX_PATH (= ${CMAKE_PREFIX_PATH}).") - endif() - else() - find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS}) - endif() - target_link_libraries(kokkoscore PUBLIC ${LIB_${lib}}) - endforeach() - - target_link_libraries(kokkoscore PUBLIC "${KOKKOS_LINK_FLAGS}") - - # Install the kokkoscore library - INSTALL (TARGETS kokkoscore - EXPORT KokkosTargets - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin - ) - - # kokkoscontainers - if (DEFINED KOKKOS_CONTAINERS_SRCS) - ADD_LIBRARY( - kokkoscontainers - ${KOKKOS_CONTAINERS_SRCS} - ) - endif() - - TARGET_LINK_LIBRARIES( - kokkoscontainers - kokkoscore - ) - - # Install the kokkocontainers library - INSTALL (TARGETS kokkoscontainers - EXPORT KokkosTargets - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) - - # kokkosalgorithms - Build as interface library since no source files. 
- ADD_LIBRARY( - kokkosalgorithms - INTERFACE - ) - - target_include_directories( - kokkosalgorithms - INTERFACE ${Kokkos_SOURCE_DIR}/algorithms/src - ) - - TARGET_LINK_LIBRARIES( - kokkosalgorithms - INTERFACE kokkoscore - ) - - # Install the kokkoalgorithms library - INSTALL (TARGETS kokkosalgorithms - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) - - SET (Kokkos_LIBRARIES_NAMES kokkoscore kokkoscontainers kokkosalgorithms) - -ELSE() - # kokkos - ADD_LIBRARY( - kokkos - ${KOKKOS_CORE_SRCS} - ${KOKKOS_CONTAINERS_SRCS} - ) - - target_compile_options( - kokkos - PUBLIC $<$:${KOKKOS_CXX_FLAGS}> - ) - - target_include_directories( - kokkos - PUBLIC - ${KOKKOS_TPL_INCLUDE_DIRS} - ) - - foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES) - if (("${lib}" STREQUAL "cuda") AND (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) - set(LIB_cuda "-lcuda") - elseif ("${lib}" STREQUAL "hpx") - find_package(HPX REQUIRED) - if(${HPX_FOUND}) - target_link_libraries(kokkos PUBLIC ${HPX_LIBRARIES}) - target_include_directories(kokkos PUBLIC ${HPX_INCLUDE_DIRS}) - else() - message(ERROR "HPX not found. 
Check the value of HPX_DIR (= ${HPX_DIR}) or CMAKE_PREFIX_PATH (= ${CMAKE_PREFIX_PATH}).") - endif() - else() - find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS}) - endif() - target_link_libraries(kokkos PUBLIC ${LIB_${lib}}) - endforeach() - - target_link_libraries(kokkos PUBLIC "${KOKKOS_LINK_FLAGS}") - - # Install the kokkos library - INSTALL (TARGETS kokkos - EXPORT KokkosTargets - ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib - RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) - - - SET (Kokkos_LIBRARIES_NAMES kokkos) - -endif() # KOKKOS_SEPARATE_LIBS - -# Install the kokkos headers -INSTALL (DIRECTORY - EXPORT KokkosTargets - ${Kokkos_SOURCE_DIR}/core/src/ - DESTINATION ${KOKKOS_HEADER_DIR} - FILES_MATCHING PATTERN "*.hpp" -) -INSTALL (DIRECTORY - EXPORT KokkosTargets - ${Kokkos_SOURCE_DIR}/containers/src/ - DESTINATION ${KOKKOS_HEADER_DIR} - FILES_MATCHING PATTERN "*.hpp" -) -INSTALL (DIRECTORY - EXPORT KokkosTargets - ${Kokkos_SOURCE_DIR}/algorithms/src/ - DESTINATION ${KOKKOS_HEADER_DIR} - FILES_MATCHING PATTERN "*.hpp" -) - -INSTALL (FILES - ${Kokkos_BINARY_DIR}/KokkosCore_config.h - DESTINATION ${KOKKOS_HEADER_DIR} -) - -# Add all targets to the build-tree export set -export(TARGETS ${Kokkos_LIBRARIES_NAMES} - FILE "${Kokkos_BINARY_DIR}/KokkosTargets.cmake") - -# Export the package for use from the build-tree -# (this registers the build-tree with a global CMake-registry) -export(PACKAGE Kokkos) - -# Create the KokkosConfig.cmake and KokkosConfigVersion files -file(RELATIVE_PATH REL_INCLUDE_DIR "${INSTALL_CMAKE_DIR}" - "${INSTALL_INCLUDE_DIR}") -# ... for the build tree -set(CONF_INCLUDE_DIRS "${Kokkos_SOURCE_DIR}" "${Kokkos_BINARY_DIR}") -configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in - "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" @ONLY) -# ... 
for the install tree -set(CONF_INCLUDE_DIRS "\${Kokkos_CMAKE_DIR}/${REL_INCLUDE_DIR}") -configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in - "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake" @ONLY) - -# Install the KokkosConfig.cmake and KokkosConfigVersion.cmake -install(FILES - "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake" - DESTINATION "${INSTALL_CMAKE_DIR}") - -#This seems not to do anything? -#message(STATUS "KokkosTargets: " ${KokkosTargets}) -# Install the export set for use with the install-tree -INSTALL(EXPORT KokkosTargets DESTINATION - "${INSTALL_CMAKE_DIR}") - -# build and install pkgconfig file -CONFIGURE_FILE(core/src/kokkos.pc.in kokkos.pc @ONLY) -INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION lib/pkgconfig) diff --git a/cmake/kokkos_install.cmake b/cmake/kokkos_install.cmake new file mode 100644 index 00000000000..8dccdd59ae1 --- /dev/null +++ b/cmake/kokkos_install.cmake @@ -0,0 +1,73 @@ + +SET(INSTALL_LIB_DIR lib CACHE PATH "Installation directory for libraries") +SET(INSTALL_BIN_DIR bin CACHE PATH "Installation directory for executables") +SET(INSTALL_INCLUDE_DIR ${KOKKOS_HEADER_DIR} CACHE PATH + "Installation directory for header files") + +#Set all the variables needed for kokkosConfig.cmake +GET_PROPERTY(KOKKOS_PROP_LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) +SET(KOKKOS_LIBRARIES ${KOKKOS_PROP_LIBS}) + +SET(DEF_INSTALL_CMAKE_DIR) +IF(WIN32 AND NOT CYGWIN) + LIST(APPEND DEF_INSTALL_CMAKE_DIR CMake) +ELSE() + LIST(APPEND DEF_INSTALL_CMAKE_DIR lib/CMake/Kokkos) + #also add the totally normal place for it to be + LIST(APPEND DEF_INSTALL_CMAKE_DIR lib/cmake) +ENDIF() +SET(INSTALL_CMAKE_DIR ${DEF_INSTALL_CMAKE_DIR} CACHE PATH + "Installation directory for CMake files") + +# Make relative paths absolute (needed later on) +FOREACH(p LIB BIN INCLUDE CMAKE) + SET(var INSTALL_${p}_DIR) + IF(NOT IS_ABSOLUTE "${${var}}") + SET(${var} "${CMAKE_INSTALL_PREFIX}/${${var}}") + ENDIF() 
+ENDFOREACH() + +INSTALL (FILES + ${Kokkos_BINARY_DIR}/KokkosCore_config.h + DESTINATION ${KOKKOS_HEADER_DIR} +) + +# Add all targets to the build-tree export set +#export(TARGETS ${Kokkos_LIBRARIES_NAMES} +# FILE "${Kokkos_BINARY_DIR}/KokkosTargets.cmake") + +# Export the package for use from the build-tree +# (this registers the build-tree with a global CMake-registry) +#export(PACKAGE Kokkos) + +# Create the KokkosConfig.cmake and KokkosConfigVersion files +file(RELATIVE_PATH REL_INCLUDE_DIR "${INSTALL_CMAKE_DIR}" + "${INSTALL_INCLUDE_DIR}") +# ... for the build tree +set(CONF_INCLUDE_DIRS "${Kokkos_SOURCE_DIR}" "${Kokkos_BINARY_DIR}") +include(CMakePackageConfigHelpers) +configure_package_config_file(cmake/KokkosConfig.cmake.in + "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" + INSTALL_DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/cmake) +write_basic_package_version_file("${Kokkos_BINARY_DIR}/KokkosConfigVersion.cmake" + VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}" + COMPATIBILITY SameMajorVersion) + +# Install the KokkosConfig.cmake and KokkosConfigVersion.cmake +FOREACH(DIR ${INSTALL_CMAKE_DIR}) + install(FILES + "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" + DESTINATION ${DIR}) + install(FILES + "${Kokkos_BINARY_DIR}/KokkosConfigVersion.cmake" + DESTINATION ${DIR}) + + #This seems not to do anything? 
+ # Install the export set for use with the install-tree + INSTALL(EXPORT KokkosTargets DESTINATION ${DIR}) +ENDFOREACH() + +# build and install pkgconfig file +CONFIGURE_FILE(core/src/kokkos.pc.in kokkos.pc @ONLY) +INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION lib/pkgconfig) + diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake new file mode 100644 index 00000000000..c0d1de9ad09 --- /dev/null +++ b/cmake/kokkos_tribits.cmake @@ -0,0 +1,662 @@ +INCLUDE(CMakeParseArguments) +INCLUDE(CTest) + +cmake_policy(SET CMP0054 NEW) + +MESSAGE(STATUS "The project name is: ${PROJECT_NAME}") + +IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP) + SET(${PROJECT_NAME}_ENABLE_OpenMP OFF) +ENDIF() + +IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_HPX) + SET(${PROJECT_NAME}_ENABLE_HPX OFF) +ENDIF() + +IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG) + SET(${PROJECT_NAME}_ENABLE_DEBUG OFF) +ENDIF() + +IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_CXX11) + SET(${PROJECT_NAME}_ENABLE_CXX11 ON) +ENDIF() + +IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_TESTS) + SET(${PROJECT_NAME}_ENABLE_TESTS OFF) +ENDIF() + +IF(NOT DEFINED TPL_ENABLE_Pthread) + SET(TPL_ENABLE_Pthread OFF) +ENDIF() + +FUNCTION(ASSERT_DEFINED VARS) + FOREACH(VAR ${VARS}) + IF(NOT DEFINED ${VAR}) + MESSAGE(SEND_ERROR "Error, the variable ${VAR} is not defined!") + ENDIF() + ENDFOREACH() +ENDFUNCTION() + +if (NOT KOKKOS_HAS_TRILINOS) +MACRO(GLOBAL_SET VARNAME) + SET(${VARNAME} ${ARGN} CACHE INTERNAL "") +ENDMACRO() + +FUNCTION(VERIFY_EMPTY CONTEXT) +if(NOT "${ARGN}" STREQUAL "") # fail on ANY leftover argument; if(${ARGN}) only tested the truthiness of a variable named by the argument +MESSAGE(FATAL_ERROR "Kokkos does not support all of Tribits. 
Unhandled arguments in ${CONTEXT}:\n${ARGN}") +endif() +ENDFUNCTION() + +MACRO(PREPEND_GLOBAL_SET VARNAME) + ASSERT_DEFINED(${VARNAME}) + GLOBAL_SET(${VARNAME} ${ARGN} ${${VARNAME}}) +ENDMACRO() + +MACRO(PREPEND_TARGET_SET VARNAME TARGET_NAME TYPE) + IF("${TYPE}" STREQUAL "REQUIRED") # macro parameters are not variables, so TYPE must be expanded explicitly + SET(REQUIRED TRUE) + ELSE() + SET(REQUIRED FALSE) + ENDIF() + IF(TARGET ${TARGET_NAME}) + PREPEND_GLOBAL_SET(${VARNAME} ${TARGET_NAME}) + ELSE() + IF(REQUIRED) + MESSAGE(FATAL_ERROR "Missing dependency ${TARGET_NAME}") + ENDIF() + ENDIF() +ENDMACRO() +endif() + + +FUNCTION(KOKKOS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME_CONFIG_FILE}) + else() + # Configure the file + CONFIGURE_FILE( + ${PACKAGE_SOURCE_DIR}/cmake/${PACKAGE_NAME_CONFIG_FILE}.in + ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME_CONFIG_FILE} + ) + endif() +ENDFUNCTION() + +MACRO(KOKKOS_ADD_TEST_DIRECTORIES) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_TEST_DIRECTORIES(${ARGN}) + else() + IF(${${PROJECT_NAME}_ENABLE_TESTS}) + FOREACH(TEST_DIR ${ARGN}) + ADD_SUBDIRECTORY(${TEST_DIR}) + ENDFOREACH() + ENDIF() + endif() +ENDMACRO() + +MACRO(KOKKOS_ADD_EXAMPLE_DIRECTORIES) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_EXAMPLE_DIRECTORIES(${ARGN}) + else() + IF(${PACKAGE_NAME}_ENABLE_EXAMPLES OR ${PARENT_PACKAGE_NAME}_ENABLE_EXAMPLES) + FOREACH(EXAMPLE_DIR ${ARGN}) + ADD_SUBDIRECTORY(${EXAMPLE_DIR}) + ENDFOREACH() + ENDIF() + endif() +ENDMACRO() + + +MACRO(ADD_INTERFACE_LIBRARY LIB_NAME) + FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") + ADD_LIBRARY(${LIB_NAME} STATIC ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp) + SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE) +ENDMACRO() + + +FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) + CMAKE_PARSE_ARGUMENTS(PARSE + "STATIC;SHARED" + "" + "HEADERS;SOURCES" + ${ARGN}) + + IF(PARSE_HEADERS) + LIST(REMOVE_DUPLICATES PARSE_HEADERS) + ENDIF() + IF(PARSE_SOURCES) + LIST(REMOVE_DUPLICATES PARSE_SOURCES) + ENDIF() + + ADD_LIBRARY( 
+ ${LIBRARY_NAME} + ${PARSE_HEADERS} + ${PARSE_SOURCES} + ) + + TARGET_COMPILE_OPTIONS( + ${LIBRARY_NAME} + PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_CXX_FLAGS}> + ) + + if(${CMAKE_VERSION} VERSION_GREATER "3.13" OR ${CMAKE_VERSION} VERSION_EQUAL "3.13") + TARGET_LINK_OPTIONS( + ${LIBRARY_NAME} + PUBLIC ${KOKKOS_LD_FLAGS} + ) + else() + #well, this is annoying - I am going to need to hack this for Visual Studio + TARGET_LINK_LIBRARIES( + ${LIBRARY_NAME} PUBLIC ${KOKKOS_LD_FLAGS} + ) + endif() + + + TARGET_INCLUDE_DIRECTORIES( + ${LIBRARY_NAME} + PUBLIC ${KOKKOS_TPL_INCLUDE_DIRS} + ) + + foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES) + if (("${lib}" STREQUAL "cuda") AND (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) + set(LIB_cuda "-lcuda") + target_link_libraries(${LIBRARY_NAME} PUBLIC cuda) + elseif ("${lib}" STREQUAL "hpx") + find_package(HPX REQUIRED) + if(${HPX_FOUND}) + target_link_libraries(${LIBRARY_NAME} PUBLIC ${HPX_LIBRARIES}) + target_include_directories(${LIBRARY_NAME} PUBLIC ${HPX_INCLUDE_DIRS}) + else() + message(FATAL_ERROR "HPX not found. 
Check the value of HPX_DIR (= ${HPX_DIR}) or CMAKE_PREFIX_PATH (= ${CMAKE_PREFIX_PATH}).") + endif() + else() + find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS}) + target_link_libraries(${LIBRARY_NAME} PUBLIC ${LIB_${lib}}) + endif() + endforeach() + + INSTALL( + TARGETS ${LIBRARY_NAME} + EXPORT ${PROJECT_NAME} + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + COMPONENT ${PACKAGE_NAME} + ) + + INSTALL( + TARGETS ${LIBRARY_NAME} + EXPORT KokkosTargets + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + ) + + INSTALL( + FILES ${PARSE_HEADERS} + DESTINATION include + COMPONENT ${PACKAGE_NAME} + ) + + #Can we please add a remove duplicates to property append + GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + LIST(APPEND LIBLIST ${LIBRARY_NAME}) + LIST(REMOVE_DUPLICATES LIBLIST) + SET_PROPERTY(GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES ${LIBLIST}) + GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + VERIFY_EMPTY(KOKKOS_ADD_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) +ENDFUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) + +FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_LIBRARY(${LIBRARY_NAME} ${ARGN}) + elseif(KOKKOS_SEPARATE_LIBS) + KOKKOS_INTERNAL_ADD_LIBRARY( + ${LIBRARY_NAME} ${ARGN}) + else() + CMAKE_PARSE_ARGUMENTS(PARSE + "" + "" + "SOURCES;HEADERS" + ${ARGN}) + #just append the headers and sources to the list + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_ALL_SOURCES ${PARSE_SOURCES}) + GET_PROPERTY(SRCS GLOBAL PROPERTY KOKKOS_ALL_SOURCES) + endif() +ENDFUNCTION() + +FUNCTION(KOKKOS_ADD_EXECUTABLE EXE_NAME) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_EXECUTABLE(${EXE_NAME} ${ARGN}) + else() + CMAKE_PARSE_ARGUMENTS(PARSE + "TESTONLY" + "" + "SOURCES;TESTONLYLIBS" + ${ARGN}) + + ADD_EXECUTABLE(${EXE_NAME} ${PARSE_SOURCES}) + IF (PARSE_TESTONLYLIBS) + TARGET_LINK_LIBRARIES(${EXE_NAME} ${PARSE_TESTONLYLIBS}) + ENDIF() + GET_PROPERTY(liblist GLOBAL 
PROPERTY KOKKOS_LIBRARIES_NAMES) + FOREACH(LIB ${liblist}) + TARGET_LINK_LIBRARIES(${EXE_NAME} ${LIB}) + ENDFOREACH() + VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE ${PARSE_UNPARSED_ARGUMENTS}) + endif() +ENDFUNCTION() + +IF(NOT TARGET check) + ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR}) +ENDIF() + +FUNCTION(KOKKOS_ADD_TEST EXE_NAME) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_TEST(${EXE_NAME} + ${ARGN} + COMM serial mpi + NUM_MPI_PROCS 1 + ) + else() + CMAKE_PARSE_ARGUMENTS(TEST + "WILL_FAIL" + "NAME;FAIL_REGULAR_EXPRESSION;PASS_REGULAR_EXPRESSION" + "CATEGORIES" + ${ARGN}) + SET(EXE ${PACKAGE_NAME}_${EXE_NAME}) + IF(WIN32) + ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${EXE}${CMAKE_EXECUTABLE_SUFFIX}) + ELSE() + ADD_TEST(NAME ${TEST_NAME} COMMAND ${EXE}) + ENDIF() + IF(TEST_WILL_FAIL) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES WILL_FAIL ${TEST_WILL_FAIL}) + ENDIF() + IF(TEST_FAIL_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION ${TEST_FAIL_REGULAR_EXPRESSION}) + ENDIF() + IF(TEST_PASS_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION ${TEST_PASS_REGULAR_EXPRESSION}) + ENDIF() + VERIFY_EMPTY(KOKKOS_ADD_TEST ${TEST_UNPARSED_ARGUMENTS}) + endif() +ENDFUNCTION() + +FUNCTION(KOKKOS_ADD_ADVANCED_TEST) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_ADVANCED_TEST(${ARGN}) + else() + # TODO Write this + endif() +ENDFUNCTION() + +FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST EXE_NAME) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_EXECUTABLE_AND_TEST(${EXE_NAME} ${ARGN}) + else() + CMAKE_PARSE_ARGUMENTS(PARSE + "" + "" + "SOURCES" + ${ARGN}) + + KOKKOS_ADD_TEST_EXECUTABLE(${EXE_NAME} SOURCES ${PARSE_SOURCES}) + KOKKOS_ADD_TEST(${EXE_NAME} NAME ${EXE_NAME} + FAIL_REGULAR_EXPRESSION " FAILED " + ${PARSE_UNPARSED_ARGUMENTS}) + endif() +ENDFUNCTION() + +MACRO(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME) + ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME}) + 
TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES}) + TARGET_INCLUDE_DIRECTORIES(TPL_LIB_${TPL_NAME} INTERFACE ${TPL_${TPL_NAME}_INCLUDE_DIRS}) +ENDMACRO() + +FUNCTION(KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES(${TPL_NAME} ${ARGN}) + else() + CMAKE_PARSE_ARGUMENTS(PARSE + "" + "" + "REQUIRED_HEADERS;REQUIRED_LIBS_NAMES" + ${ARGN}) + + SET(_${TPL_NAME}_ENABLE_SUCCESS TRUE) + IF (PARSE_REQUIRED_LIBS_NAMES) + FIND_LIBRARY(TPL_${TPL_NAME}_LIBRARIES NAMES ${PARSE_REQUIRED_LIBS_NAMES}) + IF(NOT TPL_${TPL_NAME}_LIBRARIES) + SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) + ENDIF() + ENDIF() + IF (PARSE_REQUIRED_HEADERS) + FIND_PATH(TPL_${TPL_NAME}_INCLUDE_DIRS NAMES ${PARSE_REQUIRED_HEADERS}) + IF(NOT TPL_${TPL_NAME}_INCLUDE_DIRS) + SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) + ENDIF() + ENDIF() + IF (_${TPL_NAME}_ENABLE_SUCCESS) + KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(${TPL_NAME}) + ENDIF() + VERIFY_EMPTY(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) + endif() +ENDFUNCTION() + +MACRO(KOKKOS_SUBPACKAGE NAME) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_SUBPACKAGE(${NAME}) + else() + SET(PACKAGE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + SET(PARENT_PACKAGE_NAME ${PACKAGE_NAME}) + SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME}) + STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) + SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + #ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME}) + #GLOBAL_SET(${PACKAGE_NAME}_LIBS "") + endif() +ENDMACRO(KOKKOS_SUBPACKAGE) + +MACRO(KOKKOS_SUBPACKAGE_POSTPROCESS) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_SUBPACKAGE_POSTPROCESS() + endif() +ENDMACRO(KOKKOS_SUBPACKAGE_POSTPROCESS) + +MACRO(KOKKOS_PACKAGE_DECL) + + if (KOKKOS_HAS_TRILINOS) + TRIBITS_PACKAGE_DECL(Kokkos) + else() + SET(PACKAGE_NAME Kokkos) + SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) + endif() + + 
#SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps") + #FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake") + #FOREACH(TPL_FILE ${TPLS_FILES}) + # TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE}) + #ENDFOREACH() + +ENDMACRO() + + +MACRO(KOKKOS_PROCESS_SUBPACKAGES) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_PROCESS_SUBPACKAGES() + else() + ADD_SUBDIRECTORY(core) + ADD_SUBDIRECTORY(containers) + ADD_SUBDIRECTORY(algorithms) + endif() +ENDMACRO(KOKKOS_PROCESS_SUBPACKAGES) + +MACRO(KOKKOS_PACKAGE_DEF) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_PACKAGE_DEF() + else() + #do nothing + endif() +ENDMACRO(KOKKOS_PACKAGE_DEF) + +MACRO(KOKKOS_TARGET_COMPILE_OPTIONS TARGET) +if(KOKKOS_HAS_TRILINOS) +COMPILE_OPTIONS(${ARGN}) +else() +TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) +endif() +ENDMACRO() + +MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) + if (NOT KOKKOS_HAS_TRILINOS) + set(KOKKOS_ENABLE_EXAMPLES OFF CACHE BOOL "Whether to build examples") + set(KOKKOS_ENABLE_TESTS OFF CACHE BOOL "Whether to build tests") + + + #------------ COMPILER AND FEATURE CHECKS ------------------------------------ + include(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake) + set_kokkos_cxx_compiler() + set_kokkos_cxx_standard() + + #------------ GET OPTIONS AND KOKKOS_SETTINGS -------------------------------- + # Add Kokkos' modules to CMake's module path. 
+ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/") + + set(KOKKOS_CMAKE_VERBOSE True) + include(${KOKKOS_SRC_PATH}/cmake/kokkos_options.cmake) + include(${KOKKOS_SRC_PATH}/cmake/kokkos_settings.cmake) + + #------------ GENERATE HEADER AND SOURCE FILES ------------------------------- + execute_process( + COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} PREFIX=${CMAKE_INSTALL_PREFIX} generate_build_settings + WORKING_DIRECTORY "${Kokkos_BINARY_DIR}" + OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out + RESULT_VARIABLE GEN_SETTINGS_RESULT + ) + if (GEN_SETTINGS_RESULT) + message(FATAL_ERROR "Kokkos settings generation failed:\n" + "${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings") + endif() + include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake) + install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION lib/cmake/Kokkos) + install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION lib/cmake) + install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION ${CMAKE_INSTALL_PREFIX}) + string(REPLACE " " ";" KOKKOS_TPL_INCLUDE_DIRS "${KOKKOS_GMAKE_TPL_INCLUDE_DIRS}") + string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_DIRS "${KOKKOS_GMAKE_TPL_LIBRARY_DIRS}") + string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_NAMES "${KOKKOS_GMAKE_TPL_LIBRARY_NAMES}") + list(REMOVE_ITEM KOKKOS_TPL_INCLUDE_DIRS "") + list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_DIRS "") + list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_NAMES "") + set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) + endif() +ENDMACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) + +MACRO(KOKKOS_EXCLUDE_AUTOTOOLS_FILES) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_EXCLUDE_AUTOTOOLS_FILES() + else() + #do nothing + endif() +ENDMACRO(KOKKOS_EXCLUDE_AUTOTOOLS_FILES) + +MACRO(KOKKOS_ADD_TEST_EXECUTABLE EXE_NAME) + CMAKE_PARSE_ARGUMENTS(PARSE 
+ "" + "" + "SOURCES" + ${ARGN}) + KOKKOS_ADD_EXECUTABLE(${PACKAGE_NAME}_${EXE_NAME} + SOURCES ${PARSE_SOURCES} + TESTONLYLIBS kokkos_gtest ${KOKKOS_TEST_LINK_TARGETS} + ${PARSE_UNPARSED_ARGUMENTS} + ) + ADD_DEPENDENCIES(check ${PACKAGE_NAME}_${EXE_NAME}) +ENDMACRO(KOKKOS_ADD_TEST_EXECUTABLE) + +MACRO(KOKKOS_ADD_PERFORMANCE_TEST NAME) +KOKKOS_ADD_TEST(${NAME} + CATEGORIES PERFORMANCE + ${ARGN} +) +ENDMACRO() + + +MACRO(KOKKOS_PACKAGE_POSTPROCESS) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_PACKAGE_POSTPROCESS() + endif() +ENDMACRO(KOKKOS_PACKAGE_POSTPROCESS) + +FUNCTION(KOKKOS_LIB_TYPE LIB RET) +GET_TARGET_PROPERTY(PROP ${LIB} TYPE) +IF (${PROP} STREQUAL "INTERFACE_LIBRARY") + SET(${RET} "INTERFACE" PARENT_SCOPE) +ELSE() + SET(${RET} "PUBLIC" PARENT_SCOPE) +ENDIF() +ENDFUNCTION(KOKKOS_LIB_TYPE) + + +FUNCTION(KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES TARGET) +IF(KOKKOS_HAS_TRILINOS) + #ignore the target, tribits doesn't do anything directly with targets + TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) + TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +ELSEIF(KOKKOS_SEPARATE_LIBS) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} + $) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} + $) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} + $) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} + $) +ELSE() #append to a list for later + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}) + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) +ENDIF() +ENDFUNCTION(KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES) + + +FUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET) +IF(KOKKOS_HAS_TRILINOS) + TRIBITS_INCLUDE_DIRECTORIES(${TARGET} ${ARGN}) +ELSEIF(TARGET ${TARGET}) + #the target actually exists - this means we are doing separate libs + #or this a test library + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} ${ARGN}) +ELSE() + GET_PROPERTY(LIBS 
GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + IF (${TARGET} IN_LIST LIBS) + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_LIBRARY_INCLUDES ${ARGN}) + ELSE() + MESSAGE(FATAL_ERROR "Trying to set include directories on unknown target ${TARGET}") + ENDIF() +ENDIF() +ENDFUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET) + +FUNCTION(KOKKOS_LINK_INTERNAL_LIBRARY TARGET DEPLIB) +IF(KOKKOS_HAS_TRILINOS) + #do nothing +ELSEIF(KOKKOS_SEPARATE_LIBS) + SET(options INTERFACE) + SET(oneValueArgs) + SET(multiValueArgs) + CMAKE_PARSE_ARGUMENTS(PARSE + "INTERFACE" + "" + "" + ${ARGN}) + SET(LINK_TYPE) + IF(PARSE_INTERFACE) + SET(LINK_TYPE INTERFACE) + ELSE() + SET(LINK_TYPE PUBLIC) + ENDIF() + TARGET_LINK_LIBRARIES(${TARGET} ${LINK_TYPE} ${DEPLIB}) + VERIFY_EMPTY(KOKKOS_LINK_INTERNAL_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) + ELSE() + #only a single lib - so nothing to do here +ENDIF() +ENDFUNCTION(KOKKOS_LINK_INTERNAL_LIBRARY) + +FUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY NAME) +IF (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_LIBRARY(${NAME} ${ARGN}) +ELSE() + ADD_LIBRARY(${NAME} INTERFACE) + #I hate that property append doesn't have a remove duplicates + GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + LIST(APPEND LIBLIST ${NAME}) + LIST(REMOVE_DUPLICATES LIBLIST) + SET_PROPERTY(GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES ${LIBLIST}) + GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) +ENDIF() +ENDFUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY) + +FUNCTION(KOKKOS_ADD_TEST_LIBRARY NAME) +IF (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_LIBRARY(${NAME} ${ARGN}) +ELSE() + SET(oneValueArgs) + SET(multiValueArgs HEADERS SOURCES) + + CMAKE_PARSE_ARGUMENTS(PARSE + "STATIC;SHARED" + "" + "HEADERS;SOURCES" + ${ARGN}) + + IF(PARSE_HEADERS) + LIST(REMOVE_DUPLICATES PARSE_HEADERS) + ENDIF() + IF(PARSE_SOURCES) + LIST(REMOVE_DUPLICATES PARSE_SOURCES) + ENDIF() + ADD_LIBRARY(${NAME} ${PARSE_SOURCES}) + target_compile_options( + ${NAME} + PUBLIC $<$:${KOKKOS_CXX_FLAGS}> + ) + target_link_libraries( + ${NAME} + PUBLIC 
${KOKKOS_LD_FLAGS} + ) +ENDIF() +ENDFUNCTION(KOKKOS_ADD_TEST_LIBRARY) + + +FUNCTION(KOKKOS_TARGET_COMPILE_DEFINITIONS TARGET VISIBILITY) +IF(KOKKOS_HAS_TRILINOS) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0") +ELSEIF(TARGET ${TARGET}) + #the target actually exists - this means we are doing separate libs + #or this a test library + TARGET_COMPILE_DEFINITIONS(${TARGET} ${VISIBILITY} ${ARGN}) +ELSE() + GET_PROPERTY(LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + IF (${TARGET} IN_LIST LIBS) + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_LIBRARY_DEFINITIONS ${ARGN}) + ELSE() + MESSAGE(FATAL_ERROR "Trying to set compile definitions on unknown target ${TARGET}") + ENDIF() +ENDIF() +ENDFUNCTION(KOKKOS_TARGET_COMPILE_DEFINITIONS) + +FUNCTION(KOKKOS_INCLUDE_DIRECTORIES) +IF(KOKKOS_HAS_TRILINOS) + TRIBITS_INCLUDE_DIRECTORIES(${ARGN}) +ELSE() + CMAKE_PARSE_ARGUMENTS( + INC + "REQUIRED_DURING_INSTALLATION_TESTING" + "" + "" + ${ARGN} + ) + INCLUDE_DIRECTORIES(${INC_UNPARSED_ARGUMENTS}) +ENDIF() +ENDFUNCTION(KOKKOS_INCLUDE_DIRECTORIES) + +MACRO(KOKKOS_MAKE_LIBKOKKOS) +GET_PROPERTY(SRCS GLOBAL PROPERTY KOKKOS_ALL_SOURCES) +GET_PROPERTY(INCS GLOBAL PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES) +GET_PROPERTY(DEFS GLOBAL PROPERTY KOKKOS_COMPILE_DEFINITIONS) +KOKKOS_INTERNAL_ADD_LIBRARY(kokkos SOURCES ${SRCS}) +FOREACH(INC ${INCS}) + TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC $) +ENDFOREACH() +TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC $) +TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC $) +FOREACH(DEF ${DEFS}) + TARGET_COMPILE_DEFINITIONS(kokkos PUBLIC ${DEF}) +ENDFOREACH() +ENDMACRO() + +MACRO(KOKKOS_ADD_COMPILE_OPTIONS) +ADD_COMPILE_OPTIONS(${ARGN}) +ENDMACRO() + diff --git a/cmake/tpls/FindTPLHWLOC.cmake b/cmake/tpls/FindTPLHWLOC.cmake index 715b3e9bde5..a4c55e1d7b6 100644 --- a/cmake/tpls/FindTPLHWLOC.cmake +++ b/cmake/tpls/FindTPLHWLOC.cmake @@ -64,7 +64,7 @@ # Version: 1.3 # -TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC +KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC 
REQUIRED_HEADERS hwloc.h REQUIRED_LIBS_NAMES "hwloc" ) diff --git a/cmake/tpls/FindTPLPthread.cmake b/cmake/tpls/FindTPLPthread.cmake index fc401d75433..4dc1a87e186 100644 --- a/cmake/tpls/FindTPLPthread.cmake +++ b/cmake/tpls/FindTPLPthread.cmake @@ -75,7 +75,7 @@ IF(USE_THREADS) SET(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") SET(TPL_Pthread_LIBRARY_DIRS "") ELSE() - TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread + KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread REQUIRED_HEADERS pthread.h REQUIRED_LIBS_NAMES pthread ) diff --git a/cmake/tpls/FindTPLQTHREADS.cmake b/cmake/tpls/FindTPLQTHREADS.cmake index c312f2590bc..b3a36fb7c34 100644 --- a/cmake/tpls/FindTPLQTHREADS.cmake +++ b/cmake/tpls/FindTPLQTHREADS.cmake @@ -63,7 +63,7 @@ # Source: https://code.google.com/p/qthreads # -TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREADS +KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREADS REQUIRED_HEADERS qthread.h REQUIRED_LIBS_NAMES "qthread" ) diff --git a/cmake/tribits.cmake b/cmake/tribits.cmake deleted file mode 100644 index 1f467f0662e..00000000000 --- a/cmake/tribits.cmake +++ /dev/null @@ -1,531 +0,0 @@ -INCLUDE(CMakeParseArguments) -INCLUDE(CTest) - -cmake_policy(SET CMP0054 NEW) - -MESSAGE(STATUS "The project name is: ${PROJECT_NAME}") - -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP) - SET(${PROJECT_NAME}_ENABLE_OpenMP OFF) -ENDIF() - -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_HPX) - SET(${PROJECT_NAME}_ENABLE_HPX OFF) -ENDIF() - -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG) - SET(${PROJECT_NAME}_ENABLE_DEBUG OFF) -ENDIF() - -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_CXX11) - SET(${PROJECT_NAME}_ENABLE_CXX11 ON) -ENDIF() - -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_TESTS) - SET(${PROJECT_NAME}_ENABLE_TESTS OFF) -ENDIF() - -IF(NOT DEFINED TPL_ENABLE_Pthread) - SET(TPL_ENABLE_Pthread OFF) -ENDIF() - -FUNCTION(ASSERT_DEFINED VARS) - FOREACH(VAR ${VARS}) - IF(NOT DEFINED ${VAR}) - MESSAGE(SEND_ERROR "Error, the variable ${VAR} is not defined!") - ENDIF() - 
ENDFOREACH() -ENDFUNCTION() - -MACRO(GLOBAL_SET VARNAME) - SET(${VARNAME} ${ARGN} CACHE INTERNAL "") -ENDMACRO() - -MACRO(PREPEND_GLOBAL_SET VARNAME) - ASSERT_DEFINED(${VARNAME}) - GLOBAL_SET(${VARNAME} ${ARGN} ${${VARNAME}}) -ENDMACRO() - -#FUNCTION(REMOVE_GLOBAL_DUPLICATES VARNAME) -# ASSERT_DEFINED(${VARNAME}) -# IF (${VARNAME}) -# SET(TMP ${${VARNAME}}) -# LIST(REMOVE_DUPLICATES TMP) -# GLOBAL_SET(${VARNAME} ${TMP}) -# ENDIF() -#ENDFUNCTION() - -#MACRO(TRIBITS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE) -# MESSAGE(STATUS "TRIBITS_ADD_OPTION_AND_DEFINE: '${USER_OPTION_NAME}' '${MACRO_DEFINE_NAME}' '${DEFAULT_VALUE}'") -# SET( ${USER_OPTION_NAME} "${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}" ) -# IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "") -# IF(${USER_OPTION_NAME}) -# GLOBAL_SET(${MACRO_DEFINE_NAME} ON) -# ELSE() -# GLOBAL_SET(${MACRO_DEFINE_NAME} OFF) -# ENDIF() -# ENDIF() -#ENDMACRO() - -FUNCTION(TRIBITS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE) - - # Configure the file - CONFIGURE_FILE( - ${PACKAGE_SOURCE_DIR}/cmake/${PACKAGE_NAME_CONFIG_FILE}.in - ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME_CONFIG_FILE} - ) - -ENDFUNCTION() - -#MACRO(TRIBITS_ADD_DEBUG_OPTION) -# TRIBITS_ADD_OPTION_AND_DEFINE( -# ${PROJECT_NAME}_ENABLE_DEBUG -# HAVE_${PROJECT_NAME_UC}_DEBUG -# "Enable a host of runtime debug checking." 
-# OFF -# ) -#ENDMACRO() - - -MACRO(TRIBITS_ADD_TEST_DIRECTORIES) - IF(${${PROJECT_NAME}_ENABLE_TESTS}) - FOREACH(TEST_DIR ${ARGN}) - ADD_SUBDIRECTORY(${TEST_DIR}) - ENDFOREACH() - ENDIF() -ENDMACRO() - -MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES) - IF(${PACKAGE_NAME}_ENABLE_EXAMPLES OR ${PARENT_PACKAGE_NAME}_ENABLE_EXAMPLES) - FOREACH(EXAMPLE_DIR ${ARGN}) - ADD_SUBDIRECTORY(${EXAMPLE_DIR}) - ENDFOREACH() - ENDIF() -ENDMACRO() - - -function(INCLUDE_DIRECTORIES) - cmake_parse_arguments(INCLUDE_DIRECTORIES "REQUIRED_DURING_INSTALLATION_TESTING" "" "" ${ARGN}) - _INCLUDE_DIRECTORIES(${INCLUDE_DIRECTORIES_UNPARSED_ARGUMENTS}) -endfunction() - - -MACRO(TARGET_TRANSFER_PROPERTY TARGET_NAME PROP_IN PROP_OUT) - SET(PROP_VALUES) - FOREACH(TARGET_X ${ARGN}) - LIST(APPEND PROP_VALUES "$") - ENDFOREACH() - SET_TARGET_PROPERTIES(${TARGET_NAME} PROPERTIES ${PROP_OUT} "${PROP_VALUES}") -ENDMACRO() - -MACRO(ADD_INTERFACE_LIBRARY LIB_NAME) - FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") - ADD_LIBRARY(${LIB_NAME} STATIC ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp) - SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE) -ENDMACRO() - -# Older versions of cmake does not make include directories transitive -MACRO(TARGET_LINK_AND_INCLUDE_LIBRARIES TARGET_NAME) - TARGET_LINK_LIBRARIES(${TARGET_NAME} LINK_PUBLIC ${ARGN}) - FOREACH(DEP_LIB ${ARGN}) - TARGET_INCLUDE_DIRECTORIES(${TARGET_NAME} PUBLIC $) - TARGET_INCLUDE_DIRECTORIES(${TARGET_NAME} PUBLIC $) - ENDFOREACH() -ENDMACRO() - -FUNCTION(TRIBITS_ADD_LIBRARY LIBRARY_NAME) - - SET(options STATIC SHARED TESTONLY NO_INSTALL_LIB_OR_HEADERS CUDALIBRARY) - SET(oneValueArgs) - SET(multiValueArgs HEADERS HEADERS_INSTALL_SUBDIR NOINSTALLHEADERS SOURCES DEPLIBS IMPORTEDLIBS DEFINES ADDED_LIB_TARGET_NAME_OUT) - - CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - IF(PARSE_HEADERS) - LIST(REMOVE_DUPLICATES PARSE_HEADERS) - ENDIF() - IF(PARSE_SOURCES) - LIST(REMOVE_DUPLICATES PARSE_SOURCES) - ENDIF() - - 
# Local variable to hold all of the libraries that will be directly linked - # to this library. - SET(LINK_LIBS ${${PACKAGE_NAME}_DEPS}) - - # Add dependent libraries passed directly in - - IF (PARSE_IMPORTEDLIBS) - LIST(APPEND LINK_LIBS ${PARSE_IMPORTEDLIBS}) - ENDIF() - - IF (PARSE_DEPLIBS) - LIST(APPEND LINK_LIBS ${PARSE_DEPLIBS}) - ENDIF() - - # Add the library and all the dependencies - - IF (PARSE_DEFINES) - ADD_DEFINITIONS(${PARSE_DEFINES}) - ENDIF() - - IF (PARSE_STATIC) - SET(STATIC_KEYWORD "STATIC") - ELSE() - SET(STATIC_KEYWORD) - ENDIF() - - IF (PARSE_SHARED) - SET(SHARED_KEYWORD "SHARED") - ELSE() - SET(SHARED_KEYWORD) - ENDIF() - - IF (PARSE_TESTONLY) - SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL") - ELSE() - SET(EXCLUDE_FROM_ALL_KEYWORD) - ENDIF() - IF (NOT PARSE_CUDALIBRARY) - ADD_LIBRARY( - ${LIBRARY_NAME} - ${STATIC_KEYWORD} - ${SHARED_KEYWORD} - ${EXCLUDE_FROM_ALL_KEYWORD} - ${PARSE_HEADERS} - ${PARSE_NOINSTALLHEADERS} - ${PARSE_SOURCES} - ) - ELSE() - CUDA_ADD_LIBRARY( - ${LIBRARY_NAME} - ${PARSE_HEADERS} - ${PARSE_NOINSTALLHEADERS} - ${PARSE_SOURCES} - ) - ENDIF() - - TARGET_LINK_AND_INCLUDE_LIBRARIES(${LIBRARY_NAME} ${LINK_LIBS}) - - IF (NOT PARSE_TESTONLY OR PARSE_NO_INSTALL_LIB_OR_HEADERS) - - INSTALL( - TARGETS ${LIBRARY_NAME} - EXPORT ${PROJECT_NAME} - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib - COMPONENT ${PACKAGE_NAME} - ) - - INSTALL( - FILES ${PARSE_HEADERS} - EXPORT ${PROJECT_NAME} - DESTINATION include - COMPONENT ${PACKAGE_NAME} - ) - - INSTALL( - DIRECTORY ${PARSE_HEADERS_INSTALL_SUBDIR} - EXPORT ${PROJECT_NAME} - DESTINATION include - COMPONENT ${PACKAGE_NAME} - ) - - ENDIF() - - IF (NOT PARSE_TESTONLY) - PREPEND_GLOBAL_SET(${PACKAGE_NAME}_LIBS ${LIBRARY_NAME}) - REMOVE_GLOBAL_DUPLICATES(${PACKAGE_NAME}_LIBS) - ENDIF() - -ENDFUNCTION() - -FUNCTION(TRIBITS_ADD_EXECUTABLE EXE_NAME) - - SET(options NOEXEPREFIX NOEXESUFFIX ADD_DIR_TO_NAME INSTALLABLE TESTONLY) - SET(oneValueArgs 
ADDED_EXE_TARGET_NAME_OUT) - SET(multiValueArgs SOURCES CATEGORIES HOST XHOST HOSTTYPE XHOSTTYPE DIRECTORY TESTONLYLIBS IMPORTEDLIBS DEPLIBS COMM LINKER_LANGUAGE TARGET_DEFINES DEFINES) - - CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - IF (PARSE_TARGET_DEFINES) - TARGET_COMPILE_DEFINITIONS(${EXE_NAME} PUBLIC ${PARSE_TARGET_DEFINES}) - ENDIF() - - SET(LINK_LIBS PACKAGE_${PACKAGE_NAME}) - - IF (PARSE_TESTONLYLIBS) - LIST(APPEND LINK_LIBS ${PARSE_TESTONLYLIBS}) - ENDIF() - - IF (PARSE_IMPORTEDLIBS) - LIST(APPEND LINK_LIBS ${PARSE_IMPORTEDLIBS}) - ENDIF() - - SET (EXE_SOURCES) - IF(PARSE_DIRECTORY) - FOREACH( SOURCE_FILE ${PARSE_SOURCES} ) - IF(IS_ABSOLUTE ${SOURCE_FILE}) - SET (EXE_SOURCES ${EXE_SOURCES} ${SOURCE_FILE}) - ELSE() - SET (EXE_SOURCES ${EXE_SOURCES} ${PARSE_DIRECTORY}/${SOURCE_FILE}) - ENDIF() - ENDFOREACH( ) - ELSE() - FOREACH( SOURCE_FILE ${PARSE_SOURCES} ) - SET (EXE_SOURCES ${EXE_SOURCES} ${SOURCE_FILE}) - ENDFOREACH( ) - ENDIF() - - SET(EXE_BINARY_NAME ${EXE_NAME}) - IF(DEFINED PACKAGE_NAME AND NOT PARSE_NOEXEPREFIX) - SET(EXE_BINARY_NAME ${PACKAGE_NAME}_${EXE_BINARY_NAME}) - ENDIF() - - # IF (PARSE_TESTONLY) - # SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL") - # ELSE() - # SET(EXCLUDE_FROM_ALL_KEYWORD) - # ENDIF() - ADD_EXECUTABLE(${EXE_BINARY_NAME} ${EXCLUDE_FROM_ALL_KEYWORD} ${EXE_SOURCES}) - - TARGET_LINK_AND_INCLUDE_LIBRARIES(${EXE_BINARY_NAME} ${LINK_LIBS}) - - IF(PARSE_ADDED_EXE_TARGET_NAME_OUT) - SET(${PARSE_ADDED_EXE_TARGET_NAME_OUT} ${EXE_BINARY_NAME} PARENT_SCOPE) - ENDIF() - - IF(PARSE_INSTALLABLE) - INSTALL( - TARGETS ${EXE_BINARY_NAME} - EXPORT ${PROJECT_NAME} - DESTINATION bin - ) - ENDIF() -ENDFUNCTION() - -IF(NOT TARGET check) - ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR}) -ENDIF() - -FUNCTION(TRIBITS_ADD_TEST) -ENDFUNCTION() -FUNCTION(TRIBITS_TPL_TENTATIVELY_ENABLE) -ENDFUNCTION() - -FUNCTION(TRIBITS_ADD_ADVANCED_TEST) - # TODO Write this 
-ENDFUNCTION() - -FUNCTION(TRIBITS_ADD_EXECUTABLE_AND_TEST EXE_NAME) - - SET(options STANDARD_PASS_OUTPUT WILL_FAIL) - SET(oneValueArgs PASS_REGULAR_EXPRESSION FAIL_REGULAR_EXPRESSION ENVIRONMENT TIMEOUT CATEGORIES ADDED_TESTS_NAMES_OUT ADDED_EXE_TARGET_NAME_OUT) - SET(multiValueArgs) - - CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - TRIBITS_ADD_EXECUTABLE(${EXE_NAME} TESTONLY ADDED_EXE_TARGET_NAME_OUT TEST_NAME ${PARSE_UNPARSED_ARGUMENTS}) - - IF(WIN32) - ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${TEST_NAME}${CMAKE_EXECUTABLE_SUFFIX}) - ELSE() - ADD_TEST(NAME ${TEST_NAME} COMMAND ${TEST_NAME}) - ENDIF() - ADD_DEPENDENCIES(check ${TEST_NAME}) - - IF(PARSE_FAIL_REGULAR_EXPRESSION) - SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION ${PARSE_FAIL_REGULAR_EXPRESSION}) - ENDIF() - - IF(PARSE_PASS_REGULAR_EXPRESSION) - SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION ${PARSE_PASS_REGULAR_EXPRESSION}) - ENDIF() - - IF(PARSE_WILL_FAIL) - SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES WILL_FAIL ${PARSE_WILL_FAIL}) - ENDIF() - - IF(PARSE_ADDED_TESTS_NAMES_OUT) - SET(${PARSE_ADDED_TESTS_NAMES_OUT} ${TEST_NAME} PARENT_SCOPE) - ENDIF() - - IF(PARSE_ADDED_EXE_TARGET_NAME_OUT) - SET(${PARSE_ADDED_EXE_TARGET_NAME_OUT} ${TEST_NAME} PARENT_SCOPE) - ENDIF() - -ENDFUNCTION() - -MACRO(TIBITS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME) - ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME}) - TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES}) - TARGET_INCLUDE_DIRECTORIES(TPL_LIB_${TPL_NAME} INTERFACE ${TPL_${TPL_NAME}_INCLUDE_DIRS}) -ENDMACRO() - -FUNCTION(TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME) - - SET(options MUST_FIND_ALL_LIBS MUST_FIND_ALL_HEADERS NO_PRINT_ENABLE_SUCCESS_FAIL) - SET(oneValueArgs) - SET(multiValueArgs REQUIRED_HEADERS REQUIRED_LIBS_NAMES) - - CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" 
${ARGN}) - - SET(_${TPL_NAME}_ENABLE_SUCCESS TRUE) - IF (PARSE_REQUIRED_LIBS_NAMES) - FIND_LIBRARY(TPL_${TPL_NAME}_LIBRARIES NAMES ${PARSE_REQUIRED_LIBS_NAMES}) - IF(NOT TPL_${TPL_NAME}_LIBRARIES) - SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) - ENDIF() - ENDIF() - IF (PARSE_REQUIRED_HEADERS) - FIND_PATH(TPL_${TPL_NAME}_INCLUDE_DIRS NAMES ${PARSE_REQUIRED_HEADERS}) - IF(NOT TPL_${TPL_NAME}_INCLUDE_DIRS) - SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) - ENDIF() - ENDIF() - - - IF (_${TPL_NAME}_ENABLE_SUCCESS) - TIBITS_CREATE_IMPORTED_TPL_LIBRARY(${TPL_NAME}) - ENDIF() - -ENDFUNCTION() - -#MACRO(TRIBITS_PROCESS_TPL_DEP_FILE TPL_FILE) -# GET_FILENAME_COMPONENT(TPL_NAME ${TPL_FILE} NAME_WE) -# INCLUDE("${TPL_FILE}") -# IF(TARGET TPL_LIB_${TPL_NAME}) -# MESSAGE(STATUS "Found tpl library: ${TPL_NAME}") -# SET(TPL_ENABLE_${TPL_NAME} TRUE) -# ELSE() -# MESSAGE(STATUS "Tpl library not found: ${TPL_NAME}") -# SET(TPL_ENABLE_${TPL_NAME} FALSE) -# ENDIF() -#ENDMACRO() - -MACRO(PREPEND_TARGET_SET VARNAME TARGET_NAME TYPE) - IF(TYPE STREQUAL "REQUIRED") - SET(REQUIRED TRUE) - ELSE() - SET(REQUIRED FALSE) - ENDIF() - IF(TARGET ${TARGET_NAME}) - PREPEND_GLOBAL_SET(${VARNAME} ${TARGET_NAME}) - ELSE() - IF(REQUIRED) - MESSAGE(FATAL_ERROR "Missing dependency ${TARGET_NAME}") - ENDIF() - ENDIF() -ENDMACRO() - -MACRO(TRIBITS_APPEND_PACKAGE_DEPS DEP_LIST TYPE) - FOREACH(DEP ${ARGN}) - PREPEND_GLOBAL_SET(${DEP_LIST} PACKAGE_${DEP}) - ENDFOREACH() -ENDMACRO() - -MACRO(TRIBITS_APPEND_TPLS_DEPS DEP_LIST TYPE) - FOREACH(DEP ${ARGN}) - PREPEND_TARGET_SET(${DEP_LIST} TPL_LIB_${DEP} ${TYPE}) - ENDFOREACH() -ENDMACRO() - -MACRO(TRIBITS_ENABLE_TPLS) - FOREACH(TPL ${ARGN}) - IF(TARGET ${TPL}) - GLOBAL_SET(${PACKAGE_NAME}_ENABLE_${TPL} TRUE) - ELSE() - GLOBAL_SET(${PACKAGE_NAME}_ENABLE_${TPL} FALSE) - ENDIF() - ENDFOREACH() -ENDMACRO() - -MACRO(TRIBITS_PACKAGE_DEFINE_DEPENDENCIES) - - SET(options) - SET(oneValueArgs) - SET(multiValueArgs - LIB_REQUIRED_PACKAGES - LIB_OPTIONAL_PACKAGES - TEST_REQUIRED_PACKAGES - 
TEST_OPTIONAL_PACKAGES - LIB_REQUIRED_TPLS - LIB_OPTIONAL_TPLS - TEST_REQUIRED_TPLS - TEST_OPTIONAL_TPLS - REGRESSION_EMAIL_LIST - SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS - ) - CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - GLOBAL_SET(${PACKAGE_NAME}_DEPS "") - TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_DEPS REQUIRED ${PARSE_LIB_REQUIRED_PACKAGES}) - TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_DEPS OPTIONAL ${PARSE_LIB_OPTIONAL_PACKAGES}) - TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_DEPS REQUIRED ${PARSE_LIB_REQUIRED_TPLS}) - TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_DEPS OPTIONAL ${PARSE_LIB_OPTIONAL_TPLS}) - - GLOBAL_SET(${PACKAGE_NAME}_TEST_DEPS "") - TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_TEST_DEPS REQUIRED ${PARSE_TEST_REQUIRED_PACKAGES}) - TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_TEST_DEPS OPTIONAL ${PARSE_TEST_OPTIONAL_PACKAGES}) - TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_TEST_DEPS REQUIRED ${PARSE_TEST_REQUIRED_TPLS}) - TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_TEST_DEPS OPTIONAL ${PARSE_TEST_OPTIONAL_TPLS}) - - TRIBITS_ENABLE_TPLS(${PARSE_LIB_REQUIRED_TPLS} ${PARSE_LIB_OPTIONAL_TPLS} ${PARSE_TEST_REQUIRED_TPLS} ${PARSE_TEST_OPTIONAL_TPLS}) - -ENDMACRO() - -MACRO(TRIBITS_SUBPACKAGE NAME) - SET(PACKAGE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) - SET(PARENT_PACKAGE_NAME ${PACKAGE_NAME}) - SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME}) - STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) - SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) - - ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME}) - - GLOBAL_SET(${PACKAGE_NAME}_LIBS "") - - INCLUDE(${PACKAGE_SOURCE_DIR}/cmake/Dependencies.cmake) - -ENDMACRO(TRIBITS_SUBPACKAGE) - -MACRO(TRIBITS_SUBPACKAGE_POSTPROCESS) - TARGET_LINK_AND_INCLUDE_LIBRARIES(PACKAGE_${PACKAGE_NAME} ${${PACKAGE_NAME}_LIBS}) -ENDMACRO(TRIBITS_SUBPACKAGE_POSTPROCESS) - -MACRO(TRIBITS_PACKAGE_DECL NAME) - - SET(PACKAGE_NAME ${NAME}) - SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) - 
STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) - - #SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps") - #FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake") - #FOREACH(TPL_FILE ${TPLS_FILES}) - # TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE}) - #ENDFOREACH() - -ENDMACRO() - - -MACRO(TRIBITS_PROCESS_SUBPACKAGES) - FILE(GLOB SUBPACKAGES RELATIVE ${CMAKE_SOURCE_DIR} */cmake/Dependencies.cmake) - FOREACH(SUBPACKAGE ${SUBPACKAGES}) - GET_FILENAME_COMPONENT(SUBPACKAGE_CMAKE ${SUBPACKAGE} DIRECTORY) - GET_FILENAME_COMPONENT(SUBPACKAGE_DIR ${SUBPACKAGE_CMAKE} DIRECTORY) - ADD_SUBDIRECTORY(${CMAKE_BINARY_DIR}/../${SUBPACKAGE_DIR}) - ENDFOREACH() -ENDMACRO(TRIBITS_PROCESS_SUBPACKAGES) - -MACRO(TRIBITS_PACKAGE_DEF) -ENDMACRO(TRIBITS_PACKAGE_DEF) - -MACRO(TRIBITS_EXCLUDE_AUTOTOOLS_FILES) -ENDMACRO(TRIBITS_EXCLUDE_AUTOTOOLS_FILES) - -MACRO(TRIBITS_EXCLUDE_FILES) -ENDMACRO(TRIBITS_EXCLUDE_FILES) - -MACRO(TRIBITS_PACKAGE_POSTPROCESS) -ENDMACRO(TRIBITS_PACKAGE_POSTPROCESS) - diff --git a/containers/CMakeLists.txt b/containers/CMakeLists.txt index c37aa3e3e21..6b69fe62e65 100644 --- a/containers/CMakeLists.txt +++ b/containers/CMakeLists.txt @@ -1,13 +1,10 @@ -TRIBITS_SUBPACKAGE(Containers) +KOKKOS_SUBPACKAGE(Containers) +ADD_SUBDIRECTORY(src) -IF(KOKKOS_HAS_TRILINOS) - ADD_SUBDIRECTORY(src) -ENDIF() +KOKKOS_ADD_TEST_DIRECTORIES(unit_tests) +KOKKOS_ADD_TEST_DIRECTORIES(performance_tests) -TRIBITS_ADD_TEST_DIRECTORIES(unit_tests) -TRIBITS_ADD_TEST_DIRECTORIES(performance_tests) - -TRIBITS_SUBPACKAGE_POSTPROCESS() +KOKKOS_SUBPACKAGE_POSTPROCESS() diff --git a/containers/performance_tests/CMakeLists.txt b/containers/performance_tests/CMakeLists.txt index 3c6584bc346..3f25891eb96 100644 --- a/containers/performance_tests/CMakeLists.txt +++ b/containers/performance_tests/CMakeLists.txt @@ -3,14 +3,6 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) 
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) -IF(NOT KOKKOS_HAS_TRILINOS) - IF(KOKKOS_SEPARATE_LIBS) - set(TEST_LINK_TARGETS kokkoscore) - ELSE() - set(TEST_LINK_TARGETS kokkos) - ENDIF() -ENDIF() - SET(SOURCES TestMain.cpp TestCuda.cpp @@ -32,18 +24,13 @@ ENDIF() # it as a PERFORMANCE test. That's why we separate building the test # from running the test. -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec SOURCES ${SOURCES} - COMM serial mpi - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} - ) +) + +KOKKOS_ADD_TEST( + PerfTestExec + NAME PerformanceTest +) -TRIBITS_ADD_TEST( - PerformanceTest - NAME PerfTestExec - COMM serial mpi - NUM_MPI_PROCS 1 - CATEGORIES PERFORMANCE - FAIL_REGULAR_EXPRESSION " FAILED " - ) diff --git a/containers/src/CMakeLists.txt b/containers/src/CMakeLists.txt index e68fcad5e94..2fb19e72926 100644 --- a/containers/src/CMakeLists.txt +++ b/containers/src/CMakeLists.txt @@ -1,47 +1,25 @@ -TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) +KOKKOS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) +#need these here for now INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) #----------------------------------------------------------------------------- -SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) -if(KOKKOS_LEGACY_TRIBITS) +INSTALL ( + DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" +) - SET(HEADERS "") - SET(SOURCES "") +KOKKOS_ADD_LIBRARY( + kokkoscontainers + SOURCES ${KOKKOS_CONTAINERS_SRCS} +) +KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES(kokkoscontainers) +KOKKOS_LINK_INTERNAL_LIBRARY(kokkoscontainers kokkoscore) - SET(HEADERS_IMPL "") - - FILE(GLOB HEADERS *.hpp) - FILE(GLOB HEADERS_IMPL impl/*.hpp) - FILE(GLOB SOURCES impl/*.cpp) - - INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/) - - TRIBITS_ADD_LIBRARY( - kokkoscontainers - HEADERS ${HEADERS} - 
NOINSTALLHEADERS ${HEADERS_IMPL} - SOURCES ${SOURCES} - DEPLIBS - ) - -else() - - INSTALL ( - DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" - DESTINATION ${TRILINOS_INCDIR} - FILES_MATCHING PATTERN "*.hpp" - ) - - TRIBITS_ADD_LIBRARY( - kokkoscontainers - SOURCES ${KOKKOS_CONTAINERS_SRCS} - DEPLIBS - ) - -endif() #----------------------------------------------------------------------------- + diff --git a/containers/unit_tests/CMakeLists.txt b/containers/unit_tests/CMakeLists.txt index 8564bd9ddd6..71cf6c3e563 100644 --- a/containers/unit_tests/CMakeLists.txt +++ b/containers/unit_tests/CMakeLists.txt @@ -3,16 +3,8 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) -IF(NOT KOKKOS_HAS_TRILINOS) - IF(KOKKOS_SEPARATE_LIBS) - set(TEST_LINK_TARGETS kokkoscore) - ELSE() - set(TEST_LINK_TARGETS kokkos) - ENDIF() -ENDIF() - IF(Kokkos_ENABLE_Pthread) -TRIBITS_ADD_EXECUTABLE_AND_TEST( +KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Threads SOURCES UnitTestMain.cpp @@ -29,15 +21,11 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( threads/TestThreads_UnorderedMap.cpp threads/TestThreads_Vector.cpp threads/TestThreads_ViewCtorPropEmbeddedDim.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() IF(Kokkos_ENABLE_Serial) -TRIBITS_ADD_EXECUTABLE_AND_TEST( +KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial SOURCES UnitTestMain.cpp @@ -54,15 +42,11 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( serial/TestSerial_UnorderedMap.cpp serial/TestSerial_Vector.cpp serial/TestSerial_ViewCtorPropEmbeddedDim.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() IF(Kokkos_ENABLE_OpenMP) -TRIBITS_ADD_EXECUTABLE_AND_TEST( +KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMP SOURCES UnitTestMain.cpp @@ -79,15 +63,11 @@ 
TRIBITS_ADD_EXECUTABLE_AND_TEST( openmp/TestOpenMP_UnorderedMap.cpp openmp/TestOpenMP_Vector.cpp openmp/TestOpenMP_ViewCtorPropEmbeddedDim.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() IF(Kokkos_ENABLE_HPX) -TRIBITS_ADD_EXECUTABLE_AND_TEST( +KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_HPX SOURCES UnitTestMain.cpp @@ -104,15 +84,11 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( hpx/TestHPX_UnorderedMap.cpp hpx/TestHPX_Vector.cpp hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() IF(Kokkos_ENABLE_Cuda) -TRIBITS_ADD_EXECUTABLE_AND_TEST( +KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Cuda SOURCES UnitTestMain.cpp @@ -129,10 +105,6 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( cuda/TestCuda_UnorderedMap.cpp cuda/TestCuda_Vector.cpp cuda/TestCuda_ViewCtorPropEmbeddedDim.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 93db0d2ecf9..b943e82b71f 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -1,13 +1,12 @@ -TRIBITS_SUBPACKAGE(Core) +KOKKOS_SUBPACKAGE(Core) -IF(KOKKOS_HAS_TRILINOS) - ADD_SUBDIRECTORY(src) -ENDIF() +ADD_SUBDIRECTORY(src) -TRIBITS_ADD_TEST_DIRECTORIES(unit_test) -TRIBITS_ADD_TEST_DIRECTORIES(perf_test) +KOKKOS_ADD_TEST_DIRECTORIES(unit_test) +KOKKOS_ADD_TEST_DIRECTORIES(perf_test) + +KOKKOS_SUBPACKAGE_POSTPROCESS() -TRIBITS_SUBPACKAGE_POSTPROCESS() diff --git a/core/perf_test/CMakeLists.txt b/core/perf_test/CMakeLists.txt index d92462a357c..328afbda03c 100644 --- a/core/perf_test/CMakeLists.txt +++ b/core/perf_test/CMakeLists.txt @@ -1,20 +1,13 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) 
+#INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +#INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) -IF(NOT KOKKOS_HAS_TRILINOS) - IF(KOKKOS_SEPARATE_LIBS) - set(TEST_LINK_TARGETS kokkoscore) - ELSE() - set(TEST_LINK_TARGETS kokkos) - ENDIF() -ENDIF() # warning: PerfTest_CustomReduction.cpp uses # ../../algorithms/src/Kokkos_Random.hpp # we'll just allow it to be included, but note # that in TriBITS KokkosAlgorithms can be disabled... -INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/../../algorithms/src") +#INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/../../algorithms/src") SET(SOURCES PerfTestMain.cpp @@ -27,33 +20,27 @@ SET(SOURCES # it as a PERFORMANCE test. That's why we separate building the test # from running the test. -TRIBITS_ADD_EXECUTABLE( +#leave these as basic includes for now +#I don't need anything transitive +INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/../../algorithms/src") +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) + +KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec - SOURCES ${SOURCES} - COMM serial mpi - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} - ) + SOURCES ${SOURCES}) -TRIBITS_ADD_TEST( - PerfTest +KOKKOS_ADD_PERFORMANCE_TEST( + PerfTestExec NAME PerfTestExec - COMM serial mpi - NUM_MPI_PROCS 1 - CATEGORIES PERFORMANCE - FAIL_REGULAR_EXPRESSION " FAILED " - ) +) -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_TEST_EXECUTABLE( PerformanceTest_TaskDAG SOURCES test_taskdag.cpp - COMM serial mpi - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} - ) +) -TRIBITS_ADD_TEST( +KOKKOS_ADD_PERFORMANCE_TEST( PerformanceTest_TaskDAG NAME PerformanceTest_TaskDAG - COMM serial mpi - NUM_MPI_PROCS 1 - CATEGORIES PERFORMANCE - ) +) diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index a941c5da0c3..db0c15b2cab 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -1,124 +1,23 @@ +#I have to leave these 
here for tribits INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) -#----------------------------------------------------------------------------- +INSTALL (DIRECTORY + "${CMAKE_CURRENT_SOURCE_DIR}/" + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" +) -SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) - -#----------------------------------------------------------------------------- - -IF(KOKKOS_LEGACY_TRIBITS) - - MESSAGE("LEGACY STUFF GETTING CALLED") - - IF(KOKKOS_ENABLE_EXPLICIT_INSTANTIATION) - MESSAGE("GOING INTO ETI DIR") - ADD_SUBDIRECTORY(eti) - INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/eti") - ENDIF() - - ASSERT_DEFINED(${PROJECT_NAME}_ENABLE_CXX11) - ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_CUDA) - - SET(HEADERS_PUBLIC "") - SET(HEADERS_PRIVATE "") - SET(SOURCES "") - - FILE(GLOB HEADERS_PUBLIC Kokkos*.hpp) - LIST( APPEND HEADERS_PUBLIC ${CMAKE_BINARY_DIR}/${PACKAGE_NAME}_config.h ) - - #----------------------------------------------------------------------------- - - FILE(GLOB HEADERS_IMPL impl/*.hpp) - FILE(GLOB SOURCES_IMPL impl/*.cpp) - - LIST(APPEND HEADERS_PRIVATE ${HEADERS_IMPL} ) - LIST(APPEND SOURCES ${SOURCES_IMPL} ) - - INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/) - - #----------------------------------------------------------------------------- - - FILE(GLOB HEADERS_THREADS Threads/*.hpp) - FILE(GLOB SOURCES_THREADS Threads/*.cpp) - - LIST(APPEND HEADERS_PRIVATE ${HEADERS_THREADS} ) - LIST(APPEND SOURCES ${SOURCES_THREADS} ) - - INSTALL(FILES ${HEADERS_THREADS} DESTINATION ${TRILINOS_INCDIR}/Threads/) - - #----------------------------------------------------------------------------- - - FILE(GLOB HEADERS_OPENMP OpenMP/*.hpp) - FILE(GLOB SOURCES_OPENMP OpenMP/*.cpp) - - LIST(APPEND HEADERS_PRIVATE ${HEADERS_OPENMP} ) - LIST(APPEND SOURCES ${SOURCES_OPENMP} ) - - INSTALL(FILES ${HEADERS_OPENMP} DESTINATION 
${TRILINOS_INCDIR}/OpenMP/) - - #----------------------------------------------------------------------------- - - FILE(GLOB HEADERS_HPX HPX/*.hpp) - FILE(GLOB SOURCES_HPX HPX/*.cpp) - - LIST(APPEND HEADERS_PRIVATE ${HEADERS_HPX} ) - LIST(APPEND SOURCES ${SOURCES_HPX} ) - - INSTALL(FILES ${HEADERS_HPX} DESTINATION ${TRILINOS_INCDIR}/HPX/) - - #----------------------------------------------------------------------------- - - FILE(GLOB HEADERS_CUDA Cuda/*.hpp) - FILE(GLOB SOURCES_CUDA Cuda/*.cpp) - - LIST(APPEND HEADERS_PRIVATE ${HEADERS_CUDA} ) - LIST(APPEND SOURCES ${SOURCES_CUDA} ) - - INSTALL(FILES ${HEADERS_CUDA} DESTINATION ${TRILINOS_INCDIR}/Cuda/) - - #----------------------------------------------------------------------------- - FILE(GLOB HEADERS_QTHREADS Qthreads/*.hpp) - FILE(GLOB SOURCES_QTHREADS Qthreads/*.cpp) - - LIST(APPEND HEADERS_PRIVATE ${HEADERS_QTHREADS} ) - LIST(APPEND SOURCES ${SOURCES_QTHREADS} ) - - IF(KOKKOS_ENABLE_EXPLICIT_INSTANTIATION) - LIST(APPEND SOURCES ${ETI_SOURCES} ) - ENDIF() - - INSTALL(FILES ${HEADERS_QTHREADS} DESTINATION ${TRILINOS_INCDIR}/Qthreads/) - - TRIBITS_ADD_LIBRARY( - kokkoscore - HEADERS ${HEADERS_PUBLIC} - NOINSTALLHEADERS ${HEADERS_PRIVATE} - SOURCES ${SOURCES} - DEPLIBS - ) - -#----------------------------------------------------------------------------- -# In the new build system, sources are calculated by Makefile.kokkos -else() - - INSTALL (DIRECTORY - "${CMAKE_CURRENT_SOURCE_DIR}/" - DESTINATION ${TRILINOS_INCDIR} - FILES_MATCHING PATTERN "*.hpp" - ) - - TRIBITS_ADD_LIBRARY( - kokkoscore - SOURCES ${KOKKOS_CORE_SRCS} - DEPLIBS - ) - -endif() -#----------------------------------------------------------------------------- +KOKKOS_ADD_LIBRARY( + kokkoscore + SOURCES ${KOKKOS_CORE_SRCS} +) +#include source and binary dirs +KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES(kokkoscore) # build and install pkgconfig file -CONFIGURE_FILE(kokkos.pc.in kokkos.pc @ONLY) -INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc 
DESTINATION lib/pkgconfig) +#CONFIGURE_FILE(kokkos.pc.in kokkos.pc @ONLY) +#INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION lib/pkgconfig) + diff --git a/core/unit_test/CMakeLists.txt b/core/unit_test/CMakeLists.txt index 6a480daa8d4..f2be15fb48a 100644 --- a/core/unit_test/CMakeLists.txt +++ b/core/unit_test/CMakeLists.txt @@ -2,47 +2,31 @@ # Add test-only library for gtest to be reused by all the subpackages # -IF(NOT KOKKOS_HAS_TRILINOS) - IF(KOKKOS_SEPARATE_LIBS) - set(TEST_LINK_TARGETS kokkoscore) - ELSE() - set(TEST_LINK_TARGETS kokkos) - ENDIF() -ENDIF() SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest) -# TODO get the C++ standard flag from KOKKOS_CXX_STANDARD -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0") -INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) -TRIBITS_ADD_LIBRARY( +#need here for tribits +KOKKOS_ADD_COMPILE_OPTIONS("-DGTEST_HAS_PTHREAD=0") +KOKKOS_ADD_TEST_LIBRARY( kokkos_gtest HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc - TESTONLY - ) - -IF(NOT KOKKOS_HAS_TRILINOS) -target_compile_options( - kokkos_gtest - PUBLIC $<$:${KOKKOS_CXX_FLAGS}> -) -target_link_libraries( - kokkos_gtest - PUBLIC ${KOKKOS_LD_FLAGS} ) -ENDIF() +KOKKOS_TARGET_COMPILE_OPTIONS(kokkos_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0") +KOKKOS_TARGET_INCLUDE_DIRECTORIES(kokkos_gtest PUBLIC ${GTEST_SOURCE_DIR}) + # # Define the tests # -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) +#I will leave these alone for now because I don't need transitive dependencies on tests +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) IF(Kokkos_ENABLE_Serial) IF(KOKKOS_SEPARATE_TESTS) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial_Atomics SOURCES UnitTestMainInit.cpp @@ -57,12 +41,8 @@ 
IF(Kokkos_ENABLE_Serial) serial/TestSerial_AtomicOperations_complexfloat.cpp serial/TestSerial_AtomicViews.cpp serial/TestSerial_Atomics.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial_SubView SOURCES UnitTestMainInit.cpp @@ -81,12 +61,8 @@ IF(Kokkos_ENABLE_Serial) serial/TestSerial_SubView_c11.cpp serial/TestSerial_SubView_c12.cpp serial/TestSerial_SubView_c13.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial_ViewAPI SOURCES UnitTestMainInit.cpp @@ -96,24 +72,16 @@ IF(Kokkos_ENABLE_Serial) serial/TestSerial_ViewAPI_d.cpp serial/TestSerial_ViewAPI_e.cpp serial/TestSerial_ViewOfClass.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial_ViewMapping SOURCES UnitTestMainInit.cpp serial/TestSerial_ViewMapping_a.cpp serial/TestSerial_ViewMapping_b.cpp serial/TestSerial_ViewMapping_subview.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial_Reducers SOURCES UnitTestMainInit.cpp @@ -122,12 +90,8 @@ IF(Kokkos_ENABLE_Serial) serial/TestSerial_Reducers_b.cpp serial/TestSerial_Reducers_c.cpp serial/TestSerial_Reducers_d.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial_MDRange SOURCES UnitTestMainInit.cpp @@ -136,34 +100,22 @@ IF(Kokkos_ENABLE_Serial) 
serial/TestSerial_MDRange_c.cpp serial/TestSerial_MDRange_d.cpp serial/TestSerial_MDRange_e.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial_Team SOURCES UnitTestMainInit.cpp serial/TestSerial_Team.cpp serial/TestSerial_TeamReductionScan.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial_Tasking SOURCES UnitTestMainInit.cpp serial/TestSerial_Task.cpp serial/TestSerial_WorkGraph.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial_Misc SOURCES UnitTestMainInit.cpp @@ -174,13 +126,9 @@ IF(Kokkos_ENABLE_Serial) serial/TestSerial_Scan.cpp serial/TestSerial_SharedAlloc.cpp serial/TestSerial_Crs.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ELSE() - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial SOURCES UnitTestMainInit.cpp @@ -241,16 +189,12 @@ IF(Kokkos_ENABLE_Serial) serial/TestSerial_ViewOfClass.cpp serial/TestSerial_Crs.cpp serial/TestSerial_WorkGraph.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() ENDIF() IF(Kokkos_ENABLE_Pthread) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Threads SOURCES UnitTestMainInit.cpp @@ -310,16 +254,12 @@ IF(Kokkos_ENABLE_Pthread) threads/TestThreads_ViewOfClass.cpp threads/TestThreads_Crs.cpp threads/TestThreads_WorkGraph.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest 
${TEST_LINK_TARGETS} ) ENDIF() IF(Kokkos_ENABLE_OpenMP) IF(KOKKOS_SEPARATE_TESTS) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMP_Atomics SOURCES UnitTestMainInit.cpp @@ -334,12 +274,8 @@ IF(Kokkos_ENABLE_OpenMP) openmp/TestOpenMP_AtomicOperations_complexfloat.cpp openmp/TestOpenMP_AtomicViews.cpp openmp/TestOpenMP_Atomics.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMP_SubView SOURCES UnitTestMainInit.cpp @@ -358,12 +294,8 @@ IF(Kokkos_ENABLE_OpenMP) openmp/TestOpenMP_SubView_c11.cpp openmp/TestOpenMP_SubView_c12.cpp openmp/TestOpenMP_SubView_c13.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMP_ViewAPI SOURCES UnitTestMainInit.cpp @@ -373,24 +305,16 @@ IF(Kokkos_ENABLE_OpenMP) openmp/TestOpenMP_ViewAPI_d.cpp openmp/TestOpenMP_ViewAPI_e.cpp openmp/TestOpenMP_ViewOfClass.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMP_ViewMapping SOURCES UnitTestMainInit.cpp openmp/TestOpenMP_ViewMapping_a.cpp openmp/TestOpenMP_ViewMapping_b.cpp openmp/TestOpenMP_ViewMapping_subview.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMP_Reducers SOURCES UnitTestMainInit.cpp @@ -399,12 +323,8 @@ IF(Kokkos_ENABLE_OpenMP) openmp/TestOpenMP_Reducers_b.cpp openmp/TestOpenMP_Reducers_c.cpp openmp/TestOpenMP_Reducers_d.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS 
kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMP_MDRange SOURCES UnitTestMainInit.cpp @@ -413,34 +333,22 @@ IF(Kokkos_ENABLE_OpenMP) openmp/TestOpenMP_MDRange_c.cpp openmp/TestOpenMP_MDRange_d.cpp openmp/TestOpenMP_MDRange_e.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMP_Team SOURCES UnitTestMainInit.cpp openmp/TestOpenMP_Team.cpp openmp/TestOpenMP_TeamReductionScan.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMP_Tasking SOURCES UnitTestMainInit.cpp openmp/TestOpenMP_Task.cpp openmp/TestOpenMP_WorkGraph.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMP_Misc SOURCES UnitTestMainInit.cpp @@ -452,23 +360,15 @@ IF(Kokkos_ENABLE_OpenMP) openmp/TestOpenMP_SharedAlloc.cpp openmp/TestOpenMP_Crs.cpp openmp/TestOpenMP_UniqueToken.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMPInterOp SOURCES UnitTestMain.cpp openmp/TestOpenMP_InterOp.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ELSE() - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMP SOURCES UnitTestMainInit.cpp @@ -529,26 +429,18 @@ IF(Kokkos_ENABLE_OpenMP) openmp/TestOpenMP_Crs.cpp openmp/TestOpenMP_WorkGraph.cpp openmp/TestOpenMP_UniqueToken.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - 
FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMPInterOp SOURCES UnitTestMain.cpp openmp/TestOpenMP_InterOp.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() ENDIF() IF(Kokkos_ENABLE_HPX) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_HPX SOURCES UnitTestMainInit.cpp @@ -607,25 +499,17 @@ IF(Kokkos_ENABLE_HPX) hpx/TestHPX_Crs.cpp hpx/TestHPX_WorkGraph.cpp hpx/TestHPX_UniqueToken.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_HPXInterOp SOURCES UnitTestMain.cpp hpx/TestHPX_InterOp.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() IF(Kokkos_ENABLE_Qthreads) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Qthreads SOURCES UnitTestMainInit.cpp @@ -658,15 +542,11 @@ IF(Kokkos_ENABLE_Qthreads) qthreads/TestQthreads_ViewAPI_c.cpp qthreads/TestQthreads_ViewAPI_d.cpp qthreads/TestQthreads_ViewAPI_e.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() IF(Kokkos_ENABLE_Cuda) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Cuda SOURCES UnitTestMainInit.cpp @@ -749,34 +629,22 @@ IF(Kokkos_ENABLE_Cuda) cuda/TestCuda_Crs.cpp cuda/TestCuda_WorkGraph.cpp cuda/TestCuda_UniqueToken.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_CudaInterOpInit SOURCES UnitTestMain.cpp cuda/TestCuda_InterOp_Init.cpp - COMM serial 
mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) - TRIBITS_ADD_EXECUTABLE_AND_TEST( + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_CudaInterOpStreams SOURCES UnitTestMain.cpp cuda/TestCuda_InterOp_Streams.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) ENDIF() -TRIBITS_ADD_EXECUTABLE_AND_TEST( +KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Default SOURCES UnitTestMainInit.cpp @@ -792,33 +660,25 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( default/TestDefaultDeviceType_c3.cpp default/TestDefaultDeviceType_d.cpp default/TestDefaultDeviceTypeResize.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) -TRIBITS_ADD_EXECUTABLE_AND_TEST( +KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_PushFinalizeHook SOURCES UnitTest_PushFinalizeHook.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION "FAILED" - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) # This test is special, because it passes exactly when it prints the # message "PASSED: I am the custom std::terminate handler.", AND calls # std::terminate. This means that we can't use -# TRIBITS_ADD_EXECUTABLE_AND_TEST. See GitHub issue #2147. +# KOKKOS_ADD_EXECUTABLE_AND_TEST. See GitHub issue #2147. 
-TRIBITS_ADD_EXECUTABLE( push_finalize_hook_terminate +KOKKOS_ADD_EXECUTABLE( push_finalize_hook_terminate SOURCES UnitTest_PushFinalizeHook_terminate.cpp TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) -TRIBITS_ADD_ADVANCED_TEST( UnitTest_PushFinalizeHook_terminate +KOKKOS_ADD_ADVANCED_TEST( UnitTest_PushFinalizeHook_terminate TEST_0 EXEC push_finalize_hook_terminate NUM_MPI_PROCS 1 @@ -828,67 +688,23 @@ TRIBITS_ADD_ADVANCED_TEST( UnitTest_PushFinalizeHook_terminate ) foreach(INITTESTS_NUM RANGE 1 16) -TRIBITS_ADD_EXECUTABLE_AND_TEST( +KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_DefaultInit_${INITTESTS_NUM} SOURCES UnitTestMain.cpp default/TestDefaultDeviceTypeInit_${INITTESTS_NUM}.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) endforeach(INITTESTS_NUM) -TRIBITS_ADD_EXECUTABLE_AND_TEST( +if (KOKKOS_ENABLE_HWLOC) +KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_HWLOC SOURCES UnitTestMain.cpp TestHWLOC.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) +endif() -TRIBITS_ADD_EXECUTABLE_AND_TEST( +KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_HostBarrier SOURCES UnitTestMain.cpp TestHostBarrier.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) -# -# Compile-only tests -# -FUNCTION(KOKKOS_ADD_COMPILE_TEST TEST_NAME) - - SET(options LINK_KOKKOS) - SET(oneValueArgs) - SET(multiValueArgs) - - CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - IF(PARSE_LINK_KOKKOS) - SET(libs ${TEST_LINK_TARGETS}) - ELSE() - SET(libs) - ENDIF() - - TRIBITS_ADD_EXECUTABLE( - ${TEST_NAME} - TESTONLY - COMM serial - TESTONLYLIBS ${libs} - ${PARSE_UNPARSED_ARGUMENTS} - ) - - target_compile_options( - ${PACKAGE_NAME}_${TEST_NAME} - PUBLIC $<$:${KOKKOS_CXX_FLAGS}> - ) - target_link_libraries( - ${PACKAGE_NAME}_${TEST_NAME} - PUBLIC 
${KOKKOS_LD_FLAGS} - ) -ENDFUNCTION() diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 3809cc2ea57..507401b8c31 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,19 +2,19 @@ # Subpackage name must match what appears in kokkos/cmake/Dependencies.cmake # -TRIBITS_SUBPACKAGE(Example) +KOKKOS_SUBPACKAGE(Example) -TRIBITS_ADD_EXAMPLE_DIRECTORIES(query_device) -TRIBITS_ADD_EXAMPLE_DIRECTORIES(fixture) -TRIBITS_ADD_EXAMPLE_DIRECTORIES(feint) -TRIBITS_ADD_EXAMPLE_DIRECTORIES(fenl) -TRIBITS_ADD_EXAMPLE_DIRECTORIES(multi_fem) -TRIBITS_ADD_EXAMPLE_DIRECTORIES(md_skeleton) -TRIBITS_ADD_EXAMPLE_DIRECTORIES(global_2_local_ids) -TRIBITS_ADD_EXAMPLE_DIRECTORIES(grow_array) -TRIBITS_ADD_EXAMPLE_DIRECTORIES(sort_array) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(query_device) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(fixture) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(feint) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(fenl) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(multi_fem) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(md_skeleton) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(global_2_local_ids) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(grow_array) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(sort_array) if(NOT Kokkos_ENABLE_Cuda) -TRIBITS_ADD_EXAMPLE_DIRECTORIES(tutorial) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(tutorial) endif() -TRIBITS_SUBPACKAGE_POSTPROCESS() +KOKKOS_SUBPACKAGE_POSTPROCESS() diff --git a/example/feint/CMakeLists.txt b/example/feint/CMakeLists.txt index 0018b9f9f53..b8c2b77321b 100644 --- a/example/feint/CMakeLists.txt +++ b/example/feint/CMakeLists.txt @@ -10,9 +10,8 @@ FILE(GLOB SOURCES *.cpp) LIST( APPEND SOURCES ../fixture/BoxElemPart.cpp) -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( feint SOURCES ${SOURCES} - COMM serial mpi - ) +) diff --git a/example/fenl/CMakeLists.txt b/example/fenl/CMakeLists.txt index 150656b16e1..5e1e846b7b7 100644 --- a/example/fenl/CMakeLists.txt +++ b/example/fenl/CMakeLists.txt @@ -10,8 +10,8 @@ FILE( GLOB SOURCES *.cpp ) LIST( APPEND SOURCES ../fixture/BoxElemPart.cpp ) -TRIBITS_ADD_EXECUTABLE( 
+KOKKOS_ADD_EXECUTABLE( fenl SOURCES ${SOURCES} - COMM serial mpi - ) +) + diff --git a/example/fixture/CMakeLists.txt b/example/fixture/CMakeLists.txt index 298c54c5bb3..1735927ec90 100644 --- a/example/fixture/CMakeLists.txt +++ b/example/fixture/CMakeLists.txt @@ -6,8 +6,8 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common) SET(SOURCES_TEST Main.cpp TestFixture.cpp BoxElemPart.cpp ) # Automatically picks up 'kokkosexample_fixture' -TRIBITS_ADD_EXECUTABLE_AND_TEST( +KOKKOS_ADD_EXECUTABLE_AND_TEST( TestFixture SOURCES ${SOURCES_TEST} - ) +) diff --git a/example/global_2_local_ids/CMakeLists.txt b/example/global_2_local_ids/CMakeLists.txt index 9f32fe58024..5c2bd370456 100644 --- a/example/global_2_local_ids/CMakeLists.txt +++ b/example/global_2_local_ids/CMakeLists.txt @@ -2,16 +2,11 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) -SET(SOURCES "") +SET(SOURCES G2L_Main.cpp) -SET(SOURCES - G2L_Main.cpp - ) - -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( global_2_local_ids SOURCES ${SOURCES} - COMM serial mpi - ) +) diff --git a/example/grow_array/CMakeLists.txt b/example/grow_array/CMakeLists.txt index d9ff1704929..c5a5d2d0c26 100644 --- a/example/grow_array/CMakeLists.txt +++ b/example/grow_array/CMakeLists.txt @@ -6,9 +6,8 @@ SET(SOURCES "") FILE(GLOB SOURCES *.cpp) -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( grow_array SOURCES ${SOURCES} - COMM serial mpi - ) +) diff --git a/example/cmake_build/CMakeLists.txt b/example/in_tree_build/CMakeLists.txt similarity index 100% rename from example/cmake_build/CMakeLists.txt rename to example/in_tree_build/CMakeLists.txt diff --git a/example/cmake_build/cmake_example.cpp b/example/in_tree_build/cmake_example.cpp similarity index 100% rename from example/cmake_build/cmake_example.cpp rename to example/in_tree_build/cmake_example.cpp diff --git a/example/cmake_build/foo.f b/example/in_tree_build/foo.f similarity index 100% rename from 
example/cmake_build/foo.f rename to example/in_tree_build/foo.f diff --git a/example/md_skeleton/CMakeLists.txt b/example/md_skeleton/CMakeLists.txt index 28412c37847..53d2856ddcd 100644 --- a/example/md_skeleton/CMakeLists.txt +++ b/example/md_skeleton/CMakeLists.txt @@ -3,14 +3,10 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) SET(SOURCES "") -SET(LIBRARIES "") - FILE(GLOB SOURCES *.cpp ) -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( md_skeleton SOURCES ${SOURCES} - COMM serial mpi - DEPLIBS ${LIBRARIES} - ) +) diff --git a/example/multi_fem/CMakeLists.txt b/example/multi_fem/CMakeLists.txt index e3a40bc26f0..e3d417b9b6e 100644 --- a/example/multi_fem/CMakeLists.txt +++ b/example/multi_fem/CMakeLists.txt @@ -6,11 +6,8 @@ SET(SOURCES "") FILE(GLOB SOURCES *.cpp) -SET(LIBRARIES kokkoscore) - -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( multi_fem SOURCES ${SOURCES} - COMM serial mpi - ) +) diff --git a/example/query_device/CMakeLists.txt b/example/query_device/CMakeLists.txt index dade7f01fef..0e4fa4f4450 100644 --- a/example/query_device/CMakeLists.txt +++ b/example/query_device/CMakeLists.txt @@ -3,12 +3,10 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) SET(SOURCES "") - FILE(GLOB SOURCES *.cpp) -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( query_device SOURCES ${SOURCES} - COMM serial mpi - ) +) diff --git a/example/sort_array/CMakeLists.txt b/example/sort_array/CMakeLists.txt index 0c7da74f4a9..813e942f87a 100644 --- a/example/sort_array/CMakeLists.txt +++ b/example/sort_array/CMakeLists.txt @@ -6,9 +6,8 @@ SET(SOURCES "") FILE(GLOB SOURCES *.cpp) -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( sort_array SOURCES ${SOURCES} - COMM serial mpi - ) +) diff --git a/example/tutorial/01_hello_world/CMakeLists.txt b/example/tutorial/01_hello_world/CMakeLists.txt index 5e5b1fcb46f..de35d75d5ea 100644 --- a/example/tutorial/01_hello_world/CMakeLists.txt 
+++ b/example/tutorial/01_hello_world/CMakeLists.txt @@ -3,9 +3,8 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_01_hello_world SOURCES hello_world.cpp - COMM serial mpi - ) +) diff --git a/example/tutorial/01_hello_world_lambda/CMakeLists.txt b/example/tutorial/01_hello_world_lambda/CMakeLists.txt index 3fcca4bceba..f267de12351 100644 --- a/example/tutorial/01_hello_world_lambda/CMakeLists.txt +++ b/example/tutorial/01_hello_world_lambda/CMakeLists.txt @@ -2,12 +2,9 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) -IF (Kokkos_ENABLE_CXX11) - # This is a tutorial, not a test, so we don't ask CTest to run it. - TRIBITS_ADD_EXECUTABLE( - tutorial_01_hello_world_lambda - SOURCES hello_world_lambda.cpp - COMM serial mpi - ) -ENDIF () +# This is a tutorial, not a test, so we don't ask CTest to run it. +KOKKOS_ADD_EXECUTABLE( + tutorial_01_hello_world_lambda + SOURCES hello_world_lambda.cpp +) diff --git a/example/tutorial/02_simple_reduce/CMakeLists.txt b/example/tutorial/02_simple_reduce/CMakeLists.txt index 7c78db840f8..3d9683500dd 100644 --- a/example/tutorial/02_simple_reduce/CMakeLists.txt +++ b/example/tutorial/02_simple_reduce/CMakeLists.txt @@ -3,8 +3,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. 
-TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_02_simple_reduce SOURCES simple_reduce.cpp - COMM serial mpi - ) +) diff --git a/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt b/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt index e2e3a929f1a..9c4e48c5523 100644 --- a/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt +++ b/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt @@ -2,11 +2,8 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) -IF (Kokkos_ENABLE_CXX11) - # This is a tutorial, not a test, so we don't ask CTest to run it. - TRIBITS_ADD_EXECUTABLE( - tutorial_02_simple_reduce_lambda - SOURCES simple_reduce_lambda.cpp - COMM serial mpi - ) -ENDIF () +KOKKOS_ADD_EXECUTABLE( + tutorial_02_simple_reduce_lambda + SOURCES simple_reduce_lambda.cpp +) + diff --git a/example/tutorial/03_simple_view/CMakeLists.txt b/example/tutorial/03_simple_view/CMakeLists.txt index 7475a99e492..b19d07ec011 100644 --- a/example/tutorial/03_simple_view/CMakeLists.txt +++ b/example/tutorial/03_simple_view/CMakeLists.txt @@ -3,8 +3,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_03_simple_view SOURCES simple_view.cpp - COMM serial mpi - ) +) diff --git a/example/tutorial/03_simple_view_lambda/CMakeLists.txt b/example/tutorial/03_simple_view_lambda/CMakeLists.txt index 601fe452a4c..2f42523464f 100644 --- a/example/tutorial/03_simple_view_lambda/CMakeLists.txt +++ b/example/tutorial/03_simple_view_lambda/CMakeLists.txt @@ -4,7 +4,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) IF (Kokkos_ENABLE_CXX11) # This is a tutorial, not a test, so we don't ask CTest to run it. 
- TRIBITS_ADD_EXECUTABLE( + KOKKOS_ADD_EXECUTABLE( tutorial_03_simple_view_lambda SOURCES simple_view_lambda.cpp COMM serial mpi diff --git a/example/tutorial/04_simple_memoryspaces/CMakeLists.txt b/example/tutorial/04_simple_memoryspaces/CMakeLists.txt index 09f209077a0..03fb97a133c 100644 --- a/example/tutorial/04_simple_memoryspaces/CMakeLists.txt +++ b/example/tutorial/04_simple_memoryspaces/CMakeLists.txt @@ -3,8 +3,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_04_simple_memoryspaces SOURCES simple_memoryspaces.cpp - COMM serial mpi - ) +) diff --git a/example/tutorial/05_simple_atomics/CMakeLists.txt b/example/tutorial/05_simple_atomics/CMakeLists.txt index 5a5790fb048..85870e5e504 100644 --- a/example/tutorial/05_simple_atomics/CMakeLists.txt +++ b/example/tutorial/05_simple_atomics/CMakeLists.txt @@ -3,8 +3,8 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_05_simple_atomics SOURCES simple_atomics.cpp - COMM serial mpi - ) +) + diff --git a/example/tutorial/06_simple_mdrangepolicy/CMakeLists.txt b/example/tutorial/06_simple_mdrangepolicy/CMakeLists.txt index d18938a61f8..ca9f0bf8da9 100644 --- a/example/tutorial/06_simple_mdrangepolicy/CMakeLists.txt +++ b/example/tutorial/06_simple_mdrangepolicy/CMakeLists.txt @@ -3,8 +3,8 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. 
-TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_06_simple_mdrangepolicy SOURCES simple_mdrangepolicy.cpp - COMM serial mpi - ) +) + diff --git a/example/tutorial/Advanced_Views/01_data_layouts/CMakeLists.txt b/example/tutorial/Advanced_Views/01_data_layouts/CMakeLists.txt index 2eb3a8f6c98..b0db41bf451 100644 --- a/example/tutorial/Advanced_Views/01_data_layouts/CMakeLists.txt +++ b/example/tutorial/Advanced_Views/01_data_layouts/CMakeLists.txt @@ -3,8 +3,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_advancedviews_01_data_layouts SOURCES data_layouts.cpp - COMM serial mpi - ) +) diff --git a/example/tutorial/Advanced_Views/02_memory_traits/CMakeLists.txt b/example/tutorial/Advanced_Views/02_memory_traits/CMakeLists.txt index 1963e544d7a..0e50968b4bf 100644 --- a/example/tutorial/Advanced_Views/02_memory_traits/CMakeLists.txt +++ b/example/tutorial/Advanced_Views/02_memory_traits/CMakeLists.txt @@ -3,8 +3,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_advancedviews_02_memory_traits SOURCES memory_traits.cpp - COMM serial mpi - ) +) diff --git a/example/tutorial/Advanced_Views/03_subviews/CMakeLists.txt b/example/tutorial/Advanced_Views/03_subviews/CMakeLists.txt index cbe394c78b8..90270740c3c 100644 --- a/example/tutorial/Advanced_Views/03_subviews/CMakeLists.txt +++ b/example/tutorial/Advanced_Views/03_subviews/CMakeLists.txt @@ -3,8 +3,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. 
-TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_advancedviews_03_subviews SOURCES subviews.cpp - COMM serial mpi - ) +) diff --git a/example/tutorial/Advanced_Views/04_dualviews/CMakeLists.txt b/example/tutorial/Advanced_Views/04_dualviews/CMakeLists.txt index 300dab128e4..4f9b9225d21 100644 --- a/example/tutorial/Advanced_Views/04_dualviews/CMakeLists.txt +++ b/example/tutorial/Advanced_Views/04_dualviews/CMakeLists.txt @@ -3,8 +3,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_advancedviews_04_dualviews SOURCES dual_view.cpp - COMM serial mpi - ) +) diff --git a/example/tutorial/Advanced_Views/05_NVIDIA_UVM/CMakeLists.txt b/example/tutorial/Advanced_Views/05_NVIDIA_UVM/CMakeLists.txt index f0ed569f9f4..6a0efbffa64 100644 --- a/example/tutorial/Advanced_Views/05_NVIDIA_UVM/CMakeLists.txt +++ b/example/tutorial/Advanced_Views/05_NVIDIA_UVM/CMakeLists.txt @@ -3,11 +3,9 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) IF (Kokkos_ENABLE_Cuda_UVM) - # This is a tutorial, not a test, so we don't ask CTest to run it. - TRIBITS_ADD_EXECUTABLE( - tutorial_advancedviews_05_nvidia_uvm - SOURCES uvm_example.cpp - COMM serial mpi - DEPLIBS kokkoscontainers kokkoscore - ) +# This is a tutorial, not a test, so we don't ask CTest to run it. 
+KOKKOS_ADD_EXECUTABLE( + tutorial_advancedviews_05_nvidia_uvm + SOURCES uvm_example.cpp +) ENDIF () diff --git a/example/tutorial/Advanced_Views/CMakeLists.txt b/example/tutorial/Advanced_Views/CMakeLists.txt index f4f1addc555..8ecb052c589 100644 --- a/example/tutorial/Advanced_Views/CMakeLists.txt +++ b/example/tutorial/Advanced_Views/CMakeLists.txt @@ -1,9 +1,9 @@ -TRIBITS_ADD_EXAMPLE_DIRECTORIES(01_data_layouts) -TRIBITS_ADD_EXAMPLE_DIRECTORIES(02_memory_traits) -TRIBITS_ADD_EXAMPLE_DIRECTORIES(03_subviews) -TRIBITS_ADD_EXAMPLE_DIRECTORIES(04_dualviews) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(01_data_layouts) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(02_memory_traits) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(03_subviews) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(04_dualviews) IF (Kokkos_ENABLE_Cuda_UVM) - TRIBITS_ADD_EXAMPLE_DIRECTORIES(05_NVIDIA_UVM) + KOKKOS_ADD_EXAMPLE_DIRECTORIES(05_NVIDIA_UVM) ENDIF () diff --git a/example/tutorial/Hierarchical_Parallelism/01_thread_teams/CMakeLists.txt b/example/tutorial/Hierarchical_Parallelism/01_thread_teams/CMakeLists.txt index 2d8a514a454..e7cd6dea07f 100644 --- a/example/tutorial/Hierarchical_Parallelism/01_thread_teams/CMakeLists.txt +++ b/example/tutorial/Hierarchical_Parallelism/01_thread_teams/CMakeLists.txt @@ -3,8 +3,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. 
-TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_hierarchicalparallelism_01_thread_teams SOURCES thread_teams.cpp - COMM serial mpi - ) +) diff --git a/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/CMakeLists.txt b/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/CMakeLists.txt index ec7f1e1159f..8c7f3853a01 100644 --- a/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/CMakeLists.txt +++ b/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/CMakeLists.txt @@ -2,12 +2,9 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) -IF (Kokkos_ENABLE_CXX11) - # This is a tutorial, not a test, so we don't ask CTest to run it. - TRIBITS_ADD_EXECUTABLE( - tutorial_hierarchical_01_thread_teams_lambda - SOURCES thread_teams_lambda.cpp - COMM serial mpi - ) -ENDIF () +# This is a tutorial, not a test, so we don't ask CTest to run it. +KOKKOS_ADD_EXECUTABLE( + tutorial_hierarchical_01_thread_teams_lambda + SOURCES thread_teams_lambda.cpp +) diff --git a/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/CMakeLists.txt b/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/CMakeLists.txt index e6604053451..92b701e4f43 100644 --- a/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/CMakeLists.txt +++ b/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/CMakeLists.txt @@ -3,8 +3,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. 
-TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_hierarchicalparallelism_02_nested_parallel_for SOURCES nested_parallel_for.cpp - COMM serial mpi - ) +) diff --git a/example/tutorial/Hierarchical_Parallelism/03_vectorization/CMakeLists.txt b/example/tutorial/Hierarchical_Parallelism/03_vectorization/CMakeLists.txt index ea6b0b1e426..3907d166648 100644 --- a/example/tutorial/Hierarchical_Parallelism/03_vectorization/CMakeLists.txt +++ b/example/tutorial/Hierarchical_Parallelism/03_vectorization/CMakeLists.txt @@ -3,14 +3,8 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. - -IF(Kokkos_ENABLE_CXX11) - -TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_hierarchicalparallelism_03_vectorization SOURCES vectorization.cpp - COMM serial mpi - ) - -ENDIF() +) diff --git a/example/tutorial/Hierarchical_Parallelism/04_team_scan/CMakeLists.txt b/example/tutorial/Hierarchical_Parallelism/04_team_scan/CMakeLists.txt index 15ad5d78034..d2f83a25eab 100644 --- a/example/tutorial/Hierarchical_Parallelism/04_team_scan/CMakeLists.txt +++ b/example/tutorial/Hierarchical_Parallelism/04_team_scan/CMakeLists.txt @@ -3,8 +3,8 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. 
-TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_hierarchicalparallelism_04_team_scan SOURCES team_scan.cpp - COMM serial mpi - ) +) + diff --git a/example/tutorial/Hierarchical_Parallelism/CMakeLists.txt b/example/tutorial/Hierarchical_Parallelism/CMakeLists.txt index e03d7aeb901..c892df34cd5 100644 --- a/example/tutorial/Hierarchical_Parallelism/CMakeLists.txt +++ b/example/tutorial/Hierarchical_Parallelism/CMakeLists.txt @@ -1,8 +1,6 @@ -TRIBITS_ADD_EXAMPLE_DIRECTORIES(01_thread_teams) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(01_thread_teams) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(01_thread_teams_lambda) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(02_nested_parallel_for) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(03_vectorization) -IF (Kokkos_ENABLE_CXX11) - TRIBITS_ADD_EXAMPLE_DIRECTORIES(01_thread_teams_lambda) - TRIBITS_ADD_EXAMPLE_DIRECTORIES(02_nested_parallel_for) - TRIBITS_ADD_EXAMPLE_DIRECTORIES(03_vectorization) -ENDIF () diff --git a/example/tutorial/launch_bounds/CMakeLists.txt b/example/tutorial/launch_bounds/CMakeLists.txt index 7c78db840f8..3d9683500dd 100644 --- a/example/tutorial/launch_bounds/CMakeLists.txt +++ b/example/tutorial/launch_bounds/CMakeLists.txt @@ -3,8 +3,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. 
-TRIBITS_ADD_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE( tutorial_02_simple_reduce SOURCES simple_reduce.cpp - COMM serial mpi - ) +) From 38beedbc5cec8451f5dfe882413b64a376afde47 Mon Sep 17 00:00:00 2001 From: jjwilke Date: Wed, 17 Apr 2019 15:18:14 -0600 Subject: [PATCH 002/530] library list fix --- cmake/kokkos_tribits.cmake | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index c0d1de9ad09..7404f8885a4 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -567,13 +567,15 @@ FUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY NAME) IF (KOKKOS_HAS_TRILINOS) TRIBITS_ADD_LIBRARY(${NAME} ${ARGN}) ELSE() - ADD_LIBRARY(${NAME} INTERFACE) - #I hate that property append doesn't have a remove duplicates - GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) - LIST(APPEND LIBLIST ${NAME}) - LIST(REMOVE_DUPLICATES LIBLIST) - SET_PROPERTY(GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES ${LIBLIST}) - GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + IF (KOKKOS_SEPARATE_LIBS) + ADD_LIBRARY(${NAME} INTERFACE) + #I hate that property append doesn't have a remove duplicates + GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + LIST(APPEND LIBLIST ${NAME}) + LIST(REMOVE_DUPLICATES LIBLIST) + SET_PROPERTY(GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES ${LIBLIST}) + GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + ENDIF() ENDIF() ENDFUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY) From 6c107bda6186d9c2247c3574deb1ef72a72be45f Mon Sep 17 00:00:00 2001 From: jjwilke Date: Fri, 19 Apr 2019 18:41:07 -0700 Subject: [PATCH 003/530] c++ standard now a compile feature, other fixes: #2103 #2099 #2100 #2025 --- CMakeLists.txt | 57 +++++++++++++++++- Makefile.kokkos | 48 ++++++++++----- cmake/KokkosConfig.cmake.in | 21 ++++++- cmake/kokkos_settings.cmake | 3 +- cmake/kokkos_tribits.cmake | 109 ++++++++++++++++++++-------------- core/unit_test/CMakeLists.txt | 1 + 6 files changed, 175 
insertions(+), 64 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 59898e14632..7518dedfc80 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ set(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) IF(NOT KOKKOS_HAS_TRILINOS) - cmake_minimum_required(VERSION 3.3 FATAL_ERROR) + cmake_minimum_required(VERSION 3.8 FATAL_ERROR) IF(NOT DEFINED ${PROJECT_NAME}) PROJECT(Kokkos CXX) ENDIF() @@ -22,6 +22,61 @@ IF(NOT KOKKOS_HAS_TRILINOS) set (Kokkos_VERSION_PATCH 4) ENDIF() +SET(KOKKOS_CXX_STANDARD "" CACHE STRING "The C++ standard for Kokkos to use: c++11, c++14, or c++17") +SET(KOKKOS_CXX_FEATURES "" CACHE STRING "The list of C++ features for Kokkos to enable") +SET(CXX_STANDARD_TEST) + +IF (KOKKOS_CXX_STANDARD AND CMAKE_CXX_STANDARD) + #make sure these are consistent + message("GOT ${KOKKOS_CXX_STANDARD} AND ${CMAKE_CXX_STANDARD}") + IF (${KOKKOS_CXX_STANDARD} STREQUAL "c++11") + IF(NOT ${CMAKE_CXX_STANDARD} STREQUAL "11") + SET(CXX_STD_ERROR ON) + ENDIF() + ELSEIF (${KOKKOS_CXX_STANDARD} STREQUAL "c++14") + IF(NOT ${CMAKE_CXX_STANDARD} STREQUAL "14") + SET(CXX_STD_ERROR ON) + ENDIF() + ELSEIF (${KOKKOS_CXX_STANDARD} STREQUAL "c++17") + IF(NOT ${CMAKE_CXX_STANDARD} STREQUAL "17") + SET(CXX_STD_ERROR ON) + ENDIF() + ELSE() + #KOKKOS_CXX_STANDARD is something else, which means definitely invalid + SET(CXX_STD_ERROR ON) + ENDIF() + IF (CXX_STD_ERROR) + MESSAGE(FATAL_ERROR "Specified both CMAKE_CXX_STANDARD and KOKKOS_CXX_STANDARD, but they don't match") + ENDIF() +ENDIF() + +IF (NOT KOKKOS_CXX_STANDARD AND NOT CMAKE_CXX_STANDARD) + SET(KOKKOS_CXX_STANDARD "c++11") +ENDIF() + +IF (KOKKOS_CXX_STANDARD) + IF (${KOKKOS_CXX_STANDARD} STREQUAL "c++11") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_11) + ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++14") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_14) + ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++17") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_17) + ENDIF() +ENDIF() + +IF 
(CMAKE_CXX_STANDARD) + IF (${CMAKE_CXX_STANDARD} STREQUAL "11") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_11) + ELSEIF(${CMAKE_CXX_STANDARD} STREQUAL "14") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_14) + ELSEIF(${CMAKE_CXX_STANDARD} STREQUAL "17") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_17) + ENDIF() +ENDIF() + + + + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake) KOKKOS_SETUP_BUILD_ENVIRONMENT() diff --git a/Makefile.kokkos b/Makefile.kokkos index 6d75304e039..ebc01f4270a 100644 --- a/Makefile.kokkos +++ b/Makefile.kokkos @@ -41,7 +41,8 @@ kokkos_has_string=$(if $(findstring $2,$1),1,0) # Will return a 1 if /path/to/file exists kokkos_path_exists=$(if $(wildcard $1),1,0) -# Check for general settings. +# Check for general settings + KOKKOS_INTERNAL_ENABLE_DEBUG := $(call kokkos_has_string,$(KOKKOS_DEBUG),yes) KOKKOS_INTERNAL_ENABLE_CXX11 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++11) KOKKOS_INTERNAL_ENABLE_CXX14 := $(call kokkos_has_string,$(KOKKOS_CXX_STANDARD),c++14) @@ -377,8 +378,8 @@ endif # Generating the list of Flags. 
-#CPPFLAGS is now unused KOKKOS_CPPFLAGS = +KOKKOS_LIBDIRS = ifneq ($(KOKKOS_CMAKE), yes) KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH) endif @@ -393,7 +394,7 @@ endif KOKKOS_LIBS = -ldl KOKKOS_TPL_LIBRARY_NAMES += dl ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_LDFLAGS = -L$(shell pwd) + KOKKOS_LIBDIRS = -L$(shell pwd) # CXXLDFLAGS is used together with CXXFLAGS in a combined compile/link command KOKKOS_CXXLDFLAGS = -L$(shell pwd) endif @@ -486,28 +487,38 @@ ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1) tmp := $(call kokkos_append_header,"\#endif") endif +#only add the c++ standard flags if this is not CMake tmp := $(call kokkos_append_header,"/* General Settings */") ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1) +ifneq ($(KOKKOS_CMAKE), yes) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) +endif tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX14), 1) +ifneq ($(KOKKOS_CMAKE), yes) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX14_FLAG) +endif tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Y), 1) + #I cannot make CMake add this in a good way - so add it here KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Y_FLAG) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX17), 1) +ifneq ($(KOKKOS_CMAKE), yes) #for CMake, use target_compile_features KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX17_FLAG) +endif tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1) + #I cannot make CMake add this in a good way - so add it here KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX2A), 1) + #I cannot make CMake add this in a good way - so add it here KOKKOS_CXXFLAGS += 
$(KOKKOS_INTERNAL_CXX2A_FLAG) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX20") endif @@ -534,7 +545,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) ifneq ($(KOKKOS_CMAKE), yes) ifneq ($(HWLOC_PATH),) KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include - KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib + KOKKOS_LIBDIRS += -L$(HWLOC_PATH)/lib KOKKOS_CXXLDFLAGS += -L$(HWLOC_PATH)/lib KOKKOS_TPL_INCLUDE_DIRS += $(HWLOC_PATH)/include KOKKOS_TPL_LIBRARY_DIRS += $(HWLOC_PATH)/lib @@ -555,7 +566,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) ifneq ($(KOKKOS_CMAKE), yes) ifneq ($(MEMKIND_PATH),) KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include - KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib + KOKKOS_LIBDIRS += -L$(MEMKIND_PATH)/lib KOKKOS_CXXLDFLAGS += -L$(MEMKIND_PATH)/lib KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib @@ -1077,15 +1088,13 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) endif KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) ifneq ($(CUDA_PATH),) - ifneq ($(KOKKOS_CMAKE), yes) - KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include - endif + KOKKOS_CPPLAGS += -I$(CUDA_PATH)/include ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib64), 1) - KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 + KOKKOS_LIBDIRS += -L$(CUDA_PATH)/lib64 KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib64 KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib64 else ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib), 1) - KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib + KOKKOS_LIBDIRS += -L$(CUDA_PATH)/lib KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib else @@ -1144,7 +1153,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) ifneq ($(KOKKOS_CMAKE), yes) ifneq ($(QTHREADS_PATH),) KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include - KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib + KOKKOS_LIBDIRS += -L$(QTHREADS_PATH)/lib KOKKOS_CXXLDFLAGS += -L$(QTHREADS_PATH)/lib KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include KOKKOS_TPL_LIBRARY_DIRS += $(QTHREADS_PATH)/lib64 @@ 
-1161,21 +1170,21 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) KOKKOS_CXXFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --cflags hpx_application_debug) KOKKOS_CXXLDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug) - KOKKOS_LDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug) + KOKKOS_LIBS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug) else KOKKOS_CXXFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --cflags hpx_application) KOKKOS_CXXLDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application) - KOKKOS_LDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application) + KOKKOS_LIBS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application) endif else ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) KOKKOS_CXXFLAGS += $(shell pkg-config --cflags hpx_application_debug) KOKKOS_CXXLDFLAGS += $(shell pkg-config --libs hpx_application_debug) - KOKKOS_LDFLAGS += $(shell pkg-config --libs hpx_application_debug) + KOKKOS_LIBS += $(shell pkg-config --libs hpx_application_debug) else KOKKOS_CXXFLAGS += $(shell pkg-config --cflags hpx_application) KOKKOS_CXXLDFLAGS += $(shell pkg-config --libs hpx_application) - KOKKOS_LDFLAGS += $(shell pkg-config --libs hpx_application) + KOKKOS_LIBS += $(shell pkg-config --libs hpx_application) endif endif KOKKOS_TPL_LIBRARY_NAMES += hpx @@ -1237,3 +1246,12 @@ libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS) ranlib libkokkos.a KOKKOS_LINK_DEPENDS=libkokkos.a + +#we have carefully separated LDFLAGS from LIBS and LIBDIRS +#we have also separated CPPFLAGS from CXXFLAGS +#if this is not cmake, for backwards compatibility +#we just jam everything together into the CXXFLAGS and LDFLAGS +ifneq ($(KOKKOS_CMAKE), 
yes) + KOKKOS_CXXFLAGS += $(KOKKOS_CPPFLAGS) + KOKKOS_LDFLAGS += $(KOKKOS_LIBDIRS) +endif diff --git a/cmake/KokkosConfig.cmake.in b/cmake/KokkosConfig.cmake.in index 0b4676b6ea0..8389f6f96f5 100644 --- a/cmake/KokkosConfig.cmake.in +++ b/cmake/KokkosConfig.cmake.in @@ -29,8 +29,27 @@ if (${Kokkos_FIND_VERSION_MINOR}) endif() FUNCTION(TARGET_LINK_KOKKOS TARGET) +CMAKE_PARSE_ARGUMENTS( + PARSE + "PRIVATE;PUBLIC;INTERFACE" + "" + "" + ${ARGN} +) FOREACH(LIB ${Kokkos_LIBRARIES}) - TARGET_LINK_LIBRARIES(${TARGET} ${ARGN} ${LIB}) + GET_TARGET_PROPERTY(PROP ${LIB} TYPE) + SET(LINK_TYPE) + IF (${PROP} STREQUAL "INTERFACE_LIBRARY") + #if an interface library, you MUST use link interface + SET(LINK_TYPE "INTERFACE") + ELSEIF(PARSE_PUBLIC) + SET(LINK_TYPE "PUBLIC") + ELSEIF(PARSE_PRIVATE) + SET(LINK_TYPE "PRIVATE") + ELSEIF(PARSE_INTERFACE) + SET(LINK_TYPE "INTERFACE") + ENDIF() + TARGET_LINK_LIBRARIES(${TARGET} ${LINK_TYPE} ${ARGN} ${LIB}) ENDFOREACH() ENDFUNCTION(TARGET_LINK_KOKKOS) diff --git a/cmake/kokkos_settings.cmake b/cmake/kokkos_settings.cmake index 2c622d0de91..52246752ae5 100644 --- a/cmake/kokkos_settings.cmake +++ b/cmake/kokkos_settings.cmake @@ -172,8 +172,9 @@ if (CMAKE_CXX_STANDARD) set(KOKKOS_CXX_STANDARD "gnu++${CMAKE_CXX_STANDARD}") endif() endif() - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "KOKKOS_CXX_STANDARD=\"${KOKKOS_CXX_STANDARD}\"") endif() +#changed - allow user to directly set KOKKOS_CXX_STANDARD +set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "KOKKOS_CXX_STANDARD=\"${KOKKOS_CXX_STANDARD}\"") # Final form that gets passed to make set(KOKKOS_SETTINGS env ${KOKKOS_SETTINGS}) diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 7404f8885a4..ed24b810433 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -113,6 +113,33 @@ MACRO(ADD_INTERFACE_LIBRARY LIB_NAME) SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE) ENDMACRO() +MACRO(KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL LIBRARY_NAME) + INSTALL( + TARGETS 
${LIBRARY_NAME} + EXPORT ${PROJECT_NAME} + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + COMPONENT ${PACKAGE_NAME} + ) + + INSTALL( + TARGETS ${LIBRARY_NAME} + EXPORT KokkosTargets + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + ) + + #Can we please add a remove duplicates to property append + GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + LIST(APPEND LIBLIST ${LIBRARY_NAME}) + LIST(REMOVE_DUPLICATES LIBLIST) + SET_PROPERTY(GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES ${LIBLIST}) + GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + VERIFY_EMPTY(KOKKOS_ADD_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) +ENDMACRO(KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL) + FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) CMAKE_PARSE_ARGUMENTS(PARSE @@ -157,6 +184,10 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) PUBLIC ${KOKKOS_TPL_INCLUDE_DIRS} ) + foreach(FEATURE IN LISTS KOKKOS_CXX_FEATURES) + TARGET_COMPILE_FEATURES(${LIBRARY_NAME} PUBLIC ${FEATURE}) + endforeach() + foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES) if (("${lib}" STREQUAL "cuda") AND (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) set(LIB_cuda "-lcuda") @@ -175,22 +206,7 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) endif() endforeach() - INSTALL( - TARGETS ${LIBRARY_NAME} - EXPORT ${PROJECT_NAME} - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib - COMPONENT ${PACKAGE_NAME} - ) - - INSTALL( - TARGETS ${LIBRARY_NAME} - EXPORT KokkosTargets - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib - ) + KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${LIBRARY_NAME}) INSTALL( FILES ${PARSE_HEADERS} @@ -198,13 +214,6 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) COMPONENT ${PACKAGE_NAME} ) - #Can we please add a remove duplicates to property append - GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) - LIST(APPEND LIBLIST ${LIBRARY_NAME}) - LIST(REMOVE_DUPLICATES LIBLIST) - 
SET_PROPERTY(GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES ${LIBLIST}) - GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) - VERIFY_EMPTY(KOKKOS_ADD_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) ENDFUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME) @@ -292,20 +301,24 @@ FUNCTION(KOKKOS_ADD_ADVANCED_TEST) ENDFUNCTION() FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST EXE_NAME) - if (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_EXECUTABLE_AND_TEST(${EXE_NAME} ${ARGN}) - else() - CMAKE_PARSE_ARGUMENTS(PARSE - "" - "" - "SOURCES" - ${ARGN}) - - KOKKOS_ADD_TEST_EXECUTABLE(${EXE_NAME} SOURCES ${PARSE_SOURCES}) - KOKKOS_ADD_TEST(${EXE_NAME} NAME ${EXE_NAME} - FAIL_REGULAR_EXPRESSION " FAILED " - ${PARSE_UNPARSED_ARGUMENTS}) - endif() +IF (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_EXECUTABLE( + ${EXE_NAME} + TESTONLYLIBS kokkos_gtest ${KOKKOS_TEST_LINK_TARGETS} + ${ARGN} + ) +ELSE() + CMAKE_PARSE_ARGUMENTS(PARSE + "" + "" + "SOURCES" + ${ARGN}) + KOKKOS_ADD_EXECUTABLE(${PACKAGE_NAME}_${EXE_NAME} + SOURCES ${PARSE_SOURCES} + TESTONLYLIBS kokkos_gtest ${KOKKOS_TEST_LINK_TARGETS} + ${PARSE_UNPARSED_ARGUMENTS} + ) +ENDIF() ENDFUNCTION() MACRO(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME) @@ -403,9 +416,10 @@ ENDMACRO(KOKKOS_PACKAGE_DEF) MACRO(KOKKOS_TARGET_COMPILE_OPTIONS TARGET) if(KOKKOS_HAS_TRILINOS) -COMPILE_OPTIONS(${ARGN}) + ADD_COMPILE_OPTIONS(${ARGN}) + TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) else() -TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) + TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) endif() ENDMACRO() @@ -502,8 +516,11 @@ ENDFUNCTION(KOKKOS_LIB_TYPE) FUNCTION(KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES TARGET) IF(KOKKOS_HAS_TRILINOS) #ignore the target, tribits doesn't do anything directly with targets + TRIBITS_INCLUDE_DIRECTORIES(${KOKKOS_TOP_BUILD_DIR}) TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} + $) ELSEIF(KOKKOS_SEPARATE_LIBS) 
KOKKOS_LIB_TYPE(${TARGET} INCTYPE) TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} @@ -517,6 +534,7 @@ ELSEIF(KOKKOS_SEPARATE_LIBS) ELSE() #append to a list for later SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}) SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${KOKKOS_TOP_BUILD_DIR}) ENDIF() ENDFUNCTION(KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES) @@ -524,6 +542,8 @@ ENDFUNCTION(KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES) FUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET) IF(KOKKOS_HAS_TRILINOS) TRIBITS_INCLUDE_DIRECTORIES(${TARGET} ${ARGN}) + #don't trust tribits to do this correctly + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} ${ARGN}) ELSEIF(TARGET ${TARGET}) #the target actually exists - this means we are doing separate libs #or this a test library @@ -569,19 +589,14 @@ IF (KOKKOS_HAS_TRILINOS) ELSE() IF (KOKKOS_SEPARATE_LIBS) ADD_LIBRARY(${NAME} INTERFACE) - #I hate that property append doesn't have a remove duplicates - GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) - LIST(APPEND LIBLIST ${NAME}) - LIST(REMOVE_DUPLICATES LIBLIST) - SET_PROPERTY(GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES ${LIBLIST}) - GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME}) ENDIF() ENDIF() ENDFUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY) FUNCTION(KOKKOS_ADD_TEST_LIBRARY NAME) IF (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_LIBRARY(${NAME} ${ARGN}) + TRIBITS_ADD_LIBRARY(${NAME} ${ARGN} TESTONLY) ELSE() SET(oneValueArgs) SET(multiValueArgs HEADERS SOURCES) @@ -614,6 +629,8 @@ ENDFUNCTION(KOKKOS_ADD_TEST_LIBRARY) FUNCTION(KOKKOS_TARGET_COMPILE_DEFINITIONS TARGET VISIBILITY) IF(KOKKOS_HAS_TRILINOS) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0") + #don't trust tribits to do this correctly + TARGET_COMPILE_DEFINITIONS(${TARGET} ${VISIBILITY} ${ARGN}) 
ELSEIF(TARGET ${TARGET}) #the target actually exists - this means we are doing separate libs #or this a test library diff --git a/core/unit_test/CMakeLists.txt b/core/unit_test/CMakeLists.txt index f2be15fb48a..7ed1f182141 100644 --- a/core/unit_test/CMakeLists.txt +++ b/core/unit_test/CMakeLists.txt @@ -7,6 +7,7 @@ SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest) #need here for tribits KOKKOS_ADD_COMPILE_OPTIONS("-DGTEST_HAS_PTHREAD=0") +KOKKOS_INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) KOKKOS_ADD_TEST_LIBRARY( kokkos_gtest HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h From dd85bd509e09854455f35a883896572b124cedf3 Mon Sep 17 00:00:00 2001 From: jjwilke Date: Sat, 20 Apr 2019 12:16:09 -0700 Subject: [PATCH 004/530] tribits fixes, spack-friendly names --- CMakeLists.txt | 6 +++++- cmake/kokkos_options.cmake | 1 + cmake/kokkos_settings.cmake | 2 +- cmake/kokkos_tribits.cmake | 3 +++ 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7518dedfc80..a67d74e943b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,10 +8,15 @@ ENDIF() # Basic initialization (Used in KOKKOS_SETTINGS) set(Kokkos_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(KOKKOS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) +OPTION(KOKKOS_ENABLE_EXPLICIT_INSTANATION "Whether to enable explicit template instantiation" OFF) +OPTION(KOKKOS_ENABLE_ETI "Whether to enable explicit template instantiation" OFF) +OPTION(KOKKOS_ENABLE_Cuda_RDC "Whether to enable relocatable device code for CUDA" OFF) + IF(NOT KOKKOS_HAS_TRILINOS) cmake_minimum_required(VERSION 3.8 FATAL_ERROR) IF(NOT DEFINED ${PROJECT_NAME}) @@ -28,7 +33,6 @@ SET(CXX_STANDARD_TEST) IF (KOKKOS_CXX_STANDARD AND CMAKE_CXX_STANDARD) #make sure these are consistent - message("GOT ${KOKKOS_CXX_STANDARD} AND ${CMAKE_CXX_STANDARD}") IF (${KOKKOS_CXX_STANDARD} 
STREQUAL "c++11") IF(NOT ${CMAKE_CXX_STANDARD} STREQUAL "11") SET(CXX_STD_ERROR ON) diff --git a/cmake/kokkos_options.cmake b/cmake/kokkos_options.cmake index e730a946645..4875cc6259c 100644 --- a/cmake/kokkos_options.cmake +++ b/cmake/kokkos_options.cmake @@ -22,6 +22,7 @@ list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST LIBRT Cuda_Lambda Cuda_Relocatable_Device_Code + Cuda_RDC Cuda_UVM Cuda_LDG_Intrinsic HPX_ASYNC_DISPATCH diff --git a/cmake/kokkos_settings.cmake b/cmake/kokkos_settings.cmake index 52246752ae5..32a8bf797a8 100644 --- a/cmake/kokkos_settings.cmake +++ b/cmake/kokkos_settings.cmake @@ -74,7 +74,7 @@ endif() if(${KOKKOS_ENABLE_PROFILING_LOAD_PRINT}) list(APPEND KOKKOS_OPTIONSl enable_profile_load_print) endif() -if(${KOKKOS_ENABLE_EXPLICIT_INSTANTIATION}) +if(${KOKKOS_ENABLE_EXPLICIT_INSTANTIATION} OR ${KOKKOS_ENABLE_ETI}) list(APPEND KOKKOS_OPTIONSl enable_eti) endif() # List needs to be comma-delimitted diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index ed24b810433..9f2ec7956bc 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -515,6 +515,7 @@ ENDFUNCTION(KOKKOS_LIB_TYPE) FUNCTION(KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES TARGET) IF(KOKKOS_HAS_TRILINOS) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) #ignore the target, tribits doesn't do anything directly with targets TRIBITS_INCLUDE_DIRECTORIES(${KOKKOS_TOP_BUILD_DIR}) TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) @@ -541,12 +542,14 @@ ENDFUNCTION(KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES) FUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET) IF(KOKKOS_HAS_TRILINOS) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) TRIBITS_INCLUDE_DIRECTORIES(${TARGET} ${ARGN}) #don't trust tribits to do this correctly TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} ${ARGN}) ELSEIF(TARGET ${TARGET}) #the target actually exists - this means we are doing separate libs #or this a test library + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} ${ARGN}) 
ELSE() GET_PROPERTY(LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) From a4f0c4aaf8f83f43cb05bbaeaed62efc41361c93 Mon Sep 17 00:00:00 2001 From: jjwilke Date: Sun, 21 Apr 2019 10:22:40 -0700 Subject: [PATCH 005/530] cmake fixes for enable case issues: #2108 --- CMakeLists.txt | 3 -- cmake/kokkos_options.cmake | 25 +++------------ cmake/kokkos_tribits.cmake | 62 +++++++++++++++++++++++++------------- 3 files changed, 46 insertions(+), 44 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a67d74e943b..d985e5f14b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,9 +13,6 @@ set(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) -OPTION(KOKKOS_ENABLE_EXPLICIT_INSTANATION "Whether to enable explicit template instantiation" OFF) -OPTION(KOKKOS_ENABLE_ETI "Whether to enable explicit template instantiation" OFF) -OPTION(KOKKOS_ENABLE_Cuda_RDC "Whether to enable relocatable device code for CUDA" OFF) IF(NOT KOKKOS_HAS_TRILINOS) cmake_minimum_required(VERSION 3.8 FATAL_ERROR) diff --git a/cmake/kokkos_options.cmake b/cmake/kokkos_options.cmake index 4875cc6259c..df0a757015b 100644 --- a/cmake/kokkos_options.cmake +++ b/cmake/kokkos_options.cmake @@ -61,7 +61,12 @@ foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST}) ENDIF() ELSE() SET(KOKKOS_INTERNAL_ENABLE_${OPT}_DEFAULT ${Kokkos_ENABLE_${opt}}) + SET(KOKKOS_ENABLE_${OPT} ${Kokkos_ENABLE_${opt}}) ENDIF() + ELSEIF(DEFINED KOKKOS_ENABLE_${OPT}) + #if we are here, the lower case version is not defined + #define it to avoid breaking anything later on + SET(Kokkos_ENABLE_${opt} ${KOKKOS_ENABLE_${OPT}}) ENDIF() endforeach() @@ -384,26 +389,6 @@ set_kokkos_default_default(HPX_ASYNC_DISPATCH OFF) set(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH ${KOKKOS_INTERNAL_ENABLE_HPX_ASYNC_DISPATCH_DEFAULT} CACHE BOOL "Enable HPX async dispatch.") -#------------------------------------------------------------------------------- -#----------------------- HOST 
ARCH AND LEGACY TRIBITS -------------------------- -#------------------------------------------------------------------------------- - -# This defines the previous legacy TriBITS builds. -set(KOKKOS_LEGACY_TRIBITS False) -IF ("${KOKKOS_ARCH}" STREQUAL "NOT_SET") - set(KOKKOS_ARCH "None") - IF(KOKKOS_HAS_TRILINOS) - set(KOKKOS_LEGACY_TRIBITS True) - ENDIF() -ENDIF() -IF (KOKKOS_HAS_TRILINOS) - IF (KOKKOS_LEGACY_TRIBITS) - message(STATUS "Using the legacy tribits build because KOKKOS_ARCH not set") - ELSE() - message(STATUS "NOT using the legacy tribits build because KOKKOS_ARCH *is* set") - ENDIF() -ENDIF() - #------------------------------------------------------------------------------- #----------------------- Set CamelCase Options if they are not yet set --------- #------------------------------------------------------------------------------- diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 9f2ec7956bc..305117fb822 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -5,28 +5,42 @@ cmake_policy(SET CMP0054 NEW) MESSAGE(STATUS "The project name is: ${PROJECT_NAME}") -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP) - SET(${PROJECT_NAME}_ENABLE_OpenMP OFF) -ENDIF() +#Leave this here for now - but only do for tribits +#This breaks the standalone CMake +IF (KOKKOS_HAS_TRILINOS) + IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP) + SET(${PROJECT_NAME}_ENABLE_OpenMP OFF) + ENDIF() -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_HPX) - SET(${PROJECT_NAME}_ENABLE_HPX OFF) -ENDIF() + IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_HPX) + SET(${PROJECT_NAME}_ENABLE_HPX OFF) + ENDIF() -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG) - SET(${PROJECT_NAME}_ENABLE_DEBUG OFF) -ENDIF() + IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG) + SET(${PROJECT_NAME}_ENABLE_DEBUG OFF) + ENDIF() -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_CXX11) - SET(${PROJECT_NAME}_ENABLE_CXX11 ON) -ENDIF() + IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_CXX11) + SET(${PROJECT_NAME}_ENABLE_CXX11 ON) 
+ ENDIF() -IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_TESTS) - SET(${PROJECT_NAME}_ENABLE_TESTS OFF) -ENDIF() + IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_TESTS) + SET(${PROJECT_NAME}_ENABLE_TESTS OFF) + ENDIF() -IF(NOT DEFINED TPL_ENABLE_Pthread) - SET(TPL_ENABLE_Pthread OFF) + IF(NOT DEFINED TPL_ENABLE_Pthread) + SET(TPL_ENABLE_Pthread OFF) + ENDIF() +ELSE() + #Don't do any of these yet - because of case nonsense + #We should not have any enable variables show up in the cache or bad things will happen + #OPTION(Kokkos_ENABLE_EXPLICIT_INSTANATION "Whether to enable explicit template instantiation" OFF) + #OPTION(Kokkos_ENABLE_ETI "Whether to enable explicit template instantiation" OFF) + #OPTION(Kokkos_ENABLE_Cuda_RDC "Whether to enable relocatable device code for CUDA" OFF) + #OPTION(Kokkos_ENABLE_OpenMP "Whether to enable the OpenMP backend" OFF) + #OPTION(Kokkos_ENABLE_HPX "Whether to enable the HPX backend" OFF) + #OPTION(Kokkos_ENABLE_DEBUG "Whether to enable extra debug checks/prints" OFF) + #OPTION(Kokkos_ENABLE_TESTS "Whether to enable ENDIF() FUNCTION(ASSERT_DEFINED VARS) @@ -423,12 +437,9 @@ else() endif() ENDMACRO() + MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) if (NOT KOKKOS_HAS_TRILINOS) - set(KOKKOS_ENABLE_EXAMPLES OFF CACHE BOOL "Whether to build examples") - set(KOKKOS_ENABLE_TESTS OFF CACHE BOOL "Whether to build tests") - - #------------ COMPILER AND FEATURE CHECKS ------------------------------------ include(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake) set_kokkos_cxx_compiler() @@ -682,3 +693,12 @@ MACRO(KOKKOS_ADD_COMPILE_OPTIONS) ADD_COMPILE_OPTIONS(${ARGN}) ENDMACRO() +MACRO(PRINTALL) +get_cmake_property(_variableNames VARIABLES) +list (SORT _variableNames) +foreach (_variableName ${_variableNames}) + if("${_variableName}" MATCHES "Kokkos" OR "${_variableName}" MATCHES "KOKKOS") + message(STATUS "${_variableName}=${${_variableName}}") + endif() +endforeach() +ENDMACRO(PRINTALL) From bfe4656ae3b1da496e95d255b4cd7ae52a10f037 Mon Sep 17 00:00:00 2001 From: 
jjwilke Date: Sun, 21 Apr 2019 10:32:38 -0700 Subject: [PATCH 006/530] missing default option: #2108 --- cmake/kokkos_options.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/kokkos_options.cmake b/cmake/kokkos_options.cmake index df0a757015b..7d9dc9ac14e 100644 --- a/cmake/kokkos_options.cmake +++ b/cmake/kokkos_options.cmake @@ -314,6 +314,9 @@ set(KOKKOS_ENABLE_DEPRECATED_CODE ${KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE_DEFAU set_kokkos_default_default(EXPLICIT_INSTANTIATION OFF) set(KOKKOS_ENABLE_EXPLICIT_INSTANTIATION ${KOKKOS_INTERNAL_ENABLE_EXPLICIT_INSTANTIATION_DEFAULT} CACHE BOOL "Enable explicit template instantiation.") +set_kokkos_default_default(ETI OFF) +set(KOKKOS_ENABLE_ETI ${KOKKOS_INTERNAL_ENABLE_EXPLICIT_INSTANTIATION_DEFAULT} CACHE BOOL "Enable explicit template instantiation.") + #------------------------------------------------------------------------------- #------------------------------- KOKKOS_USE_TPLS ------------------------------- #------------------------------------------------------------------------------- From 961545f5bbac3feedca2efae91b720685f6c69c8 Mon Sep 17 00:00:00 2001 From: jjwilke Date: Tue, 23 Apr 2019 14:13:51 -0700 Subject: [PATCH 007/530] tribits cleanup, tpl refactoring: #2110, #2098 --- CMakeLists.txt | 1 + algorithms/src/CMakeLists.txt | 4 +- cmake/KokkosConfig.cmake.in | 30 +- cmake/Modules/FindHWLOC.cmake | 14 +- cmake/fake_tribits.cmake | 380 +++++++++++ cmake/kokkos_install.cmake | 1 - cmake/kokkos_options.cmake | 5 + cmake/kokkos_tribits.cmake | 679 ++++++-------------- containers/performance_tests/CMakeLists.txt | 2 +- core/perf_test/CMakeLists.txt | 25 +- core/unit_test/CMakeLists.txt | 3 +- 11 files changed, 617 insertions(+), 527 deletions(-) create mode 100644 cmake/fake_tribits.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index d985e5f14b3..2f4ef5fc931 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -78,6 +78,7 @@ ENDIF() +INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) 
INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake) KOKKOS_SETUP_BUILD_ENVIRONMENT() diff --git a/algorithms/src/CMakeLists.txt b/algorithms/src/CMakeLists.txt index c57ee299e7e..77f693de863 100644 --- a/algorithms/src/CMakeLists.txt +++ b/algorithms/src/CMakeLists.txt @@ -2,8 +2,8 @@ KOKKOS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) #I have to leave these here for tribits -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) #----------------------------------------------------------------------------- diff --git a/cmake/KokkosConfig.cmake.in b/cmake/KokkosConfig.cmake.in index 8389f6f96f5..19489e1b88f 100644 --- a/cmake/KokkosConfig.cmake.in +++ b/cmake/KokkosConfig.cmake.in @@ -17,16 +17,20 @@ SET(Kokkos_LIBRARY_DIRS @INSTALL_LIB_DIR@) SET(Kokkos_LIBRARIES @KOKKOS_LIBRARIES@) SET(Kokkos_SEPARATE_LIBS @KOKKOS_SEPARATE_LIBS@) SET(Kokkos_TPL_LIBRARIES @KOKKOS_LIBS@) +SET(Kokkos_ENABLE_Cuda @KOKKOS_ENABLE_CUDA@) +SET(Kokkos_ENABLE_OpenMP @KOKKOS_ENABLE_OPENMP@) +SET(Kokkos_ENABLE_Pthread @KOKKOS_ENABLE_PTHREAD@) +SET(Kokkos_ENABLE_Serial @KOKKOS_ENABLE_SERIAL@) -if (${Kokkos_FIND_VERSION_MINOR}) - if (${Kokkos_FIND_VERSION_MINOR} LESS ${Kokkos_VERSION_MINOR}) +IF (${Kokkos_FIND_VERSION_MINOR}) + IF (${Kokkos_FIND_VERSION_MINOR} LESS ${Kokkos_VERSION_MINOR}) FOREACH(LIB ${Kokkos_LIBRARIES}) set_property(TARGET ${LIB} APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS "KOKKOS_ENABLE_DEPRECATED_CODE" ) ENDFOREACH() - endif() -endif() + ENDIF() +ENDIF() FUNCTION(TARGET_LINK_KOKKOS TARGET) CMAKE_PARSE_ARGUMENTS( @@ -53,3 +57,21 @@ FOREACH(LIB ${Kokkos_LIBRARIES}) ENDFOREACH() ENDFUNCTION(TARGET_LINK_KOKKOS) +#Find dependencies +INCLUDE(CMakeFindDependencyMacro) +SET(FOUND_HPX_DIR @HPX_DIR@) +SET(FOUND_HPX_ROOT @HPX_ROOT@) +#figure out if we found HPX using root or dir +#set the search path accordingly, unless it got 
overriden +IF (NOT DEFINED HPX_ROOT AND NOT DEFINED HPX_DIR) + IF (FOUND_HPX_DIR) + SET(HPX_DIR ${FOUND_HPX_DIR}) + ELSEIF(FOUND_HPX_ROOT) + SET(HPX_ROOT ${FOUND_HPX_ROOT}) + ENDIF() +ENDIF() +SET(KOKKOS_ENABLE_HPX @KOKKOS_ENABLE_HPX@) +IF (KOKKOS_ENABLE_HPX) + FIND_DEPENDENCY(HPX) +ENDIF() + diff --git a/cmake/Modules/FindHWLOC.cmake b/cmake/Modules/FindHWLOC.cmake index 60df8084d80..13c9224e439 100644 --- a/cmake/Modules/FindHWLOC.cmake +++ b/cmake/Modules/FindHWLOC.cmake @@ -10,11 +10,21 @@ # HWLOC_INCLUDE_DIR - HWLOC include directory # HWLOC_LIBRARIES - Libraries needed to use HWLOC -find_path(HWLOC_INCLUDE_DIR hwloc.h PATHS "${KOKKOS_HWLOC_DIR}/include") -find_library(HWLOC_LIBRARIES hwloc PATHS "${KOKKOS_HWLOC_DIR}/lib") +find_path(HWLOC_INCLUDE_DIR hwloc.h PATHS "${KOKKOS_HWLOC_DIR}/include" ${hwloc_ROOT}/include) +find_library(HWLOC_LIBRARIES hwloc PATHS "${KOKKOS_HWLOC_DIR}/lib" ${hwloc_ROOT}/lib) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(HWLOC DEFAULT_MSG HWLOC_INCLUDE_DIR HWLOC_LIBRARIES) +add_library(hwloc UNKNOWN IMPORTED) + +set_target_properties(hwloc PROPERTIES + INTERFACE_COMPILE_FEATURES "" + INTERFACE_COMPILE_OPTIONS "" + INTERFACE_INCLUDE_DIRECTORIES "${HWLOC_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "${HWLOC_LIBRARIES}" +) + mark_as_advanced(HWLOC_INCLUDE_DIR HWLOC_LIBRARIES) + diff --git a/cmake/fake_tribits.cmake b/cmake/fake_tribits.cmake new file mode 100644 index 00000000000..53f49e5ad85 --- /dev/null +++ b/cmake/fake_tribits.cmake @@ -0,0 +1,380 @@ +#These are tribits wrappers used by all projects in the Kokkos ecosystem + +INCLUDE(CMakeParseArguments) +INCLUDE(CTest) + +cmake_policy(SET CMP0054 NEW) + +FUNCTION(ASSERT_DEFINED VARS) + FOREACH(VAR ${VARS}) + IF(NOT DEFINED ${VAR}) + MESSAGE(SEND_ERROR "Error, the variable ${VAR} is not defined!") + ENDIF() + ENDFOREACH() +ENDFUNCTION() + +MACRO(KOKKOS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE ) +SET( ${USER_OPTION_NAME} 
"${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}" ) +IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "") + IF(${USER_OPTION_NAME}) + GLOBAL_SET(${MACRO_DEFINE_NAME} ON) + ELSE() + GLOBAL_SET(${MACRO_DEFINE_NAME} OFF) + ENDIF() +ENDIF() +ENDMACRO() + +if (NOT KOKKOS_HAS_TRILINOS) +MACRO(GLOBAL_SET VARNAME) + SET(${VARNAME} ${ARGN} CACHE INTERNAL "") +ENDMACRO() + +FUNCTION(VERIFY_EMPTY CONTEXT) +if(${ARGN}) +MESSAGE(FATAL_ERROR "Kokkos does not support all of Tribits. Unhandled arguments in ${CONTEXT}:\n${ARGN}") +endif() +ENDFUNCTION() + +MACRO(PREPEND_GLOBAL_SET VARNAME) + ASSERT_DEFINED(${VARNAME}) + GLOBAL_SET(${VARNAME} ${ARGN} ${${VARNAME}}) +ENDMACRO() + +MACRO(PREPEND_TARGET_SET VARNAME TARGET_NAME TYPE) + IF(TYPE STREQUAL "REQUIRED") + SET(REQUIRED TRUE) + ELSE() + SET(REQUIRED FALSE) + ENDIF() + IF(TARGET ${TARGET_NAME}) + PREPEND_GLOBAL_SET(${VARNAME} ${TARGET_NAME}) + ELSE() + IF(REQUIRED) + MESSAGE(FATAL_ERROR "Missing dependency ${TARGET_NAME}") + ENDIF() + ENDIF() +ENDMACRO() +endif() + + +FUNCTION(KOKKOS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME_CONFIG_FILE}) + else() + # Configure the file + CONFIGURE_FILE( + ${PACKAGE_SOURCE_DIR}/cmake/${PACKAGE_NAME_CONFIG_FILE}.in + ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME_CONFIG_FILE} + ) + endif() +ENDFUNCTION() + +MACRO(KOKKOS_ADD_TEST_DIRECTORIES) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_TEST_DIRECTORIES(${ARGN}) + else() + IF(${${PROJECT_NAME}_ENABLE_TESTS}) + FOREACH(TEST_DIR ${ARGN}) + ADD_SUBDIRECTORY(${TEST_DIR}) + ENDFOREACH() + ENDIF() + endif() +ENDMACRO() + +MACRO(KOKKOS_ADD_EXAMPLE_DIRECTORIES) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_EXAMPLE_DIRECTORIES(${ARGN}) + else() + IF(${PACKAGE_NAME}_ENABLE_EXAMPLES OR ${PARENT_PACKAGE_NAME}_ENABLE_EXAMPLES) + FOREACH(EXAMPLE_DIR ${ARGN}) + ADD_SUBDIRECTORY(${EXAMPLE_DIR}) + ENDFOREACH() + ENDIF() + endif() +ENDMACRO() + + +MACRO(ADD_INTERFACE_LIBRARY LIB_NAME) + FILE(WRITE 
${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") + ADD_LIBRARY(${LIB_NAME} STATIC ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp) + SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE) +ENDMACRO() + +IF(NOT TARGET check) + ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR}) +ENDIF() + +FUNCTION(KOKKOS_ADD_TEST) + if (KOKKOS_HAS_TRILINOS) + CMAKE_PARSE_ARGUMENTS(TEST + "" + "EXE;NAME" + "" + ${ARGN}) + IF(TEST_EXE) + SET(EXE_ROOT ${TEST_EXE}) + ELSE() + SET(EXE_ROOT ${TEST_NAME}) + ENDIF() + + TRIBITS_ADD_TEST( + ${EXE_ROOT} + NAME ${TEST_NAME} + ${ARGN} + COMM serial mpi + NUM_MPI_PROCS 1 + ${TEST_UNPARSED_ARGUMENTS} + ) + else() + CMAKE_PARSE_ARGUMENTS(TEST + "WILL_FAIL" + "FAIL_REGULAR_EXPRESSION;PASS_REGULAR_EXPRESSION;EXE;NAME" + "CATEGORIES" + ${ARGN}) + IF(TEST_EXE) + SET(EXE ${TEST_EXE}) + ELSE() + SET(EXE ${TEST_NAME}) + ENDIF() + IF(WIN32) + ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${EXE}${CMAKE_EXECUTABLE_SUFFIX}) + ELSE() + ADD_TEST(NAME ${TEST_NAME} COMMAND ${EXE}) + ENDIF() + IF(TEST_WILL_FAIL) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES WILL_FAIL ${TEST_WILL_FAIL}) + ENDIF() + IF(TEST_FAIL_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION ${TEST_FAIL_REGULAR_EXPRESSION}) + ENDIF() + IF(TEST_PASS_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION ${TEST_PASS_REGULAR_EXPRESSION}) + ENDIF() + VERIFY_EMPTY(KOKKOS_ADD_TEST ${TEST_UNPARSED_ARGUMENTS}) + endif() +ENDFUNCTION() + +FUNCTION(KOKKOS_ADD_ADVANCED_TEST) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_ADVANCED_TEST(${ARGN}) + else() + # TODO Write this + endif() +ENDFUNCTION() + +MACRO(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME) + ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME}) + TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES}) + TARGET_INCLUDE_DIRECTORIES(TPL_LIB_${TPL_NAME} INTERFACE ${TPL_${TPL_NAME}_INCLUDE_DIRS}) +ENDMACRO() + 
+FUNCTION(KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES(${TPL_NAME} ${ARGN}) + else() + CMAKE_PARSE_ARGUMENTS(PARSE + "" + "" + "REQUIRED_HEADERS;REQUIRED_LIBS_NAMES" + ${ARGN}) + + SET(_${TPL_NAME}_ENABLE_SUCCESS TRUE) + IF (PARSE_REQUIRED_LIBS_NAMES) + FIND_LIBRARY(TPL_${TPL_NAME}_LIBRARIES NAMES ${PARSE_REQUIRED_LIBS_NAMES}) + IF(NOT TPL_${TPL_NAME}_LIBRARIES) + SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) + ENDIF() + ENDIF() + IF (PARSE_REQUIRED_HEADERS) + FIND_PATH(TPL_${TPL_NAME}_INCLUDE_DIRS NAMES ${PARSE_REQUIRED_HEADERS}) + IF(NOT TPL_${TPL_NAME}_INCLUDE_DIRS) + SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) + ENDIF() + ENDIF() + IF (_${TPL_NAME}_ENABLE_SUCCESS) + KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(${TPL_NAME}) + ENDIF() + VERIFY_EMPTY(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) + endif() +ENDFUNCTION() + +MACRO(KOKKOS_TARGET_COMPILE_OPTIONS TARGET) +if(KOKKOS_HAS_TRILINOS) + ADD_COMPILE_OPTIONS(${ARGN}) + #here everything gets prefixed with package name + TARGET_COMPILE_OPTIONS(${PACKAGE_NAME}_${TARGET} ${ARGN}) +else() + TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) +endif() +ENDMACRO() + + +MACRO(KOKKOS_EXCLUDE_AUTOTOOLS_FILES) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_EXCLUDE_AUTOTOOLS_FILES() + else() + #do nothing + endif() +ENDMACRO(KOKKOS_EXCLUDE_AUTOTOOLS_FILES) + +FUNCTION(KOKKOS_LIB_TYPE LIB RET) +GET_TARGET_PROPERTY(PROP ${LIB} TYPE) +IF (${PROP} STREQUAL "INTERFACE_LIBRARY") + SET(${RET} "INTERFACE" PARENT_SCOPE) +ELSE() + SET(${RET} "PUBLIC" PARENT_SCOPE) +ENDIF() +ENDFUNCTION(KOKKOS_LIB_TYPE) + + +FUNCTION(KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES TARGET) +IF(KOKKOS_HAS_TRILINOS) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + #ignore the target, tribits doesn't do anything directly with targets + TRIBITS_INCLUDE_DIRECTORIES(${KOKKOS_TOP_BUILD_DIR}) + TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) + TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + 
TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} + $) +ELSEIF(KOKKOS_SEPARATE_LIBS) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} + $) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} + $) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} + $) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} + $) +ELSE() #append to a list for later + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}) + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${KOKKOS_TOP_BUILD_DIR}) +ENDIF() +ENDFUNCTION(KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES) + + +FUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET) +IF(KOKKOS_HAS_TRILINOS) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + #don't trust tribits to do this correctly - but need to add package name + TARGET_INCLUDE_DIRECTORIES(${PACKAGE_NAME}_${TARGET} ${INCTYPE} ${ARGN}) +ELSEIF(TARGET ${TARGET}) + #the target actually exists - this means we are doing separate libs + #or this a test library + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} ${ARGN}) +ELSE() + GET_PROPERTY(LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + IF (${TARGET} IN_LIST LIBS) + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_LIBRARY_INCLUDES ${ARGN}) + ELSE() + MESSAGE(FATAL_ERROR "Trying to set include directories on unknown target ${TARGET}") + ENDIF() +ENDIF() +ENDFUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET) + +FUNCTION(KOKKOS_LINK_INTERNAL_LIBRARY TARGET DEPLIB) +IF(KOKKOS_HAS_TRILINOS) + #do nothing +ELSEIF(KOKKOS_SEPARATE_LIBS) + SET(options INTERFACE) + SET(oneValueArgs) + SET(multiValueArgs) + CMAKE_PARSE_ARGUMENTS(PARSE + "INTERFACE" + "" + "" + ${ARGN}) + SET(LINK_TYPE) + IF(PARSE_INTERFACE) + SET(LINK_TYPE INTERFACE) + ELSE() + SET(LINK_TYPE PUBLIC) + ENDIF() + TARGET_LINK_LIBRARIES(${TARGET} ${LINK_TYPE} ${DEPLIB}) + 
VERIFY_EMPTY(KOKKOS_LINK_INTERNAL_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) + ELSE() + #only a single lib - so nothing to do here +ENDIF() +ENDFUNCTION(KOKKOS_LINK_INTERNAL_LIBRARY) + +FUNCTION(KOKKOS_ADD_TEST_LIBRARY NAME) +IF (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_LIBRARY(${NAME} ${ARGN} TESTONLY) +ELSE() + SET(oneValueArgs) + SET(multiValueArgs HEADERS SOURCES) + + CMAKE_PARSE_ARGUMENTS(PARSE + "STATIC;SHARED" + "" + "HEADERS;SOURCES" + ${ARGN}) + + IF(PARSE_HEADERS) + LIST(REMOVE_DUPLICATES PARSE_HEADERS) + ENDIF() + IF(PARSE_SOURCES) + LIST(REMOVE_DUPLICATES PARSE_SOURCES) + ENDIF() + ADD_LIBRARY(${NAME} ${PARSE_SOURCES}) + target_compile_options( + ${NAME} + PUBLIC $<$:${KOKKOS_CXX_FLAGS}> + ) + target_link_libraries( + ${NAME} + PUBLIC ${KOKKOS_LD_FLAGS} + ) +ENDIF() +ENDFUNCTION(KOKKOS_ADD_TEST_LIBRARY) + + +FUNCTION(KOKKOS_TARGET_COMPILE_DEFINITIONS TARGET VISIBILITY) +IF(KOKKOS_HAS_TRILINOS) + #don't trust tribits to do this correctly + TARGET_COMPILE_DEFINITIONS(${PACKAGE_NAME}_${TARGET} ${VISIBILITY} ${ARGN}) +ELSEIF(TARGET ${TARGET}) + #the target actually exists - this means we are doing separate libs + #or this a test library + TARGET_COMPILE_DEFINITIONS(${TARGET} ${VISIBILITY} ${ARGN}) +ELSE() + GET_PROPERTY(LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + IF (${TARGET} IN_LIST LIBS) + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_LIBRARY_DEFINITIONS ${ARGN}) + ELSE() + MESSAGE(FATAL_ERROR "Trying to set compile definitions on unknown target ${TARGET}") + ENDIF() +ENDIF() +ENDFUNCTION(KOKKOS_TARGET_COMPILE_DEFINITIONS) + +FUNCTION(KOKKOS_INCLUDE_DIRECTORIES) +IF(KOKKOS_HAS_TRILINOS) + TRIBITS_INCLUDE_DIRECTORIES(${ARGN}) +ELSE() + CMAKE_PARSE_ARGUMENTS( + INC + "REQUIRED_DURING_INSTALLATION_TESTING" + "" + "" + ${ARGN} + ) + INCLUDE_DIRECTORIES(${INC_UNPARSED_ARGUMENTS}) +ENDIF() +ENDFUNCTION(KOKKOS_INCLUDE_DIRECTORIES) + + +MACRO(KOKKOS_ADD_COMPILE_OPTIONS) +ADD_COMPILE_OPTIONS(${ARGN}) +ENDMACRO() + +MACRO(PRINTALL) +get_cmake_property(_variableNames 
VARIABLES) +list (SORT _variableNames) +foreach (_variableName ${_variableNames}) + if("${_variableName}" MATCHES "Kokkos" OR "${_variableName}" MATCHES "KOKKOS") + message(STATUS "${_variableName}=${${_variableName}}") + endif() +endforeach() +ENDMACRO(PRINTALL) diff --git a/cmake/kokkos_install.cmake b/cmake/kokkos_install.cmake index 8dccdd59ae1..326d065c7b8 100644 --- a/cmake/kokkos_install.cmake +++ b/cmake/kokkos_install.cmake @@ -62,7 +62,6 @@ FOREACH(DIR ${INSTALL_CMAKE_DIR}) "${Kokkos_BINARY_DIR}/KokkosConfigVersion.cmake" DESTINATION ${DIR}) - #This seems not to do anything? # Install the export set for use with the install-tree INSTALL(EXPORT KokkosTargets DESTINATION ${DIR}) ENDFOREACH() diff --git a/cmake/kokkos_options.cmake b/cmake/kokkos_options.cmake index 7d9dc9ac14e..393a594c79d 100644 --- a/cmake/kokkos_options.cmake +++ b/cmake/kokkos_options.cmake @@ -379,6 +379,11 @@ set(KOKKOS_ENABLE_CUDA_UVM ${KOKKOS_INTERNAL_ENABLE_CUDA_UVM_DEFAULT} CACHE BOOL set(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE ${KOKKOS_INTERNAL_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE_DEFAULT} CACHE BOOL "Enable relocatable device code for CUDA. (cuda option)") set(KOKKOS_ENABLE_CUDA_LAMBDA ${KOKKOS_INTERNAL_ENABLE_CUDA_LAMBDA_DEFAULT} CACHE BOOL "Enable lambdas for CUDA. 
(cuda option)") +# Make sure KOKKOS_ARCH is set to something +IF ("${KOKKOS_ARCH}" STREQUAL "NOT_SET") + set(KOKKOS_ARCH "None") +ENDIF() + #------------------------------------------------------------------------------- #------------------------------- KOKKOS_HPX_OPTIONS ---------------------------- diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 305117fb822..5497dfc6370 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -1,8 +1,8 @@ +#These are tribits wrappers only ever called by Kokkos itself + INCLUDE(CMakeParseArguments) INCLUDE(CTest) -cmake_policy(SET CMP0054 NEW) - MESSAGE(STATUS "The project name is: ${PROJECT_NAME}") #Leave this here for now - but only do for tribits @@ -43,89 +43,62 @@ ELSE() #OPTION(Kokkos_ENABLE_TESTS "Whether to enable ENDIF() -FUNCTION(ASSERT_DEFINED VARS) - FOREACH(VAR ${VARS}) - IF(NOT DEFINED ${VAR}) - MESSAGE(SEND_ERROR "Error, the variable ${VAR} is not defined!") - ENDIF() - ENDFOREACH() -ENDFUNCTION() - -if (NOT KOKKOS_HAS_TRILINOS) -MACRO(GLOBAL_SET VARNAME) - SET(${VARNAME} ${ARGN} CACHE INTERNAL "") -ENDMACRO() - -FUNCTION(VERIFY_EMPTY CONTEXT) -if(${ARGN}) -MESSAGE(FATAL_ERROR "Kokkos does not support all of Tribits. 
Unhandled arguments in ${CONTEXT}:\n${ARGN}") -endif() -ENDFUNCTION() - -MACRO(PREPEND_GLOBAL_SET VARNAME) - ASSERT_DEFINED(${VARNAME}) - GLOBAL_SET(${VARNAME} ${ARGN} ${${VARNAME}}) -ENDMACRO() +MACRO(KOKKOS_SUBPACKAGE NAME) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_SUBPACKAGE(${NAME}) + else() + SET(PACKAGE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + SET(PARENT_PACKAGE_NAME ${PACKAGE_NAME}) + SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME}) + STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) + SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + #ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME}) + #GLOBAL_SET(${PACKAGE_NAME}_LIBS "") + endif() +ENDMACRO(KOKKOS_SUBPACKAGE) -MACRO(PREPEND_TARGET_SET VARNAME TARGET_NAME TYPE) - IF(TYPE STREQUAL "REQUIRED") - SET(REQUIRED TRUE) - ELSE() - SET(REQUIRED FALSE) - ENDIF() - IF(TARGET ${TARGET_NAME}) - PREPEND_GLOBAL_SET(${VARNAME} ${TARGET_NAME}) - ELSE() - IF(REQUIRED) - MESSAGE(FATAL_ERROR "Missing dependency ${TARGET_NAME}") - ENDIF() - ENDIF() -ENDMACRO() -endif() +MACRO(KOKKOS_SUBPACKAGE_POSTPROCESS) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_SUBPACKAGE_POSTPROCESS() + endif() +ENDMACRO(KOKKOS_SUBPACKAGE_POSTPROCESS) +MACRO(KOKKOS_PACKAGE_DECL) -FUNCTION(KOKKOS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE) if (KOKKOS_HAS_TRILINOS) - TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME_CONFIG_FILE}) + TRIBITS_PACKAGE_DECL(Kokkos) else() - # Configure the file - CONFIGURE_FILE( - ${PACKAGE_SOURCE_DIR}/cmake/${PACKAGE_NAME_CONFIG_FILE}.in - ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME_CONFIG_FILE} - ) + SET(PACKAGE_NAME Kokkos) + SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) endif() -ENDFUNCTION() -MACRO(KOKKOS_ADD_TEST_DIRECTORIES) + #SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps") + #FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake") + #FOREACH(TPL_FILE ${TPLS_FILES}) + # TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE}) + #ENDFOREACH() + +ENDMACRO() + + +MACRO(KOKKOS_PROCESS_SUBPACKAGES) if 
(KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_TEST_DIRECTORIES(${ARGN}) + TRIBITS_PROCESS_SUBPACKAGES() else() - IF(${${PROJECT_NAME}_ENABLE_TESTS}) - FOREACH(TEST_DIR ${ARGN}) - ADD_SUBDIRECTORY(${TEST_DIR}) - ENDFOREACH() - ENDIF() + ADD_SUBDIRECTORY(core) + ADD_SUBDIRECTORY(containers) + ADD_SUBDIRECTORY(algorithms) endif() -ENDMACRO() +ENDMACRO(KOKKOS_PROCESS_SUBPACKAGES) -MACRO(KOKKOS_ADD_EXAMPLE_DIRECTORIES) +MACRO(KOKKOS_PACKAGE_DEF) if (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_EXAMPLE_DIRECTORIES(${ARGN}) + TRIBITS_PACKAGE_DEF() else() - IF(${PACKAGE_NAME}_ENABLE_EXAMPLES OR ${PARENT_PACKAGE_NAME}_ENABLE_EXAMPLES) - FOREACH(EXAMPLE_DIR ${ARGN}) - ADD_SUBDIRECTORY(${EXAMPLE_DIR}) - ENDFOREACH() - ENDIF() + #do nothing endif() -ENDMACRO() - - -MACRO(ADD_INTERFACE_LIBRARY LIB_NAME) - FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") - ADD_LIBRARY(${LIB_NAME} STATIC ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp) - SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE) -ENDMACRO() +ENDMACRO(KOKKOS_PACKAGE_DEF) MACRO(KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL LIBRARY_NAME) INSTALL( @@ -154,100 +127,6 @@ MACRO(KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL LIBRARY_NAME) VERIFY_EMPTY(KOKKOS_ADD_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) ENDMACRO(KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL) - -FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) - CMAKE_PARSE_ARGUMENTS(PARSE - "STATIC;SHARED" - "" - "HEADERS;SOURCES" - ${ARGN}) - - IF(PARSE_HEADERS) - LIST(REMOVE_DUPLICATES PARSE_HEADERS) - ENDIF() - IF(PARSE_SOURCES) - LIST(REMOVE_DUPLICATES PARSE_SOURCES) - ENDIF() - - ADD_LIBRARY( - ${LIBRARY_NAME} - ${PARSE_HEADERS} - ${PARSE_SOURCES} - ) - - TARGET_COMPILE_OPTIONS( - ${LIBRARY_NAME} - PUBLIC $<$:${KOKKOS_CXX_FLAGS}> - ) - - if(${CMAKE_VERSION} VERSION_GREATER "3.13" OR ${CMAKE_VERSION} VERSION_EQUAL "3.13") - TARGET_LINK_OPTIONS( - ${LIBRARY_NAME} - PUBLIC ${KOKKOS_LD_FLAGS} - ) - else() - #well, this is annoying - I am going to need to hack this for Visual Studio - TARGET_LINK_LIBRARIES( - ${LIBRARY_NAME} 
PUBLIC ${KOKKOS_LD_FLAGS} - ) - endif() - - - TARGET_INCLUDE_DIRECTORIES( - ${LIBRARY_NAME} - PUBLIC ${KOKKOS_TPL_INCLUDE_DIRS} - ) - - foreach(FEATURE IN LISTS KOKKOS_CXX_FEATURES) - TARGET_COMPILE_FEATURES(${LIBRARY_NAME} PUBLIC ${FEATURE}) - endforeach() - - foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES) - if (("${lib}" STREQUAL "cuda") AND (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) - set(LIB_cuda "-lcuda") - target_link_libraries(${LIBRARY_NAME} PUBLIC cuda) - elseif ("${lib}" STREQUAL "hpx") - find_package(HPX REQUIRED) - if(${HPX_FOUND}) - target_link_libraries(${LIBRARY_NAME} PUBLIC ${HPX_LIBRARIES}) - target_include_directories(${LIBRARY_NAME} PUBLIC ${HPX_INCLUDE_DIRS}) - else() - message(ERROR "HPX not found. Check the value of HPX_DIR (= ${HPX_DIR}) or CMAKE_PREFIX_PATH (= ${CMAKE_PREFIX_PATH}).") - endif() - else() - find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS}) - target_link_libraries(${LIBRARY_NAME} PUBLIC ${LIB_${lib}}) - endif() - endforeach() - - KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${LIBRARY_NAME}) - - INSTALL( - FILES ${PARSE_HEADERS} - DESTINATION include - COMPONENT ${PACKAGE_NAME} - ) - -ENDFUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) - -FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME) - if (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_LIBRARY(${LIBRARY_NAME} ${ARGN}) - elseif(KOKKOS_SEPARATE_LIBS) - KOKKOS_INTERNAL_ADD_LIBRARY( - ${LIBRARY_NAME} ${ARGN}) - else() - CMAKE_PARSE_ARGUMENTS(PARSE - "" - "" - "SOURCES;HEADERS" - ${ARGN}) - #just append the headers and sources to the list - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_ALL_SOURCES ${PARSE_SOURCES}) - GET_PROPERTY(SRCS GLOBAL PROPERTY KOKKOS_ALL_SOURCES) - endif() -ENDFUNCTION() - FUNCTION(KOKKOS_ADD_EXECUTABLE EXE_NAME) if (KOKKOS_HAS_TRILINOS) TRIBITS_ADD_EXECUTABLE(${EXE_NAME} ${ARGN}) @@ -274,170 +153,35 @@ IF(NOT TARGET check) ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR}) ENDIF() -FUNCTION(KOKKOS_ADD_TEST EXE_NAME) - if 
(KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_TEST(${EXE_NAME} - ${ARGN} - COMM serial mpi - NUM_MPI_PROCS 1 - ) - else() - CMAKE_PARSE_ARGUMENTS(TEST - "WILL_FAIL" - "NAME;FAIL_REGULAR_EXPRESSION;PASS_REGULAR_EXPRESSION" - "CATEGORIES" - ${ARGN}) - SET(EXE ${PACKAGE_NAME}_${EXE_NAME}) - IF(WIN32) - ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${EXE}${CMAKE_EXECUTABLE_SUFFIX}) - ELSE() - ADD_TEST(NAME ${TEST_NAME} COMMAND ${EXE}) - ENDIF() - IF(TEST_WILL_FAIL) - SET_TESTS_PROPERTIES(${NAME} PROPERTIES WILL_FAIL ${TEST_WILL_FAIL}) - ENDIF() - IF(TEST_FAIL_REGULAR_EXPRESSION) - SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION ${TEST_FAIL_REGULAR_EXPRESSION}) - ENDIF() - IF(TEST_PASS_REGULAR_EXPRESSION) - SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION ${TEST_PASS_REGULAR_EXPRESSION}) - ENDIF() - VERIFY_EMPTY(KOKKOS_ADD_TEST ${TEST_UNPARSED_ARGUMENTS}) - endif() -ENDFUNCTION() - -FUNCTION(KOKKOS_ADD_ADVANCED_TEST) - if (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_ADVANCED_TEST(${ARGN}) - else() - # TODO Write this - endif() -ENDFUNCTION() -FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST EXE_NAME) +FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME) IF (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_EXECUTABLE( - ${EXE_NAME} - TESTONLYLIBS kokkos_gtest ${KOKKOS_TEST_LINK_TARGETS} + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ROOT_NAME} + TESTONLYLIBS kokkos_gtest ${ARGN} + NUM_MPI_PROCS 1 + COMM serial mpi + FAIL_REGULAR_EXPRESSION " FAILED " ) ELSE() CMAKE_PARSE_ARGUMENTS(PARSE "" "" - "SOURCES" + "SOURCES;CATEGORIES" ${ARGN}) - KOKKOS_ADD_EXECUTABLE(${PACKAGE_NAME}_${EXE_NAME} + VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE_AND_TEST ${PARSE_UNPARSED_ARGUMENTS}) + SET(EXE_NAME ${PACKAGE_NAME}_${ROOT_NAME}) + KOKKOS_ADD_TEST_EXECUTABLE(${EXE_NAME} SOURCES ${PARSE_SOURCES} - TESTONLYLIBS kokkos_gtest ${KOKKOS_TEST_LINK_TARGETS} - ${PARSE_UNPARSED_ARGUMENTS} + ) + KOKKOS_ADD_TEST(NAME ${ROOT_NAME} + EXE ${EXE_NAME} + FAIL_REGULAR_EXPRESSION " FAILED " ) 
ENDIF() ENDFUNCTION() -MACRO(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME) - ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME}) - TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES}) - TARGET_INCLUDE_DIRECTORIES(TPL_LIB_${TPL_NAME} INTERFACE ${TPL_${TPL_NAME}_INCLUDE_DIRS}) -ENDMACRO() - -FUNCTION(KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME) - if (KOKKOS_HAS_TRILINOS) - TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES(${TPL_NAME} ${ARGN}) - else() - CMAKE_PARSE_ARGUMENTS(PARSE - "" - "" - "REQUIRED_HEADERS;REQUIRED_LIBS_NAMES" - ${ARGN}) - - SET(_${TPL_NAME}_ENABLE_SUCCESS TRUE) - IF (PARSE_REQUIRED_LIBS_NAMES) - FIND_LIBRARY(TPL_${TPL_NAME}_LIBRARIES NAMES ${PARSE_REQUIRED_LIBS_NAMES}) - IF(NOT TPL_${TPL_NAME}_LIBRARIES) - SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) - ENDIF() - ENDIF() - IF (PARSE_REQUIRED_HEADERS) - FIND_PATH(TPL_${TPL_NAME}_INCLUDE_DIRS NAMES ${PARSE_REQUIRED_HEADERS}) - IF(NOT TPL_${TPL_NAME}_INCLUDE_DIRS) - SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) - ENDIF() - ENDIF() - IF (_${TPL_NAME}_ENABLE_SUCCESS) - KOKKOS_CREATE_IMPORTED_TPL_LIBRARY(${TPL_NAME}) - ENDIF() - VERIFY_EMPTY(KOKKOS_CREATE_IMPORTED_TPL_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) - endif() -ENDFUNCTION() - -MACRO(KOKKOS_SUBPACKAGE NAME) - if (KOKKOS_HAS_TRILINOS) - TRIBITS_SUBPACKAGE(${NAME}) - else() - SET(PACKAGE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) - SET(PARENT_PACKAGE_NAME ${PACKAGE_NAME}) - SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME}) - STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) - SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) - #ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME}) - #GLOBAL_SET(${PACKAGE_NAME}_LIBS "") - endif() -ENDMACRO(KOKKOS_SUBPACKAGE) - -MACRO(KOKKOS_SUBPACKAGE_POSTPROCESS) - if (KOKKOS_HAS_TRILINOS) - TRIBITS_SUBPACKAGE_POSTPROCESS() - endif() -ENDMACRO(KOKKOS_SUBPACKAGE_POSTPROCESS) - -MACRO(KOKKOS_PACKAGE_DECL) - - if (KOKKOS_HAS_TRILINOS) - TRIBITS_PACKAGE_DECL(Kokkos) - else() - SET(PACKAGE_NAME Kokkos) - 
SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) - STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) - endif() - - #SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps") - #FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake") - #FOREACH(TPL_FILE ${TPLS_FILES}) - # TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE}) - #ENDFOREACH() - -ENDMACRO() - - -MACRO(KOKKOS_PROCESS_SUBPACKAGES) - if (KOKKOS_HAS_TRILINOS) - TRIBITS_PROCESS_SUBPACKAGES() - else() - ADD_SUBDIRECTORY(core) - ADD_SUBDIRECTORY(containers) - ADD_SUBDIRECTORY(algorithms) - endif() -ENDMACRO(KOKKOS_PROCESS_SUBPACKAGES) - -MACRO(KOKKOS_PACKAGE_DEF) - if (KOKKOS_HAS_TRILINOS) - TRIBITS_PACKAGE_DEF() - else() - #do nothing - endif() -ENDMACRO(KOKKOS_PACKAGE_DEF) - -MACRO(KOKKOS_TARGET_COMPILE_OPTIONS TARGET) -if(KOKKOS_HAS_TRILINOS) - ADD_COMPILE_OPTIONS(${ARGN}) - TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) -else() - TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) -endif() -ENDMACRO() - - MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) if (NOT KOKKOS_HAS_TRILINOS) #------------ COMPILER AND FEATURE CHECKS ------------------------------------ @@ -470,151 +214,51 @@ MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION ${CMAKE_INSTALL_PREFIX}) string(REPLACE " " ";" KOKKOS_TPL_INCLUDE_DIRS "${KOKKOS_GMAKE_TPL_INCLUDE_DIRS}") string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_DIRS "${KOKKOS_GMAKE_TPL_LIBRARY_DIRS}") - string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_NAMES "${KOKKOS_GMAKE_TPL_LIBRARY_NAMES}") list(REMOVE_ITEM KOKKOS_TPL_INCLUDE_DIRS "") list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_DIRS "") - list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_NAMES "") set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) endif() ENDMACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) -MACRO(KOKKOS_EXCLUDE_AUTOTOOLS_FILES) - if (KOKKOS_HAS_TRILINOS) - TRIBITS_EXCLUDE_AUTOTOOLS_FILES() - else() - #do nothing - endif() -ENDMACRO(KOKKOS_EXCLUDE_AUTOTOOLS_FILES) - MACRO(KOKKOS_ADD_TEST_EXECUTABLE EXE_NAME) CMAKE_PARSE_ARGUMENTS(PARSE 
"" "" "SOURCES" ${ARGN}) - KOKKOS_ADD_EXECUTABLE(${PACKAGE_NAME}_${EXE_NAME} + KOKKOS_ADD_EXECUTABLE(${EXE_NAME} SOURCES ${PARSE_SOURCES} - TESTONLYLIBS kokkos_gtest ${KOKKOS_TEST_LINK_TARGETS} ${PARSE_UNPARSED_ARGUMENTS} + TESTONLYLIBS kokkos_gtest ) - ADD_DEPENDENCIES(check ${PACKAGE_NAME}_${EXE_NAME}) + IF (NOT KOKKOS_HAS_TRILINOS) + TARGET_LINK_LIBRARIES(${EXE_NAME} kokkos_gtest) + ENDIF() + ADD_DEPENDENCIES(check ${EXE_NAME}) ENDMACRO(KOKKOS_ADD_TEST_EXECUTABLE) -MACRO(KOKKOS_ADD_PERFORMANCE_TEST NAME) -KOKKOS_ADD_TEST(${NAME} - CATEGORIES PERFORMANCE - ${ARGN} -) -ENDMACRO() - - MACRO(KOKKOS_PACKAGE_POSTPROCESS) if (KOKKOS_HAS_TRILINOS) TRIBITS_PACKAGE_POSTPROCESS() endif() ENDMACRO(KOKKOS_PACKAGE_POSTPROCESS) -FUNCTION(KOKKOS_LIB_TYPE LIB RET) -GET_TARGET_PROPERTY(PROP ${LIB} TYPE) -IF (${PROP} STREQUAL "INTERFACE_LIBRARY") - SET(${RET} "INTERFACE" PARENT_SCOPE) -ELSE() - SET(${RET} "PUBLIC" PARENT_SCOPE) -ENDIF() -ENDFUNCTION(KOKKOS_LIB_TYPE) - - -FUNCTION(KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES TARGET) -IF(KOKKOS_HAS_TRILINOS) - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) - #ignore the target, tribits doesn't do anything directly with targets - TRIBITS_INCLUDE_DIRECTORIES(${KOKKOS_TOP_BUILD_DIR}) - TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) - TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} - $) -ELSEIF(KOKKOS_SEPARATE_LIBS) - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} - $) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} - $) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} - $) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} - $) -ELSE() #append to a list for later - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}) - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES 
${KOKKOS_TOP_BUILD_DIR}) -ENDIF() -ENDFUNCTION(KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES) - - -FUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET) -IF(KOKKOS_HAS_TRILINOS) - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) - TRIBITS_INCLUDE_DIRECTORIES(${TARGET} ${ARGN}) - #don't trust tribits to do this correctly - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} ${ARGN}) -ELSEIF(TARGET ${TARGET}) - #the target actually exists - this means we are doing separate libs - #or this a test library - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} ${ARGN}) -ELSE() - GET_PROPERTY(LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) - IF (${TARGET} IN_LIST LIBS) - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_LIBRARY_INCLUDES ${ARGN}) - ELSE() - MESSAGE(FATAL_ERROR "Trying to set include directories on unknown target ${TARGET}") - ENDIF() -ENDIF() -ENDFUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET) - -FUNCTION(KOKKOS_LINK_INTERNAL_LIBRARY TARGET DEPLIB) -IF(KOKKOS_HAS_TRILINOS) - #do nothing -ELSEIF(KOKKOS_SEPARATE_LIBS) - SET(options INTERFACE) - SET(oneValueArgs) - SET(multiValueArgs) - CMAKE_PARSE_ARGUMENTS(PARSE - "INTERFACE" - "" - "" - ${ARGN}) - SET(LINK_TYPE) - IF(PARSE_INTERFACE) - SET(LINK_TYPE INTERFACE) - ELSE() - SET(LINK_TYPE PUBLIC) - ENDIF() - TARGET_LINK_LIBRARIES(${TARGET} ${LINK_TYPE} ${DEPLIB}) - VERIFY_EMPTY(KOKKOS_LINK_INTERNAL_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) - ELSE() - #only a single lib - so nothing to do here -ENDIF() -ENDFUNCTION(KOKKOS_LINK_INTERNAL_LIBRARY) - -FUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY NAME) -IF (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_LIBRARY(${NAME} ${ARGN}) -ELSE() - IF (KOKKOS_SEPARATE_LIBS) - ADD_LIBRARY(${NAME} INTERFACE) - KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME}) - ENDIF() -ENDIF() -ENDFUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY) - -FUNCTION(KOKKOS_ADD_TEST_LIBRARY NAME) -IF (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_LIBRARY(${NAME} ${ARGN} TESTONLY) -ELSE() - SET(oneValueArgs) - SET(multiValueArgs HEADERS SOURCES) 
+MACRO(KOKKOS_MAKE_LIBKOKKOS) +GET_PROPERTY(SRCS GLOBAL PROPERTY KOKKOS_ALL_SOURCES) +GET_PROPERTY(INCS GLOBAL PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES) +GET_PROPERTY(DEFS GLOBAL PROPERTY KOKKOS_COMPILE_DEFINITIONS) +KOKKOS_INTERNAL_ADD_LIBRARY(kokkos SOURCES ${SRCS}) +FOREACH(INC ${INCS}) + TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC $) +ENDFOREACH() +TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC $) +TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC $) +FOREACH(DEF ${DEFS}) + TARGET_COMPILE_DEFINITIONS(kokkos PUBLIC ${DEF}) +ENDFOREACH() +ENDMACRO() +FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) CMAKE_PARSE_ARGUMENTS(PARSE "STATIC;SHARED" "" @@ -627,78 +271,115 @@ ELSE() IF(PARSE_SOURCES) LIST(REMOVE_DUPLICATES PARSE_SOURCES) ENDIF() - ADD_LIBRARY(${NAME} ${PARSE_SOURCES}) - target_compile_options( - ${NAME} + + ADD_LIBRARY( + ${LIBRARY_NAME} + ${PARSE_HEADERS} + ${PARSE_SOURCES} + ) + + TARGET_COMPILE_OPTIONS( + ${LIBRARY_NAME} PUBLIC $<$:${KOKKOS_CXX_FLAGS}> ) - target_link_libraries( - ${NAME} - PUBLIC ${KOKKOS_LD_FLAGS} + + if(${CMAKE_VERSION} VERSION_GREATER "3.13" OR ${CMAKE_VERSION} VERSION_EQUAL "3.13") + TARGET_LINK_OPTIONS( + ${LIBRARY_NAME} + PUBLIC ${KOKKOS_LD_FLAGS} + ) + else() + #well, this is annoying - I am going to need to hack this for Visual Studio + TARGET_LINK_LIBRARIES( + ${LIBRARY_NAME} PUBLIC ${KOKKOS_LD_FLAGS} + ) + endif() + + + TARGET_INCLUDE_DIRECTORIES( + ${LIBRARY_NAME} + PUBLIC ${KOKKOS_TPL_INCLUDE_DIRS} ) -ENDIF() -ENDFUNCTION(KOKKOS_ADD_TEST_LIBRARY) - - -FUNCTION(KOKKOS_TARGET_COMPILE_DEFINITIONS TARGET VISIBILITY) -IF(KOKKOS_HAS_TRILINOS) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0") - #don't trust tribits to do this correctly - TARGET_COMPILE_DEFINITIONS(${TARGET} ${VISIBILITY} ${ARGN}) -ELSEIF(TARGET ${TARGET}) - #the target actually exists - this means we are doing separate libs - #or this a test library - TARGET_COMPILE_DEFINITIONS(${TARGET} ${VISIBILITY} ${ARGN}) -ELSE() - GET_PROPERTY(LIBS GLOBAL PROPERTY 
KOKKOS_LIBRARIES_NAMES) - IF (${TARGET} IN_LIST LIBS) - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_LIBRARY_DEFINITIONS ${ARGN}) - ELSE() - MESSAGE(FATAL_ERROR "Trying to set compile definitions on unknown target ${TARGET}") - ENDIF() -ENDIF() -ENDFUNCTION(KOKKOS_TARGET_COMPILE_DEFINITIONS) -FUNCTION(KOKKOS_INCLUDE_DIRECTORIES) -IF(KOKKOS_HAS_TRILINOS) - TRIBITS_INCLUDE_DIRECTORIES(${ARGN}) + if (KOKKOS_ENABLE_CUDA) + set(LIB_cuda "-lcuda") + target_link_libraries(${LIBRARY_NAME} PUBLIC cuda) + endif() + + if (KOKKOS_ENABLE_HPX) + find_package(HPX REQUIRED) + target_link_libraries(${LIBRARY_NAME} PUBLIC ${HPX_LIBRARIES}) + target_include_directories(${LIBRARY_NAME} PUBLIC ${HPX_INCLUDE_DIRS}) + endif() + + if (KOKKOS_ENABLE_HWLOC) + find_package(HWLOC REQUIRED) + target_link_libraries(${LIBRARY_NAME} PRIVATE hwloc) + endif() + + if (KOKKOS_ENABLE_MEMKIND) + find_package(MEMKIND REQUIRED) + target_link_libraries(${LIBRARY_NAME} PRIVATE memkind) + endif() + + foreach(FEATURE IN LISTS KOKKOS_CXX_FEATURES) + TARGET_COMPILE_FEATURES(${LIBRARY_NAME} PUBLIC ${FEATURE}) + endforeach() + + KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${LIBRARY_NAME}) + + INSTALL( + FILES ${PARSE_HEADERS} + DESTINATION include + COMPONENT ${PACKAGE_NAME} + ) + +ENDFUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) + +FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_LIBRARY(${LIBRARY_NAME} ${ARGN}) + elseif(KOKKOS_SEPARATE_LIBS) + KOKKOS_INTERNAL_ADD_LIBRARY( + ${LIBRARY_NAME} ${ARGN}) + else() + CMAKE_PARSE_ARGUMENTS(PARSE + "" + "" + "SOURCES;HEADERS" + ${ARGN}) + #just append the headers and sources to the list + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_ALL_SOURCES ${PARSE_SOURCES}) + GET_PROPERTY(SRCS GLOBAL PROPERTY KOKKOS_ALL_SOURCES) + endif() +ENDFUNCTION() + +FUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY NAME) +IF (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_LIBRARY(${NAME} ${ARGN}) ELSE() - CMAKE_PARSE_ARGUMENTS( - INC - "REQUIRED_DURING_INSTALLATION_TESTING" + 
CMAKE_PARSE_ARGUMENTS(PARSE "" "" + "HEADERS;SOURCES" ${ARGN} ) - INCLUDE_DIRECTORIES(${INC_UNPARSED_ARGUMENTS}) -ENDIF() -ENDFUNCTION(KOKKOS_INCLUDE_DIRECTORIES) -MACRO(KOKKOS_MAKE_LIBKOKKOS) -GET_PROPERTY(SRCS GLOBAL PROPERTY KOKKOS_ALL_SOURCES) -GET_PROPERTY(INCS GLOBAL PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES) -GET_PROPERTY(DEFS GLOBAL PROPERTY KOKKOS_COMPILE_DEFINITIONS) -KOKKOS_INTERNAL_ADD_LIBRARY(kokkos SOURCES ${SRCS}) -FOREACH(INC ${INCS}) - TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC $) -ENDFOREACH() -TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC $) -TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC $) -FOREACH(DEF ${DEFS}) - TARGET_COMPILE_DEFINITIONS(kokkos PUBLIC ${DEF}) -ENDFOREACH() -ENDMACRO() + IF (KOKKOS_SEPARATE_LIBS) + ADD_LIBRARY(${NAME} INTERFACE) + KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME}) + ENDIF() -MACRO(KOKKOS_ADD_COMPILE_OPTIONS) -ADD_COMPILE_OPTIONS(${ARGN}) -ENDMACRO() + INSTALL( + FILES ${PARSE_HEADERS} + DESTINATION include + ) + + INSTALL( + FILES ${PARSE_HEADERS} + DESTINATION include + COMPONENT ${PACKAGE_NAME} + ) +ENDIF() +ENDFUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY) -MACRO(PRINTALL) -get_cmake_property(_variableNames VARIABLES) -list (SORT _variableNames) -foreach (_variableName ${_variableNames}) - if("${_variableName}" MATCHES "Kokkos" OR "${_variableName}" MATCHES "KOKKOS") - message(STATUS "${_variableName}=${${_variableName}}") - endif() -endforeach() -ENDMACRO(PRINTALL) diff --git a/containers/performance_tests/CMakeLists.txt b/containers/performance_tests/CMakeLists.txt index 3f25891eb96..4aaebd023d3 100644 --- a/containers/performance_tests/CMakeLists.txt +++ b/containers/performance_tests/CMakeLists.txt @@ -30,7 +30,7 @@ KOKKOS_ADD_TEST_EXECUTABLE( ) KOKKOS_ADD_TEST( - PerfTestExec NAME PerformanceTest + EXE ${EXE_NAME} ) diff --git a/core/perf_test/CMakeLists.txt b/core/perf_test/CMakeLists.txt index 328afbda03c..dcc9d0e6bef 100644 --- a/core/perf_test/CMakeLists.txt +++ b/core/perf_test/CMakeLists.txt @@ -22,25 +22,18 @@ SET(SOURCES 
#leave these as basic includes for now #I don't need anything transitive -INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/../../algorithms/src") -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/../../algorithms/src") +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) -KOKKOS_ADD_TEST_EXECUTABLE( +KOKKOS_ADD_EXECUTABLE_AND_TEST( PerfTestExec - SOURCES ${SOURCES}) - -KOKKOS_ADD_PERFORMANCE_TEST( - PerfTestExec - NAME PerfTestExec + SOURCES ${SOURCES} + CATEGORIES PERFORMANCE ) -KOKKOS_ADD_TEST_EXECUTABLE( - PerformanceTest_TaskDAG +KOKKOS_ADD_EXECUTABLE_AND_TEST( + PerformanceTest_TaskDag SOURCES test_taskdag.cpp -) - -KOKKOS_ADD_PERFORMANCE_TEST( - PerformanceTest_TaskDAG - NAME PerformanceTest_TaskDAG + CATEGORIES PERFORMANCE ) diff --git a/core/unit_test/CMakeLists.txt b/core/unit_test/CMakeLists.txt index 7ed1f182141..fa69a3f5010 100644 --- a/core/unit_test/CMakeLists.txt +++ b/core/unit_test/CMakeLists.txt @@ -674,9 +674,8 @@ KOKKOS_ADD_EXECUTABLE_AND_TEST( # std::terminate. This means that we can't use # KOKKOS_ADD_EXECUTABLE_AND_TEST. See GitHub issue #2147. 
-KOKKOS_ADD_EXECUTABLE( push_finalize_hook_terminate +KOKKOS_ADD_TEST_EXECUTABLE( push_finalize_hook_terminate SOURCES UnitTest_PushFinalizeHook_terminate.cpp - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) KOKKOS_ADD_ADVANCED_TEST( UnitTest_PushFinalizeHook_terminate From 5ccaa5768a3fc39034bf4322ea5d75062b327675 Mon Sep 17 00:00:00 2001 From: jjwilke Date: Tue, 23 Apr 2019 15:46:57 -0700 Subject: [PATCH 008/530] test name fix --- containers/performance_tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/performance_tests/CMakeLists.txt b/containers/performance_tests/CMakeLists.txt index 4aaebd023d3..c293425e559 100644 --- a/containers/performance_tests/CMakeLists.txt +++ b/containers/performance_tests/CMakeLists.txt @@ -31,6 +31,6 @@ KOKKOS_ADD_TEST_EXECUTABLE( KOKKOS_ADD_TEST( NAME PerformanceTest - EXE ${EXE_NAME} + EXE PerfTestExec ) From 997fea5480218ecc4e42e62062e17ede9d7d193c Mon Sep 17 00:00:00 2001 From: jjwilke Date: Wed, 24 Apr 2019 15:55:32 -0700 Subject: [PATCH 009/530] tribits fixes for trilinos promotion --- CMakeLists.txt | 54 -------------- Makefile.kokkos | 8 +- algorithms/src/CMakeLists.txt | 6 +- algorithms/unit_tests/CMakeLists.txt | 8 +- cmake/fake_tribits.cmake | 62 ++++------------ cmake/kokkos_settings.cmake | 60 +++++++++++++++ cmake/kokkos_tribits.cmake | 73 ++++++++++++++++++- containers/performance_tests/CMakeLists.txt | 6 +- containers/src/CMakeLists.txt | 10 ++- containers/unit_tests/CMakeLists.txt | 6 +- core/src/CMakeLists.txt | 11 ++- core/unit_test/CMakeLists.txt | 3 +- example/feint/CMakeLists.txt | 8 +- example/fenl/CMakeLists.txt | 8 +- example/fixture/CMakeLists.txt | 6 +- example/global_2_local_ids/CMakeLists.txt | 4 +- example/grow_array/CMakeLists.txt | 4 +- example/md_skeleton/CMakeLists.txt | 4 +- example/multi_fem/CMakeLists.txt | 4 +- example/query_device/CMakeLists.txt | 4 +- example/sort_array/CMakeLists.txt | 4 +- .../tutorial/01_hello_world/CMakeLists.txt | 4 +- 
.../01_hello_world_lambda/CMakeLists.txt | 4 +- .../tutorial/02_simple_reduce/CMakeLists.txt | 4 +- .../02_simple_reduce_lambda/CMakeLists.txt | 4 +- .../tutorial/03_simple_view/CMakeLists.txt | 4 +- .../03_simple_view_lambda/CMakeLists.txt | 4 +- 27 files changed, 215 insertions(+), 162 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f4ef5fc931..5e574f43e0b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,60 +24,6 @@ IF(NOT KOKKOS_HAS_TRILINOS) set (Kokkos_VERSION_PATCH 4) ENDIF() -SET(KOKKOS_CXX_STANDARD "" CACHE STRING "The C++ standard for Kokkos to use: c++11, c++14, or c++17") -SET(KOKKOS_CXX_FEATURES "" CACHE STRING "The list of C++ features for Kokkos to enable") -SET(CXX_STANDARD_TEST) - -IF (KOKKOS_CXX_STANDARD AND CMAKE_CXX_STANDARD) - #make sure these are consistent - IF (${KOKKOS_CXX_STANDARD} STREQUAL "c++11") - IF(NOT ${CMAKE_CXX_STANDARD} STREQUAL "11") - SET(CXX_STD_ERROR ON) - ENDIF() - ELSEIF (${KOKKOS_CXX_STANDARD} STREQUAL "c++14") - IF(NOT ${CMAKE_CXX_STANDARD} STREQUAL "14") - SET(CXX_STD_ERROR ON) - ENDIF() - ELSEIF (${KOKKOS_CXX_STANDARD} STREQUAL "c++17") - IF(NOT ${CMAKE_CXX_STANDARD} STREQUAL "17") - SET(CXX_STD_ERROR ON) - ENDIF() - ELSE() - #KOKKOS_CXX_STANDARD is something else, which means definitely invalid - SET(CXX_STD_ERROR ON) - ENDIF() - IF (CXX_STD_ERROR) - MESSAGE(FATAL_ERROR "Specified both CMAKE_CXX_STANDARD and KOKKOS_CXX_STANDARD, but they don't match") - ENDIF() -ENDIF() - -IF (NOT KOKKOS_CXX_STANDARD AND NOT CMAKE_CXX_STANDARD) - SET(KOKKOS_CXX_STANDARD "c++11") -ENDIF() - -IF (KOKKOS_CXX_STANDARD) - IF (${KOKKOS_CXX_STANDARD} STREQUAL "c++11") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_11) - ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++14") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_14) - ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++17") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_17) - ENDIF() -ENDIF() - -IF (CMAKE_CXX_STANDARD) - IF (${CMAKE_CXX_STANDARD} STREQUAL "11") - LIST(APPEND 
KOKKOS_CXX_FEATURES cxx_std_11) - ELSEIF(${CMAKE_CXX_STANDARD} STREQUAL "14") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_14) - ELSEIF(${CMAKE_CXX_STANDARD} STREQUAL "17") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_17) - ENDIF() -ENDIF() - - - - INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake) KOKKOS_SETUP_BUILD_ENVIRONMENT() diff --git a/Makefile.kokkos b/Makefile.kokkos index ebc01f4270a..614c9738538 100644 --- a/Makefile.kokkos +++ b/Makefile.kokkos @@ -22,6 +22,8 @@ KOKKOS_OPTIONS ?= "" # Option for setting ETI path KOKKOS_ETI_PATH ?= ${KOKKOS_PATH}/core/src/eti KOKKOS_CMAKE ?= "no" +KOKKOS_TRIBITS ?= "no" +KOKKOS_STANDALONE_CMAKE ?= "no" # Default settings specific options. # Options: force_uvm,use_ldg,rdc,enable_lambda @@ -490,13 +492,13 @@ endif #only add the c++ standard flags if this is not CMake tmp := $(call kokkos_append_header,"/* General Settings */") ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1) -ifneq ($(KOKKOS_CMAKE), yes) +ifneq ($(KOKKOS_STANDALONE_CMAKE), yes) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) endif tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX14), 1) -ifneq ($(KOKKOS_CMAKE), yes) +ifneq ($(KOKKOS_STANDALONE_CMAKE), yes) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX14_FLAG) endif tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14") @@ -507,7 +509,7 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Y), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX14") endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX17), 1) -ifneq ($(KOKKOS_CMAKE), yes) #for CMake, use target_compile_features +ifneq ($(KOKKOS_STANDALONE_CMAKE), yes) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX17_FLAG) endif tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX17") diff --git a/algorithms/src/CMakeLists.txt b/algorithms/src/CMakeLists.txt index 77f693de863..5afd319fcc7 100644 --- a/algorithms/src/CMakeLists.txt +++ 
b/algorithms/src/CMakeLists.txt @@ -20,7 +20,11 @@ KOKKOS_ADD_INTERFACE_LIBRARY( HEADERS ${HEADERS} SOURCES ${SOURCES} ) -KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES(kokkosalgorithms) +KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms + ${KOKKOS_TOP_BUILD_DIR} + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} +) diff --git a/algorithms/unit_tests/CMakeLists.txt b/algorithms/unit_tests/CMakeLists.txt index eaf85c6773d..3f94d57d9cf 100644 --- a/algorithms/unit_tests/CMakeLists.txt +++ b/algorithms/unit_tests/CMakeLists.txt @@ -1,12 +1,12 @@ #Leave these here for now - I don't need transitive deps anyway -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest) -INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) # mfh 03 Nov 2017: The gtest library used here must have a different # name than that of the gtest library built in KokkosCore. 
We can't diff --git a/cmake/fake_tribits.cmake b/cmake/fake_tribits.cmake index 53f49e5ad85..4ecfcd888be 100644 --- a/cmake/fake_tribits.cmake +++ b/cmake/fake_tribits.cmake @@ -200,9 +200,7 @@ ENDFUNCTION() MACRO(KOKKOS_TARGET_COMPILE_OPTIONS TARGET) if(KOKKOS_HAS_TRILINOS) - ADD_COMPILE_OPTIONS(${ARGN}) - #here everything gets prefixed with package name - TARGET_COMPILE_OPTIONS(${PACKAGE_NAME}_${TARGET} ${ARGN}) + TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) else() TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) endif() @@ -226,39 +224,11 @@ ELSE() ENDIF() ENDFUNCTION(KOKKOS_LIB_TYPE) - -FUNCTION(KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES TARGET) -IF(KOKKOS_HAS_TRILINOS) - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) - #ignore the target, tribits doesn't do anything directly with targets - TRIBITS_INCLUDE_DIRECTORIES(${KOKKOS_TOP_BUILD_DIR}) - TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) - TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} - $) -ELSEIF(KOKKOS_SEPARATE_LIBS) - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} - $) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} - $) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} - $) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} - $) -ELSE() #append to a list for later - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}) - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}) - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${KOKKOS_TOP_BUILD_DIR}) -ENDIF() -ENDFUNCTION(KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES) - - FUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET) IF(KOKKOS_HAS_TRILINOS) KOKKOS_LIB_TYPE(${TARGET} INCTYPE) #don't trust tribits to do this correctly - but need to add package name - TARGET_INCLUDE_DIRECTORIES(${PACKAGE_NAME}_${TARGET} ${INCTYPE} ${ARGN}) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} 
${ARGN}) ELSEIF(TARGET ${TARGET}) #the target actually exists - this means we are doing separate libs #or this a test library @@ -301,7 +271,9 @@ ENDFUNCTION(KOKKOS_LINK_INTERNAL_LIBRARY) FUNCTION(KOKKOS_ADD_TEST_LIBRARY NAME) IF (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_LIBRARY(${NAME} ${ARGN} TESTONLY) + TRIBITS_ADD_LIBRARY(${NAME} ${ARGN} TESTONLY + ADDED_LIB_TARGET_NAME_OUT ${NAME} + ) ELSE() SET(oneValueArgs) SET(multiValueArgs HEADERS SOURCES) @@ -331,22 +303,12 @@ ENDIF() ENDFUNCTION(KOKKOS_ADD_TEST_LIBRARY) -FUNCTION(KOKKOS_TARGET_COMPILE_DEFINITIONS TARGET VISIBILITY) -IF(KOKKOS_HAS_TRILINOS) - #don't trust tribits to do this correctly - TARGET_COMPILE_DEFINITIONS(${PACKAGE_NAME}_${TARGET} ${VISIBILITY} ${ARGN}) -ELSEIF(TARGET ${TARGET}) - #the target actually exists - this means we are doing separate libs - #or this a test library - TARGET_COMPILE_DEFINITIONS(${TARGET} ${VISIBILITY} ${ARGN}) -ELSE() - GET_PROPERTY(LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) - IF (${TARGET} IN_LIST LIBS) - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_LIBRARY_DEFINITIONS ${ARGN}) - ELSE() - MESSAGE(FATAL_ERROR "Trying to set compile definitions on unknown target ${TARGET}") - ENDIF() -ENDIF() +FUNCTION(KOKKOS_TARGET_COMPILE_DEFINITIONS) + IF (KOKKOS_HAS_TRILINOS) + TARGET_COMPILE_DEFINITIONS(${TARGET} ${ARGN}) + ELSE() + TARGET_COMPILE_DEFINITIONS(${TARGET} ${ARGN}) + ENDIF() ENDFUNCTION(KOKKOS_TARGET_COMPILE_DEFINITIONS) FUNCTION(KOKKOS_INCLUDE_DIRECTORIES) @@ -378,3 +340,5 @@ foreach (_variableName ${_variableNames}) endif() endforeach() ENDMACRO(PRINTALL) + + diff --git a/cmake/kokkos_settings.cmake b/cmake/kokkos_settings.cmake index 32a8bf797a8..6da0dcd6f21 100644 --- a/cmake/kokkos_settings.cmake +++ b/cmake/kokkos_settings.cmake @@ -131,6 +131,11 @@ string(REPLACE ";" ":" KOKKOS_INTERNAL_ADDTOPATH "${addpathl}") # makefile configuration. 
See Makefile.kokkos set(KOKKOS_SETTINGS KOKKOS_CMAKE=yes) +if(KOKKOS_HAS_TRILINOS) + set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_TRIBITS=yes) +else() + set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_STANDALONE_CMAKE=yes) +endif() set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}) set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_PATH=${KOKKOS_PATH}) set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_INSTALL_PATH=${CMAKE_INSTALL_PREFIX}) @@ -162,6 +167,61 @@ if (NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "") set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}") endif() +SET(KOKKOS_CXX_STANDARD "" CACHE STRING "The C++ standard for Kokkos to use: c++11, c++14, or c++17") +SET(KOKKOS_CXX_FEATURES "" CACHE STRING "The list of C++ features for Kokkos to enable") +SET(CXX_STANDARD_TEST) + + +IF (KOKKOS_CXX_STANDARD AND CMAKE_CXX_STANDARD) + #make sure these are consistent + IF (${KOKKOS_CXX_STANDARD} STREQUAL "c++11") + IF(NOT ${CMAKE_CXX_STANDARD} STREQUAL "11") + SET(CXX_STD_ERROR ON) + ENDIF() + ELSEIF (${KOKKOS_CXX_STANDARD} STREQUAL "c++14") + IF(NOT ${CMAKE_CXX_STANDARD} STREQUAL "14") + SET(CXX_STD_ERROR ON) + ENDIF() + ELSEIF (${KOKKOS_CXX_STANDARD} STREQUAL "c++17") + IF(NOT ${CMAKE_CXX_STANDARD} STREQUAL "17") + SET(CXX_STD_ERROR ON) + ENDIF() + ELSE() + #KOKKOS_CXX_STANDARD is something else, which means definitely invalid + SET(CXX_STD_ERROR ON) + ENDIF() + IF (CXX_STD_ERROR) + MESSAGE(FATAL_ERROR "Specified both CMAKE_CXX_STANDARD and KOKKOS_CXX_STANDARD, but they don't match") + ENDIF() +ENDIF() + +IF (NOT KOKKOS_CXX_STANDARD AND NOT CMAKE_CXX_STANDARD) + SET(KOKKOS_CXX_STANDARD "c++11") +ENDIF() + +IF (KOKKOS_CXX_STANDARD) + IF (${KOKKOS_CXX_STANDARD} STREQUAL "c++11") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_11) + ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++14") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_14) + ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++17") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_17) + 
ENDIF() +ENDIF() + +IF (CMAKE_CXX_STANDARD) + IF (${CMAKE_CXX_STANDARD} STREQUAL "11") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_11) + SET(KOKKOS_CXX_STANDARD "c++11") + ELSEIF(${CMAKE_CXX_STANDARD} STREQUAL "14") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_14) + SET(KOKKOS_CXX_STANDARD "c++14") + ELSEIF(${CMAKE_CXX_STANDARD} STREQUAL "17") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_17) + SET(KOKKOS_CXX_STANDARD "c++17") + ENDIF() +ENDIF() + if (CMAKE_CXX_STANDARD) if (CMAKE_CXX_STANDARD STREQUAL "98") message(FATAL_ERROR "Kokkos requires C++11 or newer!") diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 5497dfc6370..8d8c3708fa4 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -233,8 +233,8 @@ MACRO(KOKKOS_ADD_TEST_EXECUTABLE EXE_NAME) ) IF (NOT KOKKOS_HAS_TRILINOS) TARGET_LINK_LIBRARIES(${EXE_NAME} kokkos_gtest) + ADD_DEPENDENCIES(check ${EXE_NAME}) ENDIF() - ADD_DEPENDENCIES(check ${EXE_NAME}) ENDMACRO(KOKKOS_ADD_TEST_EXECUTABLE) MACRO(KOKKOS_PACKAGE_POSTPROCESS) @@ -383,3 +383,74 @@ ELSE() ENDIF() ENDFUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY) +FUNCTION(KOKKOS_LIB_COMPILE_DEFINITIONS) +IF(KOKKOS_HAS_TRILINOS) + #don't trust tribits to do this correctly + KOKKOS_TARGET_COMPILE_DEFINITIONS(${TARGET} ${ARGN}) +ELSEIF(TARGET ${TARGET}) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + KOKKOS_TARGET_COMPILE_DEFINITIONS(${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}${TARGET} ${INCTYPE} ${ARGN}) +ELSE() + #prefix is empty unless in tribits + CMAKE_PARSE_ARGUMENTS( + PARSE + "PUBLIC;PRIVATE;INTERFACE" + "" + "" + ${ARGN} + ) + IF (PARSE_PUBLIC OR PARSE_PRIVATE OR PARSE_INTERFACE) + MESSAGE(FATAL_ERROR "KOKKOS_LIB_COMPILE_DEFINITIONS for ${TARGET} should not have attributes") + ENDIF() + GET_PROPERTY(LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + IF (${TARGET} IN_LIST LIBS) + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_LIBRARY_DEFINITIONS ${ARGN}) + ELSE() + MESSAGE(FATAL_ERROR "Trying to set compile definitions on unknown target ${TARGET}") + 
ENDIF() +ENDFUNCTION(KOKKOS_LIB_COMPILE_DEFINITIONS) + +FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET) +IF(KOKKOS_HAS_TRILINOS) + #ignore the target, tribits doesn't do anything directly with targets + TRIBITS_INCLUDE_DIRECTORIES(${ARGN}) +ELSEIF(KOKKOS_SEPARATE_LIBS) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + FOREACH(DIR ${ARGN}) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $) + ENDFOREACH() + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $) +ELSE() #append to a list for later + FOREACH(DIR ${ARGN}) + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${DIR}) + ENDFOREACH() +ENDIF() +ENDFUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES) + +FUNCTION(KOKKOS_LIB_COMPILE_OPTIONS TARGET) +IF(KOKKOS_HAS_TRILINOS) + #don't trust tribits to do this correctly + KOKKOS_TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) +ELSEIF(TARGET ${TARGET}) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + KOKKOS_TARGET_COMPILE_OPTIONS(${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}${TARGET} ${INCTYPE} ${ARGN}) +ELSE() + #prefix is empty unless in tribits + CMAKE_PARSE_ARGUMENTS( + PARSE + "PUBLIC;PRIVATE;INTERFACE" + "" + "" + ${ARGN} + ) + IF (PARSE_PUBLIC OR PARSE_PRIVATE OR PARSE_INTERFACE) + MESSAGE(FATAL_ERROR "KOKKOS_LIB_COMPILE_OPTIONS for ${TARGET} should not have attributes") + ENDIF() + GET_PROPERTY(LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + IF (${TARGET} IN_LIST LIBS) + SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_LIBRARY_OPTIONS ${ARGN}) + ELSE() + MESSAGE(FATAL_ERROR "Trying to set compile options on unknown target ${TARGET}") + ENDIF() +ENDIF() +ENDFUNCTION(KOKKOS_LIB_COMPILE_OPTIONS) diff --git a/containers/performance_tests/CMakeLists.txt b/containers/performance_tests/CMakeLists.txt index c293425e559..8351475f43b 100644 --- a/containers/performance_tests/CMakeLists.txt +++ b/containers/performance_tests/CMakeLists.txt @@ -1,7 +1,7 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) 
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) SET(SOURCES TestMain.cpp diff --git a/containers/src/CMakeLists.txt b/containers/src/CMakeLists.txt index 2fb19e72926..0b8c5e59135 100644 --- a/containers/src/CMakeLists.txt +++ b/containers/src/CMakeLists.txt @@ -2,8 +2,8 @@ KOKKOS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) #need these here for now -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) #----------------------------------------------------------------------------- @@ -18,7 +18,11 @@ KOKKOS_ADD_LIBRARY( kokkoscontainers SOURCES ${KOKKOS_CONTAINERS_SRCS} ) -KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES(kokkoscontainers) +KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkoscontainers + ${KOKKOS_TOP_BUILD_DIR} + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} +) KOKKOS_LINK_INTERNAL_LIBRARY(kokkoscontainers kokkoscore) #----------------------------------------------------------------------------- diff --git a/containers/unit_tests/CMakeLists.txt b/containers/unit_tests/CMakeLists.txt index 71cf6c3e563..6374802b30b 100644 --- a/containers/unit_tests/CMakeLists.txt +++ b/containers/unit_tests/CMakeLists.txt @@ -1,7 +1,7 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) IF(Kokkos_ENABLE_Pthread) KOKKOS_ADD_EXECUTABLE_AND_TEST( diff --git 
a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index db0c15b2cab..13d5423f506 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -1,8 +1,8 @@ #I have to leave these here for tribits -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) INSTALL (DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" @@ -14,8 +14,11 @@ KOKKOS_ADD_LIBRARY( kokkoscore SOURCES ${KOKKOS_CORE_SRCS} ) -#include source and binary dirs -KOKKOS_LIB_DEFAULT_INCLUDE_DIRECTORIES(kokkoscore) +KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkoscore + ${KOKKOS_TOP_BUILD_DIR} + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_SOURCE_DIR} +) # build and install pkgconfig file #CONFIGURE_FILE(kokkos.pc.in kokkos.pc @ONLY) diff --git a/core/unit_test/CMakeLists.txt b/core/unit_test/CMakeLists.txt index fa69a3f5010..6af9c981b9f 100644 --- a/core/unit_test/CMakeLists.txt +++ b/core/unit_test/CMakeLists.txt @@ -6,14 +6,13 @@ SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest) #need here for tribits -KOKKOS_ADD_COMPILE_OPTIONS("-DGTEST_HAS_PTHREAD=0") KOKKOS_INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) KOKKOS_ADD_TEST_LIBRARY( kokkos_gtest HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc ) -KOKKOS_TARGET_COMPILE_OPTIONS(kokkos_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0") +KOKKOS_TARGET_COMPILE_DEFINITIONS(kokkos_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0") KOKKOS_TARGET_INCLUDE_DIRECTORIES(kokkos_gtest PUBLIC ${GTEST_SOURCE_DIR}) diff --git a/example/feint/CMakeLists.txt b/example/feint/CMakeLists.txt index b8c2b77321b..f8659fe2a72 100644 --- a/example/feint/CMakeLists.txt +++ b/example/feint/CMakeLists.txt @@ -1,8 +1,8 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common) 
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../fixture) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../fixture) SET(SOURCES "") diff --git a/example/fenl/CMakeLists.txt b/example/fenl/CMakeLists.txt index 5e1e846b7b7..522fbbce230 100644 --- a/example/fenl/CMakeLists.txt +++ b/example/fenl/CMakeLists.txt @@ -1,8 +1,8 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../fixture) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../fixture) SET(SOURCES "") diff --git a/example/fixture/CMakeLists.txt b/example/fixture/CMakeLists.txt index 1735927ec90..bebc3eacfad 100644 --- a/example/fixture/CMakeLists.txt +++ b/example/fixture/CMakeLists.txt @@ -1,7 +1,7 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common) SET(SOURCES_TEST Main.cpp TestFixture.cpp BoxElemPart.cpp ) diff --git a/example/global_2_local_ids/CMakeLists.txt b/example/global_2_local_ids/CMakeLists.txt index 5c2bd370456..d30d15eaf42 100644 --- a/example/global_2_local_ids/CMakeLists.txt +++ b/example/global_2_local_ids/CMakeLists.txt @@ -1,6 +1,6 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) 
+KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) SET(SOURCES G2L_Main.cpp) diff --git a/example/grow_array/CMakeLists.txt b/example/grow_array/CMakeLists.txt index c5a5d2d0c26..3510f28069f 100644 --- a/example/grow_array/CMakeLists.txt +++ b/example/grow_array/CMakeLists.txt @@ -1,6 +1,6 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) SET(SOURCES "") diff --git a/example/md_skeleton/CMakeLists.txt b/example/md_skeleton/CMakeLists.txt index 53d2856ddcd..8be8ed2bfb8 100644 --- a/example/md_skeleton/CMakeLists.txt +++ b/example/md_skeleton/CMakeLists.txt @@ -1,6 +1,6 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) SET(SOURCES "") FILE(GLOB SOURCES *.cpp ) diff --git a/example/multi_fem/CMakeLists.txt b/example/multi_fem/CMakeLists.txt index e3d417b9b6e..2e747b0cfbf 100644 --- a/example/multi_fem/CMakeLists.txt +++ b/example/multi_fem/CMakeLists.txt @@ -1,6 +1,6 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) SET(SOURCES "") diff --git a/example/query_device/CMakeLists.txt b/example/query_device/CMakeLists.txt index 0e4fa4f4450..86956ba3ba4 100644 --- a/example/query_device/CMakeLists.txt +++ b/example/query_device/CMakeLists.txt @@ -1,6 +1,6 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) SET(SOURCES "") FILE(GLOB SOURCES *.cpp) diff --git 
a/example/sort_array/CMakeLists.txt b/example/sort_array/CMakeLists.txt index 813e942f87a..09501d1171f 100644 --- a/example/sort_array/CMakeLists.txt +++ b/example/sort_array/CMakeLists.txt @@ -1,6 +1,6 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) SET(SOURCES "") diff --git a/example/tutorial/01_hello_world/CMakeLists.txt b/example/tutorial/01_hello_world/CMakeLists.txt index de35d75d5ea..e1b90b133dd 100644 --- a/example/tutorial/01_hello_world/CMakeLists.txt +++ b/example/tutorial/01_hello_world/CMakeLists.txt @@ -1,6 +1,6 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. KOKKOS_ADD_EXECUTABLE( diff --git a/example/tutorial/01_hello_world_lambda/CMakeLists.txt b/example/tutorial/01_hello_world_lambda/CMakeLists.txt index f267de12351..a939a5f0ded 100644 --- a/example/tutorial/01_hello_world_lambda/CMakeLists.txt +++ b/example/tutorial/01_hello_world_lambda/CMakeLists.txt @@ -1,6 +1,6 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. 
KOKKOS_ADD_EXECUTABLE( diff --git a/example/tutorial/02_simple_reduce/CMakeLists.txt b/example/tutorial/02_simple_reduce/CMakeLists.txt index 3d9683500dd..21b0c38014b 100644 --- a/example/tutorial/02_simple_reduce/CMakeLists.txt +++ b/example/tutorial/02_simple_reduce/CMakeLists.txt @@ -1,6 +1,6 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. KOKKOS_ADD_EXECUTABLE( diff --git a/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt b/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt index 9c4e48c5523..82a87be4bdc 100644 --- a/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt +++ b/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt @@ -1,6 +1,6 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) KOKKOS_ADD_EXECUTABLE( tutorial_02_simple_reduce_lambda diff --git a/example/tutorial/03_simple_view/CMakeLists.txt b/example/tutorial/03_simple_view/CMakeLists.txt index b19d07ec011..99a7d39c17a 100644 --- a/example/tutorial/03_simple_view/CMakeLists.txt +++ b/example/tutorial/03_simple_view/CMakeLists.txt @@ -1,6 +1,6 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # This is a tutorial, not a test, so we don't ask CTest to run it. 
KOKKOS_ADD_EXECUTABLE( diff --git a/example/tutorial/03_simple_view_lambda/CMakeLists.txt b/example/tutorial/03_simple_view_lambda/CMakeLists.txt index 2f42523464f..c0f33a61b9a 100644 --- a/example/tutorial/03_simple_view_lambda/CMakeLists.txt +++ b/example/tutorial/03_simple_view_lambda/CMakeLists.txt @@ -1,6 +1,6 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) IF (Kokkos_ENABLE_CXX11) # This is a tutorial, not a test, so we don't ask CTest to run it. From 459092fa8b4cb887cc1907b3148d925555a31694 Mon Sep 17 00:00:00 2001 From: jjwilke Date: Fri, 26 Apr 2019 16:22:15 -0700 Subject: [PATCH 010/530] more sweeping cmake changes: #2108, #2119, #2120 --- CMakeLists.txt | 30 +- cmake/KokkosConfig.cmake.in | 30 +- cmake/KokkosCore_config.h.in | 76 +++++ cmake/fake_tribits.cmake | 40 +-- cmake/kokkos_functions.cmake | 221 ++------------- cmake/kokkos_install.cmake | 3 + cmake/kokkos_options.cmake | 419 +++++++-------------------- cmake/kokkos_settings.cmake | 517 +++++++++++++++++++++------------- cmake/kokkos_tribits.cmake | 188 ++++++------- containers/src/CMakeLists.txt | 2 + core/src/CMakeLists.txt | 49 ++++ core/src/dummy.cpp | 0 12 files changed, 707 insertions(+), 868 deletions(-) create mode 100644 cmake/KokkosCore_config.h.in create mode 100644 core/src/dummy.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e574f43e0b..053a34c2246 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,12 @@ set(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) +SET(KOKKOS_COMPILE_OPTIONS) +SET(KOKKOS_LINK_OPTIONS) +SET(KOKKOS_CUDA_OPTIONS) +SET(KOKKOS_CUDAFE_OPTIONS) +SET(KOKKOS_XCOMPILER_OPTIONS) + IF(NOT KOKKOS_HAS_TRILINOS) cmake_minimum_required(VERSION 3.8 FATAL_ERROR) @@ -28,12 +34,10 @@ 
INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake) KOKKOS_SETUP_BUILD_ENVIRONMENT() -SET_PROPERTY(GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) -SET_PROPERTY(GLOBAL PROPERTY KOKKOS_ALL_SOURCES) -SET_PROPERTY(GLOBAL PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES) -SET_PROPERTY(GLOBAL PROPERTY KOKKOS_COMPILE_DEFINITIONS) IF (NOT KOKKOS_HAS_TRILINOS AND NOT KOKKOS_SEPARATE_LIBS) - SET_PROPERTY(GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES kokkos) + SET(KOKKOS_LIBRARIES_NAMES kokkos) +ELSE() + SET(KOKKOS_LIBRARIES_NAMES kokkoscore kokkoscontainers kokkosalgorithms) ENDIF() @@ -80,13 +84,9 @@ file(COPY "${Kokkos_GEN_DIR}/KokkosCore_config.h" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS) install(FILES "${Kokkos_GEN_DIR}/KokkosCore_config.h" DESTINATION include) -file(COPY "${Kokkos_GEN_DIR}/kokkos_generated_settings.cmake" - DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS) - -include(${CMAKE_CURRENT_BINARY_DIR}/kokkos_generated_settings.cmake) -# Sources come from makefile-generated kokkos_generated_settings.cmake file -# Enable using the individual sources if needed -set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) +#file(COPY "${Kokkos_GEN_DIR}/kokkos_generated_settings.cmake" +# DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS) +#include(${CMAKE_CURRENT_BINARY_DIR}/kokkos_generated_settings.cmake) #------------------------------------------------------------------------------ # @@ -98,7 +98,6 @@ set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) # executables also need nvcc_wrapper. Thus, we need to install it. # If the argument of DESTINATION is a relative path, CMake computes it # as relative to ${CMAKE_INSTALL_PATH}. 
- INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION bin) @@ -121,9 +120,8 @@ KOKKOS_EXCLUDE_AUTOTOOLS_FILES() KOKKOS_PACKAGE_POSTPROCESS() IF (NOT KOKKOS_HAS_TRILINOS) - IF (NOT KOKKOS_SEPARATE_LIBS) - KOKKOS_MAKE_LIBKOKKOS() - ENDIF() + #just always do it + KOKKOS_MAKE_LIBKOKKOS() include(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake) ENDIF() diff --git a/cmake/KokkosConfig.cmake.in b/cmake/KokkosConfig.cmake.in index 19489e1b88f..dcdb7798ce7 100644 --- a/cmake/KokkosConfig.cmake.in +++ b/cmake/KokkosConfig.cmake.in @@ -33,28 +33,14 @@ IF (${Kokkos_FIND_VERSION_MINOR}) ENDIF() FUNCTION(TARGET_LINK_KOKKOS TARGET) -CMAKE_PARSE_ARGUMENTS( - PARSE - "PRIVATE;PUBLIC;INTERFACE" - "" - "" - ${ARGN} -) -FOREACH(LIB ${Kokkos_LIBRARIES}) - GET_TARGET_PROPERTY(PROP ${LIB} TYPE) - SET(LINK_TYPE) - IF (${PROP} STREQUAL "INTERFACE_LIBRARY") - #if an interface library, you MUST use link interface - SET(LINK_TYPE "INTERFACE") - ELSEIF(PARSE_PUBLIC) - SET(LINK_TYPE "PUBLIC") - ELSEIF(PARSE_PRIVATE) - SET(LINK_TYPE "PRIVATE") - ELSEIF(PARSE_INTERFACE) - SET(LINK_TYPE "INTERFACE") - ENDIF() - TARGET_LINK_LIBRARIES(${TARGET} ${LINK_TYPE} ${ARGN} ${LIB}) -ENDFOREACH() + CMAKE_PARSE_ARGUMENTS( + PARSE + "PRIVATE;PUBLIC;INTERFACE" + "" + "" + ${ARGN} + ) + TARGET_LINK_LIBRARIES(${TARGET} ${ARGN} kokkos) ENDFUNCTION(TARGET_LINK_KOKKOS) #Find dependencies diff --git a/cmake/KokkosCore_config.h.in b/cmake/KokkosCore_config.h.in new file mode 100644 index 00000000000..24a4c1de7da --- /dev/null +++ b/cmake/KokkosCore_config.h.in @@ -0,0 +1,76 @@ + +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else +#define KOKKOS_CORE_CONFIG_H +#endif + +#cmakedefine KOKKOS_ENABLE_SERIAL +#cmakedefine KOKKOS_ENABLE_OPENMP +#cmakedefine KOKKOS_ENABLE_PTHREAD +#cmakedefine KOKKOS_ENABLE_CUDA +#cmakedefine KOKKOS_ENABLE_ROCM +#cmakedefine KOKKOS_ENABLE_HWLOC +#cmakedefine KOKKOS_ENABLE_MEMKIND +#cmakedefine KOKKOS_ENABLE_LIBRT +#cmakedefine KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE +#cmakedefine KOKKOS_ENABLE_CUDA_UVM +#cmakedefine KOKKOS_ENABLE_CUDA_LDG_INTRINSIC +#cmakedefine KOKKOS_ENABLE_HPX_ASYNC_DISPATCH +#cmakedefine KOKKOS_ENABLE_DEBUG +#cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK +#cmakedefine KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK +#cmakedefine KOKKOS_ENABLE_COMPILER_WARNINGS +#cmakedefine KOKKOS_ENABLE_PROFILING +#cmakedefine KOKKOS_ENABLE_PROFILING_LOAD_PRINT +#cmakedefine KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION +#cmakedefine KOKKOS_ENABLE_DEPRECATED_CODE +#cmakedefine KOKKOS_ENABLE_EXPLICIT_INSTANTIATION +#cmakedefine KOKKOS_ENABLE_ETI +#cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION + + +#cmakedefine KOKKOS_ARCH_NONE +#cmakedefine KOKKOS_ARCH_AMDAVX +#cmakedefine KOKKOS_ARCH_ARMV80 +#cmakedefine KOKKOS_ARCH_ARMV81 +#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX +#cmakedefine KOKKOS_ARCH_ARMV8_TX2 +#cmakedefine KOKKOS_ARCH_SSE42 +#cmakedefine KOKKOS_ARCH_WSM +#cmakedefine KOKKOS_ARCH_SNB +#cmakedefine KOKKOS_ARCH_AVX +#cmakedefine KOKKOS_ARCH_HSW +#cmakedefine KOKKOS_ARCH_BDW +#cmakedefine KOKKOS_ARCH_AVX2 +#cmakedefine KOKKOS_ARCH_SKX +#cmakedefine KOKKOS_ARCH_AVX512XEON +#cmakedefine KOKKOS_ARCH_KNC +#cmakedefine KOKKOS_ARCH_AVX512MIC +#cmakedefine KOKKOS_ARCH_KNL +#cmakedefine KOKKOS_ARCH_BGQ +#cmakedefine KOKKOS_ARCH_POWER7 +#cmakedefine KOKKOS_ARCH_POWER8 +#cmakedefine KOKKOS_ARCH_POWER9 +#cmakedefine KOKKOS_ARCH_KEPLER +#cmakedefine KOKKOS_ARCH_KEPLER30 +#cmakedefine KOKKOS_ARCH_KEPLER32 +#cmakedefine KOKKOS_ARCH_KEPLER35 +#cmakedefine KOKKOS_ARCH_KEPLER37 +#cmakedefine KOKKOS_ARCH_MAXWELL +#cmakedefine KOKKOS_ARCH_MAXWELL50 +#cmakedefine 
KOKKOS_ARCH_MAXWELL52 +#cmakedefine KOKKOS_ARCH_MAXWELL53 +#cmakedefine KOKKOS_ARCH_PASCAL60 +#cmakedefine KOKKOS_ARCH_PASCAL61 +#cmakedefine KOKKOS_ARCH_VOLTA70 +#cmakedefine KOKKOS_ARCH_VOLTA72 +#cmakedefine KOKKOS_ARCH_TURING75 +#cmakedefine KOKKOS_ARCH_RYZEN +#cmakedefine KOKKOS_ARCH_EPYC +#cmakedefine KOKKOS_ARCH_KAVERI +#cmakedefine KOKKOS_ARCH_CARRIZO +#cmakedefine KOKKOS_ARCH_FIJI +#cmakedefine KOKKOS_ARCH_VEGA +#cmakedefine KOKKOS_ARCH_GFX901 +#cmakedefine KOKKOS_ARCH_ROCM @KOKKOS_ARCH_ROCM@ diff --git a/cmake/fake_tribits.cmake b/cmake/fake_tribits.cmake index 4ecfcd888be..1f1f5d0b6ac 100644 --- a/cmake/fake_tribits.cmake +++ b/cmake/fake_tribits.cmake @@ -24,7 +24,12 @@ IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "") ENDIF() ENDMACRO() -if (NOT KOKKOS_HAS_TRILINOS) +IF (NOT KOKKOS_HAS_TRILINOS) +MACRO(APPEND_GLOB VAR) + FILE(GLOB LOCAL_TMP_VAR ${ARGN}) + LIST(APPEND ${VAR} ${LOCAL_TMP_VAR}) +ENDMACRO() + MACRO(GLOBAL_SET VARNAME) SET(${VARNAME} ${ARGN} CACHE INTERNAL "") ENDMACRO() @@ -69,31 +74,6 @@ FUNCTION(KOKKOS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE) endif() ENDFUNCTION() -MACRO(KOKKOS_ADD_TEST_DIRECTORIES) - if (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_TEST_DIRECTORIES(${ARGN}) - else() - IF(${${PROJECT_NAME}_ENABLE_TESTS}) - FOREACH(TEST_DIR ${ARGN}) - ADD_SUBDIRECTORY(${TEST_DIR}) - ENDFOREACH() - ENDIF() - endif() -ENDMACRO() - -MACRO(KOKKOS_ADD_EXAMPLE_DIRECTORIES) - if (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_EXAMPLE_DIRECTORIES(${ARGN}) - else() - IF(${PACKAGE_NAME}_ENABLE_EXAMPLES OR ${PARENT_PACKAGE_NAME}_ENABLE_EXAMPLES) - FOREACH(EXAMPLE_DIR ${ARGN}) - ADD_SUBDIRECTORY(${EXAMPLE_DIR}) - ENDFOREACH() - ENDIF() - endif() -ENDMACRO() - - MACRO(ADD_INTERFACE_LIBRARY LIB_NAME) FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") ADD_LIBRARY(${LIB_NAME} STATIC ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp) @@ -247,7 +227,7 @@ ENDFUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET) FUNCTION(KOKKOS_LINK_INTERNAL_LIBRARY TARGET DEPLIB) IF(KOKKOS_HAS_TRILINOS) #do nothing 
-ELSEIF(KOKKOS_SEPARATE_LIBS) +ELSE(KOKKOS_SEPARATE_LIBS) SET(options INTERFACE) SET(oneValueArgs) SET(multiValueArgs) @@ -262,10 +242,8 @@ ELSEIF(KOKKOS_SEPARATE_LIBS) ELSE() SET(LINK_TYPE PUBLIC) ENDIF() - TARGET_LINK_LIBRARIES(${TARGET} ${LINK_TYPE} ${DEPLIB}) - VERIFY_EMPTY(KOKKOS_LINK_INTERNAL_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) - ELSE() - #only a single lib - so nothing to do here + TARGET_LINK_LIBRARIES(${TARGET} ${LINK_TYPE} ${DEPLIB}) + VERIFY_EMPTY(KOKKOS_LINK_INTERNAL_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) ENDIF() ENDFUNCTION(KOKKOS_LINK_INTERNAL_LIBRARY) diff --git a/cmake/kokkos_functions.cmake b/cmake/kokkos_functions.cmake index 616618753b1..96b88371319 100644 --- a/cmake/kokkos_functions.cmake +++ b/cmake/kokkos_functions.cmake @@ -137,6 +137,7 @@ function(set_kokkos_cxx_standard) # PGI No No # XL No No # + # Kokkos now requires a minimum of CMake 3.8 # For compiling CUDA code using nvcc_wrapper, we will use the host compiler's # flags for turning on C++11. Since for compiler ID and versioning purposes # CMake recognizes the host compiler when calling nvcc_wrapper, this just @@ -144,202 +145,40 @@ function(set_kokkos_cxx_standard) # that we can only use host compilers for CUDA builds that use those flags. # It also means that extensions (gnu++11) can't be turned on for CUDA builds. - # Check if we can use compile features. 
- if(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - if(CMAKE_CXX_COMPILER_ID STREQUAL Clang) - if(NOT CMAKE_VERSION VERSION_LESS 3.1) - set(INTERNAL_USE_COMPILE_FEATURES ON) - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang OR CMAKE_CXX_COMPILER_ID STREQUAL GNU) - if(NOT CMAKE_VERSION VERSION_LESS 3.2) - set(INTERNAL_USE_COMPILE_FEATURES ON) - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel) - if(NOT CMAKE_VERSION VERSION_LESS 3.6) - set(INTERNAL_USE_COMPILE_FEATURES ON) - endif() - endif() - endif() - if(INTERNAL_USE_COMPILE_FEATURES) - # Use the compile features aspect of CMake to transitively cause C++ flags - # to populate to user code. + # Check if we can use compile features. + SET(VALID_FOR_FEATURES Clang GNU Intel AppleClang) + #always valid for certain compilers + IF(${KOKKOS_CXX_COMPILER_ID} IN_LIST VALID_FOR_FEATURES) + set(KOKKOS_CXX_STANDARD_IS_FEATURE ON CACHE INTERNAL + "Whether the compiler family supports target_compile_features") + return() + ENDIF() - # I'm using a hack by requiring features that I know force the lowest version - # of the compilers we want to support. Clang 3.3 and later support all of - # the C++11 standard. With CMake 3.8 and higher, we could switch to using - # cxx_std_11. - set(KOKKOS_CXX11_FEATURES - cxx_nonstatic_member_init # Forces GCC 4.7 or later and Intel 14.0 or later. - PARENT_SCOPE - ) + set(KOKKOS_CXX_STANDARD_IS_FEATURE OFF CACHE INTERNAL + "Whether the compiler family supports target_compile_features") + if(CMAKE_CXX_COMPILER_ID STREQUAL Cray) + # CMAKE doesn't support CXX_STANDARD or C++ compile features for the Cray + # compiler. Set compiler options transitively here such that they trickle + # down to a call to target_compile_options(). 
+ set(KOKKOS_CXX_STANDARD_FLAG "-hstd=c++11" CACHE INTERNAL + "The flags needed for the C++ standard, if not supported as feature") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL PGI) + # CMAKE doesn't support CXX_STANDARD or C++ compile features for the PGI + # compiler. Set compiler options transitively here such that they trickle + # down to a call to target_compile_options(). + set(KOKKOS_CXX_STANDARD_FLAG "--c++11" CACHE INTERNAL + "The flags needed for the C++ standard, if not supported as feature") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL XL) + # CMAKE doesn't support CXX_STANDARD or C++ compile features for the XL + # compiler. Set compiler options transitively here such that they trickle + # down to a call to target_compile_options(). + set(KOKKOS_CXX_STANDARD_FLAG "-std=c++11" CACHE INTERNAL + "The flags needed for the C++ standard, if not supported as feature") else() - # CXX compile features are not yet implemented for this combination of - # compiler and version of CMake. - - if(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang) - # Versions of CMAKE before 3.2 don't support CXX_STANDARD or C++ compile - # features for the AppleClang compiler. Set compiler flags transitively - # here such that they trickle down to a call to target_compile_options(). - - # The following two blocks of code were copied from - # /Modules/Compiler/AppleClang-CXX.cmake from CMake 3.7.2 and then - # modified. - if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0) - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=gnu++11") - endif() - - if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.1) - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++14") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++14") - elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1) - # AppleClang 5.0 knows this flag, but does not set a __cplusplus macro - # greater than 201103L. 
- set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++1y") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++1y") - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel) - # Versions of CMAKE before 3.6 don't support CXX_STANDARD or C++ compile - # features for the Intel compiler. Set compiler flags transitively here - # such that they trickle down to a call to target_compile_options(). - - # The following three blocks of code were copied from - # /Modules/Compiler/Intel-CXX.cmake from CMake 3.7.2 and then modified. - if("x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC") - set(_std -Qstd) - set(_ext c++) - else() - set(_std -std) - set(_ext gnu++) - endif() - - if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.2) - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++14") - # TODO: There is no gnu++14 value supported; figure out what to do. - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++14") - elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.0) - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++1y") - # TODO: There is no gnu++14 value supported; figure out what to do. - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++1y") - endif() - - if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13.0) - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}11") - elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.1) - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++0x") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}0x") - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL Cray) - # CMAKE doesn't support CXX_STANDARD or C++ compile features for the Cray - # compiler. Set compiler options transitively here such that they trickle - # down to a call to target_compile_options(). 
- set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-hstd=c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-hstd=c++11") - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-hstd=c++11") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-hstd=c++11") - elseif(CMAKE_CXX_COMPILER_ID STREQUAL PGI) - # CMAKE doesn't support CXX_STANDARD or C++ compile features for the PGI - # compiler. Set compiler options transitively here such that they trickle - # down to a call to target_compile_options(). - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "--c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "--c++11") - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "--c++11") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "--c++11") - elseif(CMAKE_CXX_COMPILER_ID STREQUAL XL) - # CMAKE doesn't support CXX_STANDARD or C++ compile features for the XL - # compiler. Set compiler options transitively here such that they trickle - # down to a call to target_compile_options(). - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11") - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=c++11") - set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++11") - set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=c++11") - else() - # Assume GNU. CMAKE_CXX_STANDARD is handled correctly by CMake 3.1 and - # above for this compiler. If the user explicitly requests a C++ - # standard, CMake takes care of it. If not, transitively require C++11. - if(NOT CMAKE_CXX_STANDARD) - set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION ${CMAKE_CXX11_STANDARD_COMPILE_OPTION}) - set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION ${CMAKE_CXX11_EXTENSION_COMPILE_OPTION}) - endif() - endif() - - # Set the C++ standard info for Kokkos respecting user set values for - # CMAKE_CXX_STANDARD and CMAKE_CXX_EXTENSIONS. 
- # Only use cxx extension if explicitly requested - if(CMAKE_CXX_STANDARD EQUAL 14) - if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON) - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_EXTENSION_COMPILE_OPTION}) - else() - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_STANDARD_COMPILE_OPTION}) - endif() - elseif(CMAKE_CXX_STANDARD EQUAL 11) - if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON) - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION}) - else() - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION}) - endif() - else() - # The user didn't explicitly request a standard, transitively require - # C++11 respecting CMAKE_CXX_EXTENSIONS. - if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON) - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION}) - else() - set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION}) - endif() - endif() - - set(KOKKOS_CXX_FLAGS ${INTERNAL_CXX_FLAGS} PARENT_SCOPE) + message(FATAL_ERROR "Got unknown compiler ${KOKKOS_COMPILER_ID}") endif() endfunction() -#------------------------------------------------------------------------------- -# function(set_kokkos_sources) -# Takes a list of sources for kokkos (e.g., KOKKOS_SRC from Makefile.kokkos and -# put it into kokkos_generated_settings.cmake) and sorts the files into the subpackages or -# separate_libraries. This is core and containers (algorithms is pure header -# files). 
-# -# Inputs: -# KOKKOS_SRC -# -# Outputs: -# KOKKOS_CORE_SRCS -# KOKKOS_CONTAINERS_SRCS -# -function(set_kokkos_srcs) - set(opts ) # no-value args - set(oneValArgs ) - set(multValArgs KOKKOS_SRC) # e.g., lists - cmake_parse_arguments(IN "${opts}" "${oneValArgs}" "${multValArgs}" ${ARGN}) - - foreach(sfile ${IN_KOKKOS_SRC}) - string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" stripfile "${sfile}") - string(REPLACE "/" ";" striplist "${stripfile}") - list(GET striplist 0 firstdir) - if(${firstdir} STREQUAL "core") - list(APPEND KOKKOS_CORE_SRCS ${sfile}) - else() - list(APPEND KOKKOS_CONTAINERS_SRCS ${sfile}) - endif() - endforeach() - set(KOKKOS_CORE_SRCS ${KOKKOS_CORE_SRCS} PARENT_SCOPE) - set(KOKKOS_CONTAINERS_SRCS ${KOKKOS_CONTAINERS_SRCS} PARENT_SCOPE) - return() -endfunction() -# Setting a default value if it is not already set -macro(set_kokkos_default_default VARIABLE DEFAULT) - IF( "${KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT}" STREQUAL "" ) - IF( "${KOKKOS_ENABLE_${VARIABLE}}" STREQUAL "" ) - set(KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT ${DEFAULT}) - # MESSAGE(WARNING "Set: KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT to ${KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT}") - ELSE() - set(KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT ${KOKKOS_ENABLE_${VARIABLE}}) - # MESSAGE(WARNING "Set: KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT to ${KOKKOS_INTERNAL_ENABLE_${VARIABLE}_DEFAULT}") - ENDIF() - ENDIF() - UNSET(KOKKOS_ENABLE_${VARIABLE} CACHE) -endmacro() diff --git a/cmake/kokkos_install.cmake b/cmake/kokkos_install.cmake index 326d065c7b8..6641ff1327a 100644 --- a/cmake/kokkos_install.cmake +++ b/cmake/kokkos_install.cmake @@ -70,3 +70,6 @@ ENDFOREACH() CONFIGURE_FILE(core/src/kokkos.pc.in kokkos.pc @ONLY) INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION lib/pkgconfig) +CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY) +INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h DESTINATION include) + diff --git 
a/cmake/kokkos_options.cmake b/cmake/kokkos_options.cmake index 393a594c79d..f91e62674e7 100644 --- a/cmake/kokkos_options.cmake +++ b/cmake/kokkos_options.cmake @@ -7,68 +7,99 @@ ########################## AVAILABLE OPTIONS ################################### # Use lists for documentation, verification, and programming convenience -# All CMake options of the type KOKKOS_ENABLE_* -set(KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST) -list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST - Serial - OpenMP - Pthread - Qthread - HPX - Cuda - ROCm - HWLOC - MEMKIND - LIBRT - Cuda_Lambda - Cuda_Relocatable_Device_Code - Cuda_RDC - Cuda_UVM - Cuda_LDG_Intrinsic - HPX_ASYNC_DISPATCH - Debug - Debug_DualView_Modify_Check - Debug_Bounds_Check - Compiler_Warnings - Profiling - Profiling_Load_Print - Aggressive_Vectorization - Deprecated_Code - Explicit_Instantiation - ) +function(KOKKOS_ENABLE_OPTION CAMEL_SUFFIX DEFAULT DOCSTRING) #declares the BOOL cache option KOKKOS_ENABLE_<CAMEL_SUFFIX in upper case>; a user-supplied Kokkos_ENABLE_<CAMEL_SUFFIX> is accepted as an alias for it + set(CAMEL_NAME Kokkos_ENABLE_${CAMEL_SUFFIX}) + string(TOUPPER ${CAMEL_NAME} UC_NAME) + if (NOT DEFINED KOKKOS_CACHED_${UC_NAME} AND DEFINED ${CAMEL_NAME}) + #this is our first time through the cmake + #we were given the camel case name instead of the UC name we wanted + #make darn sure we don't have both an UC and Camel version that differ + if (DEFINED ${UC_NAME} AND NOT ${CAMEL_NAME} STREQUAL ${UC_NAME}) + message(FATAL_ERROR "Given both ${CAMEL_NAME} and ${UC_NAME} with different values") + endif() + #great, no conflicts - use the camel case name as the default for the UC + set(${UC_NAME} ${${CAMEL_NAME}} CACHE BOOL ${DOCSTRING}) + elseif(DEFINED ${CAMEL_NAME}) + #this is at least our second configure and we have an existing cache + #CMake makes this impossible to distinguish something already in cache + #and something given explicitly on the command line + #at this point, we have no choice but to accept the Camel value and print a warning + if (NOT ${CAMEL_NAME} STREQUAL ${UC_NAME}) + message(WARNING "Overriding ${UC_NAME}=${${UC_NAME}} with
${CAMEL_NAME}=${${CAMEL_NAME}}") + endif() + #I have to accept the Camel case value - really no choice here - force it + set(${UC_NAME} ${${CAMEL_NAME}} CACHE BOOL ${DOCSTRING} FORCE) + else() #great, no camel case names - nice and simple + set(${UC_NAME} ${DEFAULT} CACHE BOOL ${DOCSTRING}) + endif() + set(KOKKOS_CACHED_${UC_NAME} ${${UC_NAME}} CACHE INTERNAL ${DOCSTRING}) #records that this option has been configured once; the name must match the KOKKOS_CACHED_ check above so later runs skip the first-time branch + + if (${UC_NAME}) #cmake if statements follow really annoying string resolution rules + message(STATUS "${UC_NAME}") + endif() +endfunction() + +KOKKOS_ENABLE_OPTION(TESTS OFF "Whether to build tests") +KOKKOS_ENABLE_OPTION(EXAMPLES OFF "Whether to build examples") +KOKKOS_ENABLE_OPTION(Serial ON "Whether to build serial backend") +KOKKOS_ENABLE_OPTION(OpenMP OFF "Whether to build OpenMP backend") +KOKKOS_ENABLE_OPTION(Pthread OFF "Whether to build Pthread backend") +KOKKOS_ENABLE_OPTION(Cuda OFF "Whether to build CUDA backend") +KOKKOS_ENABLE_OPTION(ROCm OFF "Whether to build AMD ROCm backend") +KOKKOS_ENABLE_OPTION(HWLOC OFF "Whether to enable HWLOC features - may also require -DHWLOC_DIR") +KOKKOS_ENABLE_OPTION(MEMKIND OFF "Whether to enable MEMKIND features - may also require -DMEMKIND_DIR") +KOKKOS_ENABLE_OPTION(LIBRT OFF "Whether to enable LIBRT features") +KOKKOS_ENABLE_OPTION(Cuda_Relocatable_Device_Code OFF "Whether to enable relocatable device code (RDC) for CUDA") +KOKKOS_ENABLE_OPTION(Cuda_UVM OFF "Whether to enable unified virtual memory (UVM) for CUDA") +KOKKOS_ENABLE_OPTION(Cuda_LDG_Intrinsic OFF "Whether to use CUDA LDG intrinsics") +KOKKOS_ENABLE_OPTION(HPX_ASYNC_DISPATCH OFF "Whether HPX supports asynchronous dispatch") +KOKKOS_ENABLE_OPTION(Debug OFF "Whether to activate extra debug features - may increase compile times") +KOKKOS_ENABLE_OPTION(Debug_DualView_Modify_Check OFF "Debug check on dual views") +KOKKOS_ENABLE_OPTION(Debug_Bounds_Check OFF "Whether to use bounds checking - will increase runtime") +KOKKOS_ENABLE_OPTION(Compiler_Warnings OFF "Whether to
print all compiler warnings") +KOKKOS_ENABLE_OPTION(Profiling ON "Whether to create bindings for profiling tools") +KOKKOS_ENABLE_OPTION(Profiling_Load_Print OFF "Whether to print information about which profiling tools got loaded") +KOKKOS_ENABLE_OPTION(Aggressive_Vectorization OFF "Whether to aggressively vectorize loops") +KOKKOS_ENABLE_OPTION(Deprecated_Code OFF "Whether to enable deprecated code") +KOKKOS_ENABLE_OPTION(Explicit_Instantiation OFF + "Whether to explicitly instantiate certain types to lower future compile times") +SET(KOKKOS_ENABLE_ETI ${KOKKOS_ENABLE_EXPLICIT_INSTANTIATION} CACHE INTERNAL "eti") + +IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_QTHREAD) +SET(QTHR_DEFAULT ON) +ELSE() +SET(QTHR_DEFAULT OFF) +ENDIF() +KOKKOS_ENABLE_OPTION(Qthread ${QTHR_DEFAULT} + "Whether to build Qthreads backend - may also require -DQTHREAD_DIR") -#------------------------------------------------------------------------------- -#------------------------------- Recognize CamelCase Options --------------------------- -#------------------------------------------------------------------------------- +IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HPX) +SET(HPX_DEFAULT ON) +ELSE() +SET(HPX_DEFAULT OFF) +ENDIF() +KOKKOS_ENABLE_OPTION(HPX ${HPX_DEFAULT} "Whether to build HPX backend - may also require -DHPX_DIR") -foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST}) - string(TOUPPER ${opt} OPT ) - IF(DEFINED Kokkos_ENABLE_${opt}) - IF(DEFINED KOKKOS_ENABLE_${OPT}) - IF(NOT ("${KOKKOS_ENABLE_${OPT}}" STREQUAL "${Kokkos_ENABLE_${opt}}")) - IF(DEFINED KOKKOS_ENABLE_${OPT}_INTERNAL) - MESSAGE(WARNING "Defined both Kokkos_ENABLE_${opt}=[${Kokkos_ENABLE_${opt}}] and KOKKOS_ENABLE_${OPT}=[${KOKKOS_ENABLE_${OPT}}] and they differ! Could be caused by old CMakeCache Variable. Run CMake again and warning should disappear. 
If not you are truly setting both variables.") - IF(NOT ("${Kokkos_ENABLE_${opt}}" STREQUAL "${KOKKOS_ENABLE_${OPT}_INTERNAL}")) - UNSET(KOKKOS_ENABLE_${OPT} CACHE) - SET(KOKKOS_ENABLE_${OPT} ${Kokkos_ENABLE_${opt}}) - MESSAGE(WARNING "SET BOTH VARIABLES KOKKOS_ENABLE_${OPT}: ${KOKKOS_ENABLE_${OPT}}") - ELSE() - SET(Kokkos_ENABLE_${opt} ${KOKKOS_ENABLE_${OPT}}) - ENDIF() - ELSE() - MESSAGE(FATAL_ERROR "Defined both Kokkos_ENABLE_${opt}=[${Kokkos_ENABLE_${opt}}] and KOKKOS_ENABLE_${OPT}=[${KOKKOS_ENABLE_${OPT}}] and they differ!") - ENDIF() - ENDIF() - ELSE() - SET(KOKKOS_INTERNAL_ENABLE_${OPT}_DEFAULT ${Kokkos_ENABLE_${opt}}) - SET(KOKKOS_ENABLE_${OPT} ${Kokkos_ENABLE_${opt}}) - ENDIF() - ELSEIF(DEFINED KOKKOS_ENABLE_${OPT}) - #if we are here, the lower case version is not defined - #define it to avoid breaking anything later on - SET(Kokkos_ENABLE_${opt} ${KOKKOS_ENABLE_${OPT}}) - ENDIF() -endforeach() +IF(Trilinos_ENABLE_Kokkos AND Trilinos_ENABLE_OpenMP) + SET(OMP_DEFAULT ON) +ELSE() + SET(OMP_DEFAULT OFF) +ENDIF() +KOKKOS_ENABLE_OPTION(OpenMP ${OMP_DEFAULT} "Whether to build OpenMP backend") #NOTE(review): OpenMP is already declared earlier in this file with default OFF; a non-FORCE cache set does not replace an existing entry, so OMP_DEFAULT appears to have no effect - confirm + +IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA) + SET(CUDA_DEFAULT ON) +ELSE() + SET(CUDA_DEFAULT OFF) +ENDIF() +KOKKOS_ENABLE_OPTION(Cuda ${CUDA_DEFAULT} "Whether to build CUDA backend") #NOTE(review): same concern as OpenMP - Cuda is already declared earlier with default OFF - confirm + +IF (DEFINED CUDA_VERSION AND CUDA_VERSION VERSION_GREATER "7.0") + SET(LAMBDA_DEFAULT ON) +ELSE() + SET(LAMBDA_DEFAULT OFF) +ENDIF() +KOKKOS_ENABLE_OPTION(Cuda_Lambda ${LAMBDA_DEFAULT} "Whether to activate experimental lambda features") IF(DEFINED Kokkos_ARCH) MESSAGE(FATAL_ERROR "Defined Kokkos_ARCH, use KOKKOS_ARCH instead!") @@ -86,8 +117,8 @@ list(APPEND KOKKOS_ARCH_LIST AMDAVX # (HOST) AMD chip ARMv80 # (HOST) ARMv8.0 Compatible CPU ARMv81 # (HOST) ARMv8.1 Compatible CPU - ARMv8-ThunderX # (HOST) ARMv8 Cavium ThunderX CPU - ARMv8-TX2 # (HOST) ARMv8 Cavium ThunderX2 CPU + ARMv8_ThunderX # (HOST) ARMv8 Cavium ThunderX CPU + ARMv8_TX2 # (HOST) ARMv8 Cavium ThunderX2 CPU WSM #
(HOST) Intel Westmere CPU SNB # (HOST) Intel Sandy/Ivy Bridge CPUs HSW # (HOST) Intel Haswell CPUs @@ -113,91 +144,20 @@ list(APPEND KOKKOS_ARCH_LIST Volta70 # (GPU) NVIDIA Volta generation CC 7.0 Volta72 # (GPU) NVIDIA Volta generation CC 7.2 Turing75 # (GPU) NVIDIA Turing generation CC 7.5 + Ryzen + Epyc + Kaveri + Carrizo + Fiji + Vega + GFX901 ) -# List of possible device architectures. -# The case and spelling here needs to match Makefile.kokkos -set(KOKKOS_DEVICES_LIST) -# Options: Cuda,ROCm,OpenMP,Pthread,Qthreads,Serial -list(APPEND KOKKOS_DEVICES_LIST - Cuda # NVIDIA GPU -- see below - OpenMP # OpenMP - Pthread # pthread - Qthreads # qthreads - HPX # HPX - Serial # serial - ROCm # Relocatable device code - ) - -# List of possible TPLs for Kokkos -# From Makefile.kokkos: Options: hwloc,librt,experimental_memkind -set(KOKKOS_USE_TPLS_LIST) -if(APPLE) -list(APPEND KOKKOS_USE_TPLS_LIST - HWLOC # hwloc - MEMKIND # experimental_memkind - ) -else() -list(APPEND KOKKOS_USE_TPLS_LIST - HWLOC # hwloc - LIBRT # librt - MEMKIND # experimental_memkind - ) -endif() -# Map of cmake variables to Makefile variables -set(KOKKOS_INTERNAL_HWLOC hwloc) -set(KOKKOS_INTERNAL_LIBRT librt) -set(KOKKOS_INTERNAL_MEMKIND experimental_memkind) -# List of possible Advanced options -set(KOKKOS_OPTIONS_LIST) -list(APPEND KOKKOS_OPTIONS_LIST - AGGRESSIVE_VECTORIZATION - DISABLE_PROFILING - DISABLE_DUALVIEW_MODIFY_CHECK - ENABLE_PROFILE_LOAD_PRINT - ) -# Map of cmake variables to Makefile variables -set(KOKKOS_INTERNAL_LDG_INTRINSIC use_ldg) -set(KOKKOS_INTERNAL_UVM librt) -set(KOKKOS_INTERNAL_RELOCATABLE_DEVICE_CODE rdc) - - -#------------------------------------------------------------------------------- -# List of possible Options for CUDA -#------------------------------------------------------------------------------- -# From Makefile.kokkos: Options: use_ldg,force_uvm,rdc -set(KOKKOS_CUDA_OPTIONS_LIST) -list(APPEND KOKKOS_CUDA_OPTIONS_LIST - LDG_INTRINSIC # use_ldg - UVM # force_uvm 
- RELOCATABLE_DEVICE_CODE # rdc - LAMBDA # enable_lambda - ) - -# Map of cmake variables to Makefile variables -set(KOKKOS_INTERNAL_LDG_INTRINSIC use_ldg) -set(KOKKOS_INTERNAL_UVM force_uvm) -set(KOKKOS_INTERNAL_RELOCATABLE_DEVICE_CODE rdc) -set(KOKKOS_INTERNAL_LAMBDA enable_lambda) - - -#------------------------------------------------------------------------------- -# List of possible Options for HPX -#------------------------------------------------------------------------------- -# From Makefile.kokkos: Options: enable_async_dispatch -set(KOKKOS_HPX_OPTIONS_LIST) -list(APPEND KOKKOS_HPX_OPTIONS_LIST - ASYNC_DISPATCH # enable_async_dispatch - ) - -# Map of cmake variables to Makefile variables -set(KOKKOS_INTERNAL_ENABLE_ASYNC_DISPATCH enable_async_dispatch) - - -#------------------------------------------------------------------------------- -#------------------------------- Create doc strings ---------------------------- -#------------------------------------------------------------------------------- +FOREACH(Arch ${KOKKOS_ARCH_LIST}) + STRING(TOUPPER ${Arch} ARCH) + SET(KOKKOS_ARCH_${ARCH} OFF CACHE BOOL "Whether to optimize for the ${ARCH} architecture") +ENDFOREACH() set(tmpr "\n ") string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_ARCH_DOCSTR "${KOKKOS_ARCH_LIST}") @@ -230,183 +190,12 @@ set(KOKKOS_HPX_DIR "" CACHE PATH "Location of HPX library.") # Whether to build separate libraries or now set(KOKKOS_SEPARATE_TESTS OFF CACHE BOOL "Provide unit test targets with finer granularity.") -#------------------------------------------------------------------------------- -#------------------------------- KOKKOS_DEVICES -------------------------------- -#------------------------------------------------------------------------------- -# Figure out default settings -IF(Trilinos_ENABLE_Kokkos) - set_kokkos_default_default(SERIAL ON) - set_kokkos_default_default(PTHREAD OFF) - IF(TPL_ENABLE_QTHREAD) - set_kokkos_default_default(QTHREADS ${TPL_ENABLE_QTHREAD}) - ELSE() - 
set_kokkos_default_default(QTHREADS OFF) - ENDIF() - IF(TPL_ENABLE_HPX) - set_kokkos_default_default(HPX ON) - ELSE() - set_kokkos_default_default(HPX OFF) - ENDIF() - IF(Trilinos_ENABLE_OpenMP) - set_kokkos_default_default(OPENMP ${Trilinos_ENABLE_OpenMP}) - ELSE() - set_kokkos_default_default(OPENMP OFF) - ENDIF() - IF(TPL_ENABLE_CUDA) - set_kokkos_default_default(CUDA ${TPL_ENABLE_CUDA}) - ELSE() - set_kokkos_default_default(CUDA OFF) - ENDIF() - set_kokkos_default_default(ROCM OFF) -ELSE() - set_kokkos_default_default(SERIAL ON) - set_kokkos_default_default(OPENMP OFF) - set_kokkos_default_default(PTHREAD OFF) - set_kokkos_default_default(QTHREAD OFF) - set_kokkos_default_default(HPX OFF) - set_kokkos_default_default(CUDA OFF) - set_kokkos_default_default(ROCM OFF) -ENDIF() - -# Set which Kokkos backend to use. -# These are the actual options that define the settings. -set(KOKKOS_ENABLE_SERIAL ${KOKKOS_INTERNAL_ENABLE_SERIAL_DEFAULT} CACHE BOOL "Whether to enable the Kokkos::Serial device. This device executes \"parallel\" kernels sequentially on a single CPU thread. It is enabled by default. If you disable this device, please enable at least one other CPU device, such as Kokkos::OpenMP or Kokkos::Threads.") -set(KOKKOS_ENABLE_OPENMP ${KOKKOS_INTERNAL_ENABLE_OPENMP_DEFAULT} CACHE BOOL "Enable OpenMP support in Kokkos." 
FORCE) -set(KOKKOS_ENABLE_PTHREAD ${KOKKOS_INTERNAL_ENABLE_PTHREAD_DEFAULT} CACHE BOOL "Enable Pthread support in Kokkos.") -set(KOKKOS_ENABLE_QTHREADS ${KOKKOS_INTERNAL_ENABLE_QTHREADS_DEFAULT} CACHE BOOL "Enable Qthreads support in Kokkos.") -set(KOKKOS_ENABLE_HPX ${KOKKOS_INTERNAL_ENABLE_HPX_DEFAULT} CACHE BOOL "Enable HPX support in Kokkos.") -set(KOKKOS_ENABLE_CUDA ${KOKKOS_INTERNAL_ENABLE_CUDA_DEFAULT} CACHE BOOL "Enable CUDA support in Kokkos.") -set(KOKKOS_ENABLE_ROCM ${KOKKOS_INTERNAL_ENABLE_ROCM_DEFAULT} CACHE BOOL "Enable ROCm support in Kokkos.") - - - -#------------------------------------------------------------------------------- -#------------------------------- KOKKOS DEBUG and PROFILING -------------------- -#------------------------------------------------------------------------------- - -# Debug related options enable compiler warnings - -set_kokkos_default_default(DEBUG OFF) -set(KOKKOS_ENABLE_DEBUG ${KOKKOS_INTERNAL_ENABLE_DEBUG_DEFAULT} CACHE BOOL "Enable Kokkos Debug.") - -# From Makefile.kokkos: Advanced Options: -#compiler_warnings, aggressive_vectorization, disable_profiling, disable_dualview_modify_check, enable_profile_load_print -set_kokkos_default_default(COMPILER_WARNINGS OFF) -set(KOKKOS_ENABLE_COMPILER_WARNINGS ${KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS_DEFAULT} CACHE BOOL "Enable compiler warnings.") - -set_kokkos_default_default(DEBUG_DUALVIEW_MODIFY_CHECK OFF) -set(KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK ${KOKKOS_INTERNAL_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK_DEFAULT} CACHE BOOL "Enable dualview modify check.") - -# Enable aggressive vectorization. -set_kokkos_default_default(AGGRESSIVE_VECTORIZATION OFF) -set(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION ${KOKKOS_INTERNAL_ENABLE_AGGRESSIVE_VECTORIZATION_DEFAULT} CACHE BOOL "Enable aggressive vectorization.") - -# Enable profiling. 
-set_kokkos_default_default(PROFILING ON) -set(KOKKOS_ENABLE_PROFILING ${KOKKOS_INTERNAL_ENABLE_PROFILING_DEFAULT} CACHE BOOL "Enable profiling.") - -set_kokkos_default_default(PROFILING_LOAD_PRINT OFF) -set(KOKKOS_ENABLE_PROFILING_LOAD_PRINT ${KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT_DEFAULT} CACHE BOOL "Enable profile load print.") - -set_kokkos_default_default(DEPRECATED_CODE ON) -set(KOKKOS_ENABLE_DEPRECATED_CODE ${KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE_DEFAULT} CACHE BOOL "Enable deprecated code.") - -set_kokkos_default_default(EXPLICIT_INSTANTIATION OFF) -set(KOKKOS_ENABLE_EXPLICIT_INSTANTIATION ${KOKKOS_INTERNAL_ENABLE_EXPLICIT_INSTANTIATION_DEFAULT} CACHE BOOL "Enable explicit template instantiation.") - -set_kokkos_default_default(ETI OFF) -set(KOKKOS_ENABLE_ETI ${KOKKOS_INTERNAL_ENABLE_EXPLICIT_INSTANTIATION_DEFAULT} CACHE BOOL "Enable explicit template instantiation.") - -#------------------------------------------------------------------------------- -#------------------------------- KOKKOS_USE_TPLS ------------------------------- -#------------------------------------------------------------------------------- -# Enable hwloc library. -# Figure out default: -IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HWLOC) - set_kokkos_default_default(HWLOC ON) -ELSE() - set_kokkos_default_default(HWLOC OFF) -ENDIF() -set(KOKKOS_ENABLE_HWLOC ${KOKKOS_INTERNAL_ENABLE_HWLOC_DEFAULT} CACHE BOOL "Enable hwloc for better process placement.") set(KOKKOS_HWLOC_DIR "" CACHE PATH "Location of hwloc library. (kokkos tpl)") - -# Enable memkind library. -set_kokkos_default_default(MEMKIND OFF) -set(KOKKOS_ENABLE_MEMKIND ${KOKKOS_INTERNAL_ENABLE_MEMKIND_DEFAULT} CACHE BOOL "Enable memkind. (kokkos tpl)") set(KOKKOS_MEMKIND_DIR "" CACHE PATH "Location of memkind library. (kokkos tpl)") - -# Enable rt library. 
-IF(Trilinos_ENABLE_Kokkos) - IF(DEFINED TPL_ENABLE_LIBRT) - set_kokkos_default_default(LIBRT ${TPL_ENABLE_LIBRT}) - ELSE() - set_kokkos_default_default(LIBRT OFF) - ENDIF() -ELSE() - set_kokkos_default_default(LIBRT ON) -ENDIF() -set(KOKKOS_ENABLE_LIBRT ${KOKKOS_INTERNAL_ENABLE_LIBRT_DEFAULT} CACHE BOOL "Enable librt for more precise timer. (kokkos tpl)") - - -#------------------------------------------------------------------------------- -#------------------------------- KOKKOS_CUDA_OPTIONS --------------------------- -#------------------------------------------------------------------------------- - -# CUDA options. -# Set Defaults -set_kokkos_default_default(CUDA_LDG_INTRINSIC_DEFAULT OFF) -set_kokkos_default_default(CUDA_UVM_DEFAULT OFF) -set_kokkos_default_default(CUDA_RELOCATABLE_DEVICE_CODE OFF) -IF(Trilinos_ENABLE_Kokkos) - IF(KOKKOS_ENABLE_CUDA) - find_package(CUDA) - ENDIF() - IF (DEFINED CUDA_VERSION) - IF (CUDA_VERSION VERSION_GREATER "7.0") - set_kokkos_default_default(CUDA_LAMBDA ON) - ELSE() - set_kokkos_default_default(CUDA_LAMBDA OFF) - ENDIF() - ENDIF() -ELSE() - set_kokkos_default_default(CUDA_LAMBDA OFF) -ENDIF() - -# Set actual options set(KOKKOS_CUDA_DIR "" CACHE PATH "Location of CUDA library. Defaults to where nvcc installed.") -set(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC ${KOKKOS_INTERNAL_ENABLE_CUDA_LDG_INTRINSIC_DEFAULT} CACHE BOOL "Enable CUDA LDG. (cuda option)") -set(KOKKOS_ENABLE_CUDA_UVM ${KOKKOS_INTERNAL_ENABLE_CUDA_UVM_DEFAULT} CACHE BOOL "Enable CUDA unified virtual memory.") -set(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE ${KOKKOS_INTERNAL_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE_DEFAULT} CACHE BOOL "Enable relocatable device code for CUDA. (cuda option)") -set(KOKKOS_ENABLE_CUDA_LAMBDA ${KOKKOS_INTERNAL_ENABLE_CUDA_LAMBDA_DEFAULT} CACHE BOOL "Enable lambdas for CUDA. 
(cuda option)") # Make sure KOKKOS_ARCH is set to something IF ("${KOKKOS_ARCH}" STREQUAL "NOT_SET") set(KOKKOS_ARCH "None") ENDIF() - -#------------------------------------------------------------------------------- -#------------------------------- KOKKOS_HPX_OPTIONS ---------------------------- -#------------------------------------------------------------------------------- - -# HPX options. -# Set Defaults -set_kokkos_default_default(HPX_ASYNC_DISPATCH OFF) - -# Set actual options -set(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH ${KOKKOS_INTERNAL_ENABLE_HPX_ASYNC_DISPATCH_DEFAULT} CACHE BOOL "Enable HPX async dispatch.") - - -#------------------------------------------------------------------------------- -#----------------------- Set CamelCase Options if they are not yet set --------- -#------------------------------------------------------------------------------- - -foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST}) - string(TOUPPER ${opt} OPT ) - UNSET(KOKKOS_ENABLE_${OPT}_INTERNAL CACHE) - SET(KOKKOS_ENABLE_${OPT}_INTERNAL ${KOKKOS_ENABLE_${OPT}} CACHE BOOL INTERNAL) - IF(DEFINED KOKKOS_ENABLE_${OPT}) - UNSET(Kokkos_ENABLE_${opt} CACHE) - SET(Kokkos_ENABLE_${opt} ${KOKKOS_ENABLE_${OPT}} CACHE BOOL "CamelCase Compatibility setting for KOKKOS_ENABLE_${OPT}") - ENDIF() -endforeach() diff --git a/cmake/kokkos_settings.cmake b/cmake/kokkos_settings.cmake index 6da0dcd6f21..97d7428335a 100644 --- a/cmake/kokkos_settings.cmake +++ b/cmake/kokkos_settings.cmake @@ -13,231 +13,371 @@ #------------------------------- GENERAL OPTIONS ------------------------------- #------------------------------------------------------------------------------- + # Ensure that KOKKOS_ARCH is in the ARCH_LIST if (KOKKOS_ARCH MATCHES ",") - message("-- Detected a comma in: KOKKOS_ARCH=`${KOKKOS_ARCH}`") + message(WARNING "-- Detected a comma in: KOKKOS_ARCH=`${KOKKOS_ARCH}`") message("-- Although we prefer KOKKOS_ARCH to be semicolon-delimited, we do allow") message("-- comma-delimited values 
for compatibility with scripts (see github.com/trilinos/Trilinos/issues/2330)") string(REPLACE "," ";" KOKKOS_ARCH "${KOKKOS_ARCH}") message("-- Commas were changed to semicolons, now KOKKOS_ARCH=`${KOKKOS_ARCH}`") endif() -foreach(arch ${KOKKOS_ARCH}) - list(FIND KOKKOS_ARCH_LIST ${arch} indx) - if (indx EQUAL -1) - message(FATAL_ERROR "`${arch}` is not an accepted value in KOKKOS_ARCH=`${KOKKOS_ARCH}`." - " Please pick from these choices: ${KOKKOS_INTERNAL_ARCH_DOCSTR}") - endif () -endforeach() - -# KOKKOS_SETTINGS uses KOKKOS_ARCH -string(REPLACE ";" "," KOKKOS_GMAKE_ARCH "${KOKKOS_ARCH}") -# From Makefile.kokkos: Options: yes,no -if(${KOKKOS_ENABLE_DEBUG}) - set(KOKKOS_GMAKE_DEBUG yes) -else() - set(KOKKOS_GMAKE_DEBUG no) +if (KOKKOS_ARCH MATCHES "-") + string(REPLACE "-" "_" KOKKOS_ARCH "${KOKKOS_ARCH}") endif() -#------------------------------- KOKKOS_DEVICES -------------------------------- -# Can have multiple devices -set(KOKKOS_DEVICESl) -foreach(devopt ${KOKKOS_DEVICES_LIST}) - string(TOUPPER ${devopt} devoptuc) - if (${KOKKOS_ENABLE_${devoptuc}}) - list(APPEND KOKKOS_DEVICESl ${devopt}) - endif () +foreach(Arch ${KOKKOS_ARCH}) #for each requested architecture: switch on its KOKKOS_ARCH_<NAME> cache flag and reject names missing from KOKKOS_ARCH_LIST + string(TOUPPER ${Arch} ARCH) + #force on all the architectures in the list + SET(KOKKOS_ARCH_${ARCH} ON CACHE BOOL "optimize for architecture ${Arch}" FORCE) + IF (NOT ${Arch} IN_LIST KOKKOS_ARCH_LIST) + message(FATAL_ERROR "`${Arch}` is not an accepted value in KOKKOS_ARCH=`${KOKKOS_ARCH}`."
+ " Please pick from these choices: ${KOKKOS_INTERNAL_ARCH_DOCSTR}") + ENDIF() endforeach() -# List needs to be comma-delmitted -string(REPLACE ";" "," KOKKOS_GMAKE_DEVICES "${KOKKOS_DEVICESl}") -#------------------------------- KOKKOS_OPTIONS -------------------------------- -# From Makefile.kokkos: Options: aggressive_vectorization,disable_profiling,disable_deprecated_code -#compiler_warnings, aggressive_vectorization, disable_profiling, disable_dualview_modify_check, enable_profile_load_print +if(KOKKOS_ENABLE_COMPILER_WARNINGS) + IF (KOKKOS_COMPILER_ID STREQUAL PGI) + #add nothing for PGI + ELSE() + LIST(APPEND KOKKOS_COMPILE_OPTIONS + "-Wall" "-Wshadow" "-pedantic" + "-Wsign-compare" "-Wtype-limits" "-Wuninitialized") + ENDIF() -set(KOKKOS_OPTIONSl) -if(${KOKKOS_ENABLE_COMPILER_WARNINGS}) - list(APPEND KOKKOS_OPTIONSl compiler_warnings) -endif() -if(${KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION}) - list(APPEND KOKKOS_OPTIONSl aggressive_vectorization) -endif() -if(NOT ${KOKKOS_ENABLE_PROFILING}) - list(APPEND KOKKOS_OPTIONSl disable_profiling) -endif() -if(NOT ${KOKKOS_ENABLE_DEPRECATED_CODE}) - list(APPEND KOKKOS_OPTIONSl disable_deprecated_code) -endif() -if(NOT ${KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK}) - list(APPEND KOKKOS_OPTIONSl disable_dualview_modify_check) -endif() -if(${KOKKOS_ENABLE_PROFILING_LOAD_PRINT}) - list(APPEND KOKKOS_OPTIONSl enable_profile_load_print) -endif() -if(${KOKKOS_ENABLE_EXPLICIT_INSTANTIATION} OR ${KOKKOS_ENABLE_ETI}) - list(APPEND KOKKOS_OPTIONSl enable_eti) + #add some extra for GNU + IF (KOKKOS_COMPILER_ID STREQUAL GNU) + LIST(APPEND KOKKOS_COMPILE_OPTIONS + "-Wempty-body" + "-Wclobbered" + "-Wignored-qualifiers") + ENDIF() endif() -# List needs to be comma-delimitted -string(REPLACE ";" "," KOKKOS_GMAKE_USE_TPLS "${KOKKOS_USE_TPLSl}") +#------------------------------- KOKKOS_CUDA_OPTIONS --------------------------- # Construct the Makefile options
-set(KOKKOS_USE_TPLSl) -foreach(tplopt ${KOKKOS_USE_TPLS_LIST}) - if (${KOKKOS_ENABLE_${tplopt}}) - list(APPEND KOKKOS_USE_TPLSl ${KOKKOS_INTERNAL_${tplopt}}) - endif () -endforeach() -# List needs to be comma-delimitted -string(REPLACE ";" "," KOKKOS_GMAKE_USE_TPLS "${KOKKOS_USE_TPLSl}") +if (KOKKOS_ENABLE_CUDA_LAMBDA) + list(APPEND KOKKOS_CUDA_OPTIONS "-expt-extended-lambda") +endif() +if (KOKKOS_COMPILER_ID STREQUAL Clang) + set(CUDA_ARCH_FLAG "--cuda-gpu-arch") + list(APPEND KOKKOS_CUDA_OPTIONS "-x cuda") +elseif(KOKKOS_COMPILER_ID STREQUAL NVIDIA) + set(CUDA_ARCH_FLAG "-arch") +endif() -#------------------------------- KOKKOS_CUDA_OPTIONS --------------------------- -# Construct the Makefile options -set(KOKKOS_CUDA_OPTIONSl) -foreach(cudaopt ${KOKKOS_CUDA_OPTIONS_LIST}) - if (${KOKKOS_ENABLE_CUDA_${cudaopt}}) - list(APPEND KOKKOS_CUDA_OPTIONSl ${KOKKOS_INTERNAL_${cudaopt}}) - endif () -endforeach() -# List needs to be comma-delmitted -string(REPLACE ";" "," KOKKOS_GMAKE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}") - -#------------------------------- PATH VARIABLES -------------------------------- -# Want makefile to use same executables specified which means modifying -# the path so the $(shell ...) commands in the makefile see the right exec -# Also, the Makefile's use FOO_PATH naming scheme for -I/-L construction -#TODO: Makefile.kokkos allows this to be overwritten? 
ROCM_HCC_PATH - -set(KOKKOS_INTERNAL_PATHS) -set(addpathl) -foreach(kvar IN LISTS KOKKOS_USE_TPLS_LIST ITEMS CUDA QTHREADS) - if(${KOKKOS_ENABLE_${kvar}}) - if(DEFINED KOKKOS_${kvar}_DIR) - set(KOKKOS_INTERNAL_PATHS ${KOKKOS_INTERNAL_PATHS} "${kvar}_PATH=${KOKKOS_${kvar}_DIR}") - if(IS_DIRECTORY ${KOKKOS_${kvar}_DIR}/bin) - list(APPEND addpathl ${KOKKOS_${kvar}_DIR}/bin) - endif() - endif() - endif() -endforeach() -# Path env is : delimitted -string(REPLACE ";" ":" KOKKOS_INTERNAL_ADDTOPATH "${addpathl}") +IF (KOKKOS_COMPILER_ID STREQUAL NVIDIA) + IF (KOKKOS_ENABLE_DEBUG OR CMAKE_BUILD_TYPE STREQUAL "DEBUG") + LIST(APPEND KOKKOS_CUDA_OPTIONS -lineinfo) + ENDIF() + IF (KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER 9.0 OR KOKKOS_CXX_COMPILER_VERSION VERSION_EQUAL 9.0) + LIST(APPEND KOKKOS_CUDAFE_OPTIONS --diag_suppress=esa_on_defaulted_function_ignored) + ENDIF() +ENDIF() +IF(KOKKOS_ENABLE_OPENMP) + IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang OR KOKKOS_CXX_COMPILER_ID STREQUAL AppleClang) + LIST(APPEND KOKKOS_COMPILE_OPTIONS -fopenmp=libomp) + LIST(APPEND KOKKOS_LINK_OPTIONS -fopenmp=libomp) + ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + LIST(APPEND KOKKOS_COMPILE_OPTIONS -mp) + LIST(APPEND KOKKOS_LINK_OPTIONS -mp) + ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL XL) + LIST(APPEND KOKKOS_COMPILE_OPTIONS -qsmp=omp) + LIST(APPEND KOKKOS_LINK_OPTIONS -qsmp=omp) + ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) + #on by default + ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + LIST(APPEND KOKKOS_XCOMPILER_OPTIONS -fopenmp) + LIST(APPEND KOKKOS_LINK_OPTIONS -fopenmp) + ELSE() + LIST(APPEND KOKKOS_COMPILE_OPTIONS -fopenmp) + LIST(APPEND KOKKOS_LINK_OPTIONS -fopenmp) + ENDIF() +ENDIF() -######################### SET KOKKOS_SETTINGS ################################## -# Set the KOKKOS_SETTINGS String -- this is the primary communication with the -# makefile configuration. 
See Makefile.kokkos +IF (KOKKOS_ENABLE_ARCH_ARMV81) + IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL Cray AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=armv8.1-a) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=armv8.1-a) + ENDIF() +ENDIF() -set(KOKKOS_SETTINGS KOKKOS_CMAKE=yes) -if(KOKKOS_HAS_TRILINOS) - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_TRIBITS=yes) -else() - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_STANDALONE_CMAKE=yes) -endif() -set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}) -set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_PATH=${KOKKOS_PATH}) -set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_INSTALL_PATH=${CMAKE_INSTALL_PREFIX}) - -# Form of KOKKOS_foo=$KOKKOS_foo -foreach(kvar ARCH;DEVICES;DEBUG;OPTIONS;CUDA_OPTIONS;USE_TPLS) - if(DEFINED KOKKOS_GMAKE_${kvar}) - if (NOT "${KOKKOS_GMAKE_${kvar}}" STREQUAL "") - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_${kvar}=${KOKKOS_GMAKE_${kvar}}) - endif() - endif() -endforeach() +IF (KOKKOS_ENABLE_ARCH_ARMV8_THUNDERX) + SET(KOKKOS_ARCH_ARMV80 ON CACHE BOOL "enable armv80" FORCE) + IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL Cray AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=armv8-a -mtune=thunderx) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=armv8-a -mtune=thunderx) + ENDIF() +ENDIF() -# Form of VAR=VAL -#TODO: Makefile supports MPICH_CXX, OMPI_CXX as well -foreach(ovar CXX;CXXFLAGS;LDFLAGS) - if(DEFINED ${ovar}) - if (NOT "${${ovar}}" STREQUAL "") - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${ovar}=${${ovar}}) - endif() - endif() -endforeach() +IF (KOKKOS_ENABLE_ARCH_ARMV8_THUNDERX2) + SET(KOKKOS_ARCH_ARMV81 ON CACHE BOOL "enable armv80" FORCE) + IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL Cray AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -mtune=thunderx2t99 -mcpu=thunderx2t99) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -mtune=thunderx2t99 -mcup=thunderx2t99) + ENDIF() +ENDIF() 
-# Finally, do the paths -if (NOT "${KOKKOS_INTERNAL_PATHS}" STREQUAL "") - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_INTERNAL_PATHS}) -endif() -if (NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "") - set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}") -endif() +IF (KOKKOS_ENABLE_ARCH_EPYC) + IF (KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -mavx2) + LIST(APPEND KOKKOS_LINK_OPTIONS -mavx2) + ELSE() + LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=znver1 -mtune=znver1) + LIST(APPEND KOKKOS_LINK_OPTIONS -march=znver1 -mtune=znver1) + ENDIF() +ENDIF() -SET(KOKKOS_CXX_STANDARD "" CACHE STRING "The C++ standard for Kokkos to use: c++11, c++14, or c++17") -SET(KOKKOS_CXX_FEATURES "" CACHE STRING "The list of C++ features for Kokkos to enable") -SET(CXX_STANDARD_TEST) +IF (KOKKOS_ENABLE_ARCH_WSM) + #sse42 + IF (KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -xSSE4.2) + LIST(APPEND KOKKOS_LINK_OPTIONS -xSSE4.2) + ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -tp=nehalem) + LIST(APPEND KOKKOS_LINK_OPTIONS -tp=nehalem) + ELSEIF(KOKKOX_CXX_COMPILER_ID STREQUAL Cray) + #nothing + ELSE() + #assume gcc flags + LIST(APPEND KOKKOS_COMPILER_OPTIONS -msse4.2) + LIST(APPEND KOKKOS_LINK_OPTIONS -msse4.2) + ENDIF() +ENDIF() +IF (KOKKOS_ENABLE_ARCH_SNB OR KOKKOS_ENABLE_ARCH_AMDAVX) + #avx + IF (KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -mavx) + LIST(APPEND KOKKOS_LINK_OPTIONS -mavx) + ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -tp=sandybridge) + LIST(APPEND KOKKOS_LINK_OPTIONS -tp=sandybridge) + ELSEIF(KOKKOX_CXX_COMPILER_ID STREQUAL Cray) + #nothing + ELSE() + #assume gcc flags + LIST(APPEND KOKKOS_COMPILER_OPTIONS -mavx) + LIST(APPEND KOKKOS_LINK_OPTIONS -mavx) + ENDIF() +ENDIF() -IF (KOKKOS_CXX_STANDARD AND CMAKE_CXX_STANDARD) - #make sure these are consistent - IF 
(${KOKKOS_CXX_STANDARD} STREQUAL "c++11") - IF(NOT ${CMAKE_CXX_STANDARD} STREQUAL "11") - SET(CXX_STD_ERROR ON) - ENDIF() - ELSEIF (${KOKKOS_CXX_STANDARD} STREQUAL "c++14") - IF(NOT ${CMAKE_CXX_STANDARD} STREQUAL "14") - SET(CXX_STD_ERROR ON) - ENDIF() - ELSEIF (${KOKKOS_CXX_STANDARD} STREQUAL "c++17") - IF(NOT ${CMAKE_CXX_STANDARD} STREQUAL "17") - SET(CXX_STD_ERROR ON) +IF (KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW) + SET(KOKKOS_ARCH_AVX2 ON CACHE BOOL "enable avx2" FORCE) + IF (KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -xCORE-AVX2) + LIST(APPEND KOKKOS_LINK_OPTIONS -xCORE-AVX2) + ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -tp=haswell) + LIST(APPEND KOKKOS_LINK_OPTIONS -tp=haswell) + ELSEIF(KOKKOX_CXX_COMPILER_ID STREQUAL Cray) + #nothing + ELSE() + #assume gcc flags + LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=core-avx2 -mtune=core-avx2) + LIST(APPEND KOKKOS_LINK_OPTIONS -march=core-avx2 -mtune=core-avx2) + IF (KOKKOS_ARCH_BDW) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -mrtm) + LIST(APPEND KOKKOS_LINK_OPTIONS -mrtm) ENDIF() + ENDIF() +ENDIF() + +IF (KOKKOS_ARCH_KNL) + #avx512-mic + SET(KOKKOS_ARCH_AVX512MIC ON CACHE BOOL "enable avx-512 MIC" FORCE) + IF (KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -xMIC-AVX512) + LIST(APPEND KOKKOS_LINK_OPTIONS -xMIC-AVX512) + ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + #nothing + ELSEIF(KOKKOX_CXX_COMPILER_ID STREQUAL Cray) + #nothing ELSE() - #KOKKOS_CXX_STANDARD is something else, which means definitely invalid - SET(CXX_STD_ERROR ON) + #assume gcc flags + LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=knl -mtune=knl) + LIST(APPEND KOKKOS_LINK_OPTIONS -march=knl -mtune=knl) ENDIF() - IF (CXX_STD_ERROR) - MESSAGE(FATAL_ERROR "Specified both CMAKE_CXX_STANDARD and KOKKOS_CXX_STANDARD, but they don't match") +ENDIF() + +IF (KOKKOS_ARCH_SKX) + #avx512-xeon + SET(KOKKOS_ARCH_AVX512XEON ON CACHE BOOL "enable avx-512 Xeon" FORCE) + IF 
(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + LIST(APPEND KOKKOS_COMPILER_OPTIONS -xCORE-AVX512) + LIST(APPEND KOKKOS_LINK_OPTIONS -xCORE-AVX512) + ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + #nothing + ELSEIF(KOKKOX_CXX_COMPILER_ID STREQUAL Cray) + #nothing + ELSE() + #assume gcc flags + LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=skylake-avx512 -mtune=skylake-avx512 -mrtm) + LIST(APPEND KOKKOS_LINK_OPTIONS -march=skylake-avx512 -mtune=skylake-avx512 -mrtm) + ENDIF() +ENDIF() + +IF (KOKKOS_ARCH_POWER7) + IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + LIST(APPEND KOKKOS_COMPILE_OPTIONS KOKKOS_CXXFLAGS += -mcpu=power7 -mtune=power7) + LIST(APPEND KOKKOS_LINK_OPTIONS KOKKOS_CXXFLAGS += -mcpu=power7 -mtune=power7) + ENDIF() +ENDIF() + +IF (KOKKOS_ARCH_POWER8) + IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL PGI AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + LIST(APPEND KOKKOS_COMPILE_OPTIONS KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8) + LIST(APPEND KOKKOS_LINK_OPTIONS KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8) + ENDIF() +ENDIF() + +IF (KOKKOS_ARCH_POWER9) + IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL PGI AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + LIST(APPEND KOKKOS_COMPILE_OPTIONS KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9) + LIST(APPEND KOKKOS_LINK_OPTIONS KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9) ENDIF() ENDIF() + +IF (KOKKOS_ENABLE_ARCH_KAVERI) + SET(KOKKOS_ARCH_ROCM 701 CACHE STRING "rocm arch" FORCE) +ENDIF() + +IF (KOKKOS_ENABLE_ARCH_CARRIZO) + SET(KOKKOS_ARCH_ROCM 801 CACHE STRING "rocm arch" FORCE) +ENDIF() + +IF (KOKKOS_ARCH_FIJI) + SET(KOKKOS_ARCH_ROCM 803 CACHE STRING "rocm arch" FORCE) +ENDIF() + +IF (KOKKOS_ARCH_VEGA) + SET(KOKKOS_ARCH_ROCM 900 CACHE STRING "rocm arch" FORCE) +ENDIF() + +IF (KOKKOS_ARCH_GFX901) + SET(KOKKOS_ARCH_ROCM 901 CACHE STRING "rocm arch" FORCE) +ENDIF() + +IF (KOKKOS_ARCH_RYZEN) +ENDIF() + + + +IF (KOKKOS_ENABLE_ARCH_GFX901) +ENDIF() + + +IF (KOKKOS_ENABLE_CUDA_RELOCATED_DEVICE_CODE) + IF (KOKKOS_COMPILER_ID STREQUAL 
Clang) + LIST(APPEND KOKKOS_CUDA_OPTIONS -fcuda-rdc) + ELSEIF (KOKKOS_COMPILER_ID STREQUAL NVIDIA) + LIST(APPEND KOKKOS_CUDA_OPTIONS --relocatable-device-code=true) + ENDIF() +ENDIF() + + +IF (KOKKOS_ARCH_KEPLER30) + LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAGS}=sm_30") +ENDIF() + +IF (KOKKOS_ARCH_KEPLER32) + LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_32") +ENDIF() + +IF (KOKKOS_ARCH_KEPLER35) + LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_35") +ENDIF() + +IF (KOKKOS_ARCH_KEPLER35) + LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_37") +ENDIF() + +IF (KOKKOS_ARCH_MAXWELL50) + LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_50") +ENDIF() + +IF (KOKKOS_ARCH_MAXWELL52) + LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_52") +ENDIF() + +IF (KOKKOS_ARCH_MAXWELL53) + LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_53") +ENDIF() + +IF (KOKKOS_ARCH_PASCAL60) + LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_60") +ENDIF() + +IF (KOKKOS_ARCH_PASCAL61) + LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_61") +ENDIF() + +IF (KOKKOS_ARCH_VOLTA70) + LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_70") +ENDIF() + +IF (KOKKOS_ARCH_VOLTA72) + LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_72") +ENDIF() + +IF (KOKKOS_ARCH_TURING75) + LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_75") +ENDIF() + +SET(KOKKOS_CXX_STANDARD "" CACHE STRING "The C++ standard for Kokkos to use: c++11, c++14, or c++17") +SET(KOKKOS_CXX_FEATURES "" CACHE STRING "The list of C++ features for Kokkos to enable") +SET(CXX_STANDARD_TEST) + IF (NOT KOKKOS_CXX_STANDARD AND NOT CMAKE_CXX_STANDARD) - SET(KOKKOS_CXX_STANDARD "c++11") + SET(KOKKOS_CXX_STANDARD "11") +ELSEIF(NOT KOKKOS_CXX_STANDARD) + SET(KOKKOS_CXX_STANDARD ${CMAKE_CXX_STANDARD}) ENDIF() IF (KOKKOS_CXX_STANDARD) - IF (${KOKKOS_CXX_STANDARD} STREQUAL "c++11") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_11) + IF (${KOKKOS_CXX_STANDARD} STREQUAL "c++98") + MESSAGE(WARNING 
"Deprecated Kokkos C++ standard set as 'c++98'. Use '98' instead.") + SET(KOKKOS_CXX_STANDARD "98") + ELSEIF (${KOKKOS_CXX_STANDARD} STREQUAL "c++11") + MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++11'. Use '11' instead.") + SET(KOKKOS_CXX_STANDARD "11") ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++14") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_14) + MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++14'. Use '14' instead.") + SET(KOKKOS_CXX_STANDARD "14") ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++17") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_17) + MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++17'. Use '17' instead.") + SET(KOKKOS_CXX_STANDARD "17") ENDIF() ENDIF() -IF (CMAKE_CXX_STANDARD) - IF (${CMAKE_CXX_STANDARD} STREQUAL "11") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_11) - SET(KOKKOS_CXX_STANDARD "c++11") - ELSEIF(${CMAKE_CXX_STANDARD} STREQUAL "14") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_14) - SET(KOKKOS_CXX_STANDARD "c++14") - ELSEIF(${CMAKE_CXX_STANDARD} STREQUAL "17") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_17) - SET(KOKKOS_CXX_STANDARD "c++17") +IF (KOKKOS_CXX_STANDARD AND CMAKE_CXX_STANDARD) + #make sure these are consistent + IF (NOT KOKKOS_CXX_STANDARD STREQUAL CMAKE_CXX_STANDARD) + MESSAGE(FATAL_ERROR "Specified both CMAKE_CXX_STANDARD and KOKKOS_CXX_STANDARD, but they don't match") ENDIF() ENDIF() -if (CMAKE_CXX_STANDARD) - if (CMAKE_CXX_STANDARD STREQUAL "98") - message(FATAL_ERROR "Kokkos requires C++11 or newer!") - endif() - set(KOKKOS_CXX_STANDARD "c++${CMAKE_CXX_STANDARD}") - if (CMAKE_CXX_EXTENSIONS) - if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set(KOKKOS_CXX_STANDARD "gnu++${CMAKE_CXX_STANDARD}") - endif() - endif() -endif() -#changed - allow user to directly set KOKKOS_CXX_STANDARD -set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "KOKKOS_CXX_STANDARD=\"${KOKKOS_CXX_STANDARD}\"") +IF (${KOKKOS_CXX_STANDARD} STREQUAL "11") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_11) +ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL 
"14") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_14) +ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "17") + LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_17) +ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "98") + MESSAGE(FATAL_ERROR "Kokkos requires C++11 or newer!") +ELSEIF(${KOKKOX_CXX_STANDARD} STREQUAL "1Y") +ELSEIF(${KOKKOX_CXX_STANDARD} STREQUAL "1Z") +ELSEIF(${KOKKOX_CXX_STANDARD} STREQUAL "2A") +ENDIF() + + + -# Final form that gets passed to make -set(KOKKOS_SETTINGS env ${KOKKOS_SETTINGS}) ############################ PRINT CONFIGURE STATUS ############################ @@ -273,22 +413,20 @@ if(KOKKOS_CMAKE_VERBOSE) message(STATUS "") message(STATUS "Architectures:") - message(STATUS " ${KOKKOS_GMAKE_ARCH}") + foreach(Arch ${KOKKOS_ARCH_LIST}) + string(TOUPPER ${Arch} ARCH) + if (KOKKOS_ENABLE_${ARCH}) + message(STATUS " ${Arch}") + endif() + endforeach() message(STATUS "") - message(STATUS "Enabled options") + message(STATUS "Enabled options:") if(KOKKOS_SEPARATE_LIBS) message(STATUS " KOKKOS_SEPARATE_LIBS") endif() - foreach(opt IN LISTS KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST) - string(TOUPPER ${opt} OPT) - if (KOKKOS_ENABLE_${OPT}) - message(STATUS " KOKKOS_ENABLE_${OPT}") - endif() - endforeach() - if(KOKKOS_ENABLE_CUDA) if(KOKKOS_CUDA_DIR) message(STATUS " KOKKOS_CUDA_DIR: ${KOKKOS_CUDA_DIR}") @@ -312,9 +450,6 @@ if(KOKKOS_CMAKE_VERBOSE) endif() message(STATUS "") - message(STATUS "Final kokkos settings variable:") - message(STATUS " ${KOKKOS_SETTINGS}") - message(STATUS "*****************************************************") message(STATUS "") endif() diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 8d8c3708fa4..5384671a1f8 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -31,16 +31,6 @@ IF (KOKKOS_HAS_TRILINOS) IF(NOT DEFINED TPL_ENABLE_Pthread) SET(TPL_ENABLE_Pthread OFF) ENDIF() -ELSE() - #Don't do any of these yet - because of case nonsense - #We should not have any enable variables show up in the cache or bad things 
will happen - #OPTION(Kokkos_ENABLE_EXPLICIT_INSTANATION "Whether to enable explicit template instantiation" OFF) - #OPTION(Kokkos_ENABLE_ETI "Whether to enable explicit template instantiation" OFF) - #OPTION(Kokkos_ENABLE_Cuda_RDC "Whether to enable relocatable device code for CUDA" OFF) - #OPTION(Kokkos_ENABLE_OpenMP "Whether to enable the OpenMP backend" OFF) - #OPTION(Kokkos_ENABLE_HPX "Whether to enable the HPX backend" OFF) - #OPTION(Kokkos_ENABLE_DEBUG "Whether to enable extra debug checks/prints" OFF) - #OPTION(Kokkos_ENABLE_TESTS "Whether to enable ENDIF() MACRO(KOKKOS_SUBPACKAGE NAME) @@ -141,8 +131,7 @@ FUNCTION(KOKKOS_ADD_EXECUTABLE EXE_NAME) IF (PARSE_TESTONLYLIBS) TARGET_LINK_LIBRARIES(${EXE_NAME} ${PARSE_TESTONLYLIBS}) ENDIF() - GET_PROPERTY(liblist GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) - FOREACH(LIB ${liblist}) + FOREACH(LIB ${KOKKOS_LIBRARIES_NAMES}) TARGET_LINK_LIBRARIES(${EXE_NAME} ${LIB}) ENDFOREACH() VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE ${PARSE_UNPARSED_ARGUMENTS}) @@ -198,25 +187,24 @@ MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) include(${KOKKOS_SRC_PATH}/cmake/kokkos_settings.cmake) #------------ GENERATE HEADER AND SOURCE FILES ------------------------------- - execute_process( - COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} PREFIX=${CMAKE_INSTALL_PREFIX} generate_build_settings - WORKING_DIRECTORY "${Kokkos_BINARY_DIR}" - OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out - RESULT_VARIABLE GEN_SETTINGS_RESULT - ) - if (GEN_SETTINGS_RESULT) - message(FATAL_ERROR "Kokkos settings generation failed:\n" - "${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings") - endif() - include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake) - install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION lib/cmake/Kokkos) - install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake 
DESTINATION lib/cmake) - install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION ${CMAKE_INSTALL_PREFIX}) + #execute_process( + # COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} PREFIX=${CMAKE_INSTALL_PREFIX} generate_build_settings + # WORKING_DIRECTORY "${Kokkos_BINARY_DIR}" + # OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out + # RESULT_VARIABLE GEN_SETTINGS_RESULT + #) + #if (GEN_SETTINGS_RESULT) + # message(FATAL_ERROR "Kokkos settings generation failed:\n" + # "${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings") + #endif() + #include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake) + #install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION lib/cmake/Kokkos) + #install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION lib/cmake) + #install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION ${CMAKE_INSTALL_PREFIX}) string(REPLACE " " ";" KOKKOS_TPL_INCLUDE_DIRS "${KOKKOS_GMAKE_TPL_INCLUDE_DIRS}") string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_DIRS "${KOKKOS_GMAKE_TPL_LIBRARY_DIRS}") list(REMOVE_ITEM KOKKOS_TPL_INCLUDE_DIRS "") list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_DIRS "") - set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC}) endif() ENDMACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) @@ -244,18 +232,9 @@ MACRO(KOKKOS_PACKAGE_POSTPROCESS) ENDMACRO(KOKKOS_PACKAGE_POSTPROCESS) MACRO(KOKKOS_MAKE_LIBKOKKOS) -GET_PROPERTY(SRCS GLOBAL PROPERTY KOKKOS_ALL_SOURCES) -GET_PROPERTY(INCS GLOBAL PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES) -GET_PROPERTY(DEFS GLOBAL PROPERTY KOKKOS_COMPILE_DEFINITIONS) -KOKKOS_INTERNAL_ADD_LIBRARY(kokkos SOURCES ${SRCS}) -FOREACH(INC ${INCS}) - TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC $) -ENDFOREACH() -TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC $) -TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC $) -FOREACH(DEF ${DEFS}) - 
TARGET_COMPILE_DEFINITIONS(kokkos PUBLIC ${DEF}) -ENDFOREACH() + ADD_LIBRARY(kokkos ${KOKKOS_SOURCE_DIR}/core/src/dummy.cpp) + TARGET_LINK_LIBRARIES(kokkos PUBLIC kokkoscore kokkoscontainers) + TARGET_LINK_LIBRARIES(kokkos PUBLIC kokkosalgorithms) ENDMACRO() FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) @@ -280,7 +259,7 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) TARGET_COMPILE_OPTIONS( ${LIBRARY_NAME} - PUBLIC $<$:${KOKKOS_CXX_FLAGS}> + PUBLIC $<$:${KOKKOS_COMPILE_OPTIONS}> ) if(${CMAKE_VERSION} VERSION_GREATER "3.13" OR ${CMAKE_VERSION} VERSION_EQUAL "3.13") @@ -291,10 +270,37 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) else() #well, this is annoying - I am going to need to hack this for Visual Studio TARGET_LINK_LIBRARIES( - ${LIBRARY_NAME} PUBLIC ${KOKKOS_LD_FLAGS} + ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} ) endif() + IF (KOKKOS_ENABLE_CUDA) + TARGET_COMPILE_OPTIONS( + ${LIBRARY_NAME} + PUBLIC ${KOKKOS_CUDA_OPTIONS}> + ) + SET(NODEDUP_CUDAFE_OPTIONS) + FOREACH(OPT ${NODEDEUP_CUDAFE_OPTIONS}) + LIST(APPEND NODEDUP_CUDAFE_OPTIONS "SHELL: -Xcudafe ${OPT}") + ENDFOREACH() + TARGET_COMPILE_OPTIONS( + ${LIBRARY_NAME} + PUBLIC $<$:${NODEDUP_CUDAFE_OPTIONS}> + ) + ENDIF() + + IF(KOKKOS_XCOMPILER_OPTIONS) + SET(NODEDUP_XCOMPILER_OPTIONS) + FOREACH(OPT ${KOKKOS_XCOMPILER_OPTIONS}) + LIST(APPEND NODEDUP_XCOMPILER_OPTIONS "SHELL: -Xcompiler ${OPT}") + ENDFOREACH() + TARGET_COMPILE_OPTIONS( + ${LIBRARY_NAME} + PUBLIC $<$:${NODEDUP_XCOMPILER_OPTIONS}> + ) + ENDIF() + + TARGET_INCLUDE_DIRECTORIES( ${LIBRARY_NAME} @@ -322,9 +328,15 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) target_link_libraries(${LIBRARY_NAME} PRIVATE memkind) endif() - foreach(FEATURE IN LISTS KOKKOS_CXX_FEATURES) - TARGET_COMPILE_FEATURES(${LIBRARY_NAME} PUBLIC ${FEATURE}) - endforeach() + if (KOKKOS_CXX_STANDARD_IS_FEATURE) + #great! 
I can't do this the right way + foreach(FEATURE IN LISTS KOKKOS_CXX_FEATURES) + TARGET_COMPILE_FEATURES(${LIBRARY_NAME} PUBLIC ${FEATURE}) + endforeach() + else() + #oh, well, no choice but the wrong way + TARGET_COMPILE_OPTIONS(${LIBRARY_NAME} PUBLIC ${KOKKOS_CXX_STANDARD_FLAG}) + endif() KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${LIBRARY_NAME}) @@ -339,18 +351,9 @@ ENDFUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) FUNCTION(KOKKOS_ADD_LIBRARY LIBRARY_NAME) if (KOKKOS_HAS_TRILINOS) TRIBITS_ADD_LIBRARY(${LIBRARY_NAME} ${ARGN}) - elseif(KOKKOS_SEPARATE_LIBS) + else() KOKKOS_INTERNAL_ADD_LIBRARY( ${LIBRARY_NAME} ${ARGN}) - else() - CMAKE_PARSE_ARGUMENTS(PARSE - "" - "" - "SOURCES;HEADERS" - ${ARGN}) - #just append the headers and sources to the list - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_ALL_SOURCES ${PARSE_SOURCES}) - GET_PROPERTY(SRCS GLOBAL PROPERTY KOKKOS_ALL_SOURCES) endif() ENDFUNCTION() @@ -365,10 +368,8 @@ ELSE() ${ARGN} ) - IF (KOKKOS_SEPARATE_LIBS) - ADD_LIBRARY(${NAME} INTERFACE) - KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME}) - ENDIF() + ADD_LIBRARY(${NAME} INTERFACE) + KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME}) INSTALL( FILES ${PARSE_HEADERS} @@ -387,43 +388,20 @@ FUNCTION(KOKKOS_LIB_COMPILE_DEFINITIONS) IF(KOKKOS_HAS_TRILINOS) #don't trust tribits to do this correctly KOKKOS_TARGET_COMPILE_DEFINITIONS(${TARGET} ${ARGN}) -ELSEIF(TARGET ${TARGET}) +ELSE(TARGET ${TARGET}) KOKKOS_LIB_TYPE(${TARGET} INCTYPE) KOKKOS_TARGET_COMPILE_DEFINITIONS(${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}${TARGET} ${INCTYPE} ${ARGN}) -ELSE() - #prefix is empty unless in tribits - CMAKE_PARSE_ARGUMENTS( - PARSE - "PUBLIC;PRIVATE;INTERFACE" - "" - "" - ${ARGN} - ) - IF (PARSE_PUBLIC OR PARSE_PRIVATE OR PARSE_INTERFACE) - MESSAGE(FATAL_ERROR "KOKKOS_LIB_COMPILE_DEFINITIONS for ${TARGET} should not have attributes") - ENDIF() - GET_PROPERTY(LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) - IF (${TARGET} IN_LIST LIBS) - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_LIBRARY_DEFINITIONS 
${ARGN}) - ELSE() - MESSAGE(FATAL_ERROR "Trying to set compile definitions on unknown target ${TARGET}") - ENDIF() ENDFUNCTION(KOKKOS_LIB_COMPILE_DEFINITIONS) FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET) IF(KOKKOS_HAS_TRILINOS) #ignore the target, tribits doesn't do anything directly with targets TRIBITS_INCLUDE_DIRECTORIES(${ARGN}) -ELSEIF(KOKKOS_SEPARATE_LIBS) +ELSE() #append to a list for later KOKKOS_LIB_TYPE(${TARGET} INCTYPE) FOREACH(DIR ${ARGN}) TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $) ENDFOREACH() - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $) -ELSE() #append to a list for later - FOREACH(DIR ${ARGN}) - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_BUILD_INTERFACE_INCLUDES ${DIR}) - ENDFOREACH() ENDIF() ENDFUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES) @@ -431,26 +409,32 @@ FUNCTION(KOKKOS_LIB_COMPILE_OPTIONS TARGET) IF(KOKKOS_HAS_TRILINOS) #don't trust tribits to do this correctly KOKKOS_TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) -ELSEIF(TARGET ${TARGET}) +ELSE() KOKKOS_LIB_TYPE(${TARGET} INCTYPE) KOKKOS_TARGET_COMPILE_OPTIONS(${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}${TARGET} ${INCTYPE} ${ARGN}) -ELSE() - #prefix is empty unless in tribits - CMAKE_PARSE_ARGUMENTS( - PARSE - "PUBLIC;PRIVATE;INTERFACE" - "" - "" - ${ARGN} - ) - IF (PARSE_PUBLIC OR PARSE_PRIVATE OR PARSE_INTERFACE) - MESSAGE(FATAL_ERROR "KOKKOS_LIB_COMPILE_OPTIONS for ${TARGET} should not have attributes") - ENDIF() - GET_PROPERTY(LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) - IF (${TARGET} IN_LIST LIBS) - SET_PROPERTY(GLOBAL APPEND PROPERTY KOKKOS_LIBRARY_OPTIONS ${ARGN}) - ELSE() - MESSAGE(FATAL_ERROR "Trying to set compile options on unknown target ${TARGET}") - ENDIF() ENDIF() ENDFUNCTION(KOKKOS_LIB_COMPILE_OPTIONS) + +MACRO(KOKKOS_ADD_TEST_DIRECTORIES) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_TEST_DIRECTORIES(${ARGN}) + else() + IF(KOKKOS_ENABLE_TESTS) + FOREACH(TEST_DIR ${ARGN}) + ADD_SUBDIRECTORY(${TEST_DIR}) + ENDFOREACH() + ENDIF() + endif() +ENDMACRO() + 
+MACRO(KOKKOS_ADD_EXAMPLE_DIRECTORIES) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_EXAMPLE_DIRECTORIES(${ARGN}) + else() + IF(KOKKOS_ENABLE_EXAMPLES) + FOREACH(EXAMPLE_DIR ${ARGN}) + ADD_SUBDIRECTORY(${EXAMPLE_DIR}) + ENDFOREACH() + ENDIF() + endif() +ENDMACRO() diff --git a/containers/src/CMakeLists.txt b/containers/src/CMakeLists.txt index 0b8c5e59135..f8572358553 100644 --- a/containers/src/CMakeLists.txt +++ b/containers/src/CMakeLists.txt @@ -7,6 +7,8 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) #----------------------------------------------------------------------------- +SET(KOKKOS_CONTAINERS_SRCS) +APPEND_GLOB(KOKKOS_CONTAINERS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/*.cpp) INSTALL ( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/" diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index 13d5423f506..aefdecb10b9 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -10,10 +10,59 @@ INSTALL (DIRECTORY FILES_MATCHING PATTERN "*.hpp" ) +SET(KOKKOS_CORE_SRCS) +APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/*.cpp) + +IF (KOKKOS_ENABLE_ROCM) + APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/ROCm/*.cpp) + IF (KOKKOS_ENABLE_ETI) + APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/eti/ROCm/*.cpp) + ENDIF() +ENDIF() + +IF (KOKKOS_ENABLE_CUDA) + APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/Cuda/*.cpp) + IF (KOKKOS_ENABLE_ETI) + APPEND_GLOB(KOKKOS_CORE_SRC ${CMAKE_CURRENT_SOURCE_DIR/eti/Cuda/*.cpp) + ENDIF() +ENDIF() + +IF (KOKKOS_ENABLE_OPENMP) + APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/OpenMP/*.cpp) + IF (KOKKOS_ENABLE_ETI) + APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/eti/OpenMP/*.cpp) + ENDIF() +ENDIF() + +IF (KOKKOS_ENABLE_PTHREAD) + APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/Threads/*.cpp) + IF (KOKKOS_ENABLE_ETI) + APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/eti/Threads/*.cpp) + ENDIF() +ENDIF() + +IF (KOKKOS_ENABLE_HPX) + 
APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/HPX/*.cpp) +ENDIF() + +IF (NOT KOKKOS_ENABLE_MEMKIND) + LIST(REMOVE_ITEM KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/Kokkos_HBWSpace.cpp) +ENDIF() + +IF (KOKKOS_ENABLE_SERIAL) + IF (KOKKOS_ENABLE_ETI) + APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/eti/Serial/*.cpp) + ENDIF() +ELSE() + LIST(REMOVE_ITEM KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/Kokkos_Serial.cpp) + LIST(REMOVE_ITEM KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/impl/Kokkos_Serial_task.cpp) +ENDIF() + KOKKOS_ADD_LIBRARY( kokkoscore SOURCES ${KOKKOS_CORE_SRCS} ) + KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkoscore ${KOKKOS_TOP_BUILD_DIR} ${CMAKE_CURRENT_BINARY_DIR} diff --git a/core/src/dummy.cpp b/core/src/dummy.cpp new file mode 100644 index 00000000000..e69de29bb2d From 8642a2a3828fa4eea35859a5225f331c11d9fae6 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Sun, 28 Apr 2019 22:08:06 -0700 Subject: [PATCH 011/530] Some work to get OpenMPTarget with GCC 9 working --- Makefile.kokkos | 9 +- Makefile.targets | 6 +- core/src/Kokkos_Core.hpp | 4 +- core/src/Kokkos_OpenMPTarget.hpp | 74 +++------- .../OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp | 138 +----------------- .../OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp | 21 --- .../Kokkos_OpenMPTarget_Instance.cpp | 40 +++++ .../Kokkos_OpenMPTarget_Instance.hpp | 42 ++++++ .../Kokkos_OpenMPTarget_Parallel.hpp | 17 ++- core/src/impl/Kokkos_Core.cpp | 15 +- core/unit_test/TestRange.hpp | 10 +- .../unit_test/standalone/UnitTestMainInit.cpp | 5 +- 12 files changed, 149 insertions(+), 232 deletions(-) create mode 100644 core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp create mode 100644 core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.hpp diff --git a/Makefile.kokkos b/Makefile.kokkos index 45c307617da..fe3bad61e84 100644 --- a/Makefile.kokkos +++ b/Makefile.kokkos @@ -121,6 +121,7 @@ KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_C 
KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang) KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple LLVM) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) +KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC) # Check Host Compiler if using NVCC through nvcc_wrapper ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) @@ -220,6 +221,9 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp + else + #Assume GCC + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none endif endif @@ -434,6 +438,9 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET') + ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_WORKAROUND_OPENMPTARGET_GCC") + endif endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) @@ -1101,7 +1108,7 @@ endif endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - KOKKOS_SRC += $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) diff --git a/Makefile.targets b/Makefile.targets index e7d5a3c9076..00114a30a15 100644 --- a/Makefile.targets +++ b/Makefile.targets @@ -104,10 +104,12 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) Kokkos_OpenMPTarget_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(CXX) 
$(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp +Kokkos_OpenMPTarget_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp Kokkos_OpenMPTargetSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp -#Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp -# $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp +Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp endif Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp diff --git a/core/src/Kokkos_Core.hpp b/core/src/Kokkos_Core.hpp index 9fbba0abfa3..731b2b56ac4 100644 --- a/core/src/Kokkos_Core.hpp +++ b/core/src/Kokkos_Core.hpp @@ -57,10 +57,10 @@ #include #endif -//#if defined( KOKKOS_ENABLE_OPENMPTARGET ) +#if defined( KOKKOS_ENABLE_OPENMPTARGET ) #include #include -//#endif +#endif #if defined( KOKKOS_ENABLE_QTHREADS ) #include diff --git a/core/src/Kokkos_OpenMPTarget.hpp b/core/src/Kokkos_OpenMPTarget.hpp index e2dd249ff78..146db879b6a 100644 --- a/core/src/Kokkos_OpenMPTarget.hpp +++ b/core/src/Kokkos_OpenMPTarget.hpp @@ -64,6 +64,10 @@ namespace Kokkos { namespace Experimental { +namespace Impl { +class OpenMPTargetInternal; +} + /// \class OpenMPTarget /// \brief Kokkos device for multicore processors in the host memory space. 
class OpenMPTarget { @@ -83,70 +87,36 @@ class OpenMPTarget { typedef ScratchMemorySpace< OpenMPTarget > scratch_memory_space ; - //@} - //------------------------------------ - //! \name Functions that all Kokkos execution spaces must implement. - //@{ - inline static bool in_parallel() { return omp_in_parallel(); } - /** \brief Set the device in a "sleep" state. A noop for OpenMPTarget. */ - static bool sleep(); + static void fence(); - /** \brief Wake the device from the 'sleep' state. A noop for OpenMPTarget. */ - static bool wake(); - - /** \brief Wait until all dispatched functors complete. A noop for OpenMPTarget. */ - static void fence() {} - - /// \brief Print configuration information to the given output stream. - static void print_configuration( std::ostream & , const bool detail = false ); - - /// \brief Free any resources being consumed by the device. - static void finalize(); + /** \brief Return the maximum amount of concurrency. */ + static int concurrency(); - /** \brief Initialize the device. - * - * 1) If the hardware locality library is enabled and OpenMPTarget has not - * already bound threads then bind OpenMPTarget threads to maximize - * core utilization and group for memory hierarchy locality. - * - * 2) Allocate a HostThread for each OpenMPTarget thread to hold its - * topology and fan in/out data. - */ - static void initialize( unsigned thread_count = 0 , - unsigned use_numa_count = 0 , - unsigned use_cores_per_numa = 0 ); + //! Print configuration information to the given output stream. + void print_configuration( std::ostream & , const bool detail = false ); - static int is_initialized(); - /** \brief Return the maximum amount of concurrency. */ - static int concurrency(); + static const char* name(); + + //! Free any resources being consumed by the device. + void impl_finalize(); - //@} - //------------------------------------ - /** \brief This execution space has a topological thread pool which can be queried. 
- * - * All threads within a pool have a common memory space for which they are cache coherent. - * depth = 0 gives the number of threads in the whole pool. - * depth = 1 gives the number of threads in a NUMA region, typically sharing L3 cache. - * depth = 2 gives the number of threads at the finest granularity, typically sharing L1 cache. - */ - inline static int thread_pool_size( int depth = 0 ); - - /** \brief The rank of the executing thread in this thread pool */ - KOKKOS_INLINE_FUNCTION static int thread_pool_rank(); + //! Has been initialized + static int impl_is_initialized(); - //------------------------------------ + //! Initialize, telling the CUDA run-time library which device to use. + void impl_initialize(); - inline static unsigned max_hardware_threads() { return thread_pool_size(0); } + inline Impl::OpenMPTargetInternal* impl_internal_space_instance() const { return m_space_instance; } - KOKKOS_INLINE_FUNCTION static - unsigned hardware_thread_id() { return thread_pool_rank(); } + OpenMPTarget(); + ~OpenMPTarget(); - static const char* name(); private: - static bool m_is_initialized; + Impl::OpenMPTargetInternal* m_space_instance; + }; } // namespace Experimental } // namespace Kokkos diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp index 2fa2f1382ca..4b4219bf182 100644 --- a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp @@ -60,16 +60,17 @@ namespace { KOKKOS_INLINE_FUNCTION int kokkos_omp_in_parallel(); -int kokkos_omp_in_critical_region = ( Kokkos::HostSpace::register_in_parallel( kokkos_omp_in_parallel ) , 0 ); +//int kokkos_omp_in_critical_region = ( Kokkos::HostSpace::register_in_parallel( kokkos_omp_in_parallel ) , 0 ); KOKKOS_INLINE_FUNCTION int kokkos_omp_in_parallel() { -#ifndef __CUDA_ARCH__ +/*#ifndef __CUDA_ARCH__ return omp_in_parallel() && ! 
kokkos_omp_in_critical_region ; #else return 0; -#endif +#endif*/ + return omp_in_parallel(); } bool s_using_hwloc = false; @@ -79,12 +80,6 @@ bool s_using_hwloc = false; } // namespace Kokkos -namespace Kokkos { -namespace Experimental { -bool OpenMPTarget::m_is_initialized = false; -} -} - namespace Kokkos { namespace Impl { @@ -106,18 +101,12 @@ void OpenMPTargetExec::verify_is_process( const char * const label ) void OpenMPTargetExec::verify_initialized( const char * const label ) { - if ( 0 == Kokkos::Experimental::OpenMPTarget::is_initialized() ) { + if ( 0 == Kokkos::Experimental::OpenMPTarget().impl_is_initialized() ) { std::string msg( label ); msg.append( " ERROR: not initialized" ); Kokkos::Impl::throw_runtime_exception( msg ); } - if ( omp_get_max_threads() != Kokkos::Experimental::OpenMPTarget::thread_pool_size(0) ) { - std::string msg( label ); - msg.append( " ERROR: Initialized but threads modified inappropriately" ); - Kokkos::Impl::throw_runtime_exception( msg ); - } - } void* OpenMPTargetExec::m_scratch_ptr = NULL; @@ -153,121 +142,4 @@ void OpenMPTargetExec::resize_scratch( int64_t reduce_bytes , } // namespace Impl } // namespace Kokkos -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -//---------------------------------------------------------------------------- - -int OpenMPTarget::is_initialized() -{ return m_is_initialized; }// != Impl::OpenMPTargetExec::m_pool[0]; } - -void OpenMPTarget::initialize( unsigned thread_count , - unsigned use_numa_count , - unsigned use_cores_per_numa ) -{ - // Before any other call to OMP query the maximum number of threads - // and save the value for re-initialization unit testing. 
- - - // Init the array for used for arbitrarily sized atomics - Kokkos::Impl::init_lock_array_host_space(); - - #ifdef KOKKOS_ENABLE_PROFILING - Kokkos::Profiling::initialize(); - #endif - m_is_initialized = true; -} - -//---------------------------------------------------------------------------- - -void OpenMPTarget::finalize() -{ - Kokkos::Impl::OpenMPTargetExec::verify_initialized( "OpenMPTarget::finalize" ); - Kokkos::Impl::OpenMPTargetExec::verify_is_process( "OpenMPTarget::finalize" ); - - m_is_initialized = false; - - omp_set_num_threads(1); - - if ( Kokkos::Impl::s_using_hwloc && Kokkos::hwloc::can_bind_threads() ) { - hwloc::unbind_this_thread(); - } - - #ifdef KOKKOS_ENABLE_PROFILING - Kokkos::Profiling::finalize(); - #endif -} - -//---------------------------------------------------------------------------- - -void OpenMPTarget::print_configuration( std::ostream & s , const bool detail ) -{ - Kokkos::Impl::OpenMPTargetExec::verify_is_process( "OpenMPTarget::print_configuration" ); -/* - s << "Kokkos::Experimental::OpenMPTarget" ; - -#if defined( KOKKOS_ENABLE_OPENMPTARGET ) - s << " KOKKOS_ENABLE_OPENMPTARGET" ; -#endif -#if defined( KOKKOS_ENABLE_HWLOC ) - - const unsigned numa_count_ = Kokkos::hwloc::get_available_numa_count(); - const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); - const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); - - s << " hwloc[" << numa_count_ << "x" << cores_per_numa << "x" << threads_per_core << "]" - << " hwloc_binding_" << ( Impl::s_using_hwloc ? 
"enabled" : "disabled" ) - ; -#endif - - const bool is_initialized = 0 != Impl::OpenMPTargetExec::m_pool[0] ; - - if ( is_initialized ) { - const int numa_count = Kokkos::Impl::OpenMPTargetExec::m_pool_topo[0] / Kokkos::Impl::OpenMPTargetExec::m_pool_topo[1] ; - const int core_per_numa = Kokkos::Impl::OpenMPTargetExec::m_pool_topo[1] / Kokkos::Impl::OpenMPTargetExec::m_pool_topo[2] ; - const int thread_per_core = Kokkos::Impl::OpenMPTargetExec::m_pool_topo[2] ; - - s << " thread_pool_topology[ " << numa_count - << " x " << core_per_numa - << " x " << thread_per_core - << " ]" - << std::endl ; - - if ( detail ) { - std::vector< std::pair > coord( Kokkos::Impl::OpenMPTargetExec::m_pool_topo[0] ); - -#pragma omp parallel - { -#pragma omp critical - { - coord[ omp_get_thread_num() ] = hwloc::get_this_thread_coordinate(); - } -// END #pragma omp critical - } -// END #pragma omp parallel - - for ( unsigned i = 0 ; i < coord.size() ; ++i ) { - s << " thread omp_rank[" << i << "]" - << " kokkos_rank[" << Impl::OpenMPTargetExec::m_map_rank[ i ] << "]" - << " hwloc_coord[" << coord[i].first << "." << coord[i].second << "]" - << std::endl ; - } - } - } - else { - s << " not initialized" << std::endl ; - } -*/ -} - -int OpenMPTarget::concurrency() { - return thread_pool_size(0); -} - -const char* OpenMPTarget::name() { return "OpenMPTarget"; } -} // namespace Experimental -} // namespace Kokkos - #endif //KOKKOS_ENABLE_OPENMPTARGET diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp index d57ddbcc5cc..41c39e80929 100644 --- a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp @@ -549,27 +549,6 @@ class TeamPolicyInternal< Kokkos::Experimental::OpenMPTarget, Properties ... 
>: } // namespace Kokkos -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { - -inline -int OpenMPTarget::thread_pool_size( int depth ) -{ - //return Impl::OpenMPTargetExec::pool_size(depth); - return omp_get_max_threads(); -} - -KOKKOS_INLINE_FUNCTION -int OpenMPTarget::thread_pool_rank() -{ - return omp_get_thread_num(); -} - -} // namespace Experimental -} // namespace Kokkos namespace Kokkos { diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp new file mode 100644 index 00000000000..a112e5fd7b6 --- /dev/null +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp @@ -0,0 +1,40 @@ +#include + +namespace Kokkos { +namespace Experimental { +namespace Impl { +OpenMPTargetInternal::OpenMPTargetInternal():m_is_initialized(false) {} +void OpenMPTargetInternal::fence() {} +int OpenMPTargetInternal::concurrency() { return 128000; } +const char* OpenMPTargetInternal::name() { return "OpenMPTarget"; } +void OpenMPTargetInternal::print_configuration(std::ostream & stream, const bool) { + printf("Using OpenMPTarget\n"); +} + +void OpenMPTargetInternal::impl_finalize() { m_is_initialized = false; } +void OpenMPTargetInternal::impl_initialize() { m_is_initialized = true; } +int OpenMPTargetInternal::impl_is_initialized() { return m_is_initialized?1:0; } + +OpenMPTargetInternal* OpenMPTargetInternal::impl_singleton() { + static OpenMPTargetInternal self; + return &self; +} +} // Namespace Impl + +OpenMPTarget::OpenMPTarget():m_space_instance(Impl::OpenMPTargetInternal::impl_singleton()) {} +OpenMPTarget::~OpenMPTarget() {} + +const char* OpenMPTarget::name() { return Impl::OpenMPTargetInternal::impl_singleton()->name(); } +void OpenMPTarget::print_configuration(std::ostream & stream, const bool detail) { + 
m_space_instance->print_configuration(stream,detail); +} + +int OpenMPTarget::concurrency() { return Impl::OpenMPTargetInternal::impl_singleton()->concurrency(); } +void OpenMPTarget::fence() { Impl::OpenMPTargetInternal::impl_singleton()->fence(); } + +void OpenMPTarget::impl_initialize() { m_space_instance->impl_initialize(); } +void OpenMPTarget::impl_finalize() { m_space_instance->impl_finalize(); } +int OpenMPTarget::impl_is_initialized() { return Impl::OpenMPTargetInternal::impl_singleton()->impl_is_initialized(); } +} // Namespace Experimental +} // Namespace Kokkos + diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.hpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.hpp new file mode 100644 index 00000000000..d1f2f936b21 --- /dev/null +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.hpp @@ -0,0 +1,42 @@ +#include + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +class OpenMPTargetInternal { +private: + + OpenMPTargetInternal(); + OpenMPTargetInternal( const OpenMPTargetInternal & ) = default; + OpenMPTargetInternal & operator = ( const OpenMPTargetInternal & ) = default; + +public: + void fence(); + + /** \brief Return the maximum amount of concurrency. */ + int concurrency(); + + //! Print configuration information to the given output stream. + void print_configuration( std::ostream & , const bool detail = false ); + + static const char* name(); + + //! Free any resources being consumed by the device. + void impl_finalize(); + + //! Has been initialized + int impl_is_initialized(); + + //! Initialize, telling the CUDA run-time library which device to use. 
+ void impl_initialize(); + + static OpenMPTargetInternal* impl_singleton(); +private: + bool m_is_initialized; + +}; +} // Namespace Impl +} // Namespace Experimental +} // Namespace Kokkos + diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp index c93a88606da..a222662f093 100644 --- a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp @@ -88,10 +88,10 @@ class ParallelFor< FunctorType OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget parallel_for"); const typename Policy::member_type begin = m_policy.begin(); const typename Policy::member_type end = m_policy.end(); - - #pragma omp target teams distribute parallel for map(to:this->m_functor) + FunctorType a_functor(m_functor); + #pragma omp target teams distribute parallel for map(to:a_functor) for(int i=begin; im_functor) + FunctorType a_functor(m_functor); + #pragma omp target teams distribute parallel for num_threads(128) map(to:a_functor) for(int i=begin; im_functor,scratch_ptr) + map(to:a_functor,scratch_ptr) for(int i=0 ; im_functor,scratch_ptr) + map(to:a_functor,scratch_ptr) for(int i=0 ; i::value ) { - if(num_threads>0) { - if(use_numa>0) { - Kokkos::Experimental::OpenMPTarget::initialize(num_threads,use_numa); - } - else { - Kokkos::Experimental::OpenMPTarget::initialize(num_threads); - } - } else { - Kokkos::Experimental::OpenMPTarget::initialize(); - } + Kokkos::Experimental::OpenMPTarget().impl_initialize(); //std::cout << "Kokkos::initialize() fyi: OpenMP enabled and initialized" << std::endl ; } else { @@ -295,8 +286,8 @@ void finalize_internal( const bool all_spaces = false ) #if defined( KOKKOS_ENABLE_OPENMPTARGET ) if( std::is_same< Kokkos::Experimental::OpenMPTarget , Kokkos::DefaultExecutionSpace >::value || all_spaces ) { - if(Kokkos::Experimental::OpenMPTarget::is_initialized()) - Kokkos::Experimental::OpenMPTarget::finalize(); + 
if(Kokkos::Experimental::OpenMPTarget().impl_is_initialized()) + Kokkos::Experimental::OpenMPTarget().impl_finalize(); } #endif diff --git a/core/unit_test/TestRange.hpp b/core/unit_test/TestRange.hpp index be878046cb4..b8007f00620 100644 --- a/core/unit_test/TestRange.hpp +++ b/core/unit_test/TestRange.hpp @@ -64,10 +64,18 @@ struct TestRange { struct VerifyOffsetTag {}; int N; + #ifndef KOKKOS_WORKAROUND_OPENMPTARGET_GCC static const int offset = 13; + #else + int offset; + #endif TestRange( const size_t N_ ) : m_flags( Kokkos::ViewAllocateWithoutInitializing( "flags" ), N_ ), N(N_) - {} + { + #ifdef KOKKOS_WORKAROUND_OPENMPTARGET_GCC + offset = 13; + #endif + } void test_for() { diff --git a/core/unit_test/standalone/UnitTestMainInit.cpp b/core/unit_test/standalone/UnitTestMainInit.cpp index 4625459400a..127fe26d570 100644 --- a/core/unit_test/standalone/UnitTestMainInit.cpp +++ b/core/unit_test/standalone/UnitTestMainInit.cpp @@ -61,12 +61,15 @@ #ifdef KOKKOS_ENABLE_HPX #include #endif +#ifdef KOKKOS_ENABLE_OPENMPTARGET +#include +#endif #ifndef TEST_EXECSPACE #ifdef KOKKOS_ENABLE_SERIAL #include #endif #endif -#include +#include int main( int argc, char *argv[] ) { Kokkos::initialize(argc,argv); From 75d3283258f723b2aad86588199c63040d2b5128 Mon Sep 17 00:00:00 2001 From: jjwilke Date: Mon, 29 Apr 2019 16:11:10 -0700 Subject: [PATCH 012/530] cxx flags and headers no longer use Makefile --- CMakeLists.txt | 70 +-- cmake/KokkosConfig.cmake.in | 20 +- cmake/KokkosCore_config.h.in | 23 + cmake/cray.cmake | 7 + cmake/fake_tribits.cmake | 13 +- cmake/gnu.cmake | 20 + cmake/intel.cmake | 28 ++ cmake/kokkos_arch.cmake | 427 ++++++++++++++++ cmake/kokkos_cxx.cmake | 216 +++++++++ cmake/kokkos_enable_options.cmake | 103 ++++ cmake/kokkos_functions.cmake | 207 ++------ cmake/kokkos_install.cmake | 30 +- cmake/kokkos_options.cmake | 201 -------- cmake/kokkos_settings.cmake | 455 ------------------ cmake/kokkos_tpls.cmake | 23 + cmake/kokkos_tribits.cmake | 168 +++---- 
cmake/pgi.cmake | 6 + core/src/dummy.cpp | 11 + .../unit_test/config/cmaketest/CMakeLists.txt | 1 - 19 files changed, 1013 insertions(+), 1016 deletions(-) create mode 100644 cmake/cray.cmake create mode 100644 cmake/gnu.cmake create mode 100644 cmake/intel.cmake create mode 100644 cmake/kokkos_arch.cmake create mode 100644 cmake/kokkos_cxx.cmake create mode 100644 cmake/kokkos_enable_options.cmake delete mode 100644 cmake/kokkos_options.cmake delete mode 100644 cmake/kokkos_settings.cmake create mode 100644 cmake/kokkos_tpls.cmake create mode 100644 cmake/pgi.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 053a34c2246..d0bff0e1962 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,13 +13,6 @@ set(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) -SET(KOKKOS_COMPILE_OPTIONS) -SET(KOKKOS_LINK_OPTIONS) -SET(KOKKOS_CUDA_OPTIONS) -SET(KOKKOS_CUDAFE_OPTIONS) -SET(KOKKOS_XCOMPILER_OPTIONS) - - IF(NOT KOKKOS_HAS_TRILINOS) cmake_minimum_required(VERSION 3.8 FATAL_ERROR) IF(NOT DEFINED ${PROJECT_NAME}) @@ -31,15 +24,17 @@ IF(NOT KOKKOS_HAS_TRILINOS) ENDIF() INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) +GLOBAL_SET(KOKKOS_COMPILE_OPTIONS) +GLOBAL_SET(KOKKOS_LINK_OPTIONS) +GLOBAL_SET(KOKKOS_CUDA_OPTIONS) +GLOBAL_SET(KOKKOS_CUDAFE_OPTIONS) +GLOBAL_SET(KOKKOS_XCOMPILER_OPTIONS) + + + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake) KOKKOS_SETUP_BUILD_ENVIRONMENT() -IF (NOT KOKKOS_HAS_TRILINOS AND NOT KOKKOS_SEPARATE_LIBS) - SET(KOKKOS_LIBRARIES_NAMES kokkos) -ELSE() - SET(KOKKOS_LIBRARIES_NAMES kokkoscore kokkoscontainers kokkosalgorithms) -ENDIF() - GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY) IF (KOKKOS_HAS_TRILINOS) @@ -52,13 +47,6 @@ ELSE() ENDIF() -#------------ NOW BUILD ------------------------------------------------------ -#include(${KOKKOS_SRC_PATH}/cmake/kokkos_build.cmake) - -#TRIBITS_PACKAGE_DECL(Kokkos) -#ADD_SUBDIRECTORY(core) 
-#ADD_SUBDIRECTORY(containers) -#ADD_SUBDIRECTORY(algorithms) #------------------------------------------------------------------------------ # @@ -69,44 +57,10 @@ ENDIF() KOKKOS_PACKAGE_DECL() -#------------------------------------------------------------------------------ -# -# B) Install Kokkos' build files -# -# If using the Makefile-generated files, then need to set things up. -# Here, assume that TriBITS has been run from ProjectCompilerPostConfig.cmake -# and already generated KokkosCore_config.h and kokkos_generated_settings.cmake -# in the previously define Kokkos_GEN_DIR -# We need to copy them over to the correct place and source the cmake file - -set(Kokkos_GEN_DIR ${CMAKE_BINARY_DIR}) -file(COPY "${Kokkos_GEN_DIR}/KokkosCore_config.h" - DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS) -install(FILES "${Kokkos_GEN_DIR}/KokkosCore_config.h" - DESTINATION include) -#file(COPY "${Kokkos_GEN_DIR}/kokkos_generated_settings.cmake" -# DESTINATION "${CMAKE_CURRENT_BINARY_DIR}" USE_SOURCE_PERMISSIONS) -#include(${CMAKE_CURRENT_BINARY_DIR}/kokkos_generated_settings.cmake) - -#------------------------------------------------------------------------------ -# -# C) Install Kokkos' executable scripts -# - -# nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler. -# Kokkos needs nvcc_wrapper in order to build. Other libraries and -# executables also need nvcc_wrapper. Thus, we need to install it. -# If the argument of DESTINATION is a relative path, CMake computes it -# as relative to ${CMAKE_INSTALL_PATH}. -INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION bin) - - #------------------------------------------------------------------------------ # # D) Process the subpackages (subdirectories) for Kokkos # - - KOKKOS_PROCESS_SUBPACKAGES() @@ -125,5 +79,11 @@ IF (NOT KOKKOS_HAS_TRILINOS) include(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake) ENDIF() - +# nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler. 
+# Kokkos needs nvcc_wrapper in order to build. Other libraries and +# executables also need nvcc_wrapper. Thus, we need to install it. +# If the argument of DESTINATION is a relative path, CMake computes it +# as relative to ${CMAKE_INSTALL_PATH}. +INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION bin) +INSTALL(FILES "${CMAKE_BINARY_DIR}/KokkosCore_config.h" DESTINATION include) diff --git a/cmake/KokkosConfig.cmake.in b/cmake/KokkosConfig.cmake.in index dcdb7798ce7..c709ad2ec81 100644 --- a/cmake/KokkosConfig.cmake.in +++ b/cmake/KokkosConfig.cmake.in @@ -1,8 +1,3 @@ -# - Config file for the Kokkos package -# It defines the following variables -# Kokkos_INCLUDE_DIRS - include directories for Kokkos -# Kokkos_LIBRARIES - libraries to link against - # Compute paths @PACKAGE_INIT@ @@ -10,11 +5,11 @@ GET_FILENAME_COMPONENT(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) SET(Kokkos_INCLUDE_DIRS "@CONF_INCLUDE_DIRS@") INCLUDE("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake") +INCLUDE("${Kokkos_CMAKE_DIR}/KokkosDeprecatedTargets.cmake") # These are IMPORTED targets created by KokkosTargets.cmake SET(Kokkos_LIBRARY_DIRS @INSTALL_LIB_DIR@) -SET(Kokkos_LIBRARIES @KOKKOS_LIBRARIES@) SET(Kokkos_SEPARATE_LIBS @KOKKOS_SEPARATE_LIBS@) SET(Kokkos_TPL_LIBRARIES @KOKKOS_LIBS@) SET(Kokkos_ENABLE_Cuda @KOKKOS_ENABLE_CUDA@) @@ -22,16 +17,6 @@ SET(Kokkos_ENABLE_OpenMP @KOKKOS_ENABLE_OPENMP@) SET(Kokkos_ENABLE_Pthread @KOKKOS_ENABLE_PTHREAD@) SET(Kokkos_ENABLE_Serial @KOKKOS_ENABLE_SERIAL@) -IF (${Kokkos_FIND_VERSION_MINOR}) - IF (${Kokkos_FIND_VERSION_MINOR} LESS ${Kokkos_VERSION_MINOR}) - FOREACH(LIB ${Kokkos_LIBRARIES}) - set_property(TARGET ${LIB} APPEND PROPERTY - INTERFACE_COMPILE_DEFINITIONS "KOKKOS_ENABLE_DEPRECATED_CODE" - ) - ENDFOREACH() - ENDIF() -ENDIF() - FUNCTION(TARGET_LINK_KOKKOS TARGET) CMAKE_PARSE_ARGUMENTS( PARSE @@ -40,7 +25,7 @@ FUNCTION(TARGET_LINK_KOKKOS TARGET) "" ${ARGN} ) - TARGET_LINK_LIBRARIES(${TARGET} ${ARGN} kokkos) + 
TARGET_LINK_LIBRARIES(${TARGET} ${ARGN} Kokkos::kokkos) ENDFUNCTION(TARGET_LINK_KOKKOS) #Find dependencies @@ -56,6 +41,7 @@ IF (NOT DEFINED HPX_ROOT AND NOT DEFINED HPX_DIR) SET(HPX_ROOT ${FOUND_HPX_ROOT}) ENDIF() ENDIF() + SET(KOKKOS_ENABLE_HPX @KOKKOS_ENABLE_HPX@) IF (KOKKOS_ENABLE_HPX) FIND_DEPENDENCY(HPX) diff --git a/cmake/KokkosCore_config.h.in b/cmake/KokkosCore_config.h.in index 24a4c1de7da..54d97cfd266 100644 --- a/cmake/KokkosCore_config.h.in +++ b/cmake/KokkosCore_config.h.in @@ -5,6 +5,7 @@ #define KOKKOS_CORE_CONFIG_H #endif +/* Execution Spaces */ #cmakedefine KOKKOS_ENABLE_SERIAL #cmakedefine KOKKOS_ENABLE_OPENMP #cmakedefine KOKKOS_ENABLE_PTHREAD @@ -13,6 +14,21 @@ #cmakedefine KOKKOS_ENABLE_HWLOC #cmakedefine KOKKOS_ENABLE_MEMKIND #cmakedefine KOKKOS_ENABLE_LIBRT + +#ifndef __CUDA_ARCH__ +#cmakedefine KOKKOS_ENABLE_TM +#cmakedefine KOKKOS_USE_ISA_X86_64 +#cmakedefine KOKKOS_USE_ISA_KNC +#cmakedefine KOKKOS_USE_ISA_POWERPCLE +#cmakedefine KOKkOS_USE_ISA_POWERPCBE +#endif + +/* General Settings */ +#cmakedefine KOKKOS_ENABLE_CXX11 +#cmakedefine KOKKOS_ENABLE_CXX14 +#cmakedefine KOKKOS_ENABLE_CXX17 +#cmakedefine KOKKOS_ENABLE_CXX20 + #cmakedefine KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE #cmakedefine KOKKOS_ENABLE_CUDA_UVM #cmakedefine KOKKOS_ENABLE_CUDA_LDG_INTRINSIC @@ -29,6 +45,13 @@ #cmakedefine KOKKOS_ENABLE_ETI #cmakedefine KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION +/* TPL Settings */ +#cmakedefine KOKKOS_ENABLE_HWLOC +#cmakedefine KOKKOS_USE_LIBRT +#cmakedefine KOKKOS_ENABLE_HWBSPACE + +#cmakedefine KOKKOS_IMPL_CUDA_CLANG_WORKAROUND + #cmakedefine KOKKOS_ARCH_NONE #cmakedefine KOKKOS_ARCH_AMDAVX diff --git a/cmake/cray.cmake b/cmake/cray.cmake new file mode 100644 index 00000000000..42dc66000db --- /dev/null +++ b/cmake/cray.cmake @@ -0,0 +1,7 @@ + + +function(kokkos_set_cray_flags standard) + STRING(TOLOWER ${standard} LC_STANDARD) + GLOBAL_SET(KOKKOS_CXX_STANDARD_FLAG "-hstd=c++${LC_STANDARD}") +endfunction() + diff --git 
a/cmake/fake_tribits.cmake b/cmake/fake_tribits.cmake index 1f1f5d0b6ac..a8a7f705efc 100644 --- a/cmake/fake_tribits.cmake +++ b/cmake/fake_tribits.cmake @@ -24,6 +24,10 @@ IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "") ENDIF() ENDMACRO() +MACRO(GLOBAL_OVERWRITE VARNAME VALUE TYPE) + SET(${VARNAME} ${VALUE} CACHE ${TYPE} "" FORCE) +ENDMACRO() + IF (NOT KOKKOS_HAS_TRILINOS) MACRO(APPEND_GLOB VAR) FILE(GLOB LOCAL_TMP_VAR ${ARGN}) @@ -309,14 +313,19 @@ MACRO(KOKKOS_ADD_COMPILE_OPTIONS) ADD_COMPILE_OPTIONS(${ARGN}) ENDMACRO() -MACRO(PRINTALL) +MACRO(PRINTALL match) get_cmake_property(_variableNames VARIABLES) list (SORT _variableNames) foreach (_variableName ${_variableNames}) - if("${_variableName}" MATCHES "Kokkos" OR "${_variableName}" MATCHES "KOKKOS") + if("${_variableName}" MATCHES "${match}") message(STATUS "${_variableName}=${${_variableName}}") endif() endforeach() ENDMACRO(PRINTALL) +MACRO(GLOBAL_APPEND VARNAME) + SET(TEMP ${VARNAME}) + LIST(APPEND TEMP ${ARGN}) + GLOBAL_SET(${VARNAME} ${TEMP}) +ENDMACRO() diff --git a/cmake/gnu.cmake b/cmake/gnu.cmake new file mode 100644 index 00000000000..294c8e04a00 --- /dev/null +++ b/cmake/gnu.cmake @@ -0,0 +1,20 @@ + +FUNCTION(kokkos_set_gnu_flags standard) + STRING(TOLOWER ${standard} LC_STANDARD) + # The following three blocks of code were copied from + # /Modules/Compiler/Intel-CXX.cmake from CMake 3.7.2 and then modified. 
+ IF(CMAKE_CXX_SIMULATE_ID STREQUAL MSVC) + SET(_std -Qstd) + SET(_ext c++) + ELSE() + SET(_std -std) + SET(_ext gnu++) + ENDIF() + + IF (CMAKE_CXX_EXTENSIONS) + GLOBAL_SET(KOKKOS_CXX_STANDARD_FLAG "-std=gnu++${LC_STANDARD}") + ELSE() + GLOBAL_SET(KOKKOS_CXX_STANDARD_FLAG "-std=c++${LC_STANDARD}") + ENDIF() +ENDFUNCTION() + diff --git a/cmake/intel.cmake b/cmake/intel.cmake new file mode 100644 index 00000000000..a0ca715ed59 --- /dev/null +++ b/cmake/intel.cmake @@ -0,0 +1,28 @@ + +function(kokkos_set_intel_flags standard) + STRING(TOLOWER ${standard} LC_STANDARD) + # The following three blocks of code were copied from + # /Modules/Compiler/Intel-CXX.cmake from CMake 3.7.2 and then modified. + if(CMAKE_CXX_SIMULATE_ID STREQUAL MSVC) + set(_std -Qstd) + set(_ext c++) + else() + set(_std -std) + set(_ext gnu++) + endif() + + if(NOT KOKKOS_CXX_STANDARD STREQUAL 11 AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.2) + #There is no gnu++14 value supported; figure out what to do. + GLOBAL_SET(KOKKOS_CXX_STANDARD_FLAG "${_std}=c++${LC_STANDARD}") + elseif(KOKKOS_CXX_STANDARD STREQUAL 11 AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13.0) + IF (CMAKE_CXX_EXTENSIONS) + GLOBAL_SET(KOKKOS_CXX_STANDARD_FLAG "${_std}=${_ext}c++11") + ELSE() + GLOBAL_SET(KOKKOS_CXX_STANDARD_FLAG "${_std}=c++11") + ENDIF() + else() + message(FATAL_ERROR "Intel compiler version too low - need 13.0 for C++11 and 15.0 for C++14") + endif() + +endfunction() + diff --git a/cmake/kokkos_arch.cmake b/cmake/kokkos_arch.cmake new file mode 100644 index 00000000000..ccb2a8eddac --- /dev/null +++ b/cmake/kokkos_arch.cmake @@ -0,0 +1,427 @@ + +FUNCTION(KOKKOS_ARCH_OPTION CAMEL_SUFFIX DOCSTRING) + #all optimizations off by default + kokkos_option(ARCH_${CAMEL_SUFFIX} OFF BOOL ${DOCSTRING}) +ENDFUNCTION() + +FUNCTION(ARCH_FLAGS) + SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang GNU) + CMAKE_PARSE_ARGUMENTS( + PARSE + "LINK_ONLY;COMPILE_ONLY" + "" + "${COMPILERS}" + ${ARGN}) + + 
SET(COMPILER ${KOKKOS_CXX_COMPILER_ID}) + + SET(FLAGS) + SET(NEW_COMPILE_OPTIONS) + SET(NEW_XCOMPILER_OPTIONS) + SET(NEW_LINK_OPTIONS) + LIST(APPEND NEW_XCOMPILER_OPTIONS ${KOKKOS_XCOMPILER_OPTIONS}) + LIST(APPEND NEW_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS}) + LIST(APPEND NEW_LINK_OPTIONS ${KOKKOS_COMPILE_OPTIONS}) + FOREACH(COMP ${COMPILERS}) + IF (COMPILER STREQUAL "${COMP}") + IF (PARSE_${COMPILER}) + SET(FLAGS ${PARSE_${COMPILER}}) + ELSEIF(PARSE_DEFAULT) + SET(FLAGS ${PARSE_DEFAULT}) + ENDIF() + ENDIF() + ENDFOREACH() + + IF (NOT LINK_ONLY) + IF ("-Xcompiler" IN_LIST FLAGS) + LIST(REMOVE_ITEM FLAGS "-Xcompiler") + LIST(APPEND NEW_XCOMPILER_OPTIONS ${FLAGS}) + GLOBAL_SET(KOKKOS_XCOMPILER_OPTIONS ${NEW_XCOMPILER_OPTIONS}) + ELSE() + LIST(APPEND NEW_COMPILE_OPTIONS ${FLAGS}) + GLOBAL_SET(KOKKOS_COMPILE_OPTIONS ${NEW_COMPILE_OPTIONS}) + ENDIF() + ENDIF() + + IF (NOT COMPILE_ONLY) + LIST(APPEND NEW_LINK_OPTIONS ${FLAGS}) + GLOBAL_SET(KOKKOS_LINK_OPTIONS ${NEW_LINK_OPTIONS}) + ENDIF() +ENDFUNCTION() + +# Setting this variable to a value other than "None" can improve host +# performance by turning on architecture specific code. +# NOT SET is used to determine if the option is passed in. It is reset to +# default "None" down below. +SET(KOKKOS_ARCH "NOT_SET" CACHE STRING + "Optimize for specific host architecture. Options are: ${KOKKOS_INTERNAL_ARCH_DOCSTR}") + +# Make sure KOKKOS_ARCH is set to something +IF ("${KOKKOS_ARCH}" STREQUAL "NOT_SET") + SET(KOKKOS_ARCH "None") +ENDIF() + +#------------------------------------------------------------------------------- +# List of possible host architectures. 
+#------------------------------------------------------------------------------- +SET(KOKKOS_ARCH_LIST) +LIST(APPEND KOKKOS_ARCH_LIST + None # No architecture optimization + AMDAVX # (HOST) AMD chip + ARMv80 # (HOST) ARMv8.0 Compatible CPU + ARMv81 # (HOST) ARMv8.1 Compatible CPU + ARMv8_ThunderX # (HOST) ARMv8 Cavium ThunderX CPU + ARMv8_TX2 # (HOST) ARMv8 Cavium ThunderX2 CPU + WSM # (HOST) Intel Westmere CPU + SNB # (HOST) Intel Sandy/Ivy Bridge CPUs + HSW # (HOST) Intel Haswell CPUs + BDW # (HOST) Intel Broadwell Xeon E-class CPUs + SKX # (HOST) Intel Sky Lake Xeon E-class HPC CPUs (AVX512) + KNC # (HOST) Intel Knights Corner Xeon Phi + KNL # (HOST) Intel Knights Landing Xeon Phi + BGQ # (HOST) IBM Blue Gene Q + Power7 # (HOST) IBM POWER7 CPUs + Power8 # (HOST) IBM POWER8 CPUs + Power9 # (HOST) IBM POWER9 CPUs + Kepler # (GPU) NVIDIA Kepler default (generation CC 3.5) + Kepler30 # (GPU) NVIDIA Kepler generation CC 3.0 + Kepler32 # (GPU) NVIDIA Kepler generation CC 3.2 + Kepler35 # (GPU) NVIDIA Kepler generation CC 3.5 + Kepler37 # (GPU) NVIDIA Kepler generation CC 3.7 + Maxwell # (GPU) NVIDIA Maxwell default (generation CC 5.0) + Maxwell50 # (GPU) NVIDIA Maxwell generation CC 5.0 + Maxwell52 # (GPU) NVIDIA Maxwell generation CC 5.2 + Maxwell53 # (GPU) NVIDIA Maxwell generation CC 5.3 + Pascal60 # (GPU) NVIDIA Pascal generation CC 6.0 + Pascal61 # (GPU) NVIDIA Pascal generation CC 6.1 + Volta70 # (GPU) NVIDIA Volta generation CC 7.0 + Volta72 # (GPU) NVIDIA Volta generation CC 7.2 + Turing75 # (GPU) NVIDIA Turing generation CC 7.5 + Ryzen + Epyc + Kaveri + Carrizo + Fiji + Vega + GFX901 + ) + + + +# Ensure that KOKKOS_ARCH is in the ARCH_LIST +IF (KOKKOS_ARCH MATCHES ",") + MESSAGE(WARNING "-- Detected a comma in: KOKKOS_ARCH=`${KOKKOS_ARCH}`") + MESSAGE("-- Although we prefer KOKKOS_ARCH to be semicolon-delimited, we do allow") + MESSAGE("-- comma-delimited values for compatibility with scripts (see github.com/trilinos/Trilinos/issues/2330)") + STRING(REPLACE 
"," ";" KOKKOS_ARCH "${KOKKOS_ARCH}")
+  MESSAGE("-- Commas were changed to semicolons, now KOKKOS_ARCH=`${KOKKOS_ARCH}`")
+ENDIF()
+
+IF (KOKKOS_ARCH MATCHES "-")
+  string(REPLACE "-" "_" KOKKOS_ARCH "${KOKKOS_ARCH}")
+ENDIF()
+
+FOREACH(Arch ${KOKKOS_ARCH_LIST})
+  KOKKOS_ARCH_OPTION(${Arch} "Whether to optimize for the ${Arch} architecture")
+ENDFOREACH()
+
+set(tmpr "\n ")
+string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_ARCH_DOCSTR "${KOKKOS_ARCH_LIST}")
+set(KOKKOS_INTERNAL_ARCH_DOCSTR "${tmpr}${KOKKOS_INTERNAL_ARCH_DOCSTR}")
+
+FOREACH(Arch ${KOKKOS_ARCH})
+  STRING(TOUPPER ${Arch} ARCH)
+  IF (NOT ${Arch} IN_LIST KOKKOS_ARCH_LIST) #validate before anything is written to the cache
+    MESSAGE(FATAL_ERROR "`${Arch}` is not an accepted value in KOKKOS_ARCH=`${KOKKOS_ARCH}`."
+      " Please pick from these choices: ${KOKKOS_INTERNAL_ARCH_DOCSTR}")
+  ENDIF()
+  #force on all the architectures in the list
+  SET(KOKKOS_ARCH_${ARCH} ON CACHE BOOL "optimize for architecture ${Arch}" FORCE)
+ENDFOREACH()
+
+IF(KOKKOS_ENABLE_COMPILER_WARNINGS)
+  SET(COMMON_WARNINGS
+    "-Wall" "-Wshadow" "-pedantic"
+    "-Wsign-compare" "-Wtype-limits" "-Wuninitialized")
+
+  SET(GNU_WARNINGS "-Wempty-body" "-Wclobbered" "-Wignored-qualifiers"
+    ${COMMON_WARNINGS})
+
+  ARCH_FLAGS(
+    PGI " "
+    GNU ${GNU_WARNINGS}
+    DEFAULT ${COMMON_WARNINGS}
+  )
+ENDIF()
+
+
+#------------------------------- KOKKOS_CUDA_OPTIONS ---------------------------
+# Construct the Makefile options
+IF (KOKKOS_ENABLE_CUDA_LAMBDA)
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-expt-extended-lambda")
+ENDIF()
+
+IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
+  SET(CUDA_ARCH_FLAG "--cuda-gpu-arch")
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-x cuda")
+ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
+  SET(CUDA_ARCH_FLAG "-arch")
+ENDIF()
+
+IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
+  IF (KOKKOS_ENABLE_DEBUG OR CMAKE_BUILD_TYPE STREQUAL "DEBUG")
+    GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -lineinfo)
+  ENDIF()
+  IF (KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER 9.0 OR KOKKOS_CXX_COMPILER_VERSION VERSION_EQUAL 9.0)
+    GLOBAL_APPEND(KOKKOS_CUDAFE_OPTIONS --diag_suppress=esa_on_defaulted_function_ignored)
+  ENDIF()
+ENDIF()
+
+IF(KOKKOS_ENABLE_OPENMP)
+  IF (KOKKOS_CXX_COMPILER_ID STREQUAL AppleClang)
+    MESSAGE(FATAL_ERROR "Apple Clang does not support OpenMP. Use native clang instead")
+  ENDIF()
+  ARCH_FLAGS(
+    Clang -fopenmp=libomp
+    PGI -mp
+    NVIDIA -Xcompiler -fopenmp
+    Cray ""
+    XL -qsmp=omp
+    DEFAULT -fopenmp
+  )
+ENDIF()
+
+IF (KOKKOS_ARCH_ARMV81)
+  ARCH_FLAGS(
+    Cray " "
+    PGI " "
+    DEFAULT -march=armv8.1-a
+  )
+ENDIF()
+
+IF (KOKKOS_ARCH_ARMV8_THUNDERX)
+  SET(KOKKOS_ARCH_ARMV80 ON CACHE BOOL "enable armv80" FORCE)
+  ARCH_FLAGS(
+    Cray " "
+    PGI " "
+    DEFAULT -march=armv8-a -mtune=thunderx
+  )
+ENDIF()
+
+IF (KOKKOS_ARCH_ARMV8_THUNDERX2)
+  SET(KOKKOS_ARCH_ARMV81 ON CACHE BOOL "enable armv81" FORCE)
+  ARCH_FLAGS(
+    Cray " "
+    PGI " "
+    DEFAULT -march=thunderx2t99 -mtune=thunderx2t99
+  )
+ENDIF()
+
+IF (KOKKOS_ARCH_EPYC)
+  ARCH_FLAGS(
+    Intel -mavx2
+    DEFAULT -march=znver1 -mtune=znver1
+  )
+  SET(KOKKOS_USE_ISA_X86_64 ON CACHE INTERNAL "x86-64 architecture")
+ENDIF()
+
+IF (KOKKOS_ARCH_WSM)
+  ARCH_FLAGS(
+    Intel -xSSE4.2
+    PGI -tp=nehalem
+    Cray " "
+    DEFAULT -msse4.2
+  )
+  SET(KOKKOS_USE_ISA_X86_64 ON CACHE INTERNAL "x86-64 architecture")
+ENDIF()
+
+IF (KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX)
+  ARCH_FLAGS(
+    Intel -mavx
+    PGI -tp=sandybridge
+    Cray " "
+    DEFAULT -mavx
+  )
+  SET(KOKKOS_USE_ISA_X86_64 ON CACHE INTERNAL "x86-64 architecture")
+ENDIF()
+
+IF (KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW)
+  SET(KOKKOS_ARCH_AVX2 ON CACHE BOOL "enable avx2" FORCE)
+  ARCH_FLAGS(
+    Intel -xCORE-AVX2
+    PGI -tp=haswell
+    Cray " "
+    DEFAULT -march=core-avx2 -mtune=core-avx2
+  )
+  SET(KOKKOS_USE_ISA_X86_64 ON CACHE INTERNAL "x86-64 architecture")
+  IF (KOKKOS_ARCH_BDW)
+    SET(KOKKOS_ENABLE_TM ON CACHE INTERNAL "whether transactional memory supported")
+  ENDIF()
+ENDIF()
+
+IF (KOKKOS_ARCH_KNL)
+  #avx512-mic
+  SET(KOKKOS_ARCH_AVX512MIC ON CACHE BOOL "enable avx-512 MIC" FORCE)
+  ARCH_FLAGS(
+    Intel -xMIC-AVX512
+    PGI " "
+    Cray " "
+    DEFAULT -march=knl -mtune=knl
+  )
+  SET(KOKKOS_USE_ISA_X86_64 ON CACHE INTERNAL "x86-64 architecture")
+ENDIF()
+
+IF (KOKKOS_ARCH_SKX)
+  #avx512-xeon
+  SET(KOKKOS_ARCH_AVX512XEON ON CACHE BOOL "enable avx-512 Xeon" FORCE)
+  ARCH_FLAGS(
+    Intel -xCORE-AVX512
+    PGI " "
+    Cray " "
+    DEFAULT -march=skylake-avx512 -mtune=skylake-avx512 -mrtm
+  )
+  SET(KOKKOS_USE_ISA_X86_64 ON CACHE INTERNAL "x86-64 architecture")
+  SET(KOKKOS_ENABLE_TM ON CACHE INTERNAL "whether transactional memory supported")
+ENDIF()
+
+IF (KOKKOS_ARCH_POWER7)
+  ARCH_FLAGS(
+    PGI " "
+    DEFAULT -mcpu=power7 -mtune=power7
+  )
+  SET(KOKKOS_USE_ISA_POWERPCBE ON CACHE INTERNAL "Power PC Architecture")
+ENDIF()
+
+IF (KOKKOS_ARCH_POWER8)
+  ARCH_FLAGS(
+    PGI " "
+    NVIDIA " "
+    DEFAULT -mcpu=power8 -mtune=power8
+  )
+  SET(KOKKOS_USE_ISA_POWERPCLE ON CACHE INTERNAL "Power PC Architecture")
+ENDIF()
+
+IF (KOKKOS_ARCH_POWER9)
+  ARCH_FLAGS(
+    PGI " "
+    NVIDIA " "
+    DEFAULT -mcpu=power9 -mtune=power9
+  )
+  SET(KOKKOS_USE_ISA_POWERPCLE ON CACHE INTERNAL "Power PC Architecture")
+ENDIF()
+
+
+IF (KOKKOS_ARCH_KAVERI)
+  SET(KOKKOS_ARCH_ROCM 701 CACHE STRING "rocm arch" FORCE)
+ENDIF()
+
+IF (KOKKOS_ARCH_CARRIZO)
+  SET(KOKKOS_ARCH_ROCM 801 CACHE STRING "rocm arch" FORCE)
+ENDIF()
+
+IF (KOKKOS_ARCH_FIJI)
+  SET(KOKKOS_ARCH_ROCM 803 CACHE STRING "rocm arch" FORCE)
+ENDIF()
+
+IF (KOKKOS_ARCH_VEGA)
+  SET(KOKKOS_ARCH_ROCM 900 CACHE STRING "rocm arch" FORCE)
+ENDIF()
+
+IF (KOKKOS_ARCH_GFX901)
+  SET(KOKKOS_ARCH_ROCM 901 CACHE STRING "rocm arch" FORCE)
+ENDIF()
+
+IF (KOKKOS_ARCH_RYZEN)
+ENDIF()
+
+IF (KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) #name produced by the Cuda_Relocatable_Device_Code option
+  ARCH_FLAGS(
+    Clang -fcuda-rdc
+    NVIDIA --relocatable-device-code=true
+  )
+ENDIF()
+
+
+IF (KOKKOS_ARCH_KEPLER30)
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_30")
+ENDIF()
+
+IF (KOKKOS_ARCH_KEPLER32)
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_32")
+ENDIF()
+
+IF (KOKKOS_ARCH_KEPLER35)
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_35")
+ENDIF()
+
+IF (KOKKOS_ARCH_KEPLER37)
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_37")
+ENDIF()
+
+IF (KOKKOS_ARCH_MAXWELL50)
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_50")
+ENDIF()
+
+IF (KOKKOS_ARCH_MAXWELL52)
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_52")
+ENDIF()
+
+IF (KOKKOS_ARCH_MAXWELL53)
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_53")
+ENDIF()
+
+IF (KOKKOS_ARCH_PASCAL60)
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_60")
+ENDIF()
+
+IF (KOKKOS_ARCH_PASCAL61)
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_61")
+ENDIF()
+
+IF (KOKKOS_ARCH_VOLTA70)
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_70")
+ENDIF()
+
+IF (KOKKOS_ARCH_VOLTA72)
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_72")
+ENDIF()
+
+IF (KOKKOS_ARCH_TURING75)
+  GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_75")
+ENDIF()
+
+#CMake verbose is kind of pointless
+#Let's just always print things
+MESSAGE(STATUS "Execution Spaces:")
+IF(KOKKOS_ENABLE_CUDA)
+  MESSAGE(STATUS " Device Parallel: Cuda")
+ELSE()
+  MESSAGE(STATUS " Device Parallel: None")
+ENDIF()
+
+IF(KOKKOS_ENABLE_OPENMP)
+  MESSAGE(STATUS " Host Parallel: OpenMP")
+ELSEIF(KOKKOS_ENABLE_PTHREAD)
+  MESSAGE(STATUS " Host Parallel: Pthread")
+ELSEIF(KOKKOS_ENABLE_QTHREAD OR KOKKOS_ENABLE_QTHREADS) #the option is declared as Qthread; keep the old spelling too
+  MESSAGE(STATUS " Host Parallel: Qthreads")
+ELSEIF(KOKKOS_ENABLE_HPX)
+  MESSAGE(STATUS " Host Parallel: HPX")
+ELSE()
+  MESSAGE(STATUS " Host Parallel: None")
+ENDIF()
+
+IF(KOKKOS_ENABLE_SERIAL)
+  MESSAGE(STATUS " Host Serial: Serial")
+ELSE()
+  MESSAGE(STATUS " Host Serial: None")
+ENDIF()
+
+MESSAGE(STATUS "")
+MESSAGE(STATUS "Architectures:")
+FOREACH(Arch ${KOKKOS_ARCH_LIST})
+  STRING(TOUPPER ${Arch} ARCH)
+  IF (KOKKOS_ARCH_${ARCH}) #same cache variable the KOKKOS_ARCH loop forces ON
+    MESSAGE(STATUS " ${Arch}")
+  ENDIF()
+ENDFOREACH()
diff --git a/cmake/kokkos_cxx.cmake b/cmake/kokkos_cxx.cmake
new file mode 100644
index 00000000000..826e3fd8de5
--- /dev/null
+++ b/cmake/kokkos_cxx.cmake
@@ -0,0 +1,216 @@
+
+FUNCTION(kokkos_set_cxx_standard_feature standard)
+  SET(EXTENSION_NAME CMAKE_CXX${standard}_EXTENSION_COMPILE_OPTION)
+  SET(STANDARD_NAME CMAKE_CXX${standard}_STANDARD_COMPILE_OPTION)
+  SET(FEATURE_NAME cxx_std_${standard})
+  #message("HAVE ${FEATURE_NAME} ${${EXTENSION_NAME}} ${${STANDARD_NAME}}")
+  #CMake's way of telling us that the standard (or extension)
+  #flags are supported is the extension/standard variables
+  IF (CMAKE_CXX_EXTENSIONS AND ${EXTENSION_NAME})
+    GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME})
+  ELSEIF(NOT CMAKE_CXX_EXTENSIONS AND ${STANDARD_NAME})
+    GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME})
+  ELSE()
+    #nope, we can't do anything here
+    GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE "")
+  ENDIF()
+ENDFUNCTION(kokkos_set_cxx_standard_feature)
+
+SET(KOKKOS_CXX_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING INTERNAL FORCE) #FORCE: refresh on every configure
+SET(KOKKOS_CXX_COMPILER_ID ${CMAKE_CXX_COMPILER_ID} CACHE STRING INTERNAL FORCE)
+SET(KOKKOS_CXX_COMPILER_VERSION ${CMAKE_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
+
+# Check if the compiler is nvcc (which really means nvcc_wrapper).
+EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
+                COMMAND grep nvcc
+                COMMAND wc -l
+                OUTPUT_VARIABLE INTERNAL_HAVE_COMPILER_NVCC
+                OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+STRING(REGEX REPLACE "^ +" ""
+       INTERNAL_HAVE_COMPILER_NVCC ${INTERNAL_HAVE_COMPILER_NVCC})
+
+IF(INTERNAL_HAVE_COMPILER_NVCC)
+  # SET the compiler id to nvcc. We use the value used by CMake 3.8.
+  SET(KOKKOS_CXX_COMPILER_ID NVIDIA CACHE STRING INTERNAL FORCE) #without FORCE the entry created above is kept
+
+  # SET nvcc's compiler version.
+  EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version
+                  COMMAND grep release
+                  OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION
+                  OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+  STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$"
+         TEMP_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION})
+  SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE)
+ENDIF()
+
+# Enforce the minimum compilers supported by Kokkos.
+SET(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:")
+SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang 3.5.2 or higher")
+SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 4.8.4 or higher")
+SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 15.0.2 or higher")
+SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 7.0.28 or higher")
+SET(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n PGI 17.1 or higher\n")
+
+IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
+  IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 3.5.2)
+    MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
+  ENDIF()
+ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL GNU)
+  IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 4.8.4)
+    MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
+  ENDIF()
+ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
+  IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 15.0.2)
+    MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
+  ENDIF()
+ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
+  IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 7.0.28)
+    MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
+  ENDIF()
+ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI)
+  IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 17.1)
+    MESSAGE(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
+  ENDIF()
+ENDIF()
+
+# Enforce that extensions are turned off for nvcc_wrapper.
+# For compiling CUDA code using nvcc_wrapper, we will use the host compiler's
+# flags for turning on C++11. Since for compiler ID and versioning purposes
+# CMake recognizes the host compiler when calling nvcc_wrapper, this just
+# works. Both NVCC and nvcc_wrapper only recognize '-std=c++11' which means
+# that we can only use host compilers for CUDA builds that use those flags.
+# It also means that extensions (gnu++11) can't be turned on for CUDA builds.
+IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
+  IF(CMAKE_CXX_EXTENSIONS)
+    MESSAGE(FATAL_ERROR "NVCC doesn't support C++ extensions. Set -DCMAKE_CXX_EXTENSIONS=OFF")
+  ENDIF()
+ENDIF()
+
+IF(KOKKOS_ENABLE_CUDA)
+  # ENFORCE that the compiler can compile CUDA code.
+  IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
+    IF(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 4.0.0)
+      MESSAGE(FATAL_ERROR "Compiling CUDA code directly with Clang requires version 4.0.0 or higher.")
+    ENDIF()
+  ELSEIF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
+    MESSAGE(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang, but compiler ID was ${KOKKOS_CXX_COMPILER_ID}")
+  ENDIF()
+ENDIF()
+
+
+# From CMake 3.10 documentation
+#CMake is currently aware of the C++ standards compiler features
+
+#AppleClang: Apple Clang for Xcode versions 4.4 through 6.2.
+#Clang: Clang compiler versions 2.9 through 3.4.
+#GNU: GNU compiler versions 4.4 through 5.0.
+#MSVC: Microsoft Visual Studio versions 2010 through 2017.
+#SunPro: Oracle SolarisStudio versions 12.4 through 12.5.
+#Intel: Intel compiler versions 12.1 through 17.0.
+#
+#Cray: Cray Compiler Environment version 8.1 through 8.5.8.
+#PGI: PGI version 12.10 through 17.5.
+#XL: IBM XL version 10.1 through 13.1.5.
+
+kokkos_option(CXX_STANDARD "" STRING "The C++ standard for Kokkos to use: c++11, c++14, or c++17")
+
+IF (NOT KOKKOS_CXX_STANDARD AND NOT CMAKE_CXX_STANDARD)
+  SET(KOKKOS_CXX_STANDARD "11" CACHE STRING "C++ standard" FORCE) #FORCE: kokkos_option already created an empty entry
+ELSEIF(NOT KOKKOS_CXX_STANDARD)
+  SET(KOKKOS_CXX_STANDARD ${CMAKE_CXX_STANDARD} CACHE STRING "C++ standard" FORCE)
+ENDIF()
+
+
+
+
+# Set CXX standard flags
+SET(KOKKOS_ENABLE_CXX11 OFF CACHE INTERNAL "Enable C++11 flags")
+SET(KOKKOS_ENABLE_CXX14 OFF CACHE INTERNAL "Enable C++14 flags")
+SET(KOKKOS_ENABLE_CXX17 OFF CACHE INTERNAL "Enable C++17 flags")
+SET(KOKKOS_ENABLE_CXX20 OFF CACHE INTERNAL "Enable C++20 flags")
+IF (KOKKOS_CXX_STANDARD)
+  IF (${KOKKOS_CXX_STANDARD} STREQUAL "c++98")
+    MESSAGE(FATAL_ERROR "Kokkos no longer supports C++98 - minimum C++11")
+  ELSEIF (${KOKKOS_CXX_STANDARD} STREQUAL "c++11")
+    MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++11'. Use '11' instead.")
+    GLOBAL_OVERWRITE(KOKKOS_CXX_STANDARD "11" STRING)
+  ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++14")
+    MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++14'. Use '14' instead.")
+    GLOBAL_OVERWRITE(KOKKOS_CXX_STANDARD "14" STRING)
+  ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++17")
+    MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++17'. Use '17' instead.")
+    GLOBAL_OVERWRITE(KOKKOS_CXX_STANDARD "17" STRING)
+  ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++1y")
+    MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++1y'. Use '1Y' instead.")
+    GLOBAL_OVERWRITE(KOKKOS_CXX_STANDARD "1Y" STRING)
+  ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++1z")
+    MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++1z'. Use '1Z' instead.")
+    GLOBAL_OVERWRITE(KOKKOS_CXX_STANDARD "1Z" STRING)
+  ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++2a")
+    MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++2a'. Use '2A' instead.")
+    GLOBAL_OVERWRITE(KOKKOS_CXX_STANDARD "2A" STRING)
+  ENDIF()
+ENDIF()
+
+
+IF (KOKKOS_CXX_STANDARD AND CMAKE_CXX_STANDARD)
+  #make sure these are consistent
+  IF (NOT KOKKOS_CXX_STANDARD STREQUAL CMAKE_CXX_STANDARD)
+    MESSAGE(FATAL_ERROR "Specified both CMAKE_CXX_STANDARD and KOKKOS_CXX_STANDARD, but they don't match")
+  ENDIF()
+ENDIF()
+
+IF (KOKKOS_CXX_STANDARD STREQUAL "11" )
+  kokkos_set_cxx_standard_feature(11)
+  GLOBAL_SET(KOKKOS_ENABLE_CXX11 ON)
+ELSEIF(KOKKOS_CXX_STANDARD STREQUAL "14")
+  kokkos_set_cxx_standard_feature(14)
+  GLOBAL_SET(KOKKOS_ENABLE_CXX14 ON)
+ELSEIF(KOKKOS_CXX_STANDARD STREQUAL "17")
+  kokkos_set_cxx_standard_feature(17)
+  GLOBAL_SET(KOKKOS_ENABLE_CXX17 ON)
+ELSEIF(KOKKOS_CXX_STANDARD STREQUAL "98")
+  MESSAGE(FATAL_ERROR "Kokkos requires C++11 or newer!")
+ELSE()
+  IF (KOKKOS_CXX_STANDARD STREQUAL "1Y")
+    GLOBAL_SET(KOKKOS_ENABLE_CXX14 ON)
+  ELSEIF (KOKKOS_CXX_STANDARD STREQUAL "1Z")
+    GLOBAL_SET(KOKKOS_ENABLE_CXX17 ON) #CXX17 is the flag initialised above; no CXX1Z flag is declared in this file
+  ELSEIF (KOKKOS_CXX_STANDARD STREQUAL "2A")
+    GLOBAL_SET(KOKKOS_ENABLE_CXX20 ON)
+  ENDIF()
+ENDIF()
+
+IF (NOT KOKKOS_CXX_STANDARD_FEATURE)
+  #we need to pick the C++ flags ourselves
+  IF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray)
+    INCLUDE(${KOKKOS_SRC_PATH}/cmake/cray.cmake)
+    kokkos_set_cray_flags(${KOKKOS_CXX_STANDARD})
+  ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI)
+    INCLUDE(${KOKKOS_SRC_PATH}/cmake/pgi.cmake)
+    kokkos_set_pgi_flags(${KOKKOS_CXX_STANDARD})
+  ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
+    INCLUDE(${KOKKOS_SRC_PATH}/cmake/intel.cmake)
+    kokkos_set_intel_flags(${KOKKOS_CXX_STANDARD})
+  ELSE()
+    INCLUDE(${KOKKOS_SRC_PATH}/cmake/gnu.cmake)
+    kokkos_set_gnu_flags(${KOKKOS_CXX_STANDARD})
+  ENDIF()
+  #check that the compiler accepts the C++ standard flag
+  INCLUDE(CheckCXXCompilerFlag)
+  IF (DEFINED CXX_STD_FLAGS_ACCEPTED)
+    UNSET(CXX_STD_FLAGS_ACCEPTED CACHE)
+  ENDIF()
+  CHECK_CXX_COMPILER_FLAG(${KOKKOS_CXX_STANDARD_FLAG} CXX_STD_FLAGS_ACCEPTED)
+  IF (NOT CXX_STD_FLAGS_ACCEPTED)
+    MESSAGE(FATAL_ERROR "${KOKKOS_CXX_COMPILER_ID} did not accept ${KOKKOS_CXX_STANDARD_FLAG}. You likely need to reduce the level of the C++ standard from ${KOKKOS_CXX_STANDARD}")
+  ELSE()
+    MESSAGE("Compiler features not supported, but ${KOKKOS_CXX_COMPILER_ID} accepts ${KOKKOS_CXX_STANDARD_FLAG}")
+  ENDIF()
+ENDIF()
+
+
+
+
diff --git a/cmake/kokkos_enable_options.cmake b/cmake/kokkos_enable_options.cmake
new file mode 100644
index 00000000000..9b310f5b769
--- /dev/null
+++ b/cmake/kokkos_enable_options.cmake
@@ -0,0 +1,103 @@
+########################## NOTES ###############################################
+# List the options for configuring kokkos using CMake method of doing it.
+# These options then get mapped onto KOKKOS_SETTINGS environment variable by
+# kokkos_settings.cmake. It is separate to allow other packages to override
+# these variables (e.g., TriBITS).
+
+########################## AVAILABLE OPTIONS ###################################
+# Use lists for documentation, verification, and programming convenience
+
+FUNCTION(KOKKOS_ENABLE_OPTION CAMEL_SUFFIX DEFAULT DOCSTRING)
+  kokkos_option(ENABLE_${CAMEL_SUFFIX} ${DEFAULT} BOOL ${DOCSTRING})
+ENDFUNCTION(KOKKOS_ENABLE_OPTION)
+
+KOKKOS_ENABLE_OPTION(TESTS OFF "Whether to build tests")
+KOKKOS_ENABLE_OPTION(EXAMPLES OFF "Whether to build examples")
+KOKKOS_ENABLE_OPTION(Serial ON "Whether to build serial backend")
+# OpenMP is declared further down, once its Trilinos-dependent default is known;
+KOKKOS_ENABLE_OPTION(Pthread OFF "Whether to build Pthread backend")
+# likewise Cuda - declaring either here with OFF would pin the cached default first.
+KOKKOS_ENABLE_OPTION(ROCm OFF "Whether to build AMD ROCm backend")
+KOKKOS_ENABLE_OPTION(HWLOC OFF "Whether to enable HWLOC features - may also require -DHWLOC_DIR")
+KOKKOS_ENABLE_OPTION(MEMKIND OFF "Whether to enable MEMKIND features - may also require -DMEMKIND_DIR")
+KOKKOS_ENABLE_OPTION(LIBRT OFF "Whether to enable LIBRT features")
+KOKKOS_ENABLE_OPTION(Cuda_Relocatable_Device_Code OFF "Whether to enable relocatable device code (RDC) for CUDA")
+KOKKOS_ENABLE_OPTION(Cuda_UVM OFF "Whether to enable unified virtual memory (UVM) for CUDA")
+KOKKOS_ENABLE_OPTION(Cuda_LDG_Intrinsic OFF "Whether to use CUDA LDG intrinsics")
+KOKKOS_ENABLE_OPTION(HPX_ASYNC_DISPATCH OFF "Whether HPX supports asynchronous dispath")
+KOKKOS_ENABLE_OPTION(Debug OFF "Whether to activate extra debug features - may increase compile times")
+KOKKOS_ENABLE_OPTION(Debug_DualView_Modify_Check OFF "Debug check on dual views")
+KOKKOS_ENABLE_OPTION(Debug_Bounds_Check OFF "Whether to use bounds checking - will increase runtime")
+KOKKOS_ENABLE_OPTION(Compiler_Warnings OFF "Whether to print all compiler warnings")
+KOKKOS_ENABLE_OPTION(Profiling ON "Whether to create bindings for profiling tools")
+KOKKOS_ENABLE_OPTION(Profiling_Load_Print OFF "Whether to print information about which profiling tools got loaded")
+KOKKOS_ENABLE_OPTION(Aggressive_Vectorization OFF "Whether to aggressively vectorize loops")
+KOKKOS_ENABLE_OPTION(Deprecated_Code OFF "Whether to enable deprecated code")
+KOKKOS_ENABLE_OPTION(Explicit_Instantiation OFF
+  "Whether to explicitly instantiate certain types to lower future compile times")
+SET(KOKKOS_ENABLE_ETI ${KOKKOS_ENABLE_EXPLICIT_INSTANTIATION} CACHE INTERNAL "eti")
+
+IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_QTHREAD)
+SET(QTHR_DEFAULT ON)
+ELSE()
+SET(QTHR_DEFAULT OFF)
+ENDIF()
+KOKKOS_ENABLE_OPTION(Qthread ${QTHR_DEFAULT}
+  "Whether to build Qthreads backend - may also require -DQTHREAD_DIR")
+
+IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HPX)
+SET(HPX_DEFAULT ON)
+ELSE()
+SET(HPX_DEFAULT OFF)
+ENDIF()
+KOKKOS_ENABLE_OPTION(HPX ${HPX_DEFAULT} "Whether to build HPX backend - may also require -DHPX_DIR")
+
+IF(Trilinos_ENABLE_Kokkos AND Trilinos_ENABLE_OpenMP)
+  SET(OMP_DEFAULT ON)
+ELSE()
+  SET(OMP_DEFAULT OFF)
+ENDIF()
+KOKKOS_ENABLE_OPTION(OpenMP ${OMP_DEFAULT} "Whether to build OpenMP backend")
+
+IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA)
+  SET(CUDA_DEFAULT ON)
+ELSE()
+  SET(CUDA_DEFAULT OFF)
+ENDIF()
+KOKKOS_ENABLE_OPTION(Cuda ${CUDA_DEFAULT} "Whether to build CUDA backend")
+
+IF (DEFINED CUDA_VERSION AND CUDA_VERSION VERSION_GREATER "7.0")
+  SET(LAMBDA_DEFAULT ON)
+ELSE()
+  SET(LAMBDA_DEFAULT OFF)
+ENDIF()
+KOKKOS_ENABLE_OPTION(Cuda_Lambda ${LAMBDA_DEFAULT} "Whether to activate experimental laambda features")
+
+IF(DEFINED Kokkos_ARCH)
+  MESSAGE(FATAL_ERROR "Defined Kokkos_ARCH, use KOKKOS_ARCH instead!")
+ENDIF()
+IF(DEFINED Kokkos_Arch)
+  MESSAGE(FATAL_ERROR "Defined Kokkos_Arch, use KOKKOS_ARCH instead!")
+ENDIF()
+
+
+#-------------------------------------------------------------------------------
+#------------------------------- GENERAL OPTIONS -------------------------------
+#-------------------------------------------------------------------------------
+# Whether to build separate libraries or not
+SET(KOKKOS_SEPARATE_LIBS OFF CACHE BOOL "OFF = kokkos. ON = kokkoscore, kokkoscontainers, and kokkosalgorithms.")
+
+# Qthreads options.
+SET(KOKKOS_QTHREADS_DIR "" CACHE PATH "Location of Qthreads library.")
+
+# HPX options.
+SET(KOKKOS_HPX_DIR "" CACHE PATH "Location of HPX library.")
+
+# Whether to provide finer-grained unit test targets
+SET(KOKKOS_SEPARATE_TESTS OFF CACHE BOOL "Provide unit test targets with finer granularity.")
+
+SET(KOKKOS_HWLOC_DIR "" CACHE PATH "Location of hwloc library. (kokkos tpl)")
+SET(KOKKOS_MEMKIND_DIR "" CACHE PATH "Location of memkind library. (kokkos tpl)")
+SET(KOKKOS_CUDA_DIR "" CACHE PATH "Location of CUDA library.
Defaults to where nvcc installed.") + + diff --git a/cmake/kokkos_functions.cmake b/cmake/kokkos_functions.cmake index 96b88371319..e901bceaebf 100644 --- a/cmake/kokkos_functions.cmake +++ b/cmake/kokkos_functions.cmake @@ -2,183 +2,42 @@ # List of functions # set_kokkos_cxx_compiler # set_kokkos_cxx_standard -# set_kokkos_srcs - -#------------------------------------------------------------------------------- -# function(set_kokkos_cxx_compiler) -# Sets the following compiler variables that are analogous to the CMAKE_* -# versions. We add the ability to detect NVCC (really nvcc_wrapper). -# KOKKOS_CXX_COMPILER -# KOKKOS_CXX_COMPILER_ID -# KOKKOS_CXX_COMPILER_VERSION -# -# Inputs: -# KOKKOS_ENABLE_CUDA -# CMAKE_CXX_COMPILER -# CMAKE_CXX_COMPILER_ID -# CMAKE_CXX_COMPILER_VERSION -# -# Also verifies the compiler version meets the minimum required by Kokkos. -function(set_kokkos_cxx_compiler) - # Since CMake doesn't recognize the nvcc compiler until 3.8, we use our own - # version of the CMake variables and detect nvcc ourselves. Initially set to - # the CMake variable values. - set(INTERNAL_CXX_COMPILER ${CMAKE_CXX_COMPILER}) - set(INTERNAL_CXX_COMPILER_ID ${CMAKE_CXX_COMPILER_ID}) - set(INTERNAL_CXX_COMPILER_VERSION ${CMAKE_CXX_COMPILER_VERSION}) - - # Check if the compiler is nvcc (which really means nvcc_wrapper). - execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version - COMMAND grep nvcc - COMMAND wc -l - OUTPUT_VARIABLE INTERNAL_HAVE_COMPILER_NVCC - OUTPUT_STRIP_TRAILING_WHITESPACE) - - string(REGEX REPLACE "^ +" "" - INTERNAL_HAVE_COMPILER_NVCC ${INTERNAL_HAVE_COMPILER_NVCC}) - - if(INTERNAL_HAVE_COMPILER_NVCC) - # Set the compiler id to nvcc. We use the value used by CMake 3.8. - set(INTERNAL_CXX_COMPILER_ID NVIDIA) - - # Set nvcc's compiler version. 
- execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version - COMMAND grep release - OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE) - - string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$" - INTERNAL_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION}) - endif() - - # Enforce the minimum compilers supported by Kokkos. - set(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang 3.5.2 or higher") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 4.8.4 or higher") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 15.0.2 or higher") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 7.0.28 or higher") - set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n PGI 17.1 or higher\n") - - if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 3.5.2) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - elseif(INTERNAL_CXX_COMPILER_ID STREQUAL GNU) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.8.4) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - elseif(INTERNAL_CXX_COMPILER_ID STREQUAL Intel) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 15.0.2) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - elseif(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 7.0.28) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - elseif(INTERNAL_CXX_COMPILER_ID STREQUAL PGI) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 17.1) - message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") - endif() - endif() - - # Enforce that extensions are turned off for nvcc_wrapper. - if(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) - if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL ON) - message(FATAL_ERROR "NVCC doesn't support C++ extensions. 
Set CMAKE_CXX_EXTENSIONS to OFF in your CMakeLists.txt.") - endif() - endif() - - if(KOKKOS_ENABLE_CUDA) - # Enforce that the compiler can compile CUDA code. - if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang) - if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.0.0) - message(FATAL_ERROR "Compiling CUDA code directly with Clang requires version 4.0.0 or higher.") - endif() - elseif(NOT INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) - message(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang, but compiler ID was ${INTERNAL_CXX_COMPILER_ID}") - endif() - endif() - - set(KOKKOS_CXX_COMPILER ${INTERNAL_CXX_COMPILER} PARENT_SCOPE) - set(KOKKOS_CXX_COMPILER_ID ${INTERNAL_CXX_COMPILER_ID} PARENT_SCOPE) - set(KOKKOS_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION} PARENT_SCOPE) -endfunction() - -#------------------------------------------------------------------------------- -# function(set_kokkos_cxx_standard) -# Transitively enforces that the appropriate CXX standard compile flags (C++11 -# or above) are added to targets that use the Kokkos library. Compile features -# are used if possible. Otherwise, the appropriate flags are added to -# KOKKOS_CXX_FLAGS. Values set by the user to CMAKE_CXX_STANDARD and -# CMAKE_CXX_EXTENSIONS are honored. -# -# Outputs: -# KOKKOS_CXX11_FEATURES -# KOKKOS_CXX_FLAGS -# -# Inputs: -# KOKKOS_CXX_COMPILER -# KOKKOS_CXX_COMPILER_ID -# KOKKOS_CXX_COMPILER_VERSION -# -function(set_kokkos_cxx_standard) - # The following table lists the versions of CMake that supports CXX_STANDARD - # and the CXX compile features for different compilers. The versions are - # based on CMake documentation, looking at CMake code, and verifying by - # testing with specific CMake versions. 
- # - # COMPILER CXX_STANDARD Compile Features - # --------------------------------------------------------------- - # Clang 3.1 3.1 - # GNU 3.1 3.2 - # AppleClang 3.2 3.2 - # Intel 3.6 3.6 - # Cray No No - # PGI No No - # XL No No - # - # Kokkos now requires a minimum of CMake 3.8 - # For compiling CUDA code using nvcc_wrapper, we will use the host compiler's - # flags for turning on C++11. Since for compiler ID and versioning purposes - # CMake recognizes the host compiler when calling nvcc_wrapper, this just - # works. Both NVCC and nvcc_wrapper only recognize '-std=c++11' which means - # that we can only use host compilers for CUDA builds that use those flags. - # It also means that extensions (gnu++11) can't be turned on for CUDA builds. - +# kokkos_option + +FUNCTION(kokkos_option CAMEL_SUFFIX DEFAULT TYPE DOCSTRING) + SET(CAMEL_NAME Kokkos_${CAMEL_SUFFIX}) + STRING(TOUPPER ${CAMEL_NAME} UC_NAME) + SET(CACHE_NAME KOKKOS_CACHED_${UC_NAME}) + IF (NOT DEFINED ${CACHE_NAME} AND DEFINED ${CAMEL_NAME}) + #THIS IS our first time through the cmake + #WE WERE given the camel case name instead of the UC name we wanted + #MAKE DARn sure we don't have both an UC and Camel version that differ + IF (DEFINED ${UC_NAME} AND NOT ${CAMEL_NAME} STREQUAL ${UC_NAME}) + MESSAGE(FATAL_ERROR "Given both ${CAMEL_NAME} and ${UC_NAME} with different values: ${${CAMEL_NAME}} != ${${UC_NAME}}") + ENDIF() + #GREAT, No conflicts - use the camel case name as the default for the UC + SET(${UC_NAME} ${${CAMEL_NAME}} CACHE ${TYPE} ${DOCSTRING}) + ELSEIF(DEFINED ${CAMEL_NAME}) + #THIS IS at least our second configure and we have an existing cache + #CMAKE Makes this impossible to distinguish something already in cache + #AND SOMthing given explicitly on the command line + #AT THIS point, we have no choice but to accept the Camel value and print a warning + IF (NOT ${CAMEL_NAME} STREQUAL ${UC_NAME}) + MESSAGE(WARNING "Overriding ${UC_NAME}=${${UC_NAME}} with ${CAMEL_NAME}=${${CAMEL_NAME}}") + 
ENDIF() + #I HAVE to accept the Camel case value - really no choice here - force it + SET(${UC_NAME} ${${CAMEL_NAME}} CACHE ${TYPE} ${DOCSTRING} FORCE) + ELSE() #GReat, no camel case names - nice and simple + SET(${UC_NAME} ${DEFAULT} CACHE ${TYPE} ${DOCSTRING}) + ENDIF() + #STORE A Value in the cache to identify whether this is the 1st configure + SET(${CACHE_NAME} ${${UC_NAME}} CACHE ${TYPE} ${DOCSTRING} FORCE) - # Check if we can use compile features. - SET(VALID_FOR_FEATURES Clang GNU Intel AppleClang) - #always valid for certain compilers - IF(${KOKKOS_CXX_COMPILER_ID} IN_LIST VALID_FOR_FEATURES) - set(KOKKOS_CXX_STANDARD_IS_FEATURE ON CACHE INTERNAL - "Whether the compiler family supports target_compile_features") - return() + IF (${UC_NAME}) #cmake if statements follow really annoying string resolution rules + MESSAGE(STATUS "${UC_NAME}=${${UC_NAME}}") ENDIF() +ENDFUNCTION(kokkos_option) - set(KOKKOS_CXX_STANDARD_IS_FEATURE OFF CACHE INTERNAL - "Whether the compiler family supports target_compile_features") - if(CMAKE_CXX_COMPILER_ID STREQUAL Cray) - # CMAKE doesn't support CXX_STANDARD or C++ compile features for the Cray - # compiler. Set compiler options transitively here such that they trickle - # down to a call to target_compile_options(). - set(KOKKOS_CXX_STANDARD_FLAG "-hstd=c++11" CACHE INTERNAL - "The flags needed for the C++ standard, if not supported as feature") - elseif(CMAKE_CXX_COMPILER_ID STREQUAL PGI) - # CMAKE doesn't support CXX_STANDARD or C++ compile features for the PGI - # compiler. Set compiler options transitively here such that they trickle - # down to a call to target_compile_options(). - set(KOKKOS_CXX_STANDARD_FLAG "--c++11" CACHE INTERNAL - "The flags needed for the C++ standard, if not supported as feature") - elseif(CMAKE_CXX_COMPILER_ID STREQUAL XL) - # CMAKE doesn't support CXX_STANDARD or C++ compile features for the XL - # compiler. 
Set compiler options transitively here such that they trickle - # down to a call to target_compile_options(). - set(KOKKOS_CXX_STANDARD_FLAG "-std=c++11" CACHE INTERNAL - "The flags needed for the C++ standard, if not supported as feature") - else() - message(FATAL_ERROR "Got unknown compiler ${KOKKOS_COMPILER_ID}") - endif() -endfunction() diff --git a/cmake/kokkos_install.cmake b/cmake/kokkos_install.cmake index 6641ff1327a..9067b23d892 100644 --- a/cmake/kokkos_install.cmake +++ b/cmake/kokkos_install.cmake @@ -32,24 +32,11 @@ INSTALL (FILES DESTINATION ${KOKKOS_HEADER_DIR} ) -# Add all targets to the build-tree export set -#export(TARGETS ${Kokkos_LIBRARIES_NAMES} -# FILE "${Kokkos_BINARY_DIR}/KokkosTargets.cmake") - -# Export the package for use from the build-tree -# (this registers the build-tree with a global CMake-registry) -#export(PACKAGE Kokkos) - -# Create the KokkosConfig.cmake and KokkosConfigVersion files -file(RELATIVE_PATH REL_INCLUDE_DIR "${INSTALL_CMAKE_DIR}" - "${INSTALL_INCLUDE_DIR}") -# ... 
for the build tree -set(CONF_INCLUDE_DIRS "${Kokkos_SOURCE_DIR}" "${Kokkos_BINARY_DIR}") -include(CMakePackageConfigHelpers) -configure_package_config_file(cmake/KokkosConfig.cmake.in +INCLUDE(CMakePackageConfigHelpers) +CONFIGURE_PACKAGE_CONFIG_FILE(cmake/KokkosConfig.cmake.in "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" INSTALL_DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/cmake) -write_basic_package_version_file("${Kokkos_BINARY_DIR}/KokkosConfigVersion.cmake" +WRITE_BASIC_PACKAGE_VERSION_FILE("${Kokkos_BINARY_DIR}/KokkosConfigVersion.cmake" VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}" COMPATIBILITY SameMajorVersion) @@ -63,7 +50,16 @@ FOREACH(DIR ${INSTALL_CMAKE_DIR}) DESTINATION ${DIR}) # Install the export set for use with the install-tree - INSTALL(EXPORT KokkosTargets DESTINATION ${DIR}) + INSTALL(EXPORT + KokkosDeprecatedTargets + DESTINATION ${DIR} + ) + + INSTALL(EXPORT + KokkosTargets + NAMESPACE Kokkos:: + DESTINATION ${DIR} + ) ENDFOREACH() # build and install pkgconfig file diff --git a/cmake/kokkos_options.cmake b/cmake/kokkos_options.cmake deleted file mode 100644 index f91e62674e7..00000000000 --- a/cmake/kokkos_options.cmake +++ /dev/null @@ -1,201 +0,0 @@ -########################## NOTES ############################################### -# List the options for configuring kokkos using CMake method of doing it. -# These options then get mapped onto KOKKOS_SETTINGS environment variable by -# kokkos_settings.cmake. It is separate to allow other packages to override -# these variables (e.g., TriBITS). 
- -########################## AVAILABLE OPTIONS ################################### -# Use lists for documentation, verification, and programming convenience - -function(KOKKOS_ENABLE_OPTION CAMEL_SUFFIX DEFAULT DOCSTRING) - set(CAMEL_NAME Kokkos_ENABLE_${CAMEL_SUFFIX}) - string(TOUPPER ${CAMEL_NAME} UC_NAME) - if (NOT DEFINED KOKKOS_CACHED_${UC_NAME} AND DEFINED ${CAMEL_NAME}) - #this is our first time through the cmake - #we were given the camel case name instead of the UC name we wanted - #make darn sure we don't have both an UC and Camel version that differ - if (DEFINED ${UC_NAME} AND NOT ${CAMEL_NAME} STREQUAL ${UC_NAME}) - message(FATAL_ERROR "Given both ${CAMEL_NAME} and ${UC_NAME} with different values") - endif() - #great, no conflicts - use the camel case name as the default for the UC - set(${UC_NAME} ${${CAMEL_NAME}} CACHE BOOL ${DOCSTRING}) - elseif(DEFINED ${CAMEL_NAME}) - #this is at least our second configure and we have an existing cache - #CMake makes this impossible to distinguish something already in cache - #and somthing given explicitly on the command line - #at this point, we have no choice but to accept the Camel value and print a warning - if (NOT ${CAMEL_NAME} STREQUAL ${UC_NAME}) - message(WARNING "Overriding ${UC_NAME}=${${UC_NAME}} with ${CAMEL_NAME}=${${CAMEL_NAME}}") - endif() - #I have to accept the Camel case value - really no choice here - force it - set(${UC_NAME} ${${CAMEL_NAME}} CACHE BOOL ${DOCSTRING} FORCE) - else() #great, no camel case names - nice and simple - set(${UC_NAME} ${DEFAULT} CACHE BOOL ${DOCSTRING}) - endif() - set(KOKKO_CACHED_${UC_NAME} ${${UC_NAME}} CACHE INTERNAL ${DOCSTRING}) - - if (${UC_NAME}) #cmake if statements follow really annoying string resolution rules - message(STATUS "${UC_NAME}") - endif() -endfunction() - -KOKKOS_ENABLE_OPTION(TESTS OFF "Whether to build serial backend") -KOKKOS_ENABLE_OPTION(EXAMPLES OFF "Whether to build OpenMP backend") -KOKKOS_ENABLE_OPTION(Serial ON "Whether to build 
serial backend") -KOKKOS_ENABLE_OPTION(OpenMP OFF "Whether to build OpenMP backend") -KOKKOS_ENABLE_OPTION(Pthread OFF "Whether to build Pthread backend") -KOKKOS_ENABLE_OPTION(Cuda OFF "Whether to build CUDA backend") -KOKKOS_ENABLE_OPTION(ROCm OFF "Whether to build AMD ROCm backend") -KOKKOS_ENABLE_OPTION(HWLOC OFF "Whether to enable HWLOC features - may also require -DHWLOC_DIR") -KOKKOS_ENABLE_OPTION(MEMKIND OFF "Whether to enable MEMKIND featuers - may also require -DMEMKIND_DIR") -KOKKOS_ENABLE_OPTION(LIBRT OFF "Whether to enable LIBRT features") -KOKKOS_ENABLE_OPTION(Cuda_Relocatable_Device_Code OFF "Whether to enable relocatable device code (RDC) for CUDA") -KOKKOS_ENABLE_OPTION(Cuda_UVM OFF "Whether to enable unified virtual memory (UVM) for CUDA") -KOKKOS_ENABLE_OPTION(Cuda_LDG_Intrinsic OFF "Whether to use CUDA LDG intrinsics") -KOKKOS_ENABLE_OPTION(HPX_ASYNC_DISPATCH OFF "Whether HPX supports asynchronous dispath") -KOKKOS_ENABLE_OPTION(Debug OFF "Whether to activate extra debug features - may increase compile times") -KOKKOS_ENABLE_OPTION(Debug_DualView_Modify_Check OFF "Debug check on dual views") -KOKKOS_ENABLE_OPTION(Debug_Bounds_Check OFF "Whether to use bounds checking - will increase runtime") -KOKKOS_ENABLE_OPTION(Compiler_Warnings OFF "Whether to print all compiler warnings") -KOKKOS_ENABLE_OPTION(Profiling ON "Whether to create bindings for profiling tools") -KOKKOS_ENABLE_OPTION(Profiling_Load_Print OFF "Whether to print information about which profiling tools got loaded") -KOKKOS_ENABLE_OPTION(Aggressive_Vectorization OFF "Whether to aggressively vectorize loops") -KOKKOS_ENABLE_OPTION(Deprecated_Code OFF "Whether to enable deprecated code") -KOKKOS_ENABLE_OPTION(Explicit_Instantiation OFF - "Whether to explicitly instantiate certain types to lower future compile times") -SET(KOKKOS_ENABLE_ETI ${KOKKOS_ENABLE_EXPLICIT_INSTANTIATION} CACHE INTERNAL "eti") - -IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_QTHREAD) -SET(QTHR_DEFAULT ON) -ELSE() 
-SET(QTHR_DEFAULT OFF) -ENDIF() -KOKKOS_ENABLE_OPTION(Qthread ${QTHR_DEFAULT} - "Whether to build Qthreads backend - may also require -DQTHREAD_DIR") - -IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HPX) -SET(HPX_DEFAULT ON) -ELSE() -SET(HPX_DEFAULT OFF) -ENDIF() -KOKKOS_ENABLE_OPTION(HPX ${HPX_DEFAULT} "Whether to build HPX backend - may also require -DHPX_DIR") - -IF(Trilinos_ENABLE_Kokkos AND Trilinos_ENABLE_OpenMP) - SET(OMP_DEFAULT ON) -ELSE() - SET(OMP_DEFAULT OFF) -ENDIF() -KOKKOS_ENABLE_OPTION(OpenMP ${OMP_DEFAULT} "Whether to build OpenMP backend") - -IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA) - SET(CUDA_DEFAULT ON) -ELSE() - SET(CUDA_DEFAULT OFF) -ENDIF() -KOKKOS_ENABLE_OPTION(Cuda ${CUDA_DEFAULT} "Whether to build CUDA backend") - -IF (DEFINED CUDA_VERSION AND CUDA_VERSION VERSION_GREATER "7.0") - SET(LAMBDA_DEFAULT ON) -ELSE() - SET(LAMBDA_DEFAULT OFF) -ENDIF() -KOKKOS_ENABLE_OPTION(Cuda_Lambda ${LAMBDA_DEFAULT} "Whether to activate experimental laambda features") - -IF(DEFINED Kokkos_ARCH) - MESSAGE(FATAL_ERROR "Defined Kokkos_ARCH, use KOKKOS_ARCH instead!") -ENDIF() -IF(DEFINED Kokkos_Arch) - MESSAGE(FATAL_ERROR "Defined Kokkos_Arch, use KOKKOS_ARCH instead!") -ENDIF() - -#------------------------------------------------------------------------------- -# List of possible host architectures. 
-#------------------------------------------------------------------------------- -set(KOKKOS_ARCH_LIST) -list(APPEND KOKKOS_ARCH_LIST - None # No architecture optimization - AMDAVX # (HOST) AMD chip - ARMv80 # (HOST) ARMv8.0 Compatible CPU - ARMv81 # (HOST) ARMv8.1 Compatible CPU - ARMv8_ThunderX # (HOST) ARMv8 Cavium ThunderX CPU - ARMv8_TX2 # (HOST) ARMv8 Cavium ThunderX2 CPU - WSM # (HOST) Intel Westmere CPU - SNB # (HOST) Intel Sandy/Ivy Bridge CPUs - HSW # (HOST) Intel Haswell CPUs - BDW # (HOST) Intel Broadwell Xeon E-class CPUs - SKX # (HOST) Intel Sky Lake Xeon E-class HPC CPUs (AVX512) - KNC # (HOST) Intel Knights Corner Xeon Phi - KNL # (HOST) Intel Knights Landing Xeon Phi - BGQ # (HOST) IBM Blue Gene Q - Power7 # (HOST) IBM POWER7 CPUs - Power8 # (HOST) IBM POWER8 CPUs - Power9 # (HOST) IBM POWER9 CPUs - Kepler # (GPU) NVIDIA Kepler default (generation CC 3.5) - Kepler30 # (GPU) NVIDIA Kepler generation CC 3.0 - Kepler32 # (GPU) NVIDIA Kepler generation CC 3.2 - Kepler35 # (GPU) NVIDIA Kepler generation CC 3.5 - Kepler37 # (GPU) NVIDIA Kepler generation CC 3.7 - Maxwell # (GPU) NVIDIA Maxwell default (generation CC 5.0) - Maxwell50 # (GPU) NVIDIA Maxwell generation CC 5.0 - Maxwell52 # (GPU) NVIDIA Maxwell generation CC 5.2 - Maxwell53 # (GPU) NVIDIA Maxwell generation CC 5.3 - Pascal60 # (GPU) NVIDIA Pascal generation CC 6.0 - Pascal61 # (GPU) NVIDIA Pascal generation CC 6.1 - Volta70 # (GPU) NVIDIA Volta generation CC 7.0 - Volta72 # (GPU) NVIDIA Volta generation CC 7.2 - Turing75 # (GPU) NVIDIA Turing generation CC 7.5 - Ryzen - Epyc - Kaveri - Carrizo - Fiji - Vega - GFX901 - ) - - -FOREACH(Arch ${KOKKOS_ARCH_LIST}) - STRING(TOUPPER ${Arch} ARCH) - SET(KOKKOS_ARCH_${ARCH} OFF CACHE BOOL "Whether to optimize for the ${ARCH} architecture") -ENDFOREACH() - -set(tmpr "\n ") -string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_ARCH_DOCSTR "${KOKKOS_ARCH_LIST}") -set(KOKKOS_INTERNAL_ARCH_DOCSTR "${tmpr}${KOKKOS_INTERNAL_ARCH_DOCSTR}") -# This would be useful, but 
we use Foo_ENABLE mechanisms -#string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_DEVICES_DOCSTR "${KOKKOS_DEVICES_LIST}") -#string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_USE_TPLS_DOCSTR "${KOKKOS_USE_TPLS_LIST}") -#string(REPLACE ";" ${tmpr} KOKKOS_INTERNAL_CUDA_OPTIONS_DOCSTR "${KOKKOS_CUDA_OPTIONS_LIST}") - -#------------------------------------------------------------------------------- -#------------------------------- GENERAL OPTIONS ------------------------------- -#------------------------------------------------------------------------------- - -# Setting this variable to a value other than "None" can improve host -# performance by turning on architecture specific code. -# NOT SET is used to determine if the option is passed in. It is reset to -# default "None" down below. -set(KOKKOS_ARCH "NOT_SET" CACHE STRING - "Optimize for specific host architecture. Options are: ${KOKKOS_INTERNAL_ARCH_DOCSTR}") - -# Whether to build separate libraries or now -set(KOKKOS_SEPARATE_LIBS OFF CACHE BOOL "OFF = kokkos. ON = kokkoscore, kokkoscontainers, and kokkosalgorithms.") - -# Qthreads options. -set(KOKKOS_QTHREADS_DIR "" CACHE PATH "Location of Qthreads library.") - -# HPX options. -set(KOKKOS_HPX_DIR "" CACHE PATH "Location of HPX library.") - -# Whether to build separate libraries or now -set(KOKKOS_SEPARATE_TESTS OFF CACHE BOOL "Provide unit test targets with finer granularity.") - -set(KOKKOS_HWLOC_DIR "" CACHE PATH "Location of hwloc library. (kokkos tpl)") -set(KOKKOS_MEMKIND_DIR "" CACHE PATH "Location of memkind library. (kokkos tpl)") -set(KOKKOS_CUDA_DIR "" CACHE PATH "Location of CUDA library. 
Defaults to where nvcc installed.") - -# Make sure KOKKOS_ARCH is set to something -IF ("${KOKKOS_ARCH}" STREQUAL "NOT_SET") - set(KOKKOS_ARCH "None") -ENDIF() - diff --git a/cmake/kokkos_settings.cmake b/cmake/kokkos_settings.cmake deleted file mode 100644 index 97d7428335a..00000000000 --- a/cmake/kokkos_settings.cmake +++ /dev/null @@ -1,455 +0,0 @@ -########################## NOTES ############################################### -# This files goal is to take CMake options found in kokkos_options.cmake but -# possibly set from elsewhere -# (see: trilinos/cmake/ProjectCOmpilerPostConfig.cmake) -# using CMake idioms and map them onto the KOKKOS_SETTINGS variables that gets -# passed to the kokkos makefile configuration: -# make -f ${CMAKE_SOURCE_DIR}/core/src/Makefile ${KOKKOS_SETTINGS} build-makefile-cmake-kokkos -# that generates KokkosCore_config.h and kokkos_generated_settings.cmake -# To understand how to form KOKKOS_SETTINGS, see -# /Makefile.kokkos - -#------------------------------------------------------------------------------- -#------------------------------- GENERAL OPTIONS ------------------------------- -#------------------------------------------------------------------------------- - - -# Ensure that KOKKOS_ARCH is in the ARCH_LIST -if (KOKKOS_ARCH MATCHES ",") - message(WARNING "-- Detected a comma in: KOKKOS_ARCH=`${KOKKOS_ARCH}`") - message("-- Although we prefer KOKKOS_ARCH to be semicolon-delimited, we do allow") - message("-- comma-delimited values for compatibility with scripts (see github.com/trilinos/Trilinos/issues/2330)") - string(REPLACE "," ";" KOKKOS_ARCH "${KOKKOS_ARCH}") - message("-- Commas were changed to semicolons, now KOKKOS_ARCH=`${KOKKOS_ARCH}`") -endif() - -if (KOKKOS_ARCH MATCHES "-") - string(REPLACE "-" "_" KOKKOS_ARCH "${KOKKOS_ARCH}") -endif() - -foreach(Arch ${KOKKOS_ARCH}) - string(TOUPPER ${Arch} ARCH) - #force on all the architectures in the list - SET(KOKKOS_ARCH_${ARCH} ON CACHE BOOL "optimize for architecture 
${Arch}" FORCE) - IF (NOT ${Arch} IN_LIST KOKKOS_ARCH_LIST) - message(FATAL_ERROR "`${arch}` is not an accepted value in KOKKOS_ARCH=`${KOKKOS_ARCH}`." - " Please pick from these choices: ${KOKKOS_INTERNAL_ARCH_DOCSTR}") - ENDIF() -endforeach() - -if(KOKKOS_ENABLE_COMPILER_WARNINGS) - IF (KOKKOS_COMPILER_ID STREQUAL PGI) - #add nothing for PGI - ELSE() - LIST(APPEND KOKKOS_COMPILE_OPTIONS - "-Wall" "-Wshadow" "-pedantic" - "-Wsign-compare" "-Wtype-limits" "-Wuninitialized") - ENDIF() - - #add some extra for GNU - IF (KOKKOS_COMPILER_ID STREQUAL GNU) - LIST(APPEND KOKKOS_COMPILE_OPTIONS - "-Wempty-body" - "-Wclobbered" - "-Wignored-qualifiers") - ENDIF() -endif() - - -#------------------------------- KOKKOS_CUDA_OPTIONS --------------------------- -# Construct the Makefile options -if (KOKKOS_ENABLE_CUDA_LAMBDA) - list(APPEND KOKKOS_CUDA_OPTIONS "-expt-extended-lambda") -endif() - -if (KOKKOS_COMPILER_ID STREQUAL Clang) - set(CUDA_ARCH_FLAG "--cuda-gpu-arch") - list(APPEND KOKKOS_CUDA_OPTIONS "-x cuda") -elseif(KOKKOS_COMPILER_ID STREQUAL NVIDIA) - set(CUDA_ARCH_FLAG "-arch") -endif() - -IF (KOKKOS_COMPILER_ID STREQUAL NVIDIA) - IF (KOKKOS_ENABLE_DEBUG OR CMAKE_BUILD_TYPE STREQUAL "DEBUG") - LIST(APPEND KOKKOS_CUDA_OPTIONS -lineinfo) - ENDIF() - IF (KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER 9.0 OR KOKKOS_CXX_COMPILER_VERSION VERSION_EQUAL 9.0) - LIST(APPEND KOKKOS_CUDAFE_OPTIONS --diag_suppress=esa_on_defaulted_function_ignored) - ENDIF() -ENDIF() - -IF(KOKKOS_ENABLE_OPENMP) - IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang OR KOKKOS_CXX_COMPILER_ID STREQUAL AppleClang) - LIST(APPEND KOKKOS_COMPILE_OPTIONS -fopenmp=libomp) - LIST(APPEND KOKKOS_LINK_OPTIONS -fopenmp=libomp) - ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - LIST(APPEND KOKKOS_COMPILE_OPTIONS -mp) - LIST(APPEND KOKKOS_LINK_OPTIONS -mp) - ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL XL) - LIST(APPEND KOKKOS_COMPILE_OPTIONS -qsmp=omp) - LIST(APPEND KOKKOS_LINK_OPTIONS -qsmp=omp) - ELSEIF(KOKKOS_CXX_COMPILER_ID 
STREQUAL Cray) - #on by default - ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - LIST(APPEND KOKKOS_XCOMPILER_OPTIONS -fopenmp) - LIST(APPEND KOKKOS_LINK_OPTIONS -fopenmp) - ELSE() - LIST(APPEND KOKKOS_COMPILE_OPTIONS -fopenmp) - LIST(APPEND KOKKOS_LINK_OPTIONS -fopenmp) - ENDIF() -ENDIF() - -IF (KOKKOS_ENABLE_ARCH_ARMV81) - IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL Cray AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=armv8.1-a) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=armv8.1-a) - ENDIF() -ENDIF() - -IF (KOKKOS_ENABLE_ARCH_ARMV8_THUNDERX) - SET(KOKKOS_ARCH_ARMV80 ON CACHE BOOL "enable armv80" FORCE) - IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL Cray AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=armv8-a -mtune=thunderx) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=armv8-a -mtune=thunderx) - ENDIF() -ENDIF() - -IF (KOKKOS_ENABLE_ARCH_ARMV8_THUNDERX2) - SET(KOKKOS_ARCH_ARMV81 ON CACHE BOOL "enable armv80" FORCE) - IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL Cray AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -mtune=thunderx2t99 -mcpu=thunderx2t99) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -mtune=thunderx2t99 -mcup=thunderx2t99) - ENDIF() -ENDIF() - -IF (KOKKOS_ENABLE_ARCH_EPYC) - IF (KOKKOS_CXX_COMPILER_ID STREQUAL Intel) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -mavx2) - LIST(APPEND KOKKOS_LINK_OPTIONS -mavx2) - ELSE() - LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=znver1 -mtune=znver1) - LIST(APPEND KOKKOS_LINK_OPTIONS -march=znver1 -mtune=znver1) - ENDIF() -ENDIF() - -IF (KOKKOS_ENABLE_ARCH_WSM) - #sse42 - IF (KOKKOS_CXX_COMPILER_ID STREQUAL Intel) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -xSSE4.2) - LIST(APPEND KOKKOS_LINK_OPTIONS -xSSE4.2) - ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -tp=nehalem) - LIST(APPEND KOKKOS_LINK_OPTIONS -tp=nehalem) - ELSEIF(KOKKOX_CXX_COMPILER_ID STREQUAL Cray) - #nothing - ELSE() - #assume 
gcc flags - LIST(APPEND KOKKOS_COMPILER_OPTIONS -msse4.2) - LIST(APPEND KOKKOS_LINK_OPTIONS -msse4.2) - ENDIF() -ENDIF() - -IF (KOKKOS_ENABLE_ARCH_SNB OR KOKKOS_ENABLE_ARCH_AMDAVX) - #avx - IF (KOKKOS_CXX_COMPILER_ID STREQUAL Intel) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -mavx) - LIST(APPEND KOKKOS_LINK_OPTIONS -mavx) - ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -tp=sandybridge) - LIST(APPEND KOKKOS_LINK_OPTIONS -tp=sandybridge) - ELSEIF(KOKKOX_CXX_COMPILER_ID STREQUAL Cray) - #nothing - ELSE() - #assume gcc flags - LIST(APPEND KOKKOS_COMPILER_OPTIONS -mavx) - LIST(APPEND KOKKOS_LINK_OPTIONS -mavx) - ENDIF() -ENDIF() - -IF (KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW) - SET(KOKKOS_ARCH_AVX2 ON CACHE BOOL "enable avx2" FORCE) - IF (KOKKOS_CXX_COMPILER_ID STREQUAL Intel) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -xCORE-AVX2) - LIST(APPEND KOKKOS_LINK_OPTIONS -xCORE-AVX2) - ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -tp=haswell) - LIST(APPEND KOKKOS_LINK_OPTIONS -tp=haswell) - ELSEIF(KOKKOX_CXX_COMPILER_ID STREQUAL Cray) - #nothing - ELSE() - #assume gcc flags - LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=core-avx2 -mtune=core-avx2) - LIST(APPEND KOKKOS_LINK_OPTIONS -march=core-avx2 -mtune=core-avx2) - IF (KOKKOS_ARCH_BDW) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -mrtm) - LIST(APPEND KOKKOS_LINK_OPTIONS -mrtm) - ENDIF() - ENDIF() -ENDIF() - -IF (KOKKOS_ARCH_KNL) - #avx512-mic - SET(KOKKOS_ARCH_AVX512MIC ON CACHE BOOL "enable avx-512 MIC" FORCE) - IF (KOKKOS_CXX_COMPILER_ID STREQUAL Intel) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -xMIC-AVX512) - LIST(APPEND KOKKOS_LINK_OPTIONS -xMIC-AVX512) - ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - #nothing - ELSEIF(KOKKOX_CXX_COMPILER_ID STREQUAL Cray) - #nothing - ELSE() - #assume gcc flags - LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=knl -mtune=knl) - LIST(APPEND KOKKOS_LINK_OPTIONS -march=knl -mtune=knl) - ENDIF() -ENDIF() - -IF (KOKKOS_ARCH_SKX) - #avx512-xeon - 
SET(KOKKOS_ARCH_AVX512XEON ON CACHE BOOL "enable avx-512 Xeon" FORCE) - IF (KOKKOS_CXX_COMPILER_ID STREQUAL Intel) - LIST(APPEND KOKKOS_COMPILER_OPTIONS -xCORE-AVX512) - LIST(APPEND KOKKOS_LINK_OPTIONS -xCORE-AVX512) - ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - #nothing - ELSEIF(KOKKOX_CXX_COMPILER_ID STREQUAL Cray) - #nothing - ELSE() - #assume gcc flags - LIST(APPEND KOKKOS_COMPILER_OPTIONS -march=skylake-avx512 -mtune=skylake-avx512 -mrtm) - LIST(APPEND KOKKOS_LINK_OPTIONS -march=skylake-avx512 -mtune=skylake-avx512 -mrtm) - ENDIF() -ENDIF() - -IF (KOKKOS_ARCH_POWER7) - IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL PGI) - LIST(APPEND KOKKOS_COMPILE_OPTIONS KOKKOS_CXXFLAGS += -mcpu=power7 -mtune=power7) - LIST(APPEND KOKKOS_LINK_OPTIONS KOKKOS_CXXFLAGS += -mcpu=power7 -mtune=power7) - ENDIF() -ENDIF() - -IF (KOKKOS_ARCH_POWER8) - IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL PGI AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - LIST(APPEND KOKKOS_COMPILE_OPTIONS KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8) - LIST(APPEND KOKKOS_LINK_OPTIONS KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8) - ENDIF() -ENDIF() - -IF (KOKKOS_ARCH_POWER9) - IF (NOT KOKKOS_CXX_COMPILER_ID STREQUAL PGI AND NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) - LIST(APPEND KOKKOS_COMPILE_OPTIONS KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9) - LIST(APPEND KOKKOS_LINK_OPTIONS KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9) - ENDIF() -ENDIF() - - -IF (KOKKOS_ENABLE_ARCH_KAVERI) - SET(KOKKOS_ARCH_ROCM 701 CACHE STRING "rocm arch" FORCE) -ENDIF() - -IF (KOKKOS_ENABLE_ARCH_CARRIZO) - SET(KOKKOS_ARCH_ROCM 801 CACHE STRING "rocm arch" FORCE) -ENDIF() - -IF (KOKKOS_ARCH_FIJI) - SET(KOKKOS_ARCH_ROCM 803 CACHE STRING "rocm arch" FORCE) -ENDIF() - -IF (KOKKOS_ARCH_VEGA) - SET(KOKKOS_ARCH_ROCM 900 CACHE STRING "rocm arch" FORCE) -ENDIF() - -IF (KOKKOS_ARCH_GFX901) - SET(KOKKOS_ARCH_ROCM 901 CACHE STRING "rocm arch" FORCE) -ENDIF() - -IF (KOKKOS_ARCH_RYZEN) -ENDIF() - - - -IF (KOKKOS_ENABLE_ARCH_GFX901) -ENDIF() - - -IF 
(KOKKOS_ENABLE_CUDA_RELOCATED_DEVICE_CODE) - IF (KOKKOS_COMPILER_ID STREQUAL Clang) - LIST(APPEND KOKKOS_CUDA_OPTIONS -fcuda-rdc) - ELSEIF (KOKKOS_COMPILER_ID STREQUAL NVIDIA) - LIST(APPEND KOKKOS_CUDA_OPTIONS --relocatable-device-code=true) - ENDIF() -ENDIF() - - -IF (KOKKOS_ARCH_KEPLER30) - LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAGS}=sm_30") -ENDIF() - -IF (KOKKOS_ARCH_KEPLER32) - LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_32") -ENDIF() - -IF (KOKKOS_ARCH_KEPLER35) - LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_35") -ENDIF() - -IF (KOKKOS_ARCH_KEPLER35) - LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_37") -ENDIF() - -IF (KOKKOS_ARCH_MAXWELL50) - LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_50") -ENDIF() - -IF (KOKKOS_ARCH_MAXWELL52) - LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_52") -ENDIF() - -IF (KOKKOS_ARCH_MAXWELL53) - LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_53") -ENDIF() - -IF (KOKKOS_ARCH_PASCAL60) - LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_60") -ENDIF() - -IF (KOKKOS_ARCH_PASCAL61) - LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_61") -ENDIF() - -IF (KOKKOS_ARCH_VOLTA70) - LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_70") -ENDIF() - -IF (KOKKOS_ARCH_VOLTA72) - LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_72") -ENDIF() - -IF (KOKKOS_ARCH_TURING75) - LIST(APPEND KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_75") -ENDIF() - -SET(KOKKOS_CXX_STANDARD "" CACHE STRING "The C++ standard for Kokkos to use: c++11, c++14, or c++17") -SET(KOKKOS_CXX_FEATURES "" CACHE STRING "The list of C++ features for Kokkos to enable") -SET(CXX_STANDARD_TEST) - -IF (NOT KOKKOS_CXX_STANDARD AND NOT CMAKE_CXX_STANDARD) - SET(KOKKOS_CXX_STANDARD "11") -ELSEIF(NOT KOKKOS_CXX_STANDARD) - SET(KOKKOS_CXX_STANDARD ${CMAKE_CXX_STANDARD}) -ENDIF() - -IF (KOKKOS_CXX_STANDARD) - IF (${KOKKOS_CXX_STANDARD} STREQUAL "c++98") - MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++98'. 
Use '98' instead.") - SET(KOKKOS_CXX_STANDARD "98") - ELSEIF (${KOKKOS_CXX_STANDARD} STREQUAL "c++11") - MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++11'. Use '11' instead.") - SET(KOKKOS_CXX_STANDARD "11") - ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++14") - MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++14'. Use '14' instead.") - SET(KOKKOS_CXX_STANDARD "14") - ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "c++17") - MESSAGE(WARNING "Deprecated Kokkos C++ standard set as 'c++17'. Use '17' instead.") - SET(KOKKOS_CXX_STANDARD "17") - ENDIF() -ENDIF() - -IF (KOKKOS_CXX_STANDARD AND CMAKE_CXX_STANDARD) - #make sure these are consistent - IF (NOT KOKKOS_CXX_STANDARD STREQUAL CMAKE_CXX_STANDARD) - MESSAGE(FATAL_ERROR "Specified both CMAKE_CXX_STANDARD and KOKKOS_CXX_STANDARD, but they don't match") - ENDIF() -ENDIF() - -IF (${KOKKOS_CXX_STANDARD} STREQUAL "11") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_11) -ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "14") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_14) -ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "17") - LIST(APPEND KOKKOS_CXX_FEATURES cxx_std_17) -ELSEIF(${KOKKOS_CXX_STANDARD} STREQUAL "98") - MESSAGE(FATAL_ERROR "Kokkos requires C++11 or newer!") -ELSEIF(${KOKKOX_CXX_STANDARD} STREQUAL "1Y") -ELSEIF(${KOKKOX_CXX_STANDARD} STREQUAL "1Z") -ELSEIF(${KOKKOX_CXX_STANDARD} STREQUAL "2A") -ENDIF() - - - - - - -############################ PRINT CONFIGURE STATUS ############################ - -if(KOKKOS_CMAKE_VERBOSE) - message(STATUS "") - message(STATUS "****************** Kokkos Settings ******************") - message(STATUS "Execution Spaces") - - if(KOKKOS_ENABLE_CUDA) - message(STATUS " Device Parallel: Cuda") - else() - message(STATUS " Device Parallel: None") - endif() - - if(KOKKOS_ENABLE_OPENMP) - message(STATUS " Host Parallel: OpenMP") - elseif(KOKKOS_ENABLE_PTHREAD) - message(STATUS " Host Parallel: Pthread") - elseif(KOKKOS_ENABLE_QTHREADS) - message(STATUS " Host Parallel: Qthreads") - 
elseif(KOKKOS_ENABLE_HPX) - message(STATUS " Host Parallel: HPX") - else() - message(STATUS " Host Parallel: None") - endif() - - if(KOKKOS_ENABLE_SERIAL) - message(STATUS " Host Serial: Serial") - else() - message(STATUS " Host Serial: None") - endif() - - message(STATUS "") - message(STATUS "Architectures:") - foreach(Arch ${KOKKOS_ARCH_LIST}) - string(TOUPPER ${Arch} ARCH) - if (KOKKOS_ENABLE_${ARCH}) - message(STATUS " ${Arch}") - endif() - endforeach() - - message(STATUS "") - message(STATUS "Enabled options:") - - if(KOKKOS_SEPARATE_LIBS) - message(STATUS " KOKKOS_SEPARATE_LIBS") - endif() - - if(KOKKOS_ENABLE_CUDA) - if(KOKKOS_CUDA_DIR) - message(STATUS " KOKKOS_CUDA_DIR: ${KOKKOS_CUDA_DIR}") - endif() - endif() - - if(KOKKOS_QTHREADS_DIR) - message(STATUS " KOKKOS_QTHREADS_DIR: ${KOKKOS_QTHREADS_DIR}") - endif() - - if(KOKKOS_HWLOC_DIR) - message(STATUS " KOKKOS_HWLOC_DIR: ${KOKKOS_HWLOC_DIR}") - endif() - - if(KOKKOS_MEMKIND_DIR) - message(STATUS " KOKKOS_MEMKIND_DIR: ${KOKKOS_MEMKIND_DIR}") - endif() - - if(KOKKOS_HPX_DIR) - message(STATUS " KOKKOS_HPX_DIR: ${KOKKOS_HPX_DIR}") - endif() - - message(STATUS "") - message(STATUS "*****************************************************") - message(STATUS "") -endif() diff --git a/cmake/kokkos_tpls.cmake b/cmake/kokkos_tpls.cmake new file mode 100644 index 00000000000..c228c9600ad --- /dev/null +++ b/cmake/kokkos_tpls.cmake @@ -0,0 +1,23 @@ +IF (KOKKOS_ENABLE_HPX) + FIND_PACKAGE(HPX REQUIRED) + MESSAGE(STATUS "KOKKOS_ENABLE_HPX: ${HPX_DIR}") +ENDIF() + +IF (KOKKOS_ENABLE_HWLOC) + IF (KOKKOS_HWLOC_DIR) + FIND_PACKAGE(HWLOC REQUIRED HINTS ${KOKKOS_HWLOC_DIR}) + ELSE() + FIND_PACKAGE(HWLOC REQUIRED) + ENDIF() + MESSAGE(STATUS "KOKKOS_ENABLE_HWLOC: ${HWLOC_DIR}") +ENDIF() + +IF (KOKKOS_ENABLE_MEMKIND) + IF (KOKKOS_MEMKIND_DIR) + FIND_PACKAGE(MEMKIND REQUIRED HINTS ${KOKKOS_ENABLE_MEMKIND}) + ELSE() + FIND_PACKAGE(MEMKIND REQUIRED) + ENDIF() + MESSAGE(STATUS "KOKKOS_ENABLE_MEMKIND: ${MEMKIND_DIR}") +ENDIF() + diff 
--git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 5384671a1f8..60d8a871d7a 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -91,6 +91,9 @@ MACRO(KOKKOS_PACKAGE_DEF) ENDMACRO(KOKKOS_PACKAGE_DEF) MACRO(KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL LIBRARY_NAME) + KOKKOS_LIB_TYPE(${LIBRARY_NAME} INCTYPE) + TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} ${INCTYPE} $) + INSTALL( TARGETS ${LIBRARY_NAME} EXPORT ${PROJECT_NAME} @@ -108,12 +111,14 @@ MACRO(KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL LIBRARY_NAME) ARCHIVE DESTINATION lib ) - #Can we please add a remove duplicates to property append - GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) - LIST(APPEND LIBLIST ${LIBRARY_NAME}) - LIST(REMOVE_DUPLICATES LIBLIST) - SET_PROPERTY(GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES ${LIBLIST}) - GET_PROPERTY(LIBLIST GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) + INSTALL( + TARGETS ${LIBRARY_NAME} + EXPORT KokkosDeprecatedTargets + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + ) + VERIFY_EMPTY(KOKKOS_ADD_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) ENDMACRO(KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL) @@ -131,9 +136,8 @@ FUNCTION(KOKKOS_ADD_EXECUTABLE EXE_NAME) IF (PARSE_TESTONLYLIBS) TARGET_LINK_LIBRARIES(${EXE_NAME} ${PARSE_TESTONLYLIBS}) ENDIF() - FOREACH(LIB ${KOKKOS_LIBRARIES_NAMES}) - TARGET_LINK_LIBRARIES(${EXE_NAME} ${LIB}) - ENDFOREACH() + #just link to a single lib kokkos now + TARGET_LINK_LIBRARIES(${EXE_NAME} kokkos) VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE ${PARSE_UNPARSED_ARGUMENTS}) endif() ENDFUNCTION() @@ -172,40 +176,19 @@ ENDIF() ENDFUNCTION() MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) - if (NOT KOKKOS_HAS_TRILINOS) + IF (NOT KOKKOS_HAS_TRILINOS) #------------ COMPILER AND FEATURE CHECKS ------------------------------------ - include(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake) - set_kokkos_cxx_compiler() - set_kokkos_cxx_standard() + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_functions.cmake) #------------ GET OPTIONS AND 
KOKKOS_SETTINGS -------------------------------- - # Add Kokkos' modules to CMake's module path. - set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/") - - set(KOKKOS_CMAKE_VERBOSE True) - include(${KOKKOS_SRC_PATH}/cmake/kokkos_options.cmake) - include(${KOKKOS_SRC_PATH}/cmake/kokkos_settings.cmake) - - #------------ GENERATE HEADER AND SOURCE FILES ------------------------------- - #execute_process( - # COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} PREFIX=${CMAKE_INSTALL_PREFIX} generate_build_settings - # WORKING_DIRECTORY "${Kokkos_BINARY_DIR}" - # OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out - # RESULT_VARIABLE GEN_SETTINGS_RESULT - #) - #if (GEN_SETTINGS_RESULT) - # message(FATAL_ERROR "Kokkos settings generation failed:\n" - # "${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings") - #endif() - #include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake) - #install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION lib/cmake/Kokkos) - #install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION lib/cmake) - #install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION ${CMAKE_INSTALL_PREFIX}) - string(REPLACE " " ";" KOKKOS_TPL_INCLUDE_DIRS "${KOKKOS_GMAKE_TPL_INCLUDE_DIRS}") - string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_DIRS "${KOKKOS_GMAKE_TPL_LIBRARY_DIRS}") - list(REMOVE_ITEM KOKKOS_TPL_INCLUDE_DIRS "") - list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_DIRS "") - endif() + # ADD Kokkos' modules to CMake's module path. 
+ SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/") + + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_options.cmake) + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_cxx.cmake) + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tpls.cmake) + INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_arch.cmake) + ENDIF() ENDMACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) MACRO(KOKKOS_ADD_TEST_EXECUTABLE EXE_NAME) @@ -235,6 +218,7 @@ MACRO(KOKKOS_MAKE_LIBKOKKOS) ADD_LIBRARY(kokkos ${KOKKOS_SOURCE_DIR}/core/src/dummy.cpp) TARGET_LINK_LIBRARIES(kokkos PUBLIC kokkoscore kokkoscontainers) TARGET_LINK_LIBRARIES(kokkos PUBLIC kokkosalgorithms) + KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(kokkos) ENDMACRO() FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) @@ -262,17 +246,17 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) PUBLIC $<$:${KOKKOS_COMPILE_OPTIONS}> ) - if(${CMAKE_VERSION} VERSION_GREATER "3.13" OR ${CMAKE_VERSION} VERSION_EQUAL "3.13") + IF(${CMAKE_VERSION} VERSION_GREATER "3.13" OR ${CMAKE_VERSION} VERSION_EQUAL "3.13") TARGET_LINK_OPTIONS( ${LIBRARY_NAME} PUBLIC ${KOKKOS_LD_FLAGS} ) - else() - #well, this is annoying - I am going to need to hack this for Visual Studio + ELSE() + #WELl, this is annoying - I am going to need to hack this for Visual Studio TARGET_LINK_LIBRARIES( ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} ) - endif() + ENDIF() IF (KOKKOS_ENABLE_CUDA) TARGET_COMPILE_OPTIONS( @@ -307,36 +291,31 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) PUBLIC ${KOKKOS_TPL_INCLUDE_DIRS} ) - if (KOKKOS_ENABLE_CUDA) - set(LIB_cuda "-lcuda") - target_link_libraries(${LIBRARY_NAME} PUBLIC cuda) - endif() + IF (KOKKOS_ENABLE_CUDA) + SET(LIB_cuda "-lcuda") + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC cuda) + ENDIF() - if (KOKKOS_ENABLE_HPX) - find_package(HPX REQUIRED) - target_link_libraries(${LIBRARY_NAME} PUBLIC ${HPX_LIBRARIES}) - target_include_directories(${LIBRARY_NAME} PUBLIC ${HPX_INCLUDE_DIRS}) - endif() + IF (KOKKOS_ENABLE_HPX) + 
TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC ${HPX_LIBRARIES}) + TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PUBLIC ${HPX_INCLUDE_DIRS}) + ENDIF() - if (KOKKOS_ENABLE_HWLOC) - find_package(HWLOC REQUIRED) - target_link_libraries(${LIBRARY_NAME} PRIVATE hwloc) - endif() + IF (KOKKOS_ENABLE_HWLOC) + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE hwloc) + ENDIF() - if (KOKKOS_ENABLE_MEMKIND) - find_package(MEMKIND REQUIRED) - target_link_libraries(${LIBRARY_NAME} PRIVATE memkind) - endif() + IF (KOKKOS_ENABLE_MEMKIND) + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE memkind) + ENDIF() - if (KOKKOS_CXX_STANDARD_IS_FEATURE) - #great! I can't do this the right way - foreach(FEATURE IN LISTS KOKKOS_CXX_FEATURES) - TARGET_COMPILE_FEATURES(${LIBRARY_NAME} PUBLIC ${FEATURE}) - endforeach() - else() - #oh, well, no choice but the wrong way + IF (KOKKOS_CXX_STANDARD_IS_FEATURE) + #GREAT! I can't do this the right way + TARGET_COMPILE_FEATURES(${LIBRARY_NAME} PUBLIC ${KOKKOS_CXX_STANDARD_FEATURE}) + ELSE() + #OH, Well, no choice but the wrong way TARGET_COMPILE_OPTIONS(${LIBRARY_NAME} PUBLIC ${KOKKOS_CXX_STANDARD_FLAG}) - endif() + ENDIF() KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${LIBRARY_NAME}) @@ -385,46 +364,47 @@ ENDIF() ENDFUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY) FUNCTION(KOKKOS_LIB_COMPILE_DEFINITIONS) -IF(KOKKOS_HAS_TRILINOS) - #don't trust tribits to do this correctly - KOKKOS_TARGET_COMPILE_DEFINITIONS(${TARGET} ${ARGN}) -ELSE(TARGET ${TARGET}) - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) - KOKKOS_TARGET_COMPILE_DEFINITIONS(${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}${TARGET} ${INCTYPE} ${ARGN}) + IF(KOKKOS_HAS_TRILINOS) + #don't trust tribits to do this correctly + KOKKOS_TARGET_COMPILE_DEFINITIONS(${TARGET} ${ARGN}) + ELSE(TARGET ${TARGET}) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + KOKKOS_TARGET_COMPILE_DEFINITIONS(${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}${TARGET} ${INCTYPE} ${ARGN}) + ENDIF() ENDFUNCTION(KOKKOS_LIB_COMPILE_DEFINITIONS) FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET) 
-IF(KOKKOS_HAS_TRILINOS) - #ignore the target, tribits doesn't do anything directly with targets - TRIBITS_INCLUDE_DIRECTORIES(${ARGN}) -ELSE() #append to a list for later - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) - FOREACH(DIR ${ARGN}) - TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $) - ENDFOREACH() -ENDIF() + IF(KOKKOS_HAS_TRILINOS) + #ignore the target, tribits doesn't do anything directly with targets + TRIBITS_INCLUDE_DIRECTORIES(${ARGN}) + ELSE() #append to a list for later + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + FOREACH(DIR ${ARGN}) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $) + ENDFOREACH() + ENDIF() ENDFUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES) FUNCTION(KOKKOS_LIB_COMPILE_OPTIONS TARGET) -IF(KOKKOS_HAS_TRILINOS) - #don't trust tribits to do this correctly - KOKKOS_TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) -ELSE() - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) - KOKKOS_TARGET_COMPILE_OPTIONS(${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}${TARGET} ${INCTYPE} ${ARGN}) -ENDIF() + IF(KOKKOS_HAS_TRILINOS) + #don't trust tribits to do this correctly + KOKKOS_TARGET_COMPILE_OPTIONS(${TARGET} ${ARGN}) + ELSE() + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + KOKKOS_TARGET_COMPILE_OPTIONS(${${PROJECT_NAME}_LIBRARY_NAME_PREFIX}${TARGET} ${INCTYPE} ${ARGN}) + ENDIF() ENDFUNCTION(KOKKOS_LIB_COMPILE_OPTIONS) MACRO(KOKKOS_ADD_TEST_DIRECTORIES) - if (KOKKOS_HAS_TRILINOS) + IF (KOKKOS_HAS_TRILINOS) TRIBITS_ADD_TEST_DIRECTORIES(${ARGN}) - else() + ELSE() IF(KOKKOS_ENABLE_TESTS) FOREACH(TEST_DIR ${ARGN}) ADD_SUBDIRECTORY(${TEST_DIR}) ENDFOREACH() ENDIF() - endif() + ENDIF() ENDMACRO() MACRO(KOKKOS_ADD_EXAMPLE_DIRECTORIES) diff --git a/cmake/pgi.cmake b/cmake/pgi.cmake new file mode 100644 index 00000000000..3a2d4980229 --- /dev/null +++ b/cmake/pgi.cmake @@ -0,0 +1,6 @@ + +function(kokkos_set_pgi_flags standard) + STRING(TOLOWER ${standard} LC_STANDARD) + GLOBAL_SET(KOKKOS_CXX_STANDARD_FLAG "--c++${LC_STANDARD}") +endfunction() + diff --git a/core/src/dummy.cpp b/core/src/dummy.cpp index 
e69de29bb2d..d9edf12064e 100644 --- a/core/src/dummy.cpp +++ b/core/src/dummy.cpp @@ -0,0 +1,11 @@ + + +namespace Kokkos { + namespace AvoidCompilerWarnings { + int dontComplain(){ + //keep the compiler from complaining about emptiness + return 0; + } + } +} + diff --git a/core/unit_test/config/cmaketest/CMakeLists.txt b/core/unit_test/config/cmaketest/CMakeLists.txt index 54a4c4a74a1..72fef0a3e81 100644 --- a/core/unit_test/config/cmaketest/CMakeLists.txt +++ b/core/unit_test/config/cmaketest/CMakeLists.txt @@ -19,7 +19,6 @@ set(KOKKOS_CMAKEFILE kokkos_generated_settings.cmake) set(KOKKOS_INTERNAL_CONFIG_TMP KokkosCore_config.tmp) set(KOKKOS_CONFIG_HEADER KokkosCore_config.h) -set(KOKKOS_CMAKE_VERBOSE False) include(${KOKKOS_SRCDIR}/cmake/kokkos_options.cmake) foreach(KOKKOS_DEV ${KOKKOS_DEVICES_LIST}) # Do some initialization: Want to turn everything off for testing From 78a6477de2b701308489204ffb0bba4654a49e61 Mon Sep 17 00:00:00 2001 From: jjwilke Date: Fri, 31 May 2019 11:56:54 -0700 Subject: [PATCH 013/530] fixes for hwloc,memkind tpls --- CMakeLists.txt | 10 ++++++---- cmake/Modules/FindHWLOC.cmake | 6 +++--- cmake/Modules/FindMemkind.cmake | 9 +++++++++ cmake/kokkos_tpls.cmake | 18 ++++++++---------- cmake/kokkos_tribits.cmake | 4 ++-- 5 files changed, 28 insertions(+), 19 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d0bff0e1962..abf407a0a34 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,14 +13,16 @@ set(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) +list(APPEND CMAKE_MODULE_PATH cmake/Modules) + IF(NOT KOKKOS_HAS_TRILINOS) - cmake_minimum_required(VERSION 3.8 FATAL_ERROR) + cmake_minimum_required(VERSION 3.10 FATAL_ERROR) IF(NOT DEFINED ${PROJECT_NAME}) PROJECT(Kokkos CXX) ENDIF() - set (Kokkos_VERSION_MAJOR 2) - set (Kokkos_VERSION_MINOR 7) - set (Kokkos_VERSION_PATCH 4) + set (Kokkos_VERSION_MAJOR 3) + set (Kokkos_VERSION_MINOR 0) + set 
(Kokkos_VERSION_PATCH 0) ENDIF() INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) diff --git a/cmake/Modules/FindHWLOC.cmake b/cmake/Modules/FindHWLOC.cmake index 13c9224e439..f48ff6e8a51 100644 --- a/cmake/Modules/FindHWLOC.cmake +++ b/cmake/Modules/FindHWLOC.cmake @@ -17,13 +17,13 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(HWLOC DEFAULT_MSG HWLOC_INCLUDE_DIR HWLOC_LIBRARIES) -add_library(hwloc UNKNOWN IMPORTED) +add_library(Kokkos::hwloc UNKNOWN IMPORTED) -set_target_properties(hwloc PROPERTIES +set_target_properties(Kokkos::hwloc PROPERTIES INTERFACE_COMPILE_FEATURES "" INTERFACE_COMPILE_OPTIONS "" INTERFACE_INCLUDE_DIRECTORIES "${HWLOC_INCLUDE_DIR}" - INTERFACE_LINK_LIBRARIES "${HWLOC_LIBRARIES}" + IMPORTED_LOCATION "${HWLOC_LIBRARIES}" ) mark_as_advanced(HWLOC_INCLUDE_DIR HWLOC_LIBRARIES) diff --git a/cmake/Modules/FindMemkind.cmake b/cmake/Modules/FindMemkind.cmake index 245fb44c19a..adc5618e0ed 100644 --- a/cmake/Modules/FindMemkind.cmake +++ b/cmake/Modules/FindMemkind.cmake @@ -17,4 +17,13 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(Memkind DEFAULT_MSG MEMKIND_INCLUDE_DIR MEMKIND_LIBRARIES) +add_library(Kokkos::memkind UNKNOWN IMPORTED) + +set_target_properties(Kokkos::memkind PROPERTIES + INTERFACE_COMPILE_FEATURES "" + INTERFACE_COMPILE_OPTIONS "" + INTERFACE_INCLUDE_DIRECTORIES "${MEMKIND_INCLUDE_DIR}" + IMPORTED_LOCATION "${MEMKIND_LIBRARIES}" +) + mark_as_advanced(MEMKIND_INCLUDE_DIR MEMKIND_LIBRARIES) diff --git a/cmake/kokkos_tpls.cmake b/cmake/kokkos_tpls.cmake index c228c9600ad..c13c41a44c4 100644 --- a/cmake/kokkos_tpls.cmake +++ b/cmake/kokkos_tpls.cmake @@ -4,20 +4,18 @@ IF (KOKKOS_ENABLE_HPX) ENDIF() IF (KOKKOS_ENABLE_HWLOC) - IF (KOKKOS_HWLOC_DIR) - FIND_PACKAGE(HWLOC REQUIRED HINTS ${KOKKOS_HWLOC_DIR}) - ELSE() - FIND_PACKAGE(HWLOC REQUIRED) + FIND_PACKAGE(HWLOC REQUIRED MODULE) + IF (NOT HWLOC_FOUND) + MESSAGE(FATAL_ERROR "Unable to locate hwloc ${KOKKOS_HWLOC_DIR}") 
ENDIF() - MESSAGE(STATUS "KOKKOS_ENABLE_HWLOC: ${HWLOC_DIR}") + MESSAGE(STATUS "KOKKOS_ENABLE_HWLOC: ${HWLOC_INCLUDE_DIR}") ENDIF() IF (KOKKOS_ENABLE_MEMKIND) - IF (KOKKOS_MEMKIND_DIR) - FIND_PACKAGE(MEMKIND REQUIRED HINTS ${KOKKOS_ENABLE_MEMKIND}) - ELSE() - FIND_PACKAGE(MEMKIND REQUIRED) + FIND_PACKAGE(MEMKIND REQUIRED MODULE) + IF (NOT MEMKIND_FOUND) + MESSAGE(FATAL_ERROR "Unable to locate memkind ${KOKKOS_MEMKIND_DIR}") ENDIF() - MESSAGE(STATUS "KOKKOS_ENABLE_MEMKIND: ${MEMKIND_DIR}") + MESSAGE(STATUS "KOKKOS_ENABLE_MEMKIND: ${MEMKIND_INCLUDE_DIR}") ENDIF() diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 60d8a871d7a..97fddb2b2b0 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -302,11 +302,11 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) ENDIF() IF (KOKKOS_ENABLE_HWLOC) - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE hwloc) + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE Kokkos::hwloc) ENDIF() IF (KOKKOS_ENABLE_MEMKIND) - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE memkind) + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE Kokkos::memkind) ENDIF() IF (KOKKOS_CXX_STANDARD_IS_FEATURE) From 1991a8e64927e6a38955805404cf18207e74832d Mon Sep 17 00:00:00 2001 From: jjwilke Date: Sat, 1 Jun 2019 17:05:37 -0700 Subject: [PATCH 014/530] avoid namespace targets to keep hwloc/memkind out of INTERFACE_LINK_LIBRARIES --- cmake/Modules/FindHWLOC.cmake | 4 ++-- cmake/Modules/FindMemkind.cmake | 4 ++-- cmake/kokkos_tribits.cmake | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmake/Modules/FindHWLOC.cmake b/cmake/Modules/FindHWLOC.cmake index f48ff6e8a51..4039eb0b2d8 100644 --- a/cmake/Modules/FindHWLOC.cmake +++ b/cmake/Modules/FindHWLOC.cmake @@ -17,9 +17,9 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(HWLOC DEFAULT_MSG HWLOC_INCLUDE_DIR HWLOC_LIBRARIES) -add_library(Kokkos::hwloc UNKNOWN IMPORTED) +add_library(hwloc UNKNOWN IMPORTED) 
-set_target_properties(Kokkos::hwloc PROPERTIES +set_target_properties(hwloc PROPERTIES INTERFACE_COMPILE_FEATURES "" INTERFACE_COMPILE_OPTIONS "" INTERFACE_INCLUDE_DIRECTORIES "${HWLOC_INCLUDE_DIR}" diff --git a/cmake/Modules/FindMemkind.cmake b/cmake/Modules/FindMemkind.cmake index adc5618e0ed..0daadc65fa5 100644 --- a/cmake/Modules/FindMemkind.cmake +++ b/cmake/Modules/FindMemkind.cmake @@ -17,9 +17,9 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(Memkind DEFAULT_MSG MEMKIND_INCLUDE_DIR MEMKIND_LIBRARIES) -add_library(Kokkos::memkind UNKNOWN IMPORTED) +add_library(memkind UNKNOWN IMPORTED) -set_target_properties(Kokkos::memkind PROPERTIES +set_target_properties(memkind PROPERTIES INTERFACE_COMPILE_FEATURES "" INTERFACE_COMPILE_OPTIONS "" INTERFACE_INCLUDE_DIRECTORIES "${MEMKIND_INCLUDE_DIR}" diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 97fddb2b2b0..60d8a871d7a 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -302,11 +302,11 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) ENDIF() IF (KOKKOS_ENABLE_HWLOC) - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE Kokkos::hwloc) + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE hwloc) ENDIF() IF (KOKKOS_ENABLE_MEMKIND) - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE Kokkos::memkind) + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE memkind) ENDIF() IF (KOKKOS_CXX_STANDARD_IS_FEATURE) From c1942c1956b019a35993a63fbae41d171d35c81b Mon Sep 17 00:00:00 2001 From: jjwilke Date: Sun, 2 Jun 2019 11:42:57 -0600 Subject: [PATCH 015/530] fixes for nvcc_wrapper and c++ standards: related to #2035 --- cmake/kokkos_arch.cmake | 2 +- cmake/kokkos_cxx.cmake | 42 ++++++++++++++++++++++++++++++------ cmake/kokkos_functions.cmake | 2 +- cmake/kokkos_tribits.cmake | 2 +- 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/cmake/kokkos_arch.cmake b/cmake/kokkos_arch.cmake index ccb2a8eddac..76f6e70d482 100644 --- a/cmake/kokkos_arch.cmake +++ 
b/cmake/kokkos_arch.cmake @@ -379,7 +379,7 @@ IF (KOKKOS_ARCH_PASCAL61) ENDIF() IF (KOKKOS_ARCH_VOLTA70) - GLOBLA_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_70") + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_70") ENDIF() IF (KOKKOS_ARCH_VOLTA72) diff --git a/cmake/kokkos_cxx.cmake b/cmake/kokkos_cxx.cmake index 826e3fd8de5..8a7c8ab6643 100644 --- a/cmake/kokkos_cxx.cmake +++ b/cmake/kokkos_cxx.cmake @@ -7,13 +7,25 @@ FUNCTION(kokkos_set_cxx_standard_feature standard) #CMake's way of telling us that the standard (or extension) #flags are supported is the extension/standard variables IF (CMAKE_CXX_EXTENSIONS AND ${EXTENSION_NAME}) + MESSAGE(STATUS "Using ${${FEATURE_NAME}} for C++${standard} extensions") GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME}) ELSEIF(NOT CMAKE_CXX_EXTENSIONS AND ${STANDARD_NAME}) + MESSAGE(STATUS "Using ${${STANDARD_NAME}} for C++${standard} standard") GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME}) ELSE() #nope, we can't do anything here + MESSAGE(STATUS "C++${standard} is not supported as a compiler feature - choosing custom flags") GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE "") ENDIF() + + IF(NOT ${FEATURE_NAME} IN_LIST CMAKE_CXX_COMPILE_FEATURES) + IF (KOKKOS_CXX_COMPILER_ID STREQUAL "NVIDIA") + MESSAGE(STATUS "nvcc_wrapper does not support TARGET_COMPILE_FEATURES") + GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE "") + ELSE() + MESSAGE(FATAL_ERROR "Compiler ${KOKKOS_CXX_COMPILER_ID} should support ${FEATURE_NAME}, but CMake reports feature not supported") + ENDIF() + ENDIF() ENDFUNCTION(kokkos_set_cxx_standard_feature) SET(KOKKOS_CXX_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING INTERNAL) @@ -27,12 +39,14 @@ EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE INTERNAL_HAVE_COMPILER_NVCC OUTPUT_STRIP_TRAILING_WHITESPACE) + STRING(REGEX REPLACE "^ +" "" INTERNAL_HAVE_COMPILER_NVCC ${INTERNAL_HAVE_COMPILER_NVCC}) + IF(INTERNAL_HAVE_COMPILER_NVCC) # SET the compiler id to nvcc. 
We use the value used by CMake 3.8. - SET(KOKKOS_CXX_COMPILER_ID NVIDIA CACHE STRING INTERNAL) + SET(KOKKOS_CXX_COMPILER_ID NVIDIA CACHE STRING INTERNAL FORCE) # SET nvcc's compiler version. EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version @@ -117,9 +131,14 @@ ENDIF() kokkos_option(CXX_STANDARD "" STRING "The C++ standard for Kokkos to use: c++11, c++14, or c++17") IF (NOT KOKKOS_CXX_STANDARD AND NOT CMAKE_CXX_STANDARD) - SET(KOKKOS_CXX_STANDARD "11" CACHE STRING "C++ standard") + MESSAGE("Setting default Kokkos CXX standard to 11") + SET(KOKKOS_CXX_STANDARD "11" CACHE STRING "C++ standard" FORCE) + SET(CMAKE_CXX_STANDARD "11" CACHE STRING "C++ standard" FORCE) ELSEIF(NOT KOKKOS_CXX_STANDARD) - SET(KOKKOS_CXX_STANDARD ${CMAKE_CXX_STANDARD} CACHE STRING "C++ standard") + MESSAGE("Setting default Kokkos CXX standard to ${CMAKE_CXX_STANDARD}") + SET(KOKKOS_CXX_STANDARD ${CMAKE_CXX_STANDARD} CACHE STRING "C++ standard" FORCE) +ELSEIF(NOT CMAKE_CXX_STANDARD) + SET(CMAKE_CXX_STANDARD ${KOKKOS_CXX_STANDARD} CACHE STRING "C++ standard" FORCE) ENDIF() @@ -158,10 +177,13 @@ ENDIF() IF (KOKKOS_CXX_STANDARD AND CMAKE_CXX_STANDARD) #make sure these are consistent IF (NOT KOKKOS_CXX_STANDARD STREQUAL CMAKE_CXX_STANDARD) - MESSAGE(FATAL_ERROR "Specified both CMAKE_CXX_STANDARD and KOKKOS_CXX_STANDARD, but they don't match") + MESSAGE(WARNING "Specified both CMAKE_CXX_STANDARD and KOKKOS_CXX_STANDARD, but they don't match") + SET(CMAKE_CXX_STANDARD ${KOKKOS_CXX_STANDARD} CACHE STRING "C++ standard" FORCE) ENDIF() ENDIF() +get_property(CMAKE_CXX_KNOWN_FEATURES GLOBAL PROPERTY CMAKE_CXX_KNOWN_FEATURES) + IF (KOKKOS_CXX_STANDARD STREQUAL "11" ) kokkos_set_cxx_standard_feature(11) GLOBAL_SET(KOKKOS_ENABLE_CXX11 ON) @@ -174,10 +196,18 @@ ELSEIF(KOKKOS_CXX_STANDARD STREQUAL "17") ELSEIF(KOKKOS_CXX_STANDARD STREQUAL "98") MESSAGE(FATAL_ERROR "Kokkos requires C++11 or newer!") ELSE() + #set to empty + GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE "") + IF (KOKKOS_CXX_COMPILER_ID STREQUAL 
"NVIDIA") + MESSAGE(FATAL_ERROR "nvcc_wrapper does not support intermediate standards (1Y,1Z,2A) - must use 11, 14, or 17") + ENDIF() + #okay, this is funky - kill this variable + #this value is not really valid as a cmake variable + UNSET(CMAKE_CXX_STANDARD CACHE) IF (KOKKOS_CXX_STANDARD STREQUAL "1Y") GLOBAL_SET(KOKKOS_ENABLE_CXX14 ON) ELSEIF (KOKKOS_CXX_STANDARD STREQUAL "1Z") - GLOBAL_SET(KOKKOS_ENABLE_CXX1Z ON) + GLOBAL_SET(KOKKOS_ENABLE_CXX17 ON) ELSEIF (KOKKOS_CXX_STANDARD STREQUAL "2A") GLOBAL_SET(KOKKOS_ENABLE_CXX20 ON) ENDIF() @@ -207,7 +237,7 @@ IF (NOT KOKKOS_CXX_STANDARD_FEATURE) IF (NOT CXX_STD_FLAGS_ACCEPTED) MESSAGE(FATAL_ERROR "${KOKKOS_CXX_COMPILER_ID} did not accept ${KOKKOS_CXX_STANDARD_FLAG}. You likely need to reduce the level of the C++ standard from ${KOKKOS_CXX_STANDARD}") ELSE() - MESSAGE("Compiler features not supported, but ${KOKKOS_CXX_COMPILER_ID} accepts ${KOKKOS_CXX_STANDARD_FLAG}") + MESSAGE(STATUS "Compiler features not supported, but ${KOKKOS_CXX_COMPILER_ID} accepts ${KOKKOS_CXX_STANDARD_FLAG}") ENDIF() ENDIF() diff --git a/cmake/kokkos_functions.cmake b/cmake/kokkos_functions.cmake index e901bceaebf..52c2201bbcf 100644 --- a/cmake/kokkos_functions.cmake +++ b/cmake/kokkos_functions.cmake @@ -31,7 +31,7 @@ FUNCTION(kokkos_option CAMEL_SUFFIX DEFAULT TYPE DOCSTRING) SET(${UC_NAME} ${DEFAULT} CACHE ${TYPE} ${DOCSTRING}) ENDIF() #STORE A Value in the cache to identify whether this is the 1st configure - SET(${CACHE_NAME} ${${UC_NAME}} CACHE ${TYPE} ${DOCSTRING} FORCE) + SET(${CACHE_NAME} ${${UC_NAME}} CACHE INTERNAL ${DOCSTRING} FORCE) IF (${UC_NAME}) #cmake if statements follow really annoying string resolution rules MESSAGE(STATUS "${UC_NAME}=${${UC_NAME}}") diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 60d8a871d7a..6b24bacbeac 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -309,7 +309,7 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) 
TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE memkind) ENDIF() - IF (KOKKOS_CXX_STANDARD_IS_FEATURE) + IF (KOKKOS_CXX_STANDARD_FEATURE) #GREAT! I can't do this the right way TARGET_COMPILE_FEATURES(${LIBRARY_NAME} PUBLIC ${KOKKOS_CXX_STANDARD_FEATURE}) ELSE() From 9f99b961555ffcaa4700aa76bb5e93de443f47a1 Mon Sep 17 00:00:00 2001 From: jjwilke Date: Mon, 3 Jun 2019 07:40:10 -0700 Subject: [PATCH 016/530] fixes for in-tree build --- CMakeLists.txt | 6 ++++-- cmake/fake_tribits.cmake | 2 +- cmake/kokkos_cxx.cmake | 30 ++++++++++++++---------------- cmake/kokkos_install.cmake | 7 +------ 4 files changed, 20 insertions(+), 25 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index abf407a0a34..03f83f569dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,15 +37,17 @@ GLOBAL_SET(KOKKOS_XCOMPILER_OPTIONS) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake) KOKKOS_SETUP_BUILD_ENVIRONMENT() - GET_DIRECTORY_PROPERTY(HAS_PARENT PARENT_DIRECTORY) IF (KOKKOS_HAS_TRILINOS) SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) SET(KOKKOS_HEADER_DIR ${TRILINOS_INCDIR}) + SET(KOKKOS_IS_SUBDIRECTORY TRUE) ELSEIF(HAS_PARENT) SET(KOKKOS_HEADER_DIR "include/kokkos") + SET(KOKKOS_IS_SUBDIRECTORY TRUE) ELSE() SET(KOKKOS_HEADER_DIR "include") + SET(KOKKOS_IS_SUBDIRECTORY FALSE) ENDIF() @@ -76,7 +78,7 @@ KOKKOS_EXCLUDE_AUTOTOOLS_FILES() KOKKOS_PACKAGE_POSTPROCESS() IF (NOT KOKKOS_HAS_TRILINOS) - #just always do it + #just always do it - no more separate libs stuff KOKKOS_MAKE_LIBKOKKOS() include(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake) ENDIF() diff --git a/cmake/fake_tribits.cmake b/cmake/fake_tribits.cmake index a8a7f705efc..ffdeacc3090 100644 --- a/cmake/fake_tribits.cmake +++ b/cmake/fake_tribits.cmake @@ -35,7 +35,7 @@ MACRO(APPEND_GLOB VAR) ENDMACRO() MACRO(GLOBAL_SET VARNAME) - SET(${VARNAME} ${ARGN} CACHE INTERNAL "") + SET(${VARNAME} ${ARGN} CACHE INTERNAL "" FORCE) ENDMACRO() FUNCTION(VERIFY_EMPTY CONTEXT) diff --git 
a/cmake/kokkos_cxx.cmake b/cmake/kokkos_cxx.cmake index 8a7c8ab6643..4b22f0deeaf 100644 --- a/cmake/kokkos_cxx.cmake +++ b/cmake/kokkos_cxx.cmake @@ -7,10 +7,10 @@ FUNCTION(kokkos_set_cxx_standard_feature standard) #CMake's way of telling us that the standard (or extension) #flags are supported is the extension/standard variables IF (CMAKE_CXX_EXTENSIONS AND ${EXTENSION_NAME}) - MESSAGE(STATUS "Using ${${FEATURE_NAME}} for C++${standard} extensions") + MESSAGE(STATUS "Using ${${FEATURE_NAME}} for C++${standard} extensions as feature") GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME}) ELSEIF(NOT CMAKE_CXX_EXTENSIONS AND ${STANDARD_NAME}) - MESSAGE(STATUS "Using ${${STANDARD_NAME}} for C++${standard} standard") + MESSAGE(STATUS "Using ${${STANDARD_NAME}} for C++${standard} standard as feature") GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME}) ELSE() #nope, we can't do anything here @@ -130,19 +130,6 @@ ENDIF() kokkos_option(CXX_STANDARD "" STRING "The C++ standard for Kokkos to use: c++11, c++14, or c++17") -IF (NOT KOKKOS_CXX_STANDARD AND NOT CMAKE_CXX_STANDARD) - MESSAGE("Setting default Kokkos CXX standard to 11") - SET(KOKKOS_CXX_STANDARD "11" CACHE STRING "C++ standard" FORCE) - SET(CMAKE_CXX_STANDARD "11" CACHE STRING "C++ standard" FORCE) -ELSEIF(NOT KOKKOS_CXX_STANDARD) - MESSAGE("Setting default Kokkos CXX standard to ${CMAKE_CXX_STANDARD}") - SET(KOKKOS_CXX_STANDARD ${CMAKE_CXX_STANDARD} CACHE STRING "C++ standard" FORCE) -ELSEIF(NOT CMAKE_CXX_STANDARD) - SET(CMAKE_CXX_STANDARD ${KOKKOS_CXX_STANDARD} CACHE STRING "C++ standard" FORCE) -ENDIF() - - - # Set CXX standard flags SET(KOKKOS_ENABLE_CXX11 OFF CACHE INTERNAL "Enable C++11 flags") @@ -173,11 +160,22 @@ IF (KOKKOS_CXX_STANDARD) ENDIF() ENDIF() +IF (NOT KOKKOS_CXX_STANDARD AND NOT CMAKE_CXX_STANDARD) + MESSAGE("Setting default Kokkos CXX standard to 11") + SET(KOKKOS_CXX_STANDARD "11" CACHE STRING "C++ standard" FORCE) + SET(CMAKE_CXX_STANDARD "11" CACHE STRING "C++ standard" FORCE) 
+ELSEIF(NOT KOKKOS_CXX_STANDARD) + MESSAGE("Setting default Kokkos CXX standard to ${CMAKE_CXX_STANDARD}") + SET(KOKKOS_CXX_STANDARD ${CMAKE_CXX_STANDARD} CACHE STRING "C++ standard" FORCE) +ELSEIF(NOT CMAKE_CXX_STANDARD) + SET(CMAKE_CXX_STANDARD ${KOKKOS_CXX_STANDARD} CACHE STRING "C++ standard" FORCE) +ENDIF() + IF (KOKKOS_CXX_STANDARD AND CMAKE_CXX_STANDARD) #make sure these are consistent IF (NOT KOKKOS_CXX_STANDARD STREQUAL CMAKE_CXX_STANDARD) - MESSAGE(WARNING "Specified both CMAKE_CXX_STANDARD and KOKKOS_CXX_STANDARD, but they don't match") + MESSAGE(WARNING "Specified both CMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD} and KOKKOS_CXX_STANDARD=${KOKKOS_CXX_STANDARD}, but they don't match") SET(CMAKE_CXX_STANDARD ${KOKKOS_CXX_STANDARD} CACHE STRING "C++ standard" FORCE) ENDIF() ENDIF() diff --git a/cmake/kokkos_install.cmake b/cmake/kokkos_install.cmake index 9067b23d892..49e5230aa8e 100644 --- a/cmake/kokkos_install.cmake +++ b/cmake/kokkos_install.cmake @@ -27,11 +27,6 @@ FOREACH(p LIB BIN INCLUDE CMAKE) ENDIF() ENDFOREACH() -INSTALL (FILES - ${Kokkos_BINARY_DIR}/KokkosCore_config.h - DESTINATION ${KOKKOS_HEADER_DIR} -) - INCLUDE(CMakePackageConfigHelpers) CONFIGURE_PACKAGE_CONFIG_FILE(cmake/KokkosConfig.cmake.in "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" @@ -67,5 +62,5 @@ CONFIGURE_FILE(core/src/kokkos.pc.in kokkos.pc @ONLY) INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION lib/pkgconfig) CONFIGURE_FILE(cmake/KokkosCore_config.h.in KokkosCore_config.h @ONLY) -INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h DESTINATION include) +INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h DESTINATION ${KOKKOS_HEADER_DIR}) From 96ec7f4cbc7c6a22b7cc69d560220c078b483afc Mon Sep 17 00:00:00 2001 From: jjwilke Date: Tue, 4 Jun 2019 16:45:48 -0600 Subject: [PATCH 017/530] pull request fixes: fix KOKKOS_CUDA_OPTIONS --- cmake/fake_tribits.cmake | 2 +- cmake/kokkos_arch.cmake | 68 ++++++++++++++------------------------ cmake/kokkos_cxx.cmake | 
5 +-- cmake/kokkos_tribits.cmake | 8 ++--- 4 files changed, 32 insertions(+), 51 deletions(-) diff --git a/cmake/fake_tribits.cmake b/cmake/fake_tribits.cmake index ffdeacc3090..ce404f976aa 100644 --- a/cmake/fake_tribits.cmake +++ b/cmake/fake_tribits.cmake @@ -324,7 +324,7 @@ endforeach() ENDMACRO(PRINTALL) MACRO(GLOBAL_APPEND VARNAME) - SET(TEMP ${VARNAME}) + SET(TEMP ${${VARNAME}}) LIST(APPEND TEMP ${ARGN}) GLOBAL_SET(${VARNAME} ${TEMP}) ENDMACRO() diff --git a/cmake/kokkos_arch.cmake b/cmake/kokkos_arch.cmake index 76f6e70d482..4436140257a 100644 --- a/cmake/kokkos_arch.cmake +++ b/cmake/kokkos_arch.cmake @@ -156,6 +156,8 @@ ENDIF() #------------------------------- KOKKOS_CUDA_OPTIONS --------------------------- +#clear anything that might be in the cache +GLOBAL_SET(KOKKOS_CUDA_OPTIONS "") # Construct the Makefile options IF (KOKKOS_ENABLE_CUDA_LAMBDA) GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-expt-extended-lambda") @@ -342,53 +344,31 @@ IF (KOKKOS_ENABLE_CUDA_RELOCATED_DEVICE_CODE) ENDIF() -IF (KOKKOS_ARCH_KEPLER30) - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAGS}=sm_30") -ENDIF() - -IF (KOKKOS_ARCH_KEPLER32) - GLOBA_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_32") -ENDIF() - -IF (KOKKOS_ARCH_KEPLER35) - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_35") -ENDIF() - -IF (KOKKOS_ARCH_KEPLER35) - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_37") -ENDIF() - -IF (KOKKOS_ARCH_MAXWELL50) - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_50") -ENDIF() - -IF (KOKKOS_ARCH_MAXWELL52) - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_52") -ENDIF() - -IF (KOKKOS_ARCH_MAXWELL53) - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_53") -ENDIF() - -IF (KOKKOS_ARCH_PASCAL60) - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_60") -ENDIF() - -IF (KOKKOS_ARCH_PASCAL61) - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_61") +SET(CUDA_ARCH_ALREADY_SPECIFIED "") +FUNCTION(CHECK_CUDA_ARCH ARCH FLAG) 
+IF(KOKKOS_ARCH_${ARCH}) + IF(CUDA_ARCH_ALREADY_SPECIFIED) + MESSAGE(FATAL_ERROR "Multiple GPU architectures given! Already have ${CUDA_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.") + ENDIF() + SET(CUDA_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE) + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") ENDIF() +ENDFUNCTION(CHECK_CUDA_ARCH ARCH) -IF (KOKKOS_ARCH_VOLTA70) - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_70") -ENDIF() -IF (KOKKOS_ARCH_VOLTA72) - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_72") -ENDIF() +CHECK_CUDA_ARCH(KEPLER30 sm_30) +CHECK_CUDA_ARCH(KEPLER32 sm_32) +CHECK_CUDA_ARCH(KEPLER35 sm_35) +CHECK_CUDA_ARCH(KEPLER37 sm_37) +CHECK_CUDA_ARCH(MAXWELL50 sm_50) +CHECK_CUDA_ARCH(MAXWELL52 sm_52) +CHECK_CUDA_ARCH(MAXWELL53 sm_53) +CHECK_CUDA_ARCH(PASCAL60 sm_60) +CHECK_CUDA_ARCH(PASCAL61 sm_61) +CHECK_CUDA_ARCH(VOLTA70 sm_70) +CHECK_CUDA_ARCH(VOLTA72 sm_72) +CHECK_CUDA_ARCH(TURING75 sm_75) -IF (KOKKOS_ARCH_TURING75) - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=sm_75") -ENDIF() #CMake verbose is kind of pointless #Let's just always print things @@ -421,7 +401,7 @@ MESSAGE(STATUS "") MESSAGE(STATUS "Architectures:") FOREACH(Arch ${KOKKOS_ARCH_LIST}) STRING(TOUPPER ${Arch} ARCH) - IF (KOKKOS_${ARCH}) + IF (KOKKOS_ARCH_${ARCH}) MESSAGE(STATUS " ${Arch}") ENDIF() ENDFOREACH() diff --git a/cmake/kokkos_cxx.cmake b/cmake/kokkos_cxx.cmake index 4b22f0deeaf..ec45dad73a4 100644 --- a/cmake/kokkos_cxx.cmake +++ b/cmake/kokkos_cxx.cmake @@ -161,11 +161,11 @@ IF (KOKKOS_CXX_STANDARD) ENDIF() IF (NOT KOKKOS_CXX_STANDARD AND NOT CMAKE_CXX_STANDARD) - MESSAGE("Setting default Kokkos CXX standard to 11") + MESSAGE(STATUS "Setting default Kokkos CXX standard to 11") SET(KOKKOS_CXX_STANDARD "11" CACHE STRING "C++ standard" FORCE) SET(CMAKE_CXX_STANDARD "11" CACHE STRING "C++ standard" FORCE) ELSEIF(NOT KOKKOS_CXX_STANDARD) - MESSAGE("Setting default 
Kokkos CXX standard to ${CMAKE_CXX_STANDARD}") + MESSAGE(STATUS "Setting default Kokkos CXX standard to ${CMAKE_CXX_STANDARD}") SET(KOKKOS_CXX_STANDARD ${CMAKE_CXX_STANDARD} CACHE STRING "C++ standard" FORCE) ELSEIF(NOT CMAKE_CXX_STANDARD) SET(CMAKE_CXX_STANDARD ${KOKKOS_CXX_STANDARD} CACHE STRING "C++ standard" FORCE) @@ -212,6 +212,7 @@ ELSE() ENDIF() IF (NOT KOKKOS_CXX_STANDARD_FEATURE) + UNSET(CMAKE_CXX_STANDARD CACHE) #don't let cmake do this as a feature either #we need to pick the C++ flags ourselves IF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) INCLUDE(${KOKKOS_SRC_PATH}/cmake/cray.cmake) diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 6b24bacbeac..b0214df2c22 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -252,7 +252,7 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) PUBLIC ${KOKKOS_LD_FLAGS} ) ELSE() - #WELl, this is annoying - I am going to need to hack this for Visual Studio + #Well, this is annoying - I am going to need to hack this for Visual Studio TARGET_LINK_LIBRARIES( ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} ) @@ -261,11 +261,11 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) IF (KOKKOS_ENABLE_CUDA) TARGET_COMPILE_OPTIONS( ${LIBRARY_NAME} - PUBLIC ${KOKKOS_CUDA_OPTIONS}> + PUBLIC ${KOKKOS_CUDA_OPTIONS} ) SET(NODEDUP_CUDAFE_OPTIONS) FOREACH(OPT ${NODEDEUP_CUDAFE_OPTIONS}) - LIST(APPEND NODEDUP_CUDAFE_OPTIONS "SHELL: -Xcudafe ${OPT}") + LIST(APPEND NODEDUP_CUDAFE_OPTIONS "-Xcudafe ${OPT}") ENDFOREACH() TARGET_COMPILE_OPTIONS( ${LIBRARY_NAME} @@ -276,7 +276,7 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) IF(KOKKOS_XCOMPILER_OPTIONS) SET(NODEDUP_XCOMPILER_OPTIONS) FOREACH(OPT ${KOKKOS_XCOMPILER_OPTIONS}) - LIST(APPEND NODEDUP_XCOMPILER_OPTIONS "SHELL: -Xcompiler ${OPT}") + LIST(APPEND NODEDUP_XCOMPILER_OPTIONS "-Xcompiler ${OPT}") ENDFOREACH() TARGET_COMPILE_OPTIONS( ${LIBRARY_NAME} From 782f6517cdc677495247a6229362097fd6eac9b4 Mon Sep 17 00:00:00 2001 From: jjwilke Date: Tue, 
4 Jun 2019 18:29:16 -0600 Subject: [PATCH 018/530] fix static libkokkos: Issue #2167 --- cmake/kokkos_tribits.cmake | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index b0214df2c22..71c337ad261 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -235,11 +235,20 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) LIST(REMOVE_DUPLICATES PARSE_SOURCES) ENDIF() - ADD_LIBRARY( - ${LIBRARY_NAME} - ${PARSE_HEADERS} - ${PARSE_SOURCES} - ) + IF (KOKKOS_SEPARATE_LIBS) + ADD_LIBRARY( + ${LIBRARY_NAME} + ${PARSE_HEADERS} + ${PARSE_SOURCES} + ) + ELSE() + ADD_LIBRARY( + ${LIBRARY_NAME} + OBJECT + ${PARSE_HEADERS} + ${PARSE_SOURCES} + ) + ENDIF() TARGET_COMPILE_OPTIONS( ${LIBRARY_NAME} @@ -317,6 +326,8 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) TARGET_COMPILE_OPTIONS(${LIBRARY_NAME} PUBLIC ${KOKKOS_CXX_STANDARD_FLAG}) ENDIF() + #Even if separate libs and these are object libraries + #We still need to install them for transitive flags and deps KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${LIBRARY_NAME}) INSTALL( From 8fe68b18d66bcff0b0bc56610c9ebb9fa2f5ec91 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Tue, 4 Jun 2019 19:01:35 -0600 Subject: [PATCH 019/530] Fix some more issues with whitespaces and missing KOKKOS_ENABLE_CUDA_LAMBDA option --- cmake/KokkosCore_config.h.in | 1 + cmake/kokkos_arch.cmake | 38 ++++++++++++++++++------------------ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/cmake/KokkosCore_config.h.in b/cmake/KokkosCore_config.h.in index 54d97cfd266..3e47183578a 100644 --- a/cmake/KokkosCore_config.h.in +++ b/cmake/KokkosCore_config.h.in @@ -31,6 +31,7 @@ #cmakedefine KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE #cmakedefine KOKKOS_ENABLE_CUDA_UVM +#cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA #cmakedefine KOKKOS_ENABLE_CUDA_LDG_INTRINSIC #cmakedefine KOKKOS_ENABLE_HPX_ASYNC_DISPATCH #cmakedefine KOKKOS_ENABLE_DEBUG diff 
--git a/cmake/kokkos_arch.cmake b/cmake/kokkos_arch.cmake index 4436140257a..68304d9f1cc 100644 --- a/cmake/kokkos_arch.cmake +++ b/cmake/kokkos_arch.cmake @@ -148,7 +148,7 @@ IF(KOKKOS_ENABLE_COMPILER_WARNINGS) ${COMMON_WARNINGS}) ARCH_FLAGS( - PGI " " + PGI "" GNU ${GNU_WARNINGS} DEFAULT ${COMMON_WARNINGS} ) @@ -195,8 +195,8 @@ ENDIF() IF (KOKKOS_ARCH_ARMV81) ARCH_FLAGS( - Cray " " - PGI " " + Cray "" + PGI "" DEFAULT -march=armv8.1-a ) ENDIF() @@ -204,8 +204,8 @@ ENDIF() IF (KOKKOS_ARCH_ARMV8_THUNDERX) SET(KOKKOS_ARCH_ARMV80 ON CACHE BOOL "enable armv80" FORCE) ARCH_FLAGS( - Cray " " - PGI " " + Cray "" + PGI "" DEFAULT -march=armv8-a -mtune=thunderx ) ENDIF() @@ -213,8 +213,8 @@ ENDIF() IF (KOKKOS_ARCH_ARMV8_THUNDERX2) SET(KOKKOS_ARCH_ARMV81 ON CACHE BOOL "enable armv80" FORCE) ARCH_FLAGS( - Cray " " - PGI " " + Cray "" + PGI "" DEFAULT -march=thunderx2t99 -mtune=thunderx2t99 ) ENDIF() @@ -231,7 +231,7 @@ IF (KOKKOS_ARCH_WSM) ARCH_FLAGS( Intel -xSSE4.2 PGI -tp=nehalem - Cray " " + Cray "" DEFAULT -msse4.2 ) SET(KOKKOS_USE_ISA_X86_64 ON CACHE INTERNAL "x86-64 architecture") @@ -241,7 +241,7 @@ IF (KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX) ARCH_FLAGS( Intel -mavx PGI -tp=sandybridge - Cray " " + Cray "" DEFAULT -mavx ) SET(KOKKOS_USE_ISA_X86_64 ON CACHE INTERNAL "x86-64 architecture") @@ -252,7 +252,7 @@ IF (KOKKOS_ARCH_HSW OR KOKKOS_ARCH_BDW) ARCH_FLAGS( Intel -xCORE-AVX2 PGI -tp=haswell - Cray " " + Cray "" DEFAULT -march=core-avx2 -mtune=core-avx2 ) SET(KOKKOS_USE_ISA_X86_64 ON CACHE INTERNAL "x86-64 architecture") @@ -266,8 +266,8 @@ IF (KOKKOS_ARCH_KNL) SET(KOKKOS_ARCH_AVX512MIC ON CACHE BOOL "enable avx-512 MIC" FORCE) ARCH_FLAGS( Intel -xMIC-AVX512 - PGI " " - Cray " " + PGI "" + Cray "" DEFAULT -march=knl -mtune=knl ) SET(KOKKOS_USE_ISA_X86_64 ON CACHE INTERNAL "x86-64 architecture") @@ -278,8 +278,8 @@ IF (KOKKOS_ARCH_SKX) SET(KOKKOS_ARCH_AVX512XEON ON CACHE BOOL "enable avx-512 Xeon" FORCE) ARCH_FLAGS( Intel -xCORE-AVX512 - PGI " " - Cray " " + PGI "" + 
Cray "" DEFAULT -march=skylake-avx512 -march=skylake-avx512 -mrtm ) SET(KOKKOS_USE_ISA_X86_64 ON CACHE INTERNAL "x86-64 architecture") @@ -288,7 +288,7 @@ ENDIF() IF (KOKKOS_ARCH_POWER7) ARCH_FLAGS( - PGI " " + PGI "" DEFAULT -mcpu=power7 -mtune=power7 ) SET(KOKKOS_USE_ISA_POWERPCBE ON CACHE INTERNAL "Power PC Architecture") @@ -296,8 +296,8 @@ ENDIF() IF (KOKKOS_ARCH_POWER8) ARCH_FLAGS( - PGI " " - NVIDIA " " + PGI "" + NVIDIA "" DEFAULT -mcpu=power8 -mtune=power8 ) SET(KOKKOS_USE_ISA_POWERPCLE ON CACHE INTERNAL "Power PC Architecture") @@ -305,8 +305,8 @@ ENDIF() IF (KOKKOS_ARCH_POWER9) ARCH_FLAGS( - PGI " " - NVIDIA " " + PGI "" + NVIDIA "" DEFAULT -mcpu=power9 -mtune=power9 ) SET(KOKKOS_USE_ISA_POWERPCLE ON CACHE INTERNAL "Power PC Architecture") From f8641d015f371afb6cb49a97545d4e87dda7c5af Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Tue, 4 Jun 2019 19:13:09 -0600 Subject: [PATCH 020/530] Limit CUDA OPTIONS to C++ files --- cmake/kokkos_tribits.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 71c337ad261..53c15a4d09e 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -270,7 +270,7 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) IF (KOKKOS_ENABLE_CUDA) TARGET_COMPILE_OPTIONS( ${LIBRARY_NAME} - PUBLIC ${KOKKOS_CUDA_OPTIONS} + PUBLIC $<$:${KOKKOS_CUDA_OPTIONS}> ) SET(NODEDUP_CUDAFE_OPTIONS) FOREACH(OPT ${NODEDEUP_CUDAFE_OPTIONS}) From 574cc9af80116bb889470858060dfe649213a2a2 Mon Sep 17 00:00:00 2001 From: jjwilke Date: Mon, 24 Jun 2019 11:44:33 -0700 Subject: [PATCH 021/530] TPL fixes: issue #2098 --- cmake/Modules/FindHWLOC.cmake | 3 ++- cmake/Modules/FindLIBDL.cmake | 13 +++++++++++++ cmake/Modules/FindLIBRT.cmake | 17 +++++++++++++++++ cmake/fake_tribits.cmake | 2 +- cmake/kokkos_arch.cmake | 3 +++ cmake/kokkos_tpls.cmake | 16 +++++++++++++--- cmake/kokkos_tribits.cmake | 22 +++++++++++++++++++--- 7 files changed, 68 insertions(+), 8 
deletions(-) create mode 100644 cmake/Modules/FindLIBDL.cmake create mode 100644 cmake/Modules/FindLIBRT.cmake diff --git a/cmake/Modules/FindHWLOC.cmake b/cmake/Modules/FindHWLOC.cmake index 4039eb0b2d8..a7cf079b076 100644 --- a/cmake/Modules/FindHWLOC.cmake +++ b/cmake/Modules/FindHWLOC.cmake @@ -19,10 +19,11 @@ find_package_handle_standard_args(HWLOC DEFAULT_MSG add_library(hwloc UNKNOWN IMPORTED) +#See note in kokkos_tribits.cmake about why they are not included +#INTERFACE_INCLUDE_DIRECTORIES "${HWLOC_INCLUDE_DIR}" set_target_properties(hwloc PROPERTIES INTERFACE_COMPILE_FEATURES "" INTERFACE_COMPILE_OPTIONS "" - INTERFACE_INCLUDE_DIRECTORIES "${HWLOC_INCLUDE_DIR}" IMPORTED_LOCATION "${HWLOC_LIBRARIES}" ) diff --git a/cmake/Modules/FindLIBDL.cmake b/cmake/Modules/FindLIBDL.cmake new file mode 100644 index 00000000000..93a4028c5ef --- /dev/null +++ b/cmake/Modules/FindLIBDL.cmake @@ -0,0 +1,13 @@ +find_path(LIBDL_INCLUDE_DIRS + NAMES dlfcn.h) +find_library(LIBDL_LIBRARIES dl) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LIBDL DEFAULT_MSG LIBDL_LIBRARIES LIBDL_INCLUDE_DIRS) +mark_as_advanced(LIBDL_INCLUDE_DIRS LIBDL_LIBRARIES) + +add_library(libdl UNKNOWN IMPORTED) +set_target_properties(libdl PROPERTIES + IMPORTED_LOCATION "${LIBDL_LIBRARIES}" + INTERFACE_INCLUDE_DIRECTORIES "${LIBDL_INCLUDE_DIRS}") + diff --git a/cmake/Modules/FindLIBRT.cmake b/cmake/Modules/FindLIBRT.cmake new file mode 100644 index 00000000000..520cfbb067e --- /dev/null +++ b/cmake/Modules/FindLIBRT.cmake @@ -0,0 +1,17 @@ +find_path(LIBRT_INCLUDE_DIRS + NAMES time.h + PATHS ${LIBRT_ROOT}/include/ ${KOKKOS_LIBRT_DIR}/include +) + +find_library(LIBRT_LIBRARIES rt + PATHS ${KOKKOS_HWLOC_DIR}/lib ${LIBRT_ROOT}/lib +) +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LIBRT DEFAULT_MSG LIBRT_LIBRARIES LIBRT_INCLUDE_DIRS) +mark_as_advanced(LIBRT_INCLUDE_DIRS LIBRT_LIBRARIES) + +add_library(librt UNKNOWN IMPORTED) +set_target_properties(librt 
PROPERTIES + IMPORTED_LOCATION "${LIBRT_LIBRARIES}" + INTERFACE_INCLUDE_DIRECTORIES "${LIBRT_INCLUDE_DIRS}") + diff --git a/cmake/fake_tribits.cmake b/cmake/fake_tribits.cmake index ce404f976aa..bcb716953ad 100644 --- a/cmake/fake_tribits.cmake +++ b/cmake/fake_tribits.cmake @@ -279,7 +279,7 @@ ELSE() ) target_link_libraries( ${NAME} - PUBLIC ${KOKKOS_LD_FLAGS} + PUBLIC ${KOKKOS_LINK_OPTIONS} ) ENDIF() ENDFUNCTION(KOKKOS_ADD_TEST_LIBRARY) diff --git a/cmake/kokkos_arch.cmake b/cmake/kokkos_arch.cmake index 68304d9f1cc..e5ae427d5dd 100644 --- a/cmake/kokkos_arch.cmake +++ b/cmake/kokkos_arch.cmake @@ -33,6 +33,9 @@ FUNCTION(ARCH_FLAGS) ENDFOREACH() IF (NOT LINK_ONLY) + # The funky logic here is for future handling of argument deduplication + # If we naively pass multiple -Xcompiler flags to target_compile_options + # -Xcompiler will get deduplicated and break the build IF ("-Xcompiler" IN_LIST FLAGS) LIST(REMOVE_ITEM FLAGS "-Xcompiler") LIST(APPEND NEW_XCOMPILER_OPTIONS ${FLAGS}) diff --git a/cmake/kokkos_tpls.cmake b/cmake/kokkos_tpls.cmake index c13c41a44c4..7d0303f594c 100644 --- a/cmake/kokkos_tpls.cmake +++ b/cmake/kokkos_tpls.cmake @@ -1,6 +1,6 @@ IF (KOKKOS_ENABLE_HPX) FIND_PACKAGE(HPX REQUIRED) - MESSAGE(STATUS "KOKKOS_ENABLE_HPX: ${HPX_DIR}") +# MESSAGE(STATUS "KOKKOS_ENABLE_HPX: ${HPX_DIR}") ENDIF() IF (KOKKOS_ENABLE_HWLOC) @@ -8,7 +8,18 @@ IF (KOKKOS_ENABLE_HWLOC) IF (NOT HWLOC_FOUND) MESSAGE(FATAL_ERROR "Unable to locate hwloc ${KOKKOS_HWLOC_DIR}") ENDIF() - MESSAGE(STATUS "KOKKOS_ENABLE_HWLOC: ${HWLOC_INCLUDE_DIR}") +ENDIF() + +IF (KOKKOS_ENABLE_LIBRT) + FIND_PACKAGE(LIBRT REQUIRED MODULE) + IF (NOT LIBRT_FOUND) + MESSAGE(FATAL_ERROR "Unable to locate LIBRT ${KOKKOS_LIBRT_DIR}") + ENDIF() +ENDIF() + +FIND_PACKAGE(LIBDL REQUIRED MODULE) +IF (NOT LIBDL_FOUND) + MESSAGE(FATAL_ERROR "Unable to locate ldl ${KOKKOS_LIBDL_DIR}") ENDIF() IF (KOKKOS_ENABLE_MEMKIND) @@ -16,6 +27,5 @@ IF (KOKKOS_ENABLE_MEMKIND) IF (NOT MEMKIND_FOUND) MESSAGE(FATAL_ERROR "Unable to 
locate memkind ${KOKKOS_MEMKIND_DIR}") ENDIF() - MESSAGE(STATUS "KOKKOS_ENABLE_MEMKIND: ${MEMKIND_INCLUDE_DIR}") ENDIF() diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 53c15a4d09e..c5847c670a3 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -257,8 +257,7 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) IF(${CMAKE_VERSION} VERSION_GREATER "3.13" OR ${CMAKE_VERSION} VERSION_EQUAL "3.13") TARGET_LINK_OPTIONS( - ${LIBRARY_NAME} - PUBLIC ${KOKKOS_LD_FLAGS} + ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} ) ELSE() #Well, this is annoying - I am going to need to hack this for Visual Studio @@ -311,7 +310,21 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) ENDIF() IF (KOKKOS_ENABLE_HWLOC) - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE hwloc) + #this is really annoying that I have to do this + #if CMake links statically, it will not link in hwloc which causes undefined refs downstream + #even though hwloc is really "private" and doesn't need to be public I have to link publicly + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC hwloc) + #what I don't want is the headers to be propagated downstream + TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PRIVATE ${HWLOC_INCLUDE_DIR}) + ENDIF() + + IF (KOKKOS_ENABLE_LIBRT) + #this is really annoying that I have to do this + #if CMake links statically, it will not link in librt which causes undefined refs downstream + #even though librt is really "private" and doesn't need to be public I have to link publicly + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE librt) + #what I don't want is the headers to be propagated downstream + TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PRIVATE ${LIBRT_INCLUDE_DIR}) ENDIF() IF (KOKKOS_ENABLE_MEMKIND) @@ -326,6 +339,9 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) TARGET_COMPILE_OPTIONS(${LIBRARY_NAME} PUBLIC ${KOKKOS_CXX_STANDARD_FLAG}) ENDIF() + #dlfcn.h is in header files and needs to propagate + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} 
PUBLIC libdl) + #Even if separate libs and these are object libraries #We still need to install them for transitive flags and deps KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${LIBRARY_NAME}) From 5002db370e430ac60c84e4d70aa246fd66f39449 Mon Sep 17 00:00:00 2001 From: jjwilke Date: Mon, 24 Jun 2019 13:53:24 -0600 Subject: [PATCH 022/530] various fixes: #2098, #2186, #2175 --- CMakeLists.txt | 6 ++++ cmake/Modules/FindHWLOC.cmake | 4 +-- cmake/Modules/FindLIBNUMA.cmake | 31 +++++++++++++++++++ .../{FindMemkind.cmake => FindMEMKIND.cmake} | 0 cmake/kokkos_enable_options.cmake | 17 ++++++++++ cmake/kokkos_tpls.cmake | 7 +++++ cmake/kokkos_tribits.cmake | 18 ++++++++++- 7 files changed, 80 insertions(+), 3 deletions(-) create mode 100644 cmake/Modules/FindLIBNUMA.cmake rename cmake/Modules/{FindMemkind.cmake => FindMEMKIND.cmake} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 03f83f569dc..c319c0353ff 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,7 @@ set(KOKKOS_SRC_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_PATH ${Kokkos_SOURCE_DIR}) set(KOKKOS_TOP_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}) + list(APPEND CMAKE_MODULE_PATH cmake/Modules) IF(NOT KOKKOS_HAS_TRILINOS) @@ -25,6 +26,11 @@ IF(NOT KOKKOS_HAS_TRILINOS) set (Kokkos_VERSION_PATCH 0) ENDIF() +IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0") + MESSAGE(STATUS "Setting policy CMP0074 to use _ROOT variables") + CMAKE_POLICY(SET CMP0074 NEW) +ENDIF() + INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) GLOBAL_SET(KOKKOS_COMPILE_OPTIONS) GLOBAL_SET(KOKKOS_LINK_OPTIONS) diff --git a/cmake/Modules/FindHWLOC.cmake b/cmake/Modules/FindHWLOC.cmake index a7cf079b076..209ef2a807c 100644 --- a/cmake/Modules/FindHWLOC.cmake +++ b/cmake/Modules/FindHWLOC.cmake @@ -10,8 +10,8 @@ # HWLOC_INCLUDE_DIR - HWLOC include directory # HWLOC_LIBRARIES - Libraries needed to use HWLOC -find_path(HWLOC_INCLUDE_DIR hwloc.h PATHS "${KOKKOS_HWLOC_DIR}/include" ${hwloc_ROOT}/include) -find_library(HWLOC_LIBRARIES hwloc PATHS 
"${KOKKOS_HWLOC_DIR}/lib" ${hwloc_ROOT}/lib) +find_path(HWLOC_INCLUDE_DIR hwloc.h PATHS "${KOKKOS_HWLOC_DIR}/include" ${hwloc_ROOT}/include ${HWLOC_ROOT}/include) +find_library(HWLOC_LIBRARIES hwloc PATHS "${KOKKOS_HWLOC_DIR}/lib" ${hwloc_ROOT}/lib ${HWLOC_ROOT}/lib) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(HWLOC DEFAULT_MSG diff --git a/cmake/Modules/FindLIBNUMA.cmake b/cmake/Modules/FindLIBNUMA.cmake new file mode 100644 index 00000000000..357662730b2 --- /dev/null +++ b/cmake/Modules/FindLIBNUMA.cmake @@ -0,0 +1,31 @@ +#.rst: +# FindLIBNUMA +# ---------- +# +# Try to find LIBNUMA, based on KOKKOS_LIBNUMA_DIR +# +# The following variables are defined: +# +# LIBNUMA_FOUND - System has LIBNUMA +# LIBNUMA_INCLUDE_DIR - LIBNUMA include directory +# LIBNUMA_LIBRARIES - Libraries needed to use LIBNUMA + +find_path(LIBNUMA_INCLUDE_DIR numa.h PATHS "${KOKKOS_LIBNUMA_DIR}/include" ${libnuma_ROOT}/include ${LIBNUMA_ROOT}/include) +find_library(LIBNUMA_LIBRARIES numa PATHS "${KOKKOS_LIBNUMA_DIR}/lib" ${libnuma_ROOT}/lib ${LIBNUMA_ROOT}/lib) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LIBNUMA DEFAULT_MSG + LIBNUMA_INCLUDE_DIR LIBNUMA_LIBRARIES) + +add_library(libnuma UNKNOWN IMPORTED) + +#See note in kokkos_tribits.cmake about why they are not included +#INTERFACE_INCLUDE_DIRECTORIES "${LIBNUMA_INCLUDE_DIR}" +set_target_properties(libnuma PROPERTIES + INTERFACE_COMPILE_FEATURES "" + INTERFACE_COMPILE_OPTIONS "" + IMPORTED_LOCATION "${LIBNUMA_LIBRARIES}" +) + +mark_as_advanced(LIBNUMA_INCLUDE_DIR LIBNUMA_LIBRARIES) + diff --git a/cmake/Modules/FindMemkind.cmake b/cmake/Modules/FindMEMKIND.cmake similarity index 100% rename from cmake/Modules/FindMemkind.cmake rename to cmake/Modules/FindMEMKIND.cmake diff --git a/cmake/kokkos_enable_options.cmake b/cmake/kokkos_enable_options.cmake index 9b310f5b769..f82eb9f70d6 100644 --- a/cmake/kokkos_enable_options.cmake +++ b/cmake/kokkos_enable_options.cmake @@ -19,6 +19,7 
@@ KOKKOS_ENABLE_OPTION(Pthread OFF "Whether to build Pthread backend") KOKKOS_ENABLE_OPTION(Cuda OFF "Whether to build CUDA backend") KOKKOS_ENABLE_OPTION(ROCm OFF "Whether to build AMD ROCm backend") KOKKOS_ENABLE_OPTION(HWLOC OFF "Whether to enable HWLOC features - may also require -DHWLOC_DIR") +KOKKOS_ENABLE_OPTION(LIBNUMA OFF "Whether to enable LIBNuMA features - may also require -DLIBNUMA_DIR") KOKKOS_ENABLE_OPTION(MEMKIND OFF "Whether to enable MEMKIND featuers - may also require -DMEMKIND_DIR") KOKKOS_ENABLE_OPTION(LIBRT OFF "Whether to enable LIBRT features") KOKKOS_ENABLE_OPTION(Cuda_Relocatable_Device_Code OFF "Whether to enable relocatable device code (RDC) for CUDA") @@ -37,6 +38,21 @@ KOKKOS_ENABLE_OPTION(Explicit_Instantiation OFF "Whether to explicitly instantiate certain types to lower future compile times") SET(KOKKOS_ENABLE_ETI ${KOKKOS_ENABLE_EXPLICIT_INSTANTIATION} CACHE INTERNAL "eti") +IF (KOKKOS_DEVICES MATCHES ",") + MESSAGE(WARNING "-- Detected a comma in: KOKKOS_DEVICES=`${KOKKOS_DEVICES}`") + MESSAGE("-- Although we prefer KOKKOS_DEVICES to be semicolon-delimited, we do allow") + MESSAGE("-- comma-delimited values for compatibility with scripts (see github.com/trilinos/Trilinos/issues/2330)") + STRING(REPLACE "," ";" KOKKOS_DEVICES "${KOKKOS_DEVICES}") + MESSAGE("-- Commas were changed to semicolons, now KOKKOS_DEVICES=`${KOKKOS_DEVICES}`") +ENDIF() + +FOREACH(DEV ${KOKKOS_DEVICES}) +STRING(TOUPPER ${DEV} UC_NAME) +SET(ENABLE_NAME KOKKOS_ENABLE_${UC_NAME}) +MESSAGE(STATUS "Setting ${ENABLE_NAME}=ON from KOKKOS_DEVICES") +SET(${ENABLE_NAME} ON CACHE BOOL "Enable device ${DEV}" FORCE) +ENDFOREACH() + IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_QTHREAD) SET(QTHR_DEFAULT ON) ELSE() @@ -97,6 +113,7 @@ SET(KOKKOS_HPX_DIR "" CACHE PATH "Location of HPX library.") SET(KOKKOS_SEPARATE_TESTS OFF CACHE BOOL "Provide unit test targets with finer granularity.") SET(KOKKOS_HWLOC_DIR "" CACHE PATH "Location of hwloc library. 
(kokkos tpl)") +SET(KOKKOS_LIBNUMA_DIR "" CACHE PATH "Location of libnuma library. (kokkos tpl)") SET(KOKKOS_MEMKIND_DIR "" CACHE PATH "Location of memkind library. (kokkos tpl)") SET(KOKKOS_CUDA_DIR "" CACHE PATH "Location of CUDA library. Defaults to where nvcc installed.") diff --git a/cmake/kokkos_tpls.cmake b/cmake/kokkos_tpls.cmake index 7d0303f594c..fd90cfa3bd1 100644 --- a/cmake/kokkos_tpls.cmake +++ b/cmake/kokkos_tpls.cmake @@ -10,6 +10,13 @@ IF (KOKKOS_ENABLE_HWLOC) ENDIF() ENDIF() +IF (KOKKOS_ENABLE_LIBNUMA) + FIND_PACKAGE(LIBNUMA REQUIRED MODULE) + IF (NOT LIBNUMA_FOUND) + MESSAGE(FATAL_ERROR "Unable to locate libnuma ${KOKKOS_LIBNUMA_DIR}") + ENDIF() +ENDIF() + IF (KOKKOS_ENABLE_LIBRT) FIND_PACKAGE(LIBRT REQUIRED MODULE) IF (NOT LIBRT_FOUND) diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index c5847c670a3..a363845fbc9 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -281,10 +281,17 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) ) ENDIF() + LIST(LENGTH KOKKOS_XCOMPILER_OPTIONS XOPT_LENGTH) + IF (XOPT_LENGTH GREATER 1) + MESSAGE(FATAL_ERROR "CMake deduplication does not allow multiple -Xcompiler flags (${KOKKOS_XCOMPILER_OPTIONS}): will require Kokkos to upgrade to minimum 3.12") + ENDIF() IF(KOKKOS_XCOMPILER_OPTIONS) SET(NODEDUP_XCOMPILER_OPTIONS) FOREACH(OPT ${KOKKOS_XCOMPILER_OPTIONS}) - LIST(APPEND NODEDUP_XCOMPILER_OPTIONS "-Xcompiler ${OPT}") + #I have to do this for now because we can't guarantee 3.12 support + #I really should do this with the shell option + LIST(APPEND NODEDUP_XCOMPILER_OPTIONS -Xcompiler) + LIST(APPEND NODEDUP_XCOMPILER_OPTIONS ${OPT}) ENDFOREACH() TARGET_COMPILE_OPTIONS( ${LIBRARY_NAME} @@ -317,6 +324,15 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) #what I don't want is the headers to be propagated downstream TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PRIVATE ${HWLOC_INCLUDE_DIR}) ENDIF() + + IF (KOKKOS_ENABLE_LIBNUMA) + #this is really annoying that I have to 
do this + #if CMake links statically, it will not link in hwloc which causes undefined refs downstream + #even though hwloc is really "private" and doesn't need to be public I have to link publicly + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC libnuma) + #what I don't want is the headers to be propagated downstream + TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PRIVATE ${LIBNUMA_INCLUDE_DIR}) + ENDIF() IF (KOKKOS_ENABLE_LIBRT) #this is really annoying that I have to do this From 19e6e3229f5faeac09ade3cfb39897e2b3fdac1e Mon Sep 17 00:00:00 2001 From: jjwilke Date: Tue, 25 Jun 2019 12:57:47 -0700 Subject: [PATCH 023/530] recreate imported targets for TPLs --- cmake/KokkosConfig.cmake.in | 37 +++++++++++++++++++++++++++++++++ cmake/Modules/FindHWLOC.cmake | 4 ++-- cmake/Modules/FindLIBDL.cmake | 4 ++-- cmake/Modules/FindLIBNUMA.cmake | 4 ++-- cmake/Modules/FindLIBRT.cmake | 4 ++-- cmake/Modules/FindMEMKIND.cmake | 4 ++-- cmake/kokkos_install.cmake | 1 + cmake/kokkos_tribits.cmake | 10 ++++----- 8 files changed, 53 insertions(+), 15 deletions(-) diff --git a/cmake/KokkosConfig.cmake.in b/cmake/KokkosConfig.cmake.in index c709ad2ec81..c6c2474f63d 100644 --- a/cmake/KokkosConfig.cmake.in +++ b/cmake/KokkosConfig.cmake.in @@ -47,3 +47,40 @@ IF (KOKKOS_ENABLE_HPX) FIND_DEPENDENCY(HPX) ENDIF() +SET(HWLOC_LIBRARIES @HWLOC_LIBRARIES@) +SET(KOKKOS_ENABLE_HWLOC @KOKKOS_ENABLE_HWLOC@) +IF(KOKKOS_ENABLE_HWLOC) + ADD_LIBRARY(Kokkos::hwloc UNKNOWN IMPORTED) + SET_TARGET_PROPERTIES(Kokkos::hwloc PROPERTIES + IMPORTED_LOCATION "${HWLOC_LIBRARIES}" + ) +ENDIF() + +SET(MEMKIND_LIBRARIES @MEMKIND_LIBRARIES@) +SET(MEMKIND_INCLUDE_DIR @MEMKIND_INCLUDE_DIR@) +SET(KOKKOS_ENABLE_MEMKIND @KOKKOS_ENABLE_MEMKIND@) +IF(KOKKOS_ENABLE_MEMKIND) + ADD_LIBRARY(Kokkos::memkind UNKNOWN IMPORTED) + SET_TARGET_PROPERTIES(Kokkos::memkind PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${MEMKIND_INCLUDE_DIR}" + IMPORTED_LOCATION "${MEMKIND_LIBRARIES}" + ) +ENDIF() + +SET(LIBDL_LIBRARIES @LIBDL_LIBRARIES@) 
+SET(LIBDL_INCLUDE_DIR @LIBDL_INCLUDE_DIR@) +#no if, always need -ldl +ADD_LIBRARY(Kokkos::libdl UNKNOWN IMPORTED) +SET_TARGET_PROPERTIES(Kokkos::libdl PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${LIBDL_INCLUDE_DIR}" + IMPORTED_LOCATION "${LIBDL_LIBRARIES}" +) + +SET(LIBRT_LIBRARIES @LIBRT_LIBRARIES@) +SET(KOKKOS_ENABLE_LIBRT @KOKKOS_ENABLE_LIBRT@) +IF(KOKKOS_ENABLE_LIBRT) + ADD_LIBRARY(Kokkos::librt UNKNOWN IMPORTED) + SET_TARGET_PROPERTIES(Kokkos::librt PROPERTIES + IMPORTED_LOCATION "${LIBRT_LIBRARIES}" + ) +ENDIF() diff --git a/cmake/Modules/FindHWLOC.cmake b/cmake/Modules/FindHWLOC.cmake index 209ef2a807c..b541d983066 100644 --- a/cmake/Modules/FindHWLOC.cmake +++ b/cmake/Modules/FindHWLOC.cmake @@ -17,11 +17,11 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(HWLOC DEFAULT_MSG HWLOC_INCLUDE_DIR HWLOC_LIBRARIES) -add_library(hwloc UNKNOWN IMPORTED) +add_library(Kokkos::hwloc UNKNOWN IMPORTED) #See note in kokkos_tribits.cmake about why they are not included #INTERFACE_INCLUDE_DIRECTORIES "${HWLOC_INCLUDE_DIR}" -set_target_properties(hwloc PROPERTIES +set_target_properties(Kokkos::hwloc PROPERTIES INTERFACE_COMPILE_FEATURES "" INTERFACE_COMPILE_OPTIONS "" IMPORTED_LOCATION "${HWLOC_LIBRARIES}" diff --git a/cmake/Modules/FindLIBDL.cmake b/cmake/Modules/FindLIBDL.cmake index 93a4028c5ef..93e4f972aa2 100644 --- a/cmake/Modules/FindLIBDL.cmake +++ b/cmake/Modules/FindLIBDL.cmake @@ -6,8 +6,8 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(LIBDL DEFAULT_MSG LIBDL_LIBRARIES LIBDL_INCLUDE_DIRS) mark_as_advanced(LIBDL_INCLUDE_DIRS LIBDL_LIBRARIES) -add_library(libdl UNKNOWN IMPORTED) -set_target_properties(libdl PROPERTIES +add_library(Kokkos::libdl UNKNOWN IMPORTED) +set_target_properties(Kokkos::libdl PROPERTIES IMPORTED_LOCATION "${LIBDL_LIBRARIES}" INTERFACE_INCLUDE_DIRECTORIES "${LIBDL_INCLUDE_DIRS}") diff --git a/cmake/Modules/FindLIBNUMA.cmake b/cmake/Modules/FindLIBNUMA.cmake index 357662730b2..42dadfbfc3e 
100644 --- a/cmake/Modules/FindLIBNUMA.cmake +++ b/cmake/Modules/FindLIBNUMA.cmake @@ -17,11 +17,11 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(LIBNUMA DEFAULT_MSG LIBNUMA_INCLUDE_DIR LIBNUMA_LIBRARIES) -add_library(libnuma UNKNOWN IMPORTED) +add_library(Kokkos::libnuma UNKNOWN IMPORTED) #See note in kokkos_tribits.cmake about why they are not included #INTERFACE_INCLUDE_DIRECTORIES "${LIBNUMA_INCLUDE_DIR}" -set_target_properties(libnuma PROPERTIES +set_target_properties(Kokkos::libnuma PROPERTIES INTERFACE_COMPILE_FEATURES "" INTERFACE_COMPILE_OPTIONS "" IMPORTED_LOCATION "${LIBNUMA_LIBRARIES}" diff --git a/cmake/Modules/FindLIBRT.cmake b/cmake/Modules/FindLIBRT.cmake index 520cfbb067e..919d3c69f46 100644 --- a/cmake/Modules/FindLIBRT.cmake +++ b/cmake/Modules/FindLIBRT.cmake @@ -10,8 +10,8 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(LIBRT DEFAULT_MSG LIBRT_LIBRARIES LIBRT_INCLUDE_DIRS) mark_as_advanced(LIBRT_INCLUDE_DIRS LIBRT_LIBRARIES) -add_library(librt UNKNOWN IMPORTED) -set_target_properties(librt PROPERTIES +add_library(Kokkos::librt UNKNOWN IMPORTED) +set_target_properties(Kokkos::librt PROPERTIES IMPORTED_LOCATION "${LIBRT_LIBRARIES}" INTERFACE_INCLUDE_DIRECTORIES "${LIBRT_INCLUDE_DIRS}") diff --git a/cmake/Modules/FindMEMKIND.cmake b/cmake/Modules/FindMEMKIND.cmake index 0daadc65fa5..adc5618e0ed 100644 --- a/cmake/Modules/FindMEMKIND.cmake +++ b/cmake/Modules/FindMEMKIND.cmake @@ -17,9 +17,9 @@ include(FindPackageHandleStandardArgs) find_package_handle_standard_args(Memkind DEFAULT_MSG MEMKIND_INCLUDE_DIR MEMKIND_LIBRARIES) -add_library(memkind UNKNOWN IMPORTED) +add_library(Kokkos::memkind UNKNOWN IMPORTED) -set_target_properties(memkind PROPERTIES +set_target_properties(Kokkos::memkind PROPERTIES INTERFACE_COMPILE_FEATURES "" INTERFACE_COMPILE_OPTIONS "" INTERFACE_INCLUDE_DIRECTORIES "${MEMKIND_INCLUDE_DIR}" diff --git a/cmake/kokkos_install.cmake b/cmake/kokkos_install.cmake index 
49e5230aa8e..10d70aea981 100644 --- a/cmake/kokkos_install.cmake +++ b/cmake/kokkos_install.cmake @@ -15,6 +15,7 @@ ELSE() LIST(APPEND DEF_INSTALL_CMAKE_DIR lib/CMake/Kokkos) #also add the totally normal place for it to be LIST(APPEND DEF_INSTALL_CMAKE_DIR lib/cmake) + LIST(APPEND DEF_INSTALL_CMAKE_DIR lib/cmake/Kokkos) ENDIF() SET(INSTALL_CMAKE_DIR ${DEF_INSTALL_CMAKE_DIR} CACHE PATH "Installation directory for CMake files") diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index a363845fbc9..1e67f749c96 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -320,7 +320,7 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) #this is really annoying that I have to do this #if CMake links statically, it will not link in hwloc which causes undefined refs downstream #even though hwloc is really "private" and doesn't need to be public I have to link publicly - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC hwloc) + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC Kokkos::hwloc) #what I don't want is the headers to be propagated downstream TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PRIVATE ${HWLOC_INCLUDE_DIR}) ENDIF() @@ -329,7 +329,7 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) #this is really annoying that I have to do this #if CMake links statically, it will not link in hwloc which causes undefined refs downstream #even though hwloc is really "private" and doesn't need to be public I have to link publicly - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC libnuma) + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC Kokkos::libnuma) #what I don't want is the headers to be propagated downstream TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PRIVATE ${LIBNUMA_INCLUDE_DIR}) ENDIF() @@ -338,13 +338,13 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) #this is really annoying that I have to do this #if CMake links statically, it will not link in librt which causes undefined refs downstream #even though librt is really "private" and 
doesn't need to be public I have to link publicly - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE librt) + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE Kokkos::librt) #what I don't want is the headers to be propagated downstream TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PRIVATE ${LIBRT_INCLUDE_DIR}) ENDIF() IF (KOKKOS_ENABLE_MEMKIND) - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE memkind) + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE Kokkos::memkind) ENDIF() IF (KOKKOS_CXX_STANDARD_FEATURE) @@ -356,7 +356,7 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) ENDIF() #dlfcn.h is in header files and needs to propagate - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC libdl) + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC Kokkos::libdl) #Even if separate libs and these are object libraries #We still need to install them for transitive flags and deps From eb380befc3c9587e79476cc394296736816aed3f Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Wed, 26 Jun 2019 13:33:22 -0600 Subject: [PATCH 024/530] Update Trilinos integration scripts and docs --- doc/kokkos-promotion.txt | 40 ++++++------ .../configure-atdm-cuda-depoff-rdc.sh | 63 ------------------- .../configure-atdm-cuda-depon-rdc-dbg.sh | 59 ----------------- .../configure-atdm-cuda-ride-rdc-depoff.sh | 35 +++++++++++ .../configure-atdm-cuda-ride-rdc-depon.sh | 35 +++++++++++ .../blake_jenkins_run_script_pthread_intel | 2 +- .../blake_jenkins_run_script_serial_intel | 2 +- .../white_run_jenkins_script_omp | 2 +- 8 files changed, 94 insertions(+), 144 deletions(-) delete mode 100755 scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-depoff-rdc.sh delete mode 100755 scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-depon-rdc-dbg.sh create mode 100755 scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-ride-rdc-depoff.sh create mode 100755 scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-ride-rdc-depon.sh diff --git 
a/doc/kokkos-promotion.txt b/doc/kokkos-promotion.txt index 3ed8aa0040c..e36a6c98e28 100644 --- a/doc/kokkos-promotion.txt +++ b/doc/kokkos-promotion.txt @@ -26,6 +26,7 @@ supported compilers. Those machines are: white bowman waterman + ride 1.1. Clone kokkos develop branch (or just switch to it) @@ -74,16 +75,16 @@ Step 2: a) serial, openmp, and cuda via the testing scripts in kokkos-kernels/scripts/trilinos-integration (automates the process) b) various ATDM-supported builds via Trilinos configuration scripts located in kokkos{-kernels}/scripts/trilinos-integration/ATDM_configurations (not yet automated) - Run scripts for white (openmp and cuda) and blake (seral) that are provided in kokkos{-kernels}/scripts/trilinos-integration. + Run scripts for automated testing on white (openmp and cuda) and blake (serial) that are provided in kokkos{-kernels}/scripts/trilinos-integration. These scripts load their own modules/environment, so don't require preparation. You can run all four at the same time, use separate directories for each. mkdir serial cd serial nohup KOKKOSKERNELS_PATH/scripts/trilinos-integration/blake_jenkins_run_script_serial_intel & - Use scripts to configure Trilinos for waterman (cuda, cuda-debug, cuda-rdc) that are provided in kokkos-kernels/scripts/trilinos-integration/ATDM_configurations. + Use scripts to configure Trilinos for waterman (cuda, cuda-debug) and ride (cuda-rdc) that are provided in kokkos-kernels/scripts/trilinos-integration/ATDM_configurations. - These scripts load their own modules/environment, so don't require preparation of the system environment. You can run all them all at the same time, use separate directories for each. Instructions for compute node allocation, building, and testing are included in the scripts. + These scripts load their own modules/environment, so don't require preparation of the system environment. You can run them all at the same time, just use separate directories for each.
Instructions for compute node allocation, building, and testing are included in the scripts. The Trilinos configuration scripts include an override of the kokkos and kokkos-kernels packages; this requires that a symbolic link for each be created in the Trilinos base directory: @@ -109,12 +110,12 @@ Step 4: This step should be run on kokkos-dev 4.1. If you don't have a GitHub token already, generate one for yourself (this will give you TOKEN): - https://github.com/settings/tokens + https://github.com/settings/tokens 4.2. Get a clean copy of the kokkos and kokkos-kernels develop branches - git clone -b develop git@github.com:kokkos/kokkos.git - git clone -b develop git@github.com:kokkos/kokkos-kernels.git + git clone -b develop git@github.com:kokkos/kokkos.git + git clone -b develop git@github.com:kokkos/kokkos-kernels.git 4.3. If you haven't already, install Ruby and the "github_changelog_generator" "gem" The github_changelog_generator is here: https://github.com/skywinder/github-changelog-generator @@ -123,16 +124,17 @@ Step 4: This step should be run on kokkos-dev Follow the usual configure,make,make install process: https://www.ruby-lang.org/en/documentation/installation/#building-from-source Note that you will likely have to install to a non-default location with "./configure --prefix=/path" - 4.4. Generate the initial changelog. Use the most recent tag as OLDTAG (`git tag -l` can show you all tags). The NEWTAG is the new version number, e.g. "2.04.00". + 4.4. Generate the initial changelog(s). Use the most recent tag as OLDTAG (`git tag -l` can show you all tags). The NEWTAG is the new version number, e.g. "2.04.00". + RUN THIS OUTSIDE THE KOKKOS SOURCE TREE! 
NOTE: You likely need to set an HTTPS proxy in order for this script to work: - export https_proxy=http://wwwproxy.sandia.gov:80 + export https_proxy=http://wwwproxy.sandia.gov:80 - github_changelog_generator kokkos/kokkos --token TOKEN --no-pull-requests --include-labels 'InDevelop' --exclude-labels 'question,DevelopOnly' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG' + github_changelog_generator kokkos/kokkos --token TOKEN --no-pull-requests --include-labels 'InDevelop' --exclude-labels 'question,DevelopOnly' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG' - github_changelog_generator kokkos/kokkos-kernels --token TOKEN --no-pull-requests --include-labels 'InDevelop' --exclude-labels 'question,DevelopOnly' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG' + github_changelog_generator kokkos/kokkos-kernels --token TOKEN --no-pull-requests --include-labels 'InDevelop' --exclude-labels 'question,DevelopOnly' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG' 4.5. Manually cleanup and commit the change log. (Copy the new section from the generated CHANGELOG.md to the corresponding KOKKOS_PATH/CHANGELOG.md or KOKKOSKERNELS_PATH/CHANGELOG.md) @@ -205,16 +207,16 @@ Step 5: This step can be done on any SEMS machine (e.g. kokkos-dev). 5.1. Clone the Trilinos corresponding branch (or just switch to it) - git clone -b kokkos-promotion git@github.com:trilinos/Trilinos.git - TRILINOS_PATH=$PWD/Trilinos + git clone -b kokkos-promotion git@github.com:trilinos/Trilinos.git + TRILINOS_PATH=$PWD/Trilinos 5.2. Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files. Run the following outside of the Kokkos and Trilinos source trees. 
* Use the master branch of Kokkos for this. - module load sems-python/2.7.9 - python $KOKKOS_PATH/scripts/snapshot.py $KOKKOS_PATH $TRILINOS_PATH/packages - python $KOKKOS_PATH/scripts/snapshot.py $KOKKOSKERNELS_PATH $TRILINOS_PATH/packages + module load sems-python/2.7.9 + python $KOKKOS_PATH/scripts/snapshot.py $KOKKOS_PATH $TRILINOS_PATH/packages + python $KOKKOS_PATH/scripts/snapshot.py $KOKKOSKERNELS_PATH $TRILINOS_PATH/packages If snapshotting kokkos-kernels, use the snapshot.py in kokkos. @@ -244,21 +246,21 @@ Step 5: This step can be done on any SEMS machine (e.g. kokkos-dev). Step 6: Push Kokkos + KokkosKernels master and develop branches to respective GitHub repos (requires Owner permission). 6.1. Master branch: - cd KOKKOS_PATH + cd $KOKKOS_PATH git checkout master git push --follow-tags origin master - cd KOKKOSKERNELS_PATH + cd $KOKKOSKERNELS_PATH git checkout master git push --follow-tags origin master 6.2. Develop branch: First merge (--no-ff) master back into develop - cd KOKKOS_PATH + cd $KOKKOS_PATH git checkout develop git merge --no-ff master git push origin develop - cd KOKKOSKERNELS_PATH + cd $KOKKOSKERNELS_PATH git checkout develop git merge --no-ff master git push origin develop diff --git a/scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-depoff-rdc.sh b/scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-depoff-rdc.sh deleted file mode 100755 index 5e0e107a41b..00000000000 --- a/scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-depoff-rdc.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash - -echo "SOURCE this script!!" - -#export TRILINOS_DIR=${HOME}/trilinos/Trilinos -export TRILINOS_DIR=${PWD}/../.. 
- -# Load modules -module purge -source ${TRILINOS_DIR}/cmake/std/atdm/load-env.sh cuda-9.2-rdc-opt -#module swap cmake/3.6.2 cmake/3.12.3 -#source $TRILINOS_DIR/cmake/std/atdm/load-env.sh cuda-9.2-debug-Kepler37 - -# Packages -PACKAGE1=Tpetra -PACKAGE2=Sacado -PACKAGE3=Stokhos -PACKAGE4=MueLu -PACKAGE5=Intrepid2 -PACKAGE6=Ifpack2 -PACKAGE7=Panzer -PACKAGE8=Phalanx -PACKAGE9=Stratimikos -PACKAGE10=Belos - - -rm -rf CMake* - -# Configure -cmake \ - -GNinja \ - -DTrilinos_CONFIGURE_OPTIONS_FILE:STRING=cmake/std/atdm/ATDMDevEnv.cmake \ - -DTrilinos_ENABLE_TESTS=ON \ - -DTrilinos_ENABLE_${PACKAGE1}=ON \ - -DTrilinos_ENABLE_${PACKAGE2}=ON \ - -DTrilinos_ENABLE_${PACKAGE3}=ON \ - -DTrilinos_ENABLE_${PACKAGE4}=ON \ - -DTrilinos_ENABLE_${PACKAGE5}=ON \ - -DTrilinos_ENABLE_${PACKAGE6}=ON \ - -DTrilinos_ENABLE_${PACKAGE7}=ON \ - -DTrilinos_ENABLE_${PACKAGE8}=ON \ - -DTrilinos_ENABLE_${PACKAGE9}=ON \ - -DTrilinos_ENABLE_${PACKAGE10}=ON \ - -DKokkos_ENABLE_Cuda_Relocatable_Device_Code=ON \ - -DKOKKOS_ENABLE_DEPRECATED_CODE=OFF \ - -DKokkos_SOURCE_DIR_OVERRIDE:STRING=kokkos \ - -DKokkosKernels_SOURCE_DIR_OVERRIDE:STRING=kokkos-kernels \ -$TRILINOS_DIR - -# -DTrilinos_ENABLE_TESTS=ON -DTrilinos_ENABLE_${PACKAGE1}=ON \ -# -DKOKKOS_ENABLE_RELOCATABLE_DEVICE_CODE=ON \ - -# Notes: Compile using ninja -# make NP=32 - -# Allocate node: -# bsub -J TestKokkos-DepCodeOn -W 07:00 -Is -n 16 -q rhel7W bash - -# Run tests -# ctest -j16 - -# Submit tests as job -# bsub -x -Is -q rhel7W -n 16 ctest -j16 diff --git a/scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-depon-rdc-dbg.sh b/scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-depon-rdc-dbg.sh deleted file mode 100755 index d910ea658f6..00000000000 --- a/scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-depon-rdc-dbg.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -echo "SOURCE this script!!" - -export TRILINOS_DIR=${PWD}/../.. 
- -# Load modules -module purge -source ${TRILINOS_DIR}/cmake/std/atdm/load-env.sh cuda-9.2-rdc-debug - -# Packages -PACKAGE1=Tpetra -PACKAGE2=Sacado -PACKAGE3=Stokhos -PACKAGE4=MueLu -PACKAGE5=Intrepid2 -PACKAGE6=Ifpack2 -PACKAGE7=Panzer -PACKAGE8=Phalanx -PACKAGE9=Stratimikos -PACKAGE10=Belos - - -rm -rf CMake* - -# Configure -cmake \ - -GNinja \ - -DTrilinos_CONFIGURE_OPTIONS_FILE:STRING=cmake/std/atdm/ATDMDevEnv.cmake \ - -DTrilinos_ENABLE_TESTS=ON \ - -DTrilinos_ENABLE_${PACKAGE1}=ON \ - -DTrilinos_ENABLE_${PACKAGE2}=ON \ - -DTrilinos_ENABLE_${PACKAGE3}=ON \ - -DTrilinos_ENABLE_${PACKAGE4}=ON \ - -DTrilinos_ENABLE_${PACKAGE5}=ON \ - -DTrilinos_ENABLE_${PACKAGE6}=ON \ - -DTrilinos_ENABLE_${PACKAGE7}=ON \ - -DTrilinos_ENABLE_${PACKAGE8}=ON \ - -DTrilinos_ENABLE_${PACKAGE9}=ON \ - -DTrilinos_ENABLE_${PACKAGE10}=ON \ - -DKOKKOS_ENABLE_DEPRECATED_CODE=ON \ - -DKokkos_SOURCE_DIR_OVERRIDE:STRING=kokkos \ - -DKokkosKernels_SOURCE_DIR_OVERRIDE:STRING=kokkos-kernels \ - -DKokkos_ENABLE_Cuda_Relocatable_Device_Code=ON \ -$TRILINOS_DIR - - -# Notes: -# Compile using ninja -# make NP=32 - -# Allocate node: -# bsub -J TestKokkos-DepCodeOn -W 07:00 -Is -n 16 -q rhel7W bash - -# Run tests -# ctest -j8 - -# Submit tests as job -# bsub -x -Is -q rhel7W -n 16 ctest -j8 diff --git a/scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-ride-rdc-depoff.sh b/scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-ride-rdc-depoff.sh new file mode 100755 index 00000000000..da9017e388d --- /dev/null +++ b/scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-ride-rdc-depoff.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +echo "SOURCE this script!!" + +export TRILINOS_DIR=${PWD}/../.. 
+ +# Load modules +module purge +source ${TRILINOS_DIR}/cmake/std/atdm/load-env.sh cuda-9.2-rdc-release-debug-pt + +rm -rf CMake* + +# Configure +cmake \ + -GNinja \ + -DTrilinos_CONFIGURE_OPTIONS_FILE:STRING=cmake/std/atdm/ATDMDevEnv.cmake \ + -DTrilinos_ENABLE_TESTS=ON \ + -DTrilinos_ENABLE_ALL_PACKAGES=ON \ + -DKOKKOS_ENABLE_DEPRECATED_CODE=OFF \ + -DKokkos_SOURCE_DIR_OVERRIDE:STRING=kokkos \ + -DKokkosKernels_SOURCE_DIR_OVERRIDE:STRING=kokkos-kernels \ +$TRILINOS_DIR + +# Notes: +# Compile using ninja +# make NP=32 + +# Allocate node: +# bsub -J TestKokkos-DepCodeOn-rdcpt -W 07:00 -Is -n 16 -q rhel7W bash + +# Run tests +# ctest -j8 + +# Submit tests as job +# bsub -x -Is -q rhel7W -n 16 ctest -j8 diff --git a/scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-ride-rdc-depon.sh b/scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-ride-rdc-depon.sh new file mode 100755 index 00000000000..01e2def0152 --- /dev/null +++ b/scripts/trilinos-integration/ATDM_configurations/configure-atdm-cuda-ride-rdc-depon.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +echo "SOURCE this script!!" + +export TRILINOS_DIR=${PWD}/../.. 
+ +# Load modules +module purge +source ${TRILINOS_DIR}/cmake/std/atdm/load-env.sh cuda-9.2-rdc-release-debug-pt + +rm -rf CMake* + +# Configure +cmake \ + -GNinja \ + -DTrilinos_CONFIGURE_OPTIONS_FILE:STRING=cmake/std/atdm/ATDMDevEnv.cmake \ + -DTrilinos_ENABLE_TESTS=ON \ + -DTrilinos_ENABLE_ALL_PACKAGES=ON \ + -DKOKKOS_ENABLE_DEPRECATED_CODE=ON \ + -DKokkos_SOURCE_DIR_OVERRIDE:STRING=kokkos \ + -DKokkosKernels_SOURCE_DIR_OVERRIDE:STRING=kokkos-kernels \ +$TRILINOS_DIR + +# Notes: +# Compile using ninja +# make NP=32 + +# Allocate node: +# bsub -J TestKokkos-DepCodeOn-rdcpt -W 07:00 -Is -n 16 -q rhel7W bash + +# Run tests +# ctest -j8 + +# Submit tests as job +# bsub -x -Is -q rhel7W -n 16 ctest -j8 diff --git a/scripts/trilinos-integration/blake_jenkins_run_script_pthread_intel b/scripts/trilinos-integration/blake_jenkins_run_script_pthread_intel index df370509a74..f5aeacdf896 100755 --- a/scripts/trilinos-integration/blake_jenkins_run_script_pthread_intel +++ b/scripts/trilinos-integration/blake_jenkins_run_script_pthread_intel @@ -43,7 +43,7 @@ export JENKINS_DO_SHARED=ON export QUEUE=blake -module load python +module load python/2.7.13 export KOKKOS_PATH=${PWD}/kokkos diff --git a/scripts/trilinos-integration/blake_jenkins_run_script_serial_intel b/scripts/trilinos-integration/blake_jenkins_run_script_serial_intel index 04f1378cec6..a1555f9afb6 100755 --- a/scripts/trilinos-integration/blake_jenkins_run_script_serial_intel +++ b/scripts/trilinos-integration/blake_jenkins_run_script_serial_intel @@ -43,7 +43,7 @@ export JENKINS_DO_SHARED=ON export QUEUE=blake -module load python +module load python/2.7.13 export KOKKOS_PATH=${PWD}/kokkos diff --git a/scripts/trilinos-integration/white_run_jenkins_script_omp b/scripts/trilinos-integration/white_run_jenkins_script_omp index fc7e47bc292..56933f7bfca 100755 --- a/scripts/trilinos-integration/white_run_jenkins_script_omp +++ b/scripts/trilinos-integration/white_run_jenkins_script_omp @@ -40,7 +40,7 @@ export 
LAPACK_LIBRARIES="${LAPACK_ROOT}/lib/liblapack.a;gfortran;gomp" export JENKINS_DO_TESTS=ON export JENKINS_DO_EXAMPLES=ON -export QUEUE=rhel7F +export QUEUE=rhel7T module load python/2.7.12 From 9095de2666d51deb16ad82a292bf815afa79dce8 Mon Sep 17 00:00:00 2001 From: Jeffery Scott Miles Date: Mon, 1 Jul 2019 11:12:04 -0600 Subject: [PATCH 025/530] Update from kokkos/nvcc_wrapper develop (includes kokkos/nvcc_wrapper#30) -- add -Xnvlink option --- bin/nvcc_wrapper | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/bin/nvcc_wrapper b/bin/nvcc_wrapper index 94bc72854e7..cb3212f2b04 100755 --- a/bin/nvcc_wrapper +++ b/bin/nvcc_wrapper @@ -133,7 +133,11 @@ do if [ $optimization_applied -eq 1 ]; then echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting." else - shared_args="$shared_args $1" + if [ "$1" = "-O" ]; then + shared_args="$shared_args -O2" + else + shared_args="$shared_args $1" + fi optimization_applied=1 fi ;; @@ -179,7 +183,7 @@ do cuda_args="$cuda_args $1" ;; #Handle known nvcc args that have an argument - -rdc|-maxrregcount|--default-stream) + -rdc|-maxrregcount|--default-stream|-Xnvlink) cuda_args="$cuda_args $1 $2" shift ;; @@ -308,16 +312,6 @@ do shift done -#Check if nvcc exists -if [ $host_only -ne 1 ]; then - var=$(which nvcc ) - if [ $? -gt 0 ]; then - echo "Could not find nvcc in PATH" - exit $? 
- fi -fi - - # Only print host compiler version if [ $get_host_version -eq 1 ]; then $host_compiler --version From 1510c4c905c0734019474701d2b4d00c0bf3e44b Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Mon, 1 Jul 2019 12:13:20 -0600 Subject: [PATCH 026/530] testscatterview: Reduce problem size for serial runs Address issue #2189 --- containers/unit_tests/TestScatterView.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/containers/unit_tests/TestScatterView.hpp b/containers/unit_tests/TestScatterView.hpp index a9d97b32f39..54ec692639d 100644 --- a/containers/unit_tests/TestScatterView.hpp +++ b/containers/unit_tests/TestScatterView.hpp @@ -492,7 +492,8 @@ TEST_F( TEST_CATEGORY, scatterview) { #ifdef KOKKOS_ENABLE_DEBUG int big_n = 100 * 1000; #else - int big_n = 10 * 1000 * 1000; + bool is_serial = std::is_same::value; + int big_n = is_serial ? 100 * 1000 : 10000 * 1000; #endif test_scatter_view(big_n); test_scatter_view(big_n); From 43ed175edd22fdcc34cd5a8185101fc5288f8121 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Mon, 1 Jul 2019 12:29:50 -0600 Subject: [PATCH 027/530] testscatterview: Add ifguard for KOKKOS_ENABLE_SERIAL --- containers/unit_tests/TestScatterView.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/containers/unit_tests/TestScatterView.hpp b/containers/unit_tests/TestScatterView.hpp index 54ec692639d..206cd9747f3 100644 --- a/containers/unit_tests/TestScatterView.hpp +++ b/containers/unit_tests/TestScatterView.hpp @@ -492,8 +492,14 @@ TEST_F( TEST_CATEGORY, scatterview) { #ifdef KOKKOS_ENABLE_DEBUG int big_n = 100 * 1000; #else + +#ifdef KOKKOS_ENABLE_SERIAL bool is_serial = std::is_same::value; int big_n = is_serial ? 
100 * 1000 : 10000 * 1000; +#else + int big_n = 10000 * 1000; +#endif + #endif test_scatter_view(big_n); test_scatter_view(big_n); From 81a3deeeabab9a98b0e9359c6d949fb0e92f8afd Mon Sep 17 00:00:00 2001 From: jjwilke Date: Wed, 3 Jul 2019 13:13:00 -0700 Subject: [PATCH 028/530] fixes for object libs for < 3.12 : #2201 --- CMakeLists.txt | 5 +- cmake/KokkosConfig.cmake.in | 8 +- cmake/fake_tribits.cmake | 19 +++- cmake/kokkos_tribits.cmake | 174 ++++++++++++++++++++++++------------ 4 files changed, 142 insertions(+), 64 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c319c0353ff..257acdff385 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,8 +84,9 @@ KOKKOS_EXCLUDE_AUTOTOOLS_FILES() KOKKOS_PACKAGE_POSTPROCESS() IF (NOT KOKKOS_HAS_TRILINOS) - #just always do it - no more separate libs stuff - KOKKOS_MAKE_LIBKOKKOS() + IF (NOT KOKKOS_SEPARATE_LIBS) + KOKKOS_MAKE_LIBKOKKOS() + ENDIF() include(${KOKKOS_SRC_PATH}/cmake/kokkos_install.cmake) ENDIF() diff --git a/cmake/KokkosConfig.cmake.in b/cmake/KokkosConfig.cmake.in index c6c2474f63d..8d80d6b8830 100644 --- a/cmake/KokkosConfig.cmake.in +++ b/cmake/KokkosConfig.cmake.in @@ -25,7 +25,13 @@ FUNCTION(TARGET_LINK_KOKKOS TARGET) "" ${ARGN} ) - TARGET_LINK_LIBRARIES(${TARGET} ${ARGN} Kokkos::kokkos) + IF (Kokkos_SEPARATE_LIBS) + TARGET_LINK_LIBRARIES(${TARGET} ${ARGN} Kokkos::kokkoscore) + TARGET_LINK_LIBRARIES(${TARGET} ${ARGN} Kokkos::kokkoscontainers) + TARGET_LINK_LIBRARIES(${TARGET} ${ARGN} Kokkos::kokkosalgorithms) + ELSE() + TARGET_LINK_LIBRARIES(${TARGET} ${ARGN} Kokkos::kokkos) + ENDIF() ENDFUNCTION(TARGET_LINK_KOKKOS) #Find dependencies diff --git a/cmake/fake_tribits.cmake b/cmake/fake_tribits.cmake index bcb716953ad..faa3679bc08 100644 --- a/cmake/fake_tribits.cmake +++ b/cmake/fake_tribits.cmake @@ -231,7 +231,7 @@ ENDFUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET) FUNCTION(KOKKOS_LINK_INTERNAL_LIBRARY TARGET DEPLIB) IF(KOKKOS_HAS_TRILINOS) #do nothing -ELSE(KOKKOS_SEPARATE_LIBS) +ELSE() 
SET(options INTERFACE) SET(oneValueArgs) SET(multiValueArgs) @@ -246,7 +246,22 @@ ELSE(KOKKOS_SEPARATE_LIBS) ELSE() SET(LINK_TYPE PUBLIC) ENDIF() - TARGET_LINK_LIBRARIES(${TARGET} ${LINK_TYPE} ${DEPLIB}) + IF (KOKKOS_SEPARATE_LIBS) + #these are real libraries (not object libraries) + TARGET_LINK_LIBRARIES(${TARGET} ${LINK_TYPE} ${DEPLIB}) + ELSE() + IF(${CMAKE_VERSION} VERSION_LESS "3.12" OR MSVC) + #we are not able to set properties directly on object libraries yet + #so we have to propagate properties in a super annoying way + GET_TARGET_PROPERTY(DIRS ${DEPLIB} INTERFACE_INCLUDE_DIRECTORIES) + GET_TARGET_PROPERTY(FLAGS ${DEPLIB} INTERFACE_COMPILE_OPTIONS) + TARGET_INCLUDE_DIRECTORIES(${TARGET} ${LINK_TYPE} ${DIRS}) + TARGET_COMPILE_OPTIONS(${TARGET} ${LINK_TYPE} ${FLAGS}) + ELSE() + #we can do this the right way + TARGET_LINK_LIBRARIES(${TARGET} ${LINK_TYPE} ${DEPLIB}) + ENDIF() + ENDIF() VERIFY_EMPTY(KOKKOS_LINK_INTERNAL_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) ENDIF() ENDFUNCTION(KOKKOS_LINK_INTERNAL_LIBRARY) diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index 1e67f749c96..62cbc46e4e1 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -215,12 +215,86 @@ MACRO(KOKKOS_PACKAGE_POSTPROCESS) ENDMACRO(KOKKOS_PACKAGE_POSTPROCESS) MACRO(KOKKOS_MAKE_LIBKOKKOS) - ADD_LIBRARY(kokkos ${KOKKOS_SOURCE_DIR}/core/src/dummy.cpp) - TARGET_LINK_LIBRARIES(kokkos PUBLIC kokkoscore kokkoscontainers) - TARGET_LINK_LIBRARIES(kokkos PUBLIC kokkosalgorithms) + IF (KOKKOS_SEPARATE_LIBS) + MESSAGE(FATAL_ERROR "Internal error: should not make single libkokkos with -DKOKKOS_SEPARATE_LIBS=On") + ENDIF() + IF(${CMAKE_VERSION} VERSION_LESS "3.12" OR MSVC) + #we are not able to set properties directly on object libraries yet + #so we have had to delay kokkos_link_options until here + #and do a bunch of other annoying work + ADD_LIBRARY(kokkos ${KOKKOS_SOURCE_DIR}/core/src/dummy.cpp + $ + $ + ) + TARGET_LINK_LIBRARIES(kokkos PUBLIC 
${KOKKOS_LINK_OPTIONS}) + #still need the header-only library + TARGET_LINK_LIBRARIES(kokkos PUBLIC kokkosalgorithms) + KOKKOS_LINK_TPLS(kokkos) + #these properties do not work transitively correctly so we + #need some verbose hackery to make it work + GET_TARGET_PROPERTY(CORE_DIRS kokkoscore INTERFACE_INCLUDE_DIRECTORIES) + GET_TARGET_PROPERTY(CTRS_DIRS kokkoscontainers INTERFACE_INCLUDE_DIRECTORIES) + TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC ${CORE_DIRS}) + TARGET_INCLUDE_DIRECTORIES(kokkos PUBLIC ${CTRS_DIRS}) + + GET_TARGET_PROPERTY(CORE_FLAGS kokkoscore INTERFACE_COMPILE_OPTIONS) + GET_TARGET_PROPERTY(CTRS_FLAGS kokkoscontainers INTERFACE_COMPILE_OPTIONS) + TARGET_COMPILE_OPTIONS(kokkos PUBLIC ${CORE_FLAGS}) + TARGET_COMPILE_OPTIONS(kokkos PUBLIC ${CTRS_FLAGS}) + ELSE() + ADD_LIBRARY(kokkos ${KOKKOS_SOURCE_DIR}/core/src/dummy.cpp) + TARGET_LINK_LIBRARIES(kokkos PUBLIC kokkoscore kokkoscontainers) + TARGET_LINK_LIBRARIES(kokkos PUBLIC kokkosalgorithms) + ENDIF() KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(kokkos) ENDMACRO() +FUNCTION(KOKKOS_LINK_TPLS LIBRARY_NAME) + IF (KOKKOS_ENABLE_CUDA) + SET(LIB_cuda "-lcuda") + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC cuda) + ENDIF() + + IF (KOKKOS_ENABLE_HPX) + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC ${HPX_LIBRARIES}) + TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PUBLIC ${HPX_INCLUDE_DIRS}) + ENDIF() + + IF (KOKKOS_ENABLE_HWLOC) + #this is really annoying that I have to do this + #if CMake links statically, it will not link in hwloc which causes undefined refs downstream + #even though hwloc is really "private" and doesn't need to be public I have to link publicly + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC Kokkos::hwloc) + #what I don't want is the headers to be propagated downstream + TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PRIVATE ${HWLOC_INCLUDE_DIR}) + ENDIF() + + IF (KOKKOS_ENABLE_LIBNUMA) + #this is really annoying that I have to do this + #if CMake links statically, it will not link in hwloc which causes 
undefined refs downstream + #even though hwloc is really "private" and doesn't need to be public I have to link publicly + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC Kokkos::libnuma) + #what I don't want is the headers to be propagated downstream + TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PRIVATE ${LIBNUMA_INCLUDE_DIR}) + ENDIF() + + IF (KOKKOS_ENABLE_LIBRT) + #this is really annoying that I have to do this + #if CMake links statically, it will not link in librt which causes undefined refs downstream + #even though librt is really "private" and doesn't need to be public I have to link publicly + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE Kokkos::librt) + #what I don't want is the headers to be propagated downstream + TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PRIVATE ${LIBRT_INCLUDE_DIR}) + ENDIF() + + IF (KOKKOS_ENABLE_MEMKIND) + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE Kokkos::memkind) + ENDIF() + + #dlfcn.h is in header files and needs to propagate + TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC Kokkos::libdl) +ENDFUNCTION() + FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) CMAKE_PARSE_ARGUMENTS(PARSE "STATIC;SHARED" @@ -241,6 +315,18 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) ${PARSE_HEADERS} ${PARSE_SOURCES} ) + KOKKOS_LINK_TPLS(${LIBRARY_NAME}) + IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.13") + #great, this works the "right" way + TARGET_LINK_OPTIONS( + ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} + ) + ELSE() + #well, have to do it the wrong way for now + TARGET_LINK_LIBRARIES( + ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} + ) + ENDIF() ELSE() ADD_LIBRARY( ${LIBRARY_NAME} @@ -248,6 +334,24 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) ${PARSE_HEADERS} ${PARSE_SOURCES} ) + IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.13") + #great, this works the "right" way + TARGET_LINK_OPTIONS( + ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} + ) + #I can go ahead and link the TPLs here + KOKKOS_LINK_TPLS(${LIBRARY_NAME}) + 
ELSEIF(${CMAKE_VERSION} VERSION_LESS "3.12" OR MSVC) + #nothing works yet for object libraries + #we will need to hack this later for libkokkos + #I also can't link the TPLs here - also must be delayed + ELSE() + TARGET_LINK_LIBRARIES( + ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} + ) + #I can go ahead and link the TPLs here + KOKKOS_LINK_TPLS(${LIBRARY_NAME}) + ENDIF() ENDIF() TARGET_COMPILE_OPTIONS( @@ -255,17 +359,6 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) PUBLIC $<$:${KOKKOS_COMPILE_OPTIONS}> ) - IF(${CMAKE_VERSION} VERSION_GREATER "3.13" OR ${CMAKE_VERSION} VERSION_EQUAL "3.13") - TARGET_LINK_OPTIONS( - ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} - ) - ELSE() - #Well, this is annoying - I am going to need to hack this for Visual Studio - TARGET_LINK_LIBRARIES( - ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_OPTIONS} - ) - ENDIF() - IF (KOKKOS_ENABLE_CUDA) TARGET_COMPILE_OPTIONS( ${LIBRARY_NAME} @@ -306,46 +399,6 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) PUBLIC ${KOKKOS_TPL_INCLUDE_DIRS} ) - IF (KOKKOS_ENABLE_CUDA) - SET(LIB_cuda "-lcuda") - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC cuda) - ENDIF() - - IF (KOKKOS_ENABLE_HPX) - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC ${HPX_LIBRARIES}) - TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PUBLIC ${HPX_INCLUDE_DIRS}) - ENDIF() - - IF (KOKKOS_ENABLE_HWLOC) - #this is really annoying that I have to do this - #if CMake links statically, it will not link in hwloc which causes undefined refs downstream - #even though hwloc is really "private" and doesn't need to be public I have to link publicly - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC Kokkos::hwloc) - #what I don't want is the headers to be propagated downstream - TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PRIVATE ${HWLOC_INCLUDE_DIR}) - ENDIF() - - IF (KOKKOS_ENABLE_LIBNUMA) - #this is really annoying that I have to do this - #if CMake links statically, it will not link in hwloc which causes undefined refs downstream - #even though hwloc is 
really "private" and doesn't need to be public I have to link publicly - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC Kokkos::libnuma) - #what I don't want is the headers to be propagated downstream - TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PRIVATE ${LIBNUMA_INCLUDE_DIR}) - ENDIF() - - IF (KOKKOS_ENABLE_LIBRT) - #this is really annoying that I have to do this - #if CMake links statically, it will not link in librt which causes undefined refs downstream - #even though librt is really "private" and doesn't need to be public I have to link publicly - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE Kokkos::librt) - #what I don't want is the headers to be propagated downstream - TARGET_INCLUDE_DIRECTORIES(${LIBRARY_NAME} PRIVATE ${LIBRT_INCLUDE_DIR}) - ENDIF() - - IF (KOKKOS_ENABLE_MEMKIND) - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PRIVATE Kokkos::memkind) - ENDIF() IF (KOKKOS_CXX_STANDARD_FEATURE) #GREAT! I can't do this the right way @@ -355,12 +408,15 @@ FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) TARGET_COMPILE_OPTIONS(${LIBRARY_NAME} PUBLIC ${KOKKOS_CXX_STANDARD_FLAG}) ENDIF() - #dlfcn.h is in header files and needs to propagate - TARGET_LINK_LIBRARIES(${LIBRARY_NAME} PUBLIC Kokkos::libdl) - #Even if separate libs and these are object libraries - #We still need to install them for transitive flags and deps - KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${LIBRARY_NAME}) + IF (KOKKOS_SEPARATE_LIBS OR ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12") + #Even if separate libs and these are object libraries + #We still need to install them for transitive flags and deps + KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${LIBRARY_NAME}) + ELSE() + #this is an object library and cmake <3.12 doesn't do this correctly + #so.... 
do nothing + ENDIF() INSTALL( FILES ${PARSE_HEADERS} From 9ac21d42d0d8ab13c25e45c771a9490b8214574e Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Tue, 9 Jul 2019 14:50:16 -0700 Subject: [PATCH 029/530] Adding clang format style file --- .clang-format | 6 ++++++ scripts/apply-clang-format | 1 + 2 files changed, 7 insertions(+) create mode 100644 .clang-format create mode 100644 scripts/apply-clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000000..db5f94fa2eb --- /dev/null +++ b/.clang-format @@ -0,0 +1,6 @@ +#Official Tool: clang-format version 8.0.0 +BasedOnStyle: google +SortIncludes: false +AlignConsecutiveAssignments: true +AllowShortCaseLabelsOnASingleLine: true +AllowShortIfStatementsOnASingleLine: true diff --git a/scripts/apply-clang-format b/scripts/apply-clang-format new file mode 100644 index 00000000000..1d1fb92564f --- /dev/null +++ b/scripts/apply-clang-format @@ -0,0 +1 @@ +find -name '*.cpp' -o -name '*.hpp' | xargs clang-format -i From ef05f00205343eb4b847654153bec8a125f101e2 Mon Sep 17 00:00:00 2001 From: Christian Trott Date: Wed, 17 Jul 2019 08:28:16 -0600 Subject: [PATCH 030/530] Apply clang-format --- algorithms/src/Kokkos_Random.hpp | 2672 ++++---- algorithms/src/Kokkos_Sort.hpp | 656 +- algorithms/unit_tests/TestCuda.cpp | 44 +- algorithms/unit_tests/TestHPX.cpp | 39 +- algorithms/unit_tests/TestOpenMP.cpp | 37 +- algorithms/unit_tests/TestROCm.cpp | 49 +- algorithms/unit_tests/TestRandom.hpp | 500 +- algorithms/unit_tests/TestSerial.cpp | 42 +- algorithms/unit_tests/TestSort.hpp | 287 +- algorithms/unit_tests/TestThreads.cpp | 40 +- algorithms/unit_tests/UnitTestMain.cpp | 5 +- benchmarks/atomic/main.cpp | 202 +- benchmarks/bytes_and_flops/bench.hpp | 61 +- benchmarks/bytes_and_flops/bench_stride.hpp | 110 +- .../bytes_and_flops/bench_unroll_stride.hpp | 170 +- benchmarks/bytes_and_flops/main.cpp | 43 +- benchmarks/gather/gather.hpp | 44 +- benchmarks/gather/gather_unroll.hpp | 191 +- 
benchmarks/gather/main.cpp | 43 +- benchmarks/policy_performance/main.cpp | 168 +- .../policy_performance/policy_perf_test.hpp | 559 +- containers/performance_tests/TestCuda.cpp | 37 +- .../performance_tests/TestDynRankView.hpp | 148 +- .../performance_tests/TestGlobal2LocalIds.hpp | 127 +- containers/performance_tests/TestHPX.cpp | 57 +- containers/performance_tests/TestMain.cpp | 3 +- containers/performance_tests/TestOpenMP.cpp | 68 +- containers/performance_tests/TestROCm.cpp | 27 +- .../performance_tests/TestScatterView.hpp | 101 +- containers/performance_tests/TestThreads.cpp | 44 +- .../TestUnorderedMapPerformance.hpp | 89 +- containers/src/Kokkos_Bitset.hpp | 337 +- containers/src/Kokkos_DualView.hpp | 819 +-- containers/src/Kokkos_DynRankView.hpp | 3165 +++++----- containers/src/Kokkos_DynamicView.hpp | 708 +-- containers/src/Kokkos_ErrorReporter.hpp | 100 +- containers/src/Kokkos_Functional.hpp | 74 +- containers/src/Kokkos_OffsetView.hpp | 3587 ++++++----- containers/src/Kokkos_ScatterView.hpp | 1466 +++-- containers/src/Kokkos_StaticCrsGraph.hpp | 419 +- containers/src/Kokkos_UnorderedMap.hpp | 608 +- containers/src/Kokkos_Vector.hpp | 219 +- containers/src/impl/Kokkos_Bitset_impl.hpp | 49 +- .../src/impl/Kokkos_Functional_impl.hpp | 102 +- .../impl/Kokkos_StaticCrsGraph_factory.hpp | 242 +- .../src/impl/Kokkos_UnorderedMap_impl.cpp | 94 +- .../src/impl/Kokkos_UnorderedMap_impl.hpp | 158 +- containers/unit_tests/TestBitset.hpp | 172 +- containers/unit_tests/TestDualView.hpp | 284 +- containers/unit_tests/TestDynViewAPI.hpp | 2414 +++---- .../unit_tests/TestDynViewAPI_generic.hpp | 9 +- .../unit_tests/TestDynViewAPI_rank12345.hpp | 9 +- .../unit_tests/TestDynViewAPI_rank67.hpp | 9 +- containers/unit_tests/TestDynamicView.hpp | 248 +- containers/unit_tests/TestErrorReporter.hpp | 135 +- containers/unit_tests/TestOffsetView.hpp | 704 ++- containers/unit_tests/TestScatterView.hpp | 653 +- containers/unit_tests/TestStaticCrsGraph.hpp | 295 +- 
containers/unit_tests/TestUnorderedMap.hpp | 265 +- containers/unit_tests/TestVector.hpp | 107 +- .../TestViewCtorPropEmbeddedDim.hpp | 162 +- containers/unit_tests/UnitTestMain.cpp | 5 +- .../unit_tests/cuda/TestCuda_BitSet.cpp | 5 +- .../unit_tests/cuda/TestCuda_Category.hpp | 10 +- .../unit_tests/cuda/TestCuda_DualView.cpp | 5 +- .../cuda/TestCuda_DynRankViewAPI_generic.cpp | 5 +- .../TestCuda_DynRankViewAPI_rank12345.cpp | 5 +- .../cuda/TestCuda_DynRankViewAPI_rank67.cpp | 5 +- .../unit_tests/cuda/TestCuda_DynamicView.cpp | 5 +- .../cuda/TestCuda_ErrorReporter.cpp | 5 +- .../unit_tests/cuda/TestCuda_OffsetView.cpp | 5 +- .../unit_tests/cuda/TestCuda_ScatterView.cpp | 5 +- .../cuda/TestCuda_StaticCrsGraph.cpp | 5 +- .../unit_tests/cuda/TestCuda_UnorderedMap.cpp | 5 +- .../unit_tests/cuda/TestCuda_Vector.cpp | 5 +- .../cuda/TestCuda_ViewCtorPropEmbeddedDim.cpp | 5 +- containers/unit_tests/hpx/TestHPX_BitSet.cpp | 5 +- .../unit_tests/hpx/TestHPX_Category.hpp | 10 +- .../unit_tests/hpx/TestHPX_DualView.cpp | 5 +- .../hpx/TestHPX_DynRankViewAPI_generic.cpp | 5 +- .../hpx/TestHPX_DynRankViewAPI_rank12345.cpp | 5 +- .../hpx/TestHPX_DynRankViewAPI_rank67.cpp | 5 +- .../unit_tests/hpx/TestHPX_DynamicView.cpp | 5 +- .../unit_tests/hpx/TestHPX_ErrorReporter.cpp | 5 +- .../unit_tests/hpx/TestHPX_OffsetView.cpp | 5 +- .../unit_tests/hpx/TestHPX_ScatterView.cpp | 5 +- .../unit_tests/hpx/TestHPX_StaticCrsGraph.cpp | 5 +- .../unit_tests/hpx/TestHPX_UnorderedMap.cpp | 5 +- containers/unit_tests/hpx/TestHPX_Vector.cpp | 5 +- .../hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp | 5 +- .../unit_tests/openmp/TestOpenMP_BitSet.cpp | 5 +- .../unit_tests/openmp/TestOpenMP_Category.hpp | 10 +- .../unit_tests/openmp/TestOpenMP_DualView.cpp | 5 +- .../TestOpenMP_DynRankViewAPI_generic.cpp | 5 +- .../TestOpenMP_DynRankViewAPI_rank12345.cpp | 5 +- .../TestOpenMP_DynRankViewAPI_rank67.cpp | 5 +- .../openmp/TestOpenMP_DynamicView.cpp | 5 +- .../openmp/TestOpenMP_ErrorReporter.cpp | 5 +- 
.../openmp/TestOpenMP_OffsetView.cpp | 5 +- .../openmp/TestOpenMP_ScatterView.cpp | 5 +- .../openmp/TestOpenMP_StaticCrsGraph.cpp | 5 +- .../openmp/TestOpenMP_UnorderedMap.cpp | 5 +- .../unit_tests/openmp/TestOpenMP_Vector.cpp | 5 +- .../TestOpenMP_ViewCtorPropEmbeddedDim.cpp | 5 +- .../unit_tests/rocm/TestROCm_BitSet.cpp | 5 +- .../unit_tests/rocm/TestROCm_Category.hpp | 10 +- .../unit_tests/rocm/TestROCm_DualView.cpp | 5 +- .../rocm/TestROCm_DynRankViewAPI_generic.cpp | 5 +- .../TestROCm_DynRankViewAPI_rank12345.cpp | 5 +- .../rocm/TestROCm_DynRankViewAPI_rank67.cpp | 5 +- .../unit_tests/rocm/TestROCm_DynamicView.cpp | 5 +- .../rocm/TestROCm_ErrorReporter.cpp | 5 +- .../unit_tests/rocm/TestROCm_ScatterView.cpp | 5 +- .../rocm/TestROCm_StaticCrsGraph.cpp | 5 +- .../unit_tests/rocm/TestROCm_UnorderedMap.cpp | 5 +- .../unit_tests/rocm/TestROCm_Vector.cpp | 5 +- .../rocm/TestROCm_ViewCtorPropEmbeddedDim.cpp | 5 +- .../unit_tests/serial/TestSerial_BitSet.cpp | 5 +- .../unit_tests/serial/TestSerial_Category.hpp | 10 +- .../unit_tests/serial/TestSerial_DualView.cpp | 5 +- .../TestSerial_DynRankViewAPI_generic.cpp | 5 +- .../TestSerial_DynRankViewAPI_rank12345.cpp | 5 +- .../TestSerial_DynRankViewAPI_rank67.cpp | 5 +- .../serial/TestSerial_DynamicView.cpp | 5 +- .../serial/TestSerial_ErrorReporter.cpp | 5 +- .../serial/TestSerial_OffsetView.cpp | 5 +- .../serial/TestSerial_ScatterView.cpp | 5 +- .../serial/TestSerial_StaticCrsGraph.cpp | 5 +- .../serial/TestSerial_UnorderedMap.cpp | 5 +- .../unit_tests/serial/TestSerial_Vector.cpp | 5 +- .../TestSerial_ViewCtorPropEmbeddedDim.cpp | 5 +- .../unit_tests/threads/TestThreads_BitSet.cpp | 5 +- .../threads/TestThreads_Category.hpp | 10 +- .../threads/TestThreads_DualView.cpp | 5 +- .../TestThreads_DynRankViewAPI_generic.cpp | 5 +- .../TestThreads_DynRankViewAPI_rank12345.cpp | 5 +- .../TestThreads_DynRankViewAPI_rank67.cpp | 5 +- .../threads/TestThreads_DynamicView.cpp | 5 +- .../threads/TestThreads_ErrorReporter.cpp | 5 +- 
.../threads/TestThreads_OffsetView.cpp | 5 +- .../threads/TestThreads_ScatterView.cpp | 5 +- .../threads/TestThreads_StaticCrsGraph.cpp | 5 +- .../threads/TestThreads_UnorderedMap.cpp | 5 +- .../unit_tests/threads/TestThreads_Vector.cpp | 5 +- .../TestThreads_ViewCtorPropEmbeddedDim.cpp | 5 +- core/perf_test/PerfTestBlasKernels.hpp | 243 +- core/perf_test/PerfTestDriver.hpp | 534 +- core/perf_test/PerfTestGramSchmidt.cpp | 271 +- core/perf_test/PerfTestHexGrad.cpp | 426 +- core/perf_test/PerfTestMDRange.hpp | 751 ++- core/perf_test/PerfTestMain.cpp | 20 +- core/perf_test/PerfTest_Category.hpp | 10 +- core/perf_test/PerfTest_CustomReduction.cpp | 117 +- .../PerfTest_ExecSpacePartitioning.cpp | 1046 ++-- core/perf_test/PerfTest_ViewAllocate.cpp | 114 +- core/perf_test/PerfTest_ViewCopy.hpp | 257 +- core/perf_test/PerfTest_ViewCopy_a123.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_a45.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_a6.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_a7.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_a8.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_b123.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_b45.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_b6.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_b7.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_b8.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_c123.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_c45.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_c6.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_c7.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_c8.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_d123.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_d45.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_d6.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_d7.cpp | 8 +- core/perf_test/PerfTest_ViewCopy_d8.cpp | 8 +- core/perf_test/PerfTest_ViewFill.hpp | 221 +- core/perf_test/PerfTest_ViewFill_123.cpp | 8 +- core/perf_test/PerfTest_ViewFill_45.cpp | 8 +- core/perf_test/PerfTest_ViewFill_6.cpp | 8 +- core/perf_test/PerfTest_ViewFill_7.cpp | 8 +- 
core/perf_test/PerfTest_ViewFill_8.cpp | 8 +- core/perf_test/PerfTest_ViewResize.hpp | 274 +- core/perf_test/PerfTest_ViewResize_123.cpp | 8 +- core/perf_test/PerfTest_ViewResize_45.cpp | 8 +- core/perf_test/PerfTest_ViewResize_6.cpp | 8 +- core/perf_test/PerfTest_ViewResize_7.cpp | 8 +- core/perf_test/PerfTest_ViewResize_8.cpp | 8 +- core/perf_test/test_atomic.cpp | 449 +- core/perf_test/test_mempool.cpp | 349 +- core/perf_test/test_taskdag.cpp | 253 +- core/src/Cuda/KokkosExp_Cuda_IterateTile.hpp | 1746 +++--- .../KokkosExp_Cuda_IterateTile_Refactor.hpp | 3341 +++++----- core/src/Cuda/Kokkos_CudaSpace.cpp | 931 ++- core/src/Cuda/Kokkos_Cuda_Alloc.hpp | 108 +- .../Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp | 1281 ++-- ...uda_Atomic_Intrinsics_Restore_Builtins.hpp | 2 +- .../Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp | 693 +- core/src/Cuda/Kokkos_Cuda_Error.hpp | 27 +- core/src/Cuda/Kokkos_Cuda_Instance.cpp | 813 ++- core/src/Cuda/Kokkos_Cuda_Instance.hpp | 198 +- core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp | 587 +- core/src/Cuda/Kokkos_Cuda_Locks.cpp | 34 +- core/src/Cuda/Kokkos_Cuda_Locks.hpp | 73 +- core/src/Cuda/Kokkos_Cuda_Parallel.hpp | 3681 ++++++----- core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp | 1195 ++-- core/src/Cuda/Kokkos_Cuda_Task.cpp | 17 +- core/src/Cuda/Kokkos_Cuda_Task.hpp | 1104 ++-- core/src/Cuda/Kokkos_Cuda_Team.hpp | 1105 ++-- core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp | 112 +- core/src/Cuda/Kokkos_Cuda_Vectorization.hpp | 559 +- .../Kokkos_Cuda_Version_9_8_Compatibility.hpp | 123 +- core/src/Cuda/Kokkos_Cuda_View.hpp | 319 +- .../src/Cuda/Kokkos_Cuda_ViewCopyETIAvail.hpp | 7 +- core/src/Cuda/Kokkos_Cuda_ViewCopyETIDecl.hpp | 7 +- core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp | 102 +- core/src/Cuda/Kokkos_Cuda_abort.hpp | 29 +- core/src/HPX/Kokkos_HPX.cpp | 20 +- core/src/HPX/Kokkos_HPX_Task.cpp | 6 +- core/src/HPX/Kokkos_HPX_Task.hpp | 44 +- core/src/HPX/Kokkos_HPX_ViewCopyETIAvail.hpp | 7 +- core/src/HPX/Kokkos_HPX_ViewCopyETIDecl.hpp | 7 +- 
core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp | 10 +- core/src/KokkosExp_MDRangePolicy.hpp | 716 ++- core/src/Kokkos_AnonymousSpace.hpp | 63 +- core/src/Kokkos_Array.hpp | 413 +- core/src/Kokkos_Atomic.hpp | 107 +- core/src/Kokkos_Complex.hpp | 716 +-- core/src/Kokkos_Concepts.hpp | 409 +- core/src/Kokkos_CopyViews.hpp | 4164 +++++++------ core/src/Kokkos_Core.hpp | 138 +- core/src/Kokkos_Core_fwd.hpp | 319 +- core/src/Kokkos_Crs.hpp | 320 +- core/src/Kokkos_Cuda.hpp | 164 +- core/src/Kokkos_CudaSpace.hpp | 911 ++- core/src/Kokkos_ExecPolicy.hpp | 1069 ++-- core/src/Kokkos_Extents.hpp | 102 +- core/src/Kokkos_Future.hpp | 484 +- core/src/Kokkos_HBWSpace.hpp | 253 +- core/src/Kokkos_HPX.hpp | 455 +- core/src/Kokkos_HostSpace.hpp | 224 +- core/src/Kokkos_Layout.hpp | 314 +- core/src/Kokkos_Macros.hpp | 693 +- core/src/Kokkos_MasterLock.hpp | 9 +- core/src/Kokkos_MemoryPool.hpp | 1127 ++-- core/src/Kokkos_MemoryTraits.hpp | 88 +- core/src/Kokkos_NumericTraits.hpp | 413 +- core/src/Kokkos_OpenMP.hpp | 128 +- core/src/Kokkos_OpenMPTarget.hpp | 72 +- core/src/Kokkos_OpenMPTargetSpace.hpp | 196 +- core/src/Kokkos_Pair.hpp | 308 +- core/src/Kokkos_Parallel.hpp | 393 +- core/src/Kokkos_Parallel_Reduce.hpp | 1132 ++-- core/src/Kokkos_PointerOwnership.hpp | 5 +- core/src/Kokkos_Profiling_ProfileSection.hpp | 90 +- core/src/Kokkos_Qthreads.hpp | 66 +- core/src/Kokkos_ROCm.hpp | 158 +- core/src/Kokkos_ROCmSpace.hpp | 604 +- core/src/Kokkos_ScratchSpace.hpp | 175 +- core/src/Kokkos_Serial.hpp | 1474 +++-- core/src/Kokkos_TaskPolicy.hpp | 9 +- core/src/Kokkos_TaskScheduler.hpp | 737 +-- core/src/Kokkos_TaskScheduler_fwd.hpp | 148 +- core/src/Kokkos_Threads.hpp | 106 +- core/src/Kokkos_Timer.hpp | 30 +- core/src/Kokkos_UniqueToken.hpp | 33 +- core/src/Kokkos_Vectorization.hpp | 3 +- core/src/Kokkos_View.hpp | 3766 ++++++----- core/src/Kokkos_WorkGraphPolicy.hpp | 189 +- core/src/Kokkos_hwloc.hpp | 23 +- core/src/OpenMP/Kokkos_OpenMP_Exec.cpp | 382 +- 
core/src/OpenMP/Kokkos_OpenMP_Exec.hpp | 302 +- core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp | 1695 +++-- core/src/OpenMP/Kokkos_OpenMP_Task.cpp | 56 +- core/src/OpenMP/Kokkos_OpenMP_Task.hpp | 295 +- core/src/OpenMP/Kokkos_OpenMP_Team.hpp | 383 +- .../OpenMP/Kokkos_OpenMP_ViewCopyETIAvail.hpp | 7 +- .../OpenMP/Kokkos_OpenMP_ViewCopyETIDecl.hpp | 7 +- .../OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp | 69 +- .../OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp | 285 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp | 238 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp | 861 +-- .../Kokkos_OpenMPTarget_Parallel.hpp | 1139 ++-- .../OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp | 224 +- .../OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp | 329 +- core/src/Qthreads/Kokkos_QthreadsExec.cpp | 443 +- core/src/Qthreads/Kokkos_QthreadsExec.hpp | 603 +- .../src/Qthreads/Kokkos_Qthreads_Parallel.hpp | 934 +-- core/src/Qthreads/Kokkos_Qthreads_Task.cpp | 213 +- core/src/Qthreads/Kokkos_Qthreads_Task.hpp | 111 +- .../Qthreads/Kokkos_Qthreads_TaskQueue.hpp | 279 +- .../Kokkos_Qthreads_TaskQueue_impl.hpp | 298 +- .../KokkosExp_ROCm_IterateTile_Refactor.hpp | 3245 +++++----- core/src/ROCm/Kokkos_ROCm_Atomic.hpp | 815 +-- core/src/ROCm/Kokkos_ROCm_Exec.cpp | 65 +- core/src/ROCm/Kokkos_ROCm_Exec.hpp | 201 +- core/src/ROCm/Kokkos_ROCm_Impl.cpp | 737 ++- core/src/ROCm/Kokkos_ROCm_Invoke.hpp | 111 +- core/src/ROCm/Kokkos_ROCm_Join.hpp | 21 +- core/src/ROCm/Kokkos_ROCm_Parallel.hpp | 2532 ++++---- core/src/ROCm/Kokkos_ROCm_Reduce.hpp | 151 +- core/src/ROCm/Kokkos_ROCm_ReduceScan.hpp | 490 +- core/src/ROCm/Kokkos_ROCm_Scan.hpp | 318 +- core/src/ROCm/Kokkos_ROCm_Space.cpp | 734 ++- core/src/ROCm/Kokkos_ROCm_Task.cpp | 107 +- core/src/ROCm/Kokkos_ROCm_Task.hpp | 478 +- core/src/ROCm/Kokkos_ROCm_Tile.hpp | 617 +- core/src/ROCm/Kokkos_ROCm_Vectorization.hpp | 501 +- .../src/ROCm/Kokkos_ROCm_ViewCopyETIAvail.hpp | 7 +- core/src/ROCm/Kokkos_ROCm_ViewCopyETIDecl.hpp | 7 +- core/src/ROCm/hc_math_std.hpp | 426 +- 
.../Serial/Kokkos_Serial_ViewCopyETIAvail.hpp | 7 +- .../Serial/Kokkos_Serial_ViewCopyETIDecl.hpp | 7 +- core/src/Threads/Kokkos_ThreadsExec.cpp | 815 ++- core/src/Threads/Kokkos_ThreadsExec.hpp | 780 +-- core/src/Threads/Kokkos_ThreadsExec_base.cpp | 143 +- core/src/Threads/Kokkos_ThreadsTeam.hpp | 1368 ++-- core/src/Threads/Kokkos_Threads_Parallel.hpp | 1401 ++--- .../Kokkos_Threads_ViewCopyETIAvail.hpp | 7 +- .../Kokkos_Threads_ViewCopyETIDecl.hpp | 7 +- .../Kokkos_Threads_WorkGraphPolicy.hpp | 92 +- ...TIInst_int64_t_double_LayoutLeft_Rank1.cpp | 22 +- ...TIInst_int64_t_double_LayoutLeft_Rank2.cpp | 22 +- ...TIInst_int64_t_double_LayoutLeft_Rank3.cpp | 23 +- ...TIInst_int64_t_double_LayoutLeft_Rank4.cpp | 24 +- ...TIInst_int64_t_double_LayoutLeft_Rank5.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank8.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank1.cpp | 22 +- ...IInst_int64_t_double_LayoutRight_Rank2.cpp | 23 +- ...IInst_int64_t_double_LayoutRight_Rank3.cpp | 24 +- ...IInst_int64_t_double_LayoutRight_Rank4.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank5.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank8.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank1.cpp | 23 +- ...Inst_int64_t_double_LayoutStride_Rank2.cpp | 24 +- ...Inst_int64_t_double_LayoutStride_Rank3.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank4.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank5.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank8.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank1.cpp | 22 +- ...ETIInst_int64_t_float_LayoutLeft_Rank2.cpp | 22 +- ...ETIInst_int64_t_float_LayoutLeft_Rank3.cpp | 22 +- ...ETIInst_int64_t_float_LayoutLeft_Rank4.cpp | 23 +- ...ETIInst_int64_t_float_LayoutLeft_Rank5.cpp | 24 +- ...ETIInst_int64_t_float_LayoutLeft_Rank8.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank1.cpp | 22 +- ...TIInst_int64_t_float_LayoutRight_Rank2.cpp | 22 +- ...TIInst_int64_t_float_LayoutRight_Rank3.cpp | 23 +- 
...TIInst_int64_t_float_LayoutRight_Rank4.cpp | 24 +- ...TIInst_int64_t_float_LayoutRight_Rank5.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank8.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank1.cpp | 22 +- ...IInst_int64_t_float_LayoutStride_Rank2.cpp | 23 +- ...IInst_int64_t_float_LayoutStride_Rank3.cpp | 24 +- ...IInst_int64_t_float_LayoutStride_Rank4.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank5.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank8.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank1.cpp | 22 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank2.cpp | 23 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank3.cpp | 24 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank4.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank5.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank8.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank1.cpp | 23 +- ...Inst_int64_t_int64_t_LayoutRight_Rank2.cpp | 24 +- ...Inst_int64_t_int64_t_LayoutRight_Rank3.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank4.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank5.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank8.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank1.cpp | 24 +- ...nst_int64_t_int64_t_LayoutStride_Rank2.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank3.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank4.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank5.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank8.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank1.cpp | 22 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank2.cpp | 22 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank3.cpp | 22 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank4.cpp | 22 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank5.cpp | 22 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank8.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank1.cpp | 22 +- ...yETIInst_int64_t_int_LayoutRight_Rank2.cpp | 22 +- ...yETIInst_int64_t_int_LayoutRight_Rank3.cpp | 22 +- ...yETIInst_int64_t_int_LayoutRight_Rank4.cpp | 22 +- 
...yETIInst_int64_t_int_LayoutRight_Rank5.cpp | 23 +- ...yETIInst_int64_t_int_LayoutRight_Rank8.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank1.cpp | 22 +- ...ETIInst_int64_t_int_LayoutStride_Rank2.cpp | 22 +- ...ETIInst_int64_t_int_LayoutStride_Rank3.cpp | 22 +- ...ETIInst_int64_t_int_LayoutStride_Rank4.cpp | 23 +- ...ETIInst_int64_t_int_LayoutStride_Rank5.cpp | 24 +- ...ETIInst_int64_t_int_LayoutStride_Rank8.cpp | 25 +- ...opyETIInst_int_double_LayoutLeft_Rank1.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank2.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank3.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank4.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank5.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank8.cpp | 24 +- ...pyETIInst_int_double_LayoutRight_Rank1.cpp | 22 +- ...pyETIInst_int_double_LayoutRight_Rank2.cpp | 22 +- ...pyETIInst_int_double_LayoutRight_Rank3.cpp | 22 +- ...pyETIInst_int_double_LayoutRight_Rank4.cpp | 22 +- ...pyETIInst_int_double_LayoutRight_Rank5.cpp | 22 +- ...pyETIInst_int_double_LayoutRight_Rank8.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank1.cpp | 22 +- ...yETIInst_int_double_LayoutStride_Rank2.cpp | 22 +- ...yETIInst_int_double_LayoutStride_Rank3.cpp | 22 +- ...yETIInst_int_double_LayoutStride_Rank4.cpp | 22 +- ...yETIInst_int_double_LayoutStride_Rank5.cpp | 23 +- ...yETIInst_int_double_LayoutStride_Rank8.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank1.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank2.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank3.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank4.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank5.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank8.cpp | 23 +- ...opyETIInst_int_float_LayoutRight_Rank1.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank2.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank3.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank4.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank5.cpp | 22 +- 
...opyETIInst_int_float_LayoutRight_Rank8.cpp | 24 +- ...pyETIInst_int_float_LayoutStride_Rank1.cpp | 22 +- ...pyETIInst_int_float_LayoutStride_Rank2.cpp | 22 +- ...pyETIInst_int_float_LayoutStride_Rank3.cpp | 22 +- ...pyETIInst_int_float_LayoutStride_Rank4.cpp | 22 +- ...pyETIInst_int_float_LayoutStride_Rank5.cpp | 22 +- ...pyETIInst_int_float_LayoutStride_Rank8.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank1.cpp | 22 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank2.cpp | 22 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank3.cpp | 22 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank4.cpp | 22 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank5.cpp | 22 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank8.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank1.cpp | 22 +- ...yETIInst_int_int64_t_LayoutRight_Rank2.cpp | 22 +- ...yETIInst_int_int64_t_LayoutRight_Rank3.cpp | 22 +- ...yETIInst_int_int64_t_LayoutRight_Rank4.cpp | 22 +- ...yETIInst_int_int64_t_LayoutRight_Rank5.cpp | 23 +- ...yETIInst_int_int64_t_LayoutRight_Rank8.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank1.cpp | 22 +- ...ETIInst_int_int64_t_LayoutStride_Rank2.cpp | 22 +- ...ETIInst_int_int64_t_LayoutStride_Rank3.cpp | 22 +- ...ETIInst_int_int64_t_LayoutStride_Rank4.cpp | 23 +- ...ETIInst_int_int64_t_LayoutStride_Rank5.cpp | 24 +- ...ETIInst_int_int64_t_LayoutStride_Rank8.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank1.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank2.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank3.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank4.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank5.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank8.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank1.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank2.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank3.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank4.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank5.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank8.cpp | 22 +- 
...CopyETIInst_int_int_LayoutStride_Rank1.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank2.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank3.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank4.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank5.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank8.cpp | 23 +- ...TIInst_int64_t_double_LayoutLeft_Rank1.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank2.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank3.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank4.cpp | 26 +- ...TIInst_int64_t_double_LayoutLeft_Rank5.cpp | 26 +- ...TIInst_int64_t_double_LayoutLeft_Rank8.cpp | 26 +- ...IInst_int64_t_double_LayoutRight_Rank1.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank2.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank3.cpp | 26 +- ...IInst_int64_t_double_LayoutRight_Rank4.cpp | 26 +- ...IInst_int64_t_double_LayoutRight_Rank5.cpp | 26 +- ...IInst_int64_t_double_LayoutRight_Rank8.cpp | 26 +- ...Inst_int64_t_double_LayoutStride_Rank1.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank2.cpp | 26 +- ...Inst_int64_t_double_LayoutStride_Rank3.cpp | 26 +- ...Inst_int64_t_double_LayoutStride_Rank4.cpp | 26 +- ...Inst_int64_t_double_LayoutStride_Rank5.cpp | 26 +- ...Inst_int64_t_double_LayoutStride_Rank8.cpp | 26 +- ...ETIInst_int64_t_float_LayoutLeft_Rank1.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank2.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank3.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank4.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank5.cpp | 26 +- ...ETIInst_int64_t_float_LayoutLeft_Rank8.cpp | 26 +- ...TIInst_int64_t_float_LayoutRight_Rank1.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank2.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank3.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank4.cpp | 26 +- ...TIInst_int64_t_float_LayoutRight_Rank5.cpp | 26 +- ...TIInst_int64_t_float_LayoutRight_Rank8.cpp | 26 +- ...IInst_int64_t_float_LayoutStride_Rank1.cpp | 25 +- 
...IInst_int64_t_float_LayoutStride_Rank2.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank3.cpp | 26 +- ...IInst_int64_t_float_LayoutStride_Rank4.cpp | 26 +- ...IInst_int64_t_float_LayoutStride_Rank5.cpp | 26 +- ...IInst_int64_t_float_LayoutStride_Rank8.cpp | 26 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank1.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank2.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank3.cpp | 26 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank4.cpp | 26 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank5.cpp | 26 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank8.cpp | 26 +- ...Inst_int64_t_int64_t_LayoutRight_Rank1.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank2.cpp | 26 +- ...Inst_int64_t_int64_t_LayoutRight_Rank3.cpp | 26 +- ...Inst_int64_t_int64_t_LayoutRight_Rank4.cpp | 26 +- ...Inst_int64_t_int64_t_LayoutRight_Rank5.cpp | 26 +- ...Inst_int64_t_int64_t_LayoutRight_Rank8.cpp | 26 +- ...nst_int64_t_int64_t_LayoutStride_Rank1.cpp | 26 +- ...nst_int64_t_int64_t_LayoutStride_Rank2.cpp | 26 +- ...nst_int64_t_int64_t_LayoutStride_Rank3.cpp | 26 +- ...nst_int64_t_int64_t_LayoutStride_Rank4.cpp | 26 +- ...nst_int64_t_int64_t_LayoutStride_Rank5.cpp | 26 +- ...nst_int64_t_int64_t_LayoutStride_Rank8.cpp | 26 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank1.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank2.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank3.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank4.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank5.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank8.cpp | 26 +- ...yETIInst_int64_t_int_LayoutRight_Rank1.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank2.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank3.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank4.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank5.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank8.cpp | 26 +- ...ETIInst_int64_t_int_LayoutStride_Rank1.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank2.cpp | 25 +- 
...ETIInst_int64_t_int_LayoutStride_Rank3.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank4.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank5.cpp | 26 +- ...ETIInst_int64_t_int_LayoutStride_Rank8.cpp | 26 +- ...opyETIInst_int_double_LayoutLeft_Rank1.cpp | 25 +- ...opyETIInst_int_double_LayoutLeft_Rank2.cpp | 25 +- ...opyETIInst_int_double_LayoutLeft_Rank3.cpp | 25 +- ...opyETIInst_int_double_LayoutLeft_Rank4.cpp | 25 +- ...opyETIInst_int_double_LayoutLeft_Rank5.cpp | 25 +- ...opyETIInst_int_double_LayoutLeft_Rank8.cpp | 26 +- ...pyETIInst_int_double_LayoutRight_Rank1.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank2.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank3.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank4.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank5.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank8.cpp | 26 +- ...yETIInst_int_double_LayoutStride_Rank1.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank2.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank3.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank4.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank5.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank8.cpp | 26 +- ...CopyETIInst_int_float_LayoutLeft_Rank1.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank2.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank3.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank4.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank5.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank8.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank1.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank2.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank3.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank4.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank5.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank8.cpp | 26 +- ...pyETIInst_int_float_LayoutStride_Rank1.cpp | 25 +- ...pyETIInst_int_float_LayoutStride_Rank2.cpp | 25 +- ...pyETIInst_int_float_LayoutStride_Rank3.cpp | 25 +- 
...pyETIInst_int_float_LayoutStride_Rank4.cpp | 25 +- ...pyETIInst_int_float_LayoutStride_Rank5.cpp | 25 +- ...pyETIInst_int_float_LayoutStride_Rank8.cpp | 26 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank1.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank2.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank3.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank4.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank5.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank8.cpp | 26 +- ...yETIInst_int_int64_t_LayoutRight_Rank1.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank2.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank3.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank4.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank5.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank8.cpp | 26 +- ...ETIInst_int_int64_t_LayoutStride_Rank1.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank2.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank3.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank4.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank5.cpp | 26 +- ...ETIInst_int_int64_t_LayoutStride_Rank8.cpp | 26 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank1.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank2.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank3.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank4.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank5.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank8.cpp | 25 +- ...wCopyETIInst_int_int_LayoutRight_Rank1.cpp | 25 +- ...wCopyETIInst_int_int_LayoutRight_Rank2.cpp | 25 +- ...wCopyETIInst_int_int_LayoutRight_Rank3.cpp | 25 +- ...wCopyETIInst_int_int_LayoutRight_Rank4.cpp | 25 +- ...wCopyETIInst_int_int_LayoutRight_Rank5.cpp | 25 +- ...wCopyETIInst_int_int_LayoutRight_Rank8.cpp | 25 +- ...CopyETIInst_int_int_LayoutStride_Rank1.cpp | 25 +- ...CopyETIInst_int_int_LayoutStride_Rank2.cpp | 25 +- ...CopyETIInst_int_int_LayoutStride_Rank3.cpp | 25 +- ...CopyETIInst_int_int_LayoutStride_Rank4.cpp | 25 +- 
...CopyETIInst_int_int_LayoutStride_Rank5.cpp | 25 +- ...CopyETIInst_int_int_LayoutStride_Rank8.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank1.cpp | 23 +- ...TIInst_int64_t_double_LayoutLeft_Rank2.cpp | 24 +- ...TIInst_int64_t_double_LayoutLeft_Rank3.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank4.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank5.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank8.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank1.cpp | 24 +- ...IInst_int64_t_double_LayoutRight_Rank2.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank3.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank4.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank5.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank8.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank1.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank2.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank3.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank4.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank5.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank8.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank1.cpp | 22 +- ...ETIInst_int64_t_float_LayoutLeft_Rank2.cpp | 23 +- ...ETIInst_int64_t_float_LayoutLeft_Rank3.cpp | 24 +- ...ETIInst_int64_t_float_LayoutLeft_Rank4.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank5.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank8.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank1.cpp | 23 +- ...TIInst_int64_t_float_LayoutRight_Rank2.cpp | 24 +- ...TIInst_int64_t_float_LayoutRight_Rank3.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank4.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank5.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank8.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank1.cpp | 24 +- ...IInst_int64_t_float_LayoutStride_Rank2.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank3.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank4.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank5.cpp | 25 +- 
...IInst_int64_t_float_LayoutStride_Rank8.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank1.cpp | 24 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank2.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank3.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank4.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank5.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank8.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank1.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank2.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank3.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank4.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank5.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank8.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank1.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank2.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank3.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank4.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank5.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank8.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank1.cpp | 22 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank2.cpp | 22 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank3.cpp | 22 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank4.cpp | 23 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank5.cpp | 24 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank8.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank1.cpp | 22 +- ...yETIInst_int64_t_int_LayoutRight_Rank2.cpp | 22 +- ...yETIInst_int64_t_int_LayoutRight_Rank3.cpp | 23 +- ...yETIInst_int64_t_int_LayoutRight_Rank4.cpp | 24 +- ...yETIInst_int64_t_int_LayoutRight_Rank5.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank8.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank1.cpp | 22 +- ...ETIInst_int64_t_int_LayoutStride_Rank2.cpp | 23 +- ...ETIInst_int64_t_int_LayoutStride_Rank3.cpp | 24 +- ...ETIInst_int64_t_int_LayoutStride_Rank4.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank5.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank8.cpp | 25 +- 
...opyETIInst_int_double_LayoutLeft_Rank1.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank2.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank3.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank4.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank5.cpp | 23 +- ...opyETIInst_int_double_LayoutLeft_Rank8.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank1.cpp | 22 +- ...pyETIInst_int_double_LayoutRight_Rank2.cpp | 22 +- ...pyETIInst_int_double_LayoutRight_Rank3.cpp | 22 +- ...pyETIInst_int_double_LayoutRight_Rank4.cpp | 23 +- ...pyETIInst_int_double_LayoutRight_Rank5.cpp | 24 +- ...pyETIInst_int_double_LayoutRight_Rank8.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank1.cpp | 22 +- ...yETIInst_int_double_LayoutStride_Rank2.cpp | 22 +- ...yETIInst_int_double_LayoutStride_Rank3.cpp | 23 +- ...yETIInst_int_double_LayoutStride_Rank4.cpp | 24 +- ...yETIInst_int_double_LayoutStride_Rank5.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank8.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank1.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank2.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank3.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank4.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank5.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank8.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank1.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank2.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank3.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank4.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank5.cpp | 23 +- ...opyETIInst_int_float_LayoutRight_Rank8.cpp | 25 +- ...pyETIInst_int_float_LayoutStride_Rank1.cpp | 22 +- ...pyETIInst_int_float_LayoutStride_Rank2.cpp | 22 +- ...pyETIInst_int_float_LayoutStride_Rank3.cpp | 22 +- ...pyETIInst_int_float_LayoutStride_Rank4.cpp | 23 +- ...pyETIInst_int_float_LayoutStride_Rank5.cpp | 24 +- ...pyETIInst_int_float_LayoutStride_Rank8.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank1.cpp | 22 +- 
...pyETIInst_int_int64_t_LayoutLeft_Rank2.cpp | 22 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank3.cpp | 22 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank4.cpp | 23 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank5.cpp | 24 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank8.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank1.cpp | 22 +- ...yETIInst_int_int64_t_LayoutRight_Rank2.cpp | 22 +- ...yETIInst_int_int64_t_LayoutRight_Rank3.cpp | 23 +- ...yETIInst_int_int64_t_LayoutRight_Rank4.cpp | 24 +- ...yETIInst_int_int64_t_LayoutRight_Rank5.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank8.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank1.cpp | 22 +- ...ETIInst_int_int64_t_LayoutStride_Rank2.cpp | 23 +- ...ETIInst_int_int64_t_LayoutStride_Rank3.cpp | 24 +- ...ETIInst_int_int64_t_LayoutStride_Rank4.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank5.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank8.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank1.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank2.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank3.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank4.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank5.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank8.cpp | 23 +- ...wCopyETIInst_int_int_LayoutRight_Rank1.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank2.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank3.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank4.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank5.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank8.cpp | 24 +- ...CopyETIInst_int_int_LayoutStride_Rank1.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank2.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank3.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank4.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank5.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank8.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank1.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank2.cpp | 25 +- 
...TIInst_int64_t_double_LayoutLeft_Rank3.cpp | 26 +- ...TIInst_int64_t_double_LayoutLeft_Rank4.cpp | 26 +- ...TIInst_int64_t_double_LayoutLeft_Rank5.cpp | 26 +- ...TIInst_int64_t_double_LayoutLeft_Rank8.cpp | 26 +- ...IInst_int64_t_double_LayoutRight_Rank1.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank2.cpp | 26 +- ...IInst_int64_t_double_LayoutRight_Rank3.cpp | 26 +- ...IInst_int64_t_double_LayoutRight_Rank4.cpp | 26 +- ...IInst_int64_t_double_LayoutRight_Rank5.cpp | 26 +- ...IInst_int64_t_double_LayoutRight_Rank8.cpp | 26 +- ...Inst_int64_t_double_LayoutStride_Rank1.cpp | 26 +- ...Inst_int64_t_double_LayoutStride_Rank2.cpp | 26 +- ...Inst_int64_t_double_LayoutStride_Rank3.cpp | 26 +- ...Inst_int64_t_double_LayoutStride_Rank4.cpp | 26 +- ...Inst_int64_t_double_LayoutStride_Rank5.cpp | 26 +- ...Inst_int64_t_double_LayoutStride_Rank8.cpp | 26 +- ...ETIInst_int64_t_float_LayoutLeft_Rank1.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank2.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank3.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank4.cpp | 26 +- ...ETIInst_int64_t_float_LayoutLeft_Rank5.cpp | 26 +- ...ETIInst_int64_t_float_LayoutLeft_Rank8.cpp | 26 +- ...TIInst_int64_t_float_LayoutRight_Rank1.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank2.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank3.cpp | 26 +- ...TIInst_int64_t_float_LayoutRight_Rank4.cpp | 26 +- ...TIInst_int64_t_float_LayoutRight_Rank5.cpp | 26 +- ...TIInst_int64_t_float_LayoutRight_Rank8.cpp | 26 +- ...IInst_int64_t_float_LayoutStride_Rank1.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank2.cpp | 26 +- ...IInst_int64_t_float_LayoutStride_Rank3.cpp | 26 +- ...IInst_int64_t_float_LayoutStride_Rank4.cpp | 26 +- ...IInst_int64_t_float_LayoutStride_Rank5.cpp | 26 +- ...IInst_int64_t_float_LayoutStride_Rank8.cpp | 26 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank1.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank2.cpp | 26 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank3.cpp | 26 +- 
...IInst_int64_t_int64_t_LayoutLeft_Rank4.cpp | 26 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank5.cpp | 26 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank8.cpp | 26 +- ...Inst_int64_t_int64_t_LayoutRight_Rank1.cpp | 26 +- ...Inst_int64_t_int64_t_LayoutRight_Rank2.cpp | 26 +- ...Inst_int64_t_int64_t_LayoutRight_Rank3.cpp | 26 +- ...Inst_int64_t_int64_t_LayoutRight_Rank4.cpp | 26 +- ...Inst_int64_t_int64_t_LayoutRight_Rank5.cpp | 26 +- ...Inst_int64_t_int64_t_LayoutRight_Rank8.cpp | 26 +- ...nst_int64_t_int64_t_LayoutStride_Rank1.cpp | 26 +- ...nst_int64_t_int64_t_LayoutStride_Rank2.cpp | 26 +- ...nst_int64_t_int64_t_LayoutStride_Rank3.cpp | 26 +- ...nst_int64_t_int64_t_LayoutStride_Rank4.cpp | 26 +- ...nst_int64_t_int64_t_LayoutStride_Rank5.cpp | 26 +- ...nst_int64_t_int64_t_LayoutStride_Rank8.cpp | 26 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank1.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank2.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank3.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank4.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank5.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank8.cpp | 26 +- ...yETIInst_int64_t_int_LayoutRight_Rank1.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank2.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank3.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank4.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank5.cpp | 26 +- ...yETIInst_int64_t_int_LayoutRight_Rank8.cpp | 26 +- ...ETIInst_int64_t_int_LayoutStride_Rank1.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank2.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank3.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank4.cpp | 26 +- ...ETIInst_int64_t_int_LayoutStride_Rank5.cpp | 26 +- ...ETIInst_int64_t_int_LayoutStride_Rank8.cpp | 26 +- ...opyETIInst_int_double_LayoutLeft_Rank1.cpp | 25 +- ...opyETIInst_int_double_LayoutLeft_Rank2.cpp | 25 +- ...opyETIInst_int_double_LayoutLeft_Rank3.cpp | 25 +- ...opyETIInst_int_double_LayoutLeft_Rank4.cpp | 25 +- 
...opyETIInst_int_double_LayoutLeft_Rank5.cpp | 25 +- ...opyETIInst_int_double_LayoutLeft_Rank8.cpp | 26 +- ...pyETIInst_int_double_LayoutRight_Rank1.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank2.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank3.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank4.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank5.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank8.cpp | 26 +- ...yETIInst_int_double_LayoutStride_Rank1.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank2.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank3.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank4.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank5.cpp | 26 +- ...yETIInst_int_double_LayoutStride_Rank8.cpp | 26 +- ...CopyETIInst_int_float_LayoutLeft_Rank1.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank2.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank3.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank4.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank5.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank8.cpp | 26 +- ...opyETIInst_int_float_LayoutRight_Rank1.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank2.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank3.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank4.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank5.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank8.cpp | 26 +- ...pyETIInst_int_float_LayoutStride_Rank1.cpp | 25 +- ...pyETIInst_int_float_LayoutStride_Rank2.cpp | 25 +- ...pyETIInst_int_float_LayoutStride_Rank3.cpp | 25 +- ...pyETIInst_int_float_LayoutStride_Rank4.cpp | 25 +- ...pyETIInst_int_float_LayoutStride_Rank5.cpp | 25 +- ...pyETIInst_int_float_LayoutStride_Rank8.cpp | 26 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank1.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank2.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank3.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank4.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank5.cpp | 25 +- 
...pyETIInst_int_int64_t_LayoutLeft_Rank8.cpp | 26 +- ...yETIInst_int_int64_t_LayoutRight_Rank1.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank2.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank3.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank4.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank5.cpp | 26 +- ...yETIInst_int_int64_t_LayoutRight_Rank8.cpp | 26 +- ...ETIInst_int_int64_t_LayoutStride_Rank1.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank2.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank3.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank4.cpp | 26 +- ...ETIInst_int_int64_t_LayoutStride_Rank5.cpp | 26 +- ...ETIInst_int_int64_t_LayoutStride_Rank8.cpp | 26 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank1.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank2.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank3.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank4.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank5.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank8.cpp | 25 +- ...wCopyETIInst_int_int_LayoutRight_Rank1.cpp | 25 +- ...wCopyETIInst_int_int_LayoutRight_Rank2.cpp | 25 +- ...wCopyETIInst_int_int_LayoutRight_Rank3.cpp | 25 +- ...wCopyETIInst_int_int_LayoutRight_Rank4.cpp | 25 +- ...wCopyETIInst_int_int_LayoutRight_Rank5.cpp | 25 +- ...wCopyETIInst_int_int_LayoutRight_Rank8.cpp | 25 +- ...CopyETIInst_int_int_LayoutStride_Rank1.cpp | 25 +- ...CopyETIInst_int_int_LayoutStride_Rank2.cpp | 25 +- ...CopyETIInst_int_int_LayoutStride_Rank3.cpp | 25 +- ...CopyETIInst_int_int_LayoutStride_Rank4.cpp | 25 +- ...CopyETIInst_int_int_LayoutStride_Rank5.cpp | 25 +- ...CopyETIInst_int_int_LayoutStride_Rank8.cpp | 26 +- ...TIInst_int64_t_double_LayoutLeft_Rank1.cpp | 23 +- ...TIInst_int64_t_double_LayoutLeft_Rank2.cpp | 24 +- ...TIInst_int64_t_double_LayoutLeft_Rank3.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank4.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank5.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank8.cpp | 25 +- 
...IInst_int64_t_double_LayoutRight_Rank1.cpp | 24 +- ...IInst_int64_t_double_LayoutRight_Rank2.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank3.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank4.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank5.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank8.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank1.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank2.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank3.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank4.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank5.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank8.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank1.cpp | 22 +- ...ETIInst_int64_t_float_LayoutLeft_Rank2.cpp | 23 +- ...ETIInst_int64_t_float_LayoutLeft_Rank3.cpp | 24 +- ...ETIInst_int64_t_float_LayoutLeft_Rank4.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank5.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank8.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank1.cpp | 23 +- ...TIInst_int64_t_float_LayoutRight_Rank2.cpp | 24 +- ...TIInst_int64_t_float_LayoutRight_Rank3.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank4.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank5.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank8.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank1.cpp | 24 +- ...IInst_int64_t_float_LayoutStride_Rank2.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank3.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank4.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank5.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank8.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank1.cpp | 24 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank2.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank3.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank4.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank5.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank8.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank1.cpp | 25 +- 
...Inst_int64_t_int64_t_LayoutRight_Rank2.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank3.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank4.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank5.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank8.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank1.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank2.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank3.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank4.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank5.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank8.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank1.cpp | 22 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank2.cpp | 22 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank3.cpp | 22 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank4.cpp | 23 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank5.cpp | 24 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank8.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank1.cpp | 22 +- ...yETIInst_int64_t_int_LayoutRight_Rank2.cpp | 22 +- ...yETIInst_int64_t_int_LayoutRight_Rank3.cpp | 23 +- ...yETIInst_int64_t_int_LayoutRight_Rank4.cpp | 24 +- ...yETIInst_int64_t_int_LayoutRight_Rank5.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank8.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank1.cpp | 22 +- ...ETIInst_int64_t_int_LayoutStride_Rank2.cpp | 23 +- ...ETIInst_int64_t_int_LayoutStride_Rank3.cpp | 24 +- ...ETIInst_int64_t_int_LayoutStride_Rank4.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank5.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank8.cpp | 25 +- ...opyETIInst_int_double_LayoutLeft_Rank1.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank2.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank3.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank4.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank5.cpp | 23 +- ...opyETIInst_int_double_LayoutLeft_Rank8.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank1.cpp | 22 +- ...pyETIInst_int_double_LayoutRight_Rank2.cpp | 22 +- 
...pyETIInst_int_double_LayoutRight_Rank3.cpp | 22 +- ...pyETIInst_int_double_LayoutRight_Rank4.cpp | 23 +- ...pyETIInst_int_double_LayoutRight_Rank5.cpp | 24 +- ...pyETIInst_int_double_LayoutRight_Rank8.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank1.cpp | 22 +- ...yETIInst_int_double_LayoutStride_Rank2.cpp | 22 +- ...yETIInst_int_double_LayoutStride_Rank3.cpp | 23 +- ...yETIInst_int_double_LayoutStride_Rank4.cpp | 24 +- ...yETIInst_int_double_LayoutStride_Rank5.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank8.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank1.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank2.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank3.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank4.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank5.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank8.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank1.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank2.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank3.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank4.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank5.cpp | 23 +- ...opyETIInst_int_float_LayoutRight_Rank8.cpp | 25 +- ...pyETIInst_int_float_LayoutStride_Rank1.cpp | 22 +- ...pyETIInst_int_float_LayoutStride_Rank2.cpp | 22 +- ...pyETIInst_int_float_LayoutStride_Rank3.cpp | 22 +- ...pyETIInst_int_float_LayoutStride_Rank4.cpp | 23 +- ...pyETIInst_int_float_LayoutStride_Rank5.cpp | 24 +- ...pyETIInst_int_float_LayoutStride_Rank8.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank1.cpp | 22 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank2.cpp | 22 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank3.cpp | 22 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank4.cpp | 23 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank5.cpp | 24 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank8.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank1.cpp | 22 +- ...yETIInst_int_int64_t_LayoutRight_Rank2.cpp | 22 +- ...yETIInst_int_int64_t_LayoutRight_Rank3.cpp | 23 +- 
...yETIInst_int_int64_t_LayoutRight_Rank4.cpp | 24 +- ...yETIInst_int_int64_t_LayoutRight_Rank5.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank8.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank1.cpp | 22 +- ...ETIInst_int_int64_t_LayoutStride_Rank2.cpp | 23 +- ...ETIInst_int_int64_t_LayoutStride_Rank3.cpp | 24 +- ...ETIInst_int_int64_t_LayoutStride_Rank4.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank5.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank8.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank1.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank2.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank3.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank4.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank5.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank8.cpp | 23 +- ...wCopyETIInst_int_int_LayoutRight_Rank1.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank2.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank3.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank4.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank5.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank8.cpp | 24 +- ...CopyETIInst_int_int_LayoutStride_Rank1.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank2.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank3.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank4.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank5.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank8.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank1.cpp | 24 +- ...TIInst_int64_t_double_LayoutLeft_Rank2.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank3.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank4.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank5.cpp | 25 +- ...TIInst_int64_t_double_LayoutLeft_Rank8.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank1.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank2.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank3.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank4.cpp | 25 +- 
...IInst_int64_t_double_LayoutRight_Rank5.cpp | 25 +- ...IInst_int64_t_double_LayoutRight_Rank8.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank1.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank2.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank3.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank4.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank5.cpp | 25 +- ...Inst_int64_t_double_LayoutStride_Rank8.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank1.cpp | 23 +- ...ETIInst_int64_t_float_LayoutLeft_Rank2.cpp | 24 +- ...ETIInst_int64_t_float_LayoutLeft_Rank3.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank4.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank5.cpp | 25 +- ...ETIInst_int64_t_float_LayoutLeft_Rank8.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank1.cpp | 24 +- ...TIInst_int64_t_float_LayoutRight_Rank2.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank3.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank4.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank5.cpp | 25 +- ...TIInst_int64_t_float_LayoutRight_Rank8.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank1.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank2.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank3.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank4.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank5.cpp | 25 +- ...IInst_int64_t_float_LayoutStride_Rank8.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank1.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank2.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank3.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank4.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank5.cpp | 25 +- ...IInst_int64_t_int64_t_LayoutLeft_Rank8.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank1.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank2.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank3.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank4.cpp | 25 +- ...Inst_int64_t_int64_t_LayoutRight_Rank5.cpp | 25 +- 
...Inst_int64_t_int64_t_LayoutRight_Rank8.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank1.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank2.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank3.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank4.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank5.cpp | 25 +- ...nst_int64_t_int64_t_LayoutStride_Rank8.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank1.cpp | 22 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank2.cpp | 22 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank3.cpp | 23 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank4.cpp | 24 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank5.cpp | 25 +- ...pyETIInst_int64_t_int_LayoutLeft_Rank8.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank1.cpp | 22 +- ...yETIInst_int64_t_int_LayoutRight_Rank2.cpp | 23 +- ...yETIInst_int64_t_int_LayoutRight_Rank3.cpp | 24 +- ...yETIInst_int64_t_int_LayoutRight_Rank4.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank5.cpp | 25 +- ...yETIInst_int64_t_int_LayoutRight_Rank8.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank1.cpp | 23 +- ...ETIInst_int64_t_int_LayoutStride_Rank2.cpp | 24 +- ...ETIInst_int64_t_int_LayoutStride_Rank3.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank4.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank5.cpp | 25 +- ...ETIInst_int64_t_int_LayoutStride_Rank8.cpp | 25 +- ...opyETIInst_int_double_LayoutLeft_Rank1.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank2.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank3.cpp | 22 +- ...opyETIInst_int_double_LayoutLeft_Rank4.cpp | 23 +- ...opyETIInst_int_double_LayoutLeft_Rank5.cpp | 24 +- ...opyETIInst_int_double_LayoutLeft_Rank8.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank1.cpp | 22 +- ...pyETIInst_int_double_LayoutRight_Rank2.cpp | 22 +- ...pyETIInst_int_double_LayoutRight_Rank3.cpp | 23 +- ...pyETIInst_int_double_LayoutRight_Rank4.cpp | 24 +- ...pyETIInst_int_double_LayoutRight_Rank5.cpp | 25 +- ...pyETIInst_int_double_LayoutRight_Rank8.cpp | 25 +- 
...yETIInst_int_double_LayoutStride_Rank1.cpp | 22 +- ...yETIInst_int_double_LayoutStride_Rank2.cpp | 23 +- ...yETIInst_int_double_LayoutStride_Rank3.cpp | 24 +- ...yETIInst_int_double_LayoutStride_Rank4.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank5.cpp | 25 +- ...yETIInst_int_double_LayoutStride_Rank8.cpp | 25 +- ...CopyETIInst_int_float_LayoutLeft_Rank1.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank2.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank3.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank4.cpp | 22 +- ...CopyETIInst_int_float_LayoutLeft_Rank5.cpp | 23 +- ...CopyETIInst_int_float_LayoutLeft_Rank8.cpp | 25 +- ...opyETIInst_int_float_LayoutRight_Rank1.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank2.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank3.cpp | 22 +- ...opyETIInst_int_float_LayoutRight_Rank4.cpp | 23 +- ...opyETIInst_int_float_LayoutRight_Rank5.cpp | 24 +- ...opyETIInst_int_float_LayoutRight_Rank8.cpp | 25 +- ...pyETIInst_int_float_LayoutStride_Rank1.cpp | 22 +- ...pyETIInst_int_float_LayoutStride_Rank2.cpp | 22 +- ...pyETIInst_int_float_LayoutStride_Rank3.cpp | 23 +- ...pyETIInst_int_float_LayoutStride_Rank4.cpp | 24 +- ...pyETIInst_int_float_LayoutStride_Rank5.cpp | 25 +- ...pyETIInst_int_float_LayoutStride_Rank8.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank1.cpp | 22 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank2.cpp | 22 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank3.cpp | 23 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank4.cpp | 24 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank5.cpp | 25 +- ...pyETIInst_int_int64_t_LayoutLeft_Rank8.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank1.cpp | 22 +- ...yETIInst_int_int64_t_LayoutRight_Rank2.cpp | 23 +- ...yETIInst_int_int64_t_LayoutRight_Rank3.cpp | 24 +- ...yETIInst_int_int64_t_LayoutRight_Rank4.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank5.cpp | 25 +- ...yETIInst_int_int64_t_LayoutRight_Rank8.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank1.cpp | 23 +- 
...ETIInst_int_int64_t_LayoutStride_Rank2.cpp | 24 +- ...ETIInst_int_int64_t_LayoutStride_Rank3.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank4.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank5.cpp | 25 +- ...ETIInst_int_int64_t_LayoutStride_Rank8.cpp | 25 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank1.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank2.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank3.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank4.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank5.cpp | 22 +- ...ewCopyETIInst_int_int_LayoutLeft_Rank8.cpp | 24 +- ...wCopyETIInst_int_int_LayoutRight_Rank1.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank2.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank3.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank4.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank5.cpp | 22 +- ...wCopyETIInst_int_int_LayoutRight_Rank8.cpp | 25 +- ...CopyETIInst_int_int_LayoutStride_Rank1.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank2.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank3.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank4.cpp | 22 +- ...CopyETIInst_int_int_LayoutStride_Rank5.cpp | 23 +- ...CopyETIInst_int_int_LayoutStride_Rank8.cpp | 25 +- .../Kokkos_ViewFillCopyETIAvail_Macros.hpp | 2016 ++++-- .../Kokkos_ViewFillCopyETIDecl_Macros.hpp | 1728 +++-- core/src/impl/KokkosExp_Host_IterateTile.hpp | 3822 ++++++------ core/src/impl/KokkosExp_ViewMapping.hpp | 8 +- core/src/impl/Kokkos_AnalyzePolicy.hpp | 298 +- core/src/impl/Kokkos_Atomic_Assembly.hpp | 102 +- .../Kokkos_Atomic_Compare_Exchange_Strong.hpp | 485 +- .../Kokkos_Atomic_Compare_Exchange_Weak.hpp | 332 +- core/src/impl/Kokkos_Atomic_Decrement.hpp | 111 +- core/src/impl/Kokkos_Atomic_Exchange.hpp | 425 +- core/src/impl/Kokkos_Atomic_Fetch_Add.hpp | 378 +- core/src/impl/Kokkos_Atomic_Fetch_And.hpp | 106 +- core/src/impl/Kokkos_Atomic_Fetch_Or.hpp | 106 +- core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp | 305 +- 
core/src/impl/Kokkos_Atomic_Generic.hpp | 417 +- core/src/impl/Kokkos_Atomic_Increment.hpp | 117 +- core/src/impl/Kokkos_Atomic_Load.hpp | 171 +- core/src/impl/Kokkos_Atomic_Memory_Order.hpp | 47 +- core/src/impl/Kokkos_Atomic_Store.hpp | 179 +- core/src/impl/Kokkos_Atomic_View.hpp | 329 +- core/src/impl/Kokkos_Atomic_Windows.hpp | 329 +- core/src/impl/Kokkos_BitOps.hpp | 128 +- core/src/impl/Kokkos_CPUDiscovery.cpp | 29 +- core/src/impl/Kokkos_CPUDiscovery.hpp | 4 +- core/src/impl/Kokkos_ChaseLev.hpp | 194 +- core/src/impl/Kokkos_ClockTic.hpp | 35 +- core/src/impl/Kokkos_ConcurrentBitset.hpp | 327 +- core/src/impl/Kokkos_Core.cpp | 885 +-- core/src/impl/Kokkos_EBO.hpp | 212 +- core/src/impl/Kokkos_Error.cpp | 76 +- core/src/impl/Kokkos_Error.hpp | 102 +- core/src/impl/Kokkos_ExecPolicy.cpp | 17 +- .../src/impl/Kokkos_FixedBufferMemoryPool.hpp | 134 +- core/src/impl/Kokkos_FunctorAdapter.hpp | 3115 +++++---- core/src/impl/Kokkos_FunctorAnalysis.hpp | 1192 ++-- core/src/impl/Kokkos_HBWSpace.cpp | 316 +- core/src/impl/Kokkos_HostBarrier.cpp | 83 +- core/src/impl/Kokkos_HostBarrier.hpp | 224 +- core/src/impl/Kokkos_HostSpace.cpp | 489 +- core/src/impl/Kokkos_HostSpace_deepcopy.cpp | 90 +- core/src/impl/Kokkos_HostSpace_deepcopy.hpp | 9 +- core/src/impl/Kokkos_HostThreadTeam.cpp | 276 +- core/src/impl/Kokkos_HostThreadTeam.hpp | 1311 ++-- core/src/impl/Kokkos_LIFO.hpp | 160 +- core/src/impl/Kokkos_LinkedListNode.hpp | 98 +- core/src/impl/Kokkos_MemoryPool.cpp | 84 +- core/src/impl/Kokkos_MemoryPoolAllocator.hpp | 42 +- core/src/impl/Kokkos_Memory_Fence.hpp | 58 +- core/src/impl/Kokkos_MultipleTaskQueue.hpp | 504 +- core/src/impl/Kokkos_OldMacros.hpp | 59 +- core/src/impl/Kokkos_OptionalRef.hpp | 76 +- core/src/impl/Kokkos_PhysicalLayout.hpp | 32 +- core/src/impl/Kokkos_Profiling_DeviceInfo.hpp | 4 +- core/src/impl/Kokkos_Profiling_Interface.cpp | 297 +- core/src/impl/Kokkos_Profiling_Interface.hpp | 87 +- core/src/impl/Kokkos_Serial.cpp | 148 +- 
core/src/impl/Kokkos_Serial_Task.cpp | 9 +- core/src/impl/Kokkos_Serial_Task.hpp | 184 +- .../impl/Kokkos_Serial_WorkGraphPolicy.hpp | 72 +- core/src/impl/Kokkos_SharedAlloc.cpp | 377 +- core/src/impl/Kokkos_SharedAlloc.hpp | 424 +- core/src/impl/Kokkos_SimpleTaskScheduler.hpp | 563 +- core/src/impl/Kokkos_SingleTaskQueue.hpp | 112 +- core/src/impl/Kokkos_Spinwait.cpp | 144 +- core/src/impl/Kokkos_Spinwait.hpp | 67 +- core/src/impl/Kokkos_Tags.hpp | 33 +- core/src/impl/Kokkos_TaskBase.hpp | 224 +- core/src/impl/Kokkos_TaskNode.hpp | 500 +- core/src/impl/Kokkos_TaskPolicyData.hpp | 92 +- core/src/impl/Kokkos_TaskQueue.hpp | 140 +- core/src/impl/Kokkos_TaskQueueCommon.hpp | 355 +- .../impl/Kokkos_TaskQueueMemoryManager.hpp | 162 +- core/src/impl/Kokkos_TaskQueueMultiple.hpp | 154 +- .../impl/Kokkos_TaskQueueMultiple_impl.hpp | 9 +- core/src/impl/Kokkos_TaskQueue_impl.hpp | 430 +- core/src/impl/Kokkos_TaskResult.hpp | 90 +- core/src/impl/Kokkos_TaskTeamMember.hpp | 32 +- core/src/impl/Kokkos_Timer.hpp | 17 +- core/src/impl/Kokkos_Traits.hpp | 528 +- core/src/impl/Kokkos_Utilities.hpp | 449 +- core/src/impl/Kokkos_VLAEmulation.hpp | 153 +- core/src/impl/Kokkos_ViewArray.hpp | 871 +-- core/src/impl/Kokkos_ViewCtor.hpp | 249 +- core/src/impl/Kokkos_ViewFillCopyETIAvail.hpp | 85 +- core/src/impl/Kokkos_ViewFillCopyETIDecl.hpp | 104 +- core/src/impl/Kokkos_ViewLayoutTiled.hpp | 1795 ++++-- core/src/impl/Kokkos_ViewMapping.hpp | 5547 +++++++++-------- core/src/impl/Kokkos_ViewTile.hpp | 251 +- core/src/impl/Kokkos_ViewUniformType.hpp | 105 +- core/src/impl/Kokkos_Volatile_Load.hpp | 152 +- core/src/impl/Kokkos_hwloc.cpp | 720 +-- core/unit_test/TestAggregate.hpp | 112 +- core/unit_test/TestAtomic.hpp | 409 +- core/unit_test/TestAtomicOperations.hpp | 717 +-- .../TestAtomicOperations_complexdouble.hpp | 22 +- .../TestAtomicOperations_complexfloat.hpp | 22 +- .../unit_test/TestAtomicOperations_double.hpp | 26 +- core/unit_test/TestAtomicOperations_float.hpp | 26 +- 
core/unit_test/TestAtomicOperations_int.hpp | 47 +- .../TestAtomicOperations_longint.hpp | 47 +- .../TestAtomicOperations_longlongint.hpp | 47 +- .../TestAtomicOperations_unsignedint.hpp | 47 +- .../TestAtomicOperations_unsignedlongint.hpp | 47 +- core/unit_test/TestAtomicViews.hpp | 1518 ++--- core/unit_test/TestCXX11.hpp | 388 +- core/unit_test/TestCXX11Deduction.hpp | 54 +- core/unit_test/TestCompilerMacros.hpp | 63 +- core/unit_test/TestComplex.hpp | 331 +- core/unit_test/TestConcurrentBitset.hpp | 145 +- core/unit_test/TestCrs.hpp | 152 +- core/unit_test/TestDeepCopy.hpp | 223 +- core/unit_test/TestDefaultDeviceTypeInit.hpp | 364 +- core/unit_test/TestFunctorAnalysis.hpp | 160 +- core/unit_test/TestHWLOC.cpp | 9 +- core/unit_test/TestHostBarrier.cpp | 9 +- core/unit_test/TestInit.hpp | 24 +- core/unit_test/TestLocalDeepCopy.hpp | 1530 ++--- core/unit_test/TestMDRange.hpp | 3618 ++++++----- core/unit_test/TestMDRange_a.hpp | 13 +- core/unit_test/TestMDRange_b.hpp | 13 +- core/unit_test/TestMDRange_c.hpp | 19 +- core/unit_test/TestMDRange_d.hpp | 29 +- core/unit_test/TestMDRange_e.hpp | 13 +- core/unit_test/TestMemoryPool.hpp | 668 +- core/unit_test/TestPolicyConstruction.hpp | 1288 ++-- core/unit_test/TestRange.hpp | 398 +- core/unit_test/TestReduce.hpp | 454 +- core/unit_test/TestReduceCombinatorical.hpp | 588 +- core/unit_test/TestReduceDeviceView.hpp | 194 +- core/unit_test/TestReducers.hpp | 914 +-- core/unit_test/TestReducers_a.hpp | 7 +- core/unit_test/TestReducers_b.hpp | 7 +- core/unit_test/TestReducers_c.hpp | 7 +- core/unit_test/TestReducers_d.hpp | 8 +- core/unit_test/TestResize.hpp | 119 +- core/unit_test/TestScan.hpp | 101 +- core/unit_test/TestSharedAlloc.hpp | 143 +- core/unit_test/TestTaskScheduler.hpp | 899 ++- core/unit_test/TestTaskScheduler_single.hpp | 53 +- core/unit_test/TestTeam.hpp | 1226 ++-- core/unit_test/TestTeamTeamSize.hpp | 181 +- core/unit_test/TestTeamVector.hpp | 1024 +-- core/unit_test/TestTeamVectorRange.hpp | 410 +- 
core/unit_test/TestTemplateMetaFunctions.hpp | 158 +- core/unit_test/TestTile.hpp | 138 +- core/unit_test/TestUniqueToken.hpp | 109 +- core/unit_test/TestUtilities.hpp | 444 +- core/unit_test/TestViewAPI.hpp | 1864 +++--- core/unit_test/TestViewAPI_a.hpp | 9 +- core/unit_test/TestViewAPI_b.hpp | 13 +- core/unit_test/TestViewAPI_c.hpp | 11 +- core/unit_test/TestViewAPI_d.hpp | 17 +- core/unit_test/TestViewAPI_e.hpp | 201 +- core/unit_test/TestViewCopy.hpp | 191 +- .../unit_test/TestViewCtorPropEmbeddedDim.hpp | 110 +- .../TestViewLayoutStrideAssignment.hpp | 1081 ++-- core/unit_test/TestViewLayoutTiled.hpp | 2146 ++++--- core/unit_test/TestViewMapping_a.hpp | 1700 ++--- core/unit_test/TestViewMapping_b.hpp | 230 +- core/unit_test/TestViewMapping_subview.hpp | 248 +- core/unit_test/TestViewOfClass.hpp | 80 +- core/unit_test/TestViewSpaceAssign.hpp | 31 +- core/unit_test/TestViewSubview.hpp | 2472 +++++--- core/unit_test/TestView_64bit.hpp | 108 +- core/unit_test/TestWorkGraph.hpp | 86 +- core/unit_test/UnitTestMain.cpp | 4 +- core/unit_test/UnitTestMainInit.cpp | 8 +- core/unit_test/UnitTest_PushFinalizeHook.cpp | 56 +- .../UnitTest_PushFinalizeHook_terminate.cpp | 22 +- .../cuda/TestCudaHostPinned_Category.hpp | 13 +- .../cuda/TestCudaHostPinned_SharedAlloc.cpp | 8 +- .../cuda/TestCudaHostPinned_ViewMapping_a.cpp | 1 - .../cuda/TestCudaHostPinned_ViewMapping_b.cpp | 1 - ...TestCudaHostPinned_ViewMapping_subview.cpp | 1 - core/unit_test/cuda/TestCudaUVM_Category.hpp | 10 +- .../cuda/TestCudaUVM_SharedAlloc.cpp | 8 +- .../cuda/TestCudaUVM_ViewMapping_a.cpp | 1 - .../cuda/TestCudaUVM_ViewMapping_b.cpp | 1 - .../cuda/TestCudaUVM_ViewMapping_subview.cpp | 1 - ...estCuda_AtomicOperations_complexdouble.cpp | 5 +- ...TestCuda_AtomicOperations_complexfloat.cpp | 5 +- .../cuda/TestCuda_AtomicOperations_double.cpp | 5 +- .../cuda/TestCuda_AtomicOperations_float.cpp | 5 +- .../cuda/TestCuda_AtomicOperations_int.cpp | 5 +- .../TestCuda_AtomicOperations_longint.cpp | 5 +- 
.../TestCuda_AtomicOperations_longlongint.cpp | 5 +- .../TestCuda_AtomicOperations_unsignedint.cpp | 5 +- ...tCuda_AtomicOperations_unsignedlongint.cpp | 5 +- core/unit_test/cuda/TestCuda_AtomicViews.cpp | 5 +- core/unit_test/cuda/TestCuda_Atomics.cpp | 1 - core/unit_test/cuda/TestCuda_Category.hpp | 10 +- core/unit_test/cuda/TestCuda_Complex.cpp | 5 +- .../cuda/TestCuda_DeepCopyAlignment.cpp | 3 +- core/unit_test/cuda/TestCuda_Init.cpp | 10 +- core/unit_test/cuda/TestCuda_InterOp_Init.cpp | 39 +- .../cuda/TestCuda_InterOp_Streams.cpp | 245 +- .../unit_test/cuda/TestCuda_LocalDeepCopy.cpp | 1 - core/unit_test/cuda/TestCuda_MDRange_a.cpp | 5 +- core/unit_test/cuda/TestCuda_MDRange_b.cpp | 5 +- core/unit_test/cuda/TestCuda_MDRange_c.cpp | 5 +- core/unit_test/cuda/TestCuda_MDRange_d.cpp | 5 +- core/unit_test/cuda/TestCuda_MDRange_e.cpp | 5 +- core/unit_test/cuda/TestCuda_Other.cpp | 16 +- core/unit_test/cuda/TestCuda_RangePolicy.cpp | 5 +- core/unit_test/cuda/TestCuda_Scan.cpp | 5 +- core/unit_test/cuda/TestCuda_SharedAlloc.cpp | 8 +- core/unit_test/cuda/TestCuda_Spaces.cpp | 346 +- core/unit_test/cuda/TestCuda_SubView_a.cpp | 66 +- core/unit_test/cuda/TestCuda_SubView_b.cpp | 24 +- core/unit_test/cuda/TestCuda_SubView_c01.cpp | 7 +- core/unit_test/cuda/TestCuda_SubView_c02.cpp | 8 +- core/unit_test/cuda/TestCuda_SubView_c03.cpp | 8 +- core/unit_test/cuda/TestCuda_SubView_c04.cpp | 7 +- core/unit_test/cuda/TestCuda_SubView_c05.cpp | 8 +- core/unit_test/cuda/TestCuda_SubView_c06.cpp | 8 +- core/unit_test/cuda/TestCuda_SubView_c07.cpp | 7 +- core/unit_test/cuda/TestCuda_SubView_c08.cpp | 8 +- core/unit_test/cuda/TestCuda_SubView_c09.cpp | 8 +- core/unit_test/cuda/TestCuda_SubView_c10.cpp | 7 +- core/unit_test/cuda/TestCuda_SubView_c11.cpp | 8 +- core/unit_test/cuda/TestCuda_SubView_c12.cpp | 8 +- core/unit_test/cuda/TestCuda_SubView_c13.cpp | 7 +- core/unit_test/cuda/TestCuda_Task.cpp | 5 +- core/unit_test/cuda/TestCuda_Team.cpp | 74 +- 
.../cuda/TestCuda_TeamReductionScan.cpp | 52 +- core/unit_test/cuda/TestCuda_TeamScratch.cpp | 48 +- .../cuda/TestCuda_TeamVectorRange.cpp | 6 +- core/unit_test/cuda/TestCuda_UniqueToken.cpp | 5 +- .../TestCuda_ViewLayoutStrideAssignment.cpp | 1 - .../unit_test/cuda/TestCuda_ViewMapping_a.cpp | 1 - .../unit_test/cuda/TestCuda_ViewMapping_b.cpp | 1 - .../cuda/TestCuda_ViewMapping_subview.cpp | 1 - core/unit_test/cuda/TestCuda_ViewOfClass.cpp | 1 - core/unit_test/cuda/TestCuda_WorkGraph.cpp | 4 +- .../default/TestDefaultDeviceType.cpp | 28 +- .../default/TestDefaultDeviceTypeInit_1.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_10.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_11.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_12.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_13.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_14.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_15.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_16.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_2.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_3.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_4.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_5.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_6.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_7.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_8.cpp | 2 +- .../default/TestDefaultDeviceTypeInit_9.cpp | 2 +- .../default/TestDefaultDeviceTypeResize.cpp | 7 +- .../TestDefaultDeviceType_Category.hpp | 12 +- .../default/TestDefaultDeviceType_a1.cpp | 8 +- .../default/TestDefaultDeviceType_a2.cpp | 8 +- .../default/TestDefaultDeviceType_a3.cpp | 8 +- .../default/TestDefaultDeviceType_b1.cpp | 7 +- .../default/TestDefaultDeviceType_b2.cpp | 7 +- .../default/TestDefaultDeviceType_b3.cpp | 7 +- .../default/TestDefaultDeviceType_c1.cpp | 9 +- .../default/TestDefaultDeviceType_c2.cpp | 9 +- .../default/TestDefaultDeviceType_c3.cpp | 9 +- .../default/TestDefaultDeviceType_d.cpp | 24 +- .../hpx/TestHPX_AtomicOperations_double.cpp | 5 +- 
.../hpx/TestHPX_AtomicOperations_float.cpp | 5 +- .../hpx/TestHPX_AtomicOperations_int.cpp | 5 +- .../hpx/TestHPX_AtomicOperations_longint.cpp | 5 +- .../TestHPX_AtomicOperations_longlongint.cpp | 5 +- .../TestHPX_AtomicOperations_unsignedint.cpp | 5 +- ...stHPX_AtomicOperations_unsignedlongint.cpp | 5 +- core/unit_test/hpx/TestHPX_AtomicViews.cpp | 5 +- core/unit_test/hpx/TestHPX_Atomics.cpp | 1 - core/unit_test/hpx/TestHPX_Category.hpp | 10 +- core/unit_test/hpx/TestHPX_Complex.cpp | 5 +- core/unit_test/hpx/TestHPX_Init.cpp | 10 +- core/unit_test/hpx/TestHPX_InterOp.cpp | 10 +- core/unit_test/hpx/TestHPX_MDRange_a.cpp | 5 +- core/unit_test/hpx/TestHPX_MDRange_b.cpp | 5 +- core/unit_test/hpx/TestHPX_MDRange_c.cpp | 5 +- core/unit_test/hpx/TestHPX_MDRange_d.cpp | 5 +- core/unit_test/hpx/TestHPX_MDRange_e.cpp | 5 +- core/unit_test/hpx/TestHPX_RangePolicy.cpp | 5 +- core/unit_test/hpx/TestHPX_Scan.cpp | 5 +- core/unit_test/hpx/TestHPX_SharedAlloc.cpp | 8 +- core/unit_test/hpx/TestHPX_SubView_a.cpp | 57 +- core/unit_test/hpx/TestHPX_SubView_b.cpp | 24 +- core/unit_test/hpx/TestHPX_SubView_c01.cpp | 7 +- core/unit_test/hpx/TestHPX_SubView_c02.cpp | 8 +- core/unit_test/hpx/TestHPX_SubView_c03.cpp | 8 +- core/unit_test/hpx/TestHPX_SubView_c04.cpp | 7 +- core/unit_test/hpx/TestHPX_SubView_c05.cpp | 8 +- core/unit_test/hpx/TestHPX_SubView_c06.cpp | 8 +- core/unit_test/hpx/TestHPX_SubView_c07.cpp | 7 +- core/unit_test/hpx/TestHPX_SubView_c08.cpp | 8 +- core/unit_test/hpx/TestHPX_SubView_c09.cpp | 8 +- core/unit_test/hpx/TestHPX_SubView_c10.cpp | 7 +- core/unit_test/hpx/TestHPX_SubView_c11.cpp | 8 +- core/unit_test/hpx/TestHPX_SubView_c12.cpp | 8 +- core/unit_test/hpx/TestHPX_SubView_c13.cpp | 7 +- core/unit_test/hpx/TestHPX_Task.cpp | 5 +- core/unit_test/hpx/TestHPX_Team.cpp | 47 +- .../hpx/TestHPX_TeamReductionScan.cpp | 53 +- core/unit_test/hpx/TestHPX_TeamScratch.cpp | 44 +- .../unit_test/hpx/TestHPX_TeamVectorRange.cpp | 6 +- core/unit_test/hpx/TestHPX_UniqueToken.cpp | 
5 +- core/unit_test/hpx/TestHPX_ViewMapping_a.cpp | 1 - core/unit_test/hpx/TestHPX_ViewMapping_b.cpp | 1 - .../hpx/TestHPX_ViewMapping_subview.cpp | 1 - core/unit_test/hpx/TestHPX_ViewOfClass.cpp | 1 - core/unit_test/hpx/TestHPX_WorkGraph.cpp | 4 +- core/unit_test/openmp/TestOpenMP.hpp | 22 +- ...tOpenMP_AtomicOperations_complexdouble.cpp | 5 +- ...stOpenMP_AtomicOperations_complexfloat.cpp | 5 +- .../TestOpenMP_AtomicOperations_double.cpp | 5 +- .../TestOpenMP_AtomicOperations_float.cpp | 5 +- .../TestOpenMP_AtomicOperations_int.cpp | 5 +- .../TestOpenMP_AtomicOperations_longint.cpp | 5 +- ...estOpenMP_AtomicOperations_longlongint.cpp | 5 +- ...estOpenMP_AtomicOperations_unsignedint.cpp | 5 +- ...penMP_AtomicOperations_unsignedlongint.cpp | 5 +- .../openmp/TestOpenMP_AtomicViews.cpp | 5 +- core/unit_test/openmp/TestOpenMP_Atomics.cpp | 1 - core/unit_test/openmp/TestOpenMP_Category.hpp | 10 +- core/unit_test/openmp/TestOpenMP_Complex.cpp | 5 +- core/unit_test/openmp/TestOpenMP_Init.cpp | 10 +- core/unit_test/openmp/TestOpenMP_InterOp.cpp | 34 +- .../openmp/TestOpenMP_LocalDeepCopy.cpp | 1 - .../unit_test/openmp/TestOpenMP_MDRange_a.cpp | 5 +- .../unit_test/openmp/TestOpenMP_MDRange_b.cpp | 5 +- .../unit_test/openmp/TestOpenMP_MDRange_c.cpp | 5 +- .../unit_test/openmp/TestOpenMP_MDRange_d.cpp | 5 +- .../unit_test/openmp/TestOpenMP_MDRange_e.cpp | 5 +- core/unit_test/openmp/TestOpenMP_Other.cpp | 108 +- .../openmp/TestOpenMP_RangePolicy.cpp | 5 +- core/unit_test/openmp/TestOpenMP_Scan.cpp | 5 +- .../openmp/TestOpenMP_SharedAlloc.cpp | 8 +- .../unit_test/openmp/TestOpenMP_SubView_a.cpp | 66 +- .../unit_test/openmp/TestOpenMP_SubView_b.cpp | 24 +- .../openmp/TestOpenMP_SubView_c01.cpp | 7 +- .../openmp/TestOpenMP_SubView_c02.cpp | 8 +- .../openmp/TestOpenMP_SubView_c03.cpp | 8 +- .../openmp/TestOpenMP_SubView_c04.cpp | 7 +- .../openmp/TestOpenMP_SubView_c05.cpp | 8 +- .../openmp/TestOpenMP_SubView_c06.cpp | 8 +- .../openmp/TestOpenMP_SubView_c07.cpp | 7 +- 
.../openmp/TestOpenMP_SubView_c08.cpp | 8 +- .../openmp/TestOpenMP_SubView_c09.cpp | 8 +- .../openmp/TestOpenMP_SubView_c10.cpp | 7 +- .../openmp/TestOpenMP_SubView_c11.cpp | 8 +- .../openmp/TestOpenMP_SubView_c12.cpp | 8 +- .../openmp/TestOpenMP_SubView_c13.cpp | 7 +- core/unit_test/openmp/TestOpenMP_Task.cpp | 5 +- core/unit_test/openmp/TestOpenMP_Team.cpp | 74 +- .../openmp/TestOpenMP_TeamReductionScan.cpp | 53 +- .../openmp/TestOpenMP_TeamScratch.cpp | 48 +- .../openmp/TestOpenMP_TeamTeamSize.cpp | 1 - .../openmp/TestOpenMP_TeamVectorRange.cpp | 6 +- .../openmp/TestOpenMP_UniqueToken.cpp | 5 +- .../TestOpenMP_ViewLayoutStrideAssignment.cpp | 1 - .../openmp/TestOpenMP_ViewMapping_a.cpp | 1 - .../openmp/TestOpenMP_ViewMapping_b.cpp | 1 - .../openmp/TestOpenMP_ViewMapping_subview.cpp | 1 - .../openmp/TestOpenMP_ViewOfClass.cpp | 1 - .../unit_test/openmp/TestOpenMP_WorkGraph.cpp | 4 +- .../openmptarget/TestOpenMPTarget.hpp | 30 +- ...PTarget_AtomicOperations_complexdouble.cpp | 5 +- ...MPTarget_AtomicOperations_complexfloat.cpp | 5 +- ...stOpenMPTarget_AtomicOperations_double.cpp | 5 +- ...estOpenMPTarget_AtomicOperations_float.cpp | 5 +- .../TestOpenMPTarget_AtomicOperations_int.cpp | 5 +- ...tOpenMPTarget_AtomicOperations_longint.cpp | 5 +- ...nMPTarget_AtomicOperations_longlongint.cpp | 5 +- ...nMPTarget_AtomicOperations_unsignedint.cpp | 5 +- ...arget_AtomicOperations_unsignedlongint.cpp | 5 +- .../TestOpenMPTarget_AtomicViews.cpp | 5 +- .../openmptarget/TestOpenMPTarget_Atomics.cpp | 1 - .../TestOpenMPTarget_Category.hpp | 10 +- .../openmptarget/TestOpenMPTarget_Complex.cpp | 5 +- .../openmptarget/TestOpenMPTarget_Init.cpp | 10 +- .../TestOpenMPTarget_MDRange_a.cpp | 5 +- .../TestOpenMPTarget_MDRange_b.cpp | 5 +- .../TestOpenMPTarget_MDRange_c.cpp | 5 +- .../TestOpenMPTarget_MDRange_d.cpp | 5 +- .../TestOpenMPTarget_MDRange_e.cpp | 5 +- .../openmptarget/TestOpenMPTarget_Other.cpp | 12 +- .../TestOpenMPTarget_RangePolicy.cpp | 5 +- 
.../openmptarget/TestOpenMPTarget_Scan.cpp | 5 +- .../TestOpenMPTarget_SharedAlloc.cpp | 9 +- .../TestOpenMPTarget_SubView_a.cpp | 66 +- .../TestOpenMPTarget_SubView_b.cpp | 24 +- .../TestOpenMPTarget_SubView_c01.cpp | 7 +- .../TestOpenMPTarget_SubView_c02.cpp | 8 +- .../TestOpenMPTarget_SubView_c03.cpp | 8 +- .../TestOpenMPTarget_SubView_c04.cpp | 7 +- .../TestOpenMPTarget_SubView_c05.cpp | 8 +- .../TestOpenMPTarget_SubView_c06.cpp | 8 +- .../TestOpenMPTarget_SubView_c07.cpp | 7 +- .../TestOpenMPTarget_SubView_c08.cpp | 8 +- .../TestOpenMPTarget_SubView_c09.cpp | 8 +- .../TestOpenMPTarget_SubView_c10.cpp | 7 +- .../TestOpenMPTarget_SubView_c11.cpp | 8 +- .../TestOpenMPTarget_SubView_c12.cpp | 8 +- .../openmptarget/TestOpenMPTarget_Team.cpp | 47 +- .../TestOpenMPTarget_TeamReductionScan.cpp | 53 +- .../TestOpenMPTarget_TeamScratch.cpp | 44 +- .../TestOpenMPTarget_ViewMapping_a.cpp | 1 - .../TestOpenMPTarget_ViewMapping_b.cpp | 1 - .../TestOpenMPTarget_ViewMapping_subview.cpp | 1 - .../TestOpenMPTarget_ViewOfClass.cpp | 1 - ...threads_AtomicOperations_complexdouble.cpp | 5 +- ...qthreads_AtomicOperations_complexfloat.cpp | 5 +- .../TestQqthreads_AtomicOperations_double.cpp | 5 +- .../TestQqthreads_AtomicOperations_float.cpp | 5 +- .../TestQqthreads_AtomicOperations_int.cpp | 5 +- ...TestQqthreads_AtomicOperations_longint.cpp | 5 +- ...Qqthreads_AtomicOperations_longlongint.cpp | 5 +- ...Qqthreads_AtomicOperations_unsignedint.cpp | 5 +- ...reads_AtomicOperations_unsignedlongint.cpp | 5 +- .../qthreads/TestQqthreads_MDRange_a.cpp | 5 +- .../qthreads/TestQqthreads_MDRange_b.cpp | 5 +- .../qthreads/TestQqthreads_MDRange_c.cpp | 5 +- .../qthreads/TestQqthreads_MDRange_d.cpp | 5 +- .../qthreads/TestQqthreads_MDRange_e.cpp | 5 +- core/unit_test/qthreads/TestQthreads.hpp | 31 +- .../qthreads/TestQthreads_Atomics.cpp | 17 +- .../qthreads/TestQthreads_Category.hpp | 10 +- .../qthreads/TestQthreads_Complex.cpp | 5 +- .../unit_test/qthreads/TestQthreads_Other.cpp | 46 +- 
.../qthreads/TestQthreads_Reductions.cpp | 38 +- .../qthreads/TestQthreads_SubView_a.cpp | 35 +- .../qthreads/TestQthreads_SubView_b.cpp | 8 +- .../qthreads/TestQthreads_SubView_c01.cpp | 5 +- .../qthreads/TestQthreads_SubView_c02.cpp | 5 +- .../qthreads/TestQthreads_SubView_c03.cpp | 5 +- .../qthreads/TestQthreads_SubView_c04.cpp | 5 +- .../qthreads/TestQthreads_SubView_c05.cpp | 5 +- .../qthreads/TestQthreads_SubView_c06.cpp | 5 +- .../qthreads/TestQthreads_SubView_c07.cpp | 5 +- .../qthreads/TestQthreads_SubView_c08.cpp | 5 +- .../qthreads/TestQthreads_SubView_c09.cpp | 5 +- .../qthreads/TestQthreads_SubView_c10.cpp | 5 +- .../qthreads/TestQthreads_SubView_c11.cpp | 5 +- .../qthreads/TestQthreads_SubView_c12.cpp | 5 +- .../qthreads/TestQthreads_SubView_c13.cpp | 5 +- core/unit_test/qthreads/TestQthreads_Team.cpp | 30 +- .../qthreads/TestQthreads_ViewAPI_a.cpp | 5 +- .../qthreads/TestQthreads_ViewAPI_b.cpp | 23 +- .../rocm/TestROCmHostPinned_Category.hpp | 10 +- .../rocm/TestROCmHostPinned_SharedAlloc.cpp | 8 +- .../rocm/TestROCmHostPinned_ViewMapping_a.cpp | 1 - .../rocm/TestROCmHostPinned_ViewMapping_b.cpp | 1 - ...TestROCmHostPinned_ViewMapping_subview.cpp | 1 - .../rocm/TestROCm_AtomicOperations_double.cpp | 5 +- .../rocm/TestROCm_AtomicOperations_float.cpp | 5 +- .../rocm/TestROCm_AtomicOperations_int.cpp | 5 +- .../TestROCm_AtomicOperations_longint.cpp | 5 +- .../TestROCm_AtomicOperations_longlongint.cpp | 5 +- .../TestROCm_AtomicOperations_unsignedint.cpp | 5 +- ...tROCm_AtomicOperations_unsignedlongint.cpp | 5 +- core/unit_test/rocm/TestROCm_AtomicViews.cpp | 5 +- core/unit_test/rocm/TestROCm_Atomics.cpp | 1 - core/unit_test/rocm/TestROCm_Category.hpp | 10 +- core/unit_test/rocm/TestROCm_Complex.cpp | 5 +- core/unit_test/rocm/TestROCm_Crs.cpp | 5 +- core/unit_test/rocm/TestROCm_Init.cpp | 10 +- .../rocm/TestROCm_MDRangeReduce_a.cpp | 11 +- .../rocm/TestROCm_MDRangeReduce_b.cpp | 11 +- .../rocm/TestROCm_MDRangeReduce_c.cpp | 11 +- 
.../rocm/TestROCm_MDRangeReduce_d.cpp | 11 +- .../rocm/TestROCm_MDRangeReduce_e.cpp | 11 +- core/unit_test/rocm/TestROCm_MDRange_a.cpp | 5 +- core/unit_test/rocm/TestROCm_MDRange_b.cpp | 5 +- core/unit_test/rocm/TestROCm_MDRange_c.cpp | 5 +- core/unit_test/rocm/TestROCm_MDRange_d.cpp | 5 +- core/unit_test/rocm/TestROCm_MDRange_e.cpp | 5 +- core/unit_test/rocm/TestROCm_Other.cpp | 14 +- core/unit_test/rocm/TestROCm_RangePolicy.cpp | 5 +- core/unit_test/rocm/TestROCm_Scan.cpp | 5 +- core/unit_test/rocm/TestROCm_SharedAlloc.cpp | 9 +- core/unit_test/rocm/TestROCm_Spaces.cpp | 194 +- core/unit_test/rocm/TestROCm_SubView_a.cpp | 66 +- core/unit_test/rocm/TestROCm_SubView_b.cpp | 24 +- core/unit_test/rocm/TestROCm_SubView_c01.cpp | 7 +- core/unit_test/rocm/TestROCm_SubView_c02.cpp | 8 +- core/unit_test/rocm/TestROCm_SubView_c03.cpp | 8 +- core/unit_test/rocm/TestROCm_SubView_c04.cpp | 7 +- core/unit_test/rocm/TestROCm_SubView_c05.cpp | 8 +- core/unit_test/rocm/TestROCm_SubView_c06.cpp | 8 +- core/unit_test/rocm/TestROCm_SubView_c07.cpp | 7 +- core/unit_test/rocm/TestROCm_SubView_c08.cpp | 8 +- core/unit_test/rocm/TestROCm_SubView_c09.cpp | 8 +- core/unit_test/rocm/TestROCm_SubView_c10.cpp | 7 +- core/unit_test/rocm/TestROCm_SubView_c11.cpp | 8 +- core/unit_test/rocm/TestROCm_SubView_c12.cpp | 8 +- core/unit_test/rocm/TestROCm_SubView_c13.cpp | 7 +- core/unit_test/rocm/TestROCm_Team.cpp | 47 +- .../rocm/TestROCm_TeamReductionScan.cpp | 52 +- core/unit_test/rocm/TestROCm_TeamScratch.cpp | 48 +- .../unit_test/rocm/TestROCm_ViewMapping_a.cpp | 1 - .../unit_test/rocm/TestROCm_ViewMapping_b.cpp | 1 - .../rocm/TestROCm_ViewMapping_subview.cpp | 1 - core/unit_test/rocm/TestROCm_ViewOfClass.cpp | 1 - ...tSerial_AtomicOperations_complexdouble.cpp | 5 +- ...stSerial_AtomicOperations_complexfloat.cpp | 5 +- .../TestSerial_AtomicOperations_double.cpp | 5 +- .../TestSerial_AtomicOperations_float.cpp | 5 +- .../TestSerial_AtomicOperations_int.cpp | 5 +- 
.../TestSerial_AtomicOperations_longint.cpp | 5 +- ...estSerial_AtomicOperations_longlongint.cpp | 5 +- ...estSerial_AtomicOperations_unsignedint.cpp | 5 +- ...erial_AtomicOperations_unsignedlongint.cpp | 5 +- .../serial/TestSerial_AtomicViews.cpp | 5 +- core/unit_test/serial/TestSerial_Atomics.cpp | 1 - core/unit_test/serial/TestSerial_Category.hpp | 10 +- core/unit_test/serial/TestSerial_Complex.cpp | 5 +- core/unit_test/serial/TestSerial_Init.cpp | 10 +- .../serial/TestSerial_LocalDeepCopy.cpp | 1 - .../unit_test/serial/TestSerial_MDRange_a.cpp | 5 +- .../unit_test/serial/TestSerial_MDRange_b.cpp | 5 +- .../unit_test/serial/TestSerial_MDRange_c.cpp | 5 +- .../unit_test/serial/TestSerial_MDRange_d.cpp | 5 +- .../unit_test/serial/TestSerial_MDRange_e.cpp | 5 +- core/unit_test/serial/TestSerial_Other.cpp | 16 +- .../serial/TestSerial_RangePolicy.cpp | 5 +- core/unit_test/serial/TestSerial_Scan.cpp | 5 +- .../serial/TestSerial_SharedAlloc.cpp | 8 +- .../unit_test/serial/TestSerial_SubView_a.cpp | 66 +- .../unit_test/serial/TestSerial_SubView_b.cpp | 24 +- .../serial/TestSerial_SubView_c01.cpp | 7 +- .../serial/TestSerial_SubView_c02.cpp | 8 +- .../serial/TestSerial_SubView_c03.cpp | 8 +- .../serial/TestSerial_SubView_c04.cpp | 7 +- .../serial/TestSerial_SubView_c05.cpp | 8 +- .../serial/TestSerial_SubView_c06.cpp | 8 +- .../serial/TestSerial_SubView_c07.cpp | 7 +- .../serial/TestSerial_SubView_c08.cpp | 8 +- .../serial/TestSerial_SubView_c09.cpp | 8 +- .../serial/TestSerial_SubView_c10.cpp | 7 +- .../serial/TestSerial_SubView_c11.cpp | 8 +- .../serial/TestSerial_SubView_c12.cpp | 8 +- .../serial/TestSerial_SubView_c13.cpp | 7 +- core/unit_test/serial/TestSerial_Task.cpp | 5 +- core/unit_test/serial/TestSerial_Team.cpp | 74 +- .../serial/TestSerial_TeamReductionScan.cpp | 53 +- .../serial/TestSerial_TeamScratch.cpp | 48 +- .../serial/TestSerial_TeamVectorRange.cpp | 6 +- .../unit_test/serial/TestSerial_ViewAPI_e.cpp | 1 - .../TestSerial_ViewLayoutStrideAssignment.cpp 
| 1 - .../serial/TestSerial_ViewMapping_a.cpp | 1 - .../serial/TestSerial_ViewMapping_b.cpp | 1 - .../serial/TestSerial_ViewMapping_subview.cpp | 1 - .../serial/TestSerial_ViewOfClass.cpp | 1 - .../unit_test/serial/TestSerial_WorkGraph.cpp | 4 +- .../unit_test/standalone/UnitTestMainInit.cpp | 10 +- ...Threads_AtomicOperations_complexdouble.cpp | 5 +- ...tThreads_AtomicOperations_complexfloat.cpp | 5 +- .../TestThreads_AtomicOperations_double.cpp | 5 +- .../TestThreads_AtomicOperations_float.cpp | 5 +- .../TestThreads_AtomicOperations_int.cpp | 5 +- .../TestThreads_AtomicOperations_longint.cpp | 5 +- ...stThreads_AtomicOperations_longlongint.cpp | 5 +- ...stThreads_AtomicOperations_unsignedint.cpp | 5 +- ...reads_AtomicOperations_unsignedlongint.cpp | 5 +- .../threads/TestThreads_AtomicViews.cpp | 5 +- .../unit_test/threads/TestThreads_Atomics.cpp | 1 - .../threads/TestThreads_Category.hpp | 10 +- .../unit_test/threads/TestThreads_Complex.cpp | 5 +- core/unit_test/threads/TestThreads_Crs.cpp | 4 +- core/unit_test/threads/TestThreads_Init.cpp | 10 +- .../threads/TestThreads_LocalDeepCopy.cpp | 1 - .../threads/TestThreads_MDRange_a.cpp | 5 +- .../threads/TestThreads_MDRange_b.cpp | 5 +- .../threads/TestThreads_MDRange_c.cpp | 5 +- .../threads/TestThreads_MDRange_d.cpp | 5 +- .../threads/TestThreads_MDRange_e.cpp | 5 +- core/unit_test/threads/TestThreads_Other.cpp | 16 +- .../threads/TestThreads_RangePolicy.cpp | 5 +- core/unit_test/threads/TestThreads_Scan.cpp | 5 +- .../threads/TestThreads_SharedAlloc.cpp | 8 +- .../threads/TestThreads_SubView_a.cpp | 66 +- .../threads/TestThreads_SubView_b.cpp | 24 +- .../threads/TestThreads_SubView_c01.cpp | 7 +- .../threads/TestThreads_SubView_c02.cpp | 8 +- .../threads/TestThreads_SubView_c03.cpp | 8 +- .../threads/TestThreads_SubView_c04.cpp | 7 +- .../threads/TestThreads_SubView_c05.cpp | 8 +- .../threads/TestThreads_SubView_c06.cpp | 8 +- .../threads/TestThreads_SubView_c07.cpp | 7 +- .../threads/TestThreads_SubView_c08.cpp | 
8 +- .../threads/TestThreads_SubView_c09.cpp | 8 +- .../threads/TestThreads_SubView_c10.cpp | 7 +- .../threads/TestThreads_SubView_c11.cpp | 8 +- .../threads/TestThreads_SubView_c12.cpp | 8 +- .../threads/TestThreads_SubView_c13.cpp | 7 +- core/unit_test/threads/TestThreads_Team.cpp | 74 +- .../threads/TestThreads_TeamReductionScan.cpp | 53 +- .../threads/TestThreads_TeamScratch.cpp | 48 +- .../threads/TestThreads_TeamTeamSize.cpp | 2 - .../threads/TestThreads_TeamVectorRange.cpp | 6 +- ...TestThreads_ViewLayoutStrideAssignment.cpp | 1 - .../threads/TestThreads_ViewMapping_a.cpp | 1 - .../threads/TestThreads_ViewMapping_b.cpp | 1 - .../TestThreads_ViewMapping_subview.cpp | 1 - .../threads/TestThreads_ViewOfClass.cpp | 1 - .../threads/TestThreads_WorkGraph.cpp | 4 +- .../query_cuda_arch.cpp | 24 +- example/cmake_build/cmake_example.cpp | 6 +- example/common/VectorImport.hpp | 278 +- example/common/WrapMPI.hpp | 61 +- example/feint/ElemFunctor.hpp | 426 +- example/feint/feint.hpp | 121 +- example/feint/feint_cuda.cpp | 25 +- example/feint/feint_fwd.hpp | 18 +- example/feint/feint_hpx.cpp | 11 +- example/feint/feint_openmp.cpp | 23 +- example/feint/feint_rocm.cpp | 25 +- example/feint/feint_serial.cpp | 23 +- example/feint/feint_threads.cpp | 17 +- example/feint/main.cpp | 16 +- example/fenl/CGSolve.hpp | 297 +- example/fenl/fenl.cpp | 147 +- example/fenl/fenl.hpp | 46 +- example/fenl/fenl_functors.hpp | 1404 ++--- example/fenl/fenl_impl.hpp | 553 +- example/fenl/main.cpp | 450 +- example/fixture/BoxElemFixture.hpp | 406 +- example/fixture/BoxElemPart.cpp | 454 +- example/fixture/BoxElemPart.hpp | 301 +- example/fixture/HexElement.hpp | 287 +- example/fixture/Main.cpp | 348 +- example/fixture/TestFixture.cpp | 15 +- example/fixture/TestFixture.hpp | 128 +- example/global_2_local_ids/G2L.hpp | 158 +- example/global_2_local_ids/G2L_Main.cpp | 66 +- example/grow_array/grow_array.hpp | 258 +- example/grow_array/main.cpp | 56 +- example/make_buildlink/main.cpp | 13 +- 
example/md_skeleton/force.cpp | 134 +- example/md_skeleton/main.cpp | 100 +- example/md_skeleton/neighbor.cpp | 189 +- example/md_skeleton/setup.cpp | 167 +- example/multi_fem/BoxMeshFixture.hpp | 661 +- example/multi_fem/BoxMeshPartition.cpp | 446 +- example/multi_fem/BoxMeshPartition.hpp | 143 +- example/multi_fem/Explicit.hpp | 430 +- example/multi_fem/ExplicitFunctors.hpp | 2005 +++--- example/multi_fem/FEMesh.hpp | 30 +- example/multi_fem/HexElement.hpp | 287 +- example/multi_fem/HexExplicitFunctions.hpp | 500 +- example/multi_fem/Implicit.hpp | 317 +- example/multi_fem/ImplicitFunctors.hpp | 694 +-- example/multi_fem/LinAlgBLAS.hpp | 633 +- example/multi_fem/Nonlinear.hpp | 519 +- example/multi_fem/NonlinearElement_Cuda.hpp | 398 +- example/multi_fem/NonlinearFunctors.hpp | 554 +- example/multi_fem/ParallelComm.hpp | 105 +- example/multi_fem/ParallelDataMap.hpp | 529 +- example/multi_fem/ParallelMachine.cpp | 15 +- example/multi_fem/ParallelMachine.hpp | 64 +- example/multi_fem/SparseLinearSystem.hpp | 378 +- example/multi_fem/SparseLinearSystemFill.hpp | 273 +- example/multi_fem/SparseLinearSystem_Cuda.hpp | 160 +- example/multi_fem/TestBoxMeshFixture.hpp | 207 +- example/multi_fem/TestBoxMeshPartition.cpp | 162 +- example/multi_fem/TestCuda.cpp | 128 +- example/multi_fem/TestHost.cpp | 96 +- example/multi_fem/TestHybridFEM.cpp | 414 +- example/query_device/query_device.cpp | 47 +- example/sort_array/main.cpp | 46 +- example/sort_array/sort_array.hpp | 129 +- .../tutorial/01_hello_world/hello_world.cpp | 25 +- .../hello_world_lambda.cpp | 20 +- .../02_simple_reduce/simple_reduce.cpp | 35 +- .../simple_reduce_lambda.cpp | 38 +- .../tutorial/03_simple_view/simple_view.cpp | 45 +- .../simple_view_lambda.cpp | 67 +- .../simple_memoryspaces.cpp | 33 +- .../05_simple_atomics/simple_atomics.cpp | 65 +- .../simple_mdrangepolicy.cpp | 119 +- .../01_data_layouts/data_layouts.cpp | 61 +- .../02_memory_traits/memory_traits.cpp | 75 +- 
.../Advanced_Views/03_subviews/subviews.cpp | 97 +- .../Advanced_Views/04_dualviews/dual_view.cpp | 198 +- .../05_NVIDIA_UVM/uvm_example.cpp | 80 +- .../overlapping_deepcopy.cpp | 118 +- .../01_random_numbers/random_numbers.cpp | 169 +- .../01_thread_teams/thread_teams.cpp | 56 +- .../thread_teams_lambda.cpp | 42 +- .../nested_parallel_for.cpp | 70 +- .../03_vectorization/vectorization.cpp | 145 +- .../04_team_scan/team_scan.cpp | 140 +- .../launch_bounds/launch_bounds_reduce.cpp | 188 +- example/virtual_functions/classes.cpp | 22 +- example/virtual_functions/classes.hpp | 48 +- example/virtual_functions/main.cpp | 52 +- scripts/testing_scripts/TestEXEC_TEST.cpp | 6 +- 1881 files changed, 108300 insertions(+), 103040 deletions(-) diff --git a/algorithms/src/Kokkos_Random.hpp b/algorithms/src/Kokkos_Random.hpp index e14471a48ae..875149f82e6 100644 --- a/algorithms/src/Kokkos_Random.hpp +++ b/algorithms/src/Kokkos_Random.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -59,6 +59,7 @@ namespace Kokkos { +// clang-format off /*Template functions to get equidistributed random numbers from a generator for a specific Scalar type template @@ -229,1019 +230,979 @@ namespace Kokkos { ViewType::value_type start, ViewType::value_type end); */ +// clang-format on - template - struct rand; - - - template - struct rand { - - KOKKOS_INLINE_FUNCTION - static short max(){return 127;} - KOKKOS_INLINE_FUNCTION - static short draw(Generator& gen) - {return short((gen.rand()&0xff+256)%256);} - KOKKOS_INLINE_FUNCTION - static short draw(Generator& gen, const char& range) - {return char(gen.rand(range));} - KOKKOS_INLINE_FUNCTION - static short draw(Generator& gen, const char& start, const char& end) - {return char(gen.rand(start,end));} - - }; - - template - struct rand { - KOKKOS_INLINE_FUNCTION - static short max(){return 32767;} - KOKKOS_INLINE_FUNCTION - static short draw(Generator& gen) - {return short((gen.rand()&0xffff+65536)%32768);} - KOKKOS_INLINE_FUNCTION - static short draw(Generator& gen, const short& range) - {return short(gen.rand(range));} - KOKKOS_INLINE_FUNCTION - static short draw(Generator& gen, const short& start, const short& end) - {return short(gen.rand(start,end));} - - }; - - template - struct rand { - KOKKOS_INLINE_FUNCTION - static int max(){return Generator::MAX_RAND;} - KOKKOS_INLINE_FUNCTION - static int draw(Generator& gen) - {return gen.rand();} - KOKKOS_INLINE_FUNCTION - static int draw(Generator& gen, const int& range) - {return gen.rand(range);} - KOKKOS_INLINE_FUNCTION - static int draw(Generator& gen, const int& start, const int& end) - {return gen.rand(start,end);} +template +struct rand; - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static short max() { return 127; } + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen) { + return short((gen.rand() & 0xff + 256) % 
256); + } + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const char& range) { + return char(gen.rand(range)); + } + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const char& start, const char& end) { + return char(gen.rand(start, end)); + } +}; - template - struct rand { - KOKKOS_INLINE_FUNCTION - static unsigned int max () { - return Generator::MAX_URAND; - } - KOKKOS_INLINE_FUNCTION - static unsigned int draw (Generator& gen) { - return gen.urand (); - } - KOKKOS_INLINE_FUNCTION - static unsigned int draw(Generator& gen, const unsigned int& range) { - return gen.urand (range); - } - KOKKOS_INLINE_FUNCTION - static unsigned int - draw (Generator& gen, const unsigned int& start, const unsigned int& end) { - return gen.urand (start, end); - } - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static short max() { return 32767; } + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen) { + return short((gen.rand() & 0xffff + 65536) % 32768); + } + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const short& range) { + return short(gen.rand(range)); + } + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const short& start, const short& end) { + return short(gen.rand(start, end)); + } +}; - template - struct rand { - KOKKOS_INLINE_FUNCTION - static long max () { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (long) == 4 ? - static_cast (Generator::MAX_RAND) : - static_cast (Generator::MAX_RAND64); - } - KOKKOS_INLINE_FUNCTION - static long draw (Generator& gen) { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (long) == 4 ? 
- static_cast (gen.rand ()) : - static_cast (gen.rand64 ()); - } - KOKKOS_INLINE_FUNCTION - static long draw (Generator& gen, const long& range) { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (long) == 4 ? - static_cast (gen.rand (static_cast (range))) : - static_cast (gen.rand64 (range)); - } - KOKKOS_INLINE_FUNCTION - static long draw (Generator& gen, const long& start, const long& end) { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (long) == 4 ? - static_cast (gen.rand (static_cast (start), - static_cast (end))) : - static_cast (gen.rand64 (start, end)); - } - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static int max() { return Generator::MAX_RAND; } + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen) { return gen.rand(); } + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen, const int& range) { return gen.rand(range); } + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen, const int& start, const int& end) { + return gen.rand(start, end); + } +}; - template - struct rand { - KOKKOS_INLINE_FUNCTION - static unsigned long max () { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (unsigned long) == 4 ? - static_cast (Generator::MAX_URAND) : - static_cast (Generator::MAX_URAND64); - } - KOKKOS_INLINE_FUNCTION - static unsigned long draw (Generator& gen) { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (unsigned long) == 4 ? 
- static_cast (gen.urand ()) : - static_cast (gen.urand64 ()); - } - KOKKOS_INLINE_FUNCTION - static unsigned long draw(Generator& gen, const unsigned long& range) { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (unsigned long) == 4 ? - static_cast (gen.urand (static_cast (range))) : - static_cast (gen.urand64 (range)); - } - KOKKOS_INLINE_FUNCTION - static unsigned long - draw (Generator& gen, const unsigned long& start, const unsigned long& end) { - // FIXME (mfh 26 Oct 2014) It would be better to select the - // return value at compile time, using something like enable_if. - return sizeof (unsigned long) == 4 ? - static_cast (gen.urand (static_cast (start), - static_cast (end))) : - static_cast (gen.urand64 (start, end)); - } - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static unsigned int max() { return Generator::MAX_URAND; } + KOKKOS_INLINE_FUNCTION + static unsigned int draw(Generator& gen) { return gen.urand(); } + KOKKOS_INLINE_FUNCTION + static unsigned int draw(Generator& gen, const unsigned int& range) { + return gen.urand(range); + } + KOKKOS_INLINE_FUNCTION + static unsigned int draw(Generator& gen, const unsigned int& start, + const unsigned int& end) { + return gen.urand(start, end); + } +}; - // NOTE (mfh 26 oct 2014) This is a partial specialization for long - // long, a C99 / C++11 signed type which is guaranteed to be at - // least 64 bits. Do NOT write a partial specialization for - // int64_t!!! This is just a typedef! It could be either long or - // long long. We don't know which a priori, and I've seen both. - // The types long and long long are guaranteed to differ, so it's - // always safe to specialize for both. - template - struct rand { - KOKKOS_INLINE_FUNCTION - static long long max () { - // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. 
- return Generator::MAX_RAND64; - } - KOKKOS_INLINE_FUNCTION - static long long draw (Generator& gen) { - // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. - return gen.rand64 (); - } - KOKKOS_INLINE_FUNCTION - static long long draw (Generator& gen, const long long& range) { - // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. - return gen.rand64 (range); - } - KOKKOS_INLINE_FUNCTION - static long long draw (Generator& gen, const long long& start, const long long& end) { - // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. - return gen.rand64 (start, end); - } - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static long max() { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(long) == 4 ? static_cast(Generator::MAX_RAND) + : static_cast(Generator::MAX_RAND64); + } + KOKKOS_INLINE_FUNCTION + static long draw(Generator& gen) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(long) == 4 ? static_cast(gen.rand()) + : static_cast(gen.rand64()); + } + KOKKOS_INLINE_FUNCTION + static long draw(Generator& gen, const long& range) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(long) == 4 + ? static_cast(gen.rand(static_cast(range))) + : static_cast(gen.rand64(range)); + } + KOKKOS_INLINE_FUNCTION + static long draw(Generator& gen, const long& start, const long& end) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(long) == 4 + ? 
static_cast( + gen.rand(static_cast(start), static_cast(end))) + : static_cast(gen.rand64(start, end)); + } +}; - // NOTE (mfh 26 oct 2014) This is a partial specialization for - // unsigned long long, a C99 / C++11 unsigned type which is - // guaranteed to be at least 64 bits. Do NOT write a partial - // specialization for uint64_t!!! This is just a typedef! It could - // be either unsigned long or unsigned long long. We don't know - // which a priori, and I've seen both. The types unsigned long and - // unsigned long long are guaranteed to differ, so it's always safe - // to specialize for both. - template - struct rand { - KOKKOS_INLINE_FUNCTION - static unsigned long long max () { - // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits. - return Generator::MAX_URAND64; - } - KOKKOS_INLINE_FUNCTION - static unsigned long long draw (Generator& gen) { - // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits. - return gen.urand64 (); - } - KOKKOS_INLINE_FUNCTION - static unsigned long long draw (Generator& gen, const unsigned long long& range) { - // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. - return gen.urand64 (range); - } - KOKKOS_INLINE_FUNCTION - static unsigned long long - draw (Generator& gen, const unsigned long long& start, const unsigned long long& end) { - // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. - return gen.urand64 (start, end); - } - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static unsigned long max() { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(unsigned long) == 4 + ? static_cast(Generator::MAX_URAND) + : static_cast(Generator::MAX_URAND64); + } + KOKKOS_INLINE_FUNCTION + static unsigned long draw(Generator& gen) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. 
+ return sizeof(unsigned long) == 4 + ? static_cast(gen.urand()) + : static_cast(gen.urand64()); + } + KOKKOS_INLINE_FUNCTION + static unsigned long draw(Generator& gen, const unsigned long& range) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(unsigned long) == 4 + ? static_cast( + gen.urand(static_cast(range))) + : static_cast(gen.urand64(range)); + } + KOKKOS_INLINE_FUNCTION + static unsigned long draw(Generator& gen, const unsigned long& start, + const unsigned long& end) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof(unsigned long) == 4 + ? static_cast( + gen.urand(static_cast(start), + static_cast(end))) + : static_cast(gen.urand64(start, end)); + } +}; - template - struct rand { - KOKKOS_INLINE_FUNCTION - static float max(){return 1.0f;} - KOKKOS_INLINE_FUNCTION - static float draw(Generator& gen) - {return gen.frand();} - KOKKOS_INLINE_FUNCTION - static float draw(Generator& gen, const float& range) - {return gen.frand(range);} - KOKKOS_INLINE_FUNCTION - static float draw(Generator& gen, const float& start, const float& end) - {return gen.frand(start,end);} +// NOTE (mfh 26 oct 2014) This is a partial specialization for long +// long, a C99 / C++11 signed type which is guaranteed to be at +// least 64 bits. Do NOT write a partial specialization for +// int64_t!!! This is just a typedef! It could be either long or +// long long. We don't know which a priori, and I've seen both. +// The types long and long long are guaranteed to differ, so it's +// always safe to specialize for both. +template +struct rand { + KOKKOS_INLINE_FUNCTION + static long long max() { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. 
+ return Generator::MAX_RAND64; + } + KOKKOS_INLINE_FUNCTION + static long long draw(Generator& gen) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64(); + } + KOKKOS_INLINE_FUNCTION + static long long draw(Generator& gen, const long long& range) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64(range); + } + KOKKOS_INLINE_FUNCTION + static long long draw(Generator& gen, const long long& start, + const long long& end) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64(start, end); + } +}; - }; +// NOTE (mfh 26 oct 2014) This is a partial specialization for +// unsigned long long, a C99 / C++11 unsigned type which is +// guaranteed to be at least 64 bits. Do NOT write a partial +// specialization for uint64_t!!! This is just a typedef! It could +// be either unsigned long or unsigned long long. We don't know +// which a priori, and I've seen both. The types unsigned long and +// unsigned long long are guaranteed to differ, so it's always safe +// to specialize for both. +template +struct rand { + KOKKOS_INLINE_FUNCTION + static unsigned long long max() { + // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 + // bits. + return Generator::MAX_URAND64; + } + KOKKOS_INLINE_FUNCTION + static unsigned long long draw(Generator& gen) { + // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 + // bits. + return gen.urand64(); + } + KOKKOS_INLINE_FUNCTION + static unsigned long long draw(Generator& gen, + const unsigned long long& range) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.urand64(range); + } + KOKKOS_INLINE_FUNCTION + static unsigned long long draw(Generator& gen, + const unsigned long long& start, + const unsigned long long& end) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. 
+ return gen.urand64(start, end); + } +}; - template - struct rand { - KOKKOS_INLINE_FUNCTION - static double max(){return 1.0;} - KOKKOS_INLINE_FUNCTION - static double draw(Generator& gen) - {return gen.drand();} - KOKKOS_INLINE_FUNCTION - static double draw(Generator& gen, const double& range) - {return gen.drand(range);} - KOKKOS_INLINE_FUNCTION - static double draw(Generator& gen, const double& start, const double& end) - {return gen.drand(start,end);} +template +struct rand { + KOKKOS_INLINE_FUNCTION + static float max() { return 1.0f; } + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen) { return gen.frand(); } + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen, const float& range) { + return gen.frand(range); + } + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen, const float& start, const float& end) { + return gen.frand(start, end); + } +}; - }; +template +struct rand { + KOKKOS_INLINE_FUNCTION + static double max() { return 1.0; } + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen) { return gen.drand(); } + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen, const double& range) { + return gen.drand(range); + } + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen, const double& start, const double& end) { + return gen.drand(start, end); + } +}; - template - struct rand > { - KOKKOS_INLINE_FUNCTION - static Kokkos::complex max () { - return Kokkos::complex (1.0, 1.0); - } - KOKKOS_INLINE_FUNCTION - static Kokkos::complex draw (Generator& gen) { - const float re = gen.frand (); - const float im = gen.frand (); - return Kokkos::complex (re, im); - } - KOKKOS_INLINE_FUNCTION - static Kokkos::complex draw (Generator& gen, const Kokkos::complex& range) { - const float re = gen.frand (real (range)); - const float im = gen.frand (imag (range)); - return Kokkos::complex (re, im); - } - KOKKOS_INLINE_FUNCTION - static Kokkos::complex draw (Generator& gen, const Kokkos::complex& start, const Kokkos::complex& end) 
{ - const float re = gen.frand (real (start), real (end)); - const float im = gen.frand (imag (start), imag (end)); - return Kokkos::complex (re, im); - } - }; +template +struct rand > { + KOKKOS_INLINE_FUNCTION + static Kokkos::complex max() { + return Kokkos::complex(1.0, 1.0); + } + KOKKOS_INLINE_FUNCTION + static Kokkos::complex draw(Generator& gen) { + const float re = gen.frand(); + const float im = gen.frand(); + return Kokkos::complex(re, im); + } + KOKKOS_INLINE_FUNCTION + static Kokkos::complex draw(Generator& gen, + const Kokkos::complex& range) { + const float re = gen.frand(real(range)); + const float im = gen.frand(imag(range)); + return Kokkos::complex(re, im); + } + KOKKOS_INLINE_FUNCTION + static Kokkos::complex draw(Generator& gen, + const Kokkos::complex& start, + const Kokkos::complex& end) { + const float re = gen.frand(real(start), real(end)); + const float im = gen.frand(imag(start), imag(end)); + return Kokkos::complex(re, im); + } +}; - template - struct rand > { - KOKKOS_INLINE_FUNCTION - static Kokkos::complex max () { - return Kokkos::complex (1.0, 1.0); - } - KOKKOS_INLINE_FUNCTION - static Kokkos::complex draw (Generator& gen) { - const double re = gen.drand (); - const double im = gen.drand (); - return Kokkos::complex (re, im); - } - KOKKOS_INLINE_FUNCTION - static Kokkos::complex draw (Generator& gen, const Kokkos::complex& range) { - const double re = gen.drand (real (range)); - const double im = gen.drand (imag (range)); - return Kokkos::complex (re, im); - } - KOKKOS_INLINE_FUNCTION - static Kokkos::complex draw (Generator& gen, const Kokkos::complex& start, const Kokkos::complex& end) { - const double re = gen.drand (real (start), real (end)); - const double im = gen.drand (imag (start), imag (end)); - return Kokkos::complex (re, im); - } - }; +template +struct rand > { + KOKKOS_INLINE_FUNCTION + static Kokkos::complex max() { + return Kokkos::complex(1.0, 1.0); + } + KOKKOS_INLINE_FUNCTION + static Kokkos::complex 
draw(Generator& gen) { + const double re = gen.drand(); + const double im = gen.drand(); + return Kokkos::complex(re, im); + } + KOKKOS_INLINE_FUNCTION + static Kokkos::complex draw(Generator& gen, + const Kokkos::complex& range) { + const double re = gen.drand(real(range)); + const double im = gen.drand(imag(range)); + return Kokkos::complex(re, im); + } + KOKKOS_INLINE_FUNCTION + static Kokkos::complex draw(Generator& gen, + const Kokkos::complex& start, + const Kokkos::complex& end) { + const double re = gen.drand(real(start), real(end)); + const double im = gen.drand(imag(start), imag(end)); + return Kokkos::complex(re, im); + } +}; - template - class Random_XorShift64_Pool; +template +class Random_XorShift64_Pool; - template - class Random_XorShift64 { - private: - uint64_t state_; - const int state_idx_; - friend class Random_XorShift64_Pool; - public: +template +class Random_XorShift64 { + private: + uint64_t state_; + const int state_idx_; + friend class Random_XorShift64_Pool; - typedef DeviceType device_type; + public: + typedef DeviceType device_type; - enum {MAX_URAND = 0xffffffffU}; - enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; - enum {MAX_RAND = static_cast(0xffffffff/2)}; - enum {MAX_RAND64 = static_cast(0xffffffffffffffffLL/2-1)}; + enum { MAX_URAND = 0xffffffffU }; + enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; + enum { MAX_RAND = static_cast(0xffffffff / 2) }; + enum { MAX_RAND64 = static_cast(0xffffffffffffffffLL / 2 - 1) }; - KOKKOS_INLINE_FUNCTION - Random_XorShift64 (uint64_t state, int state_idx = 0) - : state_(state==0?uint64_t(1318319):state),state_idx_(state_idx){} + KOKKOS_INLINE_FUNCTION + Random_XorShift64(uint64_t state, int state_idx = 0) + : state_(state == 0 ? 
uint64_t(1318319) : state), state_idx_(state_idx) {} - KOKKOS_INLINE_FUNCTION - uint32_t urand() { - state_ ^= state_ >> 12; - state_ ^= state_ << 25; - state_ ^= state_ >> 27; - - uint64_t tmp = state_ * 2685821657736338717ULL; - tmp = tmp>>16; - return static_cast(tmp&MAX_URAND); - } + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + state_ ^= state_ >> 12; + state_ ^= state_ << 25; + state_ ^= state_ >> 27; + + uint64_t tmp = state_ * 2685821657736338717ULL; + tmp = tmp >> 16; + return static_cast(tmp & MAX_URAND); + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64() { - state_ ^= state_ >> 12; - state_ ^= state_ << 25; - state_ ^= state_ >> 27; - return (state_ * 2685821657736338717ULL) - 1; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + state_ ^= state_ >> 12; + state_ ^= state_ << 25; + state_ ^= state_ >> 27; + return (state_ * 2685821657736338717ULL) - 1; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& range) { - const uint32_t max_val = (MAX_URAND/range)*range; - uint32_t tmp = urand(); - while(tmp>=max_val) - tmp = urand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND / range) * range; + uint32_t tmp = urand(); + while (tmp >= max_val) tmp = urand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& start, const uint32_t& end ) { - return urand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end) { + return urand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& range) { - const uint64_t max_val = (MAX_URAND64/range)*range; - uint64_t tmp = urand64(); - while(tmp>=max_val) - tmp = urand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64 / range) * range; + uint64_t tmp = urand64(); + while (tmp >= max_val) tmp = urand64(); + return tmp % range; + } - 
KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& start, const uint64_t& end ) { - return urand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end) { + return urand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int rand() { - return static_cast(urand()/2); - } + KOKKOS_INLINE_FUNCTION + int rand() { return static_cast(urand() / 2); } - KOKKOS_INLINE_FUNCTION - int rand(const int& range) { - const int max_val = (MAX_RAND/range)*range; - int tmp = rand(); - while(tmp>=max_val) - tmp = rand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND / range) * range; + int tmp = rand(); + while (tmp >= max_val) tmp = rand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int rand(const int& start, const int& end ) { - return rand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end) { + return rand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64() { - return static_cast(urand64()/2); - } + KOKKOS_INLINE_FUNCTION + int64_t rand64() { return static_cast(urand64() / 2); } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& range) { - const int64_t max_val = (MAX_RAND64/range)*range; - int64_t tmp = rand64(); - while(tmp>=max_val) - tmp = rand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64 / range) * range; + int64_t tmp = rand64(); + while (tmp >= max_val) tmp = rand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& start, const int64_t& end ) { - return rand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end) { + return rand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - float frand() { - return 1.0f * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand() { return 1.0f * 
urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& start, const float& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end) { + return frand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - double drand() { - return 1.0 * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand() { return 1.0 * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& start, const double& end ) { - return drand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end) { + return drand(end - start) + start; + } - //Marsaglia polar method for drawing a standard normal distributed random number - KOKKOS_INLINE_FUNCTION - double normal() { - double S = 2.0; - double U; - while(S>=1.0) { - U = 2.0*drand() - 1.0; - const double V = 2.0*drand() - 1.0; - S = U*U+V*V; - } - return U*std::sqrt(-2.0*log(S)/S); - } + // Marsaglia polar method for drawing a standard normal distributed random + // number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while (S >= 1.0) { + U = 2.0 * drand() - 1.0; + const double V = 2.0 * drand() - 1.0; + S = U * U + V * V; + } + return U * std::sqrt(-2.0 * log(S) / S); + } - KOKKOS_INLINE_FUNCTION - double normal(const double& mean, const double& std_dev=1.0) { - return mean + normal()*std_dev; - } + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev = 1.0) { + return mean + normal() * std_dev; + } +}; - }; +template 
+class Random_XorShift64_Pool { + private: + typedef View lock_type; + typedef View state_data_type; + lock_type locks_; + state_data_type state_; + int num_states_; - template - class Random_XorShift64_Pool { - private: - typedef View lock_type; - typedef View state_data_type; - lock_type locks_; - state_data_type state_; - int num_states_; - - public: - typedef Random_XorShift64 generator_type; - typedef DeviceType device_type; + public: + typedef Random_XorShift64 generator_type; + typedef DeviceType device_type; - KOKKOS_INLINE_FUNCTION - Random_XorShift64_Pool() { - num_states_ = 0; - } - Random_XorShift64_Pool(uint64_t seed) { - num_states_ = 0; + KOKKOS_INLINE_FUNCTION + Random_XorShift64_Pool() { num_states_ = 0; } + Random_XorShift64_Pool(uint64_t seed) { + num_states_ = 0; #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - init(seed,DeviceType::max_hardware_threads()); + init(seed, DeviceType::max_hardware_threads()); #else - init(seed,DeviceType::impl_max_hardware_threads()); + init(seed, DeviceType::impl_max_hardware_threads()); #endif - } + } - KOKKOS_INLINE_FUNCTION - Random_XorShift64_Pool(const Random_XorShift64_Pool& src): - locks_(src.locks_), - state_(src.state_), - num_states_(src.num_states_) - {} + KOKKOS_INLINE_FUNCTION + Random_XorShift64_Pool(const Random_XorShift64_Pool& src) + : locks_(src.locks_), state_(src.state_), num_states_(src.num_states_) {} - KOKKOS_INLINE_FUNCTION - Random_XorShift64_Pool operator = (const Random_XorShift64_Pool& src) { - locks_ = src.locks_; - state_ = src.state_; - num_states_ = src.num_states_; - return *this; - } + KOKKOS_INLINE_FUNCTION + Random_XorShift64_Pool operator=(const Random_XorShift64_Pool& src) { + locks_ = src.locks_; + state_ = src.state_; + num_states_ = src.num_states_; + return *this; + } - void init(uint64_t seed, int num_states) { - if(seed==0) - seed = uint64_t(1318319); - - num_states_ = num_states; - - locks_ = lock_type("Kokkos::Random_XorShift64::locks",num_states_); - state_ = 
state_data_type("Kokkos::Random_XorShift64::state",num_states_); - - typename state_data_type::HostMirror h_state = create_mirror_view(state_); - typename lock_type::HostMirror h_lock = create_mirror_view(locks_); - - // Execute on the HostMirror's default execution space. - Random_XorShift64 gen(seed,0); - for(int i = 0; i < 17; i++) - gen.rand(); - for(int i = 0; i < num_states_; i++) { - int n1 = gen.rand(); - int n2 = gen.rand(); - int n3 = gen.rand(); - int n4 = gen.rand(); - h_state(i) = (((static_cast(n1)) & 0xffff)<<00) | - (((static_cast(n2)) & 0xffff)<<16) | - (((static_cast(n3)) & 0xffff)<<32) | - (((static_cast(n4)) & 0xffff)<<48); - h_lock(i) = 0; - } - deep_copy(state_,h_state); - deep_copy(locks_,h_lock); - } + void init(uint64_t seed, int num_states) { + if (seed == 0) seed = uint64_t(1318319); + + num_states_ = num_states; + + locks_ = lock_type("Kokkos::Random_XorShift64::locks", num_states_); + state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_); + + typename state_data_type::HostMirror h_state = create_mirror_view(state_); + typename lock_type::HostMirror h_lock = create_mirror_view(locks_); + + // Execute on the HostMirror's default execution space. 
+ Random_XorShift64 + gen(seed, 0); + for (int i = 0; i < 17; i++) gen.rand(); + for (int i = 0; i < num_states_; i++) { + int n1 = gen.rand(); + int n2 = gen.rand(); + int n3 = gen.rand(); + int n4 = gen.rand(); + h_state(i) = (((static_cast(n1)) & 0xffff) << 00) | + (((static_cast(n2)) & 0xffff) << 16) | + (((static_cast(n3)) & 0xffff) << 32) | + (((static_cast(n4)) & 0xffff) << 48); + h_lock(i) = 0; + } + deep_copy(state_, h_state); + deep_copy(locks_, h_lock); + } - KOKKOS_INLINE_FUNCTION - Random_XorShift64 get_state() const { + KOKKOS_INLINE_FUNCTION + Random_XorShift64 get_state() const { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int i = DeviceType::hardware_thread_id();; + const int i = DeviceType::hardware_thread_id(); + ; #else - const int i = DeviceType::impl_hardware_thread_id();; + const int i = DeviceType::impl_hardware_thread_id(); + ; #endif - return Random_XorShift64(state_(i),i); - } - - // NOTE: state_idx MUST be unique and less than num_states - KOKKOS_INLINE_FUNCTION - Random_XorShift64 get_state(const int state_idx) const { - return Random_XorShift64(state_(state_idx),state_idx); - } - - KOKKOS_INLINE_FUNCTION - void free_state(const Random_XorShift64& state) const { - state_(state.state_idx_) = state.state_; - } - }; + return Random_XorShift64(state_(i), i); + } + // NOTE: state_idx MUST be unique and less than num_states + KOKKOS_INLINE_FUNCTION + Random_XorShift64 get_state(const int state_idx) const { + return Random_XorShift64(state_(state_idx), state_idx); + } - template - class Random_XorShift1024_Pool; + KOKKOS_INLINE_FUNCTION + void free_state(const Random_XorShift64& state) const { + state_(state.state_idx_) = state.state_; + } +}; - template - class Random_XorShift1024 { - private: - int p_; - const int state_idx_; - uint64_t state_[16]; - friend class Random_XorShift1024_Pool; - public: +template +class Random_XorShift1024_Pool; - typedef Random_XorShift1024_Pool pool_type; - typedef DeviceType device_type; +template +class 
Random_XorShift1024 { + private: + int p_; + const int state_idx_; + uint64_t state_[16]; + friend class Random_XorShift1024_Pool; - enum {MAX_URAND = 0xffffffffU}; - enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; - enum {MAX_RAND = static_cast(0xffffffffU/2)}; - enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; + public: + typedef Random_XorShift1024_Pool pool_type; + typedef DeviceType device_type; - KOKKOS_INLINE_FUNCTION - Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0): - p_(p),state_idx_(state_idx){ - for(int i=0 ; i<16; i++) - state_[i] = state(state_idx,i); - } + enum { MAX_URAND = 0xffffffffU }; + enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; + enum { MAX_RAND = static_cast(0xffffffffU / 2) }; + enum { MAX_RAND64 = static_cast(0xffffffffffffffffULL / 2 - 1) }; - KOKKOS_INLINE_FUNCTION - uint32_t urand() { - uint64_t state_0 = state_[ p_ ]; - uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - uint64_t tmp = ( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; - tmp = tmp>>16; - return static_cast(tmp&MAX_URAND); - } + KOKKOS_INLINE_FUNCTION + Random_XorShift1024(const typename pool_type::state_data_type& state, int p, + int state_idx = 0) + : p_(p), state_idx_(state_idx) { + for (int i = 0; i < 16; i++) state_[i] = state(state_idx, i); + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64() { - uint64_t state_0 = state_[ p_ ]; - uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - return (( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + uint64_t state_0 = state_[p_]; + uint64_t state_1 = state_[p_ = (p_ + 1) & 15]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + uint64_t tmp = (state_[p_] = state_0 ^ state_1) * 
1181783497276652981ULL; + tmp = tmp >> 16; + return static_cast(tmp & MAX_URAND); + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& range) { - const uint32_t max_val = (MAX_URAND/range)*range; - uint32_t tmp = urand(); - while(tmp>=max_val) - tmp = urand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + uint64_t state_0 = state_[p_]; + uint64_t state_1 = state_[p_ = (p_ + 1) & 15]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + return ((state_[p_] = state_0 ^ state_1) * 1181783497276652981LL) - 1; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& start, const uint32_t& end ) { - return urand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND / range) * range; + uint32_t tmp = urand(); + while (tmp >= max_val) tmp = urand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& range) { - const uint64_t max_val = (MAX_URAND64/range)*range; - uint64_t tmp = urand64(); - while(tmp>=max_val) - tmp = urand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end) { + return urand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& start, const uint64_t& end ) { - return urand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64 / range) * range; + uint64_t tmp = urand64(); + while (tmp >= max_val) tmp = urand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int rand() { - return static_cast(urand()/2); - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end) { + return urand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int rand(const int& range) { - const int max_val = (MAX_RAND/range)*range; - int tmp = rand(); - while(tmp>=max_val) - tmp = rand(); - 
return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int rand() { return static_cast(urand() / 2); } - KOKKOS_INLINE_FUNCTION - int rand(const int& start, const int& end ) { - return rand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND / range) * range; + int tmp = rand(); + while (tmp >= max_val) tmp = rand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64() { - return static_cast(urand64()/2); - } + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end) { + return rand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& range) { - const int64_t max_val = (MAX_RAND64/range)*range; - int64_t tmp = rand64(); - while(tmp>=max_val) - tmp = rand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64() { return static_cast(urand64() / 2); } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& start, const int64_t& end ) { - return rand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64 / range) * range; + int64_t tmp = rand64(); + while (tmp >= max_val) tmp = rand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - float frand() { - return 1.0f * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end) { + return rand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - float frand(const float& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand() { return 1.0f * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& start, const float& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand() { - return 1.0 * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const 
float& end) { + return frand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - double drand(const double& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand() { return 1.0 * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& start, const double& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { return range * urand64() / MAX_URAND64; } - //Marsaglia polar method for drawing a standard normal distributed random number - KOKKOS_INLINE_FUNCTION - double normal() { - double S = 2.0; - double U; - while(S>=1.0) { - U = 2.0*drand() - 1.0; - const double V = 2.0*drand() - 1.0; - S = U*U+V*V; - } - return U*std::sqrt(-2.0*log(S)/S); - } + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end) { + return frand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - double normal(const double& mean, const double& std_dev=1.0) { - return mean + normal()*std_dev; - } - }; + // Marsaglia polar method for drawing a standard normal distributed random + // number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while (S >= 1.0) { + U = 2.0 * drand() - 1.0; + const double V = 2.0 * drand() - 1.0; + S = U * U + V * V; + } + return U * std::sqrt(-2.0 * log(S) / S); + } + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev = 1.0) { + return mean + normal() * std_dev; + } +}; - template - class Random_XorShift1024_Pool { - private: - typedef View int_view_type; - typedef View state_data_type; +template +class Random_XorShift1024_Pool { + private: + typedef View int_view_type; + typedef View state_data_type; - int_view_type locks_; - state_data_type state_; - int_view_type p_; - int num_states_; - friend class Random_XorShift1024; + int_view_type locks_; + state_data_type state_; + int_view_type p_; + int num_states_; + friend class Random_XorShift1024; - public: - typedef 
Random_XorShift1024 generator_type; + public: + typedef Random_XorShift1024 generator_type; - typedef DeviceType device_type; + typedef DeviceType device_type; - KOKKOS_INLINE_FUNCTION - Random_XorShift1024_Pool() { - num_states_ = 0; - } + KOKKOS_INLINE_FUNCTION + Random_XorShift1024_Pool() { num_states_ = 0; } - inline - Random_XorShift1024_Pool(uint64_t seed){ - num_states_ = 0; + inline Random_XorShift1024_Pool(uint64_t seed) { + num_states_ = 0; #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - init(seed,DeviceType::max_hardware_threads()); + init(seed, DeviceType::max_hardware_threads()); #else - init(seed,DeviceType::impl_max_hardware_threads()); + init(seed, DeviceType::impl_max_hardware_threads()); #endif - } + } - KOKKOS_INLINE_FUNCTION - Random_XorShift1024_Pool(const Random_XorShift1024_Pool& src): - locks_(src.locks_), - state_(src.state_), - p_(src.p_), - num_states_(src.num_states_) - {} + KOKKOS_INLINE_FUNCTION + Random_XorShift1024_Pool(const Random_XorShift1024_Pool& src) + : locks_(src.locks_), + state_(src.state_), + p_(src.p_), + num_states_(src.num_states_) {} - KOKKOS_INLINE_FUNCTION - Random_XorShift1024_Pool operator = (const Random_XorShift1024_Pool& src) { - locks_ = src.locks_; - state_ = src.state_; - p_ = src.p_; - num_states_ = src.num_states_; - return *this; - } + KOKKOS_INLINE_FUNCTION + Random_XorShift1024_Pool operator=(const Random_XorShift1024_Pool& src) { + locks_ = src.locks_; + state_ = src.state_; + p_ = src.p_; + num_states_ = src.num_states_; + return *this; + } - inline - void init(uint64_t seed, int num_states) { - if(seed==0) - seed = uint64_t(1318319); - num_states_ = num_states; - locks_ = int_view_type("Kokkos::Random_XorShift1024::locks",num_states_); - state_ = state_data_type("Kokkos::Random_XorShift1024::state",num_states_); - p_ = int_view_type("Kokkos::Random_XorShift1024::p",num_states_); - - typename state_data_type::HostMirror h_state = create_mirror_view(state_); - typename int_view_type::HostMirror h_lock = 
create_mirror_view(locks_); - typename int_view_type::HostMirror h_p = create_mirror_view(p_); - - // Execute on the HostMirror's default execution space. - Random_XorShift64 gen(seed,0); - for(int i = 0; i < 17; i++) - gen.rand(); - for(int i = 0; i < num_states_; i++) { - for(int j = 0; j < 16 ; j++) { - int n1 = gen.rand(); - int n2 = gen.rand(); - int n3 = gen.rand(); - int n4 = gen.rand(); - h_state(i,j) = (((static_cast(n1)) & 0xffff)<<00) | - (((static_cast(n2)) & 0xffff)<<16) | - (((static_cast(n3)) & 0xffff)<<32) | - (((static_cast(n4)) & 0xffff)<<48); - } - h_p(i) = 0; - h_lock(i) = 0; + inline void init(uint64_t seed, int num_states) { + if (seed == 0) seed = uint64_t(1318319); + num_states_ = num_states; + locks_ = int_view_type("Kokkos::Random_XorShift1024::locks", num_states_); + state_ = state_data_type("Kokkos::Random_XorShift1024::state", num_states_); + p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_); + + typename state_data_type::HostMirror h_state = create_mirror_view(state_); + typename int_view_type::HostMirror h_lock = create_mirror_view(locks_); + typename int_view_type::HostMirror h_p = create_mirror_view(p_); + + // Execute on the HostMirror's default execution space. 
+ Random_XorShift64 + gen(seed, 0); + for (int i = 0; i < 17; i++) gen.rand(); + for (int i = 0; i < num_states_; i++) { + for (int j = 0; j < 16; j++) { + int n1 = gen.rand(); + int n2 = gen.rand(); + int n3 = gen.rand(); + int n4 = gen.rand(); + h_state(i, j) = (((static_cast(n1)) & 0xffff) << 00) | + (((static_cast(n2)) & 0xffff) << 16) | + (((static_cast(n3)) & 0xffff) << 32) | + (((static_cast(n4)) & 0xffff) << 48); } - deep_copy(state_,h_state); - deep_copy(locks_,h_lock); + h_p(i) = 0; + h_lock(i) = 0; } + deep_copy(state_, h_state); + deep_copy(locks_, h_lock); + } - KOKKOS_INLINE_FUNCTION - Random_XorShift1024 get_state() const { + KOKKOS_INLINE_FUNCTION + Random_XorShift1024 get_state() const { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int i = DeviceType::hardware_thread_id(); + const int i = DeviceType::hardware_thread_id(); #else - const int i = DeviceType::impl_hardware_thread_id(); + const int i = DeviceType::impl_hardware_thread_id(); #endif - return Random_XorShift1024(state_,p_(i),i); - }; + return Random_XorShift1024(state_, p_(i), i); + }; - // NOTE: state_idx MUST be unique and less than num_states - KOKKOS_INLINE_FUNCTION - Random_XorShift1024 get_state(const int state_idx) const { - return Random_XorShift1024(state_,p_(state_idx),state_idx); - } + // NOTE: state_idx MUST be unique and less than num_states + KOKKOS_INLINE_FUNCTION + Random_XorShift1024 get_state(const int state_idx) const { + return Random_XorShift1024(state_, p_(state_idx), state_idx); + } - KOKKOS_INLINE_FUNCTION - void free_state(const Random_XorShift1024& state) const { - for(int i = 0; i<16; i++) - state_(state.state_idx_,i) = state.state_[i]; - p_(state.state_idx_) = state.p_; - } - }; + KOKKOS_INLINE_FUNCTION + void free_state(const Random_XorShift1024& state) const { + for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; + p_(state.state_idx_) = state.p_; + } +}; #if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__) - template<> - class 
Random_XorShift1024 { - private: - int p_; - const int state_idx_; - uint64_t* state_; - const int stride_; - friend class Random_XorShift1024_Pool; - public: +template <> +class Random_XorShift1024 { + private: + int p_; + const int state_idx_; + uint64_t* state_; + const int stride_; + friend class Random_XorShift1024_Pool; - typedef Kokkos::Cuda device_type; - typedef Random_XorShift1024_Pool pool_type; + public: + typedef Kokkos::Cuda device_type; + typedef Random_XorShift1024_Pool pool_type; - enum {MAX_URAND = 0xffffffffU}; - enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; - enum {MAX_RAND = static_cast(0xffffffffU/2)}; - enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; + enum { MAX_URAND = 0xffffffffU }; + enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; + enum { MAX_RAND = static_cast(0xffffffffU / 2) }; + enum { MAX_RAND64 = static_cast(0xffffffffffffffffULL / 2 - 1) }; - KOKKOS_INLINE_FUNCTION - Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0): - p_(p),state_idx_(state_idx),state_(&state(state_idx,0)),stride_(state.stride_1()){ - } + KOKKOS_INLINE_FUNCTION + Random_XorShift1024(const typename pool_type::state_data_type& state, int p, + int state_idx = 0) + : p_(p), + state_idx_(state_idx), + state_(&state(state_idx, 0)), + stride_(state.stride_1()) {} - KOKKOS_INLINE_FUNCTION - uint32_t urand() { - uint64_t state_0 = state_[ p_ * stride_ ]; - uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - uint64_t tmp = ( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; - tmp = tmp>>16; - return static_cast(tmp&MAX_URAND); - } + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + uint64_t state_0 = state_[p_ * stride_]; + uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + uint64_t tmp = + (state_[p_ * 
stride_] = state_0 ^ state_1) * 1181783497276652981ULL; + tmp = tmp >> 16; + return static_cast(tmp & MAX_URAND); + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64() { - uint64_t state_0 = state_[ p_ * stride_ ]; - uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - return (( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + uint64_t state_0 = state_[p_ * stride_]; + uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + return ((state_[p_ * stride_] = state_0 ^ state_1) * + 1181783497276652981LL) - + 1; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& range) { - const uint32_t max_val = (MAX_URAND/range)*range; - uint32_t tmp = urand(); - while(tmp>=max_val) - urand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND / range) * range; + uint32_t tmp = urand(); + while (tmp >= max_val) urand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& start, const uint32_t& end ) { - return urand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end) { + return urand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& range) { - const uint64_t max_val = (MAX_URAND64/range)*range; - uint64_t tmp = urand64(); - while(tmp>=max_val) - urand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64 / range) * range; + uint64_t tmp = urand64(); + while (tmp >= max_val) urand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& start, const uint64_t& end ) { - return urand64(end-start)+start; - 
} + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end) { + return urand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int rand() { - return static_cast(urand()/2); - } + KOKKOS_INLINE_FUNCTION + int rand() { return static_cast(urand() / 2); } - KOKKOS_INLINE_FUNCTION - int rand(const int& range) { - const int max_val = (MAX_RAND/range)*range; - int tmp = rand(); - while(tmp>=max_val) - rand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND / range) * range; + int tmp = rand(); + while (tmp >= max_val) rand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int rand(const int& start, const int& end ) { - return rand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end) { + return rand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64() { - return static_cast(urand64()/2); - } + KOKKOS_INLINE_FUNCTION + int64_t rand64() { return static_cast(urand64() / 2); } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& range) { - const int64_t max_val = (MAX_RAND64/range)*range; - int64_t tmp = rand64(); - while(tmp>=max_val) - rand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64 / range) * range; + int64_t tmp = rand64(); + while (tmp >= max_val) rand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& start, const int64_t& end ) { - return rand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end) { + return rand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - float frand() { - return 1.0f * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand() { return 1.0f * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& range) { - return range * urand64()/MAX_URAND64; - } + 
KOKKOS_INLINE_FUNCTION + float frand(const float& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& start, const float& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end) { + return frand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - double drand() { - return 1.0 * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand() { return 1.0 * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& start, const double& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end) { + return frand(end - start) + start; + } - //Marsaglia polar method for drawing a standard normal distributed random number - KOKKOS_INLINE_FUNCTION - double normal() { - double S = 2.0; - double U; - while(S>=1.0) { - U = 2.0*drand() - 1.0; - const double V = 2.0*drand() - 1.0; - S = U*U+V*V; - } - return U*std::sqrt(-2.0*log(S)/S); - } + // Marsaglia polar method for drawing a standard normal distributed random + // number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while (S >= 1.0) { + U = 2.0 * drand() - 1.0; + const double V = 2.0 * drand() - 1.0; + S = U * U + V * V; + } + return U * std::sqrt(-2.0 * log(S) / S); + } - KOKKOS_INLINE_FUNCTION - double normal(const double& mean, const double& std_dev=1.0) { - return mean + normal()*std_dev; - } - }; + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev = 1.0) { + return mean + normal() * std_dev; + } +}; -template<> -inline -Random_XorShift64_Pool::Random_XorShift64_Pool(uint64_t seed) { +template <> +inline 
Random_XorShift64_Pool::Random_XorShift64_Pool( + uint64_t seed) { num_states_ = 0; - init(seed,4*32768); + init(seed, 4 * 32768); } -template<> -KOKKOS_INLINE_FUNCTION -Random_XorShift64 Random_XorShift64_Pool::get_state() const { +template <> +KOKKOS_INLINE_FUNCTION Random_XorShift64 +Random_XorShift64_Pool::get_state() const { #ifdef __CUDA_ARCH__ - const int i_offset = (threadIdx.x*blockDim.y + threadIdx.y)*blockDim.z+threadIdx.z; - int i = (((blockIdx.x*gridDim.y+blockIdx.y)*gridDim.z + blockIdx.z) * - blockDim.x*blockDim.y*blockDim.z + i_offset)%num_states_; - while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { - i+=blockDim.x*blockDim.y*blockDim.z; - if(i>=num_states_) {i = i_offset;} + const int i_offset = + (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; + int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * + blockDim.x * blockDim.y * blockDim.z + + i_offset) % + num_states_; + while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { + i += blockDim.x * blockDim.y * blockDim.z; + if (i >= num_states_) { + i = i_offset; + } } - return Random_XorShift64(state_(i),i); + return Random_XorShift64(state_(i), i); #else - return Random_XorShift64(state_(0),0); + return Random_XorShift64(state_(0), 0); #endif } -template<> -KOKKOS_INLINE_FUNCTION -void Random_XorShift64_Pool::free_state(const Random_XorShift64 &state) const { +template <> +KOKKOS_INLINE_FUNCTION void Random_XorShift64_Pool::free_state( + const Random_XorShift64& state) const { state_(state.state_idx_) = state.state_; #ifdef __CUDA_ARCH__ locks_(state.state_idx_) = 0; @@ -1249,24 +1210,28 @@ void Random_XorShift64_Pool::free_state(const Random_XorShift64 -inline -Random_XorShift1024_Pool::Random_XorShift1024_Pool(uint64_t seed) { +template <> +inline Random_XorShift1024_Pool::Random_XorShift1024_Pool( + uint64_t seed) { num_states_ = 0; - init(seed,4*32768); + init(seed, 4 * 32768); } -template<> -KOKKOS_INLINE_FUNCTION -Random_XorShift1024 
Random_XorShift1024_Pool::get_state() const { +template <> +KOKKOS_INLINE_FUNCTION Random_XorShift1024 +Random_XorShift1024_Pool::get_state() const { #ifdef __CUDA_ARCH__ - const int i_offset = (threadIdx.x*blockDim.y + threadIdx.y)*blockDim.z+threadIdx.z; - int i = (((blockIdx.x*gridDim.y+blockIdx.y)*gridDim.z + blockIdx.z) * - blockDim.x*blockDim.y*blockDim.z + i_offset)%num_states_; - while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { - i+=blockDim.x*blockDim.y*blockDim.z; - if(i>=num_states_) {i = i_offset;} + const int i_offset = + (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; + int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * + blockDim.x * blockDim.y * blockDim.z + + i_offset) % + num_states_; + while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { + i += blockDim.x * blockDim.y * blockDim.z; + if (i >= num_states_) { + i = i_offset; + } } return Random_XorShift1024(state_, p_(i), i); @@ -1275,210 +1240,205 @@ Random_XorShift1024 Random_XorShift1024_Pool::get_st #endif } -template<> -KOKKOS_INLINE_FUNCTION -void Random_XorShift1024_Pool::free_state(const Random_XorShift1024 &state) const { - for(int i=0; i<16; i++) - state_(state.state_idx_,i) = state.state_[i]; +template <> +KOKKOS_INLINE_FUNCTION void Random_XorShift1024_Pool::free_state( + const Random_XorShift1024& state) const { + for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; #ifdef __CUDA_ARCH__ locks_(state.state_idx_) = 0; return; #endif } - #endif -#if defined(KOKKOS_ENABLE_ROCM) +#if defined(KOKKOS_ENABLE_ROCM) - template<> - class Random_XorShift1024 { - private: - int p_; - const int state_idx_; - uint64_t* state_; - const int stride_; - friend class Random_XorShift1024_Pool; - public: +template <> +class Random_XorShift1024 { + private: + int p_; + const int state_idx_; + uint64_t* state_; + const int stride_; + friend class Random_XorShift1024_Pool; - typedef Kokkos::Experimental::ROCm device_type; - 
typedef Random_XorShift1024_Pool pool_type; + public: + typedef Kokkos::Experimental::ROCm device_type; + typedef Random_XorShift1024_Pool pool_type; - enum {MAX_URAND = 0xffffffffU}; - enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; - enum {MAX_RAND = static_cast(0xffffffffU/2)}; - enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; + enum { MAX_URAND = 0xffffffffU }; + enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; + enum { MAX_RAND = static_cast(0xffffffffU / 2) }; + enum { MAX_RAND64 = static_cast(0xffffffffffffffffULL / 2 - 1) }; - KOKKOS_INLINE_FUNCTION - Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0): - p_(p),state_idx_(state_idx),state_(&state(state_idx,0)),stride_(state.stride_1()){ - } + KOKKOS_INLINE_FUNCTION + Random_XorShift1024(const typename pool_type::state_data_type& state, int p, + int state_idx = 0) + : p_(p), + state_idx_(state_idx), + state_(&state(state_idx, 0)), + stride_(state.stride_1()) {} - KOKKOS_INLINE_FUNCTION - uint32_t urand() { - uint64_t state_0 = state_[ p_ * stride_ ]; - uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - uint64_t tmp = ( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; - tmp = tmp>>16; - return static_cast(tmp&MAX_URAND); - } + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + uint64_t state_0 = state_[p_ * stride_]; + uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + uint64_t tmp = + (state_[p_ * stride_] = state_0 ^ state_1) * 1181783497276652981ULL; + tmp = tmp >> 16; + return static_cast(tmp & MAX_URAND); + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64() { - uint64_t state_0 = state_[ p_ * stride_ ]; - uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 
30; - return (( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + uint64_t state_0 = state_[p_ * stride_]; + uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + return ((state_[p_ * stride_] = state_0 ^ state_1) * + 1181783497276652981LL) - + 1; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& range) { - const uint32_t max_val = (MAX_URAND/range)*range; - uint32_t tmp = urand(); - while(tmp>=max_val) - urand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND / range) * range; + uint32_t tmp = urand(); + while (tmp >= max_val) urand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& start, const uint32_t& end ) { - return urand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end) { + return urand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& range) { - const uint64_t max_val = (MAX_URAND64/range)*range; - uint64_t tmp = urand64(); - while(tmp>=max_val) - urand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64 / range) * range; + uint64_t tmp = urand64(); + while (tmp >= max_val) urand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& start, const uint64_t& end ) { - return urand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end) { + return urand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int rand() { - return static_cast(urand()/2); - } + KOKKOS_INLINE_FUNCTION + int rand() { return static_cast(urand() / 2); } - KOKKOS_INLINE_FUNCTION - int rand(const int& range) { - const int 
max_val = (MAX_RAND/range)*range; - int tmp = rand(); - while(tmp>=max_val) - rand(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND / range) * range; + int tmp = rand(); + while (tmp >= max_val) rand(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int rand(const int& start, const int& end ) { - return rand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end) { + return rand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64() { - return static_cast(urand64()/2); - } + KOKKOS_INLINE_FUNCTION + int64_t rand64() { return static_cast(urand64() / 2); } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& range) { - const int64_t max_val = (MAX_RAND64/range)*range; - int64_t tmp = rand64(); - while(tmp>=max_val) - rand64(); - return tmp%range; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64 / range) * range; + int64_t tmp = rand64(); + while (tmp >= max_val) rand64(); + return tmp % range; + } - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& start, const int64_t& end ) { - return rand64(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end) { + return rand64(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - float frand() { - return 1.0f * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand() { return 1.0f * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - float frand(const float& start, const float& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end) { + return frand(end - start) + start; + } - KOKKOS_INLINE_FUNCTION - double 
drand() { - return 1.0 * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand() { return 1.0 * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& range) { - return range * urand64()/MAX_URAND64; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { return range * urand64() / MAX_URAND64; } - KOKKOS_INLINE_FUNCTION - double drand(const double& start, const double& end ) { - return frand(end-start)+start; - } + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end) { + return frand(end - start) + start; + } - //Marsaglia polar method for drawing a standard normal distributed random number - KOKKOS_INLINE_FUNCTION - double normal() { - double S = 2.0; - double U; - while(S>=1.0) { - U = 2.0*drand() - 1.0; - const double V = 2.0*drand() - 1.0; - S = U*U+V*V; - } - return U*std::sqrt(-2.0*log(S)/S); - } + // Marsaglia polar method for drawing a standard normal distributed random + // number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while (S >= 1.0) { + U = 2.0 * drand() - 1.0; + const double V = 2.0 * drand() - 1.0; + S = U * U + V * V; + } + return U * std::sqrt(-2.0 * log(S) / S); + } - KOKKOS_INLINE_FUNCTION - double normal(const double& mean, const double& std_dev=1.0) { - return mean + normal()*std_dev; - } - }; + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev = 1.0) { + return mean + normal() * std_dev; + } +}; -template<> -inline -Random_XorShift64_Pool::Random_XorShift64_Pool(uint64_t seed) { +template <> +inline Random_XorShift64_Pool< + Kokkos::Experimental::ROCm>::Random_XorShift64_Pool(uint64_t seed) { num_states_ = 0; - init(seed,4*32768); + init(seed, 4 * 32768); } -template<> -KOKKOS_INLINE_FUNCTION -Random_XorShift64 Random_XorShift64_Pool::get_state() const { +template <> +KOKKOS_INLINE_FUNCTION Random_XorShift64 +Random_XorShift64_Pool::get_state() const { #ifdef __HCC_ACCELERATOR__ - const int i_offset = 
(threadIdx_x*blockDim_y + threadIdx_y)*blockDim_z+threadIdx_z; - int i = (((blockIdx_x*gridDim_y+blockIdx_y)*gridDim_z + blockIdx_z) * - blockDim_x*blockDim_y*blockDim_z + i_offset)%num_states_; - while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { - i+=blockDim_x*blockDim_y*blockDim_z; - if(i>=num_states_) {i = i_offset;} + const int i_offset = + (threadIdx_x * blockDim_y + threadIdx_y) * blockDim_z + threadIdx_z; + int i = (((blockIdx_x * gridDim_y + blockIdx_y) * gridDim_z + blockIdx_z) * + blockDim_x * blockDim_y * blockDim_z + + i_offset) % + num_states_; + while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { + i += blockDim_x * blockDim_y * blockDim_z; + if (i >= num_states_) { + i = i_offset; + } } - return Random_XorShift64(state_(i),i); + return Random_XorShift64(state_(i), i); #else - return Random_XorShift64(state_(0),0); + return Random_XorShift64(state_(0), 0); #endif } -template<> -KOKKOS_INLINE_FUNCTION -void Random_XorShift64_Pool::free_state(const Random_XorShift64 &state) const { +template <> +KOKKOS_INLINE_FUNCTION void +Random_XorShift64_Pool::free_state( + const Random_XorShift64& state) const { #ifdef __HCC_ACCELERATOR__ state_(state.state_idx_) = state.state_; locks_(state.state_idx_) = 0; @@ -1486,24 +1446,28 @@ void Random_XorShift64_Pool::free_state(const Random #endif } - -template<> -inline -Random_XorShift1024_Pool::Random_XorShift1024_Pool(uint64_t seed) { +template <> +inline Random_XorShift1024_Pool< + Kokkos::Experimental::ROCm>::Random_XorShift1024_Pool(uint64_t seed) { num_states_ = 0; - init(seed,4*32768); + init(seed, 4 * 32768); } -template<> -KOKKOS_INLINE_FUNCTION -Random_XorShift1024 Random_XorShift1024_Pool::get_state() const { +template <> +KOKKOS_INLINE_FUNCTION Random_XorShift1024 +Random_XorShift1024_Pool::get_state() const { #ifdef __HCC_ACCELERATOR__ - const int i_offset = (threadIdx_x*blockDim_y + threadIdx_y)*blockDim_z+threadIdx_z; - int i = (((blockIdx_x*gridDim_y+blockIdx_y)*gridDim_z + blockIdx_z) * 
- blockDim_x*blockDim_y*blockDim_z + i_offset)%num_states_; - while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { - i+=blockDim_x*blockDim_y*blockDim_z; - if(i>=num_states_) {i = i_offset;} + const int i_offset = + (threadIdx_x * blockDim_y + threadIdx_y) * blockDim_z + threadIdx_z; + int i = (((blockIdx_x * gridDim_y + blockIdx_y) * gridDim_z + blockIdx_z) * + blockDim_x * blockDim_y * blockDim_z + + i_offset) % + num_states_; + while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { + i += blockDim_x * blockDim_y * blockDim_z; + if (i >= num_states_) { + i = i_offset; + } } return Random_XorShift1024(state_, p_(i), i); @@ -1512,515 +1476,589 @@ Random_XorShift1024 Random_XorShift1024_Pool -KOKKOS_INLINE_FUNCTION -void Random_XorShift1024_Pool::free_state(const Random_XorShift1024 &state) const { +template <> +KOKKOS_INLINE_FUNCTION void +Random_XorShift1024_Pool::free_state( + const Random_XorShift1024& state) const { #ifdef __HCC_ACCELERATOR__ - for(int i=0; i<16; i++) - state_(state.state_idx_,i) = state.state_[i]; + for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; locks_(state.state_idx_) = 0; return; #endif } - #endif - namespace Impl { -template +template struct fill_random_functor_range; -template +template struct fill_random_functor_begin_end; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (const IndexType& i) const { + void operator()(const IndexType& i) const { typename RandomPool::generator_type gen = 
rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) - a(idx) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) + a(idx) = Rand::draw(gen, range); } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - a(idx,k) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + a(idx, k) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; - -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename 
RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - a(idx,k,l) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + a(idx, k, l) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - a(idx,k,l,m) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + a(idx, k, l, m) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename 
ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - a(idx,k,l,m,n) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType n = 0; n < static_cast(a.extent(4)); + n++) + a(idx, k, l, m, n) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - 
for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - for(IndexType o=0;o(a.extent(5));o++) - a(idx,k,l,m,n,o) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType n = 0; n < static_cast(a.extent(4)); + n++) + for (IndexType o = 0; o < static_cast(a.extent(5)); + o++) + a(idx, k, l, m, n, o) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - for(IndexType o=0;o(a.extent(5));o++) - for(IndexType p=0;p(a.extent(6));p++) - a(idx,k,l,m,n,o,p) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType 
n = 0; n < static_cast(a.extent(4)); + n++) + for (IndexType o = 0; o < static_cast(a.extent(5)); + o++) + for (IndexType p = 0; p < static_cast(a.extent(6)); + p++) + a(idx, k, l, m, n, o, p) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_range{ +template +struct fill_random_functor_range { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; typename ViewType::const_value_type range; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_range(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type range_): - a(a_),rand_pool(rand_pool_),range(range_) {} + typename ViewType::const_value_type range_) + : a(a_), rand_pool(rand_pool_), range(range_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - for(IndexType o=0;o(a.extent(5));o++) - for(IndexType p=0;p(a.extent(6));p++) - for(IndexType q=0;q(a.extent(7));q++) - a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,range); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType n = 0; n < static_cast(a.extent(4)); + n++) + for (IndexType o = 0; o < static_cast(a.extent(5)); + o++) + for (IndexType p = 0; p < static_cast(a.extent(6)); + p++) + for (IndexType q = 0; + q < static_cast(a.extent(7)); q++) + a(idx, k, l, m, n, o, p, q) = Rand::draw(gen, range); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ 
+template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) - a(idx) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) + a(idx) = Rand::draw(gen, begin, end); } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - 
for(IndexType k=0;k(a.extent(1));k++) - a(idx,k) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + a(idx, k) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; - -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - a(idx,k,l) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + a(idx, k, l) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; 
+ typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - a(idx,k,l,m) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + a(idx, k, l, m) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = 
rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))){ - for(IndexType l=0;l(a.extent(1));l++) - for(IndexType m=0;m(a.extent(2));m++) - for(IndexType n=0;n(a.extent(3));n++) - for(IndexType o=0;o(a.extent(4));o++) - a(idx,l,m,n,o) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType l = 0; l < static_cast(a.extent(1)); l++) + for (IndexType m = 0; m < static_cast(a.extent(2)); m++) + for (IndexType n = 0; n < static_cast(a.extent(3)); n++) + for (IndexType o = 0; o < static_cast(a.extent(4)); + o++) + a(idx, l, m, n, o) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - for(IndexType o=0;o(a.extent(5));o++) - a(idx,k,l,m,n,o) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for 
(IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType n = 0; n < static_cast(a.extent(4)); + n++) + for (IndexType o = 0; o < static_cast(a.extent(5)); + o++) + a(idx, k, l, m, n, o) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; - -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - for(IndexType o=0;o(a.extent(5));o++) - for(IndexType p=0;p(a.extent(6));p++) - a(idx,k,l,m,n,o,p) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType n = 0; n < static_cast(a.extent(4)); + n++) + for (IndexType o = 0; o < static_cast(a.extent(5)); + 
o++) + for (IndexType p = 0; p < static_cast(a.extent(6)); + p++) + a(idx, k, l, m, n, o, p) = Rand::draw(gen, begin, end); } } rand_pool.free_state(gen); } }; -template -struct fill_random_functor_begin_end{ +template +struct fill_random_functor_begin_end { typedef typename ViewType::execution_space execution_space; ViewType a; RandomPool rand_pool; - typename ViewType::const_value_type begin,end; + typename ViewType::const_value_type begin, end; - typedef rand Rand; + typedef rand + Rand; fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, - typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): - a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + typename ViewType::const_value_type begin_, + typename ViewType::const_value_type end_) + : a(a_), rand_pool(rand_pool_), begin(begin_), end(end_) {} KOKKOS_INLINE_FUNCTION - void operator() (IndexType i) const { + void operator()(IndexType i) const { typename RandomPool::generator_type gen = rand_pool.get_state(); - for(IndexType j=0;j(a.extent(0))) { - for(IndexType k=0;k(a.extent(1));k++) - for(IndexType l=0;l(a.extent(2));l++) - for(IndexType m=0;m(a.extent(3));m++) - for(IndexType n=0;n(a.extent(4));n++) - for(IndexType o=0;o(a.extent(5));o++) - for(IndexType p=0;p(a.extent(6));p++) - for(IndexType q=0;q(a.extent(7));q++) - a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,begin,end); + for (IndexType j = 0; j < loops; j++) { + const IndexType idx = i * loops + j; + if (idx < static_cast(a.extent(0))) { + for (IndexType k = 0; k < static_cast(a.extent(1)); k++) + for (IndexType l = 0; l < static_cast(a.extent(2)); l++) + for (IndexType m = 0; m < static_cast(a.extent(3)); m++) + for (IndexType n = 0; n < static_cast(a.extent(4)); + n++) + for (IndexType o = 0; o < static_cast(a.extent(5)); + o++) + for (IndexType p = 0; p < static_cast(a.extent(6)); + p++) + for (IndexType q = 0; + q < static_cast(a.extent(7)); q++) + a(idx, k, l, m, n, o, p, q) = Rand::draw(gen, begin, end); 
} } rand_pool.free_state(gen); } }; -} +} // namespace Impl -template -void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type range) { +template +void fill_random(ViewType a, RandomPool g, + typename ViewType::const_value_type range) { int64_t LDA = a.extent(0); - if(LDA>0) - parallel_for((LDA+127)/128,Impl::fill_random_functor_range(a,g,range)); + if (LDA > 0) + parallel_for((LDA + 127) / 128, + Impl::fill_random_functor_range( + a, g, range)); } -template -void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin,typename ViewType::const_value_type end ) { +template +void fill_random(ViewType a, RandomPool g, + typename ViewType::const_value_type begin, + typename ViewType::const_value_type end) { int64_t LDA = a.extent(0); - if(LDA>0) - parallel_for((LDA+127)/128,Impl::fill_random_functor_begin_end(a,g,begin,end)); -} + if (LDA > 0) + parallel_for((LDA + 127) / 128, + Impl::fill_random_functor_begin_end( + a, g, begin, end)); } +} // namespace Kokkos #endif diff --git a/algorithms/src/Kokkos_Sort.hpp b/algorithms/src/Kokkos_Sort.hpp index 7fb8505fe53..e64b3276a4b 100644 --- a/algorithms/src/Kokkos_Sort.hpp +++ b/algorithms/src/Kokkos_Sort.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,12 +36,11 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) -// +// // ************************************************************************ //@HEADER */ - #ifndef KOKKOS_SORT_HPP_ #define KOKKOS_SORT_HPP_ @@ -51,125 +50,107 @@ namespace Kokkos { - namespace Impl { +namespace Impl { - template< class DstViewType , class SrcViewType - , int Rank = DstViewType::Rank > - struct CopyOp; +template +struct CopyOp; - template< class DstViewType , class SrcViewType > - struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, - SrcViewType const& src, size_t i_src ) { - dst(i_dst) = src(i_src); - } - }; +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + dst(i_dst) = src(i_src); + } +}; - template< class DstViewType , class SrcViewType > - struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, - SrcViewType const& src, size_t i_src ) { - for(int j = 0;j< (int) dst.extent(1); j++) - dst(i_dst,j) = src(i_src,j); - } - }; +template +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + for (int j = 0; j < (int)dst.extent(1); j++) dst(i_dst, j) = src(i_src, j); + } +}; - template< class DstViewType , class SrcViewType > - struct CopyOp { - KOKKOS_INLINE_FUNCTION - static void copy(DstViewType const& dst, size_t i_dst, - SrcViewType const& src, size_t i_src ) { - for(int j = 0; j +struct CopyOp { + KOKKOS_INLINE_FUNCTION + static void copy(DstViewType const& dst, size_t i_dst, SrcViewType const& src, + size_t i_src) { + for (int j = 0; j < dst.extent(1); j++) + for (int k = 0; k < dst.extent(2); k++) + dst(i_dst, j, k) = src(i_src, j, k); } +}; +} // namespace Impl //---------------------------------------------------------------------------- -template< class KeyViewType - , class BinSortOp - , class Space = typename 
KeyViewType::device_type - , class SizeType = typename KeyViewType::memory_space::size_type - > +template class BinSort { -public: - - template< class DstViewType , class SrcViewType > + public: + template struct copy_functor { + typedef typename SrcViewType::const_type src_view_type; - typedef typename SrcViewType::const_type src_view_type ; + typedef Impl::CopyOp copy_op; - typedef Impl::CopyOp< DstViewType , src_view_type > copy_op ; + DstViewType dst_values; + src_view_type src_values; + int dst_offset; - DstViewType dst_values ; - src_view_type src_values ; - int dst_offset ; - - copy_functor( DstViewType const & dst_values_ - , int const & dst_offset_ - , SrcViewType const & src_values_ - ) - : dst_values( dst_values_ ) - , src_values( src_values_ ) - , dst_offset( dst_offset_ ) - {} + copy_functor(DstViewType const& dst_values_, int const& dst_offset_, + SrcViewType const& src_values_) + : dst_values(dst_values_), + src_values(src_values_), + dst_offset(dst_offset_) {} KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - copy_op::copy(dst_values,i+dst_offset,src_values,i); + void operator()(const int& i) const { + copy_op::copy(dst_values, i + dst_offset, src_values, i); } }; - template< class DstViewType - , class PermuteViewType - , class SrcViewType - > + template struct copy_permute_functor { - // If a Kokkos::View then can generate constant random access // otherwise can only use the constant type. 
- typedef typename std::conditional - < Kokkos::is_view< SrcViewType >::value - , Kokkos::View< typename SrcViewType::const_data_type - , typename SrcViewType::array_layout - , typename SrcViewType::device_type - , Kokkos::MemoryTraits - > - , typename SrcViewType::const_type - >::type src_view_type ; - - typedef typename PermuteViewType::const_type perm_view_type ; - - typedef Impl::CopyOp< DstViewType , src_view_type > copy_op ; - - DstViewType dst_values ; - perm_view_type sort_order ; - src_view_type src_values ; - int src_offset ; - - copy_permute_functor( DstViewType const & dst_values_ - , PermuteViewType const & sort_order_ - , SrcViewType const & src_values_ - , int const & src_offset_ - ) - : dst_values( dst_values_ ) - , sort_order( sort_order_ ) - , src_values( src_values_ ) - , src_offset( src_offset_ ) - {} + typedef typename std::conditional< + Kokkos::is_view::value, + Kokkos::View >, + typename SrcViewType::const_type>::type src_view_type; + + typedef typename PermuteViewType::const_type perm_view_type; + + typedef Impl::CopyOp copy_op; + + DstViewType dst_values; + perm_view_type sort_order; + src_view_type src_values; + int src_offset; + + copy_permute_functor(DstViewType const& dst_values_, + PermuteViewType const& sort_order_, + SrcViewType const& src_values_, int const& src_offset_) + : dst_values(dst_values_), + sort_order(sort_order_), + src_values(src_values_), + src_offset(src_offset_) {} KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - copy_op::copy(dst_values,i,src_values,src_offset+sort_order(i)); + void operator()(const int& i) const { + copy_op::copy(dst_values, i, src_values, src_offset + sort_order(i)); } }; - typedef typename Space::execution_space execution_space; + typedef typename Space::execution_space execution_space; typedef BinSortOp bin_op_type; struct bin_count_tag {}; @@ -177,221 +158,236 @@ class BinSort { struct bin_binning_tag {}; struct bin_sort_bins_tag {}; -public: - + public: typedef SizeType 
size_type; typedef size_type value_type; typedef Kokkos::View offset_type; typedef Kokkos::View bin_count_type; - typedef typename KeyViewType::const_type const_key_view_type ; + typedef typename KeyViewType::const_type const_key_view_type; // If a Kokkos::View then can generate constant random access // otherwise can only use the constant type. - typedef typename std::conditional - < Kokkos::is_view< KeyViewType >::value - , Kokkos::View< typename KeyViewType::const_data_type, - typename KeyViewType::array_layout, - typename KeyViewType::device_type, - Kokkos::MemoryTraits > - , const_key_view_type - >::type const_rnd_key_view_type; + typedef typename std::conditional< + Kokkos::is_view::value, + Kokkos::View >, + const_key_view_type>::type const_rnd_key_view_type; typedef typename KeyViewType::non_const_value_type non_const_key_scalar; - typedef typename KeyViewType::const_value_type const_key_scalar; - - typedef Kokkos::View > bin_count_atomic_type ; + typedef typename KeyViewType::const_value_type const_key_scalar; -private: + typedef Kokkos::View > + bin_count_atomic_type; + private: const_key_view_type keys; const_rnd_key_view_type keys_rnd; -public: - - BinSortOp bin_op ; - offset_type bin_offsets ; - bin_count_atomic_type bin_count_atomic ; - bin_count_type bin_count_const ; - offset_type sort_order ; + public: + BinSortOp bin_op; + offset_type bin_offsets; + bin_count_atomic_type bin_count_atomic; + bin_count_type bin_count_const; + offset_type sort_order; - int range_begin ; - int range_end ; - bool sort_within_bins ; - -public: + int range_begin; + int range_end; + bool sort_within_bins; + public: BinSort() {} //---------------------------------------- - // Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false) - BinSort( const_key_view_type keys_ - , int range_begin_ - , int range_end_ - , BinSortOp bin_op_ - , bool sort_within_bins_ = false - ) - : keys(keys_) - , keys_rnd(keys_) - , bin_op(bin_op_) 
- , bin_offsets() - , bin_count_atomic() - , bin_count_const() - , sort_order() - , range_begin( range_begin_ ) - , range_end( range_end_ ) - , sort_within_bins( sort_within_bins_ ) - { - bin_count_atomic = Kokkos::View("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins()); - bin_count_const = bin_count_atomic; - bin_offsets = offset_type(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::bin_offsets"),bin_op.max_bins()); - sort_order = offset_type(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::sort_order"),range_end-range_begin); + // Constructor: takes the keys, the binning_operator and optionally whether to + // sort within bins (default false) + BinSort(const_key_view_type keys_, int range_begin_, int range_end_, + BinSortOp bin_op_, bool sort_within_bins_ = false) + : keys(keys_), + keys_rnd(keys_), + bin_op(bin_op_), + bin_offsets(), + bin_count_atomic(), + bin_count_const(), + sort_order(), + range_begin(range_begin_), + range_end(range_end_), + sort_within_bins(sort_within_bins_) { + bin_count_atomic = Kokkos::View( + "Kokkos::SortImpl::BinSortFunctor::bin_count", bin_op.max_bins()); + bin_count_const = bin_count_atomic; + bin_offsets = + offset_type(ViewAllocateWithoutInitializing( + "Kokkos::SortImpl::BinSortFunctor::bin_offsets"), + bin_op.max_bins()); + sort_order = + offset_type(ViewAllocateWithoutInitializing( + "Kokkos::SortImpl::BinSortFunctor::sort_order"), + range_end - range_begin); } - BinSort( const_key_view_type keys_ - , BinSortOp bin_op_ - , bool sort_within_bins_ = false - ) - : BinSort( keys_ , 0 , keys_.extent(0), bin_op_ , sort_within_bins_ ) {} + BinSort(const_key_view_type keys_, BinSortOp bin_op_, + bool sort_within_bins_ = false) + : BinSort(keys_, 0, keys_.extent(0), bin_op_, sort_within_bins_) {} //---------------------------------------- - // Create the permutation vector, the bin_offset array and the bin_count array. 
Can be called again if keys changed + // Create the permutation vector, the bin_offset array and the bin_count + // array. Can be called again if keys changed void create_permute_vector() { - const size_t len = range_end - range_begin ; - Kokkos::parallel_for ("Kokkos::Sort::BinCount",Kokkos::RangePolicy (0,len),*this); - Kokkos::parallel_scan("Kokkos::Sort::BinOffset",Kokkos::RangePolicy (0,bin_op.max_bins()) ,*this); - - Kokkos::deep_copy(bin_count_atomic,0); - Kokkos::parallel_for ("Kokkos::Sort::BinBinning",Kokkos::RangePolicy (0,len),*this); - - if(sort_within_bins) - Kokkos::parallel_for ("Kokkos::Sort::BinSort",Kokkos::RangePolicy(0,bin_op.max_bins()) ,*this); + const size_t len = range_end - range_begin; + Kokkos::parallel_for( + "Kokkos::Sort::BinCount", + Kokkos::RangePolicy(0, len), *this); + Kokkos::parallel_scan("Kokkos::Sort::BinOffset", + Kokkos::RangePolicy( + 0, bin_op.max_bins()), + *this); + + Kokkos::deep_copy(bin_count_atomic, 0); + Kokkos::parallel_for( + "Kokkos::Sort::BinBinning", + Kokkos::RangePolicy(0, len), *this); + + if (sort_within_bins) + Kokkos::parallel_for( + "Kokkos::Sort::BinSort", + Kokkos::RangePolicy( + 0, bin_op.max_bins()), + *this); } - // Sort a subset of a view with respect to the first dimension using the permutation array - template - void sort( ValuesViewType const & values - , int values_range_begin - , int values_range_end) const - { - typedef - Kokkos::View< typename ValuesViewType::data_type, - typename ValuesViewType::array_layout, - typename ValuesViewType::device_type > - scratch_view_type ; - - const size_t len = range_end - range_begin ; - const size_t values_len = values_range_end - values_range_begin ; + // Sort a subset of a view with respect to the first dimension using the + // permutation array + template + void sort(ValuesViewType const& values, int values_range_begin, + int values_range_end) const { + typedef Kokkos::View + scratch_view_type; + + const size_t len = range_end - range_begin; + const 
size_t values_len = values_range_end - values_range_begin; if (len != values_len) { - Kokkos::abort("BinSort::sort: values range length != permutation vector length"); + Kokkos::abort( + "BinSort::sort: values range length != permutation vector length"); } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - scratch_view_type - sorted_values(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::sorted_values"), - len, - values.extent(1), - values.extent(2), - values.extent(3), - values.extent(4), - values.extent(5), - values.extent(6), - values.extent(7)); + scratch_view_type sorted_values( + ViewAllocateWithoutInitializing( + "Kokkos::SortImpl::BinSortFunctor::sorted_values"), + len, values.extent(1), values.extent(2), values.extent(3), + values.extent(4), values.extent(5), values.extent(6), values.extent(7)); #else - scratch_view_type - sorted_values(ViewAllocateWithoutInitializing("Kokkos::SortImpl::BinSortFunctor::sorted_values"), - values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 1 ? values.extent(1) : KOKKOS_IMPL_CTOR_DEFAULT_ARG , - values.rank_dynamic > 2 ? values.extent(2) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 3 ? values.extent(3) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 4 ? values.extent(4) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 5 ? values.extent(5) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 6 ? values.extent(6) : KOKKOS_IMPL_CTOR_DEFAULT_ARG, - values.rank_dynamic > 7 ? values.extent(7) : KOKKOS_IMPL_CTOR_DEFAULT_ARG); + scratch_view_type sorted_values( + ViewAllocateWithoutInitializing( + "Kokkos::SortImpl::BinSortFunctor::sorted_values"), + values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 1 ? values.extent(1) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 2 ? values.extent(2) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 3 ? 
values.extent(3) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 4 ? values.extent(4) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 5 ? values.extent(5) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 6 ? values.extent(6) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG, + values.rank_dynamic > 7 ? values.extent(7) + : KOKKOS_IMPL_CTOR_DEFAULT_ARG); #endif { - copy_permute_functor< scratch_view_type /* DstViewType */ - , offset_type /* PermuteViewType */ - , ValuesViewType /* SrcViewType */ - > - functor( sorted_values , sort_order , values, values_range_begin - range_begin ); - - parallel_for("Kokkos::Sort::CopyPermute", Kokkos::RangePolicy(0,len),functor); + copy_permute_functor + functor(sorted_values, sort_order, values, + values_range_begin - range_begin); + + parallel_for("Kokkos::Sort::CopyPermute", + Kokkos::RangePolicy(0, len), functor); } { - copy_functor< ValuesViewType , scratch_view_type > - functor( values , range_begin , sorted_values ); + copy_functor functor( + values, range_begin, sorted_values); - parallel_for("Kokkos::Sort::Copy", Kokkos::RangePolicy(0,len),functor); + parallel_for("Kokkos::Sort::Copy", + Kokkos::RangePolicy(0, len), functor); } Kokkos::fence(); } - template - void sort( ValuesViewType const & values ) const - { - this->sort( values, 0, /*values.extent(0)*/ range_end - range_begin ); + template + void sort(ValuesViewType const& values) const { + this->sort(values, 0, /*values.extent(0)*/ range_end - range_begin); } // Get the permutation vector KOKKOS_INLINE_FUNCTION - offset_type get_permute_vector() const { return sort_order;} + offset_type get_permute_vector() const { return sort_order; } // Get the start offsets for each bin KOKKOS_INLINE_FUNCTION - offset_type get_bin_offsets() const { return bin_offsets;} + offset_type get_bin_offsets() const { return bin_offsets; } // Get the count for each bin KOKKOS_INLINE_FUNCTION - bin_count_type get_bin_count() const {return bin_count_const;} - -public: + 
bin_count_type get_bin_count() const { return bin_count_const; } + public: KOKKOS_INLINE_FUNCTION - void operator() (const bin_count_tag& tag, const int& i) const { - const int j = range_begin + i ; + void operator()(const bin_count_tag& tag, const int& i) const { + const int j = range_begin + i; bin_count_atomic(bin_op.bin(keys, j))++; } KOKKOS_INLINE_FUNCTION - void operator() (const bin_offset_tag& tag, const int& i, value_type& offset, const bool& final) const { - if(final) { + void operator()(const bin_offset_tag& tag, const int& i, value_type& offset, + const bool& final) const { + if (final) { bin_offsets(i) = offset; } - offset+=bin_count_const(i); + offset += bin_count_const(i); } KOKKOS_INLINE_FUNCTION - void operator() (const bin_binning_tag& tag, const int& i) const { - const int j = range_begin + i ; - const int bin = bin_op.bin(keys,j); + void operator()(const bin_binning_tag& tag, const int& i) const { + const int j = range_begin + i; + const int bin = bin_op.bin(keys, j); const int count = bin_count_atomic(bin)++; - sort_order(bin_offsets(bin) + count) = j ; + sort_order(bin_offsets(bin) + count) = j; } KOKKOS_INLINE_FUNCTION - void operator() (const bin_sort_bins_tag& tag, const int&i ) const { + void operator()(const bin_sort_bins_tag& tag, const int& i) const { auto bin_size = bin_count_const(i); if (bin_size <= 1) return; - int upper_bound = bin_offsets(i)+bin_size; - bool sorted = false; - while(!sorted) { - sorted = true; + int upper_bound = bin_offsets(i) + bin_size; + bool sorted = false; + while (!sorted) { + sorted = true; int old_idx = sort_order(bin_offsets(i)); int new_idx; - for(int k=bin_offsets(i)+1; k +template struct BinOp1D { int max_bins_; double mul_; typename KeyViewType::const_value_type range_; typename KeyViewType::const_value_type min_; - BinOp1D():max_bins_(0),mul_(0.0), - range_(typename KeyViewType::const_value_type()), - min_(typename KeyViewType::const_value_type()) {} + BinOp1D() + : max_bins_(0), + mul_(0.0), + 
range_(typename KeyViewType::const_value_type()), + min_(typename KeyViewType::const_value_type()) {} - //Construct BinOp with number of bins, minimum value and maxuimum value + // Construct BinOp with number of bins, minimum value and maxuimum value BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, - typename KeyViewType::const_value_type max ) - :max_bins_(max_bins__+1),mul_(1.0*max_bins__/(max-min)),range_(max-min),min_(min) {} - - //Determine bin index from key value - template - KOKKOS_INLINE_FUNCTION - int bin(ViewType& keys, const int& i) const { - return int(mul_*(keys(i)-min_)); + typename KeyViewType::const_value_type max) + : max_bins_(max_bins__ + 1), + mul_(1.0 * max_bins__ / (max - min)), + range_(max - min), + min_(min) {} + + // Determine bin index from key value + template + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + return int(mul_ * (keys(i) - min_)); } - //Return maximum bin index + 1 + // Return maximum bin index + 1 KOKKOS_INLINE_FUNCTION - int max_bins() const { - return max_bins_; - } + int max_bins() const { return max_bins_; } - //Compare to keys within a bin if true new_val will be put before old_val - template - KOKKOS_INLINE_FUNCTION - bool operator()(ViewType& keys, iType1& i1, iType2& i2) const { - return keys(i1) + KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, + iType2& i2) const { + return keys(i1) < keys(i2); } }; -template +template struct BinOp3D { int max_bins_[3]; double mul_[3]; @@ -450,43 +448,42 @@ struct BinOp3D { BinOp3D() {} BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], - typename KeyViewType::const_value_type max[] ) - { + typename KeyViewType::const_value_type max[]) { max_bins_[0] = max_bins__[0]; max_bins_[1] = max_bins__[1]; max_bins_[2] = max_bins__[2]; - mul_[0] = 1.0*max_bins__[0]/(max[0]-min[0]); - mul_[1] = 1.0*max_bins__[1]/(max[1]-min[1]); - mul_[2] = 1.0*max_bins__[2]/(max[2]-min[2]); - range_[0] = max[0]-min[0]; - 
range_[1] = max[1]-min[1]; - range_[2] = max[2]-min[2]; - min_[0] = min[0]; - min_[1] = min[1]; - min_[2] = min[2]; + mul_[0] = 1.0 * max_bins__[0] / (max[0] - min[0]); + mul_[1] = 1.0 * max_bins__[1] / (max[1] - min[1]); + mul_[2] = 1.0 * max_bins__[2] / (max[2] - min[2]); + range_[0] = max[0] - min[0]; + range_[1] = max[1] - min[1]; + range_[2] = max[2] - min[2]; + min_[0] = min[0]; + min_[1] = min[1]; + min_[2] = min[2]; } - template - KOKKOS_INLINE_FUNCTION - int bin(ViewType& keys, const int& i) const { - return int( (((int(mul_[0]*(keys(i,0)-min_[0]))*max_bins_[1]) + - int(mul_[1]*(keys(i,1)-min_[1])))*max_bins_[2]) + - int(mul_[2]*(keys(i,2)-min_[2]))); - } - - KOKKOS_INLINE_FUNCTION - int max_bins() const { - return max_bins_[0]*max_bins_[1]*max_bins_[2]; + template + KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const { + return int((((int(mul_[0] * (keys(i, 0) - min_[0])) * max_bins_[1]) + + int(mul_[1] * (keys(i, 1) - min_[1]))) * + max_bins_[2]) + + int(mul_[2] * (keys(i, 2) - min_[2]))); } - template KOKKOS_INLINE_FUNCTION - bool operator()(ViewType& keys, iType1& i1 , iType2& i2) const { - if (keys(i1,0)>keys(i2,0)) return true; - else if (keys(i1,0)==keys(i2,0)) { - if (keys(i1,1)>keys(i2,1)) return true; - else if (keys(i1,1)==keys(i2,1)) { - if (keys(i1,2)>keys(i2,2)) return true; + int max_bins() const { return max_bins_[0] * max_bins_[1] * max_bins_[2]; } + + template + KOKKOS_INLINE_FUNCTION bool operator()(ViewType& keys, iType1& i1, + iType2& i2) const { + if (keys(i1, 0) > keys(i2, 0)) + return true; + else if (keys(i1, 0) == keys(i2, 0)) { + if (keys(i1, 1) > keys(i2, 1)) + return true; + else if (keys(i1, 1) == keys(i2, 1)) { + if (keys(i1, 2) > keys(i2, 2)) return true; } } return false; @@ -495,85 +492,80 @@ struct BinOp3D { namespace Impl { -template +template bool try_std_sort(ViewType view) { - bool possible = true; - size_t stride[8] = { view.stride_0() - , view.stride_1() - , view.stride_2() - , view.stride_3() - , 
view.stride_4() - , view.stride_5() - , view.stride_6() - , view.stride_7() - }; - possible = possible && std::is_same::value; - possible = possible && (ViewType::Rank == 1); - possible = possible && (stride[0] == 1); - if(possible) { - std::sort(view.data(),view.data()+view.extent(0)); + bool possible = true; + size_t stride[8] = {view.stride_0(), view.stride_1(), view.stride_2(), + view.stride_3(), view.stride_4(), view.stride_5(), + view.stride_6(), view.stride_7()}; + possible = possible && + std::is_same::value; + possible = possible && (ViewType::Rank == 1); + possible = possible && (stride[0] == 1); + if (possible) { + std::sort(view.data(), view.data() + view.extent(0)); } return possible; } -template +template struct min_max_functor { - typedef Kokkos::MinMaxScalar minmax_scalar; + typedef Kokkos::MinMaxScalar + minmax_scalar; ViewType view; - min_max_functor(const ViewType& view_):view(view_) {} + min_max_functor(const ViewType& view_) : view(view_) {} KOKKOS_INLINE_FUNCTION - void operator() (const size_t& i, minmax_scalar& minmax) const { - if(view(i) < minmax.min_val) minmax.min_val = view(i); - if(view(i) > minmax.max_val) minmax.max_val = view(i); + void operator()(const size_t& i, minmax_scalar& minmax) const { + if (view(i) < minmax.min_val) minmax.min_val = view(i); + if (view(i) > minmax.max_val) minmax.max_val = view(i); } }; -} +} // namespace Impl -template -void sort( ViewType const & view , bool const always_use_kokkos_sort = false) -{ - if(!always_use_kokkos_sort) { - if(Impl::try_std_sort(view)) return; +template +void sort(ViewType const& view, bool const always_use_kokkos_sort = false) { + if (!always_use_kokkos_sort) { + if (Impl::try_std_sort(view)) return; } typedef BinOp1D CompType; Kokkos::MinMaxScalar result; Kokkos::MinMax reducer(result); - parallel_reduce("Kokkos::Sort::FindExtent",Kokkos::RangePolicy(0,view.extent(0)), - Impl::min_max_functor(view),reducer); - if(result.min_val == result.max_val) return; - BinSort 
bin_sort(view,CompType(view.extent(0)/2,result.min_val,result.max_val),true); + parallel_reduce("Kokkos::Sort::FindExtent", + Kokkos::RangePolicy( + 0, view.extent(0)), + Impl::min_max_functor(view), reducer); + if (result.min_val == result.max_val) return; + BinSort bin_sort( + view, CompType(view.extent(0) / 2, result.min_val, result.max_val), true); bin_sort.create_permute_vector(); bin_sort.sort(view); } -template -void sort( ViewType view - , size_t const begin - , size_t const end - ) -{ - typedef Kokkos::RangePolicy range_policy ; +template +void sort(ViewType view, size_t const begin, size_t const end) { + typedef Kokkos::RangePolicy range_policy; typedef BinOp1D CompType; Kokkos::MinMaxScalar result; Kokkos::MinMax reducer(result); - parallel_reduce("Kokkos::Sort::FindExtent", range_policy( begin , end ) - , Impl::min_max_functor(view),reducer ); + parallel_reduce("Kokkos::Sort::FindExtent", range_policy(begin, end), + Impl::min_max_functor(view), reducer); - if(result.min_val == result.max_val) return; + if (result.min_val == result.max_val) return; - BinSort - bin_sort(view,begin,end,CompType((end-begin)/2,result.min_val,result.max_val),true); + BinSort bin_sort( + view, begin, end, + CompType((end - begin) / 2, result.min_val, result.max_val), true); bin_sort.create_permute_vector(); - bin_sort.sort(view,begin,end); + bin_sort.sort(view, begin, end); } -} +} // namespace Kokkos #endif diff --git a/algorithms/unit_tests/TestCuda.cpp b/algorithms/unit_tests/TestCuda.cpp index 86fdccd0e78..380a5b99bfc 100644 --- a/algorithms/unit_tests/TestCuda.cpp +++ b/algorithms/unit_tests/TestCuda.cpp @@ -58,50 +58,38 @@ namespace Test { class cuda : public ::testing::Test { -protected: - static void SetUpTestCase() - { - } - static void TearDownTestCase() - { - } + protected: + static void SetUpTestCase() {} + static void TearDownTestCase() {} }; -void cuda_test_random_xorshift64( int num_draws ) -{ +void cuda_test_random_xorshift64(int num_draws) { Impl::test_random 
>(num_draws); } -void cuda_test_random_xorshift1024( int num_draws ) -{ +void cuda_test_random_xorshift1024(int num_draws) { Impl::test_random >(num_draws); } +#define CUDA_RANDOM_XORSHIFT64(num_draws) \ + TEST_F(cuda, Random_XorShift64) { cuda_test_random_xorshift64(num_draws); } -#define CUDA_RANDOM_XORSHIFT64( num_draws ) \ - TEST_F( cuda, Random_XorShift64 ) { \ - cuda_test_random_xorshift64(num_draws); \ +#define CUDA_RANDOM_XORSHIFT1024(num_draws) \ + TEST_F(cuda, Random_XorShift1024) { \ + cuda_test_random_xorshift1024(num_draws); \ } -#define CUDA_RANDOM_XORSHIFT1024( num_draws ) \ - TEST_F( cuda, Random_XorShift1024 ) { \ - cuda_test_random_xorshift1024(num_draws); \ - } +#define CUDA_SORT_UNSIGNED(size) \ + TEST_F(cuda, SortUnsigned) { Impl::test_sort(size); } -#define CUDA_SORT_UNSIGNED( size ) \ - TEST_F( cuda, SortUnsigned ) { \ - Impl::test_sort< Kokkos::Cuda, unsigned >(size); \ - } - -CUDA_RANDOM_XORSHIFT64( 132141141 ) -CUDA_RANDOM_XORSHIFT1024( 52428813 ) +CUDA_RANDOM_XORSHIFT64(132141141) +CUDA_RANDOM_XORSHIFT1024(52428813) CUDA_SORT_UNSIGNED(171) #undef CUDA_RANDOM_XORSHIFT64 #undef CUDA_RANDOM_XORSHIFT1024 #undef CUDA_SORT_UNSIGNED -} +} // namespace Test #else void KOKKOS_ALGORITHMS_UNITTESTS_TESTCUDA_PREVENT_LINK_ERROR() {} -#endif /* #ifdef KOKKOS_ENABLE_CUDA */ - +#endif /* #ifdef KOKKOS_ENABLE_CUDA */ diff --git a/algorithms/unit_tests/TestHPX.cpp b/algorithms/unit_tests/TestHPX.cpp index e5b7dbdb7a3..1d1022355c2 100644 --- a/algorithms/unit_tests/TestHPX.cpp +++ b/algorithms/unit_tests/TestHPX.cpp @@ -41,7 +41,6 @@ //@HEADER */ - #include #ifdef KOKKOS_ENABLE_HPX @@ -56,41 +55,41 @@ namespace Test { class hpx : public ::testing::Test { -protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; } - static void TearDownTestCase() - { - } + static void TearDownTestCase() {} }; -#define HPX_RANDOM_XORSHIFT64( num_draws ) \ - TEST_F( hpx, Random_XorShift64 ) 
{ \ - Impl::test_random >(num_draws); \ +#define HPX_RANDOM_XORSHIFT64(num_draws) \ + TEST_F(hpx, Random_XorShift64) { \ + Impl::test_random< \ + Kokkos::Random_XorShift64_Pool >( \ + num_draws); \ } -#define HPX_RANDOM_XORSHIFT1024( num_draws ) \ - TEST_F( hpx, Random_XorShift1024 ) { \ - Impl::test_random >(num_draws); \ +#define HPX_RANDOM_XORSHIFT1024(num_draws) \ + TEST_F(hpx, Random_XorShift1024) { \ + Impl::test_random< \ + Kokkos::Random_XorShift1024_Pool >( \ + num_draws); \ } -#define HPX_SORT_UNSIGNED( size ) \ - TEST_F( hpx, SortUnsigned ) { \ - Impl::test_sort< Kokkos::Experimental::HPX, unsigned >(size); \ +#define HPX_SORT_UNSIGNED(size) \ + TEST_F(hpx, SortUnsigned) { \ + Impl::test_sort(size); \ } -HPX_RANDOM_XORSHIFT64( 10240000 ) -HPX_RANDOM_XORSHIFT1024( 10130144 ) +HPX_RANDOM_XORSHIFT64(10240000) +HPX_RANDOM_XORSHIFT1024(10130144) HPX_SORT_UNSIGNED(171) #undef HPX_RANDOM_XORSHIFT64 #undef HPX_RANDOM_XORSHIFT1024 #undef HPX_SORT_UNSIGNED -} // namespace test +} // namespace Test #else void KOKKOS_ALGORITHMS_UNITTESTS_TESTHPX_PREVENT_LINK_ERROR() {} #endif - diff --git a/algorithms/unit_tests/TestOpenMP.cpp b/algorithms/unit_tests/TestOpenMP.cpp index c4ddde7b7f7..1ecbfcb1e1d 100644 --- a/algorithms/unit_tests/TestOpenMP.cpp +++ b/algorithms/unit_tests/TestOpenMP.cpp @@ -41,7 +41,6 @@ //@HEADER */ - #include #ifdef KOKKOS_ENABLE_OPENMP @@ -56,41 +55,39 @@ namespace Test { class openmp : public ::testing::Test { -protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; } - static void TearDownTestCase() - { - } + static void TearDownTestCase() {} }; -#define OPENMP_RANDOM_XORSHIFT64( num_draws ) \ - TEST_F( openmp, Random_XorShift64 ) { \ - Impl::test_random >(num_draws); \ +#define OPENMP_RANDOM_XORSHIFT64(num_draws) \ + TEST_F(openmp, Random_XorShift64) { \ + Impl::test_random >( \ + num_draws); \ } -#define OPENMP_RANDOM_XORSHIFT1024( num_draws ) \ - TEST_F( 
openmp, Random_XorShift1024 ) { \ - Impl::test_random >(num_draws); \ +#define OPENMP_RANDOM_XORSHIFT1024(num_draws) \ + TEST_F(openmp, Random_XorShift1024) { \ + Impl::test_random >( \ + num_draws); \ } -#define OPENMP_SORT_UNSIGNED( size ) \ - TEST_F( openmp, SortUnsigned ) { \ - Impl::test_sort< Kokkos::OpenMP, unsigned >(size); \ +#define OPENMP_SORT_UNSIGNED(size) \ + TEST_F(openmp, SortUnsigned) { \ + Impl::test_sort(size); \ } -OPENMP_RANDOM_XORSHIFT64( 10240000 ) -OPENMP_RANDOM_XORSHIFT1024( 10130144 ) +OPENMP_RANDOM_XORSHIFT64(10240000) +OPENMP_RANDOM_XORSHIFT1024(10130144) OPENMP_SORT_UNSIGNED(171) #undef OPENMP_RANDOM_XORSHIFT64 #undef OPENMP_RANDOM_XORSHIFT1024 #undef OPENMP_SORT_UNSIGNED -} // namespace test +} // namespace Test #else void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} #endif - diff --git a/algorithms/unit_tests/TestROCm.cpp b/algorithms/unit_tests/TestROCm.cpp index 15179509bbf..0bf242319f6 100644 --- a/algorithms/unit_tests/TestROCm.cpp +++ b/algorithms/unit_tests/TestROCm.cpp @@ -58,51 +58,44 @@ namespace Test { class rocm : public ::testing::Test { -protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; } - static void TearDownTestCase() - { - } + static void TearDownTestCase() {} }; -void rocm_test_random_xorshift64( int num_draws ) -{ - Impl::test_random >(num_draws); +void rocm_test_random_xorshift64(int num_draws) { + Impl::test_random< + Kokkos::Random_XorShift64_Pool >(num_draws); } -void rocm_test_random_xorshift1024( int num_draws ) -{ - Impl::test_random >(num_draws); +void rocm_test_random_xorshift1024(int num_draws) { + Impl::test_random< + Kokkos::Random_XorShift1024_Pool >(num_draws); } +#define ROCM_RANDOM_XORSHIFT64(num_draws) \ + TEST_F(rocm, Random_XorShift64) { rocm_test_random_xorshift64(num_draws); } -#define ROCM_RANDOM_XORSHIFT64( num_draws ) \ - TEST_F( rocm, Random_XorShift64 ) { \ - 
rocm_test_random_xorshift64(num_draws); \ - } - -#define ROCM_RANDOM_XORSHIFT1024( num_draws ) \ - TEST_F( rocm, Random_XorShift1024 ) { \ - rocm_test_random_xorshift1024(num_draws); \ +#define ROCM_RANDOM_XORSHIFT1024(num_draws) \ + TEST_F(rocm, Random_XorShift1024) { \ + rocm_test_random_xorshift1024(num_draws); \ } -#define ROCM_SORT_UNSIGNED( size ) \ - TEST_F( rocm, SortUnsigned ) { \ - Impl::test_sort< Kokkos::Experimental::ROCm, unsigned >(size); \ +#define ROCM_SORT_UNSIGNED(size) \ + TEST_F(rocm, SortUnsigned) { \ + Impl::test_sort(size); \ } -ROCM_RANDOM_XORSHIFT64( 132141141 ) -ROCM_RANDOM_XORSHIFT1024( 52428813 ) +ROCM_RANDOM_XORSHIFT64(132141141) +ROCM_RANDOM_XORSHIFT1024(52428813) ROCM_SORT_UNSIGNED(171) #undef ROCM_RANDOM_XORSHIFT64 #undef ROCM_RANDOM_XORSHIFT1024 #undef ROCM_SORT_UNSIGNED -} +} // namespace Test #else void KOKKOS_ALGORITHMS_UNITTESTS_TESTROCM_PREVENT_LINK_ERROR() {} -#endif /* #ifdef KOKKOS_ENABLE_ROCM */ - +#endif /* #ifdef KOKKOS_ENABLE_ROCM */ diff --git a/algorithms/unit_tests/TestRandom.hpp b/algorithms/unit_tests/TestRandom.hpp index 73bd416f2ab..74eeaa4b088 100644 --- a/algorithms/unit_tests/TestRandom.hpp +++ b/algorithms/unit_tests/TestRandom.hpp @@ -54,18 +54,19 @@ namespace Test { -namespace Impl{ +namespace Impl { // This test runs the random number generators and uses some statistic tests to // check the 'goodness' of the random numbers: // (i) mean: the mean is expected to be 0.5*RAND_MAX // (ii) variance: the variance is 1/3*mean*mean // (iii) covariance: the covariance is 0 -// (iv) 1-tupledistr: the mean, variance and covariance of a 1D Histrogram of random numbers -// (v) 3-tupledistr: the mean, variance and covariance of a 3D Histrogram of random numbers +// (iv) 1-tupledistr: the mean, variance and covariance of a 1D Histrogram +// of random numbers (v) 3-tupledistr: the mean, variance and covariance of +// a 3D Histrogram of random numbers #define HIST_DIM3D 24 -#define HIST_DIM1D 
(HIST_DIM3D*HIST_DIM3D*HIST_DIM3D) +#define HIST_DIM1D (HIST_DIM3D * HIST_DIM3D * HIST_DIM3D) struct RandomProperties { uint64_t count; @@ -77,37 +78,37 @@ struct RandomProperties { KOKKOS_INLINE_FUNCTION RandomProperties() { - count = 0; - mean = 0.0; - variance = 0.0; + count = 0; + mean = 0.0; + variance = 0.0; covariance = 0.0; - min = 1e64; - max = -1e64; + min = 1e64; + max = -1e64; } KOKKOS_INLINE_FUNCTION RandomProperties& operator+=(const RandomProperties& add) { - count += add.count; - mean += add.mean; - variance += add.variance; + count += add.count; + mean += add.mean; + variance += add.variance; covariance += add.covariance; - min = add.minmax?add.max:max; + min = add.min < min ? add.min : min; + max = add.max > max ? add.max : max; return *this; } KOKKOS_INLINE_FUNCTION void operator+=(const volatile RandomProperties& add) volatile { - count += add.count; - mean += add.mean; - variance += add.variance; + count += add.count; + mean += add.mean; + variance += add.variance; covariance += add.covariance; - min = add.minmax?add.max:max; + min = add.min < min ? add.min : min; + max = add.max > max ? add.max : max; } }; -template +template struct test_random_functor { typedef typename GeneratorPool::generator_type rnd_type; @@ -123,38 +124,40 @@ struct test_random_functor { // implementations might violate this upper bound, due to rounding // error. Just in case, we leave an extra space at the end of each // dimension, in the View types below. 
- typedef Kokkos::View type_1d; + typedef Kokkos::View + type_1d; type_1d density_1d; - typedef Kokkos::View type_3d; + typedef Kokkos::View + type_3d; type_3d density_3d; - test_random_functor (GeneratorPool rand_pool_, type_1d d1d, type_3d d3d) : - rand_pool (rand_pool_), - mean (0.5*Kokkos::rand::max ()), - density_1d (d1d), - density_3d (d3d) - {} + test_random_functor(GeneratorPool rand_pool_, type_1d d1d, type_3d d3d) + : rand_pool(rand_pool_), + mean(0.5 * Kokkos::rand::max()), + density_1d(d1d), + density_3d(d3d) {} KOKKOS_INLINE_FUNCTION - void operator() (int i, RandomProperties& prop) const { + void operator()(int i, RandomProperties& prop) const { using Kokkos::atomic_fetch_add; rnd_type rand_gen = rand_pool.get_state(); for (int k = 0; k < 1024; ++k) { - const Scalar tmp = Kokkos::rand::draw(rand_gen); + const Scalar tmp = Kokkos::rand::draw(rand_gen); prop.count++; prop.mean += tmp; - prop.variance += (tmp-mean)*(tmp-mean); - const Scalar tmp2 = Kokkos::rand::draw(rand_gen); + prop.variance += (tmp - mean) * (tmp - mean); + const Scalar tmp2 = Kokkos::rand::draw(rand_gen); prop.count++; prop.mean += tmp2; - prop.variance += (tmp2-mean)*(tmp2-mean); - prop.covariance += (tmp-mean)*(tmp2-mean); - const Scalar tmp3 = Kokkos::rand::draw(rand_gen); + prop.variance += (tmp2 - mean) * (tmp2 - mean); + prop.covariance += (tmp - mean) * (tmp2 - mean); + const Scalar tmp3 = Kokkos::rand::draw(rand_gen); prop.count++; prop.mean += tmp3; - prop.variance += (tmp3-mean)*(tmp3-mean); - prop.covariance += (tmp2-mean)*(tmp3-mean); + prop.variance += (tmp3 - mean) * (tmp3 - mean); + prop.covariance += (tmp2 - mean) * (tmp3 - mean); // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to // define an exclusive upper bound on the range of random @@ -169,26 +172,32 @@ struct test_random_functor { // returns values of max(), the histograms will still catch this // indirectly, since none of the other values will be filled in. 
- const Scalar theMax = Kokkos::rand::max (); - - const uint64_t ind1_1d = static_cast (1.0 * HIST_DIM1D * tmp / theMax); - const uint64_t ind2_1d = static_cast (1.0 * HIST_DIM1D * tmp2 / theMax); - const uint64_t ind3_1d = static_cast (1.0 * HIST_DIM1D * tmp3 / theMax); - - const uint64_t ind1_3d = static_cast (1.0 * HIST_DIM3D * tmp / theMax); - const uint64_t ind2_3d = static_cast (1.0 * HIST_DIM3D * tmp2 / theMax); - const uint64_t ind3_3d = static_cast (1.0 * HIST_DIM3D * tmp3 / theMax); - - atomic_fetch_add (&density_1d(ind1_1d), 1); - atomic_fetch_add (&density_1d(ind2_1d), 1); - atomic_fetch_add (&density_1d(ind3_1d), 1); - atomic_fetch_add (&density_3d(ind1_3d, ind2_3d, ind3_3d), 1); + const Scalar theMax = Kokkos::rand::max(); + + const uint64_t ind1_1d = + static_cast(1.0 * HIST_DIM1D * tmp / theMax); + const uint64_t ind2_1d = + static_cast(1.0 * HIST_DIM1D * tmp2 / theMax); + const uint64_t ind3_1d = + static_cast(1.0 * HIST_DIM1D * tmp3 / theMax); + + const uint64_t ind1_3d = + static_cast(1.0 * HIST_DIM3D * tmp / theMax); + const uint64_t ind2_3d = + static_cast(1.0 * HIST_DIM3D * tmp2 / theMax); + const uint64_t ind3_3d = + static_cast(1.0 * HIST_DIM3D * tmp3 / theMax); + + atomic_fetch_add(&density_1d(ind1_1d), 1); + atomic_fetch_add(&density_1d(ind2_1d), 1); + atomic_fetch_add(&density_1d(ind3_1d), 1); + atomic_fetch_add(&density_3d(ind1_3d, ind2_3d, ind3_3d), 1); } rand_pool.free_state(rand_gen); } }; -template +template struct test_histogram1d_functor { typedef RandomProperties value_type; typedef typename DeviceType::execution_space execution_space; @@ -200,34 +209,29 @@ struct test_histogram1d_functor { // implementations might violate this upper bound, due to rounding // error. Just in case, we leave an extra space at the end of each // dimension, in the View type below. 
- typedef Kokkos::View type_1d; + typedef Kokkos::View type_1d; type_1d density_1d; double mean; - test_histogram1d_functor (type_1d d1d, int num_draws) : - density_1d (d1d), - mean (1.0*num_draws/HIST_DIM1D*3) - { - } + test_histogram1d_functor(type_1d d1d, int num_draws) + : density_1d(d1d), mean(1.0 * num_draws / HIST_DIM1D * 3) {} - KOKKOS_INLINE_FUNCTION void - operator() (const typename memory_space::size_type i, - RandomProperties& prop) const - { + KOKKOS_INLINE_FUNCTION void operator()( + const typename memory_space::size_type i, RandomProperties& prop) const { typedef typename memory_space::size_type size_type; const double count = density_1d(i); prop.mean += count; prop.variance += 1.0 * (count - mean) * (count - mean); - //prop.covariance += 1.0*count*count; + // prop.covariance += 1.0*count*count; prop.min = count < prop.min ? count : prop.min; prop.max = count > prop.max ? count : prop.max; - if (i < static_cast (HIST_DIM1D-1)) { - prop.covariance += (count - mean) * (density_1d(i+1) - mean); + if (i < static_cast(HIST_DIM1D - 1)) { + prop.covariance += (count - mean) * (density_1d(i + 1) - mean); } } }; -template +template struct test_histogram3d_functor { typedef RandomProperties value_type; typedef typename DeviceType::execution_space execution_space; @@ -239,29 +243,28 @@ struct test_histogram3d_functor { // implementations might violate this upper bound, due to rounding // error. Just in case, we leave an extra space at the end of each // dimension, in the View type below. 
- typedef Kokkos::View type_3d; + typedef Kokkos::View + type_3d; type_3d density_3d; double mean; - test_histogram3d_functor (type_3d d3d, int num_draws) : - density_3d (d3d), - mean (1.0*num_draws/HIST_DIM1D) - {} + test_histogram3d_functor(type_3d d3d, int num_draws) + : density_3d(d3d), mean(1.0 * num_draws / HIST_DIM1D) {} - KOKKOS_INLINE_FUNCTION void - operator() (const typename memory_space::size_type i, - RandomProperties& prop) const - { + KOKKOS_INLINE_FUNCTION void operator()( + const typename memory_space::size_type i, RandomProperties& prop) const { typedef typename memory_space::size_type size_type; - const double count = density_3d(i/(HIST_DIM3D*HIST_DIM3D), - (i % (HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D, - i % HIST_DIM3D); + const double count = density_3d( + i / (HIST_DIM3D * HIST_DIM3D), + (i % (HIST_DIM3D * HIST_DIM3D)) / HIST_DIM3D, i % HIST_DIM3D); prop.mean += count; prop.variance += (count - mean) * (count - mean); - if (i < static_cast (HIST_DIM1D-1)) { - const double count_next = density_3d((i+1)/(HIST_DIM3D*HIST_DIM3D), - ((i+1)%(HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D, - (i+1)%HIST_DIM3D); + if (i < static_cast(HIST_DIM1D - 1)) { + const double count_next = + density_3d((i + 1) / (HIST_DIM3D * HIST_DIM3D), + ((i + 1) % (HIST_DIM3D * HIST_DIM3D)) / HIST_DIM3D, + (i + 1) % HIST_DIM3D); prop.covariance += (count - mean) * (count_next - mean); } } @@ -270,212 +273,223 @@ struct test_histogram3d_functor { // // Templated test that uses the above functors. 
// -template +template struct test_random_scalar { typedef typename RandomGenerator::generator_type rnd_type; - int pass_mean,pass_var,pass_covar; - int pass_hist1d_mean,pass_hist1d_var,pass_hist1d_covar; - int pass_hist3d_mean,pass_hist3d_var,pass_hist3d_covar; + int pass_mean, pass_var, pass_covar; + int pass_hist1d_mean, pass_hist1d_var, pass_hist1d_covar; + int pass_hist3d_mean, pass_hist3d_var, pass_hist3d_covar; - test_random_scalar (typename test_random_functor::type_1d& density_1d, - typename test_random_functor::type_3d& density_3d, - RandomGenerator& pool, - unsigned int num_draws) - { + test_random_scalar( + typename test_random_functor::type_1d& density_1d, + typename test_random_functor::type_3d& density_3d, + RandomGenerator& pool, unsigned int num_draws) { + using Kokkos::parallel_reduce; using std::cout; using std::endl; - using Kokkos::parallel_reduce; { cout << " -- Testing randomness properties" << endl; RandomProperties result; typedef test_random_functor functor_type; - parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result); - - //printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2); - double tolerance = 1.6*std::sqrt(1.0/num_draws); - double mean_expect = 0.5*Kokkos::rand::max(); - double variance_expect = 1.0/3.0*mean_expect*mean_expect; - double mean_eps = mean_expect/(result.mean/num_draws/3)-1.0; - double variance_eps = variance_expect/(result.variance/num_draws/3)-1.0; - double covariance_eps = result.covariance/num_draws/2/variance_expect; - pass_mean = ((-tolerance < mean_eps) && - ( tolerance > mean_eps)) ? 1:0; - pass_var = ((-1.5*tolerance < variance_eps) && - ( 1.5*tolerance > variance_eps)) ? 1:0; - pass_covar = ((-2.0*tolerance < covariance_eps) && - ( 2.0*tolerance > covariance_eps)) ? 
1:0; - cout << "Pass: " << pass_mean - << " " << pass_var - << " " << mean_eps - << " " << variance_eps - << " " << covariance_eps - << " || " << tolerance << endl; + parallel_reduce(num_draws / 1024, + functor_type(pool, density_1d, density_3d), result); + + // printf("Result: %lf %lf + // %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2); + double tolerance = 1.6 * std::sqrt(1.0 / num_draws); + double mean_expect = 0.5 * Kokkos::rand::max(); + double variance_expect = 1.0 / 3.0 * mean_expect * mean_expect; + double mean_eps = mean_expect / (result.mean / num_draws / 3) - 1.0; + double variance_eps = + variance_expect / (result.variance / num_draws / 3) - 1.0; + double covariance_eps = + result.covariance / num_draws / 2 / variance_expect; + pass_mean = ((-tolerance < mean_eps) && (tolerance > mean_eps)) ? 1 : 0; + pass_var = ((-1.5 * tolerance < variance_eps) && + (1.5 * tolerance > variance_eps)) + ? 1 + : 0; + pass_covar = ((-2.0 * tolerance < covariance_eps) && + (2.0 * tolerance > covariance_eps)) + ? 1 + : 0; + cout << "Pass: " << pass_mean << " " << pass_var << " " << mean_eps << " " + << variance_eps << " " << covariance_eps << " || " << tolerance + << endl; } { cout << " -- Testing 1-D histogram" << endl; RandomProperties result; - typedef test_histogram1d_functor functor_type; - parallel_reduce (HIST_DIM1D, functor_type (density_1d, num_draws), result); - - double tolerance = 6*std::sqrt(1.0/HIST_DIM1D); - double mean_expect = 1.0*num_draws*3/HIST_DIM1D; - double variance_expect = 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); - double covariance_expect = -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D; - double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0; - double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0; - double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect; - pass_hist1d_mean = ((-0.0001 < mean_eps) && - ( 0.0001 > mean_eps)) ? 
1:0; - pass_hist1d_var = ((-0.07 < variance_eps) && - ( 0.07 > variance_eps)) ? 1:0; - pass_hist1d_covar = ((-0.06 < covariance_eps) && - ( 0.06 > covariance_eps)) ? 1:0; - - cout << "Density 1D: " << mean_eps - << " " << variance_eps - << " " << (result.covariance/HIST_DIM1D/HIST_DIM1D) - << " || " << tolerance - << " " << result.min - << " " << result.max - << " || " << result.variance/HIST_DIM1D - << " " << 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D) - << " || " << result.covariance/HIST_DIM1D - << " " << -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D - << endl; + typedef test_histogram1d_functor + functor_type; + parallel_reduce(HIST_DIM1D, functor_type(density_1d, num_draws), result); + + double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D); + double mean_expect = 1.0 * num_draws * 3 / HIST_DIM1D; + double variance_expect = + 1.0 * num_draws * 3 / HIST_DIM1D * (1.0 - 1.0 / HIST_DIM1D); + double covariance_expect = -1.0 * num_draws * 3 / HIST_DIM1D / HIST_DIM1D; + double mean_eps = mean_expect / (result.mean / HIST_DIM1D) - 1.0; + double variance_eps = + variance_expect / (result.variance / HIST_DIM1D) - 1.0; + double covariance_eps = + (result.covariance / HIST_DIM1D - covariance_expect) / mean_expect; + pass_hist1d_mean = ((-0.0001 < mean_eps) && (0.0001 > mean_eps)) ? 1 : 0; + pass_hist1d_var = + ((-0.07 < variance_eps) && (0.07 > variance_eps)) ? 1 : 0; + pass_hist1d_covar = + ((-0.06 < covariance_eps) && (0.06 > covariance_eps)) ? 
1 : 0; + + cout << "Density 1D: " << mean_eps << " " << variance_eps << " " + << (result.covariance / HIST_DIM1D / HIST_DIM1D) << " || " + << tolerance << " " << result.min << " " << result.max << " || " + << result.variance / HIST_DIM1D << " " + << 1.0 * num_draws * 3 / HIST_DIM1D * (1.0 - 1.0 / HIST_DIM1D) + << " || " << result.covariance / HIST_DIM1D << " " + << -1.0 * num_draws * 3 / HIST_DIM1D / HIST_DIM1D << endl; } { cout << " -- Testing 3-D histogram" << endl; RandomProperties result; - typedef test_histogram3d_functor functor_type; - parallel_reduce (HIST_DIM1D, functor_type (density_3d, num_draws), result); - - double tolerance = 6*std::sqrt(1.0/HIST_DIM1D); - double mean_expect = 1.0*num_draws/HIST_DIM1D; - double variance_expect = 1.0*num_draws/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); - double covariance_expect = -1.0*num_draws/HIST_DIM1D/HIST_DIM1D; - double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0; - double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0; - double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect; - pass_hist3d_mean = ((-tolerance < mean_eps) && - ( tolerance > mean_eps)) ? 1:0; - pass_hist3d_var = ((-1.2*tolerance < variance_eps) && - ( 1.2*tolerance > variance_eps)) ? 1:0; - pass_hist3d_covar = ((-tolerance < covariance_eps) && - ( tolerance > covariance_eps)) ? 
1:0; - - cout << "Density 3D: " << mean_eps - << " " << variance_eps - << " " << result.covariance/HIST_DIM1D/HIST_DIM1D - << " || " << tolerance - << " " << result.min - << " " << result.max << endl; + typedef test_histogram3d_functor + functor_type; + parallel_reduce(HIST_DIM1D, functor_type(density_3d, num_draws), result); + + double tolerance = 6 * std::sqrt(1.0 / HIST_DIM1D); + double mean_expect = 1.0 * num_draws / HIST_DIM1D; + double variance_expect = + 1.0 * num_draws / HIST_DIM1D * (1.0 - 1.0 / HIST_DIM1D); + double covariance_expect = -1.0 * num_draws / HIST_DIM1D / HIST_DIM1D; + double mean_eps = mean_expect / (result.mean / HIST_DIM1D) - 1.0; + double variance_eps = + variance_expect / (result.variance / HIST_DIM1D) - 1.0; + double covariance_eps = + (result.covariance / HIST_DIM1D - covariance_expect) / mean_expect; + pass_hist3d_mean = + ((-tolerance < mean_eps) && (tolerance > mean_eps)) ? 1 : 0; + pass_hist3d_var = ((-1.2 * tolerance < variance_eps) && + (1.2 * tolerance > variance_eps)) + ? 1 + : 0; + pass_hist3d_covar = + ((-tolerance < covariance_eps) && (tolerance > covariance_eps)) ? 
1 + : 0; + + cout << "Density 3D: " << mean_eps << " " << variance_eps << " " + << result.covariance / HIST_DIM1D / HIST_DIM1D << " || " << tolerance + << " " << result.min << " " << result.max << endl; } } }; template -void test_random(unsigned int num_draws) -{ +void test_random(unsigned int num_draws) { using std::cout; using std::endl; - typename test_random_functor::type_1d density_1d("D1d"); - typename test_random_functor::type_3d density_3d("D3d"); + typename test_random_functor::type_1d density_1d("D1d"); + typename test_random_functor::type_3d density_3d("D3d"); - - uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + uint64_t ticks = + std::chrono::high_resolution_clock::now().time_since_epoch().count(); cout << "Test Seed:" << ticks << endl; RandomGenerator pool(ticks); cout << "Test Scalar=int" << endl; - test_random_scalar test_int(density_1d,density_3d,pool,num_draws); - ASSERT_EQ( test_int.pass_mean,1); - ASSERT_EQ( test_int.pass_var,1); - ASSERT_EQ( test_int.pass_covar,1); - ASSERT_EQ( test_int.pass_hist1d_mean,1); - ASSERT_EQ( test_int.pass_hist1d_var,1); - ASSERT_EQ( test_int.pass_hist1d_covar,1); - ASSERT_EQ( test_int.pass_hist3d_mean,1); - ASSERT_EQ( test_int.pass_hist3d_var,1); - ASSERT_EQ( test_int.pass_hist3d_covar,1); - deep_copy(density_1d,0); - deep_copy(density_3d,0); + test_random_scalar test_int(density_1d, density_3d, + pool, num_draws); + ASSERT_EQ(test_int.pass_mean, 1); + ASSERT_EQ(test_int.pass_var, 1); + ASSERT_EQ(test_int.pass_covar, 1); + ASSERT_EQ(test_int.pass_hist1d_mean, 1); + ASSERT_EQ(test_int.pass_hist1d_var, 1); + ASSERT_EQ(test_int.pass_hist1d_covar, 1); + ASSERT_EQ(test_int.pass_hist3d_mean, 1); + ASSERT_EQ(test_int.pass_hist3d_var, 1); + ASSERT_EQ(test_int.pass_hist3d_covar, 1); + deep_copy(density_1d, 0); + deep_copy(density_3d, 0); cout << "Test Scalar=unsigned int" << endl; - test_random_scalar test_uint(density_1d,density_3d,pool,num_draws); - ASSERT_EQ( test_uint.pass_mean,1); 
- ASSERT_EQ( test_uint.pass_var,1); - ASSERT_EQ( test_uint.pass_covar,1); - ASSERT_EQ( test_uint.pass_hist1d_mean,1); - ASSERT_EQ( test_uint.pass_hist1d_var,1); - ASSERT_EQ( test_uint.pass_hist1d_covar,1); - ASSERT_EQ( test_uint.pass_hist3d_mean,1); - ASSERT_EQ( test_uint.pass_hist3d_var,1); - ASSERT_EQ( test_uint.pass_hist3d_covar,1); - deep_copy(density_1d,0); - deep_copy(density_3d,0); + test_random_scalar test_uint( + density_1d, density_3d, pool, num_draws); + ASSERT_EQ(test_uint.pass_mean, 1); + ASSERT_EQ(test_uint.pass_var, 1); + ASSERT_EQ(test_uint.pass_covar, 1); + ASSERT_EQ(test_uint.pass_hist1d_mean, 1); + ASSERT_EQ(test_uint.pass_hist1d_var, 1); + ASSERT_EQ(test_uint.pass_hist1d_covar, 1); + ASSERT_EQ(test_uint.pass_hist3d_mean, 1); + ASSERT_EQ(test_uint.pass_hist3d_var, 1); + ASSERT_EQ(test_uint.pass_hist3d_covar, 1); + deep_copy(density_1d, 0); + deep_copy(density_3d, 0); cout << "Test Scalar=int64_t" << endl; - test_random_scalar test_int64(density_1d,density_3d,pool,num_draws); - ASSERT_EQ( test_int64.pass_mean,1); - ASSERT_EQ( test_int64.pass_var,1); - ASSERT_EQ( test_int64.pass_covar,1); - ASSERT_EQ( test_int64.pass_hist1d_mean,1); - ASSERT_EQ( test_int64.pass_hist1d_var,1); - ASSERT_EQ( test_int64.pass_hist1d_covar,1); - ASSERT_EQ( test_int64.pass_hist3d_mean,1); - ASSERT_EQ( test_int64.pass_hist3d_var,1); - ASSERT_EQ( test_int64.pass_hist3d_covar,1); - deep_copy(density_1d,0); - deep_copy(density_3d,0); + test_random_scalar test_int64( + density_1d, density_3d, pool, num_draws); + ASSERT_EQ(test_int64.pass_mean, 1); + ASSERT_EQ(test_int64.pass_var, 1); + ASSERT_EQ(test_int64.pass_covar, 1); + ASSERT_EQ(test_int64.pass_hist1d_mean, 1); + ASSERT_EQ(test_int64.pass_hist1d_var, 1); + ASSERT_EQ(test_int64.pass_hist1d_covar, 1); + ASSERT_EQ(test_int64.pass_hist3d_mean, 1); + ASSERT_EQ(test_int64.pass_hist3d_var, 1); + ASSERT_EQ(test_int64.pass_hist3d_covar, 1); + deep_copy(density_1d, 0); + deep_copy(density_3d, 0); cout << "Test Scalar=uint64_t" << 
endl; - test_random_scalar test_uint64(density_1d,density_3d,pool,num_draws); - ASSERT_EQ( test_uint64.pass_mean,1); - ASSERT_EQ( test_uint64.pass_var,1); - ASSERT_EQ( test_uint64.pass_covar,1); - ASSERT_EQ( test_uint64.pass_hist1d_mean,1); - ASSERT_EQ( test_uint64.pass_hist1d_var,1); - ASSERT_EQ( test_uint64.pass_hist1d_covar,1); - ASSERT_EQ( test_uint64.pass_hist3d_mean,1); - ASSERT_EQ( test_uint64.pass_hist3d_var,1); - ASSERT_EQ( test_uint64.pass_hist3d_covar,1); - deep_copy(density_1d,0); - deep_copy(density_3d,0); + test_random_scalar test_uint64( + density_1d, density_3d, pool, num_draws); + ASSERT_EQ(test_uint64.pass_mean, 1); + ASSERT_EQ(test_uint64.pass_var, 1); + ASSERT_EQ(test_uint64.pass_covar, 1); + ASSERT_EQ(test_uint64.pass_hist1d_mean, 1); + ASSERT_EQ(test_uint64.pass_hist1d_var, 1); + ASSERT_EQ(test_uint64.pass_hist1d_covar, 1); + ASSERT_EQ(test_uint64.pass_hist3d_mean, 1); + ASSERT_EQ(test_uint64.pass_hist3d_var, 1); + ASSERT_EQ(test_uint64.pass_hist3d_covar, 1); + deep_copy(density_1d, 0); + deep_copy(density_3d, 0); cout << "Test Scalar=float" << endl; - test_random_scalar test_float(density_1d,density_3d,pool,num_draws); - ASSERT_EQ( test_float.pass_mean,1); - ASSERT_EQ( test_float.pass_var,1); - ASSERT_EQ( test_float.pass_covar,1); - ASSERT_EQ( test_float.pass_hist1d_mean,1); - ASSERT_EQ( test_float.pass_hist1d_var,1); - ASSERT_EQ( test_float.pass_hist1d_covar,1); - ASSERT_EQ( test_float.pass_hist3d_mean,1); - ASSERT_EQ( test_float.pass_hist3d_var,1); - ASSERT_EQ( test_float.pass_hist3d_covar,1); - deep_copy(density_1d,0); - deep_copy(density_3d,0); + test_random_scalar test_float(density_1d, density_3d, + pool, num_draws); + ASSERT_EQ(test_float.pass_mean, 1); + ASSERT_EQ(test_float.pass_var, 1); + ASSERT_EQ(test_float.pass_covar, 1); + ASSERT_EQ(test_float.pass_hist1d_mean, 1); + ASSERT_EQ(test_float.pass_hist1d_var, 1); + ASSERT_EQ(test_float.pass_hist1d_covar, 1); + ASSERT_EQ(test_float.pass_hist3d_mean, 1); + 
ASSERT_EQ(test_float.pass_hist3d_var, 1); + ASSERT_EQ(test_float.pass_hist3d_covar, 1); + deep_copy(density_1d, 0); + deep_copy(density_3d, 0); cout << "Test Scalar=double" << endl; - test_random_scalar test_double(density_1d,density_3d,pool,num_draws); - ASSERT_EQ( test_double.pass_mean,1); - ASSERT_EQ( test_double.pass_var,1); - ASSERT_EQ( test_double.pass_covar,1); - ASSERT_EQ( test_double.pass_hist1d_mean,1); - ASSERT_EQ( test_double.pass_hist1d_var,1); - ASSERT_EQ( test_double.pass_hist1d_covar,1); - ASSERT_EQ( test_double.pass_hist3d_mean,1); - ASSERT_EQ( test_double.pass_hist3d_var,1); - ASSERT_EQ( test_double.pass_hist3d_covar,1); -} + test_random_scalar test_double( + density_1d, density_3d, pool, num_draws); + ASSERT_EQ(test_double.pass_mean, 1); + ASSERT_EQ(test_double.pass_var, 1); + ASSERT_EQ(test_double.pass_covar, 1); + ASSERT_EQ(test_double.pass_hist1d_mean, 1); + ASSERT_EQ(test_double.pass_hist1d_var, 1); + ASSERT_EQ(test_double.pass_hist1d_covar, 1); + ASSERT_EQ(test_double.pass_hist3d_mean, 1); + ASSERT_EQ(test_double.pass_hist3d_var, 1); + ASSERT_EQ(test_double.pass_hist3d_covar, 1); } +} // namespace Impl -} // namespace Test +} // namespace Test -#endif //KOKKOS_TEST_UNORDERED_MAP_HPP +#endif // KOKKOS_TEST_UNORDERED_MAP_HPP diff --git a/algorithms/unit_tests/TestSerial.cpp b/algorithms/unit_tests/TestSerial.cpp index 9cf998f7732..955a22ed643 100644 --- a/algorithms/unit_tests/TestSerial.cpp +++ b/algorithms/unit_tests/TestSerial.cpp @@ -52,49 +52,43 @@ #include #include - //---------------------------------------------------------------------------- - namespace Test { class serial : public ::testing::Test { -protected: - static void SetUpTestCase() - { - } + protected: + static void SetUpTestCase() {} - static void TearDownTestCase () - { - } + static void TearDownTestCase() {} }; -#define SERIAL_RANDOM_XORSHIFT64( num_draws ) \ - TEST_F( serial, Random_XorShift64 ) { \ - Impl::test_random >(num_draws); \ +#define 
SERIAL_RANDOM_XORSHIFT64(num_draws) \ + TEST_F(serial, Random_XorShift64) { \ + Impl::test_random >( \ + num_draws); \ } -#define SERIAL_RANDOM_XORSHIFT1024( num_draws ) \ - TEST_F( serial, Random_XorShift1024 ) { \ - Impl::test_random >(num_draws); \ +#define SERIAL_RANDOM_XORSHIFT1024(num_draws) \ + TEST_F(serial, Random_XorShift1024) { \ + Impl::test_random >( \ + num_draws); \ } -#define SERIAL_SORT_UNSIGNED( size ) \ - TEST_F( serial, SortUnsigned ) { \ - Impl::test_sort< Kokkos::Serial, unsigned >(size); \ +#define SERIAL_SORT_UNSIGNED(size) \ + TEST_F(serial, SortUnsigned) { \ + Impl::test_sort(size); \ } -SERIAL_RANDOM_XORSHIFT64( 10240000 ) -SERIAL_RANDOM_XORSHIFT1024( 10130144 ) +SERIAL_RANDOM_XORSHIFT64(10240000) +SERIAL_RANDOM_XORSHIFT1024(10130144) SERIAL_SORT_UNSIGNED(171) #undef SERIAL_RANDOM_XORSHIFT64 #undef SERIAL_RANDOM_XORSHIFT1024 #undef SERIAL_SORT_UNSIGNED -} // namespace Test +} // namespace Test #else void KOKKOS_ALGORITHMS_UNITTESTS_TESTSERIAL_PREVENT_LINK_ERROR() {} -#endif // KOKKOS_ENABLE_SERIAL - - +#endif // KOKKOS_ENABLE_SERIAL diff --git a/algorithms/unit_tests/TestSort.hpp b/algorithms/unit_tests/TestSort.hpp index 5fd7f09b50e..97b2cbea465 100644 --- a/algorithms/unit_tests/TestSort.hpp +++ b/algorithms/unit_tests/TestSort.hpp @@ -43,235 +43,248 @@ #define KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP #include -#include -#include -#include -#include +#include +#include +#include +#include namespace Test { -namespace Impl{ +namespace Impl { -template +template struct is_sorted_struct { typedef unsigned int value_type; typedef ExecutionSpace execution_space; - Kokkos::View keys; + Kokkos::View keys; - is_sorted_struct(Kokkos::View keys_):keys(keys_) {} + is_sorted_struct(Kokkos::View keys_) : keys(keys_) {} KOKKOS_INLINE_FUNCTION - void operator() (int i, unsigned int& count) const { - if(keys(i)>keys(i+1)) count++; + void operator()(int i, unsigned int& count) const { + if (keys(i) > keys(i + 1)) count++; } }; -template +template struct 
sum { typedef double value_type; typedef ExecutionSpace execution_space; - Kokkos::View keys; + Kokkos::View keys; - sum(Kokkos::View keys_):keys(keys_) {} + sum(Kokkos::View keys_) : keys(keys_) {} KOKKOS_INLINE_FUNCTION - void operator() (int i, double& count) const { - count+=keys(i); - } + void operator()(int i, double& count) const { count += keys(i); } }; -template +template struct bin3d_is_sorted_struct { typedef unsigned int value_type; typedef ExecutionSpace execution_space; - Kokkos::View keys; + Kokkos::View keys; int max_bins; Scalar min; Scalar max; - bin3d_is_sorted_struct(Kokkos::View keys_,int max_bins_,Scalar min_,Scalar max_): - keys(keys_),max_bins(max_bins_),min(min_),max(max_) { - } + bin3d_is_sorted_struct(Kokkos::View keys_, + int max_bins_, Scalar min_, Scalar max_) + : keys(keys_), max_bins(max_bins_), min(min_), max(max_) {} KOKKOS_INLINE_FUNCTION - void operator() (int i, unsigned int& count) const { - int ix1 = int ((keys(i,0)-min)/max * max_bins); - int iy1 = int ((keys(i,1)-min)/max * max_bins); - int iz1 = int ((keys(i,2)-min)/max * max_bins); - int ix2 = int ((keys(i+1,0)-min)/max * max_bins); - int iy2 = int ((keys(i+1,1)-min)/max * max_bins); - int iz2 = int ((keys(i+1,2)-min)/max * max_bins); - - if (ix1>ix2) count++; - else if(ix1==ix2) { - if (iy1>iy2) count++; - else if ((iy1==iy2) && (iz1>iz2)) count++; + void operator()(int i, unsigned int& count) const { + int ix1 = int((keys(i, 0) - min) / max * max_bins); + int iy1 = int((keys(i, 1) - min) / max * max_bins); + int iz1 = int((keys(i, 2) - min) / max * max_bins); + int ix2 = int((keys(i + 1, 0) - min) / max * max_bins); + int iy2 = int((keys(i + 1, 1) - min) / max * max_bins); + int iz2 = int((keys(i + 1, 2) - min) / max * max_bins); + + if (ix1 > ix2) + count++; + else if (ix1 == ix2) { + if (iy1 > iy2) + count++; + else if ((iy1 == iy2) && (iz1 > iz2)) + count++; } } }; -template +template struct sum3D { typedef double value_type; typedef ExecutionSpace execution_space; - 
Kokkos::View keys; + Kokkos::View keys; - sum3D(Kokkos::View keys_):keys(keys_) {} + sum3D(Kokkos::View keys_) : keys(keys_) {} KOKKOS_INLINE_FUNCTION - void operator() (int i, double& count) const { - count+=keys(i,0); - count+=keys(i,1); - count+=keys(i,2); + void operator()(int i, double& count) const { + count += keys(i, 0); + count += keys(i, 1); + count += keys(i, 2); } }; -template -void test_1D_sort(unsigned int n,bool force_kokkos) { - typedef Kokkos::View KeyViewType; - KeyViewType keys("Keys",n); +template +void test_1D_sort(unsigned int n, bool force_kokkos) { + typedef Kokkos::View KeyViewType; + KeyViewType keys("Keys", n); // Test sorting array with all numbers equal - Kokkos::deep_copy(keys,KeyType(1)); - Kokkos::sort(keys,force_kokkos); + Kokkos::deep_copy(keys, KeyType(1)); + Kokkos::sort(keys, force_kokkos); Kokkos::Random_XorShift64_Pool g(1931); - Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool::generator_type::MAX_URAND); + Kokkos::fill_random(keys, g, + Kokkos::Random_XorShift64_Pool< + ExecutionSpace>::generator_type::MAX_URAND); - double sum_before = 0.0; - double sum_after = 0.0; + double sum_before = 0.0; + double sum_after = 0.0; unsigned int sort_fails = 0; - Kokkos::parallel_reduce(n,sum(keys),sum_before); + Kokkos::parallel_reduce(n, sum(keys), sum_before); - Kokkos::sort(keys,force_kokkos); + Kokkos::sort(keys, force_kokkos); - Kokkos::parallel_reduce(n,sum(keys),sum_after); - Kokkos::parallel_reduce(n-1,is_sorted_struct(keys),sort_fails); + Kokkos::parallel_reduce(n, sum(keys), sum_after); + Kokkos::parallel_reduce( + n - 1, is_sorted_struct(keys), sort_fails); - double ratio = sum_before/sum_after; + double ratio = sum_before / sum_after; double epsilon = 1e-10; - unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0; + unsigned int equal_sum = + (ratio > (1.0 - epsilon)) && (ratio < (1.0 + epsilon)) ? 
1 : 0; - ASSERT_EQ(sort_fails,0); - ASSERT_EQ(equal_sum,1); + ASSERT_EQ(sort_fails, 0); + ASSERT_EQ(equal_sum, 1); } -template +template void test_3D_sort(unsigned int n) { - typedef Kokkos::View KeyViewType; + typedef Kokkos::View KeyViewType; - KeyViewType keys("Keys",n*n*n); + KeyViewType keys("Keys", n * n * n); Kokkos::Random_XorShift64_Pool g(1931); - Kokkos::fill_random(keys,g,100.0); + Kokkos::fill_random(keys, g, 100.0); - double sum_before = 0.0; - double sum_after = 0.0; + double sum_before = 0.0; + double sum_after = 0.0; unsigned int sort_fails = 0; - Kokkos::parallel_reduce(keys.extent(0),sum3D(keys),sum_before); + Kokkos::parallel_reduce(keys.extent(0), sum3D(keys), + sum_before); int bin_1d = 1; - while( bin_1d*bin_1d*bin_1d*4< (int) keys.extent(0) ) bin_1d*=2; - int bin_max[3] = {bin_1d,bin_1d,bin_1d}; - typename KeyViewType::value_type min[3] = {0,0,0}; - typename KeyViewType::value_type max[3] = {100,100,100}; - - typedef Kokkos::BinOp3D< KeyViewType > BinOp; - BinOp bin_op(bin_max,min,max); - Kokkos::BinSort< KeyViewType , BinOp > - Sorter(keys,bin_op,false); + while (bin_1d * bin_1d * bin_1d * 4 < (int)keys.extent(0)) bin_1d *= 2; + int bin_max[3] = {bin_1d, bin_1d, bin_1d}; + typename KeyViewType::value_type min[3] = {0, 0, 0}; + typename KeyViewType::value_type max[3] = {100, 100, 100}; + + typedef Kokkos::BinOp3D BinOp; + BinOp bin_op(bin_max, min, max); + Kokkos::BinSort Sorter(keys, bin_op, false); Sorter.create_permute_vector(); - Sorter.template sort< KeyViewType >(keys); + Sorter.template sort(keys); - Kokkos::parallel_reduce(keys.extent(0),sum3D(keys),sum_after); - Kokkos::parallel_reduce(keys.extent(0)-1,bin3d_is_sorted_struct(keys,bin_1d,min[0],max[0]),sort_fails); + Kokkos::parallel_reduce(keys.extent(0), sum3D(keys), + sum_after); + Kokkos::parallel_reduce(keys.extent(0) - 1, + bin3d_is_sorted_struct( + keys, bin_1d, min[0], max[0]), + sort_fails); - double ratio = sum_before/sum_after; + double ratio = sum_before / sum_after; 
double epsilon = 1e-10; - unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0; + unsigned int equal_sum = + (ratio > (1.0 - epsilon)) && (ratio < (1.0 + epsilon)) ? 1 : 0; - if ( sort_fails ) - printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails); + if (sort_fails) + printf("3D Sort Sum: %f %f Fails: %u\n", sum_before, sum_after, sort_fails); - ASSERT_EQ(sort_fails,0); - ASSERT_EQ(equal_sum,1); + ASSERT_EQ(sort_fails, 0); + ASSERT_EQ(equal_sum, 1); } //---------------------------------------------------------------------------- -template -void test_dynamic_view_sort(unsigned int n ) -{ - typedef Kokkos::Experimental::DynamicView KeyDynamicViewType; - typedef Kokkos::View KeyViewType; +template +void test_dynamic_view_sort(unsigned int n) { + typedef Kokkos::Experimental::DynamicView + KeyDynamicViewType; + typedef Kokkos::View KeyViewType; - const size_t upper_bound = 2 * n ; + const size_t upper_bound = 2 * n; const size_t min_chunk_size = 1024; KeyDynamicViewType keys("Keys", min_chunk_size, upper_bound); keys.resize_serial(n); - KeyViewType keys_view("KeysTmp", n ); + KeyViewType keys_view("KeysTmp", n); // Test sorting array with all numbers equal - Kokkos::deep_copy(keys_view,KeyType(1)); - Kokkos::deep_copy(keys,keys_view); - Kokkos::sort(keys, 0 /* begin */ , n /* end */ ); + Kokkos::deep_copy(keys_view, KeyType(1)); + Kokkos::deep_copy(keys, keys_view); + Kokkos::sort(keys, 0 /* begin */, n /* end */); Kokkos::Random_XorShift64_Pool g(1931); - Kokkos::fill_random(keys_view,g,Kokkos::Random_XorShift64_Pool::generator_type::MAX_URAND); + Kokkos::fill_random(keys_view, g, + Kokkos::Random_XorShift64_Pool< + ExecutionSpace>::generator_type::MAX_URAND); ExecutionSpace().fence(); - Kokkos::deep_copy(keys,keys_view); - //ExecutionSpace().fence(); + Kokkos::deep_copy(keys, keys_view); + // ExecutionSpace().fence(); - double sum_before = 0.0; - double sum_after = 0.0; + double sum_before = 0.0; + double sum_after = 
0.0; unsigned int sort_fails = 0; - Kokkos::parallel_reduce(n,sum(keys_view),sum_before); + Kokkos::parallel_reduce(n, sum(keys_view), + sum_before); - Kokkos::sort(keys, 0 /* begin */ , n /* end */ ); + Kokkos::sort(keys, 0 /* begin */, n /* end */); - ExecutionSpace().fence(); // Need this fence to prevent BusError with Cuda - Kokkos::deep_copy( keys_view , keys ); - //ExecutionSpace().fence(); + ExecutionSpace().fence(); // Need this fence to prevent BusError with Cuda + Kokkos::deep_copy(keys_view, keys); + // ExecutionSpace().fence(); - Kokkos::parallel_reduce(n,sum(keys_view),sum_after); - Kokkos::parallel_reduce(n-1,is_sorted_struct(keys_view),sort_fails); + Kokkos::parallel_reduce(n, sum(keys_view), + sum_after); + Kokkos::parallel_reduce( + n - 1, is_sorted_struct(keys_view), sort_fails); - double ratio = sum_before/sum_after; + double ratio = sum_before / sum_after; double epsilon = 1e-10; - unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0; - - if ( sort_fails != 0 || equal_sum != 1 ) { - std::cout << " N = " << n - << " ; sum_before = " << sum_before - << " ; sum_after = " << sum_after - << " ; ratio = " << ratio - << std::endl ; + unsigned int equal_sum = + (ratio > (1.0 - epsilon)) && (ratio < (1.0 + epsilon)) ? 
1 : 0; + + if (sort_fails != 0 || equal_sum != 1) { + std::cout << " N = " << n << " ; sum_before = " << sum_before + << " ; sum_after = " << sum_after << " ; ratio = " << ratio + << std::endl; } - ASSERT_EQ(sort_fails,0); - ASSERT_EQ(equal_sum,1); + ASSERT_EQ(sort_fails, 0); + ASSERT_EQ(equal_sum, 1); } //---------------------------------------------------------------------------- -template -void test_issue_1160() -{ +template +void test_issue_1160() { Kokkos::View element_("element", 10); Kokkos::View x_("x", 10); Kokkos::View v_("y", 10); auto h_element = Kokkos::create_mirror_view(element_); - auto h_x = Kokkos::create_mirror_view(x_); - auto h_v = Kokkos::create_mirror_view(v_); + auto h_x = Kokkos::create_mirror_view(x_); + auto h_v = Kokkos::create_mirror_view(v_); h_element(0) = 9; h_element(1) = 8; @@ -292,20 +305,21 @@ void test_issue_1160() Kokkos::deep_copy(v_, h_v); typedef decltype(element_) KeyViewType; - typedef Kokkos::BinOp1D< KeyViewType > BinOp; + typedef Kokkos::BinOp1D BinOp; int begin = 3; - int end = 8; - auto max = h_element(begin); - auto min = h_element(end - 1); + int end = 8; + auto max = h_element(begin); + auto min = h_element(end - 1); BinOp binner(end - begin, min, max); - Kokkos::BinSort Sorter(element_,begin,end,binner,false); + Kokkos::BinSort Sorter(element_, begin, end, binner, + false); Sorter.create_permute_vector(); - Sorter.sort(element_,begin,end); + Sorter.sort(element_, begin, end); - Sorter.sort(x_,begin,end); - Sorter.sort(v_,begin,end); + Sorter.sort(x_, begin, end); + Sorter.sort(v_, begin, end); Kokkos::deep_copy(h_element, element_); Kokkos::deep_copy(h_x, x_); @@ -330,18 +344,17 @@ void test_issue_1160() //---------------------------------------------------------------------------- -template -void test_sort(unsigned int N) -{ - test_1D_sort(N*N*N, true); - test_1D_sort(N*N*N, false); +template +void test_sort(unsigned int N) { + test_1D_sort(N * N * N, true); + test_1D_sort(N * N * N, false); #if 
!defined(KOKKOS_ENABLE_ROCM) - test_3D_sort(N); - test_dynamic_view_sort(N*N); + test_3D_sort(N); + test_dynamic_view_sort(N * N); #endif test_issue_1160(); } -} -} +} // namespace Impl +} // namespace Test #endif /* KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP */ diff --git a/algorithms/unit_tests/TestThreads.cpp b/algorithms/unit_tests/TestThreads.cpp index 99cdb7da92a..f3be9f3d3cf 100644 --- a/algorithms/unit_tests/TestThreads.cpp +++ b/algorithms/unit_tests/TestThreads.cpp @@ -52,51 +52,45 @@ #include #include - //---------------------------------------------------------------------------- - namespace Test { class threads : public ::testing::Test { -protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; } - static void TearDownTestCase() - { - } + static void TearDownTestCase() {} }; -#define THREADS_RANDOM_XORSHIFT64( num_draws ) \ - TEST_F( threads, Random_XorShift64 ) { \ - Impl::test_random >(num_draws); \ +#define THREADS_RANDOM_XORSHIFT64(num_draws) \ + TEST_F(threads, Random_XorShift64) { \ + Impl::test_random >( \ + num_draws); \ } -#define THREADS_RANDOM_XORSHIFT1024( num_draws ) \ - TEST_F( threads, Random_XorShift1024 ) { \ - Impl::test_random >(num_draws); \ +#define THREADS_RANDOM_XORSHIFT1024(num_draws) \ + TEST_F(threads, Random_XorShift1024) { \ + Impl::test_random >( \ + num_draws); \ } -#define THREADS_SORT_UNSIGNED( size ) \ - TEST_F( threads, SortUnsigned ) { \ - Impl::test_sort< Kokkos::Threads, double >(size); \ +#define THREADS_SORT_UNSIGNED(size) \ + TEST_F(threads, SortUnsigned) { \ + Impl::test_sort(size); \ } - -THREADS_RANDOM_XORSHIFT64( 10240000 ) -THREADS_RANDOM_XORSHIFT1024( 10130144 ) +THREADS_RANDOM_XORSHIFT64(10240000) +THREADS_RANDOM_XORSHIFT1024(10130144) THREADS_SORT_UNSIGNED(171) #undef THREADS_RANDOM_XORSHIFT64 #undef THREADS_RANDOM_XORSHIFT1024 #undef THREADS_SORT_UNSIGNED -} // namespace Test +} // namespace Test #else void 
KOKKOS_ALGORITHMS_UNITTESTS_TESTTHREADS_PREVENT_LINK_ERROR() {} #endif - - diff --git a/algorithms/unit_tests/UnitTestMain.cpp b/algorithms/unit_tests/UnitTestMain.cpp index 8feb08332fa..1d26f988e27 100644 --- a/algorithms/unit_tests/UnitTestMain.cpp +++ b/algorithms/unit_tests/UnitTestMain.cpp @@ -45,10 +45,9 @@ #include int main(int argc, char *argv[]) { - Kokkos::initialize(argc,argv); - ::testing::InitGoogleTest(&argc,argv); + Kokkos::initialize(argc, argv); + ::testing::InitGoogleTest(&argc, argv); int result = RUN_ALL_TESTS(); Kokkos::finalize(); return result; } - diff --git a/benchmarks/atomic/main.cpp b/benchmarks/atomic/main.cpp index d86d196249b..5f0977f7548 100644 --- a/benchmarks/atomic/main.cpp +++ b/benchmarks/atomic/main.cpp @@ -1,124 +1,120 @@ -#include -#include -#include +#include +#include +#include -template -double test_atomic(int L, int N, int M,int K,int R,Kokkos::View offsets) { - Kokkos::View output("Output",N); +template +double test_atomic(int L, int N, int M, int K, int R, + Kokkos::View offsets) { + Kokkos::View output("Output", N); Kokkos::Impl::Timer timer; - for(int r = 0; r -double test_no_atomic(int L, int N, int M,int K,int R,Kokkos::View offsets) { - Kokkos::View output("Output",N); +template +double test_no_atomic(int L, int N, int M, int K, int R, + Kokkos::View offsets) { + Kokkos::View output("Output", N); Kokkos::Impl::Timer timer; - for(int r = 0; r\n"); - printf("Example Input GPU:\n"); - printf(" Histogram : 1000000 1000 1 1000 1 10 1\n"); - printf(" MD Force : 100000 100000 100 1000 20 10 4\n"); - printf(" Matrix Assembly : 100000 1000000 50 1000 20 10 4\n"); - Kokkos::finalize(); - return 0; - } + Kokkos::initialize(argc, argv); + { + if (argc < 8) { + printf("Arguments: L N M D K R T\n"); + printf(" L: Number of iterations to run\n"); + printf(" N: Length of array to do atomics into\n"); + printf(" M: Number of atomics per iteration to do\n"); + printf(" D: Distance from index i to do atomics into (randomly)\n"); + 
printf(" K: Number of FMAD per atomic\n"); + printf(" R: Number of repeats of the experiments\n"); + printf(" T: Type of atomic\n"); + printf(" 1 - int\n"); + printf(" 2 - long\n"); + printf(" 3 - float\n"); + printf(" 4 - double\n"); + printf(" 5 - complex\n"); + printf("Example Input GPU:\n"); + printf(" Histogram : 1000000 1000 1 1000 1 10 1\n"); + printf(" MD Force : 100000 100000 100 1000 20 10 4\n"); + printf(" Matrix Assembly : 100000 1000000 50 1000 20 10 4\n"); + Kokkos::finalize(); + return 0; + } + int L = atoi(argv[1]); + int N = atoi(argv[2]); + int M = atoi(argv[3]); + int D = atoi(argv[4]); + int K = atoi(argv[5]); + int R = atoi(argv[6]); + int type = atoi(argv[7]); - int L = atoi(argv[1]); - int N = atoi(argv[2]); - int M = atoi(argv[3]); - int D = atoi(argv[4]); - int K = atoi(argv[5]); - int R = atoi(argv[6]); - int type = atoi(argv[7]); - - Kokkos::View offsets("Offsets",L,M); - Kokkos::Random_XorShift64_Pool<> pool(12371); - Kokkos::fill_random(offsets,pool,D); - double time = 0; - if(type==1) - time = test_atomic(L,N,M,K,R,offsets); - if(type==2) - time = test_atomic(L,N,M,K,R,offsets); - if(type==3) - time = test_atomic(L,N,M,K,R,offsets); - if(type==4) - time = test_atomic(L,N,M,K,R,offsets); - if(type==5) - time = test_atomic >(L,N,M,K,R,offsets); + Kokkos::View offsets("Offsets", L, M); + Kokkos::Random_XorShift64_Pool<> pool(12371); + Kokkos::fill_random(offsets, pool, D); + double time = 0; + if (type == 1) time = test_atomic(L, N, M, K, R, offsets); + if (type == 2) time = test_atomic(L, N, M, K, R, offsets); + if (type == 3) time = test_atomic(L, N, M, K, R, offsets); + if (type == 4) time = test_atomic(L, N, M, K, R, offsets); + if (type == 5) + time = test_atomic >(L, N, M, K, R, offsets); - double time2 = 1; - if(type==1) - time2 = test_no_atomic(L,N,M,K,R,offsets); - if(type==2) - time2 = test_no_atomic(L,N,M,K,R,offsets); - if(type==3) - time2 = test_no_atomic(L,N,M,K,R,offsets); - if(type==4) - time2 = 
test_no_atomic(L,N,M,K,R,offsets); - if(type==5) - time2 = test_no_atomic >(L,N,M,K,R,offsets); + double time2 = 1; + if (type == 1) time2 = test_no_atomic(L, N, M, K, R, offsets); + if (type == 2) time2 = test_no_atomic(L, N, M, K, R, offsets); + if (type == 3) time2 = test_no_atomic(L, N, M, K, R, offsets); + if (type == 4) time2 = test_no_atomic(L, N, M, K, R, offsets); + if (type == 5) + time2 = test_no_atomic >(L, N, M, K, R, offsets); - int size = 0; - if(type==1) size = sizeof(int); - if(type==2) size = sizeof(long); - if(type==3) size = sizeof(float); - if(type==4) size = sizeof(double); - if(type==5) size = sizeof(Kokkos::complex); + int size = 0; + if (type == 1) size = sizeof(int); + if (type == 2) size = sizeof(long); + if (type == 3) size = sizeof(float); + if (type == 4) size = sizeof(double); + if (type == 5) size = sizeof(Kokkos::complex); - printf("%i\n",size); - printf("Time: %s %i %i %i %i %i %i (t_atomic: %e t_nonatomic: %e ratio: %lf )( GUpdates/s: %lf GB/s: %lf )\n", - (type==1)?"int": ( - (type==2)?"long": ( - (type==3)?"float": ( - (type==4)?"double":"complex"))), - L,N,M,D,K,R,time,time2,time/time2, - 1.e-9*L*R*M/time, 1.0*L*R*M*2*size/time/1024/1024/1024); -} + printf("%i\n", size); + printf( + "Time: %s %i %i %i %i %i %i (t_atomic: %e t_nonatomic: %e ratio: %lf " + ")( GUpdates/s: %lf GB/s: %lf )\n", + (type == 1) + ? "int" + : ((type == 2) + ? "long" + : ((type == 3) ? "float" + : ((type == 4) ? 
"double" : "complex"))), + L, N, M, D, K, R, time, time2, time / time2, 1.e-9 * L * R * M / time, + 1.0 * L * R * M * 2 * size / time / 1024 / 1024 / 1024); + } Kokkos::finalize(); } - diff --git a/benchmarks/bytes_and_flops/bench.hpp b/benchmarks/bytes_and_flops/bench.hpp index 59b4d50c441..4c990e3e2f3 100644 --- a/benchmarks/bytes_and_flops/bench.hpp +++ b/benchmarks/bytes_and_flops/bench.hpp @@ -41,59 +41,52 @@ //@HEADER */ -#include -#include +#include +#include -template +template struct Run { -static void run(int N, int K, int R, int F, int T, int S); + static void run(int N, int K, int R, int F, int T, int S); }; -template +template struct RunStride { -static void run_1(int N, int K, int R, int F, int T, int S); -static void run_2(int N, int K, int R, int F, int T, int S); -static void run_3(int N, int K, int R, int F, int T, int S); -static void run_4(int N, int K, int R, int F, int T, int S); -static void run_5(int N, int K, int R, int F, int T, int S); -static void run_6(int N, int K, int R, int F, int T, int S); -static void run_7(int N, int K, int R, int F, int T, int S); -static void run_8(int N, int K, int R, int F, int T, int S); -static void run(int N, int K, int R, int U, int F, int T, int S); + static void run_1(int N, int K, int R, int F, int T, int S); + static void run_2(int N, int K, int R, int F, int T, int S); + static void run_3(int N, int K, int R, int F, int T, int S); + static void run_4(int N, int K, int R, int F, int T, int S); + static void run_5(int N, int K, int R, int F, int T, int S); + static void run_6(int N, int K, int R, int F, int T, int S); + static void run_7(int N, int K, int R, int F, int T, int S); + static void run_8(int N, int K, int R, int F, int T, int S); + static void run(int N, int K, int R, int U, int F, int T, int S); }; #define STRIDE 1 -#include +#include #undef STRIDE #define STRIDE 2 -#include +#include #undef STRIDE #define STRIDE 4 -#include +#include #undef STRIDE #define STRIDE 8 -#include +#include 
#undef STRIDE #define STRIDE 16 -#include +#include #undef STRIDE #define STRIDE 32 -#include +#include #undef STRIDE -template +template void run_stride_unroll(int N, int K, int R, int D, int U, int F, int T, int S) { - if(D == 1) - RunStride::run(N,K,R,U,F,T,S); - if(D == 2) - RunStride::run(N,K,R,U,F,T,S); - if(D == 4) - RunStride::run(N,K,R,U,F,T,S); - if(D == 8) - RunStride::run(N,K,R,U,F,T,S); - if(D == 16) - RunStride::run(N,K,R,U,F,T,S); - if(D == 32) - RunStride::run(N,K,R,U,F,T,S); + if (D == 1) RunStride::run(N, K, R, U, F, T, S); + if (D == 2) RunStride::run(N, K, R, U, F, T, S); + if (D == 4) RunStride::run(N, K, R, U, F, T, S); + if (D == 8) RunStride::run(N, K, R, U, F, T, S); + if (D == 16) RunStride::run(N, K, R, U, F, T, S); + if (D == 32) RunStride::run(N, K, R, U, F, T, S); } - diff --git a/benchmarks/bytes_and_flops/bench_stride.hpp b/benchmarks/bytes_and_flops/bench_stride.hpp index 6509c654e71..840737a105e 100644 --- a/benchmarks/bytes_and_flops/bench_stride.hpp +++ b/benchmarks/bytes_and_flops/bench_stride.hpp @@ -41,84 +41,82 @@ //@HEADER */ - #define UNROLL 1 -#include +#include #undef UNROLL #define UNROLL 2 -#include +#include #undef UNROLL #define UNROLL 3 -#include +#include #undef UNROLL #define UNROLL 4 -#include +#include #undef UNROLL #define UNROLL 5 -#include +#include #undef UNROLL #define UNROLL 6 -#include +#include #undef UNROLL #define UNROLL 7 -#include +#include #undef UNROLL #define UNROLL 8 -#include +#include #undef UNROLL -template -struct RunStride { -static void run_1(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} -static void run_2(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} -static void run_3(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} -static void run_4(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} -static void run_5(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} -static void 
run_6(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} -static void run_7(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} -static void run_8(int N, int K, int R, int F, int T, int S) { - Run::run(N,K,R,F,T,S); -} - -static void run(int N, int K, int R, int U, int F, int T, int S) { - if(U==1) { - run_1(N,K,R,F,T,S); +template +struct RunStride { + static void run_1(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==2) { - run_2(N,K,R,F,T,S); + static void run_2(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==3) { - run_3(N,K,R,F,T,S); + static void run_3(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==4) { - run_4(N,K,R,F,T,S); + static void run_4(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==5) { - run_5(N,K,R,F,T,S); + static void run_5(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==6) { - run_6(N,K,R,F,T,S); + static void run_6(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==7) { - run_7(N,K,R,F,T,S); + static void run_7(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); + } + static void run_8(int N, int K, int R, int F, int T, int S) { + Run::run(N, K, R, F, T, S); } - if(U==8) { - run_8(N,K,R,F,T,S); - } -} -}; + static void run(int N, int K, int R, int U, int F, int T, int S) { + if (U == 1) { + run_1(N, K, R, F, T, S); + } + if (U == 2) { + run_2(N, K, R, F, T, S); + } + if (U == 3) { + run_3(N, K, R, F, T, S); + } + if (U == 4) { + run_4(N, K, R, F, T, S); + } + if (U == 5) { + run_5(N, K, R, F, T, S); + } + if (U == 6) { + run_6(N, K, R, F, T, S); + } + if (U == 7) { + run_7(N, K, R, F, T, S); + } + if (U == 8) { + run_8(N, K, R, F, T, S); + } + } +}; diff --git a/benchmarks/bytes_and_flops/bench_unroll_stride.hpp b/benchmarks/bytes_and_flops/bench_unroll_stride.hpp index 
c6651da1e7c..d15d2d3fa80 100644 --- a/benchmarks/bytes_and_flops/bench_unroll_stride.hpp +++ b/benchmarks/bytes_and_flops/bench_unroll_stride.hpp @@ -41,108 +41,110 @@ //@HEADER */ -template -struct Run { -static void run(int N, int K, int R, int F, int T, int S) { - Kokkos::View A("A",N,K); - Kokkos::View B("B",N,K); - Kokkos::View C("C",N,K); +template +struct Run { + static void run(int N, int K, int R, int F, int T, int S) { + Kokkos::View A("A", N, K); + Kokkos::View B("B", N, K); + Kokkos::View C("C", N, K); - Kokkos::deep_copy(A,Scalar(1.5)); - Kokkos::deep_copy(B,Scalar(2.5)); - Kokkos::deep_copy(C,Scalar(3.5)); + Kokkos::deep_copy(A, Scalar(1.5)); + Kokkos::deep_copy(B, Scalar(2.5)); + Kokkos::deep_copy(C, Scalar(3.5)); - Kokkos::Timer timer; - Kokkos::parallel_for("BenchmarkKernel",Kokkos::TeamPolicy<>(N,T).set_scratch_size(0,Kokkos::PerTeam(S)), - KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type& team) { - const int n = team.league_rank(); - for(int r=0; r1) - Scalar a2 = a1*1.3; -#endif -#if(UNROLL>2) - Scalar a3 = a2*1.1; -#endif -#if(UNROLL>3) - Scalar a4 = a3*1.1; -#endif -#if(UNROLL>4) - Scalar a5 = a4*1.3; -#endif -#if(UNROLL>5) - Scalar a6 = a5*1.1; -#endif -#if(UNROLL>6) - Scalar a7 = a6*1.1; -#endif -#if(UNROLL>7) - Scalar a8 = a7*1.1; + Kokkos::Timer timer; + Kokkos::parallel_for( + "BenchmarkKernel", + Kokkos::TeamPolicy<>(N, T).set_scratch_size(0, Kokkos::PerTeam(S)), + KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) { + const int n = team.league_rank(); + for (int r = 0; r < R; r++) { + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, 0, K), [&](const int& i) { + Scalar a1 = A(n, i, 0); + const Scalar b = B(n, i, 0); +#if (UNROLL > 1) + Scalar a2 = a1 * 1.3; +#endif +#if (UNROLL > 2) + Scalar a3 = a2 * 1.1; +#endif +#if (UNROLL > 3) + Scalar a4 = a3 * 1.1; +#endif +#if (UNROLL > 4) + Scalar a5 = a4 * 1.3; +#endif +#if (UNROLL > 5) + Scalar a6 = a5 * 1.1; +#endif +#if (UNROLL > 6) + Scalar a7 = a6 * 1.1; +#endif +#if 
(UNROLL > 7) + Scalar a8 = a7 * 1.1; #endif - - for(int f = 0; f1) - a2 += b*a2; + for (int f = 0; f < F; f++) { + a1 += b * a1; +#if (UNROLL > 1) + a2 += b * a2; #endif -#if(UNROLL>2) - a3 += b*a3; +#if (UNROLL > 2) + a3 += b * a3; #endif -#if(UNROLL>3) - a4 += b*a4; +#if (UNROLL > 3) + a4 += b * a4; #endif -#if(UNROLL>4) - a5 += b*a5; +#if (UNROLL > 4) + a5 += b * a5; #endif -#if(UNROLL>5) - a6 += b*a6; +#if (UNROLL > 5) + a6 += b * a6; #endif -#if(UNROLL>6) - a7 += b*a7; +#if (UNROLL > 6) + a7 += b * a7; #endif -#if(UNROLL>7) - a8 += b*a8; +#if (UNROLL > 7) + a8 += b * a8; #endif - - - } -#if(UNROLL==1) - C(n,i,0) = a1; + } +#if (UNROLL == 1) + C(n, i, 0) = a1; #endif -#if(UNROLL==2) - C(n,i,0) = a1+a2; +#if (UNROLL == 2) + C(n, i, 0) = a1 + a2; #endif -#if(UNROLL==3) - C(n,i,0) = a1+a2+a3; +#if (UNROLL == 3) + C(n, i, 0) = a1 + a2 + a3; #endif -#if(UNROLL==4) - C(n,i,0) = a1+a2+a3+a4; +#if (UNROLL == 4) + C(n, i, 0) = a1 + a2 + a3 + a4; #endif -#if(UNROLL==5) - C(n,i,0) = a1+a2+a3+a4+a5; +#if (UNROLL == 5) + C(n, i, 0) = a1 + a2 + a3 + a4 + a5; #endif -#if(UNROLL==6) - C(n,i,0) = a1+a2+a3+a4+a5+a6; +#if (UNROLL == 6) + C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6; #endif -#if(UNROLL==7) - C(n,i,0) = a1+a2+a3+a4+a5+a6+a7; +#if (UNROLL == 7) + C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6 + a7; #endif -#if(UNROLL==8) - C(n,i,0) = a1+a2+a3+a4+a5+a6+a7+a8; +#if (UNROLL == 8) + C(n, i, 0) = a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8; #endif + }); + } + }); + Kokkos::fence(); + double seconds = timer.seconds(); - }); - } - }); - Kokkos::fence(); - double seconds = timer.seconds(); - - double bytes = 1.0*N*K*R*3*sizeof(Scalar); - double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1)); - printf("NKRUFTS: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf\n",N,K,R,UNROLL,F,T,S,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds); -} + double bytes = 1.0 * N * K * R * 3 * sizeof(Scalar); + double flops = 1.0 * N * K * R * (F * 2 * UNROLL + 2 * (UNROLL - 1)); + 
printf( + "NKRUFTS: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: " + "%lf\n", + N, K, R, UNROLL, F, T, S, seconds, + 1.0 * bytes / seconds / 1024 / 1024 / 1024, 1.e-9 * flops / seconds); + } }; - diff --git a/benchmarks/bytes_and_flops/main.cpp b/benchmarks/bytes_and_flops/main.cpp index 4f46b38717d..6ce0bc6d657 100644 --- a/benchmarks/bytes_and_flops/main.cpp +++ b/benchmarks/bytes_and_flops/main.cpp @@ -41,25 +41,27 @@ //@HEADER */ -#include -#include -#include -#include +#include +#include +#include +#include int main(int argc, char* argv[]) { Kokkos::initialize(); - - if(argc<10) { + if (argc < 10) { printf("Arguments: N K R D U F T S\n"); printf(" P: Precision (1==float, 2==double)\n"); printf(" N,K: dimensions of the 2D array to allocate\n"); printf(" R: how often to loop through the K dimension with each team\n"); printf(" D: distance between loaded elements (stride)\n"); printf(" U: how many independent flops to do per load\n"); - printf(" F: how many times to repeat the U unrolled operations before reading next element\n"); + printf( + " F: how many times to repeat the U unrolled operations before " + "reading next element\n"); printf(" T: team size\n"); - printf(" S: shared memory per team (used to control occupancy on GPUs)\n"); + printf( + " S: shared memory per team (used to control occupancy on GPUs)\n"); printf("Example Input GPU:\n"); printf(" Bandwidth Bound : 2 100000 1024 1 1 1 1 256 6000\n"); printf(" Cache Bound : 2 100000 1024 64 1 1 1 512 20000\n"); @@ -70,7 +72,6 @@ int main(int argc, char* argv[]) { return 0; } - int P = atoi(argv[1]); int N = atoi(argv[2]); int K = atoi(argv[3]); @@ -81,17 +82,25 @@ int main(int argc, char* argv[]) { int T = atoi(argv[8]); int S = atoi(argv[9]); - if(U>8) {printf("U must be 1-8\n"); return 0;} - if( (D!=1) && (D!=2) && (D!=4) && (D!=8) && (D!=16) && (D!=32)) {printf("D must be one of 1,2,4,8,16,32\n"); return 0;} - if( (P!=1) && (P!=2) ) {printf("P must be one of 1,2\n"); return 0;} + if (U 
> 8) { + printf("U must be 1-8\n"); + return 0; + } + if ((D != 1) && (D != 2) && (D != 4) && (D != 8) && (D != 16) && (D != 32)) { + printf("D must be one of 1,2,4,8,16,32\n"); + return 0; + } + if ((P != 1) && (P != 2)) { + printf("P must be one of 1,2\n"); + return 0; + } - if(P==1) { - run_stride_unroll(N,K,R,D,U,F,T,S); + if (P == 1) { + run_stride_unroll(N, K, R, D, U, F, T, S); } - if(P==2) { - run_stride_unroll(N,K,R,D,U,F,T,S); + if (P == 2) { + run_stride_unroll(N, K, R, D, U, F, T, S); } Kokkos::finalize(); } - diff --git a/benchmarks/gather/gather.hpp b/benchmarks/gather/gather.hpp index bbbd65850f7..fe17cfa38b4 100644 --- a/benchmarks/gather/gather.hpp +++ b/benchmarks/gather/gather.hpp @@ -41,52 +41,44 @@ //@HEADER */ -template +template struct RunGather { static void run(int N, int K, int D, int R, int F); }; #define UNROLL 1 -#include +#include #undef UNROLL #define UNROLL 2 -#include +#include #undef UNROLL #define UNROLL 3 -#include +#include #undef UNROLL #define UNROLL 4 -#include +#include #undef UNROLL #define UNROLL 5 -#include +#include #undef UNROLL #define UNROLL 6 -#include +#include #undef UNROLL #define UNROLL 7 -#include +#include #undef UNROLL #define UNROLL 8 -#include +#include #undef UNROLL -template +template void run_gather_test(int N, int K, int D, int R, int U, int F) { - if(U == 1) - RunGather::run(N,K,D,R,F); - if(U == 2) - RunGather::run(N,K,D,R,F); - if(U == 3) - RunGather::run(N,K,D,R,F); - if(U == 4) - RunGather::run(N,K,D,R,F); - if(U == 5) - RunGather::run(N,K,D,R,F); - if(U == 6) - RunGather::run(N,K,D,R,F); - if(U == 7) - RunGather::run(N,K,D,R,F); - if(U == 8) - RunGather::run(N,K,D,R,F); + if (U == 1) RunGather::run(N, K, D, R, F); + if (U == 2) RunGather::run(N, K, D, R, F); + if (U == 3) RunGather::run(N, K, D, R, F); + if (U == 4) RunGather::run(N, K, D, R, F); + if (U == 5) RunGather::run(N, K, D, R, F); + if (U == 6) RunGather::run(N, K, D, R, F); + if (U == 7) RunGather::run(N, K, D, R, F); + if (U == 8) 
RunGather::run(N, K, D, R, F); } diff --git a/benchmarks/gather/gather_unroll.hpp b/benchmarks/gather/gather_unroll.hpp index 1d9c99adf9e..51d602c21a0 100644 --- a/benchmarks/gather/gather_unroll.hpp +++ b/benchmarks/gather/gather_unroll.hpp @@ -41,129 +41,132 @@ //@HEADER */ -#include -#include +#include +#include -template -struct RunGather { -static void run(int N, int K, int D, int R, int F) { - Kokkos::View connectivity("Connectivity",N,K); - Kokkos::View A_in("Input",N); - Kokkos::View B_in("Input",N); - Kokkos::View C("Output",N); +template +struct RunGather { + static void run(int N, int K, int D, int R, int F) { + Kokkos::View connectivity("Connectivity", N, K); + Kokkos::View A_in("Input", N); + Kokkos::View B_in("Input", N); + Kokkos::View C("Output", N); - Kokkos::Random_XorShift64_Pool<> rand_pool(12313); + Kokkos::Random_XorShift64_Pool<> rand_pool(12313); - Kokkos::deep_copy(A_in,1.5); - Kokkos::deep_copy(B_in,2.0); + Kokkos::deep_copy(A_in, 1.5); + Kokkos::deep_copy(B_in, 2.0); - Kokkos::View > A(A_in); - Kokkos::View > B(B_in); - - Kokkos::parallel_for("InitKernel",N, - KOKKOS_LAMBDA (const int& i) { - auto rand_gen = rand_pool.get_state(); - for( int jj=0; jj > A( + A_in); + Kokkos::View > B( + B_in); + Kokkos::parallel_for( + "InitKernel", N, KOKKOS_LAMBDA(const int& i) { + auto rand_gen = rand_pool.get_state(); + for (int jj = 0; jj < K; jj++) { + connectivity(i, jj) = (rand_gen.rand(D) + i - D / 2 + N) % N; + } + rand_pool.free_state(rand_gen); + }); + Kokkos::fence(); - Kokkos::Timer timer; - for(int r = 0; r1) - Scalar a2 = a1*Scalar(1.3); + Kokkos::Timer timer; + for (int r = 0; r < R; r++) { + Kokkos::parallel_for( + "BenchmarkKernel", N, KOKKOS_LAMBDA(const int& i) { + Scalar c = Scalar(0.0); + for (int jj = 0; jj < K; jj++) { + const int j = connectivity(i, jj); + Scalar a1 = A(j); + const Scalar b = B(j); +#if (UNROLL > 1) + Scalar a2 = a1 * Scalar(1.3); #endif -#if(UNROLL>2) - Scalar a3 = a2*Scalar(1.1); +#if (UNROLL > 2) + Scalar a3 = 
a2 * Scalar(1.1); #endif -#if(UNROLL>3) - Scalar a4 = a3*Scalar(1.1); +#if (UNROLL > 3) + Scalar a4 = a3 * Scalar(1.1); #endif -#if(UNROLL>4) - Scalar a5 = a4*Scalar(1.3); +#if (UNROLL > 4) + Scalar a5 = a4 * Scalar(1.3); #endif -#if(UNROLL>5) - Scalar a6 = a5*Scalar(1.1); +#if (UNROLL > 5) + Scalar a6 = a5 * Scalar(1.1); #endif -#if(UNROLL>6) - Scalar a7 = a6*Scalar(1.1); +#if (UNROLL > 6) + Scalar a7 = a6 * Scalar(1.1); #endif -#if(UNROLL>7) - Scalar a8 = a7*Scalar(1.1); +#if (UNROLL > 7) + Scalar a8 = a7 * Scalar(1.1); #endif - - for(int f = 0; f1) - a2 += b*a2; + for (int f = 0; f < F; f++) { + a1 += b * a1; +#if (UNROLL > 1) + a2 += b * a2; #endif -#if(UNROLL>2) - a3 += b*a3; +#if (UNROLL > 2) + a3 += b * a3; #endif -#if(UNROLL>3) - a4 += b*a4; +#if (UNROLL > 3) + a4 += b * a4; #endif -#if(UNROLL>4) - a5 += b*a5; +#if (UNROLL > 4) + a5 += b * a5; #endif -#if(UNROLL>5) - a6 += b*a6; +#if (UNROLL > 5) + a6 += b * a6; #endif -#if(UNROLL>6) - a7 += b*a7; +#if (UNROLL > 6) + a7 += b * a7; #endif -#if(UNROLL>7) - a8 += b*a8; +#if (UNROLL > 7) + a8 += b * a8; #endif - - - } -#if(UNROLL==1) - c += a1; + } +#if (UNROLL == 1) + c += a1; #endif -#if(UNROLL==2) - c += a1+a2; +#if (UNROLL == 2) + c += a1 + a2; #endif -#if(UNROLL==3) - c += a1+a2+a3; +#if (UNROLL == 3) + c += a1 + a2 + a3; #endif -#if(UNROLL==4) - c += a1+a2+a3+a4; +#if (UNROLL == 4) + c += a1 + a2 + a3 + a4; #endif -#if(UNROLL==5) - c += a1+a2+a3+a4+a5; +#if (UNROLL == 5) + c += a1 + a2 + a3 + a4 + a5; #endif -#if(UNROLL==6) - c += a1+a2+a3+a4+a5+a6; +#if (UNROLL == 6) + c += a1 + a2 + a3 + a4 + a5 + a6; #endif -#if(UNROLL==7) - c += a1+a2+a3+a4+a5+a6+a7; +#if (UNROLL == 7) + c += a1 + a2 + a3 + a4 + a5 + a6 + a7; #endif -#if(UNROLL==8) - c += a1+a2+a3+a4+a5+a6+a7+a8; +#if (UNROLL == 8) + c += a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8; #endif + } + C(i) = c; + }); + Kokkos::fence(); + } + double seconds = timer.seconds(); - } - C(i) = c ; - }); - Kokkos::fence(); + double bytes = 1.0 * N * K * R * (2 * 
sizeof(Scalar) + sizeof(int)) + + 1.0 * N * R * sizeof(Scalar); + double flops = 1.0 * N * K * R * (F * 2 * UNROLL + 2 * (UNROLL - 1)); + double gather_ops = 1.0 * N * K * R * 2; + printf( + "SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: " + "%lf GGather/s: %lf\n", + sizeof(Scalar) / 4, N, K, D, R, UNROLL, F, seconds, + 1.0 * bytes / seconds / 1024 / 1024 / 1024, 1.e-9 * flops / seconds, + 1.e-9 * gather_ops / seconds); } - double seconds = timer.seconds(); - - double bytes = 1.0*N*K*R*(2*sizeof(Scalar)+sizeof(int)) + 1.0*N*R*sizeof(Scalar); - double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1)); - double gather_ops = 1.0*N*K*R*2; - printf("SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf GGather/s: %lf\n",sizeof(Scalar)/4,N,K,D,R,UNROLL,F,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds,1.e-9*gather_ops/seconds); -} }; diff --git a/benchmarks/gather/main.cpp b/benchmarks/gather/main.cpp index ca5238e7fdb..9d018b2140f 100644 --- a/benchmarks/gather/main.cpp +++ b/benchmarks/gather/main.cpp @@ -41,23 +41,26 @@ //@HEADER */ -#include -#include -#include -#include +#include +#include +#include +#include int main(int argc, char* argv[]) { - Kokkos::initialize(argc,argv); + Kokkos::initialize(argc, argv); - if(argc<8) { + if (argc < 8) { printf("Arguments: S N K D\n"); - printf(" S: Scalar Type Size (1==float, 2==double, 4=complex)\n"); + printf( + " S: Scalar Type Size (1==float, 2==double, 4=complex)\n"); printf(" N: Number of entities\n"); printf(" K: Number of things to gather per entity\n"); printf(" D: Max distance of gathered things of an entity\n"); printf(" R: how often to loop through the K dimension with each team\n"); printf(" U: how many independent flops to do per load\n"); - printf(" F: how many times to repeat the U unrolled operations before reading next element\n"); + printf( + " F: how many times to repeat the U unrolled operations before " + "reading next element\n"); printf("Example Input 
GPU:\n"); printf(" Bandwidth Bound : 2 10000000 1 1 10 1 1\n"); printf(" Cache Bound : 2 10000000 64 1 10 1 1\n"); @@ -68,7 +71,6 @@ int main(int argc, char* argv[]) { return 0; } - int S = atoi(argv[1]); int N = atoi(argv[2]); int K = atoi(argv[3]); @@ -77,17 +79,22 @@ int main(int argc, char* argv[]) { int U = atoi(argv[6]); int F = atoi(argv[7]); - if( (S!=1) && (S!=2) && (S!=4)) {printf("S must be one of 1,2,4\n"); return 0;} - if( N(N,K,D,R,U,F); + if ((S != 1) && (S != 2) && (S != 4)) { + printf("S must be one of 1,2,4\n"); + return 0; + } + if (N < D) { + printf("N must be larger or equal to D\n"); + return 0; + } + if (S == 1) { + run_gather_test(N, K, D, R, U, F); } - if(S==2) { - run_gather_test(N,K,D,R,U,F); + if (S == 2) { + run_gather_test(N, K, D, R, U, F); } - if(S==4) { - run_gather_test >(N,K,D,R,U,F); + if (S == 4) { + run_gather_test >(N, K, D, R, U, F); } Kokkos::finalize(); } - diff --git a/benchmarks/policy_performance/main.cpp b/benchmarks/policy_performance/main.cpp index 2f5395734af..0c237a0adc0 100644 --- a/benchmarks/policy_performance/main.cpp +++ b/benchmarks/policy_performance/main.cpp @@ -44,67 +44,86 @@ #include #include "policy_perf_test.hpp" -int main(int argc, char* argv[] ) { - Kokkos::initialize(argc,argv); +int main(int argc, char* argv[]) { + Kokkos::initialize(argc, argv); - if(argc<10) { + if (argc < 10) { printf(" Ten arguments are needed to run this program:\n"); - printf(" (1)team_range, (2)thread_range, (3)vector_range, (4)outer_repeat, (5)thread_repeat, (6)vector_repeat, (7)team_size, (8)vector_size, (9)schedule, (10)test_type\n"); + printf( + " (1)team_range, (2)thread_range, (3)vector_range, (4)outer_repeat, " + "(5)thread_repeat, (6)vector_repeat, (7)team_size, (8)vector_size, " + "(9)schedule, (10)test_type\n"); printf(" team_range: number of teams (league_size)\n"); printf(" thread_range: range for nested TeamThreadRange parallel_*\n"); printf(" vector_range: range for nested ThreadVectorRange parallel_*\n"); 
printf(" outer_repeat: number of repeats for outer parallel_* call\n"); - printf(" thread_repeat: number of repeats for TeamThreadRange parallel_* call\n"); - printf(" vector_repeat: number of repeats for ThreadVectorRange parallel_* call\n"); + printf( + " thread_repeat: number of repeats for TeamThreadRange parallel_* " + "call\n"); + printf( + " vector_repeat: number of repeats for ThreadVectorRange parallel_* " + "call\n"); printf(" team_size: number of team members (team_size)\n"); printf(" vector_size: desired vectorization (if possible)\n"); printf(" schedule: 1 == Static 2 == Dynamic\n"); - printf(" test_type: 3-digit code XYZ for testing (nested) parallel_*\n"); - printf(" code key: XYZ X in {1,2,3,4,5}, Y in {0,1,2}, Z in {0,1,2}\n"); + printf( + " test_type: 3-digit code XYZ for testing (nested) parallel_*\n"); + printf( + " code key: XYZ X in {1,2,3,4,5}, Y in {0,1,2}, Z in " + "{0,1,2}\n"); printf(" TeamPolicy:\n"); - printf(" X: 0 = none (never used, makes no sense); 1 = parallel_for; 2 = parallel_reduce\n"); - printf(" Y: 0 = none; 1 = parallel_for; 2 = parallel_reduce\n"); - printf(" Z: 0 = none; 1 = parallel_for; 2 = parallel_reduce\n"); + printf( + " X: 0 = none (never used, makes no sense); 1 = " + "parallel_for; 2 = parallel_reduce\n"); + printf( + " Y: 0 = none; 1 = parallel_for; 2 = " + "parallel_reduce\n"); + printf( + " Z: 0 = none; 1 = parallel_for; 2 = " + "parallel_reduce\n"); printf(" RangePolicy:\n"); - printf(" X: 3 = parallel_for; 4 = parallel_reduce; 5 = parallel_scan\n"); + printf( + " X: 3 = parallel_for; 4 = parallel_reduce; 5 = " + "parallel_scan\n"); printf(" Y: 0 = none\n"); printf(" Z: 0 = none\n"); printf(" Example Input:\n"); - printf(" 100000 32 32 100 100 100 8 1 1 100\n"); + printf(" 100000 32 32 100 100 100 8 1 1 100\n"); Kokkos::finalize(); return 0; } - int team_range = atoi(argv[1]); + int team_range = atoi(argv[1]); int thread_range = atoi(argv[2]); int vector_range = atoi(argv[3]); - int outer_repeat = 
atoi(argv[4]); + int outer_repeat = atoi(argv[4]); int thread_repeat = atoi(argv[5]); int vector_repeat = atoi(argv[6]); - int team_size = atoi(argv[7]); + int team_size = atoi(argv[7]); int vector_size = atoi(argv[8]); - int schedule = atoi(argv[9]); - int test_type = atoi(argv[10]); + int schedule = atoi(argv[9]); + int test_type = atoi(argv[10]); - int disable_verbose_output = 0; - if ( argc > 11 ) { + int disable_verbose_output = 0; + if (argc > 11) { disable_verbose_output = atoi(argv[11]); } - if ( schedule != 1 && schedule != 2 ) { + if (schedule != 1 && schedule != 2) { printf("schedule: %d\n", schedule); printf("Options for schedule are: 1 == Static 2 == Dynamic\n"); Kokkos::finalize(); return -1; } - if ( test_type != 100 && test_type != 110 && test_type != 111 && test_type != 112 && test_type != 120 && test_type != 121 && test_type != 122 - && test_type != 200 && test_type != 210 && test_type != 211 && test_type != 212 && test_type != 220 && test_type != 221 && test_type != 222 - && test_type != 300 && test_type != 400 && test_type != 500 - ) - { + if (test_type != 100 && test_type != 110 && test_type != 111 && + test_type != 112 && test_type != 120 && test_type != 121 && + test_type != 122 && test_type != 200 && test_type != 210 && + test_type != 211 && test_type != 212 && test_type != 220 && + test_type != 221 && test_type != 222 && test_type != 300 && + test_type != 400 && test_type != 500) { printf("Incorrect test_type option\n"); Kokkos::finalize(); return -2; @@ -112,56 +131,85 @@ int main(int argc, char* argv[] ) { double result = 0.0; - Kokkos::parallel_reduce( "parallel_reduce warmup", Kokkos::TeamPolicy<>(10,1), - KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type team, double& lval) { - lval += 1; - }, result); + Kokkos::parallel_reduce( + "parallel_reduce warmup", Kokkos::TeamPolicy<>(10, 1), + KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type team, + double& lval) { lval += 1; }, + result); - typedef Kokkos::View view_type_1d; - 
typedef Kokkos::View view_type_2d; + typedef Kokkos::View view_type_1d; + typedef Kokkos::View view_type_2d; typedef Kokkos::View view_type_3d; // Allocate view without initializing - // Call a 'warmup' test with 1 repeat - this will initialize the corresponding view appropriately for test and should obey first-touch etc - // Second call to test is the one we actually care about and time - view_type_1d v_1( Kokkos::ViewAllocateWithoutInitializing("v_1"), team_range*team_size); - view_type_2d v_2( Kokkos::ViewAllocateWithoutInitializing("v_2"), team_range*team_size, thread_range); - view_type_3d v_3( Kokkos::ViewAllocateWithoutInitializing("v_3"), team_range*team_size, thread_range, vector_range); + // Call a 'warmup' test with 1 repeat - this will initialize the corresponding + // view appropriately for test and should obey first-touch etc Second call to + // test is the one we actually care about and time + view_type_1d v_1(Kokkos::ViewAllocateWithoutInitializing("v_1"), + team_range * team_size); + view_type_2d v_2(Kokkos::ViewAllocateWithoutInitializing("v_2"), + team_range * team_size, thread_range); + view_type_3d v_3(Kokkos::ViewAllocateWithoutInitializing("v_3"), + team_range * team_size, thread_range, vector_range); double result_computed = 0.0; - double result_expect = 0.0; - double time = 0.0; + double result_expect = 0.0; + double time = 0.0; - if(schedule==1) { - if ( test_type != 500 ) { + if (schedule == 1) { + if (test_type != 500) { // warmup - no repeat of loops - test_policy,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); - test_policy,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); - } - else { + test_policy, int>( + team_range, thread_range, vector_range, 1, 1, 1, team_size, + vector_size, test_type, v_1, v_2, v_3, result_computed, result_expect, + 
time); + test_policy, int>( + team_range, thread_range, vector_range, outer_repeat, thread_repeat, + vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3, + result_computed, result_expect, time); + } else { // parallel_scan: initialize 1d view for parallel_scan - test_policy,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,100,v_1,v_2,v_3,result_computed,result_expect,time); - test_policy,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); + test_policy, int>( + team_range, thread_range, vector_range, 1, 1, 1, team_size, + vector_size, 100, v_1, v_2, v_3, result_computed, result_expect, + time); + test_policy, int>( + team_range, thread_range, vector_range, outer_repeat, thread_repeat, + vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3, + result_computed, result_expect, time); } } - if(schedule==2) { - if ( test_type != 500 ) { + if (schedule == 2) { + if (test_type != 500) { // warmup - no repeat of loops - test_policy,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); - test_policy,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); - } - else { + test_policy, int>( + team_range, thread_range, vector_range, 1, 1, 1, team_size, + vector_size, test_type, v_1, v_2, v_3, result_computed, result_expect, + time); + test_policy, int>( + team_range, thread_range, vector_range, outer_repeat, thread_repeat, + vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3, + result_computed, result_expect, time); + } else { // parallel_scan: initialize 1d view for parallel_scan - test_policy,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,100,v_1,v_2,v_3,result_computed,result_expect,time); - 
test_policy,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); + test_policy, int>( + team_range, thread_range, vector_range, 1, 1, 1, team_size, + vector_size, 100, v_1, v_2, v_3, result_computed, result_expect, + time); + test_policy, int>( + team_range, thread_range, vector_range, outer_repeat, thread_repeat, + vector_repeat, team_size, vector_size, test_type, v_1, v_2, v_3, + result_computed, result_expect, time); } } - if ( disable_verbose_output == 0 ) { - printf("%7i %4i %2i %9i %4i %4i %4i %2i %1i %3i %e %e %lf\n",team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,schedule,test_type,result_computed,result_expect,time); - } - else { - printf("%lf\n",time); + if (disable_verbose_output == 0) { + printf("%7i %4i %2i %9i %4i %4i %4i %2i %1i %3i %e %e %lf\n", team_range, + thread_range, vector_range, outer_repeat, thread_repeat, + vector_repeat, team_size, vector_size, schedule, test_type, + result_computed, result_expect, time); + } else { + printf("%lf\n", time); } Kokkos::finalize(); diff --git a/benchmarks/policy_performance/policy_perf_test.hpp b/benchmarks/policy_performance/policy_perf_test.hpp index 1ab437928de..a1e0a8330a9 100644 --- a/benchmarks/policy_performance/policy_perf_test.hpp +++ b/benchmarks/policy_performance/policy_perf_test.hpp @@ -43,297 +43,375 @@ #include -template < class ViewType > +template struct ParallelScanFunctor { using value_type = double; ViewType v; - ParallelScanFunctor( const ViewType & v_ ) - : v(v_) - {} + ParallelScanFunctor(const ViewType& v_) : v(v_) {} KOKKOS_INLINE_FUNCTION - void operator()( const int idx, value_type& val, const bool& final ) const - { - // inclusive scan - val += v(idx); - if ( final ) { - v(idx) = val; - } + void operator()(const int idx, value_type& val, const bool& final) const { + // inclusive scan + val += v(idx); + if (final) { + 
v(idx) = val; } + } }; -template +template void test_policy(int team_range, int thread_range, int vector_range, - int outer_repeat, int thread_repeat, int inner_repeat, - int team_size, int vector_size, int test_type, - ViewType1 &v1, ViewType2 &v2, ViewType3 &v3, - double &result, double &result_expect, double &time) { - - typedef Kokkos::TeamPolicy t_policy; + int outer_repeat, int thread_repeat, int inner_repeat, + int team_size, int vector_size, int test_type, ViewType1& v1, + ViewType2& v2, ViewType3& v3, double& result, + double& result_expect, double& time) { + typedef Kokkos::TeamPolicy t_policy; typedef typename t_policy::member_type t_team; Kokkos::Timer timer; - for(int orep = 0; orep(v1) + Kokkos::parallel_scan("500 outer scan", team_size * team_range, + ParallelScanFunctor(v1) #if 0 // This does not compile with pre Cuda 8.0 - see Github Issue #913 for explanation KOKKOS_LAMBDA (const int idx, double& val, const bool& final) { @@ -345,11 +423,12 @@ void test_policy(int team_range, int thread_range, int vector_range, } #endif ); - // result = v1( team_size*team_range - 1 ); // won't work with Cuda - need to copy result back to host to print - // result_expect = 0.5*(team_size*team_range)*(team_size*team_range-1); + // result = v1( team_size*team_range - 1 ); // won't work with Cuda - need + // to copy result back to host to print result_expect = + // 0.5*(team_size*team_range)*(team_size*team_range-1); } - } // end outer for loop + } // end outer for loop time = timer.seconds(); -} //end test_policy +} // end test_policy diff --git a/containers/performance_tests/TestCuda.cpp b/containers/performance_tests/TestCuda.cpp index 351fb86df3d..8cef836101c 100644 --- a/containers/performance_tests/TestCuda.cpp +++ b/containers/performance_tests/TestCuda.cpp @@ -42,7 +42,7 @@ */ #include -#if defined( KOKKOS_ENABLE_CUDA ) +#if defined(KOKKOS_ENABLE_CUDA) #include #include @@ -66,45 +66,38 @@ namespace Performance { class cuda : public ::testing::Test { 
-protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; Kokkos::InitArguments args(-1, -1, 0); Kokkos::initialize(args); } - static void TearDownTestCase() - { - Kokkos::finalize(); - } + static void TearDownTestCase() { Kokkos::finalize(); } }; -TEST_F( cuda, dynrankview_perf ) -{ +TEST_F(cuda, dynrankview_perf) { std::cout << "Cuda" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; - test_dynrankview_op_perf( 40960 ); + test_dynrankview_op_perf(40960); } -TEST_F( cuda, global_2_local) -{ +TEST_F(cuda, global_2_local) { std::cout << "Cuda" << std::endl; std::cout << "size, create, generate, fill, find" << std::endl; - for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + for (unsigned i = Performance::begin_id_size; i <= Performance::end_id_size; + i *= Performance::id_step) test_global_to_local_ids(i); } -TEST_F( cuda, unordered_map_performance_near) -{ - Perf::run_performance_tests("cuda-near"); +TEST_F(cuda, unordered_map_performance_near) { + Perf::run_performance_tests("cuda-near"); } -TEST_F( cuda, unordered_map_performance_far) -{ - Perf::run_performance_tests("cuda-far"); +TEST_F(cuda, unordered_map_performance_far) { + Perf::run_performance_tests("cuda-far"); } -} +} // namespace Performance #else void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTCUDA_PREVENT_EMPTY_LINK_ERROR() {} -#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */ +#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */ diff --git a/containers/performance_tests/TestDynRankView.hpp b/containers/performance_tests/TestDynRankView.hpp index db6274e0579..15c9f7a711c 100644 --- a/containers/performance_tests/TestDynRankView.hpp +++ b/containers/performance_tests/TestDynRankView.hpp @@ -49,109 +49,102 @@ #include -// Compare performance of DynRankView to View, specific focus on the parenthesis operators +// Compare performance of DynRankView 
to View, specific focus on the parenthesis +// operators namespace Performance { -//View functor +// View functor template struct InitViewFunctor { - typedef Kokkos::View inviewtype; + typedef Kokkos::View inviewtype; inviewtype _inview; - InitViewFunctor( inviewtype &inview_ ) : _inview(inview_) - {} + InitViewFunctor(inviewtype &inview_) : _inview(inview_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { for (unsigned j = 0; j < _inview.extent(1); ++j) { for (unsigned k = 0; k < _inview.extent(2); ++k) { - _inview(i,j,k) = i/2 -j*j + k/3; + _inview(i, j, k) = i / 2 - j * j + k / 3; } } } - struct SumComputationTest - { - typedef Kokkos::View inviewtype; + struct SumComputationTest { + typedef Kokkos::View inviewtype; inviewtype _inview; - typedef Kokkos::View outviewtype; + typedef Kokkos::View outviewtype; outviewtype _outview; KOKKOS_INLINE_FUNCTION - SumComputationTest(inviewtype &inview_ , outviewtype &outview_) : _inview(inview_), _outview(outview_) {} + SumComputationTest(inviewtype &inview_, outviewtype &outview_) + : _inview(inview_), _outview(outview_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { for (unsigned j = 0; j < _inview.extent(1); ++j) { for (unsigned k = 0; k < _inview.extent(2); ++k) { - _outview(i) += _inview(i,j,k) ; + _outview(i) += _inview(i, j, k); } } } }; - }; template struct InitStrideViewFunctor { - typedef Kokkos::View inviewtype; + typedef Kokkos::View inviewtype; inviewtype _inview; - InitStrideViewFunctor( inviewtype &inview_ ) : _inview(inview_) - {} + InitStrideViewFunctor(inviewtype &inview_) : _inview(inview_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { for (unsigned j = 0; j < _inview.extent(1); ++j) { for (unsigned k = 0; k < _inview.extent(2); ++k) { - _inview(i,j,k) = i/2 -j*j + k/3; + _inview(i, j, k) = i / 2 - j * j + k / 3; } } } - }; template struct InitViewRank7Functor { - typedef Kokkos::View inviewtype; + typedef Kokkos::View inviewtype; inviewtype _inview; - 
InitViewRank7Functor( inviewtype &inview_ ) : _inview(inview_) - {} + InitViewRank7Functor(inviewtype &inview_) : _inview(inview_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { for (unsigned j = 0; j < _inview.extent(1); ++j) { for (unsigned k = 0; k < _inview.extent(2); ++k) { - _inview(i,j,k,0,0,0,0) = i/2 -j*j + k/3; + _inview(i, j, k, 0, 0, 0, 0) = i / 2 - j * j + k / 3; } } } - }; -//DynRankView functor +// DynRankView functor template struct InitDynRankViewFunctor { typedef Kokkos::DynRankView inviewtype; inviewtype _inview; - InitDynRankViewFunctor( inviewtype &inview_ ) : _inview(inview_) - {} + InitDynRankViewFunctor(inviewtype &inview_) : _inview(inview_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { for (unsigned j = 0; j < _inview.extent(1); ++j) { for (unsigned k = 0; k < _inview.extent(2); ++k) { - _inview(i,j,k) = i/2 -j*j + k/3; + _inview(i, j, k) = i / 2 - j * j + k / 3; } } } - struct SumComputationTest - { + struct SumComputationTest { typedef Kokkos::DynRankView inviewtype; inviewtype _inview; @@ -159,108 +152,121 @@ struct InitDynRankViewFunctor { outviewtype _outview; KOKKOS_INLINE_FUNCTION - SumComputationTest(inviewtype &inview_ , outviewtype &outview_) : _inview(inview_), _outview(outview_) {} + SumComputationTest(inviewtype &inview_, outviewtype &outview_) + : _inview(inview_), _outview(outview_) {} KOKKOS_INLINE_FUNCTION void operator()(const int i) const { for (unsigned j = 0; j < _inview.extent(1); ++j) { for (unsigned k = 0; k < _inview.extent(2); ++k) { - _outview(i) += _inview(i,j,k) ; + _outview(i) += _inview(i, j, k); } } } }; - }; - template -void test_dynrankview_op_perf( const int par_size ) -{ - +void test_dynrankview_op_perf(const int par_size) { typedef DeviceType execution_space; typedef typename execution_space::size_type size_type; const size_type dim_2 = 90; const size_type dim_3 = 30; - double elapsed_time_view = 0; - double elapsed_time_compview = 0; + double elapsed_time_view = 0; + 
double elapsed_time_compview = 0; double elapsed_time_strideview = 0; double elapsed_time_view_rank7 = 0; - double elapsed_time_drview = 0; + double elapsed_time_drview = 0; double elapsed_time_compdrview = 0; Kokkos::Timer timer; { - Kokkos::View testview("testview",par_size,dim_2,dim_3); + Kokkos::View testview("testview", par_size, dim_2, + dim_3); typedef InitViewFunctor FunctorType; timer.reset(); - Kokkos::RangePolicy policy(0,par_size); - Kokkos::parallel_for( policy , FunctorType(testview) ); + Kokkos::RangePolicy policy(0, par_size); + Kokkos::parallel_for(policy, FunctorType(testview)); DeviceType().fence(); elapsed_time_view = timer.seconds(); std::cout << " View time (init only): " << elapsed_time_view << std::endl; - timer.reset(); - Kokkos::View sumview("sumview",par_size); - Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testview, sumview) ); + Kokkos::View sumview("sumview", par_size); + Kokkos::parallel_for( + policy, typename FunctorType::SumComputationTest(testview, sumview)); DeviceType().fence(); elapsed_time_compview = timer.seconds(); - std::cout << " View sum computation time: " << elapsed_time_view << std::endl; - + std::cout << " View sum computation time: " << elapsed_time_view + << std::endl; - Kokkos::View teststrideview = Kokkos::subview(testview, Kokkos::ALL, Kokkos::ALL,Kokkos::ALL); + Kokkos::View teststrideview = + Kokkos::subview(testview, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL); typedef InitStrideViewFunctor FunctorStrideType; timer.reset(); - Kokkos::parallel_for( policy , FunctorStrideType(teststrideview) ); + Kokkos::parallel_for(policy, FunctorStrideType(teststrideview)); DeviceType().fence(); elapsed_time_strideview = timer.seconds(); - std::cout << " Strided View time (init only): " << elapsed_time_strideview << std::endl; + std::cout << " Strided View time (init only): " << elapsed_time_strideview + << std::endl; } { - Kokkos::View testview("testview",par_size,dim_2,dim_3,1,1,1,1); + Kokkos::View 
testview("testview", par_size, + dim_2, dim_3, 1, 1, 1, 1); typedef InitViewRank7Functor FunctorType; timer.reset(); - Kokkos::RangePolicy policy(0,par_size); - Kokkos::parallel_for( policy , FunctorType(testview) ); + Kokkos::RangePolicy policy(0, par_size); + Kokkos::parallel_for(policy, FunctorType(testview)); DeviceType().fence(); elapsed_time_view_rank7 = timer.seconds(); - std::cout << " View Rank7 time (init only): " << elapsed_time_view_rank7 << std::endl; + std::cout << " View Rank7 time (init only): " << elapsed_time_view_rank7 + << std::endl; } { - Kokkos::DynRankView testdrview("testdrview",par_size,dim_2,dim_3); + Kokkos::DynRankView testdrview("testdrview", par_size, + dim_2, dim_3); typedef InitDynRankViewFunctor FunctorType; timer.reset(); - Kokkos::RangePolicy policy(0,par_size); - Kokkos::parallel_for( policy , FunctorType(testdrview) ); + Kokkos::RangePolicy policy(0, par_size); + Kokkos::parallel_for(policy, FunctorType(testdrview)); DeviceType().fence(); elapsed_time_drview = timer.seconds(); - std::cout << " DynRankView time (init only): " << elapsed_time_drview << std::endl; + std::cout << " DynRankView time (init only): " << elapsed_time_drview + << std::endl; timer.reset(); - Kokkos::DynRankView sumview("sumview",par_size); - Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testdrview, sumview) ); + Kokkos::DynRankView sumview("sumview", par_size); + Kokkos::parallel_for( + policy, typename FunctorType::SumComputationTest(testdrview, sumview)); DeviceType().fence(); elapsed_time_compdrview = timer.seconds(); - std::cout << " DynRankView sum computation time: " << elapsed_time_compdrview << std::endl; - + std::cout << " DynRankView sum computation time: " + << elapsed_time_compdrview << std::endl; } - std::cout << " Ratio of View to DynRankView time: " << elapsed_time_view / elapsed_time_drview << std::endl; //expect < 1 - std::cout << " Ratio of View to DynRankView sum computation time: " << elapsed_time_compview / 
elapsed_time_compdrview << std::endl; //expect < 1 - std::cout << " Ratio of View to View Rank7 time: " << elapsed_time_view / elapsed_time_view_rank7 << std::endl; //expect < 1 - std::cout << " Ratio of StrideView to DynRankView time: " << elapsed_time_strideview / elapsed_time_drview << std::endl; //expect < 1 - std::cout << " Ratio of DynRankView to View Rank7 time: " << elapsed_time_drview / elapsed_time_view_rank7 << std::endl; //expect ? + std::cout << " Ratio of View to DynRankView time: " + << elapsed_time_view / elapsed_time_drview + << std::endl; // expect < 1 + std::cout << " Ratio of View to DynRankView sum computation time: " + << elapsed_time_compview / elapsed_time_compdrview + << std::endl; // expect < 1 + std::cout << " Ratio of View to View Rank7 time: " + << elapsed_time_view / elapsed_time_view_rank7 + << std::endl; // expect < 1 + std::cout << " Ratio of StrideView to DynRankView time: " + << elapsed_time_strideview / elapsed_time_drview + << std::endl; // expect < 1 + std::cout << " Ratio of DynRankView to View Rank7 time: " + << elapsed_time_drview / elapsed_time_view_rank7 + << std::endl; // expect ? timer.reset(); -} //end test_dynrankview - +} // end test_dynrankview -} //end Performance +} // namespace Performance #endif - diff --git a/containers/performance_tests/TestGlobal2LocalIds.hpp b/containers/performance_tests/TestGlobal2LocalIds.hpp index 98997b32394..58ce95c9f06 100644 --- a/containers/performance_tests/TestGlobal2LocalIds.hpp +++ b/containers/performance_tests/TestGlobal2LocalIds.hpp @@ -1,12 +1,12 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,7 +35,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// +// // ************************************************************************ //@HEADER @@ -54,153 +54,137 @@ namespace Performance { static const unsigned begin_id_size = 256u; -static const unsigned end_id_size = 1u << 22; -static const unsigned id_step = 2u; +static const unsigned end_id_size = 1u << 22; +static const unsigned id_step = 2u; -union helper -{ +union helper { uint32_t word; uint8_t byte[4]; }; - template -struct generate_ids -{ +struct generate_ids { typedef Device execution_space; typedef typename execution_space::size_type size_type; - typedef Kokkos::View local_id_view; + typedef Kokkos::View local_id_view; local_id_view local_2_global; - generate_ids( local_id_view & ids) - : local_2_global(ids) - { + generate_ids(local_id_view& ids) : local_2_global(ids) { Kokkos::parallel_for(local_2_global.extent(0), *this); } - KOKKOS_INLINE_FUNCTION - void operator()(size_type i) const - { - + void operator()(size_type i) const { helper x = {static_cast(i)}; // shuffle the bytes of i to create a unique, semi-random global_id x.word = ~x.word; uint8_t tmp = x.byte[3]; - x.byte[3] = x.byte[1]; - x.byte[1] = tmp; + x.byte[3] = x.byte[1]; + x.byte[1] = tmp; - tmp = x.byte[2]; + tmp = x.byte[2]; x.byte[2] = x.byte[0]; x.byte[0] = tmp; local_2_global[i] = x.word; } - }; template -struct fill_map -{ +struct fill_map { typedef Device execution_space; typedef typename execution_space::size_type size_type; - typedef Kokkos::View local_id_view; - typedef Kokkos::UnorderedMap global_id_view; + typedef Kokkos::View + local_id_view; + typedef Kokkos::UnorderedMap + global_id_view; global_id_view global_2_local; local_id_view local_2_global; - fill_map( 
global_id_view gIds, local_id_view lIds) - : global_2_local(gIds) , local_2_global(lIds) - { + fill_map(global_id_view gIds, local_id_view lIds) + : global_2_local(gIds), local_2_global(lIds) { Kokkos::parallel_for(local_2_global.extent(0), *this); } KOKKOS_INLINE_FUNCTION - void operator()(size_type i) const - { - global_2_local.insert( local_2_global[i], i); + void operator()(size_type i) const { + global_2_local.insert(local_2_global[i], i); } - }; template -struct find_test -{ +struct find_test { typedef Device execution_space; typedef typename execution_space::size_type size_type; - typedef Kokkos::View local_id_view; - typedef Kokkos::UnorderedMap global_id_view; + typedef Kokkos::View + local_id_view; + typedef Kokkos::UnorderedMap + global_id_view; global_id_view global_2_local; local_id_view local_2_global; typedef size_t value_type; - find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors) - : global_2_local(gIds) , local_2_global(lIds) - { + find_test(global_id_view gIds, local_id_view lIds, value_type& num_errors) + : global_2_local(gIds), local_2_global(lIds) { Kokkos::parallel_reduce(local_2_global.extent(0), *this, num_errors); } KOKKOS_INLINE_FUNCTION - void init(value_type & v) const - { v = 0; } + void init(value_type& v) const { v = 0; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type & dst, volatile value_type const & src) const - { dst += src; } + void join(volatile value_type& dst, volatile value_type const& src) const { + dst += src; + } KOKKOS_INLINE_FUNCTION - void operator()(size_type i, value_type & num_errors) const - { - uint32_t index = global_2_local.find( local_2_global[i] ); + void operator()(size_type i, value_type& num_errors) const { + uint32_t index = global_2_local.find(local_2_global[i]); - if ( global_2_local.value_at(index) != i) ++num_errors; + if (global_2_local.value_at(index) != i) ++num_errors; } - }; template -void test_global_to_local_ids(unsigned num_ids) -{ - +void 
test_global_to_local_ids(unsigned num_ids) { typedef Device execution_space; typedef typename execution_space::size_type size_type; - typedef Kokkos::View local_id_view; - typedef Kokkos::UnorderedMap global_id_view; + typedef Kokkos::View local_id_view; + typedef Kokkos::UnorderedMap + global_id_view; - //size + // size std::cout << num_ids << ", "; double elasped_time = 0; Kokkos::Timer timer; local_id_view local_2_global("local_ids", num_ids); - global_id_view global_2_local((3u*num_ids)/2u); + global_id_view global_2_local((3u * num_ids) / 2u); - //create + // create elasped_time = timer.seconds(); std::cout << elasped_time << ", "; timer.reset(); // generate unique ids - { - generate_ids gen(local_2_global); - } + { generate_ids gen(local_2_global); } Device().fence(); // generate elasped_time = timer.seconds(); std::cout << elasped_time << ", "; timer.reset(); - { - fill_map fill(global_2_local, local_2_global); - } + { fill_map fill(global_2_local, local_2_global); } Device().fence(); // fill @@ -208,11 +192,9 @@ void test_global_to_local_ids(unsigned num_ids) std::cout << elasped_time << ", "; timer.reset(); - size_t num_errors = 0; - for (int i=0; i<100; ++i) - { - find_test find(global_2_local, local_2_global,num_errors); + for (int i = 0; i < 100; ++i) { + find_test find(global_2_local, local_2_global, num_errors); } Device().fence(); @@ -220,12 +202,9 @@ void test_global_to_local_ids(unsigned num_ids) elasped_time = timer.seconds(); std::cout << elasped_time << std::endl; - ASSERT_EQ( num_errors, 0u); + ASSERT_EQ(num_errors, 0u); } +} // namespace Performance -} // namespace Performance - - -#endif //KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP - +#endif // KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP diff --git a/containers/performance_tests/TestHPX.cpp b/containers/performance_tests/TestHPX.cpp index 0f43377cee4..8037e926880 100644 --- a/containers/performance_tests/TestHPX.cpp +++ b/containers/performance_tests/TestHPX.cpp @@ -42,7 +42,7 @@ */ #include -#if defined( 
KOKKOS_ENABLE_HPX ) +#if defined(KOKKOS_ENABLE_HPX) #include @@ -61,70 +61,63 @@ #include #include - namespace Performance { class hpx : public ::testing::Test { -protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; Kokkos::initialize(); - Kokkos::print_configuration( std::cout ); + Kokkos::print_configuration(std::cout); } - static void TearDownTestCase() - { - Kokkos::finalize(); - } + static void TearDownTestCase() { Kokkos::finalize(); } }; -TEST_F( hpx, dynrankview_perf ) -{ +TEST_F(hpx, dynrankview_perf) { std::cout << "HPX" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; - test_dynrankview_op_perf( 8192 ); + test_dynrankview_op_perf(8192); } -TEST_F( hpx, global_2_local) -{ +TEST_F(hpx, global_2_local) { std::cout << "HPX" << std::endl; std::cout << "size, create, generate, fill, find" << std::endl; - for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + for (unsigned i = Performance::begin_id_size; i <= Performance::end_id_size; + i *= Performance::id_step) test_global_to_local_ids(i); } -TEST_F( hpx, unordered_map_performance_near) -{ +TEST_F(hpx, unordered_map_performance_near) { unsigned num_hpx = 4; std::ostringstream base_file_name; base_file_name << "hpx-" << num_hpx << "-near"; - Perf::run_performance_tests(base_file_name.str()); + Perf::run_performance_tests( + base_file_name.str()); } -TEST_F( hpx, unordered_map_performance_far) -{ +TEST_F(hpx, unordered_map_performance_far) { unsigned num_hpx = 4; std::ostringstream base_file_name; base_file_name << "hpx-" << num_hpx << "-far"; - Perf::run_performance_tests(base_file_name.str()); + Perf::run_performance_tests( + base_file_name.str()); } -TEST_F( hpx, scatter_view) -{ +TEST_F(hpx, scatter_view) { std::cout << "ScatterView data-duplicated test:\n"; Perf::test_scatter_view(10, 1000 * 1000); -//std::cout << "ScatterView atomics 
test:\n"; -//Perf::test_scatter_view(10, 1000 * 1000); + Kokkos::Experimental::ScatterDuplicated, + Kokkos::Experimental::ScatterNonAtomic>(10, + 1000 * 1000); + // std::cout << "ScatterView atomics test:\n"; + // Perf::test_scatter_view(10, 1000 * 1000); } -} // namespace test +} // namespace Performance #else void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTHPX_PREVENT_EMPTY_LINK_ERROR() {} #endif - diff --git a/containers/performance_tests/TestMain.cpp b/containers/performance_tests/TestMain.cpp index 217b01a57af..29ce085e3b4 100644 --- a/containers/performance_tests/TestMain.cpp +++ b/containers/performance_tests/TestMain.cpp @@ -47,7 +47,6 @@ #include int main(int argc, char *argv[]) { - ::testing::InitGoogleTest(&argc,argv); + ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } - diff --git a/containers/performance_tests/TestOpenMP.cpp b/containers/performance_tests/TestOpenMP.cpp index e6218074eaf..ffa10ebe3ea 100644 --- a/containers/performance_tests/TestOpenMP.cpp +++ b/containers/performance_tests/TestOpenMP.cpp @@ -42,7 +42,7 @@ */ #include -#if defined( KOKKOS_ENABLE_OPENMP ) +#if defined(KOKKOS_ENABLE_OPENMP) #include @@ -61,82 +61,72 @@ #include #include - namespace Performance { class openmp : public ::testing::Test { -protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; Kokkos::initialize(); - Kokkos::OpenMP::print_configuration( std::cout ); + Kokkos::OpenMP::print_configuration(std::cout); } - static void TearDownTestCase() - { - Kokkos::finalize(); - } + static void TearDownTestCase() { Kokkos::finalize(); } }; -TEST_F( openmp, dynrankview_perf ) -{ +TEST_F(openmp, dynrankview_perf) { std::cout << "OpenMP" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; - test_dynrankview_op_perf( 8192 ); + test_dynrankview_op_perf(8192); } -TEST_F( openmp, global_2_local) -{ +TEST_F(openmp, global_2_local) { std::cout << 
"OpenMP" << std::endl; std::cout << "size, create, generate, fill, find" << std::endl; - for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + for (unsigned i = Performance::begin_id_size; i <= Performance::end_id_size; + i *= Performance::id_step) test_global_to_local_ids(i); } -TEST_F( openmp, unordered_map_performance_near) -{ +TEST_F(openmp, unordered_map_performance_near) { unsigned num_openmp = 4; if (Kokkos::hwloc::available()) { num_openmp = Kokkos::hwloc::get_available_numa_count() * - Kokkos::hwloc::get_available_cores_per_numa() * - Kokkos::hwloc::get_available_threads_per_core(); - + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); } std::ostringstream base_file_name; base_file_name << "openmp-" << num_openmp << "-near"; - Perf::run_performance_tests(base_file_name.str()); + Perf::run_performance_tests(base_file_name.str()); } -TEST_F( openmp, unordered_map_performance_far) -{ +TEST_F(openmp, unordered_map_performance_far) { unsigned num_openmp = 4; if (Kokkos::hwloc::available()) { num_openmp = Kokkos::hwloc::get_available_numa_count() * - Kokkos::hwloc::get_available_cores_per_numa() * - Kokkos::hwloc::get_available_threads_per_core(); - + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); } std::ostringstream base_file_name; base_file_name << "openmp-" << num_openmp << "-far"; - Perf::run_performance_tests(base_file_name.str()); + Perf::run_performance_tests(base_file_name.str()); } -TEST_F( openmp, scatter_view) -{ +TEST_F(openmp, scatter_view) { std::cout << "ScatterView data-duplicated test:\n"; Perf::test_scatter_view(10, 1000 * 1000); -//std::cout << "ScatterView atomics test:\n"; -//Perf::test_scatter_view(10, 1000 * 1000); + Kokkos::Experimental::ScatterDuplicated, + Kokkos::Experimental::ScatterNonAtomic>(10, + 1000 * 1000); + // std::cout << "ScatterView atomics test:\n"; + // 
Perf::test_scatter_view(10, 1000 * 1000); } -} // namespace test +} // namespace Performance #else -void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTOPENMP_PREVENT_EMPTY_LINK_ERROR() {} +void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTOPENMP_PREVENT_EMPTY_LINK_ERROR() { +} #endif - diff --git a/containers/performance_tests/TestROCm.cpp b/containers/performance_tests/TestROCm.cpp index 3cf9f3bd143..dd27a958e8d 100644 --- a/containers/performance_tests/TestROCm.cpp +++ b/containers/performance_tests/TestROCm.cpp @@ -42,7 +42,7 @@ */ #include -#if defined( KOKKOS_ENABLE_ROCM ) +#if defined(KOKKOS_ENABLE_ROCM) #include #include @@ -66,15 +66,14 @@ namespace Performance { class rocm : public ::testing::Test { -protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; Kokkos::HostSpace::execution_space::initialize(); - Kokkos::Experimental::ROCm::initialize( Kokkos::Experimental::ROCm::SelectDevice(0) ); + Kokkos::Experimental::ROCm::initialize( + Kokkos::Experimental::ROCm::SelectDevice(0)); } - static void TearDownTestCase() - { + static void TearDownTestCase() { Kokkos::Experimental::ROCm::finalize(); Kokkos::HostSpace::execution_space::finalize(); } @@ -97,17 +96,15 @@ TEST_F( rocm, global_2_local) } #endif -TEST_F( rocm, unordered_map_performance_near) -{ - Perf::run_performance_tests("rocm-near"); +TEST_F(rocm, unordered_map_performance_near) { + Perf::run_performance_tests("rocm-near"); } -TEST_F( rocm, unordered_map_performance_far) -{ - Perf::run_performance_tests("rocm-far"); +TEST_F(rocm, unordered_map_performance_far) { + Perf::run_performance_tests("rocm-far"); } -} +} // namespace Performance #else void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTROCM_PREVENT_EMPTY_LINK_ERROR() {} -#endif /* #if defined( KOKKOS_ENABLE_ROCM ) */ +#endif /* #if defined( KOKKOS_ENABLE_ROCM ) */ diff --git a/containers/performance_tests/TestScatterView.hpp 
b/containers/performance_tests/TestScatterView.hpp index bd9121bb829..008367234d1 100644 --- a/containers/performance_tests/TestScatterView.hpp +++ b/containers/performance_tests/TestScatterView.hpp @@ -49,67 +49,68 @@ namespace Perf { -template -void test_scatter_view(int m, int n) -{ - Kokkos::View original_view("original_view", n); +template +void test_scatter_view(int m, int n) { + Kokkos::View original_view("original_view", + n); { - auto scatter_view = Kokkos::Experimental::create_scatter_view - < Kokkos::Experimental::ScatterSum - , duplication - , contribution - > (original_view); + auto scatter_view = Kokkos::Experimental::create_scatter_view< + Kokkos::Experimental::ScatterSum, duplication, contribution>( + original_view); Kokkos::Experimental::UniqueToken< - ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> - unique_token{ExecSpace()}; - //auto internal_view = scatter_view.internal_view; + ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> + unique_token{ExecSpace()}; + // auto internal_view = scatter_view.internal_view; auto policy = Kokkos::RangePolicy(0, n); for (int foo = 0; foo < 5; ++foo) { - { - auto num_threads = unique_token.size(); - std::cout << "num_threads " << num_threads << '\n'; - Kokkos::View hand_coded_duplicate_view("hand_coded_duplicate", num_threads, n); - auto f2 = KOKKOS_LAMBDA(int i) { - auto thread_id = unique_token.acquire(); - for (int j = 0; j < 10; ++j) { - auto k = (i + j) % n; - hand_coded_duplicate_view(thread_id, k, 0) += 4.2; - hand_coded_duplicate_view(thread_id, k, 1) += 2.0; - hand_coded_duplicate_view(thread_id, k, 2) += 1.0; + { + auto num_threads = unique_token.size(); + std::cout << "num_threads " << num_threads << '\n'; + Kokkos::View + hand_coded_duplicate_view("hand_coded_duplicate", num_threads, n); + auto f2 = KOKKOS_LAMBDA(int i) { + auto thread_id = unique_token.acquire(); + for (int j = 0; j < 10; ++j) { + auto k = (i + j) % n; + hand_coded_duplicate_view(thread_id, k, 0) += 4.2; + 
hand_coded_duplicate_view(thread_id, k, 1) += 2.0; + hand_coded_duplicate_view(thread_id, k, 2) += 1.0; + } + }; + Kokkos::Timer timer; + timer.reset(); + for (int k = 0; k < m; ++k) { + Kokkos::parallel_for(policy, f2, + "hand_coded_duplicate_scatter_view_test"); } - }; - Kokkos::Timer timer; - timer.reset(); - for (int k = 0; k < m; ++k) { - Kokkos::parallel_for(policy, f2, "hand_coded_duplicate_scatter_view_test"); + Kokkos::fence(); + auto t = timer.seconds(); + std::cout << "hand-coded test took " << t << " seconds\n"; } - Kokkos::fence(); - auto t = timer.seconds(); - std::cout << "hand-coded test took " << t << " seconds\n"; - } - { - auto f = KOKKOS_LAMBDA(int i) { - auto scatter_access = scatter_view.access(); - for (int j = 0; j < 10; ++j) { - auto k = (i + j) % n; - scatter_access(k, 0) += 4.2; - scatter_access(k, 1) += 2.0; - scatter_access(k, 2) += 1.0; + { + auto f = KOKKOS_LAMBDA(int i) { + auto scatter_access = scatter_view.access(); + for (int j = 0; j < 10; ++j) { + auto k = (i + j) % n; + scatter_access(k, 0) += 4.2; + scatter_access(k, 1) += 2.0; + scatter_access(k, 2) += 1.0; + } + }; + Kokkos::Timer timer; + timer.reset(); + for (int k = 0; k < m; ++k) { + Kokkos::parallel_for(policy, f, "scatter_view_test"); } - }; - Kokkos::Timer timer; - timer.reset(); - for (int k = 0; k < m; ++k) { - Kokkos::parallel_for(policy, f, "scatter_view_test"); + Kokkos::fence(); + auto t = timer.seconds(); + std::cout << "test took " << t << " seconds\n"; } - Kokkos::fence(); - auto t = timer.seconds(); - std::cout << "test took " << t << " seconds\n"; } } - } } -} +} // namespace Perf #endif diff --git a/containers/performance_tests/TestThreads.cpp b/containers/performance_tests/TestThreads.cpp index 6a02e67b252..9cebb07b49f 100644 --- a/containers/performance_tests/TestThreads.cpp +++ b/containers/performance_tests/TestThreads.cpp @@ -42,7 +42,7 @@ */ #include -#if defined( KOKKOS_ENABLE_THREADS ) +#if defined(KOKKOS_ENABLE_THREADS) #include @@ -65,9 +65,8 @@ 
namespace Performance { class threads : public ::testing::Test { -protected: - static void SetUpTestCase() - { + protected: + static void SetUpTestCase() { std::cout << std::setprecision(5) << std::scientific; unsigned num_threads = 4; @@ -76,66 +75,57 @@ class threads : public ::testing::Test { num_threads = Kokkos::hwloc::get_available_numa_count() * Kokkos::hwloc::get_available_cores_per_numa() * Kokkos::hwloc::get_available_threads_per_core(); - } std::cout << "Threads: " << num_threads << std::endl; - Kokkos::initialize( Kokkos::InitArguments(num_threads) ); + Kokkos::initialize(Kokkos::InitArguments(num_threads)); } - static void TearDownTestCase() - { - Kokkos::finalize(); - } + static void TearDownTestCase() { Kokkos::finalize(); } }; -TEST_F( threads, dynrankview_perf ) -{ +TEST_F(threads, dynrankview_perf) { std::cout << "Threads" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; - test_dynrankview_op_perf( 8192 ); + test_dynrankview_op_perf(8192); } -TEST_F( threads, global_2_local) -{ +TEST_F(threads, global_2_local) { std::cout << "Threads" << std::endl; std::cout << "size, create, generate, fill, find" << std::endl; - for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + for (unsigned i = Performance::begin_id_size; i <= Performance::end_id_size; + i *= Performance::id_step) test_global_to_local_ids(i); } -TEST_F( threads, unordered_map_performance_near) -{ +TEST_F(threads, unordered_map_performance_near) { unsigned num_threads = 4; if (Kokkos::hwloc::available()) { num_threads = Kokkos::hwloc::get_available_numa_count() * Kokkos::hwloc::get_available_cores_per_numa() * Kokkos::hwloc::get_available_threads_per_core(); - } std::ostringstream base_file_name; base_file_name << "threads-" << num_threads << "-near"; - Perf::run_performance_tests(base_file_name.str()); + Perf::run_performance_tests(base_file_name.str()); } -TEST_F( threads, unordered_map_performance_far) -{ 
+TEST_F(threads, unordered_map_performance_far) { unsigned num_threads = 4; if (Kokkos::hwloc::available()) { num_threads = Kokkos::hwloc::get_available_numa_count() * Kokkos::hwloc::get_available_cores_per_numa() * Kokkos::hwloc::get_available_threads_per_core(); - } std::ostringstream base_file_name; base_file_name << "threads-" << num_threads << "-far"; - Perf::run_performance_tests(base_file_name.str()); + Perf::run_performance_tests(base_file_name.str()); } -} // namespace Performance +} // namespace Performance #else -void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTTHREADS_PREVENT_EMPTY_LINK_ERROR() {} +void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTTHREADS_PREVENT_EMPTY_LINK_ERROR() { +} #endif - diff --git a/containers/performance_tests/TestUnorderedMapPerformance.hpp b/containers/performance_tests/TestUnorderedMapPerformance.hpp index 8d09281ed36..078fcb22364 100644 --- a/containers/performance_tests/TestUnorderedMapPerformance.hpp +++ b/containers/performance_tests/TestUnorderedMapPerformance.hpp @@ -50,12 +50,10 @@ #include #include - namespace Perf { template -struct UnorderedMapTest -{ +struct UnorderedMapTest { typedef Device execution_space; typedef Kokkos::UnorderedMap map_type; typedef typename map_type::histogram_type histogram_type; @@ -68,22 +66,22 @@ struct UnorderedMapTest uint32_t capacity; uint32_t inserts; uint32_t collisions; - double seconds; + double seconds; map_type map; histogram_type histogram; - UnorderedMapTest( uint32_t arg_capacity, uint32_t arg_inserts, uint32_t arg_collisions) - : capacity(arg_capacity) - , inserts(arg_inserts) - , collisions(arg_collisions) - , seconds(0) - , map(capacity) - , histogram(map.get_histogram()) - { - Kokkos::Timer wall_clock ; + UnorderedMapTest(uint32_t arg_capacity, uint32_t arg_inserts, + uint32_t arg_collisions) + : capacity(arg_capacity), + inserts(arg_inserts), + collisions(arg_collisions), + seconds(0), + map(capacity), + histogram(map.get_histogram()) { + Kokkos::Timer wall_clock; 
wall_clock.reset(); - value_type v = {}; + value_type v = {}; int loop_count = 0; do { ++loop_count; @@ -92,81 +90,79 @@ struct UnorderedMapTest Kokkos::parallel_reduce(inserts, *this, v); if (v.failed_count > 0u) { - const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + v.failed_count/collisions ; - map.rehash( new_capacity ); + const uint32_t new_capacity = map.capacity() + + ((map.capacity() * 3ull) / 20u) + + v.failed_count / collisions; + map.rehash(new_capacity); } } while (v.failed_count > 0u); seconds = wall_clock.seconds(); - switch (loop_count) - { - case 1u: std::cout << " \033[0;32m" << loop_count << "\033[0m "; break; - case 2u: std::cout << " \033[1;31m" << loop_count << "\033[0m "; break; - default: std::cout << " \033[0;31m" << loop_count << "\033[0m "; break; + switch (loop_count) { + case 1u: std::cout << " \033[0;32m" << loop_count << "\033[0m "; break; + case 2u: std::cout << " \033[1;31m" << loop_count << "\033[0m "; break; + default: std::cout << " \033[0;31m" << loop_count << "\033[0m "; break; } - std::cout << std::setprecision(2) << std::fixed << std::setw(5) << (1e9*(seconds/(inserts))) << "; " << std::flush; + std::cout << std::setprecision(2) << std::fixed << std::setw(5) + << (1e9 * (seconds / (inserts))) << "; " << std::flush; histogram.calculate(); Device().fence(); } - void print(std::ostream & metrics_out, std::ostream & length_out, std::ostream & distance_out, std::ostream & block_distance_out) - { + void print(std::ostream& metrics_out, std::ostream& length_out, + std::ostream& distance_out, std::ostream& block_distance_out) { metrics_out << map.capacity() << " , "; - metrics_out << inserts/collisions << " , "; - metrics_out << (100.0 * inserts/collisions) / map.capacity() << " , "; + metrics_out << inserts / collisions << " , "; + metrics_out << (100.0 * inserts / collisions) / map.capacity() << " , "; metrics_out << inserts << " , "; metrics_out << (map.failed_insert() ? 
"true" : "false") << " , "; metrics_out << collisions << " , "; - metrics_out << 1e9*(seconds/inserts) << " , "; + metrics_out << 1e9 * (seconds / inserts) << " , "; metrics_out << seconds << std::endl; length_out << map.capacity() << " , "; - length_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + length_out << ((100.0 * inserts / collisions) / map.capacity()) << " , "; length_out << collisions << " , "; histogram.print_length(length_out); distance_out << map.capacity() << " , "; - distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + distance_out << ((100.0 * inserts / collisions) / map.capacity()) << " , "; distance_out << collisions << " , "; histogram.print_distance(distance_out); block_distance_out << map.capacity() << " , "; - block_distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + block_distance_out << ((100.0 * inserts / collisions) / map.capacity()) + << " , "; block_distance_out << collisions << " , "; histogram.print_block_distance(block_distance_out); } - KOKKOS_INLINE_FUNCTION - void init( value_type & v ) const - { + void init(value_type& v) const { v.failed_count = 0; - v.max_list = 0; + v.max_list = 0; } KOKKOS_INLINE_FUNCTION - void join( volatile value_type & dst, const volatile value_type & src ) const - { + void join(volatile value_type& dst, const volatile value_type& src) const { dst.failed_count += src.failed_count; dst.max_list = src.max_list < dst.max_list ? dst.max_list : src.max_list; } KOKKOS_INLINE_FUNCTION - void operator()(uint32_t i, value_type & v) const - { - const uint32_t key = Near ? i/collisions : i%(inserts/collisions); - typename map_type::insert_result result = map.insert(key,i); + void operator()(uint32_t i, value_type& v) const { + const uint32_t key = Near ? i / collisions : i % (inserts / collisions); + typename map_type::insert_result result = map.insert(key, i); v.failed_count += !result.failed() ? 
0 : 1; - v.max_list = result.list_position() < v.max_list ? v.max_list : result.list_position(); + v.max_list = result.list_position() < v.max_list ? v.max_list + : result.list_position(); } - }; template -void run_performance_tests(std::string const & base_file_name) -{ +void run_performance_tests(std::string const& base_file_name) { #if 0 std::string metrics_file_name = base_file_name + std::string("-metrics.csv"); std::string length_file_name = base_file_name + std::string("-length.csv"); @@ -254,7 +250,6 @@ void run_performance_tests(std::string const & base_file_name) #endif } +} // namespace Perf -} // namespace Perf - -#endif //KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP +#endif // KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP diff --git a/containers/src/Kokkos_Bitset.hpp b/containers/src/Kokkos_Bitset.hpp index 4d78430fc63..e86ac102cb3 100644 --- a/containers/src/Kokkos_Bitset.hpp +++ b/containers/src/Kokkos_Bitset.hpp @@ -53,27 +53,25 @@ namespace Kokkos { -template +template class Bitset; -template +template class ConstBitset; template -void deep_copy( Bitset & dst, Bitset const& src); +void deep_copy(Bitset& dst, Bitset const& src); template -void deep_copy( Bitset & dst, ConstBitset const& src); +void deep_copy(Bitset& dst, ConstBitset const& src); template -void deep_copy( ConstBitset & dst, ConstBitset const& src); - +void deep_copy(ConstBitset& dst, ConstBitset const& src); /// A thread safe view to a bitset template -class Bitset -{ -public: +class Bitset { + public: typedef Device execution_space; typedef unsigned size_type; @@ -81,98 +79,88 @@ class Bitset enum { MOVE_HINT_BACKWARD = 2u }; enum { - BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u - , BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE - , BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD - , BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD + BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u, + BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE, + 
BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD, + BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD }; -private: - enum { block_size = static_cast(sizeof(unsigned)*CHAR_BIT) }; - enum { block_mask = block_size-1u }; + private: + enum { block_size = static_cast(sizeof(unsigned) * CHAR_BIT) }; + enum { block_mask = block_size - 1u }; enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) }; -public: - - + public: /// constructor /// arg_size := number of bit in set Bitset(unsigned arg_size = 0u) - : m_size(arg_size) - , m_last_block_mask(0u) - , m_blocks("Bitset", ((m_size + block_mask) >> block_shift) ) - { - for (int i=0, end = static_cast(m_size & block_mask); i < end; ++i) { + : m_size(arg_size), + m_last_block_mask(0u), + m_blocks("Bitset", ((m_size + block_mask) >> block_shift)) { + for (int i = 0, end = static_cast(m_size & block_mask); i < end; ++i) { m_last_block_mask |= 1u << i; } } KOKKOS_INLINE_FUNCTION - Bitset (const Bitset&) = default; + Bitset(const Bitset&) = default; KOKKOS_INLINE_FUNCTION - Bitset& operator= (const Bitset&) = default; + Bitset& operator=(const Bitset&) = default; KOKKOS_INLINE_FUNCTION - Bitset (Bitset&&) = default; + Bitset(Bitset&&) = default; KOKKOS_INLINE_FUNCTION - Bitset& operator= (Bitset&&) = default; - + Bitset& operator=(Bitset&&) = default; + KOKKOS_INLINE_FUNCTION - ~Bitset () = default; + ~Bitset() = default; /// number of bits in the set /// can be call from the host or the device KOKKOS_FORCEINLINE_FUNCTION - unsigned size() const - { return m_size; } + unsigned size() const { return m_size; } /// number of bits which are set to 1 /// can only be called from the host - unsigned count() const - { - Impl::BitsetCount< Bitset > f(*this); + unsigned count() const { + Impl::BitsetCount > f(*this); return f.apply(); } /// set all bits to 1 /// can only be called from the host - void set() - { - Kokkos::deep_copy(m_blocks, ~0u ); + void set() { + Kokkos::deep_copy(m_blocks, 
~0u); if (m_last_block_mask) { - //clear the unused bits in the last block - typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy; - raw_deep_copy( m_blocks.data() + (m_blocks.extent(0) -1u), &m_last_block_mask, sizeof(unsigned)); + // clear the unused bits in the last block + typedef Kokkos::Impl::DeepCopy + raw_deep_copy; + raw_deep_copy(m_blocks.data() + (m_blocks.extent(0) - 1u), + &m_last_block_mask, sizeof(unsigned)); } } /// set all bits to 0 /// can only be called from the host - void reset() - { - Kokkos::deep_copy(m_blocks, 0u ); - } + void reset() { Kokkos::deep_copy(m_blocks, 0u); } /// set all bits to 0 /// can only be called from the host - void clear() - { - Kokkos::deep_copy(m_blocks, 0u ); - } + void clear() { Kokkos::deep_copy(m_blocks, 0u); } /// set i'th bit to 1 /// can only be called from the device KOKKOS_FORCEINLINE_FUNCTION - bool set( unsigned i ) const - { - if ( i < m_size ) { - unsigned * block_ptr = &m_blocks[ i >> block_shift ]; - const unsigned mask = 1u << static_cast( i & block_mask ); + bool set(unsigned i) const { + if (i < m_size) { + unsigned* block_ptr = &m_blocks[i >> block_shift]; + const unsigned mask = 1u << static_cast(i & block_mask); - return !( atomic_fetch_or( block_ptr, mask ) & mask ); + return !(atomic_fetch_or(block_ptr, mask) & mask); } return false; } @@ -180,13 +168,12 @@ class Bitset /// set i'th bit to 0 /// can only be called from the device KOKKOS_FORCEINLINE_FUNCTION - bool reset( unsigned i ) const - { - if ( i < m_size ) { - unsigned * block_ptr = &m_blocks[ i >> block_shift ]; - const unsigned mask = 1u << static_cast( i & block_mask ); + bool reset(unsigned i) const { + if (i < m_size) { + unsigned* block_ptr = &m_blocks[i >> block_shift]; + const unsigned mask = 1u << static_cast(i & block_mask); - return atomic_fetch_and( block_ptr, ~mask ) & mask; + return atomic_fetch_and(block_ptr, ~mask) & mask; } return false; } @@ -194,11 +181,10 @@ class 
Bitset /// return true if the i'th bit set to 1 /// can only be called from the device KOKKOS_FORCEINLINE_FUNCTION - bool test( unsigned i ) const - { - if ( i < m_size ) { - const unsigned block = volatile_load(&m_blocks[ i >> block_shift ]); - const unsigned mask = 1u << static_cast( i & block_mask ); + bool test(unsigned i) const { + if (i < m_size) { + const unsigned block = volatile_load(&m_blocks[i >> block_shift]); + const unsigned mask = 1u << static_cast(i & block_mask); return block & mask; } return false; @@ -208,90 +194,93 @@ class Bitset /// returns the max number of times those functions should be call /// when searching for an available bit KOKKOS_FORCEINLINE_FUNCTION - unsigned max_hint() const - { - return m_blocks.extent(0); - } + unsigned max_hint() const { return m_blocks.extent(0); } /// find a bit set to 1 near the hint - /// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found - /// and if result.first is false the result.second is a new hint + /// returns a pair< bool, unsigned> where if result.first is true then + /// result.second is the bit found and if result.first is false the + /// result.second is a new hint KOKKOS_INLINE_FUNCTION - Kokkos::pair find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const - { - const unsigned block_idx = (hint >> block_shift) < m_blocks.extent(0) ? (hint >> block_shift) : 0; + Kokkos::pair find_any_set_near( + unsigned hint, + unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const { + const unsigned block_idx = + (hint >> block_shift) < m_blocks.extent(0) ? (hint >> block_shift) : 0; const unsigned offset = hint & block_mask; - unsigned block = volatile_load(&m_blocks[ block_idx ]); - block = !m_last_block_mask || (block_idx < (m_blocks.extent(0)-1)) ? 
block : block & m_last_block_mask ; + unsigned block = volatile_load(&m_blocks[block_idx]); + block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1)) + ? block + : block & m_last_block_mask; return find_any_helper(block_idx, offset, block, scan_direction); } /// find a bit set to 0 near the hint - /// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found - /// and if result.first is false the result.second is a new hint + /// returns a pair< bool, unsigned> where if result.first is true then + /// result.second is the bit found and if result.first is false the + /// result.second is a new hint KOKKOS_INLINE_FUNCTION - Kokkos::pair find_any_unset_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const - { + Kokkos::pair find_any_unset_near( + unsigned hint, + unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD) const { const unsigned block_idx = hint >> block_shift; - const unsigned offset = hint & block_mask; - unsigned block = volatile_load(&m_blocks[ block_idx ]); - block = !m_last_block_mask || (block_idx < (m_blocks.extent(0)-1) ) ? ~block : ~block & m_last_block_mask ; + const unsigned offset = hint & block_mask; + unsigned block = volatile_load(&m_blocks[block_idx]); + block = !m_last_block_mask || (block_idx < (m_blocks.extent(0) - 1)) + ? 
~block + : ~block & m_last_block_mask; return find_any_helper(block_idx, offset, block, scan_direction); } -private: - + private: KOKKOS_FORCEINLINE_FUNCTION - Kokkos::pair find_any_helper(unsigned block_idx, unsigned offset, unsigned block, unsigned scan_direction) const - { - Kokkos::pair result( block > 0u, 0); + Kokkos::pair find_any_helper(unsigned block_idx, + unsigned offset, unsigned block, + unsigned scan_direction) const { + Kokkos::pair result(block > 0u, 0); if (!result.first) { - result.second = update_hint( block_idx, offset, scan_direction ); - } - else { - result.second = scan_block( (block_idx << block_shift) - , offset - , block - , scan_direction - ); + result.second = update_hint(block_idx, offset, scan_direction); + } else { + result.second = + scan_block((block_idx << block_shift), offset, block, scan_direction); } return result; } - KOKKOS_FORCEINLINE_FUNCTION - unsigned scan_block(unsigned block_start, int offset, unsigned block, unsigned scan_direction ) const - { - offset = !(scan_direction & BIT_SCAN_REVERSE) ? offset : (offset + block_mask) & block_mask; + unsigned scan_block(unsigned block_start, int offset, unsigned block, + unsigned scan_direction) const { + offset = !(scan_direction & BIT_SCAN_REVERSE) + ? offset + : (offset + block_mask) & block_mask; block = Impl::rotate_right(block, offset); - return ((( !(scan_direction & BIT_SCAN_REVERSE) ? - Impl::bit_scan_forward(block) : - ::Kokkos::log2(block) - ) + offset - ) & block_mask - ) + block_start; + return (((!(scan_direction & BIT_SCAN_REVERSE) + ? Impl::bit_scan_forward(block) + : ::Kokkos::log2(block)) + + offset) & + block_mask) + + block_start; } KOKKOS_FORCEINLINE_FUNCTION - unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const - { + unsigned update_hint(long long block_idx, unsigned offset, + unsigned scan_direction) const { block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1; block_idx = block_idx >= 0 ? 
block_idx : m_blocks.extent(0) - 1; - block_idx = block_idx < static_cast(m_blocks.extent(0)) ? block_idx : 0; + block_idx = + block_idx < static_cast(m_blocks.extent(0)) ? block_idx : 0; - return static_cast(block_idx)*block_size + offset; + return static_cast(block_idx) * block_size + offset; } -private: - + private: unsigned m_size; unsigned m_last_block_mask; - View< unsigned *, execution_space, MemoryTraits > m_blocks; + View > m_blocks; -private: + private: template friend class Bitset; @@ -302,87 +291,72 @@ class Bitset friend struct Impl::BitsetCount; template - friend void deep_copy( Bitset & dst, Bitset const& src); + friend void deep_copy(Bitset& dst, Bitset const& src); template - friend void deep_copy( Bitset & dst, ConstBitset const& src); + friend void deep_copy(Bitset& dst, + ConstBitset const& src); }; /// a thread-safe view to a const bitset /// i.e. can only test bits template -class ConstBitset -{ -public: +class ConstBitset { + public: typedef Device execution_space; typedef unsigned size_type; -private: - enum { block_size = static_cast(sizeof(unsigned)*CHAR_BIT) }; - enum { block_mask = block_size -1u }; + private: + enum { block_size = static_cast(sizeof(unsigned) * CHAR_BIT) }; + enum { block_mask = block_size - 1u }; enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) }; -public: - ConstBitset() - : m_size (0) - {} + public: + ConstBitset() : m_size(0) {} ConstBitset(Bitset const& rhs) - : m_size(rhs.m_size) - , m_blocks(rhs.m_blocks) - {} + : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {} ConstBitset(ConstBitset const& rhs) - : m_size( rhs.m_size ) - , m_blocks( rhs.m_blocks ) - {} + : m_size(rhs.m_size), m_blocks(rhs.m_blocks) {} - ConstBitset & operator = (Bitset const & rhs) - { - this->m_size = rhs.m_size; + ConstBitset& operator=(Bitset const& rhs) { + this->m_size = rhs.m_size; this->m_blocks = rhs.m_blocks; return *this; } - ConstBitset & operator = (ConstBitset const & rhs) - { - this->m_size = rhs.m_size; + 
ConstBitset& operator=(ConstBitset const& rhs) { + this->m_size = rhs.m_size; this->m_blocks = rhs.m_blocks; return *this; } - KOKKOS_FORCEINLINE_FUNCTION - unsigned size() const - { - return m_size; - } + unsigned size() const { return m_size; } - unsigned count() const - { - Impl::BitsetCount< ConstBitset > f(*this); + unsigned count() const { + Impl::BitsetCount > f(*this); return f.apply(); } KOKKOS_FORCEINLINE_FUNCTION - bool test( unsigned i ) const - { - if ( i < m_size ) { - const unsigned block = m_blocks[ i >> block_shift ]; - const unsigned mask = 1u << static_cast( i & block_mask ); + bool test(unsigned i) const { + if (i < m_size) { + const unsigned block = m_blocks[i >> block_shift]; + const unsigned mask = 1u << static_cast(i & block_mask); return block & mask; } return false; } -private: - + private: unsigned m_size; - View< const unsigned *, execution_space, MemoryTraits > m_blocks; + View > m_blocks; -private: + private: template friend class ConstBitset; @@ -390,47 +364,56 @@ class ConstBitset friend struct Impl::BitsetCount; template - friend void deep_copy( Bitset & dst, ConstBitset const& src); + friend void deep_copy(Bitset& dst, + ConstBitset const& src); template - friend void deep_copy( ConstBitset & dst, ConstBitset const& src); + friend void deep_copy(ConstBitset& dst, + ConstBitset const& src); }; - template -void deep_copy( Bitset & dst, Bitset const& src) -{ +void deep_copy(Bitset& dst, Bitset const& src) { if (dst.size() != src.size()) { - throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + throw std::runtime_error( + "Error: Cannot deep_copy bitsets of different sizes!"); } - typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; - raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0)); + typedef Kokkos::Impl::DeepCopy + raw_deep_copy; + raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), + 
sizeof(unsigned) * src.m_blocks.extent(0)); } template -void deep_copy( Bitset & dst, ConstBitset const& src) -{ +void deep_copy(Bitset& dst, ConstBitset const& src) { if (dst.size() != src.size()) { - throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + throw std::runtime_error( + "Error: Cannot deep_copy bitsets of different sizes!"); } - typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; - raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0)); + typedef Kokkos::Impl::DeepCopy + raw_deep_copy; + raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), + sizeof(unsigned) * src.m_blocks.extent(0)); } template -void deep_copy( ConstBitset & dst, ConstBitset const& src) -{ +void deep_copy(ConstBitset& dst, ConstBitset const& src) { if (dst.size() != src.size()) { - throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + throw std::runtime_error( + "Error: Cannot deep_copy bitsets of different sizes!"); } - typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; - raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0)); + typedef Kokkos::Impl::DeepCopy + raw_deep_copy; + raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), + sizeof(unsigned) * src.m_blocks.extent(0)); } -} // namespace Kokkos - -#endif //KOKKOS_BITSET_HPP +} // namespace Kokkos +#endif // KOKKOS_BITSET_HPP diff --git a/containers/src/Kokkos_DualView.hpp b/containers/src/Kokkos_DualView.hpp index d9b14d67a2f..5df222c7300 100644 --- a/containers/src/Kokkos_DualView.hpp +++ b/containers/src/Kokkos_DualView.hpp @@ -90,47 +90,41 @@ namespace Kokkos { * behavior. Please see the documentation of Kokkos::View for * examples. The default suffices for most users. 
*/ -template< class DataType , - class Arg1Type = void , - class Arg2Type = void , +template -class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > -{ -template< class , class , class , class > friend class DualView ; -public: +class DualView : public ViewTraits { + template + friend class DualView; + + public: //! \name Typedefs for device types and various Kokkos::View specializations. //@{ - typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ; + typedef ViewTraits traits; //! The Kokkos Host Device type; - typedef typename traits::host_mirror_space host_mirror_space ; + typedef typename traits::host_mirror_space host_mirror_space; //! The type of a Kokkos::View on the device. - typedef View< typename traits::data_type , - Arg1Type , - Arg2Type , - Arg3Type > t_dev ; + typedef View t_dev; /// \typedef t_host /// \brief The type of a Kokkos::View host mirror of \c t_dev. - typedef typename t_dev::HostMirror t_host ; + typedef typename t_dev::HostMirror t_host; //! The type of a const View on the device. //! The type of a Kokkos::View on the device. - typedef View< typename traits::const_data_type , - Arg1Type , - Arg2Type , - Arg3Type > t_dev_const ; + typedef View + t_dev_const; /// \typedef t_host_const /// \brief The type of a const View host mirror of \c t_dev_const. typedef typename t_dev_const::HostMirror t_host_const; //! The type of a const, random-access View on the device. - typedef View< typename traits::const_data_type , - typename traits::array_layout , - typename traits::device_type , - Kokkos::MemoryTraits > t_dev_const_randomread ; + typedef View > + t_dev_const_randomread; /// \typedef t_host_const_randomread /// \brief The type of a const, random-access View host mirror of @@ -138,39 +132,36 @@ template< class , class , class , class > friend class DualView ; typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread; //! The type of an unmanaged View on the device. 
- typedef View< typename traits::data_type , - typename traits::array_layout , - typename traits::device_type , - MemoryUnmanaged> t_dev_um; + typedef View + t_dev_um; //! The type of an unmanaged View host mirror of \c t_dev_um. - typedef View< typename t_host::data_type , - typename t_host::array_layout , - typename t_host::device_type , - MemoryUnmanaged> t_host_um; + typedef View + t_host_um; //! The type of a const unmanaged View on the device. - typedef View< typename traits::const_data_type , - typename traits::array_layout , - typename traits::device_type , - MemoryUnmanaged> t_dev_const_um; + typedef View + t_dev_const_um; //! The type of a const unmanaged View host mirror of \c t_dev_const_um. - typedef View t_host_const_um; + typedef View + t_host_const_um; //! The type of a const, random-access View on the device. - typedef View< typename t_host::const_data_type , - typename t_host::array_layout , - typename t_host::device_type , - Kokkos::MemoryTraits > t_dev_const_randomread_um ; + typedef View > + t_dev_const_randomread_um; /// \typedef t_host_const_randomread /// \brief The type of a const, random-access View host mirror of /// \c t_dev_const_randomread. - typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread_um; + typedef + typename t_dev_const_randomread::HostMirror t_host_const_randomread_um; //@} //! \name The two View instances. 
@@ -184,18 +175,20 @@ template< class , class , class , class > friend class DualView ; //@{ #ifndef KOKKOS_ENABLE_DEPRECATED_CODE -protected: + protected: // modified_flags[0] -> host // modified_flags[1] -> device - typedef View t_modified_flags; + typedef View t_modified_flags; t_modified_flags modified_flags; -public: + public: #else - typedef View t_modified_flags; - typedef View t_modified_flag; + typedef View + t_modified_flags; + typedef View + t_modified_flag; t_modified_flags modified_flags; - t_modified_flag modified_host,modified_device; + t_modified_flag modified_host, modified_device; #endif //@} @@ -208,11 +201,11 @@ template< class , class , class , class > friend class DualView ; /// default constructors. The "modified" flags are both initialized /// to "unmodified." #ifndef KOKKOS_ENABLE_DEPRECATED_CODE - DualView () = default; + DualView() = default; #else - DualView ():modified_flags (t_modified_flags("DualView::modified_flags")) { - modified_host = t_modified_flag(modified_flags,0); - modified_device = t_modified_flag(modified_flags,1); + DualView() : modified_flags(t_modified_flags("DualView::modified_flags")) { + modified_host = t_modified_flag(modified_flags, 0); + modified_device = t_modified_flag(modified_flags, 1); } #endif @@ -225,52 +218,52 @@ template< class , class , class , class > friend class DualView ; /// View objects. For example, if the View has three dimensions, /// the first three integer arguments will be nonzero, and you may /// omit the integer arguments that follow. 
- DualView (const std::string& label, - const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, - const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) - : d_view (label, n0, n1, n2, n3, n4, n5, n6, n7) - , h_view (create_mirror_view (d_view)) // without UVM, host View mirrors - , modified_flags (t_modified_flags("DualView::modified_flags")) - { + DualView(const std::string& label, + const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) + : d_view(label, n0, n1, n2, n3, n4, n5, n6, n7), + h_view(create_mirror_view(d_view)) // without UVM, host View mirrors + , + modified_flags(t_modified_flags("DualView::modified_flags")) { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - modified_host = t_modified_flag(modified_flags,0); - modified_device = t_modified_flag(modified_flags,1); + modified_host = t_modified_flag(modified_flags, 0); + modified_device = t_modified_flag(modified_flags, 1); #endif } //! 
Copy constructor (shallow copy) - template - DualView (const DualView& src) : - d_view (src.d_view), - h_view (src.h_view), - modified_flags (src.modified_flags) + template + DualView(const DualView& src) + : d_view(src.d_view), + h_view(src.h_view), + modified_flags(src.modified_flags) #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - , modified_host(src.modified_host) - , modified_device(src.modified_device) + , + modified_host(src.modified_host), + modified_device(src.modified_device) #endif - {} + { + } //! Subview constructor - template< class SD, class S1 , class S2 , class S3 - , class Arg0 , class ... Args > - DualView( const DualView & src - , const Arg0 & arg0 - , Args ... args - ) - : d_view( Kokkos::subview( src.d_view , arg0 , args ... ) ) - , h_view( Kokkos::subview( src.h_view , arg0 , args ... ) ) - , modified_flags (src.modified_flags) + template + DualView(const DualView& src, const Arg0& arg0, Args... args) + : d_view(Kokkos::subview(src.d_view, arg0, args...)), + h_view(Kokkos::subview(src.h_view, arg0, args...)), + modified_flags(src.modified_flags) #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - , modified_host(src.modified_host) - , modified_device(src.modified_device) + , + modified_host(src.modified_host), + modified_device(src.modified_device) #endif - {} + { + } /// \brief Create DualView from existing device and host View objects. 
/// @@ -282,34 +275,34 @@ template< class , class , class , class > friend class DualView ; /// /// \param d_view_ Device View /// \param h_view_ Host View (must have type t_host = t_dev::HostMirror) - DualView (const t_dev& d_view_, const t_host& h_view_) : - d_view (d_view_), - h_view (h_view_), - modified_flags (t_modified_flags("DualView::modified_flags")) - { - if ( int(d_view.rank) != int(h_view.rank) || - d_view.extent(0) != h_view.extent(0) || - d_view.extent(1) != h_view.extent(1) || - d_view.extent(2) != h_view.extent(2) || - d_view.extent(3) != h_view.extent(3) || - d_view.extent(4) != h_view.extent(4) || - d_view.extent(5) != h_view.extent(5) || - d_view.extent(6) != h_view.extent(6) || - d_view.extent(7) != h_view.extent(7) || - d_view.stride_0() != h_view.stride_0() || - d_view.stride_1() != h_view.stride_1() || - d_view.stride_2() != h_view.stride_2() || - d_view.stride_3() != h_view.stride_3() || - d_view.stride_4() != h_view.stride_4() || - d_view.stride_5() != h_view.stride_5() || - d_view.stride_6() != h_view.stride_6() || - d_view.stride_7() != h_view.stride_7() || - d_view.span() != h_view.span() ) { - Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views"); + DualView(const t_dev& d_view_, const t_host& h_view_) + : d_view(d_view_), + h_view(h_view_), + modified_flags(t_modified_flags("DualView::modified_flags")) { + if (int(d_view.rank) != int(h_view.rank) || + d_view.extent(0) != h_view.extent(0) || + d_view.extent(1) != h_view.extent(1) || + d_view.extent(2) != h_view.extent(2) || + d_view.extent(3) != h_view.extent(3) || + d_view.extent(4) != h_view.extent(4) || + d_view.extent(5) != h_view.extent(5) || + d_view.extent(6) != h_view.extent(6) || + d_view.extent(7) != h_view.extent(7) || + d_view.stride_0() != h_view.stride_0() || + d_view.stride_1() != h_view.stride_1() || + d_view.stride_2() != h_view.stride_2() || + d_view.stride_3() != h_view.stride_3() || + d_view.stride_4() != h_view.stride_4() || + 
d_view.stride_5() != h_view.stride_5() || + d_view.stride_6() != h_view.stride_6() || + d_view.stride_7() != h_view.stride_7() || + d_view.span() != h_view.span()) { + Kokkos::Impl::throw_runtime_exception( + "DualView constructed with incompatible views"); } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - modified_host = t_modified_flag(modified_flags,0); - modified_device = t_modified_flag(modified_flags,1); + modified_host = t_modified_flag(modified_flags, 0); + modified_device = t_modified_flag(modified_flags, 1); #endif } @@ -326,119 +319,133 @@ template< class , class , class , class > friend class DualView ; /// /// For example, suppose you create a DualView on Cuda, like this: /// \code - /// typedef Kokkos::DualView dual_view_type; - /// dual_view_type DV ("my dual view", 100); - /// \endcode - /// If you want to get the CUDA device View, do this: - /// \code - /// typename dual_view_type::t_dev cudaView = DV.view (); - /// \endcode - /// and if you want to get the host mirror of that View, do this: - /// \code - /// typedef typename Kokkos::HostSpace::execution_space host_device_type; - /// typename dual_view_type::t_host hostView = DV.view (); - /// \endcode - template< class Device > - KOKKOS_INLINE_FUNCTION - const typename Impl::if_c< - std::is_same::value, - t_dev, - t_host>::type& view () const - { - #ifndef KOKKOS_ENABLE_DEPRECATED_CODE - constexpr bool device_is_memspace = std::is_same::value; - constexpr bool device_is_execspace = std::is_same::value; - constexpr bool device_exec_is_t_dev_exec = std::is_same::value; - constexpr bool device_mem_is_t_dev_mem = std::is_same::value; - constexpr bool device_exec_is_t_host_exec = std::is_same::value; - constexpr bool device_mem_is_t_host_mem = std::is_same::value; - constexpr bool device_is_t_host_device = std::is_same::value; - constexpr bool device_is_t_dev_device = std::is_same::value; + /// typedef Kokkos::DualView + /// dual_view_type; dual_view_type DV ("my dual view", 100); \endcode If you + /// want to 
get the CUDA device View, do this: \code typename + /// dual_view_type::t_dev cudaView = DV.view (); \endcode and if + /// you want to get the host mirror of that View, do this: \code typedef + /// typename Kokkos::HostSpace::execution_space host_device_type; typename + /// dual_view_type::t_host hostView = DV.view (); \endcode + template + KOKKOS_INLINE_FUNCTION const typename Impl::if_c< + std::is_same::value, + t_dev, t_host>::type& + view() const { +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE + constexpr bool device_is_memspace = + std::is_same::value; + constexpr bool device_is_execspace = + std::is_same::value; + constexpr bool device_exec_is_t_dev_exec = + std::is_same::value; + constexpr bool device_mem_is_t_dev_mem = + std::is_same::value; + constexpr bool device_exec_is_t_host_exec = + std::is_same::value; + constexpr bool device_mem_is_t_host_mem = + std::is_same::value; + constexpr bool device_is_t_host_device = + std::is_same::value; + constexpr bool device_is_t_dev_device = + std::is_same::value; static_assert( device_is_t_dev_device || device_is_t_host_device || - (device_is_memspace && (device_mem_is_t_dev_mem || device_mem_is_t_host_mem) ) || - (device_is_execspace && (device_exec_is_t_dev_exec || device_exec_is_t_host_exec) ) || - ( - (!device_is_execspace && !device_is_memspace) && ( - (device_mem_is_t_dev_mem || device_mem_is_t_host_mem) || - (device_exec_is_t_dev_exec || device_exec_is_t_host_exec) - ) - ) - , - "Template parameter to .view() must exactly match one of the DualView's device types or one of the execution or memory spaces"); - #endif - - return Impl::if_c< - std::is_same< - typename t_dev::memory_space, - typename Device::memory_space>::value, - t_dev, - t_host >::select (d_view , h_view); + (device_is_memspace && + (device_mem_is_t_dev_mem || device_mem_is_t_host_mem)) || + (device_is_execspace && + (device_exec_is_t_dev_exec || device_exec_is_t_host_exec)) || + ((!device_is_execspace && !device_is_memspace) && + 
((device_mem_is_t_dev_mem || device_mem_is_t_host_mem) || + (device_exec_is_t_dev_exec || device_exec_is_t_host_exec))), + "Template parameter to .view() must exactly match one of the " + "DualView's device types or one of the execution or memory spaces"); +#endif + + return Impl::if_c::value, + t_dev, t_host>::select(d_view, h_view); } KOKKOS_INLINE_FUNCTION - t_host view_host() const { - return h_view; - } + t_host view_host() const { return h_view; } KOKKOS_INLINE_FUNCTION - t_dev view_device() const { - return d_view; - } + t_dev view_device() const { return d_view; } - template + template static int get_device_side() { - constexpr bool device_is_memspace = std::is_same::value; - constexpr bool device_is_execspace = std::is_same::value; - constexpr bool device_exec_is_t_dev_exec = std::is_same::value; - constexpr bool device_mem_is_t_dev_mem = std::is_same::value; - constexpr bool device_exec_is_t_host_exec = std::is_same::value; - constexpr bool device_mem_is_t_host_mem = std::is_same::value; - constexpr bool device_is_t_host_device = std::is_same::value; - constexpr bool device_is_t_dev_device = std::is_same::value; - - #ifndef KOKKOS_ENABLE_DEPRECATED_CODE + constexpr bool device_is_memspace = + std::is_same::value; + constexpr bool device_is_execspace = + std::is_same::value; + constexpr bool device_exec_is_t_dev_exec = + std::is_same::value; + constexpr bool device_mem_is_t_dev_mem = + std::is_same::value; + constexpr bool device_exec_is_t_host_exec = + std::is_same::value; + constexpr bool device_mem_is_t_host_mem = + std::is_same::value; + constexpr bool device_is_t_host_device = + std::is_same::value; + constexpr bool device_is_t_dev_device = + std::is_same::value; + +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE static_assert( device_is_t_dev_device || device_is_t_host_device || - (device_is_memspace && (device_mem_is_t_dev_mem || device_mem_is_t_host_mem) ) || - (device_is_execspace && (device_exec_is_t_dev_exec || device_exec_is_t_host_exec) ) || - ( - 
(!device_is_execspace && !device_is_memspace) && ( - (device_mem_is_t_dev_mem || device_mem_is_t_host_mem) || - (device_exec_is_t_dev_exec || device_exec_is_t_host_exec) - ) - ) - , - "Template parameter to .sync() must exactly match one of the DualView's device types or one of the execution or memory spaces"); - #endif + (device_is_memspace && + (device_mem_is_t_dev_mem || device_mem_is_t_host_mem)) || + (device_is_execspace && + (device_exec_is_t_dev_exec || device_exec_is_t_host_exec)) || + ((!device_is_execspace && !device_is_memspace) && + ((device_mem_is_t_dev_mem || device_mem_is_t_host_mem) || + (device_exec_is_t_dev_exec || device_exec_is_t_host_exec))), + "Template parameter to .sync() must exactly match one of the " + "DualView's device types or one of the execution or memory spaces"); +#endif - #ifndef KOKKOS_ENABLE_DEPRECATED_CODE +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE int dev = -1; - #else +#else int dev = 0; - #endif - if(device_is_t_dev_device) dev = 1; - else if(device_is_t_host_device) dev = 0; +#endif + if (device_is_t_dev_device) + dev = 1; + else if (device_is_t_host_device) + dev = 0; else { - if(device_is_memspace) { - if(device_mem_is_t_dev_mem) dev = 1; - if(device_mem_is_t_host_mem) dev = 0; - if(device_mem_is_t_host_mem && device_mem_is_t_dev_mem) dev = -1; + if (device_is_memspace) { + if (device_mem_is_t_dev_mem) dev = 1; + if (device_mem_is_t_host_mem) dev = 0; + if (device_mem_is_t_host_mem && device_mem_is_t_dev_mem) dev = -1; } - if(device_is_execspace) { - if(device_exec_is_t_dev_exec) dev = 1; - if(device_exec_is_t_host_exec) dev = 0; - if(device_exec_is_t_host_exec && device_exec_is_t_dev_exec) dev = -1; + if (device_is_execspace) { + if (device_exec_is_t_dev_exec) dev = 1; + if (device_exec_is_t_host_exec) dev = 0; + if (device_exec_is_t_host_exec && device_exec_is_t_dev_exec) dev = -1; } - if(!device_is_execspace && !device_is_memspace) { - if(device_mem_is_t_dev_mem) dev = 1; - if(device_mem_is_t_host_mem) dev = 0; - 
if(device_mem_is_t_host_mem && device_mem_is_t_dev_mem) dev = -1; - if(device_exec_is_t_dev_exec) dev = 1; - if(device_exec_is_t_host_exec) dev = 0; - if(device_exec_is_t_host_exec && device_exec_is_t_dev_exec) dev = -1; + if (!device_is_execspace && !device_is_memspace) { + if (device_mem_is_t_dev_mem) dev = 1; + if (device_mem_is_t_host_mem) dev = 0; + if (device_mem_is_t_host_mem && device_mem_is_t_dev_mem) dev = -1; + if (device_exec_is_t_dev_exec) dev = 1; + if (device_exec_is_t_host_exec) dev = 0; + if (device_exec_is_t_host_exec && device_exec_is_t_dev_exec) dev = -1; } } return dev; @@ -461,88 +468,94 @@ template< class , class , class , class > friend class DualView ; /// the data in either View. You must manually mark modified data /// as modified, by calling the modify() method with the /// appropriate template parameter. - template - void sync( const typename Impl::enable_if< - ( std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) || - ( std::is_same< Device , int>::value) - , int >::type& = 0) - { - if(modified_flags.data()==NULL) return; + template + void sync(const typename Impl::enable_if< + (std::is_same::value) || + (std::is_same::value), + int>::type& = 0) { + if (modified_flags.data() == NULL) return; int dev = get_device_side(); - if (dev == 1) { // if Device is the same as DualView's device type + if (dev == 1) { // if Device is the same as DualView's device type if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) { - deep_copy (d_view, h_view); + deep_copy(d_view, h_view); modified_flags(0) = modified_flags(1) = 0; } } - if (dev == 0) { // hopefully Device is the same as DualView's host type + if (dev == 0) { // hopefully Device is the same as DualView's host type if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) { - deep_copy (h_view, d_view); + deep_copy(h_view, d_view); modified_flags(0) = modified_flags(1) = 0; } } - if(std::is_same::value) { + if 
(std::is_same::value) { typename t_dev::execution_space().fence(); typename t_host::execution_space().fence(); } } - template - void sync ( const typename Impl::enable_if< - ( ! std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) || - ( std::is_same< Device , int>::value) - , int >::type& = 0 ) - { - if(modified_flags.data()==NULL) return; + template + void sync(const typename Impl::enable_if< + (!std::is_same::value) || + (std::is_same::value), + int>::type& = 0) { + if (modified_flags.data() == NULL) return; int dev = get_device_side(); - if (dev == 1) { // if Device is the same as DualView's device type + if (dev == 1) { // if Device is the same as DualView's device type if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) { - Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype."); + Impl::throw_runtime_exception( + "Calling sync on a DualView with a const datatype."); } } - if (dev == 0){ // hopefully Device is the same as DualView's host type + if (dev == 0) { // hopefully Device is the same as DualView's host type if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) { - Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype."); + Impl::throw_runtime_exception( + "Calling sync on a DualView with a const datatype."); } } } void sync_host() { - if( ! 
std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) - Impl::throw_runtime_exception("Calling sync_host on a DualView with a const datatype."); - if(modified_flags.data()==NULL) return; - if(modified_flags(1) > modified_flags(0)) { - deep_copy (h_view, d_view); + if (!std::is_same::value) + Impl::throw_runtime_exception( + "Calling sync_host on a DualView with a const datatype."); + if (modified_flags.data() == NULL) return; + if (modified_flags(1) > modified_flags(0)) { + deep_copy(h_view, d_view); modified_flags(1) = modified_flags(0) = 0; } } void sync_device() { - if( ! std::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) - Impl::throw_runtime_exception("Calling sync_device on a DualView with a const datatype."); - if(modified_flags.data()==NULL) return; - if(modified_flags(0) > modified_flags(1)) { - deep_copy (d_view, h_view); + if (!std::is_same::value) + Impl::throw_runtime_exception( + "Calling sync_device on a DualView with a const datatype."); + if (modified_flags.data() == NULL) return; + if (modified_flags(0) > modified_flags(1)) { + deep_copy(d_view, h_view); modified_flags(1) = modified_flags(0) = 0; } } - template - bool need_sync() const - { - if(modified_flags.data()==NULL) return false; + template + bool need_sync() const { + if (modified_flags.data() == NULL) return false; int dev = get_device_side(); - if (dev == 1) { // if Device is the same as DualView's device type + if (dev == 1) { // if Device is the same as DualView's device type if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) { return true; } } - if (dev == 0){ // hopefully Device is the same as DualView's host type + if (dev == 0) { // hopefully Device is the same as DualView's host type if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) { return true; } @@ -551,13 +564,13 @@ template< class , class , class , class > friend class DualView ; } inline bool 
need_sync_host() const { - if(modified_flags.data()==NULL) return false; - return modified_flags(0) friend class DualView ; /// If \c Device is the same as this DualView's device type, then /// mark the device's data as modified. Otherwise, mark the host's /// data as modified. - template - void modify () { - if(modified_flags.data()==NULL) return; + template + void modify() { + if (modified_flags.data() == NULL) return; int dev = get_device_side(); - if (dev == 1) { // if Device is the same as DualView's device type + if (dev == 1) { // if Device is the same as DualView's device type // Increment the device's modified count. - modified_flags(1) = (modified_flags(1) > modified_flags(0) ? - modified_flags(1) : modified_flags(0)) + 1; + modified_flags(1) = + (modified_flags(1) > modified_flags(0) ? modified_flags(1) + : modified_flags(0)) + + 1; } - if (dev == 0) { // hopefully Device is the same as DualView's host type + if (dev == 0) { // hopefully Device is the same as DualView's host type // Increment the host's modified count. - modified_flags(0) = (modified_flags(1) > modified_flags(0) ? - modified_flags(1) : modified_flags(0)) + 1; + modified_flags(0) = + (modified_flags(1) > modified_flags(0) ? modified_flags(1) + : modified_flags(0)) + + 1; } #ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK @@ -594,10 +611,12 @@ template< class , class , class , class > friend class DualView ; } inline void modify_host() { - if(modified_flags.data()!=NULL) { - modified_flags(0) = (modified_flags(1) > modified_flags(0) ? - modified_flags(1) : modified_flags(0)) + 1; - #ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK + if (modified_flags.data() != NULL) { + modified_flags(0) = + (modified_flags(1) > modified_flags(0) ? 
modified_flags(1) + : modified_flags(0)) + + 1; +#ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK if (modified_flags(0) && modified_flags(1)) { std::string msg = "Kokkos::DualView::modify_host ERROR: "; msg += "Concurrent modification of host and device views "; @@ -606,15 +625,17 @@ template< class , class , class , class > friend class DualView ; msg += "\"\n"; Kokkos::abort(msg.c_str()); } - #endif +#endif } } inline void modify_device() { - if(modified_flags.data()!=NULL) { - modified_flags(1) = (modified_flags(1) > modified_flags(0) ? - modified_flags(1) : modified_flags(0)) + 1; - #ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK + if (modified_flags.data() != NULL) { + modified_flags(1) = + (modified_flags(1) > modified_flags(0) ? modified_flags(1) + : modified_flags(0)) + + 1; +#ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK if (modified_flags(0) && modified_flags(1)) { std::string msg = "Kokkos::DualView::modify_device ERROR: "; msg += "Concurrent modification of host and device views "; @@ -623,12 +644,12 @@ template< class , class , class , class > friend class DualView ; msg += "\"\n"; Kokkos::abort(msg.c_str()); } - #endif +#endif } } inline void clear_sync_state() { - if(modified_flags.data()!=NULL) + if (modified_flags.data() != NULL) modified_flags(1) = modified_flags(0) = 0; } @@ -641,75 +662,72 @@ template< class , class , class , class > friend class DualView ; /// This discards any existing contents of the objects, and resets /// their modified flags. It does not copy the old contents /// of either View into the new View objects. 
- void realloc( const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG ) { - ::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7); - h_view = create_mirror_view( d_view ); - - /* Reset dirty flags */ - if(modified_flags.data()==NULL) { - modified_flags = t_modified_flags("DualView::modified_flags"); - } else - modified_flags(1) = modified_flags(0) = 0; + void realloc(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { + ::Kokkos::realloc(d_view, n0, n1, n2, n3, n4, n5, n6, n7); + h_view = create_mirror_view(d_view); + + /* Reset dirty flags */ + if (modified_flags.data() == NULL) { + modified_flags = t_modified_flags("DualView::modified_flags"); + } else + modified_flags(1) = modified_flags(0) = 0; } /// \brief Resize both views, copying old contents into new if necessary. /// /// This method only copies the old contents into the new View /// objects for the device which was last marked as modified. 
- void resize( const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG , - const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG ) { - if(modified_flags.data()==NULL) { - modified_flags = t_modified_flags("DualView::modified_flags"); - } - if(modified_flags(1) >= modified_flags(0)) { - /* Resize on Device */ - ::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7); - h_view = create_mirror_view( d_view ); - - /* Mark Device copy as modified */ - modified_flags(1) = modified_flags(1)+1; - - } else { - /* Realloc on Device */ - - ::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7); - - const bool sizeMismatch = ( h_view.extent(0) != n0 ) || - ( h_view.extent(1) != n1 ) || - ( h_view.extent(2) != n2 ) || - ( h_view.extent(3) != n3 ) || - ( h_view.extent(4) != n4 ) || - ( h_view.extent(5) != n5 ) || - ( h_view.extent(6) != n6 ) || - ( h_view.extent(7) != n7 ); - if ( sizeMismatch ) - ::Kokkos::resize(h_view,n0,n1,n2,n3,n4,n5,n6,n7); - - t_host temp_view = create_mirror_view( d_view ); - - /* Remap on Host */ - Kokkos::deep_copy( temp_view , h_view ); - - h_view = temp_view; - - d_view = create_mirror_view( typename t_dev::execution_space(), h_view ); - - /* Mark Host copy as modified */ - modified_flags(0) = modified_flags(0)+1; - } + void resize(const size_t n0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { + if (modified_flags.data() == NULL) { + 
modified_flags = t_modified_flags("DualView::modified_flags"); + } + if (modified_flags(1) >= modified_flags(0)) { + /* Resize on Device */ + ::Kokkos::resize(d_view, n0, n1, n2, n3, n4, n5, n6, n7); + h_view = create_mirror_view(d_view); + + /* Mark Device copy as modified */ + modified_flags(1) = modified_flags(1) + 1; + + } else { + /* Realloc on Device */ + + ::Kokkos::realloc(d_view, n0, n1, n2, n3, n4, n5, n6, n7); + + const bool sizeMismatch = + (h_view.extent(0) != n0) || (h_view.extent(1) != n1) || + (h_view.extent(2) != n2) || (h_view.extent(3) != n3) || + (h_view.extent(4) != n4) || (h_view.extent(5) != n5) || + (h_view.extent(6) != n6) || (h_view.extent(7) != n7); + if (sizeMismatch) + ::Kokkos::resize(h_view, n0, n1, n2, n3, n4, n5, n6, n7); + + t_host temp_view = create_mirror_view(d_view); + + /* Remap on Host */ + Kokkos::deep_copy(temp_view, h_view); + + h_view = temp_view; + + d_view = create_mirror_view(typename t_dev::execution_space(), h_view); + + /* Mark Host copy as modified */ + modified_flags(0) = modified_flags(0) + 1; + } } //@} @@ -718,37 +736,35 @@ template< class , class , class , class > friend class DualView ; #ifdef KOKKOS_ENABLE_DEPRECATED_CODE //! The allocation size (same as Kokkos::View::capacity). - size_t capacity() const { - return d_view.span(); - } + size_t capacity() const { return d_view.span(); } #endif //! The allocation size (same as Kokkos::View::span). - KOKKOS_INLINE_FUNCTION constexpr size_t span() const { - return d_view.span(); - } + KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return d_view.span(); } - KOKKOS_INLINE_FUNCTION bool span_is_contiguous() const { - return d_view.span_is_contiguous(); + KOKKOS_INLINE_FUNCTION bool span_is_contiguous() const { + return d_view.span_is_contiguous(); } //! Get stride(s) for each dimension. 
- template< typename iType> + template void stride(iType* stride_) const { d_view.stride(stride_); } - template< typename iType > - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if< std::is_integral::value , size_t >::type - extent( const iType & r ) const - { return d_view.extent(r); } + template + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if::value, size_t>::type + extent(const iType& r) const { + return d_view.extent(r); + } - template< typename iType > - KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if< std::is_integral::value , int >::type - extent_int( const iType & r ) const - { return static_cast(d_view.extent(r)); } + template + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if::value, int>::type + extent_int(const iType& r) const { + return static_cast(d_view.extent(r)); + } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE /* Deprecate all 'dimension' functions in favor of @@ -756,27 +772,27 @@ template< class , class , class , class > friend class DualView ; */ /* \brief return size of dimension 0 */ - size_t dimension_0() const {return d_view.extent(0);} + size_t dimension_0() const { return d_view.extent(0); } /* \brief return size of dimension 1 */ - size_t dimension_1() const {return d_view.extent(1);} + size_t dimension_1() const { return d_view.extent(1); } /* \brief return size of dimension 2 */ - size_t dimension_2() const {return d_view.extent(2);} + size_t dimension_2() const { return d_view.extent(2); } /* \brief return size of dimension 3 */ - size_t dimension_3() const {return d_view.extent(3);} + size_t dimension_3() const { return d_view.extent(3); } /* \brief return size of dimension 4 */ - size_t dimension_4() const {return d_view.extent(4);} + size_t dimension_4() const { return d_view.extent(4); } /* \brief return size of dimension 5 */ - size_t dimension_5() const {return d_view.extent(5);} + size_t dimension_5() const { return d_view.extent(5); } /* \brief return size of dimension 6 */ - size_t 
dimension_6() const {return d_view.extent(6);} + size_t dimension_6() const { return d_view.extent(6); } /* \brief return size of dimension 7 */ - size_t dimension_7() const {return d_view.extent(7);} + size_t dimension_7() const { return d_view.extent(7); } #endif //@} }; -} // namespace Kokkos +} // namespace Kokkos //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -787,32 +803,24 @@ template< class , class , class , class > friend class DualView ; namespace Kokkos { namespace Impl { -template< class D, class A1, class A2, class A3, class ... Args > +template struct DualViewSubview { + typedef typename Kokkos::Impl::ViewMapping< + void, Kokkos::ViewTraits, Args...>::traits_type dst_traits; - typedef typename Kokkos::Impl::ViewMapping - < void - , Kokkos::ViewTraits< D, A1, A2, A3 > - , Args ... - >::traits_type dst_traits ; - - typedef Kokkos::DualView - < typename dst_traits::data_type - , typename dst_traits::array_layout - , typename dst_traits::device_type - , typename dst_traits::memory_traits - > type ; + typedef Kokkos::DualView< + typename dst_traits::data_type, typename dst_traits::array_layout, + typename dst_traits::device_type, typename dst_traits::memory_traits> + type; }; } /* namespace Impl */ - -template< class D , class A1 , class A2 , class A3 , class ... Args > -typename Impl::DualViewSubview::type -subview( const DualView & src , Args ... args ) -{ - return typename - Impl::DualViewSubview::type( src , args ... ); +template +typename Impl::DualViewSubview::type subview( + const DualView& src, Args... args) { + return typename Impl::DualViewSubview::type(src, + args...); } } /* namespace Kokkos */ @@ -826,40 +834,35 @@ namespace Kokkos { // Partial specialization of Kokkos::deep_copy() for DualView objects. 
// -template< class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > -void -deep_copy (DualView dst, // trust me, this must not be a reference - const DualView& src ) -{ - if ( src.need_sync_device() ) { - deep_copy (dst.h_view, src.h_view); +template +void deep_copy( + DualView dst, // trust me, this must not be a reference + const DualView& src) { + if (src.need_sync_device()) { + deep_copy(dst.h_view, src.h_view); dst.modify_host(); - } - else { - deep_copy (dst.d_view, src.d_view); + } else { + deep_copy(dst.d_view, src.d_view); dst.modify_device(); - } + } } -template< class ExecutionSpace , - class DT , class DL , class DD , class DM , - class ST , class SL , class SD , class SM > -void -deep_copy (const ExecutionSpace& exec , - DualView dst, // trust me, this must not be a reference - const DualView& src ) -{ - if ( src.need_sync_device() ) { - deep_copy (exec, dst.h_view, src.h_view); +template +void deep_copy( + const ExecutionSpace& exec, + DualView dst, // trust me, this must not be a reference + const DualView& src) { + if (src.need_sync_device()) { + deep_copy(exec, dst.h_view, src.h_view); dst.modify_host(); } else { - deep_copy (exec, dst.d_view, src.d_view); + deep_copy(exec, dst.d_view, src.d_view); dst.modify_device(); } } -} // namespace Kokkos +} // namespace Kokkos #endif - diff --git a/containers/src/Kokkos_DynRankView.hpp b/containers/src/Kokkos_DynRankView.hpp index d1e6704a573..9bdb7e41413 100644 --- a/containers/src/Kokkos_DynRankView.hpp +++ b/containers/src/Kokkos_DynRankView.hpp @@ -56,255 +56,240 @@ namespace Kokkos { -template< typename DataType , class ... Properties > -class DynRankView; //forward declare +template +class DynRankView; // forward declare namespace Impl { template struct DynRankDimTraits { - - enum : size_t{unspecified = KOKKOS_INVALID_INDEX}; + enum : size_t { unspecified = KOKKOS_INVALID_INDEX }; // Compute the rank of the view from the nonzero dimension arguments. 
KOKKOS_INLINE_FUNCTION - static size_t computeRank( const size_t N0 - , const size_t N1 - , const size_t N2 - , const size_t N3 - , const size_t N4 - , const size_t N5 - , const size_t N6 - , const size_t /* N7 */) - { - return - ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified && N1 == unspecified && N0 == unspecified) ? 0 - : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified && N1 == unspecified) ? 1 - : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified) ? 2 - : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified) ? 3 - : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified) ? 4 - : ( (N6 == unspecified && N5 == unspecified) ? 5 - : ( (N6 == unspecified) ? 6 - : 7 ) ) ) ) ) ) ); + static size_t computeRank(const size_t N0, const size_t N1, const size_t N2, + const size_t N3, const size_t N4, const size_t N5, + const size_t N6, const size_t /* N7 */) { + return ( + (N6 == unspecified && N5 == unspecified && N4 == unspecified && + N3 == unspecified && N2 == unspecified && N1 == unspecified && + N0 == unspecified) + ? 0 + : ((N6 == unspecified && N5 == unspecified && N4 == unspecified && + N3 == unspecified && N2 == unspecified && N1 == unspecified) + ? 1 + : ((N6 == unspecified && N5 == unspecified && + N4 == unspecified && N3 == unspecified && + N2 == unspecified) + ? 2 + : ((N6 == unspecified && N5 == unspecified && + N4 == unspecified && N3 == unspecified) + ? 3 + : ((N6 == unspecified && N5 == unspecified && + N4 == unspecified) + ? 4 + : ((N6 == unspecified && + N5 == unspecified) + ? 5 + : ((N6 == unspecified) + ? 6 + : 7))))))); } // Compute the rank of the view from the nonzero layout arguments. 
template - KOKKOS_INLINE_FUNCTION - static size_t computeRank( const Layout& layout ) - { - return computeRank( layout.dimension[0] - , layout.dimension[1] - , layout.dimension[2] - , layout.dimension[3] - , layout.dimension[4] - , layout.dimension[5] - , layout.dimension[6] - , layout.dimension[7] ); + KOKKOS_INLINE_FUNCTION static size_t computeRank(const Layout& layout) { + return computeRank(layout.dimension[0], layout.dimension[1], + layout.dimension[2], layout.dimension[3], + layout.dimension[4], layout.dimension[5], + layout.dimension[6], layout.dimension[7]); } // Extra overload to match that for specialize types v2 - template - KOKKOS_INLINE_FUNCTION - static size_t computeRank( const Kokkos::Impl::ViewCtorProp& /* prop */, const Layout& layout ) - { + template + KOKKOS_INLINE_FUNCTION static size_t computeRank( + const Kokkos::Impl::ViewCtorProp& /* prop */, + const Layout& layout) { return computeRank(layout); } // Create the layout for the rank-7 view. // Non-strided Layout template - KOKKOS_INLINE_FUNCTION - static typename std::enable_if< (std::is_same::value || std::is_same::value) , Layout >::type createLayout( const Layout& layout ) - { - return Layout( layout.dimension[0] != unspecified ? layout.dimension[0] : 1 - , layout.dimension[1] != unspecified ? layout.dimension[1] : 1 - , layout.dimension[2] != unspecified ? layout.dimension[2] : 1 - , layout.dimension[3] != unspecified ? layout.dimension[3] : 1 - , layout.dimension[4] != unspecified ? layout.dimension[4] : 1 - , layout.dimension[5] != unspecified ? layout.dimension[5] : 1 - , layout.dimension[6] != unspecified ? layout.dimension[6] : 1 - , layout.dimension[7] != unspecified ? layout.dimension[7] : 1 - ); + KOKKOS_INLINE_FUNCTION static typename std::enable_if< + (std::is_same::value || + std::is_same::value), + Layout>::type + createLayout(const Layout& layout) { + return Layout(layout.dimension[0] != unspecified ? layout.dimension[0] : 1, + layout.dimension[1] != unspecified ? 
layout.dimension[1] : 1, + layout.dimension[2] != unspecified ? layout.dimension[2] : 1, + layout.dimension[3] != unspecified ? layout.dimension[3] : 1, + layout.dimension[4] != unspecified ? layout.dimension[4] : 1, + layout.dimension[5] != unspecified ? layout.dimension[5] : 1, + layout.dimension[6] != unspecified ? layout.dimension[6] : 1, + layout.dimension[7] != unspecified ? layout.dimension[7] : 1); } // LayoutStride template - KOKKOS_INLINE_FUNCTION - static typename std::enable_if< (std::is_same::value) , Layout>::type createLayout( const Layout& layout ) - { - return Layout( layout.dimension[0] != unspecified ? layout.dimension[0] : 1 - , layout.stride[0] - , layout.dimension[1] != unspecified ? layout.dimension[1] : 1 - , layout.stride[1] - , layout.dimension[2] != unspecified ? layout.dimension[2] : 1 - , layout.stride[2] - , layout.dimension[3] != unspecified ? layout.dimension[3] : 1 - , layout.stride[3] - , layout.dimension[4] != unspecified ? layout.dimension[4] : 1 - , layout.stride[4] - , layout.dimension[5] != unspecified ? layout.dimension[5] : 1 - , layout.stride[5] - , layout.dimension[6] != unspecified ? layout.dimension[6] : 1 - , layout.stride[6] - , layout.dimension[7] != unspecified ? layout.dimension[7] : 1 - , layout.stride[7] - ); + KOKKOS_INLINE_FUNCTION static typename std::enable_if< + (std::is_same::value), Layout>::type + createLayout(const Layout& layout) { + return Layout(layout.dimension[0] != unspecified ? layout.dimension[0] : 1, + layout.stride[0], + layout.dimension[1] != unspecified ? layout.dimension[1] : 1, + layout.stride[1], + layout.dimension[2] != unspecified ? layout.dimension[2] : 1, + layout.stride[2], + layout.dimension[3] != unspecified ? layout.dimension[3] : 1, + layout.stride[3], + layout.dimension[4] != unspecified ? layout.dimension[4] : 1, + layout.stride[4], + layout.dimension[5] != unspecified ? layout.dimension[5] : 1, + layout.stride[5], + layout.dimension[6] != unspecified ? 
layout.dimension[6] : 1, + layout.stride[6], + layout.dimension[7] != unspecified ? layout.dimension[7] : 1, + layout.stride[7]); } // Extra overload to match that for specialize types - template - KOKKOS_INLINE_FUNCTION - static typename std::enable_if< (std::is_same::value || std::is_same::value || std::is_same::value) , typename Traits::array_layout >::type createLayout( const Kokkos::Impl::ViewCtorProp& /* prop */, const typename Traits::array_layout& layout ) - { - return createLayout( layout ); + template + KOKKOS_INLINE_FUNCTION static typename std::enable_if< + (std::is_same::value || + std::is_same::value || + std::is_same::value), + typename Traits::array_layout>::type + createLayout(const Kokkos::Impl::ViewCtorProp& /* prop */, + const typename Traits::array_layout& layout) { + return createLayout(layout); } // Create a view from the given dimension arguments. // This is only necessary because the shmem constructor doesn't take a layout. - // NDE shmem View's are not compatible with the added view_alloc value_type / fad_dim deduction functionality + // NDE shmem View's are not compatible with the added view_alloc value_type + // / fad_dim deduction functionality template - static ViewType createView( const ViewArg& arg - , const size_t N0 - , const size_t N1 - , const size_t N2 - , const size_t N3 - , const size_t N4 - , const size_t N5 - , const size_t N6 - , const size_t N7 ) - { - return ViewType( arg - , N0 != unspecified ? N0 : 1 - , N1 != unspecified ? N1 : 1 - , N2 != unspecified ? N2 : 1 - , N3 != unspecified ? N3 : 1 - , N4 != unspecified ? N4 : 1 - , N5 != unspecified ? N5 : 1 - , N6 != unspecified ? N6 : 1 - , N7 != unspecified ? N7 : 1 ); + static ViewType createView(const ViewArg& arg, const size_t N0, + const size_t N1, const size_t N2, const size_t N3, + const size_t N4, const size_t N5, const size_t N6, + const size_t N7) { + return ViewType(arg, N0 != unspecified ? N0 : 1, N1 != unspecified ? N1 : 1, + N2 != unspecified ? 
N2 : 1, N3 != unspecified ? N3 : 1, + N4 != unspecified ? N4 : 1, N5 != unspecified ? N5 : 1, + N6 != unspecified ? N6 : 1, N7 != unspecified ? N7 : 1); } }; - // Non-strided Layout - template - KOKKOS_INLINE_FUNCTION - static typename std::enable_if< (std::is_same::value || std::is_same::value) && std::is_integral::value , Layout >::type - reconstructLayout( const Layout& layout , iType dynrank ) - { - return Layout( dynrank > 0 ? layout.dimension[0] :KOKKOS_INVALID_INDEX - , dynrank > 1 ? layout.dimension[1] :KOKKOS_INVALID_INDEX - , dynrank > 2 ? layout.dimension[2] :KOKKOS_INVALID_INDEX - , dynrank > 3 ? layout.dimension[3] :KOKKOS_INVALID_INDEX - , dynrank > 4 ? layout.dimension[4] :KOKKOS_INVALID_INDEX - , dynrank > 5 ? layout.dimension[5] :KOKKOS_INVALID_INDEX - , dynrank > 6 ? layout.dimension[6] :KOKKOS_INVALID_INDEX - , dynrank > 7 ? layout.dimension[7] :KOKKOS_INVALID_INDEX - ); - } - - // LayoutStride - template - KOKKOS_INLINE_FUNCTION - static typename std::enable_if< (std::is_same::value) && std::is_integral::value , Layout >::type - reconstructLayout( const Layout& layout , iType dynrank ) - { - return Layout( dynrank > 0 ? layout.dimension[0] :KOKKOS_INVALID_INDEX - , dynrank > 0 ? layout.stride[0] : (0) - , dynrank > 1 ? layout.dimension[1] :KOKKOS_INVALID_INDEX - , dynrank > 1 ? layout.stride[1] : (0) - , dynrank > 2 ? layout.dimension[2] :KOKKOS_INVALID_INDEX - , dynrank > 2 ? layout.stride[2] : (0) - , dynrank > 3 ? layout.dimension[3] :KOKKOS_INVALID_INDEX - , dynrank > 3 ? layout.stride[3] : (0) - , dynrank > 4 ? layout.dimension[4] :KOKKOS_INVALID_INDEX - , dynrank > 4 ? layout.stride[4] : (0) - , dynrank > 5 ? layout.dimension[5] :KOKKOS_INVALID_INDEX - , dynrank > 5 ? layout.stride[5] : (0) - , dynrank > 6 ? layout.dimension[6] :KOKKOS_INVALID_INDEX - , dynrank > 6 ? layout.stride[6] : (0) - , dynrank > 7 ? layout.dimension[7] :KOKKOS_INVALID_INDEX - , dynrank > 7 ? 
layout.stride[7] : (0) - ); - } +// Non-strided Layout +template +KOKKOS_INLINE_FUNCTION static + typename std::enable_if<(std::is_same::value || + std::is_same::value) && + std::is_integral::value, + Layout>::type + reconstructLayout(const Layout& layout, iType dynrank) { + return Layout(dynrank > 0 ? layout.dimension[0] : KOKKOS_INVALID_INDEX, + dynrank > 1 ? layout.dimension[1] : KOKKOS_INVALID_INDEX, + dynrank > 2 ? layout.dimension[2] : KOKKOS_INVALID_INDEX, + dynrank > 3 ? layout.dimension[3] : KOKKOS_INVALID_INDEX, + dynrank > 4 ? layout.dimension[4] : KOKKOS_INVALID_INDEX, + dynrank > 5 ? layout.dimension[5] : KOKKOS_INVALID_INDEX, + dynrank > 6 ? layout.dimension[6] : KOKKOS_INVALID_INDEX, + dynrank > 7 ? layout.dimension[7] : KOKKOS_INVALID_INDEX); +} +// LayoutStride +template +KOKKOS_INLINE_FUNCTION static typename std::enable_if< + (std::is_same::value) && + std::is_integral::value, + Layout>::type +reconstructLayout(const Layout& layout, iType dynrank) { + return Layout(dynrank > 0 ? layout.dimension[0] : KOKKOS_INVALID_INDEX, + dynrank > 0 ? layout.stride[0] : (0), + dynrank > 1 ? layout.dimension[1] : KOKKOS_INVALID_INDEX, + dynrank > 1 ? layout.stride[1] : (0), + dynrank > 2 ? layout.dimension[2] : KOKKOS_INVALID_INDEX, + dynrank > 2 ? layout.stride[2] : (0), + dynrank > 3 ? layout.dimension[3] : KOKKOS_INVALID_INDEX, + dynrank > 3 ? layout.stride[3] : (0), + dynrank > 4 ? layout.dimension[4] : KOKKOS_INVALID_INDEX, + dynrank > 4 ? layout.stride[4] : (0), + dynrank > 5 ? layout.dimension[5] : KOKKOS_INVALID_INDEX, + dynrank > 5 ? layout.stride[5] : (0), + dynrank > 6 ? layout.dimension[6] : KOKKOS_INVALID_INDEX, + dynrank > 6 ? layout.stride[6] : (0), + dynrank > 7 ? layout.dimension[7] : KOKKOS_INVALID_INDEX, + dynrank > 7 ? 
layout.stride[7] : (0)); +} /** \brief Debug bounds-checking routines */ // Enhanced debug checking - most infrastructure matches that of functions in // Kokkos_ViewMapping; additional checks for extra arguments beyond rank are 0 -template< unsigned , typename iType0 , class MapType > -KOKKOS_INLINE_FUNCTION -bool dyn_rank_view_verify_operator_bounds( const iType0 & , const MapType & ) -{ return true ; } - -template< unsigned R , typename iType0 , class MapType , typename iType1 , class ... Args > -KOKKOS_INLINE_FUNCTION -bool dyn_rank_view_verify_operator_bounds - ( const iType0 & rank - , const MapType & map - , const iType1 & i - , Args ... args - ) -{ - if ( static_cast(R) < rank ) { - return ( size_t(i) < map.extent(R) ) - && dyn_rank_view_verify_operator_bounds( rank , map , args ... ); - } - else if ( i != 0 ) { - printf("DynRankView Debug Bounds Checking Error: at rank %u\n Extra arguments beyond the rank must be zero \n",R); - return ( false ) - && dyn_rank_view_verify_operator_bounds( rank , map , args ... ); - } - else { - return ( true ) - && dyn_rank_view_verify_operator_bounds( rank , map , args ... ); +template +KOKKOS_INLINE_FUNCTION bool dyn_rank_view_verify_operator_bounds( + const iType0&, const MapType&) { + return true; +} + +template +KOKKOS_INLINE_FUNCTION bool dyn_rank_view_verify_operator_bounds( + const iType0& rank, const MapType& map, const iType1& i, Args... 
args) { + if (static_cast(R) < rank) { + return (size_t(i) < map.extent(R)) && + dyn_rank_view_verify_operator_bounds(rank, map, args...); + } else if (i != 0) { + printf( + "DynRankView Debug Bounds Checking Error: at rank %u\n Extra " + "arguments beyond the rank must be zero \n", + R); + return (false) && + dyn_rank_view_verify_operator_bounds(rank, map, args...); + } else { + return (true) && + dyn_rank_view_verify_operator_bounds(rank, map, args...); } } -template< unsigned , class MapType > -inline -void dyn_rank_view_error_operator_bounds( char * , int , const MapType & ) -{} - -template< unsigned R , class MapType , class iType , class ... Args > -inline -void dyn_rank_view_error_operator_bounds - ( char * buf - , int len - , const MapType & map - , const iType & i - , Args ... args - ) -{ - const int n = - snprintf(buf,len," %ld < %ld %c" - , static_cast(i) - , static_cast( map.extent(R) ) - , ( sizeof...(Args) ? ',' : ')' ) - ); - dyn_rank_view_error_operator_bounds(buf+n,len-n,map,args...); +template +inline void dyn_rank_view_error_operator_bounds(char*, int, const MapType&) {} + +template +inline void dyn_rank_view_error_operator_bounds(char* buf, int len, + const MapType& map, + const iType& i, Args... args) { + const int n = snprintf( + buf, len, " %ld < %ld %c", static_cast(i), + static_cast(map.extent(R)), (sizeof...(Args) ? ',' : ')')); + dyn_rank_view_error_operator_bounds(buf + n, len - n, map, args...); } // op_rank = rank of the operator version that was called -template< typename MemorySpace - , typename iType0 , typename iType1 , class MapType , class ... Args > -KOKKOS_INLINE_FUNCTION -void dyn_rank_view_verify_operator_bounds - ( const iType0 & op_rank , const iType1 & rank - , const Kokkos::Impl::SharedAllocationTracker & tracker - , const MapType & map , Args ... args ) -{ - if ( static_cast(rank) > op_rank ) { - Kokkos::abort( "DynRankView Bounds Checking Error: Need at least rank arguments to the operator()" ); - } - - if ( ! 
dyn_rank_view_verify_operator_bounds<0>( rank , map , args ... ) ) { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) +template +KOKKOS_INLINE_FUNCTION void dyn_rank_view_verify_operator_bounds( + const iType0& op_rank, const iType1& rank, + const Kokkos::Impl::SharedAllocationTracker& tracker, const MapType& map, + Args... args) { + if (static_cast(rank) > op_rank) { + Kokkos::abort( + "DynRankView Bounds Checking Error: Need at least rank arguments to " + "the operator()"); + } + + if (!dyn_rank_view_verify_operator_bounds<0>(rank, map, args...)) { +#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) enum { LEN = 1024 }; - char buffer[ LEN ]; + char buffer[LEN]; const std::string label = tracker.template get_label(); - int n = snprintf(buffer,LEN,"DynRankView bounds error of view %s (", label.c_str()); - dyn_rank_view_error_operator_bounds<0>( buffer + n , LEN - n , map , args ... ); + int n = snprintf(buffer, LEN, "DynRankView bounds error of view %s (", + label.c_str()); + dyn_rank_view_error_operator_bounds<0>(buffer + n, LEN - n, map, args...); Kokkos::Impl::throw_runtime_exception(std::string(buffer)); #else Kokkos::abort("DynRankView bounds error"); @@ -312,86 +297,84 @@ void dyn_rank_view_verify_operator_bounds } } - /** \brief Assign compatible default mappings */ struct ViewToDynRankViewTag {}; -} // namespace Impl +} // namespace Impl namespace Impl { -template< class DstTraits , class SrcTraits > -class ViewMapping< DstTraits , SrcTraits , - typename std::enable_if<( - std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value - && - std::is_same< typename DstTraits::specialize , void >::value - && - std::is_same< typename SrcTraits::specialize , void >::value - && - ( - std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value - || - ( - ( - std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value || - std::is_same< typename DstTraits::array_layout , 
Kokkos::LayoutRight >::value || - std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value - ) - && - ( - std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value || - std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value || - std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value - ) - ) - ) - ) , Kokkos::Impl::ViewToDynRankViewTag >::type > -{ -private: - - enum { is_assignable_value_type = - std::is_same< typename DstTraits::value_type - , typename SrcTraits::value_type >::value || - std::is_same< typename DstTraits::value_type - , typename SrcTraits::const_value_type >::value }; - - enum { is_assignable_layout = - std::is_same< typename DstTraits::array_layout - , typename SrcTraits::array_layout >::value || - std::is_same< typename DstTraits::array_layout - , Kokkos::LayoutStride >::value - }; +template +class ViewMapping< + DstTraits, SrcTraits, + typename std::enable_if< + (std::is_same::value && + std::is_same::value && + std::is_same::value && + (std::is_same::value || + ((std::is_same::value || + std::is_same::value || + std::is_same::value) && + (std::is_same::value || + std::is_same::value || + std::is_same::value)))), + Kokkos::Impl::ViewToDynRankViewTag>::type> { + private: + enum { + is_assignable_value_type = + std::is_same::value || + std::is_same::value + }; -public: + enum { + is_assignable_layout = + std::is_same::value || + std::is_same::value + }; - enum { is_assignable = is_assignable_value_type && - is_assignable_layout }; + public: + enum { is_assignable = is_assignable_value_type && is_assignable_layout }; - typedef ViewMapping< DstTraits , typename DstTraits::specialize > DstType ; - typedef ViewMapping< SrcTraits , typename SrcTraits::specialize > SrcType ; + typedef ViewMapping DstType; + typedef ViewMapping SrcType; - template < typename DT , typename ... DP , typename ST , typename ... 
SP > - KOKKOS_INLINE_FUNCTION - static void assign( Kokkos::DynRankView< DT , DP...> & dst , const Kokkos::View< ST , SP... > & src ) - { - static_assert( is_assignable_value_type - , "View assignment must have same value type or const = non-const" ); + template + KOKKOS_INLINE_FUNCTION static void assign( + Kokkos::DynRankView& dst, const Kokkos::View& src) { + static_assert( + is_assignable_value_type, + "View assignment must have same value type or const = non-const"); - static_assert( is_assignable_layout - , "View assignment must have compatible layout or have rank <= 1" ); + static_assert( + is_assignable_layout, + "View assignment must have compatible layout or have rank <= 1"); // Removed dimension checks... - typedef typename DstType::offset_type dst_offset_type ; - dst.m_map.m_impl_offset = dst_offset_type(std::integral_constant() , src.layout() ); //Check this for integer input1 for padding, etc - dst.m_map.m_impl_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_map.m_impl_handle , src.m_track ); - dst.m_track.assign( src.m_track , DstTraits::is_managed ); - dst.m_rank = src.Rank ; - } + typedef typename DstType::offset_type dst_offset_type; + dst.m_map.m_impl_offset = dst_offset_type( + std::integral_constant(), + src.layout()); // Check this for integer input1 for padding, etc + dst.m_map.m_impl_handle = Kokkos::Impl::ViewDataHandle::assign( + src.m_map.m_impl_handle, src.m_track); + dst.m_track.assign(src.m_track, DstTraits::is_managed); + dst.m_rank = src.Rank; + } }; -} //end Impl +} // namespace Impl /* \class DynRankView * \brief Container that creates a Kokkos view with rank determined at runtime. @@ -400,7 +383,8 @@ class ViewMapping< DstTraits , SrcTraits , * Changes from View * 1. The rank of the DynRankView is returned by the method rank() * 2. Max rank of a DynRankView is 7 - * 3. subview called with 'subview(...)' or 'subdynrankview(...)' (backward compatibility) + * 3. 
subview called with 'subview(...)' or 'subdynrankview(...)' (backward + * compatibility) * 4. Every subview is returned with LayoutStride * 5. Copy and Copy-Assign View to DynRankView * 6. deep_copy between Views and DynRankViews @@ -408,93 +392,99 @@ class ViewMapping< DstTraits , SrcTraits , * */ -template< class > struct is_dyn_rank_view : public std::false_type {}; - -template< class D, class ... P > -struct is_dyn_rank_view< Kokkos::DynRankView > : public std::true_type {}; +template +struct is_dyn_rank_view : public std::false_type {}; +template +struct is_dyn_rank_view > : public std::true_type { +}; -template< typename DataType , class ... Properties > -class DynRankView : public ViewTraits< DataType , Properties ... > -{ - static_assert( !std::is_array::value && !std::is_pointer::value , "Cannot template DynRankView with array or pointer datatype - must be pod" ); - -private: - template < class , class ... > friend class DynRankView ; - template < class , class ... > friend class Kokkos::Impl::ViewMapping ; +template +class DynRankView : public ViewTraits { + static_assert(!std::is_array::value && + !std::is_pointer::value, + "Cannot template DynRankView with array or pointer datatype - " + "must be pod"); -public: - typedef ViewTraits< DataType , Properties ... > drvtraits ; + private: + template + friend class DynRankView; + template + friend class Kokkos::Impl::ViewMapping; - typedef View< DataType******* , Properties...> view_type ; + public: + typedef ViewTraits drvtraits; - typedef ViewTraits< DataType******* , Properties ... 
> traits ; + typedef View view_type; + typedef ViewTraits traits; -private: - typedef Kokkos::Impl::ViewMapping< traits , typename traits::specialize > map_type ; - typedef Kokkos::Impl::SharedAllocationTracker track_type ; + private: + typedef Kokkos::Impl::ViewMapping + map_type; + typedef Kokkos::Impl::SharedAllocationTracker track_type; - track_type m_track ; - map_type m_map ; + track_type m_track; + map_type m_map; unsigned m_rank; -public: + public: KOKKOS_INLINE_FUNCTION - view_type & DownCast() const { return ( view_type & ) (*this); } + view_type& DownCast() const { return (view_type&)(*this); } KOKKOS_INLINE_FUNCTION - const view_type & ConstDownCast() const { return (const view_type & ) (*this); } + const view_type& ConstDownCast() const { return (const view_type&)(*this); } - //Types below - at least the HostMirror requires the value_type, NOT the rank 7 data_type of the traits + // Types below - at least the HostMirror requires the value_type, NOT the rank + // 7 data_type of the traits /** \brief Compatible view of array of scalar types */ - typedef DynRankView< typename drvtraits::scalar_array_type , - typename drvtraits::array_layout , - typename drvtraits::device_type , - typename drvtraits::memory_traits > - array_type ; + typedef DynRankView< + typename drvtraits::scalar_array_type, typename drvtraits::array_layout, + typename drvtraits::device_type, typename drvtraits::memory_traits> + array_type; /** \brief Compatible view of const data type */ - typedef DynRankView< typename drvtraits::const_data_type , - typename drvtraits::array_layout , - typename drvtraits::device_type , - typename drvtraits::memory_traits > - const_type ; + typedef DynRankView< + typename drvtraits::const_data_type, typename drvtraits::array_layout, + typename drvtraits::device_type, typename drvtraits::memory_traits> + const_type; /** \brief Compatible view of non-const data type */ - typedef DynRankView< typename drvtraits::non_const_data_type , - typename 
drvtraits::array_layout , - typename drvtraits::device_type , - typename drvtraits::memory_traits > - non_const_type ; + typedef DynRankView< + typename drvtraits::non_const_data_type, typename drvtraits::array_layout, + typename drvtraits::device_type, typename drvtraits::memory_traits> + non_const_type; /** \brief Compatible HostMirror view */ - typedef DynRankView< typename drvtraits::non_const_data_type , - typename drvtraits::array_layout , - typename drvtraits::host_mirror_space > - HostMirror ; - + typedef DynRankView + HostMirror; //---------------------------------------- // Domain rank and extents -// enum { Rank = map_type::Rank }; //Will be dyn rank of 7 always, keep the enum? + // enum { Rank = map_type::Rank }; //Will be dyn rank of 7 always, keep the + // enum? - template< typename iType > + template KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if< std::is_integral::value , size_t >::type - extent( const iType & r ) const - { return m_map.extent(r); } + typename std::enable_if::value, size_t>::type + extent(const iType& r) const { + return m_map.extent(r); + } - template< typename iType > + template KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if< std::is_integral::value , int >::type - extent_int( const iType & r ) const - { return static_cast(m_map.extent(r)); } + typename std::enable_if::value, int>::type + extent_int(const iType& r) const { + return static_cast(m_map.extent(r)); + } - KOKKOS_INLINE_FUNCTION constexpr - typename traits::array_layout layout() const - { return m_map.layout(); } + KOKKOS_INLINE_FUNCTION constexpr typename traits::array_layout layout() + const { + return m_map.layout(); + } //---------------------------------------- /* Deprecate all 'dimension' functions in favor of @@ -502,421 +492,572 @@ class DynRankView : public ViewTraits< DataType , Properties ... 
> */ #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - template< typename iType > + template KOKKOS_INLINE_FUNCTION constexpr - typename std::enable_if< std::is_integral::value , size_t >::type - dimension( const iType & r ) const { return extent( r ); } - - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_map.dimension_0(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_map.dimension_1(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_map.dimension_2(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_map.dimension_3(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_map.dimension_4(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_map.dimension_5(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_map.dimension_6(); } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_map.dimension_7(); } + typename std::enable_if::value, size_t>::type + dimension(const iType& r) const { + return extent(r); + } + + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { + return m_map.dimension_0(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { + return m_map.dimension_1(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { + return m_map.dimension_2(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { + return m_map.dimension_3(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { + return m_map.dimension_4(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { + return m_map.dimension_5(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { + return m_map.dimension_6(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { + return m_map.dimension_7(); + } #endif //---------------------------------------- - KOKKOS_INLINE_FUNCTION constexpr size_t size() const { 
return m_map.extent(0) * - m_map.extent(1) * - m_map.extent(2) * - m_map.extent(3) * - m_map.extent(4) * - m_map.extent(5) * - m_map.extent(6) * - m_map.extent(7); } - - KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_map.stride_0(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_map.stride_1(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_map.stride_2(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_map.stride_3(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_map.stride_4(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_map.stride_5(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_map.stride_6(); } - KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_map.stride_7(); } - - template< typename iType > - KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_map.stride(s); } + KOKKOS_INLINE_FUNCTION constexpr size_t size() const { + return m_map.extent(0) * m_map.extent(1) * m_map.extent(2) * + m_map.extent(3) * m_map.extent(4) * m_map.extent(5) * + m_map.extent(6) * m_map.extent(7); + } + + KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { + return m_map.stride_0(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { + return m_map.stride_1(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { + return m_map.stride_2(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { + return m_map.stride_3(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { + return m_map.stride_4(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { + return m_map.stride_5(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { + return m_map.stride_6(); + } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { + return m_map.stride_7(); + } + + template + KOKKOS_INLINE_FUNCTION void stride(iType* 
const s) const { + m_map.stride(s); + } //---------------------------------------- // Range span is the span which contains all members. - typedef typename map_type::reference_type reference_type ; - typedef typename map_type::pointer_type pointer_type ; + typedef typename map_type::reference_type reference_type; + typedef typename map_type::pointer_type pointer_type; - enum { reference_type_is_lvalue_reference = std::is_lvalue_reference< reference_type >::value }; + enum { + reference_type_is_lvalue_reference = + std::is_lvalue_reference::value + }; KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_map.span(); } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Deprecated, use 'span()' instead - KOKKOS_INLINE_FUNCTION constexpr size_t capacity() const { return m_map.span(); } + KOKKOS_INLINE_FUNCTION constexpr size_t capacity() const { + return m_map.span(); + } #endif - KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_map.span_is_contiguous(); } - KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { return m_map.data(); } + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { + return m_map.span_is_contiguous(); + } + KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { + return m_map.data(); + } #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // Deprecated, use 'span_is_contigous()' instead - KOKKOS_INLINE_FUNCTION constexpr bool is_contiguous() const { return m_map.span_is_contiguous(); } + KOKKOS_INLINE_FUNCTION constexpr bool is_contiguous() const { + return m_map.span_is_contiguous(); + } // Deprecated, use 'data()' instead - KOKKOS_INLINE_FUNCTION constexpr pointer_type ptr_on_device() const { return m_map.data(); } + KOKKOS_INLINE_FUNCTION constexpr pointer_type ptr_on_device() const { + return m_map.data(); + } #endif //---------------------------------------- // Allow specializations to query their specialized map #ifdef KOKKOS_ENABLE_DEPRECATED_CODE KOKKOS_INLINE_FUNCTION - const 
Kokkos::Impl::ViewMapping< traits , typename traits::specialize > & - implementation_map() const { return m_map ; } + const Kokkos::Impl::ViewMapping& + implementation_map() const { + return m_map; + } #endif KOKKOS_INLINE_FUNCTION - const Kokkos::Impl::ViewMapping< traits , typename traits::specialize > & - impl_map() const { return m_map ; } + const Kokkos::Impl::ViewMapping& + impl_map() const { + return m_map; + } //---------------------------------------- -private: - + private: enum { - is_layout_left = std::is_same< typename traits::array_layout - , Kokkos::LayoutLeft >::value , + is_layout_left = + std::is_same::value, - is_layout_right = std::is_same< typename traits::array_layout - , Kokkos::LayoutRight >::value , + is_layout_right = + std::is_same::value, - is_layout_stride = std::is_same< typename traits::array_layout - , Kokkos::LayoutStride >::value , + is_layout_stride = std::is_same::value, - is_default_map = - std::is_same< typename traits::specialize , void >::value && - ( is_layout_left || is_layout_right || is_layout_stride ) + is_default_map = std::is_same::value && + (is_layout_left || is_layout_right || is_layout_stride) }; - template< class Space , bool = Kokkos::Impl::MemorySpaceAccess< Space , typename traits::memory_space >::accessible > struct verify_space - { KOKKOS_FORCEINLINE_FUNCTION static void check() {} }; + template ::accessible> + struct verify_space { + KOKKOS_FORCEINLINE_FUNCTION static void check() {} + }; - template< class Space > struct verify_space - { KOKKOS_FORCEINLINE_FUNCTION static void check() - { Kokkos::abort("Kokkos::DynRankView ERROR: attempt to access inaccessible memory space"); }; + template + struct verify_space { + KOKKOS_FORCEINLINE_FUNCTION static void check() { + Kokkos::abort( + "Kokkos::DynRankView ERROR: attempt to access inaccessible memory " + "space"); }; + }; // Bounds checking macros -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) +#if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) // rank of the 
calling operator - included as first argument in ARG -#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \ - DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \ - Kokkos::Impl::dyn_rank_view_verify_operator_bounds< typename traits::memory_space > ARG ; +#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ + DynRankView::template verify_space< \ + Kokkos::Impl::ActiveExecutionMemorySpace>::check(); \ + Kokkos::Impl::dyn_rank_view_verify_operator_bounds< \ + typename traits::memory_space> \ + ARG; #else -#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \ - DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); +#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ + DynRankView::template verify_space< \ + Kokkos::Impl::ActiveExecutionMemorySpace>::check(); #endif -public: - + public: KOKKOS_INLINE_FUNCTION constexpr unsigned rank() const { return m_rank; } - - //operators () + // operators () // Rank 0 KOKKOS_INLINE_FUNCTION - reference_type operator()() const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank(), m_track, m_map) ) - return impl_map().reference(); - //return m_map.reference(0,0,0,0,0,0,0); - } + reference_type operator()() const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((0, this->rank(), m_track, m_map)) + return impl_map().reference(); + // return m_map.reference(0,0,0,0,0,0,0); + } // Rank 1 - // This assumes a contiguous underlying memory (i.e. no padding, no striding...) - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< std::is_same::value && std::is_integral::value, reference_type>::type - operator[](const iType & i0) const - { - //Phalanx is violating this, since they use the operator to access ALL elements in the allocation - //KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map) ) - return data()[i0]; - } + // This assumes a contiguous underlying memory (i.e. no padding, no + // striding...) 
+ template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + std::is_same::value && + std::is_integral::value, + reference_type>::type + operator[](const iType& i0) const { + // Phalanx is violating this, since they use the operator to access ALL + // elements in the allocation KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , + // this->rank(), m_track, m_map) ) + return data()[i0]; + } - // This assumes a contiguous underlying memory (i.e. no padding, no striding... - // AND a Trilinos/Sacado scalar type ) - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !std::is_same::value && std::is_integral::value, reference_type>::type - operator[](const iType & i0) const - { -// auto map = impl_map(); - const size_t dim_scalar = m_map.dimension_scalar(); - const size_t bytes = this->span() / dim_scalar; - - typedef Kokkos::View > tmp_view_type; - tmp_view_type rankone_view(this->data(), bytes, dim_scalar); - return rankone_view(i0); - } + // This assumes a contiguous underlying memory (i.e. no padding, no + // striding... 
AND a Trilinos/Sacado scalar type ) + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !std::is_same::value && + std::is_integral::value, + reference_type>::type + operator[](const iType& i0) const { + // auto map = impl_map(); + const size_t dim_scalar = m_map.dimension_scalar(); + const size_t bytes = this->span() / dim_scalar; + + typedef Kokkos::View< + DataType*, typename traits::array_layout, typename traits::device_type, + Kokkos::MemoryTraits > + tmp_view_type; + tmp_view_type rankone_view(this->data(), bytes, dim_scalar); + return rankone_view(i0); + } // Rank 1 parenthesis - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType & i0 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) - return m_map.reference(i0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType& i0) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((1, this->rank(), m_track, m_map, i0)) + return m_map.reference(i0); + } - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType & i0 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) - return m_map.reference(i0,0,0,0,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType& i0) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((1, this->rank(), m_track, m_map, i0)) + return m_map.reference(i0, 0, 0, 0, 0, 0, 0); + } // Rank 2 - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && 
std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) - return m_map.reference(i0,i1); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((2, this->rank(), m_track, m_map, i0, i1)) + return m_map.reference(i0, i1); + } - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) - return m_map.reference(i0,i1,0,0,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((2, this->rank(), m_track, m_map, i0, i1)) + return m_map.reference(i0, i1, 0, 0, 0, 0, 0); + } // Rank 3 - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) - return m_map.reference(i0,i1,i2); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2) const { + 
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (3, this->rank(), m_track, m_map, i0, i1, i2)) + return m_map.reference(i0, i1, i2); + } - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) - return m_map.reference(i0,i1,i2,0,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (3, this->rank(), m_track, m_map, i0, i1, i2)) + return m_map.reference(i0, i1, i2, 0, 0, 0, 0); + } // Rank 4 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) - return m_map.reference(i0,i1,i2,i3); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (4, this->rank(), m_track, m_map, i0, i1, i2, i3)) + return m_map.reference(i0, i1, i2, i3); + } - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - KOKKOS_INLINE_FUNCTION - typename 
std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) - return m_map.reference(i0,i1,i2,i3,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (4, this->rank(), m_track, m_map, i0, i1, i2, i3)) + return m_map.reference(i0, i1, i2, i3, 0, 0, 0); + } // Rank 5 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) - return m_map.reference(i0,i1,i2,i3,i4); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3, const iType4& i4) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (5, this->rank(), m_track, m_map, i0, i1, i2, i3, i4)) + return m_map.reference(i0, i1, i2, i3, i4); + } - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && 
std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) - return m_map.reference(i0,i1,i2,i3,i4,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3, const iType4& i4) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (5, this->rank(), m_track, m_map, i0, i1, i2, i3, i4)) + return m_map.reference(i0, i1, i2, i3, i4, 0, 0); + } // Rank 6 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) - return m_map.reference(i0,i1,i2,i3,i4,i5); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3, const iType4& i4, const iType5& i5) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (6, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5)) + return m_map.reference(i0, i1, i2, i3, i4, i5); + } - template< typename iType0 , typename iType1 
, typename iType2 , typename iType3, typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) - return m_map.reference(i0,i1,i2,i3,i4,i5,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3, const iType4& i4, const iType5& i5) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (6, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5)) + return m_map.reference(i0, i1, i2, i3, i4, i5, 0); + } // Rank 7 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 , typename iType6 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5, i6) ) - return m_map.reference(i0,i1,i2,i3,i4,i5,i6); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value), + reference_type>::type + operator()(const iType0& i0, const iType1& i1, const iType2& i2, + const 
iType3& i3, const iType4& i4, const iType5& i5, + const iType6& i6) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (7, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5, i6)) + return m_map.reference(i0, i1, i2, i3, i4, i5, i6); + } // Rank 0 KOKKOS_INLINE_FUNCTION - reference_type access() const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank(), m_track, m_map) ) - return impl_map().reference(); - //return m_map.reference(0,0,0,0,0,0,0); - } + reference_type access() const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((0, this->rank(), m_track, m_map)) + return impl_map().reference(); + // return m_map.reference(0,0,0,0,0,0,0); + } // Rank 1 - // Rank 1 parenthesis - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType & i0 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) - return m_map.reference(i0); - } + // Rank 1 parenthesis + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType& i0) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((1, this->rank(), m_track, m_map, i0)) + return m_map.reference(i0); + } - template< typename iType > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType & i0 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) - return m_map.reference(i0,0,0,0,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType& i0) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((1, this->rank(), m_track, m_map, i0)) + return m_map.reference(i0, 0, 0, 0, 0, 0, 0); + } // Rank 2 - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename 
std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) - return m_map.reference(i0,i1); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((2, this->rank(), m_track, m_map, i0, i1)) + return m_map.reference(i0, i1); + } - template< typename iType0 , typename iType1 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) - return m_map.reference(i0,i1,0,0,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((2, this->rank(), m_track, m_map, i0, i1)) + return m_map.reference(i0, i1, 0, 0, 0, 0, 0); + } // Rank 3 - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) - return m_map.reference(i0,i1,i2); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const 
iType1& i1, const iType2& i2) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (3, this->rank(), m_track, m_map, i0, i1, i2)) + return m_map.reference(i0, i1, i2); + } - template< typename iType0 , typename iType1 , typename iType2 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) - return m_map.reference(i0,i1,i2,0,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (3, this->rank(), m_track, m_map, i0, i1, i2)) + return m_map.reference(i0, i1, i2, 0, 0, 0, 0); + } // Rank 4 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) - return m_map.reference(i0,i1,i2,i3); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (4, this->rank(), m_track, m_map, i0, i1, i2, i3)) + return m_map.reference(i0, i1, i2, i3); + } - template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > - 
KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) - return m_map.reference(i0,i1,i2,i3,0,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, + const iType3& i3) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (4, this->rank(), m_track, m_map, i0, i1, i2, i3)) + return m_map.reference(i0, i1, i2, i3, 0, 0, 0); + } // Rank 5 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) - return m_map.reference(i0,i1,i2,i3,i4); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, + const iType4& i4) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (5, this->rank(), m_track, m_map, i0, i1, i2, i3, i4)) + return m_map.reference(i0, i1, i2, i3, i4); + } - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< 
!(std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) - return m_map.reference(i0,i1,i2,i3,i4,0,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, + const iType4& i4) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (5, this->rank(), m_track, m_map, i0, i1, i2, i3, i4)) + return m_map.reference(i0, i1, i2, i3, i4, 0, 0); + } // Rank 6 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) - return m_map.reference(i0,i1,i2,i3,i4,i5); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_same::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, + const iType4& i4, const iType5& i5) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (6, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5)) + return m_map.reference(i0, i1, i2, i3, i4, i5); + } - template< typename iType0 , 
typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) - return m_map.reference(i0,i1,i2,i3,i4,i5,0); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + !(std::is_same::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, const iType3& i3, + const iType4& i4, const iType5& i5) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (6, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5)) + return m_map.reference(i0, i1, i2, i3, i4, i5, 0); + } // Rank 7 - template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 , typename iType6 > - KOKKOS_INLINE_FUNCTION - typename std::enable_if< (std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const - { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5, i6) ) - return m_map.reference(i0,i1,i2,i3,i4,i5,i6); - } + template + KOKKOS_INLINE_FUNCTION typename std::enable_if< + (std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value && std::is_integral::value && + std::is_integral::value), + reference_type>::type + access(const iType0& i0, const iType1& i1, const iType2& i2, const 
iType3& i3, + const iType4& i4, const iType5& i5, const iType6& i6) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + (7, this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5, i6)) + return m_map.reference(i0, i1, i2, i3, i4, i5, i6); + } #undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY @@ -927,405 +1068,393 @@ class DynRankView : public ViewTraits< DataType , Properties ... > ~DynRankView() {} KOKKOS_INLINE_FUNCTION - DynRankView() : m_track(), m_map(), m_rank() {} //Default ctor + DynRankView() : m_track(), m_map(), m_rank() {} // Default ctor KOKKOS_INLINE_FUNCTION - DynRankView( const DynRankView & rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ), m_rank(rhs.m_rank) {} + DynRankView(const DynRankView& rhs) + : m_track(rhs.m_track), m_map(rhs.m_map), m_rank(rhs.m_rank) {} KOKKOS_INLINE_FUNCTION - DynRankView( DynRankView && rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ), m_rank(rhs.m_rank) {} + DynRankView(DynRankView&& rhs) + : m_track(rhs.m_track), m_map(rhs.m_map), m_rank(rhs.m_rank) {} KOKKOS_INLINE_FUNCTION - DynRankView & operator = ( const DynRankView & rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } + DynRankView& operator=(const DynRankView& rhs) { + m_track = rhs.m_track; + m_map = rhs.m_map; + m_rank = rhs.m_rank; + return *this; + } KOKKOS_INLINE_FUNCTION - DynRankView & operator = ( DynRankView && rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } + DynRankView& operator=(DynRankView&& rhs) { + m_track = rhs.m_track; + m_map = rhs.m_map; + m_rank = rhs.m_rank; + return *this; + } //---------------------------------------- // Compatible view copy constructor and assignment // may assign unmanaged from managed. - template< class RT , class ... 
RP > - KOKKOS_INLINE_FUNCTION - DynRankView( const DynRankView & rhs ) - : m_track( rhs.m_track , traits::is_managed ) - , m_map() - , m_rank(rhs.m_rank) - { - typedef typename DynRankView ::traits SrcTraits ; - typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , typename traits::specialize > Mapping ; - static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); - Mapping::assign( m_map , rhs.m_map , rhs.m_track ); - } + template + KOKKOS_INLINE_FUNCTION DynRankView(const DynRankView& rhs) + : m_track(rhs.m_track, traits::is_managed), m_map(), m_rank(rhs.m_rank) { + typedef typename DynRankView::traits SrcTraits; + typedef Kokkos::Impl::ViewMapping + Mapping; + static_assert(Mapping::is_assignable, + "Incompatible DynRankView copy construction"); + Mapping::assign(m_map, rhs.m_map, rhs.m_track); + } - template< class RT , class ... RP > - KOKKOS_INLINE_FUNCTION - DynRankView & operator = (const DynRankView & rhs ) - { - typedef typename DynRankView ::traits SrcTraits ; - typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , typename traits::specialize > Mapping ; - static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); - Mapping::assign( m_map , rhs.m_map , rhs.m_track ); - m_track.assign( rhs.m_track , traits::is_managed ); - m_rank = rhs.rank(); - return *this; - } + template + KOKKOS_INLINE_FUNCTION DynRankView& operator=( + const DynRankView& rhs) { + typedef typename DynRankView::traits SrcTraits; + typedef Kokkos::Impl::ViewMapping + Mapping; + static_assert(Mapping::is_assignable, + "Incompatible DynRankView copy construction"); + Mapping::assign(m_map, rhs.m_map, rhs.m_track); + m_track.assign(rhs.m_track, traits::is_managed); + m_rank = rhs.rank(); + return *this; + } -// Copy/Assign View to DynRankView - template< class RT , class ... 
RP > - KOKKOS_INLINE_FUNCTION - DynRankView( const View & rhs ) - : m_track() - , m_map() - , m_rank( rhs.Rank ) - { - typedef typename View::traits SrcTraits ; - typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Impl::ViewToDynRankViewTag > Mapping ; - static_assert( Mapping::is_assignable , "Incompatible View to DynRankView copy construction" ); - Mapping::assign( *this , rhs ); - } + // Copy/Assign View to DynRankView + template + KOKKOS_INLINE_FUNCTION DynRankView(const View& rhs) + : m_track(), m_map(), m_rank(rhs.Rank) { + typedef typename View::traits SrcTraits; + typedef Kokkos::Impl::ViewMapping + Mapping; + static_assert(Mapping::is_assignable, + "Incompatible View to DynRankView copy construction"); + Mapping::assign(*this, rhs); + } - template< class RT , class ... RP > - KOKKOS_INLINE_FUNCTION - DynRankView & operator = ( const View & rhs ) - { - typedef typename View::traits SrcTraits ; - typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Impl::ViewToDynRankViewTag > Mapping ; - static_assert( Mapping::is_assignable , "Incompatible View to DynRankView copy assignment" ); - Mapping::assign( *this , rhs ); - return *this ; - } + template + KOKKOS_INLINE_FUNCTION DynRankView& operator=(const View& rhs) { + typedef typename View::traits SrcTraits; + typedef Kokkos::Impl::ViewMapping + Mapping; + static_assert(Mapping::is_assignable, + "Incompatible View to DynRankView copy assignment"); + Mapping::assign(*this, rhs); + return *this; + } //---------------------------------------- // Allocation tracking properties KOKKOS_INLINE_FUNCTION - int use_count() const - { return m_track.use_count(); } + int use_count() const { return m_track.use_count(); } - inline - const std::string label() const - { return m_track.template get_label< typename traits::memory_space >(); } + inline const std::string label() const { + return m_track.template get_label(); + } //---------------------------------------- // Allocation according to allocation 
properties and array layout - // unused arg_layout dimensions must be set to KOKKOS_INVALID_INDEX so that rank deduction can properly take place - template< class ... P > - explicit inline - DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop - , typename std::enable_if< ! Kokkos::Impl::ViewCtorProp< P... >::has_pointer - , typename traits::array_layout - >::type const & arg_layout - ) - : m_track() - , m_map() - , m_rank( Impl::DynRankDimTraits::template computeRank< typename traits::array_layout, P...>(arg_prop, arg_layout) ) - { - // Append layout and spaces if not input - typedef Kokkos::Impl::ViewCtorProp< P ... > alloc_prop_input ; - - // use 'std::integral_constant' for non-types - // to avoid duplicate class error. - typedef Kokkos::Impl::ViewCtorProp - < P ... - , typename std::conditional - < alloc_prop_input::has_label - , std::integral_constant - , typename std::string - >::type - , typename std::conditional - < alloc_prop_input::has_memory_space - , std::integral_constant - , typename traits::device_type::memory_space - >::type - , typename std::conditional - < alloc_prop_input::has_execution_space - , std::integral_constant - , typename traits::device_type::execution_space - >::type - > alloc_prop ; - - static_assert( traits::is_managed - , "View allocation constructor requires managed memory" ); - - if ( alloc_prop::initialize && + // unused arg_layout dimensions must be set to KOKKOS_INVALID_INDEX so that + // rank deduction can properly take place + template + explicit inline DynRankView( + const Kokkos::Impl::ViewCtorProp& arg_prop, + typename std::enable_if::has_pointer, + typename traits::array_layout>::type const& + arg_layout) + : m_track(), + m_map(), + m_rank(Impl::DynRankDimTraits:: + template computeRank( + arg_prop, arg_layout)) { + // Append layout and spaces if not input + typedef Kokkos::Impl::ViewCtorProp alloc_prop_input; + + // use 'std::integral_constant' for non-types + // to avoid duplicate class error. 
+ typedef Kokkos::Impl::ViewCtorProp< + P..., + typename std::conditional, + typename std::string>::type, + typename std::conditional< + alloc_prop_input::has_memory_space, + std::integral_constant, + typename traits::device_type::memory_space>::type, + typename std::conditional< + alloc_prop_input::has_execution_space, + std::integral_constant, + typename traits::device_type::execution_space>::type> + alloc_prop; + + static_assert(traits::is_managed, + "View allocation constructor requires managed memory"); + + if (alloc_prop::initialize && #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - ! alloc_prop::execution_space::is_initialized() + !alloc_prop::execution_space::is_initialized() #else - ! alloc_prop::execution_space::impl_is_initialized() + !alloc_prop::execution_space::impl_is_initialized() #endif - ) { - // If initializing view data then - // the execution space must be initialized. - Kokkos::Impl::throw_runtime_exception("Constructing DynRankView and initializing data with uninitialized execution space"); - } + ) { + // If initializing view data then + // the execution space must be initialized. + Kokkos::Impl::throw_runtime_exception( + "Constructing DynRankView and initializing data with uninitialized " + "execution space"); + } - // Copy the input allocation properties with possibly defaulted properties - alloc_prop prop_copy( arg_prop ); + // Copy the input allocation properties with possibly defaulted properties + alloc_prop prop_copy(arg_prop); //------------------------------------------------------------ -#if defined( KOKKOS_ENABLE_CUDA ) - // If allocating in CudaUVMSpace must fence before and after - // the allocation to protect against possible concurrent access - // on the CPU and the GPU. - // Fence using the trait's executon space (which will be Kokkos::Cuda) - // to avoid incomplete type errors from usng Kokkos::Cuda directly. 
- if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - typename traits::device_type::memory_space::execution_space().fence(); - } +#if defined(KOKKOS_ENABLE_CUDA) + // If allocating in CudaUVMSpace must fence before and after + // the allocation to protect against possible concurrent access + // on the CPU and the GPU. + // Fence using the trait's executon space (which will be Kokkos::Cuda) + // to avoid incomplete type errors from usng Kokkos::Cuda directly. + if (std::is_same::value) { + typename traits::device_type::memory_space::execution_space().fence(); + } #endif -//------------------------------------------------------------ + //------------------------------------------------------------ - Kokkos::Impl::SharedAllocationRecord<> * - record = m_map.allocate_shared( prop_copy, Impl::DynRankDimTraits::template createLayout(arg_prop, arg_layout) ); + Kokkos::Impl::SharedAllocationRecord<>* record = m_map.allocate_shared( + prop_copy, + Impl::DynRankDimTraits:: + template createLayout(arg_prop, arg_layout)); //------------------------------------------------------------ -#if defined( KOKKOS_ENABLE_CUDA ) - if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - typename traits::device_type::memory_space::execution_space().fence(); - } -#endif -//------------------------------------------------------------ - - // Setup and initialization complete, start tracking - m_track.assign_allocated_record_to_uninitialized( record ); +#if defined(KOKKOS_ENABLE_CUDA) + if (std::is_same::value) { + typename traits::device_type::memory_space::execution_space().fence(); } +#endif + //------------------------------------------------------------ + // Setup and initialization complete, start tracking + m_track.assign_allocated_record_to_uninitialized(record); + } // Wrappers - template< class ... P > - explicit KOKKOS_INLINE_FUNCTION - DynRankView( const Kokkos::Impl::ViewCtorProp< P ... 
> & arg_prop - , typename std::enable_if< Kokkos::Impl::ViewCtorProp< P... >::has_pointer - , typename traits::array_layout - >::type const & arg_layout - ) - : m_track() // No memory tracking - , m_map( arg_prop , Impl::DynRankDimTraits::template createLayout(arg_prop, arg_layout) ) - , m_rank( Impl::DynRankDimTraits::template computeRank< typename traits::array_layout, P...>(arg_prop, arg_layout) ) - { - static_assert( - std::is_same< pointer_type - , typename Impl::ViewCtorProp< P... >::pointer_type - >::value , - "Constructing DynRankView to wrap user memory must supply matching pointer type" ); - } + template + explicit KOKKOS_INLINE_FUNCTION DynRankView( + const Kokkos::Impl::ViewCtorProp& arg_prop, + typename std::enable_if::has_pointer, + typename traits::array_layout>::type const& + arg_layout) + : m_track() // No memory tracking + , + m_map(arg_prop, + Impl::DynRankDimTraits:: + template createLayout(arg_prop, arg_layout)), + m_rank(Impl::DynRankDimTraits:: + template computeRank( + arg_prop, arg_layout)) { + static_assert( + std::is_same::pointer_type>::value, + "Constructing DynRankView to wrap user memory must supply matching " + "pointer type"); + } //---------------------------------------- - //Constructor(s) + // Constructor(s) // Simple dimension-only layout - template< class ... P > - explicit inline - DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop - , typename std::enable_if< ! Kokkos::Impl::ViewCtorProp< P... 
>::has_pointer - , size_t - >::type const arg_N0 =KOKKOS_INVALID_INDEX - , const size_t arg_N1 =KOKKOS_INVALID_INDEX - , const size_t arg_N2 =KOKKOS_INVALID_INDEX - , const size_t arg_N3 =KOKKOS_INVALID_INDEX - , const size_t arg_N4 =KOKKOS_INVALID_INDEX - , const size_t arg_N5 =KOKKOS_INVALID_INDEX - , const size_t arg_N6 =KOKKOS_INVALID_INDEX - , const size_t arg_N7 =KOKKOS_INVALID_INDEX - ) - : DynRankView( arg_prop - , typename traits::array_layout - ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) - ) - {} - - template< class ... P > - explicit KOKKOS_INLINE_FUNCTION - DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop - , typename std::enable_if< Kokkos::Impl::ViewCtorProp< P... >::has_pointer - , size_t - >::type const arg_N0 =KOKKOS_INVALID_INDEX - , const size_t arg_N1 =KOKKOS_INVALID_INDEX - , const size_t arg_N2 =KOKKOS_INVALID_INDEX - , const size_t arg_N3 =KOKKOS_INVALID_INDEX - , const size_t arg_N4 =KOKKOS_INVALID_INDEX - , const size_t arg_N5 =KOKKOS_INVALID_INDEX - , const size_t arg_N6 =KOKKOS_INVALID_INDEX - , const size_t arg_N7 =KOKKOS_INVALID_INDEX - ) - : DynRankView( arg_prop - , typename traits::array_layout - ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) - ) - {} + template + explicit inline DynRankView( + const Kokkos::Impl::ViewCtorProp& arg_prop, + typename std::enable_if::has_pointer, + size_t>::type const arg_N0 = KOKKOS_INVALID_INDEX, + const size_t arg_N1 = KOKKOS_INVALID_INDEX, + const size_t arg_N2 = KOKKOS_INVALID_INDEX, + const size_t arg_N3 = KOKKOS_INVALID_INDEX, + const size_t arg_N4 = KOKKOS_INVALID_INDEX, + const size_t arg_N5 = KOKKOS_INVALID_INDEX, + const size_t arg_N6 = KOKKOS_INVALID_INDEX, + const size_t arg_N7 = KOKKOS_INVALID_INDEX) + : DynRankView(arg_prop, typename traits::array_layout( + arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, + arg_N5, arg_N6, arg_N7)) {} + + template + explicit KOKKOS_INLINE_FUNCTION DynRankView( + const 
Kokkos::Impl::ViewCtorProp& arg_prop, + typename std::enable_if::has_pointer, + size_t>::type const arg_N0 = KOKKOS_INVALID_INDEX, + const size_t arg_N1 = KOKKOS_INVALID_INDEX, + const size_t arg_N2 = KOKKOS_INVALID_INDEX, + const size_t arg_N3 = KOKKOS_INVALID_INDEX, + const size_t arg_N4 = KOKKOS_INVALID_INDEX, + const size_t arg_N5 = KOKKOS_INVALID_INDEX, + const size_t arg_N6 = KOKKOS_INVALID_INDEX, + const size_t arg_N7 = KOKKOS_INVALID_INDEX) + : DynRankView(arg_prop, typename traits::array_layout( + arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, + arg_N5, arg_N6, arg_N7)) {} // Allocate with label and layout - template< typename Label > - explicit inline - DynRankView( const Label & arg_label - , typename std::enable_if< - Kokkos::Impl::is_view_label