@@ -120,10 +120,13 @@ find_package(Backtrace)
120120
121121# The version number.
# MAJOR and MINOR are bare integers; PATCH is a quoted string that carries its
# own leading dot. The removed/added pair below moves MINOR/PATCH from 0/".1"
# to 1/".0" while MAJOR stays at 3.
# NOTE(review): presumably PATCH is appended directly after MINOR when the
# version string is assembled -- confirm at the point of use.
122122set (iqtree_VERSION_MAJOR 3)
123- set (iqtree_VERSION_MINOR 0 )
124- set (iqtree_VERSION_PATCH ".1 " )
123+ set (iqtree_VERSION_MINOR 1 )
124+ set (iqtree_VERSION_PATCH ".0 " )
125125
# ---------------------------------------------------------------------------
# User-selectable build switches. Every switch defaults to OFF and is turned
# on from the command line with -D<NAME>=ON.
# ---------------------------------------------------------------------------

# Library type switch.
option(BUILD_SHARED_LIBS
       "Build Shared Libraries"
       OFF)

# GPU back-ends.
option(USE_CUDA
       "Enable CUDA support"
       OFF)
option(USE_OPENACC
       "Enable OpenACC support"
       OFF)

# Extra instrumentation inside the OpenACC kernels.
option(USE_OPENACC_PROFILE
       "Enable profiling instrumentation in OpenACC kernels"
       OFF)
127130
128131if (CMAKE_C_COMPILER MATCHES "mpic" )
129132 set (IQTREE_FLAGS "${IQTREE_FLAGS} mpi" )
@@ -227,6 +230,13 @@ if (APPLE)
227230 set (__ARM_NEON "TRUE" )
228231 set (NEON 1)
229232 endif ()
# Detect the macOS product version of the build host.
#   MACOS_VERSION       - string printed by `sw_vers -productVersion`
#   MACOS_MAJOR_VERSION - leading integer of MACOS_VERSION, or 0 when the
#                         version could not be determined
execute_process(
    COMMAND sw_vers -productVersion
    RESULT_VARIABLE MACOS_VERSION_RESULT
    OUTPUT_VARIABLE MACOS_VERSION
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET
)
string(REGEX MATCH "^[0-9]+" MACOS_MAJOR_VERSION "${MACOS_VERSION}")

# sw_vers can be missing or fail; in that case fall back to a numeric value
# so that later integer comparisons on MACOS_MAJOR_VERSION stay well-defined
# and the failure is visible in the configure log.
if (NOT "${MACOS_VERSION_RESULT}" STREQUAL "0" OR "${MACOS_MAJOR_VERSION}" STREQUAL "")
    message(WARNING "Could not determine the macOS version with sw_vers; assuming major version 0")
    set(MACOS_MAJOR_VERSION 0)
endif ()
message("macOS version: ${MACOS_VERSION} (major: ${MACOS_MAJOR_VERSION})")
230240elseif (UNIX AND NOT APPLE ) # Unix and Linux
231241 execute_process (
232242 COMMAND uname -m
@@ -449,10 +459,86 @@ if (MSVC)
449459endif ()
450460
# Baseline C++ flags that are always added when building with NVHPC.
if (NVHPC)
    # -cuda used to be appended here as well ("to disable AVX512 intrinsics");
    # it is intentionally no longer passed.
    string(APPEND CMAKE_CXX_FLAGS
        " -noswitcherror"
        " -Minfo=accel")
endif ()
# Optional CUDA back-end, enabled with -DUSE_CUDA=ON.
# On success this block
#   * enables the CUDA language and defines USE_CUDA for all sources,
#   * sets CUDART_LIBRARY to the CUDA runtime library that is linked later,
#   * adds the toolkit include / implicit link directories reported by CMake.
if (USE_CUDA)
    message("CUDA : Yes")

    # The project default for the GPU architectures has to be chosen BEFORE
    # enable_language(CUDA): with policy CMP0104 set to NEW, enabling the
    # language initialises CMAKE_CUDA_ARCHITECTURES itself, so a
    # "NOT DEFINED" check placed afterwards never fires. A value given with
    # -DCMAKE_CUDA_ARCHITECTURES=... or through the CUDAARCHS environment
    # variable still takes precedence.
    if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND "$ENV{CUDAARCHS}" STREQUAL "")
        set(CMAKE_CUDA_ARCHITECTURES "70;75;80;86;89;90")
    endif ()

    enable_language(CUDA)
    add_definitions(-DUSE_CUDA)
    set(CMAKE_CUDA_STANDARD 14)

    # Manual CUDA runtime detection — avoids FindCUDAToolkit's Threads dependency.
    # enable_language(CUDA) already found nvcc and set the CMAKE_CUDA_* variables.
    find_library(CUDART_LIBRARY cudart
        HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}
              ENV CUDA_HOME
              ENV CUDA_PATH
              ENV CUDAToolkit_ROOT
        PATH_SUFFIXES lib64 lib lib/x86_64-linux-gnu)
    if (CUDART_LIBRARY)
        message(STATUS "Found cudart: ${CUDART_LIBRARY}")
    else ()
        # CUDART_LIBRARY is the find_library() result variable, so presetting
        # it on the command line is the reliable manual override.
        message(FATAL_ERROR "Could not find cudart library. Set CUDA_HOME (or CUDA_PATH / CUDAToolkit_ROOT), or pass -DCUDART_LIBRARY=<cuda>/lib64/libcudart.so")
    endif ()

    # NOTE(review): this repeats the define already added by add_definitions()
    # above; presumably kept so NVHPC sees it through CMAKE_CXX_FLAGS as well --
    # confirm whether it is still required.
    if (NVHPC)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_CUDA")
    endif ()

    if (CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES)
        include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
    endif ()
    if (CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES)
        link_directories(${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
    endif ()
else ()
    message("CUDA : No")
endif ()
505+
# OpenACC offloading (-DUSE_OPENACC=ON) and optional kernel profiling
# (-DUSE_OPENACC_PROFILE=ON, which requires USE_OPENACC).
#
# Both switches are evaluated as CMake booleans, so ON, 1, TRUE and YES are
# all accepted instead of only the literal string "ON".
if (USE_OPENACC)
    message("OpenACC : Yes")
    add_definitions(-DUSE_OPENACC)
    if (GCC)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenacc -O3")
    elseif (NVHPC)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -acc -O3 -fast -Minfo=accel")
        # Target specific GPU architecture for optimal code generation.
        # -gpu=ccXX generates arch-specific instructions instead of generic PTX.
        # Examples: cc70 (V100), cc80 (A100), cc89 (RTX 4090), cc90 (H100)
        # Multiple targets: -DGPU_ARCH="cc70,cc80,cc90"
        # An empty GPU_ARCH is treated like an unset one; otherwise the flag
        # would degenerate to "-gpu=,maxregcount:128".
        if (DEFINED GPU_ARCH AND NOT "${GPU_ARCH}" STREQUAL "")
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gpu=${GPU_ARCH},maxregcount:128")
            message("OpenACC GPU : ${GPU_ARCH}")
        else ()
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gpu=cc70,cc80,cc90,maxregcount:128")
            message("OpenACC GPU : cc70,cc80,cc90 (default)")
        endif ()
    else ()
        # USE_OPENACC is defined for the sources, but no compiler flag was
        # added; make that visible instead of failing silently.
        message(WARNING "USE_OPENACC is enabled but the compiler is neither GCC nor NVHPC; no OpenACC compiler flags were added")
    endif ()
else ()
    message("OpenACC : NONE")
endif ()

if (USE_OPENACC_PROFILE)
    if (NOT USE_OPENACC)
        message(FATAL_ERROR "USE_OPENACC_PROFILE requires USE_OPENACC=ON")
    endif ()
    add_definitions(-DUSE_OPENACC_PROFILE)
    # Replace -O3 -fast with -O2 -g for debuggable profiling builds.
    # Only whole, space-delimited flags are rewritten: a plain substring
    # replace of "-fast" would also turn "-fno-fast-math" into "-fno-math".
    string(REGEX REPLACE "(^| )-O3( |$)" "\\1-O2\\2" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
    string(REGEX REPLACE "(^| )-fast( |$)" "\\1\\2" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
    message("OpenACC Prof : Yes (-O2 -g, debug symbols enabled)")
else ()
    message("OpenACC Prof : No")
endif ()
456542
457543# enable link time optimization
458544if (IQTREE_FLAGS MATCHES "lto" )
@@ -557,8 +643,14 @@ if (NOT IQTREE_FLAGS MATCHES "single")
557643
558644 if (CLANG AND APPLE )
559645 if (OSX_NATIVE_ARCHITECTURE STREQUAL "arm64" )
560- link_directories (${PROJECT_SOURCE_DIR} /libmac_m1 )
561- LIST (APPEND STATIC_LIBS "${PROJECT_SOURCE_DIR} /libmac_m1/libomp.a" "${PROJECT_SOURCE_DIR} /libmac_m1/libz.a" )
# Choose the in-tree directory that provides libomp.a on arm64 macOS:
# libmac26_m1 when the detected macOS major version is 26 or newer,
# libmac_m1 otherwise. The chosen directory is also added to the link
# search path.
if (MACOS_MAJOR_VERSION GREATER_EQUAL 26)
    message("Using libmac26_m1 for macOS ${MACOS_VERSION}")
    set(IQTREE_MAC_ARM64_OMP_DIR "${PROJECT_SOURCE_DIR}/libmac26_m1")
else ()
    set(IQTREE_MAC_ARM64_OMP_DIR "${PROJECT_SOURCE_DIR}/libmac_m1")
endif ()
link_directories("${IQTREE_MAC_ARM64_OMP_DIR}")
# NOTE(review): libz.a is taken from libmac_m1 in both cases, although only
# the selected directory is on the link search path -- confirm that
# libmac26_m1 is not meant to ship its own libz.a.
list(APPEND STATIC_LIBS
    "${IQTREE_MAC_ARM64_OMP_DIR}/libomp.a"
    "${PROJECT_SOURCE_DIR}/libmac_m1/libz.a")
562654 else ()
563655 link_directories (${PROJECT_SOURCE_DIR} /libmac )
564656 LIST (APPEND STATIC_LIBS "${PROJECT_SOURCE_DIR} /libmac/libomp.a" "${PROJECT_SOURCE_DIR} /libmac/libz.a" )
@@ -1007,6 +1099,11 @@ endif()
# Libraries linked into the iqtree3 target: the in-tree component libraries
# first, then the optional / platform-dependent ones collected in variables.
target_link_libraries(iqtree3
    pll
    ncl
    nclextra
    utils
    pda
    lbfgsb
    whtest
    sprng
    vectorclass
    model
    gsl
    alignment
    tree
    simulator
    terrace
    yaml-cpp
    phyloYAML
    main
    ${TARGET_CMAPLE}
    ${PLATFORM_LIB}
    ${STD_LIB}
    ${THREAD_LIB}
    ${ATOMIC_LIB}
)

# CUDA libraries
# The plain (keyword-less) signature is used on purpose: it has to match the
# call above, because CMake rejects mixing plain and PRIVATE/PUBLIC
# signatures on one target.
if (USE_CUDA)
    target_link_libraries(iqtree3
        ${CUDART_LIBRARY}
    )
endif ()
1106+
# Static archives of every in-tree component library, located in the build
# tree and appended to STATIC_LIBS in link order.
list(APPEND STATIC_LIBS
    "${PROJECT_BINARY_DIR}/pll/libpll.a"
    "${PROJECT_BINARY_DIR}/ncl/libncl.a"
    "${PROJECT_BINARY_DIR}/nclextra/libnclextra.a"
    "${PROJECT_BINARY_DIR}/utils/libutils.a"
    "${PROJECT_BINARY_DIR}/pda/libpda.a"
    "${PROJECT_BINARY_DIR}/lbfgsb/liblbfgsb.a"
    "${PROJECT_BINARY_DIR}/whtest/libwhtest.a"
    "${PROJECT_BINARY_DIR}/sprng/libsprng.a"
    "${PROJECT_BINARY_DIR}/vectorclass/libvectorclass.a"
    "${PROJECT_BINARY_DIR}/model/libmodel.a"
    "${PROJECT_BINARY_DIR}/gsl/libgsl.a"
    "${PROJECT_BINARY_DIR}/alignment/libalignment.a"
    "${PROJECT_BINARY_DIR}/tree/libtree.a"
    "${PROJECT_BINARY_DIR}/simulator/libsimulator.a"
    "${PROJECT_BINARY_DIR}/terrace/libterrace.a"
    "${PROJECT_BINARY_DIR}/yaml-cpp/libyaml-cpp.a"
    # Directory name (phylo-yaml) and archive name (phyloYAML) differ here.
    "${PROJECT_BINARY_DIR}/phylo-yaml/libphyloYAML.a"
    "${PROJECT_BINARY_DIR}/main/libmain.a"
)
10111108
10121109if (USE_CMAPLE_AA STREQUAL "ON" )
0 commit comments