Skip to content

Commit d34de3e

Browse files
authored
Merge pull request #141 from Hashara/hashara
Hashara
2 parents 8a70472 + a59bb32 commit d34de3e

153 files changed

Lines changed: 11764 additions & 3869 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/ci.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ jobs:
178178

179179
build-macos-x86_64:
180180
name: Mac OS x86-64
181-
runs-on: macos-13 # Mac OS 14 Does Not Support x86-64
181+
runs-on: macos-15-intel # replaces deprecated macos-13
182182

183183
steps:
184184
- name: Checkout
@@ -196,7 +196,7 @@ jobs:
196196
cd build
197197
export CPPFLAGS="-I/usr/local/opt/libomp/include"
198198
export CXXFLAGS="-I/usr/local/opt/libomp/include"
199-
cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
199+
cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DEIGEN3_INCLUDE_DIR=$(brew --prefix eigen)/include/eigen3
200200
make -j
201201
make package
202202
file iqtree3 | grep x86_64
@@ -249,7 +249,7 @@ jobs:
249249
cd build
250250
export CPPFLAGS="-I/opt/homebrew/opt/libomp/include"
251251
export CXXFLAGS="-I/opt/homebrew/opt/libomp/include"
252-
cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++
252+
cmake .. -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DEIGEN3_INCLUDE_DIR=$(brew --prefix eigen)/include/eigen3
253253
make -j
254254
make package
255255
file iqtree3 | grep arm64

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,4 @@ vectorclass/
3434
zlib-1.2.7/
3535
/.direnv/
3636
/.envrc
37+
softwipe_build

CMakeLists.txt

Lines changed: 102 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,13 @@ find_package(Backtrace)
120120

121121
# The version number.
122122
set (iqtree_VERSION_MAJOR 3)
123-
set (iqtree_VERSION_MINOR 0)
124-
set (iqtree_VERSION_PATCH ".1")
123+
set (iqtree_VERSION_MINOR 1)
124+
set (iqtree_VERSION_PATCH ".0")
125125

126126
option(BUILD_SHARED_LIBS "Build Shared Libraries" OFF)
127+
option(USE_CUDA "Enable CUDA support" OFF)
128+
option(USE_OPENACC "Enable OpenACC support" OFF)
129+
option(USE_OPENACC_PROFILE "Enable profiling instrumentation in OpenACC kernels" OFF)
127130

128131
if (CMAKE_C_COMPILER MATCHES "mpic")
129132
set(IQTREE_FLAGS "${IQTREE_FLAGS} mpi")
@@ -227,6 +230,13 @@ if (APPLE)
227230
set (__ARM_NEON "TRUE")
228231
set (NEON 1)
229232
endif()
233+
execute_process(
234+
COMMAND sw_vers -productVersion
235+
OUTPUT_VARIABLE MACOS_VERSION
236+
OUTPUT_STRIP_TRAILING_WHITESPACE
237+
)
238+
string(REGEX MATCH "^([0-9]+)" MACOS_MAJOR_VERSION "${MACOS_VERSION}")
239+
message("macOS version: ${MACOS_VERSION} (major: ${MACOS_MAJOR_VERSION})")
230240
elseif (UNIX AND NOT APPLE) # Unix and Linux
231241
execute_process(
232242
COMMAND uname -m
@@ -449,10 +459,86 @@ if (MSVC)
449459
endif()
450460

451461
if (NVHPC)
462+
452463
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -noswitcherror")
453-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -cuda") # to disable AVX512 intrinsics
464+
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -cuda") # to disable AVX512 intrinsics
454465
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Minfo=accel")
455466
endif()
467+
# ---------------------------------------------------------------------------
# Optional CUDA support (-DUSE_CUDA=ON).
#
# Enables the CUDA language, defines USE_CUDA for all sources, locates the
# CUDA runtime library (cudart) and stores its path in CUDART_LIBRARY, and
# adds the toolkit include/link directories at directory scope.
# ---------------------------------------------------------------------------
if (USE_CUDA)
    message("CUDA : Yes")

    # Pick the default GPU architectures BEFORE enable_language(CUDA).
    # With policy CMP0104 set to NEW (CMake >= 3.18), enable_language(CUDA)
    # initialises CMAKE_CUDA_ARCHITECTURES itself (from the CUDAARCHS
    # environment variable or the compiler default), so a
    # "NOT DEFINED" check placed after it never fires and the list below
    # would be silently ignored.
    # A user-supplied -DCMAKE_CUDA_ARCHITECTURES=... or CUDAARCHS still wins.
    # NOTE(review): toolkits that do not know every architecture in this list
    # will now be rejected at configure time; pass -DCMAKE_CUDA_ARCHITECTURES
    # explicitly for older toolkits.
    if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND "$ENV{CUDAARCHS}" STREQUAL "")
        set(CMAKE_CUDA_ARCHITECTURES "70;75;80;86;89;90")
    endif()

    enable_language(CUDA)
    add_definitions(-DUSE_CUDA)
    set(CMAKE_CUDA_STANDARD 14)

    # Manual CUDA runtime detection — avoids FindCUDAToolkit's Threads dependency.
    # enable_language(CUDA) already found nvcc and set CMAKE_CUDA_* variables.
    # CUDART_LIBRARY is a cache entry: passing -DCUDART_LIBRARY=<path> on the
    # command line skips the search entirely.
    find_library(CUDART_LIBRARY cudart
        HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}
              ENV CUDA_HOME
              ENV CUDA_PATH
              ENV CUDAToolkit_ROOT
        PATH_SUFFIXES lib64 lib lib/x86_64-linux-gnu)
    if (CUDART_LIBRARY)
        message(STATUS "Found cudart: ${CUDART_LIBRARY}")
    else()
        # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is overwritten by compiler
        # detection, so point users at overrides that actually take effect.
        message(FATAL_ERROR "Could not find cudart library. Set CUDA_HOME (or CUDA_PATH / CUDAToolkit_ROOT) or pass -DCUDART_LIBRARY=<cuda>/lib64/libcudart.so")
    endif()

    # NOTE(review): add_definitions() above already defines USE_CUDA; this
    # extra flag for NVHPC looks redundant — confirm before removing.
    if (NVHPC)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_CUDA")
    endif()

    # Make the toolkit headers and libraries visible to every target in this
    # directory (only when compiler detection reported them).
    if (CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES)
        include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
    endif()
    if (CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES)
        link_directories(${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
    endif()
else ()
    message("CUDA : No")
endif()
505+
506+
# ---------------------------------------------------------------------------
# Optional OpenACC support (-DUSE_OPENACC=ON).
#
# USE_OPENACC is a boolean option(), so it is tested for truthiness rather
# than compared with the literal string "ON"; this way -DUSE_OPENACC=1,
# TRUE, YES, on, ... all enable the feature, matching how USE_CUDA is tested.
#
# Defines USE_OPENACC for all sources and appends compiler-specific OpenACC
# flags to CMAKE_CXX_FLAGS.
# ---------------------------------------------------------------------------
if (USE_OPENACC)
    message("OpenACC : Yes")
    add_definitions(-DUSE_OPENACC)
    if (GCC)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenacc -O3")
    elseif (NVHPC)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -acc -O3 -fast -Minfo=accel")
        # Target specific GPU architecture for optimal code generation.
        # -gpu=ccXX generates arch-specific instructions instead of generic PTX.
        # Examples: cc70 (V100), cc80 (A100), cc89 (RTX 4090), cc90 (H100)
        # Multiple targets: -DGPU_ARCH="cc70,cc80,cc90"
        if (DEFINED GPU_ARCH)
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gpu=${GPU_ARCH},maxregcount:128")
            message("OpenACC GPU : ${GPU_ARCH}")
        else()
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gpu=cc70,cc80,cc90,maxregcount:128")
            message("OpenACC GPU : cc70,cc80,cc90 (default)")
        endif()
    else()
        # Neither branch above matched, so USE_OPENACC is defined for the
        # sources but no OpenACC compiler flag has been added.
        message(WARNING "USE_OPENACC is enabled but no OpenACC compiler flags are configured for this compiler (only GCC and NVHPC are handled)")
    endif()
else()
    message("OpenACC : NONE")
endif()

# ---------------------------------------------------------------------------
# Optional profiling instrumentation for OpenACC kernels
# (-DUSE_OPENACC_PROFILE=ON). Requires USE_OPENACC.
#
# Defines USE_OPENACC_PROFILE and rewrites CMAKE_CXX_FLAGS for a debuggable
# build: -O3 becomes -O2, -fast is removed, and -g is appended.
# ---------------------------------------------------------------------------
if (USE_OPENACC_PROFILE)
    if (NOT USE_OPENACC)
        message(FATAL_ERROR "USE_OPENACC_PROFILE requires USE_OPENACC=ON")
    endif()
    add_definitions(-DUSE_OPENACC_PROFILE)
    # Replace -O3 -fast with -O2 -g for debuggable profiling builds
    string(REPLACE "-O3" "-O2" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
    string(REPLACE "-fast" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
    message("OpenACC Prof : Yes (-O2 -g, debug symbols enabled)")
else()
    message("OpenACC Prof : No")
endif()
456542

457543
# enable link time optimization
458544
if (IQTREE_FLAGS MATCHES "lto")
@@ -557,8 +643,14 @@ if (NOT IQTREE_FLAGS MATCHES "single")
557643

558644
if(CLANG AND APPLE)
559645
if(OSX_NATIVE_ARCHITECTURE STREQUAL "arm64")
560-
link_directories(${PROJECT_SOURCE_DIR}/libmac_m1)
561-
LIST(APPEND STATIC_LIBS "${PROJECT_SOURCE_DIR}/libmac_m1/libomp.a" "${PROJECT_SOURCE_DIR}/libmac_m1/libz.a")
646+
if(MACOS_MAJOR_VERSION GREATER_EQUAL 26)
647+
message("Using libmac26_m1 for macOS ${MACOS_VERSION}")
648+
link_directories(${PROJECT_SOURCE_DIR}/libmac26_m1)
649+
LIST(APPEND STATIC_LIBS "${PROJECT_SOURCE_DIR}/libmac26_m1/libomp.a" "${PROJECT_SOURCE_DIR}/libmac_m1/libz.a")
650+
else()
651+
link_directories(${PROJECT_SOURCE_DIR}/libmac_m1)
652+
LIST(APPEND STATIC_LIBS "${PROJECT_SOURCE_DIR}/libmac_m1/libomp.a" "${PROJECT_SOURCE_DIR}/libmac_m1/libz.a")
653+
endif()
562654
else()
563655
link_directories(${PROJECT_SOURCE_DIR}/libmac)
564656
LIST(APPEND STATIC_LIBS "${PROJECT_SOURCE_DIR}/libmac/libomp.a" "${PROJECT_SOURCE_DIR}/libmac/libz.a")
@@ -1007,6 +1099,11 @@ endif()
10071099
target_link_libraries(iqtree3 pll ncl nclextra utils pda lbfgsb whtest sprng vectorclass model
10081100
gsl alignment tree simulator terrace yaml-cpp phyloYAML main ${TARGET_CMAPLE} ${PLATFORM_LIB} ${STD_LIB} ${THREAD_LIB} ${ATOMIC_LIB})
10091101

1102+
# CUDA libraries
1103+
if (USE_CUDA)
1104+
target_link_libraries(iqtree3 ${CUDART_LIBRARY})
1105+
endif()
1106+
10101107
LIST(APPEND STATIC_LIBS "${PROJECT_BINARY_DIR}/pll/libpll.a" "${PROJECT_BINARY_DIR}/ncl/libncl.a" "${PROJECT_BINARY_DIR}/nclextra/libnclextra.a" "${PROJECT_BINARY_DIR}/utils/libutils.a" "${PROJECT_BINARY_DIR}/pda/libpda.a" "${PROJECT_BINARY_DIR}/lbfgsb/liblbfgsb.a" "${PROJECT_BINARY_DIR}/whtest/libwhtest.a" "${PROJECT_BINARY_DIR}/sprng/libsprng.a" "${PROJECT_BINARY_DIR}/vectorclass/libvectorclass.a" "${PROJECT_BINARY_DIR}/model/libmodel.a" "${PROJECT_BINARY_DIR}/gsl/libgsl.a" "${PROJECT_BINARY_DIR}/alignment/libalignment.a" "${PROJECT_BINARY_DIR}/tree/libtree.a" "${PROJECT_BINARY_DIR}/simulator/libsimulator.a" "${PROJECT_BINARY_DIR}/terrace/libterrace.a" "${PROJECT_BINARY_DIR}/yaml-cpp/libyaml-cpp.a" "${PROJECT_BINARY_DIR}/phylo-yaml/libphyloYAML.a" "${PROJECT_BINARY_DIR}/main/libmain.a")
10111108

10121109
if (USE_CMAPLE_AA STREQUAL "ON")

0 commit comments

Comments
 (0)