@@ -38,26 +38,25 @@ namespace TreeTools {
3838 return T (1 ) << bit_pos;
3939 }
4040
41- #if __cplusplus >= 202002L
42- #include < bit> // C++20 header for std::popcount
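41+ // Hardware POPCNT: present on x86-64 CPUs from Nehalem / Barcelona (c. 2008) onward; it is not part of the baseline x86-64 ISA.
42+ // Inline asm emits the instruction directly, without requiring -mpopcnt; older CPUs lacking POPCNT will raise an illegal-instruction fault.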
43+ #if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
4344 inline int32 count_bits (splitbit x) { // x86-64 GCC/Clang variant: returns the number of set bits in x
44- return static_cast <int32>(std::popcount (x));
45+ uint64_t result; // written by the asm statement below (output operand %0)
46+ __asm__ (" popcnt %1, %0" : " =r" (result) : " r" (x)); // default AT&T operand order: source %1 (x) first, destination %0 (result) second. NOTE(review): presumably splitbit is 64 bits wide so both operands use 64-bit registers -- confirm against its typedef
47+ return static_cast <int32>(result); // narrow the count to the int32 return type
4548 }
46- // Option 2: Fallback for C++17 and older
47- #else
48- #if defined(__GNUC__) || defined(__clang__)
49- // GCC and Clang support __builtin_popcountll for long long
50- inline int32 count_bits (splitbit x) {
51- return static_cast <int32>(__builtin_popcountll (x));
52- }
53- #elif defined(_MSC_VER)
49+ #elif defined(_MSC_VER) && defined(_M_X64)
5450#include < intrin.h>
5551 inline int32 count_bits (splitbit x) { // MSVC x64 variant: returns the number of set bits in x
5652 return static_cast <int32>(__popcnt64 (x)); // __popcnt64 is the MSVC intrinsic from <intrin.h>; its result is narrowed to int32
5753 }
54+ #elif defined(__GNUC__) || defined(__clang__)
55+ // GCC/Clang on targets other than x86-64 (ARM, 32-bit x86, etc.): the builtin lowers to a native instruction where the target has one
56+ inline int32 count_bits (splitbit x) { // GCC/Clang variant used when the x86-64 inline-asm branch does not apply
57+ return static_cast <int32>(__builtin_popcountll (x)); // __builtin_popcountll takes unsigned long long; x is converted implicitly and the count is narrowed to int32
58+ }
5859#else
59- // A slower, but safe and highly portable fallback for all other compilers
60- // This is a last resort if no built-in is available.
6160 inline int32_t count_bits (splitbit x) {
6261 int32_t count = 0 ;
6362 while (x != 0 ) {
@@ -66,9 +65,7 @@ namespace TreeTools {
6665 }
6766 return count;
6867 }
69- #endif // Compiler check for builtins
70-
71- #endif // C++20 check
68+ #endif
7269
7370 class SplitList {
7471 public:
0 commit comments