@@ -6,7 +6,8 @@ using namespace Rcpp;
66
77#include < algorithm> /* for fill */
88#include < array> /* for array */
9- #include < map> /* for map */
9+ #include < string> /* for string (hash key) */
10+ #include < unordered_map> /* for unordered_map */
1011
1112using TreeTools::ct_stack_threshold;
1213using TreeTools::ct_max_leaves_heap;
@@ -178,12 +179,15 @@ List calc_split_frequencies(
178179
179180 StackEntry *const S_start = S.data ();
180181
181- // Use a map to store unique splits and their counts
182- // Key: split bit pattern; Value: index in output
183- std::map<std::vector<Rbyte>, int32> split_map ;
182+ // Hash map for O(1) amortized split deduplication
183+ std::unordered_map<std::string, int32> split_map;
184+ split_map. reserve (ntip_3 * 2 ) ;
184185 std::vector<std::vector<Rbyte>> split_patterns;
185186 std::vector<int32> counts;
186187
188+ // Reusable key buffer — lookups of already-seen splits need no heap allocation
189+ std::string key (nbin, ' \0 ' );
190+
187191 for (int32 i = 0 ; i < n_trees; ++i) {
188192 if (tables[i].NOSWX (ntip_3)) {
189193 continue ;
@@ -247,21 +251,21 @@ List calc_split_frequencies(
247251 const int32 end = tables[i].X_right (k + 1 );
248252 if (start == 0 && end == 0 ) continue ; // No valid cluster at this position
249253
250- // Build the bit pattern for this split
251- std::vector<Rbyte> pattern (nbin, 0 );
254+ // Build the bit pattern into the reusable key buffer
255+ std::fill (key. begin (), key. end (), ' \0 ' );
252256 for (int32 j = start; j <= end; ++j) {
253257 const int32 leaf_idx = tables[i].DECODE (j) - 1 ; // 0-based
254258 const int32 byte_idx = leaf_idx >> 3 ;
255259 const int32 bit_idx = leaf_idx & 7 ;
256- pattern [byte_idx] |= ( Rbyte ( 1 ) << bit_idx);
260+ key [byte_idx] |= static_cast < char >( 1 << bit_idx);
257261 }
258262
259- auto it = split_map.find (pattern );
263+ auto it = split_map.find (key );
260264 if (it == split_map.end ()) {
261265 // New split: record it with count from this reference tree
262266 const int32 idx = split_patterns.size ();
263- split_map[pattern] = idx;
264- split_patterns.push_back ( std::move (pattern ));
267+ split_map. emplace (key, idx) ;
268+ split_patterns.emplace_back (key. begin (), key. end ( ));
265269 counts.push_back (split_count[k]);
266270 }
267271 // If already found, the first reference tree that found it has the
0 commit comments