Skip to content

Commit cfa129a

Browse files
authored
Raise SL_MAX_TIPS from 2048 to 32768 (#265)
- SL_MAX_BINS: 32 -> 512, giving SL_MAX_TIPS = 64 * 512 = 32768
- Introduce SL_STACK_BINS (32) and SL_STACK_SPLITS (2045) as stack allocation thresholds, keeping SplitList objects at ~548 KB (not bloated)
- Trees with > 2048 tips use heap allocation in SplitList (transparent to callers)
- Update splits_to_tree.cpp to use stack thresholds for its local arrays
- Lookup table cost: ~1 MB (lg2 + lg2_double_factorial + lg2_unrooted)
1 parent d9cf7eb commit cfa129a

File tree

2 files changed

+20
-14
lines changed

2 files changed

+20
-14
lines changed

inst/include/TreeTools/SplitList.h

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -13,15 +13,19 @@ using splitbit = uint_fast64_t;
1313

1414
#define R_BIN_SIZE int16(8)
1515
#define SL_BIN_SIZE int16(64)
16-
#define SL_MAX_BINS int16(32)
16+
#define SL_MAX_BINS int16(512)
1717

18-
/* * Stack allocation limits (Legacy support for speed)
19-
* Trees smaller than this will use stack arrays.
20-
* Trees larger will trigger heap allocation.
21-
*/
22-
#define SL_MAX_TIPS (SL_BIN_SIZE * SL_MAX_BINS) // 2048
18+
#define SL_MAX_TIPS (SL_BIN_SIZE * SL_MAX_BINS) // 32768
2319
#define SL_MAX_SPLITS (SL_MAX_TIPS - 3)
2420

21+
/* Stack allocation thresholds.
22+
* Trees with n_splits <= SL_STACK_SPLITS AND n_bins <= SL_STACK_BINS
23+
* use fast stack arrays; larger trees fall back to heap allocation.
24+
* Kept at the pre-v1.16 values to avoid bloating SplitList objects.
25+
*/
26+
#define SL_STACK_BINS int16(32)
27+
#define SL_STACK_SPLITS int16(SL_BIN_SIZE * SL_STACK_BINS - 3) // 2045
28+
2529
#define INLASTBIN(n, size) int16((size) - int16((size) - int16((n) % (size))) % (size))
2630
#define INSUBBIN(bin, offset) \
2731
splitbit(x(split, ((bin) * input_bins_per_bin) + (offset)))
@@ -75,10 +79,10 @@ namespace TreeTools {
7579
splitbit** state;
7680

7781
private:
78-
/* STACK STORAGE (Fast path for small trees) */
79-
int32 stack_in_split[SL_MAX_SPLITS];
80-
splitbit stack_state[SL_MAX_SPLITS][SL_MAX_BINS];
81-
splitbit* stack_rows[SL_MAX_SPLITS];
82+
/* STACK STORAGE (Fast path for small trees ≤ SL_STACK_SPLITS splits) */
83+
int32 stack_in_split[SL_STACK_SPLITS];
84+
splitbit stack_state[SL_STACK_SPLITS][SL_STACK_BINS];
85+
splitbit* stack_rows[SL_STACK_SPLITS];
8286

8387
/* HEAP STORAGE (Large trees) */
8488
std::vector<int32> heap_in_split;
@@ -102,7 +106,7 @@ namespace TreeTools {
102106
ASSERT(n_input_bins > 0);
103107
n_bins = int32(n_input_bins + R_BIN_SIZE - 1) / input_bins_per_bin;
104108

105-
bool use_heap = (n_splits > SL_MAX_SPLITS) || (n_bins > SL_MAX_BINS);
109+
bool use_heap = (n_splits > SL_STACK_SPLITS) || (n_bins > SL_STACK_BINS);
106110

107111
if (use_heap) {
108112
heap_in_split.resize(n_splits, 0);

src/splits_to_tree.cpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,11 +31,13 @@ IntegerMatrix splits_to_edge(const RawMatrix splits, const IntegerVector nTip) {
3131
const SplitList x(splits);
3232

3333
// Decide whether to use stack or heap allocation based on tree size
34-
const bool use_heap = (n_tip > SL_MAX_TIPS) || (x.n_splits > SL_MAX_SPLITS);
34+
// Use stack arrays for small trees, heap for large
35+
constexpr int32 stack_tip_lim = SL_BIN_SIZE * SL_STACK_BINS;
36+
const bool use_heap = (n_tip > stack_tip_lim) || (x.n_splits > SL_STACK_SPLITS);
3537

3638
// Stack allocation for small trees (fast path)
37-
alignas(64) std::array<int32, SL_MAX_TIPS + SL_MAX_SPLITS> stack_parent{};
38-
alignas(64) std::array<int32, SL_MAX_TIPS> stack_patriarch{};
39+
alignas(64) std::array<int32, stack_tip_lim + SL_STACK_SPLITS> stack_parent{};
40+
alignas(64) std::array<int32, stack_tip_lim> stack_patriarch{};
3941

4042
// Heap allocation for large trees
4143
std::vector<int32> heap_parent;

0 commit comments

Comments
 (0)