Skip to content

Commit 7bbdbdf

Browse files
authored
Update S to use StackEntry structures
2 parents df71707 + 42c9db6 commit 7bbdbdf

File tree

4 files changed

+44
-66
lines changed

4 files changed

+44
-66
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: TreeTools
22
Title: Create, Modify and Analyse Phylogenetic Trees
3-
Version: 2.0.0.9003
3+
Version: 2.0.0.9004
44
Authors@R: c(
55
person("Martin R.", 'Smith', role = c("aut", "cre", "cph"),
66
email = "martin.smith@durham.ac.uk",

NEWS.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1-
# TreeTools 2.0.0.9003 (development) #
1+
# TreeTools 2.0.0.9004 (development) #
2+
- Support larger trees in `ClusterTable` objects.
3+
* Retires `CT_PUSH` and `CT_POP` macros.
24
- Support larger trees in `Consensus()`.
3-
Uses 32-bit integers, necessitating downstream changes to TreeDist.
5+
* Uses 32-bit integers, necessitating downstream changes to TreeDist.
46

57
# TreeTools 2.0.0.9001 (development) #
68
- Remove hard limit on tree size in `SplitList`.

inst/include/TreeTools/ClusterTable.h

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,23 +13,6 @@
1313
#define UNINIT -999
1414
#define INF TreeTools::INTX_MAX
1515

16-
#define CT_ASSERT_CAN_PUSH() \
17-
ASSERT(static_cast<size_t>(Spos + CT_STACK_SIZE) <= S.size())
18-
19-
#define CT_ASSERT_CAN_POP() ASSERT(Spos >= CT_STACK_SIZE)
20-
21-
#define CT_PUSH(a, b, c, d) \
22-
S[Spos++] = (a); \
23-
S[Spos++] = (b); \
24-
S[Spos++] = (c); \
25-
S[Spos++] = (d)
26-
27-
#define CT_POP(a, b, c, d) \
28-
(d) = S[--Spos]; \
29-
(c) = S[--Spos]; \
30-
(b) = S[--Spos]; \
31-
(a) = S[--Spos]
32-
3316
#define CT_IS_LEAF(a) (a) <= n_tip
3417

3518
namespace TreeTools {
@@ -38,12 +21,6 @@ namespace TreeTools {
3821
inline constexpr int_fast32_t ct_stack_threshold = 8192;
3922
// New increased limit with heap allocation
4023
inline constexpr int_fast32_t ct_max_leaves_heap = 100000;
41-
inline constexpr int_fast32_t ct_stack_size = 4;
42-
43-
// Old hard limit, still used in TreeDist 2.12
44-
// TODO: Update TreeDist to use heap where necessary
45-
// NOTE: This constant is deprecated - new code should use ct_max_leaves_heap
46-
inline constexpr int_fast32_t ct_max_leaves = 16383;
4724

4825
template <typename T>
4926
inline void resize_uninitialized(std::vector<T>& v, std::size_t n) {

src/consensus.cpp

Lines changed: 39 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,23 @@ using namespace Rcpp;
66

77
#include <algorithm> /* for fill */
88
#include <array> /* for array */
9-
#include <vector> /* for vector */
109

11-
using TreeTools::ct_stack_size;
1210
using TreeTools::ct_stack_threshold;
1311
using TreeTools::ct_max_leaves_heap;
1412

13+
// One frame of the working stack S used while computing the consensus.
// A leaf is pushed as {code, code, 1, 1}; when an internal node is reached,
// the frames of its descendants are popped and merged into a single frame.
// Member order matters: frames are built with aggregate initialisation.
struct StackEntry {
  int32 L;  // smallest encoded leaf value among the merged frames
  int32 R;  // largest encoded leaf value among the merged frames
  int32 N;  // number of leaves covered (1 for a leaf, summed on merge)
  int32 W;  // number of vertices covered (1 for a leaf; 1 + sum on merge)
};
14+
1515
// Helper template function to perform consensus computation
1616
// Uses StackContainer for the S array (either std::array or std::vector)
1717
template<typename StackContainer>
18-
RawMatrix consensus_tree_impl(
18+
RawMatrix calc_consensus_tree(
1919
const List& trees,
2020
const NumericVector& p,
2121
StackContainer& S
2222
) {
2323
int32 v = 0;
2424
int32 w = 0;
2525
int32 L, R, N, W;
26-
int32 L_j, R_j, N_j, W_j;
2726

2827
const int32 n_trees = trees.length();
2928
const int32 frac_thresh = int32(n_trees * p[0]) + 1;
@@ -39,11 +38,21 @@ RawMatrix consensus_tree_impl(
3938
const int32 ntip_3 = n_tip - 3;
4039
const int32 nbin = (n_tip + 7) / 8; // bytes per row in packed output
4140

42-
std::vector<int32> split_count(n_tip, 1);
41+
int32* split_count;
42+
std::array<int32, ct_stack_threshold> split_stack;
43+
std::vector<int32> split_heap;
44+
if (n_tip <= ct_stack_threshold) {
45+
split_count = split_stack.data();
46+
} else {
47+
split_heap.resize(n_tip);
48+
split_count = split_heap.data();
49+
}
50+
51+
StackEntry *const S_start = S.data();
4352

4453
// Packed output: each row has nbin bytes
4554
RawMatrix ret(ntip_3, nbin);
46-
55+
4756
int32 i = 0;
4857
int32 splits_found = 0;
4958

@@ -52,42 +61,38 @@ RawMatrix consensus_tree_impl(
5261
continue;
5362
}
5463

55-
std::fill(split_count.begin(), split_count.end(), 1);
56-
64+
std::fill(split_count, split_count + n_tip, 1);
65+
5766
for (int32 j = i + 1; j < n_trees; ++j) {
5867
ASSERT(tables[i].N() == tables[j].N());
59-
68+
6069
tables[i].CLEAR();
61-
70+
6271
tables[j].TRESET();
6372
tables[j].READT(&v, &w);
6473

6574
int32 j_pos = 0;
66-
int32 Spos = 0; // Empty the stack S. Used in CT_PUSH / CT_POP macros.
75+
StackEntry* S_top = S_start; // Empty the stack S
6776

6877
do {
6978
if (CT_IS_LEAF(v)) {
70-
CT_ASSERT_CAN_PUSH();
71-
CT_PUSH(tables[i].ENCODE(v), tables[i].ENCODE(v), 1, 1);
79+
const auto enc_v = tables[i].ENCODE(v);
80+
*S_top++ = {enc_v, enc_v, 1, 1};
7281
} else {
73-
CT_ASSERT_CAN_POP();
74-
CT_POP(L, R, N, W_j);
75-
76-
W = 1 + W_j;
77-
w = w - W_j;
78-
82+
const StackEntry& entry = *--S_top;
83+
L = entry.L; R = entry.R; N = entry.N;
84+
W = 1 + entry.W;
85+
w -= entry.W;
7986
while (w) {
80-
CT_ASSERT_CAN_POP();
81-
CT_POP(L_j, R_j, N_j, W_j);
82-
if (L_j < L) L = L_j;
83-
if (R_j > R) R = R_j;
84-
N = N + N_j;
85-
W = W + W_j;
86-
w = w - W_j;
87+
const StackEntry& next = *--S_top;
88+
L = std::min(L, next.L); // Faster than ternary operator
89+
R = std::max(R, next.R);
90+
N += next.N;
91+
W += next.W;
92+
w -= next.W;
8793
}
8894

89-
CT_ASSERT_CAN_PUSH();
90-
CT_PUSH(L, R, N, W);
95+
*S_top++ = {L, R, N, W};
9196

9297
++j_pos;
9398

@@ -133,14 +138,8 @@ RawMatrix consensus_tree_impl(
133138
}
134139
} while (i++ != n_trees - thresh); // All clades in p% consensus must occur in first q% of trees.
135140

136-
if (splits_found == 0) {
137-
return RawMatrix(0, nbin);
138-
} else if (splits_found < ntip_3) {
139-
// Return only the rows we filled
140-
return ret(Range(0, splits_found - 1), _);
141-
} else {
142-
return ret;
143-
}
141+
return (splits_found == 0) ? RawMatrix(0, nbin) :
142+
(splits_found < ntip_3) ? ret(Range(0, splits_found - 1), _) : ret;
144143
}
145144

146145
// trees is a list of objects of class phylo, all with the same tip labels
@@ -158,12 +157,12 @@ RawMatrix consensus_tree(const List trees, const NumericVector p) {
158157

159158
if (n_tip <= ct_stack_threshold) {
160159
// Small tree: use stack-allocated array
161-
std::array<int32, ct_stack_size * ct_stack_threshold> S;
162-
return consensus_tree_impl(trees, p, S);
160+
std::array<StackEntry, ct_stack_threshold> S;
161+
return calc_consensus_tree(trees, p, S);
163162
} else {
164163
// Large tree: use heap-allocated vector
165-
std::vector<int32> S(ct_stack_size * n_tip);
166-
return consensus_tree_impl(trees, p, S);
164+
std::vector<StackEntry> S(n_tip);
165+
return calc_consensus_tree(trees, p, S);
167166
}
168167
} catch(const std::exception& e) {
169168
Rcpp::stop(e.what());

0 commit comments

Comments
 (0)