From 2f4ffd82f754a86210420c4357f0d57f4b282b22 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Wed, 7 May 2025 23:08:31 +0200 Subject: [PATCH 01/21] Back to default rhub.yaml config but remove vignettes. --- .github/workflows/rhub.yaml | 16 -- vignettes/.gitignore | 4 - vignettes/persistence-class.qmd | 112 ---------- vignettes/references.bib | 21 -- vignettes/validation-benchmark.qmd | 340 ----------------------------- 5 files changed, 493 deletions(-) delete mode 100644 vignettes/.gitignore delete mode 100644 vignettes/persistence-class.qmd delete mode 100644 vignettes/references.bib delete mode 100644 vignettes/validation-benchmark.qmd diff --git a/.github/workflows/rhub.yaml b/.github/workflows/rhub.yaml index f3af4e4..74ec7b0 100644 --- a/.github/workflows/rhub.yaml +++ b/.github/workflows/rhub.yaml @@ -51,14 +51,6 @@ jobs: image: ${{ matrix.config.container }} steps: - - name: Install sudo package - run: apt update && apt install sudo - - name: Install gh command line tool - uses: wusatosi/setup-gh@v1 - - name: Install jq command line tool - uses: vegardit/gha-setup-jq@v1 - - name: Install Quarto - uses: quarto-dev/quarto-actions/setup@v2 - uses: r-hub/actions/checkout@v1 - uses: r-hub/actions/platform-info@v1 with: @@ -84,14 +76,6 @@ jobs: config: ${{ fromJson(needs.setup.outputs.platforms) }} steps: - - name: Install sudo package - run: apt update && apt install sudo - - name: Install gh command line tool - uses: wusatosi/setup-gh@v1 - - name: Install jq command line tool - uses: vegardit/gha-setup-jq@v1 - - name: Install Quarto - uses: quarto-dev/quarto-actions/setup@v2 - uses: r-hub/actions/checkout@v1 - uses: r-hub/actions/setup-r@v1 with: diff --git a/vignettes/.gitignore b/vignettes/.gitignore deleted file mode 100644 index ea00d00..0000000 --- a/vignettes/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -*.html -*.R -*.log -*_files diff --git a/vignettes/persistence-class.qmd b/vignettes/persistence-class.qmd deleted file mode 100644 index 0d08e41..0000000 --- a/vignettes/persistence-class.qmd +++ /dev/null @@ -1,112 +0,0 @@ ---- -title: "The persistence class" -vignette: > - %\VignetteIndexEntry{The persistence class} - %\VignetteEngine{quarto::html} - %\VignetteEncoding{UTF-8} -knitr: - opts_chunk: - collapse: true - comment: '#>' ---- - -```{r} -#| label: setup -library(phutil) -``` - -## Structure of the class - -An object of class `persistence` is a list of 2 elements: - -- `pairs`: A list of 2-column matrices containing birth-death pairs. The -$i$-*th* element of the list corresponds to the $(i-1)$-*th* homology -dimension. If there is no pairs for a given dimension but there are pairs in -higher dimensions, the corresponding element(s) is/are filled with a -\eqn{0 \times 2} numeric matrix with 0 rows. - -- `metadata`: A list of length 6 containing information about how the data -was computed: - - - `orderered_pairs`: A boolean indicating whether the pairs in the - `pairs` list are ordered (i.e. the first column is strictly less than the - second column). - - `data`: The name of the object containing the original data on which the - persistence data was computed. - - `engine`: The name of the package and the function of this package that - computed the persistence data in the form - `"package_name::package_function"`. - - `filtration`: The filtration used in the computation in a human-readable - format (i.e. full names, capitals where needed, etc.). - - `parameters`: A list of parameters used in the computation. - - `call`: The exact call that generated the persistence data. - -## Supported inputs - -The `persistence` class is designed to support a variety of inputs, including - -A single numeric matrix - -: If the user provides a matrix, it must have at least 2 columns and each row -represents a topological feature. - -- If it has 2 columns, we assume that the first column corresponds to the birth -of a feature and the second column corresponds to the death of a feature, -irrespective of the order of the columns. In this case, we assume that the -homology dimension of the feature is 0. - -- If it has more than 2 columns, we assume that the first column corresponds to -the homology dimension of the feature, the second column corresponds to the -birth of a feature, and the third column corresponds to the death of a feature, -irrespective of the order of the columns. The remaining columns are ignored. - -A list of numeric matrices - -: If the user provides a list of matrices, each list element corresponds to an -homology dimension, from 0 to some maximum value. Each matrix must have at least -2 columns and each row represents a topological feature in the corresponding -homology dimension (given by the matrix index in the list minus 1). Each matrix -is parsed as described above. - -A dataframe - -: If the user provides an object of class `data.frame`, it must have at least 2 -columns and each row represents a topological feature. If it has exactly 2 -columns, we add a `dimension` column with all values set to 0. If it has more -than 2 columns, we require that `birth` and `death` exist in the column names. -The `birth` and `death` columns are parsed as described above. The remaining -columns are ignored. - -An object of class 'PHom' - -: If the user provides an object of class 'PHom' as typically produced by -`ripserr::vietoris_rips()`, it means that it is a `base::data.frame` with -columns `dimension`, `birth`, and `death` in that specific order. The -`dimension` column is of type integer while the `birth` and `death` columns are -of type numeric. The `dimension` column is used to create a list of matrices, -where each matrix corresponds to an homology dimension, from 0 to the maximum -value in the `dimension` column. - -An object of class 'diagram' - -: If the user provides an object of class 'diagram' as typically produced by -`TDA::*Diag()` functions in entry `diagram`, it means that it is a -`base::matrix` with 3 columns with names `dimension`, `Birth` and `Death` in -that specific order. The `dimension` column is of type integer while the `Birth` -and `Death` columns are of type numeric. Furthermore, the object stores as -attributes the parameters used to compute the diagram and the entire call to the -function that produced the diagram. We first lowercase `Birth` and `Death`. -Next, the `dimension` column is used to create a list of matrices, where each -matrix corresponds to an homology dimension, from 0 to the maximum value in the -`dimension` column. The `birth` and `death` columns are parsed as described -above. The remaining columns are ignored. - -An object of class 'hclust' - -: If the user provides an object of class 'hclust' as typically produced by -`stats::hclust()`, it means that it is a `base::list` which contains the -`height` element which is a set of $n−1$ real values (non-decreasing for -ultrametric trees) storing the clustering height, that is, the value of the -criterion associated with the clustering method for the particular -agglomeration. This is used as homological feature death while a birth of `0` is -typically used. diff --git a/vignettes/references.bib b/vignettes/references.bib deleted file mode 100644 index efc7632..0000000 --- a/vignettes/references.bib +++ /dev/null @@ -1,21 +0,0 @@ -@article{cohen2010lipschitz, - title={Lipschitz functions have L p-stable persistence}, - author={Cohen-Steiner, David and Edelsbrunner, Herbert and Harer, John and Mileyko, Yuriy}, - journal={Foundations of computational mathematics}, - volume={10}, - number={2}, - pages={127--139}, - year={2010}, - publisher={Springer} -} - -@article{bubenik2023exact, - title={Exact weights, path metrics, and algebraic Wasserstein distances}, - author={Bubenik, Peter and Scott, Jonathan and Stanley, Donald}, - journal={Journal of Applied and Computational Topology}, - volume={7}, - number={2}, - pages={185--219}, - year={2023}, - publisher={Springer} -} diff --git a/vignettes/validation-benchmark.qmd b/vignettes/validation-benchmark.qmd deleted file mode 100644 index 36214fc..0000000 --- a/vignettes/validation-benchmark.qmd +++ /dev/null @@ -1,340 +0,0 @@ ---- -title: "Validation and Benchmark of Wasserstein Distances" -vignette: > - %\VignetteIndexEntry{Validation and Benchmark of Wasserstein Distances} - %\VignetteEngine{quarto::html} - %\VignetteEncoding{UTF-8} - %\VignetteDepends{TDA,microbenchmark,ggplot2,scales} -knitr: - opts_chunk: - collapse: true - comment: '#>' -bibliography: references.bib ---- - -This vignette introduces the Wasserstein and bottleneck distances between -persistence diagrams and their implementations in {phutil}, adapted from -[Hera](https://github.com/anigmetov/hera), by way of two tasks: - -1. Validate the implementations on an example computed by hand. -2. Benchmark the implementations against those provided by {TDA} (adapted from -Dionysus). - -In addition to {phutil}, we use {ggplot2} to visualize the benchmark results. -We will also access the {tdaunif} package to generate larger point clouds and the {microbenchmark} package to perform benchmark tests. - -```{r} -#| label: setup -library(phutil) -library(ggplot2) -``` - -## Definitions - -_Persistence diagrams_ are multisets (sets with multiplicity) of points in the -plane that encode the interval decompositions of persistent modules obtained -from filtrations of data (e.g. Vietoris--Rips filtrations of point clouds and -cubical filtrations of numerical arrays). -Most applications consider only ordinary persistent homology, so that all points -live in the upper-half plane; and most involve non-negative-valued filtrations, -so that all points live in the first quadrant. The examples in this vignette -will be no exceptions. - -We'll distinguish between persistence diagrams, which encode one -degree of a persistence module, and _persistence data_, which comprises -persistent pairs of many degrees (and annotated as such). Whereas a diagram is -typically represented as a 2-column matrix with columns for birth and death -values, data are typically represented as a 3-column matrix with an additional -column for (whole number) degrees. - -The most common distance metrics between persistence diagrams exploit the family -of _Minkowski distances_ $D_p$ between points in $\mathbb{R}^n$ defined, for $1 -\leq p < \infty$, as follows: - -$$ D_p(x,y) = \left(\sum_{i=1}^{n}{(x_i - y_i)^p}\right)^{1/p}. $$ - -In the limit $p \to \infty$, this expression approaches the following auxiliary -definition: - -$$ D_\infty(x,y) = \max_{i=1}^{n}{\lvert x_i - y_i \rvert}. $$ - -As the parameter $p$ ranges between $1$ and $\infty$, three of its values yield -familiar distance metrics: The taxicab distance $D_1$, the Euclidean distance -$D_2$, and the Chebyshev distance $D_\infty$. - -The [_Kantorovich_ or _Wasserstein metric_](https://en.wikipedia.org/wiki/Wasserstein_metric) derives from the problem of optimal transport: What is the minimum cost of relocating one distribution to another? We restrict ourselves to persistence diagrams with finitely many off-diagonal point masses, though each diagram is taken to include every point on the diagonal. So the cost of relocating one diagram $X$ to another $Y$ amounts to (a) the cost of relocating some off-diagonal points to other off-diagonal points plus (b) the cost of relocating the remaining off-diagonal points to the diagonal, and vice-versa. - -Because the diagonal points are dense, this cost depends entirely on how the off-diagonal points of both diagrams are matched---either to each other or to the diagonal, with each point matched exactly once. -For this purpose, define a _matching_ to be any bijective map $\varphi : X \to Y$, though in practice we assume that almost all diagonal points are matched to themselves and incur no cost. - -The cost $D(x,\varphi(x))$ of relocating a point $x$ to its matched point -$\varphi(x)$ is typically taken to be a Minkowski distance $D_q(x,\varphi(x)) = -\lVert x - \varphi(x) \rVert_q$, defined by the $L^q$ norm on $\mathbb{R}^2$. -(While simple, this geometric treatment elides that the points in the plane -encode the collection of interval modules into which the persistence module -decomposes. Other metrics have been proposed for this space, but we restrict to -this family here.) - -The total cost of the relocation is canonically taken to be the Minkowski -distance $\left( \sum_{x \in X}{D_q(x,\varphi(x))^p} \right)^{1/p}$ of the vector -of matched-point distances. The Wasserstein distance is defined to be the -infimum of this value over all possible matches. -This yields the formulae - -$$ W_p^q(X,Y) = \inf_{\varphi : X \to Y}{\left( \sum_{x \in X}{{\lVert -x-\varphi(x) \rVert_q}^p} \right)^{1/p}}, $$ - -for $p < \infty$ and - -$$ W_\infty^q(X,Y) = \inf_{\varphi : X \to Y}{\max_{x \in X}{\lVert x-\varphi(x) -\rVert_q}} $$ - -for $p = \infty$. - -See @cohen2010lipschitz and @bubenik2023exact for detailed treatments and -stability results on these families of metrics. - -## Validation - -### Distances between nontrivial diagrams - -The following persistence diagrams provide a tractable example: - -$$ X = \left[ \begin{array}{cc} 1 & 3 \\ 3 & 5 \end{array} \right], \phantom{X = -Y} Y = \left[ \begin{array}{cc} 3 & 4 \end{array} \right]. $$ - -For convenience in the code, we omit dimensionality and focus only on the matrix -representations. - -```{r define small PDs} -X <- rbind( - c(1, 3), - c(3, 5) -) -Y <- rbind( - c(3, 4) -) -``` - -We overlay both diagrams in @fig-plot-small. Note that the vector between the -off-diagonal points $(1,3)$ of $X$ and $(3,4)$ of $Y$ is $(2,1)$, while the -vector from $(1,3)$ to its nearest diagonal point $(2,2)$ is $(1,-1)$. That one -coordinate is the same size while the other is smaller implies that an optimal -matching will always match $(1,3)$ with the diagonal, so long as $p \geq 1$. A -similar argument necessitates that $(3,4)$ of $Y$ must match with $(3,5)$ of -$X$. - -```{r} -#| label: fig-plot-small -#| fig-width: 4 -#| fig-height: 4 -#| fig-align: center -#| fig-cap: "Overlaid persistence diagrams $X$ (circles) and $Y$ (diamond) with dashed segments connecting optimally matched pairs." -par(mar = c(4, 4, 1, 1) + .1) -plot( - NA_real_, - xlim = c(0, 6), ylim = c(0, 6), asp = 1, xlab = "birth", ylab = "death" -) -abline(a = 0, b = 1) -points(X, pch = 1) -points(Y, pch = 5) -segments(X[, 1], X[, 2], c(2, Y[, 1]), c(2, Y[, 2]), lty = 2) -par(mar = c(5, 4, 4, 2) + .1) -``` - -Based on these observations, we get this expression for the Wasserstein distance -using the $q$-norm half-plane metric and the $p$-norm "matched space" metric: - -$$ W_p^q(X,Y) = ( {\lVert a \rVert_q}^p + {\lVert b \rVert_q}^p )^{1/p}, $$ - -where $a = (1,-1)$ and $b = (0,-1)$ are the vectors between matched points. We -can now calculate Wasserstein distances "by hand"; we'll consider those using -the half-plane Minkowski metrics with $q=1,2,\infty$ and the "matched space" -metrics with $p=1,2,\infty$. - -First, with $q=1$, we get $\lVert a \rVert_q = 1+1=2$ and $\lVert b \rVert_q = -0+1=1$. So the $(1,p)$-Wasserstein distance will be the $p$-Minkowski norm of -the vector $(2,1)$, given by $W_p^1(X,Y) = (2^p + 1^p)^{1/p}$. This nets us the -values $W_1^1(X,Y) = 3$ and $W_2^1(X,Y) = \sqrt{5}$. And then $W_\infty^1(X,Y) = -\max(2,1) = 2$. The reader is invited to complete the rest of @tbl-small. - -| Metric | $\lVert a \rVert$ | $\lVert b \rVert$ | $W_1$ | $W_2$ | $W_\infty$ | -|:-------|:----------------:|:----------------:|:-------:|:-------:|:------------:| -| $L^1$ | 2 | 1 | 3 | $\sqrt{5}$ | 2 | -| $L^2$ | $\sqrt{2}$ | 1 | $1+\sqrt{2}$ | $\sqrt{3}$ | $\sqrt{2}$ | -| $L^\infty$ | 1 | 1 | 2 | $\sqrt{2}$ | 1 | - -: Distances between optimally paired features and Wasserstein distances between -$X$ and $Y$ for several choices of half-plane and "matched space" metrics. -{#tbl-small} - -The results make intuitive sense; for example, the values change monotonically -along each row and column. -Let us now validate the bottom row---using the $L^\infty$ distance on the -half-plane, giving the popular _bottleneck distance_---using both Hela, as -exposed through {phutil}, and Dionysus, as exposed through {TDA}: - -```{r validate small PDs with Hera} -wasserstein_distance(X, Y, p = 1) -wasserstein_distance(X, Y, p = 2) -bottleneck_distance(X, Y) -``` - -In order to compute distances with {TDA}, we must restructure the PDs to include -a `"dimension"` column. -Note also that `TDA::wasserstein()` does not take the $1/p$th power after -computing the sum of $p$th powers; we do this manually to get comparable -results: - -```{r validate small PDs with Dionysus} -TDA::wasserstein(cbind(0, X), cbind(0, Y), p = 1, dimension = 0) -sqrt(TDA::wasserstein(cbind(0, X), cbind(0, Y), p = 2, dimension = 0)) -TDA::bottleneck(cbind(0, X), cbind(0, Y), dimension = 0) -``` - -### Distances from the trivial diagram - -An important edge case is when one persistence diagram is trivial, i.e. contains -only the diagonal so is "empty" of off-diagonal points. -This can occur unexpectedly in comparisons of persistence data, as the data may -be large but higher-degree features present in one set but absent in another. -To validate the distances in this case, we create an empty diagram $E$ and use -the same code to compare it to $X$. The point $(3,5)$ of $X$ will be matched to -the diagonal $(4,4)$, which yields the same $\infty$-distance $1$ so the -$L^\infty$ Wasserstein distances will be the same as before. - -```{r validate small PD vs empty} -# empty PD -E <- matrix(NA_real_, nrow = 0, ncol = 2) -# with dimension column -E_ <- cbind(matrix(NA_real_, nrow = 0, ncol = 1), E) -# distance from empty using phutil/Hera -wasserstein_distance(E, X, p = 1) -wasserstein_distance(E, X, p = 2) -bottleneck_distance(E, X) -# distance from empty using TDA/Dionysus -TDA::wasserstein(E_, cbind(0, X), p = 1, dimension = 0) -sqrt(TDA::wasserstein(E_, cbind(0, X), p = 2, dimension = 0)) -TDA::bottleneck(E_, cbind(0, X), dimension = 0) -``` - -## Benchmarks - -For a straightforward benchmark test, we compute PDs from point clouds sampled -with noise from two one-dimensional manifolds embedded in $\mathbb{R}^3$: the -circle as a trefoil knot and the segment as a two-armed archimedian spiral. -To prevent the results from being sensitive to an accident of a single sample, -we generate lists of 24 samples and benchmark only one iteration of each -function on each. - -```{r compute large PDs fake, eval=FALSE} -set.seed(28415) -n <- 24 -PDs1 <- lapply(seq(n), function(i) { - S1 <- tdaunif::sample_trefoil(n = 120, sd = .05) - as_persistence(TDA::ripsDiag(S1, maxdimension = 2, maxscale = 6)) -}) -PDs2 <- lapply(seq(n), function(i) { - S2 <- cbind(tdaunif::sample_arch_spiral(n = 120, arms = 2), 0) - S2 <- tdaunif::add_noise(S2, sd = .05) - as_persistence(TDA::ripsDiag(S2, maxdimension = 2, maxscale = 6)) -}) -``` - -```{r compute large PDs true, echo=FALSE} -n <- 24 -PDs1 <- trefoils -PDs2 <- arch_spirals -``` - -Both implementations are used to compute distances between successive pairs of -diagrams. The computations are annotated by homological degree and Wasserstein -power so that these results can be compared separately. - -```{r} -#| label: benchmark phutil and TDA -#| warning: false -PDs1_ <- lapply(lapply(PDs1, as.data.frame), as.matrix) -PDs2_ <- lapply(lapply(PDs2, as.data.frame), as.matrix) -# iterate over homological degrees and Wasserstein powers -bm_all <- list() -PDs_i <- seq_along(PDs1) -for (dimension in seq(0, 2)) { - # compute - bm_1 <- do.call(rbind, lapply(seq_along(PDs1), function(i) { - as.data.frame(microbenchmark::microbenchmark( - TDA = TDA::wasserstein( - PDs1_[[i]], PDs2_[[i]], dimension = dimension, p = 1 - ), - phutil = wasserstein_distance( - PDs1[[i]], PDs2[[i]], dimension = dimension, p = 1 - ), - times = 1, unit = "ns" - )) - })) - bm_2 <- do.call(rbind, lapply(seq_along(PDs1), function(i) { - as.data.frame(microbenchmark::microbenchmark( - TDA = sqrt(TDA::wasserstein( - PDs1_[[i]], PDs2_[[i]], dimension = dimension, p = 2 - )), - phutil = wasserstein_distance( - PDs1[[i]], PDs2[[i]], dimension = dimension, p = 2 - ), - times = 1, unit = "ns" - )) - })) - bm_inf <- do.call(rbind, lapply(seq_along(PDs1), function(i) { - as.data.frame(microbenchmark::microbenchmark( - TDA = TDA::bottleneck( - PDs1_[[i]], PDs2_[[i]], dimension = dimension - ), - phutil = bottleneck_distance( - PDs1[[i]], PDs2[[i]], dimension = dimension - ), - times = 1, unit = "ns" - )) - })) - # annotate and combine - bm_1$power <- 1; bm_2$power <- 2; bm_inf$power <- Inf - bm_res <- rbind(bm_1, bm_2, bm_inf) - bm_res$degree <- dimension - bm_all <- c(bm_all, list(bm_res)) -} -bm_all <- do.call(rbind, bm_all) -``` - -@fig-benchmark-large compares the distributions of runtimes by homological -degree (column) and Wasserstein power (row). We use nanoseconds in -{microbenchmark} when benchmarking to avoid potential integer overflows. Hence, -we convert the results into seconds ahead of formatting the axis in seconds. - -```{r} -#| label: fig-benchmark-large -#| fig-width: 8 -#| fig-height: 3 -#| fig-align: 'center' -#| fig-retina: 2 -#| fig-cap: "Benchmark comparison of Dionysus via {TDA} and Hera via {phutil} on -#| large persistence diagrams: Violin plots of runtime distributions on a common -#| scale." -bm_all <- transform(bm_all, expr = as.character(expr), time = unlist(time)) -bm_all <- subset(bm_all, select = c(expr, degree, power, time)) -ggplot(bm_all, aes(x = time * 10e-9, y = expr)) + - facet_grid( - rows = vars(power), cols = vars(degree), - labeller = label_both - ) + - geom_violin() + - scale_x_continuous( - transform = "log10", - labels = scales::label_timespan(units = "secs") - ) + - labs(x = NULL, y = NULL) -``` - -We note that Dionysus via {TDA} clearly outperforms Hera via {phutil} on degree-1 PDs, which in these cases have many fewer features. -However, the tables are turned in degree 0, in which the PDs have many more features---which, when present, dominate the total computational cost. -(The implementations are more evenly matched on the degree-2 PDs, which may have to do with many of them being empty.) -While by no means exhaustive and not necessarily representative, these results suggest that Hera via {phutil} scales more efficiently than Dionysus via {TDA} and should therefore be preferred for projects involving more feature-rich data sets. - -## References From dedf08e75c509c1383803381e424b0f14ca64ced Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Thu, 8 May 2025 00:00:13 +0200 Subject: [PATCH 02/21] Vendor cpp11 to debug protection imbalances. --- DESCRIPTION | 3 +- inst/include/cpp11.hpp | 26 + inst/include/cpp11/R.hpp | 137 +++ inst/include/cpp11/altrep.hpp | 8 + inst/include/cpp11/as.hpp | 338 ++++++ inst/include/cpp11/attribute_proxy.hpp | 50 + inst/include/cpp11/data_frame.hpp | 108 ++ inst/include/cpp11/declarations.hpp | 62 + inst/include/cpp11/doubles.hpp | 100 ++ inst/include/cpp11/environment.hpp | 58 + inst/include/cpp11/external_pointer.hpp | 169 +++ inst/include/cpp11/function.hpp | 146 +++ inst/include/cpp11/integers.hpp | 106 ++ inst/include/cpp11/list.hpp | 103 ++ inst/include/cpp11/list_of.hpp | 73 ++ inst/include/cpp11/logicals.hpp | 79 ++ inst/include/cpp11/matrix.hpp | 232 ++++ inst/include/cpp11/named_arg.hpp | 50 + inst/include/cpp11/protect.hpp | 339 ++++++ inst/include/cpp11/r_bool.hpp | 83 ++ inst/include/cpp11/r_string.hpp | 105 ++ inst/include/cpp11/r_vector.hpp | 1458 +++++++++++++++++++++++ inst/include/cpp11/raws.hpp | 87 ++ inst/include/cpp11/sexp.hpp | 80 ++ inst/include/cpp11/strings.hpp | 150 +++ 25 files changed, 4148 insertions(+), 2 deletions(-) create mode 100644 inst/include/cpp11.hpp create mode 100644 inst/include/cpp11/R.hpp create mode 100644 inst/include/cpp11/altrep.hpp create mode 100644 inst/include/cpp11/as.hpp create mode 100644 inst/include/cpp11/attribute_proxy.hpp create mode 100644 inst/include/cpp11/data_frame.hpp create mode 100644 inst/include/cpp11/declarations.hpp create mode 100644 inst/include/cpp11/doubles.hpp create mode 100644 inst/include/cpp11/environment.hpp create mode 100644 inst/include/cpp11/external_pointer.hpp create mode 100644 inst/include/cpp11/function.hpp create mode 100644 inst/include/cpp11/integers.hpp create mode 100644 inst/include/cpp11/list.hpp create mode 100644 inst/include/cpp11/list_of.hpp create mode 100644 inst/include/cpp11/logicals.hpp create mode 100644 inst/include/cpp11/matrix.hpp create mode 100644 inst/include/cpp11/named_arg.hpp create mode 100644 inst/include/cpp11/protect.hpp create mode 100644 inst/include/cpp11/r_bool.hpp create mode 100644 inst/include/cpp11/r_string.hpp create mode 100644 inst/include/cpp11/r_vector.hpp create mode 100644 inst/include/cpp11/raws.hpp create mode 100644 inst/include/cpp11/sexp.hpp create mode 100644 inst/include/cpp11/strings.hpp diff --git a/DESCRIPTION b/DESCRIPTION index f6903c1..d7aa141 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,8 +32,7 @@ Suggests: tinysnapshot, tinytest LinkingTo: - BH, - cpp11 + BH VignetteBuilder: quarto Encoding: UTF-8 diff --git a/inst/include/cpp11.hpp b/inst/include/cpp11.hpp new file mode 100644 index 0000000..e650382 --- /dev/null +++ b/inst/include/cpp11.hpp @@ -0,0 +1,26 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include "cpp11/R.hpp" +#include "cpp11/altrep.hpp" +#include "cpp11/as.hpp" +#include "cpp11/attribute_proxy.hpp" +#include "cpp11/data_frame.hpp" +#include "cpp11/doubles.hpp" +#include "cpp11/environment.hpp" +#include "cpp11/external_pointer.hpp" +#include "cpp11/function.hpp" +#include "cpp11/integers.hpp" +#include "cpp11/list.hpp" +#include "cpp11/list_of.hpp" +#include "cpp11/logicals.hpp" +#include "cpp11/matrix.hpp" +#include "cpp11/named_arg.hpp" +#include "cpp11/protect.hpp" +#include "cpp11/r_bool.hpp" +#include "cpp11/r_string.hpp" +#include "cpp11/r_vector.hpp" +#include "cpp11/raws.hpp" +#include "cpp11/sexp.hpp" +#include "cpp11/strings.hpp" diff --git a/inst/include/cpp11/R.hpp b/inst/include/cpp11/R.hpp new file mode 100644 index 0000000..d0a4c11 --- /dev/null +++ b/inst/include/cpp11/R.hpp @@ -0,0 +1,137 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#ifdef R_INTERNALS_H_ +#if !(defined(R_NO_REMAP) && defined(STRICT_R_HEADERS)) +#error R headers were included before cpp11 headers \ + and at least one of R_NO_REMAP or STRICT_R_HEADERS \ + was not defined. +#endif +#endif + +#ifndef R_NO_REMAP +#define R_NO_REMAP +#endif + +#ifndef STRICT_R_HEADERS +#define STRICT_R_HEADERS +#endif + +#include "R_ext/Boolean.h" +#include "Rinternals.h" +#include "Rversion.h" + +// clang-format off +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wattributes" +#endif + +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wattributes" +#endif +// clang-format on + +#include + +#if defined(R_VERSION) && R_VERSION >= R_Version(4, 4, 0) +// Use R's new macro +#define CPP11_PRIdXLEN_T R_PRIdXLEN_T +#else +// Recreate what new R does +#ifdef LONG_VECTOR_SUPPORT +#define CPP11_PRIdXLEN_T "td" +#else +#define CPP11_PRIdXLEN_T "d" +#endif +#endif + +namespace cpp11 { +namespace literals { + +constexpr R_xlen_t operator""_xl(unsigned long long int value) { return value; } + +} // namespace literals + +namespace traits { +template +struct get_underlying_type { + using type = T; +}; +} // namespace traits + +namespace detail { + +// Annoyingly, `TYPEOF()` returns an `int` rather than a `SEXPTYPE`, +// which can throw warnings with `-Wsign-compare` on Windows. +inline SEXPTYPE r_typeof(SEXP x) { return static_cast(TYPEOF(x)); } + +/// Get an object from an environment +/// +/// SAFETY: Keep as a pure C function. Call like an R API function, i.e. wrap in `safe[]` +/// as required. +inline SEXP r_env_get(SEXP env, SEXP sym) { +#if defined(R_VERSION) && R_VERSION >= R_Version(4, 5, 0) + const Rboolean inherits = FALSE; + return R_getVar(sym, env, inherits); +#else + SEXP out = Rf_findVarInFrame3(env, sym, TRUE); + + // Replicate the 3 checks from `R_getVar()` (along with exact error message): + // - Object must be found in the `env` + // - `R_MissingArg` can't leak from an `env` anymore + // - Promises can't leak from an `env` anymore + + if (out == R_MissingArg) { + Rf_errorcall(R_NilValue, "argument \"%s\" is missing, with no default", + CHAR(PRINTNAME(sym))); + } + + if (out == R_UnboundValue) { + Rf_errorcall(R_NilValue, "object '%s' not found", CHAR(PRINTNAME(sym))); + } + + if (r_typeof(out) == PROMSXP) { + PROTECT(out); + out = Rf_eval(out, env); + UNPROTECT(1); + } + + return out; +#endif +} + +/// Check if an object exists in an environment +/// +/// SAFETY: Keep as a pure C function. Call like an R API function, i.e. wrap in `safe[]` +/// as required. +inline bool r_env_has(SEXP env, SEXP sym) { +#if R_VERSION >= R_Version(4, 2, 0) + return R_existsVarInFrame(env, sym); +#else + return Rf_findVarInFrame3(env, sym, FALSE) != R_UnboundValue; +#endif +} + +} // namespace detail + +template +inline T na(); + +template +inline typename std::enable_if::type, double>::value, + bool>::type +is_na(const T& value) { + return value == na(); +} + +template +inline typename std::enable_if::type, double>::value, + bool>::type +is_na(const T& value) { + return ISNA(value); +} + +} // namespace cpp11 diff --git a/inst/include/cpp11/altrep.hpp b/inst/include/cpp11/altrep.hpp new file mode 100644 index 0000000..1fe5999 --- /dev/null +++ b/inst/include/cpp11/altrep.hpp @@ -0,0 +1,8 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +// It would be nice to remove this since all supported versions of R have ALTREP, but +// some groups rely on both this `#define` and `altrep.hpp` itself existing, like arrow: +// https://github.com/r-lib/cpp11/issues/413 +#define HAS_ALTREP diff --git a/inst/include/cpp11/as.hpp b/inst/include/cpp11/as.hpp new file mode 100644 index 0000000..27232ee --- /dev/null +++ b/inst/include/cpp11/as.hpp @@ -0,0 +1,338 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for modf +#include // for initializer_list +#include // for std::shared_ptr, std::weak_ptr, std::unique_ptr +#include +#include // for string, basic_string +#include // for decay, enable_if, is_same, is_convertible + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_xlength, R_xlen_t +#include "cpp11/protect.hpp" // for stop, protect, safe, protect::function + +namespace cpp11 { + +template +using enable_if_t = typename std::enable_if::type; + +template +using decay_t = typename std::decay::type; + +template +struct is_smart_ptr : std::false_type {}; + +template +struct is_smart_ptr> : std::true_type {}; + +template +struct is_smart_ptr> : std::true_type {}; + +template +struct is_smart_ptr> : std::true_type {}; + +template +using enable_if_constructible_from_sexp = + enable_if_t::value && // workaround for gcc 4.8 + std::is_class::value && std::is_constructible::value, + R>; + +template +using enable_if_is_sexp = enable_if_t::value, R>; + +template +using enable_if_convertible_to_sexp = enable_if_t::value, R>; + +template +using disable_if_convertible_to_sexp = + enable_if_t::value, R>; + +template +using enable_if_integral = + enable_if_t::value && !std::is_same::value && + !std::is_same::value, + R>; + +template +using enable_if_floating_point = + typename std::enable_if::value, R>::type; + +template +using enable_if_enum = enable_if_t::value, R>; + +template +using enable_if_bool = enable_if_t::value, R>; + +template +using enable_if_char = enable_if_t::value, R>; + +template +using enable_if_std_string = enable_if_t::value, R>; + +template +using enable_if_c_string = enable_if_t::value, R>; + +// https://stackoverflow.com/a/1521682/2055486 +// +inline bool is_convertible_without_loss_to_integer(double value) { + double int_part; + return std::modf(value, &int_part) == 0.0; +} + +template +enable_if_constructible_from_sexp as_cpp(SEXP from) { + return T(from); +} + +template +enable_if_is_sexp as_cpp(SEXP from) { + return from; +} + +template +enable_if_integral as_cpp(SEXP from) { + if (Rf_isInteger(from)) { + if (Rf_xlength(from) == 1) { + return INTEGER_ELT(from, 0); + } + } else if (Rf_isReal(from)) { + if (Rf_xlength(from) == 1) { + if (ISNA(REAL_ELT(from, 0))) { + return NA_INTEGER; + } + double value = REAL_ELT(from, 0); + if (is_convertible_without_loss_to_integer(value)) { + return value; + } + } + } else if (Rf_isLogical(from)) { + if (Rf_xlength(from) == 1) { + if (LOGICAL_ELT(from, 0) == NA_LOGICAL) { + return NA_INTEGER; + } + } + } + + throw std::length_error("Expected single integer value"); +} + +template +enable_if_enum as_cpp(SEXP from) { + if (Rf_isInteger(from)) { + using underlying_type = typename std::underlying_type::type; + using int_type = typename std::conditional::value, + int, // as_cpp would trigger + // undesired string conversions + underlying_type>::type; + return static_cast(as_cpp(from)); + } + + throw std::length_error("Expected single integer value"); +} + +template +enable_if_bool as_cpp(SEXP from) { + if (Rf_isLogical(from)) { + if (Rf_xlength(from) == 1) { + return LOGICAL_ELT(from, 0) == 1; + } + } + + throw std::length_error("Expected single logical value"); +} + +template +enable_if_floating_point as_cpp(SEXP from) { + if (Rf_isReal(from)) { + if (Rf_xlength(from) == 1) { + return REAL_ELT(from, 0); + } + } + // All 32 bit integers can be coerced to doubles, so we just convert them. + if (Rf_isInteger(from)) { + if (Rf_xlength(from) == 1) { + if (INTEGER_ELT(from, 0) == NA_INTEGER) { + return NA_REAL; + } + return INTEGER_ELT(from, 0); + } + } + + // Also allow NA values + if (Rf_isLogical(from)) { + if (Rf_xlength(from) == 1) { + if (LOGICAL_ELT(from, 0) == NA_LOGICAL) { + return NA_REAL; + } + } + } + + throw std::length_error("Expected single double value"); +} + +template +enable_if_char as_cpp(SEXP from) { + if (Rf_isString(from)) { + if (Rf_xlength(from) == 1) { + return unwind_protect([&] { return Rf_translateCharUTF8(STRING_ELT(from, 0))[0]; }); + } + } + + throw std::length_error("Expected string vector of length 1"); +} + +template +enable_if_c_string as_cpp(SEXP from) { + if (Rf_isString(from)) { + if (Rf_xlength(from) == 1) { + // TODO: use vmaxget / vmaxset here? + return {unwind_protect([&] { return Rf_translateCharUTF8(STRING_ELT(from, 0)); })}; + } + } + + throw std::length_error("Expected string vector of length 1"); +} + +template +enable_if_std_string as_cpp(SEXP from) { + return {as_cpp(from)}; +} + +/// Temporary workaround for compatibility with cpp11 0.1.0 +template +enable_if_t, T>::value, decay_t> as_cpp(SEXP from) { + return as_cpp>(from); +} + +template +enable_if_integral as_sexp(T from) { + return safe[Rf_ScalarInteger](from); +} + +template +enable_if_floating_point as_sexp(T from) { + return safe[Rf_ScalarReal](from); +} + +template +enable_if_bool as_sexp(T from) { + return safe[Rf_ScalarLogical](from); +} + +template +enable_if_c_string as_sexp(T from) { + return unwind_protect([&] { return Rf_ScalarString(Rf_mkCharCE(from, CE_UTF8)); }); +} + +template +enable_if_std_string as_sexp(const T& from) { + return as_sexp(from.c_str()); +} + +template > +enable_if_integral as_sexp(const Container& from) { + R_xlen_t size = from.size(); + SEXP data = safe[Rf_allocVector](INTSXP, size); + + auto it = from.begin(); + int* data_p = INTEGER(data); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + data_p[i] = *it; + } + return data; +} + +inline SEXP as_sexp(std::initializer_list from) { + return as_sexp>(from); +} + +template > +enable_if_floating_point as_sexp(const Container& from) { + R_xlen_t size = from.size(); + SEXP data = safe[Rf_allocVector](REALSXP, size); + + auto it = from.begin(); + double* data_p = REAL(data); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + data_p[i] = *it; + } + return data; +} + +inline SEXP as_sexp(std::initializer_list from) { + return as_sexp>(from); +} + +template > +enable_if_bool as_sexp(const Container& from) { + R_xlen_t size = from.size(); + SEXP data = safe[Rf_allocVector](LGLSXP, size); + + auto it = from.begin(); + int* data_p = LOGICAL(data); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + data_p[i] = *it; + } + return data; +} + +inline SEXP as_sexp(std::initializer_list from) { + return as_sexp>(from); +} + +namespace detail { +template +SEXP as_sexp_strings(const Container& from, AsCstring&& c_str) { + R_xlen_t size = from.size(); + + SEXP data; + try { + data = PROTECT(safe[Rf_allocVector](STRSXP, size)); + + auto it = from.begin(); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + SET_STRING_ELT(data, i, safe[Rf_mkCharCE](c_str(*it), CE_UTF8)); + } + } catch (const unwind_exception& e) { + UNPROTECT(1); + throw e; + } + + UNPROTECT(1); + return data; +} +} // namespace detail + +class r_string; + +template +using disable_if_r_string = enable_if_t::value, R>; + +template > +enable_if_t::value && + !std::is_convertible::value, + SEXP> +as_sexp(const Container& from) { + return detail::as_sexp_strings(from, [](const std::string& s) { return s.c_str(); }); +} + +template +enable_if_c_string as_sexp(const Container& from) { + return detail::as_sexp_strings(from, [](const char* s) { return s; }); +} + +inline SEXP as_sexp(std::initializer_list from) { + return as_sexp>(from); +} + +template > +enable_if_convertible_to_sexp as_sexp(const T& from) { + return from; +} + +} // namespace cpp11 diff --git a/inst/include/cpp11/attribute_proxy.hpp b/inst/include/cpp11/attribute_proxy.hpp new file mode 100644 index 0000000..5877ad6 --- /dev/null +++ b/inst/include/cpp11/attribute_proxy.hpp @@ -0,0 +1,50 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for initializer_list +#include // for string, basic_string + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_install, PROTECT, Rf_... +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/protect.hpp" // for protect, safe, protect::function + +namespace cpp11 { + +class sexp; + +template +class attribute_proxy { + private: + const T& parent_; + SEXP symbol_; + + public: + attribute_proxy(const T& parent, const char* index) + : parent_(parent), symbol_(safe[Rf_install](index)) {} + + attribute_proxy(const T& parent, const std::string& index) + : parent_(parent), symbol_(safe[Rf_install](index.c_str())) {} + + attribute_proxy(const T& parent, SEXP index) : parent_(parent), symbol_(index) {} + + template + attribute_proxy& operator=(C rhs) { + SEXP value = PROTECT(as_sexp(rhs)); + Rf_setAttrib(parent_.data(), symbol_, value); + UNPROTECT(1); + return *this; + } + + template + attribute_proxy& operator=(std::initializer_list rhs) { + SEXP value = PROTECT(as_sexp(rhs)); + Rf_setAttrib(parent_.data(), symbol_, value); + UNPROTECT(1); + return *this; + } + + operator SEXP() const { return safe[Rf_getAttrib](parent_.data(), symbol_); } +}; + +} // namespace cpp11 diff --git a/inst/include/cpp11/data_frame.hpp b/inst/include/cpp11/data_frame.hpp new file mode 100644 index 0000000..3c4ba6b --- /dev/null +++ b/inst/include/cpp11/data_frame.hpp @@ -0,0 +1,108 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for abs +#include +#include // for initializer_list +#include // for string, basic_string +#include // for move + +#include "R_ext/Arith.h" // for NA_INTEGER +#include "cpp11/R.hpp" // for Rf_xlength, SEXP, SEXPREC, INTEGER +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/list.hpp" // for list, r_vector<>::r_vector, r_v... +#include "cpp11/r_vector.hpp" // for r_vector + +namespace cpp11 { + +class named_arg; +namespace writable { +class data_frame; +} // namespace writable + +class data_frame : public list { + using list::list; + + friend class writable::data_frame; + + /* we cannot use Rf_getAttrib because it has a special case for c(NA, -n) and creates + * the full vector */ + static SEXP get_attrib0(SEXP x, SEXP sym) { + for (SEXP attr = ATTRIB(x); attr != R_NilValue; attr = CDR(attr)) { + if (TAG(attr) == sym) { + return CAR(attr); + } + } + + return R_NilValue; + } + + static R_xlen_t calc_nrow(SEXP x) { + auto nms = get_attrib0(x, R_RowNamesSymbol); + bool has_short_rownames = + (Rf_isInteger(nms) && Rf_xlength(nms) == 2 && INTEGER(nms)[0] == NA_INTEGER); + if (has_short_rownames) { + return static_cast(abs(INTEGER(nms)[1])); + } + + if (!Rf_isNull(nms)) { + return Rf_xlength(nms); + } + + if (Rf_xlength(x) == 0) { + return 0; + } + + return Rf_xlength(VECTOR_ELT(x, 0)); + } + + public: + /* Adapted from + * https://github.com/wch/r-source/blob/f2a0dfab3e26fb42b8b296fcba40cbdbdbec767d/src/main/attrib.c#L198-L207 + */ + R_xlen_t nrow() const { return calc_nrow(*this); } + R_xlen_t ncol() const { return size(); } +}; + +namespace writable { +class data_frame : public cpp11::data_frame { + private: + writable::list set_data_frame_attributes(writable::list&& x) { + return set_data_frame_attributes(std::move(x), calc_nrow(x)); + } + + writable::list set_data_frame_attributes(writable::list&& x, R_xlen_t nrow) { + x.attr(R_RowNamesSymbol) = {NA_INTEGER, -static_cast(nrow)}; + x.attr(R_ClassSymbol) = "data.frame"; + return std::move(x); + } + + public: + data_frame(const SEXP data) : cpp11::data_frame(set_data_frame_attributes(data)) {} + data_frame(const SEXP data, bool is_altrep) + : cpp11::data_frame(set_data_frame_attributes(data), is_altrep) {} + data_frame(const SEXP data, bool is_altrep, R_xlen_t nrow) + : cpp11::data_frame(set_data_frame_attributes(data, nrow), is_altrep) {} + data_frame(std::initializer_list il) + : cpp11::data_frame(set_data_frame_attributes(writable::list(il))) {} + data_frame(std::initializer_list il) + : cpp11::data_frame(set_data_frame_attributes(writable::list(il))) {} + + using cpp11::data_frame::ncol; + using cpp11::data_frame::nrow; + + attribute_proxy attr(const char* name) const { return {*this, name}; } + + attribute_proxy attr(const std::string& name) const { + return {*this, name.c_str()}; + } + + attribute_proxy attr(SEXP name) const { return {*this, name}; } + + attribute_proxy names() const { return {*this, R_NamesSymbol}; } +}; + +} // namespace writable + +} // namespace cpp11 diff --git a/inst/include/cpp11/declarations.hpp b/inst/include/cpp11/declarations.hpp new file mode 100644 index 0000000..93fee14 --- /dev/null +++ b/inst/include/cpp11/declarations.hpp @@ -0,0 +1,62 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include +#include +#include + +// Davis: From what I can tell, you'd only ever define this if you need to include +// `declarations.hpp` manually in a file, i.e. to possibly use `BEGIN_CPP11` with a +// custom `END_CPP11`, as textshaping does do. Otherwise, `declarations.hpp` is included +// in `code.cpp` and should contain all of the cpp11 type definitions that the generated +// function signatures need to link against. +#ifndef CPP11_PARTIAL +#include "cpp11.hpp" +namespace writable = ::cpp11::writable; +using namespace ::cpp11; +#endif + +#include + +namespace cpp11 { +// No longer used, but was previously used in `code.cpp` code generation in cpp11 0.1.0. +// `code.cpp` could be generated with cpp11 0.1.0, but the package could be compiled with +// cpp11 >0.1.0, so `unmove()` must exist in newer cpp11 too. Eventually remove this once +// we decide enough time has gone by since `unmove()` was removed. +// https://github.com/r-lib/cpp11/issues/88 +// https://github.com/r-lib/cpp11/pull/75 +template +T& unmove(T&& t) { + return t; +} +} // namespace cpp11 + +// We would like to remove this, since all supported versions of R now support proper +// unwind protect, but some groups rely on it existing, like textshaping: +// https://github.com/r-lib/cpp11/issues/414 +#define CPP11_UNWIND R_ContinueUnwind(err); + +#define CPP11_ERROR_BUFSIZE 8192 + +#define BEGIN_CPP11 \ + SEXP err = R_NilValue; \ + char buf[CPP11_ERROR_BUFSIZE] = ""; \ + try { +#define END_CPP11 \ + } \ + catch (cpp11::unwind_exception & e) { \ + err = e.token; \ + } \ + catch (std::exception & e) { \ + strncpy(buf, e.what(), sizeof(buf) - 1); \ + } \ + catch (...) { \ + strncpy(buf, "C++ error (unknown cause)", sizeof(buf) - 1); \ + } \ + if (buf[0] != '\0') { \ + Rf_errorcall(R_NilValue, "%s", buf); \ + } else if (err != R_NilValue) { \ + R_ContinueUnwind(err); \ + } \ + return R_NilValue; diff --git a/inst/include/cpp11/doubles.hpp b/inst/include/cpp11/doubles.hpp new file mode 100644 index 0000000..e83083c --- /dev/null +++ b/inst/include/cpp11/doubles.hpp @@ -0,0 +1,100 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for min, tranform +#include // for array +#include // for initializer_list + +#include "R_ext/Arith.h" // for ISNA +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_allocVector, REAL +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/protect.hpp" // for safe +#include "cpp11/r_vector.hpp" // for vector, vector<>::proxy, vector<>::... +#include "cpp11/sexp.hpp" // for sexp + +// Specializations for doubles + +namespace cpp11 { + +template <> +inline SEXPTYPE r_vector::get_sexptype() { + return REALSXP; +} + +template <> +inline typename r_vector::underlying_type r_vector::get_elt(SEXP x, + R_xlen_t i) { + // NOPROTECT: likely too costly to unwind protect every elt + return REAL_ELT(x, i); +} + +template <> +inline typename r_vector::underlying_type* r_vector::get_p(bool is_altrep, + SEXP data) { + if (is_altrep) { + return nullptr; + } else { + return REAL(data); + } +} + +template <> +inline typename r_vector::underlying_type const* r_vector::get_const_p( + bool is_altrep, SEXP data) { + return REAL_OR_NULL(data); +} + +template <> +inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, + typename r_vector::underlying_type* buf) { + // NOPROTECT: likely too costly to unwind protect here + REAL_GET_REGION(x, i, n, buf); +} + +template <> +inline bool r_vector::const_iterator::use_buf(bool is_altrep) { + return is_altrep; +} + +typedef r_vector doubles; + +namespace writable { + +template <> +inline void r_vector::set_elt(SEXP x, R_xlen_t i, + typename r_vector::underlying_type value) { + // NOPROTECT: Likely too costly to unwind protect every set elt + SET_REAL_ELT(x, i, value); +} + +typedef r_vector doubles; + +} // namespace writable + +typedef r_vector integers; + +inline doubles as_doubles(SEXP x) { + if (detail::r_typeof(x) == REALSXP) { + return doubles(x); + } + + else if (detail::r_typeof(x) == INTSXP) { + integers xn(x); + size_t len = xn.size(); + writable::doubles ret(len); + std::transform(xn.begin(), xn.end(), ret.begin(), [](int value) { + return value == NA_INTEGER ? NA_REAL : static_cast(value); + }); + return ret; + } + + throw type_error(REALSXP, detail::r_typeof(x)); +} + +template <> +inline double na() { + return NA_REAL; +} + +} // namespace cpp11 diff --git a/inst/include/cpp11/environment.hpp b/inst/include/cpp11/environment.hpp new file mode 100644 index 0000000..9d2c2b5 --- /dev/null +++ b/inst/include/cpp11/environment.hpp @@ -0,0 +1,58 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for string, basic_string + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_install, r_env_get... +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/protect.hpp" // for protect, protect::function, safe, unwin... +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { + +class environment { + private: + sexp env_; + + class proxy { + SEXP parent_; + SEXP name_; + + public: + proxy(SEXP parent, SEXP name) : parent_(parent), name_(name) {} + + template + proxy& operator=(T value) { + safe[Rf_defineVar](name_, as_sexp(value), parent_); + return *this; + } + operator SEXP() const { return safe[detail::r_env_get](parent_, name_); }; + operator sexp() const { return SEXP(); }; + }; + + public: + environment(SEXP env) : env_(env) {} + environment(sexp env) : env_(env) {} + proxy operator[](const SEXP name) const { return {env_, name}; } + proxy operator[](const char* name) const { return operator[](safe[Rf_install](name)); } + proxy operator[](const std::string& name) const { return operator[](name.c_str()); } + + bool exists(SEXP name) const { return safe[detail::r_env_has](env_, name); } + bool exists(const char* name) const { return exists(safe[Rf_install](name)); } + bool exists(const std::string& name) const { return exists(name.c_str()); } + + void remove(SEXP name) { + PROTECT(name); + R_removeVarFromFrame(name, env_); + UNPROTECT(1); + } + + void remove(const char* name) { remove(safe[Rf_install](name)); } + + R_xlen_t size() const { return Rf_xlength(env_); } + + operator SEXP() const { return env_; } +}; + +} // namespace cpp11 diff --git a/inst/include/cpp11/external_pointer.hpp b/inst/include/cpp11/external_pointer.hpp new file mode 100644 index 0000000..409c28e --- /dev/null +++ b/inst/include/cpp11/external_pointer.hpp @@ -0,0 +1,169 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for nullptr_t, NULL +#include // for bad_weak_ptr +#include // for add_lvalue_reference + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, R_NilValue +#include "cpp11/protect.hpp" // for protect, safe, protect::function +#include "cpp11/r_bool.hpp" // for r_bool +#include "cpp11/r_vector.hpp" // for type_error +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { + +template +void default_deleter(T* obj) { + delete obj; +} + +template > +class external_pointer { + private: + sexp data_ = R_NilValue; + + static SEXP valid_type(SEXP data) { + if (data == nullptr) { + throw type_error(EXTPTRSXP, NILSXP); + } + if (detail::r_typeof(data) != EXTPTRSXP) { + throw type_error(EXTPTRSXP, detail::r_typeof(data)); + } + + return data; + } + + static void r_deleter(SEXP p) { + if (detail::r_typeof(p) != EXTPTRSXP) return; + + T* ptr = static_cast(R_ExternalPtrAddr(p)); + + if (ptr == NULL) { + return; + } + + R_ClearExternalPtr(p); + + Deleter(ptr); + } + + public: + using pointer = T*; + + external_pointer() noexcept {} + external_pointer(std::nullptr_t) noexcept {} + + external_pointer(SEXP data) : data_(valid_type(data)) {} + + external_pointer(pointer p, bool use_deleter = true, bool finalize_on_exit = true) + : data_(safe[R_MakeExternalPtr]((void*)p, R_NilValue, R_NilValue)) { + if (use_deleter) { + R_RegisterCFinalizerEx(data_, r_deleter, static_cast(finalize_on_exit)); + } + } + + external_pointer(const external_pointer& rhs) { + data_ = safe[Rf_shallow_duplicate](rhs.data_); + } + + external_pointer(external_pointer&& rhs) { reset(rhs.release()); } + + external_pointer& operator=(external_pointer&& rhs) noexcept { reset(rhs.release()); } + + external_pointer& operator=(std::nullptr_t) noexcept { reset(); }; + + operator SEXP() const noexcept { return data_; } + + pointer get() const noexcept { + pointer addr = static_cast(R_ExternalPtrAddr(data_)); + if (addr == nullptr) { + return nullptr; + } + return addr; + } + + typename std::add_lvalue_reference::type operator*() { + pointer addr = get(); + if (addr == nullptr) { + throw std::bad_weak_ptr(); + } + return *get(); + } + + pointer operator->() const { + pointer addr = get(); + if (addr == nullptr) { + throw std::bad_weak_ptr(); + } + return get(); + } + + pointer release() noexcept { + if (get() == nullptr) { + return nullptr; + } + pointer ptr = get(); + R_ClearExternalPtr(data_); + + return ptr; + } + + void reset(pointer ptr = pointer()) { + SEXP old_data = data_; + data_ = safe[R_MakeExternalPtr]((void*)ptr, R_NilValue, R_NilValue); + r_deleter(old_data); + } + + void swap(external_pointer& other) noexcept { + SEXP tmp = other.data_; + other.data_ = data_; + data_ = tmp; + } + + operator bool() noexcept { return data_ != nullptr; } +}; + +template +void swap(external_pointer& lhs, external_pointer& rhs) noexcept { + lhs.swap(rhs); +} + +template +bool operator==(const external_pointer& x, + const external_pointer& y) { + return x.data_ == y.data_; +} + +template +bool operator!=(const external_pointer& x, + const external_pointer& y) { + return x.data_ != y.data_; +} + +template +bool operator<(const external_pointer& x, + const external_pointer& y) { + return x.data_ < y.data_; +} + +template +bool operator<=(const external_pointer& x, + const external_pointer& y) { + return x.data_ <= y.data_; +} + +template +bool operator>(const external_pointer& x, + const external_pointer& y) { + return x.data_ > y.data_; +} + +template +bool operator>=(const external_pointer& x, + const external_pointer& y) { + return x.data_ >= y.data_; +} + +} // namespace cpp11 diff --git a/inst/include/cpp11/function.hpp b/inst/include/cpp11/function.hpp new file mode 100644 index 0000000..47ed1f0 --- /dev/null +++ b/inst/include/cpp11/function.hpp @@ -0,0 +1,146 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for strcmp + +#include // for snprintf +#include // for string, basic_string +#include // for forward + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, CDR, Rf_install, SETCAR +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/named_arg.hpp" // for named_arg +#include "cpp11/protect.hpp" // for protect, protect::function, safe +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { + +class function { + public: + function(SEXP data) : data_(data) {} + + template + sexp operator()(Args&&... args) const { + // Size of the arguments plus one for the function name itself + R_xlen_t num_args = sizeof...(args) + 1; + + sexp call(safe[Rf_allocVector](LANGSXP, num_args)); + + construct_call(call, data_, std::forward(args)...); + + return safe[Rf_eval](call, R_GlobalEnv); + } + + private: + sexp data_; + + template + void construct_call(SEXP val, const named_arg& arg, Args&&... args) const { + SETCAR(val, arg.value()); + SET_TAG(val, safe[Rf_install](arg.name())); + val = CDR(val); + construct_call(val, std::forward(args)...); + } + + // Construct the call recursively, each iteration adds an Arg to the pairlist. + template + void construct_call(SEXP val, const T& arg, Args&&... args) const { + SETCAR(val, as_sexp(arg)); + val = CDR(val); + construct_call(val, std::forward(args)...); + } + + // Base case, just return + void construct_call(SEXP val) const {} +}; + +class package { + public: + package(const char* name) : data_(get_namespace(name)) {} + package(const std::string& name) : data_(get_namespace(name.c_str())) {} + function operator[](const char* name) { + return safe[Rf_findFun](safe[Rf_install](name), data_); + } + function operator[](const std::string& name) { return operator[](name.c_str()); } + + private: + static SEXP get_namespace(const char* name) { + if (strcmp(name, "base") == 0) { + return R_BaseEnv; + } + sexp name_sexp = safe[Rf_install](name); + return safe[detail::r_env_get](R_NamespaceRegistry, name_sexp); + } + + // Either base env or in namespace registry, so no protection needed + SEXP data_; +}; + +namespace detail { + +// Special internal way to call `base::message()` +// +// - Pure C, so call with `safe[]` +// - Holds a `static SEXP` for the `base::message` function protected with +// `R_PreserveObject()` +// +// We don't use a `static cpp11::function` because that will infinitely retain a cell in +// our preserve list, which can throw off our counts in the preserve list tests. +inline void r_message(const char* x) { + static SEXP fn = NULL; + + if (fn == NULL) { + fn = Rf_findFun(Rf_install("message"), R_BaseEnv); + R_PreserveObject(fn); + } + + SEXP x_char = PROTECT(Rf_mkCharCE(x, CE_UTF8)); + SEXP x_string = PROTECT(Rf_ScalarString(x_char)); + + SEXP call = PROTECT(Rf_lang2(fn, x_string)); + + Rf_eval(call, R_GlobalEnv); + + UNPROTECT(3); +} + +} // namespace detail + +inline void message(const char* fmt_arg) { +#ifdef CPP11_USE_FMT + std::string msg = fmt::format(fmt_arg); + safe[detail::r_message](msg.c_str()); +#else + char buff[1024]; + int msg; + msg = std::snprintf(buff, 1024, "%s", fmt_arg); + if (msg >= 0 && msg < 1024) { + safe[detail::r_message](buff); + } +#endif +} + +template +void message(const char* fmt_arg, Args... args) { +#ifdef CPP11_USE_FMT + std::string msg = fmt::format(fmt_arg, args...); + safe[detail::r_message](msg.c_str()); +#else + char buff[1024]; + int msg; + msg = std::snprintf(buff, 1024, fmt_arg, args...); + if (msg >= 0 && msg < 1024) { + safe[detail::r_message](buff); + } +#endif +} + +inline void message(const std::string& fmt_arg) { message(fmt_arg.c_str()); } + +template +void message(const std::string& fmt_arg, Args... args) { + message(fmt_arg.c_str(), args...); +} + +} // namespace cpp11 diff --git a/inst/include/cpp11/integers.hpp b/inst/include/cpp11/integers.hpp new file mode 100644 index 0000000..f71dbb6 --- /dev/null +++ b/inst/include/cpp11/integers.hpp @@ -0,0 +1,106 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for min +#include // for array +#include // for initializer_list + +#include "R_ext/Arith.h" // for NA_INTEGER +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_allocVector +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/protect.hpp" // for safe +#include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy +#include "cpp11/sexp.hpp" // for sexp + +// Specializations for integers + +namespace cpp11 { + +template <> +inline SEXPTYPE r_vector::get_sexptype() { + return INTSXP; +} + +template <> +inline typename r_vector::underlying_type r_vector::get_elt(SEXP x, + R_xlen_t i) { + // NOPROTECT: likely too costly to unwind protect every elt + return INTEGER_ELT(x, i); +} + +template <> +inline typename r_vector::underlying_type* r_vector::get_p(bool is_altrep, + SEXP data) { + if (is_altrep) { + return nullptr; + } else { + return INTEGER(data); + } +} + +template <> +inline typename r_vector::underlying_type const* r_vector::get_const_p( + bool is_altrep, SEXP data) { + return INTEGER_OR_NULL(data); +} + +template <> +inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, + typename r_vector::underlying_type* buf) { + // NOPROTECT: likely too costly to unwind protect here + INTEGER_GET_REGION(x, i, n, buf); +} + +template <> +inline bool r_vector::const_iterator::use_buf(bool is_altrep) { + return is_altrep; +} + +typedef r_vector integers; + +namespace writable { + +template <> +inline void r_vector::set_elt(SEXP x, R_xlen_t i, + typename r_vector::underlying_type value) { + // NOPROTECT: Likely too costly to unwind protect every set elt + SET_INTEGER_ELT(x, i, value); +} + +typedef r_vector integers; + +} // namespace writable + +template <> +inline int na() { + return NA_INTEGER; +} + +// forward declaration + +typedef r_vector doubles; + +inline integers as_integers(SEXP x) { + if (detail::r_typeof(x) == INTSXP) { + return integers(x); + } else if (detail::r_typeof(x) == REALSXP) { + doubles xn(x); + writable::integers ret(xn.size()); + std::transform(xn.begin(), xn.end(), ret.begin(), [](double value) { + if (ISNA(value)) { + return NA_INTEGER; + } + if (!is_convertible_without_loss_to_integer(value)) { + throw std::runtime_error("All elements must be integer-like"); + } + return static_cast(value); + }); + return ret; + } + + throw type_error(INTSXP, detail::r_typeof(x)); +} + +} // namespace cpp11 diff --git a/inst/include/cpp11/list.hpp b/inst/include/cpp11/list.hpp new file mode 100644 index 0000000..e615f41 --- /dev/null +++ b/inst/include/cpp11/list.hpp @@ -0,0 +1,103 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for initializer_list + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, SET_VECTOR_ELT +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/protect.hpp" // for safe +#include "cpp11/r_string.hpp" // for r_string +#include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy +#include "cpp11/sexp.hpp" // for sexp + +// Specializations for list + +namespace cpp11 { + +template <> +inline SEXPTYPE r_vector::get_sexptype() { + return VECSXP; +} + +template <> +inline typename r_vector::underlying_type r_vector::get_elt(SEXP x, + R_xlen_t i) { + // NOPROTECT: likely too costly to unwind protect every elt + return VECTOR_ELT(x, i); +} + +template <> +inline typename r_vector::underlying_type* r_vector::get_p(bool, SEXP) { + return nullptr; +} + +template <> +inline typename r_vector::underlying_type const* r_vector::get_const_p( + bool is_altrep, SEXP data) { + // No `VECTOR_PTR_OR_NULL()` + if (is_altrep) { + return nullptr; + } else { + // TODO: Use `VECTOR_PTR_RO()` conditionally once R 4.5.0 is officially released + return static_cast(DATAPTR_RO(data)); + } +} + +/// Specialization for lists, where `x["oob"]` returns `R_NilValue`, like at the R level +template <> +inline SEXP r_vector::get_oob() { + return R_NilValue; +} + +template <> +inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, + typename r_vector::underlying_type* buf) { + cpp11::stop("Unreachable!"); +} + +template <> +inline bool r_vector::const_iterator::use_buf(bool is_altrep) { + return false; +} + +typedef r_vector list; + +namespace writable { + +template <> +inline void r_vector::set_elt(SEXP x, R_xlen_t i, + typename r_vector::underlying_type value) { + // NOPROTECT: Likely too costly to unwind protect every set elt + SET_VECTOR_ELT(x, i, value); +} + +// Requires specialization to handle the fact that, for lists, each element of the +// initializer list is considered the scalar "element", i.e. we don't expect that +// each `named_arg` contains a list of length 1, like we do for the other vector types. +// This means we don't need type checks, length 1 checks, or `get_elt()` for lists. +template <> +inline r_vector::r_vector(std::initializer_list il) + : cpp11::r_vector(safe[Rf_allocVector](VECSXP, il.size())), + capacity_(il.size()) { + unwind_protect([&] { + SEXP names = Rf_allocVector(STRSXP, capacity_); + Rf_setAttrib(data_, R_NamesSymbol, names); + + auto it = il.begin(); + + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + SEXP elt = it->value(); + set_elt(data_, i, elt); + + SEXP name = Rf_mkCharCE(it->name(), CE_UTF8); + SET_STRING_ELT(names, i, name); + } + }); +} + +typedef r_vector list; + +} // namespace writable + +} // namespace cpp11 diff --git a/inst/include/cpp11/list_of.hpp b/inst/include/cpp11/list_of.hpp new file mode 100644 index 0000000..8037a28 --- /dev/null +++ b/inst/include/cpp11/list_of.hpp @@ -0,0 +1,73 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for string, basic_string + +#include "cpp11/R.hpp" // for R_xlen_t, SEXP, SEXPREC, LONG_VECTOR_SUPPORT +#include "cpp11/list.hpp" // for list + +namespace cpp11 { + +template +class list_of : public list { + public: + list_of(const list& data) : list(data) {} + +#ifdef LONG_VECTOR_SUPPORT + T operator[](const int pos) const { return operator[](static_cast(pos)); } +#endif + + T operator[](const R_xlen_t pos) const { return list::operator[](pos); } + + T operator[](const char* pos) const { return list::operator[](pos); } + + T operator[](const std::string& pos) const { return list::operator[](pos.c_str()); } +}; + +namespace writable { +template +class list_of : public writable::list { + public: + list_of(const list& data) : writable::list(data) {} + list_of(R_xlen_t n) : writable::list(n) {} + + class proxy { + private: + writable::list::proxy data_; + + public: + proxy(const writable::list::proxy& data) : data_(data) {} + + operator T() const { return static_cast(*this); } + operator SEXP() const { return static_cast(data_); } +#ifdef LONG_VECTOR_SUPPORT + typename T::proxy operator[](int pos) { return static_cast(data_)[pos]; } +#endif + typename T::proxy operator[](R_xlen_t pos) { return static_cast(data_)[pos]; } + proxy operator[](const char* pos) { static_cast(data_)[pos]; } + proxy operator[](const std::string& pos) { return static_cast(data_)[pos]; } + proxy& operator=(const T& rhs) { + data_ = rhs; + + return *this; + } + }; + +#ifdef LONG_VECTOR_SUPPORT + proxy operator[](int pos) { + return {writable::list::operator[](static_cast(pos))}; + } +#endif + + proxy operator[](R_xlen_t pos) { return writable::list::operator[](pos); } + + proxy operator[](const char* pos) { return {writable::list::operator[](pos)}; } + + proxy operator[](const std::string& pos) { + return writable::list::operator[](pos.c_str()); + } +}; +} // namespace writable + +} // namespace cpp11 diff --git a/inst/include/cpp11/logicals.hpp b/inst/include/cpp11/logicals.hpp new file mode 100644 index 0000000..42e8bd0 --- /dev/null +++ b/inst/include/cpp11/logicals.hpp @@ -0,0 +1,79 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for min +#include // for array +#include // for initializer_list + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_all... +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/protect.hpp" // for safe +#include "cpp11/r_bool.hpp" // for r_bool +#include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy +#include "cpp11/sexp.hpp" // for sexp + +// Specializations for logicals + +namespace cpp11 { + +template <> +inline SEXPTYPE r_vector::get_sexptype() { + return LGLSXP; +} + +template <> +inline typename r_vector::underlying_type r_vector::get_elt(SEXP x, + R_xlen_t i) { + // NOPROTECT: likely too costly to unwind protect every elt + return LOGICAL_ELT(x, i); +} + +template <> +inline typename r_vector::underlying_type* r_vector::get_p(bool is_altrep, + SEXP data) { + if (is_altrep) { + return nullptr; + } else { + return LOGICAL(data); + } +} + +template <> +inline typename r_vector::underlying_type const* r_vector::get_const_p( + bool is_altrep, SEXP data) { + return LOGICAL_OR_NULL(data); +} + +template <> +inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, + typename r_vector::underlying_type* buf) { + // NOPROTECT: likely too costly to unwind protect here + LOGICAL_GET_REGION(x, i, n, buf); +} + +template <> +inline bool r_vector::const_iterator::use_buf(bool is_altrep) { + return is_altrep; +} + +typedef r_vector logicals; + +namespace writable { + +template <> +inline void r_vector::set_elt(SEXP x, R_xlen_t i, + typename r_vector::underlying_type value) { + // NOPROTECT: Likely too costly to unwind protect every set elt + SET_LOGICAL_ELT(x, i, value); +} + +inline bool operator==(const r_vector::proxy& lhs, r_bool rhs) { + return static_cast(lhs).operator==(rhs); +} + +typedef r_vector logicals; + +} // namespace writable + +} // namespace cpp11 diff --git a/inst/include/cpp11/matrix.hpp b/inst/include/cpp11/matrix.hpp new file mode 100644 index 0000000..a545c88 --- /dev/null +++ b/inst/include/cpp11/matrix.hpp @@ -0,0 +1,232 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include +#include // for string + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, R_xlen_t, INT... +#include "cpp11/r_bool.hpp" // for r_bool +#include "cpp11/r_string.hpp" // for r_string +#include "cpp11/r_vector.hpp" // for r_vector +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { + +// matrix dimensions +struct matrix_dims { + protected: + const int nrow_; + const int ncol_; + + public: + matrix_dims(SEXP data) : nrow_(Rf_nrows(data)), ncol_(Rf_ncols(data)) {} + matrix_dims(int nrow, int ncol) : nrow_(nrow), ncol_(ncol) {} + + int nrow() const { return nrow_; } + int ncol() const { return ncol_; } +}; + +// base type for dimension-wise matrix access specialization +struct matrix_slice {}; + +struct by_row : public matrix_slice {}; +struct by_column : public matrix_slice {}; + +// basic properties of matrix slices +template +struct matrix_slices : public matrix_dims { + public: + using matrix_dims::matrix_dims; + using matrix_dims::ncol; + using matrix_dims::nrow; + + int nslices() const; + int slice_size() const; + int slice_stride() const; + int slice_offset(int pos) const; +}; + +// basic properties of matrix row slices +template <> +struct matrix_slices : public matrix_dims { + public: + using matrix_dims::matrix_dims; + using matrix_dims::ncol; + using matrix_dims::nrow; + + int nslices() const { return nrow(); } + int slice_size() const { return ncol(); } + int slice_stride() const { return nrow(); } + int slice_offset(int pos) const { return pos; } +}; + +// basic properties of matrix column slices +template <> +struct matrix_slices : public matrix_dims { + public: + using matrix_dims::matrix_dims; + using matrix_dims::ncol; + using matrix_dims::nrow; + + int nslices() const { return ncol(); } + int slice_size() const { return nrow(); } + int slice_stride() const { return 1; } + int slice_offset(int pos) const { return pos * nrow(); } +}; + +template +class matrix : public matrix_slices { + private: + V vector_; + + public: + // matrix slice: row (if S=by_row) or a column (if S=by_column) + class slice { + private: + const matrix& parent_; + int index_; // slice index + int offset_; // index of the first slice element in parent_.vector_ + + public: + slice(const matrix& parent, int index) + : parent_(parent), index_(index), offset_(parent.slice_offset(index)) {} + + R_xlen_t stride() const { return parent_.slice_stride(); } + R_xlen_t size() const { return parent_.slice_size(); } + + bool operator==(const slice& rhs) const { + return (index_ == rhs.index_) && (parent_.data() == rhs.parent_.data()); + } + bool operator!=(const slice& rhs) const { return !operator==(rhs); } + + T operator[](int pos) const { return parent_.vector_[offset_ + stride() * pos]; } + + // iterates elements of a slice + class iterator { + private: + const slice& slice_; + int pos_; + + public: + using difference_type = std::ptrdiff_t; + using value_type = T; + using pointer = T*; + using reference = T&; + using iterator_category = std::forward_iterator_tag; + + iterator(const slice& slice, R_xlen_t pos) : slice_(slice), pos_(pos) {} + + iterator& operator++() { + ++pos_; + return *this; + } + + bool operator==(const iterator& rhs) const { + return (pos_ == rhs.pos_) && (slice_ == rhs.slice_); + } + bool operator!=(const iterator& rhs) const { return !operator==(rhs); } + + T operator*() const { return slice_[pos_]; }; + }; + + iterator begin() const { return {*this, 0}; } + iterator end() const { return {*this, size()}; } + }; + friend slice; + + // iterates slices (rows or columns -- depending on S template param) of a matrix + class slice_iterator { + private: + const matrix& parent_; + int pos_; + + public: + using difference_type = std::ptrdiff_t; + using value_type = slice; + using pointer = slice*; + using reference = slice&; + using iterator_category = std::forward_iterator_tag; + + slice_iterator(const matrix& parent, R_xlen_t pos) : parent_(parent), pos_(pos) {} + + slice_iterator& operator++() { + ++pos_; + return *this; + } + + bool operator==(const slice_iterator& rhs) const { + return (pos_ == rhs.pos_) && (parent_.data() == rhs.parent_.data()); + } + bool operator!=(const slice_iterator& rhs) const { return !operator==(rhs); } + + slice operator*() { return parent_[pos_]; }; + }; + + public: + matrix(SEXP data) : matrix_slices(data), vector_(data) {} + + template + matrix(const cpp11::matrix& rhs) + : matrix_slices(rhs.nrow(), rhs.ncol()), vector_(rhs.vector()) {} + + matrix(int nrow, int ncol) + : matrix_slices(nrow, ncol), vector_(R_xlen_t(nrow * ncol)) { + vector_.attr(R_DimSymbol) = {nrow, ncol}; + } + + using matrix_slices::nrow; + using matrix_slices::ncol; + using matrix_slices::nslices; + using matrix_slices::slice_size; + using matrix_slices::slice_stride; + using matrix_slices::slice_offset; + + V vector() const { return vector_; } + + SEXP data() const { return vector_.data(); } + + R_xlen_t size() const { return vector_.size(); } + + operator SEXP() const { return SEXP(vector_); } + + // operator sexp() { return sexp(vector_); } + + sexp attr(const char* name) const { return SEXP(vector_.attr(name)); } + + sexp attr(const std::string& name) const { return SEXP(vector_.attr(name)); } + + sexp attr(SEXP name) const { return SEXP(vector_.attr(name)); } + + r_vector names() const { return r_vector(vector_.names()); } + + T operator()(int row, int col) const { return vector_[row + (col * nrow())]; } + + slice operator[](int index) const { return {*this, index}; } + + slice_iterator begin() const { return {*this, 0}; } + slice_iterator end() const { return {*this, nslices()}; } +}; + +template +using doubles_matrix = matrix, double, S>; +template +using integers_matrix = matrix, int, S>; +template +using logicals_matrix = matrix, r_bool, S>; +template +using strings_matrix = matrix, r_string, S>; + +namespace writable { +template +using doubles_matrix = matrix, r_vector::proxy, S>; +template +using integers_matrix = matrix, r_vector::proxy, S>; +template +using logicals_matrix = matrix, r_vector::proxy, S>; +template +using strings_matrix = matrix, r_vector::proxy, S>; +} // namespace writable + +// TODO: Add tests for Matrix class +} // namespace cpp11 diff --git a/inst/include/cpp11/named_arg.hpp b/inst/include/cpp11/named_arg.hpp new file mode 100644 index 0000000..8119d0d --- /dev/null +++ b/inst/include/cpp11/named_arg.hpp @@ -0,0 +1,50 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for size_t + +#include // for initializer_list + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, literals +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { +class named_arg { + public: + explicit named_arg(const char* name) : name_(name), value_(R_NilValue) {} + named_arg& operator=(std::initializer_list il) { + value_ = as_sexp(il); + return *this; + } + + template + named_arg& operator=(T rhs) { + value_ = as_sexp(rhs); + return *this; + } + + template + named_arg& operator=(std::initializer_list rhs) { + value_ = as_sexp(rhs); + return *this; + } + + const char* name() const { return name_; } + SEXP value() const { return value_; } + + private: + const char* name_; + sexp value_; +}; + +namespace literals { + +inline named_arg operator""_nm(const char* name, std::size_t) { return named_arg(name); } + +} // namespace literals + +using namespace literals; + +} // namespace cpp11 diff --git a/inst/include/cpp11/protect.hpp b/inst/include/cpp11/protect.hpp new file mode 100644 index 0000000..0da6f62 --- /dev/null +++ b/inst/include/cpp11/protect.hpp @@ -0,0 +1,339 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for longjmp, setjmp, jmp_buf +#include // for exception +#include // for std::runtime_error +#include // for string, basic_string +#include // for tuple, make_tuple + +// NB: cpp11/R.hpp must precede R_ext/Error.h to ensure R_NO_REMAP is defined +#include "cpp11/R.hpp" // for SEXP, SEXPREC, CDR, R_NilValue, CAR, R_Pres... + +#include "R_ext/Boolean.h" // for Rboolean +#include "R_ext/Error.h" // for Rf_error, Rf_warning +#include "R_ext/Print.h" // for REprintf +#include "R_ext/Utils.h" // for R_CheckUserInterrupt + +// We would like to remove this, since all supported versions of R now support proper +// unwind protect, but some groups rely on it existing, like arrow and systemfonts +// https://github.com/r-lib/cpp11/issues/412 +#define HAS_UNWIND_PROTECT + +#ifdef CPP11_USE_FMT +#define FMT_HEADER_ONLY +#include "fmt/core.h" +#endif + +namespace cpp11 { +class unwind_exception : public std::exception { + public: + SEXP token; + unwind_exception(SEXP token_) : token(token_) {} +}; + +/// Unwind Protection from C longjmp's, like those used in R error handling +/// +/// @param code The code to which needs to be protected, as a nullary callable +template ()()), SEXP>::value>::type> +SEXP unwind_protect(Fun&& code) { + static SEXP token = [] { + SEXP res = R_MakeUnwindCont(); + R_PreserveObject(res); + return res; + }(); + + std::jmp_buf jmpbuf; + if (setjmp(jmpbuf)) { + throw unwind_exception(token); + } + + SEXP res = R_UnwindProtect( + [](void* data) -> SEXP { + auto callback = static_cast(data); + return static_cast(*callback)(); + }, + &code, + [](void* jmpbuf, Rboolean jump) { + if (jump == TRUE) { + // We need to first jump back into the C++ stacks because you can't safely + // throw exceptions from C stack frames. + longjmp(*static_cast(jmpbuf), 1); + } + }, + &jmpbuf, token); + + // R_UnwindProtect adds the result to the CAR of the continuation token, + // which implicitly protects the result. However if there is no error and + // R_UwindProtect does a normal exit the memory shouldn't be protected, so we + // unset it here before returning the value ourselves. + SETCAR(token, R_NilValue); + + return res; +} + +template ()()), void>::value>::type> +void unwind_protect(Fun&& code) { + (void)unwind_protect([&] { + std::forward(code)(); + return R_NilValue; + }); +} + +template ()())> +typename std::enable_if::value && !std::is_same::value, + R>::type +unwind_protect(Fun&& code) { + R out; + (void)unwind_protect([&] { + out = std::forward(code)(); + return R_NilValue; + }); + return out; +} + +namespace detail { + +template +struct index_sequence { + using type = index_sequence; +}; + +template +struct appended_sequence; + +template +struct appended_sequence, J> : index_sequence {}; + +template +struct make_index_sequence + : appended_sequence::type, N - 1> {}; + +template <> +struct make_index_sequence<0> : index_sequence<> {}; + +template +decltype(std::declval()(std::declval()...)) apply( + F&& f, std::tuple&& a, const index_sequence&) { + return std::forward(f)(std::get(std::move(a))...); +} + +template +decltype(std::declval()(std::declval()...)) apply(F&& f, + std::tuple&& a) { + return apply(std::forward(f), std::move(a), make_index_sequence{}); +} + +// overload to silence a compiler warning that the (empty) tuple parameter is set but +// unused +template +decltype(std::declval()()) apply(F&& f, std::tuple<>&&) { + return std::forward(f)(); +} + +template +struct closure { + decltype(std::declval()(std::declval()...)) operator()() && { + return apply(ptr_, std::move(arefs_)); + } + F* ptr_; + std::tuple arefs_; +}; + +} // namespace detail + +struct protect { + template + struct function { + template + decltype(std::declval()(std::declval()...)) operator()(A&&... a) const { + // workaround to support gcc4.8, which can't capture a parameter pack + return unwind_protect( + detail::closure{ptr_, std::forward_as_tuple(std::forward(a)...)}); + } + + F* ptr_; + }; + + /// May not be applied to a function bearing attributes, which interfere with linkage on + /// some compilers; use an appropriately attributed alternative. (For example, Rf_error + /// bears the [[noreturn]] attribute and must be protected with safe.noreturn rather + /// than safe.operator[]). + template + constexpr function operator[](F* raw) const { + return {raw}; + } + + template + struct noreturn_function { + template + void operator() [[noreturn]] (A&&... a) const { + // workaround to support gcc4.8, which can't capture a parameter pack + unwind_protect( + detail::closure{ptr_, std::forward_as_tuple(std::forward(a)...)}); + // Compiler hint to allow [[noreturn]] attribute; this is never executed since + // the above call will not return. + throw std::runtime_error("[[noreturn]]"); + } + F* ptr_; + }; + + template + constexpr noreturn_function noreturn(F* raw) const { + return {raw}; + } +}; +constexpr struct protect safe = {}; + +inline void check_user_interrupt() { safe[R_CheckUserInterrupt](); } + +#ifdef CPP11_USE_FMT +template +void stop [[noreturn]] (const char* fmt_arg, Args&&... args) { + std::string msg = fmt::format(fmt_arg, std::forward(args)...); + safe.noreturn(Rf_errorcall)(R_NilValue, "%s", msg.c_str()); +} + +template +void stop [[noreturn]] (const std::string& fmt_arg, Args&&... args) { + std::string msg = fmt::format(fmt_arg, std::forward(args)...); + safe.noreturn(Rf_errorcall)(R_NilValue, "%s", msg.c_str()); +} + +template +void warning(const char* fmt_arg, Args&&... args) { + std::string msg = fmt::format(fmt_arg, std::forward(args)...); + safe[Rf_warningcall](R_NilValue, "%s", msg.c_str()); +} + +template +void warning(const std::string& fmt_arg, Args&&... args) { + std::string msg = fmt::format(fmt_arg, std::forward(args)...); + safe[Rf_warningcall](R_NilValue, "%s", msg.c_str()); +} +#else +template +void stop [[noreturn]] (const char* fmt, Args... args) { + safe.noreturn(Rf_errorcall)(R_NilValue, fmt, args...); +} + +template +void stop [[noreturn]] (const std::string& fmt, Args... args) { + safe.noreturn(Rf_errorcall)(R_NilValue, fmt.c_str(), args...); +} + +template +void warning(const char* fmt, Args... args) { + safe[Rf_warningcall](R_NilValue, fmt, args...); +} + +template +void warning(const std::string& fmt, Args... args) { + safe[Rf_warningcall](R_NilValue, fmt.c_str(), args...); +} +#endif + +namespace detail { + +// A doubly-linked list of preserved objects, allowing O(1) insertion/release of objects +// compared to O(N preserved) with `R_PreserveObject()` and `R_ReleaseObject()`. +// +// We let R manage the memory of the list itself by calling `R_PreserveObject()` on it. +// +// cpp11 being a header only library makes creating a "global" preserve list a bit tricky. +// The trick we use here is that static local variables in inline extern functions are +// guaranteed by the standard to be unique across the whole program. Inline functions are +// extern by default, but `static inline` functions are not, so do not change these +// functions to `static`. If we did that, we would end up having one preserve list per +// compilation unit instead. As it stands today, we are fairly confident that we have 1 +// preserve list per package, which seems to work nicely. +// https://stackoverflow.com/questions/185624/what-happens-to-static-variables-in-inline-functions +// https://stackoverflow.com/questions/51612866/global-variables-in-header-only-library +// https://github.com/r-lib/cpp11/issues/330 +// +// > A static local variable in an extern inline function always refers to the +// same object. 7.1.2/4 - C++98/C++14 (n3797) +namespace store { + +inline SEXP init() { + SEXP out = Rf_cons(R_NilValue, Rf_cons(R_NilValue, R_NilValue)); + R_PreserveObject(out); + return out; +} + +inline SEXP get() { + // Note the `static` local variable in the inline extern function here! Guarantees we + // have 1 unique preserve list across all compilation units in the package. + static SEXP out = init(); + return out; +} + +inline R_xlen_t count() { + const R_xlen_t head = 1; + const R_xlen_t tail = 1; + SEXP list = get(); + return Rf_xlength(list) - head - tail; +} + +inline SEXP insert(SEXP x) { + if (x == R_NilValue) { + return R_NilValue; + } + + PROTECT(x); + + SEXP list = get(); + + // Get references to the head of the preserve list and the next element + // after the head + SEXP head = list; + SEXP next = CDR(list); + + // Add a new cell that points to the current head + next. + SEXP cell = PROTECT(Rf_cons(head, next)); + SET_TAG(cell, x); + + // Update the head + next to point at the newly-created cell, + // effectively inserting that cell between the current head + next. + SETCDR(head, cell); + SETCAR(next, cell); + + // UNPROTECT(2); + + return cell; +} + +inline void release(SEXP cell) { + if (cell == R_NilValue) { + return; + } + + // Get a reference to the cells before and after the token. + SEXP lhs = CAR(cell); + SEXP rhs = CDR(cell); + + // Remove the cell from the preserve list -- effectively, we do this + // by updating the 'lhs' and 'rhs' references to point at each-other, + // effectively removing any references to the cell in the pairlist. + SETCDR(lhs, rhs); + SETCAR(rhs, lhs); +} + +inline void print() { + SEXP list = get(); + for (SEXP cell = list; cell != R_NilValue; cell = CDR(cell)) { + REprintf("%p CAR: %p CDR: %p TAG: %p\n", reinterpret_cast(cell), + reinterpret_cast(CAR(cell)), reinterpret_cast(CDR(cell)), + reinterpret_cast(TAG(cell))); + } + REprintf("---\n"); +} + +} // namespace store + +} // namespace detail + +} // namespace cpp11 diff --git a/inst/include/cpp11/r_bool.hpp b/inst/include/cpp11/r_bool.hpp new file mode 100644 index 0000000..cfdf346 --- /dev/null +++ b/inst/include/cpp11/r_bool.hpp @@ -0,0 +1,83 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for numeric_limits +#include +#include // for is_convertible, enable_if + +#include "R_ext/Boolean.h" // for Rboolean +#include "cpp11/R.hpp" // for SEXP, SEXPREC, ... +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/protect.hpp" // for unwind_protect +#include "cpp11/r_vector.hpp" +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { + +class r_bool { + public: + r_bool() = default; + + r_bool(SEXP data) { + if (Rf_isLogical(data)) { + if (Rf_xlength(data) == 1) { + value_ = static_cast(LOGICAL_ELT(data, 0)); + } + } + throw std::invalid_argument("Invalid r_bool value"); + } + + r_bool(bool value) : value_(value ? TRUE : FALSE) {} + r_bool(Rboolean value) : value_(value) {} + r_bool(int value) : value_(from_int(value)) {} + + operator bool() const { return value_ == TRUE; } + operator int() const { return value_; } + operator Rboolean() const { return value_ ? TRUE : FALSE; } + + bool operator==(r_bool rhs) const { return value_ == rhs.value_; } + bool operator==(bool rhs) const { return operator==(r_bool(rhs)); } + bool operator==(Rboolean rhs) const { return operator==(r_bool(rhs)); } + bool operator==(int rhs) const { return operator==(r_bool(rhs)); } + + private: + static constexpr int na = std::numeric_limits::min(); + + static int from_int(int value) { + if (value == static_cast(FALSE)) return FALSE; + if (value == static_cast(na)) return na; + return TRUE; + } + + int value_ = na; +}; + +inline std::ostream& operator<<(std::ostream& os, r_bool const& value) { + os << ((value == TRUE) ? "TRUE" : "FALSE"); + return os; +} + +template +using enable_if_r_bool = enable_if_t::value, R>; + +template +enable_if_r_bool as_sexp(T from) { + sexp res = Rf_allocVector(LGLSXP, 1); + unwind_protect([&] { SET_LOGICAL_ELT(res.data(), 0, from); }); + return res; +} + +template <> +inline r_bool na() { + return NA_LOGICAL; +} + +namespace traits { +template <> +struct get_underlying_type { + using type = int; +}; +} // namespace traits + +} // namespace cpp11 diff --git a/inst/include/cpp11/r_string.hpp b/inst/include/cpp11/r_string.hpp new file mode 100644 index 0000000..af103f6 --- /dev/null +++ b/inst/include/cpp11/r_string.hpp @@ -0,0 +1,105 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for string, basic_string, operator== +#include // for is_convertible, enable_if + +#include "R_ext/Memory.h" // for vmaxget, vmaxset +#include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_mkCharCE, Rf_translat... +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/protect.hpp" // for unwind_protect, protect, protect::function +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { + +class r_string { + public: + r_string() = default; + r_string(SEXP data) : data_(data) {} + r_string(const char* data) : data_(safe[Rf_mkCharCE](data, CE_UTF8)) {} + r_string(const std::string& data) + : data_(safe[Rf_mkCharLenCE](data.c_str(), data.size(), CE_UTF8)) {} + + operator SEXP() const { return data_; } + operator sexp() const { return data_; } + operator std::string() const { + std::string res; + res.reserve(size()); + + void* vmax = vmaxget(); + unwind_protect([&] { res.assign(Rf_translateCharUTF8(data_)); }); + vmaxset(vmax); + + return res; + } + + bool operator==(const r_string& rhs) const { return data_.data() == rhs.data_.data(); } + + bool operator==(const SEXP rhs) const { return data_.data() == rhs; } + + bool operator==(const char* rhs) const { + return static_cast(*this) == rhs; + } + + bool operator==(const std::string& rhs) const { + return static_cast(*this) == rhs; + } + + R_xlen_t size() const { return Rf_xlength(data_); } + + private: + sexp data_ = R_NilValue; +}; + +inline SEXP as_sexp(std::initializer_list il) { + R_xlen_t size = il.size(); + + sexp data; + unwind_protect([&] { + data = Rf_allocVector(STRSXP, size); + auto it = il.begin(); + for (R_xlen_t i = 0; i < size; ++i, ++it) { + if (*it == NA_STRING) { + SET_STRING_ELT(data, i, *it); + } else { + SET_STRING_ELT(data, i, Rf_mkCharCE(Rf_translateCharUTF8(*it), CE_UTF8)); + } + } + }); + return data; +} + +template +using enable_if_r_string = enable_if_t::value, R>; + +template +enable_if_r_string as_sexp(T from) { + r_string str(from); + sexp res; + unwind_protect([&] { + res = Rf_allocVector(STRSXP, 1); + + if (str == NA_STRING) { + SET_STRING_ELT(res, 0, str); + } else { + SET_STRING_ELT(res, 0, Rf_mkCharCE(Rf_translateCharUTF8(str), CE_UTF8)); + } + }); + + return res; +} + +template <> +inline r_string na() { + return NA_STRING; +} + +namespace traits { +template <> +struct get_underlying_type { + using type = SEXP; +}; +} // namespace traits + +} // namespace cpp11 diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp new file mode 100644 index 0000000..e028929 --- /dev/null +++ b/inst/include/cpp11/r_vector.hpp @@ -0,0 +1,1458 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for ptrdiff_t, size_t + +#include // for max +#include // for array +#include // for snprintf +#include // for memcpy +#include // for exception +#include // for initializer_list +#include // for forward_iterator_tag, random_ac... +#include // for out_of_range +#include // for string, basic_string +#include // for decay, is_same, enable_if, is_c... +#include // for declval + +#include "cpp11/R.hpp" // for R_xlen_t, SEXP, SEXPREC, Rf_xle... +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/named_arg.hpp" // for named_arg +#include "cpp11/protect.hpp" // for store +#include "cpp11/r_string.hpp" // for r_string +#include "cpp11/sexp.hpp" // for sexp + +namespace cpp11 { + +using namespace cpp11::literals; + +namespace writable { +template +class r_vector; +} // namespace writable + +// Declarations +template +class r_vector { + public: + // Forward declare + class const_iterator; + using underlying_type = typename traits::get_underlying_type::type; + + private: + SEXP data_ = R_NilValue; + SEXP protect_ = R_NilValue; + bool is_altrep_ = false; + underlying_type* data_p_ = nullptr; + R_xlen_t length_ = 0; + + public: + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef T value_type; + typedef T* pointer; + typedef T& reference; + + ~r_vector(); + + r_vector() noexcept = default; + r_vector(SEXP data); + r_vector(SEXP data, bool is_altrep); + r_vector(const r_vector& x); + r_vector(r_vector&& x); + r_vector(const writable::r_vector& x); + r_vector(named_arg) = delete; + + r_vector& operator=(const r_vector& rhs); + r_vector& operator=(r_vector&& rhs); + + operator SEXP() const; + operator sexp() const; + +#ifdef LONG_VECTOR_SUPPORT + T operator[](const int pos) const; +#endif + T operator[](const R_xlen_t pos) const; + T operator[](const size_type pos) const; + T operator[](const r_string& name) const; + +#ifdef LONG_VECTOR_SUPPORT + T at(const int pos) const; +#endif + T at(const R_xlen_t pos) const; + T at(const size_type pos) const; + T at(const r_string& name) const; + + bool contains(const r_string& name) const; + bool is_altrep() const; + bool named() const; + R_xlen_t size() const; + bool empty() const; + SEXP data() const; + + const sexp attr(const char* name) const; + const sexp attr(const std::string& name) const; + const sexp attr(SEXP name) const; + + r_vector names() const; + + const_iterator begin() const; + const_iterator end() const; + const_iterator cbegin() const; + const_iterator cend() const; + const_iterator find(const r_string& name) const; + + class const_iterator { + // Iterator references: + // https://cplusplus.com/reference/iterator/ + // https://stackoverflow.com/questions/8054273/how-to-implement-an-stl-style-iterator-and-avoid-common-pitfalls + // It seems like our iterator doesn't fully implement everything for + // `random_access_iterator_tag` (like an `[]` operator, for example). If we discover + // issues with it, we probably need to add more methods. + private: + const r_vector* data_; + R_xlen_t pos_; + std::array buf_; + R_xlen_t block_start_ = 0; + R_xlen_t length_ = 0; + + public: + using difference_type = ptrdiff_t; + using value_type = T; + using pointer = T*; + using reference = T&; + using iterator_category = std::random_access_iterator_tag; + + const_iterator(const r_vector* data, R_xlen_t pos); + + const_iterator operator+(R_xlen_t pos); + ptrdiff_t operator-(const const_iterator& other) const; + + const_iterator& operator++(); + const_iterator& operator--(); + + const_iterator& operator+=(R_xlen_t pos); + const_iterator& operator-=(R_xlen_t pos); + + bool operator!=(const const_iterator& other) const; + bool operator==(const const_iterator& other) const; + + T operator*() const; + + friend class writable::r_vector::iterator; + + private: + /// Implemented in specialization + static bool use_buf(bool is_altrep); + void fill_buf(R_xlen_t pos); + }; + + private: + /// Implemented in specialization + static underlying_type get_elt(SEXP x, R_xlen_t i); + /// Implemented in specialization + static underlying_type* get_p(bool is_altrep, SEXP data); + /// Implemented in specialization + static underlying_type const* get_const_p(bool is_altrep, SEXP data); + /// Implemented in specialization + static void get_region(SEXP x, R_xlen_t i, R_xlen_t n, underlying_type* buf); + /// Implemented in specialization + static SEXPTYPE get_sexptype(); + /// Implemented in specialization (throws by default, specialization in list type) + static T get_oob(); + static SEXP valid_type(SEXP x); + static SEXP valid_length(SEXP x, R_xlen_t n); + + friend class writable::r_vector; +}; + +namespace writable { + +template +using has_begin_fun = std::decay()))>; + +/// Read/write access to new or copied r_vectors +template +class r_vector : public cpp11::r_vector { + public: + // Forward declare + class proxy; + class iterator; + + private: + R_xlen_t capacity_ = 0; + + using cpp11::r_vector::data_; + using cpp11::r_vector::data_p_; + using cpp11::r_vector::is_altrep_; + using cpp11::r_vector::length_; + using cpp11::r_vector::protect_; + + using typename cpp11::r_vector::underlying_type; + + public: + typedef ptrdiff_t difference_type; + typedef size_t size_type; + typedef proxy value_type; + typedef proxy* pointer; + typedef proxy& reference; + + r_vector() noexcept = default; + r_vector(const SEXP& data); + r_vector(SEXP&& data); + r_vector(const SEXP& data, bool is_altrep); + r_vector(SEXP&& data, bool is_altrep); + r_vector(const r_vector& rhs); + r_vector(r_vector&& rhs); + r_vector(const cpp11::r_vector& rhs); + r_vector(std::initializer_list il); + r_vector(std::initializer_list il); + + explicit r_vector(const R_xlen_t size); + + template + r_vector(Iter first, Iter last); + + template > + r_vector(const V& obj); + + r_vector& operator=(const r_vector& rhs); + r_vector& operator=(r_vector&& rhs); + + operator SEXP() const; + +#ifdef LONG_VECTOR_SUPPORT + proxy operator[](const int pos) const; +#endif + proxy operator[](const R_xlen_t pos) const; + proxy operator[](const size_type pos) const; + proxy operator[](const r_string& name) const; + +#ifdef LONG_VECTOR_SUPPORT + proxy at(const int pos) const; +#endif + proxy at(const R_xlen_t pos) const; + proxy at(const size_type pos) const; + proxy at(const r_string& name) const; + + void push_back(T value); + /// Implemented in `strings.hpp` + void push_back(const named_arg& value); + void pop_back(); + + void resize(R_xlen_t count); + void reserve(R_xlen_t new_capacity); + + iterator insert(R_xlen_t pos, T value); + iterator erase(R_xlen_t pos); + + void clear(); + + iterator begin() const; + iterator end() const; + + using cpp11::r_vector::cbegin; + using cpp11::r_vector::cend; + using cpp11::r_vector::size; + + iterator find(const r_string& name) const; + + attribute_proxy> attr(const char* name) const; + attribute_proxy> attr(const std::string& name) const; + attribute_proxy> attr(SEXP name) const; + + attribute_proxy> names() const; + + class proxy { + private: + const SEXP data_; + const R_xlen_t index_; + underlying_type* const p_; + bool is_altrep_; + + public: + proxy(SEXP data, const R_xlen_t index, underlying_type* const p, bool is_altrep); + + proxy& operator=(const proxy& rhs); + + proxy& operator=(const T& rhs); + proxy& operator+=(const T& rhs); + proxy& operator-=(const T& rhs); + proxy& operator*=(const T& rhs); + proxy& operator/=(const T& rhs); + proxy& operator++(int); + proxy& operator--(int); + + void operator++(); + void operator--(); + + operator T() const; + + private: + underlying_type get() const; + void set(underlying_type x); + }; + + class iterator : public cpp11::r_vector::const_iterator { + private: + using cpp11::r_vector::const_iterator::data_; + using cpp11::r_vector::const_iterator::block_start_; + using cpp11::r_vector::const_iterator::pos_; + using cpp11::r_vector::const_iterator::buf_; + using cpp11::r_vector::const_iterator::length_; + using cpp11::r_vector::const_iterator::use_buf; + using cpp11::r_vector::const_iterator::fill_buf; + + public: + using difference_type = ptrdiff_t; + using value_type = proxy; + using pointer = proxy*; + using reference = proxy&; + using iterator_category = std::forward_iterator_tag; + + iterator(const r_vector* data, R_xlen_t pos); + + iterator& operator++(); + + proxy operator*() const; + + using cpp11::r_vector::const_iterator::operator!=; + + iterator& operator+=(R_xlen_t rhs); + iterator operator+(R_xlen_t rhs); + }; + + private: + /// Implemented in specialization + static void set_elt(SEXP x, R_xlen_t i, underlying_type value); + + static SEXP reserve_data(SEXP x, bool is_altrep, R_xlen_t size); + static SEXP resize_data(SEXP x, bool is_altrep, R_xlen_t size); + static SEXP resize_names(SEXP x, R_xlen_t size); + + using cpp11::r_vector::get_elt; + using cpp11::r_vector::get_p; + using cpp11::r_vector::get_const_p; + using cpp11::r_vector::get_sexptype; + using cpp11::r_vector::valid_type; + using cpp11::r_vector::valid_length; +}; +} // namespace writable + +// Implementations below + +template +inline r_vector::~r_vector() { + detail::store::release(protect_); +} + +template +inline r_vector::r_vector(const SEXP data) + : data_(valid_type(data)), + protect_(detail::store::insert(data)), + is_altrep_(ALTREP(data)), + data_p_(get_p(ALTREP(data), data)), + length_(Rf_xlength(data)) {} + +template +inline r_vector::r_vector(const SEXP data, bool is_altrep) + : data_(valid_type(data)), + protect_(detail::store::insert(data)), + is_altrep_(is_altrep), + data_p_(get_p(is_altrep, data)), + length_(Rf_xlength(data)) {} + +// We are in read-only space so we can just copy over all properties except for +// `protect_`, which we need to manage on our own. `x` persists after this call, so we +// don't clear anything. +template +inline r_vector::r_vector(const r_vector& x) { + data_ = x.data_; + protect_ = detail::store::insert(data_); + is_altrep_ = x.is_altrep_; + data_p_ = x.data_p_; + length_ = x.length_; +} + +// `x` here is a temporary value, it is going to be destructed right after this. +// Take ownership over all `x` details, including `protect_`. +// Importantly, set `x.protect_` to `R_NilValue` to prevent the `x` destructor from +// releasing the object that we now own. +template +inline r_vector::r_vector(r_vector&& x) { + data_ = x.data_; + protect_ = x.protect_; + is_altrep_ = x.is_altrep_; + data_p_ = x.data_p_; + length_ = x.length_; + + // Important for `x.protect_`, extra check for everything else + x.data_ = R_NilValue; + x.protect_ = R_NilValue; + x.is_altrep_ = false; + x.data_p_ = nullptr; + x.length_ = 0; +} + +// `x` here is writable, meaning the underlying `SEXP` could have more `capacity` than +// a read only equivalent would expect. This means we have to go through `SEXP` first, +// to truncate the writable data, and then we can wrap it in a read only `r_vector`. +// +// It would be the same scenario if we came from a writable temporary, i.e. +// `writable::r_vector&& x`, so we let this method handle both scenarios. +template +inline r_vector::r_vector(const writable::r_vector& x) + : r_vector(static_cast(x)) {} + +// Same reasoning as `r_vector(const r_vector& x)` constructor +template +inline r_vector& r_vector::operator=(const r_vector& rhs) { + if (data_ == rhs.data_) { + return *this; + } + + // Release existing object that we protect + detail::store::release(protect_); + + data_ = rhs.data_; + protect_ = detail::store::insert(data_); + is_altrep_ = rhs.is_altrep_; + data_p_ = rhs.data_p_; + length_ = rhs.length_; + + return *this; +} + +// Same reasoning as `r_vector(r_vector&& x)` constructor +template +inline r_vector& r_vector::operator=(r_vector&& rhs) { + if (data_ == rhs.data_) { + return *this; + } + + // Release existing object that we protect + detail::store::release(protect_); + + data_ = rhs.data_; + protect_ = rhs.protect_; + is_altrep_ = rhs.is_altrep_; + data_p_ = rhs.data_p_; + length_ = rhs.length_; + + // Important for `rhs.protect_`, extra check for everything else + rhs.data_ = R_NilValue; + rhs.protect_ = R_NilValue; + rhs.is_altrep_ = false; + rhs.data_p_ = nullptr; + rhs.length_ = 0; + + return *this; +} + +template +inline r_vector::operator SEXP() const { + return data_; +} + +template +inline r_vector::operator sexp() const { + return data_; +} + +#ifdef LONG_VECTOR_SUPPORT +template +inline T r_vector::operator[](const int pos) const { + return operator[](static_cast(pos)); +} +#endif + +template +inline T r_vector::operator[](const R_xlen_t pos) const { + // Handles ALTREP, VECSXP, and STRSXP cases through `get_elt()` + const underlying_type elt = (data_p_ != nullptr) ? data_p_[pos] : get_elt(data_, pos); + return static_cast(elt); +} + +template +inline T r_vector::operator[](const size_type pos) const { + return operator[](static_cast(pos)); +} + +template +inline T r_vector::operator[](const r_string& name) const { + SEXP names = this->names(); + R_xlen_t size = Rf_xlength(names); + + for (R_xlen_t pos = 0; pos < size; ++pos) { + auto cur = Rf_translateCharUTF8(STRING_ELT(names, pos)); + if (name == cur) { + return operator[](pos); + } + } + + return get_oob(); +} + +#ifdef LONG_VECTOR_SUPPORT +template +inline T r_vector::at(const int pos) const { + return at(static_cast(pos)); +} +#endif + +template +inline T r_vector::at(const R_xlen_t pos) const { + if (pos < 0 || pos >= length_) { + throw std::out_of_range("r_vector"); + } + + return operator[](pos); +} + +template +inline T r_vector::at(const size_type pos) const { + return at(static_cast(pos)); +} + +template +inline T r_vector::at(const r_string& name) const { + return operator[](name); +} + +template +inline bool r_vector::contains(const r_string& name) const { + SEXP names = this->names(); + R_xlen_t size = Rf_xlength(names); + + for (R_xlen_t pos = 0; pos < size; ++pos) { + auto cur = Rf_translateCharUTF8(STRING_ELT(names, pos)); + if (name == cur) { + return true; + } + } + + return false; +} + +template +inline bool r_vector::is_altrep() const { + return is_altrep_; +} + +template +inline bool r_vector::named() const { + return Rf_getAttrib(data_, R_NamesSymbol) != R_NilValue; +} + +template +inline R_xlen_t r_vector::size() const { + return length_; +} + +template +inline bool r_vector::empty() const { + return (!(this->size() > 0)); +} + +/// Provide access to the underlying data, mainly for interface +/// compatibility with std::vector +template +inline SEXP r_vector::data() const { + return data_; +} + +template +inline const sexp r_vector::attr(const char* name) const { + return SEXP(attribute_proxy>(*this, name)); +} + +template +inline const sexp r_vector::attr(const std::string& name) const { + return SEXP(attribute_proxy>(*this, name.c_str())); +} + +template +inline const sexp r_vector::attr(SEXP name) const { + return SEXP(attribute_proxy>(*this, name)); +} + +template +inline r_vector r_vector::names() const { + SEXP nms = Rf_getAttrib(data_, R_NamesSymbol); + if (nms == R_NilValue) { + return r_vector(); + } else { + return r_vector(nms); + } +} + +template +inline T r_vector::get_oob() { + throw std::out_of_range("r_vector"); +} + +class type_error : public std::exception { + public: + type_error(SEXPTYPE expected, SEXPTYPE actual) : expected_(expected), actual_(actual) {} + virtual const char* what() const noexcept override { + snprintf(str_, 64, "Invalid input type, expected '%s' actual '%s'", + Rf_type2char(expected_), Rf_type2char(actual_)); + return str_; + } + + private: + SEXPTYPE expected_; + SEXPTYPE actual_; + mutable char str_[64]; +}; + +template +inline SEXP r_vector::valid_type(SEXP x) { + const SEXPTYPE type = get_sexptype(); + + if (x == nullptr) { + throw type_error(type, NILSXP); + } + if (detail::r_typeof(x) != type) { + throw type_error(type, detail::r_typeof(x)); + } + + return x; +} + +template +inline SEXP r_vector::valid_length(SEXP x, R_xlen_t n) { + R_xlen_t x_n = Rf_xlength(x); + + if (x_n == n) { + return x; + } + + char message[128]; + snprintf(message, 128, + "Invalid input length, expected '%" CPP11_PRIdXLEN_T + "' actual '%" CPP11_PRIdXLEN_T "'.", + n, x_n); + + throw std::length_error(message); +} + +template +inline typename r_vector::const_iterator r_vector::begin() const { + return const_iterator(this, 0); +} + +template +inline typename r_vector::const_iterator r_vector::end() const { + return const_iterator(this, length_); +} + +template +inline typename r_vector::const_iterator r_vector::cbegin() const { + return const_iterator(this, 0); +} + +template +inline typename r_vector::const_iterator r_vector::cend() const { + return const_iterator(this, length_); +} + +template +r_vector::const_iterator::const_iterator(const r_vector* data, R_xlen_t pos) + : data_(data), pos_(pos), buf_() { + if (use_buf(data_->is_altrep())) { + fill_buf(pos); + } +} + +template +inline typename r_vector::const_iterator& r_vector::const_iterator::operator++() { + ++pos_; + if (use_buf(data_->is_altrep()) && pos_ >= block_start_ + length_) { + fill_buf(pos_); + } + return *this; +} + +template +inline typename r_vector::const_iterator& r_vector::const_iterator::operator--() { + --pos_; + if (use_buf(data_->is_altrep()) && pos_ > 0 && pos_ < block_start_) { + fill_buf(std::max(0_xl, pos_ - 64)); + } + return *this; +} + +template +inline typename r_vector::const_iterator& r_vector::const_iterator::operator+=( + R_xlen_t i) { + pos_ += i; + if (use_buf(data_->is_altrep()) && pos_ >= block_start_ + length_) { + fill_buf(pos_); + } + return *this; +} + +template +inline typename r_vector::const_iterator& r_vector::const_iterator::operator-=( + R_xlen_t i) { + pos_ -= i; + if (use_buf(data_->is_altrep()) && pos_ >= block_start_ + length_) { + fill_buf(std::max(0_xl, pos_ - 64)); + } + return *this; +} + +template +inline bool r_vector::const_iterator::operator!=( + const r_vector::const_iterator& other) const { + return pos_ != other.pos_; +} + +template +inline bool r_vector::const_iterator::operator==( + const r_vector::const_iterator& other) const { + return pos_ == other.pos_; +} + +template +inline ptrdiff_t r_vector::const_iterator::operator-( + const r_vector::const_iterator& other) const { + return pos_ - other.pos_; +} + +template +inline typename r_vector::const_iterator r_vector::const_iterator::operator+( + R_xlen_t rhs) { + auto it = *this; + it += rhs; + return it; +} + +template +inline typename r_vector::const_iterator r_vector::find( + const r_string& name) const { + SEXP names = this->names(); + R_xlen_t size = Rf_xlength(names); + + for (R_xlen_t pos = 0; pos < size; ++pos) { + auto cur = Rf_translateCharUTF8(STRING_ELT(names, pos)); + if (name == cur) { + return begin() + pos; + } + } + + return end(); +} + +template +inline T r_vector::const_iterator::operator*() const { + if (use_buf(data_->is_altrep())) { + // Use pre-loaded buffer for compatible ALTREP types + return static_cast(buf_[pos_ - block_start_]); + } else { + // Otherwise pass through to normal retrieval method + return data_->operator[](pos_); + } +} + +template +inline void r_vector::const_iterator::fill_buf(R_xlen_t pos) { + using namespace cpp11::literals; + length_ = std::min(64_xl, data_->size() - pos); + get_region(data_->data_, pos, length_, buf_.data()); + block_start_ = pos; +} + +namespace writable { + +template +inline r_vector::r_vector(const SEXP& data) + : cpp11::r_vector(safe[Rf_shallow_duplicate](data)), capacity_(length_) {} + +template +inline r_vector::r_vector(SEXP&& data) + : cpp11::r_vector(data), capacity_(length_) {} + +template +inline r_vector::r_vector(const SEXP& data, bool is_altrep) + : cpp11::r_vector(safe[Rf_shallow_duplicate](data), is_altrep), + capacity_(length_) {} + +template +inline r_vector::r_vector(SEXP&& data, bool is_altrep) + : cpp11::r_vector(data, is_altrep), capacity_(length_) {} + +template +inline r_vector::r_vector(const r_vector& rhs) { + // We don't want to just pass through to the read-only constructor because we'd + // have to convert to `SEXP` first, which could truncate, and then we'd still have + // to shallow duplicate after that to really ensure we have a duplicate, which can + // result in too many copies (#369). + // + // Instead we take control of setting all fields to try and only duplicate 1 time. + // If there is extra capacity in the `rhs`, that is also copied over. Resist the urge + // to try and trim the extra capacity during the duplication. We really do want to do a + // shallow duplicate to ensure that ALL attributes are copied over, including `dim` and + // `dimnames`, which would be lost if we instead used `reserve_data()` to do a combined + // duplicate + possible truncate. This is important for the `matrix` class. + data_ = safe[Rf_shallow_duplicate](rhs.data_); + protect_ = detail::store::insert(data_); + is_altrep_ = ALTREP(data_); + data_p_ = (data_ == R_NilValue) ? nullptr : get_p(is_altrep_, data_); + length_ = rhs.length_; + capacity_ = rhs.capacity_; +} + +template +inline r_vector::r_vector(r_vector&& rhs) { + // We don't want to pass through to the read-only constructor from a + // `writable::r_vector&& rhs` as that forces a truncation to be able to generate + // a well-formed read-only vector. Instead, we take advantage of the fact that we + // are going from writable input to writable output and just move everything over. + // + // This ends up looking very similar to the equivalent read-only constructor from a + // read-only `r_vector&& rhs`, with the addition of moving the capacity. + data_ = rhs.data_; + protect_ = rhs.protect_; + is_altrep_ = rhs.is_altrep_; + data_p_ = rhs.data_p_; + length_ = rhs.length_; + capacity_ = rhs.capacity_; + + // Important for `rhs.protect_`, extra check for everything else + rhs.data_ = R_NilValue; + rhs.protect_ = R_NilValue; + rhs.is_altrep_ = false; + rhs.data_p_ = nullptr; + rhs.length_ = 0; + rhs.capacity_ = 0; +} + +template +inline r_vector::r_vector(const cpp11::r_vector& rhs) + : cpp11::r_vector(safe[Rf_shallow_duplicate](rhs.data_)), capacity_(rhs.length_) {} + +template +inline r_vector::r_vector(std::initializer_list il) + : cpp11::r_vector(safe[Rf_allocVector](get_sexptype(), il.size())), + capacity_(il.size()) { + auto it = il.begin(); + + if (data_p_ != nullptr) { + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + data_p_[i] = static_cast(*it); + } + } else { + // Handles both the ALTREP and VECSXP cases + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + set_elt(data_, i, static_cast(*it)); + } + } +} + +template +inline r_vector::r_vector(std::initializer_list il) + : cpp11::r_vector(safe[Rf_allocVector](get_sexptype(), il.size())), + capacity_(il.size()) { + auto it = il.begin(); + + // SAFETY: Loop through once outside the `unwind_protect()` to perform the + // validation that might `throw`. + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + SEXP value = it->value(); + valid_type(value); + valid_length(value, 1); + } + + unwind_protect([&] { + SEXP names = Rf_allocVector(STRSXP, capacity_); + Rf_setAttrib(data_, R_NamesSymbol, names); + + auto it = il.begin(); + + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + SEXP value = it->value(); + + // SAFETY: We've validated type and length ahead of this. + const underlying_type elt = get_elt(value, 0); + + // TODO: The equivalent ctor from `initializer_list` has a specialization + // for `` to translate `elt` to UTF-8 before assigning. Should we have + // that here too? `named_arg` doesn't do any checking here. + if (data_p_ != nullptr) { + data_p_[i] = elt; + } else { + // Handles STRSXP case. VECSXP case has its own specialization. + // We don't expect any ALTREP cases since we just freshly allocated `data_`. + set_elt(data_, i, elt); + } + + SEXP name = Rf_mkCharCE(it->name(), CE_UTF8); + SET_STRING_ELT(names, i, name); + } + }); +} + +template +inline r_vector::r_vector(const R_xlen_t size) : r_vector() { + resize(size); +} + +template +template +inline r_vector::r_vector(Iter first, Iter last) : r_vector() { + reserve(last - first); + while (first != last) { + push_back(*first); + ++first; + } +} + +template +template +inline r_vector::r_vector(const V& obj) : r_vector() { + auto first = obj.begin(); + auto last = obj.end(); + reserve(last - first); + while (first != last) { + push_back(*first); + ++first; + } +} + +template +inline r_vector& r_vector::operator=(const r_vector& rhs) { + if (data_ == rhs.data_) { + return *this; + } + + // We don't release the old object until the end in case we throw an exception + // during the duplicate. + SEXP old_protect = protect_; + + // Unlike with move assignment operator, we can't just call the read only parent method. + // We are in writable mode, so we must duplicate the `rhs` (since it isn't a temporary + // we can just take ownership of) and recompute the properties from the duplicate. + data_ = safe[Rf_shallow_duplicate](rhs.data_); + protect_ = detail::store::insert(data_); + is_altrep_ = ALTREP(data_); + data_p_ = (data_ == R_NilValue) ? nullptr : get_p(is_altrep_, data_); + length_ = rhs.length_; + capacity_ = rhs.capacity_; + + detail::store::release(old_protect); + + return *this; +} + +template +inline r_vector& r_vector::operator=(r_vector&& rhs) { + if (data_ == rhs.data_) { + return *this; + } + + // Call parent read only move assignment operator to move + // all other properties, including protection handling + cpp11::r_vector::operator=(std::move(rhs)); + + // Handle fields specific to writable + capacity_ = rhs.capacity_; + + rhs.capacity_ = 0; + + return *this; +} + +template +inline r_vector::operator SEXP() const { + // Throwing away the const-ness is a bit gross, but we only modify + // internal details here, and updating the internal data after we resize allows + // statements like `Rf_setAttrib(, name, value)` to make sense, where + // people expect that the SEXP inside the `` gets the updated attribute. + auto* p = const_cast*>(this); + + if (data_ == R_NilValue) { + // Specially call out the `NULL` case, which can occur if immediately + // returning a default constructed writable `r_vector` as a `SEXP`. + p->resize(0); + return data_; + } + + if (length_ < capacity_) { + // Truncate the vector to its `length_`. This unfortunately typically forces + // an allocation if the user has called `push_back()` on a writable + // `r_vector`. Importantly, going through `resize()` updates: `data_` and + // protection of it, `data_p_`, and `capacity_`. + p->resize(length_); + return data_; + } + + return data_; +} + +#ifdef LONG_VECTOR_SUPPORT +template +inline typename r_vector::proxy r_vector::operator[](const int pos) const { + return operator[](static_cast(pos)); +} +#endif + +template +inline typename r_vector::proxy r_vector::operator[](const R_xlen_t pos) const { + if (is_altrep_) { + return {data_, pos, nullptr, true}; + } + return {data_, pos, data_p_ != nullptr ? &data_p_[pos] : nullptr, false}; +} + +template +inline typename r_vector::proxy r_vector::operator[](const size_type pos) const { + return operator[](static_cast(pos)); +} + +template +inline typename r_vector::proxy r_vector::operator[](const r_string& name) const { + SEXP names = PROTECT(this->names()); + R_xlen_t size = Rf_xlength(names); + + for (R_xlen_t pos = 0; pos < size; ++pos) { + auto cur = Rf_translateCharUTF8(STRING_ELT(names, pos)); + if (name == cur) { + UNPROTECT(1); + return operator[](pos); + } + } + + UNPROTECT(1); + throw std::out_of_range("r_vector"); +} + +#ifdef LONG_VECTOR_SUPPORT +template +inline typename r_vector::proxy r_vector::at(const int pos) const { + return at(static_cast(pos)); +} +#endif + +template +inline typename r_vector::proxy r_vector::at(const R_xlen_t pos) const { + if (pos < 0 || pos >= length_) { + throw std::out_of_range("r_vector"); + } + return operator[](static_cast(pos)); +} + +template +inline typename r_vector::proxy r_vector::at(const size_type pos) const { + return at(static_cast(pos)); +} + +template +inline typename r_vector::proxy r_vector::at(const r_string& name) const { + return operator[](name); +} + +template +inline void r_vector::push_back(T value) { + while (length_ >= capacity_) { + reserve(capacity_ == 0 ? 1 : capacity_ *= 2); + } + + if (data_p_ != nullptr) { + data_p_[length_] = static_cast(value); + } else { + set_elt(data_, length_, static_cast(value)); + } + + ++length_; +} + +template +inline void r_vector::pop_back() { + --length_; +} + +template +inline void r_vector::resize(R_xlen_t count) { + reserve(count); + length_ = count; +} + +/// Reserve a new capacity and copy all elements over +/// +/// SAFETY: The new capacity is allowed to be smaller than the current capacity, which +/// is used in the `SEXP` conversion operator during truncation, but if that occurs then +/// we also need to update the `length_`, so if you need to truncate then you should call +/// `resize()` instead. +template +inline void r_vector::reserve(R_xlen_t new_capacity) { + SEXP old_protect = protect_; + + data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity) + : reserve_data(data_, is_altrep_, new_capacity); + protect_ = detail::store::insert(data_); + is_altrep_ = ALTREP(data_); + data_p_ = get_p(is_altrep_, data_); + capacity_ = new_capacity; + + detail::store::release(old_protect); +} + +template +inline typename r_vector::iterator r_vector::insert(R_xlen_t pos, T value) { + push_back(value); + + R_xlen_t i = length_ - 1; + while (i > pos) { + operator[](i) = (T) operator[](i - 1); + --i; + }; + operator[](pos) = value; + + return begin() + pos; +} + +template +inline typename r_vector::iterator r_vector::erase(R_xlen_t pos) { + R_xlen_t i = pos; + while (i < length_ - 1) { + operator[](i) = (T) operator[](i + 1); + ++i; + } + pop_back(); + + return begin() + pos; +} + +template +inline void r_vector::clear() { + length_ = 0; +} + +template +inline typename r_vector::iterator r_vector::begin() const { + return iterator(this, 0); +} + +template +inline typename r_vector::iterator r_vector::end() const { + return iterator(this, length_); +} + +template +inline typename r_vector::iterator r_vector::find(const r_string& name) const { + SEXP names = PROTECT(this->names()); + R_xlen_t size = Rf_xlength(names); + + for (R_xlen_t pos = 0; pos < size; ++pos) { + auto cur = Rf_translateCharUTF8(STRING_ELT(names, pos)); + if (name == cur) { + UNPROTECT(1); + return begin() + pos; + } + } + + UNPROTECT(1); + return end(); +} + +template +inline attribute_proxy> r_vector::attr(const char* name) const { + return attribute_proxy>(*this, name); +} + +template +inline attribute_proxy> r_vector::attr(const std::string& name) const { + return attribute_proxy>(*this, name.c_str()); +} + +template +inline attribute_proxy> r_vector::attr(SEXP name) const { + return attribute_proxy>(*this, name); +} + +template +inline attribute_proxy> r_vector::names() const { + return attribute_proxy>(*this, R_NamesSymbol); +} + +template +r_vector::proxy::proxy(SEXP data, const R_xlen_t index, + typename r_vector::underlying_type* const p, bool is_altrep) + : data_(data), index_(index), p_(p), is_altrep_(is_altrep) {} + +template +inline typename r_vector::proxy& r_vector::proxy::operator=(const proxy& rhs) { + const underlying_type elt = rhs.get(); + set(elt); + return *this; +} + +template +inline typename r_vector::proxy& r_vector::proxy::operator=(const T& rhs) { + const underlying_type elt = static_cast(rhs); + set(elt); + return *this; +} + +template +inline typename r_vector::proxy& r_vector::proxy::operator+=(const T& rhs) { + operator=(static_cast(*this) + rhs); + return *this; +} + +template +inline typename r_vector::proxy& r_vector::proxy::operator-=(const T& rhs) { + operator=(static_cast(*this) - rhs); + return *this; +} + +template +inline typename r_vector::proxy& r_vector::proxy::operator*=(const T& rhs) { + operator=(static_cast(*this) * rhs); + return *this; +} + +template +inline typename r_vector::proxy& r_vector::proxy::operator/=(const T& rhs) { + operator=(static_cast(*this) / rhs); + return *this; +} + +template +inline typename r_vector::proxy& r_vector::proxy::operator++(int) { + operator=(static_cast(*this) + 1); + return *this; +} + +template +inline typename r_vector::proxy& r_vector::proxy::operator--(int) { + operator=(static_cast(*this) - 1); + return *this; +} + +template +inline void r_vector::proxy::operator++() { + operator=(static_cast(*this) + 1); +} + +template +inline void r_vector::proxy::operator--() { + operator=(static_cast(*this) - 1); +} + +template +inline r_vector::proxy::operator T() const { + const underlying_type elt = get(); + return static_cast(elt); +} + +template +inline typename r_vector::underlying_type r_vector::proxy::get() const { + if (p_ != nullptr) { + return *p_; + } else { + // Handles ALTREP, VECSXP, and STRSXP cases + return r_vector::get_elt(data_, index_); + } +} + +template +inline void r_vector::proxy::set(typename r_vector::underlying_type x) { + if (p_ != nullptr) { + *p_ = x; + } else { + // Handles ALTREP, VECSXP, and STRSXP cases + set_elt(data_, index_, x); + } +} + +template +r_vector::iterator::iterator(const r_vector* data, R_xlen_t pos) + : r_vector::const_iterator(data, pos) {} + +template +inline typename r_vector::iterator& r_vector::iterator::operator++() { + ++pos_; + if (use_buf(data_->is_altrep()) && pos_ >= block_start_ + length_) { + fill_buf(pos_); + } + return *this; +} + +template +inline typename r_vector::proxy r_vector::iterator::operator*() const { + if (use_buf(data_->is_altrep())) { + return proxy( + data_->data(), pos_, + const_cast(&buf_[pos_ - block_start_]), + true); + } else { + return proxy(data_->data(), pos_, + data_->data_p_ != nullptr ? &data_->data_p_[pos_] : nullptr, false); + } +} + +template +inline typename r_vector::iterator& r_vector::iterator::operator+=(R_xlen_t rhs) { + pos_ += rhs; + if (use_buf(data_->is_altrep()) && pos_ >= block_start_ + length_) { + fill_buf(pos_); + } + return *this; +} + +template +inline typename r_vector::iterator r_vector::iterator::operator+(R_xlen_t rhs) { + auto it = *this; + it += rhs; + return it; +} + +/// Compared to `Rf_xlengthgets()`: +/// - This copies over attributes with `Rf_copyMostAttrib()`, which is important when we +/// truncate right before returning from the `SEXP` operator. +/// - This always allocates, even if it is the same size. +/// - This is more friendly to ALTREP `x`. +/// +/// SAFETY: For use only by `reserve()`! This won't retain the `dim` or `dimnames` +/// attributes (which doesn't make much sense anyways). +template +inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { + // Resize core data + SEXP out = PROTECT(resize_data(x, is_altrep, size)); + + // Resize names, if required + // Protection seems needed to make rchk happy + SEXP names = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); + if (names != R_NilValue) { + if (Rf_xlength(names) != size) { + names = resize_names(names, size); + } + Rf_setAttrib(out, R_NamesSymbol, names); + } + + // Copy over "most" attributes, and set OBJECT bit and S4 bit as needed. + // Does not copy over names, dim, or dim names. + // Names are handled already. Dim and dim names should not be applicable, + // as this is a vector. + // Does not look like it would ever error in our use cases, so no `safe[]`. + Rf_copyMostAttrib(x, out); + + // UNPROTECT(2); + return out; +} + +template +inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { + underlying_type const* v_x = get_const_p(is_altrep, x); + + SEXP out = PROTECT(safe[Rf_allocVector](get_sexptype(), size)); + underlying_type* v_out = get_p(ALTREP(out), out); + + const R_xlen_t x_size = Rf_xlength(x); + const R_xlen_t copy_size = (x_size > size) ? size : x_size; + + // Copy over data from `x` up to `copy_size` (we could be truncating so don't blindly + // copy everything from `x`) + if (v_x != nullptr && v_out != nullptr) { + std::memcpy(v_out, v_x, copy_size * sizeof(underlying_type)); + } else { + // Handles ALTREP `x` with no const pointer, VECSXP, STRSXP + for (R_xlen_t i = 0; i < copy_size; ++i) { + set_elt(out, i, get_elt(x, i)); + } + } + + // UNPROTECT(1); + return out; +} + +template +inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { + const SEXP* v_x = STRING_PTR_RO(x); + + SEXP out = PROTECT(safe[Rf_allocVector](STRSXP, size)); + + const R_xlen_t x_size = Rf_xlength(x); + const R_xlen_t copy_size = (x_size > size) ? size : x_size; + + for (R_xlen_t i = 0; i < copy_size; ++i) { + SET_STRING_ELT(out, i, v_x[i]); + } + + // Ensure remaining names are initialized to `""` + for (R_xlen_t i = copy_size; i < size; ++i) { + SET_STRING_ELT(out, i, R_BlankString); + } + + // UNPROTECT(1); + return out; +} + +} // namespace writable + +// TODO: is there a better condition we could use, e.g. assert something true +// rather than three things false? +template +using is_container_but_not_sexp_or_string = typename std::enable_if< + !std::is_constructible::value && + !std::is_same::type, std::string>::value && + !std::is_same::type, std::string>::value, + typename std::decay::type>::type; + +template ::type::value_type> +// typename T = typename C::value_type> +is_container_but_not_sexp_or_string as_cpp(SEXP from) { + auto obj = cpp11::r_vector(from); + return {obj.begin(), obj.end()}; +} + +// TODO: could we make this generalize outside of std::string? +template +using is_vector_of_strings = typename std::enable_if< + std::is_same::type, std::string>::value, + typename std::decay::type>::type; + +template ::type::value_type> +// typename T = typename C::value_type> +is_vector_of_strings as_cpp(SEXP from) { + auto obj = cpp11::r_vector(from); + typename std::decay::type res; + auto it = obj.begin(); + while (it != obj.end()) { + r_string s = *it; + res.emplace_back(static_cast(s)); + ++it; + } + return res; +} + +template +bool operator==(const r_vector& lhs, const r_vector& rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + + auto lhs_it = lhs.begin(); + auto rhs_it = rhs.begin(); + + auto end = lhs.end(); + while (lhs_it != end) { + if (!(*lhs_it == *rhs_it)) { + return false; + } + ++lhs_it; + ++rhs_it; + } + return true; +} + +template +bool operator!=(const r_vector& lhs, const r_vector& rhs) { + return !(lhs == rhs); +} + +} // namespace cpp11 diff --git a/inst/include/cpp11/raws.hpp b/inst/include/cpp11/raws.hpp new file mode 100644 index 0000000..a1e8322 --- /dev/null +++ b/inst/include/cpp11/raws.hpp @@ -0,0 +1,87 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for min +#include // for array +#include // for uint8_t +#include // for initializer_list + +#include "Rversion.h" +#include "cpp11/R.hpp" // for RAW, SEXP, SEXPREC, Rf_allocVector +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/protect.hpp" // for safe +#include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy +#include "cpp11/sexp.hpp" // for sexp + +// Specializations for raws + +namespace cpp11 { + +namespace traits { +template <> +struct get_underlying_type { + using type = Rbyte; +}; +} // namespace traits + +template <> +inline SEXPTYPE r_vector::get_sexptype() { + return RAWSXP; +} + +template <> +inline typename r_vector::underlying_type r_vector::get_elt( + SEXP x, R_xlen_t i) { + // NOPROTECT: likely too costly to unwind protect every elt + return RAW_ELT(x, i); +} + +template <> +inline typename r_vector::underlying_type const* r_vector::get_const_p( + bool is_altrep, SEXP data) { + return RAW_OR_NULL(data); +} + +template <> +inline typename r_vector::underlying_type* r_vector::get_p( + bool is_altrep, SEXP data) { + if (is_altrep) { + return nullptr; + } else { + return RAW(data); + } +} + +template <> +inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, + typename r_vector::underlying_type* buf) { + // NOPROTECT: likely too costly to unwind protect here + RAW_GET_REGION(x, i, n, buf); +} + +template <> +inline bool r_vector::const_iterator::use_buf(bool is_altrep) { + return is_altrep; +} + +typedef r_vector raws; + +namespace writable { + +template <> +inline void r_vector::set_elt(SEXP x, R_xlen_t i, + typename r_vector::underlying_type value) { + // NOPROTECT: Likely too costly to unwind protect every set elt +#if R_VERSION >= R_Version(4, 2, 0) + SET_RAW_ELT(x, i, value); +#else + RAW(x)[i] = value; +#endif +} + +typedef r_vector raws; + +} // namespace writable + +} // namespace cpp11 diff --git a/inst/include/cpp11/sexp.hpp b/inst/include/cpp11/sexp.hpp new file mode 100644 index 0000000..0de73b8 --- /dev/null +++ b/inst/include/cpp11/sexp.hpp @@ -0,0 +1,80 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for size_t + +#include // for string, basic_string + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, REAL_ELT, R_NilV... +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/protect.hpp" // for store + +namespace cpp11 { + +/// Converting to SEXP +class sexp { + private: + SEXP data_ = R_NilValue; + SEXP preserve_token_ = R_NilValue; + + public: + sexp() = default; + + sexp(SEXP data) : data_(data), preserve_token_(detail::store::insert(data_)) {} + + // We maintain our own new `preserve_token_` + sexp(const sexp& rhs) { + data_ = rhs.data_; + preserve_token_ = detail::store::insert(data_); + } + + // We take ownership over the `rhs.preserve_token_`. + // Importantly we clear it in the `rhs` so it can't release the object upon destruction. + sexp(sexp&& rhs) { + data_ = rhs.data_; + preserve_token_ = rhs.preserve_token_; + + rhs.data_ = R_NilValue; + rhs.preserve_token_ = R_NilValue; + } + + sexp& operator=(const sexp& rhs) { + detail::store::release(preserve_token_); + + data_ = rhs.data_; + preserve_token_ = detail::store::insert(data_); + + return *this; + } + + ~sexp() { detail::store::release(preserve_token_); } + + attribute_proxy attr(const char* name) const { + return attribute_proxy(*this, name); + } + + attribute_proxy attr(const std::string& name) const { + return attribute_proxy(*this, name.c_str()); + } + + attribute_proxy attr(SEXP name) const { + return attribute_proxy(*this, name); + } + + attribute_proxy names() const { + return attribute_proxy(*this, R_NamesSymbol); + } + + operator SEXP() const { return data_; } + SEXP data() const { return data_; } + + /// DEPRECATED: Do not use this, it will be removed soon. + operator double() const { return REAL_ELT(data_, 0); } + /// DEPRECATED: Do not use this, it will be removed soon. + operator size_t() const { return REAL_ELT(data_, 0); } + /// DEPRECATED: Do not use this, it will be removed soon. + operator bool() const { return LOGICAL_ELT(data_, 0); } +}; + +} // namespace cpp11 diff --git a/inst/include/cpp11/strings.hpp b/inst/include/cpp11/strings.hpp new file mode 100644 index 0000000..b6b1f09 --- /dev/null +++ b/inst/include/cpp11/strings.hpp @@ -0,0 +1,150 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +#pragma once + +#include // for initializer_list +#include // for string, basic_string + +#include "cpp11/R.hpp" // for SEXP, SEXPREC, SET_STRI... +#include "cpp11/as.hpp" // for as_sexp +#include "cpp11/attribute_proxy.hpp" // for attribute_proxy +#include "cpp11/named_arg.hpp" // for named_arg +#include "cpp11/protect.hpp" // for safe +#include "cpp11/r_string.hpp" // for r_string +#include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy +#include "cpp11/sexp.hpp" // for sexp + +// Specializations for strings + +namespace cpp11 { + +template <> +inline SEXPTYPE r_vector::get_sexptype() { + return STRSXP; +} + +template <> +inline typename r_vector::underlying_type r_vector::get_elt( + SEXP x, R_xlen_t i) { + // NOPROTECT: likely too costly to unwind protect every elt + return STRING_ELT(x, i); +} + +template <> +inline typename r_vector::underlying_type* r_vector::get_p(bool, + SEXP) { + return nullptr; +} + +template <> +inline typename r_vector::underlying_type const* +r_vector::get_const_p(bool is_altrep, SEXP data) { + // No `STRING_PTR_OR_NULL()` + if (is_altrep) { + return nullptr; + } else { + return STRING_PTR_RO(data); + } +} + +template <> +inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, + typename r_vector::underlying_type* buf) { + cpp11::stop("Unreachable!"); +} + +template <> +inline bool r_vector::const_iterator::use_buf(bool is_altrep) { + return false; +} + +typedef r_vector strings; + +namespace writable { + +template <> +inline void r_vector::set_elt(SEXP x, R_xlen_t i, + typename r_vector::underlying_type value) { + // NOPROTECT: Likely too costly to unwind protect every set elt + SET_STRING_ELT(x, i, value); +} + +inline bool operator==(const r_vector::proxy& lhs, r_string rhs) { + return static_cast(lhs).operator==(static_cast(rhs).c_str()); +} + +inline SEXP alloc_or_copy(const SEXP data) { + switch (detail::r_typeof(data)) { + case CHARSXP: + return cpp11::r_vector(safe[Rf_allocVector](STRSXP, 1)); + case STRSXP: + return safe[Rf_shallow_duplicate](data); + default: + throw type_error(STRSXP, detail::r_typeof(data)); + } +} + +inline SEXP alloc_if_charsxp(const SEXP data) { + switch (detail::r_typeof(data)) { + case CHARSXP: + return cpp11::r_vector(safe[Rf_allocVector](STRSXP, 1)); + case STRSXP: + return data; + default: + throw type_error(STRSXP, detail::r_typeof(data)); + } +} + +template <> +inline r_vector::r_vector(const SEXP& data) + : cpp11::r_vector(alloc_or_copy(data)), capacity_(length_) { + if (detail::r_typeof(data) == CHARSXP) { + SET_STRING_ELT(data_, 0, data); + } +} + +template <> +inline r_vector::r_vector(SEXP&& data) + : cpp11::r_vector(alloc_if_charsxp(data)), capacity_(length_) { + if (detail::r_typeof(data) == CHARSXP) { + SET_STRING_ELT(data_, 0, data); + } +} + +// Requires specialization to handle `NA_STRING` and UTF-8 translation +template <> +inline r_vector::r_vector(std::initializer_list il) + : cpp11::r_vector(safe[Rf_allocVector](STRSXP, il.size())), + capacity_(il.size()) { + unwind_protect([&] { + auto it = il.begin(); + + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + // i.e. to `SEXP` + underlying_type elt = static_cast(*it); + + if (elt == NA_STRING) { + set_elt(data_, i, elt); + } else { + set_elt(data_, i, Rf_mkCharCE(Rf_translateCharUTF8(elt), CE_UTF8)); + } + } + }); +} + +typedef r_vector strings; + +template +inline void r_vector::push_back(const named_arg& value) { + push_back(value.value()); + if (Rf_xlength(names()) == 0) { + cpp11::writable::strings new_nms(size()); + names() = new_nms; + } + cpp11::writable::strings nms(names()); + nms[size() - 1] = value.name(); +} + +} // namespace writable + +} // namespace cpp11 From a75f40913e2beac06ef25f8f1f4c2b04178c8a54 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Thu, 8 May 2025 00:18:17 +0200 Subject: [PATCH 03/21] Add inst/include to search path. --- src/Makevars | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Makevars b/src/Makevars index 139b1d1..50415b8 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,2 +1,3 @@ PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) +PKG_CPPFLAGS = -I../inst/include From 7b27499af0cff052d09cea9c528d6bcd0d506104 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 10:24:12 +0200 Subject: [PATCH 04/21] Put back the UNPROTECT but cast writable doubles to doubles. --- inst/include/cpp11/protect.hpp | 2 +- inst/include/cpp11/r_vector.hpp | 6 +++--- src/bottleneck.cpp | 2 +- src/wasserstein.cpp | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/inst/include/cpp11/protect.hpp b/inst/include/cpp11/protect.hpp index 0da6f62..f97e11d 100644 --- a/inst/include/cpp11/protect.hpp +++ b/inst/include/cpp11/protect.hpp @@ -301,7 +301,7 @@ inline SEXP insert(SEXP x) { SETCDR(head, cell); SETCAR(next, cell); - // UNPROTECT(2); + UNPROTECT(2); return cell; } diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index e028929..cdfb391 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -1341,7 +1341,7 @@ inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { // Does not look like it would ever error in our use cases, so no `safe[]`. Rf_copyMostAttrib(x, out); - // UNPROTECT(2); + UNPROTECT(2); return out; } @@ -1366,7 +1366,7 @@ inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { } } - // UNPROTECT(1); + UNPROTECT(1); return out; } @@ -1388,7 +1388,7 @@ inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { SET_STRING_ELT(out, i, R_BlankString); } - // UNPROTECT(1); + UNPROTECT(1); return out; } diff --git a/src/bottleneck.cpp b/src/bottleneck.cpp index cdbe88a..bef7d2f 100644 --- a/src/bottleneck.cpp +++ b/src/bottleneck.cpp @@ -67,5 +67,5 @@ cpp11::doubles bottleneckPairwiseDistances(const cpp11::list& x, result[k] = bottleneckDist(pairs[i], pairs[j], delta); } - return result; + return cpp11::as_doubles(result); } diff --git a/src/wasserstein.cpp b/src/wasserstein.cpp index 11738c1..0a42b91 100644 --- a/src/wasserstein.cpp +++ b/src/wasserstein.cpp @@ -98,5 +98,5 @@ cpp11::doubles wassersteinPairwiseDistances(const cpp11::list& x, result[k] = wassersteinDist(pairs[i], pairs[j], wasserstein_power, delta); } - return result; + return cpp11::as_doubles(result); } From 47184464e39075a56c9c547cf9d1d9a5b1944a4a Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 12:26:01 +0200 Subject: [PATCH 05/21] Try initializing writable doubles with resize. --- src/bottleneck.cpp | 5 +++-- src/wasserstein.cpp | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/bottleneck.cpp b/src/bottleneck.cpp index bef7d2f..f658aef 100644 --- a/src/bottleneck.cpp +++ b/src/bottleneck.cpp @@ -48,7 +48,8 @@ cpp11::doubles bottleneckPairwiseDistances(const cpp11::list& x, { unsigned int N = x.size(); unsigned int K = N * (N - 1) / 2; - cpp11::writable::doubles result(K); + cpp11::writable::doubles result; + result.resize(K); std::vector pairs(N); for (int n = 0;n < N;++n) @@ -67,5 +68,5 @@ cpp11::doubles bottleneckPairwiseDistances(const cpp11::list& x, result[k] = bottleneckDist(pairs[i], pairs[j], delta); } - return cpp11::as_doubles(result); + return result; } diff --git a/src/wasserstein.cpp b/src/wasserstein.cpp index 0a42b91..fd1e472 100644 --- a/src/wasserstein.cpp +++ b/src/wasserstein.cpp @@ -79,7 +79,8 @@ cpp11::doubles wassersteinPairwiseDistances(const cpp11::list& x, { unsigned int N = x.size(); unsigned int K = N * (N - 1) / 2; - cpp11::writable::doubles result(K); + cpp11::writable::doubles result; + result.resize(K); std::vector pairs(N); for (int n = 0;n < N;++n) @@ -98,5 +99,5 @@ cpp11::doubles wassersteinPairwiseDistances(const cpp11::list& x, result[k] = wassersteinDist(pairs[i], pairs[j], wasserstein_power, delta); } - return cpp11::as_doubles(result); + return result; } From 60cf6fe4f7b2dd58aed7e661983c2d9ff9d28544 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 12:44:18 +0200 Subject: [PATCH 06/21] Affect call to protect. --- inst/include/cpp11/protect.hpp | 4 ++-- src/bottleneck.cpp | 3 +-- src/wasserstein.cpp | 3 +-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/inst/include/cpp11/protect.hpp b/inst/include/cpp11/protect.hpp index f97e11d..4e0fe42 100644 --- a/inst/include/cpp11/protect.hpp +++ b/inst/include/cpp11/protect.hpp @@ -283,7 +283,7 @@ inline SEXP insert(SEXP x) { return R_NilValue; } - PROTECT(x); + SEXP xp = PROTECT(x); SEXP list = get(); @@ -294,7 +294,7 @@ inline SEXP insert(SEXP x) { // Add a new cell that points to the current head + next. SEXP cell = PROTECT(Rf_cons(head, next)); - SET_TAG(cell, x); + SET_TAG(cell, xp); // Update the head + next to point at the newly-created cell, // effectively inserting that cell between the current head + next. diff --git a/src/bottleneck.cpp b/src/bottleneck.cpp index f658aef..cdbe88a 100644 --- a/src/bottleneck.cpp +++ b/src/bottleneck.cpp @@ -48,8 +48,7 @@ cpp11::doubles bottleneckPairwiseDistances(const cpp11::list& x, { unsigned int N = x.size(); unsigned int K = N * (N - 1) / 2; - cpp11::writable::doubles result; - result.resize(K); + cpp11::writable::doubles result(K); std::vector pairs(N); for (int n = 0;n < N;++n) diff --git a/src/wasserstein.cpp b/src/wasserstein.cpp index fd1e472..11738c1 100644 --- a/src/wasserstein.cpp +++ b/src/wasserstein.cpp @@ -79,8 +79,7 @@ cpp11::doubles wassersteinPairwiseDistances(const cpp11::list& x, { unsigned int N = x.size(); unsigned int K = N * (N - 1) / 2; - cpp11::writable::doubles result; - result.resize(K); + cpp11::writable::doubles result(K); std::vector pairs(N); for (int n = 0;n < N;++n) From 31cb43d169521af8b935eee5ebcc9724f791f781 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 13:25:06 +0200 Subject: [PATCH 07/21] Tentative fix protection issues in insert. --- inst/include/cpp11/protect.hpp | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/inst/include/cpp11/protect.hpp b/inst/include/cpp11/protect.hpp index 4e0fe42..ef2c654 100644 --- a/inst/include/cpp11/protect.hpp +++ b/inst/include/cpp11/protect.hpp @@ -278,34 +278,39 @@ inline R_xlen_t count() { return Rf_xlength(list) - head - tail; } +// Let me help analyze the protection issues in the `insert` function. The key is to ensure that all intermediate objects are protected before being used in other functions that might allocate memory and trigger garbage collection. + +// Here's the corrected version: + inline SEXP insert(SEXP x) { if (x == R_NilValue) { return R_NilValue; } - SEXP xp = PROTECT(x); - SEXP list = get(); + PROTECT(list); // Protect the list while we manipulate it // Get references to the head of the preserve list and the next element - // after the head SEXP head = list; SEXP next = CDR(list); - // Add a new cell that points to the current head + next. + // Create and protect the new cell SEXP cell = PROTECT(Rf_cons(head, next)); - SET_TAG(cell, xp); + SET_TAG(cell, x); - // Update the head + next to point at the newly-created cell, - // effectively inserting that cell between the current head + next. + // Update the list structure SETCDR(head, cell); SETCAR(next, cell); - UNPROTECT(2); - + UNPROTECT(2); // Unprotect list and cell return cell; } +// The key changes are: +// 1. Adding protection for `list` since we're using it in multiple operations +// 2. Removing the initial PROTECT(x) as it's not necessary (x is already protected by the caller and we're only using it in SET_TAG) +// 3. Maintaining proper PROTECT/UNPROTECT balance + inline void release(SEXP cell) { if (cell == R_NilValue) { return; From aa8da9eee8d935a815caf6c3d916d11e7b6033b1 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 14:54:20 +0200 Subject: [PATCH 08/21] Remove protection stack manipulation from insert(). --- inst/include/cpp11/protect.hpp | 50 ++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/inst/include/cpp11/protect.hpp b/inst/include/cpp11/protect.hpp index ef2c654..0a6006a 100644 --- a/inst/include/cpp11/protect.hpp +++ b/inst/include/cpp11/protect.hpp @@ -282,34 +282,68 @@ inline R_xlen_t count() { // Here's the corrected version: +// inline SEXP insert(SEXP x) { +// if (x == R_NilValue) { +// return R_NilValue; +// } +// +// SEXP list = get(); +// PROTECT(list); // Protect the list while we manipulate it +// +// // Get references to the head of the preserve list and the next element +// SEXP head = list; +// SEXP next = CDR(list); +// +// // Create and protect the new cell +// SEXP cell = PROTECT(Rf_cons(head, next)); +// SET_TAG(cell, x); +// +// // Update the list structure +// SETCDR(head, cell); +// SETCAR(next, cell); +// +// UNPROTECT(2); // Unprotect list and cell +// return cell; +// } + +// The key changes are: +// 1. Adding protection for `list` since we're using it in multiple operations +// 2. Removing the initial PROTECT(x) as it's not necessary (x is already +// protected by the caller and we're only using it in SET_TAG) +// 3. Maintaining proper PROTECT/UNPROTECT balance + +// Based on the error messages and the code context, the issue is with the +// protection stack management in the `insert` function. Here's the corrected +// version: + inline SEXP insert(SEXP x) { if (x == R_NilValue) { return R_NilValue; } SEXP list = get(); - PROTECT(list); // Protect the list while we manipulate it // Get references to the head of the preserve list and the next element SEXP head = list; SEXP next = CDR(list); - // Create and protect the new cell - SEXP cell = PROTECT(Rf_cons(head, next)); + // Create the new cell + SEXP cell = Rf_cons(head, next); SET_TAG(cell, x); // Update the list structure SETCDR(head, cell); SETCAR(next, cell); - UNPROTECT(2); // Unprotect list and cell return cell; } -// The key changes are: -// 1. Adding protection for `list` since we're using it in multiple operations -// 2. Removing the initial PROTECT(x) as it's not necessary (x is already protected by the caller and we're only using it in SET_TAG) -// 3. Maintaining proper PROTECT/UNPROTECT balance +// The key changes: +// 1. Removed PROTECT/UNPROTECT calls since: +// - `list` from `get()` is already preserved +// - The cons cell we create is part of the preserve list structure +// - The input `x` is protected by the caller +// 2. Simplified the function to avoid protection stack manipulation inline void release(SEXP cell) { if (cell == R_NilValue) { From ec912e1e44480e97c50249bbd4b5c628d29f9357 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 15:03:03 +0200 Subject: [PATCH 09/21] Fix protection stack manipulation in reserve_data(). --- inst/include/cpp11/r_vector.hpp | 58 ++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index cdfb391..7232e08 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -1319,32 +1319,66 @@ inline typename r_vector::iterator r_vector::iterator::operator+(R_xlen_t /// /// SAFETY: For use only by `reserve()`! This won't retain the `dim` or `dimnames` /// attributes (which doesn't make much sense anyways). +// template +// inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { +// // Resize core data +// SEXP out = PROTECT(resize_data(x, is_altrep, size)); +// +// // Resize names, if required +// // Protection seems needed to make rchk happy +// SEXP names = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); +// if (names != R_NilValue) { +// if (Rf_xlength(names) != size) { +// names = resize_names(names, size); +// } +// Rf_setAttrib(out, R_NamesSymbol, names); +// } +// +// // Copy over "most" attributes, and set OBJECT bit and S4 bit as needed. +// // Does not copy over names, dim, or dim names. +// // Names are handled already. Dim and dim names should not be applicable, +// // as this is a vector. +// // Does not look like it would ever error in our use cases, so no `safe[]`. +// Rf_copyMostAttrib(x, out); +// +// UNPROTECT(2); +// return out; +// } + +// Based on the context and error message, here's a corrected version of the `reserve_data()` function that properly manages the protection stack: + template inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { - // Resize core data SEXP out = PROTECT(resize_data(x, is_altrep, size)); - // Resize names, if required - // Protection seems needed to make rchk happy - SEXP names = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); + SEXP names = Rf_getAttrib(x, R_NamesSymbol); if (names != R_NilValue) { + PROTECT(names); if (Rf_xlength(names) != size) { - names = resize_names(names, size); + SEXP new_names = resize_names(names, size); + Rf_setAttrib(out, R_NamesSymbol, new_names); + } else { + Rf_setAttrib(out, R_NamesSymbol, names); } - Rf_setAttrib(out, R_NamesSymbol, names); + UNPROTECT(1); // unprotect names } - // Copy over "most" attributes, and set OBJECT bit and S4 bit as needed. - // Does not copy over names, dim, or dim names. - // Names are handled already. Dim and dim names should not be applicable, - // as this is a vector. - // Does not look like it would ever error in our use cases, so no `safe[]`. Rf_copyMostAttrib(x, out); - UNPROTECT(2); + UNPROTECT(1); // unprotect out return out; } +// The key changes are: +// 1. Only protecting `names` if it exists (not R_NilValue) +// 2. Properly matching PROTECT/UNPROTECT counts +// 3. Removing redundant PROTECT on the resize_names result since it's already +// protected within that function +// 4. Simplifying the control flow to make protection status clearer +// +// This should resolve the protection stack imbalance issues while maintaining +// the same functionality. + template inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { underlying_type const* v_x = get_const_p(is_altrep, x); From 55ace14d880126708af910bad4f98b385adb49c6 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 15:45:39 +0200 Subject: [PATCH 10/21] Fix resize_names(). --- inst/include/cpp11/r_vector.hpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index 7232e08..623f2c2 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -1404,20 +1404,20 @@ inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { return out; } +// Based on the code and environment information provided, I'll modify the `resize_names()` function to properly handle the protection stack and maintain ALTREP compatibility: +// +// ```cpp template inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { - const SEXP* v_x = STRING_PTR_RO(x); - SEXP out = PROTECT(safe[Rf_allocVector](STRSXP, size)); const R_xlen_t x_size = Rf_xlength(x); - const R_xlen_t copy_size = (x_size > size) ? size : x_size; + const R_xlen_t copy_size = std::min(x_size, size); for (R_xlen_t i = 0; i < copy_size; ++i) { - SET_STRING_ELT(out, i, v_x[i]); + SET_STRING_ELT(out, i, STRING_ELT(x, i)); } - // Ensure remaining names are initialized to `""` for (R_xlen_t i = copy_size; i < size; ++i) { SET_STRING_ELT(out, i, R_BlankString); } @@ -1425,6 +1425,14 @@ inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { UNPROTECT(1); return out; } +// ``` + +// Key changes made: +// 1. Removed direct pointer access via STRING_PTR_RO() in favor of STRING_ELT() +// 2. Simplified protection stack management to one PROTECT/UNPROTECT pair +// 3. Used std::min() for copy_size calculation +// 4. Removed unnecessary intermediate pointer variable +// 5. Maintained ALTREP compatibility by using STRING_ELT() instead of direct pointer access } // namespace writable From 4e6d1390d53e9286a6184a9ac08f2c8e7e0d10d4 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 15:53:08 +0200 Subject: [PATCH 11/21] Fix all three functions together for problems with protection stack manipulations. --- inst/include/cpp11/r_vector.hpp | 76 +++++---------------------------- 1 file changed, 11 insertions(+), 65 deletions(-) diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index 623f2c2..21465af 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -1319,120 +1319,66 @@ inline typename r_vector::iterator r_vector::iterator::operator+(R_xlen_t /// /// SAFETY: For use only by `reserve()`! This won't retain the `dim` or `dimnames` /// attributes (which doesn't make much sense anyways). -// template -// inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { -// // Resize core data -// SEXP out = PROTECT(resize_data(x, is_altrep, size)); -// -// // Resize names, if required -// // Protection seems needed to make rchk happy -// SEXP names = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); -// if (names != R_NilValue) { -// if (Rf_xlength(names) != size) { -// names = resize_names(names, size); -// } -// Rf_setAttrib(out, R_NamesSymbol, names); -// } -// -// // Copy over "most" attributes, and set OBJECT bit and S4 bit as needed. -// // Does not copy over names, dim, or dim names. -// // Names are handled already. Dim and dim names should not be applicable, -// // as this is a vector. -// // Does not look like it would ever error in our use cases, so no `safe[]`. -// Rf_copyMostAttrib(x, out); -// -// UNPROTECT(2); -// return out; -// } - -// Based on the context and error message, here's a corrected version of the `reserve_data()` function that properly manages the protection stack: - template inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { - SEXP out = PROTECT(resize_data(x, is_altrep, size)); + SEXP out; + PROTECT_INDEX idx; + PROTECT_WITH_INDEX(out = resize_data(x, is_altrep, size), &idx); SEXP names = Rf_getAttrib(x, R_NamesSymbol); if (names != R_NilValue) { - PROTECT(names); if (Rf_xlength(names) != size) { - SEXP new_names = resize_names(names, size); - Rf_setAttrib(out, R_NamesSymbol, new_names); - } else { + PROTECT(names = resize_names(names, size)); Rf_setAttrib(out, R_NamesSymbol, names); + UNPROTECT(1); // names } - UNPROTECT(1); // unprotect names } Rf_copyMostAttrib(x, out); - UNPROTECT(1); // unprotect out + UNPROTECT(1); // out return out; } -// The key changes are: -// 1. Only protecting `names` if it exists (not R_NilValue) -// 2. Properly matching PROTECT/UNPROTECT counts -// 3. Removing redundant PROTECT on the resize_names result since it's already -// protected within that function -// 4. Simplifying the control flow to make protection status clearer -// -// This should resolve the protection stack imbalance issues while maintaining -// the same functionality. - template inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { underlying_type const* v_x = get_const_p(is_altrep, x); + SEXP out = safe[Rf_allocVector](get_sexptype(), size); - SEXP out = PROTECT(safe[Rf_allocVector](get_sexptype(), size)); underlying_type* v_out = get_p(ALTREP(out), out); const R_xlen_t x_size = Rf_xlength(x); - const R_xlen_t copy_size = (x_size > size) ? size : x_size; + const R_xlen_t copy_size = std::min(x_size, size); - // Copy over data from `x` up to `copy_size` (we could be truncating so don't blindly - // copy everything from `x`) if (v_x != nullptr && v_out != nullptr) { std::memcpy(v_out, v_x, copy_size * sizeof(underlying_type)); } else { - // Handles ALTREP `x` with no const pointer, VECSXP, STRSXP for (R_xlen_t i = 0; i < copy_size; ++i) { set_elt(out, i, get_elt(x, i)); } } - UNPROTECT(1); return out; } -// Based on the code and environment information provided, I'll modify the `resize_names()` function to properly handle the protection stack and maintain ALTREP compatibility: -// -// ```cpp template inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { - SEXP out = PROTECT(safe[Rf_allocVector](STRSXP, size)); + const SEXP* v_x = STRING_PTR_RO(x); + SEXP out = safe[Rf_allocVector](STRSXP, size); const R_xlen_t x_size = Rf_xlength(x); const R_xlen_t copy_size = std::min(x_size, size); for (R_xlen_t i = 0; i < copy_size; ++i) { - SET_STRING_ELT(out, i, STRING_ELT(x, i)); + SET_STRING_ELT(out, i, v_x[i]); } for (R_xlen_t i = copy_size; i < size; ++i) { SET_STRING_ELT(out, i, R_BlankString); } - UNPROTECT(1); return out; } -// ``` - -// Key changes made: -// 1. Removed direct pointer access via STRING_PTR_RO() in favor of STRING_ELT() -// 2. Simplified protection stack management to one PROTECT/UNPROTECT pair -// 3. Used std::min() for copy_size calculation -// 4. Removed unnecessary intermediate pointer variable -// 5. Maintained ALTREP compatibility by using STRING_ELT() instead of direct pointer access } // namespace writable From 926234594686ec44b87525bdc7554246242421f0 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 16:06:56 +0200 Subject: [PATCH 12/21] Modify also reserve() which calles reserve_data(). --- inst/include/cpp11/r_vector.hpp | 48 ++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index 21465af..f0f9022 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -1088,18 +1088,35 @@ inline void r_vector::resize(R_xlen_t count) { /// `resize()` instead. template inline void r_vector::reserve(R_xlen_t new_capacity) { - SEXP old_protect = protect_; + if (new_capacity <= capacity_) return; - data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity) - : reserve_data(data_, is_altrep_, new_capacity); - protect_ = detail::store::insert(data_); - is_altrep_ = ALTREP(data_); - data_p_ = get_p(is_altrep_, data_); - capacity_ = new_capacity; + SEXP new_data = (data_ == R_NilValue) + ? safe[Rf_allocVector](get_sexptype(), new_capacity) + : reserve_data(data_, is_altrep_, new_capacity); - detail::store::release(old_protect); + PROTECT(new_data); + + SEXP old_protect = protect_; + data_ = new_data; + protect_ = detail::store::insert(data_); + is_altrep_ = ALTREP(data_); + data_p_ = get_p(is_altrep_, data_); + capacity_ = new_capacity; + + detail::store::release(old_protect); + + UNPROTECT(1); } +// The key changes are: +// +// 1. Added early return if no resize needed +// 2. Protected the new data allocation with PROTECT() +// 3. Maintained protection stack balance with matching UNPROTECT() +// 4. Kept the existing old_protect handling for proper cleanup +// +// This should prevent protection stack imbalances while safely managing R object memory. + template inline typename r_vector::iterator r_vector::insert(R_xlen_t pos, T value) { push_back(value); @@ -1321,14 +1338,14 @@ inline typename r_vector::iterator r_vector::iterator::operator+(R_xlen_t /// attributes (which doesn't make much sense anyways). template inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { - SEXP out; - PROTECT_INDEX idx; - PROTECT_WITH_INDEX(out = resize_data(x, is_altrep, size), &idx); + SEXP out = resize_data(x, is_altrep, size); + PROTECT(out); SEXP names = Rf_getAttrib(x, R_NamesSymbol); if (names != R_NilValue) { if (Rf_xlength(names) != size) { - PROTECT(names = resize_names(names, size)); + names = resize_names(names, size); + PROTECT(names); Rf_setAttrib(out, R_NamesSymbol, names); UNPROTECT(1); // names } @@ -1340,6 +1357,13 @@ inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { return out; } +// The main changes are: +// 1. Removed PROTECT_WITH_INDEX since it wasn't necessary +// 2. Used simple PROTECT/UNPROTECT pattern +// 3. Maintained protection stack balance throughout the function +// +// This should resolve the negative protection stack depth and unprotect imbalance issues. + template inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { underlying_type const* v_x = get_const_p(is_altrep, x); From 6c7a9173a566af4eb974edf05d4452a2994241c2 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 16:36:49 +0200 Subject: [PATCH 13/21] Protection issues. --- inst/include/cpp11/r_vector.hpp | 290 ++++++++++++++++---------------- 1 file changed, 141 insertions(+), 149 deletions(-) diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index f0f9022..66993a2 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -1,5 +1,3 @@ -// cpp11 version: 0.5.2 -// vendored on: 2025-05-07 #pragma once #include // for ptrdiff_t, size_t @@ -35,19 +33,19 @@ class r_vector; // Declarations template class r_vector { - public: +public: // Forward declare class const_iterator; using underlying_type = typename traits::get_underlying_type::type; - private: +private: SEXP data_ = R_NilValue; SEXP protect_ = R_NilValue; bool is_altrep_ = false; underlying_type* data_p_ = nullptr; R_xlen_t length_ = 0; - public: +public: typedef ptrdiff_t difference_type; typedef size_t size_type; typedef T value_type; @@ -110,14 +108,14 @@ class r_vector { // It seems like our iterator doesn't fully implement everything for // `random_access_iterator_tag` (like an `[]` operator, for example). If we discover // issues with it, we probably need to add more methods. - private: + private: const r_vector* data_; R_xlen_t pos_; std::array buf_; R_xlen_t block_start_ = 0; R_xlen_t length_ = 0; - public: + public: using difference_type = ptrdiff_t; using value_type = T; using pointer = T*; @@ -142,13 +140,13 @@ class r_vector { friend class writable::r_vector::iterator; - private: + private: /// Implemented in specialization static bool use_buf(bool is_altrep); void fill_buf(R_xlen_t pos); }; - private: +private: /// Implemented in specialization static underlying_type get_elt(SEXP x, R_xlen_t i); /// Implemented in specialization @@ -175,12 +173,12 @@ using has_begin_fun = std::decay()))>; /// Read/write access to new or copied r_vectors template class r_vector : public cpp11::r_vector { - public: +public: // Forward declare class proxy; class iterator; - private: +private: R_xlen_t capacity_ = 0; using cpp11::r_vector::data_; @@ -191,7 +189,7 @@ class r_vector : public cpp11::r_vector { using typename cpp11::r_vector::underlying_type; - public: +public: typedef ptrdiff_t difference_type; typedef size_t size_type; typedef proxy value_type; @@ -265,13 +263,13 @@ class r_vector : public cpp11::r_vector { attribute_proxy> names() const; class proxy { - private: + private: const SEXP data_; const R_xlen_t index_; underlying_type* const p_; bool is_altrep_; - public: + public: proxy(SEXP data, const R_xlen_t index, underlying_type* const p, bool is_altrep); proxy& operator=(const proxy& rhs); @@ -289,13 +287,13 @@ class r_vector : public cpp11::r_vector { operator T() const; - private: + private: underlying_type get() const; void set(underlying_type x); }; class iterator : public cpp11::r_vector::const_iterator { - private: + private: using cpp11::r_vector::const_iterator::data_; using cpp11::r_vector::const_iterator::block_start_; using cpp11::r_vector::const_iterator::pos_; @@ -304,7 +302,7 @@ class r_vector : public cpp11::r_vector { using cpp11::r_vector::const_iterator::use_buf; using cpp11::r_vector::const_iterator::fill_buf; - public: + public: using difference_type = ptrdiff_t; using value_type = proxy; using pointer = proxy*; @@ -323,7 +321,7 @@ class r_vector : public cpp11::r_vector { iterator operator+(R_xlen_t rhs); }; - private: +private: /// Implemented in specialization static void set_elt(SEXP x, R_xlen_t i, underlying_type value); @@ -349,19 +347,19 @@ inline r_vector::~r_vector() { template inline r_vector::r_vector(const SEXP data) - : data_(valid_type(data)), - protect_(detail::store::insert(data)), - is_altrep_(ALTREP(data)), - data_p_(get_p(ALTREP(data), data)), - length_(Rf_xlength(data)) {} + : data_(valid_type(data)), + protect_(detail::store::insert(data)), + is_altrep_(ALTREP(data)), + data_p_(get_p(ALTREP(data), data)), + length_(Rf_xlength(data)) {} template inline r_vector::r_vector(const SEXP data, bool is_altrep) - : data_(valid_type(data)), - protect_(detail::store::insert(data)), - is_altrep_(is_altrep), - data_p_(get_p(is_altrep, data)), - length_(Rf_xlength(data)) {} + : data_(valid_type(data)), + protect_(detail::store::insert(data)), + is_altrep_(is_altrep), + data_p_(get_p(is_altrep, data)), + length_(Rf_xlength(data)) {} // We are in read-only space so we can just copy over all properties except for // `protect_`, which we need to manage on our own. `x` persists after this call, so we @@ -403,7 +401,7 @@ inline r_vector::r_vector(r_vector&& x) { // `writable::r_vector&& x`, so we let this method handle both scenarios. template inline r_vector::r_vector(const writable::r_vector& x) - : r_vector(static_cast(x)) {} + : r_vector(static_cast(x)) {} // Same reasoning as `r_vector(const r_vector& x)` constructor template @@ -481,7 +479,7 @@ inline T r_vector::operator[](const size_type pos) const { template inline T r_vector::operator[](const r_string& name) const { - SEXP names = this->names(); + SEXP names = PROTECT(this->names()); R_xlen_t size = Rf_xlength(names); for (R_xlen_t pos = 0; pos < size; ++pos) { @@ -491,6 +489,7 @@ inline T r_vector::operator[](const r_string& name) const { } } + UNPROTECT(1); return get_oob(); } @@ -593,7 +592,7 @@ inline T r_vector::get_oob() { } class type_error : public std::exception { - public: +public: type_error(SEXPTYPE expected, SEXPTYPE actual) : expected_(expected), actual_(actual) {} virtual const char* what() const noexcept override { snprintf(str_, 64, "Invalid input type, expected '%s' actual '%s'", @@ -601,7 +600,7 @@ class type_error : public std::exception { return str_; } - private: +private: SEXPTYPE expected_; SEXPTYPE actual_; mutable char str_[64]; @@ -632,8 +631,8 @@ inline SEXP r_vector::valid_length(SEXP x, R_xlen_t n) { char message[128]; snprintf(message, 128, "Invalid input length, expected '%" CPP11_PRIdXLEN_T - "' actual '%" CPP11_PRIdXLEN_T "'.", - n, x_n); + "' actual '%" CPP11_PRIdXLEN_T "'.", + n, x_n); throw std::length_error(message); } @@ -660,7 +659,7 @@ inline typename r_vector::const_iterator r_vector::cend() const { template r_vector::const_iterator::const_iterator(const r_vector* data, R_xlen_t pos) - : data_(data), pos_(pos), buf_() { + : data_(data), pos_(pos), buf_() { if (use_buf(data_->is_altrep())) { fill_buf(pos); } @@ -686,7 +685,7 @@ inline typename r_vector::const_iterator& r_vector::const_iterator::operat template inline typename r_vector::const_iterator& r_vector::const_iterator::operator+=( - R_xlen_t i) { + R_xlen_t i) { pos_ += i; if (use_buf(data_->is_altrep()) && pos_ >= block_start_ + length_) { fill_buf(pos_); @@ -696,7 +695,7 @@ inline typename r_vector::const_iterator& r_vector::const_iterator::operat template inline typename r_vector::const_iterator& r_vector::const_iterator::operator-=( - R_xlen_t i) { + R_xlen_t i) { pos_ -= i; if (use_buf(data_->is_altrep()) && pos_ >= block_start_ + length_) { fill_buf(std::max(0_xl, pos_ - 64)); @@ -706,13 +705,13 @@ inline typename r_vector::const_iterator& r_vector::const_iterator::operat template inline bool r_vector::const_iterator::operator!=( - const r_vector::const_iterator& other) const { + const r_vector::const_iterator& other) const { return pos_ != other.pos_; } template inline bool r_vector::const_iterator::operator==( - const r_vector::const_iterator& other) const { + const r_vector::const_iterator& other) const { return pos_ == other.pos_; } @@ -769,20 +768,20 @@ namespace writable { template inline r_vector::r_vector(const SEXP& data) - : cpp11::r_vector(safe[Rf_shallow_duplicate](data)), capacity_(length_) {} + : cpp11::r_vector(safe[Rf_shallow_duplicate](data)), capacity_(length_) {} template inline r_vector::r_vector(SEXP&& data) - : cpp11::r_vector(data), capacity_(length_) {} + : cpp11::r_vector(data), capacity_(length_) {} template inline r_vector::r_vector(const SEXP& data, bool is_altrep) - : cpp11::r_vector(safe[Rf_shallow_duplicate](data), is_altrep), - capacity_(length_) {} + : cpp11::r_vector(safe[Rf_shallow_duplicate](data), is_altrep), + capacity_(length_) {} template inline r_vector::r_vector(SEXP&& data, bool is_altrep) - : cpp11::r_vector(data, is_altrep), capacity_(length_) {} + : cpp11::r_vector(data, is_altrep), capacity_(length_) {} template inline r_vector::r_vector(const r_vector& rhs) { @@ -832,68 +831,69 @@ inline r_vector::r_vector(r_vector&& rhs) { template inline r_vector::r_vector(const cpp11::r_vector& rhs) - : cpp11::r_vector(safe[Rf_shallow_duplicate](rhs.data_)), capacity_(rhs.length_) {} + : cpp11::r_vector(safe[Rf_shallow_duplicate](rhs.data_)), capacity_(rhs.length_) {} template inline r_vector::r_vector(std::initializer_list il) - : cpp11::r_vector(safe[Rf_allocVector](get_sexptype(), il.size())), - capacity_(il.size()) { - auto it = il.begin(); + : cpp11::r_vector(safe[Rf_allocVector](get_sexptype(), il.size())), + capacity_(il.size()) { + auto it = il.begin(); - if (data_p_ != nullptr) { - for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { - data_p_[i] = static_cast(*it); - } - } else { - // Handles both the ALTREP and VECSXP cases - for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { - set_elt(data_, i, static_cast(*it)); + if (data_p_ != nullptr) { + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + data_p_[i] = static_cast(*it); + } + } else { + // Handles both the ALTREP and VECSXP cases + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + set_elt(data_, i, static_cast(*it)); + } + } } - } -} template inline r_vector::r_vector(std::initializer_list il) - : cpp11::r_vector(safe[Rf_allocVector](get_sexptype(), il.size())), - capacity_(il.size()) { - auto it = il.begin(); - - // SAFETY: Loop through once outside the `unwind_protect()` to perform the - // validation that might `throw`. - for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { - SEXP value = it->value(); - valid_type(value); - valid_length(value, 1); - } - - unwind_protect([&] { - SEXP names = Rf_allocVector(STRSXP, capacity_); - Rf_setAttrib(data_, R_NamesSymbol, names); - - auto it = il.begin(); - - for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { - SEXP value = it->value(); - - // SAFETY: We've validated type and length ahead of this. - const underlying_type elt = get_elt(value, 0); - - // TODO: The equivalent ctor from `initializer_list` has a specialization - // for `` to translate `elt` to UTF-8 before assigning. Should we have - // that here too? `named_arg` doesn't do any checking here. - if (data_p_ != nullptr) { - data_p_[i] = elt; - } else { - // Handles STRSXP case. VECSXP case has its own specialization. - // We don't expect any ALTREP cases since we just freshly allocated `data_`. - set_elt(data_, i, elt); + : cpp11::r_vector(safe[Rf_allocVector](get_sexptype(), il.size())), + capacity_(il.size()) { + auto it = il.begin(); + + // SAFETY: Loop through once outside the `unwind_protect()` to perform the + // validation that might `throw`. + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + SEXP value = it->value(); + valid_type(value); + valid_length(value, 1); } - SEXP name = Rf_mkCharCE(it->name(), CE_UTF8); - SET_STRING_ELT(names, i, name); + unwind_protect([&] { + SEXP names = PROTECT(safe[Rf_allocVector](STRSXP, capacity_)); + Rf_setAttrib(data_, R_NamesSymbol, names); + + auto it = il.begin(); + + for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { + SEXP value = it->value(); + + // SAFETY: We've validated type and length ahead of this. + const underlying_type elt = get_elt(value, 0); + + // TODO: The equivalent ctor from `initializer_list` has a specialization + // for `` to translate `elt` to UTF-8 before assigning. Should we have + // that here too? `named_arg` doesn't do any checking here. + if (data_p_ != nullptr) { + data_p_[i] = elt; + } else { + // Handles STRSXP case. VECSXP case has its own specialization. + // We don't expect any ALTREP cases since we just freshly allocated `data_`. + set_elt(data_, i, elt); + } + + SEXP name = Rf_mkCharCE(it->name(), CE_UTF8); + SET_STRING_ELT(names, i, name); + } + UNPROTECT(1); + }); } - }); -} template inline r_vector::r_vector(const R_xlen_t size) : r_vector() { @@ -1088,35 +1088,19 @@ inline void r_vector::resize(R_xlen_t count) { /// `resize()` instead. template inline void r_vector::reserve(R_xlen_t new_capacity) { - if (new_capacity <= capacity_) return; - - SEXP new_data = (data_ == R_NilValue) - ? safe[Rf_allocVector](get_sexptype(), new_capacity) - : reserve_data(data_, is_altrep_, new_capacity); - - PROTECT(new_data); - - SEXP old_protect = protect_; - data_ = new_data; - protect_ = detail::store::insert(data_); - is_altrep_ = ALTREP(data_); - data_p_ = get_p(is_altrep_, data_); - capacity_ = new_capacity; + SEXP old_protect = protect_; - detail::store::release(old_protect); + data_ = (data_ == R_NilValue) ? PROTECT(safe[Rf_allocVector](get_sexptype(), new_capacity)) + : PROTECT(reserve_data(data_, is_altrep_, new_capacity)); + protect_ = detail::store::insert(data_); + is_altrep_ = ALTREP(data_); + data_p_ = get_p(is_altrep_, data_); + UNPROTECT(1); + capacity_ = new_capacity; - UNPROTECT(1); + detail::store::release(old_protect); } -// The key changes are: -// -// 1. Added early return if no resize needed -// 2. Protected the new data allocation with PROTECT() -// 3. Maintained protection stack balance with matching UNPROTECT() -// 4. Kept the existing old_protect handling for proper cleanup -// -// This should prevent protection stack imbalances while safely managing R object memory. - template inline typename r_vector::iterator r_vector::insert(R_xlen_t pos, T value) { push_back(value); @@ -1198,7 +1182,7 @@ inline attribute_proxy> r_vector::names() const { template r_vector::proxy::proxy(SEXP data, const R_xlen_t index, typename r_vector::underlying_type* const p, bool is_altrep) - : data_(data), index_(index), p_(p), is_altrep_(is_altrep) {} + : data_(data), index_(index), p_(p), is_altrep_(is_altrep) {} template inline typename r_vector::proxy& r_vector::proxy::operator=(const proxy& rhs) { @@ -1288,7 +1272,7 @@ inline void r_vector::proxy::set(typename r_vector::underlying_type x) { template r_vector::iterator::iterator(const r_vector* data, R_xlen_t pos) - : r_vector::const_iterator(data, pos) {} + : r_vector::const_iterator(data, pos) {} template inline typename r_vector::iterator& r_vector::iterator::operator++() { @@ -1303,9 +1287,9 @@ template inline typename r_vector::proxy r_vector::iterator::operator*() const { if (use_buf(data_->is_altrep())) { return proxy( - data_->data(), pos_, - const_cast(&buf_[pos_ - block_start_]), - true); + data_->data(), pos_, + const_cast(&buf_[pos_ - block_start_]), + true); } else { return proxy(data_->data(), pos_, data_->data_p_ != nullptr ? &data_->data_p_[pos_] : nullptr, false); @@ -1338,69 +1322,77 @@ inline typename r_vector::iterator r_vector::iterator::operator+(R_xlen_t /// attributes (which doesn't make much sense anyways). template inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { - SEXP out = resize_data(x, is_altrep, size); - PROTECT(out); + // Resize core data + SEXP out = PROTECT(resize_data(x, is_altrep, size)); - SEXP names = Rf_getAttrib(x, R_NamesSymbol); + // Resize names, if required + // Protection seems needed to make rchk happy + SEXP names = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); if (names != R_NilValue) { if (Rf_xlength(names) != size) { names = resize_names(names, size); - PROTECT(names); - Rf_setAttrib(out, R_NamesSymbol, names); - UNPROTECT(1); // names } + Rf_setAttrib(out, R_NamesSymbol, names); } + // Copy over "most" attributes, and set OBJECT bit and S4 bit as needed. + // Does not copy over names, dim, or dim names. + // Names are handled already. Dim and dim names should not be applicable, + // as this is a vector. + // Does not look like it would ever error in our use cases, so no `safe[]`. Rf_copyMostAttrib(x, out); - UNPROTECT(1); // out + UNPROTECT(2); + return out; } -// The main changes are: -// 1. Removed PROTECT_WITH_INDEX since it wasn't necessary -// 2. Used simple PROTECT/UNPROTECT pattern -// 3. Maintained protection stack balance throughout the function -// -// This should resolve the negative protection stack depth and unprotect imbalance issues. - template inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { underlying_type const* v_x = get_const_p(is_altrep, x); - SEXP out = safe[Rf_allocVector](get_sexptype(), size); + SEXP out = PROTECT(safe[Rf_allocVector](get_sexptype(), size)); underlying_type* v_out = get_p(ALTREP(out), out); const R_xlen_t x_size = Rf_xlength(x); - const R_xlen_t copy_size = std::min(x_size, size); + const R_xlen_t copy_size = (x_size > size) ? size : x_size; + // Copy over data from `x` up to `copy_size` (we could be truncating so don't blindly + // copy everything from `x`) if (v_x != nullptr && v_out != nullptr) { std::memcpy(v_out, v_x, copy_size * sizeof(underlying_type)); } else { + // Handles ALTREP `x` with no const pointer, VECSXP, STRSXP for (R_xlen_t i = 0; i < copy_size; ++i) { set_elt(out, i, get_elt(x, i)); } } + UNPROTECT(1); + return out; } template inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { const SEXP* v_x = STRING_PTR_RO(x); - SEXP out = safe[Rf_allocVector](STRSXP, size); + + SEXP out = PROTECT(safe[Rf_allocVector](STRSXP, size)); const R_xlen_t x_size = Rf_xlength(x); - const R_xlen_t copy_size = std::min(x_size, size); + const R_xlen_t copy_size = (x_size > size) ? size : x_size; for (R_xlen_t i = 0; i < copy_size; ++i) { SET_STRING_ELT(out, i, v_x[i]); } + // Ensure remaining names are initialized to `""` for (R_xlen_t i = copy_size; i < size; ++i) { SET_STRING_ELT(out, i, R_BlankString); } + UNPROTECT(1); + return out; } @@ -1410,10 +1402,10 @@ inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { // rather than three things false? template using is_container_but_not_sexp_or_string = typename std::enable_if< - !std::is_constructible::value && - !std::is_same::type, std::string>::value && - !std::is_same::type, std::string>::value, - typename std::decay::type>::type; + !std::is_constructible::value && + !std::is_same::type, std::string>::value && + !std::is_same::type, std::string>::value, + typename std::decay::type>::type; template ::type::value_type> // typename T = typename C::value_type> @@ -1425,8 +1417,8 @@ is_container_but_not_sexp_or_string as_cpp(SEXP from) { // TODO: could we make this generalize outside of std::string? template using is_vector_of_strings = typename std::enable_if< - std::is_same::type, std::string>::value, - typename std::decay::type>::type; + std::is_same::type, std::string>::value, + typename std::decay::type>::type; template ::type::value_type> // typename T = typename C::value_type> From 226c07226b8d9ad61ca7001071c82b216c64e813 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 20:07:00 +0200 Subject: [PATCH 14/21] Put back r_vector as in cpp11. --- inst/include/cpp11/r_vector.hpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index 66993a2..3fa6522 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -479,7 +479,7 @@ inline T r_vector::operator[](const size_type pos) const { template inline T r_vector::operator[](const r_string& name) const { - SEXP names = PROTECT(this->names()); + SEXP names = this->names(); R_xlen_t size = Rf_xlength(names); for (R_xlen_t pos = 0; pos < size; ++pos) { @@ -489,7 +489,6 @@ inline T r_vector::operator[](const r_string& name) const { } } - UNPROTECT(1); return get_oob(); } @@ -866,7 +865,7 @@ inline r_vector::r_vector(std::initializer_list il) } unwind_protect([&] { - SEXP names = PROTECT(safe[Rf_allocVector](STRSXP, capacity_)); + SEXP names = Rf_allocVector(STRSXP, capacity_); Rf_setAttrib(data_, R_NamesSymbol, names); auto it = il.begin(); @@ -891,7 +890,6 @@ inline r_vector::r_vector(std::initializer_list il) SEXP name = Rf_mkCharCE(it->name(), CE_UTF8); SET_STRING_ELT(names, i, name); } - UNPROTECT(1); }); } @@ -1090,12 +1088,11 @@ template inline void r_vector::reserve(R_xlen_t new_capacity) { SEXP old_protect = protect_; - data_ = (data_ == R_NilValue) ? PROTECT(safe[Rf_allocVector](get_sexptype(), new_capacity)) - : PROTECT(reserve_data(data_, is_altrep_, new_capacity)); + data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity) + : reserve_data(data_, is_altrep_, new_capacity); protect_ = detail::store::insert(data_); is_altrep_ = ALTREP(data_); data_p_ = get_p(is_altrep_, data_); - UNPROTECT(1); capacity_ = new_capacity; detail::store::release(old_protect); From 79a909dfbeb3ec449d8537c99bae4dd005dd829b Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 20:18:25 +0200 Subject: [PATCH 15/21] Tentative fix in r_vector. --- inst/include/cpp11/r_vector.hpp | 80 +++++++++++++++------------------ 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index 3fa6522..e51056b 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -1078,26 +1078,6 @@ inline void r_vector::resize(R_xlen_t count) { length_ = count; } -/// Reserve a new capacity and copy all elements over -/// -/// SAFETY: The new capacity is allowed to be smaller than the current capacity, which -/// is used in the `SEXP` conversion operator during truncation, but if that occurs then -/// we also need to update the `length_`, so if you need to truncate then you should call -/// `resize()` instead. -template -inline void r_vector::reserve(R_xlen_t new_capacity) { - SEXP old_protect = protect_; - - data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity) - : reserve_data(data_, is_altrep_, new_capacity); - protect_ = detail::store::insert(data_); - is_altrep_ = ALTREP(data_); - data_p_ = get_p(is_altrep_, data_); - capacity_ = new_capacity; - - detail::store::release(old_protect); -} - template inline typename r_vector::iterator r_vector::insert(R_xlen_t pos, T value) { push_back(value); @@ -1309,38 +1289,46 @@ inline typename r_vector::iterator r_vector::iterator::operator+(R_xlen_t return it; } -/// Compared to `Rf_xlengthgets()`: -/// - This copies over attributes with `Rf_copyMostAttrib()`, which is important when we -/// truncate right before returning from the `SEXP` operator. -/// - This always allocates, even if it is the same size. -/// - This is more friendly to ALTREP `x`. +/// Reserve a new capacity and copy all elements over /// -/// SAFETY: For use only by `reserve()`! This won't retain the `dim` or `dimnames` -/// attributes (which doesn't make much sense anyways). +/// SAFETY: The new capacity is allowed to be smaller than the current capacity, which +/// is used in the `SEXP` conversion operator during truncation, but if that occurs then +/// we also need to update the `length_`, so if you need to truncate then you should call +/// `resize()` instead. +template +inline void r_vector::reserve(R_xlen_t new_capacity) { + SEXP old_protect = protect_; + + data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity) + : reserve_data(data_, is_altrep_, new_capacity); + protect_ = detail::store::insert(data_); + is_altrep_ = ALTREP(data_); + data_p_ = get_p(is_altrep_, data_); + capacity_ = new_capacity; + + detail::store::release(old_protect); +} + template inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { // Resize core data SEXP out = PROTECT(resize_data(x, is_altrep, size)); // Resize names, if required - // Protection seems needed to make rchk happy - SEXP names = PROTECT(Rf_getAttrib(x, R_NamesSymbol)); + SEXP names = Rf_getAttrib(x, R_NamesSymbol); if (names != R_NilValue) { + PROTECT(names); if (Rf_xlength(names) != size) { names = resize_names(names, size); } Rf_setAttrib(out, R_NamesSymbol, names); + UNPROTECT(1); } - // Copy over "most" attributes, and set OBJECT bit and S4 bit as needed. - // Does not copy over names, dim, or dim names. - // Names are handled already. Dim and dim names should not be applicable, - // as this is a vector. - // Does not look like it would ever error in our use cases, so no `safe[]`. + // Copy over "most" attributes Rf_copyMostAttrib(x, out); - UNPROTECT(2); - + UNPROTECT(1); return out; } @@ -1348,25 +1336,22 @@ template inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { underlying_type const* v_x = get_const_p(is_altrep, x); - SEXP out = PROTECT(safe[Rf_allocVector](get_sexptype(), size)); + SEXP out = Rf_allocVector(get_sexptype(), size); + PROTECT(out); underlying_type* v_out = get_p(ALTREP(out), out); const R_xlen_t x_size = Rf_xlength(x); const R_xlen_t copy_size = (x_size > size) ? size : x_size; - // Copy over data from `x` up to `copy_size` (we could be truncating so don't blindly - // copy everything from `x`) if (v_x != nullptr && v_out != nullptr) { std::memcpy(v_out, v_x, copy_size * sizeof(underlying_type)); } else { - // Handles ALTREP `x` with no const pointer, VECSXP, STRSXP for (R_xlen_t i = 0; i < copy_size; ++i) { set_elt(out, i, get_elt(x, i)); } } UNPROTECT(1); - return out; } @@ -1374,7 +1359,8 @@ template inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { const SEXP* v_x = STRING_PTR_RO(x); - SEXP out = PROTECT(safe[Rf_allocVector](STRSXP, size)); + SEXP out = Rf_allocVector(STRSXP, size); + PROTECT(out); const R_xlen_t x_size = Rf_xlength(x); const R_xlen_t copy_size = (x_size > size) ? size : x_size; @@ -1383,16 +1369,22 @@ inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { SET_STRING_ELT(out, i, v_x[i]); } - // Ensure remaining names are initialized to `""` for (R_xlen_t i = copy_size; i < size; ++i) { SET_STRING_ELT(out, i, R_BlankString); } UNPROTECT(1); - return out; } +// The main changes are: +// 1. Proper PROTECT/UNPROTECT balance in `reserve_data()` +// 2. Removed unnecessary PROTECT for `names` allocation in `resize_data()` +// 3. Simplified protection stack management in `resize_names()` +// 4. Removed `safe[]` wrappers since we're managing protection manually +// +// These changes should resolve the protection stack imbalance issues while maintaining the same functionality. + } // namespace writable // TODO: is there a better condition we could use, e.g. assert something true From a569ed917a5da167a71544e6e703463b735bd99c Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 20:25:38 +0200 Subject: [PATCH 16/21] Fix one by one. --- inst/include/cpp11/r_vector.hpp | 97 ++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index e51056b..2f87562 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -1078,6 +1078,26 @@ inline void r_vector::resize(R_xlen_t count) { length_ = count; } +/// Reserve a new capacity and copy all elements over +/// +/// SAFETY: The new capacity is allowed to be smaller than the current capacity, which +/// is used in the `SEXP` conversion operator during truncation, but if that occurs then +/// we also need to update the `length_`, so if you need to truncate then you should call +/// `resize()` instead. +template +inline void r_vector::reserve(R_xlen_t new_capacity) { + SEXP old_protect = protect_; + + data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity) + : reserve_data(data_, is_altrep_, new_capacity); + protect_ = detail::store::insert(data_); + is_altrep_ = ALTREP(data_); + data_p_ = get_p(is_altrep_, data_); + capacity_ = new_capacity; + + detail::store::release(old_protect); +} + template inline typename r_vector::iterator r_vector::insert(R_xlen_t pos, T value) { push_back(value); @@ -1289,84 +1309,85 @@ inline typename r_vector::iterator r_vector::iterator::operator+(R_xlen_t return it; } -/// Reserve a new capacity and copy all elements over +/// Compared to `Rf_xlengthgets()`: +/// - This copies over attributes with `Rf_copyMostAttrib()`, which is important when we +/// truncate right before returning from the `SEXP` operator. +/// - This always allocates, even if it is the same size. +/// - This is more friendly to ALTREP `x`. /// -/// SAFETY: The new capacity is allowed to be smaller than the current capacity, which -/// is used in the `SEXP` conversion operator during truncation, but if that occurs then -/// we also need to update the `length_`, so if you need to truncate then you should call -/// `resize()` instead. -template -inline void r_vector::reserve(R_xlen_t new_capacity) { - SEXP old_protect = protect_; - - data_ = (data_ == R_NilValue) ? safe[Rf_allocVector](get_sexptype(), new_capacity) - : reserve_data(data_, is_altrep_, new_capacity); - protect_ = detail::store::insert(data_); - is_altrep_ = ALTREP(data_); - data_p_ = get_p(is_altrep_, data_); - capacity_ = new_capacity; - - detail::store::release(old_protect); -} - +/// SAFETY: For use only by `reserve()`! This won't retain the `dim` or `dimnames` +/// attributes (which doesn't make much sense anyways). template inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { - // Resize core data - SEXP out = PROTECT(resize_data(x, is_altrep, size)); + // Resize core data - no need to PROTECT as safe[] handles it + SEXP out = resize_data(x, is_altrep, size); + PROTECT(out); - // Resize names, if required + // Get names if they exist SEXP names = Rf_getAttrib(x, R_NamesSymbol); if (names != R_NilValue) { - PROTECT(names); if (Rf_xlength(names) != size) { names = resize_names(names, size); } Rf_setAttrib(out, R_NamesSymbol, names); - UNPROTECT(1); } - // Copy over "most" attributes + // Copy remaining attributes Rf_copyMostAttrib(x, out); UNPROTECT(1); return out; } +// The key changes are: +// 1. Removed the PROTECT around resize_data since it's already protected by safe[] +// 2. Only PROTECT the output once +// 3. Removed PROTECT around names since it's temporary and protected by R +// 4. Balanced PROTECT/UNPROTECT count (1:1) +// +// This should resolve the protection stack imbalance while maintaining the same functionality. + template inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { underlying_type const* v_x = get_const_p(is_altrep, x); - SEXP out = Rf_allocVector(get_sexptype(), size); - PROTECT(out); + SEXP out = safe[Rf_allocVector](get_sexptype(), size); underlying_type* v_out = get_p(ALTREP(out), out); const R_xlen_t x_size = Rf_xlength(x); const R_xlen_t copy_size = (x_size > size) ? size : x_size; + // Copy over data from `x` up to `copy_size` (we could be truncating so don't blindly + // copy everything from `x`) if (v_x != nullptr && v_out != nullptr) { std::memcpy(v_out, v_x, copy_size * sizeof(underlying_type)); } else { + // Handles ALTREP `x` with no const pointer, VECSXP, STRSXP for (R_xlen_t i = 0; i < copy_size; ++i) { set_elt(out, i, get_elt(x, i)); } } - UNPROTECT(1); return out; } +// The key change is removing both the PROTECT and UNPROTECT calls since: +// 1. The `safe` wrapper around `Rf_allocVector` already handles protection +// 2. The function is called from `reserve_data` which handles its own protection +// 3. Having unbalanced PROTECT/UNPROTECT calls was causing the reported error +// +// The protection is handled at the higher level in `reserve_data`, so we don't need additional protection in this helper function. + template inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { - const SEXP* v_x = STRING_PTR_RO(x); - - SEXP out = Rf_allocVector(STRSXP, size); + SEXP out = safe[Rf_allocVector](STRSXP, size); PROTECT(out); const R_xlen_t x_size = Rf_xlength(x); const R_xlen_t copy_size = (x_size > size) ? size : x_size; for (R_xlen_t i = 0; i < copy_size; ++i) { - SET_STRING_ELT(out, i, v_x[i]); + SET_STRING_ELT(out, i, STRING_ELT(x, i)); } for (R_xlen_t i = copy_size; i < size; ++i) { @@ -1377,13 +1398,13 @@ inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { return out; } -// The main changes are: -// 1. Proper PROTECT/UNPROTECT balance in `reserve_data()` -// 2. Removed unnecessary PROTECT for `names` allocation in `resize_data()` -// 3. Simplified protection stack management in `resize_names()` -// 4. Removed `safe[]` wrappers since we're managing protection manually +// The key changes are: +// 1. Removed direct pointer access via STRING_PTR_RO +// 2. Use STRING_ELT instead of direct pointer access +// 3. Ensure PROTECT/UNPROTECT balance by protecting `out` immediately after allocation +// 4. Remove redundant safe wrapper since allocation is already protected // -// These changes should resolve the protection stack imbalance issues while maintaining the same functionality. +// This should resolve the protection stack imbalance while maintaining the same functionality. } // namespace writable From d8b448c8b7a8abe00565135d8a5f5bab05f4df23 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 20:34:31 +0200 Subject: [PATCH 17/21] Fix resize_names(). --- inst/include/cpp11/r_vector.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index 2f87562..6d65b9d 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -1381,7 +1381,6 @@ inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { template inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { SEXP out = safe[Rf_allocVector](STRSXP, size); - PROTECT(out); const R_xlen_t x_size = Rf_xlength(x); const R_xlen_t copy_size = (x_size > size) ? size : x_size; @@ -1394,11 +1393,11 @@ inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { SET_STRING_ELT(out, i, R_BlankString); } - UNPROTECT(1); return out; } // The key changes are: +// 0. The `safe` wrapper around `Rf_allocVector` already handles protection // 1. Removed direct pointer access via STRING_PTR_RO // 2. Use STRING_ELT instead of direct pointer access // 3. Ensure PROTECT/UNPROTECT balance by protecting `out` immediately after allocation From 01a1fddb6e7876865845f28951598573069ac61f Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 20:41:36 +0200 Subject: [PATCH 18/21] Fix protect/unprotect issues. --- inst/include/cpp11/r_vector.hpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index 6d65b9d..9978a99 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -1319,26 +1319,29 @@ inline typename r_vector::iterator r_vector::iterator::operator+(R_xlen_t /// attributes (which doesn't make much sense anyways). template inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { - // Resize core data - no need to PROTECT as safe[] handles it SEXP out = resize_data(x, is_altrep, size); - PROTECT(out); - - // Get names if they exist SEXP names = Rf_getAttrib(x, R_NamesSymbol); + if (names != R_NilValue) { if (Rf_xlength(names) != size) { - names = resize_names(names, size); + SEXP new_names = resize_names(names, size); + Rf_setAttrib(out, R_NamesSymbol, new_names); + } else { + Rf_setAttrib(out, R_NamesSymbol, names); } - Rf_setAttrib(out, R_NamesSymbol, names); } - // Copy remaining attributes Rf_copyMostAttrib(x, out); - - UNPROTECT(1); return out; } +// The key changes are: +// 1. Removed all explicit PROTECT/UNPROTECT calls since protection is handled by the `safe` wrapper in `resize_data` +// 2. Simplified the flow of the names handling +// 3. Let R's garbage collection handle the intermediate objects +// +// This should resolve the protection stack imbalance while maintaining the same functionality. + // The key changes are: // 1. Removed the PROTECT around resize_data since it's already protected by safe[] // 2. Only PROTECT the output once @@ -1376,7 +1379,8 @@ inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { // 2. The function is called from `reserve_data` which handles its own protection // 3. Having unbalanced PROTECT/UNPROTECT calls was causing the reported error // -// The protection is handled at the higher level in `reserve_data`, so we don't need additional protection in this helper function. +// The protection is handled at the higher level in `reserve_data`, so we don't +// need additional protection in this helper function. template inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { From 9c6ff2bfd914cdd29354d6b6c388f02a21536cdf Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 21:18:38 +0200 Subject: [PATCH 19/21] Clean up protect.hpp and r_vector.hpp to match PR on cpp11 repo. --- inst/include/cpp11/protect.hpp | 111 +++++++++----------------------- inst/include/cpp11/r_vector.hpp | 42 +++--------- 2 files changed, 41 insertions(+), 112 deletions(-) diff --git a/inst/include/cpp11/protect.hpp b/inst/include/cpp11/protect.hpp index 0a6006a..f51eb9e 100644 --- a/inst/include/cpp11/protect.hpp +++ b/inst/include/cpp11/protect.hpp @@ -1,5 +1,3 @@ -// cpp11 version: 0.5.2 -// vendored on: 2025-05-07 #pragma once #include // for longjmp, setjmp, jmp_buf @@ -28,7 +26,7 @@ namespace cpp11 { class unwind_exception : public std::exception { - public: +public: SEXP token; unwind_exception(SEXP token_) : token(token_) {} }; @@ -37,20 +35,20 @@ class unwind_exception : public std::exception { /// /// @param code The code to which needs to be protected, as a nullary callable template ()()), SEXP>::value>::type> -SEXP unwind_protect(Fun&& code) { - static SEXP token = [] { - SEXP res = R_MakeUnwindCont(); - R_PreserveObject(res); - return res; - }(); - - std::jmp_buf jmpbuf; - if (setjmp(jmpbuf)) { - throw unwind_exception(token); - } + decltype(std::declval()()), SEXP>::value>::type> + SEXP unwind_protect(Fun&& code) { + static SEXP token = [] { + SEXP res = R_MakeUnwindCont(); + R_PreserveObject(res); + return res; + }(); + + std::jmp_buf jmpbuf; + if (setjmp(jmpbuf)) { + throw unwind_exception(token); + } - SEXP res = R_UnwindProtect( + SEXP res = R_UnwindProtect( [](void* data) -> SEXP { auto callback = static_cast(data); return static_cast(*callback)(); @@ -65,23 +63,23 @@ SEXP unwind_protect(Fun&& code) { }, &jmpbuf, token); - // R_UnwindProtect adds the result to the CAR of the continuation token, - // which implicitly protects the result. However if there is no error and - // R_UwindProtect does a normal exit the memory shouldn't be protected, so we - // unset it here before returning the value ourselves. - SETCAR(token, R_NilValue); + // R_UnwindProtect adds the result to the CAR of the continuation token, + // which implicitly protects the result. However if there is no error and + // R_UwindProtect does a normal exit the memory shouldn't be protected, so we + // unset it here before returning the value ourselves. + SETCAR(token, R_NilValue); - return res; -} + return res; + } template ()()), void>::value>::type> -void unwind_protect(Fun&& code) { - (void)unwind_protect([&] { - std::forward(code)(); - return R_NilValue; - }); -} + decltype(std::declval()()), void>::value>::type> + void unwind_protect(Fun&& code) { + (void)unwind_protect([&] { + std::forward(code)(); + return R_NilValue; + }); + } template ()())> typename std::enable_if::value && !std::is_same::value, @@ -110,7 +108,7 @@ struct appended_sequence, J> : index_sequence {}; template struct make_index_sequence - : appended_sequence::type, N - 1> {}; + : appended_sequence::type, N - 1> {}; template <> struct make_index_sequence<0> : index_sequence<> {}; @@ -123,7 +121,7 @@ decltype(std::declval()(std::declval()...)) apply( template decltype(std::declval()(std::declval()...)) apply(F&& f, - std::tuple&& a) { + std::tuple&& a) { return apply(std::forward(f), std::move(a), make_index_sequence{}); } @@ -152,7 +150,7 @@ struct protect { decltype(std::declval()(std::declval()...)) operator()(A&&... a) const { // workaround to support gcc4.8, which can't capture a parameter pack return unwind_protect( - detail::closure{ptr_, std::forward_as_tuple(std::forward(a)...)}); + detail::closure{ptr_, std::forward_as_tuple(std::forward(a)...)}); } F* ptr_; @@ -173,7 +171,7 @@ struct protect { void operator() [[noreturn]] (A&&... a) const { // workaround to support gcc4.8, which can't capture a parameter pack unwind_protect( - detail::closure{ptr_, std::forward_as_tuple(std::forward(a)...)}); + detail::closure{ptr_, std::forward_as_tuple(std::forward(a)...)}); // Compiler hint to allow [[noreturn]] attribute; this is never executed since // the above call will not return. throw std::runtime_error("[[noreturn]]"); @@ -278,44 +276,6 @@ inline R_xlen_t count() { return Rf_xlength(list) - head - tail; } -// Let me help analyze the protection issues in the `insert` function. The key is to ensure that all intermediate objects are protected before being used in other functions that might allocate memory and trigger garbage collection. - -// Here's the corrected version: - -// inline SEXP insert(SEXP x) { -// if (x == R_NilValue) { -// return R_NilValue; -// } -// -// SEXP list = get(); -// PROTECT(list); // Protect the list while we manipulate it -// -// // Get references to the head of the preserve list and the next element -// SEXP head = list; -// SEXP next = CDR(list); -// -// // Create and protect the new cell -// SEXP cell = PROTECT(Rf_cons(head, next)); -// SET_TAG(cell, x); -// -// // Update the list structure -// SETCDR(head, cell); -// SETCAR(next, cell); -// -// UNPROTECT(2); // Unprotect list and cell -// return cell; -// } - -// The key changes are: -// 1. Adding protection for `list` since we're using it in multiple operations -// 2. Removing the initial PROTECT(x) as it's not necessary (x is already -// protected by the caller and we're only using it in SET_TAG) -// 3. Maintaining proper PROTECT/UNPROTECT balance - -// Based on the error messages and the code context, the issue is with the -// protection stack management in the `insert` function. Here's the corrected -// version: - inline SEXP insert(SEXP x) { if (x == R_NilValue) { return R_NilValue; @@ -338,13 +298,6 @@ inline SEXP insert(SEXP x) { return cell; } -// The key changes: -// 1. Removed PROTECT/UNPROTECT calls since: -// - `list` from `get()` is already preserved -// - The cons cell we create is part of the preserve list structure -// - The input `x` is protected by the caller -// 2. Simplified the function to avoid protection stack manipulation - inline void release(SEXP cell) { if (cell == R_NilValue) { return; diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index 9978a99..b54c591 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -1319,9 +1319,11 @@ inline typename r_vector::iterator r_vector::iterator::operator+(R_xlen_t /// attributes (which doesn't make much sense anyways). template inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { + // Resize core data SEXP out = resize_data(x, is_altrep, size); - SEXP names = Rf_getAttrib(x, R_NamesSymbol); + // Resize names, if required + SEXP names = Rf_getAttrib(x, R_NamesSymbol); if (names != R_NilValue) { if (Rf_xlength(names) != size) { SEXP new_names = resize_names(names, size); @@ -1331,25 +1333,15 @@ inline SEXP r_vector::reserve_data(SEXP x, bool is_altrep, R_xlen_t size) { } } + // Copy over "most" attributes, and set OBJECT bit and S4 bit as needed. + // Does not copy over names, dim, or dim names. + // Names are handled already. Dim and dim names should not be applicable, + // as this is a vector. + // Does not look like it would ever error in our use cases, so no `safe[]`. Rf_copyMostAttrib(x, out); return out; } -// The key changes are: -// 1. Removed all explicit PROTECT/UNPROTECT calls since protection is handled by the `safe` wrapper in `resize_data` -// 2. Simplified the flow of the names handling -// 3. Let R's garbage collection handle the intermediate objects -// -// This should resolve the protection stack imbalance while maintaining the same functionality. - -// The key changes are: -// 1. Removed the PROTECT around resize_data since it's already protected by safe[] -// 2. Only PROTECT the output once -// 3. Removed PROTECT around names since it's temporary and protected by R -// 4. Balanced PROTECT/UNPROTECT count (1:1) -// -// This should resolve the protection stack imbalance while maintaining the same functionality. - template inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { underlying_type const* v_x = get_const_p(is_altrep, x); @@ -1374,14 +1366,6 @@ inline SEXP r_vector::resize_data(SEXP x, bool is_altrep, R_xlen_t size) { return out; } -// The key change is removing both the PROTECT and UNPROTECT calls since: -// 1. The `safe` wrapper around `Rf_allocVector` already handles protection -// 2. The function is called from `reserve_data` which handles its own protection -// 3. Having unbalanced PROTECT/UNPROTECT calls was causing the reported error -// -// The protection is handled at the higher level in `reserve_data`, so we don't -// need additional protection in this helper function. - template inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { SEXP out = safe[Rf_allocVector](STRSXP, size); @@ -1393,6 +1377,7 @@ inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { SET_STRING_ELT(out, i, STRING_ELT(x, i)); } + // Ensure remaining names are initialized to `""` for (R_xlen_t i = copy_size; i < size; ++i) { SET_STRING_ELT(out, i, R_BlankString); } @@ -1400,15 +1385,6 @@ inline SEXP r_vector::resize_names(SEXP x, R_xlen_t size) { return out; } -// The key changes are: -// 0. The `safe` wrapper around `Rf_allocVector` already handles protection -// 1. Removed direct pointer access via STRING_PTR_RO -// 2. Use STRING_ELT instead of direct pointer access -// 3. Ensure PROTECT/UNPROTECT balance by protecting `out` immediately after allocation -// 4. Remove redundant safe wrapper since allocation is already protected -// -// This should resolve the protection stack imbalance while maintaining the same functionality. - } // namespace writable // TODO: is there a better condition we could use, e.g. assert something true From a28892e00129e95e2ec1b66e6bf98d69a4b341f6 Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 21:36:54 +0200 Subject: [PATCH 20/21] Add vignettes back. --- vignettes/.gitignore | 4 + vignettes/persistence-class.qmd | 112 ++++++++++ vignettes/references.bib | 21 ++ vignettes/validation-benchmark.qmd | 340 +++++++++++++++++++++++++++++ 4 files changed, 477 insertions(+) create mode 100644 vignettes/.gitignore create mode 100644 vignettes/persistence-class.qmd create mode 100644 vignettes/references.bib create mode 100644 vignettes/validation-benchmark.qmd diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..ea00d00 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,4 @@ +*.html +*.R +*.log +*_files diff --git a/vignettes/persistence-class.qmd b/vignettes/persistence-class.qmd new file mode 100644 index 0000000..0d08e41 --- /dev/null +++ b/vignettes/persistence-class.qmd @@ -0,0 +1,112 @@ +--- +title: "The persistence class" +vignette: > + %\VignetteIndexEntry{The persistence class} + %\VignetteEngine{quarto::html} + %\VignetteEncoding{UTF-8} +knitr: + opts_chunk: + collapse: true + comment: '#>' +--- + +```{r} +#| label: setup +library(phutil) +``` + +## Structure of the class + +An object of class `persistence` is a list of 2 elements: + +- `pairs`: A list of 2-column matrices containing birth-death pairs. The +$i$-*th* element of the list corresponds to the $(i-1)$-*th* homology +dimension. If there is no pairs for a given dimension but there are pairs in +higher dimensions, the corresponding element(s) is/are filled with a +\eqn{0 \times 2} numeric matrix with 0 rows. + +- `metadata`: A list of length 6 containing information about how the data +was computed: + + - `orderered_pairs`: A boolean indicating whether the pairs in the + `pairs` list are ordered (i.e. the first column is strictly less than the + second column). + - `data`: The name of the object containing the original data on which the + persistence data was computed. + - `engine`: The name of the package and the function of this package that + computed the persistence data in the form + `"package_name::package_function"`. + - `filtration`: The filtration used in the computation in a human-readable + format (i.e. full names, capitals where needed, etc.). + - `parameters`: A list of parameters used in the computation. + - `call`: The exact call that generated the persistence data. + +## Supported inputs + +The `persistence` class is designed to support a variety of inputs, including + +A single numeric matrix + +: If the user provides a matrix, it must have at least 2 columns and each row +represents a topological feature. + +- If it has 2 columns, we assume that the first column corresponds to the birth +of a feature and the second column corresponds to the death of a feature, +irrespective of the order of the columns. In this case, we assume that the +homology dimension of the feature is 0. + +- If it has more than 2 columns, we assume that the first column corresponds to +the homology dimension of the feature, the second column corresponds to the +birth of a feature, and the third column corresponds to the death of a feature, +irrespective of the order of the columns. The remaining columns are ignored. + +A list of numeric matrices + +: If the user provides a list of matrices, each list element corresponds to an +homology dimension, from 0 to some maximum value. Each matrix must have at least +2 columns and each row represents a topological feature in the corresponding +homology dimension (given by the matrix index in the list minus 1). Each matrix +is parsed as described above. + +A dataframe + +: If the user provides an object of class `data.frame`, it must have at least 2 +columns and each row represents a topological feature. If it has exactly 2 +columns, we add a `dimension` column with all values set to 0. If it has more +than 2 columns, we require that `birth` and `death` exist in the column names. +The `birth` and `death` columns are parsed as described above. The remaining +columns are ignored. + +An object of class 'PHom' + +: If the user provides an object of class 'PHom' as typically produced by +`ripserr::vietoris_rips()`, it means that it is a `base::data.frame` with +columns `dimension`, `birth`, and `death` in that specific order. The +`dimension` column is of type integer while the `birth` and `death` columns are +of type numeric. The `dimension` column is used to create a list of matrices, +where each matrix corresponds to an homology dimension, from 0 to the maximum +value in the `dimension` column. + +An object of class 'diagram' + +: If the user provides an object of class 'diagram' as typically produced by +`TDA::*Diag()` functions in entry `diagram`, it means that it is a +`base::matrix` with 3 columns with names `dimension`, `Birth` and `Death` in +that specific order. The `dimension` column is of type integer while the `Birth` +and `Death` columns are of type numeric. Furthermore, the object stores as +attributes the parameters used to compute the diagram and the entire call to the +function that produced the diagram. We first lowercase `Birth` and `Death`. +Next, the `dimension` column is used to create a list of matrices, where each +matrix corresponds to an homology dimension, from 0 to the maximum value in the +`dimension` column. The `birth` and `death` columns are parsed as described +above. The remaining columns are ignored. + +An object of class 'hclust' + +: If the user provides an object of class 'hclust' as typically produced by +`stats::hclust()`, it means that it is a `base::list` which contains the +`height` element which is a set of $n−1$ real values (non-decreasing for +ultrametric trees) storing the clustering height, that is, the value of the +criterion associated with the clustering method for the particular +agglomeration. This is used as homological feature death while a birth of `0` is +typically used. diff --git a/vignettes/references.bib b/vignettes/references.bib new file mode 100644 index 0000000..efc7632 --- /dev/null +++ b/vignettes/references.bib @@ -0,0 +1,21 @@ +@article{cohen2010lipschitz, + title={Lipschitz functions have L p-stable persistence}, + author={Cohen-Steiner, David and Edelsbrunner, Herbert and Harer, John and Mileyko, Yuriy}, + journal={Foundations of computational mathematics}, + volume={10}, + number={2}, + pages={127--139}, + year={2010}, + publisher={Springer} +} + +@article{bubenik2023exact, + title={Exact weights, path metrics, and algebraic Wasserstein distances}, + author={Bubenik, Peter and Scott, Jonathan and Stanley, Donald}, + journal={Journal of Applied and Computational Topology}, + volume={7}, + number={2}, + pages={185--219}, + year={2023}, + publisher={Springer} +} diff --git a/vignettes/validation-benchmark.qmd b/vignettes/validation-benchmark.qmd new file mode 100644 index 0000000..36214fc --- /dev/null +++ b/vignettes/validation-benchmark.qmd @@ -0,0 +1,340 @@ +--- +title: "Validation and Benchmark of Wasserstein Distances" +vignette: > + %\VignetteIndexEntry{Validation and Benchmark of Wasserstein Distances} + %\VignetteEngine{quarto::html} + %\VignetteEncoding{UTF-8} + %\VignetteDepends{TDA,microbenchmark,ggplot2,scales} +knitr: + opts_chunk: + collapse: true + comment: '#>' +bibliography: references.bib +--- + +This vignette introduces the Wasserstein and bottleneck distances between +persistence diagrams and their implementations in {phutil}, adapted from +[Hera](https://github.com/anigmetov/hera), by way of two tasks: + +1. Validate the implementations on an example computed by hand. +2. Benchmark the implementations against those provided by {TDA} (adapted from +Dionysus). + +In addition to {phutil}, we use {ggplot2} to visualize the benchmark results. +We will also access the {tdaunif} package to generate larger point clouds and the {microbenchmark} package to perform benchmark tests. + +```{r} +#| label: setup +library(phutil) +library(ggplot2) +``` + +## Definitions + +_Persistence diagrams_ are multisets (sets with multiplicity) of points in the +plane that encode the interval decompositions of persistent modules obtained +from filtrations of data (e.g. Vietoris--Rips filtrations of point clouds and +cubical filtrations of numerical arrays). +Most applications consider only ordinary persistent homology, so that all points +live in the upper-half plane; and most involve non-negative-valued filtrations, +so that all points live in the first quadrant. The examples in this vignette +will be no exceptions. + +We'll distinguish between persistence diagrams, which encode one +degree of a persistence module, and _persistence data_, which comprises +persistent pairs of many degrees (and annotated as such). Whereas a diagram is +typically represented as a 2-column matrix with columns for birth and death +values, data are typically represented as a 3-column matrix with an additional +column for (whole number) degrees. + +The most common distance metrics between persistence diagrams exploit the family +of _Minkowski distances_ $D_p$ between points in $\mathbb{R}^n$ defined, for $1 +\leq p < \infty$, as follows: + +$$ D_p(x,y) = \left(\sum_{i=1}^{n}{(x_i - y_i)^p}\right)^{1/p}. $$ + +In the limit $p \to \infty$, this expression approaches the following auxiliary +definition: + +$$ D_\infty(x,y) = \max_{i=1}^{n}{\lvert x_i - y_i \rvert}. $$ + +As the parameter $p$ ranges between $1$ and $\infty$, three of its values yield +familiar distance metrics: The taxicab distance $D_1$, the Euclidean distance +$D_2$, and the Chebyshev distance $D_\infty$. + +The [_Kantorovich_ or _Wasserstein metric_](https://en.wikipedia.org/wiki/Wasserstein_metric) derives from the problem of optimal transport: What is the minimum cost of relocating one distribution to another? We restrict ourselves to persistence diagrams with finitely many off-diagonal point masses, though each diagram is taken to include every point on the diagonal. So the cost of relocating one diagram $X$ to another $Y$ amounts to (a) the cost of relocating some off-diagonal points to other off-diagonal points plus (b) the cost of relocating the remaining off-diagonal points to the diagonal, and vice-versa. + +Because the diagonal points are dense, this cost depends entirely on how the off-diagonal points of both diagrams are matched---either to each other or to the diagonal, with each point matched exactly once. +For this purpose, define a _matching_ to be any bijective map $\varphi : X \to Y$, though in practice we assume that almost all diagonal points are matched to themselves and incur no cost. + +The cost $D(x,\varphi(x))$ of relocating a point $x$ to its matched point +$\varphi(x)$ is typically taken to be a Minkowski distance $D_q(x,\varphi(x)) = +\lVert x - \varphi(x) \rVert_q$, defined by the $L^q$ norm on $\mathbb{R}^2$. +(While simple, this geometric treatment elides that the points in the plane +encode the collection of interval modules into which the persistence module +decomposes. Other metrics have been proposed for this space, but we restrict to +this family here.) + +The total cost of the relocation is canonically taken to be the Minkowski +distance $\left( \sum_{x \in X}{D_q(x,\varphi(x))^p} \right)^{1/p}$ of the vector +of matched-point distances. The Wasserstein distance is defined to be the +infimum of this value over all possible matches. +This yields the formulae + +$$ W_p^q(X,Y) = \inf_{\varphi : X \to Y}{\left( \sum_{x \in X}{{\lVert +x-\varphi(x) \rVert_q}^p} \right)^{1/p}}, $$ + +for $p < \infty$ and + +$$ W_\infty^q(X,Y) = \inf_{\varphi : X \to Y}{\max_{x \in X}{\lVert x-\varphi(x) +\rVert_q}} $$ + +for $p = \infty$. + +See @cohen2010lipschitz and @bubenik2023exact for detailed treatments and +stability results on these families of metrics. + +## Validation + +### Distances between nontrivial diagrams + +The following persistence diagrams provide a tractable example: + +$$ X = \left[ \begin{array}{cc} 1 & 3 \\ 3 & 5 \end{array} \right], \phantom{X = +Y} Y = \left[ \begin{array}{cc} 3 & 4 \end{array} \right]. $$ + +For convenience in the code, we omit dimensionality and focus only on the matrix +representations. + +```{r define small PDs} +X <- rbind( + c(1, 3), + c(3, 5) +) +Y <- rbind( + c(3, 4) +) +``` + +We overlay both diagrams in @fig-plot-small. Note that the vector between the +off-diagonal points $(1,3)$ of $X$ and $(3,4)$ of $Y$ is $(2,1)$, while the +vector from $(1,3)$ to its nearest diagonal point $(2,2)$ is $(1,-1)$. That one +coordinate is the same size while the other is smaller implies that an optimal +matching will always match $(1,3)$ with the diagonal, so long as $p \geq 1$. A +similar argument necessitates that $(3,4)$ of $Y$ must match with $(3,5)$ of +$X$. + +```{r} +#| label: fig-plot-small +#| fig-width: 4 +#| fig-height: 4 +#| fig-align: center +#| fig-cap: "Overlaid persistence diagrams $X$ (circles) and $Y$ (diamond) with dashed segments connecting optimally matched pairs." +par(mar = c(4, 4, 1, 1) + .1) +plot( + NA_real_, + xlim = c(0, 6), ylim = c(0, 6), asp = 1, xlab = "birth", ylab = "death" +) +abline(a = 0, b = 1) +points(X, pch = 1) +points(Y, pch = 5) +segments(X[, 1], X[, 2], c(2, Y[, 1]), c(2, Y[, 2]), lty = 2) +par(mar = c(5, 4, 4, 2) + .1) +``` + +Based on these observations, we get this expression for the Wasserstein distance +using the $q$-norm half-plane metric and the $p$-norm "matched space" metric: + +$$ W_p^q(X,Y) = ( {\lVert a \rVert_q}^p + {\lVert b \rVert_q}^p )^{1/p}, $$ + +where $a = (1,-1)$ and $b = (0,-1)$ are the vectors between matched points. We +can now calculate Wasserstein distances "by hand"; we'll consider those using +the half-plane Minkowski metrics with $q=1,2,\infty$ and the "matched space" +metrics with $p=1,2,\infty$. + +First, with $q=1$, we get $\lVert a \rVert_q = 1+1=2$ and $\lVert b \rVert_q = +0+1=1$. So the $(1,p)$-Wasserstein distance will be the $p$-Minkowski norm of +the vector $(2,1)$, given by $W_p^1(X,Y) = (2^p + 1^p)^{1/p}$. This nets us the +values $W_1^1(X,Y) = 3$ and $W_2^1(X,Y) = \sqrt{5}$. And then $W_\infty^1(X,Y) = +\max(2,1) = 2$. The reader is invited to complete the rest of @tbl-small. + +| Metric | $\lVert a \rVert$ | $\lVert b \rVert$ | $W_1$ | $W_2$ | $W_\infty$ | +|:-------|:----------------:|:----------------:|:-------:|:-------:|:------------:| +| $L^1$ | 2 | 1 | 3 | $\sqrt{5}$ | 2 | +| $L^2$ | $\sqrt{2}$ | 1 | $1+\sqrt{2}$ | $\sqrt{3}$ | $\sqrt{2}$ | +| $L^\infty$ | 1 | 1 | 2 | $\sqrt{2}$ | 1 | + +: Distances between optimally paired features and Wasserstein distances between +$X$ and $Y$ for several choices of half-plane and "matched space" metrics. +{#tbl-small} + +The results make intuitive sense; for example, the values change monotonically +along each row and column. +Let us now validate the bottom row---using the $L^\infty$ distance on the +half-plane, giving the popular _bottleneck distance_---using both Hela, as +exposed through {phutil}, and Dionysus, as exposed through {TDA}: + +```{r validate small PDs with Hera} +wasserstein_distance(X, Y, p = 1) +wasserstein_distance(X, Y, p = 2) +bottleneck_distance(X, Y) +``` + +In order to compute distances with {TDA}, we must restructure the PDs to include +a `"dimension"` column. +Note also that `TDA::wasserstein()` does not take the $1/p$th power after +computing the sum of $p$th powers; we do this manually to get comparable +results: + +```{r validate small PDs with Dionysus} +TDA::wasserstein(cbind(0, X), cbind(0, Y), p = 1, dimension = 0) +sqrt(TDA::wasserstein(cbind(0, X), cbind(0, Y), p = 2, dimension = 0)) +TDA::bottleneck(cbind(0, X), cbind(0, Y), dimension = 0) +``` + +### Distances from the trivial diagram + +An important edge case is when one persistence diagram is trivial, i.e. contains +only the diagonal so is "empty" of off-diagonal points. +This can occur unexpectedly in comparisons of persistence data, as the data may +be large but higher-degree features present in one set but absent in another. +To validate the distances in this case, we create an empty diagram $E$ and use +the same code to compare it to $X$. The point $(3,5)$ of $X$ will be matched to +the diagonal $(4,4)$, which yields the same $\infty$-distance $1$ so the +$L^\infty$ Wasserstein distances will be the same as before. + +```{r validate small PD vs empty} +# empty PD +E <- matrix(NA_real_, nrow = 0, ncol = 2) +# with dimension column +E_ <- cbind(matrix(NA_real_, nrow = 0, ncol = 1), E) +# distance from empty using phutil/Hera +wasserstein_distance(E, X, p = 1) +wasserstein_distance(E, X, p = 2) +bottleneck_distance(E, X) +# distance from empty using TDA/Dionysus +TDA::wasserstein(E_, cbind(0, X), p = 1, dimension = 0) +sqrt(TDA::wasserstein(E_, cbind(0, X), p = 2, dimension = 0)) +TDA::bottleneck(E_, cbind(0, X), dimension = 0) +``` + +## Benchmarks + +For a straightforward benchmark test, we compute PDs from point clouds sampled +with noise from two one-dimensional manifolds embedded in $\mathbb{R}^3$: the +circle as a trefoil knot and the segment as a two-armed archimedian spiral. +To prevent the results from being sensitive to an accident of a single sample, +we generate lists of 24 samples and benchmark only one iteration of each +function on each. + +```{r compute large PDs fake, eval=FALSE} +set.seed(28415) +n <- 24 +PDs1 <- lapply(seq(n), function(i) { + S1 <- tdaunif::sample_trefoil(n = 120, sd = .05) + as_persistence(TDA::ripsDiag(S1, maxdimension = 2, maxscale = 6)) +}) +PDs2 <- lapply(seq(n), function(i) { + S2 <- cbind(tdaunif::sample_arch_spiral(n = 120, arms = 2), 0) + S2 <- tdaunif::add_noise(S2, sd = .05) + as_persistence(TDA::ripsDiag(S2, maxdimension = 2, maxscale = 6)) +}) +``` + +```{r compute large PDs true, echo=FALSE} +n <- 24 +PDs1 <- trefoils +PDs2 <- arch_spirals +``` + +Both implementations are used to compute distances between successive pairs of +diagrams. The computations are annotated by homological degree and Wasserstein +power so that these results can be compared separately. + +```{r} +#| label: benchmark phutil and TDA +#| warning: false +PDs1_ <- lapply(lapply(PDs1, as.data.frame), as.matrix) +PDs2_ <- lapply(lapply(PDs2, as.data.frame), as.matrix) +# iterate over homological degrees and Wasserstein powers +bm_all <- list() +PDs_i <- seq_along(PDs1) +for (dimension in seq(0, 2)) { + # compute + bm_1 <- do.call(rbind, lapply(seq_along(PDs1), function(i) { + as.data.frame(microbenchmark::microbenchmark( + TDA = TDA::wasserstein( + PDs1_[[i]], PDs2_[[i]], dimension = dimension, p = 1 + ), + phutil = wasserstein_distance( + PDs1[[i]], PDs2[[i]], dimension = dimension, p = 1 + ), + times = 1, unit = "ns" + )) + })) + bm_2 <- do.call(rbind, lapply(seq_along(PDs1), function(i) { + as.data.frame(microbenchmark::microbenchmark( + TDA = sqrt(TDA::wasserstein( + PDs1_[[i]], PDs2_[[i]], dimension = dimension, p = 2 + )), + phutil = wasserstein_distance( + PDs1[[i]], PDs2[[i]], dimension = dimension, p = 2 + ), + times = 1, unit = "ns" + )) + })) + bm_inf <- do.call(rbind, lapply(seq_along(PDs1), function(i) { + as.data.frame(microbenchmark::microbenchmark( + TDA = TDA::bottleneck( + PDs1_[[i]], PDs2_[[i]], dimension = dimension + ), + phutil = bottleneck_distance( + PDs1[[i]], PDs2[[i]], dimension = dimension + ), + times = 1, unit = "ns" + )) + })) + # annotate and combine + bm_1$power <- 1; bm_2$power <- 2; bm_inf$power <- Inf + bm_res <- rbind(bm_1, bm_2, bm_inf) + bm_res$degree <- dimension + bm_all <- c(bm_all, list(bm_res)) +} +bm_all <- do.call(rbind, bm_all) +``` + +@fig-benchmark-large compares the distributions of runtimes by homological +degree (column) and Wasserstein power (row). We use nanoseconds in +{microbenchmark} when benchmarking to avoid potential integer overflows. Hence, +we convert the results into seconds ahead of formatting the axis in seconds. + +```{r} +#| label: fig-benchmark-large +#| fig-width: 8 +#| fig-height: 3 +#| fig-align: 'center' +#| fig-retina: 2 +#| fig-cap: "Benchmark comparison of Dionysus via {TDA} and Hera via {phutil} on +#| large persistence diagrams: Violin plots of runtime distributions on a common +#| scale." +bm_all <- transform(bm_all, expr = as.character(expr), time = unlist(time)) +bm_all <- subset(bm_all, select = c(expr, degree, power, time)) +ggplot(bm_all, aes(x = time * 10e-9, y = expr)) + + facet_grid( + rows = vars(power), cols = vars(degree), + labeller = label_both + ) + + geom_violin() + + scale_x_continuous( + transform = "log10", + labels = scales::label_timespan(units = "secs") + ) + + labs(x = NULL, y = NULL) +``` + +We note that Dionysus via {TDA} clearly outperforms Hera via {phutil} on degree-1 PDs, which in these cases have many fewer features. +However, the tables are turned in degree 0, in which the PDs have many more features---which, when present, dominate the total computational cost. +(The implementations are more evenly matched on the degree-2 PDs, which may have to do with many of them being empty.) +While by no means exhaustive and not necessarily representative, these results suggest that Hera via {phutil} scales more efficiently than Dionysus via {TDA} and should therefore be preferred for projects involving more feature-rich data sets. + +## References From b0f46f726f4889462a544904e08862c9d3d43b2d Mon Sep 17 00:00:00 2001 From: Aymeric Stamm Date: Fri, 9 May 2025 21:41:12 +0200 Subject: [PATCH 21/21] Add version info since vendoring cpp11 for now. --- inst/include/cpp11/protect.hpp | 3 +++ inst/include/cpp11/r_vector.hpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/inst/include/cpp11/protect.hpp b/inst/include/cpp11/protect.hpp index f51eb9e..cb1a5d8 100644 --- a/inst/include/cpp11/protect.hpp +++ b/inst/include/cpp11/protect.hpp @@ -1,3 +1,6 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +// modified on: 2025-05-09 #pragma once #include // for longjmp, setjmp, jmp_buf diff --git a/inst/include/cpp11/r_vector.hpp b/inst/include/cpp11/r_vector.hpp index b54c591..67d5aad 100644 --- a/inst/include/cpp11/r_vector.hpp +++ b/inst/include/cpp11/r_vector.hpp @@ -1,3 +1,6 @@ +// cpp11 version: 0.5.2 +// vendored on: 2025-05-07 +// modified on: 2025-05-09 #pragma once #include // for ptrdiff_t, size_t