diff --git a/NAMESPACE b/NAMESPACE index 79cf8be..7ceaf9c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ S3method(get_analytical_columns,ASTR) S3method(get_concentration_columns,ASTR) S3method(get_contextual_columns,ASTR) S3method(get_element_columns,ASTR) +S3method(get_error_columns,ASTR) S3method(get_isotope_columns,ASTR) S3method(get_ratio_columns,ASTR) S3method(get_unit_columns,ASTR) @@ -15,6 +16,7 @@ S3method(print,ASTR) S3method(remove_units,ASTR) S3method(validate,ASTR) S3method(validate,default) +export(abs_to_rel) export(albarede_juteau_1984) export(as_ASTR) export(at_to_wt) @@ -30,6 +32,7 @@ export(get_analytical_columns) export(get_concentration_columns) export(get_contextual_columns) export(get_element_columns) +export(get_error_columns) export(get_isotope_columns) export(get_ratio_columns) export(get_unit_columns) @@ -37,6 +40,7 @@ export(oxide_to_element) export(pb_iso_age_model) export(pointcloud_distribution) export(read_ASTR) +export(rel_to_abs) export(remove_units) export(stacey_kramers_1975) export(unify_concentration_unit) diff --git a/NEWS.md b/NEWS.md index f279ee8..dea1ba0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,3 +2,4 @@ * Implementation of ASTR schema. * Support and conversions for geochemical non-SI units: *at%*, *wt%* (elements and oxides) +* Support and conversions for relative and absolute analytical precisions. diff --git a/R/ASTR_basic.R b/R/ASTR_basic.R index 1d78077..b894e40 100644 --- a/R/ASTR_basic.R +++ b/R/ASTR_basic.R @@ -174,9 +174,11 @@ as_ASTR <- function( df4 <- remove_unit_substrings(df3) # turn into tibble-derived object df5 <- tibble::new_tibble(df4, nrow = nrow(df4), class = "ASTR") + # convert relative to absolute errors + df6 <- rel_to_abs(df5) # post-reading validation if (validate) { - validation_output <- validate(df5, quiet = FALSE) + validation_output <- validate(df6, quiet = FALSE) if (nrow(validation_output) > 0) { warning( "See the full list of validation output with: ", @@ -184,7 +186,7 @@ as_ASTR <- function( ) } } - return(df5) + return(df6) } # helper function to rename column names diff --git a/R/ASTR_colname_parser.R b/R/ASTR_colname_parser.R index 0c9a1c6..506839a 100644 --- a/R/ASTR_colname_parser.R +++ b/R/ASTR_colname_parser.R @@ -282,7 +282,7 @@ is_err_percent <- function(colname) { grepl(err_percent(), colname, perl = TRUE) } is_err_abs <- function(colname) { - grepl(err_abs(), colname, perl = TRUE) + grepl(paste0("(", err_abs(), ")(?!%)"), colname, perl = TRUE) } is_isotope_ratio <- function(colname) { grepl(isotope_ratio(), colname, perl = TRUE) @@ -351,6 +351,8 @@ err_percent <- function() { } err_2sd_percent <- function() "\\_err2SD%" err_sd_percent <- function() "\\_errSD%" +err_2se_percent <- function() "\\_err2SE%" +err_se_percent <- function() "\\_errSE%" err_abs <- function() { paste0(c( diff --git a/R/ASTR_column_select.R b/R/ASTR_column_select.R index f9aac12..68180a1 100644 --- a/R/ASTR_column_select.R +++ b/R/ASTR_column_select.R @@ -80,6 +80,16 @@ get_concentration_columns.ASTR <- function(x, ...) { get_cols_with_ac_class(x, c("ASTR_id", "ASTR_concentration")) } +#' @rdname ASTR +#' @export +get_error_columns <- function(x, ...) { + UseMethod("get_error_columns") +} +#' @export +get_error_columns.ASTR <- function(x, ...) { + get_cols_with_ac_class(x, c("ASTR_id", "ASTR_error")) +} + get_cols_with_unit <- function(x, units) { units <- sapply(units, function(unit) transform_notation(unit)) diff --git a/R/ASTR_conversion_error.R b/R/ASTR_conversion_error.R new file mode 100644 index 0000000..fc8b3f6 --- /dev/null +++ b/R/ASTR_conversion_error.R @@ -0,0 +1,100 @@ +#' Convert between relative and absolute analytical uncertainties +#' +#' Convert relative to absolute analytical uncertainties and vice versa in ASTR +#' objects. Work only for objects of class `ASTR`. +#' +#' @param df An ASTR object +#' @return An ASTR object with converted analytical precision columns. The +#' unchanged input, if it does not contain columns of the respective +#' analytical precision type +#' +#' +#' @name error_conversion +#' @export +#' +#' @examples +#' test_file <- system.file("extdata", "test_data_input_good.csv", package = "ASTR") +#' arch <- read_ASTR(test_file, id_column = "Sample", context = 1:7) +#' +#' arch2 <- abs_to_rel(arch) +#' +#' arch3 <- rel_to_abs(arch2) +#' +#' # Conversion is lossless +#' all.equal(arch$`SiO2_errSD`, arch3$`SiO2_errSD`) +#' +rel_to_abs <- function(df) { + + # Basic checks + checkmate::assert_class(df, "ASTR") + + # Find all error columns + error_cols <- colnames(df)[is_err_percent(colnames(df))] + + if (length(error_cols) == 0) { + return(df) + } + + df_old <- df + + # Process all error columns + for (err_col in error_cols) { + base_name <- remove_suffix(err_col) + + if (base_name %in% names(df)) { + # absolute = relative * measured_value + df[[err_col]] <- df[[err_col]] * df[[base_name]] / ifelse(inherits(df[[base_name]], "units"), 1, 100) + + # Set units to match the concentration column + if (inherits(df[[base_name]], "units")) { + units(df[[err_col]]) <- units(df[[base_name]]) + } else { + units(df[[err_col]]) <- NULL + } + } + } + + # assign ASTR class + df <- preserve_ASTR_attrs(df, df_old) + + # rename column names + colnames(df)[colnames(df) %in% error_cols] <- gsub("%", "", error_cols) + + return(df) +} + +#' @rdname error_conversion +#' @export +abs_to_rel <- function(df) { + + # Basic checks + checkmate::assert_class(df, "ASTR") + + # Find all error columns + error_cols <- colnames(df)[is_err_abs(colnames(df))] + + if (length(error_cols) == 0) { + return(df) + } + + df_old <- df + + # Process all error columns + for (err_col in error_cols) { + base_name <- remove_suffix(err_col) + + # relative = (absolute / measured_value) + df[[err_col]] <- df[[err_col]] / df[[base_name]] * ifelse(inherits(df[[base_name]], "units"), 1, 100) + + # Set units to percent + units(df[[err_col]]) <- units::as_units("%") + } + + # assign ASTR class + df <- preserve_ASTR_attrs(df, df_old) + + # rename columns + colnames(df)[colnames(df) %in% error_cols] <- paste0(error_cols, "%") + + return(df) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index f99197d..9ffedbd 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -23,6 +23,7 @@ reference: Functions for creating and handling ASTR objects. contents: - ASTR + - error_conversion - title: Data subsetting desc: > Pre-compiled lists for easier subsetting of data diff --git a/man/ASTR.Rd b/man/ASTR.Rd index 768494f..91da787 100644 --- a/man/ASTR.Rd +++ b/man/ASTR.Rd @@ -14,6 +14,7 @@ \alias{get_element_columns} \alias{get_ratio_columns} \alias{get_concentration_columns} +\alias{get_error_columns} \alias{get_unit_columns} \alias{remove_units} \alias{unify_concentration_unit} @@ -66,6 +67,8 @@ get_ratio_columns(x, ...) get_concentration_columns(x, ...) +get_error_columns(x, ...) + get_unit_columns(x, units, ...) remove_units(x, ...) diff --git a/man/error_conversion.Rd b/man/error_conversion.Rd new file mode 100644 index 0000000..6f44435 --- /dev/null +++ b/man/error_conversion.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ASTR_conversion_error.R +\name{error_conversion} +\alias{error_conversion} +\alias{rel_to_abs} +\alias{abs_to_rel} +\title{Convert between relative and absolute analytical uncertainties} +\usage{ +rel_to_abs(df) + +abs_to_rel(df) +} +\arguments{ +\item{df}{An ASTR object} +} +\value{ +An ASTR object with converted analytical precision columns. The +unchanged input, if it does not contain columns of the respective +analytical precision type +} +\description{ +Convert relative to absolute analytical uncertainties and vice versa in ASTR +objects. Work only for objects of class \code{ASTR}. +} +\examples{ +test_file <- system.file("extdata", "test_data_input_good.csv", package = "ASTR") +arch <- read_ASTR(test_file, id_column = "Sample", context = 1:7) + +arch2 <- abs_to_rel(arch) + +arch3 <- rel_to_abs(arch2) + +# Conversion is lossless +all.equal(arch$`SiO2_errSD`, arch3$`SiO2_errSD`) + +} diff --git a/tests/testthat/_snaps/ASTR_basic.md b/tests/testthat/_snaps/ASTR_basic.md index e2aaf91..cd73e79 100644 --- a/tests/testthat/_snaps/ASTR_basic.md +++ b/tests/testthat/_snaps/ASTR_basic.md @@ -45,21 +45,21 @@ 12 ICP-MS 0.512991 2.04 0.8000 4.5 [wtP] 0.070 [wtP] 1.56 [wtP] 13 ICP-MS NA -0.24 -0.0504 3.2 [wtP] 0.040 [wtP] 3.64 [wtP] 14 ICP-MS NA -0.42 -0.0420 3.7 [wtP] 0.060 [wtP] 7.51 [wtP] - MgO Al2O3 SiO2 SiO2_errSD% P2O5 S - 1 0.77 [wtP] 3.92 [wtP] 31.63 [wtP] 4.30 [%] 0.15 [wtP] 2.57 [atP] - 2 0.55 [wtP] 3.46 [wtP] 29.06 [wtP] 2.88 [%] NA [wtP] NA [atP] - 3 0.33 [wtP] 5.87 [wtP] 30.50 [wtP] 5.71 [%] 0.11 [wtP] 3.68 [atP] - 4 0.76 [wtP] 4.33 [wtP] 25.73 [wtP] 2.22 [%] 0.23 [wtP] 2.76 [atP] - 5 0.52 [wtP] 4.17 [wtP] 43.50 [wtP] 4.65 [%] 0.40 [wtP] 0.57 [atP] - 6 0.58 [wtP] 3.58 [wtP] 33.83 [wtP] 3.67 [%] 0.29 [wtP] 0.61 [atP] - 7 0.64 [wtP] 5.60 [wtP] 33.81 [wtP] 3.00 [%] 0.43 [wtP] 0.77 [atP] - 8 0.59 [wtP] 4.47 [wtP] 26.39 [wtP] 2.87 [%] 0.62 [wtP] 3.04 [atP] - 9 0.53 [wtP] 3.73 [wtP] 21.18 [wtP] 3.44 [%] 0.24 [wtP] 3.93 [atP] - 10 0.70 [wtP] 6.27 [wtP] 31.73 [wtP] 6.12 [%] 0.28 [wtP] 0.21 [atP] - 11 0.46 [wtP] 2.91 [wtP] 16.91 [wtP] 3.73 [%] 0.22 [wtP] 3.57 [atP] - 12 0.58 [wtP] 3.60 [wtP] 28.49 [wtP] 4.89 [%] 0.19 [wtP] 1.95 [atP] - 13 0.88 [wtP] 5.24 [wtP] 38.04 [wtP] 10.12 [%] 0.30 [wtP] 1.35 [atP] - 14 0.84 [wtP] 3.97 [wtP] 31.67 [wtP] 4.30 [%] 0.18 [wtP] 0.83 [atP] + MgO Al2O3 SiO2 SiO2_errSD P2O5 S + 1 0.77 [wtP] 3.92 [wtP] 31.63 [wtP] 1.360090 [wtP] 0.15 [wtP] 2.57 [atP] + 2 0.55 [wtP] 3.46 [wtP] 29.06 [wtP] 0.836928 [wtP] NA [wtP] NA [atP] + 3 0.33 [wtP] 5.87 [wtP] 30.50 [wtP] 1.741550 [wtP] 0.11 [wtP] 3.68 [atP] + 4 0.76 [wtP] 4.33 [wtP] 25.73 [wtP] 0.571206 [wtP] 0.23 [wtP] 2.76 [atP] + 5 0.52 [wtP] 4.17 [wtP] 43.50 [wtP] 2.022750 [wtP] 0.40 [wtP] 0.57 [atP] + 6 0.58 [wtP] 3.58 [wtP] 33.83 [wtP] 1.241561 [wtP] 0.29 [wtP] 0.61 [atP] + 7 0.64 [wtP] 5.60 [wtP] 33.81 [wtP] 1.014300 [wtP] 0.43 [wtP] 0.77 [atP] + 8 0.59 [wtP] 4.47 [wtP] 26.39 [wtP] 0.757393 [wtP] 0.62 [wtP] 3.04 [atP] + 9 0.53 [wtP] 3.73 [wtP] 21.18 [wtP] 0.728592 [wtP] 0.24 [wtP] 3.93 [atP] + 10 0.70 [wtP] 6.27 [wtP] 31.73 [wtP] 1.941876 [wtP] 0.28 [wtP] 0.21 [atP] + 11 0.46 [wtP] 2.91 [wtP] 16.91 [wtP] 0.630743 [wtP] 0.22 [wtP] 3.57 [atP] + 12 0.58 [wtP] 3.60 [wtP] 28.49 [wtP] 1.393161 [wtP] 0.19 [wtP] 1.95 [atP] + 13 0.88 [wtP] 5.24 [wtP] 38.04 [wtP] 3.849648 [wtP] 0.30 [wtP] 1.35 [atP] + 14 0.84 [wtP] 3.97 [wtP] 31.67 [wtP] 1.361810 [wtP] 0.18 [wtP] 0.83 [atP] CaO TiO2 MnO FeOtot FeOtot_err2SD ZnO 1 2.11 [wtP] 0.52 [wtP] 0.20 [wtP] 43.83 [wtP] 4.120 [wtP] 5.64 [%] 2 2.34 [wtP] 0.49 [wtP] 0.54 [wtP] 51.02 [wtP] 3.890 [wtP] 4.07 [%] diff --git a/tests/testthat/_snaps/goem_sk_lines/image.svg b/tests/testthat/_snaps/geom_sk_lines/image.svg similarity index 100% rename from tests/testthat/_snaps/goem_sk_lines/image.svg rename to tests/testthat/_snaps/geom_sk_lines/image.svg diff --git a/tests/testthat/test_ASTR_column_selection.R b/tests/testthat/test_ASTR_column_selection.R index 860bfd5..764a43d 100644 --- a/tests/testthat/test_ASTR_column_selection.R +++ b/tests/testthat/test_ASTR_column_selection.R @@ -32,8 +32,14 @@ test_that("column selection based on ASTR column types", { "U", "V", "Cr", "Co", "Ni", "Sr", "Se" ) ) + expect_all_true( + colnames(get_error_columns(test_input)) == + c("ID", "d65Cu_err2SD", "SiO2_errSD", "FeOtot_err2SD", "206Pb/204Pb_err2SD", + "207Pb/204Pb_err2SD", "208Pb/204Pb_err2SD", "207Pb/206Pb_err2SD", "208Pb/206Pb_err2SD" + ) + ) expect_all_true( colnames(get_unit_columns(test_input, c("ng/g", "µg/ml", "%"))) == - c("ID", "SiO2_errSD%", "ZnO", "Ag", "Sn") + c("ID", "ZnO", "Ag", "Sn") ) }) diff --git a/tests/testthat/test_conversion_error.R b/tests/testthat/test_conversion_error.R new file mode 100644 index 0000000..4ec8a4b --- /dev/null +++ b/tests/testthat/test_conversion_error.R @@ -0,0 +1,77 @@ +test_input <- suppressWarnings( + read_ASTR( + system.file("extdata", "test_data_input_good.csv", package = "ASTR"), + id_column = "Sample", + context = c("Lab no.", "Site", "latitude", "longitude", "Type", "method_comp") + ) +) + +# --- abs_to_rel --- + +result <- abs_to_rel(test_input) + +test_that("abs_to_rel: numeric conversion is correct", { + expect_equal(as.numeric(result[["d65Cu_err2SD%"]]), + as.numeric(test_input[["d65Cu_err2SD"]] / test_input[["d65Cu"]] * 100), tolerance = 1e-6) +}) + +test_that("abs_to_rel: error col units are percent after conversion", { + expect_equal(as.character(units(result[["SiO2_errSD%"]])), "%") +}) + +test_that("abs_to_rel: input returned unchanged if no absolute error cols present", { + df_no_err <- test_input[, !is_err_abs(colnames(test_input))] + expect_identical(abs_to_rel(df_no_err), df_no_err) +}) + +test_that("abs_to_rel: errors on non-ASTR input", { + expect_error(abs_to_rel(data.frame(x = 1))) +}) + +# --- rel_to_abs --- + +result2 <- rel_to_abs(result) + +test_that("rel_to_abs: numeric conversion is correct", { + expect_equal(as.numeric(result2[["SiO2_errSD"]]), + as.numeric(result[["SiO2_errSD%"]] * result[["SiO2"]] / 100), tolerance = 1e-6) +}) + +test_that("rel_to_abs: column renamed to absolute error", { + expect_all_false(grepl("_err.{3}%", colnames(result2), perl = TRUE)) +}) + +test_that("rel_to_abs: error col units match base col after conversion", { + expect_equal(units(result2[["SiO2_errSD"]]), + units(result2[["SiO2"]])) +}) + +test_that("rel_to_abs: base col and class unchanged", { + expect_equal(result2[["SiO2"]], result[["SiO2"]]) + expect_s3_class(result2, "ASTR") +}) + +test_that("rel_to_abs: input returned unchanged if no relative error cols present", { + df_no_err <- test_input[, !is_err_percent(colnames(test_input))] + expect_identical(rel_to_abs(df_no_err), df_no_err) +}) + +# test not valid because column does no include NAs +#test_that("rel_to_abs: NA in base col produces NA in error col, no crash", { +# na_rows <- is.na(as.numeric(test_input[["SiO2"]])) +# expect_true(all(is.na(as.numeric(result[["SiO2_errSD"]])[na_rows]))) +#}) + +test_that("rel_to_abs: errors on non-ASTR input", { + expect_error(rel_to_abs(data.frame(x = 1))) +}) + +# --- conversion is reversible --- + +error_reversed <- rel_to_abs(abs_to_rel(test_input)) + +test_that("error conversion is reversible and unitless values handled correctly", { + expect_equal(test_input[["SiO_err2SD"]], error_reversed[["SiO2_err2SD"]]) + expect_equal(test_input[["d65Cu_err2SD"]], error_reversed[["d65Cu_err2SD"]]) + expect_equal(test_input[["206Pb/204Pb_err2SD"]], error_reversed[["206Pb/204Pb_err2SD"]]) +}) diff --git a/tests/testthat/test_goem_sk_lines.R b/tests/testthat/test_geom_sk_lines.R similarity index 100% rename from tests/testthat/test_goem_sk_lines.R rename to tests/testthat/test_geom_sk_lines.R diff --git a/vignettes/ASTR.showcase.Rmd b/vignettes/ASTR.showcase.Rmd index 511eb46..c0429f9 100644 --- a/vignettes/ASTR.showcase.Rmd +++ b/vignettes/ASTR.showcase.Rmd @@ -23,6 +23,7 @@ set.seed(123) data <- data.frame( "Group" = c(rep("A", 5), rep("B", 5)), "Cu_wt%" = round(runif(10, 85, 95), 1), + "Cu_err2SD%" = round(runif(10, 0.2, 0.3), 2), "Sn_wt%" = round(runif(10, 5, 10), 1), "As_wt%" = round(runif(10, 1.5, 2.5), 2), "Sb_wt%" = round(runif(10, 0.5, 1), 2), @@ -58,7 +59,7 @@ Note how the column headers are organised according to the ASTR conventions. Thi We can now perform the reading process. As we are working with mock-up data in an R vignette, we do not read from the file system. We only need to call an essential internal function of `read_ASTR()`: `as_ASTR()`. It turns R data.frames to `ASTR` objects. If we would start from an Excel file we would call `read_ASTR()` instead. -To do this in practice we not only have to submit our `data` to `as_ASTR()`, but also set two other arguments: 1. We have to define one of the columns as the ID column with `id_column`, and 2. we explicitly have to mark any column providing contextual information with `context` (i.e. no analytical values, cf. [ASTR schema: Implementation](VG.ASTR.Schema.Implementation.html)). +To do this in practice, we not only have to submit our `data` to `as_ASTR()`, but also set two other arguments: 1. We have to define one of the columns as the ID column with `id_column`, and 2. we explicitly have to mark any column providing contextual information with `context` (i.e. no analytical values, cf. [ASTR schema: Implementation](VG.ASTR.Schema.Implementation.html)). ```{r echo=TRUE} data <- as_ASTR(data, id_column = "Group", context = c("Group")) @@ -80,13 +81,13 @@ This summary of where to find the missing values comes in handy for large datase In our case, the dataset is fine. We might want to check on the missing values in the copper isotope data because apparently there was something wrong with the Excel file. In case of the lead isotope data, however, they are intentional (they were not measured for samples from group A). -Having a closer look on the imported data set shows, that ASTR did one more thing: +Having a closer look on the imported data set shows, that ASTR did two more things: ```{r echo=TRUE} data ``` -Our relative unit "ppm" was converted into SI-units and the values now have a measurement unit R can understand (note that the unit `wt%` is written as `wtP`)^[The %-sign can not be used in custom units]: +The relative analytical precisions of the copper concentrations were automatically converted to absolute analytical precisions, facilitating, e.g., calculations and plotting. And our relative unit "ppm" was converted into SI-units; the values now have a measurement unit R can understand (note that the unit `wt%` is written as `wtP`)^[The %-sign can not be used in custom units]: ``` {r} units(data$Ag[1]) diff --git a/vignettes/VG.ASTR.Schema.Implementation.Rmd b/vignettes/VG.ASTR.Schema.Implementation.Rmd index 8804042..2fd25f0 100644 --- a/vignettes/VG.ASTR.Schema.Implementation.Rmd +++ b/vignettes/VG.ASTR.Schema.Implementation.Rmd @@ -43,8 +43,10 @@ This vignette outlines how the conventions defined in [the ASTR schema](VG.ASTRs * One column must be specified as `ID` column during import to provide a unique identifier for each line in the dataset. To ensure uniqueness of its values, `_1`, `_2`, ... `_n` will be added to non-unique values. The original column is preserved. * Columns without a column header will be removed during import. * The following notations will be automatically identified and replaced with `NA`, unless you explicitly define other values in `read_ASTR()`: `NA`, `N.A.`, `N/A`, `na`, `n/a`, `-`, and `n.d.`. Values containing common excel error messages (*#DIV/0!*, *#VALUE!*, *#REF!*, *#NAME?*, *#NUM!*, *#N/A*, and *#NULL!*) are also replaced with `NA` by default. +* Relative analytical precisions will be converted into absolute analytical precisions. If relative analytical precisions are required, conversion is possible with `abs_to_rel()`. This avoids potential conflicts in handling element concentrations in per cent. * Units will be removed from column names because {units} stores them in the column attributes. This allows for clean column names and therefore of e.g. axis labels in plots. They can be "shifted" from the column attributes back to the column names by `remove_units()` with `recover_unit_names = TRUE`. + # Units * The package relies on `{units}`, which uses the [udunits](https://www.unidata.ucar.edu/software/udunits) C library, for handling all SI units (e.g. *µg/ml*) and relative concentration units (e.g. *ppm(m/V)*). If the mixture type is not specified, *m/m* is assumed. Non-SI units *wt%* and *at%* were defined as `wtP` and `atP`, respectively.