From 8a1c6275ac52a2a77eb48ad4c918663428fbae6f Mon Sep 17 00:00:00 2001 From: "J. Allen Baron" Date: Thu, 12 Dec 2024 17:34:57 -0500 Subject: [PATCH 1/4] Create extract_subclass() and deprecate extract_subtree() Tests: NEEDED --- NAMESPACE | 1 + R/extract.R | 74 +++++++++++++++- R/extract_helpers.R | 24 ++++++ inst/sparql/template/set.rq | 17 ++++ inst/sparql/template/set_ancestors.rq | 17 ++++ inst/sparql/template/set_descendants.rq | 18 ++++ .../sparql/template/set_to_common_ancestor.rq | 33 ++++++++ man/extract_subclass.Rd | 84 +++++++++++++++++++ man/extract_subtree.Rd | 7 +- 9 files changed, 269 insertions(+), 6 deletions(-) create mode 100644 inst/sparql/template/set.rq create mode 100644 inst/sparql/template/set_ancestors.rq create mode 100644 inst/sparql/template/set_descendants.rq create mode 100644 inst/sparql/template/set_to_common_ancestor.rq create mode 100644 man/extract_subclass.Rd diff --git a/NAMESPACE b/NAMESPACE index 07e79589..73ab0e3e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -64,6 +64,7 @@ export(extract_eq_axiom) export(extract_ordo_mappings) export(extract_pm_date) export(extract_pmid) +export(extract_subclass) export(extract_subclass_axiom) export(extract_subtree) export(format_axiom) diff --git a/R/extract.R b/R/extract.R index f8915a58..bd71f07a 100644 --- a/R/extract.R +++ b/R/extract.R @@ -283,7 +283,70 @@ extract_doid_url <- function(doid_edit, include_obsolete = FALSE, } -#' Extract Subtree +#' Extract OBO Classes with Parents +#' +#' Extracts the classes and their `rdfs:subClassOf` _asserted_ parents from an +#' OBO ontology. +#' +#' @inheritParams robot_query +#' @param class One or more valid OBO classes (see [is_valid_obo()] for +#' valid input formats). Accepted values may depend on `method`. +#' @param method The method to use when extracting classes. Options include: +#' +#' * `"self"`: Extract the class(es) only. +#' +#' * `"parents"`: Extract the class(es) and _asserted_ parent(s). +#' +#' * `"descendants"` (default): Extract the class(es) and all descendants. +#' +#' * `"ancestors"`: Extract the class(es) and all ancestors. +#' +#' * `"common_ancestor"`: Extract the class(es) and all ancestors up to a common +#' ancestor. The common ancestor could be in the set. +#' +#' @examples +#' \dontrun{ +#' # Extract all descendants of 'diabetes mellitus' +#' doid_path <- "" +#' extract_subclass(doid_path, "DOID:9351") +#' +#' # Extract all ancestors of 'diabetes mellitus' +#' extract_subclass(doid_path, "DOID:9351", method = "ancestors") +#' +#' # Extract all classes of appendicitis (DOID:8337), alcoholic gastritis +#' (DOID:8680), and viral esophagitis (DOID:6297) up to a common ancestor +#' extract_subclass( +#' doid_path, +#' c("DOID:8337", "DOID:8680", "DOID:6297"), +#' method = "common_ancestor" +#' ) +#' } +#' +#' @returns +#' A [tibble][tibble::tibble] with the columns: `id`, `label`, `parent_id`, +#' and `parent_label`. +#' +#' @seealso [format_subtree()] to arrange data in a tree structure similar to +#' ontology browsers. +#' +#' @export +extract_subclass <- function(input, class, method = "descendants", + .robot_path = NULL, + tidy_what = c("header", "uri_to_curie")) { + stopifnot("`class` must be a valid OBO identifier" = is_valid_obo(class)) + query <- prep_extract_query(class, method) + out <- robot_query( + input, + query, + .robot_path = .robot_path, + tidy_what = tidy_what + ) + + out +} + + +#' Extract Subtree (DEPRECATED) #' #' Extracts the classes and parents of a DO subtree from a `pyDOID.owl.xml` #' object. @@ -299,10 +362,15 @@ extract_doid_url <- function(doid_edit, include_obsolete = FALSE, #' and `parent_label`, with one row for each unique combination for each #' subclass below and including `top_node`. #' -#' @seealso [format_subtree()] to arrange data in a tree structure similar to -#' ontology browsers. +#' @seealso [extract_subclass(method = "descendants")][extract_subclass] for the +#' same result with a single `class`. [format_subtree()] to arrange data in a +#' tree structure similar to ontology browsers. +#' #' @export extract_subtree <- function(x, top_node, reload = FALSE) { + rlang::warn( + "`extract_subtree()` is deprecated. Use `extract_subclass()` instead." + ) owl <- access_owl_xml(x) assert_string(top_node) diff --git a/R/extract_helpers.R b/R/extract_helpers.R index a6896e4b..5a37369b 100644 --- a/R/extract_helpers.R +++ b/R/extract_helpers.R @@ -21,3 +21,27 @@ subtree_query_glue <- ' rdfs:label ?parent_label . FILTER(!isblank(?parent)) }}' + + +# extract_subclass() helpers ------------------------------------------------- + +prep_extract_query <- function(class, method) { + opts <- c("self", "parents", "descendants", "ancestors", "common_ancestor") + if (!method %in% opts) { + rlang::abort("`method` must be one of: ", paste(opts, collapse = ", ")) + } + q_dir <- system.file("sparql", package = "DO.utils") + q_path <- switch( + self = file.path(q_dir, "set.rq"), + descendants = file.path(q_dir, "set_descendants.rq"), + ancestors = file.path(q_dir, "set_ancestors.rq"), + common_ancestor = file.path(q_dir, "set_to_common_ancestor.rq") + ) + + set <- paste0(class, collapse = " ") + # only for common_ancestor + set_values <- set <- paste0(class, collapse = ", ") + query <- glueV(readr::read_file(q_path), set = set, set_values = set_values) + + query +} diff --git a/inst/sparql/template/set.rq b/inst/sparql/template/set.rq new file mode 100644 index 00000000..36896aa0 --- /dev/null +++ b/inst/sparql/template/set.rq @@ -0,0 +1,17 @@ +# Return class & parent IDs and labels for all classes in a set +PREFIX rdfs: +PREFIX oboInOwl: +PREFIX DOID: + +SELECT ?id ?label ?parent_id ?parent_label +WHERE { + VALUES ?set { !<>! } + + ?iri rdfs:subClassOf* ?set ; + oboInOwl:id ?id ; + rdfs:label ?label ; + rdfs:subClassOf ?parent_iri . + + ?parent_iri oboInOwl:id ?parent_id ; + rdfs:label ?parent_label . +} \ No newline at end of file diff --git a/inst/sparql/template/set_ancestors.rq b/inst/sparql/template/set_ancestors.rq new file mode 100644 index 00000000..d9850dd0 --- /dev/null +++ b/inst/sparql/template/set_ancestors.rq @@ -0,0 +1,17 @@ +# Return class & parent IDs and labels for all ancestors of a set of classes +PREFIX rdfs: +PREFIX oboInOwl: +PREFIX DOID: + +SELECT ?id ?label ?parent_id ?parent_label +WHERE { + VALUES ?set { !<>! } + + ?iri ^rdfs:subClassOf* ?set ; + oboInOwl:id ?id ; + rdfs:label ?label ; + rdfs:subClassOf ?parent_iri . + + ?parent_iri oboInOwl:id ?parent_id ; + rdfs:label ?parent_label . +} \ No newline at end of file diff --git a/inst/sparql/template/set_descendants.rq b/inst/sparql/template/set_descendants.rq new file mode 100644 index 00000000..2aa346ab --- /dev/null +++ b/inst/sparql/template/set_descendants.rq @@ -0,0 +1,18 @@ +# Return class & parent IDs and labels for all descendants of a set of classes +# Essentially a multi-subtree query +PREFIX rdfs: +PREFIX oboInOwl: +PREFIX DOID: + +SELECT ?id ?label ?parent_id ?parent_label +WHERE { + VALUES ?set { !<>! } + + ?iri rdfs:subClassOf* ?set ; + oboInOwl:id ?id ; + rdfs:label ?label ; + rdfs:subClassOf ?parent_iri . + + ?parent_iri oboInOwl:id ?parent_id ; + rdfs:label ?parent_label . +} \ No newline at end of file diff --git a/inst/sparql/template/set_to_common_ancestor.rq b/inst/sparql/template/set_to_common_ancestor.rq new file mode 100644 index 00000000..df80a62e --- /dev/null +++ b/inst/sparql/template/set_to_common_ancestor.rq @@ -0,0 +1,33 @@ +# Return class & parent IDs and labels for all classes between a set of classes +# and their nearest common ancestor, inclusive +PREFIX rdfs: +PREFIX oboInOwl: +PREFIX DOID: + +SELECT ?id ?label ?parent_id ?parent_label +WHERE { + { + # find the nearest common ancestor + SELECT ?ancestor_iri (COUNT(?mid) AS ?distance) + WHERE { + ?ancestor_iri ^rdfs:subClassOf* ?mid . + ?mid ^rdfs:subClassOf* !<>! . + FILTER(!isBlank(?ancestor_iri)) + } + GROUP BY ?ancestor_iri + ORDER BY ?distance + LIMIT 1 + } + + # get info for classes in path from set to ancestor + VALUES ?set { !<>! } + + ?iri rdfs:subClassOf* ?ancestor_iri ; + ^rdfs:subClassOf* ?set ; + oboInOwl:id ?id ; + rdfs:label ?label ; + rdfs:subClassOf ?parent_iri . + + ?parent_iri oboInOwl:id ?parent_id ; + rdfs:label ?parent_label . +} \ No newline at end of file diff --git a/man/extract_subclass.Rd b/man/extract_subclass.Rd new file mode 100644 index 00000000..e2e1a709 --- /dev/null +++ b/man/extract_subclass.Rd @@ -0,0 +1,84 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/extract.R +\name{extract_subclass} +\alias{extract_subclass} +\title{Extract OBO Classes with Parents} +\usage{ +extract_subclass( + input, + class, + method = "descendants", + .robot_path = NULL, + tidy_what = c("header", "uri_to_curie") +) +} +\arguments{ +\item{input}{The path to an RDF/OWL file recognized by ROBOT, as a string.} + +\item{class}{One or more valid OBO classes (see \code{\link[=is_valid_obo]{is_valid_obo()}} for +valid input formats). Accepted values may depend on \code{method}.} + +\item{method}{The method to use when extracting classes. Options include: +\itemize{ +\item \code{"self"}: Extract the class(es) only. +\item \code{"parents"}: Extract the class(es) and \emph{asserted} parent(s). +\item \code{"descendants"} (default): Extract the class(es) and all descendants. +\item \code{"ancestors"}: Extract the class(es) and all ancestors. +\item \code{"common_ancestor"}: Extract the class(es) and all ancestors up to a common +ancestor. The common ancestor could be in the set. +}} + +\item{.robot_path}{The path to a ROBOT executable or .jar file, as a string. +When \code{NULL} (default), if a system ROBOT executable is available it will +be used, otherwise an error will be signaled. + +\strong{NOTE:} \code{DO.utils} caches the last ROBOT used for future use.} + +\item{tidy_what}{The elements of a SPARQL-created data.frame to tidy, as a +character vector. One or more of the following: +\itemize{ +\item \code{"everything"} to apply all tidy operations (has precedence over +\code{"nothing"}). +\item \code{"header"} to remove leading \verb{?} from header labels. +\item \code{"unnest"} to unnest list columns with \code{\link[=unnest_cross]{unnest_cross()}}. +\item \code{"uri_to_curie"} to convert all URIs recognized by DO.utils to CURIEs with +\code{\link[=to_curie]{to_curie()}}. +\item \code{"lgl_NA_FALSE"} to replace \code{NA} in logical columns with \code{FALSE}. +\item \code{"as_tibble"} to make the output a \link[tibble:tibble]{tibble}. +\item \code{"rm_lang_tag"} to remove language tags. Tags will only be removed from +\code{character} class columns, and then only if there is one unique language tag +in the given column. +\item \code{"nothing"} to prevent all tidying. +}} +} +\value{ +A \link[tibble:tibble]{tibble} with the columns: \code{id}, \code{label}, \code{parent_id}, +and \code{parent_label}. +} +\description{ +Extracts the classes and their \code{rdfs:subClassOf} \emph{asserted} parents from an +OBO ontology. +} +\examples{ +\dontrun{ +# Extract all descendants of 'diabetes mellitus' +doid_path <- "" +extract_subclass(doid_path, "DOID:9351") + +# Extract all ancestors of 'diabetes mellitus' +extract_subclass(doid_path, "DOID:9351", method = "ancestors") + +# Extract all classes of appendicitis (DOID:8337), alcoholic gastritis +(DOID:8680), and viral esophagitis (DOID:6297) up to a common ancestor +extract_subclass( + doid_path, + c("DOID:8337", "DOID:8680", "DOID:6297"), + method = "common_ancestor" +) +} + +} +\seealso{ +\code{\link[=format_subtree]{format_subtree()}} to arrange data in a tree structure similar to +ontology browsers. +} diff --git a/man/extract_subtree.Rd b/man/extract_subtree.Rd index 2517fcc8..5e2883b8 100644 --- a/man/extract_subtree.Rd +++ b/man/extract_subtree.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/extract.R \name{extract_subtree} \alias{extract_subtree} -\title{Extract Subtree} +\title{Extract Subtree (DEPRECATED)} \usage{ extract_subtree(x, top_node, reload = FALSE) } @@ -26,6 +26,7 @@ Extracts the classes and parents of a DO subtree from a \code{pyDOID.owl.xml} object. } \seealso{ -\code{\link[=format_subtree]{format_subtree()}} to arrange data in a tree structure similar to -ontology browsers. +\link[=extract_subclass]{extract_subclass(method = "descendants")} for the +same result with a single \code{class}. \code{\link[=format_subtree]{format_subtree()}} to arrange data in a +tree structure similar to ontology browsers. } From 8c6a721916bad4e332bc410432fed47289269f21 Mon Sep 17 00:00:00 2001 From: "J. Allen Baron" Date: Thu, 22 Jan 2026 15:44:52 -0500 Subject: [PATCH 2/4] Update extract_subclass() - Fix code so it actually works - Add 'parents' method - Condense to single query template file - Make input error message more informative --- R/extract.R | 35 +++++++++-- R/extract_helpers.R | 63 +++++++++++++++---- inst/sparql/template-subclass-set.rq | 18 ++++++ inst/sparql/template/set.rq | 17 ----- inst/sparql/template/set_ancestors.rq | 17 ----- inst/sparql/template/set_descendants.rq | 18 ------ .../sparql/template/set_to_common_ancestor.rq | 33 ---------- 7 files changed, 99 insertions(+), 102 deletions(-) create mode 100644 inst/sparql/template-subclass-set.rq delete mode 100644 inst/sparql/template/set.rq delete mode 100644 inst/sparql/template/set_ancestors.rq delete mode 100644 inst/sparql/template/set_descendants.rq delete mode 100644 inst/sparql/template/set_to_common_ancestor.rq diff --git a/R/extract.R b/R/extract.R index ae39be85..9fb9d505 100644 --- a/R/extract.R +++ b/R/extract.R @@ -303,6 +303,7 @@ extract_doid_url <- function(doid_edit, include_obsolete = FALSE, #' #' * `"common_ancestor"`: Extract the class(es) and all ancestors up to a common #' ancestor. The common ancestor could be in the set. +#' @inheritDotParams is_valid_obo allow ns_type #' #' @examples #' \dontrun{ @@ -331,10 +332,36 @@ extract_doid_url <- function(doid_edit, include_obsolete = FALSE, #' #' @export extract_subclass <- function(input, class, method = "descendants", - .robot_path = NULL, - tidy_what = c("header", "uri_to_curie")) { - stopifnot("`class` must be a valid OBO identifier" = is_valid_obo(class)) - query <- prep_extract_query(class, method) + .robot_path = NULL, + tidy_what = c("header", "uri_to_curie"), + ...) { + invalid_obo <- !is_valid_obo(class, ...) + if (any(invalid_obo)) { + invalid_n <- sum(invalid_obo) + invalid_pos <- which(invalid_obo) + if (invalid_n > 5) { + invalid_id <- paste0( + invalid_n, + " identifiers are invalid, e.g. positions ", + paste0(invalid_pos[1:3], collapse = ", "), + ", etc." + ) + } else { + invalid_id <- paste0( + class[invalid_obo], " (pos ", invalid_pos, ")" + ) + } + + rlang::abort( + c( + "`class` inputs must all be valid OBO identifiers", + purrr::set_names(invalid_id, rlang::rep_along(invalid_id, "x")) + ) + ) + } + obo_class <- format_obo(class, "obo_curie", ...) + + query <- prep_extract_query(obo_class, method) out <- robot_query( input, query, diff --git a/R/extract_helpers.R b/R/extract_helpers.R index 5a37369b..70be33d3 100644 --- a/R/extract_helpers.R +++ b/R/extract_helpers.R @@ -26,22 +26,59 @@ subtree_query_glue <- ' # extract_subclass() helpers ------------------------------------------------- prep_extract_query <- function(class, method) { - opts <- c("self", "parents", "descendants", "ancestors", "common_ancestor") - if (!method %in% opts) { - rlang::abort("`method` must be one of: ", paste(opts, collapse = ", ")) - } - q_dir <- system.file("sparql", package = "DO.utils") - q_path <- switch( - self = file.path(q_dir, "set.rq"), - descendants = file.path(q_dir, "set_descendants.rq"), - ancestors = file.path(q_dir, "set_ancestors.rq"), - common_ancestor = file.path(q_dir, "set_to_common_ancestor.rq") + sparql_stmts <- c( + self = "?iri rdfs:subClassOf{0} ?set .", + parents = "?iri ^rdfs:subClassOf{0,1} ?set .", + descendants = "?iri rdfs:subClassOf* ?set .", + ancestors = "?iri ^rdfs:subClassOf* ?set .", + common_ancestor = " + { + # find the nearest common ancestor + SELECT ?ancestor_iri (COUNT(?mid) AS ?distance) + WHERE { + ?ancestor_iri ^rdfs:subClassOf* ?mid . + ?mid ^rdfs:subClassOf* !<>! . + FILTER(!isBlank(?ancestor_iri)) + } + GROUP BY ?ancestor_iri + ORDER BY ?distance + LIMIT 1 + } + + # get info for classes in path from set to ancestor + VALUES ?set { !<>! } + + ?iri rdfs:subClassOf* ?ancestor_iri ; + ^rdfs:subClassOf* ?set ." ) + opts <- names(sparql_stmts) + if (!method %in% opts || length(method) != 1) { + rlang::abort( + c( + paste0( + "`method` must be one of: ", + paste0("'", opts, "'", collapse = ", ")), + x = paste0("'", method, "'", collapse = ", ") + ) + ) + } set <- paste0(class, collapse = " ") - # only for common_ancestor - set_values <- set <- paste0(class, collapse = ", ") - query <- glueV(readr::read_file(q_path), set = set, set_values = set_values) + sparql_selector <- sparql_stmts[method] + if (method == "common_ancestor") { + set_list <- paste0(class, collapse = ", ") + sparql_selector <- glueV(sparql_selector, set_list = set_list) + } + q_path <- system.file( + "sparql", "template-subclass-set.rq", + package = "DO.utils", + mustWork = TRUE + ) + query <- glueV( + readr::read_file(q_path), + sparql_selector = sparql_selector, + set = set + ) query } diff --git a/inst/sparql/template-subclass-set.rq b/inst/sparql/template-subclass-set.rq new file mode 100644 index 00000000..a6777091 --- /dev/null +++ b/inst/sparql/template-subclass-set.rq @@ -0,0 +1,18 @@ +# Return class & parent IDs and labels for all classes in a set +PREFIX rdfs: +PREFIX oboInOwl: +PREFIX obo: + +SELECT DISTINCT ?iri ?label ?parent_iri ?parent_label +WHERE { + VALUES ?set { !<>! } + + !<>! + + ?iri rdfs:label ?label . + + OPTIONAL{ + ?iri rdfs:subClassOf ?parent_iri . + ?parent_iri rdfs:label ?parent_label . + } +} diff --git a/inst/sparql/template/set.rq b/inst/sparql/template/set.rq deleted file mode 100644 index 36896aa0..00000000 --- a/inst/sparql/template/set.rq +++ /dev/null @@ -1,17 +0,0 @@ -# Return class & parent IDs and labels for all classes in a set -PREFIX rdfs: -PREFIX oboInOwl: -PREFIX DOID: - -SELECT ?id ?label ?parent_id ?parent_label -WHERE { - VALUES ?set { !<>! } - - ?iri rdfs:subClassOf* ?set ; - oboInOwl:id ?id ; - rdfs:label ?label ; - rdfs:subClassOf ?parent_iri . - - ?parent_iri oboInOwl:id ?parent_id ; - rdfs:label ?parent_label . -} \ No newline at end of file diff --git a/inst/sparql/template/set_ancestors.rq b/inst/sparql/template/set_ancestors.rq deleted file mode 100644 index d9850dd0..00000000 --- a/inst/sparql/template/set_ancestors.rq +++ /dev/null @@ -1,17 +0,0 @@ -# Return class & parent IDs and labels for all ancestors of a set of classes -PREFIX rdfs: -PREFIX oboInOwl: -PREFIX DOID: - -SELECT ?id ?label ?parent_id ?parent_label -WHERE { - VALUES ?set { !<>! } - - ?iri ^rdfs:subClassOf* ?set ; - oboInOwl:id ?id ; - rdfs:label ?label ; - rdfs:subClassOf ?parent_iri . - - ?parent_iri oboInOwl:id ?parent_id ; - rdfs:label ?parent_label . -} \ No newline at end of file diff --git a/inst/sparql/template/set_descendants.rq b/inst/sparql/template/set_descendants.rq deleted file mode 100644 index 2aa346ab..00000000 --- a/inst/sparql/template/set_descendants.rq +++ /dev/null @@ -1,18 +0,0 @@ -# Return class & parent IDs and labels for all descendants of a set of classes -# Essentially a multi-subtree query -PREFIX rdfs: -PREFIX oboInOwl: -PREFIX DOID: - -SELECT ?id ?label ?parent_id ?parent_label -WHERE { - VALUES ?set { !<>! } - - ?iri rdfs:subClassOf* ?set ; - oboInOwl:id ?id ; - rdfs:label ?label ; - rdfs:subClassOf ?parent_iri . - - ?parent_iri oboInOwl:id ?parent_id ; - rdfs:label ?parent_label . -} \ No newline at end of file diff --git a/inst/sparql/template/set_to_common_ancestor.rq b/inst/sparql/template/set_to_common_ancestor.rq deleted file mode 100644 index df80a62e..00000000 --- a/inst/sparql/template/set_to_common_ancestor.rq +++ /dev/null @@ -1,33 +0,0 @@ -# Return class & parent IDs and labels for all classes between a set of classes -# and their nearest common ancestor, inclusive -PREFIX rdfs: -PREFIX oboInOwl: -PREFIX DOID: - -SELECT ?id ?label ?parent_id ?parent_label -WHERE { - { - # find the nearest common ancestor - SELECT ?ancestor_iri (COUNT(?mid) AS ?distance) - WHERE { - ?ancestor_iri ^rdfs:subClassOf* ?mid . - ?mid ^rdfs:subClassOf* !<>! . - FILTER(!isBlank(?ancestor_iri)) - } - GROUP BY ?ancestor_iri - ORDER BY ?distance - LIMIT 1 - } - - # get info for classes in path from set to ancestor - VALUES ?set { !<>! } - - ?iri rdfs:subClassOf* ?ancestor_iri ; - ^rdfs:subClassOf* ?set ; - oboInOwl:id ?id ; - rdfs:label ?label ; - rdfs:subClassOf ?parent_iri . - - ?parent_iri oboInOwl:id ?parent_id ; - rdfs:label ?parent_label . -} \ No newline at end of file From c9de150535a3a33e7071a9a26a8c13c9df00a18d Mon Sep 17 00:00:00 2001 From: "J. Allen Baron" Date: Thu, 22 Jan 2026 15:47:58 -0500 Subject: [PATCH 3/4] Update format_subtree() to use extract_subclass() output Now works for both extract_subtree() and extract_subclass() --- R/extract.R | 2 ++ R/format.R | 10 +++++++-- R/format_helpers.R | 54 +++++++++++++++++++++++++--------------------- 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/R/extract.R b/R/extract.R index 9fb9d505..789cde61 100644 --- a/R/extract.R +++ b/R/extract.R @@ -369,6 +369,8 @@ extract_subclass <- function(input, class, method = "descendants", tidy_what = tidy_what ) + class(out) <- c("extracted_subclass", class(out)) + out } diff --git a/R/format.R b/R/format.R index 400999bf..33966277 100644 --- a/R/format.R +++ b/R/format.R @@ -152,9 +152,15 @@ format_doid <- function(x, as = "curie", validate = TRUE, #' @export format_subtree <- function(subtree_df, top_node) { rlang::check_installed("tidygraph", reason = "to use `format_subtree()`") + if ("extracted_subclass" %in% class(subtree_df)) { + id_string <- "iri" + } else { + id_string <- "id" + } + top_class <- format_doid(top_node, as = "curie") - tg <- as_subtree_tidygraph(subtree_df, top_class) - formatted <- pivot_subtree(tg, top_class) + tg <- as_subtree_tidygraph(subtree_df, top_class, id_string) + formatted <- pivot_subtree(tg, top_class, id_string) formatted } diff --git a/R/format_helpers.R b/R/format_helpers.R index 1f303aa4..925c5cf6 100644 --- a/R/format_helpers.R +++ b/R/format_helpers.R @@ -8,33 +8,31 @@ #' [disease-ontology.org](https://disease-ontology.org/). #' #' @inheritParams format_subtree +#' @param parent_col The column with 'parent' class IDs. #' #' @family format_subtree() helpers #' @noRd -as_subtree_tidygraph <- function(subtree_df, top_node) { +as_subtree_tidygraph <- function(subtree_df, top_node, id_string) { + parent_col <- paste0("parent_", id_string) # keep all parent info in labels - label_df <- collapse_col_flex( - subtree_df, - "parent_id", - "parent_label" - ) + label_df <- collapse_col(subtree_df, c(parent_col, "parent_label")) # exclude parents which are not subclasses of top_node (usually due to # multi-parentage) - df <- dplyr::filter(subtree_df, .data$parent_id %in% .data$id) + df <- dplyr::filter(subtree_df, .data[[parent_col]] %in% .data[[id_string]]) # fill in subclasses where multi-parentage is within tree - df <- fill_subclass(df) + df <- fill_subclass(df, id_string) # create tidygraph tg <- df %>% - dplyr::select("id", "parent_id") %>% + dplyr::select(dplyr::all_of(c(id_string, parent_col))) %>% tidygraph::as_tbl_graph() %>% # fix needed for labels to match correctly tidygraph::activate("nodes") %>% dplyr::mutate( name = dplyr::if_else( - .data$name %in% label_df$id, + .data$name %in% label_df[[id_string]], .data$name, stringr::str_remove(.data$name, "-[0-9]+$") ) @@ -42,7 +40,7 @@ as_subtree_tidygraph <- function(subtree_df, top_node) { # add labels tidygraph::left_join( label_df, - by = c("name" = "id") + by = c("name" = id_string) ) tg @@ -57,10 +55,12 @@ as_subtree_tidygraph <- function(subtree_df, top_node) { #' #' @param subtree_tg A subtree tidygraph from [as_subtree_tidygraph()]. #' @inheritParams format_subtree +#' @inheritParams as_subtree_tidygraph #' #' @family format_subtree() helpers #' @noRd -pivot_subtree <- function(subtree_tg, top_node) { +pivot_subtree <- function(subtree_tg, top_node, id_string) { + parent_col <- paste0("parent_", id_string) # ensure alphabetical order of classes to match disease-ontology.org tg <- tidygraph::arrange(subtree_tg, .data$label) @@ -90,7 +90,7 @@ pivot_subtree <- function(subtree_tg, top_node) { dplyr::mutate(duplicated = all_duplicated(.data$name)) %>% # mv supporting info to left & tree to right dplyr::select( - "parent_id", "parent_label", id = "name", + dplyr::all_of(parent_col), "parent_label", id = "name", dplyr::everything() ) %>% tidyr::pivot_wider( @@ -110,12 +110,12 @@ pivot_subtree <- function(subtree_tg, top_node) { #' ensures that the subclasses appear each time their parent/superclass does. #' #' @inheritParams as_subtree_tidygraph -#' +#' @inheritParams as_subtree_tidygraph #' @family format_subtree() > as_subtree_tidygraph() helpers #' @noRd -fill_subclass <- function(subtree_df) { - - not_dup <- dplyr::filter(subtree_df, !duplicated(.data$id)) +fill_subclass <- function(subtree_df, id_string) { + parent_col <- paste0("parent_", id_string) + not_dup <- dplyr::filter(subtree_df, !duplicated(.data[[id_string]])) lvl <- 1 res_n <- 1L @@ -124,14 +124,14 @@ fill_subclass <- function(subtree_df) { while (res_n > 0) { if (lvl == 1) { new_rows[[lvl]] <- subtree_df %>% - dplyr::filter(duplicated(.data$id)) %>% - dplyr::mutate(id_new = paste(.data$id, lvl, sep = "-")) + dplyr::filter(duplicated(.data[[id_string]])) %>% + dplyr::mutate(id_new = paste(.data[[id_string]], lvl, sep = "-")) } else { new_rows[[lvl]] <- subtree_df %>% - dplyr::filter(.data$parent_id %in% new_rows[[lvl - 1]]$id) %>% + dplyr::filter(.data[[parent_col]] %in% new_rows[[lvl - 1]]$id) %>% dplyr::mutate( - id_new = paste(.data$id, lvl, sep = "-"), - parent_id_new = paste(.data$parent_id, lvl - 1, sep = "-") + id_new = paste(.data[[id_string]], lvl, sep = "-"), + parent_id_new = paste(.data[[parent_col]], lvl - 1, sep = "-") ) } res_n <- nrow(new_rows[[lvl]]) @@ -149,14 +149,18 @@ fill_subclass <- function(subtree_df) { # https://github.com/tidyverse/funs/issues/54#issuecomment-892377998 id = do.call( dplyr::coalesce, - rev(dplyr::across(dplyr::starts_with("id"))) + rev(dplyr::across(dplyr::starts_with(id_string))) ), parent_id = do.call( dplyr::coalesce, - rev(dplyr::across(dplyr::starts_with("parent_id"))) + rev(dplyr::across(dplyr::starts_with(parent_col))) ) ) %>% - dplyr::select("id", "label", "parent_id", "parent_label") + dplyr::select( + dplyr::all_of( + c(id_string, "label", parent_col, "parent_label") + ) + ) filled_df } From 135245dc166423b99f758cc97a5eae9ad1df03ed Mon Sep 17 00:00:00 2001 From: "J. Allen Baron" Date: Fri, 10 Apr 2026 11:21:33 -0400 Subject: [PATCH 4/4] Rename extract_subclass() to extract_obo_class() --- NAMESPACE | 2 +- R/extract.R | 14 ++++---- R/format.R | 2 +- ...tract_subclass.Rd => extract_obo_class.Rd} | 32 +++++++++++++++---- man/extract_subtree.Rd | 2 +- 5 files changed, 35 insertions(+), 17 deletions(-) rename man/{extract_subclass.Rd => extract_obo_class.Rd} (72%) diff --git a/NAMESPACE b/NAMESPACE index 8c490200..e8ce17e3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -65,11 +65,11 @@ export(elucidate) export(extract_as_tidygraph) export(extract_class_axiom) export(extract_eq_axiom) +export(extract_obo_class) export(extract_obo_mappings) export(extract_ordo_mappings) export(extract_pm_date) export(extract_pmid) -export(extract_subclass) export(extract_subclass_axiom) export(extract_subtree) export(format_axiom) diff --git a/R/extract.R b/R/extract.R index 789cde61..03893ecc 100644 --- a/R/extract.R +++ b/R/extract.R @@ -309,14 +309,14 @@ extract_doid_url <- function(doid_edit, include_obsolete = FALSE, #' \dontrun{ #' # Extract all descendants of 'diabetes mellitus' #' doid_path <- "" -#' extract_subclass(doid_path, "DOID:9351") +#' extract_obo_class(doid_path, "DOID:9351") #' #' # Extract all ancestors of 'diabetes mellitus' -#' extract_subclass(doid_path, "DOID:9351", method = "ancestors") +#' extract_obo_class(doid_path, "DOID:9351", method = "ancestors") #' #' # Extract all classes of appendicitis (DOID:8337), alcoholic gastritis #' (DOID:8680), and viral esophagitis (DOID:6297) up to a common ancestor -#' extract_subclass( +#' extract_obo_class( #' doid_path, #' c("DOID:8337", "DOID:8680", "DOID:6297"), #' method = "common_ancestor" @@ -331,7 +331,7 @@ extract_doid_url <- function(doid_edit, include_obsolete = FALSE, #' ontology browsers. #' #' @export -extract_subclass <- function(input, class, method = "descendants", +extract_obo_class <- function(input, class, method = "descendants", .robot_path = NULL, tidy_what = c("header", "uri_to_curie"), ...) { @@ -369,7 +369,7 @@ extract_subclass <- function(input, class, method = "descendants", tidy_what = tidy_what ) - class(out) <- c("extracted_subclass", class(out)) + class(out) <- c("obo_class", class(out)) out } @@ -391,14 +391,14 @@ extract_subclass <- function(input, class, method = "descendants", #' and `parent_label`, with one row for each unique combination for each #' subclass below and including `top_node`. #' -#' @seealso [extract_subclass(method = "descendants")][extract_subclass] for the +#' @seealso [extract_obo_class(method = "descendants")][extract_obo_class] for the #' same result with a single `class`. [format_subtree()] to arrange data in a #' tree structure similar to ontology browsers. #' #' @export extract_subtree <- function(x, top_node, reload = FALSE) { rlang::warn( - "`extract_subtree()` is deprecated. Use `extract_subclass()` instead." + "`extract_subtree()` is deprecated. Use `extract_obo_class()` instead." ) owl <- access_owl_xml(x) assert_string(top_node) diff --git a/R/format.R b/R/format.R index 33966277..1d4746e9 100644 --- a/R/format.R +++ b/R/format.R @@ -152,7 +152,7 @@ format_doid <- function(x, as = "curie", validate = TRUE, #' @export format_subtree <- function(subtree_df, top_node) { rlang::check_installed("tidygraph", reason = "to use `format_subtree()`") - if ("extracted_subclass" %in% class(subtree_df)) { + if ("obo_class" %in% class(subtree_df)) { id_string <- "iri" } else { id_string <- "id" diff --git a/man/extract_subclass.Rd b/man/extract_obo_class.Rd similarity index 72% rename from man/extract_subclass.Rd rename to man/extract_obo_class.Rd index e2e1a709..4e4b359b 100644 --- a/man/extract_subclass.Rd +++ b/man/extract_obo_class.Rd @@ -1,15 +1,16 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/extract.R -\name{extract_subclass} -\alias{extract_subclass} +\name{extract_obo_class} +\alias{extract_obo_class} \title{Extract OBO Classes with Parents} \usage{ -extract_subclass( +extract_obo_class( input, class, method = "descendants", .robot_path = NULL, - tidy_what = c("header", "uri_to_curie") + tidy_what = c("header", "uri_to_curie"), + ... ) } \arguments{ @@ -50,6 +51,23 @@ character vector. One or more of the following: in the given column. \item \code{"nothing"} to prevent all tidying. }} + +\item{...}{ + Arguments passed on to \code{\link[=is_valid_obo]{is_valid_obo}} + \describe{ + \item{\code{allow}}{The OBO ID format(s) to consider valid; as a character vector. +One or more of: \code{"curie"}, \code{"obo_curie"} (e.g. \code{obo:DOID_4}), \code{"uri"}, +\code{""} (URI in angle brackets), or \code{"ns.lui"} (i.e. an OBO CURIE without +\verb{obo:}). + +\code{"standard"} (default) is a generic grouping that represents syntactically +correct formats (\code{"curie"}, \code{"obo_curie"}, \code{"uri"} and \code{""}). + +\code{"ns.lui"} will only be included if explicitly specified.} + \item{\code{ns_type}}{The type of OBO namespaces to consider valid; as a character +vector. One of: \code{"obo"} (default), \code{"ont"} (ontology primary namespaces +only), or \code{"prop"} (ontology property namespaces only; may not be exhaustive).} + }} } \value{ A \link[tibble:tibble]{tibble} with the columns: \code{id}, \code{label}, \code{parent_id}, @@ -63,14 +81,14 @@ OBO ontology. \dontrun{ # Extract all descendants of 'diabetes mellitus' doid_path <- "" -extract_subclass(doid_path, "DOID:9351") +extract_obo_class(doid_path, "DOID:9351") # Extract all ancestors of 'diabetes mellitus' -extract_subclass(doid_path, "DOID:9351", method = "ancestors") +extract_obo_class(doid_path, "DOID:9351", method = "ancestors") # Extract all classes of appendicitis (DOID:8337), alcoholic gastritis (DOID:8680), and viral esophagitis (DOID:6297) up to a common ancestor -extract_subclass( +extract_obo_class( doid_path, c("DOID:8337", "DOID:8680", "DOID:6297"), method = "common_ancestor" diff --git a/man/extract_subtree.Rd b/man/extract_subtree.Rd index 5e2883b8..2f9fd390 100644 --- a/man/extract_subtree.Rd +++ b/man/extract_subtree.Rd @@ -26,7 +26,7 @@ Extracts the classes and parents of a DO subtree from a \code{pyDOID.owl.xml} object. } \seealso{ -\link[=extract_subclass]{extract_subclass(method = "descendants")} for the +\link[=extract_obo_class]{extract_obo_class(method = "descendants")} for the same result with a single \code{class}. \code{\link[=format_subtree]{format_subtree()}} to arrange data in a tree structure similar to ontology browsers. }