diff --git a/R/sysdata.rda b/R/sysdata.rda index 8fbd5b36..8b509411 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/data-raw/curation_opts.csv b/data-raw/curation_opts.csv deleted file mode 100644 index 46a16c17..00000000 --- a/data-raw/curation_opts.csv +++ /dev/null @@ -1,57 +0,0 @@ -header,template,type,example,optional_values,alternate_title,alternate_format,notes -iri/curie,ID,required manual,DOID:0080943,IRI or CURIE,NA,NA,NA -label,AL rdfs:label@en,required manual,"46,XX sex reversal 5",NA,NA,NA,NA -parent iri/curie,SC % SPLIT=|,required manual,DOID:0111760,disease by infectious agent,CI,IRI or CURIE; CI means Class IRI --> type will be CLASS_TYPE,NA -definition,AL obo:IAO_0000115@en,required manual,"A 46,XX sex reversal that is characterized by genital virilization in 46,XX individuals, associated with congenital heart disease and variable somatic anomalies including blepharophimosis-ptosis-epicanthus inversus syndrome and congenital diaphragmatic hernia and that has_material_basis_in heterozygous mutation in the NR2F2 gene on chromosome 15q26.",NA,NA,NA,NA -definition source(s),>A oboInOwl:hasDbXref SPLIT=|,required manual,url:https://pubmed.ncbi.nlm.nih.gov/29478779/,NA,NA,NA,NA -definition source type(s),>AI dc11:type SPLIT=|,optional manual,curator inference from journal publication,"ECO codes, e.g. ECO:0007645",NA,NA,do not quote!!! -synonym(s): exact,AL oboInOwl:hasExactSynonym@en SPLIT=|,optional manual,hemangiosarcoma,NA,NA,NA,do not quote!!! 
-synonym(s): broad,AL oboInOwl:hasBroadSynonym@en SPLIT=|,optional manual,NA,NA,NA,NA,NA -synonym(s): narrow,AL oboInOwl:hasNarrowSynonym@en SPLIT=|,optional manual,NA,NA,NA,NA,NA -synonym(s): related,AL oboInOwl:hasRelatedSynonym@en SPLIT=|,optional manual,NA,NA,NA,NA,NA -acronym(s): exact,AL oboInOwl:hasExactSynonym@en SPLIT=|,optional manual,CAMRQ,NA,NA,NA,"must be accompanied by ""acronym annotation"" header/template in the adjacent column to the rigth in robot template" -acronym(s): broad,AL oboInOwl:hasBroadSynonym@en SPLIT=|,optional manual,NA,NA,NA,NA,NA -acronym(s): narrow,AL oboInOwl:hasNarrowSynonym@en SPLIT=|,optional manual,NA,NA,NA,NA,NA -acronym(s): related,AL oboInOwl:hasRelatedSynonym@en SPLIT=|,optional manual,DES,NA,NA,NA,"must be accompanied by ""acronym annotation"" header/template in the adjacent column to the rigth in robot template" -acronym annotation,>AI oboInOwl:hasSynonymType,optional auto,acronym,NA,NA,NA,NA -xref(s),A oboInOwl:hasDbXref SPLIT=|,optional manual,OMIM:618901,NA,NA,NA,NA -skos mapping(s): exact,A skos:exactMatch SPLIT=|,optional manual,OMIM:618901,NA,NA,"should use IRIs and be as follows: -AI skos:exactMatch SPLIT=| - - example input: https://omim.org/MIM:618901",adds skos mappings as strings; current INCORRECT DO format -skos mapping(s): broad,A skos:broadMatch SPLIT=|,optional manual,OMIM:PS613135,NA,NA,"should use IRIs and be as follows: -AI skos:exactMatch SPLIT=| - - example input: https://omim.org/MIM:618901",adds skos mappings as strings; current INCORRECT DO format -skos mapping(s): narrow,A skos:narrowMatch SPLIT=|,optional manual,OMIM:618901,NA,NA,"should use IRIs and be as follows: -AI skos:exactMatch SPLIT=| - - example input: https://omim.org/MIM:618901",adds skos mappings as strings; current INCORRECT DO format -skos mapping(s): related,A skos:relatedMatch SPLIT=|,optional manual,NA,NA,NA,"should use IRIs and be as follows: -AI skos:exactMatch SPLIT=| - - example input: https://omim.org/MIM:618901",adds skos 
mappings as strings; current INCORRECT DO format -equivalent class,EC %,optional manual,disease and ('has material basis in' some (Viruses or Bacteria or Eukaryota)),NA,NA,NA,NA -sc axiom: inheritance,SC 'has material basis in' some % SPLIT=|,optional manual,NA,NA,NA,NA,NA -sc axiom: anatomical location,SC 'disease has location' some %,optional manual,NA,NA,NA,NA,NA -sc axiom: onset,SC 'existence starts during' some %,optional manual,NA,NA,NA,NA,NA -sc axiom: has_material_basis_in,SC has_material_basis_in some %,optional manual,autosomal dominant inheritance,NA,NA,NA,do not quote!!! -sc axiom: located_in,SC located_in some %,optional manual,NA,rdfs:label (preferred); IRI or CURIE (possible),NA,NA,NA -disjoint class,DC %,optional manual,NA,NA,NA,NA,NA -subset(s),AI oboInOwl:inSubset SPLIT=|,optional manual,DO_AGR_slim,any subset (aka 'slim') defined in doid-edit.owl,NA,NA,NA -deprecate,AT owl:deprecated^^xsd:boolean,optional manual,true,NA,NA,NA,NA -alternate id(s),A oboInOwl:hasAlternativeId SPLIT=|,optional manual,DOID:4,CURIE of deprecated term,NA,NA,NA -term replaced by,AI obo:IAO_0100001,optional manual,DOID:4,IRI or CURIE of term to replace by,NA,NA,NA -comment,AL rdfs:comment@en,optional manual,This is a comment. 
There should only be one per term.,NA,NA,NA,NA -obo id,A oboInOwl:id,required auto,DOID:0080943,OBO CURIE,NA,NA,"required data, but not necessary to include in manual curation; will be inferred from iri/curie - -if manually entered it must match the CURIE form of iri/curie" -obo namespace,A oboInOwl:hasOBONamespace,required auto,disease_ontology,"OBO namespace of ontology: disease_ontology, symptoms, transmission_process",NA,NA,"required data, but not necessary to include in manual curation; will be automatically added for any new disease - -if manually entered it must be ""disease_ontology"" (without quotes)" -español - label,AL rdfs:label@es,optional manual,NA,NA,NA,NA,NA -español - definition,AL obo:IAO_0000115@es,optional manual,NA,NA,NA,NA,NA -español - synonym(s): exact,AL oboInOwl:hasExactSynonym@es SPLIT=|,optional manual,NA,NA,NA,NA,NA -español - synonym(s): broad,AL oboInOwl:hasBroadSynonym@es SPLIT=|,optional manual,NA,NA,NA,NA,NA -español - synonym(s): narrow,AL oboInOwl:hasNarrowSynonym@es SPLIT=|,optional manual,NA,NA,NA,NA,NA -español - synonym(s): related,AL oboInOwl:hasRelatedSynonym@es SPLIT=|,optional manual,NA,NA,NA,NA,NA -español - acronym(s): exact,AL oboInOwl:hasExactSynonym@es SPLIT=|,optional manual,NA,NA,NA,NA,NA -español - acronym(s): broad,AL oboInOwl:hasBroadSynonym@es SPLIT=|,optional manual,NA,NA,NA,NA,NA -español - acronym(s): narrow,AL oboInOwl:hasNarrowSynonym@es SPLIT=|,optional manual,NA,NA,NA,NA,NA -español - acronym(s): related,AL oboInOwl:hasRelatedSynonym@es SPLIT=|,optional manual,NA,NA,NA,NA,NA diff --git a/data-raw/internal-curation_opts.R b/data-raw/internal-curation_opts.R deleted file mode 100644 index 4b887a31..00000000 --- a/data-raw/internal-curation_opts.R +++ /dev/null @@ -1,19 +0,0 @@ -## code to prepare `curation_opts` internal dataset -rlang::check_installed("googlesheets4") -devtools::load_all() - -curation_opts <- googlesheets4::read_sheet( - 
"https://docs.google.com/spreadsheets/d/1Zn6p5xkVHUwbWe1N8FUa3fNcEkAOoE9P4ADb12f69hQ/edit", - sheet = "template_options", - col_types = "c" -) |> - dplyr::filter(!is.na(.data$template)) - -readr::write_csv(curation_opts, "data-raw/curation_opts.csv") - -.curation_opts <- dplyr::select( - curation_opts, - tidyselect::all_of(c("header", "template", "type")) -) - -use_data_internal(.curation_opts, overwrite = TRUE) \ No newline at end of file diff --git a/data-raw/internal-sssom_spec.R b/data-raw/internal-sssom_spec.R deleted file mode 100644 index 57739278..00000000 --- a/data-raw/internal-sssom_spec.R +++ /dev/null @@ -1,31 +0,0 @@ -# Capture official SSSOM specification and parse for use by DO.utils -rlang::check_installed("yaml") -devtools::load_all() - -sssom_version <- stringr::str_remove( - httr::HEAD("https://github.com/mapping-commons/sssom/releases/latest/")$url, - ".*/" -) -sssom_yaml_path <- glueV( - "https://raw.githubusercontent.com/mapping-commons/sssom/!<>!/src/sssom_schema/schema/sssom_schema.yaml" -) -.sssom_spec <- yaml::read_yaml(sssom_yaml_path) -.sssom_spec$version <- sssom_version -.sssom_spec$access_date <- Sys.Date() - -.sssom_slot_types <- purrr::map_chr(.sssom_spec$slots, ~ .$range) -.sssom_mapping_slots <- .sssom_spec$classes$mapping$slots - -use_data_internal( - .sssom_spec, - .sssom_slot_types, - .sssom_mapping_slots, - overwrite = TRUE -) - -# save YAML for dev reference -dev_dir <- "setup_docs" -yaml_file <- file.path(dev_dir, paste0("sssom_schema-", sssom_version, ".yaml")) - -if (!dir.exists(dev_dir)) dir.create(dev_dir) -download.file(sssom_yaml_path, yaml_file) diff --git a/data-raw/internal-DO_gs.R b/data-raw/internal/DO_gs.R similarity index 53% rename from data-raw/internal-DO_gs.R rename to data-raw/internal/DO_gs.R index e7cda198..881f39b0 100644 --- a/data-raw/internal-DO_gs.R +++ b/data-raw/internal/DO_gs.R @@ -1,5 +1,9 @@ -## code to save DO Google Sheet information internally (`.DO_gs`) -devtools::load_all() +## code to 
prepare `.DO_gs` internal dataset ## +# +# Serves as a reference for DO-related Google Sheets and relevant sheets (tabs) +# for data retrieval + +rlang::check_installed("here") .DO_gs <- list( users = list( @@ -12,4 +16,8 @@ devtools::load_all() ) ) -use_data_internal(.DO_gs, overwrite = TRUE) +saveRDS( + .DO_gs, + file = here::here("data-raw", "internal", "DO_gs.rds"), + compress = "bzip2" +) diff --git a/data-raw/internal/DO_gs.rds b/data-raw/internal/DO_gs.rds new file mode 100644 index 00000000..3af90b10 Binary files /dev/null and b/data-raw/internal/DO_gs.rds differ diff --git a/data-raw/internal/curation_opts.R b/data-raw/internal/curation_opts.R new file mode 100644 index 00000000..aded8cd6 --- /dev/null +++ b/data-raw/internal/curation_opts.R @@ -0,0 +1,37 @@ +## code to prepare `.curation_opts` internal dataset ## +# +# This dataset is updated from a a Google Sheet and serves as a schema for +# establishment of curation templates and their conversion to robot templates + +rlang::check_installed( + c("dplyr", "googlesheets4", "here", "vroom") +) + + +out_dir <- here::here("data-raw", "internal") + +# save full schema for developer reference +curation_opts <- googlesheets4::read_sheet( + "https://docs.google.com/spreadsheets/d/1Zn6p5xkVHUwbWe1N8FUa3fNcEkAOoE9P4ADb12f69hQ/edit", + sheet = "template_options", + col_types = "c" +) |> + dplyr::filter(!is.na(.data$template)) + +vroom::vroom_write( + curation_opts, + file.path(out_dir, "curation_opts.tsv"), + na = "" +) + +# save internal data +.curation_opts <- dplyr::select( + curation_opts, + dplyr::all_of(c("data_type", "template", "inclusion")) +) + +saveRDS( + .curation_opts, + file = file.path(out_dir, "curation_opts.rds"), + compress = "bzip2" +) diff --git a/data-raw/internal/curation_opts.rds b/data-raw/internal/curation_opts.rds new file mode 100644 index 00000000..fd80ae1a Binary files /dev/null and b/data-raw/internal/curation_opts.rds differ diff --git a/data-raw/internal/curation_opts.tsv 
b/data-raw/internal/curation_opts.tsv new file mode 100644 index 00000000..2139e094 --- /dev/null +++ b/data-raw/internal/curation_opts.tsv @@ -0,0 +1,66 @@ +data_type template inclusion example notes optional_values alternate_title alternate_format sparql export_header sparql_dt_motif +id ID required manual DOID:0080943 replaced older 'iri/curie' header for simplicity; id now covered by obo id field IRI or CURIE CI IRI or CURIE; CI means Class IRI --> type will be CLASS_TYPE ?iri a owl:Class . ID +iri/curie ID deprecated DOID:0080943 replaced by 'id' header IRI or CURIE ?iri a owl:Class . IRI +label A rdfs:label required manual 46,XX sex reversal 5 ?iri rdfs:label ?label . LABEL rdfs:label +parent id SC % SPLIT=| required manual DOID:0111760 "accepts CURIE or IRI; intended for only asserted subclass relationships between named classes +--> separated from subclass anon for practical purposes - ROBOT template is the same, ROBOT export differs" disease by infectious agent "?iri rdfs:subClassOf ?parent . +FILTER(!isBlank(?parent))" SubClass Of [NAMED ID] rdfs:subClassOf +definition A IAO:0000115 required manual A 46,XX sex reversal that is characterized by genital virilization in 46,XX individuals, associated with congenital heart disease and variable somatic anomalies including blepharophimosis-ptosis-epicanthus inversus syndrome and congenital diaphragmatic hernia and that has_material_basis_in heterozygous mutation in the NR2F2 gene on chromosome 15q26. ?iri obo:IAO_0000115 ?definition . obo:IAO_0000115 IAO:0000115 +definition source(s) >A oboInOwl:hasDbXref SPLIT=| required manual url:https://pubmed.ncbi.nlm.nih.gov/29478779/ "!<<.definition_axiom>>! + oboInOwl:hasDbXref ?def_src ." IAO:0000115-oboInOwl:hasDbXref +definition source type(s) >AI dc11:type SPLIT=| optional manual curator inference from journal publication do not quote!!! ECO codes, e.g. ECO:0007645 "!<<.definition_axiom>>! + dc:type ?src_type ." 
IAO:0000115-dc:type +xref(s) A oboInOwl:hasDbXref SPLIT=| optional manual OMIM:618901 ?iri oboInOwl:hasDbXref ?xref . oboInOwl:hasDbXref oboInOwl:hasDbXref +skos mapping(s): exact A skos:exactMatch SPLIT=| optional manual OMIM:618901 adds skos mappings as strings; current INCORRECT DO format "should use IRIs and be as follows: +AI skos:exactMatch SPLIT=| + - example input: https://omim.org/MIM:618901" ?iri skos:exactMatch ?skos_exact . skos:exactMatch skos:exactMatch +skos mapping(s): close A skos:closeMatch SPLIT=| optional manual OMIM:618901 adds skos mappings as strings; current INCORRECT DO format "should use IRIs and be as follows: +AI skos:closeMatch SPLIT=| + - example input: https://omim.org/MIM:618901" ?iri skos:closeMatch ?skos_close . skos:closeMatch skos:closeMatch +skos mapping(s): broad A skos:broadMatch SPLIT=| optional manual OMIM:PS613135 adds skos mappings as strings; current INCORRECT DO format "should use IRIs and be as follows: +AI skos:exactMatch SPLIT=| + - example input: https://omim.org/MIM:618901" ?iri skos:broadMatch ?skos_broad . skos:broadMatch skos:broadMatch +skos mapping(s): narrow A skos:narrowMatch SPLIT=| optional manual OMIM:618901 adds skos mappings as strings; current INCORRECT DO format "should use IRIs and be as follows: +AI skos:exactMatch SPLIT=| + - example input: https://omim.org/MIM:618901" ?iri skos:narrowMatch ?skos_narrow . skos:narrowMatch skos:narrowMatch +skos mapping(s): related A skos:relatedMatch SPLIT=| optional manual adds skos mappings as strings; current INCORRECT DO format "should use IRIs and be as follows: +AI skos:exactMatch SPLIT=| + - example input: https://omim.org/MIM:618901" ?iri skos:relatedMatch ?skos_related . skos:relatedMatch skos:relatedMatch +synonym(s): exact A oboInOwl:hasExactSynonym SPLIT=| optional manual hemangiosarcoma do not quote!!! 
"glueV: .synonym_stmt, syn_scope = ""Exact""" oboInOwl:hasExactSynonym oboInOwl:hasExactSynonym +synonym(s): broad A oboInOwl:hasBroadSynonym SPLIT=| optional manual "glueV: .synonym_stmt, syn_scope = ""Broad""" oboInOwl:hasBroadSynonym oboInOwl:hasBroadSynonym +synonym(s): narrow A oboInOwl:hasNarrowSynonym SPLIT=| optional manual "glueV: .synonym_stmt, syn_scope = ""Narrow""" oboInOwl:hasNarrowSynonym oboInOwl:hasNarrowSynonym +synonym(s): related A oboInOwl:hasRelatedSynonym SPLIT=| optional manual "glueV: .synonym_stmt, syn_scope = ""Related""" oboInOwl:hasRelatedSynonym oboInOwl:hasRelatedSynonym +acronym(s): exact A oboInOwl:hasExactSynonym SPLIT=| optional manual CAMRQ "must be accompanied by ""acronym annotation"" header/template in the adjacent column to the rigth in robot template" "glueV: .acronym_stmt, acronym_scope = ""Exact""" oboInOwl:hasExactSynonym-OMO:0003012 +acronym(s): broad A oboInOwl:hasBroadSynonym SPLIT=| optional manual "glueV: .acronym_stmt, acronym_scope = ""Broad""" oboInOwl:hasBroadSynonym-OMO:0003012 +acronym(s): narrow A oboInOwl:hasNarrowSynonym SPLIT=| optional manual "glueV: .acronym_stmt, acronym_scope = ""Narrow""" oboInOwl:hasNarrowSynonym-OMO:0003012 +acronym(s): related A oboInOwl:hasRelatedSynonym SPLIT=| optional manual DES "must be accompanied by ""acronym annotation"" header/template in the adjacent column to the rigth in robot template" "glueV: .acronym_stmt, acronym_scope = ""Related""" oboInOwl:hasRelatedSynonym-OMO:0003012 +acronym annotation >AI oboInOwl:hasSynonymType optional auto acronym +eq class EC % SPLIT=| optional manual disease and ('has material basis in' some (Viruses or Bacteria or Eukaryota)) "?iri owl:equivalentClass ?eq . 
+FILTER(!isBlank(?eq))" Equivalent Class [NAMED ID] owl:equivalentClass +eq class anon EC % SPLIT=| optional manual Equivalent Class [ANON ID] +subclass anon SC % SPLIT=| optional manual 'disease has feature' some cherubism "intended for only subclass of anonymous logical expressions +--> separated from parent id for practical purposes - ROBOT template is the same, ROBOT export differs" SubClass Of [ANON ID] +subclass anon: inheritance SC 'has material basis in' some % SPLIT=| optional manual +subclass anon: anatomical location SC 'disease has location' some % SPLIT=| optional manual +subclass anon: onset SC 'existence starts during' some % SPLIT=| optional manual +subclass anon: has_material_basis_in SC has_material_basis_in some % SPLIT=| optional manual autosomal dominant inheritance do not quote standalone terms!!! +subclass anon: located_in SC located_in some % SPLIT=| optional manual rdfs:label (preferred); IRI or CURIE (possible) +disjoint class DC % SPLIT=| optional manual "?iri owl:disjointClass ?disjoint . +FILTER(!isBlank(?disjoint))" Disjoint With [NAMED ID] owl:disjointWith +disjoint class anon DC % SPLIT=| optional manual Disjoint With [ANON ID] +subset(s) AI oboInOwl:inSubset SPLIT=| optional manual DO_AGR_slim any subset (aka 'slim') defined in doid-edit.owl "?iri oboInOwl:inSubset ?subset_iri . +?subset_iri rdfs:label ?subset ." oboInOwl:inSubset oboInOwl:inSubset +alternate id(s) A oboInOwl:hasAlternativeId SPLIT=| optional manual DOID:4 CURIE of deprecated term ?iri oboInOwl:hasAlternativeId ?alt_id . oboInOwl:hasAlternativeId oboInOwl:hasAlternativeId +deprecated AT owl:deprecated^^xsd:boolean optional manual true ?iri owl:deprecated ?deprecate . owl:deprecated owl:deprecated +obsolescence reason AI IAO:0000231 optional manual terms merged must be IRI or label of child of 'obsolescence reason specification' (IAO:0000225) ?iri IAO:0000231 ?obs_reason . 
IAO:0000231 +term replaced by AI IAO:0100001 optional manual DOID:4 IRI or CURIE of term to replace by ?iri obo:IAO_0100001 ?term_replaced_by . IAO:0100001 IAO:0100001 +consider instead oboInOwl:consider optional manual ?iri oboInOwl:consider ?consider . oboInOwl:consider oboInOwl:consider +comment A rdfs:comment optional manual This is a comment. There should only be one per term. ?iri rdfs:comment ?comment . rdfs:comment rdfs:comment +created by A oboInOwl:created_by optional manual ?iri oboInOwl:created_by ?created_by . oboInOwl:created_by oboInOwl:created_by +creation date A oboInOwl:creation_date optional manual ?iri oboInOwl:creation_date ?creation_date . oboInOwl:creation_date oboInOwl:creation_date +obo id A oboInOwl:id required auto DOID:0080943 "required data, but not necessary to include in manual curation; will be inferred from iri/curie + +if manually entered it must match the CURIE form of iri/curie" OBO CURIE ?iri oboInOwl:id ?id . oboInOwl:id +obo namespace A oboInOwl:hasOBONamespace required auto disease_ontology "required data, but not necessary to include in manual curation; will be automatically added for any new disease + +if manually entered it must be ""disease_ontology"" (without quotes)" OBO namespace of ontology: disease_ontology, symptoms, transmission_process ?iri oboInOwl:hasOBONamespace ?obo_namespace . 
oboInOwl:hasOBONamespace oboInOwl:hasOBONamespace diff --git a/data-raw/internal/html4_tags.tsv b/data-raw/internal/html4_tags.tsv new file mode 100644 index 00000000..34ae8e9e --- /dev/null +++ b/data-raw/internal/html4_tags.tsv @@ -0,0 +1,97 @@ +name start_tag end_tag deprecated dtd description +a required required FALSE anchor +abbr required required FALSE "abbreviated form (e.g., www, http, +etc.)" +acronym required required FALSE +address required required FALSE information on author +applet required required TRUE loose java applet +area required forbidden FALSE client-side image map area +b required required FALSE bold text style +base required forbidden FALSE document base uri +basefont required forbidden TRUE loose base font size +bdo required required FALSE i18n bidi over-ride +big required required FALSE large text style +blockquote required required FALSE long quotation +body optional optional FALSE document body +br required forbidden FALSE forced line break +button required required FALSE push button +caption required required FALSE table caption +center required required TRUE loose shorthand for div align=center +cite required required FALSE citation +code required required FALSE computer code fragment +col required forbidden FALSE table column +colgroup required optional FALSE table column group +dd required optional FALSE definition description +del required required FALSE deleted text +dfn required required FALSE instance definition +dir required required TRUE loose directory list +div required required FALSE generic language/style container +dl required required FALSE definition list +dt required optional FALSE definition term +em required required FALSE emphasis +fieldset required required FALSE form control group +font required required TRUE loose local change to font +form required required FALSE interactive form +frame required forbidden FALSE forbidden subwindow +frameset required required FALSE forbidden window subdivision +h1 required 
required FALSE heading +h2 required required FALSE heading +h3 required required FALSE heading +h4 required required FALSE heading +h5 required required FALSE heading +h6 required required FALSE heading +head optional optional FALSE document head +hr required forbidden FALSE horizontal rule +html optional optional FALSE document root element +i required required FALSE italic text style +iframe required required FALSE loose inline subwindow +img required forbidden FALSE embedded image +input required forbidden FALSE form control +ins required required FALSE inserted text +isindex required forbidden TRUE loose single line prompt +kbd required required FALSE text to be entered by the user +label required required FALSE form field label text +legend required required FALSE fieldset legend +li required optional FALSE list item +link required forbidden FALSE a media-independent link +map required required FALSE client-side image map +menu required required TRUE loose menu list +meta required forbidden FALSE generic metainformation +noframes required required FALSE forbidden "alternate content container for non +frame-based rendering" +noscript required required FALSE "alternate content container for non +script-based rendering" +object required required FALSE generic embedded object +ol required required FALSE ordered list +optgroup required required FALSE option group +option required optional FALSE selectable choice +p required optional FALSE paragraph +param required forbidden FALSE named property value +pre required required FALSE preformatted text +q required required FALSE short inline quotation +s required required TRUE loose strike-through text style +samp required required FALSE "sample program output, scripts, +etc." 
+script required required FALSE script statements +select required required FALSE option selector +small required required FALSE small text style +span required required FALSE generic language/style container +strike required required TRUE loose strike-through text +strong required required FALSE strong emphasis +style required required FALSE style info +sub required required FALSE subscript +sup required required FALSE superscript +table required required FALSE +tbody optional optional FALSE table body +td required optional FALSE table data cell +textarea required required FALSE multi-line text field +tfoot required optional FALSE table footer +th required optional FALSE table header cell +thead required optional FALSE table header +title required required FALSE document title +tr required optional FALSE table row +tt required required FALSE teletype or monospaced text style +u required required TRUE loose underlined text style +ul required required FALSE unordered list +var required required FALSE "instance of a variable or program +argument" diff --git a/data-raw/internal-html_tags.R b/data-raw/internal/html_tags.R similarity index 51% rename from data-raw/internal-html_tags.R rename to data-raw/internal/html_tags.R index 157ab11f..95de392a 100644 --- a/data-raw/internal-html_tags.R +++ b/data-raw/internal/html_tags.R @@ -1,13 +1,31 @@ +## code to prepare `.html_tags` internal dataset ## +# +# HTML tag information is retrieved from the W3C HTML 4.01 specification and +# serves as a reference for parsing and validation of HTML elements in DO +# website curation +# +# NOTE: HTML 4.01 is used as a reference for tag information, but the web +# now supports the HTML Living Standard (https://html.spec.whatwg.org/), which +# includes additional tags and attributes. 
+ rlang::check_installed( - c("dplyr", "janitor", "purrr", "rvest", "stringr", "tidyr") + c("dplyr", "here", "janitor", "purrr", "rvest", "stringr", "tidyr", "vroom") ) -raw_element_index <- rvest::read_html("https://www.w3.org/TR/html401/index/elements.html") +outdir <- here::here("data-raw", "internal") + +raw_element_index <- rvest::read_html( + "https://www.w3.org/TR/html401/index/elements.html" +) index_legend <- raw_element_index |> rvest::html_text() |> stringr::str_match( - stringr::regex("legend:(.*)name[^[:alnum:]]", dotall = TRUE, ignore_case = TRUE) + stringr::regex( + "legend:(.*)name[^[:alnum:]]", + dotall = TRUE, + ignore_case = TRUE + ) ) |> (\(x) x[, 2])() |> stringr::str_split(",[[:space:]]*") |> @@ -39,4 +57,15 @@ if (nrow(.html_tags) != dplyr::n_distinct(.html_tags$name)) { rlang::abort("Duplicate HTML tag names found") } -use_data_internal(.html_tags, overwrite = TRUE) +# save tabular data for reference +vroom::vroom_write( + .html_tags, + file = file.path(outdir, "html4_tags.tsv"), + na = "" +) + +saveRDS( + .html_tags, + file = file.path(outdir, "html_tags.rds"), + compress = "bzip2" +) diff --git a/data-raw/internal/html_tags.rds b/data-raw/internal/html_tags.rds new file mode 100644 index 00000000..4e3f4e30 Binary files /dev/null and b/data-raw/internal/html_tags.rds differ diff --git a/data-raw/internal/sssom_mapping_slots.rds b/data-raw/internal/sssom_mapping_slots.rds new file mode 100644 index 00000000..623c38cd Binary files /dev/null and b/data-raw/internal/sssom_mapping_slots.rds differ diff --git a/data-raw/internal/sssom_schema-v1.0.0.yaml b/data-raw/internal/sssom_schema-v1.0.0.yaml new file mode 100644 index 00000000..099e8928 --- /dev/null +++ b/data-raw/internal/sssom_schema-v1.0.0.yaml @@ -0,0 +1,792 @@ +id: https://w3id.org/sssom/schema/ +name: sssom +description: Datamodel for Simple Standard for Sharing Ontological Mappings (SSSOM) +imports: +- linkml:types +prefixes: + dcterms: http://purl.org/dc/terms/ + linkml: 
https://w3id.org/linkml/ + sssom: https://w3id.org/sssom/ + rdfs: http://www.w3.org/2000/01/rdf-schema# + rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# + oboInOwl: http://www.geneontology.org/formats/oboInOwl# + pav: http://purl.org/pav/ + prov: http://www.w3.org/ns/prov# + skos: http://www.w3.org/2004/02/skos/core# + xsd: http://www.w3.org/2001/XMLSchema# + semapv: https://w3id.org/semapv/vocab/ +see_also: +- https://github.com/mapping-commons/sssom +- https://mapping-commons.github.io/sssom/home/ +default_curi_maps: +- semweb_context +- obo_context +default_prefix: sssom +default_range: string + +enums: + entity_type_enum: + permissible_values: + owl class: + meaning: owl:Class + owl object property: + meaning: owl:ObjectProperty + owl data property: + meaning: owl:DataProperty + owl annotation property: + meaning: owl:AnnotationProperty + owl named individual: + meaning: owl:NamedIndividual + skos concept: + meaning: skos:Concept + rdfs resource: + meaning: rdfs:Resource + rdfs class: + meaning: rdfs:Class + rdfs literal: + meaning: rdfs:Literal + description: This value indicate that the entity being mapped is not a semantic entity with a distinct identifier, but is instead represented entirely by its literal label. This value MUST NOT be used in the predicate_type slot. + see_also: + - https://mapping-commons.github.io/sssom/spec-model/#literal-mappings + - https://github.com/mapping-commons/sssom/issues/234 + - https://github.com/mapping-commons/sssom/blob/master/examples/schema/literals.sssom.tsv + rdfs datatype: + meaning: rdfs:Datatype + rdf property: + meaning: rdf:Property + + predicate_modifier_enum: + permissible_values: + Not: Negating the mapping predicate. The meaning of the triple becomes subject_id is not a predicate_id match to object_id. 
+ mapping_cardinality_enum: + permissible_values: + "1:1": One-to-one mapping + "1:n": One-to-many mapping + "n:1": Many-to-one mapping + "1:0": One-to-none mapping + "0:1": None-to-one mapping + "n:n": Many-to-many mapping + +types: + EntityReference: + typeof: uriorcurie + description: | + A reference to an entity involved in the mapping. + base: str + uri: rdfs:Resource + see_also: + - https://mapping-commons.github.io/sssom/spec/#tsv + +slots: + prefix_name: + key: true + range: ncname + prefix_url: + range: uri + curie_map: + description: A dictionary that contains prefixes as keys and their URI expansions as values. + range: prefix + multivalued: true + inlined: true + see_also: + - https://github.com/mapping-commons/sssom/issues/225 + - https://github.com/mapping-commons/sssom/pull/349 + - https://github.com/mapping-commons/sssom/blob/master/examples/schema/curie_map.sssom.tsv + mirror_from: + description: A URL location from which to obtain a resource, such as a mapping set. + range: uri + registry_confidence: + description: This value is set by the registry that indexes the mapping set. It reflects the confidence the registry has in the correctness of the mappings in the mapping set. + range: double + last_updated: + description: The date this reference was last updated. + range: date + local_name: + description: The local name assigned to file that corresponds to the downloaded mapping set. + range: string + mapping_set_references: + description: A list of mapping set references. + range: mapping set reference + multivalued: true + recommended: true + mapping_registry_id: + description: The unique identifier of a mapping registry. + range: EntityReference + required: true + mapping_registry_title: + description: The title of a mapping registry. + range: string + mapping_registry_description: + description: The description of a mapping registry. + range: string + imports: + description: A list of registries that should be imported into this one. 
+ multivalued: true + range: uri + documentation: + description: A URL to the documentation of this mapping commons. + range: uri + homepage: + description: A URL to a homepage of this mapping commons. + range: uri + mappings: + description: Contains a list of mapping objects + range: mapping + multivalued: true + inlined_as_list: true + recommended: true + subject_id: + description: The ID of the subject of the mapping. + range: EntityReference + mappings: + - owl:annotatedSource + slot_uri: owl:annotatedSource + examples: + - value: HP:0009894 + description: The CURIE denoting the Human Phenotype Ontology concept of 'Thickened ears' + subject_label: + description: The label of subject of the mapping + range: string + examples: + - value: Thickened ears + recommended: true + subject_category: + description: The conceptual category to which the subject belongs to. This can + be a string denoting the category or a term from a controlled vocabulary. + This slot is deliberately underspecified. Conceptual categories can range from + those that are found in general upper ontologies such as BFO (e.g. process, temporal region, etc) to those that serve + as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this + optional field is documentation for human reviewers - when a category is known + and documented clearly, the cost of interpreting and evaluating the mapping decreases. + range: string + see_also: + - https://github.com/mapping-commons/sssom/issues/13 + - https://github.com/mapping-commons/sssom/issues/256 + examples: + - value: UBERON:0001062 + description: (The CURIE of the Uberon term for "anatomical entity".) + - value: anatomical entity + description: (A string, rather than ID, describing the "anatomical entity" category. This is possible, but less preferred than using an ID.) + - value: biolink:Gene + description: (The CURIE of the biolink class for genes.) 
+ subject_type: + description: The type of entity that is being mapped. + range: entity_type_enum + instantiates: sssom:Propagatable + annotations: + propagated: true + examples: + - value: owl:Class + predicate_id: + description: The ID of the predicate or relation that relates the subject and + object of this match. + mappings: + - owl:annotatedProperty + range: EntityReference + required: true + slot_uri: owl:annotatedProperty + + examples: + - value: owl:sameAs + description: The subject and the object are instances (owl individuals), and the two instances are the same. + - value: owl:equivalentClass + description: The subject and the object are classes (owl class), and the two classes are the same. + - value: owl:equivalentProperty + description: The subject and the object are properties (owl object, data, annotation properties), and the two properties are the same. + - value: rdfs:subClassOf + description: The subject and the object are classes (owl class), and the subject is a subclass of the object. + - value: rdfs:subPropertyOf + description: The subject and the object are properties (owl object, data, annotation properties), and the subject is a subproperty of the object. + - value: skos:relatedMatch + description: The subject and the object are associated in some unspecified way. + - value: skos:closeMatch + description: The subject and the object are sufficiently similar that they can be used interchangeably in some information retrieval applications. + - value: skos:exactMatch + description: The subject and the object can, with a high degree of confidence, be used interchangeably across a wide range of information retrieval applications. + - value: skos:narrowMatch + description: "From the SKOS primer: A triple <A> skos:narrower <B> (and skos:narrowMatch) asserts that <B>, the object of the triple, is a narrower concept than <A>, the subject of the triple."
+ - value: skos:broadMatch + description: "From the SKOS primer: A triple <A> skos:broader <B> (and skos:broadMatch) asserts that <B>, the object of the triple, is a broader concept than <A>, the subject of the triple." + - value: oboInOwl:hasDbXref + description: Two terms are related in some way. The meaning is frequently consistent across a single set of mappings. Note this property is often overloaded even where the terms are of a different nature (e.g. interpro2go) + - value: rdfs:seeAlso + description: The subject and the object are associated in some unspecified way. The object IRI often resolves to a resource on the web that provides additional information. + predicate_modifier: + description: A modifier for negating the predicate. See https://github.com/mapping-commons/sssom/issues/40 for discussion + range: predicate_modifier_enum + see_also: + - https://github.com/mapping-commons/sssom/issues/107 + examples: + - value: Not + description: Negates the predicate, see documentation of predicate_modifier_enum + predicate_label: + description: The label of the predicate/relation of the mapping + range: string + examples: + - value: has cross-reference + description: The label of the oboInOwl:hasDbXref property to represent cross-references. + predicate_type: + description: The type of entity that is being mapped. + range: entity_type_enum + examples: + - value: owl:AnnotationProperty + - value: owl:ObjectProperty + object_id: + description: The ID of the object of the mapping. + mappings: + - owl:annotatedTarget + range: EntityReference + slot_uri: owl:annotatedTarget + examples: + - value: HP:0009894 + description: The CURIE denoting the Human Phenotype Ontology concept of 'Thickened ears' + object_label: + description: The label of object of the mapping + range: string + examples: + - value: Thickened ears + recommended: true + object_category: + description: The conceptual category to which the object belongs to.
This can + be a string denoting the category or a term from a controlled vocabulary. + This slot is deliberately underspecified. Conceptual categories can range from + those that are found in general upper ontologies such as BFO (e.g. process, temporal region, etc) to those that serve + as upper ontologies in specific domains, such as COB or BioLink (e.g. gene, disease, chemical entity). The purpose of this + optional field is documentation for human reviewers - when a category is known + and documented clearly, the cost of interpreting and evaluating the mapping decreases. + range: string + see_also: + - https://github.com/mapping-commons/sssom/issues/13 + - https://github.com/mapping-commons/sssom/issues/256 + examples: + - value: UBERON:0001062 + description: (The CURIE of the Uberon term for "anatomical entity".) + - value: anatomical entity + description: (A string, rather than ID, describing the "anatomical entity" category. This is possible, but less preferred than using an ID.) + - value: biolink:Gene + description: (The CURIE of the biolink class for genes.) + mapping_justification: + description: A mapping justification is an action (or the written representation of that action) of showing a mapping to be right or reasonable. 
+ range: EntityReference + pattern: "^semapv:(MappingReview|ManualMappingCuration|LogicalReasoning|LexicalMatching|CompositeMatching|UnspecifiedMatching|SemanticSimilarityThresholdMatching|LexicalSimilarityThresholdMatching|MappingChaining)$" + required: true + any_of: + - equals_string: semapv:LexicalMatching + - equals_string: semapv:LogicalReasoning + - equals_string: semapv:CompositeMatching + - equals_string: semapv:UnspecifiedMatching + - equals_string: semapv:SemanticSimilarityThresholdMatching + - equals_string: semapv:LexicalSimilarityThresholdMatching + - equals_string: semapv:MappingChaining + - equals_string: semapv:MappingReview + - equals_string: semapv:ManualMappingCuration + examples: + - value: semapv:LexicalMatching + - value: semapv:ManualMappingCuration + object_type: + description: The type of entity that is being mapped. + range: entity_type_enum + instantiates: sssom:Propagatable + annotations: + propagated: true + examples: + - value: owl:Class + mapping_set_id: + description: A globally unique identifier for the mapping set (not each individual + mapping). Should be IRI, ideally resolvable. + required: true + range: uri + examples: + - value: http://purl.obolibrary.org/obo/mondo/mappings/mondo_exactmatch_ncit.sssom.tsv + description: (A persistent URI pointing to the latest version of the Mondo - NCIT mapping in the Mondo namespace.) + mapping_set_version: + description: A version string for the mapping. + range: string + slot_uri: owl:versionInfo + examples: + - value: "2020-01-01" + description: (A date-based version that indicates that the mapping was published on the 1st January in 2020.) + - value: "1.2.1" + description: "(A semantic version tag that indicates that this is the 1st major, 2nd minor version, patch 1 (https://semver.org/).)" + mapping_set_group: + description: Set by the owners of the mapping registry. A way to group mapping sets. + range: string + mapping_set_title: + description: The display name of a mapping set.
+ range: string + slot_uri: dcterms:title + examples: + - value: "The Mondo-OMIM mappings by Monarch Initiative." + mapping_set_description: + description: A description of the mapping set. + range: string + slot_uri: dcterms:description + examples: + - value: "This mapping set was produced to integrate human and mouse phenotype data at the IMPC. It is primarily used for making mouse phenotypes searchable by human synonyms at https://mousephenotype.org/." + creator_id: + description: Identifies the persons or groups responsible for the creation of + the mapping. The creator is the agent that put the mapping in its published form, + which may be different from the author, which is a person that was actively involved + in the assertion of the mapping. + Recommended to be a list of ORCIDs or otherwise + identifying URIs. + slot_uri: dcterms:creator + range: EntityReference + multivalued: true + creator_label: + description: A string identifying the creator of this mapping. In the spirit of + provenance, consider using creator_id instead. + range: string + multivalued: true + author_id: + description: Identifies the persons or groups responsible for asserting the mappings. + Recommended to be a list of ORCIDs or otherwise + identifying URIs. + slot_uri: pav:authoredBy + range: EntityReference + multivalued: true + author_label: + description: A string identifying the author of this mapping. In the spirit of + provenance, consider using author_id instead. + range: string + multivalued: true + reviewer_id: + description: Identifies the persons or groups that reviewed and confirmed the mapping. + Recommended to be a list of ORCIDs or otherwise + identifying URIs. + range: EntityReference + multivalued: true + reviewer_label: + description: A string identifying the reviewer of this mapping. In the spirit of + provenance, consider using reviewer_id instead. + range: string + multivalued: true + license: + description: A url to the license of the mapping. 
In absence of a license we assume + no license. + range: uri + slot_uri: dcterms:license + subject_source: + description: URI of vocabulary or identifier source for the subject. + range: EntityReference + instantiates: sssom:Propagatable + annotations: + propagated: true + examples: + - value: obo:mondo.owl + description: A persistent OBO CURIE pointing to the latest version of the Mondo ontology. + - value: wikidata:Q7876491 + description: A Wikidata identifier for the Uberon ontology resource. + subject_source_version: + description: Version IRI or version string of the source of the subject term. + range: string + instantiates: sssom:Propagatable + annotations: + propagated: true + examples: + - value: http://purl.obolibrary.org/obo/mondo/releases/2021-01-30/mondo.owl + description: (A persistent Version IRI pointing to the Mondo version '2021-01-30') + object_source: + description: URI of vocabulary or identifier source for the object. + range: EntityReference + instantiates: sssom:Propagatable + annotations: + propagated: true + examples: + - value: obo:mondo.owl + description: A persistent OBO CURIE pointing to the latest version of the Mondo ontology. + - value: wikidata:Q7876491 + description: A Wikidata identifier for the Uberon ontology resource. + object_source_version: + description: Version IRI or version string of the source of the object term. + range: string + instantiates: sssom:Propagatable + annotations: + propagated: true + examples: + - value: http://purl.obolibrary.org/obo/mondo/releases/2021-01-30/mondo.owl + description: (A persistent Version IRI pointing to the Mondo version '2021-01-30') + mapping_provider: + description: URL pointing to the source that provided the mapping, for example + an ontology that already contains the mappings, or a database from which it was derived. 
+ range: uri + instantiates: sssom:Propagatable + annotations: + propagated: true + mapping_set_source: + description: A mapping set or set of mapping set that was used to derive the mapping set. + slot_uri: prov:wasDerivedFrom + range: uri + multivalued: true + examples: + - value: http://purl.obolibrary.org/obo/mondo/mappings/2022-05-20/mondo_exactmatch_ncit.sssom.tsv + description: A persistent, ideally versioned, link to the mapping set from which the current mapping set is derived. + mapping_source: + description: The mapping set this mapping was originally defined in. mapping_source is used for example when merging multiple + mapping sets or deriving one mapping set from another. + range: EntityReference + examples: + - value: MONDO_MAPPINGS:mondo_exactmatch_ncit.sssom.tsv + mapping_cardinality: + description: A string indicating whether this mapping is from a 1:1 (the subject_id + maps to a single object_id), 1:n (the subject maps to more than one object_id), + n:1, 1:0, 0:1 or n:n group. Note that this is a convenience field that should be derivable + from the mapping set. + range: mapping_cardinality_enum + mapping_tool: + description: A reference to the tool or algorithm that was used to generate the + mapping. Should be a URL pointing to more info about it, but can be free text. + range: string + instantiates: sssom:Propagatable + annotations: + propagated: true + examples: + - value: https://github.com/AgreementMakerLight/AML-Project + mapping_tool_version: + description: Version string that denotes the version of the mapping tool used. + range: string + instantiates: sssom:Propagatable + annotations: + propagated: true + examples: + - value: v3.2 + mapping_date: + description: The date the mapping was asserted. This is different from the date the mapping was published or compiled in a SSSOM file. 
+ slot_uri: pav:authoredOn + range: date + instantiates: sssom:Propagatable + annotations: + propagated: true + publication_date: + description: The date the mapping was published. This is different from the date the mapping was asserted. + slot_uri: dcterms:created + range: date + confidence: + description: A score between 0 and 1 to denote the confidence or probability that + the match is correct, where 1 denotes total confidence. + range: double + minimum_value: 0.0 + maximum_value: 1.0 + subject_match_field: + description: A list of properties (term annotations on the subject) that was used + for the match. + range: EntityReference + multivalued: true + instantiates: sssom:Propagatable + annotations: + propagated: true + object_match_field: + description: A list of properties (term annotations on the object) that was used + for the match. + range: EntityReference + multivalued: true + instantiates: sssom:Propagatable + annotations: + propagated: true + match_string: + description: String that is shared by subj/obj. It is recommended to indicate the + fields for the match using the object and subject_match_field slots. + range: string + multivalued: true + subject_preprocessing: + description: Method of preprocessing applied to the fields of the subject. + If different preprocessing steps were performed on different fields, it is + recommended to store the match in separate rows. + range: EntityReference + multivalued: true + instantiates: sssom:Propagatable + annotations: + propagated: true + examples: + - value: semapv:Stemming + - value: semapv:StopWordRemoval + object_preprocessing: + description: Method of preprocessing applied to the fields of the object. + If different preprocessing steps were performed on different fields, it is + recommended to store the match in separate rows. 
+ range: EntityReference + multivalued: true + instantiates: sssom:Propagatable + annotations: + propagated: true + examples: + - value: semapv:Stemming + - value: semapv:StopWordRemoval + curation_rule: + description: A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. + Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation + rule is captured as a resource rather than a string, which enables higher levels of transparency and sharing across mapping sets. + The URI representation of the curation rule is expected to be a resolvable identifier which provides details about the nature of the curation rule. + range: EntityReference + multivalued: true + see_also: + - https://github.com/mapping-commons/sssom/issues/166 + - https://github.com/mapping-commons/sssom/pull/258 + - https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule.sssom.tsv + curation_rule_text: + description: A curation rule is a (potentially) complex condition executed by an agent that led to the establishment of a mapping. + Curation rules often involve complex domain-specific considerations, which are hard to capture in an automated fashion. The curation + rule should be captured as a resource (entity reference) rather than a string (see curation_rule element), which enables higher levels of transparency and sharing across mapping sets. + The textual representation of curation rule is intended to be used in cases where (1) the creation of a resource is not practical from the + perspective of the mapping_provider and (2) as an additional piece of metadata to augment the curation_rule element with a human readable text. 
+ range: string + multivalued: true + see_also: + - https://github.com/mapping-commons/sssom/issues/166 + - https://github.com/mapping-commons/sssom/pull/258 + - https://github.com/mapping-commons/sssom/blob/master/examples/schema/curation_rule_text.sssom.tsv + similarity_score: + description: A score between 0 and 1 to denote the similarity between two entities, where + 1 denotes equivalence, and 0 denotes disjointness. The score is meant to be used in conjunction + with the similarity_measure field, to document, for example, the lexical or semantic match + of a matching algorithm. + range: double + minimum_value: 0.0 + maximum_value: 1.0 + see_also: + - https://github.com/mapping-commons/sssom/issues/385 + - https://github.com/mapping-commons/sssom/pull/386 + - https://github.com/mapping-commons/sssom/blob/master/examples/schema/similarity_score.sssom.tsv + similarity_measure: + description: The measure used for computing a similarity score. + This field is meant to be used in conjunction with the similarity_score field, to document, + for example, the lexical or semantic match of a matching algorithm. + To make processing this field as unambiguous as possible, we recommend using + wikidata CURIEs, but the type of this field is deliberately unspecified. + range: string + examples: + - value: https://www.wikidata.org/entity/Q865360 + description: (the Wikidata IRI for the Jaccard index measure). + - value: wikidata:Q865360 + description: (the Wikidata CURIE for the Jaccard index measure). + - value: Levenshtein distance + description: (a score to measure the distance between two character sequences). + see_also: + - https://github.com/mapping-commons/sssom/issues/385 + - https://github.com/mapping-commons/sssom/pull/386 + - https://github.com/mapping-commons/sssom/blob/master/examples/schema/similarity_score.sssom.tsv + issue_tracker_item: + description: The issue tracker item discussing this mapping. 
+ range: EntityReference + examples: + - value: SSSOM_GITHUB_ISSUE:166 + description: (A URL resolving to an issue discussing a new SSSOM element request) + see_also: + - https://github.com/mapping-commons/sssom/issues/78 + - https://github.com/mapping-commons/sssom/pull/259 + - https://github.com/mapping-commons/sssom/blob/master/examples/schema/issue_tracker_item.sssom.tsv + issue_tracker: + description: A URL location of the issue tracker for this entity. + range: uri + examples: + - value: https://github.com/mapping-commons/mh_mapping_initiative/issues + description: (A URL resolving to the issue tracker of the Mouse-Human mapping initiative) + see_also: + - https://github.com/mapping-commons/sssom/issues/78 + - https://github.com/mapping-commons/sssom/pull/259 + - https://github.com/mapping-commons/sssom/blob/master/examples/schema/issue_tracker.sssom.tsv + see_also: + description: A URL specific for the mapping instance. E.g. for kboom we have a + per-mapping image that shows surrounding axioms that drive probability. Could + also be a github issue URL that discussed a complicated alignment + slot_uri: rdfs:seeAlso + range: string + multivalued: true + other: + description: Pipe separated list of key value pairs for properties not part of + the SSSOM spec. Can be used to encode additional provenance data. + range: string + comment: + description: Free text field containing either curator notes or text generated + by tool providing additional informative information. + slot_uri: rdfs:comment + range: string + extension_definitions: + description: A list that defines the extension slots used in the mapping set. 
+ range: extension definition + multivalued: true + see_also: + - https://github.com/mapping-commons/sssom/issues/328 + - https://github.com/mapping-commons/sssom/blob/master/examples/schema/extension-slots.sssom.tsv +classes: + mapping set: + description: Represents a set of mappings + slot_usage: + license: + required: true + slots: + - curie_map + - mappings + - mapping_set_id + - mapping_set_version + - mapping_set_source + - mapping_set_title + - mapping_set_description + - creator_id + - creator_label + - license + - subject_type + - subject_source + - subject_source_version + - object_type + - object_source + - object_source_version + - mapping_provider + - mapping_tool + - mapping_tool_version + - mapping_date + - publication_date + - subject_match_field + - object_match_field + - subject_preprocessing + - object_preprocessing + - see_also + - issue_tracker + - other + - comment + - extension_definitions + mapping: + description: Represents an individual mapping between a pair of entities + slots: + - subject_id + - subject_label + - subject_category + - predicate_id + - predicate_label + - predicate_modifier + - object_id + - object_label + - object_category + - mapping_justification + - author_id + - author_label + - reviewer_id + - reviewer_label + - creator_id + - creator_label + - license + - subject_type + - subject_source + - subject_source_version + - object_type + - object_source + - object_source_version + - mapping_provider + - mapping_source + - mapping_cardinality + - mapping_tool + - mapping_tool_version + - mapping_date + - publication_date + - confidence + - curation_rule + - curation_rule_text + - subject_match_field + - object_match_field + - match_string + - subject_preprocessing + - object_preprocessing + - similarity_score + - similarity_measure + - see_also + - issue_tracker_item + - other + - comment + class_uri: owl:Axiom + rules: + - preconditions: + slot_conditions: + subject_type: + equals_string: "rdfs literal" + postconditions: 
+ slot_conditions: + subject_label: + required: true + - preconditions: + slot_conditions: + subject_type: + none_of: + equals_string: "rdfs literal" + postconditions: + slot_conditions: + subject_id: + required: true + - preconditions: + slot_conditions: + object_type: + equals_string: "rdfs literal" + postconditions: + slot_conditions: + object_label: + required: true + - preconditions: + slot_conditions: + object_type: + none_of: + equals_string: "rdfs literal" + postconditions: + slot_conditions: + object_id: + required: true + mapping registry: + description: A registry for managing mapping sets. It holds a set of + mapping set references, and can import other registries. + slots: + - mapping_registry_id + - mapping_registry_title + - mapping_registry_description + - imports + - mapping_set_references + - documentation + - homepage + - issue_tracker + mapping set reference: + description: A reference to a mapping set. It allows to augment mapping + set metadata from the perspective of the registry, for example, providing + confidence, or a local filename or a grouping. + slots: + - mapping_set_id + - mirror_from + - registry_confidence + - mapping_set_group + - last_updated + - local_name + prefix: + slots: + - prefix_name + - prefix_url + extension definition: + description: A definition of an extension (non-standard) slot. + attributes: + slot_name: + description: The name of the extension slot. + range: ncname + required: true + property: + description: The property associated with the extension slot. It is + intended to provide a non-ambiguous meaning to the slot (contrary + to the slot_name, which for brevity reasons may be ambiguous). + range: uriorcurie + type_hint: + description: Expected type of the values of the extension slot. + range: uriorcurie + Propagatable: + class_uri: sssom:Propagatable + description: Metamodel extension class to describe slots whose value can be + propagated down from the MappingSet class to the Mapping class. 
+ see_also: + - https://github.com/mapping-commons/sssom/issues/305 + attributes: + propagated: + description: Indicates whether a slot can be propagated from a mapping + down to individual mappings. + range: boolean + NoTermFound: + class_uri: sssom:NoTermFound + description: sssom:NoTermFound can be used in place of a subject_id or object_id + when the corresponding entity could not be found. It SHOULD be used in conjuction with + a corresponding subject_source or object_source to signify where the term was not found. + see_also: + - https://github.com/mapping-commons/sssom/issues/28 + - https://github.com/mapping-commons/sssom/blob/master/examples/schema/no_term_found.sssom.tsv +
diff --git a/data-raw/internal/sssom_slot_types.rds b/data-raw/internal/sssom_slot_types.rds
new file mode 100644
index 00000000..a124dbea
Binary files /dev/null and b/data-raw/internal/sssom_slot_types.rds differ
diff --git a/data-raw/internal/sssom_spec.R b/data-raw/internal/sssom_spec.R
new file mode 100644
index 00000000..bfd43f26
--- /dev/null
+++ b/data-raw/internal/sssom_spec.R
@@ -0,0 +1,71 @@
+## code to prepare `.sssom_spec`, `.sssom_slot_types`, and
+## `.sssom_mapping_slots` internal datasets ##
+#
+# Capture official SSSOM specification and parse for use by DO.utils
+#
+# Outputs (all written to data-raw/internal/):
+#   - sssom_schema-<version>.yaml: the schema exactly as downloaded
+#   - sssom_spec.rds: parsed schema plus `version` and `access_date`
+#   - sssom_slot_types.rds: `range` of each slot, named by slot
+#   - sssom_mapping_slots.rds: slots of the `mapping` class
+
+# every package called with `pkg::` below must be listed; `httr` is used for
+# the release lookup
+rlang::check_installed(
+  c("glue", "here", "httr", "purrr", "stringr", "yaml")
+)
+
+
+# identify latest SSSOM version and construct URL for raw YAML
+# (takes everything after the last "/" of the URL that `releases/latest/`
+# resolves to; presumably the release tag -- verify if GitHub changes URLs)
+sssom_version <- stringr::str_remove(
+  httr::HEAD("https://github.com/mapping-commons/sssom/releases/latest/")$url,
+  ".*/"
+)
+sssom_yaml_path <- glue::glue(
+  "https://raw.githubusercontent.com/mapping-commons/sssom/@sssom_version@/src/sssom_schema/schema/sssom_schema.yaml",
+  .open = "@",
+  .close = "@"
+)
+
+
+# download YAML schema and parse for internal use
+outdir <- here::here("data-raw", "internal")
+yaml_file <- file.path(outdir, paste0("sssom_schema-", sssom_version, ".yaml"))
+
+dl_status <- download.file(sssom_yaml_path, yaml_file)
+
+# download.file() returns 0 on success
+if (dl_status != 0) {
+  rlang::abort(
+    glue::glue(
+      "Failed to download SSSOM specification from {sssom_yaml_path}"
+    )
+  )
+}
+
+.sssom_spec <- yaml::read_yaml(yaml_file)
+# record provenance alongside the parsed schema
+.sssom_spec$version <- sssom_version
+.sssom_spec$access_date <- Sys.Date()
+
+# map_chr() errors if any slot lacks a single string `range`
+.sssom_slot_types <- purrr::map_chr(.sssom_spec$slots, ~ .$range)
+.sssom_mapping_slots <- .sssom_spec$classes$mapping$slots
+
+saveRDS(
+  .sssom_spec,
+  file = file.path(outdir, "sssom_spec.rds"),
+  compress = "bzip2"
+)
+saveRDS(
+  .sssom_slot_types,
+  file = file.path(outdir, "sssom_slot_types.rds"),
+  compress = "bzip2"
+)
+saveRDS(
+  .sssom_mapping_slots,
+  file = file.path(outdir, "sssom_mapping_slots.rds"),
+  compress = "bzip2"
+)
diff --git a/data-raw/internal/sssom_spec.rds b/data-raw/internal/sssom_spec.rds
new file mode 100644
index 00000000..199abb36
Binary files /dev/null and b/data-raw/internal/sssom_spec.rds differ
diff --git a/data-raw/sysdata-update.R b/data-raw/sysdata-update.R
new file mode 100644
index 00000000..3e273230
--- /dev/null
+++ b/data-raw/sysdata-update.R
@@ -0,0 +1,38 @@
+# Rebuild the package's internal data from the .rds files in data-raw/internal/
+#
+# packages must be given as ONE character vector: the second argument of
+# check_installed() is `reason`, not another package
+rlang::check_installed(c("here", "usethis"))
+
+indir <- here::here("data-raw", "internal")
+
+
+# DO Google Sheets reference
+.DO_gs <- readRDS(file.path(indir, "DO_gs.rds"))
+
+
+# HTML tags reference
+.html_tags <- readRDS(file.path(indir, "html_tags.rds"))
+
+
+# curation template specification
+.curation_opts <- readRDS(file.path(indir, "curation_opts.rds"))
+
+
+# SSSOM specification
+.sssom_spec <- readRDS(file.path(indir, "sssom_spec.rds"))
+.sssom_slot_types <- readRDS(file.path(indir, "sssom_slot_types.rds"))
+.sssom_mapping_slots <- readRDS(file.path(indir, "sssom_mapping_slots.rds"))
+
+
+# save all internal datasets in one call, replacing any existing internal data
+usethis::use_data(
+  .DO_gs,
+  .html_tags,
+  .curation_opts,
+  .sssom_spec,
+  .sssom_slot_types,
+  .sssom_mapping_slots,
+  internal = TRUE,
+  overwrite = TRUE
+)