diff --git a/.github/workflows/extractRQs.yml b/.github/workflows/extractRQs.yml index d0f4bb6..b5dc91b 100644 --- a/.github/workflows/extractRQs.yml +++ b/.github/workflows/extractRQs.yml @@ -20,6 +20,8 @@ jobs: run: pip install rdflib - name: Extract run: python scripts/transformDotTtlToDotSparql.py + - name: Lint headers + run: python scripts/lint_headers.py - name: Commit new .rq files run: | diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5717ef9 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.planning/ diff --git a/A. Metadata/authors.rq b/A. Metadata/authors.rq index 38a57c8..1d73858 100644 --- a/A. Metadata/authors.rq +++ b/A. Metadata/authors.rq @@ -1,3 +1,8 @@ +# title: Authors of All Pathways +# category: Metadata +# description: Lists all pathway authors with their name, homepage, and ORCID, +# along with the number of pathways each author created. + PREFIX dc: PREFIX foaf: diff --git a/A. Metadata/datacounts/averageDatanodes.rq b/A. Metadata/datacounts/averageDatanodes.rq index 88f62b2..37a263b 100644 --- a/A. Metadata/datacounts/averageDatanodes.rq +++ b/A. Metadata/datacounts/averageDatanodes.rq @@ -1,3 +1,8 @@ +# title: Average Data Nodes per Pathway +# category: Metadata +# description: Calculates the average, minimum, and maximum number of data nodes per +# pathway in WikiPathways. + SELECT (AVG(?no) AS ?avg) (MIN(?no) AS ?min) (MAX(?no) AS ?max) diff --git a/A. Metadata/datacounts/averageGeneProducts.rq b/A. Metadata/datacounts/averageGeneProducts.rq index 4b04573..c462696 100644 --- a/A. Metadata/datacounts/averageGeneProducts.rq +++ b/A. Metadata/datacounts/averageGeneProducts.rq @@ -1,3 +1,8 @@ +# title: Average Gene Products per Pathway +# category: Metadata +# description: Calculates the average, minimum, and maximum number of gene products +# per pathway in WikiPathways. + SELECT (AVG(?no) AS ?avg) (MIN(?no) AS ?min) (MAX(?no) AS ?max) diff --git a/A. Metadata/datacounts/averageInteractions.rq b/A. 
Metadata/datacounts/averageInteractions.rq index 11e4d75..0451d1f 100644 --- a/A. Metadata/datacounts/averageInteractions.rq +++ b/A. Metadata/datacounts/averageInteractions.rq @@ -1,3 +1,8 @@ +# title: Average Interactions per Pathway +# category: Metadata +# description: Calculates the average, minimum, and maximum number of interactions +# per pathway in WikiPathways. + SELECT (AVG(?no) AS ?avg) (MIN(?no) AS ?min) (MAX(?no) AS ?max) diff --git a/A. Metadata/datacounts/averageMetabolites.rq b/A. Metadata/datacounts/averageMetabolites.rq index 5936678..8ae4ac4 100644 --- a/A. Metadata/datacounts/averageMetabolites.rq +++ b/A. Metadata/datacounts/averageMetabolites.rq @@ -1,3 +1,8 @@ +# title: Average Metabolites per Pathway +# category: Metadata +# description: Calculates the average, minimum, and maximum number of metabolites per +# pathway in WikiPathways. + SELECT (AVG(?no) AS ?avg) (MIN(?no) AS ?min) (MAX(?no) AS ?max) diff --git a/A. Metadata/datacounts/averageProteins.rq b/A. Metadata/datacounts/averageProteins.rq index 7dd1832..c054598 100644 --- a/A. Metadata/datacounts/averageProteins.rq +++ b/A. Metadata/datacounts/averageProteins.rq @@ -1,3 +1,8 @@ +# title: Average Proteins per Pathway +# category: Metadata +# description: Calculates the average, minimum, and maximum number of proteins per +# pathway in WikiPathways. + SELECT (AVG(?no) AS ?avg) (MIN(?no) AS ?min) (MAX(?no) AS ?max) diff --git a/A. Metadata/datacounts/countDataNodes.rq b/A. Metadata/datacounts/countDataNodes.rq index 39776f5..dc89f4b 100644 --- a/A. Metadata/datacounts/countDataNodes.rq +++ b/A. Metadata/datacounts/countDataNodes.rq @@ -1,3 +1,7 @@ +# title: Count of Data Nodes +# category: Metadata +# description: Counts the total number of data nodes in WikiPathways. + SELECT DISTINCT count(?DataNodes) as ?DataNodeCount WHERE { ?DataNodes a wp:DataNode . diff --git a/A. Metadata/datacounts/countGeneProducts.rq b/A. 
Metadata/datacounts/countGeneProducts.rq index d801061..bb70fe9 100644 --- a/A. Metadata/datacounts/countGeneProducts.rq +++ b/A. Metadata/datacounts/countGeneProducts.rq @@ -1,3 +1,7 @@ +# title: Count of Gene Products +# category: Metadata +# description: Counts the total number of gene products in WikiPathways. + SELECT DISTINCT count(?geneProduct) as ?GeneProductCount WHERE { ?geneProduct a wp:GeneProduct . diff --git a/A. Metadata/datacounts/countInteractions.rq b/A. Metadata/datacounts/countInteractions.rq index 6986d60..6c44bd3 100644 --- a/A. Metadata/datacounts/countInteractions.rq +++ b/A. Metadata/datacounts/countInteractions.rq @@ -1,3 +1,7 @@ +# title: Count of Interactions +# category: Metadata +# description: Counts the total number of interactions in WikiPathways. + SELECT DISTINCT count(?Interaction) as ?InteractionCount WHERE { ?Interaction a wp:Interaction . diff --git a/A. Metadata/datacounts/countMetabolites.rq b/A. Metadata/datacounts/countMetabolites.rq index fe74f13..c20fc83 100644 --- a/A. Metadata/datacounts/countMetabolites.rq +++ b/A. Metadata/datacounts/countMetabolites.rq @@ -1,3 +1,7 @@ +# title: Count of Metabolites +# category: Metadata +# description: Counts the total number of metabolites in WikiPathways. + SELECT DISTINCT count(?Metabolite) as ?MetaboliteCount WHERE { ?Metabolite a wp:Metabolite . diff --git a/A. Metadata/datacounts/countPathways.rq b/A. Metadata/datacounts/countPathways.rq index 28d1bf3..a2e36ec 100644 --- a/A. Metadata/datacounts/countPathways.rq +++ b/A. Metadata/datacounts/countPathways.rq @@ -1,3 +1,7 @@ +# title: Count of Pathways +# category: Metadata +# description: Counts the total number of pathways in WikiPathways. + SELECT DISTINCT count(?Pathway) as ?PathwayCount WHERE { ?Pathway a wp:Pathway, skos:Collection . diff --git a/A. Metadata/datacounts/countProteins.rq b/A. Metadata/datacounts/countProteins.rq index 758277f..fa7d13d 100644 --- a/A. Metadata/datacounts/countProteins.rq +++ b/A. 
Metadata/datacounts/countProteins.rq @@ -1,3 +1,7 @@ +# title: Count of Proteins +# category: Metadata +# description: Counts the total number of proteins in WikiPathways. + SELECT DISTINCT count(?protein) as ?ProteinCount WHERE { ?protein a wp:Protein . diff --git a/A. Metadata/datacounts/countSignalingPathways.rq b/A. Metadata/datacounts/countSignalingPathways.rq index b81151d..a917c51 100644 --- a/A. Metadata/datacounts/countSignalingPathways.rq +++ b/A. Metadata/datacounts/countSignalingPathways.rq @@ -1,3 +1,8 @@ +# title: Count of Signaling Pathways +# category: Metadata +# description: Counts the total number of signaling pathways in WikiPathways by +# filtering on the signaling pathway ontology tag. + SELECT count(distinct ?pathway) as ?pathwaycount WHERE { ?tag1 a owl:Class ; diff --git a/A. Metadata/datacounts/linkoutCounts.rq b/A. Metadata/datacounts/linkoutCounts.rq index dc0efcf..2734e2d 100644 --- a/A. Metadata/datacounts/linkoutCounts.rq +++ b/A. Metadata/datacounts/linkoutCounts.rq @@ -1,3 +1,8 @@ +# title: External Linkout Counts +# category: Metadata +# description: Counts the number of distinct entities linked to each external database +# (ChEBI, ChemSpider, HMDB, PubChem, Ensembl, NCBI Gene, HGNC, Rhea, UniProt). + SELECT ?pred (COUNT(DISTINCT ?entity) AS ?count) WHERE { VALUES ?pred { # metabolites diff --git a/A. Metadata/datasources/WPforChemSpider.rq b/A. Metadata/datasources/WPforChemSpider.rq index c96ceac..5869cfc 100644 --- a/A. Metadata/datasources/WPforChemSpider.rq +++ b/A. Metadata/datasources/WPforChemSpider.rq @@ -1,4 +1,7 @@ -#List of WikiPathways for ChemSpider identifiers +# title: WikiPathways for ChemSpider Identifiers +# category: Data Sources +# description: Lists pathways containing metabolites with ChemSpider identifiers, +# showing the pathway title and extracted ChemSpider ID. 
select distinct ?pathwayRes (str(?wpid) as ?pathway) (str(?title) as ?pathwayTitle) (fn:substring(?csId,36) as ?chemspider) where { ?gene a wp:Metabolite ; diff --git a/A. Metadata/datasources/WPforEnsembl.rq b/A. Metadata/datasources/WPforEnsembl.rq index 721f26d..9a303c5 100644 --- a/A. Metadata/datasources/WPforEnsembl.rq +++ b/A. Metadata/datasources/WPforEnsembl.rq @@ -1,11 +1,14 @@ -#List of WikiPathways for Ensembl identifiers - -select distinct ?pathwayRes (str(?wpid) as ?pathway) (str(?title) as ?pathwayTitle) (fn:substring(?ensId,32) as ?ensembl) where { - ?gene a wp:GeneProduct ; - dcterms:identifier ?id ; - dcterms:isPartOf ?pathwayRes ; - wp:bdbEnsembl ?ensId . - ?pathwayRes a wp:Pathway ; - dcterms:identifier ?wpid ; - dc:title ?title . -} +# title: WikiPathways for Ensembl Identifiers +# category: Data Sources +# description: Lists pathways containing gene products with Ensembl identifiers, +# showing the pathway title and extracted Ensembl ID. + +select distinct ?pathwayRes (str(?wpid) as ?pathway) (str(?title) as ?pathwayTitle) (fn:substring(?ensId,32) as ?ensembl) where { + ?gene a wp:GeneProduct ; + dcterms:identifier ?id ; + dcterms:isPartOf ?pathwayRes ; + wp:bdbEnsembl ?ensId . + ?pathwayRes a wp:Pathway ; + dcterms:identifier ?wpid ; + dc:title ?title . +} diff --git a/A. Metadata/datasources/WPforHGNC.rq b/A. Metadata/datasources/WPforHGNC.rq index 6d2b66f..a0cd6d8 100644 --- a/A. Metadata/datasources/WPforHGNC.rq +++ b/A. Metadata/datasources/WPforHGNC.rq @@ -1,4 +1,7 @@ -#List of WikiPathways for HGNC symbols +# title: WikiPathways for HGNC Symbols +# category: Data Sources +# description: Lists pathways containing gene products with HGNC symbol identifiers, +# showing the pathway title and extracted HGNC symbol. select distinct ?pathwayRes (str(?wpid) as ?pathway) (str(?title) as ?pathwayTitle) (fn:substring(?hgncId,37) as ?HGNC) where { ?gene a wp:GeneProduct ; diff --git a/A. Metadata/datasources/WPforHMDB.rq b/A. 
Metadata/datasources/WPforHMDB.rq index 800bf8f..0a1d18f 100644 --- a/A. Metadata/datasources/WPforHMDB.rq +++ b/A. Metadata/datasources/WPforHMDB.rq @@ -1,4 +1,7 @@ -#ist of WikiPathways for HMDB identifiers +# title: WikiPathways for HMDB Identifiers +# category: Data Sources +# description: Lists pathways containing metabolites with HMDB identifiers, showing +# the pathway title and extracted HMDB ID. select distinct ?pathwayRes (str(?wpid) as ?pathway) (str(?title) as ?pathwayTitle) (fn:substring(?hmdbId,29) as ?hmdb) where { ?gene a wp:Metabolite ; diff --git a/A. Metadata/datasources/WPforNCBI.rq b/A. Metadata/datasources/WPforNCBI.rq index 66a49f2..04dbec4 100644 --- a/A. Metadata/datasources/WPforNCBI.rq +++ b/A. Metadata/datasources/WPforNCBI.rq @@ -1,4 +1,7 @@ -#List of WikiPathways for NCBI Gene identifiers +# title: WikiPathways for NCBI Gene Identifiers +# category: Data Sources +# description: Lists pathways containing gene products with NCBI Gene identifiers, +# showing the pathway title and extracted NCBI Gene ID. select distinct ?pathwayRes (str(?wpid) as ?pathway) (str(?title) as ?pathwayTitle) (fn:substring(?ncbiGeneId,33) as ?NCBIGene) where { ?gene a wp:GeneProduct ; diff --git a/A. Metadata/datasources/WPforPubChemCID.rq b/A. Metadata/datasources/WPforPubChemCID.rq index f4055fc..138b660 100644 --- a/A. Metadata/datasources/WPforPubChemCID.rq +++ b/A. Metadata/datasources/WPforPubChemCID.rq @@ -1,11 +1,14 @@ -#List of WikiPathways for PubChem CID identifiers - -select distinct ?pathwayRes (str(?wpid) as ?pathway) (str(?title) as ?pathwayTitle) (fn:substring(?cid,46) as ?PubChem) where { - ?gene a wp:Metabolite ; - dcterms:identifier ?id ; - dcterms:isPartOf ?pathwayRes ; - wp:bdbPubChem ?cid . - ?pathwayRes a wp:Pathway ; - dcterms:identifier ?wpid ; - dc:title ?title . 
-} \ No newline at end of file +# title: WikiPathways for PubChem CID Identifiers +# category: Data Sources +# description: Lists pathways containing metabolites with PubChem compound identifiers, +# showing the pathway title and extracted PubChem CID. + +select distinct ?pathwayRes (str(?wpid) as ?pathway) (str(?title) as ?pathwayTitle) (fn:substring(?cid,46) as ?PubChem) where { + ?gene a wp:Metabolite ; + dcterms:identifier ?id ; + dcterms:isPartOf ?pathwayRes ; + wp:bdbPubChem ?cid . + ?pathwayRes a wp:Pathway ; + dcterms:identifier ?wpid ; + dc:title ?title . +} diff --git a/A. Metadata/linksets.rq b/A. Metadata/linksets.rq index 6d5d7a0..e5bca9f 100644 --- a/A. Metadata/linksets.rq +++ b/A. Metadata/linksets.rq @@ -1,3 +1,8 @@ +# title: Linksets Overview +# category: Metadata +# description: Returns all VoID linksets in the WikiPathways RDF with their title, +# creation date, and license information. + SELECT DISTINCT ?dataset (str(?titleLit) as ?title) ?date ?license WHERE { ?dataset a void:Linkset ; diff --git a/A. Metadata/metadata.rq b/A. Metadata/metadata.rq index 9c55307..713f667 100644 --- a/A. Metadata/metadata.rq +++ b/A. Metadata/metadata.rq @@ -1,7 +1,12 @@ +# title: Dataset Metadata +# category: Metadata +# description: Returns all VoID datasets in the WikiPathways RDF with their title, +# creation date, and license information. + SELECT DISTINCT ?dataset (str(?titleLit) as ?title) ?date ?license WHERE { ?dataset a void:Dataset ; dcterms:title ?titleLit ; dcterms:license ?license ; pav:createdOn ?date . -} \ No newline at end of file +} diff --git a/A. Metadata/prefixes.rq b/A. Metadata/prefixes.rq index e652d55..5ff94e0 100644 --- a/A. Metadata/prefixes.rq +++ b/A. Metadata/prefixes.rq @@ -1,3 +1,8 @@ +# title: SPARQL Prefixes +# category: Metadata +# description: Lists all namespace prefixes declared in the WikiPathways SPARQL +# endpoint via SHACL prefix declarations. 
+ PREFIX sh: PREFIX xsd: @@ -6,4 +11,4 @@ SELECT ?prefix ?namespace WHERE { sh:prefix ?prefix ; sh:namespace ?namespace ] . -} \ No newline at end of file +} diff --git a/A. Metadata/species/PWsforSpecies.rq b/A. Metadata/species/PWsforSpecies.rq index 113ef8f..ddbeb0f 100644 --- a/A. Metadata/species/PWsforSpecies.rq +++ b/A. Metadata/species/PWsforSpecies.rq @@ -1,8 +1,14 @@ +# title: Pathways for a Species +# category: Metadata +# description: Lists all pathways for a given species, returning the WikiPathways +# identifier and page URL. Default species is Mus musculus. +# param: species | string | Mus musculus | Species + SELECT DISTINCT ?wpIdentifier ?pathway ?page WHERE { ?pathway dc:title ?title . ?pathway foaf:page ?page . ?pathway dc:identifier ?wpIdentifier . - ?pathway wp:organismName "Mus musculus" . #Replace "Mus musculus" with other species: "Homo sapiens", "Rattus norvegicus", "Danio rerio" + ?pathway wp:organismName "{{species}}" . } ORDER BY ?wpIdentifier diff --git a/A. Metadata/species/countDataNodePerSpecies.rq b/A. Metadata/species/countDataNodePerSpecies.rq index 97aea3f..6b0b896 100644 --- a/A. Metadata/species/countDataNodePerSpecies.rq +++ b/A. Metadata/species/countDataNodePerSpecies.rq @@ -1,3 +1,8 @@ +# title: Data Nodes per Species +# category: Metadata +# description: Counts the number of distinct data nodes per species in WikiPathways, +# ordered by count descending. + select (count(distinct ?datanode) as ?count) (str(?label) as ?species) where { ?datanode a wp:DataNode ; dcterms:isPartOf ?pw . diff --git a/A. Metadata/species/countGeneProductsPerSpecies.rq b/A. Metadata/species/countGeneProductsPerSpecies.rq index 33fe557..6aa6f4c 100644 --- a/A. Metadata/species/countGeneProductsPerSpecies.rq +++ b/A. 
Metadata/species/countGeneProductsPerSpecies.rq @@ -1,3 +1,8 @@ +# title: Gene Products per Species +# category: Metadata +# description: Counts the number of distinct gene products per species in WikiPathways, +# ordered by count descending. + select (count(distinct ?gene) as ?count) (str(?label) as ?species) where { ?gene a wp:GeneProduct ; dcterms:isPartOf ?pw . diff --git a/A. Metadata/species/countMetabolitesPerSpecies.rq b/A. Metadata/species/countMetabolitesPerSpecies.rq index 3897da6..4ed0c82 100644 --- a/A. Metadata/species/countMetabolitesPerSpecies.rq +++ b/A. Metadata/species/countMetabolitesPerSpecies.rq @@ -1,3 +1,8 @@ +# title: Metabolites per Species +# category: Metadata +# description: Counts the number of distinct metabolites per species in WikiPathways, +# ordered by count descending. + select (str(?label) as ?species) (count(distinct ?metabolite) as ?count) where { ?metabolite a wp:Metabolite ; dcterms:isPartOf ?pw . diff --git a/A. Metadata/species/countPathwaysPerSpecies.rq b/A. Metadata/species/countPathwaysPerSpecies.rq index 7300184..f495d1b 100644 --- a/A. Metadata/species/countPathwaysPerSpecies.rq +++ b/A. Metadata/species/countPathwaysPerSpecies.rq @@ -1,3 +1,8 @@ +# title: Pathways per Species +# category: Metadata +# description: Counts the number of pathways per species in WikiPathways, returning +# the species name, organism URI, and pathway count. + SELECT DISTINCT (str(?label) as ?name) ?organism (count(?pw) as ?pathwayCount) WHERE { ?pw dc:title ?title ; diff --git a/A. Metadata/species/countProteinsPerSpecies.rq b/A. Metadata/species/countProteinsPerSpecies.rq index 11b912a..9fc7bf0 100644 --- a/A. Metadata/species/countProteinsPerSpecies.rq +++ b/A. Metadata/species/countProteinsPerSpecies.rq @@ -1,3 +1,8 @@ +# title: Proteins per Species +# category: Metadata +# description: Counts the number of distinct proteins per species in WikiPathways, +# ordered by count descending. 
+ select (count(distinct ?protein) as ?count) (str(?label) as ?species) where { ?protein a wp:Protein ; dcterms:isPartOf ?pw . diff --git a/B. Communities/AOP/allPathways.rq b/B. Communities/AOP/allPathways.rq index e9d9a35..175b357 100644 --- a/B. Communities/AOP/allPathways.rq +++ b/B. Communities/AOP/allPathways.rq @@ -1,3 +1,7 @@ +# title: AOP Community Pathways +# category: Communities +# description: Lists all pathways tagged with the AOP community curation tag. + PREFIX wp: PREFIX dc: PREFIX cur: @@ -7,4 +11,4 @@ WHERE { ?pathway wp:ontologyTag cur:AOP ; a wp:Pathway ; dc:title ?title . -} \ No newline at end of file +} diff --git a/B. Communities/AOP/allProteins.rq b/B. Communities/AOP/allProteins.rq index 2cc9987..6e02f2f 100644 --- a/B. Communities/AOP/allProteins.rq +++ b/B. Communities/AOP/allProteins.rq @@ -1,3 +1,7 @@ +# title: AOP Community Proteins +# category: Communities +# description: Lists all proteins found in AOP community pathways. + SELECT DISTINCT ?pathway (str(?label) as ?Protein) WHERE { ?pathway wp:ontologyTag cur:AOP ; diff --git a/B. Communities/CIRM Stem Cell Pathways/allPathways.rq b/B. Communities/CIRM Stem Cell Pathways/allPathways.rq index 8f9752c..cfecf50 100644 --- a/B. Communities/CIRM Stem Cell Pathways/allPathways.rq +++ b/B. Communities/CIRM Stem Cell Pathways/allPathways.rq @@ -1,3 +1,7 @@ +# title: CIRM Stem Cell Pathways +# category: Communities +# description: Lists all pathways tagged with the CIRM Stem Cell community curation tag. + SELECT DISTINCT ?pathway (str(?title) as ?PathwayTitle) WHERE { ?pathway wp:ontologyTag cur:CIRM_Related ; diff --git a/B. Communities/CIRM Stem Cell Pathways/allProteins.rq b/B. Communities/CIRM Stem Cell Pathways/allProteins.rq index 367a6c7..16e021b 100644 --- a/B. Communities/CIRM Stem Cell Pathways/allProteins.rq +++ b/B. 
Communities/CIRM Stem Cell Pathways/allProteins.rq @@ -1,3 +1,7 @@ +# title: CIRM Stem Cell Proteins +# category: Communities +# description: Lists all proteins found in CIRM Stem Cell community pathways. + SELECT DISTINCT ?pathway (str(?label) as ?Protein) WHERE { ?pathway wp:ontologyTag cur:CIRM_Related ; diff --git a/B. Communities/COVID19/allPathways.rq b/B. Communities/COVID19/allPathways.rq index 5088812..9dc1e50 100644 --- a/B. Communities/COVID19/allPathways.rq +++ b/B. Communities/COVID19/allPathways.rq @@ -1,3 +1,7 @@ +# title: COVID-19 Community Pathways +# category: Communities +# description: Lists all pathways tagged with the COVID-19 community curation tag. + SELECT DISTINCT ?pathway (str(?title) as ?PathwayTitle) WHERE { ?pathway wp:ontologyTag cur:COVID19 ; diff --git a/B. Communities/COVID19/allProteins.rq b/B. Communities/COVID19/allProteins.rq index e576ae1..bddb677 100644 --- a/B. Communities/COVID19/allProteins.rq +++ b/B. Communities/COVID19/allProteins.rq @@ -1,3 +1,7 @@ +# title: COVID-19 Community Proteins +# category: Communities +# description: Lists all proteins found in COVID-19 community pathways. + SELECT DISTINCT ?pathway (str(?label) as ?Protein) WHERE { ?pathway wp:ontologyTag cur:COVID19 ; diff --git a/B. Communities/Inborn Errors of Metabolism/allMetabolicPWs.rq b/B. Communities/Inborn Errors of Metabolism/allMetabolicPWs.rq index ea5dd27..69e40db 100644 --- a/B. Communities/Inborn Errors of Metabolism/allMetabolicPWs.rq +++ b/B. Communities/Inborn Errors of Metabolism/allMetabolicPWs.rq @@ -1,3 +1,8 @@ +# title: Inborn Errors of Metabolism Metabolic Pathways +# category: Communities +# description: Retrieves pathways classified under metabolic pathway ontology terms, +# filtering by label to find metabolic pathway annotations. + SELECT distinct ?pathway ?label ?tag WHERE { ?tag1 a owl:Class ; diff --git a/B. Communities/Inborn Errors of Metabolism/allPathways.rq b/B. 
Communities/Inborn Errors of Metabolism/allPathways.rq index 0dc3ac8..fc60d19 100644 --- a/B. Communities/Inborn Errors of Metabolism/allPathways.rq +++ b/B. Communities/Inborn Errors of Metabolism/allPathways.rq @@ -1,3 +1,7 @@ +# title: Inborn Errors of Metabolism Pathways +# category: Communities +# description: Lists all pathways tagged with the Inborn Errors of Metabolism (IEM) community curation tag. + SELECT DISTINCT ?pathway (str(?title) as ?PathwayTitle) WHERE { ?pathway wp:ontologyTag cur:IEM ; diff --git a/B. Communities/Inborn Errors of Metabolism/allProteins.rq b/B. Communities/Inborn Errors of Metabolism/allProteins.rq index f5f0bb2..0fdfdb8 100644 --- a/B. Communities/Inborn Errors of Metabolism/allProteins.rq +++ b/B. Communities/Inborn Errors of Metabolism/allProteins.rq @@ -1,3 +1,7 @@ +# title: Inborn Errors of Metabolism Proteins +# category: Communities +# description: Lists all proteins found in Inborn Errors of Metabolism (IEM) community pathways. + SELECT DISTINCT ?pathway (str(?label) as ?Protein) WHERE { ?pathway wp:ontologyTag cur:IEM ; diff --git a/B. Communities/Inborn Errors of Metabolism/countMetabolicPWs.rq b/B. Communities/Inborn Errors of Metabolism/countMetabolicPWs.rq index 03f8ffe..b1c7317 100644 --- a/B. Communities/Inborn Errors of Metabolism/countMetabolicPWs.rq +++ b/B. Communities/Inborn Errors of Metabolism/countMetabolicPWs.rq @@ -1,3 +1,8 @@ +# title: Count of IEM Metabolic Pathways +# category: Communities +# description: Counts the total number of pathways classified under metabolic pathway +# ontology terms. + SELECT count(distinct ?pathway) as ?pathwaycount WHERE { ?tag1 a owl:Class ; diff --git a/B. Communities/Inborn Errors of Metabolism/countProteinsMetabolitesRheaDiseases.rq b/B. Communities/Inborn Errors of Metabolism/countProteinsMetabolitesRheaDiseases.rq index e2b25f1..fa1ee61 100644 --- a/B. Communities/Inborn Errors of Metabolism/countProteinsMetabolitesRheaDiseases.rq +++ b/B. 
Communities/Inborn Errors of Metabolism/countProteinsMetabolitesRheaDiseases.rq @@ -1,4 +1,9 @@ -#Prefixes required which might not be available in the SPARQL endpoint by default +# title: IEM Proteins, Metabolites, Rhea, and Diseases +# category: Communities +# description: Summarizes IEM community pathways with counts of proteins, metabolites, +# Rhea reaction annotations, missing Rhea IDs, and linked OMIM disease identifiers +# per pathway. + PREFIX wp: PREFIX rdfs: PREFIX dcterms: diff --git a/B. Communities/Lipids/LIPIDMAPS_Federated.rq b/B. Communities/Lipids/LIPIDMAPS_Federated.rq index d993ac5..8522a60 100644 --- a/B. Communities/Lipids/LIPIDMAPS_Federated.rq +++ b/B. Communities/Lipids/LIPIDMAPS_Federated.rq @@ -1,4 +1,9 @@ -#Pathways describing the biology of oxygenated hydrocarbons (LMFA12) +# title: LIPID MAPS Federated Query +# category: Communities +# description: Retrieves lipid names, formulas, and associated pathways for a specific +# LIPID MAPS category by querying the LIPID MAPS SPARQL endpoint. May be slower due to +# external endpoint dependency. + PREFIX chebi: SELECT ?lipid ?name ?formula ?lmid (GROUP_CONCAT(?wpid_;separator=", ") AS ?pathway) diff --git a/B. Communities/Lipids/LipidClassesTotal.rq b/B. Communities/Lipids/LipidClassesTotal.rq index e195239..33530b1 100644 --- a/B. Communities/Lipids/LipidClassesTotal.rq +++ b/B. Communities/Lipids/LipidClassesTotal.rq @@ -1,11 +1,19 @@ +# title: Total Lipid Classes +# category: Communities +# description: Counts the number of individual lipids in a specific LIPID MAPS subclass +# across pathways of the selected species. Set the lipidClass parameter to query a +# different subclass (FA, GL, GP, SP, ST, PR, SL, PK). 
+# param: species | string | Homo sapiens | Species +# param: lipidClass | enum:FA,GL,GP,SP,ST,PR,SL,PK | FA | LIPID MAPS Class + SELECT count(DISTINCT ?lipidID) as ?IndividualLipidsPerClass WHERE { ?metabolite a wp:Metabolite ; dcterms:identifier ?id ; dcterms:isPartOf ?pathwayRes ; wp:bdbLipidMaps ?lipidID . #Metabolite DataNodes need to have a LIPID MAPS ID, for this query to count correctly (some lipids might be missed due to missing Xrefs) ?pathwayRes a wp:Pathway ; - wp:organismName "Homo sapiens"; #Filter for a species (ommit when querying all pathways available for all species) + wp:organismName "{{species}}"; #Filter for a species (omit when querying all pathways available for all species) dcterms:identifier ?wpid ; dc:title ?title . - FILTER regex(str(?lipidID), "FA" ). #Filter for a LIPID MAPS ID subclass: 'FA' Fatty Acids ; 'GL' Glycerolipid ; 'GP' Glycerophospholipid ; 'SP' Sphingolipids ; 'ST' Sterol lipids ; 'PR' Prenol Lipids ; 'SL' Saccharolipids ; 'PK' Polyketides + FILTER regex(str(?lipidID), "{{lipidClass}}" ). #Filter for a LIPID MAPS ID subclass: 'FA' Fatty Acids ; 'GL' Glycerolipid ; 'GP' Glycerophospholipid ; 'SP' Sphingolipids ; 'ST' Sterol lipids ; 'PR' Prenol Lipids ; 'SL' Saccharolipids ; 'PK' Polyketides } diff --git a/B. Communities/Lipids/LipidsClassesCountPerPathway.rq b/B. Communities/Lipids/LipidsClassesCountPerPathway.rq index 63601ac..b249fa7 100644 --- a/B. Communities/Lipids/LipidsClassesCountPerPathway.rq +++ b/B. Communities/Lipids/LipidsClassesCountPerPathway.rq @@ -1,13 +1,20 @@ +# title: Lipid Classes Count per Pathway +# category: Communities +# description: Counts the number of lipids in a specific LIPID MAPS subclass per human +# pathway, ordered by count. Set the lipidClass parameter to query different subclasses. 
+# param: species | string | Homo sapiens | Species +# param: lipidClass | enum:FA,GL,GP,SP,ST,PR,SL,PK | FA | LIPID MAPS Class + SELECT DISTINCT ?pathwayRes (str(?wpid) AS ?pathway) (str(?title) AS ?pathwayTitle) (count(DISTINCT ?lipidID) AS ?Class_LipidsInPWs) WHERE { ?metabolite a wp:Metabolite ; dcterms:identifier ?id ; dcterms:isPartOf ?pathwayRes ; wp:bdbLipidMaps ?lipidID . #Metabolite DataNodes need to have a LIPID MAPS ID, for this query to count correctly (some lipids might be missed due to missing Xrefs) ?pathwayRes a wp:Pathway ; - wp:organismName "Homo sapiens" ; #Filter for a species (ommit when querying all pathways available for all species) + wp:organismName "{{species}}" ; #Filter for a species (omit when querying all pathways available for all species) dcterms:identifier ?wpid ; dc:title ?title . - FILTER regex(str(?lipidID), "FA" ). #Filter for a LIPID MAPS ID subclass: 'FA' Fatty Acids ; 'GL' Glycerolipid ; 'GP' Glycerophospholipid ; 'SP' Sphingolipids ; 'ST' Sterol lipids ; 'PR' Prenol Lipids ; 'SL' Saccharolipids ; 'PK' Polyketides + FILTER regex(str(?lipidID), "{{lipidClass}}" ). #Filter for a LIPID MAPS ID subclass: 'FA' Fatty Acids ; 'GL' Glycerolipid ; 'GP' Glycerophospholipid ; 'SP' Sphingolipids ; 'ST' Sterol lipids ; 'PR' Prenol Lipids ; 'SL' Saccharolipids ; 'PK' Polyketides } ORDER BY DESC(?Class_LipidsInPWs) diff --git a/B. Communities/Lipids/LipidsCountPerPathway.rq b/B. Communities/Lipids/LipidsCountPerPathway.rq index 75f5406..5ea0466 100644 --- a/B. Communities/Lipids/LipidsCountPerPathway.rq +++ b/B. Communities/Lipids/LipidsCountPerPathway.rq @@ -1,3 +1,9 @@ +# title: Lipids Count per Pathway +# category: Communities +# description: Counts the total number of lipids with LIPID MAPS identifiers per human +# pathway, ordered by count. 
+# param: species | string | Homo sapiens | Species + prefix lipidmaps: #IRI can be used to create URLs from identifiers in line 7 select distinct ?pathwayRes (str(?wpid) as ?pathway) (str(?title) as ?pathwayTitle) (count(distinct ?lipidID) AS ?LipidsInPWs) where { @@ -6,7 +12,7 @@ where { dcterms:isPartOf ?pathwayRes ; #Define metabolites are part of a pathway wp:bdbLipidMaps ?lipidID . #Find the LIPID MAPS identifier for a certain metabolite ?pathwayRes a wp:Pathway ; #Define what is a pathway - wp:organismName "Homo sapiens" ; #Filter pathways on species Human + wp:organismName "{{species}}" ; #Filter pathways on the selected species dcterms:identifier ?wpid ; #Obtain identifier of pathway dc:title ?title . #Obtain title of pathway } diff --git a/B. Communities/Lipids/allPathways.rq b/B. Communities/Lipids/allPathways.rq index 8db0ced..9c9042d 100644 --- a/B. Communities/Lipids/allPathways.rq +++ b/B. Communities/Lipids/allPathways.rq @@ -1,3 +1,7 @@ +# title: Lipids Community Pathways +# category: Communities +# description: Lists all pathways tagged with the Lipids community curation tag. + SELECT DISTINCT ?pathway (str(?title) as ?PathwayTitle) WHERE { ?pathway wp:ontologyTag cur:Lipids ; diff --git a/B. Communities/Lipids/allProteins.rq b/B. Communities/Lipids/allProteins.rq index 0d68bbd..dee48f6 100644 --- a/B. Communities/Lipids/allProteins.rq +++ b/B. Communities/Lipids/allProteins.rq @@ -1,3 +1,7 @@ +# title: Lipids Community Proteins +# category: Communities +# description: Lists all proteins found in Lipids community pathways. + SELECT DISTINCT ?pathway (str(?label) as ?Protein) WHERE { ?pathway wp:ontologyTag cur:Lipids ; diff --git a/B. Communities/RareDiseases/allPathways.rq b/B. Communities/RareDiseases/allPathways.rq index d00228f..b2f2568 100644 --- a/B. Communities/RareDiseases/allPathways.rq +++ b/B. 
Communities/RareDiseases/allPathways.rq @@ -1,3 +1,7 @@ +# title: Rare Diseases Community Pathways +# category: Communities +# description: Lists all pathways tagged with the Rare Diseases community curation tag. + SELECT DISTINCT ?pathway (str(?title) as ?PathwayTitle) WHERE { ?pathway wp:ontologyTag cur:RareDiseases ; diff --git a/B. Communities/RareDiseases/allProteins.rq b/B. Communities/RareDiseases/allProteins.rq index 7d15f83..5dfc8f2 100644 --- a/B. Communities/RareDiseases/allProteins.rq +++ b/B. Communities/RareDiseases/allProteins.rq @@ -1,3 +1,7 @@ +# title: Rare Diseases Community Proteins +# category: Communities +# description: Lists all proteins found in Rare Diseases community pathways. + SELECT DISTINCT ?pathway (str(?label) as ?Protein) WHERE { ?pathway wp:ontologyTag cur:RareDiseases ; diff --git a/B. Communities/Reactome/getPathways.rq b/B. Communities/Reactome/getPathways.rq index be5611b..c8f7b19 100644 --- a/B. Communities/Reactome/getPathways.rq +++ b/B. Communities/Reactome/getPathways.rq @@ -1,3 +1,7 @@ +# title: Reactome Pathways +# category: Communities +# description: Lists all pathways tagged with the Reactome Approved curation tag. + SELECT DISTINCT ?pathway (str(?titleLit) as ?title) WHERE { ?pathway wp:ontologyTag cur:Reactome_Approved ; diff --git a/B. Communities/Reactome/refsReactomeAndWP.rq b/B. Communities/Reactome/refsReactomeAndWP.rq index 6e2f146..e4a49a2 100644 --- a/B. Communities/Reactome/refsReactomeAndWP.rq +++ b/B. Communities/Reactome/refsReactomeAndWP.rq @@ -1,3 +1,8 @@ +# title: References in Both Reactome and WikiPathways +# category: Communities +# description: Counts publication references that appear in both Reactome-approved and +# WikiPathways Analysis Collection pathways. + SELECT (COUNT(DISTINCT ?pubmed) AS ?count) WHERE { ?pubmed a wp:PublicationReference . diff --git a/B. Communities/Reactome/refsReactomeNotWP.rq b/B. Communities/Reactome/refsReactomeNotWP.rq index 9ea9796..ea3b1f5 100644 --- a/B. 
Communities/Reactome/refsReactomeNotWP.rq +++ b/B. Communities/Reactome/refsReactomeNotWP.rq @@ -1,3 +1,8 @@ +# title: References in Reactome but Not WikiPathways +# category: Communities +# description: Counts publication references found in Reactome-approved pathways but not +# in the WikiPathways Analysis Collection. + SELECT (COUNT(DISTINCT ?pubmed) AS ?count) WHERE { ?pubmed a wp:PublicationReference . diff --git a/B. Communities/Reactome/refsWPNotReactome.rq b/B. Communities/Reactome/refsWPNotReactome.rq index 380e272..e59578a 100644 --- a/B. Communities/Reactome/refsWPNotReactome.rq +++ b/B. Communities/Reactome/refsWPNotReactome.rq @@ -1,3 +1,8 @@ +# title: References in WikiPathways but Not Reactome +# category: Communities +# description: Counts publication references found in the WikiPathways Analysis Collection +# but not in Reactome-approved pathways. + SELECT (COUNT(DISTINCT ?pubmed) AS ?count) WHERE { ?pubmed a wp:PublicationReference . diff --git a/B. Communities/WormBase/allPathways.rq b/B. Communities/WormBase/allPathways.rq index 36082c6..c9cb6f7 100644 --- a/B. Communities/WormBase/allPathways.rq +++ b/B. Communities/WormBase/allPathways.rq @@ -1,3 +1,7 @@ +# title: WormBase Community Pathways +# category: Communities +# description: Lists all pathways tagged with the WormBase Approved community curation tag. + SELECT DISTINCT ?pathway (str(?title) as ?PathwayTitle) WHERE { ?pathway wp:ontologyTag cur:WormBase_Approved ; diff --git a/B. Communities/WormBase/allProteins.rq b/B. Communities/WormBase/allProteins.rq index 0239f7a..2384a06 100644 --- a/B. Communities/WormBase/allProteins.rq +++ b/B. Communities/WormBase/allProteins.rq @@ -1,3 +1,7 @@ +# title: WormBase Community Proteins +# category: Communities +# description: Lists all proteins found in WormBase Approved community pathways. + SELECT DISTINCT ?pathway (str(?label) as ?Protein) WHERE { ?pathway wp:ontologyTag cur:WormBase_Approved ; diff --git a/C. 
Collaborations/AOP-Wiki/MetaboliteInAOP-Wiki.rq b/C. Collaborations/AOP-Wiki/MetaboliteInAOP-Wiki.rq index 8fe9714..fd77d30 100644 --- a/C. Collaborations/AOP-Wiki/MetaboliteInAOP-Wiki.rq +++ b/C. Collaborations/AOP-Wiki/MetaboliteInAOP-Wiki.rq @@ -1,10 +1,17 @@ -PREFIX aopo: -PREFIX cheminf: +# title: Metabolites in AOP-Wiki +# category: Collaborations +# description: Finds metabolites in pathways of the selected species that are linked +# to stressors in AOP-Wiki by querying the AOP-Wiki SPARQL endpoint via ChEBI +# identifiers. May be slower due to external endpoint dependency. +# param: species | string | Homo sapiens | Species -SELECT DISTINCT (str(?title) as ?pathwayName) ?chemical ?ChEBI ?ChemicalName ?mappedid ?LinkedStressor +PREFIX aopo: +PREFIX cheminf: + +SELECT DISTINCT (str(?title) as ?pathwayName) ?chemical ?ChEBI ?ChemicalName ?mappedid ?LinkedStressor WHERE { - ?pathway a wp:Pathway ; wp:organismName "Homo sapiens"; dcterms:identifier ?WPID ; dc:title ?title . + ?pathway a wp:Pathway ; wp:organismName "{{species}}"; dcterms:identifier ?WPID ; dc:title ?title . ?chemical a wp:Metabolite; dcterms:isPartOf ?pathway; wp:bdbChEBI ?mappedid . SERVICE { ?mappedid a cheminf:000407; cheminf:000407 ?ChEBI . diff --git a/C. Collaborations/MetaNetX/reactionID_mapping.rq b/C. Collaborations/MetaNetX/reactionID_mapping.rq index a356ffa..019e648 100644 --- a/C. Collaborations/MetaNetX/reactionID_mapping.rq +++ b/C. Collaborations/MetaNetX/reactionID_mapping.rq @@ -1,4 +1,9 @@ -#Prefixes required which might not be available in the SPARQL endpoint by default +# title: MetaNetX Reaction ID Mapping +# category: Collaborations +# description: Maps Rhea reaction IDs from a WikiPathways pathway to MetaNetX reaction +# identifiers by querying the MetaNetX SPARQL endpoint. May be slower due to external +# endpoint dependency. + PREFIX wp: PREFIX rdfs: PREFIX dcterms: diff --git a/C. Collaborations/MolMeDB/ONEpubchem_MANYpathways.rq b/C.
Collaborations/MolMeDB/ONEpubchem_MANYpathways.rq index 13f6ae9..fbf7c49 100644 --- a/C. Collaborations/MolMeDB/ONEpubchem_MANYpathways.rq +++ b/C. Collaborations/MolMeDB/ONEpubchem_MANYpathways.rq @@ -1,3 +1,10 @@ +# title: Pathways for a PubChem Compound (MolMeDB) +# category: Collaborations +# description: Finds all pathways of the selected species that contain a specific +# MolMeDB compound by resolving its PubChem identifier through the MolMeDB SPARQL +# endpoint. May be slower due to external endpoint dependency. +# param: species | string | Homo sapiens | Species + SELECT DISTINCT ?pathwayRes (str(?wpid) as ?pathway) (str(?title) as ?pathwayTitle) ((substr(str(?COMPOUND),46)) as ?PubChem) WHERE { SERVICE { @@ -11,7 +18,7 @@ SELECT DISTINCT ?pathwayRes (str(?wpid) as ?pathway) (str(?title) as ?pathwayTit wp:bdbPubChem ?COMPOUND . ?pathwayRes a wp:Pathway ; - wp:organismName "Homo sapiens"; + wp:organismName "{{species}}"; dcterms:identifier ?wpid ; dc:title ?title . } diff --git a/C. Collaborations/MolMeDB/SUBSETpathways_ONEpubchem.rq b/C. Collaborations/MolMeDB/SUBSETpathways_ONEpubchem.rq index 9f6e1fd..91b71c0 100644 --- a/C. Collaborations/MolMeDB/SUBSETpathways_ONEpubchem.rq +++ b/C. Collaborations/MolMeDB/SUBSETpathways_ONEpubchem.rq @@ -1,3 +1,10 @@ +# title: PubChem Compound in Pathway Subset (MolMeDB) +# category: Collaborations +# description: Checks a subset of pathways for the presence of a specific MolMeDB +# compound by querying the MolMeDB SPARQL endpoint. Uses nested federation with both +# MolMeDB and WikiPathways endpoints. May be slower due to external endpoint dependency. +# param: species | string | Homo sapiens | Species + SELECT DISTINCT ?pathwayRes (str(?wpid) as ?pathway) (str(?title) as ?pathwayTitle) ((substr(str(?COMPOUND),46)) as ?PubChem) WHERE { SERVICE { SERVICE { @@ -9,7 +16,7 @@ SELECT DISTINCT ?pathwayRes (str(?wpid) as ?pathway) (str(?title) as ?pathwayTit wp:bdbPubChem ?COMPOUND .
?pathwayRes a wp:Pathway ; - wp:organismName "Homo sapiens" ; + wp:organismName "{{species}}" ; dcterms:identifier ?wpid ; dc:title ?title . } diff --git a/C. Collaborations/neXtProt/ProteinCellularLocation.rq b/C. Collaborations/neXtProt/ProteinCellularLocation.rq index 85ec675..56eb06f 100644 --- a/C. Collaborations/neXtProt/ProteinCellularLocation.rq +++ b/C. Collaborations/neXtProt/ProteinCellularLocation.rq @@ -1,3 +1,9 @@ +# title: Protein Cellular Location via neXtProt +# category: Collaborations +# description: Retrieves subcellular locations for proteins in Rett syndrome pathways +# by querying the neXtProt SPARQL endpoint for gold-quality cellular component +# annotations. May be slower due to external endpoint dependency. + PREFIX : select distinct ?pathwayname ?entry str(?gen) (group_concat(distinct str(?loclab); SEPARATOR = ",") as ?locations) where { {?geneProduct a wp:Protein} diff --git a/C. Collaborations/neXtProt/ProteinMitochondria.rq b/C. Collaborations/neXtProt/ProteinMitochondria.rq index 2bf6379..799cd95 100644 --- a/C. Collaborations/neXtProt/ProteinMitochondria.rq +++ b/C. Collaborations/neXtProt/ProteinMitochondria.rq @@ -1,3 +1,9 @@ +# title: Mitochondrial Proteins via neXtProt +# category: Collaborations +# description: Identifies mitochondrial proteins in Rett syndrome pathways by querying +# the neXtProt SPARQL endpoint for gold-quality mitochondrion localization annotations. +# May be slower due to external endpoint dependency. + PREFIX : PREFIX cv: diff --git a/C. Collaborations/smallMolecules_Rhea_IDSM/molecularSimularity_Reactions.rq b/C. Collaborations/smallMolecules_Rhea_IDSM/molecularSimularity_Reactions.rq index c2e632a..4187157 100644 --- a/C. Collaborations/smallMolecules_Rhea_IDSM/molecularSimularity_Reactions.rq +++ b/C. 
Collaborations/smallMolecules_Rhea_IDSM/molecularSimularity_Reactions.rq @@ -1,3 +1,9 @@ +# title: Molecular Similarity Reactions via Rhea and IDSM +# category: Collaborations +# description: Finds structurally similar compounds for reaction sources and targets in a +# pathway by querying the IDSM structure search service for molecular similarity. May be +# slower due to external endpoint dependency. + PREFIX owl: PREFIX ebi: PREFIX sachem: diff --git a/D. General/GenesofPathway.rq b/D. General/GenesofPathway.rq index f040b00..5d5e7ca 100644 --- a/D. General/GenesofPathway.rq +++ b/D. General/GenesofPathway.rq @@ -1,8 +1,14 @@ +# title: Genes of a Pathway +# category: General +# description: Lists all gene products in a given pathway, returning the pathway +# identifier and gene product labels. +# param: pathwayId | string | WP1560 | Pathway ID + select distinct ?pathway (str(?label) as ?geneProduct) where { - ?geneProduct a wp:GeneProduct . + ?geneProduct a wp:GeneProduct . ?geneProduct rdfs:label ?label . ?geneProduct dcterms:isPartOf ?pathwayRev . ?pathwayRev a wp:Pathway . ?pathwayRev dc:identifier ?pathway . - ?pathwayRev dcterms:identifier "WP1560" . #Replace "WP1560" with WP ID of interest + ?pathwayRev dcterms:identifier "{{pathwayId}}" . } diff --git a/D. General/InteractionsofPathway.rq b/D. General/InteractionsofPathway.rq index cf65977..6e61a67 100644 --- a/D. General/InteractionsofPathway.rq +++ b/D. General/InteractionsofPathway.rq @@ -1,8 +1,14 @@ +# title: Interactions of a Pathway +# category: General +# description: Returns all interactions in a given pathway along with the +# participating data nodes and their labels. +# param: pathwayId | string | WP1425 | Pathway ID + SELECT DISTINCT ?pathway ?interaction ?participants ?DataNodeLabel WHERE { ?pathway a wp:Pathway ; - dc:identifier . + dc:identifier . ?interaction dcterms:isPartOf ?pathway ; a wp:Interaction ; wp:participants ?participants . diff --git a/D. General/MetabolitesofPathway.rq b/D. 
General/MetabolitesofPathway.rq index f4f2497..cbf2e27 100644 --- a/D. General/MetabolitesofPathway.rq +++ b/D. General/MetabolitesofPathway.rq @@ -1,7 +1,13 @@ +# title: Metabolites of a Pathway +# category: General +# description: Lists all metabolites in a given pathway, returning the pathway +# identifier and metabolite labels. +# param: pathwayId | string | WP1560 | Pathway ID + select distinct ?pathway (str(?label) as ?Metabolite) where { - ?Metabolite a wp:Metabolite ; + ?Metabolite a wp:Metabolite ; rdfs:label ?label ; dcterms:isPartOf ?pathway . ?pathway a wp:Pathway ; - dcterms:identifier "WP1560" . #Replace "WP1560" with WP ID of interest + dcterms:identifier "{{pathwayId}}" . } diff --git a/D. General/OntologyofPathway.rq b/D. General/OntologyofPathway.rq index f4a715f..9dba2e0 100644 --- a/D. General/OntologyofPathway.rq +++ b/D. General/OntologyofPathway.rq @@ -1,9 +1,15 @@ -SELECT (?o as ?pwOntologyTerm) (str(?titleLit) as ?title) ?pathway +# title: Ontology Terms of a Pathway +# category: General +# description: Retrieves all ontology tags associated with a given pathway, +# returning the ontology term URI, pathway title, and identifier. +# param: pathwayId | string | WP1560 | Pathway ID + +SELECT (?o as ?pwOntologyTerm) (str(?titleLit) as ?title) ?pathway WHERE { ?pathwayRDF wp:ontologyTag ?o ; dc:identifier ?pathway ; dc:title ?titleLit ; - dcterms:identifier "WP1560" . #Replace "WP1560" with WP ID of interest + dcterms:identifier "{{pathwayId}}" . FILTER (! regex(str(?pathway), "group")) } diff --git a/E. Literature/allPathwayswithPubMed.rq b/E. Literature/allPathwayswithPubMed.rq index 1716dee..76e6299 100644 --- a/E. Literature/allPathwayswithPubMed.rq +++ b/E. Literature/allPathwayswithPubMed.rq @@ -1,6 +1,11 @@ -SELECT DISTINCT ?pathway ?pubmed -WHERE - {?pubmed a wp:PublicationReference . 
+# title: All Pathways with PubMed References +# category: Literature +# description: Lists pathways that have associated PubMed publication references, +# returning pathway and PubMed identifiers ordered by pathway. + +SELECT DISTINCT ?pathway ?pubmed +WHERE + {?pubmed a wp:PublicationReference . ?pubmed dcterms:isPartOf ?pathway } ORDER BY ?pathway LIMIT 50 diff --git a/E. Literature/allReferencesForInteraction.rq b/E. Literature/allReferencesForInteraction.rq index b44b619..f4af000 100644 --- a/E. Literature/allReferencesForInteraction.rq +++ b/E. Literature/allReferencesForInteraction.rq @@ -1,6 +1,13 @@ +# title: All References for an Interaction +# category: Literature +# description: Returns all publication references for interactions in a given +# pathway, including references attached to both the interaction itself and its +# participating data nodes. +# param: pathwayId | string | WP5200 | Pathway ID + SELECT DISTINCT ?pathway ?interaction ?pubmed ?partnerref WHERE { ?pathway a wp:Pathway ; - dc:identifier . + dc:identifier . ?interaction dcterms:isPartOf ?pathway ; a wp:Interaction ; wp:participants ?partner; diff --git a/E. Literature/countRefsPerPW.rq b/E. Literature/countRefsPerPW.rq index 95a6891..c014b65 100644 --- a/E. Literature/countRefsPerPW.rq +++ b/E. Literature/countRefsPerPW.rq @@ -1,5 +1,10 @@ +# title: Reference Count per Pathway +# category: Literature +# description: Counts the number of PubMed publication references per pathway, +# sorted by descending reference count. + SELECT DISTINCT ?pathway COUNT(?pubmed) AS ?numberOfReferences -WHERE - {?pubmed a wp:PublicationReference . +WHERE + {?pubmed a wp:PublicationReference . ?pubmed dcterms:isPartOf ?pathway } -ORDER BY DESC(?numberOfReferences) +ORDER BY DESC(?numberOfReferences) diff --git a/E. Literature/referencesForInteraction.rq b/E. Literature/referencesForInteraction.rq index 64ab62c..2e37acb 100644 --- a/E. Literature/referencesForInteraction.rq +++ b/E. 
Literature/referencesForInteraction.rq @@ -1,8 +1,14 @@ +# title: References for an Interaction +# category: Literature +# description: Returns publication references directly attached to interactions in a +# given pathway, listing the pathway, interaction, and reference for each match. +# param: pathwayId | string | WP5200 | Pathway ID + SELECT DISTINCT ?pathway ?interaction ?pubmed WHERE { ?pathway a wp:Pathway ; - dc:identifier . #filter for one pathway + dc:identifier . #filter for one pathway ?interaction dcterms:isPartOf ?pathway ; a wp:Interaction ; dcterms:references ?pubmed ; diff --git a/E. Literature/referencesForSpecificInteraction.rq b/E. Literature/referencesForSpecificInteraction.rq index 3d3aaff..8237dfa 100644 --- a/E. Literature/referencesForSpecificInteraction.rq +++ b/E. Literature/referencesForSpecificInteraction.rq @@ -1,8 +1,15 @@ +# title: References for a Specific Interaction +# category: Literature +# description: Returns publication references for a single interaction identified by +# both a pathway and a specific participant URI. +# param: pathwayId | string | WP5200 | Pathway ID +# param: proteinId | string | P35498 | UniProt Protein ID + SELECT DISTINCT ?pathway ?interaction ?pubmed WHERE { ?pathway a wp:Pathway . - ?pathway dc:identifier . #filter for pathway + ?pathway dc:identifier . #filter for pathway ?interaction dcterms:isPartOf ?pathway . ?interaction a wp:Interaction . - ?interaction wp:participants . #filter for interaction + ?interaction wp:participants . #filter for interaction ?interaction dcterms:references ?pubmed . } LIMIT 100 diff --git a/F. Datadump/CyTargetLinkerLinksetInput.rq b/F. Datadump/CyTargetLinkerLinksetInput.rq index cf0ae34..fc124b8 100644 --- a/F. Datadump/CyTargetLinkerLinksetInput.rq +++ b/F.
Datadump/CyTargetLinkerLinksetInput.rq @@ -1,3 +1,9 @@ +# title: CyTargetLinker Linkset Input +# category: Data Export +# description: Exports pathway-gene associations formatted as input for +# CyTargetLinker, a Cytoscape app for link set analysis. Returns pathway names and +# IDs paired with HGNC gene symbols and Entrez Gene IDs. + select distinct (str(?title) as ?PathwayName) (str(?wpid) as ?PathwayID) (fn:substring(?genename,37) as ?GeneName) (fn:substring(?ncbiGeneId,34) as ?GeneID) where { ?gene a wp:DataNode ; dcterms:identifier ?id ; diff --git a/F. Datadump/dumpOntologyAndPW.rq b/F. Datadump/dumpOntologyAndPW.rq index 410959a..77fbc53 100644 --- a/F. Datadump/dumpOntologyAndPW.rq +++ b/F. Datadump/dumpOntologyAndPW.rq @@ -1,3 +1,8 @@ +# title: Ontology and Pathway Data Export +# category: Data Export +# description: Exports pathway metadata including page URLs, titles, species, identifiers, +# and associated ontology tags for bulk download. + SELECT DISTINCT ?depicts (str(?titleLit) as ?title) (str(?speciesLabelLit) as ?speciesLabel) ?identifier ?ontology WHERE { ?pathway foaf:page ?depicts . diff --git a/F. Datadump/dumpPWsofSpecies.rq b/F. Datadump/dumpPWsofSpecies.rq index 01020a6..ea37d83 100644 --- a/F. Datadump/dumpPWsofSpecies.rq +++ b/F. Datadump/dumpPWsofSpecies.rq @@ -1,8 +1,14 @@ +# title: Pathways by Species Data Export +# category: Data Export +# description: Exports all pathways for a given species, returning identifiers, +# titles, and page URLs ordered by pathway ID. +# param: species | string | Mus musculus | Species + SELECT DISTINCT ?wpIdentifier ?pathway ?title ?page WHERE { ?pathway dc:title ?title ; foaf:page ?page ; dc:identifier ?wpIdentifier ; - wp:organismName "Mus musculus" . + wp:organismName "{{species}}" . } ORDER BY ?wpIdentifier diff --git a/G. Curation/MetabolitesDoubleMappingWikidata.rq b/G. Curation/MetabolitesDoubleMappingWikidata.rq index e266d47..99fb6cc 100644 --- a/G.
Curation/MetabolitesDoubleMappingWikidata.rq +++ b/G. Curation/MetabolitesDoubleMappingWikidata.rq @@ -1,4 +1,7 @@ -# Finding double mappings to Wikidata for metabolites: +# title: Metabolites with Duplicate Wikidata Mappings +# category: Curation +# description: Detects metabolites that are mapped to more than one Wikidata +# identifier, listing all duplicate mappings per metabolite. PREFIX wdt: diff --git a/G. Curation/MetabolitesNotClassified.rq b/G. Curation/MetabolitesNotClassified.rq index ef60820..b75febe 100644 --- a/G. Curation/MetabolitesNotClassified.rq +++ b/G. Curation/MetabolitesNotClassified.rq @@ -1,4 +1,7 @@ -#Metabolites not classified as such +# title: Unclassified Metabolites +# category: Curation +# description: Finds data nodes with a data source annotation that are not classified +# as metabolites, grouped by data source with counts sorted descending. prefix wp: prefix rdfs: diff --git a/G. Curation/MetabolitesWithoutLinkWikidata.rq b/G. Curation/MetabolitesWithoutLinkWikidata.rq index 0ad8ae6..4e5e2d2 100644 --- a/G. Curation/MetabolitesWithoutLinkWikidata.rq +++ b/G. Curation/MetabolitesWithoutLinkWikidata.rq @@ -1,4 +1,7 @@ -#Metabolites without a link to Wikidata +# title: Metabolites Without Wikidata Links +# category: Curation +# description: Lists metabolites that have no Wikidata identifier mapping, useful for +# identifying gaps in cross-database linkage. PREFIX wdt: diff --git a/G. Curation/PWsWithoutDatanodes.rq b/G. Curation/PWsWithoutDatanodes.rq index 7e1f0d9..6c94c22 100644 --- a/G. Curation/PWsWithoutDatanodes.rq +++ b/G. Curation/PWsWithoutDatanodes.rq @@ -1,4 +1,7 @@ -#Pathways without (annotated) datanodes +# title: Pathways Without Data Nodes +# category: Curation +# description: Finds pathways that contain no data nodes, indicating empty or +# incomplete pathway entries that may need curation. prefix wp: prefix rdfs: diff --git a/G. Curation/PWsWithoutRef.rq b/G. 
Curation/PWsWithoutRef.rq index 2073eb7..2442109 100644 --- a/G. Curation/PWsWithoutRef.rq +++ b/G. Curation/PWsWithoutRef.rq @@ -1,4 +1,7 @@ -#Pathways without literature references +# title: Pathways Without References +# category: Curation +# description: Lists pathways that have no associated publication references, +# returning species, title, and pathway identifier sorted alphabetically. SELECT (STR(?speciesLabelLit) AS ?species) (STR(?titleLit) AS ?title) ?pathway WHERE { ?pathway a wp:Pathway ; dc:title ?titleLit ; wp:organismName ?speciesLabelLit . diff --git a/G. Curation/countPWsMetabolitesOccurSorted.rq b/G. Curation/countPWsMetabolitesOccurSorted.rq index 5fccacf..ac5bffb 100644 --- a/G. Curation/countPWsMetabolitesOccurSorted.rq +++ b/G. Curation/countPWsMetabolitesOccurSorted.rq @@ -1,4 +1,7 @@ -#Sorting the metabolites by the number of pathways they occur in +# title: Pathways by Metabolite Occurrence Count +# category: Curation +# description: Counts how many pathways each metabolite appears in, filtered to +# metabolites without a Wikidata mapping, sorted by descending pathway count. PREFIX wdt: diff --git a/G. Curation/countPWsWithoutRef.rq b/G. Curation/countPWsWithoutRef.rq index b726bb7..e6c09d2 100644 --- a/G. Curation/countPWsWithoutRef.rq +++ b/G. Curation/countPWsWithoutRef.rq @@ -1,3 +1,8 @@ +# title: Count of Pathways Without References +# category: Curation +# description: Returns the total number of pathways that have no associated +# publication references. + SELECT count(DISTINCT ?pathway) WHERE { ?pathway a wp:Pathway ; dc:title ?titleLit ; wp:organismName ?speciesLabelLit . MINUS { ?pubmed a wp:PublicationReference . diff --git a/H. Chemistry/IDSM_similaritySearch.rq b/H. Chemistry/IDSM_similaritySearch.rq index b26ab26..1b7bac5 100644 --- a/H. Chemistry/IDSM_similaritySearch.rq +++ b/H. 
Chemistry/IDSM_similaritySearch.rq @@ -1,3 +1,11 @@ +# title: IDSM Chemical Similarity Search +# category: Chemistry +# description: Finds structurally similar ChEBI compounds for source and target +# metabolites in a pathway's directed interactions via the IDSM/ChEBI structure +# search service (idsm.elixir-czech.cz). May be slower due to external endpoint +# dependency. +# param: pathwayId | string | WP4225 | Pathway ID + PREFIX owl: PREFIX ebi: PREFIX sachem: @@ -8,14 +16,14 @@ PREFIX sso: PREFIX rh: PREFIX rdfs: PREFIX xsd: -SELECT distinct ((substr(str(?chebioSrc),32)) as ?SourceOrigin) ((substr(str(?similarSrc),32)) as ?SourceSimilar) ((substr(str(?chebioTgt),32)) as ?TargetOrigin) ((substr(str(?similarTgt),32)) as ?TargetSimilar) #?reaction +SELECT distinct ((substr(str(?chebioSrc),32)) as ?SourceOrigin) ((substr(str(?similarSrc),32)) as ?SourceSimilar) ((substr(str(?chebioTgt),32)) as ?TargetOrigin) ((substr(str(?similarTgt),32)) as ?TargetSimilar) #?reaction WHERE { ?interaction dcterms:isPartOf ?pathway ; a wp:Conversion ; wp:source ?source ; wp:target ?target . ?source wp:bdbChEBI ?chebiSrc . ?target wp:bdbChEBI ?chebiTgt . - ?pathway dcterms:identifier "WP4225". + ?pathway dcterms:identifier "{{pathwayId}}". BIND(iri(concat("http://purl.obolibrary.org/obo/CHEBI_", substr(str(?chebiSrc),37))) AS ?chebioSrc) BIND(iri(concat("http://purl.obolibrary.org/obo/CHEBI_", substr(str(?chebiTgt),37))) AS ?chebioTgt) #IDSM diff --git a/H. Chemistry/smiles.rq b/H. Chemistry/smiles.rq index 7566849..22f53e3 100644 --- a/H. Chemistry/smiles.rq +++ b/H. Chemistry/smiles.rq @@ -1,3 +1,8 @@ +# title: SMILES for Metabolites +# category: Chemistry +# description: Retrieves SMILES chemical structure notations for metabolites via +# their Wikidata links. 
+ PREFIX cheminf: SELECT ?mol ?smilesDepict WHERE { diff --git a/HEADER_CONVENTIONS.md b/HEADER_CONVENTIONS.md new file mode 100644 index 0000000..ac9ec4b --- /dev/null +++ b/HEADER_CONVENTIONS.md @@ -0,0 +1,180 @@ +# Header Conventions Guide + +Definitive reference for `.rq` file header format in the WikiPathways SPARQL query collection. All enrichment work (Phases 2-4) must follow these rules. + +## Header Format Overview + +- Headers are comment lines (`#`) at the **top** of `.rq` files +- The header block ends at the **first blank line** +- One blank line separates headers from the SPARQL query body +- Fields use the format `# field: value` + +## Field Order + +Headers must appear in this order: + +``` +# title: [value] +# category: [value] +# description: [value] +# description: [continued value if multi-line] +# keywords: [optional, comma-separated] +# param: [optional, pipe-delimited] +``` + +**Required fields:** `title`, `category`, `description` +**Optional fields:** `keywords`, `param` + +## Field Specifications + +### `# title:` (required) + +One line. Clear, human-readable display name for the SNORQL UI. + +- Derived from query purpose, not the filename +- Use title case +- Keep concise (under ~60 characters) + +| Good | Bad | +|-----------------------------------|--------------------------| +| `# title: All Pathways for Species` | `# title: allPathwaysBySpecies` | +| `# title: Gene-Pathway Associations` | `# title: query1` | + +### `# category:` (required) + +One line. Exactly one value from the controlled vocabulary in `categories.json`. + +Valid values: Metadata, Data Sources, Communities, Collaborations, General, Literature, Data Export, Curation, Chemistry, DSMN, Authors. + +The category is determined by the query's directory location. See `categories.json` for the directory-to-category mapping. + +### `# description:` (required) + +Explains what the query does and what results to expect. 
+ +**Single-line:** +``` +# description: Lists all pathways in the WikiPathways database. +``` + +**Multi-line:** Repeat the `# description:` prefix on each continuation line. This is required because the SNORQL parser collects all lines matching the `# description:` prefix. Bare continuation lines (e.g., `# continued text`) are NOT captured by the UI. + +``` +# description: Lists all pathways tagged with the AOP community. +# description: Returns pathway identifiers, titles, and organism. +``` + +**Federated queries** (those containing `SERVICE` clauses) should mention federation and potential performance impact: +``` +# description: Retrieves compound mappings from MetaNetX via federation. +# description: Uses a federated SERVICE call; may be slow depending on endpoint availability. +``` + +### `# keywords:` (optional, future) + +Comma-separated values on one line. NOT currently rendered by the SNORQL UI but included for future compatibility. + +``` +# keywords: pathways, species, metadata +``` + +### `# param:` (optional, Phase 4) + +Pipe-delimited format for parameterized queries: + +``` +# param: name | type | defaultValue | label +``` + +**Supported types:** +- `string` -- free-text input +- `uri` -- expects a URI value +- `enum:val1,val2,val3` -- dropdown selection + +Multiple parameters use multiple `# param:` lines. + +## SNORQL Parser Behavior + +The SNORQL parser scans **all lines** in the file for field-prefixed patterns, not just leading lines. This means: + +1. `# title:`, `# category:`, `# description:`, and `# param:` prefixes must **only** appear in the header block +2. Inline SPARQL comments elsewhere in the file must **not** use these exact prefixes +3. 
Use alternative phrasing for inline comments (e.g., `# Note: this filters by species` instead of `# description: this filters by species`) + +## Existing Comments Handling + +During enrichment (Phase 2+): + +- **Descriptive comments** at the top of `.rq` files should be interpreted and absorbed into `# description:` headers +- **Inline usage hints** (e.g., `# Replace "WP1560" with WP ID of interest`) remain as inline comments BELOW the header block; they are not folded into the description +- **Existing `# title:` or `# description:` lines** that already follow the conventions are kept as-is + +## TTL Metadata Mapping + +For queries with `.ttl` source files, the following mapping applies. This is documented for future reference; TTL metadata extraction is NOT implemented in Phase 1. + +| TTL Field | Header Field | Notes | +|--------------------|-------------------|--------------------------------------------| +| `rdfs:label` | `# title:` | If present; otherwise derive from filename | +| `rdfs:comment` | `# description:` | May need splitting into multiple lines | +| `schema:keywords` | `# keywords:` | NOT mapped to `# category:` | +| (folder location) | `# category:` | Always derived from directory, never TTL | + +## Complete Examples + +### Example 1: Minimal query (title + category + description) + +```sparql +# title: All Pathways +# category: General +# description: Returns all pathways in the WikiPathways database with their titles and organisms. + +SELECT DISTINCT ?pathway ?title ?organism +WHERE { + ?pathway a wp:Pathway ; + dc:title ?title ; + wp:organismName ?organism . +} +ORDER BY ?title +``` + +### Example 2: Multi-line description + +```sparql +# title: AOP Community Pathways +# category: Communities +# description: Lists all pathways tagged with the Adverse Outcome Pathway (AOP) community. +# description: Returns pathway identifiers, titles, and last revision dates. +# description: Useful for tracking AOP-related content in WikiPathways. 
+ +SELECT ?pathway ?title ?date +WHERE { + ?pathway a wp:Pathway ; + dc:title ?title ; + dcterms:subject cur:AOP ; + pav:lastRefreshedOn ?date . +} +ORDER BY DESC(?date) +``` + +### Example 3: Parameterized query (Phase 4) + +```sparql +# title: Pathways by Species +# category: General +# description: Returns all pathways for a given species. +# param: species | enum:Homo sapiens,Mus musculus,Rattus norvegicus,... | Homo sapiens | Species + +SELECT ?pathway ?title +WHERE { + ?pathway a wp:Pathway ; + dc:title ?title ; + wp:organismName "{{species}}" . +} +ORDER BY ?title +``` + +--- + +*Reference document for WikiPathways SPARQL query header enrichment.* +*Controlled category vocabulary: see `categories.json`.* diff --git a/I. DirectedSmallMoleculesNetwork (DSMN)/controlling duplicate mappings from Wikidata.rq b/I. DirectedSmallMoleculesNetwork (DSMN)/controlling duplicate mappings from Wikidata.rq index 0bc6003..4eeb83e 100644 --- a/I. DirectedSmallMoleculesNetwork (DSMN)/controlling duplicate mappings from Wikidata.rq +++ b/I. DirectedSmallMoleculesNetwork (DSMN)/controlling duplicate mappings from Wikidata.rq @@ -1,3 +1,8 @@ +# title: Controlling Duplicate Mappings from Wikidata +# category: DSMN +# description: Detects metabolites mapped to multiple Wikidata identifiers as a +# quality control step in the DSMN workflow. + ### Part 1: ### #Required prefixes for querying WikiPathways content in Blazegraph PREFIX gpml: diff --git a/I. DirectedSmallMoleculesNetwork (DSMN)/extracting directed metabolic reactions.rq b/I. DirectedSmallMoleculesNetwork (DSMN)/extracting directed metabolic reactions.rq index 53d0931..c165db5 100644 --- a/I. DirectedSmallMoleculesNetwork (DSMN)/extracting directed metabolic reactions.rq +++ b/I. 
DirectedSmallMoleculesNetwork (DSMN)/extracting directed metabolic reactions.rq @@ -1,12 +1,19 @@ +# title: Extracting Directed Metabolic Reactions +# category: DSMN +# description: Extracts directed metabolite-to-metabolite interactions from pathways +# of the selected species in the AnalysisCollection, returning source and target +# identifiers, interaction types, and Rhea IDs as part of the DSMN workflow. +# param: species | string | Homo sapiens | Species + ### Part 1: ### -SELECT DISTINCT ?interaction ?sourceDb ?targetDb ?mimtype -?pathway (str(?titleLit) as ?title) +SELECT DISTINCT ?interaction ?sourceDb ?targetDb ?mimtype +?pathway (str(?titleLit) as ?title) ?sourceCHEBI ?targetDbCHEBI ?sourceHMDB ?targetDbHMDB ?InteractionID WHERE { ### Part 2: ### ?pathway a wp:Pathway ; - wp:organismName "Homo sapiens" ; + wp:organismName "{{species}}" ; dc:title ?titleLit . ### Part 3A: ### diff --git a/I. DirectedSmallMoleculesNetwork (DSMN)/extracting ontologies and references for metabolic reactions.rq b/I. DirectedSmallMoleculesNetwork (DSMN)/extracting ontologies and references for metabolic reactions.rq index 7a91a0e..595c345 100644 --- a/I. DirectedSmallMoleculesNetwork (DSMN)/extracting ontologies and references for metabolic reactions.rq +++ b/I. DirectedSmallMoleculesNetwork (DSMN)/extracting ontologies and references for metabolic reactions.rq @@ -1,9 +1,16 @@ +# title: Extracting Ontologies and References for Metabolic Reactions +# category: DSMN +# description: Retrieves ontology annotations, curation status, and literature +# references for directed metabolic reactions in pathways of the selected species as +# part of the DSMN workflow.
+# param: species | string | Homo sapiens | Species + ### Part 1: ### -SELECT DISTINCT ?interaction ?sourceDb ?targetDb ?PWOnt ?DiseaseOnt +SELECT DISTINCT ?interaction ?sourceDb ?targetDb ?PWOnt ?DiseaseOnt ?curationstatus ?InteractionRef ?PWref ?sourceLit ?targetLit WHERE { ?pathway a wp:Pathway ; - wp:organismName "Homo sapiens"; + wp:organismName "{{species}}"; dc:title ?titleLit . ?interaction dcterms:isPartOf ?pathway ; a wp:DirectedInteraction ; diff --git a/I. DirectedSmallMoleculesNetwork (DSMN)/extracting protein titles and identifiers for metabolic reactions.rq b/I. DirectedSmallMoleculesNetwork (DSMN)/extracting protein titles and identifiers for metabolic reactions.rq index 0ec618e..10efc47 100644 --- a/I. DirectedSmallMoleculesNetwork (DSMN)/extracting protein titles and identifiers for metabolic reactions.rq +++ b/I. DirectedSmallMoleculesNetwork (DSMN)/extracting protein titles and identifiers for metabolic reactions.rq @@ -1,9 +1,16 @@ +# title: Extracting Protein Titles and Identifiers for Metabolic Reactions +# category: DSMN +# description: Extracts catalyzing proteins for directed metabolic reactions in +# AnalysisCollection pathways of the selected species, returning Ensembl identifiers +# and protein names as part of the DSMN workflow. +# param: species | string | Homo sapiens | Species + ### Part 1: ### -SELECT DISTINCT ?interaction ?sourceDb ?targetDb ?proteinDBWPs ?proteinName -WHERE { +SELECT DISTINCT ?interaction ?sourceDb ?targetDb ?proteinDBWPs ?proteinName +WHERE { ?pathway a wp:Pathway ; wp:ontologyTag cur:AnalysisCollection ; -wp:organismName "Homo sapiens"; +wp:organismName "{{species}}"; dc:title ?titleLit . ?interaction dcterms:isPartOf ?pathway ; a wp:DirectedInteraction ; diff --git a/J. Authors/authorsOfAPathway.rq b/J. Authors/authorsOfAPathway.rq index 0093a44..2e6caf8 100644 --- a/J. Authors/authorsOfAPathway.rq +++ b/J.
Authors/authorsOfAPathway.rq @@ -1,10 +1,16 @@ +# title: Authors of a Pathway +# category: Authors +# description: Lists all authors of a given pathway in ordinal order, returning +# name, ORCID, homepage, and pathway version. +# param: pathwayId | string | WP4846 | Pathway ID + PREFIX dc: PREFIX foaf: PREFIX wpq: PREFIX pav: SELECT ?pathway ?version ?ordinal ?author_ ?name ?orcid ?page WHERE { - VALUES ?pathway { } + VALUES ?pathway { } ?author_ a foaf:Person ; wp:hasAuthorship ?authorship . ?authorship ^wp:hasAuthorship ?pathway ; diff --git a/J. Authors/contributors.rq b/J. Authors/contributors.rq index c59dafd..4519e6e 100644 --- a/J. Authors/contributors.rq +++ b/J. Authors/contributors.rq @@ -1,3 +1,8 @@ +# title: All Contributors +# category: Authors +# description: Counts the number of pathways each first author (ordinal 1) +# contributes to, sorted by descending pathway count. + PREFIX dc: PREFIX foaf: PREFIX wpq: diff --git a/J. Authors/firstAuthors.rq b/J. Authors/firstAuthors.rq index a442bdd..5789a35 100644 --- a/J. Authors/firstAuthors.rq +++ b/J. Authors/firstAuthors.rq @@ -1,3 +1,8 @@ +# title: First Authors of Pathways +# category: Authors +# description: Lists the first author (ordinal 1) of each pathway, ordered by +# pathway version number. + PREFIX dc: PREFIX foaf: PREFIX wpq: diff --git a/J. Authors/pathwayCountWithAtLeastXAuthors.rq b/J. Authors/pathwayCountWithAtLeastXAuthors.rq index 2026e4e..fb93921 100644 --- a/J. Authors/pathwayCountWithAtLeastXAuthors.rq +++ b/J. Authors/pathwayCountWithAtLeastXAuthors.rq @@ -1,3 +1,8 @@ +# title: Pathways with Multiple Authors +# category: Authors +# description: Counts how many pathways have at least N authors for each author +# ordinal position, showing the distribution of author counts across pathways. 
+ PREFIX dc: PREFIX wpq: diff --git a/categories.json b/categories.json new file mode 100644 index 0000000..ed9710e --- /dev/null +++ b/categories.json @@ -0,0 +1,50 @@ +{ + "categories": { + "Metadata": [ + "A. Metadata/", + "A. Metadata/datacounts/", + "A. Metadata/species/" + ], + "Data Sources": [ + "A. Metadata/datasources/" + ], + "Communities": [ + "B. Communities/AOP/", + "B. Communities/CIRM Stem Cell Pathways/", + "B. Communities/COVID19/", + "B. Communities/Inborn Errors of Metabolism/", + "B. Communities/Lipids/", + "B. Communities/RareDiseases/", + "B. Communities/Reactome/", + "B. Communities/WormBase/" + ], + "Collaborations": [ + "C. Collaborations/AOP-Wiki/", + "C. Collaborations/MetaNetX/", + "C. Collaborations/MolMeDB/", + "C. Collaborations/neXtProt/", + "C. Collaborations/smallMolecules_Rhea_IDSM/" + ], + "General": [ + "D. General/" + ], + "Literature": [ + "E. Literature/" + ], + "Data Export": [ + "F. Datadump/" + ], + "Curation": [ + "G. Curation/" + ], + "Chemistry": [ + "H. Chemistry/" + ], + "DSMN": [ + "I. DirectedSmallMoleculesNetwork (DSMN)/" + ], + "Authors": [ + "J. 
Authors/" + ] + } +} diff --git a/scripts/lint_headers.py b/scripts/lint_headers.py new file mode 100644 index 0000000..627302d --- /dev/null +++ b/scripts/lint_headers.py @@ -0,0 +1,73 @@ +"""CI lint script: validates required headers on all .rq query files.""" + +import pathlib +import re +import sys + +ROOT = pathlib.Path(__file__).resolve().parent.parent +EXCLUDED_DIRS = {".planning", ".git", ".github", "scripts", "tests"} + +REQUIRED_FIELDS = ["title", "category", "description"] +FIELD_PATTERNS = { + field: re.compile(rf"^# {field}: .+") for field in REQUIRED_FIELDS +} + + +def find_rq_files(): + """Return sorted list of .rq file paths, excluding non-query directories.""" + results = [] + for rq_file in sorted(ROOT.rglob("*.rq")): + rel = rq_file.relative_to(ROOT) + parts = rel.parts + if parts and parts[0] in EXCLUDED_DIRS: + continue + results.append(rq_file) + return results + + +def parse_header(filepath): + """Extract consecutive comment lines from the top of an .rq file.""" + lines = [] + with open(filepath, encoding="utf-8") as f: + for line in f: + stripped = line.rstrip("\n\r") + if stripped.startswith("#"): + lines.append(stripped) + else: + break + return lines + + +def lint_file(filepath): + """Check a single .rq file for required header fields. + + Returns a list of error strings (empty if file passes). 
+ """ + header = parse_header(filepath) + rel_path = filepath.relative_to(ROOT) + errors = [] + for field in REQUIRED_FIELDS: + pattern = FIELD_PATTERNS[field] + if not any(pattern.match(line) for line in header): + errors.append(f"{rel_path}: missing '# {field}:' header") + return errors + + +def main(): + """Lint all .rq files and report results.""" + rq_files = find_rq_files() + all_errors = [] + for rq_file in rq_files: + all_errors.extend(lint_file(rq_file)) + + if all_errors: + for error in all_errors: + print(f"ERROR: {error}") + sys.exit(1) + else: + print(f"OK: {len(rq_files)} files passed lint check") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/scripts/transformDotTtlToDotSparql.py b/scripts/transformDotTtlToDotSparql.py index 01e755c..821061e 100644 --- a/scripts/transformDotTtlToDotSparql.py +++ b/scripts/transformDotTtlToDotSparql.py @@ -2,29 +2,76 @@ import glob from rdflib import Graph -# Path to Turtle files -ttl_files_path = '**/*.ttl' -# Get the list of .ttl files -ttl_files = glob.glob(ttl_files_path, recursive=True) +def extract_header(filepath): + """Extract the leading comment-line header block from an .rq file. + + Reads consecutive lines starting with '#' from the top of the file, + stopping at the first blank line or first non-comment line. Returns + the header lines newline-joined with one trailing newline (no blank line), + or an empty string if no header is found or the file does not exist.
+ """ + if not os.path.exists(filepath): + return "" + + header_lines = [] + with open(filepath, encoding="utf-8") as f: + for line in f: + stripped = line.rstrip("\n") + if stripped.startswith("#"): + header_lines.append(stripped) + else: + break + + if header_lines: + return "\n".join(header_lines) + "\n" + return "" -# Process each Turtle file -for i in ttl_files: - fn = os.path.basename(i)[0:-4] # extract name without extension - sparql = i[0:-4] + ".rq" # create .rq filename + +def process_ttl_file(ttl_path): + """Parse a .ttl file and write the extracted SPARQL to a .rq file. + + If the .rq file already exists and has a comment header block, that + header is preserved above the regenerated SPARQL content. If the TTL + contains no SPARQL query, the .rq file is not touched. + """ + rq_path = ttl_path[:-4] + ".rq" + fn = os.path.basename(ttl_path)[:-4] print("file: " + fn) - - # Open .ttl file to write + + header = extract_header(rq_path) + g = Graph() - g.parse(i) + g.parse(ttl_path) - with open(sparql, 'w') as sparql_file: - knows_query = """prefix sh: + knows_query = """prefix sh: SELECT DISTINCT ?query ?sparql WHERE { ?query sh:select | sh:ask | sh:construct ?sparql . 
}""" - qres = g.query(knows_query) - for row in qres: - sparql_file.write(f"{row.sparql}") + qres = g.query(knows_query) + sparql_content = "" + for row in qres: + sparql_content += str(row.sparql) + + if not sparql_content.strip(): + print(f" WARNING: No SPARQL found in {ttl_path}, skipping .rq write") + return + + with open(rq_path, "w", encoding="utf-8") as sparql_file: + if header: + sparql_file.write(header + "\n") + sparql_file.write(sparql_content) + + +# Path to Turtle files +ttl_files_path = '**/*.ttl' + +if __name__ == "__main__": + # Get the list of .ttl files + ttl_files = glob.glob(ttl_files_path, recursive=True) + + # Process each Turtle file + for i in ttl_files: + process_ttl_file(i) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..85f5a38 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1 @@ +# Shared fixtures for CI script tests diff --git a/tests/fixtures/sample.ttl b/tests/fixtures/sample.ttl new file mode 100644 index 0000000..3150da6 --- /dev/null +++ b/tests/fixtures/sample.ttl @@ -0,0 +1,13 @@ +@prefix ex: . +@prefix rdf: . +@prefix rdfs: . +@prefix schema: . +@prefix sh: . + +ex:sample a sh:SPARQLExecutable, + sh:SPARQLSelectExecutable ; + rdfs:comment "A sample query for testing."@en ; + sh:prefixes _:sparql_examples_prefixes ; + sh:select """SELECT ?x WHERE { ?x a ?type }""" ; + schema:target ; + schema:keywords "test" . diff --git a/tests/fixtures/sample_empty.ttl b/tests/fixtures/sample_empty.ttl new file mode 100644 index 0000000..89ec874 --- /dev/null +++ b/tests/fixtures/sample_empty.ttl @@ -0,0 +1,6 @@ +@prefix ex: . +@prefix rdf: . +@prefix rdfs: . + +ex:empty a rdfs:Resource ; + rdfs:comment "A TTL file with no SPARQL query." . 
diff --git a/tests/fixtures/sample_no_header.rq b/tests/fixtures/sample_no_header.rq new file mode 100644 index 0000000..18813b5 --- /dev/null +++ b/tests/fixtures/sample_no_header.rq @@ -0,0 +1 @@ +SELECT ?old WHERE { ?old a ?type } diff --git a/tests/fixtures/sample_with_header.rq b/tests/fixtures/sample_with_header.rq new file mode 100644 index 0000000..43cf1bb --- /dev/null +++ b/tests/fixtures/sample_with_header.rq @@ -0,0 +1,5 @@ +# title: Sample Query +# category: Metadata +# description: A test query. + +SELECT ?old WHERE { ?old a ?type } diff --git a/tests/test_categories.py b/tests/test_categories.py new file mode 100644 index 0000000..a0745fc --- /dev/null +++ b/tests/test_categories.py @@ -0,0 +1,96 @@ +"""Validate the controlled category vocabulary against the filesystem.""" + +import json +import os +import pathlib + +import pytest + +ROOT = pathlib.Path(__file__).resolve().parent.parent +CATEGORIES_FILE = ROOT / "categories.json" + +EXCLUDED_DIRS = {".planning", ".git", ".github", "scripts", "tests"} + + +def load_categories(): + with open(CATEGORIES_FILE) as f: + return json.load(f) + + +def find_rq_directories(): + """Return set of relative directory paths that contain .rq files.""" + dirs = set() + for rq_file in ROOT.rglob("*.rq"): + rel = rq_file.parent.relative_to(ROOT) + # Skip excluded top-level directories + parts = rel.parts + if parts and parts[0] in EXCLUDED_DIRS: + continue + # Normalize to string with trailing slash (matching categories.json format) + dirs.add(str(rel) + "/") + return dirs + + +def all_mapped_dirs(data): + """Return set of all directories listed across all categories.""" + result = set() + for folders in data["categories"].values(): + result.update(folders) + return result + + +def category_for_dir(data, directory): + """Return the category name that contains the given directory.""" + for cat_name, folders in data["categories"].items(): + if directory in folders: + return cat_name + return None + + +class 
TestCategoriesJSON: + def test_valid_json_and_structure(self): + """categories.json loads without error and has the expected structure.""" + data = load_categories() + assert "categories" in data + assert isinstance(data["categories"], dict) + for name, folders in data["categories"].items(): + assert isinstance(name, str) + assert isinstance(folders, list) + for f in folders: + assert isinstance(f, str) + assert f.endswith("/"), f"Folder path must end with /: {f}" + + def test_exactly_11_categories(self): + """The vocabulary contains exactly 11 category names.""" + data = load_categories() + assert len(data["categories"]) == 11, ( + f"Expected 11 categories, got {len(data['categories'])}: " + f"{list(data['categories'].keys())}" + ) + + def test_all_directories_covered(self): + """Every directory containing .rq files maps to a category.""" + data = load_categories() + mapped = all_mapped_dirs(data) + fs_dirs = find_rq_directories() + unmapped = fs_dirs - mapped + assert not unmapped, ( + f"Directories with .rq files not in any category: {sorted(unmapped)}" + ) + + def test_no_orphan_directories(self): + """No query-containing directory is missing from the mapping.""" + data = load_categories() + mapped = all_mapped_dirs(data) + fs_dirs = find_rq_directories() + # Same check as above but phrased for clarity + for d in sorted(fs_dirs): + assert d in mapped, f"Directory '{d}' contains .rq files but is not mapped" + + def test_datasources_maps_to_data_sources(self): + """The datasources/ subfolder maps to 'Data Sources', not 'Metadata'.""" + data = load_categories() + cat = category_for_dir(data, "A. Metadata/datasources/") + assert cat == "Data Sources", ( + f"Expected 'Data Sources' but got '{cat}' for A. 
Metadata/datasources/" + ) diff --git a/tests/test_ci_script.py b/tests/test_ci_script.py new file mode 100644 index 0000000..5f14638 --- /dev/null +++ b/tests/test_ci_script.py @@ -0,0 +1,121 @@ +"""Tests for the CI TTL-to-SPARQL extraction script with header preservation.""" + +import os +import shutil +import sys + +import pytest + +FIXTURES = os.path.join(os.path.dirname(__file__), "fixtures") + +# Add project root to path so we can import the script module +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from scripts.transformDotTtlToDotSparql import extract_header, process_ttl_file + + +def _copy_fixture(src_name, dst_dir, dst_name=None): + """Copy a fixture file into a temp directory.""" + dst_name = dst_name or src_name + src = os.path.join(FIXTURES, src_name) + dst = os.path.join(dst_dir, dst_name) + shutil.copy2(src, dst) + return dst + + +class TestHeaderPreservation: + """Test 1: Header block is preserved when .rq is regenerated from .ttl.""" + + def test_preserves_existing_headers(self, tmp_path): + ttl = _copy_fixture("sample.ttl", tmp_path) + rq = _copy_fixture("sample_with_header.rq", tmp_path, "sample.rq") + + process_ttl_file(str(ttl)) + + content = open(rq, encoding="utf-8").read() + assert content.startswith("# title: Sample Query\n") + assert "# category: Metadata" in content + assert "# description: A test query." 
in content + # SPARQL should be the new one from the TTL, not the old one + assert "SELECT ?x WHERE { ?x a ?type }" in content + assert "SELECT ?old" not in content + + +class TestNoHeader: + """Test 2: .rq with no headers stays headerless after regeneration.""" + + def test_no_phantom_header_injected(self, tmp_path): + ttl = _copy_fixture("sample.ttl", tmp_path) + _copy_fixture("sample_no_header.rq", tmp_path, "sample.rq") + + process_ttl_file(str(ttl)) + + content = open(os.path.join(tmp_path, "sample.rq"), encoding="utf-8").read() + assert not content.startswith("#") + assert "SELECT ?x WHERE { ?x a ?type }" in content + + +class TestNoExistingRq: + """Test 3: When no .rq exists, one is created with just SPARQL.""" + + def test_creates_rq_from_scratch(self, tmp_path): + ttl = _copy_fixture("sample.ttl", tmp_path) + rq_path = os.path.join(tmp_path, "sample.rq") + assert not os.path.exists(rq_path) + + process_ttl_file(str(ttl)) + + assert os.path.exists(rq_path) + content = open(rq_path, encoding="utf-8").read() + assert "SELECT ?x WHERE { ?x a ?type }" in content + assert not content.startswith("#") + + +class TestSparqlCorrectness: + """Test 4: Extracted SPARQL matches expected output (regression test).""" + + def test_exact_sparql_extraction(self, tmp_path): + ttl = _copy_fixture("sample.ttl", tmp_path) + + process_ttl_file(str(ttl)) + + content = open(os.path.join(tmp_path, "sample.rq"), encoding="utf-8").read() + assert content.strip() == "SELECT ?x WHERE { ?x a ?type }" + + +class TestBlankLineSeparator: + """Test 5: Exactly one blank line separates header block from SPARQL.""" + + def test_single_blank_line_between_header_and_sparql(self, tmp_path): + ttl = _copy_fixture("sample.ttl", tmp_path) + _copy_fixture("sample_with_header.rq", tmp_path, "sample.rq") + + process_ttl_file(str(ttl)) + + content = open(os.path.join(tmp_path, "sample.rq"), encoding="utf-8").read() + # Split on the last header line + lines = content.split("\n") + # Find the transition 
from header to SPARQL + header_end = -1 + for idx, line in enumerate(lines): + if line.startswith("#"): + header_end = idx + # Line after last header should be blank, then SPARQL + assert lines[header_end + 1] == "", "Expected blank line after header" + assert lines[header_end + 2].startswith("SELECT"), "Expected SPARQL after blank line" + + +class TestErrorGuard: + """Test 6: Empty TTL (no SPARQL query) does not overwrite existing .rq.""" + + def test_does_not_overwrite_on_empty_sparql(self, tmp_path): + ttl = _copy_fixture("sample_empty.ttl", tmp_path) + rq_path = os.path.join(tmp_path, "sample_empty.rq") + # Create a pre-existing .rq with content + with open(rq_path, "w") as f: + f.write("SELECT ?existing WHERE { ?existing a ?type }\n") + + process_ttl_file(str(ttl)) + + content = open(rq_path, encoding="utf-8").read() + assert "SELECT ?existing" in content, "Existing .rq should not be overwritten" diff --git a/tests/test_headers.py b/tests/test_headers.py new file mode 100644 index 0000000..66f2346 --- /dev/null +++ b/tests/test_headers.py @@ -0,0 +1,176 @@ +"""Validate that all .rq files have required header fields (title, category, description).""" + +import json +import pathlib +import re + +import pytest + +ROOT = pathlib.Path(__file__).resolve().parent.parent +CATEGORIES_FILE = ROOT / "categories.json" + +EXCLUDED_DIRS = {".planning", ".git", ".github", "scripts", "tests"} + + +def find_rq_files(): + """Return sorted list of .rq file paths, excluding tests/ and other non-query dirs.""" + results = [] + for rq_file in sorted(ROOT.rglob("*.rq")): + rel = rq_file.relative_to(ROOT) + parts = rel.parts + if parts and parts[0] in EXCLUDED_DIRS: + continue + results.append(rq_file) + return results + + +def parse_header(filepath): + """Extract header block from an .rq file. + + The header block is the consecutive sequence of lines starting with '#' + at the top of the file, ending at the first blank line or non-comment line.
+ Returns a list of header line strings (line endings removed; the leading '#' is kept). + """ + lines = [] + with open(filepath, encoding="utf-8") as f: + for line in f: + stripped = line.rstrip("\n\r") + if stripped.startswith("#"): + lines.append(stripped) + else: + break + return lines + + +def load_valid_categories(): + """Return the set of valid category names from categories.json.""" + with open(CATEGORIES_FILE, encoding="utf-8") as f: + data = json.load(f) + return set(data["categories"].keys()) + + +# Collect files once at module level for parametrization +_RQ_FILES = find_rq_files() +_RQ_PARAMS = [ + pytest.param(f, id=str(f.relative_to(ROOT))) for f in _RQ_FILES +] + + +@pytest.mark.parametrize("rq_file", _RQ_PARAMS) +def test_all_rq_have_title(rq_file): + """Every .rq file must have a '# title: ...' line in its header block.""" + header = parse_header(rq_file) + title_pattern = re.compile(r"^# title: .+") + titles = [line for line in header if title_pattern.match(line)] + assert titles, ( + f"Missing '# title:' header in {rq_file.relative_to(ROOT)}" + ) + + +@pytest.mark.parametrize("rq_file", _RQ_PARAMS) +def test_all_rq_have_valid_category(rq_file): + """Every .rq file must have a '# category: VALUE' line with a valid category.""" + header = parse_header(rq_file) + valid = load_valid_categories() + cat_pattern = re.compile(r"^# category: (.+)") + categories = [] + for line in header: + m = cat_pattern.match(line) + if m: + categories.append(m.group(1).strip()) + assert categories, ( + f"Missing '# category:' header in {rq_file.relative_to(ROOT)}" + ) + for cat in categories: + assert cat in valid, ( + f"Invalid category '{cat}' in {rq_file.relative_to(ROOT)}. " + f"Valid categories: {sorted(valid)}" + ) + + +@pytest.mark.parametrize("rq_file", _RQ_PARAMS) +def test_all_rq_have_description(rq_file): + """Every .rq file must have a '# description: ...'
line in its header block.""" + header = parse_header(rq_file) + desc_pattern = re.compile(r"^# description: .+") + descriptions = [line for line in header if desc_pattern.match(line)] + assert descriptions, ( + f"Missing '# description:' header in {rq_file.relative_to(ROOT)}" + ) + + +def test_titles_are_unique(): + """All title values across .rq files must be unique (no duplicates).""" + title_pattern = re.compile(r"^# title: (.+)") + seen = {} + for rq_file in _RQ_FILES: + header = parse_header(rq_file) + for line in header: + m = title_pattern.match(line) + if m: + title = m.group(1).strip() + rel = str(rq_file.relative_to(ROOT)) + if title in seen: + seen[title].append(rel) + else: + seen[title] = [rel] + duplicates = {t: files for t, files in seen.items() if len(files) > 1} + assert not duplicates, ( + f"Duplicate titles found: {duplicates}" + ) + + +def test_header_field_order(): + """When title, category, description are present, they must appear in that order.""" + title_pattern = re.compile(r"^# title: ") + cat_pattern = re.compile(r"^# category: ") + desc_pattern = re.compile(r"^# description: ") + for rq_file in _RQ_FILES: + header = parse_header(rq_file) + title_idx = None + cat_idx = None + desc_idx = None + for i, line in enumerate(header): + if title_pattern.match(line) and title_idx is None: + title_idx = i + if cat_pattern.match(line) and cat_idx is None: + cat_idx = i + if desc_pattern.match(line) and desc_idx is None: + desc_idx = i + if title_idx is not None and cat_idx is not None: + assert title_idx < cat_idx, ( + f"In {rq_file.relative_to(ROOT)}: title (line {title_idx}) " + f"must appear before category (line {cat_idx})" + ) + if cat_idx is not None and desc_idx is not None: + assert cat_idx < desc_idx, ( + f"In {rq_file.relative_to(ROOT)}: category (line {cat_idx}) " + f"must appear before description (line {desc_idx})" + ) + + +def test_blank_line_separator(): + """Files with structured header fields must have a blank line before the query 
body.""" + field_pattern = re.compile(r"^# (title|category|description|keywords|param): ") + for rq_file in _RQ_FILES: + header = parse_header(rq_file) + # Only check files that have at least one structured header field + has_field = any(field_pattern.match(line) for line in header) + if not has_field: + continue + with open(rq_file, encoding="utf-8") as f: + content = f.read() + lines = content.split("\n") + # Find end of header block (consecutive # lines at top) + header_end = 0 + for i, line in enumerate(lines): + if line.startswith("#"): + header_end = i + 1 + else: + break + # The line immediately after the header block should be blank + if header_end < len(lines): + assert lines[header_end].strip() == "", ( + f"In {rq_file.relative_to(ROOT)}: expected blank line after " + f"header block at line {header_end + 1}, got: '{lines[header_end]}'" + )