7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -7,9 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

Initial release of nf-core/proteinannotator, created with the [nf-core](https://nf-co.re/) template.

### Credits

Special thanks to the following for their contributions to the release:

- [Tien Ly](https://github.com/tntly)

### `Added`

- [[PR #13](https://github.com/nf-core/proteinannotator/pull/13)] Add nf-core seqkit/stats module
- [[PR #17](https://github.com/nf-core/proteinannotator/pull/17)] Add UniFIRE module

### `Fixed`

5 changes: 5 additions & 0 deletions CITATIONS.md
@@ -14,6 +14,11 @@

> Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].

- [UniFIRE](https://gitlab.ebi.ac.uk/uniprot-public/unifire)

> UniFIRE (The UniProt Functional annotation Inference Rule Engine) is an engine to execute rules in the UniProt Rule Markup Language (URML) format. It can be used to execute the UniProt annotation rules (UniRule and ARBA).
> License: Apache License 2.0

- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)

> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
5 changes: 4 additions & 1 deletion README.md
@@ -29,7 +29,10 @@

<!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core
workflows use the "tube map" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples. -->
<!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->1. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
<!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->
1. Functional Annotation
   1. Perform UniProt's official annotation pipeline ([`UniFIRE`](https://gitlab.ebi.ac.uk/uniprot-public/unifire))
1. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))

## Usage

2 changes: 2 additions & 0 deletions conf/sampleesheet.csv
@@ -0,0 +1,2 @@
id,fasta
snap25_isoforms,https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/kmerseek/reference/snap25_isoforms_human_P60880.fasta
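
For orientation, a minimal sketch of how an `id,fasta` samplesheet like this can be turned into the `[ meta, fasta ]` tuples the UNIFIRE module expects (illustrative only; the channel name and parsing approach are assumptions, not the pipeline's actual input handling):

```nextflow
// Illustrative sketch: read an id,fasta samplesheet into [ meta, fasta ] tuples.
// Remote FASTA URLs are handled by file(), which Nextflow stages automatically.
ch_fasta = Channel
    .fromPath('conf/sampleesheet.csv')
    .splitCsv(header: true)
    .map { row -> [ [ id: row.id ], file(row.fasta) ] }
```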
2 changes: 1 addition & 1 deletion conf/test.config
@@ -27,5 +27,5 @@ params {
// TODO nf-core: Give any required params for the test so that command line flags are not needed
// From: https://github.com/nf-core/proteinfold/blob/1.1.1/conf/test.config
// Example: https://github.com/nf-core/test-datasets/blob/proteinfold/testdata/samplesheet/v1.2/samplesheet.csv
input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv'
input = params.pipelines_testdata_base_path + 'conf/samplesheet.csv'
}
4 changes: 1 addition & 3 deletions conf/test_full.config
@@ -17,8 +17,6 @@ params {
// Input data for full size test
// TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. SRA)
// TODO nf-core: Give any required params for the test so that command line flags are not needed
input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv'
input = params.pipelines_testdata_base_path + 'proteinannotator/samplesheet/snap25-isoforms.csv'

// Genome references
genome = 'R64-1-1'
}
18 changes: 18 additions & 0 deletions docs/output.md
@@ -12,10 +12,28 @@ The directories listed below will be created in the results directory after the

The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:

- [Functional Annotation](#functional-annotation)
  - [UniFIRE](#unifire) - Run UniProt's official UniFIRE workflow for protein function prediction
- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
- [SeqKit stats](#seqkit_stats) - Simple statistics for protein FASTA files
- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution

### Functional Annotation

#### UniFIRE

<details markdown="1">
<summary>Output files</summary>

- `unifire/`
  - `predictions_arba.out`: a file containing predictions from the ARBA method.
  - `predictions_unirule.out`: a file containing predictions from the UniRule method.
  - `predictions_unirule-pirsr.out`: a file containing predictions from the PIRSR method.

</details>

[UniFIRE](https://gitlab.ebi.ac.uk/uniprot-public/unifire) (The UniProt Functional annotation Inference Rule Engine) is an engine to execute rules in the UniProt Rule Markup Language (URML) format. It can be used to execute the UniProt annotation rules (UniRule and ARBA).

### MultiQC

<details markdown="1">
57 changes: 57 additions & 0 deletions modules/local/unifire/main.nf
@@ -0,0 +1,57 @@
process UNIFIRE {
tag "$meta.id"
label 'process_large'

container "dockerhub.ebi.ac.uk/uniprot-public/unifire:2025.1" // TODO: Update once Bioconda is available
containerOptions {
if (workflow.containerEngine in ['singularity', 'apptainer']) {
return "--bind unifire:/volume"
} else {
return "-v ./unifire:/volume"
}
}

input:
tuple val(meta), path(faa, stageAs: "unifire/proteins.fasta")

output:
tuple val(meta), path("unifire/predictions_arba.out") , emit: arba
tuple val(meta), path("unifire/predictions_unirule.out") , emit: unirule
tuple val(meta), path("unifire/predictions_unirule-pirsr.out"), emit: pirsr
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def VERSION = '2025.1'
// WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
"""
# This tool needs a specific folder to be mounted to work.
# Run UniFIRE workflow
/opt/scripts/bin/unifire-workflow.sh

cat <<-END_VERSIONS > versions.yml
"${task.process}":
UniFIRE: ${VERSION}
END_VERSIONS
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def VERSION = '2025.1'
"""
mkdir -p unifire
touch unifire/predictions_arba.out
touch unifire/predictions_unirule.out
touch unifire/predictions_unirule-pirsr.out

cat <<-END_VERSIONS > versions.yml
"${task.process}":
UniFIRE: ${VERSION}
END_VERSIONS
"""
}
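
A minimal usage sketch for the new local module (assuming a DSL2 workflow with a channel of `[ meta, fasta ]` tuples; the include path follows this PR's layout, everything else is illustrative):

```nextflow
include { UNIFIRE } from './modules/local/unifire/main'

workflow FUNCTIONAL_ANNOTATION {
    take:
    ch_fasta        // channel: [ val(meta), path(fasta) ]

    main:
    UNIFIRE(ch_fasta)

    emit:
    arba     = UNIFIRE.out.arba       // unifire/predictions_arba.out
    unirule  = UNIFIRE.out.unirule    // unifire/predictions_unirule.out
    pirsr    = UNIFIRE.out.pirsr      // unifire/predictions_unirule-pirsr.out
    versions = UNIFIRE.out.versions   // versions.yml
}
```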
65 changes: 65 additions & 0 deletions modules/local/unifire/meta.yml
@@ -0,0 +1,65 @@
name: "unifire"
description: Runs the UniFIRE workflow for protein function prediction
keywords:
- uniprot
- unifire
- protein function prediction
- functional annotation
- proteomics
tools:
- "unifire":
description: "UniFIRE: Unified Function Inference and Rule Extraction"
homepage: "https://gitlab.ebi.ac.uk/uniprot-public/unifire"
documentation: "https://gitlab.ebi.ac.uk/uniprot-public/unifire/-/blob/master/README.md?ref_type=heads"
tool_dev_url: "https://gitlab.ebi.ac.uk/uniprot-public/unifire"
licence: ["Apache-2.0"]
input:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'sample1', single_end:false ]
- faa:
type: file
description: A protein sequence file in FASTA format
pattern: "*.{faa,faa.gz,fasta,fasta.gz,fas,fas.gz,fa,fa.gz}"
output:
- arba:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- "unifire/predictions_arba.out":
type: file
description: Predictions from the ARBA method
pattern: "unifire/predictions_arba.out"
- unirule:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- "unifire/predictions_unirule.out":
type: file
description: Predictions from the UniRule method
pattern: "unifire/predictions_unirule.out"
- pirsr:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- "unifire/predictions_unirule-pirsr.out":
type: file
description: Predictions from the PIRSR method
pattern: "unifire/predictions_unirule-pirsr.out"
- versions:
- versions.yml:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@tntly"
maintainers:
- "@tntly"
75 changes: 75 additions & 0 deletions modules/local/unifire/tests/main.nf.test
@@ -0,0 +1,75 @@
nextflow_process {

name "Test Process UNIFIRE"
script "../main.nf"
process "UNIFIRE"
config './nextflow.config'

tag "modules"
tag "modules_nfcore"
tag "unifire"
tag "seqkit"
tag "seqkit/head"

setup {
run("SEQKIT_HEAD") {
script "modules/nf-core/seqkit/head/main.nf"
process {
"""
input[0] = Channel.from([
[ id:'test1' ], // meta
[file(params.modules_testdata_base_path + 'kmerseek/reference/snap25_isoforms_human_P60880.fasta', checkIfExists: true)], // fastas
1 // seq_count
])
"""
}
}
}

test("human - fasta") {

when {
process {
"""
input[0] = SEQKIT_HEAD.out.subset
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.arba).match("arba") },
{ assert snapshot(process.out.unirule).match("unirule") },
{ assert snapshot(process.out.pirsr).match("pirsr") },
{ assert snapshot(process.out.versions).match("versions") }
)
}

}

test("human - fasta - stub") {

options "-stub"

when {
process {
"""
input[0] = SEQKIT_HEAD.out.subset
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.arba).match("arba_stub") },
{ assert snapshot(process.out.unirule).match("unirule_stub") },
{ assert snapshot(process.out.pirsr).match("pirsr_stub") },
{ assert snapshot(process.out.versions).match("versions_stub") }
)
}

}

}