diff --git a/nextstrain-pathogen.yaml b/nextstrain-pathogen.yaml index b74c50d3..ad223f69 100644 --- a/nextstrain-pathogen.yaml +++ b/nextstrain-pathogen.yaml @@ -1,5 +1,32 @@ -# This is currently an empty file to indicate the top level pathogen repo. # The inclusion of this file allows the Nextstrain CLI to run the # `nextstrain build` from any directory regardless of runtime. # # See https://github.com/nextstrain/cli/releases/tag/8.2.0 for more details. +--- +workflows: + ingest: + compatibility: + nextstrain run: True + phylogenetic/all-clades: + compatibility: + nextstrain run: True + snakefile: phylogenetic/Snakefile + configfile: phylogenetic/defaults/mpxv/config.yaml + phylogenetic/clade-I: + compatibility: + nextstrain run: True + snakefile: phylogenetic/Snakefile + configfile: phylogenetic/defaults/clade-i/config.yaml + phylogenetic/clade-IIb: + compatibility: + nextstrain run: True + snakefile: phylogenetic/Snakefile + configfile: phylogenetic/defaults/hmpxv1/config.yaml + phylogenetic/lineage-B.1: + compatibility: + nextstrain run: True + snakefile: phylogenetic/Snakefile + configfile: phylogenetic/defaults/hmpxv1_big/config.yaml + nextclade: + compatibility: + nextstrain run: False diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile index f9a67e9d..ca15dece 100644 --- a/phylogenetic/Snakefile +++ b/phylogenetic/Snakefile @@ -12,7 +12,7 @@ if version.parse(augur_version) < version.parse(min_augur_version): if not config: - configfile: "defaults/hmpxv1/config.yaml" + configfile: os.path.join(workflow.basedir, "defaults/hmpxv1/config.yaml") build_dir = "results" @@ -49,7 +49,7 @@ include: "rules/export.smk" if "custom_rules" in config: for rule_file in config["custom_rules"]: - include: rule_file + include: os.path.join(os.getcwd(), rule_file) rule clean: diff --git a/phylogenetic/defaults/clade-i/config.yaml b/phylogenetic/defaults/clade-i/config.yaml index 82b6170c..68c32272 100644 --- a/phylogenetic/defaults/clade-i/config.yaml +++ b/phylogenetic/defaults/clade-i/config.yaml @@ -1,15 +1,15 @@ -reference: "defaults/clade-i/reference.fasta" -genome_annotation: "defaults/clade-i/genome_annotation.gff3" -genbank_reference: "defaults/clade-i/reference.gb" -include: "defaults/clade-i/include.txt" -exclude: "defaults/exclude.txt" -clades: "defaults/clades.tsv" -lat_longs: "defaults/lat_longs.tsv" -color_ordering: "defaults/color_ordering.tsv" -color_scheme: "defaults/color_schemes.tsv" -auspice_config: "defaults/clade-i/auspice_config.json" -description: "defaults/description.md" -tree_mask: "defaults/clade-i/tree_mask.tsv" +reference: "clade-i/reference.fasta" +genome_annotation: "clade-i/genome_annotation.gff3" +genbank_reference: "clade-i/reference.gb" +include: "clade-i/include.txt" +exclude: "exclude.txt" +clades: "clades.tsv" +lat_longs: "lat_longs.tsv" +color_ordering: "color_ordering.tsv" +color_scheme: "color_schemes.tsv" +auspice_config: "clade-i/auspice_config.json" +description: "description.md" +tree_mask: "clade-i/tree_mask.tsv" # Use `accession` as the ID column since `strain` currently contains duplicates¹. # ¹ https://github.com/nextstrain/mpox/issues/33 @@ -59,7 +59,7 @@ recency: true mask: from_beginning: 800 from_end: 6422 - maskfile: "defaults/clade-i/mask.bed" + maskfile: "clade-i/mask.bed" colors: ignore_categories: diff --git a/phylogenetic/defaults/hmpxv1/config.yaml b/phylogenetic/defaults/hmpxv1/config.yaml index 6f95b154..c388c913 100644 --- a/phylogenetic/defaults/hmpxv1/config.yaml +++ b/phylogenetic/defaults/hmpxv1/config.yaml @@ -1,15 +1,15 @@ -reference: "defaults/reference.fasta" -genome_annotation: "defaults/genome_annotation.gff3" -genbank_reference: "defaults/reference.gb" -include: "defaults/hmpxv1/include.txt" -exclude: "defaults/exclude.txt" -clades: "defaults/clades.tsv" -lat_longs: "defaults/lat_longs.tsv" -color_ordering: "defaults/color_ordering.tsv" -color_scheme: "defaults/color_schemes.tsv" -auspice_config: "defaults/hmpxv1/auspice_config.json" -description: "defaults/description.md" -tree_mask: "defaults/tree_mask.tsv" +reference: "reference.fasta" +genome_annotation: "genome_annotation.gff3" +genbank_reference: "reference.gb" +include: "hmpxv1/include.txt" +exclude: "exclude.txt" +clades: "clades.tsv" +lat_longs: "lat_longs.tsv" +color_ordering: "color_ordering.tsv" +color_scheme: "color_schemes.tsv" +auspice_config: "hmpxv1/auspice_config.json" +description: "description.md" +tree_mask: "tree_mask.tsv" # Use `accession` as the ID column since `strain` currently contains duplicates¹. # ¹ https://github.com/nextstrain/mpox/issues/33 @@ -101,4 +101,4 @@ recency: true mask: from_beginning: 800 from_end: 6422 - maskfile: "defaults/mask.bed" + maskfile: "mask.bed" diff --git a/phylogenetic/defaults/hmpxv1_big/config.yaml b/phylogenetic/defaults/hmpxv1_big/config.yaml index 0e8220dd..83afeeaa 100644 --- a/phylogenetic/defaults/hmpxv1_big/config.yaml +++ b/phylogenetic/defaults/hmpxv1_big/config.yaml @@ -1,15 +1,15 @@ -reference: "defaults/reference.fasta" -genome_annotation: "defaults/genome_annotation.gff3" -genbank_reference: "defaults/reference.gb" -include: "defaults/hmpxv1_big/include.txt" -exclude: "defaults/exclude.txt" -clades: "defaults/clades.tsv" -lat_longs: "defaults/lat_longs.tsv" -color_ordering: "defaults/color_ordering.tsv" -color_scheme: "defaults/color_schemes.tsv" -auspice_config: "defaults/hmpxv1_big/auspice_config.json" -description: "defaults/description.md" -tree_mask: "defaults/tree_mask.tsv" +reference: "reference.fasta" +genome_annotation: "genome_annotation.gff3" +genbank_reference: "reference.gb" +include: "hmpxv1_big/include.txt" +exclude: "exclude.txt" +clades: "clades.tsv" +lat_longs: "lat_longs.tsv" +color_ordering: "color_ordering.tsv" +color_scheme: "color_schemes.tsv" +auspice_config: "hmpxv1_big/auspice_config.json" +description: "description.md" +tree_mask: "tree_mask.tsv" # Use `accession` as the ID column since `strain` currently contains duplicates¹. # ¹ https://github.com/nextstrain/mpox/issues/33 @@ -64,4 +64,4 @@ recency: true mask: from_beginning: 800 from_end: 6422 - maskfile: "defaults/mask.bed" + maskfile: "mask.bed" diff --git a/phylogenetic/defaults/mpxv/config.yaml b/phylogenetic/defaults/mpxv/config.yaml index 7f39b283..7592b023 100644 --- a/phylogenetic/defaults/mpxv/config.yaml +++ b/phylogenetic/defaults/mpxv/config.yaml @@ -1,15 +1,15 @@ -auspice_config: "defaults/mpxv/auspice_config.json" -include: "defaults/mpxv/include.txt" -exclude: "defaults/exclude.txt" -reference: "defaults/reference.fasta" -genome_annotation: "defaults/genome_annotation.gff3" -genbank_reference: "defaults/reference.gb" -lat_longs: "defaults/lat_longs.tsv" -color_ordering: "defaults/color_ordering.tsv" -color_scheme: "defaults/color_schemes.tsv" -description: "defaults/description.md" -clades: "defaults/clades.tsv" -tree_mask: "defaults/tree_mask.tsv" +auspice_config: "mpxv/auspice_config.json" +include: "mpxv/include.txt" +exclude: "exclude.txt" +reference: "reference.fasta" +genome_annotation: "genome_annotation.gff3" +genbank_reference: "reference.gb" +lat_longs: "lat_longs.tsv" +color_ordering: "color_ordering.tsv" +color_scheme: "color_schemes.tsv" +description: "description.md" +clades: "clades.tsv" +tree_mask: "tree_mask.tsv" # Use `accession` as the ID column since `strain` currently contains duplicates¹. # ¹ https://github.com/nextstrain/mpox/issues/33 @@ -94,4 +94,4 @@ recency: true mask: from_beginning: 1350 from_end: 6422 - maskfile: "defaults/mask_overview.bed" + maskfile: "mask_overview.bed" diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk index a94433f3..cd65d186 100644 --- a/phylogenetic/rules/annotate_phylogeny.smk +++ b/phylogenetic/rules/annotate_phylogeny.smk @@ -60,7 +60,7 @@ rule translate: input: tree=build_dir + "/{build_name}/tree.nwk", node_data=build_dir + "/{build_name}/nt_muts.json", - genome_annotation=config["genome_annotation"], + genome_annotation=resolve_config_path(config["genome_annotation"]), output: node_data=build_dir + "/{build_name}/aa_muts.json", log: @@ -120,7 +120,7 @@ rule clades: tree=build_dir + "/{build_name}/tree.nwk", aa_muts=build_dir + "/{build_name}/aa_muts.json", nuc_muts=build_dir + "/{build_name}/nt_muts.json", - clades=config["clades"], + clades=resolve_config_path(config["clades"]), output: node_data=build_dir + "/{build_name}/clades_raw.json", log: @@ -154,7 +154,7 @@ rule rename_clades: r""" exec &> >(tee {log:q}) - python scripts/clades_renaming.py \ + python {workflow.basedir}/scripts/clades_renaming.py \ --input-node-data {input:q} \ --output-node-data {output.node_data:q} """ @@ -180,7 +180,7 @@ rule assign_clades_via_metadata: r""" exec &> >(tee {log:q}) - python scripts/assign-clades-via-metadata.py \ + python {workflow.basedir}/scripts/assign-clades-via-metadata.py \ --metadata {input.metadata:q} \ --tree {input.tree:q} \ --output-node-data {output.node_data:q} @@ -201,7 +201,7 @@ rule mutation_context: r""" exec &> >(tee {log:q}) - python3 scripts/mutation_context.py \ + python3 {workflow.basedir}/scripts/mutation_context.py \ --tree {input.tree:q} \ --mutations {input.node_data:q} \ --output {output.node_data:q} @@ -226,7 +226,7 @@ rule recency: r""" exec &> >(tee {log:q}) - python3 scripts/construct-recency-from-submission-date.py \ + python3 {workflow.basedir}/scripts/construct-recency-from-submission-date.py \ --metadata {input.metadata:q} \ --metadata-id-columns {params.strain_id:q} \ --output {output:q} 2>&1 diff --git a/phylogenetic/rules/config.smk b/phylogenetic/rules/config.smk index 0d94d6b4..0dedb67c 100644 --- a/phylogenetic/rules/config.smk +++ b/phylogenetic/rules/config.smk @@ -7,6 +7,9 @@ from textwrap import dedent, indent from typing import Union +include: "../../shared/vendored/snakemake/config.smk" + + def as_list(config_param: Union[list,str]) -> list: if isinstance(config_param, list): return config_param diff --git a/phylogenetic/rules/construct_phylogeny.smk b/phylogenetic/rules/construct_phylogeny.smk index f9609cff..40719785 100644 --- a/phylogenetic/rules/construct_phylogeny.smk +++ b/phylogenetic/rules/construct_phylogeny.smk @@ -21,7 +21,7 @@ rule tree: """ input: alignment=build_dir + "/{build_name}/masked.fasta", - tree_mask=config["tree_mask"], + tree_mask=resolve_config_path(config["tree_mask"]), output: tree=build_dir + "/{build_name}/tree_raw.nwk", threads: workflow.cores @@ -64,7 +64,7 @@ rule fix_tree: r""" exec &> >(tee {log:q}) - python3 scripts/fix_tree.py \ + python3 {workflow.basedir}/scripts/fix_tree.py \ --alignment {input.alignment:q} \ --input-tree {input.tree:q} \ {params.root} \ diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index 01ed2699..0cf5a9f8 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -43,7 +43,7 @@ rule remove_time: r""" exec &> >(tee {log:q}) - python3 scripts/remove_timeinfo.py \ + python3 {workflow.basedir}/scripts/remove_timeinfo.py \ --input-node-data {input:q} \ --output-node-data {output:q} """ @@ -51,8 +51,8 @@ rule remove_time: rule colors: input: - ordering=config["color_ordering"], - color_schemes=config["color_scheme"], + ordering=resolve_config_path(config["color_ordering"]), + color_schemes=resolve_config_path(config["color_scheme"]), metadata=build_dir + "/{build_name}/metadata.tsv", output: colors=build_dir + "/{build_name}/colors.tsv", @@ -66,7 +66,7 @@ rule colors: r""" exec &> >(tee {log:q}) - python3 scripts/assign-colors.py \ + python3 {workflow.basedir}/scripts/assign-colors.py \ --ordering {input.ordering:q} \ --color-schemes {input.color_schemes:q} \ --output {output.colors:q} \ @@ -102,9 +102,9 @@ rule export: else [] ), colors=build_dir + "/{build_name}/colors.tsv", - lat_longs=config["lat_longs"], - description=config["description"], - auspice_config=config["auspice_config"], + lat_longs=resolve_config_path(config["lat_longs"]), + description=resolve_config_path(config["description"]), + auspice_config=resolve_config_path(config["auspice_config"]), output: auspice_json=build_dir + "/{build_name}/tree.json", root_sequence=build_dir + "/{build_name}/tree_root-sequence.json", diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk index 9b7abc02..c0e4d058 100644 --- a/phylogenetic/rules/prepare_sequences.smk +++ b/phylogenetic/rules/prepare_sequences.smk @@ -69,7 +69,7 @@ rule filter: input: sequences="data/sequences.fasta", metadata="data/metadata.tsv", - exclude=config["exclude"], + exclude=resolve_config_path(config["exclude"]), output: sequences=build_dir + "/{build_name}/good_sequences.fasta", metadata=build_dir + "/{build_name}/good_metadata.tsv", @@ -128,7 +128,7 @@ rule add_private_data: r""" exec &> >(tee {log:q}) - python3 scripts/combine_data_sources.py \ + python3 {workflow.basedir}/scripts/combine_data_sources.py \ --metadata nextstrain={input.metadata:q} private={input.private_metadata:q} \ --sequences {input.sequences:q} {input.private_sequences:q} \ --output-metadata {output.metadata:q} \ @@ -182,7 +182,7 @@ rule combine_samples: if config.get("private_metadata", False) else build_dir + "/{build_name}/good_metadata.tsv" ), - include=config["include"], + include=resolve_config_path(config["include"]), output: sequences=build_dir + "/{build_name}/filtered.fasta", metadata=build_dir + "/{build_name}/metadata.tsv", @@ -221,7 +221,7 @@ rule reverse_reverse_complements: r""" exec &> >(tee {log:q}) - python3 scripts/reverse_reversed_sequences.py \ + python3 {workflow.basedir}/scripts/reverse_reversed_sequences.py \ --metadata {input.metadata:q} \ --sequences {input.sequences:q} \ --output {output:q} @@ -234,8 +234,8 @@ rule align: """ input: sequences=build_dir + "/{build_name}/reversed.fasta", - reference=config["reference"], - genome_annotation=config["genome_annotation"], + reference=resolve_config_path(config["reference"]), + genome_annotation=resolve_config_path(config["genome_annotation"]), output: alignment=build_dir + "/{build_name}/aligned.fasta", params: @@ -279,7 +279,7 @@ rule mask: """ input: sequences=build_dir + "/{build_name}/aligned.fasta", - mask=config["mask"]["maskfile"], + mask=resolve_config_path(config["mask"]["maskfile"]), output: build_dir + "/{build_name}/masked.fasta", params: