# Split the ingest outputs into separate open and restricted datasets.
#
# - ingest/Snakefile: `extract_open_data` keeps only records with
#   dataUseTerms=OPEN, and the new `extract_restricted_data` rule keeps only
#   records with dataUseTerms=RESTRICTED. Both rules pass `--exclude-all`
#   ahead of `--include-where`: on its own, `--include-where` only
#   force-includes matching records and never drops the others.
# - ingest/build-configs/nextstrain-automation/config.yaml: stop uploading the
#   combined metadata/sequences files and upload the restricted subset
#   instead. The upload keys must match the file names that the phylogenetic
#   configs download (`sequences_restricted.fasta.zst`).
# - phylogenetic/defaults/*/config.yaml: replace the single combined input
#   with two inputs, `ppx_open` and `ppx_restricted`.
diff --git a/ingest/Snakefile b/ingest/Snakefile
index ac5e56fb..38731495 100644
--- a/ingest/Snakefile
+++ b/ingest/Snakefile
@@ -55,7 +55,27 @@ rule extract_open_data:
         augur filter --metadata {input.metadata} \
             --sequences {input.sequences} \
             --metadata-id-columns PPX_accession \
-            --exclude-where "dataUseTerms=RESTRICTED" \
+            --exclude-all \
+            --include-where "dataUseTerms=OPEN" \
+            --output-metadata {output.metadata} \
+            --output-sequences {output.sequences}
+        """
+
+
+rule extract_restricted_data:
+    input:
+        metadata="results/metadata.tsv",
+        sequences="results/sequences.fasta",
+    output:
+        metadata="results/metadata_restricted.tsv",
+        sequences="results/sequences_restricted.fasta",
+    shell:
+        """
+        augur filter --metadata {input.metadata} \
+            --sequences {input.sequences} \
+            --metadata-id-columns PPX_accession \
+            --exclude-all \
+            --include-where "dataUseTerms=RESTRICTED" \
             --output-metadata {output.metadata} \
             --output-sequences {output.sequences}
         """
diff --git a/ingest/build-configs/nextstrain-automation/config.yaml b/ingest/build-configs/nextstrain-automation/config.yaml
index b9f9b9a2..41aa6971 100644
--- a/ingest/build-configs/nextstrain-automation/config.yaml
+++ b/ingest/build-configs/nextstrain-automation/config.yaml
@@ -16,14 +16,15 @@ upload:
   # to avoid duplicate files with different compressions
   files_to_upload:
     ppx_with_restricted.ndjson.zst: results/ppx.ndjson.zst
-    metadata_with_restricted.tsv.zst: results/metadata.tsv
-    sequences_with_restricted.fasta.zst: results/sequences.fasta
+
     nextclade_with_restricted.tsv.zst: results/nextclade.tsv
     alignment_with_restricted.fasta.zst: results/alignment.fasta
     translations_with_restricted.zip: results/translations.zip
 
     metadata.tsv.zst: results/metadata_open.tsv
     sequences.fasta.zst: results/sequences_open.fasta
+    metadata_restricted.tsv.zst: results/metadata_restricted.tsv
+    sequences_restricted.fasta.zst: results/sequences_restricted.fasta
 
   cloudfront_domain: 'data.nextstrain.org'
 
diff --git a/phylogenetic/defaults/clade-i/config.yaml b/phylogenetic/defaults/clade-i/config.yaml
index 3ecc34a8..79c97005 100644
--- a/phylogenetic/defaults/clade-i/config.yaml
+++ b/phylogenetic/defaults/clade-i/config.yaml
@@ -1,7 +1,10 @@
 inputs:
-  - name: pathoplexus
-    metadata: "https://data.nextstrain.org/files/workflows/mpox/metadata_with_restricted.tsv.zst"
-    sequences: "https://data.nextstrain.org/files/workflows/mpox/sequences_with_restricted.fasta.zst"
+  - name: ppx_open
+    metadata: "https://data.nextstrain.org/files/workflows/mpox/metadata.tsv.zst"
+    sequences: "https://data.nextstrain.org/files/workflows/mpox/sequences.fasta.zst"
+  - name: ppx_restricted
+    metadata: "https://data.nextstrain.org/files/workflows/mpox/metadata_restricted.tsv.zst"
+    sequences: "https://data.nextstrain.org/files/workflows/mpox/sequences_restricted.fasta.zst"
 
 reference: "defaults/clade-i/reference.fasta"
 genome_annotation: "defaults/clade-i/genome_annotation.gff3"
diff --git a/phylogenetic/defaults/hmpxv1/config.yaml b/phylogenetic/defaults/hmpxv1/config.yaml
index d4b556d4..9aab378f 100644
--- a/phylogenetic/defaults/hmpxv1/config.yaml
+++ b/phylogenetic/defaults/hmpxv1/config.yaml
@@ -1,7 +1,10 @@
 inputs:
-  - name: pathoplexus
-    metadata: "https://data.nextstrain.org/files/workflows/mpox/metadata_with_restricted.tsv.zst"
-    sequences: "https://data.nextstrain.org/files/workflows/mpox/sequences_with_restricted.fasta.zst"
+  - name: ppx_open
+    metadata: "https://data.nextstrain.org/files/workflows/mpox/metadata.tsv.zst"
+    sequences: "https://data.nextstrain.org/files/workflows/mpox/sequences.fasta.zst"
+  - name: ppx_restricted
+    metadata: "https://data.nextstrain.org/files/workflows/mpox/metadata_restricted.tsv.zst"
+    sequences: "https://data.nextstrain.org/files/workflows/mpox/sequences_restricted.fasta.zst"
 
 reference: "defaults/reference.fasta"
 genome_annotation: "defaults/genome_annotation.gff3"
diff --git a/phylogenetic/defaults/hmpxv1_big/config.yaml b/phylogenetic/defaults/hmpxv1_big/config.yaml
index 8db5676b..20a90086 100644
--- a/phylogenetic/defaults/hmpxv1_big/config.yaml
+++ b/phylogenetic/defaults/hmpxv1_big/config.yaml
@@ -1,7 +1,10 @@
 inputs:
-  - name: pathoplexus
-    metadata: "https://data.nextstrain.org/files/workflows/mpox/metadata_with_restricted.tsv.zst"
-    sequences: "https://data.nextstrain.org/files/workflows/mpox/sequences_with_restricted.fasta.zst"
+  - name: ppx_open
+    metadata: "https://data.nextstrain.org/files/workflows/mpox/metadata.tsv.zst"
+    sequences: "https://data.nextstrain.org/files/workflows/mpox/sequences.fasta.zst"
+  - name: ppx_restricted
+    metadata: "https://data.nextstrain.org/files/workflows/mpox/metadata_restricted.tsv.zst"
+    sequences: "https://data.nextstrain.org/files/workflows/mpox/sequences_restricted.fasta.zst"
 
 reference: "defaults/reference.fasta"
 genome_annotation: "defaults/genome_annotation.gff3"
diff --git a/phylogenetic/defaults/mpxv/config.yaml b/phylogenetic/defaults/mpxv/config.yaml
index b0508689..d8604761 100644
--- a/phylogenetic/defaults/mpxv/config.yaml
+++ b/phylogenetic/defaults/mpxv/config.yaml
@@ -1,7 +1,10 @@
 inputs:
-  - name: pathoplexus
-    metadata: "https://data.nextstrain.org/files/workflows/mpox/metadata_with_restricted.tsv.zst"
-    sequences: "https://data.nextstrain.org/files/workflows/mpox/sequences_with_restricted.fasta.zst"
+  - name: ppx_open
+    metadata: "https://data.nextstrain.org/files/workflows/mpox/metadata.tsv.zst"
+    sequences: "https://data.nextstrain.org/files/workflows/mpox/sequences.fasta.zst"
+  - name: ppx_restricted
+    metadata: "https://data.nextstrain.org/files/workflows/mpox/metadata_restricted.tsv.zst"
+    sequences: "https://data.nextstrain.org/files/workflows/mpox/sequences_restricted.fasta.zst"
 
 auspice_config: "defaults/mpxv/auspice_config.json"
 include: "defaults/mpxv/include.txt"