diff --git a/code/reference_data/rss_ld_sketch.ipynb b/code/reference_data/rss_ld_sketch.ipynb index 53b626c5..c35d080f 100644 --- a/code/reference_data/rss_ld_sketch.ipynb +++ b/code/reference_data/rss_ld_sketch.ipynb @@ -698,7 +698,9 @@ "# Steps per chromosome:\n", "# 1. For each block: plink2 --import-dosage → unsorted pgen, then --sort-vars → sorted pgen\n", "# 2. plink2 --pmerge-list → one per-chrom pgen\n", - "# 3. Clean up all intermediates including -merge.* files\n", + "# 3. Concatenate .afreq\n", + "# 4. Clean up intermediates including -merge.* files and per-block dirs\n", + "# 5. zstd compress final pgen/pvar/psam/afreq (replaces any stale .zst; aborts on failure)\n", "#\n", "# Run after process_block completes (or run together: sos run ... process_block merge_chrom)\n", "parameter: chrom = 0\n", @@ -719,7 +721,7 @@ "_chroms = _chroms_to_process(output_dir, chrom)\n", "\n", "input: []\n", - "output: [f\"{output_dir}/{c}/{cohort_id}.{c}.pgen\" for c in _chroms]\n", + "output: [f\"{output_dir}/{c}/{cohort_id}.{c}.pgen.zst\" for c in _chroms]\n", "\n", "for _chrom in _chroms:\n", " bash(f\"\"\"\n", @@ -791,6 +793,13 @@ " for block_dir in \"${{chrom_dir}}\"/*/; do\n", " rm -rf \"${{block_dir}}\"\n", " done\n", + "\n", + " # ── Step 5: zstd compress final outputs (-f replaces a stale .zst from an earlier run; the source is removed only after a successful compress; any failure aborts the step) ──\n", + " for ext in pgen pvar psam afreq; do\n", + " zstd -f --ultra -22 -T{numThreads} \"${{final_prefix}}.${{ext}}\" \\\n", + " -o \"${{final_prefix}}.${{ext}}.zst\" \\\n", + " && rm -f \"${{final_prefix}}.${{ext}}\" || exit 1\n", + " done\n", " \"\"\")" ] }