|
623 | 623 | " fh.write(f\"n_all_na={n_all_na}\\nn_high_msng={n_high_msng}\\n\")\n", |
624 | 624 | " fh.write(f\"n_low_maf={n_low_maf}\\nn_low_mac={n_low_mac}\\n\")\n", |
625 | 625 | "\n", |
626 | | - " # Write .afreq\n", |
627 | | - " with open(afreq_path, \"w\") as fh:\n", |
628 | | - " fh.write(\"#CHROM\\tID\\tREF\\tALT\\tALT_FREQS\\tOBS_CT\\n\")\n", |
629 | | - " for v in var_info:\n", |
630 | | - " fh.write(f\"{v['chr']}\\t{v['id']}\\t{v['ref']}\\t{v['alt']}\\t\"\n", |
631 | | - " f\"{v['af']:.6f}\\t{v['obs_ct']}\\n\")\n", |
632 | 626 | "\n", |
633 | 627 | " # Build G from collected dosages, compute U = $W^T G$, write dosage.gz\n", |
634 | 628 | " # Dosage format=1: ID ALT REF val_S1 ... val_SB\n", |
|
656 | 650 | " U = 2.0 * (U - col_min) / denom\n", |
657 | 651 | " U = np.round(U, 4)\n", |
658 | 652 | "\n", |
| 653 | + " # Record the col min and max for U\n", |
| 654 | + "\n", |
| 655 | + " with open(afreq_path, \"w\") as fh:\n", |
| 656 | + " # Add column headers\n", |
| 657 | + " fh.write(\"#CHROM\\tID\\tREF\\tALT\\tALT_FREQS\\tOBS_CT\\tU_MIN\\tU_MAX\\n\")\n", |
| 658 | + " for j, v in enumerate(var_info):\n", |
| 659 | + " fh.write(f\"{v['chr']}\\t{v['id']}\\t{v['ref']}\\t{v['alt']}\\t\"\n", |
| 660 | + " f\"{v['af']:.6f}\\t{v['obs_ct']}\\t\"\n", |
| 661 | + " f\"{col_min[j]:.6f}\\t{col_max[j]:.6f}\\n\")\n", |
| 662 | + "\n", |
659 | 663 | " with gzip.open(dosage_path, \"wt\", compresslevel=4) as gz:\n", |
660 | 664 | " for j, v in enumerate(var_info):\n", |
661 | 665 | " vals = \" \".join(f\"{x:.4f}\" for x in U[:, j])\n", |
|
779 | 783 | " fi\n", |
780 | 784 | " done\n", |
781 | 785 | "\n", |
| 786 | + " # Step 4: Add U_MIN and U_MAX to the final .pvar file\n", |
| 787 | + " # Extract ID, MIN, and MAX from the concatenated .afreq (columns 2, 7, 8)\n", |
| 788 | + " awk 'NR > 1 {print $2, $7, $8}' \"${final_prefix}.afreq\" > \"${final_prefix}.recovery_params\"\n", |
| 789 | + "\n", |
| 790 | + " # Decompress the .pvar.zst \n", |
| 791 | + " $[plink2_bin] --zst-decompress \"${final_prefix}.pvar.zst\" > \"${final_prefix}.pvar.tmp\"\n", |
| 792 | + "\n", |
| 793 | + " awk 'NR==FNR {min[$1]=$2; max[$1]=$3; next} \n", |
| 794 | + " /^#/ {print $0 \"\\tU_MIN\\tU_MAX\"; next} \n", |
| 795 | + " {if ($3 in min) print $0 \"\\t\" min[$3] \"\\t\" max[$3]; else print $0 \"\\tNA\\tNA\"}' \\\n", |
| 796 | + " \"${final_prefix}.recovery_params\" \"${final_prefix}.pvar.tmp\" > \"${final_prefix}.pvar\"\n", |
| 797 | + "\n", |
| 798 | + " # Replace the compressed file with the updated text file\n", |
| 799 | + " rm \"${final_prefix}.pvar.tmp\" \"${final_prefix}.pvar.zst\" \"${final_prefix}.recovery_params\"\n", |
782 | 800 | "\n", |
783 | 801 | "\n", |
784 | 802 | "R: expand = \"$[ ]\"\n", |
|
0 commit comments