Skip to content

Commit e7a7b8e

Browse files
authored
Merge pull request #1307 from SamuelSun2000/main
Record the min and max to recover original U
2 parents 1442815 + 09f3f9d commit e7a7b8e

1 file changed

Lines changed: 24 additions & 6 deletions

File tree

code/reference_data/rss_ld_sketch.ipynb

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -623,12 +623,6 @@
623623
" fh.write(f\"n_all_na={n_all_na}\\nn_high_msng={n_high_msng}\\n\")\n",
624624
" fh.write(f\"n_low_maf={n_low_maf}\\nn_low_mac={n_low_mac}\\n\")\n",
625625
"\n",
626-
" # Write .afreq\n",
627-
" with open(afreq_path, \"w\") as fh:\n",
628-
" fh.write(\"#CHROM\\tID\\tREF\\tALT\\tALT_FREQS\\tOBS_CT\\n\")\n",
629-
" for v in var_info:\n",
630-
" fh.write(f\"{v['chr']}\\t{v['id']}\\t{v['ref']}\\t{v['alt']}\\t\"\n",
631-
" f\"{v['af']:.6f}\\t{v['obs_ct']}\\n\")\n",
632626
"\n",
633627
" # Build G from collected dosages, compute U = $W^T G$, write dosage.gz\n",
634628
" # Dosage format=1: ID ALT REF val_S1 ... val_SB\n",
@@ -656,6 +650,16 @@
656650
" U = 2.0 * (U - col_min) / denom\n",
657651
" U = np.round(U, 4)\n",
658652
"\n",
653+
" # Record the col min and max for U\n",
654+
"\n",
655+
" with open(afreq_path, \"w\") as fh:\n",
656+
" # Add column headers\n",
657+
" fh.write(\"#CHROM\\tID\\tREF\\tALT\\tALT_FREQS\\tOBS_CT\\tU_MIN\\tU_MAX\\n\")\n",
658+
" for j, v in enumerate(var_info):\n",
659+
" fh.write(f\"{v['chr']}\\t{v['id']}\\t{v['ref']}\\t{v['alt']}\\t\"\n",
660+
" f\"{v['af']:.6f}\\t{v['obs_ct']}\\t\"\n",
661+
" f\"{col_min[j]:.6f}\\t{col_max[j]:.6f}\\n\")\n",
662+
"\n",
659663
" with gzip.open(dosage_path, \"wt\", compresslevel=4) as gz:\n",
660664
" for j, v in enumerate(var_info):\n",
661665
" vals = \" \".join(f\"{x:.4f}\" for x in U[:, j])\n",
@@ -779,6 +783,20 @@
779783
" fi\n",
780784
" done\n",
781785
"\n",
786+
" # Step 4: Add U_MIN and U_MAX to the final .pvar file\n",
787+
" # Extract ID, MIN, and MAX from the concatenated .afreq (columns 2, 7, 8)\n",
788+
" awk 'NR > 1 {print $2, $7, $8}' \"${final_prefix}.afreq\" > \"${final_prefix}.recovery_params\"\n",
789+
"\n",
790+
" # Decompress the .pvar.zst \n",
791+
" $[plink2_bin] --zst-decompress \"${final_prefix}.pvar.zst\" > \"${final_prefix}.pvar.tmp\"\n",
792+
"\n",
793+
" awk 'NR==FNR {min[$1]=$2; max[$1]=$3; next} \n",
794+
" /^#/ {print $0 \"\\tU_MIN\\tU_MAX\"; next} \n",
795+
" {if ($3 in min) print $0 \"\\t\" min[$3] \"\\t\" max[$3]; else print $0 \"\\tNA\\tNA\"}' \\\n",
796+
" \"${final_prefix}.recovery_params\" \"${final_prefix}.pvar.tmp\" > \"${final_prefix}.pvar\"\n",
797+
"\n",
798+
" # Replace the compressed file with the updated text file\n",
799+
" rm \"${final_prefix}.pvar.tmp\" \"${final_prefix}.pvar.zst\" \"${final_prefix}.recovery_params\"\n",
782800
"\n",
783801
"\n",
784802
"R: expand = \"$[ ]\"\n",

0 commit comments

Comments
 (0)