638638 " G = np.array(dosage_matrix, dtype=np.float32).T # (n_samples, n_variants)\n " ,
639639 " del dosage_matrix\n " ,
640640 " G = fill_missing_col_means(G)\n " ,
641+ " \n " ,
642+ " # variant-wise scaling\n " ,
643+ " col_mean = G.mean(axis=0, keepdims=True)\n " ,
644+ " col_std = G.std(axis=0, keepdims=True)\n " ,
645+ " # avoid division by zero\n " ,
646+ " col_std[col_std == 0] = 1.0\n " ,
647+ " G = (G - col_mean) / col_std\n " ,
648+ " \n " ,
641649 " U = W.T @ G # (B, n_variants)\n " ,
642650 " del G\n " ,
643651 " \n " ,
806814 },
807815 {
808816 "cell_type" : " markdown" ,
817+ "id" : " 1968fbf4" ,
809818 "metadata" : {},
810819 "source" : [
811820 " ### Step 4 -- Load LD sketch data into R\n " ,
822831 },
823832 {
824833 "cell_type" : " code" ,
834+ "execution_count" : null ,
835+ "id" : " 8a05b576" ,
825836 "metadata" : {},
837+ "outputs" : [],
826838 "source" : [
827839 " library(pecotmr)\n " ,
828840 " \n " ,
843855 " })\n " ,
844856 " cat(\" R dimensions:\" , dim(res_r$LD_matrix), \"\\ n\" )\n " ,
845857 " res_r$LD_matrix[1:3, 1:3]"
846- ],
847- "outputs" : [],
848- "execution_count" : null
858+ ]
849859 },
850860 {
851861 "cell_type" : " markdown" ,
862+ "id" : " aad6e458" ,
852863 "metadata" : {},
853864 "source" : [
854865 " **Tested output** (ADSP R5 EUR chr22, 10000 pseudo-samples, 2Mb region, 9188 variants):\n " ,
903914 "name" : " python" ,
904915 "nbconvert_exporter" : " python" ,
905916 "pygments_lexer" : " ipython3" ,
906- "version" : " 3.12.13 "
917+ "version" : " 3.12.2 "
907918 },
908919 "sos" : {
909920 "kernels" : [
920931 },
921932 "nbformat" : 4 ,
922933 "nbformat_minor" : 5
923- }
934+ }
0 commit comments