13181318 " parameter: chromosome = []\n " ,
13191319 " # Minor allele count cutoff\n " ,
13201320 " parameter: MAC = 0\n " ,
1321+ " # Specify genotype chromosome for trans analysis (e.g. --trans-geno-chromosome 5)\n " ,
1322+ " # When set, trans step loads this genotype chrom instead of the one from input_files\n " ,
1323+ " parameter: trans_geno_chromosome = \"\"\n " ,
13211324 " \n " ,
13221325 " # Specify the cis window for the up and downstream radius to analyze around the region of interest in units of bp\n " ,
13231326 " # This parameter will be set to zero if `customized_cis_windows` is provided.\n " ,
14221425 " input_chroms = [x[0] for x in input_files]\n " ,
14231426 " input_files = [x[1:] for x in input_files]\n " ,
14241427 " if len(name) == 0:\n " ,
1425- " name = f'{path(input_files[0][0]):bnn}' if len(input_files) == 1 else f'{path(input_files[0][0]):bnnn}'\n "
1428+ " name = f'{path(input_files[0][0]):bnn}' if len(input_files) == 1 else f'{path(input_files[0][0]):bnnn}'\n " ,
1429+ " "
14261430 ]
14271431 },
14281432 {
18691873 " parameter: pval = 0.0\n " ,
18701874 " \n " ,
18711875 " input: input_files, group_by = len(input_files[0]), group_with = \" input_chroms\"\n " ,
1872- " output: nominal = f'{cwd:a}/{_input[0]:bnn}{\" _ %s\" % input_chroms[_index] if input_chroms[_index] != 0 else \"\" }.trans_qtl{\" _p_%.0e\" % pval if pval > 0.0 else \"\" }.pairs.tsv.gz'\n " ,
1876+ " output: nominal = f'{cwd:a}/{_input[0]:bnn}{\" _chr %s\" % input_chroms[_index] if input_chroms[_index] != 0 else \"\" }{ \" _geno_chr%s \" % trans_geno_chromosome if trans_geno_chromosome else \"\" }.trans_qtl{\" _p_%.0e\" % pval if pval > 0.0 else \"\" }.pairs.tsv.gz'\n " ,
18731877 " \n " ,
18741878 " task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'\n " ,
18751879 " python: expand= \" $[ ]\" , stderr = f'{_output[0]}.stderr', stdout = f'{_output[0]}.stdout',container =container\n " ,
18961900 " \n " ,
18971901 " ## Analyze only the regions listed\n " ,
18981902 " if $[region_list.is_file()]:\n " ,
1899- " region = pd.read_csv(\" $[region_list:a]\" )\n " ,
1903+ " region = pd.read_csv(\" $[region_list:a]\" , comment= \" # \" , header=None, sep= \"\\ t \" )\n " ,
19001904 " phenotype_column = 1 if len(region.columns) == 1 else $[region_list_phenotype_column]\n " ,
19011905 " keep_region = region.iloc[:, phenotype_column-1].astype(str).str.strip().to_list()\n " ,
19021906 " phenotype_df = phenotype_df[phenotype_df.index.isin(keep_region)]\n " ,
19481952 " covariates_df = covariates_df[keep_cols]\n " ,
19491953 " \n " ,
19501954 " \n " ,
1955+ " ## If trans_geno_chromosome is specified, load that genotype chrom instead\n " ,
1956+ " trans_geno_chrom = \" $[trans_geno_chromosome]\"\n " ,
1957+ " if trans_geno_chrom:\n " ,
1958+ " trans_geno_chrom = trans_geno_chrom.replace(\" chr\" , \"\" )\n " ,
1959+ " geno_file_str = \" $[genotype_file:a]\"\n " ,
1960+ " if geno_file_str.endswith(\" .bed\" ) or geno_file_str.endswith(\" .pgen\" ):\n " ,
1961+ " # Single plink file: load as-is, will filter variants by chrom after loading\n " ,
1962+ " print(f\" Trans mode: will filter genotype to chr{trans_geno_chrom} after loading\" )\n " ,
1963+ " else:\n " ,
1964+ " # Manifest file: find the plink file for the specified chromosome\n " ,
1965+ " geno_list_df = pd.read_csv(geno_file_str, sep=\"\\ t\" )\n " ,
1966+ " geno_list_df[\" #id\" ] = [x.replace(\" chr\" ,\"\" ) for x in geno_list_df[\" #id\" ].astype(str)]\n " ,
1967+ " geno_row = geno_list_df[geno_list_df[\" #id\" ] == trans_geno_chrom]\n " ,
1968+ " if len(geno_row) == 0:\n " ,
1969+ " raise ValueError(f\" No genotype file found for chromosome {trans_geno_chrom}\" )\n " ,
1970+ " geno_plink = geno_row[\" #path\" ].values[0]\n " ,
1971+ " if geno_plink.endswith(\" .bed\" ):\n " ,
1972+ " geno_plink = geno_plink[:-4]\n " ,
1973+ " elif geno_plink.endswith(\" .pgen\" ):\n " ,
1974+ " geno_plink = geno_plink[:-5]\n " ,
1975+ " plink_prefix_path = geno_plink\n " ,
1976+ " print(f\" Trans mode: overriding genotype to chr{trans_geno_chrom} from {plink_prefix_path}\" )\n " ,
1977+ " \n " ,
19511978 " genotype_df, variant_df = genotypeio.load_genotypes(plink_prefix_path, dosages = True) \n " ,
1979+ " \n " ,
1980+ " ## Filter genotype to specified chromosome if trans_geno_chrom is set\n " ,
1981+ " if trans_geno_chrom:\n " ,
1982+ " chrom_filter = variant_df[\" chrom\" ].astype(str).str.replace(\" chr\" , \"\" ) == trans_geno_chrom\n " ,
1983+ " if chrom_filter.sum() == 0:\n " ,
1984+ " raise ValueError(f\" No variants found for chromosome {trans_geno_chrom} in genotype data\" )\n " ,
1985+ " variant_df = variant_df[chrom_filter]\n " ,
1986+ " genotype_df = genotype_df.loc[variant_df.index]\n " ,
1987+ " print(f\" Trans mode: filtered to {len(variant_df)} variants on chr{trans_geno_chrom}\" )\n " ,
19521988 " ## use custom sample list to subset the covariates data\n " ,
19531989 " if $[keep_sample.is_file()]:\n " ,
19541990 " sample_list = pd.read_csv(\" $[keep_sample:a]\" , comment=\" #\" , header=None, names=[\" sample_id\" ], sep=\"\\ t\" )\n " ,
21782214 " \n " ,
21792215 " bash: expand= \" $[ ]\" , stderr = f'{_output[\" nominal\" ]:nnn}.stderr', stdout = f'{_output[\" nominal\" ]:nnn}.stdout', container = container\n " ,
21802216 " bgzip --compress-level 9 $[_output[\" nominal\" ]:n]\n " ,
2181- " tabix -S 1 -s 1 -b 2 -e 2 $[_output[\" nominal\" ]]\n "
2217+ " tabix -S 1 -s 1 -b 2 -e 2 $[_output[\" nominal\" ]]\n " ,
2218+ " "
21822219 ]
21832220 }
21842221 ],
22172254 },
22182255 "nbformat" : 4 ,
22192256 "nbformat_minor" : 4
2220- }
2257+ }
0 commit comments