Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 42 additions & 5 deletions code/association_scan/TensorQTL/TensorQTL.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1318,6 +1318,9 @@
"parameter: chromosome = []\n",
"# Minor allele count cutoff\n",
"parameter: MAC = 0\n",
"# Specify genotype chromosome for trans analysis (e.g. --trans-geno-chromosome 5)\n",
"# When set, trans step loads this genotype chrom instead of the one from input_files\n",
"parameter: trans_geno_chromosome = \"\"\n",
"\n",
"# Specify the cis window for the up and downstream radius to analyze around the region of interest in units of bp\n",
"# This parameter will be set to zero if `customized_cis_windows` is provided.\n",
Expand Down Expand Up @@ -1422,7 +1425,8 @@
" input_chroms = [x[0] for x in input_files]\n",
" input_files = [x[1:] for x in input_files]\n",
" if len(name) == 0:\n",
" name = f'{path(input_files[0][0]):bnn}' if len(input_files) == 1 else f'{path(input_files[0][0]):bnnn}'\n"
" name = f'{path(input_files[0][0]):bnn}' if len(input_files) == 1 else f'{path(input_files[0][0]):bnnn}'\n",
""
]
},
{
Expand Down Expand Up @@ -1869,7 +1873,7 @@
"parameter: pval = 0.0\n",
"\n",
"input: input_files, group_by = len(input_files[0]), group_with = \"input_chroms\"\n",
"output: nominal = f'{cwd:a}/{_input[0]:bnn}{\"_%s\" % input_chroms[_index] if input_chroms[_index] != 0 else \"\"}.trans_qtl{\"_p_%.0e\" % pval if pval > 0.0 else \"\"}.pairs.tsv.gz'\n",
"output: nominal = f'{cwd:a}/{_input[0]:bnn}{\"_chr%s\" % input_chroms[_index] if input_chroms[_index] != 0 else \"\"}{\"_geno_chr%s\" % trans_geno_chromosome if trans_geno_chromosome else \"\"}.trans_qtl{\"_p_%.0e\" % pval if pval > 0.0 else \"\"}.pairs.tsv.gz'\n",
"\n",
"task: trunk_workers = 1, trunk_size = job_size, walltime = walltime, mem = mem, cores = numThreads, tags = f'{step_name}_{_output[0]:bn}'\n",
"python: expand= \"$[ ]\", stderr = f'{_output[0]}.stderr', stdout = f'{_output[0]}.stdout',container =container\n",
Expand All @@ -1896,7 +1900,7 @@
"\n",
" ## Analyze only the regions listed\n",
" if $[region_list.is_file()]:\n",
" region = pd.read_csv(\"$[region_list:a]\")\n",
" region = pd.read_csv(\"$[region_list:a]\", comment=\"#\", header=None, sep=\"\\t\")\n",
" phenotype_column = 1 if len(region.columns) == 1 else $[region_list_phenotype_column]\n",
" keep_region = region.iloc[:, phenotype_column-1].astype(str).str.strip().to_list()\n",
" phenotype_df = phenotype_df[phenotype_df.index.isin(keep_region)]\n",
Expand Down Expand Up @@ -1948,7 +1952,39 @@
" covariates_df = covariates_df[keep_cols]\n",
" \n",
"\n",
" ## If trans_geno_chromosome is specified, load that genotype chrom instead\n",
" trans_geno_chrom = \"$[trans_geno_chromosome]\"\n",
" if trans_geno_chrom:\n",
" trans_geno_chrom = trans_geno_chrom.replace(\"chr\", \"\")\n",
" geno_file_str = \"$[genotype_file:a]\"\n",
" if geno_file_str.endswith(\".bed\") or geno_file_str.endswith(\".pgen\"):\n",
" # Single plink file: load as-is, will filter variants by chrom after loading\n",
" print(f\"Trans mode: will filter genotype to chr{trans_geno_chrom} after loading\")\n",
" else:\n",
" # Manifest file: find the plink file for the specified chromosome\n",
" geno_list_df = pd.read_csv(geno_file_str, sep=\"\\t\")\n",
" geno_list_df[\"#id\"] = [x.replace(\"chr\",\"\") for x in geno_list_df[\"#id\"].astype(str)]\n",
" geno_row = geno_list_df[geno_list_df[\"#id\"] == trans_geno_chrom]\n",
" if len(geno_row) == 0:\n",
" raise ValueError(f\"No genotype file found for chromosome {trans_geno_chrom}\")\n",
" geno_plink = geno_row[\"#path\"].values[0]\n",
" if geno_plink.endswith(\".bed\"):\n",
" geno_plink = geno_plink[:-4]\n",
" elif geno_plink.endswith(\".pgen\"):\n",
" geno_plink = geno_plink[:-5]\n",
" plink_prefix_path = geno_plink\n",
" print(f\"Trans mode: overriding genotype to chr{trans_geno_chrom} from {plink_prefix_path}\")\n",
"\n",
" genotype_df, variant_df = genotypeio.load_genotypes(plink_prefix_path, dosages = True) \n",
"\n",
" ## Filter genotype to specified chromosome if trans_geno_chrom is set\n",
" if trans_geno_chrom:\n",
" chrom_filter = variant_df[\"chrom\"].astype(str).str.replace(\"chr\", \"\") == trans_geno_chrom\n",
" if chrom_filter.sum() == 0:\n",
" raise ValueError(f\"No variants found for chromosome {trans_geno_chrom} in genotype data\")\n",
" variant_df = variant_df[chrom_filter]\n",
" genotype_df = genotype_df.loc[variant_df.index]\n",
" print(f\"Trans mode: filtered to {len(variant_df)} variants on chr{trans_geno_chrom}\")\n",
" ## use custom sample list to subset the covariates data\n",
" if $[keep_sample.is_file()]:\n",
" sample_list = pd.read_csv(\"$[keep_sample:a]\", comment=\"#\", header=None, names=[\"sample_id\"], sep=\"\\t\")\n",
Expand Down Expand Up @@ -2178,7 +2214,8 @@
"\n",
"bash: expand= \"$[ ]\", stderr = f'{_output[\"nominal\"]:nnn}.stderr', stdout = f'{_output[\"nominal\"]:nnn}.stdout', container = container\n",
" bgzip --compress-level 9 $[_output[\"nominal\"]:n]\n",
" tabix -S 1 -s 1 -b 2 -e 2 $[_output[\"nominal\"]]\n"
" tabix -S 1 -s 1 -b 2 -e 2 $[_output[\"nominal\"]]\n",
""
]
}
],
Expand Down Expand Up @@ -2217,4 +2254,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
}