From b2b413a3bfcf8d2f4c8c57d35051298eacd6213b Mon Sep 17 00:00:00 2001 From: Steve Lianoglou Date: Sat, 1 Jun 2024 12:27:09 -0700 Subject: [PATCH] Handle case when group_file doesn't have any confounders When no confounder columns were present in `group_file`, there would be multiple errors. This is an attempt to fix that. 1. A default `confounders = None` is set 2. A chunk of code that manipulates the `confounders` DataFrame was pushed up into the `if len(meta.columns) > 2` block 3. If no confounders are present, the `counts` DataFrame is subset to just include the samples defined in `meta` Please note I'm not a Python or pandas guru, so I'm sure there may be better (more idiomatic) ways to do this, but this seems to do what I think is intended to set up the ultimate call to `differential_splicing()` --- .../differential_splicing/leafcutter_ds.py | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/leafcutter/differential_splicing/leafcutter_ds.py b/leafcutter/differential_splicing/leafcutter_ds.py index 7739b0f..b1168de 100644 --- a/leafcutter/differential_splicing/leafcutter_ds.py +++ b/leafcutter/differential_splicing/leafcutter_ds.py @@ -50,6 +50,7 @@ meta = meta.rename(dict(zip([0, 1], ["sample", "group"])), axis = 1) # Check if there are more than 2 columns in the metadata DataFrame +confounders = None if len(meta.columns) > 2: # Extract the confounders (columns 3 and onwards) confounders = meta.iloc[:, 2:] @@ -71,17 +72,18 @@ #remove samples with missing data (not in origial leafcutter as far as I can tell). 
confounders = pd.DataFrame(confounders, index = meta['sample']).dropna() -all_samples = set(meta.loc[:,'sample']) -removed_samples = all_samples - set(confounders.index) -if len(removed_samples) != 0: - print('Samples removed due to missing values in covariates...') - print(','.join(list(removed_samples))) - -# Encode the "group" column as numeric (0 and 1) -meta = meta[meta['sample'].isin(confounders.index)] - -#if permute: numeric_x = np.random.permutation(numeric_x) -counts = counts[confounders.index] + # indented to here to fix confounders + all_samples = set(meta.loc[:,'sample']) + removed_samples = all_samples - set(confounders.index) + if len(removed_samples) != 0: + print('Samples removed due to missing values in covariates...') + print(','.join(list(removed_samples))) + # Encode the "group" column as numeric (0 and 1) + meta = meta[meta['sample'].isin(confounders.index)] + #if permute: numeric_x = np.random.permutation(numeric_x) + counts = counts[confounders.index] +else: + counts = counts[meta["sample"].tolist()] scale_factor = 1.