From b2b413a3bfcf8d2f4c8c57d35051298eacd6213b Mon Sep 17 00:00:00 2001
From: Steve Lianoglou <slianoglou@gmail.com>
Date: Sat, 1 Jun 2024 12:27:09 -0700
Subject: [PATCH] Handle case when group_file doesn't have any confounders

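When `group_file` contained no confounder columns (only the sample and group columns), the script failed with multiple errors, because `confounders` was only assigned inside the `if len(meta.columns) > 2` block but was used unconditionally afterwards. This patch is an attempt to fix that.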

1. A default `confounders = None` is set
2. The code that depends on the `confounders` DataFrame (reporting removed samples, and filtering `meta` and `counts` to the remaining samples) was moved into the `if len(meta.columns) > 2` block
3. If no confounders are present, the `counts` DataFrame is subset to just include the samples defined in `meta`

Please note that I'm not a Python or pandas guru, so there may well be better (more idiomatic) ways to do this, but this seems to do what I think is intended to set up the final call to `differential_splicing()`.
---
 .../differential_splicing/leafcutter_ds.py    | 24 ++++++++++---------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/leafcutter/differential_splicing/leafcutter_ds.py b/leafcutter/differential_splicing/leafcutter_ds.py
index 7739b0f..b1168de 100644
--- a/leafcutter/differential_splicing/leafcutter_ds.py
+++ b/leafcutter/differential_splicing/leafcutter_ds.py
@@ -50,6 +50,7 @@
 meta = meta.rename(dict(zip([0, 1], ["sample", "group"])), axis = 1)
 
 # Check if there are more than 2 columns in the metadata DataFrame
+confounders = None
 if len(meta.columns) > 2:
     # Extract the confounders (columns 3 and onwards)
     confounders = meta.iloc[:, 2:]
@@ -71,17 +72,18 @@
     #remove samples with missing data (not in origial leafcutter as far as I can tell).
     confounders = pd.DataFrame(confounders, index = meta['sample']).dropna()
 
-all_samples = set(meta.loc[:,'sample'])
-removed_samples = all_samples - set(confounders.index)
-if len(removed_samples) != 0:
-    print('Samples removed due to missing values in covariates...')
-    print(','.join(list(removed_samples)))
-
-# Encode the "group" column as numeric (0 and 1)
-meta = meta[meta['sample'].isin(confounders.index)]
-
-#if permute: numeric_x = np.random.permutation(numeric_x)
-counts = counts[confounders.index]
+    # The steps below use `confounders`, so they only run when confounder columns exist
+    all_samples = set(meta.loc[:,'sample'])
+    removed_samples = all_samples - set(confounders.index)
+    if len(removed_samples) != 0:
+        print('Samples removed due to missing values in covariates...')
+        print(','.join(list(removed_samples)))
+    # Keep only the metadata rows for samples still present in `confounders`
+    meta = meta[meta['sample'].isin(confounders.index)]
+    #if permute: numeric_x = np.random.permutation(numeric_x)
+    counts = counts[confounders.index]
+else:
+    counts = counts[meta["sample"].tolist()]
 
 scale_factor = 1.