From 7ce788617a3fdfcb216d055db02f2438bd627015 Mon Sep 17 00:00:00 2001
From: Joshua Gould <joshua-gould@users.noreply.github.com>
Date: Thu, 19 Feb 2026 10:30:29 -0500
Subject: [PATCH 1/2] Keep track of which complexes gene pairs belong to

---
 scallops/features/map_eval.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/scallops/features/map_eval.py b/scallops/features/map_eval.py
index 28b47b2..806c298 100644
--- a/scallops/features/map_eval.py
+++ b/scallops/features/map_eval.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 from typing import Literal
 
 import anndata
@@ -30,22 +31,31 @@ def read_corum(path: str) -> pd.DataFrame:
 
     :param path: Path to CORUM CSV (e.g. corum_humanComplexes.txt). Available from
         https://mips.helmholtz-muenchen.de/corum/download
-    :return: Dataframe containing pairs of genes found in CORUM
+    :return: Dataframe of CORUM gene pairs and the set of complexes each belongs to
     """
 
-    corum_gene_names = pd.read_csv(path, usecols=["subunits_gene_name"], sep="\t")[
-        "subunits_gene_name"
-    ].values
+    df = pd.read_csv(path, usecols=["complex_name", "subunits_gene_name"], sep="\t")
+    corum_gene_names = df["subunits_gene_name"].values
+    complex_names = df["complex_name"].values
     pairs = set()
+    pair_to_complex_names = defaultdict(set)
+
     for i in range(len(corum_gene_names)):
         cluster = corum_gene_names[i].split(";")
+        complex_name = complex_names[i]
         for j in range(len(cluster)):
             for k in range(j):
-                pairs.add((cluster[j], cluster[k]))
-                pairs.add((cluster[k], cluster[j]))
+                p1 = (cluster[j], cluster[k])
+                p2 = (cluster[k], cluster[j])
+                pairs.add(p1)
+                pairs.add(p2)
+                pair_to_complex_names[p1].add(complex_name)
+                pair_to_complex_names[p2].add(complex_name)
     a = []
     b = []
+    c = []
     for p in pairs:
         a.append(p[0])
         b.append(p[1])
-    return pd.DataFrame(data=dict(a=a, b=b))
+        c.append(pair_to_complex_names[p])
+    return pd.DataFrame(data=dict(a=a, b=b, complex_name=c))

From cb0cacbeed5c3d2df6a4d53a28469be12a58574f Mon Sep 17 00:00:00 2001
From: Joshua Gould <joshua-gould@users.noreply.github.com>
Date: Fri, 20 Feb 2026 11:26:31 -0500
Subject: [PATCH 2/2] Fixed conflicts with main

---
 scallops/features/map_eval.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/scallops/features/map_eval.py b/scallops/features/map_eval.py
index 91c1e5a..c341a51 100644
--- a/scallops/features/map_eval.py
+++ b/scallops/features/map_eval.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 from collections.abc import Callable, Sequence
 from typing import Literal
 
@@ -96,22 +97,31 @@ def read_corum(path: str) -> pd.DataFrame:
 
     :param path: Path to CORUM CSV (e.g. corum_humanComplexes.txt). Available from
         https://mips.helmholtz-muenchen.de/corum/download
-    :return: Dataframe containing pairs of genes found in CORUM
+    :return: Dataframe of CORUM gene pairs and the set of complexes each belongs to
     """
 
-    corum_gene_names = pd.read_csv(path, usecols=["subunits_gene_name"], sep="\t")[
-        "subunits_gene_name"
-    ].values
+    df = pd.read_csv(path, usecols=["complex_name", "subunits_gene_name"], sep="\t")
+    corum_gene_names = df["subunits_gene_name"].values
+    complex_names = df["complex_name"].values
     pairs = set()
+    pair_to_complex_names = defaultdict(set)
+
     for i in range(len(corum_gene_names)):
         cluster = corum_gene_names[i].split(";")
+        complex_name = complex_names[i]
         for j in range(len(cluster)):
             for k in range(j):
-                pairs.add((cluster[j], cluster[k]))
-                pairs.add((cluster[k], cluster[j]))
+                p1 = (cluster[j], cluster[k])
+                p2 = (cluster[k], cluster[j])
+                pairs.add(p1)
+                pairs.add(p2)
+                pair_to_complex_names[p1].add(complex_name)
+                pair_to_complex_names[p2].add(complex_name)
     a = []
     b = []
+    c = []
     for p in pairs:
         a.append(p[0])
         b.append(p[1])
-    return pd.DataFrame(data=dict(a=a, b=b))
+        c.append(pair_to_complex_names[p])
+    return pd.DataFrame(data=dict(a=a, b=b, complex_name=c))