Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions scallops/features/map_eval.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections.abc import Callable, Sequence
from typing import Literal

import anndata
Expand All @@ -6,6 +7,71 @@
from sklearn.metrics.pairwise import cosine_similarity


def recall(
    true_positives_df: pd.DataFrame,
    similarity_df: pd.DataFrame,
    similarity_column_true_positives: str = "value",
    similarity_column: str = "value",
    quantiles: Sequence[float] = (0.01, 0.05),
    two_sided: bool = True,
    n_true_positives: Callable[[pd.DataFrame], int] = len,
) -> pd.DataFrame:
    """Compute recall at the specified quantiles.

    Thresholds are the quantiles of ``similarity_df[similarity_column]``. A true
    positive counts as retrieved when its similarity falls in the selected tail(s)
    of that distribution.

    :param true_positives_df: Dataframe containing true positive pairwise similarities
        (e.g. relationships from CORUM)
    :param similarity_df: Dataframe containing all pairwise similarities from which
        quantiles are computed.
    :param similarity_column_true_positives: Column in `true_positives_df` containing
        similarity scores.
    :param similarity_column: Column in `similarity_df` containing similarity scores.
    :param quantiles: List of quantiles to extract relevant gene pairs.
    :param two_sided: If two-sided, recall is computed at specific quantiles
        and 1-quantiles: a pair is retrieved when its score is at or below the
        ``q`` threshold or at or above the ``1 - q`` threshold. Note that for
        ``q > 0.5`` the two tails overlap and every pair is retrieved. If
        one-sided, the lower tail is used for ``q <= 0.5`` and the upper tail
        otherwise.
    :param n_true_positives: Function that accepts a dataframe and returns the
        number of true positives. Default is the length of the dataframe
    :return: Dataframe containing recall results, one row per requested quantile
        with columns ``quantile`` and ``recall``. Recall is NaN when
        `n_true_positives` reports zero true positives.
    """
    n_relevant = n_true_positives(true_positives_df)
    quantile_values = np.asarray(quantiles, dtype=float)
    n_quantiles = len(quantile_values)

    # Compute all thresholds in a single pass over the similarity distribution.
    if two_sided:
        probabilities = np.concatenate((quantile_values, 1 - quantile_values))
    else:
        probabilities = quantile_values
    # Thresholds are read back by position rather than by float label: labels
    # collide when a quantile is repeated or when q == 1 - q (q = 0.5), and a
    # label lookup would then return a Series instead of a scalar.
    thresholds = (
        similarity_df[similarity_column].quantile(probabilities).to_numpy()
    )

    true_positive_scores = true_positives_df[similarity_column_true_positives]

    results = []
    for position, quantile in enumerate(quantile_values):
        if two_sided:
            threshold_low = thresholds[position]
            threshold_high = thresholds[n_quantiles + position]
            retrieved = (true_positive_scores >= threshold_high) | (
                true_positive_scores <= threshold_low
            )
        elif quantile <= 0.5:
            retrieved = true_positive_scores <= thresholds[position]
        else:
            retrieved = true_positive_scores >= thresholds[position]

        n_relevant_retrieved = n_true_positives(true_positives_df[retrieved])
        # Recall is undefined without any true positives; report NaN rather
        # than raising ZeroDivisionError.
        recall_value = n_relevant_retrieved / n_relevant if n_relevant else np.nan
        results.append([float(quantile), recall_value])
    return pd.DataFrame(results, columns=["quantile", "recall"])


def pairwise_similarities(
data: anndata.AnnData, metric: Literal["cosine", "pearson"] = "cosine"
) -> np.ndarray:
Expand Down