diff --git a/CHANGELOG.md b/CHANGELOG.md index f5f6d8aa2..0b6384bb0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Changed +- The default `min_allowed_nodes_pct` in `adaptive_core_expansion` has been changed from 0.8 to 0.9, meaning that a valid "high" core partition must include at least 90% of all nodes. Components that don't meet this criterion will not be denoised by ACE. + ## [0.29.0] - 2026-06-15 ### Added diff --git a/src/pixelator/pna/graph/adaptive_core_expansion.py b/src/pixelator/pna/graph/adaptive_core_expansion.py index 34a276e4c..aaa418c03 100644 --- a/src/pixelator/pna/graph/adaptive_core_expansion.py +++ b/src/pixelator/pna/graph/adaptive_core_expansion.py @@ -257,7 +257,7 @@ def adaptive_core_expansion( max_iter: int = 200, min_seed_pct: float = 0.1, nodes_to_move_threshold: int = 10, - min_allowed_nodes_pct: float = 0.8, + min_allowed_nodes_pct: float = 0.9, select_lcc: bool = True, ) -> Graph: """Perform Adaptive Core Expansion (ACE) graph partitioning. diff --git a/tests/pna/denoise/test_denoise.py b/tests/pna/denoise/test_denoise.py index 3ca4e68c3..fd37f2155 100644 --- a/tests/pna/denoise/test_denoise.py +++ b/tests/pna/denoise/test_denoise.py @@ -513,7 +513,8 @@ def f(*args, **kwargs): REFERENCE_ACE_COMPONENT = "57129a8b0fff38c6" -REFERENCE_ACE_LOW_NODE_COUNT = 5436 +REFERENCE_ACE_COMPONENT_HIGH_QUALITY = "c4c3ef9497b3746d" +REFERENCE_ACE_LOW_NODE_COUNT = 32 def test_denoise_pls_reference_component_runs_and_cleans_coreness(denoise_pxl_dataset): @@ -588,13 +589,14 @@ def test_denoise_ace_reference_component(denoise_pxl_dataset): denoise_pxl_dataset: Denoise pxl dataset. 
""" comp_graph = PNAGraph.from_edgelist( - denoise_pxl_dataset.filter(components=[REFERENCE_ACE_COMPONENT]) + denoise_pxl_dataset.filter(components=[REFERENCE_ACE_COMPONENT_HIGH_QUALITY]) .edgelist() .to_polars() .lazy() ) removed = denoise_ace(comp_graph) assert removed != [None] + a = len(removed) assert len(removed) == REFERENCE_ACE_LOW_NODE_COUNT partitions = nx.get_node_attributes(comp_graph.raw, "partition") low_ids = {n for n, p in partitions.items() if p == "low"} @@ -625,7 +627,9 @@ def test_denoise_ace_analysis(denoise_pxl_dataset, tmp_path): == obs["number_of_nodes_removed_in_denoise"] ).all() assert int( - obs.loc[REFERENCE_ACE_COMPONENT, "denoised_nodes_marked_only_by_ace"] + obs.loc[ + REFERENCE_ACE_COMPONENT_HIGH_QUALITY, "denoised_nodes_marked_only_by_ace" + ] ) == (REFERENCE_ACE_LOW_NODE_COUNT) assert ( @@ -633,13 +637,13 @@ def test_denoise_ace_analysis(denoise_pxl_dataset, tmp_path): ) # ACE-only denoising with LCC seed should not produce stranded nodes orig_graph = PNAGraph.from_edgelist( - denoise_pxl_dataset.filter(components=[REFERENCE_ACE_COMPONENT]) + denoise_pxl_dataset.filter(components=[REFERENCE_ACE_COMPONENT_HIGH_QUALITY]) .edgelist() .to_polars() .lazy() ) denoised_graph = PNAGraph.from_edgelist( - denoised_dataset.filter(components=[REFERENCE_ACE_COMPONENT]) + denoised_dataset.filter(components=[REFERENCE_ACE_COMPONENT_HIGH_QUALITY]) .edgelist() .to_polars() .lazy() diff --git a/tests/pna/denoise/test_denoise_cli.py b/tests/pna/denoise/test_denoise_cli.py index 9af480201..8c618d55b 100644 --- a/tests/pna/denoise/test_denoise_cli.py +++ b/tests/pna/denoise/test_denoise_cli.py @@ -72,8 +72,7 @@ def test_denoise_ace_cli_runs_ok(denoise_pxl_file): result = read(out_pxl) obs = result.adata().obs assert ( - int(obs.loc["57129a8b0fff38c6", "denoised_nodes_marked_only_by_ace"]) - == 5436 + int(obs.loc["c4c3ef9497b3746d", "denoised_nodes_marked_only_by_ace"]) == 32 ) summary_cols = [ "denoised_nodes_marked_only_by_ace",