From bf7888c7508418d9bfef005540f0c5ef0843ed46 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Mar 2026 07:30:22 +0000 Subject: [PATCH 1/3] Initial plan From 749bdbbe762caf3e2dd1a32700c71d68109ddd44 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Mar 2026 07:36:26 +0000 Subject: [PATCH 2/3] feat: add programmatic optimization target diagnostics Agent-Logs-Url: https://github.com/SkBlaz/py3plex/sessions/58d30b19-4fb9-4966-946a-f6c620c7e049 Co-authored-by: SkBlaz <10035780+SkBlaz@users.noreply.github.com> --- py3plex/diagnostics/__init__.py | 10 + py3plex/diagnostics/optimization_targets.py | 284 ++++++++++++++++++ .../test_diagnostics_optimization_targets.py | 71 +++++ 3 files changed, 365 insertions(+) create mode 100644 py3plex/diagnostics/optimization_targets.py create mode 100644 tests/test_diagnostics_optimization_targets.py diff --git a/py3plex/diagnostics/__init__.py b/py3plex/diagnostics/__init__.py index 9fa157f44..ce71362d8 100644 --- a/py3plex/diagnostics/__init__.py +++ b/py3plex/diagnostics/__init__.py @@ -20,6 +20,12 @@ ) from .codes import ERROR_CODES, ErrorCode from .utils import fuzzy_match, did_you_mean +from .optimization_targets import ( + OptimizationArea, + OptimizationTarget, + OptimizationTargetReport, + find_optimization_targets, +) __all__ = [ "Diagnostic", @@ -31,4 +37,8 @@ "ErrorCode", "fuzzy_match", "did_you_mean", + "OptimizationArea", + "OptimizationTarget", + "OptimizationTargetReport", + "find_optimization_targets", ] diff --git a/py3plex/diagnostics/optimization_targets.py b/py3plex/diagnostics/optimization_targets.py new file mode 100644 index 000000000..e71361320 --- /dev/null +++ b/py3plex/diagnostics/optimization_targets.py @@ -0,0 +1,284 @@ +"""Performance bottleneck diagnostics for optimization targeting. + +This module provides structured, programmatic recommendations for likely +performance bottlenecks in key workflows: +- centrality robustness computations +- null model generation +- uncertainty quantification +""" + +from __future__ import annotations + +from dataclasses import asdict, dataclass +from enum import Enum +from typing import Any, Dict, List, Optional, Sequence + +from py3plex.profiling import get_monitor + + +class OptimizationArea(str, Enum): + """Supported optimization analysis areas.""" + + CENTRALITY = "centrality" + NULL_MODELS = "null_models" + UNCERTAINTY = "uncertainty" + + +@dataclass(frozen=True) +class OptimizationTarget: + """Single bottleneck and optimization recommendation.""" + + area: OptimizationArea + function: str + file_path: str + line_range: tuple[int, int] + bottleneck: str + complexity: str + optimization_target: str + expected_gain: str + impact: str + + def to_dict(self) -> Dict[str, Any]: + """Return JSON-serializable representation.""" + data = asdict(self) + data["area"] = self.area.value + return data + + +@dataclass +class OptimizationTargetReport: + """Collection of optimization targets and optional runtime context.""" + + targets: List[OptimizationTarget] + network_stats: Dict[str, int] + profiling: Optional[Dict[str, Dict[str, float]]] = None + + def to_dict(self) -> Dict[str, Any]: + """Return JSON-serializable representation.""" + return { + "targets": [target.to_dict() for target in self.targets], + "network_stats": self.network_stats, + "profiling": self.profiling, + "count": len(self.targets), + } + + +_TARGETS: List[OptimizationTarget] = [ + # Centrality + OptimizationTarget( + area=OptimizationArea.CENTRALITY, + function="py3plex.centrality.robustness.robustness_centrality", + file_path="py3plex/centrality/robustness.py", + line_range=(164, 194), + bottleneck=( + "For each candidate node/layer, the workflow builds a fully perturbed " + "network and recomputes the metric from scratch." + ), + complexity="O(targets × metric_cost + targets × graph_copy_cost)", + optimization_target=( + "Avoid full network reconstruction per target by using in-place masking " + "or cached filtered views when evaluating removals." + ), + expected_gain="High for large networks (often multi-x).", + impact="high", + ), + OptimizationTarget( + area=OptimizationArea.CENTRALITY, + function="py3plex.centrality.robustness._compute_avg_shortest_path", + file_path="py3plex/centrality/robustness.py", + line_range=(306, 334), + bottleneck=( + "Average shortest path is recomputed component-by-component with " + "nested all-pairs traversal." + ), + complexity="O(sum over components of n_component²)", + optimization_target=( + "Use approximation/sampling for path lengths on large components and " + "cache component decomposition across repeated evaluations." + ), + expected_gain="High on large connected components.", + impact="high", + ), + # Null models + OptimizationTarget( + area=OptimizationArea.NULL_MODELS, + function="py3plex.nullmodels.executor.generate_null_model", + file_path="py3plex/nullmodels/executor.py", + line_range=(71, 84), + bottleneck=( + "Parallel samples pass the full network object to each worker, " + "creating serialization overhead." + ), + complexity="O(samples × network_serialization_cost)", + optimization_target=( + "Pass compact immutable graph payloads (e.g., edge arrays/seed only) " + "to workers and reconstruct minimal state inside worker." + ), + expected_gain="Medium to high for multiprocessing and large graphs.", + impact="high", + ), + OptimizationTarget( + area=OptimizationArea.NULL_MODELS, + function="py3plex.nullmodels.models.configuration_model", + file_path="py3plex/nullmodels/models.py", + line_range=(174, 188), + bottleneck=( + "Nodes and edges are added one-by-one in Python loops when rebuilding " + "randomized networks." + ), + complexity="O(nodes + edges) with high Python overhead", + optimization_target=( + "Batch edge/node insertion (single add_edges/add_nodes call) and avoid " + "per-edge dict creation in inner loops." + ), + expected_gain="Medium.", + impact="medium", + ), + # Uncertainty + OptimizationTarget( + area=OptimizationArea.UNCERTAINTY, + function="py3plex.uncertainty.partition.CommunityDistribution._compute_coassoc_dense", + file_path="py3plex/uncertainty/partition.py", + line_range=(280, 288), + bottleneck=( + "Exact co-association matrix uses triple-nested Python loops across " + "partitions and node pairs." + ), + complexity="O(partitions × nodes²)", + optimization_target=( + "Vectorize co-association accumulation using numpy/grouped indexing and " + "prefer sparse incremental construction for top-k workflows." + ), + expected_gain="High.", + impact="high", + ), + OptimizationTarget( + area=OptimizationArea.UNCERTAINTY, + function="py3plex.uncertainty.partition_metrics.variation_of_information", + file_path="py3plex/uncertainty/partition_metrics.py", + line_range=(104, 107), + bottleneck=( + "Mutual-information term iterates contingency table with nested Python loops." + ), + complexity="O(n_labels_partition1 × n_labels_partition2)", + optimization_target=( + "Replace nested loops with masked vectorized numpy operations over " + "non-zero contingency entries." + ), + expected_gain="Medium.", + impact="medium", + ), + OptimizationTarget( + area=OptimizationArea.UNCERTAINTY, + function="py3plex.uncertainty.selection_execution.execute_selection_uq", + file_path="py3plex/uncertainty/selection_execution.py", + line_range=(150, 204), + bottleneck=( + "Sample execution loop is serial despite independent replicate runs." + ), + complexity="O(samples × query_runtime)", + optimization_target=( + "Parallelize replicate execution with deterministic child seeds and " + "aggregate reducer state after worker completion." + ), + expected_gain="Medium to high depending on sample count.", + impact="high", + ), +] + + +def _collect_network_stats(network: Any) -> Dict[str, int]: + """Collect lightweight network statistics for report context.""" + stats = {"nodes": 0, "edges": 0, "layers": 0} + + if network is None: + return stats + + try: + if hasattr(network, "get_nodes"): + stats["nodes"] = len(network.get_nodes()) + elif hasattr(network, "nodes"): + stats["nodes"] = len(network.nodes()) + except Exception: + pass + + try: + if hasattr(network, "get_edges"): + stats["edges"] = len(network.get_edges()) + elif hasattr(network, "edges"): + stats["edges"] = len(network.edges()) + except Exception: + pass + + try: + if hasattr(network, "get_layers"): + stats["layers"] = len(network.get_layers()) + elif hasattr(network, "layers"): + stats["layers"] = len(network.layers) + except Exception: + pass + + return stats + + +def _collect_profiled_functions(targets: Sequence[OptimizationTarget]) -> Dict[str, Dict[str, float]]: + """Pull matching profile records from the global performance monitor.""" + monitor = get_monitor() + result: Dict[str, Dict[str, float]] = {} + if not monitor.enabled: + return result + + target_functions = {t.function for t in targets} + for func_name, stats in monitor.stats.items(): + if func_name in target_functions: + call_count = float(stats.get("call_count", 0.0)) + total_time = float(stats.get("total_time", 0.0)) + avg_ms = (total_time / call_count * 1000.0) if call_count > 0 else 0.0 + result[func_name] = { + "call_count": call_count, + "total_time_s": total_time, + "avg_time_ms": avg_ms, + "max_time_s": float(stats.get("max_time", 0.0)), + } + return result + + +def find_optimization_targets( + network: Any = None, + areas: Optional[Sequence[OptimizationArea | str]] = None, + include_profiling: bool = True, +) -> OptimizationTargetReport: + """Find likely performance bottlenecks and concrete optimization targets. + + Parameters + ---------- + network : Any, optional + Network object used for contextual report statistics. + areas : Sequence[OptimizationArea | str], optional + Restrict analysis to a subset of areas. Defaults to all areas. + include_profiling : bool, default=True + If True, include matching entries from global PerformanceMonitor. + + Returns + ------- + OptimizationTargetReport + Structured bottleneck report suitable for programmatic consumption. + """ + normalized_areas = None + if areas is not None: + normalized_areas = {OptimizationArea(a) for a in areas} + + targets = [ + target for target in _TARGETS + if normalized_areas is None or target.area in normalized_areas + ] + + profiling = _collect_profiled_functions(targets) if include_profiling else None + if profiling == {}: + profiling = None + + return OptimizationTargetReport( + targets=targets, + network_stats=_collect_network_stats(network), + profiling=profiling, + ) diff --git a/tests/test_diagnostics_optimization_targets.py b/tests/test_diagnostics_optimization_targets.py new file mode 100644 index 000000000..ce4940745 --- /dev/null +++ b/tests/test_diagnostics_optimization_targets.py @@ -0,0 +1,71 @@ +"""Tests for optimization target diagnostics.""" + +from py3plex.diagnostics import ( + OptimizationArea, + find_optimization_targets, +) +from py3plex.profiling import get_monitor + + +class _TinyNetwork: + """Minimal network stub for diagnostics context.""" + + def get_nodes(self): + return [("a", "L0"), ("b", "L0"), ("a", "L1")] + + def get_edges(self): + return [ + ("a", "b", "L0", "L0"), + ("a", "a", "L0", "L1"), + ] + + def get_layers(self): + return ["L0", "L1"] + + +def test_find_optimization_targets_returns_expected_areas(): + report = find_optimization_targets() + assert report.targets + areas = {target.area for target in report.targets} + assert OptimizationArea.CENTRALITY in areas + assert OptimizationArea.NULL_MODELS in areas + assert OptimizationArea.UNCERTAINTY in areas + + +def test_find_optimization_targets_area_filter(): + report = find_optimization_targets(areas=[OptimizationArea.CENTRALITY]) + assert report.targets + assert all(target.area == OptimizationArea.CENTRALITY for target in report.targets) + + +def test_find_optimization_targets_collects_network_stats(): + report = find_optimization_targets(network=_TinyNetwork()) + assert report.network_stats["nodes"] == 3 + assert report.network_stats["edges"] == 2 + assert report.network_stats["layers"] == 2 + + +def test_find_optimization_targets_includes_profile_data_for_tracked_function(): + monitor = get_monitor() + monitor.clear() + monitor.record( + "py3plex.nullmodels.executor.generate_null_model", + elapsed=0.5, + memory_delta=0.0, + ) + + report = find_optimization_targets(include_profiling=True) + assert report.profiling is not None + assert "py3plex.nullmodels.executor.generate_null_model" in report.profiling + row = report.profiling["py3plex.nullmodels.executor.generate_null_model"] + assert row["call_count"] == 1.0 + assert row["total_time_s"] == 0.5 + assert row["avg_time_ms"] == 500.0 + + +def test_optimization_report_to_dict_is_serializable_shape(): + report = find_optimization_targets(areas=["null_models"]) + payload = report.to_dict() + assert payload["count"] == len(payload["targets"]) + assert payload["targets"] + assert payload["targets"][0]["area"] == "null_models" From c44ee360fc3606fff07b0e0d468d2b5081b25ef6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Mar 2026 07:38:02 +0000 Subject: [PATCH 3/3] fix: address review feedback for optimization diagnostics typing and profiling checks Agent-Logs-Url: https://github.com/SkBlaz/py3plex/sessions/58d30b19-4fb9-4966-946a-f6c620c7e049 Co-authored-by: SkBlaz <10035780+SkBlaz@users.noreply.github.com> --- py3plex/diagnostics/optimization_targets.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/py3plex/diagnostics/optimization_targets.py b/py3plex/diagnostics/optimization_targets.py index e71361320..290de3daa 100644 --- a/py3plex/diagnostics/optimization_targets.py +++ b/py3plex/diagnostics/optimization_targets.py @@ -11,7 +11,7 @@ from dataclasses import asdict, dataclass from enum import Enum -from typing import Any, Dict, List, Optional, Sequence +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union from py3plex.profiling import get_monitor @@ -31,7 +31,7 @@ class OptimizationTarget: area: OptimizationArea function: str file_path: str - line_range: tuple[int, int] + line_range: Tuple[int, int] bottleneck: str complexity: str optimization_target: str @@ -231,7 +231,8 @@ def _collect_profiled_functions(targets: Sequence[OptimizationTarget]) -> Dict[s target_functions = {t.function for t in targets} for func_name, stats in monitor.stats.items(): if func_name in target_functions: - call_count = float(stats.get("call_count", 0.0)) + raw_call_count = stats.get("call_count", 0.0) + call_count = max(float(raw_call_count), 0.0) total_time = float(stats.get("total_time", 0.0)) avg_ms = (total_time / call_count * 1000.0) if call_count > 0 else 0.0 result[func_name] = { @@ -245,7 +246,7 @@ def _collect_profiled_functions(targets: Sequence[OptimizationTarget]) -> Dict[s def find_optimization_targets( network: Any = None, - areas: Optional[Sequence[OptimizationArea | str]] = None, + areas: Optional[Sequence[Union[OptimizationArea, str]]] = None, include_profiling: bool = True, ) -> OptimizationTargetReport: """Find likely performance bottlenecks and concrete optimization targets. @@ -274,7 +275,7 @@ def find_optimization_targets( ] profiling = _collect_profiled_functions(targets) if include_profiling else None - if profiling == {}: + if not profiling: profiling = None return OptimizationTargetReport(