Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions py3plex/diagnostics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
)
from .codes import ERROR_CODES, ErrorCode
from .utils import fuzzy_match, did_you_mean
from .optimization_targets import (
OptimizationArea,
OptimizationTarget,
OptimizationTargetReport,
find_optimization_targets,
)

__all__ = [
"Diagnostic",
Expand All @@ -31,4 +37,8 @@
"ErrorCode",
"fuzzy_match",
"did_you_mean",
"OptimizationArea",
"OptimizationTarget",
"OptimizationTargetReport",
"find_optimization_targets",
]
285 changes: 285 additions & 0 deletions py3plex/diagnostics/optimization_targets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
"""Performance bottleneck diagnostics for optimization targeting.

This module provides structured, programmatic recommendations for likely
performance bottlenecks in key workflows:
- centrality robustness computations
- null model generation
- uncertainty quantification
"""

from __future__ import annotations

from dataclasses import asdict, dataclass
from enum import Enum
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union

from py3plex.profiling import get_monitor


class OptimizationArea(str, Enum):
    """Supported optimization analysis areas.

    Mixes in ``str`` so each member compares equal to its plain string
    value and round-trips through lookup, e.g.
    ``OptimizationArea("centrality") is OptimizationArea.CENTRALITY``.
    """

    CENTRALITY = "centrality"    # centrality robustness computations
    NULL_MODELS = "null_models"  # null model generation
    UNCERTAINTY = "uncertainty"  # uncertainty quantification


@dataclass(frozen=True)
class OptimizationTarget:
    """Immutable record of one performance bottleneck and its suggested fix."""

    area: OptimizationArea            # workflow area the target belongs to
    function: str                     # fully qualified function name
    file_path: str                    # repository-relative source path
    line_range: Tuple[int, int]       # (start, end) lines of the hotspot
    bottleneck: str                   # what makes the code slow
    complexity: str                   # asymptotic cost description
    optimization_target: str          # concrete recommended change
    expected_gain: str                # anticipated speedup, qualitatively
    impact: str                       # priority label (e.g. "high", "medium")

    def to_dict(self) -> Dict[str, Any]:
        """Return JSON-serializable representation (enum flattened to its value)."""
        return {
            "area": self.area.value,
            "function": self.function,
            "file_path": self.file_path,
            "line_range": self.line_range,
            "bottleneck": self.bottleneck,
            "complexity": self.complexity,
            "optimization_target": self.optimization_target,
            "expected_gain": self.expected_gain,
            "impact": self.impact,
        }


@dataclass
class OptimizationTargetReport:
    """Bundle of optimization targets plus optional runtime context."""

    targets: List[OptimizationTarget]                        # matched recommendations
    network_stats: Dict[str, int]                            # nodes/edges/layers context
    profiling: Optional[Dict[str, Dict[str, float]]] = None  # monitor data, if any

    def to_dict(self) -> Dict[str, Any]:
        """Return JSON-serializable representation with a redundant count field."""
        serialized_targets = []
        for target in self.targets:
            serialized_targets.append(target.to_dict())
        return {
            "targets": serialized_targets,
            "network_stats": self.network_stats,
            "profiling": self.profiling,
            "count": len(serialized_targets),
        }


# Static, hand-curated catalog of known hotspots across the three analysis
# areas. NOTE(review): the line_range values reference a specific revision of
# each target file and may drift as those modules change — keep them in sync.
_TARGETS: List[OptimizationTarget] = [
    # Centrality
    OptimizationTarget(
        area=OptimizationArea.CENTRALITY,
        function="py3plex.centrality.robustness.robustness_centrality",
        file_path="py3plex/centrality/robustness.py",
        line_range=(164, 194),
        bottleneck=(
            "For each candidate node/layer, the workflow builds a fully perturbed "
            "network and recomputes the metric from scratch."
        ),
        complexity="O(targets × metric_cost + targets × graph_copy_cost)",
        optimization_target=(
            "Avoid full network reconstruction per target by using in-place masking "
            "or cached filtered views when evaluating removals."
        ),
        expected_gain="High for large networks (often multi-x).",
        impact="high",
    ),
    OptimizationTarget(
        area=OptimizationArea.CENTRALITY,
        function="py3plex.centrality.robustness._compute_avg_shortest_path",
        file_path="py3plex/centrality/robustness.py",
        line_range=(306, 334),
        bottleneck=(
            "Average shortest path is recomputed component-by-component with "
            "nested all-pairs traversal."
        ),
        complexity="O(sum over components of n_component²)",
        optimization_target=(
            "Use approximation/sampling for path lengths on large components and "
            "cache component decomposition across repeated evaluations."
        ),
        expected_gain="High on large connected components.",
        impact="high",
    ),
    # Null models
    OptimizationTarget(
        area=OptimizationArea.NULL_MODELS,
        function="py3plex.nullmodels.executor.generate_null_model",
        file_path="py3plex/nullmodels/executor.py",
        line_range=(71, 84),
        bottleneck=(
            "Parallel samples pass the full network object to each worker, "
            "creating serialization overhead."
        ),
        complexity="O(samples × network_serialization_cost)",
        optimization_target=(
            "Pass compact immutable graph payloads (e.g., edge arrays/seed only) "
            "to workers and reconstruct minimal state inside worker."
        ),
        expected_gain="Medium to high for multiprocessing and large graphs.",
        impact="high",
    ),
    OptimizationTarget(
        area=OptimizationArea.NULL_MODELS,
        function="py3plex.nullmodels.models.configuration_model",
        file_path="py3plex/nullmodels/models.py",
        line_range=(174, 188),
        bottleneck=(
            "Nodes and edges are added one-by-one in Python loops when rebuilding "
            "randomized networks."
        ),
        complexity="O(nodes + edges) with high Python overhead",
        optimization_target=(
            "Batch edge/node insertion (single add_edges/add_nodes call) and avoid "
            "per-edge dict creation in inner loops."
        ),
        expected_gain="Medium.",
        impact="medium",
    ),
    # Uncertainty
    OptimizationTarget(
        area=OptimizationArea.UNCERTAINTY,
        function="py3plex.uncertainty.partition.CommunityDistribution._compute_coassoc_dense",
        file_path="py3plex/uncertainty/partition.py",
        line_range=(280, 288),
        bottleneck=(
            "Exact co-association matrix uses triple-nested Python loops across "
            "partitions and node pairs."
        ),
        complexity="O(partitions × nodes²)",
        optimization_target=(
            "Vectorize co-association accumulation using numpy/grouped indexing and "
            "prefer sparse incremental construction for top-k workflows."
        ),
        expected_gain="High.",
        impact="high",
    ),
    OptimizationTarget(
        area=OptimizationArea.UNCERTAINTY,
        function="py3plex.uncertainty.partition_metrics.variation_of_information",
        file_path="py3plex/uncertainty/partition_metrics.py",
        line_range=(104, 107),
        bottleneck=(
            "Mutual-information term iterates contingency table with nested Python loops."
        ),
        complexity="O(n_labels_partition1 × n_labels_partition2)",
        optimization_target=(
            "Replace nested loops with masked vectorized numpy operations over "
            "non-zero contingency entries."
        ),
        expected_gain="Medium.",
        impact="medium",
    ),
    OptimizationTarget(
        area=OptimizationArea.UNCERTAINTY,
        function="py3plex.uncertainty.selection_execution.execute_selection_uq",
        file_path="py3plex/uncertainty/selection_execution.py",
        line_range=(150, 204),
        bottleneck=(
            "Sample execution loop is serial despite independent replicate runs."
        ),
        complexity="O(samples × query_runtime)",
        optimization_target=(
            "Parallelize replicate execution with deterministic child seeds and "
            "aggregate reducer state after worker completion."
        ),
        expected_gain="Medium to high depending on sample count.",
        impact="high",
    ),
]


def _collect_network_stats(network: Any) -> Dict[str, int]:
"""Collect lightweight network statistics for report context."""
stats = {"nodes": 0, "edges": 0, "layers": 0}

if network is None:
return stats

try:
if hasattr(network, "get_nodes"):
stats["nodes"] = len(network.get_nodes())
elif hasattr(network, "nodes"):
stats["nodes"] = len(network.nodes())
except Exception:
pass

try:
if hasattr(network, "get_edges"):
stats["edges"] = len(network.get_edges())
elif hasattr(network, "edges"):
stats["edges"] = len(network.edges())
except Exception:
pass

try:
if hasattr(network, "get_layers"):
stats["layers"] = len(network.get_layers())
elif hasattr(network, "layers"):
stats["layers"] = len(network.layers)
except Exception:
pass

return stats


def _collect_profiled_functions(targets: Sequence[OptimizationTarget]) -> Dict[str, Dict[str, float]]:
    """Pull matching profile records from the global performance monitor.

    Returns an empty mapping when monitoring is disabled or no tracked
    function matches a target.
    """
    monitor = get_monitor()
    profiled: Dict[str, Dict[str, float]] = {}
    if not monitor.enabled:
        return profiled

    wanted = {target.function for target in targets}
    for func_name, record in monitor.stats.items():
        if func_name not in wanted:
            continue
        # Clamp the call count at zero so a malformed record cannot produce
        # a negative average below.
        calls = max(float(record.get("call_count", 0.0)), 0.0)
        total_s = float(record.get("total_time", 0.0))
        mean_ms = total_s / calls * 1000.0 if calls > 0 else 0.0
        profiled[func_name] = {
            "call_count": calls,
            "total_time_s": total_s,
            "avg_time_ms": mean_ms,
            "max_time_s": float(record.get("max_time", 0.0)),
        }
    return profiled


def find_optimization_targets(
    network: Any = None,
    areas: Optional[Sequence[Union[OptimizationArea, str]]] = None,
    include_profiling: bool = True,
) -> OptimizationTargetReport:
    """Find likely performance bottlenecks and concrete optimization targets.

    Parameters
    ----------
    network : Any, optional
        Network object used only for contextual report statistics.
    areas : Sequence[OptimizationArea | str], optional
        Restrict the report to these areas; strings are coerced via
        ``OptimizationArea(...)`` (raising ``ValueError`` on unknown names).
        Defaults to all areas.
    include_profiling : bool, default=True
        If True, attach matching records from the global PerformanceMonitor;
        an empty match set is reported as ``None``.

    Returns
    -------
    OptimizationTargetReport
        Structured bottleneck report suitable for programmatic consumption.
    """
    if areas is None:
        selected = list(_TARGETS)
    else:
        wanted = {OptimizationArea(area) for area in areas}
        selected = [target for target in _TARGETS if target.area in wanted]

    profiling_data = None
    if include_profiling:
        # Collapse an empty profiling mapping to None for a cleaner payload.
        profiling_data = _collect_profiled_functions(selected) or None

    return OptimizationTargetReport(
        targets=selected,
        network_stats=_collect_network_stats(network),
        profiling=profiling_data,
    )
71 changes: 71 additions & 0 deletions tests/test_diagnostics_optimization_targets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""Tests for optimization target diagnostics."""

from py3plex.diagnostics import (
OptimizationArea,
find_optimization_targets,
)
from py3plex.profiling import get_monitor


class _TinyNetwork:
"""Minimal network stub for diagnostics context."""

def get_nodes(self):
return [("a", "L0"), ("b", "L0"), ("a", "L1")]

def get_edges(self):
return [
("a", "b", "L0", "L0"),
("a", "a", "L0", "L1"),
]

def get_layers(self):
return ["L0", "L1"]


def test_find_optimization_targets_returns_expected_areas():
    """The default report must be non-empty and span every analysis area."""
    report = find_optimization_targets()
    assert report.targets
    seen_areas = {target.area for target in report.targets}
    assert OptimizationArea.CENTRALITY in seen_areas
    assert OptimizationArea.NULL_MODELS in seen_areas
    assert OptimizationArea.UNCERTAINTY in seen_areas


def test_find_optimization_targets_area_filter():
    """Filtering by one area yields only targets from that area."""
    report = find_optimization_targets(areas=[OptimizationArea.CENTRALITY])
    assert report.targets
    for target in report.targets:
        assert target.area == OptimizationArea.CENTRALITY


def test_find_optimization_targets_collects_network_stats():
    """Counts in the report match the stub network's accessors."""
    stats = find_optimization_targets(network=_TinyNetwork()).network_stats
    assert stats["nodes"] == 3
    assert stats["edges"] == 2
    assert stats["layers"] == 2


def test_find_optimization_targets_includes_profile_data_for_tracked_function():
    """A recorded monitor entry for a known target surfaces in the report."""
    tracked = "py3plex.nullmodels.executor.generate_null_model"
    monitor = get_monitor()
    monitor.clear()
    monitor.record(tracked, elapsed=0.5, memory_delta=0.0)

    report = find_optimization_targets(include_profiling=True)
    assert report.profiling is not None
    assert tracked in report.profiling
    entry = report.profiling[tracked]
    assert entry["call_count"] == 1.0
    assert entry["total_time_s"] == 0.5
    assert entry["avg_time_ms"] == 500.0


def test_optimization_report_to_dict_is_serializable_shape():
    """String area filters work, and to_dict flattens enums consistently."""
    payload = find_optimization_targets(areas=["null_models"]).to_dict()
    assert payload["count"] == len(payload["targets"])
    assert payload["targets"]
    first_target = payload["targets"][0]
    assert first_target["area"] == "null_models"
Loading