From 09f48c2eea2676dbb5c459010b64b6458c7ab8ec Mon Sep 17 00:00:00 2001 From: Alice-and-Bob Date: Fri, 7 Nov 2025 22:29:55 -0600 Subject: [PATCH 1/2] fix influence spread bug in greedy search --- CHANGELOG.md | 21 +++++++++++++++++++++ docs/api_reference.rst | 12 ++++++------ docs/index.rst | 8 ++++---- docs/quickstart.rst | 27 ++++++++++++--------------- graphem/influence.py | 22 ++++++++++------------ 5 files changed, 53 insertions(+), 37 deletions(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..aee6a5a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,21 @@ +# Changelog + +All notable changes to GraphEm will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Fixed +- **Critical bug fix in influence maximization** (`graphem/influence.py`): Fixed `ndlib_estimated_influence()` function to correctly initialize seed nodes using NDlib's proper API. Previously, the function was using an incorrect configuration method that resulted in seeds not being properly set, leading to inaccurate influence estimations. The fix ensures: + - Seeds are now correctly initialized using `config.add_model_initial_configuration("Infected", seeds)` instead of the incorrect `config.add_node_configuration("status", seed, 1)` + - Influenced node counts are now correctly retrieved from `iterations[-1]['node_count'].get(2, 0)` instead of manually iterating through status values + - All influence maximization benchmarks and comparisons now produce accurate results + - High-degree seeds now correctly show higher influence than low-degree seeds + +This bug affected all influence maximization functionality including `graphem_seed_selection()`, `greedy_seed_selection()`, and benchmark comparisons. 
Users should re-run any influence maximization experiments performed with previous versions. + +## [Previous Releases] + +For release history before this changelog was established, see the [GitHub Releases](https://github.com/igorrivin/graphem/releases) page. diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 3b08e6f..636da99 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -63,14 +63,14 @@ Key functions: Influence Maximization ---------------------- -**graphem.influence** - Seed selection algorithms for influence maximization in networks. - -Implements GraphEm-based seed selection using radial distances from embedding origin, plus traditional greedy methods with NDlib simulation. +**graphem.influence** - Seed selection and influence estimation for networks. Key functions: -- ``graphem_seed_selection(embedder, k)`` - Select seeds based on radial distances -- ``greedy_seed_selection(G, k, p)`` - Traditional greedy algorithm -- ``ndlib_estimated_influence(G, seeds, p)`` - Evaluate influence using Independent Cascades +- ``graphem_seed_selection(embedder, k, num_iterations)`` - Fast seed selection using embedding radial distances +- ``greedy_seed_selection(G, k, p, iterations_count)`` - Greedy algorithm maximizing marginal influence gain +- ``ndlib_estimated_influence(G, seeds, p, iterations_count)`` - Estimate influence spread via Independent Cascades simulation + +Returns seed node lists and influence spread estimates (node counts). .. 
automodule:: graphem.influence :members: diff --git a/docs/index.rst b/docs/index.rst index 65b9711..5e5c970 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -136,11 +136,11 @@ High performance index for efficient k-nearest neighbor search in high-dimension Influence Maximization ~~~~~~~~~~~~~~~~~~~~~~ -Advanced algorithms for identifying influential nodes in networks: +Algorithms for identifying influential nodes: -* **GraphEm Method**: Uses embedding radial distances to select diverse, influential seeds -* **Greedy Baseline**: Traditional greedy algorithm for comparison -* **Spread Simulation**: NDlib integration for accurate influence estimation +* **GraphEm Method**: Fast selection using embedding radial distances +* **Greedy Algorithm**: Iterative marginal gain maximization +* **Influence Estimation**: Independent Cascades simulation via NDlib Graph Generators ~~~~~~~~~~~~~~~~ diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 8447393..699648a 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -129,29 +129,26 @@ Load and analyze real-world networks: Influence Maximization ----------------------- -Find the most influential nodes in a network: +Identify influential nodes: .. 
code-block:: python import networkx as nx - - # Convert to NetworkX for influence analysis + G = nx.Graph() G.add_nodes_from(range(n_vertices)) G.add_edges_from(edges) - - # Method 1: GraphEm-based selection (uses embedding) + + # Fast: embedding-based selection seeds_graphem = ge.graphem_seed_selection(embedder, k=10, num_iterations=20) - - # Method 2: Greedy selection (traditional approach) - seeds_greedy = ge.greedy_seed_selection(G, k=10) - - # Estimate influence spread - influence, iterations = ge.ndlib_estimated_influence( - G, seeds_graphem, p=0.1, iterations_count=200 - ) - - print(f"GraphEm method: {influence} nodes influenced ({influence/n_vertices:.2%})") + + # Accurate: greedy algorithm + seeds_greedy, total_iters = ge.greedy_seed_selection(G, k=10, p=0.1, iterations_count=100) + + # Evaluate influence spread (Independent Cascades model) + influence, iters = ge.ndlib_estimated_influence(G, seeds_graphem, p=0.1, iterations_count=200) + + print(f"Influenced: {influence}/{n_vertices} nodes ({influence/n_vertices:.1%})") Benchmarking and Analysis ------------------------- diff --git a/graphem/influence.py b/graphem/influence.py index ec0bbfe..b0a7645 100644 --- a/graphem/influence.py +++ b/graphem/influence.py @@ -62,25 +62,23 @@ def ndlib_estimated_influence(G, seeds, p=0.1, iterations_count=200): # Configure the Independent Cascades model model = ep.IndependentCascadesModel(G) config = mc.Configuration() - + # Set edge propagation probabilities for e in G.edges(): config.add_edge_configuration("threshold", e, p) - + + # Set initial infected nodes using the proper API + config.add_model_initial_configuration("Infected", seeds) + # Initialize the model with configuration model.set_initial_status(config) - - # Set initial seeds to infected state - for seed in seeds: - config.add_node_configuration("status", seed, 1) - + # Run the simulation iterations = model.iteration_bunch(iterations_count) - - # Get the number of nodes in state 2 (influenced) at the end - 
final_status = iterations[-1]['status'] - influenced_count = sum(1 for node_state in final_status.values() if node_state == 2) - + + # Get the number of nodes in state 2 (influenced) from node_count + influenced_count = iterations[-1]['node_count'].get(2, 0) + return influenced_count, len(iterations) From 36d245fe3bfb78e1bcd847f76cf0427eca9a02a8 Mon Sep 17 00:00:00 2001 From: Alice-and-Bob Date: Sat, 15 Nov 2025 23:53:59 +0100 Subject: [PATCH 2/2] Release 0.2.0: Major API update with sparse adjacency matrices --- CHANGELOG.md | 55 ++++- README.md | 11 +- docs/api_reference.rst | 15 +- docs/index.rst | 39 ++-- docs/installation.rst | 10 +- docs/quickstart.rst | 101 +++++---- docs/tutorials.rst | 211 +++++++++-------- examples/graph_generator_example.py | 75 +++---- examples/random_regular_example.py | 54 +++-- examples/real_world_datasets_example.py | 126 +++++------ graphem/__init__.py | 7 +- graphem/benchmark.py | 50 ++--- graphem/embedder.py | 96 +++++--- graphem/generators.py | 287 +++++++++++++----------- graphem/influence.py | 4 +- tests/test_embedder.py | 160 +++++++++---- tests/test_generators.py | 269 +++++++++++++--------- tests/test_influence.py | 3 +- 18 files changed, 896 insertions(+), 677 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aee6a5a..c2c524e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,28 @@ All notable changes to GraphEm will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [Unreleased] +## [0.2.0] - 2025-11-15 + +### Added +- **New graph generators** (`graphem/generators.py`): + - `generate_delaunay_triangulation()`: Generate planar graphs with triangular faces based on Delaunay triangulation + - `generate_complete_bipartite_graph()`: Generate complete bipartite graphs + +### Changed +- **BREAKING: API aligned with CUDA version** - Major API changes for consistency: + - **All generators now return sparse adjacency matrices** (scipy.sparse.csr_matrix) instead of edge lists + - Renamed `erdos_renyi_graph()` → `generate_er()` for consistency with other generators + - `GraphEmbedder` now accepts `adjacency` (sparse matrix) instead of `edges` + `n_vertices` + - `GraphEmbedder` parameter renamed: `my_logger` → `logger_instance` + - `generate_bipartite_graph()` now accepts `p` and `seed` parameters for better control + - `compute_vertex_degrees()` now accepts adjacency matrix instead of edge list + +- **GraphEmbedder improvements**: + - Added `seed` parameter for reproducibility + - Added `get_positions()` method that returns numpy array + - Made `positions` a property (internally uses `_positions`) + - Automatically infers number of vertices from adjacency matrix shape + - Improved adjacency matrix validation ### Fixed - **Critical bug fix in influence maximization** (`graphem/influence.py`): Fixed `ndlib_estimated_influence()` function to correctly initialize seed nodes using NDlib's proper API. Previously, the function was using an incorrect configuration method that resulted in seeds not being properly set, leading to inaccurate influence estimations. The fix ensures: @@ -16,6 +37,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 This bug affected all influence maximization functionality including `graphem_seed_selection()`, `greedy_seed_selection()`, and benchmark comparisons. Users should re-run any influence maximization experiments performed with previous versions. 
+### Migration Guide +To upgrade from 0.1.x to 0.2.0: + +**Generators:** +```python +# OLD (0.1.x) +edges = ge.erdos_renyi_graph(n=100, p=0.1) # Returns edge list +n = 100 + +# NEW (0.2.0) +adj = ge.generate_er(n=100, p=0.1) # Returns sparse adjacency matrix +n = adj.shape[0] # Infer from matrix +``` + +**GraphEmbedder:** +```python +# OLD (0.1.x) +embedder = ge.GraphEmbedder(edges=edges, n_vertices=n, n_components=2, my_logger=logger) + +# NEW (0.2.0) +embedder = ge.GraphEmbedder(adjacency=adj, n_components=2, logger_instance=logger) +``` + +**compute_vertex_degrees:** +```python +# OLD (0.1.x) +degrees = ge.compute_vertex_degrees(n, edges) + +# NEW (0.2.0) +degrees = ge.compute_vertex_degrees(adj) +``` + ## [Previous Releases] For release history before this changelog was established, see the [GitHub Releases](https://github.com/igorrivin/graphem/releases) page. diff --git a/README.md b/README.md index b512373..a36a23d 100644 --- a/README.md +++ b/README.md @@ -65,13 +65,12 @@ pip install git+https://github.com/igorrivin/graphem.git ```python import graphem as ge -# Generate graph -edges = ge.erdos_renyi_graph(n=500, p=0.01) +# Generate graph (returns sparse adjacency matrix) +adjacency = ge.generate_er(n=500, p=0.01) # Create embedder embedder = ge.GraphEmbedder( - edges=edges, - n_vertices=500, + adjacency=adjacency, n_components=3 ) @@ -90,7 +89,7 @@ seeds = ge.graphem_seed_selection(embedder, k=10) # Estimate influence spread import networkx as nx -G = nx.from_edgelist(edges) +G = nx.from_scipy_sparse_array(adjacency) influence, _ = ge.ndlib_estimated_influence(G, seeds, p=0.1) print(f"Influence: {influence} nodes ({influence/500:.1%})") ``` @@ -102,7 +101,7 @@ from graphem.benchmark import benchmark_correlations # Compare embedding radii with centrality measures results = benchmark_correlations( - ge.erdos_renyi_graph, + ge.generate_er, graph_params={'n': 200, 'p': 0.05}, n_components=3, num_iterations=40 diff --git a/docs/api_reference.rst 
b/docs/api_reference.rst index 636da99..368a040 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -45,15 +45,16 @@ Graph Generators **graphem.generators** - Generate various graph types for testing and experimentation. -Provides NetworkX-based generators for standard graph models including random graphs, scale-free networks, small-world graphs, and more. +Provides NetworkX-based generators for standard graph models including random graphs, scale-free networks, small-world graphs, and more. All generators return sparse adjacency matrices (scipy.sparse.csr_matrix). Key functions: -- ``erdos_renyi_graph(n, p)`` - Random graph with edge probability p -- ``generate_sbm(n_per_block, num_blocks, p_in, p_out)`` - Stochastic block model -- ``generate_ba(n, m)`` - Barabási-Albert preferential attachment -- ``generate_ws(n, k, p)`` - Watts-Strogatz small-world -- ``generate_scale_free(n, ...)`` - Scale-free network -- ``generate_geometric(n, radius)`` - Random geometric graph +- ``generate_er(n, p, seed)`` - Erdős–Rényi random graph with edge probability p +- ``generate_sbm(n_per_block, num_blocks, p_in, p_out, seed)`` - Stochastic block model +- ``generate_ba(n, m, seed)`` - Barabási-Albert preferential attachment +- ``generate_ws(n, k, p, seed)`` - Watts-Strogatz small-world +- ``generate_scale_free(n, seed)`` - Scale-free network +- ``generate_geometric(n, radius, seed)`` - Random geometric graph +- ``compute_vertex_degrees(adjacency)`` - Compute vertex degrees from adjacency matrix .. 
automodule:: graphem.generators :members: diff --git a/docs/index.rst b/docs/index.rst index 5e5c970..64a6296 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -79,25 +79,24 @@ Quick Start Example import graphem as ge import networkx as nx - # Generate a scale-free network - edges = ge.generate_ba(n=1000, m=3) - + # Generate a scale-free network (returns sparse adjacency matrix) + adjacency = ge.generate_ba(n=1000, m=3, seed=42) + # Create and run embedding embedder = ge.GraphEmbedder( - edges=edges, - n_vertices=1000, + adjacency=adjacency, n_components=3 ) embedder.run_layout(num_iterations=50) - + # Find influential nodes seeds = ge.graphem_seed_selection(embedder, k=20) - + # Estimate influence spread - G = nx.Graph(edges) - influence, _ = ge.ndlib_estimated_influence(G, seeds, p=0.1) + G = nx.from_scipy_sparse_array(adjacency) + influence, _ = ge.ndlib_estimated_influence(G, seeds, p=0.1, iterations_count=100) print(f"Influence spread: {influence} nodes ({influence/1000:.1%})") - + # Visualize results embedder.display_layout() @@ -149,18 +148,18 @@ Comprehensive collection of standard and custom graph models: .. 
code-block:: python - # Classic models - edges = ge.erdos_renyi_graph(n=500, p=0.02) - edges = ge.generate_ba(n=500, m=3) # Scale-free - edges = ge.generate_ws(n=500, k=6, p=0.1) # Small-world - + # Classic models (all return sparse adjacency matrices) + adjacency = ge.generate_er(n=500, p=0.02, seed=42) + adjacency = ge.generate_ba(n=500, m=3, seed=42) # Scale-free + adjacency = ge.generate_ws(n=500, k=6, p=0.1, seed=42) # Small-world + # Community structures - edges = ge.generate_sbm(sizes=[100, 150, 100], p_in=0.1, p_out=0.01) - edges = ge.generate_caveman(clique_size=10, num_cliques=5) - + adjacency = ge.generate_sbm(n_per_block=100, num_blocks=3, p_in=0.1, p_out=0.01, seed=42) + adjacency = ge.generate_caveman(l=5, k=10) + # Specialized networks - edges = ge.generate_geometric(n=300, radius=0.2) - edges = ge.generate_road_network(grid_size=20, connection_prob=0.8) + adjacency = ge.generate_geometric(n=300, radius=0.2, seed=42) + adjacency = ge.generate_road_network(width=20, height=20) Performance Characteristics --------------------------- diff --git a/docs/installation.rst b/docs/installation.rst index ed22ec9..3cec37b 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -79,12 +79,12 @@ Test your installation: .. 
code-block:: python import graphem as ge - - # Generate a small test graph - edges = ge.erdos_renyi_graph(n=100, p=0.1) - embedder = ge.GraphEmbedder(edges, n_vertices=100) + + # Generate a small test graph (returns sparse adjacency matrix) + adjacency = ge.generate_er(n=100, p=0.1, seed=42) + embedder = ge.GraphEmbedder(adjacency=adjacency, n_components=2) embedder.run_layout(num_iterations=10) - + print("GraphEm installation successful!") Troubleshooting diff --git a/docs/quickstart.rst b/docs/quickstart.rst index 699648a..eb2eba0 100644 --- a/docs/quickstart.rst +++ b/docs/quickstart.rst @@ -24,29 +24,29 @@ Let's start with a simple example of embedding a random graph: import graphem as ge import numpy as np - # Generate a random graph - edges = ge.erdos_renyi_graph(n=200, p=0.05) - + # Generate a random graph (returns sparse adjacency matrix) + adjacency = ge.generate_er(n=200, p=0.05, seed=42) + # Create an embedder embedder = ge.GraphEmbedder( - edges=edges, - n_vertices=200, + adjacency=adjacency, n_components=3, # 3D embedding L_min=10.0, # Minimum edge length k_attr=0.5, # Attraction force k_inter=0.1, # Repulsion force n_neighbors=15 # Nearest neighbors ) - + # Compute the embedding embedder.run_layout(num_iterations=40) - + # Visualize the result embedder.display_layout(edge_width=0.5, node_size=5) Understanding the Parameters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* **adjacency**: Sparse adjacency matrix (scipy.sparse format) * **n_components**: Embedding space dimension (2D or 3D) * **L_min**: Controls minimum distance between connected nodes * **k_attr**: Strength of attractive forces between connected nodes @@ -56,51 +56,53 @@ Understanding the Parameters Graph Generation ---------------- -GraphEm provides various graph generators: +GraphEm provides various graph generators that return sparse adjacency matrices: .. 
code-block:: python # Scale-free network (Barabási–Albert) - edges = ge.generate_ba(n=500, m=3) - + adjacency = ge.generate_ba(n=500, m=3, seed=42) + # Small-world network (Watts–Strogatz) - edges = ge.generate_ws(n=500, k=6, p=0.1) - + adjacency = ge.generate_ws(n=500, k=6, p=0.1, seed=42) + # Stochastic block model - edges = ge.generate_sbm(n_per_block=100, num_blocks=3, p_in=0.1, p_out=0.01) - + adjacency = ge.generate_sbm(n_per_block=100, num_blocks=3, p_in=0.1, p_out=0.01, seed=42) + # Random regular graph - edges = ge.generate_random_regular(n=300, d=4) + adjacency = ge.generate_random_regular(n=300, d=4, seed=42) Complete Graph Generator Reference ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -GraphEm provides 12+ graph generators for different network types: +GraphEm provides 14+ graph generators for different network types: .. code-block:: python # Random graphs - edges = ge.erdos_renyi_graph(n=500, p=0.02) # Random graph - edges = ge.generate_random_regular(n=300, d=4) # Regular degree - edges = ge.generate_geometric(n=200, radius=0.2) # Geometric graph + adjacency = ge.generate_er(n=500, p=0.02, seed=42) # Erdős-Rényi random graph + adjacency = ge.generate_random_regular(n=300, d=4, seed=42) # Regular degree + adjacency = ge.generate_geometric(n=200, radius=0.2, seed=42) # Geometric graph # Scale-free and complex networks - edges = ge.generate_ba(n=500, m=3) # Barabási-Albert - edges = ge.generate_scale_free(n=400, alpha=0.41, beta=0.54) # Scale-free - edges = ge.generate_power_cluster(n=500, m=3, p=0.5) # Powerlaw cluster + adjacency = ge.generate_ba(n=500, m=3, seed=42) # Barabási-Albert + adjacency = ge.generate_scale_free(n=400, seed=42) # Scale-free + adjacency = ge.generate_power_cluster(n=500, m=3, p=0.5, seed=42) # Powerlaw cluster # Small-world networks - edges = ge.generate_ws(n=500, k=6, p=0.1) # Watts-Strogatz + adjacency = ge.generate_ws(n=500, k=6, p=0.1, seed=42) # Watts-Strogatz # Community structures - edges = ge.generate_sbm(n_per_block=100, 
num_blocks=3, p_in=0.1, p_out=0.01) - edges = ge.generate_caveman(l=10, k=10) # Connected caveman - edges = ge.generate_relaxed_caveman(l=10, k=10, p=0.1) # Relaxed caveman + adjacency = ge.generate_sbm(n_per_block=100, num_blocks=3, p_in=0.1, p_out=0.01, seed=42) + adjacency = ge.generate_caveman(l=10, k=10) # Connected caveman + adjacency = ge.generate_relaxed_caveman(l=10, k=10, p=0.1, seed=42) # Relaxed caveman # Specialized networks - edges = ge.generate_bipartite_graph(n_top=100, n_bottom=150) # Bipartite - edges = ge.generate_balanced_tree(r=3, h=8) # Balanced tree - edges = ge.generate_road_network(width=20, height=20) # Grid-like road network + adjacency = ge.generate_bipartite_graph(n_top=100, n_bottom=150, p=0.1, seed=42) # Random bipartite + adjacency = ge.generate_complete_bipartite_graph(n_top=50, n_bottom=100) # Complete bipartite + adjacency = ge.generate_delaunay_triangulation(n=100, seed=42) # Planar triangulation + adjacency = ge.generate_balanced_tree(r=3, h=8) # Balanced tree + adjacency = ge.generate_road_network(width=20, height=20) # Grid-like road network Working with Real Data ---------------------- @@ -109,20 +111,27 @@ Load and analyze real-world networks: .. 
code-block:: python - # Load a dataset (includes several network datasets) + # Load a dataset (returns edge list) vertices, edges = ge.load_dataset('snap-ca-GrQc') # Collaboration network - n_vertices = len(vertices) - + + # Convert edges to sparse adjacency matrix + import scipy.sparse as sp + n = len(vertices) + rows = edges[:, 0] + cols = edges[:, 1] + data = np.ones(len(edges), dtype=int) + adjacency = sp.csr_matrix((data, (rows, cols)), shape=(n, n)) + adjacency = adjacency + adjacency.T # Make symmetric + # Create embedder for larger networks embedder = ge.GraphEmbedder( - edges=edges, - n_vertices=n_vertices, + adjacency=adjacency, n_components=2, n_neighbors=20, # More neighbors for denser graphs sample_size=512, # Larger sample for accuracy batch_size=2048 # Larger batches for efficiency ) - + embedder.run_layout(num_iterations=100) embedder.display_layout() @@ -135,9 +144,8 @@ Identify influential nodes: import networkx as nx - G = nx.Graph() - G.add_nodes_from(range(n_vertices)) - G.add_edges_from(edges) + # Convert adjacency matrix to NetworkX graph + G = nx.from_scipy_sparse_array(adjacency) # Fast: embedding-based selection seeds_graphem = ge.graphem_seed_selection(embedder, k=10, num_iterations=20) @@ -148,6 +156,7 @@ Identify influential nodes: # Evaluate influence spread (Independent Cascades model) influence, iters = ge.ndlib_estimated_influence(G, seeds_graphem, p=0.1, iterations_count=200) + n_vertices = adjacency.shape[0] print(f"Influenced: {influence}/{n_vertices} nodes ({influence/n_vertices:.1%})") Benchmarking and Analysis @@ -159,15 +168,15 @@ Compare different centrality measures: from graphem.benchmark import benchmark_correlations from graphem.visualization import report_full_correlation_matrix - + # Run comprehensive benchmark results = benchmark_correlations( graph_generator=ge.generate_ba, - graph_params={'n': 300, 'm': 3}, + graph_params={'n': 300, 'm': 3, 'seed': 42}, n_components=3, num_iterations=50 ) - + # Display correlation 
matrix correlation_matrix = report_full_correlation_matrix( results['radii'], # Embedding-based centrality @@ -187,8 +196,7 @@ Performance Tips .. code-block:: python embedder = ge.GraphEmbedder( - edges=edges, - n_vertices=n_vertices, + adjacency=adjacency, n_components=2, # 2D is faster than 3D n_neighbors=10, # Fewer neighbors = faster sample_size=256, # Automatically limited to len(edges) @@ -204,7 +212,7 @@ GraphEm automatically uses GPU if JAX detects CUDA: import jax print("Available devices:", jax.devices()) # Check for GPU - + # Force CPU usage if needed with jax.default_device(jax.devices('cpu')[0]): embedder.run_layout(num_iterations=50) @@ -217,8 +225,7 @@ For very large graphs, process in chunks: # For graphs with >100k nodes, consider reducing parameters embedder = ge.GraphEmbedder( - edges=edges, - n_vertices=n_vertices, + adjacency=adjacency, n_neighbors=5, # Minimum viable k sample_size=128, # Automatically limited to len(edges) batch_size=1024 # Automatically limited to n_vertices @@ -230,4 +237,4 @@ Next Steps * Explore the :doc:`tutorials` for detailed examples * Check the :doc:`api_reference` for complete documentation * See :doc:`examples` for real-world use cases -* Read :doc:`contributing` to help improve GraphEm \ No newline at end of file +* Read :doc:`contributing` to help improve GraphEm diff --git a/docs/tutorials.rst b/docs/tutorials.rst index df748f1..c1bb26f 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -17,14 +17,12 @@ GraphEm combines spectral methods with force-directed layout for high-quality em import numpy as np import matplotlib.pyplot as plt - # Create a graph with known structure - edges = ge.generate_sbm(n_per_block=50, num_blocks=3, p_in=0.15, p_out=0.02) - n_vertices = 150 + # Create a graph with known structure (returns sparse adjacency matrix) + adjacency = ge.generate_sbm(n_per_block=50, num_blocks=3, p_in=0.15, p_out=0.02, seed=42) # Create embedder with detailed parameters embedder = ge.GraphEmbedder( - 
edges=edges, - n_vertices=n_vertices, + adjacency=adjacency, n_components=2, L_min=5.0, # Shorter edges for tighter layout k_attr=0.8, # Strong attraction within communities @@ -36,7 +34,7 @@ GraphEm combines spectral methods with force-directed layout for high-quality em positions_history = [] for i in range(0, 100, 10): embedder.run_layout(num_iterations=10) - positions_history.append(np.array(embedder.positions)) + positions_history.append(embedder.get_positions()) # Final visualization embedder.display_layout() @@ -64,12 +62,12 @@ Understanding how parameters affect the embedding: for i, params in enumerate(params_to_test, 1): embedder = ge.GraphEmbedder( - edges=edges, n_vertices=n_vertices, n_components=2, + adjacency=adjacency, n_components=2, L_min=params['L_min'], k_attr=params['k_attr'], k_inter=params['k_inter'] ) embedder.run_layout(num_iterations=50) - - pos = np.array(embedder.positions) + + pos = embedder.get_positions() fig.add_trace(go.Scatter(x=pos[:, 0], y=pos[:, 1], mode='markers', name=params['title']), row=1, col=i) @@ -85,14 +83,12 @@ For large networks, optimize performance and memory usage: .. 
code-block:: python - # Generate a large scale-free network - large_edges = ge.generate_ba(n=10000, m=5) - n_vertices = 10000 + # Generate a large scale-free network (returns sparse adjacency matrix) + adjacency = ge.generate_ba(n=10000, m=5, seed=42) # Optimized embedder for large graphs large_embedder = ge.GraphEmbedder( - edges=large_edges, - n_vertices=n_vertices, + adjacency=adjacency, n_components=2, # 2D is faster than 3D L_min=2.0, k_attr=0.3, @@ -106,13 +102,14 @@ For large networks, optimize performance and memory usage: # Progressive refinement print("Initial layout...") large_embedder.run_layout(num_iterations=20) - + print("Refinement...") large_embedder.k_attr = 0.5 # Increase attraction for refinement large_embedder.run_layout(num_iterations=30) # Sample visualization (full graph would be too dense) - pos = np.array(large_embedder.positions) + pos = large_embedder.get_positions() + n_vertices = adjacency.shape[0] sample_nodes = np.random.choice(n_vertices, 1000, replace=False) import plotly.graph_objects as go @@ -132,35 +129,37 @@ For extremely large networks, use chunked processing: .. 
code-block:: python - def embed_large_network_chunked(edges, n_vertices, chunk_size=5000): + def embed_large_network_chunked(adjacency, chunk_size=5000): """Embed very large networks in chunks.""" - + + n_vertices = adjacency.shape[0] + if n_vertices <= chunk_size: # Small enough to process normally - embedder = ge.GraphEmbedder(edges=edges, n_vertices=n_vertices) + embedder = ge.GraphEmbedder(adjacency=adjacency) embedder.run_layout(num_iterations=50) - return embedder.positions - + return embedder.get_positions() + # For very large networks, use progressive approach print(f"Processing {n_vertices} nodes in chunks of {chunk_size}") - - # Start with a subgraph - node_subset = np.random.choice(n_vertices, chunk_size, replace=False) - mask = np.isin(edges[:, 0], node_subset) & np.isin(edges[:, 1], node_subset) - subset_edges = edges[mask] - - # Remap node IDs to 0-based consecutive - old_to_new = {old: new for new, old in enumerate(node_subset)} - remapped_edges = np.array([[old_to_new[e[0]], old_to_new[e[1]]] - for e in subset_edges]) - + + # Start with a subgraph - sample nodes and extract subgraph + import networkx as nx + G = nx.from_scipy_sparse_array(adjacency) + node_subset = np.random.choice(list(G.nodes()), chunk_size, replace=False) + G_subset = G.subgraph(node_subset).copy() + G_subset = nx.convert_node_labels_to_integers(G_subset) + + # Get adjacency matrix of subset + subset_adjacency = nx.adjacency_matrix(G_subset, dtype=int) + # Embed subset - embedder = ge.GraphEmbedder(edges=remapped_edges, n_vertices=len(node_subset)) + embedder = ge.GraphEmbedder(adjacency=subset_adjacency) embedder.run_layout(num_iterations=100) - + # This is a simplified example - full implementation would # gradually add nodes and refine positions - return embedder.positions + return embedder.get_positions() Influence Maximization Applications ----------------------------------- @@ -173,23 +172,21 @@ Simulate information spread in social networks: .. 
code-block:: python import networkx as nx - - # Create a social network-like graph - social_edges = ge.generate_ws(n=1000, k=8, p=0.1) # Small-world - G = nx.Graph() - G.add_nodes_from(range(1000)) - G.add_edges_from(social_edges) - + + # Create a social network-like graph (returns sparse adjacency matrix) + adjacency = ge.generate_ws(n=1000, k=8, p=0.1, seed=42) # Small-world + G = nx.from_scipy_sparse_array(adjacency) + # Compare different seed selection strategies strategies = { 'Random': np.random.choice(1000, 20, replace=False).tolist(), 'High Degree': sorted(G.nodes(), key=G.degree, reverse=True)[:20], 'GraphEm': None, # Will compute below - 'Greedy': ge.greedy_seed_selection(G, k=20) + 'Greedy': ge.greedy_seed_selection(G, k=20, p=0.05)[0] # (seeds, total_iterations) -> keep only the seed list } - + # Compute GraphEm strategy - embedder = ge.GraphEmbedder(edges=social_edges, n_vertices=1000, n_components=2) + embedder = ge.GraphEmbedder(adjacency=adjacency, n_components=2) strategies['GraphEm'] = ge.graphem_seed_selection(embedder, k=20) # Simulate influence spread for each strategy @@ -207,12 +204,12 @@ Simulate information spread in social networks: if best_strategy == 'GraphEm': # We already have the embedding - pos = np.array(embedder.positions) + pos = embedder.get_positions() else: # Create embedding for visualization - embedder = ge.GraphEmbedder(edges=social_edges, n_vertices=1000, n_components=2) + embedder = ge.GraphEmbedder(adjacency=adjacency, n_components=2) embedder.run_layout(num_iterations=50) - pos = np.array(embedder.positions) + pos = embedder.get_positions() # Create visualization highlighting seed nodes import plotly.graph_objects as go @@ -246,39 +243,32 @@ Analyze how network structure affects influence spread: def analyze_network_robustness(generator, params, attack_strategies): """Analyze robustness under different attack strategies.""" - - # Generate base network - edges = generator(**params) - n_vertices = params['n'] - G = nx.Graph() - G.add_nodes_from(range(n_vertices)) - 
G.add_edges_from(edges) + + # Generate base network (returns sparse adjacency matrix) + adjacency = generator(**params) + G = nx.from_scipy_sparse_array(adjacency) results = {} for strategy_name, attack_function in attack_strategies.items(): # Remove nodes according to strategy - nodes_to_remove = attack_function(G, int(0.1 * n_vertices)) # Remove 10% + nodes_to_remove = attack_function(G, int(0.1 * G.number_of_nodes())) # Remove 10% G_attacked = G.copy() G_attacked.remove_nodes_from(nodes_to_remove) - + # Recompute largest connected component largest_cc = max(nx.connected_components(G_attacked), key=len) G_cc = G_attacked.subgraph(largest_cc).copy() - + G_cc = nx.convert_node_labels_to_integers(G_cc) + # Test influence spread in remaining network if len(G_cc) > 50: # Only if significant network remains - cc_edges = np.array(list(G_cc.edges())) - embedder = ge.GraphEmbedder(edges=cc_edges, n_vertices=len(G_cc)) - - # Remap node IDs - node_mapping = {old: new for new, old in enumerate(G_cc.nodes())} - remapped_edges = np.array([[node_mapping[e[0]], node_mapping[e[1]]] - for e in cc_edges]) - - embedder = ge.GraphEmbedder(edges=remapped_edges, n_vertices=len(G_cc)) + # Get adjacency matrix of remaining network + cc_adjacency = nx.adjacency_matrix(G_cc, dtype=int) + + embedder = ge.GraphEmbedder(adjacency=cc_adjacency, n_components=2) seeds = ge.graphem_seed_selection(embedder, k=min(10, len(G_cc)//10)) - + influence, _ = ge.ndlib_estimated_influence(G_cc, seeds, p=0.1) results[strategy_name] = { 'remaining_nodes': len(G_cc), @@ -305,9 +295,9 @@ Analyze how network structure affects influence spread: # Test on different network types network_types = [ - ('Scale-Free', ge.generate_ba, {'n': 500, 'm': 3}), - ('Small-World', ge.generate_ws, {'n': 500, 'k': 6, 'p': 0.1}), - ('Random', ge.erdos_renyi_graph, {'n': 500, 'p': 0.012}) + ('Scale-Free', ge.generate_ba, {'n': 500, 'm': 3, 'seed': 42}), + ('Small-World', ge.generate_ws, {'n': 500, 'k': 6, 'p': 0.1, 'seed': 42}), + 
('Random', ge.generate_er, {'n': 500, 'p': 0.012, 'seed': 42}) ] for net_name, generator, params in network_types: @@ -332,10 +322,10 @@ Comparing Embedding-Based and Traditional Centralities # Generate different network types for comparison networks = [ - ('Erdős–Rényi', ge.erdos_renyi_graph, {'n': 300, 'p': 0.02}), - ('Scale-Free', ge.generate_ba, {'n': 300, 'm': 2}), - ('Small-World', ge.generate_ws, {'n': 300, 'k': 4, 'p': 0.1}), - ('Community', ge.generate_sbm, {'n_per_block': 100, 'num_blocks': 3, 'p_in': 0.1, 'p_out': 0.01}) + ('Erdős–Rényi', ge.generate_er, {'n': 300, 'p': 0.02, 'seed': 42}), + ('Scale-Free', ge.generate_ba, {'n': 300, 'm': 2, 'seed': 42}), + ('Small-World', ge.generate_ws, {'n': 300, 'k': 4, 'p': 0.1, 'seed': 42}), + ('Community', ge.generate_sbm, {'n_per_block': 100, 'num_blocks': 3, 'p_in': 0.1, 'p_out': 0.01, 'seed': 42}) ] correlation_results = {} @@ -382,20 +372,23 @@ Creating Domain-Specific Networks .. code-block:: python def generate_hierarchical_network(levels=3, branching=3, intra_level_prob=0.1): - """Generate a hierarchical network structure.""" + """Generate a hierarchical network structure, returns sparse adjacency matrix.""" + import networkx as nx + import scipy.sparse as sp + nodes_per_level = [branching ** i for i in range(levels)] total_nodes = sum(nodes_per_level) - - edges = [] - node_id = 0 + + # Create NetworkX graph for easier construction + G = nx.Graph() + G.add_nodes_from(range(total_nodes)) level_starts = [0] - + # Create hierarchical connections for level in range(levels - 1): level_start = level_starts[level] level_size = nodes_per_level[level] - next_level_size = nodes_per_level[level + 1] - + # Connect each node in current level to nodes in next level for i in range(level_size): current_node = level_start + i @@ -403,29 +396,28 @@ Creating Domain-Specific Networks start_next = level_starts[level] + level_size + i * branching for j in range(branching): if start_next + j < total_nodes: - 
edges.append([current_node, start_next + j]) - + G.add_edge(current_node, start_next + j) + level_starts.append(level_starts[-1] + level_size) - + # Add intra-level connections for level in range(levels): level_start = level_starts[level] level_size = nodes_per_level[level] - + for i in range(level_size): for j in range(i + 1, level_size): if np.random.random() < intra_level_prob: - edges.append([level_start + i, level_start + j]) - - return np.array(edges) + G.add_edge(level_start + i, level_start + j) + + return nx.adjacency_matrix(G, dtype=int) # Test the custom generator - hier_edges = generate_hierarchical_network(levels=4, branching=2, intra_level_prob=0.2) - + hier_adjacency = generate_hierarchical_network(levels=4, branching=2, intra_level_prob=0.2) + # Embed and visualize embedder = ge.GraphEmbedder( - edges=hier_edges, - n_vertices=hier_edges.max() + 1, + adjacency=hier_adjacency, n_components=2, L_min=3.0, k_attr=0.7, @@ -446,28 +438,27 @@ GPU Acceleration Tips # Check available devices print("Available devices:", jax.devices()) - + # For consistent GPU usage across runs - def setup_gpu_embedding(edges, n_vertices, device_id=0): + def setup_gpu_embedding(adjacency, device_id=0): """Setup embedder with specific GPU device.""" - + # Force specific device if multiple GPUs available if len(jax.devices('gpu')) > 1: device = jax.devices('gpu')[device_id] with jax.default_device(device): embedder = ge.GraphEmbedder( - edges=edges, - n_vertices=n_vertices, + adjacency=adjacency, batch_size=8192, # Automatically limited to n_vertices sample_size=1024 # Automatically limited to len(edges) ) return embedder else: - return ge.GraphEmbedder(edges=edges, n_vertices=n_vertices) + return ge.GraphEmbedder(adjacency=adjacency) # Example with large graph - large_edges = ge.generate_ba(n=20000, m=4) - embedder = setup_gpu_embedding(large_edges, 20000) + adjacency = ge.generate_ba(n=20000, m=4, seed=42) + embedder = setup_gpu_embedding(adjacency) # Time the embedding import 
time @@ -483,25 +474,25 @@ Profiling and Optimization .. code-block:: python # Profile memory usage and computation time - def profile_embedding(edges, n_vertices, iterations=50): + def profile_embedding(adjacency, iterations=50): """Profile embedding performance.""" import psutil import os - + process = psutil.Process(os.getpid()) - + # Memory before mem_before = process.memory_info().rss / 1024 / 1024 # MB - + # Time embedding start_time = time.time() - embedder = ge.GraphEmbedder(edges=edges, n_vertices=n_vertices) + embedder = ge.GraphEmbedder(adjacency=adjacency) embedder.run_layout(num_iterations=iterations) end_time = time.time() - + # Memory after mem_after = process.memory_info().rss / 1024 / 1024 # MB - + return { 'time': end_time - start_time, 'memory_used': mem_after - mem_before, @@ -511,8 +502,8 @@ Profiling and Optimization # Test different graph sizes sizes = [500, 1000, 2000, 5000] for n in sizes: - edges = ge.generate_ba(n=n, m=3) - stats = profile_embedding(edges, n, iterations=30) + adjacency = ge.generate_ba(n=n, m=3, seed=42) + stats = profile_embedding(adjacency, iterations=30) print(f"n={n:4d}: {stats['time']:5.2f}s, " f"{stats['memory_used']:6.1f}MB used, " f"{stats['final_memory']:6.1f}MB total") diff --git a/examples/graph_generator_example.py b/examples/graph_generator_example.py index 560887b..4535bba 100644 --- a/examples/graph_generator_example.py +++ b/examples/graph_generator_example.py @@ -11,7 +11,7 @@ from graphem.embedder import GraphEmbedder from graphem.generators import ( - erdos_renyi_graph, + generate_er, generate_random_regular, generate_scale_free, generate_geometric, @@ -23,10 +23,10 @@ ) -def test_graph_generator(generator, params, name, dim=3, num_iterations=30): +def test_graph_generator(generator, params, name, n_components=3, num_iterations=30): """ Test a graph generator function and visualize the resulting embedding. 
- + Parameters: generator: function Graph generator function @@ -34,7 +34,7 @@ def test_graph_generator(generator, params, name, dim=3, num_iterations=30): Parameters for the graph generator name: str Name of the graph type for display - dim: int + n_components: int Dimension of the embedding num_iterations: int Number of layout iterations @@ -42,44 +42,39 @@ def test_graph_generator(generator, params, name, dim=3, num_iterations=30): print(f"\n{'='*50}") print(f"Testing {name} graph") print(f"{'='*50}") - - # Generate graph - edges = generator(**params) - - # Determine number of vertices - if len(edges) > 0: - n = int(max(np.max(edges) + 1, params.get('n', 0))) - else: - n = params.get('n', 0) - - print(f"Generated graph with {n} vertices and {len(edges)} edges") - + + # Generate graph (returns sparse adjacency matrix) + adjacency = generator(**params) + + # Get graph properties + n = adjacency.shape[0] + num_edges = adjacency.nnz // 2 # Divide by 2 for undirected graphs + + print(f"Generated graph with {n} vertices and {num_edges} edges") + # Create NetworkX graph for visualization - G = nx.Graph() - G.add_nodes_from(range(n)) - G.add_edges_from(edges) - + G = nx.from_scipy_sparse_array(adjacency) + print("Graph statistics:") - print(f"- Density: {2 * len(edges) / (n * (n - 1)):.4f}") - print(f"- Average degree: {2 * len(edges) / n:.2f}") - + print(f"- Density: {2 * num_edges / (n * (n - 1)):.4f}") + print(f"- Average degree: {2 * num_edges / n:.2f}") + try: print(f"- Average shortest path length: {nx.average_shortest_path_length(G):.2f}") except nx.NetworkXError as e: print("- Average shortest path length: N/A") print(e) - + try: print(f"- Average clustering coefficient: {nx.average_clustering(G):.4f}") except nx.NetworkXError as e: print("- Average clustering coefficient: N/A") print(e) - + # Create and run embedder embedder = GraphEmbedder( - edges=edges, - n_vertices=n, - n_components=dim, + adjacency=adjacency, + n_components=n_components, L_min=10.0, 
k_attr=0.5, k_inter=0.1, @@ -88,14 +83,14 @@ def test_graph_generator(generator, params, name, dim=3, num_iterations=30): batch_size=1024, verbose=True ) - + print(f"Running layout for {num_iterations} iterations...") embedder.run_layout(num_iterations=num_iterations) - + # Display the graph print("Displaying graph layout...") embedder.display_layout(edge_width=1, node_size=5) - + return embedder @@ -109,56 +104,56 @@ def main(): {'n': 100, 'd': 3, 'seed': 42}, 'Random Regular' ) - + # Test Scale-Free Graph test_graph_generator( generate_scale_free, {'n': 100, 'seed': 42}, 'Scale-Free' ) - + # Test Random Geometric Graph test_graph_generator( generate_geometric, {'n': 100, 'radius': 0.15, 'seed': 42}, 'Random Geometric' ) - + # Test Caveman Graph test_graph_generator( generate_caveman, {'l': 5, 'k': 20}, 'Caveman' ) - + # Test Relaxed Caveman Graph test_graph_generator( generate_relaxed_caveman, {'l': 5, 'k': 20, 'p': 0.1, 'seed': 42}, 'Relaxed Caveman' ) - + # Test Erdős–Rényi Graph test_graph_generator( - erdos_renyi_graph, + generate_er, {'n': 100, 'p': 0.05, 'seed': 42}, 'Erdős–Rényi' ) - + # Test Watts-Strogatz Small-World Graph test_graph_generator( generate_ws, {'n': 100, 'k': 4, 'p': 0.1, 'seed': 42}, 'Watts-Strogatz Small-World' ) - + # Test Barabási-Albert Graph test_graph_generator( generate_ba, {'n': 100, 'm': 2, 'seed': 42}, 'Barabási-Albert' ) - + # Test Stochastic Block Model test_graph_generator( generate_sbm, diff --git a/examples/random_regular_example.py b/examples/random_regular_example.py index 1260e19..acb932f 100644 --- a/examples/random_regular_example.py +++ b/examples/random_regular_example.py @@ -16,7 +16,7 @@ from graphem.benchmark import run_benchmark from graphem.visualization import report_full_correlation_matrix from graphem.generators import ( - erdos_renyi_graph, + generate_er, generate_random_regular, generate_ws, generate_ba @@ -51,21 +51,20 @@ def test_random_regular_varying_degree(n=100, degrees=None, dim=3, num_iteration 
print(f"Random Regular Graph with degree d={d}") print(f"{'-'*25}") - # Generate graph + # Generate graph (returns sparse adjacency matrix) start_time = time.time() - edges = generate_random_regular(n=n, d=d, seed=42) + adjacency = generate_random_regular(n=n, d=d, seed=42) gen_time = time.time() - start_time - - print(f"Generated graph with {n} vertices, {len(edges)} edges in {gen_time:.2f}s") - + + num_edges = adjacency.nnz // 2 + print(f"Generated graph with {n} vertices, {num_edges} edges in {gen_time:.2f}s") + # Create NetworkX graph for analysis - G = nx.Graph() - G.add_nodes_from(range(n)) - G.add_edges_from(edges) - + G = nx.from_scipy_sparse_array(adjacency) + # Analyze graph properties - density = 2 * len(edges) / (n * (n - 1)) - avg_degree = 2 * len(edges) / n + density = 2 * num_edges / (n * (n - 1)) + avg_degree = 2 * num_edges / n print("Graph statistics:") print(f"- Density: {density:.4f}") @@ -98,8 +97,7 @@ def test_random_regular_varying_degree(n=100, degrees=None, dim=3, num_iteration # Create and run embedder embedder = GraphEmbedder( - edges=edges, - n_vertices=n, + adjacency=adjacency, n_components=dim, L_min=10.0, k_attr=0.5, @@ -133,7 +131,7 @@ def test_random_regular_varying_degree(n=100, degrees=None, dim=3, num_iteration }) # Calculate centrality measures - positions = np.array(embedder.positions) + positions = embedder.get_positions() radii = np.linalg.norm(positions, axis=1) degree_centrality = np.array([d for _, d in G.degree()]) @@ -214,21 +212,20 @@ def test_random_regular_varying_size(degree=3, sizes=None, dim=3, num_iterations print(f"Random Regular Graph with size n={n}") print(f"{'-'*25}") - # Generate graph + # Generate graph (returns sparse adjacency matrix) start_time = time.time() - edges = generate_random_regular(n=n, d=degree, seed=42) + adjacency = generate_random_regular(n=n, d=degree, seed=42) gen_time = time.time() - start_time - - print(f"Generated graph with {n} vertices, {len(edges)} edges in {gen_time:.2f}s") - + + 
num_edges = adjacency.nnz // 2 + print(f"Generated graph with {n} vertices, {num_edges} edges in {gen_time:.2f}s") + # Create NetworkX graph for analysis - G = nx.Graph() - G.add_nodes_from(range(n)) - G.add_edges_from(edges) - + G = nx.from_scipy_sparse_array(adjacency) + # Analyze graph properties - density = 2 * len(edges) / (n * (n - 1)) - avg_degree = 2 * len(edges) / n + density = 2 * num_edges / (n * (n - 1)) + avg_degree = 2 * num_edges / n print("Graph statistics:") print(f"- Density: {density:.4f}") @@ -260,8 +257,7 @@ def test_random_regular_varying_size(degree=3, sizes=None, dim=3, num_iterations # Create and run embedder embedder = GraphEmbedder( - edges=edges, - n_vertices=n, + adjacency=adjacency, n_components=dim, L_min=10.0, k_attr=0.5, @@ -339,7 +335,7 @@ def compare_with_benchmark(): graph_configs = [ (generate_random_regular, {'n': 100, 'd': 3, 'seed': 42}, 'Random Regular (d=3)'), (generate_random_regular, {'n': 100, 'd': 5, 'seed': 42}, 'Random Regular (d=5)'), - (erdos_renyi_graph, {'n': 100, 'p': 0.03, 'seed': 42}, 'Erdős–Rényi'), + (generate_er, {'n': 100, 'p': 0.03, 'seed': 42}, 'Erdős–Rényi'), (generate_ws, {'n': 100, 'k': 4, 'p': 0.1, 'seed': 42}, 'Watts-Strogatz'), (generate_ba, {'n': 100, 'm': 2, 'seed': 42}, 'Barabási-Albert') ] diff --git a/examples/real_world_datasets_example.py b/examples/real_world_datasets_example.py index c77605c..de189d4 100644 --- a/examples/real_world_datasets_example.py +++ b/examples/real_world_datasets_example.py @@ -78,38 +78,39 @@ def analyze_dataset(dataset_name, sample_size=None, dim=3, num_iterations=30): vertices, edges = load_dataset(dataset_name) n_vertices = len(vertices) load_time = time.time() - start_time - + print(f"Loaded dataset with {n_vertices:,} vertices and {len(edges):,} edges in {load_time:.2f}s") - + + # Convert edges to sparse adjacency matrix + import scipy.sparse as sp + rows = edges[:, 0] + cols = edges[:, 1] + data = np.ones(len(edges), dtype=int) + adjacency = 
sp.csr_matrix((data, (rows, cols)), shape=(n_vertices, n_vertices)) + adjacency = adjacency + adjacency.T # Make symmetric + # Sample the graph if needed if sample_size is not None and sample_size < n_vertices: print(f"Sampling {sample_size:,} vertices from the graph...") - sampled_vertices = np.random.choice(vertices, sample_size, replace=False) - - # Filter edges that contain sampled vertices - sampled_edges = [] - for u, v in edges: - if u in sampled_vertices and v in sampled_vertices: - sampled_edges.append((u, v)) - - vertices = sampled_vertices - edges = np.array(sampled_edges) + # Create NetworkX graph for sampling + G = nx.from_scipy_sparse_array(adjacency) + sampled_nodes = np.random.choice(list(G.nodes()), sample_size, replace=False) + G = G.subgraph(sampled_nodes).copy() + G = nx.convert_node_labels_to_integers(G) + + # Convert back to adjacency matrix + adjacency = nx.adjacency_matrix(G, dtype=int) n_vertices = sample_size - - print(f"Sampled graph has {n_vertices:,} vertices and {len(edges):,} edges") - - # Create NetworkX graph for analysis - G = nx.Graph() - G.add_nodes_from(vertices) - G.add_edges_from(edges) - G = nx.convert_node_labels_to_integers(G, - first_label=0, - ordering='default', - label_attribute=None) + + print(f"Sampled graph has {n_vertices:,} vertices and {adjacency.nnz // 2:,} edges") + else: + # Create NetworkX graph for analysis + G = nx.from_scipy_sparse_array(adjacency) # Analyze graph properties - density = 2 * len(edges) / (n_vertices * (n_vertices - 1)) - avg_degree = 2 * len(edges) / n_vertices + num_edges = adjacency.nnz // 2 + density = 2 * num_edges / (n_vertices * (n_vertices - 1)) + avg_degree = 2 * num_edges / n_vertices print("Graph statistics:") print(f"- Density: {density:.6f}") @@ -126,11 +127,12 @@ def analyze_dataset(dataset_name, sample_size=None, dim=3, num_iterations=30): if len(largest_cc) < n_vertices: print(f"Extracting largest connected component with {len(largest_cc):,} vertices...") G_cc = 
G.subgraph(largest_cc).copy() - + # Re-index nodes to be consecutive integers G_cc = nx.convert_node_labels_to_integers(G_cc) - - # Extract edges from the largest component + + # Extract adjacency matrix from the largest component + adjacency = nx.adjacency_matrix(G_cc, dtype=int) n_vertices = len(largest_cc) # Compute diameter if manageable @@ -161,15 +163,13 @@ def analyze_dataset(dataset_name, sample_size=None, dim=3, num_iterations=30): # Create and run embedder print(f"Creating embedding in dimension {dim}...") - # Create and run embedder embedder = GraphEmbedder( - edges=G_cc.edges, - n_vertices=G_cc.number_of_nodes(), + adjacency=adjacency, n_components=dim, L_min=4.0, k_attr=0.5, k_inter=0.1, - n_neighbors=min(15, G_cc.number_of_nodes() // 10), + n_neighbors=min(15, n_vertices // 10), sample_size=512, batch_size=1024, verbose=False @@ -183,9 +183,9 @@ def analyze_dataset(dataset_name, sample_size=None, dim=3, num_iterations=30): # Calculate centrality measures print("Calculating centrality measures...") - + # Get positions and calculate radial distances - positions = np.array(embedder.positions) + positions = embedder.get_positions() radii = np.linalg.norm(positions, axis=1) # Calculate centrality measures @@ -280,37 +280,34 @@ def compare_datasets(dataset_names, sample_size=1000, dim=3, num_iterations=30): vertices, edges = load_dataset(dataset_name) n_vertices = len(vertices) load_time = time.time() - start_time - + print(f"Loaded dataset with {n_vertices:,} vertices and {len(edges):,} edges in {load_time:.2f}s") - + + # Convert edges to sparse adjacency matrix + import scipy.sparse as sp + rows = edges[:, 0] + cols = edges[:, 1] + data = np.ones(len(edges), dtype=int) + adjacency = sp.csr_matrix((data, (rows, cols)), shape=(n_vertices, n_vertices)) + adjacency = adjacency + adjacency.T # Make symmetric + # Sample the graph print(f"Sampling {sample_size:,} vertices from the graph...") - sampled_vertices = np.random.choice(vertices, sample_size, 
replace=False) - - # Filter edges that contain sampled vertices - sampled_edges = [] - for u, v in edges: - if u in sampled_vertices and v in sampled_vertices: - sampled_edges.append((u, v)) - - vertices = sampled_vertices - edges = np.array(sampled_edges) + G = nx.from_scipy_sparse_array(adjacency) + sampled_nodes = np.random.choice(list(G.nodes()), sample_size, replace=False) + G = G.subgraph(sampled_nodes).copy() + G = nx.convert_node_labels_to_integers(G) + + # Convert back to adjacency matrix + adjacency = nx.adjacency_matrix(G, dtype=int) n_vertices = sample_size - - print(f"Sampled graph has {n_vertices:,} vertices and {len(edges):,} edges") - - # Create NetworkX graph for analysis - G = nx.Graph() - G.add_nodes_from(vertices) - G.add_edges_from(edges) - G = nx.convert_node_labels_to_integers(G, - first_label=0, - ordering='default', - label_attribute=None) - + + print(f"Sampled graph has {n_vertices:,} vertices and {adjacency.nnz // 2:,} edges") + # Analyze graph properties - density = 2 * len(edges) / (n_vertices * (n_vertices - 1)) - avg_degree = 2 * len(edges) / n_vertices + num_edges = adjacency.nnz // 2 + density = 2 * num_edges / (n_vertices * (n_vertices - 1)) + avg_degree = 2 * num_edges / n_vertices # Get largest connected component largest_cc = max(nx.connected_components(G), key=len) @@ -326,8 +323,8 @@ def compare_datasets(dataset_names, sample_size=1000, dim=3, num_iterations=30): # Re-index nodes to be consecutive integers G_cc = nx.convert_node_labels_to_integers(G_cc) - # Extract edges from the largest component - edges = G_cc.edges + # Extract adjacency matrix from the largest component + adjacency = nx.adjacency_matrix(G_cc, dtype=int) n_vertices = len(largest_cc) # Compute average shortest path length if manageable @@ -350,8 +347,7 @@ def compare_datasets(dataset_names, sample_size=1000, dim=3, num_iterations=30): # Create and run embedder embedder = GraphEmbedder( - edges=edges, - n_vertices=n_vertices, + adjacency=adjacency, 
n_components=dim, L_min=4.0, k_attr=0.5, @@ -370,7 +366,7 @@ def compare_datasets(dataset_names, sample_size=1000, dim=3, num_iterations=30): results.append({ 'dataset': dataset_name, 'vertices': n_vertices, - 'edges': len(edges), + 'edges': num_edges, 'density': density, 'avg_degree': avg_degree, 'lcc_size': lcc_size, diff --git a/graphem/__init__.py b/graphem/__init__.py index bc38c09..a3d2787 100644 --- a/graphem/__init__.py +++ b/graphem/__init__.py @@ -7,7 +7,7 @@ from graphem.influence import graphem_seed_selection, ndlib_estimated_influence, greedy_seed_selection from graphem.datasets import load_dataset from graphem.generators import ( - erdos_renyi_graph, + generate_er, generate_sbm, generate_ba, generate_ws, @@ -19,7 +19,10 @@ generate_power_cluster, generate_random_regular, generate_bipartite_graph, - generate_relaxed_caveman + generate_complete_bipartite_graph, + generate_delaunay_triangulation, + generate_relaxed_caveman, + compute_vertex_degrees ) from graphem.visualization import ( report_corr, diff --git a/graphem/benchmark.py b/graphem/benchmark.py index b8b4716..8bfde02 100644 --- a/graphem/benchmark.py +++ b/graphem/benchmark.py @@ -36,23 +36,18 @@ def run_benchmark(graph_generator, graph_params, n_components=3, L_min=10.0, k_a """ logger.info(f"Running benchmark with {graph_generator.__name__}...") - # Generate the graph + # Generate the graph (returns sparse adjacency matrix) start_time = time.time() - edges = graph_generator(**graph_params) - + adjacency = graph_generator(**graph_params) + # Count vertices and edges - if len(edges) > 0: - n = max(np.max(edges) + 1, graph_params.get('n', 0)) - else: - n = graph_params.get('n', 0) - m = len(edges) - + n = adjacency.shape[0] + m = adjacency.nnz // 2 + logger.info(f"Generated graph with {n} vertices and {m} edges") - + # Convert to NetworkX graph for centrality calculations - nx_graph = nx.Graph() - nx_graph.add_nodes_from(range(n)) - nx_graph.add_edges_from(edges) + nx_graph = 
nx.from_scipy_sparse_array(adjacency) # Calculate centrality measures logger.info("Calculating centrality measures...") @@ -94,8 +89,7 @@ def run_benchmark(graph_generator, graph_params, n_components=3, L_min=10.0, k_a # Create embedder logger.info("Creating embedder...") embedder = GraphEmbedder( - edges=edges, - n_vertices=n, + adjacency=adjacency, n_components=n_components, L_min=L_min, k_attr=k_attr, @@ -113,7 +107,7 @@ def run_benchmark(graph_generator, graph_params, n_components=3, L_min=10.0, k_a layout_time = time.time() - layout_start # Get positions and calculate radial distances - positions = np.array(embedder.positions) + positions = embedder.get_positions() radii = np.linalg.norm(positions, axis=1) # Return benchmark data @@ -233,23 +227,18 @@ def run_influence_benchmark(graph_generator, graph_params, k=10, p=0.1, iteratio """ logger.info(f"Running influence benchmark with {graph_generator.__name__}...") - # Generate the graph + # Generate the graph (returns sparse adjacency matrix) start_time = time.time() - edges = graph_generator(**graph_params) - + adjacency = graph_generator(**graph_params) + # Count vertices and edges - if len(edges) > 0: - n = max(np.max(edges) + 1, graph_params.get('n', 0)) - else: - n = graph_params.get('n', 0) - m = len(edges) - + n = adjacency.shape[0] + m = adjacency.nnz // 2 + logger.info(f"Generated graph with {n} vertices and {m} edges") - + # Convert to NetworkX graph - nx_graph = nx.Graph() - nx_graph.add_nodes_from(range(n)) - nx_graph.add_edges_from(edges) + nx_graph = nx.from_scipy_sparse_array(adjacency) # Default layout parameters if layout_params is None: @@ -265,8 +254,7 @@ def run_influence_benchmark(graph_generator, graph_params, k=10, p=0.1, iteratio # Create embedder logger.info("Creating embedder...") embedder = GraphEmbedder( - edges=edges, - n_vertices=n, + adjacency=adjacency, n_components=n_components, **layout_params, verbose=True diff --git a/graphem/embedder.py b/graphem/embedder.py index 
657511b..87f9f2f 100644 --- a/graphem/embedder.py +++ b/graphem/embedder.py @@ -23,8 +23,8 @@ class GraphEmbedder: A class for embedding graphs using the Laplacian embedding. Attributes: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse matrix + Sparse adjacency matrix of the graph. n: int Number of vertices in the graph. n_components: int @@ -41,18 +41,16 @@ class GraphEmbedder: Number of samples for kNN search (automatically limited to number of edges). batch_size: int Batch size for kNN search (automatically limited to number of vertices). - my_logger: loguru.logger + logger_instance: loguru.logger Logger object to use for logging. """ - def __init__(self, edges, n_vertices, n_components=2, L_min=1.0, k_attr=0.2, k_inter=0.5, n_neighbors=10, sample_size=256, batch_size=1024, my_logger=None, verbose=True): + def __init__(self, adjacency, n_components=2, L_min=1.0, k_attr=0.2, k_inter=0.5, n_neighbors=10, sample_size=256, batch_size=1024, logger_instance=None, verbose=True, seed=None): """ Initialize the GraphEmbedder. Parameters: - edges: array-like of shape (num_edges, 2) - Array of edge pairs (i, j). - n_vertices: int - Number of vertices in the graph. + adjacency: scipy.sparse matrix + Sparse adjacency matrix of the graph. n_components: int, default=2 Dimension of the embedding. L_min: float, default=1.0 @@ -64,45 +62,81 @@ def __init__(self, edges, n_vertices, n_components=2, L_min=1.0, k_attr=0.2, k_i n_neighbors: int, default=10 Number of nearest neighbors to consider. sample_size: int, default=256 - Number of samples for kNN search. Automatically limited to min(sample_size, len(edges)). + Number of samples for kNN search. batch_size: int, default=1024 - Batch size for kNN search. Automatically limited to min(batch_size, n_vertices). - my_logger: loguru.logger, optional + Batch size for kNN search. + logger_instance: loguru.logger, optional Logger object to use for logging. 
verbose: bool, default=True Whether to display progress information. + seed: int, optional + Random seed for reproducibility. """ + # Validate and convert adjacency matrix to csr_matrix + if sp.issparse(adjacency): + # Convert to csr_matrix (not csr_array) for compatibility + adjacency = sp.csr_matrix(adjacency) + elif isinstance(adjacency, np.ndarray): + adjacency = sp.csr_matrix(adjacency) + else: + adjacency = sp.csr_matrix(np.asarray(adjacency)) + + # Check if square + if adjacency.shape[0] != adjacency.shape[1]: + raise ValueError(f"Adjacency matrix must be square, got shape {adjacency.shape}") + + # Check for empty graph + if adjacency.shape[0] == 0: + raise ValueError("Adjacency matrix cannot be empty") + + self.adjacency = adjacency + self.n = adjacency.shape[0] + + # Extract edges from adjacency matrix for JAX operations + edges_coo = sp.triu(self.adjacency, k=1).tocoo() + edges = np.column_stack([edges_coo.row, edges_coo.col]) self.edges = jnp.array(edges) - self.n = n_vertices + self.n_components = n_components self.L_min = L_min self.k_attr = k_attr self.k_inter = k_inter self.n_neighbors = n_neighbors self.sample_size = min(sample_size, len(edges)) - self.batch_size = min(batch_size, n_vertices) - if my_logger is None: + self.batch_size = min(batch_size, self.n) + self.seed = seed + + if logger_instance is None: logger.remove() sink = sys.stdout if verbose else open(os.devnull, 'w', encoding='utf-8') logger.add(sink, level="INFO") self.logger = logger - """ System logger """ else: - self.logger = my_logger + self.logger = logger_instance self.logger.info("Logger initialized") - self.positions = self._laplacian_embedding() + + self._positions = self._laplacian_embedding() + + @property + def positions(self): + """Get the current positions of vertices.""" + return self._positions + + def get_positions(self): + """ + Get the current positions of vertices. + + Returns: + np.ndarray: Array of shape (n, n_components) with vertex positions. 
+ """ + return np.array(self._positions) def _laplacian_embedding(self): """ Compute the Laplacian embedding of the graph. """ self.logger.info("Computing Laplacian embedding") - edges_np = np.array(self.edges) - row = edges_np[:, 0] - col = edges_np[:, 1] - data = np.ones(len(edges_np)) - A = sp.csr_matrix((data, (row, col)), shape=(self.n, self.n)) - L = laplacian(A + A.transpose(), normed=True) + L = laplacian(self.adjacency, normed=True) k = self.n_components + 1 _, eigenvectors = spla.eigsh(L, k, which='SM') lap_embedding = eigenvectors[:, 1:k] @@ -220,13 +254,13 @@ def update_positions(self): Update the positions of the vertices based on the spring forces and intersection forces. """ self.logger.info("Updating positions") - spring_forces = self.compute_spring_forces(self.positions, self.edges, self.L_min, self.k_attr) - midpoints = (self.positions[self.edges[:, 0]] + self.positions[self.edges[:, 1]]) / 2.0 + spring_forces = self.compute_spring_forces(self._positions, self.edges, self.L_min, self.k_attr) + midpoints = (self._positions[self.edges[:, 0]] + self._positions[self.edges[:, 1]]) / 2.0 knn_idx, sampled_indices = self.locate_knn_midpoints(midpoints, self.n_neighbors) - inter_forces = self.compute_intersection_forces_with_knn_index(self.positions, self.edges, knn_idx, sampled_indices, self.k_inter) + inter_forces = self.compute_intersection_forces_with_knn_index(self._positions, self.edges, knn_idx, sampled_indices, self.k_inter) forces = spring_forces + inter_forces - new_positions = self.positions + forces - self.positions = (new_positions - jnp.mean(new_positions, axis=0)) / (jnp.std(new_positions, axis=0) + 1e-6) + new_positions = self._positions + forces + self._positions = (new_positions - jnp.mean(new_positions, axis=0)) / (jnp.std(new_positions, axis=0) + 1e-6) self.logger.info("Positions updated") def run_layout(self, num_iterations=100): @@ -236,7 +270,7 @@ def run_layout(self, num_iterations=100): self.logger.info("Running layout") for _ in 
tqdm(range(num_iterations)): self.update_positions() - return self.positions + return self.get_positions() def display_layout(self, edge_width=1, node_size=3, node_colors=None): """ @@ -284,7 +318,7 @@ def _display_layout_2d(self, edge_width=1, node_size=3, node_colors=None): None Displays a Plotly 2D figure with vertices plotted as red markers and edges as gray lines. """ - pos = np.array(self.positions) + pos = np.array(self._positions) edges = np.array(self.edges) x_edges = [] @@ -343,7 +377,7 @@ def _display_layout_3d(self, edge_width=1, node_size=3, node_colors=None): None Displays a Plotly 3D figure with vertices plotted as red markers and edges as gray lines. """ - pos = np.array(self.positions) + pos = np.array(self._positions) edges = np.array(self.edges) x_edges, y_edges, z_edges = [], [], [] diff --git a/graphem/generators.py b/graphem/generators.py index 2057793..4712ed0 100644 --- a/graphem/generators.py +++ b/graphem/generators.py @@ -2,13 +2,34 @@ Graph generators for Graphem. This module provides various functions to generate different types of graphs. +All generators return scipy sparse CSR matrices representing the adjacency matrix. """ import numpy as np import networkx as nx +import scipy.sparse as sp -def erdos_renyi_graph(n, p, seed=0): +def _nx_to_sparse_adjacency(G): + """Convert NetworkX graph to sparse adjacency matrix.""" + return nx.adjacency_matrix(G, dtype=int) + + +def _edges_to_sparse_adjacency(edges, n): + """Convert edge list to sparse adjacency matrix.""" + if len(edges) == 0: + return sp.csr_matrix((n, n), dtype=int) + + # Create undirected adjacency matrix from edges + rows = np.concatenate([edges[:, 0], edges[:, 1]]) + cols = np.concatenate([edges[:, 1], edges[:, 0]]) + data = np.ones(len(rows), dtype=int) + + adj = sp.csr_matrix((data, (rows, cols)), shape=(n, n)) + return adj + + +def generate_er(n, p, seed=0): """ Generate a random undirected graph using the Erdős–Rényi G(n, p) model. 
@@ -21,46 +42,31 @@ def erdos_renyi_graph(n, p, seed=0): Random seed for reproducibility. Returns: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). """ - # Set the random seed - np.random.seed(seed) - - # Generate a random adjacency matrix - adj_matrix = np.random.random((n, n)) < p - - # Make symmetric and zero out the diagonal - adj_matrix = np.triu(adj_matrix, k=1) - - # Get the edges - edges = np.array(list(zip(*np.where(adj_matrix)))) - - return edges + G = nx.erdos_renyi_graph(n, p, seed=seed) + return _nx_to_sparse_adjacency(G) -def compute_vertex_degrees(n, edges): +def compute_vertex_degrees(adjacency): """ - Compute the degree of each vertex from the edge list. + Compute the degree of each vertex from the adjacency matrix. Parameters: - n: number of vertices - edges: array of shape (num_edges, 2) + adjacency: scipy.sparse matrix + Sparse adjacency matrix Returns: degrees: np.array of shape (n,) with degree of each vertex """ - degrees = np.zeros(n) - for i, j in edges: - degrees[i] += 1 - degrees[j] += 1 - return degrees + return np.array(adjacency.sum(axis=1)).flatten() def generate_sbm(n_per_block=75, num_blocks=4, p_in=0.15, p_out=0.01, labels=False, seed=0): """ Generate a stochastic block model graph. - + Parameters: n_per_block: int Number of vertices per block. @@ -74,39 +80,33 @@ def generate_sbm(n_per_block=75, num_blocks=4, p_in=0.15, p_out=0.01, labels=Fal If True, return vertex labels. seed: int Random seed for reproducibility. - + Returns: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). labels: np.ndarray of shape (n,) (only if labels=True) Block labels for each vertex. 
""" - # Use NetworkX to generate the SBM sizes = [n_per_block] * num_blocks p_matrix = np.ones((num_blocks, num_blocks)) * p_out np.fill_diagonal(p_matrix, p_in) - - # Generate the graph + np.random.seed(seed) G = nx.stochastic_block_model(sizes, p_matrix, seed=seed) - - # Extract edges and ensure i < j - edges = np.array(list(G.edges())) - edges = np.sort(edges, axis=1) - - # Return labels if requested + + adjacency = _nx_to_sparse_adjacency(G) + if labels: - # Generate labels (block IDs for each vertex) vertex_labels = np.repeat(np.arange(num_blocks), n_per_block) - return edges, vertex_labels + return adjacency, vertex_labels - return edges + return adjacency def generate_ba(n=300, m=3, seed=0): """ Generate a Barabási-Albert preferential attachment graph. - + Parameters: n: int Number of vertices. @@ -114,21 +114,19 @@ def generate_ba(n=300, m=3, seed=0): Number of edges to attach from a new vertex to existing vertices. seed: int Random seed for reproducibility. - + Returns: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). """ G = nx.barabasi_albert_graph(n, m, seed=seed) - edges = np.array(list(G.edges())) - edges = np.sort(edges, axis=1) - return edges + return _nx_to_sparse_adjacency(G) def generate_ws(n=1000, k=6, p=0.3, seed=0): """ Generate a Watts-Strogatz small-world graph. - + Parameters: n: int Number of vertices. @@ -138,21 +136,19 @@ def generate_ws(n=1000, k=6, p=0.3, seed=0): Probability of rewiring each edge. seed: int Random seed for reproducibility. - + Returns: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). 
""" G = nx.watts_strogatz_graph(n, k, p, seed=seed) - edges = np.array(list(G.edges())) - edges = np.sort(edges, axis=1) - return edges + return _nx_to_sparse_adjacency(G) def generate_power_cluster(n=1000, m=3, p=0.5, seed=0): """ Generate a powerlaw cluster graph. - + Parameters: n: int Number of vertices. @@ -162,88 +158,136 @@ def generate_power_cluster(n=1000, m=3, p=0.5, seed=0): Probability of adding a triangle after adding a random edge. seed: int Random seed for reproducibility. - + Returns: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). """ G = nx.powerlaw_cluster_graph(n, m, p, seed=seed) - edges = np.array(list(G.edges())) - edges = np.sort(edges, axis=1) - return edges + return _nx_to_sparse_adjacency(G) def generate_road_network(width=30, height=30): """ Generate a 2D grid graph representing a road network. - + Parameters: width: int Width of the grid. height: int Height of the grid. - + Returns: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). """ G = nx.grid_2d_graph(width, height) - - # Convert node labels from (x,y) tuples to integers mapping = {node: i for i, node in enumerate(G.nodes())} G = nx.relabel_nodes(G, mapping) - - edges = np.array(list(G.edges())) - edges = np.sort(edges, axis=1) - return edges + return _nx_to_sparse_adjacency(G) -def generate_bipartite_graph(n_top=50, n_bottom=100): +def generate_bipartite_graph(n_top=50, n_bottom=100, p=0.1, seed=0): """ Generate a random bipartite graph. - + + Parameters: + n_top: int + Number of vertices in the top set. + n_bottom: int + Number of vertices in the bottom set. + p: float + Probability of edge between any pair of vertices from different sets. + seed: int + Random seed for reproducibility. 
+ + Returns: + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). + """ + G = nx.bipartite.random_graph(n_top, n_bottom, p, seed=seed) + return _nx_to_sparse_adjacency(G) + + +def generate_complete_bipartite_graph(n_top=50, n_bottom=100): + """ + Generate a complete bipartite graph. + + In a complete bipartite graph, every vertex in the top set is connected + to every vertex in the bottom set. + Parameters: n_top: int Number of vertices in the top set. n_bottom: int Number of vertices in the bottom set. - + + Returns: + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). + """ + n_top, n_bottom = int(n_top), int(n_bottom) + G = nx.complete_bipartite_graph(n_top, n_bottom) + return _nx_to_sparse_adjacency(G) + + +def generate_delaunay_triangulation(n=100, seed=0): + """ + Generate a Delaunay triangulation graph. + + Vertices are randomly placed in a 2D unit square, and edges are created + based on the Delaunay triangulation of these points. The resulting graph + has planar structure with triangular faces. + + Parameters: + n: int + Number of vertices. + seed: int + Random seed for reproducibility. + Returns: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). """ - G = nx.bipartite.random_graph(n_top, n_bottom, 0.1) - - # Ensure all edges have i < j - edges = np.array(list(G.edges())) - edges = np.sort(edges, axis=1) - return edges + from scipy.spatial import Delaunay + + rng = np.random.RandomState(seed) + pts = rng.rand(n, 2) + + tri = Delaunay(pts) + + G = nx.Graph() + G.add_nodes_from(range(n)) + + for simplex in tri.simplices: + i, j, k = simplex + G.add_edges_from([(i, j), (j, k), (k, i)]) + + return _nx_to_sparse_adjacency(G) def generate_balanced_tree(r=2, h=10): """ Generate a balanced r-ary tree of height h. - + Parameters: r: int Branching factor of the tree. h: int Height of the tree. 
- + Returns: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). """ G = nx.balanced_tree(r, h) - edges = np.array(list(G.edges())) - edges = np.sort(edges, axis=1) - return edges + return _nx_to_sparse_adjacency(G) def generate_random_regular(n=100, d=3, seed=0): """ Generate a random regular graph where each node has degree d. - + Parameters: n: int Number of vertices. @@ -251,21 +295,19 @@ def generate_random_regular(n=100, d=3, seed=0): Degree of each vertex. seed: int Random seed for reproducibility. - + Returns: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). """ G = nx.random_regular_graph(d, n, seed=seed) - edges = np.array(list(G.edges())) - edges = np.sort(edges, axis=1) - return edges + return _nx_to_sparse_adjacency(G) def generate_scale_free(n=100, alpha=0.41, beta=0.54, gamma=0.05, delta_in=0.2, delta_out=0, seed=0): """ Generate a scale-free graph using Holme and Kim algorithm. - + Parameters: n: int Number of vertices. @@ -273,26 +315,21 @@ def generate_scale_free(n=100, alpha=0.41, beta=0.54, gamma=0.05, delta_in=0.2, Parameters for the scale-free graph generation. seed: int Random seed for reproducibility. - + Returns: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). """ G = nx.scale_free_graph(n, alpha, beta, gamma, delta_in, delta_out, seed=seed) - # Convert to undirected graph by dropping edge directions G = G.to_undirected() - # Remove self-loops G.remove_edges_from(nx.selfloop_edges(G)) - - edges = np.array(list(G.edges())) - edges = np.sort(edges, axis=1) - return edges + return _nx_to_sparse_adjacency(G) def generate_geometric(n=100, radius=0.2, dim=2, seed=0): """ Generate a random geometric graph in a unit cube. 
- + Parameters: n: int Number of vertices. @@ -302,42 +339,38 @@ def generate_geometric(n=100, radius=0.2, dim=2, seed=0): Dimension of the space. seed: int Random seed for reproducibility. - + Returns: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). """ G = nx.random_geometric_graph(n, radius, dim=dim, seed=seed) - edges = np.array(list(G.edges())) - edges = np.sort(edges, axis=1) - return edges + return _nx_to_sparse_adjacency(G) def generate_caveman(l=10, k=10): """ Generate a caveman graph with l cliques of size k. - + Parameters: l: int Number of cliques. k: int Size of each clique. - + Returns: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). """ G = nx.caveman_graph(l, k) - edges = np.array(list(G.edges())) - edges = np.sort(edges, axis=1) - return edges + return _nx_to_sparse_adjacency(G) def generate_relaxed_caveman(l=10, k=10, p=0.1, seed=0): """ Generate a relaxed caveman graph with l cliques of size k, and a rewiring probability p. - + Parameters: l: int Number of cliques. @@ -347,13 +380,11 @@ def generate_relaxed_caveman(l=10, k=10, p=0.1, seed=0): Rewiring probability. seed: int Random seed for reproducibility. - + Returns: - edges: np.ndarray of shape (num_edges, 2) - Array of edge pairs (i, j) with i < j. + adjacency: scipy.sparse.csr_matrix + Sparse adjacency matrix (n × n). 
""" np.random.seed(seed) G = nx.relaxed_caveman_graph(l, k, p) - edges = np.array(list(G.edges())) - edges = np.sort(edges, axis=1) - return edges + return _nx_to_sparse_adjacency(G) diff --git a/graphem/influence.py b/graphem/influence.py index b0a7645..0403a9b 100644 --- a/graphem/influence.py +++ b/graphem/influence.py @@ -26,9 +26,9 @@ def graphem_seed_selection(embedder, k, num_iterations=20): """ # Run the layout algorithm embedder.run_layout(num_iterations=num_iterations) - + # Compute radial distances from the origin (0, 0, 0) - positions = np.array(embedder.positions) + positions = embedder.get_positions() radial_distances = np.linalg.norm(positions, axis=1) # Select the k nodes with the highest radial distances diff --git a/tests/test_embedder.py b/tests/test_embedder.py index 2b6b595..bdcb829 100644 --- a/tests/test_embedder.py +++ b/tests/test_embedder.py @@ -2,8 +2,9 @@ import pytest import numpy as np +import scipy.sparse as sp from graphem.embedder import GraphEmbedder -from graphem.generators import erdos_renyi_graph, generate_random_regular +from graphem.generators import generate_er, generate_random_regular class TestEmbedder: @@ -11,16 +12,15 @@ class TestEmbedder: def test_embedder_initialization(self): """Test embedder initialization.""" - edges = generate_random_regular(n=50, d=4, seed=42) - + adj = generate_random_regular(n=50, d=4, seed=42) + embedder = GraphEmbedder( - edges=edges, - n_vertices=50, + adjacency=adj, n_components=2, sample_size=256, verbose=False ) - + assert embedder.n == 50 assert embedder.n_components == 2 assert embedder.positions.shape == (50, 2) @@ -28,91 +28,165 @@ def test_embedder_initialization(self): def test_embedder_dimensions(self): """Test embedder with different dimensions.""" - edges = generate_random_regular(n=40, d=4, seed=42) - + adj = generate_random_regular(n=40, d=4, seed=42) + for dim in [2, 3, 4]: embedder = GraphEmbedder( - edges=edges, - n_vertices=40, + adjacency=adj, n_components=dim, 
sample_size=200, verbose=False ) - + assert embedder.n_components == dim assert embedder.positions.shape == (40, dim) def test_layout_execution(self): """Test layout algorithm execution.""" - edges = generate_random_regular(n=40, d=4, seed=42) - + adj = generate_random_regular(n=40, d=4, seed=42) + embedder = GraphEmbedder( - edges=edges, - n_vertices=40, + adjacency=adj, n_components=2, sample_size=128, n_neighbors=10, verbose=False ) - - initial_positions = embedder.positions.copy() - embedder.run_layout(num_iterations=3) - - assert not np.array_equal(initial_positions, embedder.positions) - assert embedder.positions.shape == (40, 2) - assert np.all(np.isfinite(embedder.positions)) + + initial_positions = embedder.get_positions().copy() + final_positions = embedder.run_layout(num_iterations=3) + + assert not np.array_equal(initial_positions, final_positions) + assert final_positions.shape == (40, 2) + assert np.all(np.isfinite(final_positions)) def test_disconnected_graph(self): """Test embedder with disconnected graph.""" # Create two disconnected triangles - edges = np.array([ - [0, 1], [1, 2], [2, 0], # Triangle 1 - [3, 4], [4, 5], [5, 3] # Triangle 2 - ]) - + n = 6 + adj = sp.csr_matrix((n, n), dtype=int) + # Triangle 1: vertices 0, 1, 2 + adj[0, 1] = adj[1, 0] = 1 + adj[1, 2] = adj[2, 1] = 1 + adj[2, 0] = adj[0, 2] = 1 + # Triangle 2: vertices 3, 4, 5 + adj[3, 4] = adj[4, 3] = 1 + adj[4, 5] = adj[5, 4] = 1 + adj[5, 3] = adj[3, 5] = 1 + embedder = GraphEmbedder( - edges=edges, - n_vertices=6, + adjacency=adj, n_components=2, sample_size=6, verbose=False ) - + embedder.run_layout(num_iterations=2) assert embedder.positions.shape == (6, 2) def test_layout_stability(self): """Test that layout runs are numerically stable.""" - edges = generate_random_regular(n=30, d=4, seed=42) - + adj = generate_random_regular(n=30, d=4, seed=42) + embedder = GraphEmbedder( - edges=edges, - n_vertices=30, + adjacency=adj, n_components=2, sample_size=64, verbose=False ) - + for _ 
in range(3): embedder.run_layout(num_iterations=2) - + assert np.all(np.isfinite(embedder.positions)) - + max_coord = np.max(np.abs(embedder.positions)) assert max_coord < 1000 # Reasonable bound def test_large_graphs(self): """Test embedder with large graphs.""" - edges = erdos_renyi_graph(n=200, p=0.02, seed=42) - + adj = generate_er(n=200, p=0.02, seed=42) + embedder = GraphEmbedder( - edges=edges, - n_vertices=200, + adjacency=adj, n_components=2, sample_size=512, batch_size=1024, verbose=False ) - + assert embedder.positions.shape == (200, 2) - assert np.all(np.isfinite(embedder.positions)) \ No newline at end of file + assert np.all(np.isfinite(embedder.positions)) + + def test_get_positions_method(self): + """Test get_positions() returns numpy array.""" + adj = generate_random_regular(n=20, d=3, seed=42) + + embedder = GraphEmbedder( + adjacency=adj, + n_components=2, + verbose=False + ) + + positions = embedder.get_positions() + assert isinstance(positions, np.ndarray) + assert positions.shape == (20, 2) + assert np.all(np.isfinite(positions)) + + def test_adjacency_validation(self): + """Test adjacency matrix validation.""" + # Test with non-square matrix + with pytest.raises(ValueError, match="must be square"): + adj = sp.csr_matrix((5, 3), dtype=int) + GraphEmbedder(adjacency=adj, n_components=2, verbose=False) + + # Test with empty matrix + with pytest.raises(ValueError, match="cannot be empty"): + adj = sp.csr_matrix((0, 0), dtype=int) + GraphEmbedder(adjacency=adj, n_components=2, verbose=False) + + def test_seed_parameter(self): + """Test seed parameter for reproducibility.""" + adj = generate_random_regular(n=30, d=4, seed=42) + + # Create two embedders with same seed + embedder1 = GraphEmbedder( + adjacency=adj, + n_components=2, + seed=123, + verbose=False + ) + + embedder2 = GraphEmbedder( + adjacency=adj, + n_components=2, + seed=123, + verbose=False + ) + + pos1 = embedder1.get_positions() + pos2 = embedder2.get_positions() + + # Positions 
should have same structure (eigenvectors can have sign flip) + # Test that positions are the same up to sign and permutation + assert pos1.shape == pos2.shape + + # Check that the positions are close (accounting for potential sign flip) + assert np.allclose(pos1, pos2) or np.allclose(pos1, -pos2) or \ + np.allclose(pos1[:, 0], pos2[:, 0]) or np.allclose(pos1[:, 1], pos2[:, 1]) + + def test_logger_instance_parameter(self): + """Test custom logger instance parameter.""" + import logging + + custom_logger = logging.getLogger("test_logger") + adj = generate_random_regular(n=20, d=3, seed=42) + + embedder = GraphEmbedder( + adjacency=adj, + n_components=2, + logger_instance=custom_logger, + verbose=False + ) + + assert embedder.logger == custom_logger diff --git a/tests/test_generators.py b/tests/test_generators.py index 609ea62..c862221 100644 --- a/tests/test_generators.py +++ b/tests/test_generators.py @@ -3,8 +3,9 @@ import pytest import numpy as np import networkx as nx +import scipy.sparse as sp from graphem.generators import ( - erdos_renyi_graph, + generate_er, generate_random_regular, generate_scale_free, generate_geometric, @@ -12,7 +13,11 @@ generate_relaxed_caveman, generate_ws, generate_ba, - generate_sbm + generate_sbm, + generate_bipartite_graph, + generate_complete_bipartite_graph, + generate_delaunay_triangulation, + compute_vertex_degrees ) @@ -22,163 +27,209 @@ class TestGenerators: def test_erdos_renyi_graph(self): """Test Erdős-Rényi graph generator.""" n, p = 50, 0.1 - edges = erdos_renyi_graph(n=n, p=p, seed=42) - - assert isinstance(edges, np.ndarray) - assert edges.shape[1] == 2 - assert edges.dtype in [np.int32, np.int64] - - if len(edges) > 0: - assert np.min(edges) >= 0 - assert np.max(edges) < n - - assert not np.any(edges[:, 0] == edges[:, 1]) - assert 0 <= len(edges) <= n * (n - 1) // 2 + adj = generate_er(n=n, p=p, seed=42) + + assert sp.issparse(adj) + assert adj.shape == (n, n) + assert adj.dtype == np.int32 or adj.dtype == np.int64 + + 
# Check symmetric + assert np.allclose((adj - adj.T).data, 0) + + # Check no self-loops + assert adj.diagonal().sum() == 0 + + # Check edges within range + assert 0 <= adj.nnz // 2 <= n * (n - 1) // 2 def test_random_regular_graph(self): """Test random regular graph generator.""" n, d = 20, 3 - edges = generate_random_regular(n=n, d=d, seed=42) - - assert isinstance(edges, np.ndarray) - assert edges.shape[1] == 2 - - G = nx.Graph() - G.add_nodes_from(range(n)) - G.add_edges_from(edges) - - degrees = dict(G.degree()) - for node in range(n): - assert degrees[node] == d + adj = generate_random_regular(n=n, d=d, seed=42) + + assert sp.issparse(adj) + assert adj.shape == (n, n) + + # Check all vertices have degree d + degrees = compute_vertex_degrees(adj) + assert np.all(degrees == d) def test_scale_free_graph(self): """Test scale-free graph generator.""" n = 50 - edges = generate_scale_free(n=n, seed=42) - - assert isinstance(edges, np.ndarray) - assert edges.shape[1] == 2 - - if len(edges) > 0: - assert np.min(edges) >= 0 - assert np.max(edges) < n + adj = generate_scale_free(n=n, seed=42) + + assert sp.issparse(adj) + assert adj.shape == (n, n) + assert adj.nnz > 0 def test_geometric_graph(self): """Test random geometric graph generator.""" n, radius = 30, 0.3 - edges = generate_geometric(n=n, radius=radius, seed=42) - - assert isinstance(edges, np.ndarray) - assert edges.shape[1] == 2 - - if len(edges) > 0: - assert np.min(edges) >= 0 - assert np.max(edges) < n + adj = generate_geometric(n=n, radius=radius, seed=42) + + assert sp.issparse(adj) + assert adj.shape == (n, n) def test_caveman_graph(self): """Test caveman graph generator.""" l, k = 3, 5 - edges = generate_caveman(l=l, k=k) - - assert isinstance(edges, np.ndarray) - assert edges.shape[1] == 2 - + adj = generate_caveman(l=l, k=k) + + assert sp.issparse(adj) total_vertices = l * k - if len(edges) > 0: - assert np.min(edges) >= 0 - assert np.max(edges) < total_vertices + assert adj.shape == (total_vertices, 
total_vertices) def test_relaxed_caveman_graph(self): """Test relaxed caveman graph generator.""" l, k, p = 3, 5, 0.1 - edges = generate_relaxed_caveman(l=l, k=k, p=p, seed=42) - - assert isinstance(edges, np.ndarray) - assert edges.shape[1] == 2 - + adj = generate_relaxed_caveman(l=l, k=k, p=p, seed=42) + + assert sp.issparse(adj) total_vertices = l * k - if len(edges) > 0: - assert np.min(edges) >= 0 - assert np.max(edges) < total_vertices + assert adj.shape == (total_vertices, total_vertices) def test_watts_strogatz_graph(self): """Test Watts-Strogatz small-world graph generator.""" n, k, p = 20, 4, 0.3 - edges = generate_ws(n=n, k=k, p=p, seed=42) - - assert isinstance(edges, np.ndarray) - assert edges.shape[1] == 2 - - if len(edges) > 0: - assert np.min(edges) >= 0 - assert np.max(edges) < n + adj = generate_ws(n=n, k=k, p=p, seed=42) + + assert sp.issparse(adj) + assert adj.shape == (n, n) def test_barabasi_albert_graph(self): """Test Barabási-Albert graph generator.""" n, m = 50, 2 - edges = generate_ba(n=n, m=m, seed=42) - - assert isinstance(edges, np.ndarray) - assert edges.shape[1] == 2 - - if len(edges) > 0: - assert np.min(edges) >= 0 - assert np.max(edges) < n + adj = generate_ba(n=n, m=m, seed=42) + + assert sp.issparse(adj) + assert adj.shape == (n, n) def test_stochastic_block_model(self): """Test Stochastic Block Model generator.""" n_per_block, num_blocks = 10, 3 p_in, p_out = 0.8, 0.1 - edges = generate_sbm( - n_per_block=n_per_block, - num_blocks=num_blocks, - p_in=p_in, - p_out=p_out, + adj = generate_sbm( + n_per_block=n_per_block, + num_blocks=num_blocks, + p_in=p_in, + p_out=p_out, seed=42 ) - - assert isinstance(edges, np.ndarray) - assert edges.shape[1] == 2 - + + assert sp.issparse(adj) total_vertices = n_per_block * num_blocks - if len(edges) > 0: - assert np.min(edges) >= 0 - assert np.max(edges) < total_vertices + assert adj.shape == (total_vertices, total_vertices) + + def test_bipartite_graph(self): + """Test random bipartite graph 
generator.""" + n_top, n_bottom = 20, 30 + p = 0.2 + adj = generate_bipartite_graph(n_top=n_top, n_bottom=n_bottom, p=p, seed=42) + + assert sp.issparse(adj) + total_vertices = n_top + n_bottom + assert adj.shape == (total_vertices, total_vertices) + + # Test reproducibility with same seed + adj2 = generate_bipartite_graph(n_top=n_top, n_bottom=n_bottom, p=p, seed=42) + assert np.allclose((adj - adj2).data, 0) + + # Test different seeds give different results + adj3 = generate_bipartite_graph(n_top=n_top, n_bottom=n_bottom, p=p, seed=123) + if adj.nnz > 0 and adj3.nnz > 0: + assert not np.allclose((adj - adj3).data, 0) + def test_complete_bipartite_graph(self): + """Test complete bipartite graph generator.""" + n_top, n_bottom = 10, 15 + adj = generate_complete_bipartite_graph(n_top=n_top, n_bottom=n_bottom) + + assert sp.issparse(adj) + total_vertices = n_top + n_bottom + assert adj.shape == (total_vertices, total_vertices) + + # Complete bipartite should have exactly n_top * n_bottom edges + assert adj.nnz == 2 * n_top * n_bottom # Each edge counted twice + + # Convert to NetworkX and verify bipartite structure + G = nx.from_scipy_sparse_array(adj) + assert nx.is_bipartite(G) + + def test_delaunay_triangulation(self): + """Test Delaunay triangulation graph generator.""" + n = 50 + adj = generate_delaunay_triangulation(n=n, seed=42) + + assert sp.issparse(adj) + assert adj.shape == (n, n) + assert adj.nnz > 0 + + # Convert to NetworkX + G = nx.from_scipy_sparse_array(adj) + + # Delaunay triangulation should be connected for random points + assert nx.is_connected(G) + + # Test reproducibility with same seed + adj2 = generate_delaunay_triangulation(n=n, seed=42) + assert np.allclose((adj - adj2).data, 0) + + # Test different seeds give different results + adj3 = generate_delaunay_triangulation(n=n, seed=123) + assert not np.allclose((adj - adj3).data, 0) def test_reproducible_results(self): """Test that generators produce reproducible results with same seed.""" n, p 
= 30, 0.2 - - edges1 = erdos_renyi_graph(n=n, p=p, seed=123) - edges2 = erdos_renyi_graph(n=n, p=p, seed=123) - - np.testing.assert_array_equal(edges1, edges2) + + adj1 = generate_er(n=n, p=p, seed=123) + adj2 = generate_er(n=n, p=p, seed=123) + + assert np.allclose((adj1 - adj2).data, 0) def test_different_seeds(self): """Test that different seeds produce different results.""" n, p = 30, 0.3 - - edges1 = erdos_renyi_graph(n=n, p=p, seed=123) - edges2 = erdos_renyi_graph(n=n, p=p, seed=456) - - if len(edges1) > 0 and len(edges2) > 0: - assert not np.array_equal(edges1, edges2) - - def test_edge_format(self): - """Test that all generators return edges in consistent format.""" + + adj1 = generate_er(n=n, p=p, seed=123) + adj2 = generate_er(n=n, p=p, seed=456) + + if adj1.nnz > 0 and adj2.nnz > 0: + assert not np.allclose((adj1 - adj2).data, 0) + + def test_adjacency_format(self): + """Test that all generators return sparse matrices in consistent format.""" generators_params = [ - (erdos_renyi_graph, {"n": 20, "p": 0.1, "seed": 42}), + (generate_er, {"n": 20, "p": 0.1, "seed": 42}), (generate_random_regular, {"n": 20, "d": 3, "seed": 42}), (generate_ws, {"n": 20, "k": 4, "p": 0.3, "seed": 42}), (generate_ba, {"n": 20, "m": 2, "seed": 42}), ] - + for generator, params in generators_params: - edges = generator(**params) - - assert isinstance(edges, np.ndarray) - if len(edges) > 0: - assert edges.shape[1] == 2 - assert edges.ndim == 2 - assert np.issubdtype(edges.dtype, np.integer) \ No newline at end of file + adj = generator(**params) + + assert sp.issparse(adj) + assert adj.shape[0] == adj.shape[1] # Square + assert adj.shape[0] > 0 # Non-empty + # Check symmetric (undirected graph) + assert np.allclose((adj - adj.T).data, 0) + + def test_compute_vertex_degrees(self): + """Test vertex degree computation.""" + # Create a simple graph + n = 5 + adj = generate_er(n=n, p=0.5, seed=42) + + degrees = compute_vertex_degrees(adj) + + assert isinstance(degrees, np.ndarray) + 
assert degrees.shape == (n,) + assert np.all(degrees >= 0) + + # Verify degree calculation + adj_csr = sp.csr_matrix(adj) # Convert to csr_matrix for row slicing + for i in range(n): + expected_degree = adj_csr[i].nnz + assert degrees[i] == expected_degree diff --git a/tests/test_influence.py b/tests/test_influence.py index 52c2f58..5f3feef 100644 --- a/tests/test_influence.py +++ b/tests/test_influence.py @@ -3,8 +3,9 @@ import pytest import numpy as np import networkx as nx +import scipy.sparse as sp from graphem.embedder import GraphEmbedder -from graphem.generators import erdos_renyi_graph +from graphem.generators import generate_er from graphem.influence import ndlib_estimated_influence