From c147e31502c66ae8b8c0bffacb3e883be35393ef Mon Sep 17 00:00:00 2001 From: Ryan DeWolfe Date: Sun, 8 Mar 2026 21:40:19 +0100 Subject: [PATCH 1/9] ECG implementation --- src/Graphs.jl | 3 + src/community/ecg.jl | 136 ++++++++++++++++++++++++++++++++++++++++++ test/community/ecg.jl | 106 ++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 4 files changed, 246 insertions(+) create mode 100644 src/community/ecg.jl create mode 100644 test/community/ecg.jl diff --git a/src/Graphs.jl b/src/Graphs.jl index 5380f88f2..9df01764e 100644 --- a/src/Graphs.jl +++ b/src/Graphs.jl @@ -327,6 +327,8 @@ export triangles, label_propagation, louvain, + ecg, + ecg_weights, maximal_cliques, maximum_clique, clique_number, @@ -553,6 +555,7 @@ include("centrality/radiality.jl") include("community/modularity.jl") include("community/label_propagation.jl") include("community/louvain.jl") +include("community/ecg.jl") include("community/core-periphery.jl") include("community/clustering.jl") include("community/cliques.jl") diff --git a/src/community/ecg.jl b/src/community/ecg.jl new file mode 100644 index 000000000..f2c92397a --- /dev/null +++ b/src/community/ecg.jl @@ -0,0 +1,136 @@ +""" + ecg(g; γ=1, ensemble_size::Integer=16, min_edge_weight=0.05, distmx::AbstractArray{<:Number}=weights(g), max_moves::Integer=1000, max_merges::Integer=1000, move_tol::Real=10e-10, merge_tol::Real=10e-10, rng=nothing, seed=nothing) + +Community detection using ensemble clustering for graphs (ECG). Weights the edges based on the +proportion of time the endpoints are in the same cluster of a Louvain without merges before running +a final Louvain to detect communities. + +### Optional Arguments +- `distmx=weights(g)`: distance matrix for weighted graphs +- `ensemble_size=16`: the number of no merge Louvains in the ensemble +- `min_edge_weight`: the minimum edge weight passed to the final Louvain (to retain the original topology). +- `γ=1.0`: where `γ > 0` is a resolution parameter. 
Higher resolutions lead to more + communities, while lower resolutions lead to fewer communities. Where `γ=1.0` it + leads to the traditional definition of the modularity. +- `max_moves=1000`: maximum number of rounds moving vertices before merging for each Louvain. +- `max_merges=1000`: maximum number of merges in the final Louvain. +- `move_tol=10e-10`: necessary increase of modularity to move a vertex in each Louvain. +- `merge_tol=10e-10`: necessary increase of modularity in the move stage to merge in the final Louvain. +- `rng=nothing`: rng to use for reproducibility. May only pass one of rng or seed. +- `seed=nothing`: seed to use for reproducibility. May only pass one of rng or seed. + +### References +- [Valérie Poulin and François Théberge. Ensemble Clustering for Graphs: Comparisons and Applications. Applied Network Science, 4:4 (2019)](https://doi.org/10.1007/s41109-019-0162-z) + + +# Examples +```jldoctest +julia> using Graphs + +julia> barbell = blockdiag(complete_graph(3), complete_graph(3)); + +julia> add_edge!(barbell, 1, 4); + +julia> ecg(barbell) +6-element Vector{Int64}: + 1 + 1 + 1 + 2 + 2 + 2 + +julia> ecg(barbell, γ=0.01) +6-element Vector{Int64}: + 1 + 1 + 1 + 1 + 1 + 1 +``` +""" +function ecg( + g::AbstractGraph{T}; + γ=1.0, + ensemble_size::Integer=16, + min_edge_weight::Real=0.05, + distmx::AbstractArray{<:Number}=weights(g), + max_moves::Integer=1000, + max_merges::Integer=1000, + move_tol::Real=10e-10, + merge_tol::Real=10e-10, + rng::Union{Nothing,AbstractRNG}=nothing, + seed::Union{Nothing,Integer}=nothing, +) where {T} + rng = rng_from_rng_or_seed(rng, seed) + if nv(g) == 0 + return T[] + end + ensemble_weights = ecg_weights( + g; + γ=γ, + ensemble_size=ensemble_size, + distmx=distmx, + max_moves=max_moves, + move_tol=move_tol, + rng=rng, + ) + weights = + (1-min_edge_weight)*ensemble_weights + + min_edge_weight * adjacency_matrix(g, Float64) + return louvain( + g; + γ=γ, + distmx=weights, + max_moves=max_moves, 
max_merges=max_merges, + move_tol=move_tol, + merge_tol=merge_tol, + rng=rng, + ) +end + +""" + ensemble_weights(g; c, distmx, max_moves, move_tol, rng, seed) + +Compute edge weights via an ensemble of no merge Louvains. The weight of each edge is +the proportion of time the endpoints are in the same community. +""" +function ecg_weights( + g::AbstractGraph{T}; + γ=1.0, + ensemble_size::Integer=16, + distmx::AbstractArray{<:Number}=weights(g), + max_moves::Integer=1000, + move_tol::Real=10e-10, + rng::Union{Nothing,AbstractRNG}=nothing, + seed::Union{Nothing,Integer}=nothing, +) where {T} + rng = rng_from_rng_or_seed(rng, seed) + # Create sparse adjacency matrix full of explicit zeros + ensemble_weights = adjacency_matrix(g, Float64) + ensemble_weights.nzval .= 0 + + for _ in 1:ensemble_size + ensemble_communities = louvain( + g; + γ=γ, + distmx=distmx, + max_moves=max_moves, + max_merges=0, + move_tol=move_tol, + rng=rng, + ) + for e in edges(g) + if ensemble_communities[src(e)] == ensemble_communities[dst(e)] + ensemble_weights[src(e), dst(e)] += 1 / ensemble_size + if !is_directed(g) + ensemble_weights[dst(e), src(e)] += 1 / ensemble_size + end + end + end + end + + return ensemble_weights +end diff --git a/test/community/ecg.jl b/test/community/ecg.jl new file mode 100644 index 000000000..e70976742 --- /dev/null +++ b/test/community/ecg.jl @@ -0,0 +1,106 @@ +@testset "ECG" begin + # Test ecg_weights + # Undirected + barbell = barbell_graph(3, 3) + c = sparse( + [ + 0.0 1.0 1.0 0.0 0.0 0.0; + 1.0 0.0 1.0 0.0 0.0 0.0; + 1.0 1.0 0.0 0.0 0.0 0.0; + 0.0 0.0 0.0 0.0 1.0 1.0; + 0.0 0.0 0.0 1.0 0.0 1.0; + 0.0 0.0 0.0 1.0 1.0 0.0 + ], + ) + for g in test_generic_graphs(barbell) + r = ecg_weights(g) + dropzeros!(r) + @test c == r + end + + # Empty + empty = SimpleGraph(10) + c = spzeros(10, 10) + for g in test_generic_graphs(empty) + r = @inferred ecg_weights(g) + dropzeros!(r) + @test c == r + end + + # Undirected loops + loops = complete_graph(2) + add_edge!(loops, 1, 
1) + add_edge!(loops, 2, 2) + c = sparse([ + 2.0 0.0; + 0.0 2.0 + ]) + for g in test_generic_graphs(loops) + r = ecg_weights(g) + dropzeros!(r) + @test c == r + end + + # Directed + triangle = SimpleDiGraph(3) + add_edge!(triangle, 1, 2) + add_edge!(triangle, 2, 3) + add_edge!(triangle, 3, 1) + + # Directed Loops + barbell = blockdiag(triangle, triangle) + add_edge!(barbell, 1, 4) + c = sparse( + [ + 0.0 1.0 0.0 0.0 0.0 0.0; + 0.0 0.0 1.0 0.0 0.0 0.0; + 1.0 0.0 0.0 0.0 0.0 0.0; + 0.0 0.0 0.0 0.0 1.0 0.0; + 0.0 0.0 0.0 0.0 0.0 1.0; + 0.0 0.0 0.0 1.0 0.0 0.0 + ], + ) + for g in test_generic_graphs(barbell) + r = ecg_weights(g) + dropzeros!(r) + @test r == c + end + + # Directed loops + barbell = SimpleDiGraph(2) + add_edge!(barbell, 1, 1) + add_edge!(barbell, 2, 2) + add_edge!(barbell, 1, 2) + c = sparse([ + 1.0 0.0; + 0.0 1.0 + ]) + for g in test_generic_graphs(barbell) + r = ecg_weights(g) + dropzeros!(r) + @test r == c + end + + # Test ECG + # Undirected + barbell = barbell_graph(3, 3) + c = [1, 1, 1, 2, 2, 2] + for g in test_generic_graphs(barbell) + r = ecg(g) + @test c == r + end + + # Directed + triangle = SimpleDiGraph(3) + add_edge!(triangle, 1, 2) + add_edge!(triangle, 2, 3) + add_edge!(triangle, 3, 1) + + barbell = blockdiag(triangle, triangle) + add_edge!(barbell, 1, 4) + c = [1, 1, 1, 2, 2, 2] + for g in test_generic_graphs(barbell) + r = ecg(g) + @test r == c + end +end diff --git a/test/runtests.jl b/test/runtests.jl index c4da7ef47..d5da00643 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -118,6 +118,7 @@ tests = [ "traversals/all_simple_paths", "community/cliques", "community/core-periphery", + "community/ecg", "community/independent_sets", "community/label_propagation", "community/louvain", From f2869936d696dff974838d6adbdb4092d46697d1 Mon Sep 17 00:00:00 2001 From: Ryan DeWolfe Date: Sun, 8 Mar 2026 22:14:53 +0100 Subject: [PATCH 2/9] Add tests to make code cov happy --- test/community/ecg.jl | 25 ++++++++++++++++++++++++- 1 file changed, 
24 insertions(+), 1 deletion(-) diff --git a/test/community/ecg.jl b/test/community/ecg.jl index e70976742..bcde1fca0 100644 --- a/test/community/ecg.jl +++ b/test/community/ecg.jl @@ -18,7 +18,7 @@ @test c == r end - # Empty + # Empty, no edges empty = SimpleGraph(10) c = spzeros(10, 10) for g in test_generic_graphs(empty) @@ -27,6 +27,14 @@ @test c == r end + # Empty, no nodes + empty = SimpleGraph() + c = spzeros(0, 0) + for g in test_generic_graphs(empty) + r = @inferred ecg_weights(g) + @test c == r + end + # Undirected loops loops = complete_graph(2) add_edge!(loops, 1, 1) @@ -103,4 +111,19 @@ r = ecg(g) @test r == c end + + # Empty, no edges + empty = SimpleGraph(10) + c = collect(1:10) + for g in test_generic_graphs(empty) + r = ecg(g) + @test c == r + end + + # Empty, no nodes + empty = SimpleGraph() + for g in test_generic_graphs(empty) + r = ecg(g) + @test length(r) == 0 + end end From 9c74c3b7089f1a1b75dfa8b3401fcc1d357ddba4 Mon Sep 17 00:00:00 2001 From: Ryan DeWolfe Date: Sun, 8 Mar 2026 22:15:20 +0100 Subject: [PATCH 3/9] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 30638257c..c8c15f8e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ We follow SemVer as most of the Julia ecosystem. 
Below you might see the "breaki ## unreleased - `is_articulation(g, v)` for checking whether a single vertex is an articulation point +- ECG community detection algorithm ## v1.14.0 - 2026-02-26 From 4805750e609d98deec904f8c4c91508b0e8874b5 Mon Sep 17 00:00:00 2001 From: Ryan DeWolfe Date: Sun, 12 Apr 2026 15:21:51 +0200 Subject: [PATCH 4/9] Change 10e-10 to 1e-9 --- src/community/ecg.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/community/ecg.jl b/src/community/ecg.jl index f2c92397a..6530ff933 100644 --- a/src/community/ecg.jl +++ b/src/community/ecg.jl @@ -1,5 +1,5 @@ """ - ecg(g; γ=1, ensemble_size::Integer=16, min_edge_weight=0.05, distmx::AbstractArray{<:Number}=weights(g), max_moves::Integer=1000, max_merges::Integer=1000, move_tol::Real=10e-10, merge_tol::Real=10e-10, rng=nothing, seed=nothing) + ecg(g; γ=1, ensemble_size::Integer=16, min_edge_weight=0.05, distmx::AbstractArray{<:Number}=weights(g), max_moves::Integer=1000, max_merges::Integer=1000, move_tol::Real=1e-9, merge_tol::Real=1e-9, rng=nothing, seed=nothing) Community detection using ensemble clustering for graphs (ECG). Weights the edges based on the proportion of time the endpoints are in the same cluster of a Louvain without merges before running @@ -14,8 +14,8 @@ a final Louvain to detect communities. leads to the traditional definition of the modularity. - `max_moves=1000`: maximum number of rounds moving vertices before merging for each Louvain. - `max_merges=1000`: maximum number of merges in the final Louvain. -- `move_tol=10e-10`: necessary increase of modularity to move a vertex in each Louvain. -- `merge_tol=10e-10`: necessary increase of modularity in the move stage to merge in the final Louvain. +- `move_tol=1e-9`: necessary increase of modularity to move a vertex in each Louvain. +- `merge_tol=1e-9`: necessary increase of modularity in the move stage to merge in the final Louvain. - `rng=nothing`: rng to use for reproducibility. 
May only pass one of rng or seed. - `seed=nothing`: seed to use for reproducibility. May only pass one of rng or seed. @@ -58,8 +58,8 @@ function ecg( distmx::AbstractArray{<:Number}=weights(g), max_moves::Integer=1000, max_merges::Integer=1000, - move_tol::Real=10e-10, - merge_tol::Real=10e-10, + move_tol::Real=1e-9, + merge_tol::Real=1e-9, rng::Union{Nothing,AbstractRNG}=nothing, seed::Union{Nothing,Integer}=nothing, ) where {T} @@ -103,7 +103,7 @@ function ecg_weights( ensemble_size::Integer=16, distmx::AbstractArray{<:Number}=weights(g), max_moves::Integer=1000, - move_tol::Real=10e-10, + move_tol::Real=1e-9, rng::Union{Nothing,AbstractRNG}=nothing, seed::Union{Nothing,Integer}=nothing, ) where {T} From d3506a5abb08e928f76e8dae6b0666090b7712a8 Mon Sep 17 00:00:00 2001 From: Ryan DeWolfe Date: Sun, 12 Apr 2026 16:27:13 +0200 Subject: [PATCH 5/9] Add flag for min weight outside the 2core --- src/community/ecg.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/community/ecg.jl b/src/community/ecg.jl index 6530ff933..0a629e025 100644 --- a/src/community/ecg.jl +++ b/src/community/ecg.jl @@ -8,7 +8,8 @@ a final Louvain to detect communities. ### Optional Arguments - `distmx=weights(g)`: distance matrix for weighted graphs - `ensemble_size=16`: the number of no merge Louvains in the ensemble -- `min_edge_weight`: the minimum edge weight passed to the final Louvain (to retain the original topology). +- `min_edge_weight=0.05`: the minimum edge weight passed to the final Louvain (to retain the original topology). +- `min_weight_outside_2core=true`: a flag to set the weight of edges outside the 2-core to the minimum value. - `γ=1.0`: where `γ > 0` is a resolution parameter. Higher resolutions lead to more communities, while lower resolutions lead to fewer communities. Where `γ=1.0` it leads to the traditional definition of the modularity. 
@@ -55,6 +56,7 @@ function ecg( γ=1.0, ensemble_size::Integer=16, min_edge_weight::Real=0.05, + min_weight_outside_2core::Bool=true, distmx::AbstractArray{<:Number}=weights(g), max_moves::Integer=1000, max_merges::Integer=1000, @@ -76,6 +78,11 @@ function ecg( move_tol=move_tol, rng=rng, ) + if min_weight_outside_2core + mask = core_number(g) < 2 + indices = findall(i -> mask[i[1]] || mask[i[2]], CartesianIndices(ensemble_weights)) + ensemble_weights[indices] .= 0.0 + end weights = (1-min_edge_weight)*ensemble_weights + min_edge_weight * adjacency_matrix(g, Float64) From 51235e11937587a79cea8f3988c843c6cc545756 Mon Sep 17 00:00:00 2001 From: Ryan DeWolfe Date: Sun, 12 Apr 2026 16:29:51 +0200 Subject: [PATCH 6/9] Add ecg to docs --- docs/src/algorithms/community.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/src/algorithms/community.md b/docs/src/algorithms/community.md index e70de215e..c774471d6 100644 --- a/docs/src/algorithms/community.md +++ b/docs/src/algorithms/community.md @@ -20,6 +20,7 @@ Pages = [ "community/core-periphery.jl", "community/label_propagation.jl", "community/louvain.jl", + "community/ecg.jl", "community/modularity.jl", "community/rich_club.jl", ] From 462b1c3cb7cce5a76970e68f1f24c1cf7947eb1e Mon Sep 17 00:00:00 2001 From: Ryan DeWolfe Date: Sun, 12 Apr 2026 17:12:57 +0200 Subject: [PATCH 7/9] Fix min weight outside 2core implementation --- src/community/ecg.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/community/ecg.jl b/src/community/ecg.jl index 0a629e025..84866632c 100644 --- a/src/community/ecg.jl +++ b/src/community/ecg.jl @@ -79,8 +79,11 @@ function ecg( rng=rng, ) if min_weight_outside_2core - mask = core_number(g) < 2 - indices = findall(i -> mask[i[1]] || mask[i[2]], CartesianIndices(ensemble_weights)) + corenum = core_number(g) + indices = findall( + i -> (corenum[i[1]] < 2) || (corenum[i[2]] < 2), + CartesianIndices(ensemble_weights), + ) ensemble_weights[indices] .= 0.0 end weights = 
From 3128b66f3c2c8e0bd0ee506e780e3211771b64fb Mon Sep 17 00:00:00 2001 From: Ryan DeWolfe Date: Mon, 13 Apr 2026 11:06:10 +0200 Subject: [PATCH 8/9] More docs and error message for min_weight_outside_core --- src/community/ecg.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/community/ecg.jl b/src/community/ecg.jl index 84866632c..3efb53335 100644 --- a/src/community/ecg.jl +++ b/src/community/ecg.jl @@ -1,5 +1,5 @@ """ - ecg(g; γ=1, ensemble_size::Integer=16, min_edge_weight=0.05, distmx::AbstractArray{<:Number}=weights(g), max_moves::Integer=1000, max_merges::Integer=1000, move_tol::Real=1e-9, merge_tol::Real=1e-9, rng=nothing, seed=nothing) + ecg(g; γ=1, ensemble_size::Integer=16, min_edge_weight=0.05, min_weight_outside_2core::Bool=true, distmx::AbstractArray{<:Number}=weights(g), max_moves::Integer=1000, max_merges::Integer=1000, move_tol::Real=1e-9, merge_tol::Real=1e-9, rng=nothing, seed=nothing) Community detection using ensemble clustering for graphs (ECG). Weights the edges based on the proportion of time the endpoints are in the same cluster of a Louvain without merges before running @@ -9,7 +9,7 @@ a final Louvain to detect communities. - `distmx=weights(g)`: distance matrix for weighted graphs - `ensemble_size=16`: the number of no merge Louvains in the ensemble - `min_edge_weight=0.05`: the minimum edge weight passed to the final Louvain (to retain the original topology). -- `min_weight_outside_2core=true`: a flag to set the weight of edges outside the 2-core to the minimum value. +- `min_weight_outside_2core=true`: a flag to set the weight of edges outside the 2-core to the minimum value. If the graph is directed, the coreness is computed only using out degrees. Must be false if the graph has loops or parallel edges. - `γ=1.0`: where `γ > 0` is a resolution parameter. Higher resolutions lead to more communities, while lower resolutions lead to fewer communities. 
Where `γ=1.0` it leads to the traditional definition of the modularity. @@ -65,6 +65,11 @@ function ecg( rng::Union{Nothing,AbstractRNG}=nothing, seed::Union{Nothing,Integer}=nothing, ) where {T} + min_weight_outside_2core && + has_self_loops(g) && + throw( + ArgumentError("min_weight_outside_2core must be false if the graph has loops.") + ) rng = rng_from_rng_or_seed(rng, seed) if nv(g) == 0 return T[] From d9cd585d33675f25914069cf5d74ba6268fcda93 Mon Sep 17 00:00:00 2001 From: Ryan DeWolfe Date: Tue, 14 Apr 2026 23:01:43 +0200 Subject: [PATCH 9/9] Fix doc strings --- src/community/ecg.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/community/ecg.jl b/src/community/ecg.jl index 3efb53335..7c79bbcfe 100644 --- a/src/community/ecg.jl +++ b/src/community/ecg.jl @@ -1,5 +1,5 @@ """ - ecg(g; γ=1, ensemble_size::Integer=16, min_edge_weight=0.05, min_weight_outside_2core::Bool=true, distmx::AbstractArray{<:Number}=weights(g), max_moves::Integer=1000, max_merges::Integer=1000, move_tol::Real=1e-9, merge_tol::Real=1e-9, rng=nothing, seed=nothing) + ecg(g; γ=1, ensemble_size=16, min_edge_weight=0.05, min_weight_outside_2core=true, distmx=weights(g), max_moves=1000, max_merges=1000, move_tol=1e-9, merge_tol=1e-9, rng=nothing, seed=nothing) Community detection using ensemble clustering for graphs (ECG). Weights the edges based on the proportion of time the endpoints are in the same cluster of a Louvain without merges before running @@ -107,7 +107,7 @@ function ecg( end """ - ensemble_weights(g; c, distmx, max_moves, move_tol, rng, seed) + ecg_weights(g; γ=1.0, ensemble_size=16, distmx=weights(g), max_moves=1000, move_tol=1e-9, rng=nothing, seed=nothing) Compute edge weights via an ensemble of no merge Louvains. The weight of each edge is the proportion of time the endpoints are in the same community.