From 4783ca84eb939501736c05f56cfcbb0d1a94121b Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Wed, 8 Apr 2026 13:32:35 -0700 Subject: [PATCH 1/4] Fix Leiden tests to all run, add some constexpr to some multi_gpu references, restructure Leiden a little to decrease library size --- cpp/CMakeLists.txt | 4 + cpp/src/community/detail/common_methods.cuh | 2 +- cpp/src/community/detail/common_methods.hpp | 4 +- .../community/detail/decision_graph_mis.cuh | 96 +++++ .../community/detail/decision_graph_mis.hpp | 31 ++ .../detail/decision_graph_mis_mg_v32_e32.cu | 18 + .../detail/decision_graph_mis_mg_v64_e64.cu | 18 + .../detail/decision_graph_mis_sg_v32_e32.cu | 18 + .../detail/decision_graph_mis_sg_v64_e64.cu | 18 + .../detail/maximal_independent_moves.cuh | 4 +- cpp/src/community/detail/refine.hpp | 41 +- cpp/src/community/detail/refine_impl.cuh | 397 +++++++----------- cpp/src/community/detail/refine_mg_v32_e32.cu | 60 +-- cpp/src/community/detail/refine_mg_v64_e64.cu | 60 +-- cpp/src/community/detail/refine_sg_v32_e32.cu | 60 +-- cpp/src/community/detail/refine_sg_v64_e64.cu | 60 +-- cpp/src/components/mis_impl.cuh | 4 +- .../sampling/detail/gather_one_hop_impl.cuh | 4 +- cpp/src/structure/relabel_impl.cuh | 2 +- cpp/src/structure/renumber_edgelist_impl.cuh | 2 +- cpp/src/structure/renumber_utils_impl.cuh | 2 +- cpp/tests/community/leiden_test.cpp | 35 +- 22 files changed, 544 insertions(+), 396 deletions(-) create mode 100644 cpp/src/community/detail/decision_graph_mis.cuh create mode 100644 cpp/src/community/detail/decision_graph_mis.hpp create mode 100644 cpp/src/community/detail/decision_graph_mis_mg_v32_e32.cu create mode 100644 cpp/src/community/detail/decision_graph_mis_mg_v64_e64.cu create mode 100644 cpp/src/community/detail/decision_graph_mis_sg_v32_e32.cu create mode 100644 cpp/src/community/detail/decision_graph_mis_sg_v64_e64.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 31a7800225c..78dc3bc88a3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -175,6 +175,8 @@ set(CUGRAPH_SG_SOURCES src/community/detail/common_methods_sg_v32_e32.cu src/community/detail/refine_sg_v64_e64.cu src/community/detail/refine_sg_v32_e32.cu + src/community/detail/decision_graph_mis_sg_v64_e64.cu + src/community/detail/decision_graph_mis_sg_v32_e32.cu src/community/edge_triangle_count_sg_v64_e64.cu src/community/edge_triangle_count_sg_v32_e32.cu src/community/detail/maximal_independent_moves_sg_v64_e64.cu @@ -349,6 +351,8 @@ set(CUGRAPH_MG_SOURCES src/community/detail/common_methods_mg_v32_e32.cu src/community/detail/refine_mg_v64_e64.cu src/community/detail/refine_mg_v32_e32.cu + src/community/detail/decision_graph_mis_mg_v64_e64.cu + src/community/detail/decision_graph_mis_mg_v32_e32.cu src/community/edge_triangle_count_mg_v64_e64.cu src/community/edge_triangle_count_mg_v32_e32.cu src/community/detail/maximal_independent_moves_mg_v64_e64.cu diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh index ed0a9cd2693..386d67f8f44 100644 --- a/cpp/src/community/detail/common_methods.cuh +++ b/cpp/src/community/detail/common_methods.cuh @@ -400,7 +400,7 @@ rmm::device_uvector update_clustering_by_delta_modularity( cugraph::get_dataframe_buffer_end(output_buffer)), detail::count_updown_moves_op_t{up_down}); - if (multi_gpu) { + if constexpr (multi_gpu) { nr_moves = host_scalar_allreduce( handle.get_comms(), nr_moves, raft::comms::op_t::SUM, handle.get_stream()); } diff --git a/cpp/src/community/detail/common_methods.hpp b/cpp/src/community/detail/common_methods.hpp index 5220b9b1a5c..d663e8107e7 100644 --- a/cpp/src/community/detail/common_methods.hpp +++ b/cpp/src/community/detail/common_methods.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -51,7 +51,7 @@ void timer_display_and_clear(raft::handle_t const& handle, HighResTimer const& hr_timer, std::ostream& os) { - if (multi_gpu) { + if constexpr (multi_gpu) { if (handle.get_comms().get_rank() == 0) hr_timer.display_and_clear(os); } else { hr_timer.display_and_clear(os); diff --git a/cpp/src/community/detail/decision_graph_mis.cuh b/cpp/src/community/detail/decision_graph_mis.cuh new file mode 100644 index 00000000000..542c803d84f --- /dev/null +++ b/cpp/src/community/detail/decision_graph_mis.cuh @@ -0,0 +1,96 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once + +#include "common_methods.hpp" +#include "decision_graph_mis.hpp" +#include "maximal_independent_moves.cuh" + +#include +#include +#include +#include +#include + +#include +#include + +namespace cugraph { +namespace detail { + +template +rmm::device_uvector vertices_in_mis_from_decision_edgelist( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + raft::host_span vertex_partition_range_lasts, + rmm::device_uvector&& d_srcs, + rmm::device_uvector&& d_dsts) +{ + using edge_t = vertex_t; + + constexpr bool decision_store_transposed = false; + + cugraph::graph_t decision_graph(handle); + + std::optional> renumber_map{std::nullopt}; + + if constexpr (multi_gpu) { + std::tie(d_srcs, d_dsts, std::ignore) = + cugraph::shuffle_ext_edges(handle, + std::move(d_srcs), + std::move(d_dsts), + std::vector{}, + false); + } + + std::tie(decision_graph, std::ignore, renumber_map) = + create_graph_from_edgelist( + handle, + std::nullopt, + std::move(d_srcs), + std::move(d_dsts), + std::vector{}, + cugraph::graph_properties_t{false, false}, + true /* renumber */); + + auto decision_graph_view = decision_graph.view(); + + auto vertices_in_mis = + maximal_independent_moves(handle, decision_graph_view, rng_state); + + rmm::device_uvector numbering_indices((*renumber_map).size(), handle.get_stream()); + detail::sequence_fill(handle.get_stream(), + numbering_indices.data(), + numbering_indices.size(), + decision_graph_view.local_vertex_partition_range_first()); + + relabel( + handle, + std::make_tuple(static_cast(numbering_indices.begin()), + static_cast((*renumber_map).begin())), + decision_graph_view.local_vertex_partition_range_size(), + vertices_in_mis.data(), + vertices_in_mis.size(), + false); + + numbering_indices.resize(0, handle.get_stream()); + numbering_indices.shrink_to_fit(handle.get_stream()); + + (*renumber_map).resize(0, handle.get_stream()); + (*renumber_map).shrink_to_fit(handle.get_stream()); + + if constexpr (multi_gpu) { + std::tie(vertices_in_mis, std::ignore) = + cugraph::shuffle_int_vertices(handle, + std::move(vertices_in_mis), + std::vector{}, + vertex_partition_range_lasts); + } + + return vertices_in_mis; +} + +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/community/detail/decision_graph_mis.hpp b/cpp/src/community/detail/decision_graph_mis.hpp new file mode 100644 index 00000000000..caf527bd60a --- /dev/null +++ b/cpp/src/community/detail/decision_graph_mis.hpp @@ -0,0 +1,31 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#pragma once + +#include +#include +#include + +#include + +namespace cugraph { +namespace detail { + +/** + * @brief Build a decision graph from an edgelist, compute a maximal independent set of moves, + * relabel MIS vertices to original ids, and (multi-GPU) shuffle them to owning ranks. + * + * @param vertex_partition_range_lasts Used only when multi_gpu is true (shuffle_int_vertices). + */ +template +rmm::device_uvector vertices_in_mis_from_decision_edgelist( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + raft::host_span vertex_partition_range_lasts, + rmm::device_uvector&& d_srcs, + rmm::device_uvector&& d_dsts); + +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/community/detail/decision_graph_mis_mg_v32_e32.cu b/cpp/src/community/detail/decision_graph_mis_mg_v32_e32.cu new file mode 100644 index 00000000000..d7eb3c296c3 --- /dev/null +++ b/cpp/src/community/detail/decision_graph_mis_mg_v32_e32.cu @@ -0,0 +1,18 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#include "decision_graph_mis.cuh" + +namespace cugraph { +namespace detail { + +template rmm::device_uvector vertices_in_mis_from_decision_edgelist( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + raft::host_span vertex_partition_range_lasts, + rmm::device_uvector&& d_srcs, + rmm::device_uvector&& d_dsts); + +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/community/detail/decision_graph_mis_mg_v64_e64.cu b/cpp/src/community/detail/decision_graph_mis_mg_v64_e64.cu new file mode 100644 index 00000000000..aea1e20d028 --- /dev/null +++ b/cpp/src/community/detail/decision_graph_mis_mg_v64_e64.cu @@ -0,0 +1,18 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#include "decision_graph_mis.cuh" + +namespace cugraph { +namespace detail { + +template rmm::device_uvector vertices_in_mis_from_decision_edgelist( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + raft::host_span vertex_partition_range_lasts, + rmm::device_uvector&& d_srcs, + rmm::device_uvector&& d_dsts); + +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/community/detail/decision_graph_mis_sg_v32_e32.cu b/cpp/src/community/detail/decision_graph_mis_sg_v32_e32.cu new file mode 100644 index 00000000000..053e4bb8b7d --- /dev/null +++ b/cpp/src/community/detail/decision_graph_mis_sg_v32_e32.cu @@ -0,0 +1,18 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#include "decision_graph_mis.cuh" + +namespace cugraph { +namespace detail { + +template rmm::device_uvector vertices_in_mis_from_decision_edgelist( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + raft::host_span vertex_partition_range_lasts, + rmm::device_uvector&& d_srcs, + rmm::device_uvector&& d_dsts); + +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/community/detail/decision_graph_mis_sg_v64_e64.cu b/cpp/src/community/detail/decision_graph_mis_sg_v64_e64.cu new file mode 100644 index 00000000000..43f9da45d87 --- /dev/null +++ b/cpp/src/community/detail/decision_graph_mis_sg_v64_e64.cu @@ -0,0 +1,18 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ +#include "decision_graph_mis.cuh" + +namespace cugraph { +namespace detail { + +template rmm::device_uvector vertices_in_mis_from_decision_edgelist( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + raft::host_span vertex_partition_range_lasts, + rmm::device_uvector&& d_srcs, + rmm::device_uvector&& d_dsts); + +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/community/detail/maximal_independent_moves.cuh b/cpp/src/community/detail/maximal_independent_moves.cuh index 0a59d99f5b4..710e0dc100b 100644 --- a/cpp/src/community/detail/maximal_independent_moves.cuh +++ b/cpp/src/community/detail/maximal_independent_moves.cuh @@ -91,7 +91,7 @@ rmm::device_uvector maximal_independent_moves( // Select a random set of candidate vertices vertex_t nr_remaining_vertices_to_check = remaining_vertices.size(); - if (multi_gpu) { + if constexpr (multi_gpu) { nr_remaining_vertices_to_check = host_scalar_allreduce(handle.get_comms(), nr_remaining_vertices_to_check, raft::comms::op_t::SUM, @@ -268,7 +268,7 @@ rmm::device_uvector maximal_independent_moves( remaining_vertices.begin()); nr_remaining_vertices_to_check = remaining_vertices.size(); - if (multi_gpu) { + if constexpr (multi_gpu) { nr_remaining_vertices_to_check = host_scalar_allreduce(handle.get_comms(), nr_remaining_vertices_to_check, raft::comms::op_t::SUM, diff --git a/cpp/src/community/detail/refine.hpp b/cpp/src/community/detail/refine.hpp index 1d4d7f8d735..b94906a71ce 100644 --- a/cpp/src/community/detail/refine.hpp +++ b/cpp/src/community/detail/refine.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once @@ -16,28 +16,23 @@ namespace cugraph { namespace detail { -template -std::tuple, - std::pair, - rmm::device_uvector>> -refine_clustering( - raft::handle_t const& handle, - raft::random::RngState& rng_state, - graph_view_t const& graph_view, - std::optional> - edge_weight_view, - weight_t total_edge_weight, - weight_t resolution, - weight_t theta, - rmm::device_uvector const& vertex_weights_v, - rmm::device_uvector&& cluster_keys_v, - rmm::device_uvector&& cluster_weights_v, - rmm::device_uvector&& next_clusters_v, - edge_src_property_t const& src_vertex_weights_cache, - edge_src_property_t const& - src_clusters_cache, - edge_dst_property_t const& - dst_clusters_cache); +template +std::tuple, + std::pair, rmm::device_uvector>> +refine_clustering(raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + weight_t total_edge_weight, + weight_t resolution, + weight_t theta, + rmm::device_uvector const& vertex_weights_v, + rmm::device_uvector&& cluster_keys_v, + rmm::device_uvector&& cluster_weights_v, + rmm::device_uvector&& next_clusters_v, + edge_src_property_t const& src_vertex_weights_cache, + edge_src_property_t const& src_clusters_cache, + edge_dst_property_t const& dst_clusters_cache); } } // namespace cugraph diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index 04626bcc6de..458f85dd482 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -5,8 +5,8 @@ #pragma once #include "common_methods.hpp" +#include "decision_graph_mis.cuh" #include "detail/shuffle_wrappers.hpp" -#include "maximal_independent_moves.hpp" #include #include @@ -19,17 +19,23 @@ #include #include #include +#include #include #include +#include #include #include #include #include +#include #include +#include +#include #include #include +#include #include #include #include @@ -50,8 +56,6 @@ struct is_bitwise_comparable> : std::true_type {}; namespace cugraph { namespace detail { -// FIXME: check if this is still the case -// a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used template struct leiden_key_aggregated_edge_op_t { weight_t total_edge_weight{}; @@ -120,33 +124,25 @@ struct leiden_key_aggregated_edge_op_t { } }; -template -std::tuple, - std::pair, - rmm::device_uvector>> -refine_clustering( - raft::handle_t const& handle, - raft::random::RngState& rng_state, - GraphViewType const& graph_view, - std::optional> - edge_weight_view, - weight_t total_edge_weight, - weight_t resolution, - weight_t theta, - rmm::device_uvector const& weighted_degree_of_vertices, - rmm::device_uvector&& louvain_cluster_keys, - rmm::device_uvector&& louvain_cluster_weights, - rmm::device_uvector&& louvain_assignment_of_vertices, - edge_src_property_t const& - src_vertex_weights_cache, - edge_src_property_t const& src_louvain_assignment_cache, - edge_dst_property_t const& dst_louvain_assignment_cache) +template +std::tuple, + std::pair, rmm::device_uvector>> +refine_clustering(raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + weight_t total_edge_weight, + weight_t resolution, + weight_t theta, + rmm::device_uvector const& weighted_degree_of_vertices, + rmm::device_uvector&& louvain_cluster_keys, + rmm::device_uvector&& louvain_cluster_weights, + rmm::device_uvector&& louvain_assignment_of_vertices, + edge_src_property_t const& src_vertex_weights_cache, + edge_src_property_t const& src_louvain_assignment_cache, + edge_dst_property_t const& dst_louvain_assignment_cache) { const weight_t POSITIVE_GAIN = 1e-6; - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; kv_store_t cluster_key_weight_map(louvain_cluster_keys.begin(), louvain_cluster_keys.end(), @@ -161,7 +157,7 @@ refine_clustering( louvain_cluster_weights.shrink_to_fit(handle.get_stream()); rmm::device_uvector vertex_louvain_cluster_weights(0, handle.get_stream()); - if (GraphViewType::is_multi_gpu) { + if constexpr (multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); @@ -199,12 +195,12 @@ refine_clustering( per_v_transform_reduce_outgoing_e( handle, graph_view, - GraphViewType::is_multi_gpu + multi_gpu ? src_louvain_assignment_cache.view() : make_edge_src_property_view(graph_view, louvain_assignment_of_vertices.begin(), louvain_assignment_of_vertices.size()), - GraphViewType::is_multi_gpu + multi_gpu ? dst_louvain_assignment_cache.view() : make_edge_dst_property_view(graph_view, louvain_assignment_of_vertices.begin(), @@ -254,7 +250,7 @@ refine_clustering( edge_src_property_t src_louvain_cluster_weight_cache(handle); edge_src_property_t src_cut_to_louvain_cache(handle); - if (GraphViewType::is_multi_gpu) { + if constexpr (multi_gpu) { // Update cluster weight, weighted degree and cut for edge sources src_louvain_cluster_weight_cache = edge_src_property_t(handle, graph_view); update_edge_src_property(handle, @@ -312,7 +308,7 @@ refine_clustering( singleton_and_connected_flags.end(), [] __device__(auto flag) { return flag > 0; }); - if (GraphViewType::is_multi_gpu) { + if constexpr (multi_gpu) { nr_remaining_active_vertices = host_scalar_allreduce(handle.get_comms(), nr_remaining_active_vertices, raft::comms::op_t::SUM, @@ -321,10 +317,10 @@ refine_clustering( if (nr_remaining_active_vertices == 0) { break; } - // Update Leiden assignment to edge sources and destinitions + // Update Leiden assignment to edge sources and destinations // and singleton mask to edge sources - if constexpr (GraphViewType::is_multi_gpu) { + if constexpr (multi_gpu) { src_leiden_assignment_cache = edge_src_property_t(handle, graph_view); dst_leiden_assignment_cache = edge_dst_property_t(handle, graph_view); src_singleton_and_connected_flag_cache = @@ -343,7 +339,7 @@ refine_clustering( } auto src_input_property_values = - GraphViewType::is_multi_gpu + multi_gpu ? view_concat(src_louvain_assignment_cache.view(), src_leiden_assignment_cache.view()) : view_concat( make_edge_src_property_view(graph_view, @@ -353,7 +349,7 @@ refine_clustering( graph_view, leiden_assignment.begin(), leiden_assignment.size())); auto dst_input_property_values = - GraphViewType::is_multi_gpu + multi_gpu ? view_concat(dst_louvain_assignment_cache.view(), dst_leiden_assignment_cache.view()) : view_concat( make_edge_dst_property_view(graph_view, @@ -381,10 +377,9 @@ refine_clustering( src_input_property_values, dst_input_property_values, *edge_weight_view, - GraphViewType::is_multi_gpu - ? dst_leiden_assignment_cache.view() - : make_edge_dst_property_view( - graph_view, leiden_assignment.begin(), leiden_assignment.size()), + multi_gpu ? dst_leiden_assignment_cache.view() + : make_edge_dst_property_view( + graph_view, leiden_assignment.begin(), leiden_assignment.size()), [] __device__(auto src, auto dst, cuda::std::tuple src_louvain_leidn, @@ -419,37 +414,10 @@ refine_clustering( // leiden(v) // louvain(v) - auto zipped_src_device_view = - GraphViewType::is_multi_gpu - ? view_concat(src_vertex_weights_cache.view(), - src_cut_to_louvain_cache.view(), - src_louvain_cluster_weight_cache.view(), - src_singleton_and_connected_flag_cache.view(), - src_leiden_assignment_cache.view(), - src_louvain_assignment_cache.view()) - : view_concat( - make_edge_src_property_view( - graph_view, weighted_degree_of_vertices.begin(), weighted_degree_of_vertices.size()), - make_edge_src_property_view( - graph_view, - weighted_cut_of_vertices_to_louvain.begin(), - weighted_cut_of_vertices_to_louvain.size()), - make_edge_src_property_view(graph_view, - vertex_louvain_cluster_weights.begin(), - vertex_louvain_cluster_weights.size()), - make_edge_src_property_view(graph_view, - singleton_and_connected_flags.begin(), - singleton_and_connected_flags.size()), - make_edge_src_property_view( - graph_view, leiden_assignment.begin(), leiden_assignment.size()), - make_edge_src_property_view(graph_view, - louvain_assignment_of_vertices.begin(), - louvain_assignment_of_vertices.size())); - rmm::device_uvector louvain_of_leiden_keys_used_in_edge_reduction( 0, handle.get_stream()); - if (GraphViewType::is_multi_gpu) { + if constexpr (multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); @@ -472,9 +440,6 @@ refine_clustering( major_comm_size, minor_comm_size}; - // cugraph::detail::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ - // comm_size, major_comm_size, minor_comm_size}; - louvain_of_leiden_keys_used_in_edge_reduction = cugraph::collect_values_for_keys(handle, leiden_to_louvain_map.view(), @@ -518,25 +483,63 @@ refine_clustering( // raft::random::DeviceState device_state(rng_state); - auto gain_and_dst_output_pairs = - allocate_dataframe_buffer>( - graph_view.local_vertex_partition_range_size(), handle.get_stream()); + auto const n_local_vertices = graph_view.local_vertex_partition_range_size(); + rmm::device_uvector vertex_best_move_gain(n_local_vertices, handle.get_stream()); + rmm::device_uvector vertex_best_move_cluster_id(n_local_vertices, + handle.get_stream()); - per_v_transform_reduce_dst_key_aggregated_outgoing_e( - handle, - graph_view, - zipped_src_device_view, - *edge_weight_view, - GraphViewType::is_multi_gpu - ? dst_leiden_assignment_cache.view() - : make_edge_dst_property_view( - graph_view, leiden_assignment.begin(), leiden_assignment.size()), - leiden_cluster_key_values_map.view(), - detail::leiden_key_aggregated_edge_op_t{ - total_edge_weight, resolution, theta, device_state}, - cuda::std::make_tuple(weight_t{0}, vertex_t{-1}), - reduce_op::maximum>(), - cugraph::get_dataframe_buffer_begin(gain_and_dst_output_pairs)); + if constexpr (multi_gpu) { + auto zipped_src_device_view = view_concat(src_vertex_weights_cache.view(), + src_cut_to_louvain_cache.view(), + src_louvain_cluster_weight_cache.view(), + src_singleton_and_connected_flag_cache.view(), + src_leiden_assignment_cache.view(), + src_louvain_assignment_cache.view()); + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + handle, + graph_view, + zipped_src_device_view, + *edge_weight_view, + dst_leiden_assignment_cache.view(), + leiden_cluster_key_values_map.view(), + detail::leiden_key_aggregated_edge_op_t{ + total_edge_weight, resolution, theta, device_state}, + cuda::std::make_tuple(weight_t{0}, vertex_t{-1}), + reduce_op::maximum>(), + thrust::make_zip_iterator(vertex_best_move_gain.begin(), + vertex_best_move_cluster_id.begin())); + } else { + auto zipped_src_device_view = view_concat( + make_edge_src_property_view( + graph_view, weighted_degree_of_vertices.begin(), weighted_degree_of_vertices.size()), + make_edge_src_property_view(graph_view, + weighted_cut_of_vertices_to_louvain.begin(), + weighted_cut_of_vertices_to_louvain.size()), + make_edge_src_property_view(graph_view, + vertex_louvain_cluster_weights.begin(), + vertex_louvain_cluster_weights.size()), + make_edge_src_property_view( + graph_view, singleton_and_connected_flags.begin(), singleton_and_connected_flags.size()), + make_edge_src_property_view( + graph_view, leiden_assignment.begin(), leiden_assignment.size()), + make_edge_src_property_view(graph_view, + louvain_assignment_of_vertices.begin(), + louvain_assignment_of_vertices.size())); + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + handle, + graph_view, + zipped_src_device_view, + *edge_weight_view, + make_edge_dst_property_view( + graph_view, leiden_assignment.begin(), leiden_assignment.size()), + leiden_cluster_key_values_map.view(), + detail::leiden_key_aggregated_edge_op_t{ + total_edge_weight, resolution, theta, device_state}, + cuda::std::make_tuple(weight_t{0}, vertex_t{-1}), + reduce_op::maximum>(), + thrust::make_zip_iterator(vertex_best_move_gain.begin(), + vertex_best_move_cluster_id.begin())); + } src_leiden_assignment_cache.clear(); dst_leiden_assignment_cache.clear(); @@ -552,150 +555,73 @@ refine_clustering( refined_community_cuts.shrink_to_fit(handle.get_stream()); // - // Create edgelist from (source, target community, modularity gain) tuple + // Filter out moves with -ve gains // + auto [keep_count, keep_flags] = detail::mark_entries( + handle, + static_cast(n_local_vertices), + cuda::proclaim_return_type([gain_ptr = vertex_best_move_gain.data(), + cluster_ptr = vertex_best_move_cluster_id.data(), + min_gain = POSITIVE_GAIN] __device__(size_t i) { + return (gain_ptr[i] > min_gain) && (cluster_ptr[i] >= vertex_t{0}); + })); - vertex_t num_vertices = graph_view.local_vertex_partition_range_size(); - auto gain_and_dst_first = cugraph::get_dataframe_buffer_cbegin(gain_and_dst_output_pairs); - auto gain_and_dst_last = cugraph::get_dataframe_buffer_cend(gain_and_dst_output_pairs); - - auto vertex_begin = - thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()); - auto vertex_end = - thrust::make_counting_iterator(graph_view.local_vertex_partition_range_last()); + vertex_best_move_gain.resize(0, handle.get_stream()); + vertex_best_move_gain.shrink_to_fit(handle.get_stream()); - // - // Filter out moves with -ve gains - // + raft::device_span const keep_mask_span{keep_flags.data(), keep_flags.size()}; - vertex_t nr_valid_tuples = thrust::count_if(handle.get_thrust_policy(), - gain_and_dst_first, - gain_and_dst_last, - [] __device__(auto gain_dst_pair) { - vertex_t dst = cuda::std::get<1>(gain_dst_pair); - weight_t gain = cuda::std::get<0>(gain_dst_pair); - return (gain > POSITIVE_GAIN) && (dst >= 0); - }); + vertex_t nr_valid_tuples = static_cast(keep_count); vertex_t total_nr_valid_tuples = nr_valid_tuples; - if (GraphViewType::is_multi_gpu) { + if constexpr (multi_gpu) { total_nr_valid_tuples = host_scalar_allreduce( handle.get_comms(), total_nr_valid_tuples, raft::comms::op_t::SUM, handle.get_stream()); } if (total_nr_valid_tuples == 0) { - cugraph::resize_dataframe_buffer(gain_and_dst_output_pairs, 0, handle.get_stream()); - cugraph::shrink_to_fit_dataframe_buffer(gain_and_dst_output_pairs, handle.get_stream()); + vertex_best_move_cluster_id.resize(0, handle.get_stream()); + vertex_best_move_cluster_id.shrink_to_fit(handle.get_stream()); break; } - rmm::device_uvector d_srcs(nr_valid_tuples, handle.get_stream()); - rmm::device_uvector d_dsts(nr_valid_tuples, handle.get_stream()); - - auto d_src_dst_iterator = thrust::make_zip_iterator(d_srcs.begin(), d_dsts.begin()); - auto edge_begin = thrust::make_zip_iterator( - vertex_begin, cuda::std::get<1>(gain_and_dst_first.get_iterator_tuple())); - auto edge_end = thrust::make_zip_iterator( - vertex_end, cuda::std::get<1>(gain_and_dst_last.get_iterator_tuple())); - - thrust::copy_if(handle.get_thrust_policy(), - edge_begin, - edge_end, - gain_and_dst_first, - d_src_dst_iterator, - [] __device__(auto pair) { - auto gain = cuda::std::get<0>(pair); - auto dst = cuda::std::get<1>(pair); - return (gain > POSITIVE_GAIN) && (dst >= 0); - }); - - // - // Create decision graph from edgelist - // - constexpr bool store_transposed = false; - constexpr bool multi_gpu = GraphViewType::is_multi_gpu; - using DecisionGraphViewType = cugraph::graph_view_t; - - cugraph::graph_t decision_graph(handle); - - std::optional> renumber_map{std::nullopt}; - - if constexpr (multi_gpu) { - std::tie(d_srcs, d_dsts, std::ignore) = - cugraph::shuffle_ext_edges(handle, - std::move(d_srcs), - std::move(d_dsts), - std::vector{}, - GraphViewType::is_storage_transposed); - } - - std::tie(decision_graph, std::ignore, renumber_map) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(d_srcs), - std::move(d_dsts), - std::vector{}, - cugraph::graph_properties_t{false, false}, - true /* renumber */); - - auto decision_graph_view = decision_graph.view(); - - // - // Determine a set of moves using MIS of the decision_graph - // - - auto vertices_in_mis = maximal_independent_moves( - handle, decision_graph_view, rng_state); - - rmm::device_uvector numbering_indices((*renumber_map).size(), handle.get_stream()); + rmm::device_uvector d_srcs(n_local_vertices, handle.get_stream()); detail::sequence_fill(handle.get_stream(), - numbering_indices.data(), - numbering_indices.size(), - decision_graph_view.local_vertex_partition_range_first()); + d_srcs.data(), + d_srcs.size(), + graph_view.local_vertex_partition_range_first()); - // - // Apply Renumber map to get original vertex ids - // - relabel( - handle, - std::make_tuple(static_cast(numbering_indices.begin()), - static_cast((*renumber_map).begin())), - decision_graph_view.local_vertex_partition_range_size(), - vertices_in_mis.data(), - vertices_in_mis.size(), - false); - - numbering_indices.resize(0, handle.get_stream()); - numbering_indices.shrink_to_fit(handle.get_stream()); + rmm::device_uvector d_dsts(keep_count, handle.get_stream()); + copy_if_mask_set(handle, + vertex_best_move_cluster_id.begin(), + vertex_best_move_cluster_id.end(), + keep_flags.begin(), + d_dsts.begin()); - (*renumber_map).resize(0, handle.get_stream()); - (*renumber_map).shrink_to_fit(handle.get_stream()); + d_srcs = detail::keep_marked_entries(handle, std::move(d_srcs), keep_mask_span, keep_count); - if (GraphViewType::is_multi_gpu) { - std::tie(vertices_in_mis, std::ignore) = - cugraph::shuffle_int_vertices(handle, - std::move(vertices_in_mis), - std::vector{}, - graph_view.vertex_partition_range_lasts()); - } + auto vertices_in_mis = vertices_in_mis_from_decision_edgelist( + handle, + rng_state, + graph_view.vertex_partition_range_lasts(), + std::move(d_srcs), + std::move(d_dsts)); // // Mark the chosen vertices as non-singleton and update their leiden cluster to dst // - thrust::for_each( handle.get_thrust_policy(), vertices_in_mis.begin(), vertices_in_mis.end(), - [dst_first = cuda::std::get<1>(gain_and_dst_first.get_iterator_tuple()), + [best_move_leiden = vertex_best_move_cluster_id.data(), leiden_assignment = leiden_assignment.data(), singleton_and_connected_flags = singleton_and_connected_flags.data(), v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) { auto v_offset = v - v_first; - auto dst = *(dst_first + v_offset); + auto target_leiden = best_move_leiden[v_offset]; singleton_and_connected_flags[v_offset] = false; - leiden_assignment[v_offset] = dst; + leiden_assignment[v_offset] = target_leiden; }); // @@ -703,20 +629,19 @@ refine_clustering( // rmm::device_uvector dst_vertices(vertices_in_mis.size(), handle.get_stream()); - thrust::transform( - handle.get_thrust_policy(), - vertices_in_mis.begin(), - vertices_in_mis.end(), - dst_vertices.begin(), - cuda::proclaim_return_type( - [dst_first = cuda::std::get<1>(gain_and_dst_first.get_iterator_tuple()), - v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) { - auto dst = *(dst_first + v - v_first); - return dst; - })); + { + auto map_first = cuda::make_transform_iterator( + vertices_in_mis.begin(), + shift_left_t{graph_view.local_vertex_partition_range_first()}); + thrust::gather(handle.get_thrust_policy(), + map_first, + map_first + vertices_in_mis.size(), + vertex_best_move_cluster_id.begin(), + dst_vertices.begin()); + } - cugraph::resize_dataframe_buffer(gain_and_dst_output_pairs, 0, handle.get_stream()); - cugraph::shrink_to_fit_dataframe_buffer(gain_and_dst_output_pairs, handle.get_stream()); + vertex_best_move_cluster_id.resize(0, handle.get_stream()); + vertex_best_move_cluster_id.shrink_to_fit(handle.get_stream()); vertices_in_mis.resize(0, handle.get_stream()); vertices_in_mis.shrink_to_fit(handle.get_stream()); @@ -730,7 +655,7 @@ refine_clustering( handle.get_stream()); // Shuffle dst vertices to owner GPU, according to vetex partitioning - if constexpr (GraphViewType::is_multi_gpu) { + if constexpr (multi_gpu) { std::tie(dst_vertices, std::ignore) = cugraph::shuffle_int_vertices(handle, std::move(dst_vertices), @@ -747,16 +672,15 @@ refine_clustering( } // - // Makr all the dest vertices as non-sigleton + // Mark all the dest vertices as non-singleton // - thrust::for_each( - handle.get_thrust_policy(), - dst_vertices.begin(), - dst_vertices.end(), - [singleton_and_connected_flags = singleton_and_connected_flags.data(), - v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) { - singleton_and_connected_flags[v - v_first] = false; - }); + thrust::scatter(handle.get_thrust_policy(), + cuda::make_constant_iterator(uint8_t{0}), + cuda::make_constant_iterator(uint8_t{0}) + dst_vertices.size(), + cuda::make_transform_iterator( + dst_vertices.begin(), + shift_left_t{graph_view.local_vertex_partition_range_first()}), + singleton_and_connected_flags.begin()); dst_vertices.resize(0, handle.get_stream()); dst_vertices.shrink_to_fit(handle.get_stream()); @@ -795,7 +719,9 @@ refine_clustering( leiden_keys_to_read_louvain.resize(nr_unique_leiden_clusters, handle.get_stream()); - if constexpr (GraphViewType::is_multi_gpu) { + rmm::device_uvector louvain_of_leiden_cluster_keys(0, handle.get_stream()); + + if constexpr (multi_gpu) { std::tie(leiden_keys_to_read_louvain, std::ignore) = cugraph::shuffle_int_vertices(handle, std::move(leiden_keys_to_read_louvain), @@ -812,11 +738,7 @@ refine_clustering( leiden_keys_to_read_louvain.begin(), leiden_keys_to_read_louvain.end()))); leiden_keys_to_read_louvain.resize(nr_unique_leiden_clusters, handle.get_stream()); - } - rmm::device_uvector lovain_of_leiden_cluster_keys(0, handle.get_stream()); - - if (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); @@ -839,27 +761,24 @@ refine_clustering( major_comm_size, minor_comm_size}; - // cugraph::detail::compute_gpu_id_from_ext_vertex_t vertex_to_gpu_id_op{ - // comm_size, major_comm_size, minor_comm_size}; - - lovain_of_leiden_cluster_keys = + louvain_of_leiden_cluster_keys = cugraph::collect_values_for_keys(handle, leiden_to_louvain_map.view(), leiden_keys_to_read_louvain.begin(), leiden_keys_to_read_louvain.end(), vertex_to_gpu_id_op); - } else { - lovain_of_leiden_cluster_keys.resize(leiden_keys_to_read_louvain.size(), handle.get_stream()); + louvain_of_leiden_cluster_keys.resize(leiden_keys_to_read_louvain.size(), handle.get_stream()); leiden_to_louvain_map.view().find(leiden_keys_to_read_louvain.begin(), leiden_keys_to_read_louvain.end(), - lovain_of_leiden_cluster_keys.begin(), + louvain_of_leiden_cluster_keys.begin(), handle.get_stream()); } + return std::make_tuple(std::move(leiden_assignment), std::make_pair(std::move(leiden_keys_to_read_louvain), - std::move(lovain_of_leiden_cluster_keys))); + std::move(louvain_of_leiden_cluster_keys))); } } // namespace detail } // namespace cugraph diff --git a/cpp/src/community/detail/refine_mg_v32_e32.cu b/cpp/src/community/detail/refine_mg_v32_e32.cu index 8f8264f0dcc..add3aa217dd 100644 --- a/cpp/src/community/detail/refine_mg_v32_e32.cu +++ b/cpp/src/community/detail/refine_mg_v32_e32.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #include "community/detail/refine_impl.cuh" @@ -9,37 +9,39 @@ namespace detail { template std::tuple, std::pair, rmm::device_uvector>> -refine_clustering(raft::handle_t const& handle, - raft::random::RngState& rng_state, - cugraph::graph_view_t const& graph_view, - std::optional> edge_weight_view, - float total_edge_weight, - float resolution, - float theta, - rmm::device_uvector const& vertex_weights_v, - rmm::device_uvector&& cluster_keys_v, - rmm::device_uvector&& cluster_weights_v, - rmm::device_uvector&& next_clusters_v, - edge_src_property_t const& src_vertex_weights_cache, - edge_src_property_t const& src_clusters_cache, - edge_dst_property_t const& dst_clusters_cache); +refine_clustering( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + float total_edge_weight, + float resolution, + float theta, + rmm::device_uvector const& vertex_weights_v, + rmm::device_uvector&& cluster_keys_v, + rmm::device_uvector&& cluster_weights_v, + rmm::device_uvector&& next_clusters_v, + edge_src_property_t const& src_vertex_weights_cache, + edge_src_property_t const& src_clusters_cache, + edge_dst_property_t const& dst_clusters_cache); template std::tuple, std::pair, rmm::device_uvector>> -refine_clustering(raft::handle_t const& handle, - raft::random::RngState& rng_state, - cugraph::graph_view_t const& graph_view, - std::optional> edge_weight_view, - double total_edge_weight, - double resolution, - double theta, - rmm::device_uvector const& vertex_weights_v, - rmm::device_uvector&& cluster_keys_v, - rmm::device_uvector&& cluster_weights_v, - rmm::device_uvector&& next_clusters_v, - edge_src_property_t const& src_vertex_weights_cache, - edge_src_property_t const& src_clusters_cache, - edge_dst_property_t const& dst_clusters_cache); +refine_clustering( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + double total_edge_weight, + double resolution, + double theta, + rmm::device_uvector const& vertex_weights_v, + rmm::device_uvector&& cluster_keys_v, + rmm::device_uvector&& cluster_weights_v, + rmm::device_uvector&& next_clusters_v, + edge_src_property_t const& src_vertex_weights_cache, + edge_src_property_t const& src_clusters_cache, + edge_dst_property_t const& dst_clusters_cache); } // namespace detail } // namespace cugraph diff --git a/cpp/src/community/detail/refine_mg_v64_e64.cu b/cpp/src/community/detail/refine_mg_v64_e64.cu index 34408be7e51..48f18829fba 100644 --- a/cpp/src/community/detail/refine_mg_v64_e64.cu +++ b/cpp/src/community/detail/refine_mg_v64_e64.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #include "community/detail/refine_impl.cuh" @@ -9,37 +9,39 @@ namespace detail { template std::tuple, std::pair, rmm::device_uvector>> -refine_clustering(raft::handle_t const& handle, - raft::random::RngState& rng_state, - cugraph::graph_view_t const& graph_view, - std::optional> edge_weight_view, - float total_edge_weight, - float resolution, - float theta, - rmm::device_uvector const& vertex_weights_v, - rmm::device_uvector&& cluster_keys_v, - rmm::device_uvector&& cluster_weights_v, - rmm::device_uvector&& next_clusters_v, - edge_src_property_t const& src_vertex_weights_cache, - edge_src_property_t const& src_clusters_cache, - edge_dst_property_t const& dst_clusters_cache); +refine_clustering( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + float total_edge_weight, + float resolution, + float theta, + rmm::device_uvector const& vertex_weights_v, + rmm::device_uvector&& cluster_keys_v, + rmm::device_uvector&& cluster_weights_v, + rmm::device_uvector&& next_clusters_v, + edge_src_property_t const& src_vertex_weights_cache, + edge_src_property_t const& src_clusters_cache, + edge_dst_property_t const& dst_clusters_cache); template std::tuple, std::pair, rmm::device_uvector>> -refine_clustering(raft::handle_t const& handle, - raft::random::RngState& rng_state, - cugraph::graph_view_t const& graph_view, - std::optional> edge_weight_view, - double total_edge_weight, - double resolution, - double theta, - rmm::device_uvector const& vertex_weights_v, - rmm::device_uvector&& cluster_keys_v, - rmm::device_uvector&& cluster_weights_v, - rmm::device_uvector&& next_clusters_v, - edge_src_property_t const& src_vertex_weights_cache, - edge_src_property_t const& src_clusters_cache, - edge_dst_property_t const& dst_clusters_cache); +refine_clustering( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + double total_edge_weight, + double resolution, + double theta, + rmm::device_uvector const& vertex_weights_v, + rmm::device_uvector&& cluster_keys_v, + rmm::device_uvector&& cluster_weights_v, + rmm::device_uvector&& next_clusters_v, + edge_src_property_t const& src_vertex_weights_cache, + edge_src_property_t const& src_clusters_cache, + edge_dst_property_t const& dst_clusters_cache); } // namespace detail } // namespace cugraph diff --git a/cpp/src/community/detail/refine_sg_v32_e32.cu b/cpp/src/community/detail/refine_sg_v32_e32.cu index b0da5ce286d..8ac909fa804 100644 --- a/cpp/src/community/detail/refine_sg_v32_e32.cu +++ b/cpp/src/community/detail/refine_sg_v32_e32.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #include "community/detail/refine_impl.cuh" @@ -9,37 +9,39 @@ namespace detail { template std::tuple, std::pair, rmm::device_uvector>> -refine_clustering(raft::handle_t const& handle, - raft::random::RngState& rng_state, - cugraph::graph_view_t const& graph_view, - std::optional> edge_weight_view, - float total_edge_weight, - float resolution, - float theta, - rmm::device_uvector const& vertex_weights_v, - rmm::device_uvector&& cluster_keys_v, - rmm::device_uvector&& cluster_weights_v, - rmm::device_uvector&& next_clusters_v, - edge_src_property_t const& src_vertex_weights_cache, - edge_src_property_t const& src_clusters_cache, - edge_dst_property_t const& dst_clusters_cache); +refine_clustering( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + float total_edge_weight, + float resolution, + float theta, + rmm::device_uvector const& vertex_weights_v, + rmm::device_uvector&& cluster_keys_v, + rmm::device_uvector&& cluster_weights_v, + rmm::device_uvector&& next_clusters_v, + edge_src_property_t const& src_vertex_weights_cache, + edge_src_property_t const& src_clusters_cache, + edge_dst_property_t const& dst_clusters_cache); template std::tuple, std::pair, rmm::device_uvector>> -refine_clustering(raft::handle_t const& handle, - raft::random::RngState& rng_state, - cugraph::graph_view_t const& graph_view, - std::optional> edge_weight_view, - double total_edge_weight, - double resolution, - double theta, - rmm::device_uvector const& vertex_weights_v, - rmm::device_uvector&& cluster_keys_v, - rmm::device_uvector&& cluster_weights_v, - rmm::device_uvector&& next_clusters_v, - edge_src_property_t const& src_vertex_weights_cache, - edge_src_property_t const& src_clusters_cache, - edge_dst_property_t const& dst_clusters_cache); +refine_clustering( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + double total_edge_weight, + double resolution, + double theta, + rmm::device_uvector const& vertex_weights_v, + rmm::device_uvector&& cluster_keys_v, + rmm::device_uvector&& cluster_weights_v, + rmm::device_uvector&& next_clusters_v, + edge_src_property_t const& src_vertex_weights_cache, + edge_src_property_t const& src_clusters_cache, + edge_dst_property_t const& dst_clusters_cache); } // namespace detail } // namespace cugraph diff --git a/cpp/src/community/detail/refine_sg_v64_e64.cu b/cpp/src/community/detail/refine_sg_v64_e64.cu index 1de01afe4bc..717688d2e99 100644 --- a/cpp/src/community/detail/refine_sg_v64_e64.cu +++ b/cpp/src/community/detail/refine_sg_v64_e64.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #include "community/detail/refine_impl.cuh" @@ -9,37 +9,39 @@ namespace detail { template std::tuple, std::pair, rmm::device_uvector>> -refine_clustering(raft::handle_t const& handle, - raft::random::RngState& rng_state, - cugraph::graph_view_t const& graph_view, - std::optional> edge_weight_view, - float total_edge_weight, - float resolution, - float theta, - rmm::device_uvector const& vertex_weights_v, - rmm::device_uvector&& cluster_keys_v, - rmm::device_uvector&& cluster_weights_v, - rmm::device_uvector&& next_clusters_v, - edge_src_property_t const& src_vertex_weights_cache, - edge_src_property_t const& src_clusters_cache, - edge_dst_property_t const& dst_clusters_cache); +refine_clustering( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + float total_edge_weight, + float resolution, + float theta, + rmm::device_uvector const& vertex_weights_v, + rmm::device_uvector&& cluster_keys_v, + rmm::device_uvector&& cluster_weights_v, + rmm::device_uvector&& next_clusters_v, + edge_src_property_t const& src_vertex_weights_cache, + edge_src_property_t const& src_clusters_cache, + edge_dst_property_t const& dst_clusters_cache); template std::tuple, std::pair, rmm::device_uvector>> -refine_clustering(raft::handle_t const& handle, - raft::random::RngState& rng_state, - cugraph::graph_view_t const& graph_view, - std::optional> edge_weight_view, - double total_edge_weight, - double resolution, - double theta, - rmm::device_uvector const& vertex_weights_v, - rmm::device_uvector&& cluster_keys_v, - rmm::device_uvector&& cluster_weights_v, - rmm::device_uvector&& next_clusters_v, - edge_src_property_t const& src_vertex_weights_cache, - edge_src_property_t const& src_clusters_cache, - edge_dst_property_t const& dst_clusters_cache); +refine_clustering( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + double total_edge_weight, + double resolution, + double theta, + rmm::device_uvector const& vertex_weights_v, + rmm::device_uvector&& cluster_keys_v, + rmm::device_uvector&& cluster_weights_v, + rmm::device_uvector&& next_clusters_v, + edge_src_property_t const& src_vertex_weights_cache, + edge_src_property_t const& src_clusters_cache, + edge_dst_property_t const& dst_clusters_cache); } // namespace detail } // namespace cugraph diff --git a/cpp/src/components/mis_impl.cuh b/cpp/src/components/mis_impl.cuh index 9cad71154a8..7bf7d26a77b 100644 --- a/cpp/src/components/mis_impl.cuh +++ b/cpp/src/components/mis_impl.cuh @@ -101,7 +101,7 @@ rmm::device_uvector maximal_independent_set( // Select a random set of candidate vertices vertex_t nr_remaining_vertices_to_check = remaining_vertices.size(); - if (multi_gpu) { + if constexpr (multi_gpu) { nr_remaining_vertices_to_check = host_scalar_allreduce(handle.get_comms(), nr_remaining_vertices_to_check, raft::comms::op_t::SUM, @@ -278,7 +278,7 @@ rmm::device_uvector maximal_independent_set( remaining_vertices.begin()); nr_remaining_vertices_to_check = remaining_vertices.size(); - if (multi_gpu) { + if constexpr (multi_gpu) { nr_remaining_vertices_to_check = host_scalar_allreduce(handle.get_comms(), nr_remaining_vertices_to_check, raft::comms::op_t::SUM, diff --git a/cpp/src/sampling/detail/gather_one_hop_impl.cuh b/cpp/src/sampling/detail/gather_one_hop_impl.cuh index fa51d5f3a0a..c1e9209b6d6 100644 --- a/cpp/src/sampling/detail/gather_one_hop_impl.cuh +++ b/cpp/src/sampling/detail/gather_one_hop_impl.cuh @@ -483,7 +483,7 @@ temporal_gather_one_hop_edgelist( handle.get_stream()); size_t starting_pos{0}; - if (multi_gpu) { + if constexpr (multi_gpu) { auto sizes = cugraph::host_scalar_allgather( handle.get_comms(), active_majors.size(), handle.get_stream()); std::exclusive_scan(sizes.begin(), sizes.end(), sizes.begin(), size_t{0}); @@ -495,7 +495,7 @@ temporal_gather_one_hop_edgelist( vertex_label_time_positions.end(), starting_pos); - if (multi_gpu) { + if constexpr (multi_gpu) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto all_minor_keys = diff --git a/cpp/src/structure/relabel_impl.cuh b/cpp/src/structure/relabel_impl.cuh index d9c41c40585..8ccf792dccb 100644 --- a/cpp/src/structure/relabel_impl.cuh +++ b/cpp/src/structure/relabel_impl.cuh @@ -47,7 +47,7 @@ void relabel(raft::handle_t const& handle, bool skip_missing_labels, bool do_expensive_check) { - if (multi_gpu) { + if constexpr (multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); diff --git a/cpp/src/structure/renumber_edgelist_impl.cuh b/cpp/src/structure/renumber_edgelist_impl.cuh index b6fce34c5c6..6ae30acd4a7 100644 --- a/cpp/src/structure/renumber_edgelist_impl.cuh +++ b/cpp/src/structure/renumber_edgelist_impl.cuh @@ -744,7 +744,7 @@ compute_renumber_map(raft::handle_t const& handle, size_t mid_degree_threshold{detail::mid_degree_threshold}; size_t low_degree_threshold{detail::low_degree_threshold}; size_t hypersparse_degree_threshold{1}; - if (multi_gpu) { + if constexpr (multi_gpu) { auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); mid_degree_threshold *= minor_comm_size; diff --git a/cpp/src/structure/renumber_utils_impl.cuh b/cpp/src/structure/renumber_utils_impl.cuh index 5522b300655..e37908dffbd 100644 --- a/cpp/src/structure/renumber_utils_impl.cuh +++ b/cpp/src/structure/renumber_utils_impl.cuh @@ -543,7 +543,7 @@ void unrenumber_int_vertices(raft::handle_t const& handle, "+ num_vertices)."); } - if (multi_gpu) { + if constexpr (multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto const comm_rank = comm.get_rank(); diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp index 75e45426697..9fa1d4a5b4d 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #include "utilities/base_fixture.hpp" @@ -173,9 +173,6 @@ TEST_P(Tests_Leiden_File64, CheckInt64Int64FloatFloat) override_File_Usecase_with_cmd_line_arguments(GetParam())); } -#if 0 -// FIXME: We should use these tests, gtest-1.11.0 makes it a runtime error -// to define and not instantiate these. TEST_P(Tests_Leiden_Rmat, CheckInt32Int32FloatFloat) { run_current_test( @@ -199,9 +196,7 @@ TEST_P(Tests_Leiden_Rmat64, CheckInt64Int64FloatFloat) run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); } -#endif -// FIXME: Expand testing once we evaluate RMM memory use INSTANTIATE_TEST_SUITE_P( simple_test, Tests_Leiden_File, @@ -232,4 +227,32 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(Leiden_Usecase{}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); +INSTANTIATE_TEST_SUITE_P(rmat_small_tests, + Tests_Leiden_Rmat, + ::testing::Combine(::testing::Values(Leiden_Usecase{100, 1.0, false}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_Leiden_Rmat32, + ::testing::Combine( + ::testing::Values(Leiden_Usecase{}), + ::testing::Values(cugraph::test::Rmat_Usecase(12, 32, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat64_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat64_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_Leiden_Rmat64, + ::testing::Combine( + ::testing::Values(Leiden_Usecase{}), + ::testing::Values(cugraph::test::Rmat_Usecase(12, 32, 0.57, 0.19, 0.19, 0, true, false)))); + CUGRAPH_TEST_PROGRAM_MAIN() From 4634d8cd3084d04ba40e75eef05703b1e6deb6ac Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Thu, 9 Apr 2026 11:54:42 -0700 Subject: [PATCH 2/4] update include statement to use separately compiled object --- cpp/src/community/detail/refine_impl.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index 458f85dd482..204def5f278 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -5,7 +5,7 @@ #pragma once #include "common_methods.hpp" -#include "decision_graph_mis.cuh" +#include "decision_graph_mis.hpp" #include "detail/shuffle_wrappers.hpp" #include From bd4eca03b5f6c0089760ed0ee4d99162d60f9f31 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Mon, 13 Apr 2026 16:03:24 -0700 Subject: [PATCH 3/4] a few minor PR comments --- cpp/src/community/detail/decision_graph_mis.cuh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/community/detail/decision_graph_mis.cuh b/cpp/src/community/detail/decision_graph_mis.cuh index 542c803d84f..c596d1671d3 100644 --- a/cpp/src/community/detail/decision_graph_mis.cuh +++ b/cpp/src/community/detail/decision_graph_mis.cuh @@ -28,14 +28,14 @@ rmm::device_uvector vertices_in_mis_from_decision_edgelist( rmm::device_uvector&& d_srcs, rmm::device_uvector&& d_dsts) { + // NOTE: the maximum number of edges is the number of vertices in the graph, + // so we can use the vertex type for the edge type using edge_t = vertex_t; constexpr bool decision_store_transposed = false; cugraph::graph_t decision_graph(handle); - std::optional> renumber_map{std::nullopt}; - if constexpr (multi_gpu) { std::tie(d_srcs, d_dsts, std::ignore) = cugraph::shuffle_ext_edges(handle, @@ -45,6 +45,7 @@ rmm::device_uvector vertices_in_mis_from_decision_edgelist( false); } + std::optional> renumber_map{std::nullopt}; std::tie(decision_graph, std::ignore, renumber_map) = create_graph_from_edgelist( handle, From ec26ff8ad68b3eb2cd0cb0163cf96909520526e2 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Tue, 14 Apr 2026 13:21:36 -0700 Subject: [PATCH 4/4] Try eliminating CUDART dependency --- cpp/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7cf1d1aa1f2..e0dd2f0383b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -487,6 +487,7 @@ foreach(suffix IN LISTS cugraph_suffixes) CUDA_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON INTERFACE_POSITION_INDEPENDENT_CODE ON + NO_CUDART_DEP ON ) ################################################################################