-
Notifications
You must be signed in to change notification settings - Fork 356
Reduce binary size part2 #5486
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
rapids-bot
merged 8 commits into
rapidsai:main
from
ChuckHastings:reduce_binary_size_part2
Apr 23, 2026
Merged
Reduce binary size part2 #5486
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
4783ca8
Fix Leiden tests to all run, add some constexpr to some multi_gpu ref…
ChuckHastings 5735572
Merge branch 'main' into reduce_binary_size_part2
ChuckHastings 4634d8c
update include statement to use separately compiled object
ChuckHastings e888c3b
Merge branch 'main' into reduce_binary_size_part2
ChuckHastings cecbc27
Merge branch 'main' into reduce_binary_size_part2
ChuckHastings bd4eca0
a few minor PR comments
ChuckHastings ec26ff8
Try eliminating CUDART dependency
ChuckHastings af39191
Merge branch 'main' into reduce_binary_size_part2
ChuckHastings File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,97 @@ | ||
| /* | ||
| * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
| #pragma once | ||
|
|
||
| #include "common_methods.hpp" | ||
| #include "decision_graph_mis.hpp" | ||
| #include "maximal_independent_moves.cuh" | ||
|
|
||
| #include <cugraph/arithmetic_variant_types.hpp> | ||
| #include <cugraph/detail/utility_wrappers.hpp> | ||
| #include <cugraph/graph.hpp> | ||
| #include <cugraph/graph_functions.hpp> | ||
| #include <cugraph/shuffle_functions.hpp> | ||
|
|
||
| #include <optional> | ||
| #include <tuple> | ||
|
|
||
| namespace cugraph { | ||
| namespace detail { | ||
|
|
||
| template <typename vertex_t, bool multi_gpu> | ||
| rmm::device_uvector<vertex_t> vertices_in_mis_from_decision_edgelist( | ||
| raft::handle_t const& handle, | ||
| raft::random::RngState& rng_state, | ||
| raft::host_span<vertex_t const> vertex_partition_range_lasts, | ||
| rmm::device_uvector<vertex_t>&& d_srcs, | ||
| rmm::device_uvector<vertex_t>&& d_dsts) | ||
| { /* Pipeline: (multi-GPU) shuffle the edge list -> build a renumbered decision graph -> | ||
| run maximal_independent_moves on it -> relabel the result through renumber_map -> | ||
| (multi-GPU) shuffle the resulting vertices. */ | ||
| // NOTE: the decision edge list never holds more edges than the graph has vertices, so every | ||
| // edge count fits in vertex_t and vertex_t can double as the edge type. | ||
| using edge_t = vertex_t; | ||
|
|
||
| constexpr bool decision_store_transposed = false; | ||
|
|
||
| cugraph::graph_t<vertex_t, edge_t, decision_store_transposed, multi_gpu> decision_graph(handle); | ||
|
|
||
| if constexpr (multi_gpu) { /* multi-GPU only: shuffle the edge list; no edge properties passed */ | ||
| std::tie(d_srcs, d_dsts, std::ignore) = | ||
| cugraph::shuffle_ext_edges(handle, | ||
| std::move(d_srcs), | ||
| std::move(d_dsts), | ||
| std::vector<arithmetic_device_uvector_t>{}, | ||
| false); | ||
| } | ||
|
|
||
| std::optional<rmm::device_uvector<vertex_t>> renumber_map{std::nullopt}; | ||
| std::tie(decision_graph, std::ignore, renumber_map) = | ||
| create_graph_from_edgelist<vertex_t, edge_t, decision_store_transposed, multi_gpu>( | ||
| handle, | ||
| std::nullopt, | ||
| std::move(d_srcs), | ||
| std::move(d_dsts), | ||
| std::vector<arithmetic_device_uvector_t>{}, | ||
| cugraph::graph_properties_t{false, false}, | ||
| true /* renumber: renumber_map is dereferenced unconditionally below */); | ||
|
|
||
| auto decision_graph_view = decision_graph.view(); | ||
|
|
||
| auto vertices_in_mis = | ||
| maximal_independent_moves<vertex_t, edge_t, multi_gpu>(handle, decision_graph_view, rng_state); | ||
|
|
||
| rmm::device_uvector<vertex_t> numbering_indices((*renumber_map).size(), handle.get_stream()); | ||
| detail::sequence_fill(handle.get_stream(), /* presumably consecutive ids from the start value */ | ||
| numbering_indices.data(), | ||
| numbering_indices.size(), | ||
| decision_graph_view.local_vertex_partition_range_first()); | ||
|
|
||
| relabel<vertex_t, multi_gpu>( /* rewrite vertices_in_mis in place using the pairs below */ | ||
| handle, | ||
| std::make_tuple(static_cast<vertex_t const*>(numbering_indices.begin()), | ||
| static_cast<vertex_t const*>((*renumber_map).begin())), | ||
| decision_graph_view.local_vertex_partition_range_size(), | ||
| vertices_in_mis.data(), | ||
| vertices_in_mis.size(), | ||
| false); | ||
|
|
||
| numbering_indices.resize(0, handle.get_stream()); /* neither buffer is read after relabel */ | ||
| numbering_indices.shrink_to_fit(handle.get_stream()); | ||
|
|
||
| (*renumber_map).resize(0, handle.get_stream()); | ||
| (*renumber_map).shrink_to_fit(handle.get_stream()); | ||
|
|
||
| if constexpr (multi_gpu) { /* multi-GPU only: shuffle by vertex_partition_range_lasts */ | ||
| std::tie(vertices_in_mis, std::ignore) = | ||
| cugraph::shuffle_int_vertices(handle, | ||
| std::move(vertices_in_mis), | ||
| std::vector<cugraph::arithmetic_device_uvector_t>{}, | ||
| vertex_partition_range_lasts); | ||
| } | ||
|
|
||
| return vertices_in_mis; | ||
| } | ||
|
|
||
| } // namespace detail | ||
| } // namespace cugraph | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| /* | ||
| * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
| #pragma once | ||
|
|
||
| #include <raft/core/handle.hpp> | ||
| #include <raft/core/host_span.hpp> | ||
| #include <raft/random/rng_state.hpp> | ||
|
|
||
| #include <rmm/device_uvector.hpp> | ||
|
|
||
| namespace cugraph { | ||
| namespace detail { | ||
|
|
||
| /** | ||
| * @brief Build a decision graph from an edgelist, compute a maximal independent set of moves, | ||
| * relabel MIS vertices to original ids, and (multi-GPU) shuffle them to owning ranks. | ||
| * | ||
| * @tparam vertex_t Vertex id type; the implementation also uses it as the edge type of the | ||
| * decision graph. | ||
| * @tparam multi_gpu Selects at compile time whether the edge and vertex shuffle steps run. | ||
| * | ||
| * @param handle RAFT handle; its stream is used for the temporary device buffers. | ||
| * @param rng_state Random number generator state forwarded to the MIS computation. | ||
| * @param vertex_partition_range_lasts Used only when multi_gpu is true (shuffle_int_vertices). | ||
| * @param d_srcs Source vertices of the decision edges; moved from by the call. | ||
| * @param d_dsts Destination vertices of the decision edges; moved from by the call. | ||
| * @return Vertices selected by the MIS computation, relabeled to original ids. | ||
| */ | ||
| template <typename vertex_t, bool multi_gpu> | ||
| rmm::device_uvector<vertex_t> vertices_in_mis_from_decision_edgelist( | ||
| raft::handle_t const& handle, | ||
| raft::random::RngState& rng_state, | ||
| raft::host_span<vertex_t const> vertex_partition_range_lasts, | ||
| rmm::device_uvector<vertex_t>&& d_srcs, | ||
| rmm::device_uvector<vertex_t>&& d_dsts); | ||
|
|
||
| } // namespace detail | ||
| } // namespace cugraph |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| /* | ||
| * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
| #include "decision_graph_mis.cuh" | ||
|
|
||
| namespace cugraph { | ||
| namespace detail { | ||
|
|
||
| template rmm::device_uvector<int32_t> vertices_in_mis_from_decision_edgelist<int32_t, true>( | ||
| raft::handle_t const& handle, | ||
| raft::random::RngState& rng_state, | ||
| raft::host_span<int32_t const> vertex_partition_range_lasts, | ||
| rmm::device_uvector<int32_t>&& d_srcs, | ||
| rmm::device_uvector<int32_t>&& d_dsts); /* explicit instantiation: int32_t ids, multi-GPU */ | ||
|
|
||
| } // namespace detail | ||
| } // namespace cugraph |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| /* | ||
| * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
| #include "decision_graph_mis.cuh" | ||
|
|
||
| namespace cugraph { | ||
| namespace detail { | ||
|
|
||
| template rmm::device_uvector<int64_t> vertices_in_mis_from_decision_edgelist<int64_t, true>( | ||
| raft::handle_t const& handle, | ||
| raft::random::RngState& rng_state, | ||
| raft::host_span<int64_t const> vertex_partition_range_lasts, | ||
| rmm::device_uvector<int64_t>&& d_srcs, | ||
| rmm::device_uvector<int64_t>&& d_dsts); /* explicit instantiation: int64_t ids, multi-GPU */ | ||
|
|
||
| } // namespace detail | ||
| } // namespace cugraph |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| /* | ||
| * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
| #include "decision_graph_mis.cuh" | ||
|
|
||
| namespace cugraph { | ||
| namespace detail { | ||
|
|
||
| template rmm::device_uvector<int32_t> vertices_in_mis_from_decision_edgelist<int32_t, false>( | ||
| raft::handle_t const& handle, | ||
| raft::random::RngState& rng_state, | ||
| raft::host_span<int32_t const> vertex_partition_range_lasts, | ||
| rmm::device_uvector<int32_t>&& d_srcs, | ||
| rmm::device_uvector<int32_t>&& d_dsts); /* explicit instantiation: int32_t ids, single-GPU */ | ||
|
|
||
| } // namespace detail | ||
| } // namespace cugraph |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| /* | ||
| * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
| #include "decision_graph_mis.cuh" | ||
|
|
||
| namespace cugraph { | ||
| namespace detail { | ||
|
|
||
| template rmm::device_uvector<int64_t> vertices_in_mis_from_decision_edgelist<int64_t, false>( | ||
| raft::handle_t const& handle, | ||
| raft::random::RngState& rng_state, | ||
| raft::host_span<int64_t const> vertex_partition_range_lasts, | ||
| rmm::device_uvector<int64_t>&& d_srcs, | ||
| rmm::device_uvector<int64_t>&& d_dsts); /* explicit instantiation: int64_t ids, single-GPU */ | ||
|
|
||
| } // namespace detail | ||
| } // namespace cugraph |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be better to avoid this?
We currently don't build for edge_t != vertex_t cases in our regular release, but we still allow users to build for these cases manually.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or is there a guarantee that # edges <= # vertices in this use case? In that case, we should add a comment here to avoid future confusion.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I've added a comment. The number of entries in d_srcs/d_dsts starts as the number of vertices in the graph and then gets filtered down, so it's guaranteed to fit in a vertex_t.