Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
d7051d4
use cudf pack with pinned mr
nirandaperera Apr 13, 2026
a2a27c9
Merge branch 'main' of github.com:rapidsai/rapidsmpf into table_chunk…
nirandaperera Apr 14, 2026
3e593a0
Merge branch 'main' into table_chunk_pinned_copy
nirandaperera Apr 15, 2026
b4fc954
Apply suggestions from code review
nirandaperera Apr 17, 2026
9412778
Merge branch 'table_chunk_pinned_copy' of github.com:nirandaperera/ra…
nirandaperera Apr 20, 2026
0b00c6e
addressing PR comments
nirandaperera Apr 20, 2026
634c903
Merge branch 'main' of github.com:rapidsai/rapidsmpf into table_chunk…
nirandaperera Apr 20, 2026
6371c76
Merge branch 'main' of github.com:rapidsai/rapidsmpf into table_chunk…
nirandaperera Apr 22, 2026
5fe889b
addressing comments
nirandaperera Apr 22, 2026
7769d1f
minor doc change
nirandaperera Apr 22, 2026
53d7fe7
Merge branch 'main' into table_chunk_pinned_copy
nirandaperera Apr 30, 2026
aa7668d
Merge branch 'main' of github.com:rapidsai/rapidsmpf into table_chunk…
nirandaperera May 11, 2026
a01e7d1
merge conflicts and simplify API
nirandaperera May 12, 2026
5a9ef07
fix explanation
nirandaperera May 12, 2026
10c2a53
Merge branch 'main' into table_chunk_pinned_copy
nirandaperera May 12, 2026
879662d
Apply suggestions from code review
nirandaperera May 13, 2026
e49fe94
Merge branch 'main' into table_chunk_pinned_copy
nirandaperera May 13, 2026
ff2b6f8
Merge branch 'main' into table_chunk_pinned_copy
wence- May 14, 2026
4b42195
Update cpp/include/rapidsmpf/memory/resource_types.hpp
nirandaperera May 14, 2026
0616180
Merge branch 'main' into table_chunk_pinned_copy
nirandaperera May 14, 2026
9661bd0
Merge branch 'main' into table_chunk_pinned_copy
nirandaperera May 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions cpp/include/rapidsmpf/memory/buffer_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,16 +307,20 @@ class BufferResource {
);

/**
* @brief Move device buffer data into a Buffer.
* @brief Move device or pinned host buffer data into a Buffer.
*
* This operation is cheap; no copy is performed. The resulting Buffer resides in
* device memory.
* This operation is cheap; no copy is performed.
*
* The resulting Buffer's memory type is inferred from @p data's memory
* resource: if the resource is host-accessible (e.g. pinned host memory),
* the Buffer is created with `MemoryType::PINNED_HOST`; otherwise it is
* created with `MemoryType::DEVICE`.
*
* If @p stream differs from the device buffer's current stream:
* - @p stream is synchronized with the device buffer's current stream, and
* - the device buffer's current stream is updated to @p stream.
*
* @param data Unique pointer to the device buffer.
* @param data Unique pointer to the device or pinned host buffer.
* @param stream CUDA stream associated with the new Buffer. Use or synchronize with
* this stream when operating on the Buffer.
* @return Unique pointer to the resulting Buffer.
Expand Down
41 changes: 41 additions & 0 deletions cpp/include/rapidsmpf/memory/resource_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,45 @@ using any_device_resource = cuda::mr::any_resource<cuda::mr::device_accessible>;
using any_host_device_resource =
cuda::mr::any_resource<cuda::mr::host_accessible, cuda::mr::device_accessible>;

/**
* @brief Check whether a type-erased memory resource is host-accessible.
*
* Queries the resource's `dynamic_accessibility_property` and returns true if
* the reported accessibility is host-only or host-and-device.
*
* @tparam Properties The property pack of the resource reference.
* @param mr The memory resource reference to query.
* @return True if the resource is host-accessible, false otherwise.
*/
template <typename... Properties>
[[nodiscard]] bool is_host_accessible(
cuda::mr::resource_ref<Properties...> const& mr
) noexcept {
auto const accessibility =
get_property(mr, cuda::mr::dynamic_accessibility_property{});
return accessibility == cuda::mr::__memory_accessibility::__host
|| accessibility == cuda::mr::__memory_accessibility::__host_device;
Comment on lines +35 to +36
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there really no public way to do this?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately no (at least not that I know of)

}

/**
 * @brief Tell whether a type-erased memory resource can be accessed from the device.
 *
 * Reads the `dynamic_accessibility_property` of @p mr and reports true when the
 * resource describes itself as device-only or as host-and-device.
 *
 * @tparam Properties The property pack of the resource reference.
 * @param mr The memory resource reference to query.
 * @return True if the resource is device-accessible, false otherwise.
 */
template <typename... Properties>
[[nodiscard]] bool is_device_accessible(
    cuda::mr::resource_ref<Properties...> const& mr
) noexcept {
    // `get_property` is deliberately called unqualified so that it is found via
    // argument-dependent lookup (ADL).
    auto const reported = get_property(mr, cuda::mr::dynamic_accessibility_property{});

    // Device-only resources qualify outright.
    if (reported == cuda::mr::__memory_accessibility::__device) {
        return true;
    }
    // Otherwise the resource qualifies only if it is both host- and device-accessible.
    return reported == cuda::mr::__memory_accessibility::__host_device;
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can avoid the dynamic lookup if you use `if constexpr (has_property<mr, device_accessible>)`, and otherwise fall back to it?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


} // namespace rapidsmpf
10 changes: 10 additions & 0 deletions cpp/src/memory/buffer_resource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <rapidsmpf/memory/buffer_resource.hpp>
#include <rapidsmpf/memory/host_buffer.hpp>
#include <rapidsmpf/memory/host_memory_resource.hpp>
#include <rapidsmpf/memory/resource_types.hpp>
#include <rapidsmpf/stream_ordered_timing.hpp>
#include <rapidsmpf/utils/string.hpp>

Expand Down Expand Up @@ -215,6 +216,15 @@ std::unique_ptr<Buffer> BufferResource::move(
cuda_stream_join(stream, upstream);
data->set_stream(stream);
}

if (is_host_accessible(data->memory_resource())) {
auto pinned_host_buffer = std::make_unique<HostBuffer>(
Comment thread
wence- marked this conversation as resolved.
HostBuffer::from_rmm_device_buffer(std::move(data), stream)
);
return std::unique_ptr<Buffer>(
new Buffer(std::move(pinned_host_buffer), stream, MemoryType::PINNED_HOST)
);
Comment thread
wence- marked this conversation as resolved.
}
return std::unique_ptr<Buffer>(new Buffer(std::move(data), MemoryType::DEVICE));
}

Expand Down
7 changes: 3 additions & 4 deletions cpp/src/memory/host_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@

#include <utility>

#include <cuda/memory>

#include <rapidsmpf/memory/cuda_memcpy_async.hpp>
#include <rapidsmpf/memory/host_buffer.hpp>
#include <rapidsmpf/memory/memory_type.hpp>
#include <rapidsmpf/memory/resource_types.hpp>

namespace rapidsmpf {

Expand Down Expand Up @@ -145,8 +144,8 @@ HostBuffer HostBuffer::from_rmm_device_buffer(
);

RAPIDSMPF_EXPECTS(
cuda::is_host_accessible(pinned_host_buffer->data()),
"pinned_host_buffer must be host accessible",
is_host_accessible(pinned_host_buffer->memory_resource()),
"pinned_host_buffer's memory resource must be host accessible",
std::logic_error
);

Expand Down
66 changes: 45 additions & 21 deletions cpp/src/streaming/cudf/table_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,24 +152,29 @@ bool TableChunk::is_spillable() const {
}

TableChunk TableChunk::copy(MemoryReservation& reservation) const {
// This method handles the three possible cases:
// This method handles the two possible cases. Note that
// `!is_available() && packed_data_ == nullptr` is an invalid state, so the
// remaining valid combinations collapse into:
//
// 1. The chunk is available and the reservation specifies device memory.
// In this case, we can directly use cudf to create a deep copy of the
// table by copying the table_view() into device memory.
// 1. The chunk is available and not yet packed. The table is copied/packed
// into the reservation-specified memory type using libcudf:
// a. DEVICE - cudf-copy table_view() into device memory.
// b. PINNED_HOST - cudf::pack table_view() directly into pinned memory.
// c. HOST - cudf::pack table_view() into intermediate device
// memory and then copy to host memory.
//
// 2. The chunk is available and the data is a generic cudf table that is
// not already packed. In this case, the table data must first be packed
// before copying it to host or pinned memory.
//
// 3. The chunk data is already packed (packed_data_ != nullptr).
// In this case, we simply use buffer_copy() to copy the packed data
// into the reservation-specified memory type. The original memory
// type of the chunk does not matter.
// 2. The chunk data is already packed (packed_data_ != nullptr).
// Use buffer_copy() to copy the packed data into the reservation-
// specified memory type. The original memory type of the chunk does
// not matter.
BufferResource* br = reservation.br();
if (is_available()) {

// If the table view is available and the table is not packed, we can use libcudf to
// copy the table in device memory, or pack it to pinned/host memory. Otherwise, fall
// through to case 2 (i.e. use buffer_copy).
if (is_available() && packed_data_ == nullptr) {
switch (reservation.mem_type()) {
case MemoryType::DEVICE: // Case 1.
case MemoryType::DEVICE: // Case 1a.
{
// Use libcudf to copy the table_view().
auto const nbytes = data_alloc_size(MemoryType::DEVICE);
Expand All @@ -184,10 +189,28 @@ TableChunk TableChunk::copy(MemoryReservation& reservation) const {
br->release(reservation, nbytes);
return TableChunk(std::move(table), stream());
}
case MemoryType::HOST:
case MemoryType::PINNED_HOST:
// Case 2.
if (packed_data_ == nullptr) {
case MemoryType::PINNED_HOST: // Case 1b.
{
StreamOrderedTiming timing{stream(), br->statistics()};

// use cudf pack with pinned mr
auto packed_pinned = cudf::pack(table_view(), stream(), br->pinned_mr());
auto nbytes = packed_pinned.gpu_data->size();

br->statistics()->record_copy(
MemoryType::DEVICE, MemoryType::PINNED_HOST, nbytes, std::move(timing)
);
// update the provided `reservation`
br->release(reservation, nbytes);
auto host_buffer = br->move(std::move(packed_pinned.gpu_data), stream());
return TableChunk(
std::make_unique<PackedData>(
std::move(packed_pinned.metadata), std::move(host_buffer)
)
);
}
case MemoryType::HOST: // Case 1c.
{
// We use libcudf's pack() to serialize `table_view()` into a
// packed_columns and then we move the packed_columns' gpu_data to a
// new host buffer.
Expand Down Expand Up @@ -218,15 +241,16 @@ TableChunk TableChunk::copy(MemoryReservation& reservation) const {
packed_data->data = br->move(std::move(packed_data->data), reservation);
return TableChunk(std::move(packed_data));
}
break;
default:
RAPIDSMPF_FAIL("MemoryType: unknown");
}
}
// Note, `is_available() == false` implies `packed_data_ != nullptr`.
// `!is_available() && packed_data_ == nullptr` is an invalid state, so
// reaching this point implies `packed_data_ != nullptr`.
RAPIDSMPF_EXPECTS(packed_data_ != nullptr, "something went wrong");

// Case 3.
// Case 2. The chunk data is already packed (packed_data_ != nullptr). We need
// to copy the packed data into the reservation-specified memory type.
auto const nbytes = packed_data_->data->size;
auto metadata = std::make_unique<std::vector<std::uint8_t>>(*packed_data_->metadata);
auto data = br->allocate(nbytes, packed_data_->stream(), reservation);
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ target_sources(
test_cupti_monitor.cpp
test_error_macros.cpp
test_host_buffer.cpp
test_memory_resources.cpp
test_metadata_payload_exchange.cpp
test_misc.cpp
test_partition.cpp
Expand Down
13 changes: 12 additions & 1 deletion cpp/tests/test_host_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,18 @@ TEST(PinnedResource, max_pool_size_limit) {
}

TEST(PinnedResource, from_default_options) {
auto mr = rapidsmpf::PinnedMemoryResource::from_options(rapidsmpf::config::Options{});
{
// disabled by default
auto mr =
rapidsmpf::PinnedMemoryResource::from_options(rapidsmpf::config::Options{});
EXPECT_EQ(mr, rapidsmpf::PinnedMemoryResource::Disabled);
}

// check default pool values, if enabled
std::unordered_map<std::string, std::string> strings = {{"pinned_memory", "True"}};
auto mr = rapidsmpf::PinnedMemoryResource::from_options(
rapidsmpf::config::Options(strings)
);
if (mr == rapidsmpf::PinnedMemoryResource::Disabled) {
GTEST_SKIP() << "PinnedMemoryResource is not supported";
}
Expand Down
64 changes: 64 additions & 0 deletions cpp/tests/test_memory_resources.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/**
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/

#include <vector>

#include <gtest/gtest.h>

#include <cuda/memory_resource>

#include <rmm/mr/cuda_async_memory_resource.hpp>
#include <rmm/mr/cuda_memory_resource.hpp>

#include <rapidsmpf/memory/host_memory_resource.hpp>
#include <rapidsmpf/memory/pinned_memory_resource.hpp>
#include <rapidsmpf/memory/resource_types.hpp>

namespace {

std::vector<cuda::mr::any_resource<cuda::mr::host_accessible>> make_host_resources() {
std::vector<cuda::mr::any_resource<cuda::mr::host_accessible>> resources;
resources.emplace_back(rapidsmpf::HostMemoryResource{});
if (rapidsmpf::is_pinned_memory_resources_supported()) {
resources.emplace_back(*rapidsmpf::PinnedMemoryResource::make_if_available());
}
return resources;
}

std::vector<cuda::mr::any_resource<cuda::mr::device_accessible>> make_device_resources() {
std::vector<cuda::mr::any_resource<cuda::mr::device_accessible>> resources;
resources.emplace_back(rmm::mr::cuda_memory_resource{});
resources.emplace_back(rmm::mr::cuda_async_memory_resource{});
if (rapidsmpf::is_pinned_memory_resources_supported()) {
resources.emplace_back(*rapidsmpf::PinnedMemoryResource::make_if_available());
}
return resources;
}

} // namespace

// Every resource returned by make_host_resources() must be reported as
// host-accessible through a host_accessible resource_ref.
TEST(MemoryResourceAccessibility, IsHostAccessible) {
    auto host_resources = make_host_resources();
    for (auto& resource : host_resources) {
        cuda::mr::resource_ref<cuda::mr::host_accessible> ref{resource};
        EXPECT_TRUE(rapidsmpf::is_host_accessible(ref));

        // PinnedMemoryResource is host- and device-accessible, so the
        // "not device-accessible" expectation applies only to the other
        // (host-only) resources.
        bool const is_pinned =
            cuda::mr::resource_cast<rapidsmpf::PinnedMemoryResource>(&resource) != nullptr;
        if (is_pinned) {
            continue;
        }
        EXPECT_FALSE(rapidsmpf::is_device_accessible(ref));
    }
}

// Every resource returned by make_device_resources() must be reported as
// device-accessible through a device_accessible resource_ref.
TEST(MemoryResourceAccessibility, IsDeviceAccessible) {
    auto device_resources = make_device_resources();
    for (auto& resource : device_resources) {
        cuda::mr::resource_ref<cuda::mr::device_accessible> ref{resource};
        EXPECT_TRUE(rapidsmpf::is_device_accessible(ref));

        // PinnedMemoryResource is host- and device-accessible, so the
        // "not host-accessible" expectation applies only to the other
        // (device-only) resources.
        bool const is_pinned =
            cuda::mr::resource_cast<rapidsmpf::PinnedMemoryResource>(&resource) != nullptr;
        if (is_pinned) {
            continue;
        }
        EXPECT_FALSE(rapidsmpf::is_host_accessible(ref));
    }
}
Loading