diff --git a/cpp/include/rapidsmpf/memory/buffer_resource.hpp b/cpp/include/rapidsmpf/memory/buffer_resource.hpp index fe0216cfa..e7cf3c4b6 100644 --- a/cpp/include/rapidsmpf/memory/buffer_resource.hpp +++ b/cpp/include/rapidsmpf/memory/buffer_resource.hpp @@ -307,16 +307,20 @@ class BufferResource { ); /** - * @brief Move device buffer data into a Buffer. + * @brief Move device or pinned host buffer data into a Buffer. * - * This operation is cheap; no copy is performed. The resulting Buffer resides in - * device memory. + * This operation is cheap; no copy is performed. + * + * The resulting Buffer's memory type is inferred from @p data's memory + * resource: if the resource is host-accessible (e.g. pinned host memory), + * the Buffer is created with `MemoryType::PINNED_HOST`; otherwise it is + * created with `MemoryType::DEVICE`. * * If @p stream differs from the device buffer's current stream: * - @p stream is synchronized with the device buffer's current stream, and * - the device buffer's current stream is updated to @p stream. * - * @param data Unique pointer to the device buffer. + * @param data Unique pointer to the device or pinned host buffer. * @param stream CUDA stream associated with the new Buffer. Use or synchronize with * this stream when operating on the Buffer. * @return Unique pointer to the resulting Buffer. diff --git a/cpp/include/rapidsmpf/memory/resource_types.hpp b/cpp/include/rapidsmpf/memory/resource_types.hpp index 3df7b2590..85c4a7911 100644 --- a/cpp/include/rapidsmpf/memory/resource_types.hpp +++ b/cpp/include/rapidsmpf/memory/resource_types.hpp @@ -16,4 +16,45 @@ using any_device_resource = cuda::mr::any_resource; using any_host_device_resource = cuda::mr::any_resource; +/** + * @brief Check whether a type-erased memory resource is host-accessible. + * + * Queries the resource's `dynamic_accessibility_property` and returns true if + * the reported accessibility is host-only or host-and-device. 
+ * + * @tparam Properties The property pack of the resource reference. + * @param mr The memory resource reference to query. + * @return True if the resource is host-accessible, false otherwise. + */ +template +[[nodiscard]] bool is_host_accessible( + cuda::mr::resource_ref const& mr +) noexcept { + auto const accessibility = + get_property(mr, cuda::mr::dynamic_accessibility_property{}); + return accessibility == cuda::mr::__memory_accessibility::__host + || accessibility == cuda::mr::__memory_accessibility::__host_device; +} + +/** + * @brief Check whether a type-erased memory resource is device-accessible. + * + * Queries the resource's `dynamic_accessibility_property` and returns true if + * the reported accessibility is device-only or host-and-device. + * + * @tparam Properties The property pack of the resource reference. + * @param mr The memory resource reference to query. + * @return True if the resource is device-accessible, false otherwise. + */ +template +[[nodiscard]] bool is_device_accessible( + cuda::mr::resource_ref const& mr +) noexcept { + // `get_property` is called unqualified on purpose so that it is found via ADL. 
+ auto const accessibility = + get_property(mr, cuda::mr::dynamic_accessibility_property{}); + return accessibility == cuda::mr::__memory_accessibility::__device + || accessibility == cuda::mr::__memory_accessibility::__host_device; +} + } // namespace rapidsmpf diff --git a/cpp/src/memory/buffer_resource.cpp b/cpp/src/memory/buffer_resource.cpp index 34d0f0500..598f78abc 100644 --- a/cpp/src/memory/buffer_resource.cpp +++ b/cpp/src/memory/buffer_resource.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -215,6 +216,15 @@ std::unique_ptr BufferResource::move( cuda_stream_join(stream, upstream); data->set_stream(stream); } + + if (is_host_accessible(data->memory_resource())) { + auto pinned_host_buffer = std::make_unique( + HostBuffer::from_rmm_device_buffer(std::move(data), stream) + ); + return std::unique_ptr( + new Buffer(std::move(pinned_host_buffer), stream, MemoryType::PINNED_HOST) + ); + } return std::unique_ptr(new Buffer(std::move(data), MemoryType::DEVICE)); } diff --git a/cpp/src/memory/host_buffer.cpp b/cpp/src/memory/host_buffer.cpp index 43c288c2a..734c4d734 100644 --- a/cpp/src/memory/host_buffer.cpp +++ b/cpp/src/memory/host_buffer.cpp @@ -5,11 +5,10 @@ #include -#include - #include #include #include +#include namespace rapidsmpf { @@ -145,8 +144,8 @@ HostBuffer HostBuffer::from_rmm_device_buffer( ); RAPIDSMPF_EXPECTS( - cuda::is_host_accessible(pinned_host_buffer->data()), - "pinned_host_buffer must be host accessible", + is_host_accessible(pinned_host_buffer->memory_resource()), + "pinned_host_buffer's memory resource must be host accessible", std::logic_error ); diff --git a/cpp/src/streaming/cudf/table_chunk.cpp b/cpp/src/streaming/cudf/table_chunk.cpp index 26e527655..9ad2e7c18 100644 --- a/cpp/src/streaming/cudf/table_chunk.cpp +++ b/cpp/src/streaming/cudf/table_chunk.cpp @@ -152,24 +152,29 @@ bool TableChunk::is_spillable() const { } TableChunk TableChunk::copy(MemoryReservation& reservation) const { - // This 
method handles the three possible cases: + // This method handles the two possible cases. Note that + // `!is_available() && packed_data_ == nullptr` is an invalid state, so the + // remaining valid combinations collapse into: // - // 1. The chunk is available and the reservation specifies device memory. - // In this case, we can directly use cudf to create a deep copy of the - // table by copying the table_view() into device memory. + // 1. The chunk is available and not yet packed. The table is copied/packed + // into the reservation-specified memory type using libcudf: + // a. DEVICE - cudf-copy table_view() into device memory. + // b. PINNED_HOST - cudf::pack table_view() directly into pinned memory. + // c. HOST - cudf::pack table_view() into intermediate device + // memory and then copy to host memory. // - // 2. The chunk is available and the data is a generic cudf table that is - // not already packed. In this case, the table data must first be packed - // before copying it to host or pinned memory. - // - // 3. The chunk data is already packed (packed_data_ != nullptr). - // In this case, we simply use buffer_copy() to copy the packed data - // into the reservation-specified memory type. The original memory - // type of the chunk does not matter. + // 2. The chunk data is already packed (packed_data_ != nullptr). + // Use buffer_copy() to copy the packed data into the reservation- + // specified memory type. The original memory type of the chunk does + // not matter. BufferResource* br = reservation.br(); - if (is_available()) { + + // If the table view is available and the table is not packed, we can use libcudf to + // copy the table in device memory, or pack it to pinned/host memory. Else, fall + // through to case 2 (i.e. use buffer_copy). + if (is_available() && packed_data_ == nullptr) { switch (reservation.mem_type()) { - case MemoryType::DEVICE: // Case 1. + case MemoryType::DEVICE: // Case 1a. { // Use libcudf to copy the table_view(). 
auto const nbytes = data_alloc_size(MemoryType::DEVICE); @@ -184,10 +189,28 @@ TableChunk TableChunk::copy(MemoryReservation& reservation) const { br->release(reservation, nbytes); return TableChunk(std::move(table), stream()); } - case MemoryType::HOST: - case MemoryType::PINNED_HOST: - // Case 2. - if (packed_data_ == nullptr) { + case MemoryType::PINNED_HOST: // Case 1b. + { + StreamOrderedTiming timing{stream(), br->statistics()}; + + // Pack the table with cudf::pack, allocating its output from the pinned mr. + auto packed_pinned = cudf::pack(table_view(), stream(), br->pinned_mr()); + auto nbytes = packed_pinned.gpu_data->size(); + + br->statistics()->record_copy( + MemoryType::DEVICE, MemoryType::PINNED_HOST, nbytes, std::move(timing) + ); + // Release the packed size (`nbytes`) from the provided `reservation`. + br->release(reservation, nbytes); + auto host_buffer = br->move(std::move(packed_pinned.gpu_data), stream()); + return TableChunk( + std::make_unique( + std::move(packed_pinned.metadata), std::move(host_buffer) + ) + ); + } + case MemoryType::HOST: // Case 1c. + { // We use libcudf's pack() to serialize `table_view()` into a // packed_columns and then we move the packed_columns' gpu_data to a // new host buffer. @@ -218,15 +241,16 @@ TableChunk TableChunk::copy(MemoryReservation& reservation) const { packed_data->data = br->move(std::move(packed_data->data), reservation); return TableChunk(std::move(packed_data)); } - break; default: RAPIDSMPF_FAIL("MemoryType: unknown"); } } - // Note, `is_available() == false` implies `packed_data_ != nullptr`. + // `!is_available() && packed_data_ == nullptr` is an invalid state, so + // reaching this point implies `packed_data_ != nullptr`. RAPIDSMPF_EXPECTS(packed_data_ != nullptr, "something went wrong"); - // Case 3. + // Case 2. The chunk data is already packed (packed_data_ != nullptr). We need + // to copy the packed data into the reservation-specified memory type. 
auto const nbytes = packed_data_->data->size; auto metadata = std::make_unique>(*packed_data_->metadata); auto data = br->allocate(nbytes, packed_data_->stream(), reservation); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d701dc2b0..2f5a52979 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -82,6 +82,7 @@ target_sources( test_cupti_monitor.cpp test_error_macros.cpp test_host_buffer.cpp + test_memory_resources.cpp test_metadata_payload_exchange.cpp test_misc.cpp test_partition.cpp diff --git a/cpp/tests/test_host_buffer.cpp b/cpp/tests/test_host_buffer.cpp index f324cee3d..e02f47f53 100644 --- a/cpp/tests/test_host_buffer.cpp +++ b/cpp/tests/test_host_buffer.cpp @@ -321,7 +321,18 @@ TEST(PinnedResource, max_pool_size_limit) { } TEST(PinnedResource, from_default_options) { - auto mr = rapidsmpf::PinnedMemoryResource::from_options(rapidsmpf::config::Options{}); + { + // disabled by default + auto mr = + rapidsmpf::PinnedMemoryResource::from_options(rapidsmpf::config::Options{}); + EXPECT_EQ(mr, rapidsmpf::PinnedMemoryResource::Disabled); + } + + // check default pool values, if enabled + std::unordered_map strings = {{"pinned_memory", "True"}}; + auto mr = rapidsmpf::PinnedMemoryResource::from_options( + rapidsmpf::config::Options(strings) + ); if (mr == rapidsmpf::PinnedMemoryResource::Disabled) { GTEST_SKIP() << "PinnedMemoryResource is not supported"; } diff --git a/cpp/tests/test_memory_resources.cpp b/cpp/tests/test_memory_resources.cpp new file mode 100644 index 000000000..5a72bb89f --- /dev/null +++ b/cpp/tests/test_memory_resources.cpp @@ -0,0 +1,64 @@ +/** + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include + +#include + +#include +#include + +#include +#include +#include + +namespace { + +std::vector> make_host_resources() { + std::vector> resources; + resources.emplace_back(rapidsmpf::HostMemoryResource{}); + if (rapidsmpf::is_pinned_memory_resources_supported()) { + resources.emplace_back(*rapidsmpf::PinnedMemoryResource::make_if_available()); + } + return resources; +} + +std::vector> make_device_resources() { + std::vector> resources; + resources.emplace_back(rmm::mr::cuda_memory_resource{}); + resources.emplace_back(rmm::mr::cuda_async_memory_resource{}); + if (rapidsmpf::is_pinned_memory_resources_supported()) { + resources.emplace_back(*rapidsmpf::PinnedMemoryResource::make_if_available()); + } + return resources; +} + +} // namespace + +TEST(MemoryResourceAccessibility, IsHostAccessible) { + auto resources = make_host_resources(); + for (auto& mr : resources) { + cuda::mr::resource_ref ref{mr}; + EXPECT_TRUE(rapidsmpf::is_host_accessible(ref)); + // PinnedMemoryResource is host- and device-accessible, so it is exempt below; the rest are host-only. + if (cuda::mr::resource_cast(&mr) == nullptr) { + EXPECT_FALSE(rapidsmpf::is_device_accessible(ref)); + } + } +} + +TEST(MemoryResourceAccessibility, IsDeviceAccessible) { + auto resources = make_device_resources(); + for (auto& mr : resources) { + cuda::mr::resource_ref ref{mr}; + EXPECT_TRUE(rapidsmpf::is_device_accessible(ref)); + // PinnedMemoryResource is host- and device-accessible, so it is exempt below; the rest are device-only. + if (cuda::mr::resource_cast(&mr) == nullptr) { + EXPECT_FALSE(rapidsmpf::is_host_accessible(ref)); + } + } +}