From b04f41db6ca9b59329190d275e9f6ee5d0f05f8b Mon Sep 17 00:00:00 2001 From: Dana Robinson Date: Tue, 3 Mar 2026 11:01:51 -0700 Subject: [PATCH 1/6] Add a Doxygen note about max IO size to API calls --- docs/Doxyfile.in | 1 + include/hipfile.h | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index ad39635e..95110db3 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -289,6 +289,7 @@ ALIASES += buffer_base_param="Base address of the GPU buffer" ALIASES += batch_handle_param="Opaque handle for batch operations" ALIASES += hipstream_param="The stream used for async IO requests" ALIASES += hipstream_if_null="If NULL, this request will be synchronous" +ALIASES += max_io_size_note="@note The maximum IO size is determined by the Linux kernel and is currently 2^31 - PAGE_SIZE" # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For diff --git a/include/hipfile.h b/include/hipfile.h index e6ffe649..ccf898b0 100644 --- a/include/hipfile.h +++ b/include/hipfile.h @@ -517,6 +517,8 @@ hipFileError_t hipFileBufDeregister(const void *buffer_base); * @param [in] file_offset Offset into the file that should be read from * @param [in] buffer_offset Offset of the GPU buffer that that the data should be written to * + * \max_io_size_note + * * @return if >= 0: Number of bytes read * @return if -1: System error (check `errno` for the specific error) * @return else: Negative value of the related hipFileOpError_t @@ -538,6 +540,8 @@ ssize_t hipFileRead(hipFileHandle_t fh, void *buffer_base, size_t size, hoff_t f * @param [in] file_offset Offset into the file that should be written to * @param [in] buffer_offset Offset of the GPU buffer that the data should be read from * + * \max_io_size_note + * * @return if >= 0: Number of bytes written * @return if -1: System error (check `errno` for the specific error) * @return else: Negative value of the related hipFileOpError_t @@ -838,6 +842,8 @@ void hipFileBatchIODestroy(hipFileBatchHandle_t batch_idp); * @param [out] bytes_read_p Number of bytes read * @param [in] stream \hipstream_param. \hipstream_if_null. * + * \max_io_size_note + * * @return \hipfile_error_return */ HIPFILE_API @@ -856,6 +862,8 @@ hipFileError_t hipFileReadAsync(hipFileHandle_t fh, void *buffer_base, size_t *s * @param [out] bytes_written_p Number of bytes written * @param [in] stream \hipstream_param. \hipstream_if_null. * + * \max_io_size_note + * * @return \hipfile_error_return */ HIPFILE_API From 015c09fb67fa1caca1f8609b3bfbecdc468cfc4f Mon Sep 17 00:00:00 2001 From: Kurt McMillan Date: Fri, 13 Mar 2026 20:55:45 +0000 Subject: [PATCH 2/6] hipFile: Mirror kernel's MAX_RW_COUNT computation Since PAGE_SIZE is not the same on all plaforms, mirror the kernel's computation of MAX_RW_COUNT so that hipFile doesn't use an incorrect value. --- src/amd_detail/backend.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/amd_detail/backend.h b/src/amd_detail/backend.h index e35e8d6b..3f50bcd0 100644 --- a/src/amd_detail/backend.h +++ b/src/amd_detail/backend.h @@ -11,6 +11,7 @@ #include "io.h" #include "sys.h" +#include #include #include #include @@ -19,9 +20,19 @@ namespace hipFile { +static const size_t PAGE_SIZE{[] { + long v = sysconf(_SC_PAGE_SIZE); + if (v == -1) { + throw std::runtime_error("sysconf(_SC_PAGE_SIZE) failed"); + } + return static_cast(v); +}()}; + +static const size_t PAGE_MASK{~(PAGE_SIZE - 1)}; + // The maximum number of bytes that can be transferred in a single read() or // write() system call. Mirrors kernel's MAX_RW_COUNT -static const size_t MAX_RW_COUNT = 0x7ffff000; +static const size_t MAX_RW_COUNT = (INT_MAX & PAGE_MASK); /// @brief Backend is not enabled struct BackendDisabled : public std::runtime_error { From 7718cfae342fe53621c7bb624970fc21f5b292af Mon Sep 17 00:00:00 2001 From: Dana Robinson Date: Wed, 1 Apr 2026 13:44:38 -0600 Subject: [PATCH 3/6] MAX_RW_COUNT --> getMaxRwCount() The max IO size constants are initialized using a lambda that: * Occurs in every TU where it's used * Can throw on errors during static initialization The solution is to replace the globals + lambda with inline functions that have static variables. --- src/amd_detail/backend.h | 35 +++++++++++++++------ src/amd_detail/backend/asyncop-fallback.cpp | 2 +- src/amd_detail/backend/fallback.cpp | 6 ++-- src/amd_detail/backend/fastpath.cpp | 2 +- test/amd_detail/async.cpp | 10 +++--- test/amd_detail/fallback.cpp | 5 +-- test/amd_detail/fastpath.cpp | 6 ++-- 7 files changed, 41 insertions(+), 25 deletions(-) diff --git a/src/amd_detail/backend.h b/src/amd_detail/backend.h index 3f50bcd0..6639be64 100644 --- a/src/amd_detail/backend.h +++ b/src/amd_detail/backend.h @@ -20,19 +20,34 @@ namespace hipFile { -static const size_t PAGE_SIZE{[] { - long v = sysconf(_SC_PAGE_SIZE); - if (v == -1) { - throw std::runtime_error("sysconf(_SC_PAGE_SIZE) failed"); - } - return static_cast(v); -}()}; +[[nodiscard]] inline size_t +getPageSize() +{ + static const size_t value = [] { + const long v = sysconf(_SC_PAGE_SIZE); + if (v == -1) { + throw std::runtime_error("sysconf(_SC_PAGE_SIZE) failed"); + } + return static_cast(v); + }(); + return value; +} -static const size_t PAGE_MASK{~(PAGE_SIZE - 1)}; +[[nodiscard]] inline size_t +getPageMask() +{ + static const size_t value = ~(getPageSize() - 1); + return value; +} // The maximum number of bytes that can be transferred in a single read() or -// write() system call. Mirrors kernel's MAX_RW_COUNT -static const size_t MAX_RW_COUNT = (INT_MAX & PAGE_MASK); +// write() system call. Calculation is same as kernel's MAX_RW_COUNT. +[[nodiscard]] inline size_t +getMaxRwCount() +{ + static const size_t value = INT_MAX & getPageMask(); + return value; +} /// @brief Backend is not enabled struct BackendDisabled : public std::runtime_error { diff --git a/src/amd_detail/backend/asyncop-fallback.cpp b/src/amd_detail/backend/asyncop-fallback.cpp index e1306947..c68f00df 100644 --- a/src/amd_detail/backend/asyncop-fallback.cpp +++ b/src/amd_detail/backend/asyncop-fallback.cpp @@ -43,7 +43,7 @@ AsyncOpFallback::AsyncOpFallback(IoType _io_type, std::shared_ptr _file, size_t *_size, hoff_t *_file_offset, hoff_t *_buffer_offset, ssize_t *_bytes_transferred) : AsyncOp{_io_type, _file, _buffer, _stream, _size, _file_offset, _buffer_offset, _bytes_transferred}, - submitted_size{std::min(*_size, hipFile::MAX_RW_COUNT)}, bytes_transferred_internal{0}, + submitted_size{std::min(*_size, hipFile::getMaxRwCount())}, bytes_transferred_internal{0}, gpu_buffer{buffer->getBuffer()}, bounce_buffer_dev_ptr{nullptr}, bounce_buffer{nullptr, [](void *addr) { (void)addr; }} { diff --git a/src/amd_detail/backend/fallback.cpp b/src/amd_detail/backend/fallback.cpp index b82da993..33bb9c08 100644 --- a/src/amd_detail/backend/fallback.cpp +++ b/src/amd_detail/backend/fallback.cpp @@ -74,7 +74,7 @@ Fallback::_io_impl(IoType type, std::shared_ptr file, std::shared_ptr file, std::shared_ptr stream) { - size_t limited_size = min(*size_p, hipFile::MAX_RW_COUNT); + size_t limited_size = min(*size_p, hipFile::getMaxRwCount()); if (!paramsValid(buffer, limited_size, *file_offset_p, *buffer_offset_p)) { throw std::invalid_argument("The selected file or buffer region is invalid"); @@ -226,7 +226,7 @@ async_io_bind_params(void *userargs) const hoff_t *file_offset = get_variant_ptr(op->file_offset); op->file_offset.emplace(*file_offset); const size_t *size = get_variant_ptr(op->size); - op->size = std::min(*size, hipFile::MAX_RW_COUNT); + op->size = std::min(*size, hipFile::getMaxRwCount()); if (std::get(op->size) > op->submitted_size) { op->bytes_transferred_internal = -hipFileInvalidValue; diff --git a/src/amd_detail/backend/fastpath.cpp b/src/amd_detail/backend/fastpath.cpp index 3ea3fe1f..bf122862 100644 --- a/src/amd_detail/backend/fastpath.cpp +++ b/src/amd_detail/backend/fastpath.cpp @@ -184,7 +184,7 @@ Fastpath::_io_impl(IoType type, shared_ptr file, shared_ptr buff // Illegal Seek error is returned. To avoid this, hipFile limits IO size to // MAX_RW_COUNT. When amdgpu/kfd properly handles IO sizes > MAX_RW_COUNT // this can be removed. - size = std::min(size, MAX_RW_COUNT); + size = std::min(size, hipFile::getMaxRwCount()); // Ensure HIP Runtime is initialized. This is a temporary fix to a SEGFAULT // in the HIP Runtime when hipFileRead/hipFileWrite is the first HIP API diff --git a/test/amd_detail/async.cpp b/test/amd_detail/async.cpp index 5749b5c9..a474c152 100644 --- a/test/amd_detail/async.cpp +++ b/test/amd_detail/async.cpp @@ -163,14 +163,14 @@ TEST_F(HipFileAsyncOp, AsyncOpFallbackLimitsMaxIoSize) ssize_t bytes_transferred = 0; auto op_data = std::shared_ptr(new uint8_t[sizeof(AsyncOpFallback)]); auto bounce_buffer = std::shared_ptr(new uint8_t[1_KiB]); - EXPECT_CALL(mhip, hipHostMalloc(hipFile::MAX_RW_COUNT, _)).WillOnce(Return(bounce_buffer.get())); + EXPECT_CALL(mhip, hipHostMalloc(hipFile::getMaxRwCount(), _)).WillOnce(Return(bounce_buffer.get())); EXPECT_CALL(mhip, hipHostMalloc(sizeof(AsyncOpFallback), _)).WillOnce(Return(op_data.get())); EXPECT_CALL(mhip, hipHostGetDevicePointer(Eq(bounce_buffer.get()), _)); EXPECT_CALL(mhip, hipHostFree(Eq(bounce_buffer.get()))); EXPECT_CALL(mhip, hipHostFree(Eq(op_data.get()))); auto op = std::shared_ptr(new AsyncOpFallback{ IoType::Read, file, buffer, stream, &size, &file_offset, &buffer_offset, &bytes_transferred}); - ASSERT_EQ(op->submitted_size, hipFile::MAX_RW_COUNT); + ASSERT_EQ(op->submitted_size, hipFile::getMaxRwCount()); } TEST_F(HipFileAsyncOp, AsyncOpFallback_new_failure_throws_bad_alloc) @@ -619,16 +619,16 @@ TEST_F(AsyncIoOpCleanup, cleanupInvalidOpSetsError) struct AsyncIoOpLimitedSize : public AsyncIoOp { void SetUp() override { - size = hipFile::MAX_RW_COUNT + 1; + size = hipFile::getMaxRwCount() + 1; AsyncIoOp::SetUp(); } }; TEST_F(AsyncIoOpLimitedSize, bindLimitsSize) { - EXPECT_CALL(*mbuffer, getLength).WillOnce(Return(hipFile::MAX_RW_COUNT)); + EXPECT_CALL(*mbuffer, getLength).WillOnce(Return(hipFile::getMaxRwCount())); async_io_bind_params(op.get()); - ASSERT_EQ(std::get(op->size), hipFile::MAX_RW_COUNT); + ASSERT_EQ(std::get(op->size), hipFile::getMaxRwCount()); } struct AsyncIoOpBindParams { diff --git a/test/amd_detail/fallback.cpp b/test/amd_detail/fallback.cpp index f257727a..bd4b2f21 100644 --- a/test/amd_detail/fallback.cpp +++ b/test/amd_detail/fallback.cpp @@ -259,7 +259,7 @@ TEST_P(FallbackParam, FallbackIoTruncatesSizeToMAX_RW_COUNT) { expect_buffer_registration(mhip, hipMemoryTypeDevice); auto buf{reinterpret_cast(0xABABABAB)}; - Context::get()->registerBuffer(buf, MAX_RW_COUNT + 1, 0); + Context::get()->registerBuffer(buf, hipFile::getMaxRwCount() + 1, 0); auto big_buffer{Context::get()->getRegisteredBuffer(buf)}; EXPECT_CALL(mcfg, fallback()).WillOnce(Return(true)); @@ -286,7 +286,8 @@ TEST_P(FallbackParam, FallbackIoTruncatesSizeToMAX_RW_COUNT) } EXPECT_CALL(msys, munmap); - ASSERT_EQ(MAX_RW_COUNT, Fallback().io(io_type, file, big_buffer, SIZE_MAX, 0, 0, 16 * 1024 * 1024)); + ASSERT_EQ(hipFile::getMaxRwCount(), + Fallback().io(io_type, file, big_buffer, SIZE_MAX, 0, 0, 16 * 1024 * 1024)); } TEST_P(FallbackParam, FallbackIoThrowsOnBounceBufferAllocationFailure) diff --git a/test/amd_detail/fastpath.cpp b/test/amd_detail/fastpath.cpp index 31a1e659..ce71179f 100644 --- a/test/amd_detail/fastpath.cpp +++ b/test/amd_detail/fastpath.cpp @@ -679,16 +679,16 @@ TEST_P(FastpathIoParam, IoSizeIsTruncatedToMaxRWCount) expect_io(DEFAULT_UNBUFFERED_FD, reinterpret_cast(DEFAULT_BUFFER_ADDR), buffer_size); switch (GetParam()) { case IoType::Read: - EXPECT_CALL(mhip, hipAmdFileRead(_, _, MAX_RW_COUNT, _)).WillOnce(Return(MAX_RW_COUNT)); + EXPECT_CALL(mhip, hipAmdFileRead(_, _, getMaxRwCount(), _)).WillOnce(Return(getMaxRwCount())); break; case IoType::Write: - EXPECT_CALL(mhip, hipAmdFileWrite(_, _, MAX_RW_COUNT, _)).WillOnce(Return(MAX_RW_COUNT)); + EXPECT_CALL(mhip, hipAmdFileWrite(_, _, getMaxRwCount(), _)).WillOnce(Return(getMaxRwCount())); break; default: FAIL() << "Invalid IoType"; } - ASSERT_EQ(Fastpath().io(GetParam(), mfile, mbuffer, io_size, 0, 0), MAX_RW_COUNT); + ASSERT_EQ(Fastpath().io(GetParam(), mfile, mbuffer, io_size, 0, 0), getMaxRwCount()); } // Note: Tests for fallback eligible exceptions are further down this file From a865f6bb7b7b81a2db15fa186ac1993f7247c48b Mon Sep 17 00:00:00 2001 From: Dana Robinson Date: Wed, 1 Apr 2026 13:55:01 -0600 Subject: [PATCH 4/6] Remove duplicate text from header --- include/hipfile.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/hipfile.h b/include/hipfile.h index ccf898b0..b1b40587 100644 --- a/include/hipfile.h +++ b/include/hipfile.h @@ -508,9 +508,6 @@ hipFileError_t hipFileBufDeregister(const void *buffer_base); * @brief Synchronously read data from a file into a GPU buffer * @ingroup file * - * hipFileRead() will transfer at most 0x7ffff000 (2,147,479,552) bytes, - * returning the number of bytes actually transferred. - * * @param [in] fh \hipfile_handle_param * @param [in] buffer_base \buffer_base_param * @param [in] size Number of bytes that should be read @@ -531,9 +528,6 @@ ssize_t hipFileRead(hipFileHandle_t fh, void *buffer_base, size_t size, hoff_t f * @brief Synchronously write data from a GPU buffer to a file * @ingroup file * - * hipFileWrite() will transfer at most 0x7ffff000 (2,147,479,552) bytes, - * returning the number of bytes actually transferred. - * * @param [in] fh \hipfile_handle_param * @param [in] buffer_base \buffer_base_param * @param [in] size Number of bytes that should be written From f2134b11d7b03e7edda4f9625e168848301d6218 Mon Sep 17 00:00:00 2001 From: Dana Robinson Date: Wed, 1 Apr 2026 13:56:45 -0600 Subject: [PATCH 5/6] Static cast INT_MAX to size_t --- src/amd_detail/backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/amd_detail/backend.h b/src/amd_detail/backend.h index 6639be64..d1182e98 100644 --- a/src/amd_detail/backend.h +++ b/src/amd_detail/backend.h @@ -45,7 +45,7 @@ getPageMask() [[nodiscard]] inline size_t getMaxRwCount() { - static const size_t value = INT_MAX & getPageMask(); + static const size_t value = static_cast(INT_MAX) & getPageMask(); return value; } From 00f0d13fd7b4fff47a7410e1403dc0a82f81e480 Mon Sep 17 00:00:00 2001 From: Dana Robinson Date: Wed, 1 Apr 2026 14:05:32 -0600 Subject: [PATCH 6/6] Address Copilot concerns * Minor change to Doxygen macro * _SC_PAGE_SIZE --> _SC_PAGESIZE --- docs/Doxyfile.in | 2 +- src/amd_detail/backend.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index 95110db3..70058fb9 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -289,7 +289,7 @@ ALIASES += buffer_base_param="Base address of the GPU buffer" ALIASES += batch_handle_param="Opaque handle for batch operations" ALIASES += hipstream_param="The stream used for async IO requests" ALIASES += hipstream_if_null="If NULL, this request will be synchronous" -ALIASES += max_io_size_note="@note The maximum IO size is determined by the Linux kernel and is currently 2^31 - PAGE_SIZE" +ALIASES += max_io_size_note="@note The maximum IO size is determined by the Linux kernel and is currently 2^31 - the system page size" # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For diff --git a/src/amd_detail/backend.h b/src/amd_detail/backend.h index d1182e98..3a2e33c2 100644 --- a/src/amd_detail/backend.h +++ b/src/amd_detail/backend.h @@ -24,9 +24,9 @@ namespace hipFile { getPageSize() { static const size_t value = [] { - const long v = sysconf(_SC_PAGE_SIZE); + const long v = sysconf(_SC_PAGESIZE); if (v == -1) { - throw std::runtime_error("sysconf(_SC_PAGE_SIZE) failed"); + throw std::runtime_error("sysconf(_SC_PAGESIZE) failed"); } return static_cast(v); }();