diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index ad39635e..70058fb9 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -289,6 +289,7 @@ ALIASES += buffer_base_param="Base address of the GPU buffer" ALIASES += batch_handle_param="Opaque handle for batch operations" ALIASES += hipstream_param="The stream used for async IO requests" ALIASES += hipstream_if_null="If NULL, this request will be synchronous" +ALIASES += max_io_size_note="@note The maximum size of a single IO is determined by the Linux kernel and is currently 2^31 minus the system page size (0x7ffff000 bytes with 4 KiB pages). Larger requests are truncated to this limit." # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For diff --git a/include/hipfile.h b/include/hipfile.h index e6ffe649..b1b40587 100644 --- a/include/hipfile.h +++ b/include/hipfile.h @@ -508,15 +508,14 @@ hipFileError_t hipFileBufDeregister(const void *buffer_base); * @brief Synchronously read data from a file into a GPU buffer * @ingroup file * - * hipFileRead() will transfer at most 0x7ffff000 (2,147,479,552) bytes, - * returning the number of bytes actually transferred. - * * @param [in] fh \hipfile_handle_param * @param [in] buffer_base \buffer_base_param * @param [in] size Number of bytes that should be read * @param [in] file_offset Offset into the file that should be read from - * @param [in] buffer_offset Offset of the GPU buffer that that the data should be written to + * @param [in] buffer_offset Offset of the GPU buffer that the data should be written to * + * \max_io_size_note + * * @return if >= 0: Number of bytes read * @return if -1: System error (check `errno` for the specific error) * @return else: Negative value of the related hipFileOpError_t @@ -529,15 +528,14 @@ ssize_t hipFileRead(hipFileHandle_t fh, void *buffer_base, size_t size, hoff_t f * @brief Synchronously write data from a GPU buffer to a file * @ingroup file * - * hipFileWrite() will transfer at most 0x7ffff000 (2,147,479,552) bytes, - * returning the number of bytes actually transferred.
- * * @param [in] fh \hipfile_handle_param * @param [in] buffer_base \buffer_base_param * @param [in] size Number of bytes that should be written * @param [in] file_offset Offset into the file that should be written to * @param [in] buffer_offset Offset of the GPU buffer that the data should be read from * + * \max_io_size_note + * * @return if >= 0: Number of bytes written * @return if -1: System error (check `errno` for the specific error) * @return else: Negative value of the related hipFileOpError_t @@ -838,6 +836,8 @@ void hipFileBatchIODestroy(hipFileBatchHandle_t batch_idp); * @param [out] bytes_read_p Number of bytes read * @param [in] stream \hipstream_param. \hipstream_if_null. * + * \max_io_size_note + * * @return \hipfile_error_return */ HIPFILE_API @@ -856,6 +856,8 @@ hipFileError_t hipFileReadAsync(hipFileHandle_t fh, void *buffer_base, size_t *s * @param [out] bytes_written_p Number of bytes written * @param [in] stream \hipstream_param. \hipstream_if_null. * + * \max_io_size_note + * * @return \hipfile_error_return */ HIPFILE_API diff --git a/src/amd_detail/backend.h b/src/amd_detail/backend.h index e35e8d6b..3a2e33c2 100644 --- a/src/amd_detail/backend.h +++ b/src/amd_detail/backend.h @@ -11,6 +11,7 @@ #include "io.h" #include "sys.h" +#include #include #include #include @@ -19,9 +20,34 @@ namespace hipFile { +[[nodiscard]] inline size_t +getPageSize() +{ + static const size_t value = [] { + const long v = sysconf(_SC_PAGESIZE); + if (v == -1) { + throw std::runtime_error("sysconf(_SC_PAGESIZE) failed"); + } + return static_cast(v); + }(); + return value; +} + +[[nodiscard]] inline size_t +getPageMask() +{ + static const size_t value = ~(getPageSize() - 1); + return value; +} + // The maximum number of bytes that can be transferred in a single read() or -// write() system call. Mirrors kernel's MAX_RW_COUNT -static const size_t MAX_RW_COUNT = 0x7ffff000; +// write() system call. Calculation is same as kernel's MAX_RW_COUNT. 
+[[nodiscard]] inline size_t +getMaxRwCount() +{ + static const size_t value = static_cast(INT_MAX) & getPageMask(); + return value; +} /// @brief Backend is not enabled struct BackendDisabled : public std::runtime_error { diff --git a/src/amd_detail/backend/asyncop-fallback.cpp b/src/amd_detail/backend/asyncop-fallback.cpp index e1306947..c68f00df 100644 --- a/src/amd_detail/backend/asyncop-fallback.cpp +++ b/src/amd_detail/backend/asyncop-fallback.cpp @@ -43,7 +43,7 @@ AsyncOpFallback::AsyncOpFallback(IoType _io_type, std::shared_ptr _file, size_t *_size, hoff_t *_file_offset, hoff_t *_buffer_offset, ssize_t *_bytes_transferred) : AsyncOp{_io_type, _file, _buffer, _stream, _size, _file_offset, _buffer_offset, _bytes_transferred}, - submitted_size{std::min(*_size, hipFile::MAX_RW_COUNT)}, bytes_transferred_internal{0}, + submitted_size{std::min(*_size, hipFile::getMaxRwCount())}, bytes_transferred_internal{0}, gpu_buffer{buffer->getBuffer()}, bounce_buffer_dev_ptr{nullptr}, bounce_buffer{nullptr, [](void *addr) { (void)addr; }} { diff --git a/src/amd_detail/backend/fallback.cpp b/src/amd_detail/backend/fallback.cpp index b82da993..33bb9c08 100644 --- a/src/amd_detail/backend/fallback.cpp +++ b/src/amd_detail/backend/fallback.cpp @@ -74,7 +74,7 @@ Fallback::_io_impl(IoType type, std::shared_ptr file, std::shared_ptr file, std::shared_ptr stream) { - size_t limited_size = min(*size_p, hipFile::MAX_RW_COUNT); + size_t limited_size = min(*size_p, hipFile::getMaxRwCount()); if (!paramsValid(buffer, limited_size, *file_offset_p, *buffer_offset_p)) { throw std::invalid_argument("The selected file or buffer region is invalid"); @@ -226,7 +226,7 @@ async_io_bind_params(void *userargs) const hoff_t *file_offset = get_variant_ptr(op->file_offset); op->file_offset.emplace(*file_offset); const size_t *size = get_variant_ptr(op->size); - op->size = std::min(*size, hipFile::MAX_RW_COUNT); + op->size = std::min(*size, hipFile::getMaxRwCount()); if (std::get(op->size) > 
op->submitted_size) { op->bytes_transferred_internal = -hipFileInvalidValue; diff --git a/src/amd_detail/backend/fastpath.cpp b/src/amd_detail/backend/fastpath.cpp index 3ea3fe1f..bf122862 100644 --- a/src/amd_detail/backend/fastpath.cpp +++ b/src/amd_detail/backend/fastpath.cpp @@ -184,7 +184,7 @@ Fastpath::_io_impl(IoType type, shared_ptr file, shared_ptr buff // Illegal Seek error is returned. To avoid this, hipFile limits IO size to // MAX_RW_COUNT. When amdgpu/kfd properly handles IO sizes > MAX_RW_COUNT // this can be removed. - size = std::min(size, MAX_RW_COUNT); + size = std::min(size, hipFile::getMaxRwCount()); // Ensure HIP Runtime is initialized. This is a temporary fix to a SEGFAULT // in the HIP Runtime when hipFileRead/hipFileWrite is the first HIP API diff --git a/test/amd_detail/async.cpp b/test/amd_detail/async.cpp index 5749b5c9..a474c152 100644 --- a/test/amd_detail/async.cpp +++ b/test/amd_detail/async.cpp @@ -163,14 +163,14 @@ TEST_F(HipFileAsyncOp, AsyncOpFallbackLimitsMaxIoSize) ssize_t bytes_transferred = 0; auto op_data = std::shared_ptr(new uint8_t[sizeof(AsyncOpFallback)]); auto bounce_buffer = std::shared_ptr(new uint8_t[1_KiB]); - EXPECT_CALL(mhip, hipHostMalloc(hipFile::MAX_RW_COUNT, _)).WillOnce(Return(bounce_buffer.get())); + EXPECT_CALL(mhip, hipHostMalloc(hipFile::getMaxRwCount(), _)).WillOnce(Return(bounce_buffer.get())); EXPECT_CALL(mhip, hipHostMalloc(sizeof(AsyncOpFallback), _)).WillOnce(Return(op_data.get())); EXPECT_CALL(mhip, hipHostGetDevicePointer(Eq(bounce_buffer.get()), _)); EXPECT_CALL(mhip, hipHostFree(Eq(bounce_buffer.get()))); EXPECT_CALL(mhip, hipHostFree(Eq(op_data.get()))); auto op = std::shared_ptr(new AsyncOpFallback{ IoType::Read, file, buffer, stream, &size, &file_offset, &buffer_offset, &bytes_transferred}); - ASSERT_EQ(op->submitted_size, hipFile::MAX_RW_COUNT); + ASSERT_EQ(op->submitted_size, hipFile::getMaxRwCount()); } TEST_F(HipFileAsyncOp, AsyncOpFallback_new_failure_throws_bad_alloc) @@ -619,16 
+619,16 @@ TEST_F(AsyncIoOpCleanup, cleanupInvalidOpSetsError) struct AsyncIoOpLimitedSize : public AsyncIoOp { void SetUp() override { - size = hipFile::MAX_RW_COUNT + 1; + size = hipFile::getMaxRwCount() + 1; AsyncIoOp::SetUp(); } }; TEST_F(AsyncIoOpLimitedSize, bindLimitsSize) { - EXPECT_CALL(*mbuffer, getLength).WillOnce(Return(hipFile::MAX_RW_COUNT)); + EXPECT_CALL(*mbuffer, getLength).WillOnce(Return(hipFile::getMaxRwCount())); async_io_bind_params(op.get()); - ASSERT_EQ(std::get(op->size), hipFile::MAX_RW_COUNT); + ASSERT_EQ(std::get(op->size), hipFile::getMaxRwCount()); } struct AsyncIoOpBindParams { diff --git a/test/amd_detail/fallback.cpp b/test/amd_detail/fallback.cpp index f257727a..bd4b2f21 100644 --- a/test/amd_detail/fallback.cpp +++ b/test/amd_detail/fallback.cpp @@ -259,7 +259,7 @@ TEST_P(FallbackParam, FallbackIoTruncatesSizeToMAX_RW_COUNT) { expect_buffer_registration(mhip, hipMemoryTypeDevice); auto buf{reinterpret_cast(0xABABABAB)}; - Context::get()->registerBuffer(buf, MAX_RW_COUNT + 1, 0); + Context::get()->registerBuffer(buf, hipFile::getMaxRwCount() + 1, 0); auto big_buffer{Context::get()->getRegisteredBuffer(buf)}; EXPECT_CALL(mcfg, fallback()).WillOnce(Return(true)); @@ -286,7 +286,8 @@ TEST_P(FallbackParam, FallbackIoTruncatesSizeToMAX_RW_COUNT) } EXPECT_CALL(msys, munmap); - ASSERT_EQ(MAX_RW_COUNT, Fallback().io(io_type, file, big_buffer, SIZE_MAX, 0, 0, 16 * 1024 * 1024)); + ASSERT_EQ(hipFile::getMaxRwCount(), + Fallback().io(io_type, file, big_buffer, SIZE_MAX, 0, 0, 16 * 1024 * 1024)); } TEST_P(FallbackParam, FallbackIoThrowsOnBounceBufferAllocationFailure) diff --git a/test/amd_detail/fastpath.cpp b/test/amd_detail/fastpath.cpp index 31a1e659..ce71179f 100644 --- a/test/amd_detail/fastpath.cpp +++ b/test/amd_detail/fastpath.cpp @@ -679,16 +679,16 @@ TEST_P(FastpathIoParam, IoSizeIsTruncatedToMaxRWCount) expect_io(DEFAULT_UNBUFFERED_FD, reinterpret_cast(DEFAULT_BUFFER_ADDR), buffer_size); switch (GetParam()) { case IoType::Read: - 
EXPECT_CALL(mhip, hipAmdFileRead(_, _, MAX_RW_COUNT, _)).WillOnce(Return(MAX_RW_COUNT)); + EXPECT_CALL(mhip, hipAmdFileRead(_, _, getMaxRwCount(), _)).WillOnce(Return(getMaxRwCount())); break; case IoType::Write: - EXPECT_CALL(mhip, hipAmdFileWrite(_, _, MAX_RW_COUNT, _)).WillOnce(Return(MAX_RW_COUNT)); + EXPECT_CALL(mhip, hipAmdFileWrite(_, _, getMaxRwCount(), _)).WillOnce(Return(getMaxRwCount())); break; default: FAIL() << "Invalid IoType"; } - ASSERT_EQ(Fastpath().io(GetParam(), mfile, mbuffer, io_size, 0, 0), MAX_RW_COUNT); + ASSERT_EQ(Fastpath().io(GetParam(), mfile, mbuffer, io_size, 0, 0), getMaxRwCount()); } // Note: Tests for fallback eligible exceptions are further down this file