diff --git a/CHANGELOG.md b/CHANGELOG.md index f1119aa2..431de028 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ * The CMake namespace was changed from `roc::` to `hip::` * `AIS_BUILD_EXAMPLES` has been renamed to `AIS_INSTALL_EXAMPLES` * `AIS_USE_SANITIZERS` now also enables the following sanitizers: integer, float-divide-by-zero, local-bounds, vptr, nullability (in addition to address, leak, and undefined). Sanitizers should also now emit usable stack trace info. +* Added check in the Fastpath/AIS backend to ensure the HIP Runtime is initialized. This avoids causing a segfault in the HIP Runtime. ### Removed * The rocFile library has been completely removed and the code is now a part of hipFile. diff --git a/src/amd_detail/backend/fastpath.cpp b/src/amd_detail/backend/fastpath.cpp index aba587c8..70bf39af 100644 --- a/src/amd_detail/backend/fastpath.cpp +++ b/src/amd_detail/backend/fastpath.cpp @@ -174,6 +174,15 @@ Fastpath::io(IoType type, shared_ptr file, shared_ptr buffer, si // this can be removed. size = std::min(size, MAX_RW_COUNT); + // Ensure HIP Runtime is initialized. This is a temporary fix to a SEGFAULT + // in the HIP Runtime when hipFileRead/hipFileWrite is the first HIP API + // call of a new thread. + thread_local bool hip_inited{false}; + if (!hip_inited) { + Context::get()->hipInit(); + hip_inited = true; + } + switch (type) { case IoType::Read: nbytes = Context::get()->hipAmdFileRead(handle, devptr, size, file_offset); diff --git a/src/amd_detail/hip.cpp b/src/amd_detail/hip.cpp index 963495d6..9396eda2 100644 --- a/src/amd_detail/hip.cpp +++ b/src/amd_detail/hip.cpp @@ -198,4 +198,10 @@ Hip::hipStreamGetDevice(hipStream_t stream) const return device_id; } +void +Hip::hipInit() const +{ + (void)throwOnHipError(::hipInit(0)); +} + } diff --git a/src/amd_detail/hip.h b/src/amd_detail/hip.h index 8b399d3b..66393043 100644 --- a/src/amd_detail/hip.h +++ b/src/amd_detail/hip.h @@ -74,6 +74,7 @@ struct Hip { size_t sharedMemBytes, hipStream_t stream) const; virtual int hipDeviceGetAttribute(hipDeviceAttribute_t attr, int device_id) const; virtual hipDevice_t hipStreamGetDevice(hipStream_t stream) const; + virtual void hipInit() const; struct RuntimeError : public std::runtime_error { hipError_t error; diff --git a/test/amd_detail/fastpath.cpp b/test/amd_detail/fastpath.cpp index 258f29d0..47b05d6b 100644 --- a/test/amd_detail/fastpath.cpp +++ b/test/amd_detail/fastpath.cpp @@ -327,36 +327,56 @@ INSTANTIATE_TEST_SUITE_P(FastpathTest, FastpathUnalignedBufferOffsetsParam, struct FastpathIoParam : public FastpathTestBase, public TestWithParam { - void expect_io() + StrictMock mhip; + + // Setup expectations on the mocks called to validate IO arguments + void expect_validate() { EXPECT_CALL(*mbuffer, getBuffer).WillOnce(Return(DEFAULT_BUFFER_ADDR)); EXPECT_CALL(*mbuffer, getLength).WillOnce(Return(DEFAULT_BUFFER_LENGTH)); EXPECT_CALL(*mfile, getUnbufferedFd).WillOnce(Return(DEFAULT_UNBUFFERED_FD)); } - void expect_io(optional fd, void *bufptr, size_t buflen) + // Setup expectations on the mocks called to validate IO arguments and + // the mocks called up to, but not including, hipAmdFileRead/hipAmdFileWrite + void expect_io() + { + expect_validate(); + EXPECT_CALL(mhip, hipInit); + } + + // Setup expectations on the mocks called to validate IO arguments + void expect_validate(optional fd, void *bufptr, size_t buflen) { EXPECT_CALL(*mbuffer, getBuffer).WillOnce(Return(bufptr)); EXPECT_CALL(*mbuffer, getLength).WillOnce(Return(buflen)); EXPECT_CALL(*mfile, getUnbufferedFd).WillOnce(Return(fd)); } + + // Setup expectations on the mocks called to validate IO arguments and + // the mocks called up to, but not including, hipAmdFileRead/hipAmdFileWrite + void expect_io(optional fd, void *bufptr, size_t buflen) + { + expect_validate(fd, bufptr, buflen); + EXPECT_CALL(mhip, hipInit); + } }; TEST_P(FastpathIoParam, IoRejectsNegativeFileOffset) { - expect_io(); + expect_validate(); ASSERT_THROW(Fastpath().io(GetParam(), mfile, mbuffer, 0, -1, 0), std::invalid_argument); } TEST_P(FastpathIoParam, IoRejectsNegativeBufferOffset) { - expect_io(); + expect_validate(); ASSERT_THROW(Fastpath().io(GetParam(), mfile, mbuffer, DEFAULT_IO_SIZE, 0, -1), std::invalid_argument); } TEST_P(FastpathIoParam, IoRejectsBufferOffsetLargerThanBufferLength) { - expect_io(); + expect_validate(); ASSERT_THROW(Fastpath().io(GetParam(), mfile, mbuffer, DEFAULT_IO_SIZE, 0, static_cast(DEFAULT_BUFFER_LENGTH) + 1), std::invalid_argument); @@ -364,22 +384,20 @@ TEST_P(FastpathIoParam, IoRejectsBufferOffsetLargerThanBufferLength) TEST_P(FastpathIoParam, IoRejectsIoSizeLargerThanBufferLength) { - expect_io(); + expect_validate(); ASSERT_THROW(Fastpath().io(GetParam(), mfile, mbuffer, DEFAULT_BUFFER_LENGTH + 1, 0, 0), std::invalid_argument); } TEST_P(FastpathIoParam, IoRejectsIoThatCouldOverflowBuffer) { - expect_io(); + expect_validate(); ASSERT_THROW(Fastpath().io(GetParam(), mfile, mbuffer, DEFAULT_BUFFER_LENGTH, DEFAULT_FILE_OFFSET, 1), std::invalid_argument); } TEST_P(FastpathIoParam, IoConfiguresHandle) { - StrictMock mhip; - hipAmdFileHandle_t handle{}; handle.fd = DEFAULT_UNBUFFERED_FD.value(); @@ -400,8 +418,6 @@ TEST_P(FastpathIoParam, IoConfiguresHandle) TEST_P(FastpathIoParam, IoCalculatesCorrectDevicePointer) { - StrictMock mhip; - off_t buffer_offset{0x1000}; void *buffer_addr{reinterpret_cast(0x20000)}; void *expected_device_ptr{reinterpret_cast(0x21000)}; @@ -423,8 +439,6 @@ TEST_P(FastpathIoParam, IoCalculatesCorrectDevicePointer) TEST_P(FastpathIoParam, IoPassesThroughSizeAndFileOffset) { - StrictMock mhip; - expect_io(); switch (GetParam()) { case IoType::Read: @@ -442,8 +456,6 @@ TEST_P(FastpathIoParam, IoPassesThroughSizeAndFileOffset) TEST_P(FastpathIoParam, IoReturnsBytesTransferred) { - StrictMock mhip; - expect_io(); switch (GetParam()) { case IoType::Read: @@ -463,8 +475,6 @@ TEST_P(FastpathIoParam, IoReturnsBytesTransferred) TEST_P(FastpathIoParam, IoReturnsBytesTransferredShort) { - StrictMock mhip; - size_t nbytes{4096}; ASSERT_LT(nbytes, DEFAULT_IO_SIZE); @@ -489,8 +499,6 @@ TEST_P(FastpathIoParam, IoReturnsBytesTransferredShort) // Ensure hip errors thrown by Hip::hipAmdFileRead/Hip::hipAmdFileWrite are not masked TEST_P(FastpathIoParam, IoDoesNotMaskHipRuntimeError) { - StrictMock mhip; - expect_io(); switch (GetParam()) { case IoType::Read: @@ -511,8 +519,6 @@ TEST_P(FastpathIoParam, IoDoesNotMaskHipRuntimeError) // Ensure hip errors thrown by Hip::hipAmdFileRead/Hip::hipAmdFileWrite are not masked TEST_P(FastpathIoParam, IoDoesNotMaskSystemError) { - StrictMock mhip; - expect_io(); switch (GetParam()) { case IoType::Read: @@ -533,8 +539,6 @@ TEST_P(FastpathIoParam, IoDoesNotMaskSystemError) // Ensure IO size is truncated to MAX_RW_COUNT TEST_P(FastpathIoParam, IoSizeIsTruncatedToMaxRWCount) { - StrictMock mhip; - const size_t buffer_size{SIZE_MAX}; const size_t io_size{SIZE_MAX}; diff --git a/test/amd_detail/mhip.h b/test/amd_detail/mhip.h index f8d0d630..c5352cf5 100644 --- a/test/amd_detail/mhip.h +++ b/test/amd_detail/mhip.h @@ -48,6 +48,6 @@ struct MHip : Hip { (const, override)); MOCK_METHOD(int, hipDeviceGetAttribute, (hipDeviceAttribute_t attr, int device_id), (const, override)); MOCK_METHOD(hipDevice_t, hipStreamGetDevice, (hipStream_t stream), (const, override)); + MOCK_METHOD(void, hipInit, (), (const, override)); }; - }