Skip to content

Commit 1fe9289

Browse files
authored
⚡ (patch) Preload unwind instructions for cxa_throw & rethrow (#83)
On the stm32f103c8 benchmark "0% cleanup": 10x frames: 96us - 30x frames: 201.3us - 50x frames: 308.6us - 70x frames: 389.2us (previously 422us). With 70x frames we've reduced the runtime by 32.8us.
1 parent 69ee942 commit 1fe9289

5 files changed

Lines changed: 91 additions & 41 deletions

File tree

benchmark/platforms/lpc4078.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include <libhal-arm-mcu/dwt_counter.hpp>
1616
#include <libhal-arm-mcu/lpc40/clock.hpp>
17+
#include <libhal-exceptions/control.hpp>
1718

1819
#include <resource_list.hpp>
1920

demos/platforms/lpc4078.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include <libhal-arm-mcu/dwt_counter.hpp>
1616
#include <libhal-arm-mcu/lpc40/clock.hpp>
17+
#include <libhal-exceptions/control.hpp>
1718

1819
#include <resource_list.hpp>
1920

demos/platforms/stm32f103c8.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include <libhal-arm-mcu/dwt_counter.hpp>
1616
#include <libhal-arm-mcu/stm32f1/clock.hpp>
17+
#include <libhal-exceptions/control.hpp>
1718

1819
#include <resource_list.hpp>
1920

include/libhal-exceptions/control.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,5 +53,4 @@ std::terminate_handler set_terminate(
5353
* @return std::terminate_handler - the currently set terminate handler
5454
*/
5555
std::terminate_handler get_terminate() noexcept;
56-
5756
} // namespace hal

src/arm_cortex/estell/exception.cpp

Lines changed: 88 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,29 @@
2323

2424
#include "internal.hpp"
2525

26+
// NOLINTBEGIN(bugprone-reserved-identifier)
27+
// NOLINTBEGIN(readability-identifier-naming)
28+
extern "C"
29+
{
30+
void _exit([[maybe_unused]] int rc);
31+
void* __wrap___cxa_allocate_exception(size_t);
32+
void __wrap___cxa_free_exception(void*);
33+
void __wrap___cxa_call_unexpected(void*);
34+
void __wrap___cxa_end_catch();
35+
void* __wrap___cxa_begin_catch(void*);
36+
void __wrap___cxa_end_cleanup();
37+
void __wrap__Unwind_Resume(void*);
38+
void __wrap___cxa_rethrow() noexcept(false);
39+
void __wrap___cxa_throw(ke::exception_ptr p_thrown_exception,
40+
std::type_info* p_type_info,
41+
ke::destructor_t p_destructor) noexcept(false);
42+
} // extern "C"
43+
// NOLINTEND(readability-identifier-naming)
44+
// NOLINTEND(bugprone-reserved-identifier)
45+
2646
namespace ke {
2747

28-
union instructions_t
48+
struct instructions_t
2949
{
3050
// Why the length of 8? ARM unwind instructions are capped at 7 bytes for all
3151
// possible functions. The 8th instruction will always be the finish byte.
@@ -128,23 +148,6 @@ index_entry_t const& get_index_entry(std::uint32_t p_program_counter)
128148
return *(index - 1);
129149
}
130150

131-
[[gnu::always_inline]] inline void pop_registers(cortex_m_cpu& p_cpu,
132-
std::uint32_t mask)
133-
{
134-
// The mask may not demand that the stack pointer be popped, but the
135-
// stack pointer will still need to be popped anyway, so this check
136-
// determines if the mask handles this or not.
137-
std::uint32_t const* stack_pointer = *p_cpu.sp;
138-
139-
while (mask) {
140-
auto reg_to_restore = std::countr_zero(mask);
141-
mask &= ~(1 << reg_to_restore);
142-
p_cpu[reg_to_restore] = *(stack_pointer++);
143-
}
144-
145-
p_cpu.sp = stack_pointer;
146-
}
147-
148151
[[gnu::always_inline]] inline constexpr std::uint32_t read_uleb128(
149152
std::uint8_t const** p_ptr)
150153
{
@@ -1669,35 +1672,20 @@ void unwind_frame(instructions_t const& p_instructions, cortex_m_cpu& p_cpu)
16691672
}
16701673

16711674
[[gnu::always_inline]]
1672-
inline instructions_t create_instructions_from_entry(
1673-
exception_object const& p_exception_object)
1675+
constexpr instructions_t personality_to_unwind_instructions(
1676+
std::uint32_t const* p_handler_data)
16741677
{
16751678
constexpr auto personality_type = hal::bit_mask::from<24, 27>();
1676-
constexpr auto generic = hal::bit_mask::from<31>();
1677-
1678-
instructions_t unwind{};
1679-
1680-
std::uint32_t const* handler_data = nullptr;
1681-
auto const& entry = *p_exception_object.cache.entry_ptr;
1682-
if (entry.has_inlined_personality()) {
1683-
handler_data = &entry.personality_offset;
1684-
} else {
1685-
auto const* personality = p_exception_object.cache.personality;
1686-
if (hal::bit_extract<generic>(personality[0])) {
1687-
handler_data = &personality[0];
1688-
} else {
1689-
handler_data = &personality[1];
1690-
}
1691-
}
16921679

1693-
std::uint32_t header = handler_data[0];
1680+
instructions_t unwind;
1681+
std::uint32_t header = p_handler_data[0];
16941682

16951683
if (hal::bit_extract<personality_type>(header) == 0x0) {
16961684
unwind.data[0] = hal::bit_extract<su16::instruction0>(header);
16971685
unwind.data[1] = hal::bit_extract<su16::instruction1>(header);
16981686
unwind.data[2] = hal::bit_extract<su16::instruction2>(header);
16991687
} else {
1700-
std::uint32_t first_word = handler_data[1];
1688+
std::uint32_t first_word = p_handler_data[1];
17011689
std::uint32_t length = hal::bit_extract<lu16_32::length>(header);
17021690
switch (length) {
17031691
case 1: {
@@ -1710,7 +1698,7 @@ inline instructions_t create_instructions_from_entry(
17101698
break;
17111699
}
17121700
case 2: {
1713-
uint32_t last_word = handler_data[2];
1701+
uint32_t last_word = p_handler_data[2];
17141702
unwind.data[0] = hal::bit_extract<lu16_32::instruction0>(header);
17151703
unwind.data[1] = hal::bit_extract<lu16_32::instruction1>(header);
17161704
unwind.data[2] = hal::bit_extract<lu16_32::instruction2>(first_word);
@@ -1739,10 +1727,31 @@ inline instructions_t create_instructions_from_entry(
17391727
}
17401728
}
17411729
}
1742-
17431730
return unwind;
17441731
}
17451732

1733+
[[gnu::always_inline]]
1734+
constexpr instructions_t create_instructions_from_entry(
1735+
exception_object const& p_exception_object)
1736+
{
1737+
constexpr auto generic = hal::bit_mask::from<31>();
1738+
1739+
std::uint32_t const* handler_data = nullptr;
1740+
auto const& entry = *p_exception_object.cache.entry_ptr;
1741+
if (entry.has_inlined_personality()) {
1742+
handler_data = &entry.personality_offset;
1743+
} else {
1744+
auto const* personality = p_exception_object.cache.personality;
1745+
if (hal::bit_extract<generic>(personality[0])) {
1746+
handler_data = &personality[0];
1747+
} else {
1748+
handler_data = &personality[1];
1749+
}
1750+
}
1751+
1752+
return personality_to_unwind_instructions(handler_data);
1753+
}
1754+
17461755
void raise_exception(exception_object& p_exception_object)
17471756
{
17481757
while (true) {
@@ -1788,6 +1797,42 @@ void raise_exception(exception_object& p_exception_object)
17881797
}
17891798
}
17901799
}
1800+
1801+
consteval instructions_t spare_instruction()
1802+
{
1803+
instructions_t spare{};
1804+
// This is the SPARE instruction directly after the FINISH instruction within
1805+
// the ARM EH ABI.
1806+
spare.data[0] = 0b10110001;
1807+
spare.data[1] = 0b00000000;
1808+
return spare;
1809+
}
1810+
1811+
template<typename F>
1812+
instructions_t cache(F* p_function_to_be_cached)
1813+
{
1814+
auto const function_address =
1815+
reinterpret_cast<std::uintptr_t>(p_function_to_be_cached);
1816+
1817+
auto const& entry = ke::get_index_entry(function_address);
1818+
if (entry.has_inlined_personality()) {
1819+
return personality_to_unwind_instructions(&entry.personality_offset);
1820+
} else {
1821+
// Set unwind information for this cached item to SPARE (the one right
1822+
// after FINISH in the ARM EH-ABI), which will call terminate. Because
1823+
// this cache call will occur at static initialization, we cannot throw
1824+
// and hope to be caught by anything. Nor can we terminate as the
1825+
// application hasn't provided a terminate handler. We could busy loop
1826+
// here as well and I'm not opposed to that. But for now, we'll just
1827+
// terminate when either of our cached functions isn't re-entrant. In
1828+
// general, we must make sure that our cached functions are re-entrant to
1829+
// reduce on cycles.
1830+
return spare_instruction();
1831+
}
1832+
}
1833+
1834+
instructions_t cxa_throw_unwind_instructions = cache(&__wrap___cxa_throw);
1835+
instructions_t cxa_rethrow_unwind_instructions = cache(&__wrap___cxa_rethrow);
17911836
} // namespace ke
17921837

17931838
// NOLINTBEGIN(bugprone-reserved-identifier)
@@ -2036,6 +2081,8 @@ extern "C"
20362081
// have in the C++ throw RTTI list, might as well
20372082
// reuse it here.
20382083
}
2084+
ke::unwind_frame(ke::cxa_rethrow_unwind_instructions, exception_object.cpu);
2085+
20392086
// Raise exception returns when an error or call to terminate has been found
20402087
ke::raise_exception(exception_object);
20412088
// TODO(#38): this area is considered a catch block, meaning that the
@@ -2066,6 +2113,7 @@ extern "C"
20662113
// have in the C++ throw RTTI list, might as well
20672114
// reuse it here.
20682115
}
2116+
ke::unwind_frame(ke::cxa_throw_unwind_instructions, exception_object.cpu);
20692117
// Raise exception returns when an error or call to terminate has been found
20702118
ke::raise_exception(exception_object);
20712119
// TODO(#38): this area is considered a catch block, meaning that the

0 commit comments

Comments
 (0)