Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/felix86/common/config.inc
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,4 @@ X(Performance, bool, auto_compress, false, FELIX86_AUTO_COMPRESS, "Automatically
X(Performance, bool, scan_ahead_multi, true, FELIX86_SCAN_AHEAD_MULTI, "Scan ahead to multiple blocks when possible, avoiding even more flag calculations")
X(Performance, bool, no_address_overflow, true, FELIX86_NO_ADDRESS_OVERFLOW, "Assume addresses won't overflow in 32-bit apps, which allows for some optimizations")
X(Performance, bool, group_loadstore, true, FELIX86_GROUP_LOADSTORE, "Load/store SIMD state in groups of eight, may improve performance")
// Off by default. When enabled, Recompiler::readMemory/writeMemory emit AMOADD/AMOSWAP instead of a
// fenced load/store for zero-offset accesses under TSO emulation, and handle_unaligned_tso_atomic
// (signals.cpp) patches an AMO that faults back into a plain load/store plus a fence.
X(Performance, bool, aligned_tso_optimizations, false, FELIX86_ALIGNED_TSO_OPTIMIZATIONS, "Use atomics for TSO emulation, replace with fenced load/stores if unaligned, may improve performance")
95 changes: 93 additions & 2 deletions src/felix86/hle/signals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1169,8 +1169,95 @@ bool handle_wild_sigabrt(ThreadState* current_state, siginfo_t* info, ucontext_t
}
}

// Host SIGSEGV handler for the AMOs emitted when g_config.aligned_tso_optimizations is enabled.
//
// The recompiler emits TSO loads as `AMOADD.x.aq rd, x0, (rs1)` followed by one more instruction
// (zero-extend or NOP), and TSO stores as `AMOSWAP.x.rl x0, rs2, (rs1)` followed by a NOP. When
// such an AMO faults, this handler rewrites the two-instruction window in place into the same
// fenced sequence the recompiler uses when atomics are disabled:
//   load:  L{BU,HU,WU,D} rd, 0(rs1) ; FENCE R, RW
//   store: FENCE RW, W              ; S{H,W,D} rs2, 0(rs1)
//
// Returns true when the code at `pc` was patched (the faulting pc should then be re-executed),
// false when the fault does not belong to this handler and the next handler should be tried.
// `info` and `context` are unused; they are part of the common host signal handler signature.
bool handle_unaligned_tso_atomic(ThreadState* current_state, siginfo_t* info, ucontext_t* context, u64 pc) {
    if (!is_in_jit_code(current_state, (u8*)pc)) {
        return false;
    }

    if (!g_config.aligned_tso_optimizations) {
        return false;
    }

    const u32 current_instruction = *(u32*)pc;

    // AMO layout: funct5[31:27] aq[26] rl[25] rs2[24:20] rs1[19:15] funct3[14:12] rd[11:7] opcode[6:0].
    // Unsigned literals keep the shift into bit 31 out of signed-int territory.
    constexpr u32 amo_mask = (0b11111u << 27) | 0b1111111u;
    constexpr u32 expected_swap = (0b00001u << 27) | 0b0101111u;
    constexpr u32 expected_add = (0b00000u << 27) | 0b0101111u;
    const bool is_amoswap = (current_instruction & amo_mask) == expected_swap;
    const bool is_amoadd = (current_instruction & amo_mask) == expected_add;
    if (!is_amoswap && !is_amoadd) {
        return false;
    }

    // funct3 selects the access width: 0b000 byte, 0b001 half, 0b010 word, 0b011 doubleword.
    // Anything else is not something the recompiler emits, so leave it to the other handlers
    // instead of asserting inside a signal handler.
    const u32 size = (current_instruction >> 12) & 0b111;
    if (size > 0b011) {
        return false;
    }

    // Only the exact shapes used for TSO emulation are patched: stores discard the old value
    // (rd == x0) and loads add zero (rs2 == x0).
    const u32 rd = (current_instruction >> 7) & 0b11111;
    if (is_amoswap && rd != 0) {
        return false;
    }

    const u32 rs = (current_instruction >> 20) & 0b11111;
    if (is_amoadd && rs != 0) {
        return false;
    }

    // Byte stores are emitted without a trailing NOP, so there is no second slot to hold the
    // fence. A byte access cannot be misaligned either, so this fault belongs to another handler.
    if (is_amoswap && size == 0b000) {
        return false;
    }

    const u32 address = (current_instruction >> 15) & 0b11111;
    if (is_amoadd) {
        // Load: the plain load replaces the AMO and the fence replaces the instruction after it.
        // The load slot is written first; until the fence lands, the leftover zero-extend/NOP is
        // harmless because the replacement loads already zero extend.
        Assembler cas((u8*)pc, sizeof(u32));
        switch (size) {
        case 0b000: {
            ASSERT(Extensions::Zabha);
            cas.LBU(biscuit::GPR(rd), 0, biscuit::GPR(address));
            break;
        }
        case 0b001: {
            ASSERT(Extensions::Zabha);
            cas.LHU(biscuit::GPR(rd), 0, biscuit::GPR(address));
            break;
        }
        case 0b010: {
            cas.LWU(biscuit::GPR(rd), 0, biscuit::GPR(address));
            break;
        }
        case 0b011: {
            cas.LD(biscuit::GPR(rd), 0, biscuit::GPR(address));
            break;
        }
        default: {
            UNREACHABLE();
        }
        }
        Assembler pas((u8*)(pc + 4), sizeof(u32));
        pas.FENCE(FenceOrder::R, FenceOrder::RW);
    } else {
        // Store: the fence replaces the AMO and the plain store replaces the NOP after it.
        // The store slot (pc + 4) is written before the fence slot so the two-instruction
        // window always contains an instruction that performs the store.
        Assembler cas((u8*)(pc + 4), sizeof(u32));
        switch (size) {
        case 0b001: {
            ASSERT(Extensions::Zabha);
            cas.SH(biscuit::GPR(rs), 0, biscuit::GPR(address));
            break;
        }
        case 0b010: {
            cas.SW(biscuit::GPR(rs), 0, biscuit::GPR(address));
            break;
        }
        case 0b011: {
            cas.SD(biscuit::GPR(rs), 0, biscuit::GPR(address));
            break;
        }
        default: {
            UNREACHABLE();
        }
        }
        Assembler pas((u8*)pc, sizeof(u32));
        pas.FENCE(FenceOrder::RW, FenceOrder::W);
    }

    flush_icache_global(pc, pc + 8);
    return true;
}

bool handle_synchronous(ThreadState* current_state, siginfo_t* info, ucontext_t* context, u64 pc) {
// We can't cause a SIGSEGV SI_KERNEL from RISC-V, so fix up info->si_code to match x86 behavior
if (!is_in_jit_code(current_state, (u8*)pc)) {
return false;
}
Expand Down Expand Up @@ -1209,6 +1296,7 @@ bool handle_synchronous(ThreadState* current_state, siginfo_t* info, ucontext_t*
u64 actual_rip = get_actual_rip(*current_block, pc);

int sig;
// We can't cause a SIGSEGV SI_KERNEL from RISC-V, so fix up info->si_code to match x86 behavior
if (next_instruction == expected_hlt) {
sig = SIGSEGV;
info->si_code = SI_KERNEL;
Expand All @@ -1233,8 +1321,11 @@ bool handle_synchronous(ThreadState* current_state, siginfo_t* info, ucontext_t*
return true;
}

constexpr std::array<RegisteredHostSignal, 6> host_signals = {{
constexpr std::array<RegisteredHostSignal, 7> host_signals = {{
{SIGSEGV, SEGV_ACCERR, handle_safepoint},
// Note: Regrettably a faulting TSO atomic raises the same fault as the SMC handling fault, with no way to tell the two apart
// However, since we patch the atomics, if a fault happens again at the same place it will be properly handled by handle_smc
{SIGSEGV, SEGV_ACCERR, handle_unaligned_tso_atomic},
{SIGSEGV, SEGV_ACCERR, handle_smc},
{SIGSEGV, SEGV_MAPERR, handle_synchronous},
{SIGILL, 0, handle_breakpoint},
Expand Down
96 changes: 79 additions & 17 deletions src/felix86/v2/recompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2867,33 +2867,64 @@ biscuit::GPR Recompiler::getCond(int cond) {
}

// Emits a zero-extending guest load of `size` from `address + offset` into `dest`.
//
// When TSO has to be emulated, one of two sequences is produced:
//  - atomic path (g_config.aligned_tso_optimizations, offset == 0, and Extensions::Zabha for
//    byte/halfword): `AMOADD.x.aq dest, x0, (address)` followed by exactly one more instruction
//    (zero-extend, or a NOP for 64-bit). The signal handler that patches faulting AMOs rewrites
//    this two-instruction window, so the second slot must always be present.
//  - fenced path: plain load followed by FENCE R, RW.
// Without TSO emulation only the plain load is emitted. Each access is emitted exactly once.
void Recompiler::readMemory(biscuit::GPR dest, biscuit::GPR address, i64 offset, x86_size_e size) {
    // Warning: Don't change the LBU->LB etc. here, they must zero extend
    const bool emulate_tso = g_config.always_tso && !Extensions::TSO && !(g_config.no_tso_stack && current_instruction_on_stack && !g_config.paranoid);

    // The AMO forms take only a base register, so they are usable only for zero-offset accesses.
    bool use_atomics = emulate_tso && g_config.aligned_tso_optimizations && offset == 0;
    if (size == X86_SIZE_BYTE || size == X86_SIZE_WORD) {
        // Byte and halfword AMOs are only emitted when Zabha is available.
        use_atomics &= Extensions::Zabha;
    }

    if (use_atomics) {
        // Adding x0 leaves memory unchanged and returns the old value; AQ provides the ordering
        // that the fenced path gets from its trailing fence.
        switch (size) {
        case X86_SIZE_BYTE: {
            as.AMOADD_B(Ordering::AQ, dest, x0, address);
            as.ANDI(dest, dest, 0xFF);
            break;
        }
        case X86_SIZE_WORD: {
            as.AMOADD_H(Ordering::AQ, dest, x0, address);
            as.ZEXTH(dest, dest);
            break;
        }
        case X86_SIZE_DWORD: {
            as.AMOADD_W(Ordering::AQ, dest, x0, address);
            as.ZEXTW(dest, dest);
            break;
        }
        case X86_SIZE_QWORD: {
            as.AMOADD_D(Ordering::AQ, dest, x0, address);
            // Nothing to extend; the NOP reserves the slot the fault handler overwrites.
            as.NOP();
            break;
        }
        default: {
            UNREACHABLE();
            break;
        }
        }
        return;
    }

    switch (size) {
    case X86_SIZE_BYTE: {
        as.LBU(dest, offset, address);
        break;
    }
    case X86_SIZE_WORD: {
        as.LHU(dest, offset, address);
        break;
    }
    case X86_SIZE_DWORD: {
        as.LWU(dest, offset, address);
        break;
    }
    case X86_SIZE_QWORD: {
        as.LD(dest, offset, address);
        break;
    }
    default: {
        UNREACHABLE();
        break;
    }
    }

    if (emulate_tso) {
        as.FENCE(FenceOrder::R, FenceOrder::RW);
    }
}

void Recompiler::readMemory(biscuit::Vec vec, biscuit::GPR address, int size) {
Expand Down Expand Up @@ -2936,25 +2967,56 @@ void Recompiler::readMemory(biscuit::Vec vec, biscuit::GPR address, int size) {
}

void Recompiler::writeMemory(biscuit::GPR src, biscuit::GPR address, i64 offset, x86_size_e size) {
if (g_config.always_tso && !Extensions::TSO && !(g_config.no_tso_stack && current_instruction_on_stack && !g_config.paranoid)) {
as.FENCE(FenceOrder::RW, FenceOrder::W);
}

bool emulate_tso = g_config.always_tso && !Extensions::TSO && !(g_config.no_tso_stack && current_instruction_on_stack && !g_config.paranoid);
bool use_atomics = g_config.aligned_tso_optimizations && offset == 0;
switch (size) {
case X86_SIZE_BYTE: {
as.SB(src, offset, address);
use_atomics &= Extensions::Zabha;
if (emulate_tso && use_atomics) {
as.AMOSWAP_B(Ordering::RL, x0, src, address);
} else {
if (emulate_tso) {
as.FENCE(FenceOrder::RW, FenceOrder::W);
}
as.SB(src, offset, address);
}
break;
}
case X86_SIZE_WORD: {
as.SH(src, offset, address);
use_atomics &= Extensions::Zabha;
if (emulate_tso && use_atomics) {
as.AMOSWAP_H(Ordering::RL, x0, src, address);
as.NOP();
} else {
if (emulate_tso) {
as.FENCE(FenceOrder::RW, FenceOrder::W);
}
as.SH(src, offset, address);
}
break;
}
case X86_SIZE_DWORD: {
as.SW(src, offset, address);
if (emulate_tso && use_atomics) {
as.AMOSWAP_W(Ordering::RL, x0, src, address);
as.NOP();
} else {
if (emulate_tso) {
as.FENCE(FenceOrder::RW, FenceOrder::W);
}
as.SW(src, offset, address);
}
break;
}
case X86_SIZE_QWORD: {
as.SD(src, offset, address);
if (emulate_tso && use_atomics) {
as.AMOSWAP_D(Ordering::RL, x0, src, address);
as.NOP();
} else {
if (emulate_tso) {
as.FENCE(FenceOrder::RW, FenceOrder::W);
}
as.SD(src, offset, address);
}
break;
}
default: {
Expand Down
Loading