Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 115 additions & 2 deletions src/felix86/v2/handlers.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include <cmath>
#include <functional>
#include <Zydis/Zydis.h>
#include "Zydis/DecoderTypes.h"
#include "Zydis/SharedTypes.h"
Expand Down Expand Up @@ -135,7 +134,12 @@ static inline bool AttemptCmpFusing(Recompiler& rec, u64 rip, Assembler& as, Zyd
return false;
}

auto [next_instruction, next_operands] = rec.getNextInstruction();
auto opt = rec.getNextInstruction();
if (!opt.has_value()) {
return false;
}

auto [next_instruction, next_operands] = *opt;
switch (next_instruction->mnemonic) {
case ZYDIS_MNEMONIC_CMOVL: {
biscuit::GPR cond = rec.scratch();
Expand Down Expand Up @@ -271,6 +275,115 @@ static inline bool AttemptCmpFusing(Recompiler& rec, u64 rip, Assembler& as, Zyd
rec.skipNext();
return true;
}
case ZYDIS_MNEMONIC_JL:
case ZYDIS_MNEMONIC_JLE:
case ZYDIS_MNEMONIC_JNL:
case ZYDIS_MNEMONIC_JNLE:
case ZYDIS_MNEMONIC_JB:
case ZYDIS_MNEMONIC_JBE:
case ZYDIS_MNEMONIC_JNB:
case ZYDIS_MNEMONIC_JNBE:
case ZYDIS_MNEMONIC_JZ:
case ZYDIS_MNEMONIC_JNZ: {
// The earlier check confirmed that no flags are needed after this jump, so we can freely fuse instructions here
biscuit::GPR op0 = rec.getGPR(&operands[0]);
biscuit::GPR op1 = rec.getGPR(&operands[1]);
biscuit::GPR lhs, rhs;
bool needs_sext = instruction.operand_width != 64;
// TODO: zero-extend the immediate in op1 when not sign-extending, then only sign-extend for the signed jumps
// by adding the condition below (the jump being fused is next_instruction, not instruction):
// && (next_instruction->mnemonic == ZYDIS_MNEMONIC_JL || next_instruction->mnemonic == ZYDIS_MNEMONIC_JLE ||
// next_instruction->mnemonic == ZYDIS_MNEMONIC_JNL || next_instruction->mnemonic == ZYDIS_MNEMONIC_JNLE);
if (needs_sext) {
lhs = rec.scratch();
rhs = rec.scratch();
rec.sext(lhs, op0, rec.zydisToSize(instruction.operand_width));
rec.sext(rhs, op1, rec.zydisToSize(instruction.operand_width));
} else {
lhs = op0;
rhs = op1;
}

if (g_config.auto_compress) {
as.DisableOptimization(Optimization::AutoCompress);
}
u64 immediate = rec.sextImmediate(rec.getImmediate(&next_operands[0]), next_operands[0].imm.size);
u64 rip_false = next_rip + next_instruction->length;
u64 rip_true = rip_false + immediate;
Label true_label;
switch (next_instruction->mnemonic) {
case ZYDIS_MNEMONIC_JL: {
as.BLT(lhs, rhs, &true_label);
break;
}
case ZYDIS_MNEMONIC_JLE: {
as.BLE(lhs, rhs, &true_label);
break;
}
case ZYDIS_MNEMONIC_JNL: {
as.BGE(lhs, rhs, &true_label);
break;
}
case ZYDIS_MNEMONIC_JNLE: {
as.BGT(lhs, rhs, &true_label);
break;
}
case ZYDIS_MNEMONIC_JB: {
as.BLTU(lhs, rhs, &true_label);
break;
}
case ZYDIS_MNEMONIC_JBE: {
as.BLEU(lhs, rhs, &true_label);
break;
}
case ZYDIS_MNEMONIC_JNB: {
as.BGEU(lhs, rhs, &true_label);
break;
}
case ZYDIS_MNEMONIC_JNBE: {
as.BGTU(lhs, rhs, &true_label);
break;
}
case ZYDIS_MNEMONIC_JZ: {
as.BEQ(lhs, rhs, &true_label);
break;
}
case ZYDIS_MNEMONIC_JNZ: {
as.BNE(lhs, rhs, &true_label);
break;
}
default: {
UNREACHABLE();
}
}

biscuit::GPR ripreg = rec.allocatedGPR(X86_REF_RIP);
u64 rip_false_offset = rip_false - rec.getCurrentRipregValue();
rec.addi(ripreg, ripreg, rip_false_offset);
if (g_mode32) {
rec.zext(ripreg, ripreg, X86_SIZE_DWORD);
rip_false = (u32)rip_false;
}

as.AUIPC(t5, 0); // <- must be before link point, see invalidate_caller_thunk
rec.jumpAndLink(rip_false);

as.Bind(&true_label);
u64 rip_true_offset = rip_true - rec.getCurrentRipregValue();
rec.addi(ripreg, ripreg, rip_true_offset);
if (g_mode32) {
rec.zext(ripreg, ripreg, X86_SIZE_DWORD);
rip_true = (u32)rip_true;
}

as.AUIPC(t5, 0); // <- must be before link point, see invalidate_caller_thunk
rec.jumpAndLink(rip_true);
rec.skipNext();
rec.stopCompiling();
if (g_config.auto_compress) {
as.EnableOptimization(Optimization::AutoCompress);
}
return true;
}
default: {
break;
}
Expand Down
23 changes: 13 additions & 10 deletions src/felix86/v2/recompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -715,8 +715,10 @@ u64 Recompiler::compileSequence(u64 rip) {
return rip;
}

std::pair<ZydisDecodedInstruction*, ZydisDecodedOperand*> Recompiler::getNextInstruction() {
ASSERT(instructions.size() > current_instruction_index + 1);
std::optional<std::pair<ZydisDecodedInstruction*, ZydisDecodedOperand*>> Recompiler::getNextInstruction() {
if (current_instruction_index + 1 >= instructions.size()) {
return std::nullopt;
}
auto& [instruction, operands] = instructions[current_instruction_index + 1];
return std::make_pair(&instruction, operands);
}
Expand Down Expand Up @@ -2199,13 +2201,14 @@ void Recompiler::scanAhead(u64 rip) {
// If all the landing places overwrite the flags (1 landing spot for jmp, 2 for jcc)
// then we can skip those flag calculations
if (is_jump && operands[0].type == ZYDIS_OPERAND_TYPE_IMMEDIATE) {
u32 flags_we_care_about =
ZYDIS_CPUFLAG_OF | ZYDIS_CPUFLAG_CF | ZYDIS_CPUFLAG_ZF | ZYDIS_CPUFLAG_SF | ZYDIS_CPUFLAG_AF | ZYDIS_CPUFLAG_PF;
auto scan_landing_block = [&](u64 rip_ahead) {
bool jump_to_self = rip_ahead == initial_rip;
ZydisDecodedInstruction instruction_ahead;
u32 changed_this_block = 0;
u32 used_this_block = 0;
u32 flags_we_care_about =
ZYDIS_CPUFLAG_OF | ZYDIS_CPUFLAG_CF | ZYDIS_CPUFLAG_ZF | ZYDIS_CPUFLAG_SF | ZYDIS_CPUFLAG_AF | ZYDIS_CPUFLAG_PF;

// 10 was picked heuristically, with no measurements behind it.
// If we go too high we risk hurting our performance.
// TODO: benchmark this to pick a better value
Expand Down Expand Up @@ -2292,27 +2295,27 @@ void Recompiler::scanAhead(u64 rip) {
// If the JCC actually uses the flag, that's fine because the flag access will be after the usage
// so the instruction handler will emit that flag
if (thrashed_ahead & ZYDIS_CPUFLAG_CF) {
flag_access_cpazso[0].push_back({true, rip});
flag_access_cpazso[0].push_back({true, UINT64_MAX});
}

if (thrashed_ahead & ZYDIS_CPUFLAG_PF) {
flag_access_cpazso[1].push_back({true, rip});
flag_access_cpazso[1].push_back({true, UINT64_MAX});
}

if (thrashed_ahead & ZYDIS_CPUFLAG_AF) {
flag_access_cpazso[2].push_back({true, rip});
flag_access_cpazso[2].push_back({true, UINT64_MAX});
}

if (thrashed_ahead & ZYDIS_CPUFLAG_ZF) {
flag_access_cpazso[3].push_back({true, rip});
flag_access_cpazso[3].push_back({true, UINT64_MAX});
}

if (thrashed_ahead & ZYDIS_CPUFLAG_SF) {
flag_access_cpazso[4].push_back({true, rip});
flag_access_cpazso[4].push_back({true, UINT64_MAX});
}

if (thrashed_ahead & ZYDIS_CPUFLAG_OF) {
flag_access_cpazso[5].push_back({true, rip});
flag_access_cpazso[5].push_back({true, UINT64_MAX});
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/felix86/v2/recompiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,7 @@ struct Recompiler {
current_ripreg_value = value;
}

std::pair<ZydisDecodedInstruction*, ZydisDecodedOperand*> getNextInstruction();
std::optional<std::pair<ZydisDecodedInstruction*, ZydisDecodedOperand*>> getNextInstruction();

private:
struct FlagAccess {
Expand Down
Loading