Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions counts/Base.json
Original file line number Diff line number Diff line change
Expand Up @@ -11607,13 +11607,12 @@
"disassembly": "mul [rdi]"
},
"48f727": {
"instruction_count": 6,
"instruction_count": 5,
"expected_asm": [
"LD ra, a0, 0x0(0)",
"MULHU t3, t0, ra",
"MULHU a2, t0, ra",
"MUL t0, t0, ra",
"ADDI a2, t3, 0x0(0)",
"SLTU s5, zero, t3",
"SLTU s5, zero, a2",
"ADDI s9, s5, 0x0(0)"
],
"disassembly": "mul [rdi]"
Expand Down
7 changes: 3 additions & 4 deletions counts/Base_NoFlags.json
Original file line number Diff line number Diff line change
Expand Up @@ -5585,12 +5585,11 @@
"disassembly": "mul [rdi]"
},
"48f727": {
"instruction_count": 4,
"instruction_count": 3,
"expected_asm": [
"LD ra, a0, 0x0(0)",
"MULHU t3, t0, ra",
"MUL t0, t0, ra",
"ADDI a2, t3, 0x0(0)"
"MULHU a2, t0, ra",
"MUL t0, t0, ra"
],
"disassembly": "mul [rdi]"
},
Expand Down
6 changes: 5 additions & 1 deletion external/biscuit/include/biscuit/literal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ namespace biscuit {
* as.JR(x2); // Execution continues elsewhere
* as.Place(&literal); // Place the literal at this location in the buffer
* @endcode
*/
*/
template<class T>
class Literal {
public:
Expand Down Expand Up @@ -104,6 +104,10 @@ class Literal {
return m_location;
}

/**
 * Retrieves the value held by this literal.
 *
 * @return A copy of the stored value (m_value).
 */
[[nodiscard]] T GetValue() const noexcept {
return m_value;
}

private:
// A literal instance is inherently bound to the assembler it's
// used with, as the offsets within the literal set depend on
Expand Down
1 change: 1 addition & 0 deletions src/felix86/common/config.inc
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,4 @@ X(Performance, bool, auto_compress, false, FELIX86_AUTO_COMPRESS, "Automatically
X(Performance, bool, scan_ahead_multi, true, FELIX86_SCAN_AHEAD_MULTI, "Scan ahead to multiple blocks when possible, avoiding even more flag calculations", false)
X(Performance, bool, pclmulqdq, true, FELIX86_PCLMULQDQ, "Enable the PCLMULQDQ instruction, might improve performance in some applications", false)
X(Performance, bool, no_address_overflow, true, FELIX86_NO_ADDRESS_OVERFLOW, "Assume addresses won't overflow in 32-bit apps, which allows for some optimizations", false)
X(Performance, bool, literal_pooling, true, FELIX86_LITERAL_POOLING, "Place 64-bit immediates in a literal pool after the block", false)
1 change: 1 addition & 0 deletions src/felix86/repl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ void __attribute__((noreturn)) enter_repl() {
g_config.quiet = true;
g_config.inline_syscalls = false;
g_config.scan_ahead_multi = false;
g_config.literal_pooling = false;
Extensions::G = true;
Extensions::B = true;
Extensions::C = true;
Expand Down
1 change: 1 addition & 0 deletions src/felix86/tools/generate_instruction_count.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ u8 outlast_camera2[] = {
int main() {
g_config.inline_syscalls = false;
g_config.scan_ahead_multi = false;
g_config.literal_pooling = false;
Extensions::G = true;
Extensions::B = true;
Extensions::C = true;
Expand Down
13 changes: 11 additions & 2 deletions src/felix86/v2/handlers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,15 @@ FAST_HANDLE(MOV) {
bool not_same = rec.zydisToRef(operands[0].reg.value) != rec.zydisToRef(operands[1].reg.value);
bool mem_reg = operands[0].type == ZYDIS_OPERAND_TYPE_MEMORY && operands[1].type == ZYDIS_OPERAND_TYPE_REGISTER;
bool reg_mem = operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && operands[1].type == ZYDIS_OPERAND_TYPE_MEMORY;
if (not_same && reg_reg) {
bool reg_imm64 = g_config.literal_pooling && operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && operands[0].size == 64 &&
operands[1].type == ZYDIS_OPERAND_TYPE_IMMEDIATE && operands[1].size == 64;
if (reg_imm64) {
u64 immediate = operands[1].imm.value.u;
Literal<u64>* literal = rec.pushPendingLiteral(immediate);
biscuit::GPR reg = rec.getGPR(&operands[0]);
as.LD(reg, literal);
rec.setGPR(&operands[0], reg);
} else if (not_same && reg_reg) {
// Save a mask by doing it this way
biscuit::GPR src = rec.getGPR(&operands[1], X86_SIZE_QWORD);
if (rec.zydisToSize(operands[1].reg.value) == X86_SIZE_BYTE_HIGH) {
Expand Down Expand Up @@ -3716,7 +3724,8 @@ FAST_HANDLE(MUL) {
break;
}
case X86_SIZE_QWORD: {
biscuit::GPR result = rec.scratch();
bool is_src_rdx = operands[0].type == ZYDIS_OPERAND_TYPE_REGISTER && rec.zydisToRef(operands[0].reg.value) == X86_REF_RDX;
biscuit::GPR result = is_src_rdx ? rec.scratch() : rec.getGPR(X86_REF_RDX, X86_SIZE_QWORD);
biscuit::GPR rax = rec.getGPR(X86_REF_RAX, X86_SIZE_QWORD);
as.MULHU(result, rax, src);
as.MUL(rax, rax, src);
Expand Down
12 changes: 12 additions & 0 deletions src/felix86/v2/recompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,9 @@ u64 Recompiler::compile(ThreadState* state, u64 rip) {

u64 end = (u64)as.GetCursorPointer();

// Place literal pool after the block
expirePendingLiterals();

ASSERT(end - start >= 8); // At least 2 instructions, so that our unlinking logic works

host_pc_map[block_meta.address_end - 1] = &block_meta;
Expand Down Expand Up @@ -2728,6 +2731,15 @@ void Recompiler::expirePendingLinks(u64 rip) {
block_meta.pending_links.clear();
}

// Flushes the queue of pending literals: each queued literal is handed to
// the assembler via Place(), after which the queue is emptied.
// When literal pooling is turned off in the config this is a no-op.
void Recompiler::expirePendingLiterals() {
    if (!g_config.literal_pooling) {
        return;
    }

    for (auto& pending : pending_literals) {
        as.Place(&pending);
    }

    pending_literals.clear();
}

u64 Recompiler::zextImmediate(u64 imm, ZyanU8 size) {
switch (size) {
case 8: {
Expand Down
16 changes: 16 additions & 0 deletions src/felix86/v2/recompiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,20 @@ struct Recompiler {

std::pair<ZydisDecodedInstruction*, ZydisDecodedOperand*> getNextInstruction();

// Returns a pointer to the pending literal that holds `value`.
// An already-queued literal with the same value is reused; otherwise a new
// literal is appended to pending_literals and a pointer to it is returned.
// NOTE(review): the result points into the pending_literals vector, so a
// later append may reallocate and invalidate it -- callers presumably use
// the pointer right away rather than storing it; confirm.
biscuit::Literal<u64>* pushPendingLiteral(u64 value) {
    biscuit::Literal<u64>* match = nullptr;

    // Scan for a queued literal carrying the same value, stopping at the first hit
    for (auto it = pending_literals.begin(); match == nullptr && it != pending_literals.end(); ++it) {
        if (it->GetValue() == value) {
            match = &*it;
        }
    }

    if (match == nullptr) {
        // Nothing to reuse, queue a fresh literal
        pending_literals.push_back(biscuit::Literal<u64>{value});
        match = &pending_literals.back();
    }

    return match;
}

void expirePendingLiterals();

private:
struct FlagAccess {
bool modification; // true if modified, false if used
Expand Down Expand Up @@ -761,6 +775,8 @@ struct Recompiler {

bool relocatable = false;

std::vector<biscuit::Literal<u64>> pending_literals;

constexpr static std::array scratch_gprs = {
x1, x6, x28, x29, x7, x30, x31,
};
Expand Down
Loading