From 84a26f9a968b0a9bcd55495a61fd659d0e3e1892 Mon Sep 17 00:00:00 2001 From: Paris Oplopoios <21157395+OFFTKP@users.noreply.github.com> Date: Mon, 1 Jun 2026 10:28:01 +0300 Subject: [PATCH 1/9] Add ZP7-based PEXT handler and enable BMI2 when Zbc is available --- src/felix86/common/config.inc | 2 +- src/felix86/common/feature.cpp | 2 +- src/felix86/v2/handlers.cpp | 57 ++++++++++++++++++++++++++++++++++ src/felix86/v2/handlers.inc | 2 ++ tests/FEX/vex.cpp | 4 +-- 5 files changed, 63 insertions(+), 4 deletions(-) diff --git a/src/felix86/common/config.inc b/src/felix86/common/config.inc index 8ca9adcba..8dfe26c53 100644 --- a/src/felix86/common/config.inc +++ b/src/felix86/common/config.inc @@ -28,7 +28,7 @@ X(CPUID, bool, no_avx, false, FELIX86_NO_AVX, "Disable AVX instructions") X(CPUID, bool, no_avx2, false, FELIX86_NO_AVX2, "Disable AVX2 instructions") X(CPUID, bool, no_fma3, false, FELIX86_NO_FMA3, "Disable FMA3 instructions") X(CPUID, bool, no_bmi1, false, FELIX86_NO_BMI1, "Disable BMI1 instructions") -X(CPUID, bool, no_bmi2, true, FELIX86_NO_BMI2, "Disable BMI2 instructions") +X(CPUID, bool, no_bmi2, false, FELIX86_NO_BMI2, "Disable BMI2 instructions") X(CPUID, bool, no_pclmulqdq, false, FELIX86_NO_PCLMULQDQ, "Disable PCLMULQDQ instruction") X(CPUID, std::string, manufacturer_id, "GenuineIntel", FELIX86_MANUFACTURER_ID, "Set a manufacturer ID") X(CPUID, std::string, cpu_name, "", FELIX86_CPU_NAME, "Set a specific CPU name in CPUID, up to 48 characters") diff --git a/src/felix86/common/feature.cpp b/src/felix86/common/feature.cpp index d6d353eb5..cfd1eed5c 100644 --- a/src/felix86/common/feature.cpp +++ b/src/felix86/common/feature.cpp @@ -72,7 +72,7 @@ bool is_feature_enabled(x86_feature feature) { return !g_config.no_bmi1 && is_feature_enabled(x86_feature::AVX); } case x86_feature::BMI2: { - return !g_config.no_bmi2 && is_feature_enabled(x86_feature::BMI1); + return !g_config.no_bmi2 && is_feature_enabled(x86_feature::BMI1) && Extensions::Zbc; } case x86_feature::F16C: { return is_feature_enabled(x86_feature::AVX) && 
Extensions::Zvfhmin; diff --git a/src/felix86/v2/handlers.cpp b/src/felix86/v2/handlers.cpp index 3250e076c..4c1ab2e42 100644 --- a/src/felix86/v2/handlers.cpp +++ b/src/felix86/v2/handlers.cpp @@ -7015,6 +7015,63 @@ FAST_HANDLE(MULX) { } } +// The implementations of PEXT and PDEP are adapted from ZP7 +// ZP7 (Zach's Peppy Parallel-Prefix-Popcountin' PEXT/PDEP Polyfill) +// +// Copyright (c) 2020 Zach Wegner +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+FAST_HANDLE(PEXT) { + biscuit::GPR data = rec.getGPR(&operands[1]); + biscuit::GPR mask = rec.getGPR(&operands[2]); + biscuit::GPR mask_temp = rec.scratch(); + biscuit::GPR neg2 = rec.scratch(); + biscuit::GPR bit = rec.scratch(); + biscuit::GPR data_temp = rec.scratch(); + biscuit::GPR andn_temp = rec.scratch(); + biscuit::GPR and_temp = rec.scratch(); + as.LI(neg2, -2); + as.AND(data_temp, data, mask); + for (int i = 0; i < 6; i++) { + int shift = 1 << i; + if (i == 0) { + as.CLMUL(bit, mask, neg2); + as.AND(mask_temp, mask, bit); + } else if (i != 5) { + as.CLMUL(bit, mask_temp, neg2); + as.AND(mask_temp, mask_temp, bit); + } else if (i == 5) { + as.NEG(bit, mask_temp); + as.SLLI(bit, bit, 1); + } else { + UNREACHABLE(); + } + as.ANDN(andn_temp, data_temp, bit); + as.AND(and_temp, data_temp, bit); + as.SRLI(and_temp, and_temp, shift); + as.OR(data_temp, and_temp, andn_temp); + } + rec.setGPR(&operands[0], data_temp); +} + +FAST_HANDLE(PDEP) {} + FAST_HANDLE(BZHI) { biscuit::GPR dst = rec.getGPR(&operands[0], X86_SIZE_QWORD); biscuit::GPR src = rec.getGPR(&operands[1]); diff --git a/src/felix86/v2/handlers.inc b/src/felix86/v2/handlers.inc index 33fc1e72f..c1392b963 100644 --- a/src/felix86/v2/handlers.inc +++ b/src/felix86/v2/handlers.inc @@ -210,6 +210,8 @@ X(SHRX) X(SARX) X(MULX) X(RORX) +X(PEXT) +X(PDEP) X(BZHI) X(CLFLUSH) X(CRC32) diff --git a/tests/FEX/vex.cpp b/tests/FEX/vex.cpp index 32dccd6ac..706c8ca70 100644 --- a/tests/FEX/vex.cpp +++ b/tests/FEX/vex.cpp @@ -35,8 +35,8 @@ VEX_TEST(blsr); VEX_TEST(bzhi); VEX_TEST(mulx); -// VEX_TEST(pdep); -// VEX_TEST(pext); +VEX_TEST(pdep); +VEX_TEST(pext); VEX_TEST(rorx); VEX_TEST(sarx); VEX_TEST(shlx); From 7d7611e6dbe6dca3e315d3f1666a6dbb751ab9d6 Mon Sep 17 00:00:00 2001 From: Paris Oplopoios <21157395+OFFTKP@users.noreply.github.com> Date: Mon, 1 Jun 2026 10:32:19 +0300 Subject: [PATCH 2/9] PEXT: read the data operand as X86_SIZE_QWORD 
--- src/felix86/v2/handlers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/felix86/v2/handlers.cpp b/src/felix86/v2/handlers.cpp index 4c1ab2e42..25fe3a445 100644 --- a/src/felix86/v2/handlers.cpp +++ b/src/felix86/v2/handlers.cpp @@ -7038,7 +7038,7 @@ FAST_HANDLE(MULX) { // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. FAST_HANDLE(PEXT) { - biscuit::GPR data = rec.getGPR(&operands[1]); + biscuit::GPR data = rec.getGPR(&operands[1], X86_SIZE_QWORD); biscuit::GPR mask = rec.getGPR(&operands[2]); biscuit::GPR mask_temp = rec.scratch(); biscuit::GPR neg2 = rec.scratch(); From aa704235bd6d6cebd5d1855396ea8d936f6ac269 Mon Sep 17 00:00:00 2001 From: Paris Oplopoios <21157395+OFFTKP@users.noreply.github.com> Date: Mon, 1 Jun 2026 10:37:13 +0300 Subject: [PATCH 3/9] PEXT: compute prefix bits from the inverted mask --- src/felix86/v2/handlers.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/felix86/v2/handlers.cpp b/src/felix86/v2/handlers.cpp index 25fe3a445..4e3b5d4ee 100644 --- a/src/felix86/v2/handlers.cpp +++ b/src/felix86/v2/handlers.cpp @@ -7048,12 +7048,10 @@ FAST_HANDLE(PEXT) { biscuit::GPR and_temp = rec.scratch(); as.LI(neg2, -2); as.AND(data_temp, data, mask); + as.NOT(mask_temp, mask); for (int i = 0; i < 6; i++) { int shift = 1 << i; - if (i == 0) { - as.CLMUL(bit, mask, neg2); - as.AND(mask_temp, mask, bit); - } else if (i != 5) { + if (i != 5) { as.CLMUL(bit, mask_temp, neg2); as.AND(mask_temp, mask_temp, bit); } else if (i == 5) { From d41cbed1003c65d89427a00edb5e7d28382fc239 Mon Sep 17 00:00:00 2001 From: Paris Oplopoios <21157395+OFFTKP@users.noreply.github.com> Date: Mon, 1 Jun 2026 10:42:23 +0300 Subject: [PATCH 4/9] Implement PDEP handler 
--- src/felix86/v2/handlers.cpp | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/felix86/v2/handlers.cpp b/src/felix86/v2/handlers.cpp index 4e3b5d4ee..4645624c1 100644 --- a/src/felix86/v2/handlers.cpp +++ b/src/felix86/v2/handlers.cpp @@ -7068,7 +7068,39 @@ FAST_HANDLE(PEXT) { rec.setGPR(&operands[0], data_temp); } -FAST_HANDLE(PDEP) {} +FAST_HANDLE(PDEP) { + biscuit::GPR data = rec.getGPR(&operands[1], X86_SIZE_QWORD); + biscuit::GPR mask = rec.getGPR(&operands[2]); + biscuit::GPR mask_temp = rec.scratch(); + biscuit::GPR neg2 = rec.scratch(); + biscuit::GPR bit = rec.scratch(); + biscuit::GPR data_temp = data; // for the first iteration, avoid a mv + biscuit::GPR andn_temp = rec.scratch(); + biscuit::GPR and_temp = rec.scratch(); + as.LI(neg2, -2); + as.NOT(mask_temp, mask); + for (int i = 0; i < 6; i++) { + int shift = 1 << i; + if (i != 5) { + as.CLMUL(bit, mask_temp, neg2); + as.AND(mask_temp, mask_temp, bit); + } else if (i == 5) { + as.NEG(bit, mask_temp); + as.SLLI(bit, bit, 1); + } else { + UNREACHABLE(); + } + as.ANDN(andn_temp, data_temp, bit); + as.SLLI(and_temp, data_temp, shift); + as.AND(and_temp, and_temp, bit); + if (data_temp == data) { + data_temp = rec.scratch(); + } + as.OR(data_temp, and_temp, andn_temp); + } + as.AND(data_temp, data_temp, mask); + rec.setGPR(&operands[0], data_temp); +} FAST_HANDLE(BZHI) { biscuit::GPR dst = rec.getGPR(&operands[0], X86_SIZE_QWORD); biscuit::GPR src = rec.getGPR(&operands[1]); From f5ff1bd0faf7aaf783d55f60198024fa472ca25f Mon Sep 17 00:00:00 2001 From: Paris Oplopoios <21157395+OFFTKP@users.noreply.github.com> Date: Mon, 1 Jun 2026 10:45:17 +0300 Subject: [PATCH 5/9] PDEP: iterate shift stages from 5 down to 0 
--- src/felix86/v2/handlers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/felix86/v2/handlers.cpp b/src/felix86/v2/handlers.cpp index 4645624c1..e03ddba31 100644 --- a/src/felix86/v2/handlers.cpp +++ b/src/felix86/v2/handlers.cpp @@ -7079,7 +7079,7 @@ FAST_HANDLE(PDEP) { biscuit::GPR and_temp = rec.scratch(); as.LI(neg2, -2); as.NOT(mask_temp, mask); - for (int i = 0; i < 6; i++) { + for (int i = 5; i >= 0; i--) { int shift = 1 << i; if (i != 5) { as.CLMUL(bit, mask_temp, neg2); From 3f476a783ddc8f32e4ce0b1b7ab68f33e22e1701 Mon Sep 17 00:00:00 2001 From: Paris Oplopoios <21157395+OFFTKP@users.noreply.github.com> Date: Mon, 1 Jun 2026 10:52:58 +0300 Subject: [PATCH 6/9] PDEP: spill prefix bits to the stack and apply them in reverse --- src/felix86/v2/handlers.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/felix86/v2/handlers.cpp b/src/felix86/v2/handlers.cpp index e03ddba31..a119652b8 100644 --- a/src/felix86/v2/handlers.cpp +++ b/src/felix86/v2/handlers.cpp @@ -7077,10 +7077,10 @@ FAST_HANDLE(PDEP) { biscuit::GPR data_temp = data; // for the first iteration, avoid a mv biscuit::GPR andn_temp = rec.scratch(); biscuit::GPR and_temp = rec.scratch(); + as.ADDI(sp, sp, -6 * (int)sizeof(u64)); as.LI(neg2, -2); as.NOT(mask_temp, mask); - for (int i = 5; i >= 0; i--) { - int shift = 1 << i; + for (int i = 0; i < 6; i++) { if (i != 5) { as.CLMUL(bit, mask_temp, neg2); as.AND(mask_temp, mask_temp, bit); @@ -7090,6 +7090,11 @@ FAST_HANDLE(PDEP) { } else { UNREACHABLE(); } + as.SD(bit, i * sizeof(u64), sp); + } + for (int i = 5; i >= 0; i--) { + int shift = 1 << i; + as.LD(bit, i * sizeof(u64), sp); as.ANDN(andn_temp, data_temp, bit); as.SLLI(and_temp, data_temp, shift); as.AND(and_temp, and_temp, bit); @@ -7099,6 +7104,7 @@ FAST_HANDLE(PDEP) { as.OR(data_temp, and_temp, andn_temp); } as.AND(data_temp, data_temp, mask); + as.ADDI(sp, sp, 6 * (int)sizeof(u64)); rec.setGPR(&operands[0], data_temp); } From 36db0bb80ccfe333558ffb587efb8b08eb94df2a Mon Sep 17 00:00:00 2001 
From: Paris Oplopoios <21157395+OFFTKP@users.noreply.github.com> Date: Mon, 1 Jun 2026 10:57:11 +0300 Subject: [PATCH 7/9] Warn once when PEXT or PDEP is used --- src/felix86/v2/handlers.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/felix86/v2/handlers.cpp b/src/felix86/v2/handlers.cpp index a119652b8..15d9013ca 100644 --- a/src/felix86/v2/handlers.cpp +++ b/src/felix86/v2/handlers.cpp @@ -7038,6 +7038,7 @@ FAST_HANDLE(MULX) { // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. FAST_HANDLE(PEXT) { + WARN_ONCE("This program uses PEXT"); biscuit::GPR data = rec.getGPR(&operands[1], X86_SIZE_QWORD); biscuit::GPR mask = rec.getGPR(&operands[2]); biscuit::GPR mask_temp = rec.scratch(); @@ -7069,6 +7070,7 @@ FAST_HANDLE(PEXT) { } FAST_HANDLE(PDEP) { + WARN_ONCE("This program uses PDEP"); biscuit::GPR data = rec.getGPR(&operands[1], X86_SIZE_QWORD); biscuit::GPR mask = rec.getGPR(&operands[2]); biscuit::GPR mask_temp = rec.scratch(); From aebcd2313b94012aab3a9fc93b25e1fb0ca91f56 Mon Sep 17 00:00:00 2001 From: Paris Oplopoios <21157395+OFFTKP@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:11:38 +0300 Subject: [PATCH 8/9] Drop and_temp scratch; PDEP reuses neg2 as its temporary 
--- src/felix86/v2/handlers.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/felix86/v2/handlers.cpp b/src/felix86/v2/handlers.cpp index 15d9013ca..b9d332e11 100644 --- a/src/felix86/v2/handlers.cpp +++ b/src/felix86/v2/handlers.cpp @@ -7046,7 +7046,6 @@ FAST_HANDLE(PEXT) { biscuit::GPR bit = rec.scratch(); biscuit::GPR data_temp = rec.scratch(); biscuit::GPR andn_temp = rec.scratch(); - biscuit::GPR and_temp = rec.scratch(); as.LI(neg2, -2); as.AND(data_temp, data, mask); as.NOT(mask_temp, mask); @@ -7078,7 +7077,6 @@ FAST_HANDLE(PDEP) { biscuit::GPR bit = rec.scratch(); biscuit::GPR data_temp = data; // for the first iteration, avoid a mv biscuit::GPR andn_temp = rec.scratch(); - biscuit::GPR and_temp = rec.scratch(); as.ADDI(sp, sp, -6 * (int)sizeof(u64)); as.LI(neg2, -2); as.NOT(mask_temp, mask); @@ -7098,12 +7096,12 @@ FAST_HANDLE(PDEP) { int shift = 1 << i; as.LD(bit, i * sizeof(u64), sp); as.ANDN(andn_temp, data_temp, bit); - as.SLLI(and_temp, data_temp, shift); - as.AND(and_temp, and_temp, bit); + as.SLLI(neg2, data_temp, shift); + as.AND(neg2, neg2, bit); if (data_temp == data) { data_temp = rec.scratch(); } - as.OR(data_temp, and_temp, andn_temp); + as.OR(data_temp, neg2, andn_temp); } as.AND(data_temp, data_temp, mask); as.ADDI(sp, sp, 6 * (int)sizeof(u64)); From b038f3a4ff2038b8cc1b1fedea6d518fcb060b3a Mon Sep 17 00:00:00 2001 From: Paris Oplopoios <21157395+OFFTKP@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:11:55 +0300 Subject: [PATCH 9/9] PEXT: reuse bit as the temporary in place of and_temp 
--- src/felix86/v2/handlers.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/felix86/v2/handlers.cpp b/src/felix86/v2/handlers.cpp index b9d332e11..0eceee32d 100644 --- a/src/felix86/v2/handlers.cpp +++ b/src/felix86/v2/handlers.cpp @@ -7061,9 +7061,9 @@ FAST_HANDLE(PEXT) { UNREACHABLE(); } as.ANDN(andn_temp, data_temp, bit); - as.AND(and_temp, data_temp, bit); - as.SRLI(and_temp, and_temp, shift); - as.OR(data_temp, and_temp, andn_temp); + as.AND(bit, data_temp, bit); + as.SRLI(bit, bit, shift); + as.OR(data_temp, bit, andn_temp); } rec.setGPR(&operands[0], data_temp); }