diff --git a/src/felix86/common/config.inc b/src/felix86/common/config.inc index 8ca9adcba..8dfe26c53 100644 --- a/src/felix86/common/config.inc +++ b/src/felix86/common/config.inc @@ -28,7 +28,7 @@ X(CPUID, bool, no_avx, false, FELIX86_NO_AVX, "Disable AVX instructions") X(CPUID, bool, no_avx2, false, FELIX86_NO_AVX2, "Disable AVX2 instructions") X(CPUID, bool, no_fma3, false, FELIX86_NO_FMA3, "Disable FMA3 instructions") X(CPUID, bool, no_bmi1, false, FELIX86_NO_BMI1, "Disable BMI1 instructions") -X(CPUID, bool, no_bmi2, true, FELIX86_NO_BMI2, "Disable BMI2 instructions") +X(CPUID, bool, no_bmi2, false, FELIX86_NO_BMI2, "Disable BMI2 instructions") X(CPUID, bool, no_pclmulqdq, false, FELIX86_NO_PCLMULQDQ, "Disable PCLMULQDQ instruction") X(CPUID, std::string, manufacturer_id, "GenuineIntel", FELIX86_MANUFACTURER_ID, "Set a manufacturer ID") X(CPUID, std::string, cpu_name, "", FELIX86_CPU_NAME, "Set a specific CPU name in CPUID, up to 48 characters") diff --git a/src/felix86/common/feature.cpp b/src/felix86/common/feature.cpp index d6d353eb5..cfd1eed5c 100644 --- a/src/felix86/common/feature.cpp +++ b/src/felix86/common/feature.cpp @@ -72,7 +72,7 @@ bool is_feature_enabled(x86_feature feature) { return !g_config.no_bmi1 && is_feature_enabled(x86_feature::AVX); } case x86_feature::BMI2: { - return !g_config.no_bmi2 && is_feature_enabled(x86_feature::BMI1); + return !g_config.no_bmi2 && is_feature_enabled(x86_feature::BMI1) && Extensions::Zbc; } case x86_feature::F16C: { return is_feature_enabled(x86_feature::AVX) && Extensions::Zvfhmin; diff --git a/src/felix86/v2/handlers.cpp b/src/felix86/v2/handlers.cpp index 3250e076c..0eceee32d 100644 --- a/src/felix86/v2/handlers.cpp +++ b/src/felix86/v2/handlers.cpp @@ -7015,6 +7015,99 @@ FAST_HANDLE(MULX) { } } +// The implementations of PEXT and PDEP are adapted from ZP7 +// ZP7 (Zach's Peppy Parallel-Prefix-Popcountin' PEXT/PDEP Polyfill) +// +// Copyright (c) 2020 Zach Wegner +// +// Permission is hereby granted, free of 
charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+FAST_HANDLE(PEXT) {
+    // Parallel bit extract: packs the bits of operands[1] selected by the mask in
+    // operands[2] into the low bits of operands[0], in six shift levels (1..32).
+    WARN_ONCE("This program uses PEXT");
+    constexpr int levels = 6;
+    biscuit::GPR data = rec.getGPR(&operands[1], X86_SIZE_QWORD);
+    biscuit::GPR mask = rec.getGPR(&operands[2]);
+    biscuit::GPR mask_temp = rec.scratch();
+    biscuit::GPR neg2 = rec.scratch();
+    biscuit::GPR bit = rec.scratch();
+    biscuit::GPR data_temp = rec.scratch();
+    biscuit::GPR andn_temp = rec.scratch();
+    as.LI(neg2, -2);
+    as.AND(data_temp, data, mask); // discard the bits the mask does not select
+    as.NOT(mask_temp, mask);
+    for (int i = 0; i < levels; i++) {
+        if (i != levels - 1) {
+            as.CLMUL(bit, mask_temp, neg2);
+            as.AND(mask_temp, mask_temp, bit);
+        } else {
+            as.NEG(bit, mask_temp); // last level: (-mask_temp) << 1 instead of a CLMUL
+            as.SLLI(bit, bit, 1);
+        }
+        as.ANDN(andn_temp, data_temp, bit); // bits outside `bit` keep their position
+        as.AND(bit, data_temp, bit);        // bits inside `bit` ...
+        as.SRLI(bit, bit, 1 << i);          // ... move right by 2^i
+        as.OR(data_temp, bit, andn_temp);
+    }
+    rec.setGPR(&operands[0], data_temp);
+}
+
+FAST_HANDLE(PDEP) {
+    // Parallel bit deposit: spreads the low bits of operands[1] to the mask positions in
+    // operands[2]. Level masks are spilled to the stack: they are consumed in reverse.
+    WARN_ONCE("This program uses PDEP");
+    constexpr int levels = 6;
+    biscuit::GPR data = rec.getGPR(&operands[1], X86_SIZE_QWORD);
+    biscuit::GPR mask = rec.getGPR(&operands[2]);
+    biscuit::GPR mask_temp = rec.scratch();
+    biscuit::GPR neg2 = rec.scratch();
+    biscuit::GPR bit = rec.scratch();
+    biscuit::GPR andn_temp = rec.scratch();
+    biscuit::GPR result = rec.scratch();
+    as.ADDI(sp, sp, -levels * (int)sizeof(u64));
+    as.LI(neg2, -2);
+    as.NOT(mask_temp, mask);
+    for (int i = 0; i < levels; i++) {
+        if (i != levels - 1) {
+            as.CLMUL(bit, mask_temp, neg2);
+            as.AND(mask_temp, mask_temp, bit);
+        } else {
+            as.NEG(bit, mask_temp); // last level: (-mask_temp) << 1 instead of a CLMUL
+            as.SLLI(bit, bit, 1);
+        }
+        as.SD(bit, i * sizeof(u64), sp);
+    }
+    biscuit::GPR shifted = neg2; // the -2 constant is dead once the masks are built
+    biscuit::GPR src = data;     // the first level reads the operand register directly, avoiding a MV
+    for (int i = levels - 1; i >= 0; i--) {
+        as.LD(bit, i * sizeof(u64), sp);
+        as.ANDN(andn_temp, src, bit);
+        as.SLLI(shifted, src, 1 << i);
+        as.AND(shifted, shifted, bit);
+        as.OR(result, shifted, andn_temp);
+        src = result;
+    }
+    as.AND(result, result, mask);
+    as.ADDI(sp, sp, levels * (int)sizeof(u64));
+    rec.setGPR(&operands[0], result);
+}
+
 FAST_HANDLE(BZHI) {
     biscuit::GPR dst = rec.getGPR(&operands[0], X86_SIZE_QWORD);
     biscuit::GPR src = rec.getGPR(&operands[1]);
diff --git a/src/felix86/v2/handlers.inc b/src/felix86/v2/handlers.inc
index 33fc1e72f..c1392b963 100644
--- a/src/felix86/v2/handlers.inc
+++ b/src/felix86/v2/handlers.inc
@@ -210,6 +210,8 @@ X(SHRX)
 X(SARX)
 X(MULX)
 X(RORX)
+X(PEXT)
+X(PDEP)
 X(BZHI)
 X(CLFLUSH)
 X(CRC32)
diff --git a/tests/FEX/vex.cpp b/tests/FEX/vex.cpp
index 32dccd6ac..706c8ca70 100644
--- a/tests/FEX/vex.cpp
+++ b/tests/FEX/vex.cpp
@@ -35,8 +35,8 @@ VEX_TEST(blsr);
 VEX_TEST(bzhi);
 VEX_TEST(mulx);
 
-// VEX_TEST(pdep);
-// VEX_TEST(pext);
+VEX_TEST(pdep);
+VEX_TEST(pext);
 VEX_TEST(rorx);
 VEX_TEST(sarx);
 VEX_TEST(shlx);