diff --git a/src/felix86/common/global.cpp b/src/felix86/common/global.cpp
index 95db4c293..7a2ab32c3 100644
--- a/src/felix86/common/global.cpp
+++ b/src/felix86/common/global.cpp
@@ -98,7 +98,7 @@ void ProcessGlobals::initialize() {
 
     perf = std::make_unique();
 
-    cas128_lock = 0;
+    memset(cas128_locks, 0, sizeof(cas128_locks));
 
     // HACK: Don't clear as they get shared per mount namespace
     // TODO: proper mount namespacing when we need it
diff --git a/src/felix86/common/global.hpp b/src/felix86/common/global.hpp
index d4d0ec2ea..377a90a11 100644
--- a/src/felix86/common/global.hpp
+++ b/src/felix86/common/global.hpp
@@ -49,7 +49,7 @@ struct ProcessGlobals {
     std::unique_ptr perf;
 
     // For cmpxchg16b
-    u32 cas128_lock = 0;
+    u32 cas128_locks[256] = {};
 
     // TODO: this isn't per CLONE_VM but per mount namespace
     // But we don't care for now
diff --git a/src/felix86/v2/handlers.cpp b/src/felix86/v2/handlers.cpp
index 164b2137d..f6f635321 100644
--- a/src/felix86/v2/handlers.cpp
+++ b/src/felix86/v2/handlers.cpp
@@ -10684,10 +10684,25 @@ FAST_HANDLE(CMPXCHG16B) {
     biscuit::Label spinloop, writeloop;
     biscuit::GPR lock_address = rec.scratch();
     biscuit::GPR lock = rec.scratch();
-    as.LI(lock_address, (u64)&g_process_globals.cas128_lock);
+    as.LI(lock_address, (u64)&g_process_globals.cas128_locks);
+    // We will pick one of 256 different spinlocks based on a hash created by our address
+    // This means that if two cmpxchg16b target the same address they will spin on the same lock
+    // but if they target a different address they will likely get a different lock, which should decrease
+    // lock contention
+    constexpr u32 knuth_hash = 2654435761u;
+    as.LI(mem1, knuth_hash);
+    as.SRLI(mem0, address, 4); // shift out low bits since they are 0 to get a better hash
+    as.MULW(mem0, mem0, mem1);
+    // Index with the TOP 8 bits of the 32-bit product. The low 8 bits of a product depend only on
+    // the low 8 bits of the multiplicand, so masking with 0xFF would merely permute address bits
+    // [11:4] and any two addresses a multiple of 4 KiB apart would always share a lock.
+    as.SRLIW(mem0, mem0, 24);
+    static_assert(sizeof(g_process_globals.cas128_locks) == 256 * sizeof(u32));
+    as.SH2ADD(lock_address, mem0, lock_address);
 
-    as.Bind(&spinloop);
     as.LI(lock, 1);
+    // lock is loaded once: the branch back is only taken when the swap read a nonzero value into it
+    as.Bind(&spinloop);
     as.AMOSWAP_W(Ordering::AQRL, lock, lock, lock_address);
     as.BNEZ(lock, &spinloop);
 