From 82a3e1bbc1c449f0ee7a96bc238ee3b80109455f Mon Sep 17 00:00:00 2001 From: Borys Date: Mon, 1 Jun 2026 11:26:47 +0300 Subject: [PATCH] refactor: add more SIMD into OAHSet --- src/core/CMakeLists.txt | 2 +- src/core/oah_set.cc | 184 ++++++++++++++++++++++++++ src/core/oah_set.h | 278 +++++++-------------------------------- src/core/oah_set_test.cc | 71 ++++++++++ src/core/simd_op.h | 23 ++-- 5 files changed, 314 insertions(+), 244 deletions(-) create mode 100644 src/core/oah_set.cc diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 9af042538a62..6a85394f67ff 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -29,7 +29,7 @@ add_library(dfly_core allocation_tracker.cc bloom.cc topk.cc compact_object.cc c segment_allocator.cc score_map.cc small_string.cc sorted_map.cc stream_node.cc task_queue.cc tx_queue.cc string_set.cc string_map.cc tiering_types.cc top_keys.cc detail/bitpacking.cc detail/listpack_wrap.cc detail/listpack.cc - oah_entry.cc) + oah_entry.cc oah_set.cc) cxx_link(dfly_core base dfly_search_core dfly_page_usage fibers2 jsonpath absl::flat_hash_map absl::str_format absl::random_random redis_lib diff --git a/src/core/oah_set.cc b/src/core/oah_set.cc new file mode 100644 index 000000000000..2cdea48fb2f0 --- /dev/null +++ b/src/core/oah_set.cc @@ -0,0 +1,184 @@ +// Copyright 2024, DragonflyDB authors. All rights reserved. +// See LICENSE for licensing terms. +// + +#include "core/oah_set.h" + +#include + +#include "base/logging.h" + +namespace dfly { + +// Several definitions below are `inline FORCE_INLINE`: the inline keyword makes +// them COMDAT (non-interposable), which is what lets always_inline apply to an +// out-of-line member, so they fold into their in-TU callers (notably AddImpl into +// AddMany's bulk-insert loop, and FindMatch into both AddImpl and FindInternal). + +template +inline FORCE_INLINE OAHSet::LaneMasks OAHSet::ProbeLanes(const OAHEntry* base, + uint64_t ext_hash) noexcept { + auto data_v = Wide::Load(reinterpret_cast(base)); + auto hash_v = (data_v & Wide::Fill(OAHEntry::kExtHashShiftedMask)) >> OAHEntry::kExtHashShift; + // ~is_empty stops an empty lane's zero hash from aliasing a hash/lazy-zero match. + auto is_empty = data_v == uint64_t(0); + auto candidate = ((hash_v == ext_hash) | (hash_v == uint64_t(0))) & ~is_empty; + return {candidate.GetMSBs(), is_empty.GetMSBs()}; +} + +inline FORCE_INLINE void OAHSet::RefreshStaleCandidate(OAHEntry& e, uint64_t ext_hash) { + if (e.GetHash() != ext_hash) + e.SetExtHash(CalcExtHash(Hash(e.Key()), capacity_log_)); + e.ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_); +} + +// 2-lane SIMD strides; the vector's size is a power of 2, >= 2. +inline FORCE_INLINE OAHEntry* OAHSet::ProbeExtensionVector(uint32_t ext_bid, std::string_view str, + uint64_t ext_hash) { + auto& vec = entries_[ext_bid].AsVector(); + auto* raw_arr = vec.Raw(); + const size_t size = vec.Size(); + DCHECK_GE(size, size_t(kVectorLaneStep)); + DCHECK(std::has_single_bit(size)); + + for (size_t base = 0; base < size; base += kVectorLaneStep) { + auto cand_bits = ProbeLanes(&raw_arr[base], ext_hash).candidates; + while (cand_bits) { + const uint32_t j = std::countr_zero(cand_bits); + cand_bits &= cand_bits - 1; + OAHEntry& re = raw_arr[base + j]; + if (re.Key() != str) { + RefreshStaleCandidate(re, ext_hash); + continue; + } + re.ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_); + return &re; + } + } + return nullptr; +} + +// Scans the displacement window then the extension vector for `str`. entries_ +// spans (1 << capacity_log_) + kDisplacementSize - 1 with bid < (1 << capacity_log_), +// so the window read stays in bounds. +inline FORCE_INLINE OAHSet::MatchResult OAHSet::FindMatch(uint32_t bid, uint32_t ext_bid, + uint32_t cand_bits, std::string_view str, + uint64_t ext_hash) { + while (cand_bits) { + const uint32_t i = std::countr_zero(cand_bits); + cand_bits &= cand_bits - 1; + const uint32_t bucket_id = bid + i; + OAHEntry& e = entries_[bucket_id]; + if (e.IsVector()) // vectors live only at the extension point + continue; + if (e.Key() != str) { + RefreshStaleCandidate(e, ext_hash); + continue; + } + e.ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_); + return {&e, bucket_id, 0}; + } + if (entries_[ext_bid].IsVector()) { + if (OAHEntry* hit = ProbeExtensionVector(ext_bid, str, ext_hash)) + return {hit, ext_bid, static_cast(hit - entries_[ext_bid].AsVector().Raw())}; + } + return {nullptr, 0, 0}; +} + +inline FORCE_INLINE bool OAHSet::AddImpl(std::string_view str, uint32_t ttl_sec) { + if (size_ >= entries_.size()) [[unlikely]] { + Reserve(BucketCount() * 2); + } + DCHECK_GE(Capacity(), kDisplacementSize); + + uint64_t hash = Hash(str); + auto bucket_id = BucketId(hash, capacity_log_); + PREFETCH_READ(entries_.data() + bucket_id); + + const ssize_t mem_before = zmalloc_used_memory_tl; + OAHEntry entry(str, EntryTTL(ttl_sec)); + if (ttl_sec != UINT32_MAX) + expiration_used_ = true; + const size_t entry_alloc_size = zmalloc_used_memory_tl - mem_before; + + const uint32_t ext_bid = GetExtensionPoint(bucket_id); + PREFETCH_READ(entries_[ext_bid].Raw()); + + const uint64_t ext_hash = CalcExtHash(hash, capacity_log_); + entry.SetExtHash(ext_hash); + + const LaneMasks masks = ProbeLanes(&entries_[bucket_id], ext_hash); + const MatchResult m = FindMatch(bucket_id, ext_bid, masks.candidates, str, ext_hash); + if (m.matched && !m.matched->Empty()) + return false; + + obj_alloc_used_ += entry_alloc_size; + ++size_; + // Place it: reuse an expired duplicate's slot, else a free window lane, else + // spill into the extension vector. + if (m.matched) { + *m.matched = std::move(entry); + } else if (masks.empties) { + entries_[bucket_id + std::countr_zero(masks.empties)] = std::move(entry); + } else { + ptr_vectors_alloc_used_ += entries_[ext_bid].Insert(std::move(entry)); + } + return true; +} + +bool OAHSet::Add(std::string_view str, uint32_t ttl_sec) { + return AddImpl(str, ttl_sec); +} + +unsigned OAHSet::AddMany(absl::Span span, uint32_t ttl_sec, bool keepttl) { + Reserve(span.size()); + unsigned res = 0; + const bool has_ttl = ttl_sec != UINT32_MAX; + for (auto& s : span) { + if (AddImpl(s, ttl_sec)) { + ++res; + } else if (has_ttl && !keepttl) { + auto it = Find(s); + if (it != end()) + it.SetExpiryTime(ttl_sec); + } + } + return res; +} + +inline FORCE_INLINE OAHSet::iterator OAHSet::FindInternal(uint32_t bid, std::string_view str, + uint64_t hash) { + const uint64_t ext_hash = CalcExtHash(hash, capacity_log_); + const uint32_t cand_bits = ProbeLanes(&entries_[bid], ext_hash).candidates; + const MatchResult m = FindMatch(bid, GetExtensionPoint(bid), cand_bits, str, ext_hash); + if (m.matched && !m.matched->Empty()) // empty => matched but just expired, i.e. gone + return iterator{this, m.bucket_id, m.pos_in_vec}; + return end(); +} + +OAHSet::iterator OAHSet::Find(std::string_view member) { + if (entries_.empty()) + return end(); + uint64_t hash = Hash(member); + return FindInternal(BucketId(hash, capacity_log_), member, hash); +} + +bool OAHSet::Erase(std::string_view str) { + if (entries_.empty()) + return false; + uint64_t hash = Hash(str); + auto item = FindInternal(BucketId(hash, capacity_log_), str, hash); + if (item == end()) + return false; + --size_; + obj_alloc_used_ -= item->AllocSize(); + *item = OAHEntry(); + uint32_t erase_bucket = item.bucket_id(); + if (entries_[erase_bucket].IsVector() && entries_[erase_bucket].AsVector().Empty()) { + ptr_vectors_alloc_used_ -= entries_[erase_bucket].AsVector().AllocSize(); + entries_[erase_bucket] = OAHEntry(); + } + return true; +} + +} // namespace dfly diff --git a/src/core/oah_set.h b/src/core/oah_set.h index acb05f92dfe3..683ad6687d32 100644 --- a/src/core/oah_set.h +++ b/src/core/oah_set.h @@ -164,97 +164,25 @@ class OAHSet { // Open Addressing Hash Set static constexpr std::uint32_t kVectorLaneStep = 2; using VectorWide = SimdOp; - explicit OAHSet() = default; - - bool Add(std::string_view str, uint32_t ttl_sec = UINT32_MAX) { - // Bootstrap or grow before any bucket math: on first Add capacity_log_==0 - // and entries_.data()==nullptr, so computing BucketId or prefetching - // would be UB (shift-by-64 + null deref-via-offset). - if (size_ >= entries_.size()) [[unlikely]] { - Reserve(BucketCount() * 2); - } - assert(Capacity() >= kDisplacementSize); - - uint64_t hash = Hash(str); - auto bucket_id = BucketId(hash, capacity_log_); - PREFETCH_READ(entries_.data() + bucket_id); - - // Build the entry between the bucket prefetch and the SIMD probe so - // zmalloc + memcpy overlap with the cacheline fetch. On a duplicate - // hit below, ~OAHEntry frees this allocation on return. - // entry_alloc_size is read off zmalloc's tl counter, which zmalloc - // already updates internally, avoiding a second mi_usable_size call. - const ssize_t mem_before = zmalloc_used_memory_tl; - OAHEntry entry(str, EntryTTL(ttl_sec)); - - if (ttl_sec != UINT32_MAX) - expiration_used_ = true; - - const size_t entry_alloc_size = zmalloc_used_memory_tl - mem_before; - - const uint32_t ext_bid = GetExtensionPoint(bucket_id); - PREFETCH_READ(entries_[ext_bid].Raw()); - - const uint64_t ext_hash = CalcExtHash(hash, capacity_log_); - entry.SetExtHash(ext_hash); - - auto data_v = EntryWide::Load(reinterpret_cast(&entries_[bucket_id])); - auto hash_v = - (data_v & EntryWide::Fill(OAHEntry::kExtHashShiftedMask)) >> OAHEntry::kExtHashShift; - - // !is_empty guards an empty lane's zero hash_v from aliasing a hash - // match when ext_hash==0 or the lazy-init (stored==0) branch. - auto is_empty = data_v == uint64_t(0); - auto candidate = ((hash_v == ext_hash) | (hash_v == uint64_t(0))) & ~is_empty; - - OAHEntry* reuse_slot = nullptr; - - auto cand_bits = candidate.GetMSBs(); - while (cand_bits) { - const uint32_t i = std::countr_zero(cand_bits); - cand_bits &= cand_bits - 1; - - OAHEntry& e = entries_[bucket_id + i]; - if (e.IsVector()) - continue; - if (e.Key() != str) { // after rehash, the pointer can miss hash so we need to set it for - // better performance - if (e.GetHash() != ext_hash) { - e.SetExtHash(CalcExtHash(Hash(e.Key()), capacity_log_)); - } - e.ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_); - continue; - } - e.ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_); - if (!e.Empty()) - return false; - reuse_slot = &e; - break; - } - - if (reuse_slot == nullptr && entries_[ext_bid].IsVector()) { - if (OAHEntry* hit = ProbeExtensionVector(ext_bid, str, ext_hash)) { - if (!hit->Empty()) - return false; - reuse_slot = hit; - } - } + // Result of a SIMD probe over a run of OAHEntry lanes. + struct LaneMasks { + // Non-empty lanes whose stored ext-hash matches or is lazily zero: key-compare + // candidates the caller confirms by key. + uint32_t candidates; + uint32_t empties; // empty lanes (data_ == 0); Add uses these to pick a slot. + }; - obj_alloc_used_ += entry_alloc_size; - ++size_; + // Vectorized hash probe over Wide::kLanes consecutive OAHEntry lanes from + // `base`. Backs the displacement-window (EntryWide) and extension-vector + // (VectorWide) scans. + template + static LaneMasks ProbeLanes(const OAHEntry* base, uint64_t ext_hash) noexcept; - if (reuse_slot) { - *reuse_slot = std::move(entry); - return true; - } + explicit OAHSet() = default; - if (auto empty_bits = is_empty.GetMSBs(); empty_bits) { - entries_[bucket_id + std::countr_zero(empty_bits)] = std::move(entry); - } else { - ptr_vectors_alloc_used_ += entries_[ext_bid].Insert(std::move(entry)); - } - return true; - } + // Inserts `str` (optional TTL); returns false if already present. Thin + // out-of-line entry point over the FORCE_INLINE AddImpl. + bool Add(std::string_view str, uint32_t ttl_sec = UINT32_MAX); void Reserve(size_t sz) { sz = absl::bit_ceil(sz); @@ -329,65 +257,14 @@ class OAHSet { // Open Addressing Hash Set return end; } - // Walk the vector at ext_bid (vectors live only at the extension point) - // looking for str. Returns nullptr if not found. Returns a pointer if found: - // caller checks .Empty() to distinguish "live match" (return false from Add) - // from "match-but-expired" (slot is now vacant; reuse it for the new entry). - // - // Vectors have power-of-2 capacity with minimum 2, so we sweep in 2-lane - // SIMD strides. - OAHEntry* ProbeExtensionVector(uint32_t ext_bid, std::string_view str, uint64_t ext_hash) { - auto& vec = entries_[ext_bid].AsVector(); - auto* raw_arr = vec.Raw(); - const size_t size = vec.Size(); - assert(size >= kVectorLaneStep && std::has_single_bit(size)); - - for (size_t base = 0; base < size; base += kVectorLaneStep) { - auto data_v = VectorWide::Load(reinterpret_cast(&raw_arr[base])); - auto hash_v = - (data_v & VectorWide::Fill(OAHEntry::kExtHashShiftedMask)) >> OAHEntry::kExtHashShift; - auto is_empty = data_v == uint64_t(0); - auto candidate = ((hash_v == ext_hash) | (hash_v == uint64_t(0))) & ~is_empty; - - auto cand_bits = candidate.GetMSBs(); - while (cand_bits) { - const uint32_t j = std::countr_zero(cand_bits); - cand_bits &= cand_bits - 1; - - OAHEntry& re = raw_arr[base + j]; - if (re.Key() != str) { // after rehash, the pointer can miss hash so we need to set it for - // better performance - if (re.GetHash() != ext_hash) { - re.SetExtHash(CalcExtHash(Hash(re.Key()), capacity_log_)); - } - re.ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_); - continue; - } - re.ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_); - return &re; - } - } - return nullptr; - } + // Searches the extension-point vector for `str`. Returns the matched slot + // (possibly now-empty after expiry, which the caller reuses) or nullptr. + OAHEntry* ProbeExtensionVector(uint32_t ext_bid, std::string_view str, uint64_t ext_hash); // keepttl=true: existing entries are left alone (current/legacy behavior). // keepttl=false: when ttl_sec is set, existing entries' expiry is updated to ttl_sec. unsigned AddMany(absl::Span span, uint32_t ttl_sec = UINT32_MAX, - bool keepttl = true) { - Reserve(span.size()); - unsigned res = 0; - const bool has_ttl = ttl_sec != UINT32_MAX; - for (auto& s : span) { - if (Add(s, ttl_sec)) { - ++res; - } else if (has_ttl && !keepttl) { - auto it = Find(s); - if (it != end()) - it.SetExpiryTime(ttl_sec); - } - } - return res; - } + bool keepttl = true); // TODO: Consider using chunks for this as in StringSet void Fill(OAHSet* other) { @@ -458,52 +335,9 @@ class OAHSet { // Open Addressing Hash Set return {}; } - bool Erase(std::string_view str) { - if (entries_.empty()) - return false; + bool Erase(std::string_view str); - uint64_t hash = Hash(str); - auto bucket_id = BucketId(hash, capacity_log_); - auto item = FindInternal(bucket_id, str, hash); - if (item != end()) { - --size_; - obj_alloc_used_ -= item->AllocSize(); - *item = OAHEntry(); - uint32_t erase_bucket = item.bucket_id(); - if (entries_[erase_bucket].IsVector()) { - if (entries_[erase_bucket].AsVector().Empty()) { - ptr_vectors_alloc_used_ -= entries_[erase_bucket].AsVector().AllocSize(); - entries_[erase_bucket] = OAHEntry(); - } - } - return true; - } - return false; - } - - iterator Find(std::string_view member) { - if (entries_.empty()) - return end(); - - uint64_t hash = Hash(member); - auto bucket_id = BucketId(hash, capacity_log_); - - const auto ext_hash = CalcExtHash(hash, capacity_log_); - - // fast check - for (uint32_t i = 0; i < kDisplacementSize; i++) { - const uint32_t bid = bucket_id + i; - if ((entries_[bid].GetHash() == ext_hash) && entries_[bid].IsEntry()) { - if (entries_[bid].Key() == member) { - entries_[bid].ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_); - return !entries_[bid].Empty() ? iterator{this, bid, 0} : end(); - } - } - } - - auto res = FindInternal(bucket_id, member, hash); - return res; - } + iterator Find(std::string_view member); bool Contains(std::string_view member) { return Find(member) != end(); @@ -720,39 +554,27 @@ class OAHSet { // Open Addressing Hash Set return bid; } - // Searches for a string within a bucket entry (which may be a single entry or a vector). - // Returns the position within the bucket if found, or std::nullopt if not found. - std::optional FindInBucket(OAHEntry& bucket, std::string_view str, uint64_t ext_hash) { - if (bucket.IsEntry()) { - bucket.ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_); - return CheckExtendedHash(bucket, ext_hash) && bucket.Key() == str ? 0 - : std::optional(); - } - if (bucket.IsVector()) { - auto& vec = bucket.AsVector(); - auto raw_arr = vec.Raw(); - for (size_t i = 0, size = vec.Size(); i < size; ++i) { - raw_arr[i].ExpireIfNeeded(time_now_, &size_, &obj_alloc_used_); - if (CheckExtendedHash(raw_arr[i], ext_hash) && raw_arr[i].Key() == str) { - return i; - } - } - } - return std::nullopt; - } + // The body of Add, FORCE_INLINE so it folds into Add and AddMany. + bool AddImpl(std::string_view str, uint32_t ttl_sec); - // return bucket_id and position otherwise max - iterator FindInternal(uint32_t bid, std::string_view str, uint64_t hash) { - const auto ext_hash = CalcExtHash(hash, capacity_log_); - for (uint32_t i = 0; i < kDisplacementSize; i++) { - const uint32_t bucket_id = bid + i; - auto pos = FindInBucket(entries_[bucket_id], str, ext_hash); - if (pos) { - return iterator{this, bucket_id, *pos}; - } - } - return end(); - } + // Outcome of a key probe. A raw slot (not an iterator) so the caller can + // inspect/overwrite a matched-but-just-expired entry: it is Empty(), and + // dereferencing an iterator to it would hit OAHEntry::operator[]'s !Empty() assert. + struct MatchResult { + OAHEntry* matched; // matched entry, or null if the key is absent; + // may be Empty() (just expired) — the caller reuses it + uint32_t bucket_id; // location of `matched`, for building an iterator + uint32_t pos_in_vec; // position within a vector bucket (0 for single entries) + }; + + // Shared core of AddImpl and FindInternal: scans the window (cand_bits from a + // prior ProbeLanes) then the extension vector for `str`. + MatchResult FindMatch(uint32_t bid, uint32_t ext_bid, uint32_t cand_bits, std::string_view str, + uint64_t ext_hash); + + // Probes for `str`; returns an iterator to the live entry or end(). Shared by + // Find and Erase. + iterator FindInternal(uint32_t bid, std::string_view str, uint64_t hash); static uint64_t CalcExtHash(uint64_t hash, uint32_t capacity_log) { const uint32_t start_hash_bit = capacity_log > kShiftLog ? capacity_log - kShiftLog : 0; @@ -766,6 +588,10 @@ class OAHSet { // Open Addressing Hash Set return ext_hash; } + // Probe candidate whose key didn't match: refresh its stale/lazy-zero ext-hash + // cache so later probes skip it, then apply pending expiry. + void RefreshStaleCandidate(OAHEntry& e, uint64_t ext_hash); + bool CheckBucketAffiliation(OAHEntry& entry, uint32_t bucket_id) { assert(!entry.IsVector()); if (entry.Empty()) @@ -781,18 +607,6 @@ class OAHSet { // Open Addressing Hash Set return bucket_id == stored_bucket_id; } - bool CheckExtendedHash(OAHEntry& entry, uint64_t ext_hash) { - auto stored_hash = entry.GetHash(); - if (!stored_hash) { - if (entry.IsEntry()) { - stored_hash = SetEntryHash(entry, Hash(entry.Key())); - } else { - return false; - } - } - return stored_hash == ext_hash; - } - // return new bucket_id uint32_t RehashEntry(OAHEntry& entry, uint32_t current_bucket_id, uint32_t prev_capacity_log) { assert(!entry.IsVector()); diff --git a/src/core/oah_set_test.cc b/src/core/oah_set_test.cc index 54d0b94d8d35..9fb610a63d18 100644 --- a/src/core/oah_set_test.cc +++ b/src/core/oah_set_test.cc @@ -262,6 +262,77 @@ TEST_F(OAHSetTest, DisplacedBug) { ss_->Add("HPq"); } +// Stresses the SIMD Find/Erase probe across every code path: the displacement +// window, the extension vector (forced by many collisions into a tiny table), +// the lazy-zero hash cache left behind by repeated rehashes, and TTL expiry +// observed during a Find/Erase probe. Mixed live/erased/expired members must +// be resolved correctly. +TEST_F(OAHSetTest, SimdFindEraseStress) { + constexpr size_t kNum = 20000; + ss_->Reserve(4); // start tiny so growth + vector overflow both happen + ss_->set_time(10); + + std::vector live; // present, no TTL + std::vector ttl_alive; // present, TTL in the future + std::vector ttl_dead; // inserted with TTL that expires at time=50 + std::vector erased; // inserted then erased + + for (size_t i = 0; i < kNum; ++i) { + std::string s = absl::StrCat("simd_member_", i); + switch (i % 4) { + case 0: + EXPECT_TRUE(ss_->Add(s)); + live.push_back(s); + break; + case 1: + EXPECT_TRUE(ss_->Add(s, 100)); // expires at 110, survives time=50 + ttl_alive.push_back(s); + break; + case 2: + EXPECT_TRUE(ss_->Add(s, 5)); // expires at 15, dead by time=50 + ttl_dead.push_back(s); + break; + default: + EXPECT_TRUE(ss_->Add(s)); + EXPECT_TRUE(ss_->Erase(s)); + erased.push_back(s); + break; + } + } + + ss_->set_time(50); // ttl_dead entries are now expired + + for (const auto& s : live) { + auto it = ss_->Find(s); + ASSERT_NE(it, ss_->end()) << s; + EXPECT_EQ(it->Key(), s); + EXPECT_FALSE(it.HasExpiry()); + } + for (const auto& s : ttl_alive) { + auto it = ss_->Find(s); + ASSERT_NE(it, ss_->end()) << s; + EXPECT_EQ(it.ExpiryTime(), 110u); + } + for (const auto& s : ttl_dead) { + EXPECT_EQ(ss_->Find(s), ss_->end()) << "should be expired: " << s; + EXPECT_FALSE(ss_->Erase(s)) << "expired erase: " << s; + } + for (const auto& s : erased) { + EXPECT_EQ(ss_->Find(s), ss_->end()) << "should be erased: " << s; + EXPECT_FALSE(ss_->Erase(s)) << "double erase: " << s; + } + + // Erase every live + ttl_alive member via the SIMD probe; each must hit once. + for (const auto& s : live) + EXPECT_TRUE(ss_->Erase(s)) << s; + for (const auto& s : ttl_alive) + EXPECT_TRUE(ss_->Erase(s)) << s; + for (const auto& s : live) + EXPECT_EQ(ss_->Find(s), ss_->end()) << s; + for (const auto& s : ttl_alive) + EXPECT_EQ(ss_->Find(s), ss_->end()) << s; +} + TEST_F(OAHSetTest, Resizing) { constexpr size_t num_strs = 4096; unordered_set strs; diff --git a/src/core/simd_op.h b/src/core/simd_op.h index 346b29ba1b59..66b31f40247b 100644 --- a/src/core/simd_op.h +++ b/src/core/simd_op.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include #include @@ -32,48 +33,48 @@ template class SimdOp { using BitsType = std::uint32_t; static constexpr std::size_t kLanes = N; - SimdOp() = default; + constexpr SimdOp() noexcept = default; // Filling via `Vec{} + value` lowers to vpbroadcast on AVX2 / dup on // NEON; a per-lane scalar loop pessimizes to vpinsrq + vperm2i128. - static SimdOp Fill(T value) { + static constexpr SimdOp Fill(T value) noexcept { return Vec{} + value; } - static SimdOp Load(const T* ptr) { + static constexpr SimdOp Load(const T* ptr) noexcept { Vec v; std::memcpy(&v, ptr, sizeof(Vec)); return v; } - SimdOp operator&(const SimdOp& o) const { + constexpr SimdOp operator&(const SimdOp& o) const noexcept { return v_ & o.v_; } - SimdOp operator|(const SimdOp& o) const { + constexpr SimdOp operator|(const SimdOp& o) const noexcept { return v_ | o.v_; } - SimdOp operator>>(unsigned shift) const { + constexpr SimdOp operator>>(unsigned shift) const noexcept { return v_ >> shift; } - SimdOp operator~() const { + constexpr SimdOp operator~() const noexcept { return ~v_; } - SimdOp operator==(const SimdOp& o) const { // NOLINT + constexpr SimdOp operator==(const SimdOp& o) const noexcept { // NOLINT return Vec(v_ == o.v_); } - SimdOp operator==(T value) const { // NOLINT + constexpr SimdOp operator==(T value) const noexcept { // NOLINT return Vec(v_ == (Vec{} + value)); } // Packs the most-significant bit of every lane into a uint32_t bitmask // (LSB = lane 0). For the output of `operator==` (lanes are all-ones or // all-zeros) this is equivalent to "bit i set iff lane i is non-zero". - BitsType GetMSBs() const { + BitsType GetMSBs() const noexcept { // We hand-write the per-ISA movemask because no portable C++ / // vector-extension formulation lowers to a single movemask instruction // — every alternative we tried measured ~5% slower on OAHSet's hot path. @@ -110,7 +111,7 @@ template class SimdOp { } private: - SimdOp(Vec v) : v_(v) { // NOLINT(google-explicit-constructor) + constexpr SimdOp(Vec v) noexcept : v_(v) { // NOLINT(google-explicit-constructor) } Vec v_{};