From 9a10b7a56a080a5280a0c59593ef7df5356b7f6f Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Thu, 19 Feb 2026 13:07:12 +0100 Subject: [PATCH 01/17] fix(sdk): prevent nonce cache regression on broadcast failure When broadcast() fails, refresh_identity_nonce() previously removed nonce cache entries entirely. If the TX had partially reached some Tenderdash nodes (entered their mempool) before the error, the next retry would re-fetch the old nonce from Platform (TX not yet processed), derive the same nonce, create identical TX bytes, and get rejected with "tx already exists in cache". Fix by marking cache entries as stale (timestamp=0) instead of removing them. This forces a re-fetch from Platform on the next use while preserving the cached nonce value. The existing MAX(platform, cached) comparison then ensures the nonce always advances, preventing the SDK from recreating an identical state transition. Closes dashpay/dash-evo-tool#588 Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/sdk.rs | 62 +++++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 11 deletions(-) diff --git a/packages/rs-sdk/src/sdk.rs b/packages/rs-sdk/src/sdk.rs index f8e5eefa84..f26a85ee8a 100644 --- a/packages/rs-sdk/src/sdk.rs +++ b/packages/rs-sdk/src/sdk.rs @@ -18,7 +18,7 @@ use dash_context_provider::MockContextProvider; use dpp::bincode; use dpp::bincode::error::DecodeError; use dpp::dashcore::Network; -use dpp::identity::identity_nonce::IDENTITY_NONCE_VALUE_FILTER; +use dpp::identity::identity_nonce::{IDENTITY_NONCE_VALUE_FILTER, MAX_MISSING_IDENTITY_REVISIONS}; use dpp::prelude::IdentityNonce; use dpp::version::{PlatformVersion, PlatformVersionCurrentVersion}; use drive::grovedb::operations::proof::GroveDBProof; @@ -452,16 +452,28 @@ impl Sdk { } Entry::Occupied(mut e) => { let (current_nonce, _) = e.get(); - let insert_nonce = if platform_nonce > *current_nonce { + // If the cached nonce has drifted too far ahead of what + // Platform reports (e.g. due to repeated broadcast failures + // where the TX never actually made it to the mempool), reset + // to the platform value to avoid "nonce too far in future". + let effective_current = if (*current_nonce & IDENTITY_NONCE_VALUE_FILTER) + .saturating_sub(platform_nonce & IDENTITY_NONCE_VALUE_FILTER) + > MAX_MISSING_IDENTITY_REVISIONS + { + platform_nonce + } else { + *current_nonce + }; + let insert_nonce = if platform_nonce > effective_current { if bump_first { platform_nonce + 1 } else { platform_nonce } } else if bump_first { - *current_nonce + 1 + effective_current + 1 } else { - *current_nonce + effective_current }; e.insert((insert_nonce, current_time_s)); Ok(insert_nonce & IDENTITY_NONCE_VALUE_FILTER) @@ -551,16 +563,28 @@ impl Sdk { } Entry::Occupied(mut e) => { let (current_nonce, _) = e.get(); - let insert_nonce = if platform_nonce > *current_nonce { + // If the cached nonce has drifted too far ahead of what + // Platform reports (e.g. due to repeated broadcast failures + // where the TX never actually made it to the mempool), reset + // to the platform value to avoid "nonce too far in future". + let effective_current = if (*current_nonce & IDENTITY_NONCE_VALUE_FILTER) + .saturating_sub(platform_nonce & IDENTITY_NONCE_VALUE_FILTER) + > MAX_MISSING_IDENTITY_REVISIONS + { + platform_nonce + } else { + *current_nonce + }; + let insert_nonce = if platform_nonce > effective_current { if bump_first { platform_nonce + 1 } else { platform_nonce } } else if bump_first { - *current_nonce + 1 + effective_current + 1 } else { - *current_nonce + effective_current }; e.insert((insert_nonce, current_time_s)); Ok(insert_nonce & IDENTITY_NONCE_VALUE_FILTER) @@ -585,12 +609,23 @@ impl Sdk { } } - /// Forces reload of the identity nonce from Platform on the next call to `get_identity_nonce`. + /// Marks identity nonce cache entries as stale so they are re-fetched from + /// Platform on the next call to [`get_identity_nonce`] or + /// [`get_identity_contract_nonce`]. + /// + /// Unlike removing the entries, marking them as stale preserves the cached + /// nonce **value**. When the re-fetch happens the SDK takes the maximum of + /// the platform value and the cached value, which prevents nonce regression. + /// Nonce regression would otherwise cause the SDK to recreate an identical + /// state transition that is already sitting in the Tenderdash mempool, + /// resulting in a "tx already exists in cache" error. pub async fn refresh_identity_nonce(&self, identity_id: &Identifier) { { let mut identity_nonce_counter = self.internal_cache.identity_nonce_counter.lock().await; - identity_nonce_counter.remove(identity_id); + if let Some((_, timestamp)) = identity_nonce_counter.get_mut(identity_id) { + *timestamp = 0; + } } { let mut identity_contract_nonce_counter = self @@ -598,8 +633,13 @@ impl Sdk { .identity_contract_nonce_counter .lock() .await; - identity_contract_nonce_counter - .retain(|(cached_identity_id, _), _| cached_identity_id != identity_id); + for ((cached_identity_id, _), (_, timestamp)) in + identity_contract_nonce_counter.iter_mut() + { + if cached_identity_id == identity_id { + *timestamp = 0; + } + } } } From d29905b08d92019f46cd146fce24729c91eecc3a Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Thu, 19 Feb 2026 13:37:51 +0100 Subject: [PATCH 02/17] refactor(sdk): strip nonce upper bits at fetch instead of on every return path The nonce cache previously stored raw values with the upper "missing revisions" bits intact, requiring IDENTITY_NONCE_VALUE_FILTER masking on every return path. This made comparisons between platform_nonce and cached values unreliable since the upper bits could differ. Strip the upper bits immediately after fetching from Platform so the cache only ever holds plain nonce values. This simplifies all downstream comparisons, prevents +1 increments from overflowing into the upper bits, and removes redundant masking from 7 return paths. Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/sdk.rs | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/packages/rs-sdk/src/sdk.rs b/packages/rs-sdk/src/sdk.rs index f26a85ee8a..393b967d29 100644 --- a/packages/rs-sdk/src/sdk.rs +++ b/packages/rs-sdk/src/sdk.rs @@ -432,6 +432,10 @@ impl Sdk { }; let nonce = if should_query_platform { + // Strip the upper "missing revisions" bits immediately so the + // cache only ever holds plain nonce values. This makes all + // downstream comparisons and increments safe without needing to + // mask on every return path. let platform_nonce = IdentityNonceFetcher::fetch_with_settings( self, identity_id, @@ -439,7 +443,7 @@ impl Sdk { ) .await? .unwrap_or(IdentityNonceFetcher(0)) - .0; + .0 & IDENTITY_NONCE_VALUE_FILTER; match entry { Entry::Vacant(e) => { let insert_nonce = if bump_first { @@ -448,7 +452,7 @@ impl Sdk { platform_nonce }; e.insert((insert_nonce, current_time_s)); - Ok(insert_nonce & IDENTITY_NONCE_VALUE_FILTER) + Ok(insert_nonce) } Entry::Occupied(mut e) => { let (current_nonce, _) = e.get(); @@ -456,8 +460,7 @@ impl Sdk { // Platform reports (e.g. due to repeated broadcast failures // where the TX never actually made it to the mempool), reset // to the platform value to avoid "nonce too far in future". - let effective_current = if (*current_nonce & IDENTITY_NONCE_VALUE_FILTER) - .saturating_sub(platform_nonce & IDENTITY_NONCE_VALUE_FILTER) + let effective_current = if current_nonce.saturating_sub(platform_nonce) > MAX_MISSING_IDENTITY_REVISIONS { platform_nonce @@ -476,7 +479,7 @@ impl Sdk { effective_current }; e.insert((insert_nonce, current_time_s)); - Ok(insert_nonce & IDENTITY_NONCE_VALUE_FILTER) + Ok(insert_nonce) } } } else { @@ -489,9 +492,9 @@ impl Sdk { if bump_first { let insert_nonce = current_nonce + 1; e.insert((insert_nonce, current_time_s)); - Ok(insert_nonce & IDENTITY_NONCE_VALUE_FILTER) + Ok(insert_nonce) } else { - Ok(*current_nonce & IDENTITY_NONCE_VALUE_FILTER) + Ok(*current_nonce) } } } @@ -543,6 +546,8 @@ impl Sdk { }; if should_query_platform { + // Strip the upper "missing revisions" bits immediately so the + // cache only ever holds plain nonce values. let platform_nonce = IdentityContractNonceFetcher::fetch_with_settings( self, (identity_id, contract_id), @@ -550,7 +555,7 @@ impl Sdk { ) .await? .unwrap_or(IdentityContractNonceFetcher(0)) - .0; + .0 & IDENTITY_NONCE_VALUE_FILTER; match entry { Entry::Vacant(e) => { let insert_nonce = if bump_first { @@ -559,7 +564,7 @@ impl Sdk { platform_nonce }; e.insert((insert_nonce, current_time_s)); - Ok(insert_nonce & IDENTITY_NONCE_VALUE_FILTER) + Ok(insert_nonce) } Entry::Occupied(mut e) => { let (current_nonce, _) = e.get(); @@ -567,8 +572,7 @@ impl Sdk { // Platform reports (e.g. due to repeated broadcast failures // where the TX never actually made it to the mempool), reset // to the platform value to avoid "nonce too far in future". - let effective_current = if (*current_nonce & IDENTITY_NONCE_VALUE_FILTER) - .saturating_sub(platform_nonce & IDENTITY_NONCE_VALUE_FILTER) + let effective_current = if current_nonce.saturating_sub(platform_nonce) > MAX_MISSING_IDENTITY_REVISIONS { platform_nonce @@ -587,7 +591,7 @@ impl Sdk { effective_current }; e.insert((insert_nonce, current_time_s)); - Ok(insert_nonce & IDENTITY_NONCE_VALUE_FILTER) + Ok(insert_nonce) } } } else { @@ -600,9 +604,9 @@ impl Sdk { if bump_first { let insert_nonce = current_nonce + 1; e.insert((insert_nonce, current_time_s)); - Ok(insert_nonce & IDENTITY_NONCE_VALUE_FILTER) + Ok(insert_nonce) } else { - Ok(*current_nonce & IDENTITY_NONCE_VALUE_FILTER) + Ok(*current_nonce) } } } From 18dff6c2f5c5ae7950c7e7767f8dcbf8f45110b2 Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Thu, 19 Feb 2026 13:43:20 +0100 Subject: [PATCH 03/17] clippy: remove unused Sdk::parse_proof, Sdk::parse_proof_with_metadata --- packages/rs-sdk/src/sdk.rs | 44 -------------------------------------- 1 file changed, 44 deletions(-) diff --git a/packages/rs-sdk/src/sdk.rs b/packages/rs-sdk/src/sdk.rs index 393b967d29..b5f66ac66f 100644 --- a/packages/rs-sdk/src/sdk.rs +++ b/packages/rs-sdk/src/sdk.rs @@ -240,27 +240,6 @@ impl Sdk { .expect("mock should be created") } - /// Retrieve object `O` from proof contained in `request` (of type `R`) and `response`. - /// - /// This method is used to retrieve objects from proofs returned by Dash Platform. - /// - /// ## Generic Parameters - /// - /// - `R`: Type of the request that was used to fetch the proof. - /// - `O`: Type of the object to be retrieved from the proof. - pub(crate) async fn parse_proof + MockResponse>( - &self, - request: O::Request, - response: O::Response, - ) -> Result, Error> - where - O::Request: Mockable + TransportRequest, - { - self.parse_proof_with_metadata(request, response) - .await - .map(|result| result.0) - } - /// Return freshness criteria (height tolerance and time tolerance) for given request method. /// /// Note that if self.metadata_height_tolerance or self.metadata_time_tolerance_ms is None, @@ -279,29 +258,6 @@ impl Sdk { } } - /// Retrieve object `O` from proof contained in `request` (of type `R`) and `response`. - /// - /// This method is used to retrieve objects from proofs returned by Dash Platform. - /// - /// ## Generic Parameters - /// - /// - `R`: Type of the request that was used to fetch the proof. - /// - `O`: Type of the object to be retrieved from the proof. - pub(crate) async fn parse_proof_with_metadata + MockResponse>( - &self, - request: O::Request, - response: O::Response, - ) -> Result<(Option, ResponseMetadata), Error> - where - O::Request: Mockable + TransportRequest, - { - let (object, metadata, _proof) = self - .parse_proof_with_metadata_and_proof(request, response) - .await?; - - Ok((object, metadata)) - } - /// Verify response metadata against the current state of the SDK. pub fn verify_response_metadata( &self, From 1f3d4f1470102f00fdbf3e2797097af012f47462 Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Thu, 19 Feb 2026 13:59:58 +0100 Subject: [PATCH 04/17] fix(sdk): force nonce re-fetch when cache drifts too far ahead of platform The nonce cache could drift arbitrarily far ahead of Platform through repeated bump_first calls without successful broadcasts. Each bump refreshed the cache timestamp, preventing time-based staleness from triggering a re-fetch. Add last_platform_nonce to the cache entry and check drift inline in should_query_platform. When the cached nonce exceeds the last known platform value by more than MAX_MISSING_IDENTITY_REVISIONS, force a re-fetch and use platform's response as the source of truth. Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/internal_cache/mod.rs | 26 ++--- packages/rs-sdk/src/sdk.rs | 111 +++++++--------------- 2 files changed, 47 insertions(+), 90 deletions(-) diff --git a/packages/rs-sdk/src/internal_cache/mod.rs b/packages/rs-sdk/src/internal_cache/mod.rs index 9a07ce72d9..2704dec71c 100644 --- a/packages/rs-sdk/src/internal_cache/mod.rs +++ b/packages/rs-sdk/src/internal_cache/mod.rs @@ -1,10 +1,19 @@ use crate::platform::Identifier; use crate::sdk::LastQueryTimestamp; -use dpp::prelude; use dpp::prelude::IdentityNonce; use std::collections::BTreeMap; use tokio::sync::Mutex; +/// Cached nonce state: `(current_nonce, last_fetch_timestamp, last_platform_nonce)`. +/// +/// - `current_nonce` — the nonce value the SDK will use next (may be bumped +/// ahead of what Platform reported). +/// - `last_fetch_timestamp` — when we last fetched from Platform. +/// - `last_platform_nonce` — the value Platform returned on the last fetch +/// (after stripping upper bits). Used to detect when local bumps have +/// drifted too far ahead, triggering a re-fetch. +pub(crate) type NonceCacheEntry = (IdentityNonce, LastQueryTimestamp, IdentityNonce); + /// This is a cache that is internal to the SDK that the user does not have to worry about pub struct InternalSdkCache { /// This is the identity nonce counter for the sdk @@ -14,8 +23,7 @@ pub struct InternalSdkCache { /// This update can involve querying Platform for the current identity nonce /// If the sdk user requests to put a state transition the counter is checked and either /// returns an error or is updated. - pub(crate) identity_nonce_counter: - tokio::sync::Mutex>, + pub(crate) identity_nonce_counter: tokio::sync::Mutex>, /// This is the identity contract nonce counter for the sdk /// The sdk will automatically manage this counter for the user. @@ -23,21 +31,17 @@ pub struct InternalSdkCache { /// This update can involve querying Platform for the current identity contract nonce /// If the sdk user requests to put a state transition the counter is checked and either /// returns an error or is updated. - pub(crate) identity_contract_nonce_counter: tokio::sync::Mutex< - BTreeMap<(Identifier, Identifier), (prelude::IdentityNonce, LastQueryTimestamp)>, - >, + pub(crate) identity_contract_nonce_counter: + tokio::sync::Mutex>, } impl Default for InternalSdkCache { fn default() -> Self { InternalSdkCache { - identity_nonce_counter: Mutex::new(BTreeMap::< - Identifier, - (IdentityNonce, LastQueryTimestamp), - >::new()), + identity_nonce_counter: Mutex::new(BTreeMap::::new()), identity_contract_nonce_counter: Mutex::new(BTreeMap::< (Identifier, Identifier), - (IdentityNonce, LastQueryTimestamp), + NonceCacheEntry, >::new()), } } diff --git a/packages/rs-sdk/src/sdk.rs b/packages/rs-sdk/src/sdk.rs index b5f66ac66f..cf72f44625 100644 --- a/packages/rs-sdk/src/sdk.rs +++ b/packages/rs-sdk/src/sdk.rs @@ -377,13 +377,17 @@ impl Sdk { let should_query_platform = match &entry { Entry::Vacant(_) => true, Entry::Occupied(e) => { - let (_, last_query_time) = e.get(); + let (current_nonce, last_query_time, last_platform_nonce) = e.get(); + // Re-fetch if the cache is stale by time OR if local bumps + // have drifted too far ahead of what Platform last reported. *last_query_time < current_time_s.saturating_sub( settings .identity_nonce_stale_time_s .unwrap_or(DEFAULT_IDENTITY_NONCE_STALE_TIME_S), ) + || current_nonce.saturating_sub(*last_platform_nonce) + > MAX_MISSING_IDENTITY_REVISIONS } }; @@ -400,54 +404,30 @@ impl Sdk { .await? .unwrap_or(IdentityNonceFetcher(0)) .0 & IDENTITY_NONCE_VALUE_FILTER; + let insert_nonce = if bump_first { + platform_nonce + 1 + } else { + platform_nonce + }; match entry { Entry::Vacant(e) => { - let insert_nonce = if bump_first { - platform_nonce + 1 - } else { - platform_nonce - }; - e.insert((insert_nonce, current_time_s)); - Ok(insert_nonce) + e.insert((insert_nonce, current_time_s, platform_nonce)); } Entry::Occupied(mut e) => { - let (current_nonce, _) = e.get(); - // If the cached nonce has drifted too far ahead of what - // Platform reports (e.g. due to repeated broadcast failures - // where the TX never actually made it to the mempool), reset - // to the platform value to avoid "nonce too far in future". - let effective_current = if current_nonce.saturating_sub(platform_nonce) - > MAX_MISSING_IDENTITY_REVISIONS - { - platform_nonce - } else { - *current_nonce - }; - let insert_nonce = if platform_nonce > effective_current { - if bump_first { - platform_nonce + 1 - } else { - platform_nonce - } - } else if bump_first { - effective_current + 1 - } else { - effective_current - }; - e.insert((insert_nonce, current_time_s)); - Ok(insert_nonce) + e.insert((insert_nonce, current_time_s, platform_nonce)); } } + Ok(insert_nonce) } else { match entry { Entry::Vacant(_) => { panic!("this can not happen, vacant entry not possible"); } Entry::Occupied(mut e) => { - let (current_nonce, _) = e.get(); + let (current_nonce, _, last_platform_nonce) = e.get(); if bump_first { let insert_nonce = current_nonce + 1; - e.insert((insert_nonce, current_time_s)); + e.insert((insert_nonce, current_time_s, *last_platform_nonce)); Ok(insert_nonce) } else { Ok(*current_nonce) @@ -491,13 +471,17 @@ impl Sdk { let should_query_platform = match &entry { Entry::Vacant(_) => true, Entry::Occupied(e) => { - let (_, last_query_time) = e.get(); + let (current_nonce, last_query_time, last_platform_nonce) = e.get(); + // Re-fetch if the cache is stale by time OR if local bumps + // have drifted too far ahead of what Platform last reported. *last_query_time < current_time_s.saturating_sub( settings .identity_nonce_stale_time_s .unwrap_or(DEFAULT_IDENTITY_NONCE_STALE_TIME_S), ) + || current_nonce.saturating_sub(*last_platform_nonce) + > MAX_MISSING_IDENTITY_REVISIONS } }; @@ -512,54 +496,30 @@ impl Sdk { .await? .unwrap_or(IdentityContractNonceFetcher(0)) .0 & IDENTITY_NONCE_VALUE_FILTER; + let insert_nonce = if bump_first { + platform_nonce + 1 + } else { + platform_nonce + }; match entry { Entry::Vacant(e) => { - let insert_nonce = if bump_first { - platform_nonce + 1 - } else { - platform_nonce - }; - e.insert((insert_nonce, current_time_s)); - Ok(insert_nonce) + e.insert((insert_nonce, current_time_s, platform_nonce)); } Entry::Occupied(mut e) => { - let (current_nonce, _) = e.get(); - // If the cached nonce has drifted too far ahead of what - // Platform reports (e.g. due to repeated broadcast failures - // where the TX never actually made it to the mempool), reset - // to the platform value to avoid "nonce too far in future". - let effective_current = if current_nonce.saturating_sub(platform_nonce) - > MAX_MISSING_IDENTITY_REVISIONS - { - platform_nonce - } else { - *current_nonce - }; - let insert_nonce = if platform_nonce > effective_current { - if bump_first { - platform_nonce + 1 - } else { - platform_nonce - } - } else if bump_first { - effective_current + 1 - } else { - effective_current - }; - e.insert((insert_nonce, current_time_s)); - Ok(insert_nonce) + e.insert((insert_nonce, current_time_s, platform_nonce)); } } + Ok(insert_nonce) } else { match entry { Entry::Vacant(_) => { panic!("this can not happen, vacant entry not possible"); } Entry::Occupied(mut e) => { - let (current_nonce, _) = e.get(); + let (current_nonce, _, last_platform_nonce) = e.get(); if bump_first { let insert_nonce = current_nonce + 1; - e.insert((insert_nonce, current_time_s)); + e.insert((insert_nonce, current_time_s, *last_platform_nonce)); Ok(insert_nonce) } else { Ok(*current_nonce) @@ -572,18 +532,11 @@ impl Sdk { /// Marks identity nonce cache entries as stale so they are re-fetched from /// Platform on the next call to [`get_identity_nonce`] or /// [`get_identity_contract_nonce`]. - /// - /// Unlike removing the entries, marking them as stale preserves the cached - /// nonce **value**. When the re-fetch happens the SDK takes the maximum of - /// the platform value and the cached value, which prevents nonce regression. - /// Nonce regression would otherwise cause the SDK to recreate an identical - /// state transition that is already sitting in the Tenderdash mempool, - /// resulting in a "tx already exists in cache" error. pub async fn refresh_identity_nonce(&self, identity_id: &Identifier) { { let mut identity_nonce_counter = self.internal_cache.identity_nonce_counter.lock().await; - if let Some((_, timestamp)) = identity_nonce_counter.get_mut(identity_id) { + if let Some((_, timestamp, _)) = identity_nonce_counter.get_mut(identity_id) { *timestamp = 0; } } @@ -593,7 +546,7 @@ impl Sdk { .identity_contract_nonce_counter .lock() .await; - for ((cached_identity_id, _), (_, timestamp)) in + for ((cached_identity_id, _), (_, timestamp, _)) in identity_contract_nonce_counter.iter_mut() { if cached_identity_id == identity_id { From 8aae8c2fdb83580c171aafdfa88e6e6766885c4e Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Thu, 19 Feb 2026 14:03:29 +0100 Subject: [PATCH 05/17] refactor(sdk): extract shared nonce cache logic into get_or_fetch_nonce helper get_identity_nonce and get_identity_contract_nonce had identical cache logic differing only in key type and fetcher. Extract the shared staleness check, drift detection, fetch, bump, and cache update into a generic async helper that both methods delegate to. Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/sdk.rs | 151 ++++++++++++++----------------------- 1 file changed, 56 insertions(+), 95 deletions(-) diff --git a/packages/rs-sdk/src/sdk.rs b/packages/rs-sdk/src/sdk.rs index cf72f44625..fdee06ddbc 100644 --- a/packages/rs-sdk/src/sdk.rs +++ b/packages/rs-sdk/src/sdk.rs @@ -1,7 +1,7 @@ //! [Sdk] entrypoint to Dash Platform. use crate::error::{Error, StaleNodeError}; -use crate::internal_cache::InternalSdkCache; +use crate::internal_cache::{InternalSdkCache, NonceCacheEntry}; use crate::mock::MockResponse; #[cfg(feature = "mocks")] use crate::mock::{provider::GrpcContextProvider, MockDashPlatformSdk}; @@ -368,88 +368,34 @@ impl Sdk { settings: Option, ) -> Result { let settings = settings.unwrap_or_default(); - let current_time_s = get_current_time_seconds(); - - // we start by only using a read lock, as this speeds up the system - let mut identity_nonce_counter = self.internal_cache.identity_nonce_counter.lock().await; - let entry = identity_nonce_counter.entry(identity_id); - - let should_query_platform = match &entry { - Entry::Vacant(_) => true, - Entry::Occupied(e) => { - let (current_nonce, last_query_time, last_platform_nonce) = e.get(); - // Re-fetch if the cache is stale by time OR if local bumps - // have drifted too far ahead of what Platform last reported. - *last_query_time - < current_time_s.saturating_sub( - settings - .identity_nonce_stale_time_s - .unwrap_or(DEFAULT_IDENTITY_NONCE_STALE_TIME_S), - ) - || current_nonce.saturating_sub(*last_platform_nonce) - > MAX_MISSING_IDENTITY_REVISIONS - } - }; - - let nonce = if should_query_platform { - // Strip the upper "missing revisions" bits immediately so the - // cache only ever holds plain nonce values. This makes all - // downstream comparisons and increments safe without needing to - // mask on every return path. - let platform_nonce = IdentityNonceFetcher::fetch_with_settings( - self, - identity_id, - settings.request_settings, - ) - .await? - .unwrap_or(IdentityNonceFetcher(0)) - .0 & IDENTITY_NONCE_VALUE_FILTER; - let insert_nonce = if bump_first { - platform_nonce + 1 - } else { - platform_nonce - }; - match entry { - Entry::Vacant(e) => { - e.insert((insert_nonce, current_time_s, platform_nonce)); - } - Entry::Occupied(mut e) => { - e.insert((insert_nonce, current_time_s, platform_nonce)); - } - } - Ok(insert_nonce) - } else { - match entry { - Entry::Vacant(_) => { - panic!("this can not happen, vacant entry not possible"); - } - Entry::Occupied(mut e) => { - let (current_nonce, _, last_platform_nonce) = e.get(); - if bump_first { - let insert_nonce = current_nonce + 1; - e.insert((insert_nonce, current_time_s, *last_platform_nonce)); - Ok(insert_nonce) - } else { - Ok(*current_nonce) - } - } - } - }; + let mut cache = self.internal_cache.identity_nonce_counter.lock().await; + let nonce = + Self::get_or_fetch_nonce(cache.entry(identity_id), bump_first, &settings, || async { + Ok(IdentityNonceFetcher::fetch_with_settings( + self, + identity_id, + settings.request_settings, + ) + .await? + .unwrap_or(IdentityNonceFetcher(0)) + .0) + }) + .await?; tracing::trace!( identity_id = %identity_id, bump_first, - nonce = ?nonce, + nonce, "Fetched identity nonce" ); - nonce + Ok(nonce) } - // TODO: Move to a separate struct - /// Updates or fetches the nonce for a given identity and contract pair from a cache, - /// querying Platform if the cached value is stale or absent. Optionally - /// increments the nonce before storing it, based on the provided settings. + /// Updates or fetches the nonce for a given identity and contract pair from + /// the cache, querying Platform if the cached value is stale or absent. + /// Optionally increments the nonce before storing it, based on the provided + /// settings. pub async fn get_identity_contract_nonce( &self, identity_id: Identifier, @@ -458,15 +404,42 @@ impl Sdk { settings: Option, ) -> Result { let settings = settings.unwrap_or_default(); - let current_time_s = get_current_time_seconds(); - - // we start by only using a read lock, as this speeds up the system - let mut identity_contract_nonce_counter = self + let mut cache = self .internal_cache .identity_contract_nonce_counter .lock() .await; - let entry = identity_contract_nonce_counter.entry((identity_id, contract_id)); + Self::get_or_fetch_nonce( + cache.entry((identity_id, contract_id)), + bump_first, + &settings, + || async { + Ok(IdentityContractNonceFetcher::fetch_with_settings( + self, + (identity_id, contract_id), + settings.request_settings, + ) + .await? + .unwrap_or(IdentityContractNonceFetcher(0)) + .0) + }, + ) + .await + } + + /// Shared nonce cache logic. Checks staleness and drift, fetches from + /// Platform when needed, and maintains the cache entry. + async fn get_or_fetch_nonce( + entry: Entry<'_, K, NonceCacheEntry>, + bump_first: bool, + settings: &PutSettings, + fetch_from_platform: F, + ) -> Result + where + F: FnOnce() -> Fut, + Fut: std::future::Future>, + { + let current_time_s = get_current_time_seconds(); let should_query_platform = match &entry { Entry::Vacant(_) => true, @@ -488,27 +461,15 @@ impl Sdk { if should_query_platform { // Strip the upper "missing revisions" bits immediately so the // cache only ever holds plain nonce values. - let platform_nonce = IdentityContractNonceFetcher::fetch_with_settings( - self, - (identity_id, contract_id), - settings.request_settings, - ) - .await? - .unwrap_or(IdentityContractNonceFetcher(0)) - .0 & IDENTITY_NONCE_VALUE_FILTER; + let platform_nonce = fetch_from_platform().await? & IDENTITY_NONCE_VALUE_FILTER; let insert_nonce = if bump_first { platform_nonce + 1 } else { platform_nonce }; - match entry { - Entry::Vacant(e) => { - e.insert((insert_nonce, current_time_s, platform_nonce)); - } - Entry::Occupied(mut e) => { - e.insert((insert_nonce, current_time_s, platform_nonce)); - } - } + entry + .and_modify(|e| *e = (insert_nonce, current_time_s, platform_nonce)) + .or_insert((insert_nonce, current_time_s, platform_nonce)); Ok(insert_nonce) } else { match entry { From 65e69beef23a647ca9bb9b873bf462b4fcc582d2 Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Thu, 19 Feb 2026 14:49:21 +0100 Subject: [PATCH 06/17] fix(sdk): release nonce cache lock during platform fetch and preserve higher cached nonce - Do not hold the Mutex across the async fetch_from_platform call; split get_or_fetch_nonce into lock-check-unlock / fetch / lock-update so other callers are not blocked during the network round-trip. - After re-fetching, keep the higher of the cached nonce vs the Platform nonce to prevent regression when Platform has not yet indexed a recent successful broadcast. - Replace panic!("vacant entry not possible") with a structure that cannot reach the impossible state (early-return from cache hit). - Tighten drift threshold from > to >= MAX_MISSING_IDENTITY_REVISIONS as a precaution to stay within Platform's own limit. - Simplify cache write from and_modify().or_insert() to a single insert. Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/sdk.rs | 116 ++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 52 deletions(-) diff --git a/packages/rs-sdk/src/sdk.rs b/packages/rs-sdk/src/sdk.rs index fdee06ddbc..6cb247c1be 100644 --- a/packages/rs-sdk/src/sdk.rs +++ b/packages/rs-sdk/src/sdk.rs @@ -32,7 +32,7 @@ pub use rs_dapi_client::RequestSettings; use rs_dapi_client::{ transport::TransportRequest, DapiClient, DapiClientError, DapiRequestExecutor, ExecutionResult, }; -use std::collections::btree_map::Entry; +use std::collections::BTreeMap; use std::fmt::Debug; #[cfg(feature = "mocks")] use std::num::NonZeroUsize; @@ -43,8 +43,9 @@ use std::sync::atomic::Ordering; use std::sync::{atomic, Arc}; #[cfg(not(target_arch = "wasm32"))] use std::time::{SystemTime, UNIX_EPOCH}; +use tokio::sync::Mutex; #[cfg(feature = "mocks")] -use tokio::sync::{Mutex, MutexGuard}; +use tokio::sync::MutexGuard; use tokio_util::sync::{CancellationToken, WaitForCancellationFuture}; use zeroize::Zeroizing; @@ -368,9 +369,12 @@ impl Sdk { settings: Option, ) -> Result { let settings = settings.unwrap_or_default(); - let mut cache = self.internal_cache.identity_nonce_counter.lock().await; - let nonce = - Self::get_or_fetch_nonce(cache.entry(identity_id), bump_first, &settings, || async { + let nonce = Self::get_or_fetch_nonce( + &self.internal_cache.identity_nonce_counter, + identity_id, + bump_first, + &settings, + || async { Ok(IdentityNonceFetcher::fetch_with_settings( self, identity_id, @@ -379,8 +383,9 @@ impl Sdk { .await? .unwrap_or(IdentityNonceFetcher(0)) .0) - }) - .await?; + }, + ) + .await?; tracing::trace!( identity_id = %identity_id, @@ -404,13 +409,9 @@ impl Sdk { settings: Option, ) -> Result { let settings = settings.unwrap_or_default(); - let mut cache = self - .internal_cache - .identity_contract_nonce_counter - .lock() - .await; Self::get_or_fetch_nonce( - cache.entry((identity_id, contract_id)), + &self.internal_cache.identity_contract_nonce_counter, + (identity_id, contract_id), bump_first, &settings, || async { @@ -429,8 +430,13 @@ impl Sdk { /// Shared nonce cache logic. Checks staleness and drift, fetches from /// Platform when needed, and maintains the cache entry. - async fn get_or_fetch_nonce( - entry: Entry<'_, K, NonceCacheEntry>, + /// + /// The cache lock is held only briefly to read/write the entry; it is + /// **not** held across the async `fetch_from_platform` call so that other + /// callers are not blocked during the network round-trip. + async fn get_or_fetch_nonce( + cache: &Mutex>, + key: K, bump_first: bool, settings: &PutSettings, fetch_from_platform: F, @@ -441,53 +447,59 @@ impl Sdk { { let current_time_s = get_current_time_seconds(); - let should_query_platform = match &entry { - Entry::Vacant(_) => true, - Entry::Occupied(e) => { - let (current_nonce, last_query_time, last_platform_nonce) = e.get(); - // Re-fetch if the cache is stale by time OR if local bumps - // have drifted too far ahead of what Platform last reported. - *last_query_time + // Phase 1: Try to serve from cache without a network fetch. + { + let mut cache_guard = cache.lock().await; + if let Some(&(current_nonce, last_query_time, last_platform_nonce)) = + cache_guard.get(&key) + { + let stale_by_time = last_query_time < current_time_s.saturating_sub( settings .identity_nonce_stale_time_s .unwrap_or(DEFAULT_IDENTITY_NONCE_STALE_TIME_S), - ) - || current_nonce.saturating_sub(*last_platform_nonce) - > MAX_MISSING_IDENTITY_REVISIONS - } - }; - - if should_query_platform { - // Strip the upper "missing revisions" bits immediately so the - // cache only ever holds plain nonce values. - let platform_nonce = fetch_from_platform().await? & IDENTITY_NONCE_VALUE_FILTER; - let insert_nonce = if bump_first { - platform_nonce + 1 - } else { - platform_nonce - }; - entry - .and_modify(|e| *e = (insert_nonce, current_time_s, platform_nonce)) - .or_insert((insert_nonce, current_time_s, platform_nonce)); - Ok(insert_nonce) - } else { - match entry { - Entry::Vacant(_) => { - panic!("this can not happen, vacant entry not possible"); - } - Entry::Occupied(mut e) => { - let (current_nonce, _, last_platform_nonce) = e.get(); + ); + // Precautionary: use >= so we stay within Platform's own + // MAX_MISSING_IDENTITY_REVISIONS limit. + let drifted = current_nonce.saturating_sub(last_platform_nonce) + >= MAX_MISSING_IDENTITY_REVISIONS; + + if !stale_by_time && !drifted { + // Cache is fresh — serve from it. if bump_first { let insert_nonce = current_nonce + 1; - e.insert((insert_nonce, current_time_s, *last_platform_nonce)); - Ok(insert_nonce) + cache_guard + .insert(key, (insert_nonce, current_time_s, last_platform_nonce)); + return Ok(insert_nonce); } else { - Ok(*current_nonce) + return Ok(current_nonce); } } } - } + } // lock released — entry was absent or stale + + // Phase 2: Fetch from Platform without holding the cache lock. + // Strip the upper "missing revisions" bits immediately so the + // cache only ever holds plain nonce values. + let platform_nonce = fetch_from_platform().await? & IDENTITY_NONCE_VALUE_FILTER; + + // Phase 3: Re-acquire lock and update cache. + // Re-read the entry since another caller may have updated the cache + // while we were fetching. Keep the higher of cached vs Platform + // nonce to avoid regression (Platform may not have indexed a recent + // successful broadcast yet). + let mut cache_guard = cache.lock().await; + let base_nonce = match cache_guard.get(&key) { + Some(&(current_nonce, _, _)) if current_nonce > platform_nonce => current_nonce, + _ => platform_nonce, + }; + let insert_nonce = if bump_first { + base_nonce + 1 + } else { + base_nonce + }; + cache_guard.insert(key, (insert_nonce, current_time_s, platform_nonce)); + Ok(insert_nonce) } /// Marks identity nonce cache entries as stale so they are re-fetched from From 5b28eccf0c20adfff482212f66f6a7a31a3dcc61 Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Thu, 19 Feb 2026 15:08:50 +0100 Subject: [PATCH 07/17] test(sdk): add edge-case tests for nonce cache logic Cover get_or_fetch_nonce with 25 parameterized test cases: - empty cache fetch (basic, zero, filter-max, upper-bits stripping) - fresh cache hit (no-bump, bump, drift below threshold) - stale/drifted cache re-fetch (no-regress, platform-higher, drift-at-max) - error propagation, refresh staleness, key isolation, concurrency races Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/sdk.rs | 269 +++++++++++++++++++++++++++++++++++++ 1 file changed, 269 insertions(+) diff --git a/packages/rs-sdk/src/sdk.rs b/packages/rs-sdk/src/sdk.rs index 6cb247c1be..f080a1ce06 100644 --- a/packages/rs-sdk/src/sdk.rs +++ b/packages/rs-sdk/src/sdk.rs @@ -1305,4 +1305,273 @@ mod test { assert_eq!(result.is_err(), expect_err); } + + // ---- Nonce cache edge-case tests for get_or_fetch_nonce ---- + + mod nonce_cache { + use super::*; + use crate::internal_cache::NonceCacheEntry; + use crate::platform::transition::put_settings::PutSettings; + use dpp::identity::identity_nonce::{ + IDENTITY_NONCE_VALUE_FILTER, MAX_MISSING_IDENTITY_REVISIONS, + }; + use std::collections::BTreeMap; + use test_case::test_case; + use tokio::sync::Mutex; + + /// Helper: shorthand for get_or_fetch_nonce. + async fn fetch( + cache: &Mutex>, + bump: bool, + settings: &PutSettings, + platform_nonce: u64, + ) -> u64 { + super::super::Sdk::get_or_fetch_nonce(cache, 1u32, bump, settings, || async move { + Ok(platform_nonce) + }) + .await + .unwrap() + } + + fn now_s() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs() + } + + fn empty_cache() -> Mutex> { + Mutex::new(BTreeMap::new()) + } + + fn seeded_cache( + key: u32, + current_nonce: u64, + timestamp: u64, + last_platform_nonce: u64, + ) -> Mutex> { + let mut map = BTreeMap::new(); + map.insert(key, (current_nonce, timestamp, last_platform_nonce)); + Mutex::new(map) + } + + fn never_stale() -> PutSettings { + PutSettings { + identity_nonce_stale_time_s: Some(u64::MAX), + ..Default::default() + } + } + + // --- Empty cache: platform fetched, result = platform ± bump --- + // (platform_nonce, bump, expected_result, expected_stored_platform) + #[test_case(42, false, 42, 42 ; "basic no bump")] + #[test_case(42, true, 43, 42 ; "basic with bump")] + #[test_case(0, false, 0, 0 ; "zero no bump")] + #[test_case(0, true, 1, 0 ; "zero with bump")] + #[test_case(IDENTITY_NONCE_VALUE_FILTER, false, IDENTITY_NONCE_VALUE_FILTER, IDENTITY_NONCE_VALUE_FILTER ; "filter max no bump")] + #[test_case(IDENTITY_NONCE_VALUE_FILTER, true, IDENTITY_NONCE_VALUE_FILTER + 1, IDENTITY_NONCE_VALUE_FILTER ; "filter max with bump overflows")] + #[test_case(42 | (3 << 40), false, 42, 42 ; "upper bits stripped no bump")] + #[test_case(42 | (3 << 40), true, 43, 42 ; "upper bits stripped with bump")] + #[tokio::test] + async fn empty_cache_fetch( + platform_nonce: u64, + bump: bool, + expected: u64, + expected_stored_platform: u64, + ) { + let cache = empty_cache(); + let result = fetch(&cache, bump, &Default::default(), platform_nonce).await; + assert_eq!(result, expected); + let guard = cache.lock().await; + let &(nonce, _, platform) = guard.get(&1u32).unwrap(); + assert_eq!(nonce, expected); + assert_eq!(platform, expected_stored_platform); + } + + // --- Fresh cache hit (no platform fetch) --- + // (cached, last_platform, bump, expected) + #[test_case(10, 10, false, 10 ; "no bump returns cached")] + #[test_case(10, 10, true, 11 ; "bump increments cached")] + #[test_case(10 + MAX_MISSING_IDENTITY_REVISIONS - 1, 10, false, 10 + MAX_MISSING_IDENTITY_REVISIONS - 1 ; "drift just below max serves from cache")] + #[tokio::test] + async fn fresh_cache_hit(cached_nonce: u64, last_platform: u64, bump: bool, expected: u64) { + let cache = seeded_cache(1, cached_nonce, now_s(), last_platform); + let settings = never_stale(); + let result = + super::super::Sdk::get_or_fetch_nonce(&cache, 1u32, bump, &settings, || async { + panic!("should not fetch from platform") + }) + .await + .unwrap(); + assert_eq!(result, expected); + } + + // --- Stale or drifted cache triggers re-fetch --- + // stale_by_drift=false: timestamp=0, default settings (time-based staleness). + // stale_by_drift=true: timestamp=now, never_stale settings (drift-based staleness). + // Result = max(cached, platform) ± bump. + // (cached, cached_plat, platform_returns, bump, stale_by_drift, expected, expected_stored_plat) + #[test_case(10, 10, 15, false, false, 15, 15 ; "stale uses higher platform")] + #[test_case(10, 10, 15, true, false, 16, 15 ; "stale uses higher platform with bump")] + #[test_case(20, 10, 15, false, false, 20, 15 ; "preserves higher cached nonce")] + #[test_case(20, 10, 15, true, false, 21, 15 ; "preserves higher cached nonce with bump")] + #[test_case(10, 5, 50, false, false, 50, 50 ; "platform much higher replaces cache")] + #[test_case(10, 5, 50, true, false, 51, 50 ; "platform much higher replaces cache with bump")] + #[test_case(100, 90, 50 | (5 << 40), false, false, 100, 50 ; "upper bits stripped cache preserved")] + #[test_case(10 + MAX_MISSING_IDENTITY_REVISIONS, 10, 10 + MAX_MISSING_IDENTITY_REVISIONS, false, true, 10 + MAX_MISSING_IDENTITY_REVISIONS, 10 + MAX_MISSING_IDENTITY_REVISIONS ; "drift at max triggers refetch")] + #[tokio::test] + async fn cache_refetch( + cached_nonce: u64, + cached_platform: u64, + platform_returns: u64, + bump: bool, + stale_by_drift: bool, + expected: u64, + expected_stored_platform: u64, + ) { + let (timestamp, settings) = if stale_by_drift { + (now_s(), never_stale()) + } else { + (0, PutSettings::default()) + }; + let cache = seeded_cache(1, cached_nonce, timestamp, cached_platform); + let result = fetch(&cache, bump, &settings, platform_returns).await; + assert_eq!(result, expected); + let guard = cache.lock().await; + let &(_, _, platform) = guard.get(&1u32).unwrap(); + assert_eq!(platform, expected_stored_platform); + } + + // --- Multiple sequential bumps from cache --- + #[tokio::test] + async fn multiple_bumps_from_fresh_cache() { + let cache = seeded_cache(1, 5, now_s(), 5); + let settings = never_stale(); + for expected in 6..=10 { + let result = super::super::Sdk::get_or_fetch_nonce( + &cache, + 1u32, + true, + &settings, + || async { panic!("should not fetch from platform") }, + ) + .await + .unwrap(); + assert_eq!(result, expected); + } + let guard = cache.lock().await; + let &(nonce, _, platform) = guard.get(&1u32).unwrap(); + assert_eq!(nonce, 10); + assert_eq!(platform, 5, "last platform nonce unchanged through bumps"); + } + + // --- Fetch error propagates, cache untouched --- + #[tokio::test] + async fn fetch_error_propagates_and_cache_unchanged() { + let cache = seeded_cache(1, 10, 0, 10); + let result = super::super::Sdk::get_or_fetch_nonce( + &cache, + 1u32, + false, + &Default::default(), + || async { Err(crate::Error::Generic("platform unavailable".to_string())) }, + ) + .await; + assert!(result.is_err()); + let guard = cache.lock().await; + let &(nonce, ts, platform) = guard.get(&1u32).unwrap(); + assert_eq!(nonce, 10); + assert_eq!(ts, 0, "timestamp should not have changed"); + assert_eq!(platform, 10); + } + + // --- refresh_identity_nonce marks entry stale --- + #[tokio::test] + async fn refresh_marks_entries_stale_forcing_refetch() { + let cache = seeded_cache(1, 10, now_s(), 10); + let settings = never_stale(); + + // Confirm cache is served. + let result = + super::super::Sdk::get_or_fetch_nonce(&cache, 1u32, false, &settings, || async { + panic!("should not fetch") + }) + .await + .unwrap(); + assert_eq!(result, 10); + + // Simulate refresh_identity_nonce: set timestamp to 0. + cache.lock().await.get_mut(&1u32).unwrap().1 = 0; + + // With default settings (stale_time=1200s), timestamp=0 is stale. + let result = fetch(&cache, false, &PutSettings::default(), 20).await; + assert_eq!(result, 20, "should re-fetch after refresh"); + } + + // --- Different keys are isolated --- + #[tokio::test] + async fn different_keys_are_isolated() { + let cache = empty_cache(); + let settings = never_stale(); + + // Seed two keys via fetches. + for (key, val) in [(1u32, 100u64), (2u32, 200u64)] { + super::super::Sdk::get_or_fetch_nonce( + &cache, + key, + true, + &Default::default(), + || async move { Ok(val) }, + ) + .await + .unwrap(); + } + + // Read back: each key has its own value, served from cache. + for (key, expected) in [(1u32, 101u64), (2u32, 201u64)] { + let result = super::super::Sdk::get_or_fetch_nonce( + &cache, + key, + false, + &settings, + || async { panic!("should serve from cache") }, + ) + .await + .unwrap(); + assert_eq!(result, expected); + } + } + + // --- Concurrent cache update during fetch --- + // (concurrent_nonce, platform_returns, bump, expected) + #[test_case(15, 10, false, 15 ; "concurrent higher wins")] + #[test_case(5, 20, true, 21 ; "platform higher wins with bump")] + #[tokio::test] + async fn concurrent_update( + concurrent_nonce: u64, + platform_returns: u64, + bump: bool, + expected: u64, + ) { + let cache = Arc::new(empty_cache()); + let cache_clone = Arc::clone(&cache); + let result = super::super::Sdk::get_or_fetch_nonce( + &cache, + 1u32, + bump, + &Default::default(), + || async move { + // Simulate another caller updating cache during our fetch. + cache_clone + .lock() + .await + .insert(1u32, (concurrent_nonce, now_s(), concurrent_nonce)); + Ok(platform_returns) + }, + ) + .await + .unwrap(); + assert_eq!(result, expected); + } + } } From 603d9ec95e4caf3087c065f92e123fdb0fb14280 Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Mon, 23 Feb 2026 10:06:05 +0100 Subject: [PATCH 08/17] refactor(sdk): encapsulate nonce cache into dedicated NonceCache struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address PR #3111 review comments from shumkov: - Replace NonceCacheEntry tuple with named struct (current_nonce, last_fetch_timestamp, last_fetched_platform_nonce) - Replace (Identifier, Identifier) key with IdentityContractPair struct - Create NonceCache struct encapsulating all nonce logic with per-map Mutex locking and configurable stale time - Remove InternalSdkCache (was nonce-only, pointless wrapper) - Make Sdk nonce methods thin wrappers delegating to NonceCache - Move get_or_fetch_nonce, get_current_time_seconds, DEFAULT_IDENTITY_NONCE_STALE_TIME_S, and tests to internal_cache/mod.rs - Remove LastQueryTimestamp and StalenessInSeconds type aliases from sdk.rs Public API unchanged — all callers continue using sdk.get_identity_nonce(), sdk.get_identity_contract_nonce(), and sdk.refresh_identity_nonce(). Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/internal_cache/mod.rs | 568 ++++++++++++++++++++-- packages/rs-sdk/src/sdk.rs | 459 ++--------------- 2 files changed, 561 insertions(+), 466 deletions(-) diff --git a/packages/rs-sdk/src/internal_cache/mod.rs b/packages/rs-sdk/src/internal_cache/mod.rs index 2704dec71c..7932248fd2 100644 --- a/packages/rs-sdk/src/internal_cache/mod.rs +++ b/packages/rs-sdk/src/internal_cache/mod.rs @@ -1,48 +1,540 @@ +use crate::error::Error; +use crate::platform::transition::put_settings::PutSettings; use crate::platform::Identifier; -use crate::sdk::LastQueryTimestamp; +use dpp::identity::identity_nonce::{IDENTITY_NONCE_VALUE_FILTER, MAX_MISSING_IDENTITY_REVISIONS}; use dpp::prelude::IdentityNonce; use std::collections::BTreeMap; +use std::future::Future; use tokio::sync::Mutex; -/// Cached nonce state: `(current_nonce, last_fetch_timestamp, last_platform_nonce)`. +#[cfg(not(target_arch = "wasm32"))] +use std::time::{SystemTime, UNIX_EPOCH}; + +/// The default identity nonce stale time in seconds (20 minutes). +const DEFAULT_IDENTITY_NONCE_STALE_TIME_S: u64 = 1200; + +/// Cached nonce state for a single identity or identity-contract pair. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct NonceCacheEntry { + pub(crate) current_nonce: IdentityNonce, + pub(crate) last_fetch_timestamp: u64, + pub(crate) last_fetched_platform_nonce: IdentityNonce, +} + +/// Compound key for identity-contract nonce lookups. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(crate) struct IdentityContractPair { + pub(crate) identity_id: Identifier, + pub(crate) contract_id: Identifier, +} + +impl From<(Identifier, Identifier)> for IdentityContractPair { + fn from((identity_id, contract_id): (Identifier, Identifier)) -> Self { + Self { + identity_id, + contract_id, + } + } +} + +/// Nonce cache for identity and identity-contract nonces. /// -/// - `current_nonce` — the nonce value the SDK will use next (may be bumped -/// ahead of what Platform reported). -/// - `last_fetch_timestamp` — when we last fetched from Platform. -/// - `last_platform_nonce` — the value Platform returned on the last fetch -/// (after stripping upper bits). Used to detect when local bumps have -/// drifted too far ahead, triggering a re-fetch. -pub(crate) type NonceCacheEntry = (IdentityNonce, LastQueryTimestamp, IdentityNonce); - -/// This is a cache that is internal to the SDK that the user does not have to worry about -pub struct InternalSdkCache { - /// This is the identity nonce counter for the sdk - /// The sdk will automatically manage this counter for the user. - /// When the sdk user requests to update identities, withdraw or transfer - /// this will be automatically updated - /// This update can involve querying Platform for the current identity nonce - /// If the sdk user requests to put a state transition the counter is checked and either - /// returns an error or is updated. - pub(crate) identity_nonce_counter: tokio::sync::Mutex>, - - /// This is the identity contract nonce counter for the sdk - /// The sdk will automatically manage this counter for the user. - /// When the sdk user requests to put documents this will be automatically updated - /// This update can involve querying Platform for the current identity contract nonce - /// If the sdk user requests to put a state transition the counter is checked and either - /// returns an error or is updated. - pub(crate) identity_contract_nonce_counter: - tokio::sync::Mutex>, -} - -impl Default for InternalSdkCache { +/// Encapsulates all nonce caching logic previously spread across +/// `Sdk`. Uses per-map locking so identity +/// and contract nonce queries don't block each other. +pub(crate) struct NonceCache { + identity_nonces: Mutex>, + contract_nonces: Mutex>, + default_stale_time_s: u64, +} + +impl Default for NonceCache { fn default() -> Self { - InternalSdkCache { - identity_nonce_counter: Mutex::new(BTreeMap::::new()), - identity_contract_nonce_counter: Mutex::new(BTreeMap::< - (Identifier, Identifier), - NonceCacheEntry, - >::new()), + Self { + identity_nonces: Mutex::new(BTreeMap::new()), + contract_nonces: Mutex::new(BTreeMap::new()), + default_stale_time_s: DEFAULT_IDENTITY_NONCE_STALE_TIME_S, } } } + +/// Helper function to get current timestamp in seconds. +/// Works in both native and WASM environments. +fn get_current_time_seconds() -> u64 { + #[cfg(not(target_arch = "wasm32"))] + { + match SystemTime::now().duration_since(UNIX_EPOCH) { + Ok(n) => n.as_secs(), + Err(_) => panic!("SystemTime before UNIX EPOCH!"), + } + } + #[cfg(target_arch = "wasm32")] + { + // In WASM, we use JavaScript's Date.now() which returns milliseconds + // We need to convert to seconds + (js_sys::Date::now() / 1000.0) as u64 + } +} + +impl NonceCache { + /// Get or fetch identity nonce from cache. + pub(crate) async fn get_identity_nonce( + &self, + identity_id: Identifier, + bump_first: bool, + settings: &PutSettings, + fetch_from_platform: F, + ) -> Result + where + F: FnOnce() -> Fut, + Fut: Future>, + { + Self::get_or_fetch_nonce( + &self.identity_nonces, + identity_id, + bump_first, + settings, + self.default_stale_time_s, + fetch_from_platform, + ) + .await + } + + /// Get or fetch identity-contract nonce from cache. + pub(crate) async fn get_identity_contract_nonce( + &self, + identity_id: Identifier, + contract_id: Identifier, + bump_first: bool, + settings: &PutSettings, + fetch_from_platform: F, + ) -> Result + where + F: FnOnce() -> Fut, + Fut: Future>, + { + Self::get_or_fetch_nonce( + &self.contract_nonces, + IdentityContractPair { + identity_id, + contract_id, + }, + bump_first, + settings, + self.default_stale_time_s, + fetch_from_platform, + ) + .await + } + + /// Marks all nonce cache entries for the given identity as stale. + pub(crate) async fn refresh(&self, identity_id: &Identifier) { + { + let mut guard = self.identity_nonces.lock().await; + if let Some(entry) = guard.get_mut(identity_id) { + entry.last_fetch_timestamp = 0; + } + } + { + let mut guard = self.contract_nonces.lock().await; + for (pair, entry) in guard.iter_mut() { + if pair.identity_id == *identity_id { + entry.last_fetch_timestamp = 0; + } + } + } + } + + /// Shared nonce cache logic. Checks staleness and drift, fetches from + /// Platform when needed, and maintains the cache entry. + /// + /// The cache lock is held only briefly to read/write the entry; it is + /// **not** held across the async `fetch_from_platform` call so that other + /// callers are not blocked during the network round-trip. + async fn get_or_fetch_nonce( + cache: &Mutex>, + key: K, + bump_first: bool, + settings: &PutSettings, + default_stale_time_s: u64, + fetch_from_platform: F, + ) -> Result + where + F: FnOnce() -> Fut, + Fut: Future>, + { + let current_time_s = get_current_time_seconds(); + + // Phase 1: Try to serve from cache without a network fetch. + { + let mut cache_guard = cache.lock().await; + if let Some(entry) = cache_guard.get(&key).copied() { + let stale_by_time = entry.last_fetch_timestamp + < current_time_s.saturating_sub( + settings + .identity_nonce_stale_time_s + .unwrap_or(default_stale_time_s), + ); + // Precautionary: use >= so we stay within Platform's own + // MAX_MISSING_IDENTITY_REVISIONS limit. + let drifted = entry + .current_nonce + .saturating_sub(entry.last_fetched_platform_nonce) + >= MAX_MISSING_IDENTITY_REVISIONS; + + if !stale_by_time && !drifted { + // Cache is fresh -- serve from it. + if bump_first { + let insert_nonce = entry.current_nonce + 1; + cache_guard.insert( + key, + NonceCacheEntry { + current_nonce: insert_nonce, + last_fetch_timestamp: current_time_s, + last_fetched_platform_nonce: entry.last_fetched_platform_nonce, + }, + ); + return Ok(insert_nonce); + } else { + return Ok(entry.current_nonce); + } + } + } + } // lock released -- entry was absent or stale + + // Phase 2: Fetch from Platform without holding the cache lock. + // Strip the upper "missing revisions" bits immediately so the + // cache only ever holds plain nonce values. + let platform_nonce = fetch_from_platform().await? & IDENTITY_NONCE_VALUE_FILTER; + + // Phase 3: Re-acquire lock and update cache. + // Re-read the entry since another caller may have updated the cache + // while we were fetching. Keep the higher of cached vs Platform + // nonce to avoid regression (Platform may not have indexed a recent + // successful broadcast yet). + let mut cache_guard = cache.lock().await; + let base_nonce = match cache_guard.get(&key) { + Some(entry) if entry.current_nonce > platform_nonce => entry.current_nonce, + _ => platform_nonce, + }; + let insert_nonce = if bump_first { + base_nonce + 1 + } else { + base_nonce + }; + cache_guard.insert( + key, + NonceCacheEntry { + current_nonce: insert_nonce, + last_fetch_timestamp: current_time_s, + last_fetched_platform_nonce: platform_nonce, + }, + ); + Ok(insert_nonce) + } +} + +#[cfg(test)] +mod nonce_cache_tests { + use super::*; + use crate::platform::transition::put_settings::PutSettings; + use dpp::identity::identity_nonce::{ + IDENTITY_NONCE_VALUE_FILTER, MAX_MISSING_IDENTITY_REVISIONS, + }; + use std::collections::BTreeMap; + use std::sync::Arc; + use test_case::test_case; + use tokio::sync::Mutex; + + /// Helper: shorthand for get_or_fetch_nonce. + async fn fetch( + cache: &Mutex>, + bump: bool, + settings: &PutSettings, + platform_nonce: u64, + ) -> u64 { + NonceCache::get_or_fetch_nonce( + cache, + 1u32, + bump, + settings, + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async move { Ok(platform_nonce) }, + ) + .await + .unwrap() + } + + fn now_s() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_secs() + } + + fn empty_cache() -> Mutex> { + Mutex::new(BTreeMap::new()) + } + + fn seeded_cache( + key: u32, + current_nonce: u64, + timestamp: u64, + last_platform_nonce: u64, + ) -> Mutex> { + let mut map = BTreeMap::new(); + map.insert( + key, + NonceCacheEntry { + current_nonce, + last_fetch_timestamp: timestamp, + last_fetched_platform_nonce: last_platform_nonce, + }, + ); + Mutex::new(map) + } + + fn never_stale() -> PutSettings { + PutSettings { + identity_nonce_stale_time_s: Some(u64::MAX), + ..Default::default() + } + } + + // --- Empty cache: platform fetched, result = platform +/- bump --- + // (platform_nonce, bump, expected_result, expected_stored_platform) + #[test_case(42, false, 42, 42 ; "basic no bump")] + #[test_case(42, true, 43, 42 ; "basic with bump")] + #[test_case(0, false, 0, 0 ; "zero no bump")] + #[test_case(0, true, 1, 0 ; "zero with bump")] + #[test_case(IDENTITY_NONCE_VALUE_FILTER, false, IDENTITY_NONCE_VALUE_FILTER, IDENTITY_NONCE_VALUE_FILTER ; "filter max no bump")] + #[test_case(IDENTITY_NONCE_VALUE_FILTER, true, IDENTITY_NONCE_VALUE_FILTER + 1, IDENTITY_NONCE_VALUE_FILTER ; "filter max with bump overflows")] + #[test_case(42 | (3 << 40), false, 42, 42 ; "upper bits stripped no bump")] + #[test_case(42 | (3 << 40), true, 43, 42 ; "upper bits stripped with bump")] + #[tokio::test] + async fn empty_cache_fetch( + platform_nonce: u64, + bump: bool, + expected: u64, + expected_stored_platform: u64, + ) { + let cache = empty_cache(); + let result = fetch(&cache, bump, &Default::default(), platform_nonce).await; + assert_eq!(result, expected); + let guard = cache.lock().await; + let entry = guard.get(&1u32).unwrap(); + assert_eq!(entry.current_nonce, expected); + assert_eq!(entry.last_fetched_platform_nonce, expected_stored_platform); + } + + // --- Fresh cache hit (no platform fetch) --- + // (cached, last_platform, bump, expected) + #[test_case(10, 10, false, 10 ; "no bump returns cached")] + #[test_case(10, 10, true, 11 ; "bump increments cached")] + #[test_case(10 + MAX_MISSING_IDENTITY_REVISIONS - 1, 10, false, 10 + MAX_MISSING_IDENTITY_REVISIONS - 1 ; "drift just below max serves from cache")] + #[tokio::test] + async fn fresh_cache_hit(cached_nonce: u64, last_platform: u64, bump: bool, expected: u64) { + let cache = seeded_cache(1, cached_nonce, now_s(), last_platform); + let settings = never_stale(); + let result = NonceCache::get_or_fetch_nonce( + &cache, + 1u32, + bump, + &settings, + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async { panic!("should not fetch from platform") }, + ) + .await + .unwrap(); + assert_eq!(result, expected); + } + + // --- Stale or drifted cache triggers re-fetch --- + // (cached, cached_plat, platform_returns, bump, stale_by_drift, expected, expected_stored_plat) + #[test_case(10, 10, 15, false, false, 15, 15 ; "stale uses higher platform")] + #[test_case(10, 10, 15, true, false, 16, 15 ; "stale uses higher platform with bump")] + #[test_case(20, 10, 15, false, false, 20, 15 ; "preserves higher cached nonce")] + #[test_case(20, 10, 15, true, false, 21, 15 ; "preserves higher cached nonce with bump")] + #[test_case(10, 5, 50, false, false, 50, 50 ; "platform much higher replaces cache")] + #[test_case(10, 5, 50, true, false, 51, 50 ; "platform much higher replaces cache with bump")] + #[test_case(100, 90, 50 | (5 << 40), false, false, 100, 50 ; "upper bits stripped cache preserved")] + #[test_case(10 + MAX_MISSING_IDENTITY_REVISIONS, 10, 10 + MAX_MISSING_IDENTITY_REVISIONS, false, true, 10 + MAX_MISSING_IDENTITY_REVISIONS, 10 + MAX_MISSING_IDENTITY_REVISIONS ; "drift at max triggers refetch")] + #[tokio::test] + async fn cache_refetch( + cached_nonce: u64, + cached_platform: u64, + platform_returns: u64, + bump: bool, + stale_by_drift: bool, + expected: u64, + expected_stored_platform: u64, + ) { + let (timestamp, settings) = if stale_by_drift { + (now_s(), never_stale()) + } else { + (0, PutSettings::default()) + }; + let cache = seeded_cache(1, cached_nonce, timestamp, cached_platform); + let result = fetch(&cache, bump, &settings, platform_returns).await; + assert_eq!(result, expected); + let guard = cache.lock().await; + let entry = guard.get(&1u32).unwrap(); + assert_eq!(entry.last_fetched_platform_nonce, expected_stored_platform); + } + + // --- Multiple sequential bumps from cache --- + #[tokio::test] + async fn multiple_bumps_from_fresh_cache() { + let cache = seeded_cache(1, 5, now_s(), 5); + let settings = never_stale(); + for expected in 6..=10 { + let result = NonceCache::get_or_fetch_nonce( + &cache, + 1u32, + true, + &settings, + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async { panic!("should not fetch from platform") }, + ) + .await + .unwrap(); + assert_eq!(result, expected); + } + let guard = cache.lock().await; + let entry = guard.get(&1u32).unwrap(); + assert_eq!(entry.current_nonce, 10); + assert_eq!( + entry.last_fetched_platform_nonce, 5, + "last platform nonce unchanged through bumps" + ); + } + + // --- Fetch error propagates, cache untouched --- + #[tokio::test] + async fn fetch_error_propagates_and_cache_unchanged() { + let cache = seeded_cache(1, 10, 0, 10); + let result = NonceCache::get_or_fetch_nonce( + &cache, + 1u32, + false, + &Default::default(), + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async { Err(crate::Error::Generic("platform unavailable".to_string())) }, + ) + .await; + assert!(result.is_err()); + let guard = cache.lock().await; + let entry = guard.get(&1u32).unwrap(); + assert_eq!(entry.current_nonce, 10); + assert_eq!( + entry.last_fetch_timestamp, 0, + "timestamp should not have changed" + ); + assert_eq!(entry.last_fetched_platform_nonce, 10); + } + + // --- refresh marks entry stale --- + #[tokio::test] + async fn refresh_marks_entries_stale_forcing_refetch() { + let cache = seeded_cache(1, 10, now_s(), 10); + let settings = never_stale(); + + // Confirm cache is served. + let result = NonceCache::get_or_fetch_nonce( + &cache, + 1u32, + false, + &settings, + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async { panic!("should not fetch") }, + ) + .await + .unwrap(); + assert_eq!(result, 10); + + // Simulate refresh: set timestamp to 0. + cache + .lock() + .await + .get_mut(&1u32) + .unwrap() + .last_fetch_timestamp = 0; + + // With default settings (stale_time=1200s), timestamp=0 is stale. + let result = fetch(&cache, false, &PutSettings::default(), 20).await; + assert_eq!(result, 20, "should re-fetch after refresh"); + } + + // --- Different keys are isolated --- + #[tokio::test] + async fn different_keys_are_isolated() { + let cache = empty_cache(); + let settings = never_stale(); + + // Seed two keys via fetches. + for (key, val) in [(1u32, 100u64), (2u32, 200u64)] { + NonceCache::get_or_fetch_nonce( + &cache, + key, + true, + &Default::default(), + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async move { Ok(val) }, + ) + .await + .unwrap(); + } + + // Read back: each key has its own value, served from cache. + for (key, expected) in [(1u32, 101u64), (2u32, 201u64)] { + let result = NonceCache::get_or_fetch_nonce( + &cache, + key, + false, + &settings, + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async { panic!("should serve from cache") }, + ) + .await + .unwrap(); + assert_eq!(result, expected); + } + } + + // --- Concurrent cache update during fetch --- + // (concurrent_nonce, platform_returns, bump, expected) + #[test_case(15, 10, false, 15 ; "concurrent higher wins")] + #[test_case(5, 20, true, 21 ; "platform higher wins with bump")] + #[tokio::test] + async fn concurrent_update( + concurrent_nonce: u64, + platform_returns: u64, + bump: bool, + expected: u64, + ) { + let cache = Arc::new(empty_cache()); + let cache_clone = Arc::clone(&cache); + let result = NonceCache::get_or_fetch_nonce( + &cache, + 1u32, + bump, + &Default::default(), + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async move { + // Simulate another caller updating cache during our fetch. + cache_clone.lock().await.insert( + 1u32, + NonceCacheEntry { + current_nonce: concurrent_nonce, + last_fetch_timestamp: now_s(), + last_fetched_platform_nonce: concurrent_nonce, + }, + ); + Ok(platform_returns) + }, + ) + .await + .unwrap(); + assert_eq!(result, expected); + } +} diff --git a/packages/rs-sdk/src/sdk.rs b/packages/rs-sdk/src/sdk.rs index f080a1ce06..55b0dea747 100644 --- a/packages/rs-sdk/src/sdk.rs +++ b/packages/rs-sdk/src/sdk.rs @@ -1,7 +1,7 @@ //! [Sdk] entrypoint to Dash Platform. use crate::error::{Error, StaleNodeError}; -use crate::internal_cache::{InternalSdkCache, NonceCacheEntry}; +use crate::internal_cache::NonceCache; use crate::mock::MockResponse; #[cfg(feature = "mocks")] use crate::mock::{provider::GrpcContextProvider, MockDashPlatformSdk}; @@ -18,7 +18,6 @@ use dash_context_provider::MockContextProvider; use dpp::bincode; use dpp::bincode::error::DecodeError; use dpp::dashcore::Network; -use dpp::identity::identity_nonce::{IDENTITY_NONCE_VALUE_FILTER, MAX_MISSING_IDENTITY_REVISIONS}; use dpp::prelude::IdentityNonce; use dpp::version::{PlatformVersion, PlatformVersionCurrentVersion}; use drive::grovedb::operations::proof::GroveDBProof; @@ -32,7 +31,6 @@ pub use rs_dapi_client::RequestSettings; use rs_dapi_client::{ transport::TransportRequest, DapiClient, DapiClientError, DapiRequestExecutor, ExecutionResult, }; -use std::collections::BTreeMap; use std::fmt::Debug; #[cfg(feature = "mocks")] use std::num::NonZeroUsize; @@ -41,11 +39,8 @@ use std::path::Path; use std::path::PathBuf; use std::sync::atomic::Ordering; use std::sync::{atomic, Arc}; -#[cfg(not(target_arch = "wasm32"))] -use std::time::{SystemTime, UNIX_EPOCH}; -use tokio::sync::Mutex; #[cfg(feature = "mocks")] -use tokio::sync::MutexGuard; +use tokio::sync::{Mutex, MutexGuard}; use tokio_util::sync::{CancellationToken, WaitForCancellationFuture}; use zeroize::Zeroizing; @@ -55,8 +50,6 @@ pub const DEFAULT_CONTRACT_CACHE_SIZE: usize = 100; pub const DEFAULT_TOKEN_CONFIG_CACHE_SIZE: usize = 100; /// How many quorum public keys fit in the cache. pub const DEFAULT_QUORUM_PUBLIC_KEYS_CACHE_SIZE: usize = 100; -/// The default identity nonce stale time in seconds -pub const DEFAULT_IDENTITY_NONCE_STALE_TIME_S: u64 = 1200; //20 minutes /// The default metadata time tolerance for checkpoint queries in milliseconds const ADDRESS_STATE_TIME_TOLERANCE_MS: u64 = 31 * 60 * 1000; @@ -71,12 +64,6 @@ const DEFAULT_REQUEST_SETTINGS: RequestSettings = RequestSettings { max_decoding_message_size: None, }; -/// a type to represent staleness in seconds -pub type StalenessInSeconds = u64; - -/// The last query timestamp -pub type LastQueryTimestamp = u64; - /// Dash Platform SDK /// /// This is the main entry point for interacting with Dash Platform. @@ -111,8 +98,8 @@ pub struct Sdk { /// This is set to `true` by default. `false` is not implemented yet. proofs: bool, - /// An internal SDK cache managed exclusively by the SDK - internal_cache: Arc, + /// Nonce cache managed exclusively by the SDK. + nonce_cache: Arc, /// Context provider used by the SDK. /// @@ -151,7 +138,7 @@ impl Clone for Sdk { network: self.network, inner: self.inner.clone(), proofs: self.proofs, - internal_cache: Arc::clone(&self.internal_cache), + nonce_cache: Arc::clone(&self.nonce_cache), context_provider: ArcSwapOption::new(self.context_provider.load_full()), cancel_token: self.cancel_token.clone(), metadata_last_seen_height: Arc::clone(&self.metadata_last_seen_height), @@ -211,24 +198,6 @@ enum SdkInstance { }, } -/// Helper function to get current timestamp in seconds -/// Works in both native and WASM environments -fn get_current_time_seconds() -> u64 { - #[cfg(not(target_arch = "wasm32"))] - { - match SystemTime::now().duration_since(UNIX_EPOCH) { - Ok(n) => n.as_secs(), - Err(_) => panic!("SystemTime before UNIX EPOCH!"), - } - } - #[cfg(target_arch = "wasm32")] - { - // In WASM, we use JavaScript's Date.now() which returns milliseconds - // We need to convert to seconds - (js_sys::Date::now() / 1000.0) as u64 - } -} - impl Sdk { /// Initialize Dash Platform SDK in mock mode. /// @@ -369,12 +338,9 @@ impl Sdk { settings: Option, ) -> Result { let settings = settings.unwrap_or_default(); - let nonce = Self::get_or_fetch_nonce( - &self.internal_cache.identity_nonce_counter, - identity_id, - bump_first, - &settings, - || async { + let nonce = self + .nonce_cache + .get_identity_nonce(identity_id, bump_first, &settings, || async { Ok(IdentityNonceFetcher::fetch_with_settings( self, identity_id, @@ -383,9 +349,8 @@ impl Sdk { .await? .unwrap_or(IdentityNonceFetcher(0)) .0) - }, - ) - .await?; + }) + .await?; tracing::trace!( identity_id = %identity_id, @@ -409,124 +374,31 @@ impl Sdk { settings: Option, ) -> Result { let settings = settings.unwrap_or_default(); - Self::get_or_fetch_nonce( - &self.internal_cache.identity_contract_nonce_counter, - (identity_id, contract_id), - bump_first, - &settings, - || async { - Ok(IdentityContractNonceFetcher::fetch_with_settings( - self, - (identity_id, contract_id), - settings.request_settings, - ) - .await? - .unwrap_or(IdentityContractNonceFetcher(0)) - .0) - }, - ) - .await - } - - /// Shared nonce cache logic. Checks staleness and drift, fetches from - /// Platform when needed, and maintains the cache entry. - /// - /// The cache lock is held only briefly to read/write the entry; it is - /// **not** held across the async `fetch_from_platform` call so that other - /// callers are not blocked during the network round-trip. - async fn get_or_fetch_nonce( - cache: &Mutex>, - key: K, - bump_first: bool, - settings: &PutSettings, - fetch_from_platform: F, - ) -> Result - where - F: FnOnce() -> Fut, - Fut: std::future::Future>, - { - let current_time_s = get_current_time_seconds(); - - // Phase 1: Try to serve from cache without a network fetch. - { - let mut cache_guard = cache.lock().await; - if let Some(&(current_nonce, last_query_time, last_platform_nonce)) = - cache_guard.get(&key) - { - let stale_by_time = last_query_time - < current_time_s.saturating_sub( - settings - .identity_nonce_stale_time_s - .unwrap_or(DEFAULT_IDENTITY_NONCE_STALE_TIME_S), - ); - // Precautionary: use >= so we stay within Platform's own - // MAX_MISSING_IDENTITY_REVISIONS limit. - let drifted = current_nonce.saturating_sub(last_platform_nonce) - >= MAX_MISSING_IDENTITY_REVISIONS; - - if !stale_by_time && !drifted { - // Cache is fresh — serve from it. - if bump_first { - let insert_nonce = current_nonce + 1; - cache_guard - .insert(key, (insert_nonce, current_time_s, last_platform_nonce)); - return Ok(insert_nonce); - } else { - return Ok(current_nonce); - } - } - } - } // lock released — entry was absent or stale - - // Phase 2: Fetch from Platform without holding the cache lock. - // Strip the upper "missing revisions" bits immediately so the - // cache only ever holds plain nonce values. - let platform_nonce = fetch_from_platform().await? & IDENTITY_NONCE_VALUE_FILTER; - - // Phase 3: Re-acquire lock and update cache. - // Re-read the entry since another caller may have updated the cache - // while we were fetching. Keep the higher of cached vs Platform - // nonce to avoid regression (Platform may not have indexed a recent - // successful broadcast yet). - let mut cache_guard = cache.lock().await; - let base_nonce = match cache_guard.get(&key) { - Some(&(current_nonce, _, _)) if current_nonce > platform_nonce => current_nonce, - _ => platform_nonce, - }; - let insert_nonce = if bump_first { - base_nonce + 1 - } else { - base_nonce - }; - cache_guard.insert(key, (insert_nonce, current_time_s, platform_nonce)); - Ok(insert_nonce) + self.nonce_cache + .get_identity_contract_nonce( + identity_id, + contract_id, + bump_first, + &settings, + || async { + Ok(IdentityContractNonceFetcher::fetch_with_settings( + self, + (identity_id, contract_id), + settings.request_settings, + ) + .await? + .unwrap_or(IdentityContractNonceFetcher(0)) + .0) + }, + ) + .await } /// Marks identity nonce cache entries as stale so they are re-fetched from /// Platform on the next call to [`get_identity_nonce`] or /// [`get_identity_contract_nonce`]. pub async fn refresh_identity_nonce(&self, identity_id: &Identifier) { - { - let mut identity_nonce_counter = - self.internal_cache.identity_nonce_counter.lock().await; - if let Some((_, timestamp, _)) = identity_nonce_counter.get_mut(identity_id) { - *timestamp = 0; - } - } - { - let mut identity_contract_nonce_counter = self - .internal_cache - .identity_contract_nonce_counter - .lock() - .await; - for ((cached_identity_id, _), (_, timestamp, _)) in - identity_contract_nonce_counter.iter_mut() - { - if cached_identity_id == identity_id { - *timestamp = 0; - } - } - } + self.nonce_cache.refresh(identity_id).await; } /// Return [Dash Platform version](PlatformVersion) information used by this SDK. @@ -1043,7 +915,7 @@ impl SdkBuilder { proofs:self.proofs, context_provider: ArcSwapOption::new( self.context_provider.map(Arc::new)), cancel_token: self.cancel_token, - internal_cache: Default::default(), + nonce_cache: Default::default(), // Note: in the future, we need to securely initialize initial height during Sdk bootstrap or first request. metadata_last_seen_height: Arc::new(atomic::AtomicU64::new(0)), metadata_height_tolerance: self.metadata_height_tolerance, @@ -1110,7 +982,7 @@ impl SdkBuilder { }, dump_dir: self.dump_dir.clone(), proofs:self.proofs, - internal_cache: Default::default(), + nonce_cache: Default::default(), context_provider: ArcSwapOption::new(Some(Arc::new(context_provider))), cancel_token: self.cancel_token, metadata_last_seen_height: Arc::new(atomic::AtomicU64::new(0)), @@ -1305,273 +1177,4 @@ mod test { assert_eq!(result.is_err(), expect_err); } - - // ---- Nonce cache edge-case tests for get_or_fetch_nonce ---- - - mod nonce_cache { - use super::*; - use crate::internal_cache::NonceCacheEntry; - use crate::platform::transition::put_settings::PutSettings; - use dpp::identity::identity_nonce::{ - IDENTITY_NONCE_VALUE_FILTER, MAX_MISSING_IDENTITY_REVISIONS, - }; - use std::collections::BTreeMap; - use test_case::test_case; - use tokio::sync::Mutex; - - /// Helper: shorthand for get_or_fetch_nonce. - async fn fetch( - cache: &Mutex>, - bump: bool, - settings: &PutSettings, - platform_nonce: u64, - ) -> u64 { - super::super::Sdk::get_or_fetch_nonce(cache, 1u32, bump, settings, || async move { - Ok(platform_nonce) - }) - .await - .unwrap() - } - - fn now_s() -> u64 { - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_secs() - } - - fn empty_cache() -> Mutex> { - Mutex::new(BTreeMap::new()) - } - - fn seeded_cache( - key: u32, - current_nonce: u64, - timestamp: u64, - last_platform_nonce: u64, - ) -> Mutex> { - let mut map = BTreeMap::new(); - map.insert(key, (current_nonce, timestamp, last_platform_nonce)); - Mutex::new(map) - } - - fn never_stale() -> PutSettings { - PutSettings { - identity_nonce_stale_time_s: Some(u64::MAX), - ..Default::default() - } - } - - // --- Empty cache: platform fetched, result = platform ± bump --- - // (platform_nonce, bump, expected_result, expected_stored_platform) - #[test_case(42, false, 42, 42 ; "basic no bump")] - #[test_case(42, true, 43, 42 ; "basic with bump")] - #[test_case(0, false, 0, 0 ; "zero no bump")] - #[test_case(0, true, 1, 0 ; "zero with bump")] - #[test_case(IDENTITY_NONCE_VALUE_FILTER, false, IDENTITY_NONCE_VALUE_FILTER, IDENTITY_NONCE_VALUE_FILTER ; "filter max no bump")] - #[test_case(IDENTITY_NONCE_VALUE_FILTER, true, IDENTITY_NONCE_VALUE_FILTER + 1, IDENTITY_NONCE_VALUE_FILTER ; "filter max with bump overflows")] - #[test_case(42 | (3 << 40), false, 42, 42 ; "upper bits stripped no bump")] - #[test_case(42 | (3 << 40), true, 43, 42 ; "upper bits stripped with bump")] - #[tokio::test] - async fn empty_cache_fetch( - platform_nonce: u64, - bump: bool, - expected: u64, - expected_stored_platform: u64, - ) { - let cache = empty_cache(); - let result = fetch(&cache, bump, &Default::default(), platform_nonce).await; - assert_eq!(result, expected); - let guard = cache.lock().await; - let &(nonce, _, platform) = guard.get(&1u32).unwrap(); - assert_eq!(nonce, expected); - assert_eq!(platform, expected_stored_platform); - } - - // --- Fresh cache hit (no platform fetch) --- - // (cached, last_platform, bump, expected) - #[test_case(10, 10, false, 10 ; "no bump returns cached")] - #[test_case(10, 10, true, 11 ; "bump increments cached")] - #[test_case(10 + MAX_MISSING_IDENTITY_REVISIONS - 1, 10, false, 10 + MAX_MISSING_IDENTITY_REVISIONS - 1 ; "drift just below max serves from cache")] - #[tokio::test] - async fn fresh_cache_hit(cached_nonce: u64, last_platform: u64, bump: bool, expected: u64) { - let cache = seeded_cache(1, cached_nonce, now_s(), last_platform); - let settings = never_stale(); - let result = - super::super::Sdk::get_or_fetch_nonce(&cache, 1u32, bump, &settings, || async { - panic!("should not fetch from platform") - }) - .await - .unwrap(); - assert_eq!(result, expected); - } - - // --- Stale or drifted cache triggers re-fetch --- - // stale_by_drift=false: timestamp=0, default settings (time-based staleness). - // stale_by_drift=true: timestamp=now, never_stale settings (drift-based staleness). - // Result = max(cached, platform) ± bump. - // (cached, cached_plat, platform_returns, bump, stale_by_drift, expected, expected_stored_plat) - #[test_case(10, 10, 15, false, false, 15, 15 ; "stale uses higher platform")] - #[test_case(10, 10, 15, true, false, 16, 15 ; "stale uses higher platform with bump")] - #[test_case(20, 10, 15, false, false, 20, 15 ; "preserves higher cached nonce")] - #[test_case(20, 10, 15, true, false, 21, 15 ; "preserves higher cached nonce with bump")] - #[test_case(10, 5, 50, false, false, 50, 50 ; "platform much higher replaces cache")] - #[test_case(10, 5, 50, true, false, 51, 50 ; "platform much higher replaces cache with bump")] - #[test_case(100, 90, 50 | (5 << 40), false, false, 100, 50 ; "upper bits stripped cache preserved")] - #[test_case(10 + MAX_MISSING_IDENTITY_REVISIONS, 10, 10 + MAX_MISSING_IDENTITY_REVISIONS, false, true, 10 + MAX_MISSING_IDENTITY_REVISIONS, 10 + MAX_MISSING_IDENTITY_REVISIONS ; "drift at max triggers refetch")] - #[tokio::test] - async fn cache_refetch( - cached_nonce: u64, - cached_platform: u64, - platform_returns: u64, - bump: bool, - stale_by_drift: bool, - expected: u64, - expected_stored_platform: u64, - ) { - let (timestamp, settings) = if stale_by_drift { - (now_s(), never_stale()) - } else { - (0, PutSettings::default()) - }; - let cache = seeded_cache(1, cached_nonce, timestamp, cached_platform); - let result = fetch(&cache, bump, &settings, platform_returns).await; - assert_eq!(result, expected); - let guard = cache.lock().await; - let &(_, _, platform) = guard.get(&1u32).unwrap(); - assert_eq!(platform, expected_stored_platform); - } - - // --- Multiple sequential bumps from cache --- - #[tokio::test] - async fn multiple_bumps_from_fresh_cache() { - let cache = seeded_cache(1, 5, now_s(), 5); - let settings = never_stale(); - for expected in 6..=10 { - let result = super::super::Sdk::get_or_fetch_nonce( - &cache, - 1u32, - true, - &settings, - || async { panic!("should not fetch from platform") }, - ) - .await - .unwrap(); - assert_eq!(result, expected); - } - let guard = cache.lock().await; - let &(nonce, _, platform) = guard.get(&1u32).unwrap(); - assert_eq!(nonce, 10); - assert_eq!(platform, 5, "last platform nonce unchanged through bumps"); - } - - // --- Fetch error propagates, cache untouched --- - #[tokio::test] - async fn fetch_error_propagates_and_cache_unchanged() { - let cache = seeded_cache(1, 10, 0, 10); - let result = super::super::Sdk::get_or_fetch_nonce( - &cache, - 1u32, - false, - &Default::default(), - || async { Err(crate::Error::Generic("platform unavailable".to_string())) }, - ) - .await; - assert!(result.is_err()); - let guard = cache.lock().await; - let &(nonce, ts, platform) = guard.get(&1u32).unwrap(); - assert_eq!(nonce, 10); - assert_eq!(ts, 0, "timestamp should not have changed"); - assert_eq!(platform, 10); - } - - // --- refresh_identity_nonce marks entry stale --- - #[tokio::test] - async fn refresh_marks_entries_stale_forcing_refetch() { - let cache = seeded_cache(1, 10, now_s(), 10); - let settings = never_stale(); - - // Confirm cache is served. - let result = - super::super::Sdk::get_or_fetch_nonce(&cache, 1u32, false, &settings, || async { - panic!("should not fetch") - }) - .await - .unwrap(); - assert_eq!(result, 10); - - // Simulate refresh_identity_nonce: set timestamp to 0. - cache.lock().await.get_mut(&1u32).unwrap().1 = 0; - - // With default settings (stale_time=1200s), timestamp=0 is stale. - let result = fetch(&cache, false, &PutSettings::default(), 20).await; - assert_eq!(result, 20, "should re-fetch after refresh"); - } - - // --- Different keys are isolated --- - #[tokio::test] - async fn different_keys_are_isolated() { - let cache = empty_cache(); - let settings = never_stale(); - - // Seed two keys via fetches. - for (key, val) in [(1u32, 100u64), (2u32, 200u64)] { - super::super::Sdk::get_or_fetch_nonce( - &cache, - key, - true, - &Default::default(), - || async move { Ok(val) }, - ) - .await - .unwrap(); - } - - // Read back: each key has its own value, served from cache. - for (key, expected) in [(1u32, 101u64), (2u32, 201u64)] { - let result = super::super::Sdk::get_or_fetch_nonce( - &cache, - key, - false, - &settings, - || async { panic!("should serve from cache") }, - ) - .await - .unwrap(); - assert_eq!(result, expected); - } - } - - // --- Concurrent cache update during fetch --- - // (concurrent_nonce, platform_returns, bump, expected) - #[test_case(15, 10, false, 15 ; "concurrent higher wins")] - #[test_case(5, 20, true, 21 ; "platform higher wins with bump")] - #[tokio::test] - async fn concurrent_update( - concurrent_nonce: u64, - platform_returns: u64, - bump: bool, - expected: u64, - ) { - let cache = Arc::new(empty_cache()); - let cache_clone = Arc::clone(&cache); - let result = super::super::Sdk::get_or_fetch_nonce( - &cache, - 1u32, - bump, - &Default::default(), - || async move { - // Simulate another caller updating cache during our fetch. - cache_clone - .lock() - .await - .insert(1u32, (concurrent_nonce, now_s(), concurrent_nonce)); - Ok(platform_returns) - }, - ) - .await - .unwrap(); - assert_eq!(result, expected); - } - } } From d1225ccd303cd521a5898f87471717b994e194f7 Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Mon, 23 Feb 2026 10:53:32 +0100 Subject: [PATCH 09/17] fix(sdk): replace BTreeMap with LruCache in NonceCache to fix TOCTOU race and bound memory - Switch from BTreeMap to lru::LruCache (bounded to 100 entries) for both identity and contract nonce caches, fixing unbounded growth - Hold cache lock end-to-end across async Platform fetch, eliminating the TOCTOU race condition (SEC-001) that could cause nonce regression - Apply IDENTITY_NONCE_VALUE_FILTER mask on every nonce bump, not just after Platform fetch (SEC-002) - Change refresh() to pop() entries entirely instead of zeroing timestamps, matching pre-refactor behavior (CODE-001) - Return Result from get_current_time_seconds() instead of panicking on clock error (RUST-001) - Do not update last_fetch_timestamp on cache-only bumps (CODE-002) - Make lru a non-optional dependency (remove feature-gating behind mocks) - Update tests: add LRU eviction, nonce filter masking, and timestamp preservation tests; remove concurrent_update tests (race window gone) Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/Cargo.toml | 4 +- packages/rs-sdk/src/internal_cache/mod.rs | 313 +++++++++++++--------- 2 files changed, 192 insertions(+), 125 deletions(-) diff --git a/packages/rs-sdk/Cargo.toml b/packages/rs-sdk/Cargo.toml index 41ffae8278..8f19a236bb 100644 --- a/packages/rs-sdk/Cargo.toml +++ b/packages/rs-sdk/Cargo.toml @@ -36,7 +36,7 @@ dotenvy = { version = "0.15.7", optional = true } envy = { version = "0.4.2", optional = true } futures = { version = "0.3.30" } derive_more = { version = "1.0", features = ["from"] } -lru = { version = "0.16.3", optional = true } +lru = { version = "0.16.3" } bip37-bloom-filter = { git = "https://github.com/dashpay/rs-bip37-bloom-filter", branch = "develop" } zeroize = { version = "1.8", features = ["derive"] } @@ -92,7 +92,7 @@ mocks = [ "drive-proof-verifier/mocks", "dep:dotenvy", "dep:envy", - "dep:lru", + "zeroize/serde", ] diff --git a/packages/rs-sdk/src/internal_cache/mod.rs b/packages/rs-sdk/src/internal_cache/mod.rs index 7932248fd2..c017cf8efc 100644 --- a/packages/rs-sdk/src/internal_cache/mod.rs +++ b/packages/rs-sdk/src/internal_cache/mod.rs @@ -3,8 +3,10 @@ use crate::platform::transition::put_settings::PutSettings; use crate::platform::Identifier; use dpp::identity::identity_nonce::{IDENTITY_NONCE_VALUE_FILTER, MAX_MISSING_IDENTITY_REVISIONS}; use dpp::prelude::IdentityNonce; -use std::collections::BTreeMap; +use lru::LruCache; use std::future::Future; +use std::hash::Hash; +use std::num::NonZeroUsize; use tokio::sync::Mutex; #[cfg(not(target_arch = "wasm32"))] @@ -13,6 +15,9 @@ use std::time::{SystemTime, UNIX_EPOCH}; /// The default identity nonce stale time in seconds (20 minutes). const DEFAULT_IDENTITY_NONCE_STALE_TIME_S: u64 = 1200; +/// Maximum number of entries in each nonce LRU cache. +const DEFAULT_NONCE_CACHE_SIZE: usize = 100; + /// Cached nonce state for a single identity or identity-contract pair. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) struct NonceCacheEntry { @@ -42,17 +47,22 @@ impl From<(Identifier, Identifier)> for IdentityContractPair { /// Encapsulates all nonce caching logic previously spread across /// `Sdk`. Uses per-map locking so identity /// and contract nonce queries don't block each other. +/// +/// Backed by [`LruCache`] to bound memory usage and automatically +/// evict least-recently-used entries. pub(crate) struct NonceCache { - identity_nonces: Mutex>, - contract_nonces: Mutex>, + identity_nonces: Mutex>, + contract_nonces: Mutex>, default_stale_time_s: u64, } impl Default for NonceCache { fn default() -> Self { + let cap = NonZeroUsize::new(DEFAULT_NONCE_CACHE_SIZE) + .expect("DEFAULT_NONCE_CACHE_SIZE must be > 0"); Self { - identity_nonces: Mutex::new(BTreeMap::new()), - contract_nonces: Mutex::new(BTreeMap::new()), + identity_nonces: Mutex::new(LruCache::new(cap)), + contract_nonces: Mutex::new(LruCache::new(cap)), default_stale_time_s: DEFAULT_IDENTITY_NONCE_STALE_TIME_S, } } @@ -60,19 +70,19 @@ impl Default for NonceCache { /// Helper function to get current timestamp in seconds. /// Works in both native and WASM environments. -fn get_current_time_seconds() -> u64 { +fn get_current_time_seconds() -> Result { #[cfg(not(target_arch = "wasm32"))] { - match SystemTime::now().duration_since(UNIX_EPOCH) { - Ok(n) => n.as_secs(), - Err(_) => panic!("SystemTime before UNIX EPOCH!"), - } + SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .map_err(|e| Error::Generic(format!("SystemTime before UNIX EPOCH: {e}"))) } #[cfg(target_arch = "wasm32")] { // In WASM, we use JavaScript's Date.now() which returns milliseconds // We need to convert to seconds - (js_sys::Date::now() / 1000.0) as u64 + Ok((js_sys::Date::now() / 1000.0) as u64) } } @@ -127,20 +137,22 @@ impl NonceCache { .await } - /// Marks all nonce cache entries for the given identity as stale. + /// Removes all nonce cache entries for the given identity, forcing + /// a fresh Platform fetch on the next access. pub(crate) async fn refresh(&self, identity_id: &Identifier) { { let mut guard = self.identity_nonces.lock().await; - if let Some(entry) = guard.get_mut(identity_id) { - entry.last_fetch_timestamp = 0; - } + guard.pop(identity_id); } { let mut guard = self.contract_nonces.lock().await; - for (pair, entry) in guard.iter_mut() { - if pair.identity_id == *identity_id { - entry.last_fetch_timestamp = 0; - } + let keys_to_remove: Vec = guard + .iter() + .filter(|(pair, _)| pair.identity_id == *identity_id) + .map(|(pair, _)| *pair) + .collect(); + for key in keys_to_remove { + guard.pop(&key); } } } @@ -148,11 +160,11 @@ impl NonceCache { /// Shared nonce cache logic. Checks staleness and drift, fetches from /// Platform when needed, and maintains the cache entry. /// - /// The cache lock is held only briefly to read/write the entry; it is - /// **not** held across the async `fetch_from_platform` call so that other - /// callers are not blocked during the network round-trip. - async fn get_or_fetch_nonce( - cache: &Mutex>, + /// The cache lock is held **end-to-end** including across the async + /// `fetch_from_platform` call. This eliminates the TOCTOU race window + /// that previously existed when the lock was released before the fetch. + async fn get_or_fetch_nonce( + cache: &Mutex>, key: K, bump_first: bool, settings: &PutSettings, @@ -163,66 +175,67 @@ impl NonceCache { F: FnOnce() -> Fut, Fut: Future>, { - let current_time_s = get_current_time_seconds(); + let current_time_s = get_current_time_seconds()?; - // Phase 1: Try to serve from cache without a network fetch. - { - let mut cache_guard = cache.lock().await; - if let Some(entry) = cache_guard.get(&key).copied() { + let mut cache_guard = cache.lock().await; + + // Check if we have a fresh cache hit (use peek so we don't + // promote the entry just for a staleness check). + let needs_fetch = match cache_guard.peek(&key) { + Some(entry) => { let stale_by_time = entry.last_fetch_timestamp < current_time_s.saturating_sub( settings .identity_nonce_stale_time_s .unwrap_or(default_stale_time_s), ); - // Precautionary: use >= so we stay within Platform's own - // MAX_MISSING_IDENTITY_REVISIONS limit. let drifted = entry .current_nonce .saturating_sub(entry.last_fetched_platform_nonce) >= MAX_MISSING_IDENTITY_REVISIONS; + stale_by_time || drifted + } + None => true, + }; - if !stale_by_time && !drifted { - // Cache is fresh -- serve from it. - if bump_first { - let insert_nonce = entry.current_nonce + 1; - cache_guard.insert( - key, - NonceCacheEntry { - current_nonce: insert_nonce, - last_fetch_timestamp: current_time_s, - last_fetched_platform_nonce: entry.last_fetched_platform_nonce, - }, - ); - return Ok(insert_nonce); - } else { - return Ok(entry.current_nonce); - } - } + if !needs_fetch { + // Cache is fresh — serve from it. Use get_mut() to promote + // in LRU order and mutate in place. + let entry = cache_guard + .get_mut(&key) + .expect("entry must exist; we just peeked it"); + if bump_first { + let insert_nonce = (entry.current_nonce + 1) & IDENTITY_NONCE_VALUE_FILTER; + entry.current_nonce = insert_nonce; + // Do NOT update last_fetch_timestamp on cache-only bumps + return Ok(insert_nonce); + } else { + return Ok(entry.current_nonce); } - } // lock released -- entry was absent or stale + } - // Phase 2: Fetch from Platform without holding the cache lock. + // Fetch from Platform while holding the lock — eliminates + // TOCTOU race at the cost of serializing concurrent fetches + // for the same cache map. This is acceptable because: + // 1. Nonce fetches are infrequent (only on stale/missing). + // 2. Correctness > throughput for nonce management. + // // Strip the upper "missing revisions" bits immediately so the // cache only ever holds plain nonce values. let platform_nonce = fetch_from_platform().await? & IDENTITY_NONCE_VALUE_FILTER; - // Phase 3: Re-acquire lock and update cache. - // Re-read the entry since another caller may have updated the cache - // while we were fetching. Keep the higher of cached vs Platform - // nonce to avoid regression (Platform may not have indexed a recent - // successful broadcast yet). - let mut cache_guard = cache.lock().await; - let base_nonce = match cache_guard.get(&key) { + // Keep the higher of cached vs Platform nonce to avoid regression + // (Platform may not have indexed a recent successful broadcast yet). + let base_nonce = match cache_guard.peek(&key) { Some(entry) if entry.current_nonce > platform_nonce => entry.current_nonce, _ => platform_nonce, }; let insert_nonce = if bump_first { - base_nonce + 1 + (base_nonce + 1) & IDENTITY_NONCE_VALUE_FILTER } else { base_nonce }; - cache_guard.insert( + cache_guard.put( key, NonceCacheEntry { current_nonce: insert_nonce, @@ -241,14 +254,14 @@ mod nonce_cache_tests { use dpp::identity::identity_nonce::{ IDENTITY_NONCE_VALUE_FILTER, MAX_MISSING_IDENTITY_REVISIONS, }; - use std::collections::BTreeMap; - use std::sync::Arc; + use lru::LruCache; + use std::num::NonZeroUsize; use test_case::test_case; use tokio::sync::Mutex; /// Helper: shorthand for get_or_fetch_nonce. async fn fetch( - cache: &Mutex>, + cache: &Mutex>, bump: bool, settings: &PutSettings, platform_nonce: u64, @@ -272,8 +285,12 @@ mod nonce_cache_tests { .as_secs() } - fn empty_cache() -> Mutex> { - Mutex::new(BTreeMap::new()) + fn lru_cap() -> NonZeroUsize { + NonZeroUsize::new(16).unwrap() + } + + fn empty_cache() -> Mutex> { + Mutex::new(LruCache::new(lru_cap())) } fn seeded_cache( @@ -281,9 +298,9 @@ mod nonce_cache_tests { current_nonce: u64, timestamp: u64, last_platform_nonce: u64, - ) -> Mutex> { - let mut map = BTreeMap::new(); - map.insert( + ) -> Mutex> { + let mut map = LruCache::new(lru_cap()); + map.put( key, NonceCacheEntry { current_nonce, @@ -308,7 +325,7 @@ mod nonce_cache_tests { #[test_case(0, false, 0, 0 ; "zero no bump")] #[test_case(0, true, 1, 0 ; "zero with bump")] #[test_case(IDENTITY_NONCE_VALUE_FILTER, false, IDENTITY_NONCE_VALUE_FILTER, IDENTITY_NONCE_VALUE_FILTER ; "filter max no bump")] - #[test_case(IDENTITY_NONCE_VALUE_FILTER, true, IDENTITY_NONCE_VALUE_FILTER + 1, IDENTITY_NONCE_VALUE_FILTER ; "filter max with bump overflows")] + #[test_case(IDENTITY_NONCE_VALUE_FILTER, true, 0, IDENTITY_NONCE_VALUE_FILTER ; "filter max with bump wraps via mask")] #[test_case(42 | (3 << 40), false, 42, 42 ; "upper bits stripped no bump")] #[test_case(42 | (3 << 40), true, 43, 42 ; "upper bits stripped with bump")] #[tokio::test] @@ -321,7 +338,7 @@ mod nonce_cache_tests { let cache = empty_cache(); let result = fetch(&cache, bump, &Default::default(), platform_nonce).await; assert_eq!(result, expected); - let guard = cache.lock().await; + let mut guard = cache.lock().await; let entry = guard.get(&1u32).unwrap(); assert_eq!(entry.current_nonce, expected); assert_eq!(entry.last_fetched_platform_nonce, expected_stored_platform); @@ -377,7 +394,7 @@ mod nonce_cache_tests { let cache = seeded_cache(1, cached_nonce, timestamp, cached_platform); let result = fetch(&cache, bump, &settings, platform_returns).await; assert_eq!(result, expected); - let guard = cache.lock().await; + let mut guard = cache.lock().await; let entry = guard.get(&1u32).unwrap(); assert_eq!(entry.last_fetched_platform_nonce, expected_stored_platform); } @@ -400,7 +417,7 @@ mod nonce_cache_tests { .unwrap(); assert_eq!(result, expected); } - let guard = cache.lock().await; + let mut guard = cache.lock().await; let entry = guard.get(&1u32).unwrap(); assert_eq!(entry.current_nonce, 10); assert_eq!( @@ -424,7 +441,7 @@ mod nonce_cache_tests { .await; assert!(result.is_err()); let guard = cache.lock().await; - let entry = guard.get(&1u32).unwrap(); + let entry = guard.peek(&1u32).unwrap(); assert_eq!(entry.current_nonce, 10); assert_eq!( entry.last_fetch_timestamp, 0, @@ -433,36 +450,38 @@ mod nonce_cache_tests { assert_eq!(entry.last_fetched_platform_nonce, 10); } - // --- refresh marks entry stale --- + // --- refresh removes entries entirely, forcing a refetch --- #[tokio::test] - async fn refresh_marks_entries_stale_forcing_refetch() { - let cache = seeded_cache(1, 10, now_s(), 10); + async fn refresh_removes_entries_forcing_refetch() { + let nonce_cache = NonceCache::default(); + let identity_id = Identifier::default(); let settings = never_stale(); - // Confirm cache is served. - let result = NonceCache::get_or_fetch_nonce( - &cache, - 1u32, - false, - &settings, - DEFAULT_IDENTITY_NONCE_STALE_TIME_S, - || async { panic!("should not fetch") }, - ) - .await - .unwrap(); - assert_eq!(result, 10); + // Seed via initial fetch. + let nonce = nonce_cache + .get_identity_nonce(identity_id, true, &settings, || async { Ok(10u64) }) + .await + .unwrap(); + assert_eq!(nonce, 11); - // Simulate refresh: set timestamp to 0. - cache - .lock() + // Confirm cache is served (no platform call). + let nonce = nonce_cache + .get_identity_nonce(identity_id, true, &settings, || async { + panic!("should not fetch") + }) .await - .get_mut(&1u32) - .unwrap() - .last_fetch_timestamp = 0; + .unwrap(); + assert_eq!(nonce, 12); + + // Refresh should remove the entry. + nonce_cache.refresh(&identity_id).await; - // With default settings (stale_time=1200s), timestamp=0 is stale. - let result = fetch(&cache, false, &PutSettings::default(), 20).await; - assert_eq!(result, 20, "should re-fetch after refresh"); + // Next call must fetch from platform again. + let nonce = nonce_cache + .get_identity_nonce(identity_id, true, &settings, || async { Ok(20u64) }) + .await + .unwrap(); + assert_eq!(nonce, 21); } // --- Different keys are isolated --- @@ -501,40 +520,88 @@ mod nonce_cache_tests { } } - // --- Concurrent cache update during fetch --- - // (concurrent_nonce, platform_returns, bump, expected) - #[test_case(15, 10, false, 15 ; "concurrent higher wins")] - #[test_case(5, 20, true, 21 ; "platform higher wins with bump")] + // --- LRU eviction: oldest entry evicted when capacity exceeded --- #[tokio::test] - async fn concurrent_update( - concurrent_nonce: u64, - platform_returns: u64, - bump: bool, - expected: u64, - ) { - let cache = Arc::new(empty_cache()); - let cache_clone = Arc::clone(&cache); + async fn lru_eviction_when_capacity_exceeded() { + let cap = NonZeroUsize::new(2).unwrap(); + let cache: Mutex> = Mutex::new(LruCache::new(cap)); + + // Insert 3 entries into a size-2 cache. + for key in 1u32..=3 { + NonceCache::get_or_fetch_nonce( + &cache, + key, + false, + &Default::default(), + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async move { Ok(key as u64 * 10) }, + ) + .await + .unwrap(); + } + + let mut guard = cache.lock().await; + // Key 1 should have been evicted (LRU). + assert!(guard.peek(&1u32).is_none(), "key 1 should be evicted"); + assert!(guard.peek(&2u32).is_some(), "key 2 should still exist"); + assert!(guard.peek(&3u32).is_some(), "key 3 should still exist"); + } + + // --- Nonce filter mask is applied on every bump --- + #[tokio::test] + async fn nonce_filter_applied_on_bump_near_boundary() { + // Seed cache with nonce at the filter boundary. + // last_fetched_platform_nonce must be close enough to avoid drift-triggered refetch. + let cache = seeded_cache( + 1, + IDENTITY_NONCE_VALUE_FILTER, + now_s(), + IDENTITY_NONCE_VALUE_FILTER, + ); + let settings = never_stale(); + let result = NonceCache::get_or_fetch_nonce( &cache, 1u32, - bump, - &Default::default(), + true, + &settings, DEFAULT_IDENTITY_NONCE_STALE_TIME_S, - || async move { - // Simulate another caller updating cache during our fetch. - cache_clone.lock().await.insert( - 1u32, - NonceCacheEntry { - current_nonce: concurrent_nonce, - last_fetch_timestamp: now_s(), - last_fetched_platform_nonce: concurrent_nonce, - }, - ); - Ok(platform_returns) - }, + || async { panic!("should not fetch from platform") }, ) .await .unwrap(); - assert_eq!(result, expected); + + // (IDENTITY_NONCE_VALUE_FILTER + 1) & IDENTITY_NONCE_VALUE_FILTER == 0 + assert_eq!( + result, 0, + "bump past filter boundary should wrap to 0 via mask" + ); + } + + // --- Cache-only bumps do NOT update last_fetch_timestamp --- + #[tokio::test] + async fn cache_bump_does_not_update_timestamp() { + let original_ts = now_s() - 100; // 100 seconds ago + let cache = seeded_cache(1, 10, original_ts, 10); + let settings = never_stale(); + + // Bump from cache (no platform fetch). + NonceCache::get_or_fetch_nonce( + &cache, + 1u32, + true, + &settings, + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async { panic!("should not fetch from platform") }, + ) + .await + .unwrap(); + + let mut guard = cache.lock().await; + let entry = guard.get(&1u32).unwrap(); + assert_eq!( + entry.last_fetch_timestamp, original_ts, + "timestamp should not be updated on cache-only bump" + ); } } From f7ca660b2da3861dc09bf52f77a91f5ab6e9ff94 Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Mon, 23 Feb 2026 11:27:30 +0100 Subject: [PATCH 10/17] fix(sdk): harden NonceCache with overflow guard, three-phase locking, and tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address five security findings (SEC-001 through SEC-005) in the NonceCache: - SEC-001: Return `Error::NonceOverflow` instead of silently wrapping the nonce to zero when it reaches `IDENTITY_NONCE_VALUE_FILTER`. Wrapping would permanently lock out the identity. - SEC-002: Bump default LRU cache capacity from 100 to 1000 to reduce silent eviction of actively-bumped nonces. - SEC-003: Restore three-phase lock pattern — check cache under lock, release lock during platform fetch, re-acquire and merge with `max(cached, platform)`. Eliminates serialization of all nonce fetches across the async boundary. - SEC-004: Add `tracing::trace!` for cache misses, stale/drift refetches, and anti-regression events. No logging on the hot cache-hit path. - SEC-005: Capture a fresh timestamp after the platform fetch so `last_fetch_timestamp` reflects actual receipt time. Also removes all runtime `expect()` calls — the remaining `.expect()` on `NonZeroUsize::new()` is in a `const` context (compile-time evaluation). Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/error.rs | 5 + packages/rs-sdk/src/internal_cache/mod.rs | 153 +++++++++++++++------- 2 files changed, 110 insertions(+), 48 deletions(-) diff --git a/packages/rs-sdk/src/error.rs b/packages/rs-sdk/src/error.rs index 5e2543180b..babb0b23c5 100644 --- a/packages/rs-sdk/src/error.rs +++ b/packages/rs-sdk/src/error.rs @@ -75,6 +75,11 @@ pub enum Error { /// Invalid credit transfer configuration #[error("Invalid credit transfer: {0}")] InvalidCreditTransfer(String), + /// Identity nonce overflow: the nonce has reached its maximum value and + /// cannot be incremented further without wrapping to zero. + #[error("Identity nonce overflow: nonce has reached the maximum value ({0})")] + NonceOverflow(u64), + /// Generic error // TODO: Use domain specific errors instead of generic ones #[error("SDK error: {0}")] diff --git a/packages/rs-sdk/src/internal_cache/mod.rs b/packages/rs-sdk/src/internal_cache/mod.rs index c017cf8efc..57e8a87f2d 100644 --- a/packages/rs-sdk/src/internal_cache/mod.rs +++ b/packages/rs-sdk/src/internal_cache/mod.rs @@ -16,7 +16,8 @@ use std::time::{SystemTime, UNIX_EPOCH}; const DEFAULT_IDENTITY_NONCE_STALE_TIME_S: u64 = 1200; /// Maximum number of entries in each nonce LRU cache. -const DEFAULT_NONCE_CACHE_SIZE: usize = 100; +const DEFAULT_NONCE_CACHE_SIZE: NonZeroUsize = + NonZeroUsize::new(1000).expect("DEFAULT_NONCE_CACHE_SIZE must be > 0"); /// Cached nonce state for a single identity or identity-contract pair. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -58,11 +59,9 @@ pub(crate) struct NonceCache { impl Default for NonceCache { fn default() -> Self { - let cap = NonZeroUsize::new(DEFAULT_NONCE_CACHE_SIZE) - .expect("DEFAULT_NONCE_CACHE_SIZE must be > 0"); Self { - identity_nonces: Mutex::new(LruCache::new(cap)), - contract_nonces: Mutex::new(LruCache::new(cap)), + identity_nonces: Mutex::new(LruCache::new(DEFAULT_NONCE_CACHE_SIZE)), + contract_nonces: Mutex::new(LruCache::new(DEFAULT_NONCE_CACHE_SIZE)), default_stale_time_s: DEFAULT_IDENTITY_NONCE_STALE_TIME_S, } } @@ -86,6 +85,15 @@ fn get_current_time_seconds() -> Result { } } +/// Increment `nonce` with masking and return an error on overflow (wrap to zero). +fn bump_nonce(nonce: u64) -> Result { + let bumped = (nonce + 1) & IDENTITY_NONCE_VALUE_FILTER; + if bumped < nonce { + return Err(Error::NonceOverflow(nonce)); + } + Ok(bumped) +} + impl NonceCache { /// Get or fetch identity nonce from cache. pub(crate) async fn get_identity_nonce( @@ -160,9 +168,14 @@ impl NonceCache { /// Shared nonce cache logic. Checks staleness and drift, fetches from /// Platform when needed, and maintains the cache entry. /// - /// The cache lock is held **end-to-end** including across the async - /// `fetch_from_platform` call. This eliminates the TOCTOU race window - /// that previously existed when the lock was released before the fetch. + /// Uses a three-phase approach: + /// 1. Check cache under lock — return immediately if fresh. + /// 2. Fetch from Platform **without** holding the lock. + /// 3. Re-acquire lock and merge using `max(cached, platform)`. + /// + /// This accepts a narrow TOCTOU race where two concurrent callers for the + /// same key may both fetch from Platform, but the `max()` merge ensures no + /// nonce regression. async fn get_or_fetch_nonce( cache: &Mutex>, key: K, @@ -177,12 +190,12 @@ impl NonceCache { { let current_time_s = get_current_time_seconds()?; - let mut cache_guard = cache.lock().await; + // Phase 1: Check cache under lock + { + let mut cache_guard = cache.lock().await; - // Check if we have a fresh cache hit (use peek so we don't - // promote the entry just for a staleness check). - let needs_fetch = match cache_guard.peek(&key) { - Some(entry) => { + // Use peek so we don't promote the entry just for a staleness check. + if let Some(entry) = cache_guard.peek(&key) { let stale_by_time = entry.last_fetch_timestamp < current_time_s.saturating_sub( settings @@ -193,45 +206,57 @@ impl NonceCache { .current_nonce .saturating_sub(entry.last_fetched_platform_nonce) >= MAX_MISSING_IDENTITY_REVISIONS; - stale_by_time || drifted - } - None => true, - }; - if !needs_fetch { - // Cache is fresh — serve from it. Use get_mut() to promote - // in LRU order and mutate in place. - let entry = cache_guard - .get_mut(&key) - .expect("entry must exist; we just peeked it"); - if bump_first { - let insert_nonce = (entry.current_nonce + 1) & IDENTITY_NONCE_VALUE_FILTER; - entry.current_nonce = insert_nonce; - // Do NOT update last_fetch_timestamp on cache-only bumps - return Ok(insert_nonce); + if !stale_by_time && !drifted { + // Fresh hit — serve from cache. Promote in LRU via get_mut + // and mutate in place. Safe because we just confirmed the + // entry exists via peek above. + if let Some(entry) = cache_guard.get_mut(&key) { + if bump_first { + let insert_nonce = bump_nonce(entry.current_nonce)?; + entry.current_nonce = insert_nonce; + // Do NOT update last_fetch_timestamp on cache-only bumps + return Ok(insert_nonce); + } else { + return Ok(entry.current_nonce); + } + } + } + + if stale_by_time { + tracing::trace!("nonce cache stale, re-fetching from platform"); + } else { + tracing::trace!("nonce cache drifted, re-fetching from platform"); + } } else { - return Ok(entry.current_nonce); + tracing::trace!("nonce cache miss, fetching from platform"); } - } + } // lock released - // Fetch from Platform while holding the lock — eliminates - // TOCTOU race at the cost of serializing concurrent fetches - // for the same cache map. This is acceptable because: - // 1. Nonce fetches are infrequent (only on stale/missing). - // 2. Correctness > throughput for nonce management. + // Phase 2: Fetch from Platform without holding the lock // // Strip the upper "missing revisions" bits immediately so the // cache only ever holds plain nonce values. let platform_nonce = fetch_from_platform().await? & IDENTITY_NONCE_VALUE_FILTER; + // Phase 3: Re-acquire lock, use max(cached, platform) + // + // Capture a fresh timestamp so last_fetch_timestamp reflects when + // the data was actually received, not when the function was entered. + let current_time_s = get_current_time_seconds()?; + let mut cache_guard = cache.lock().await; + // Keep the higher of cached vs Platform nonce to avoid regression // (Platform may not have indexed a recent successful broadcast yet). let base_nonce = match cache_guard.peek(&key) { - Some(entry) if entry.current_nonce > platform_nonce => entry.current_nonce, + Some(entry) if entry.current_nonce > platform_nonce => { + tracing::trace!("nonce cache: preserved higher cached nonce over platform"); + entry.current_nonce + } _ => platform_nonce, }; let insert_nonce = if bump_first { - (base_nonce + 1) & IDENTITY_NONCE_VALUE_FILTER + bump_nonce(base_nonce)? } else { base_nonce }; @@ -259,7 +284,7 @@ mod nonce_cache_tests { use test_case::test_case; use tokio::sync::Mutex; - /// Helper: shorthand for get_or_fetch_nonce. + /// Helper: shorthand for get_or_fetch_nonce that expects success. async fn fetch( cache: &Mutex>, bump: bool, @@ -278,6 +303,24 @@ mod nonce_cache_tests { .unwrap() } + /// Helper: shorthand for get_or_fetch_nonce that returns the Result. + async fn try_fetch( + cache: &Mutex>, + bump: bool, + settings: &PutSettings, + platform_nonce: u64, + ) -> Result { + NonceCache::get_or_fetch_nonce( + cache, + 1u32, + bump, + settings, + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async move { Ok(platform_nonce) }, + ) + .await + } + fn now_s() -> u64 { std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) @@ -325,7 +368,6 @@ mod nonce_cache_tests { #[test_case(0, false, 0, 0 ; "zero no bump")] #[test_case(0, true, 1, 0 ; "zero with bump")] #[test_case(IDENTITY_NONCE_VALUE_FILTER, false, IDENTITY_NONCE_VALUE_FILTER, IDENTITY_NONCE_VALUE_FILTER ; "filter max no bump")] - #[test_case(IDENTITY_NONCE_VALUE_FILTER, true, 0, IDENTITY_NONCE_VALUE_FILTER ; "filter max with bump wraps via mask")] #[test_case(42 | (3 << 40), false, 42, 42 ; "upper bits stripped no bump")] #[test_case(42 | (3 << 40), true, 43, 42 ; "upper bits stripped with bump")] #[tokio::test] @@ -344,6 +386,23 @@ mod nonce_cache_tests { assert_eq!(entry.last_fetched_platform_nonce, expected_stored_platform); } + // --- SEC-001: Bumping at filter max returns overflow error --- + #[tokio::test] + async fn filter_max_with_bump_returns_overflow_error() { + let cache = empty_cache(); + let result = try_fetch( + &cache, + true, + &Default::default(), + IDENTITY_NONCE_VALUE_FILTER, + ) + .await; + assert!( + matches!(result, Err(Error::NonceOverflow(_))), + "expected NonceOverflow error, got: {result:?}" + ); + } + // --- Fresh cache hit (no platform fetch) --- // (cached, last_platform, bump, expected) #[test_case(10, 10, false, 10 ; "no bump returns cached")] @@ -540,16 +599,16 @@ mod nonce_cache_tests { .unwrap(); } - let mut guard = cache.lock().await; + let guard = cache.lock().await; // Key 1 should have been evicted (LRU). assert!(guard.peek(&1u32).is_none(), "key 1 should be evicted"); assert!(guard.peek(&2u32).is_some(), "key 2 should still exist"); assert!(guard.peek(&3u32).is_some(), "key 3 should still exist"); } - // --- Nonce filter mask is applied on every bump --- + // --- SEC-001: Nonce overflow returns error on cache-hit bump --- #[tokio::test] - async fn nonce_filter_applied_on_bump_near_boundary() { + async fn nonce_overflow_returns_error_on_cache_hit() { // Seed cache with nonce at the filter boundary. // last_fetched_platform_nonce must be close enough to avoid drift-triggered refetch. let cache = seeded_cache( @@ -568,13 +627,11 @@ mod nonce_cache_tests { DEFAULT_IDENTITY_NONCE_STALE_TIME_S, || async { panic!("should not fetch from platform") }, ) - .await - .unwrap(); + .await; - // (IDENTITY_NONCE_VALUE_FILTER + 1) & IDENTITY_NONCE_VALUE_FILTER == 0 - assert_eq!( - result, 0, - "bump past filter boundary should wrap to 0 via mask" + assert!( + matches!(result, Err(Error::NonceOverflow(n)) if n == IDENTITY_NONCE_VALUE_FILTER), + "expected NonceOverflow error at filter max, got: {result:?}" ); } From d54fb1603d7d9c552b34f1fe4428b1c7d74e19dd Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Mon, 23 Feb 2026 11:58:59 +0100 Subject: [PATCH 11/17] fix(sdk): return IdentityNonceNotFound instead of defaulting to nonce 0 Replace `.unwrap_or(0)` fallback with an explicit error when Platform returns no nonce for an identity or identity-contract pair. A stale DAPI node that has not yet indexed the identity would previously cause the SDK to silently use nonce 0, leading to rejected state transitions. Now the closure surfaces `Error::IdentityNonceNotFound` and logs a `warn!` so the caller can retry against a different node. Also documents the LRU eviction, drift threshold, and DAPI staleness edge cases on `NonceCache`. Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/error.rs | 11 ++++++ packages/rs-sdk/src/internal_cache/mod.rs | 32 ++++++++++++++++ packages/rs-sdk/src/sdk.rs | 46 ++++++++++++++++++++--- 3 files changed, 83 insertions(+), 6 deletions(-) diff --git a/packages/rs-sdk/src/error.rs b/packages/rs-sdk/src/error.rs index babb0b23c5..dc855bcab9 100644 --- a/packages/rs-sdk/src/error.rs +++ b/packages/rs-sdk/src/error.rs @@ -79,6 +79,17 @@ pub enum Error { /// cannot be incremented further without wrapping to zero. #[error("Identity nonce overflow: nonce has reached the maximum value ({0})")] NonceOverflow(u64), + /// Identity nonce not found on Platform. + /// + /// Platform returned no nonce for the requested identity (or identity– + /// contract pair). This usually means the queried DAPI node has not yet + /// indexed the identity — for example right after identity creation or + /// when the node is lagging behind the chain tip. + /// + /// **Recovery**: retry the state transition; the SDK will re-fetch the + /// nonce from a (potentially different) DAPI node on the next attempt. + #[error("Identity nonce not found on platform: {0}")] + IdentityNonceNotFound(String), /// Generic error // TODO: Use domain specific errors instead of generic ones diff --git a/packages/rs-sdk/src/internal_cache/mod.rs b/packages/rs-sdk/src/internal_cache/mod.rs index 57e8a87f2d..28100180a6 100644 --- a/packages/rs-sdk/src/internal_cache/mod.rs +++ b/packages/rs-sdk/src/internal_cache/mod.rs @@ -51,6 +51,29 @@ impl From<(Identifier, Identifier)> for IdentityContractPair { /// /// Backed by [`LruCache`] to bound memory usage and automatically /// evict least-recently-used entries. +/// +/// # LRU eviction +/// +/// Each map is capped at [`DEFAULT_NONCE_CACHE_SIZE`] (1000) entries. +/// Evicting an entry with unconfirmed local bumps may cause a nonce +/// collision on re-fetch. Unlikely in practice (requires >1000 +/// concurrent identities); the rejection is transient and resolves +/// on retry once the pending transaction confirms. +/// +/// # Drift threshold +/// +/// Up to 23 nonces can be bumped from cache before a Platform re-fetch +/// is forced (drift limit [`MAX_MISSING_IDENTITY_REVISIONS`] = 24). +/// The 20-minute stale timer provides an additional backstop. If +/// transactions fail silently, the cache may advance past the on-chain +/// nonce until the next re-fetch realigns it. +/// +/// # DAPI node staleness +/// +/// A stale DAPI node may report an identity as unknown. The fetch +/// closure surfaces this as [`Error::IdentityNonceNotFound`] rather +/// than defaulting to nonce 0. Worst case: one state transition +/// attempt fails and must be retried against a different node. pub(crate) struct NonceCache { identity_nonces: Mutex>, contract_nonces: Mutex>, @@ -176,6 +199,15 @@ impl NonceCache { /// This accepts a narrow TOCTOU race where two concurrent callers for the /// same key may both fetch from Platform, but the `max()` merge ensures no /// nonce regression. + /// + /// # Errors + /// + /// - Returns whatever error `fetch_from_platform` produces, including + /// [`Error::IdentityNonceNotFound`] when the queried DAPI node does + /// not know the identity. Callers should retry in that case; a + /// different node will likely have the data. + /// - Returns [`Error::NonceOverflow`] if bumping would wrap past the + /// 40-bit nonce value filter. async fn get_or_fetch_nonce( cache: &Mutex>, key: K, diff --git a/packages/rs-sdk/src/sdk.rs b/packages/rs-sdk/src/sdk.rs index 55b0dea747..baf5d3916e 100644 --- a/packages/rs-sdk/src/sdk.rs +++ b/packages/rs-sdk/src/sdk.rs @@ -331,6 +331,14 @@ impl Sdk { /// Updates or fetches the nonce for a given identity from the cache, /// querying Platform if the cached value is stale or absent. Optionally /// increments the nonce before storing it, based on the provided settings. + /// + /// # Errors + /// + /// Returns [`Error::IdentityNonceNotFound`] if the queried DAPI node + /// does not know the identity (e.g. the node is stale or the identity + /// was just created). The worst-case impact is that the caller must + /// retry the state transition; the next attempt will re-fetch from + /// Platform and likely reach an up-to-date node. pub async fn get_identity_nonce( &self, identity_id: Identifier, @@ -341,14 +349,23 @@ impl Sdk { let nonce = self .nonce_cache .get_identity_nonce(identity_id, bump_first, &settings, || async { - Ok(IdentityNonceFetcher::fetch_with_settings( + let fetcher = IdentityNonceFetcher::fetch_with_settings( self, identity_id, settings.request_settings, ) .await? - .unwrap_or(IdentityNonceFetcher(0)) - .0) + .ok_or_else(|| { + tracing::warn!( + identity_id = %identity_id, + "Platform returned no nonce for identity; \ + node may be stale or identity may not exist yet" + ); + Error::IdentityNonceNotFound(format!( + "identity {identity_id}: platform returned no nonce" + )) + })?; + Ok(fetcher.0) }) .await?; @@ -366,6 +383,12 @@ impl Sdk { /// the cache, querying Platform if the cached value is stale or absent. /// Optionally increments the nonce before storing it, based on the provided /// settings. + /// + /// # Errors + /// + /// Returns [`Error::IdentityNonceNotFound`] if the queried DAPI node + /// does not know the identity–contract pair. See + /// [`get_identity_nonce`](Self::get_identity_nonce) for details. pub async fn get_identity_contract_nonce( &self, identity_id: Identifier, @@ -381,14 +404,25 @@ impl Sdk { bump_first, &settings, || async { - Ok(IdentityContractNonceFetcher::fetch_with_settings( + let fetcher = IdentityContractNonceFetcher::fetch_with_settings( self, (identity_id, contract_id), settings.request_settings, ) .await? - .unwrap_or(IdentityContractNonceFetcher(0)) - .0) + .ok_or_else(|| { + tracing::warn!( + identity_id = %identity_id, + contract_id = %contract_id, + "Platform returned no nonce for identity-contract pair; \ + node may be stale or identity may not exist yet" + ); + Error::IdentityNonceNotFound(format!( + "identity {identity_id} contract {contract_id}: \ + platform returned no nonce" + )) + })?; + Ok(fetcher.0) }, ) .await From dbcd19277fac012cb6d930b7271ccafaa17ffd31 Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Mon, 23 Feb 2026 12:11:14 +0100 Subject: [PATCH 12/17] refactor(sdk): improve NonceCache code quality and add missing test - Remove dead `From<(Identifier, Identifier)>` impl (RUST-005) - Replace double hash lookup (peek + get_mut) with single get_mut (RUST-002) - Add `Debug` bound to generic key, include key in trace logs (RUST-003) - Add manual `Debug` impl for `NonceCache` (RUST-004) - Remove redundant test imports after `use super::*` (RUST-006) - Add test for bump_first=false after bump_first=true sequence (CODE-004) Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/internal_cache/mod.rs | 90 ++++++++++++++--------- 1 file changed, 56 insertions(+), 34 deletions(-) diff --git a/packages/rs-sdk/src/internal_cache/mod.rs b/packages/rs-sdk/src/internal_cache/mod.rs index 28100180a6..e644695d58 100644 --- a/packages/rs-sdk/src/internal_cache/mod.rs +++ b/packages/rs-sdk/src/internal_cache/mod.rs @@ -4,6 +4,7 @@ use crate::platform::Identifier; use dpp::identity::identity_nonce::{IDENTITY_NONCE_VALUE_FILTER, MAX_MISSING_IDENTITY_REVISIONS}; use dpp::prelude::IdentityNonce; use lru::LruCache; +use std::fmt::Debug; use std::future::Future; use std::hash::Hash; use std::num::NonZeroUsize; @@ -34,15 +35,6 @@ pub(crate) struct IdentityContractPair { pub(crate) contract_id: Identifier, } -impl From<(Identifier, Identifier)> for IdentityContractPair { - fn from((identity_id, contract_id): (Identifier, Identifier)) -> Self { - Self { - identity_id, - contract_id, - } - } -} - /// Nonce cache for identity and identity-contract nonces. /// /// Encapsulates all nonce caching logic previously spread across @@ -80,6 +72,14 @@ pub(crate) struct NonceCache { default_stale_time_s: u64, } +impl Debug for NonceCache { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("NonceCache") + .field("default_stale_time_s", &self.default_stale_time_s) + .finish_non_exhaustive() + } +} + impl Default for NonceCache { fn default() -> Self { Self { @@ -208,7 +208,7 @@ impl NonceCache { /// different node will likely have the data. /// - Returns [`Error::NonceOverflow`] if bumping would wrap past the /// 40-bit nonce value filter. - async fn get_or_fetch_nonce( + async fn get_or_fetch_nonce( cache: &Mutex>, key: K, bump_first: bool, @@ -226,8 +226,7 @@ impl NonceCache { { let mut cache_guard = cache.lock().await; - // Use peek so we don't promote the entry just for a staleness check. - if let Some(entry) = cache_guard.peek(&key) { + if let Some(entry) = cache_guard.get_mut(&key) { let stale_by_time = entry.last_fetch_timestamp < current_time_s.saturating_sub( settings @@ -240,28 +239,24 @@ impl NonceCache { >= MAX_MISSING_IDENTITY_REVISIONS; if !stale_by_time && !drifted { - // Fresh hit — serve from cache. Promote in LRU via get_mut - // and mutate in place. Safe because we just confirmed the - // entry exists via peek above. - if let Some(entry) = cache_guard.get_mut(&key) { - if bump_first { - let insert_nonce = bump_nonce(entry.current_nonce)?; - entry.current_nonce = insert_nonce; - // Do NOT update last_fetch_timestamp on cache-only bumps - return Ok(insert_nonce); - } else { - return Ok(entry.current_nonce); - } + // Fresh hit — serve from cache, mutate in place. + if bump_first { + let insert_nonce = bump_nonce(entry.current_nonce)?; + entry.current_nonce = insert_nonce; + // Do NOT update last_fetch_timestamp on cache-only bumps + return Ok(insert_nonce); + } else { + return Ok(entry.current_nonce); } } if stale_by_time { - tracing::trace!("nonce cache stale, re-fetching from platform"); + tracing::trace!(key = ?key, "nonce cache stale, re-fetching from platform"); } else { - tracing::trace!("nonce cache drifted, re-fetching from platform"); + tracing::trace!(key = ?key, "nonce cache drifted, re-fetching from platform"); } } else { - tracing::trace!("nonce cache miss, fetching from platform"); + tracing::trace!(key = ?key, "nonce cache miss, fetching from platform"); } } // lock released @@ -282,7 +277,7 @@ impl NonceCache { // (Platform may not have indexed a recent successful broadcast yet). let base_nonce = match cache_guard.peek(&key) { Some(entry) if entry.current_nonce > platform_nonce => { - tracing::trace!("nonce cache: preserved higher cached nonce over platform"); + tracing::trace!(key = ?key, "nonce cache: preserved higher cached nonce over platform"); entry.current_nonce } _ => platform_nonce, @@ -308,13 +303,7 @@ impl NonceCache { mod nonce_cache_tests { use super::*; use crate::platform::transition::put_settings::PutSettings; - use dpp::identity::identity_nonce::{ - IDENTITY_NONCE_VALUE_FILTER, MAX_MISSING_IDENTITY_REVISIONS, - }; - use lru::LruCache; - use std::num::NonZeroUsize; use test_case::test_case; - use tokio::sync::Mutex; /// Helper: shorthand for get_or_fetch_nonce that expects success. async fn fetch( @@ -693,4 +682,37 @@ mod nonce_cache_tests { "timestamp should not be updated on cache-only bump" ); } + + // --- Reading after bump returns the bumped nonce without incrementing --- + #[tokio::test] + async fn read_after_bump_returns_bumped_nonce() { + let cache = seeded_cache(1, 5, now_s(), 5); + let settings = never_stale(); + + // bump_first=true → 6 + let nonce = NonceCache::get_or_fetch_nonce( + &cache, + 1u32, + true, + &settings, + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async { panic!("should not fetch from platform") }, + ) + .await + .unwrap(); + assert_eq!(nonce, 6); + + // bump_first=false → still 6 + let nonce = NonceCache::get_or_fetch_nonce( + &cache, + 1u32, + false, + &settings, + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async { panic!("should not fetch from platform") }, + ) + .await + .unwrap(); + assert_eq!(nonce, 6); + } } From f6b03f07c33872e263ab16b783175d729a4f8655 Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Mon, 23 Feb 2026 12:14:25 +0100 Subject: [PATCH 13/17] fix(sdk): harden NonceCache drift calculation and add concurrency tests - Preserve max(old, new) last_fetched_platform_nonce when cached nonce wins the Phase 3 merge, preventing drift inflation from a temporarily lower platform value (CODE-001) - Simplify bump_nonce to explicit boundary check instead of indirect masking arithmetic (RUST-001) - Add concurrent bump test (20 tasks via Barrier on fresh cache) - Add concurrent stale-fetch test (50 tasks through Phase 2 merge) Both concurrency tests assert all returned nonces are unique, validating the three-phase locking under contention. Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/internal_cache/mod.rs | 120 ++++++++++++++++++++-- 1 file changed, 111 insertions(+), 9 deletions(-) diff --git a/packages/rs-sdk/src/internal_cache/mod.rs b/packages/rs-sdk/src/internal_cache/mod.rs index e644695d58..153783fb2a 100644 --- a/packages/rs-sdk/src/internal_cache/mod.rs +++ b/packages/rs-sdk/src/internal_cache/mod.rs @@ -108,13 +108,13 @@ fn get_current_time_seconds() -> Result { } } -/// Increment `nonce` with masking and return an error on overflow (wrap to zero). +/// Increment `nonce` by one, returning an error if it is already at the +/// 40-bit maximum ([`IDENTITY_NONCE_VALUE_FILTER`]). fn bump_nonce(nonce: u64) -> Result { - let bumped = (nonce + 1) & IDENTITY_NONCE_VALUE_FILTER; - if bumped < nonce { + if nonce >= IDENTITY_NONCE_VALUE_FILTER { return Err(Error::NonceOverflow(nonce)); } - Ok(bumped) + Ok(nonce + 1) } impl NonceCache { @@ -275,12 +275,18 @@ impl NonceCache { // Keep the higher of cached vs Platform nonce to avoid regression // (Platform may not have indexed a recent successful broadcast yet). - let base_nonce = match cache_guard.peek(&key) { + // Also preserve the higher last_fetched_platform_nonce so the drift + // calculation doesn't inflate when Platform temporarily returns a + // lower value than previously seen. + let (base_nonce, effective_platform_nonce) = match cache_guard.peek(&key) { Some(entry) if entry.current_nonce > platform_nonce => { tracing::trace!(key = ?key, "nonce cache: preserved higher cached nonce over platform"); - entry.current_nonce + ( + entry.current_nonce, + entry.last_fetched_platform_nonce.max(platform_nonce), + ) } - _ => platform_nonce, + _ => (platform_nonce, platform_nonce), }; let insert_nonce = if bump_first { bump_nonce(base_nonce)? @@ -292,7 +298,7 @@ impl NonceCache { NonceCacheEntry { current_nonce: insert_nonce, last_fetch_timestamp: current_time_s, - last_fetched_platform_nonce: platform_nonce, + last_fetched_platform_nonce: effective_platform_nonce, }, ); Ok(insert_nonce) @@ -454,7 +460,7 @@ mod nonce_cache_tests { #[test_case(20, 10, 15, true, false, 21, 15 ; "preserves higher cached nonce with bump")] #[test_case(10, 5, 50, false, false, 50, 50 ; "platform much higher replaces cache")] #[test_case(10, 5, 50, true, false, 51, 50 ; "platform much higher replaces cache with bump")] - #[test_case(100, 90, 50 | (5 << 40), false, false, 100, 50 ; "upper bits stripped cache preserved")] + #[test_case(100, 90, 50 | (5 << 40), false, false, 100, 90 ; "upper bits stripped cache preserved")] #[test_case(10 + MAX_MISSING_IDENTITY_REVISIONS, 10, 10 + MAX_MISSING_IDENTITY_REVISIONS, false, true, 10 + MAX_MISSING_IDENTITY_REVISIONS, 10 + MAX_MISSING_IDENTITY_REVISIONS ; "drift at max triggers refetch")] #[tokio::test] async fn cache_refetch( @@ -683,6 +689,102 @@ mod nonce_cache_tests { ); } + // --- Concurrent bumps from fresh cache all produce unique nonces --- + #[tokio::test] + async fn concurrent_bumps_produce_unique_nonces() { + use std::sync::Arc; + + // Keep under MAX_MISSING_IDENTITY_REVISIONS (24) so the drift + // threshold is not hit and all tasks stay on the cache-hit path. + let num_tasks: u64 = 20; + let cache = Arc::new(seeded_cache(1, 0, now_s(), 0)); + let barrier = Arc::new(tokio::sync::Barrier::new(num_tasks as usize)); + + let mut handles = Vec::with_capacity(num_tasks as usize); + for _ in 0..num_tasks { + let cache = Arc::clone(&cache); + let barrier = Arc::clone(&barrier); + handles.push(tokio::spawn(async move { + barrier.wait().await; + NonceCache::get_or_fetch_nonce( + &cache, + 1u32, + true, + &PutSettings { + identity_nonce_stale_time_s: Some(u64::MAX), + ..Default::default() + }, + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async { panic!("should not fetch from platform") }, + ) + .await + .unwrap() + })); + } + + let mut nonces = Vec::with_capacity(num_tasks as usize); + for handle in handles { + nonces.push(handle.await.unwrap()); + } + + nonces.sort(); + let before = nonces.len(); + nonces.dedup(); + assert_eq!( + nonces.len(), + before, + "duplicate nonces detected under concurrent access" + ); + // Should be a contiguous range 1..=num_tasks (bumped from 0). + assert_eq!(nonces, (1..=num_tasks).collect::>()); + } + + // --- Concurrent stale fetches all produce unique nonces --- + #[tokio::test] + async fn concurrent_stale_fetches_produce_unique_nonces() { + use std::sync::Arc; + + let num_tasks: u64 = 50; + // Stale timestamp forces all tasks into Phase 2 (platform fetch). + let cache = Arc::new(seeded_cache(1, 0, 0, 0)); + let barrier = Arc::new(tokio::sync::Barrier::new(num_tasks as usize)); + + let mut handles = Vec::with_capacity(num_tasks as usize); + for _ in 0..num_tasks { + let cache = Arc::clone(&cache); + let barrier = Arc::clone(&barrier); + handles.push(tokio::spawn(async move { + barrier.wait().await; + NonceCache::get_or_fetch_nonce( + &cache, + 1u32, + true, + &PutSettings::default(), + DEFAULT_IDENTITY_NONCE_STALE_TIME_S, + || async { Ok(100u64) }, + ) + .await + .unwrap() + })); + } + + let mut nonces = Vec::with_capacity(num_tasks as usize); + for handle in handles { + nonces.push(handle.await.unwrap()); + } + + nonces.sort(); + let before = nonces.len(); + nonces.dedup(); + assert_eq!( + nonces.len(), + before, + "duplicate nonces detected under concurrent stale fetches" + ); + // All should be > 100 (bumped from platform nonce). + assert!(nonces.iter().all(|&n| n > 100)); + } + // --- Reading after bump returns the bumped nonce without incrementing --- #[tokio::test] async fn read_after_bump_returns_bumped_nonce() { From ce2e7ef41df9cbbd69179ebb7a36a57ee0d96045 Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Mon, 23 Feb 2026 12:40:32 +0100 Subject: [PATCH 14/17] fix(wasm-sdk): handle NonceOverflow and IdentityNonceNotFound error variants The rs-sdk added two new error variants (NonceOverflow, IdentityNonceNotFound) but the wasm-sdk error conversion was not updated, causing a non-exhaustive match compilation failure in CI. Co-Authored-By: Claude Opus 4.6 --- packages/wasm-sdk/src/error.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/packages/wasm-sdk/src/error.rs b/packages/wasm-sdk/src/error.rs index 6f0af6d813..6f41a1d76e 100644 --- a/packages/wasm-sdk/src/error.rs +++ b/packages/wasm-sdk/src/error.rs @@ -31,6 +31,8 @@ pub enum WasmSdkErrorKind { Cancelled, StaleNode, StateTransitionBroadcastError, + NonceOverflow, + IdentityNonceNotFound, // Local helper kinds InvalidArgument, @@ -177,6 +179,18 @@ impl From for WasmSdkError { Cancelled(msg) => Self::new(WasmSdkErrorKind::Cancelled, msg, None, retriable), StaleNode(e) => Self::new(WasmSdkErrorKind::StaleNode, e.to_string(), None, retriable), StateTransitionBroadcastError(e) => WasmSdkError::from(e), + NonceOverflow(nonce) => Self::new( + WasmSdkErrorKind::NonceOverflow, + format!( + "Identity nonce overflow: nonce has reached the maximum value ({})", + nonce + ), + None, + false, + ), + IdentityNonceNotFound(msg) => { + Self::new(WasmSdkErrorKind::IdentityNonceNotFound, msg, None, true) + } NoAvailableAddressesToRetry(inner) => Self::new( WasmSdkErrorKind::DapiClientError, format!("no available addresses to retry, last error: {}", inner), @@ -253,6 +267,8 @@ impl WasmSdkError { K::Cancelled => "Cancelled", K::StaleNode => "StaleNode", K::StateTransitionBroadcastError => "StateTransitionBroadcastError", + K::NonceOverflow => "NonceOverflow", + K::IdentityNonceNotFound => "IdentityNonceNotFound", K::InvalidArgument => "InvalidArgument", K::SerializationError => "SerializationError", K::NotFound => "NotFound", From 31674784dad210b7fd7574c8be23970d1e91e6d5 Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Mon, 23 Feb 2026 13:28:09 +0100 Subject: [PATCH 15/17] fix(sdk): preserve cached nonce on refresh to prevent tx duplicate on retry The LruCache refactoring (d1225ccd) inadvertently changed refresh() from "mark stale" to "pop entry", reintroducing the exact bug from dashpay/dash-evo-tool#588: after a broadcast failure, the bumped nonce was lost, causing the retry to create a byte-identical state transition ("tx already exists in cache"). Changed refresh() to set last_fetch_timestamp = 0 instead of popping, preserving the cached nonce so Phase 3's max(cached, platform) merge advances past the already-used nonce. Added tests covering both scenarios: Platform returning a stale nonce (cached value preserved) and Platform returning a higher nonce (Platform value used). Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/internal_cache/mod.rs | 68 +++++++++++++++++++---- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/packages/rs-sdk/src/internal_cache/mod.rs b/packages/rs-sdk/src/internal_cache/mod.rs index 153783fb2a..e35334d1bd 100644 --- a/packages/rs-sdk/src/internal_cache/mod.rs +++ b/packages/rs-sdk/src/internal_cache/mod.rs @@ -168,22 +168,35 @@ impl NonceCache { .await } - /// Removes all nonce cache entries for the given identity, forcing - /// a fresh Platform fetch on the next access. + /// Marks all nonce cache entries for the given identity as stale, + /// forcing a fresh Platform fetch on the next access while + /// **preserving** the cached nonce value. + /// + /// This is critical for correctness: after a broadcast failure the + /// cached nonce (already bumped) must be preserved so that the + /// Phase 3 `max(cached, platform)` merge in [`get_or_fetch_nonce`] + /// can advance past the nonce that was already used, preventing + /// "tx already exists in cache" errors on retry. pub(crate) async fn refresh(&self, identity_id: &Identifier) { { let mut guard = self.identity_nonces.lock().await; - guard.pop(identity_id); + if let Some(entry) = guard.get_mut(identity_id) { + entry.last_fetch_timestamp = 0; + } } { let mut guard = self.contract_nonces.lock().await; - let keys_to_remove: Vec = guard + // LruCache doesn't support iter_mut, so collect keys first + // then mark each entry stale via get_mut. + let keys: Vec = guard .iter() .filter(|(pair, _)| pair.identity_id == *identity_id) .map(|(pair, _)| *pair) .collect(); - for key in keys_to_remove { - guard.pop(&key); + for key in keys { + if let Some(entry) = guard.get_mut(&key) { + entry.last_fetch_timestamp = 0; + } } } } @@ -538,19 +551,19 @@ mod nonce_cache_tests { // --- refresh removes entries entirely, forcing a refetch --- #[tokio::test] - async fn refresh_removes_entries_forcing_refetch() { + async fn refresh_marks_stale_but_preserves_cached_nonce() { let nonce_cache = NonceCache::default(); let identity_id = Identifier::default(); let settings = never_stale(); - // Seed via initial fetch. + // Seed via initial fetch (platform returns 10, bump to 11). let nonce = nonce_cache .get_identity_nonce(identity_id, true, &settings, || async { Ok(10u64) }) .await .unwrap(); assert_eq!(nonce, 11); - // Confirm cache is served (no platform call). + // Bump again from cache (11 → 12). let nonce = nonce_cache .get_identity_nonce(identity_id, true, &settings, || async { panic!("should not fetch") @@ -559,10 +572,43 @@ mod nonce_cache_tests { .unwrap(); assert_eq!(nonce, 12); - // Refresh should remove the entry. + // Simulate broadcast failure: refresh marks the entry stale + // but preserves the cached nonce value (12). + nonce_cache.refresh(&identity_id).await; + + // Next call re-fetches from Platform (returns 10, the old value + // because the tx hasn't confirmed yet). Phase 3 should see + // max(cached=12, platform=10) = 12, then bump to 13. + // This is the fix for dashpay/dash-evo-tool#588: without + // preserving the cached nonce, we'd get 11 (same as the first + // broadcast), causing "tx already exists in cache". + let nonce = nonce_cache + .get_identity_nonce(identity_id, true, &settings, || async { Ok(10u64) }) + .await + .unwrap(); + assert_eq!(nonce, 13); + } + + /// Refresh followed by a Platform fetch returning a HIGHER nonce + /// (the pending tx was confirmed) should use the Platform value. + #[tokio::test] + async fn refresh_uses_platform_nonce_when_higher() { + let nonce_cache = NonceCache::default(); + let identity_id = Identifier::default(); + // Use default stale time so that timestamp=0 is detected as stale. + let settings = PutSettings::default(); + + // Seed: platform=10, bump to 11. + let nonce = nonce_cache + .get_identity_nonce(identity_id, true, &settings, || async { Ok(10u64) }) + .await + .unwrap(); + assert_eq!(nonce, 11); + nonce_cache.refresh(&identity_id).await; - // Next call must fetch from platform again. + // Platform has advanced to 20 (tx confirmed + others). + // Phase 3: max(cached=11, platform=20) = 20, bump to 21. let nonce = nonce_cache .get_identity_nonce(identity_id, true, &settings, || async { Ok(20u64) }) .await From 9a4f1d73e62fefe0cf4b0020da194dd32600bc06 Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Mon, 23 Feb 2026 13:38:22 +0100 Subject: [PATCH 16/17] refactor(sdk): address review findings on NonceCache refresh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract STALE_TIMESTAMP constant to replace magic value 0 - Document intentional get_mut() LRU promotion in refresh() — refreshed entries are actively in use and should be preserved under eviction pressure - Fix stale section comment that still said "removes entries" - Fix primary test to use PutSettings::default() so the stale timestamp actually triggers a Platform re-fetch; add AtomicBool assertion verifying the fetch closure is invoked Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/internal_cache/mod.rs | 36 +++++++++++++++++++---- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/packages/rs-sdk/src/internal_cache/mod.rs b/packages/rs-sdk/src/internal_cache/mod.rs index e35334d1bd..e5d7f52142 100644 --- a/packages/rs-sdk/src/internal_cache/mod.rs +++ b/packages/rs-sdk/src/internal_cache/mod.rs @@ -20,6 +20,12 @@ const DEFAULT_IDENTITY_NONCE_STALE_TIME_S: u64 = 1200; const DEFAULT_NONCE_CACHE_SIZE: NonZeroUsize = NonZeroUsize::new(1000).expect("DEFAULT_NONCE_CACHE_SIZE must be > 0"); +/// Sentinel timestamp used by [`NonceCache::refresh`] to mark cache entries +/// as stale without removing them. Any real `SystemTime` timestamp will be +/// greater than this value, so the staleness check in [`NonceCache::get_or_fetch_nonce`] +/// will always trigger a re-fetch for entries with this timestamp. +const STALE_TIMESTAMP: u64 = 0; + /// Cached nonce state for a single identity or identity-contract pair. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) struct NonceCacheEntry { @@ -177,11 +183,19 @@ impl NonceCache { /// Phase 3 `max(cached, platform)` merge in [`get_or_fetch_nonce`] /// can advance past the nonce that was already used, preventing /// "tx already exists in cache" errors on retry. + /// + /// # LRU promotion + /// + /// We intentionally use `get_mut()` (which promotes entries to + /// most-recently-used) rather than `peek_mut()`. A refreshed entry + /// is one whose nonce is actively in use — it was just bumped for a + /// broadcast attempt — so it should be preserved over idle entries + /// under LRU eviction pressure. pub(crate) async fn refresh(&self, identity_id: &Identifier) { { let mut guard = self.identity_nonces.lock().await; if let Some(entry) = guard.get_mut(identity_id) { - entry.last_fetch_timestamp = 0; + entry.last_fetch_timestamp = STALE_TIMESTAMP; } } { @@ -195,7 +209,7 @@ impl NonceCache { .collect(); for key in keys { if let Some(entry) = guard.get_mut(&key) { - entry.last_fetch_timestamp = 0; + entry.last_fetch_timestamp = STALE_TIMESTAMP; } } } @@ -549,12 +563,16 @@ mod nonce_cache_tests { assert_eq!(entry.last_fetched_platform_nonce, 10); } - // --- refresh removes entries entirely, forcing a refetch --- + // --- refresh marks entries stale, preserving cached nonce value --- #[tokio::test] async fn refresh_marks_stale_but_preserves_cached_nonce() { + use std::sync::atomic::{AtomicBool, Ordering}; + let nonce_cache = NonceCache::default(); let identity_id = Identifier::default(); - let settings = never_stale(); + // Use default stale time so that STALE_TIMESTAMP (0) is detected + // as stale, exercising the full Phase 2 + Phase 3 re-fetch path. + let settings = PutSettings::default(); // Seed via initial fetch (platform returns 10, bump to 11). let nonce = nonce_cache @@ -582,10 +600,18 @@ mod nonce_cache_tests { // This is the fix for dashpay/dash-evo-tool#588: without // preserving the cached nonce, we'd get 11 (same as the first // broadcast), causing "tx already exists in cache". + let fetched = AtomicBool::new(false); let nonce = nonce_cache - .get_identity_nonce(identity_id, true, &settings, || async { Ok(10u64) }) + .get_identity_nonce(identity_id, true, &settings, || async { + fetched.store(true, Ordering::Relaxed); + Ok(10u64) + }) .await .unwrap(); + assert!( + fetched.load(Ordering::Relaxed), + "platform fetch must be triggered after refresh" + ); assert_eq!(nonce, 13); } From 730c3e7fd04400647d65a58108842d2c03dccc29 Mon Sep 17 00:00:00 2001 From: Lukasz Klimek <842586+lklimek@users.noreply.github.com> Date: Mon, 23 Feb 2026 16:46:07 +0100 Subject: [PATCH 17/17] refactor(sdk): remove closures from NonceCache public API, take &Sdk instead Move Platform fetch logic from Sdk call sites into NonceCache's pub(crate) methods (get_identity_nonce, get_identity_contract_nonce). The private get_or_fetch_nonce helper retains its generic closure signature unchanged. This eliminates closure boilerplate from sdk.rs and co-locates the fetch + cache logic in one place. Refresh tests now use Sdk::new_mock() with expect_fetch expectations instead of hand-rolled closures. Co-Authored-By: Claude Opus 4.6 --- packages/rs-sdk/src/internal_cache/mod.rs | 135 ++++++++++++++-------- packages/rs-sdk/src/sdk.rs | 51 +------- 2 files changed, 93 insertions(+), 93 deletions(-) diff --git a/packages/rs-sdk/src/internal_cache/mod.rs b/packages/rs-sdk/src/internal_cache/mod.rs index e5d7f52142..ee132faa66 100644 --- a/packages/rs-sdk/src/internal_cache/mod.rs +++ b/packages/rs-sdk/src/internal_cache/mod.rs @@ -1,8 +1,10 @@ use crate::error::Error; use crate::platform::transition::put_settings::PutSettings; -use crate::platform::Identifier; +use crate::platform::{Fetch, Identifier}; +use crate::Sdk; use dpp::identity::identity_nonce::{IDENTITY_NONCE_VALUE_FILTER, MAX_MISSING_IDENTITY_REVISIONS}; use dpp::prelude::IdentityNonce; +use drive_proof_verifier::types::{IdentityContractNonceFetcher, IdentityNonceFetcher}; use lru::LruCache; use std::fmt::Debug; use std::future::Future; @@ -124,42 +126,53 @@ fn bump_nonce(nonce: u64) -> Result { } impl NonceCache { - /// Get or fetch identity nonce from cache. - pub(crate) async fn get_identity_nonce( + /// Get or fetch identity nonce from cache, querying Platform via the SDK + /// when the cached value is stale or absent. + pub(crate) async fn get_identity_nonce( &self, + sdk: &Sdk, identity_id: Identifier, bump_first: bool, settings: &PutSettings, - fetch_from_platform: F, - ) -> Result - where - F: FnOnce() -> Fut, - Fut: Future>, - { + ) -> Result { + let request_settings = settings.request_settings; Self::get_or_fetch_nonce( &self.identity_nonces, identity_id, bump_first, settings, self.default_stale_time_s, - fetch_from_platform, + || async move { + let fetcher = + IdentityNonceFetcher::fetch_with_settings(sdk, identity_id, request_settings) + .await? + .ok_or_else(|| { + tracing::warn!( + identity_id = %identity_id, + "Platform returned no nonce for identity; \ + node may be stale or identity may not exist yet" + ); + Error::IdentityNonceNotFound(format!( + "identity {identity_id}: platform returned no nonce" + )) + })?; + Ok(fetcher.0) + }, ) .await } - /// Get or fetch identity-contract nonce from cache. - pub(crate) async fn get_identity_contract_nonce( + /// Get or fetch identity-contract nonce from cache, querying Platform via + /// the SDK when the cached value is stale or absent. + pub(crate) async fn get_identity_contract_nonce( &self, + sdk: &Sdk, identity_id: Identifier, contract_id: Identifier, bump_first: bool, settings: &PutSettings, - fetch_from_platform: F, - ) -> Result - where - F: FnOnce() -> Fut, - Fut: Future>, - { + ) -> Result { + let request_settings = settings.request_settings; Self::get_or_fetch_nonce( &self.contract_nonces, IdentityContractPair { @@ -169,7 +182,27 @@ impl NonceCache { bump_first, settings, self.default_stale_time_s, - fetch_from_platform, + || async move { + let fetcher = IdentityContractNonceFetcher::fetch_with_settings( + sdk, + (identity_id, contract_id), + request_settings, + ) + .await? + .ok_or_else(|| { + tracing::warn!( + identity_id = %identity_id, + contract_id = %contract_id, + "Platform returned no nonce for identity-contract pair; \ + node may be stale or identity may not exist yet" + ); + Error::IdentityNonceNotFound(format!( + "identity {identity_id} contract {contract_id}: \ + platform returned no nonce" + )) + })?; + Ok(fetcher.0) + }, ) .await } @@ -566,33 +599,37 @@ mod nonce_cache_tests { // --- refresh marks entries stale, preserving cached nonce value --- #[tokio::test] async fn refresh_marks_stale_but_preserves_cached_nonce() { - use std::sync::atomic::{AtomicBool, Ordering}; + use drive_proof_verifier::types::IdentityNonceFetcher; - let nonce_cache = NonceCache::default(); + let mut sdk = crate::Sdk::new_mock(); let identity_id = Identifier::default(); // Use default stale time so that STALE_TIMESTAMP (0) is detected // as stale, exercising the full Phase 2 + Phase 3 re-fetch path. let settings = PutSettings::default(); + // Mock: platform always returns nonce 10. + sdk.mock() + .expect_fetch::(identity_id, Some(IdentityNonceFetcher(10u64))) + .await + .expect("set mock expectation"); + // Seed via initial fetch (platform returns 10, bump to 11). - let nonce = nonce_cache - .get_identity_nonce(identity_id, true, &settings, || async { Ok(10u64) }) + let nonce = sdk + .get_identity_nonce(identity_id, true, Some(settings)) .await .unwrap(); assert_eq!(nonce, 11); // Bump again from cache (11 → 12). - let nonce = nonce_cache - .get_identity_nonce(identity_id, true, &settings, || async { - panic!("should not fetch") - }) + let nonce = sdk + .get_identity_nonce(identity_id, true, Some(settings)) .await .unwrap(); assert_eq!(nonce, 12); // Simulate broadcast failure: refresh marks the entry stale // but preserves the cached nonce value (12). - nonce_cache.refresh(&identity_id).await; + sdk.refresh_identity_nonce(&identity_id).await; // Next call re-fetches from Platform (returns 10, the old value // because the tx hasn't confirmed yet). Phase 3 should see @@ -600,18 +637,10 @@ mod nonce_cache_tests { // This is the fix for dashpay/dash-evo-tool#588: without // preserving the cached nonce, we'd get 11 (same as the first // broadcast), causing "tx already exists in cache". - let fetched = AtomicBool::new(false); - let nonce = nonce_cache - .get_identity_nonce(identity_id, true, &settings, || async { - fetched.store(true, Ordering::Relaxed); - Ok(10u64) - }) + let nonce = sdk + .get_identity_nonce(identity_id, true, Some(settings)) .await .unwrap(); - assert!( - fetched.load(Ordering::Relaxed), - "platform fetch must be triggered after refresh" - ); assert_eq!(nonce, 13); } @@ -619,24 +648,40 @@ mod nonce_cache_tests { /// (the pending tx was confirmed) should use the Platform value. #[tokio::test] async fn refresh_uses_platform_nonce_when_higher() { - let nonce_cache = NonceCache::default(); + use drive_proof_verifier::types::IdentityNonceFetcher; + + let mut sdk = crate::Sdk::new_mock(); let identity_id = Identifier::default(); // Use default stale time so that timestamp=0 is detected as stale. let settings = PutSettings::default(); + // First expectation: platform returns 10. + sdk.mock() + .expect_fetch::(identity_id, Some(IdentityNonceFetcher(10u64))) + .await + .expect("set mock expectation"); + // Seed: platform=10, bump to 11. - let nonce = nonce_cache - .get_identity_nonce(identity_id, true, &settings, || async { Ok(10u64) }) + let nonce = sdk + .get_identity_nonce(identity_id, true, Some(settings)) .await .unwrap(); assert_eq!(nonce, 11); - nonce_cache.refresh(&identity_id).await; + sdk.refresh_identity_nonce(&identity_id).await; + + // Update expectation: platform now returns 20. + sdk.mock() + .remove_fetch_expectation::(identity_id) + .await; + sdk.mock() + .expect_fetch::(identity_id, Some(IdentityNonceFetcher(20u64))) + .await + .expect("set mock expectation"); - // Platform has advanced to 20 (tx confirmed + others). // Phase 3: max(cached=11, platform=20) = 20, bump to 21. - let nonce = nonce_cache - .get_identity_nonce(identity_id, true, &settings, || async { Ok(20u64) }) + let nonce = sdk + .get_identity_nonce(identity_id, true, Some(settings)) .await .unwrap(); assert_eq!(nonce, 21); diff --git a/packages/rs-sdk/src/sdk.rs b/packages/rs-sdk/src/sdk.rs index baf5d3916e..201addaf93 100644 --- a/packages/rs-sdk/src/sdk.rs +++ b/packages/rs-sdk/src/sdk.rs @@ -6,7 +6,7 @@ use crate::mock::MockResponse; #[cfg(feature = "mocks")] use crate::mock::{provider::GrpcContextProvider, MockDashPlatformSdk}; use crate::platform::transition::put_settings::PutSettings; -use crate::platform::{Fetch, Identifier}; +use crate::platform::Identifier; use arc_swap::ArcSwapOption; use dapi_grpc::mock::Mockable; use dapi_grpc::platform::v0::{Proof, ResponseMetadata}; @@ -21,7 +21,6 @@ use dpp::dashcore::Network; use dpp::prelude::IdentityNonce; use dpp::version::{PlatformVersion, PlatformVersionCurrentVersion}; use drive::grovedb::operations::proof::GroveDBProof; -use drive_proof_verifier::types::{IdentityContractNonceFetcher, IdentityNonceFetcher}; use drive_proof_verifier::FromProof; pub use http::Uri; #[cfg(feature = "mocks")] @@ -348,25 +347,7 @@ impl Sdk { let settings = settings.unwrap_or_default(); let nonce = self .nonce_cache - .get_identity_nonce(identity_id, bump_first, &settings, || async { - let fetcher = IdentityNonceFetcher::fetch_with_settings( - self, - identity_id, - settings.request_settings, - ) - .await? - .ok_or_else(|| { - tracing::warn!( - identity_id = %identity_id, - "Platform returned no nonce for identity; \ - node may be stale or identity may not exist yet" - ); - Error::IdentityNonceNotFound(format!( - "identity {identity_id}: platform returned no nonce" - )) - })?; - Ok(fetcher.0) - }) + .get_identity_nonce(self, identity_id, bump_first, &settings) .await?; tracing::trace!( @@ -398,33 +379,7 @@ impl Sdk { ) -> Result { let settings = settings.unwrap_or_default(); self.nonce_cache - .get_identity_contract_nonce( - identity_id, - contract_id, - bump_first, - &settings, - || async { - let fetcher = IdentityContractNonceFetcher::fetch_with_settings( - self, - (identity_id, contract_id), - settings.request_settings, - ) - .await? - .ok_or_else(|| { - tracing::warn!( - identity_id = %identity_id, - contract_id = %contract_id, - "Platform returned no nonce for identity-contract pair; \ - node may be stale or identity may not exist yet" - ); - Error::IdentityNonceNotFound(format!( - "identity {identity_id} contract {contract_id}: \ - platform returned no nonce" - )) - })?; - Ok(fetcher.0) - }, - ) + .get_identity_contract_nonce(self, identity_id, contract_id, bump_first, &settings) .await }