From fb30737193a09df20e59f29890d1927dafe1530d Mon Sep 17 00:00:00 2001 From: "allen.wu" Date: Thu, 26 Mar 2026 18:46:52 +0800 Subject: [PATCH 01/15] feat: StateV2 role state machine + P2P signature verification - StateV2: roleCheckRoutine for dynamic role detection, SequencerHA interface - ApplyBlock: full mutex serialization + idempotent height check - VerifyBlockSignature: fail-close, all V2 blocks must have valid signatures - SignatureStore: independent block signature persistence (LevelDB) - BroadcastReactor: unified signature verification, SyncReq tracking, HasSigner filter - Interfaces: SequencerVerifier, Signer, SequencerHA Co-Authored-By: Claude Opus 4.6 (1M context) --- blocksync/reactor.go | 45 ++++- node/node.go | 23 ++- sequencer/block_verify.go | 35 ++++ sequencer/broadcast_reactor.go | 310 ++++++++++++++++++++--------- sequencer/interfaces.go | 36 +++- sequencer/signature_store.go | 51 +++++ sequencer/state_v2.go | 236 ++++++++++++++-------- sequencer/state_v2_test.go | 350 +++++++++++++++++++++++++++------ 8 files changed, 841 insertions(+), 245 deletions(-) create mode 100644 sequencer/block_verify.go create mode 100644 sequencer/signature_store.go diff --git a/blocksync/reactor.go b/blocksync/reactor.go index f9a03713350..27ea1e92aee 100644 --- a/blocksync/reactor.go +++ b/blocksync/reactor.go @@ -9,6 +9,7 @@ import ( "github.com/tendermint/tendermint/libs/log" "github.com/tendermint/tendermint/p2p" bcproto "github.com/tendermint/tendermint/proto/tendermint/blocksync" + "github.com/tendermint/tendermint/sequencer" sm "github.com/tendermint/tendermint/state" "github.com/tendermint/tendermint/store" "github.com/tendermint/tendermint/types" @@ -74,6 +75,10 @@ type Reactor struct { requestsCh <-chan BlockRequest errorsCh <-chan peerError + + // Sequencer signature verification (set after upgrade via SetVerifier/SetSigStore) + verifier sequencer.SequencerVerifier + sigStore *sequencer.SignatureStore } // NewReactor returns new reactor instance. @@ -139,6 +144,16 @@ func (bcR *Reactor) SetStateV2(stateV2 SequencerState) { bcR.stateV2 = stateV2 } +// SetVerifier sets the sequencer verifier for block signature validation. +func (bcR *Reactor) SetVerifier(v sequencer.SequencerVerifier) { + bcR.verifier = v +} + +// SetSigStore sets the signature store for persisting/attaching block signatures. +func (bcR *Reactor) SetSigStore(s *sequencer.SignatureStore) { + bcR.sigStore = s +} + // Pool returns the block pool for broadcast reactor to check peer heights. func (bcR *Reactor) Pool() *BlockPool { return bcR.pool @@ -269,6 +284,19 @@ func (bcR *Reactor) respondToPeerV2(msg *bcproto.BlockRequest, src p2p.Peer) boo return src.TrySend(BlocksyncChannel, msgBytes) } + // Attach signature from store; if unavailable, refuse to serve + sig, err := bcR.sigStore.GetSignature(blockData.Hash) + if err != nil { + bcR.Logger.Error("No signature available for block, refusing to serve", + "height", msg.Height, "err", err) + msgBytes, encErr := EncodeMsg(&bcproto.NoBlockResponse{Height: msg.Height}) + if encErr != nil { + return false + } + return src.TrySend(BlocksyncChannel, msgBytes) + } + blockData.Signature = sig + bcR.Logger.Debug("respondToPeerV2: got block from geth", "height", msg.Height, "hash", blockData.Hash.Hex()) @@ -291,8 +319,7 @@ func (bcR *Reactor) L2Node() l2node.L2Node { } // syncBlockV2 handles syncing a single BlockV2 in sequencer mode. -// No signature verification during sync - only broadcast channel verifies signatures. -// Returns true if sync was successful, false if there was an error (already handled). +// Verifies block signature using the history-aware verifier, then applies. func (bcR *Reactor) syncBlockV2(block types.SyncableBlock, blocksSynced *uint64, lastRate *float64, lastHundred *time.Time) bool { blockV2, ok := block.(*types.BlockV2) if !ok { @@ -301,13 +328,25 @@ func (bcR *Reactor) syncBlockV2(block types.SyncableBlock, blocksSynced *uint64, return false } - // Apply BlockV2 via stateV2 (no signature verification during sync) + // Verify block signature (uses IsSequencerAt with history-aware lookup) + if err := sequencer.VerifyBlockSignature(bcR.verifier, blockV2); err != nil { + bcR.Logger.Error("Block signature verification failed", "height", blockV2.Number, "err", err) + bcR.pool.RedoRequest(blockV2.GetHeight()) + return false + } + + // Apply BlockV2 via stateV2 if err := bcR.stateV2.ApplyBlock(blockV2); err != nil { bcR.Logger.Error("Failed to apply BlockV2", "height", blockV2.Number, "err", err) bcR.pool.RedoRequest(blockV2.GetHeight()) return false } + // Persist signature for historical serving + if err := bcR.sigStore.SaveSignature(blockV2.Hash, blockV2.Signature); err != nil { + panic(fmt.Sprintf("failed to save signature at height %d: %v", blockV2.Number, err)) + } + bcR.pool.PopRequest() *blocksSynced++ diff --git a/node/node.go b/node/node.go index 7d2b97e8d8b..af0f5ac5b27 100644 --- a/node/node.go +++ b/node/node.go @@ -239,7 +239,7 @@ type Node struct { blockBroadcastReactor *sequencer.BlockBroadcastReactor } -func initDBs(config *cfg.Config, dbProvider DBProvider) (blockStore *store.BlockStore, stateDB dbm.DB, err error) { +func initDBs(config *cfg.Config, dbProvider DBProvider) (blockStore *store.BlockStore, stateDB dbm.DB, sigStore *sequencer.SignatureStore, err error) { var blockStoreDB dbm.DB blockStoreDB, err = dbProvider(&DBContext{"blockstore", config}) if err != nil { @@ -252,6 +252,14 @@ func initDBs(config *cfg.Config, dbProvider DBProvider) (blockStore *store.Block return } + // TODO: add a new store, notify the-3rd parties to integrate + var sigDB dbm.DB + sigDB, err = dbProvider(&DBContext{"signatures", config}) + if err != nil { + return + } + sigStore = sequencer.NewSignatureStore(sigDB) + return } @@ -492,6 +500,8 @@ func createSequencerComponents( logger log.Logger, verifier sequencer.SequencerVerifier, signer sequencer.Signer, + sigStore *sequencer.SignatureStore, + ha sequencer.SequencerHA, ) (*sequencer.StateV2, *sequencer.BlockBroadcastReactor, error) { // Create StateV2 stateV2, err := sequencer.NewStateV2( @@ -500,6 +510,8 @@ func createSequencerComponents( logger, verifier, signer, + sigStore, + ha, ) if err != nil { return nil, nil, fmt.Errorf("failed to create StateV2: %w", err) @@ -512,6 +524,7 @@ func createSequencerComponents( waitSync, logger, verifier, + sigStore, ) broadcastReactor.SetLogger(logger.With("module", "sequencer")) @@ -771,7 +784,7 @@ func NewNode( ) ( *Node, error, ) { - blockStore, stateDB, err := initDBs(config, dbProvider) + blockStore, stateDB, sigStore, err := initDBs(config, dbProvider) if err != nil { return nil, err } @@ -997,12 +1010,16 @@ func NewNode( logger, sequencerVerifier, sequencerSigner, + sigStore, + nil, // ha: nil for now, Raft HA will be injected in a future milestone ); err != nil { return nil, err } - // Set stateV2 on blocksync reactor for post-upgrade sync + // Set stateV2&verifier&sigStore on blocksync reactor for post-upgrade bcR.SetStateV2(node.stateV2) + bcR.SetVerifier(sequencerVerifier) + bcR.SetSigStore(sigStore) // Register BlockBroadcastReactor with Switch sw.AddReactor("SEQUENCER", node.blockBroadcastReactor) diff --git a/sequencer/block_verify.go b/sequencer/block_verify.go new file mode 100644 index 00000000000..d0b693c4add --- /dev/null +++ b/sequencer/block_verify.go @@ -0,0 +1,35 @@ +package sequencer + +import ( + "fmt" + + "github.com/morph-l2/go-ethereum/crypto" +) + +// VerifyBlockSignature verifies a block's ECDSA signature against the +// expected sequencer at that block's height. All V2 blocks must carry a valid signature. +func VerifyBlockSignature(verifier SequencerVerifier, block *BlockV2) error { + if verifier == nil { + return fmt.Errorf("%w: verifier not configured", ErrInvalidSignature) + } + + if len(block.Signature) == 0 { + return fmt.Errorf("%w: missing signature at height %d", ErrInvalidSignature, block.Number) + } + + pubKey, err := crypto.SigToPub(block.Hash.Bytes(), block.Signature) + if err != nil { + return fmt.Errorf("%w: recover pubkey at height %d: %v", ErrInvalidSignature, block.Number, err) + } + signer := crypto.PubkeyToAddress(*pubKey) + + ok, err := verifier.IsSequencerAt(signer, block.Number) + if err != nil { + return fmt.Errorf("IsSequencerAt height %d: %w", block.Number, err) + } + if !ok { + return fmt.Errorf("%w: signer %s is not sequencer at height %d", + ErrInvalidSignature, signer.Hex(), block.Number) + } + return nil +} diff --git a/sequencer/broadcast_reactor.go b/sequencer/broadcast_reactor.go index 7398a3feebb..b75174fd154 100644 --- a/sequencer/broadcast_reactor.go +++ b/sequencer/broadcast_reactor.go @@ -1,8 +1,6 @@ package sequencer import ( - "context" - "errors" "fmt" "math/big" "math/rand" @@ -12,7 +10,6 @@ import ( "github.com/cosmos/gogoproto/proto" "github.com/morph-l2/go-ethereum/common" - "github.com/morph-l2/go-ethereum/crypto" "github.com/tendermint/tendermint/libs/log" "github.com/tendermint/tendermint/p2p" @@ -32,8 +29,20 @@ const ( peerSentCapacity = 500 // Per-peer sent tracking applyInterval = 10 * time.Second syncInterval = 10 * time.Second + + // Sync request tracking + syncRequestTTL = 20 * time.Second + maxPendingSyncRequests = 256 + maxPendingSyncPerPeer = 64 ) +// SyncReq represents a pending sync channel request. +type SyncReq struct { + Height int64 + PeerID p2p.ID + ExpireAt time.Time +} + // BlockPool interface (avoids import cycle) type BlockPool interface { MaxPeerHeight() int64 @@ -56,11 +65,24 @@ type BlockBroadcastReactor struct { seenBlocks *HashSet // Blocks we've seen (dedup) peerSent *PeerHashSet // Blocks sent to each peer - applyMtx sync.Mutex // Protects applyBlock to ensure sequential block application - sequencerStarted bool // True when sequencer mode is actually running (not just registered) + // applyMtx serializes the "check parent + apply + save signature" sequence + // in the reactor's applyBlock path. StateV2.ApplyBlock has its own mutex + // for the actual apply, but applyMtx ensures the parent-hash check and + // signature persistence are also atomic with the apply. + applyMtx sync.Mutex + startMu sync.Mutex + sequencerStarted bool logger log.Logger verifier SequencerVerifier + sigStore *SignatureStore + + // syncRequests tracks pending sync channel requests, keyed by height. + // Used to reject unsolicited responses before decode/verification. + // syncPeerCounts is a secondary index for O(1) per-peer count lookup. + syncRequestsMu sync.Mutex + syncRequests map[int64]*SyncReq + syncPeerCounts map[p2p.ID]int } // NewBlockBroadcastReactor creates a new reactor. @@ -70,17 +92,21 @@ func NewBlockBroadcastReactor( waitSync bool, logger log.Logger, verifier SequencerVerifier, + sigStore *SignatureStore, ) *BlockBroadcastReactor { r := &BlockBroadcastReactor{ - pool: pool, - stateV2: stateV2, - waitSync: waitSync, - recentBlocks: NewBlockRingBuffer(recentBlocksCapacity), - pendingCache: NewPendingBlockCache(), - seenBlocks: NewHashSet(seenBlocksCapacity), - peerSent: NewPeerHashSet(peerSentCapacity), - logger: logger.With("module", "broadcastReactor"), - verifier: verifier, + pool: pool, + stateV2: stateV2, + waitSync: waitSync, + recentBlocks: NewBlockRingBuffer(recentBlocksCapacity), + pendingCache: NewPendingBlockCache(), + seenBlocks: NewHashSet(seenBlocksCapacity), + peerSent: NewPeerHashSet(peerSentCapacity), + syncRequests: make(map[int64]*SyncReq), + syncPeerCounts: make(map[p2p.ID]int), + logger: logger.With("module", "broadcastReactor"), + verifier: verifier, + sigStore: sigStore, } r.BaseReactor = *p2p.NewBaseReactor("BlockBroadcast", r) return r @@ -101,8 +127,10 @@ func (r *BlockBroadcastReactor) OnStart() error { } func (r *BlockBroadcastReactor) StartSequencerRoutines() error { + r.startMu.Lock() + defer r.startMu.Unlock() + if r.sequencerStarted { - r.logger.Error("Sequencer routines already started, skipping") return nil } @@ -114,7 +142,11 @@ func (r *BlockBroadcastReactor) StartSequencerRoutines() error { } } - if r.stateV2.IsSequencerMode() { + // Fullnode (no signer): only applyRoutine (P2P sync) + // Nodes with signer (ActiveSeq / HA-Leader / HA-Follower): only broadcastRoutine + // - roleCheckRoutine is started by StateV2.OnStart() + // - applyRoutine is NOT started (sequencer produces, HA uses Raft) + if r.stateV2.HasSigner() { go r.broadcastRoutine() } else { go r.applyRoutine() @@ -141,6 +173,7 @@ func (r *BlockBroadcastReactor) AddPeer(peer p2p.Peer) { func (r *BlockBroadcastReactor) RemovePeer(peer p2p.Peer, reason interface{}) { r.peerSent.RemovePeer(string(peer.ID())) + r.removeSyncRequestsByPeer(peer.ID()) } func (r *BlockBroadcastReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) { @@ -173,7 +206,7 @@ func (r *BlockBroadcastReactor) handleBroadcastMsg(msg interface{}, src p2p.Peer r.logger.Error("Invalid BlockV2", "err", err) return } - r.onBlockV2(blockV2, src, true) // verify signature + r.onBlockV2(blockV2, src, true) // from broadcast channel r.logger.Debug("handleBroadcastMsg", "src", src, "height", blockV2.Number, "hash", blockV2.Hash.Hex()) } default: @@ -188,15 +221,26 @@ func (r *BlockBroadcastReactor) handleSyncMsg(msg interface{}, src p2p.Peer) { r.logger.Debug("handleSyncMsg block request", "msg", msg, "src", src, "height", msg.Height) case *bcproto.BlockResponseV2: if msg.Block != nil { + // Check request before full decode to reject unsolicited responses early + height := int64(msg.Block.Number) + if !r.checkAndTakeSyncRequest(src.ID(), height) { + r.logger.Error("Unsolicited sync response, dropping", + "peer", src.ID(), "height", height) + return + } blockV2, err := types.BlockV2FromProto(msg.Block) if err != nil { r.logger.Error("Invalid BlockV2", "err", err) return } - r.onBlockV2(blockV2, src, false) // no signature verification + r.onBlockV2(blockV2, src, false) // from sync channel r.logger.Debug("handleSyncMsg block response", "src", src, "height", blockV2.Number, "hash", blockV2.Hash.Hex()) } case *bcproto.NoBlockResponse: + if !r.checkAndTakeSyncRequest(src.ID(), msg.Height) { + r.logger.Error("Unsolicited NoBlockResponse, dropping", "peer", src.ID(), "height", msg.Height) + return + } r.logger.Debug("Peer does not have requested block", "peer", src, "height", msg.Height) default: r.logger.Debug("Ignoring unknown message on sync channel", "type", reflect.TypeOf(msg)) @@ -207,14 +251,29 @@ func (r *BlockBroadcastReactor) handleSyncMsg(msg interface{}, src p2p.Peer) { // Routines // ============================================================================ -// broadcastRoutine: listen to stateV2 and broadcast new blocks +// broadcastRoutine: listen to block source and broadcast to P2P. +// Data source: HA mode reads from ha.Subscribe(), non-HA from broadcastCh. +// HA Follower consumes channel to prevent buildup but does not broadcast. func (r *BlockBroadcastReactor) broadcastRoutine() { r.logger.Info("Starting block broadcast routine") + + var source <-chan *BlockV2 + if r.stateV2.IsHAMode() { + source = r.stateV2.HASubscribe() + } else { + source = r.stateV2.BroadcastCh() + } + for { select { case <-r.Quit(): return - case block := <-r.stateV2.BroadcastCh(): + case block := <-source: + // HA Follower: consume channel but do not broadcast. + // When promoted to Leader, the next block automatically starts broadcasting. + if r.stateV2.IsHAMode() && !r.stateV2.IsHALeader() { + continue + } r.recentBlocks.Add(block) r.broadcast(block) } @@ -245,14 +304,28 @@ func (r *BlockBroadcastReactor) applyRoutine() { // Core Logic // ============================================================================ -// onBlockV2: receive block from peer -// verifySig: true for broadcast channel, false for sync channel -func (r *BlockBroadcastReactor) onBlockV2(block *BlockV2, src p2p.Peer, verifySig bool) { - r.logger.Debug("onBlockV2", "number", block.Number, "hash", block.Hash.Hex(), "verifySig", verifySig) - // Dedup: skip if already seen, only for broadcast channel. - // Sync channel should not check dedup. - if r.markSeen(block.Hash) && verifySig { - r.logger.Debug("onBlockV2 broadcast dedup", "number", block.Number, "hash", block.Hash.Hex(), "verifySig", verifySig) +// onBlockV2: receive block from peer. +// fromBroadcast: true for broadcast channel (triggers gossip), false for sync channel. +func (r *BlockBroadcastReactor) onBlockV2(block *BlockV2, src p2p.Peer, fromBroadcast bool) { + // Only Fullnode (no signer) processes P2P blocks: + // - ActiveSequencer: sole producer, P2P blocks are only echoes or invalid + // - HA nodes: receive blocks via Raft, not P2P + if r.stateV2.HasSigner() { + return + } + + r.logger.Debug("onBlockV2", "number", block.Number, "hash", block.Hash.Hex(), "fromBroadcast", fromBroadcast) + + // Dedup: only for broadcast channel + if fromBroadcast && r.markSeen(block.Hash) { + r.logger.Debug("onBlockV2 broadcast dedup", "number", block.Number, "hash", block.Hash.Hex()) + return + } + + // Unified signature verification for all incoming blocks + if err := VerifyBlockSignature(r.verifier, block); err != nil { + r.logger.Error("Block signature verification failed, discarding", + "number", block.Number, "hash", block.Hash.Hex(), "err", err) return } @@ -261,24 +334,16 @@ func (r *BlockBroadcastReactor) onBlockV2(block *BlockV2, src p2p.Peer, verifySi localHeight := r.stateV2.LatestHeight() - // Try apply if it's the next block (height + parent match) if r.isNextBlock(block) { - if err := r.applyBlock(block, verifySig); err != nil { + if err := r.applyBlock(block); err != nil { r.logger.Error("Apply failed", "number", block.Number, "hash", block.Hash.Hex(), "err", err) - // Only cache blocks with valid signatures (don't cache signature failures) - if verifySig && !errors.Is(err, ErrInvalidSignature) { - r.logger.Debug("Apply failed, caching block", "number", block.Number, "hash", block.Hash.Hex()) - r.pendingCache.Add(block, uint64(localHeight)) - } + r.pendingCache.Add(block, uint64(localHeight)) return } - // Gossip the latest block to other peers - if verifySig { + if fromBroadcast { r.gossipBlock(block, src.ID()) } - } else if verifySig { - // Cache all other blocks (future or past for potential reorg) - r.logger.Debug("future block, caching", "number", block.Number, "hash", block.Hash.Hex()) + } else { r.pendingCache.Add(block, uint64(localHeight)) } } @@ -303,7 +368,7 @@ func (r *BlockBroadcastReactor) tryApplyFromCache() { break } r.logger.Debug("Trying to apply from cache", "number", block.Number, "hash", block.Hash.Hex()) - if err := r.applyBlock(block, true); err != nil { // should signature verification + if err := r.applyBlock(block); err != nil { r.logger.Error("Apply from cache failed", "number", block.Number, "err", err) break } @@ -331,17 +396,30 @@ func (r *BlockBroadcastReactor) checkSyncGap() { r.requestMissingBlocks(localHeight+1, maxPeerHeight) } -// requestMissingBlocks requests blocks in range [start, end] from peers +// requestMissingBlocks requests blocks in range [start, end] from peers. +// Respects maxOutstandingTotal, maxOutstandingPerPeer, and maxNewRequestsPerTick limits. func (r *BlockBroadcastReactor) requestMissingBlocks(start, end int64) { + r.cleanupExpiredRequests() + peers := r.Switch.Peers().List() if len(peers) == 0 { return } for height := start; height <= end; height++ { - // Find a peer that has this height + if r.pendingSyncCount() >= maxPendingSyncRequests { + break + } + // Skip if already have a pending request for this height + r.syncRequestsMu.Lock() + _, exists := r.syncRequests[height] + r.syncRequestsMu.Unlock() + if exists { + continue + } + + // findPeerWithHeight already skips peers exceeding per-peer limit peer := r.findPeerWithHeight(peers, height) - r.logger.Debug("Finding peer with height", "height", height, "peer", peer.ID()) if peer == nil { continue } @@ -352,14 +430,15 @@ func (r *BlockBroadcastReactor) requestMissingBlocks(start, end int64) { r.logger.Error("Failed to encode BlockRequest", "height", height, "err", err) continue } - r.logger.Info("Requesting block", "height", height, "peer", peer.ID()) if !peer.TrySend(SequencerSyncChannel, bz) { - r.logger.Error("Failed to send BlockRequest (TrySend failed)", "height", height, "peer", peer.ID()) + r.logger.Error("Failed to send BlockRequest", "height", height, "peer", peer.ID()) + continue } + r.recordSyncRequest(peer.ID(), height) } } -// findPeerWithHeight finds a random peer that has the given height. +// findPeerWithHeight finds a random peer that has the given height and is within per-peer request limit. // Uses random start index to avoid always hitting the same peer. func (r *BlockBroadcastReactor) findPeerWithHeight(peers []p2p.Peer, height int64) p2p.Peer { n := len(peers) @@ -367,11 +446,11 @@ func (r *BlockBroadcastReactor) findPeerWithHeight(peers []p2p.Peer, height int6 return nil } - // Random start, then wrap around start := rand.Intn(n) for i := 0; i < n; i++ { peer := peers[(start+i)%n] - if r.pool.GetPeerHeight(peer.ID()) >= height { + if r.pool.GetPeerHeight(peer.ID()) >= height && + r.pendingSyncCountByPeer(peer.ID()) < maxPendingSyncPerPeer { return peer } } @@ -389,18 +468,12 @@ func (r *BlockBroadcastReactor) isNextBlock(block *BlockV2) bool { return block.Number == currentBlock.Number+1 && block.ParentHash == currentBlock.Hash } -// applyBlock: verify and apply a block atomically -// Thread-safe: uses mutex to ensure sequential block application -// verifySig: true for broadcast channel blocks, false for sync channel blocks -func (r *BlockBroadcastReactor) applyBlock(block *BlockV2, verifySig bool) error { +// applyBlock verifies signature, applies the block atomically, and persists the signature. +// Thread-safe: uses mutex to ensure sequential block application. +func (r *BlockBroadcastReactor) applyBlock(block *BlockV2) error { r.applyMtx.Lock() defer r.applyMtx.Unlock() - // Verify signature only for broadcast channel - if verifySig && !r.verifySignature(block) { - return ErrInvalidSignature - } - // Verify parent currentBlock := r.stateV2.LatestBlock() if currentBlock != nil && block.ParentHash != currentBlock.Hash { @@ -412,47 +485,18 @@ func (r *BlockBroadcastReactor) applyBlock(block *BlockV2, verifySig bool) error return err } + // Persist signature for historical block serving + if err := r.sigStore.SaveSignature(block.Hash, block.Signature); err != nil { + panic(fmt.Sprintf("failed to save signature at height %d: %v", block.Number, err)) + } + // Add to recent blocks r.recentBlocks.Add(block) - r.logger.Info("Applied block", "number", block.Number, "verifySig", verifySig) + r.logger.Info("Applied block", "number", block.Number) return nil } -func (r *BlockBroadcastReactor) verifySignature(block *BlockV2) bool { - if len(block.Signature) == 0 { - r.logger.Error("Signature verification failed: empty signature", "block", block.Number) - return false - } - pubKey, err := crypto.SigToPub(block.Hash.Bytes(), block.Signature) - if err != nil { - r.logger.Error("Signature verification failed: SigToPub error", "block", block.Number, "err", err) - return false - } - recoveredAddr := crypto.PubkeyToAddress(*pubKey) - - if r.verifier == nil { - r.logger.Error("Sequencer verifier not set", "block", block.Number) - return false - } - - isSeq, err := r.verifier.IsSequencer(context.Background(), recoveredAddr) - if err != nil { - r.logger.Error("Signature verification failed: verifier error", - "block", block.Number, - "recovered", recoveredAddr.Hex(), - "err", err) - return false - } - if !isSeq { - r.logger.Error("Signature verification failed: not a valid sequencer", - "block", block.Number, - "recovered", recoveredAddr.Hex()) - return false - } - return true -} - // ============================================================================ // Gossip // ============================================================================ @@ -506,6 +550,79 @@ func (r *BlockBroadcastReactor) gossipBlock(block *BlockV2, fromPeer p2p.ID) { } } +// ============================================================================ +// Sync Request Tracking +// ============================================================================ + +// recordSyncRequest records a pending sync request keyed by height. +func (r *BlockBroadcastReactor) recordSyncRequest(peerID p2p.ID, height int64) { + r.syncRequestsMu.Lock() + defer r.syncRequestsMu.Unlock() + if old, ok := r.syncRequests[height]; ok { + r.syncPeerCounts[old.PeerID]-- + } + r.syncRequests[height] = &SyncReq{ + Height: height, + PeerID: peerID, + ExpireAt: time.Now().Add(syncRequestTTL), + } + r.syncPeerCounts[peerID]++ +} + +// checkAndTakeSyncRequest validates a sync response against pending requests. +// Only removes the entry if peerID matches and request is not expired. +func (r *BlockBroadcastReactor) checkAndTakeSyncRequest(peerID p2p.ID, height int64) bool { + r.syncRequestsMu.Lock() + defer r.syncRequestsMu.Unlock() + + req, ok := r.syncRequests[height] + if !ok || req.PeerID != peerID || time.Now().After(req.ExpireAt) { + return false + } + delete(r.syncRequests, height) + r.syncPeerCounts[peerID]-- + return true +} + +// cleanupExpiredRequests removes expired entries. Called before sending new requests. +func (r *BlockBroadcastReactor) cleanupExpiredRequests() { + r.syncRequestsMu.Lock() + defer r.syncRequestsMu.Unlock() + now := time.Now() + for h, req := range r.syncRequests { + if now.After(req.ExpireAt) { + r.syncPeerCounts[req.PeerID]-- + delete(r.syncRequests, h) + } + } +} + +// pendingSyncCount returns the total number of pending sync requests. +func (r *BlockBroadcastReactor) pendingSyncCount() int { + r.syncRequestsMu.Lock() + defer r.syncRequestsMu.Unlock() + return len(r.syncRequests) +} + +// pendingSyncCountByPeer returns the number of pending sync requests for a specific peer. +func (r *BlockBroadcastReactor) pendingSyncCountByPeer(peerID p2p.ID) int { + r.syncRequestsMu.Lock() + defer r.syncRequestsMu.Unlock() + return r.syncPeerCounts[peerID] +} + +// removeSyncRequestsByPeer removes all pending requests for a disconnected peer. +func (r *BlockBroadcastReactor) removeSyncRequestsByPeer(peerID p2p.ID) { + r.syncRequestsMu.Lock() + defer r.syncRequestsMu.Unlock() + for h, req := range r.syncRequests { + if req.PeerID == peerID { + delete(r.syncRequests, h) + } + } + delete(r.syncPeerCounts, peerID) +} + // ============================================================================ // Message Handlers // ============================================================================ @@ -534,6 +651,13 @@ func (r *BlockBroadcastReactor) onBlockRequest(msg *bcproto.BlockRequest, src p2 return } + // Attach signature from store if block doesn't already carry one + if len(block.Signature) == 0 && r.sigStore != nil { + if sig, err := r.sigStore.GetSignature(block.Hash); err == nil { + block.Signature = sig + } + } + resp := &bcproto.BlockResponseV2{ Block: BlockV2ToProto(block), } diff --git a/sequencer/interfaces.go b/sequencer/interfaces.go index b720965cdcf..433135a9ece 100644 --- a/sequencer/interfaces.go +++ b/sequencer/interfaces.go @@ -1,7 +1,6 @@ package sequencer import ( - "context" "errors" "github.com/morph-l2/go-ethereum/common" @@ -13,9 +12,15 @@ var ( ErrInvalidSignature = errors.New("invalid block signature") ) -// SequencerVerifier verifies if an address is the current L1 sequencer +// SequencerVerifier verifies if an address is a valid L1 sequencer. type SequencerVerifier interface { - IsSequencer(ctx context.Context, addr common.Address) (bool, error) + // IsSequencerAt checks if addr was the valid sequencer at the given L2 block height. + IsSequencerAt(addr common.Address, l2Height uint64) (bool, error) + + // VerificationStartHeight returns the L2 block height from which V2 signature + // verification is enforced (= upgradeBlockHeight). Blocks below this height are + // PBFT blocks and skip V2 verification. Returns math.MaxUint64 if not configured. + VerificationStartHeight() uint64 } // Signer interface for sequencer block signing @@ -24,6 +29,27 @@ type Signer interface { Sign(data []byte) ([]byte, error) // Address returns the sequencer's address Address() common.Address - // IsActiveSequencer checks if this signer is the current L1 sequencer - IsActiveSequencer(ctx context.Context) (bool, error) +} + +// SequencerHA is the abstraction for Raft HA cluster. +// In single-node mode, ha == nil and all HA-related logic is skipped. +type SequencerHA interface { + // IsLeader returns whether the current node is the Raft leader (sole block producer). + IsLeader() bool + + // Join adds this node to the Raft cluster. + // Precondition: node has synced to near chain tip via Fullnode mode. + // Fails if localHeight < raft.EarliestRetainedLogHeight. + // On success, the node is a full Raft member and P2P sync can be stopped. + Join() error + + // Commit replicates a signed block via Raft to the cluster. + // Blocks until majority of nodes have acknowledged receipt (NOT applied). + // ApplyBlock is handled separately by leader (after Commit) and followers (via Subscribe). + // On failure, the caller should abandon this block production round. + Commit(block *BlockV2) error + + // Subscribe returns a channel that delivers blocks after Raft commit. + // Both leader and follower subscribe; used by broadcastRoutine for P2P broadcast. + Subscribe() <-chan *BlockV2 } diff --git a/sequencer/signature_store.go b/sequencer/signature_store.go new file mode 100644 index 00000000000..b0cca3d652f --- /dev/null +++ b/sequencer/signature_store.go @@ -0,0 +1,51 @@ +package sequencer + +import ( + "fmt" + + "github.com/morph-l2/go-ethereum/common" + dbm "github.com/tendermint/tm-db" +) + +var signaturePrefix = []byte("block_sig:") + +// SignatureStore persists block signatures independently from geth. +// Key = "block_sig:" + blockHash (32 bytes), Value = signature (65 bytes). +type SignatureStore struct { + db dbm.DB +} + +// NewSignatureStore creates a new SignatureStore backed by the given DB. +func NewSignatureStore(db dbm.DB) *SignatureStore { + return &SignatureStore{db: db} +} + +func signatureKey(blockHash common.Hash) []byte { + key := make([]byte, len(signaturePrefix)+common.HashLength) + copy(key, signaturePrefix) + copy(key[len(signaturePrefix):], blockHash.Bytes()) + return key +} + +// SaveSignature persists a block's signature. +func (s *SignatureStore) SaveSignature(blockHash common.Hash, sig []byte) error { + if s == nil || s.db == nil { + return nil + } + return s.db.Set(signatureKey(blockHash), sig) +} + +// GetSignature retrieves a block's signature. Returns nil, error if not found. +func (s *SignatureStore) GetSignature(blockHash common.Hash) ([]byte, error) { + if s == nil || s.db == nil { + return nil, fmt.Errorf("signature store not initialized") + } + val, err := s.db.Get(signatureKey(blockHash)) + if err != nil { + return nil, err + } + if len(val) == 0 { + return nil, fmt.Errorf("signature not found for block %s", blockHash.Hex()) + } + return val, nil +} diff --git a/sequencer/state_v2.go b/sequencer/state_v2.go index 14833973de1..b18fb81096a 100644 --- a/sequencer/state_v2.go +++ b/sequencer/state_v2.go @@ -1,7 +1,6 @@ package sequencer import ( - "context" "fmt" "sync" "time" @@ -19,41 +18,53 @@ const ( // StateV2 manages the state for centralized sequencer mode. // It replaces the PBFT consensus state after the upgrade. +// +// Node roles: +// - Fullnode (signer==nil): only applyRoutine runs (in reactor), no block production +// - ActiveSequencer (signer!=nil, ha==nil): roleCheckRoutine + broadcastRoutine +// - HA-Leader (signer!=nil, ha!=nil, ha.IsLeader()==true): roleCheckRoutine + broadcastRoutine +// - HA-Follower (signer!=nil, ha!=nil, ha.IsLeader()==false): roleCheckRoutine (idle) type StateV2 struct { service.BaseService mtx sync.RWMutex // Core state - latestBlock *BlockV2 - sequencerMode bool // Whether the node is started in sequencer mode (has signer configured) + latestBlock *BlockV2 // Dependencies l2Node l2node.L2Node signer Signer verifier SequencerVerifier + sigStore *SignatureStore + ha SequencerHA // nil = single-node mode logger log.Logger // Block production - blockTicker *time.Ticker blockInterval time.Duration - // Broadcast channel - blocks produced by this sequencer are sent here + // Broadcast channel - non-HA self-produced blocks are sent here broadcastCh chan *BlockV2 - // Quit channel + // Lifecycle quitCh chan struct{} } // NewStateV2 creates a new StateV2 instance. -// sequencerMode is determined by whether a signer is provided. +// Node mode is determined by whether a signer and/or ha is provided. +// verifier is required when signer is configured (sequencer/HA nodes must verify blocks). func NewStateV2( l2Node l2node.L2Node, blockInterval time.Duration, logger log.Logger, verifier SequencerVerifier, signer Signer, + sigStore *SignatureStore, + ha SequencerHA, ) (*StateV2, error) { + if verifier == nil { + return nil, fmt.Errorf("sequencer verifier is required for V2 mode") + } if blockInterval <= 0 { blockInterval = DefaultBlockInterval } @@ -62,7 +73,8 @@ func NewStateV2( l2Node: l2Node, signer: signer, verifier: verifier, - sequencerMode: signer != nil, + sigStore: sigStore, + ha: ha, blockInterval: blockInterval, logger: logger.With("module", "stateV2"), broadcastCh: make(chan *BlockV2, 100), @@ -75,9 +87,8 @@ func NewStateV2( } // OnStart implements service.Service. -// It initializes state from geth and starts block production if this node is the active sequencer. +// Initializes state from geth. Nodes with a signer start roleCheckRoutine. func (s *StateV2) OnStart() error { - // Initialize latest block from geth latestBlock, err := s.l2Node.GetLatestBlockV2() if err != nil { return fmt.Errorf("failed to get latest block: %w", err) @@ -87,28 +98,17 @@ func (s *StateV2) OnStart() error { s.latestBlock = latestBlock s.mtx.Unlock() - var seqAddr string - var isActiveSequencer bool - if s.signer != nil { - seqAddr = s.signer.Address().Hex() - // Check if this node is the active sequencer via L1 contract - isActiveSequencer, err = s.signer.IsActiveSequencer(context.Background()) - if err != nil { - s.logger.Error("Failed to check sequencer status", "error", err) - isActiveSequencer = false - } - } - + // Use local variable to avoid accessing s.latestBlock without lock in log statement. s.logger.Info("StateV2 initialized", - "latestHeight", s.latestBlock.Number, - "latestHash", s.latestBlock.Hash.Hex(), - "sequencerMode", s.sequencerMode, - "isActiveSequencer", isActiveSequencer, - "seqAddr", seqAddr) - - // Start block production if sequencer mode is enabled and this node is the active sequencer - if s.sequencerMode && isActiveSequencer { - go s.produceBlockRoutine() + "latestHeight", latestBlock.Number, + "latestHash", latestBlock.Hash.Hex(), + "hasSigner", s.signer != nil, + "isHAMode", s.ha != nil) + + // Fullnode (no signer) does not produce blocks; applyRoutine is managed by the reactor. + // Nodes with signer start roleCheckRoutine, which handles dynamic role detection. + if s.signer != nil { + go s.roleCheckRoutine() } return nil @@ -118,71 +118,113 @@ func (s *StateV2) OnStart() error { func (s *StateV2) OnStop() { s.logger.Info("Stopping StateV2") close(s.quitCh) - if s.blockTicker != nil { - s.blockTicker.Stop() - } } -// produceBlockRoutine is the main loop for block production. -func (s *StateV2) produceBlockRoutine() { - s.blockTicker = time.NewTicker(s.blockInterval) - defer s.blockTicker.Stop() +// roleCheckRoutine is the unified loop for role detection and block production. +// It runs for all nodes with a signer (ActiveSequencer, HA-Leader, HA-Follower). +// On each tick it checks isActiveSequencer(): if true, produces a block; otherwise idles. +// This enables bidirectional role transitions without restarting the service. +func (s *StateV2) roleCheckRoutine() { + ticker := time.NewTicker(s.blockInterval) + defer ticker.Stop() - s.logger.Info("Starting block production routine", "interval", s.blockInterval) + s.logger.Info("Starting role check routine", "interval", s.blockInterval) for { select { case <-s.quitCh: - s.logger.Info("Block production routine stopped") + s.logger.Info("Role check routine stopped") return - case <-s.blockTicker.C: + case <-ticker.C: + if !s.isActiveSequencer() { + continue + } s.produceBlock() } } } -// produceBlock produces a new block and broadcasts it. +// isActiveSequencer returns true if this node should produce the next block. +// For HA mode: must be Raft leader AND L1-designated sequencer. +// For single-node mode: must be L1-designated sequencer. +func (s *StateV2) isActiveSequencer() bool { + // HA mode: must be Raft leader + if s.ha != nil && !s.ha.IsLeader() { + return false + } + + s.mtx.RLock() + lb := s.latestBlock + s.mtx.RUnlock() + if lb == nil { + return false + } + nextHeight := lb.Number + 1 + + ok, err := s.verifier.IsSequencerAt(s.signer.Address(), nextHeight) + if err != nil { + s.logger.Error("Failed to check sequencer status", "height", nextHeight, "err", err) + return false + } + return ok +} + +// produceBlock produces a new block, signs it, and either commits via Raft (HA) +// or applies locally and sends to broadcastCh (single-node). func (s *StateV2) produceBlock() { - s.mtx.Lock() + s.mtx.RLock() parentHash := s.latestBlock.Hash - s.mtx.Unlock() + s.mtx.RUnlock() s.logger.Debug("Producing block", "parentHash", parentHash.Hex()) - // Request block data from geth (pass hash as bytes) block, collectedL1Msgs, err := s.l2Node.RequestBlockDataV2(parentHash.Bytes()) if err != nil { s.logger.Error("Failed to request block data", "error", err) return } - _ = collectedL1Msgs // TODO: log or use this info + _ = collectedL1Msgs - // Sign the block if err := s.signBlock(block); err != nil { s.logger.Error("Failed to sign block", "error", err) return } - // ********************* RAFT HA ********************* - // TODO: add raft HA - // **************************************************** + if err := s.sigStore.SaveSignature(block.Hash, block.Signature); err != nil { + panic(fmt.Sprintf("failed to save signature at height %d: %v", block.Number, err)) + } + + // HA mode: replicate via Raft consensus (data replication + majority ACK). + // ha.Commit() only ensures majority of cluster nodes have received the block data. + // Leader applies locally after Commit. Followers apply via Raft FSM internally. + // Broadcast to P2P happens via ha.Subscribe() -> broadcastRoutine on all HA nodes. + if s.ha != nil { + if err := s.ha.Commit(block); err != nil { + s.logger.Error("Failed to commit block via HA", "number", block.Number, "err", err) + return + } + s.logger.Debug("Block committed via HA", "number", block.Number, "hash", block.Hash.Hex()) + } - // Apply the block to geth and update local state + // Apply locally (HA leader + non-HA both apply here) if err := s.ApplyBlock(block); err != nil { s.logger.Error("Failed to apply block", "error", err) return } - // Send to broadcast channel - select { - case s.broadcastCh <- block: - s.logger.Debug("Block produced and queued for broadcast", - "number", block.Number, - "hash", block.Hash.Hex(), - "txCount", len(block.Transactions), - "collectedL1Msgs", collectedL1Msgs) - default: - s.logger.Error("Broadcast channel full, dropping block", "number", block.Number) + // Non-HA: broadcast via broadcastCh -> broadcastRoutine + // HA: broadcast via ha.Subscribe() -> broadcastRoutine (don't write broadcastCh) + if s.ha == nil { + select { + case s.broadcastCh <- block: + s.logger.Debug("Block produced and queued for broadcast", + "number", block.Number, + "hash", block.Hash.Hex(), + "txCount", len(block.Transactions), + "collectedL1Msgs", collectedL1Msgs) + default: + s.logger.Error("Broadcast channel full, dropping block", "number", block.Number) + } } } @@ -191,19 +233,33 @@ func (s *StateV2) signBlock(block *BlockV2) error { if s.signer == nil { return fmt.Errorf("signer not set") } - - // Sign the block hash signature, err := s.signer.Sign(block.Hash.Bytes()) if err != nil { return fmt.Errorf("failed to sign block: %w", err) } - block.Signature = signature - s.logger.Debug("Block signed", "number", block.Number, "hash", block.Hash.Hex(), "signer", s.signer.Address().Hex()) return nil } +// ApplyBlock applies a block to L2 and updates local state. +// Serialized by mutex to prevent concurrent application. +// Idempotent: silently skips blocks already applied. +func (s *StateV2) ApplyBlock(block *BlockV2) error { + s.mtx.Lock() + defer s.mtx.Unlock() + + if s.latestBlock != nil && block.Number <= s.latestBlock.Number { + return nil // idempotent: already applied or older block + } + + if err := s.l2Node.ApplyBlockV2(block); err != nil { + return err + } + s.latestBlock = block + return nil +} + // LatestHeight returns the latest block height. func (s *StateV2) LatestHeight() int64 { s.mtx.RLock() @@ -221,36 +277,44 @@ func (s *StateV2) LatestBlock() *BlockV2 { return s.latestBlock } -// BroadcastCh returns the channel for blocks to be broadcast. -// No lock needed - channel itself is thread-safe. +// BroadcastCh returns the channel for self-produced blocks (non-HA mode only). func (s *StateV2) BroadcastCh() <-chan *BlockV2 { return s.broadcastCh } -// ApplyBlock applies a block to L2 and updates local state. -// This is the unified entry point for block application. -func (s *StateV2) ApplyBlock(block *BlockV2) error { - // Apply to L2 execution layer - if err := s.l2Node.ApplyBlockV2(block); err != nil { - return err - } +// GetBlockByNumber gets a block from l2node by number. +func (s *StateV2) GetBlockByNumber(number uint64) (*BlockV2, error) { + return s.l2Node.GetBlockByNumber(number) +} - // Update local state - s.mtx.Lock() - s.latestBlock = block - s.mtx.Unlock() +// HasSigner returns whether this node has a signer configured. +// Fullnode returns false; ActiveSequencer and HA nodes return true. +func (s *StateV2) HasSigner() bool { + return s.signer != nil +} - return nil +// IsHAMode returns whether this node is in HA mode (ha != nil). +func (s *StateV2) IsHAMode() bool { + return s.ha != nil } -// GetBlockByNumber gets a block from l2node by number. -// Uses geth's eth_getBlockByNumber RPC internally. -func (s *StateV2) GetBlockByNumber(number uint64) (*BlockV2, error) { - return s.l2Node.GetBlockByNumber(number) +// IsHALeader returns whether this node is the current Raft leader. +// Returns false if not in HA mode. +func (s *StateV2) IsHALeader() bool { + return s.ha != nil && s.ha.IsLeader() +} + +// HASubscribe returns the HA block delivery channel. +// Panics if not in HA mode. +func (s *StateV2) HASubscribe() <-chan *BlockV2 { + if s.ha == nil { + panic("HASubscribe called but not in HA mode") + } + return s.ha.Subscribe() } -// IsSequencerMode returns whether this node is started in sequencer mode. -// This means the node has a signer configured and can potentially produce blocks. +// IsSequencerMode returns whether this node has a signer configured. +// Deprecated: use HasSigner() instead. TODO: remove after all callers are updated. func (s *StateV2) IsSequencerMode() bool { - return s.sequencerMode + return s.signer != nil } diff --git a/sequencer/state_v2_test.go b/sequencer/state_v2_test.go index d33f25293fb..b7dcc136bcf 100644 --- a/sequencer/state_v2_test.go +++ b/sequencer/state_v2_test.go @@ -1,7 +1,7 @@ package sequencer import ( - "context" + "errors" "testing" "time" @@ -11,11 +11,14 @@ import ( "github.com/tendermint/tendermint/types" ) -// mockSignerImpl is a mock implementation of Signer for testing +// ============================================================================ +// Mock implementations +// ============================================================================ + +// mockSignerImpl is a mock implementation of Signer for testing. type mockSignerImpl struct { address common.Address signature []byte - isActive bool } func (m *mockSignerImpl) Sign(data []byte) ([]byte, error) { @@ -29,24 +32,60 @@ func (m *mockSignerImpl) Address() common.Address { return m.address } -func (m *mockSignerImpl) IsActiveSequencer(ctx context.Context) (bool, error) { - return m.isActive, nil +// mockSequencerVerifier is a mock implementation of SequencerVerifier for testing. +type mockSequencerVerifier struct { + isSequencer bool + startHeight uint64 + err error +} + +func (m *mockSequencerVerifier) IsSequencerAt(addr common.Address, l2Height uint64) (bool, error) { + if m.err != nil { + return false, m.err + } + return m.isSequencer, nil +} + +func (m *mockSequencerVerifier) VerificationStartHeight() uint64 { + return m.startHeight +} + +// mockSequencerHA is a mock implementation of SequencerHA for testing. +type mockSequencerHA struct { + leader bool + commitErr error + subCh chan *BlockV2 +} + +func newMockSequencerHA(leader bool) *mockSequencerHA { + return &mockSequencerHA{ + leader: leader, + subCh: make(chan *BlockV2, 10), + } } -// newTestMockL2Node creates a mock L2Node for testing +func (m *mockSequencerHA) IsLeader() bool { return m.leader } +func (m *mockSequencerHA) Join() error { return nil } +func (m *mockSequencerHA) Commit(block *BlockV2) error { return m.commitErr } +func (m *mockSequencerHA) Subscribe() <-chan *BlockV2 { return m.subCh } + +// newTestMockL2Node creates a mock L2Node for testing. func newTestMockL2Node() l2node.L2Node { return l2node.NewMockL2Node(0, "") } +// ============================================================================ +// Existing tests (adapted for new signature) +// ============================================================================ + func TestStateV2_NewStateV2(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - stateV2, err := NewStateV2(mockL2Node, time.Second, logger, nil, nil) + stateV2, err := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } - if stateV2 == nil { t.Fatal("StateV2 should not be nil") } @@ -56,41 +95,32 @@ func TestStateV2_LatestHeight(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - stateV2, err := NewStateV2(mockL2Node, time.Second, logger, nil, nil) + stateV2, err := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } - // Before start, latestBlock should be nil height := stateV2.LatestHeight() if height != 0 { t.Errorf("LatestHeight before start = %d, want 0", height) } } -func TestStateV2_IsSequencerMode(t *testing.T) { +func TestStateV2_HasSigner_MatchesIsSequencerMode(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() + mockVerifier := &mockSequencerVerifier{} - // Without signer, sequencerMode should be false - stateV2, err := NewStateV2(mockL2Node, time.Second, logger, nil, nil) - if err != nil { - t.Fatalf("NewStateV2 failed: %v", err) + // Without signer + s1, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, nil, nil, nil) + if s1.HasSigner() || s1.IsSequencerMode() { + t.Error("should be false when signer is nil") } - if stateV2.IsSequencerMode() { - t.Error("IsSequencerMode should be false when signer is nil") - } - - // With mock signer, sequencerMode should be true - mockSigner := &mockSignerImpl{} - stateV2WithSigner, err := NewStateV2(mockL2Node, time.Second, logger, nil, mockSigner) - if err != nil { - t.Fatalf("NewStateV2 failed: %v", err) - } - - if !stateV2WithSigner.IsSequencerMode() { - t.Error("IsSequencerMode should be true when signer is provided") + // With signer + s2, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, &mockSignerImpl{}, nil, nil) + if !s2.HasSigner() || !s2.IsSequencerMode() { + t.Error("should be true when signer is provided") } } @@ -98,33 +128,22 @@ func TestStateV2_SignBlock(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - // Create mock signer - mockSigner := &mockSignerImpl{ - signature: make([]byte, 65), // Mock 65-byte signature - } + mockSigner := &mockSignerImpl{signature: make([]byte, 65)} + mockVerifier := &mockSequencerVerifier{} - stateV2, err := NewStateV2(mockL2Node, time.Second, logger, nil, mockSigner) + stateV2, err := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } - // Create a test block block := &types.BlockV2{ Number: 1, Hash: [32]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, } - // Sign the block - err = stateV2.signBlock(block) - if err != nil { + if err := stateV2.signBlock(block); err != nil { t.Fatalf("signBlock failed: %v", err) } - - if len(block.Signature) == 0 { - t.Error("Block signature should not be empty") - } - - // Verify signature length (Ethereum signatures are 65 bytes) if len(block.Signature) != 65 { t.Errorf("Signature length = %d, want 65", len(block.Signature)) } @@ -134,33 +153,254 @@ func TestStateV2_SignBlockWithoutSigner(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - // Create StateV2 without signer - stateV2, err := NewStateV2(mockL2Node, time.Second, logger, nil, nil) + stateV2, err := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } - block := &types.BlockV2{ - Number: 1, - Hash: [32]byte{1, 2, 3, 4}, + block := &types.BlockV2{Number: 1, Hash: [32]byte{1, 2, 3, 4}} + if err := stateV2.signBlock(block); err == nil { + t.Error("signBlock should fail without signer") } +} + +// ============================================================================ +// New tests: verifier requirement +// ============================================================================ - // Sign should fail without signer - err = stateV2.signBlock(block) +func TestNewStateV2_VerifierRequired(t *testing.T) { + mockL2Node := newTestMockL2Node() + logger := log.NewNopLogger() + + // verifier==nil should fail + _, err := NewStateV2(mockL2Node, time.Second, logger, nil, nil, nil, nil) if err == nil { - t.Error("signBlock should fail without signer") + t.Fatal("expected error when verifier is nil") + } + + // with signer, still fails without verifier + mockSigner := &mockSignerImpl{} + _, err = NewStateV2(mockL2Node, time.Second, logger, nil, mockSigner, nil, nil) + if err == nil { + t.Fatal("expected error when verifier is nil (with signer)") } } -// Helper to create a test StateV2 -func createTestStateV2(t *testing.T, signer Signer) *StateV2 { +func TestNewStateV2_WithHA(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() + mockSigner := &mockSignerImpl{} + mockVerifier := &mockSequencerVerifier{} + ha := newMockSequencerHA(true) - stateV2, err := NewStateV2(mockL2Node, time.Second, logger, nil, signer) + stateV2, err := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, ha) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } + if !stateV2.IsHAMode() { + t.Error("IsHAMode should be true when ha is provided") + } +} + +// ============================================================================ +// New tests: node mode helpers +// ============================================================================ + +func TestStateV2_HasSigner(t *testing.T) { + mockL2Node := newTestMockL2Node() + logger := log.NewNopLogger() + + fullnode, _ := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) + if fullnode.HasSigner() { + t.Error("Fullnode should not have signer") + } + + mockSigner := &mockSignerImpl{} + mockVerifier := &mockSequencerVerifier{} + seqNode, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) + if !seqNode.HasSigner() { + t.Error("Sequencer node should have signer") + } +} + +func TestStateV2_IsHAMode(t *testing.T) { + mockL2Node := newTestMockL2Node() + logger := log.NewNopLogger() + mockSigner := &mockSignerImpl{} + mockVerifier := &mockSequencerVerifier{} + + // Non-HA + nonHA, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) + if nonHA.IsHAMode() { + t.Error("IsHAMode should be false without ha") + } + + // HA + ha, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, newMockSequencerHA(false)) + if !ha.IsHAMode() { + t.Error("IsHAMode should be true with ha") + } +} + +func TestStateV2_IsHALeader(t *testing.T) { + mockL2Node := newTestMockL2Node() + logger := log.NewNopLogger() + mockSigner := &mockSignerImpl{} + mockVerifier := &mockSequencerVerifier{} + + // Non-HA: never leader + nonHA, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) + if nonHA.IsHALeader() { + t.Error("non-HA node should not be HA leader") + } - return stateV2 + // HA follower + follower, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, newMockSequencerHA(false)) + if follower.IsHALeader() { + t.Error("HA follower should not be leader") + } + + // HA leader + leader, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, newMockSequencerHA(true)) + if !leader.IsHALeader() { + t.Error("HA leader should be leader") + } +} + +// ============================================================================ +// New tests: isActiveSequencer +// ============================================================================ + +func TestStateV2_IsActiveSequencer_NonHA_Active(t *testing.T) { + mockL2Node := newTestMockL2Node() + logger := log.NewNopLogger() + mockSigner := &mockSignerImpl{address: common.HexToAddress("0x1")} + mockVerifier := &mockSequencerVerifier{isSequencer: true} + + s, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) + s.latestBlock = &BlockV2{Number: 0} + + if !s.isActiveSequencer() { + t.Error("should be active sequencer when verifier returns true") + } +} + +func TestStateV2_IsActiveSequencer_NonHA_Inactive(t *testing.T) { + mockL2Node := newTestMockL2Node() + logger := log.NewNopLogger() + mockSigner := &mockSignerImpl{address: common.HexToAddress("0x1")} + mockVerifier := &mockSequencerVerifier{isSequencer: false} + + s, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) + s.latestBlock = &BlockV2{Number: 0} + + if s.isActiveSequencer() { + t.Error("should not be active sequencer when verifier returns false") + } +} + +func TestStateV2_IsActiveSequencer_HA_Leader(t *testing.T) { + mockL2Node := newTestMockL2Node() + logger := log.NewNopLogger() + mockSigner := &mockSignerImpl{address: common.HexToAddress("0x1")} + mockVerifier := &mockSequencerVerifier{isSequencer: true} + ha := newMockSequencerHA(true) + + s, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, ha) + s.latestBlock = &BlockV2{Number: 0} + + if !s.isActiveSequencer() { + t.Error("HA leader should be active sequencer") + } +} + +func TestStateV2_IsActiveSequencer_HA_Follower(t *testing.T) { + mockL2Node := newTestMockL2Node() + logger := log.NewNopLogger() + mockSigner := &mockSignerImpl{address: common.HexToAddress("0x1")} + mockVerifier := &mockSequencerVerifier{isSequencer: true} // L1 says active + ha := newMockSequencerHA(false) // but not leader + + s, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, ha) + s.latestBlock = &BlockV2{Number: 0} + + if s.isActiveSequencer() { + t.Error("HA follower should not be active sequencer even if L1 says active") + } +} + +func TestStateV2_IsActiveSequencer_VerifierError(t *testing.T) { + mockL2Node := newTestMockL2Node() + logger := log.NewNopLogger() + mockSigner := &mockSignerImpl{} + mockVerifier := &mockSequencerVerifier{err: errors.New("rpc error")} + + s, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) + s.latestBlock = &BlockV2{Number: 0} + + if s.isActiveSequencer() { + t.Error("should return false when verifier returns error") + } +} + +// ============================================================================ +// New tests: ApplyBlock +// ============================================================================ + +func TestStateV2_ApplyBlock_Idempotent(t *testing.T) { + mockL2Node := newTestMockL2Node() + logger := log.NewNopLogger() + + s, _ := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) + block := &types.BlockV2{Number: 1} + + // Apply twice should not error + if err := s.ApplyBlock(block); err != nil { + t.Fatalf("first apply failed: %v", err) + } + if err := s.ApplyBlock(block); err != nil { + t.Fatalf("second apply (idempotent) failed: %v", err) + } + if s.LatestHeight() != 1 { + t.Errorf("LatestHeight = %d, want 1", s.LatestHeight()) + } +} + +func TestStateV2_ApplyBlock_OlderBlockSkipped(t *testing.T) { + mockL2Node := newTestMockL2Node() + logger := log.NewNopLogger() + + s, _ := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) + + block2 := &types.BlockV2{Number: 2} + block1 := &types.BlockV2{Number: 1} + + if err := s.ApplyBlock(block2); err != nil { + t.Fatalf("apply block2 failed: %v", err) + } + // Apply older block should be skipped silently + if err := s.ApplyBlock(block1); err != nil { + t.Fatalf("apply older block should not error: %v", err) + } + // latestBlock should still be 2 + if s.LatestHeight() != 2 { + t.Errorf("LatestHeight = %d, want 2", s.LatestHeight()) + } +} + +func TestStateV2_ApplyBlock_Sequential(t *testing.T) { + mockL2Node := newTestMockL2Node() + logger := log.NewNopLogger() + + s, _ := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) + + for i := uint64(1); i <= 5; i++ { + block := &types.BlockV2{Number: i} + if err := s.ApplyBlock(block); err != nil { + t.Fatalf("apply block %d failed: %v", i, err) + } + } + if s.LatestHeight() != 5 { + t.Errorf("LatestHeight = %d, want 5", s.LatestHeight()) + } } From 80f79c3d61093994a88e0abfaef832676e57dc16 Mon Sep 17 00:00:00 2001 From: "allen.wu" Date: Fri, 27 Mar 2026 15:41:26 +0800 Subject: [PATCH 02/15] refactor: remove dead code and duplicate proto conversion - Remove SequencerAddress, SetSequencerAddress, IsSequencerAddress, RecoverBlockV2Signer from types/block_v2.go (unused since SequencerVerifier) - Remove duplicate BlockV2ToProto/ProtoToBlockV2 from broadcast_reactor.go, use types.BlockV2ToProto instead - Clean up unused imports (fmt, crypto, math/big, seqproto) Co-Authored-By: Claude Opus 4.6 (1M context) --- sequencer/broadcast_reactor.go | 61 +++------------------------------- types/block_v2.go | 31 ----------------- 2 files changed, 4 insertions(+), 88 deletions(-) diff --git a/sequencer/broadcast_reactor.go b/sequencer/broadcast_reactor.go index b75174fd154..1db8f69b28d 100644 --- a/sequencer/broadcast_reactor.go +++ b/sequencer/broadcast_reactor.go @@ -2,7 +2,6 @@ package sequencer import ( "fmt" - "math/big" "math/rand" "reflect" "sync" @@ -14,7 +13,6 @@ import ( "github.com/tendermint/tendermint/libs/log" "github.com/tendermint/tendermint/p2p" bcproto "github.com/tendermint/tendermint/proto/tendermint/blocksync" - seqproto "github.com/tendermint/tendermint/proto/tendermint/sequencer" "github.com/tendermint/tendermint/types" ) @@ -446,7 +444,7 @@ func (r *BlockBroadcastReactor) findPeerWithHeight(peers []p2p.Peer, height int6 return nil } - start := rand.Intn(n) + start := rand.Intn(n) //nolint:gosec // non-security: peer selection randomization for i := 0; i < n; i++ { peer := peers[(start+i)%n] if r.pool.GetPeerHeight(peer.ID()) >= height && @@ -521,7 +519,7 @@ func (r *BlockBroadcastReactor) hasSentToPeer(peerID p2p.ID, hash common.Hash) b func (r *BlockBroadcastReactor) gossipBlock(block *BlockV2, fromPeer p2p.ID) { r.logger.Info("Gossiping block", "number", block.Number, "hash", block.Hash.Hex(), "fromPeer", fromPeer) msg := &bcproto.BlockResponseV2{ - Block: BlockV2ToProto(block), + Block: types.BlockV2ToProto(block), } bz, err := encodeMsg(msg) if err != nil { @@ -659,7 +657,7 @@ func (r *BlockBroadcastReactor) onBlockRequest(msg *bcproto.BlockRequest, src p2 } resp := &bcproto.BlockResponseV2{ - Block: BlockV2ToProto(block), + Block: types.BlockV2ToProto(block), } bz, err := encodeMsg(resp) if err != nil { @@ -672,7 +670,7 @@ func (r *BlockBroadcastReactor) onBlockRequest(msg *bcproto.BlockRequest, src p2 // broadcast only for sequencer func (r *BlockBroadcastReactor) broadcast(block *BlockV2) { resp := &bcproto.BlockResponseV2{ - Block: BlockV2ToProto(block), + Block: types.BlockV2ToProto(block), } bz, err := encodeMsg(resp) if err != nil { @@ -683,57 +681,6 @@ func (r *BlockBroadcastReactor) broadcast(block *BlockV2) { r.logger.Info("Broadcast block", "number", block.Number, "hash", block.Hash.Hex()) } -// ============================================================================ -// Proto Conversion -// ============================================================================ - -func BlockV2ToProto(block *BlockV2) *seqproto.BlockV2 { - var baseFee []byte - if block.BaseFee != nil { - baseFee = block.BaseFee.Bytes() - } - return &seqproto.BlockV2{ - ParentHash: block.ParentHash.Bytes(), - Miner: block.Miner.Bytes(), - Number: block.Number, - GasLimit: block.GasLimit, - BaseFee: baseFee, - Timestamp: block.Timestamp, - Transactions: block.Transactions, - StateRoot: block.StateRoot.Bytes(), - GasUsed: block.GasUsed, - ReceiptRoot: block.ReceiptRoot.Bytes(), - LogsBloom: block.LogsBloom, - WithdrawTrieRoot: block.WithdrawTrieRoot.Bytes(), - NextL1MessageIndex: block.NextL1MessageIndex, - Hash: block.Hash.Bytes(), - Signature: block.Signature, - } -} - -func ProtoToBlockV2(pb *seqproto.BlockV2) *BlockV2 { - baseFee := new(big.Int) - if len(pb.BaseFee) > 0 { - baseFee.SetBytes(pb.BaseFee) - } - return &BlockV2{ - ParentHash: common.BytesToHash(pb.ParentHash), - Miner: common.BytesToAddress(pb.Miner), - Number: pb.Number, - GasLimit: pb.GasLimit, - BaseFee: baseFee, - Timestamp: pb.Timestamp, - Transactions: pb.Transactions, - StateRoot: common.BytesToHash(pb.StateRoot), - GasUsed: pb.GasUsed, - ReceiptRoot: common.BytesToHash(pb.ReceiptRoot), - LogsBloom: pb.LogsBloom, - WithdrawTrieRoot: common.BytesToHash(pb.WithdrawTrieRoot), - NextL1MessageIndex: pb.NextL1MessageIndex, - Hash: common.BytesToHash(pb.Hash), - Signature: pb.Signature, - } -} // ==================== Message Encoding/Decoding ==================== // Local copies to avoid import cycle with blocksync package diff --git a/types/block_v2.go b/types/block_v2.go index 58e65a8d3d0..ce447a3b3be 100644 --- a/types/block_v2.go +++ b/types/block_v2.go @@ -2,11 +2,9 @@ package types import ( "errors" - "fmt" "math/big" "github.com/morph-l2/go-ethereum/common" - "github.com/morph-l2/go-ethereum/crypto" seqproto "github.com/tendermint/tendermint/proto/tendermint/sequencer" ) @@ -61,35 +59,6 @@ type SyncableBlock interface { // Ensure BlockV2 implements SyncableBlock var _ SyncableBlock = (*BlockV2)(nil) -// SequencerAddress is the expected sequencer address for signature verification. -// This will be set by the sequencer package at init time. -var SequencerAddress common.Address - -// SetSequencerAddress sets the expected sequencer address. -func SetSequencerAddress(addr common.Address) { - SequencerAddress = addr -} - -// IsSequencerAddress checks if the given address is the expected sequencer. -func IsSequencerAddress(addr common.Address) bool { - return addr == SequencerAddress -} - -// RecoverBlockV2Signer recovers the signer address from the block's signature. -func RecoverBlockV2Signer(block *BlockV2) (common.Address, error) { - if len(block.Signature) == 0 { - return common.Address{}, fmt.Errorf("block has no signature") - } - - // Recover the public key from the signature - pubKey, err := crypto.SigToPub(block.Hash.Bytes(), block.Signature) - if err != nil { - return common.Address{}, fmt.Errorf("failed to recover public key: %w", err) - } - - return crypto.PubkeyToAddress(*pubKey), nil -} - // BlockV2FromProto converts a proto BlockV2 to types.BlockV2. func BlockV2FromProto(pb *seqproto.BlockV2) (*BlockV2, error) { if pb == nil { From 624017e0f24854e63545bec0fcfc36c0d433ea93 Mon Sep 17 00:00:00 2001 From: "allen.wu" Date: Thu, 9 Apr 2026 14:01:01 +0800 Subject: [PATCH 03/15] feat: SequencerHA interface + StateV2 dynamic block interval SequencerHA interface (sequencer/interfaces.go): - Start/Stop/IsLeader/Join/Commit/Subscribe/SetOnBlockApplied - Abstracts Raft cluster for StateV2 block production StateV2 changes (sequencer/state_v2.go): - Dynamic block interval: fastTicker (300ms) polls txpool, slowTimer (3s) produces empty blocks as fallback for chain liveness - Split produceBlock into assembleBlock + commitBlock for clean separation of block assembly (read-only) vs signing+committing (side-effecting) - HA path: signBlock -> ha.Commit (Raft replication, FSM handles apply) - Non-HA path: signBlock -> ApplyBlock -> broadcastCh - ApplyBlock: reorg detection + idempotent skip + signature enforcement - Remove blockInterval constructor parameter, use package-level constants node/node.go: - Accept SequencerHA parameter, inject SetOnBlockApplied callback Test fixes: - Add Start/Stop/SetOnBlockApplied to mockSequencerHA - Add Signature field to ApplyBlock test blocks (signature enforcement) - Fix RollbackTo_All assertion (count should be 0, not 1) - Remove blockInterval parameter from all NewStateV2 test calls Co-Authored-By: Claude Opus 4.6 (1M context) --- node/node.go | 18 ++- rpc/test/helpers.go | 5 +- sequencer/block_cache_test.go | 13 +- sequencer/broadcast_reactor.go | 14 +-- sequencer/interfaces.go | 16 +++ sequencer/state_v2.go | 216 ++++++++++++++++++++++++--------- sequencer/state_v2_test.go | 68 ++++++----- test/e2e/node/main.go | 5 +- 8 files changed, 242 insertions(+), 113 deletions(-) diff --git a/node/node.go b/node/node.go index af0f5ac5b27..7d6b664bf1a 100644 --- a/node/node.go +++ b/node/node.go @@ -111,8 +111,9 @@ func DefaultNewNode(config *cfg.Config, logger log.Logger) (*Node, error) { DefaultDBProvider, DefaultMetricsProvider(config.Instrumentation), logger, - nil, - nil, + nil, // sequencerVerifier + nil, // sequencerSigner + nil, // ha: no HA in default node ) } @@ -237,6 +238,8 @@ type Node struct { // Sequencer mode (after upgrade) stateV2 *sequencer.StateV2 blockBroadcastReactor *sequencer.BlockBroadcastReactor + sigStore *sequencer.SignatureStore + ha sequencer.SequencerHA } func initDBs(config *cfg.Config, dbProvider DBProvider) (blockStore *store.BlockStore, stateDB dbm.DB, sigStore *sequencer.SignatureStore, err error) { @@ -506,7 +509,6 @@ func createSequencerComponents( // Create StateV2 stateV2, err := sequencer.NewStateV2( l2Node, - sequencer.DefaultBlockInterval, logger, verifier, signer, @@ -780,6 +782,7 @@ func NewNode( logger log.Logger, sequencerVerifier sequencer.SequencerVerifier, sequencerSigner sequencer.Signer, + ha sequencer.SequencerHA, options ...Option, ) ( *Node, error, @@ -995,6 +998,8 @@ func NewNode( indexerService: indexerService, blockIndexer: blockIndexer, eventBus: eventBus, + sigStore: sigStore, + ha: ha, } node.BaseService = *service.NewBaseService(logger, "Node", node) @@ -1011,11 +1016,16 @@ func NewNode( sequencerVerifier, sequencerSigner, sigStore, - nil, // ha: nil for now, Raft HA will be injected in a future milestone + ha, // HA service injected from NewNode caller; nil disables HA mode ); err != nil { return nil, err } + // Wire HA FSM callback: ApplyBlock handles geth apply + SaveSignature. + if ha != nil { + ha.SetOnBlockApplied(node.stateV2.ApplyBlock) + } + // Set stateV2&verifier&sigStore on blocksync reactor for post-upgrade bcR.SetStateV2(node.stateV2) bcR.SetVerifier(sequencerVerifier) diff --git a/rpc/test/helpers.go b/rpc/test/helpers.go index 0cc19f182e3..04197b75422 100644 --- a/rpc/test/helpers.go +++ b/rpc/test/helpers.go @@ -178,8 +178,9 @@ func NewTendermint(app abci.Application, opts *Options) *nm.Node { nm.DefaultDBProvider, nm.DefaultMetricsProvider(config.Instrumentation), logger, - nil, - nil, + nil, // sequencerVerifier + nil, // sequencerSigner + nil, // ha: no HA in RPC test node ) if err != nil { panic(err) diff --git a/sequencer/block_cache_test.go b/sequencer/block_cache_test.go index b8b63302ec5..d598f60de25 100644 --- a/sequencer/block_cache_test.go +++ b/sequencer/block_cache_test.go @@ -239,17 +239,12 @@ func TestBlockRingBuffer_RollbackTo_All(t *testing.T) { rb.Add(makeBlock(i, byte(i))) } - // Rollback to before first block + // Rollback to before first block — removes all blocks with h > 99 rb.RollbackTo(99) - // All blocks removed, but minHeight stays at 100 - // (rollbackTo only removes > targetHeight) - assert.Equal(t, 1, rb.Count()) // Block 100 remains (not > 99, it's == 100) - - // Actually let's re-read the logic... rollbackTo removes h > targetHeight - // So rollbackTo(99) removes 100, 101, 102, 103, 104 - // Wait, the loop is: for h := rb.maxHeight; h > targetHeight; h-- - // So it removes 104, 103, 102, 101, 100 (all > 99) + // rollbackTo loop: for h := rb.maxHeight; h > targetHeight; h-- + // removes 104, 103, 102, 101, 100 (all > 99) + assert.Equal(t, 0, rb.Count()) } func TestBlockRingBuffer_RollbackTo_All_Correct(t *testing.T) { diff --git a/sequencer/broadcast_reactor.go b/sequencer/broadcast_reactor.go index 1db8f69b28d..1812eae7b83 100644 --- a/sequencer/broadcast_reactor.go +++ b/sequencer/broadcast_reactor.go @@ -267,12 +267,13 @@ func (r *BlockBroadcastReactor) broadcastRoutine() { case <-r.Quit(): return case block := <-source: - // HA Follower: consume channel but do not broadcast. - // When promoted to Leader, the next block automatically starts broadcasting. + // Always populate recentBlocks so the cache is warm if this follower + // is promoted to leader and starts serving block requests from fullnodes. + r.recentBlocks.Add(block) + // HA Follower: do not broadcast, only drain the channel. if r.stateV2.IsHAMode() && !r.stateV2.IsHALeader() { continue } - r.recentBlocks.Add(block) r.broadcast(block) } } @@ -478,16 +479,11 @@ func (r *BlockBroadcastReactor) applyBlock(block *BlockV2) error { return fmt.Errorf("parent mismatch") } - // Update state via stateV2 (unified entry point) + // ApplyBlock handles geth apply + SaveSignature in one call. if err := r.stateV2.ApplyBlock(block); err != nil { return err } - // Persist signature for historical block serving - if err := r.sigStore.SaveSignature(block.Hash, block.Signature); err != nil { - panic(fmt.Sprintf("failed to save signature at height %d: %v", block.Number, err)) - } - // Add to recent blocks r.recentBlocks.Add(block) diff --git a/sequencer/interfaces.go b/sequencer/interfaces.go index 433135a9ece..1c1b5159132 100644 --- a/sequencer/interfaces.go +++ b/sequencer/interfaces.go @@ -34,6 +34,17 @@ type Signer interface { // SequencerHA is the abstraction for Raft HA cluster. // In single-node mode, ha == nil and all HA-related logic is skipped. type SequencerHA interface { + // Start initializes and starts the Raft service. + // leader (bootstrap=true): starts as a single-node cluster, immediately becomes Leader. + // follower (bootstrap=false): initializes Raft, then asynchronously retries Join() in the + // background until it successfully joins the cluster or the service is stopped. + // Called by StateV2.OnStart() when the upgrade height is reached. + Start() error + + // Stop gracefully shuts down the Raft service. + // Called by StateV2.OnStop() when the upgrade height has been passed. + Stop() + // IsLeader returns whether the current node is the Raft leader (sole block producer). IsLeader() bool @@ -52,4 +63,9 @@ type SequencerHA interface { // Subscribe returns a channel that delivers blocks after Raft commit. // Both leader and follower subscribe; used by broadcastRoutine for P2P broadcast. Subscribe() <-chan *BlockV2 + + // SetOnBlockApplied registers the callback invoked by the Raft FSM on every + // committed log entry. The callback should execute ApplyBlock + SaveSignature. + // Must be called before Start(). + SetOnBlockApplied(fn func(*BlockV2) error) } diff --git a/sequencer/state_v2.go b/sequencer/state_v2.go index b18fb81096a..d2e567ccc32 100644 --- a/sequencer/state_v2.go +++ b/sequencer/state_v2.go @@ -11,9 +11,11 @@ import ( ) const ( - // DefaultBlockInterval is the default interval between blocks - // TODO: make this configurable - DefaultBlockInterval = 3000 * time.Millisecond + // BlockInterval is the fallback interval for empty blocks (no txs). + BlockInterval = 3000 * time.Millisecond + // FastBlockInterval is the txpool polling interval. + // When pending txs are found, a block is produced immediately. + FastBlockInterval = 300 * time.Millisecond ) // StateV2 manages the state for centralized sequencer mode. @@ -41,7 +43,8 @@ type StateV2 struct { logger log.Logger // Block production - blockInterval time.Duration + blockInterval time.Duration // empty-block fallback interval (default 3s) + fastBlockInterval time.Duration // txpool polling interval (default 300ms) // Broadcast channel - non-HA self-produced blocks are sent here broadcastCh chan *BlockV2 @@ -55,7 +58,6 @@ type StateV2 struct { // verifier is required when signer is configured (sequencer/HA nodes must verify blocks). func NewStateV2( l2Node l2node.L2Node, - blockInterval time.Duration, logger log.Logger, verifier SequencerVerifier, signer Signer, @@ -65,20 +67,18 @@ func NewStateV2( if verifier == nil { return nil, fmt.Errorf("sequencer verifier is required for V2 mode") } - if blockInterval <= 0 { - blockInterval = DefaultBlockInterval - } s := &StateV2{ - l2Node: l2Node, - signer: signer, - verifier: verifier, - sigStore: sigStore, - ha: ha, - blockInterval: blockInterval, - logger: logger.With("module", "stateV2"), - broadcastCh: make(chan *BlockV2, 100), - quitCh: make(chan struct{}), + l2Node: l2Node, + signer: signer, + verifier: verifier, + sigStore: sigStore, + ha: ha, + blockInterval: BlockInterval, + fastBlockInterval: FastBlockInterval, + logger: logger.With("module", "stateV2"), + broadcastCh: make(chan *BlockV2, 100), + quitCh: make(chan struct{}), } s.BaseService = *service.NewBaseService(logger, "StateV2", s) @@ -105,6 +105,14 @@ func (s *StateV2) OnStart() error { "hasSigner", s.signer != nil, "isHAMode", s.ha != nil) + // Start HA service at upgrade height. This initializes Raft and begins + // leader election (bootstrap) or cluster join (follower). + if s.ha != nil { + if err := s.ha.Start(); err != nil { + return fmt.Errorf("failed to start HA service: %w", err) + } + } + // Fullnode (no signer) does not produce blocks; applyRoutine is managed by the reactor. // Nodes with signer start roleCheckRoutine, which handles dynamic role detection. if s.signer != nil { @@ -118,32 +126,75 @@ func (s *StateV2) OnStart() error { func (s *StateV2) OnStop() { s.logger.Info("Stopping StateV2") close(s.quitCh) + if s.ha != nil { + s.ha.Stop() + } } // roleCheckRoutine is the unified loop for role detection and block production. // It runs for all nodes with a signer (ActiveSequencer, HA-Leader, HA-Follower). -// On each tick it checks isActiveSequencer(): if true, produces a block; otherwise idles. -// This enables bidirectional role transitions without restarting the service. +// +// Two timers drive block production: +// - fastTicker (300ms): polls txpool via assembleBlock; produces immediately when txs found. +// - slowTimer (3s): forces a block (even empty) to maintain chain liveness. +// +// When fastTicker produces a block, slowTimer is reset to avoid redundant empty blocks. func (s *StateV2) roleCheckRoutine() { - ticker := time.NewTicker(s.blockInterval) - defer ticker.Stop() + fastTicker := time.NewTicker(s.fastBlockInterval) + slowTimer := time.NewTimer(s.blockInterval) + defer fastTicker.Stop() + defer slowTimer.Stop() - s.logger.Info("Starting role check routine", "interval", s.blockInterval) + s.logger.Info("Starting role check routine", + "pollInterval", s.fastBlockInterval, + "emptyBlockInterval", s.blockInterval) for { select { case <-s.quitCh: s.logger.Info("Role check routine stopped") return - case <-ticker.C: + + case <-fastTicker.C: if !s.isActiveSequencer() { continue } - s.produceBlock() + block, collectedL1Msgs, err := s.assembleBlock() + if err != nil { + continue + } + if len(block.Transactions) == 0 && !collectedL1Msgs { + continue // empty block, discard and wait for next tick + } + s.commitBlock(block, collectedL1Msgs) + resetTimer(slowTimer, s.blockInterval) + + case <-slowTimer.C: + if s.isActiveSequencer() { + block, collectedL1Msgs, err := s.assembleBlock() + if err == nil { + s.commitBlock(block, collectedL1Msgs) + } + } + slowTimer.Reset(s.blockInterval) } } } +// resetTimer safely stops, drains, and resets a timer. +// t.Stop() returns false when the timer has already fired but t.C has not been +// consumed; the non-blocking drain prevents a stale fire from triggering the +// next select iteration. +func resetTimer(t *time.Timer, d time.Duration) { + if !t.Stop() { + select { + case <-t.C: + default: + } + } + t.Reset(d) +} + // isActiveSequencer returns true if this node should produce the next block. // For HA mode: must be Raft leader AND L1-designated sequencer. // For single-node mode: must be L1-designated sequencer. @@ -169,55 +220,77 @@ func (s *StateV2) isActiveSequencer() bool { return ok } -// produceBlock produces a new block, signs it, and either commits via Raft (HA) -// or applies locally and sends to broadcastCh (single-node). -func (s *StateV2) produceBlock() { +// assembleBlock calls geth Engine API to build a candidate block from the current +// txpool and pending L1 messages. This is a read-only operation with no side +// effects — the result can be safely discarded if the block has no work. +func (s *StateV2) assembleBlock() (*BlockV2, bool, error) { s.mtx.RLock() parentHash := s.latestBlock.Hash s.mtx.RUnlock() - s.logger.Debug("Producing block", "parentHash", parentHash.Hex()) - block, collectedL1Msgs, err := s.l2Node.RequestBlockDataV2(parentHash.Bytes()) if err != nil { - s.logger.Error("Failed to request block data", "error", err) - return + s.logger.Error("Failed to assemble block", "error", err) + return nil, false, err } - _ = collectedL1Msgs + return block, collectedL1Msgs, nil +} +// commitBlock signs the assembled block and either commits via Raft (HA mode) +// or applies locally and sends to broadcastCh (single-node mode). +func (s *StateV2) commitBlock(block *BlockV2, collectedL1Msgs bool) { + t0 := time.Now() + + tSign := time.Now() if err := s.signBlock(block); err != nil { s.logger.Error("Failed to sign block", "error", err) return } + signDur := time.Since(tSign) - if err := s.sigStore.SaveSignature(block.Hash, block.Signature); err != nil { - panic(fmt.Sprintf("failed to save signature at height %d: %v", block.Number, err)) - } - - // HA mode: replicate via Raft consensus (data replication + majority ACK). - // ha.Commit() only ensures majority of cluster nodes have received the block data. - // Leader applies locally after Commit. Followers apply via Raft FSM internally. - // Broadcast to P2P happens via ha.Subscribe() -> broadcastRoutine on all HA nodes. if s.ha != nil { + // HA mode: replicate via Raft. FSM callback handles ApplyBlock + SaveSignature + // for both leader and follower. Broadcast via ha.Subscribe() -> broadcastRoutine. + tCommit := time.Now() if err := s.ha.Commit(block); err != nil { s.logger.Error("Failed to commit block via HA", "number", block.Number, "err", err) return } - s.logger.Debug("Block committed via HA", "number", block.Number, "hash", block.Hash.Hex()) - } - - // Apply locally (HA leader + non-HA both apply here) - if err := s.ApplyBlock(block); err != nil { - s.logger.Error("Failed to apply block", "error", err) - return - } + commitDur := time.Since(tCommit) + totalDur := time.Since(t0) + + s.logger.Debug("[PERF] commitBlock", + "mode", "HA", + "height", block.Number, + "txCount", len(block.Transactions), + "gasUsed", block.GasUsed, + "sign_ms", float64(signDur.Microseconds())/1000.0, + "raft_commit_ms", float64(commitDur.Microseconds())/1000.0, + "total_ms", float64(totalDur.Microseconds())/1000.0, + ) + } else { + // Non-HA mode: apply locally (includes SaveSignature), broadcast via broadcastCh. + tApply := time.Now() + if err := s.ApplyBlock(block); err != nil { + s.logger.Error("Failed to apply block", "error", err) + return + } + applyDur := time.Since(tApply) + totalDur := time.Since(t0) + + s.logger.Debug("[PERF] commitBlock", + "mode", "single", + "height", block.Number, + "txCount", len(block.Transactions), + "gasUsed", block.GasUsed, + "sign_ms", float64(signDur.Microseconds())/1000.0, + "apply_ms", float64(applyDur.Microseconds())/1000.0, + "total_ms", float64(totalDur.Microseconds())/1000.0, + ) - // Non-HA: broadcast via broadcastCh -> broadcastRoutine - // HA: broadcast via ha.Subscribe() -> broadcastRoutine (don't write broadcastCh) - if s.ha == nil { select { case s.broadcastCh <- block: - s.logger.Debug("Block produced and queued for broadcast", + s.logger.Debug("Block queued for broadcast", "number", block.Number, "hash", block.Hash.Hex(), "txCount", len(block.Transactions), @@ -244,19 +317,54 @@ func (s *StateV2) signBlock(block *BlockV2) error { // ApplyBlock applies a block to L2 and updates local state. // Serialized by mutex to prevent concurrent application. -// Idempotent: silently skips blocks already applied. +// +// Reorg detection: when block.Number <= latestBlock.Number, we check if the +// existing block at that height has the same hash. If yes, it's already on-chain +// (idempotent skip). If not, it's a reorg — fall through to ApplyBlockV2 and +// let geth handle the chain reorganization. func (s *StateV2) ApplyBlock(block *BlockV2) error { s.mtx.Lock() defer s.mtx.Unlock() if s.latestBlock != nil && block.Number <= s.latestBlock.Number { - return nil // idempotent: already applied or older block + existing, err := s.l2Node.GetBlockByNumber(block.Number) + if err != nil { + return fmt.Errorf("ApplyBlock: GetBlockByNumber(%d): %w", block.Number, err) + } + if existing != nil && existing.Hash == block.Hash { + s.logger.Debug("ApplyBlock: idempotent skip", "height", block.Number) + return nil // already on-chain, idempotent skip + } + // Hash mismatch → reorg, fall through to ApplyBlockV2 + } + + if len(block.Signature) == 0 { + return fmt.Errorf("ApplyBlock: block %d missing signature", block.Number) } + tGeth := time.Now() if err := s.l2Node.ApplyBlockV2(block); err != nil { return err } + gethDur := time.Since(tGeth) + s.latestBlock = block + + tSig := time.Now() + if err := s.sigStore.SaveSignature(block.Hash, block.Signature); err != nil { + return err + } + sigDur := time.Since(tSig) + + s.logger.Debug("[PERF] ApplyBlock", + "height", block.Number, + "txCount", len(block.Transactions), + "gasUsed", block.GasUsed, + "geth_ms", float64(gethDur.Microseconds())/1000.0, + "sigSave_ms", float64(sigDur.Microseconds())/1000.0, + "total_ms", float64((gethDur+sigDur).Microseconds())/1000.0, + ) + return nil } diff --git a/sequencer/state_v2_test.go b/sequencer/state_v2_test.go index b7dcc136bcf..95eb5a374a8 100644 --- a/sequencer/state_v2_test.go +++ b/sequencer/state_v2_test.go @@ -3,7 +3,6 @@ package sequencer import ( "errors" "testing" - "time" "github.com/morph-l2/go-ethereum/common" "github.com/tendermint/tendermint/l2node" @@ -64,10 +63,13 @@ func newMockSequencerHA(leader bool) *mockSequencerHA { } } -func (m *mockSequencerHA) IsLeader() bool { return m.leader } -func (m *mockSequencerHA) Join() error { return nil } -func (m *mockSequencerHA) Commit(block *BlockV2) error { return m.commitErr } -func (m *mockSequencerHA) Subscribe() <-chan *BlockV2 { return m.subCh } +func (m *mockSequencerHA) Start() error { return nil } +func (m *mockSequencerHA) Stop() {} +func (m *mockSequencerHA) IsLeader() bool { return m.leader } +func (m *mockSequencerHA) Join() error { return nil } +func (m *mockSequencerHA) Commit(block *BlockV2) error { return m.commitErr } +func (m *mockSequencerHA) Subscribe() <-chan *BlockV2 { return m.subCh } +func (m *mockSequencerHA) SetOnBlockApplied(fn func(*BlockV2) error) {} // newTestMockL2Node creates a mock L2Node for testing. func newTestMockL2Node() l2node.L2Node { @@ -82,7 +84,7 @@ func TestStateV2_NewStateV2(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - stateV2, err := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) + stateV2, err := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } @@ -95,7 +97,7 @@ func TestStateV2_LatestHeight(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - stateV2, err := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) + stateV2, err := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } @@ -112,13 +114,13 @@ func TestStateV2_HasSigner_MatchesIsSequencerMode(t *testing.T) { mockVerifier := &mockSequencerVerifier{} // Without signer - s1, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, nil, nil, nil) + s1, _ := NewStateV2(mockL2Node, logger, mockVerifier, nil, nil, nil) if s1.HasSigner() || s1.IsSequencerMode() { t.Error("should be false when signer is nil") } // With signer - s2, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, &mockSignerImpl{}, nil, nil) + s2, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockSignerImpl{}, nil, nil) if !s2.HasSigner() || !s2.IsSequencerMode() { t.Error("should be true when signer is provided") } @@ -131,7 +133,7 @@ func TestStateV2_SignBlock(t *testing.T) { mockSigner := &mockSignerImpl{signature: make([]byte, 65)} mockVerifier := &mockSequencerVerifier{} - stateV2, err := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) + stateV2, err := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } @@ -153,7 +155,7 @@ func TestStateV2_SignBlockWithoutSigner(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - stateV2, err := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) + stateV2, err := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } @@ -173,14 +175,14 @@ func TestNewStateV2_VerifierRequired(t *testing.T) { logger := log.NewNopLogger() // verifier==nil should fail - _, err := NewStateV2(mockL2Node, time.Second, logger, nil, nil, nil, nil) + _, err := NewStateV2(mockL2Node, logger, nil, nil, nil, nil) if err == nil { t.Fatal("expected error when verifier is nil") } // with signer, still fails without verifier mockSigner := &mockSignerImpl{} - _, err = NewStateV2(mockL2Node, time.Second, logger, nil, mockSigner, nil, nil) + _, err = NewStateV2(mockL2Node, logger, nil, mockSigner, nil, nil) if err == nil { t.Fatal("expected error when verifier is nil (with signer)") } @@ -193,7 +195,7 @@ func TestNewStateV2_WithHA(t *testing.T) { mockVerifier := &mockSequencerVerifier{} ha := newMockSequencerHA(true) - stateV2, err := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, ha) + stateV2, err := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, ha) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } @@ -210,14 +212,14 @@ func TestStateV2_HasSigner(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - fullnode, _ := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) + fullnode, _ := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) if fullnode.HasSigner() { t.Error("Fullnode should not have signer") } mockSigner := &mockSignerImpl{} mockVerifier := &mockSequencerVerifier{} - seqNode, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) + seqNode, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) if !seqNode.HasSigner() { t.Error("Sequencer node should have signer") } @@ -230,13 +232,13 @@ func TestStateV2_IsHAMode(t *testing.T) { mockVerifier := &mockSequencerVerifier{} // Non-HA - nonHA, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) + nonHA, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) if nonHA.IsHAMode() { t.Error("IsHAMode should be false without ha") } // HA - ha, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, newMockSequencerHA(false)) + ha, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, newMockSequencerHA(false)) if !ha.IsHAMode() { t.Error("IsHAMode should be true with ha") } @@ -249,19 +251,19 @@ func TestStateV2_IsHALeader(t *testing.T) { mockVerifier := &mockSequencerVerifier{} // Non-HA: never leader - nonHA, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) + nonHA, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) if nonHA.IsHALeader() { t.Error("non-HA node should not be HA leader") } // HA follower - follower, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, newMockSequencerHA(false)) + follower, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, newMockSequencerHA(false)) if follower.IsHALeader() { t.Error("HA follower should not be leader") } // HA leader - leader, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, newMockSequencerHA(true)) + leader, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, newMockSequencerHA(true)) if !leader.IsHALeader() { t.Error("HA leader should be leader") } @@ -277,7 +279,7 @@ func TestStateV2_IsActiveSequencer_NonHA_Active(t *testing.T) { mockSigner := &mockSignerImpl{address: common.HexToAddress("0x1")} mockVerifier := &mockSequencerVerifier{isSequencer: true} - s, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) + s, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) s.latestBlock = &BlockV2{Number: 0} if !s.isActiveSequencer() { @@ -291,7 +293,7 @@ func TestStateV2_IsActiveSequencer_NonHA_Inactive(t *testing.T) { mockSigner := &mockSignerImpl{address: common.HexToAddress("0x1")} mockVerifier := &mockSequencerVerifier{isSequencer: false} - s, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) + s, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) s.latestBlock = &BlockV2{Number: 0} if s.isActiveSequencer() { @@ -306,7 +308,7 @@ func TestStateV2_IsActiveSequencer_HA_Leader(t *testing.T) { mockVerifier := &mockSequencerVerifier{isSequencer: true} ha := newMockSequencerHA(true) - s, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, ha) + s, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, ha) s.latestBlock = &BlockV2{Number: 0} if !s.isActiveSequencer() { @@ -321,7 +323,7 @@ func TestStateV2_IsActiveSequencer_HA_Follower(t *testing.T) { mockVerifier := &mockSequencerVerifier{isSequencer: true} // L1 says active ha := newMockSequencerHA(false) // but not leader - s, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, ha) + s, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, ha) s.latestBlock = &BlockV2{Number: 0} if s.isActiveSequencer() { @@ -335,7 +337,7 @@ func TestStateV2_IsActiveSequencer_VerifierError(t *testing.T) { mockSigner := &mockSignerImpl{} mockVerifier := &mockSequencerVerifier{err: errors.New("rpc error")} - s, _ := NewStateV2(mockL2Node, time.Second, logger, mockVerifier, mockSigner, nil, nil) + s, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) s.latestBlock = &BlockV2{Number: 0} if s.isActiveSequencer() { @@ -351,8 +353,8 @@ func TestStateV2_ApplyBlock_Idempotent(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - s, _ := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) - block := &types.BlockV2{Number: 1} + s, _ := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) + block := &types.BlockV2{Number: 1, Signature: []byte{0x01, 0x02, 0x03}} // Apply twice should not error if err := s.ApplyBlock(block); err != nil { @@ -370,10 +372,10 @@ func TestStateV2_ApplyBlock_OlderBlockSkipped(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - s, _ := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) + s, _ := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) - block2 := &types.BlockV2{Number: 2} - block1 := &types.BlockV2{Number: 1} + block2 := &types.BlockV2{Number: 2, Signature: []byte{0x01, 0x02, 0x03}} + block1 := &types.BlockV2{Number: 1, Signature: []byte{0x01, 0x02, 0x03}} if err := s.ApplyBlock(block2); err != nil { t.Fatalf("apply block2 failed: %v", err) @@ -392,10 +394,10 @@ func TestStateV2_ApplyBlock_Sequential(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - s, _ := NewStateV2(mockL2Node, time.Second, logger, &mockSequencerVerifier{}, nil, nil, nil) + s, _ := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) for i := uint64(1); i <= 5; i++ { - block := &types.BlockV2{Number: i} + block := &types.BlockV2{Number: i, Signature: []byte{0x01, 0x02, 0x03}} if err := s.ApplyBlock(block); err != nil { t.Fatalf("apply block %d failed: %v", i, err) } diff --git a/test/e2e/node/main.go b/test/e2e/node/main.go index 84ba5435c7b..c81b546c94d 100644 --- a/test/e2e/node/main.go +++ b/test/e2e/node/main.go @@ -133,8 +133,9 @@ func startNode(cfg *Config) error { node.DefaultDBProvider, node.DefaultMetricsProvider(tmcfg.Instrumentation), nodeLogger, - nil, - nil, + nil, // sequencerVerifier + nil, // sequencerSigner + nil, // ha: no HA in e2e test node ) if err != nil { return err From d80474a8b79f743e8e506101f936c2f83eda3f2d Mon Sep 17 00:00:00 2001 From: "allen.wu" Date: Thu, 16 Apr 2026 15:10:46 +0800 Subject: [PATCH 04/15] feat: simplify StateV2.ApplyBlock + ApplyBlockV2 returns (applied, error) Reorg detection and idempotent checks are now handled in the Executor layer (morph node), not in StateV2: - L2Node.ApplyBlockV2 interface returns (applied bool, err error) so callers can distinguish real applies from idempotent skips - StateV2.ApplyBlock only updates latestBlock when applied=true, preventing state regression during HA Raft log replay - Remove reorg detection logic and common.Hash import from StateV2 (reorg log now emitted by Executor.ApplyBlockV2) - MockL2Node tracks maxAppliedHeight for idempotent behavior in tests - Remove mock reorg code (startMockReorgRoutine, triggerMockReorg) Co-Authored-By: Claude Opus 4.6 (1M context) --- l2node/l2node.go | 5 +++-- l2node/mock.go | 10 +++++++--- sequencer/state_v2.go | 46 ++++++++++++++++++------------------------- 3 files changed, 29 insertions(+), 32 deletions(-) diff --git a/l2node/l2node.go b/l2node/l2node.go index 1e1bc21365b..ba0300db22d 100644 --- a/l2node/l2node.go +++ b/l2node/l2node.go @@ -57,8 +57,9 @@ type L2Node interface { RequestBlockDataV2(parentHash []byte) (block *BlockV2, collectedL1Msgs bool, err error) // ApplyBlockV2 applies a BlockV2 to the L2 execution layer. - // Uses Engine API (NewL2Block) internally. - ApplyBlockV2(block *BlockV2) error + // Returns (applied, err): applied=true if geth state changed, + // applied=false if idempotent skip (block already on-chain with same hash). + ApplyBlockV2(block *BlockV2) (applied bool, err error) // GetBlockByNumber retrieves a BlockV2 by its number. // Can be implemented using geth's eth_getBlockByNumber JSON-RPC. diff --git a/l2node/mock.go b/l2node/mock.go index b5cbf3354d0..9b7a54d5e02 100644 --- a/l2node/mock.go +++ b/l2node/mock.go @@ -15,6 +15,7 @@ var _ L2Node = &MockL2Node{} type MockL2Node struct { TxNumber int ValidatorSetFile string // used to control the validator set updates + maxAppliedHeight uint64 // tracks highest block applied (for V2 idempotent check) } func NewMockL2Node(n int, validatorSetFile string) L2Node { @@ -139,9 +140,12 @@ func (l *MockL2Node) RequestBlockDataV2(parentHash []byte) (*BlockV2, bool, erro }, false, nil } -func (l *MockL2Node) ApplyBlockV2(block *BlockV2) error { - // Mock implementation: do nothing - return nil +func (l *MockL2Node) ApplyBlockV2(block *BlockV2) (bool, error) { + if block.Number <= l.maxAppliedHeight { + return false, nil // idempotent skip + } + l.maxAppliedHeight = block.Number + return true, nil } func (l *MockL2Node) GetBlockByNumber(height uint64) (*BlockV2, error) { diff --git a/sequencer/state_v2.go b/sequencer/state_v2.go index d2e567ccc32..35a8d77cddb 100644 --- a/sequencer/state_v2.go +++ b/sequencer/state_v2.go @@ -131,6 +131,7 @@ func (s *StateV2) OnStop() { } } + // roleCheckRoutine is the unified loop for role detection and block production. // It runs for all nodes with a signer (ActiveSequencer, HA-Leader, HA-Follower). // @@ -315,53 +316,44 @@ func (s *StateV2) signBlock(block *BlockV2) error { return nil } -// ApplyBlock applies a block to L2 and updates local state. -// Serialized by mutex to prevent concurrent application. -// -// Reorg detection: when block.Number <= latestBlock.Number, we check if the -// existing block at that height has the same hash. If yes, it's already on-chain -// (idempotent skip). If not, it's a reorg — fall through to ApplyBlockV2 and -// let geth handle the chain reorganization. +// ApplyBlock saves the block signature and delegates to l2Node.ApplyBlockV2. +// Reorg detection and idempotent checks are handled in the Executor layer. func (s *StateV2) ApplyBlock(block *BlockV2) error { s.mtx.Lock() defer s.mtx.Unlock() - if s.latestBlock != nil && block.Number <= s.latestBlock.Number { - existing, err := s.l2Node.GetBlockByNumber(block.Number) - if err != nil { - return fmt.Errorf("ApplyBlock: GetBlockByNumber(%d): %w", block.Number, err) - } - if existing != nil && existing.Hash == block.Hash { - s.logger.Debug("ApplyBlock: idempotent skip", "height", block.Number) - return nil // already on-chain, idempotent skip - } - // Hash mismatch → reorg, fall through to ApplyBlockV2 - } - if len(block.Signature) == 0 { return fmt.Errorf("ApplyBlock: block %d missing signature", block.Number) } + // Save signature BEFORE applying to geth. If crash happens after Apply + // but before SaveSignature, the block is on-chain but its signature is + // lost — which can cause P2P peers to reject/disconnect when they cannot + // verify the block. Saving first at worst leaves an orphan signature if + // Apply fails, which is harmless. + tSig := time.Now() + if err := s.sigStore.SaveSignature(block.Hash, block.Signature); err != nil { + return err + } + sigDur := time.Since(tSig) + tGeth := time.Now() - if err := s.l2Node.ApplyBlockV2(block); err != nil { + applied, err := s.l2Node.ApplyBlockV2(block) + if err != nil { return err } gethDur := time.Since(tGeth) - s.latestBlock = block - - tSig := time.Now() - if err := s.sigStore.SaveSignature(block.Hash, block.Signature); err != nil { - return err + if applied { + s.latestBlock = block } - sigDur := time.Since(tSig) s.logger.Debug("[PERF] ApplyBlock", "height", block.Number, "txCount", len(block.Transactions), "gasUsed", block.GasUsed, - "geth_ms", float64(gethDur.Microseconds())/1000.0, "sigSave_ms", float64(sigDur.Microseconds())/1000.0, + "geth_ms", float64(gethDur.Microseconds())/1000.0, "total_ms", float64((gethDur+sigDur).Microseconds())/1000.0, ) From 782bb44f83f5b5cef1c03272166495c865461e11 Mon Sep 17 00:00:00 2001 From: "allen.wu" Date: Fri, 17 Apr 2026 16:40:54 +0800 Subject: [PATCH 05/15] feat: recompute block hash before signature verification Bind the sequencer signature to the block body by rehashing BlockV2 fields in VerifyBlockSignature and rejecting on mismatch. Companion changes in go-ethereum and morph/node. Made-with: Cursor --- sequencer/block_verify.go | 71 +++++++++++- sequencer/block_verify_test.go | 193 +++++++++++++++++++++++++++++++++ 2 files changed, 263 insertions(+), 1 deletion(-) create mode 100644 sequencer/block_verify_test.go diff --git a/sequencer/block_verify.go b/sequencer/block_verify.go index d0b693c4add..81a9ced510b 100644 --- a/sequencer/block_verify.go +++ b/sequencer/block_verify.go @@ -2,12 +2,72 @@ package sequencer import ( "fmt" + "math/big" + "github.com/morph-l2/go-ethereum/common" + "github.com/morph-l2/go-ethereum/core/types" "github.com/morph-l2/go-ethereum/crypto" + "github.com/morph-l2/go-ethereum/trie" ) +// computeBlockHash recomputes a BlockV2's block hash from its content fields. +// +// This MUST stay in lockstep with geth's catalyst.executableDataToBlock so that +// honest blocks always pass: the sequencer signs newBlockResult.Block.Hash(), +// which is computed from the same canonical header layout used here. +// +// Notes on field alignment: +// - Difficulty=0, Nonce=0, UncleHash=EmptyUncleHash, Extra=[] are L2 consensus +// constants (consensus/l2 sets them in Prepare and executableDataToBlock). +// - NextL1MsgIndex and BatchHash live in the Header struct but are NOT included +// in Header.Hash() (see go-ethereum/core/types/block.go headerHashing layout), +// so we deliberately do not need to set them for hash computation. +// - Coinbase is taken directly from block.Miner. Honest sequencers always send +// Miner = newBlockResult.Block.Coinbase(), which is empty for V2 blocks +// (BuildBlock does not set coinbase, and the L2 engine's Prepare overrides +// to EmptyAddress when FeeVault is enabled). We do not re-apply the FeeVault +// override here: it is unnecessary in the honest case, and rejecting any +// mismatch is the correct behavior in the attacker case. +func computeBlockHash(block *BlockV2) (common.Hash, error) { + txs := make(types.Transactions, len(block.Transactions)) + for i, raw := range block.Transactions { + var tx types.Transaction + if err := tx.UnmarshalBinary(raw); err != nil { + return common.Hash{}, fmt.Errorf("decode tx %d: %w", i, err) + } + txs[i] = &tx + } + + header := &types.Header{ + ParentHash: block.ParentHash, + UncleHash: types.EmptyUncleHash, + Coinbase: block.Miner, + Root: block.StateRoot, + TxHash: types.DeriveSha(txs, trie.NewStackTrie(nil)), + ReceiptHash: block.ReceiptRoot, + Bloom: types.BytesToBloom(block.LogsBloom), + Difficulty: new(big.Int), + Number: new(big.Int).SetUint64(block.Number), + GasLimit: block.GasLimit, + GasUsed: block.GasUsed, + Time: block.Timestamp, + Extra: []byte{}, + BaseFee: block.BaseFee, + } + return header.Hash(), nil +} + // VerifyBlockSignature verifies a block's ECDSA signature against the // expected sequencer at that block's height. All V2 blocks must carry a valid signature. +// +// Hash binding: before recovering the signer, this function recomputes the +// block hash from the canonical content fields and rejects the block if it does +// not match block.Hash. Without this binding, an attacker could pair a valid +// (Hash, Signature) recovered from a legitimately signed block with a tampered +// body, and other peers would accept it - causing a chain fork at the next +// height. The geth-side check in NewL2BlockV2 is the safety net; this check +// rejects tampered blocks early so we never persist orphan signatures into +// SignatureStore. func VerifyBlockSignature(verifier SequencerVerifier, block *BlockV2) error { if verifier == nil { return fmt.Errorf("%w: verifier not configured", ErrInvalidSignature) @@ -17,7 +77,16 @@ func VerifyBlockSignature(verifier SequencerVerifier, block *BlockV2) error { return fmt.Errorf("%w: missing signature at height %d", ErrInvalidSignature, block.Number) } - pubKey, err := crypto.SigToPub(block.Hash.Bytes(), block.Signature) + computedHash, err := computeBlockHash(block) + if err != nil { + return fmt.Errorf("%w: compute hash at height %d: %v", ErrInvalidSignature, block.Number, err) + } + if block.Hash != computedHash { + return fmt.Errorf("%w: hash mismatch at height %d: declared %s, computed %s", + ErrInvalidSignature, block.Number, block.Hash.Hex(), computedHash.Hex()) + } + + pubKey, err := crypto.SigToPub(computedHash.Bytes(), block.Signature) if err != nil { return fmt.Errorf("%w: recover pubkey at height %d: %v", ErrInvalidSignature, block.Number, err) } diff --git a/sequencer/block_verify_test.go b/sequencer/block_verify_test.go new file mode 100644 index 00000000000..d39b1b307e3 --- /dev/null +++ b/sequencer/block_verify_test.go @@ -0,0 +1,193 @@ +package sequencer + +import ( + "errors" + "math/big" + "testing" + + "github.com/morph-l2/go-ethereum/common" + "github.com/morph-l2/go-ethereum/core/types" + "github.com/morph-l2/go-ethereum/crypto" + + tmtypes "github.com/tendermint/tendermint/types" +) + +// makeTestBlock builds a BlockV2 whose Hash is computed via computeBlockHash and +// whose Signature is produced by the given private key. The block is what an +// honest sequencer would produce. +func makeTestBlock(t *testing.T, priv []byte) (*tmtypes.BlockV2, common.Address) { + t.Helper() + + key, err := crypto.ToECDSA(priv) + if err != nil { + t.Fatalf("ToECDSA: %v", err) + } + signer := crypto.PubkeyToAddress(key.PublicKey) + + tx := types.NewTransaction(0, common.HexToAddress("0xdead"), big.NewInt(1), 21000, big.NewInt(1), nil) + rawTx, err := tx.MarshalBinary() + if err != nil { + t.Fatalf("MarshalBinary: %v", err) + } + + block := &tmtypes.BlockV2{ + ParentHash: common.HexToHash("0xabc"), + Miner: common.Address{}, + Number: 42, + GasLimit: 30_000_000, + BaseFee: big.NewInt(1_000_000_000), + Timestamp: 1_700_000_000, + Transactions: [][]byte{rawTx}, + StateRoot: common.HexToHash("0xdef"), + GasUsed: 21_000, + ReceiptRoot: common.HexToHash("0x123"), + LogsBloom: make([]byte, 256), + WithdrawTrieRoot: common.HexToHash("0x456"), + NextL1MessageIndex: 7, + } + + h, err := computeBlockHash(block) + if err != nil { + t.Fatalf("computeBlockHash: %v", err) + } + block.Hash = h + + sig, err := crypto.Sign(h.Bytes(), key) + if err != nil { + t.Fatalf("Sign: %v", err) + } + block.Signature = sig + + return block, signer +} + +// testKey is a deterministic private key for tests; do NOT use elsewhere. +var testKey = []byte{ + 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, + 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00, + 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, + 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00, +} + +func TestVerifyBlockSignature_Honest(t *testing.T) { + block, _ := makeTestBlock(t, testKey) + verifier := &mockSequencerVerifier{isSequencer: true} + + if err := VerifyBlockSignature(verifier, block); err != nil { + t.Fatalf("expected honest block to verify, got: %v", err) + } +} + +func TestVerifyBlockSignature_TamperedMiner(t *testing.T) { + block, _ := makeTestBlock(t, testKey) + verifier := &mockSequencerVerifier{isSequencer: true} + + // Attacker keeps the original Hash + Signature but forges the Miner field. + block.Miner = common.HexToAddress("0xbadbadbadbadbadbadbadbadbadbadbadbadbad0") + + err := VerifyBlockSignature(verifier, block) + if err == nil { + t.Fatal("expected error for tampered Miner, got nil") + } + if !errors.Is(err, ErrInvalidSignature) { + t.Errorf("expected ErrInvalidSignature, got: %v", err) + } +} + +func TestVerifyBlockSignature_TamperedStateRoot(t *testing.T) { + block, _ := makeTestBlock(t, testKey) + verifier := &mockSequencerVerifier{isSequencer: true} + + block.StateRoot = common.HexToHash("0xdeadbeef") + + err := VerifyBlockSignature(verifier, block) + if err == nil { + t.Fatal("expected error for tampered StateRoot, got nil") + } + if !errors.Is(err, ErrInvalidSignature) { + t.Errorf("expected ErrInvalidSignature, got: %v", err) + } +} + +func TestVerifyBlockSignature_TamperedTransactions(t *testing.T) { + block, _ := makeTestBlock(t, testKey) + verifier := &mockSequencerVerifier{isSequencer: true} + + tx := types.NewTransaction(99, common.HexToAddress("0xfeed"), big.NewInt(999), 21000, big.NewInt(1), nil) + raw, err := tx.MarshalBinary() + if err != nil { + t.Fatalf("MarshalBinary: %v", err) + } + block.Transactions = append(block.Transactions, raw) + + if verr := VerifyBlockSignature(verifier, block); verr == nil { + t.Fatal("expected error for appended transaction, got nil") + } +} + +func TestVerifyBlockSignature_NotSequencer(t *testing.T) { + block, _ := makeTestBlock(t, testKey) + verifier := &mockSequencerVerifier{isSequencer: false} + + err := VerifyBlockSignature(verifier, block) + if err == nil { + t.Fatal("expected error when signer is not the active sequencer, got nil") + } + if !errors.Is(err, ErrInvalidSignature) { + t.Errorf("expected ErrInvalidSignature, got: %v", err) + } +} + +func TestVerifyBlockSignature_MissingSignature(t *testing.T) { + block, _ := makeTestBlock(t, testKey) + block.Signature = nil + verifier := &mockSequencerVerifier{isSequencer: true} + + err := VerifyBlockSignature(verifier, block) + if err == nil { + t.Fatal("expected error for missing signature, got nil") + } + if !errors.Is(err, ErrInvalidSignature) { + t.Errorf("expected ErrInvalidSignature, got: %v", err) + } +} + +func TestVerifyBlockSignature_NilVerifier(t *testing.T) { + block, _ := makeTestBlock(t, testKey) + + err := VerifyBlockSignature(nil, block) + if err == nil { + t.Fatal("expected error for nil verifier, got nil") + } + if !errors.Is(err, ErrInvalidSignature) { + t.Errorf("expected ErrInvalidSignature, got: %v", err) + } +} + +func TestComputeBlockHash_DeterministicAndDistinct(t *testing.T) { + b1, _ := makeTestBlock(t, testKey) + h1, err := computeBlockHash(b1) + if err != nil { + t.Fatalf("computeBlockHash: %v", err) + } + + // Same content produces same hash. + h1again, err := computeBlockHash(b1) + if err != nil { + t.Fatalf("computeBlockHash: %v", err) + } + if h1 != h1again { + t.Fatalf("computeBlockHash not deterministic: %s vs %s", h1.Hex(), h1again.Hex()) + } + + // Different content produces different hash. + b2 := *b1 + b2.GasUsed = b1.GasUsed + 1 + h2, err := computeBlockHash(&b2) + if err != nil { + t.Fatalf("computeBlockHash: %v", err) + } + if h1 == h2 { + t.Fatal("computeBlockHash should differ when GasUsed changes") + } +} From cb29512d7041143d3f68b4ac20728f161c622bc5 Mon Sep 17 00:00:00 2001 From: "allen.wu" Date: Wed, 29 Apr 2026 18:20:47 +0800 Subject: [PATCH 06/15] feat: P2P broadcast reactor security hardening and performance optimization - Ban mechanism: in-memory bannedPeers map (10-min TTL) + isBanned check in AddPeer - Rate limiter: per-peer token bucket (50 req/s, burst 250) protecting L2 geth RPC - Height validation: reject blocks outside [localHeight-MaxPendingHeightBehind, localHeight+MaxPendingHeightAhead] - Timeout-ban: removeTimeoutPeers bans peers that declare height but never respond (20s TTL) - Error classification: ErrInvalidSignature (ban) vs ErrVerifierUnavailable (local problem, no ban) - Startup gate: routinesStarted atomic.Bool replaces waitSync; Receive() silently drops until routines start - OnBlockRequest sanity check: reject Height<=0 or Height>localHeight without touching cache/RPC - Decode failures in broadcast/sync handlers now trigger banPeer Co-Authored-By: Claude Opus 4.6 --- consensus/reactor.go | 2 +- node/node.go | 7 +- sequencer/block_verify.go | 12 +- sequencer/block_verify_test.go | 28 ++- sequencer/broadcast_reactor.go | 298 +++++++++++++++++++++------- sequencer/broadcast_reactor_test.go | 211 ++++++++++++++++++++ sequencer/interfaces.go | 11 +- sequencer/rate_limiter.go | 94 +++++++++ sequencer/rate_limiter_test.go | 147 ++++++++++++++ 9 files changed, 724 insertions(+), 86 deletions(-) create mode 100644 sequencer/broadcast_reactor_test.go create mode 100644 sequencer/rate_limiter.go create mode 100644 sequencer/rate_limiter_test.go diff --git a/consensus/reactor.go b/consensus/reactor.go index 51769d2f872..18a3170728a 100644 --- a/consensus/reactor.go +++ b/consensus/reactor.go @@ -128,7 +128,7 @@ func (conR *Reactor) SwitchToConsensus(state sm.State, skipWAL bool) { conR.Logger.Info("SwitchToConsensus") // We have no votes, so reconstruct LastCommit from SeenCommit. - if state.LastBlockHeight > 0 { + if state.LastBlockHeight > 0 && !upgrade.IsUpgraded(state.LastBlockHeight+1) { conR.conS.reconstructLastCommit(state) } diff --git a/node/node.go b/node/node.go index 7d6b664bf1a..7fe90dd22fe 100644 --- a/node/node.go +++ b/node/node.go @@ -499,7 +499,6 @@ func createConsensusReactor( func createSequencerComponents( l2Node l2node.L2Node, pool *bc.BlockPool, - waitSync bool, logger log.Logger, verifier sequencer.SequencerVerifier, signer sequencer.Signer, @@ -519,11 +518,12 @@ func createSequencerComponents( return nil, nil, fmt.Errorf("failed to create StateV2: %w", err) } - // Create BlockBroadcastReactor (not started yet) + // Create BlockBroadcastReactor (not started yet). + // Routines are started later via StartSequencerRoutines() — see OnStart() + // doc comment for the full explanation. broadcastReactor := sequencer.NewBlockBroadcastReactor( pool, stateV2, - waitSync, logger, verifier, sigStore, @@ -1011,7 +1011,6 @@ func NewNode( if node.stateV2, node.blockBroadcastReactor, err = createSequencerComponents( l2NodeRef, bcR.Pool(), - blockSync || stateSync, logger, sequencerVerifier, sequencerSigner, diff --git a/sequencer/block_verify.go b/sequencer/block_verify.go index 81a9ced510b..590dd3d4be8 100644 --- a/sequencer/block_verify.go +++ b/sequencer/block_verify.go @@ -68,9 +68,16 @@ func computeBlockHash(block *BlockV2) (common.Hash, error) { // height. The geth-side check in NewL2BlockV2 is the safety net; this check // rejects tampered blocks early so we never persist orphan signatures into // SignatureStore. +// VerifyBlockSignature returns one of two classes of errors: +// - wrapping ErrInvalidSignature: the block is provably bad — the sender +// should be banned (malformed signature, tampered body, non-sequencer +// signer, etc.). +// - wrapping ErrVerifierUnavailable: the local verifier could not decide +// (not configured, L1 backend failure). The peer may be honest; the +// caller must not ban on this error. func VerifyBlockSignature(verifier SequencerVerifier, block *BlockV2) error { if verifier == nil { - return fmt.Errorf("%w: verifier not configured", ErrInvalidSignature) + return fmt.Errorf("%w: verifier not configured", ErrVerifierUnavailable) } if len(block.Signature) == 0 { @@ -94,7 +101,8 @@ func VerifyBlockSignature(verifier SequencerVerifier, block *BlockV2) error { ok, err := verifier.IsSequencerAt(signer, block.Number) if err != nil { - return fmt.Errorf("IsSequencerAt height %d: %w", block.Number, err) + // Backend failure (L1 RPC / IO) — local problem, not peer misbehavior. + return fmt.Errorf("%w: IsSequencerAt height %d: %v", ErrVerifierUnavailable, block.Number, err) } if !ok { return fmt.Errorf("%w: signer %s is not sequencer at height %d", diff --git a/sequencer/block_verify_test.go b/sequencer/block_verify_test.go index d39b1b307e3..10e748c1f76 100644 --- a/sequencer/block_verify_test.go +++ b/sequencer/block_verify_test.go @@ -159,8 +159,32 @@ func TestVerifyBlockSignature_NilVerifier(t *testing.T) { if err == nil { t.Fatal("expected error for nil verifier, got nil") } - if !errors.Is(err, ErrInvalidSignature) { - t.Errorf("expected ErrInvalidSignature, got: %v", err) + // nil verifier is a local configuration problem, not peer misbehavior. + if !errors.Is(err, ErrVerifierUnavailable) { + t.Errorf("expected ErrVerifierUnavailable, got: %v", err) + } + if errors.Is(err, ErrInvalidSignature) { + t.Errorf("nil verifier must not return ErrInvalidSignature (would cause false ban)") + } +} + +// TestVerifyBlockSignature_BackendFailure: when the verifier backend (e.g. L1 +// RPC) fails transiently, VerifyBlockSignature must surface an +// ErrVerifierUnavailable — not ErrInvalidSignature — otherwise honest peers +// get banned during every local outage. +func TestVerifyBlockSignature_BackendFailure(t *testing.T) { + block, _ := makeTestBlock(t, testKey) + verifier := &mockSequencerVerifier{err: errors.New("L1 RPC timeout")} + + err := VerifyBlockSignature(verifier, block) + if err == nil { + t.Fatal("expected error when verifier backend fails, got nil") + } + if !errors.Is(err, ErrVerifierUnavailable) { + t.Errorf("expected ErrVerifierUnavailable for backend failure, got: %v", err) + } + if errors.Is(err, ErrInvalidSignature) { + t.Errorf("backend failure must not return ErrInvalidSignature (would cause false ban)") } } diff --git a/sequencer/broadcast_reactor.go b/sequencer/broadcast_reactor.go index 1812eae7b83..b9ef0ad7930 100644 --- a/sequencer/broadcast_reactor.go +++ b/sequencer/broadcast_reactor.go @@ -1,10 +1,12 @@ package sequencer import ( + "errors" "fmt" "math/rand" "reflect" "sync" + "sync/atomic" "time" "github.com/cosmos/gogoproto/proto" @@ -20,18 +22,34 @@ const ( BlockBroadcastChannel = byte(0x50) // For block broadcast (requires signature verification) SequencerSyncChannel = byte(0x51) // For block sync requests (no signature verification) - // TODO: make these parameters configurable smallGapThreshold = 20 // Gap for direct block request recentBlocksCapacity = 1000 // Recent applied blocks cache seenBlocksCapacity = 2000 // Seen blocks for dedup - peerSentCapacity = 500 // Per-peer sent tracking - applyInterval = 10 * time.Second - syncInterval = 10 * time.Second + peerGossipedCapacity = 500 // Per-peer gossiped tracking + applyInterval = 3 * time.Second + syncInterval = 5 * time.Second // Sync request tracking syncRequestTTL = 20 * time.Second - maxPendingSyncRequests = 256 - maxPendingSyncPerPeer = 64 + maxPendingSyncRequests = 500 + maxPendingSyncPerPeer = 200 + + // Peer ban + peerBanDuration = 10 * time.Minute + + // BlockRequest per-peer rate limit (token bucket). + // + // requestMissingBlocks sends a burst of up to maxPendingSyncPerPeer=200 + // requests to the same peer within a single syncInterval tick — it is NOT + // a paced 40 qps stream. The bucket must therefore absorb that full burst, + // otherwise a catching-up fullnode with a single viable upstream would + // trip the limiter and get banned by removeTimeoutPeers 20s later. + // + // burst=250 covers the 200-request peak with headroom; rate=50 refills the + // bucket in 5s, which is exactly one syncInterval — so by the next tick + // the bucket is full again and can absorb another burst of the same size. + blockRequestRateLimit = 50.0 // tokens/sec (bucket fully refills every 5s) + blockRequestBurst = 250 // bucket capacity (>= maxPendingSyncPerPeer) ) // SyncReq represents a pending sync channel request. @@ -52,25 +70,28 @@ type BlockPool interface { type BlockBroadcastReactor struct { p2p.BaseReactor - stateV2 *StateV2 - pool BlockPool - waitSync bool + stateV2 *StateV2 + pool BlockPool recentBlocks *BlockRingBuffer // Applied blocks for peer requests pendingCache *PendingBlockCache // Pending blocks // Gossip state (bounded capacity, auto-evict old entries) - seenBlocks *HashSet // Blocks we've seen (dedup) - peerSent *PeerHashSet // Blocks sent to each peer + seenBlocks *HashSet // Blocks we've seen (dedup) + peerGossiped *PeerHashSet // Blocks gossiped to / from each peer (for gossip dedup) // applyMtx serializes the "check parent + apply + save signature" sequence // in the reactor's applyBlock path. StateV2.ApplyBlock has its own mutex // for the actual apply, but applyMtx ensures the parent-hash check and // signature persistence are also atomic with the apply. - applyMtx sync.Mutex - startMu sync.Mutex - sequencerStarted bool - logger log.Logger + applyMtx sync.Mutex + startMu sync.Mutex + // routinesStarted is true once StartSequencerRoutines() has successfully + // run — i.e. the node has caught up and reached the upgrade height. + // Also used as a fast gate in Receive() to drop P2P messages that arrive + // before the reactor is ready (e.g. while blocksync is still running). + routinesStarted atomic.Bool + logger log.Logger verifier SequencerVerifier sigStore *SignatureStore @@ -81,30 +102,44 @@ type BlockBroadcastReactor struct { syncRequestsMu sync.Mutex syncRequests map[int64]*SyncReq syncPeerCounts map[p2p.ID]int + + // blockReqLimiter enforces a per-peer rate limit on BlockRequest messages + // to prevent a malicious peer from exhausting L2 RPC / signature store. + blockReqLimiter *PeerRateLimiter + + // bannedPeers tracks peers banned for misbehavior, keyed by peer ID. + // Value is the ban expiration time. Expired entries are lazily cleaned + // up on lookup. This is an in-memory ban list; it does not persist + // across restarts. + bannedPeersMu sync.Mutex + bannedPeers map[p2p.ID]time.Time } -// NewBlockBroadcastReactor creates a new reactor. +// NewBlockBroadcastReactor creates a new reactor. Routines are NOT started +// here; the caller must invoke StartSequencerRoutines() once the node has +// caught up and reached the upgrade height (done by blocksync's caught-up +// callback or the upgrade callback in node.go). func NewBlockBroadcastReactor( pool BlockPool, stateV2 *StateV2, - waitSync bool, logger log.Logger, verifier SequencerVerifier, sigStore *SignatureStore, ) *BlockBroadcastReactor { r := &BlockBroadcastReactor{ - pool: pool, - stateV2: stateV2, - waitSync: waitSync, - recentBlocks: NewBlockRingBuffer(recentBlocksCapacity), - pendingCache: NewPendingBlockCache(), - seenBlocks: NewHashSet(seenBlocksCapacity), - peerSent: NewPeerHashSet(peerSentCapacity), - syncRequests: make(map[int64]*SyncReq), - syncPeerCounts: make(map[p2p.ID]int), - logger: logger.With("module", "broadcastReactor"), - verifier: verifier, - sigStore: sigStore, + pool: pool, + stateV2: stateV2, + recentBlocks: NewBlockRingBuffer(recentBlocksCapacity), + pendingCache: NewPendingBlockCache(), + seenBlocks: NewHashSet(seenBlocksCapacity), + peerGossiped: NewPeerHashSet(peerGossipedCapacity), + syncRequests: make(map[int64]*SyncReq), + syncPeerCounts: make(map[p2p.ID]int), + bannedPeers: make(map[p2p.ID]time.Time), + blockReqLimiter: NewPeerRateLimiter(blockRequestRateLimit, blockRequestBurst), + logger: logger.With("module", "broadcastReactor"), + verifier: verifier, + sigStore: sigStore, } r.BaseReactor = *p2p.NewBaseReactor("BlockBroadcast", r) return r @@ -115,25 +150,30 @@ func (r *BlockBroadcastReactor) SetLogger(l log.Logger) { r.logger = l.With("module", "broadcastReactor") } +// OnStart is intentionally a no-op. Sequencer routines are NOT started by +// the service lifecycle — they are started explicitly by +// StartSequencerRoutines() which is invoked by: +// - blocksync's caught-up callback (when local height reaches the upgrade +// height after catching up via blocksync), or +// - the upgrade callback in node.go (when a running PBFT node crosses the +// upgrade height via consensus). +// +// Until one of those fires, Receive() drops incoming messages via the +// routinesStarted gate — the reactor is not yet able to validate or apply +// blocks, and pretending otherwise would produce false "too high" / "too +// low" errors against an uninitialized StateV2. func (r *BlockBroadcastReactor) OnStart() error { - if r.waitSync { - // Don't start sequencer routines on start if we are in wait sync mode - // will be started via StartSequencerRoutines - return nil - } - return r.StartSequencerRoutines() + return nil } func (r *BlockBroadcastReactor) StartSequencerRoutines() error { r.startMu.Lock() defer r.startMu.Unlock() - if r.sequencerStarted { + if r.routinesStarted.Load() { return nil } - r.waitSync = false - if r.stateV2 != nil && !r.stateV2.IsRunning() { if err := r.stateV2.Start(); err != nil { return fmt.Errorf("failed to start StateV2: %w", err) @@ -150,7 +190,9 @@ func (r *BlockBroadcastReactor) StartSequencerRoutines() error { go r.applyRoutine() } - r.sequencerStarted = true + // Open the Receive() gate: from this point on, incoming P2P messages + // will be processed instead of being silently dropped. + r.routinesStarted.Store(true) return nil } @@ -166,20 +208,34 @@ func (r *BlockBroadcastReactor) GetChannels() []*p2p.ChannelDescriptor { } func (r *BlockBroadcastReactor) AddPeer(peer p2p.Peer) { - r.peerSent.AddPeer(string(peer.ID())) + if r.isBanned(peer.ID()) { + r.logger.Info("Rejecting banned peer", "peer", peer.ID()) + r.Switch.StopPeerForError(peer, fmt.Errorf("peer is banned")) + return + } + r.peerGossiped.AddPeer(string(peer.ID())) + r.blockReqLimiter.AddPeer(peer.ID()) } func (r *BlockBroadcastReactor) RemovePeer(peer p2p.Peer, reason interface{}) { - r.peerSent.RemovePeer(string(peer.ID())) + r.peerGossiped.RemovePeer(string(peer.ID())) r.removeSyncRequestsByPeer(peer.ID()) + r.blockReqLimiter.RemovePeer(peer.ID()) } func (r *BlockBroadcastReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) { + // Drop all messages until StartSequencerRoutines() has been called. + // This guards against messages arriving while blocksync is still running + // or before the upgrade height has been reached — at that point StateV2 + // is not yet initialized and we cannot validate or apply blocks. + if !r.routinesStarted.Load() { + return + } r.logger.Debug("Receive message", "chId", chID, "src", src.ID(), "len", len(msgBytes)) msg, err := decodeMsg(msgBytes) if err != nil { r.logger.Error("Error decoding message", "src", src, "chId", chID, "err", err) - r.Switch.StopPeerForError(src, err) + r.banPeer(src, "invalid message encoding") return } @@ -199,9 +255,28 @@ func (r *BlockBroadcastReactor) handleBroadcastMsg(msg interface{}, src p2p.Peer switch msg := msg.(type) { case *bcproto.BlockResponseV2: if msg.Block != nil { + // Honest peers always produce a valid BlockV2 proto. A failure + // here is a malicious signal — ban the sender. blockV2, err := types.BlockV2FromProto(msg.Block) if err != nil { - r.logger.Error("Invalid BlockV2", "err", err) + r.logger.Error("Invalid BlockV2 from broadcast channel", "peer", src.ID(), "err", err) + r.banPeer(src, "invalid BlockV2 in broadcast") + return + } + localHeight := r.stateV2.LatestHeight() + // Reject blocks too old (already applied / below pruning window). + if localHeight-int64(blockV2.Number) > MaxPendingHeightBehind { + r.logger.Error("handleBroadcastMsg err, block height too low.", + "height", blockV2.Number, "hash", blockV2.Hash.Hex(), "peer", src.ID()) + return + } + if int64(blockV2.Number)-localHeight > MaxPendingHeightAhead { + r.logger.Error("handleBroadcastMsg err, block height too high.", + "height", blockV2.Number, "localHeight", localHeight, "peer", src.ID()) + return + } + if r.markSeen(blockV2.Hash) { + r.logger.Debug("onBlockV2 broadcast dedup", "number", blockV2.Number, "hash", blockV2.Hash.Hex()) return } r.onBlockV2(blockV2, src, true) // from broadcast channel @@ -215,6 +290,10 @@ func (r *BlockBroadcastReactor) handleBroadcastMsg(msg interface{}, src p2p.Peer func (r *BlockBroadcastReactor) handleSyncMsg(msg interface{}, src p2p.Peer) { switch msg := msg.(type) { case *bcproto.BlockRequest: + if !r.blockReqLimiter.Allow(src.ID()) { + r.logger.Debug("BlockRequest rate limited", "peer", src.ID(), "height", msg.Height) + return + } r.onBlockRequest(msg, src) r.logger.Debug("handleSyncMsg block request", "msg", msg, "src", src, "height", msg.Height) case *bcproto.BlockResponseV2: @@ -226,22 +305,23 @@ func (r *BlockBroadcastReactor) handleSyncMsg(msg interface{}, src p2p.Peer) { "peer", src.ID(), "height", height) return } + // Honest peers always produce a valid BlockV2 proto. A failure here + // means the peer is sending malformed data — ban it. The slot we + // just consumed will be cleaned up by removeSyncRequestsByPeer via + // the RemovePeer callback when StopPeerForError disconnects. blockV2, err := types.BlockV2FromProto(msg.Block) if err != nil { - r.logger.Error("Invalid BlockV2", "err", err) + r.logger.Error("Invalid BlockV2 from sync channel", "peer", src.ID(), "err", err) + r.banPeer(src, "invalid BlockV2 in sync response") return } r.onBlockV2(blockV2, src, false) // from sync channel r.logger.Debug("handleSyncMsg block response", "src", src, "height", blockV2.Number, "hash", blockV2.Hash.Hex()) } case *bcproto.NoBlockResponse: - if !r.checkAndTakeSyncRequest(src.ID(), msg.Height) { - r.logger.Error("Unsolicited NoBlockResponse, dropping", "peer", src.ID(), "height", msg.Height) - return - } - r.logger.Debug("Peer does not have requested block", "peer", src, "height", msg.Height) + r.logger.Error("Peer does not have requested block", "peer", src, "height", msg.Height) default: - r.logger.Debug("Ignoring unknown message on sync channel", "type", reflect.TypeOf(msg)) + r.logger.Error("Ignoring unknown message on sync channel", "type", reflect.TypeOf(msg)) } } @@ -315,21 +395,26 @@ func (r *BlockBroadcastReactor) onBlockV2(block *BlockV2, src p2p.Peer, fromBroa r.logger.Debug("onBlockV2", "number", block.Number, "hash", block.Hash.Hex(), "fromBroadcast", fromBroadcast) - // Dedup: only for broadcast channel - if fromBroadcast && r.markSeen(block.Hash) { - r.logger.Debug("onBlockV2 broadcast dedup", "number", block.Number, "hash", block.Hash.Hex()) - return - } - - // Unified signature verification for all incoming blocks + // Unified signature verification for all incoming blocks. + // Ban the sender ONLY on a malicious-signal error (ErrInvalidSignature). + // ErrVerifierUnavailable means we can't decide (verifier not configured, + // L1 backend transient failure) — the peer may be honest, so we discard + // the block silently and let it retry on the next gossip/sync round. if err := VerifyBlockSignature(r.verifier, block); err != nil { - r.logger.Error("Block signature verification failed, discarding", - "number", block.Number, "hash", block.Hash.Hex(), "err", err) + if errors.Is(err, ErrInvalidSignature) { + r.logger.Error("Block signature invalid, banning peer", + "number", block.Number, "hash", block.Hash.Hex(), "err", err) + r.banPeer(src, "block signature verification failed") + } else { + // Verifier unavailable — local problem, not peer's fault. + r.logger.Error("Verifier unavailable, dropping block without ban", + "number", block.Number, "hash", block.Hash.Hex(), "err", err) + } return } // Mark as received from this peer (don't send back) - r.markSentToPeer(src.ID(), block.Hash) + r.markGossipedToPeer(src.ID(), block.Hash) localHeight := r.stateV2.LatestHeight() @@ -398,7 +483,7 @@ func (r *BlockBroadcastReactor) checkSyncGap() { // requestMissingBlocks requests blocks in range [start, end] from peers. // Respects maxOutstandingTotal, maxOutstandingPerPeer, and maxNewRequestsPerTick limits. func (r *BlockBroadcastReactor) requestMissingBlocks(start, end int64) { - r.cleanupExpiredRequests() + r.removeTimeoutPeers() peers := r.Switch.Peers().List() if len(peers) == 0 { @@ -500,14 +585,14 @@ func (r *BlockBroadcastReactor) markSeen(hash common.Hash) bool { return r.seenBlocks.Add(hash) // Returns true if already existed } -// markSentToPeer marks a block as sent to a peer. -func (r *BlockBroadcastReactor) markSentToPeer(peerID p2p.ID, hash common.Hash) { - r.peerSent.Add(string(peerID), hash) +// markGossipedToPeer marks a block as gossiped to/from a peer. +func (r *BlockBroadcastReactor) markGossipedToPeer(peerID p2p.ID, hash common.Hash) { + r.peerGossiped.Add(string(peerID), hash) } -// hasSentToPeer checks if block was sent to peer. -func (r *BlockBroadcastReactor) hasSentToPeer(peerID p2p.ID, hash common.Hash) bool { - return r.peerSent.Contains(string(peerID), hash) +// hasGossipedToPeer checks if a block has already been gossiped to/from a peer. +func (r *BlockBroadcastReactor) hasGossipedToPeer(peerID p2p.ID, hash common.Hash) bool { + return r.peerGossiped.Contains(string(peerID), hash) } // gossipBlock forwards a block to all peers except the source. @@ -533,13 +618,13 @@ func (r *BlockBroadcastReactor) gossipBlock(block *BlockV2, fromPeer p2p.ID) { } // Skip if already sent - if r.hasSentToPeer(peerID, block.Hash) { + if r.hasGossipedToPeer(peerID, block.Hash) { continue } // Send and mark if peer.TrySend(BlockBroadcastChannel, bz) { - r.markSentToPeer(peerID, block.Hash) + r.markGossipedToPeer(peerID, block.Hash) } } } @@ -578,16 +663,28 @@ func (r *BlockBroadcastReactor) checkAndTakeSyncRequest(peerID p2p.ID, height in return true } -// cleanupExpiredRequests removes expired entries. Called before sending new requests. -func (r *BlockBroadcastReactor) cleanupExpiredRequests() { +// removeTimeoutPeers scans pending sync requests and kicks any peer whose +// request has expired. Map cleanup (syncRequests / syncPeerCounts) is +// delegated to removeSyncRequestsByPeer via the RemovePeer callback, so +// this method itself does not mutate the maps. +func (r *BlockBroadcastReactor) removeTimeoutPeers() { + seen := make(map[p2p.ID]struct{}) + r.syncRequestsMu.Lock() - defer r.syncRequestsMu.Unlock() now := time.Now() - for h, req := range r.syncRequests { + for _, req := range r.syncRequests { if now.After(req.ExpireAt) { - r.syncPeerCounts[req.PeerID]-- - delete(r.syncRequests, h) + seen[req.PeerID] = struct{}{} + } + } + r.syncRequestsMu.Unlock() + + for peerID := range seen { + peer := r.Switch.Peers().Get(peerID) + if peer == nil { + continue } + r.banPeer(peer, "sync request timed out") } } @@ -617,11 +714,58 @@ func (r *BlockBroadcastReactor) removeSyncRequestsByPeer(peerID p2p.ID) { delete(r.syncPeerCounts, peerID) } +// ============================================================================ +// Peer Ban +// ============================================================================ + +// banPeer adds a peer to the ban list for peerBanDuration. +// The peer is also immediately stopped. Expired entries are lazily evicted on +// the next isBanned call for the same peer. +func (r *BlockBroadcastReactor) banPeer(peer p2p.Peer, reason string) { + peerID := peer.ID() + r.bannedPeersMu.Lock() + r.bannedPeers[peerID] = time.Now().Add(peerBanDuration) + r.bannedPeersMu.Unlock() + + r.logger.Error("Banning peer", "peer", peerID, "reason", reason, "duration", peerBanDuration) + r.Switch.StopPeerForError(peer, fmt.Errorf("%s", reason)) +} + +// isBanned returns true if the peer is currently banned. +// Lazily removes expired entries. +func (r *BlockBroadcastReactor) isBanned(peerID p2p.ID) bool { + r.bannedPeersMu.Lock() + defer r.bannedPeersMu.Unlock() + + expireAt, ok := r.bannedPeers[peerID] + if !ok { + return false + } + if time.Now().After(expireAt) { + delete(r.bannedPeers, peerID) + return false + } + return true +} + // ============================================================================ // Message Handlers // ============================================================================ func (r *BlockBroadcastReactor) onBlockRequest(msg *bcproto.BlockRequest, src p2p.Peer) { + localHeight := r.stateV2.LatestHeight() + if msg.Height <= 0 || msg.Height > localHeight { + r.logger.Info("Rejecting out-of-range BlockRequest", + "peer", src.ID(), "requested", msg.Height, "localHeight", localHeight) + bz, err := encodeMsg(&bcproto.NoBlockResponse{Height: msg.Height}) + if err != nil { + r.logger.Error("Failed to encode NoBlockResponse", "height", msg.Height, "err", err) + return + } + src.TrySend(SequencerSyncChannel, bz) + return + } + // Try to get from recent blocks cache first block := r.recentBlocks.GetByHeight(uint64(msg.Height)) @@ -649,6 +793,9 @@ func (r *BlockBroadcastReactor) onBlockRequest(msg *bcproto.BlockRequest, src p2 if len(block.Signature) == 0 && r.sigStore != nil { if sig, err := r.sigStore.GetSignature(block.Hash); err == nil { block.Signature = sig + } else { + r.logger.Error("Signature not found in store, sending block without signature", + "height", msg.Height, "hash", block.Hash.Hex(), "err", err) } } @@ -677,7 +824,6 @@ func (r *BlockBroadcastReactor) broadcast(block *BlockV2) { r.logger.Info("Broadcast block", "number", block.Number, "hash", block.Hash.Hex()) } - // ==================== Message Encoding/Decoding ==================== // Local copies to avoid import cycle with blocksync package diff --git a/sequencer/broadcast_reactor_test.go b/sequencer/broadcast_reactor_test.go new file mode 100644 index 00000000000..2a5404b2e1f --- /dev/null +++ b/sequencer/broadcast_reactor_test.go @@ -0,0 +1,211 @@ +package sequencer + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/tendermint/tendermint/p2p" +) + +// newReactorForTest returns a BlockBroadcastReactor with only the fields +// needed for exercising banmap / sync-request tracking logic. It does NOT +// wire in Switch / StateV2 / verifier — callers must avoid code paths that +// dereference them. +func newReactorForTest() *BlockBroadcastReactor { + return &BlockBroadcastReactor{ + syncRequests: make(map[int64]*SyncReq), + syncPeerCounts: make(map[p2p.ID]int), + bannedPeers: make(map[p2p.ID]time.Time), + blockReqLimiter: NewPeerRateLimiter(blockRequestRateLimit, blockRequestBurst), + } +} + +// ---------------------------------------------------------------------------- +// Banmap behavior +// ---------------------------------------------------------------------------- + +// TestIsBanned_NotBanned: a fresh peer is not banned. +func TestIsBanned_NotBanned(t *testing.T) { + r := newReactorForTest() + require.False(t, r.isBanned("peer-a")) +} + +// TestIsBanned_Banned: writing directly into bannedPeers causes isBanned to +// return true before expiry. +func TestIsBanned_Banned(t *testing.T) { + r := newReactorForTest() + peer := p2p.ID("peer-b") + + r.bannedPeers[peer] = time.Now().Add(1 * time.Minute) + require.True(t, r.isBanned(peer)) +} + +// TestIsBanned_ExpiredLazyEvict: once the ban expires, isBanned returns false +// AND removes the stale entry from the map (lazy eviction). +func TestIsBanned_ExpiredLazyEvict(t *testing.T) { + r := newReactorForTest() + peer := p2p.ID("peer-c") + + r.bannedPeers[peer] = time.Now().Add(-1 * time.Second) // already expired + require.False(t, r.isBanned(peer)) + _, stillPresent := r.bannedPeers[peer] + require.False(t, stillPresent, "expired entry must be lazily evicted") +} + +// ---------------------------------------------------------------------------- +// removeTimeoutPeers: collect-only semantics +// ---------------------------------------------------------------------------- + +// TestRemoveTimeoutPeers_CollectsExpired verifies that expired entries are +// identified. We can't actually call banPeer() without a Switch, so we +// inline the collection logic and assert it catches the right peers. +// +// This mirrors the lock-scoped collection phase of removeTimeoutPeers. +func TestRemoveTimeoutPeers_CollectsExpired(t *testing.T) { + r := newReactorForTest() + now := time.Now() + + // Two expired requests from the same peer, one fresh, one from another peer. + r.syncRequests[100] = &SyncReq{Height: 100, PeerID: "peer-x", ExpireAt: now.Add(-1 * time.Second)} + r.syncRequests[101] = &SyncReq{Height: 101, PeerID: "peer-x", ExpireAt: now.Add(-2 * time.Second)} + r.syncRequests[102] = &SyncReq{Height: 102, PeerID: "peer-y", ExpireAt: now.Add(10 * time.Second)} // fresh + r.syncRequests[103] = &SyncReq{Height: 103, PeerID: "peer-z", ExpireAt: now.Add(-1 * time.Second)} + + // Replicate the collect step from removeTimeoutPeers. + seen := make(map[p2p.ID]struct{}) + r.syncRequestsMu.Lock() + for _, req := range r.syncRequests { + if time.Now().After(req.ExpireAt) { + seen[req.PeerID] = struct{}{} + } + } + r.syncRequestsMu.Unlock() + + require.Len(t, seen, 2, "peer-x and peer-z should be collected, peer-y must not") + require.Contains(t, seen, p2p.ID("peer-x")) + require.Contains(t, seen, p2p.ID("peer-z")) + require.NotContains(t, seen, p2p.ID("peer-y")) +} + +// ---------------------------------------------------------------------------- +// checkAndTakeSyncRequest: ensure slot is only consumed on exact peer+height match +// ---------------------------------------------------------------------------- + +// TestCheckAndTakeSyncRequest_PeerMismatch: a response from a different peer +// than the one we requested must not consume the slot. +func TestCheckAndTakeSyncRequest_PeerMismatch(t *testing.T) { + r := newReactorForTest() + r.syncRequests[200] = &SyncReq{ + Height: 200, + PeerID: "peer-requested", + ExpireAt: time.Now().Add(1 * time.Minute), + } + r.syncPeerCounts["peer-requested"] = 1 + + ok := r.checkAndTakeSyncRequest("peer-other", 200) + require.False(t, ok, "response from wrong peer must be rejected") + + // Slot must still exist. + _, exists := r.syncRequests[200] + require.True(t, exists) + require.Equal(t, 1, r.syncPeerCounts["peer-requested"]) +} + +// TestCheckAndTakeSyncRequest_Expired: a late response (after TTL) must not +// consume the slot. +func TestCheckAndTakeSyncRequest_Expired(t *testing.T) { + r := newReactorForTest() + r.syncRequests[201] = &SyncReq{ + Height: 201, + PeerID: "peer-slow", + ExpireAt: time.Now().Add(-1 * time.Second), + } + r.syncPeerCounts["peer-slow"] = 1 + + ok := r.checkAndTakeSyncRequest("peer-slow", 201) + require.False(t, ok, "expired slot must not be taken") +} + +// TestCheckAndTakeSyncRequest_Success: a well-formed response consumes the +// slot and decrements the per-peer count. +func TestCheckAndTakeSyncRequest_Success(t *testing.T) { + r := newReactorForTest() + r.syncRequests[202] = &SyncReq{ + Height: 202, + PeerID: "peer-good", + ExpireAt: time.Now().Add(1 * time.Minute), + } + r.syncPeerCounts["peer-good"] = 1 + + ok := r.checkAndTakeSyncRequest("peer-good", 202) + require.True(t, ok) + + _, exists := r.syncRequests[202] + require.False(t, exists, "slot must be taken") + require.Equal(t, 0, r.syncPeerCounts["peer-good"]) +} + +// ---------------------------------------------------------------------------- +// removeSyncRequestsByPeer: full cleanup on peer disconnect +// ---------------------------------------------------------------------------- + +// TestRemoveSyncRequestsByPeer_ClearsAllEntries: all slots belonging to a +// peer are deleted, and its counter entry is removed entirely. +func TestRemoveSyncRequestsByPeer_ClearsAllEntries(t *testing.T) { + r := newReactorForTest() + now := time.Now() + r.syncRequests[300] = &SyncReq{Height: 300, PeerID: "peer-gone", ExpireAt: now.Add(1 * time.Minute)} + r.syncRequests[301] = &SyncReq{Height: 301, PeerID: "peer-gone", ExpireAt: now.Add(1 * time.Minute)} + r.syncRequests[302] = &SyncReq{Height: 302, PeerID: "peer-alive", ExpireAt: now.Add(1 * time.Minute)} + r.syncPeerCounts["peer-gone"] = 2 + r.syncPeerCounts["peer-alive"] = 1 + + r.removeSyncRequestsByPeer("peer-gone") + + require.Len(t, r.syncRequests, 1) + _, stillThere := r.syncRequests[302] + require.True(t, stillThere, "peer-alive's slot must remain") + _, counted := r.syncPeerCounts["peer-gone"] + require.False(t, counted, "peer-gone's counter entry must be deleted") + require.Equal(t, 1, r.syncPeerCounts["peer-alive"]) +} + +// ---------------------------------------------------------------------------- +// recordSyncRequest: overwrite semantics +// ---------------------------------------------------------------------------- + +// TestRecordSyncRequest_OverwritesOldEntry: recording a new request for the +// same height decrements the old peer's counter and reassigns the slot. +func TestRecordSyncRequest_OverwritesOldEntry(t *testing.T) { + r := newReactorForTest() + r.recordSyncRequest("peer-1", 400) + require.Equal(t, 1, r.syncPeerCounts["peer-1"]) + + r.recordSyncRequest("peer-2", 400) // same height, new peer + require.Equal(t, 0, r.syncPeerCounts["peer-1"]) + require.Equal(t, 1, r.syncPeerCounts["peer-2"]) + + req := r.syncRequests[400] + require.Equal(t, p2p.ID("peer-2"), req.PeerID) +} + +// ---------------------------------------------------------------------------- +// Rate limiter wiring with reactor-specific constants +// ---------------------------------------------------------------------------- + +// TestReactorRateLimiter_UsesConstants verifies the reactor's rate limiter +// is configured with blockRequestRateLimit / blockRequestBurst and behaves +// end-to-end: after burst exhaustion, subsequent Allow() calls are denied. +func TestReactorRateLimiter_UsesConstants(t *testing.T) { + r := newReactorForTest() + peer := p2p.ID("peer-flood") + r.blockReqLimiter.AddPeer(peer) + + // Consume exactly burst tokens. + for i := 0; i < blockRequestBurst; i++ { + require.True(t, r.blockReqLimiter.Allow(peer), "token %d should be allowed", i+1) + } + // Next call must be denied (no time has elapsed for refill). + require.False(t, r.blockReqLimiter.Allow(peer)) +} diff --git a/sequencer/interfaces.go b/sequencer/interfaces.go index 1c1b5159132..2a1c09d7336 100644 --- a/sequencer/interfaces.go +++ b/sequencer/interfaces.go @@ -8,8 +8,17 @@ import ( // Sentinel errors for block processing var ( - // ErrInvalidSignature indicates block signature verification failed + // ErrInvalidSignature indicates the block's signature is provably invalid. + // This is a malicious-peer signal: the block's content does not match a + // legitimate sequencer signature. Callers should ban the sender. ErrInvalidSignature = errors.New("invalid block signature") + + // ErrVerifierUnavailable indicates the local verifier could not determine + // whether a signature is valid (e.g. not configured, L1 backend RPC + // failure, transient IO error). This is NOT a peer-misbehavior signal — + // the peer may be honest; we just cannot decide right now. Callers must + // NOT ban the sender on this error; retry or drop silently instead. + ErrVerifierUnavailable = errors.New("verifier unavailable") ) // SequencerVerifier verifies if an address is a valid L1 sequencer. diff --git a/sequencer/rate_limiter.go b/sequencer/rate_limiter.go new file mode 100644 index 00000000000..3d9c9b1afd8 --- /dev/null +++ b/sequencer/rate_limiter.go @@ -0,0 +1,94 @@ +package sequencer + +import ( + "sync" + "time" + + "github.com/tendermint/tendermint/p2p" +) + +// tokenBucket is the per-peer token bucket state. +// Tokens are refilled lazily on each Allow() call based on elapsed time. +type tokenBucket struct { + tokens float64 + lastRefill time.Time +} + +// PeerRateLimiter enforces a per-peer token-bucket rate limit. +// +// Each peer has its own bucket with capacity `burst`, refilled at `rate` +// tokens per second. Allow() consumes one token and returns false when the +// bucket is empty. +// +// The limiter is safe for concurrent use. Buckets are created on AddPeer +// (initialized full, so newly connected peers are not penalized) and +// removed on RemovePeer. Allow() on an unknown peer lazily creates a full +// bucket as a fallback. +type PeerRateLimiter struct { + rate float64 // tokens per second + burst float64 // bucket capacity + + mu sync.Mutex + buckets map[p2p.ID]*tokenBucket +} + +// NewPeerRateLimiter creates a limiter with the given rate (tokens/sec) and +// burst (max tokens in a bucket). +func NewPeerRateLimiter(rate float64, burst int) *PeerRateLimiter { + return &PeerRateLimiter{ + rate: rate, + burst: float64(burst), + buckets: make(map[p2p.ID]*tokenBucket), + } +} + +// AddPeer initializes a full bucket for a newly connected peer. +// Calling AddPeer for a peer that already has a bucket is a no-op. +func (l *PeerRateLimiter) AddPeer(peerID p2p.ID) { + l.mu.Lock() + defer l.mu.Unlock() + if _, ok := l.buckets[peerID]; ok { + return + } + l.buckets[peerID] = &tokenBucket{ + tokens: l.burst, + lastRefill: time.Now(), + } +} + +// RemovePeer drops the bucket for a disconnected peer. +func (l *PeerRateLimiter) RemovePeer(peerID p2p.ID) { + l.mu.Lock() + defer l.mu.Unlock() + delete(l.buckets, peerID) +} + +// Allow returns true if the peer is allowed to perform one action, consuming +// one token. Returns false if the peer has exhausted its bucket. +// +// If the peer has no bucket (e.g. AddPeer was not called), a full bucket is +// created as a safe fallback. +func (l *PeerRateLimiter) Allow(peerID p2p.ID) bool { + l.mu.Lock() + defer l.mu.Unlock() + + bucket, ok := l.buckets[peerID] + if !ok { + bucket = &tokenBucket{tokens: l.burst, lastRefill: time.Now()} + l.buckets[peerID] = bucket + } + + now := time.Now() + elapsed := now.Sub(bucket.lastRefill).Seconds() + bucket.tokens += elapsed * l.rate + if bucket.tokens > l.burst { + bucket.tokens = l.burst + } + bucket.lastRefill = now + + if bucket.tokens < 1 { + return false + } + bucket.tokens-- + return true +} diff --git a/sequencer/rate_limiter_test.go b/sequencer/rate_limiter_test.go new file mode 100644 index 00000000000..89fec11186f --- /dev/null +++ b/sequencer/rate_limiter_test.go @@ -0,0 +1,147 @@ +package sequencer + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/tendermint/tendermint/p2p" +) + +const ( + testRate = 10.0 // tokens/sec + testBurst = 5 // bucket capacity +) + +func newTestLimiter() *PeerRateLimiter { + return NewPeerRateLimiter(testRate, testBurst) +} + +// TestAllow_WithinBurst verifies that a peer can fire up to burst requests +// immediately after connecting (bucket starts full). +func TestAllow_WithinBurst(t *testing.T) { + l := newTestLimiter() + peer := p2p.ID("peer-a") + l.AddPeer(peer) + + for i := 0; i < testBurst; i++ { + require.True(t, l.Allow(peer), "request %d should be allowed (within burst)", i+1) + } +} + +// TestAllow_ExceedBurst verifies that the (burst+1)-th immediate request is denied. +func TestAllow_ExceedBurst(t *testing.T) { + l := newTestLimiter() + peer := p2p.ID("peer-b") + l.AddPeer(peer) + + for i := 0; i < testBurst; i++ { + l.Allow(peer) + } + require.False(t, l.Allow(peer), "request beyond burst should be denied") +} + +// TestAllow_RefillAfterDelay verifies tokens are replenished over time. +func TestAllow_RefillAfterDelay(t *testing.T) { + l := newTestLimiter() + peer := p2p.ID("peer-c") + l.AddPeer(peer) + + // Drain the bucket completely. + for i := 0; i < testBurst; i++ { + l.Allow(peer) + } + require.False(t, l.Allow(peer), "bucket should be empty") + + // Wait for 1 token to refill (rate=10/s → 100ms per token). + time.Sleep(120 * time.Millisecond) + require.True(t, l.Allow(peer), "one token should have been refilled") +} + +// TestAllow_UnknownPeer verifies that a peer not added via AddPeer gets a +// full-bucket fallback and is not permanently blocked. +func TestAllow_UnknownPeer(t *testing.T) { + l := newTestLimiter() + peer := p2p.ID("peer-unknown") + // No AddPeer call — Allow should create a full bucket as fallback. + require.True(t, l.Allow(peer), "unknown peer should get a full bucket fallback") +} + +// TestAddPeer_Idempotent verifies that calling AddPeer twice does not reset +// an existing bucket (tokens already consumed should stay consumed). +func TestAddPeer_Idempotent(t *testing.T) { + l := newTestLimiter() + peer := p2p.ID("peer-d") + l.AddPeer(peer) + + // Drain the bucket. + for i := 0; i < testBurst; i++ { + l.Allow(peer) + } + require.False(t, l.Allow(peer)) + + // Second AddPeer must not reset the bucket. + l.AddPeer(peer) + require.False(t, l.Allow(peer), "AddPeer must not reset an existing bucket") +} + +// TestRemovePeer_CleansUp verifies that after RemovePeer the bucket is gone +// and a subsequent Allow re-creates a fresh full bucket (fallback path). +func TestRemovePeer_CleansUp(t *testing.T) { + l := newTestLimiter() + peer := p2p.ID("peer-e") + l.AddPeer(peer) + + // Drain the bucket. + for i := 0; i < testBurst; i++ { + l.Allow(peer) + } + require.False(t, l.Allow(peer)) + + // Remove peer — bucket must be deleted. + l.RemovePeer(peer) + + // Next Allow triggers the unknown-peer fallback (full bucket). + require.True(t, l.Allow(peer), "after RemovePeer, Allow should create a fresh full bucket") +} + +// TestAllow_MultiplePeers verifies that buckets are independent per peer. +func TestAllow_MultiplePeers(t *testing.T) { + l := newTestLimiter() + peerA := p2p.ID("peer-f") + peerB := p2p.ID("peer-g") + l.AddPeer(peerA) + l.AddPeer(peerB) + + // Drain peerA entirely. + for i := 0; i < testBurst; i++ { + l.Allow(peerA) + } + require.False(t, l.Allow(peerA), "peerA should be exhausted") + + // peerB's bucket must be untouched. + require.True(t, l.Allow(peerB), "peerB should still have tokens") +} + +// TestAllow_BurstCapNotExceeded verifies that tokens never exceed burst even +// after a long idle period. +func TestAllow_BurstCapNotExceeded(t *testing.T) { + l := newTestLimiter() + peer := p2p.ID("peer-h") + l.AddPeer(peer) + + // Drain fully. + for i := 0; i < testBurst; i++ { + l.Allow(peer) + } + + // Wait long enough for many tokens to theoretically accumulate. + time.Sleep(500 * time.Millisecond) // would add 5 tokens at rate=10 + + // Can only consume up to burst, not more. + allowed := 0 + for l.Allow(peer) { + allowed++ + } + require.LessOrEqual(t, allowed, testBurst, "tokens must not exceed burst capacity") +} From 9e56b04da3c877513eda3e26df87f1aeeb24eaad Mon Sep 17 00:00:00 2001 From: "allen.wu" Date: Thu, 7 May 2026 10:18:18 +0800 Subject: [PATCH 07/15] fix: replace 100ms sleep with deterministic Wait() in sequencer upgrade MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit finding M-02: switchToSequencerMode used time.Sleep(100ms) as a heuristic to wait for finalizeCommit to return before stopping the consensus reactor. This is a non-deterministic synchronization that may fail under high load or GC pauses. Changes: 1. consensus/state.go: in finalizeCommit's upgrade branch, call cs.Stop() BEFORE cs.onUpgrade(). Stop() closes the BaseService quit channel synchronously, which establishes the happens-before relationship that any goroutine started by onUpgrade can rely on. 2. node/node.go: replace time.Sleep(100ms) with consensusState.Wait(). Wait() blocks until cs.done is closed (the very last action of receiveRoutine), giving a deterministic synchronization point that replaces the time-based heuristic. 3. consensus/reactor.go: remove unused StopForUpgrade method. It had no callers and bypassed BaseService.Stop's atomic stopped flag, which would cause OnStop to be invoked twice during node shutdown. Verified via Docker integration test on feat/sequencer-optimize: - PBFT → V2 upgrade flow passes - HA cluster forms correctly (35/35 HA tests pass) - P2P security tests pass Co-Authored-By: Claude Opus 4.6 --- consensus/reactor.go | 8 -------- consensus/state.go | 20 ++++++++++++++------ node/node.go | 32 ++++++++++++++++++++++++-------- 3 files changed, 38 insertions(+), 22 deletions(-) diff --git a/consensus/reactor.go b/consensus/reactor.go index 18a3170728a..18d260eaa43 100644 --- a/consensus/reactor.go +++ b/consensus/reactor.go @@ -113,14 +113,6 @@ func (conR *Reactor) OnStop() { } } -// StopForUpgrade stops the consensus reactor when upgrading to sequencer mode. -// This is called when the chain reaches the upgrade height. -func (conR *Reactor) StopForUpgrade() { - conR.Logger.Info("Stopping consensus reactor for sequencer upgrade", - "height", conR.conS.Height, - "upgradeHeight", upgrade.UpgradeBlockHeight) - conR.OnStop() -} // SwitchToConsensus switches from block_sync mode to consensus mode. // It resets the state, turns off block_sync, and starts the consensus state-machine diff --git a/consensus/state.go b/consensus/state.go index 69819f8f673..08e9c76f8ce 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1814,16 +1814,24 @@ func (cs *State) finalizeCommit(height int64) { "height", cs.Height, "upgradeHeight", upgrade.UpgradeBlockHeight) - // Call upgrade callback first (allows reactor to handle transition) - if cs.onUpgrade != nil { - cs.onUpgrade() - } - - // Stop consensus state + // Stop consensus state first. + // + // This closes the BaseService quit channel synchronously, so any + // goroutine started by onUpgrade can safely call cs.Wait() to block + // until receiveRoutine has fully exited (cs.done closed). Stopping + // before onUpgrade guarantees the happens-before relationship that + // the reactor switch relies on. if err := cs.Stop(); err != nil { logger.Error("Failed to stop consensus state", "err", err) panic(err) } + + // Notify upgrade observer (e.g. node-level reactor switch). + // The callback is expected to do its work in a goroutine and use + // cs.Wait() for synchronization rather than time-based heuristics. + if cs.onUpgrade != nil { + cs.onUpgrade() + } return } diff --git a/node/node.go b/node/node.go index 7fe90dd22fe..e46be509cbe 100644 --- a/node/node.go +++ b/node/node.go @@ -1619,20 +1619,36 @@ func (n *Node) startSequencerMode() { func (n *Node) switchToSequencerMode() { n.Logger.Info("Upgrade callback triggered, scheduling switch to sequencer mode") - // NOTE: Must use goroutine to avoid deadlock - onUpgrade is called from finalizeCommit, - // and consensusReactor.Stop() would try to stop the state that's currently running. + // Run in a goroutine: this callback executes inside finalizeCommit which + // runs on receiveRoutine. We must let receiveRoutine return before it can + // observe cs.Quit() and close cs.done. Calling consensusReactor.Stop() + // synchronously here would block on conS.Wait() — which waits for the very + // goroutine we are running on — and deadlock. go func() { - // Wait a moment for finalizeCommit to complete - time.Sleep(100 * time.Millisecond) - - // Stop consensus reactor - n.Logger.Info("Stopping consensus reactor...") + // Wait until receiveRoutine has fully exited. + // + // cs.Stop() has already been called by finalizeCommit before invoking + // this callback (see consensus/state.go upgrade branch), so cs.Quit() + // is closed and receiveRoutine will hit the case <-cs.Quit() branch as + // soon as finalizeCommit returns. cs.done is closed at the very end of + // receiveRoutine. Waiting on it replaces the previous 100ms sleep with + // a deterministic synchronization point. + n.Logger.Info("Waiting for consensus state to drain...") + n.consensusState.Wait() + n.Logger.Info("Consensus state drained") + + // Stop consensus reactor through the standard BaseService.Stop path so + // its stopped flag is set; this prevents the P2P switch from invoking + // OnStop a second time during node shutdown. OnStop will call + // conS.Stop() (idempotent: returns ErrAlreadyStopped, logged only) and + // conS.Wait() (already satisfied since cs.done is closed). + n.Logger.Info("Stopping consensus reactor for upgrade...") if err := n.consensusReactor.Stop(); err != nil { n.Logger.Error("Failed to stop consensus reactor", "err", err) } n.Logger.Info("Consensus reactor stopped") - // Start broadcast reactor + // Start broadcast reactor for sequencer mode. n.startSequencerMode() }() } From 29877ca08841c1f5bf0cbb4d82b562eb01d39b7b Mon Sep 17 00:00:00 2001 From: "allen.wu" Date: Fri, 15 May 2026 12:32:52 +0800 Subject: [PATCH 08/15] deps: tidy go.mod indirect dependencies Two indirect deps were missing from go.mod: - github.com/VictoriaMetrics/fastcache v1.12.1 - github.com/prometheus/tsdb v0.7.1 These are pulled in transitively. `go build` was failing with 'updates to go.mod needed'. `go mod tidy` fixes it without changing direct dependencies. Co-Authored-By: Claude Opus 4.7 (1M context) --- go.mod | 2 ++ go.sum | 3 +++ 2 files changed, 5 insertions(+) diff --git a/go.mod b/go.mod index 37550218781..ffdc0d55f8b 100644 --- a/go.mod +++ b/go.mod @@ -64,6 +64,7 @@ require ( github.com/Nvveen/Gotty v0.0.0-20120604004816-cd527374f1e5 // indirect github.com/OpenPeeDeeP/depguard v1.1.0 // indirect github.com/StackExchange/wmi v1.2.1 // indirect + github.com/VictoriaMetrics/fastcache v1.12.1 // indirect github.com/alexkohler/prealloc v1.0.0 // indirect github.com/alingse/asasalint v0.0.11 // indirect github.com/ashanbrown/forbidigo v1.3.0 // indirect @@ -216,6 +217,7 @@ require ( github.com/pointlander/jetset v1.0.1-0.20190518214125-eee7eff80bd4 // indirect github.com/polyfloyd/go-errorlint v1.0.2 // indirect github.com/prometheus/procfs v0.12.0 // indirect + github.com/prometheus/tsdb v0.7.1 // indirect github.com/quasilyte/go-ruleguard v0.3.17 // indirect github.com/quasilyte/gogrep v0.0.0-20220120141003-628d8b3623b5 // indirect github.com/quasilyte/regex/syntax v0.0.0-20200407221936-30656e2c4a95 // indirect diff --git a/go.sum b/go.sum index b3c0e4873c7..0d1de343307 100644 --- a/go.sum +++ b/go.sum @@ -112,6 +112,8 @@ github.com/alexkohler/prealloc v1.0.0 h1:Hbq0/3fJPQhNkN0dR95AVrr6R7tou91y0uHG5pO github.com/alexkohler/prealloc v1.0.0/go.mod h1:VetnK3dIgFBBKmg0YnD9F9x6Icjd+9cvfHR56wJVlKE= github.com/alingse/asasalint v0.0.11 h1:SFwnQXJ49Kx/1GghOFz1XGqHYKp21Kq1nHad/0WQRnw= github.com/alingse/asasalint v0.0.11/go.mod h1:nCaoMhw7a9kSJObvQyVzNTPBDbNpdocqrSP7t/cW5+I= +github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8= +github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ= @@ -1564,6 +1566,7 @@ golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= From c6f7e21e4b14810ddafc052d1c3cdedb15828e0a Mon Sep 17 00:00:00 2001 From: "allen.wu" Date: Fri, 15 May 2026 12:33:08 +0800 Subject: [PATCH 09/15] feat: persistent peer ban exemption + sigStore.Close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow operators to whitelist trusted upstream peers (sequencer / sentry / boot nodes) so that fullnodes do not accidentally ban them on transient faults. Whitelist is configured via the existing [p2p].persistent_peers setting — no new config option introduced. broadcast_reactor.go: banPeer now short-circuits for peers where Peer.IsPersistent() is true. The peer is still disconnected via Switch.StopPeerForError so connection state (sync requests, gossip cache, rate limiter) is fully reset by the RemovePeer callback chain. The peer is NOT added to bannedPeers, so the Switch's automatic reconnect will pass AddPeer's isBanned gate and the link recovers immediately instead of being unreachable for 10 minutes. A new log prefix '[WHITELIST_ALARM]' is emitted on each exemption so log-aggregation systems can alert when a whitelisted peer is misbehaving. signature_store.go: Add Close() method delegating to the underlying tm-db. Closes a code consistency gap where blockStore and stateStore were closed on shutdown but sigStore was not. node.go: Invoke sigStore.Close() in OnStop alongside blockStore and stateStore. Tests: TestBanPeer_PersistentPeerSkipsBanList — core invariant TestBanPeer_NonPersistentPeerEntersBanList — regression guard TestBanPeer_PersistentPeerAddPeerNotRejected — full reconnect cycle Note: the value of this exemption is verified end-to-end by an integration test on a separate disconnect-test branch (see ops/docker-sequencer-test follow-up). Co-Authored-By: Claude Opus 4.7 (1M context) --- node/node.go | 5 +++ sequencer/broadcast_reactor.go | 13 ++++++ sequencer/broadcast_reactor_test.go | 65 +++++++++++++++++++++++++++++ sequencer/signature_store.go | 9 ++++ 4 files changed, 92 insertions(+) diff --git a/node/node.go b/node/node.go index e46be509cbe..3256bf23f90 100644 --- a/node/node.go +++ b/node/node.go @@ -1170,6 +1170,11 @@ func (n *Node) OnStop() { n.Logger.Error("problem closing statestore", "err", err) } } + if n.sigStore != nil { + if err := n.sigStore.Close(); err != nil { + n.Logger.Error("problem closing sigstore", "err", err) + } + } } // ConfigureRPC makes sure RPC has all the objects it needs to operate. diff --git a/sequencer/broadcast_reactor.go b/sequencer/broadcast_reactor.go index b9ef0ad7930..39ac1cb9221 100644 --- a/sequencer/broadcast_reactor.go +++ b/sequencer/broadcast_reactor.go @@ -721,8 +721,21 @@ func (r *BlockBroadcastReactor) removeSyncRequestsByPeer(peerID p2p.ID) { // banPeer adds a peer to the ban list for peerBanDuration. // The peer is also immediately stopped. Expired entries are lazily evicted on // the next isBanned call for the same peer. +// +// Persistent peers (whitelisted via [p2p].persistent_peers) are exempt: they +// still get disconnected — so connection state is fully reset via RemovePeer +// callbacks — but are NOT added to bannedPeers. The Switch will automatically +// reconnect them, and on reconnect AddPeer's isBanned check will let them in. func (r *BlockBroadcastReactor) banPeer(peer p2p.Peer, reason string) { peerID := peer.ID() + + if peer.IsPersistent() { + r.logger.Error("[WHITELIST_ALARM] whitelisted peer misbehaved, will reconnect", + "peer", peerID, "reason", reason) + r.Switch.StopPeerForError(peer, fmt.Errorf("%s", reason)) + return + } + r.bannedPeersMu.Lock() r.bannedPeers[peerID] = time.Now().Add(peerBanDuration) r.bannedPeersMu.Unlock() diff --git a/sequencer/broadcast_reactor_test.go b/sequencer/broadcast_reactor_test.go index 2a5404b2e1f..85315dd6b4a 100644 --- a/sequencer/broadcast_reactor_test.go +++ b/sequencer/broadcast_reactor_test.go @@ -5,7 +5,9 @@ import ( "time" "github.com/stretchr/testify/require" + "github.com/tendermint/tendermint/libs/log" "github.com/tendermint/tendermint/p2p" + "github.com/tendermint/tendermint/p2p/mock" ) // newReactorForTest returns a BlockBroadcastReactor with only the fields @@ -209,3 +211,66 @@ func TestReactorRateLimiter_UsesConstants(t *testing.T) { // Next call must be denied (no time has elapsed for refill). require.False(t, r.blockReqLimiter.Allow(peer)) } + +// ---------------------------------------------------------------------------- +// banPeer whitelist exemption (persistent_peers) +// ---------------------------------------------------------------------------- + +// safeBanPeer drives banPeer with a nil Switch and absorbs the resulting +// panic. We only care about the ban-list write side-effect: by the time +// StopPeerForError runs (and panics on nil), the bannedPeers map has either +// been written (non-persistent path) or skipped (persistent path), which is +// exactly what we want to assert. +func safeBanPeer(r *BlockBroadcastReactor, peer p2p.Peer, reason string) { + defer func() { _ = recover() }() + r.banPeer(peer, reason) +} + +// TestBanPeer_PersistentPeerSkipsBanList: a peer with IsPersistent()==true +// must NOT be added to bannedPeers — that is the central whitelist invariant. +func TestBanPeer_PersistentPeerSkipsBanList(t *testing.T) { + r := newReactorForTest() + r.logger = log.NewNopLogger() + + mp := mock.NewPeer(nil) + mp.Persistent = true + + safeBanPeer(r, mp, "test signature failure") + + _, present := r.bannedPeers[mp.ID()] + require.False(t, present, "persistent peer must be exempt from bannedPeers") +} + +// TestBanPeer_NonPersistentPeerEntersBanList: regression — a peer with +// IsPersistent()==false must follow the original code path and be added +// to bannedPeers with a future expiry. +func TestBanPeer_NonPersistentPeerEntersBanList(t *testing.T) { + r := newReactorForTest() + r.logger = log.NewNopLogger() + + mp := mock.NewPeer(nil) + mp.Persistent = false + + before := time.Now() + safeBanPeer(r, mp, "test signature failure") + + expireAt, present := r.bannedPeers[mp.ID()] + require.True(t, present, "non-persistent peer must be added to bannedPeers") + require.True(t, expireAt.After(before.Add(peerBanDuration-time.Second)), + "ban expiry should be ~peerBanDuration in the future") +} + +// TestBanPeer_PersistentPeerAddPeerNotRejected: the natural follow-up — a +// persistent peer that has misbehaved before still passes AddPeer's +// isBanned gate, so the Switch's automatic reconnect can re-add it. +func TestBanPeer_PersistentPeerAddPeerNotRejected(t *testing.T) { + r := newReactorForTest() + r.logger = log.NewNopLogger() + + mp := mock.NewPeer(nil) + mp.Persistent = true + + safeBanPeer(r, mp, "decode failure") + require.False(t, r.isBanned(mp.ID()), + "persistent peer must not be considered banned after misbehavior") +} diff --git a/sequencer/signature_store.go b/sequencer/signature_store.go index b0cca3d652f..0d2941db872 100644 --- a/sequencer/signature_store.go +++ b/sequencer/signature_store.go @@ -49,3 +49,12 @@ func (s *SignatureStore) GetSignature(blockHash common.Hash) ([]byte, error) { } return val, nil } + +// Close releases the underlying DB. Safe to call on a nil receiver or when +// the store was never initialized. +func (s *SignatureStore) Close() error { + if s == nil || s.db == nil { + return nil + } + return s.db.Close() +} From 6393e1eaad71311f29e3463353e87ae510bf72b1 Mon Sep 17 00:00:00 2001 From: "allen.wu" Date: Thu, 28 May 2026 12:00:48 +0800 Subject: [PATCH 10/15] feat: add api for integrate derivation reorg --- blocksync/pool.go | 7 +- blocksync/pool_test.go | 74 ++++++++++++----- blocksync/reactor.go | 96 +++++++++++++++++++--- node/node.go | 144 +++++++++++++++++++++++++++++++++ sequencer/block_cache.go | 17 ++++ sequencer/broadcast_reactor.go | 74 ++++++++++++++++- sequencer/hash_set.go | 17 ++++ sequencer/pending_cache.go | 11 +++ sequencer/state_v2.go | 16 +--- sequencer/state_v2_test.go | 16 ++-- 10 files changed, 416 insertions(+), 56 deletions(-) diff --git a/blocksync/pool.go b/blocksync/pool.go index 4835a0727d5..1587564e73f 100644 --- a/blocksync/pool.go +++ b/blocksync/pool.go @@ -345,11 +345,12 @@ func (pool *BlockPool) GetPeerIDs() []p2p.ID { // // In all non-malicious cases, maxPeerHeight is recomputed via // updateMaxPeerHeight, which filters peers whose base is above pool.height. -func (pool *BlockPool) SetPeerRange(peerID p2p.ID, base int64, height int64) { +func (pool *BlockPool) SetPeerRange(srcPeer p2p.Peer, base int64, height int64) { + peerID := srcPeer.ID() pool.mtx.Lock() defer pool.mtx.Unlock() - if base > height { + if base > height && !srcPeer.IsPersistent() { pool.Logger.Info("Peer reporting base greater than height; banning", "peer", peerID, "base", base, "height", height) if _, exists := pool.peers[peerID]; exists { @@ -361,7 +362,7 @@ func (pool *BlockPool) SetPeerRange(peerID p2p.ID, base int64, height int64) { peer := pool.peers[peerID] if peer != nil { - if height < peer.height || base < peer.base { + if (height < peer.height || base < peer.base) && !srcPeer.IsPersistent() { pool.Logger.Info("Peer reporting decreasing height or base; removing and banning", "peer", peerID, "prevHeight", peer.height, "height", height, diff --git a/blocksync/pool_test.go b/blocksync/pool_test.go index 636fe72115c..eb62e5d2b7e 100644 --- a/blocksync/pool_test.go +++ b/blocksync/pool_test.go @@ -2,6 +2,7 @@ package blocksync import ( "fmt" + "net" "testing" "time" @@ -11,6 +12,7 @@ import ( "github.com/tendermint/tendermint/libs/log" tmrand "github.com/tendermint/tendermint/libs/rand" "github.com/tendermint/tendermint/p2p" + "github.com/tendermint/tendermint/p2p/conn" "github.com/tendermint/tendermint/types" ) @@ -18,6 +20,42 @@ func init() { peerTimeout = 2 * time.Second } +// fakePeer is a minimal p2p.Peer implementation for SetPeerRange unit tests. +// Only ID() and IsPersistent() are read by SetPeerRange; the rest are stubs. +type fakePeer struct { + id p2p.ID + persistent bool +} + +func (p *fakePeer) ID() p2p.ID { return p.id } +func (p *fakePeer) IsPersistent() bool { return p.persistent } +func (p *fakePeer) IsOutbound() bool { return false } +func (p *fakePeer) RemoteIP() net.IP { return nil } +func (p *fakePeer) RemoteAddr() net.Addr { return nil } +func (p *fakePeer) CloseConn() error { return nil } +func (p *fakePeer) NodeInfo() p2p.NodeInfo { + return p2p.DefaultNodeInfo{DefaultNodeID: p.id} +} +func (p *fakePeer) Status() conn.ConnectionStatus { return conn.ConnectionStatus{} } +func (p *fakePeer) SocketAddr() *p2p.NetAddress { return nil } +func (p *fakePeer) Send(byte, []byte) bool { return true } +func (p *fakePeer) TrySend(byte, []byte) bool { return true } +func (p *fakePeer) Set(string, interface{}) {} +func (p *fakePeer) Get(string) interface{} { return nil } +func (p *fakePeer) FlushStop() {} +func (p *fakePeer) Start() error { return nil } +func (p *fakePeer) OnStart() error { return nil } +func (p *fakePeer) Stop() error { return nil } +func (p *fakePeer) OnStop() {} +func (p *fakePeer) Reset() error { return nil } +func (p *fakePeer) OnReset() error { return nil } +func (p *fakePeer) IsRunning() bool { return true } +func (p *fakePeer) Quit() <-chan struct{} { return nil } +func (p *fakePeer) String() string { return string(p.id) } +func (p *fakePeer) SetLogger(log.Logger) {} + +func mkPeer(id p2p.ID) p2p.Peer { return &fakePeer{id: id} } + type testPeer struct { id p2p.ID base int64 @@ -105,7 +143,7 @@ func TestBlockPoolBasic(t *testing.T) { // the pool from spawning any requesters. go func() { for _, peer := range peers { - pool.SetPeerRange(peer.id, start, peer.height) + pool.SetPeerRange(mkPeer(peer.id), start, peer.height) } }() @@ -167,7 +205,7 @@ func TestBlockPoolTimeout(t *testing.T) { // the pool from spawning any requesters. go func() { for _, peer := range peers { - pool.SetPeerRange(peer.id, start, peer.height) + pool.SetPeerRange(mkPeer(peer.id), start, peer.height) } }() @@ -228,7 +266,7 @@ func TestBlockPoolRemovePeer(t *testing.T) { // add peers for peerID, peer := range peers { - pool.SetPeerRange(peerID, peer.base, peer.height) + pool.SetPeerRange(mkPeer(peerID), peer.base, peer.height) } assert.EqualValues(t, 10, pool.MaxPeerHeight()) @@ -265,11 +303,11 @@ func TestSetPeerRange_DecreasingHeight(t *testing.T) { pool.SetLogger(log.TestingLogger()) peerID := p2p.ID("malicious") - pool.SetPeerRange(peerID, 1, 1000) + pool.SetPeerRange(mkPeer(peerID), 1, 1000) require.EqualValues(t, 1000, pool.MaxPeerHeight()) require.NotNil(t, pool.peers[peerID]) - pool.SetPeerRange(peerID, 1, 1) + pool.SetPeerRange(mkPeer(peerID), 1, 1) require.Nil(t, pool.peers[peerID], "peer must be removed after lowering height") require.True(t, pool.isPeerBanned(peerID), "peer must be banned after lowering height") require.EqualValues(t, 0, pool.MaxPeerHeight(), @@ -287,10 +325,10 @@ func TestSetPeerRange_DecreasingBase(t *testing.T) { pool.SetLogger(log.TestingLogger()) peerID := p2p.ID("malicious") - pool.SetPeerRange(peerID, 50, 1000) + pool.SetPeerRange(mkPeer(peerID), 50, 1000) require.NotNil(t, pool.peers[peerID]) - pool.SetPeerRange(peerID, 10, 1000) + pool.SetPeerRange(mkPeer(peerID), 10, 1000) require.Nil(t, pool.peers[peerID], "peer must be removed after lowering base") require.True(t, pool.isPeerBanned(peerID), "peer must be banned after lowering base") } @@ -307,7 +345,7 @@ func TestSetPeerRange_BaseGreaterThanHeight(t *testing.T) { pool.SetLogger(log.TestingLogger()) peerID := p2p.ID("malicious") - pool.SetPeerRange(peerID, 500, 100) + pool.SetPeerRange(mkPeer(peerID), 500, 100) require.Nil(t, pool.peers[peerID], "peer with base > height must not be added") require.True(t, pool.isPeerBanned(peerID)) require.EqualValues(t, 0, pool.MaxPeerHeight()) @@ -324,10 +362,10 @@ func TestBanPeer_PreventsReentry(t *testing.T) { pool.SetLogger(log.TestingLogger()) peerID := p2p.ID("malicious") - pool.SetPeerRange(peerID, 500, 100) + pool.SetPeerRange(mkPeer(peerID), 500, 100) require.True(t, pool.isPeerBanned(peerID)) - pool.SetPeerRange(peerID, 1, 200) + pool.SetPeerRange(mkPeer(peerID), 1, 200) require.Nil(t, pool.peers[peerID], "banned peer must not be re-added during ban window") require.EqualValues(t, 0, pool.MaxPeerHeight()) } @@ -347,12 +385,12 @@ func TestBanPeer_ExpiryAllowsReentry(t *testing.T) { pool.SetLogger(log.TestingLogger()) peerID := p2p.ID("rehabilitated") - pool.SetPeerRange(peerID, 500, 100) + pool.SetPeerRange(mkPeer(peerID), 500, 100) require.True(t, pool.isPeerBanned(peerID)) time.Sleep(20 * time.Millisecond) - pool.SetPeerRange(peerID, 1, 200) + pool.SetPeerRange(mkPeer(peerID), 1, 200) require.NotNil(t, pool.peers[peerID], "peer should be re-admitted after ban expiry") require.False(t, pool.isPeerBanned(peerID)) require.EqualValues(t, 200, pool.MaxPeerHeight()) @@ -374,10 +412,10 @@ func TestBlockPoolMaxPeerHeightNotPoisonedByHighBase(t *testing.T) { pool := NewBlockPool(200, requestsCh, errorsCh) pool.SetLogger(log.TestingLogger()) - pool.SetPeerRange(p2p.ID("honest"), 1, 200) + pool.SetPeerRange(mkPeer(p2p.ID("honest")), 1, 200) require.EqualValues(t, 200, pool.MaxPeerHeight()) - pool.SetPeerRange(p2p.ID("malicious"), 1_000_000, 1_000_100) + pool.SetPeerRange(mkPeer(p2p.ID("malicious")), 1_000_000, 1_000_100) require.EqualValues(t, 200, pool.MaxPeerHeight(), "malicious peer with base above pool.height must not raise maxPeerHeight") @@ -396,8 +434,8 @@ func TestBlockPoolMaxPeerHeightRefreshesOnPopRequest(t *testing.T) { pool := NewBlockPool(10, requestsCh, errorsCh) pool.SetLogger(log.TestingLogger()) - pool.SetPeerRange(p2p.ID("A"), 1, 20) - pool.SetPeerRange(p2p.ID("B"), 15, 100) + pool.SetPeerRange(mkPeer(p2p.ID("A")), 1, 20) + pool.SetPeerRange(mkPeer(p2p.ID("B")), 15, 100) require.EqualValues(t, 20, pool.MaxPeerHeight(), "peer B (base=15) must not contribute while pool.height (10) is below its base") @@ -434,8 +472,8 @@ func TestSegmentedPeers_BoundaryDeadlockPrevention(t *testing.T) { // the maxPeerHeight branch we want to exercise. pool.startTime = time.Now().Add(-10 * time.Second) - pool.SetPeerRange(p2p.ID("A"), 1, 150) - pool.SetPeerRange(p2p.ID("B"), 151, 500) + pool.SetPeerRange(mkPeer(p2p.ID("A")), 1, 150) + pool.SetPeerRange(mkPeer(p2p.ID("B")), 151, 500) require.EqualValues(t, 150, pool.MaxPeerHeight(), "peer B must be filtered while its base (151) > pool.height (150)") diff --git a/blocksync/reactor.go b/blocksync/reactor.go index 27ea1e92aee..531a6f937aa 100644 --- a/blocksync/reactor.go +++ b/blocksync/reactor.go @@ -3,6 +3,7 @@ package blocksync import ( "fmt" "reflect" + "sync" "time" "github.com/tendermint/tendermint/l2node" @@ -41,6 +42,11 @@ type consensusReactor interface { type sequencerReactor interface { // for when we switch from blockchain reactor to sequencer mode StartSequencerRoutines() error + // SetPool updates the peer-height pool reference used by the broadcast + // reactor. Called after a reorg rebuild so the broadcast reactor stops + // reading from the discarded pool. The argument is the *blocksync.BlockPool + // directly; it satisfies sequencer.BlockPool by structural typing. + SetPool(sequencer.BlockPool) } // SequencerState interface for accessing sequencer state (avoids import cycle) @@ -73,8 +79,12 @@ type Reactor struct { pool *BlockPool blockSync bool - requestsCh <-chan BlockRequest - errorsCh <-chan peerError + // Bidirectional so the same buffers can be handed to a freshly-built BlockPool + // during reorg (see SwitchToBlockSyncFromReorg), while still being read here. + requestsCh chan BlockRequest + errorsCh chan peerError + + poolWg sync.WaitGroup // Sequencer signature verification (set after upgrade via SetVerifier/SetSigStore) verifier sequencer.SequencerVerifier @@ -166,11 +176,19 @@ func (bcR *Reactor) OnStart() error { if err != nil { return err } - go bcR.poolRoutine(false) + bcR.startPoolRoutine(false) } return nil } +func (bcR *Reactor) startPoolRoutine(stateSynced bool) { + bcR.poolWg.Add(1) + go func() { + defer bcR.poolWg.Done() + bcR.poolRoutine(stateSynced) + }() +} + // SwitchToBlockSync is called by the state sync reactor when switching to block sync. func (bcR *Reactor) SwitchToBlockSync(state sm.State) error { bcR.blockSync = true @@ -181,10 +199,49 @@ func (bcR *Reactor) SwitchToBlockSync(state sm.State) error { if err != nil { return err } - go bcR.poolRoutine(true) + bcR.startPoolRoutine(true) + return nil +} + +func (bcR *Reactor) StopForReorg() error { + if bcR.IsRunning() { + if err := bcR.Stop(); err != nil { + return err + } + } + bcR.poolWg.Wait() return nil } +// SwitchToBlockSyncFromReorg restarts block sync after a reorg by rebuilding +// the BlockPool. currentHeight is the post-reorg head; the new pool will +// request from currentHeight+1. +// +// Stop -> Reset -> swap pool -> Start, on the same reactor instance. +// +// Reset() requires the reactor to be in stopped state. It can legitimately +// fail when the reactor was never started yet (e.g. early init paths, +// certain test setups). Log the error rather than abort: continuing to +// rebuild the pool and call Start() is the right recovery, and silent +// `_ = bcR.Reset()` would hide genuine lifecycle drift bugs. +func (bcR *Reactor) SwitchToBlockSyncFromReorg(currentHeight int64) error { + if err := bcR.Reset(); err != nil { + bcR.Logger.Error("reset blocksync reactor failed; continuing", "err", err) + } + + bcR.pool = NewBlockPool(currentHeight+1, bcR.requestsCh, bcR.errorsCh) + bcR.pool.SetLogger(bcR.Logger) + + // Point the sequencer broadcast reactor at the new pool so peer-height + // lookups stop reading the discarded one. Best-effort; in PBFT-only or + // test setups SEQUENCER may not be registered. + if seqR, ok := bcR.Switch.Reactor("SEQUENCER").(sequencerReactor); ok { + seqR.SetPool(bcR.pool) + } + + return bcR.Start() +} + // OnStop implements service.Service. func (bcR *Reactor) OnStop() { if bcR.blockSync { @@ -194,6 +251,11 @@ func (bcR *Reactor) OnStop() { } } +// OnReset implements service.Service. +// Pool is rebuilt explicitly in SwitchToBlockSyncFromReorg before Start, so +// nothing to do here; kept only so BaseService.Reset() doesn't panic. +func (bcR *Reactor) OnReset() error { return nil } + // GetChannels implements Reactor func (bcR *Reactor) GetChannels() []*p2p.ChannelDescriptor { return []*p2p.ChannelDescriptor{ @@ -411,7 +473,7 @@ func (bcR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) { case *bcproto.StatusResponse: // Got a peer status. Unverified. bcR.Logger.Debug("SetPeerRange", "peer", src.ID(), "base", msg.Base, "height", msg.Height) - bcR.pool.SetPeerRange(src.ID(), msg.Base, msg.Height) + bcR.pool.SetPeerRange(src, msg.Base, msg.Height) case *bcproto.NoBlockResponse: bcR.Logger.Debug("Peer does not have requested block", "peer", src, "height", msg.Height) default: @@ -428,10 +490,6 @@ func (bcR *Reactor) poolRoutine(stateSynced bool) { trySyncTicker := time.NewTicker(trySyncIntervalMS * time.Millisecond) defer trySyncTicker.Stop() - statusUpdateTicker := time.NewTicker(statusUpdateIntervalSeconds * time.Second) - // no longer stop the ticker, reuse it for sequencer mode status updates - //defer statusUpdateTicker.Stop() - switchToConsensusTicker := time.NewTicker(switchToConsensusIntervalSeconds * time.Second) defer switchToConsensusTicker.Stop() @@ -445,7 +503,12 @@ func (bcR *Reactor) poolRoutine(stateSynced bool) { didProcessCh := make(chan struct{}, 1) + bcR.poolWg.Add(1) go func() { + defer bcR.poolWg.Done() + statusUpdateTicker := time.NewTicker(statusUpdateIntervalSeconds * time.Second) + // no longer stop the ticker, reuse it for sequencer mode status updates + defer statusUpdateTicker.Stop() for { select { case <-bcR.Quit(): @@ -704,12 +767,21 @@ func (bcR *Reactor) handleTheLastTMBlock(state sm.State, lastSyncable types.Sync func (bcR *Reactor) getHeight() int64 { height := bcR.store.Height() - // In sequencer mode, get height directly from l2Node (geth) - if bcR.l2Node != nil && upgrade.IsUpgraded(height+1) { + if !upgrade.IsUpgraded(height + 1) { + return height + } + // In sequencer mode, get height from state v2 or L2 node + if bcR.stateV2 != nil { + if l2Height := bcR.stateV2.LatestHeight(); l2Height > height { + return l2Height + } + } + + if bcR.l2Node != nil { if l2Block, err := bcR.l2Node.GetLatestBlockV2(); err == nil && l2Block != nil { bcR.Logger.Debug("StatusRequest", "l2Height", l2Block.Number) if l2Height := int64(l2Block.Number); l2Height > height { - height = l2Height + return l2Height } } } diff --git a/node/node.go b/node/node.go index 3256bf23f90..7a0b52b0193 100644 --- a/node/node.go +++ b/node/node.go @@ -7,6 +7,7 @@ import ( "fmt" "net" "net/http" + "os" "strings" "time" @@ -141,6 +142,8 @@ type Option func(*Node) // See: https://github.com/tendermint/tendermint/issues/4595 type blockSyncReactor interface { SwitchToBlockSync(sm.State) error + StopForReorg() error + SwitchToBlockSyncFromReorg(int64) error } // CustomReactors allows you to add custom reactors (name -> p2p.Reactor) to @@ -1092,6 +1095,19 @@ func (n *Node) OnStart() error { return fmt.Errorf("could not dial peers from persistent_peers field: %w", err) } + // Integration test hook: start a goroutine that periodically exercises + // the reorg start/stop path (Stop the blocksync reactor, then restart + // via SwitchToBlockSyncFromReorg). Only enabled when the env var is set, + // guarded inside runReorgRestartTestLoop to skip HA-enabled nodes and to + // wait until the upgrade height is crossed. + if v := os.Getenv("MORPH_TEST_REORG_RESTART_INTERVAL"); v != "" { + if d, err := time.ParseDuration(v); err == nil && d > 0 { + go n.runReorgRestartTestLoop(d) + } else { + n.Logger.Error("[REORG_TEST] invalid MORPH_TEST_REORG_RESTART_INTERVAL", "value", v, "err", err) + } + } + // Run state sync if n.stateSync { bcR, ok := n.bcReactor.(blockSyncReactor) @@ -1642,6 +1658,11 @@ func (n *Node) switchToSequencerMode() { n.consensusState.Wait() n.Logger.Info("Consensus state drained") + // Belt-and-suspenders: ensure last PBFT votes / NewRoundStepMessage have + // time to flush to peers before we tear down the reactor's gossip routines. + // This compensates for tightened PBFT timeouts (timeout_precommit / commit). + time.Sleep(1 * time.Second) + // Stop consensus reactor through the standard BaseService.Stop path so // its stopped flag is set; this prevents the P2P switch from invoking // OnStop a second time during node shutdown. OnStop will call @@ -1657,3 +1678,126 @@ func (n *Node) switchToSequencerMode() { n.startSequencerMode() }() } + +// StopReactorsBeforeReorg stops the broadcast reactor and the blocksync reactor +// in preparation for a derivation-driven reorg. After this returns, no goroutine +// from either reactor is running and no new BlockResponseV2 / BlockRequest will +// be processed. HA-enabled nodes (sequencers) are skipped — sequencers do not +// participate in automatic reorg per docs/reorg/reorg兼容问题与优化.md. +// +// Order: broadcast first (stop applying new blocks via P2P), then blocksync +// (stop sync requests). Each Stop is gated on IsRunning so this is idempotent +// and safe to call when a reactor is already stopped or was never started. +func (n *Node) StopReactorsBeforeReorg() error { + if n.ha != nil { + return nil + } + + if err := n.blockBroadcastReactor.StopForReorg(); err != nil { + return err + } + + bcR, ok := n.bcReactor.(blockSyncReactor) + if !ok { + return fmt.Errorf("this blockchain reactor does not support stopping for reorg") + } + if err := bcR.StopForReorg(); err != nil { + return err + } + return nil +} + +// StartReactorsAfterReorg restarts the blocksync and broadcast reactors after +// a reorg, with the post-reorg head height as currentHeight. The blocksync +// reactor rebuilds its BlockPool from currentHeight+1 and is responsible for +// catching up the local node back to the network tip. Once caught up its +// switchToConsensus tick re-invokes StartSequencerRoutines on the broadcast +// reactor, which is brought back to a clean state by the Reset+Start below. +// +// Reset on a still-running reactor would fail with "can't reset running ..." +// — Stop must precede it (handled by StopReactorsBeforeReorg). The IsRunning +// check below makes this method idempotent in the rare case of partial state +// (e.g., previous Stop returned an error). +func (n *Node) StartReactorsAfterReorg(currentHeight int64) error { + if n.ha != nil { + return nil + } + + bcR, ok := n.bcReactor.(blockSyncReactor) + if !ok { + return fmt.Errorf("this blockchain reactor does not support reorg restart") + } + + // Reset() requires the reactor to be in stopped state. It can legitimately + // fail when the reactor was never started yet (e.g. very early init paths) + // or when StopReactorsBeforeReorg silently skipped Stop because the reactor + // wasn't running. Log and continue rather than abort the reorg flow — + // surfacing the error gives observability without blocking recovery. + if err := n.blockBroadcastReactor.Reset(); err != nil { + n.Logger.Error("reset broadcast reactor failed; continuing", "err", err) + } + return bcR.SwitchToBlockSyncFromReorg(currentHeight) +} + +// testStartStopWhenReorg is the integration-test harness for the reorg +// reactor lifecycle: drive a single Stop -> Start cycle and surface any +// errors via logs. +func (n *Node) testStartStopWhenReorg() { + if err := n.StopReactorsBeforeReorg(); err != nil { + n.Logger.Error("[REORG_TEST] StopReactorsBeforeReorg failed", "err", err) + return + } + + time.Sleep(20 * time.Second) + if err := n.StartReactorsAfterReorg(n.stateV2.LatestHeight()); err != nil { + n.Logger.Error("[REORG_TEST] StartReactorsAfterReorg failed", "err", err) + } +} + +// runReorgRestartTestLoop is an integration-test hook. When the env var +// MORPH_TEST_REORG_RESTART_INTERVAL is set to a Go duration string (e.g. "5s"), +// this loop fires testStartStopWhenReorg() at that cadence after the chain has +// crossed the upgrade height. Used to verify that repeated Stop/Reset/Start of +// the blocksync reactor on a fullnode does not break sync, leak goroutines, or +// panic. Skips HA-enabled nodes (their StopReactorsBeforeReorg is a no-op). +func (n *Node) runReorgRestartTestLoop(interval time.Duration) { + if n.ha != nil { + n.Logger.Info("[REORG_TEST] skipping on HA node") + return + } + n.Logger.Info("[REORG_TEST] waiting for upgrade height", "interval", interval) + + for { + select { + case <-n.Quit(): + return + case <-time.After(2 * time.Second): + } + if n.stateV2 == nil { + continue + } + h := n.stateV2.LatestHeight() + if h > 0 && upgrade.IsUpgraded(h+1) { + break + } + } + + n.Logger.Info("[REORG_TEST] entering reorg-restart loop", "interval", interval) + ticker := time.NewTicker(interval) + defer ticker.Stop() + cycle := 0 + for { + select { + case <-n.Quit(): + n.Logger.Info("[REORG_TEST] loop stopped", "cycles", cycle) + return + case <-ticker.C: + cycle++ + before := n.stateV2.LatestHeight() + n.Logger.Info("[REORG_TEST] cycle begin", "cycle", cycle, "height", before) + n.testStartStopWhenReorg() + after := n.stateV2.LatestHeight() + n.Logger.Info("[REORG_TEST] cycle end", "cycle", cycle, "heightBefore", before, "heightAfter", after) + } + } +} diff --git a/sequencer/block_cache.go b/sequencer/block_cache.go index ff71a1759ba..dca239b89bf 100644 --- a/sequencer/block_cache.go +++ b/sequencer/block_cache.go @@ -193,3 +193,20 @@ func (rb *BlockRingBuffer) RollbackTo(targetHeight uint64) { defer rb.mtx.Unlock() rb.rollbackTo(targetHeight) } + +// Clear drops every cached block. Used during a reactor reset (e.g. reorg +// path via OnReset) when the chain head may have moved backwards and any +// previously cached block could be off-chain. Subsequent Add() calls +// repopulate the cache from the new head. +func (rb *BlockRingBuffer) Clear() { + rb.mtx.Lock() + defer rb.mtx.Unlock() + for i := range rb.blocks { + rb.blocks[i] = nil + } + rb.byHeight = make(map[uint64]*BlockV2) + rb.byHash = make(map[common.Hash]*BlockV2) + rb.position = make(map[uint64]int) + rb.minHeight = 0 + rb.maxHeight = 0 +} diff --git a/sequencer/broadcast_reactor.go b/sequencer/broadcast_reactor.go index 39ac1cb9221..00e217676d4 100644 --- a/sequencer/broadcast_reactor.go +++ b/sequencer/broadcast_reactor.go @@ -71,7 +71,13 @@ type BlockBroadcastReactor struct { p2p.BaseReactor stateV2 *StateV2 - pool BlockPool + + // poolMu guards pool. The blocksync reactor rebuilds its BlockPool on reorg + // (see blocksync.Reactor.SwitchToBlockSyncFromReorg) and pushes the new + // instance through SetPool, while peer-height lookups here run concurrently + // from message handlers. + poolMu sync.RWMutex + pool BlockPool recentBlocks *BlockRingBuffer // Applied blocks for peer requests pendingCache *PendingBlockCache // Pending blocks @@ -150,6 +156,25 @@ func (r *BlockBroadcastReactor) SetLogger(l log.Logger) { r.logger = l.With("module", "broadcastReactor") } +// SetPool swaps the BlockPool reference used for peer-height lookups. The +// blocksync reactor calls this after rebuilding its pool (e.g. during a reorg +// restart) so that peer selection in this reactor follows the new pool's view +// of peer heights instead of the dead pool's frozen state. +func (r *BlockBroadcastReactor) SetPool(pool BlockPool) { + r.poolMu.Lock() + defer r.poolMu.Unlock() + r.pool = pool +} + +// getPool returns the current pool reference under the read lock. Callers +// hold a local copy of the interface value, which keeps the underlying pool +// reachable for the duration of their call even if SetPool swaps it. +func (r *BlockBroadcastReactor) getPool() BlockPool { + r.poolMu.RLock() + defer r.poolMu.RUnlock() + return r.pool +} + // OnStart is intentionally a no-op. Sequencer routines are NOT started by // the service lifecycle — they are started explicitly by // StartSequencerRoutines() which is invoked by: @@ -167,6 +192,13 @@ func (r *BlockBroadcastReactor) OnStart() error { } func (r *BlockBroadcastReactor) StartSequencerRoutines() error { + // only make sure `isRunning()` is true + if !r.IsRunning() { + if err := r.Start(); err != nil { + return err + } + } + r.startMu.Lock() defer r.startMu.Unlock() @@ -198,6 +230,41 @@ func (r *BlockBroadcastReactor) StartSequencerRoutines() error { func (r *BlockBroadcastReactor) OnStop() { r.logger.Info("Stopping BlockBroadcastReactor") + + if err := r.stateV2.Stop(); err != nil { + r.logger.Error("failed to stop StateV2", "err", err) + } + + r.startMu.Lock() + r.routinesStarted.Store(false) + r.startMu.Unlock() + + r.bannedPeersMu.Lock() + r.bannedPeers = make(map[p2p.ID]time.Time) + r.bannedPeersMu.Unlock() + + r.syncRequestsMu.Lock() + r.syncRequests = make(map[int64]*SyncReq) + r.syncPeerCounts = make(map[p2p.ID]int) + r.syncRequestsMu.Unlock() + + r.recentBlocks.Clear() + r.pendingCache.Clear() + r.seenBlocks.Clear() + r.peerGossiped.Clear() +} + +func (r *BlockBroadcastReactor) StopForReorg() error { + if r.IsRunning() { + if err := r.Stop(); err != nil { + return err + } + } + return nil +} + +func (r *BlockBroadcastReactor) OnReset() error { + return r.stateV2.Reset() } func (r *BlockBroadcastReactor) GetChannels() []*p2p.ChannelDescriptor { @@ -469,7 +536,7 @@ func (r *BlockBroadcastReactor) tryApplyFromCache() { // All sync requests go through this method (no longer uses blocksync pool) func (r *BlockBroadcastReactor) checkSyncGap() { localHeight := r.stateV2.LatestHeight() - maxPeerHeight := r.pool.MaxPeerHeight() + maxPeerHeight := r.getPool().MaxPeerHeight() gap := maxPeerHeight - localHeight r.logger.Debug("Checking sync goroutines", "gap", gap, "localHeight", localHeight, "maxPeerHeight", maxPeerHeight) if gap <= smallGapThreshold { @@ -533,7 +600,7 @@ func (r *BlockBroadcastReactor) findPeerWithHeight(peers []p2p.Peer, height int6 start := rand.Intn(n) //nolint:gosec // non-security: peer selection randomization for i := 0; i < n; i++ { peer := peers[(start+i)%n] - if r.pool.GetPeerHeight(peer.ID()) >= height && + if r.getPool().GetPeerHeight(peer.ID()) >= height && r.pendingSyncCountByPeer(peer.ID()) < maxPendingSyncPerPeer { return peer } @@ -809,6 +876,7 @@ func (r *BlockBroadcastReactor) onBlockRequest(msg *bcproto.BlockRequest, src p2 } else { r.logger.Error("Signature not found in store, sending block without signature", "height", msg.Height, "hash", block.Hash.Hex(), "err", err) + return } } diff --git a/sequencer/hash_set.go b/sequencer/hash_set.go index 5c7b903db64..bb10296dbd8 100644 --- a/sequencer/hash_set.go +++ b/sequencer/hash_set.go @@ -59,6 +59,16 @@ func (s *HashSet) Contains(hash common.Hash) bool { return exists } +// Clear drops every entry. The backing items slice is reused; subsequent +// Add() calls overwrite it. Used during reactor reset. +func (s *HashSet) Clear() { + s.mtx.Lock() + defer s.mtx.Unlock() + s.index = make(map[common.Hash]int, s.capacity) + s.head = 0 + s.count = 0 +} + // PeerHashSet is a per-peer hash set. type PeerHashSet struct { peers map[string]*HashSet @@ -115,3 +125,10 @@ func (s *PeerHashSet) RemovePeer(peerID string) { defer s.mtx.Unlock() delete(s.peers, peerID) } + +// Clear drops all per-peer hash sets. Used during reactor reset. +func (s *PeerHashSet) Clear() { + s.mtx.Lock() + defer s.mtx.Unlock() + s.peers = make(map[string]*HashSet) +} diff --git a/sequencer/pending_cache.go b/sequencer/pending_cache.go index 9e3af60d1bb..ea887f0a3b0 100644 --- a/sequencer/pending_cache.go +++ b/sequencer/pending_cache.go @@ -136,3 +136,14 @@ func (c *PendingBlockCache) Size() int { defer c.mtx.RUnlock() return len(c.blocks) } + +// Clear drops every pending block. Used during reactor reset (reorg path): +// after the chain head moves, previously cached future / fork blocks may +// reference parents that no longer exist, so we discard the lot rather +// than try to reconcile. +func (c *PendingBlockCache) Clear() { + c.mtx.Lock() + defer c.mtx.Unlock() + c.blocks = make(map[common.Hash]*BlockV2) + c.byParent = make(map[common.Hash][]*BlockV2) +} diff --git a/sequencer/state_v2.go b/sequencer/state_v2.go index 35a8d77cddb..1cbf162c8e0 100644 --- a/sequencer/state_v2.go +++ b/sequencer/state_v2.go @@ -48,9 +48,6 @@ type StateV2 struct { // Broadcast channel - non-HA self-produced blocks are sent here broadcastCh chan *BlockV2 - - // Lifecycle - quitCh chan struct{} } // NewStateV2 creates a new StateV2 instance. @@ -78,7 +75,6 @@ func NewStateV2( fastBlockInterval: FastBlockInterval, logger: logger.With("module", "stateV2"), broadcastCh: make(chan *BlockV2, 100), - quitCh: make(chan struct{}), } s.BaseService = *service.NewBaseService(logger, "StateV2", s) @@ -125,12 +121,14 @@ func (s *StateV2) OnStart() error { // OnStop implements service.Service. func (s *StateV2) OnStop() { s.logger.Info("Stopping StateV2") - close(s.quitCh) if s.ha != nil { s.ha.Stop() } } +func (s *StateV2) OnReset() error { + return nil +} // roleCheckRoutine is the unified loop for role detection and block production. // It runs for all nodes with a signer (ActiveSequencer, HA-Leader, HA-Follower). @@ -152,7 +150,7 @@ func (s *StateV2) roleCheckRoutine() { for { select { - case <-s.quitCh: + case <-s.Quit(): s.logger.Info("Role check routine stopped") return @@ -412,9 +410,3 @@ func (s *StateV2) HASubscribe() <-chan *BlockV2 { } return s.ha.Subscribe() } - -// IsSequencerMode returns whether this node has a signer configured. -// Deprecated: use HasSigner() instead. TODO: remove after all callers are updated. -func (s *StateV2) IsSequencerMode() bool { - return s.signer != nil -} diff --git a/sequencer/state_v2_test.go b/sequencer/state_v2_test.go index 95eb5a374a8..86c5d8fe2b7 100644 --- a/sequencer/state_v2_test.go +++ b/sequencer/state_v2_test.go @@ -63,12 +63,12 @@ func newMockSequencerHA(leader bool) *mockSequencerHA { } } -func (m *mockSequencerHA) Start() error { return nil } -func (m *mockSequencerHA) Stop() {} -func (m *mockSequencerHA) IsLeader() bool { return m.leader } -func (m *mockSequencerHA) Join() error { return nil } -func (m *mockSequencerHA) Commit(block *BlockV2) error { return m.commitErr } -func (m *mockSequencerHA) Subscribe() <-chan *BlockV2 { return m.subCh } +func (m *mockSequencerHA) Start() error { return nil } +func (m *mockSequencerHA) Stop() {} +func (m *mockSequencerHA) IsLeader() bool { return m.leader } +func (m *mockSequencerHA) Join() error { return nil } +func (m *mockSequencerHA) Commit(block *BlockV2) error { return m.commitErr } +func (m *mockSequencerHA) Subscribe() <-chan *BlockV2 { return m.subCh } func (m *mockSequencerHA) SetOnBlockApplied(fn func(*BlockV2) error) {} // newTestMockL2Node creates a mock L2Node for testing. @@ -115,13 +115,13 @@ func TestStateV2_HasSigner_MatchesIsSequencerMode(t *testing.T) { // Without signer s1, _ := NewStateV2(mockL2Node, logger, mockVerifier, nil, nil, nil) - if s1.HasSigner() || s1.IsSequencerMode() { + if s1.HasSigner() { t.Error("should be false when signer is nil") } // With signer s2, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockSignerImpl{}, nil, nil) - if !s2.HasSigner() || !s2.IsSequencerMode() { + if !s2.HasSigner() { t.Error("should be true when signer is provided") } } From b1b3a3a1d8069b172991fe8070410b3697a6e4db Mon Sep 17 00:00:00 2001 From: "allen.wu" Date: Fri, 29 May 2026 17:53:05 +0800 Subject: [PATCH 11/15] fix: remove test code --- node/node.go | 76 ---------------------------------------------------- 1 file changed, 76 deletions(-) diff --git a/node/node.go b/node/node.go index 7a0b52b0193..45ae07b592f 100644 --- a/node/node.go +++ b/node/node.go @@ -7,7 +7,6 @@ import ( "fmt" "net" "net/http" - "os" "strings" "time" @@ -1095,19 +1094,6 @@ func (n *Node) OnStart() error { return fmt.Errorf("could not dial peers from persistent_peers field: %w", err) } - // Integration test hook: start a goroutine that periodically exercises - // the reorg start/stop path (Stop the blocksync reactor, then restart - // via SwitchToBlockSyncFromReorg). Only enabled when the env var is set, - // guarded inside runReorgRestartTestLoop to skip HA-enabled nodes and to - // wait until the upgrade height is crossed. - if v := os.Getenv("MORPH_TEST_REORG_RESTART_INTERVAL"); v != "" { - if d, err := time.ParseDuration(v); err == nil && d > 0 { - go n.runReorgRestartTestLoop(d) - } else { - n.Logger.Error("[REORG_TEST] invalid MORPH_TEST_REORG_RESTART_INTERVAL", "value", v, "err", err) - } - } - // Run state sync if n.stateSync { bcR, ok := n.bcReactor.(blockSyncReactor) @@ -1739,65 +1725,3 @@ func (n *Node) StartReactorsAfterReorg(currentHeight int64) error { return bcR.SwitchToBlockSyncFromReorg(currentHeight) } -// testStartStopWhenReorg is the integration-test harness for the reorg -// reactor lifecycle: drive a single Stop -> Start cycle and surface any -// errors via logs. -func (n *Node) testStartStopWhenReorg() { - if err := n.StopReactorsBeforeReorg(); err != nil { - n.Logger.Error("[REORG_TEST] StopReactorsBeforeReorg failed", "err", err) - return - } - - time.Sleep(20 * time.Second) - if err := n.StartReactorsAfterReorg(n.stateV2.LatestHeight()); err != nil { - n.Logger.Error("[REORG_TEST] StartReactorsAfterReorg failed", "err", err) - } -} - -// runReorgRestartTestLoop is an integration-test hook. When the env var -// MORPH_TEST_REORG_RESTART_INTERVAL is set to a Go duration string (e.g. "5s"), -// this loop fires testStartStopWhenReorg() at that cadence after the chain has -// crossed the upgrade height. Used to verify that repeated Stop/Reset/Start of -// the blocksync reactor on a fullnode does not break sync, leak goroutines, or -// panic. Skips HA-enabled nodes (their StopReactorsBeforeReorg is a no-op). -func (n *Node) runReorgRestartTestLoop(interval time.Duration) { - if n.ha != nil { - n.Logger.Info("[REORG_TEST] skipping on HA node") - return - } - n.Logger.Info("[REORG_TEST] waiting for upgrade height", "interval", interval) - - for { - select { - case <-n.Quit(): - return - case <-time.After(2 * time.Second): - } - if n.stateV2 == nil { - continue - } - h := n.stateV2.LatestHeight() - if h > 0 && upgrade.IsUpgraded(h+1) { - break - } - } - - n.Logger.Info("[REORG_TEST] entering reorg-restart loop", "interval", interval) - ticker := time.NewTicker(interval) - defer ticker.Stop() - cycle := 0 - for { - select { - case <-n.Quit(): - n.Logger.Info("[REORG_TEST] loop stopped", "cycles", cycle) - return - case <-ticker.C: - cycle++ - before := n.stateV2.LatestHeight() - n.Logger.Info("[REORG_TEST] cycle begin", "cycle", cycle, "height", before) - n.testStartStopWhenReorg() - after := n.stateV2.LatestHeight() - n.Logger.Info("[REORG_TEST] cycle end", "cycle", cycle, "heightBefore", before, "heightAfter", after) - } - } -} From ee68e1bcf49a4b41b444ce51d05b4cc13e8dd4e6 Mon Sep 17 00:00:00 2001 From: "allen.wu" Date: Tue, 2 Jun 2026 16:53:46 +0800 Subject: [PATCH 12/15] feat: optimize sync check to collaborate better with derivation --- blocksync/reactor.go | 5 +++-- sequencer/broadcast_reactor.go | 20 +++++++++++++++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/blocksync/reactor.go b/blocksync/reactor.go index 531a6f937aa..b2ed8ec0519 100644 --- a/blocksync/reactor.go +++ b/blocksync/reactor.go @@ -27,8 +27,8 @@ const ( // within this much of the system time. // stopSyncingDurationMinutes = 10 - // ask for best height every 10s - statusUpdateIntervalSeconds = 10 + // ask for best height every 5s + statusUpdateIntervalSeconds = 5 // check if we should switch to consensus reactor switchToConsensusIntervalSeconds = 1 ) @@ -570,6 +570,7 @@ FOR_LOOP: bcR.Logger.Info("Switching to sequencer mode", "height", height) seqR, ok := bcR.Switch.Reactor("SEQUENCER").(sequencerReactor) if ok { + bcR.BroadcastStatusRequest() if err := seqR.StartSequencerRoutines(); err != nil { bcR.Logger.Error("Failed to start sequencer mode", "err", err) } diff --git a/sequencer/broadcast_reactor.go b/sequencer/broadcast_reactor.go index 00e217676d4..2abf2f16706 100644 --- a/sequencer/broadcast_reactor.go +++ b/sequencer/broadcast_reactor.go @@ -22,7 +22,7 @@ const ( BlockBroadcastChannel = byte(0x50) // For block broadcast (requires signature verification) SequencerSyncChannel = byte(0x51) // For block sync requests (no signature verification) - smallGapThreshold = 20 // Gap for direct block request + smallGapThreshold = 10 // Gap for direct block request recentBlocksCapacity = 1000 // Recent applied blocks cache seenBlocksCapacity = 2000 // Seen blocks for dedup peerGossipedCapacity = 500 // Per-peer gossiped tracking @@ -119,6 +119,8 @@ type BlockBroadcastReactor struct { // across restarts. bannedPeersMu sync.Mutex bannedPeers map[p2p.ID]time.Time + + firstSync atomic.Bool } // NewBlockBroadcastReactor creates a new reactor. Routines are NOT started @@ -202,6 +204,8 @@ func (r *BlockBroadcastReactor) StartSequencerRoutines() error { r.startMu.Lock() defer r.startMu.Unlock() + r.firstSync.Store(true) + if r.routinesStarted.Load() { return nil } @@ -212,6 +216,10 @@ func (r *BlockBroadcastReactor) StartSequencerRoutines() error { } } + // Open the Receive() gate: from this point on, incoming P2P messages + // will be processed instead of being silently dropped. + r.routinesStarted.Store(true) + // Fullnode (no signer): only applyRoutine (P2P sync) // Nodes with signer (ActiveSeq / HA-Leader / HA-Follower): only broadcastRoutine // - roleCheckRoutine is started by StateV2.OnStart() @@ -222,9 +230,6 @@ func (r *BlockBroadcastReactor) StartSequencerRoutines() error { go r.applyRoutine() } - // Open the Receive() gate: from this point on, incoming P2P messages - // will be processed instead of being silently dropped. - r.routinesStarted.Store(true) return nil } @@ -434,6 +439,10 @@ func (r *BlockBroadcastReactor) applyRoutine() { defer tickerSync.Stop() defer tickerApply.Stop() + // sync immediately + r.requestMissingBlocks(r.stateV2.LatestHeight()+1, r.getPool().MaxPeerHeight()) + r.tryApplyFromCache() + for { select { case <-r.Quit(): @@ -539,7 +548,8 @@ func (r *BlockBroadcastReactor) checkSyncGap() { maxPeerHeight := r.getPool().MaxPeerHeight() gap := maxPeerHeight - localHeight r.logger.Debug("Checking sync goroutines", "gap", gap, "localHeight", localHeight, "maxPeerHeight", maxPeerHeight) - if gap <= smallGapThreshold { + if gap <= smallGapThreshold && + !r.firstSync.CompareAndSwap(true, false) { return } From 34710642b15a22ae1b39cd845f939e4320644428 Mon Sep 17 00:00:00 2001 From: running-tomato <31307926+tomatoishealthy@users.noreply.github.com> Date: Mon, 15 Jun 2026 14:15:10 +0800 Subject: [PATCH 13/15] feat: add L1 latency halt logic (#38) Co-authored-by: allen.wu --- node/node.go | 7 ++- rpc/test/helpers.go | 1 + sequencer/broadcast_reactor.go | 20 ++++++- sequencer/broadcast_reactor_test.go | 30 +++++++++++ sequencer/interfaces.go | 11 ++++ sequencer/state_v2.go | 21 +++++--- sequencer/state_v2_test.go | 81 ++++++++++++++++++++--------- test/e2e/node/main.go | 1 + 8 files changed, 139 insertions(+), 33 deletions(-) diff --git a/node/node.go b/node/node.go index 45ae07b592f..a6d0744aa36 100644 --- a/node/node.go +++ b/node/node.go @@ -112,6 +112,7 @@ func DefaultNewNode(config *cfg.Config, logger log.Logger) (*Node, error) { DefaultMetricsProvider(config.Instrumentation), logger, nil, // sequencerVerifier + nil, // l1Tracker nil, // sequencerSigner nil, // ha: no HA in default node ) @@ -503,6 +504,7 @@ func createSequencerComponents( pool *bc.BlockPool, logger log.Logger, verifier sequencer.SequencerVerifier, + l1Tracker sequencer.L1Tracker, signer sequencer.Signer, sigStore *sequencer.SignatureStore, ha sequencer.SequencerHA, @@ -512,6 +514,7 @@ func createSequencerComponents( l2Node, logger, verifier, + l1Tracker, signer, sigStore, ha, @@ -528,6 +531,7 @@ func createSequencerComponents( stateV2, logger, verifier, + l1Tracker, sigStore, ) broadcastReactor.SetLogger(logger.With("module", "sequencer")) @@ -783,6 +787,7 @@ func NewNode( metricsProvider MetricsProvider, logger log.Logger, sequencerVerifier sequencer.SequencerVerifier, + l1Tracker sequencer.L1Tracker, sequencerSigner sequencer.Signer, ha sequencer.SequencerHA, options ...Option, @@ -1015,6 +1020,7 @@ func NewNode( bcR.Pool(), logger, sequencerVerifier, + l1Tracker, sequencerSigner, sigStore, ha, // HA service injected from NewNode caller; nil disables HA mode @@ -1724,4 +1730,3 @@ func (n *Node) StartReactorsAfterReorg(currentHeight int64) error { } return bcR.SwitchToBlockSyncFromReorg(currentHeight) } - diff --git a/rpc/test/helpers.go b/rpc/test/helpers.go index 04197b75422..c1dce04961f 100644 --- a/rpc/test/helpers.go +++ b/rpc/test/helpers.go @@ -179,6 +179,7 @@ func NewTendermint(app abci.Application, opts *Options) *nm.Node { nm.DefaultMetricsProvider(config.Instrumentation), logger, nil, // sequencerVerifier + nil, // sequencerHealthGate nil, // sequencerSigner nil, // ha: no HA in RPC test node ) diff --git a/sequencer/broadcast_reactor.go b/sequencer/broadcast_reactor.go index 2abf2f16706..fb7509210d4 100644 --- a/sequencer/broadcast_reactor.go +++ b/sequencer/broadcast_reactor.go @@ -99,8 +99,9 @@ type BlockBroadcastReactor struct { routinesStarted atomic.Bool logger log.Logger - verifier SequencerVerifier - sigStore *SignatureStore + verifier SequencerVerifier + l1Tracker L1Tracker // required: gates peer-block sync on L1 freshness + sigStore *SignatureStore // syncRequests tracks pending sync channel requests, keyed by height. // Used to reject unsolicited responses before decode/verification. @@ -132,6 +133,7 @@ func NewBlockBroadcastReactor( stateV2 *StateV2, logger log.Logger, verifier SequencerVerifier, + l1Tracker L1Tracker, sigStore *SignatureStore, ) *BlockBroadcastReactor { r := &BlockBroadcastReactor{ @@ -147,6 +149,7 @@ func NewBlockBroadcastReactor( blockReqLimiter: NewPeerRateLimiter(blockRequestRateLimit, blockRequestBurst), logger: logger.With("module", "broadcastReactor"), verifier: verifier, + l1Tracker: l1Tracker, sigStore: sigStore, } r.BaseReactor = *p2p.NewBaseReactor("BlockBroadcast", r) @@ -303,6 +306,14 @@ func (r *BlockBroadcastReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte if !r.routinesStarted.Load() { return } + // L1 tracker: while L1 is stale, drop all inbound P2P messages. We may be + // blind to L1 SequencerUpdated events, so we neither accept/verify peer + // blocks (verification uses the verifier's stale L1-derived sequencer set, + // which could let a revoked-sequencer block through or mis-ban honest peers) + // nor serve sync requests. Placed before decode so nothing is processed. + if r.l1Tracker.IsHalt() { + return + } r.logger.Debug("Receive message", "chId", chID, "src", src.ID(), "len", len(msgBytes)) msg, err := decodeMsg(msgBytes) if err != nil { @@ -544,6 +555,11 @@ func (r *BlockBroadcastReactor) tryApplyFromCache() { // checkSyncGap: request missing blocks via SequencerSyncChannel // All sync requests go through this method (no longer uses blocksync pool) func (r *BlockBroadcastReactor) checkSyncGap() { + // L1 tracker: while L1 is stale, do not actively pull blocks from peers. + if r.l1Tracker.IsHalt() { + return + } + localHeight := r.stateV2.LatestHeight() maxPeerHeight := r.getPool().MaxPeerHeight() gap := maxPeerHeight - localHeight diff --git a/sequencer/broadcast_reactor_test.go b/sequencer/broadcast_reactor_test.go index 85315dd6b4a..611ae74a03a 100644 --- a/sequencer/broadcast_reactor_test.go +++ b/sequencer/broadcast_reactor_test.go @@ -274,3 +274,33 @@ func TestBanPeer_PersistentPeerAddPeerNotRejected(t *testing.T) { require.False(t, r.isBanned(mp.ID()), "persistent peer must not be considered banned after misbehavior") } + +// ---------------------------------------------------------------------------- +// L1 health gate on the sync path +// ---------------------------------------------------------------------------- + +func TestReceive_L1HaltDropsMessages(t *testing.T) { + // routinesStarted=true and L1 halted: Receive must drop the message at the + // L1 gate, before decode/verify/ban — so it must NOT panic even though + // stateV2/Switch are nil and the payload is garbage. + r := newReactorForTest() + r.logger = log.NewNopLogger() + r.routinesStarted.Store(true) + r.l1Tracker = &mockL1Tracker{halt: true} + + require.NotPanics(t, func() { + r.Receive(BlockBroadcastChannel, nil, []byte("garbage")) + }, "Receive should drop messages at the L1 gate when halted, before decode") +} + +func TestCheckSyncGap_L1HaltBlocksRequests(t *testing.T) { + // With L1 halted, checkSyncGap must return before calling + // stateV2.LatestHeight()/getPool() (both nil here) — so it must NOT panic. + r := newReactorForTest() + r.logger = log.NewNopLogger() + r.l1Tracker = &mockL1Tracker{halt: true} + + require.NotPanics(t, func() { + r.checkSyncGap() + }, "checkSyncGap should early-return at the L1 gate before dereferencing nil pool/stateV2") +} diff --git a/sequencer/interfaces.go b/sequencer/interfaces.go index 2a1c09d7336..b9192ff4a2c 100644 --- a/sequencer/interfaces.go +++ b/sequencer/interfaces.go @@ -78,3 +78,14 @@ type SequencerHA interface { // Must be called before Start(). SetOnBlockApplied(fn func(*BlockV2) error) } + +// L1Tracker reports whether L1 RPC has fallen too far behind for this node to +// safely act. When L1 is stale, the node may be blind to L1 SequencerUpdated +// events, so the sequencer must stop producing and fullnodes must stop syncing +// to avoid following a revoked sequencer. Implemented by the L1 tracker in the +// node binary and required by StateV2 / the broadcast reactor. +type L1Tracker interface { + // IsHalt reports whether L1 is stale enough that this node must halt block + // production and sync. + IsHalt() bool +} diff --git a/sequencer/state_v2.go b/sequencer/state_v2.go index 1cbf162c8e0..c5f2e13f501 100644 --- a/sequencer/state_v2.go +++ b/sequencer/state_v2.go @@ -35,12 +35,13 @@ type StateV2 struct { latestBlock *BlockV2 // Dependencies - l2Node l2node.L2Node - signer Signer - verifier SequencerVerifier - sigStore *SignatureStore - ha SequencerHA // nil = single-node mode - logger log.Logger + l2Node l2node.L2Node + signer Signer + verifier SequencerVerifier + l1Tracker L1Tracker // required: gates block production on L1 freshness + sigStore *SignatureStore + ha SequencerHA // nil = single-node mode + logger log.Logger // Block production blockInterval time.Duration // empty-block fallback interval (default 3s) @@ -57,6 +58,7 @@ func NewStateV2( l2Node l2node.L2Node, logger log.Logger, verifier SequencerVerifier, + l1Tracker L1Tracker, signer Signer, sigStore *SignatureStore, ha SequencerHA, @@ -69,6 +71,7 @@ func NewStateV2( l2Node: l2Node, signer: signer, verifier: verifier, + l1Tracker: l1Tracker, sigStore: sigStore, ha: ha, blockInterval: BlockInterval, @@ -203,6 +206,12 @@ func (s *StateV2) isActiveSequencer() bool { return false } + // L1 tracker: stop producing if L1 RPC is stale (we may be blind to + // SequencerUpdated events on L1 and could produce as a revoked sequencer). + if s.l1Tracker.IsHalt() { + return false + } + s.mtx.RLock() lb := s.latestBlock s.mtx.RUnlock() diff --git a/sequencer/state_v2_test.go b/sequencer/state_v2_test.go index 86c5d8fe2b7..054f57bc192 100644 --- a/sequencer/state_v2_test.go +++ b/sequencer/state_v2_test.go @@ -84,7 +84,7 @@ func TestStateV2_NewStateV2(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - stateV2, err := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) + stateV2, err := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil, nil) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } @@ -97,7 +97,7 @@ func TestStateV2_LatestHeight(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - stateV2, err := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) + stateV2, err := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil, nil) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } @@ -114,13 +114,13 @@ func TestStateV2_HasSigner_MatchesIsSequencerMode(t *testing.T) { mockVerifier := &mockSequencerVerifier{} // Without signer - s1, _ := NewStateV2(mockL2Node, logger, mockVerifier, nil, nil, nil) + s1, _ := NewStateV2(mockL2Node, logger, mockVerifier, nil, nil, nil, nil) if s1.HasSigner() { t.Error("should be false when signer is nil") } // With signer - s2, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockSignerImpl{}, nil, nil) + s2, _ := NewStateV2(mockL2Node, logger, mockVerifier, nil, &mockSignerImpl{}, nil, nil) if !s2.HasSigner() { t.Error("should be true when signer is provided") } @@ -133,7 +133,7 @@ func TestStateV2_SignBlock(t *testing.T) { mockSigner := &mockSignerImpl{signature: make([]byte, 65)} mockVerifier := &mockSequencerVerifier{} - stateV2, err := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) + stateV2, err := NewStateV2(mockL2Node, logger, mockVerifier, &mockL1Tracker{halt: false}, mockSigner, nil, nil) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } @@ -155,7 +155,7 @@ func TestStateV2_SignBlockWithoutSigner(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - stateV2, err := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) + stateV2, err := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil, nil) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } @@ -175,14 +175,14 @@ func TestNewStateV2_VerifierRequired(t *testing.T) { logger := log.NewNopLogger() // verifier==nil should fail - _, err := NewStateV2(mockL2Node, logger, nil, nil, nil, nil) + _, err := NewStateV2(mockL2Node, logger, nil, nil, nil, nil, nil) if err == nil { t.Fatal("expected error when verifier is nil") } // with signer, still fails without verifier mockSigner := &mockSignerImpl{} - _, err = NewStateV2(mockL2Node, logger, nil, mockSigner, nil, nil) + _, err = NewStateV2(mockL2Node, logger, nil, nil, mockSigner, nil, nil) if err == nil { t.Fatal("expected error when verifier is nil (with signer)") } @@ -195,7 +195,7 @@ func TestNewStateV2_WithHA(t *testing.T) { mockVerifier := &mockSequencerVerifier{} ha := newMockSequencerHA(true) - stateV2, err := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, ha) + stateV2, err := NewStateV2(mockL2Node, logger, mockVerifier, &mockL1Tracker{halt: false}, mockSigner, nil, ha) if err != nil { t.Fatalf("NewStateV2 failed: %v", err) } @@ -212,14 +212,14 @@ func TestStateV2_HasSigner(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - fullnode, _ := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) + fullnode, _ := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil, nil) if fullnode.HasSigner() { t.Error("Fullnode should not have signer") } mockSigner := &mockSignerImpl{} mockVerifier := &mockSequencerVerifier{} - seqNode, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) + seqNode, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockL1Tracker{halt: false}, mockSigner, nil, nil) if !seqNode.HasSigner() { t.Error("Sequencer node should have signer") } @@ -232,13 +232,13 @@ func TestStateV2_IsHAMode(t *testing.T) { mockVerifier := &mockSequencerVerifier{} // Non-HA - nonHA, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) + nonHA, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockL1Tracker{halt: false}, mockSigner, nil, nil) if nonHA.IsHAMode() { t.Error("IsHAMode should be false without ha") } // HA - ha, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, newMockSequencerHA(false)) + ha, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockL1Tracker{halt: false}, mockSigner, nil, newMockSequencerHA(false)) if !ha.IsHAMode() { t.Error("IsHAMode should be true with ha") } @@ -251,19 +251,19 @@ func TestStateV2_IsHALeader(t *testing.T) { mockVerifier := &mockSequencerVerifier{} // Non-HA: never leader - nonHA, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) + nonHA, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockL1Tracker{halt: false}, mockSigner, nil, nil) if nonHA.IsHALeader() { t.Error("non-HA node should not be HA leader") } // HA follower - follower, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, newMockSequencerHA(false)) + follower, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockL1Tracker{halt: false}, mockSigner, nil, newMockSequencerHA(false)) if follower.IsHALeader() { t.Error("HA follower should not be leader") } // HA leader - leader, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, newMockSequencerHA(true)) + leader, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockL1Tracker{halt: false}, mockSigner, nil, newMockSequencerHA(true)) if !leader.IsHALeader() { t.Error("HA leader should be leader") } @@ -279,7 +279,7 @@ func TestStateV2_IsActiveSequencer_NonHA_Active(t *testing.T) { mockSigner := &mockSignerImpl{address: common.HexToAddress("0x1")} mockVerifier := &mockSequencerVerifier{isSequencer: true} - s, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) + s, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockL1Tracker{halt: false}, mockSigner, nil, nil) s.latestBlock = &BlockV2{Number: 0} if !s.isActiveSequencer() { @@ -293,7 +293,7 @@ func TestStateV2_IsActiveSequencer_NonHA_Inactive(t *testing.T) { mockSigner := &mockSignerImpl{address: common.HexToAddress("0x1")} mockVerifier := &mockSequencerVerifier{isSequencer: false} - s, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) + s, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockL1Tracker{halt: false}, mockSigner, nil, nil) s.latestBlock = &BlockV2{Number: 0} if s.isActiveSequencer() { @@ -308,7 +308,7 @@ func TestStateV2_IsActiveSequencer_HA_Leader(t *testing.T) { mockVerifier := &mockSequencerVerifier{isSequencer: true} ha := newMockSequencerHA(true) - s, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, ha) + s, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockL1Tracker{halt: false}, mockSigner, nil, ha) s.latestBlock = &BlockV2{Number: 0} if !s.isActiveSequencer() { @@ -323,7 +323,7 @@ func TestStateV2_IsActiveSequencer_HA_Follower(t *testing.T) { mockVerifier := &mockSequencerVerifier{isSequencer: true} // L1 says active ha := newMockSequencerHA(false) // but not leader - s, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, ha) + s, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockL1Tracker{halt: false}, mockSigner, nil, ha) s.latestBlock = &BlockV2{Number: 0} if s.isActiveSequencer() { @@ -337,7 +337,7 @@ func TestStateV2_IsActiveSequencer_VerifierError(t *testing.T) { mockSigner := &mockSignerImpl{} mockVerifier := &mockSequencerVerifier{err: errors.New("rpc error")} - s, _ := NewStateV2(mockL2Node, logger, mockVerifier, mockSigner, nil, nil) + s, _ := NewStateV2(mockL2Node, logger, mockVerifier, &mockL1Tracker{halt: false}, mockSigner, nil, nil) s.latestBlock = &BlockV2{Number: 0} if s.isActiveSequencer() { @@ -353,7 +353,7 @@ func TestStateV2_ApplyBlock_Idempotent(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - s, _ := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) + s, _ := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil, nil) block := &types.BlockV2{Number: 1, Signature: []byte{0x01, 0x02, 0x03}} // Apply twice should not error @@ -372,7 +372,7 @@ func TestStateV2_ApplyBlock_OlderBlockSkipped(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - s, _ := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) + s, _ := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil, nil) block2 := &types.BlockV2{Number: 2, Signature: []byte{0x01, 0x02, 0x03}} block1 := &types.BlockV2{Number: 1, Signature: []byte{0x01, 0x02, 0x03}} @@ -394,7 +394,7 @@ func TestStateV2_ApplyBlock_Sequential(t *testing.T) { mockL2Node := newTestMockL2Node() logger := log.NewNopLogger() - s, _ := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil) + s, _ := NewStateV2(mockL2Node, logger, &mockSequencerVerifier{}, nil, nil, nil, nil) for i := uint64(1); i <= 5; i++ { block := &types.BlockV2{Number: i, Signature: []byte{0x01, 0x02, 0x03}} @@ -406,3 +406,36 @@ func TestStateV2_ApplyBlock_Sequential(t *testing.T) { t.Errorf("LatestHeight = %d, want 5", s.LatestHeight()) } } + +// mockL1Tracker is a controllable L1Tracker for tests. +type mockL1Tracker struct { + halt bool +} + +func (m *mockL1Tracker) IsHalt() bool { return m.halt } + +func TestIsActiveSequencer_L1TrackerHaltsProduction(t *testing.T) { + logger := log.NewNopLogger() + verifier := &mockSequencerVerifier{isSequencer: true} + signer := &mockSignerImpl{} + + // L1 healthy -> active (verifier says we are the sequencer). + s, err := NewStateV2(newTestMockL2Node(), logger, verifier, &mockL1Tracker{halt: false}, signer, nil, nil) + if err != nil { + t.Fatalf("NewStateV2: %v", err) + } + s.latestBlock = &BlockV2{Number: 10} + if !s.isActiveSequencer() { + t.Fatal("expected active when L1 healthy and verifier says sequencer") + } + + // L1 halted -> NOT active even though verifier says we are the sequencer. + s2, err := NewStateV2(newTestMockL2Node(), logger, verifier, &mockL1Tracker{halt: true}, signer, nil, nil) + if err != nil { + t.Fatalf("NewStateV2: %v", err) + } + s2.latestBlock = &BlockV2{Number: 10} + if s2.isActiveSequencer() { + t.Fatal("expected NOT active when L1 tracker halts production") + } +} diff --git a/test/e2e/node/main.go b/test/e2e/node/main.go index c81b546c94d..ea3da40a916 100644 --- a/test/e2e/node/main.go +++ b/test/e2e/node/main.go @@ -134,6 +134,7 @@ func startNode(cfg *Config) error { node.DefaultMetricsProvider(tmcfg.Instrumentation), nodeLogger, nil, // sequencerVerifier + nil, // sequencerHealthGate nil, // sequencerSigner nil, // ha: no HA in e2e test node ) From 8223b0b6bbfa9948aa006e407dbc9fae684c8f63 Mon Sep 17 00:00:00 2001 From: running-tomato <31307926+tomatoishealthy@users.noreply.github.com> Date: Thu, 18 Jun 2026 15:45:59 +0800 Subject: [PATCH 14/15] Feat/sequencer upgrade using ts (#39) * feat: change sequencer upgrade switch from height to timestamp * fix: adjust blockV2.getTime() to return the true blockTime * fix: adjust blockV2.getTime() to return the true blockTime, * 1000 --------- Co-authored-by: allen.wu --- blocksync/msgs.go | 15 +--- blocksync/msgs_test.go | 65 +++------------ blocksync/reactor.go | 103 ++++++++++++++++------- consensus/reactor.go | 2 +- consensus/state.go | 11 ++- node/node.go | 8 ++ sequencer/interfaces.go | 5 -- sequencer/state_v2.go | 2 +- sequencer/state_v2_test.go | 5 -- types/block.go | 4 + types/block_v2.go | 18 ++++- upgrade/upgrade.go | 113 +++++++++++++++++++++++--- upgrade/upgrade_test.go | 162 +++++++++++++++++++++++++++++++++++++ 13 files changed, 388 insertions(+), 125 deletions(-) create mode 100644 upgrade/upgrade_test.go diff --git a/blocksync/msgs.go b/blocksync/msgs.go index 676feea95f7..1a7fd2a663f 100644 --- a/blocksync/msgs.go +++ b/blocksync/msgs.go @@ -8,7 +8,6 @@ import ( bcproto "github.com/tendermint/tendermint/proto/tendermint/blocksync" "github.com/tendermint/tendermint/types" - "github.com/tendermint/tendermint/upgrade" ) const ( @@ -88,23 +87,13 @@ func ValidateMsg(pb proto.Message) error { return errors.New("negative Height") } case *bcproto.BlockResponse: - // V1 block format - block, err := types.BlockFromProto(msg.Block) - if err != nil { + if _, err := types.BlockFromProto(msg.Block); err != nil { return err } - if upgrade.IsUpgraded(block.Height) { - return fmt.Errorf("unexpected BlockResponse at upgraded height %d", block.Height) - } case *bcproto.BlockResponseV2: - // V2 block format (sequencer mode) - block, err := types.BlockV2FromProto(msg.Block) - if err != nil { + if _, err := types.BlockV2FromProto(msg.Block); err != nil { return err } - if !upgrade.IsUpgraded(block.GetHeight()) { - return fmt.Errorf("unexpected BlockResponseV2 before upgraded height %d", block.GetHeight()) - } case *bcproto.NoBlockResponse: if msg.Height < 0 { return errors.New("negative Height") diff --git a/blocksync/msgs_test.go b/blocksync/msgs_test.go index 8d904b97121..a38d06c388d 100644 --- a/blocksync/msgs_test.go +++ b/blocksync/msgs_test.go @@ -1,7 +1,6 @@ package blocksync_test import ( - "bytes" "encoding/hex" "math" "math/big" @@ -13,10 +12,8 @@ import ( "github.com/stretchr/testify/require" "github.com/tendermint/tendermint/blocksync" - "github.com/tendermint/tendermint/crypto" bcproto "github.com/tendermint/tendermint/proto/tendermint/blocksync" "github.com/tendermint/tendermint/types" - "github.com/tendermint/tendermint/upgrade" ) func TestBcBlockRequestMessageValidateBasic(t *testing.T) { @@ -84,65 +81,23 @@ func TestBcStatusResponseMessageValidateBasic(t *testing.T) { } } -func TestValidateMsgBlockResponseRejectsV1AtUpgradeHeight(t *testing.T) { - oldHeight := upgrade.UpgradeBlockHeight - upgrade.SetUpgradeBlockHeight(10) - defer upgrade.SetUpgradeBlockHeight(oldHeight) - - lastBlockID := types.BlockID{ - Hash: bytes.Repeat([]byte{1}, 32), - PartSetHeader: types.PartSetHeader{ - Total: 1, - Hash: bytes.Repeat([]byte{2}, 32), - }, - } - lastCommit := types.NewCommit(9, 0, lastBlockID, []types.CommitSig{{ - BlockIDFlag: types.BlockIDFlagCommit, - ValidatorAddress: bytes.Repeat([]byte{3}, crypto.AddressSize), - Signature: []byte{1}, - }}) - - block := types.MakeBlock(10, []types.Tx{types.Tx("Hello World")}, nil, nil, nil, lastCommit, nil) - block.ProposerAddress = bytes.Repeat([]byte{4}, crypto.AddressSize) - bpb, err := block.ToProto() - require.NoError(t, err) - - err = blocksync.ValidateMsg(&bcproto.BlockResponse{Block: bpb}) - require.Error(t, err) - assert.Contains(t, err.Error(), "unexpected BlockResponse") -} - -func TestValidateMsgBlockResponseV2RejectsPreUpgradeHeight(t *testing.T) { - oldHeight := upgrade.UpgradeBlockHeight - upgrade.SetUpgradeBlockHeight(10) - defer upgrade.SetUpgradeBlockHeight(oldHeight) - - blockV2 := &types.BlockV2{ - ParentHash: common.HexToHash("0x1"), - Hash: common.HexToHash("0x2"), - BaseFee: big.NewInt(1), - Number: 9, - } - - err := blocksync.ValidateMsg(&bcproto.BlockResponseV2{Block: types.BlockV2ToProto(blockV2)}) - require.Error(t, err) - assert.Contains(t, err.Error(), "unexpected BlockResponseV2") -} - -func TestValidateMsgBlockResponseV2AllowsUpgradeHeight(t *testing.T) { - oldHeight := upgrade.UpgradeBlockHeight - upgrade.SetUpgradeBlockHeight(10) - defer upgrade.SetUpgradeBlockHeight(oldHeight) - +// ValidateMsg validates block responses structurally only. The old "reject V1/V2 by upgrade +// height" checks were removed: the timestamp-driven boundary is unknown at message-validation +// time, so the blocksync reactor guards the V1/V2 type at consume time instead. A structurally +// valid block passes regardless of height; a malformed one is rejected. +func TestValidateMsgBlockResponseV2Structural(t *testing.T) { blockV2 := &types.BlockV2{ ParentHash: common.HexToHash("0x1"), Hash: common.HexToHash("0x2"), BaseFee: big.NewInt(1), Number: 10, } + require.NoError(t, blocksync.ValidateMsg(&bcproto.BlockResponseV2{Block: types.BlockV2ToProto(blockV2)})) - err := blocksync.ValidateMsg(&bcproto.BlockResponseV2{Block: types.BlockV2ToProto(blockV2)}) - require.NoError(t, err) + // Malformed V2 (bad hash length) -> rejected by structural validation. + bad := types.BlockV2ToProto(blockV2) + bad.Hash = []byte{0x1, 0x2} + require.Error(t, blocksync.ValidateMsg(&bcproto.BlockResponseV2{Block: bad})) } //nolint:lll // ignore line length in tests diff --git a/blocksync/reactor.go b/blocksync/reactor.go index b2ed8ec0519..6ae80f95e02 100644 --- a/blocksync/reactor.go +++ b/blocksync/reactor.go @@ -380,24 +380,44 @@ func (bcR *Reactor) L2Node() l2node.L2Node { return bcR.l2Node } -// syncBlockV2 handles syncing a single BlockV2 in sequencer mode. -// Verifies block signature using the history-aware verifier, then applies. -func (bcR *Reactor) syncBlockV2(block types.SyncableBlock, blocksSynced *uint64, lastRate *float64, lastHundred *time.Time) bool { +// verifyBlockV2 asserts block is a *types.BlockV2 and verifies its sequencer signature +// (history-aware lookup via IsSequencerAt). Pure check: no pool side effects — the caller +// decides what to redo/ban. Shared by the boundary check and syncBlockV2. +func (bcR *Reactor) verifyBlockV2(block types.SyncableBlock) (*types.BlockV2, error) { blockV2, ok := block.(*types.BlockV2) if !ok { - bcR.Logger.Error("Expected BlockV2 after upgrade", "height", block.GetHeight()) - bcR.pool.RedoRequest(block.GetHeight()) - return false + return nil, fmt.Errorf("expected BlockV2 at height %d, got V1", block.GetHeight()) } - - // Verify block signature (uses IsSequencerAt with history-aware lookup) if err := sequencer.VerifyBlockSignature(bcR.verifier, blockV2); err != nil { - bcR.Logger.Error("Block signature verification failed", "height", blockV2.Number, "err", err) - bcR.pool.RedoRequest(blockV2.GetHeight()) + return nil, fmt.Errorf("block signature verification failed at height %d: %w", blockV2.GetHeight(), err) + } + return blockV2, nil +} + +// redoAndBan re-queues the block's height and disconnects the peer that served it. +// Single-block granularity: the PBFT path bans only the mismatched block, while the +// one-time boundary check calls it for both blocks. +func (bcR *Reactor) redoAndBan(block types.SyncableBlock, reason string) { + bcR.Logger.Error("blocksync redo+ban", "height", block.GetHeight(), "reason", reason) + peerID := bcR.pool.RedoRequest(block.GetHeight()) + if peer := bcR.Switch.Peers().Get(peerID); peer != nil { + bcR.Switch.StopPeerForError(peer, fmt.Errorf("blocksync: %s", reason)) + } +} + +// syncBlockV2 handles syncing a single BlockV2 in sequencer mode. +// Verifies block signature using the history-aware verifier, then applies. +func (bcR *Reactor) syncBlockV2(block types.SyncableBlock, blocksSynced *uint64, lastRate *float64, lastHundred *time.Time) bool { + blockV2, err := bcR.verifyBlockV2(block) + if err != nil { + // Wrong type or bad signature == the peer served bad/forged data: redo + ban. + bcR.redoAndBan(block, fmt.Sprintf("BlockV2 verification failed: %v", err)) return false } - // Apply BlockV2 via stateV2 + // Apply BlockV2 via stateV2. The block is already signature-verified (authentic), so an + // apply failure is almost always a local issue (geth state), not the peer's fault — redo + // without banning to avoid dropping honest peers during a local hiccup. if err := bcR.stateV2.ApplyBlock(blockV2); err != nil { bcR.Logger.Error("Failed to apply BlockV2", "height", blockV2.Number, "err", err) bcR.pool.RedoRequest(blockV2.GetHeight()) @@ -612,18 +632,9 @@ FOR_LOOP: didProcessCh <- struct{}{} } - if firstSync.GetHeight()+1 == upgrade.UpgradeBlockHeight { - if err := bcR.handleTheLastTMBlock(state, firstSync); err != nil { - bcR.Logger.Error("Error in apply last tendermint block", "err", err) - peerID := bcR.pool.RedoRequest(firstSync.GetHeight()) - peer := bcR.Switch.Peers().Get(peerID) - if peer != nil { - bcR.Switch.StopPeerForError(peer, fmt.Errorf("handleTheLastTMBlock error: %v", err)) - } - continue FOR_LOOP - } - bcR.pool.PopRequest() - blocksSynced++ + // Detect and process the last PBFT block at the timestamp-driven upgrade + // boundary (runs at most once; no-op after the boundary height is recorded). + if bcR.checkAndHandleTheLastTMBlock(state, firstSync, secondSync, &blocksSynced) { continue FOR_LOOP } @@ -633,8 +644,20 @@ FOR_LOOP: continue FOR_LOOP } - // PBFT mode: type assert to *types.Block - first, second := firstSync.(*types.Block), secondSync.(*types.Block) + // PBFT mode: both blocks must be V1. A peer that serves a V2 block in this + // range is malicious or on a bad fork — ban only the mismatched one(s) (this + // is the hot path; avoid false-banning the peer that served a correct block). + first, ok1 := firstSync.(*types.Block) + second, ok2 := secondSync.(*types.Block) + if !ok1 || !ok2 { + if !ok1 { + bcR.redoAndBan(firstSync, "expected V1 block in PBFT range") + } + if !ok2 { + bcR.redoAndBan(secondSync, "expected V1 block in PBFT range") + } + continue FOR_LOOP + } firstParts, err := first.MakePartSet(types.BlockPartSizeBytes) if err != nil { @@ -727,10 +750,34 @@ func (bcR *Reactor) BroadcastStatusRequest() error { return nil } -// just skip the last tendermint block commit check. -// TODO: consider add the commit check in the future or using batch derivation reorg +func (bcR *Reactor) checkAndHandleTheLastTMBlock(state sm.State, first, second types.SyncableBlock, blocksSynced *uint64) bool { + if upgrade.UpgradeBlockHeight() >= 0 { + return false // boundary already discovered; only runs once + } + if first.GetBlockVersion() != types.Version1 || !upgrade.IsUpgradedByTs(first.GetTime()) { + return false // not the boundary; fall through to the normal sync paths + } + if _, err := bcR.verifyBlockV2(second); err != nil { + bcR.redoAndBan(first, fmt.Sprintf("boundary check failed: %v", err)) + bcR.redoAndBan(second, fmt.Sprintf("boundary: second must be a valid V2 block: %v", err)) + return true + } + if err := bcR.handleTheLastTMBlock(state, first); err != nil { + bcR.redoAndBan(first, fmt.Sprintf("apply last tendermint block: %v", err)) + bcR.redoAndBan(second, fmt.Sprintf("boundary rolled back after first failed: %v", err)) + return true + } + upgrade.SetUpgradeBlockHeight(first.GetHeight()) + bcR.pool.PopRequest() + *blocksSynced++ + return true +} + func (bcR *Reactor) handleTheLastTMBlock(state sm.State, lastSyncable types.SyncableBlock) error { - last := lastSyncable.(*types.Block) + last, ok := lastSyncable.(*types.Block) + if !ok { + return fmt.Errorf("handleTheLastTMBlock: expected V1 block at height %d", lastSyncable.GetHeight()) + } lastParts, err := last.MakePartSet(types.BlockPartSizeBytes) if err != nil { bcR.Logger.Error( diff --git a/consensus/reactor.go b/consensus/reactor.go index 18d260eaa43..531139640f8 100644 --- a/consensus/reactor.go +++ b/consensus/reactor.go @@ -81,7 +81,7 @@ func (conR *Reactor) OnStart() error { if upgrade.IsUpgraded(conR.conS.Height) { conR.Logger.Info("Already upgraded to sequencer mode, consensus reactor will not start", "height", conR.conS.Height, - "upgradeHeight", upgrade.UpgradeBlockHeight) + "upgradeHeight", upgrade.UpgradeBlockHeight()) return nil } diff --git a/consensus/state.go b/consensus/state.go index 08e9c76f8ce..da8562ab15c 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1809,10 +1809,13 @@ func (cs *State) finalizeCommit(height int64) { } // Check for upgrade to sequencer mode - if upgrade.IsUpgraded(cs.Height) { - logger.Info("Upgrade height reached, switching to sequencer mode", - "height", cs.Height, - "upgradeHeight", upgrade.UpgradeBlockHeight) + if upgrade.IsUpgradedByTs(block.Time.UnixMilli()) { + // persistent the upgradeHeight, load it into upgradeHeight when startup + upgrade.SetUpgradeBlockHeight(height) + + logger.Info("Upgrade time reached, switching to sequencer mode", + "height", height, + "upgradeTime", upgrade.UpgradeBlockTime()) // Stop consensus state first. // diff --git a/node/node.go b/node/node.go index a6d0744aa36..8c3b2f509de 100644 --- a/node/node.go +++ b/node/node.go @@ -804,6 +804,14 @@ func NewNode( sm.StoreOptions{DiscardABCIResponses: config.Storage.DiscardABCIResponses}, ) + // Restore the persisted sequencer-upgrade boundary (if any) and wire the store so future + // SetUpgradeBlockHeight calls (finalizeCommit / blocksync) persist it automatically. + // A DB read failure here is fatal: booting with a reset boundary would let the node + // re-run PBFT past the upgrade. + if err := upgrade.SetStore(stateDB); err != nil { + return nil, err + } + state, genDoc, err := LoadStateFromDBOrGenesisDocProvider(stateDB, genesisDocProvider) if err != nil { return nil, err diff --git a/sequencer/interfaces.go b/sequencer/interfaces.go index b9192ff4a2c..542ff4b98b6 100644 --- a/sequencer/interfaces.go +++ b/sequencer/interfaces.go @@ -25,11 +25,6 @@ var ( type SequencerVerifier interface { // IsSequencerAt checks if addr was the valid sequencer at the given L2 block height. IsSequencerAt(addr common.Address, l2Height uint64) (bool, error) - - // VerificationStartHeight returns the L2 block height from which V2 signature - // verification is enforced (= upgradeBlockHeight). Blocks below this height are - // PBFT blocks and skip V2 verification. Returns math.MaxUint64 if not configured. - VerificationStartHeight() uint64 } // Signer interface for sequencer block signing diff --git a/sequencer/state_v2.go b/sequencer/state_v2.go index c5f2e13f501..4fb5ce139ae 100644 --- a/sequencer/state_v2.go +++ b/sequencer/state_v2.go @@ -12,7 +12,7 @@ import ( const ( // BlockInterval is the fallback interval for empty blocks (no txs). - BlockInterval = 3000 * time.Millisecond + BlockInterval = 5000 * time.Millisecond // FastBlockInterval is the txpool polling interval. // When pending txs are found, a block is produced immediately. FastBlockInterval = 300 * time.Millisecond diff --git a/sequencer/state_v2_test.go b/sequencer/state_v2_test.go index 054f57bc192..f95dfab315a 100644 --- a/sequencer/state_v2_test.go +++ b/sequencer/state_v2_test.go @@ -34,7 +34,6 @@ func (m *mockSignerImpl) Address() common.Address { // mockSequencerVerifier is a mock implementation of SequencerVerifier for testing. type mockSequencerVerifier struct { isSequencer bool - startHeight uint64 err error } @@ -45,10 +44,6 @@ func (m *mockSequencerVerifier) IsSequencerAt(addr common.Address, l2Height uint return m.isSequencer, nil } -func (m *mockSequencerVerifier) VerificationStartHeight() uint64 { - return m.startHeight -} - // mockSequencerHA is a mock implementation of SequencerHA for testing. type mockSequencerHA struct { leader bool diff --git a/types/block.go b/types/block.go index be6ae7806b2..1c8fb351f44 100644 --- a/types/block.go +++ b/types/block.go @@ -59,6 +59,10 @@ func (b *Block) GetHash() []byte { return b.Hash().Bytes() } +func (b *Block) GetTime() int64 { return b.Header.Time.UnixMilli() } + +func (b *Block) GetBlockVersion() BlockVersion { return Version1 } + // Ensure Block implements SyncableBlock var _ SyncableBlock = (*Block)(nil) diff --git a/types/block_v2.go b/types/block_v2.go index ce447a3b3be..70f22d613e6 100644 --- a/types/block_v2.go +++ b/types/block_v2.go @@ -2,10 +2,9 @@ package types import ( "errors" - "math/big" - "github.com/morph-l2/go-ethereum/common" seqproto "github.com/tendermint/tendermint/proto/tendermint/sequencer" + "math/big" ) // BlockV2 represents the block format after upgrade to centralized sequencer mode. @@ -49,11 +48,26 @@ func (b *BlockV2) GetHash() []byte { return b.Hash.Bytes() } +func (b *BlockV2) GetTime() int64 { return int64(b.Timestamp) * 1000 } + +func (b *BlockV2) GetBlockVersion() BlockVersion { return Version2 } + +// BlockVersion identifies the wire format of a SyncableBlock: Version1 is the +// PBFT-era tendermint block, Version2 is the post-upgrade sequencer block. +type BlockVersion uint8 + +const ( + Version1 BlockVersion = 1 + Version2 BlockVersion = 2 +) + // SyncableBlock is an interface that both old Block and new BlockV2 can implement // for compatibility in the block pool. type SyncableBlock interface { GetHeight() int64 GetHash() []byte + GetTime() int64 + GetBlockVersion() BlockVersion } // Ensure BlockV2 implements SyncableBlock diff --git a/upgrade/upgrade.go b/upgrade/upgrade.go index 80222b24e12..3037ef438bc 100644 --- a/upgrade/upgrade.go +++ b/upgrade/upgrade.go @@ -1,23 +1,114 @@ package upgrade -// Hardcoded upgrade parameters +import ( + "encoding/binary" + "fmt" + "sync/atomic" +) + +// Upgrade parameters. These are written at runtime by the consensus / blocksync paths and read +// concurrently by multiple reactors and the derivation loop, so all access goes through atomic +// load/store. The variables are unexported; callers use the accessor functions below rather than +// touching them directly, which keeps every read/write race-free. var ( - // UpgradeBlockHeight is the block height at which the consensus switches to sequencer mode. - // Default is -1 (upgrade disabled). Set via --consensus.switchHeight flag. - // When < 0, the upgrade never activates. - UpgradeBlockHeight int64 = -1 + // upgradeBlockHeight is the block height at which the consensus switches to sequencer mode. + // Default is -1 (upgrade disabled). It is discovered at runtime once upgradeBlockTime is + // crossed (see finalizeCommit / blocksync) and persisted via the wired store so the boundary + // survives restarts. When < 0, the height-based upgrade check is disabled. + upgradeBlockHeight int64 = -1 + + // upgradeBlockTime is the block timestamp (Unix milliseconds) at which the consensus switches + // to sequencer mode. Set once at node startup via SetUpgradeBlockTime. When <= 0, the + // timestamp-based upgrade check is disabled. + upgradeBlockTime int64 + + // store, when wired via SetStore, persists upgradeBlockHeight across restarts. Set once in + // SetStore at node startup (before the consensus/blocksync goroutines start), then only read. + store Store + + // upgradeHeightKey is the key under which upgradeBlockHeight is persisted. It is a plain key + // in the state DB and does not collide with state-store keys (stateKey, validatorsKey, ...). + upgradeHeightKey = []byte("upgrade/block_height") ) -// IsUpgraded returns true if the given height is at or after the upgrade height. -// Returns false when UpgradeBlockHeight < 0 (upgrade disabled). +// Store is the minimal key/value subset of the node's database that the upgrade package needs. +// Keeping it tiny lets this package stay dependency-light (no tm-db import) and trivially testable +// with an in-memory map. tm-db.DB satisfies it. +type Store interface { + Get([]byte) ([]byte, error) + Set([]byte, []byte) error +} + +// UpgradeBlockHeight returns the current upgrade block height (-1 when not yet discovered). +func UpgradeBlockHeight() int64 { return atomic.LoadInt64(&upgradeBlockHeight) } + +// UpgradeBlockTime returns the configured upgrade block time in Unix milliseconds (0 = disabled). +func UpgradeBlockTime() int64 { return atomic.LoadInt64(&upgradeBlockTime) } + +// SetStore wires persistence and restores any previously persisted upgrade height. +// Call once at node startup, before consensus / blocksync run. +// +// Fail-hard policy on the critical upgrade-height data: +// - a DB read error (IO) -> return error (treating it as "nothing persisted" would silently +// reset the boundary and let the node re-run PBFT past the upgrade after a restart); +// - a non-empty value of unexpected length -> corrupt -> return error rather than ignore it; +// - an absent key (len 0) is normal on first run and leaves the height at its current value. +func SetStore(s Store) error { + store = s + bz, err := s.Get(upgradeHeightKey) + if err != nil { + return fmt.Errorf("upgrade: read persisted block height: %w", err) + } + switch len(bz) { + case 0: + // absent: first run, nothing to restore. + case 8: + atomic.StoreInt64(&upgradeBlockHeight, int64(binary.BigEndian.Uint64(bz))) + default: + return fmt.Errorf("upgrade: corrupt persisted block height: got %d bytes, want 8", len(bz)) + } + return nil +} + +// IsUpgraded returns true if the given height is past the upgrade height. +// Returns false when the upgrade height is not yet set (< 0, upgrade disabled). func IsUpgraded(height int64) bool { - if UpgradeBlockHeight < 0 { + h := atomic.LoadInt64(&upgradeBlockHeight) + if h < 0 { + return false + } + return height > h +} + +// IsUpgradedByTs returns true if the given block timestamp (Unix ms) is at or after the +// upgrade time. Returns false when the upgrade time is not set (<= 0, upgrade disabled). +func IsUpgradedByTs(timestamp int64) bool { + t := atomic.LoadInt64(&upgradeBlockTime) + if t <= 0 { return false } - return height >= UpgradeBlockHeight + return timestamp >= t } -// SetUpgradeBlockHeight sets the upgrade block height. +// SetUpgradeBlockHeight sets the upgrade block height and, if a store is wired, persists it so the +// discovered boundary survives restarts. +// +// The height is critical consensus state: a node that loses it on restart re-runs PBFT past the +// upgrade or mishandles the boundary. If the write fails we therefore panic rather than continue +// with in-memory-only state that would silently diverge after a restart (same fail-hard policy as +// the signature store on a failed persist). func SetUpgradeBlockHeight(height int64) { - UpgradeBlockHeight = height + atomic.StoreInt64(&upgradeBlockHeight, height) + if store != nil { + var bz [8]byte + binary.BigEndian.PutUint64(bz[:], uint64(height)) + if err := store.Set(upgradeHeightKey, bz[:]); err != nil { + panic(fmt.Sprintf("upgrade: persist block height %d failed: %v", height, err)) + } + } +} + +// SetUpgradeBlockTime sets the upgrade block time (Unix milliseconds). +func SetUpgradeBlockTime(timestamp int64) { + atomic.StoreInt64(&upgradeBlockTime, timestamp) } diff --git a/upgrade/upgrade_test.go b/upgrade/upgrade_test.go new file mode 100644 index 00000000000..12de95f9303 --- /dev/null +++ b/upgrade/upgrade_test.go @@ -0,0 +1,162 @@ +package upgrade + +import ( + "errors" + "testing" +) + +// memStore is an in-memory Store for tests. +type memStore map[string][]byte + +func (m memStore) Get(k []byte) ([]byte, error) { return m[string(k)], nil } +func (m memStore) Set(k, v []byte) error { m[string(k)] = v; return nil } + +// errStore is a Store whose Get/Set can be made to fail, to exercise the +// fail-hard policy on the critical upgrade-height data. +type errStore struct { + data map[string][]byte + getErr error + setErr error +} + +func (s *errStore) Get(k []byte) ([]byte, error) { + if s.getErr != nil { + return nil, s.getErr + } + return s.data[string(k)], nil +} + +func (s *errStore) Set(k, v []byte) error { + if s.setErr != nil { + return s.setErr + } + s.data[string(k)] = v + return nil +} + +// reset restores package globals so tests do not leak state into each other. +func reset() { + upgradeBlockHeight = -1 + upgradeBlockTime = 0 + store = nil +} + +func TestIsUpgradedByTs(t *testing.T) { + reset() + defer reset() + + // Disabled when unset. + if IsUpgradedByTs(1) { + t.Fatal("expected disabled when upgrade time <= 0") + } + + SetUpgradeBlockTime(1000) + cases := []struct { + ts int64 + want bool + }{ + {999, false}, // before + {1000, true}, // exactly at boundary (>=) + {1001, true}, // after + } + for _, c := range cases { + if got := IsUpgradedByTs(c.ts); got != c.want { + t.Fatalf("IsUpgradedByTs(%d) = %v, want %v", c.ts, got, c.want) + } + } +} + +func TestSetUpgradeBlockHeightPersists(t *testing.T) { + reset() + defer reset() + + db := memStore{} + + // Fresh store, nothing persisted: load leaves the -1 sentinel. + if err := SetStore(db); err != nil { + t.Fatalf("SetStore: %v", err) + } + if UpgradeBlockHeight() != -1 { + t.Fatalf("expected -1 after empty load, got %d", UpgradeBlockHeight()) + } + + // Discovering the boundary persists it. + SetUpgradeBlockHeight(42) + if UpgradeBlockHeight() != 42 { + t.Fatalf("expected 42, got %d", UpgradeBlockHeight()) + } + + // Simulate a restart: clear globals, re-wire the same DB, expect the height restored. + upgradeBlockHeight = -1 + store = nil + if err := SetStore(db); err != nil { + t.Fatalf("SetStore: %v", err) + } + if UpgradeBlockHeight() != 42 { + t.Fatalf("expected 42 restored after restart, got %d", UpgradeBlockHeight()) + } + if !IsUpgraded(43) || IsUpgraded(42) { + t.Fatalf("IsUpgraded boundary wrong after restore: 42=%v 43=%v", IsUpgraded(42), IsUpgraded(43)) + } +} + +func TestSetUpgradeBlockHeightNoStore(t *testing.T) { + reset() + defer reset() + + // No store wired: must not panic, just sets the global. + SetUpgradeBlockHeight(7) + if UpgradeBlockHeight() != 7 { + t.Fatalf("expected 7, got %d", UpgradeBlockHeight()) + } +} + +// A DB read failure must propagate (not be swallowed as "nothing persisted"), +// otherwise a restart would silently reset the boundary. +func TestSetStoreReadErrorPropagates(t *testing.T) { + reset() + defer reset() + + s := &errStore{getErr: errors.New("db read failed")} + if err := SetStore(s); err == nil { + t.Fatal("expected error when DB read fails") + } + if UpgradeBlockHeight() != -1 { + t.Fatalf("expected -1 unchanged on read error, got %d", UpgradeBlockHeight()) + } +} + +// A persisted value of unexpected (non-zero, non-8) length is corruption and must error, +// not be silently ignored (which would leave the boundary at the wrong value). +func TestSetStoreCorruptLengthErrors(t *testing.T) { + reset() + defer reset() + + db := memStore{} + db[string(upgradeHeightKey)] = []byte{0x1, 0x2, 0x3, 0x4} // 4 bytes -> corrupt + if err := SetStore(db); err == nil { + t.Fatal("expected error on corrupt (non-8-byte) persisted height") + } + if UpgradeBlockHeight() != -1 { + t.Fatalf("expected -1 unchanged on corrupt value, got %d", UpgradeBlockHeight()) + } +} + +// A failed persist of the critical upgrade height must stop the node (panic), +// not continue with in-memory-only state that diverges after a restart. +func TestSetUpgradeBlockHeightPanicsOnWriteFailure(t *testing.T) { + reset() + defer reset() + + s := &errStore{data: map[string][]byte{}, setErr: errors.New("disk full")} + if err := SetStore(s); err != nil { + t.Fatalf("SetStore: %v", err) + } + + defer func() { + if r := recover(); r == nil { + t.Fatal("expected panic when persisting block height fails") + } + }() + SetUpgradeBlockHeight(99) +} From 8aa03b55293fe7298eed8c02a7ea41fee0cec89d Mon Sep 17 00:00:00 2001 From: running-tomato <31307926+tomatoishealthy@users.noreply.github.com> Date: Tue, 23 Jun 2026 10:28:48 +0800 Subject: [PATCH 15/15] fix: audit phase 3 (#40) * fix: audit phase 3 * Apply suggestion from @chengwenxi Co-authored-by: vincent * fix: separate peer removal from banning --------- Co-authored-by: allen.wu Co-authored-by: vincent --- blocksync/msgs_test.go | 3 ++- blocksync/pool.go | 23 +++++++++++++++-------- blocksync/pool_test.go | 36 ++++++++++++++++++++++++++++++++++++ l2node/mock.go | 15 +++++++++++---- node/node.go | 14 +++++++++++++- sequencer/block_verify.go | 5 +++-- sequencer/hash_set.go | 3 ++- types/block_v2.go | 9 ++++++++- 8 files changed, 90 insertions(+), 18 deletions(-) diff --git a/blocksync/msgs_test.go b/blocksync/msgs_test.go index a38d06c388d..9c8f5160843 100644 --- a/blocksync/msgs_test.go +++ b/blocksync/msgs_test.go @@ -89,6 +89,7 @@ func TestValidateMsgBlockResponseV2Structural(t *testing.T) { blockV2 := &types.BlockV2{ ParentHash: common.HexToHash("0x1"), Hash: common.HexToHash("0x2"), + Signature: make([]byte, 65), BaseFee: big.NewInt(1), Number: 10, } @@ -119,7 +120,7 @@ func TestBlockchainMessageVectors(t *testing.T) { BlockRequest: &bcproto.BlockRequest{Height: math.MaxInt64}}}, "0a0a08ffffffffffffffff7f"}, {"BlockResponseMessage", &bcproto.Message{Sum: &bcproto.Message_BlockResponse{ - BlockResponse: &bcproto.BlockResponse{Block: bpb}}}, "1a700a6e0a5b0a02080b1803220b088092b8c398feffffff012a0212003a20c4da88e876062aa1543400d50d0eaa0dac88096057949cfb7bca7f3a48c04bf96a20e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855120d0a0b48656c6c6f20576f726c641a00"}, + BlockResponse: &bcproto.BlockResponse{Block: bpb}}}, "1a700a6e0a5b0a02080b1803220b088092b8c398feffffff012a021a003a20fb5fddfb1922fa75ac66398d8adeb962739e0259e7b3e811a81449683a3b78cd6a20e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855120d0a0b48656c6c6f20576f726c641a00"}, {"NoBlockResponseMessage", &bcproto.Message{Sum: &bcproto.Message_NoBlockResponse{ NoBlockResponse: &bcproto.NoBlockResponse{Height: 1}}}, "12020801"}, {"NoBlockResponseMessage", &bcproto.Message{Sum: &bcproto.Message_NoBlockResponse{ diff --git a/blocksync/pool.go b/blocksync/pool.go index 1587564e73f..fe411226e70 100644 --- a/blocksync/pool.go +++ b/blocksync/pool.go @@ -335,11 +335,14 @@ func (pool *BlockPool) GetPeerIDs() []p2p.ID { // It also enforces basic sanity checks against malicious or buggy peers // (see spec-003-blocksync-malicious-peer-fix): // -// - A peer reporting base > height is structurally impossible and is banned +// - A peer reporting base > height is structurally impossible and is rejected // (CometBFT issue #5801 hardening). // - An existing peer reporting a strictly lower height or base than what it -// reported previously is removed from the pool and banned to prevent +// reported previously is removed from the pool to prevent // poisoning of maxPeerHeight (CVE-2025-24371). +// - Non-persistent peers that report invalid ranges are also banned. +// Persistent peers are removed but not added to bannedPeers so the switch +// can reconnect them later without accepting the invalid range. // - A peer that has been banned recently is silently ignored if it tries to // re-introduce itself before its ban expires. // @@ -350,25 +353,29 @@ func (pool *BlockPool) SetPeerRange(srcPeer p2p.Peer, base int64, height int64) pool.mtx.Lock() defer pool.mtx.Unlock() - if base > height && !srcPeer.IsPersistent() { - pool.Logger.Info("Peer reporting base greater than height; banning", + if base > height { + pool.Logger.Info("Peer reporting base greater than height; removing", "peer", peerID, "base", base, "height", height) if _, exists := pool.peers[peerID]; exists { pool.removePeer(peerID) } - pool.banPeer(peerID) + if !srcPeer.IsPersistent() { + pool.banPeer(peerID) + } return } peer := pool.peers[peerID] if peer != nil { - if (height < peer.height || base < peer.base) && !srcPeer.IsPersistent() { - pool.Logger.Info("Peer reporting decreasing height or base; removing and banning", + if height < peer.height || base < peer.base { + pool.Logger.Info("Peer reporting decreasing height or base; removing", "peer", peerID, "prevHeight", peer.height, "height", height, "prevBase", peer.base, "base", base) pool.removePeer(peerID) - pool.banPeer(peerID) + if !srcPeer.IsPersistent() { + pool.banPeer(peerID) + } return } peer.base = base diff --git a/blocksync/pool_test.go b/blocksync/pool_test.go index eb62e5d2b7e..4d4a1217670 100644 --- a/blocksync/pool_test.go +++ b/blocksync/pool_test.go @@ -55,6 +55,9 @@ func (p *fakePeer) String() string { return string(p.id) } func (p *fakePeer) SetLogger(log.Logger) {} func mkPeer(id p2p.ID) p2p.Peer { return &fakePeer{id: id} } +func mkPersistentPeer(id p2p.ID) p2p.Peer { + return &fakePeer{id: id, persistent: true} +} type testPeer struct { id p2p.ID @@ -351,6 +354,39 @@ func TestSetPeerRange_BaseGreaterThanHeight(t *testing.T) { require.EqualValues(t, 0, pool.MaxPeerHeight()) } +func TestSetPeerRange_PersistentPeerInvalidRangeRemovedButNotBanned(t *testing.T) { + requestsCh := make(chan BlockRequest, 10) + errorsCh := make(chan peerError, 10) + + pool := NewBlockPool(1, requestsCh, errorsCh) + pool.SetLogger(log.TestingLogger()) + + peerID := p2p.ID("persistent-invalid") + pool.SetPeerRange(mkPersistentPeer(peerID), 500, 100) + require.Nil(t, pool.peers[peerID], "persistent peer with base > height must not be added") + require.False(t, pool.isPeerBanned(peerID), "persistent peer must not be written to bannedPeers") + require.EqualValues(t, 0, pool.MaxPeerHeight()) +} + +func TestSetPeerRange_PersistentPeerDecreasingHeightRemovedButNotBanned(t *testing.T) { + requestsCh := make(chan BlockRequest, 10) + errorsCh := make(chan peerError, 10) + + pool := NewBlockPool(1, requestsCh, errorsCh) + pool.SetLogger(log.TestingLogger()) + + peerID := p2p.ID("persistent-decreasing") + pool.SetPeerRange(mkPersistentPeer(peerID), 1, 1000) + require.EqualValues(t, 1000, pool.MaxPeerHeight()) + require.NotNil(t, pool.peers[peerID]) + + pool.SetPeerRange(mkPersistentPeer(peerID), 1, 1) + require.Nil(t, pool.peers[peerID], "persistent peer must be removed after lowering height") + require.False(t, pool.isPeerBanned(peerID), "persistent peer must not be written to bannedPeers") + require.EqualValues(t, 0, pool.MaxPeerHeight(), + "maxPeerHeight must drop to 0 once the invalid persistent peer is removed") +} + // TestBanPeer_PreventsReentry verifies that a banned peer cannot reintroduce // itself with a fresh — even otherwise valid — StatusResponse during the // ban window. diff --git a/l2node/mock.go b/l2node/mock.go index 9b7a54d5e02..941eed212bb 100644 --- a/l2node/mock.go +++ b/l2node/mock.go @@ -5,6 +5,7 @@ import ( "fmt" "os" + "github.com/morph-l2/go-ethereum/common" tmjson "github.com/tendermint/tendermint/libs/json" tmos "github.com/tendermint/tendermint/libs/os" tmrand "github.com/tendermint/tendermint/libs/rand" @@ -14,8 +15,9 @@ var _ L2Node = &MockL2Node{} type MockL2Node struct { TxNumber int - ValidatorSetFile string // used to control the validator set updates - maxAppliedHeight uint64 // tracks highest block applied (for V2 idempotent check) + ValidatorSetFile string // used to control the validator set updates + maxAppliedHeight uint64 // tracks highest block applied (for V2 idempotent check) + maxAppliedHash common.Hash // hash at maxAppliedHeight, so same-height reorg blocks aren't skipped } func NewMockL2Node(n int, validatorSetFile string) L2Node { @@ -141,10 +143,15 @@ func (l *MockL2Node) RequestBlockDataV2(parentHash []byte) (*BlockV2, bool, erro } func (l *MockL2Node) ApplyBlockV2(block *BlockV2) (bool, error) { - if block.Number <= l.maxAppliedHeight { - return false, nil // idempotent skip + // Skip only a true duplicate (same height AND same hash). A same-height + // block with a different hash is a reorg and must be applied; otherwise + // reorg unit tests get false-positive passes. + if block.Number < l.maxAppliedHeight || + (block.Number == l.maxAppliedHeight && block.Hash == l.maxAppliedHash) { + return false, nil } l.maxAppliedHeight = block.Number + l.maxAppliedHash = block.Hash return true, nil } diff --git a/node/node.go b/node/node.go index 8c3b2f509de..2a40c2d16b0 100644 --- a/node/node.go +++ b/node/node.go @@ -246,24 +246,36 @@ type Node struct { } func initDBs(config *cfg.Config, dbProvider DBProvider) (blockStore *store.BlockStore, stateDB dbm.DB, sigStore *sequencer.SignatureStore, err error) { + var opened []dbm.DB + defer func() { + if err != nil { + for _, db := range opened { + _ = db.Close() + } + blockStore, stateDB, sigStore = nil, nil, nil + } + }() + var blockStoreDB dbm.DB blockStoreDB, err = dbProvider(&DBContext{"blockstore", config}) if err != nil { return } + opened = append(opened, blockStoreDB) blockStore = store.NewBlockStore(blockStoreDB) stateDB, err = dbProvider(&DBContext{"state", config}) if err != nil { return } + opened = append(opened, stateDB) - // TODO: add a new store, notify the-3rd parties to integrate var sigDB dbm.DB sigDB, err = dbProvider(&DBContext{"signatures", config}) if err != nil { return } + opened = append(opened, sigDB) sigStore = sequencer.NewSignatureStore(sigDB) return diff --git a/sequencer/block_verify.go b/sequencer/block_verify.go index 590dd3d4be8..a0ba5598150 100644 --- a/sequencer/block_verify.go +++ b/sequencer/block_verify.go @@ -80,8 +80,9 @@ func VerifyBlockSignature(verifier SequencerVerifier, block *BlockV2) error { return fmt.Errorf("%w: verifier not configured", ErrVerifierUnavailable) } - if len(block.Signature) == 0 { - return fmt.Errorf("%w: missing signature at height %d", ErrInvalidSignature, block.Number) + if len(block.Signature) != crypto.SignatureLength { + return fmt.Errorf("%w: invalid signature length %d (want %d) at height %d", + ErrInvalidSignature, len(block.Signature), crypto.SignatureLength, block.Number) } computedHash, err := computeBlockHash(block) diff --git a/sequencer/hash_set.go b/sequencer/hash_set.go index bb10296dbd8..64b35e9a8a5 100644 --- a/sequencer/hash_set.go +++ b/sequencer/hash_set.go @@ -6,7 +6,8 @@ import ( "github.com/morph-l2/go-ethereum/common" ) -// HashSet is a fixed-capacity set for common.Hash with LRU eviction. +// HashSet is a fixed-capacity set for common.Hash with FIFO eviction +// (insertion-order ring buffer; entries are not re-ordered on access). type HashSet struct { items []common.Hash index map[common.Hash]int diff --git a/types/block_v2.go b/types/block_v2.go index 70f22d613e6..4a6099880bf 100644 --- a/types/block_v2.go +++ b/types/block_v2.go @@ -2,9 +2,12 @@ package types import ( "errors" + "fmt" + "math/big" + "github.com/morph-l2/go-ethereum/common" + "github.com/morph-l2/go-ethereum/crypto" seqproto "github.com/tendermint/tendermint/proto/tendermint/sequencer" - "math/big" ) // BlockV2 represents the block format after upgrade to centralized sequencer mode. @@ -87,6 +90,10 @@ func BlockV2FromProto(pb *seqproto.BlockV2) (*BlockV2, error) { return nil, errors.New("invalid block hash length") } + if len(pb.Signature) != crypto.SignatureLength { + return nil, fmt.Errorf("invalid signature length: got %d, want %d", len(pb.Signature), crypto.SignatureLength) + } + baseFee := new(big.Int) if len(pb.BaseFee) > 0 { baseFee.SetBytes(pb.BaseFee)