From ab2f71df973df268655844013d8be090aa7de8e9 Mon Sep 17 00:00:00 2001 From: corey Date: Fri, 26 Jun 2026 10:34:44 +0800 Subject: [PATCH 1/2] test(executor): QA chaos hook to fork a fullnode and observe self-heal Continuously fork the local tip in the block-production path (distinct timestamp so the fork is guaranteed to diverge from canonical) and emit staged REORG-TEST [1/3] FORK / [2/3] DETECT / [3/3] RECOVER logs so QA can watch a fullnode diverge and heal via L1 derivation. QA-only; not for production. Co-authored-by: Cursor --- node/core/executor.go | 93 +++++++++++++++++++++++++++------- node/derivation/derivation.go | 36 +++++++++++-- node/types/retryable_client.go | 8 ++- 3 files changed, 114 insertions(+), 23 deletions(-) diff --git a/node/core/executor.go b/node/core/executor.go index 0c581973e..d73e9a881 100644 --- a/node/core/executor.go +++ b/node/core/executor.go @@ -6,6 +6,7 @@ import ( "fmt" "math/big" syncos "sync" + "time" "github.com/morph-l2/go-ethereum/accounts/abi/bind" "github.com/morph-l2/go-ethereum/common" @@ -130,6 +131,61 @@ func NewExecutor(newSyncFunc NewSyncerFunc, config *Config, tmPubKey crypto.PubK return executor, nil } +// QA TEST BRANCH ONLY (test/fullnode-fork-selfheal). +// +// This node forks the chain CONTINUOUSLY: for every canonical block delivered at +// the tip, instead of applying it we assemble our OWN block at the same height +// (timestamp pinned to the parent's, valid post-Emerald, so the hash is guaranteed +// to differ from the canonical sibling) and apply that. tendermint's StateV2 still +// advances using the canonical block it handed us, so geth runs a private fork +// chain at the unsafe tip while StateV2 tracks canonical heights. 
+// +// Recovery is the REAL self-heal path: as each L1 batch covering the forked range +// is derived, local-verify finds a blob mismatch and runs deriveForce (wrapped in +// withReactorsQuiesced -> SetCanonical -> StartReactorsAfterReorg), rewriting that +// batch back onto canonical and re-syncing StateV2 from geth. The tip then forks +// again -> the node is permanently "always forking, always reorging". No reorg +// logic is modified; this only swaps the applied block + adds logs. +// +// SAFETY: +// - MUST run only on a dedicated fullnode. Never on a sequencer or in production. +// - StateV2 stores the block signature keyed by the CANONICAL hash, but geth +// holds the fork block at that height, so this node has no valid signature for +// its own unsafe (fork) blocks. Never let any peer use this node as a P2P data +// source for the unsafe range -- they will reject the signature / disconnect. + +// injectForkBlockLocked assembles this node's own block on the given geth head +// (timestamp pinned to the parent's, so normally distinct from the canonical +// sibling; see the edge case below) and applies it, extending the local fork by one block. The canonical +// block is intentionally dropped, so this skips updateNextL1MessageIndex -- safe +// ONLY because a fullnode never produces blocks (that index is read only on the +// sequencer's RequestBlockDataV2 path). Caller must hold e.mu. +// +// Edge case: if canonical produced an empty block in the same second (so it too +// has time == parent.Time and identical empty content), the assembled fork would +// hash-collide with canonical and not actually diverge -- harmless, but [1/3] FORKED is still logged on a successful apply because this function never compares against the canonical hash.
+func (e *Executor) injectForkBlockLocked(head *eth.Block, canonicalNumber uint64) (bool, error) { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + forkTimestamp := head.Time() + l2Block, err := e.l2Client.AssembleL2BlockV2WithTimestamp(ctx, head.Hash(), forkTimestamp, nil) + if err != nil { + return false, fmt.Errorf("FORK-INJECT: assemble fork block: %w", err) + } + if _, err := e.l2Client.NewL2BlockV2(context.Background(), l2Block); err != nil { + return false, fmt.Errorf("FORK-INJECT: apply fork block: %w", err) + } + e.metrics.Height.Set(float64(l2Block.Number)) + e.logger.Info("########## REORG-TEST [1/3] FORKED — replaced canonical block with own fork ##########", + "forkNumber", l2Block.Number, + "forkHash", l2Block.Hash.Hex(), + "parentHash", head.Hash().Hex(), + "forkTimestamp", l2Block.Timestamp, + "droppedCanonicalNumber", canonicalNumber) + return true, nil +} + var _ l2node.L2Node = (*Executor)(nil) func (e *Executor) RequestBlockData(height int64) (txs [][]byte, blockMeta []byte, collectedL1Msgs bool, err error) { @@ -454,27 +510,30 @@ func (e *Executor) RequestBlockDataV2(parentHashBytes []byte) (*l2node.BlockV2, func (e *Executor) ApplyBlockV2(block *l2node.BlockV2) (applied bool, err error) { e.mu.Lock() defer e.mu.Unlock() - execBlock := blockV2ToExecutableL2Data(block) - // Reorg / idempotent detection: only check when incoming block height - // is at or below the current geth head (normal sequential blocks skip this). 
- currentHeight, chkErr := e.l2Client.BlockNumber(context.Background()) - if chkErr == nil && block.Number <= currentHeight { - existing, exErr := e.l2Client.BlockByNumber(context.Background(), big.NewInt(int64(block.Number))) - if exErr == nil && existing != nil { - if existing.Hash() == execBlock.Hash { - e.logger.Debug("ApplyBlockV2: idempotent skip", "number", execBlock.Number) - return false, nil - } - e.logger.Info("ApplyBlockV2: REORG detected", - "targetHeight", execBlock.Number, - "newHash", execBlock.Hash.Hex(), - "existingHash", existing.Hash().Hex(), - "currentHead", currentHeight, - ) + // QA TEST BRANCH ONLY (test/fullnode-fork-selfheal): continuously fork the + // tip. When the incoming canonical block extends our current head (number == + // head+1) we apply our OWN divergent block at that height instead. StateV2 + // still advances on the canonical block, so geth keeps a private fork at the + // unsafe tip until each L1 batch is derived and deriveForce reorgs it back + // (see derivation [2/3]/[3/3]). Only blocks that do NOT extend the tip fall + // through to the normal apply: idempotent re-applies (number <= head) and gaps + // (number > head+1). Note a catch-up block IS number == head+1, so it is forked + // too -- that is exactly how the tip re-diverges after each deriveForce reorg. + if head, hErr := e.l2Client.BlockByNumber(context.Background(), nil); hErr == nil && head != nil && block.Number == head.NumberU64()+1 { + // On assemble/apply failure there is no graceful fallback in the forked + // steady state: geth has no canonical parent (canonical N was never landed), + // so the canonical apply below errors and blocksync simply retries this + // height until assemble recovers. assemble rarely fails, so this is mostly + // a theoretical path. 
+ if forked, fErr := e.injectForkBlockLocked(head, block.Number); fErr == nil { + return forked, nil + } else { + e.logger.Error("FORK-INJECT failed; this height will error and be retried by blocksync", "number", block.Number, "err", fErr) } } + execBlock := blockV2ToExecutableL2Data(block) if _, err := e.l2Client.NewL2BlockV2(context.Background(), execBlock); err != nil { e.logger.Error("failed to apply block v2", "number", execBlock.Number, diff --git a/node/derivation/derivation.go b/node/derivation/derivation.go index 19b524bb7..110203444 100644 --- a/node/derivation/derivation.go +++ b/node/derivation/derivation.go @@ -365,11 +365,16 @@ func (d *Derivation) derivationBlock(ctx context.Context) { "l2Latest", currentL2Latest) return } - // Scenario C: sequencer stopped → L1 blob fill-gap. - d.logger.Info("local verify: lastBlock missing and L2 head flat across polls; fallback to L1 blob fill-gap (scenario C)", + // Scenario C: sequencer stopped → L1 blob fill-gap. For a forked + // node stuck below the batch end this is also the recovery path: + // the local head is a fork lower than lastBlockNumber, so deriveForce + // rewrites/fills the batch from L1 back onto canonical. 
+ preReorgHead, _ := d.l2Client.BlockNumber(ctx) + d.logger.Info("########## REORG-TEST [2/3] REORG DETECTED — local head below L1 batch end; triggering deriveForce fill-gap ##########", "batchIndex", batchInfo.batchIndex, - "lastBlockNumber", batchInfo.lastBlockNumber, - "l2Latest", currentL2Latest) + "localHead", preReorgHead, + "firstL2Block", batchInfo.firstBlockNumber, + "lastL2Block", batchInfo.lastBlockNumber) batchInfoFull, fetchErr := d.fetchRollupDataByTxHash(lg.TxHash, lg.BlockNumber) if fetchErr != nil { if errors.Is(fetchErr, types.ErrNotCommitBatchTx) { @@ -399,6 +404,10 @@ func (d *Derivation) derivationBlock(ctx context.Context) { "batchIndex", batchInfo.batchIndex, "error", err) return } + d.logger.Info("########## REORG-TEST [3/3] RECOVERED — deriveForce fill-gap rewrote batch onto canonical ##########", + "batchIndex", batchInfo.batchIndex, + "preReorgHead", preReorgHead, + "reorgedToBlock", lastHeader.Number.Uint64()) d.metrics.SetL2DeriveHeight(lastHeader.Number.Uint64()) d.metrics.SetSyncedBatchIndex(batchInfo.batchIndex) @@ -417,8 +426,11 @@ func (d *Derivation) derivationBlock(ctx context.Context) { } for i := range rebuilt { if rebuilt[i] != batchInfo.blobHashes[i] { - d.logger.Info("blob hash mismatch; triggering self-heal reorg", + d.logger.Info("########## REORG-TEST [2/3] REORG DETECTED — local blob mismatch vs L1 batch; triggering deriveForce self-heal ##########", "batchIndex", batchInfo.batchIndex, + "firstL2Block", batchInfo.firstBlockNumber, + "lastL2Block", batchInfo.lastBlockNumber, + "mismatchAtBlob", i, "expected", batchInfo.blobHashes[i].Hex(), "rebuilt", rebuilt[i].Hex()) @@ -429,6 +441,14 @@ func (d *Derivation) derivationBlock(ctx context.Context) { return } + // Snapshot the local head before reorg so the [3/3] log can + // show roughly how many blocks were rolled back / rewritten. 
+ preReorgHead, _ := d.l2Client.BlockNumber(ctx) + depthApprox := uint64(0) + if preReorgHead >= batchInfo.firstBlockNumber { + depthApprox = preReorgHead - batchInfo.firstBlockNumber + 1 + } + // Quiesce blocksync + broadcast reactors via withReactorsQuiesced // so the deferred Start runs whether deriveForce succeeds // or fails — without it, a deriveForce error would leave @@ -443,6 +463,12 @@ func (d *Derivation) derivationBlock(ctx context.Context) { "batchIndex", batchInfo.batchIndex, "error", err) return } + d.logger.Info("########## REORG-TEST [3/3] RECOVERED — deriveForce rewrote batch onto canonical ##########", + "batchIndex", batchInfo.batchIndex, + "preReorgHead", preReorgHead, + "reorgedFromBlock", batchInfo.firstBlockNumber, + "reorgedToBlock", lastHeader.Number.Uint64(), + "depthApprox", depthApprox) break } } diff --git a/node/types/retryable_client.go b/node/types/retryable_client.go index ffe9f7f3f..fcfa3493b 100644 --- a/node/types/retryable_client.go +++ b/node/types/retryable_client.go @@ -305,7 +305,13 @@ func retryableError(err error) bool { // AssembleL2BlockV2 assembles a L2 block based on parent hash. func (rc *RetryableClient) AssembleL2BlockV2(ctx context.Context, parentHash common.Hash, transactions eth.Transactions) (ret *catalyst.ExecutableL2Data, err error) { - timestamp := uint64(time.Now().Unix()) + return rc.AssembleL2BlockV2WithTimestamp(ctx, parentHash, uint64(time.Now().Unix()), transactions) +} + +// AssembleL2BlockV2WithTimestamp assembles a L2 block on parentHash with an +// explicit timestamp. Used by the QA fork-injection test to deterministically +// diverge from the sequencer's canonical block (which uses time.Now()). 
+func (rc *RetryableClient) AssembleL2BlockV2WithTimestamp(ctx context.Context, parentHash common.Hash, timestamp uint64, transactions eth.Transactions) (ret *catalyst.ExecutableL2Data, err error) { if retryErr := backoff.Retry(func() error { resp, respErr := rc.authClient.AssembleL2BlockV2(ctx, parentHash, &timestamp, transactions) if respErr != nil { From 03bd02b9d769884f0a272b2b706c6e20d1849b51 Mon Sep 17 00:00:00 2001 From: corey Date: Fri, 26 Jun 2026 10:35:03 +0800 Subject: [PATCH 2/2] fix(derivation): make deriveForce verify local blocks before keeping them (#1008) Verify-driven deriveForce: walking from the canonical anchor (firstBlockNumber-1), keep each local block while its content matches the batch, then rewrite via NewSafeL2Block from the first divergent or missing height onward. Replaces the old skipNumber fill-gap, which blindly trusted local blocks and could grow a permanent shadow chain (correct tx content, wrong parent/state hashes) that never self-healed. This is the fix under test by the chaos hook in the preceding commit; it is the same change as PR #1008. Co-authored-by: Cursor --- node/derivation/batch_info.go | 41 +++++++++++ node/derivation/derivation.go | 130 +++++++++++++--------------------- 2 files changed, 92 insertions(+), 79 deletions(-) diff --git a/node/derivation/batch_info.go b/node/derivation/batch_info.go index 344e1cb9b..020c13acf 100644 --- a/node/derivation/batch_info.go +++ b/node/derivation/batch_info.go @@ -1,6 +1,7 @@ package derivation import ( + "bytes" "encoding/binary" "fmt" "math/big" @@ -254,6 +255,46 @@ func encodeTransactions(txs []*eth.Transaction) [][]byte { return enc } +// blockContentMatches reports whether a local L2 block carries the same +// content the batch committed for that height: timestamp, gas limit, base +// fee and the ordered tx list (L1 messages then L2 txs, by binary encoding).
+// The batch has no parent hashes, so this is content-only; deriveForce pairs +// it with a canonical anchor to conclude a block is canonical. +func blockContentMatches(local *eth.Block, sd *catalyst.SafeL2Data) bool { + h := local.Header() + if h.Time != sd.Timestamp { + return false + } + if h.GasLimit != sd.GasLimit { + return false + } + if !baseFeeEqual(h.BaseFee, sd.BaseFee) { + return false + } + txs := local.Transactions() + if len(txs) != len(sd.Transactions) { + return false + } + for i, tx := range txs { + enc, err := tx.MarshalBinary() + if err != nil || !bytes.Equal(enc, sd.Transactions[i]) { + return false + } + } + return true +} + +// baseFeeEqual treats nil and zero as equal — ParseBatch normalises a zero +// base fee to nil, while a local header may carry an explicit zero. +func baseFeeEqual(a, b *big.Int) bool { + az := a == nil || a.Sign() == 0 + bz := b == nil || b.Sign() == 0 + if az || bz { + return az && bz + } + return a.Cmp(b) == 0 +} + type txQueue struct { txs eth.Transactions pointer int diff --git a/node/derivation/derivation.go b/node/derivation/derivation.go index 110203444..c913d0496 100644 --- a/node/derivation/derivation.go +++ b/node/derivation/derivation.go @@ -365,12 +365,13 @@ func (d *Derivation) derivationBlock(ctx context.Context) { "l2Latest", currentL2Latest) return } - // Scenario C: sequencer stopped → L1 blob fill-gap. For a forked - // node stuck below the batch end this is also the recovery path: - // the local head is a fork lower than lastBlockNumber, so deriveForce - // rewrites/fills the batch from L1 back onto canonical. + // Scenario C: batch tip missing locally and L2 head flat across + // polls (sequencer stopped, or node stuck on a fork below the + // batch end). Pull the real batch and let deriveForce reconcile + // in one pass — verify present blocks, append the missing tail, + // then advance. 
preReorgHead, _ := d.l2Client.BlockNumber(ctx) - d.logger.Info("########## REORG-TEST [2/3] REORG DETECTED — local head below L1 batch end; triggering deriveForce fill-gap ##########", + d.logger.Info("########## REORG-TEST [2/3] REORG DETECTED — local head below L1 batch end; triggering deriveForce reconcile ##########", "batchIndex", batchInfo.batchIndex, "localHead", preReorgHead, "firstL2Block", batchInfo.firstBlockNumber, @@ -380,7 +381,7 @@ func (d *Derivation) derivationBlock(ctx context.Context) { if errors.Is(fetchErr, types.ErrNotCommitBatchTx) { continue } - d.logger.Error("local verify fill-gap: fetch real batch failed", + d.logger.Error("local verify reconcile: fetch real batch failed", "batchIndex", batchInfo.batchIndex, "error", fetchErr) return } @@ -391,20 +392,16 @@ func (d *Derivation) derivationBlock(ctx context.Context) { // reactors stopped indefinitely (Stop is idempotent on // retry, but Start is never reached). err = d.withReactorsQuiesced(ctx, batchInfo.batchIndex, func() error { - localLatest, err := d.l2Client.BlockNumber(ctx) - if err != nil { - return fmt.Errorf("read local latest: %w", err) - } var derErr error - lastHeader, derErr = d.deriveForce(batchInfoFull, localLatest) + lastHeader, derErr = d.deriveForce(batchInfoFull) return derErr }) if err != nil { - d.logger.Error("local verify fill-gap: derive failed", + d.logger.Error("local verify reconcile: derive failed", "batchIndex", batchInfo.batchIndex, "error", err) return } - d.logger.Info("########## REORG-TEST [3/3] RECOVERED — deriveForce fill-gap rewrote batch onto canonical ##########", + d.logger.Info("########## REORG-TEST [3/3] RECOVERED — deriveForce reconciled batch onto canonical ##########", "batchIndex", batchInfo.batchIndex, "preReorgHead", preReorgHead, "reorgedToBlock", lastHeader.Number.Uint64()) @@ -455,7 +452,7 @@ func (d *Derivation) derivationBlock(ctx context.Context) { // reactors stopped indefinitely. 
err = d.withReactorsQuiesced(ctx, batchInfo.batchIndex, func() error { var derErr error - lastHeader, derErr = d.deriveForce(batchInfoFull, 0) + lastHeader, derErr = d.deriveForce(batchInfoFull) return derErr }) if err != nil { @@ -662,12 +659,7 @@ func (d *Derivation) fetchRollupDataByTxHash(txHash common.Hash, blockNumber uin } } - // Get L2 height - l2Height, err := d.l2Client.BlockNumber(d.ctx) - if err != nil { - return nil, fmt.Errorf("query l2 block number error:%v", err) - } - rollupData, err := d.parseBatch(batch, l2Height) + rollupData, err := d.parseBatch(batch) if err != nil { d.logger.Error("parse batch failed", "txNonce", tx.Nonce(), "txHash", txHash, "l1BlockNumber", blockNumber) @@ -777,25 +769,27 @@ func (d *Derivation) UnPackData(data []byte) (geth.RPCRollupBatch, error) { return batch, nil } -func (d *Derivation) parseBatch(batch geth.RPCRollupBatch, l2Height uint64) (*BatchInfo, error) { +func (d *Derivation) parseBatch(batch geth.RPCRollupBatch) (*BatchInfo, error) { batchInfo := new(BatchInfo) if err := batchInfo.ParseBatch(batch); err != nil { return nil, fmt.Errorf("parse batch error:%v", err) } - if err := d.handleL1Message(batchInfo, batchInfo.parentTotalL1MessagePopped, l2Height); err != nil { + if err := d.handleL1Message(batchInfo, batchInfo.parentTotalL1MessagePopped); err != nil { return nil, fmt.Errorf("handle l1 message error:%v", err) } return batchInfo, nil } -func (d *Derivation) handleL1Message(rollupData *BatchInfo, parentTotalL1MessagePopped, l2Height uint64) error { +// handleL1Message populates each block's SafeL2Data.Transactions with its L1 +// messages (prepended before the L2 txs). It runs for EVERY block in the batch, +// including ones already present locally: deriveForce content-compares and may +// rewrite present blocks, so it needs the complete tx list — a partial list +// mis-compares and would write L1-message-less blocks. 
We rely on snapshots +// shipping the full DB so historical L1 messages are always local; if that ever +// breaks, the count check below fails loudly instead of corrupting the chain. +func (d *Derivation) handleL1Message(rollupData *BatchInfo, parentTotalL1MessagePopped uint64) error { totalL1MessagePopped := parentTotalL1MessagePopped for bIndex, block := range rollupData.blockContexts { - // This may happen to nodes started from snapshot, in which case we will no longer handle L1Msg - if block.Number <= l2Height { - totalL1MessagePopped += uint64(block.l1MsgNum) - continue - } var l1Transactions []*eth.Transaction l1Messages, err := d.getL1Message(totalL1MessagePopped, uint64(block.l1MsgNum)) if err != nil { @@ -858,20 +852,6 @@ func (d *Derivation) derive(rollupData *BatchInfo) (*eth.Header, error) { return lastHeader, nil } -// deriveForce writes the batch's blocks via NewSafeL2Block. -// -// skipNumber lets one implementation serve two SPEC-005 §4.3 Path B scenarios: -// -// - skipNumber == 0 (scenario B, self-heal): every block is written; EL -// SetCanonical reorgs the local fork onto the L1-canonical chain. -// - skipNumber > 0 (scenario C, fill-gap): blocks with Number ≤ skipNumber -// are skipped (already present locally, presumed valid via P2P), only -// the missing tail is appended; no reorg of existing blocks. -// -// In both cases the parent of the first block we actually write must exist -// locally. For scenario B that's batch.firstBlockNumber-1 (above safe head). -// For scenario C with skipNumber == localLatestL2 that's localLatestL2 itself -// (necessarily ≥ firstBlockNumber-1 once skipNumber covers everything below). // withReactorsQuiesced runs body while consensus reactors (blocksync / // broadcast) are paused, guaranteeing StartReactorsAfterReorg runs even if // body returns an error. 
The restart height comes from the L2 EL latest @@ -923,45 +903,25 @@ func (d *Derivation) withReactorsQuiesced(ctx context.Context, batchIndex uint64 return body() } -func (d *Derivation) deriveForce(rollupData *BatchInfo, skipNumber uint64) (*eth.Header, error) { +// deriveForce reconciles the local chain with the batch's canonical content +// over [firstBlockNumber, lastBlockNumber] and returns the tip header. +// +// Walking from the canonical anchor (firstBlockNumber-1), each height is +// kept while its local content matches the batch, then rewritten via +// NewSafeL2Block from the first divergent or missing height onward. A kept +// block is canonical by induction: matching content on a canonical parent +// reproduces the canonical block (the batch has no parent hashes, so content +// is the only signal — and it suffices given the anchor). This replaces the +// old skipNumber fill-gap, which blindly trusted local blocks and could grow +// a permanent shadow chain. +func (d *Derivation) deriveForce(rollupData *BatchInfo) (*eth.Header, error) { firstNum := rollupData.firstBlockNumber if firstNum == 0 { return nil, fmt.Errorf("invalid firstBlockNumber 0 for batch %d", rollupData.batchIndex) } - // Race short-circuit: scenario C dispatch is decided before reactors are - // quiesced (HeaderByNumber check at derivationBlock vs StopReactors inside - // withReactorsQuiesced), so blocksync can backfill past lastBlockNumber in - // that small window. When that happens, skipNumber (= localLatest read - // after Stop) ends up >= the batch tip. Without this guard the loop below - // would `continue` on every block, return header(skipNumber) — a block - // past the batch — and then verifyBatchRoots / advanceSafe upstream would - // run against the wrong header (false stateException + safe head pushed - // past the batch). 
Returning header(lastBlockNumber) collapses this case - // to the same outcome scenario A would have produced if the dispatch had - // caught the now-present batch tip. - if skipNumber >= rollupData.lastBlockNumber { - lastHeader, err := d.l2Client.HeaderByNumber(d.ctx, big.NewInt(int64(rollupData.lastBlockNumber))) - if err != nil { - return nil, fmt.Errorf("read batch tip at %d: %w", rollupData.lastBlockNumber, err) - } - if lastHeader == nil { - return nil, fmt.Errorf("batch tip at %d missing", rollupData.lastBlockNumber) - } - d.logger.Info("deriveForce: P2P caught up past batch tip during scenario-C dispatch window; no-op write", - "batchIndex", rollupData.batchIndex, - "lastBlockNumber", rollupData.lastBlockNumber, - "skipNumber", skipNumber) - return lastHeader, nil - } - - // Anchor: parent of the first block we will WRITE must exist locally. - // scenario B (skipNumber==0): firstNum-1. - // scenario C: max(firstNum-1, skipNumber). + // Anchor: parent of the batch's first block must exist locally. parentNum := firstNum - 1 - if skipNumber > parentNum { - parentNum = skipNumber - } lastHeader, err := d.l2Client.HeaderByNumber(d.ctx, big.NewInt(int64(parentNum))) if err != nil { return nil, fmt.Errorf("read parent header at %d: %w", parentNum, err) @@ -970,11 +930,23 @@ func (d *Derivation) deriveForce(rollupData *BatchInfo, skipNumber uint64) (*eth return nil, fmt.Errorf("parent header at %d missing", parentNum) } + rewriting := false for _, blockData := range rollupData.blockContexts { - // Skip blocks already present locally (scenario C). For scenario B - // skipNumber == 0 means this branch is never taken. - if blockData.SafeL2Data.Number <= skipNumber { - continue + // Keep the local block while its content still matches the batch; at + // the first divergent or missing height switch to rewrite for the + // rest of the range (canonical by induction from the anchor). 
+ if !rewriting { + local, lErr := d.l2Client.BlockByNumber(d.ctx, big.NewInt(int64(blockData.SafeL2Data.Number))) + if lErr != nil && !errors.Is(lErr, ethereum.NotFound) { + return nil, fmt.Errorf("read local block %d: %w", blockData.SafeL2Data.Number, lErr) + } + if local != nil && blockContentMatches(local, blockData.SafeL2Data) { + lastHeader = local.Header() + continue + } + rewriting = true + d.logger.Info("deriveForce: local fork/gap; rewriting batch tail onto canonical", + "batchIndex", rollupData.batchIndex, "fromBlock", blockData.SafeL2Data.Number) } // Pin the parent so SetCanonical reorgs from the local fork to the