Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 76 additions & 17 deletions node/core/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"math/big"
syncos "sync"
"time"

"github.com/morph-l2/go-ethereum/accounts/abi/bind"
"github.com/morph-l2/go-ethereum/common"
Expand Down Expand Up @@ -130,6 +131,61 @@ func NewExecutor(newSyncFunc NewSyncerFunc, config *Config, tmPubKey crypto.PubK
return executor, nil
}

// QA TEST BRANCH ONLY (test/fullnode-fork-selfheal).
//
// This node forks the chain CONTINUOUSLY: for every canonical block delivered at
// the tip, instead of applying it we assemble our OWN block at the same height
// (timestamp pinned to the parent's, valid post-Emerald, so the hash is guaranteed
// to differ from the canonical sibling) and apply that. tendermint's StateV2 still
// advances using the canonical block it handed us, so geth runs a private fork
// chain at the unsafe tip while StateV2 tracks canonical heights.
//
// Recovery is the REAL self-heal path: as each L1 batch covering the forked range
// is derived, local-verify finds a blob mismatch and runs deriveForce (wrapped in
// withReactorsQuiesced -> SetCanonical -> StartReactorsAfterReorg), rewriting that
// batch back onto canonical and re-syncing StateV2 from geth. The tip then forks
// again -> the node is permanently "always forking, always reorging". No reorg
// logic is modified; this only swaps the applied block + adds logs.
//
// SAFETY:
// - MUST run only on a dedicated fullnode. Never on a sequencer or in production.
// - StateV2 stores the block signature keyed by the CANONICAL hash, but geth
// holds the fork block at that height, so this node has no valid signature for
// its own unsafe (fork) blocks. Never let any peer use this node as a P2P data
// source for the unsafe range -- they will reject the signature / disconnect.

// injectForkBlockLocked assembles this node's own block on the given geth head
// (timestamp pinned to the parent's, guaranteed distinct from the canonical
// sibling) and applies it, extending the local fork by one block. The canonical
// block is intentionally dropped, so this skips updateNextL1MessageIndex -- safe
// ONLY because a fullnode never produces blocks (that index is read only on the
// sequencer's RequestBlockDataV2 path). Caller must hold e.mu.
//
// Edge case: if canonical produced an empty block in the same second (so it too
// has time == parent.Time and identical empty content), the assembled fork would
// hash-collide with canonical and not actually diverge -- harmless, just no [1/3].
func (e *Executor) injectForkBlockLocked(head *eth.Block, canonicalNumber uint64) (bool, error) {
// Bound the assemble RPC below to 30 seconds.
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()

// Reuse the parent's timestamp for the fork block.
forkTimestamp := head.Time()
l2Block, err := e.l2Client.AssembleL2BlockV2WithTimestamp(ctx, head.Hash(), forkTimestamp, nil)
if err != nil {
return false, fmt.Errorf("FORK-INJECT: assemble fork block: %w", err)
}
// NOTE(review): this call passes context.Background() rather than ctx, so the
// 30s timeout created above does not cover the apply step -- confirm whether
// that is intended.
if _, err := e.l2Client.NewL2BlockV2(context.Background(), l2Block); err != nil {
return false, fmt.Errorf("FORK-INJECT: apply fork block: %w", err)
Comment on lines +168 to +177

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🩺 Stability & Availability | 🟠 Major | ⚡ Quick win

Do not use unbounded RPC contexts while holding e.mu.

Line 157 creates a 30s context, but Line 169 discards it for NewL2BlockV2; Line 514 also calls BlockNumber with context.Background() inside the executor mutex. If either retrying RPC stalls, ApplyBlockV2 can block other executor work behind the lock. Use bounded contexts for both calls.

Suggested bounded-context fix
-	if _, err := e.l2Client.NewL2BlockV2(context.Background(), l2Block); err != nil {
+	if _, err := e.l2Client.NewL2BlockV2(ctx, l2Block); err != nil {
 		return false, fmt.Errorf("FORK-INJECT: apply fork block: %w", err)
 	}
-	if head, hErr := e.l2Client.BlockNumber(context.Background()); hErr == nil && block.Number == head+1 {
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+	if head, hErr := e.l2Client.BlockNumber(ctx); hErr == nil && block.Number == head+1 {

Also applies to: 514-519

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@node/core/executor.go` around lines 157 - 170, The executor methods are
mixing bounded and unbounded RPC contexts while holding e.mu, which can block
other work if an RPC stalls. In ApplyBlockV2, reuse the existing timeout context
for NewL2BlockV2 instead of context.Background(), and in the BlockNumber path
around the executor mutex replace the background context with a bounded timeout
context as well. Update the affected calls in executor.go so both NewL2BlockV2
and BlockNumber use the same finite-lifetime context pattern as BlockByNumber
and AssembleL2BlockV2WithTimestamp.

}
// Publish the fork block's number as the height metric.
e.metrics.Height.Set(float64(l2Block.Number))
// Log the fork block alongside the parent hash and the canonical number that
// was passed in, so the divergence is visible in the logs.
e.logger.Info("########## REORG-TEST [1/3] FORKED — replaced canonical block with own fork ##########",
"forkNumber", l2Block.Number,
"forkHash", l2Block.Hash.Hex(),
"parentHash", head.Hash().Hex(),
"forkTimestamp", l2Block.Timestamp,
"droppedCanonicalNumber", canonicalNumber)
// Success: report that a block was applied.
return true, nil
}

var _ l2node.L2Node = (*Executor)(nil)

func (e *Executor) RequestBlockData(height int64) (txs [][]byte, blockMeta []byte, collectedL1Msgs bool, err error) {
Expand Down Expand Up @@ -454,27 +510,30 @@ func (e *Executor) RequestBlockDataV2(parentHashBytes []byte) (*l2node.BlockV2,
func (e *Executor) ApplyBlockV2(block *l2node.BlockV2) (applied bool, err error) {
e.mu.Lock()
defer e.mu.Unlock()
execBlock := blockV2ToExecutableL2Data(block)

// Reorg / idempotent detection: only check when incoming block height
// is at or below the current geth head (normal sequential blocks skip this).
currentHeight, chkErr := e.l2Client.BlockNumber(context.Background())
if chkErr == nil && block.Number <= currentHeight {
existing, exErr := e.l2Client.BlockByNumber(context.Background(), big.NewInt(int64(block.Number)))
if exErr == nil && existing != nil {
if existing.Hash() == execBlock.Hash {
e.logger.Debug("ApplyBlockV2: idempotent skip", "number", execBlock.Number)
return false, nil
}
e.logger.Info("ApplyBlockV2: REORG detected",
"targetHeight", execBlock.Number,
"newHash", execBlock.Hash.Hex(),
"existingHash", existing.Hash().Hex(),
"currentHead", currentHeight,
)
// QA TEST BRANCH ONLY (test/fullnode-fork-selfheal): continuously fork the
// tip. When the incoming canonical block extends our current head (number ==
// head+1) we apply our OWN divergent block at that height instead. StateV2
// still advances on the canonical block, so geth keeps a private fork at the
// unsafe tip until each L1 batch is derived and deriveForce reorgs it back
// (see derivation [2/3]/[3/3]). Only blocks that do NOT extend the tip fall
// through to the normal apply: idempotent re-applies (number <= head) and gaps
// (number > head+1). Note a catch-up block IS number == head+1, so it is forked
// too -- that is exactly how the tip re-diverges after each deriveForce reorg.
if head, hErr := e.l2Client.BlockByNumber(context.Background(), nil); hErr == nil && head != nil && block.Number == head.NumberU64()+1 {
// On assemble/apply failure there is no graceful fallback in the forked
// steady state: geth has no canonical parent (canonical N was never landed),
// so the canonical apply below errors and blocksync simply retries this
// height until assemble recovers. assemble rarely fails, so this is mostly
// a theoretical path.
if forked, fErr := e.injectForkBlockLocked(head, block.Number); fErr == nil {
return forked, nil
} else {
e.logger.Error("FORK-INJECT failed; this height will error and be retried by blocksync", "number", block.Number, "err", fErr)
}
}

execBlock := blockV2ToExecutableL2Data(block)
if _, err := e.l2Client.NewL2BlockV2(context.Background(), execBlock); err != nil {
e.logger.Error("failed to apply block v2",
"number", execBlock.Number,
Expand Down
41 changes: 41 additions & 0 deletions node/derivation/batch_info.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package derivation

import (
"bytes"
"encoding/binary"
"fmt"
"math/big"
Expand Down Expand Up @@ -254,6 +255,46 @@ func encodeTransactions(txs []*eth.Transaction) [][]byte {
return enc
}

// blockContentMatches reports whether a local L2 block carries the same
// content the batch committed for that height: timestamp, gas limit, base
// fee and the ordered tx list (L1 messages then L2 txs, by binary encoding).
// The batch has no parent hashes, so this is content-only; deriveForce pairs
// it with a canonical anchor to conclude a block is canonical.
func blockContentMatches(local *eth.Block, sd *catalyst.SafeL2Data) bool {
// Compare the header-level fields first; any difference is a mismatch.
h := local.Header()
if h.Time != sd.Timestamp {
return false
}
if h.GasLimit != sd.GasLimit {
return false
}
// Base fee goes through baseFeeEqual rather than a direct comparison.
if !baseFeeEqual(h.BaseFee, sd.BaseFee) {
return false
}
// The transaction counts must agree before the pairwise comparison, which
// indexes sd.Transactions by the local transaction's position.
txs := local.Transactions()
if len(txs) != len(sd.Transactions) {
return false
}
for i, tx := range txs {
// A local transaction that fails to encode is treated as a mismatch,
// the same as one whose encoding differs from the batch's bytes.
enc, err := tx.MarshalBinary()
if err != nil || !bytes.Equal(enc, sd.Transactions[i]) {
Comment thread
coderabbitai[bot] marked this conversation as resolved.
return false
}
}
// Every compared field and every transaction matched.
return true
}

// baseFeeEqual reports whether two base fees represent the same amount. A nil
// fee and an explicit zero fee count as the same value: ParseBatch normalises
// a zero base fee to nil, while a local header may carry an explicit zero.
func baseFeeEqual(a, b *big.Int) bool {
	aEmpty := a == nil || a.Sign() == 0
	bEmpty := b == nil || b.Sign() == 0
	switch {
	case aEmpty && bEmpty:
		// Both sides are nil-or-zero.
		return true
	case aEmpty != bEmpty:
		// Exactly one side is nil-or-zero, so the other is a non-zero fee.
		return false
	default:
		// Both are non-nil and non-zero: compare numerically.
		return a.Cmp(b) == 0
	}
}

type txQueue struct {
txs eth.Transactions
pointer int
Expand Down
Loading
Loading