diff --git a/crates/synth-synthesis/src/optimizer_bridge.rs b/crates/synth-synthesis/src/optimizer_bridge.rs
index 863ae39..4e59fe6 100644
--- a/crates/synth-synthesis/src/optimizer_bridge.rs
+++ b/crates/synth-synthesis/src/optimizer_bridge.rs
@@ -46,6 +46,147 @@ fn fold_mem_offset(base: u32, offset: u32) -> (u32, i32) {
     }
 }
 
+/// Linear-memory base materialized by the optimized (absolute) path. Must stay
+/// equal to the `0x20000100` literal the per-access fallback lowering uses.
+const BASE_CSE_LINMEM_BASE: u32 = 0x2000_0100;
+/// VCR-RA lever 3 base register. R11 is OUTSIDE the `reallocate_function` pool
+/// (R0–R8), so the range-reallocator identity-preserves it: the hoisted base
+/// survives every straight-line segment and is materialized ONCE at entry. R11
+/// is also outside local promotion's pool (R4–R8) and is not the encoder scratch
+/// (R12). Its only other optimized-path uses — synthetic-local-255 (`Select`
+/// nesting) and the `(R10,R11)` i64 pair — are excluded by the planner.
+const BASE_CSE_REG: crate::rules::Reg = crate::rules::Reg::R11;
+
+/// VCR-RA lever 3 (#468, epic #242): plan for hoisting the loop-invariant
+/// linear-memory base out of a run of constant-address memory accesses.
+#[derive(Debug, Default, PartialEq)]
+struct BaseCsePlan {
+    /// Address vreg → folded immediate (`const_addr + access_offset`, within
+    /// `0..=0xFFF`). The access is rewritten to `[R11, #imm]`, dropping its
+    /// per-access `movw/movt` base + `add`.
+    fold: std::collections::HashMap<u32, i32>,
+    /// Const vregs whose ONLY use is a folded address — their materialization is
+    /// dropped (the register-pressure relief that makes the reserved base a net
+    /// win). `plan_base_cse` inserts exactly the keys of `fold` here.
+    skip_const: std::collections::HashSet<u32>,
+}
+
+/// Returns the vregs an opcode READS, or `None` when the opcode is outside the
+/// base-CSE-safe set. Any global / memory-size / select / call / i64 / unknown
+/// op either needs a high register (R9 globals / R10 memsize / R11 select-temp
+/// + i64 pair) or has behaviour v1 does not model, so a single occurrence makes
+/// the planner bail and the function keeps the byte-identical per-access path.
+/// The catch-all arm is a deliberate backstop: an unenumerated opcode must
+/// disqualify rather than risk clobbering the reserved R11 on this un-gated path.
+fn base_cse_sources(op: &Opcode) -> Option<Vec<u32>> {
+    use Opcode::*;
+    let reads: Vec<u32> = match op {
+        // Nothing read. `Label` is tolerated: a function body carries a trailing
+        // structural end-label even with no real branching, and a label that
+        // nothing branches to does not split control flow. `Branch` /
+        // `CondBranch` are what mark a genuinely multi-block function.
+        Const { .. } | Load { .. } | Nop | Label { .. } => Vec::new(),
+        // One source: the i32 unops plus `Copy` / `Store` / `TeeStore`.
+        Clz { src, .. }
+        | Ctz { src, .. }
+        | Popcnt { src, .. }
+        | Extend8S { src, .. }
+        | Extend16S { src, .. }
+        | Eqz { src, .. }
+        | Copy { src, .. }
+        | Store { src, .. }
+        | TeeStore { src, .. } => vec![src.0],
+        // Two sources: i32 arithmetic, bitwise, shift/rotate and comparisons.
+        Add { src1, src2, .. }
+        | Sub { src1, src2, .. }
+        | Mul { src1, src2, .. }
+        | DivS { src1, src2, .. }
+        | DivU { src1, src2, .. }
+        | RemS { src1, src2, .. }
+        | RemU { src1, src2, .. }
+        | And { src1, src2, .. }
+        | Or { src1, src2, .. }
+        | Xor { src1, src2, .. }
+        | Shl { src1, src2, .. }
+        | ShrS { src1, src2, .. }
+        | ShrU { src1, src2, .. }
+        | Rotl { src1, src2, .. }
+        | Rotr { src1, src2, .. }
+        | Eq { src1, src2, .. }
+        | Ne { src1, src2, .. }
+        | LtS { src1, src2, .. }
+        | LtU { src1, src2, .. }
+        | LeS { src1, src2, .. }
+        | LeU { src1, src2, .. }
+        | GtS { src1, src2, .. }
+        | GtU { src1, src2, .. }
+        | GeS { src1, src2, .. }
+        | GeU { src1, src2, .. } => vec![src1.0, src2.0],
+        // Linear-memory accesses read the address vreg (the use the planner
+        // pairs with a const def); stores additionally read the stored `src`.
+        MemLoad { addr, .. } | MemLoadSubword { addr, .. } => vec![addr.0],
+        MemStore { src, addr, .. } | MemStoreSubword { src, addr, .. } => vec![src.0, addr.0],
+        Return { value } => value.iter().map(|r| r.0).collect(),
+        // Everything else vetoes the whole function. That INCLUDES `Branch` /
+        // `CondBranch`: v1 confines base-CSE to functions with no control-flow
+        // divergence — #468's straight-line field-initializer target — keeping
+        // it clear of the optimized path's (separately-tracked) multi-block
+        // lowering. R11 is realloc-immune (outside the R0–R8 pool), so a hoisted
+        // base WOULD survive branches; single-block only is simply the
+        // conservative choice. The veto also covers Select / Global* /
+        // MemorySize-Grow / Call / all i64 ops (the high-register users) and any
+        // unenumerated opcode (safety backstop).
+        _ => return None,
+    };
+    Some(reads)
+}
+
+/// Decides whether base-CSE activates for a function and which constant
+/// addresses fold. Yields `None` (decline → unchanged per-access codegen) if any
+/// opcode is outside the base-CSE-safe set or fewer than two accesses fold.
+fn plan_base_cse(instructions: &[Instruction]) -> Option<BaseCsePlan> {
+    use std::collections::HashMap;
+    let mut known_consts: HashMap<u32, i32> = HashMap::new();
+    let mut read_counts: HashMap<u32, u32> = HashMap::new();
+    // (address vreg, static access offset) per linear-memory access, in order.
+    let mut mem_accesses: Vec<(u32, u32)> = Vec::new();
+    for instruction in instructions {
+        // One disqualifying / unenumerated opcode declines the whole function.
+        for vreg in base_cse_sources(&instruction.opcode)? {
+            *read_counts.entry(vreg).or_default() += 1;
+        }
+        match &instruction.opcode {
+            Opcode::Const { dest, value } => {
+                known_consts.insert(dest.0, *value);
+            }
+            Opcode::MemLoad { addr, offset, .. }
+            | Opcode::MemLoadSubword { addr, offset, .. }
+            | Opcode::MemStore { addr, offset, .. }
+            | Opcode::MemStoreSubword { addr, offset, .. } => {
+                mem_accesses.push((addr.0, *offset));
+            }
+            _ => {}
+        }
+    }
+    let mut plan = BaseCsePlan::default();
+    for (addr_vreg, offset) in mem_accesses {
+        // Fold only when the address vreg is a known constant read exactly once…
+        let Some(&address) = known_consts.get(&addr_vreg) else {
+            continue;
+        };
+        if read_counts.get(&addr_vreg).copied() != Some(1) {
+            continue;
+        }
+        // …and `address + offset` fits the imm12 window of `[R11, #imm]`.
+        let immediate = i64::from(address) + i64::from(offset);
+        if (0..=0xFFF).contains(&immediate) {
+            plan.fold.insert(addr_vreg, immediate as i32);
+            plan.skip_const.insert(addr_vreg);
+        }
+    }
+    Some(plan).filter(|p| p.fold.len() >= 2)
+}
+
 /// Optimization configuration
 #[derive(Debug, Clone)]
 pub struct OptimizationConfig {
@@ -2315,7 +2456,28 @@ impl OptimizerBridge {
         // AAPCS arguments that must NOT be clobbered by i64 op handlers — at least
         // until the user's WASM has done a `local.get` of each. Using Vec because
         // `Reg` does not derive Hash (matches `instruction_selector::alloc_consecutive_pair`).
-        let param_reserved_regs: Vec<Reg> = param_regs[..num_params.min(4)].to_vec();
+        let mut param_reserved_regs: Vec<Reg> = param_regs[..num_params.min(4)].to_vec();
+
+        // VCR-RA lever 3 base-CSE (#468, epic #242): if the function is a run of
+        // constant-address memory accesses, reserve R11 as a persistent base
+        // register (excluded from every allocator via `param_reserved_regs`),
+        // materialize the linear-memory base into it ONCE at entry, and fold each
+        // const address into the access immediate (`str V,[R11,#ADDR]`) — dropping
+        // the per-access `movw/movt` base re-materialization (#468's complaint)
+        // AND the now-dead address materialization (the pressure relief that keeps
+        // the reserved base a net win). R11 is realloc-immune (outside the R0–R8
+        // pool), so the single entry materialization survives every segment.
+        // Opt-in: on whenever `SYNTH_BASE_CSE` is set (ANY value, even `0` or
+        // empty); unset ⇒ byte-identical. The optimized path is the ONLY caller of
+        // `ir_to_arm`, so this never reaches the relocatable lowering (which already pins the base in `fp`).
+        let base_cse: Option<BaseCsePlan> = if std::env::var("SYNTH_BASE_CSE").is_ok() {
+            plan_base_cse(instructions)
+        } else {
+            None
+        };
+        if base_cse.is_some() {
+            param_reserved_regs.push(BASE_CSE_REG);
+        }
 
         // Track which ARM register currently holds each local variable
         // This avoids stack spills for simple cases
@@ -2617,6 +2779,22 @@ impl OptimizerBridge {
             }
         }
 
+        // VCR-RA lever 3 base-CSE: materialize the linear-memory base into the
+        // reserved R11 ONCE, before any access. Placed before the second pass so
+        // it precedes every folded `[R11,#ADDR]` and so `ir_to_arm_idx` (recorded
+        // during the loop) accounts for these two leading instructions. R11 is
+        // realloc-immune, so this single def reaches every later use unremapped.
+        if base_cse.is_some() {
+            arm_instrs.push(ArmOp::Movw {
+                rd: BASE_CSE_REG,
+                imm16: (BASE_CSE_LINMEM_BASE & 0xFFFF) as u16,
+            });
+            arm_instrs.push(ArmOp::Movt {
+                rd: BASE_CSE_REG,
+                imm16: ((BASE_CSE_LINMEM_BASE >> 16) & 0xFFFF) as u16,
+            });
+        }
+
         // Second pass: generate ARM instructions
         for inst in instructions {
             match &inst.opcode {
@@ -2698,17 +2876,29 @@ impl OptimizerBridge {
 
                 // Constant: mov immediate to register
                 Opcode::Const { dest, value } => {
+                    // VCR-RA lever 3 base-CSE: this const is a folded address — its
+                    // ONLY use is a `[R11,#ADDR]` access (planner-verified single
+                    // use), so do not materialize it at all. Dropping it is the
+                    // register-pressure relief that makes the reserved base a win.
+                    if let Some(plan) = &base_cse
+                        && plan.skip_const.contains(&dest.0)
+                    {
+                        continue;
+                    }
                     // Allocate a register for this constant
                     let rd = if let Some(&r) = vreg_to_arm.get(&dest.0) {
                         r
                     } else {
                         // Find next available temp register
-                        // Exclude live vregs (not dead) and local_to_reg to avoid clobbering
+                        // Exclude live vregs (not dead) and local_to_reg to avoid clobbering.
+                        // Base-CSE reserves R11 (in `param_reserved_regs`); fold it in
+                        // so the const pool never hands out the live base register.
                         let used: Vec<_> = vreg_to_arm
                             .iter()
                             .filter(|(k, _)| !dead_vregs.contains(k))
                             .map(|(_, v)| *v)
                             .chain(local_to_reg.values().copied())
+                            .chain(param_reserved_regs.iter().copied())
                             .collect();
                         // Expanded temp register pool: R4-R11 (callee-saved) plus R3
                         // Note: R0-R2 are reserved for params/return, R12 is IP, R13 is SP, R14 is LR, R15 is PC
@@ -4636,78 +4826,111 @@ impl OptimizerBridge {
                 // argument on every `i32.load`. Use the scratch helper so
                 // the destination is picked from the callee-saved bank.
                 Opcode::MemLoad { dest, addr, offset } => {
-                    let r_addr = get_arm_reg(addr, &vreg_to_arm, &spilled_vregs)?;
-                    let rd = alloc_i32_scratch(
-                        &vreg_to_arm,
-                        &local_to_reg,
-                        &param_reserved_regs,
-                        &[r_addr],
-                    );
-                    vreg_to_arm.insert(dest.0, rd);
+                    // VCR-RA lever 3 base-CSE: const address → load directly off the
+                    // once-materialized base in R11 (no per-access base / add / addr).
+                    if let Some(plan) = &base_cse
+                        && let Some(&folded) = plan.fold.get(&addr.0)
+                    {
+                        let rd = alloc_i32_scratch(
+                            &vreg_to_arm,
+                            &local_to_reg,
+                            &param_reserved_regs,
+                            &[],
+                        );
+                        vreg_to_arm.insert(dest.0, rd);
+                        arm_instrs.push(ArmOp::Ldr {
+                            rd,
+                            addr: crate::rules::MemAddr::imm(BASE_CSE_REG, folded),
+                        });
+                        last_result_vreg = Some(dest.0);
+                    } else {
+                        let r_addr = get_arm_reg(addr, &vreg_to_arm, &spilled_vregs)?;
+                        let rd = alloc_i32_scratch(
+                            &vreg_to_arm,
+                            &local_to_reg,
+                            &param_reserved_regs,
+                            &[r_addr],
+                        );
+                        vreg_to_arm.insert(dest.0, rd);
 
-                    // Linear memory base 0x20000100 (SRAM, above stack area).
-                    // #382: fold a large static offset (> imm12) into the
-                    // compile-time-constant base so the access immediate is 0.
-                    let (base, mem_off) = fold_mem_offset(0x20000100, *offset);
-                    let base_lo = (base & 0xFFFF) as u16;
-                    let base_hi = ((base >> 16) & 0xFFFF) as u16;
+                        // Linear memory base 0x20000100 (SRAM, above stack area).
+                        // #382: fold a large static offset (> imm12) into the
+                        // compile-time-constant base so the access immediate is 0.
+                        let (base, mem_off) = fold_mem_offset(0x20000100, *offset);
+                        let base_lo = (base & 0xFFFF) as u16;
+                        let base_hi = ((base >> 16) & 0xFFFF) as u16;
 
-                    // Load base address into R12 (scratch register)
-                    arm_instrs.push(ArmOp::Movw {
-                        rd: Reg::R12,
-                        imm16: base_lo,
-                    });
-                    arm_instrs.push(ArmOp::Movt {
-                        rd: Reg::R12,
-                        imm16: base_hi,
-                    });
-                    // Add WASM address offset
-                    arm_instrs.push(ArmOp::Add {
-                        rd: Reg::R12,
-                        rn: Reg::R12,
-                        op2: Operand2::Reg(r_addr),
-                    });
-                    // Load from [base + wasm_addr + static_offset]
-                    arm_instrs.push(ArmOp::Ldr {
-                        rd,
-                        addr: crate::rules::MemAddr::imm(Reg::R12, mem_off),
-                    });
-                    last_result_vreg = Some(dest.0);
+                        // Load base address into R12 (scratch register)
+                        arm_instrs.push(ArmOp::Movw {
+                            rd: Reg::R12,
+                            imm16: base_lo,
+                        });
+                        arm_instrs.push(ArmOp::Movt {
+                            rd: Reg::R12,
+                            imm16: base_hi,
+                        });
+                        // Add WASM address offset
+                        arm_instrs.push(ArmOp::Add {
+                            rd: Reg::R12,
+                            rn: Reg::R12,
+                            op2: Operand2::Reg(r_addr),
+                        });
+                        // Load from [base + wasm_addr + static_offset]
+                        arm_instrs.push(ArmOp::Ldr {
+                            rd,
+                            addr: crate::rules::MemAddr::imm(Reg::R12, mem_off),
+                        });
+                        last_result_vreg = Some(dest.0);
+                    }
                 }
 
                 // MemStore: store 32-bit value to linear memory
                 // Generates: MOVW R12, #base_lo; MOVT R12, #base_hi; ADD R12, R12, Raddr; STR Rsrc, [R12, #offset]
                 Opcode::MemStore { src, addr, offset } => {
-                    let r_addr = get_arm_reg(addr, &vreg_to_arm, &spilled_vregs)?;
-                    let r_src = get_arm_reg(src, &vreg_to_arm, &spilled_vregs)?;
+                    // VCR-RA lever 3 base-CSE: the address is a folded compile-time
+                    // constant — store directly off the once-materialized base in
+                    // R11, dropping the per-access `movw/movt` + `add` and the
+                    // address materialization (skipped at its `Const`).
+                    if let Some(plan) = &base_cse
+                        && let Some(&folded) = plan.fold.get(&addr.0)
+                    {
+                        let r_src = get_arm_reg(src, &vreg_to_arm, &spilled_vregs)?;
+                        arm_instrs.push(ArmOp::Str {
+                            rd: r_src,
+                            addr: crate::rules::MemAddr::imm(BASE_CSE_REG, folded),
+                        });
+                    } else {
+                        let r_addr = get_arm_reg(addr, &vreg_to_arm, &spilled_vregs)?;
+                        let r_src = get_arm_reg(src, &vreg_to_arm, &spilled_vregs)?;
 
-                    // Linear memory base 0x20000100 (SRAM, above stack area).
-                    // #382: fold a large static offset (> imm12) into the
-                    // compile-time-constant base so the access immediate is 0.
-                    let (base, mem_off) = fold_mem_offset(0x20000100, *offset);
-                    let base_lo = (base & 0xFFFF) as u16;
-                    let base_hi = ((base >> 16) & 0xFFFF) as u16;
+                        // Linear memory base 0x20000100 (SRAM, above stack area).
+                        // #382: fold a large static offset (> imm12) into the
+                        // compile-time-constant base so the access immediate is 0.
+                        let (base, mem_off) = fold_mem_offset(0x20000100, *offset);
+                        let base_lo = (base & 0xFFFF) as u16;
+                        let base_hi = ((base >> 16) & 0xFFFF) as u16;
 
-                    // Load base address into R12 (scratch register)
-                    arm_instrs.push(ArmOp::Movw {
-                        rd: Reg::R12,
-                        imm16: base_lo,
-                    });
-                    arm_instrs.push(ArmOp::Movt {
-                        rd: Reg::R12,
-                        imm16: base_hi,
-                    });
-                    // Add WASM address offset
-                    arm_instrs.push(ArmOp::Add {
-                        rd: Reg::R12,
-                        rn: Reg::R12,
-                        op2: Operand2::Reg(r_addr),
-                    });
-                    // Store to [base + wasm_addr + static_offset]
-                    arm_instrs.push(ArmOp::Str {
-                        rd: r_src,
-                        addr: crate::rules::MemAddr::imm(Reg::R12, mem_off),
-                    });
+                        // Load base address into R12 (scratch register)
+                        arm_instrs.push(ArmOp::Movw {
+                            rd: Reg::R12,
+                            imm16: base_lo,
+                        });
+                        arm_instrs.push(ArmOp::Movt {
+                            rd: Reg::R12,
+                            imm16: base_hi,
+                        });
+                        // Add WASM address offset
+                        arm_instrs.push(ArmOp::Add {
+                            rd: Reg::R12,
+                            rn: Reg::R12,
+                            op2: Operand2::Reg(r_addr),
+                        });
+                        // Store to [base + wasm_addr + static_offset]
+                        arm_instrs.push(ArmOp::Str {
+                            rd: r_src,
+                            addr: crate::rules::MemAddr::imm(Reg::R12, mem_off),
+                        });
+                    }
                     // MemStore does not produce a value
                 }
 
@@ -4726,34 +4949,48 @@ impl OptimizerBridge {
                     width,
                     signed,
                 } => {
-                    let r_addr = get_arm_reg(addr, &vreg_to_arm, &spilled_vregs)?;
-                    let rd = alloc_i32_scratch(
-                        &vreg_to_arm,
-                        &local_to_reg,
-                        &param_reserved_regs,
-                        &[r_addr],
-                    );
-                    vreg_to_arm.insert(dest.0, rd);
+                    // VCR-RA lever 3 base-CSE: const address → subword load off R11.
+                    let (rd, addr_mem) = if let Some(plan) = &base_cse
+                        && let Some(&folded) = plan.fold.get(&addr.0)
+                    {
+                        let rd = alloc_i32_scratch(
+                            &vreg_to_arm,
+                            &local_to_reg,
+                            &param_reserved_regs,
+                            &[],
+                        );
+                        vreg_to_arm.insert(dest.0, rd);
+                        (rd, crate::rules::MemAddr::imm(BASE_CSE_REG, folded))
+                    } else {
+                        let r_addr = get_arm_reg(addr, &vreg_to_arm, &spilled_vregs)?;
+                        let rd = alloc_i32_scratch(
+                            &vreg_to_arm,
+                            &local_to_reg,
+                            &param_reserved_regs,
+                            &[r_addr],
+                        );
+                        vreg_to_arm.insert(dest.0, rd);
 
-                    // #382: fold a large static offset (> imm12) into the
-                    // compile-time-constant base so the access immediate is 0.
-                    let (base, mem_off) = fold_mem_offset(0x20000100, *offset);
-                    let base_lo = (base & 0xFFFF) as u16;
-                    let base_hi = ((base >> 16) & 0xFFFF) as u16;
-                    arm_instrs.push(ArmOp::Movw {
-                        rd: Reg::R12,
-                        imm16: base_lo,
-                    });
-                    arm_instrs.push(ArmOp::Movt {
-                        rd: Reg::R12,
-                        imm16: base_hi,
-                    });
-                    arm_instrs.push(ArmOp::Add {
-                        rd: Reg::R12,
-                        rn: Reg::R12,
-                        op2: Operand2::Reg(r_addr),
-                    });
-                    let addr_mem = crate::rules::MemAddr::imm(Reg::R12, mem_off);
+                        // #382: fold a large static offset (> imm12) into the
+                        // compile-time-constant base so the access immediate is 0.
+                        let (base, mem_off) = fold_mem_offset(0x20000100, *offset);
+                        let base_lo = (base & 0xFFFF) as u16;
+                        let base_hi = ((base >> 16) & 0xFFFF) as u16;
+                        arm_instrs.push(ArmOp::Movw {
+                            rd: Reg::R12,
+                            imm16: base_lo,
+                        });
+                        arm_instrs.push(ArmOp::Movt {
+                            rd: Reg::R12,
+                            imm16: base_hi,
+                        });
+                        arm_instrs.push(ArmOp::Add {
+                            rd: Reg::R12,
+                            rn: Reg::R12,
+                            op2: Operand2::Reg(r_addr),
+                        });
+                        (rd, crate::rules::MemAddr::imm(Reg::R12, mem_off))
+                    };
                     let sub_op = match (*width, *signed) {
                         (1, false) => ArmOp::Ldrb { rd, addr: addr_mem },
                         (1, true) => ArmOp::Ldrsb { rd, addr: addr_mem },
@@ -4777,28 +5014,36 @@ impl OptimizerBridge {
                     offset,
                     width,
                 } => {
-                    let r_addr = get_arm_reg(addr, &vreg_to_arm, &spilled_vregs)?;
-                    let r_src = get_arm_reg(src, &vreg_to_arm, &spilled_vregs)?;
-
-                    // #382: fold a large static offset (> imm12) into the
-                    // compile-time-constant base so the access immediate is 0.
-                    let (base, mem_off) = fold_mem_offset(0x20000100, *offset);
-                    let base_lo = (base & 0xFFFF) as u16;
-                    let base_hi = ((base >> 16) & 0xFFFF) as u16;
-                    arm_instrs.push(ArmOp::Movw {
-                        rd: Reg::R12,
-                        imm16: base_lo,
-                    });
-                    arm_instrs.push(ArmOp::Movt {
-                        rd: Reg::R12,
-                        imm16: base_hi,
-                    });
-                    arm_instrs.push(ArmOp::Add {
-                        rd: Reg::R12,
-                        rn: Reg::R12,
-                        op2: Operand2::Reg(r_addr),
-                    });
-                    let addr_mem = crate::rules::MemAddr::imm(Reg::R12, mem_off);
+                    // VCR-RA lever 3 base-CSE: const address → subword store off R11.
+                    let (r_src, addr_mem) = if let Some(plan) = &base_cse
+                        && let Some(&folded) = plan.fold.get(&addr.0)
+                    {
+                        let r_src = get_arm_reg(src, &vreg_to_arm, &spilled_vregs)?;
+                        (r_src, crate::rules::MemAddr::imm(BASE_CSE_REG, folded))
+                    } else {
+                        let r_addr = get_arm_reg(addr, &vreg_to_arm, &spilled_vregs)?;
+                        let r_src = get_arm_reg(src, &vreg_to_arm, &spilled_vregs)?;
+
+                        // #382: fold a large static offset (> imm12) into the
+                        // compile-time-constant base so the access immediate is 0.
+                        let (base, mem_off) = fold_mem_offset(0x20000100, *offset);
+                        let base_lo = (base & 0xFFFF) as u16;
+                        let base_hi = ((base >> 16) & 0xFFFF) as u16;
+                        arm_instrs.push(ArmOp::Movw {
+                            rd: Reg::R12,
+                            imm16: base_lo,
+                        });
+                        arm_instrs.push(ArmOp::Movt {
+                            rd: Reg::R12,
+                            imm16: base_hi,
+                        });
+                        arm_instrs.push(ArmOp::Add {
+                            rd: Reg::R12,
+                            rn: Reg::R12,
+                            op2: Operand2::Reg(r_addr),
+                        });
+                        (r_src, crate::rules::MemAddr::imm(Reg::R12, mem_off))
+                    };
                     let sub_op = match *width {
                         1 => ArmOp::Strb {
                             rd: r_src,
@@ -5285,6 +5530,174 @@ impl Default for OptimizerBridge {
 mod tests {
     use super::*;
 
+    // ---- base-CSE planner (VCR-RA lever 3, #468) ----
+    // Test-only IR builders: `inst` wraps an opcode, `vr` names a virtual register.
+    fn inst(op: Opcode) -> Instruction {
+        Instruction {
+            id: 0,
+            opcode: op,
+            block_id: 0,
+            is_dead: false,
+        }
+    }
+    fn vr(n: u32) -> OptReg {
+        OptReg(n)
+    }
+    /// Emits one `[Const addr, Const val, MemStore]` triple per `(addr, val)` pair.
+    /// Pair `i` gets fresh vregs `2*i` / `2*i + 1`: single-use addresses (foldable).
+    fn const_addr_stores(pairs: &[(i32, i32)]) -> Vec<Instruction> {
+        let mut ir = Vec::with_capacity(pairs.len() * 3);
+        for (index, &(address, stored)) in pairs.iter().enumerate() {
+            let addr_vreg = 2 * (index as u32);
+            let value_vreg = addr_vreg + 1;
+            ir.push(inst(Opcode::Const {
+                dest: vr(addr_vreg),
+                value: address,
+            }));
+            ir.push(inst(Opcode::Const {
+                dest: vr(value_vreg),
+                value: stored,
+            }));
+            ir.push(inst(Opcode::MemStore {
+                src: vr(value_vreg),
+                addr: vr(addr_vreg),
+                offset: 0,
+            }));
+        }
+        ir
+    }
+
+    #[test]
+    fn plan_base_cse_folds_two_or_more_const_addr_stores() {
+        let stores = const_addr_stores(&[(0, 11), (4, 22), (8, 33)]);
+        let planned = plan_base_cse(&stores).expect("activates with 3 foldable stores");
+        // Every store folds, and every folded address const is marked skippable.
+        assert_eq!((planned.fold.len(), planned.skip_const.len()), (3, 3));
+        // Address vregs sit at the even slots 0, 2, 4 and fold to 0, 4, 8.
+        for (addr_vreg, imm) in [(0u32, 0i32), (2, 4), (4, 8)] {
+            assert_eq!(planned.fold.get(&addr_vreg), Some(&imm));
+        }
+    }
+
+    #[test]
+    fn plan_base_cse_declines_below_two_folds() {
+        // A single foldable store is below the ≥2 activation threshold.
+        assert!(plan_base_cse(&const_addr_stores(&[(0, 11)])).is_none());
+    }
+
+    #[test]
+    fn plan_base_cse_declines_on_disqualifying_op() {
+        // `Select` is outside the base-CSE-safe opcode set (it wants R11) → decline.
+        let mut body = const_addr_stores(&[(0, 11), (4, 22)]);
+        body.push(inst(Opcode::Select {
+            cond: vr(103),
+            val_true: vr(101),
+            val_false: vr(102),
+            dest: vr(100),
+        }));
+        assert!(plan_base_cse(&body).is_none());
+    }
+
+    #[test]
+    fn plan_base_cse_declines_on_control_flow() {
+        // `CondBranch` (br_if) means a multi-block function, which v1 keeps out of
+        // scope (clear of the optimized path's separately-tracked multi-block bug).
+        let mut body = const_addr_stores(&[(0, 11), (4, 22)]);
+        body.push(inst(Opcode::CondBranch {
+            target: 0,
+            cond: vr(100),
+        }));
+        assert!(plan_base_cse(&body).is_none());
+    }
+
+    #[test]
+    fn plan_base_cse_allows_trailing_structural_label() {
+        // A lone `Label` (function-end marker, no branch targets it) leaves the
+        // function single-block, so the two stores must still fold.
+        let mut body = const_addr_stores(&[(0, 11), (4, 22)]);
+        body.push(inst(Opcode::Label { id: 99 }));
+        let planned = plan_base_cse(&body).expect("activates despite a structural label");
+        assert_eq!(planned.fold.len(), 2);
+    }
+
+    #[test]
+    fn plan_base_cse_declines_imm12_overflow_addr() {
+        // Address 0x1000 (+ offset 0) is one past the imm12 window (max 0xFFF), so
+        // that store cannot fold. Only the (4, 22) store folds: 1 fold is below
+        // the ≥2 activation threshold, so the planner declines.
+        let stores = const_addr_stores(&[(0x1000, 11), (4, 22)]);
+        let outcome = plan_base_cse(&stores);
+        assert!(outcome.is_none());
+    }
+
+    #[test]
+    fn plan_base_cse_declines_multi_use_addr() {
+        // Both stores address memory through vreg 0, so that vreg has two reads.
+        // A folded address must be single-use (its `Const` is dropped), hence
+        // neither store folds and the plan stays below the activation threshold.
+        let konst = |dest: u32, value: i32| {
+            inst(Opcode::Const {
+                dest: vr(dest),
+                value,
+            })
+        };
+        let store = |src: u32, addr: u32| {
+            inst(Opcode::MemStore {
+                src: vr(src),
+                addr: vr(addr),
+                offset: 0,
+            })
+        };
+        let ir = [
+            // Shared address vreg 0 (constant 4).
+            konst(0, 4),
+            // First store: value vreg 1 → [vreg 0].
+            konst(1, 11),
+            store(1, 0),
+            // Second store: value vreg 2 → [vreg 0] again.
+            konst(2, 22),
+            store(2, 0),
+        ];
+        // addr vreg 0 has a read count of 2 → zero folds → `None`.
+        assert!(plan_base_cse(&ir).is_none());
+    }
+
+    #[test]
+    fn plan_base_cse_folds_static_offset_into_immediate() {
+        // The folded immediate is `constant address + static access offset`, so
+        // a non-zero offset on a zero address must show up in the immediate
+        // (ADDR + off).
+        let konst = |dest: u32, value: i32| {
+            inst(Opcode::Const {
+                dest: vr(dest),
+                value,
+            })
+        };
+        let store = |src: u32, addr: u32, offset: u32| {
+            inst(Opcode::MemStore {
+                src: vr(src),
+                addr: vr(addr),
+                offset,
+            })
+        };
+        let ir = [
+            // Address vreg 0 = 0, value vreg 1 = 11, stored at static offset 16.
+            konst(0, 0),
+            konst(1, 11),
+            store(1, 0, 16),
+            // Address vreg 2 = 0, value vreg 3 = 22, stored at static offset 32.
+            konst(2, 0),
+            konst(3, 22),
+            store(3, 2, 32),
+        ];
+        // Both addresses are single-use constants, so both stores fold.
+        let planned = plan_base_cse(&ir).expect("activates");
+        // Expected immediates: 0 + 16 and 0 + 32.
+        for (addr_vreg, imm) in [(0u32, 16i32), (2, 32)] {
+            assert_eq!(planned.fold.get(&addr_vreg), Some(&imm));
+        }
+    }
+
     #[test]
     fn test_optimizer_bridge_basic() {
         let bridge = OptimizerBridge::new();
diff --git a/scripts/repro/base_cse_branch.wat b/scripts/repro/base_cse_branch.wat
new file mode 100644
index 0000000..dd871e8
--- /dev/null
+++ b/scripts/repro/base_cse_branch.wat
@@ -0,0 +1,22 @@
+;; VCR-RA lever 3 base-CSE (#468, epic #242) — CONTROL-FLOW decline fixture.
+;;
+;; The base-CSE hoist materializes the linear-memory base into R11 once at entry,
+;; but the v1 planner only activates on single-basic-block functions: any branch
+;; disqualifies the whole function. This fixture splits the constant-address
+;; stores with a `br_if`, so the differential can check that base-CSE DECLINES
+;; here — the flag-on `.text` must be byte-identical to the flag-off `.text`.
+;; The differential compiles this fixture in both flag states but does not
+;; execute it; it only compares the two `.text` sections.
+;;
+;; init_branch(sel): always stores fields 0,4; if sel!=0 also stores fields 8,12.
+;; Generic — neutral addresses/values, tied to nothing real.
+(module
+  (memory 1)
+  (export "memory" (memory 0))
+  (func (export "init_branch") (param $sel i32)
+    (i32.store   (i32.const 0)  (i32.const 11))
+    (i32.store   (i32.const 4)  (i32.const 22))
+    (block $skip
+      (br_if $skip (i32.eqz (local.get $sel)))
+      (i32.store   (i32.const 8)  (i32.const 33))
+      (i32.store16 (i32.const 12) (i32.const 44)))))
diff --git a/scripts/repro/base_cse_differential.py b/scripts/repro/base_cse_differential.py
new file mode 100644
index 0000000..eead0bd
--- /dev/null
+++ b/scripts/repro/base_cse_differential.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+"""VCR-RA lever 3 / VCR-ORACLE-001 (#468, #242) — EXECUTION-validate base-CSE.
+
+base-CSE (SYNTH_BASE_CSE=1) hoists the linear-memory base into R11 once at entry
+and folds each constant store address into the access immediate (`str V,[R11,#ADDR]`),
+dropping the per-access `movw/movt` base re-materialization and the address
+materialization. The optimized (non-relocatable) path it changes has NO frozen
+cargo byte-gate, so EXECUTION is the correctness oracle: this harness runs the
+straight-line fixture under unicorn in BOTH flag-off and flag-on builds and asserts
+the resulting LINEAR MEMORY is bit-identical to wasmtime ground truth. (It writes
+memory and returns nothing, so memory — not a return register — is the observable.)
+
+Two fixtures:
+  * init_fields  — 7 consecutive const-address stores (the straight-line #468 case;
+                   the big byte win, and the non-vacuity case for "fold fired").
+  * init_branch  — const-address stores split by a `br_if`. The v1 planner
+                   disqualifies any function with control flow, so this is the
+                   DECLINE case: it is compiled in both flag states but not
+                   executed, and flag-on .text must be byte-identical to flag-off.
+
+NON-VACUITY: aborts unless the flag-on init_fields .text is strictly smaller than
+flag-off (i.e. the base was actually hoisted and the addresses folded).
+
+Run (needs wasmtime + unicorn + pyelftools):
+  python scripts/repro/base_cse_differential.py
+Exits nonzero on any mismatch or vacuity failure.
+"""
+import subprocess
+import sys
+
+import wasmtime
+from elftools.elf.elffile import ELFFile
+from unicorn import UC_ARCH_ARM, UC_MODE_THUMB, Uc, UcError
+from unicorn.arm_const import UC_ARM_REG_LR, UC_ARM_REG_R0, UC_ARM_REG_SP
+
+SYNTH = "./target/release/synth"
+# The optimized path materializes this absolute linear-memory base.
+LINMEM = 0x20000100
+CODE, STK, RET = 0x200000, 0x90000, 0x300000
+# Fields written by each fixture: (wasm_addr, width_bytes).
+FIELDS = [(0, 4), (4, 4), (8, 4), (12, 2), (14, 2), (16, 1), (17, 1)]
+BRANCH_FIELDS = [(0, 4), (4, 4), (8, 4), (12, 2)]
+
+
+def compile_elf(wat, out, base_cse):  # compile `wat` to ELF `out`; SYNTH_BASE_CSE set iff base_cse
+    env = {"PATH": "/usr/bin:/bin"}  # scrubbed env: nothing is inherited from the caller
+    if base_cse:
+        env["SYNTH_BASE_CSE"] = "1"  # the flag under test; absent in the flag-off build
+    r = subprocess.run(
+        [SYNTH, "compile", wat, "-o", out, "-b", "arm", "--target", "cortex-m4",
+         "--all-exports"],
+        capture_output=True, text=True, env={**env},
+    )
+    if r.returncode != 0:
+        sys.exit(f"compile failed ({wat}, base_cse={base_cse}): {r.stderr}")  # abort the whole run
+
+
+def load(elf, func):
+    """Return (text_bytes, text_sh_addr, func_symbol_address); exit on failure."""
+    with open(elf, "rb") as fh:  # ELFFile reads lazily, so do all reads in here
+        f = ELFFile(fh)
+        text = f.get_section_by_name(".text")
+        code = text.data() if text is not None else b""  # missing section -> clean exit
+        if not code:
+            sys.exit(f"{elf}: .text empty")
+        base = text["sh_addr"]
+        # synth emits the symtab with an empty section NAME, so find it by
+        # section TYPE, not by ".symtab"; the first matching symbol wins.
+        fa = next((sym["st_value"]
+                   for s in f.iter_sections()
+                   if s.header.sh_type == "SHT_SYMTAB"
+                   for sym in s.iter_symbols()
+                   if sym.name == func), None)
+    if fa is None:
+        sys.exit(f"{elf}: symbol {func} not found")
+    return code, base, fa
+
+
+def run_arm(elf, func, params):  # emulate `func` from `elf`; return {field_offset: value} or "ERR:..."
+    code, base, fa = load(elf, func)
+    mu = Uc(UC_ARCH_ARM, UC_MODE_THUMB)
+    mu.mem_map(CODE, 0x10000)  # region that receives the copied .text
+    mu.mem_map(LINMEM & ~0xFFFF, 0x20000)  # covers LINMEM + field range
+    mu.mem_map(STK - 0x8000, 0x10000)  # SP starts in the middle of this region
+    mu.mem_map(RET, 0x1000)  # page holding the return address the run stops at
+    mu.mem_write(CODE, code)
+    mu.reg_write(UC_ARM_REG_SP, STK)
+    mu.reg_write(UC_ARM_REG_LR, RET | 1)  # bit 0 set: return in Thumb state
+    for i, p in enumerate(params):
+        mu.reg_write(UC_ARM_REG_R0 + i, p & 0xFFFFFFFF)  # args from R0 upward, masked to 32 bits
+    try:
+        mu.emu_start((CODE + fa - base) | 1, RET, count=100000)  # entry = symbol offset in .text
+    except UcError as e:
+        return f"ERR:{e}"  # a str (not a dict) tells check() the emulation faulted
+    out = {}
+    for (off, w) in (FIELDS if func == "init_fields" else BRANCH_FIELDS):
+        out[off] = int.from_bytes(mu.mem_read(LINMEM + off, w), "little")
+    return out
+
+
+def wasm_mem(wat, func, params):
+    """Run `func` under wasmtime; return {field_offset: little-endian value}."""
+    engine = wasmtime.Engine()
+    with open(wat, "rb") as fh:  # close the fixture instead of leaking the handle
+        module = wasmtime.Module(engine, fh.read())
+    store = wasmtime.Store(engine)
+    inst = wasmtime.Instance(store, module, [])
+    inst.exports(store)[func](store, *params)
+    # Only the first 64 bytes are read back; every listed field lies below that.
+    data = inst.exports(store)["memory"].read(store, 0, 64)
+    fields = FIELDS if func == "init_fields" else BRANCH_FIELDS
+    return {off: int.from_bytes(data[off:off + w], "little") for off, w in fields}
+
+
+def text_len(elf):  # byte length of the ELF's .text section
+    return len(ELFFile(open(elf, "rb")).get_section_by_name(".text").data())  # NOTE(review): handle never closed explicitly
+
+
+def check(label, wat, func, params, fails):  # compare flag-off / flag-on ARM memory with wasmtime
+    off_elf, on_elf = f"/tmp/bcse_{func}_off.elf", f"/tmp/bcse_{func}_on.elf"  # produced by main()
+    gt = wasm_mem(wat, func, params)  # ground truth from wasmtime
+    r_off = run_arm(off_elf, func, params)
+    r_on = run_arm(on_elf, func, params)
+    ok = isinstance(r_off, dict) and isinstance(r_on, dict) and r_off == gt and r_on == gt
+    fails[0] += 0 if ok else 1  # `fails` is a one-element list used as a mutable counter
+    flag = "" if ok else "  <-- MISMATCH"
+    print(f"{label} {func}{tuple(params)}: off={'ERR' if not isinstance(r_off,dict) else 'ok'} "
+          f"on={'ERR' if not isinstance(r_on,dict) else 'ok'} vs wasmtime{flag}")
+    if not ok:
+        print(f"    off={r_off}\n    on ={r_on}\n    wt ={gt}")  # dump all three views for diagnosis
+
+
+def main():
+    # Compile both fixtures in both flag states (off first, then on).
+    compile_elf("scripts/repro/redundant_base_materialization.wat",
+                "/tmp/bcse_init_fields_off.elf", False)
+    compile_elf("scripts/repro/redundant_base_materialization.wat",
+                "/tmp/bcse_init_fields_on.elf", True)
+    compile_elf("scripts/repro/base_cse_branch.wat",
+                "/tmp/bcse_init_branch_off.elf", False)
+    compile_elf("scripts/repro/base_cse_branch.wat",
+                "/tmp/bcse_init_branch_on.elf", True)
+
+    # Non-vacuity: the hoist must actually shrink the straight-line fixture.
+    off_len = text_len("/tmp/bcse_init_fields_off.elf")
+    on_len = text_len("/tmp/bcse_init_fields_on.elf")
+    if not on_len < off_len:
+        sys.exit(f"VACUOUS: flag-on init_fields .text ({on_len}B) not < flag-off "
+                 f"({off_len}B) — base-CSE did not fire")
+
+    fails = [0]  # one-element list so check() can bump the failure count in place
+    # ACTIVE case: a straight-line const-address-store function. base-CSE fires;
+    # assert flag-off == flag-on == wasmtime memory.
+    check("[straight]", "scripts/repro/redundant_base_materialization.wat",
+          "init_fields", [], fails)
+
+    # DECLINE case: the optimized path's multi-block lowering is outside base-CSE's
+    # v1 scope, so the planner DISQUALIFIES any function with control flow. The
+    # correct, sufficient oracle is therefore that base-CSE is a NO-OP here:
+    # flag-on .text must be byte-identical to flag-off (base-CSE declined). This is
+    # what keeps the lever clear of the (separately-tracked) optimized-path
+    # multi-block bug — we don't execute it, we prove we never touched it.
+    with open("/tmp/bcse_init_branch_off.elf", "rb") as a:
+        off_text = ELFFile(a).get_section_by_name(".text").data()
+    with open("/tmp/bcse_init_branch_on.elf", "rb") as b:
+        on_text = ELFFile(b).get_section_by_name(".text").data()
+    if off_text == on_text:
+        print("[branch decline] init_branch: flag-on .text byte-identical to flag-off "
+              "(base-CSE correctly declined on control flow)")
+    else:
+        print("[branch decline] init_branch: FLAG-ON .text DIFFERS from flag-off "
+              "<-- base-CSE must NOT activate on control-flow functions")
+        fails[0] += 1
+
+    print(f"\ninit_fields .text {off_len}B -> {on_len}B (-{off_len - on_len}B, "
+          f"{100*(off_len-on_len)//off_len}%); base hoisted + addresses folded")
+    print("ORACLE: PASS" if fails[0] == 0 else f"ORACLE: FAIL ({fails[0]})")
+    sys.exit(1 if fails[0] else 0)  # nonzero exit on any mismatch or failed decline check
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/repro/redundant_base_materialization.md b/scripts/repro/redundant_base_materialization.md
index d07e36f..a416338 100644
--- a/scripts/repro/redundant_base_materialization.md
+++ b/scripts/repro/redundant_base_materialization.md
@@ -1,10 +1,9 @@
 # #468 scoping spike — redundant linear-memory-base materialization
 
 **Issue:** synth#468 · **Epic:** #242 (VCR-*) · north-star #390
-**Status:** SCOPING SPIKE (no codegen change — frozen-safe by construction).
-The byte-changing CSE is the explicitly-separate next gated step (flag-off →
-on-target cycle gate → default-on flip), exactly like the cmp→select /
-local-promotion / immediate-shift levers.
+**Status:** IMPLEMENTED flag-off (`SYNTH_BASE_CSE`) — see "Implemented" below.
+Default-on flip held for the on-target cycle gate, exactly like the cmp→select /
+local-promotion / immediate-shift / dead-frame levers.
 
 ## The pattern
 
@@ -90,14 +89,52 @@ only result checks are the out-of-CI unicorn differential + the on-target gate.
 relocatable lowering), not a free property — and the optimized-path result evidence
 is the differential, not a cargo test.
 
-## Next gated step (separate PR)
-
-1. Flag-off CSE in `optimizer_bridge.rs` (`SYNTH_BASE_CSE`, default off ⇒
-   bit-identical optimized path) — reserve a callee-saved base reg, hoist
-   `movw/movt` once per const-base store-run, rewrite stores to `[base,#off]`.
-   Soundness: only runs of stores whose base is the same compile-time constant
-   with no intervening base clobber; base reg dead-after / restored in epilogue.
-2. Differential (unicorn): optimized-path ELF, flag-off == flag-on == wasmtime;
-   the 7-store fixture is the non-vacuity case.
-3. On-target cycle gate (same protocol as the prior levers), then default-on flip
-   + re-freeze any optimized-path goldens it touches.
+## Implemented — `SYNTH_BASE_CSE` (flag-off)
+
+The scoping spike above anticipated "reserve a callee-saved base reg, hoist
+`movw/movt` once per straight-line run." The implementation found a cleaner,
+stronger invariant and is **simpler** than the per-run plan:
+
+* **R11 is realloc-immune.** `reallocate_function`'s pool is `R0–R8`; it
+  identity-preserves everything outside it. So the base lives in **R11**,
+  materialized **once at function entry**, and survives every later segment
+  untouched — no per-run re-materialization, no cross-segment remap hazard. R11
+  is also outside local promotion's `R4–R8` pool and is not the encoder scratch
+  (R12). It is reserved from every optimized-path allocator via
+  `param_reserved_regs` + the const pool.
+* **Const addresses fold into the access immediate.** `i32.store (i32.const ADDR)
+  V` → `str V,[R11,#ADDR+off]` (planner-verified single-use const address,
+  `ADDR+off ≤ imm12`), dropping the per-access `movw/movt` base **and** the now-
+  dead address materialization — the latter is the register-pressure relief that
+  makes the reserved base a net win, not a wash.
+* **A standalone planner** (`plan_base_cse`, unit-tested) decides activation:
+  ≥2 foldable const-address accesses AND every opcode in the base-CSE-safe set.
+  Any `Branch`/`CondBranch` (multi-block), `Select`, `Global*`, `MemorySize/Grow`,
+  `Call`, i64, or unenumerated op declines the whole function (`None` → unchanged
+  per-access codegen). v1 is therefore confined to single-basic-block field
+  initializers — #468's exact target — keeping it clear of the optimized path's
+  separately-tracked multi-block lowering.
+
+Result on `init_fields`: **.text 336 B → 218 B (−118 B, −35 %)**, base
+materialized once, all 7 addresses folded, matching the relocatable path's
+`str [fp,#off]` shape.
+
+### Oracle (this path has NO cargo byte-gate — frozen gate compiles `--relocatable`)
+
+1. **Flag-off bit-identical** — verified by an explicit `.text` diff of a fixture
+   corpus against a pre-change baseline binary (4/4 identical), plus the full
+   optimized-path test suite (`wast_compile` et al.) green. base-CSE is `None`
+   when the flag is unset, so off ⇒ byte-identical by construction.
+2. **Differential** (`base_cse_differential.py`, unicorn): `init_fields`
+   flag-off == flag-on == wasmtime by comparing **linear memory** (the fixture
+   returns nothing); `init_branch` asserts flag-on `.text` byte-identical to
+   flag-off (base-CSE correctly **declines** on control flow).
+3. **On-target cycle gate** (same protocol as the prior levers), then default-on
+   flip via a `SYNTH_NO_BASE_CSE` opt-out — **held for silicon**.
+
+### Follow-ups
+* Multi-block support (needs the optimized path's `block`/`br_if` lowering fixed
+  first — `init_branch` flag-off already miscompiles, independent of base-CSE; a
+  separate optimized-path control-flow bug worth its own issue).
+* Dynamic (non-const) addresses in an active function could still source the base
+  from R11 (`add R12,R11,r_addr`) instead of re-materializing — deferred.
diff --git a/scripts/repro/redundant_base_materialization.wat b/scripts/repro/redundant_base_materialization.wat
index d09bd2e..0c579e5 100644
--- a/scripts/repro/redundant_base_materialization.wat
+++ b/scripts/repro/redundant_base_materialization.wat
@@ -12,6 +12,7 @@
 ;; Generic addresses/values — exhibits the pattern, tied to nothing real.
 (module
   (memory 1)
+  (export "memory" (memory 0))
   (func (export "init_fields")
     (i32.store   (i32.const 0)  (i32.const 11))
     (i32.store   (i32.const 4)  (i32.const 22))