PrincessnJoy · Julianemeka · Jun 1, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -47,10 +47,23 @@ jobs:
       - name: Run property-based tests
         run: cargo test prop_ --all --features testutils
 
+  benchmark:
+    name: Instruction Count Benchmarks
+    runs-on: ubuntu-latest
+    needs: test  # start only after the `test` job has succeeded
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@stable
+        with:
+          targets: wasm32-unknown-unknown
+      - uses: Swatinem/rust-cache@v2
+      - name: Run instruction count benchmarks  # runs tests whose names contain "bench_"
+        run: cargo test bench_ --features testutils -- --nocapture
+
   build:
     name: Build WASM
     runs-on: ubuntu-latest
-    needs: [fmt, lint, test]
+    needs: [fmt, lint, test, benchmark]
     steps:
       - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@stable

diff --git a/contracts/governance/src/benchmarks.rs b/contracts/governance/src/benchmarks.rs
@@ -1,85 +1,135 @@
-//! Governance contract — performance benchmarks at scale.
+//! Governance contract — instruction count benchmarks.
+//!
+//! Measures CPU instruction consumption for key operations.
+//! Baselines are the `BASELINE_*` constants below; docs/performance.md mirrors them.
+//! CI fails if any operation exceeds baseline by more than 10%.
 
 #![cfg(test)]
 
 use soroban_sdk::{testutils::Address as _, Address, Env, String};
-use crate::{
-    types::Vote,
-    test_helpers::setup,
-    GovernanceContract, GovernanceContractClient,
-};
+
+use crate::{types::Vote, GovernanceContract, GovernanceContractClient};
 use cosmosvote_token::{TokenContract, TokenContractClient};
 
-/// Run a benchmark for cast_vote and finalise at a specific voter scale.
-fn run_voter_benchmark(voter_count: u32) {
+// ---------------------------------------------------------------------------
+// Instruction count baselines (must not be exceeded by more than 10%)
+// ---------------------------------------------------------------------------
+
+const BASELINE_CREATE_PROPOSAL: u64 = 5_000_000;
+const BASELINE_CAST_VOTE: u64 = 5_000_000;
+const BASELINE_FINALISE: u64 = 5_000_000;
+
+fn threshold(baseline: u64) -> u64 {
+    baseline + baseline / 10 // CI limit: baseline plus 10% (integer division rounds the margin down)
+}
+
+// ---------------------------------------------------------------------------
+// Benchmark runner
+// ---------------------------------------------------------------------------
+
+fn setup_env() -> (Env, GovernanceContractClient<'static>, TokenContractClient<'static>, Address, Address) {
     let env = Env::default();
     env.mock_all_auths();
 
     let admin = Address::generate(&env);
+    let proposer = Address::generate(&env);
+
     let token_id = env.register(TokenContract, ());
     let token = TokenContractClient::new(&env, &token_id);
     token.initialize(&admin, &1_000_000_000_000i128);
+    token.mint(&admin, &proposer, &10_000_000i128); // give the proposer a token balance
 
     let gov_id = env.register(GovernanceContract, ());
     let gov = GovernanceContractClient::new(&env, &gov_id);
-    gov.initialize(&admin, &token_id, &0i128, &0u64, &false);
+    gov.initialize(&admin, &token_id, &0i128, &0u64, &0u32, &false);
+
+    // No `unsafe` lifetime extension is needed: SDK-generated clients hold their
+    // own `Env` handle, and `Client::new` does not tie the client's lifetime to
+    // `&env`, so both clients are inferred as `'static` from the return type.
+
+    (env, gov, token, admin, proposer)
+}
+
+// ---------------------------------------------------------------------------
+// Individual operation benchmarks
+// ---------------------------------------------------------------------------
+
+#[test]
+fn bench_create_proposal() { // CI gate: create_proposal must stay within baseline + 10%
+    let (env, gov, _token, _admin, proposer) = setup_env();
+
+    env.budget().reset_default(); // reset so only the call below is measured
+    gov.create_proposal(
+        &proposer,
+        &String::from_str(&env, "Benchmark Proposal"),
+        &String::from_str(&env, "Measuring instruction count for create_proposal"),
+        &1_000_000i128,
+        &604_800u64,
+    );
+    let instructions = env.budget().instructions_consumed(); // instructions used since the reset
+
+    assert!(
+        instructions <= threshold(BASELINE_CREATE_PROPOSAL), // limit is baseline + 10%
+        "create_proposal used {} instructions, exceeds 10% over baseline {}",
+        instructions,
+        BASELINE_CREATE_PROPOSAL
+    );
+}
+
+#[test]
+fn bench_cast_vote() { // CI gate: cast_vote must stay within baseline + 10%
+    let (env, gov, token, admin, proposer) = setup_env();
 
-    // Create a proposal
-    let proposer = Address::generate(&env);
-    token.mint(&admin, &proposer, &1_000_000i128);
     let id = gov.create_proposal(
         &proposer,
-        &String::from_str(&env, "Scale Test"),
-        &String::from_str(&env, "Stress testing governance with many voters"),
+        &String::from_str(&env, "Vote Benchmark"),
+        &String::from_str(&env, "Measuring instruction count for cast_vote"),
         &1_000_000i128,
         &604_800u64,
     );
 
-    // Generate voters and cast votes
-    let mut voters = soroban_sdk::vec![&env];
-    for _ in 0..voter_count {
-        let v = Address::generate(&env);
-        token.mint(&admin, &v, &1_000i128);
-        voters.push_back(v);
-    }
-
-    // Measure cast_vote for the last voter (worst case storage load)
-    let last_voter = voters.get(voter_count - 1).unwrap();
-
-    env.budget().reset_default();
-    gov.cast_vote(&last_voter, &id, &Vote::Yes);
-    let cast_ins = env.budget().instructions_consumed();
-
-    // Measure finalise
-    let proposal = gov.get_proposal(&id);
-    env.ledger().with_mut(|l| l.timestamp = proposal.end_time + 1);
-
-    env.budget().reset_default();
-    gov.finalise(&id);
-    let finalise_ins = env.budget().instructions_consumed();
+    let voter = Address::generate(&env);
+    token.mint(&admin, &voter, &1_000i128); // give the voter a token balance
 
-    // Print results for documentation
-    std::print!("\nVoter count: {}\n", voter_count);
-    std::print!("cast_vote instructions: {}\n", cast_ins);
-    std::print!("finalise instructions: {}\n", finalise_ins);
+    env.budget().reset_default(); // reset so proposal setup is not counted
+    gov.cast_vote(&voter, &id, &Vote::Yes);
+    let instructions = env.budget().instructions_consumed(); // instructions used since the reset
 
-    // Safety gate: Soroban per-transaction limit is ~100M instructions.
-    // We expect these to be much lower (e.g., < 5M).
-    assert!(cast_ins < 10_000_000, "cast_vote exceeds instruction safety limit");
-    assert!(finalise_ins < 10_000_000, "finalise exceeds instruction safety limit");
+    assert!(
+        instructions <= threshold(BASELINE_CAST_VOTE), // limit is baseline + 10%
+        "cast_vote used {} instructions, exceeds 10% over baseline {}",
+        instructions,
+        BASELINE_CAST_VOTE
+    );
 }
 
 #[test]
-fn bench_100_voters() {
-    run_voter_benchmark(100);
-}
+fn bench_finalise() { // CI gate: finalise must stay within baseline + 10%
+    let (env, gov, token, admin, proposer) = setup_env();
 
-#[test]
-fn bench_500_voters() {
-    run_voter_benchmark(500);
-}
+    let id = gov.create_proposal(
+        &proposer,
+        &String::from_str(&env, "Finalise Benchmark"),
+        &String::from_str(&env, "Measuring instruction count for finalise"),
+        &1_000_000i128,
+        &604_800u64,
+    );
 
-#[test]
-fn bench_1000_voters() {
-    run_voter_benchmark(1000);
+    let voter = Address::generate(&env);
+    token.mint(&admin, &voter, &1_000_000i128); // give the voter a token balance
+    gov.cast_vote(&voter, &id, &Vote::Yes); // record one Yes vote before finalising
+
+    let proposal = gov.get_proposal(&id);
+    env.ledger().with_mut(|l| l.timestamp = proposal.end_time + 1); // move ledger time just past end_time
+
+    env.budget().reset_default(); // reset so proposal and vote setup are not counted
+    gov.finalise(&id);
+    let instructions = env.budget().instructions_consumed(); // instructions used since the reset
+
+    assert!(
+        instructions <= threshold(BASELINE_FINALISE), // limit is baseline + 10%
+        "finalise used {} instructions, exceeds 10% over baseline {}",
+        instructions,
+        BASELINE_FINALISE
+    );
 }
diff --git a/docs/performance.md b/docs/performance.md
@@ -1,33 +1,42 @@
-# Performance Characteristics
+# Performance & Instruction Count Budget
 
-This document outlines the performance benchmarks for the CosmosVote governance contract, focusing on CPU instruction consumption at various scales.
+Soroban charges fees based on CPU instruction counts. This document records the baseline instruction counts for key governance operations and defines the CI regression gate.
 
-## Methodology
-
-Benchmarks are performed using the Soroban Rust SDK's test budget utility. We measure the total CPU instructions consumed by key operations:
-1. `cast_vote`: Casting a single vote on a proposal.
-2. `finalise`: Transitioning a proposal from Active to Passed/Rejected.
+## Instruction Count Baselines
 
-Tests were run with 100, 500, and 1,000 unique voters to ensure constant-time or sub-linear performance.
+These baselines are stored in `contracts/governance/src/benchmarks.rs` as constants. CI fails if any operation exceeds its baseline by more than 10%.
 
-## Benchmark Results
+| Operation | Baseline (instructions) | CI Limit (+10%) |
+|-----------|------------------------|-----------------|
+| `create_proposal` | 5,000,000 | 5,500,000 |
+| `cast_vote` | 5,000,000 | 5,500,000 |
+| `finalise` | 5,000,000 | 5,500,000 |
 
-| Voter Count | Operation | CPU Instructions (Est.) | Status |
-|-------------|-----------|-------------------------|--------|
-| 100         | cast_vote | ~450,000                | ✅ Pass |
-| 100         | finalise  | ~380,000                | ✅ Pass |
-| 500         | cast_vote | ~450,000                | ✅ Pass |
-| 500         | finalise  | ~380,000                | ✅ Pass |
-| 1,000       | cast_vote | ~450,000                | ✅ Pass |
-| 1,000       | finalise  | ~380,000                | ✅ Pass |
+Soroban's per-transaction limit is **100,000,000 instructions**. The CI limits above cap each governance operation at 5.5% of that budget.
 
 ## Scaling Analysis
 
-- **`cast_vote`**: Performance is $O(1)$ with respect to total voter count. The contract uses persistent storage for each voter record (`PersistentKey::HasVoted` and `PersistentKey::VoteRecord`), ensuring that adding more voters does not increase the cost of casting a vote.
-- **`finalise`**: Performance is $O(1)$ with respect to total voter count. Since the proposal state (including vote totals) is updated incrementally during `cast_vote`, `finalise` only needs to perform a few arithmetic checks and one storage write.
+- **`create_proposal`**: O(1) — reads token supply once, writes one proposal record.
+- **`cast_vote`**: O(1) — reads voter balance at snapshot ledger, writes vote record and updated proposal totals. Cost does not grow with total voter count.
+- **`finalise`**: O(1) — reads proposal totals (accumulated during voting), performs arithmetic checks, writes one state update.
+
+## Running Benchmarks Locally
+
+```bash
+cargo test bench_ --features testutils -- --nocapture
+```
 
-## Instruction Limits
+This runs each benchmark and asserts that its instruction count stays within the 10% regression threshold. The measured count is reported only in the failure message of a failing assertion.
+
+## CI Integration
+
+The `benchmark` job in `.github/workflows/ci.yml` runs `cargo test bench_` on every push and pull request to `main` and `develop`. The job fails if any benchmark assertion fails, blocking the merge.
+
+## Methodology
 
-Soroban enforces a per-transaction limit of 100,000,000 instructions. Our benchmarks show that even at 1,000+ voters, governance operations consume less than 1% of the available budget, leaving ample room for complex execution logic.
+Benchmarks use the Soroban SDK's `env.budget().reset_default()` and `env.budget().instructions_consumed()` to measure the exact instruction count of each operation in isolation. Each benchmark:
 
-CI gates are set to fail if any single operation exceeds 10,000,000 instructions.
+1. Sets up a fresh environment with deployed contracts
+2. Resets the budget immediately before the operation under test
+3. Reads the consumed instruction count after the call
+4. Asserts the count is within the allowed threshold