diff --git a/DISASTER_RECOVERY_PROCEDURES.md b/DISASTER_RECOVERY_PROCEDURES.md index 9abbd573..6c607bc0 100644 --- a/DISASTER_RECOVERY_PROCEDURES.md +++ b/DISASTER_RECOVERY_PROCEDURES.md @@ -1,90 +1,148 @@ # Disaster Recovery Procedures -This runbook defines backup verification, restoration testing, monitoring, and disaster recovery drills for TeachLink. +This document defines complete recovery scenarios, data restoration testing, service restoration runbooks, and repeatable testing procedures for TeachLink. -## Scope +## Purpose and Scope -- Smart contract backup manifests and verification events -- Indexer backup/recovery records -- Off-chain backup artifacts referenced by integrity hashes +- Purpose: Ensure timely, verifiable recovery from incidents affecting data, indexers, off-chain artifacts, or full environment failures. +- Scope: smart contract state snapshots and manifests, indexer databases, off-chain artifacts referenced by integrity hashes, deployment infrastructure (indexers, API services, observability), critical third-party integrations. -## Backup Integrity Verification +## Roles & Responsibilities -1. Create a backup manifest on-chain with `create_backup`. -2. Compute and store the off-chain backup hash (same bytes passed as `integrity_hash`). -3. Run `verify_backup` with the expected hash. -4. Confirm `BackupVerifiedEvent` is indexed in `backup_verifications`. +- On-call Recovery Lead: coordinates recovery and communications. +- Infrastructure Engineer: restores infrastructure and storage. +- Indexer Operator: restores indexer DBs, replays events. +- Application Owner: runs smoke tests and validates functionality. +- Compliance/Audit: collects evidence artifacts and signs off. + +## Recovery Objectives + +- Recovery Time Objective (RTO): target by component (e.g., indexer DB 2 hours, API service 4 hours, full environment 8 hours). 
+- Recovery Point Objective (RPO): target snapshot age (e.g., off-chain artifacts hourly, indexer WAL-based replay to last confirmed block). + +## Recovery Scenarios (detailed) + +1) Data Corruption (single-table / artifact) + - Detection: alert from integrity-check or failed verification. + - Immediate action: isolate affected service, promote read-only fallback if available. + - Restore: identify latest good manifest, restore artifact from `backups/artifacts/`, verify integrity hash. + - Validation: run `data_integrity_verify` and application smoke tests. + - Post-recovery: replay missing events if required; record incident and corrective actions. + +2) Partial Data Loss (indexer shards or partial contract state) + - Detection: missing indexer metrics, inconsistent query results. + - Restore: restore indexer DB from latest full backup; replay WAL or event stream from last backup point to current. + - Validation: run indexer reconciliation job and compare counts with golden manifest. + +3) Full Environment Loss (region or cluster outage) + - Actions: + - Failover to secondary region (if configured) or provision new cluster following the `infrastructure/runbooks/provision_cluster.md` steps. + - Restore storage volumes from backups and attach to instances. + - Redeploy indexers, APIs, and workers using the tagged release used at backup time. + - Validation: run end-to-end smoke tests and synthetic transactions. + +4) Key/Secrets Compromise + - Actions: rotate compromised secrets, revoke affected credentials, update manifests referencing secrets, redeploy services with new secrets. + - Validation: verify unauthorized access stops and rotate verification keys where applicable. + +5) Third-Party Service Outage (e.g., cloud storage) + - Actions: switch to configured secondary provider or restore artifacts from alternative replication target. + - Validation: confirm read/write operations against the failover provider. 
+ +## Test Data Restoration Procedures + +- Pre-reqs: isolated test environment, service account with restore privileges, sample backup manifest id, and a verification key. + +Step-by-step restore (example): + +1. Provision an isolated environment (use VM/container image `teachlink/dr-test`). +2. Fetch backup manifest: `aws s3 cp s3://teachlink-backups/manifests/<manifest-id>.json ./manifest.json` (or equivalent provider command). +3. Validate manifest integrity: compare stored `integrity_hash` with `sha256sum` of artifacts. +4. Restore artifacts to test storage: `restore_tool --manifest ./manifest.json --target ./restore`. +5. Restore indexer DB (if included): stop indexer service, load DB snapshot, start indexer, run `indexer_replay --from <last-backup-block>`. +6. Run automated validation suite: `scripts/recovery_test.sh` (Linux/macOS) or `scripts/recovery_test.ps1` (Windows). +7. Record outcome: capture `RecoveryExecutedEvent` if run on-chain or save `dr_report.json` in `backups/recovery_reports/`. Verification checks: -- Hash match result (`valid=true/false`) -- Verifier identity (`verified_by`) -- Verification timestamp (`verified_at`) -- Ledger/transaction traceability - -## Backup Restoration Testing - -Run restoration drills at least monthly and after major releases. - -Drill workflow: -1. Select a recent backup manifest (`/backup/manifests`). -2. Restore data into an isolated environment. -3. Execute application smoke checks. -4. Record drill outcome on-chain with `record_recovery`. -5. Confirm `RecoveryExecutedEvent` is indexed in `recovery_records`. 
- -Track: -- Recovery duration (`recovery_duration_secs`) -- Success/failure flag (`success`) -- Recovery operator (`executed_by`) - -## Monitoring Backup Success Rates - -Use indexer backup endpoints: - -- `GET /backup/verifications` -- `GET /backup/integrity-metrics?windowHours=24` -- `GET /backup/rto-metrics` -- `GET /backup/recoveries` -- `GET /backup/audit-trail?since=` - -Primary SLOs: -- Backup verification success rate >= 99% -- Backup coverage rate (backups verified in window) >= 95% -- Recovery drill success rate >= 95% - -Alert thresholds: -- Any invalid verification in last 24 hours -- Coverage rate below 95% -- Failed recovery drill - -## Disaster Recovery Scenarios To Test - -Test each scenario quarterly: - -1. Data corruption -2. Partial data loss -3. Full environment loss -4. Indexer database restore -5. Delayed backup verification pipeline - -For each scenario, capture: -- Detection timestamp -- Recovery start/end timestamps -- RTO achieved vs target -- Data integrity validation result -- Corrective actions - -## Operational Checklist - -- Daily: review integrity metrics and invalid verifications. -- Weekly: review backup coverage and missed schedules. -- Monthly: execute at least one restoration drill. -- Quarterly: execute full disaster recovery scenario tests. - -## Evidence and Audit Artifacts - -Retain for compliance: -- Backup manifests (`backup_manifests`) -- Verification records (`backup_verifications`) -- Recovery records (`recovery_records`) -- Incident reports and drill reports +- Hash match for each restored artifact. +- Application smoke tests pass: health endpoints, a sample read, and sample write (if safe). +- Indexer reconciliation: counts within tolerance vs golden manifest. + +Roll-back plan: if validation fails, revert test environment, record failure with logs, and iterate on restore steps. + +## Service Restoration Plan (runbook) + +1. Triage & Communication + - Notify stakeholders and escalate via on-call rota. 
+ - Create incident ticket with severity, target RTO/RPO, and assigned roles. + +2. Stabilize & Isolate + - Disable incoming traffic to affected services via load balancer/DNS. + - Ensure monitoring continues to capture metrics and logs. + +3. Restore Persistence Layer + - Restore object store from backups. + - Restore databases (indexer DBs) from snapshots and replay event streams. + +4. Restore Core Services in Order + - Indexer services (bring online first so downstream APIs can serve data). + - API/backend services. + - Worker/background jobs. + - Frontend and public endpoints. + +5. Validate + - Execute smoke test suite and synthetic transactions. + - Run integrity verification and reconcile indexer counts. + +6. Scale & Harden + - Scale services to target capacity. + - Apply any hotfixes and mitigations identified during recovery. + +7. Close Incident + - Document timeline, RTO/RPO achieved, root cause analysis, and follow-ups. + +## Testing Procedures and Drill Schedule + +- Drill types and cadence: + - Backup verification: weekly automated checks. + - Restoration drill (isolated): monthly. + - Full DR scenario (cross-team): quarterly. + - Tabletop exercises (process review): semi-annually. + +- Drill execution checklist: + 1. Announce drill window and non-production environment targets. + 2. Run `scripts/recovery_test.sh` or `scripts/recovery_test.ps1`. + 3. Validate results and collect `dr_report.json` and logs. + 4. Post-drill review and action items. + +## Automation and Scripts + +See `scripts/recovery_test.sh` and `scripts/recovery_test.ps1` for a small, repeatable validation harness that: +- probes the API service health endpoint, +- probes the indexer health endpoint, +- records start/end timestamps and both HTTP status codes, +- emits a `dr_report.json` report with a pass/fail flag (artifact integrity verification and indexer reconciliation are not automated by these scripts and must still be run as separate steps). + +## Evidence & Audit + +- Store drill reports in `backups/recovery_reports/<timestamp>.dr_report.json`. +- Attach relevant logs, verification traces, and artifact manifests. 
+ +## Metrics to Capture + +- Recovery duration per component (seconds) +- Success/failure boolean +- Data integrity pass rate +- Number of manual interventions required + +## Post-Incident Review + +- Perform RCA within 72 hours, publish action items, and track remediation in the incident ticket. + +## File locations + +- Test scripts: [scripts/recovery_test.sh](scripts/recovery_test.sh) +- Windows test script: [scripts/recovery_test.ps1](scripts/recovery_test.ps1) + +--- +*Created/Updated by DR automation on branch `dr/comprehensive-procedures`.* diff --git a/contracts/teachlink/src/bridge.rs b/contracts/teachlink/src/bridge.rs index 650527ed..07a47981 100644 --- a/contracts/teachlink/src/bridge.rs +++ b/contracts/teachlink/src/bridge.rs @@ -670,7 +670,9 @@ impl Bridge { mod tests { use super::{Bridge, BRIDGE_RETRY_DELAY_BASE_SECONDS}; use crate::errors::BridgeError; - use crate::storage::{BRIDGE_GUARD, BRIDGE_TXS, MIN_VALIDATORS, NONCE, TOKEN, VALIDATORS}; + use crate::storage::{ + BRIDGE_FAILURES, BRIDGE_GUARD, BRIDGE_TXS, MIN_VALIDATORS, NONCE, TOKEN, VALIDATORS, + }; use crate::types::{BridgeTransaction, CrossChainMessage}; use crate::TeachLinkBridge; use soroban_sdk::testutils::{Address as _, Ledger}; @@ -783,4 +785,33 @@ mod tests { assert_eq!(retry_over_limit, Err(BridgeError::RetryLimitExceeded)); }); } + // mark_bridge_failed must return Ok(()) and store the reason bytes under key 42 in the BRIDGE_FAILURES instance-storage map. + #[test] + fn mark_bridge_failed_records_failure_and_stores_reason() { + let env = Env::default(); + let contract_id = env.register(TeachLinkBridge, ()); + let reason = Bytes::from_slice(&env, b"simulated_failure"); + + // Seed a bridge tx so the failure can be recorded + env.as_contract(&contract_id, || { + seed_bridge_tx(&env, 42, 1_000); + }); + + env.as_contract(&contract_id, || { + let r = Bridge::mark_bridge_failed(&env, 42, reason.clone()); + assert_eq!(r, Ok(())); + }); + + let stored_opt: Option<Bytes> = env.as_contract(&contract_id, || { + let failures: Map<u64, Bytes> = env + .storage() + .instance() + .get(&BRIDGE_FAILURES) + .unwrap_or_else(|| Map::new(&env)); 
+ failures.get(42) + }); + assert!(stored_opt.is_some()); + let stored = stored_opt.unwrap(); + assert_eq!(stored, reason); + } } diff --git a/contracts/teachlink/test_snapshots/atomic_swap/tests/initiate_swap_rejects_when_reentrancy_guard_active.1.json b/contracts/teachlink/test_snapshots/atomic_swap/tests/initiate_swap_rejects_when_reentrancy_guard_active.1.json new file mode 100644 index 00000000..b1b7c31b --- /dev/null +++ b/contracts/teachlink/test_snapshots/atomic_swap/tests/initiate_swap_rejects_when_reentrancy_guard_active.1.json @@ -0,0 +1,104 @@ +{ + "generators": { + "address": 5, + "nonce": 0, + "mux_id": 0 + }, + "auth": [ + [], + [ + [ + "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFCT4", + { + "function": { + "contract_fn": { + "contract_address": "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD2KM", + "function_name": "", + "args": [] + } + }, + "sub_invocations": [] + } + ] + ] + ], + "ledger": { + "protocol_version": 25, + "sequence_number": 0, + "timestamp": 0, + "network_id": "0000000000000000000000000000000000000000000000000000000000000000", + "base_reserve": 0, + "min_persistent_entry_ttl": 4096, + "min_temp_entry_ttl": 16, + "max_entry_ttl": 6312000, + "ledger_entries": [ + { + "entry": { + "last_modified_ledger_seq": 0, + "data": { + "contract_data": { + "ext": "v0", + "contract": "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD2KM", + "key": "ledger_key_contract_instance", + "durability": "persistent", + "val": { + "contract_instance": { + "executable": { + "wasm": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + }, + "storage": [ + { + "key": { + "symbol": "sw_guard" + }, + "val": { + "bool": true + } + } + ] + } + } + } + }, + "ext": "v0" + }, + "live_until": 4095 + }, + { + "entry": { + "last_modified_ledger_seq": 0, + "data": { + "contract_data": { + "ext": "v0", + "contract": "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFCT4", + "key": { + "ledger_key_nonce": { + "nonce": 
"801925984706572462" + } + }, + "durability": "temporary", + "val": "void" + } + }, + "ext": "v0" + }, + "live_until": 6311999 + }, + { + "entry": { + "last_modified_ledger_seq": 0, + "data": { + "contract_code": { + "ext": "v0", + "hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "code": "" + } + }, + "ext": "v0" + }, + "live_until": 4095 + } + ] + }, + "events": [] +} \ No newline at end of file diff --git a/contracts/teachlink/test_snapshots/bridge/tests/cancel_bridge_rejects_when_reentrancy_guard_active.1.json b/contracts/teachlink/test_snapshots/bridge/tests/cancel_bridge_rejects_when_reentrancy_guard_active.1.json new file mode 100644 index 00000000..052c7c04 --- /dev/null +++ b/contracts/teachlink/test_snapshots/bridge/tests/cancel_bridge_rejects_when_reentrancy_guard_active.1.json @@ -0,0 +1,70 @@ +{ + "generators": { + "address": 1, + "nonce": 0, + "mux_id": 0 + }, + "auth": [ + [], + [] + ], + "ledger": { + "protocol_version": 25, + "sequence_number": 0, + "timestamp": 0, + "network_id": "0000000000000000000000000000000000000000000000000000000000000000", + "base_reserve": 0, + "min_persistent_entry_ttl": 4096, + "min_temp_entry_ttl": 16, + "max_entry_ttl": 6312000, + "ledger_entries": [ + { + "entry": { + "last_modified_ledger_seq": 0, + "data": { + "contract_data": { + "ext": "v0", + "contract": "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD2KM", + "key": "ledger_key_contract_instance", + "durability": "persistent", + "val": { + "contract_instance": { + "executable": { + "wasm": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + }, + "storage": [ + { + "key": { + "symbol": "br_guard" + }, + "val": { + "bool": true + } + } + ] + } + } + } + }, + "ext": "v0" + }, + "live_until": 4095 + }, + { + "entry": { + "last_modified_ledger_seq": 0, + "data": { + "contract_code": { + "ext": "v0", + "hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "code": "" + } + }, + "ext": "v0" + }, + 
"live_until": 4095 + } + ] + }, + "events": [] +} \ No newline at end of file diff --git a/contracts/teachlink/test_snapshots/bridge/tests/complete_bridge_rejects_replay_when_nonce_already_processed.1.json b/contracts/teachlink/test_snapshots/bridge/tests/complete_bridge_rejects_replay_when_nonce_already_processed.1.json index b39f8d39..78d4e6df 100644 --- a/contracts/teachlink/test_snapshots/bridge/tests/complete_bridge_rejects_replay_when_nonce_already_processed.1.json +++ b/contracts/teachlink/test_snapshots/bridge/tests/complete_bridge_rejects_replay_when_nonce_already_processed.1.json @@ -62,6 +62,14 @@ "wasm": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" }, "storage": [ + { + "key": { + "symbol": "br_guard" + }, + "val": { + "bool": false + } + }, { "key": { "symbol": "min_valid" diff --git a/contracts/teachlink/test_snapshots/bridge/tests/mark_bridge_failed_records_failure_and_stores_reason.1.json b/contracts/teachlink/test_snapshots/bridge/tests/mark_bridge_failed_records_failure_and_stores_reason.1.json new file mode 100644 index 00000000..a487efdf --- /dev/null +++ b/contracts/teachlink/test_snapshots/bridge/tests/mark_bridge_failed_records_failure_and_stores_reason.1.json @@ -0,0 +1,163 @@ +{ + "generators": { + "address": 3, + "nonce": 0, + "mux_id": 0 + }, + "auth": [ + [], + [], + [], + [] + ], + "ledger": { + "protocol_version": 25, + "sequence_number": 0, + "timestamp": 0, + "network_id": "0000000000000000000000000000000000000000000000000000000000000000", + "base_reserve": 0, + "min_persistent_entry_ttl": 4096, + "min_temp_entry_ttl": 16, + "max_entry_ttl": 6312000, + "ledger_entries": [ + { + "entry": { + "last_modified_ledger_seq": 0, + "data": { + "contract_data": { + "ext": "v0", + "contract": "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD2KM", + "key": "ledger_key_contract_instance", + "durability": "persistent", + "val": { + "contract_instance": { + "executable": { + "wasm": 
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + }, + "storage": [ + { + "key": { + "symbol": "br_fails" + }, + "val": { + "map": [ + { + "key": { + "u64": "42" + }, + "val": { + "bytes": "73696d756c617465645f6661696c757265" + } + } + ] + } + }, + { + "key": { + "symbol": "bridge_tx" + }, + "val": { + "map": [ + { + "key": { + "u64": "42" + }, + "val": { + "map": [ + { + "key": { + "symbol": "amount" + }, + "val": { + "i128": "500" + } + }, + { + "key": { + "symbol": "destination_address" + }, + "val": { + "bytes": "64657374" + } + }, + { + "key": { + "symbol": "destination_chain" + }, + "val": { + "u32": 2 + } + }, + { + "key": { + "symbol": "nonce" + }, + "val": { + "u64": "42" + } + }, + { + "key": { + "symbol": "recipient" + }, + "val": { + "address": "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHK3M" + } + }, + { + "key": { + "symbol": "timestamp" + }, + "val": { + "u64": "1000" + } + }, + { + "key": { + "symbol": "token" + }, + "val": { + "address": "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFCT4" + } + } + ] + } + } + ] + } + }, + { + "key": { + "symbol": "token" + }, + "val": { + "address": "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFCT4" + } + } + ] + } + } + } + }, + "ext": "v0" + }, + "live_until": 4095 + }, + { + "entry": { + "last_modified_ledger_seq": 0, + "data": { + "contract_code": { + "ext": "v0", + "hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "code": "" + } + }, + "ext": "v0" + }, + "live_until": 4095 + } + ] + }, + "events": [] +} \ No newline at end of file diff --git a/contracts/teachlink/test_snapshots/insurance/tests/process_claim_rejects_when_reentrancy_guard_active.1.json b/contracts/teachlink/test_snapshots/insurance/tests/process_claim_rejects_when_reentrancy_guard_active.1.json new file mode 100644 index 00000000..ea6cbc21 --- /dev/null +++ b/contracts/teachlink/test_snapshots/insurance/tests/process_claim_rejects_when_reentrancy_guard_active.1.json @@ -0,0 
+1,70 @@ +{ + "generators": { + "address": 2, + "nonce": 0, + "mux_id": 0 + }, + "auth": [ + [], + [] + ], + "ledger": { + "protocol_version": 25, + "sequence_number": 0, + "timestamp": 0, + "network_id": "0000000000000000000000000000000000000000000000000000000000000000", + "base_reserve": 0, + "min_persistent_entry_ttl": 4096, + "min_temp_entry_ttl": 16, + "max_entry_ttl": 6312000, + "ledger_entries": [ + { + "entry": { + "last_modified_ledger_seq": 0, + "data": { + "contract_data": { + "ext": "v0", + "contract": "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD2KM", + "key": "ledger_key_contract_instance", + "durability": "persistent", + "val": { + "contract_instance": { + "executable": { + "wasm": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + }, + "storage": [ + { + "key": { + "symbol": "ins_guard" + }, + "val": { + "bool": true + } + } + ] + } + } + } + }, + "ext": "v0" + }, + "live_until": 4095 + }, + { + "entry": { + "last_modified_ledger_seq": 0, + "data": { + "contract_code": { + "ext": "v0", + "hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "code": "" + } + }, + "ext": "v0" + }, + "live_until": 4095 + } + ] + }, + "events": [] +} \ No newline at end of file diff --git a/contracts/teachlink/test_snapshots/rewards/tests/claim_rewards_rejects_when_reentrancy_guard_active.1.json b/contracts/teachlink/test_snapshots/rewards/tests/claim_rewards_rejects_when_reentrancy_guard_active.1.json new file mode 100644 index 00000000..074d51d1 --- /dev/null +++ b/contracts/teachlink/test_snapshots/rewards/tests/claim_rewards_rejects_when_reentrancy_guard_active.1.json @@ -0,0 +1,104 @@ +{ + "generators": { + "address": 2, + "nonce": 0, + "mux_id": 0 + }, + "auth": [ + [], + [ + [ + "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFCT4", + { + "function": { + "contract_fn": { + "contract_address": "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD2KM", + "function_name": "", + "args": [] + } + }, + 
"sub_invocations": [] + } + ] + ] + ], + "ledger": { + "protocol_version": 25, + "sequence_number": 0, + "timestamp": 0, + "network_id": "0000000000000000000000000000000000000000000000000000000000000000", + "base_reserve": 0, + "min_persistent_entry_ttl": 4096, + "min_temp_entry_ttl": 16, + "max_entry_ttl": 6312000, + "ledger_entries": [ + { + "entry": { + "last_modified_ledger_seq": 0, + "data": { + "contract_data": { + "ext": "v0", + "contract": "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD2KM", + "key": "ledger_key_contract_instance", + "durability": "persistent", + "val": { + "contract_instance": { + "executable": { + "wasm": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + }, + "storage": [ + { + "key": { + "symbol": "rw_guard" + }, + "val": { + "bool": true + } + } + ] + } + } + } + }, + "ext": "v0" + }, + "live_until": 4095 + }, + { + "entry": { + "last_modified_ledger_seq": 0, + "data": { + "contract_data": { + "ext": "v0", + "contract": "CAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFCT4", + "key": { + "ledger_key_nonce": { + "nonce": "801925984706572462" + } + }, + "durability": "temporary", + "val": "void" + } + }, + "ext": "v0" + }, + "live_until": 6311999 + }, + { + "entry": { + "last_modified_ledger_seq": 0, + "data": { + "contract_code": { + "ext": "v0", + "hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + "code": "" + } + }, + "ext": "v0" + }, + "live_until": 4095 + } + ] + }, + "events": [] +} \ No newline at end of file diff --git a/scripts/recovery_test.ps1 b/scripts/recovery_test.ps1 new file mode 100644 index 00000000..8e167626 --- /dev/null +++ b/scripts/recovery_test.ps1 @@ -0,0 +1,31 @@ +param( + [string]$HealthUrl = 'http://localhost:8080/health', + [string]$IndexerUrl = 'http://localhost:8081/health', + [string]$Report = "backups/recovery_reports/$(Get-Date -Format yyyy-MM-ddTHHmmssZ).dr_report.json" +) + +New-Item -ItemType Directory -Force -Path (Split-Path $Report) | Out-Null 
+
+$start = (Get-Date).ToUniversalTime().ToString('o')
+# StatusCode on the Invoke-WebRequest response is already an integer; the catch block maps any request failure to 0.
+$healthCode = try { [int](Invoke-WebRequest -Uri $HealthUrl -UseBasicParsing -Method Head).StatusCode } catch { 0 }
+$indexerCode = try { [int](Invoke-WebRequest -Uri $IndexerUrl -UseBasicParsing -Method Head).StatusCode } catch { 0 }
+
+$pass = $true
+if ($healthCode -ne 200 -or $indexerCode -ne 200) { $pass = $false }
+
+$end = (Get-Date).ToUniversalTime().ToString('o')
+
+$reportObj = @{
+    start = $start
+    end = $end
+    health_status_code = $healthCode
+    indexer_status_code = $indexerCode
+    pass = $pass
+} | ConvertTo-Json -Depth 4
+
+Set-Content -Path $Report -Value $reportObj -Encoding UTF8
+
+Write-Output "DR test complete. Report: $Report"
+if (-not $pass) { exit 2 }
+exit 0
diff --git a/scripts/recovery_test.sh b/scripts/recovery_test.sh
new file mode 100644
index 00000000..a2ac04d2
--- /dev/null
+++ b/scripts/recovery_test.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+#
+# Post-restore DR validation harness.
+#
+# Probes the service and indexer health endpoints and writes a JSON report
+# containing start/end timestamps, both HTTP status codes and a pass flag.
+#
+# Environment overrides:
+#   HEALTH_URL    service health endpoint (default http://localhost:8080/health)
+#   INDEXER_URL   indexer health endpoint (default http://localhost:8081/health)
+#   REPORT        report path (default backups/recovery_reports/<UTC time>.dr_report.json)
+#   CURL_TIMEOUT  per-request timeout in seconds (default 10)
+#
+# Exit status: 0 when both endpoints answer HTTP 200, 2 otherwise.
+set -euo pipefail
+
+START_TS=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+HEALTH_URL=${HEALTH_URL:-http://localhost:8080/health}
+INDEXER_URL=${INDEXER_URL:-http://localhost:8081/health}
+REPORT=${REPORT:-backups/recovery_reports/$(date -u +"%Y-%m-%dT%H%M%SZ").dr_report.json}
+CURL_TIMEOUT=${CURL_TIMEOUT:-10}
+
+# Prints the HTTP status of a GET to $1 as a plain integer; 0 means no response.
+http_status() {
+  local code
+  # On a failed request curl exits non-zero but -w has already printed "000",
+  # so the failure is tolerated here instead of appending a second value.
+  code=$(curl -s -o /dev/null --max-time "$CURL_TIMEOUT" -w '%{http_code}' "$1") || true
+  [[ "$code" =~ ^[0-9]+$ ]] || code=0
+  printf '%d\n' "$((10#$code))"
+}
+
+mkdir -p "$(dirname "$REPORT")"
+
+# Write a start-only report first; it is overwritten with the full report below.
+echo "{\"start\": \"$START_TS\"}" > "$REPORT"
+
+echo "Checking service health: $HEALTH_URL"
+health_code=$(http_status "$HEALTH_URL")
+echo "Checking indexer health: $INDEXER_URL"
+indexer_code=$(http_status "$INDEXER_URL")
+
+pass=true
+if [[ "$health_code" != "200" || "$indexer_code" != "200" ]]; then
+  pass=false
+fi
+
+END_TS=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+
+cat > "$REPORT" <<EOF
+{
+  "start": "$START_TS",
+  "end": "$END_TS",
+  "health_status_code": $health_code,
+  "indexer_status_code": $indexer_code,
+  "pass": $pass
+}
+EOF
+
+echo "DR test complete. Report: $REPORT"
+if [[ "$pass" != "true" ]]; then
+  echo "DR test failed: health=$health_code indexer=$indexer_code" >&2
+  exit 2
+fi
+
+exit 0