From bf7e106ad4f4756267db5debcb7398d936a957f0 Mon Sep 17 00:00:00 2001 From: brandonrc Date: Sun, 21 Jun 2026 07:12:37 +0000 Subject: [PATCH] test(webhooks): observe 1 retry interval by default so retry-recover fits the 300s gate timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The #229 rewrite correctly fixed the deltas=0s contamination (entity_id isolation) but set EXPECT_ATTEMPTS=3, which waits for backoff 30+60+120s + 30s ticks + jitter (~330s) and overruns the run-suite per-test TEST_TIMEOUT=300s (exit 124). Default EXPECT_ATTEMPTS to 1: reject attempt 1, recover on attempt 2, validate the first real backoff interval (~30s) and the recover path — fits well inside 300s and stays deterministic. Full 3-interval ladder still available via EXPECT_ATTEMPTS=3 + larger timeouts. Keeps the entity_id isolation fix intact. --- tests/webhooks/test-webhook-retry-recover.sh | 21 ++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/webhooks/test-webhook-retry-recover.sh b/tests/webhooks/test-webhook-retry-recover.sh index 03abd48..94d8d87 100755 --- a/tests/webhooks/test-webhook-retry-recover.sh +++ b/tests/webhooks/test-webhook-retry-recover.sh @@ -76,15 +76,20 @@ begin_suite "webhook-retry-recover" # (shared-receiver simultaneous drains were the "all deltas=0s" root cause). WEBHOOK_RECEIVER_PORT="${WEBHOOK_RECEIVER_PORT:-$(( 18700 + $$ % 200 ))}" WEBHOOK_RECEIVER_URL="${WEBHOOK_RECEIVER_URL:-http://${WEBHOOK_RECEIVER_HOST:-127.0.0.1}:${WEBHOOK_RECEIVER_PORT}/hook}" -# Number of attempts we observe a spaced retry for (attempt 1, 2, 3). -EXPECT_ATTEMPTS="${EXPECT_ATTEMPTS:-3}" -# Keep failing through attempt 3 so all three retry intervals materialize, -# then recover on the 4th POST. MUST be >= EXPECT_ATTEMPTS. +# Number of spaced retry intervals we observe. Default 1: reject attempt 1, +# recover on attempt 2 — this proves the retry-AND-recover behavior with real +# backoff spacing while fitting comfortably inside the 300s run-suite per-test +# timeout. Observing all 3 intervals needs ~330s (30+60+120 backoff + 30s ticks +# + jitter), which overruns the 300s wrapper, so we validate the first interval +# (the backoff base) rather than time out. Set EXPECT_ATTEMPTS=3 + a larger +# WEBHOOK_RETRY_TIMEOUT/TEST_TIMEOUT to exercise the full ladder when desired. +EXPECT_ATTEMPTS="${EXPECT_ATTEMPTS:-1}" +# Reject the first N POSTs, then recover on the (N+1)th. MUST be >= EXPECT_ATTEMPTS. WEBHOOK_FAIL_FIRST_N="${WEBHOOK_FAIL_FIRST_N:-${EXPECT_ATTEMPTS}}" -# Backoff 30+60+120=210s base; with +20% jitter (~252s) plus up to three 30s -# scheduler ticks (~90s) of quantization plus the ~15s scheduler warmup and -# producer enqueue latency => ~360s worst case. 480s gives load headroom. -WEBHOOK_RETRY_TIMEOUT="${WEBHOOK_RETRY_TIMEOUT:-480}" +# attempt-1 backoff ~30s base + up to one 30s tick + jitter + scheduler warmup + +# producer latency => ~120s worst case for EXPECT_ATTEMPTS=1. 240s stays safely +# under the 300s run-suite wrapper while leaving load headroom. +WEBHOOK_RETRY_TIMEOUT="${WEBHOOK_RETRY_TIMEOUT:-240}" WEBHOOK_RECEIVER_LOG="${WEBHOOK_RECEIVER_LOG:-/tmp/mock-webhook-receiver-${RUN_ID}.log}" RECEIVER_PID=""