diff --git a/internal/layers/enrollment.go b/internal/layers/enrollment.go
index d418ec442..4e00aef04 100644
--- a/internal/layers/enrollment.go
+++ b/internal/layers/enrollment.go
@@ -15,6 +15,17 @@ const (
 
 	// repoMaintenanceWorkflow is the workflow file that handles enrollment.
 	repoMaintenanceWorkflow = "repo-maintenance.yml"
+
+	// enrollmentWaitTimeout is the maximum time to wait for the
+	// repo-maintenance workflow run to appear and complete.
+	enrollmentWaitTimeout = 3 * time.Minute
+
+	// enrollmentPollInitial is the initial polling interval for
+	// workflow run status checks.
+	enrollmentPollInitial = 2 * time.Second
+
+	// enrollmentPollMax is the maximum polling interval (backoff cap).
+	enrollmentPollMax = 15 * time.Second
 )
 
 // EnrollmentLayer monitors workflow-driven enrollment of target repos.
@@ -82,11 +93,11 @@ func (l *EnrollmentLayer) Install(ctx context.Context) error {
 	}
 	l.ui.StepDone("dispatched repo-maintenance workflow")
 
-	// Wait for the workflow run to complete.
+	// Wait for the workflow run to complete (bounded by enrollmentWaitTimeout).
+	l.ui.StepStart("waiting for enrollment workflow to complete")
 	run, err := l.awaitWorkflowRun(ctx, dispatchTime)
 	if err != nil {
 		l.ui.StepWarn(fmt.Sprintf("could not confirm enrollment: %v", err))
-		l.ui.StepInfo("check the repo-maintenance workflow in .fullsend for results")
 		return nil // non-fatal — enrollment may still succeed
 	}
 
@@ -105,18 +116,36 @@ func (l *EnrollmentLayer) Install(ctx context.Context) error {
 }
 
 // awaitWorkflowRun polls for a repo-maintenance workflow run created after
-// dispatchTime and waits for it to complete.
+// dispatchTime and waits for it to complete. It uses exponential backoff
+// and a bounded timeout to avoid long silent waits.
 func (l *EnrollmentLayer) awaitWorkflowRun(ctx context.Context, dispatchTime time.Time) (*forge.WorkflowRun, error) {
-	for attempt := range 36 { // 3 minutes max
+	deadline := time.Now().Add(enrollmentWaitTimeout)
+	interval := enrollmentPollInitial
+	start := time.Now()
+
+	for {
+		if time.Now().After(deadline) {
+			elapsed := time.Since(start).Round(time.Second)
+			return nil, fmt.Errorf(
+				"timed out after %s waiting for repo-maintenance workflow; "+
+					"check the workflow in .fullsend and re-run install if needed",
+				elapsed,
+			)
+		}
+
 		select {
 		case <-ctx.Done():
 			return nil, ctx.Err()
-		case <-time.After(5 * time.Second):
+		// Clamp the sleep to the time left so the wait cannot overshoot the deadline.
+		case <-time.After(min(interval, time.Until(deadline))):
 		}
 
+		elapsed := time.Since(start).Round(time.Second)
+
 		runs, err := l.client.ListWorkflowRuns(ctx, l.org, forge.ConfigRepoName, repoMaintenanceWorkflow)
 		if err != nil {
-			l.ui.StepInfo(fmt.Sprintf("waiting for workflow run (attempt %d)...", attempt+1))
+			l.ui.StepInfo(fmt.Sprintf("waiting for workflow registration (%s elapsed)...", elapsed))
+			interval = nextInterval(interval)
 			continue
 		}
 
@@ -133,11 +162,21 @@ func (l *EnrollmentLayer) awaitWorkflowRun(ctx context.Context, dispatchTime tim
 			if run.Status == "completed" {
 				return run, nil
 			}
-			l.ui.StepInfo(fmt.Sprintf("workflow run: %s (%s)", run.HTMLURL, run.Status))
+			l.ui.StepInfo(fmt.Sprintf("workflow run %s (%s, %s elapsed)", run.HTMLURL, run.Status, elapsed))
 			break // found our run, keep waiting
 		}
+
+		interval = nextInterval(interval)
+	}
+}
+
+// nextInterval doubles the polling interval up to enrollmentPollMax.
+func nextInterval(current time.Duration) time.Duration {
+	next := current * 2
+	if next > enrollmentPollMax {
+		return enrollmentPollMax
 	}
-	return nil, fmt.Errorf("timed out waiting for repo-maintenance workflow")
+	return next
 }
 
 // showWorkflowLogs fetches and displays workflow run logs locally so the user
diff --git a/internal/layers/enrollment_test.go b/internal/layers/enrollment_test.go
index 2d243af95..701f58715 100644
--- a/internal/layers/enrollment_test.go
+++ b/internal/layers/enrollment_test.go
@@ -470,3 +470,40 @@ func TestEnrollmentLayer_Analyze_PerRepoGuardCheckError(t *testing.T) {
 	assert.Contains(t, report.Details[0], "all 1 repos failed guard check")
 	assert.Contains(t, report.Details[1], "guard check failed, skipped")
 }
+
+func TestEnrollmentLayer_Install_ContextCancelled(t *testing.T) {
+	// No workflow runs configured — awaitWorkflowRun will poll until
+	// context is cancelled.
+	client := &forge.FakeClient{}
+	repos := []string{"repo-a"}
+	layer, buf := newEnrollmentLayer(t, client, repos, nil)
+
+	ctx, cancel := context.WithCancel(context.Background())
+	// Cancel immediately so the first poll iteration exits.
+	cancel()
+
+	err := layer.Install(ctx)
+	require.NoError(t, err) // Install treats timeout/cancel as non-fatal
+
+	output := buf.String()
+	assert.Contains(t, output, "could not confirm enrollment")
+}
+
+func TestNextInterval(t *testing.T) {
+	tests := []struct {
+		name     string
+		current  time.Duration
+		expected time.Duration
+	}{
+		{"doubles small interval", 2 * time.Second, 4 * time.Second},
+		{"doubles again", 4 * time.Second, 8 * time.Second},
+		{"caps at max", 8 * time.Second, enrollmentPollMax},
+		{"stays at max", enrollmentPollMax, enrollmentPollMax},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := nextInterval(tt.current)
+			assert.Equal(t, tt.expected, got)
+		})
+	}
+}