From ac01c49fe0c5f874365d17f09719c3e2c4327a43 Mon Sep 17 00:00:00 2001 From: mjoffre Date: Thu, 28 May 2026 20:48:31 +0000 Subject: [PATCH 1/2] fix(cli): convert deploy/push timeouts from absolute to activity-based idle timeouts The 15-minute timeout on bl deploy and bl push was an absolute timer that fired regardless of whether the build was actively making progress. This caused false timeout failures for long-running but healthy builds. Changes: - deploy.go: Replace time.After with activity-tracking in both deployResourceInteractive and deployAdditionalResource. The idle timer resets on status changes and incoming build log entries. - push.go: Same activity-based idle timeout for watchBuildLogsNonInteractive. - Update --timeout flag descriptions to clarify idle behavior. - Error messages now say 'no progress detected for <duration>' instead of 'deployment timed out after <duration>'. Fixes ENG-2691 Co-Authored-By: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- cli/deploy.go | 207 +++++++++++++++++++++++++++++--------------------- cli/push.go | 28 +++++-- 2 files changed, 145 insertions(+), 90 deletions(-) diff --git a/cli/deploy.go b/cli/deploy.go index 15c4d951..81725eb0 100644 --- a/cli/deploy.go +++ b/cli/deploy.go @@ -339,7 +339,7 @@ all projects in a monorepo (looks for blaxel.toml in subdirectories).`, cmd.Flags().BoolVar(&experimental, "experimental", false, "Enable experimental features (e.g. USER directive support)") cmd.Flags().StringArrayVarP(&registryCreds, "registry-cred", "c", []string{}, "Registry credentials (format: registry=username:password, repeatable)") cmd.Flags().StringVar(&dockerConfigPath, "docker-config", "", "Path to a Docker config.json file with registry credentials") - cmd.Flags().StringVar(&timeoutStr, "timeout", "", "Timeout for build and deployment monitoring (e.g. 30m, 1h). Defaults to 15m") + cmd.Flags().StringVar(&timeoutStr, "timeout", "", "Idle timeout for build and deployment monitoring (e.g. 30m, 1h). 
Resets on progress. Defaults to 15m") cmd.Flags().StringVar(&buildEnvPath, "build-env-file", "", "Path to a build env file with Docker build args (default: auto-detect .env.build)") return cmd } @@ -1280,7 +1280,23 @@ func (d *Deployment) deployResourceInteractive(resource *deploy.Resource, model // Start monitoring the resource status statusTicker := time.NewTicker(3 * time.Second) defer statusTicker.Stop() - statusTimeout := time.After(d.timeout) + + // Activity-based idle timeout: resets whenever progress is detected + // (status changes or build log activity) so long-running but active + // builds are not killed prematurely. + var lastActivityMu sync.Mutex + lastActivityTime := time.Now() + markActive := func() { + lastActivityMu.Lock() + lastActivityTime = time.Now() + lastActivityMu.Unlock() + } + checkIdleTimeout := func() bool { + lastActivityMu.Lock() + elapsed := time.Since(lastActivityTime) + lastActivityMu.Unlock() + return elapsed >= d.timeout + } // Grace period for stale FAILED status - if we don't see any status change within this time, // accept that the FAILED status is real (handles case where new deployment fails immediately) @@ -1298,16 +1314,19 @@ func (d *Deployment) deployResourceInteractive(resource *deploy.Resource, model for { select { - case <-statusTimeout: - if logWatcher != nil { - logWatcher.Stop() - } - model.UpdateResource(idx, deploy.StatusFailed, "Deployment timeout", fmt.Errorf("deployment timed out after %s", d.timeout)) - return case <-staleFailedGracePeriod: // Grace period expired - if status is still FAILED, accept it as real staleGracePeriodExpired = true case <-statusTicker.C: + // Check idle timeout + if checkIdleTimeout() { + if logWatcher != nil { + logWatcher.Stop() + } + model.UpdateResource(idx, deploy.StatusFailed, "Deployment timeout", fmt.Errorf("no progress detected for %s", d.timeout)) + return + } + status, err := getResourceStatus(strings.ToLower(resource.Kind), resource.Name) if err != nil { // Continue 
polling on temporary errors @@ -1322,6 +1341,7 @@ func (d *Deployment) deployResourceInteractive(resource *deploy.Resource, model // Only log status changes if status != lastStatus { lastStatus = status + markActive() // Map API status to our UI status and update switch status { @@ -1347,6 +1367,7 @@ func (d *Deployment) deployResourceInteractive(resource *deploy.Resource, model resource.Name, func(log string) { model.AddBuildLog(idx, log) + markActive() }, d.timeout, ) @@ -1472,95 +1493,111 @@ func (d *Deployment) deployAdditionalResource(resource *deploy.Resource, model * additionalTimeout = d.timeout } ticker := time.NewTicker(3 * time.Second) - timeout := time.After(additionalTimeout) + + // Activity-based idle timeout for additional resources + var addlActivityMu sync.Mutex + addlLastActivity := time.Now() + addlMarkActive := func() { + addlActivityMu.Lock() + addlLastActivity = time.Now() + addlActivityMu.Unlock() + } + addlCheckIdle := func() bool { + addlActivityMu.Lock() + elapsed := time.Since(addlLastActivity) + addlActivityMu.Unlock() + return elapsed >= additionalTimeout + } + lastStatus := "" // Track last status to avoid duplicate logs var logWatcher interface{ Stop() } buildLogStarted := false sawBuildingStatus := false // Track if we've seen BUILDING status - for { - select { - case <-timeout: + for range ticker.C { + if addlCheckIdle() { if logWatcher != nil { logWatcher.Stop() } - model.UpdateResource(idx, deploy.StatusFailed, "Timeout", fmt.Errorf("deployment timed out after %s", additionalTimeout)) + model.UpdateResource(idx, deploy.StatusFailed, "Timeout", fmt.Errorf("no progress detected for %s", additionalTimeout)) ticker.Stop() return - case <-ticker.C: - status, err := getResourceStatus(strings.ToLower(resource.Kind), resource.Name) - if err != nil { - continue - } + } - // Logs handling - if status != lastStatus { - lastStatus = status - model.AddBuildLog(idx, fmt.Sprintf("Status: %s", status)) - - switch status { - case "UPLOADING": - 
model.UpdateResource(idx, deploy.StatusUploading, "Uploading code", nil) - case "BUILDING": - sawBuildingStatus = true - model.UpdateResource(idx, deploy.StatusBuilding, "Building image", nil) - - // Start build log watcher if not already started - if !buildLogStarted { - buildLogStarted = true - client := core.GetClient() - workspace := core.GetWorkspace() - - lw := mon.NewBuildLogWatcher( - client, - workspace, - strings.ToLower(resource.Kind), - resource.Name, - func(log string) { - model.AddBuildLog(idx, log) - }, - additionalTimeout, - ) - lw.Start() - logWatcher = lw - } - case "DEPLOYING": - if logWatcher != nil { - logWatcher.Stop() - logWatcher = nil - } - model.UpdateResource(idx, deploy.StatusDeploying, "Deploying to cluster", nil) - case "DEPLOYED": - // If skipBuild is false (AutoGenerated=true), we MUST have seen BUILDING status - if resource.AutoGenerated && !sawBuildingStatus { - // This is a mistake - continue monitoring - continue - } - if logWatcher != nil { - logWatcher.Stop() - } - - model.UpdateResource(idx, deploy.StatusComplete, "Applied successfully", nil) - ticker.Stop() - return - case "FAILED": - if logWatcher != nil { - logWatcher.Stop() - } - model.UpdateResource(idx, deploy.StatusFailed, "Failed", fmt.Errorf("deployment failed")) - ticker.Stop() - return - case "DEACTIVATED", "DEACTIVATING", "DELETING": - if logWatcher != nil { - logWatcher.Stop() - } - model.UpdateResource(idx, deploy.StatusFailed, fmt.Sprintf("Unexpected status: %s", status), fmt.Errorf("resource is being deactivated or deleted")) - ticker.Stop() - return - default: - // Continue monitoring for unknown statuses - model.UpdateResource(idx, deploy.StatusDeploying, fmt.Sprintf("Status: %s", status), nil) + status, err := getResourceStatus(strings.ToLower(resource.Kind), resource.Name) + if err != nil { + continue + } + + // Logs handling + if status != lastStatus { + lastStatus = status + addlMarkActive() + model.AddBuildLog(idx, fmt.Sprintf("Status: %s", status)) + + 
switch status { + case "UPLOADING": + model.UpdateResource(idx, deploy.StatusUploading, "Uploading code", nil) + case "BUILDING": + sawBuildingStatus = true + model.UpdateResource(idx, deploy.StatusBuilding, "Building image", nil) + + // Start build log watcher if not already started + if !buildLogStarted { + buildLogStarted = true + client := core.GetClient() + workspace := core.GetWorkspace() + + lw := mon.NewBuildLogWatcher( + client, + workspace, + strings.ToLower(resource.Kind), + resource.Name, + func(log string) { + model.AddBuildLog(idx, log) + addlMarkActive() + }, + additionalTimeout, + ) + lw.Start() + logWatcher = lw + } + case "DEPLOYING": + if logWatcher != nil { + logWatcher.Stop() + logWatcher = nil + } + model.UpdateResource(idx, deploy.StatusDeploying, "Deploying to cluster", nil) + case "DEPLOYED": + // If skipBuild is false (AutoGenerated=true), we MUST have seen BUILDING status + if resource.AutoGenerated && !sawBuildingStatus { + // This is a mistake - continue monitoring + continue + } + if logWatcher != nil { + logWatcher.Stop() + } + + model.UpdateResource(idx, deploy.StatusComplete, "Applied successfully", nil) + ticker.Stop() + return + case "FAILED": + if logWatcher != nil { + logWatcher.Stop() + } + model.UpdateResource(idx, deploy.StatusFailed, "Failed", fmt.Errorf("deployment failed")) + ticker.Stop() + return + case "DEACTIVATED", "DEACTIVATING", "DELETING": + if logWatcher != nil { + logWatcher.Stop() } + model.UpdateResource(idx, deploy.StatusFailed, fmt.Sprintf("Unexpected status: %s", status), fmt.Errorf("resource is being deactivated or deleted")) + ticker.Stop() + return + default: + // Continue monitoring for unknown statuses + model.UpdateResource(idx, deploy.StatusDeploying, fmt.Sprintf("Status: %s", status), nil) } } } diff --git a/cli/push.go b/cli/push.go index 53c9dc74..e24dcae9 100644 --- a/cli/push.go +++ b/cli/push.go @@ -8,6 +8,7 @@ import ( "os/signal" "path/filepath" "strings" + "sync" "time" 
"github.com/atotto/clipboard" @@ -398,7 +399,7 @@ For private registries, supply credentials via --registry-cred or --docker-confi cmd.Flags().BoolVarP(&noTTY, "yes", "y", false, "Skip interactive mode") cmd.Flags().StringArrayVarP(&registryCreds, "registry-cred", "c", []string{}, "Registry credentials (format: registry=username:password, repeatable)") cmd.Flags().StringVar(&dockerConfigPath, "docker-config", "", "Path to a Docker config.json file with registry credentials") - cmd.Flags().StringVar(&timeoutStr, "timeout", "", "Timeout for build log monitoring (e.g. 30m, 1h). Defaults to 15m") + cmd.Flags().StringVar(&timeoutStr, "timeout", "", "Idle timeout for build log monitoring (e.g. 30m, 1h). Resets on progress. Defaults to 15m") cmd.Flags().StringVar(&buildEnvPath, "build-env-file", "", "Path to a build env file with Docker build args (default: auto-detect .env.build)") cmd.Flags().BoolVar(&skipBuild, "skip-build", false, "Skip the image build step (use existing built image if available)") @@ -432,9 +433,25 @@ func watchBuildLogsNonInteractive(resourceType, name string, noTTY bool, buildTi close(doneCh) }() + // Activity-based idle timeout for build monitoring + var buildActivityMu sync.Mutex + buildLastActivity := time.Now() + buildMarkActive := func() { + buildActivityMu.Lock() + buildLastActivity = time.Now() + buildActivityMu.Unlock() + } + buildCheckIdle := func() bool { + buildActivityMu.Lock() + elapsed := time.Since(buildLastActivity) + buildActivityMu.Unlock() + return elapsed >= buildTimeout + } + // Use the BuildLogWatcher to stream logs logWatcher := mon.NewBuildLogWatcher(client, workspace, resourceType, name, func(msg string) { fmt.Println(msg) + buildMarkActive() }, buildTimeout) logWatcher.Start() defer logWatcher.Stop() @@ -443,21 +460,22 @@ func watchBuildLogsNonInteractive(resourceType, name string, noTTY bool, buildTi ticker := time.NewTicker(5 * time.Second) defer ticker.Stop() - timeout := time.After(buildTimeout) - for { select { case 
<-ctx.Done(): return fmt.Errorf("build monitoring cancelled") - case <-timeout: - return fmt.Errorf("build timed out after %s", buildTimeout) case <-ticker.C: + if buildCheckIdle() { + return fmt.Errorf("no progress detected for %s", buildTimeout) + } + // Check if the image exists in the registry (build completed) status, err := getImageBuildStatus(resourceType, name) if err != nil { // Image not found yet, continue waiting continue } + buildMarkActive() if status == "succeeded" { logWatcher.Stop() time.Sleep(1 * time.Second) // Allow final logs to flush From 119a6f1f71255c3af29bf3a14684ec71a4287707 Mon Sep 17 00:00:00 2001 From: mjoffre Date: Fri, 29 May 2026 19:12:54 +0000 Subject: [PATCH 2/2] fix(cli): increase default deploy/push timeout from 15m to 1h The 15-minute default timeout on bl deploy and bl push was too short for long-running builds. Increase to 1 hour. This patch also reverts the activity-based idle timeout introduced in PATCH 1/2, restoring the absolute time.After timers, the original 'timed out after <duration>' error messages, and the previous --timeout flag wording. Fixes ENG-2691 Co-Authored-By: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- cli/deploy.go | 209 ++++++++++++++++++-------------------------- cli/monitor/logs.go | 4 +- cli/push.go | 28 ++---- 3 files changed, 93 insertions(+), 148 deletions(-) diff --git a/cli/deploy.go b/cli/deploy.go index 81725eb0..4deae5e0 100644 --- a/cli/deploy.go +++ b/cli/deploy.go @@ -339,7 +339,7 @@ all projects in a monorepo (looks for blaxel.toml in subdirectories).`, cmd.Flags().BoolVar(&experimental, "experimental", false, "Enable experimental features (e.g. USER directive support)") cmd.Flags().StringArrayVarP(&registryCreds, "registry-cred", "c", []string{}, "Registry credentials (format: registry=username:password, repeatable)") cmd.Flags().StringVar(&dockerConfigPath, "docker-config", "", "Path to a Docker config.json file with registry credentials") - cmd.Flags().StringVar(&timeoutStr, "timeout", "", "Idle timeout for build and deployment monitoring (e.g. 30m, 1h). Resets on progress. 
Defaults to 15m") + cmd.Flags().StringVar(&timeoutStr, "timeout", "", "Timeout for build and deployment monitoring (e.g. 30m, 1h). Defaults to 1h") cmd.Flags().StringVar(&buildEnvPath, "build-env-file", "", "Path to a build env file with Docker build args (default: auto-detect .env.build)") return cmd } @@ -1280,23 +1280,7 @@ func (d *Deployment) deployResourceInteractive(resource *deploy.Resource, model // Start monitoring the resource status statusTicker := time.NewTicker(3 * time.Second) defer statusTicker.Stop() - - // Activity-based idle timeout: resets whenever progress is detected - // (status changes or build log activity) so long-running but active - // builds are not killed prematurely. - var lastActivityMu sync.Mutex - lastActivityTime := time.Now() - markActive := func() { - lastActivityMu.Lock() - lastActivityTime = time.Now() - lastActivityMu.Unlock() - } - checkIdleTimeout := func() bool { - lastActivityMu.Lock() - elapsed := time.Since(lastActivityTime) - lastActivityMu.Unlock() - return elapsed >= d.timeout - } + statusTimeout := time.After(d.timeout) // Grace period for stale FAILED status - if we don't see any status change within this time, // accept that the FAILED status is real (handles case where new deployment fails immediately) @@ -1314,19 +1298,16 @@ func (d *Deployment) deployResourceInteractive(resource *deploy.Resource, model for { select { + case <-statusTimeout: + if logWatcher != nil { + logWatcher.Stop() + } + model.UpdateResource(idx, deploy.StatusFailed, "Deployment timeout", fmt.Errorf("deployment timed out after %s", d.timeout)) + return case <-staleFailedGracePeriod: // Grace period expired - if status is still FAILED, accept it as real staleGracePeriodExpired = true case <-statusTicker.C: - // Check idle timeout - if checkIdleTimeout() { - if logWatcher != nil { - logWatcher.Stop() - } - model.UpdateResource(idx, deploy.StatusFailed, "Deployment timeout", fmt.Errorf("no progress detected for %s", d.timeout)) - return - } - 
status, err := getResourceStatus(strings.ToLower(resource.Kind), resource.Name) if err != nil { // Continue polling on temporary errors @@ -1341,7 +1322,6 @@ func (d *Deployment) deployResourceInteractive(resource *deploy.Resource, model // Only log status changes if status != lastStatus { lastStatus = status - markActive() // Map API status to our UI status and update switch status { @@ -1367,7 +1347,6 @@ func (d *Deployment) deployResourceInteractive(resource *deploy.Resource, model resource.Name, func(log string) { model.AddBuildLog(idx, log) - markActive() }, d.timeout, ) @@ -1486,118 +1465,102 @@ func (d *Deployment) deployAdditionalResource(resource *deploy.Resource, model * model.AddBuildLog(idx, "Verifying deployment status...") // Simple status monitoring for additional resources - // Additional resources use a shorter default (10m) than the main resource (15m), + // Additional resources use a shorter default (10m) than the main resource (1h), // but respect the user-specified --timeout if explicitly provided. 
additionalTimeout := 10 * time.Minute if d.timeoutExplicit { additionalTimeout = d.timeout } ticker := time.NewTicker(3 * time.Second) - - // Activity-based idle timeout for additional resources - var addlActivityMu sync.Mutex - addlLastActivity := time.Now() - addlMarkActive := func() { - addlActivityMu.Lock() - addlLastActivity = time.Now() - addlActivityMu.Unlock() - } - addlCheckIdle := func() bool { - addlActivityMu.Lock() - elapsed := time.Since(addlLastActivity) - addlActivityMu.Unlock() - return elapsed >= additionalTimeout - } - + timeout := time.After(additionalTimeout) lastStatus := "" // Track last status to avoid duplicate logs var logWatcher interface{ Stop() } buildLogStarted := false sawBuildingStatus := false // Track if we've seen BUILDING status - for range ticker.C { - if addlCheckIdle() { + for { + select { + case <-timeout: if logWatcher != nil { logWatcher.Stop() } - model.UpdateResource(idx, deploy.StatusFailed, "Timeout", fmt.Errorf("no progress detected for %s", additionalTimeout)) + model.UpdateResource(idx, deploy.StatusFailed, "Timeout", fmt.Errorf("deployment timed out after %s", additionalTimeout)) ticker.Stop() return - } - - status, err := getResourceStatus(strings.ToLower(resource.Kind), resource.Name) - if err != nil { - continue - } - - // Logs handling - if status != lastStatus { - lastStatus = status - addlMarkActive() - model.AddBuildLog(idx, fmt.Sprintf("Status: %s", status)) - - switch status { - case "UPLOADING": - model.UpdateResource(idx, deploy.StatusUploading, "Uploading code", nil) - case "BUILDING": - sawBuildingStatus = true - model.UpdateResource(idx, deploy.StatusBuilding, "Building image", nil) - - // Start build log watcher if not already started - if !buildLogStarted { - buildLogStarted = true - client := core.GetClient() - workspace := core.GetWorkspace() - - lw := mon.NewBuildLogWatcher( - client, - workspace, - strings.ToLower(resource.Kind), - resource.Name, - func(log string) { - model.AddBuildLog(idx, log) 
- addlMarkActive() - }, - additionalTimeout, - ) - lw.Start() - logWatcher = lw - } - case "DEPLOYING": - if logWatcher != nil { - logWatcher.Stop() - logWatcher = nil - } - model.UpdateResource(idx, deploy.StatusDeploying, "Deploying to cluster", nil) - case "DEPLOYED": - // If skipBuild is false (AutoGenerated=true), we MUST have seen BUILDING status - if resource.AutoGenerated && !sawBuildingStatus { - // This is a mistake - continue monitoring - continue - } - if logWatcher != nil { - logWatcher.Stop() - } + case <-ticker.C: + status, err := getResourceStatus(strings.ToLower(resource.Kind), resource.Name) + if err != nil { + continue + } - model.UpdateResource(idx, deploy.StatusComplete, "Applied successfully", nil) - ticker.Stop() - return - case "FAILED": - if logWatcher != nil { - logWatcher.Stop() - } - model.UpdateResource(idx, deploy.StatusFailed, "Failed", fmt.Errorf("deployment failed")) - ticker.Stop() - return - case "DEACTIVATED", "DEACTIVATING", "DELETING": - if logWatcher != nil { - logWatcher.Stop() + // Logs handling + if status != lastStatus { + lastStatus = status + model.AddBuildLog(idx, fmt.Sprintf("Status: %s", status)) + + switch status { + case "UPLOADING": + model.UpdateResource(idx, deploy.StatusUploading, "Uploading code", nil) + case "BUILDING": + sawBuildingStatus = true + model.UpdateResource(idx, deploy.StatusBuilding, "Building image", nil) + + // Start build log watcher if not already started + if !buildLogStarted { + buildLogStarted = true + client := core.GetClient() + workspace := core.GetWorkspace() + + lw := mon.NewBuildLogWatcher( + client, + workspace, + strings.ToLower(resource.Kind), + resource.Name, + func(log string) { + model.AddBuildLog(idx, log) + }, + additionalTimeout, + ) + lw.Start() + logWatcher = lw + } + case "DEPLOYING": + if logWatcher != nil { + logWatcher.Stop() + logWatcher = nil + } + model.UpdateResource(idx, deploy.StatusDeploying, "Deploying to cluster", nil) + case "DEPLOYED": + // If skipBuild is 
false (AutoGenerated=true), we MUST have seen BUILDING status + if resource.AutoGenerated && !sawBuildingStatus { + // This is a mistake - continue monitoring + continue + } + if logWatcher != nil { + logWatcher.Stop() + } + + model.UpdateResource(idx, deploy.StatusComplete, "Applied successfully", nil) + ticker.Stop() + return + case "FAILED": + if logWatcher != nil { + logWatcher.Stop() + } + model.UpdateResource(idx, deploy.StatusFailed, "Failed", fmt.Errorf("deployment failed")) + ticker.Stop() + return + case "DEACTIVATED", "DEACTIVATING", "DELETING": + if logWatcher != nil { + logWatcher.Stop() + } + model.UpdateResource(idx, deploy.StatusFailed, fmt.Sprintf("Unexpected status: %s", status), fmt.Errorf("resource is being deactivated or deleted")) + ticker.Stop() + return + default: + // Continue monitoring for unknown statuses + model.UpdateResource(idx, deploy.StatusDeploying, fmt.Sprintf("Status: %s", status), nil) } - model.UpdateResource(idx, deploy.StatusFailed, fmt.Sprintf("Unexpected status: %s", status), fmt.Errorf("resource is being deactivated or deleted")) - ticker.Stop() - return - default: - // Continue monitoring for unknown statuses - model.UpdateResource(idx, deploy.StatusDeploying, fmt.Sprintf("Status: %s", status), nil) } } } diff --git a/cli/monitor/logs.go b/cli/monitor/logs.go index 138d8467..a3839a72 100644 --- a/cli/monitor/logs.go +++ b/cli/monitor/logs.go @@ -47,11 +47,11 @@ type BuildLogWatcher struct { } // DefaultBuildTimeout is the default timeout for build log monitoring. -const DefaultBuildTimeout = 15 * time.Minute +const DefaultBuildTimeout = 1 * time.Hour // NewBuildLogWatcher creates a new build log watcher. // The timeout parameter controls the log query window duration. -// Pass 0 to use the default of 15 minutes. +// Pass 0 to use the default of 1 hour. 
func NewBuildLogWatcher(client *blaxel.Client, workspace, resourceType, resourceName string, onLog func(string), timeout time.Duration) *BuildLogWatcher { if timeout <= 0 { timeout = DefaultBuildTimeout diff --git a/cli/push.go b/cli/push.go index e24dcae9..de04c0b3 100644 --- a/cli/push.go +++ b/cli/push.go @@ -8,7 +8,6 @@ import ( "os/signal" "path/filepath" "strings" - "sync" "time" "github.com/atotto/clipboard" @@ -399,7 +398,7 @@ For private registries, supply credentials via --registry-cred or --docker-confi cmd.Flags().BoolVarP(&noTTY, "yes", "y", false, "Skip interactive mode") cmd.Flags().StringArrayVarP(&registryCreds, "registry-cred", "c", []string{}, "Registry credentials (format: registry=username:password, repeatable)") cmd.Flags().StringVar(&dockerConfigPath, "docker-config", "", "Path to a Docker config.json file with registry credentials") - cmd.Flags().StringVar(&timeoutStr, "timeout", "", "Idle timeout for build log monitoring (e.g. 30m, 1h). Resets on progress. Defaults to 15m") + cmd.Flags().StringVar(&timeoutStr, "timeout", "", "Timeout for build log monitoring (e.g. 30m, 1h). 
Defaults to 1h") cmd.Flags().StringVar(&buildEnvPath, "build-env-file", "", "Path to a build env file with Docker build args (default: auto-detect .env.build)") cmd.Flags().BoolVar(&skipBuild, "skip-build", false, "Skip the image build step (use existing built image if available)") @@ -433,25 +432,9 @@ func watchBuildLogsNonInteractive(resourceType, name string, noTTY bool, buildTi close(doneCh) }() - // Activity-based idle timeout for build monitoring - var buildActivityMu sync.Mutex - buildLastActivity := time.Now() - buildMarkActive := func() { - buildActivityMu.Lock() - buildLastActivity = time.Now() - buildActivityMu.Unlock() - } - buildCheckIdle := func() bool { - buildActivityMu.Lock() - elapsed := time.Since(buildLastActivity) - buildActivityMu.Unlock() - return elapsed >= buildTimeout - } - // Use the BuildLogWatcher to stream logs logWatcher := mon.NewBuildLogWatcher(client, workspace, resourceType, name, func(msg string) { fmt.Println(msg) - buildMarkActive() }, buildTimeout) logWatcher.Start() defer logWatcher.Stop() @@ -460,22 +443,21 @@ func watchBuildLogsNonInteractive(resourceType, name string, noTTY bool, buildTi ticker := time.NewTicker(5 * time.Second) defer ticker.Stop() + timeout := time.After(buildTimeout) + for { select { case <-ctx.Done(): return fmt.Errorf("build monitoring cancelled") + case <-timeout: + return fmt.Errorf("build timed out after %s", buildTimeout) case <-ticker.C: - if buildCheckIdle() { - return fmt.Errorf("no progress detected for %s", buildTimeout) - } - // Check if the image exists in the registry (build completed) status, err := getImageBuildStatus(resourceType, name) if err != nil { // Image not found yet, continue waiting continue } - buildMarkActive() if status == "succeeded" { logWatcher.Stop() time.Sleep(1 * time.Second) // Allow final logs to flush