Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 37 additions & 5 deletions crates/temps-core/src/workflow_executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ impl WorkflowExecutor {
Self { job_tracker }
}

fn is_cancellation_error(error: &WorkflowError) -> bool {
matches!(
error,
WorkflowError::WorkflowCancelled | WorkflowError::BuildCancelled
)
}

/// Execute a workflow with proper dependency resolution and parallel execution
pub async fn execute_workflow(
&self,
Expand Down Expand Up @@ -554,8 +561,17 @@ impl WorkflowExecutor {
(job_id_clone, job_result)
}
Err(e) => {
let error_msg = format!("❌ Job failed: {}", e);
error!("❌ Job '{}' failed: {}", job_id_clone, e);
let is_cancellation = Self::is_cancellation_error(&e);
let error_msg = if is_cancellation {
format!("Job cancelled: {}", e)
} else {
format!("Job failed: {}", e)
};
if is_cancellation {
warn!("Job '{}' cancelled: {}", job_id_clone, e);
} else {
error!("Job '{}' failed: {}", job_id_clone, e);
}

// Log the error to the job's context so it appears in the job logs
if let Err(log_err) = error_context.log(&error_msg).await {
Expand All @@ -565,15 +581,19 @@ impl WorkflowExecutor {
);
}

// Call cleanup on job failure
warn!("🧹 Calling cleanup for failed job '{}'", job_id_clone);
// Call cleanup on job failure or cancellation
warn!("Calling cleanup for stopped job '{}'", job_id_clone);
if let Err(cleanup_err) = job.cleanup(&error_context).await {
error!("Failed to cleanup job '{}': {}", job_id_clone, cleanup_err);
}

(
job_id_clone,
JobResult::failure(error_context, e.to_string()),
if is_cancellation {
JobResult::cancelled(error_context)
} else {
JobResult::failure(error_context, e.to_string())
},
)
}
}
Expand Down Expand Up @@ -679,6 +699,18 @@ mod tests {
cancelled: bool,
}

#[test]
fn test_cancellation_errors_are_not_failures() {
assert!(WorkflowExecutor::is_cancellation_error(
&WorkflowError::WorkflowCancelled
));
assert!(WorkflowExecutor::is_cancellation_error(
&WorkflowError::BuildCancelled
));
assert!(!WorkflowExecutor::is_cancellation_error(
&WorkflowError::JobExecutionFailed("Build failed".to_string())
));
}
#[async_trait::async_trait]
impl WorkflowCancellationProvider for TestCancellationProvider {
async fn is_cancelled(&self, _workflow_run_id: &str) -> Result<bool, WorkflowError> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -330,8 +330,9 @@ impl WorkflowExecutionService {
Err(e) => {
// Check if this is a cancellation error
let error_message = format!("{}", e);
let is_cancellation =
error_message.contains("cancelled") || error_message.contains("Cancelled");
let lower_error_message = error_message.to_lowercase();
let is_cancellation = lower_error_message.contains("cancelled")
|| lower_error_message.contains("canceled");

if is_cancellation {
info!(
Expand Down
4 changes: 2 additions & 2 deletions crates/temps-proxy/src/service/proxy_log_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -814,7 +814,7 @@ impl ProxyLogService {
SELECT
project_id,
COALESCE(COUNT(*), 0) as total_requests,
COALESCE(SUM(CASE WHEN status_code >= 400 THEN 1 ELSE 0 END), 0) as total_errors,
COALESCE(SUM(CASE WHEN status_code >= 500 THEN 1 ELSE 0 END), 0) as total_errors,
COALESCE(AVG(response_time_ms)::float8, 0) as avg_response_time_ms
FROM proxy_logs
WHERE timestamp >= $1
Expand Down Expand Up @@ -1859,7 +1859,7 @@ pub struct ProjectHealthSummary {
pub project_id: i32,
/// Total requests in the period
pub total_requests: i64,
/// Total errors (status >= 400) in the period
/// Total server errors (status >= 500) in the period
pub total_errors: i64,
/// Average response time in ms
pub avg_response_time_ms: f64,
Expand Down
2 changes: 1 addition & 1 deletion crates/temps-proxy/src/storage/clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1194,7 +1194,7 @@ impl ProxyLogStorage for ClickHouseProxyLogStore {
"SELECT \
assumeNotNull(project_id) AS project_id, \
count() AS total_requests, \
countIf(status_code >= 400) AS total_errors, \
countIf(status_code >= 500) AS total_errors, \
ifNull(avg(response_time_ms), 0) AS avg_response_time_ms \
FROM proxy_logs FINAL \
WHERE {} \
Expand Down
Loading