Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions packit_service/worker/handlers/distgit.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from packit_service import sentry_integration
from packit_service.config import ServiceConfig
from packit_service.constants import (
BASE_RETRY_INTERVAL_IN_MINUTES_FOR_OUTAGES,
CONTACTS_URL,
DEFAULT_RETRY_BACKOFF,
MSG_DOWNSTREAM_JOB_ERROR_HEADER,
Expand Down Expand Up @@ -937,6 +938,29 @@ def _run(self) -> TaskResults:
)
self.pushgateway.fedora_ci_koji_builds_queued.inc()
except Exception as ex:
if (
isinstance(ex, PackitCommandFailedError)
and self.celery_task
and not self.celery_task.is_last_try()
):
koji_build.set_status("retry")
interval = BASE_RETRY_INTERVAL_IN_MINUTES_FOR_OUTAGES * 2**self.celery_task.retries
self.report(
commit_status=BaseCommitStatus.pending,
description="Failed to submit the build. The task will be"
f" retried in {interval} {'minute' if interval == 1 else 'minutes'}.",
url=get_koji_build_info_url(koji_build.id),
)
kargs = self.celery_task.task.request.kwargs.copy()
self.celery_task.retry(delay=interval * 60, kargs=kargs)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

It's good practice to pass the exception to the retry call. This helps with logging and debugging, as Celery will log the exception that caused the retry.

Suggested change
- self.celery_task.retry(delay=interval * 60, kargs=kargs)
+ self.celery_task.retry(ex=ex, delay=interval * 60, kargs=kargs)
References
  1. All errors and exceptions should be logged, with sufficient information for administrators to begin triage. (link)

return TaskResults(
success=True,
details={
"msg": "Task will be retried because of failure"
f" when submitting the build: {ex}",
},
)

sentry_integration.send_to_sentry(ex)
self.report(
commit_status=BaseCommitStatus.error,
Expand Down
51 changes: 50 additions & 1 deletion tests/integration/test_koji_build_cancel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ogr.services.pagure import PagureProject
from packit.api import PackitAPI
from packit.config import Deployment, JobConfigTriggerType
from packit.exceptions import PackitException
from packit.exceptions import PackitCommandFailedError, PackitException
from packit.local_project import LocalProjectBuilder
from packit.utils import commands

Expand Down Expand Up @@ -244,6 +244,55 @@ def test_downstream_koji_scratch_build_cancel_running(mock_distgit_pr_functional
assert first_dict_value(results["job"])["success"]


def test_downstream_koji_scratch_build_retry_on_submission_failure(
    mock_distgit_pr_functionality,
):
    """Check that a failed build submission makes the scratch build handler retry.

    The remote command runner is mocked to raise ``PackitCommandFailedError``
    (standing in for e.g. a network issue or a Koji outage). The test then
    expects the build target to be switched to the ``"retry"`` status exactly
    once, the Celery task's ``retry`` to be invoked exactly once, and the job
    result to still be reported as successful.
    """
    flexmock(PackitAPI).should_receive("init_kerberos_ticket")

    # Fake DB row for the build; only the "retry" status change is asserted on.
    build_target = flexmock(id=123, target="main", status="queued")
    build_target.should_receive("set_status").with_args("retry").once()
    # The remaining setters are merely allowed, with no call-count expectation.
    for setter_name in (
        "set_task_id",
        "set_web_url",
        "set_build_logs_urls",
        "set_data",
        "set_build_submission_stdout",
    ):
        build_target.should_receive(setter_name)

    build_group = flexmock(grouped_targets=[build_target])
    flexmock(KojiBuildTargetModel).should_receive("create").and_return(build_target)
    flexmock(KojiBuildGroupModel).should_receive("create").and_return(build_group)

    # Make the koji CLI invocation fail.
    cli_failure_output = {
        "stdout_output": "",
        "stderr_output": "koji: AuthError: unable to obtain a session",
    }
    flexmock(commands).should_receive("run_command_remote").and_raise(
        PackitCommandFailedError,
        "Command failed",
        **cli_failure_output,
    )

    # Replace the task's retry so nothing is actually rescheduled, while still
    # verifying that it gets called.
    flexmock(run_downstream_koji_scratch_build_handler).should_receive("retry").once()

    processed = SteveJobs().process_message(mock_distgit_pr_functionality)
    event, _, job_cfg, pkg_cfg = get_parameters_from_results(processed[:1])
    handler_results = run_downstream_koji_scratch_build_handler(
        package_config=pkg_cfg,
        event=event,
        job_config=job_cfg,
    )

    job_outcome = first_dict_value(handler_results["job"])
    assert job_outcome["success"]


def test_downstream_koji_build_cancel_running(monkeypatch):
"""Test that DownstreamKojiBuildHandler calls cancel_running_builds.

Expand Down
Loading