Skip to content

Commit 8be70a4

Browse files
author
Project Team
committed
Fix retry logic: treat OCR ERROR as failure, add retry endpoint and UI button
- worker.py: raise RuntimeError when validate_label returns ERROR status so queue.fail() is called and retries are triggered instead of marking the job completed
- api.py: add POST /verify/retry/{job_id} endpoint to re-enqueue a failed job with the same image and ground truth
- ui_routes.py: add POST /ui/verify/retry/{job_id} route that re-enqueues and redirects to the new pending page
- verify_pending.html: show a Retry button in the onFailed() handler that POSTs to the retry route
1 parent 8254089 commit 8be70a4

File tree

4 files changed

+125
-0
lines changed

4 files changed

+125
-0
lines changed

app/api.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1060,6 +1060,61 @@ async def get_async_verify_status(
10601060
)
10611061

10621062

1063+
@app.post("/verify/retry/{job_id}", response_model=AsyncVerifySubmitResponse)
async def retry_async_verify(
    job_id: str,
    username: str = Depends(get_current_user),
) -> AsyncVerifySubmitResponse:
    """
    Submit a fresh verify job that reuses the image and ground truth of an
    earlier single-image job.

    The earlier job is looked up by ``job_id``; its stored ``image_path`` and
    ``ground_truth`` are passed unchanged to a new queue entry, so the caller
    does not have to upload the image again. The job's current state is not
    inspected here, so any job that is still known to the queue can be
    re-submitted.

    **Response:**
    - ``job_id``: Identifier of the *new* job — poll ``GET /verify/status/{job_id}``
    - ``status``: ``pending``

    **Errors:**
    - ``404``: no job with the given ``job_id`` is known to the queue
    - ``410``: the job has no stored image path, or that path no longer exists

    **Example:**
    ```bash
    curl -X POST https://example.com/verify/retry/abc123
    ```
    """
    cid = get_correlation_id()
    logger.info(f"[{cid}] POST /verify/retry/{job_id}")

    previous_job = verify_queue.get(job_id)
    if previous_job is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Verify job {job_id} not found",
        )

    # A retry is only possible while the originally uploaded file is still
    # present; otherwise the caller has to upload it again.
    stored_image = previous_job.get("image_path")
    image_available = bool(stored_image) and Path(stored_image).exists()
    if not image_available:
        raise HTTPException(
            status_code=status.HTTP_410_GONE,
            detail="Original image file no longer available; please re-upload.",
        )

    # Re-submit with exactly the inputs recorded on the earlier job.
    resubmission = {
        "image_path": stored_image,
        "ground_truth": previous_job.get("ground_truth"),
    }
    retried_job_id = verify_queue.enqueue(**resubmission)

    logger.info(f"[{cid}] Retried job {job_id} as new job {retried_job_id}")

    return AsyncVerifySubmitResponse(
        job_id=retried_job_id,
        status="pending",
        message=f"Job re-submitted. Poll GET /verify/status/{retried_job_id} for results.",
    )
1116+
1117+
10631118
# ============================================================================
10641119
# Exception Handlers
10651120
# ============================================================================

app/templates/verify_pending.html

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,12 @@ <h4 class="mb-2" id="statusHeading">Verifying Label</h4>
6969

7070
<!-- Action buttons (hidden until terminal state) -->
7171
<div id="actions" style="display:none;" class="mt-3 d-grid gap-2">
72+
<!-- Retry button: only shown on failure; submits a form POST to re-enqueue -->
73+
<form id="retryForm" method="post" style="display:none;">
74+
<button type="submit" class="btn btn-warning w-100">
75+
<i class="bi bi-arrow-clockwise"></i> Retry
76+
</button>
77+
</form>
7278
<a href="/ui/verify" class="btn btn-primary">
7379
<i class="bi bi-arrow-repeat"></i> Verify Another Label
7480
</a>
@@ -96,6 +102,7 @@ <h4 class="mb-2" id="statusHeading">Verifying Label</h4>
96102
const elError = document.getElementById('errorDetail');
97103
const elErrorTxt = document.getElementById('errorText');
98104
const elActions = document.getElementById('actions');
105+
const elRetryForm = document.getElementById('retryForm');
99106

100107
let pollTimer = null;
101108

@@ -171,6 +178,11 @@ <h4 class="mb-2" id="statusHeading">Verifying Label</h4>
171178
updateDots(data.attempts, data.max_attempts, true);
172179
elErrorTxt.textContent = data.error || 'Unknown error';
173180
elError.style.display = 'block';
181+
182+
// Wire up the retry form to POST to the UI retry endpoint
183+
elRetryForm.action = `/ui/verify/retry/${JOB_ID}`;
184+
elRetryForm.style.display = 'block';
185+
174186
elActions.style.display = 'block';
175187
}
176188

app/ui_routes.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,56 @@ async def ui_verify_result(
502502
)
503503

504504

505+
@router.post("/ui/verify/retry/{job_id}")
async def ui_verify_retry(
    request: Request,
    job_id: str,
    username: str = Depends(get_current_user_ui),
):
    """
    Handle the Retry button of the verify_pending page.

    Looks up the earlier job, enqueues a new job with the same image path and
    ground truth, and answers with a 303 redirect to the pending page of the
    new job. If the job is unknown, or its image file is missing, the index
    page is rendered with an error message instead.
    """
    # Imported inside the handler rather than at module level
    # (presumably to avoid a circular import with ``api`` — TODO confirm).
    from api import verify_queue

    def _index_with_error(message: str):
        """Render index.html with ``message`` shown as the error."""
        return templates.TemplateResponse(
            "index.html",
            {
                "request": request,
                "username": username,
                "error": message,
                "ollama_host": settings.ollama_host,
                "default_timeout": settings.ollama_timeout_seconds,
            },
        )

    previous_job = verify_queue.get(job_id)
    if previous_job is None:
        return _index_with_error(
            "Job not found or expired. Please re-upload your image."
        )

    stored_image = previous_job.get("image_path")
    image_available = bool(stored_image) and Path(stored_image).exists()
    if not image_available:
        return _index_with_error(
            "Original image file is no longer available. Please re-upload."
        )

    retried_job_id = verify_queue.enqueue(
        image_path=stored_image,
        ground_truth=previous_job.get("ground_truth"),
    )
    logger.info(f"[ui] Retried job {job_id} as new job {retried_job_id}")

    # 303 makes the browser follow up with a GET on the pending page.
    pending_url = f"/ui/verify/pending/{retried_job_id}"
    return RedirectResponse(url=pending_url, status_code=status.HTTP_303_SEE_OTHER)
553+
554+
505555
@router.get("/ui/health", response_class=HTMLResponse)
506556
async def ui_health(request: Request):
507557
"""

app/worker.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,14 @@ def process_job(job: dict, validator: LabelValidator) -> dict:
9090

9191
result = validator.validate_label(image_path, ground_truth)
9292
result["image_path"] = Path(image_path).name
93+
94+
# If the validator returned an ERROR status (e.g. Ollama sentinel absent,
95+
# OCR extraction failed), treat it as a retriable failure rather than a
96+
# completed job. Raising here causes the worker loop to call queue.fail(),
97+
# which requeues the job if attempts remain.
98+
if result.get("status") == "ERROR":
99+
raise RuntimeError(result.get("error") or "OCR returned ERROR status")
100+
93101
return result
94102

95103

0 commit comments

Comments
 (0)