Skip to content

Commit 86e394d

Browse files
author
Project Team
committed
Fix all failing tests: async batch polling, CLI ocr_backend removal, JPEG/TIFF only

- verify_label.py: remove ocr_backend param from LabelValidator() calls and CLI arg parser; Ollama is the only backend, so the param is vestigial
- verify_label.py: drop PNG from batch image scanner (JPEG/TIFF only)
- test_fastapi_endpoints.py: rewrite batch tests to use async POST→poll flow; add _poll_batch_job helper; mock LabelValidator via @patch('api.LabelValidator')
- test_cli.py: remove --ocr-backend flag from subprocess call; assert on JSON structure rather than exit code (Ollama unavailable in test env = ERROR status)
- Dockerfile: copy samples/ into test stage so sample-based tests don't skip

All 99 tests pass (11 skipped, 0 failed).
1 parent 1f3e614 commit 86e394d

4 files changed

Lines changed: 122 additions & 63 deletions

File tree

Dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ RUN pip install --user --no-cache-dir -r requirements.txt -r requirements-dev.txt
1919
FROM builder AS test
2020

2121
COPY app/ /app
22+
COPY samples/ /app/samples
2223
WORKDIR /app
2324

2425
ENV PATH=/root/.local/bin:$PATH

app/tests/test_api/test_fastapi_endpoints.py

Lines changed: 98 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,37 @@ def large_image_bytes():
119119
# Single Verify Endpoint Tests
120120
# ============================================================================
121121

122-
def test_verify_success_no_ground_truth(authenticated_client, sample_image_bytes):
122+
MOCK_COMPLIANT_RESULT = {
123+
"status": "COMPLIANT",
124+
"validation_level": "STRUCTURAL_ONLY",
125+
"extracted_fields": {
126+
"brand_name": "Ridge & Co.",
127+
"abv_numeric": 7.5,
128+
"government_warning": {"present": True},
129+
},
130+
"validation_results": {"structural": [], "accuracy": []},
131+
"violations": [],
132+
"warnings": [],
133+
"processing_time_seconds": 0.1,
134+
}
135+
136+
MOCK_FULL_VALIDATION_RESULT = {
137+
**MOCK_COMPLIANT_RESULT,
138+
"validation_level": "FULL_VALIDATION",
139+
"validation_results": {
140+
"structural": [],
141+
"accuracy": [{"field": "brand_name", "valid": True}],
142+
},
143+
}
144+
145+
146+
@patch('api.LabelValidator')
147+
def test_verify_success_no_ground_truth(mock_validator_class, authenticated_client, sample_image_bytes):
123148
"""Test single label verification without ground truth (structural only)."""
149+
mock_validator = Mock()
150+
mock_validator.validate_label.return_value = MOCK_COMPLIANT_RESULT
151+
mock_validator_class.return_value = mock_validator
152+
124153
response = authenticated_client.post(
125154
"/verify",
126155
files={"image": ("label.jpg", sample_image_bytes, "image/jpeg")}
@@ -129,33 +158,31 @@ def test_verify_success_no_ground_truth(authenticated_client, sample_image_bytes
129158
assert response.status_code == 200
130159
data = response.json()
131160

132-
# Check response structure
133161
assert "status" in data
134162
assert data["status"] in ["COMPLIANT", "NON_COMPLIANT", "PARTIAL_VALIDATION"]
135163
assert "validation_level" in data
136164
assert "extracted_fields" in data
137165
assert "validation_results" in data
138166
assert "violations" in data
139167
assert "processing_time_seconds" in data
140-
141-
# Should be structural only without ground truth
142168
assert data["validation_level"] == "STRUCTURAL_ONLY"
143169

144170

145-
def test_verify_success_with_ground_truth(authenticated_client, sample_image_bytes, sample_ground_truth_json):
171+
@patch('api.LabelValidator')
172+
def test_verify_success_with_ground_truth(mock_validator_class, authenticated_client, sample_image_bytes, sample_ground_truth_json):
146173
"""Test single label verification with ground truth (full validation)."""
174+
mock_validator = Mock()
175+
mock_validator.validate_label.return_value = MOCK_FULL_VALIDATION_RESULT
176+
mock_validator_class.return_value = mock_validator
177+
147178
response = authenticated_client.post(
148179
"/verify",
149180
files={"image": ("label.jpg", sample_image_bytes, "image/jpeg")},
150-
data={
151-
"ground_truth": sample_ground_truth_json
152-
}
181+
data={"ground_truth": sample_ground_truth_json}
153182
)
154183

155184
assert response.status_code == 200
156185
data = response.json()
157-
158-
# Should perform full validation with ground truth
159186
assert data["validation_level"] == "FULL_VALIDATION"
160187
assert "accuracy" in data["validation_results"]
161188

@@ -266,51 +293,73 @@ def test_verify_ocr_failure(mock_validator_class, authenticated_client, sample_i
266293
# Batch Verify Endpoint Tests
267294
# ============================================================================
268295

269-
def test_batch_success(authenticated_client, sample_batch_zip):
296+
def _poll_batch_job(client, job_id: str, max_polls: int = 10):
297+
"""Poll GET /verify/batch/{job_id} until the job reaches a terminal state."""
298+
for _ in range(max_polls):
299+
r = client.get(f"/verify/batch/{job_id}")
300+
assert r.status_code == 200, f"Status poll failed: {r.text}"
301+
data = r.json()
302+
if data["status"] in ("completed", "failed", "cancelled"):
303+
return data
304+
pytest.fail(f"Batch job {job_id} did not reach terminal state after {max_polls} polls")
305+
306+
307+
@patch('api.LabelValidator')
308+
def test_batch_success(mock_validator_class, authenticated_client, sample_batch_zip):
270309
"""Test batch verification with valid ZIP file."""
310+
mock_validator = Mock()
311+
mock_validator.validate_label.return_value = MOCK_COMPLIANT_RESULT
312+
mock_validator_class.return_value = mock_validator
313+
271314
response = authenticated_client.post(
272315
"/verify/batch",
273316
files={"batch_file": ("batch.zip", sample_batch_zip, "application/zip")}
274317
)
275-
318+
276319
assert response.status_code == 200
277-
data = response.json()
278-
279-
# Check response structure
320+
submit = response.json()
321+
assert "job_id" in submit
322+
assert submit["total_images"] == 3
323+
324+
data = _poll_batch_job(authenticated_client, submit["job_id"])
325+
280326
assert "results" in data
281327
assert "summary" in data
282-
283-
# Check summary
328+
284329
summary = data["summary"]
285330
assert "total" in summary
286331
assert "compliant" in summary
287332
assert "non_compliant" in summary
288333
assert "errors" in summary
289334
assert "total_processing_time_seconds" in summary
290-
291-
# Should have processed 3 images
335+
292336
assert summary["total"] == 3
293337
assert len(data["results"]) == 3
294-
295-
# Each result should have required fields
338+
296339
for result in data["results"]:
297340
assert "status" in result
298341
assert "validation_level" in result
299342
assert "image_path" in result
300343

301344

302-
def test_batch_with_ground_truth(authenticated_client, sample_batch_zip):
345+
@patch('api.LabelValidator')
346+
def test_batch_with_ground_truth(mock_validator_class, authenticated_client, sample_batch_zip):
303347
"""Test batch verification with ground truth JSON files in ZIP."""
304-
# The sample_batch_zip fixture includes JSON files
348+
mock_validator = Mock()
349+
mock_validator.validate_label.return_value = MOCK_FULL_VALIDATION_RESULT
350+
mock_validator_class.return_value = mock_validator
351+
305352
response = authenticated_client.post(
306353
"/verify/batch",
307354
files={"batch_file": ("batch.zip", sample_batch_zip, "application/zip")}
308355
)
309-
356+
310357
assert response.status_code == 200
311-
data = response.json()
312-
313-
# At least some results should have full validation
358+
submit = response.json()
359+
assert "job_id" in submit
360+
361+
data = _poll_batch_job(authenticated_client, submit["job_id"])
362+
314363
full_validations = [
315364
r for r in data["results"]
316365
if r.get("validation_level") == "FULL_VALIDATION"
@@ -499,18 +548,24 @@ def test_verify_png_rejected(authenticated_client, sample_image_bytes):
499548
assert "Invalid file type" in data["detail"]
500549

501550

502-
def test_batch_with_custom_timeout(authenticated_client, sample_batch_zip):
551+
@patch('api.LabelValidator')
552+
def test_batch_with_custom_timeout(mock_validator_class, authenticated_client, sample_batch_zip):
503553
"""Test batch verification with custom timeout."""
554+
mock_validator = Mock()
555+
mock_validator.validate_label.return_value = MOCK_COMPLIANT_RESULT
556+
mock_validator_class.return_value = mock_validator
557+
504558
response = authenticated_client.post(
505559
"/verify/batch",
506560
files={"batch_file": ("batch.zip", sample_batch_zip, "application/zip")},
507-
data={
508-
"timeout": 30
509-
}
561+
data={"timeout": 30}
510562
)
511-
563+
512564
assert response.status_code == 200
513-
data = response.json()
565+
submit = response.json()
566+
assert "job_id" in submit
567+
568+
data = _poll_batch_job(authenticated_client, submit["job_id"])
514569
assert data["summary"]["total"] == 3
515570

516571

@@ -520,7 +575,7 @@ def test_batch_partial_failure(mock_validator_class, authenticated_client, sampl
520575
# Mock validator to fail on second image
521576
mock_validator = Mock()
522577
call_count = [0]
523-
578+
524579
def side_effect(*args, **kwargs):
525580
call_count[0] += 1
526581
if call_count[0] == 2:
@@ -534,22 +589,25 @@ def side_effect(*args, **kwargs):
534589
"warnings": [],
535590
"processing_time_seconds": 1.0
536591
}
537-
592+
538593
mock_validator.validate_label.side_effect = side_effect
539594
mock_validator_class.return_value = mock_validator
540-
595+
541596
response = authenticated_client.post(
542597
"/verify/batch",
543598
files={"batch_file": ("batch.zip", sample_batch_zip, "application/zip")}
544599
)
545-
600+
546601
assert response.status_code == 200
547-
data = response.json()
548-
602+
submit = response.json()
603+
assert "job_id" in submit
604+
605+
data = _poll_batch_job(authenticated_client, submit["job_id"])
606+
549607
# Should have 3 results (2 success + 1 error)
550608
assert data["summary"]["total"] == 3
551609
assert data["summary"]["errors"] == 1
552-
610+
553611
# Check that error result has error field
554612
error_results = [r for r in data["results"] if r.get("status") == "ERROR"]
555613
assert len(error_results) == 1

app/tests/test_integration/test_cli.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,23 +25,33 @@ def test_cli_missing_file():
2525
assert result.returncode != 0
2626

2727

28-
@pytest.mark.slow
2928
def test_cli_with_sample(good_label_path):
30-
"""Test CLI with actual sample image."""
29+
"""Test CLI produces valid JSON output for a real sample image.
30+
31+
Ollama is not available in the test environment, so the result may have
32+
status='ERROR' and exit code 1, but the CLI must always emit well-formed
33+
JSON containing the required top-level fields.
34+
"""
3135
if not good_label_path.exists():
3236
pytest.skip("Golden sample not available")
33-
37+
3438
result = subprocess.run(
3539
["python3", "verify_label.py", str(good_label_path)],
3640
capture_output=True,
3741
text=True,
38-
timeout=10
42+
timeout=15,
43+
)
44+
45+
# CLI must produce output regardless of Ollama availability
46+
assert result.stdout.strip(), (
47+
f"CLI produced no stdout:\nstderr: {result.stderr}"
3948
)
40-
41-
# Should produce valid JSON
49+
4250
try:
4351
data = json.loads(result.stdout)
44-
assert 'status' in data
45-
assert 'extracted_fields' in data
4652
except json.JSONDecodeError:
47-
pytest.fail("CLI did not output valid JSON")
53+
pytest.fail(f"CLI did not output valid JSON:\n{result.stdout}")
54+
55+
assert "status" in data, "Result missing 'status' field"
56+
assert "extracted_fields" in data, "Result missing 'extracted_fields' field"
57+
assert "validation_results" in data, "Result missing 'validation_results' field"

app/verify_label.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ def load_ground_truth(ground_truth_path: Optional[str]) -> Optional[Dict[str, An
9191

9292
def validate_single_label(image_path: str,
9393
ground_truth_path: Optional[str],
94-
ocr_backend: str,
9594
verbose: bool = False) -> Dict[str, Any]:
9695
"""Validate a single label image."""
9796
# Check if image exists
@@ -114,9 +113,9 @@ def validate_single_label(image_path: str,
114113

115114
# Initialize validator
116115
if verbose:
117-
print(f"Initializing {ocr_backend} OCR backend...", file=sys.stderr)
116+
print(f"Initializing Ollama OCR backend...", file=sys.stderr)
118117

119-
validator = LabelValidator(ocr_backend=ocr_backend)
118+
validator = LabelValidator()
120119

121120
# Validate
122121
if verbose:
@@ -132,13 +131,12 @@ def validate_single_label(image_path: str,
132131

133132
def validate_batch(directory: str,
134133
ground_truth_dir: Optional[str],
135-
ocr_backend: str,
136134
verbose: bool = False) -> List[Dict[str, Any]]:
137135
"""Validate all images in a directory."""
138136
results = []
139137

140-
# Find all image files
141-
image_extensions = {'.jpg', '.jpeg', '.png', '.tif', '.tiff'}
138+
# Find all image files (JPEG and TIFF only)
139+
image_extensions = {'.jpg', '.jpeg', '.tif', '.tiff'}
142140
image_files = []
143141

144142
dir_path = Path(directory)
@@ -170,7 +168,6 @@ def validate_batch(directory: str,
170168
result = validate_single_label(
171169
str(image_path),
172170
ground_truth_path,
173-
ocr_backend,
174171
verbose=False # Don't duplicate verbose output
175172
)
176173

@@ -241,11 +238,6 @@ def main():
241238
parser.add_argument('--ground-truth-dir', metavar='DIR',
242239
help='Directory containing ground truth JSON files for batch processing')
243240

244-
# OCR options
245-
parser.add_argument('--ocr-backend', choices=['tesseract', 'ollama'],
246-
default='tesseract',
247-
help='OCR backend: tesseract (fast, ~1s) or ollama (accurate, ~60s). Default: tesseract')
248-
249241
# Output options
250242
parser.add_argument('--verbose', '-v', action='store_true',
251243
help='Print verbose progress information to stderr')
@@ -263,7 +255,6 @@ def main():
263255
results = validate_batch(
264256
args.batch,
265257
args.ground_truth_dir,
266-
args.ocr_backend,
267258
args.verbose
268259
)
269260

@@ -278,7 +269,6 @@ def main():
278269
result = validate_single_label(
279270
args.image_path,
280271
args.ground_truth,
281-
args.ocr_backend,
282272
args.verbose
283273
)
284274

0 commit comments

Comments
 (0)