Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ LLM이 결정하지만, 사용자 질의의 입력 신호에 대해 **첫 worker
| 사용자 질의 신호 | 첫 sub-agent | 첫 worker | prompt 위치 |
|:---|:---|:---|:---|
| 데이터 첨부(csv/xlsx/json/pdf/docx) + 분석/차트 요청 | `data_science_team` | **`data_engineer`** (ONE-pass inspect) → `data_analyst`(python_repl + 차트) | `SYSTEM_SUPERVISOR_PROMPT` `# TEAM SELECTION HINTS` + `TEAM_SUPERVISOR_PROMPT` `# DATA SCIENCE TEAM HANDOFF` |
| 이미지 첨부 | `vision_team` | `image_inspector` → `image_editor` | `SYSTEM_SUPERVISOR_PROMPT` `# TEAM SELECTION HINTS` |
| 이미지 첨부 | `vision_team` | `vision_analyst` (tools: `get_image_metadata`, `resize_image`) | `SYSTEM_SUPERVISOR_PROMPT` `# REQUIRED FIRST ROUTES` + `TEAM_SUPERVISOR_PROMPT` `# VISION TEAM HANDOFF` |
| 최신 정보·뉴스·"latest" 요청 | `research_team` | `search` → 필요 시 `web_scraper` | `RESEARCH_TEAM_SUPERVISOR_PROMPT` |
| repo 바인딩 + 코드 수정/실행 | `coding_team` | `codebase_explorer` → `implementation_engineer` → (선택) `runtime_verifier` | `SYSTEM_SUPERVISOR_PROMPT` `# CRITICAL GUIDELINES 2a/2b` |
| 명시적 보고서/슬라이드/문서 작성 | `writing_team` | `note_taker` → `doc_writer` | `SYSTEM_SUPERVISOR_PROMPT` `# CRITICAL GUIDELINES 6a` |
Expand Down
41 changes: 41 additions & 0 deletions apps/backend/tests/test_agent_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,52 @@ def test_vision_tools_with_dummy_image():
meta_result = get_image_metadata.invoke({"base64_image": dummy_base64})
assert "JPEG" in meta_result
assert "100, 100" in meta_result
# New metadata fields — locks the structured payload so prompt drift
# doesn't silently shrink what vision_analyst sees.
assert "FileSize:" in meta_result
assert "EXIF:" in meta_result
assert "Alpha:" in meta_result

resize_result = resize_image.invoke(
{"base64_image": dummy_base64, "max_width": 50, "max_height": 50}
)
assert "successfully resized to (50, 50)" in resize_result
# The summary must also report the original size and the file-size delta
# so the analyst can reason about whether the resize actually saved bytes.
assert "from (100, 100)" in resize_result
assert "->" in resize_result


def test_resize_image_applies_exif_orientation_correction():
    """Pin EXIF-orientation handling in ``resize_image``.

    The fixture is a JPEG whose stored pixels are 100x200 and whose EXIF
    Orientation tag is 6 (rotate 90 CW for display). If the tool honours the
    tag (via ``ImageOps.exif_transpose``), the size it reports is the
    swapped (200, 100); if it ignores the tag, the stored orientation would
    be reported instead and the assertion fails.
    """
    import base64
    import io
    from PIL import Image
    from agent_tools.vision import resize_image

    orientation_tag = 0x0112  # EXIF "Orientation"
    rotate_90_cw = 6

    # Build the in-memory JPEG fixture: 100 wide, 200 tall, tagged rotate-90.
    source = Image.new("RGB", (100, 200), color="green")
    exif_data = source.getexif()
    exif_data[orientation_tag] = rotate_90_cw
    jpeg_buffer = io.BytesIO()
    source.save(jpeg_buffer, format="JPEG", exif=exif_data)
    encoded = base64.b64encode(jpeg_buffer.getvalue()).decode("utf-8")

    # The bounding box exceeds both axes, so no downscaling happens and the
    # only size change we can observe comes from the orientation correction.
    tool_args = {"base64_image": encoded, "max_width": 400, "max_height": 400}
    summary = resize_image.invoke(tool_args)

    assert "(200, 100)" in summary, (
        f"EXIF orientation correction missing — expected (200, 100), got: {summary}"
    )


def _make_runtime(tmp_path, attachments):
Expand Down
50 changes: 40 additions & 10 deletions packages/agent-tools/src/agent_tools/vision.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,39 @@
import base64
import io
from typing import Annotated, Optional
from PIL import Image

from PIL import Image, ImageOps
from langchain_core.tools import tool


def _exif_corrected(img: Image.Image) -> Image.Image:
    """Return ``img`` with its EXIF Orientation tag applied to the pixels.

    Phone cameras commonly store portrait shots in sensor orientation and
    record the needed rotation only in EXIF. Handing such an image to a
    vision model without applying the tag shows it sideways (people lying
    down, rotated text, ...).

    Args:
        img: The image to normalise.

    Returns:
        The result of ``ImageOps.exif_transpose(img)`` when that is truthy,
        otherwise ``img`` itself.
    """
    upright = ImageOps.exif_transpose(img)
    if upright:
        return upright
    # Fall back to the input when the call yields nothing usable (e.g. None).
    return img


@tool
def get_image_metadata(
    base64_image: Annotated[str, "The base64 encoded image string."],
) -> str:
    """Extract format, size, color mode, file size, EXIF, and alpha info from a base64 image."""
    # NOTE(review): the docstring is deliberately kept to one line — it is
    # presumably surfaced as the tool description by ``@tool``, so extra
    # prose there would change what the model sees. Details live here.
    #
    # Returns a single line of the form
    #   "Format: <fmt>, Size: (w, h), Mode: <mode>, FileSize: <n> bytes,
    #    EXIF: <bool>, Alpha: <bool>"
    # or "Error extracting metadata: <reason>" on any failure. The function
    # never raises: the error text is handed back to the calling agent.
    try:
        image_data = base64.b64decode(base64_image)
        with Image.open(io.BytesIO(image_data)) as img:
            # Public EXIF accessor; the raw ``info["exif"]`` check is kept so
            # an embedded EXIF blob still counts even if no tags are parsed.
            has_exif = bool(img.getexif() or img.info.get("exif"))
            # Alpha is either a real alpha band or palette transparency.
            has_alpha = img.mode in ("RGBA", "LA") or (
                img.mode == "P" and "transparency" in img.info
            )
            # FileSize is the decoded payload size, not the base64 length.
            return (
                f"Format: {img.format}, Size: {img.size}, Mode: {img.mode}, "
                f"FileSize: {len(image_data)} bytes, EXIF: {has_exif}, Alpha: {has_alpha}"
            )
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error extracting metadata: {e}"

Expand All @@ -28,20 +48,30 @@ def resize_image(
Optional[int], "Maximum height for the resized image. Defaults to 1024."
] = 1024,
) -> str:
"""Resizes an image while maintaining aspect ratio and returns the new base64 string."""
"""Resize an image (aspect-preserving, EXIF-orientation aware).

Returns a short factual summary: original size → new size and file-size
delta. The new base64 is intentionally not returned to avoid blowing up
the LLM context window — vision_analyst already has the original image
in its input messages; this tool exists so the analyst can confirm that
a smaller copy is feasible and reason about it.
"""
try:
image_data = base64.b64decode(base64_image)
img = Image.open(io.BytesIO(image_data))
original_size = img.size
original_fmt = img.format if img.format else "JPEG"
img = _exif_corrected(img)

# Maintain aspect ratio
img.thumbnail((max_width, max_height))

buffered = io.BytesIO()
# Save back to same format if possible, otherwise default to JPEG
fmt = img.format if img.format else "JPEG"
img.save(buffered, format=fmt)
img.save(buffered, format=original_fmt)
new_bytes = buffered.getvalue()

new_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
return f"Image successfully resized to {img.size}. New Base64 length: {len(new_base64)}"
return (
f"Image successfully resized to {img.size} from {original_size}. "
f"FileSize: {len(image_data)} -> {len(new_bytes)} bytes."
)
except Exception as e:
return f"Error resizing image: {str(e)}"
Loading