Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[pytest]
addopts = --import-mode=importlib
testpaths =
    services/gateway-api/tests
    services/evaluator/tests
9 changes: 9 additions & 0 deletions scripts/test-pytest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bash
# Run each service's pytest suite inside that service's own uv project.
# Any extra command-line arguments are forwarded to every pytest invocation.
set -euo pipefail

# Resolve the repository root (the parent of this script's directory) so the
# script behaves the same regardless of the caller's working directory.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
ROOT_DIR="$(cd "${script_dir}/.." && pwd)"

cd "$ROOT_DIR"

# Suites run one after another; because of `set -e`, the first failing suite
# aborts the script and the remaining suites are not run.
for service in gateway-api evaluator; do
  uv run --project "services/${service}" --group dev pytest "services/${service}/tests" "$@"
done
19 changes: 12 additions & 7 deletions services/evaluator/app/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from pydantic_settings import BaseSettings
from functools import lru_cache

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
Expand All @@ -7,11 +9,11 @@ class Settings(BaseSettings):
log_level: str = "INFO"

# DB
database_url: str
database_url: str | None = None

# LLM (Judge 용)
llm_api_base_url: str | None = None
llm_api_key: str
llm_api_key: str | None = None
openai_model_judge: str = "gpt-5-mini"

# Batch Evaluation Scheduler
Expand All @@ -33,9 +35,12 @@ class Settings(BaseSettings):
smtp_from_email: str | None = None # 발신자 이메일
smtp_to_emails: str | None = None # 수신자 이메일들 (쉼표로 구분)

class Config:
env_file = ".env"
env_file_encoding = "utf-8"
model_config = SettingsConfigDict(
env_file=".env",
env_file_encoding="utf-8",
)


settings = Settings()
@lru_cache
def get_settings() -> Settings:
    """Return the shared ``Settings`` instance for this process.

    ``Settings()`` is constructed on the first call only; ``lru_cache``
    hands the same object back on every later call.
    """
    cached_settings = Settings()
    return cached_settings
23 changes: 18 additions & 5 deletions services/evaluator/app/db.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,29 @@
from functools import lru_cache

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base

from .config import settings

engine = create_engine(settings.database_url)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
from .config import get_settings

Base = declarative_base()


@lru_cache
def get_engine():
    """Create the SQLAlchemy engine on first use and reuse it afterwards.

    Returns:
        The engine built by ``create_engine`` from the configured database URL.

    Raises:
        RuntimeError: If the settings carry no (or an empty) ``database_url``.
    """
    database_url = get_settings().database_url
    if database_url:
        return create_engine(database_url)

    raise RuntimeError("DATABASE_URL is required to initialize the database engine.")


@lru_cache
def get_session_factory():
    """Return the cached ``sessionmaker`` bound to the engine from ``get_engine()``.

    Sessions produced by the factory have autocommit and autoflush disabled.
    """
    bound_engine = get_engine()
    session_factory = sessionmaker(
        autocommit=False,
        autoflush=False,
        bind=bound_engine,
    )
    return session_factory


def get_db():
db = SessionLocal()
db = get_session_factory()()
try:
yield db
finally:
Expand Down
22 changes: 17 additions & 5 deletions services/evaluator/app/llm_judge.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import textwrap
import time
from functools import lru_cache
from typing import TypedDict

from fastapi import HTTPException
Expand All @@ -12,7 +13,7 @@
AuthenticationError,
)

from .config import settings
from .config import get_settings
from .models import LLMLog


Expand All @@ -24,10 +25,19 @@ class EvaluationResult(TypedDict):
raw_judge_response: str


client = OpenAI(
api_key=settings.llm_api_key,
base_url=settings.llm_api_base_url or None,
)
@lru_cache
def get_client() -> OpenAI:
    """Build the OpenAI client used by the judge on first use and cache it.

    Returns:
        An ``OpenAI`` client configured with the LLM API key and, when one is
        set, the custom base URL (an empty base URL is passed as ``None``).

    Raises:
        HTTPException: With status 500 when no LLM API key is configured.
    """
    config = get_settings()
    api_key = config.llm_api_key

    if api_key:
        return OpenAI(
            api_key=api_key,
            base_url=config.llm_api_base_url or None,
        )

    raise HTTPException(
        status_code=500,
        detail="LLM_API_KEY is required for LLM judge evaluation.",
    )


def build_evaluation_prompt(log: LLMLog) -> str:
Expand Down Expand Up @@ -110,7 +120,9 @@ def run_judge(log: LLMLog) -> EvaluationResult:
"""
하나의 LLMLog에 대해 Judge LLM을 호출하고 EvaluationResult 반환.
"""
settings = get_settings()
prompt = build_evaluation_prompt(log)
client = get_client()

try:
start = time.perf_counter()
Expand Down
60 changes: 34 additions & 26 deletions services/evaluator/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@
import time
from prometheus_client import generate_latest, CONTENT_TYPE_LATEST

from .db import Base, engine, get_db
from .db import Base, get_db, get_engine
from .models import LLMLog, LLMEvaluation
from .rules import basic_rule_evaluate
from .llm_judge import run_judge
from .config import settings
from .config import get_settings
from .scheduler import start_scheduler, stop_scheduler
from .utils import get_pending_logs
from .metrics import record_evaluation, update_pending_logs_count
from .notifier import send_low_quality_alert

settings = get_settings()

# 로깅 설정
logging.basicConfig(
level=getattr(logging, settings.log_level.upper()),
Expand All @@ -24,29 +26,33 @@
logger = logging.getLogger(__name__)


@asynccontextmanager
async def lifespan(app: FastAPI):
"""
FastAPI 앱의 수명 주기 관리.
시작 시 테이블 생성 및 스케줄러 시작, 종료 시 스케줄러 중지.
"""
# Startup
logger.info("Starting Evaluator Service...")
Base.metadata.create_all(bind=engine)
start_scheduler()
yield
# Shutdown
logger.info("Stopping Evaluator Service...")
stop_scheduler()


# FastAPI 앱 생성
app = FastAPI(
title="LLM Quality Observer - Evaluator Service",
description="룰 기반 및 LLM-as-a-judge 방식으로 LLM 응답 품질을 평가하는 서비스",
version="1.0.0",
lifespan=lifespan,
)
def create_app(*, testing: bool = False) -> FastAPI:
    """Build the Evaluator FastAPI application.

    Args:
        testing: Initial value stored on ``app.state.testing``. When that flag
            is truthy at startup, the lifespan skips table creation and the
            scheduler entirely.

    Returns:
        The configured ``FastAPI`` instance.
    """

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        """
        Manage the application's lifecycle.

        On startup, create the tables and start the scheduler; on shutdown,
        stop the scheduler. Both steps are skipped in testing mode.
        """
        # Read the flag exactly once so shutdown always mirrors what startup
        # did, even if ``app.state.testing`` is changed while the app runs.
        skip_side_effects = getattr(app.state, "testing", False)
        if skip_side_effects:
            yield
            return

        logger.info("Starting Evaluator Service...")
        Base.metadata.create_all(bind=get_engine())
        start_scheduler()
        try:
            yield
        finally:
            # Runs even when an exception is thrown into the lifespan, so a
            # started scheduler is never left running.
            logger.info("Stopping Evaluator Service...")
            stop_scheduler()

    app = FastAPI(
        title="LLM Quality Observer - Evaluator Service",
        description="룰 기반 및 LLM-as-a-judge 방식으로 LLM 응답 품질을 평가하는 서비스",
        version="1.0.0",
        lifespan=lifespan,
    )
    # Stored on app.state (not closed over) so callers can still flip the flag
    # after construction but before the app starts.
    app.state.testing = testing
    return app


app = create_app()


@app.get("/health")
Expand All @@ -57,7 +63,7 @@ def health_check():
"""
return {
"status": "ok",
"env": settings.app_env,
"env": get_settings().app_env,
}


Expand Down Expand Up @@ -93,6 +99,7 @@ def evaluate_once(
pending_logs = get_pending_logs(db, limit=limit)

if not pending_logs:
settings = get_settings()
return {
"evaluated": 0,
"judge_type": judge_type,
Expand All @@ -102,6 +109,7 @@ def evaluate_once(
# 2. 각 로그에 대해 평가 수행
evaluated_count = 0
judge_model_name = ""
settings = get_settings()

for log in pending_logs:
try:
Expand Down
10 changes: 9 additions & 1 deletion services/evaluator/app/notifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

from .config import settings
from .config import get_settings
from .models import LLMLog, LLMEvaluation
from .metrics import record_notification, record_low_quality_alert

Expand All @@ -29,6 +29,8 @@ def send_slack_notification(message: str, notification_type: str = "alert") -> b
Returns:
bool: 전송 성공 여부
"""
settings = get_settings()

if not settings.slack_webhook_url:
logger.debug("Slack webhook URL이 설정되지 않았습니다.")
return False
Expand Down Expand Up @@ -61,6 +63,8 @@ def send_discord_notification(message: str, notification_type: str = "alert") ->
Returns:
bool: 전송 성공 여부
"""
settings = get_settings()

if not settings.discord_webhook_url:
logger.debug("Discord webhook URL이 설정되지 않았습니다.")
return False
Expand Down Expand Up @@ -95,6 +99,8 @@ async def send_email_notification(subject: str, message: str, notification_type:
Returns:
bool: 전송 성공 여부
"""
settings = get_settings()

if not all([
settings.smtp_host,
settings.smtp_username,
Expand Down Expand Up @@ -154,6 +160,8 @@ def send_low_quality_alert(log: LLMLog, evaluation: LLMEvaluation):
log: LLM 로그
evaluation: 평가 결과
"""
settings = get_settings()

if evaluation.overall_score >= settings.notification_score_threshold:
# 임계값 이상이면 알림 안 보냄
return
Expand Down
8 changes: 5 additions & 3 deletions services/evaluator/app/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from apscheduler.triggers.interval import IntervalTrigger
from sqlalchemy.orm import Session

from .config import settings
from .db import SessionLocal
from .config import get_settings
from .db import get_session_factory
from .utils import get_pending_logs
from .models import LLMLog, LLMEvaluation
from .rules import basic_rule_evaluate
Expand All @@ -36,7 +36,8 @@ def run_batch_evaluation():
"""
logger.info("Starting batch evaluation...")

db: Session = SessionLocal()
settings = get_settings()
db: Session = get_session_factory()()
try:
# 1. 평가 대기 중인 로그 가져오기
pending_logs = get_pending_logs(
Expand Down Expand Up @@ -148,6 +149,7 @@ def start_scheduler():
스케줄러를 시작합니다.
"""
global scheduler
settings = get_settings()

if not settings.enable_auto_evaluation:
logger.info("Auto evaluation is disabled")
Expand Down
5 changes: 2 additions & 3 deletions services/evaluator/app/schemas.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import Optional
from pydantic import BaseModel, Field
from pydantic import BaseModel, ConfigDict, Field


class EvaluationResult(BaseModel):
Expand All @@ -14,5 +14,4 @@ class EvaluationResult(BaseModel):
judge_model: str = Field(default="rule-basic-v1", description="평가에 사용된 모델/룰 버전")
comment: Optional[str] = Field(default=None, description="평가 근거 또는 코멘트")

class Config:
from_attributes = True # Pydantic v2에서 ORM 모드 활성화
model_config = ConfigDict(from_attributes=True)
5 changes: 5 additions & 0 deletions services/evaluator/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,10 @@ dependencies = [
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

[dependency-groups]
dev = [
"pytest>=8.0",
]

[tool.setuptools]
packages = ["app"]
11 changes: 11 additions & 0 deletions services/evaluator/tests/test_health.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,20 @@
Health check endpoint tests
"""

import sys
from pathlib import Path

from fastapi.testclient import TestClient

SERVICE_ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(SERVICE_ROOT))

for module_name in [name for name in list(sys.modules) if name == "app" or name.startswith("app.")]:
sys.modules.pop(module_name)

from app.main import app

app.state.testing = True
client = TestClient(app)


Expand Down
17 changes: 11 additions & 6 deletions services/gateway-api/app/config.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from pydantic_settings import BaseSettings
from functools import lru_cache

from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
app_env: str = "local"

database_url: str
database_url: str | None = None
openai_model_main: str = "gpt-5-mini"

llm_api_base_url: str | None = None
Expand All @@ -16,9 +18,12 @@ class Settings(BaseSettings):

log_level: str = "INFO"

class Config:
env_file = ".env"
env_file_encoding = "utf-8"
model_config = SettingsConfigDict(
env_file=".env",
env_file_encoding="utf-8",
)


settings = Settings()
@lru_cache
def get_settings() -> Settings:
    """Return the gateway's ``Settings`` object, creating it on first call.

    Later calls reuse the instance cached by ``lru_cache`` instead of
    constructing ``Settings()`` again.
    """
    loaded_settings = Settings()
    return loaded_settings
Loading
Loading