# NOTE: the lines below were GitHub web-UI page chrome captured by copy/paste
# ("Skip to content" / run title "Benchmark #2" / "Workflow file for this run").
# They are kept as comments so the file remains valid YAML.
# ============================================================
# Benchmark Workflow — performance baseline testing
# ============================================================
#
# Triggers:
#   - weekly scheduled run
#   - manual dispatch (workflow_dispatch)
#
# ============================================================
name: Benchmark

on:
  schedule:
    # Every Monday at 02:00 UTC.
    - cron: '0 2 * * 1'
  workflow_dispatch:
    inputs:
      benchmark_type:
        description: '基準測試類型'
        required: true
        default: 'all'
        type: choice
        options:
          - all
          - llm
          - rag
          - agent

env:
  # Quoted so the version is a string, not the float 3.11.
  PYTHON_VERSION: '3.11'
jobs:
  # ==================== LLM benchmark ====================
  benchmark-llm:
    name: LLM 性能測試
    runs-on: ubuntu-latest
    # Run on the weekly schedule, or when dispatched with type 'llm'/'all'.
    if: github.event.inputs.benchmark_type == 'llm' || github.event.inputs.benchmark_type == 'all' || github.event_name == 'schedule'
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4

      - name: 🐍 設置 Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      # NOTE: 'asyncio' was removed from the pip install — it is part of the
      # Python standard library; the PyPI package of the same name is an
      # obsolete backport that can shadow the stdlib module on Python 3.
      - name: 📦 安裝依賴
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install openai

      - name: 🏃 運行 LLM 基準測試
        run: |
          python benchmarks/benchmark_llm.py
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

      - name: 📊 上傳測試結果
        uses: actions/upload-artifact@v4
        with:
          name: llm-benchmark-results
          path: benchmarks/results/*
          retention-days: 90
# ==================== RAG benchmark ====================
  benchmark-rag:
    name: RAG 性能測試
    runs-on: ubuntu-latest
    # Run on the weekly schedule, or when dispatched with type 'rag'/'all'.
    if: github.event.inputs.benchmark_type == 'rag' || github.event.inputs.benchmark_type == 'all' || github.event_name == 'schedule'
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4

      - name: 🐍 設置 Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      - name: 📦 安裝依賴
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install chromadb sentence-transformers

      - name: 🏃 運行 RAG 基準測試
        run: |
          python benchmarks/benchmark_rag.py
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

      - name: 📊 上傳測試結果
        uses: actions/upload-artifact@v4
        with:
          name: rag-benchmark-results
          path: benchmarks/results/*
          retention-days: 90
# ==================== Agent benchmark ====================
  benchmark-agent:
    name: Agent 性能測試
    runs-on: ubuntu-latest
    # Run on the weekly schedule, or when dispatched with type 'agent'/'all'.
    if: github.event.inputs.benchmark_type == 'agent' || github.event.inputs.benchmark_type == 'all' || github.event_name == 'schedule'
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4

      - name: 🐍 設置 Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      - name: 📦 安裝依賴
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: 🏃 運行 Agent 基準測試
        run: |
          python benchmarks/benchmark_agent.py

      - name: 📊 上傳測試結果
        uses: actions/upload-artifact@v4
        with:
          name: agent-benchmark-results
          path: benchmarks/results/*
          retention-days: 90
# ==================== Result analysis & reporting ====================
  analyze-results:
    name: 分析測試結果
    runs-on: ubuntu-latest
    needs: [benchmark-llm, benchmark-rag, benchmark-agent]
    # Run even if some benchmark jobs were skipped or failed.
    if: always()
    # issues: write is required by the github-script issues.create call below;
    # contents: read is required by checkout once permissions are restricted.
    permissions:
      contents: read
      issues: write
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4

      # No 'name:' filter: download every artifact (one sub-directory per
      # artifact under all-results/).
      - name: 📥 下載所有測試結果
        uses: actions/download-artifact@v4
        with:
          path: all-results/

      - name: 🐍 設置 Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: 📦 安裝依賴
        run: |
          pip install pandas matplotlib seaborn

      # Rewritten from 'python -c "..."' to a heredoc: the inline form lost
      # the loop/with-block indentation and would fail with IndentationError.
      - name: 📊 生成分析報告
        run: |
          python - << 'EOF'
          import json
          from pathlib import Path

          results_dir = Path('all-results')
          all_results = {}

          # Collect every JSON result produced by the benchmark jobs.
          for result_file in results_dir.rglob('*.json'):
              with open(result_file) as f:
                  all_results[result_file.stem] = json.load(f)

          # Print a summary to the job log.
          print('=== 基準測試摘要 ===')
          print(json.dumps(all_results, indent=2))

          # Persist the summary for the upload step below.
          with open('benchmark-summary.json', 'w') as f:
              json.dump(all_results, f, indent=2)
          EOF

      - name: 📊 上傳分析報告
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-analysis
          path: benchmark-summary.json
          retention-days: 90

      # Only file a report issue for the weekly scheduled runs.
      - name: 💬 創建問題評論
        if: github.event_name == 'schedule'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const summary = fs.readFileSync('benchmark-summary.json', 'utf8');
            // Await the API call so a failure fails the step instead of
            // being silently dropped when the step exits.
            await github.rest.issues.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: `基準測試報告 - ${new Date().toISOString().split('T')[0]}`,
              body: `## 🔥 每週基準測試報告\n\n\`\`\`json\n${summary}\n\`\`\`\n\n查看詳細結果請查看工作流運行。`,
              labels: ['benchmark', 'automated']
            });
# ==================== Performance trend tracking ====================
  track-performance:
    name: 跟蹤性能趨勢
    runs-on: ubuntu-latest
    needs: analyze-results
    # Only accumulate long-term trend data for the weekly scheduled runs.
    if: github.event_name == 'schedule'
    # contents: write is required for the 'git push' to gh-pages below.
    permissions:
      contents: write
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4
        with:
          ref: gh-pages
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: 📥 下載分析報告
        uses: actions/download-artifact@v4
        with:
          name: benchmark-analysis
          path: ./

      - name: 📈 更新性能趨勢數據
        run: |
          # Create the trend-data directory on first run.
          mkdir -p performance-trends
          # Snapshot today's summary under a date-stamped filename.
          DATE=$(date +%Y-%m-%d)
          cp benchmark-summary.json "performance-trends/${DATE}.json"

      # Best-effort: the push may race with concurrent updates to gh-pages.
      - name: 📤 提交更新
        continue-on-error: true
        run: |
          git config user.name "GitHub Actions"
          git config user.email "actions@github.com"
          git add performance-trends/
          git commit -m "Add performance benchmark results for $(date +%Y-%m-%d)" || echo "No changes"
          git push