Benchmark #2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ============================================================
# Benchmark Workflow - performance benchmarks
# ============================================================
#
# Triggers:
#   - weekly scheduled run
#   - manual dispatch
#
# ============================================================

name: Benchmark

on:
  schedule:
    # Every Monday at 02:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 2 * * 1'
  workflow_dispatch:
    inputs:
      # Selects which benchmark job(s) a manual run executes; each job's
      # `if:` condition compares against this value.
      benchmark_type:
        description: '基準測試類型'
        required: true
        default: 'all'
        type: choice
        options: [all, llm, rag, agent]

env:
  # Python version passed to actions/setup-python in every job.
  PYTHON_VERSION: '3.11'

jobs:
# ==================== LLM benchmark ====================
  # Runs benchmarks/benchmark_llm.py and publishes everything under
  # benchmarks/results/ as the `llm-benchmark-results` artifact.
  benchmark-llm:
    name: LLM 性能測試
    runs-on: ubuntu-latest
    # Runs on the schedule trigger, or on manual dispatch when the selected
    # benchmark type is 'llm' or 'all'.
    if: github.event.inputs.benchmark_type == 'llm' || github.event.inputs.benchmark_type == 'all' || github.event_name == 'schedule'
    # Least privilege: this job only reads the repository.
    permissions:
      contents: read
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4
        with:
          # The job never pushes, so do not leave the token in .git/config
          # where pip-installed third-party code could read it.
          persist-credentials: false
      - name: 🐍 設置 Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'
      - name: 📦 安裝依賴
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          # `asyncio` is part of the standard library; the PyPI package of the
          # same name is an obsolete backport and must not be installed.
          pip install openai
      - name: 🏃 運行 LLM 基準測試
        run: |
          python benchmarks/benchmark_llm.py
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      - name: 📊 上傳測試結果
        uses: actions/upload-artifact@v4
        with:
          name: llm-benchmark-results
          path: benchmarks/results/*
          retention-days: 90
# ==================== RAG benchmark ====================
  # Runs benchmarks/benchmark_rag.py and publishes everything under
  # benchmarks/results/ as the `rag-benchmark-results` artifact.
  benchmark-rag:
    name: RAG 性能測試
    runs-on: ubuntu-latest
    # Runs on the schedule trigger, or on manual dispatch when the selected
    # benchmark type is 'rag' or 'all'.
    if: github.event.inputs.benchmark_type == 'rag' || github.event.inputs.benchmark_type == 'all' || github.event_name == 'schedule'
    # Least privilege: this job only reads the repository.
    permissions:
      contents: read
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4
        with:
          # The job never pushes, so do not leave the token in .git/config
          # where pip-installed third-party code could read it.
          persist-credentials: false
      - name: 🐍 設置 Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'
      - name: 📦 安裝依賴
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install chromadb sentence-transformers
      - name: 🏃 運行 RAG 基準測試
        run: |
          python benchmarks/benchmark_rag.py
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      - name: 📊 上傳測試結果
        uses: actions/upload-artifact@v4
        with:
          name: rag-benchmark-results
          path: benchmarks/results/*
          retention-days: 90
# ==================== Agent benchmark ====================
  # Runs benchmarks/benchmark_agent.py and publishes everything under
  # benchmarks/results/ as the `agent-benchmark-results` artifact.
  benchmark-agent:
    name: Agent 性能測試
    runs-on: ubuntu-latest
    # Runs on the schedule trigger, or on manual dispatch when the selected
    # benchmark type is 'agent' or 'all'.
    if: github.event.inputs.benchmark_type == 'agent' || github.event.inputs.benchmark_type == 'all' || github.event_name == 'schedule'
    # Least privilege: this job only reads the repository.
    permissions:
      contents: read
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4
        with:
          # The job never pushes, so do not leave the token in .git/config
          # where pip-installed third-party code could read it.
          persist-credentials: false
      - name: 🐍 設置 Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'
      - name: 📦 安裝依賴
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
      # NOTE(review): unlike the LLM and RAG jobs, no OPENAI_API_KEY is passed
      # to this step - confirm the agent benchmark does not need it.
      - name: 🏃 運行 Agent 基準測試
        run: |
          python benchmarks/benchmark_agent.py
      - name: 📊 上傳測試結果
        uses: actions/upload-artifact@v4
        with:
          name: agent-benchmark-results
          path: benchmarks/results/*
          retention-days: 90
# ==================== Result analysis and reporting ====================
  # Merges every JSON result file from the benchmark artifacts into
  # benchmark-summary.json, uploads it as the `benchmark-analysis` artifact
  # and, on scheduled runs, opens an issue containing the summary.
  analyze-results:
    name: 分析測試結果
    runs-on: ubuntu-latest
    needs: [benchmark-llm, benchmark-rag, benchmark-agent]
    # always(): still run when some benchmark jobs failed or were skipped, so
    # that whatever results exist are summarised.
    if: always()
    permissions:
      contents: read   # checkout
      issues: write    # the scheduled run opens a report issue
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4
      - name: 📥 下載所有測試結果
        uses: actions/download-artifact@v4
        with:
          # No `name` given: every artifact of this run is downloaded, each
          # into its own sub-directory of all-results/.
          path: all-results/
      - name: 🐍 設置 Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      # The summary script below only uses the standard library, so no
      # additional packages are installed.
      - name: 📊 生成分析報告
        run: |
          python - <<'PY'
          import json
          from pathlib import Path

          results_dir = Path('all-results')
          all_results = {}

          # Collect all results. Sorted so the summary is deterministic.
          for result_file in sorted(results_dir.rglob('*.json')):
              try:
                  with open(result_file, encoding='utf-8') as f:
                      data = json.load(f)
              except (OSError, ValueError) as exc:
                  # One unreadable or malformed file must not abort the report.
                  print(f'::warning file={result_file}::Skipping unreadable result file: {exc}')
                  continue

              # Keep the plain file stem as the key; when two artifacts contain
              # a file with the same name, qualify the later one with its path
              # inside all-results/ instead of silently overwriting the earlier.
              key = result_file.stem
              if key in all_results:
                  key = result_file.relative_to(results_dir).with_suffix('').as_posix()
              all_results[key] = data

          # Print the summary
          print('=== 基準測試摘要 ===')
          print(json.dumps(all_results, indent=2))

          # Save the summary
          with open('benchmark-summary.json', 'w') as f:
              json.dump(all_results, f, indent=2)
          PY
      - name: 📊 上傳分析報告
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-analysis
          path: benchmark-summary.json
          retention-days: 90
      - name: 💬 創建問題評論
        if: github.event_name == 'schedule'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');

            // GitHub rejects issue bodies above 65536 characters; keep the
            // embedded summary well below that limit.
            const MAX_SUMMARY_CHARS = 60000;
            const raw = fs.readFileSync('benchmark-summary.json', 'utf8');
            const summary = raw.length > MAX_SUMMARY_CHARS
              ? `${raw.slice(0, MAX_SUMMARY_CHARS)}\n...(已截斷,完整內容請下載 benchmark-analysis artifact)`
              : raw;

            // Awaited so that an API failure fails this step instead of being
            // left as a floating promise.
            await github.rest.issues.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: `基準測試報告 - ${new Date().toISOString().split('T')[0]}`,
              body: `## 🔥 每週基準測試報告\n\n\`\`\`json\n${summary}\n\`\`\`\n\n查看詳細結果請查看工作流運行。`,
              labels: ['benchmark', 'automated']
            });
# ==================== Performance trend tracking ====================
  # On scheduled runs, stores the benchmark summary on the gh-pages branch as
  # performance-trends/<YYYY-MM-DD>.json.
  track-performance:
    name: 跟蹤性能趨勢
    runs-on: ubuntu-latest
    needs: analyze-results
    if: github.event_name == 'schedule'
    permissions:
      contents: write  # required to push the trend file to gh-pages
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4
        with:
          ref: gh-pages
          token: ${{ secrets.GITHUB_TOKEN }}
      - name: 📥 下載分析報告
        uses: actions/download-artifact@v4
        with:
          name: benchmark-analysis
          path: ./
      - name: 📈 更新性能趨勢數據
        run: |
          # Create the trend data directory
          mkdir -p performance-trends
          # Save the summary under today's date
          DATE=$(date +%Y-%m-%d)
          cp benchmark-summary.json "performance-trends/${DATE}.json"
          # Generate trend charts (if needed)
      - name: 📤 提交更新
        run: |
          git config user.name "GitHub Actions"
          git config user.email "actions@github.com"
          git add performance-trends/
          # Only commit and push when something is staged, so a genuine
          # commit error is not mistaken for "nothing to commit".
          if git diff --cached --quiet; then
            echo "No changes"
          else
            git commit -m "Add performance benchmark results for $(date +%Y-%m-%d)"
            git push
          fi
        # Best-effort: a failed push must not fail the workflow run.
        # NOTE(review): the original indentation is lost; this is assumed to be
        # step-level rather than job-level - confirm.
        continue-on-error: true