# NOTE: the lines below were GitHub web-UI page chrome captured by copy/paste
# ("Skip to content" / run title "Benchmark #2" / "Workflow file for this run").
# They are kept as comments so the file remains valid YAML.
# ============================================================
# Benchmark Workflow — performance baseline testing
# ============================================================
#
# Triggers:
#   - weekly scheduled run
#   - manual dispatch (workflow_dispatch)
#
# ============================================================
name: Benchmark

on:
  schedule:
    # Every Monday at 02:00 UTC.
    - cron: '0 2 * * 1'
  workflow_dispatch:
    inputs:
      benchmark_type:
        description: '基準測試類型'
        required: true
        default: 'all'
        type: choice
        options:
          - all
          - llm
          - rag
          - agent

env:
  # Quoted so the version is a string, not the float 3.11.
  PYTHON_VERSION: '3.11'
jobs:
  # ==================== LLM benchmark ====================
  benchmark-llm:
    name: LLM 性能測試
    runs-on: ubuntu-latest
    # Run on the weekly schedule, or when dispatched with type 'llm'/'all'.
    if: github.event.inputs.benchmark_type == 'llm' || github.event.inputs.benchmark_type == 'all' || github.event_name == 'schedule'
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4

      - name: 🐍 設置 Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      # NOTE: 'asyncio' was removed from the pip install — it is part of the
      # Python standard library; the PyPI package of the same name is an
      # obsolete backport that can shadow the stdlib module on Python 3.
      - name: 📦 安裝依賴
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install openai

      - name: 🏃 運行 LLM 基準測試
        run: |
          python benchmarks/benchmark_llm.py
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

      - name: 📊 上傳測試結果
        uses: actions/upload-artifact@v4
        with:
          name: llm-benchmark-results
          path: benchmarks/results/*
          retention-days: 90
# ==================== RAG benchmark ====================
  benchmark-rag:
    name: RAG 性能測試
    runs-on: ubuntu-latest
    # Run on the weekly schedule, or when dispatched with type 'rag'/'all'.
    if: github.event.inputs.benchmark_type == 'rag' || github.event.inputs.benchmark_type == 'all' || github.event_name == 'schedule'
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4

      - name: 🐍 設置 Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      - name: 📦 安裝依賴
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install chromadb sentence-transformers

      - name: 🏃 運行 RAG 基準測試
        run: |
          python benchmarks/benchmark_rag.py
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

      - name: 📊 上傳測試結果
        uses: actions/upload-artifact@v4
        with:
          name: rag-benchmark-results
          path: benchmarks/results/*
          retention-days: 90
# ==================== Agent benchmark ====================
  benchmark-agent:
    name: Agent 性能測試
    runs-on: ubuntu-latest
    # Run on the weekly schedule, or when dispatched with type 'agent'/'all'.
    if: github.event.inputs.benchmark_type == 'agent' || github.event.inputs.benchmark_type == 'all' || github.event_name == 'schedule'
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4

      - name: 🐍 設置 Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      - name: 📦 安裝依賴
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: 🏃 運行 Agent 基準測試
        run: |
          python benchmarks/benchmark_agent.py

      - name: 📊 上傳測試結果
        uses: actions/upload-artifact@v4
        with:
          name: agent-benchmark-results
          path: benchmarks/results/*
          retention-days: 90
# ==================== Result analysis & reporting ====================
  analyze-results:
    name: 分析測試結果
    runs-on: ubuntu-latest
    needs: [benchmark-llm, benchmark-rag, benchmark-agent]
    # Run even if some benchmark jobs were skipped or failed.
    if: always()
    # issues: write is required by the github-script issues.create call below;
    # contents: read is required by checkout once permissions are restricted.
    permissions:
      contents: read
      issues: write
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4

      # No 'name:' filter: download every artifact (one sub-directory per
      # artifact under all-results/).
      - name: 📥 下載所有測試結果
        uses: actions/download-artifact@v4
        with:
          path: all-results/

      - name: 🐍 設置 Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: 📦 安裝依賴
        run: |
          pip install pandas matplotlib seaborn

      # Rewritten from 'python -c "..."' to a heredoc: the inline form lost
      # the loop/with-block indentation and would fail with IndentationError.
      - name: 📊 生成分析報告
        run: |
          python - << 'EOF'
          import json
          from pathlib import Path

          results_dir = Path('all-results')
          all_results = {}

          # Collect every JSON result produced by the benchmark jobs.
          for result_file in results_dir.rglob('*.json'):
              with open(result_file) as f:
                  all_results[result_file.stem] = json.load(f)

          # Print a summary to the job log.
          print('=== 基準測試摘要 ===')
          print(json.dumps(all_results, indent=2))

          # Persist the summary for the upload step below.
          with open('benchmark-summary.json', 'w') as f:
              json.dump(all_results, f, indent=2)
          EOF

      - name: 📊 上傳分析報告
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-analysis
          path: benchmark-summary.json
          retention-days: 90

      # Only file a report issue for the weekly scheduled runs.
      - name: 💬 創建問題評論
        if: github.event_name == 'schedule'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const summary = fs.readFileSync('benchmark-summary.json', 'utf8');
            // Await the API call so a failure fails the step instead of
            // being silently dropped when the step exits.
            await github.rest.issues.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: `基準測試報告 - ${new Date().toISOString().split('T')[0]}`,
              body: `## 🔥 每週基準測試報告\n\n\`\`\`json\n${summary}\n\`\`\`\n\n查看詳細結果請查看工作流運行。`,
              labels: ['benchmark', 'automated']
            });
# ==================== Performance trend tracking ====================
  track-performance:
    name: 跟蹤性能趨勢
    runs-on: ubuntu-latest
    needs: analyze-results
    # Only accumulate long-term trend data for the weekly scheduled runs.
    if: github.event_name == 'schedule'
    # contents: write is required for the 'git push' to gh-pages below.
    permissions:
      contents: write
    steps:
      - name: 📥 Checkout 代碼
        uses: actions/checkout@v4
        with:
          ref: gh-pages
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: 📥 下載分析報告
        uses: actions/download-artifact@v4
        with:
          name: benchmark-analysis
          path: ./

      - name: 📈 更新性能趨勢數據
        run: |
          # Create the trend-data directory on first run.
          mkdir -p performance-trends
          # Snapshot today's summary under a date-stamped filename.
          DATE=$(date +%Y-%m-%d)
          cp benchmark-summary.json "performance-trends/${DATE}.json"

      # Best-effort: the push may race with concurrent updates to gh-pages.
      - name: 📤 提交更新
        continue-on-error: true
        run: |
          git config user.name "GitHub Actions"
          git config user.email "actions@github.com"
          git add performance-trends/
          git commit -m "Add performance benchmark results for $(date +%Y-%m-%d)" || echo "No changes"
          git push