From c82230342d68b265ba4373004988e8d5a6ca915f Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 31 Dec 2025 14:11:42 +0000 Subject: [PATCH 1/3] =?UTF-8?q?feat:=20=E5=B0=88=E6=A1=88=E5=93=81?= =?UTF-8?q?=E8=B3=AA=E5=84=AA=E5=8C=96=E8=88=87=E5=85=A7=E5=AE=B9=E6=93=B4?= =?UTF-8?q?=E5=85=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## 安全性修復 - 修復 8 個檔案中的 eval() 安全性問題,改用 ast 模組進行安全計算 - 涵蓋 MCP、Agent、RAG 等模組的計算器工具 ## 內容更新 - 更新 10+ 個檔案中過時的 gpt-3.5-turbo 參考至 gpt-4o-mini - 更新價格表至 2025 年版本 ## 新增內容 - 企業級 MLOps 完整指南:成熟度模型、Feature Store、Kubeflow Pipeline、藍綠部署 - 知識圖譜應用指南:GraphRAG、多跳推理、知識更新策略 - AI 倫理與法規指南:EU AI Act、公平性分析、隱私保護、合規框架 ## 新增實戰專案 - VLM 圖像分析系統:多 VLM 提供者支援、文件分析、圖表數據提取 --- ...25\350\246\217\346\214\207\345\215\227.md" | 586 ++++++++++++++ ...14\346\225\264\346\214\207\345\215\227.md" | 743 +++++++++++++++++ .../README.md" | 23 +- ...50\210\207_Agentic_Workflows_2024-2025.md" | 31 +- .../01_react_agent/react_agent_basic.py" | 21 +- .../examples/4_agent_collaboration.py" | 26 +- .../examples/02_crewai_multi_agent.py" | 23 +- ...11\347\224\250\346\214\207\345\215\227.md" | 662 +++++++++++++++ .../README.md" | 23 +- .../VLM-Image-Analyzer/README.md" | 756 ++++++++++++++++++ .../01-Prompt-Engineering.md" | 4 +- .../02-ChatGPT-API-Systems.md" | 8 +- .../03-LangChain-Basics.md" | 25 +- .../04-LangChain-Chat-Data.md" | 4 +- .../05-LangChain-Agents.md" | 35 +- .../07-Advanced-RAG.md" | 8 +- .../12-Gradio-Applications.md" | 4 +- exercises/agent/01-tool-use.md | 2 +- .../prompt-engineering/01-basic-prompts.md | 2 +- 19 files changed, 2941 insertions(+), 45 deletions(-) create mode 100644 "2.\346\267\261\345\205\245LLM\346\250\241\345\236\213\345\267\245\347\250\213\350\210\207LLM\351\201\213\347\266\255/10.\351\200\262\351\232\216\350\251\261\351\241\214/AI\345\200\253\347\220\206\350\210\207\346\263\225\350\246\217\346\214\207\345\215\227.md" create mode 100644 
"2.\346\267\261\345\205\245LLM\346\250\241\345\236\213\345\267\245\347\250\213\350\210\207LLM\351\201\213\347\266\255/10.\351\200\262\351\232\216\350\251\261\351\241\214/\344\274\201\346\245\255\347\264\232MLOps\345\256\214\346\225\264\346\214\207\345\215\227.md" create mode 100644 "5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/4.RAG\350\210\207\346\252\242\347\264\242/\347\237\245\350\255\230\345\234\226\350\255\234\346\207\211\347\224\250\346\214\207\345\215\227.md" create mode 100644 "5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/\345\257\246\346\210\260\351\240\205\347\233\256/VLM-Image-Analyzer/README.md" diff --git "a/2.\346\267\261\345\205\245LLM\346\250\241\345\236\213\345\267\245\347\250\213\350\210\207LLM\351\201\213\347\266\255/10.\351\200\262\351\232\216\350\251\261\351\241\214/AI\345\200\253\347\220\206\350\210\207\346\263\225\350\246\217\346\214\207\345\215\227.md" "b/2.\346\267\261\345\205\245LLM\346\250\241\345\236\213\345\267\245\347\250\213\350\210\207LLM\351\201\213\347\266\255/10.\351\200\262\351\232\216\350\251\261\351\241\214/AI\345\200\253\347\220\206\350\210\207\346\263\225\350\246\217\346\214\207\345\215\227.md" new file mode 100644 index 0000000..8cc6159 --- /dev/null +++ "b/2.\346\267\261\345\205\245LLM\346\250\241\345\236\213\345\267\245\347\250\213\350\210\207LLM\351\201\213\347\266\255/10.\351\200\262\351\232\216\350\251\261\351\241\214/AI\345\200\253\347\220\206\350\210\207\346\263\225\350\246\217\346\214\207\345\215\227.md" @@ -0,0 +1,586 @@ +# AI 倫理與法規完整指南 + +> 最後更新:2025-01 + +## 📋 概述 + +隨著 AI 技術的快速發展,AI 倫理和法規遵循變得越來越重要。本指南涵蓋主要法規框架、倫理原則和實務合規策略。 + +## 🌍 全球 AI 法規概覽 + +### 歐盟 AI 法案 (EU AI Act) + +``` +風險分級管理: + +┌─────────────────────────────────────────┐ +│ 不可接受的風險 (Prohibited) │ +│ • 社會信用評分系統 │ +│ • 公共場所即時生物識別 │ +│ • 操縱性 AI 系統 │ +├─────────────────────────────────────────┤ +│ 高風險 (High-Risk) │ +│ • 醫療診斷系統 │ +│ • 信用評估系統 │ +│ • 招聘與人力資源系統 │ +│ • 教育評估系統 │ +│ 要求:透明度、人工監督、風險評估 │ 
+├─────────────────────────────────────────┤ +│ 有限風險 (Limited Risk) │ +│ • 聊天機器人 │ +│ • 情感識別系統 │ +│ 要求:透明度揭露 │ +├─────────────────────────────────────────┤ +│ 最小風險 (Minimal Risk) │ +│ • 垃圾郵件過濾 │ +│ • 遊戲 AI │ +│ 要求:無特殊要求 │ +└─────────────────────────────────────────┘ +``` + +### 中國 AI 法規 + +``` +主要法規: +1. 《生成式人工智能服務管理暫行辦法》 + - 適用於向公眾提供生成式AI服務 + - 要求內容安全審核 + - 演算法備案制度 + +2. 《互聯網信息服務深度合成管理規定》 + - 深度偽造技術規範 + - 標記要求 + +3. 《互聯網信息服務算法推薦管理規定》 + - 演算法透明度 + - 用戶權益保護 +``` + +### 美國 AI 政策 + +``` +主要框架: +1. AI 行政命令 (Executive Order on AI) + - 安全與安保要求 + - 報告義務 + +2. NIST AI 風險管理框架 + - 自願性指導 + - 風險評估方法 + +3. 各州法律 + - 加州 CCPA 延伸 + - 紐約 AI 招聘法 +``` + +## 🎯 核心倫理原則 + +### 1. 公平性 (Fairness) + +```python +from sklearn.metrics import confusion_matrix +import numpy as np + +class FairnessAnalyzer: + """公平性分析器""" + + def __init__(self, predictions, labels, sensitive_attribute): + self.predictions = predictions + self.labels = labels + self.sensitive = sensitive_attribute + + def demographic_parity(self) -> dict: + """人口統計平等:各群體的正預測率應相近""" + groups = np.unique(self.sensitive) + rates = {} + + for group in groups: + mask = self.sensitive == group + positive_rate = np.mean(self.predictions[mask]) + rates[group] = positive_rate + + # 計算差異 + max_diff = max(rates.values()) - min(rates.values()) + + return { + "rates": rates, + "max_difference": max_diff, + "is_fair": max_diff < 0.1 # 10% 閾值 + } + + def equalized_odds(self) -> dict: + """均等機會:各群體的 TPR 和 FPR 應相近""" + groups = np.unique(self.sensitive) + metrics = {} + + for group in groups: + mask = self.sensitive == group + tn, fp, fn, tp = confusion_matrix( + self.labels[mask], + self.predictions[mask] + ).ravel() + + tpr = tp / (tp + fn) if (tp + fn) > 0 else 0 + fpr = fp / (fp + tn) if (fp + tn) > 0 else 0 + + metrics[group] = {"TPR": tpr, "FPR": fpr} + + return metrics + + def individual_fairness(self, similarity_matrix) -> float: + """個體公平:相似個體應得到相似對待""" + n = len(self.predictions) + violations = 0 + + for i in range(n): + for j in range(i 
+ 1, n): + # 如果相似但預測不同 + if similarity_matrix[i, j] > 0.9: # 相似閾值 + if self.predictions[i] != self.predictions[j]: + violations += 1 + + return 1 - (violations / (n * (n - 1) / 2)) + + def generate_report(self) -> str: + """生成公平性報告""" + dp = self.demographic_parity() + eo = self.equalized_odds() + + report = ["=== AI 公平性分析報告 ===\n"] + + report.append("\n📊 人口統計平等分析:") + for group, rate in dp["rates"].items(): + report.append(f" 群體 {group}: 正預測率 = {rate:.2%}") + report.append(f" 最大差異: {dp['max_difference']:.2%}") + report.append(f" 是否公平: {'✅ 是' if dp['is_fair'] else '❌ 否'}") + + report.append("\n📊 均等機會分析:") + for group, metrics in eo.items(): + report.append(f" 群體 {group}: TPR={metrics['TPR']:.2%}, FPR={metrics['FPR']:.2%}") + + return "\n".join(report) + + +# 使用範例 +analyzer = FairnessAnalyzer( + predictions=model_predictions, + labels=true_labels, + sensitive_attribute=gender_data +) + +print(analyzer.generate_report()) +``` + +### 2. 透明度與可解釋性 (Transparency & Explainability) + +```python +import shap +from lime import lime_tabular + +class ExplainabilityTools: + """可解釋性工具集""" + + def __init__(self, model, X_train): + self.model = model + self.X_train = X_train + + def shap_explanation(self, X_test): + """SHAP 解釋""" + explainer = shap.TreeExplainer(self.model) + shap_values = explainer.shap_values(X_test) + + return { + "shap_values": shap_values, + "expected_value": explainer.expected_value, + "feature_importance": np.abs(shap_values).mean(axis=0) + } + + def lime_explanation(self, instance, feature_names): + """LIME 局部解釋""" + explainer = lime_tabular.LimeTabularExplainer( + self.X_train, + feature_names=feature_names, + mode="classification" + ) + + exp = explainer.explain_instance( + instance, + self.model.predict_proba, + num_features=10 + ) + + return { + "local_explanation": exp.as_list(), + "prediction": exp.predict_proba + } + + def generate_explanation_text(self, instance, feature_names) -> str: + """生成可讀的解釋文本""" + lime_exp = self.lime_explanation(instance, 
feature_names) + + text = ["=== 模型決策解釋 ===\n"] + text.append(f"預測結果:{lime_exp['prediction']}\n") + text.append("\n影響因素(按重要性排序):") + + for feature, weight in lime_exp["local_explanation"][:5]: + direction = "正向" if weight > 0 else "負向" + text.append(f" • {feature}: {direction}影響 ({abs(weight):.3f})") + + return "\n".join(text) + + +# 使用範例 +explainer = ExplainabilityTools(model, X_train) +explanation = explainer.generate_explanation_text( + test_instance, + feature_names=["年齡", "收入", "信用歷史", "負債比率"] +) +print(explanation) +``` + +### 3. 隱私保護 (Privacy) + +```python +from typing import List, Dict +import hashlib +import numpy as np + +class PrivacyProtection: + """隱私保護工具""" + + @staticmethod + def k_anonymity(data, quasi_identifiers: List[str], k: int) -> bool: + """檢查是否滿足 k-匿名性""" + groups = data.groupby(quasi_identifiers).size() + return all(groups >= k) + + @staticmethod + def differential_privacy_noise( + data: np.ndarray, + epsilon: float, + sensitivity: float + ) -> np.ndarray: + """添加差分隱私噪音(拉普拉斯機制)""" + noise = np.random.laplace( + loc=0, + scale=sensitivity / epsilon, + size=data.shape + ) + return data + noise + + @staticmethod + def pseudonymize(data: str, salt: str) -> str: + """假名化處理""" + return hashlib.sha256((data + salt).encode()).hexdigest()[:16] + + @staticmethod + def data_minimization_check( + required_fields: List[str], + collected_fields: List[str] + ) -> Dict: + """資料最小化檢查""" + unnecessary = set(collected_fields) - set(required_fields) + + return { + "required": required_fields, + "collected": collected_fields, + "unnecessary": list(unnecessary), + "is_compliant": len(unnecessary) == 0 + } + + +class PIIDetector: + """個人可識別資訊偵測器""" + + PII_PATTERNS = { + "email": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", + "phone_tw": r"09\d{8}", + "id_tw": r"[A-Z][12]\d{8}", + "credit_card": r"\d{4}[- ]?\d{4}[- ]?\d{4}[- ]?\d{4}", + } + + def detect(self, text: str) -> Dict[str, List[str]]: + """偵測文本中的 PII""" + import re + findings = {} + + for pii_type, 
pattern in self.PII_PATTERNS.items(): + matches = re.findall(pattern, text) + if matches: + findings[pii_type] = matches + + return findings + + def redact(self, text: str) -> str: + """遮蔽 PII""" + import re + redacted = text + + for pii_type, pattern in self.PII_PATTERNS.items(): + redacted = re.sub(pattern, f"[{pii_type.upper()}_REDACTED]", redacted) + + return redacted +``` + +### 4. 問責機制 (Accountability) + +```python +from datetime import datetime +from typing import Optional +import json + +class AIAuditLog: + """AI 審計日誌系統""" + + def __init__(self, storage_backend): + self.storage = storage_backend + + def log_model_decision( + self, + model_id: str, + input_data: dict, + output: dict, + explanation: Optional[str] = None, + user_id: Optional[str] = None + ): + """記錄模型決策""" + log_entry = { + "timestamp": datetime.utcnow().isoformat(), + "model_id": model_id, + "model_version": self._get_model_version(model_id), + "input_hash": self._hash_data(input_data), + "output": output, + "explanation": explanation, + "user_id": user_id, + "traceable_id": self._generate_trace_id() + } + + self.storage.append(log_entry) + return log_entry["traceable_id"] + + def log_human_override( + self, + trace_id: str, + override_by: str, + reason: str, + new_decision: dict + ): + """記錄人工覆寫""" + log_entry = { + "timestamp": datetime.utcnow().isoformat(), + "type": "human_override", + "original_trace_id": trace_id, + "override_by": override_by, + "reason": reason, + "new_decision": new_decision + } + + self.storage.append(log_entry) + + def generate_audit_report( + self, + start_date: datetime, + end_date: datetime + ) -> dict: + """生成審計報告""" + logs = self.storage.query( + filter={ + "timestamp": {"$gte": start_date, "$lte": end_date} + } + ) + + return { + "period": f"{start_date} to {end_date}", + "total_decisions": len(logs), + "human_overrides": sum(1 for l in logs if l.get("type") == "human_override"), + "model_breakdown": self._group_by_model(logs), + "generated_at": 
datetime.utcnow().isoformat() + } +``` + +## 🛡️ 合規實施框架 + +### 風險評估模板 + +```python +class AIRiskAssessment: + """AI 風險評估框架""" + + RISK_CATEGORIES = { + "bias": "偏見與歧視風險", + "privacy": "隱私風險", + "security": "安全風險", + "transparency": "透明度風險", + "reliability": "可靠性風險", + "accountability": "問責風險" + } + + RISK_LEVELS = { + "low": {"score": 1, "color": "🟢"}, + "medium": {"score": 2, "color": "🟡"}, + "high": {"score": 3, "color": "🟠"}, + "critical": {"score": 4, "color": "🔴"} + } + + def __init__(self, system_name: str, system_description: str): + self.system_name = system_name + self.description = system_description + self.assessments = {} + + def assess_risk( + self, + category: str, + level: str, + description: str, + mitigation: str + ): + """評估特定風險""" + if category not in self.RISK_CATEGORIES: + raise ValueError(f"未知風險類別: {category}") + + self.assessments[category] = { + "level": level, + "score": self.RISK_LEVELS[level]["score"], + "description": description, + "mitigation": mitigation + } + + def calculate_overall_risk(self) -> str: + """計算總體風險等級""" + if not self.assessments: + return "unknown" + + avg_score = sum(a["score"] for a in self.assessments.values()) / len(self.assessments) + + if avg_score <= 1.5: + return "low" + elif avg_score <= 2.5: + return "medium" + elif avg_score <= 3.5: + return "high" + else: + return "critical" + + def generate_report(self) -> str: + """生成風險評估報告""" + report = [ + "=" * 50, + f"AI 系統風險評估報告", + "=" * 50, + f"\n系統名稱: {self.system_name}", + f"系統描述: {self.description}", + f"評估日期: {datetime.now().strftime('%Y-%m-%d')}", + f"\n總體風險等級: {self.RISK_LEVELS[self.calculate_overall_risk()]['color']} {self.calculate_overall_risk().upper()}", + "\n" + "-" * 50, + "詳細評估:", + "-" * 50 + ] + + for category, assessment in self.assessments.items(): + level_info = self.RISK_LEVELS[assessment["level"]] + report.extend([ + f"\n{level_info['color']} {self.RISK_CATEGORIES[category]}", + f" 風險等級: {assessment['level'].upper()}", + f" 風險描述: 
{assessment['description']}", + f" 緩解措施: {assessment['mitigation']}" + ]) + + return "\n".join(report) + + +# 使用範例 +assessment = AIRiskAssessment( + system_name="信用評估 AI 系統", + system_description="用於評估貸款申請人信用風險的機器學習系統" +) + +assessment.assess_risk( + category="bias", + level="medium", + description="模型可能對某些人口群體存在偏見", + mitigation="實施定期公平性審計,使用去偏見技術" +) + +assessment.assess_risk( + category="privacy", + level="high", + description="處理大量敏感個人財務資料", + mitigation="實施資料加密、存取控制和定期隱私影響評估" +) + +assessment.assess_risk( + category="transparency", + level="medium", + description="決策過程不夠透明", + mitigation="整合 SHAP/LIME 解釋工具,提供決策理由" +) + +print(assessment.generate_report()) +``` + +## 📋 合規檢查清單 + +### 部署前檢查 + +```markdown +## AI 系統部署前合規檢查清單 + +### 1. 資料與隱私 ☐ +- [ ] 已進行資料保護影響評估 (DPIA) +- [ ] 已取得必要的資料使用同意 +- [ ] 已實施資料最小化原則 +- [ ] 敏感資料已加密或匿名化 +- [ ] 已建立資料保留和刪除政策 + +### 2. 公平性與偏見 ☐ +- [ ] 已進行偏見審計 +- [ ] 已測試不同人口群體的表現 +- [ ] 已記錄模型限制和潛在偏見 +- [ ] 已建立偏見監控機制 + +### 3. 透明度 ☐ +- [ ] 已準備模型文檔(Model Card) +- [ ] 用戶知道正在與 AI 互動 +- [ ] 已整合可解釋性工具 +- [ ] 決策邏輯可被審計 + +### 4. 安全性 ☐ +- [ ] 已進行對抗攻擊測試 +- [ ] 已建立異常偵測機制 +- [ ] 已實施存取控制 +- [ ] 已建立事件響應計劃 + +### 5. 人工監督 ☐ +- [ ] 高風險決策有人工審核 +- [ ] 已建立覆寫機制 +- [ ] 操作人員已受訓練 +- [ ] 已定義升級流程 + +### 6. 
問責機制 ☐ +- [ ] 已建立審計日誌 +- [ ] 已指定責任人 +- [ ] 已建立投訴處理機制 +- [ ] 已準備事故報告流程 +``` + +## 📚 資源參考 + +### 法規文件 +- [EU AI Act 官方文本](https://eur-lex.europa.eu) +- [NIST AI RMF](https://www.nist.gov/itl/ai-risk-management-framework) +- [中國生成式AI管理辦法](http://www.cac.gov.cn) + +### 倫理指南 +- IEEE Ethically Aligned Design +- Partnership on AI Guidelines +- OECD AI Principles + +### 技術工具 +- Fairlearn:公平性工具包 +- AI Fairness 360:IBM 公平性工具 +- SHAP/LIME:可解釋性工具 +- Responsible AI Toolbox:微軟工具包 + +--- + +*本指南持續更新中,法規資訊請以官方來源為準。* diff --git "a/2.\346\267\261\345\205\245LLM\346\250\241\345\236\213\345\267\245\347\250\213\350\210\207LLM\351\201\213\347\266\255/10.\351\200\262\351\232\216\350\251\261\351\241\214/\344\274\201\346\245\255\347\264\232MLOps\345\256\214\346\225\264\346\214\207\345\215\227.md" "b/2.\346\267\261\345\205\245LLM\346\250\241\345\236\213\345\267\245\347\250\213\350\210\207LLM\351\201\213\347\266\255/10.\351\200\262\351\232\216\350\251\261\351\241\214/\344\274\201\346\245\255\347\264\232MLOps\345\256\214\346\225\264\346\214\207\345\215\227.md" new file mode 100644 index 0000000..5f1623a --- /dev/null +++ "b/2.\346\267\261\345\205\245LLM\346\250\241\345\236\213\345\267\245\347\250\213\350\210\207LLM\351\201\213\347\266\255/10.\351\200\262\351\232\216\350\251\261\351\241\214/\344\274\201\346\245\255\347\264\232MLOps\345\256\214\346\225\264\346\214\207\345\215\227.md" @@ -0,0 +1,743 @@ +# 企業級 MLOps 完整指南 + +> 最後更新:2025-01 + +## 📋 概述 + +本指南涵蓋企業級 MLOps 的最佳實踐,包括模型生命週期管理、自動化部署、監控告警與治理框架。 + +## 🎯 MLOps 成熟度模型 + +### 等級 0:手動流程 +``` +特徵: +- 手動訓練與部署 +- 無版本控制 +- 無監控 + +問題: +- 難以復現 +- 部署緩慢 +- 無法擴展 +``` + +### 等級 1:ML 流水線自動化 +``` +特徵: +- 自動化訓練流水線 +- 基礎版本控制 +- 簡單監控 + +改進: +- 可復現性提升 +- 部署時間縮短 +``` + +### 等級 2:CI/CD 整合 +``` +特徵: +- 持續整合/持續部署 +- 完整版本控制 +- 自動化測試 + +改進: +- 快速迭代 +- 品質保證 +``` + +### 等級 3:完全自動化 +``` +特徵: +- 自動重訓練 +- 自動監控與告警 +- 自動回滾 + +改進: +- 最小人工介入 +- 7x24 運營 +``` + +## 🏗️ 企業級 MLOps 架構 + +``` +┌─────────────────────────────────────────────────────────────┐ +│ MLOps 平台架構 │ 
+├─────────────────────────────────────────────────────────────┤ +│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ +│ │ 資料層 │→│ 特徵層 │→│ 訓練層 │→│ 部署層 │ │ +│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │ +│ ↓ ↓ ↓ ↓ │ +│ ┌─────────────────────────────────────────────────┐ │ +│ │ 監控與治理層 │ │ +│ └─────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +## 1️⃣ 資料管理 + +### Feature Store 實作 + +```python +from feast import FeatureStore, Entity, Feature, FeatureView +from feast.types import Float32, Int64, String +from datetime import timedelta + +# 定義實體 +user = Entity( + name="user_id", + value_type=String, + description="使用者唯一識別碼" +) + +# 定義特徵視圖 +user_features = FeatureView( + name="user_features", + entities=[user], + ttl=timedelta(days=1), + features=[ + Feature(name="total_purchases", dtype=Float32), + Feature(name="avg_order_value", dtype=Float32), + Feature(name="days_since_last_order", dtype=Int64), + Feature(name="customer_segment", dtype=String), + ], + online=True, + source=user_data_source, +) + +# 使用特徵 +store = FeatureStore("feature_repo/") + +# 訓練時獲取歷史特徵 +training_df = store.get_historical_features( + entity_df=entity_df, + features=[ + "user_features:total_purchases", + "user_features:avg_order_value", + "user_features:customer_segment" + ] +).to_df() + +# 推論時獲取線上特徵 +online_features = store.get_online_features( + features=[ + "user_features:total_purchases", + "user_features:customer_segment" + ], + entity_rows=[{"user_id": "user_123"}] +).to_dict() +``` + +### 資料品質檢查 + +```python +import great_expectations as gx +from great_expectations.core.batch import BatchRequest + +# 建立資料上下文 +context = gx.get_context() + +# 定義期望 +expectation_suite = context.create_expectation_suite( + expectation_suite_name="training_data_suite" +) + +# 添加驗證規則 +validator = context.get_validator( + batch_request=batch_request, + expectation_suite_name="training_data_suite" +) + +# 資料完整性檢查 
+validator.expect_column_values_to_not_be_null("user_id") +validator.expect_column_values_to_be_between( + "age", min_value=0, max_value=120 +) + +# 資料分佈檢查 +validator.expect_column_mean_to_be_between( + "purchase_amount", min_value=10, max_value=1000 +) + +# 執行驗證 +results = validator.validate() + +if not results.success: + raise DataQualityError(f"資料品質檢查失敗: {results}") +``` + +## 2️⃣ 模型訓練流水線 + +### Kubeflow Pipeline + +```python +from kfp import dsl +from kfp.components import create_component_from_func + +@create_component_from_func +def preprocess_data(input_path: str, output_path: str): + """資料預處理組件""" + import pandas as pd + from sklearn.preprocessing import StandardScaler + + df = pd.read_parquet(input_path) + + # 清洗資料 + df = df.dropna() + + # 特徵工程 + scaler = StandardScaler() + numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns + df[numeric_cols] = scaler.fit_transform(df[numeric_cols]) + + df.to_parquet(output_path) + return output_path + +@create_component_from_func +def train_model( + data_path: str, + model_path: str, + hyperparameters: dict +): + """模型訓練組件""" + import mlflow + from sklearn.ensemble import GradientBoostingClassifier + + with mlflow.start_run(): + # 載入資料 + df = pd.read_parquet(data_path) + X = df.drop('target', axis=1) + y = df['target'] + + # 訓練模型 + model = GradientBoostingClassifier(**hyperparameters) + model.fit(X, y) + + # 記錄指標 + mlflow.log_params(hyperparameters) + mlflow.sklearn.log_model(model, "model") + + # 儲存模型 + joblib.dump(model, model_path) + + return model_path + +@create_component_from_func +def evaluate_model(model_path: str, test_data_path: str) -> dict: + """模型評估組件""" + from sklearn.metrics import accuracy_score, f1_score, roc_auc_score + + model = joblib.load(model_path) + test_df = pd.read_parquet(test_data_path) + + X_test = test_df.drop('target', axis=1) + y_test = test_df['target'] + + predictions = model.predict(X_test) + probabilities = model.predict_proba(X_test)[:, 1] + + metrics = { + 
"accuracy": accuracy_score(y_test, predictions), + "f1_score": f1_score(y_test, predictions), + "auc_roc": roc_auc_score(y_test, probabilities) + } + + return metrics + +@dsl.pipeline( + name="enterprise-ml-pipeline", + description="企業級機器學習訓練流水線" +) +def ml_pipeline( + raw_data_path: str, + model_output_path: str, + hyperparameters: dict +): + # 資料預處理 + preprocess_task = preprocess_data( + input_path=raw_data_path, + output_path="/tmp/processed_data.parquet" + ) + + # 模型訓練 + train_task = train_model( + data_path=preprocess_task.output, + model_path=model_output_path, + hyperparameters=hyperparameters + ) + train_task.after(preprocess_task) + + # 模型評估 + evaluate_task = evaluate_model( + model_path=train_task.output, + test_data_path="/tmp/test_data.parquet" + ) + evaluate_task.after(train_task) +``` + +## 3️⃣ 模型版本管理 + +### MLflow Model Registry + +```python +import mlflow +from mlflow.tracking import MlflowClient + +client = MlflowClient() + +# 註冊模型 +model_uri = f"runs:/{run_id}/model" +result = mlflow.register_model(model_uri, "production-classifier") + +# 模型版本管理 +client.transition_model_version_stage( + name="production-classifier", + version=result.version, + stage="Staging" +) + +# 添加模型描述 +client.update_model_version( + name="production-classifier", + version=result.version, + description="v2.0 - 新增特徵,AUC 提升 5%" +) + +# 模型標籤 +client.set_model_version_tag( + name="production-classifier", + version=result.version, + key="approved_by", + value="ml-team-lead" +) + +# 載入特定版本模型 +model = mlflow.pyfunc.load_model( + model_uri=f"models:/production-classifier/Staging" +) +``` + +## 4️⃣ 自動化部署 + +### Kubernetes 部署配置 + +```yaml +# model-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ml-model-service + labels: + app: ml-model +spec: + replicas: 3 + selector: + matchLabels: + app: ml-model + template: + metadata: + labels: + app: ml-model + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + spec: + containers: + - name: 
model-server + image: ml-model:v2.0 + ports: + - containerPort: 8080 + resources: + requests: + memory: "2Gi" + cpu: "1" + limits: + memory: "4Gi" + cpu: "2" + env: + - name: MODEL_PATH + value: "/models/production" + - name: LOG_LEVEL + value: "INFO" + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /ready + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 5 + volumeMounts: + - name: model-storage + mountPath: /models + volumes: + - name: model-storage + persistentVolumeClaim: + claimName: model-pvc +--- +apiVersion: v1 +kind: Service +metadata: + name: ml-model-service +spec: + selector: + app: ml-model + ports: + - port: 80 + targetPort: 8080 + type: LoadBalancer +--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: ml-model-hpa +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: ml-model-service + minReplicas: 2 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 +``` + +### 藍綠部署策略 + +```python +class BlueGreenDeployer: + """藍綠部署管理器""" + + def __init__(self, kubernetes_client, namespace="production"): + self.client = kubernetes_client + self.namespace = namespace + + def deploy_new_version(self, model_version: str): + """部署新版本到綠色環境""" + green_deployment = { + "apiVersion": "apps/v1", + "kind": "Deployment", + "metadata": { + "name": f"ml-model-green-{model_version}", + "labels": {"color": "green", "version": model_version} + }, + "spec": { + "replicas": 3, + "selector": {"matchLabels": {"color": "green"}}, + "template": {"metadata": {"labels": {"color": "green", "version": model_version}}, # pod labels must match spec.selector.matchLabels, otherwise the API server rejects the Deployment + "spec": { + "containers": [{ + "name": "model-server", + "image": f"ml-model:{model_version}", + "ports": [{"containerPort": 8080}] + }] + } + } + } + } + + self.client.create_deployment( + namespace=self.namespace, +
body=green_deployment + ) + + # 等待就緒 + self._wait_for_ready(f"ml-model-green-{model_version}") + + def run_smoke_tests(self, endpoint: str) -> bool: + """執行冒煙測試""" + test_cases = [ + {"input": "test_input_1", "expected_type": "prediction"}, + {"input": "test_input_2", "expected_type": "prediction"}, + ] + + for test in test_cases: + response = requests.post( + f"{endpoint}/predict", + json={"input": test["input"]} + ) + if response.status_code != 200: + return False + + return True + + def switch_traffic(self, to_color: str): + """切換流量""" + service_patch = { + "spec": { + "selector": {"color": to_color} + } + } + + self.client.patch_service( + name="ml-model-service", + namespace=self.namespace, + body=service_patch + ) + + def rollback(self): + """回滾到藍色環境""" + self.switch_traffic("blue") + print("已回滾到穩定版本") +``` + +## 5️⃣ 監控與告警 + +### Prometheus 指標 + +```python +from prometheus_client import Counter, Histogram, Gauge, start_http_server + +# 定義指標 +PREDICTION_LATENCY = Histogram( + 'model_prediction_latency_seconds', + 'Time spent processing prediction', + buckets=[.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5] +) + +PREDICTION_COUNT = Counter( + 'model_predictions_total', + 'Total number of predictions', + ['model_version', 'status'] +) + +MODEL_ACCURACY = Gauge( + 'model_accuracy', + 'Current model accuracy', + ['model_version'] +) + +DATA_DRIFT_SCORE = Gauge( + 'data_drift_score', + 'Data drift detection score', + ['feature'] +) + +class MonitoredPredictor: + """帶監控的預測器""" + + def __init__(self, model, version: str): + self.model = model + self.version = version + + @PREDICTION_LATENCY.time() + def predict(self, input_data): + try: + result = self.model.predict(input_data) + PREDICTION_COUNT.labels( + model_version=self.version, + status="success" + ).inc() + return result + except Exception as e: + PREDICTION_COUNT.labels( + model_version=self.version, + status="error" + ).inc() + raise e + + def update_accuracy(self, accuracy: float): + 
MODEL_ACCURACY.labels(model_version=self.version).set(accuracy) +``` + +### 資料漂移偵測 + +```python +from scipy import stats +import numpy as np +from typing import Dict, List + +class DriftDetector: + """資料漂移偵測器""" + + def __init__(self, reference_data: pd.DataFrame, threshold: float = 0.05): + self.reference_data = reference_data + self.threshold = threshold + self.reference_stats = self._compute_stats(reference_data) + + def _compute_stats(self, data: pd.DataFrame) -> Dict: + """計算統計量""" + stats_dict = {} + for col in data.columns: + if data[col].dtype in ['float64', 'int64']: + stats_dict[col] = { + 'mean': data[col].mean(), + 'std': data[col].std(), + 'distribution': data[col].values + } + return stats_dict + + def detect_drift(self, current_data: pd.DataFrame) -> Dict[str, float]: + """偵測資料漂移""" + drift_scores = {} + + for col in current_data.columns: + if col not in self.reference_stats: + continue + + ref_dist = self.reference_stats[col]['distribution'] + cur_dist = current_data[col].values + + # KS 檢定 + statistic, p_value = stats.ks_2samp(ref_dist, cur_dist) + drift_scores[col] = { + 'statistic': statistic, + 'p_value': p_value, + 'is_drifted': p_value < self.threshold + } + + # 更新 Prometheus 指標 + DATA_DRIFT_SCORE.labels(feature=col).set(statistic) + + return drift_scores + + def get_drift_report(self, current_data: pd.DataFrame) -> str: + """生成漂移報告""" + drift_results = self.detect_drift(current_data) + + report = ["=== 資料漂移偵測報告 ===\n"] + + drifted_features = [ + col for col, result in drift_results.items() + if result['is_drifted'] + ] + + if drifted_features: + report.append(f"⚠️ 偵測到 {len(drifted_features)} 個特徵發生漂移:\n") + for col in drifted_features: + result = drift_results[col] + report.append( + f" - {col}: KS統計量={result['statistic']:.4f}, " + f"p值={result['p_value']:.4f}\n" + ) + else: + report.append("✅ 未偵測到顯著資料漂移\n") + + return "".join(report) +``` + +## 6️⃣ 模型治理 + +### 模型審計追蹤 + +```python +from datetime import datetime +from typing import Optional 
+import json + +class ModelAuditLogger: + """模型審計日誌""" + + def __init__(self, storage_backend): + self.storage = storage_backend + + def log_training( + self, + model_id: str, + training_data_hash: str, + hyperparameters: dict, + metrics: dict, + trained_by: str + ): + """記錄訓練事件""" + event = { + "event_type": "model_training", + "timestamp": datetime.utcnow().isoformat(), + "model_id": model_id, + "training_data_hash": training_data_hash, + "hyperparameters": hyperparameters, + "metrics": metrics, + "trained_by": trained_by + } + self.storage.append(event) + + def log_deployment( + self, + model_id: str, + version: str, + environment: str, + approved_by: str, + approval_notes: Optional[str] = None + ): + """記錄部署事件""" + event = { + "event_type": "model_deployment", + "timestamp": datetime.utcnow().isoformat(), + "model_id": model_id, + "version": version, + "environment": environment, + "approved_by": approved_by, + "approval_notes": approval_notes + } + self.storage.append(event) + + def log_prediction( + self, + model_id: str, + input_hash: str, + output: dict, + latency_ms: float + ): + """記錄預測事件(採樣)""" + event = { + "event_type": "model_prediction", + "timestamp": datetime.utcnow().isoformat(), + "model_id": model_id, + "input_hash": input_hash, + "output_summary": json.dumps(output)[:200], + "latency_ms": latency_ms + } + self.storage.append(event) + + def get_model_lineage(self, model_id: str) -> List[dict]: + """獲取模型血緣""" + return self.storage.query( + filter={"model_id": model_id}, + sort_by="timestamp" + ) +``` + +## 📚 最佳實踐總結 + +### 1. 資料管理 +- 使用 Feature Store 統一特徵管理 +- 實施資料品質檢查 +- 保留資料版本與血緣 + +### 2. 模型開發 +- 標準化訓練流水線 +- 完整的版本控制 +- 自動化測試 + +### 3. 部署策略 +- 採用藍綠或金絲雀部署 +- 實施健康檢查 +- 配置自動擴展 + +### 4. 監控告警 +- 追蹤關鍵指標 +- 偵測資料漂移 +- 設置告警閾值 + +### 5. 
治理合規 +- 完整審計追蹤 +- 模型解釋性 +- 符合法規要求 + +--- + +*本指南持續更新中,歡迎貢獻改進建議。* diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/11.MCP\345\215\224\350\255\260\350\210\207\345\267\245\345\205\267\350\252\277\347\224\250/README.md" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/11.MCP\345\215\224\350\255\260\350\210\207\345\267\245\345\205\267\350\252\277\347\224\250/README.md" index 6ef378c..73b34d8 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/11.MCP\345\215\224\350\255\260\350\210\207\345\267\245\345\205\267\350\252\277\347\224\250/README.md" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/11.MCP\345\215\224\350\255\260\350\210\207\345\267\245\345\205\267\350\252\277\347\224\250/README.md" @@ -335,8 +335,29 @@ async def calculate(expression: str) -> str: Returns: 計算結果 """ + import ast + import operator + + # 安全的運算符映射 + ops = { + ast.Add: operator.add, ast.Sub: operator.sub, + ast.Mult: operator.mul, ast.Div: operator.truediv, + ast.Pow: operator.pow, ast.USub: operator.neg + } + + def safe_eval(node): + if isinstance(node, ast.Num): + return node.n + elif isinstance(node, ast.BinOp): + return ops[type(node.op)](safe_eval(node.left), safe_eval(node.right)) + elif isinstance(node, ast.UnaryOp): + return ops[type(node.op)](safe_eval(node.operand)) + else: + raise ValueError(f"不支援的運算: {type(node)}") + try: - result = eval(expression) # 注意: 生產環境需要安全處理 + tree = ast.parse(expression, mode='eval') + result = safe_eval(tree.body) return f"結果: {result}" except Exception as e: return f"計算錯誤: {str(e)}" diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/AI_Agents_\350\210\207_Agentic_Workflows_2024-2025.md" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/AI_Agents_\350\210\207_Agentic_Workflows_2024-2025.md" index b028d70..5d93704 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/AI_Agents_\350\210\207_Agentic_Workflows_2024-2025.md" +++ 
def calculator_tool(expression: str) -> str:
    """Evaluate a basic arithmetic expression without calling ``eval``.

    The expression is parsed with ``ast`` and only numeric literals, the
    binary operators ``+ - * / **`` and unary ``+``/``-`` are evaluated.
    Names, calls, attribute access and every other node are rejected.

    Args:
        expression: Arithmetic expression, e.g. ``"(1 + 2) * 3"``.

    Returns:
        ``str(result)`` on success, otherwise ``"計算錯誤: <reason>"``.
    """
    import ast
    import operator

    # Cap on the estimated bit size of an integer power so that inputs such
    # as "9 ** 9 ** 9" fail fast instead of exhausting CPU and memory.
    max_result_bits = 100_000

    def bounded_pow(base, exponent):
        if (
            isinstance(base, int)
            and isinstance(exponent, int)
            and abs(base) > 1
            and exponent > 0
            and base.bit_length() * exponent > max_result_bits
        ):
            raise ValueError("指數運算結果過大")
        return operator.pow(base, exponent)

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.Pow: bounded_pow,
    }
    unary_ops = {ast.USub: operator.neg, ast.UAdd: operator.pos}

    def evaluate(node):
        # ast.Constant replaces ast.Num/.n, which are deprecated since
        # Python 3.8 and removed in 3.14.
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is an int subclass; it was never treated as a number here.
            if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
                raise ValueError("不支援的運算")
            return value
        if isinstance(node, ast.BinOp):
            handler = binary_ops.get(type(node.op))
            if handler is None:
                raise ValueError("不支援的運算")
            return handler(evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp):
            handler = unary_ops.get(type(node.op))
            if handler is None:
                raise ValueError("不支援的運算")
            return handler(evaluate(node.operand))
        raise ValueError("不支援的運算")

    try:
        tree = ast.parse(expression, mode='eval')
        result = evaluate(tree.body)
        return str(result)
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"計算錯誤: {str(e)}"
"a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/examples/01_react_agent/react_agent_basic.py" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/examples/01_react_agent/react_agent_basic.py" @@ -89,8 +89,27 @@ def _calculator_tool(self, expression: str) -> str: Returns: 計算結果 """ + import ast + import operator + + ops = { + ast.Add: operator.add, ast.Sub: operator.sub, + ast.Mult: operator.mul, ast.Div: operator.truediv, + ast.Pow: operator.pow, ast.USub: operator.neg + } + + def safe_eval(node): + if isinstance(node, ast.Num): + return node.n + elif isinstance(node, ast.BinOp): + return ops[type(node.op)](safe_eval(node.left), safe_eval(node.right)) + elif isinstance(node, ast.UnaryOp): + return ops[type(node.op)](safe_eval(node.operand)) + raise ValueError("不支援的運算") + try: - result = eval(expression) # 注意:生產環境應使用安全的計算方法 + tree = ast.parse(expression, mode='eval') + result = safe_eval(tree.body) return str(result) except Exception as e: return f"計算錯誤: {str(e)}" diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/5.\351\200\262\351\232\216 RAG \350\210\207\345\244\232\345\205\203\350\263\207\346\226\231\346\252\242\347\264\242/examples/4_agent_collaboration.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/5.\351\200\262\351\232\216 RAG \350\210\207\345\244\232\345\205\203\350\263\207\346\226\231\346\252\242\347\264\242/examples/4_agent_collaboration.py" index 0c22f5e..de95573 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/5.\351\200\262\351\232\216 RAG \350\210\207\345\244\232\345\205\203\350\263\207\346\226\231\346\252\242\347\264\242/examples/4_agent_collaboration.py" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/5.\351\200\262\351\232\216 RAG \350\210\207\345\244\232\345\205\203\350\263\207\346\226\231\346\252\242\347\264\242/examples/4_agent_collaboration.py" @@ -118,13 +118,27 @@ def _run(self, expression: str) -> str: # 清理輸入 expression = expression.strip() - # 
安全檢查:只允許數字和基本運算符 - allowed_chars = set("0123456789+-*/().() ") - if not all(c in allowed_chars for c in expression): - return "錯誤:表達式包含不允許的字符" + # 使用 ast 模組進行安全計算 + import ast + import operator + + ops = { + ast.Add: operator.add, ast.Sub: operator.sub, + ast.Mult: operator.mul, ast.Div: operator.truediv, + ast.Pow: operator.pow, ast.USub: operator.neg + } - # 計算 - result = eval(expression) + def safe_eval(node): + if isinstance(node, ast.Num): + return node.n + elif isinstance(node, ast.BinOp): + return ops[type(node.op)](safe_eval(node.left), safe_eval(node.right)) + elif isinstance(node, ast.UnaryOp): + return ops[type(node.op)](safe_eval(node.operand)) + raise ValueError("不支援的運算") + + tree = ast.parse(expression, mode='eval') + result = safe_eval(tree.body) return f"計算結果: {result}" except ZeroDivisionError: diff --git "a/5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/3.Agent\347\263\273\347\265\261/examples/02_crewai_multi_agent.py" "b/5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/3.Agent\347\263\273\347\265\261/examples/02_crewai_multi_agent.py" index 1978989..6399ab1 100644 --- "a/5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/3.Agent\347\263\273\347\265\261/examples/02_crewai_multi_agent.py" +++ "b/5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/3.Agent\347\263\273\347\265\261/examples/02_crewai_multi_agent.py" @@ -331,9 +331,28 @@ def search_web(query: str) -> str: return f"搜索結果:{query} 的相關信息..." 
def calculate(expression: str) -> str:
    """Evaluate a basic arithmetic expression without calling ``eval``.

    Only numeric literals, the binary operators ``+ - * / **`` and unary
    ``+``/``-`` are accepted; every other syntax node is rejected.

    Args:
        expression: Arithmetic expression, e.g. ``"(1 + 2) * 3"``.

    Returns:
        ``"計算結果: <value>"`` on success, ``"計算錯誤"`` on any failure.
    """
    import ast
    import operator

    # Cap on the estimated bit size of an integer power so that inputs such
    # as "9 ** 9 ** 9" fail fast instead of exhausting CPU and memory.
    max_result_bits = 100_000

    def bounded_pow(base, exponent):
        if (
            isinstance(base, int)
            and isinstance(exponent, int)
            and abs(base) > 1
            and exponent > 0
            and base.bit_length() * exponent > max_result_bits
        ):
            raise ValueError("指數運算結果過大")
        return operator.pow(base, exponent)

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.Pow: bounded_pow,
    }
    unary_ops = {ast.USub: operator.neg, ast.UAdd: operator.pos}

    def evaluate(node):
        # ast.Constant replaces ast.Num/.n, which are deprecated since
        # Python 3.8 and removed in 3.14.
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is an int subclass; it was never treated as a number here.
            if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
                raise ValueError("不支援的運算")
            return value
        if isinstance(node, ast.BinOp):
            handler = binary_ops.get(type(node.op))
            if handler is None:
                raise ValueError("不支援的運算")
            return handler(evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp):
            handler = unary_ops.get(type(node.op))
            if handler is None:
                raise ValueError("不支援的運算")
            return handler(evaluate(node.operand))
        raise ValueError("不支援的運算")

    try:
        tree = ast.parse(expression, mode='eval')
        result = evaluate(tree.body)
        return f"計算結果: {result}"
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit; only ordinary errors become the error string now.
        return "計算錯誤"
class KnowledgeGraphBuilder:
    """Small helper around a Neo4j driver for building and querying a graph.

    Values (names, properties) are always sent as query parameters. Labels,
    relationship types and path lengths cannot be parameterised in Cypher, so
    they are escaped or validated before being placed into the query text.
    """

    def __init__(self, uri: str, user: str, password: str):
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    @staticmethod
    def _quote_identifier(identifier: str) -> str:
        """Return ``identifier`` as a backtick-quoted Cypher identifier.

        Backticks are doubled so the value cannot terminate the quoted name;
        the literal ``\\u0060`` escape is normalised first because some Neo4j
        versions expand unicode escapes before parsing.

        Raises:
            ValueError: If ``identifier`` is not a non-empty string.
        """
        if not isinstance(identifier, str) or not identifier.strip():
            raise ValueError(f"無效的識別符: {identifier!r}")
        escaped = identifier.replace("\\u0060", "`").replace("`", "``")
        return f"`{escaped}`"

    @staticmethod
    def _validate_depth(value: int, arg_name: str) -> int:
        """Return ``value`` if it is an int >= 1, otherwise raise ValueError."""
        if isinstance(value, bool) or not isinstance(value, int) or value < 1:
            raise ValueError(f"{arg_name} 必須是大於等於 1 的整數")
        return value

    def create_entity(self, name: str, entity_type: str, properties: Dict = None):
        """Create (or merge) an entity node.

        Args:
            name: Value of the node's ``name`` property (merge key).
            entity_type: Node label.
            properties: Extra properties merged onto the node.

        Returns:
            The single record returned by the query.
        """
        label = self._quote_identifier(entity_type)
        query = f"""
                MERGE (e:{label} {{name: $name}})
                SET e += $properties
                RETURN e
                """
        with self.driver.session() as session:
            result = session.run(
                query,
                name=name,
                properties=properties or {}
            )
            return result.single()

    def create_relation(
        self,
        from_entity: str,
        to_entity: str,
        relation_type: str,
        properties: Dict = None
    ):
        """Create (or merge) a relationship between two named nodes.

        Args:
            from_entity: ``name`` of the start node.
            to_entity: ``name`` of the end node.
            relation_type: Relationship type.
            properties: Extra properties merged onto the relationship.

        Returns:
            The single record returned by the query (``None`` when either
            node does not exist, because the MATCH yields no row).
        """
        # A relationship type cannot be supplied as a $parameter; the old
        # "[r:$relation_type]" text was not valid Cypher. Inline it, escaped.
        rel_type = self._quote_identifier(relation_type)
        query = f"""
                MATCH (a {{name: $from_entity}})
                MATCH (b {{name: $to_entity}})
                MERGE (a)-[r:{rel_type}]->(b)
                SET r += $properties
                RETURN r
                """
        with self.driver.session() as session:
            result = session.run(
                query,
                from_entity=from_entity,
                to_entity=to_entity,
                properties=properties or {}
            )
            return result.single()

    def query_neighbors(self, entity_name: str, depth: int = 1) -> List[Dict]:
        """Return nodes reachable from ``entity_name`` within ``depth`` hops.

        Each item is ``record.data()`` for the columns ``e``, ``r`` and
        ``neighbor``.

        Raises:
            ValueError: If ``depth`` is not an int >= 1.
        """
        # Path length bounds are part of the query text, so only a validated
        # integer may be interpolated.
        hops = self._validate_depth(depth, "depth")
        query = f"""
                MATCH (e {{name: $name}})-[r*1..{hops}]-(neighbor)
                RETURN e, r, neighbor
                """
        with self.driver.session() as session:
            results = session.run(query, name=entity_name)
            return [record.data() for record in results]

    def find_path(
        self,
        from_entity: str,
        to_entity: str,
        max_depth: int = 5
    ) -> List[Dict]:
        """Return the shortest path between two named nodes, if any.

        Raises:
            ValueError: If ``max_depth`` is not an int >= 1.
        """
        hops = self._validate_depth(max_depth, "max_depth")
        query = f"""
                MATCH path = shortestPath(
                    (a {{name: $from_entity}})-[*1..{hops}]-(b {{name: $to_entity}})
                )
                RETURN path
                """
        with self.driver.session() as session:
            results = session.run(
                query,
                from_entity=from_entity,
                to_entity=to_entity
            )
            return [record.data() for record in results]
+ ) + + # 建立關係 + for relation in extracted["relations"]: + kg_builder.create_relation( + from_entity=relation["subject"], + to_entity=relation["object"], + relation_type=relation["predicate"] + ) + + +# 使用範例 +extractor = KnowledgeExtractor() +text = """ +蘋果公司由史蒂夫·喬布斯於1976年創立。目前的CEO是蒂姆·庫克。 +該公司總部位於加州庫比蒂諾,主要產品包括iPhone、iPad和Mac電腦。 +2023年,蘋果公司的市值超過3兆美元。 +""" + +result = extractor.extract_entities_and_relations(text) +print(json.dumps(result, ensure_ascii=False, indent=2)) +``` + +## 2️⃣ GraphRAG:知識圖譜增強的 RAG + +### 架構概覽 + +``` +使用者問題 + ↓ +┌────────────────────────────────────────┐ +│ GraphRAG 系統 │ +├────────────────────────────────────────┤ +│ 1. 問題解析 → 抽取實體 │ +│ ↓ │ +│ 2. 圖譜查詢 → 獲取相關子圖 │ +│ ↓ │ +│ 3. 向量檢索 → 補充文本資訊 │ +│ ↓ │ +│ 4. 上下文融合 → 結構+文本 │ +│ ↓ │ +│ 5. LLM 生成 → 產生答案 │ +└────────────────────────────────────────┘ + ↓ +結構化且可驗證的答案 +``` + +### 實作 GraphRAG + +```python +from typing import List, Dict, Optional +from openai import OpenAI +import numpy as np + +class GraphRAG: + """GraphRAG 系統""" + + def __init__( + self, + kg_builder: KnowledgeGraphBuilder, + vector_store, # 向量資料庫實例 + llm_client: OpenAI + ): + self.kg = kg_builder + self.vector_store = vector_store + self.llm = llm_client + + def extract_query_entities(self, question: str) -> List[str]: + """從問題中抽取實體""" + prompt = f""" + 從以下問題中抽取關鍵實體名稱,返回 JSON 列表。 + + 問題:{question} + + 返回格式:["實體1", "實體2", ...] 
+ 只返回 JSON,不要其他內容。 + """ + + response = self.llm.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": prompt}], + temperature=0 + ) + + return json.loads(response.choices[0].message.content) + + def get_subgraph( + self, + entities: List[str], + depth: int = 2 + ) -> Dict: + """獲取相關子圖""" + subgraph = { + "nodes": [], + "edges": [] + } + + for entity in entities: + neighbors = self.kg.query_neighbors(entity, depth) + for record in neighbors: + subgraph["nodes"].append(record.get("neighbor")) + subgraph["edges"].extend(record.get("r", [])) + + return subgraph + + def subgraph_to_text(self, subgraph: Dict) -> str: + """將子圖轉換為文本描述""" + descriptions = [] + + for edge in subgraph["edges"]: + desc = f"{edge['from']} {edge['type']} {edge['to']}" + descriptions.append(desc) + + return "\n".join(descriptions) + + def hybrid_retrieve( + self, + question: str, + entities: List[str], + top_k: int = 5 + ) -> Dict: + """混合檢索:圖譜 + 向量""" + # 1. 圖譜檢索 + subgraph = self.get_subgraph(entities) + graph_context = self.subgraph_to_text(subgraph) + + # 2. 向量檢索 + vector_results = self.vector_store.similarity_search( + question, + k=top_k + ) + text_context = "\n".join([doc.page_content for doc in vector_results]) + + return { + "graph_context": graph_context, + "text_context": text_context, + "subgraph": subgraph + } + + def generate_answer( + self, + question: str, + context: Dict + ) -> Dict: + """生成答案""" + prompt = f""" + 基於以下知識回答問題。 + + 知識圖譜資訊: + {context['graph_context']} + + 相關文檔: + {context['text_context']} + + 問題:{question} + + 請提供準確的答案,並說明資訊來源。如果知識不足以回答,請明確說明。 + """ + + response = self.llm.chat.completions.create( + model="gpt-4o", + messages=[{"role": "user", "content": prompt}], + temperature=0 + ) + + return { + "answer": response.choices[0].message.content, + "sources": { + "graph_entities": list(context["subgraph"]["nodes"]), + "text_documents": context["text_context"][:500] + "..." 
class MultiHopReasoner:
    """Answer a complex question by chaining simpler graph-backed sub-questions.

    Flow: the LLM splits the question into sub-questions; each sub-question
    is answered from the neighbours of the entities it mentions (via
    ``kg.query_neighbors``) plus the answers accumulated so far; a final LLM
    call synthesises the overall answer.
    """

    def __init__(self, kg: "KnowledgeGraphBuilder", llm: "OpenAI"):
        self.kg = kg
        self.llm = llm

    def _chat(self, prompt: str, model: str = "gpt-4o-mini") -> str:
        """Send one user message at temperature 0 and return the reply text."""
        response = self.llm.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        return response.choices[0].message.content

    @staticmethod
    def _parse_json_list(text: str, fallback: List[str]) -> List[str]:
        """Extract a JSON list from an LLM reply, tolerating fences and prose.

        Returns ``fallback`` when no JSON list can be decoded.
        """
        cleaned = text or ""
        start, end = cleaned.find("["), cleaned.rfind("]")
        if start == -1 or end < start:
            return fallback
        try:
            parsed = json.loads(cleaned[start:end + 1])
        except json.JSONDecodeError:
            return fallback
        if not isinstance(parsed, list):
            return fallback
        return [str(item) for item in parsed]

    def _extract_entities(self, question: str) -> List[str]:
        """Ask the LLM for the entity names mentioned in ``question``.

        This helper was called but never defined, so ``answer_subquestion``
        raised ``AttributeError``. Returns an empty list when the reply is
        not a JSON list.
        """
        prompt = f"""
        從以下問題中抽取關鍵實體名稱,返回 JSON 列表。

        問題:{question}

        返回格式:["實體1", "實體2", ...]
        只返回 JSON,不要其他內容。
        """
        return self._parse_json_list(self._chat(prompt), [])

    def _format_results(self, results: List[Dict]) -> str:
        """Render graph records as one JSON line per distinct record.

        This helper was called but never defined. ``default=str`` is used
        because records may hold driver values that ``json`` cannot encode.
        """
        lines = dict.fromkeys(
            json.dumps(record, ensure_ascii=False, default=str)
            for record in results
        )
        return "\n".join(lines) if lines else "(無相關圖譜知識)"

    def decompose_question(self, question: str) -> List[str]:
        """Split a complex question into simple sub-questions.

        Falls back to ``[question]`` when the LLM reply is not a JSON list.
        """
        prompt = f"""
        將以下複雜問題分解為簡單的子問題,每個子問題應該可以通過單一查詢回答。

        問題:{question}

        返回 JSON 列表格式:["子問題1", "子問題2", ...]
        """
        return self._parse_json_list(self._chat(prompt), [question])

    def answer_subquestion(
        self,
        subquestion: str,
        context: str = ""
    ) -> Dict:
        """Answer one sub-question from graph neighbours and prior answers.

        Args:
            subquestion: The sub-question to answer.
            context: Text of earlier sub-questions and their answers.

        Returns:
            Dict with ``question``, ``answer`` and ``evidence`` (the graph
            text shown to the LLM).
        """
        entities = self._extract_entities(subquestion)

        results = []
        for entity in entities:
            neighbors = self.kg.query_neighbors(entity, depth=1)
            results.extend(neighbors)

        graph_context = self._format_results(results)

        prompt = f"""
        基於以下知識回答問題。

        已知資訊:
        {context}

        圖譜知識:
        {graph_context}

        問題:{subquestion}

        簡潔回答:
        """

        return {
            "question": subquestion,
            "answer": self._chat(prompt),
            "evidence": graph_context
        }

    def multi_hop_reason(self, question: str) -> Dict:
        """Run decomposition, sequential answering and final synthesis.

        Returns:
            Dict with ``question``, ``reasoning_chain`` (one entry per
            sub-question) and ``final_answer``.
        """
        subquestions = self.decompose_question(question)

        reasoning_chain = []
        accumulated_context = ""

        for subq in subquestions:
            result = self.answer_subquestion(subq, accumulated_context)
            reasoning_chain.append(result)
            # Later hops see earlier answers, which is what links the hops.
            accumulated_context += f"\n{subq}: {result['answer']}"

        final_answer = self._synthesize_answer(question, reasoning_chain)

        return {
            "question": question,
            "reasoning_chain": reasoning_chain,
            "final_answer": final_answer
        }

    def _synthesize_answer(
        self,
        original_question: str,
        reasoning_chain: List[Dict]
    ) -> str:
        """Combine the reasoning chain into the final answer using gpt-4o."""
        chain_text = "\n".join([
            f"Q: {step['question']}\nA: {step['answer']}"
            for step in reasoning_chain
        ])

        prompt = f"""
        基於以下推理過程,回答原始問題。

        推理過程:
        {chain_text}

        原始問題:{original_question}

        請給出完整、準確的最終答案:
        """

        return self._chat(prompt, model="gpt-4o")
class KnowledgeGraphUpdater:
    """Apply knowledge extracted from new articles to an existing graph.

    For each article the extractor yields entities and relations. Relations
    whose (subject, predicate) already has a different object in the graph
    are treated as conflicts and overwritten with history; everything else
    is added.

    NOTE(review): ``kg`` must provide ``query_relation`` and
    ``update_relation_with_history``. The ``KnowledgeGraphBuilder`` shown in
    this guide does not define them — confirm which graph class is intended.
    """

    def __init__(self, kg: "KnowledgeGraphBuilder", extractor: "KnowledgeExtractor"):
        self.kg = kg
        self.extractor = extractor

    def update_from_news(self, news_articles: List[str]):
        """Extract knowledge from each article and merge it into the graph."""
        for article in news_articles:
            new_knowledge = self.extractor.extract_entities_and_relations(article)

            conflicts = self._check_conflicts(new_knowledge)
            if conflicts:
                # Strategy: keep the newest value, record the previous one.
                self._resolve_conflicts(conflicts, new_knowledge)

            # Previously nothing else from an article was stored once it had a
            # single conflict. Add every entity and every relation that was
            # not already written by the conflict resolution above.
            conflicting = [conflict["new"] for conflict in conflicts]
            self._add_knowledge({
                "entities": new_knowledge.get("entities", []),
                "relations": [
                    relation
                    for relation in new_knowledge.get("relations", [])
                    if relation not in conflicting
                ],
            })

    def _check_conflicts(self, new_knowledge: Dict) -> List[Dict]:
        """Return relations whose stored object differs from the new one.

        Each conflict is ``{"type": "value_conflict", "existing": <stored
        value>, "new": <relation dict>}``.
        """
        conflicts = []

        for relation in new_knowledge.get("relations", []):
            existing = self.kg.query_relation(
                relation["subject"],
                relation["predicate"]
            )

            # A falsy result is treated as "no stored value", not a conflict.
            if existing and existing != relation["object"]:
                conflicts.append({
                    "type": "value_conflict",
                    "existing": existing,
                    "new": relation
                })

        return conflicts

    def _resolve_conflicts(
        self,
        conflicts: List[Dict],
        new_knowledge: Dict
    ):
        """Overwrite each conflicting relation, passing along the old value."""
        for conflict in conflicts:
            self.kg.update_relation_with_history(
                conflict["new"]["subject"],
                conflict["new"]["predicate"],
                conflict["new"]["object"],
                previous_value=conflict["existing"]
            )

    def _add_knowledge(self, knowledge: Dict):
        """Create all entities, then all relations, from ``knowledge``."""
        for entity in knowledge.get("entities", []):
            self.kg.create_entity(entity["name"], entity["type"])

        for relation in knowledge.get("relations", []):
            self.kg.create_relation(
                relation["subject"],
                relation["object"],
                relation["predicate"]
            )
def calculator(expression: str) -> str:
    """Evaluate a basic arithmetic expression without calling ``eval``.

    Only numeric literals, the binary operators ``+ - * / **`` and unary
    ``+``/``-`` are accepted; every other syntax node is rejected.

    Args:
        expression: Arithmetic expression, e.g. ``"(1 + 2) * 3"``.

    Returns:
        ``str(result)`` on success, ``"Invalid expression"`` on any failure.
    """
    import ast
    import operator

    # Cap on the estimated bit size of an integer power so that inputs such
    # as "9 ** 9 ** 9" fail fast instead of exhausting CPU and memory.
    max_result_bits = 100_000

    def bounded_pow(base, exponent):
        if (
            isinstance(base, int)
            and isinstance(exponent, int)
            and abs(base) > 1
            and exponent > 0
            and base.bit_length() * exponent > max_result_bits
        ):
            raise ValueError("指數運算結果過大")
        return operator.pow(base, exponent)

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.Pow: bounded_pow,
    }
    unary_ops = {ast.USub: operator.neg, ast.UAdd: operator.pos}

    def evaluate(node):
        # ast.Constant replaces ast.Num/.n, which are deprecated since
        # Python 3.8 and removed in 3.14.
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is an int subclass; it was never treated as a number here.
            if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
                raise ValueError("不支援的運算")
            return value
        if isinstance(node, ast.BinOp):
            handler = binary_ops.get(type(node.op))
            if handler is None:
                raise ValueError("不支援的運算")
            return handler(evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp):
            handler = unary_ops.get(type(node.op))
            if handler is None:
                raise ValueError("不支援的運算")
            return handler(evaluate(node.operand))
        raise ValueError("不支援的運算")

    try:
        tree = ast.parse(expression, mode='eval')
        return str(evaluate(tree.body))
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit; only ordinary errors become the error string now.
        return "Invalid expression"
+ +## 📋 專案概述 + +本專案實作一個基於 VLM 的多功能圖像分析系統,支援: +- 圖像描述與標籤生成 +- 視覺問答 (VQA) +- 文件/發票 OCR 分析 +- 圖表數據提取 +- 多圖像比較分析 + +## 🎯 學習目標 + +完成本專案後,你將掌握: +- VLM API 整合 (GPT-4V, Claude Vision, Gemini) +- 圖像預處理與最佳化 +- 多模態提示工程 +- 批量處理與效能優化 +- 生產環境部署 + +## 🏗️ 系統架構 + +``` +┌─────────────────────────────────────────────────────────────┐ +│ VLM 圖像分析系統 │ +├─────────────────────────────────────────────────────────────┤ +│ 輸入層 │ +│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ +│ │ 單張圖像 │ │ 多張圖像 │ │ PDF文件 │ │ 視頻幀 │ │ +│ └────┬────┘ └────┬────┘ └────┬────┘ └────┬────┘ │ +│ └────────────┴────────────┴────────────┘ │ +│ ↓ │ +│ ┌─────────────────────────────────────────────────────┐ │ +│ │ 預處理模組 │ │ +│ │ • 圖像壓縮/調整大小 • 格式轉換 • Base64 編碼 │ │ +│ └─────────────────────────────────────────────────────┘ │ +│ ↓ │ +│ ┌─────────────────────────────────────────────────────┐ │ +│ │ VLM 引擎 │ │ +│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │ +│ │ │ GPT-4V │ │ Claude │ │ Gemini │ ← 可切換 │ │ +│ │ └─────────┘ └─────────┘ └─────────┘ │ │ +│ └─────────────────────────────────────────────────────┘ │ +│ ↓ │ +│ ┌─────────────────────────────────────────────────────┐ │ +│ │ 後處理模組 │ │ +│ │ • 結構化輸出 • 資料驗證 • 結果快取 │ │ +│ └─────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +## 📦 專案結構 + +``` +VLM-Image-Analyzer/ +├── README.md +├── requirements.txt +├── .env.example +├── src/ +│ ├── __init__.py +│ ├── core/ +│ │ ├── __init__.py +│ │ ├── vlm_client.py # VLM 客戶端抽象 +│ │ ├── image_processor.py # 圖像預處理 +│ │ └── prompt_templates.py # 提示模板 +│ ├── analyzers/ +│ │ ├── __init__.py +│ │ ├── image_captioner.py # 圖像描述 +│ │ ├── document_analyzer.py # 文件分析 +│ │ ├── chart_extractor.py # 圖表數據提取 +│ │ └── vqa_engine.py # 視覺問答 +│ ├── api/ +│ │ ├── __init__.py +│ │ └── routes.py # FastAPI 路由 +│ └── utils/ +│ ├── __init__.py +│ └── helpers.py +├── tests/ +│ ├── test_vlm_client.py +│ ├── test_analyzers.py +│ └── sample_images/ +├── examples/ +│ ├── 01_basic_usage.py +│ 
├── 02_document_ocr.py +│ ├── 03_chart_analysis.py +│ └── 04_batch_processing.py +└── docker/ + ├── Dockerfile + └── docker-compose.yml +``` + +## 🚀 快速開始 + +### 1. 環境設定 + +```bash +# 克隆專案 +git clone +cd VLM-Image-Analyzer + +# 建立虛擬環境 +python -m venv venv +source venv/bin/activate # Windows: venv\Scripts\activate + +# 安裝依賴 +pip install -r requirements.txt + +# 設定環境變數 +cp .env.example .env +# 編輯 .env 填入 API 金鑰 +``` + +### 2. 基本使用 + +```python +from src.core.vlm_client import VLMClient +from src.analyzers.image_captioner import ImageCaptioner + +# 初始化客戶端 +client = VLMClient(provider="openai") # 或 "anthropic", "google" + +# 圖像描述 +captioner = ImageCaptioner(client) +result = captioner.caption("path/to/image.jpg") +print(result.description) +print(result.tags) +``` + +## 💻 核心程式碼 + +### VLM 客戶端抽象 + +```python +# src/core/vlm_client.py +from abc import ABC, abstractmethod +from typing import List, Dict, Optional, Union +from pathlib import Path +import base64 +from PIL import Image +import io + +class VLMProvider(ABC): + """VLM 提供者抽象基類""" + + @abstractmethod + def analyze_image( + self, + image: Union[str, bytes, Path], + prompt: str, + max_tokens: int = 1024 + ) -> str: + """分析單張圖像""" + pass + + @abstractmethod + def analyze_multiple_images( + self, + images: List[Union[str, bytes, Path]], + prompt: str, + max_tokens: int = 2048 + ) -> str: + """分析多張圖像""" + pass + + +class OpenAIVLM(VLMProvider): + """OpenAI GPT-4V 實作""" + + def __init__(self, api_key: str, model: str = "gpt-4o"): + from openai import OpenAI + self.client = OpenAI(api_key=api_key) + self.model = model + + def _encode_image(self, image: Union[str, bytes, Path]) -> str: + """將圖像編碼為 base64""" + if isinstance(image, bytes): + return base64.b64encode(image).decode('utf-8') + elif isinstance(image, (str, Path)): + with open(image, "rb") as f: + return base64.b64encode(f.read()).decode('utf-8') + raise ValueError("不支援的圖像格式") + + def analyze_image( + self, + image: Union[str, bytes, Path], + prompt: str, + 
max_tokens: int = 1024 + ) -> str: + base64_image = self._encode_image(image) + + response = self.client.chat.completions.create( + model=self.model, + max_tokens=max_tokens, + messages=[ + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}", + "detail": "high" + } + } + ] + } + ] + ) + + return response.choices[0].message.content + + def analyze_multiple_images( + self, + images: List[Union[str, bytes, Path]], + prompt: str, + max_tokens: int = 2048 + ) -> str: + content = [{"type": "text", "text": prompt}] + + for i, image in enumerate(images): + base64_image = self._encode_image(image) + content.append({ + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}", + "detail": "high" + } + }) + + response = self.client.chat.completions.create( + model=self.model, + max_tokens=max_tokens, + messages=[{"role": "user", "content": content}] + ) + + return response.choices[0].message.content + + +class AnthropicVLM(VLMProvider): + """Anthropic Claude Vision 實作""" + + def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022"): + import anthropic + self.client = anthropic.Anthropic(api_key=api_key) + self.model = model + + def _encode_image(self, image: Union[str, bytes, Path]) -> tuple: + """編碼圖像並檢測類型""" + if isinstance(image, bytes): + data = image + else: + with open(image, "rb") as f: + data = f.read() + + # 檢測圖像類型 + if data[:8] == b'\x89PNG\r\n\x1a\n': + media_type = "image/png" + elif data[:2] == b'\xff\xd8': + media_type = "image/jpeg" + else: + media_type = "image/jpeg" # 預設 + + return base64.b64encode(data).decode('utf-8'), media_type + + def analyze_image( + self, + image: Union[str, bytes, Path], + prompt: str, + max_tokens: int = 1024 + ) -> str: + base64_data, media_type = self._encode_image(image) + + response = self.client.messages.create( + model=self.model, + max_tokens=max_tokens, + messages=[ + 
class ImageProcessor:
    """Resize, convert and re-encode images before sending them to a VLM."""

    # Modes Pillow's JPEG encoder can write directly; anything else must be
    # converted first or ``save`` raises OSError.
    _JPEG_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    def __init__(
        self,
        max_size: Tuple[int, int] = (2048, 2048),
        quality: int = 85,
        format: str = "JPEG"
    ):
        self.max_size = max_size
        self.quality = quality
        self.format = format

    def process(
        self,
        image: Union[str, Path, bytes, Image.Image]
    ) -> bytes:
        """Return the image re-encoded in ``self.format``, bounded by ``max_size``.

        Args:
            image: File path, encoded image bytes, or a PIL image. A PIL
                image passed in is left unmodified.

        Returns:
            The encoded image bytes.

        Raises:
            ValueError: If ``image`` is none of the supported types.
        """
        if isinstance(image, (str, Path)):
            img = Image.open(image)
        elif isinstance(image, bytes):
            img = Image.open(io.BytesIO(image))
        elif isinstance(image, Image.Image):
            # thumbnail() resizes in place; work on a copy so the caller's
            # image object is not shrunk as a side effect.
            img = image.copy()
        else:
            raise ValueError("不支援的圖像類型")

        try:
            working = img
            needs_rgb = working.mode in ('RGBA', 'P') or (
                self.format.upper() == "JPEG"
                and working.mode not in self._JPEG_MODES
            )
            if needs_rgb:
                # Also covers LA, PA, I, F, I;16 ... which JPEG cannot store.
                working = working.convert('RGB')

            # Keeps the aspect ratio and never enlarges the image.
            working.thumbnail(self.max_size, Image.Resampling.LANCZOS)

            buffer = io.BytesIO()
            working.save(buffer, format=self.format, quality=self.quality)
            return buffer.getvalue()
        finally:
            # Release the file handle held by Image.open (or the copy).
            img.close()

    def get_image_info(
        self,
        image: Union[str, Path, bytes]
    ) -> dict:
        """Return width, height, mode, format and size (KB) of an image.

        Args:
            image: File path or encoded image bytes.
        """
        is_path = isinstance(image, (str, Path))
        # The context manager closes the underlying file once the metadata
        # has been read.
        with Image.open(image if is_path else io.BytesIO(image)) as img:
            file_size = Path(image).stat().st_size if is_path else len(image)
            return {
                "width": img.width,
                "height": img.height,
                "mode": img.mode,
                "format": img.format,
                "file_size_kb": file_size / 1024
            }
class ChartExtractor:
    """Extract structured data from chart images through a VLM client."""

    CHART_TYPES = [
        "bar", "line", "pie", "scatter",
        "area", "histogram", "box", "heatmap"
    ]

    # Keys passed to ChartData(**data), with the value used when the model
    # omits one.
    _FIELD_DEFAULTS = {
        "chart_type": "unknown",
        "title": None,
        "x_axis": None,
        "y_axis": None,
        "data_points": [],
        "summary": "",
    }

    def __init__(self, vlm_client: "VLMClient"):
        self.client = vlm_client

    def extract(self, image_path: str) -> "ChartData":
        """Ask the VLM to read one chart image and return its data.

        Args:
            image_path: Path of the chart image.

        Returns:
            A ``ChartData`` built from the parsed reply. When the reply is
            not JSON, ``chart_type`` is ``"unknown"`` and ``summary`` holds
            the raw text.
        """
        prompt = """
        分析這張圖表圖像,提取所有可見的數據。

        請提供:
        1. 圖表類型(如:折線圖、長條圖、圓餅圖等)
        2. 標題(如果有)
        3. X 軸和 Y 軸標籤
        4. 所有可讀取的數據點
        5. 圖表的主要發現或趨勢總結

        以 JSON 格式回覆:
        {
            "chart_type": "類型",
            "title": "標題",
            "x_axis": "X軸標籤",
            "y_axis": "Y軸標籤",
            "data_points": [
                {"category": "類別1", "value": 100},
                {"category": "類別2", "value": 200}
            ],
            "summary": "主要發現總結"
        }

        只返回 JSON。
        """

        response = self.client.analyze(image_path, prompt)
        data = self._parse_response(response)

        return ChartData(**data)

    def compare_charts(
        self,
        image_paths: List[str]
    ) -> Dict:
        """Ask the VLM to compare several charts and return its JSON reply.

        Raises:
            json.JSONDecodeError: If the reply is not valid JSON even after
                removing a Markdown code fence.
        """
        prompt = """
        比較這些圖表,分析它們之間的關係和差異。

        對於每張圖表,提取關鍵數據點。
        然後進行比較分析:
        1. 相同之處
        2. 差異之處
        3. 趨勢對比
        4. 整體結論

        以 JSON 格式回覆。
        """

        response = self.client.analyze(image_paths, prompt)
        # Models often wrap JSON in a ``` fence; strip it before decoding.
        return json.loads(self._strip_code_fence(response))

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return the content of the first ``` fence, or the stripped text."""
        if "```" not in text:
            return text.strip()
        fenced = text.split("```", 2)[1]
        first_line, newline, rest = fenced.partition("\n")
        # Drop a language tag such as "json" on the fence's opening line.
        if newline and first_line.strip().isalnum():
            fenced = rest
        return fenced.strip()

    def _parse_response(self, response: str) -> Dict:
        """Decode the VLM reply into exactly the fields ``ChartData`` takes.

        Missing keys get defaults and unknown keys are dropped, so
        ``ChartData(**data)`` cannot fail on an incomplete or over-complete
        reply. Non-JSON replies yield the ``"unknown"`` fallback with the raw
        text as ``summary``.
        """
        try:
            parsed = json.loads(self._strip_code_fence(response))
        except (json.JSONDecodeError, TypeError, AttributeError):
            parsed = None

        if not isinstance(parsed, dict):
            return {
                "chart_type": "unknown",
                "title": None,
                "x_axis": None,
                "y_axis": None,
                "data_points": [],
                "summary": response
            }

        data = {
            key: parsed.get(key, default)
            for key, default in self._FIELD_DEFAULTS.items()
        }
        if not isinstance(data["data_points"], list):
            data["data_points"] = []
        else:
            # Copy so the shared class-level default list is never handed out.
            data["data_points"] = list(data["data_points"])
        return data
test_invoice_analysis(self, analyzer): + result = analyzer.analyze( + "tests/sample_images/invoice.jpg", + document_type="invoice" + ) + assert result.document_type == "invoice" + assert "total" in result.structured_data or "金額" in str(result.structured_data) +``` + +## 🐳 Docker 部署 + +```dockerfile +# docker/Dockerfile +FROM python:3.11-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY src/ ./src/ +COPY main.py . + +EXPOSE 8000 + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] +``` + +```yaml +# docker/docker-compose.yml +version: '3.8' + +services: + vlm-analyzer: + build: + context: .. + dockerfile: docker/Dockerfile + ports: + - "8000:8000" + environment: + - OPENAI_API_KEY=${OPENAI_API_KEY} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + volumes: + - ../uploads:/app/uploads + restart: unless-stopped +``` + +## 📊 效能優化 + +### 1. 圖像壓縮 +- 自動調整圖像大小至最佳解析度 +- 使用適當的壓縮品質 + +### 2. 批量處理 +- 使用非同步處理多張圖像 +- 實作請求佇列和速率限制 + +### 3. 
快取策略 +- 快取重複分析結果 +- 使用內容雜湊作為快取鍵 + +## 📚 延伸閱讀 + +- [OpenAI Vision 文件](https://platform.openai.com/docs/guides/vision) +- [Claude Vision 文件](https://docs.anthropic.com/en/docs/vision) +- [視覺語言模型入門](../../../3.LLM應用工程/10.多模態生成/5.視覺語言模型/) + +--- + +*本專案持續更新中,歡迎貢獻改進建議。* diff --git "a/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/01-Prompt-Engineering.md" "b/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/01-Prompt-Engineering.md" index f151166..38b281c 100644 --- "a/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/01-Prompt-Engineering.md" +++ "b/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/01-Prompt-Engineering.md" @@ -38,7 +38,7 @@ import os client = OpenAI(api_key=os.getenv('OPENAI_API_KEY')) -def get_completion(prompt, model="gpt-3.5-turbo"): +def get_completion(prompt, model="gpt-4o-mini"): messages = [{"role": "user", "content": prompt}] response = client.chat.completions.create( model=model, @@ -638,7 +638,7 @@ client = OpenAI(api_key=os.getenv('OPENAI_API_KEY')) class ReviewAnalyzer: def __init__(self): - self.model = "gpt-3.5-turbo" + self.model = "gpt-4o-mini" def get_completion(self, prompt, temperature=0): messages = [{"role": "user", "content": prompt}] diff --git "a/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/02-ChatGPT-API-Systems.md" "b/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/02-ChatGPT-API-Systems.md" index 9eaec0f..ba001ca 100644 --- "a/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/02-ChatGPT-API-Systems.md" +++ "b/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/02-ChatGPT-API-Systems.md" @@ -33,7 +33,7 @@ 
import json client = OpenAI(api_key=os.getenv('OPENAI_API_KEY')) -def get_completion(prompt, model="gpt-3.5-turbo"): +def get_completion(prompt, model="gpt-4o-mini"): """單輪對話的簡化版本""" messages = [{"role": "user", "content": prompt}] response = client.chat.completions.create( @@ -43,7 +43,7 @@ def get_completion(prompt, model="gpt-3.5-turbo"): ) return response.choices[0].message.content -def get_completion_from_messages(messages, model="gpt-3.5-turbo", temperature=0): +def get_completion_from_messages(messages, model="gpt-4o-mini", temperature=0): """多輪對話版本""" response = client.chat.completions.create( model=model, @@ -86,7 +86,7 @@ print(response) #### Tokens 計算 ```python -def get_completion_and_token_count(messages, model="gpt-3.5-turbo", temperature=0): +def get_completion_and_token_count(messages, model="gpt-4o-mini", temperature=0): """取得回應並計算 token 數量""" response = client.chat.completions.create( model=model, @@ -802,7 +802,7 @@ import hashlib from functools import lru_cache @lru_cache(maxsize=100) -def cached_completion(prompt_hash, model="gpt-3.5-turbo"): +def cached_completion(prompt_hash, model="gpt-4o-mini"): """使用快取避免重複的 API 呼叫""" # 實際實作需要從 hash 反查原始 prompt # 這裡僅示範概念 diff --git "a/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/03-LangChain-Basics.md" "b/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/03-LangChain-Basics.md" index f2e773e..0eb53ba 100644 --- "a/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/03-LangChain-Basics.md" +++ "b/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/03-LangChain-Basics.md" @@ -89,7 +89,7 @@ from langchain_openai import ChatOpenAI # 初始化聊天模型 llm = ChatOpenAI( - model="gpt-3.5-turbo", + model="gpt-4o-mini", temperature=0.7, # 控制輸出的隨機性 max_tokens=100 # 限制輸出長度 ) @@ -627,9 +627,28 @@ def 
def calculate(expression: str) -> str:
    """Evaluate a basic arithmetic expression without using ``eval``.

    Only numeric literals, the binary operators ``+ - * / **`` and unary
    minus are accepted. The expression is parsed with ``ast`` and the tree is
    walked by hand, so names, calls and attribute access are rejected.

    Args:
        expression: Arithmetic expression text, e.g. ``"2 + 3 * 4"``.

    Returns:
        ``"計算結果:<value>"`` on success, or ``"計算錯誤"`` when the
        expression is invalid, unsupported, or fails to evaluate.
    """
    import ast
    import operator

    # Cap on the approximate bit length of an integer power result, so that
    # inputs such as ``9 ** 9 ** 9`` fail fast instead of hanging the tool.
    max_pow_bits = 10_000

    def safe_pow(base, exponent):
        if (
            isinstance(base, int)
            and isinstance(exponent, int)
            and exponent > 0
            and base.bit_length() * exponent > max_pow_bits
        ):
            raise ValueError("指數運算結果過大")
        return operator.pow(base, exponent)

    binary_ops = {
        ast.Add: operator.add, ast.Sub: operator.sub,
        ast.Mult: operator.mul, ast.Div: operator.truediv,
        ast.Pow: safe_pow,
    }
    unary_ops = {ast.USub: operator.neg}

    def safe_eval(node):
        # ``ast.Constant`` replaces the deprecated ``ast.Num``; it also covers
        # strings, booleans and None, so the value type is checked explicitly.
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
                raise ValueError("不支援的運算")
            return value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](safe_eval(node.left), safe_eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](safe_eval(node.operand))
        raise ValueError("不支援的運算")

    try:
        tree = ast.parse(expression, mode='eval')
        result = safe_eval(tree.body)
        return f"計算結果:{result}"
    except (SyntaxError, ValueError, TypeError, ArithmeticError,
            RecursionError, MemoryError):
        # Parse errors, unsupported nodes, division by zero, float overflow
        # and pathological nesting all map to the same user-facing message.
        return "計算錯誤"
def calculate(expression: str) -> Dict[str, Any]:
    """Evaluate a basic arithmetic expression without using ``eval``.

    Only numeric literals, the binary operators ``+ - * / **`` and unary
    minus are accepted. The expression is parsed with ``ast`` and the tree is
    walked by hand, so names, calls and attribute access are rejected.

    Args:
        expression: Arithmetic expression text, e.g. ``"2 + 3 * 4"``.

    Returns:
        ``{"expression": ..., "result": ...}`` on success, or
        ``{"error": <message>}`` when the expression is invalid, unsupported,
        or fails to evaluate.
    """
    import ast
    import operator

    # Cap on the approximate bit length of an integer power result, so that
    # inputs such as ``9 ** 9 ** 9`` fail fast instead of hanging the tool.
    max_pow_bits = 10_000

    def safe_pow(base, exponent):
        if (
            isinstance(base, int)
            and isinstance(exponent, int)
            and exponent > 0
            and base.bit_length() * exponent > max_pow_bits
        ):
            raise ValueError("指數運算結果過大")
        return operator.pow(base, exponent)

    binary_ops = {
        ast.Add: operator.add, ast.Sub: operator.sub,
        ast.Mult: operator.mul, ast.Div: operator.truediv,
        ast.Pow: safe_pow,
    }
    unary_ops = {ast.USub: operator.neg}

    def safe_eval(node):
        # ``ast.Constant`` replaces the deprecated ``ast.Num``; it also covers
        # strings, booleans and None, so the value type is checked explicitly.
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
                raise ValueError("不支援的運算")
            return value
        # Membership is tested up front so an unsupported operator reports
        # the readable message below rather than a raw KeyError repr.
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](safe_eval(node.left), safe_eval(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](safe_eval(node.operand))
        raise ValueError("不支援的運算")

    try:
        tree = ast.parse(expression, mode='eval')
        result = safe_eval(tree.body)
        return {"expression": expression, "result": result}
    except Exception as e:
        # Deliberately broad: this is the tool boundary, and every failure
        # must be reported back to the model as data instead of raising.
        return {"error": str(e)}
client.chat.completions.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", messages=messages, functions=functions, function_call="auto" @@ -238,7 +257,7 @@ def run_conversation(user_message: str): # 第二次 API 呼叫 second_response = client.chat.completions.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", messages=messages ) @@ -384,7 +403,7 @@ from langchain.agents import AgentExecutor, create_openai_functions_agent from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder # 初始化模型 -llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0) +llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) # 建立提示模板 prompt = ChatPromptTemplate.from_messages([ @@ -470,7 +489,7 @@ from datetime import datetime class MultiToolAssistant: def __init__(self): - self.llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0) + self.llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) self.tools = self.create_tools() self.agent_executor = self.create_agent() diff --git "a/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/07-Advanced-RAG.md" "b/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/07-Advanced-RAG.md" index 30800a0..8920fe8 100644 --- "a/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/07-Advanced-RAG.md" +++ "b/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/07-Advanced-RAG.md" @@ -51,7 +51,7 @@ from langchain_core.output_parsers import StrOutputParser class QueryRewriter: def __init__(self): - self.llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0) + self.llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) def rewrite_for_retrieval(self, query: str) -> str: """重寫查詢以提升檢索效果""" @@ -126,7 +126,7 @@ print(f"\n後退提示:{rewriter.step_back_prompting(original_query)}") ```python class QueryDecomposer: def __init__(self): - self.llm = 
ChatOpenAI(model="gpt-3.5-turbo", temperature=0) + self.llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) def decompose(self, complex_query: str) -> list: """將複雜查詢分解為子問題""" @@ -341,7 +341,7 @@ for i, (doc, score) in enumerate(reranked_docs, 1): ```python class LLMReranker: def __init__(self): - self.llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0) + self.llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) def rerank( self, @@ -530,7 +530,7 @@ print(json.dumps(quality_scores, indent=2, ensure_ascii=False)) ```python class AdvancedRAGSystem: def __init__(self): - self.llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0) + self.llm = ChatOpenAI(model="gpt-4o-mini", temperature=0) self.query_rewriter = QueryRewriter() self.evaluator = RAGEvaluator() diff --git "a/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/12-Gradio-Applications.md" "b/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/12-Gradio-Applications.md" index 7b4ef5e..b36595c 100644 --- "a/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/12-Gradio-Applications.md" +++ "b/6.DeepLearning.ai\347\237\255\350\252\262\347\250\213\345\255\270\347\277\222\347\264\200\351\214\204/12-Gradio-Applications.md" @@ -24,7 +24,7 @@ client = OpenAI() def chat(message, history): """聊天功能""" response = client.chat.completions.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", messages=[{"role": "user", "content": message}] ) return response.choices[0].message.content @@ -33,7 +33,7 @@ def chat(message, history): demo = gr.ChatInterface( fn=chat, title="AI 聊天機器人", - description="使用 GPT-3.5 的聊天機器人" + description="使用 GPT-4o-mini 的聊天機器人" ) # 啟動 diff --git a/exercises/agent/01-tool-use.md b/exercises/agent/01-tool-use.md index 504c52e..158f4c1 100644 --- a/exercises/agent/01-tool-use.md +++ b/exercises/agent/01-tool-use.md @@ -138,7 +138,7 @@ def 
chat_with_tools(user_message: str, tools: list, executor: ToolExecutor) -> s # 第一次調用:獲取工具調用意圖 response = client.chat.completions.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", messages=messages, tools=tools, tool_choice="auto" diff --git a/exercises/prompt-engineering/01-basic-prompts.md b/exercises/prompt-engineering/01-basic-prompts.md index 41d84d0..4e71c3b 100644 --- a/exercises/prompt-engineering/01-basic-prompts.md +++ b/exercises/prompt-engineering/01-basic-prompts.md @@ -150,7 +150,7 @@ client = OpenAI() def get_response(prompt): response = client.chat.completions.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", messages=[{"role": "user", "content": prompt}], max_tokens=500 ) From 7114db665012db27885805016a1fd327182a44b3 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 31 Dec 2025 14:21:18 +0000 Subject: [PATCH 2/3] =?UTF-8?q?fix:=20=E7=A8=8B=E5=BC=8F=E7=A2=BC=E5=93=81?= =?UTF-8?q?=E8=B3=AA=E6=94=B9=E9=80=B2=E8=88=87=E4=BE=9D=E8=B3=B4=E6=9B=B4?= =?UTF-8?q?=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## LangChain 依賴更新 - rag_agent_system.py: langchain.chat_models → langchain_openai - CONTRIBUTING.md: langchain.llms → langchain_openai ## 異常處理改進 - 06_model_comparison.py: 修復 4 個 bare except - 03_gemini_basic.py: 修復 2 個 bare except - 01_llama31_complete.py: 修復 1 個 bare except - 02_crewai_multi_agent.py: 修復 1 個 bare except 所有 bare except 改為具體異常類型以提升可調試性 --- .../examples/06_model_comparison.py" | 10 +++++----- .../examples/basic_apis/03_gemini_basic.py" | 4 ++-- .../src/rag_agent_system.py" | 2 +- .../examples/01_llama31_complete.py" | 2 +- .../examples/02_crewai_multi_agent.py" | 4 ++-- CONTRIBUTING.md | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/1.LLM \351\203\250\347\275\262/examples/06_model_comparison.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/1.LLM 
\351\203\250\347\275\262/examples/06_model_comparison.py" index dfb09b5..08a61a5 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/1.LLM \351\203\250\347\275\262/examples/06_model_comparison.py" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/1.LLM \351\203\250\347\275\262/examples/06_model_comparison.py" @@ -282,8 +282,8 @@ def demo_basic_comparison(): try: ollama.list() available_models.extend(["llama3.1:8b", "mistral:7b"]) - except: - console.print("[yellow]⚠️ Ollama 不可用[/yellow]") + except Exception as e: + console.print(f"[yellow]⚠️ Ollama 不可用: {e}[/yellow]") if not available_models: console.print("[red]❌ 沒有可用的模型[/red]\n") @@ -324,7 +324,7 @@ def demo_performance_comparison(): try: ollama.list() models.append("llama3.1:8b") - except: + except Exception: pass if len(models) < 2: @@ -444,7 +444,7 @@ def demo_quality_comparison(): try: ollama.list() models.append("llama3.1:8b") - except: + except Exception: pass if not models: @@ -477,7 +477,7 @@ def demo_save_comparison(): try: ollama.list() models.append("llama3.1:8b") - except: + except Exception: pass if not models: diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/examples/basic_apis/03_gemini_basic.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/examples/basic_apis/03_gemini_basic.py" index 6745145..783bb0b 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/examples/basic_apis/03_gemini_basic.py" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/examples/basic_apis/03_gemini_basic.py" @@ -36,7 +36,7 @@ def basic_chat(): print(f" - Prompt tokens: {response.usage_metadata.prompt_token_count}") print(f" - Candidates tokens: {response.usage_metadata.candidates_token_count}") print(f" - Total tokens: {response.usage_metadata.total_token_count}") - except: + except AttributeError: pass return response @@ -196,7 +196,7 @@ def safety_settings_example(): try: for rating in 
response.candidates[0].safety_ratings: print(f" - {rating.category.name}: {rating.probability.name}") - except: + except (AttributeError, IndexError): print(" 無安全評分資訊") return response diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/9.\345\257\246\346\210\260/9.1-RAG-Agent\347\253\257\345\210\260\347\253\257\345\257\246\346\210\260/src/rag_agent_system.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/9.\345\257\246\346\210\260/9.1-RAG-Agent\347\253\257\345\210\260\347\253\257\345\257\246\346\210\260/src/rag_agent_system.py" index b1200e5..c94f68b 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/9.\345\257\246\346\210\260/9.1-RAG-Agent\347\253\257\345\210\260\347\253\257\345\257\246\346\210\260/src/rag_agent_system.py" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/9.\345\257\246\346\210\260/9.1-RAG-Agent\347\253\257\345\210\260\347\253\257\345\257\246\346\210\260/src/rag_agent_system.py" @@ -4,7 +4,7 @@ from typing import List, Dict, Any, Optional, Tuple import json -from langchain.chat_models import ChatOpenAI +from langchain_openai import ChatOpenAI from langchain.schema import HumanMessage, SystemMessage, AIMessage from src.vector_store import VectorStoreManager, HybridSearcher diff --git "a/5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/1.\345\244\247\345\236\213\350\252\236\350\250\200\346\250\241\345\236\213(LLM)/examples/01_llama31_complete.py" "b/5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/1.\345\244\247\345\236\213\350\252\236\350\250\200\346\250\241\345\236\213(LLM)/examples/01_llama31_complete.py" index d78847b..c301c10 100644 --- "a/5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/1.\345\244\247\345\236\213\350\252\236\350\250\200\346\250\241\345\236\213(LLM)/examples/01_llama31_complete.py" +++ 
"b/5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/1.\345\244\247\345\236\213\350\252\236\350\250\200\346\250\241\345\236\213(LLM)/examples/01_llama31_complete.py" @@ -215,7 +215,7 @@ def _parse_tool_call(self, text: str) -> Optional[Dict]: tool_call = json.loads(json_str) if 'tool' in tool_call: return tool_call - except: + except (json.JSONDecodeError, ValueError): pass return None diff --git "a/5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/3.Agent\347\263\273\347\265\261/examples/02_crewai_multi_agent.py" "b/5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/3.Agent\347\263\273\347\265\261/examples/02_crewai_multi_agent.py" index 6399ab1..6145e18 100644 --- "a/5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/3.Agent\347\263\273\347\265\261/examples/02_crewai_multi_agent.py" +++ "b/5.AI\347\240\224\347\251\266\345\211\215\346\262\277_2024-2025/3.Agent\347\263\273\347\265\261/examples/02_crewai_multi_agent.py" @@ -354,8 +354,8 @@ def safe_eval(node): tree = ast.parse(expression, mode='eval') result = safe_eval(tree.body) return f"計算結果: {result}" - except: - return "計算錯誤" + except (SyntaxError, ValueError, TypeError) as e: + return f"計算錯誤: {e}" search_tool = Tool( name="網絡搜索", diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b774360..c81d2e1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -224,7 +224,7 @@ mypy . 
--ignore-missing-imports from typing import List, Dict, Optional import numpy as np -from langchain.llms import OpenAI +from langchain_openai import OpenAI class ExampleClass: From ce53f4c5d2dca968dee04172ef5601113dd89ead Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 31 Dec 2025 14:36:26 +0000 Subject: [PATCH 3/3] =?UTF-8?q?refactor:=20=E7=A8=8B=E5=BC=8F=E7=A2=BC?= =?UTF-8?q?=E5=93=81=E8=B3=AA=E8=88=87API=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 主要更新: - 修復 5 處 bare except,使用具體異常類型 (Exception, AttributeError 等) - 更新 LangChain imports 至最新版本: - langchain.vectorstores → langchain_community.vectorstores - langchain.embeddings → langchain_openai / langchain_huggingface - langchain.llms → langchain_openai - langchain.chat_models → langchain_openai - 更新過時的 API 參考: - text-davinci-003 → gpt-4o-mini (使用新 SDK) - gpt-4-turbo-preview → gpt-4o - gpt-4-vision-preview → gpt-4o (支援多模態) 影響檔案:14 個 --- .../tools/benchmark.py" | 2 +- .../2.LLM as API/README.md" | 5 ++--- .../examples/basic_apis/04_api_comparison.py" | 2 +- .../streamlit_rag_chat.py" | 6 +++--- ...50\210\207_Agentic_Workflows_2024-2025.md" | 7 ++++--- .../3.Agent/examples/utils/agent_utils.py" | 2 +- .../3.Agent/examples/utils/evaluator.py" | 2 +- .../README.md" | 20 +++++++++---------- ...00\346\226\260\347\231\274\345\261\225.md" | 2 +- ...346\205\213RAG\347\263\273\347\265\261.md" | 2 +- .../05_quantization_comparison.py" | 2 +- .../2.KV-Cache/01_kv_cache_basic.py" | 2 +- .../examples/sample_code.py" | 2 +- README.md | 12 +++++------ 14 files changed, 34 insertions(+), 34 deletions(-) diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/1.LLM \351\203\250\347\275\262/tools/benchmark.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/1.LLM \351\203\250\347\275\262/tools/benchmark.py" index 6a3cf80..b28c17a 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/1.LLM \351\203\250\347\275\262/tools/benchmark.py" 
+++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/1.LLM \351\203\250\347\275\262/tools/benchmark.py" @@ -223,7 +223,7 @@ def main(): ollama_models = ollama.list().get('models', []) if ollama_models: default_models.append(ollama_models[0]['name']) - except: + except (ImportError, Exception): pass if default_models: diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/README.md" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/README.md" index dc309e6..1856866 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/README.md" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/README.md" @@ -378,9 +378,8 @@ if __name__ == "__main__": **RAG 應用範例**: ```python import gradio as gr -from langchain.vectorstores import Chroma -from langchain.embeddings import OpenAIEmbeddings -from langchain.chat_models import ChatOpenAI +from langchain_community.vectorstores import Chroma +from langchain_openai import OpenAIEmbeddings, ChatOpenAI from langchain.chains import RetrievalQA # 初始化 diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/examples/basic_apis/04_api_comparison.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/examples/basic_apis/04_api_comparison.py" index 5bd43a9..42bd633 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/examples/basic_apis/04_api_comparison.py" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/examples/basic_apis/04_api_comparison.py" @@ -82,7 +82,7 @@ def test_gemini(prompt: str, model_name: str = "gemini-1.5-pro") -> Dict: prompt_tokens = response.usage_metadata.prompt_token_count completion_tokens = response.usage_metadata.candidates_token_count total_tokens = response.usage_metadata.total_token_count - except: + except AttributeError: prompt_tokens = 0 completion_tokens = 0 total_tokens = 0 diff --git 
"a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/examples/frontend_integration/streamlit_rag_chat.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/examples/frontend_integration/streamlit_rag_chat.py" index e5668a6..23dd165 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/examples/frontend_integration/streamlit_rag_chat.py" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/2.LLM as API/examples/frontend_integration/streamlit_rag_chat.py" @@ -17,9 +17,9 @@ # 嘗試導入 LangChain(如果可用) try: from langchain.text_splitter import RecursiveCharacterTextSplitter - from langchain.embeddings import OpenAIEmbeddings - from langchain.vectorstores import FAISS - from langchain.document_loaders import ( + from langchain_openai import OpenAIEmbeddings + from langchain_community.vectorstores import FAISS + from langchain_community.document_loaders import ( TextLoader, PDFMinerLoader, UnstructuredMarkdownLoader diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/AI_Agents_\350\210\207_Agentic_Workflows_2024-2025.md" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/AI_Agents_\350\210\207_Agentic_Workflows_2024-2025.md" index 5d93704..c30eace 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/AI_Agents_\350\210\207_Agentic_Workflows_2024-2025.md" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/AI_Agents_\350\210\207_Agentic_Workflows_2024-2025.md" @@ -170,7 +170,7 @@ plan = [ **實作範例**: ```python from langchain.memory import ConversationBufferMemory, VectorStoreRetrieverMemory -from langchain.vectorstores import Chroma +from langchain_community.vectorstores import Chroma # 短期記憶 short_term_memory = ConversationBufferMemory( @@ -527,8 +527,9 @@ print(f"答案:{answer}") ```python from langchain.agents import initialize_agent, Tool, AgentType -from langchain.llms import OpenAI -from langchain.utilities import 
GoogleSearchAPIWrapper, PythonREPL +from langchain_openai import OpenAI +from langchain_community.utilities import GoogleSearchAPIWrapper +from langchain_experimental.utilities import PythonREPL # 初始化工具 search = GoogleSearchAPIWrapper() diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/examples/utils/agent_utils.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/examples/utils/agent_utils.py" index af90647..8b03157 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/examples/utils/agent_utils.py" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/examples/utils/agent_utils.py" @@ -46,7 +46,7 @@ def setup_environment() -> Dict[str, str]: def get_llm( - model: str = "gpt-4-turbo-preview", + model: str = "gpt-4o", temperature: float = 0.7, max_tokens: Optional[int] = None, streaming: bool = False, diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/examples/utils/evaluator.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/examples/utils/evaluator.py" index 3d21c4d..0ac5513 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/examples/utils/evaluator.py" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/3.Agent/examples/utils/evaluator.py" @@ -20,7 +20,7 @@ class AgentEvaluator: 使用 LLM 評估 Agent 的輸出質量。 """ - def __init__(self, model: str = "gpt-4-turbo-preview"): + def __init__(self, model: str = "gpt-4o"): """ 初始化評估器 diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/4.(RAG) \345\237\272\347\244\216/README.md" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/4.(RAG) \345\237\272\347\244\216/README.md" index 790817e..cb9c874 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/4.(RAG) \345\237\272\347\244\216/README.md" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/4.(RAG) \345\237\272\347\244\216/README.md" @@ -257,9 +257,9 @@ print(f"元數據: 
{chunks[0].metadata}") ### 4.4.3 使用 Chroma 建立向量資料庫 ```python -from langchain.vectorstores import Chroma -from langchain.embeddings import HuggingFaceEmbeddings -from langchain.document_loaders import TextLoader +from langchain_community.vectorstores import Chroma +from langchain_huggingface import HuggingFaceEmbeddings +from langchain_community.document_loaders import TextLoader from langchain.text_splitter import RecursiveCharacterTextSplitter # 1. 準備數據 @@ -318,9 +318,9 @@ for doc, score in results_with_scores: ### 4.4.4 完整的 RAG 系統實現 ```python -from langchain.vectorstores import Chroma -from langchain.embeddings import HuggingFaceEmbeddings -from langchain.llms import OpenAI +from langchain_community.vectorstores import Chroma +from langchain_huggingface import HuggingFaceEmbeddings +from langchain_openai import OpenAI from langchain.chains import RetrievalQA from langchain.prompts import PromptTemplate @@ -378,11 +378,11 @@ for i, doc in enumerate(result['source_documents'], 1): ### 4.4.5 使用 LangChain 的簡化 RAG ```python -from langchain.document_loaders import TextLoader +from langchain_community.document_loaders import TextLoader from langchain.text_splitter import CharacterTextSplitter -from langchain.vectorstores import FAISS -from langchain.embeddings import HuggingFaceEmbeddings -from langchain.chat_models import ChatOpenAI +from langchain_community.vectorstores import FAISS +from langchain_huggingface import HuggingFaceEmbeddings +from langchain_openai import ChatOpenAI from langchain.chains import ConversationalRetrievalChain from langchain.memory import ConversationBufferMemory diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/5.\351\200\262\351\232\216 RAG \350\210\207\345\244\232\345\205\203\350\263\207\346\226\231\346\252\242\347\264\242/2025_RAG\346\212\200\350\241\223\346\234\200\346\226\260\347\231\274\345\261\225.md" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/5.\351\200\262\351\232\216 RAG 
\350\210\207\345\244\232\345\205\203\350\263\207\346\226\231\346\252\242\347\264\242/2025_RAG\346\212\200\350\241\223\346\234\200\346\226\260\347\231\274\345\261\225.md" index 9bda96d..fdc2ccc 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/5.\351\200\262\351\232\216 RAG \350\210\207\345\244\232\345\205\203\350\263\207\346\226\231\346\252\242\347\264\242/2025_RAG\346\212\200\350\241\223\346\234\200\346\226\260\347\231\274\345\261\225.md" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/5.\351\200\262\351\232\216 RAG \350\210\207\345\244\232\345\205\203\350\263\207\346\226\231\346\252\242\347\264\242/2025_RAG\346\212\200\350\241\223\346\234\200\346\226\260\347\231\274\345\261\225.md" @@ -165,7 +165,7 @@ index = VectorStoreIndex.from_documents( ) # 多模態查詢 -multimodal_llm = OpenAIMultiModal(model="gpt-4-vision-preview") +multimodal_llm = OpenAIMultiModal(model="gpt-4o") # GPT-4o 支援多模態 query_engine = index.as_query_engine( llm=multimodal_llm, image_similarity_top_k=5 diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/5.\351\200\262\351\232\216 RAG \350\210\207\345\244\232\345\205\203\350\263\207\346\226\231\346\252\242\347\264\242/RAG_2.0_\350\210\207\345\244\232\346\250\241\346\205\213RAG\347\263\273\347\265\261.md" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/5.\351\200\262\351\232\216 RAG \350\210\207\345\244\232\345\205\203\350\263\207\346\226\231\346\252\242\347\264\242/RAG_2.0_\350\210\207\345\244\232\346\250\241\346\205\213RAG\347\263\273\347\265\261.md" index 40994c6..7940e9b 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/5.\351\200\262\351\232\216 RAG \350\210\207\345\244\232\345\205\203\350\263\207\346\226\231\346\252\242\347\264\242/RAG_2.0_\350\210\207\345\244\232\346\250\241\346\205\213RAG\347\263\273\347\265\261.md" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/5.\351\200\262\351\232\216 RAG 
\350\210\207\345\244\232\345\205\203\350\263\207\346\226\231\346\252\242\347\264\242/RAG_2.0_\350\210\207\345\244\232\346\250\241\346\205\213RAG\347\263\273\347\265\261.md" @@ -349,7 +349,7 @@ class MultimodalRAG: def __init__(self): self.text_embeddings = OpenAIEmbeddings() - self.vision_model = ChatOpenAI(model="gpt-4-vision-preview") + self.vision_model = ChatOpenAI(model="gpt-4o") # GPT-4o 具備視覺能力 def process_document_with_images(self, pdf_path: str): """處理包含圖像的 PDF 文檔""" diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/6.\346\216\250\350\253\226\345\204\252\345\214\226/1.\351\207\217\345\214\226\346\212\200\350\241\223/05_quantization_comparison.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/6.\346\216\250\350\253\226\345\204\252\345\214\226/1.\351\207\217\345\214\226\346\212\200\350\241\223/05_quantization_comparison.py" index 714d770..472509a 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/6.\346\216\250\350\253\226\345\204\252\345\214\226/1.\351\207\217\345\214\226\346\212\200\350\241\223/05_quantization_comparison.py" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/6.\346\216\250\350\253\226\345\204\252\345\214\226/1.\351\207\217\345\214\226\346\212\200\350\241\223/05_quantization_comparison.py" @@ -530,7 +530,7 @@ def plot_results(self): # 顯示(如果在互動環境中) try: plt.show() - except: + except Exception: pass diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/6.\346\216\250\350\253\226\345\204\252\345\214\226/2.KV-Cache/01_kv_cache_basic.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/6.\346\216\250\350\253\226\345\204\252\345\214\226/2.KV-Cache/01_kv_cache_basic.py" index 891b911..4f42eaf 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/6.\346\216\250\350\253\226\345\204\252\345\214\226/2.KV-Cache/01_kv_cache_basic.py" +++ 
"b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/6.\346\216\250\350\253\226\345\204\252\345\214\226/2.KV-Cache/01_kv_cache_basic.py" @@ -342,7 +342,7 @@ def _plot_comparison(self, results: Dict): try: plt.show() - except: + except Exception: pass except Exception as e: diff --git "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/9.\345\257\246\346\210\260/9.2-LLM\350\207\252\345\213\225\345\214\226\345\267\245\344\275\234\346\265\201\347\250\213/examples/sample_code.py" "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/9.\345\257\246\346\210\260/9.2-LLM\350\207\252\345\213\225\345\214\226\345\267\245\344\275\234\346\265\201\347\250\213/examples/sample_code.py" index 8f6774f..64b48dc 100644 --- "a/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/9.\345\257\246\346\210\260/9.2-LLM\350\207\252\345\213\225\345\214\226\345\267\245\344\275\234\346\265\201\347\250\213/examples/sample_code.py" +++ "b/3.LLM\346\207\211\347\224\250\345\267\245\347\250\213/9.\345\257\246\346\210\260/9.2-LLM\350\207\252\345\213\225\345\214\226\345\267\245\344\275\234\346\265\201\347\250\213/examples/sample_code.py" @@ -28,7 +28,7 @@ def process_user_data(data): if item['age'] > 18: result.append(item) return result - except: + except (KeyError, TypeError): return [] diff --git a/README.md b/README.md index d280f73..e3b1b44 100644 --- a/README.md +++ b/README.md @@ -1782,17 +1782,17 @@ APIs 是部署 LLMs 的便捷方式。以下是如何使用一些常見的私有 - 使用所獲取的 API Key,在您的應用程序中集成 API。 - 例如,使用 OpenAI 的 API,可以參考以下 Python 代碼: ```python - import openai + from openai import OpenAI - openai.api_key = 'your-api-key-here' + client = OpenAI(api_key='your-api-key-here') - response = openai.Completion.create( - engine="text-davinci-003", - prompt="Hello, world!", + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Hello, world!"}], max_tokens=50 ) - print(response.choices[0].text.strip()) + print(response.choices[0].message.content) ``` #### 步驟 2:運行開源 LLMs