diff --git a/backend/models/schemas.py b/backend/models/schemas.py index 75a4805..71754b6 100644 --- a/backend/models/schemas.py +++ b/backend/models/schemas.py @@ -17,6 +17,7 @@ class ChatMessage(BaseModel): content: str timestamp: Optional[datetime] = None sources: List[str] = [] + benchmarks: Optional[dict] = None class ChatRequest(BaseModel): diff --git a/backend/requirements.txt b/backend/requirements.txt index d9dcaef..fd58dbd 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -14,3 +14,4 @@ python-dotenv==1.0.1 httpx==0.27.0 pytest==8.3.0 pytest-asyncio==0.24.0 +psutil \ No newline at end of file diff --git a/backend/routes/chat.py b/backend/routes/chat.py index 4fb781b..42e4e87 100644 --- a/backend/routes/chat.py +++ b/backend/routes/chat.py @@ -9,8 +9,14 @@ from models.schemas import ChatRequest, ChatResponse from services import ollama_service, db_service -router = APIRouter() +import time +import psutil + +def _get_memory_usage(): + mem = psutil.virtual_memory() + return round(mem.used / (1024 ** 3), 1), round(mem.total / (1024 ** 3), 1) +router = APIRouter() def _retrieve_context(*args, **kwargs): from services import rag_service as rag_service_module @@ -57,6 +63,9 @@ async def chat_stream(req: ChatRequest): """Streaming chat — returns Server-Sent Events.""" if not await ollama_service.is_ollama_running(): raise HTTPException(503, "Ollama not running. 
Run: `ollama serve`") + + first_token_time = None + start_time = time.perf_counter() db_service.create_session(req.session_id, model=req.model) history = db_service.get_history(req.session_id) @@ -70,6 +79,8 @@ async def chat_stream(req: ChatRequest): full_reply = [] async def event_stream(): + nonlocal first_token_time + token_count = 0 async for token in ollama_service.chat_stream( message=req.message, model=req.model, @@ -78,12 +89,29 @@ async def event_stream(): language=req.language, temperature=req.temperature, ): + if first_token_time is None: + first_token_time = time.perf_counter() full_reply.append(token) + token_count += 1 yield f"data: {json.dumps({'token': token})}\n\n" + end_time = time.perf_counter() + complete = "".join(full_reply) - db_service.save_message(req.session_id, "assistant", complete, sources) - yield f"data: {json.dumps({'done': True, 'sources': sources})}\n\n" + ttft_ms = round((first_token_time - start_time) * 1000) if first_token_time else 0 + total_duration_ms = round((end_time - start_time) * 1000) + memory_used_gb, memory_total_gb = _get_memory_usage() + + benchmarks = { + "ttft_ms": ttft_ms, + "total_duration_ms": total_duration_ms, + "token_count": token_count, + "memory_used_gb": memory_used_gb, + "memory_total_gb": memory_total_gb, + } + + db_service.save_message(req.session_id, "assistant", complete, sources, benchmarks) + yield f"data: {json.dumps({'done': True, 'sources': sources, 'benchmarks': benchmarks})}\n\n" return StreamingResponse(event_stream(), media_type="text/event-stream") diff --git a/backend/services/db_service.py b/backend/services/db_service.py index 374d2bc..b88ac00 100644 --- a/backend/services/db_service.py +++ b/backend/services/db_service.py @@ -76,6 +76,7 @@ def init_db(): content TEXT NOT NULL, sources TEXT DEFAULT '[]', created_at TEXT DEFAULT (datetime('now')), + benchmarks TEXT DEFAULT '{}', FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE ); @@ -128,7 +129,9 @@ def init_db(): 
except sqlite3.OperationalError: pass # column already exists - + cols = [row[1] for row in conn.execute("PRAGMA table_info(messages)").fetchall()] + if "benchmarks" not in cols: + conn.execute("ALTER TABLE messages ADD COLUMN benchmarks TEXT DEFAULT '{}'") # ─── Sessions ──────────────────────────────────────────────── def create_session(session_id: str, title: str = "New Chat", model: str = "llama3") -> dict: with get_db() as conn: @@ -173,12 +176,12 @@ def get_all_sessions() -> list[dict]: # ─── Messages ──────────────────────────────────────────────── -def save_message(session_id: str, role: str, content: str, sources: list = None): +def save_message(session_id: str, role: str, content: str, sources: list = None, benchmarks: dict = None): sources = sources or [] with get_db() as conn: conn.execute( - "INSERT INTO messages (session_id, role, content, sources) VALUES (?,?,?,?)", - (session_id, role, content, json.dumps(sources)), + "INSERT INTO messages (session_id, role, content, sources, benchmarks) VALUES (?,?,?,?,?)", + (session_id, role, content, json.dumps(sources), json.dumps(benchmarks)), ) conn.execute( "UPDATE sessions SET updated_at=datetime('now'), message_count=message_count+1 WHERE id=?", @@ -206,7 +209,7 @@ def get_history(session_id: str, limit: int = 20) -> list[dict]: def get_messages_full(session_id: str) -> list[dict]: with get_db() as conn: rows = conn.execute( - "SELECT role, content, sources, created_at FROM messages WHERE session_id=? ORDER BY created_at ASC", + "SELECT role, content, sources, created_at, benchmarks FROM messages WHERE session_id=? 
ORDER BY created_at ASC", (session_id,), ).fetchall() return [ @@ -215,6 +218,7 @@ def get_messages_full(session_id: str) -> list[dict]: "content": r["content"], "sources": json.loads(r["sources"] or "[]"), "created_at": r["created_at"], + "benchmarks": json.loads(r["benchmarks"] or "{}") } for r in rows ] diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 7e99049..b6e8233 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -82,8 +82,8 @@ export default function App() { await api.streamMessage( { message: text, session_id: activeSid, model, use_documents: documents.length > 0, language }, (token) => setMessages(prev => prev.map(m => m.id === aiMsg.id ? { ...m, content: m.content + token } : m)), - (sources) => { - setMessages(prev => prev.map(m => m.id === aiMsg.id ? { ...m, sources, streaming: false } : m)); + (sources, benchmarks) => { + setMessages(prev => prev.map(m => m.id === aiMsg.id ? { ...m, sources, benchmarks, streaming: false } : m)); refreshSessions(); } ); diff --git a/frontend/src/components/ChatWindow.jsx b/frontend/src/components/ChatWindow.jsx index 90d32ab..5cab23f 100644 --- a/frontend/src/components/ChatWindow.jsx +++ b/frontend/src/components/ChatWindow.jsx @@ -1,18 +1,84 @@ import { useState, useRef, useEffect } from "react"; import { exportSession } from "../utils/api"; -import { AppLogoIcon, FileIcon, LockIcon } from "./Icons"; +import { AppLogoIcon, ChartIcon, CloseIcon, CopyIcon, FileIcon, LockIcon, PlusCircleIcon, TemplateIcon } from "./Icons"; +import CodeBlockWithCopy from "./CodeBlockWithCopy"; +import PromptTemplateDialog from "./PromptTemplateDialog"; export default function ChatWindow({ messages, loading, onSend, sessionId }) { const [input, setInput] = useState(""); + const [showPlusMenu, setShowPlusMenu] = useState(false); + const [showTemplateDialog, setShowTemplateDialog] = useState(false); + const [selectedTemplate, setSelectedTemplate] = useState(null); const bottomRef = useRef(null); const textareaRef = 
useRef(null); + const plusMenuRef = useRef(null); + + // NEW: state for selected messages and export format + const [selectedMessages, setSelectedMessages] = useState([]); + const [exportFormat, setExportFormat] = useState("markdown"); + const [copiedMsgId, setCopiedMsgId] = useState(null); + const [hoveredStatsId, setHoveredStatsId] = useState(null); useEffect(() => { bottomRef.current?.scrollIntoView({ behavior: "smooth" }); }, [messages]); + // Close plus menu on outside click + useEffect(() => { + function handleClickOutside(e) { + if (plusMenuRef.current && !plusMenuRef.current.contains(e.target)) { + setShowPlusMenu(false); + } + } + if (showPlusMenu) document.addEventListener("mousedown", handleClickOutside); + return () => document.removeEventListener("mousedown", handleClickOutside); + }, [showPlusMenu]); + + function copyToClipboard(msgId, content) { + navigator.clipboard.writeText(content); + setCopiedMsgId(msgId); + setTimeout(() => setCopiedMsgId(null), 2000); + } + + function handleSelectTemplate(template) { + setSelectedTemplate(template); + setShowTemplateDialog(false); + setShowPlusMenu(false); + setTimeout(() => textareaRef.current?.focus(), 0); + } + + // Parse code blocks for copy button + function parseMessageWithCodeBlocks(content) { + if (!content) return [{ type: "text", content: "" }]; + const parts = []; + const regex = /```(\w*)\n([\s\S]*?)```/g; + let lastIndex = 0; + let match; + while ((match = regex.exec(content)) !== null) { + if (match.index > lastIndex) { + parts.push({ type: "text", content: content.slice(lastIndex, match.index) }); + } + parts.push({ + type: "code", + language: match[1] || "text", + code: match[2].trim() + }); + lastIndex = match.index + match[0].length; + } + if (lastIndex < content.length) { + parts.push({ type: "text", content: content.slice(lastIndex) }); + } + if (parts.length === 0) { + parts.push({ type: "text", content }); + } + return parts; + } function send() { - if (!input.trim() || loading) return; - 
onSend(input.trim()); + if ((!input.trim() && !selectedTemplate) || loading) return; + const message = selectedTemplate + ? `${selectedTemplate.prompt}\n\n${input.trim()}`.trim() + : input.trim(); + onSend(message); setInput(""); + setSelectedTemplate(null); if (textareaRef.current) { textareaRef.current.style.height = "auto"; } } @@ -100,6 +166,74 @@ export default function ChatWindow({ messages, loading, onSend, sessionId }) { You )} + {msg.role === "assistant" && !msg.streaming && ( +
+ {/* Copy button */} + + + {/* Stats hover button */} +
setHoveredStatsId(msg.id)} + onMouseLeave={() => setHoveredStatsId(null)} + > + + + {hoveredStatsId === msg.id && msg.benchmarks && Object.keys(msg.benchmarks).length > 0 && ( +
+
+

Performance

+
+
+ Time to first token + {(msg.benchmarks.ttft_ms / 1000).toFixed(2)}s +
+
+ Total duration + {(msg.benchmarks.total_duration_ms / 1000).toFixed(2)}s +
+
+ Tokens generated + {msg.benchmarks.token_count} +
+ {msg.benchmarks.memory_used_gb && ( +
+
+ RAM usage + + {msg.benchmarks.memory_used_gb} / {msg.benchmarks.memory_total_gb} GB + + i + + Total system memory in use across all processes, not just the LLM. + + + +
+
+ )} +
+
+
+ )} +
+
+ )} ))} @@ -123,21 +257,70 @@ export default function ChatWindow({ messages, loading, onSend, sessionId }) {
- {/* Input */} + {/* Prompt Template Dialog */} + {showTemplateDialog && ( + { setShowTemplateDialog(false); setShowPlusMenu(false); }} + /> + )} + + {/* Input Form Footer */}
-