From 1e6f96f0a1b173bf6c47b0e9c5f1c4e4396fa3eb Mon Sep 17 00:00:00 2001 From: SagarSawlani Date: Thu, 11 Jun 2026 04:42:05 +0530 Subject: [PATCH 1/2] feat: add streamed response performance benchmarks --- backend/models/schemas.py | 1 + backend/requirements.txt | 1 + backend/routes/chat.py | 34 +++++- backend/services/db_service.py | 14 ++- frontend/src/App.jsx | 4 +- frontend/src/components/ChatWindow.jsx | 151 ++++++++++++++++++++++--- frontend/src/components/Icons.jsx | 17 +++ frontend/src/utils/api.js | 2 +- 8 files changed, 195 insertions(+), 29 deletions(-) diff --git a/backend/models/schemas.py b/backend/models/schemas.py index 75a4805..71754b6 100644 --- a/backend/models/schemas.py +++ b/backend/models/schemas.py @@ -17,6 +17,7 @@ class ChatMessage(BaseModel): content: str timestamp: Optional[datetime] = None sources: List[str] = [] + benchmarks: Optional[dict] = None class ChatRequest(BaseModel): diff --git a/backend/requirements.txt b/backend/requirements.txt index d9dcaef..fd58dbd 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -14,3 +14,4 @@ python-dotenv==1.0.1 httpx==0.27.0 pytest==8.3.0 pytest-asyncio==0.24.0 +psutil \ No newline at end of file diff --git a/backend/routes/chat.py b/backend/routes/chat.py index 4fb781b..37db39c 100644 --- a/backend/routes/chat.py +++ b/backend/routes/chat.py @@ -9,8 +9,14 @@ from models.schemas import ChatRequest, ChatResponse from services import ollama_service, db_service -router = APIRouter() +import time +import psutil + +def _get_memory_usage(): + mem = psutil.virtual_memory() + return round(mem.used / (1024 ** 3), 1), round(mem.total / (1024 ** 3), 1) +router = APIRouter() def _retrieve_context(*args, **kwargs): from services import rag_service as rag_service_module @@ -57,6 +63,9 @@ async def chat_stream(req: ChatRequest): """Streaming chat — returns Server-Sent Events.""" if not await ollama_service.is_ollama_running(): raise HTTPException(503, "Ollama not running. Run: `ollama serve`") + + first_token_time = None + start_time = time.perf_counter() db_service.create_session(req.session_id, model=req.model) history = db_service.get_history(req.session_id) @@ -70,6 +79,8 @@ async def chat_stream(req: ChatRequest): full_reply = [] async def event_stream(): + nonlocal first_token_time + token_count = 0 async for token in ollama_service.chat_stream( message=req.message, model=req.model, @@ -78,12 +89,29 @@ async def event_stream(): language=req.language, temperature=req.temperature, ): + if first_token_time == None: + first_token_time = time.perf_counter() full_reply.append(token) + token_count += 1 yield f"data: {json.dumps({'token': token})}\n\n" + end_time = time.perf_counter() + complete = "".join(full_reply) - db_service.save_message(req.session_id, "assistant", complete, sources) - yield f"data: {json.dumps({'done': True, 'sources': sources})}\n\n" + ttft_ms = round((first_token_time - start_time) * 1000) if first_token_time else 0 + total_duration_ms = round((end_time - start_time) * 1000) + memory_used_gb, memory_total_gb = _get_memory_usage() + + benchmarks = { + "ttft_ms": ttft_ms, + "total_duration_ms": total_duration_ms, + "token_count": token_count, + "memory_used_gb": memory_used_gb, + "memory_total_gb": memory_total_gb, + } + + db_service.save_message(req.session_id, "assistant", complete, sources, benchmarks) + yield f"data: {json.dumps({'done': True, 'sources': sources, 'benchmarks': benchmarks})}\n\n" return StreamingResponse(event_stream(), media_type="text/event-stream") diff --git a/backend/services/db_service.py b/backend/services/db_service.py index dd33814..39aae8b 100644 --- a/backend/services/db_service.py +++ b/backend/services/db_service.py @@ -76,6 +76,7 @@ def init_db(): content TEXT NOT NULL, sources TEXT DEFAULT '[]', created_at TEXT DEFAULT (datetime('now')), + benchmarks TEXT DEFAULT '{}', FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE ); @@ -123,7 +124,9 @@ def init_db(): """) - + cols = [row[1] for row in conn.execute("PRAGMA table_info(messages)").fetchall()] + if "benchmarks" not in cols: + conn.execute("ALTER TABLE messages ADD COLUMN benchmarks TEXT DEFAULT '{}'") # ─── Sessions ──────────────────────────────────────────────── def create_session(session_id: str, title: str = "New Chat", model: str = "llama3") -> dict: with get_db() as conn: @@ -162,12 +165,12 @@ def get_all_sessions() -> list[dict]: # ─── Messages ──────────────────────────────────────────────── -def save_message(session_id: str, role: str, content: str, sources: list = None): +def save_message(session_id: str, role: str, content: str, sources: list = None, benchmarks: dict = None): sources = sources or [] with get_db() as conn: conn.execute( - "INSERT INTO messages (session_id, role, content, sources) VALUES (?,?,?,?)", - (session_id, role, content, json.dumps(sources)), + "INSERT INTO messages (session_id, role, content, sources, benchmarks) VALUES (?,?,?,?,?)", + (session_id, role, content, json.dumps(sources), json.dumps(benchmarks)), ) conn.execute( "UPDATE sessions SET updated_at=datetime('now'), message_count=message_count+1 WHERE id=?", @@ -195,7 +198,7 @@ def get_history(session_id: str, limit: int = 20) -> list[dict]: def get_messages_full(session_id: str) -> list[dict]: with get_db() as conn: rows = conn.execute( - "SELECT role, content, sources, created_at FROM messages WHERE session_id=? ORDER BY created_at ASC", + "SELECT role, content, sources, created_at, benchmarks FROM messages WHERE session_id=? ORDER BY created_at ASC", (session_id,), ).fetchall() return [ @@ -204,6 +207,7 @@ def get_messages_full(session_id: str) -> list[dict]: "content": r["content"], "sources": json.loads(r["sources"] or "[]"), "created_at": r["created_at"], + "benchmarks": json.loads(r["benchmarks"] or {}) } for r in rows ] diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 73529f0..2d55836 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -77,8 +77,8 @@ export default function App() { await api.streamMessage( { message: text, session_id: sessionId, model, use_documents: documents.length > 0, language }, (token) => setMessages(prev => prev.map(m => m.id === aiMsg.id ? { ...m, content: m.content + token } : m)), - (sources) => { - setMessages(prev => prev.map(m => m.id === aiMsg.id ? { ...m, sources, streaming: false } : m)); + (sources, benchmarks) => { + setMessages(prev => prev.map(m => m.id === aiMsg.id ? { ...m, sources, benchmarks, streaming: false } : m)); refreshSessions(); } ); diff --git a/frontend/src/components/ChatWindow.jsx b/frontend/src/components/ChatWindow.jsx index 2faadb0..25aff2d 100644 --- a/frontend/src/components/ChatWindow.jsx +++ b/frontend/src/components/ChatWindow.jsx @@ -1,17 +1,23 @@ import { useState, useRef, useEffect } from "react"; import { exportSession } from "../utils/api"; -import { AppLogoIcon, CloseIcon, FileIcon, LockIcon, PlusCircleIcon, TemplateIcon } from "./Icons"; +import { AppLogoIcon, ChartIcon, CloseIcon, CopyIcon, FileIcon, LockIcon, PlusCircleIcon, TemplateIcon } from "./Icons"; import CodeBlockWithCopy from "./CodeBlockWithCopy"; import PromptTemplateDialog from "./PromptTemplateDialog"; export default function ChatWindow({ messages, loading, onSend, sessionId }) { const [input, setInput] = useState(""); + const [showPlusMenu, setShowPlusMenu] = useState(false); + const [showTemplateDialog, setShowTemplateDialog] = useState(false); + const [selectedTemplate, setSelectedTemplate] = useState(null); const bottomRef = useRef(null); const textareaRef = useRef(null); + const plusMenuRef = useRef(null); // NEW: state for selected messages and export format const [selectedMessages, setSelectedMessages] = useState([]); const [exportFormat, setExportFormat] = useState("markdown"); + const [copiedMsgId, setCopiedMsgId] = useState(null); + const [hoveredStatsId, setHoveredStatsId] = useState(null); useEffect(() => { bottomRef.current?.scrollIntoView({ behavior: "smooth" }); }, [messages]); @@ -26,6 +32,12 @@ export default function ChatWindow({ messages, loading, onSend, sessionId }) { return () => document.removeEventListener("mousedown", handleClickOutside); }, [showPlusMenu]); + function copyToClipboard(msgId, content) { + navigator.clipboard.writeText(content); + setCopiedMsgId(msgId); + setTimeout(() => setCopiedMsgId(null), 2000); + } + function handleSelectTemplate(template) { setSelectedTemplate(template); setShowTemplateDialog(false); @@ -279,15 +291,72 @@ export default function ChatWindow({ messages, loading, onSend, sessionId }) { )} - {msg.role === "assistant" && ( -
+ {msg.role === "assistant" && !msg.streaming && ( +
+ {/* Copy button */} + + {/* Stats hover button */} +
setHoveredStatsId(msg.id)} + onMouseLeave={() => setHoveredStatsId(null)} + > + + + {hoveredStatsId === msg.id && msg.benchmarks && Object.keys(msg.benchmarks).length > 0 && ( +
+
+

Performance

+
+
+ Time to first token + {(msg.benchmarks.ttft_ms / 1000).toFixed(2)}s +
+
+ Total duration + {(msg.benchmarks.total_duration_ms / 1000).toFixed(2)}s +
+
+ Tokens generated + {msg.benchmarks.token_count} +
+ {msg.benchmarks.memory_used_gb && ( +
+
+ RAM usage + + {msg.benchmarks.memory_used_gb} / {msg.benchmarks.memory_total_gb} GB + + i + + Total system memory in use across all processes, not just the LLM. + + + +
+
+ )} +
+
+
+ )} +
)}
@@ -316,22 +385,68 @@ export default function ChatWindow({ messages, loading, onSend, sessionId }) {
+ {/* Prompt Template Dialog */} + {showTemplateDialog && ( + { setShowTemplateDialog(false); setShowPlusMenu(false); }} + /> + )} + {/* Input Form Footer */}
-