From 1e6f96f0a1b173bf6c47b0e9c5f1c4e4396fa3eb Mon Sep 17 00:00:00 2001
From: SagarSawlani <sawlanisagar1@gmail.com>
Date: Thu, 11 Jun 2026 04:42:05 +0530
Subject: [PATCH 1/2] feat: add streamed response performance benchmarks

---
 backend/models/schemas.py              |   1 +
 backend/requirements.txt               |   1 +
 backend/routes/chat.py                 |  34 +++++-
 backend/services/db_service.py         |  14 ++-
 frontend/src/App.jsx                   |   4 +-
 frontend/src/components/ChatWindow.jsx | 151 ++++++++++++++++++++++---
 frontend/src/components/Icons.jsx      |  17 +++
 frontend/src/utils/api.js              |   2 +-
 8 files changed, 195 insertions(+), 29 deletions(-)

diff --git a/backend/models/schemas.py b/backend/models/schemas.py
index 75a4805..71754b6 100644
--- a/backend/models/schemas.py
+++ b/backend/models/schemas.py
@@ -17,6 +17,7 @@ class ChatMessage(BaseModel):
     content: str
     timestamp: Optional[datetime] = None
     sources: List[str] = []
+    benchmarks: Optional[dict] = None
 
 
 class ChatRequest(BaseModel):
diff --git a/backend/requirements.txt b/backend/requirements.txt
index d9dcaef..fd58dbd 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -14,3 +14,4 @@ python-dotenv==1.0.1
 httpx==0.27.0
 pytest==8.3.0
 pytest-asyncio==0.24.0
+psutil
\ No newline at end of file
diff --git a/backend/routes/chat.py b/backend/routes/chat.py
index 4fb781b..37db39c 100644
--- a/backend/routes/chat.py
+++ b/backend/routes/chat.py
@@ -9,8 +9,14 @@
 from models.schemas import ChatRequest, ChatResponse
 from services import ollama_service, db_service
 
-router = APIRouter()
+import time 
+import psutil
+
+def _get_memory_usage():
+    mem = psutil.virtual_memory()
+    return round(mem.used / (1024 ** 3), 1), round(mem.total / (1024 ** 3), 1)
 
+router = APIRouter()
 
 def _retrieve_context(*args, **kwargs):
     from services import rag_service as rag_service_module
@@ -57,6 +63,9 @@ async def chat_stream(req: ChatRequest):
     """Streaming chat — returns Server-Sent Events."""
     if not await ollama_service.is_ollama_running():
         raise HTTPException(503, "Ollama not running. Run: `ollama serve`")
+    
+    first_token_time = None 
+    start_time = time.perf_counter()
 
     db_service.create_session(req.session_id, model=req.model)
     history = db_service.get_history(req.session_id)
@@ -70,6 +79,8 @@ async def chat_stream(req: ChatRequest):
     full_reply = []
 
     async def event_stream():
+        nonlocal first_token_time
+        token_count = 0
         async for token in ollama_service.chat_stream(
             message=req.message,
             model=req.model,
@@ -78,12 +89,29 @@ async def event_stream():
             language=req.language,
             temperature=req.temperature,
         ):
+            if first_token_time == None:
+                first_token_time = time.perf_counter()
             full_reply.append(token)
+            token_count += 1
             yield f"data: {json.dumps({'token': token})}\n\n"
 
+        end_time = time.perf_counter()
+
         complete = "".join(full_reply)
-        db_service.save_message(req.session_id, "assistant", complete, sources)
-        yield f"data: {json.dumps({'done': True, 'sources': sources})}\n\n"
+        ttft_ms = round((first_token_time - start_time) * 1000) if first_token_time is not None else 0  # 0 => no token was streamed
+        total_duration_ms = round((end_time - start_time) * 1000)
+        memory_used_gb, memory_total_gb = _get_memory_usage()
+
+        benchmarks = {
+            "ttft_ms": ttft_ms,
+            "total_duration_ms": total_duration_ms,
+            "token_count": token_count,
+            "memory_used_gb": memory_used_gb,
+            "memory_total_gb": memory_total_gb,
+        }
+
+        db_service.save_message(req.session_id, "assistant", complete, sources, benchmarks)
+        yield f"data: {json.dumps({'done': True, 'sources': sources, 'benchmarks': benchmarks})}\n\n"
         
 
     return StreamingResponse(event_stream(), media_type="text/event-stream")
diff --git a/backend/services/db_service.py b/backend/services/db_service.py
index dd33814..39aae8b 100644
--- a/backend/services/db_service.py
+++ b/backend/services/db_service.py
@@ -76,6 +76,7 @@ def init_db():
                 content TEXT NOT NULL,
                 sources TEXT DEFAULT '[]',
                 created_at TEXT DEFAULT (datetime('now')),
+                benchmarks TEXT DEFAULT '{}',
                 FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE CASCADE
             );
 
@@ -123,7 +124,9 @@ def init_db():
 
         """)
 
-
+        cols = [row[1] for row in conn.execute("PRAGMA table_info(messages)").fetchall()]
+        if "benchmarks" not in cols:
+            conn.execute("ALTER TABLE messages ADD COLUMN benchmarks TEXT DEFAULT '{}'")
 # ─── Sessions ────────────────────────────────────────────────
 def create_session(session_id: str, title: str = "New Chat", model: str = "llama3") -> dict:
     with get_db() as conn:
@@ -162,12 +165,12 @@ def get_all_sessions() -> list[dict]:
 
 
 # ─── Messages ────────────────────────────────────────────────
-def save_message(session_id: str, role: str, content: str, sources: list = None):
+def save_message(session_id: str, role: str, content: str, sources: list = None, benchmarks: dict = None):
     sources = sources or []
     with get_db() as conn:
         conn.execute(
-            "INSERT INTO messages (session_id, role, content, sources) VALUES (?,?,?,?)",
-            (session_id, role, content, json.dumps(sources)),
+            "INSERT INTO messages (session_id, role, content, sources, benchmarks) VALUES (?,?,?,?,?)",
+            (session_id, role, content, json.dumps(sources), json.dumps(benchmarks or {})),  # persist '{}' (the column default), never JSON null
         )
         conn.execute(
             "UPDATE sessions SET updated_at=datetime('now'), message_count=message_count+1 WHERE id=?",
@@ -195,7 +198,7 @@ def get_history(session_id: str, limit: int = 20) -> list[dict]:
 def get_messages_full(session_id: str) -> list[dict]:
     with get_db() as conn:
         rows = conn.execute(
-            "SELECT role, content, sources, created_at FROM messages WHERE session_id=? ORDER BY created_at ASC",
+            "SELECT role, content, sources, created_at, benchmarks FROM messages WHERE session_id=? ORDER BY created_at ASC",
             (session_id,),
         ).fetchall()
         return [
@@ -204,6 +207,7 @@ def get_messages_full(session_id: str) -> list[dict]:
                 "content": r["content"],
                 "sources": json.loads(r["sources"] or "[]"),
                 "created_at": r["created_at"],
+                "benchmarks": json.loads(r["benchmarks"] or "{}"),  # fallback must be JSON text; json.loads rejects a dict
             }
             for r in rows
         ]
diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx
index 73529f0..2d55836 100644
--- a/frontend/src/App.jsx
+++ b/frontend/src/App.jsx
@@ -77,8 +77,8 @@ export default function App() {
         await api.streamMessage(
           { message: text, session_id: sessionId, model, use_documents: documents.length > 0, language },
           (token) => setMessages(prev => prev.map(m => m.id === aiMsg.id ? { ...m, content: m.content + token } : m)),
-          (sources) => {
-            setMessages(prev => prev.map(m => m.id === aiMsg.id ? { ...m, sources, streaming: false } : m));
+          (sources, benchmarks) => {
+            setMessages(prev => prev.map(m => m.id === aiMsg.id ? { ...m, sources, benchmarks, streaming: false } : m));
             refreshSessions();
           }
         );
diff --git a/frontend/src/components/ChatWindow.jsx b/frontend/src/components/ChatWindow.jsx
index 2faadb0..25aff2d 100644
--- a/frontend/src/components/ChatWindow.jsx
+++ b/frontend/src/components/ChatWindow.jsx
@@ -1,17 +1,23 @@
 import { useState, useRef, useEffect } from "react";
 import { exportSession } from "../utils/api";
-import { AppLogoIcon, CloseIcon, FileIcon, LockIcon, PlusCircleIcon, TemplateIcon } from "./Icons";
+import { AppLogoIcon, ChartIcon, CloseIcon, CopyIcon, FileIcon, LockIcon, PlusCircleIcon, TemplateIcon } from "./Icons";
 import CodeBlockWithCopy from "./CodeBlockWithCopy";
 import PromptTemplateDialog from "./PromptTemplateDialog";
 
 export default function ChatWindow({ messages, loading, onSend, sessionId }) {
   const [input, setInput] = useState("");
+  const [showPlusMenu, setShowPlusMenu] = useState(false);
+  const [showTemplateDialog, setShowTemplateDialog] = useState(false);
+  const [selectedTemplate, setSelectedTemplate] = useState(null);
   const bottomRef = useRef(null);
   const textareaRef = useRef(null);
+  const plusMenuRef = useRef(null);
 
   // NEW: state for selected messages and export format
   const [selectedMessages, setSelectedMessages] = useState([]);
   const [exportFormat, setExportFormat] = useState("markdown");
+  const [copiedMsgId, setCopiedMsgId] = useState(null);
+  const [hoveredStatsId, setHoveredStatsId] = useState(null);
 
   useEffect(() => { bottomRef.current?.scrollIntoView({ behavior: "smooth" }); }, [messages]);
 
@@ -26,6 +32,12 @@ export default function ChatWindow({ messages, loading, onSend, sessionId }) {
     return () => document.removeEventListener("mousedown", handleClickOutside);
   }, [showPlusMenu]);
 
+  // Show the "copied" tick only after the async clipboard write has actually succeeded.
+  function copyToClipboard(msgId, content) {
+    const markCopied = () => { setCopiedMsgId(msgId); setTimeout(() => setCopiedMsgId(null), 2000); };
+    navigator.clipboard?.writeText(content).then(markCopied).catch(() => setCopiedMsgId(null));
+  }
+
   function handleSelectTemplate(template) {
     setSelectedTemplate(template);
     setShowTemplateDialog(false);
@@ -279,15 +291,72 @@ export default function ChatWindow({ messages, loading, onSend, sessionId }) {
                   </button>
                 </div>
               )}
-              {msg.role === "assistant" && (
-                <div className="flex justify-end mt-1 mr-1">
+              {msg.role === "assistant" && !msg.streaming && (
+                <div className="flex justify-end mt-1.5 mr-1 items-center gap-1">
+                  {/* Copy button */}
                   <button
-                    onClick={() => exportSingleMessage(msg.id)}
-                    className="text-xs text-gray-500 hover:text-purple-400 transition"
-                    title="Export this message"
+                    onClick={() => copyToClipboard(msg.id, msg.content)}
+                    className="p-1 rounded hover:bg-gray-800 text-gray-500 hover:text-gray-300 transition"
+                    title="Copy response"
                   >
-                    ↓
+                    {copiedMsgId === msg.id ? (
+                      <svg className="w-4 h-4 text-green-400" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"><path d="M20 6 9 17l-5-5" /></svg>
+                    ) : (
+                      <CopyIcon className="w-4 h-4" />
+                    )}
                   </button>
+
+                  {/* Stats hover button */}
+                  <div
+                    className="relative"
+                    onMouseEnter={() => setHoveredStatsId(msg.id)}
+                    onMouseLeave={() => setHoveredStatsId(null)}
+                  >
+                    <button
+                      className="p-1 rounded hover:bg-gray-800 text-gray-500 hover:text-gray-300 transition"
+                      title="Performance stats"
+                    >
+                      <ChartIcon className="w-4 h-4" />
+                    </button>
+
+                    {hoveredStatsId === msg.id && msg.benchmarks && Object.keys(msg.benchmarks).length > 0 && (
+                      <div className="absolute right-0 bottom-0 translate-x-full pl-2 z-50">
+                        <div className="bg-gray-900 border border-gray-700 rounded-lg p-3 shadow-xl min-w-[220px]">
+                        <p className="text-xs font-semibold text-gray-300 mb-2">Performance</p>
+                        <div className="space-y-1.5 text-xs text-gray-400">
+                          <div className="flex justify-between">
+                            <span>Time to first token</span>
+                            <span className="text-gray-300">{(msg.benchmarks.ttft_ms / 1000).toFixed(2)}s</span>
+                          </div>
+                          <div className="flex justify-between">
+                            <span>Total duration</span>
+                            <span className="text-gray-300">{(msg.benchmarks.total_duration_ms / 1000).toFixed(2)}s</span>
+                          </div>
+                          <div className="flex justify-between">
+                            <span>Tokens generated</span>
+                            <span className="text-gray-300">{msg.benchmarks.token_count}</span>
+                          </div>
+                          {msg.benchmarks.memory_used_gb != null /* a bare 0 would render as text */ && (
+                            <div>
+                              <div className="flex justify-between items-center">
+                                <span>RAM usage</span>
+                                <span className="inline-flex items-center gap-1 text-gray-300">
+                                  {msg.benchmarks.memory_used_gb} / {msg.benchmarks.memory_total_gb} GB
+                                  <span className="group relative">
+                                    <span className="inline-flex items-center justify-center w-3.5 h-3.5 rounded-full border border-gray-600 text-gray-500 text-[9px] font-bold cursor-help leading-none">i</span>
+                                    <span className="hidden group-hover:block absolute right-0 top-full mt-1 bg-gray-800 border border-gray-600 rounded-md px-2 py-1.5 text-[10px] text-gray-400 w-[180px] leading-tight z-50 shadow-lg">
+                                      Total system memory in use across all processes, not just the LLM.
+                                    </span>
+                                  </span>
+                                </span>
+                              </div>
+                            </div>
+                          )}
+                        </div>
+                        </div>
+                      </div>
+                    )}
+                  </div>
                 </div>
               )}
             </div>
@@ -316,22 +385,68 @@ export default function ChatWindow({ messages, loading, onSend, sessionId }) {
         <div ref={bottomRef} />
       </div>
 
+      {/* Prompt Template Dialog */}
+      {showTemplateDialog && (
+        <PromptTemplateDialog
+          onSelect={handleSelectTemplate}
+          onClose={() => { setShowTemplateDialog(false); setShowPlusMenu(false); }}
+        />
+      )}
+
       {/* Input Form Footer */}
       <div className="px-4 pb-4 pt-2 shrink-0">
         <div className="flex items-end gap-2 bg-gray-900 border border-gray-700 rounded-2xl px-4 py-3 focus-within:border-purple-500 transition-colors">
-          <textarea
-            ref={textareaRef}
-            value={input}
-            onChange={(e) => { setInput(e.target.value); autoResize(e); }}
-            onKeyDown={handleKey}
-            placeholder="Ask anything... (Enter to send, Shift+Enter for new line)"
-            rows={1}
-            className="flex-1 bg-transparent text-sm text-gray-100 placeholder-gray-500 resize-none outline-none"
-            style={{ minHeight: "24px", maxHeight: "160px" }}
-          />
+          {/* Plus button for prompt templates */}
+          <div className="relative shrink-0" ref={plusMenuRef}>
+            <button
+              onClick={() => setShowPlusMenu(p => !p)}
+              className="p-1 text-gray-500 hover:text-purple-400 transition"
+              title="Insert prompt template"
+            >
+              <PlusCircleIcon className="w-5 h-5" />
+            </button>
+            {showPlusMenu && (
+              <div className="absolute bottom-full mb-2 left-0 bg-gray-800 border border-gray-700 rounded-lg shadow-xl py-1 min-w-[180px] z-50">
+                <button
+                  onClick={() => { setShowTemplateDialog(true); }}
+                  className="w-full text-left px-3 py-2 text-sm text-gray-300 hover:bg-gray-700 hover:text-purple-300 transition flex items-center gap-2"
+                >
+                  <TemplateIcon className="w-4 h-4" />
+                  Use Prompt Template
+                </button>
+              </div>
+            )}
+          </div>
+
+          {/* Selected template chip */}
+          <div className="flex-1 flex flex-col gap-1">
+            {selectedTemplate && (
+              <div className="flex items-center gap-1.5 bg-gray-800 rounded-lg px-2.5 py-1 w-fit">
+                <TemplateIcon className="w-3.5 h-3.5 text-purple-400" />
+                <span className="text-xs text-gray-300">{selectedTemplate.prompt_title}</span>
+                <button
+                  onClick={() => setSelectedTemplate(null)}
+                  className="text-gray-500 hover:text-gray-300 transition"
+                >
+                  <CloseIcon className="w-3 h-3" />
+                </button>
+              </div>
+            )}
+            <textarea
+              ref={textareaRef}
+              value={input}
+              onChange={(e) => { setInput(e.target.value); autoResize(e); }}
+              onKeyDown={handleKey}
+              placeholder="Ask anything... (Enter to send, Shift+Enter for new line)"
+              rows={1}
+              className="bg-transparent text-sm text-gray-100 placeholder-gray-500 resize-none outline-none w-full"
+              style={{ minHeight: "24px", maxHeight: "160px" }}
+            />
+          </div>
+
           <button 
             onClick={send} 
-            disabled={!input.trim() || loading}
+            disabled={(!input.trim() && !selectedTemplate) || loading}
             className="shrink-0 text-sm bg-purple-600 hover:bg-purple-500 disabled:opacity-40 disabled:cursor-not-allowed text-white px-4 py-2 rounded-xl transition font-medium"
           >
             Send →
diff --git a/frontend/src/components/Icons.jsx b/frontend/src/components/Icons.jsx
index 18b6172..248e9cd 100644
--- a/frontend/src/components/Icons.jsx
+++ b/frontend/src/components/Icons.jsx
@@ -300,3 +300,20 @@ export function TemplateIcon(props) {
     </Icon>
   );
 }
+
+export function CopyIcon(props) {
+  return (
+    <Icon {...props}>
+      <rect x="9" y="9" width="13" height="13" rx="2" ry="2" />
+      <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1" />
+    </Icon>
+  );
+}
+
+export function ChartIcon(props) {
+  return (
+    <Icon {...props}>
+      <polyline points="22 12 18 12 15 21 9 3 6 12 2 12" />
+    </Icon>
+  );
+}
diff --git a/frontend/src/utils/api.js b/frontend/src/utils/api.js
index 53ba1b9..eacdb67 100644
--- a/frontend/src/utils/api.js
+++ b/frontend/src/utils/api.js
@@ -54,7 +54,7 @@ export function streamMessage(body, onToken, onDone) {
         if (done) return;
         decoder.decode(value).split("\n").forEach(line => {
           if (line.startsWith("data: ")) {
-            try { const d = JSON.parse(line.slice(6)); if (d.token) onToken(d.token); if (d.done) onDone(d.sources||[]); } catch {}
+            try { const d = JSON.parse(line.slice(6)); if (d.token) onToken(d.token); if (d.done) onDone(d.sources||[], d.benchmarks||null); } catch {}
           }
         });
         return pump();

From bf60b3a4ce991d21b5ea26313b4810c1212751b4 Mon Sep 17 00:00:00 2001
From: SagarSawlani <sawlanisagar1@gmail.com>
Date: Thu, 11 Jun 2026 20:32:34 +0530
Subject: [PATCH 2/2] Fix Ruff lint error

---
 backend/routes/chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/routes/chat.py b/backend/routes/chat.py
index 37db39c..42e4e87 100644
--- a/backend/routes/chat.py
+++ b/backend/routes/chat.py
@@ -89,7 +89,7 @@ async def event_stream():
             language=req.language,
             temperature=req.temperature,
         ):
-            if first_token_time == None:
+            if first_token_time is None:
                 first_token_time = time.perf_counter()
             full_reply.append(token)
             token_count += 1