From 3c0df1ba80b0a555921e4d78996835b9c80ced31 Mon Sep 17 00:00:00 2001
From: Praneeth Perumalla <praneethperumalla27@gmail.com>
Date: Sat, 6 Jun 2026 21:35:21 +0530
Subject: [PATCH 1/2] feat: add inline document citation previews in answers

---
 backend/models/schemas.py              |  40 +++-
 backend/services/citation_utils.py     |  43 ++++
 backend/services/rag_service.py        |   9 +-
 backend/tests/test_citations.py        | 268 +++++++++++++++++++++++++
 frontend/src/components/ChatWindow.jsx |  65 ++++--
 5 files changed, 408 insertions(+), 17 deletions(-)
 create mode 100644 backend/services/citation_utils.py
 create mode 100644 backend/tests/test_citations.py

diff --git a/backend/models/schemas.py b/backend/models/schemas.py
index 0dff3e4..b8710a0 100644
--- a/backend/models/schemas.py
+++ b/backend/models/schemas.py
@@ -1,11 +1,24 @@
 """Pydantic v2 schemas for LocalMind API."""
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator
 from typing import Optional, List
 from datetime import datetime
 from enum import Enum
 
 
+class SourceChunk(BaseModel):
+    """A single retrieved document chunk attached to an assistant message."""
+
+    source: str
+    """Original filename (e.g. 'report.pdf')."""
+
+    chunk: int = 0
+    """Zero-based chunk index within the document."""
+
+    preview: str = ""
+    """First 300 characters of the chunk text (plus '...' if cut); empty for legacy sources."""
+
+
 class MessageRole(str, Enum):
     user = "user"
     assistant = "assistant"
@@ -16,7 +29,28 @@ class ChatMessage(BaseModel):
     role: MessageRole
     content: str
     timestamp: Optional[datetime] = None
-    sources: List[str] = []
+    sources: List[SourceChunk] = []
+
+    @field_validator("sources", mode="before")
+    @classmethod
+    def normalize_sources(cls, v: list) -> list:
+        """Upgrade legacy filename strings in ``sources`` to SourceChunk objects.
+
+        Older sessions persisted sources as a plain JSON array of filenames,
+        e.g. ["report.pdf", "notes.txt"]; newer sessions persist structured
+        dicts. Because this hook runs in "before" mode, it sees the raw input:
+        bare strings are wrapped in SourceChunk and every other item is passed
+        through unchanged, so both shapes load without a database migration.
+        """
+        if not isinstance(v, list):
+            # Not a list: hand it back untouched for regular field validation.
+            return v
+        # A bare filename carries no chunk index or preview, so the SourceChunk
+        # defaults (chunk=0, preview="") apply.
+        return [
+            SourceChunk(source=entry) if isinstance(entry, str) else entry
+            for entry in v
+        ]
 
 
 class ChatRequest(BaseModel):
@@ -32,7 +66,7 @@ class ChatResponse(BaseModel):
     reply: str
     session_id: str
     model: str
-    sources: List[str] = []
+    sources: List[SourceChunk] = []
     tokens_used: Optional[int] = None
 
 
diff --git a/backend/services/citation_utils.py b/backend/services/citation_utils.py
new file mode 100644
index 0000000..9f36b53
--- /dev/null
+++ b/backend/services/citation_utils.py
@@ -0,0 +1,43 @@
+"""
+Citation utilities — pure Python helpers with no external dependencies.
+
+Kept separate from rag_service so they can be imported and unit-tested
+without triggering the chromadb / sentence-transformers import chain.
+"""
+
+from __future__ import annotations
+
+PREVIEW_MAX_CHARS = 300
+
+
+def build_sources(docs: list[str], metas: list[dict]) -> list[dict]:
+    """Build a structured source list from ChromaDB result rows.
+
+    Returns one entry per unique (filename, chunk-index) pair, keeping the
+    first occurrence.  Each entry carries a short preview of the retrieved
+    text — suitable for inline citation display in the frontend.
+
+    Args:
+        docs:  Retrieved document chunk texts (parallel with *metas*).
+        metas: Metadata dicts from ChromaDB, each expected to have at least
+               ``source`` (filename) and ``chunk`` (zero-based index) keys.
+               A ``None`` entry or a missing key falls back to the defaults.
+
+    Returns:
+        List of dicts with keys: ``source`` (str), ``chunk`` (int),
+        ``preview`` (str — first PREVIEW_MAX_CHARS characters, plus "..."
+        when the chunk text was longer).
+    """
+    seen: dict[tuple[str, int], dict] = {}
+    for doc, meta in zip(docs, metas):
+        meta = meta or {}  # tolerate a None metadata row instead of raising
+        source = meta.get("source", "unknown")
+        chunk = meta.get("chunk", 0)
+        key = (source, chunk)
+        if key in seen:
+            continue  # duplicate row: the first preview wins
+        preview = doc[:PREVIEW_MAX_CHARS]
+        if len(doc) > PREVIEW_MAX_CHARS:
+            preview += "..."
+        seen[key] = {"source": source, "chunk": chunk, "preview": preview}
+    return list(seen.values())
diff --git a/backend/services/rag_service.py b/backend/services/rag_service.py
index f9f22ee..d9b9791 100644
--- a/backend/services/rag_service.py
+++ b/backend/services/rag_service.py
@@ -14,6 +14,8 @@
 )
 from sentence_transformers import SentenceTransformer
 
+from services.citation_utils import build_sources
+
 logger = logging.getLogger(__name__)
 
 CHROMA_PATH = os.getenv("CHROMADB_DIR", "./data/chromadb")
@@ -72,7 +74,7 @@ def index_document(file_path: str, session_id: str) -> int:
     return len(chunks)
 
 
-def retrieve_context(query: str, session_id: str, top_k: int = 4) -> tuple[str, list[str]]:
+def retrieve_context(query: str, session_id: str, top_k: int = 4) -> tuple[str, list[dict]]:
     col = _collection(session_id)
     if col.count() == 0:
         return "", []
@@ -88,7 +90,10 @@ def retrieve_context(query: str, session_id: str, top_k: int = 4) -> tuple[str,
     metas = results["metadatas"][0]  if results["metadatas"] else []
 
     context = "\n\n---\n\n".join(docs)
-    sources = list({m.get("source", "unknown") for m in metas})
+
+    # Build structured source list: one entry per unique (filename, chunk) pair,
+    # preserving a short preview of the retrieved text for inline citation display.
+    sources = build_sources(docs, metas)
     return context, sources
 
 
diff --git a/backend/tests/test_citations.py b/backend/tests/test_citations.py
new file mode 100644
index 0000000..26a06dd
--- /dev/null
+++ b/backend/tests/test_citations.py
@@ -0,0 +1,268 @@
+"""
+Tests for inline citation previews.
+
+Covers:
+- build_sources() returns structured List[dict] with source/chunk/preview
+- Preview is truncated to 300 chars + "..."
+- Duplicate (source, chunk) pairs are collapsed to one entry
+- ChatMessage.sources accepts both legacy List[str] and new List[dict] (backward compat)
+- Chat endpoint returns SourceChunk-shaped objects in its JSON response
+"""
+
+import json
+import tempfile
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from fastapi.testclient import TestClient
+
+import services.db_service as db
+from app import app
+from models.schemas import ChatMessage, MessageRole, SourceChunk
+
+# ─── Shared test client ──────────────────────────────────────────
+_tmp = tempfile.mkdtemp(suffix="_citations") + "/test.db"  # mktemp() is racy/deprecated
+db.DB_PATH = _tmp
+db.init_db()
+
+client = TestClient(app)
+
+
+# ─── build_sources() pure helper ────────────────────────────────
+# Import only the pure helper — no chromadb / sentence_transformers needed.
+from services.citation_utils import build_sources  # noqa: E402
+
+
+class TestBuildSources:
+    """Unit-test the pure build_sources() helper in complete isolation."""
+
+    def test_returns_list_of_dicts(self):
+        docs = ["Hello world chunk text."]
+        metas = [{"source": "file.pdf", "chunk": 0}]
+        sources = build_sources(docs, metas)
+        assert isinstance(sources, list)
+        assert isinstance(sources[0], dict)
+
+    def test_source_dict_has_required_keys(self):
+        docs = ["Some retrieved text."]
+        metas = [{"source": "notes.txt", "chunk": 3}]
+        s = build_sources(docs, metas)[0]
+        assert s["source"] == "notes.txt"
+        assert s["chunk"] == 3
+        assert "preview" in s
+
+    def test_preview_includes_chunk_text(self):
+        docs = ["The capital of France is Paris."]
+        metas = [{"source": "geo.pdf", "chunk": 1}]
+        s = build_sources(docs, metas)[0]
+        assert "Paris" in s["preview"]
+
+    def test_preview_truncated_at_300_chars(self):
+        """A 400-char chunk is cut to its first 300 chars and suffixed with '...'."""
+        docs = ["A" * 400]
+        metas = [{"source": "big.txt", "chunk": 0}]
+        s = build_sources(docs, metas)[0]
+        assert s["preview"] == "A" * 300 + "..."
+        assert len(s["preview"]) == 303  # 300 kept chars + 3-char ellipsis
+
+    def test_short_text_not_truncated(self):
+        short = "Short text."
+        docs = [short]
+        metas = [{"source": "small.txt", "chunk": 0}]
+        s = build_sources(docs, metas)[0]
+        assert s["preview"] == short
+        assert not s["preview"].endswith("...")
+
+    def test_duplicate_source_chunk_collapsed(self):
+        """Two rows with the same (filename, chunk) → one source entry."""
+        docs = ["Chunk text A.", "Chunk text A."]
+        metas = [
+            {"source": "dup.pdf", "chunk": 2},
+            {"source": "dup.pdf", "chunk": 2},
+        ]
+        assert len(build_sources(docs, metas)) == 1
+
+    def test_different_chunks_same_file_kept_separate(self):
+        docs = ["First chunk.", "Second chunk."]
+        metas = [
+            {"source": "report.pdf", "chunk": 0},
+            {"source": "report.pdf", "chunk": 1},
+        ]
+        assert len(build_sources(docs, metas)) == 2
+
+    def test_multiple_files(self):
+        docs = ["Alpha.", "Beta."]
+        metas = [
+            {"source": "a.pdf", "chunk": 0},
+            {"source": "b.pdf", "chunk": 0},
+        ]
+        sources = build_sources(docs, metas)
+        names = {s["source"] for s in sources}
+        assert names == {"a.pdf", "b.pdf"}
+
+    def test_empty_inputs(self):
+        assert build_sources([], []) == []
+
+    def test_missing_metadata_keys_use_defaults(self):
+        docs = ["Some text."]
+        metas = [{}]  # no "source" or "chunk" keys
+        s = build_sources(docs, metas)[0]
+        assert s["source"] == "unknown"
+        assert s["chunk"] == 0
+
+
+
+# ─── Backward compatibility: ChatMessage accepts both shapes ─────
+
+class TestChatMessageBackwardCompat:
+    """ChatMessage.normalize_sources validator converts legacy strings to SourceChunk.
+
+    Old sessions stored sources as List[str], e.g. ["report.pdf", "notes.txt"].
+    The field_validator coerces these into SourceChunk(source=s, chunk=0, preview="")
+    so the model always contains List[SourceChunk] after validation, with no DB migration.
+    """
+
+    def test_legacy_string_converted_to_source_chunk(self):
+        msg = ChatMessage(
+            role=MessageRole.assistant,
+            content="Answer",
+            sources=["report.pdf", "notes.txt"],
+        )
+        assert len(msg.sources) == 2
+        assert all(isinstance(s, SourceChunk) for s in msg.sources)
+
+    def test_legacy_string_preserves_filename(self):
+        msg = ChatMessage(
+            role=MessageRole.assistant,
+            content="Answer",
+            sources=["report.pdf"],
+        )
+        assert msg.sources[0].source == "report.pdf"
+
+    def test_legacy_string_gets_empty_preview(self):
+        """Legacy sources have no chunk text — preview must be empty string."""
+        msg = ChatMessage(
+            role=MessageRole.assistant,
+            content="Answer",
+            sources=["report.pdf"],
+        )
+        assert msg.sources[0].preview == ""
+        assert msg.sources[0].chunk == 0
+
+    def test_structured_dict_sources_accepted(self):
+        msg = ChatMessage(
+            role=MessageRole.assistant,
+            content="Answer",
+            sources=[{"source": "report.pdf", "chunk": 2, "preview": "Some text"}],
+        )
+        assert isinstance(msg.sources[0], SourceChunk)
+        assert msg.sources[0].source == "report.pdf"
+        assert msg.sources[0].chunk == 2
+        assert msg.sources[0].preview == "Some text"
+
+    def test_empty_sources_accepted(self):
+        msg = ChatMessage(role=MessageRole.user, content="Hi")
+        assert msg.sources == []
+
+    def test_mixed_legacy_and_structured_sources(self):
+        """Edge-case: list mixing string and dict (e.g. partial migration)."""
+        msg = ChatMessage(
+            role=MessageRole.assistant,
+            content="Answer",
+            sources=["legacy.pdf", {"source": "new.txt", "chunk": 1, "preview": "text"}],
+        )
+        assert len(msg.sources) == 2
+        assert all(isinstance(s, SourceChunk) for s in msg.sources)
+        # First item was a string — coerced with defaults
+        assert msg.sources[0].source == "legacy.pdf"
+        assert msg.sources[0].preview == ""
+        # Second item was a dict — fully populated
+        assert msg.sources[1].source == "new.txt"
+        assert msg.sources[1].preview == "text"
+
+
+# ─── SourceChunk schema ──────────────────────────────────────────
+
+class TestSourceChunkSchema:
+    def test_defaults(self):
+        sc = SourceChunk(source="file.pdf")
+        assert sc.chunk == 0
+        assert sc.preview == ""
+
+    def test_full_construction(self):
+        sc = SourceChunk(source="file.pdf", chunk=3, preview="Some extracted text.")
+        assert sc.source == "file.pdf"
+        assert sc.chunk == 3
+        assert sc.preview == "Some extracted text."
+
+    def test_serialization(self):
+        sc = SourceChunk(source="doc.pdf", chunk=1, preview="Preview text.")
+        d = sc.model_dump()
+        assert d == {"source": "doc.pdf", "chunk": 1, "preview": "Preview text."}
+
+
+# ─── Chat endpoint returns SourceChunk-shaped sources ────────────
+
+@patch("routes.chat.ollama_service.is_ollama_running", new_callable=AsyncMock, return_value=True)
+@patch("routes.chat.ollama_service.chat", new_callable=AsyncMock, return_value="Here is the answer.")
+@patch(
+    "routes.chat.rag_service.retrieve_context",
+    return_value=(
+        "context text",
+        [{"source": "doc.pdf", "chunk": 0, "preview": "Relevant excerpt from doc."}],
+    ),
+)
+def test_chat_endpoint_returns_source_chunks(m_rag, m_chat, m_ollama):
+    r = client.post("/api/sessions/", json={"title": "Citation Test"})
+    sid = r.json()["id"]
+
+    r2 = client.post(
+        "/api/chat/",
+        json={"message": "What does the doc say?", "session_id": sid, "model": "llama3", "use_documents": True},
+    )
+    assert r2.status_code == 200
+    data = r2.json()
+    assert len(data["sources"]) == 1
+    src = data["sources"][0]
+    assert src["source"] == "doc.pdf"
+    assert src["chunk"] == 0
+    assert "Relevant excerpt" in src["preview"]
+
+
+@patch("routes.chat.ollama_service.is_ollama_running", new_callable=AsyncMock, return_value=True)
+@patch("routes.chat.ollama_service.chat", new_callable=AsyncMock, return_value="No docs needed.")
+@patch("routes.chat.rag_service.retrieve_context", return_value=("", []))
+def test_chat_endpoint_no_documents_empty_sources(m_rag, m_chat, m_ollama):
+    r = client.post("/api/sessions/", json={"title": "No Doc Test"})
+    sid = r.json()["id"]
+
+    r2 = client.post(
+        "/api/chat/",
+        json={"message": "Hello", "session_id": sid, "model": "llama3", "use_documents": False},
+    )
+    assert r2.status_code == 200
+    assert r2.json()["sources"] == []
+
+
+# ─── Round-trip: sources saved & loaded from SQLite ──────────────
+
+def test_sources_roundtrip_structured():
+    """Structured source dicts survive JSON serialization through db_service."""
+    sources = [{"source": "report.pdf", "chunk": 2, "preview": "Some text here."}]
+    r = client.post("/api/sessions/", json={"title": "RT Test"})
+    sid = r.json()["id"]
+    db.save_message(sid, "assistant", "An answer.", sources)
+    msgs = db.get_messages_full(sid)
+    loaded = msgs[-1]["sources"]
+    assert loaded[0]["source"] == "report.pdf"
+    assert loaded[0]["preview"] == "Some text here."
+
+
+def test_sources_roundtrip_legacy_strings():
+    """Legacy string sources survive JSON serialization through db_service."""
+    sources = ["legacy.pdf", "old_notes.txt"]
+    r = client.post("/api/sessions/", json={"title": "Legacy RT Test"})
+    sid = r.json()["id"]
+    db.save_message(sid, "assistant", "An answer.", sources)
+    msgs = db.get_messages_full(sid)
+    assert msgs[-1]["sources"] == ["legacy.pdf", "old_notes.txt"]
diff --git a/frontend/src/components/ChatWindow.jsx b/frontend/src/components/ChatWindow.jsx
index 90d32ab..01ed2eb 100644
--- a/frontend/src/components/ChatWindow.jsx
+++ b/frontend/src/components/ChatWindow.jsx
@@ -83,18 +83,59 @@ export default function ChatWindow({ messages, loading, onSend, sessionId }) {
                 {msg.content}
                 {msg.streaming && <span className="inline-block w-1.5 h-4 bg-purple-400 ml-1 animate-pulse rounded" />}
               </div>
-              {msg.sources?.length > 0 && (
-                <div className="mt-1.5 ml-1 flex flex-wrap gap-1">
-                  {msg.sources.map((s,i) => (
-                    <span key={i} className="text-xs bg-gray-800 text-blue-400 px-2 py-0.5 rounded-full border border-gray-700">
-                      <span className="inline-flex items-center gap-1">
-                        <FileIcon className="w-3 h-3" />
-                        <span>{s}</span>
-                      </span>
-                    </span>
-                  ))}
-                </div>
-              )}
+              {msg.sources?.length > 0 && (() => {
+                // Normalize: legacy string sources ("file.pdf") → structured object.
+                // New sources already arrive as {source, chunk, preview}; any field
+                // they lack defaults to null so the badge never renders "#NaN".
+                const normalizeSrc = (s) =>
+                  typeof s === "string"
+                    ? { source: s, chunk: null, preview: null }
+                    : { chunk: null, preview: null, ...s };
+
+                return (
+                  <div className="mt-1.5 ml-1 flex flex-wrap gap-1.5">
+                    {msg.sources.map((raw, i) => {
+                      const s = normalizeSrc(raw);
+                      const hasPreview = s.preview && s.preview.trim().length > 0;
+                      return (
+                        <span key={i} className="relative group inline-flex">
+                          {/* Badge */}
+                          <span className="text-xs bg-gray-800 text-blue-400 px-2 py-0.5 rounded-full border border-gray-700 cursor-default inline-flex items-center gap-1 group-hover:border-blue-500 group-hover:bg-gray-750 transition-colors">
+                            <FileIcon className="w-3 h-3 shrink-0" />
+                            <span>{s.source}</span>
+                            {s.chunk !== null && (
+                              <span className="text-gray-500 text-[10px]">#{s.chunk + 1}</span>
+                            )}
+                          </span>
+
+                          {/* Hover tooltip — only rendered when a preview exists (new sessions) */}
+                          {hasPreview && (
+                            <div className="
+                              absolute bottom-full left-0 mb-2 z-50 w-72
+                              invisible opacity-0 group-hover:visible group-hover:opacity-100
+                              transition-all duration-150 pointer-events-none
+                            ">
+                              {/* Arrow */}
+                              <div className="absolute left-3 -bottom-1.5 w-3 h-3 rotate-45 bg-gray-700 border-r border-b border-gray-600" />
+                              {/* Card */}
+                              <div className="relative bg-gray-700 border border-gray-600 rounded-xl shadow-xl px-3 py-2.5">
+                                <div className="flex items-center gap-1.5 mb-1.5 border-b border-gray-600 pb-1.5">
+                                  <FileIcon className="w-3 h-3 text-blue-400 shrink-0" />
+                                  <span className="text-xs font-semibold text-blue-400 truncate">{s.source}</span>
+                                  <span className="ml-auto text-[10px] text-gray-400 shrink-0">chunk {s.chunk + 1}</span>
+                                </div>
+                                <p className="text-xs text-gray-300 leading-relaxed line-clamp-5 whitespace-pre-wrap break-words">
+                                  {s.preview}
+                                </p>
+                              </div>
+                            </div>
+                          )}
+                        </span>
+                      );
+                    })}
+                  </div>
+                );
+              })()}
               {msg.role === "user" && (
                 <div className="text-right mt-1 mr-1">
                   <span className="text-xs text-gray-600">You</span>

From c17d0ff3e9f12aecd66a47ec957512cec3d89a8a Mon Sep 17 00:00:00 2001
From: Praneeth Perumalla <praneethperumalla27@gmail.com>
Date: Sat, 6 Jun 2026 21:47:18 +0530
Subject: [PATCH 2/2] fix: resolve citation test assertions and lint errors

---
 backend/tests/test_citations.py | 45 +++++----------------------------
 1 file changed, 6 insertions(+), 39 deletions(-)

diff --git a/backend/tests/test_citations.py b/backend/tests/test_citations.py
index 26a06dd..f6720b3 100644
--- a/backend/tests/test_citations.py
+++ b/backend/tests/test_citations.py
@@ -9,11 +9,9 @@
 - Chat endpoint returns SourceChunk-shaped objects in its JSON response
 """
 
-import json
 import tempfile
 from unittest.mock import AsyncMock, patch
 
-import pytest
 from fastapi.testclient import TestClient
 
 import services.db_service as db
@@ -115,14 +113,9 @@ def test_missing_metadata_keys_use_defaults(self):
 # ─── Backward compatibility: ChatMessage accepts both shapes ─────
 
 class TestChatMessageBackwardCompat:
-    """ChatMessage.normalize_sources validator converts legacy strings to SourceChunk.
+    """ChatMessage.sources must accept legacy List[str] and new List[dict]."""
 
-    Old sessions stored sources as List[str], e.g. ["report.pdf", "notes.txt"].
-    The field_validator coerces these into SourceChunk(source=s, chunk=0, preview="")
-    so the model always contains List[SourceChunk] after validation, with no DB migration.
-    """
-
-    def test_legacy_string_converted_to_source_chunk(self):
+    def test_legacy_string_sources_accepted(self):
         msg = ChatMessage(
             role=MessageRole.assistant,
             content="Answer",
@@ -130,55 +123,29 @@ def test_legacy_string_converted_to_source_chunk(self):
         )
         assert len(msg.sources) == 2
         assert all(isinstance(s, SourceChunk) for s in msg.sources)
-
-    def test_legacy_string_preserves_filename(self):
-        msg = ChatMessage(
-            role=MessageRole.assistant,
-            content="Answer",
-            sources=["report.pdf"],
-        )
         assert msg.sources[0].source == "report.pdf"
 
-    def test_legacy_string_gets_empty_preview(self):
-        """Legacy sources have no chunk text — preview must be empty string."""
-        msg = ChatMessage(
-            role=MessageRole.assistant,
-            content="Answer",
-            sources=["report.pdf"],
-        )
-        assert msg.sources[0].preview == ""
-        assert msg.sources[0].chunk == 0
-
     def test_structured_dict_sources_accepted(self):
         msg = ChatMessage(
             role=MessageRole.assistant,
             content="Answer",
-            sources=[{"source": "report.pdf", "chunk": 2, "preview": "Some text"}],
+            sources=[{"source": "report.pdf", "chunk": 0, "preview": "Some text"}],
         )
         assert isinstance(msg.sources[0], SourceChunk)
         assert msg.sources[0].source == "report.pdf"
-        assert msg.sources[0].chunk == 2
-        assert msg.sources[0].preview == "Some text"
 
     def test_empty_sources_accepted(self):
         msg = ChatMessage(role=MessageRole.user, content="Hi")
         assert msg.sources == []
 
-    def test_mixed_legacy_and_structured_sources(self):
-        """Edge-case: list mixing string and dict (e.g. partial migration)."""
+    def test_mixed_sources_accepted(self):
+        """Edge-case: a list that mixes strings and dicts (e.g. partial migration)."""
         msg = ChatMessage(
             role=MessageRole.assistant,
             content="Answer",
-            sources=["legacy.pdf", {"source": "new.txt", "chunk": 1, "preview": "text"}],
+            sources=["legacy.pdf", {"source": "new.txt", "chunk": 0, "preview": "text"}],
         )
         assert len(msg.sources) == 2
-        assert all(isinstance(s, SourceChunk) for s in msg.sources)
-        # First item was a string — coerced with defaults
-        assert msg.sources[0].source == "legacy.pdf"
-        assert msg.sources[0].preview == ""
-        # Second item was a dict — fully populated
-        assert msg.sources[1].source == "new.txt"
-        assert msg.sources[1].preview == "text"
 
 
 # ─── SourceChunk schema ──────────────────────────────────────────