codedogQBY · codedogQBY · Jun 12, 2026
diff --git a/packages/app/src/lib/rag/book-extractor.ts b/packages/app/src/lib/rag/book-extractor.ts
@@ -4,7 +4,7 @@
  * text segments with EPUB CFI references for precise navigation.
  */
 import { DocumentLoader } from "@/lib/reader/document-loader";
-import type { TOCItem } from "@/lib/reader/document-loader";
+import { buildChapterSectionGroups } from "@readany/core/rag";
 import * as CFI from "foliate-js/epubcfi.js";
 
 export interface TextSegment {
@@ -35,32 +35,34 @@ export async function extractBookChapters(filePath: string): Promise<ChapterData
 
   const sections = book.sections ?? [];
   const toc = book.toc ?? [];
-  const tocMap = buildTocMap(toc);
+  const chapterGroups = buildChapterSectionGroups(sections, toc);
 
   const chapters: ChapterData[] = [];
 
-  for (let i = 0; i < sections.length; i++) {
-    const section = sections[i];
-    if (!section.createDocument) continue;
+  for (const group of chapterGroups) {
+    const chapterSegments: TextSegment[] = [];
 
-    try {
-      const doc = await section.createDocument();
-      const body = doc.body;
-      if (!body) continue;
-
-      const title = tocMap.get(i) ?? tocMap.get(section.href ?? "") ?? `Section ${i + 1}`;
-      const baseCfi = section.cfi || CFI.fake.fromIndex(i);
+    for (const sectionIndex of group.sectionIndices) {
+      const section = sections[sectionIndex];
+      if (!section?.createDocument) continue;
 
-      const segments = extractSegmentsWithCfi(doc, baseCfi);
+      try {
+        const doc = await section.createDocument();
+        const body = doc.body;
+        if (!body) continue;
+
+        const baseCfi = section.cfi || CFI.fake.fromIndex(sectionIndex);
+        chapterSegments.push(...extractSegmentsWithCfi(doc, baseCfi));
+      } catch (err) {
+        console.warn(`[extractBookChapters] Failed to extract section ${sectionIndex}:`, err);
+      }
+    }
 
-      if (segments.length === 0) continue;
+    if (chapterSegments.length === 0) continue;
 
-      const content = segments.map((s) => s.text).join("\n\n");
+    const content = chapterSegments.map((s) => s.text).join("\n\n");
 
-      chapters.push({ index: i, title, content, segments });
-    } catch (err) {
-      console.warn(`[extractBookChapters] Failed to extract section ${i}:`, err);
-    }
+    chapters.push({ index: group.index, title: group.title, content, segments: chapterSegments });
   }
 
   return chapters;
@@ -127,9 +129,10 @@ function getTextNodes(element: Element): Text[] {
   const walker = element.ownerDocument.createTreeWalker(element, NodeFilter.SHOW_TEXT, null);
 
   const nodes: Text[] = [];
-  let node: Text | null;
-  while ((node = walker.nextNode() as Text | null)) {
-    if (node.textContent && node.textContent.trim()) {
+  while (true) {
+    const node = walker.nextNode() as Text | null;
+    if (!node) break;
+    if (node.textContent?.trim()) {
       nodes.push(node);
     }
   }
@@ -141,8 +144,9 @@ function extractBlockText(block: Element): string {
   const walker = block.ownerDocument.createTreeWalker(block, NodeFilter.SHOW_TEXT, null);
 
   const texts: string[] = [];
-  let node: Text | null;
-  while ((node = walker.nextNode() as Text | null)) {
+  while (true) {
+    const node = walker.nextNode() as Text | null;
+    if (!node) break;
     const text = node.textContent?.trim();
     if (text) {
       texts.push(text);
@@ -152,29 +156,6 @@ function extractBlockText(block: Element): string {
   return texts.join(" ");
 }
 
-function buildTocMap(toc: TOCItem[]): Map<string | number, string> {
-  const map = new Map<string | number, string>();
-
-  function walk(items: TOCItem[]) {
-    for (const item of items) {
-      if (item.label) {
-        map.set(item.index, item.label);
-        if (item.href) {
-          const base = item.href.split("#")[0];
-          map.set(base, item.label);
-          map.set(item.href, item.label);
-        }
-      }
-      if (item.subitems?.length) {
-        walk(item.subitems);
-      }
-    }
-  }
-
-  walk(toc);
-  return map;
-}
-
 async function extractPdfChapters(fileBytes: Uint8Array): Promise<ChapterData[]> {
   const pdfjsLib = await import("pdfjs-dist");
   pdfjsLib.GlobalWorkerOptions.workerSrc = `https://cdn.jsdelivr.net/npm/pdfjs-dist@${pdfjsLib.version}/build/pdf.worker.min.mjs`;

diff --git a/packages/core/src/ai/__tests__/tools.test.ts b/packages/core/src/ai/__tests__/tools.test.ts
@@ -285,9 +285,9 @@ describe("ragToc tool", () => {
 
     expect(result.totalChapters).toBe(3);
     expect(result.chapters).toEqual([
-      { index: 0, title: "Intro" },
-      { index: 1, title: "Chapter 1" },
-      { index: 2, title: "Chapter 2" },
+      { index: 0, number: 1, title: "Intro" },
+      { index: 1, number: 2, title: "Chapter 1" },
+      { index: 2, number: 3, title: "Chapter 2" },
     ]);
   });
 });

diff --git a/packages/core/src/ai/tools/rag-tools.ts b/packages/core/src/ai/tools/rag-tools.ts
@@ -97,7 +97,7 @@ export function createRagTocTool(bookId: string): ToolDefinition {
   return {
     name: "ragToc",
     description:
-      "Get the table of contents of the current book. Use this when the user wants to see the book structure or navigate to a specific chapter.",
+      "Get the table of contents of the current book. Use this when the user wants to see the book structure or navigate to a specific chapter. Use the returned 'index' when calling chapter tools; 'number' is the human-readable chapter order.",
     parameters: {},
     execute: async () => {
       // Get unique chapter titles from chunks
@@ -110,8 +110,9 @@ export function createRagTocTool(bookId: string): ToolDefinition {
       }
 
       return {
-        chapters: Array.from(chapters.entries()).map(([index, title]) => ({
+        chapters: Array.from(chapters.entries()).map(([index, title], ordinal) => ({
           index,
+          number: ordinal + 1,
           title,
         })),
         totalChapters: chapters.size,

diff --git a/packages/core/src/rag/chapter-structure.test.ts b/packages/core/src/rag/chapter-structure.test.ts
@@ -0,0 +1,72 @@
+import { describe, expect, it } from "vitest";
+import { buildChapterSectionGroups } from "./chapter-structure";
+
+describe("buildChapterSectionGroups", () => { // maps section indices to TOC-titled groups
+  it("uses leaf TOC entries as logical chapters for multi-volume books", () => {
+    const groups = buildChapterSectionGroups(
+      [
+        { href: "cover.xhtml" }, // index 0: not in the TOC, expected in no group
+        { href: "volume-1.xhtml" }, // index 1: parent (volume) entry, expected in no group
+        { href: "chapter-1.xhtml" },
+        { href: "chapter-1-extra.xhtml" }, // index 3: not in the TOC, expected under 第一章
+        { href: "chapter-2.xhtml" },
+        { href: "volume-2.xhtml" }, // index 5: parent (volume) entry, expected in no group
+        { href: "chapter-3.xhtml" },
+      ],
+      [
+        {
+          label: "第一卷",
+          href: "volume-1.xhtml",
+          subitems: [
+            { label: "第一章", href: "chapter-1.xhtml" },
+            { label: "第二章", href: "chapter-2.xhtml" },
+          ],
+        },
+        {
+          label: "第二卷",
+          href: "volume-2.xhtml",
+          subitems: [{ label: "第三章", href: "chapter-3.xhtml" }],
+        },
+      ],
+    );
+
+    expect(groups).toEqual([
+      { index: 0, title: "第一章", sectionIndices: [2, 3] }, // chapter-1 plus chapter-1-extra
+      { index: 1, title: "第二章", sectionIndices: [4] },
+      { index: 2, title: "第三章", sectionIndices: [6] },
+    ]);
+  });
+  // A TOC entry with empty subitems still forms a group; unlisted intro.xhtml is left out.
+  it("falls back to top-level TOC entries when no leaf hrefs exist", () => {
+    const groups = buildChapterSectionGroups(
+      [{ href: "intro.xhtml" }, { href: "body.xhtml" }],
+      [{ label: "正文", href: "body.xhtml", subitems: [] }],
+    );
+
+    expect(groups).toEqual([{ index: 0, title: "正文", sectionIndices: [1] }]);
+  });
+  // TOC hrefs may be percent-encoded, "./"-prefixed, fragment-suffixed, or lack the directory.
+  it("normalizes encoded and relative hrefs before matching sections", () => {
+    const groups = buildChapterSectionGroups(
+      [{ href: "Text/第1章.xhtml" }, { href: "Text/%E7%AC%AC2%E7%AB%A0.xhtml" }],
+      [
+        { label: "第一章", href: "./Text/%E7%AC%AC1%E7%AB%A0.xhtml#start" },
+        { label: "第二章", href: "第2章.xhtml" },
+      ],
+    );
+
+    expect(groups).toEqual([
+      { index: 0, title: "第一章", sectionIndices: [0] },
+      { index: 1, title: "第二章", sectionIndices: [1] },
+    ]);
+  });
+  // With an empty TOC, each section becomes its own group titled "Section <1-based position>".
+  it("falls back to one group per section when TOC has no usable anchors", () => {
+    const groups = buildChapterSectionGroups([{ href: "a.xhtml" }, { href: "b.xhtml" }], []);
+
+    expect(groups).toEqual([
+      { index: 0, title: "Section 1", sectionIndices: [0] },
+      { index: 1, title: "Section 2", sectionIndices: [1] },
+    ]);
+  });
+});