From 244f78ea0fec0a6ea0b81974fdb167bf3471b42f Mon Sep 17 00:00:00 2001 From: codedogQBY <1369175442@qq.com> Date: Sat, 13 Jun 2026 06:37:28 +0800 Subject: [PATCH] fix(mobile): extract covers from large epub imports --- .../src/lib/book/metadata-extractor.ts | 173 ++++++++++++++++-- .../src/lib/platform/expo-platform-service.ts | 6 + packages/app-expo/src/stores/library-store.ts | 18 +- 3 files changed, 176 insertions(+), 21 deletions(-) diff --git a/packages/app-expo/src/lib/book/metadata-extractor.ts b/packages/app-expo/src/lib/book/metadata-extractor.ts index d75a82b2..360903b8 100644 --- a/packages/app-expo/src/lib/book/metadata-extractor.ts +++ b/packages/app-expo/src/lib/book/metadata-extractor.ts @@ -33,16 +33,45 @@ interface BlobLikeFile { slice(start?: number, end?: number, contentType?: string): SliceReadable; } +const EPUB_TEXT_ENTRY_MAX_BYTES = 4 * 1024 * 1024; +const EPUB_COVER_ENTRY_MAX_BYTES = 24 * 1024 * 1024; + // ─── EPUB extraction ──────────────────────────────────────────────── export async function extractEpubMetadata(fileBytes: Uint8Array): Promise { const buf = new Uint8Array(fileBytes); const directory = parseZipDirectory(buf); + return extractEpubMetadataFromReaders({ + logPrefix: "extractEpubMetadata", + readText: async (path) => readTextFromZip(buf, directory, path, EPUB_TEXT_ENTRY_MAX_BYTES), + readBytes: async (path) => readBytesFromZip(buf, directory, path, EPUB_COVER_ENTRY_MAX_BYTES), + }); +} + +export async function extractEpubMetadataFromFile(file: BlobLikeFile): Promise { + const directory = await parseZipDirectoryFromFile(file); + + return extractEpubMetadataFromReaders({ + logPrefix: "extractEpubMetadataFromFile", + readText: (path) => readTextFromZipFile(file, directory, path, EPUB_TEXT_ENTRY_MAX_BYTES), + readBytes: (path) => readBytesFromZipFile(file, directory, path, EPUB_COVER_ENTRY_MAX_BYTES), + }); +} + +async function extractEpubMetadataFromReaders({ + logPrefix, + readText, + readBytes, +}: { + logPrefix: string; + readText: (path: string) => Promise; + readBytes: (path: string) => Promise; +}): Promise { // 1. Read container.xml to find OPF path - const containerXml = readTextFromZip(buf, directory, "META-INF/container.xml"); + const containerXml = await readText("META-INF/container.xml"); if (!containerXml) { - console.warn("[extractEpubMetadata] container.xml not found"); + console.warn(`[${logPrefix}] container.xml not found`); return { title: "", author: "", coverBytes: null, coverMimeType: null }; } @@ -50,9 +79,9 @@ export async function extractEpubMetadata(fileBytes: Uint8Array): Promise { const headerBuffer = await file.slice(0, PDB_HEADER_LENGTH).arrayBuffer(); @@ -562,7 +594,6 @@ interface ZipDirectoryEntry { */ function parseZipDirectory(buf: Uint8Array): ZipDirectoryEntry[] { const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength); - const entries: ZipDirectoryEntry[] = []; // Find End of Central Directory let eocdOffset = -1; @@ -572,12 +603,49 @@ function parseZipDirectory(buf: Uint8Array): ZipDirectoryEntry[] { break; } } - if (eocdOffset === -1) return entries; + if (eocdOffset === -1) return []; const cdOffset = view.getUint32(eocdOffset + 16, true); const cdCount = view.getUint16(eocdOffset + 10, true); + const cdSize = view.getUint32(eocdOffset + 12, true); + + if (cdOffset < 0 || cdOffset >= buf.byteLength) return []; + const cdEnd = Math.min(buf.byteLength, cdOffset + cdSize); + return parseZipCentralDirectory(buf.slice(cdOffset, cdEnd), cdCount); +} + +async function parseZipDirectoryFromFile(file: BlobLikeFile): Promise { + const fileSize = file.size ?? 0; + if (fileSize < 22) return []; + + const tailLength = Math.min(fileSize, ZIP_EOCD_SEARCH_BYTES); + const tailStart = fileSize - tailLength; + const tail = new Uint8Array(await file.slice(tailStart, fileSize).arrayBuffer()); + const view = new DataView(tail.buffer, tail.byteOffset, tail.byteLength); + + let eocdOffset = -1; + for (let i = tail.byteLength - 22; i >= 0; i--) { + if (view.getUint32(i, true) === 0x06054b50) { + eocdOffset = i; + break; + } + } + if (eocdOffset === -1) return []; + + const cdCount = view.getUint16(eocdOffset + 10, true); + const cdSize = view.getUint32(eocdOffset + 12, true); + const cdOffset = view.getUint32(eocdOffset + 16, true); + if (cdOffset < 0 || cdSize <= 0 || cdOffset + cdSize > fileSize) return []; + + const cdBuffer = new Uint8Array(await file.slice(cdOffset, cdOffset + cdSize).arrayBuffer()); + return parseZipCentralDirectory(cdBuffer, cdCount); +} + +function parseZipCentralDirectory(buf: Uint8Array, cdCount: number): ZipDirectoryEntry[] { + const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength); + const entries: ZipDirectoryEntry[] = []; + let pos = 0; - let pos = cdOffset; for (let i = 0; i < cdCount; i++) { if (pos + 46 > buf.byteLength) break; if (view.getUint32(pos, true) !== 0x02014b50) break; @@ -638,6 +706,50 @@ function decompressEntry(buf: Uint8Array, entry: ZipDirectoryEntry): Uint8Array return null; } +async function decompressEntryFromFile( + file: BlobLikeFile, + entry: ZipDirectoryEntry, + maxUncompressedSize: number, +): Promise { + if ( + entry.compressedSize <= 0 || + entry.compressedSize > maxUncompressedSize || + entry.uncompressedSize > maxUncompressedSize + ) { + return null; + } + + const header = new Uint8Array( + await file.slice(entry.localHeaderOffset, entry.localHeaderOffset + 30).arrayBuffer(), + ); + if (header.byteLength < 30) return null; + + const view = new DataView(header.buffer, header.byteOffset, header.byteLength); + if (view.getUint32(0, true) !== 0x04034b50) return null; + + const localFilenameLen = view.getUint16(26, true); + const localExtraLen = view.getUint16(28, true); + const dataStart = entry.localHeaderOffset + 30 + localFilenameLen + localExtraLen; + const dataEnd = dataStart + entry.compressedSize; + + const data = new Uint8Array(await file.slice(dataStart, dataEnd).arrayBuffer()); + if (data.byteLength !== entry.compressedSize) return null; + + if (entry.compressionMethod === 0) { + return data; + } + + if (entry.compressionMethod === 8) { + try { + return pako.inflateRaw(data); + } catch { + return null; + } + } + + return null; +} + /** * Find a ZIP entry by name (case-insensitive fallback) and decompress it. */ @@ -645,6 +757,7 @@ function findAndDecompress( buf: Uint8Array, directory: ZipDirectoryEntry[], path: string, + maxUncompressedSize = Number.POSITIVE_INFINITY, ): Uint8Array | null { // Exact match first let entry = directory.find((e) => e.filename === path); @@ -654,15 +767,32 @@ function findAndDecompress( entry = directory.find((e) => e.filename.toLowerCase() === lower); } if (!entry) return null; + if (entry.uncompressedSize > maxUncompressedSize) return null; return decompressEntry(buf, entry); } +async function findAndDecompressFromFile( + file: BlobLikeFile, + directory: ZipDirectoryEntry[], + path: string, + maxUncompressedSize: number, +): Promise { + let entry = directory.find((e) => e.filename === path); + if (!entry) { + const lower = path.toLowerCase(); + entry = directory.find((e) => e.filename.toLowerCase() === lower); + } + if (!entry) return null; + return decompressEntryFromFile(file, entry, maxUncompressedSize); +} + function readTextFromZip( buf: Uint8Array, directory: ZipDirectoryEntry[], path: string, + maxUncompressedSize = Number.POSITIVE_INFINITY, ): string | null { - const data = findAndDecompress(buf, directory, path); + const data = findAndDecompress(buf, directory, path, maxUncompressedSize); if (!data) return null; return new TextDecoder().decode(data); } @@ -671,8 +801,29 @@ function readBytesFromZip( buf: Uint8Array, directory: ZipDirectoryEntry[], path: string, + maxUncompressedSize = Number.POSITIVE_INFINITY, ): Uint8Array | null { - return findAndDecompress(buf, directory, path); + return findAndDecompress(buf, directory, path, maxUncompressedSize); +} + +async function readTextFromZipFile( + file: BlobLikeFile, + directory: ZipDirectoryEntry[], + path: string, + maxUncompressedSize: number, +): Promise { + const data = await findAndDecompressFromFile(file, directory, path, maxUncompressedSize); + if (!data) return null; + return new TextDecoder().decode(data); +} + +async function readBytesFromZipFile( + file: BlobLikeFile, + directory: ZipDirectoryEntry[], + path: string, + maxUncompressedSize: number, +): Promise { + return findAndDecompressFromFile(file, directory, path, maxUncompressedSize); } // ─── XML helpers (no DOMParser, regex-based) ──────────────────────── diff --git a/packages/app-expo/src/lib/platform/expo-platform-service.ts b/packages/app-expo/src/lib/platform/expo-platform-service.ts index 1935cf27..dfad631f 100644 --- a/packages/app-expo/src/lib/platform/expo-platform-service.ts +++ b/packages/app-expo/src/lib/platform/expo-platform-service.ts @@ -759,6 +759,7 @@ export class ExpoPlatformService implements IPlatformService { let BufferMod: any; try { TcpSocket = (await import("react-native-tcp-socket")).default; + // biome-ignore lint/style/useNodejsImportProtocol: React Native needs the buffer polyfill package. BufferMod = (await import("buffer")).Buffer; } catch (e) { throw new Error(`Native TCP Socket unavailable: ${e instanceof Error ? e.message : e}`); @@ -852,6 +853,11 @@ function extensionToMime(ext: string): string { txt: "text/plain", umd: "application/octet-stream", zip: "application/zip", + jpg: "image/jpeg", + jpeg: "image/jpeg", + png: "image/png", + webp: "image/webp", + gif: "image/gif", }; return map[ext.toLowerCase()] || "application/octet-stream"; } diff --git a/packages/app-expo/src/stores/library-store.ts b/packages/app-expo/src/stores/library-store.ts index 6e1877da..c3995cb0 100644 --- a/packages/app-expo/src/stores/library-store.ts +++ b/packages/app-expo/src/stores/library-store.ts @@ -177,17 +177,15 @@ async function extractMobileImportMetadata(params: { const { filePath, format, fileName, fileSize, sourceBytes } = params; if (format === "epub") { - const bytes = - sourceBytes ?? - (fileSize > 0 && fileSize <= MOBILE_IMPORT_METADATA_MAX_BYTES - ? await getPlatformService().readFile(filePath) - : null); - if (bytes) { - return extractBookMetadata(bytes, format, fileName); + if (sourceBytes) { + return extractBookMetadata(sourceBytes, format, fileName); } - console.warn( - `[extractMobileImportMetadata] Skip EPUB metadata for large file: ${fileName} (${fileSize} bytes)`, - ); + if (fileSize > 0 && fileSize <= MOBILE_IMPORT_METADATA_MAX_BYTES) { + return extractBookMetadata(await getPlatformService().readFile(filePath), format, fileName); + } + + const rangeReadable = await createRangeReadableFile(filePath, fileSize); + return extractBookMetadataFromFile(rangeReadable, format, fileName); } if (format === "mobi" || format === "azw" || format === "azw3") {