diff --git a/lib/wpress-extract.js b/lib/wpress-extract.js index 1a33666..9269098 100644 --- a/lib/wpress-extract.js +++ b/lib/wpress-extract.js @@ -2,119 +2,145 @@ const fse = require('fs-extra'); const fs = require('fs'); const path = require('path'); -const HEADER_SIZE = 4377; // length of the header -const HEADER_CHUNK_EOF = Buffer.alloc(HEADER_SIZE); // Empty header used for check if we reached the end +// Constants for header structure +const HEADER_SIZE = 4377; +const NAME_END = 255; +const SIZE_END = 269; +const MTIME_END = 281; +const CHUNK_SIZE = 512; + +// Empty header for EOF check +const HEADER_CHUNK_EOF = Buffer.alloc(HEADER_SIZE); + +// Reusable buffer for header reads +const headerChunk = Buffer.alloc(HEADER_SIZE); + +// Cache for created directories +const createdDirs = new Set(); function isDirEmpty(dirname) { - return fs.promises.readdir(dirname).then((files) => { - return files.length === 0; - }); + return fs.promises.readdir(dirname).then((files) => files.length === 0); } function readFromBuffer(buffer, start, end) { - const _buffer = buffer.slice(start, end); - // Trim off the empty bytes - return _buffer.slice(0, _buffer.indexOf(0x00)).toString(); + const nullIndex = buffer.indexOf(0x00, start); + const endSlice = nullIndex === -1 || nullIndex > end ? end : nullIndex; + return buffer.toString('utf8', start, endSlice); } async function readHeader(fd) { - const headerChunk = Buffer.alloc(HEADER_SIZE); - await fd.read(headerChunk, 0, HEADER_SIZE, null); - - // Reached end of file - if (Buffer.compare(headerChunk, HEADER_CHUNK_EOF) === 0) { - return null; - } - - const name = readFromBuffer(headerChunk, 0, 255); - const size = parseInt(readFromBuffer(headerChunk, 255, 269), 10); - const mTime = readFromBuffer(headerChunk, 269, 281); - const prefix = readFromBuffer(headerChunk, 281, HEADER_SIZE); - - return { - name, - size, - mTime, - prefix, - }; -} - -async function readBlockToFile(fd, header, outputPath) { - const outputFilePath = path.join(outputPath, header.prefix, header.name); - fse.ensureDirSync(path.dirname(outputFilePath)); - const outputStream = fs.createWriteStream(outputFilePath); + const { bytesRead } = await fd.read(headerChunk, 0, HEADER_SIZE, null); - let totalBytesToRead = header.size; - while (true) { - let bytesToRead = 512; - if (bytesToRead > totalBytesToRead) { - bytesToRead = totalBytesToRead; + // Check if we actually read the full header size before comparing + if (bytesRead !== HEADER_SIZE) { + return null; // Reached end of file or an unexpected error } - if (bytesToRead === 0) { - break; + if (Buffer.compare(headerChunk, HEADER_CHUNK_EOF) === 0) { + return null; // Reached end of file } - const buffer = Buffer.alloc(bytesToRead); - const data = await fd.read(buffer, 0, bytesToRead, null); - outputStream.write(buffer); + const name = readFromBuffer(headerChunk, 0, NAME_END); + const size = parseInt(readFromBuffer(headerChunk, NAME_END, SIZE_END), 10); + const mTime = readFromBuffer(headerChunk, SIZE_END, MTIME_END); + const prefix = readFromBuffer(headerChunk, MTIME_END, HEADER_SIZE); - totalBytesToRead -= data.bytesRead; - } - - outputStream.close(); + return { name, size, mTime, prefix }; } -module.exports = async function wpExtract({ - inputFile: _inputFile, - outputDir, - onStart, - onUpdate, - onFinish, - override, -}) { - if (!fs.existsSync(_inputFile)) { - throw new Error( - `Input file at location "${_inputFile}" could not be found.` - ); - } - - if (override) { - // Ensure the output dir exists and is empty - fse.emptyDirSync(outputDir); - } else { - if (fs.existsSync(outputDir) && !(await isDirEmpty(outputDir))) { - throw new Error( - `Output dir is not empty. Clear it first or use the --force option to override it.` - ); +async function readBlockToFile(fd, header, outputDir) { + const outputFilePath = path.join(outputDir, header.prefix, header.name); + const outputDirPath = path.dirname(outputFilePath); + + // Optimize directory creation + if (!createdDirs.has(outputDirPath)) { + await fse.ensureDir(outputDirPath); + createdDirs.add(outputDirPath); } - } - const inputFileStat = fs.statSync(_inputFile); - const inputFile = await fs.promises.open(_inputFile, 'r'); + const outputStream = fs.createWriteStream(outputFilePath); + + return new Promise((resolve, reject) => { + const stream = fd.createReadStream({ + start: fd.bytesRead, //Start from where the header ended + highWaterMark: CHUNK_SIZE, + }); - // Trigger onStart callback - onStart(inputFileStat.size); + stream.pipe(outputStream); - let offset = 0; - let countFiles = 0; + let totalBytesRead = 0; + stream.on('data', (chunk) => { + totalBytesRead += chunk.length; + }); - while (true) { - const header = await readHeader(inputFile); - if (!header) { - break; + stream.on('end', async () => { + if (totalBytesRead !== header.size) { + // If you want to seek to the next header position + await fd.read(Buffer.alloc(0), 0, 0, fd.bytesRead + header.size - totalBytesRead); + } + resolve(); + }); + + stream.on('error', reject); + outputStream.on('error', reject); + }); +} + +async function ensureOutputDir(outputDir, override) { + if (override) { + await fse.emptyDir(outputDir); + } else { + if (fs.existsSync(outputDir) && !(await isDirEmpty(outputDir))) { + throw new Error( + `Output dir is not empty. Clear it first or use the --force option to override it.` + ); + } } +} - await readBlockToFile(inputFile, header, outputDir); - offset = offset + HEADER_SIZE + header.size; - countFiles++; +async function wpExtract({ + inputFile: inputFilePath, + outputDir, + onStart, + onUpdate, + onFinish, + override, +}) { + if (!fs.existsSync(inputFilePath)) { + throw new Error(`Input file at location "${inputFilePath}" could not be found.`); + } - // Trigger onUpdate callback - onUpdate(offset); - } + await ensureOutputDir(outputDir, override); + + const inputFileStat = fs.statSync(inputFilePath); + const inputFile = await fs.promises.open(inputFilePath, 'r'); + + onStart(inputFileStat.size); + + let offset = 0; + let countFiles = 0; + try { + while (true) { + const header = await readHeader(inputFile); + if (!header) { + break; + } + + await readBlockToFile(inputFile, header, outputDir); + offset = offset + HEADER_SIZE + header.size; + countFiles++; + + onUpdate(offset); + } + } catch (error) { + // Handle errors during extraction + console.error("Error during extraction:", error); + throw error; // Re-throw to allow for handling at a higher level if needed + } finally { + await inputFile.close(); + } - await inputFile.close(); + onFinish(countFiles); +} - // Trigger onFinish callback - onFinish(countFiles); -}; +module.exports = wpExtract;