Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 116 additions & 90 deletions lib/wpress-extract.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,119 +2,145 @@ const fse = require('fs-extra');
const fs = require('fs');
const path = require('path');

// Layout of one entry header. Each *_END constant is the exclusive end offset
// of a field inside the header; the next field starts where the previous ends.
const HEADER_SIZE = 4377; // total length of the header, in bytes
const NAME_END = 255; // name:   [0, NAME_END)
const SIZE_END = 269; // size:   [NAME_END, SIZE_END)
const MTIME_END = 281; // mTime:  [SIZE_END, MTIME_END); prefix runs to HEADER_SIZE
const CHUNK_SIZE = 512; // bytes moved per read when copying entry contents

// All-zero header used to detect that the end of the archive was reached.
const HEADER_CHUNK_EOF = Buffer.alloc(HEADER_SIZE);

// Reusable buffer for header reads.
const headerChunk = Buffer.alloc(HEADER_SIZE);

// Cache of directories that were already created during extraction.
const createdDirs = new Set();

/**
 * Check whether a directory contains no entries.
 *
 * @param {string} dirname - Path of the directory to inspect.
 * @returns {Promise<boolean>} Resolves to `true` when `readdir` lists no
 *   entries; rejects with the `readdir` error if the directory cannot be read.
 */
async function isDirEmpty(dirname) {
  const entries = await fs.promises.readdir(dirname);
  return entries.length === 0;
}

/**
 * Decode a NUL-padded text field stored in `buffer[start, end)`.
 *
 * The field is cut at its first 0x00 byte. When the field contains no 0x00
 * byte at all (the text fills it completely) the whole field is returned;
 * `indexOf` yields -1 in that case and must not be used as a slice end, or the
 * last character would be dropped.
 *
 * @param {Buffer} buffer - Buffer holding the field.
 * @param {number} start - Offset of the first byte of the field.
 * @param {number} end - Offset one past the last byte of the field.
 * @returns {string} The field content decoded as UTF-8, without padding.
 */
function readFromBuffer(buffer, start, end) {
  // Restrict the search to the field itself so a NUL byte that belongs to a
  // later field is never mistaken for this field's terminator.
  const field = buffer.subarray(start, end);
  const nulIndex = field.indexOf(0x00);
  const length = nulIndex === -1 ? field.length : nulIndex;
  return field.toString('utf8', 0, length);
}

/**
 * Read and parse the next entry header from the archive.
 *
 * The header is HEADER_SIZE bytes long and is read from the handle's current
 * position (which the read advances). It holds four NUL-padded text fields:
 * name `[0, NAME_END)`, size as a decimal number `[NAME_END, SIZE_END)`,
 * mTime `[SIZE_END, MTIME_END)` and prefix `[MTIME_END, HEADER_SIZE)`.
 *
 * @param {import('fs').promises.FileHandle} fd - Open handle of the archive.
 * @returns {Promise<{name: string, size: number, mTime: string, prefix: string}|null>}
 *   The parsed header, or `null` when the end of the archive was reached.
 * @throws {Error} When the size field is not a non-negative number.
 */
async function readHeader(fd) {
  // A fresh buffer per call keeps concurrent extractions from sharing state.
  const chunk = Buffer.alloc(HEADER_SIZE);
  const { bytesRead } = await fd.read(chunk, 0, HEADER_SIZE, null);

  // A short read means the file ended before a complete header was available;
  // parsing the partially filled buffer would produce garbage.
  if (bytesRead !== HEADER_SIZE) {
    return null;
  }

  // An all-zero header marks the end of the archive.
  if (Buffer.compare(chunk, HEADER_CHUNK_EOF) === 0) {
    return null;
  }

  const name = readFromBuffer(chunk, 0, NAME_END);
  const size = parseInt(readFromBuffer(chunk, NAME_END, SIZE_END), 10);
  const mTime = readFromBuffer(chunk, SIZE_END, MTIME_END);
  const prefix = readFromBuffer(chunk, MTIME_END, HEADER_SIZE);

  // Fail loudly on a corrupt size instead of letting NaN or a negative value
  // drive the content-copy loop.
  if (Number.isNaN(size) || size < 0) {
    throw new Error(`Invalid size in archive header for entry "${name}".`);
  }

  return { name, size, mTime, prefix };
}

/**
 * Copy the content of one archive entry into the output directory.
 *
 * Exactly `header.size` bytes are read from the archive's current position
 * (advancing it to the start of the next header) and written to
 * `<outputDir>/<header.prefix>/<header.name>`. Missing parent directories are
 * created first.
 *
 * @param {import('fs').promises.FileHandle} fd - Open handle of the archive,
 *   positioned right after the entry's header.
 * @param {{name: string, prefix: string, size: number}} header - Parsed header
 *   of the entry to extract.
 * @param {string} outputDir - Directory the entry is extracted into.
 * @returns {Promise<void>} Resolves once the entry is fully written and closed.
 * @throws {Error} When the size is invalid, when the entry path would escape
 *   `outputDir`, or when the archive ends before `header.size` bytes were read.
 */
async function readBlockToFile(fd, header, outputDir) {
  if (!Number.isInteger(header.size) || header.size < 0) {
    throw new Error(`Invalid size for entry "${header.name}".`);
  }

  const outputFilePath = path.join(outputDir, header.prefix, header.name);

  // Name and prefix come from the archive, so refuse paths that climb out of
  // the output directory (e.g. a prefix of "../..").
  const relativePath = path.relative(
    path.resolve(outputDir),
    path.resolve(outputFilePath)
  );
  const escapesOutputDir =
    relativePath === '..' ||
    relativePath.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relativePath);
  if (escapesOutputDir) {
    throw new Error(
      `Entry "${header.name}" resolves outside of the output directory.`
    );
  }

  // Recursive mkdir is a no-op for directories that already exist.
  await fs.promises.mkdir(path.dirname(outputFilePath), { recursive: true });

  const chunk = Buffer.alloc(Math.min(header.size, 64 * 1024));
  const output = await fs.promises.open(outputFilePath, 'w');
  try {
    let remaining = header.size;
    while (remaining > 0) {
      // Position `null` reads from, and advances, the archive's file position,
      // and the length cap keeps the read from running into the next header.
      const { bytesRead } = await fd.read(
        chunk,
        0,
        Math.min(chunk.length, remaining),
        null
      );
      if (bytesRead === 0) {
        throw new Error(
          `Unexpected end of archive while extracting "${header.name}".`
        );
      }

      // A single write may be partial; keep going until the chunk is flushed.
      let written = 0;
      while (written < bytesRead) {
        const { bytesWritten } = await output.write(
          chunk,
          written,
          bytesRead - written,
          null
        );
        written += bytesWritten;
      }

      remaining -= bytesRead;
    }
  } finally {
    await output.close();
  }
}

/**
 * Prepare the output directory before extraction starts.
 *
 * With `override` set the directory is emptied via `fse.emptyDir`. Without it,
 * an existing directory that still has entries is refused.
 *
 * @param {string} outputDir - Directory the archive will be extracted into.
 * @param {boolean} override - Whether existing content may be wiped.
 * @returns {Promise<void>}
 * @throws {Error} When `override` is falsy and the directory is not empty.
 */
async function ensureOutputDir(outputDir, override) {
  if (override) {
    await fse.emptyDir(outputDir);
    return;
  }

  const hasContent =
    fs.existsSync(outputDir) && !(await isDirEmpty(outputDir));
  if (hasContent) {
    throw new Error(
      `Output dir is not empty. Clear it first or use the --force option to override it.`
    );
  }
}

/**
 * Extract every entry of an archive into a directory.
 *
 * Headers are read one after another until `readHeader` reports the end of
 * the archive; the content following each header is written below
 * `outputDir` by `readBlockToFile`.
 *
 * @param {object} options
 * @param {string} options.inputFile - Path of the archive to extract.
 * @param {string} options.outputDir - Directory to extract into.
 * @param {(totalSize: number) => void} [options.onStart] - Called once with
 *   the size of the archive in bytes.
 * @param {(offset: number) => void} [options.onUpdate] - Called after each
 *   entry with the number of archive bytes (headers plus content) consumed.
 * @param {(countFiles: number) => void} [options.onFinish] - Called once with
 *   the number of extracted entries.
 * @param {boolean} [options.override] - Empty `outputDir` first instead of
 *   refusing a non-empty directory.
 * @returns {Promise<void>}
 * @throws {Error} When the input file is missing, the output directory is not
 *   empty (without `override`), or extraction of an entry fails.
 */
async function wpExtract({
  inputFile: inputFilePath,
  outputDir,
  onStart = () => {},
  onUpdate = () => {},
  onFinish = () => {},
  override = false,
}) {
  if (!fs.existsSync(inputFilePath)) {
    throw new Error(`Input file at location "${inputFilePath}" could not be found.`);
  }

  await ensureOutputDir(outputDir, override);

  const inputFile = await fs.promises.open(inputFilePath, 'r');
  let countFiles = 0;
  try {
    // Everything after open() runs inside the try so that a throwing callback
    // or a failed read can never leak the file handle.
    const { size: archiveSize } = await inputFile.stat();
    onStart(archiveSize);

    let offset = 0;
    while (true) {
      const header = await readHeader(inputFile);
      if (!header) {
        break;
      }

      await readBlockToFile(inputFile, header, outputDir);
      offset += HEADER_SIZE + header.size;
      countFiles++;

      onUpdate(offset);
    }
  } finally {
    // Errors propagate unchanged to the caller; the handle is closed once.
    await inputFile.close();
  }

  onFinish(countFiles);
}
module.exports = wpExtract;