-
Notifications
You must be signed in to change notification settings - Fork 391
Closed
Labels
Description
Title of this PDF:
Sodalitas delectus ipsum aperio facere.
is extracted as
4PEBMJUBTEFMFDUVTJQTVNBQFSJPGBDFSF
This PDF was exported from Confluence by Atlassian.
pdfinfo
Title: Sodalitas delectus ipsum aperio facere. - test-automation - Confluence
Creator: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/131.0.0.0 Safari/537.36
Producer: Skia/PDF m131
CreationDate: Fri Dec 27 07:20:30 2024 -03
ModDate: Fri Dec 27 07:20:30 2024 -03
Custom Metadata: no
Metadata Stream: no
Tagged: yes
UserProperties: no
Suspects: no
Form: none
JavaScript: no
Pages: 1
Encrypted: no
Page size: 612 x 792 pts (letter)
Page rot: 0
File size: 15981 bytes
Optimized: no
PDF version: 1.4
Used method
/**
 * Extracts the raw text content of a PDF file with pdf2json.
 *
 * Registers listeners for the parser's `pdfParser_dataReady` and
 * `pdfParser_dataError` events, starts loading the file, and then polls
 * (at most five `sleep` calls) until one of the two events has fired.
 *
 * @param filepath - Path of the PDF file handed to `pdfParser.loadPDF`.
 * @returns The parser's raw text content passed through `unixifyLineEndings`.
 * @throws Error when the parser emits `pdfParser_dataError`, or when neither
 *   event has fired once the polling budget is exhausted.
 */
export async function parsePDF(filepath: string) {
  // https://github.com/modesty/pdf2json
  let parsed = false;
  let failed = false;
  let failure: unknown;

  // A module-level function has no meaningful `this` to hand over as the
  // parser context, so pass null explicitly. The second argument is presumably
  // the "need raw text" switch that getRawTextContent() relies on — confirm
  // against the pdf2json version in use.
  /* eslint-disable-next-line */
  const pdfParser = new PDFParser(null, true);

  // Remember the failure instead of only logging it, so the wait below can
  // stop early and the caller receives the real cause rather than a timeout.
  /* eslint-disable-next-line */
  pdfParser.on('pdfParser_dataError', (errData) => {
    /* eslint-disable-next-line */
    console.error(errData.parserError);
    /* eslint-disable-next-line */
    failure = errData.parserError ?? errData;
    failed = true;
  });
  /* eslint-disable-next-line */
  pdfParser.on('pdfParser_dataReady', (_) => {
    parsed = true;
  });

  /* eslint-disable-next-line */
  await pdfParser.loadPDF(filepath);

  // The code waits on the events rather than on loadPDF() alone, so poll
  // until the parser has reported success or failure.
  const max = 5;
  let i = 0;
  while (!parsed && !failed && i < max) {
    await sleep(1, 'Waiting for parsed PDF');
    i += 1;
  }

  if (failed) {
    throw failure instanceof Error
      ? failure
      : new Error(`Failed to parse PDF: ${String(failure)}`);
  }
  if (!parsed) {
    throw new Error('Timeout while waiting for parsed PDF');
  }

  /* eslint-disable-next-line */
  return unixifyLineEndings(pdfParser.getRawTextContent());
}