-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhtml-policy-loader.js
More file actions
50 lines (41 loc) · 1.29 KB
/
html-policy-loader.js
File metadata and controls
50 lines (41 loc) · 1.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
const fs = require('fs-extra')
const puppeteer = require('puppeteer')
const Mercury = require('@postlight/mercury-parser')
module.exports = class HTMLPolicyLoader {
static async start () {
this.browser = await puppeteer.launch()
}
static async stop () {
await this.browser.close()
}
static async process (url, path) {
try {
const page = await this.browser.newPage()
await page.goto(url, { waitUntil: 'networkidle2' })
// Scroll down google docs pages
let pages = await page.$$(`.kix-page`)
for (const page of pages) {
await page.hover()
}
// Get HTML after running javascript
const renderedHTML = await page.evaluate(async () => {
window.scrollTo(0, document.body.scrollHeight)
return {
text: document.body.innerText,
bodyHTML: document.body.innerHTML
}
})
// Parse HTML into markdown
let markdown = await Mercury.parse(url, {
html: renderedHTML.bodyHTML,
contentType: 'markdown'
})
fs.outputFileSync(`${path}.md`, markdown.content)
fs.outputFileSync(`${path}.txt`, renderedHTML.text)
// Save as a PDF
await page.pdf({ path: `${path}.pdf` })
} catch (err) {
console.log('HTMLPolicyLoader Error', err)
}
}
}