-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathcrawl
More file actions
executable file
·38 lines (36 loc) · 876 Bytes
/
crawl
File metadata and controls
executable file
·38 lines (36 loc) · 876 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env node
require('dotenv').config();
// Command-line interface for the crawler.
//
//   -u, --url         entry URL handed to the crawler (string; a value is required
//                     whenever the flag is given)
//   -r, --resume      boolean flag; mutually exclusive with --url
//   -m, --max-radius  numeric crawl radius limit, defaults to Infinity (no limit)
//
// Parsing fails with a usage error when --url and --resume are combined, or
// when --max-radius is given a value that is not a number.
const argv = require('yargs')
  .usage('Usage: $0 [options]')
  .option('url', {
    alias: 'u',
    // Without an explicit type, a bare `--url` parses to `true` and an
    // all-digit value to a number; force a string and require a value.
    string: true,
    requiresArg: true,
    describe: 'The URL the crawler should enter the site from',
  })
  .option('resume', {
    alias: 'r',
    boolean: true,
    describe: 'Resume crawler from existing queue',
  })
  .option('max-radius', {
    alias: 'm',
    number: true,
    describe: 'The maximum radius from the entry URL to crawl',
    default: Infinity,
  })
  .check(({ url, resume, maxRadius }) => {
    if (url && resume) {
      throw new Error('--url and --resume are mutually exclusive');
    }
    // A non-numeric value for a `number` option parses to NaN; reject it here
    // rather than forwarding a meaningless limit to the crawler.
    if (Number.isNaN(maxRadius)) {
      throw new Error('--max-radius must be a number');
    }
    return true;
  })
  .help().argv;
const db = require('./db');
const crawl = require('./crawler');
// Script entry point: connect to the database, flush it unless --resume was
// given, run the crawler from the requested URL, and close the connection.
//
// The connection is released in `finally` so that a failing flush or crawl
// does not leave it open, and any failure is reported with a non-zero exit
// code instead of surfacing as an unhandled promise rejection.
(async () => {
  await db.connect();
  try {
    if (!argv.resume) {
      // Fresh run: flush existing database state before crawling.
      await db.flush();
    }
    await crawl(argv.url, { maxRadius: argv.maxRadius });
  } finally {
    // NOTE(review): `await` is a no-op if db.close() is synchronous and waits
    // for completion if it returns a promise — confirm against ./db.
    await db.close();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});