forked from dontbesilent2025/extract-getnote-articles
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.js
More file actions
101 lines (86 loc) · 2.65 KB
/
run.js
File metadata and controls
101 lines (86 loc) · 2.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
const { chromium } = require('playwright');
const fs = require('fs');
const path = require('path');
// 从URL或直接的ID中提取知识库ID
function extractTopicId(urlOrId) {
// 如果是URL,提取ID
const urlMatch = urlOrId.match(/subject\/([^\/\?]+)/);
if (urlMatch) {
return urlMatch[1];
}
// 否则假设就是ID
return urlOrId;
}
// 从URL中提取followName参数
function extractFollowName(url) {
try {
const urlObj = new URL(url);
const followName = urlObj.searchParams.get('followName');
if (followName) {
// URL解码
return decodeURIComponent(followName);
}
} catch (e) {
// 不是有效的URL
}
return null;
}
// 检查输入是否为完整URL
function isFullUrl(input) {
return input.startsWith('http://') || input.startsWith('https://');
}
// 获取不冲突的文件夹名
function getUniqueOutputDir(baseName) {
let outputDir = `./${baseName}`;
let counter = 2;
// 如果文件夹已存在,添加序号
while (fs.existsSync(outputDir)) {
outputDir = `./${baseName}_${counter}`;
counter++;
}
return outputDir;
}
async function main() {
const args = process.argv.slice(2);
if (args.length === 0) {
console.log('❌ 错误:请提供知识库URL或ID');
console.log('');
console.log('用法:');
console.log(' node run.js <URL或ID> [输出目录]');
console.log('');
console.log('示例:');
console.log(' node run.js https://www.biji.com/subject/ABC123/DEFAULT?followName=...');
console.log(' node run.js ABC123');
console.log(' node run.js ABC123 "自定义目录名"');
process.exit(1);
}
const urlOrId = args[0];
const topicId = extractTopicId(urlOrId);
// 确定输出目录名:优先使用用户指定的,其次使用followName,最后使用topicId
let outputDirName;
if (args[1]) {
outputDirName = args[1];
} else {
const followName = extractFollowName(urlOrId);
outputDirName = followName || topicId;
}
// 获取不冲突的输出目录
const outputDir = getUniqueOutputDir(outputDirName);
const finalDirName = path.basename(outputDir);
console.log('=== Get笔记文案提取工具 ===');
console.log(`输入: ${urlOrId}`);
console.log(`知识库ID: ${topicId}`);
console.log(`输出目录: ${finalDirName}`);
console.log('');
// 调用原始的 extract.js
// 如果是完整URL,传递完整URL;否则只传递ID
const { spawn } = require('child_process');
const extract = spawn('node', ['extract.js', urlOrId, outputDir, '0', '0'], {
stdio: 'inherit',
cwd: __dirname
});
extract.on('close', (code) => {
process.exit(code);
});
}
main().catch(console.error);