From b687b73a3a7b361234363ca260c027d1dcfccfd6 Mon Sep 17 00:00:00 2001 From: Takahiro Ebato Date: Sun, 1 Mar 2026 18:44:20 +0900 Subject: [PATCH 1/5] feat: add llms.txt and LLM-friendly markdown docs generation --- .gitignore | 4 + build/build-doc.js | 9 +++ build/build-llms.js | 189 ++++++++++++++++++++++++++++++++++++++++++++ package-lock.json | 47 +++++++++++ package.json | 2 + 5 files changed, 251 insertions(+) create mode 100644 build/build-llms.js diff --git a/.gitignore b/.gitignore index 5493e498a..869aa4c1c 100644 --- a/.gitignore +++ b/.gitignore @@ -181,6 +181,10 @@ slides/webgl /public/zh/coding-standard-content.html /public/en/coding-standard-content.html /public/*.html +/public/en/llms.txt +/public/en/llms-documents +/public/zh/llms.txt +/public/zh/llms-documents # Editor temporal files diff --git a/build/build-doc.js b/build/build-doc.js index a7d49f9ca..31854e2df 100644 --- a/build/build-doc.js +++ b/build/build-doc.js @@ -184,6 +184,15 @@ async function run() { console.log('Error happens when copying to dest folders.'); console.log(e); } + + try { + const buildLlms = require('./build-llms'); + buildLlms(); + } + catch (e) { + console.log('Error happens when building llms documents.'); + console.log(e); + } } console.log('All done.'); diff --git a/build/build-llms.js b/build/build-llms.js new file mode 100644 index 000000000..0d79296bd --- /dev/null +++ b/build/build-llms.js @@ -0,0 +1,189 @@ +/** + * Converts built part JSONs (HTML desc) to Markdown using turndown, + * and generates llms.txt + individual .md files. + * + * Mechanically converts documents/*-parts/*.json to llms-documents/*-parts/*.md. + * Type information is extracted from documents/*.json (full schema) via traverse. 
+ * + * Prerequisites: JSON must be built first (node build.js --env dev) + * Usage: node build/build-llms.js --env dev + */ +const fs = require('fs'); +const fse = require('fs-extra'); +const path = require('path'); +const globby = require('globby'); +const TurndownService = require('turndown'); +const {gfm} = require('turndown-plugin-gfm'); +const {traverse} = require('../tool/schemaHelper'); +const {readConfigEnvFile} = require('./helper'); + +// --- Constants --- + +const LANGUAGES = ['en', 'zh']; +const OUTPUT_DIR_NAME = 'llms-documents'; +const MAX_HEADING_DEPTH = 6; + +const CATEGORY_LABELS = { + en: {'option-parts': 'Option', 'option-gl-parts': 'Option GL', 'api-parts': 'API', 'tutorial-parts': 'Tutorial'}, + zh: {'option-parts': '配置项 (Option)', 'option-gl-parts': 'Option GL', 'api-parts': 'API', 'tutorial-parts': '教程 (Tutorial)'} +}; + +const LLMS_TXT_HEADER = [ + '# Apache ECharts Documentation', + '', + '> Apache ECharts is a free, powerful charting and visualization library offering easy ways to add intuitive, interactive, and highly customizable charts to your commercial products.', + '' +].join('\n'); + +// --- Config --- + +const argv = require('yargs').argv; +const envType = (argv.dev != null || argv.debug != null || argv.env === 'dev') ? 'dev' : argv.env; +if (!envType) throw new Error('--env MUST be specified'); +const config = readConfigEnvFile(envType); + +// --- Turndown --- + +const td = new TurndownService({headingStyle: 'atx', codeBlockStyle: 'fenced'}); +td.use(gfm); +td.addRule('iframe', {filter: 'iframe', replacement: () => ''}); + +function htmlToMd(html) { + return html ? 
td.turndown(html).replace(/\n{3,}/g, '\n\n').trim() : ''; +} + +// --- Extract type info from full schema JSON --- + +function buildTypeMap(schemaJsonPath, docName) { + if (!fs.existsSync(schemaJsonPath)) return {}; + const schema = JSON.parse(fs.readFileSync(schemaJsonPath, 'utf-8')); + const typeMap = {}; + traverse(schema, docName, (schemaPath, node) => { + if (node.type || node.default != null) { + typeMap[schemaPath] = { + type: node.type ? (Array.isArray(node.type) ? node.type.join('|') : node.type) : null, + default: node.default != null ? String(node.default) : null + }; + } + }); + return typeMap; +} + +// --- Convert part JSON to Markdown --- + +function formatPropertyEntry(key, val, typeInfo) { + const heading = '#'.repeat(Math.min(key.split('.').length + 1, MAX_HEADING_DEPTH)) + ' ' + key; + const meta = [ + typeInfo && typeInfo.type && `- **Type**: \`${typeInfo.type}\``, + typeInfo && typeInfo.default != null && `- **Default**: \`${typeInfo.default}\`` + ].filter(Boolean); + const body = val.desc ? htmlToMd(val.desc) : ''; + return [heading, ...meta, ...(body ? ['', body] : []), '']; +} + +function jsonToMd(data, typeMap, partKey) { + const lines = Object.entries(data).flatMap(([key, val]) => { + const fullKey = partKey ? 
`${partKey}.${key}` : key; + return formatPropertyEntry(key, val, typeMap[fullKey]); + }); + return lines.join('\n').replace(/\n{3,}/g, '\n\n').trimEnd() + '\n'; +} + +// --- File output --- + +function writeFile(dir, name, content, category) { + const fullPath = path.resolve(dir, name); + fse.ensureDirSync(path.dirname(fullPath)); + fs.writeFileSync(fullPath, content, 'utf-8'); + return {name, path: fullPath, category}; +} + +// --- Process a single *-parts/ directory --- + +function processPartsDir(partsDir, outDir, typeMap) { + const dirName = path.basename(partsDir); + + const jsonFiles = globby.sync(path.join(partsDir, '*.json')) + .filter(f => !path.basename(f).includes('-outline')); + + return jsonFiles.map(f => { + const baseName = path.basename(f, '.json'); + const data = JSON.parse(fs.readFileSync(f, 'utf-8')); + const content = `# ${baseName}\n\n` + jsonToMd(data, typeMap, baseName); + return writeFile(outDir, `${dirName}/${baseName}.md`, content, dirName); + }); +} + +// --- Generate docs for a single language --- + +function generateDocsForLang(lang) { + const docsDir = path.resolve(config.releaseDestDir, lang, 'documents'); + const outDir = path.resolve(config.releaseDestDir, lang, OUTPUT_DIR_NAME); + fse.ensureDirSync(outDir); + + // Step 1: Build a type map from full schema JSONs (option.json, api.json, etc.) + // by traversing the nested schema tree to collect type/default for each + // property path (e.g. "option.title.show" -> {type: "boolean", default: "true"}). + const schemaFiles = globby.sync(path.join(docsDir, '*.json')); + const typeMap = schemaFiles.reduce((map, f) => { + const docName = path.basename(f, '.json'); + return {...map, ...buildTypeMap(f, docName)}; + }, {}); + + // Step 2: For each *-parts/ directory, read part JSONs (e.g. option.title.json), + // convert each property's HTML desc field to Markdown via turndown, + // attach type/default from the type map, and write as .md files. 
+ const partsDirs = globby.sync(path.join(docsDir, '*-parts'), {onlyDirectories: true}); + const files = partsDirs + .flatMap(dir => processPartsDir(dir, outDir, typeMap)) + .sort((a, b) => a.name.localeCompare(b.name)); + + console.log(`Generated ${files.length} docs for ${lang}`); + return files; +} + +// --- llms.txt --- + +function groupByCategory(files) { + return files.reduce((groups, f) => ({ + ...groups, + [f.category]: [...(groups[f.category] || []), f] + }), {}); +} + +function writeLlmsTxt(lang, files) { + const langDir = path.resolve(config.releaseDestDir, lang); + fse.ensureDirSync(langDir); + const labels = CATEGORY_LABELS[lang] || CATEGORY_LABELS.en; + const groups = groupByCategory(files); + + const sections = Object.keys(groups) + .sort() + .flatMap(cat => [ + `## ${labels[cat] || cat}`, '', + ...groups[cat].map(f => + `- [${path.basename(f.name, '.md')}](${OUTPUT_DIR_NAME}/${f.name})` + ), + '' + ]); + + const content = [LLMS_TXT_HEADER, ...sections].join('\n').trimEnd() + '\n'; + fs.writeFileSync(path.join(langDir, 'llms.txt'), content, 'utf-8'); + console.log(`Generated ${lang}/llms.txt`); +} + +// --- Main --- + +function buildLlms() { + console.log('Building llms documents ...'); + for (const lang of LANGUAGES) { + // Step 1-2: Generate individual .md files from part JSONs. + const files = generateDocsForLang(lang); + // Step 3: Generate llms.txt index listing all .md files. 
+ if (files.length > 0) writeLlmsTxt(lang, files); + } + console.log('Build llms documents done.'); +} + +module.exports = buildLlms; +if (require.main === module) buildLlms(); diff --git a/package-lock.json b/package-lock.json index 00636955e..754f99de7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -42,6 +42,8 @@ "open": "^8.4.0", "sass.js": "^0.11.1", "sassjs-loader": "^2.0.0", + "turndown": "^7.2.2", + "turndown-plugin-gfm": "^1.0.2", "vue-loader": "^15.9.2", "vue-template-compiler": "^2.6.11", "webpack": "^5.105.0", @@ -1546,6 +1548,13 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@mixmark-io/domino": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz", + "integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==", + "dev": true, + "license": "BSD-2-Clause" + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -5366,6 +5375,23 @@ "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "dev": true }, + "node_modules/turndown": { + "version": "7.2.2", + "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.2.2.tgz", + "integrity": "sha512-1F7db8BiExOKxjSMU2b7if62D/XOyQyZbPKq/nUwopfgnHlqXHqQ0lvfUTeUIr1lZJzOPFn43dODyMSIfvWRKQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@mixmark-io/domino": "^2.2.0" + } + }, + "node_modules/turndown-plugin-gfm": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz", + "integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg==", + "dev": true, + "license": "MIT" + }, "node_modules/type-fest": { "version": "4.41.0", "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz", @@ -7142,6 +7168,12 @@ 
"@jridgewell/sourcemap-codec": "^1.4.14" } }, + "@mixmark-io/domino": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@mixmark-io/domino/-/domino-2.2.0.tgz", + "integrity": "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==", + "dev": true + }, "@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -9910,6 +9942,21 @@ "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "dev": true }, + "turndown": { + "version": "7.2.2", + "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.2.2.tgz", + "integrity": "sha512-1F7db8BiExOKxjSMU2b7if62D/XOyQyZbPKq/nUwopfgnHlqXHqQ0lvfUTeUIr1lZJzOPFn43dODyMSIfvWRKQ==", + "dev": true, + "requires": { + "@mixmark-io/domino": "^2.2.0" + } + }, + "turndown-plugin-gfm": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz", + "integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg==", + "dev": true + }, "type-fest": { "version": "4.41.0", "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-4.41.0.tgz", diff --git a/package.json b/package.json index 3fca43dc9..af4325dc6 100644 --- a/package.json +++ b/package.json @@ -38,6 +38,8 @@ "open": "^8.4.0", "sass.js": "^0.11.1", "sassjs-loader": "^2.0.0", + "turndown": "^7.2.2", + "turndown-plugin-gfm": "^1.0.2", "vue-loader": "^15.9.2", "vue-template-compiler": "^2.6.11", "webpack": "^5.105.0", From 3bf7a840dac32bcad0b4933b923199cc892b88b5 Mon Sep 17 00:00:00 2001 From: Takahiro Ebato Date: Sun, 1 Mar 2026 23:24:01 +0900 Subject: [PATCH 2/5] add link resolution --- build/build-llms.js | 72 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 6 deletions(-) diff --git a/build/build-llms.js b/build/build-llms.js index 0d79296bd..3fa2e0e50 
100644 --- a/build/build-llms.js +++ b/build/build-llms.js @@ -69,22 +69,71 @@ function buildTypeMap(schemaJsonPath, docName) { return typeMap; } +// --- Resolve links in HTML --- +// Best-effort rewriting of and in HTML +// so that turndown produces markdown links pointing to the correct .md files. +// Some source links have non-standard formats (e.g. missing "#", no dot separator) +// that cannot be resolved; these are left as-is or linked to the orphan file. + +function tryResolveFileKey(linkPath, fileKeys) { + const [seg, ...rest] = linkPath.split('.'); + const frag = rest.length > 0 ? rest.join('.') : null; + const segL = seg.toLowerCase(); + const keysArr = [...fileKeys]; + + const key = fileKeys.has(seg) + ? seg + : keysArr.find(k => k.toLowerCase() === segL) + ?? keysArr.find(k => { + const kl = k.toLowerCase(); + return kl === segL + 's' || kl + 's' === segL; + }) + ?? null; + + return key ? {key, frag} : null; +} + +function tryResolveHtmlLinks(html, strippedKeys, docPrefix, hasOrphanFile) { + // Same-category links: href="#property.path" -> href="docPrefix.fileKey.md#fragment" + const resolved = html.replace(/href="#([^"]+)"/g, (match, lp) => { + const r = tryResolveFileKey(lp, strippedKeys); + if (!r) { + if (hasOrphanFile) return `href="${docPrefix}.md#${lp}"`; + return match; + } + return `href="${docPrefix}.${r.key}.md${r.frag ? '#' + r.frag : ''}"`; + }); + + // Cross-category links: href="api.html#echarts.init" -> href="../api-parts/api.echarts.md#init" + // Tutorial is a single file, so href="tutorial.html#X" -> href="../tutorial-parts/tutorial.md#X" + return resolved.replace( + /href="(option|api|tutorial)\.html#([^"]+)"/g, + (_, docType, fragment) => { + if (docType === 'tutorial') { + return `href="../tutorial-parts/tutorial.md#${fragment}"`; + } + const {key, frag} = tryResolveFileKey(fragment, new Set([fragment.split('.')[0]])); + return `href="../${docType}-parts/${docType}.${key}.md${frag ? 
'#' + frag : ''}"`; + } + ); +} + // --- Convert part JSON to Markdown --- -function formatPropertyEntry(key, val, typeInfo) { +function formatPropertyEntry(key, val, typeInfo, linkResolver) { const heading = '#'.repeat(Math.min(key.split('.').length + 1, MAX_HEADING_DEPTH)) + ' ' + key; const meta = [ typeInfo && typeInfo.type && `- **Type**: \`${typeInfo.type}\``, typeInfo && typeInfo.default != null && `- **Default**: \`${typeInfo.default}\`` ].filter(Boolean); - const body = val.desc ? htmlToMd(val.desc) : ''; + const body = val.desc ? htmlToMd(linkResolver(val.desc)) : ''; return [heading, ...meta, ...(body ? ['', body] : []), '']; } -function jsonToMd(data, typeMap, partKey) { +function jsonToMd(data, typeMap, partKey, linkResolver) { const lines = Object.entries(data).flatMap(([key, val]) => { const fullKey = partKey ? `${partKey}.${key}` : key; - return formatPropertyEntry(key, val, typeMap[fullKey]); + return formatPropertyEntry(key, val, typeMap[fullKey], linkResolver); }); return lines.join('\n').replace(/\n{3,}/g, '\n\n').trimEnd() + '\n'; } @@ -102,14 +151,25 @@ function writeFile(dir, name, content, category) { function processPartsDir(partsDir, outDir, typeMap) { const dirName = path.basename(partsDir); + const docPrefix = dirName.replace(/-parts$/, ''); const jsonFiles = globby.sync(path.join(partsDir, '*.json')) .filter(f => !path.basename(f).includes('-outline')); + // Collect file keys for link resolution (e.g. "option.title", "option.series-bar") + const fileKeys = new Set(jsonFiles.map(f => path.basename(f, '.json'))); + const strippedKeys = new Set([...fileKeys].map(k => + k.startsWith(docPrefix + '.') ? 
k.slice(docPrefix.length + 1) : k + )); + const hasOrphanFile = fileKeys.has(docPrefix); + + // Create a link resolver that rewrites HTML hrefs before turndown + const linkResolver = (html) => tryResolveHtmlLinks(html, strippedKeys, docPrefix, hasOrphanFile); + return jsonFiles.map(f => { const baseName = path.basename(f, '.json'); const data = JSON.parse(fs.readFileSync(f, 'utf-8')); - const content = `# ${baseName}\n\n` + jsonToMd(data, typeMap, baseName); + const content = `# ${baseName}\n\n` + jsonToMd(data, typeMap, baseName, linkResolver); return writeFile(outDir, `${dirName}/${baseName}.md`, content, dirName); }); } @@ -131,7 +191,7 @@ function generateDocsForLang(lang) { }, {}); // Step 2: For each *-parts/ directory, read part JSONs (e.g. option.title.json), - // convert each property's HTML desc field to Markdown via turndown, + // resolve internal links in HTML, convert desc to Markdown via turndown, // attach type/default from the type map, and write as .md files. const partsDirs = globby.sync(path.join(docsDir, '*-parts'), {onlyDirectories: true}); const files = partsDirs From 14bfa8b789d609e8c381e43c68d91b483aad994c Mon Sep 17 00:00:00 2001 From: Takahiro Ebato Date: Sat, 4 Apr 2026 13:28:29 +0900 Subject: [PATCH 3/5] refactor build-llms.js --- build/build-llms.js | 271 ++++++++++++++++++++++++++++++-------------- 1 file changed, 186 insertions(+), 85 deletions(-) diff --git a/build/build-llms.js b/build/build-llms.js index 3fa2e0e50..25d0b312b 100644 --- a/build/build-llms.js +++ b/build/build-llms.js @@ -2,7 +2,9 @@ * Converts built part JSONs (HTML desc) to Markdown using turndown, * and generates llms.txt + individual .md files. * - * Mechanically converts documents/*-parts/*.json to llms-documents/*-parts/*.md. + * Mechanically converts documents/*-parts/*.json to llms-documents/ (.md files). + * Root files (e.g. option.md) are placed at llms-documents/, while part files + * (e.g. option.title.md) are placed at llms-documents/*-parts/. 
* Type information is extracted from documents/*.json (full schema) via traverse. * * Prerequisites: JSON must be built first (node build.js --env dev) @@ -23,7 +25,7 @@ const LANGUAGES = ['en', 'zh']; const OUTPUT_DIR_NAME = 'llms-documents'; const MAX_HEADING_DEPTH = 6; -const CATEGORY_LABELS = { +const SECTION_LABELS = { en: {'option-parts': 'Option', 'option-gl-parts': 'Option GL', 'api-parts': 'API', 'tutorial-parts': 'Tutorial'}, zh: {'option-parts': '配置项 (Option)', 'option-gl-parts': 'Option GL', 'api-parts': 'API', 'tutorial-parts': '教程 (Tutorial)'} }; @@ -54,6 +56,15 @@ function htmlToMd(html) { // --- Extract type info from full schema JSON --- +/** + * Extract type and default value info from a full schema JSON by traversing + * the nested schema tree. + * + * @param {string} schemaJsonPath - path to schema JSON (e.g. "documents/option.json") + * @param {string} docName - e.g. "option", "api" + * @returns {Object} + * e.g. { "option.title.show": {type: "boolean", default: "true"} } + */ function buildTypeMap(schemaJsonPath, docName) { if (!fs.existsSync(schemaJsonPath)) return {}; const schema = JSON.parse(fs.readFileSync(schemaJsonPath, 'utf-8')); @@ -73,104 +84,185 @@ function buildTypeMap(schemaJsonPath, docName) { // Best-effort rewriting of and in HTML // so that turndown produces markdown links pointing to the correct .md files. // Some source links have non-standard formats (e.g. missing "#", no dot separator) -// that cannot be resolved; these are left as-is or linked to the orphan file. +// that cannot be resolved; these are left as-is or linked to the root file. -function tryResolveFileKey(linkPath, fileKeys) { +/** + * Split linkPath into a part key (first segment) and fragment (rest), matching + * the key against partKeys with case-insensitive and singular/plural fallback. + * + * @param {string} linkPath - e.g. "title.show", "echarts.init" + * @param {Set} partKeys - e.g. 
Set{'title','series-bar','geo',...} + * @returns {{key: string, frag: string|null}|null} + * e.g. "title.show" -> {key: "title", frag: "show"} + * "angleAxis.axisLabel.interval" -> {key: "angleAxis", frag: "axisLabel.interval"} + * "geo" -> {key: "geo", frag: null} + * "unknown" -> null + */ +function tryResolvePartKey(linkPath, partKeys) { const [seg, ...rest] = linkPath.split('.'); const frag = rest.length > 0 ? rest.join('.') : null; + + if (partKeys.has(seg)) return {key: seg, frag}; + + // Fallback: case-insensitive and singular/plural matching const segL = seg.toLowerCase(); - const keysArr = [...fileKeys]; - - const key = fileKeys.has(seg) - ? seg - : keysArr.find(k => k.toLowerCase() === segL) - ?? keysArr.find(k => { - const kl = k.toLowerCase(); - return kl === segL + 's' || kl + 's' === segL; - }) - ?? null; - - return key ? {key, frag} : null; + for (const k of partKeys) { + if (k.toLowerCase() === segL) return {key: k, frag}; + } + for (const k of partKeys) { + const kl = k.toLowerCase(); + if (kl === segL + 's' || kl + 's' === segL) return {key: k, frag}; + } + return null; } -function tryResolveHtmlLinks(html, strippedKeys, docPrefix, hasOrphanFile) { - // Same-category links: href="#property.path" -> href="docPrefix.fileKey.md#fragment" - const resolved = html.replace(/href="#([^"]+)"/g, (match, lp) => { - const r = tryResolveFileKey(lp, strippedKeys); - if (!r) { - if (hasOrphanFile) return `href="${docPrefix}.md#${lp}"`; - return match; - } - return `href="${docPrefix}.${r.key}.md${r.frag ? '#' + r.frag : ''}"`; - }); +/** + * Resolve a link path to an href pointing to the correct .md file. + * If partKeys contains a match, link to the individual part file; + * otherwise fall back to the root file. + * + * @param {string} linkPath - e.g. 
"title.show", "visualMap" + * @param {Set} partKeys - keys of individual part files + * @param {string} pathPrefix - path prefix for part files + * same-doc: "option" -> "option.title.md" + * cross-doc: "../api-parts/api" -> "../api-parts/api.echarts.md" + * @param {string|null} rootPath - path prefix for root file fallback + * same-doc: "../option" -> "../option.md#visualMap" + * cross-doc: "../api" -> "../api.md#events" + * @returns {string|null} resolved href attribute string, or null + */ +function resolveLink(linkPath, partKeys, pathPrefix, rootPath) { + const resolved = tryResolvePartKey(linkPath, partKeys); + if (!resolved) { + if (rootPath) return `href="${rootPath}.md#${linkPath}"`; + return null; + } + return `href="${pathPrefix}.${resolved.key}.md${resolved.frag ? '#' + resolved.frag : ''}"`; +} + +/** + * Rewrite internal links in HTML so that turndown produces correct .md links. + * Handles two patterns: + * 1. Same-doc: href="#title.show" -> href="option.title.md#show" + * 2. Cross-doc: href="api.html#echarts.init" -> href="../api-parts/api.echarts.md#init" + * Unresolvable links are left as-is or fall back to the root file. + * + * @param {string} html - HTML string containing links + * @param {Object>} partKeysByDoc - part keys for all docs + * @param {string} docName - current doc name (e.g. 
"option") + * @returns {string} HTML with rewritten href attributes + */ +function tryResolveHtmlLinks(html, partKeysByDoc, docName) { + const partKeys = partKeysByDoc[docName]; - // Cross-category links: href="api.html#echarts.init" -> href="../api-parts/api.echarts.md#init" - // Tutorial is a single file, so href="tutorial.html#X" -> href="../tutorial-parts/tutorial.md#X" + // Same-doc links: href="#title.show" -> href="option.title.md#show" + const resolved = html.replace(/href="#([^"]+)"/g, (match, linkPath) => + (partKeys && resolveLink(linkPath, partKeys, docName, `../${docName}`)) || match + ); + + // Cross-doc links: href="api.html#echarts.init" -> href="../api-parts/api.echarts.md#init" return resolved.replace( - /href="(option|api|tutorial)\.html#([^"]+)"/g, - (_, docType, fragment) => { - if (docType === 'tutorial') { - return `href="../tutorial-parts/tutorial.md#${fragment}"`; - } - const {key, frag} = tryResolveFileKey(fragment, new Set([fragment.split('.')[0]])); - return `href="../${docType}-parts/${docType}.${key}.md${frag ? '#' + frag : ''}"`; + /href="(option-gl|option|api|tutorial)\.html#([^"]+)"/g, + (match, targetDoc, fragment) => { + const keys = partKeysByDoc[targetDoc]; + if (!keys) return match; + return resolveLink(fragment, keys, `../${targetDoc}-parts/${targetDoc}`, `../${targetDoc}`) || match; } ); } // --- Convert part JSON to Markdown --- -function formatPropertyEntry(key, val, typeInfo, linkResolver) { +function formatPropertyEntry(key, entry, typeInfo, linkResolver) { const heading = '#'.repeat(Math.min(key.split('.').length + 1, MAX_HEADING_DEPTH)) + ' ' + key; const meta = [ typeInfo && typeInfo.type && `- **Type**: \`${typeInfo.type}\``, typeInfo && typeInfo.default != null && `- **Default**: \`${typeInfo.default}\`` ].filter(Boolean); - const body = val.desc ? htmlToMd(linkResolver(val.desc)) : ''; + const body = entry.desc ? htmlToMd(linkResolver(entry.desc)) : ''; return [heading, ...meta, ...(body ? 
['', body] : []), '']; } -function jsonToMd(data, typeMap, partKey, linkResolver) { - const lines = Object.entries(data).flatMap(([key, val]) => { - const fullKey = partKey ? `${partKey}.${key}` : key; - return formatPropertyEntry(key, val, typeMap[fullKey], linkResolver); +function jsonToMd(data, typeMap, baseName, linkResolver) { + const lines = Object.entries(data).flatMap(([key, entry]) => { + const fullKey = baseName ? `${baseName}.${key}` : key; + return formatPropertyEntry(key, entry, typeMap[fullKey], linkResolver); }); return lines.join('\n').replace(/\n{3,}/g, '\n\n').trimEnd() + '\n'; } -// --- File output --- +// --- Collect part JSON files --- -function writeFile(dir, name, content, category) { - const fullPath = path.resolve(dir, name); - fse.ensureDirSync(path.dirname(fullPath)); - fs.writeFileSync(fullPath, content, 'utf-8'); - return {name, path: fullPath, category}; +/** + * Collect part JSON files for each *-parts/ directory, excluding outline files. + * + * @param {string[]} partsDirs - paths to *-parts/ directories + * @returns {Object} dir path -> JSON file paths + */ +function collectPartJsonFiles(partsDirs) { + const jsonFilesByDir = {}; + for (const dir of partsDirs) { + jsonFilesByDir[dir] = globby.sync(path.join(dir, '*.json')) + .filter(filePath => !path.basename(filePath).includes('-outline')); + } + return jsonFilesByDir; } -// --- Process a single *-parts/ directory --- +// --- Collect file keys for link resolution across docs --- -function processPartsDir(partsDir, outDir, typeMap) { - const dirName = path.basename(partsDir); - const docPrefix = dirName.replace(/-parts$/, ''); +/** + * Build a map of doc name -> Set of part keys for all *-parts/ directories. + * Part keys are file names with the doc name stripped (e.g. "option.title" -> "title"). + * Root files (e.g. "option.json") are excluded since they are not individual part files. 
+ * + * @param {string[]} partsDirs - paths to *-parts/ directories + * @param {Object} jsonFilesByDir - pre-collected JSON file paths + * @returns {Object>} partKeysByDoc - e.g. { option: Set{'title','geo',...}, api: Set{'echarts',...} } + */ +function buildPartKeysByDoc(partsDirs, jsonFilesByDir) { + const partKeysByDoc = {}; + for (const dir of partsDirs) { + const docName = path.basename(dir).replace(/-parts$/, ''); + partKeysByDoc[docName] = new Set( + jsonFilesByDir[dir].map(filePath => path.basename(filePath, '.json')) + .filter(k => k !== docName) + .map(k => k.startsWith(docName + '.') ? k.slice(docName.length + 1) : k) + ); + } + return partKeysByDoc; +} - const jsonFiles = globby.sync(path.join(partsDir, '*.json')) - .filter(f => !path.basename(f).includes('-outline')); +// --- Process a single *-parts/ directory --- - // Collect file keys for link resolution (e.g. "option.title", "option.series-bar") - const fileKeys = new Set(jsonFiles.map(f => path.basename(f, '.json'))); - const strippedKeys = new Set([...fileKeys].map(k => - k.startsWith(docPrefix + '.') ? k.slice(docPrefix.length + 1) : k - )); - const hasOrphanFile = fileKeys.has(docPrefix); +/** + * Convert part JSON files in a single *-parts/ directory to Markdown. + * Each JSON file becomes a .md file with resolved links and type info. + * Root files (e.g. option.json) are output to the parent directory. + * + * @param {string} partsDir - path to a *-parts/ directory (e.g. "documents/option-parts") + * @param {string} outDir - output base directory (e.g. 
"llms-documents") + * @param {Object} typeMap - property path -> {type, default} map + * @param {Object>} partKeysByDoc - part keys for all docs + * @param {string[]} jsonFiles - pre-collected JSON file paths for this directory + * @returns {{name: string, path: string, section: string}[]} output file descriptors + */ +function processPartsDir(partsDir, outDir, typeMap, partKeysByDoc, jsonFiles) { + const dirName = path.basename(partsDir); + const docName = dirName.replace(/-parts$/, ''); // Create a link resolver that rewrites HTML hrefs before turndown - const linkResolver = (html) => tryResolveHtmlLinks(html, strippedKeys, docPrefix, hasOrphanFile); + const linkResolver = (html) => tryResolveHtmlLinks(html, partKeysByDoc, docName); - return jsonFiles.map(f => { - const baseName = path.basename(f, '.json'); - const data = JSON.parse(fs.readFileSync(f, 'utf-8')); + return jsonFiles.map(filePath => { + const baseName = path.basename(filePath, '.json'); + const data = JSON.parse(fs.readFileSync(filePath, 'utf-8')); const content = `# ${baseName}\n\n` + jsonToMd(data, typeMap, baseName, linkResolver); - return writeFile(outDir, `${dirName}/${baseName}.md`, content, dirName); + const isRoot = baseName === docName; + const fileName = isRoot ? `${baseName}.md` : `${dirName}/${baseName}.md`; + const fullPath = path.resolve(outDir, fileName); + fse.ensureDirSync(path.dirname(fullPath)); + fs.writeFileSync(fullPath, content, 'utf-8'); + return {name: fileName, path: fullPath, section: dirName}; }); } @@ -185,17 +277,22 @@ function generateDocsForLang(lang) { // by traversing the nested schema tree to collect type/default for each // property path (e.g. "option.title.show" -> {type: "boolean", default: "true"}). 
const schemaFiles = globby.sync(path.join(docsDir, '*.json')); - const typeMap = schemaFiles.reduce((map, f) => { - const docName = path.basename(f, '.json'); - return {...map, ...buildTypeMap(f, docName)}; - }, {}); + const typeMap = {}; + for (const filePath of schemaFiles) { + Object.assign(typeMap, buildTypeMap(filePath, path.basename(filePath, '.json'))); + } - // Step 2: For each *-parts/ directory, read part JSONs (e.g. option.title.json), + // Step 2: Collect part JSON files and file keys for all *-parts/ directories upfront, + // so that cross-doc links can be resolved against actual files. + const partsDirs = globby.sync(path.join(docsDir, '*-parts'), {onlyDirectories: true}); + const jsonFilesByDir = collectPartJsonFiles(partsDirs); + const partKeysByDoc = buildPartKeysByDoc(partsDirs, jsonFilesByDir); + + // Step 3: For each *-parts/ directory, read part JSONs (e.g. option.title.json), // resolve internal links in HTML, convert desc to Markdown via turndown, // attach type/default from the type map, and write as .md files. 
- const partsDirs = globby.sync(path.join(docsDir, '*-parts'), {onlyDirectories: true}); const files = partsDirs - .flatMap(dir => processPartsDir(dir, outDir, typeMap)) + .flatMap(dir => processPartsDir(dir, outDir, typeMap, partKeysByDoc, jsonFilesByDir[dir])) .sort((a, b) => a.name.localeCompare(b.name)); console.log(`Generated ${files.length} docs for ${lang}`); @@ -204,26 +301,30 @@ function generateDocsForLang(lang) { // --- llms.txt --- -function groupByCategory(files) { - return files.reduce((groups, f) => ({ - ...groups, - [f.category]: [...(groups[f.category] || []), f] - }), {}); -} - function writeLlmsTxt(lang, files) { const langDir = path.resolve(config.releaseDestDir, lang); fse.ensureDirSync(langDir); - const labels = CATEGORY_LABELS[lang] || CATEGORY_LABELS.en; - const groups = groupByCategory(files); + const labels = SECTION_LABELS[lang] || SECTION_LABELS.en; + const groups = {}; + for (const file of files) { + if (!groups[file.section]) groups[file.section] = []; + groups[file.section].push(file); + } const sections = Object.keys(groups) .sort() - .flatMap(cat => [ - `## ${labels[cat] || cat}`, '', - ...groups[cat].map(f => - `- [${path.basename(f.name, '.md')}](${OUTPUT_DIR_NAME}/${f.name})` - ), + .flatMap(sectionKey => [ + `## ${labels[sectionKey] || sectionKey}`, '', + ...groups[sectionKey] + .sort((a, b) => { + const aIsRoot = !a.name.includes('/'); + const bIsRoot = !b.name.includes('/'); + if (aIsRoot !== bIsRoot) return aIsRoot ? 
-1 : 1; + return a.name.localeCompare(b.name); + }) + .map(file => + `- [${path.basename(file.name, '.md')}](${OUTPUT_DIR_NAME}/${file.name})` + ), '' ]); From 97349943def5a8d33ae86a9237dc584a66df871d Mon Sep 17 00:00:00 2001 From: Takahiro Ebato Date: Sat, 4 Apr 2026 14:41:09 +0900 Subject: [PATCH 4/5] improve link resolution, refactor build-llms.js --- build/build-llms.js | 60 ++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/build/build-llms.js b/build/build-llms.js index 25d0b312b..871ee280a 100644 --- a/build/build-llms.js +++ b/build/build-llms.js @@ -73,7 +73,9 @@ function buildTypeMap(schemaJsonPath, docName) { if (node.type || node.default != null) { typeMap[schemaPath] = { type: node.type ? (Array.isArray(node.type) ? node.type.join('|') : node.type) : null, - default: node.default != null ? String(node.default) : null + default: node.default != null + ? (typeof node.default === 'object' ? JSON.stringify(node.default) : String(node.default)) + : null }; } }); @@ -124,51 +126,67 @@ function tryResolvePartKey(linkPath, partKeys) { * @param {string} linkPath - e.g. 
"title.show", "visualMap" * @param {Set} partKeys - keys of individual part files * @param {string} pathPrefix - path prefix for part files - * same-doc: "option" -> "option.title.md" - * cross-doc: "../api-parts/api" -> "../api-parts/api.echarts.md" - * @param {string|null} rootPath - path prefix for root file fallback - * same-doc: "../option" -> "../option.md#visualMap" + * from part: "option" -> "option.title.md" + * from root: "option-parts/option" -> "option-parts/option.title.md" + * cross-doc: "../api-parts/api" -> "../api-parts/api.echarts.md" + * @param {string} rootPath - path prefix for root file fallback + * from part: "../option" -> "../option.md#visualMap" + * from root: "option" -> "option.md#visualMap" * cross-doc: "../api" -> "../api.md#events" - * @returns {string|null} resolved href attribute string, or null + * @returns {string} resolved href attribute string */ function resolveLink(linkPath, partKeys, pathPrefix, rootPath) { const resolved = tryResolvePartKey(linkPath, partKeys); if (!resolved) { - if (rootPath) return `href="${rootPath}.md#${linkPath}"`; - return null; + return `href="${rootPath}.md#${linkPath}"`; } return `href="${pathPrefix}.${resolved.key}.md${resolved.frag ? '#' + resolved.frag : ''}"`; } /** - * Rewrite internal links in HTML so that turndown produces correct .md links. - * Handles two patterns: - * 1. Same-doc: href="#title.show" -> href="option.title.md#show" - * 2. Cross-doc: href="api.html#echarts.init" -> href="../api-parts/api.echarts.md#init" - * Unresolvable links are left as-is or fall back to the root file. + * Rewrite internal links and image paths in HTML before turndown conversion. + * Handles three patterns: + * 1. Same-doc: href="#title.show" -> href="option.title.md#show" + * 2. Cross-doc: href="api.html#echarts.init" -> href="../api-parts/api.echarts.md#init" + * 3. Images: src="documents/asset/img/..." -> src="../../documents/asset/img/..." + * Unresolvable links fall back to the root file. 
* * @param {string} html - HTML string containing links * @param {Object<string, Set<string>>} partKeysByDoc - part keys for all docs * @param {string} docName - current doc name (e.g. "option") - * @returns {string} HTML with rewritten href attributes + * @param {boolean} isRoot - whether the current file is a root file + * @returns {string} HTML with rewritten href attributes and image paths */ -function tryResolveHtmlLinks(html, partKeysByDoc, docName) { +function tryResolveHtmlLinks(html, partKeysByDoc, docName, isRoot) { const partKeys = partKeysByDoc[docName]; + // Path prefixes differ depending on whether current file is root or part: + // root (llms-documents/option.md) -> part: "option-parts/option", root: "option" + // part (llms-documents/option-parts/option.*.md) -> part: "option", root: "../option" + const sameDocPartPrefix = isRoot ? `${docName}-parts/${docName}` : docName; + const sameDocRootPath = isRoot ? docName : `../${docName}`; + const crossDocPrefix = isRoot ? '' : '../'; // Same-doc links: href="#title.show" -> href="option.title.md#show" const resolved = html.replace(/href="#([^"]+)"/g, (match, linkPath) => - (partKeys && resolveLink(linkPath, partKeys, docName, `../${docName}`)) || match + partKeys ? resolveLink(linkPath, partKeys, sameDocPartPrefix, sameDocRootPath) : match ); // Cross-doc links: href="api.html#echarts.init" -> href="../api-parts/api.echarts.md#init" - return resolved.replace( + const crossResolved = resolved.replace( /href="(option-gl|option|api|tutorial)\.html#([^"]+)"/g, (match, targetDoc, fragment) => { const keys = partKeysByDoc[targetDoc]; if (!keys) return match; - return resolveLink(fragment, keys, `../${targetDoc}-parts/${targetDoc}`, `../${targetDoc}`) || match; + return resolveLink(fragment, keys, `${crossDocPrefix}${targetDoc}-parts/${targetDoc}`, `${crossDocPrefix}${targetDoc}`); } ); + + // Image paths: src="documents/asset/..." -> relative path to public/{lang}/documents/asset/ + const imgPrefix = isRoot ? 
'../' : '../../'; + return crossResolved.replace( + /src="(documents\/asset\/[^"]*)"/g, + (_, src) => `src="${imgPrefix}${src}"` + ); } // --- Convert part JSON to Markdown --- @@ -250,14 +268,12 @@ function processPartsDir(partsDir, outDir, typeMap, partKeysByDoc, jsonFiles) { const dirName = path.basename(partsDir); const docName = dirName.replace(/-parts$/, ''); - // Create a link resolver that rewrites HTML hrefs before turndown - const linkResolver = (html) => tryResolveHtmlLinks(html, partKeysByDoc, docName); - return jsonFiles.map(filePath => { const baseName = path.basename(filePath, '.json'); const data = JSON.parse(fs.readFileSync(filePath, 'utf-8')); - const content = `# ${baseName}\n\n` + jsonToMd(data, typeMap, baseName, linkResolver); const isRoot = baseName === docName; + const linkResolver = (html) => tryResolveHtmlLinks(html, partKeysByDoc, docName, isRoot); + const content = `# ${baseName}\n\n` + jsonToMd(data, typeMap, baseName, linkResolver); const fileName = isRoot ? 
`${baseName}.md` : `${dirName}/${baseName}.md`; const fullPath = path.resolve(outDir, fileName); fse.ensureDirSync(path.dirname(fullPath)); From 8d32be06e993ee8c3781d399fb6a6254898cccc3 Mon Sep 17 00:00:00 2001 From: plainheart Date: Tue, 7 Apr 2026 09:17:51 +0800 Subject: [PATCH 5/5] fix globby compatibility in Windows & tweak zh section labels & update turndown dep --- build/build-llms.js | 14 +++++++++----- package-lock.json | 18 +++++++++++------- package.json | 2 +- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/build/build-llms.js b/build/build-llms.js index 871ee280a..71c37cfc9 100644 --- a/build/build-llms.js +++ b/build/build-llms.js @@ -27,7 +27,7 @@ const MAX_HEADING_DEPTH = 6; const SECTION_LABELS = { en: {'option-parts': 'Option', 'option-gl-parts': 'Option GL', 'api-parts': 'API', 'tutorial-parts': 'Tutorial'}, - zh: {'option-parts': '配置项 (Option)', 'option-gl-parts': 'Option GL', 'api-parts': 'API', 'tutorial-parts': '教程 (Tutorial)'} + zh: {'option-parts': '配置项 (Option)', 'option-gl-parts': 'GL配置项 (Option GL)', 'api-parts': 'API', 'tutorial-parts': '教程 (Tutorial)'} }; const LLMS_TXT_HEADER = [ @@ -66,7 +66,7 @@ function htmlToMd(html) { * e.g. 
{ "option.title.show": {type: "boolean", default: "true"} } */ function buildTypeMap(schemaJsonPath, docName) { - if (!fs.existsSync(schemaJsonPath)) return {}; + if (docName === 'tutorial' || !fs.existsSync(schemaJsonPath)) return {}; const schema = JSON.parse(fs.readFileSync(schemaJsonPath, 'utf-8')); const typeMap = {}; traverse(schema, docName, (schemaPath, node) => { @@ -220,7 +220,7 @@ function jsonToMd(data, typeMap, baseName, linkResolver) { function collectPartJsonFiles(partsDirs) { const jsonFilesByDir = {}; for (const dir of partsDirs) { - jsonFilesByDir[dir] = globby.sync(path.join(dir, '*.json')) + jsonFilesByDir[dir] = globby.sync('*.json', { cwd: dir, absolute: true }) .filter(filePath => !path.basename(filePath).includes('-outline')); } return jsonFilesByDir; @@ -292,7 +292,7 @@ function generateDocsForLang(lang) { // Step 1: Build a type map from full schema JSONs (option.json, api.json, etc.) // by traversing the nested schema tree to collect type/default for each // property path (e.g. "option.title.show" -> {type: "boolean", default: "true"}). - const schemaFiles = globby.sync(path.join(docsDir, '*.json')); + const schemaFiles = globby.sync('*.json', { cwd: docsDir, absolute: true }); const typeMap = {}; for (const filePath of schemaFiles) { Object.assign(typeMap, buildTypeMap(filePath, path.basename(filePath, '.json'))); @@ -300,7 +300,11 @@ function generateDocsForLang(lang) { // Step 2: Collect part JSON files and file keys for all *-parts/ directories upfront, // so that cross-doc links can be resolved against actual files. 
- const partsDirs = globby.sync(path.join(docsDir, '*-parts'), {onlyDirectories: true}); + const partsDirs = globby.sync('*-parts', { + cwd: docsDir, + absolute: true, + onlyDirectories: true + }); const jsonFilesByDir = collectPartJsonFiles(partsDirs); const partKeysByDoc = buildPartKeysByDoc(partsDirs, jsonFilesByDir); diff --git a/package-lock.json b/package-lock.json index 754f99de7..2b6bd2722 100644 --- a/package-lock.json +++ b/package-lock.json @@ -42,7 +42,7 @@ "open": "^8.4.0", "sass.js": "^0.11.1", "sassjs-loader": "^2.0.0", - "turndown": "^7.2.2", + "turndown": "^7.2.4", "turndown-plugin-gfm": "^1.0.2", "vue-loader": "^15.9.2", "vue-template-compiler": "^2.6.11", @@ -5376,13 +5376,17 @@ "dev": true }, "node_modules/turndown": { - "version": "7.2.2", - "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.2.2.tgz", - "integrity": "sha512-1F7db8BiExOKxjSMU2b7if62D/XOyQyZbPKq/nUwopfgnHlqXHqQ0lvfUTeUIr1lZJzOPFn43dODyMSIfvWRKQ==", + "version": "7.2.4", + "resolved": "https://registry.npmmirror.com/turndown/-/turndown-7.2.4.tgz", + "integrity": "sha512-I8yFsfRzmzK0WV1pNNOA4A7y4RDfFxPRxb3t+e3ui14qSGOxGtiSP6GjeX+Y6CHb7HYaFj7ECUD7VE5kQMZWGQ==", "dev": true, "license": "MIT", "dependencies": { "@mixmark-io/domino": "^2.2.0" + }, + "engines": { + "node": ">=18", + "npm": ">=9" } }, "node_modules/turndown-plugin-gfm": { @@ -9943,9 +9947,9 @@ "dev": true }, "turndown": { - "version": "7.2.2", - "resolved": "https://registry.npmjs.org/turndown/-/turndown-7.2.2.tgz", - "integrity": "sha512-1F7db8BiExOKxjSMU2b7if62D/XOyQyZbPKq/nUwopfgnHlqXHqQ0lvfUTeUIr1lZJzOPFn43dODyMSIfvWRKQ==", + "version": "7.2.4", + "resolved": "https://registry.npmmirror.com/turndown/-/turndown-7.2.4.tgz", + "integrity": "sha512-I8yFsfRzmzK0WV1pNNOA4A7y4RDfFxPRxb3t+e3ui14qSGOxGtiSP6GjeX+Y6CHb7HYaFj7ECUD7VE5kQMZWGQ==", "dev": true, "requires": { "@mixmark-io/domino": "^2.2.0" diff --git a/package.json b/package.json index af4325dc6..cf0ec2d0a 100644 --- a/package.json +++ b/package.json @@ 
-38,7 +38,7 @@ "open": "^8.4.0", "sass.js": "^0.11.1", "sassjs-loader": "^2.0.0", - "turndown": "^7.2.2", + "turndown": "^7.2.4", "turndown-plugin-gfm": "^1.0.2", "vue-loader": "^15.9.2", "vue-template-compiler": "^2.6.11",