From 9326690392735822679cb6c91b7d84b4af21cb4e Mon Sep 17 00:00:00 2001 From: tophf Date: Sun, 10 Jan 2021 19:08:04 +0300 Subject: [PATCH 1/3] normalize line endings when testing equality --- test/javascript-mixed/javascript-mixed.test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/javascript-mixed/javascript-mixed.test.js b/test/javascript-mixed/javascript-mixed.test.js index 5d1fea6..6ff6b6f 100644 --- a/test/javascript-mixed/javascript-mixed.test.js +++ b/test/javascript-mixed/javascript-mixed.test.js @@ -24,7 +24,7 @@ test('codemirror javascript-mixed regression', (t) => { } else { // write the output to a file for ease of manual diffs when regression happens fs.writeFileSync(path.join(__dirname, 'code-tokens.out'), tokenRes, 'utf8'); - t.equal(tokenRes, expected); + t.equal(tokenRes.replace(/\r\n/g, '\n'), expected.replace(/\r\n/g, '\n')); t.end(); } }); From a891af31ec63a997b75678c040bd5896812af8f5 Mon Sep 17 00:00:00 2001 From: tophf Date: Sun, 10 Jan 2021 19:25:11 +0300 Subject: [PATCH 2/3] reduce verbosity of rule definitions --- .eslintrc.js | 2 +- mode/javascript-mixed/javascript-mixed.js | 386 +++++++++++----------- 2 files changed, 195 insertions(+), 193 deletions(-) diff --git a/.eslintrc.js b/.eslintrc.js index 69fa892..25c273b 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -4,7 +4,7 @@ module.exports = { "eslint:recommended" ], parserOptions: { - "ecmaVersion": 2018, + "ecmaVersion": 2020, ecmaFeatures: { legacyDecorators: true, }, diff --git a/mode/javascript-mixed/javascript-mixed.js b/mode/javascript-mixed/javascript-mixed.js index b8ba5a9..cf99f98 100644 --- a/mode/javascript-mixed/javascript-mixed.js +++ b/mode/javascript-mixed/javascript-mixed.js @@ -209,10 +209,10 @@ // Helpers to token string template in local mode - function prepReparseStringTemplateInLocalMode(modeToUse, stream, state, - hasBeginBacktick = true) { - dbg(`Entering local ${modeToUse.name} mode...`); - if (hasBeginBacktick) { + /** @this {Rule} */ + function prepReparseStringTemplateInLocalMode({stream, state}) { + dbg(`Entering local ${this.mode.name} mode...`); + if (this.hasBeginBacktick !== false) { // spit out beginning backtick as a token, and leave the rest of the text for local mode parsing stream.backUp(tokenLength(stream) - 1); } else { @@ -224,23 +224,25 @@ forceJsModeToQuasi(stream, state.jsState); // switch to local mode for subsequent text - state.localMode = modeToUse; + state.localMode = this.mode; state.localState = CodeMirror.startState(state.localMode); state.inJsExprInStringTemplate = false; state.jsExprDepthInStringTemplate = 0; } - function isEndBacktick(stream, state) { + /** @this {Rule} */ + function isEndBacktick({stream, state}) { // check it hits ending backtick for string template, // ignoring the backticks that appear inside a JS expression. return !state.inJsExprInStringTemplate && stream.peek() === '`' && tokenLastChar(stream) !== '\\'; // ensure it is not an escaped backtick (doesn't count) } - function exitLocalModeWithEndBacktick(stream, state) { + /** @this {Rule} */ + function exitLocalModeWithEndBacktick(ctx) { dbg('Exiting local html/css mode...'); // parse the ending JS string template backtick in js mode - return jsMode.token(stream, state.jsState); + ctx.style = jsMode.token(ctx.stream, ctx.state.jsState); } // Local mode-specific helpers to handle js expression in string template @@ -386,12 +388,16 @@ stream.backUp(tokenLength(stream) - backtickPos); } - function tokenInLocalModeStringTemplate(stream, state) { + /** @this {Rule} */ + function tokenInLocalModeStringTemplate(ctx) { + const {stream, state} = ctx; if (state.inJsExprInStringTemplate) { - return tokenJsExpressionInStringTemplate(stream, state); + ctx.style = tokenJsExpressionInStringTemplate(stream, state); + return; } if (state.tokenizePostJsExpr) { - return state.tokenizePostJsExpr(stream, state); + ctx.style = state.tokenizePostJsExpr(stream, state); + return; } // else normal local mode tokenization const style = state.localMode.token(stream, state.localState); @@ -399,7 +405,8 @@ excludeEndBacktickFromToken(stream, style); const jsExprStart = state.localMode.indexOfJsExprStart(stream, state); if (jsExprStart < 0) { - return style; + ctx.style = style; + return; } // case there is an js expression state.localMode.ensureProperLocalModeStatePostJsExpr(stream, state, style); @@ -413,43 +420,47 @@ // would recognize it as an js expression and tokenize as such. // Note: cannot increment state.jsExprDepthInStringTemplate yet, // as the ${ to be handled by js tokenizer the next time - return style; + ctx.style = style; } // Helpers to token plain string (single/double-quoted) in local mode - function prepReparsePlainStringInLocalMode(modeToUse, stream, state) { - dbg(`Entering local ${modeToUse.name} mode... (plain string)`); + /** @this {Rule} */ + function prepReparsePlainStringInLocalMode({stream, state}) { + dbg(`Entering local ${this.mode.name} mode... (plain string)`); // dbg(` ${stream.start}-${stream.pos}:\t${stream.current()}`); const oldPos = stream.pos; // spit out beginning beginning quote as a token, and leave the rest of the text for local mode parsing stream.backUp(tokenLength(stream) - 1); // switch to local mode for subsequent text - state.localMode = modeToUse; + state.localMode = this.mode; state.localState = CodeMirror.startState(state.localMode); // use end quote position to detect the end of the local html mode state.localState.localHtmlPlainStringEndPos = oldPos; } - function exitLocalModeWithEndQuote(stream) { + /** @this {Rule} */ + function exitLocalModeWithEndQuote(ctx) { dbg('Exiting local html/css mode... (plain string)'); // parse the ending JS string quote, // cannot use the jsMode to parse, as it will be treated as the beginning of a string. // so we simulate it here. - stream.next(); // should be single or double quote; - return 'string'; // the expected style + ctx.stream.next(); // should be single or double quote; + ctx.style = 'string'; // the expected style } - function tokenInLocalModePlainString(stream, state) { + /** @this {Rule} */ + function tokenInLocalModePlainString(ctx) { + const {stream, state} = ctx; const style = state.localMode.token(stream, state.localState); if (stream.pos >= state.localState.localHtmlPlainStringEndPos) { // backUp text beyond the string, plus one to exclude end quote stream.backUp(stream.pos - state.localState.localHtmlPlainStringEndPos + 1); } dbg(' local mode token (plain string) - ', stream.current(), `[${style}]`); - return style; + ctx.style = style; } /* eslint max-classes-per-file: ["error", 2] */ @@ -509,126 +520,130 @@ } } + /** @typedef {function(RunContext):(?boolean)} RuleLambda */ + /** @typedef {Rule | RuleLambda | [type|match,?text|RegExp,?opts]} RuleOpts */ - class Rule { - constructor(props) { - this.curContext = props.curContext; - // lambda for match condition - this.match = props.match; - this.nextContext = props.nextContext; - // optional, lambda for additional logic to run if matched - this.caseMatched = props.caseMatched; - // optional, lambda for additional logic to run if not matched - this.caseNotMatched = props.caseNotMatched; - } + /** + * @typedef Rule + * @property {boolean} id - current context + * @property {?string} next - next context or null + * @property {Object} [mode] - CodeMirror mode + * @property {boolean} [hasBeginBacktick=true] + * @property {RuleLambda} match - matching function + * @property {RuleLambda} [onMatch] - runs if matched + * @property {RuleLambda} [onMiss] - runs if not matched + */ - run(ctx) { - const state = ctx.state; - if (this.match(ctx)) { - state.maybeLocalContext = this.nextContext; - if (state.maybeLocalContext == null) { - // local mode done, reset - state.localMode = null; - state.localState = null; + /** + * @param {string} prefix + * @param {RuleOpts[]} ruleOptsSequence + * @return {Rule[]} + */ + function makeRules(prefix, ...ruleOptsSequence) { + const res = []; + ruleOptsSequence.forEach((rules, seqIndex) => { + const seqPrefix = prefix + '-' + (seqIndex + 1); + rules.forEach((rule, i) => { + if (typeof rule === 'function') { + rule = {match: rule}; + } else if (Array.isArray(rule)) { + let fn; + let [matcher, text, opts] = rule; + if (typeof matcher === 'function') { + fn = matcher; + opts = text; + } else if (text instanceof RegExp) { + fn = ctx => ctx.type === matcher && text.test(ctx.text); + } else if (typeof text === 'string') { + fn = ctx => ctx.type === matcher && text === ctx.text; + } else { + fn = ctx => ctx.type === matcher; + opts = text; + } + rule = opts || {}; + rule.match = fn; } - if (this.caseMatched) { this.caseMatched(ctx); } - return true; - } // case rule transition criteria not matched - if (this.caseNotMatched) { - this.caseNotMatched(ctx); - } else { // default not matched logic: reset local mode matching - state.maybeLocalContext = null; + if (rule.id === undefined) rule.id = i ? seqPrefix + i : ''; + if (rule.next === undefined) rule.next = seqPrefix + (i + 1); + res.push(rule); + }); + }); + return res; + } + + /** + * @this {RunContext} ctx + * @param {Rule} rule + * @return {?boolean} true if matched + */ + function runRule(rule) { + const {state} = this; + if (rule.match(this)) { + state.maybeLocalContext = rule.next; + if (state.maybeLocalContext == null) { + // local mode done, reset + state.localMode = null; + state.localState = null; } - return false; + rule.onMatch?.(this); + return true; + } // case rule transition criteria not matched + if (rule.onMiss) { + rule.onMiss(this); + } else { // default not matched logic: reset local mode matching + state.maybeLocalContext = null; } } function matchRule(ruleMap, stream, state, jsTokenStyle) { const ctx = RunContext.get(stream, state, jsTokenStyle); const rules = ruleMap[state.maybeLocalContext || '']; - for (const r of rules) { - // dbg(' rule:', r.curContext, r.match.toString()); - const matched = r.run(ctx); - // dbg(' => rule output tokenStyle', ctx.style); - if (matched) { - break; - } - } + rules.some(runRule, ctx); return ctx.style; } // define the transition rules to enter local CSS mode; - const cssRules = [ - // for pattern GM_addStyle(`css-string`); - new Rule({ - curContext: '', - match: ctx => ctx.type === 'variable' && ctx.text === 'GM_addStyle', - nextContext: 'css-1', - }), - new Rule({ - curContext: 'css-1', - match: ctx => ctx.type === '(' && ctx.text === '(', - nextContext: 'css-2', - }), - new Rule({ - curContext: 'css-2', - match: ctx => ctx.type === 'quasi', // if it's a string template - nextContext: 'css-in', - caseMatched: ctx => prepReparseStringTemplateInLocalMode(cssMode, ctx.stream, ctx.state), - }), - new Rule({ - curContext: 'css-in', - match: ctx => isEndBacktick(ctx.stream, ctx.state), - nextContext: null, // then exit local css mode - caseMatched: ctx => { ctx.style = exitLocalModeWithEndBacktick(ctx.stream, ctx.state); }, - caseNotMatched: ctx => { // else stay in local mode - ctx.style = tokenInLocalModeStringTemplate(ctx.stream, ctx.state); - }, - }), - - // for pattern GM.addStyle(`css-string`); - // (i.e., Greasemonkey v4 style for GM_addStyle) - new Rule({ - curContext: '', - match: ctx => ctx.type === 'variable' && ctx.text === 'GM', - nextContext: 'css-31', - }), - new Rule({ - curContext: 'css-31', - match: ctx => ctx.type === '.' && ctx.text === '.', - nextContext: 'css-32', - }), - new Rule({ - curContext: 'css-32', - match: ctx => ctx.type === 'variable' && ctx.text === 'addStyle', - nextContext: 'css-33', - }), - new Rule({ - curContext: 'css-33', - match: ctx => ctx.type === '(' && ctx.text === '(', - nextContext: 'css-34', - }), - new Rule({ - curContext: 'css-34', - match: ctx => ctx.type === 'quasi', // if it's a string template - nextContext: 'css-in', - caseMatched: ctx => prepReparseStringTemplateInLocalMode(cssMode, ctx.stream, ctx.state), - }), - - // for pattern var someCSS = /* css */ `css-string` - new Rule({ - curContext: '', - match: ctx => ctx.jsTokenStyle === 'comment' && /^\/\*\s*css\s*\*\/$/i.test(ctx.text), - nextContext: 'css-21', - }), - new Rule({ - curContext: 'css-21', - match: ctx => ctx.type === 'quasi', - nextContext: 'css-in', - caseMatched: ctx => prepReparseStringTemplateInLocalMode(cssMode, ctx.stream, ctx.state), - }), - ]; - + const cssRules = makeRules('css', + // GM_addStyle(`css-string`); + [ + ['variable', 'GM_addStyle'], + ['(', '('], + ['quasi', { // if it's a string template + next: 'css-in', + mode: cssMode, + onMatch: prepReparseStringTemplateInLocalMode, + }], + [isEndBacktick, { + id: 'css-in', + next: null, // then exit local css mode + onMatch: exitLocalModeWithEndBacktick, + onMiss: tokenInLocalModeStringTemplate, // else stay in local mode + }], + ], + // GM.addStyle(`css-string`); + [ + ['variable', 'GM'], + ['.', '.'], + ['variable', 'addStyle'], + ['(', '('], + ['quasi', { // if it's a string template + next: 'css-in', + mode: cssMode, + onMatch: prepReparseStringTemplateInLocalMode, + }], + ], + // var someCSS = /* css */ `css-string` + // var someCSS = /* lang=css */ `css-string` + // var someCSS = /* language=css */ `css-string` + [ + ctx => ctx.jsTokenStyle === 'comment' && + /^\/\*\s*(lang(uage)?\s*=\s*)?css\s*\*\/$/i.test(ctx.text), + ['quasi', { + next: 'css-in', + mode: cssMode, + onMatch: prepReparseStringTemplateInLocalMode, + }], + ]); const [RE_HTML_BASE, RE_HTML_PLAIN_STRING, RE_HTML_STRING_TEMPLATE] = (() => { const reHtmlBaseStr = /\s*<\/?[a-zA-Z0-9]+(\s|\/?>)/.source; @@ -640,80 +655,67 @@ })(); // define the transition rules to enter local html mode; - const htmlRules = [ + const htmlRules = makeRules('html', // inside a html string template - new Rule({ - curContext: 'html-in', - match: ctx => isEndBacktick(ctx.stream, ctx.state), - nextContext: null, // then exit local html mode - caseMatched: ctx => { ctx.style = exitLocalModeWithEndBacktick(ctx.stream, ctx.state); }, - caseNotMatched: ctx => { // else stay in local mode - ctx.style = tokenInLocalModeStringTemplate(ctx.stream, ctx.state); - }, - }), - - // for pattern var someHTML = /* html */ `html-string` - new Rule({ - curContext: '', - match: ctx => ctx.jsTokenStyle === 'comment' && /^\/\*\s*html\s*\*\/$/i.test(ctx.text), - nextContext: 'html-21', - }), - new Rule({ - curContext: 'html-21', - match: ctx => ctx.type === 'quasi', - nextContext: 'html-in', - caseMatched: ctx => prepReparseStringTemplateInLocalMode(htmlmixedMode, - ctx.stream, ctx.state), - }), - + [ + [isEndBacktick, { + id: 'html-in', + next: null, // then exit local html mode + onMatch: exitLocalModeWithEndBacktick, + onMiss: tokenInLocalModeStringTemplate, // else stay in local mode + }], + ], + // var someHTML = /* html */ `html-string` + // var someHTML = /* lang=html */ `html-string` + // var someHTML = /* language=html */ `html-string` + [ + ctx => ctx.jsTokenStyle === 'comment' && + /^\/\*\s*(lang(uage)?\s*=\s*)?html\s*\*\/$/i.test(ctx.text), + ['quasi', { + next: 'html-in', + mode: htmlmixedMode, + onMatch: prepReparseStringTemplateInLocalMode, + }], + ], // for plain string (single or double quoted) that looks like html // e.g., '
hello', "
", '
', etc. - new Rule({ - curContext: '', - match: ctx => ctx.type === 'string' && RE_HTML_PLAIN_STRING.test(ctx.text), - nextContext: 'html-str-in', - caseMatched: ctx => prepReparsePlainStringInLocalMode(htmlNoMatchClosingMode, - ctx.stream, ctx.state), - }), - new Rule({ - curContext: 'html-str-in', - match: ctx => ctx.stream.start >= ctx.state.localState.localHtmlPlainStringEndPos - 1, // match the expected ending quote by position - nextContext: null, // then exit local html mode - caseMatched: ctx => { ctx.style = exitLocalModeWithEndQuote(ctx.stream, ctx.state); }, - caseNotMatched: ctx => { ctx.style = tokenInLocalModePlainString(ctx.stream, ctx.state); }, // else stay local mode - }), - + [ + ['string', RE_HTML_PLAIN_STRING, { + mode: htmlNoMatchClosingMode, + onMatch: prepReparsePlainStringInLocalMode, + }], + // match the expected ending quote by position + [ctx => ctx.stream.start >= ctx.state.localState.localHtmlPlainStringEndPos - 1, { + next: null, // then exit local html mode + onMatch: exitLocalModeWithEndQuote, + onMiss: tokenInLocalModePlainString, // else stay local mode + }], + ], // for HTML string template (without inline comment as a hint) - new Rule({ - curContext: '', - match: ctx => ctx.type === 'quasi' && RE_HTML_STRING_TEMPLATE.test(ctx.text), - nextContext: 'html-in', - caseMatched: ctx => prepReparseStringTemplateInLocalMode(htmlmixedMode, - ctx.stream, ctx.state), - }), - + [ + ['quasi', RE_HTML_STRING_TEMPLATE, { + next: 'html-in', + mode: htmlmixedMode, + onMatch: prepReparseStringTemplateInLocalMode, + }], + ], // for HTML string template (where first line is blank, html started in second line) - new Rule({ - curContext: '', - match: ctx => ctx.type === 'quasi' && /^[`](\\)?\s*$/.test(ctx.text), // first line is blank - nextContext: 'html-51', - }), - new Rule({ - curContext: 'html-51', - match: ctx => ctx.type === 'quasi' && RE_HTML_BASE.test(ctx.text), // second line starts with a tag - nextContext: 'html-in', - caseMatched: ctx => prepReparseStringTemplateInLocalMode(htmlmixedMode, - ctx.stream, ctx.state, false), - }), - - ]; - - // a map of all rules, keyed by curContext for quick look up during matching + [ + ['quasi', /^[`](\\)?\s*$/], // first line is blank + ['quasi', RE_HTML_BASE, { // second line starts with a tag + next: 'html-in', + mode: htmlmixedMode, + hasBeginBacktick: false, + onMatch: prepReparseStringTemplateInLocalMode, + }], + ]); + + // a map of all rules, keyed by id for quick look up during matching const allRuleMap = (() => { const res = {}; for (const rules of [htmlRules, cssRules]) { for (const rule of rules) { - const key = rule.curContext; + const key = rule.id; res[key] = res[key] || []; res[key].push(rule); } From 6797d5a703c86672f622c4e7d34e4483776069b2 Mon Sep 17 00:00:00 2001 From: tophf Date: Sat, 21 Aug 2021 17:17:44 +0300 Subject: [PATCH 3/3] use Rule objects + multi-type `match` + tiny speedup --- DEVELOPERS.md | 2 +- mode/javascript-mixed/javascript-mixed.js | 239 ++++++++++-------- .../{code-source.txt => code-source.js} | 40 +-- test/javascript-mixed/code-tokens.out | 102 +++++--- test/javascript-mixed/code-tokens.txt | 102 +++++--- .../javascript-mixed/javascript-mixed.test.js | 4 +- test/javascript-mixed/runmode.html | 2 +- 7 files changed, 283 insertions(+), 208 deletions(-) rename test/javascript-mixed/{code-source.txt => code-source.js} (93%) diff --git a/DEVELOPERS.md b/DEVELOPERS.md index 5386be3..2f5d81c 100644 --- a/DEVELOPERS.md +++ b/DEVELOPERS.md @@ -5,7 +5,7 @@ Testing: - let you highlight the specified sources in standalone - useful to check the details of the token generated - it is used to construct the regression test input / output. -- By default, it reads the test case file, [`test/javascript-mixed/code-source.txt`](test/javascript-mixed/code-source.txt), and highlights it. +- By default, it reads the test case file, [`test/javascript-mixed/code-source.js`](test/javascript-mixed/code-source.js), and highlights it. - To use it, run `bin/start-runmode.sh` in project root. Open the specified URL in a browser. - Open the helper as file (i.e., with `file://` protocol ) will not work: It uses `fetch` to get the sample input sources, which requires http protocol. diff --git a/mode/javascript-mixed/javascript-mixed.js b/mode/javascript-mixed/javascript-mixed.js index cf99f98..abf45ae 100644 --- a/mode/javascript-mixed/javascript-mixed.js +++ b/mode/javascript-mixed/javascript-mixed.js @@ -520,130 +520,142 @@ } } + // a map of all rules, keyed by id/type for quick lookup during matching + const rulesById = {}; + const rulesByType = {}; + const rulesByLangCmt = []; + /** @typedef {function(RunContext):(?boolean)} RuleLambda */ - /** @typedef {Rule | RuleLambda | [type|match,?text|RegExp,?opts]} RuleOpts */ /** * @typedef Rule * @property {boolean} id - current context * @property {?string} next - next context or null + * @property {?string} type - token type + * @property {?string} style - token style, only 'comment' is handled for language hints * @property {Object} [mode] - CodeMirror mode * @property {boolean} [hasBeginBacktick=true] - * @property {RuleLambda} match - matching function + * @property {RuleLambda|string|RegExp} match - matching function/text/regexp, + * in case of text/regexp the function is auto-created by makeRules() * @property {RuleLambda} [onMatch] - runs if matched * @property {RuleLambda} [onMiss] - runs if not matched */ /** - * @param {string} prefix - * @param {RuleOpts[]} ruleOptsSequence + * @param {Object} prefixedRuleGroups * @return {Rule[]} */ - function makeRules(prefix, ...ruleOptsSequence) { - const res = []; - ruleOptsSequence.forEach((rules, seqIndex) => { - const seqPrefix = prefix + '-' + (seqIndex + 1); + function makeRules(prefixedRuleGroups) { + Object.entries(prefixedRuleGroups).forEach(([seqPrefix, rules]) => { rules.forEach((rule, i) => { - if (typeof rule === 'function') { - rule = {match: rule}; - } else if (Array.isArray(rule)) { - let fn; - let [matcher, text, opts] = rule; - if (typeof matcher === 'function') { - fn = matcher; - opts = text; - } else if (text instanceof RegExp) { - fn = ctx => ctx.type === matcher && text.test(ctx.text); - } else if (typeof text === 'string') { - fn = ctx => ctx.type === matcher && text === ctx.text; + const {match, type, style} = rule; + if (typeof match !== 'function') { + if (typeof match === 'string') { + rule.match = ctx => ctx.type === type && ctx.text === match; + } else if (match instanceof RegExp) { + rule.match = type + ? ctx => ctx.type === type && match.test(ctx.text) + : ctx => match.test(ctx.text); } else { - fn = ctx => ctx.type === matcher; - opts = text; + rule.match = ctx => ctx.type === type; } - rule = opts || {}; - rule.match = fn; } - if (rule.id === undefined) rule.id = i ? seqPrefix + i : ''; - if (rule.next === undefined) rule.next = seqPrefix + (i + 1); - res.push(rule); + if (rule.id === undefined) { + rule.id = i ? `${seqPrefix}-${i}` : ''; + } + if (rule.next === undefined) { + rule.next = `${seqPrefix}-${i + 1}`; + } + if (rule.id) { + rulesById[rule.id] = [rule]; + } + if (style === 'comment') { + rulesByLangCmt.push(rule); + } + (rulesByType[type || ''] || (rulesByType[type || ''] = [])).push(rule); }); }); - return res; } - /** - * @this {RunContext} ctx - * @param {Rule} rule - * @return {?boolean} true if matched - */ - function runRule(rule) { - const {state} = this; - if (rule.match(this)) { - state.maybeLocalContext = rule.next; - if (state.maybeLocalContext == null) { - // local mode done, reset - state.localMode = null; - state.localState = null; + function matchRule(stream, state, jsTokenStyle) { + const ctx = RunContext.get(stream, state, jsTokenStyle); + const id = state.maybeLocalContext || ''; + const rules = id ? rulesById[id] + : jsTokenStyle === 'comment' && ctx.text[1] === '*' ? rulesByLangCmt + : rulesByType[ctx.type]; + if (rules) { + for (const rule of rules) { + if (rule.id === id) { + if (rule.match(ctx)) { + state.maybeLocalContext = rule.next; + if (rule.next == null) { + // local mode done, reset + state.localMode = null; + state.localState = null; + } + rule.onMatch?.(ctx); + break; + } // case rule transition criteria not matched + if (rule.onMiss) { + rule.onMiss(ctx); + } else { // default not matched logic: reset local mode matching + state.maybeLocalContext = null; + } + } } - rule.onMatch?.(this); - return true; - } // case rule transition criteria not matched - if (rule.onMiss) { - rule.onMiss(this); - } else { // default not matched logic: reset local mode matching - state.maybeLocalContext = null; } - } - - function matchRule(ruleMap, stream, state, jsTokenStyle) { - const ctx = RunContext.get(stream, state, jsTokenStyle); - const rules = ruleMap[state.maybeLocalContext || '']; - rules.some(runRule, ctx); return ctx.style; } // define the transition rules to enter local CSS mode; - const cssRules = makeRules('css', + makeRules({ // GM_addStyle(`css-string`); - [ - ['variable', 'GM_addStyle'], - ['(', '('], - ['quasi', { // if it's a string template + css1: [ + { match: 'GM_addStyle', type: 'variable' }, + { match: '(', type: '(' }, + { + type: 'quasi', // if it's a string template next: 'css-in', mode: cssMode, onMatch: prepReparseStringTemplateInLocalMode, - }], - [isEndBacktick, { + }, + { + match: isEndBacktick, id: 'css-in', next: null, // then exit local css mode onMatch: exitLocalModeWithEndBacktick, onMiss: tokenInLocalModeStringTemplate, // else stay in local mode - }], + }, ], // GM.addStyle(`css-string`); - [ - ['variable', 'GM'], - ['.', '.'], - ['variable', 'addStyle'], - ['(', '('], - ['quasi', { // if it's a string template + css2: [ + { match: 'GM', type: 'variable' }, + { match: '.', type: '.' }, + { match: 'addStyle', type: 'variable' }, + { match: '(', type: '(' }, + { + type: 'quasi', // if it's a string template next: 'css-in', mode: cssMode, onMatch: prepReparseStringTemplateInLocalMode, - }], + }, ], // var someCSS = /* css */ `css-string` // var someCSS = /* lang=css */ `css-string` // var someCSS = /* language=css */ `css-string` - [ - ctx => ctx.jsTokenStyle === 'comment' && - /^\/\*\s*(lang(uage)?\s*=\s*)?css\s*\*\/$/i.test(ctx.text), - ['quasi', { + css3: [ + { + style: 'comment', + match: /^\/\*\s*(lang(uage)?\s*=\s*)?css\s*\*\/$/i + }, + { + type: 'quasi', next: 'css-in', mode: cssMode, onMatch: prepReparseStringTemplateInLocalMode, - }], - ]); + }, + ], + }); const [RE_HTML_BASE, RE_HTML_PLAIN_STRING, RE_HTML_STRING_TEMPLATE] = (() => { const reHtmlBaseStr = /\s*<\/?[a-zA-Z0-9]+(\s|\/?>)/.source; @@ -655,74 +667,77 @@ })(); // define the transition rules to enter local html mode; - const htmlRules = makeRules('html', + makeRules({ // inside a html string template - [ - [isEndBacktick, { + html1: [ + { + match: isEndBacktick, id: 'html-in', next: null, // then exit local html mode onMatch: exitLocalModeWithEndBacktick, onMiss: tokenInLocalModeStringTemplate, // else stay in local mode - }], + }, ], // var someHTML = /* html */ `html-string` // var someHTML = /* lang=html */ `html-string` // var someHTML = /* language=html */ `html-string` - [ - ctx => ctx.jsTokenStyle === 'comment' && - /^\/\*\s*(lang(uage)?\s*=\s*)?html\s*\*\/$/i.test(ctx.text), - ['quasi', { + html2: [ + { + style: 'comment', + match: /^\/\*\s*(lang(uage)?\s*=\s*)?html\s*\*\/$/i, + }, + { + type: 'quasi', next: 'html-in', mode: htmlmixedMode, onMatch: prepReparseStringTemplateInLocalMode, - }], + }, ], // for plain string (single or double quoted) that looks like html // e.g., '
hello', "
", '
', etc. - [ - ['string', RE_HTML_PLAIN_STRING, { + html3: [ + { + match: RE_HTML_PLAIN_STRING, + type: 'string', mode: htmlNoMatchClosingMode, onMatch: prepReparsePlainStringInLocalMode, - }], + }, // match the expected ending quote by position - [ctx => ctx.stream.start >= ctx.state.localState.localHtmlPlainStringEndPos - 1, { + { + match: ctx => ctx.stream.start >= ctx.state.localState.localHtmlPlainStringEndPos - 1, next: null, // then exit local html mode onMatch: exitLocalModeWithEndQuote, onMiss: tokenInLocalModePlainString, // else stay local mode - }], + }, ], // for HTML string template (without inline comment as a hint) - [ - ['quasi', RE_HTML_STRING_TEMPLATE, { + html4: [ + { + match: RE_HTML_STRING_TEMPLATE, + type: 'quasi', next: 'html-in', mode: htmlmixedMode, onMatch: prepReparseStringTemplateInLocalMode, - }], + }, ], // for HTML string template (where first line is blank, html started in second line) - [ - ['quasi', /^[`](\\)?\s*$/], // first line is blank - ['quasi', RE_HTML_BASE, { // second line starts with a tag + html5: [ + // first line is blank + { + match: /^[`](\\)?\s*$/, + type: 'quasi', + }, + // second line starts with a tag + { + match: RE_HTML_BASE, + type: 'quasi', next: 'html-in', mode: htmlmixedMode, hasBeginBacktick: false, onMatch: prepReparseStringTemplateInLocalMode, - }], - ]); - - // a map of all rules, keyed by id for quick look up during matching - const allRuleMap = (() => { - const res = {}; - for (const rules of [htmlRules, cssRules]) { - for (const rule of rules) { - const key = rule.id; - res[key] = res[key] || []; - res[key].push(rule); - } - } - return res; - })(); - + }, + ], + }); function jsToken(stream, state) { // dbg('jsToken -', `${stream.pos}: ${stream.string.substring(stream.pos).substring(0, 8)}`, state.lastType); @@ -755,7 +770,7 @@ } // match to see if it needs to switch to local html mode, return local mode style if applicable - const maybeLocalStyle = matchRule(allRuleMap, stream, state, tokenStyle); + const maybeLocalStyle = matchRule(stream, state, tokenStyle); if (maybeLocalStyle !== STYLE_PASS) { tokenStyle = maybeLocalStyle; diff --git a/test/javascript-mixed/code-source.txt b/test/javascript-mixed/code-source.js similarity index 93% rename from test/javascript-mixed/code-source.txt rename to test/javascript-mixed/code-source.js index de50ae7..2236382 100644 --- a/test/javascript-mixed/code-source.txt +++ b/test/javascript-mixed/code-source.js @@ -1,6 +1,7 @@ // ==UserScript== // @name Test HTML/CSS highlights // ==/UserScript== +/* eslint-disable no-undef, no-unused-vars */ document.body.insertAdjacentHTML('beforeend', `

hello world

@@ -29,16 +30,19 @@ someElt.innerHTML = ` `; // highlight arbitrary string template with a inline comment as a hint -const someHTML = /* html */ `some text +let someHTML = /* html */ `some text

`; +const someHTMLwithLangHint = /*lang = html */ `some text

`; +const someHTMLwithLanguageHint = /*language=html*/`some text

`; + let someCSS = /* CSS */ `.a:visited { color: purple; }`; // the inline comment hint also works for tagged string template -let someCSS = String.raw /*css*/ `\ +someCSS = String.raw /*css*/ `\ #\some-id { color: purple; }`; @@ -51,14 +55,14 @@ const otherHTML = `
// corner case: that text after the last html tag / css rule. // ensure they won't break subsequent highlighting someHTML=`

text

\n`; -foo = 1; +let foo = 1; someHTML=`

text

\n`; // some comments after the template foo = 1; someHTML=`

text

more text post last tag`; foo = 1; GM_addStyle(`.foo { border: 1px; -} acme`; +} acme`); bar = 1; // corner case: there are backticks within HTML string templates @@ -115,15 +119,15 @@ someUi = `
let someUi = `
Hello World -
`); +
`; // works too for backtick in the expression not at the beginning of a token/line. -let someUi = `
Hello World -
`); +`; // ensure simple cases work too -let someUi = `
Hello
`; +someUi = `
Hello
`; someUi = `
Hello
`; someUi = `
${someTextFunc()}
`; @@ -154,32 +158,32 @@ someUi = `Hello World -
`); +
`; // nested case 2 -let someUi = `
Hello World -
`); +
`; // leading blank spaces before inner ending backtick -let someUi = `
Hello World -
`); +
`; // } inside the expression is treated properly too -let someUi = `
other text `)}">Hello World -
`); +
`; someUi = `
other text `)}">Hello World -
`); +
`; // corner cases involving escaping // - not a legit js expression, as the $ is escaped @@ -195,7 +199,7 @@ someUi = String.raw`

prefix\${expr1()}suffix2


`; // - a legit js expression, the $ looks escaped, but it is not: the backslash itself is escaped someUi = String.raw`

prefix\\${expr1()}suffix


`; -const foo = 1; +foo = 1; // works for CSS too diff --git a/test/javascript-mixed/code-tokens.out b/test/javascript-mixed/code-tokens.out index f3d95e0..39d3c4e 100644 --- a/test/javascript-mixed/code-tokens.out +++ b/test/javascript-mixed/code-tokens.out @@ -4,6 +4,8 @@ comment | // @name Test HTML/CSS highlights undefined | comment | // ==/UserScript== undefined | +comment | /* eslint-disable no-undef, no-unused-vars */ +undefined | undefined | variable | document undefined | . @@ -222,7 +224,7 @@ undefined | undefined | comment | // highlight arbitrary string template with a inline comment as a hint undefined | -keyword | const +keyword | let null | def | someHTML null | @@ -249,6 +251,52 @@ string-2 | ` undefined | ; undefined | undefined | +keyword | const +null | +def | someHTMLwithLangHint +null | +operator | = +null | +comment | /*lang = html */ +null | +string-2 | ` +null | some text +tag bracket | < +tag | br +tag bracket | > +tag bracket | < +tag | hr +null | +attribute | class +null | = +string | "acme" +tag bracket | > +string-2 | ` +undefined | ; +undefined | +keyword | const +null | +def | someHTMLwithLanguageHint +null | +operator | = +null | +comment | /*language=html*/ +string-2 | ` +null | some text +tag bracket | < +tag | br +tag bracket | > +tag bracket | < +tag | hr +null | +attribute | class +null | = +string | "acme" +tag bracket | > +string-2 | ` +undefined | ; +undefined | +undefined | keyword | let null | def | someCSS @@ -278,9 +326,7 @@ undefined | undefined | comment | // the inline comment hint also works for tagged string template undefined | -keyword | let -null | -def | someCSS +variable | someCSS null | operator | = null | @@ -354,7 +400,9 @@ null | \n string-2 | ` undefined | ; undefined | -variable | foo +keyword | let +null | +def | foo null | operator | = null | @@ -423,6 +471,7 @@ null | } null | tag | acme string-2 | ` +undefined | ) undefined | ; undefined | variable | bar @@ -489,7 +538,7 @@ undefined | undefined | keyword | var null | -variable | uiHtml +def | uiHtml null | operator | = null | @@ -610,7 +659,7 @@ undefined | undefined | keyword | var null | -variable | dbgMsg +def | dbgMsg null | operator | = null | @@ -626,7 +675,7 @@ comment | // undefined | variable | someElt undefined | . -variable | innerHTML +property | innerHTML null | operator | = null | @@ -676,7 +725,7 @@ undefined | undefined | variable | console undefined | . -variable | debug +property | debug undefined | ( string | ' tag bracket | < @@ -893,15 +942,12 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | comment | // works too for backtick in the expression not at the beginning of a token/line. undefined | -keyword | let -null | -def | someUi +variable | someUi null | operator | = null | @@ -929,15 +975,12 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | comment | // ensure simple cases work too undefined | -keyword | let -null | -def | someUi +variable | someUi null | operator | = null | @@ -1246,9 +1289,7 @@ undefined | undefined | comment | // ensure nested case works too undefined | -keyword | let -null | -def | someUi +variable | someUi null | operator | = null | @@ -1289,15 +1330,12 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | comment | // nested case 2 undefined | -keyword | let -null | -def | someUi +variable | someUi null | operator | = null | @@ -1334,15 +1372,12 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | comment | // leading blank spaces before inner ending backtick undefined | -keyword | let -null | -def | someUi +variable | someUi null | operator | = null | @@ -1380,15 +1415,12 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | comment | // } inside the expression is treated properly too undefined | -keyword | let -null | -def | someUi +variable | someUi null | operator | = null | @@ -1420,7 +1452,6 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | @@ -1460,7 +1491,6 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | @@ -1519,9 +1549,7 @@ string-2 | ` undefined | ; undefined | undefined | -keyword | const -null | -def | foo +variable | foo null | operator | = null | diff --git a/test/javascript-mixed/code-tokens.txt b/test/javascript-mixed/code-tokens.txt index f3d95e0..39d3c4e 100644 --- a/test/javascript-mixed/code-tokens.txt +++ b/test/javascript-mixed/code-tokens.txt @@ -4,6 +4,8 @@ comment | // @name Test HTML/CSS highlights undefined | comment | // ==/UserScript== undefined | +comment | /* eslint-disable no-undef, no-unused-vars */ +undefined | undefined | variable | document undefined | . @@ -222,7 +224,7 @@ undefined | undefined | comment | // highlight arbitrary string template with a inline comment as a hint undefined | -keyword | const +keyword | let null | def | someHTML null | @@ -249,6 +251,52 @@ string-2 | ` undefined | ; undefined | undefined | +keyword | const +null | +def | someHTMLwithLangHint +null | +operator | = +null | +comment | /*lang = html */ +null | +string-2 | ` +null | some text +tag bracket | < +tag | br +tag bracket | > +tag bracket | < +tag | hr +null | +attribute | class +null | = +string | "acme" +tag bracket | > +string-2 | ` +undefined | ; +undefined | +keyword | const +null | +def | someHTMLwithLanguageHint +null | +operator | = +null | +comment | /*language=html*/ +string-2 | ` +null | some text +tag bracket | < +tag | br +tag bracket | > +tag bracket | < +tag | hr +null | +attribute | class +null | = +string | "acme" +tag bracket | > +string-2 | ` +undefined | ; +undefined | +undefined | keyword | let null | def | someCSS @@ -278,9 +326,7 @@ undefined | undefined | comment | // the inline comment hint also works for tagged string template undefined | -keyword | let -null | -def | someCSS +variable | someCSS null | operator | = null | @@ -354,7 +400,9 @@ null | \n string-2 | ` undefined | ; undefined | -variable | foo +keyword | let +null | +def | foo null | operator | = null | @@ -423,6 +471,7 @@ null | } null | tag | acme string-2 | ` +undefined | ) undefined | ; undefined | variable | bar @@ -489,7 +538,7 @@ undefined | undefined | keyword | var null | -variable | uiHtml +def | uiHtml null | operator | = null | @@ -610,7 +659,7 @@ undefined | undefined | keyword | var null | -variable | dbgMsg +def | dbgMsg null | operator | = null | @@ -626,7 +675,7 @@ comment | // undefined | variable | someElt undefined | . -variable | innerHTML +property | innerHTML null | operator | = null | @@ -676,7 +725,7 @@ undefined | undefined | variable | console undefined | . -variable | debug +property | debug undefined | ( string | ' tag bracket | < @@ -893,15 +942,12 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | comment | // works too for backtick in the expression not at the beginning of a token/line. undefined | -keyword | let -null | -def | someUi +variable | someUi null | operator | = null | @@ -929,15 +975,12 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | comment | // ensure simple cases work too undefined | -keyword | let -null | -def | someUi +variable | someUi null | operator | = null | @@ -1246,9 +1289,7 @@ undefined | undefined | comment | // ensure nested case works too undefined | -keyword | let -null | -def | someUi +variable | someUi null | operator | = null | @@ -1289,15 +1330,12 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | comment | // nested case 2 undefined | -keyword | let -null | -def | someUi +variable | someUi null | operator | = null | @@ -1334,15 +1372,12 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | comment | // leading blank spaces before inner ending backtick undefined | -keyword | let -null | -def | someUi +variable | someUi null | operator | = null | @@ -1380,15 +1415,12 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | comment | // } inside the expression is treated properly too undefined | -keyword | let -null | -def | someUi +variable | someUi null | operator | = null | @@ -1420,7 +1452,6 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | @@ -1460,7 +1491,6 @@ tag bracket | string-2 | ` -undefined | ) undefined | ; undefined | undefined | @@ -1519,9 +1549,7 @@ string-2 | ` undefined | ; undefined | undefined | -keyword | const -null | -def | foo +variable | foo null | operator | = null | diff --git a/test/javascript-mixed/javascript-mixed.test.js b/test/javascript-mixed/javascript-mixed.test.js index 6ff6b6f..a3f86d4 100644 --- a/test/javascript-mixed/javascript-mixed.test.js +++ b/test/javascript-mixed/javascript-mixed.test.js @@ -8,12 +8,12 @@ import path from 'path'; /** * A sanity / regression test for javascript-mixed mode, comparing - * the text in code-source.txt agains the expected tokens + * the text in code-source.js agains the expected tokens * The tokens format can be generated by opening runmode.html, an utitlity to * tokenize any codes. It is also useful to visualize the results in case regression happens. */ test('codemirror javascript-mixed regression', (t) => { - const code = fs.readFileSync(path.join(__dirname, 'code-source.txt'), 'utf8'); + const code = fs.readFileSync(path.join(__dirname, 'code-source.js'), 'utf8'); const expected = fs.readFileSync(path.join(__dirname, 'code-tokens.txt'), 'utf8'); let tokenRes = ''; const EOF_HINT = '// EOF'; // used to signify the end of tokenization diff --git a/test/javascript-mixed/runmode.html b/test/javascript-mixed/runmode.html index 6a06e3d..22c4e24 100644 --- a/test/javascript-mixed/runmode.html +++ b/test/javascript-mixed/runmode.html @@ -68,7 +68,7 @@

Performance Test

} async function loadSources() { - let resp = await fetch('code-source.txt'); + let resp = await fetch('code-source.js'); document.getElementById('code').value = await resp.text(); resp = await fetch('code-perftest-source.txt');