`) are transparently skipped, since comment bytes
+ // never update this flag. Only consulted when `last_sig` is identifier-like.
+ let mut prev_is_expr_keyword = false;
while index < bytes.len() {
- match bytes[index] {
- b'\'' | b'"' => {
- index = skip_string_literal(bytes, index);
- }
- b'`' => {
- index = skip_template_literal(bytes, index);
- }
- b'/' if bytes.get(index + 1) == Some(&b'/') => {
- let comment_start = index;
- let content_start = index + 2;
- index = content_start;
-
- while index < bytes.len() && bytes[index] != b'\n' {
- index += 1;
+ match stack.last_mut().expect("scanner stack is never empty") {
+ Ctx::Code {
+ brace_depth,
+ jsx_expr,
+ } => match bytes[index] {
+ b'\'' | b'"' => {
+ index = skip_string_literal(bytes, index);
+ last_sig = b'"';
}
+ b'`' => {
+ index = skip_template_literal(bytes, index);
+ last_sig = b'`';
+ }
+ b'/' if bytes.get(index + 1) == Some(&b'/') => {
+ let comment_start = index;
+ let content_start = index + 2;
+ index = content_start;
- comments.push(SourceComment {
- byte_offset: comment_start,
- content: &source[content_start..index],
- kind: CommentKind::Line,
- });
- }
- b'/' if bytes.get(index + 1) == Some(&b'*') => {
- let comment_start = index;
- let content_start = index + 2;
- index = content_start;
-
- while index < bytes.len() {
- if bytes[index] == b'*' && bytes.get(index + 1) == Some(&b'/') {
- break;
+ while index < bytes.len() && bytes[index] != b'\n' {
+ index += 1;
}
- index += 1;
+
+ comments.push(SourceComment {
+ byte_offset: comment_start,
+ content: &source[content_start..index],
+ kind: CommentKind::Line,
+ });
}
+ b'/' if bytes.get(index + 1) == Some(&b'*') => {
+ let comment_start = index;
+ let content_start = index + 2;
+ index = content_start;
+
+ while index < bytes.len() {
+ if bytes[index] == b'*' && bytes.get(index + 1) == Some(&b'/') {
+ break;
+ }
+ index += 1;
+ }
+
+ let content_end = index;
+ if index < bytes.len() {
+ index += 2;
+ }
- let content_end = index;
- if index < bytes.len() {
+ comments.push(SourceComment {
+ byte_offset: comment_start,
+ content: &source[content_start..content_end],
+ kind: CommentKind::Block,
+ });
+ }
+ b'{' => {
+ *brace_depth += 1;
+ last_sig = b'{';
+ index += 1;
+ }
+ b'}' => {
+ if *brace_depth > 0 {
+ *brace_depth -= 1;
+ } else if *jsx_expr {
+ // Closes a JSX expression container; back to children.
+ stack.pop();
+ }
+ last_sig = b'}';
+ index += 1;
+ }
+ b'<' if is_jsx_tag_start(bytes, index, last_sig, prev_is_expr_keyword) => {
+ stack.push(Ctx::JsxTag);
+ index += 1;
+ }
+ b if is_ident_byte(b) => {
+ // Consume the whole identifier/number run and record whether
+ // it is an expression-starting keyword. A keyword reached via
+ // member access (`obj.return`) is a property name, not the
+ // keyword, so it does not count.
+ let start = index;
+ while index < bytes.len() && is_ident_byte(bytes[index]) {
+ index += 1;
+ }
+ last_sig = bytes[index - 1];
+ prev_is_expr_keyword = !preceded_by_dot(bytes, start)
+ && is_expression_keyword(&bytes[start..index]);
+ }
+ b' ' | b'\t' | b'\r' | b'\n' => {
+ index += 1;
+ }
+ other => {
+ last_sig = other;
+ prev_is_expr_keyword = false;
+ index += 1;
+ }
+ },
+ Ctx::JsxTag => match bytes[index] {
+ b'\'' | b'"' => {
+ index = skip_string_literal(bytes, index);
+ }
+ b'{' => {
+ stack.push(Ctx::Code {
+ brace_depth: 0,
+ jsx_expr: true,
+ });
+ index += 1;
+ }
+ b'/' if bytes.get(index + 1) == Some(&b'>') => {
+ // Self-closing tag: no children.
+ stack.pop();
index += 2;
+ last_sig = b']';
}
-
- comments.push(SourceComment {
- byte_offset: comment_start,
- content: &source[content_start..content_end],
- kind: CommentKind::Block,
- });
- }
- _ => {
- index += 1;
- }
+ b'>' => {
+ stack.pop();
+ stack.push(Ctx::JsxChildren);
+ index += 1;
+ }
+ b',' | b';' => {
+ // Not a real JSX tag (e.g. a TS generic such as `
`).
+ // Pop only the tentative tag frame, returning to the actual
+ // surrounding context (the code frame that saw the `<`,
+ // which may itself be a JSX expression container with its own
+ // brace depth), and re-scan this byte there.
+ stack.pop();
+ }
+ _ => {
+ index += 1;
+ }
+ },
+ Ctx::JsxChildren => match bytes[index] {
+ b'{' => {
+ stack.push(Ctx::Code {
+ brace_depth: 0,
+ jsx_expr: true,
+ });
+ index += 1;
+ }
+ b'<' if bytes.get(index + 1) == Some(&b'/') => {
+ // Closing tag ``: skip to `>` and leave children.
+ index += 2;
+ while index < bytes.len() && bytes[index] != b'>' {
+ index += 1;
+ }
+ if index < bytes.len() {
+ index += 1;
+ }
+ stack.pop();
+ last_sig = b']';
+ }
+ b'<' => {
+ // Nested element open tag.
+ stack.push(Ctx::JsxTag);
+ index += 1;
+ }
+ _ => {
+ // Text content — quotes/backticks/`//` are literal here.
+ index += 1;
+ }
+ },
}
}
comments
}
+fn is_ident_byte(b: u8) -> bool {
+ b.is_ascii_alphanumeric() || b == b'_' || b == b'$'
+}
+
+/// Keywords after which a `<` begins an expression, so it may open a JSX element
+/// (`return `, `export default
`, `yield
`, …). In a comparison or
+/// generic the byte before `<` is an operand identifier, never one of these, so
+/// allowing them does not reintroduce false positives.
+fn is_expression_keyword(word: &[u8]) -> bool {
+ matches!(
+ word,
+ b"return"
+ | b"yield"
+ | b"await"
+ | b"void"
+ | b"typeof"
+ | b"delete"
+ | b"throw"
+ | b"do"
+ | b"else"
+ | b"case"
+ | b"default"
+ | b"in"
+ | b"of"
+ | b"instanceof"
+ )
+}
+
+/// A byte that ends a value/expression, after which a `<` is a comparison or TS
+/// generic rather than the start of a JSX element.
+fn is_value_ending(b: u8) -> bool {
+ is_ident_byte(b) || matches!(b, b')' | b']' | b'.' | b'"' | b'\'' | b'`')
+}
+
+/// Whether the `<` at `index` (in code context) opens a JSX element. Requires an
+/// expression position and a following tag-name char or `>` (fragment). This
+/// matches `.tsx` semantics, where a bare `` is JSX and a generic arrow must
+/// be written `` (handled by the `,` bail in `JsxTag`).
+///
+/// `last_sig` is the preceding significant byte; `prev_is_expr_keyword` is set by
+/// the caller when that byte ends an expression-starting keyword (e.g. `return`).
+/// When the preceding byte is identifier-like it is normally a value (so `<` is a
+/// comparison/generic) — unless it was such a keyword.
+fn is_jsx_tag_start(bytes: &[u8], index: usize, last_sig: u8, prev_is_expr_keyword: bool) -> bool {
+ let next_is_tag = match bytes.get(index + 1) {
+ Some(&b) => b.is_ascii_alphabetic() || b == b'_' || b == b'$' || b == b'>',
+ None => false,
+ };
+ if !next_is_tag {
+ return false;
+ }
+
+ if !is_value_ending(last_sig) {
+ return true;
+ }
+
+ // The preceding byte ends a value. Only an identifier ending might actually
+ // be an expression-starting keyword (e.g. `return`); other value endings
+ // (`)`, `]`, `.`, quotes) never are.
+ is_ident_byte(last_sig) && prev_is_expr_keyword
+}
+
+/// Whether the identifier starting at `start` is reached via member access
+/// (`obj.return`), in which case it is a property name, not a keyword. Skips
+/// whitespace so `obj. return` is also recognised as a property access.
+fn preceded_by_dot(bytes: &[u8], start: usize) -> bool {
+ let mut i = start;
+ while i > 0 && matches!(bytes[i - 1], b' ' | b'\t' | b'\r' | b'\n') {
+ i -= 1;
+ }
+ i > 0 && bytes[i - 1] == b'.'
+}
+
fn skip_string_literal(bytes: &[u8], start: usize) -> usize {
let delim = bytes[start];
let mut i = start + 1;
@@ -77,6 +322,14 @@ fn skip_string_literal(bytes: &[u8], start: usize) -> usize {
i = (i + 2).min(bytes.len());
} else if bytes[i] == delim {
return i + 1;
+ } else if bytes[i] == b'\n' {
+ // A single/double-quoted JS string cannot contain an unescaped
+ // newline. Hitting one means the opening quote was not a string
+ // delimiter (e.g. an apostrophe in JSX text like `'`).
+ // Treat it as a plain character so scanning resumes — otherwise we
+ // would skip ahead to the next stray quote and swallow any comments
+ // (including lingui directives) in between.
+ return start + 1;
} else {
i += 1;
}
@@ -312,6 +565,96 @@ mod tests {
assert_eq!(line_comments(source), vec![(39, " real")]);
}
+ #[test]
+ fn quote_in_jsx_text_does_not_swallow_following_comment() {
+ // The apostrophe is JSX text, not a string opener. A real string can't
+ // span the newline, so the following comment must still be found.
+ let source = "const x = ';\n// real";
+ assert_eq!(line_comments(source), vec![(28, " real")]);
+ }
+
+ #[test]
+ fn quote_in_jsx_text_after_return_keyword_does_not_swallow_comment() {
+ // `return` is an expression-starting keyword, so the `<` opens JSX even
+ // though the byte before it is identifier-like.
+ let source = "function f() { return '
; }\n// real";
+ assert_eq!(line_comments(source), vec![(38, " real")]);
+ }
+
+ #[test]
+ fn jsx_after_export_default_is_detected() {
+ let source = "export default ';\n// real";
+ assert_eq!(line_comments(source), vec![(29, " real")]);
+ }
+
+ #[test]
+ fn identifier_ending_in_keyword_is_not_treated_as_jsx() {
+ // `myreturn` is a value identifier, so `<` is a comparison (the lack of
+ // a space still forces the keyword check via `next_is_tag`), and the
+ // quoted text after it is a normal string literal (not JSX text).
+ let source = "const a = myreturn` inside a JSX expression container is a generic arrow, not a
+ // nested element. Bailing must return to *that container* so its closing
+ // `}` returns to JSX children. Otherwise the lone backtick in the JSX
+ // text below would be treated as a template opener in stray code and run
+ // away to EOF, swallowing the comment.
+ let source = "const x = {((v)=>v)}`
;\n// real";
+ assert_eq!(line_comments(source), vec![(34, " real")]);
+ }
+
+ #[test]
+ fn backtick_in_jsx_text_does_not_swallow_following_comment() {
+ // A backtick is JSX text, not a template-literal opener. Real templates
+ // span newlines, so only JSX awareness (not a newline bail) saves us.
+ let source = "const x = () => `
;\n// real";
+ assert_eq!(line_comments(source), vec![(26, " real")]);
+ }
+
+ #[test]
+ fn template_literal_inside_jsx_expression_is_still_skipped() {
+ // Inside a JSX expression container `{...}` we are back in code, so a
+ // real template literal must still be skipped (its `//` is not a comment).
+ let source = "const x = {`// not a comment`}
;\n// real";
+ assert_eq!(line_comments(source), vec![(39, " real")]);
+ }
+
+ #[test]
+ fn ts_generic_is_not_mistaken_for_jsx() {
+ // `Record` is a generic, not JSX; a following comment and
+ // its quotes must scan normally.
+ let source = "const x: Record = {};\n// real";
+ assert_eq!(line_comments(source), vec![(33, " real")]);
+ }
+
+ #[test]
+ fn lone_quote_before_newline_is_not_a_string() {
+ // A lone quote followed by a newline is not a string literal, so a
+ // comment on a later line must still be discovered.
+ let source = "a = ';\nb = '; // c\n";
+ assert_eq!(line_comments(source), vec![(14, " c")]);
+ }
+
#[test]
fn string_containing_backslash_n_is_not_newline() {
// The literal text \n in a string (escaped), not a real newline
diff --git a/crates/lingui_macro/tests/lingui_directive.rs b/crates/lingui_macro/tests/lingui_directive.rs
index 6865119..5aac9ea 100644
--- a/crates/lingui_macro/tests/lingui_directive.rs
+++ b/crates/lingui_macro/tests/lingui_directive.rs
@@ -253,6 +253,29 @@ to!(
"#
);
+to!(
+ jsx_trans_with_unclosed_quotes,
+ LinguiOptions {
+ id_prefix_leader: Some(".".into()),
+ ..Default::default()
+ },
+ r#"
+ // lingui-set idPrefix="root"
+ import type { MessageDescriptor } from '@lingui/core'
+ import { msg, t } from '@lingui/core/macro'
+ import { Trans } from '@lingui/react/macro'
+
+ const X = () => '
+ const Y = () => `
+
+ // lingui-set idPrefix="different"
+ const different = {
+ a: msg({ id: '.a', message: `different a` }),
+ b: msg({ id: '.b', message: `different b` }),
+ } as const satisfies Record
+ "#
+);
+
to!(
jsx_trans_with_directive_context,
r#"
diff --git a/crates/lingui_macro/tests/snapshots/lingui_directive__jsx_trans_with_unclosed_quotes.snap b/crates/lingui_macro/tests/snapshots/lingui_directive__jsx_trans_with_unclosed_quotes.snap
new file mode 100644
index 0000000..47d14ec
--- /dev/null
+++ b/crates/lingui_macro/tests/snapshots/lingui_directive__jsx_trans_with_unclosed_quotes.snap
@@ -0,0 +1,36 @@
+---
+source: crates/lingui_macro/tests/lingui_directive.rs
+info:
+ id_prefix_leader: "."
+---
+// lingui-set idPrefix="root"
+import type { MessageDescriptor } from '@lingui/core'
+import { msg, t } from '@lingui/core/macro'
+import { Trans } from '@lingui/react/macro'
+
+const X = () => '
+const Y = () => `
+
+// lingui-set idPrefix="different"
+const different = {
+ a: msg({ id: '.a', message: `different a` }),
+ b: msg({ id: '.b', message: `different b` }),
+} as const satisfies Record
+
+↓ ↓ ↓ ↓ ↓ ↓
+
+// lingui-set idPrefix="root"
+import type { MessageDescriptor } from '@lingui/core';
+const X = ()=>'
;
+const Y = ()=>`
;
+// lingui-set idPrefix="different"
+const different = {
+ a: /*i18n*/ {
+ id: "different.a",
+ message: "different a"
+ },
+ b: /*i18n*/ {
+ id: "different.b",
+ message: "different b"
+ }
+} as const satisfies Record;