From 8769d5ae2311a31476319748c13fe536e44607c4 Mon Sep 17 00:00:00 2001 From: Elliot Shepherd Date: Thu, 19 Mar 2026 13:52:11 +1100 Subject: [PATCH 1/2] fix: preserve span info by using tokenize_with_location --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index a37f66b..dba54c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,7 +82,7 @@ pub fn parse_sql_with_options( // Note: trailing_commas option support depends on sqlparser version let tokens = sqlparser::tokenizer::Tokenizer::new(dialect_impl.as_ref(), sql) - .tokenize() + .tokenize_with_location() .map_err(|e| { let error = ParseError { message: e.to_string(), @@ -92,7 +92,7 @@ pub fn parse_sql_with_options( serde_wasm_bindgen::to_value(&error).unwrap_or(JsValue::from_str(&e.to_string())) })?; - parser = parser.with_tokens(tokens); + parser = parser.with_tokens_with_locations(tokens); let statements = parser.parse_statements().map_err(|e| { let error = ParseError { From ab0cfae5bd80997fed469047acdc106905fb63e0 Mon Sep 17 00:00:00 2001 From: Elliot Shepherd Date: Thu, 19 Mar 2026 14:03:34 +1100 Subject: [PATCH 2/2] feat: add parseWithComments with correct span serialization --- src/index.ts | 2 +- src/lib.rs | 68 +++++++++++++++++++++++++++++++++++++++++++ src/parser.ts | 18 ++++++++++++ src/types/comments.ts | 23 +++++++++++++++ src/types/index.ts | 1 + src/wasm.ts | 1 + tests/builds.test.ts | 2 ++ tests/parser.test.ts | 59 +++++++++++++++++++++++++++++++++++++ 8 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 src/types/comments.ts diff --git a/src/index.ts b/src/index.ts index 33e4a7c..f8707d5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -4,7 +4,7 @@ */ // Parser -export { Parser, init, parse, validate, format } from './parser.js'; +export { Parser, init, parse, parseWithComments, validate, format } from './parser.js'; export type { ParserOptions, DialectInput } from './parser.js'; // Dialects diff --git a/src/lib.rs 
b/src/lib.rs index dba54c5..9f42034 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ use serde::{Deserialize, Serialize}; +use sqlparser::ast::comments::{Comment as SqlComment, CommentWithSpan}; use sqlparser::dialect::{ AnsiDialect, BigQueryDialect, ClickHouseDialect, DatabricksDialect, Dialect, DuckDbDialect, GenericDialect, HiveDialect, MsSqlDialect, MySqlDialect, OracleDialect, PostgreSqlDialect, @@ -163,6 +164,73 @@ pub fn get_supported_dialects() -> JsValue { serde_wasm_bindgen::to_value(&dialects).unwrap() } +/// A serializable source comment +#[derive(Serialize)] +#[serde(rename_all = "camelCase")] +pub struct SerializedComment { + pub comment_type: String, + pub content: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub prefix: Option<String>, + pub start_line: u64, + pub start_column: u64, + pub end_line: u64, + pub end_column: u64, +} + +impl From<&CommentWithSpan> for SerializedComment { + fn from(c: &CommentWithSpan) -> Self { + let (comment_type, content, prefix) = match &c.comment { + SqlComment::SingleLine { content, prefix } => ( + "singleLine".to_string(), + content.clone(), + Some(prefix.clone()), + ), + SqlComment::MultiLine(content) => ("multiLine".to_string(), content.clone(), None), + }; + SerializedComment { + comment_type, + content, + prefix, + start_line: c.span.start.line, + start_column: c.span.start.column, + end_line: c.span.end.line, + end_column: c.span.end.column, + } + } +} + +/// Parse SQL and return both AST and source comments +#[wasm_bindgen] +pub fn parse_sql_with_comments(dialect: &str, sql: &str) -> Result<JsValue, JsValue> { + let dialect_impl = get_dialect(dialect); + let (statements, comments) = Parser::parse_sql_with_comments(dialect_impl.as_ref(), sql) + .map_err(|e| { + let error = ParseError { + message: e.to_string(), + line: None, + column: None, + }; + serde_wasm_bindgen::to_value(&error).unwrap_or(JsValue::from_str(&e.to_string())) + })?; + + let comments_vec: Vec<CommentWithSpan> = comments.into(); + let serialized_comments: Vec<SerializedComment> = 
+ comments_vec.iter().map(SerializedComment::from).collect(); + + #[derive(Serialize)] + struct Result { + statements: Vec<sqlparser::ast::Statement>, + comments: Vec<SerializedComment>, + } + + serde_wasm_bindgen::to_value(&Result { + statements, + comments: serialized_comments, + }) + .map_err(|e| JsValue::from_str(&format!("Serialization error: {}", e))) +} + /// Validate SQL syntax without returning the full AST #[wasm_bindgen] pub fn validate_sql(dialect: &str, sql: &str) -> Result { diff --git a/src/parser.ts b/src/parser.ts index 20e8da7..d8468a8 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -2,6 +2,7 @@ import type { Dialect, DialectName } from './dialects.js'; import { dialectFromString, GenericDialect } from './dialects.js'; import { ParserError } from './types/errors.js'; import type { Statement } from './types/ast.js'; +import type { ParseWithCommentsResult } from './types/comments.js'; import { getWasmModule } from './wasm.js'; export { init } from './wasm.js'; @@ -87,6 +88,16 @@ export class Parser { return new Parser(resolveDialect(dialect)).parse(sql); } + /** Parse SQL and return both AST and source comments */ + static parseWithComments(sql: string, dialect: DialectInput = 'generic'): ParseWithCommentsResult { + const wasm = getWasmModule(); + try { + return wasm.parse_sql_with_comments(resolveDialect(dialect).name, sql) as ParseWithCommentsResult; + } catch (error) { + throw ParserError.fromWasmError(error); + } + } + /** Parse SQL and return AST as JSON string */ static parseToJson(sql: string, dialect: DialectInput = 'generic'): string { const wasm = getWasmModule(); @@ -145,6 +156,13 @@ export function parse(sql: string, dialect: DialectInput = 'generic'): Statement return Parser.parse(sql, dialect); } +/** + * Parse SQL and return both AST and source comments + */ +export function parseWithComments(sql: string, dialect: DialectInput = 'generic'): ParseWithCommentsResult { + return Parser.parseWithComments(sql, dialect); +} + /** * Validate SQL syntax * @throws ParserError if SQL is 
invalid diff --git a/src/types/comments.ts b/src/types/comments.ts new file mode 100644 index 0000000..3260fba --- /dev/null +++ b/src/types/comments.ts @@ -0,0 +1,23 @@ +/** A source code comment extracted from parsed SQL */ +export interface SourceComment { + /** "singleLine" for -- comments, "multiLine" for block comments */ + commentType: 'singleLine' | 'multiLine' + /** The comment text content (excluding markers) */ + content: string + /** For single-line comments, the prefix (e.g. "--", "#") */ + prefix?: string + /** Start line (1-based) */ + startLine: number + /** Start column (1-based) */ + startColumn: number + /** End line (1-based) */ + endLine: number + /** End column (1-based) */ + endColumn: number +} + +/** Result of parsing SQL with comments */ +export interface ParseWithCommentsResult { + statements: T[] + comments: SourceComment[] +} diff --git a/src/types/index.ts b/src/types/index.ts index ecf1423..33eb23f 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -1,2 +1,3 @@ export * from './ast.js'; export * from './errors.js'; +export * from './comments.js'; diff --git a/src/wasm.ts b/src/wasm.ts index d124a56..02fdd6b 100644 --- a/src/wasm.ts +++ b/src/wasm.ts @@ -4,6 +4,7 @@ import { WasmInitError } from './types/errors.js'; export interface WasmModule { parse_sql: (dialect: string, sql: string) => unknown; parse_sql_with_options: (dialect: string, sql: string, options: unknown) => unknown; + parse_sql_with_comments: (dialect: string, sql: string) => unknown; parse_sql_to_json_string: (dialect: string, sql: string) => string; parse_sql_to_string: (dialect: string, sql: string) => string; format_sql: (dialect: string, sql: string) => string; diff --git a/tests/builds.test.ts b/tests/builds.test.ts index 6063552..7ba1ba8 100644 --- a/tests/builds.test.ts +++ b/tests/builds.test.ts @@ -16,6 +16,7 @@ describe('ESM build', () => { const esm = await import('../dist/index.mjs'); expect(typeof esm.parse).toBe('function'); + expect(typeof 
esm.parseWithComments).toBe('function'); expect(typeof esm.format).toBe('function'); expect(typeof esm.validate).toBe('function'); expect(typeof esm.Parser).toBe('function'); @@ -53,6 +54,7 @@ describe('CJS build', () => { const cjs = require('../dist/index.cjs'); expect(typeof cjs.parse).toBe('function'); + expect(typeof cjs.parseWithComments).toBe('function'); expect(typeof cjs.format).toBe('function'); expect(typeof cjs.validate).toBe('function'); expect(typeof cjs.Parser).toBe('function'); diff --git a/tests/parser.test.ts b/tests/parser.test.ts index 587d3f0..e2200a6 100644 --- a/tests/parser.test.ts +++ b/tests/parser.test.ts @@ -15,6 +15,7 @@ import { isUpdate, isDelete, } from './test-utils'; +import { Parser, parseWithComments } from '../src'; describe('Parser', () => { describe('basic parsing', () => { @@ -189,6 +190,64 @@ describe('Comments', () => { }); }); +describe('parseWithComments', () => { + test('returns statements and comments', () => { + const result = Parser.parseWithComments('SELECT 1 -- a comment'); + expect(result.statements).toHaveLength(1); + expect(result.comments).toHaveLength(1); + }); + + test('extracts single-line comment fields', () => { + const result = Parser.parseWithComments('SELECT 1 -- hello'); + const c = result.comments[0]; + expect(c.commentType).toBe('singleLine'); + expect(c.content).toContain('hello'); + expect(c.prefix).toBe('--'); + }); + + test('extracts multi-line comment fields', () => { + const result = Parser.parseWithComments('SELECT /* block */ 1'); + const c = result.comments[0]; + expect(c.commentType).toBe('multiLine'); + expect(c.content).toContain('block'); + expect(c.prefix).toBeUndefined(); + }); + + test('span fields report correct positions', () => { + // "-- span test" starts at column 10 on line 1 + const result = Parser.parseWithComments('SELECT 1 -- span test'); + const c = result.comments[0]; + expect(c.startLine).toBe(1); + expect(c.startColumn).toBe(10); + expect(c.endLine).toBe(1); + 
expect(c.endColumn).toBe(22); + }); + + test('extracts multiple comments', () => { + const sql = '-- first\nSELECT /* second */ 1 -- third'; + const result = Parser.parseWithComments(sql); + expect(result.comments.length).toBeGreaterThanOrEqual(3); + }); + + test('returns empty comments array when none present', () => { + const result = Parser.parseWithComments('SELECT 1'); + expect(result.statements).toHaveLength(1); + expect(result.comments).toHaveLength(0); + }); + + test('accepts dialect argument', () => { + const result = Parser.parseWithComments('SELECT $1 -- param', 'postgresql'); + expect(result.statements).toHaveLength(1); + expect(result.comments).toHaveLength(1); + }); + + test('convenience function works', () => { + const result = parseWithComments('SELECT 1 -- test'); + expect(result.statements).toHaveLength(1); + expect(result.comments).toHaveLength(1); + }); +}); + describe('Whitespace handling', () => { test('handles extra whitespace', () => { parseOne('SELECT 1 FROM t');