From 2da65e358292b8c6ffd49198c91bb5e04c23153d Mon Sep 17 00:00:00 2001
From: Saptak Manna <saptakmanna100@gmail.com>
Date: Fri, 5 Jun 2026 18:57:20 +0530
Subject: [PATCH 1/6] add basic files setup for lexer

---
 internal/sql/lexer/errors.go   |   8 ++
 internal/sql/lexer/peekable.go |  68 +++++++++++++++++
 internal/sql/lexer/position.go |  37 +++++++++
 internal/sql/lexer/tokens.go   | 133 +++++++++++++++++++++++++++++++++
 4 files changed, 246 insertions(+)
 create mode 100644 internal/sql/lexer/errors.go
 create mode 100644 internal/sql/lexer/peekable.go
 create mode 100644 internal/sql/lexer/position.go
 create mode 100644 internal/sql/lexer/tokens.go

diff --git a/internal/sql/lexer/errors.go b/internal/sql/lexer/errors.go
new file mode 100644
index 0000000..74f0c62
--- /dev/null
+++ b/internal/sql/lexer/errors.go
@@ -0,0 +1,8 @@
+package lexer
+
+import "errors"
+
+var (
+	// ErrUnexpectedToken is returned when the lexer encounters an unexpected token.
+	ErrUnexpectedToken = errors.New("unexpected token")
+)
diff --git a/internal/sql/lexer/peekable.go b/internal/sql/lexer/peekable.go
new file mode 100644
index 0000000..e873112
--- /dev/null
+++ b/internal/sql/lexer/peekable.go
@@ -0,0 +1,68 @@
+package lexer
+
+// PeekableIterator provides up to 1 element lookahead over a stream of elements.
+//
+// It is intended for use by tokenizers and parsers to enforce an LL(1) design:
+// only one element may be observed ahead of the consumption point. The backing
+// stream is a simple function, so no buffering array is needed and arbitrarily
+// large inputs are supported with no extra overhead.
+//
+// This type is not safe for concurrent use.
+type PeekableIterator[T any] struct {
+	nextFn func() T // produces the next element on demand
+	peeked *T       // buffered lookahead element, nil if not yet peeked
+	count  int      // number of elements consumed via Next()
+}
+
+// NewPeekableIterator creates a PeekableIterator backed by the given function.
+// nextFn is called each time a new element is needed.
+func NewPeekableIterator[T any](nextFn func() T) *PeekableIterator[T] {
+	return &PeekableIterator[T]{nextFn: nextFn}
+}
+
+// Peek returns the next element without consuming it.
+// Successive calls return the same element until Next() is called.
+func (p *PeekableIterator[T]) Peek() T {
+	if p.peeked != nil {
+		return *p.peeked
+	}
+	v := p.nextFn()
+	p.peeked = &v
+	return v
+}
+
+// Next consumes and returns the next element.
+func (p *PeekableIterator[T]) Next() T {
+	var v T
+	if p.peeked != nil {
+		v = *p.peeked
+		p.peeked = nil
+	} else {
+		v = p.nextFn()
+	}
+	p.count++
+	return v
+}
+
+// ExpectNextValue consumes and returns the next element if it equals expected.
+// Returns a pointer to the consumed element, or nil if it did not match.
+func (p *PeekableIterator[T]) ExpectNextValue(expected T, eq func(a, b T) bool) *T {
+	return p.ExpectNextMatches(func(v T) bool { return eq(v, expected) })
+}
+
+// ExpectNextMatches consumes and returns the next element if it satisfies the
+// predicate. Returns a pointer to the consumed element, or nil otherwise.
+// The predicate is called at most once.
+func (p *PeekableIterator[T]) ExpectNextMatches(predicate func(T) bool) *T {
+	v := p.Peek()
+	if predicate(v) {
+		p.Next() // consume
+		return &v
+	}
+	return nil
+}
+
+// Count returns the number of elements consumed via Next() so far.
+func (p *PeekableIterator[T]) Count() int {
+	return p.count
+}
diff --git a/internal/sql/lexer/position.go b/internal/sql/lexer/position.go
new file mode 100644
index 0000000..8610246
--- /dev/null
+++ b/internal/sql/lexer/position.go
@@ -0,0 +1,37 @@
+package lexer
+
+// Position represents a position in the source input.
+// It tracks three values:
+//
+//   - Index:  absolute character offset from the start of the entire input (0-based).
+//     It counts every character (including newlines) and never resets.
+//   - Line:   the current line number (1-based). Increments only when a '\n' is encountered.
+//   - Column: the position within the current line (1-based). Resets to 1 on every new line.
+type Position struct {
+	Index  int
+	Line   int
+	Column int
+}
+
+// NewPosition returns a Position initialised to the start of the input.
+func NewPosition() Position {
+	return Position{
+		Index:  0,
+		Line:   1,
+		Column: 1,
+	}
+}
+
+// Advance moves the position forward by one character.
+// If the character is a newline ('\n'), the line number is incremented
+// and the column is reset to 1. Otherwise the column is incremented.
+// The absolute index is always incremented by 1.
+func (p *Position) Advance(ch rune) {
+	p.Index++
+	if ch == '\n' {
+		p.Line++
+		p.Column = 1
+	} else {
+		p.Column++
+	}
+}
diff --git a/internal/sql/lexer/tokens.go b/internal/sql/lexer/tokens.go
new file mode 100644
index 0000000..244fce3
--- /dev/null
+++ b/internal/sql/lexer/tokens.go
@@ -0,0 +1,133 @@
+package lexer
+
+import "fmt"
+
+// TokenType represents the type of a lexical token.
+// The underlying string value doubles as the human-readable name,
+// so no separate name map is needed.
+type TokenType string
+
+const (
+	// Special
+	T_INVALID TokenType = "INVALID"
+	T_EOF     TokenType = "EOF"
+
+	// Identifiers and literals
+	T_IDENT      TokenType = "IDENT"
+	T_INT_LIT    TokenType = "INT_LIT"
+	T_FLOAT_LIT  TokenType = "FLOAT_LIT"
+	T_STRING_LIT TokenType = "STRING_LIT"
+
+	// Arithmetic operators
+	T_PLUS     TokenType = "+"
+	T_MINUS    TokenType = "-"
+	T_ASTERISK TokenType = "*"
+	T_SLASH    TokenType = "/"
+	T_MODULO   TokenType = "%"
+
+	// Comparison operators
+	T_EQUAL         TokenType = "="
+	T_NOT_EQUAL     TokenType = "!="
+	T_DIAMOND       TokenType = "<>"
+	T_LESS          TokenType = "<"
+	T_GREATER       TokenType = ">"
+	T_LESS_EQUAL    TokenType = "<="
+	T_GREATER_EQUAL TokenType = ">="
+
+	// Delimiters
+	T_COMMA  TokenType = ","
+	T_SEMI   TokenType = ";"
+	T_LPAREN TokenType = "("
+	T_RPAREN TokenType = ")"
+
+	// Database manipulation
+	T_CREATE   TokenType = "CREATE"
+	T_DROP     TokenType = "DROP"
+	T_DATABASE TokenType = "DATABASE"
+	T_USE      TokenType = "USE"
+
+	// Table manipulation
+	T_TABLE  TokenType = "TABLE"
+	T_ALTER  TokenType = "ALTER"
+	T_ADD    TokenType = "ADD"
+	T_MODIFY TokenType = "MODIFY"
+	T_COLUMN TokenType = "COLUMN"
+	T_RENAME TokenType = "RENAME"
+	T_TO     TokenType = "TO"
+
+	// Column constraints
+	T_PRIMARY TokenType = "PRIMARY"
+	T_KEY     TokenType = "KEY"
+	T_UNIQUE  TokenType = "UNIQUE"
+	T_NOT     TokenType = "NOT"
+	T_NULL    TokenType = "NULL"
+	T_DEFAULT TokenType = "DEFAULT"
+
+	// Data types
+	T_INT       TokenType = "INT"
+	T_BIGINT    TokenType = "BIGINT"
+	T_VARCHAR   TokenType = "VARCHAR"
+	T_BOOLEAN   TokenType = "BOOLEAN"
+	T_TEXT      TokenType = "TEXT"
+	T_TIMESTAMP TokenType = "TIMESTAMP"
+
+	// SELECT
+	T_SELECT TokenType = "SELECT"
+	T_FROM   TokenType = "FROM"
+	T_WHERE  TokenType = "WHERE"
+	T_LIMIT  TokenType = "LIMIT"
+	T_AS     TokenType = "AS"
+
+	// INSERT
+	T_INSERT TokenType = "INSERT"
+	T_INTO   TokenType = "INTO"
+	T_VALUES TokenType = "VALUES"
+
+	// UPDATE / DELETE
+	T_UPDATE TokenType = "UPDATE"
+	T_SET    TokenType = "SET"
+	T_DELETE TokenType = "DELETE"
+
+	// Logical operators
+	T_AND TokenType = "AND"
+	T_OR  TokenType = "OR"
+
+	// Literals
+	T_TRUE  TokenType = "TRUE"
+	T_FALSE TokenType = "FALSE"
+)
+
+// LookupKeyword returns the keyword TokenType for the given identifier string.
+// If the string is not a keyword, it returns T_INVALID.
+// The caller should pass the uppercased string.
+//
+// Since TokenType is a string and every keyword const equals its SQL text,
+// we simply cast and check via a switch — no extra map or slice needed.
+func LookupKeyword(ident string) TokenType {
+	switch TokenType(ident) {
+	case T_CREATE, T_DROP, T_DATABASE, T_USE,
+		T_TABLE, T_ALTER, T_ADD, T_MODIFY, T_COLUMN, T_RENAME, T_TO,
+		T_PRIMARY, T_KEY, T_UNIQUE, T_NOT, T_NULL, T_DEFAULT,
+		T_INT, T_BIGINT, T_VARCHAR, T_BOOLEAN, T_TEXT, T_TIMESTAMP,
+		T_SELECT, T_FROM, T_WHERE, T_LIMIT, T_AS,
+		T_INSERT, T_INTO, T_VALUES,
+		T_UPDATE, T_SET, T_DELETE,
+		T_AND, T_OR,
+		T_TRUE, T_FALSE:
+		return TokenType(ident)
+	default:
+		return T_INVALID
+	}
+}
+
+// Token represents a single lexical token produced by the lexer.
+type Token struct {
+	Type    TokenType // the type of the token
+	Literal string    // the raw text of the token from the source input
+	Pos     Position  // the position of the first character of the token
+}
+
+// String returns a human-readable representation of the token.
+func (t Token) String() string {
+	return fmt.Sprintf("%s(%q) at %d:%d", t.Type, t.Literal, t.Pos.Line, t.Pos.Column)
+}

From a0d4c720b3f129b1b02294d28b2ce4c81ffd843e Mon Sep 17 00:00:00 2001
From: rahulc0dy <rc645312@gmail.com>
Date: Sun, 7 Jun 2026 17:05:02 +0530
Subject: [PATCH 2/6] Implement SQL lexer

---
 internal/sql/lexer/errors.go                  |   9 +
 internal/sql/lexer/keywords.go                |  98 ++++++
 internal/sql/lexer/lexer.go                   | 211 ++++++++++++
 .../sql/lexer/{peekable.go => lookahead.go}   |  27 +-
 internal/sql/lexer/tokens.go                  | 319 ++++++++++++------
 5 files changed, 537 insertions(+), 127 deletions(-)
 create mode 100644 internal/sql/lexer/keywords.go
 create mode 100644 internal/sql/lexer/lexer.go
 rename internal/sql/lexer/{peekable.go => lookahead.go} (58%)

diff --git a/internal/sql/lexer/errors.go b/internal/sql/lexer/errors.go
index 74f0c62..916192d 100644
--- a/internal/sql/lexer/errors.go
+++ b/internal/sql/lexer/errors.go
@@ -5,4 +5,13 @@ import "errors"
 var (
 	// ErrUnexpectedToken is returned when the lexer encounters an unexpected token.
 	ErrUnexpectedToken = errors.New("unexpected token")
+
+	// ErrUnexpectedEOF is returned when the lexer encounters the end of input.
+	ErrUnexpectedEOF = errors.New("unexpected EOF")
+
+	// ErrUnterminatedString is returned when the lexer encounters an unterminated string literal.
+	ErrUnterminatedString = errors.New("unterminated string")
+
+	// ErrUnterminatedBlockComment is returned when the lexer encounters an unterminated block comment.
+	ErrUnterminatedBlockComment = errors.New("unterminated block comment")
 )
diff --git a/internal/sql/lexer/keywords.go b/internal/sql/lexer/keywords.go
new file mode 100644
index 0000000..8cfb371
--- /dev/null
+++ b/internal/sql/lexer/keywords.go
@@ -0,0 +1,98 @@
+package lexer
+
+import "strings"
+
+// keywords maps the canonical (upper-case) spelling of every reserved word to
+// its TokenType. The lookup is always done on the upper-cased form of whatever
+// the source contained, giving the grammar its case-insensitive keyword
+// semantics while leaving Token.Literal in its original casing.
+var keywords = map[string]TokenType{
+	// DDL / database
+	"CREATE":   TOKEN_CREATE,
+	"DATABASE": TOKEN_DATABASE,
+	"USE":      TOKEN_USE,
+	"DROP":     TOKEN_DROP,
+	"IF":       TOKEN_IF,
+	"EXISTS":   TOKEN_EXISTS,
+
+	// Table DDL
+	"TABLE":  TOKEN_TABLE,
+	"ALTER":  TOKEN_ALTER,
+	"ADD":    TOKEN_ADD,
+	"COLUMN": TOKEN_COLUMN,
+	"MODIFY": TOKEN_MODIFY,
+	"RENAME": TOKEN_RENAME,
+	"TO":     TOKEN_TO,
+
+	// DML
+	"SELECT":   TOKEN_SELECT,
+	"DISTINCT": TOKEN_DISTINCT,
+	"ALL":      TOKEN_ALL,
+	"FROM":     TOKEN_FROM,
+	"WHERE":    TOKEN_WHERE,
+	"AS":       TOKEN_AS,
+	"INSERT":   TOKEN_INSERT,
+	"INTO":     TOKEN_INTO,
+	"VALUES":   TOKEN_VALUES,
+	"UPDATE":   TOKEN_UPDATE,
+	"SET":      TOKEN_SET,
+	"DELETE":   TOKEN_DELETE,
+
+	// JOIN
+	"JOIN":  TOKEN_JOIN,
+	"INNER": TOKEN_INNER,
+	"LEFT":  TOKEN_LEFT,
+	"RIGHT": TOKEN_RIGHT,
+	"FULL":  TOKEN_FULL,
+	"OUTER": TOKEN_OUTER,
+	"CROSS": TOKEN_CROSS,
+	"ON":    TOKEN_ON,
+
+	// Clauses
+	"GROUP":  TOKEN_GROUP,
+	"BY":     TOKEN_BY,
+	"HAVING": TOKEN_HAVING,
+	"ORDER":  TOKEN_ORDER,
+	"ASC":    TOKEN_ASC,
+	"DESC":   TOKEN_DESC,
+	"LIMIT":  TOKEN_LIMIT,
+	"OFFSET": TOKEN_OFFSET,
+
+	// Constraints
+	"PRIMARY":    TOKEN_PRIMARY,
+	"KEY":        TOKEN_KEY,
+	"NOT":        TOKEN_NOT,
+	"NULL":       TOKEN_NULL,
+	"DEFAULT":    TOKEN_DEFAULT,
+	"UNIQUE":     TOKEN_UNIQUE,
+	"REFERENCES": TOKEN_REFERENCES,
+
+	// Logical / predicates
+	"AND":     TOKEN_AND,
+	"OR":      TOKEN_OR,
+	"TRUE":    TOKEN_TRUE,
+	"FALSE":   TOKEN_FALSE,
+	"LIKE":    TOKEN_LIKE,
+	"IS":      TOKEN_IS,
+	"IN":      TOKEN_IN,
+	"BETWEEN": TOKEN_BETWEEN,
+
+	// Data types
+	"INT":       TOKEN_INT,
+	"BIGINT":    TOKEN_BIGINT,
+	"VARCHAR":   TOKEN_VARCHAR,
+	"BOOLEAN":   TOKEN_BOOLEAN,
+	"TEXT":      TOKEN_TEXT,
+	"TIMESTAMP": TOKEN_TIMESTAMP,
+}
+
+// lookupIdent returns the keyword TokenType for s if it is a reserved word,
+// or TOKEN_IDENT if it is a plain user-defined name.
+// The comparison is case-insensitive: "select", "SELECT", and "SeLeCt" all
+// resolve to TOKEN_SELECT.
+func lookupIdent(s string) TokenType {
+	if tt, ok := keywords[strings.ToUpper(s)]; ok {
+		return tt
+	}
+	return TOKEN_IDENT
+}
diff --git a/internal/sql/lexer/lexer.go b/internal/sql/lexer/lexer.go
new file mode 100644
index 0000000..b744736
--- /dev/null
+++ b/internal/sql/lexer/lexer.go
@@ -0,0 +1,211 @@
+package lexer
+
+// Lexer tokenizes SQL source text into a stream of Tokens
+type Lexer struct {
+	src []rune   // full input as runes
+	pos Position // current read cursor(line, col, index)
+}
+
+// NewLexer creates a Lexer fro the given SQL source string
+func NewLexer(src string) *Lexer {
+	return &Lexer{
+		src: []rune(src),
+		pos: NewPosition(),
+	}
+}
+
+func (l *Lexer) peek() rune {
+	if l.pos.Index >= len(l.src) {
+		return 0
+	}
+	return l.src[l.pos.Index]
+}
+
+func (l *Lexer) advance() rune {
+	ch := l.src[l.pos.Index]
+	l.pos.Advance(ch)
+	return ch
+}
+
+// skipWhitespace consumes spaces, tabs, \r, \n
+func (l *Lexer) skipWhitespace() {
+	for l.pos.Index < len(l.src) {
+		ch := l.src[l.pos.Index]
+		if ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' {
+			l.pos.Advance(ch)
+		} else {
+			break
+		}
+	}
+}
+
+// makeToken is a convenience to build a Token with the given fields.
+func (l *Lexer) makeToken(typ TokenType, lit string, line, col int) Token {
+	return Token{Type: typ, Literal: lit, Line: line, Col: col}
+}
+
+// scanIdentifier reads a keyword or user identifier.
+// Precondition: peek() is a letter.
+func (l *Lexer) scanIdentifier() Token {
+	startLine, startCol := l.pos.Line, l.pos.Column
+	start := l.pos.Index
+	for l.pos.Index < len(l.src) {
+		ch := l.src[l.pos.Index]
+		if isLetter(ch) || isDigit(ch) || ch == '_' {
+			l.pos.Advance(ch)
+		} else {
+			break
+		}
+	}
+	lit := string(l.src[start:l.pos.Index])
+	typ := lookupIdent(lit) // keyword or TOKEN_IDENT
+	return l.makeToken(typ, lit, startLine, startCol)
+}
+
+func isLetter(ch rune) bool { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') }
+func isDigit(ch rune) bool  { return ch >= '0' && ch <= '9' }
+
+func (l *Lexer) scanNumber() Token {
+	startLine, startCol := l.pos.Line, l.pos.Column
+	start := l.pos.Index
+	isFloat := false
+
+	// Leading '.' case
+	if l.peek() == '.' {
+		isFloat = true
+		l.advance()
+	}
+
+	// Digits consumption
+	for l.pos.Index < len(l.src) && isDigit((l.src[l.pos.Index])) {
+		l.advance()
+	}
+
+	// Decimal check
+	if !isFloat && l.peek() == '.' {
+		nextIdx := l.pos.Index + 1
+		if nextIdx >= len(l.src) || isDigit(l.src[nextIdx]) || !isLetter(l.src[nextIdx]) && l.src[nextIdx] != '_' {
+			isFloat = true
+			l.advance()
+			for l.pos.Index < len(l.src) && isDigit((l.src[l.pos.Index])) {
+				l.advance()
+			}
+		}
+	}
+
+	lit := string(l.src[start:l.pos.Index])
+	if isFloat {
+		return l.makeToken(TOKEN_FLOAT, lit, startLine, startCol)
+	}
+	return l.makeToken(TOKEN_INTEGER, lit, startLine, startCol)
+}
+
+func (l *Lexer) scanString() Token {
+	startLine, startCol := l.pos.Line, l.pos.Column
+	l.advance()
+
+	var buf []rune
+	for {
+		if l.pos.Index >= len(l.src) {
+			return l.makeToken(TOKEN_ILLEGAL, string(buf), startLine, startCol)
+		}
+		ch := l.advance()
+		if ch == '\'' {
+			if l.peek() == '\'' {
+				l.advance()
+				buf = append(buf, '\'')
+			} else {
+				break
+			}
+		} else {
+			buf = append(buf, ch)
+		}
+	}
+
+	return l.makeToken(TOKEN_STRING, string(buf), startLine, startCol)
+}
+
+func (l *Lexer) NextToken() Token {
+	l.skipWhitespace()
+
+	if l.pos.Index >= len(l.src) {
+		return l.makeToken(TOKEN_EOF, "", l.pos.Line, l.pos.Column)
+	}
+
+	startLine, startCol := l.pos.Line, l.pos.Column
+	ch := l.peek()
+
+	// Identifier or Keyword
+	if isLetter(ch) {
+		return l.scanIdentifier()
+	}
+
+	// Number
+	if isDigit(ch) {
+		return l.scanNumber()
+	}
+
+	if ch == '.' {
+		nextIdx := l.pos.Index + 1
+		if nextIdx < len(l.src) && isDigit(l.src[nextIdx]) {
+			return l.scanNumber()
+		}
+		l.advance()
+		return l.makeToken(TOKEN_DOT, ".", startLine, startCol)
+	}
+
+	// String literal
+	if ch == '\'' {
+		return l.scanString()
+	}
+
+	// Single character
+	l.advance()
+	switch ch {
+	case '(':
+		return l.makeToken(TOKEN_LPAREN, "(", startLine, startCol)
+	case ')':
+		return l.makeToken(TOKEN_RPAREN, ")", startLine, startCol)
+	case ',':
+		return l.makeToken(TOKEN_COMMA, ",", startLine, startCol)
+	case ';':
+		return l.makeToken(TOKEN_SEMICOLON, ";", startLine, startCol)
+	case '+':
+		return l.makeToken(TOKEN_PLUS, "+", startLine, startCol)
+	case '-':
+		return l.makeToken(TOKEN_MINUS, "-", startLine, startCol)
+	case '*':
+		return l.makeToken(TOKEN_STAR, "*", startLine, startCol)
+	case '/':
+		return l.makeToken(TOKEN_SLASH, "/", startLine, startCol)
+	case '%':
+		return l.makeToken(TOKEN_PERCENT, "%", startLine, startCol)
+	case '=':
+		return l.makeToken(TOKEN_EQ, "=", startLine, startCol)
+	// ── Multi-character operators ──
+	case '<':
+		if l.peek() == '=' {
+			l.advance()
+			return l.makeToken(TOKEN_LTE, "<=", startLine, startCol)
+		}
+		if l.peek() == '>' {
+			l.advance()
+			return l.makeToken(TOKEN_NEQ, "<>", startLine, startCol)
+		}
+		return l.makeToken(TOKEN_LT, "<", startLine, startCol)
+	case '>':
+		if l.peek() == '=' {
+			l.advance()
+			return l.makeToken(TOKEN_GTE, ">=", startLine, startCol)
+		}
+		return l.makeToken(TOKEN_GT, ">", startLine, startCol)
+	case '!':
+		if l.peek() == '=' {
+			l.advance()
+			return l.makeToken(TOKEN_NEQ, "!=", startLine, startCol)
+		}
+		return l.makeToken(TOKEN_ILLEGAL, "!", startLine, startCol)
+	default:
+		return l.makeToken(TOKEN_ILLEGAL, string(ch), startLine, startCol)
+	}
+}
diff --git a/internal/sql/lexer/peekable.go b/internal/sql/lexer/lookahead.go
similarity index 58%
rename from internal/sql/lexer/peekable.go
rename to internal/sql/lexer/lookahead.go
index e873112..320662e 100644
--- a/internal/sql/lexer/peekable.go
+++ b/internal/sql/lexer/lookahead.go
@@ -1,28 +1,25 @@
 package lexer
 
-// PeekableIterator provides up to 1 element lookahead over a stream of elements.
-//
-// It is intended for use by tokenizers and parsers to enforce an LL(1) design:
-// only one element may be observed ahead of the consumption point. The backing
-// stream is a simple function, so no buffering array is needed and arbitrarily
-// large inputs are supported with no extra overhead.
+// LookaheadIterator provides single-element lookahead over an arbitrary stream.
+// Used by the parser to inspect the next token without consuming it.
+// Not used by the lexer itself.
 //
 // This type is not safe for concurrent use.
-type PeekableIterator[T any] struct {
+type LookaheadIterator[T any] struct {
 	nextFn func() T // produces the next element on demand
 	peeked *T       // buffered lookahead element, nil if not yet peeked
 	count  int      // number of elements consumed via Next()
 }
 
-// NewPeekableIterator creates a PeekableIterator backed by the given function.
+// NewLookaheadIterator creates a LookaheadIterator backed by the given function.
 // nextFn is called each time a new element is needed.
-func NewPeekableIterator[T any](nextFn func() T) *PeekableIterator[T] {
-	return &PeekableIterator[T]{nextFn: nextFn}
+func NewLookaheadIterator[T any](nextFn func() T) *LookaheadIterator[T] {
+	return &LookaheadIterator[T]{nextFn: nextFn}
 }
 
 // Peek returns the next element without consuming it.
 // Successive calls return the same element until Next() is called.
-func (p *PeekableIterator[T]) Peek() T {
+func (p *LookaheadIterator[T]) Peek() T {
 	if p.peeked != nil {
 		return *p.peeked
 	}
@@ -32,7 +29,7 @@ func (p *PeekableIterator[T]) Peek() T {
 }
 
 // Next consumes and returns the next element.
-func (p *PeekableIterator[T]) Next() T {
+func (p *LookaheadIterator[T]) Next() T {
 	var v T
 	if p.peeked != nil {
 		v = *p.peeked
@@ -46,14 +43,14 @@ func (p *PeekableIterator[T]) Next() T {
 
 // ExpectNextValue consumes and returns the next element if it equals expected.
 // Returns a pointer to the consumed element, or nil if it did not match.
-func (p *PeekableIterator[T]) ExpectNextValue(expected T, eq func(a, b T) bool) *T {
+func (p *LookaheadIterator[T]) ExpectNextValue(expected T, eq func(a, b T) bool) *T {
 	return p.ExpectNextMatches(func(v T) bool { return eq(v, expected) })
 }
 
 // ExpectNextMatches consumes and returns the next element if it satisfies the
 // predicate. Returns a pointer to the consumed element, or nil otherwise.
 // The predicate is called at most once.
-func (p *PeekableIterator[T]) ExpectNextMatches(predicate func(T) bool) *T {
+func (p *LookaheadIterator[T]) ExpectNextMatches(predicate func(T) bool) *T {
 	v := p.Peek()
 	if predicate(v) {
 		p.Next() // consume
@@ -63,6 +60,6 @@ func (p *PeekableIterator[T]) ExpectNextMatches(predicate func(T) bool) *T {
 }
 
 // Count returns the number of elements consumed via Next() so far.
-func (p *PeekableIterator[T]) Count() int {
+func (p *LookaheadIterator[T]) Count() int {
 	return p.count
 }
diff --git a/internal/sql/lexer/tokens.go b/internal/sql/lexer/tokens.go
index 244fce3..1ca8ef9 100644
--- a/internal/sql/lexer/tokens.go
+++ b/internal/sql/lexer/tokens.go
@@ -2,132 +2,227 @@ package lexer
 
 import "fmt"
 
-// TokenType represents the type of a lexical token.
-// The underlying string value doubles as the human-readable name,
-// so no separate name map is needed.
-type TokenType string
+// TokenType is an integer tag that identifies what kind of lexical unit a
+// Token represents. Every terminal in the grammar maps to exactly one
+// TokenType constant.
+type TokenType int
 
 const (
 	// Special
-	T_INVALID TokenType = "INVALID"
-	T_EOF     TokenType = "EOF"
+	TOKEN_EOF     TokenType = iota // end of input; always the last token
+	TOKEN_ILLEGAL                  // unrecognised or malformed input; Literal carries the offending text
 
-	// Identifiers and literals
-	T_IDENT      TokenType = "IDENT"
-	T_INT_LIT    TokenType = "INT_LIT"
-	T_FLOAT_LIT  TokenType = "FLOAT_LIT"
-	T_STRING_LIT TokenType = "STRING_LIT"
-
-	// Arithmetic operators
-	T_PLUS     TokenType = "+"
-	T_MINUS    TokenType = "-"
-	T_ASTERISK TokenType = "*"
-	T_SLASH    TokenType = "/"
-	T_MODULO   TokenType = "%"
+	// Literals
+	TOKEN_IDENT
+	TOKEN_INTEGER
+	TOKEN_FLOAT
+	TOKEN_STRING
+
+	// DDL / database keywords
+	TOKEN_CREATE
+	TOKEN_DATABASE
+	TOKEN_USE
+	TOKEN_DROP
+	TOKEN_IF
+	TOKEN_EXISTS
+	TOKEN_TABLE
+	TOKEN_ALTER
+	TOKEN_ADD
+	TOKEN_COLUMN
+	TOKEN_MODIFY
+	TOKEN_RENAME
+	TOKEN_TO
+
+	// DML keywords
+	TOKEN_SELECT
+	TOKEN_DISTINCT
+	TOKEN_ALL
+	TOKEN_FROM
+	TOKEN_WHERE
+	TOKEN_AS
+	TOKEN_INSERT
+	TOKEN_INTO
+	TOKEN_VALUES
+	TOKEN_UPDATE
+	TOKEN_SET
+	TOKEN_DELETE
+
+	// JOIN keywords
+	TOKEN_JOIN
+	TOKEN_INNER
+	TOKEN_LEFT
+	TOKEN_RIGHT
+	TOKEN_FULL
+	TOKEN_OUTER
+	TOKEN_CROSS
+	TOKEN_ON
+
+	// Clause keywords
+	TOKEN_GROUP
+	TOKEN_BY
+	TOKEN_HAVING
+	TOKEN_ORDER
+	TOKEN_ASC
+	TOKEN_DESC
+	TOKEN_LIMIT
+	TOKEN_OFFSET
+
+	// Constraint / type keywords
+	TOKEN_PRIMARY
+	TOKEN_KEY
+	TOKEN_NOT
+	TOKEN_NULL
+	TOKEN_DEFAULT
+	TOKEN_UNIQUE
+	TOKEN_REFERENCES
+
+	// Logical / predicate keywords
+	TOKEN_AND
+	TOKEN_OR
+	TOKEN_TRUE
+	TOKEN_FALSE
+	TOKEN_LIKE
+	TOKEN_IS
+	TOKEN_IN
+	TOKEN_BETWEEN
+
+	// Data-type keywords
+	TOKEN_INT
+	TOKEN_BIGINT
+	TOKEN_VARCHAR
+	TOKEN_BOOLEAN
+	TOKEN_TEXT
+	TOKEN_TIMESTAMP
 
 	// Comparison operators
-	T_EQUAL         TokenType = "="
-	T_NOT_EQUAL     TokenType = "!="
-	T_DIAMOND       TokenType = "<>"
-	T_LESS          TokenType = "<"
-	T_GREATER       TokenType = ">"
-	T_LESS_EQUAL    TokenType = "<="
-	T_GREATER_EQUAL TokenType = ">="
-
-	// Delimiters
-	T_COMMA  TokenType = ","
-	T_SEMI   TokenType = ";"
-	T_LPAREN TokenType = "("
-	T_RPAREN TokenType = ")"
-
-	// Database manipulation
-	T_CREATE   TokenType = "CREATE"
-	T_DROP     TokenType = "DROP"
-	T_DATABASE TokenType = "DATABASE"
-	T_USE      TokenType = "USE"
-
-	// Table manipulation
-	T_TABLE  TokenType = "TABLE"
-	T_ALTER  TokenType = "ALTER"
-	T_ADD    TokenType = "ADD"
-	T_MODIFY TokenType = "MODIFY"
-	T_COLUMN TokenType = "COLUMN"
-	T_RENAME TokenType = "RENAME"
-	T_TO     TokenType = "TO"
-
-	// Column constraints
-	T_PRIMARY TokenType = "PRIMARY"
-	T_KEY     TokenType = "KEY"
-	T_UNIQUE  TokenType = "UNIQUE"
-	T_NOT     TokenType = "NOT"
-	T_NULL    TokenType = "NULL"
-	T_DEFAULT TokenType = "DEFAULT"
-
-	// Data types
-	T_INT       TokenType = "INT"
-	T_BIGINT    TokenType = "BIGINT"
-	T_VARCHAR   TokenType = "VARCHAR"
-	T_BOOLEAN   TokenType = "BOOLEAN"
-	T_TEXT      TokenType = "TEXT"
-	T_TIMESTAMP TokenType = "TIMESTAMP"
-
-	// SELECT
-	T_SELECT TokenType = "SELECT"
-	T_FROM   TokenType = "FROM"
-	T_WHERE  TokenType = "WHERE"
-	T_LIMIT  TokenType = "LIMIT"
-	T_AS     TokenType = "AS"
-
-	// INSERT
-	T_INSERT TokenType = "INSERT"
-	T_INTO   TokenType = "INTO"
-	T_VALUES TokenType = "VALUES"
-
-	// UPDATE / DELETE
-	T_UPDATE TokenType = "UPDATE"
-	T_SET    TokenType = "SET"
-	T_DELETE TokenType = "DELETE"
-
-	// Logical operators
-	T_AND TokenType = "AND"
-	T_OR  TokenType = "OR"
+	TOKEN_EQ  // =
+	TOKEN_NEQ // != or <>
+	TOKEN_LT  // <
+	TOKEN_GT  // >
+	TOKEN_LTE // <=
+	TOKEN_GTE // >=
 
-	// Literals
-	T_TRUE  TokenType = "TRUE"
-	T_FALSE TokenType = "FALSE"
+	// Arithmetic operators
+	TOKEN_PLUS    // +
+	TOKEN_MINUS   // -
+	TOKEN_STAR    // *
+	TOKEN_SLASH   // /
+	TOKEN_PERCENT // %
+
+	// Punctuation
+	TOKEN_LPAREN    // (
+	TOKEN_RPAREN    // )
+	TOKEN_COMMA     // ,
+	TOKEN_DOT       // .
+	TOKEN_SEMICOLON // ;
 )
 
-// LookupKeyword returns the keyword TokenType for the given identifier string.
-// If the string is not a keyword, it returns T_INVALID.
-// The caller should pass the uppercased string.
-//
-// Since TokenType is a string and every keyword const equals its SQL text,
-// we simply cast and check via a switch — no extra map or slice needed.
-func LookupKeyword(ident string) TokenType {
-	switch TokenType(ident) {
-	case T_CREATE, T_DROP, T_DATABASE, T_USE,
-		T_TABLE, T_ALTER, T_ADD, T_MODIFY, T_COLUMN, T_RENAME, T_TO,
-		T_PRIMARY, T_KEY, T_UNIQUE, T_NOT, T_NULL, T_DEFAULT,
-		T_INT, T_BIGINT, T_VARCHAR, T_BOOLEAN, T_TEXT, T_TIMESTAMP,
-		T_SELECT, T_FROM, T_WHERE, T_LIMIT, T_AS,
-		T_INSERT, T_INTO, T_VALUES,
-		T_UPDATE, T_SET, T_DELETE,
-		T_AND, T_OR,
-		T_TRUE, T_FALSE:
-		return TokenType(ident)
-	default:
-		return T_INVALID
+// tokenNames provides a human-readable label for each TokenType; used by
+// String() and by test failure messages.
+var tokenNames = map[TokenType]string{
+	TOKEN_EOF:        "EOF",
+	TOKEN_ILLEGAL:    "ILLEGAL",
+	TOKEN_IDENT:      "IDENT",
+	TOKEN_INTEGER:    "INTEGER",
+	TOKEN_FLOAT:      "FLOAT",
+	TOKEN_STRING:     "STRING",
+	TOKEN_CREATE:     "CREATE",
+	TOKEN_DATABASE:   "DATABASE",
+	TOKEN_USE:        "USE",
+	TOKEN_DROP:       "DROP",
+	TOKEN_IF:         "IF",
+	TOKEN_EXISTS:     "EXISTS",
+	TOKEN_TABLE:      "TABLE",
+	TOKEN_ALTER:      "ALTER",
+	TOKEN_ADD:        "ADD",
+	TOKEN_COLUMN:     "COLUMN",
+	TOKEN_MODIFY:     "MODIFY",
+	TOKEN_RENAME:     "RENAME",
+	TOKEN_TO:         "TO",
+	TOKEN_SELECT:     "SELECT",
+	TOKEN_DISTINCT:   "DISTINCT",
+	TOKEN_ALL:        "ALL",
+	TOKEN_FROM:       "FROM",
+	TOKEN_WHERE:      "WHERE",
+	TOKEN_AS:         "AS",
+	TOKEN_INSERT:     "INSERT",
+	TOKEN_INTO:       "INTO",
+	TOKEN_VALUES:     "VALUES",
+	TOKEN_UPDATE:     "UPDATE",
+	TOKEN_SET:        "SET",
+	TOKEN_DELETE:     "DELETE",
+	TOKEN_JOIN:       "JOIN",
+	TOKEN_INNER:      "INNER",
+	TOKEN_LEFT:       "LEFT",
+	TOKEN_RIGHT:      "RIGHT",
+	TOKEN_FULL:       "FULL",
+	TOKEN_OUTER:      "OUTER",
+	TOKEN_CROSS:      "CROSS",
+	TOKEN_ON:         "ON",
+	TOKEN_GROUP:      "GROUP",
+	TOKEN_BY:         "BY",
+	TOKEN_HAVING:     "HAVING",
+	TOKEN_ORDER:      "ORDER",
+	TOKEN_ASC:        "ASC",
+	TOKEN_DESC:       "DESC",
+	TOKEN_LIMIT:      "LIMIT",
+	TOKEN_OFFSET:     "OFFSET",
+	TOKEN_PRIMARY:    "PRIMARY",
+	TOKEN_KEY:        "KEY",
+	TOKEN_NOT:        "NOT",
+	TOKEN_NULL:       "NULL",
+	TOKEN_DEFAULT:    "DEFAULT",
+	TOKEN_UNIQUE:     "UNIQUE",
+	TOKEN_REFERENCES: "REFERENCES",
+	TOKEN_AND:        "AND",
+	TOKEN_OR:         "OR",
+	TOKEN_TRUE:       "TRUE",
+	TOKEN_FALSE:      "FALSE",
+	TOKEN_LIKE:       "LIKE",
+	TOKEN_IS:         "IS",
+	TOKEN_IN:         "IN",
+	TOKEN_BETWEEN:    "BETWEEN",
+	TOKEN_INT:        "INT",
+	TOKEN_BIGINT:     "BIGINT",
+	TOKEN_VARCHAR:    "VARCHAR",
+	TOKEN_BOOLEAN:    "BOOLEAN",
+	TOKEN_TEXT:       "TEXT",
+	TOKEN_TIMESTAMP:  "TIMESTAMP",
+	TOKEN_EQ:         "=",
+	TOKEN_NEQ:        "!=",
+	TOKEN_LT:         "<",
+	TOKEN_GT:         ">",
+	TOKEN_LTE:        "<=",
+	TOKEN_GTE:        ">=",
+	TOKEN_PLUS:       "+",
+	TOKEN_MINUS:      "-",
+	TOKEN_STAR:       "*",
+	TOKEN_SLASH:      "/",
+	TOKEN_PERCENT:    "%",
+	TOKEN_LPAREN:     "(",
+	TOKEN_RPAREN:     ")",
+	TOKEN_COMMA:      ",",
+	TOKEN_DOT:        ".",
+	TOKEN_SEMICOLON:  ";",
+}
+
+// String returns the human-readable name of a TokenType.
+func (t TokenType) String() string {
+	if s, ok := tokenNames[t]; ok {
+		return s
 	}
+	return fmt.Sprintf("TokenType(%d)", int(t))
 }
 
-// Token represents a single lexical token produced by the lexer.
+// Token is a single lexical unit produced by the Lexer.
 type Token struct {
-	Type    TokenType // the type of the token
-	Literal string    // the raw text of the token from the source input
-	Pos     Position  // the position of the first character of the token
+	Type    TokenType // what kind of token this is
+	Literal string    // source text of the token (string tokens have quotes stripped and
+	//                   '' escapes decoded; keywords preserve their original casing)
+	Line int // 1-based line number of the token's first character
+	Col  int // 1-based column number of the token's first character
 }
 
-// String returns a human-readable representation of the token.
 func (t Token) String() string {
-	return fmt.Sprintf("%s(%q) at %d:%d", t.Type, t.Literal, t.Pos.Line, t.Pos.Column)
+	return fmt.Sprintf("Token{%-12s %q  %d:%d}", t.Type, t.Literal, t.Line, t.Col)
 }

From 07b2f6f4f8380b1aa744c04cafcf14731bff1c0e Mon Sep 17 00:00:00 2001
From: rahulc0dy <rc645312@gmail.com>
Date: Sun, 7 Jun 2026 18:52:18 +0530
Subject: [PATCH 3/6] Remove redundant files and simplify lexer

---
 internal/sql/lexer/errors.go   | 17 --------
 internal/sql/lexer/lexer.go    | 73 +++++++++++++++++++++-------------
 internal/sql/lexer/position.go | 37 -----------------
 3 files changed, 46 insertions(+), 81 deletions(-)
 delete mode 100644 internal/sql/lexer/errors.go
 delete mode 100644 internal/sql/lexer/position.go

diff --git a/internal/sql/lexer/errors.go b/internal/sql/lexer/errors.go
deleted file mode 100644
index 916192d..0000000
--- a/internal/sql/lexer/errors.go
+++ /dev/null
@@ -1,17 +0,0 @@
-package lexer
-
-import "errors"
-
-var (
-	// ErrUnexpectedToken is returned when the lexer encounters an unexpected token.
-	ErrUnexpectedToken = errors.New("unexpected token")
-
-	// ErrUnexpectedEOF is returned when the lexer encounters the end of input.
-	ErrUnexpectedEOF = errors.New("unexpected EOF")
-
-	// ErrUnterminatedString is returned when the lexer encounters an unterminated string literal.
-	ErrUnterminatedString = errors.New("unterminated string")
-
-	// ErrUnterminatedBlockComment is returned when the lexer encounters an unterminated block comment.
-	ErrUnterminatedBlockComment = errors.New("unterminated block comment")
-)
diff --git a/internal/sql/lexer/lexer.go b/internal/sql/lexer/lexer.go
index b744736..f0fe4ba 100644
--- a/internal/sql/lexer/lexer.go
+++ b/internal/sql/lexer/lexer.go
@@ -1,38 +1,57 @@
 package lexer
 
+// position represents a position in the source input.
+// It tracks three values:
+//
+//   - index:  absolute rune offset from the start of the entire input (0-based).
+//     It counts every character (including newlines) and never resets.
+//   - line:   the current line number (1-based). Increments only when a '\n' is encountered.
+//   - column: the position within the current line (1-based). Resets to 1 on every new line.
+type position struct {
+	index  int
+	line   int
+	column int
+}
+
 // Lexer tokenizes SQL source text into a stream of Tokens
 type Lexer struct {
 	src []rune   // full input as runes
-	pos Position // current read cursor(line, col, index)
+	pos position // current read cursor(line, col, index)
 }
 
 // NewLexer creates a Lexer fro the given SQL source string
 func NewLexer(src string) *Lexer {
 	return &Lexer{
 		src: []rune(src),
-		pos: NewPosition(),
+		pos: position{0, 1, 1},
 	}
 }
 
 func (l *Lexer) peek() rune {
-	if l.pos.Index >= len(l.src) {
+	if l.pos.index >= len(l.src) {
 		return 0
 	}
-	return l.src[l.pos.Index]
+	return l.src[l.pos.index]
 }
 
 func (l *Lexer) advance() rune {
-	ch := l.src[l.pos.Index]
-	l.pos.Advance(ch)
+	ch := l.src[l.pos.index]
+	l.pos.index++
+	if ch == '\n' {
+		l.pos.line++
+		l.pos.column = 1
+	} else {
+		l.pos.column++
+	}
 	return ch
 }
 
 // skipWhitespace consumes spaces, tabs, \r, \n
 func (l *Lexer) skipWhitespace() {
-	for l.pos.Index < len(l.src) {
-		ch := l.src[l.pos.Index]
+	for l.pos.index < len(l.src) {
+		ch := l.src[l.pos.index]
 		if ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' {
-			l.pos.Advance(ch)
+			l.advance()
 		} else {
 			break
 		}
@@ -47,17 +66,17 @@ func (l *Lexer) makeToken(typ TokenType, lit string, line, col int) Token {
 // scanIdentifier reads a keyword or user identifier.
 // Precondition: peek() is a letter.
 func (l *Lexer) scanIdentifier() Token {
-	startLine, startCol := l.pos.Line, l.pos.Column
-	start := l.pos.Index
-	for l.pos.Index < len(l.src) {
-		ch := l.src[l.pos.Index]
+	startLine, startCol := l.pos.line, l.pos.column
+	start := l.pos.index
+	for l.pos.index < len(l.src) {
+		ch := l.src[l.pos.index]
 		if isLetter(ch) || isDigit(ch) || ch == '_' {
-			l.pos.Advance(ch)
+			l.advance()
 		} else {
 			break
 		}
 	}
-	lit := string(l.src[start:l.pos.Index])
+	lit := string(l.src[start:l.pos.index])
 	typ := lookupIdent(lit) // keyword or TOKEN_IDENT
 	return l.makeToken(typ, lit, startLine, startCol)
 }
@@ -66,8 +85,8 @@ func isLetter(ch rune) bool { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && c
 func isDigit(ch rune) bool  { return ch >= '0' && ch <= '9' }
 
 func (l *Lexer) scanNumber() Token {
-	startLine, startCol := l.pos.Line, l.pos.Column
-	start := l.pos.Index
+	startLine, startCol := l.pos.line, l.pos.column
+	start := l.pos.index
 	isFloat := false
 
 	// Leading '.' case
@@ -77,23 +96,23 @@ func (l *Lexer) scanNumber() Token {
 	}
 
 	// Digits consumption
-	for l.pos.Index < len(l.src) && isDigit((l.src[l.pos.Index])) {
+	for l.pos.index < len(l.src) && isDigit((l.src[l.pos.index])) {
 		l.advance()
 	}
 
 	// Decimal check
 	if !isFloat && l.peek() == '.' {
-		nextIdx := l.pos.Index + 1
+		nextIdx := l.pos.index + 1
 		if nextIdx >= len(l.src) || isDigit(l.src[nextIdx]) || !isLetter(l.src[nextIdx]) && l.src[nextIdx] != '_' {
 			isFloat = true
 			l.advance()
-			for l.pos.Index < len(l.src) && isDigit((l.src[l.pos.Index])) {
+			for l.pos.index < len(l.src) && isDigit((l.src[l.pos.index])) {
 				l.advance()
 			}
 		}
 	}
 
-	lit := string(l.src[start:l.pos.Index])
+	lit := string(l.src[start:l.pos.index])
 	if isFloat {
 		return l.makeToken(TOKEN_FLOAT, lit, startLine, startCol)
 	}
@@ -101,12 +120,12 @@ func (l *Lexer) scanNumber() Token {
 }
 
 func (l *Lexer) scanString() Token {
-	startLine, startCol := l.pos.Line, l.pos.Column
+	startLine, startCol := l.pos.line, l.pos.column
 	l.advance()
 
 	var buf []rune
 	for {
-		if l.pos.Index >= len(l.src) {
+		if l.pos.index >= len(l.src) {
 			return l.makeToken(TOKEN_ILLEGAL, string(buf), startLine, startCol)
 		}
 		ch := l.advance()
@@ -128,11 +147,11 @@ func (l *Lexer) scanString() Token {
 func (l *Lexer) NextToken() Token {
 	l.skipWhitespace()
 
-	if l.pos.Index >= len(l.src) {
-		return l.makeToken(TOKEN_EOF, "", l.pos.Line, l.pos.Column)
+	if l.pos.index >= len(l.src) {
+		return l.makeToken(TOKEN_EOF, "", l.pos.line, l.pos.column)
 	}
 
-	startLine, startCol := l.pos.Line, l.pos.Column
+	startLine, startCol := l.pos.line, l.pos.column
 	ch := l.peek()
 
 	// Identifier or Keyword
@@ -146,7 +165,7 @@ func (l *Lexer) NextToken() Token {
 	}
 
 	if ch == '.' {
-		nextIdx := l.pos.Index + 1
+		nextIdx := l.pos.index + 1
 		if nextIdx < len(l.src) && isDigit(l.src[nextIdx]) {
 			return l.scanNumber()
 		}
diff --git a/internal/sql/lexer/position.go b/internal/sql/lexer/position.go
deleted file mode 100644
index 8610246..0000000
--- a/internal/sql/lexer/position.go
+++ /dev/null
@@ -1,37 +0,0 @@
-package lexer
-
-// Position represents a position in the source input.
-// It tracks three values:
-//
-//   - Index:  absolute character offset from the start of the entire input (0-based).
-//     It counts every character (including newlines) and never resets.
-//   - Line:   the current line number (1-based). Increments only when a '\n' is encountered.
-//   - Column: the position within the current line (1-based). Resets to 1 on every new line.
-type Position struct {
-	Index  int
-	Line   int
-	Column int
-}
-
-// NewPosition returns a Position initialised to the start of the input.
-func NewPosition() Position {
-	return Position{
-		Index:  0,
-		Line:   1,
-		Column: 1,
-	}
-}
-
-// Advance moves the position forward by one character.
-// If the character is a newline ('\n'), the line number is incremented
-// and the column is reset to 1. Otherwise the column is incremented.
-// The absolute index is always incremented by 1.
-func (p *Position) Advance(ch rune) {
-	p.Index++
-	if ch == '\n' {
-		p.Line++
-		p.Column = 1
-	} else {
-		p.Column++
-	}
-}

From ab07f8af9abdc54c1ba4d8a1e18232e6e18a832f Mon Sep 17 00:00:00 2001
From: rahulc0dy <rc645312@gmail.com>
Date: Mon, 8 Jun 2026 10:08:03 +0530
Subject: [PATCH 4/6] Implement custom lexer error type

---
 internal/sql/lexer/.gitkeep  |   0
 internal/sql/lexer/errors.go |  38 +++++++++
 internal/sql/lexer/lexer.go  | 150 +++++++++++++++++++++++------------
 3 files changed, 139 insertions(+), 49 deletions(-)
 delete mode 100644 internal/sql/lexer/.gitkeep
 create mode 100644 internal/sql/lexer/errors.go

diff --git a/internal/sql/lexer/.gitkeep b/internal/sql/lexer/.gitkeep
deleted file mode 100644
index e69de29..0000000
diff --git a/internal/sql/lexer/errors.go b/internal/sql/lexer/errors.go
new file mode 100644
index 0000000..645bfd2
--- /dev/null
+++ b/internal/sql/lexer/errors.go
@@ -0,0 +1,38 @@
+package lexer
+
+import (
+	"errors"
+	"fmt"
+)
+
+// Sentinel errors — compare with errors.Is, never inspect the message string.
+var (
+	ErrUnexpectedChar      = errors.New("unexpected character")
+	ErrUnterminatedString  = errors.New("unterminated string literal")
+	ErrUnterminatedComment = errors.New("unterminated block comment")
+)
+
+// LexError wraps a sentinel with the source location and a message.
+type LexError struct {
+	Err  error
+	Line int
+	Col  int
+	Msg  string
+}
+
+func (e *LexError) Error() string {
+	return fmt.Sprintf("%d:%d: %s: %s", e.Line, e.Col, e.Err.Error(), e.Msg)
+}
+
+// Unwrap lets errors.Is / errors.As traverse to the sentinel.
+func (e *LexError) Unwrap() error { return e.Err }
+
+// lexErr builds a *LexError; Msg carries only the specific detail, not a repetition of the sentinel.
+func lexErr(sentinel error, line, col int, format string, args ...any) *LexError {
+	return &LexError{
+		Err:  sentinel,
+		Line: line,
+		Col:  col,
+		Msg:  fmt.Sprintf(format, args...),
+	}
+}
diff --git a/internal/sql/lexer/lexer.go b/internal/sql/lexer/lexer.go
index f0fe4ba..ab0db42 100644
--- a/internal/sql/lexer/lexer.go
+++ b/internal/sql/lexer/lexer.go
@@ -34,7 +34,17 @@ func (l *Lexer) peek() rune {
 	return l.src[l.pos.index]
 }
 
+func (l *Lexer) peekNext() rune {
+	if l.pos.index+1 >= len(l.src) {
+		return 0
+	}
+	return l.src[l.pos.index+1]
+}
+
 func (l *Lexer) advance() rune {
+	if l.pos.index >= len(l.src) {
+		return 0
+	}
 	ch := l.src[l.pos.index]
 	l.pos.index++
 	if ch == '\n' {
@@ -46,16 +56,57 @@ func (l *Lexer) advance() rune {
 	return ch
 }
 
-// skipWhitespace consumes spaces, tabs, \r, \n
-func (l *Lexer) skipWhitespace() {
+// skipLineComment discards everything from the current position to end-of-line.
+// Precondition: the two leading '-' characters have already been consumed.
+func (l *Lexer) skipLineComment() {
+	for l.pos.index < len(l.src) && l.src[l.pos.index] != '\n' {
+		l.advance()
+	}
+}
+
+// skipBlockComment discards everything up to and including the closing */.
+// Precondition: the opening /* has already been consumed.
+func (l *Lexer) skipBlockComment(openLine, openCol int) error {
+	for l.pos.index < len(l.src) {
+		if l.peek() == '*' && l.peekNext() == '/' {
+			l.advance() // *
+			l.advance() // /
+			return nil
+		}
+		l.advance()
+	}
+	// End of input without finding */
+	return lexErr(ErrUnterminatedComment, l.pos.line, l.pos.column,
+		"expected '*/' to close '/*' opened at %d:%d", openLine, openCol)
+}
+
+// skipWhitespaceAndComments returns an error only for an unterminated block comment.
+// All other skipped content (whitespace, line comments) is infallible.
+func (l *Lexer) skipWhitespaceAndComments() error {
 	for l.pos.index < len(l.src) {
 		ch := l.src[l.pos.index]
-		if ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' {
+		switch {
+		case ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n':
 			l.advance()
-		} else {
-			break
+
+		case ch == '-' && l.peekNext() == '-':
+			l.advance()
+			l.advance()
+			l.skipLineComment()
+
+		case ch == '/' && l.peekNext() == '*':
+			openLine, openCol := l.pos.line, l.pos.column
+			l.advance()
+			l.advance()
+			if err := l.skipBlockComment(openLine, openCol); err != nil {
+				return err
+			}
+
+		default:
+			return nil
 		}
 	}
+	return nil
 }
 
 // makeToken is a convenience to build a Token with the given fields.
@@ -70,7 +121,7 @@ func (l *Lexer) scanIdentifier() Token {
 	start := l.pos.index
 	for l.pos.index < len(l.src) {
 		ch := l.src[l.pos.index]
-		if isLetter(ch) || isDigit(ch) || ch == '_' {
+		if isIdentPart(ch) {
 			l.advance()
 		} else {
 			break
@@ -81,6 +132,9 @@ func (l *Lexer) scanIdentifier() Token {
 	return l.makeToken(typ, lit, startLine, startCol)
 }
 
+func isIdentStart(ch rune) bool { return isLetter(ch) || ch == '_' }
+func isIdentPart(ch rune) bool  { return isLetter(ch) || isDigit(ch) || ch == '_' }
+
 func isLetter(ch rune) bool { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') }
 func isDigit(ch rune) bool  { return ch >= '0' && ch <= '9' }
 
@@ -102,8 +156,8 @@ func (l *Lexer) scanNumber() Token {
 
 	// Decimal check
 	if !isFloat && l.peek() == '.' {
-		nextIdx := l.pos.index + 1
-		if nextIdx >= len(l.src) || isDigit(l.src[nextIdx]) || !isLetter(l.src[nextIdx]) && l.src[nextIdx] != '_' {
+		nextCh := l.peekNext()
+		if nextCh == 0 || isDigit(nextCh) || (!isLetter(nextCh) && nextCh != '_') {
 			isFloat = true
 			l.advance()
 			for l.pos.index < len(l.src) && isDigit((l.src[l.pos.index])) {
@@ -119,112 +173,110 @@ func (l *Lexer) scanNumber() Token {
 	return l.makeToken(TOKEN_INTEGER, lit, startLine, startCol)
 }
 
-func (l *Lexer) scanString() Token {
+func (l *Lexer) scanString() (Token, error) {
 	startLine, startCol := l.pos.line, l.pos.column
-	l.advance()
+	l.advance() // consume opening '
 
 	var buf []rune
 	for {
 		if l.pos.index >= len(l.src) {
-			return l.makeToken(TOKEN_ILLEGAL, string(buf), startLine, startCol)
+			return l.makeToken(TOKEN_ILLEGAL, string(buf), startLine, startCol),
+				lexErr(ErrUnterminatedString, l.pos.line, l.pos.column,
+					"expected closing ' (string opened at %d:%d)", startLine, startCol)
 		}
+
 		ch := l.advance()
 		if ch == '\'' {
-			if l.peek() == '\'' {
+			if l.peek() == '\'' { // '' is the SQL escape for a literal single-quote
 				l.advance()
 				buf = append(buf, '\'')
 			} else {
-				break
+				break // normal close
 			}
 		} else {
 			buf = append(buf, ch)
 		}
 	}
 
-	return l.makeToken(TOKEN_STRING, string(buf), startLine, startCol)
+	return l.makeToken(TOKEN_STRING, string(buf), startLine, startCol), nil
 }
 
-func (l *Lexer) NextToken() Token {
-	l.skipWhitespace()
+func (l *Lexer) NextToken() (Token, error) {
+	if err := l.skipWhitespaceAndComments(); err != nil {
+		return l.makeToken(TOKEN_EOF, "", l.pos.line, l.pos.column), err
+	}
 
 	if l.pos.index >= len(l.src) {
-		return l.makeToken(TOKEN_EOF, "", l.pos.line, l.pos.column)
+		return l.makeToken(TOKEN_EOF, "", l.pos.line, l.pos.column), nil
 	}
 
 	startLine, startCol := l.pos.line, l.pos.column
 	ch := l.peek()
 
-	// Identifier or Keyword
-	if isLetter(ch) {
-		return l.scanIdentifier()
+	if isIdentStart(ch) {
+		return l.scanIdentifier(), nil
 	}
-
-	// Number
 	if isDigit(ch) {
-		return l.scanNumber()
+		return l.scanNumber(), nil
 	}
-
 	if ch == '.' {
-		nextIdx := l.pos.index + 1
-		if nextIdx < len(l.src) && isDigit(l.src[nextIdx]) {
-			return l.scanNumber()
+		if next := l.peekNext(); next != 0 && isDigit(next) {
+			return l.scanNumber(), nil
 		}
 		l.advance()
-		return l.makeToken(TOKEN_DOT, ".", startLine, startCol)
+		return l.makeToken(TOKEN_DOT, ".", startLine, startCol), nil
 	}
-
-	// String literal
 	if ch == '\'' {
 		return l.scanString()
 	}
 
-	// Single character
 	l.advance()
 	switch ch {
 	case '(':
-		return l.makeToken(TOKEN_LPAREN, "(", startLine, startCol)
+		return l.makeToken(TOKEN_LPAREN, "(", startLine, startCol), nil
 	case ')':
-		return l.makeToken(TOKEN_RPAREN, ")", startLine, startCol)
+		return l.makeToken(TOKEN_RPAREN, ")", startLine, startCol), nil
 	case ',':
-		return l.makeToken(TOKEN_COMMA, ",", startLine, startCol)
+		return l.makeToken(TOKEN_COMMA, ",", startLine, startCol), nil
 	case ';':
-		return l.makeToken(TOKEN_SEMICOLON, ";", startLine, startCol)
+		return l.makeToken(TOKEN_SEMICOLON, ";", startLine, startCol), nil
 	case '+':
-		return l.makeToken(TOKEN_PLUS, "+", startLine, startCol)
+		return l.makeToken(TOKEN_PLUS, "+", startLine, startCol), nil
 	case '-':
-		return l.makeToken(TOKEN_MINUS, "-", startLine, startCol)
+		return l.makeToken(TOKEN_MINUS, "-", startLine, startCol), nil
 	case '*':
-		return l.makeToken(TOKEN_STAR, "*", startLine, startCol)
+		return l.makeToken(TOKEN_STAR, "*", startLine, startCol), nil
 	case '/':
-		return l.makeToken(TOKEN_SLASH, "/", startLine, startCol)
+		return l.makeToken(TOKEN_SLASH, "/", startLine, startCol), nil
 	case '%':
-		return l.makeToken(TOKEN_PERCENT, "%", startLine, startCol)
+		return l.makeToken(TOKEN_PERCENT, "%", startLine, startCol), nil
 	case '=':
-		return l.makeToken(TOKEN_EQ, "=", startLine, startCol)
+		return l.makeToken(TOKEN_EQ, "=", startLine, startCol), nil
 	// ── Multi-character operators ──
 	case '<':
 		if l.peek() == '=' {
 			l.advance()
-			return l.makeToken(TOKEN_LTE, "<=", startLine, startCol)
+			return l.makeToken(TOKEN_LTE, "<=", startLine, startCol), nil
 		}
 		if l.peek() == '>' {
 			l.advance()
-			return l.makeToken(TOKEN_NEQ, "<>", startLine, startCol)
+			return l.makeToken(TOKEN_NEQ, "<>", startLine, startCol), nil
 		}
-		return l.makeToken(TOKEN_LT, "<", startLine, startCol)
+		return l.makeToken(TOKEN_LT, "<", startLine, startCol), nil
 	case '>':
 		if l.peek() == '=' {
 			l.advance()
-			return l.makeToken(TOKEN_GTE, ">=", startLine, startCol)
+			return l.makeToken(TOKEN_GTE, ">=", startLine, startCol), nil
 		}
-		return l.makeToken(TOKEN_GT, ">", startLine, startCol)
+		return l.makeToken(TOKEN_GT, ">", startLine, startCol), nil
 	case '!':
 		if l.peek() == '=' {
 			l.advance()
-			return l.makeToken(TOKEN_NEQ, "!=", startLine, startCol)
+			return l.makeToken(TOKEN_NEQ, "!=", startLine, startCol), nil
 		}
-		return l.makeToken(TOKEN_ILLEGAL, "!", startLine, startCol)
+		return l.makeToken(TOKEN_ILLEGAL, "!", startLine, startCol), lexErr(ErrUnexpectedChar, startLine, startCol, "'!'; did you mean '!='?")
+
 	default:
-		return l.makeToken(TOKEN_ILLEGAL, string(ch), startLine, startCol)
+		return l.makeToken(TOKEN_ILLEGAL, string(ch), startLine, startCol), lexErr(ErrUnexpectedChar, startLine, startCol, "%q", ch)
 	}
 }

From 10077ba910272cc0381cc749d3a2a70fb8315315 Mon Sep 17 00:00:00 2001
From: Saptak Manna <saptakmanna100@gmail.com>
Date: Mon, 8 Jun 2026 11:16:50 +0530
Subject: [PATCH 5/6] test: implement comprehensive unit tests for SQL lexer,
 tokens, and lookahead logic

---
 internal/sql/lexer/lexer_test.go     | 1292 ++++++++++++++++++++++++++
 internal/sql/lexer/lookahead_test.go |  442 +++++++++
 internal/sql/lexer/tokens_test.go    |  149 +++
 3 files changed, 1883 insertions(+)
 create mode 100644 internal/sql/lexer/lexer_test.go
 create mode 100644 internal/sql/lexer/lookahead_test.go
 create mode 100644 internal/sql/lexer/tokens_test.go

diff --git a/internal/sql/lexer/lexer_test.go b/internal/sql/lexer/lexer_test.go
new file mode 100644
index 0000000..8ef2b4d
--- /dev/null
+++ b/internal/sql/lexer/lexer_test.go
@@ -0,0 +1,1292 @@
+package lexer
+
+import (
+	"errors"
+	"fmt"
+	"testing"
+)
+
+// ---------- helpers ----------------------------------------------------------
+
+// tok is a compact constructor for expected Token values in table-driven tests.
+func tok(typ TokenType, lit string, line, col int) Token {
+	return Token{Type: typ, Literal: lit, Line: line, Col: col}
+}
+
+// collectAll drives the lexer to exhaustion and returns every token it emits
+// (including the final EOF). It fails the test on the first error.
+func collectAll(t *testing.T, input string) []Token {
+	t.Helper()
+	l := NewLexer(input)
+	var tokens []Token
+	for {
+		token, err := l.NextToken()
+		if err != nil {
+			t.Fatalf("unexpected error at %d:%d: %v", token.Line, token.Col, err)
+		}
+		tokens = append(tokens, token)
+		if token.Type == TOKEN_EOF {
+			break
+		}
+	}
+	return tokens
+}
+
+// requireTokens asserts the full token stream for a given input, including the
+// trailing EOF.
+func requireTokens(t *testing.T, input string, want []Token) {
+	t.Helper()
+	got := collectAll(t, input)
+	if len(got) != len(want) {
+		t.Fatalf("token count mismatch: got %d, want %d\ngot:  %v\nwant: %v",
+			len(got), len(want), got, want)
+	}
+	for i := range want {
+		if got[i] != want[i] {
+			t.Errorf("token[%d]: got %v, want %v", i, got[i], want[i])
+		}
+	}
+}
+
+// requireError asserts that lexing input eventually yields an error wrapping
+// sentinel; it fails if EOF is reached cleanly or a different error occurs.
+func requireError(t *testing.T, input string, sentinel error) {
+	t.Helper()
+	l := NewLexer(input)
+	for {
+		token, err := l.NextToken()
+		if errors.Is(err, sentinel) {
+			return
+		}
+		if err != nil || token.Type == TOKEN_EOF {
+			t.Fatalf("expected error wrapping %v, got token %v, err %v", sentinel, token, err)
+		}
+	}
+}
+
+// ---------- EOF & empty input ------------------------------------------------
+
+func TestNextToken_EmptyInput(t *testing.T) {
+	requireTokens(t, "", []Token{
+		tok(TOKEN_EOF, "", 1, 1),
+	})
+}
+
+func TestNextToken_OnlyWhitespace(t *testing.T) {
+	requireTokens(t, "   \t \r\n  \n  ", []Token{
+		tok(TOKEN_EOF, "", 3, 3),
+	})
+}
+
+func TestNextToken_RepeatedEOF(t *testing.T) {
+	l := NewLexer("")
+	for i := 0; i < 5; i++ {
+		token, err := l.NextToken()
+		if err != nil {
+			t.Fatalf("iteration %d: unexpected error: %v", i, err)
+		}
+		if token.Type != TOKEN_EOF {
+			t.Fatalf("iteration %d: expected EOF, got %v", i, token)
+		}
+	}
+}
+
+// ---------- Single-character punctuation -------------------------------------
+
+func TestNextToken_Punctuation(t *testing.T) {
+	tests := []struct {
+		input string
+		want  Token
+	}{
+		{"(", tok(TOKEN_LPAREN, "(", 1, 1)},
+		{")", tok(TOKEN_RPAREN, ")", 1, 1)},
+		{",", tok(TOKEN_COMMA, ",", 1, 1)},
+		{".", tok(TOKEN_DOT, ".", 1, 1)},
+		{";", tok(TOKEN_SEMICOLON, ";", 1, 1)},
+	}
+	for _, tc := range tests {
+		t.Run(tc.input, func(t *testing.T) {
+			requireTokens(t, tc.input, []Token{
+				tc.want,
+				tok(TOKEN_EOF, "", 1, 2),
+			})
+		})
+	}
+}
+
+// ---------- Arithmetic operators ---------------------------------------------
+
+func TestNextToken_ArithmeticOperators(t *testing.T) {
+	tests := []struct {
+		input string
+		want  Token
+	}{
+		{"+", tok(TOKEN_PLUS, "+", 1, 1)},
+		{"-", tok(TOKEN_MINUS, "-", 1, 1)},
+		{"*", tok(TOKEN_STAR, "*", 1, 1)},
+		{"/", tok(TOKEN_SLASH, "/", 1, 1)},
+		{"%", tok(TOKEN_PERCENT, "%", 1, 1)},
+	}
+	for _, tc := range tests {
+		t.Run(tc.input, func(t *testing.T) {
+			requireTokens(t, tc.input, []Token{
+				tc.want,
+				tok(TOKEN_EOF, "", 1, 2),
+			})
+		})
+	}
+}
+
+// ---------- Comparison operators (single & multi-char) -----------------------
+
+func TestNextToken_ComparisonOperators(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+		want  []Token
+	}{
+		{"EQ", "=", []Token{
+			tok(TOKEN_EQ, "=", 1, 1),
+			tok(TOKEN_EOF, "", 1, 2),
+		}},
+		{"LT", "<", []Token{
+			tok(TOKEN_LT, "<", 1, 1),
+			tok(TOKEN_EOF, "", 1, 2),
+		}},
+		{"GT", ">", []Token{
+			tok(TOKEN_GT, ">", 1, 1),
+			tok(TOKEN_EOF, "", 1, 2),
+		}},
+		{"LTE", "<=", []Token{
+			tok(TOKEN_LTE, "<=", 1, 1),
+			tok(TOKEN_EOF, "", 1, 3),
+		}},
+		{"GTE", ">=", []Token{
+			tok(TOKEN_GTE, ">=", 1, 1),
+			tok(TOKEN_EOF, "", 1, 3),
+		}},
+		{"NEQ_bang", "!=", []Token{
+			tok(TOKEN_NEQ, "!=", 1, 1),
+			tok(TOKEN_EOF, "", 1, 3),
+		}},
+		{"NEQ_diamond", "<>", []Token{
+			tok(TOKEN_NEQ, "<>", 1, 1),
+			tok(TOKEN_EOF, "", 1, 3),
+		}},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			requireTokens(t, tc.input, tc.want)
+		})
+	}
+}
+
+// ---------- Lone bang (!) is ILLEGAL -----------------------------------------
+
+func TestNextToken_LoneBang_IsIllegal(t *testing.T) {
+	l := NewLexer("!")
+	token, err := l.NextToken()
+	if err == nil {
+		t.Fatal("expected error for lone '!'")
+	}
+	if !errors.Is(err, ErrUnexpectedChar) {
+		t.Fatalf("expected ErrUnexpectedChar, got %v", err)
+	}
+	if token.Type != TOKEN_ILLEGAL {
+		t.Fatalf("expected TOKEN_ILLEGAL, got %v", token.Type)
+	}
+	if token.Literal != "!" {
+		t.Fatalf("expected literal '!', got %q", token.Literal)
+	}
+}
+
+// ---------- Integer literals -------------------------------------------------
+
+func TestNextToken_Integers(t *testing.T) {
+	tests := []struct {
+		input string
+		want  Token
+	}{
+		{"0", tok(TOKEN_INTEGER, "0", 1, 1)},
+		{"1", tok(TOKEN_INTEGER, "1", 1, 1)},
+		{"42", tok(TOKEN_INTEGER, "42", 1, 1)},
+		{"999999", tok(TOKEN_INTEGER, "999999", 1, 1)},
+	}
+	for _, tc := range tests {
+		t.Run(tc.input, func(t *testing.T) {
+			requireTokens(t, tc.input, []Token{
+				tc.want,
+				tok(TOKEN_EOF, "", 1, len(tc.input)+1),
+			})
+		})
+	}
+}
+
+// ---------- Float literals ---------------------------------------------------
+
+func TestNextToken_Floats(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+		want  Token
+	}{
+		{"simple", "3.14", tok(TOKEN_FLOAT, "3.14", 1, 1)},
+		{"leading_dot", ".5", tok(TOKEN_FLOAT, ".5", 1, 1)},
+		{"trailing_dot", "5.", tok(TOKEN_FLOAT, "5.", 1, 1)},
+		{"zero_dot_zero", "0.0", tok(TOKEN_FLOAT, "0.0", 1, 1)},
+		{"large", "12345.6789", tok(TOKEN_FLOAT, "12345.6789", 1, 1)},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			requireTokens(t, tc.input, []Token{
+				tc.want,
+				tok(TOKEN_EOF, "", 1, len(tc.input)+1),
+			})
+		})
+	}
+}
+
+// ---------- Dot-disambiguation (dot vs float) --------------------------------
+
+func TestNextToken_DotVsFloat(t *testing.T) {
+	t.Run("dot_followed_by_identifier", func(t *testing.T) {
+		// "t.id" → IDENT "t", DOT ".", IDENT "id"
+		requireTokens(t, "t.id", []Token{
+			tok(TOKEN_IDENT, "t", 1, 1),
+			tok(TOKEN_DOT, ".", 1, 2),
+			tok(TOKEN_IDENT, "id", 1, 3),
+			tok(TOKEN_EOF, "", 1, 5),
+		})
+	})
+
+	t.Run("dot_followed_by_digit", func(t *testing.T) {
+		// ".5" → FLOAT ".5"
+		requireTokens(t, ".5", []Token{
+			tok(TOKEN_FLOAT, ".5", 1, 1),
+			tok(TOKEN_EOF, "", 1, 3),
+		})
+	})
+
+	t.Run("dot_alone", func(t *testing.T) {
+		requireTokens(t, ".", []Token{
+			tok(TOKEN_DOT, ".", 1, 1),
+			tok(TOKEN_EOF, "", 1, 2),
+		})
+	})
+
+	t.Run("number_dot_ident_is_int_then_dot_then_ident", func(t *testing.T) {
+		// "42.col" → INTEGER "42", DOT ".", IDENT "col"
+		requireTokens(t, "42.col", []Token{
+			tok(TOKEN_INTEGER, "42", 1, 1),
+			tok(TOKEN_DOT, ".", 1, 3),
+			tok(TOKEN_IDENT, "col", 1, 4),
+			tok(TOKEN_EOF, "", 1, 7),
+		})
+	})
+
+	t.Run("number_dot_underscore_is_int_then_dot_then_ident", func(t *testing.T) {
+		// "1._x" → INTEGER "1", DOT ".", IDENT "_x"
+		requireTokens(t, "1._x", []Token{
+			tok(TOKEN_INTEGER, "1", 1, 1),
+			tok(TOKEN_DOT, ".", 1, 2),
+			tok(TOKEN_IDENT, "_x", 1, 3),
+			tok(TOKEN_EOF, "", 1, 5),
+		})
+	})
+}
+
+// ---------- String literals --------------------------------------------------
+
+func TestNextToken_Strings(t *testing.T) {
+	tests := []struct {
+		name    string
+		input   string
+		wantLit string
+	}{
+		{"empty", "''", ""},
+		{"simple", "'hello'", "hello"},
+		{"with_spaces", "'hello world'", "hello world"},
+		{"with_digits", "'abc123'", "abc123"},
+		{"escaped_quote", "'it''s'", "it's"},
+		{"double_escaped", "'a''''b'", "a''b"},
+		{"only_escaped", "''''", "'"},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			l := NewLexer(tc.input)
+			token, err := l.NextToken()
+			if err != nil {
+				t.Fatalf("unexpected error: %v", err)
+			}
+			if token.Type != TOKEN_STRING {
+				t.Fatalf("expected TOKEN_STRING, got %v", token.Type)
+			}
+			if token.Literal != tc.wantLit {
+				t.Fatalf("literal: got %q, want %q", token.Literal, tc.wantLit)
+			}
+			if token.Line != 1 || token.Col != 1 {
+				t.Fatalf("position: got %d:%d, want 1:1", token.Line, token.Col)
+			}
+		})
+	}
+}
+
+func TestNextToken_UnterminatedString(t *testing.T) {
+	inputs := []string{
+		"'hello",
+		"'",
+		"'unterminated",
+		"'it''s still open",
+	}
+	for _, input := range inputs {
+		t.Run(fmt.Sprintf("%q", input), func(t *testing.T) {
+			requireError(t, input, ErrUnterminatedString)
+		})
+	}
+}
+
+// ---------- Identifiers ------------------------------------------------------
+
+func TestNextToken_Identifiers(t *testing.T) {
+	tests := []struct {
+		input string
+		want  Token
+	}{
+		{"foo", tok(TOKEN_IDENT, "foo", 1, 1)},
+		{"Bar", tok(TOKEN_IDENT, "Bar", 1, 1)},
+		{"_private", tok(TOKEN_IDENT, "_private", 1, 1)},
+		{"col1", tok(TOKEN_IDENT, "col1", 1, 1)},
+		{"_", tok(TOKEN_IDENT, "_", 1, 1)},
+		{"a_b_c", tok(TOKEN_IDENT, "a_b_c", 1, 1)},
+		{"CamelCase", tok(TOKEN_IDENT, "CamelCase", 1, 1)},
+		{"x123abc", tok(TOKEN_IDENT, "x123abc", 1, 1)},
+	}
+	for _, tc := range tests {
+		t.Run(tc.input, func(t *testing.T) {
+			requireTokens(t, tc.input, []Token{
+				tc.want,
+				tok(TOKEN_EOF, "", 1, len(tc.input)+1),
+			})
+		})
+	}
+}
+
+// ---------- Keywords (case-insensitive) --------------------------------------
+
+func TestNextToken_AllKeywords(t *testing.T) {
+	// Exhaustive coverage of every keyword in the keywords map, in UPPER case only.
+	// Lower and mixed casing are exercised by TestNextToken_KeywordsCaseInsensitive.
+	allKeywords := []struct {
+		upper string
+		typ   TokenType
+	}{
+		{"CREATE", TOKEN_CREATE},
+		{"DATABASE", TOKEN_DATABASE},
+		{"USE", TOKEN_USE},
+		{"DROP", TOKEN_DROP},
+		{"IF", TOKEN_IF},
+		{"EXISTS", TOKEN_EXISTS},
+		{"TABLE", TOKEN_TABLE},
+		{"ALTER", TOKEN_ALTER},
+		{"ADD", TOKEN_ADD},
+		{"COLUMN", TOKEN_COLUMN},
+		{"MODIFY", TOKEN_MODIFY},
+		{"RENAME", TOKEN_RENAME},
+		{"TO", TOKEN_TO},
+		{"SELECT", TOKEN_SELECT},
+		{"DISTINCT", TOKEN_DISTINCT},
+		{"ALL", TOKEN_ALL},
+		{"FROM", TOKEN_FROM},
+		{"WHERE", TOKEN_WHERE},
+		{"AS", TOKEN_AS},
+		{"INSERT", TOKEN_INSERT},
+		{"INTO", TOKEN_INTO},
+		{"VALUES", TOKEN_VALUES},
+		{"UPDATE", TOKEN_UPDATE},
+		{"SET", TOKEN_SET},
+		{"DELETE", TOKEN_DELETE},
+		{"JOIN", TOKEN_JOIN},
+		{"INNER", TOKEN_INNER},
+		{"LEFT", TOKEN_LEFT},
+		{"RIGHT", TOKEN_RIGHT},
+		{"FULL", TOKEN_FULL},
+		{"OUTER", TOKEN_OUTER},
+		{"CROSS", TOKEN_CROSS},
+		{"ON", TOKEN_ON},
+		{"GROUP", TOKEN_GROUP},
+		{"BY", TOKEN_BY},
+		{"HAVING", TOKEN_HAVING},
+		{"ORDER", TOKEN_ORDER},
+		{"ASC", TOKEN_ASC},
+		{"DESC", TOKEN_DESC},
+		{"LIMIT", TOKEN_LIMIT},
+		{"OFFSET", TOKEN_OFFSET},
+		{"PRIMARY", TOKEN_PRIMARY},
+		{"KEY", TOKEN_KEY},
+		{"NOT", TOKEN_NOT},
+		{"NULL", TOKEN_NULL},
+		{"DEFAULT", TOKEN_DEFAULT},
+		{"UNIQUE", TOKEN_UNIQUE},
+		{"REFERENCES", TOKEN_REFERENCES},
+		{"AND", TOKEN_AND},
+		{"OR", TOKEN_OR},
+		{"TRUE", TOKEN_TRUE},
+		{"FALSE", TOKEN_FALSE},
+		{"LIKE", TOKEN_LIKE},
+		{"IS", TOKEN_IS},
+		{"IN", TOKEN_IN},
+		{"BETWEEN", TOKEN_BETWEEN},
+		{"INT", TOKEN_INT},
+		{"BIGINT", TOKEN_BIGINT},
+		{"VARCHAR", TOKEN_VARCHAR},
+		{"BOOLEAN", TOKEN_BOOLEAN},
+		{"TEXT", TOKEN_TEXT},
+		{"TIMESTAMP", TOKEN_TIMESTAMP},
+	}
+	for _, kw := range allKeywords {
+		t.Run(kw.upper, func(t *testing.T) {
+			// Upper case
+			tokens := collectAll(t, kw.upper)
+			if tokens[0].Type != kw.typ {
+				t.Errorf("UPPER %q: got type %v, want %v", kw.upper, tokens[0].Type, kw.typ)
+			}
+			if tokens[0].Literal != kw.upper {
+				t.Errorf("UPPER %q: literal got %q, want %q", kw.upper, tokens[0].Literal, kw.upper)
+			}
+		})
+	}
+}
+
+func TestNextToken_KeywordsCaseInsensitive(t *testing.T) {
+	// Verify that the literal preserves original casing while the type is correct.
+	cases := []struct {
+		input   string
+		wantTyp TokenType
+		wantLit string
+	}{
+		{"select", TOKEN_SELECT, "select"},
+		{"SELECT", TOKEN_SELECT, "SELECT"},
+		{"SeLeCt", TOKEN_SELECT, "SeLeCt"},
+		{"from", TOKEN_FROM, "from"},
+		{"From", TOKEN_FROM, "From"},
+		{"insert", TOKEN_INSERT, "insert"},
+		{"InSeRt", TOKEN_INSERT, "InSeRt"},
+		{"null", TOKEN_NULL, "null"},
+		{"Null", TOKEN_NULL, "Null"},
+		{"true", TOKEN_TRUE, "true"},
+		{"false", TOKEN_FALSE, "false"},
+		{"FaLsE", TOKEN_FALSE, "FaLsE"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.input, func(t *testing.T) {
+			tokens := collectAll(t, tc.input)
+			if tokens[0].Type != tc.wantTyp {
+				t.Errorf("type: got %v, want %v", tokens[0].Type, tc.wantTyp)
+			}
+			if tokens[0].Literal != tc.wantLit {
+				t.Errorf("literal: got %q, want %q", tokens[0].Literal, tc.wantLit)
+			}
+		})
+	}
+}
+
+// ---------- Comments ---------------------------------------------------------
+
+func TestNextToken_LineComment(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+		want  []Token
+	}{
+		{"comment_at_end", "42 -- comment", []Token{
+			tok(TOKEN_INTEGER, "42", 1, 1),
+			tok(TOKEN_EOF, "", 1, 14),
+		}},
+		{"comment_only", "-- everything is a comment", []Token{
+			tok(TOKEN_EOF, "", 1, 27),
+		}},
+		{"comment_before_newline", "-- comment\n42", []Token{
+			tok(TOKEN_INTEGER, "42", 2, 1),
+			tok(TOKEN_EOF, "", 2, 3),
+		}},
+		{"multiple_line_comments", "-- first\n-- second\n42", []Token{
+			tok(TOKEN_INTEGER, "42", 3, 1),
+			tok(TOKEN_EOF, "", 3, 3),
+		}},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			requireTokens(t, tc.input, tc.want)
+		})
+	}
+}
+
+func TestNextToken_BlockComment(t *testing.T) {
+	// A closed /* ... */ comment is expected to be skipped entirely, including
+	// when it spans lines, is empty, or directly follows another comment.
+	for _, tc := range []struct {
+		name, input string
+		want        []Token
+	}{
+		{"inline", "/* comment */ 42", []Token{
+			tok(TOKEN_INTEGER, "42", 1, 15),
+			tok(TOKEN_EOF, "", 1, 17),
+		}},
+		{"multi_line", "/* line1\nline2 */ 42", []Token{
+			tok(TOKEN_INTEGER, "42", 2, 10),
+			tok(TOKEN_EOF, "", 2, 12),
+		}},
+		{"empty_block", "/**/ 42", []Token{
+			tok(TOKEN_INTEGER, "42", 1, 6),
+			tok(TOKEN_EOF, "", 1, 8),
+		}},
+		{"adjacent", "/*a*//*b*/ 42", []Token{
+			tok(TOKEN_INTEGER, "42", 1, 12),
+			tok(TOKEN_EOF, "", 1, 14),
+		}},
+		{"comment_only", "/* eof in comment? no, closed */", []Token{
+			tok(TOKEN_EOF, "", 1, 33),
+		}},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			requireTokens(t, tc.input, tc.want)
+		})
+	}
+}
+
+func TestNextToken_UnterminatedBlockComment(t *testing.T) {
+	// Every input opens a block comment that is never closed before EOF.
+	for _, src := range []string{
+		"/* unclosed",
+		"/* also \n unclosed",
+		"/*",
+	} {
+		t.Run(fmt.Sprintf("%q", src), func(t *testing.T) {
+			requireError(t, src, ErrUnterminatedComment)
+		})
+	}
+}
+
+func TestNextToken_MixedComments(t *testing.T) {
+	// A line comment followed by a block comment: only SELECT on line 2 is emitted.
+	requireTokens(t, "-- line\n/* block */ SELECT", []Token{
+		tok(TOKEN_SELECT, "SELECT", 2, 13),
+		tok(TOKEN_EOF, "", 2, 19),
+	})
+}
+
+// ---------- Illegal characters -----------------------------------------------
+
+func TestNextToken_IllegalCharacters(t *testing.T) {
+	// Each entry is a one-character input the lexer is expected to reject.
+	illegals := []string{"@", "#", "$", "^", "&", "~", "\\", "`", "?", "|"}
+	for _, sym := range illegals {
+		t.Run(sym, func(t *testing.T) {
+			lx := NewLexer(sym)
+			got, err := lx.NextToken()
+			switch {
+			case err == nil:
+				t.Fatal("expected error for illegal character")
+			case !errors.Is(err, ErrUnexpectedChar):
+				t.Fatalf("expected ErrUnexpectedChar, got %v", err)
+			case got.Type != TOKEN_ILLEGAL:
+				t.Fatalf("expected TOKEN_ILLEGAL, got %v", got.Type)
+			case got.Literal != sym:
+				t.Fatalf("literal: got %q, want %q", got.Literal, sym)
+			}
+		})
+	}
+}
+
+// ---------- Line/column tracking ---------------------------------------------
+
+func TestNextToken_LineColTracking(t *testing.T) {
+	// Three lines of input; the column restarts at 1 after every newline.
+	requireTokens(t, "SELECT\n  *\nFROM t", []Token{
+		tok(TOKEN_SELECT, "SELECT", 1, 1),
+		tok(TOKEN_STAR, "*", 2, 3),
+		tok(TOKEN_FROM, "FROM", 3, 1),
+		tok(TOKEN_IDENT, "t", 3, 6),
+		tok(TOKEN_EOF, "", 3, 7),
+	})
+}
+
+func TestNextToken_TabTracking(t *testing.T) {
+	// Tabs count as single column advances, so a keyword after one leading
+	// tab is expected to start at column 2.
+	first := collectAll(t, "\tSELECT")[0]
+	if first.Col != 2 {
+		t.Errorf("expected column 2 after tab, got %d", first.Col)
+	}
+}
+
+func TestNextToken_MultipleNewlines(t *testing.T) {
+	// Three leading newlines put the first real token on line 4, column 1.
+	first := collectAll(t, "\n\n\n42")[0]
+	if first.Line != 4 || first.Col != 1 {
+		t.Errorf("expected 4:1, got %d:%d", first.Line, first.Col)
+	}
+}
+
+func TestNextToken_CarriageReturnLineFeed(t *testing.T) {
+	// \r is treated as whitespace but doesn't increment line; only \n does.
+	// A CRLF pair is therefore expected to advance the line exactly once.
+	tokens := collectAll(t, "a\r\nb")
+	// 'a' at 1:1
+	if a := tokens[0]; a.Line != 1 || a.Col != 1 {
+		t.Errorf("'a' expected 1:1, got %d:%d", a.Line, a.Col)
+	}
+	// 'b' at 2:1
+	if b := tokens[1]; b.Line != 2 || b.Col != 1 {
+		t.Errorf("'b' expected 2:1, got %d:%d", b.Line, b.Col)
+	}
+}
+
+// ---------- Whitespace sensitivity -------------------------------------------
+
+func TestNextToken_MultipleSpaces(t *testing.T) {
+	// A run of spaces separates tokens and each space advances the column by one.
+	requireTokens(t, "a     b", []Token{
+		tok(TOKEN_IDENT, "a", 1, 1),
+		tok(TOKEN_IDENT, "b", 1, 7),
+		tok(TOKEN_EOF, "", 1, 8),
+	})
+}
+
+func TestNextToken_NoWhitespace(t *testing.T) {
+	// Operators delimit identifiers even when no whitespace separates them.
+	requireTokens(t, "a+b", []Token{
+		tok(TOKEN_IDENT, "a", 1, 1),
+		tok(TOKEN_PLUS, "+", 1, 2),
+		tok(TOKEN_IDENT, "b", 1, 3),
+		tok(TOKEN_EOF, "", 1, 4),
+	})
+}
+
+// ---------- LexError structure -----------------------------------------------
+
+func TestLexError_ErrorMessage(t *testing.T) {
+	const want = "5:10: unexpected character: '@'" // line:col: sentinel text: detail
+	e := lexErr(ErrUnexpectedChar, 5, 10, "'@'")
+	if got := e.Error(); got != want {
+		t.Errorf("got %q, want %q", got, want)
+	}
+}
+
+func TestLexError_Unwrap(t *testing.T) {
+	e := lexErr(ErrUnterminatedString, 1, 1, "detail") // sentinel, line, col, detail
+	if !errors.Is(e, ErrUnterminatedString) {
+		t.Error("errors.Is should match sentinel")
+	}
+	var le *LexError
+	if !errors.As(e, &le) {
+		t.Fatal("errors.As should succeed for *LexError") // le is still nil: stop before it is dereferenced
+	}
+	if le.Line != 1 || le.Col != 1 {
+		t.Errorf("position: got %d:%d, want 1:1", le.Line, le.Col)
+	}
+}
+
+// ---------- Full SQL statements (integration) --------------------------------
+
+func TestNextToken_SelectStatement(t *testing.T) {
+	// SELECT with a column list, a WHERE comparison and a terminating semicolon.
+	requireTokens(t, "SELECT id, name FROM users WHERE age >= 18;", []Token{
+		tok(TOKEN_SELECT, "SELECT", 1, 1),
+		tok(TOKEN_IDENT, "id", 1, 8),
+		tok(TOKEN_COMMA, ",", 1, 10),
+		tok(TOKEN_IDENT, "name", 1, 12),
+		tok(TOKEN_FROM, "FROM", 1, 17),
+		tok(TOKEN_IDENT, "users", 1, 22),
+		tok(TOKEN_WHERE, "WHERE", 1, 28),
+		tok(TOKEN_IDENT, "age", 1, 34),
+		tok(TOKEN_GTE, ">=", 1, 38),
+		tok(TOKEN_INTEGER, "18", 1, 41),
+		tok(TOKEN_SEMICOLON, ";", 1, 43),
+		tok(TOKEN_EOF, "", 1, 44),
+	})
+}
+
+func TestNextToken_InsertStatement(t *testing.T) {
+	// INSERT with a column list and a VALUES tuple; the string literal is expected without its quotes.
+	requireTokens(t, "INSERT INTO users (name, age) VALUES ('Alice', 30);", []Token{
+		tok(TOKEN_INSERT, "INSERT", 1, 1),
+		tok(TOKEN_INTO, "INTO", 1, 8),
+		tok(TOKEN_IDENT, "users", 1, 13),
+		tok(TOKEN_LPAREN, "(", 1, 19),
+		tok(TOKEN_IDENT, "name", 1, 20),
+		tok(TOKEN_COMMA, ",", 1, 24),
+		tok(TOKEN_IDENT, "age", 1, 26),
+		tok(TOKEN_RPAREN, ")", 1, 29),
+		tok(TOKEN_VALUES, "VALUES", 1, 31),
+		tok(TOKEN_LPAREN, "(", 1, 38),
+		tok(TOKEN_STRING, "Alice", 1, 39),
+		tok(TOKEN_COMMA, ",", 1, 46),
+		tok(TOKEN_INTEGER, "30", 1, 48),
+		tok(TOKEN_RPAREN, ")", 1, 50),
+		tok(TOKEN_SEMICOLON, ";", 1, 51),
+		tok(TOKEN_EOF, "", 1, 52),
+	})
+}
+
+func TestNextToken_CreateTable(t *testing.T) {
+	const ddl = `CREATE TABLE users (
+    id INT PRIMARY KEY,
+    name VARCHAR NOT NULL,
+    active BOOLEAN DEFAULT TRUE
+);`
+	requireTokens(t, ddl, []Token{
+		tok(TOKEN_CREATE, "CREATE", 1, 1),
+		tok(TOKEN_TABLE, "TABLE", 1, 8),
+		tok(TOKEN_IDENT, "users", 1, 14),
+		tok(TOKEN_LPAREN, "(", 1, 20),
+		// line 2: four-space indent, so the first token sits at column 5
+		tok(TOKEN_IDENT, "id", 2, 5),
+		tok(TOKEN_INT, "INT", 2, 8),
+		tok(TOKEN_PRIMARY, "PRIMARY", 2, 12),
+		tok(TOKEN_KEY, "KEY", 2, 20),
+		tok(TOKEN_COMMA, ",", 2, 23),
+		// line 3: name VARCHAR NOT NULL,
+		tok(TOKEN_IDENT, "name", 3, 5),
+		tok(TOKEN_VARCHAR, "VARCHAR", 3, 10),
+		tok(TOKEN_NOT, "NOT", 3, 18),
+		tok(TOKEN_NULL, "NULL", 3, 22),
+		tok(TOKEN_COMMA, ",", 3, 26),
+		// line 4: last column, no trailing comma
+		tok(TOKEN_IDENT, "active", 4, 5),
+		tok(TOKEN_BOOLEAN, "BOOLEAN", 4, 12),
+		tok(TOKEN_DEFAULT, "DEFAULT", 4, 20),
+		tok(TOKEN_TRUE, "TRUE", 4, 28),
+		// line 5: closing paren and terminator at column 1
+		tok(TOKEN_RPAREN, ")", 5, 1),
+		tok(TOKEN_SEMICOLON, ";", 5, 2),
+		tok(TOKEN_EOF, "", 5, 3),
+	})
+}
+
+func TestNextToken_UpdateStatement(t *testing.T) {
+	// UPDATE ... SET ... WHERE; "=" is expected as TOKEN_EQ in both positions.
+	requireTokens(t, "UPDATE users SET name = 'Bob' WHERE id = 1;", []Token{
+		tok(TOKEN_UPDATE, "UPDATE", 1, 1),
+		tok(TOKEN_IDENT, "users", 1, 8),
+		tok(TOKEN_SET, "SET", 1, 14),
+		tok(TOKEN_IDENT, "name", 1, 18),
+		tok(TOKEN_EQ, "=", 1, 23),
+		tok(TOKEN_STRING, "Bob", 1, 25),
+		tok(TOKEN_WHERE, "WHERE", 1, 31),
+		tok(TOKEN_IDENT, "id", 1, 37),
+		tok(TOKEN_EQ, "=", 1, 40),
+		tok(TOKEN_INTEGER, "1", 1, 42),
+		tok(TOKEN_SEMICOLON, ";", 1, 43),
+		tok(TOKEN_EOF, "", 1, 44),
+	})
+}
+
+func TestNextToken_DeleteStatement(t *testing.T) {
+	// DELETE FROM with a single equality predicate.
+	requireTokens(t, "DELETE FROM users WHERE id = 1;", []Token{
+		tok(TOKEN_DELETE, "DELETE", 1, 1),
+		tok(TOKEN_FROM, "FROM", 1, 8),
+		tok(TOKEN_IDENT, "users", 1, 13),
+		tok(TOKEN_WHERE, "WHERE", 1, 19),
+		tok(TOKEN_IDENT, "id", 1, 25),
+		tok(TOKEN_EQ, "=", 1, 28),
+		tok(TOKEN_INTEGER, "1", 1, 30),
+		tok(TOKEN_SEMICOLON, ";", 1, 31),
+		tok(TOKEN_EOF, "", 1, 32),
+	})
+}
+
+func TestNextToken_JoinQuery(t *testing.T) {
+	// Qualified names are expected as IDENT DOT IDENT; "a_id" stays one identifier.
+	requireTokens(t, "SELECT a.id FROM a INNER JOIN b ON a.id = b.a_id", []Token{
+		tok(TOKEN_SELECT, "SELECT", 1, 1),
+		tok(TOKEN_IDENT, "a", 1, 8),
+		tok(TOKEN_DOT, ".", 1, 9),
+		tok(TOKEN_IDENT, "id", 1, 10),
+		tok(TOKEN_FROM, "FROM", 1, 13),
+		tok(TOKEN_IDENT, "a", 1, 18),
+		tok(TOKEN_INNER, "INNER", 1, 20),
+		tok(TOKEN_JOIN, "JOIN", 1, 26),
+		tok(TOKEN_IDENT, "b", 1, 31),
+		tok(TOKEN_ON, "ON", 1, 33),
+		tok(TOKEN_IDENT, "a", 1, 36),
+		tok(TOKEN_DOT, ".", 1, 37),
+		tok(TOKEN_IDENT, "id", 1, 38),
+		tok(TOKEN_EQ, "=", 1, 41),
+		tok(TOKEN_IDENT, "b", 1, 43),
+		tok(TOKEN_DOT, ".", 1, 44),
+		tok(TOKEN_IDENT, "a_id", 1, 45),
+		tok(TOKEN_EOF, "", 1, 49),
+	})
+}
+
+func TestNextToken_GroupByHavingOrderBy(t *testing.T) {
+	// Aggregation clauses; COUNT is expected as a plain identifier, not a keyword.
+	requireTokens(t, "SELECT dept, COUNT(*) FROM emp GROUP BY dept HAVING COUNT(*) > 5 ORDER BY dept ASC LIMIT 10 OFFSET 5", []Token{
+		tok(TOKEN_SELECT, "SELECT", 1, 1),
+		tok(TOKEN_IDENT, "dept", 1, 8),
+		tok(TOKEN_COMMA, ",", 1, 12),
+		tok(TOKEN_IDENT, "COUNT", 1, 14),
+		tok(TOKEN_LPAREN, "(", 1, 19),
+		tok(TOKEN_STAR, "*", 1, 20),
+		tok(TOKEN_RPAREN, ")", 1, 21),
+		tok(TOKEN_FROM, "FROM", 1, 23),
+		tok(TOKEN_IDENT, "emp", 1, 28),
+		tok(TOKEN_GROUP, "GROUP", 1, 32),
+		tok(TOKEN_BY, "BY", 1, 38),
+		tok(TOKEN_IDENT, "dept", 1, 41),
+		tok(TOKEN_HAVING, "HAVING", 1, 46),
+		tok(TOKEN_IDENT, "COUNT", 1, 53),
+		tok(TOKEN_LPAREN, "(", 1, 58),
+		tok(TOKEN_STAR, "*", 1, 59),
+		tok(TOKEN_RPAREN, ")", 1, 60),
+		tok(TOKEN_GT, ">", 1, 62),
+		tok(TOKEN_INTEGER, "5", 1, 64),
+		tok(TOKEN_ORDER, "ORDER", 1, 66),
+		tok(TOKEN_BY, "BY", 1, 72),
+		tok(TOKEN_IDENT, "dept", 1, 75),
+		tok(TOKEN_ASC, "ASC", 1, 80),
+		tok(TOKEN_LIMIT, "LIMIT", 1, 84),
+		tok(TOKEN_INTEGER, "10", 1, 90),
+		tok(TOKEN_OFFSET, "OFFSET", 1, 93),
+		tok(TOKEN_INTEGER, "5", 1, 100),
+		tok(TOKEN_EOF, "", 1, 101),
+	})
+}
+
+func TestNextToken_ComplexExpression(t *testing.T) {
+	// Predicate keywords (BETWEEN, LIKE, IS, IN) mixed with literals and a parenthesised list.
+	requireTokens(t, "WHERE x BETWEEN 1 AND 10 AND name LIKE 'foo%' OR val IS NOT NULL AND id IN (1, 2, 3)", []Token{
+		tok(TOKEN_WHERE, "WHERE", 1, 1),
+		tok(TOKEN_IDENT, "x", 1, 7),
+		tok(TOKEN_BETWEEN, "BETWEEN", 1, 9),
+		tok(TOKEN_INTEGER, "1", 1, 17),
+		tok(TOKEN_AND, "AND", 1, 19),
+		tok(TOKEN_INTEGER, "10", 1, 23),
+		tok(TOKEN_AND, "AND", 1, 26),
+		tok(TOKEN_IDENT, "name", 1, 30),
+		tok(TOKEN_LIKE, "LIKE", 1, 35),
+		tok(TOKEN_STRING, "foo%", 1, 40),
+		tok(TOKEN_OR, "OR", 1, 47),
+		tok(TOKEN_IDENT, "val", 1, 50),
+		tok(TOKEN_IS, "IS", 1, 54),
+		tok(TOKEN_NOT, "NOT", 1, 57),
+		tok(TOKEN_NULL, "NULL", 1, 61),
+		tok(TOKEN_AND, "AND", 1, 66),
+		tok(TOKEN_IDENT, "id", 1, 70),
+		tok(TOKEN_IN, "IN", 1, 73),
+		tok(TOKEN_LPAREN, "(", 1, 76),
+		tok(TOKEN_INTEGER, "1", 1, 77),
+		tok(TOKEN_COMMA, ",", 1, 78),
+		tok(TOKEN_INTEGER, "2", 1, 80),
+		tok(TOKEN_COMMA, ",", 1, 81),
+		tok(TOKEN_INTEGER, "3", 1, 83),
+		tok(TOKEN_RPAREN, ")", 1, 84),
+		tok(TOKEN_EOF, "", 1, 85),
+	})
+}
+
+func TestNextToken_AlterTable(t *testing.T) {
+	// ALTER TABLE ... ADD COLUMN with a type keyword and a UNIQUE constraint.
+	requireTokens(t, "ALTER TABLE users ADD COLUMN email TEXT UNIQUE", []Token{
+		tok(TOKEN_ALTER, "ALTER", 1, 1),
+		tok(TOKEN_TABLE, "TABLE", 1, 7),
+		tok(TOKEN_IDENT, "users", 1, 13),
+		tok(TOKEN_ADD, "ADD", 1, 19),
+		tok(TOKEN_COLUMN, "COLUMN", 1, 23),
+		tok(TOKEN_IDENT, "email", 1, 30),
+		tok(TOKEN_TEXT, "TEXT", 1, 36),
+		tok(TOKEN_UNIQUE, "UNIQUE", 1, 41),
+		tok(TOKEN_EOF, "", 1, 47),
+	})
+}
+
+func TestNextToken_DropIfExists(t *testing.T) {
+	// DROP TABLE with the IF EXISTS guard; IF and EXISTS are separate keyword tokens.
+	requireTokens(t, "DROP TABLE IF EXISTS users;", []Token{
+		tok(TOKEN_DROP, "DROP", 1, 1),
+		tok(TOKEN_TABLE, "TABLE", 1, 6),
+		tok(TOKEN_IF, "IF", 1, 12),
+		tok(TOKEN_EXISTS, "EXISTS", 1, 15),
+		tok(TOKEN_IDENT, "users", 1, 22),
+		tok(TOKEN_SEMICOLON, ";", 1, 27),
+		tok(TOKEN_EOF, "", 1, 28),
+	})
+}
+
+func TestNextToken_CreateDatabase(t *testing.T) {
+	// CREATE DATABASE followed by the database name and a terminator.
+	requireTokens(t, "CREATE DATABASE mydb;", []Token{
+		tok(TOKEN_CREATE, "CREATE", 1, 1),
+		tok(TOKEN_DATABASE, "DATABASE", 1, 8),
+		tok(TOKEN_IDENT, "mydb", 1, 17),
+		tok(TOKEN_SEMICOLON, ";", 1, 21),
+		tok(TOKEN_EOF, "", 1, 22),
+	})
+}
+
+func TestNextToken_UseDatabase(t *testing.T) {
+	// USE statement: keyword, database name, terminator.
+	requireTokens(t, "USE mydb;", []Token{
+		tok(TOKEN_USE, "USE", 1, 1),
+		tok(TOKEN_IDENT, "mydb", 1, 5),
+		tok(TOKEN_SEMICOLON, ";", 1, 9),
+		tok(TOKEN_EOF, "", 1, 10),
+	})
+}
+
+func TestNextToken_RenameTable(t *testing.T) {
+	// RENAME TO between two identifiers that contain underscores.
+	requireTokens(t, "ALTER TABLE old_name RENAME TO new_name;", []Token{
+		tok(TOKEN_ALTER, "ALTER", 1, 1),
+		tok(TOKEN_TABLE, "TABLE", 1, 7),
+		tok(TOKEN_IDENT, "old_name", 1, 13),
+		tok(TOKEN_RENAME, "RENAME", 1, 22),
+		tok(TOKEN_TO, "TO", 1, 29),
+		tok(TOKEN_IDENT, "new_name", 1, 32),
+		tok(TOKEN_SEMICOLON, ";", 1, 40),
+		tok(TOKEN_EOF, "", 1, 41),
+	})
+}
+
+func TestNextToken_SelectWithAlias(t *testing.T) {
+	// DISTINCT and AS are keywords; the one-letter alias "n" is an identifier.
+	requireTokens(t, "SELECT DISTINCT name AS n FROM users", []Token{
+		tok(TOKEN_SELECT, "SELECT", 1, 1),
+		tok(TOKEN_DISTINCT, "DISTINCT", 1, 8),
+		tok(TOKEN_IDENT, "name", 1, 17),
+		tok(TOKEN_AS, "AS", 1, 22),
+		tok(TOKEN_IDENT, "n", 1, 25),
+		tok(TOKEN_FROM, "FROM", 1, 27),
+		tok(TOKEN_IDENT, "users", 1, 32),
+		tok(TOKEN_EOF, "", 1, 37),
+	})
+}
+
+func TestNextToken_SelectAllJoins(t *testing.T) {
+	// Not a valid statement: just every join-flavour keyword in sequence.
+	requireTokens(t, "LEFT OUTER JOIN RIGHT OUTER JOIN FULL OUTER JOIN CROSS JOIN", []Token{
+		tok(TOKEN_LEFT, "LEFT", 1, 1),
+		tok(TOKEN_OUTER, "OUTER", 1, 6),
+		tok(TOKEN_JOIN, "JOIN", 1, 12),
+		tok(TOKEN_RIGHT, "RIGHT", 1, 17),
+		tok(TOKEN_OUTER, "OUTER", 1, 23),
+		tok(TOKEN_JOIN, "JOIN", 1, 29),
+		tok(TOKEN_FULL, "FULL", 1, 34),
+		tok(TOKEN_OUTER, "OUTER", 1, 39),
+		tok(TOKEN_JOIN, "JOIN", 1, 45),
+		tok(TOKEN_CROSS, "CROSS", 1, 50),
+		tok(TOKEN_JOIN, "JOIN", 1, 56),
+		tok(TOKEN_EOF, "", 1, 60),
+	})
+}
+
+func TestNextToken_ForeignKeyReference(t *testing.T) {
+	// Column definition fragment with a REFERENCES clause and no spaces around the parens.
+	requireTokens(t, "user_id BIGINT REFERENCES users(id)", []Token{
+		tok(TOKEN_IDENT, "user_id", 1, 1),
+		tok(TOKEN_BIGINT, "BIGINT", 1, 9),
+		tok(TOKEN_REFERENCES, "REFERENCES", 1, 16),
+		tok(TOKEN_IDENT, "users", 1, 27),
+		tok(TOKEN_LPAREN, "(", 1, 32),
+		tok(TOKEN_IDENT, "id", 1, 33),
+		tok(TOKEN_RPAREN, ")", 1, 35),
+		tok(TOKEN_EOF, "", 1, 36),
+	})
+}
+
+func TestNextToken_TimestampColumn(t *testing.T) {
+	// The quoted date is expected as one STRING token; its hyphens are not MINUS.
+	requireTokens(t, "created_at TIMESTAMP NOT NULL DEFAULT '2024-01-01'", []Token{
+		tok(TOKEN_IDENT, "created_at", 1, 1),
+		tok(TOKEN_TIMESTAMP, "TIMESTAMP", 1, 12),
+		tok(TOKEN_NOT, "NOT", 1, 22),
+		tok(TOKEN_NULL, "NULL", 1, 26),
+		tok(TOKEN_DEFAULT, "DEFAULT", 1, 31),
+		tok(TOKEN_STRING, "2024-01-01", 1, 39),
+		tok(TOKEN_EOF, "", 1, 51),
+	})
+}
+
+func TestNextToken_ArithmeticExpression(t *testing.T) {
+	// One of each arithmetic operator, separated by single spaces.
+	requireTokens(t, "a + b - c * d / e % f", []Token{
+		tok(TOKEN_IDENT, "a", 1, 1),
+		tok(TOKEN_PLUS, "+", 1, 3),
+		tok(TOKEN_IDENT, "b", 1, 5),
+		tok(TOKEN_MINUS, "-", 1, 7),
+		tok(TOKEN_IDENT, "c", 1, 9),
+		tok(TOKEN_STAR, "*", 1, 11),
+		tok(TOKEN_IDENT, "d", 1, 13),
+		tok(TOKEN_SLASH, "/", 1, 15),
+		tok(TOKEN_IDENT, "e", 1, 17),
+		tok(TOKEN_PERCENT, "%", 1, 19),
+		tok(TOKEN_IDENT, "f", 1, 21),
+		tok(TOKEN_EOF, "", 1, 22),
+	})
+}
+
+func TestNextToken_SelectAll(t *testing.T) {
+	// ALL is expected as its own keyword token between SELECT and the star.
+	requireTokens(t, "SELECT ALL * FROM t", []Token{
+		tok(TOKEN_SELECT, "SELECT", 1, 1),
+		tok(TOKEN_ALL, "ALL", 1, 8),
+		tok(TOKEN_STAR, "*", 1, 12),
+		tok(TOKEN_FROM, "FROM", 1, 14),
+		tok(TOKEN_IDENT, "t", 1, 19),
+		tok(TOKEN_EOF, "", 1, 20),
+	})
+}
+
+func TestNextToken_DescOrder(t *testing.T) {
+	// ORDER BY clause ending in the DESC keyword.
+	requireTokens(t, "ORDER BY col DESC", []Token{
+		tok(TOKEN_ORDER, "ORDER", 1, 1),
+		tok(TOKEN_BY, "BY", 1, 7),
+		tok(TOKEN_IDENT, "col", 1, 10),
+		tok(TOKEN_DESC, "DESC", 1, 14),
+		tok(TOKEN_EOF, "", 1, 18),
+	})
+}
+
+// ---------- Edge cases -------------------------------------------------------
+
+func TestNextToken_MinusVsLineComment(t *testing.T) {
+	// One "-" is the MINUS operator; two in a row start a comment that hides the rest of the line.
+	t.Run("single_minus", func(st *testing.T) {
+		requireTokens(st, "3 - 1", []Token{
+			tok(TOKEN_INTEGER, "3", 1, 1),
+			tok(TOKEN_MINUS, "-", 1, 3),
+			tok(TOKEN_INTEGER, "1", 1, 5),
+			tok(TOKEN_EOF, "", 1, 6),
+		})
+	})
+	t.Run("double_minus_is_comment", func(st *testing.T) {
+		requireTokens(st, "3 -- 1", []Token{
+			tok(TOKEN_INTEGER, "3", 1, 1),
+			tok(TOKEN_EOF, "", 1, 7),
+		})
+	})
+}
+
+func TestNextToken_SlashVsBlockComment(t *testing.T) {
+	// A lone "/" is the SLASH operator; "/*" opens a block comment, after which "/" is SLASH again.
+	t.Run("single_slash", func(st *testing.T) {
+		requireTokens(st, "3 / 1", []Token{
+			tok(TOKEN_INTEGER, "3", 1, 1),
+			tok(TOKEN_SLASH, "/", 1, 3),
+			tok(TOKEN_INTEGER, "1", 1, 5),
+			tok(TOKEN_EOF, "", 1, 6),
+		})
+	})
+	t.Run("slash_star_is_comment", func(st *testing.T) {
+		requireTokens(st, "3 /* comment */ / 1", []Token{
+			tok(TOKEN_INTEGER, "3", 1, 1),
+			tok(TOKEN_SLASH, "/", 1, 17),
+			tok(TOKEN_INTEGER, "1", 1, 19),
+			tok(TOKEN_EOF, "", 1, 20),
+		})
+	})
+}
+
+func TestNextToken_LessThanAmbiguity(t *testing.T) {
+	// "<" needs one character of lookahead: alone it is LT, "<=" is LTE, "<>" is NEQ.
+	t.Run("lt_followed_by_space", func(st *testing.T) {
+		requireTokens(st, "a < b", []Token{
+			tok(TOKEN_IDENT, "a", 1, 1),
+			tok(TOKEN_LT, "<", 1, 3),
+			tok(TOKEN_IDENT, "b", 1, 5),
+			tok(TOKEN_EOF, "", 1, 6),
+		})
+	})
+	t.Run("lt_followed_by_eq", func(st *testing.T) {
+		requireTokens(st, "a<=b", []Token{
+			tok(TOKEN_IDENT, "a", 1, 1),
+			tok(TOKEN_LTE, "<=", 1, 2),
+			tok(TOKEN_IDENT, "b", 1, 4),
+			tok(TOKEN_EOF, "", 1, 5),
+		})
+	})
+	t.Run("lt_followed_by_gt", func(st *testing.T) {
+		requireTokens(st, "a<>b", []Token{
+			tok(TOKEN_IDENT, "a", 1, 1),
+			tok(TOKEN_NEQ, "<>", 1, 2),
+			tok(TOKEN_IDENT, "b", 1, 4),
+			tok(TOKEN_EOF, "", 1, 5),
+		})
+	})
+}
+
+func TestNextToken_ConsecutiveOperators(t *testing.T) {
+	// Two two-character operators back to back are expected to split as ">=" then "<=".
+	requireTokens(t, ">=<=", []Token{
+		tok(TOKEN_GTE, ">=", 1, 1),
+		tok(TOKEN_LTE, "<=", 1, 3),
+		tok(TOKEN_EOF, "", 1, 5),
+	})
+}
+
+func TestNextToken_StringInContext(t *testing.T) {
+	// A doubled quote inside a string is expected to become one quote in the literal.
+	requireTokens(t, "WHERE name = 'O''Brien'", []Token{
+		tok(TOKEN_WHERE, "WHERE", 1, 1),
+		tok(TOKEN_IDENT, "name", 1, 7),
+		tok(TOKEN_EQ, "=", 1, 12),
+		tok(TOKEN_STRING, "O'Brien", 1, 14),
+		tok(TOKEN_EOF, "", 1, 24),
+	})
+}
+
+func TestNextToken_FloatInExpression(t *testing.T) {
+	// Floats with and without a leading digit, used as operands.
+	requireTokens(t, "price * 1.08 + .5", []Token{
+		tok(TOKEN_IDENT, "price", 1, 1),
+		tok(TOKEN_STAR, "*", 1, 7),
+		tok(TOKEN_FLOAT, "1.08", 1, 9),
+		tok(TOKEN_PLUS, "+", 1, 14),
+		tok(TOKEN_FLOAT, ".5", 1, 16),
+		tok(TOKEN_EOF, "", 1, 18),
+	})
+}
+
+func TestNextToken_IdentStartingWithUnderscore(t *testing.T) {
+	// Identifiers may start with, or consist only of, underscores.
+	requireTokens(t, "_foo _123 __", []Token{
+		tok(TOKEN_IDENT, "_foo", 1, 1),
+		tok(TOKEN_IDENT, "_123", 1, 6),
+		tok(TOKEN_IDENT, "__", 1, 11),
+		tok(TOKEN_EOF, "", 1, 13),
+	})
+}
+
+func TestNextToken_KeywordAsPrefix(t *testing.T) {
+	// A keyword that is only a prefix of a longer word must not split it: "selection" is one IDENT, not SELECT + "ion".
+	requireTokens(t, "selection", []Token{
+		tok(TOKEN_IDENT, "selection", 1, 1),
+		tok(TOKEN_EOF, "", 1, 10),
+	})
+}
+
+func TestNextToken_MultiLineString(t *testing.T) {
+	// Strings can span newlines; the newline is expected verbatim in the literal.
+	const want = "line1\nline2"
+	lx := NewLexer("'" + want + "'")
+	got, err := lx.NextToken()
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got.Type != TOKEN_STRING {
+		t.Fatalf("expected TOKEN_STRING, got %v", got.Type)
+	}
+	if got.Literal != want {
+		t.Fatalf("literal: got %q, want %q", got.Literal, want)
+	}
+}
+
+func TestNextToken_NumberFollowedByDotFollowedByNumber(t *testing.T) {
+	// "1.2.3" is expected as two tokens: FLOAT "1.2" ends at the second dot, which then starts the leading-dot FLOAT ".3".
+	requireTokens(t, "1.2.3", []Token{
+		tok(TOKEN_FLOAT, "1.2", 1, 1),
+		tok(TOKEN_FLOAT, ".3", 1, 4),
+		tok(TOKEN_EOF, "", 1, 6),
+	})
+}
+
+func TestNextToken_ModifyKeyword(t *testing.T) {
+	// ALTER TABLE ... MODIFY COLUMN with one-letter table and column names.
+	requireTokens(t, "ALTER TABLE t MODIFY COLUMN c INT;", []Token{
+		tok(TOKEN_ALTER, "ALTER", 1, 1),
+		tok(TOKEN_TABLE, "TABLE", 1, 7),
+		tok(TOKEN_IDENT, "t", 1, 13),
+		tok(TOKEN_MODIFY, "MODIFY", 1, 15),
+		tok(TOKEN_COLUMN, "COLUMN", 1, 22),
+		tok(TOKEN_IDENT, "c", 1, 29),
+		tok(TOKEN_INT, "INT", 1, 31),
+		tok(TOKEN_SEMICOLON, ";", 1, 34),
+		tok(TOKEN_EOF, "", 1, 35),
+	})
+}
+
+func TestNextToken_SelectWithComments(t *testing.T) {
+	const sql = `SELECT -- column list
+    id, /* primary key */
+    name
+FROM users;`
+	requireTokens(t, sql, []Token{
+		tok(TOKEN_SELECT, "SELECT", 1, 1),
+		tok(TOKEN_IDENT, "id", 2, 5), // the line comment hid the rest of line 1
+		tok(TOKEN_COMMA, ",", 2, 7),
+		tok(TOKEN_IDENT, "name", 3, 5), // the block comment on line 2 yields nothing
+		tok(TOKEN_FROM, "FROM", 4, 1),
+		tok(TOKEN_IDENT, "users", 4, 6),
+		tok(TOKEN_SEMICOLON, ";", 4, 11),
+		tok(TOKEN_EOF, "", 4, 12),
+	})
+}
+
+func TestNextToken_OperatorsWithNoSpaces(t *testing.T) {
+	// Every character is its own token; columns advance by exactly one each time.
+	requireTokens(t, "(a+b)*(c-d)", []Token{
+		tok(TOKEN_LPAREN, "(", 1, 1),
+		tok(TOKEN_IDENT, "a", 1, 2),
+		tok(TOKEN_PLUS, "+", 1, 3),
+		tok(TOKEN_IDENT, "b", 1, 4),
+		tok(TOKEN_RPAREN, ")", 1, 5),
+		tok(TOKEN_STAR, "*", 1, 6),
+		tok(TOKEN_LPAREN, "(", 1, 7),
+		tok(TOKEN_IDENT, "c", 1, 8),
+		tok(TOKEN_MINUS, "-", 1, 9),
+		tok(TOKEN_IDENT, "d", 1, 10),
+		tok(TOKEN_RPAREN, ")", 1, 11),
+		tok(TOKEN_EOF, "", 1, 12),
+	})
+}
+
+func TestNextToken_NumberAtEndOfInput(t *testing.T) {
+	// Digits plus a trailing dot right before EOF: expected as one FLOAT whose literal keeps the dot.
+	requireTokens(t, "42.", []Token{
+		tok(TOKEN_FLOAT, "42.", 1, 1),
+		tok(TOKEN_EOF, "", 1, 4),
+	})
+}
+
+func TestNextToken_SelectStar(t *testing.T) {
+	// "*" in a select list is expected as the same STAR token used for multiplication.
+	requireTokens(t, "SELECT * FROM t;", []Token{
+		tok(TOKEN_SELECT, "SELECT", 1, 1),
+		tok(TOKEN_STAR, "*", 1, 8),
+		tok(TOKEN_FROM, "FROM", 1, 10),
+		tok(TOKEN_IDENT, "t", 1, 15),
+		tok(TOKEN_SEMICOLON, ";", 1, 16),
+		tok(TOKEN_EOF, "", 1, 17),
+	})
+}
+
+func TestNextToken_NegativeNumberContext(t *testing.T) {
+	// The lexer never folds a sign into a number: "-42" is MINUS then INTEGER, and negation is left to the parser.
+	requireTokens(t, "-42", []Token{
+		tok(TOKEN_MINUS, "-", 1, 1),
+		tok(TOKEN_INTEGER, "42", 1, 2),
+		tok(TOKEN_EOF, "", 1, 4),
+	})
+}
+
+func TestNextToken_ErrorRecovery(t *testing.T) {
+	// An illegal character must not wedge the lexer: the call after the
+	// failing one is expected to continue with the next real token.
+	lx := NewLexer("@ SELECT")
+	bad, err := lx.NextToken()
+	if err == nil || bad.Type != TOKEN_ILLEGAL {
+		t.Fatalf("expected ILLEGAL token with error, got %v, err=%v", bad, err)
+	}
+	// The second call should skip the space and produce SELECT.
+	next, err := lx.NextToken()
+	if err != nil {
+		t.Fatalf("unexpected error after recovery: %v", err)
+	}
+	if next.Type != TOKEN_SELECT {
+		t.Fatalf("expected SELECT after recovery, got %v", next.Type)
+	}
+}
diff --git a/internal/sql/lexer/lookahead_test.go b/internal/sql/lexer/lookahead_test.go
new file mode 100644
index 0000000..520b6c6
--- /dev/null
+++ b/internal/sql/lexer/lookahead_test.go
@@ -0,0 +1,442 @@
+package lexer
+
+import "testing"
+
+// ---------- LookaheadIterator ------------------------------------------------
+
+func TestLookaheadIterator_BasicNextAndPeek(t *testing.T) {
+	seq := []int{10, 20, 30}
+	idx := 0
+	iter := NewLookaheadIterator(func() int {
+		v := seq[idx] // yields 10, 20, 30 in order; a fourth call would index out of range
+		idx++
+		return v
+	})
+
+	// Peek should return the first element without consuming it, no matter how often it is called.
+	if got := iter.Peek(); got != 10 {
+		t.Fatalf("Peek() = %d, want 10", got)
+	}
+	if got := iter.Peek(); got != 10 {
+		t.Fatalf("second Peek() = %d, want 10 (should be idempotent)", got)
+	}
+	if iter.Count() != 0 {
+		t.Fatalf("Count() = %d after Peek, want 0", iter.Count())
+	}
+
+	// Next should hand out the element that was just peeked and count it as consumed.
+	if got := iter.Next(); got != 10 {
+		t.Fatalf("Next() = %d, want 10", got)
+	}
+	if iter.Count() != 1 {
+		t.Fatalf("Count() = %d after first Next, want 1", iter.Count())
+	}
+
+	// Next without a prior Peek should pull the following element from the source.
+	if got := iter.Next(); got != 20 {
+		t.Fatalf("Next() = %d, want 20", got)
+	}
+	if iter.Count() != 2 {
+		t.Fatalf("Count() = %d, want 2", iter.Count())
+	}
+
+	// Peek then Next again: both should observe the same, last element.
+	if got := iter.Peek(); got != 30 {
+		t.Fatalf("Peek() = %d, want 30", got)
+	}
+	if got := iter.Next(); got != 30 {
+		t.Fatalf("Next() = %d, want 30", got)
+	}
+	if iter.Count() != 3 {
+		t.Fatalf("Count() = %d, want 3", iter.Count())
+	}
+}
+
+func TestLookaheadIterator_NextWithoutPeek(t *testing.T) {
+	produced := 0
+	it := NewLookaheadIterator(func() int {
+		produced++
+		return produced
+	})
+
+	// With nothing buffered, each Next is expected to invoke the source exactly once.
+	if first := it.Next(); first != 1 {
+		t.Fatalf("Next() = %d, want 1", first)
+	}
+	if second := it.Next(); second != 2 {
+		t.Fatalf("Next() = %d, want 2", second)
+	}
+	if produced != 2 {
+		t.Fatalf("nextFn called %d times, want 2", produced)
+	}
+}
+
+func TestLookaheadIterator_PeekDoesNotCallNextFnTwice(t *testing.T) {
+	produced := 0
+	it := NewLookaheadIterator(func() string {
+		produced++
+		return "hello"
+	})
+
+	for i := 0; i < 3; i++ {
+		_ = it.Peek()
+	}
+
+	if produced != 1 {
+		t.Fatalf("nextFn called %d times, want 1 (Peek should buffer)", produced)
+	}
+}
+
+func TestLookaheadIterator_Count_StartsAtZero(t *testing.T) {
+	fresh := NewLookaheadIterator(func() int { return 0 }) // never advanced
+	if n := fresh.Count(); n != 0 {
+		t.Fatalf("Count() = %d, want 0", n)
+	}
+}
+
+func TestLookaheadIterator_Count_IncrementedByNext(t *testing.T) {
+	it := NewLookaheadIterator(func() int { return 42 })
+	for want := 1; want <= 5; want++ {
+		it.Next()
+		if got := it.Count(); got != want {
+			t.Fatalf("after %d Next calls: Count() = %d", want, got)
+		}
+	}
+}
+
+func TestLookaheadIterator_Count_NotIncrementedByPeek(t *testing.T) {
+	it := NewLookaheadIterator(func() int { return 1 })
+	it.Peek()
+	it.Peek()
+	if n := it.Count(); n != 0 {
+		t.Fatalf("Peek should not increment Count; got %d", n)
+	}
+}
+
+// ---------- ExpectNextValue --------------------------------------------------
+
+func TestLookaheadIterator_ExpectNextValue_Match(t *testing.T) {
+	values := []int{5, 10, 15}
+	pos := 0
+	it := NewLookaheadIterator(func() int {
+		next := values[pos]
+		pos++
+		return next
+	})
+
+	sameInt := func(a, b int) bool { return a == b }
+
+	matched := it.ExpectNextValue(5, sameInt)
+	if matched == nil {
+		t.Fatal("expected match, got nil")
+	}
+	if *matched != 5 {
+		t.Fatalf("matched value = %d, want 5", *matched)
+	}
+	if n := it.Count(); n != 1 {
+		t.Fatalf("Count() = %d, want 1 (match should consume)", n)
+	}
+}
+
+func TestLookaheadIterator_ExpectNextValue_NoMatch(t *testing.T) {
+	values := []int{5, 10}
+	pos := 0
+	it := NewLookaheadIterator(func() int {
+		next := values[pos]
+		pos++
+		return next
+	})
+
+	sameInt := func(a, b int) bool { return a == b }
+
+	matched := it.ExpectNextValue(999, sameInt)
+	if matched != nil {
+		t.Fatalf("expected nil for non-match, got %d", *matched)
+	}
+	if n := it.Count(); n != 0 {
+		t.Fatalf("Count() = %d, want 0 (non-match should not consume)", n)
+	}
+
+	// A failed expectation must leave the head element in place for Next.
+	if got := it.Next(); got != 5 {
+		t.Fatalf("Next() after failed expect = %d, want 5", got)
+	}
+}
+
+func TestLookaheadIterator_ExpectNextValue_ConsecutiveMatches(t *testing.T) {
+	values := []int{1, 2, 3}
+	pos := 0
+	it := NewLookaheadIterator(func() int {
+		next := values[pos]
+		pos++
+		return next
+	})
+
+	sameInt := func(a, b int) bool { return a == b }
+
+	for step, want := range values {
+		matched := it.ExpectNextValue(want, sameInt)
+		if matched == nil {
+			t.Fatalf("step %d: expected match for %d, got nil", step, want)
+		}
+		if *matched != want {
+			t.Fatalf("step %d: got %d, want %d", step, *matched, want)
+		}
+	}
+	if n := it.Count(); n != 3 {
+		t.Fatalf("Count() = %d, want 3", n)
+	}
+}
+
+func TestLookaheadIterator_ExpectNextValue_FailThenSucceed(t *testing.T) {
+	values := []int{1, 2}
+	pos := 0
+	it := NewLookaheadIterator(func() int {
+		next := values[pos]
+		pos++
+		return next
+	})
+
+	sameInt := func(a, b int) bool { return a == b }
+
+	// The head is 1, so asking for 2 is expected to fail and leave it in place.
+	if miss := it.ExpectNextValue(2, sameInt); miss != nil {
+		t.Fatalf("expected nil, got %d", *miss)
+	}
+	// The same head is then expected to match when 1 is asked for.
+	if hit := it.ExpectNextValue(1, sameInt); hit == nil {
+		t.Fatal("expected match for 1, got nil")
+	}
+}
+
+// ---------- ExpectNextMatches ------------------------------------------------
+
+func TestLookaheadIterator_ExpectNextMatches_PredicateTrue(t *testing.T) {
+	it := NewLookaheadIterator(func() int { return 42 })
+
+	matched := it.ExpectNextMatches(func(v int) bool { return v > 0 })
+	if matched == nil {
+		t.Fatal("expected match, got nil")
+	}
+	if *matched != 42 {
+		t.Fatalf("matched = %d, want 42", *matched)
+	}
+	if n := it.Count(); n != 1 {
+		t.Fatalf("Count() = %d, want 1", n)
+	}
+}
+
+func TestLookaheadIterator_ExpectNextMatches_PredicateFalse(t *testing.T) {
+	it := NewLookaheadIterator(func() int { return 42 })
+
+	matched := it.ExpectNextMatches(func(v int) bool { return v < 0 })
+	if matched != nil {
+		t.Fatalf("expected nil, got %d", *matched)
+	}
+	if n := it.Count(); n != 0 {
+		t.Fatalf("Count() = %d, want 0 (no consume on mismatch)", n)
+	}
+}
+
+func TestLookaheadIterator_ExpectNextMatches_PredicateCalledOnce(t *testing.T) {
+	it := NewLookaheadIterator(func() int { return 1 })
+	evaluations := 0
+	it.ExpectNextMatches(func(_ int) bool {
+		evaluations++
+		return false
+	})
+	if evaluations != 1 {
+		t.Fatalf("predicate called %d times, want 1", evaluations)
+	}
+}
+
+func TestLookaheadIterator_ExpectNextMatches_DoesNotConsumeOnMismatch(t *testing.T) {
+	words := []string{"hello", "world"}
+	pos := 0
+	it := NewLookaheadIterator(func() string {
+		next := words[pos]
+		pos++
+		return next
+	})
+
+	// The head is "hello", so a predicate looking for "world" is expected to miss.
+	isWorld := func(v string) bool { return v == "world" }
+	if miss := it.ExpectNextMatches(isWorld); miss != nil {
+		t.Fatalf("expected nil, got %q", *miss)
+	}
+
+	// "hello" should still be there.
+	head := it.Next()
+	if head != "hello" {
+		t.Fatalf("Next() = %q, want 'hello' (should not have been consumed)", head)
+	}
+}
+
+// ---------- Generic type support (strings) -----------------------------------
+
+func TestLookaheadIterator_WithStrings(t *testing.T) {
+	words := []string{"SELECT", "FROM", "WHERE"}
+	idx := 0
+	iter := NewLookaheadIterator(func() string {
+		v := words[idx] // yields the three words in order
+		idx++
+		return v
+	})
+
+	if got := iter.Peek(); got != "SELECT" { // peeked, not consumed
+		t.Fatalf("Peek() = %q, want 'SELECT'", got)
+	}
+	if got := iter.Next(); got != "SELECT" { // consumes the peeked word
+		t.Fatalf("Next() = %q, want 'SELECT'", got)
+	}
+	if got := iter.Next(); got != "FROM" { // no Peek in between
+		t.Fatalf("Next() = %q, want 'FROM'", got)
+	}
+	if got := iter.Peek(); got != "WHERE" {
+		t.Fatalf("Peek() = %q, want 'WHERE'", got)
+	}
+	if got := iter.Next(); got != "WHERE" {
+		t.Fatalf("Next() = %q, want 'WHERE'", got)
+	}
+	if iter.Count() != 3 { // only the three Next calls count
+		t.Fatalf("Count() = %d, want 3", iter.Count())
+	}
+}
+
+// ---------- Integration: LookaheadIterator wrapping the Lexer ----------------
+
+func TestLookaheadIterator_WithLexer(t *testing.T) {
+	l := NewLexer("SELECT * FROM t;")
+	iter := NewLookaheadIterator(func() Token {
+		tok, _ := l.NextToken() // lexing error deliberately dropped: this test only inspects the tokens
+		return tok
+	})
+
+	// Peek should expose SELECT without consuming it.
+	peeked := iter.Peek()
+	if peeked.Type != TOKEN_SELECT {
+		t.Fatalf("Peek() type = %v, want SELECT", peeked.Type)
+	}
+
+	// Next should consume that same SELECT rather than pulling a new token.
+	got := iter.Next()
+	if got.Type != TOKEN_SELECT {
+		t.Fatalf("Next() type = %v, want SELECT", got.Type)
+	}
+
+	// Next → STAR.
+	got = iter.Next()
+	if got.Type != TOKEN_STAR {
+		t.Fatalf("Next() type = %v, want STAR", got.Type)
+	}
+
+	// Peek → FROM (left buffered for the expectation below).
+	peeked = iter.Peek()
+	if peeked.Type != TOKEN_FROM {
+		t.Fatalf("Peek() type = %v, want FROM", peeked.Type)
+	}
+
+	// ExpectNextValue compares by Type only, so a bare Token{Type: TOKEN_FROM}
+	// is enough; the returned token should carry the real literal.
+	eq := func(a, b Token) bool { return a.Type == b.Type }
+	result := iter.ExpectNextValue(Token{Type: TOKEN_FROM}, eq)
+	if result == nil {
+		t.Fatal("expected FROM to match, got nil")
+	}
+	if result.Literal != "FROM" {
+		t.Fatalf("matched literal = %q, want 'FROM'", result.Literal)
+	}
+
+	// ExpectNextMatches with a predicate that accepts any identifier.
+	result = iter.ExpectNextMatches(func(tok Token) bool {
+		return tok.Type == TOKEN_IDENT
+	})
+	if result == nil {
+		t.Fatal("expected IDENT match, got nil")
+	}
+	if result.Literal != "t" {
+		t.Fatalf("matched literal = %q, want 't'", result.Literal)
+	}
+
+	// SEMICOLON.
+	got = iter.Next()
+	if got.Type != TOKEN_SEMICOLON {
+		t.Fatalf("Next() type = %v, want SEMICOLON", got.Type)
+	}
+
+	// EOF.
+	got = iter.Next()
+	if got.Type != TOKEN_EOF {
+		t.Fatalf("Next() type = %v, want EOF", got.Type)
+	}
+
+	// Six tokens consumed: SELECT, STAR, FROM, IDENT, SEMICOLON, EOF.
+	if iter.Count() != 6 {
+		t.Fatalf("Count() = %d, want 6", iter.Count())
+	}
+}
+
+func TestLookaheadIterator_ExpectNextValue_NoMatchDoesNotAdvanceLexer(t *testing.T) {
+	lx := NewLexer("SELECT FROM")
+	it := NewLookaheadIterator(func() Token {
+		next, _ := lx.NextToken()
+		return next
+	})
+
+	sameType := func(a, b Token) bool { return a.Type == b.Type }
+
+	// The head token is SELECT, so an expectation for FROM is expected to fail.
+	miss := it.ExpectNextValue(Token{Type: TOKEN_FROM}, sameType)
+	if miss != nil {
+		t.Fatal("expected nil, got a match")
+	}
+
+	// The failed expectation must not have consumed SELECT.
+	head := it.Next()
+	if head.Type != TOKEN_SELECT {
+		t.Fatalf("Next() after failed expect = %v, want SELECT", head.Type)
+	}
+}
+
+// ---------- Edge: struct types with LookaheadIterator ------------------------
+
+type testPair struct {
+	key   string // label the struct test peeks at and matches on
+	value int    // number paired with key; checked only on the peeked element
+}
+
+func TestLookaheadIterator_WithStructs(t *testing.T) {
+	pairs := []testPair{
+		{"a", 1},
+		{"b", 2},
+		{"c", 3},
+	}
+	idx := 0
+	iter := NewLookaheadIterator(func() testPair {
+		v := pairs[idx] // yields the pairs in slice order
+		idx++
+		return v
+	})
+
+	// Peek should expose the first pair without consuming it.
+	peeked := iter.Peek()
+	if peeked.key != "a" || peeked.value != 1 {
+		t.Fatalf("Peek() = %+v, want {a 1}", peeked)
+	}
+
+	// ExpectNextMatches with a struct field check; a match consumes the peeked pair.
+	result := iter.ExpectNextMatches(func(p testPair) bool {
+		return p.key == "a"
+	})
+	if result == nil {
+		t.Fatal("expected match, got nil")
+	}
+
+	// Next should therefore move on to the second pair.
+	got := iter.Next()
+	if got.key != "b" {
+		t.Fatalf("Next() = %+v, want key='b'", got)
+	}
+
+	if iter.Count() != 2 { // one match plus one Next
+		t.Fatalf("Count() = %d, want 2", iter.Count())
+	}
+}
diff --git a/internal/sql/lexer/tokens_test.go b/internal/sql/lexer/tokens_test.go
new file mode 100644
index 0000000..cadb4fe
--- /dev/null
+++ b/internal/sql/lexer/tokens_test.go
@@ -0,0 +1,149 @@
+package lexer
+
+import (
+	"fmt"
+	"testing"
+)
+
+// ---------- TokenType.String() -----------------------------------------------
+
+// TestTokenType_String_KnownTypes verifies String() against every tokenNames entry.
+func TestTokenType_String_KnownTypes(t *testing.T) {
+	for kind, label := range tokenNames {
+		// One subtest per map entry, named after the expected string.
+		t.Run(label, func(t *testing.T) {
+			if text := kind.String(); text != label {
+				t.Errorf("TokenType(%d).String() = %q, want %q", int(kind), text, label)
+			}
+		})
+	}
+}
+
+// TestTokenType_String_UnknownType expects the "TokenType(<n>)" form for an unnamed value.
+func TestTokenType_String_UnknownType(t *testing.T) {
+	const raw = 9999
+	expected := fmt.Sprintf("TokenType(%d)", raw)
+	if actual := TokenType(raw).String(); actual != expected {
+		t.Errorf("got %q, want %q", actual, expected)
+	}
+}
+
+func TestTokenType_String_AllTokenTypesHaveNames(t *testing.T) {
+	// Explicit list of the token types that must have a human-readable name.
+	// It is maintained by hand (it is not derived from the iota range), so a
+	// newly added token type must be appended here to be covered by this test.
+	declared := []TokenType{
+		TOKEN_EOF, TOKEN_ILLEGAL,
+		TOKEN_IDENT, TOKEN_INTEGER, TOKEN_FLOAT, TOKEN_STRING,
+		TOKEN_CREATE, TOKEN_DATABASE, TOKEN_USE, TOKEN_DROP, TOKEN_IF,
+		TOKEN_EXISTS, TOKEN_TABLE, TOKEN_ALTER, TOKEN_ADD, TOKEN_COLUMN,
+		TOKEN_MODIFY, TOKEN_RENAME, TOKEN_TO,
+		TOKEN_SELECT, TOKEN_DISTINCT, TOKEN_ALL, TOKEN_FROM, TOKEN_WHERE,
+		TOKEN_AS, TOKEN_INSERT, TOKEN_INTO, TOKEN_VALUES, TOKEN_UPDATE,
+		TOKEN_SET, TOKEN_DELETE,
+		TOKEN_JOIN, TOKEN_INNER, TOKEN_LEFT, TOKEN_RIGHT, TOKEN_FULL,
+		TOKEN_OUTER, TOKEN_CROSS, TOKEN_ON,
+		TOKEN_GROUP, TOKEN_BY, TOKEN_HAVING, TOKEN_ORDER, TOKEN_ASC,
+		TOKEN_DESC, TOKEN_LIMIT, TOKEN_OFFSET,
+		TOKEN_PRIMARY, TOKEN_KEY, TOKEN_NOT, TOKEN_NULL, TOKEN_DEFAULT,
+		TOKEN_UNIQUE, TOKEN_REFERENCES,
+		TOKEN_AND, TOKEN_OR, TOKEN_TRUE, TOKEN_FALSE, TOKEN_LIKE,
+		TOKEN_IS, TOKEN_IN, TOKEN_BETWEEN,
+		TOKEN_INT, TOKEN_BIGINT, TOKEN_VARCHAR, TOKEN_BOOLEAN, TOKEN_TEXT,
+		TOKEN_TIMESTAMP,
+		TOKEN_EQ, TOKEN_NEQ, TOKEN_LT, TOKEN_GT, TOKEN_LTE, TOKEN_GTE,
+		TOKEN_PLUS, TOKEN_MINUS, TOKEN_STAR, TOKEN_SLASH, TOKEN_PERCENT,
+		TOKEN_LPAREN, TOKEN_RPAREN, TOKEN_COMMA, TOKEN_DOT, TOKEN_SEMICOLON,
+	}
+	for i := range declared {
+		kind := declared[i]
+		// Seeing the "TokenType(<int>)" fallback text means the name is missing from the map.
+		if kind.String() == fmt.Sprintf("TokenType(%d)", int(kind)) {
+			t.Errorf("TokenType %d has no human-readable name in tokenNames", int(kind))
+		}
+	}
+}
+
+// ---------- Token.String() ---------------------------------------------------
+
+// TestToken_String pins the exact text produced by Token.String() for sample tokens.
+func TestToken_String(t *testing.T) {
+	cases := []struct {
+		in       Token
+		rendered string
+	}{
+		{
+			in:       Token{Type: TOKEN_SELECT, Literal: "SELECT", Line: 1, Col: 1},
+			rendered: `Token{SELECT       "SELECT"  1:1}`,
+		},
+		{
+			in:       Token{Type: TOKEN_INTEGER, Literal: "42", Line: 3, Col: 15},
+			rendered: `Token{INTEGER      "42"  3:15}`,
+		},
+		{
+			in:       Token{Type: TOKEN_STRING, Literal: "hello", Line: 1, Col: 10},
+			rendered: `Token{STRING       "hello"  1:10}`,
+		},
+		{
+			in:       Token{Type: TOKEN_EOF, Literal: "", Line: 5, Col: 1},
+			rendered: `Token{EOF          ""  5:1}`,
+		},
+		{
+			in:       Token{Type: TOKEN_ILLEGAL, Literal: "@", Line: 1, Col: 1},
+			rendered: `Token{ILLEGAL      "@"  1:1}`,
+		},
+	}
+	for _, c := range cases {
+		t.Run(c.rendered, func(t *testing.T) {
+			if out := c.in.String(); out != c.rendered {
+				t.Errorf("got  %q\nwant %q", out, c.rendered)
+			}
+		})
+	}
+}
+
+// ---------- lookupIdent (keywords.go) ----------------------------------------
+
+// TestLookupIdent_ReturnsKeywordType looks up every entry of keywords via lookupIdent.
+func TestLookupIdent_ReturnsKeywordType(t *testing.T) {
+	for spelling, wantType := range keywords {
+		if gotType := lookupIdent(spelling); gotType != wantType {
+			t.Errorf("lookupIdent(%q) = %v, want %v", spelling, gotType, wantType)
+		}
+	}
+}
+
+// TestLookupIdent_CaseInsensitive feeds mixed-case spellings of SELECT and FROM to lookupIdent.
+func TestLookupIdent_CaseInsensitive(t *testing.T) {
+	cases := []struct {
+		spelling string
+		kind     TokenType
+	}{
+		{spelling: "select", kind: TOKEN_SELECT},
+		{spelling: "SELECT", kind: TOKEN_SELECT},
+		{spelling: "SeLeCt", kind: TOKEN_SELECT},
+		{spelling: "from", kind: TOKEN_FROM},
+		{spelling: "FROM", kind: TOKEN_FROM},
+		{spelling: "fRoM", kind: TOKEN_FROM},
+	}
+	for _, c := range cases {
+		t.Run(c.spelling, func(t *testing.T) {
+			if resolved := lookupIdent(c.spelling); resolved != c.kind {
+				t.Errorf("lookupIdent(%q) = %v, want %v", c.spelling, resolved, c.kind)
+			}
+		})
+	}
+}
+
+// TestLookupIdent_ReturnsIdentForNonKeywords covers plain names and keyword-prefixed ones.
+func TestLookupIdent_ReturnsIdentForNonKeywords(t *testing.T) {
+	plain := []string{
+		"foo", "bar", "my_table", "userId", "x", "_private",
+		"selection", "fromage", "orderly", "deleteme",
+	}
+	for _, name := range plain {
+		if kind := lookupIdent(name); kind != TOKEN_IDENT {
+			t.Errorf("lookupIdent(%q) = %v, want TOKEN_IDENT", name, kind)
+		}
+	}
+}

From 0abd42ab7181e434a6c7482c0afee0f5a4271f7b Mon Sep 17 00:00:00 2001
From: rahulc0dy <rc645312@gmail.com>
Date: Mon, 8 Jun 2026 12:11:16 +0530
Subject: [PATCH 6/6] Escape linting for token variable names

---
 internal/sql/lexer/tokens.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/internal/sql/lexer/tokens.go b/internal/sql/lexer/tokens.go
index 1ca8ef9..b80679b 100644
--- a/internal/sql/lexer/tokens.go
+++ b/internal/sql/lexer/tokens.go
@@ -7,6 +7,7 @@ import "fmt"
 // TokenType constant.
 type TokenType int
 
+//nolint:revive // We prefer ALL_CAPS for token constants
 const (
 	// Special
 	TOKEN_EOF     TokenType = iota // end of input; always the last token