From e03a0b572be426522b2dfabc550b49438eea2ac3 Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 13:50:25 -0500
Subject: [PATCH 01/15] implement 8-bit C1 controls as raw bytes instead of UTF-8-encoded

---
 graphemes/ansi.go      | 76 ++++++++++++++++-----------------
 graphemes/ansi_test.go | 95 ++++++++++++++++++++----------------------
 graphemes/iterator.go  | 17 ++++----
 3 files changed, 91 insertions(+), 97 deletions(-)

diff --git a/graphemes/ansi.go b/graphemes/ansi.go
index 3a038db..8aa9b96 100644
--- a/graphemes/ansi.go
+++ b/graphemes/ansi.go
@@ -3,23 +3,27 @@ package graphemes
 // ansiEscapeLength returns the byte length of a valid ANSI escape/control
 // sequence at the start of data, or 0 if none.
 //
-// Input is UTF-8. This recognizes both:
+// This recognizes both:
 //   - 7-bit representations (ESC + final/intermediate bytes), and
-//   - UTF-8 encodings of 8-bit C1 controls (U+0080..U+009F => 0xC2 0x80..0x9F).
+//   - 8-bit C1 controls (raw bytes 0x80..0x9F per ECMA-48).
 //
 // Recognized forms (ECMA-48 / ISO 6429):
-//   - CSI: ESC [ then parameter bytes (0x30–0x3F), intermediate (0x20–0x2F), final (0x40–0x7E)
-//   - OSC: ESC ] then payload until ST (ESC \) or BEL (0x07)
-//   - DCS, SOS, PM, APC: ESC P / X / ^ / _ then payload until ST (ESC \)
+//   - CSI: ESC [ (or 0x9B) then parameter bytes (0x30–0x3F), intermediate (0x20–0x2F), final (0x40–0x7E)
+//   - OSC: ESC ] (or 0x9D) then payload until ST, BEL (0x07), CAN (0x18), or SUB (0x1A)
+//   - DCS, SOS, PM, APC: ESC P/X/^/_ (or 0x90/0x98/0x9E/0x9F) then payload until ST, CAN, or SUB
 //   - Two-byte: ESC + Fe/Fs (0x40–0x7E excluding above), or Fp (0x30–0x3F), or nF (0x20–0x2F then final)
+//   - Standalone C1 controls (0x80..0x9F not listed above): single byte
 func ansiEscapeLength[T ~string | ~[]byte](data T) int {
 	n := len(data)
-	if n < 2 {
+	if n == 0 {
 		return 0
 	}
 
 	switch data[0] {
 	case esc:
+		if n < 2 {
+			return 0
+		}
 		b1 := data[1]
 		switch b1 {
 		case '[': // CSI
@@ -61,34 +65,31 @@ func ansiEscapeLength[T ~string | ~[]byte](data T) int {
 			return 0
 		}
 
-	case c1UTF8Lead:
-		b1 := data[1]
-		if b1 < 0x80 || b1 > 0x9F {
+	case 0x9B: // C1 CSI
+		body := csiLength(data[1:])
+		if body == 0 {
 			return 0
 		}
+		return 1 + body
 
-		switch b1 {
-		case 0x9B: // CSI
-			body := csiLength(data[2:])
-			if body == 0 {
-				return 0
-			}
-			return 2 + body
-		case 0x9D: // OSC – allows BEL or ST as terminator
-			body := oscLength(data[2:])
-			if body < 0 {
-				return 0
-			}
-			return 2 + body
-		case 0x90, 0x98, 0x9E, 0x9F: // DCS, SOS, PM, APC – require ST only
-			body := stSequenceLength(data[2:])
-			if body < 0 {
-				return 0
-			}
-			return 2 + body
-		default:
-			// Any other C1 control (UTF-8 encoded) is one control sequence token.
-			return 2
+	case 0x9D: // C1 OSC
+		body := oscLength(data[1:])
+		if body < 0 {
+			return 0
+		}
+		return 1 + body
+
+	case 0x90, 0x98, 0x9E, 0x9F: // C1 DCS, SOS, PM, APC
+		body := stSequenceLength(data[1:])
+		if body < 0 {
+			return 0
+		}
+		return 1 + body
+
+	default:
+		if data[0] >= 0x80 && data[0] <= 0x9F {
+			// Any other C1 control is a single-byte sequence.
+			return 1
 		}
 	}
 
@@ -132,12 +133,13 @@ func csiLength[T ~string | ~[]byte](data T) int {
 //   - -1: not terminated in the provided data
 //
 // OSC accepts BEL (0x07) or ST as terminator by widespread convention.
+// ST may be 7-bit (ESC \) or C1 (0x9C).
 // Per ECMA-48, CAN (0x18) and SUB (0x1A) cancel the control string; in that
 // case they are not part of the OSC sequence length.
 func oscLength[T ~string | ~[]byte](data T) int {
 	for i := 0; i < len(data); i++ {
 		b := data[i]
-		if b == bel {
+		if b == bel || b == st {
 			return i + 1
 		}
 		if b == can || b == sub {
@@ -146,9 +148,6 @@ func oscLength[T ~string | ~[]byte](data T) int {
 		if b == esc && i+1 < len(data) && data[i+1] == '\\' {
 			return i + 2
 		}
-		if b == c1UTF8Lead && i+1 < len(data) && data[i+1] == 0x9C {
-			return i + 2
-		}
 	}
 	return -1
 }
@@ -161,6 +160,7 @@ func oscLength[T ~string | ~[]byte](data T) int {
 //   - -1: not terminated in the provided data
 //
 // Used for DCS, SOS, PM, and APC, which per ECMA-48 terminate with ST.
+// ST may be 7-bit (ESC \) or C1 (0x9C).
 // CAN (0x18) and SUB (0x1A) cancel the control string; in that case they are
 // not part of the sequence length.
 func stSequenceLength[T ~string | ~[]byte](data T) int {
@@ -168,10 +168,10 @@ func stSequenceLength[T ~string | ~[]byte](data T) int {
 		if data[i] == can || data[i] == sub {
 			return i
 		}
-		if data[i] == esc && i+1 < len(data) && data[i+1] == '\\' {
-			return i + 2
+		if data[i] == st {
+			return i + 1
 		}
-		if data[i] == c1UTF8Lead && i+1 < len(data) && data[i+1] == 0x9C {
+		if data[i] == esc && i+1 < len(data) && data[i+1] == '\\' {
 			return i + 2
 		}
 	}
diff --git a/graphemes/ansi_test.go b/graphemes/ansi_test.go
index 32b37f1..6d7b434 100644
--- a/graphemes/ansi_test.go
+++ b/graphemes/ansi_test.go
@@ -158,54 +158,49 @@ func TestAnsiEscapeSequencesAsGraphemes(t *testing.T) {
 			expected: []string{"\x1b[m", "x"},
 		},
 		{
-			name:     "UTF-8 C1 CSI then text",
-			input:    "\xC2\x9B31mhello",
-			expected: []string{"\xC2\x9B31m", "h", "e", "l", "l", "o"},
+			name:     "C1 CSI then text",
+			input:    "\x9B31mhello",
+			expected: []string{"\x9B31m", "h", "e", "l", "l", "o"},
 		},
 		{
-			name:     "UTF-8 C1 OSC with UTF-8 C1 ST terminator",
-			input:    "\xC2\x9D0;Title\xC2\x9C",
-			expected: []string{"\xC2\x9D0;Title\xC2\x9C"},
+			name:     "C1 OSC with C1 ST terminator",
+			input:    "\x9D0;Title\x9C",
+			expected: []string{"\x9D0;Title\x9C"},
 		},
 		{
-			name:     "UTF-8 C1 OSC with 7-bit ST terminator",
-			input:    "\xC2\x9D0;Title\x1b\\",
-			expected: []string{"\xC2\x9D0;Title\x1b\\"},
+			name:     "C1 OSC with 7-bit ST terminator",
+			input:    "\x9D0;Title\x1b\\",
+			expected: []string{"\x9D0;Title\x1b\\"},
 		},
 		{
-			name:     "7-bit OSC with UTF-8 C1 ST terminator",
-			input:    "\x1b]0;Title\xC2\x9C",
-			expected: []string{"\x1b]0;Title\xC2\x9C"},
+			name:     "7-bit OSC with C1 ST terminator",
+			input:    "\x1b]0;Title\x9C",
+			expected: []string{"\x1b]0;Title\x9C"},
 		},
 		{
-			name:     "UTF-8 C1 DCS with UTF-8 C1 ST terminator",
-			input:    "\xC2\x90qpayload\xC2\x9C",
-			expected: []string{"\xC2\x90qpayload\xC2\x9C"},
+			name:     "C1 DCS with C1 ST terminator",
+			input:    "\x90qpayload\x9C",
+			expected: []string{"\x90qpayload\x9C"},
 		},
 		{
-			name:     "UTF-8 C1 DCS canceled by CAN",
-			input:    "\xC2\x90qpayload\x18x",
-			expected: []string{"\xC2\x90qpayload", "\x18", "x"},
+			name:     "C1 DCS canceled by CAN",
+			input:    "\x90qpayload\x18x",
+			expected: []string{"\x90qpayload", "\x18", "x"},
 		},
 		{
-			name:     "UTF-8 C1 DCS with 7-bit ST terminator",
-			input:    "\xC2\x90qpayload\x1b\\",
-			expected: []string{"\xC2\x90qpayload\x1b\\"},
+			name:     "C1 DCS with 7-bit ST terminator",
+			input:    "\x90qpayload\x1b\\",
+			expected: []string{"\x90qpayload\x1b\\"},
 		},
 		{
-			name:     "7-bit DCS with UTF-8 C1 ST terminator",
-			input:    "\x1bPqpayload\xC2\x9C",
-			expected: []string{"\x1bPqpayload\xC2\x9C"},
+			name:     "7-bit DCS with C1 ST terminator",
+			input:    "\x1bPqpayload\x9C",
+			expected: []string{"\x1bPqpayload\x9C"},
 		},
 		{
-			name:     "UTF-8 C1 Fe IND control",
-			input:    "\xC2\x84",
-			expected: []string{"\xC2\x84"},
-		},
-		{
-			name:     "UTF-8 C1 lead byte for non-C1 codepoint is not ANSI",
-			input:    "\u00A9",
-			expected: []string{"\u00A9"},
+			name:     "C1 Fe IND control",
+			input:    "\x84",
+			expected: []string{"\x84"},
 		},
 		{
 			name:     "nF malformed: no final byte",
@@ -228,29 +223,29 @@ func TestAnsiEscapeSequencesAsGraphemes(t *testing.T) {
 			expected: []string{"\x1b[0 q"},
 		},
 		{
-			name:     "UTF-8 C1 OSC unterminated",
-			input:    "\xC2\x9D0;title",
-			expected: []string{"\xC2\x9D", "0", ";", "t", "i", "t", "l", "e"},
+			name:     "C1 OSC unterminated",
+			input:    "\x9D0;title",
+			expected: []string{"\x9D", "0", ";", "t", "i", "t", "l", "e"},
 		},
 		{
-			name:     "UTF-8 C1 DCS unterminated",
-			input:    "\xC2\x90data",
-			expected: []string{"\xC2\x90", "d", "a", "t", "a"},
+			name:     "C1 DCS unterminated",
+			input:    "\x90data",
+			expected: []string{"\x90", "d", "a", "t", "a"},
 		},
 		{
-			name:     "UTF-8 C1 SOS with UTF-8 C1 ST terminator",
-			input:    "\xC2\x98hello\xC2\x9C",
-			expected: []string{"\xC2\x98hello\xC2\x9C"},
+			name:     "C1 SOS with C1 ST terminator",
+			input:    "\x98hello\x9C",
+			expected: []string{"\x98hello\x9C"},
 		},
 		{
-			name:     "UTF-8 C1 PM with 7-bit ST terminator",
-			input:    "\xC2\x9Emsg\x1b\\",
-			expected: []string{"\xC2\x9Emsg\x1b\\"},
+			name:     "C1 PM with 7-bit ST terminator",
+			input:    "\x9Emsg\x1b\\",
+			expected: []string{"\x9Emsg\x1b\\"},
 		},
 		{
-			name:     "UTF-8 C1 APC with UTF-8 C1 ST terminator",
-			input:    "\xC2\x9Fdata\xC2\x9C",
-			expected: []string{"\xC2\x9Fdata\xC2\x9C"},
+			name:     "C1 APC with C1 ST terminator",
+			input:    "\x9Fdata\x9C",
+			expected: []string{"\x9Fdata\x9C"},
 		},
 		{
 			name:     "single ESC byte",
@@ -258,9 +253,9 @@ func TestAnsiEscapeSequencesAsGraphemes(t *testing.T) {
 			expected: []string{"\x1b"},
 		},
 		{
-			name:     "single C1 lead byte (incomplete UTF-8)",
-			input:    "\xC2",
-			expected: []string{"\xC2"},
+			name:     "single C1 control byte",
+			input:    "\x84",
+			expected: []string{"\x84"},
 		},
 		{
 			name:     "SOS canceled by CAN",
diff --git a/graphemes/iterator.go b/graphemes/iterator.go
index a734657..8494296 100644
--- a/graphemes/iterator.go
+++ b/graphemes/iterator.go
@@ -38,12 +38,12 @@ var (
 )
 
 const (
-	esc        = 0x1B
-	cr         = 0x0D
-	bel        = 0x07
-	can        = 0x18
-	sub        = 0x1A
-	c1UTF8Lead = 0xC2
+	esc = 0x1B
+	cr  = 0x0D
+	bel = 0x07
+	can = 0x18
+	sub = 0x1A
+	st  = 0x9C // C1 String Terminator
 )
 
 // Next advances the iterator to the next grapheme cluster.
@@ -54,7 +54,8 @@ func (iter *Iterator[T]) Next() bool {
 	}
 	iter.start = iter.pos
 
-	if iter.AnsiEscapeSequences && (iter.data[iter.pos] == esc || iter.data[iter.pos] == c1UTF8Lead) {
+	b := iter.data[iter.pos]
+	if iter.AnsiEscapeSequences && (b == esc || (b >= 0x80 && b <= 0x9F)) {
 		if a := ansiEscapeLength(iter.data[iter.pos:]); a > 0 {
 			iter.pos += a
 			return true
@@ -62,8 +63,6 @@ func (iter *Iterator[T]) Next() bool {
 	}
 
 	// ASCII hot path: any ASCII is one grapheme when next byte is ASCII or end.
-	// Fall through on CR so splitfunc can handle CR+LF as a single cluster.
-	b := iter.data[iter.pos]
 	if b < utf8.RuneSelf && b != cr {
 		if iter.pos+1 >= len(iter.data) || iter.data[iter.pos+1] < utf8.RuneSelf {
 			iter.pos++

From b8f2c25fff652fe6124b54da05d0f99c888da478 Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 13:59:34 -0500
Subject: [PATCH 02/15] add comparative tests

---
 graphemes/comparative/comparative_test.go | 125 ++++++++++++++++++++++
 graphemes/comparative/go.mod              |  10 +-
 graphemes/comparative/go.sum              |  10 ++
 3 files changed, 144 insertions(+), 1 deletion(-)

diff --git a/graphemes/comparative/comparative_test.go b/graphemes/comparative/comparative_test.go
index fa5ac61..9420081 100644
--- a/graphemes/comparative/comparative_test.go
+++ b/graphemes/comparative/comparative_test.go
@@ -1,9 +1,11 @@
 package comparative
 
 import (
+	"reflect"
 	"strings"
 	"testing"
 
+	"github.com/charmbracelet/x/ansi"
 	"github.com/clipperhouse/uax29/v2/graphemes"
 	"github.com/clipperhouse/uax29/v2/testdata"
 	"github.com/rivo/uniseg"
@@ -75,3 +77,126 @@ func BenchmarkGraphemesASCII(b *testing.B) {
 		}
 	})
 }
+
+// TestAnsiBoundaryAgreement verifies that our ANSI sequence parsing produces
+// the same token boundaries as charmbracelet/x/ansi's DecodeSequence.
+// Inputs use ASCII text between sequences so grapheme clustering differences
+// don't obscure ANSI boundary comparison.
+func TestAnsiBoundaryAgreement(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+	}{
+		// 7-bit CSI
+		{"SGR reset", "\x1b[0m"},
+		{"SGR color then text then reset", "\x1b[31mhello\x1b[0m"},
+		{"CSI bold+color", "\x1b[1;32m"},
+		{"CSI cursor position", "\x1b[10;20H"},
+
+		// 7-bit OSC
+		{"OSC title with BEL", "\x1b]0;My Title\x07"},
+		{"OSC title with ST", "\x1b]0;Title\x1b\\"},
+
+		// 7-bit DCS/SOS/PM/APC
+		{"DCS with ST", "\x1bPq#0;2;0;0;0\x1b\\"},
+		{"SOS with ST", "\x1bXhello\x1b\\"},
+		{"PM with ST", "\x1b^msg\x1b\\"},
+		{"APC with ST", "\x1b_data\x1b\\"},
+
+		// Two-byte Fe/Fs/Fp
+		{"Fe IND", "\x1bD"},
+		{"Fs RIS", "\x1bc"},
+		{"Fp DECSC", "\x1b7"},
+
+		// C1 8-bit
+		{"C1 CSI then text", "\x9B31mhello"},
+		{"C1 OSC with C1 ST", "\x9D0;Title\x9C"},
+		{"C1 OSC with 7-bit ST", "\x9D0;Title\x1b\\"},
+		{"C1 DCS with 7-bit ST", "\x90qpayload\x1b\\"},
+		{"C1 DCS with C1 ST", "\x90qpayload\x9C"},
+		{"C1 SOS with C1 ST", "\x98hello\x9C"},
+		{"C1 PM with 7-bit ST", "\x9Emsg\x1b\\"},
+		{"C1 APC with C1 ST", "\x9Fdata\x9C"},
+
+		// CSI variants (from charmbracelet test suite)
+		{"CSI private mode", "\x1b[?1049h"},
+		{"CSI subparams (colons)", "\x1b[38:2:255:0:255;1m"},
+		{"CSI with intermediate", "\x1b[0 q"},
+		{"CSI no params", "\x1b[m"},
+		{"CSI mouse click", "\x1b[<0;1;1M"},
+		{"CSI mouse wheel", "\x1b[<64;2;11m"},
+		{"CSI bracketed paste on", "\x1b[?2004h"},
+		{"CSI bracketed paste content", "\x1b[200~pasted text\x1b[201~"},
+
+		// SS3 / SS2 (Single Shift)
+		{"SS3 7-bit", "\x1bOA"},
+		{"SS3 8-bit", "\x8fA"},
+		{"SS2 7-bit", "\x1bNA"},
+		{"SS2 8-bit", "\x8eA"},
+
+		// nF sequences
+		{"nF charset G0", "\x1b(A"},
+		{"nF charset G0 then text", "\x1b(Btext"},
+
+		// DCS with params
+		{"DCS with params and C1 ST", "\x1bP0;1|17/ab\x9c"},
+		{"C1 DCS with params and C1 ST", "\x90?123;456+q\x9c"},
+
+		// APC payload (Kitty graphics protocol)
+		{"APC kitty graphics", "\x1b_Gf=24,s=10,v=20,o=z;aGVsbG8gd29ybGQ=\x1b\\"},
+
+		// C1 CSI with multiple params
+		{"C1 CSI multiple params", "\x9B1;2;3m"},
+
+		// Mixed 7-bit and C1
+		{"mixed 7-bit and C1", "\x1b[1m\x9B31mhello\x1b[0m"},
+
+		// Concatenated sequences
+		{"concatenated CSI+OSC", "\x1b[1;2;3m\x1b]2;Terminal\x07"},
+		{"OSC then CSI", "\x1b]0;Title\x07\x1b[31mred\x1b[0m"},
+
+		// Text around sequences
+		{"text around SGR", "hello, \x1b[1;2;3mworld\x1b[0m!"},
+
+		// Realistic colored output
+		{"colored ls", "\x1b[1;34mDocuments\x1b[0m  \x1b[0;32mbuild.sh\x1b[0m"},
+
+		// Plain text (no ANSI)
+		{"plain ASCII", "hello world"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ours := uax29Tokens(tt.input)
+			theirs := charmTokens(tt.input)
+			if !reflect.DeepEqual(ours, theirs) {
+				t.Errorf("boundary mismatch\nours:   %q\ntheirs: %q", ours, theirs)
+			}
+		})
+	}
+}
+
+// uax29Tokens segments the input using our graphemes iterator with ANSI support.
+func uax29Tokens(input string) []string {
+	iter := graphemes.FromString(input)
+	iter.AnsiEscapeSequences = true
+	var tokens []string
+	for iter.Next() {
+		tokens = append(tokens, iter.Value())
+	}
+	return tokens
+}
+
+// charmTokens segments the input using charmbracelet/x/ansi's DecodeSequence.
+func charmTokens(input string) []string {
+	var state byte
+	remaining := input
+	var tokens []string
+	for len(remaining) > 0 {
+		seq, _, n, newState := ansi.DecodeSequence(remaining, state, nil)
+		tokens = append(tokens, seq)
+		state = newState
+		remaining = remaining[n:]
+	}
+	return tokens
+}
diff --git a/graphemes/comparative/go.mod b/graphemes/comparative/go.mod
index 570fcd1..24fdeec 100644
--- a/graphemes/comparative/go.mod
+++ b/graphemes/comparative/go.mod
@@ -1,10 +1,18 @@
 module github.com/clipperhouse/uax29/graphemes/comparative
 
-go 1.18
+go 1.24.2
 
 require (
 	github.com/clipperhouse/uax29/v2 v2.6.0
 	github.com/rivo/uniseg v0.4.7
 )
 
+require (
+	github.com/charmbracelet/x/ansi v0.11.6 // indirect
+	github.com/clipperhouse/displaywidth v0.9.0 // indirect
+	github.com/clipperhouse/stringish v0.1.1 // indirect
+	github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
+	github.com/mattn/go-runewidth v0.0.19 // indirect
+)
+
 replace github.com/clipperhouse/uax29/v2 => ../../
diff --git a/graphemes/comparative/go.sum b/graphemes/comparative/go.sum
index 9008848..df31c87 100644
--- a/graphemes/comparative/go.sum
+++ b/graphemes/comparative/go.sum
@@ -1,2 +1,12 @@
+github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF7ihZW8=
+github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ=
+github.com/clipperhouse/displaywidth v0.9.0 h1:Qb4KOhYwRiN3viMv1v/3cTBlz3AcAZX3+y9OLhMtAtA=
+github.com/clipperhouse/displaywidth v0.9.0/go.mod h1:aCAAqTlh4GIVkhQnJpbL0T/WfcrJXHcj8C0yjYcjOZA=
+github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs=
+github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA=
+github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
+github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
+github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw=
+github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
 github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
 github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=

From bc458abd57de23adbc0b120d632151efb32a7578 Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 14:21:56 -0500
Subject: [PATCH 03/15] add benchmarks

---
 graphemes/comparative/comparative_test.go | 71 +++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/graphemes/comparative/comparative_test.go b/graphemes/comparative/comparative_test.go
index 9420081..512264d 100644
--- a/graphemes/comparative/comparative_test.go
+++ b/graphemes/comparative/comparative_test.go
@@ -176,6 +176,77 @@ func TestAnsiBoundaryAgreement(t *testing.T) {
 	}
 }
 
+// ansiSample builds a realistic ANSI-heavy string simulating colored terminal output.
+func ansiSample() string {
+	var b strings.Builder
+	colors := []string{
+		"\x1b[1;34m", // bold blue
+		"\x1b[0;32m", // green
+		"\x1b[0;36m", // cyan
+		"\x1b[1;31m", // bold red
+		"\x1b[33m",   // yellow
+	}
+	reset := "\x1b[0m"
+	lines := []string{
+		"drwxr-xr-x  5 user staff  160 Jan  1 12:00 Documents",
+		"drwxr-xr-x  3 user staff   96 Feb  2 09:30 Downloads",
+		"-rwxr-xr-x  1 user staff 8432 Mar 15 14:22 build.sh",
+		"lrwxr-xr-x  1 user staff   11 Apr 20 08:00 config",
+		"-rw-r--r--  1 user staff 1024 May  5 16:45 README.md",
+	}
+	for round := 0; round < 40; round++ {
+		for i, line := range lines {
+			color := colors[i%len(colors)]
+			if i%5 == 0 {
+				b.WriteString("\x1b]0;terminal - round ")
+				b.WriteString(string(rune('0' + round%10)))
+				b.WriteString("\x07")
+			}
+			b.WriteString(color)
+			b.WriteString(line)
+			b.WriteString(reset)
+			b.WriteString("\n")
+		}
+	}
+	return b.String()
+}
+
+func BenchmarkAnsiIteration(b *testing.B) {
+	input := ansiSample()
+	n := int64(len(input))
+
+	b.Run("clipperhouse/uax29", func(b *testing.B) {
+		b.SetBytes(n)
+		b.ReportAllocs()
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			count := 0
+			g := graphemes.FromString(input)
+			g.AnsiEscapeSequences = true
+			for g.Next() {
+				count++
+			}
+		}
+	})
+
+	b.Run("charmbracelet/x/ansi", func(b *testing.B) {
+		b.SetBytes(n)
+		b.ReportAllocs()
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			count := 0
+			var state byte
+			remaining := input
+			for len(remaining) > 0 {
+				_, _, advance, newState := ansi.DecodeSequence(remaining, state, nil)
+				state = newState
+				remaining = remaining[advance:]
+				count++
+			}
+		}
+	})
+}
+
 // uax29Tokens segments the input using our graphemes iterator with ANSI support.
 func uax29Tokens(input string) []string {
 	iter := graphemes.FromString(input)

From e611f515f5ab490e32911586920866b4153b3bdd Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 14:31:34 -0500
Subject: [PATCH 04/15] document that 8-bit control codes are not valid UTF-8

---
 graphemes/README.md   | 2 +-
 graphemes/iterator.go | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/graphemes/README.md b/graphemes/README.md
index ca146e0..cfd4540 100644
--- a/graphemes/README.md
+++ b/graphemes/README.md
@@ -86,7 +86,7 @@ for g.Next() {
 }
 ```
 
-We implement [ECMA-48](https://ecma-international.org/publications-and-standards/standards/ecma-48/) C0 and C1 control codes, 7-bit and 8-bit, in UTF-8 encoding.
+We implement [ECMA-48](https://ecma-international.org/publications-and-standards/standards/ecma-48/) C0 and C1 control codes, 7-bit and 8-bit. Note that 8-bit control codes are not UTF-8 encoded, and in fact are not valid UTF-8. Caveat emptor.
 
 ### Benchmarks
 
diff --git a/graphemes/iterator.go b/graphemes/iterator.go
index 8494296..3f18ade 100644
--- a/graphemes/iterator.go
+++ b/graphemes/iterator.go
@@ -27,8 +27,13 @@ type Iterator[T ~string | ~[]byte] struct {
 	data  T
 	pos   int
 	start int
-	// AnsiEscapeSequences treats ANSI escape sequences (ECMA-48) as single grapheme
-	// clusters when true. Default is false.
+	// AnsiEscapeSequences treats ANSI escape sequences (ECMA-48) as single
+	// grapheme clusters when true. The default is false.
+	//
+	// This option recognizes 7-bit and 8-bit control codes from ECMA-48. 8-bit
+	// control codes are not UTF-8 encoded, i.e. not valid UTF-8. If you
+	// choose this option, you are choosing to interpret non-UTF-8 data,
+	// caveat emptor.
 	AnsiEscapeSequences bool
 }
 

From 9fe5eb730f0254fd9486387a28f97fcc81a84401 Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 14:41:24 -0500
Subject: [PATCH 05/15] Add some adversarial tests.

---
 graphemes/ansi_test.go | 72 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/graphemes/ansi_test.go b/graphemes/ansi_test.go
index 6d7b434..e288de8 100644
--- a/graphemes/ansi_test.go
+++ b/graphemes/ansi_test.go
@@ -257,6 +257,26 @@ func TestAnsiEscapeSequencesAsGraphemes(t *testing.T) {
 			input:    "\x84",
 			expected: []string{"\x84"},
 		},
+		{
+			name:     "UTF-8 cafe",
+			input:    "café",
+			expected: []string{"c", "a", "f", "é"},
+		},
+		{
+			name:     "UTF-8 Japanese text",
+			input:    "日本語",
+			expected: []string{"日", "本", "語"},
+		},
+		{
+			name:     "UTF-8 runes with continuation bytes in C1 range",
+			input:    "Āğל",
+			expected: []string{"Ā", "ğ", "ל"},
+		},
+		{
+			name:     "mixed ANSI and UTF-8 adversarial payload",
+			input:    "\x1b[31mĀğ日本語café\x1b[0m",
+			expected: []string{"\x1b[31m", "Ā", "ğ", "日", "本", "語", "c", "a", "f", "é", "\x1b[0m"},
+		},
 		{
 			name:     "SOS canceled by CAN",
 			input:    "\x1bXhello\x18z",
@@ -304,6 +324,58 @@ func TestAnsiEscapeSequencesAsGraphemes(t *testing.T) {
 	}
 }
 
+func TestAnsiEscapeSequencesPureUTF8Parity(t *testing.T) {
+	t.Parallel()
+
+	samples := []string{
+		"café",
+		"日本語",
+		"Āğל",
+		"A\u0301",
+		"👩🏽‍💻",
+		"Résumé — 東京 — 👍",
+	}
+
+	collectString := func(input string, ansi bool) []string {
+		iter := graphemes.FromString(input)
+		iter.AnsiEscapeSequences = ansi
+		var out []string
+		for iter.Next() {
+			out = append(out, iter.Value())
+		}
+		return out
+	}
+
+	collectBytes := func(input string, ansi bool) []string {
+		iter := graphemes.FromBytes([]byte(input))
+		iter.AnsiEscapeSequences = ansi
+		var out []string
+		for iter.Next() {
+			out = append(out, string(iter.Value()))
+		}
+		return out
+	}
+
+	for i, sample := range samples {
+		sample := sample
+		t.Run("sample-"+string(rune('A'+i)), func(t *testing.T) {
+			t.Parallel()
+
+			stringNoANSI := collectString(sample, false)
+			stringANSI := collectString(sample, true)
+			if !reflect.DeepEqual(stringNoANSI, stringANSI) {
+				t.Fatalf("string parity mismatch for %q\noff=%q\non=%q", sample, stringNoANSI, stringANSI)
+			}
+
+			bytesNoANSI := collectBytes(sample, false)
+			bytesANSI := collectBytes(sample, true)
+			if !reflect.DeepEqual(bytesNoANSI, bytesANSI) {
+				t.Fatalf("bytes parity mismatch for %q\noff=%q\non=%q", sample, bytesNoANSI, bytesANSI)
+			}
+		})
+	}
+}
+
 // ansiSample builds a string that mixes ANSI escape sequences with regular text,
 // simulating realistic terminal output (colored words, resets, bold, etc.).
 func ansiSample() string {

From 25998bcd4a8005242bc5666339d30fff8e79c008 Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 15:46:52 -0500
Subject: [PATCH 06/15] Separate 7-bit and 8-bit options

---
 graphemes/README.md                       |  13 +-
 graphemes/ansi.go                         | 121 +++++---------
 graphemes/ansi8.go                        |  75 +++++++++
 graphemes/ansi_test.go                    | 183 ++++++++++++++++------
 graphemes/comparative/comparative_test.go |   2 +-
 graphemes/comparative/go.mod              |   2 +-
 graphemes/iterator.go                     |  23 ++-
 7 files changed, 278 insertions(+), 141 deletions(-)
 create mode 100644 graphemes/ansi8.go

diff --git a/graphemes/README.md b/graphemes/README.md
index cfd4540..9a64051 100644
--- a/graphemes/README.md
+++ b/graphemes/README.md
@@ -74,7 +74,7 @@ for g.Next() {                     // Next() returns true until end of data
 
 ### ANSI escape sequences
 
-By the UAX 29 specification, ANSI escape sequences are not grapheme clusters. To treat these sequences as a single cluster, set the `AnsiEscapeSequences` option to true.
+By the UAX 29 specification, ANSI escape sequences are not grapheme clusters. To treat 7-bit ANSI escape sequences as a single cluster, set `AnsiEscapeSequences` to true.
 
 ```go
 text := "Hello, \x1b[31mworld\x1b[0m!"
@@ -86,7 +86,16 @@ for g.Next() {
 }
 ```
 
-We implement [ECMA-48](https://ecma-international.org/publications-and-standards/standards/ecma-48/) C0 and C1 control codes, 7-bit and 8-bit. Note that 8-bit control codes are not UTF-8 encoded, and in fact are not valid UTF-8. Caveat emptor.
+To also parse 8-bit C1 controls (non-UTF-8 bytes), set `AnsiEscapeSequences8Bit` to true.
+
+```go
+g.AnsiEscapeSequences = true     // 7-bit forms (ESC ...)
+g.AnsiEscapeSequences8Bit = true // 8-bit C1 forms (0x80-0x9F), not valid UTF-8
+```
+
+For ESC-initiated (7-bit) control strings, only 7-bit terminators are recognized.
+
+We implement [ECMA-48](https://ecma-international.org/publications-and-standards/standards/ecma-48/) control codes in both 7-bit and 8-bit representations. 8-bit control codes are not UTF-8 encoded and are not valid UTF-8.
 
 ### Benchmarks
 
diff --git a/graphemes/ansi.go b/graphemes/ansi.go
index 8aa9b96..ae0c6da 100644
--- a/graphemes/ansi.go
+++ b/graphemes/ansi.go
@@ -1,96 +1,59 @@
 package graphemes
 
-// ansiEscapeLength returns the byte length of a valid ANSI escape/control
+// ansiEscapeLength returns the byte length of a valid 7-bit ANSI escape
 // sequence at the start of data, or 0 if none.
 //
-// This recognizes both:
-//   - 7-bit representations (ESC + final/intermediate bytes), and
-//   - 8-bit C1 controls (raw bytes 0x80..0x9F per ECMA-48).
-//
 // Recognized forms (ECMA-48 / ISO 6429):
-//   - CSI: ESC [ (or 0x9B) then parameter bytes (0x30–0x3F), intermediate (0x20–0x2F), final (0x40–0x7E)
-//   - OSC: ESC ] (or 0x9D) then payload until ST, BEL (0x07), CAN (0x18), or SUB (0x1A)
-//   - DCS, SOS, PM, APC: ESC P/X/^/_ (or 0x90/0x98/0x9E/0x9F) then payload until ST, CAN, or SUB
-//   - Two-byte: ESC + Fe/Fs (0x40–0x7E excluding above), or Fp (0x30–0x3F), or nF (0x20–0x2F then final)
-//   - Standalone C1 controls (0x80..0x9F not listed above): single byte
+//   - CSI: ESC [ then parameter bytes (0x30-0x3F), intermediate (0x20-0x2F), final (0x40-0x7E)
+//   - OSC: ESC ] then payload until BEL (0x07), 7-bit ST (ESC \), CAN (0x18), or SUB (0x1A)
+//   - DCS, SOS, PM, APC: ESC P/X/^/_ then payload until 7-bit ST (ESC \), CAN, or SUB
+//   - Two-byte: ESC + Fe/Fs (0x40-0x7E excluding above), or Fp (0x30-0x3F), or nF (0x20-0x2F then final)
 func ansiEscapeLength[T ~string | ~[]byte](data T) int {
 	n := len(data)
-	if n == 0 {
+	if n < 2 || data[0] != esc {
 		return 0
 	}
 
-	switch data[0] {
-	case esc:
-		if n < 2 {
-			return 0
-		}
-		b1 := data[1]
-		switch b1 {
-		case '[': // CSI
-			body := csiLength(data[2:])
-			if body == 0 {
-				return 0
-			}
-			return 2 + body
-		case ']': // OSC – allows BEL or ST as terminator
-			body := oscLength(data[2:])
-			if body < 0 {
-				return 0
-			}
-			return 2 + body
-		case 'P', 'X', '^', '_': // DCS, SOS, PM, APC – require ST only
-			body := stSequenceLength(data[2:])
-			if body < 0 {
-				return 0
-			}
-			return 2 + body
-		}
-		if b1 >= 0x40 && b1 <= 0x7E {
-			// Fe/Fs two-byte; [ ] P X ^ _ handled above
-			return 2
-		}
-		if b1 >= 0x30 && b1 <= 0x3F {
-			// Fp (private) two-byte
-			return 2
-		}
-		if b1 >= 0x20 && b1 <= 0x2F {
-			// nF: intermediates then one final (0x30–0x7E)
-			i := 2
-			for i < n && data[i] >= 0x20 && data[i] <= 0x2F {
-				i++
-			}
-			if i < n && data[i] >= 0x30 && data[i] <= 0x7E {
-				return i + 1
-			}
-			return 0
-		}
-
-	case 0x9B: // C1 CSI
-		body := csiLength(data[1:])
+	b1 := data[1]
+	switch b1 {
+	case '[': // CSI
+		body := csiLength(data[2:])
 		if body == 0 {
 			return 0
 		}
-		return 1 + body
-
-	case 0x9D: // C1 OSC
-		body := oscLength(data[1:])
+		return 2 + body
+	case ']': // OSC - allows BEL or 7-bit ST terminator
+		body := oscLength(data[2:])
 		if body < 0 {
 			return 0
 		}
-		return 1 + body
-
-	case 0x90, 0x98, 0x9E, 0x9F: // C1 DCS, SOS, PM, APC
-		body := stSequenceLength(data[1:])
+		return 2 + body
+	case 'P', 'X', '^', '_': // DCS, SOS, PM, APC
+		body := stSequenceLength(data[2:])
 		if body < 0 {
 			return 0
 		}
-		return 1 + body
+		return 2 + body
+	}
 
-	default:
-		if data[0] >= 0x80 && data[0] <= 0x9F {
-			// Any other C1 control is a single-byte sequence.
-			return 1
+	if b1 >= 0x40 && b1 <= 0x7E {
+		// Fe/Fs two-byte; [ ] P X ^ _ handled above
+		return 2
+	}
+	if b1 >= 0x30 && b1 <= 0x3F {
+		// Fp (private) two-byte
+		return 2
+	}
+	if b1 >= 0x20 && b1 <= 0x2F {
+		// nF: intermediates then one final (0x30-0x7E)
+		i := 2
+		for i < n && data[i] >= 0x20 && data[i] <= 0x2F {
+			i++
+		}
+		if i < n && data[i] >= 0x30 && data[i] <= 0x7E {
+			return i + 1
 		}
+		return 0
 	}
 
 	return 0
@@ -126,20 +89,19 @@ func csiLength[T ~string | ~[]byte](data T) int {
 }
 
 // oscLength returns the length of the OSC body.
-// data is the slice after "ESC ]" (or C1 OSC).
+// data is the slice after "ESC ]".
 //
 // Returns:
 //   - n >= 0: consumed body length (includes BEL/ST terminator when present)
 //   - -1: not terminated in the provided data
 //
-// OSC accepts BEL (0x07) or ST as terminator by widespread convention.
-// ST may be 7-bit (ESC \) or C1 (0x9C).
+// OSC accepts BEL (0x07) or 7-bit ST (ESC \) as terminators by widespread convention.
 // Per ECMA-48, CAN (0x18) and SUB (0x1A) cancel the control string; in that
 // case they are not part of the OSC sequence length.
 func oscLength[T ~string | ~[]byte](data T) int {
 	for i := 0; i < len(data); i++ {
 		b := data[i]
-		if b == bel || b == st {
+		if b == bel {
 			return i + 1
 		}
 		if b == can || b == sub {
@@ -153,14 +115,14 @@ func oscLength[T ~string | ~[]byte](data T) int {
 }
 
 // stSequenceLength returns the length of a control-string body.
-// data is the slice after "ESC x" (or C1 DCS/SOS/PM/APC).
+// data is the slice after "ESC x".
 //
 // Returns:
 //   - n >= 0: consumed body length (includes ST terminator when present)
 //   - -1: not terminated in the provided data
 //
 // Used for DCS, SOS, PM, and APC, which per ECMA-48 terminate with ST.
-// ST may be 7-bit (ESC \) or C1 (0x9C).
+// ST here is the 7-bit form (ESC \).
 // CAN (0x18) and SUB (0x1A) cancel the control string; in that case they are
 // not part of the sequence length.
 func stSequenceLength[T ~string | ~[]byte](data T) int {
@@ -168,9 +130,6 @@ func stSequenceLength[T ~string | ~[]byte](data T) int {
 		if data[i] == can || data[i] == sub {
 			return i
 		}
-		if data[i] == st {
-			return i + 1
-		}
 		if data[i] == esc && i+1 < len(data) && data[i+1] == '\\' {
 			return i + 2
 		}
diff --git a/graphemes/ansi8.go b/graphemes/ansi8.go
new file mode 100644
index 0000000..97d59c9
--- /dev/null
+++ b/graphemes/ansi8.go
@@ -0,0 +1,75 @@
+package graphemes
+
+// ansiEscapeLength8Bit returns the byte length of a valid 8-bit C1 ANSI
+// sequence at the start of data, or 0 if none.
+//
+// Recognized forms (ECMA-48 / ISO 6429):
+//   - C1 CSI (0x9B) body as parameter/intermediate/final bytes
+//   - C1 OSC (0x9D) body terminated by BEL, C1 ST, 7-bit ST, CAN, or SUB
+//   - C1 DCS/SOS/PM/APC (0x90/0x98/0x9E/0x9F) body terminated by C1 ST, 7-bit ST, CAN, or SUB
+//   - Standalone C1 controls (0x80..0x9F not listed above): single byte
+func ansiEscapeLength8Bit[T ~string | ~[]byte](data T) int {
+	if len(data) == 0 {
+		return 0
+	}
+
+	switch data[0] {
+	case 0x9B: // C1 CSI
+		body := csiLength(data[1:])
+		if body == 0 {
+			return 0
+		}
+		return 1 + body
+	case 0x9D: // C1 OSC
+		body := oscLengthC1(data[1:])
+		if body < 0 {
+			return 0
+		}
+		return 1 + body
+	case 0x90, 0x98, 0x9E, 0x9F: // C1 DCS, SOS, PM, APC
+		body := stSequenceLengthC1(data[1:])
+		if body < 0 {
+			return 0
+		}
+		return 1 + body
+	default:
+		if data[0] >= 0x80 && data[0] <= 0x9F {
+			return 1
+		}
+	}
+
+	return 0
+}
+
+func oscLengthC1[T ~string | ~[]byte](data T) int {
+	for i := 0; i < len(data); i++ {
+		b := data[i]
+		if b == bel || b == st {
+			return i + 1
+		}
+		if b == can || b == sub {
+			return i
+		}
+		if b == esc && i+1 < len(data) && data[i+1] == '\\' {
+			return i + 2
+		}
+	}
+	return -1
+}
+
+// stSequenceLengthC1 parses DCS/SOS/PM/APC bodies that may
+// terminate with either 7-bit ST (ESC \) or C1 ST (0x9C).
+func stSequenceLengthC1[T ~string | ~[]byte](data T) int {
+	for i := 0; i < len(data); i++ {
+		if data[i] == can || data[i] == sub {
+			return i
+		}
+		if data[i] == st {
+			return i + 1
+		}
+		if data[i] == esc && i+1 < len(data) && data[i+1] == '\\' {
+			return i + 2
+		}
+	}
+	return -1
+}
diff --git a/graphemes/ansi_test.go b/graphemes/ansi_test.go
index e288de8..df33564 100644
--- a/graphemes/ansi_test.go
+++ b/graphemes/ansi_test.go
@@ -9,14 +9,104 @@ import (
 	"github.com/clipperhouse/uax29/v2/testdata"
 )
 
-func TestAnsiEscapeSequencesAsGraphemes(t *testing.T) {
+type ansiCase struct {
+	name     string
+	input    string
+	expected []string
+}
+
+func assertANSITokens(t *testing.T, input string, expected []string, sevenBit, eightBit bool) {
+	t.Helper()
+
+	assertEqual := func(kind string, got []string) {
+		t.Helper()
+		if !reflect.DeepEqual(got, expected) {
+			t.Errorf("%s mismatch\ngot %q\nexpected %q", kind, got, expected)
+		}
+	}
+
+	iterString := graphemes.FromString(input)
+	iterString.AnsiEscapeSequences = sevenBit
+	iterString.AnsiEscapeSequences8Bit = eightBit
+	var gotString []string
+	for iterString.Next() {
+		gotString = append(gotString, iterString.Value())
+	}
+	assertEqual("string", gotString)
+
+	iterBytes := graphemes.FromBytes([]byte(input))
+	iterBytes.AnsiEscapeSequences = sevenBit
+	iterBytes.AnsiEscapeSequences8Bit = eightBit
+	var gotBytes []string
+	for iterBytes.Next() {
+		gotBytes = append(gotBytes, string(iterBytes.Value()))
+	}
+	assertEqual("bytes", gotBytes)
+}
+
+func runANSICases(t *testing.T, tests []ansiCase, sevenBit, eightBit bool) {
+	t.Helper()
+	for _, tt := range tests {
+		tt := tt
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			assertANSITokens(t, tt.input, tt.expected, sevenBit, eightBit)
+		})
+	}
+}
+
+func TestAnsiEscapeSequences7BitOnlyAsGraphemes(t *testing.T) {
+	t.Parallel()
+
+	tests := []ansiCase{
+		{name: "SGR reset", input: "\x1b[0m", expected: []string{"\x1b[0m"}},
+		{name: "SGR red then text", input: "\x1b[31mhello", expected: []string{"\x1b[31m", "h", "e", "l", "l", "o"}},
+		{name: "CSI with valid intermediate", input: "\x1b[0 q", expected: []string{"\x1b[0 q"}},
+		{name: "OSC window title then BEL", input: "\x1b]0;My Title\x07", expected: []string{"\x1b]0;My Title\x07"}},
+		{name: "OSC window title then ST", input: "\x1b]0;Title\x1b\\", expected: []string{"\x1b]0;Title\x1b\\"}},
+		{name: "DCS with ST terminator", input: "\x1bPq#0;2;0;0;0\x1b\\", expected: []string{"\x1bPq#0;2;0;0;0\x1b\\"}},
+		{name: "DCS canceled by CAN", input: "\x1bPqdata\x18z", expected: []string{"\x1bPqdata", "\x18", "z"}},
+		{name: "SOS with ST terminator", input: "\x1bXhello\x1b\\", expected: []string{"\x1bXhello\x1b\\"}},
+		{name: "PM with ST terminator", input: "\x1b^msg\x1b\\", expected: []string{"\x1b^msg\x1b\\"}},
+		{name: "APC with ST terminator", input: "\x1b_data\x1b\\", expected: []string{"\x1b_data\x1b\\"}},
+		{name: "two-byte Fe", input: "\x1bD", expected: []string{"\x1bD"}},
+		{name: "two-byte Fp", input: "\x1b7", expected: []string{"\x1b7"}},
+		{name: "nF with multiple intermediates", input: "\x1b !Fx", expected: []string{"\x1b !F", "x"}},
+		{name: "malformed CSI remains split", input: "\x1b[ 1mok", expected: []string{"\x1b", "[", " ", "1", "m", "o", "k"}},
+		{name: "C1 CSI is not parsed", input: "\x9B31mhello", expected: []string{"\x9B", "3", "1", "m", "h", "e", "l", "l", "o"}},
+		{name: "7-bit OSC does not accept C1 ST", input: "\x1b]0;Title\x9Cz", expected: []string{"\x1b", "]", "0", ";", "T", "i", "t", "l", "e", "\x9C", "z"}},
+	}
+
+	runANSICases(t, tests, true, false)
+}
+
+func TestAnsiEscapeSequences8BitOnlyAsGraphemes(t *testing.T) {
 	t.Parallel()
 
-	tests := []struct {
-		name     string
-		input    string
-		expected []string
-	}{
+	tests := []ansiCase{
+		{name: "C1 CSI then text", input: "\x9B31mhello", expected: []string{"\x9B31m", "h", "e", "l", "l", "o"}},
+		{name: "C1 CSI multiple params", input: "\x9B1;2;3m", expected: []string{"\x9B1;2;3m"}},
+		{name: "C1 OSC with C1 ST", input: "\x9D0;Title\x9C", expected: []string{"\x9D0;Title\x9C"}},
+		{name: "C1 OSC with 7-bit ST", input: "\x9D0;Title\x1b\\", expected: []string{"\x9D0;Title\x1b\\"}},
+		{name: "C1 DCS with C1 ST", input: "\x90qpayload\x9C", expected: []string{"\x90qpayload\x9C"}},
+		{name: "C1 DCS with 7-bit ST", input: "\x90qpayload\x1b\\", expected: []string{"\x90qpayload\x1b\\"}},
+		{name: "C1 DCS canceled by CAN", input: "\x90qpayload\x18x", expected: []string{"\x90qpayload", "\x18", "x"}},
+		{name: "C1 SOS with C1 ST", input: "\x98hello\x9C", expected: []string{"\x98hello\x9C"}},
+		{name: "C1 PM with 7-bit ST", input: "\x9Emsg\x1b\\", expected: []string{"\x9Emsg\x1b\\"}},
+		{name: "C1 APC with C1 ST", input: "\x9Fdata\x9C", expected: []string{"\x9Fdata\x9C"}},
+		{name: "single C1 Fe control", input: "\x84", expected: []string{"\x84"}},
+		{name: "C1 OSC unterminated", input: "\x9D0;title", expected: []string{"\x9D", "0", ";", "t", "i", "t", "l", "e"}},
+		{name: "C1 DCS unterminated", input: "\x90data", expected: []string{"\x90", "d", "a", "t", "a"}},
+		{name: "7-bit ESC sequence is not parsed", input: "\x1b[31mhello", expected: []string{"\x1b", "[", "3", "1", "m", "h", "e", "l", "l", "o"}},
+	}
+
+	runANSICases(t, tests, false, true)
+}
+
+func TestAnsiEscapeSequencesBothEnabledAsGraphemes(t *testing.T) {
+	t.Parallel()
+
+	tests := []ansiCase{
 		{
 			name:     "SGR reset",
 			input:    "\x1b[0m",
@@ -37,6 +127,11 @@ func TestAnsiEscapeSequencesAsGraphemes(t *testing.T) {
 			input:    "\x1b]0;My Title\x07",
 			expected: []string{"\x1b]0;My Title\x07"},
 		},
+		{
+			name:     "OSC UTF-8 payload does not terminate at continuation byte",
+			input:    "\x1b]0;本\x07",
+			expected: []string{"\x1b]0;本\x07"},
+		},
 		{
 			name:     "OSC window title then ST",
 			input:    "\x1b]0;Title\x1b\\",
@@ -47,6 +142,11 @@ func TestAnsiEscapeSequencesAsGraphemes(t *testing.T) {
 			input:    "\x1bPq#0;2;0;0;0\x1b\\",
 			expected: []string{"\x1bPq#0;2;0;0;0\x1b\\"},
 		},
+		{
+			name:     "DCS UTF-8 payload does not terminate at continuation byte",
+			input:    "\x1bPq本\x1b\\",
+			expected: []string{"\x1bPq本\x1b\\"},
+		},
 		{
 			name:     "DCS with BEL in payload is not a single sequence",
 			input:    "\x1bPq\x07rest",
@@ -175,7 +275,7 @@ func TestAnsiEscapeSequencesAsGraphemes(t *testing.T) {
 		{
 			name:     "7-bit OSC with C1 ST terminator",
 			input:    "\x1b]0;Title\x9C",
-			expected: []string{"\x1b]0;Title\x9C"},
+			expected: []string{"\x1b", "]", "0", ";", "T", "i", "t", "l", "e", "\x9C"},
 		},
 		{
 			name:     "C1 DCS with C1 ST terminator",
@@ -195,7 +295,7 @@ func TestAnsiEscapeSequencesAsGraphemes(t *testing.T) {
 		{
 			name:     "7-bit DCS with C1 ST terminator",
 			input:    "\x1bPqpayload\x9C",
-			expected: []string{"\x1bPqpayload\x9C"},
+			expected: []string{"\x1b", "P", "q", "p", "a", "y", "l", "o", "a", "d", "\x9C"},
 		},
 		{
 			name:     "C1 Fe IND control",
@@ -294,34 +394,7 @@ func TestAnsiEscapeSequencesAsGraphemes(t *testing.T) {
 		},
 	}
 
-	for _, tt := range tests {
-		tt := tt
-		t.Run(tt.name, func(t *testing.T) {
-			t.Parallel()
-			assertEqual := func(kind string, got []string) {
-				t.Helper()
-				if !reflect.DeepEqual(got, tt.expected) {
-					t.Errorf("%s mismatch\ngot %q\nexpected %q", kind, got, tt.expected)
-				}
-			}
-
-			iterString := graphemes.FromString(tt.input)
-			iterString.AnsiEscapeSequences = true
-			var gotString []string
-			for iterString.Next() {
-				gotString = append(gotString, iterString.Value())
-			}
-			assertEqual("string", gotString)
-
-			iterBytes := graphemes.FromBytes([]byte(tt.input))
-			iterBytes.AnsiEscapeSequences = true
-			var gotBytes []string
-			for iterBytes.Next() {
-				gotBytes = append(gotBytes, string(iterBytes.Value()))
-			}
-			assertEqual("bytes", gotBytes)
-		})
-	}
+	runANSICases(t, tests, true, true)
 }
 
 func TestAnsiEscapeSequencesPureUTF8Parity(t *testing.T) {
@@ -336,9 +409,10 @@ func TestAnsiEscapeSequencesPureUTF8Parity(t *testing.T) {
 		"Résumé — 東京 — 👍",
 	}
 
-	collectString := func(input string, ansi bool) []string {
+	collectString := func(input string, ansi7, ansi8 bool) []string {
 		iter := graphemes.FromString(input)
-		iter.AnsiEscapeSequences = ansi
+		iter.AnsiEscapeSequences = ansi7
+		iter.AnsiEscapeSequences8Bit = ansi8
 		var out []string
 		for iter.Next() {
 			out = append(out, iter.Value())
@@ -346,9 +420,10 @@ func TestAnsiEscapeSequencesPureUTF8Parity(t *testing.T) {
 		return out
 	}
 
-	collectBytes := func(input string, ansi bool) []string {
+	collectBytes := func(input string, ansi7, ansi8 bool) []string {
 		iter := graphemes.FromBytes([]byte(input))
-		iter.AnsiEscapeSequences = ansi
+		iter.AnsiEscapeSequences = ansi7
+		iter.AnsiEscapeSequences8Bit = ansi8
 		var out []string
 		for iter.Next() {
 			out = append(out, string(iter.Value()))
@@ -361,16 +436,26 @@ func TestAnsiEscapeSequencesPureUTF8Parity(t *testing.T) {
 		t.Run("sample-"+string(rune('A'+i)), func(t *testing.T) {
 			t.Parallel()
 
-			stringNoANSI := collectString(sample, false)
-			stringANSI := collectString(sample, true)
-			if !reflect.DeepEqual(stringNoANSI, stringANSI) {
-				t.Fatalf("string parity mismatch for %q\noff=%q\non=%q", sample, stringNoANSI, stringANSI)
-			}
+			stringBase := collectString(sample, false, false)
+			for _, flags := range []struct {
+				name  string
+				ansi7 bool
+				ansi8 bool
+			}{
+				{name: "7-bit only", ansi7: true, ansi8: false},
+				{name: "8-bit only", ansi7: false, ansi8: true},
+				{name: "both", ansi7: true, ansi8: true},
+			} {
+				gotString := collectString(sample, flags.ansi7, flags.ansi8)
+				if !reflect.DeepEqual(stringBase, gotString) {
+					t.Fatalf("string parity mismatch for %q (%s)\noff=%q\non=%q", sample, flags.name, stringBase, gotString)
+				}
 
-			bytesNoANSI := collectBytes(sample, false)
-			bytesANSI := collectBytes(sample, true)
-			if !reflect.DeepEqual(bytesNoANSI, bytesANSI) {
-				t.Fatalf("bytes parity mismatch for %q\noff=%q\non=%q", sample, bytesNoANSI, bytesANSI)
+				bytesBase := collectBytes(sample, false, false)
+				gotBytes := collectBytes(sample, flags.ansi7, flags.ansi8)
+				if !reflect.DeepEqual(bytesBase, gotBytes) {
+					t.Fatalf("bytes parity mismatch for %q (%s)\noff=%q\non=%q", sample, flags.name, bytesBase, gotBytes)
+				}
 			}
 		})
 	}
diff --git a/graphemes/comparative/comparative_test.go b/graphemes/comparative/comparative_test.go
index 512264d..11e6fb1 100644
--- a/graphemes/comparative/comparative_test.go
+++ b/graphemes/comparative/comparative_test.go
@@ -139,7 +139,6 @@ func TestAnsiBoundaryAgreement(t *testing.T) {
 		{"nF charset G0 then text", "\x1b(Btext"},
 
 		// DCS with params
-		{"DCS with params and C1 ST", "\x1bP0;1|17/ab\x9c"},
 		{"C1 DCS with params and C1 ST", "\x90?123;456+q\x9c"},
 
 		// APC payload (Kitty graphics protocol)
@@ -251,6 +250,7 @@ func BenchmarkAnsiIteration(b *testing.B) {
 func uax29Tokens(input string) []string {
 	iter := graphemes.FromString(input)
 	iter.AnsiEscapeSequences = true
+	iter.AnsiEscapeSequences8Bit = true
 	var tokens []string
 	for iter.Next() {
 		tokens = append(tokens, iter.Value())
diff --git a/graphemes/comparative/go.mod b/graphemes/comparative/go.mod
index 24fdeec..33c156e 100644
--- a/graphemes/comparative/go.mod
+++ b/graphemes/comparative/go.mod
@@ -3,12 +3,12 @@ module github.com/clipperhouse/uax29/graphemes/comparative
 go 1.24.2
 
 require (
+	github.com/charmbracelet/x/ansi v0.11.6
 	github.com/clipperhouse/uax29/v2 v2.6.0
 	github.com/rivo/uniseg v0.4.7
 )
 
 require (
-	github.com/charmbracelet/x/ansi v0.11.6 // indirect
 	github.com/clipperhouse/displaywidth v0.9.0 // indirect
 	github.com/clipperhouse/stringish v0.1.1 // indirect
 	github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
diff --git a/graphemes/iterator.go b/graphemes/iterator.go
index 3f18ade..9266757 100644
--- a/graphemes/iterator.go
+++ b/graphemes/iterator.go
@@ -27,14 +27,17 @@ type Iterator[T ~string | ~[]byte] struct {
 	data  T
 	pos   int
 	start int
-	// AnsiEscapeSequences treats ANSI escape sequences (ECMA-48) as single
-	// grapheme clusters when true. The default is false.
+	// AnsiEscapeSequences treats 7-bit ANSI escape sequences (ECMA-48) as
+	// single grapheme clusters when true. The default is false.
 	//
-	// This option recognizes 7-bit and 8-bit control codes from ECMA-48. 8-bit
-	// control codes are not UTF-8 encoded, i.e. not valid UTF-8. If you
-	// choose this option, you are choosing to interpret non-UTF-8 data,
-	// caveat emptor.
+	// 8-bit controls are not enabled by this option. See AnsiEscapeSequences8Bit.
 	AnsiEscapeSequences bool
+	// AnsiEscapeSequences8Bit treats 8-bit C1 control codes (ECMA-48) as single
+	// grapheme clusters when true. The default is false.
+	//
+	// 8-bit control bytes are not UTF-8 encoded, i.e. not valid UTF-8. If you
+	// choose this option, you are choosing to interpret non-UTF-8 data.
+	AnsiEscapeSequences8Bit bool
 }
 
 var (
@@ -60,12 +63,18 @@ func (iter *Iterator[T]) Next() bool {
 	iter.start = iter.pos
 
 	b := iter.data[iter.pos]
-	if iter.AnsiEscapeSequences && (b == esc || (b >= 0x80 && b <= 0x9F)) {
+	if iter.AnsiEscapeSequences && b == esc {
 		if a := ansiEscapeLength(iter.data[iter.pos:]); a > 0 {
 			iter.pos += a
 			return true
 		}
 	}
+	if iter.AnsiEscapeSequences8Bit && b >= 0x80 && b <= 0x9F {
+		if a := ansiEscapeLength8Bit(iter.data[iter.pos:]); a > 0 {
+			iter.pos += a
+			return true
+		}
+	}
 
 	// ASCII hot path: any ASCII is one grapheme when next byte is ASCII or end.
 	if b < utf8.RuneSelf && b != cr {

From 5740126778ae5af6c39dcf05cefde5ee26a691d1 Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 15:54:29 -0500
Subject: [PATCH 07/15] more benchmarks

---
 graphemes/README.md                       |   2 +-
 graphemes/ansi_test.go                    | 140 ++++++++++++++++++++--
 graphemes/comparative/comparative_test.go |  80 +++++++++++--
 graphemes/iterator.go                     |   5 +-
 4 files changed, 208 insertions(+), 19 deletions(-)

diff --git a/graphemes/README.md b/graphemes/README.md
index 9a64051..d58ae69 100644
--- a/graphemes/README.md
+++ b/graphemes/README.md
@@ -95,7 +95,7 @@ g.AnsiEscapeSequences8Bit = true // 8-bit C1 forms (0x80-0x9F), not valid UTF-8
 
 For ESC-initiated (7-bit) control strings, only 7-bit terminators are recognized.
 
-We implement [ECMA-48](https://ecma-international.org/publications-and-standards/standards/ecma-48/) control codes in both 7-bit and 8-bit representations. 8-bit control codes are not UTF-8 encoded and are not valid UTF-8.
+We implement [ECMA-48](https://ecma-international.org/publications-and-standards/standards/ecma-48/) control codes in both 7-bit and 8-bit representations. 8-bit control codes are not UTF-8 encoded and are not valid UTF-8, caveat emptor.
 
 ### Benchmarks
 
diff --git a/graphemes/ansi_test.go b/graphemes/ansi_test.go
index df33564..e42d018 100644
--- a/graphemes/ansi_test.go
+++ b/graphemes/ansi_test.go
@@ -508,21 +508,129 @@ func ansiSample() string {
 	return b.String()
 }
 
+// ansiSample8Bit builds a string that uses 8-bit C1 initiators.
+func ansiSample8Bit() string {
+	var b strings.Builder
+
+	lines := []string{
+		"drwxr-xr-x  5 user staff  160 Jan  1 12:00 Documents",
+		"drwxr-xr-x  3 user staff   96 Feb  2 09:30 Downloads",
+		"-rwxr-xr-x  1 user staff 8432 Mar 15 14:22 build.sh",
+		"lrwxr-xr-x  1 user staff   11 Apr 20 08:00 config -> /etc/config",
+		"-rw-r--r--  1 user staff 1024 May  5 16:45 README.md",
+		"total 42",
+		"drwxr-xr-x  2 user staff   64 Jun 10 11:11 src",
+		"-rw-r--r--  1 user staff  512 Jul  7 07:07 main.go",
+		"error: file not found: missing.txt",
+		"warning: deprecated function used in line 42",
+	}
+
+	for round := 0; round < 20; round++ {
+		for i, line := range lines {
+			// C1 OSC: 0x9D ... BEL
+			if i%5 == 0 {
+				b.WriteByte(0x9D)
+				b.WriteString("0;terminal - round ")
+				b.WriteString(string(rune('0' + round%10)))
+				b.WriteByte(0x07)
+			}
+			// C1 CSI SGR: 0x9B ... m
+			b.WriteByte(0x9B)
+			b.WriteString("1;3")
+			b.WriteString(string(rune('0' + (i % 8))))
+			b.WriteByte('m')
+			b.WriteString(line)
+			// C1 CSI reset: 0x9B0m
+			b.WriteByte(0x9B)
+			b.WriteString("0m")
+			b.WriteByte('\n')
+		}
+	}
+	return b.String()
+}
+
+// ansiSampleMixed builds a string with both 7-bit and 8-bit ANSI forms.
+func ansiSampleMixed() string {
+	var b strings.Builder
+	a7 := ansiSample()
+	a8 := ansiSample8Bit()
+	b.WriteString(a7)
+	b.WriteString(a8)
+	return b.String()
+}
+
 // BenchmarkAnsiOption benchmarks the iterator on text that contains ANSI escapes,
 // and on plain text, with the AnsiEscapeSequences option on and off.
 func BenchmarkAnsiOption(b *testing.B) {
-	ansi := ansiSample()
+	ansi7 := ansiSample()
+	ansi8 := ansiSample8Bit()
+	ansiMixed := ansiSampleMixed()
 	plain, err := testdata.Sample()
 	if err != nil {
 		b.Fatal(err)
 	}
 	plainStr := string(plain)
 
-	b.Run("AnsiText/OptionOn", func(b *testing.B) {
-		b.SetBytes(int64(len(ansi)))
+	b.Run("AnsiText7Bit/Option7BitOn", func(b *testing.B) {
+		b.SetBytes(int64(len(ansi7)))
+		for i := 0; i < b.N; i++ {
+			iter := graphemes.FromString(ansi7)
+			iter.AnsiEscapeSequences = true
+			c := 0
+			for iter.Next() {
+				_ = iter.Value()
+				c++
+			}
+			b.ReportMetric(float64(c), "tokens")
+		}
+	})
+
+	b.Run("AnsiText7Bit/OptionOff", func(b *testing.B) {
+		b.SetBytes(int64(len(ansi7)))
+		for i := 0; i < b.N; i++ {
+			iter := graphemes.FromString(ansi7)
+			c := 0
+			for iter.Next() {
+				_ = iter.Value()
+				c++
+			}
+			b.ReportMetric(float64(c), "tokens")
+		}
+	})
+
+	b.Run("AnsiText8Bit/Option8BitOn", func(b *testing.B) {
+		b.SetBytes(int64(len(ansi8)))
+		for i := 0; i < b.N; i++ {
+			iter := graphemes.FromString(ansi8)
+			iter.AnsiEscapeSequences8Bit = true
+			c := 0
+			for iter.Next() {
+				_ = iter.Value()
+				c++
+			}
+			b.ReportMetric(float64(c), "tokens")
+		}
+	})
+
+	b.Run("AnsiText8Bit/OptionOff", func(b *testing.B) {
+		b.SetBytes(int64(len(ansi8)))
 		for i := 0; i < b.N; i++ {
-			iter := graphemes.FromString(ansi)
+			iter := graphemes.FromString(ansi8)
+			c := 0
+			for iter.Next() {
+				_ = iter.Value()
+				c++
+			}
+			b.ReportMetric(float64(c), "tokens")
+		}
+	})
+
+	b.Run("AnsiTextMixed/BothOptionsOn", func(b *testing.B) {
+		b.SetBytes(int64(len(ansiMixed)))
+		for i := 0; i < b.N; i++ {
+			iter := graphemes.FromString(ansiMixed)
 			iter.AnsiEscapeSequences = true
+			iter.AnsiEscapeSequences8Bit = true
 			c := 0
 			for iter.Next() {
 				_ = iter.Value()
@@ -532,10 +640,25 @@ func BenchmarkAnsiOption(b *testing.B) {
 		}
 	})
 
-	b.Run("AnsiText/OptionOff", func(b *testing.B) {
-		b.SetBytes(int64(len(ansi)))
+	b.Run("PlainText/Option7BitOn", func(b *testing.B) {
+		b.SetBytes(int64(len(plainStr)))
 		for i := 0; i < b.N; i++ {
-			iter := graphemes.FromString(ansi)
+			iter := graphemes.FromString(plainStr)
+			iter.AnsiEscapeSequences = true
+			c := 0
+			for iter.Next() {
+				_ = iter.Value()
+				c++
+			}
+			b.ReportMetric(float64(c), "tokens")
+		}
+	})
+
+	b.Run("PlainText/Option8BitOn", func(b *testing.B) {
+		b.SetBytes(int64(len(plainStr)))
+		for i := 0; i < b.N; i++ {
+			iter := graphemes.FromString(plainStr)
+			iter.AnsiEscapeSequences8Bit = true
 			c := 0
 			for iter.Next() {
 				_ = iter.Value()
@@ -545,11 +668,12 @@ func BenchmarkAnsiOption(b *testing.B) {
 		}
 	})
 
-	b.Run("PlainText/OptionOn", func(b *testing.B) {
+	b.Run("PlainText/BothOptionsOn", func(b *testing.B) {
 		b.SetBytes(int64(len(plainStr)))
 		for i := 0; i < b.N; i++ {
 			iter := graphemes.FromString(plainStr)
 			iter.AnsiEscapeSequences = true
+			iter.AnsiEscapeSequences8Bit = true
 			c := 0
 			for iter.Next() {
 				_ = iter.Value()
diff --git a/graphemes/comparative/comparative_test.go b/graphemes/comparative/comparative_test.go
index 11e6fb1..f91de82 100644
--- a/graphemes/comparative/comparative_test.go
+++ b/graphemes/comparative/comparative_test.go
@@ -210,17 +210,52 @@ func ansiSample() string {
 	return b.String()
 }
 
+func ansiSample8Bit() string {
+	var b strings.Builder
+	lines := []string{
+		"drwxr-xr-x  5 user staff  160 Jan  1 12:00 Documents",
+		"drwxr-xr-x  3 user staff   96 Feb  2 09:30 Downloads",
+		"-rwxr-xr-x  1 user staff 8432 Mar 15 14:22 build.sh",
+		"lrwxr-xr-x  1 user staff   11 Apr 20 08:00 config",
+		"-rw-r--r--  1 user staff 1024 May  5 16:45 README.md",
+	}
+	for round := 0; round < 40; round++ {
+		for i, line := range lines {
+			if i%5 == 0 {
+				b.WriteByte(0x9D)
+				b.WriteString("0;terminal - round ")
+				b.WriteString(string(rune('0' + round%10)))
+				b.WriteByte(0x07)
+			}
+			b.WriteByte(0x9B)
+			b.WriteString("1;3")
+			b.WriteString(string(rune('0' + (i % 8))))
+			b.WriteByte('m')
+			b.WriteString(line)
+			b.WriteByte(0x9B)
+			b.WriteString("0m")
+			b.WriteString("\n")
+		}
+	}
+	return b.String()
+}
+
+func ansiSampleMixed() string {
+	return ansiSample() + ansiSample8Bit()
+}
+
 func BenchmarkAnsiIteration(b *testing.B) {
-	input := ansiSample()
-	n := int64(len(input))
+	input7 := ansiSample()
+	input8 := ansiSample8Bit()
+	inputMixed := ansiSampleMixed()
 
-	b.Run("clipperhouse/uax29", func(b *testing.B) {
-		b.SetBytes(n)
+	b.Run("clipperhouse/uax29/7bit", func(b *testing.B) {
+		b.SetBytes(int64(len(input7)))
 		b.ReportAllocs()
 		b.ResetTimer()
 		for i := 0; i < b.N; i++ {
 			count := 0
-			g := graphemes.FromString(input)
+			g := graphemes.FromString(input7)
 			g.AnsiEscapeSequences = true
 			for g.Next() {
 				count++
@@ -228,14 +263,43 @@ func BenchmarkAnsiIteration(b *testing.B) {
 		}
 	})
 
-	b.Run("charmbracelet/x/ansi", func(b *testing.B) {
-		b.SetBytes(n)
+	b.Run("clipperhouse/uax29/8bit", func(b *testing.B) {
+		b.SetBytes(int64(len(input8)))
+		b.ReportAllocs()
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			count := 0
+			g := graphemes.FromString(input8)
+			g.AnsiEscapeSequences8Bit = true
+			for g.Next() {
+				count++
+			}
+		}
+	})
+
+	b.Run("clipperhouse/uax29/both", func(b *testing.B) {
+		b.SetBytes(int64(len(inputMixed)))
+		b.ReportAllocs()
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			count := 0
+			g := graphemes.FromString(inputMixed)
+			g.AnsiEscapeSequences = true
+			g.AnsiEscapeSequences8Bit = true
+			for g.Next() {
+				count++
+			}
+		}
+	})
+
+	b.Run("charmbracelet/x/ansi/mixed", func(b *testing.B) {
+		b.SetBytes(int64(len(inputMixed)))
 		b.ReportAllocs()
 		b.ResetTimer()
 		for i := 0; i < b.N; i++ {
 			count := 0
 			var state byte
-			remaining := input
+			remaining := inputMixed
 			for len(remaining) > 0 {
 				_, _, advance, newState := ansi.DecodeSequence(remaining, state, nil)
 				state = newState
diff --git a/graphemes/iterator.go b/graphemes/iterator.go
index 9266757..a3f9aac 100644
--- a/graphemes/iterator.go
+++ b/graphemes/iterator.go
@@ -30,13 +30,14 @@ type Iterator[T ~string | ~[]byte] struct {
 	// AnsiEscapeSequences treats 7-bit ANSI escape sequences (ECMA-48) as
 	// single grapheme clusters when true. The default is false.
 	//
-	// 8-bit controls are not enabled by this option. See AnsiEscapeSequences8Bit.
+	// 8-bit controls are not enabled by this option. See [AnsiEscapeSequences8Bit].
 	AnsiEscapeSequences bool
 	// AnsiEscapeSequences8Bit treats 8-bit C1 control codes (ECMA-48) as single
 	// grapheme clusters when true. The default is false.
 	//
 	// 8-bit control bytes are not UTF-8 encoded, i.e. not valid UTF-8. If you
-	// choose this option, you are choosing to interpret non-UTF-8 data.
+	// choose this option, you are choosing to interpret non-UTF-8 data, caveat
+	// emptor.
 	AnsiEscapeSequences8Bit bool
 }
 

From e40113c3ac9c9cb71c9a14cd4be70f7879fd8d21 Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 16:32:52 -0500
Subject: [PATCH 08/15] Stricter 8-bit terminators

---
 graphemes/README.md                       |  1 +
 graphemes/ansi.go                         |  6 +++---
 graphemes/ansi8.go                        | 16 +++++-----------
 graphemes/ansi_test.go                    | 12 ++++++------
 graphemes/comparative/comparative_test.go |  3 ---
 5 files changed, 15 insertions(+), 23 deletions(-)

diff --git a/graphemes/README.md b/graphemes/README.md
index d58ae69..3f8a5e3 100644
--- a/graphemes/README.md
+++ b/graphemes/README.md
@@ -94,6 +94,7 @@ g.AnsiEscapeSequences8Bit = true // 8-bit C1 forms (0x80-0x9F), not valid UTF-8
 ```
 
 For ESC-initiated (7-bit) control strings, only 7-bit terminators are recognized.
+For C1-initiated (8-bit) control strings, only C1 ST (`0x9C`) is recognized as ST.
 
 We implement [ECMA-48](https://ecma-international.org/publications-and-standards/standards/ecma-48/) control codes in both 7-bit and 8-bit representations. 8-bit control codes are not UTF-8 encoded and are not valid UTF-8, caveat emptor.
 
diff --git a/graphemes/ansi.go b/graphemes/ansi.go
index ae0c6da..9cd09b4 100644
--- a/graphemes/ansi.go
+++ b/graphemes/ansi.go
@@ -17,7 +17,7 @@ func ansiEscapeLength[T ~string | ~[]byte](data T) int {
 	b1 := data[1]
 	switch b1 {
 	case '[': // CSI
-		body := csiLength(data[2:])
+		body := csiBodyLength(data[2:])
 		if body == 0 {
 			return 0
 		}
@@ -59,14 +59,14 @@ func ansiEscapeLength[T ~string | ~[]byte](data T) int {
 	return 0
 }
 
-// csiLength returns the length of the CSI body (param/intermediate/final bytes).
+// csiBodyLength returns the length of the CSI body (param/intermediate/final bytes).
 // data is the slice after "ESC [".
 // Per ECMA-48, the CSI body has the form:
 //
 //	parameters (0x30–0x3F)*, intermediates (0x20–0x2F)*, final (0x40–0x7E)
 //
 // Once an intermediate byte is seen, subsequent parameter bytes are invalid.
-func csiLength[T ~string | ~[]byte](data T) int {
+func csiBodyLength[T ~string | ~[]byte](data T) int {
 	seenIntermediate := false
 	for i := 0; i < len(data); i++ {
 		b := data[i]
diff --git a/graphemes/ansi8.go b/graphemes/ansi8.go
index 97d59c9..a5fde3b 100644
--- a/graphemes/ansi8.go
+++ b/graphemes/ansi8.go
@@ -5,8 +5,8 @@ package graphemes
 //
 // Recognized forms (ECMA-48 / ISO 6429):
 //   - C1 CSI (0x9B) body as parameter/intermediate/final bytes
-//   - C1 OSC (0x9D) body terminated by BEL, C1 ST, 7-bit ST, CAN, or SUB
-//   - C1 DCS/SOS/PM/APC (0x90/0x98/0x9E/0x9F) body terminated by C1 ST, 7-bit ST, CAN, or SUB
+//   - C1 OSC (0x9D) body terminated by BEL, C1 ST, CAN, or SUB
+//   - C1 DCS/SOS/PM/APC (0x90/0x98/0x9E/0x9F) body terminated by C1 ST, CAN, or SUB
 //   - Standalone C1 controls (0x80..0x9F not listed above): single byte
 func ansiEscapeLength8Bit[T ~string | ~[]byte](data T) int {
 	if len(data) == 0 {
@@ -15,7 +15,7 @@ func ansiEscapeLength8Bit[T ~string | ~[]byte](data T) int {
 
 	switch data[0] {
 	case 0x9B: // C1 CSI
-		body := csiLength(data[1:])
+		body := csiBodyLength(data[1:])
 		if body == 0 {
 			return 0
 		}
@@ -50,15 +50,12 @@ func oscLengthC1[T ~string | ~[]byte](data T) int {
 		if b == can || b == sub {
 			return i
 		}
-		if b == esc && i+1 < len(data) && data[i+1] == '\\' {
-			return i + 2
-		}
 	}
 	return -1
 }
 
-// stSequenceLengthC1 parses DCS/SOS/PM/APC bodies that may
-// terminate with either 7-bit ST (ESC \) or C1 ST (0x9C).
+// stSequenceLengthC1 parses DCS/SOS/PM/APC bodies that terminate with C1 ST
+// (0x9C), or are canceled by CAN/SUB.
 func stSequenceLengthC1[T ~string | ~[]byte](data T) int {
 	for i := 0; i < len(data); i++ {
 		if data[i] == can || data[i] == sub {
@@ -67,9 +64,6 @@ func stSequenceLengthC1[T ~string | ~[]byte](data T) int {
 		if data[i] == st {
 			return i + 1
 		}
-		if data[i] == esc && i+1 < len(data) && data[i+1] == '\\' {
-			return i + 2
-		}
 	}
 	return -1
 }
diff --git a/graphemes/ansi_test.go b/graphemes/ansi_test.go
index e42d018..74137ea 100644
--- a/graphemes/ansi_test.go
+++ b/graphemes/ansi_test.go
@@ -87,12 +87,12 @@ func TestAnsiEscapeSequences8BitOnlyAsGraphemes(t *testing.T) {
 		{name: "C1 CSI then text", input: "\x9B31mhello", expected: []string{"\x9B31m", "h", "e", "l", "l", "o"}},
 		{name: "C1 CSI multiple params", input: "\x9B1;2;3m", expected: []string{"\x9B1;2;3m"}},
 		{name: "C1 OSC with C1 ST", input: "\x9D0;Title\x9C", expected: []string{"\x9D0;Title\x9C"}},
-		{name: "C1 OSC with 7-bit ST", input: "\x9D0;Title\x1b\\", expected: []string{"\x9D0;Title\x1b\\"}},
+		{name: "C1 OSC with 7-bit ST is not parsed as one sequence", input: "\x9D0;Title\x1b\\", expected: []string{"\x9D", "0", ";", "T", "i", "t", "l", "e", "\x1b", "\\"}},
 		{name: "C1 DCS with C1 ST", input: "\x90qpayload\x9C", expected: []string{"\x90qpayload\x9C"}},
-		{name: "C1 DCS with 7-bit ST", input: "\x90qpayload\x1b\\", expected: []string{"\x90qpayload\x1b\\"}},
+		{name: "C1 DCS with 7-bit ST is not parsed as one sequence", input: "\x90qpayload\x1b\\", expected: []string{"\x90", "q", "p", "a", "y", "l", "o", "a", "d", "\x1b", "\\"}},
 		{name: "C1 DCS canceled by CAN", input: "\x90qpayload\x18x", expected: []string{"\x90qpayload", "\x18", "x"}},
 		{name: "C1 SOS with C1 ST", input: "\x98hello\x9C", expected: []string{"\x98hello\x9C"}},
-		{name: "C1 PM with 7-bit ST", input: "\x9Emsg\x1b\\", expected: []string{"\x9Emsg\x1b\\"}},
+		{name: "C1 PM with 7-bit ST is not parsed as one sequence", input: "\x9Emsg\x1b\\", expected: []string{"\x9E", "m", "s", "g", "\x1b", "\\"}},
 		{name: "C1 APC with C1 ST", input: "\x9Fdata\x9C", expected: []string{"\x9Fdata\x9C"}},
 		{name: "single C1 Fe control", input: "\x84", expected: []string{"\x84"}},
 		{name: "C1 OSC unterminated", input: "\x9D0;title", expected: []string{"\x9D", "0", ";", "t", "i", "t", "l", "e"}},
@@ -270,7 +270,7 @@ func TestAnsiEscapeSequencesBothEnabledAsGraphemes(t *testing.T) {
 		{
 			name:     "C1 OSC with 7-bit ST terminator",
 			input:    "\x9D0;Title\x1b\\",
-			expected: []string{"\x9D0;Title\x1b\\"},
+			expected: []string{"\x9D", "0", ";", "T", "i", "t", "l", "e", "\x1b\\"},
 		},
 		{
 			name:     "7-bit OSC with C1 ST terminator",
@@ -290,7 +290,7 @@ func TestAnsiEscapeSequencesBothEnabledAsGraphemes(t *testing.T) {
 		{
 			name:     "C1 DCS with 7-bit ST terminator",
 			input:    "\x90qpayload\x1b\\",
-			expected: []string{"\x90qpayload\x1b\\"},
+			expected: []string{"\x90", "q", "p", "a", "y", "l", "o", "a", "d", "\x1b\\"},
 		},
 		{
 			name:     "7-bit DCS with C1 ST terminator",
@@ -340,7 +340,7 @@ func TestAnsiEscapeSequencesBothEnabledAsGraphemes(t *testing.T) {
 		{
 			name:     "C1 PM with 7-bit ST terminator",
 			input:    "\x9Emsg\x1b\\",
-			expected: []string{"\x9Emsg\x1b\\"},
+			expected: []string{"\x9E", "m", "s", "g", "\x1b\\"},
 		},
 		{
 			name:     "C1 APC with C1 ST terminator",
diff --git a/graphemes/comparative/comparative_test.go b/graphemes/comparative/comparative_test.go
index f91de82..b28a4e2 100644
--- a/graphemes/comparative/comparative_test.go
+++ b/graphemes/comparative/comparative_test.go
@@ -111,11 +111,8 @@ func TestAnsiBoundaryAgreement(t *testing.T) {
 		// C1 8-bit
 		{"C1 CSI then text", "\x9B31mhello"},
 		{"C1 OSC with C1 ST", "\x9D0;Title\x9C"},
-		{"C1 OSC with 7-bit ST", "\x9D0;Title\x1b\\"},
-		{"C1 DCS with 7-bit ST", "\x90qpayload\x1b\\"},
 		{"C1 DCS with C1 ST", "\x90qpayload\x9C"},
 		{"C1 SOS with C1 ST", "\x98hello\x9C"},
-		{"C1 PM with 7-bit ST", "\x9Emsg\x1b\\"},
 		{"C1 APC with C1 ST", "\x9Fdata\x9C"},
 
 		// CSI variants (from charmbracelet test suite)

From baed4dba8cb6811b05e011b517ad97297185cfe8 Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 16:34:56 -0500
Subject: [PATCH 09/15] Add fuzz for ANSI

---
 graphemes/fuzz_test.go | 70 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/graphemes/fuzz_test.go b/graphemes/fuzz_test.go
index d6b840a..9d39677 100644
--- a/graphemes/fuzz_test.go
+++ b/graphemes/fuzz_test.go
@@ -153,3 +153,73 @@ func FuzzInvalid(f *testing.F) {
 		}
 	})
 }
+
+// FuzzANSIOptions fuzzes iterator roundtripping across the ANSI options.
+// It exercises every combination: both off, 7-bit only, 8-bit only, and both on.
+func FuzzANSIOptions(f *testing.F) {
+	if testing.Short() {
+		f.Skip("skipping fuzz test in short mode")
+	}
+
+	seeds := [][]byte{
+		[]byte("\x1b[31mhello\x1b[0m"),            // 7-bit CSI
+		[]byte("\x1b]0;Title\x07"),                // 7-bit OSC + BEL
+		[]byte("\x1bPqpayload\x1b\\"),             // 7-bit DCS + 7-bit ST
+		[]byte("\x9B31mhello"),                    // C1 CSI
+		[]byte("\x9D0;Title\x9C"),                 // C1 OSC + C1 ST
+		[]byte("\x90qpayload\x9C"),                // C1 DCS + C1 ST
+		[]byte("\x98hello\x9C"),                   // C1 SOS + C1 ST
+		[]byte("\x9Emsg\x9C"),                     // C1 PM + C1 ST
+		[]byte("\x9Fdata\x9C"),                    // C1 APC + C1 ST
+		[]byte("\x1b]0;Title\x9C"),                // 7-bit initiator + C1 ST (strict negative)
+		[]byte("\x9D0;Title\x1b\\"),               // C1 initiator + 7-bit ST (strict negative)
+		[]byte("\x1b]0;本\x07"),                    // UTF-8 in OSC payload
+		[]byte("\x90q本\x9C"),                     // UTF-8 in C1 DCS payload
+		[]byte("\x1b[31m\x9B1;32mtext\x1b[0m"),    // mixed 7-bit + 8-bit CSI
+		[]byte("\x1b"),                            // truncated ESC
+		[]byte("\x9D0;unterminated"),              // unterminated C1 OSC
+		[]byte("plain UTF-8: café 日本語 👩🏽‍💻"), // non-ANSI UTF-8
+	}
+	for _, s := range seeds {
+		f.Add(s)
+	}
+
+	f.Fuzz(func(t *testing.T, original []byte) {
+		validOriginal := utf8.Valid(original)
+
+		modes := []struct {
+			name     string
+			ansi7Bit bool
+			ansi8Bit bool
+		}{
+			{name: "off", ansi7Bit: false, ansi8Bit: false},
+			{name: "7bit", ansi7Bit: true, ansi8Bit: false},
+			{name: "8bit", ansi7Bit: false, ansi8Bit: true},
+			{name: "both", ansi7Bit: true, ansi8Bit: true},
+		}
+
+		for _, mode := range modes {
+			tokens := graphemes.FromBytes(original)
+			tokens.AnsiEscapeSequences = mode.ansi7Bit
+			tokens.AnsiEscapeSequences8Bit = mode.ansi8Bit
+
+			var all [][]byte
+			for tokens.Next() {
+				all = append(all, tokens.Value())
+			}
+
+			roundtrip := make([]byte, 0, len(original))
+			for _, s := range all {
+				roundtrip = append(roundtrip, s...)
+			}
+
+			if !bytes.Equal(roundtrip, original) {
+				t.Fatalf("%s mode: bytes did not roundtrip", mode.name)
+			}
+
+			if validOriginal != utf8.Valid(roundtrip) {
+				t.Fatalf("%s mode: utf8 validity of original did not match roundtrip", mode.name)
+			}
+		}
+	})
+}

From e2754ac2988f8ed11d691fa3db79b1242d06a4ee Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 16:54:37 -0500
Subject: [PATCH 10/15] Add fuzz to Actions

---
 .github/workflows/gofuzz.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/gofuzz.yml b/.github/workflows/gofuzz.yml
index 0168a66..13f29dd 100644
--- a/.github/workflows/gofuzz.yml
+++ b/.github/workflows/gofuzz.yml
@@ -11,7 +11,7 @@ jobs:
     strategy:
         matrix:
           package: [words, sentences, graphemes, phrases]
-          fuzzer: [FuzzValidShort, FuzzValidLong, FuzzInvalid]
+          fuzzer: [FuzzValidShort, FuzzValidLong, FuzzInvalid, FuzzANSIOptions]
     runs-on: ubuntu-latest
     steps:
     - name: Set up Go

From 43a34cf68d4c586d4bb19a87d5a635b24e73de06 Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 22:17:24 -0500
Subject: [PATCH 11/15] Add DecodeSequence parity and known-divergence tests

---
 graphemes/comparative/comparative_test.go | 67 +++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/graphemes/comparative/comparative_test.go b/graphemes/comparative/comparative_test.go
index b28a4e2..2e3b8ef 100644
--- a/graphemes/comparative/comparative_test.go
+++ b/graphemes/comparative/comparative_test.go
@@ -159,6 +159,19 @@ func TestAnsiBoundaryAgreement(t *testing.T) {
 
 		// Plain text (no ANSI)
 		{"plain ASCII", "hello world"},
+
+		// DecodeSequence parser parity edge cases
+		{"single ESC byte", "\x1b"},
+		{"single NUL byte", "\x00"},
+		{"ASCII DEL byte", "\x7f"},
+		{"DEL between ASCII runes", "a\x7fb"},
+		{"double ESC", "\x1b\x1b"},
+		{"double ST 7-bit", "\x1b\\\x1b\\"},
+		{"double ST 8-bit", "\x9c\x9c"},
+		{"single-param OSC", "\x1b]112\x07"},
+		{"ESC with intermediate", "\x1b Q"},
+		{"DCS containing DEL payload", "\x1bP1;2+xa\x7fb\x1b\\"},
+		{"OSC with C1 bytes in payload", "\x1b]11;\x90?\x1b\\"},
 	}
 
 	for _, tt := range tests {
@@ -172,6 +185,60 @@ func TestAnsiBoundaryAgreement(t *testing.T) {
 	}
 }
 
+// TestAnsiBoundaryKnownDivergences documents cases where our grapheme-oriented
+// tokenizer intentionally differs from charmbracelet/x/ansi DecodeSequence.
+func TestAnsiBoundaryKnownDivergences(t *testing.T) {
+	tests := []struct {
+		name   string
+		input  string
+		reason string
+	}{
+		{
+			name:   "unterminated CSI",
+			input:  "\x1b[1;2;3",
+			reason: "DecodeSequence returns one unterminated CSI token; we split when no final byte is present",
+		},
+		{
+			name:   "unterminated OSC",
+			input:  "\x1b]11;ff/00/ff",
+			reason: "DecodeSequence returns one unterminated OSC token; we split when OSC has no BEL/ST/CAN/SUB terminator",
+		},
+		{
+			name:   "unterminated OSC followed by CSI",
+			input:  "\x1b]11;ff/00/ff\x1b[1;2;3m",
+			reason: "DecodeSequence ends OSC at ESC and parses following CSI; we require explicit OSC terminator",
+		},
+		{
+			name:   "unterminated OSC followed by bare ESC",
+			input:  "\x1b]11;ff/00/ff\x1b",
+			reason: "DecodeSequence emits unterminated OSC then ESC; we split because OSC is invalid without terminator",
+		},
+		{
+			name:   "unterminated DCS",
+			input:  "\x1bP1;2+xa",
+			reason: "DecodeSequence returns one unterminated DCS token; we split when DCS has no ST/CAN/SUB terminator",
+		},
+		{
+			name:   "invalid DCS immediately terminated",
+			input:  "\x1bP\x1b\\ab",
+			reason: "DecodeSequence emits ESC P token before ST; we do not treat invalid DCS start as a sequence",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ours := uax29Tokens(tt.input)
+			theirs := charmTokens(tt.input)
+			if reflect.DeepEqual(ours, theirs) {
+				t.Fatalf("expected divergence, but boundaries matched\nreason: %s\ntokens: %q", tt.reason, ours)
+			}
+			t.Logf("reason: %s", tt.reason)
+			t.Logf("ours:   %q", ours)
+			t.Logf("theirs: %q", theirs)
+		})
+	}
+}
+
 // ansiSample builds a realistic ANSI-heavy string simulating colored terminal output.
 func ansiSample() string {
 	var b strings.Builder

From 0072a1b0f46f5e4564db1d360d564267f4d3c101 Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 22:59:22 -0500
Subject: [PATCH 12/15] Read each byte once in stSequenceLengthC1

---
 graphemes/ansi8.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/graphemes/ansi8.go b/graphemes/ansi8.go
index a5fde3b..ec66446 100644
--- a/graphemes/ansi8.go
+++ b/graphemes/ansi8.go
@@ -58,10 +58,11 @@ func oscLengthC1[T ~string | ~[]byte](data T) int {
 // (0x9C), or are canceled by CAN/SUB.
 func stSequenceLengthC1[T ~string | ~[]byte](data T) int {
 	for i := 0; i < len(data); i++ {
-		if data[i] == can || data[i] == sub {
+		b := data[i]
+		if b == can || b == sub {
 			return i
 		}
-		if data[i] == st {
+		if b == st {
 			return i + 1
 		}
 	}

From 902cb81f35ceef5005db822b4e354999f9b729ec Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 23:06:11 -0500
Subject: [PATCH 13/15] Document oscLengthC1 return values and terminators

---
 graphemes/ansi8.go | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/graphemes/ansi8.go b/graphemes/ansi8.go
index ec66446..d9b0c48 100644
--- a/graphemes/ansi8.go
+++ b/graphemes/ansi8.go
@@ -41,6 +41,15 @@ func ansiEscapeLength8Bit[T ~string | ~[]byte](data T) int {
 	return 0
 }
 
+// oscLengthC1 returns the length of a C1 OSC body.
+// data is the slice after the C1 OSC initiator (0x9D).
+//
+// Returns:
+//   - n >= 0: consumed body length (includes BEL/ST terminator when present)
+//   - -1: not terminated in the provided data
+//
+// Terminators: BEL (0x07) or C1 ST (0x9C).
+// CAN (0x18) and SUB (0x1A) cancel the control string.
 func oscLengthC1[T ~string | ~[]byte](data T) int {
 	for i := 0; i < len(data); i++ {
 		b := data[i]

From 81b723c283133749c02e3b6a7a4d9b23140dceb6 Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Sun, 15 Feb 2026 23:06:26 -0500
Subject: [PATCH 14/15] more efficient CI fuzz

---
 .github/workflows/gofuzz.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/gofuzz.yml b/.github/workflows/gofuzz.yml
index 13f29dd..ca6306f 100644
--- a/.github/workflows/gofuzz.yml
+++ b/.github/workflows/gofuzz.yml
@@ -11,7 +11,10 @@ jobs:
     strategy:
         matrix:
           package: [words, sentences, graphemes, phrases]
-          fuzzer: [FuzzValidShort, FuzzValidLong, FuzzInvalid, FuzzANSIOptions]
+          fuzzer: [FuzzValidShort, FuzzValidLong, FuzzInvalid]
+          include:
+            - package: graphemes
+              fuzzer: FuzzANSIOptions
     runs-on: ubuntu-latest
     steps:
     - name: Set up Go

From 1adff8283a122837c7a533313b24125e8c9e832e Mon Sep 17 00:00:00 2001
From: Matt Sherman <mwsherman@gmail.com>
Date: Mon, 16 Feb 2026 09:51:07 -0500
Subject: [PATCH 15/15] comments

---
 graphemes/iterator.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/graphemes/iterator.go b/graphemes/iterator.go
index a3f9aac..90d669a 100644
--- a/graphemes/iterator.go
+++ b/graphemes/iterator.go
@@ -27,12 +27,12 @@ type Iterator[T ~string | ~[]byte] struct {
 	data  T
 	pos   int
 	start int
-	// AnsiEscapeSequences treats 7-bit ANSI escape sequences (ECMA-48) as
+	// AnsiEscapeSequences treats 7-bit C0 ANSI escape sequences (ECMA-48) as
 	// single grapheme clusters when true. The default is false.
 	//
 	// 8-bit controls are not enabled by this option. See [AnsiEscapeSequences8Bit].
 	AnsiEscapeSequences bool
-	// AnsiEscapeSequences8Bit treats 8-bit C1 control codes (ECMA-48) as single
+	// AnsiEscapeSequences8Bit treats 8-bit C1 ANSI escape sequences (ECMA-48) as single
 	// grapheme clusters when true. The default is false.
 	//
 	// 8-bit control bytes are not UTF-8 encoded, i.e. not valid UTF-8. If you
@@ -52,7 +52,7 @@ const (
 	bel = 0x07
 	can = 0x18
 	sub = 0x1A
-	st  = 0x9C // C1 String Terminator
+	st  = 0x9C
 )
 
 // Next advances the iterator to the next grapheme cluster.