From 453603f6485c825a6da202d0b94dcd68e2673bc2 Mon Sep 17 00:00:00 2001 From: Jose Alekhinne Date: Sun, 10 May 2026 14:29:38 -0700 Subject: [PATCH] fix(sanitize): UTF-8-safe truncation, Zl/Zp stripping, opts length caps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to PR #76 (MCP-SAN + MCP-COV hardening) addressing three issues surfaced during post-merge review: 1. truncate() now backs up to a UTF-8 rune boundary via utf8.RuneStart so byte-level cuts never produce invalid UTF-8. Reflect() and SessionID() delegate to the shared helper. 2. StripControl() now drops U+2028 (LINE SEPARATOR) and U+2029 (PARAGRAPH SEPARATOR) explicitly. unicode.IsControl does not match these (categories Zl/Zp), yet Markdown renderers may treat them as line breaks — leaving them in opens a newline injection path through reflected content. Constants live in internal/config/sanitize. 3. SanitizedOpts now enforces MaxOptsFieldLen (4 KB) on context, rationale, consequence, lesson, and application — secondary prose fields that previously had no length cap. Signature changes to (entity.EntryOpts, error); the two call sites in route/tool/tool.go propagate the error to the MCP client via InputTooLong, naming the offending field. Spec: specs/sanitize-hardening-followup.md --- internal/config/mcp/cfg/config.go | 5 + internal/config/sanitize/sanitize.go | 12 ++ internal/mcp/server/extract/extract.go | 37 ++++- internal/mcp/server/extract/extract_test.go | 82 ++++++++++- internal/mcp/server/route/tool/tool.go | 17 ++- internal/sanitize/content.go | 16 +- internal/sanitize/path.go | 5 +- internal/sanitize/reflect.go | 10 +- internal/sanitize/sanitize_test.go | 113 +++++++++++++++ internal/sanitize/truncate.go | 24 ++- specs/sanitize-hardening-followup.md | 153 ++++++++++++++++++++ 11 files changed, 443 insertions(+), 31 deletions(-) create mode 100644 specs/sanitize-hardening-followup.md diff --git a/internal/config/mcp/cfg/config.go b/internal/config/mcp/cfg/config.go index 33a0d1774..d78df750c 100644 --- a/internal/config/mcp/cfg/config.go +++ b/internal/config/mcp/cfg/config.go @@ -32,4 +32,9 @@ const ( MaxCallerLen = 128 // MaxURILen is the maximum byte length for resource URIs. MaxURILen = 512 + // MaxOptsFieldLen is the maximum byte length for secondary entry + // option fields (context, rationale, consequence, lesson, + // application). Tighter than MaxContentLen because these are + // qualifiers, not primary content. + MaxOptsFieldLen = 4_000 ) diff --git a/internal/config/sanitize/sanitize.go b/internal/config/sanitize/sanitize.go index 0bd7fd0ff..100c57395 100644 --- a/internal/config/sanitize/sanitize.go +++ b/internal/config/sanitize/sanitize.go @@ -32,3 +32,15 @@ const ( // identifier. MaxSessionIDLen = 128 ) + +// Unicode line/paragraph separators that [unicode.IsControl] does +// not match but which Markdown renderers may interpret as line +// breaks. Stripped from reflected content alongside ASCII control +// characters to close a newline injection path. +const ( + // LineSeparator is U+2028 (LINE SEPARATOR, category Zl). + LineSeparator = '
' + + // ParagraphSeparator is U+2029 (PARAGRAPH SEPARATOR, category Zp). + ParagraphSeparator = '
' +) diff --git a/internal/mcp/server/extract/extract.go b/internal/mcp/server/extract/extract.go index 270b4dad9..3f7ae8eb4 100644 --- a/internal/mcp/server/extract/extract.go +++ b/internal/mcp/server/extract/extract.go @@ -90,22 +90,53 @@ func Opts(args map[string]interface{}) entity.EntryOpts { } // SanitizedOpts builds EntryOpts with content sanitization applied -// to all text fields. +// to all text fields. Returns an error if any secondary field +// exceeds [cfg.MaxOptsFieldLen]; length is checked on raw input +// before sanitization to prevent abuse via large payloads. // // Parameters: // - args: MCP tool arguments with optional entry fields // // Returns: // - entity.EntryOpts: sanitized options struct +// - error: non-nil if any secondary field exceeds MaxOptsFieldLen func SanitizedOpts( args map[string]interface{}, -) entity.EntryOpts { +) (entity.EntryOpts, error) { opts := Opts(args) + // MCP-SAN.1: Enforce length on secondary prose fields before + // sanitization. Order is fixed so error messages are + // deterministic for tests. + if len(opts.Context) > cfg.MaxOptsFieldLen { + return entity.EntryOpts{}, errMcp.InputTooLong( + cli.AttrContext, cfg.MaxOptsFieldLen, + ) + } + if len(opts.Rationale) > cfg.MaxOptsFieldLen { + return entity.EntryOpts{}, errMcp.InputTooLong( + cli.AttrRationale, cfg.MaxOptsFieldLen, + ) + } + if len(opts.Consequence) > cfg.MaxOptsFieldLen { + return entity.EntryOpts{}, errMcp.InputTooLong( + cli.AttrConsequence, cfg.MaxOptsFieldLen, + ) + } + if len(opts.Lesson) > cfg.MaxOptsFieldLen { + return entity.EntryOpts{}, errMcp.InputTooLong( + cli.AttrLesson, cfg.MaxOptsFieldLen, + ) + } + if len(opts.Application) > cfg.MaxOptsFieldLen { + return entity.EntryOpts{}, errMcp.InputTooLong( + cli.AttrApplication, cfg.MaxOptsFieldLen, + ) + } opts.Context = sanitize.Content(opts.Context) opts.Rationale = sanitize.Content(opts.Rationale) opts.Consequence = sanitize.Content(opts.Consequence) opts.Lesson = sanitize.Content(opts.Lesson) opts.Application = sanitize.Content(opts.Application) opts.SessionID = sanitize.SessionID(opts.SessionID) - return opts + return opts, nil } diff --git a/internal/mcp/server/extract/extract_test.go b/internal/mcp/server/extract/extract_test.go index 1ac47af8d..180bb418e 100644 --- a/internal/mcp/server/extract/extract_test.go +++ b/internal/mcp/server/extract/extract_test.go @@ -106,8 +106,88 @@ func TestSanitizedOpts(t *testing.T) { "context": "safe text", "rationale": "good reason", } - opts := SanitizedOpts(args) + opts, err := SanitizedOpts(args) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } if opts.Context != "safe text" { t.Errorf("context = %q", opts.Context) } } + +func TestSanitizedOptsEmpty(t *testing.T) { + opts, err := SanitizedOpts(map[string]interface{}{}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if opts.Context != "" || opts.Rationale != "" { + t.Errorf("expected empty opts, got %+v", opts) + } +} + +func TestSanitizedOptsContextTooLong(t *testing.T) { + args := map[string]interface{}{ + "context": strings.Repeat("x", cfg.MaxOptsFieldLen+1), + } + _, err := SanitizedOpts(args) + if err == nil { + t.Fatal("expected InputTooLong error for context") + } + if !strings.Contains(err.Error(), "context") { + t.Errorf("error should name field 'context', got %q", err.Error()) + } +} + +func TestSanitizedOptsRationaleTooLong(t *testing.T) { + args := map[string]interface{}{ + "rationale": strings.Repeat("y", cfg.MaxOptsFieldLen+1), + } + _, err := SanitizedOpts(args) + if err == nil { + t.Fatal("expected InputTooLong error for rationale") + } + if !strings.Contains(err.Error(), "rationale") { + t.Errorf("error should name field 'rationale', got %q", err.Error()) + } +} + +func TestSanitizedOptsConsequenceTooLong(t *testing.T) { + args := map[string]interface{}{ + "consequence": strings.Repeat("z", cfg.MaxOptsFieldLen+1), + } + _, err := SanitizedOpts(args) + if err == nil { + t.Fatal("expected InputTooLong error for consequence") + } +} + +func TestSanitizedOptsLessonTooLong(t *testing.T) { + args := map[string]interface{}{ + "lesson": strings.Repeat("a", cfg.MaxOptsFieldLen+1), + } + _, err := SanitizedOpts(args) + if err == nil { + t.Fatal("expected InputTooLong error for lesson") + } +} + +func TestSanitizedOptsApplicationTooLong(t *testing.T) { + args := map[string]interface{}{ + "application": strings.Repeat("b", cfg.MaxOptsFieldLen+1), + } + _, err := SanitizedOpts(args) + if err == nil { + t.Fatal("expected InputTooLong error for application") + } +} + +func TestSanitizedOptsAtBoundary(t *testing.T) { + // Exactly MaxOptsFieldLen bytes — must be accepted. + args := map[string]interface{}{ + "rationale": strings.Repeat("ok", cfg.MaxOptsFieldLen/2), + } + _, err := SanitizedOpts(args) + if err != nil { + t.Errorf("boundary value rejected: %v", err) + } +} diff --git a/internal/mcp/server/route/tool/tool.go b/internal/mcp/server/route/tool/tool.go index b2057ea7f..55b381332 100644 --- a/internal/mcp/server/route/tool/tool.go +++ b/internal/mcp/server/route/tool/tool.go @@ -54,9 +54,11 @@ func add( // MCP-SAN.3: Sanitize content before writing to .context/. content = sanitize.Content(content) - t, addErr := handler.Add( - d, entryType, content, extract.SanitizedOpts(args), - ) + opts, optsErr := extract.SanitizedOpts(args) + if optsErr != nil { + return out.ToolError(id, optsErr.Error()) + } + t, addErr := handler.Add(d, entryType, content, opts) return out.ToolResult(id, t, addErr) } @@ -161,10 +163,11 @@ func watchUpdate( // MCP-SAN.3: Sanitize content before writing to .context/. content = sanitize.Content(content) - t, updateErr := handler.WatchUpdate( - d, entryType, content, - extract.SanitizedOpts(args), - ) + opts, optsErr := extract.SanitizedOpts(args) + if optsErr != nil { + return out.ToolError(id, optsErr.Error()) + } + t, updateErr := handler.WatchUpdate(d, entryType, content, opts) return out.ToolResult(id, t, updateErr) } diff --git a/internal/sanitize/content.go b/internal/sanitize/content.go index c849f0c6b..3dbe25529 100644 --- a/internal/sanitize/content.go +++ b/internal/sanitize/content.go @@ -39,14 +39,21 @@ func Content(s string) string { return s } -// StripControl removes ASCII control characters from s, preserving -// tab (\t), line feed (\n), and carriage return (\r). +// StripControl removes ASCII control characters and Unicode line/ +// paragraph separators (U+2028, U+2029) from s, preserving tab (\t), +// line feed (\n), and carriage return (\r). +// +// U+2028 (Zl) and U+2029 (Zp) are filtered explicitly because +// [unicode.IsControl] does not match them, yet Markdown renderers +// may treat them as line breaks. Stripping them closes a newline +// injection path through reflected content. // // Parameters: // - s: input string potentially containing control characters // // Returns: -// - string: s with control characters removed (except tab and newlines) +// - string: s with control characters and Zl/Zp separators removed +// (tab and newlines preserved) func StripControl(s string) string { return strings.Map(func(r rune) rune { if r == rune(token.Tab[0]) || @@ -54,6 +61,9 @@ func StripControl(s string) string { r == rune(token.NewlineCRLF[0]) { return r } + if r == cfgSan.LineSeparator || r == cfgSan.ParagraphSeparator { + return -1 + } if unicode.IsControl(r) { return -1 } diff --git a/internal/sanitize/path.go b/internal/sanitize/path.go index 07b5cb93a..54a7156e6 100644 --- a/internal/sanitize/path.go +++ b/internal/sanitize/path.go @@ -29,8 +29,5 @@ func SessionID(s string) string { s = strings.ReplaceAll(s, cfgSan.Backslash, "") s = regex.SanSessionIDUnsafe.ReplaceAllString(s, cfgSan.HyphenReplace) s = strings.Trim(s, cfgSan.HyphenReplace) - if len(s) > cfgSan.MaxSessionIDLen { - s = s[:cfgSan.MaxSessionIDLen] - } - return s + return truncate(s, cfgSan.MaxSessionIDLen) } diff --git a/internal/sanitize/reflect.go b/internal/sanitize/reflect.go index 8a3c6c298..004c7f2e2 100644 --- a/internal/sanitize/reflect.go +++ b/internal/sanitize/reflect.go @@ -7,8 +7,8 @@ package sanitize // Reflect strips control characters from s and truncates to maxLen -// bytes. Used when reflecting untrusted input back in error messages -// to prevent log injection. +// bytes on a UTF-8 rune boundary. Used when reflecting untrusted +// input back in error messages to prevent log injection. // // Parameters: // - s: untrusted input string to sanitize for reflection @@ -17,9 +17,5 @@ package sanitize // Returns: // - string: s with control characters removed and length capped func Reflect(s string, maxLen int) string { - s = StripControl(s) - if maxLen > 0 && len(s) > maxLen { - s = s[:maxLen] - } - return s + return truncate(StripControl(s), maxLen) } diff --git a/internal/sanitize/sanitize_test.go b/internal/sanitize/sanitize_test.go index 47863a626..c70a938dc 100644 --- a/internal/sanitize/sanitize_test.go +++ b/internal/sanitize/sanitize_test.go @@ -9,6 +9,7 @@ package sanitize import ( "strings" "testing" + "unicode/utf8" ) func TestContentEscapesEntryHeaders(t *testing.T) { @@ -170,3 +171,115 @@ func TestSessionIDReplacesUnsafe(t *testing.T) { t.Errorf("got %q, contains unsafe chars", got) } } + +// --- UTF-8-safe truncation (follow-up to PR #76) --- + +func TestTruncateMultibyteRuneBoundary(t *testing.T) { + // "héllo" — 'é' is U+00E9, a 2-byte UTF-8 rune (0xC3 0xA9). + // Bytes: 'h' 0xC3 0xA9 'l' 'l' 'o' (6 bytes total). + // Cutting at 2 bytes lands inside 'é'; must back up to 1. + got := truncate("héllo", 2) + if got != "h" { + t.Errorf("truncate at mid-rune = %q (% x), want %q", got, got, "h") + } + if !utf8Valid(got) { + t.Errorf("truncate produced invalid UTF-8: % x", got) + } +} + +func TestTruncateThreeByteRune(t *testing.T) { + // '€' is U+20AC, 3 bytes (0xE2 0x82 0xAC). + // Cutting "a€b" at 2 lands inside '€'; back up to 1. + got := truncate("a€b", 2) + if got != "a" { + t.Errorf("got %q, want %q", got, "a") + } +} + +func TestTruncateFourByteRune(t *testing.T) { + // '🜲' is U+1F732, 4 bytes (0xF0 0x9F 0x9C 0xB2). + // Cutting "🜲x" at 2 lands inside the emoji; back up to 0. + got := truncate("🜲x", 2) + if got != "" { + t.Errorf("got %q (% x), want empty", got, got) + } +} + +func TestTruncateAtExactRuneBoundary(t *testing.T) { + // "héllo" cut at 3 bytes lands exactly after 'é' (1+2=3). + got := truncate("héllo", 3) + if got != "hé" { + t.Errorf("got %q, want %q", got, "hé") + } +} + +func TestTruncatePreservesValidUTF8(t *testing.T) { + // Repeated 2-byte rune; any cut must terminate at an even byte + // offset relative to the start of the run. + in := strings.Repeat("é", 100) // 200 bytes + for cut := 0; cut <= 200; cut++ { + got := truncate(in, cut) + if !utf8Valid(got) { + t.Errorf("cut=%d produced invalid UTF-8: % x", cut, got) + } + } +} + +func TestReflectMultibyteSafe(t *testing.T) { + // Reflect should also produce valid UTF-8 when truncating. + got := Reflect("héllo world", 2) + if got != "h" { + t.Errorf("Reflect mid-rune = %q, want %q", got, "h") + } +} + +func TestSessionIDMultibyteSafe(t *testing.T) { + // Pre-sanitize the string so unsafe chars become hyphens, then + // run a long input through SessionID; the result must be valid + // UTF-8 even at the truncation boundary. + in := strings.Repeat("a", 130) // exceeds MaxSessionIDLen=128 + got := SessionID(in) + if !utf8Valid(got) { + t.Errorf("SessionID produced invalid UTF-8: % x", got) + } +} + +// --- Zl/Zp separator stripping (follow-up to PR #76) --- + +func TestStripControlRemovesLineSeparator(t *testing.T) { + // U+2028 — LINE SEPARATOR. + got := StripControl("a
b") + if got != "ab" { + t.Errorf("got %q, want %q", got, "ab") + } +} + +func TestStripControlRemovesParagraphSeparator(t *testing.T) { + // U+2029 — PARAGRAPH SEPARATOR. + got := StripControl("a
b") + if got != "ab" { + t.Errorf("got %q, want %q", got, "ab") + } +} + +func TestStripControlRemovesBothSeparators(t *testing.T) { + got := StripControl("x
y
z") + if got != "xyz" { + t.Errorf("got %q, want %q", got, "xyz") + } +} + +func TestReflectStripsLineSeparator(t *testing.T) { + // Newline injection via U+2028 must not survive reflection in + // error messages. + got := Reflect("tool
name", 0) + if got != "toolname" { + t.Errorf("got %q, want %q", got, "toolname") + } +} + +// utf8Valid is a thin alias for [utf8.ValidString], used to make the +// "did we cut mid-rune?" assertion read clearly at the call sites. +func utf8Valid(s string) bool { + return utf8.ValidString(s) +} diff --git a/internal/sanitize/truncate.go b/internal/sanitize/truncate.go index ac85867bb..39811b3ab 100644 --- a/internal/sanitize/truncate.go +++ b/internal/sanitize/truncate.go @@ -6,18 +6,30 @@ package sanitize -// truncate returns s truncated to maxLen bytes. -// If maxLen is 0 or negative, s is returned unchanged. +import "unicode/utf8" + +// truncate returns s truncated to at most maxLen bytes, with the cut +// snapped to a UTF-8 rune boundary so the result is always valid +// UTF-8. If maxLen is 0 or negative, s is returned unchanged. +// +// When the byte at position maxLen is a UTF-8 continuation byte, the +// cut backs up to the start of that rune so the partial rune is +// dropped entirely. If the entire prefix is one large rune that would +// not fit, the result is the empty string. // // Parameters: // - s: input string to truncate // - maxLen: maximum byte length; 0 or negative means no truncation // // Returns: -// - string: s truncated to at most maxLen bytes +// - string: s truncated to at most maxLen bytes, valid UTF-8 func truncate(s string, maxLen int) string { - if maxLen > 0 && len(s) > maxLen { - return s[:maxLen] + if maxLen <= 0 || len(s) <= maxLen { + return s + } + cut := maxLen + for cut > 0 && !utf8.RuneStart(s[cut]) { + cut-- } - return s + return s[:cut] } diff --git a/specs/sanitize-hardening-followup.md b/specs/sanitize-hardening-followup.md new file mode 100644 index 000000000..240ba25d4 --- /dev/null +++ b/specs/sanitize-hardening-followup.md @@ -0,0 +1,153 @@ +--- +title: Sanitize hardening follow-up (UTF-8 truncation, Zl/Zp, opts caps) +date: 2026-05-10 +status: ready +owner: jose +scope: bug fix — three surgical fixes to internal/sanitize and MCP extract +related: + - specs/future-complete/context-hub.md +prior: + - PR #76 (MCP-SAN + MCP-COV hardening) — landed at cf097a69 +--- + +# Spec: Sanitize Hardening Follow-up + +PR #76 added the `internal/sanitize` package and applied it across the +MCP server's untrusted input surface. Three issues surfaced during +post-merge review. This spec records the corrective fixes. + +## Problem + +### Issue 1 — Byte-level truncation can split UTF-8 runes + +`internal/sanitize/truncate.go:18-23`, `reflect.go:21-23`, and +`path.go:32-34` all do `s = s[:maxLen]` on bytes. If the cut lands +inside a multi-byte UTF-8 rune, the result contains an invalid +trailing byte sequence. Existing tests only cover ASCII so the bug +is silently uncovered. Downstream consumers (JSON encoders, file +writers, log lines) handle invalid UTF-8 inconsistently — some +escape, some replace with U+FFFD, some pass through. + +### Issue 2 — `StripControl` misses U+2028 and U+2029 + +`internal/sanitize/content.go:50-62` filters via `unicode.IsControl`. +That predicate returns **false** for U+2028 (LINE SEPARATOR) and +U+2029 (PARAGRAPH SEPARATOR), which are Unicode category `Zl`/`Zp`, +not `Cc`. Since `Content` writes into Markdown that some renderers +parse as line breaks, these can still inject visual newlines. + +### Issue 3 — Secondary opts fields have no length cap + +`extract.EntryArgs` enforces `MaxContentLen` on the primary +`content` field. `extract.SanitizedOpts` (extract.go:100-111) +sanitizes `Context`, `Rationale`, `Consequence`, `Lesson`, and +`Application` but applies no length limit. An attacker can send +10 MB in `rationale` and it will be sanitized and written. + +## Approach + +### Issue 1 — Rune-safe truncation + +`truncate(s, maxLen)` cuts at `maxLen` bytes, then backs up to a +rune-start boundary using `utf8.RuneStart`. All three internal call +sites (`Reflect`, `SessionID`, internal helpers) delegate to this +shared helper instead of duplicating the slice. The function +remains unexported. + +### Issue 2 — Explicit Zl/Zp handling + +Add an explicit `r == '
' || r == '
'` check to the +`StripControl` rune predicate before falling through to +`unicode.IsControl`. Both runes get dropped (return `-1` from the +`strings.Map` callback). + +### Issue 3 — Length cap for opts fields + +Add `MaxOptsFieldLen` constant (4 KB) to `internal/config/mcp/cfg`. +Change `SanitizedOpts` signature to `(entity.EntryOpts, error)`. +Reject any opts field exceeding the cap via `errMcp.InputTooLong`, +with the field name in the error. Update both call sites in +`internal/mcp/server/route/tool/tool.go` (`add`, `watchUpdate`) to +propagate the error to the MCP client. + +Rationale for 4 KB rather than `MaxContentLen` (32 KB): the secondary +fields are qualifiers, not primary content. A tighter cap reduces +abuse surface and pushes large prose into the primary `content` +field where it belongs. + +## Behavior + +### Happy path (Issue 1) +- ASCII input within limit: unchanged. +- Multi-byte UTF-8 input within limit: unchanged. +- Multi-byte UTF-8 input exceeding limit where the cut lands on a + rune start: result is exactly `maxLen` bytes, valid UTF-8. +- Multi-byte UTF-8 input exceeding limit where the cut lands inside + a rune: result is fewer than `maxLen` bytes, terminates on a + rune boundary, no trailing invalid bytes. + +### Happy path (Issue 2) +- Input containing `
` or `
` is stripped of both runes. +- Existing behavior for tab, LF, CR, and `Cc` control chars unchanged. + +### Happy path (Issue 3) +- Opts field within cap: passes through sanitization unchanged. +- Opts field exceeding cap: caller receives `InputTooLong(field, MaxOptsFieldLen)`. + Field name is the canonical MCP arg key (e.g., `"rationale"`). + +### Edge cases + +| Case | Expected | +|------|----------| +| `truncate("", 100)` | `""` | +| `truncate("a", 0)` | `"a"` (no truncation when maxLen ≤ 0) | +| `truncate(, 3)` | `""` (back up below the rune start) | +| `StripControl("
")` | `""` | +| `SanitizedOpts` with empty opts | no error | +| `SanitizedOpts` with `rationale = strings.Repeat("a", 4097)` | `InputTooLong("rationale", 4096)` error | + +## Interface + +No public API changes to the `sanitize` package — `truncate` stays +unexported. + +`extract.SanitizedOpts` signature changes: + +```go +// Before: +func SanitizedOpts(args map[string]interface{}) entity.EntryOpts + +// After: +func SanitizedOpts(args map[string]interface{}) (entity.EntryOpts, error) +``` + +## Implementation + +### Files to modify + +| File | Change | +|------|--------| +| `internal/sanitize/truncate.go` | Rune-safe via `utf8.RuneStart` | +| `internal/sanitize/reflect.go` | Delegate to `truncate` | +| `internal/sanitize/path.go` | Delegate to `truncate` | +| `internal/sanitize/content.go` | Add Zl/Zp check in `StripControl` | +| `internal/sanitize/sanitize_test.go` | New tests for the above | +| `internal/config/mcp/cfg/config.go` | Add `MaxOptsFieldLen = 4_000` | +| `internal/mcp/server/extract/extract.go` | New error-returning `SanitizedOpts` | +| `internal/mcp/server/extract/extract_test.go` | New length-cap tests | +| `internal/mcp/server/route/tool/tool.go` | Propagate the new error | + +## Testing + +- Unit: rune-boundary cuts at 2-, 3-, and 4-byte runes. +- Unit: Zl/Zp stripping (single, mixed, repeated). +- Unit: opts-field length rejection per field. +- Integration: existing MCP server tests must continue to pass. + +## Non-Goals + +- Adding length caps for `Branch`, `Commit`, `Priority`, `Section`, + `SessionID` (already capped via `sanitize.SessionID`). These are + outside the review scope and will be tracked separately if needed. +- Adding fuzz tests (suggested as a nit, deferred). +- Refactoring `Content`'s overlapping regex passes (separate nit).