diff --git a/docs/cosmetic-backlog.md b/docs/cosmetic-backlog.md index bdcac0491..6e37617f1 100644 --- a/docs/cosmetic-backlog.md +++ b/docs/cosmetic-backlog.md @@ -715,3 +715,8 @@ - [R202606b-ARCH-8] Scheduler 字段索引块与 per-field godoc 双份维护,必漂移 — internal/cron/scheduler.go:28-44 - [R202606b-PERF-016] prepareInbound 每条入站消息 slog.With 分配新 logger(owner 路径丢弃) — internal/dispatch/dispatch.go:658-661 - [R202606b-ARCH-7] internal/session/api 包仅 1 消费者(sysession),抽象成本>收益,可并入消费者 — internal/session/api/capabilities.go:1-42 +- [R202606c-GO-003] recordBootStep Get→Register 非原子 TOCTOU(init 期单线程实际无并发 trigger) — internal/wireup/boot.go:96 +- [R202606c-GO-005] selfupdate Checker.installed 无同步(单 goroutine Run 契约,无实际并发) — internal/selfupdate/checker.go:125 +- [R202606c-ARCH-8] wireup requiredBootSteps 缺 schedulers,scheduler 注册漏掉不被 Validate 捕获 — internal/wireup/boot.go:112 +- [R202606c-ARCH-5] attachment.Dir 是可变 package var 被 tracker 路径遍历安全守卫消费,应 const+WithDir — internal/attachment/store.go:43 +- [R202606c-CR-006] sysession renameOne 截断 title 不加省略号,clip 中途断词 — internal/sysession/auto_titler.go:685 diff --git a/internal/attachment/refcount_test.go b/internal/attachment/refcount_test.go index 62779c8c8..131d6b3b8 100644 --- a/internal/attachment/refcount_test.go +++ b/internal/attachment/refcount_test.go @@ -420,8 +420,10 @@ func TestUpdateMetaFile_AppliesMutation(t *testing.T) { // synthesise upload metadata). Tracker callers must check this and // log a warning, not crash. func TestUpdateMetaFile_MissingSidecar(t *testing.T) { + dir := t.TempDir() + metaPath := filepath.Join(dir, "ghost.meta") changed, err := UpdateMetaFile( - filepath.Join(t.TempDir(), "ghost.meta"), + metaPath, func(m *Meta) bool { return true }, ) if err == nil { @@ -430,4 +432,14 @@ func TestUpdateMetaFile_MissingSidecar(t *testing.T) { if changed { t.Errorf("changed=true for missing sidecar") } + // [R202606c-CR-003] The error string must not echo the workspace / + // absolute meta path — package policy keeps operator-only paths out + // of error strings (the path travels separately to the Observer). + msg := err.Error() + if strings.Contains(msg, metaPath) || strings.Contains(msg, dir) { + t.Errorf("error leaks absolute path: %q", msg) + } + if strings.ContainsRune(msg, filepath.Separator) { + t.Errorf("error contains path separator (likely leaks a path): %q", msg) + } } diff --git a/internal/attachment/store.go b/internal/attachment/store.go index f6b8a84f8..f0b249b05 100644 --- a/internal/attachment/store.go +++ b/internal/attachment/store.go @@ -471,7 +471,7 @@ func GCWithRefs(ctx context.Context, workspace string, opts GCOptions) (GCResult continue } abs := filepath.Join(dayPath, name) - metaPath := metaPathFor(abs) + metaPath := MetaPathFor(abs) keep, reason, err := shouldKeepAttachment(metaPath, dayTime, uploadCutoff, refCutoffMS) if err != nil { @@ -626,10 +626,12 @@ func loadMetaFile(path string) (*Meta, error) { return &m, nil } -// metaPathFor returns the sibling .meta path for an attachment +// MetaPathFor returns the sibling .meta path for an attachment // payload file. Matches the layout Persist creates: strips the -// payload extension and appends ".meta". -func metaPathFor(absPayload string) string { +// payload extension and appends ".meta". Exported so the tracker +// shares one implementation instead of mirroring it (a divergence +// would silently split the .meta namespace between GC and tracker). +func MetaPathFor(absPayload string) string { base := filepath.Base(absPayload) if idx := strings.LastIndex(base, "."); idx > 0 { return filepath.Join(filepath.Dir(absPayload), base[:idx]+".meta") @@ -661,8 +663,10 @@ func UpdateMetaFile(metaPath string, mutate func(*Meta) bool) (bool, error) { if m == nil { // Legacy attachment with no meta; we cannot append references // without inventing upload metadata, so we refuse rather than - // write a partial sidecar. - return false, fmt.Errorf("meta sidecar missing: %s", metaPath) + // write a partial sidecar. Keep the path out of the error + // string (package policy: workspace paths are operator-only). + // The caller already has metaPath for its OnMetaWriteError log. + return false, errors.New("meta sidecar missing") } if !mutate(m) { return false, nil diff --git a/internal/attachment/tracker/tracker.go b/internal/attachment/tracker/tracker.go index 99a54bc28..8383abfa5 100644 --- a/internal/attachment/tracker/tracker.go +++ b/internal/attachment/tracker/tracker.go @@ -467,12 +467,17 @@ func (t *Tracker) handleBump(job trackerJob) { t.pendingSize.Add(1) continue } - // Keep the highest timeMS observed and push the flushAt - // deadline out — coalesce semantics. + // Keep the highest timeMS observed, but DO NOT reset flushAt + // [R202606c-GO-002]: a key that is bumped faster than the + // coalesce window must still flush at its first deadline. + // Pushing flushAt out on every bump let a hot key starve + // forever (no .meta write until Stop/Flush), during which the + // GC reads stale on-disk meta and can delete a still-referenced + // attachment. Keep the original deadline so every key lands on + // disk within at most one CoalesceWindow of entering pending. if job.timeMS > prev.timeMS { prev.timeMS = job.timeMS } - prev.flushAt = flushAt t.pending[key] = prev } } @@ -586,7 +591,7 @@ func (t *Tracker) flushAll() { // at ERROR level to avoid log spam for legitimate churn (e.g. a // file deleted between persist and bump). func (t *Tracker) applyBump(key coalesceKey, bump pendingBump) { - metaPath := metaPathFor(key.absPath) + metaPath := attachment.MetaPathFor(key.absPath) changed, err := attachment.UpdateMetaFile(metaPath, func(m *attachment.Meta) bool { addedRef := m.AddReference(key.keyhash) // Always advance LastReferencedAt to max(current, bump) — @@ -643,14 +648,3 @@ func resolveAttachmentPath(workspace, p string) string { } return filepath.Join(workspace, cleaned) } - -// metaPathFor mirrors attachment.metaPathFor but is re-exported so -// tracker tests can round-trip without crossing the package -// boundary. Strip the payload extension, append .meta. -func metaPathFor(payload string) string { - base := filepath.Base(payload) - if idx := strings.LastIndex(base, "."); idx > 0 { - return filepath.Join(filepath.Dir(payload), base[:idx]+".meta") - } - return payload + ".meta" -} diff --git a/internal/attachment/tracker/tracker_test.go b/internal/attachment/tracker/tracker_test.go index 6196ea9fe..c7c1045b2 100644 --- a/internal/attachment/tracker/tracker_test.go +++ b/internal/attachment/tracker/tracker_test.go @@ -637,6 +637,94 @@ func TestStatsPendingNoRace(t *testing.T) { wg.Wait() } +// TestHandleBump_DoesNotDeferFlushDeadline (R202606c-GO-002) pins the +// fix for the starvation bug: a key bumped repeatedly faster than the +// coalesce window must still flush at its FIRST deadline, not have its +// flushAt pushed out on every bump. Before the fix, a hot key never +// reached flushDue until Stop/Flush, leaving its .meta stale on disk +// long enough for the GC to delete a still-referenced attachment. +// +// We drive handleBump directly on the test goroutine after stopping the +// worker (same pattern as the SecondSelectHonorsClose tests) so reads of +// t.pending are race-free, and inject a mutable clock so the deadline is +// deterministic. +func TestHandleBump_DoesNotDeferFlushDeadline(t *testing.T) { + ws := t.TempDir() + rel, _ := writeAttachment(t, ws, time.Now().Format("2006-01-02"), "hot", + attachment.Meta{UploadedAt: time.Now()}) + + var clockMu sync.Mutex + nowT := time.Unix(1_000_000, 0) + clock := func() time.Time { + clockMu.Lock() + defer clockMu.Unlock() + return nowT + } + advance := func(d time.Duration) { + clockMu.Lock() + defer clockMu.Unlock() + nowT = nowT.Add(d) + } + + window := time.Second + tr, err := NewTracker(Options{ + Workspaces: func(string) string { return ws }, + CoalesceWindow: window, + ChannelBuffer: 64, + Clock: clock, + Observer: &countingObs{}, + }) + if err != nil { + t.Fatal(err) + } + // Stop the worker so we can drive handleBump synchronously and read + // t.pending without racing the run goroutine. + stopCtx, cancel := context.WithTimeout(context.Background(), 3*time.Second) + defer cancel() + if err := tr.Stop(stopCtx); err != nil { + t.Fatalf("Stop: %v", err) + } + + abs := filepath.Join(ws, filepath.FromSlash(rel)) + key := coalesceKey{keyhash: "kh", absPath: abs} + + // First bump at T0 → flushAt should be T0+window. + tr.handleBump(trackerJob{kind: jobKindBump, keyhash: "kh", + absPaths: []string{rel}, timeMS: 100}) + prev, ok := tr.pending[key] + if !ok { + t.Fatalf("key not pending after first bump") + } + wantDeadline := time.Unix(1_000_000, 0).Add(window) + if !prev.flushAt.Equal(wantDeadline) { + t.Fatalf("first flushAt=%v want %v", prev.flushAt, wantDeadline) + } + + // Several more bumps, each advancing the clock by less than the + // window. The deadline must NOT move. + for i := 0; i < 5; i++ { + advance(window / 4) + tr.handleBump(trackerJob{kind: jobKindBump, keyhash: "kh", + absPaths: []string{rel}, timeMS: int64(101 + i)}) + got := tr.pending[key] + if !got.flushAt.Equal(wantDeadline) { + t.Fatalf("flushAt moved on bump %d: got %v want %v (first deadline must hold)", + i, got.flushAt, wantDeadline) + } + // timeMS still tracks the latest observed value. + if want := int64(101 + i); got.timeMS != want { + t.Fatalf("timeMS=%d want %d", got.timeMS, want) + } + } + + // Once the clock passes the first deadline, flushDue must drain it. + advance(window) + tr.flushDue() + if _, still := tr.pending[key]; still { + t.Fatalf("key still pending after deadline elapsed; flush was starved") + } +} + // TestPendingSize_TracksMapLen pins the invariant that pendingSize // stays in sync with len(t.pending) across the three mutation // paths (insert, update, delete). A divergence — e.g. forgetting diff --git a/internal/dashboard/project/files_list.go b/internal/dashboard/project/files_list.go index f01cf10ff..aa419cd45 100644 --- a/internal/dashboard/project/files_list.go +++ b/internal/dashboard/project/files_list.go @@ -199,13 +199,10 @@ func (h *Handlers) HandleFilesList(w http.ResponseWriter, r *http.Request) { } // Dirs first, then names (case-insensitive) so the listing reads like a - // file manager regardless of the OS readdir order. - sort.Slice(entries, func(i, j int) bool { - if entries[i].IsDir != entries[j].IsDir { - return entries[i].IsDir - } - return strings.ToLower(entries[i].Name) < strings.ToLower(entries[j].Name) - }) + // file manager regardless of the OS readdir order. Pre-lowercase names once + // (paired with their entry) so the comparator avoids two strings.ToLower + // allocations per comparison — ~22k comparisons for a 2000-entry dir. + sortEntries(entries) httputil.WriteJSON(w, map[string]any{ "dir": cleanDir, @@ -213,3 +210,27 @@ func (h *Handlers) HandleFilesList(w http.ResponseWriter, r *http.Request) { "truncated": truncated, }) } + +// sortEntries orders entries dirs-first, then by case-insensitive name. +// It pre-computes the lowercased names once (paired with their entry so the +// pairing survives sort swaps) rather than calling strings.ToLower twice per +// comparison. +func sortEntries(entries []listEntry) { + type entryWithLower struct { + entry listEntry + lower string + } + paired := make([]entryWithLower, len(entries)) + for i := range entries { + paired[i] = entryWithLower{entry: entries[i], lower: strings.ToLower(entries[i].Name)} + } + sort.Slice(paired, func(i, j int) bool { + if paired[i].entry.IsDir != paired[j].entry.IsDir { + return paired[i].entry.IsDir + } + return paired[i].lower < paired[j].lower + }) + for i := range paired { + entries[i] = paired[i].entry + } +} diff --git a/internal/dashboard/project/files_list_sort_test.go b/internal/dashboard/project/files_list_sort_test.go new file mode 100644 index 000000000..0d1afc212 --- /dev/null +++ b/internal/dashboard/project/files_list_sort_test.go @@ -0,0 +1,83 @@ +package project + +import ( + "sort" + "strings" + "testing" +) + +// oldSortEntries is the original in-place comparator (two ToLower per compare). +// Kept here only to pin that the optimized sortEntries produces an identical +// ordering (R202606c-PERF-001). +func oldSortEntries(entries []listEntry) { + sort.Slice(entries, func(i, j int) bool { + if entries[i].IsDir != entries[j].IsDir { + return entries[i].IsDir + } + return strings.ToLower(entries[i].Name) < strings.ToLower(entries[j].Name) + }) +} + +func namesOf(entries []listEntry) []string { + out := make([]string, len(entries)) + for i, e := range entries { + out[i] = e.Name + } + return out +} + +func TestSortEntries_MatchesOldComparator(t *testing.T) { + t.Parallel() + mixed := []listEntry{ + {Name: "Zebra.txt"}, + {Name: "alpha", IsDir: true}, + {Name: "Bravo", IsDir: true}, + {Name: "apple.go"}, + {Name: "APPLE.md"}, + {Name: "charlie", IsDir: true}, + {Name: "README"}, + {Name: "readme.lower"}, + {Name: "zeta.txt"}, + {Name: ".hidden"}, + {Name: ".Config", IsDir: true}, + } + + // Independent copies for each comparator. + want := make([]listEntry, len(mixed)) + copy(want, mixed) + oldSortEntries(want) + + got := make([]listEntry, len(mixed)) + copy(got, mixed) + sortEntries(got) + + wantNames := namesOf(want) + gotNames := namesOf(got) + if len(wantNames) != len(gotNames) { + t.Fatalf("length mismatch: want %d got %d", len(wantNames), len(gotNames)) + } + for i := range wantNames { + if wantNames[i] != gotNames[i] { + t.Fatalf("order mismatch at %d: want %v, got %v", i, wantNames, gotNames) + } + } + + // Sanity: all dirs precede all files. + seenFile := false + for _, e := range got { + if !e.IsDir { + seenFile = true + } else if seenFile { + t.Fatalf("dir %q appears after a file — dirs-first violated: %v", e.Name, gotNames) + } + } +} + +func TestSortEntries_Empty(t *testing.T) { + t.Parallel() + var entries []listEntry + sortEntries(entries) // must not panic + if len(entries) != 0 { + t.Fatalf("expected empty, got %v", entries) + } +} diff --git a/internal/platform/weixin/api.go b/internal/platform/weixin/api.go index e19f82fec..318ccb263 100644 --- a/internal/platform/weixin/api.go +++ b/internal/platform/weixin/api.go @@ -238,7 +238,10 @@ func (c *apiClient) post(ctx context.Context, endpoint string, body any) ([]byte return nil, fmt.Errorf("read body: %w", err) } if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("http %d: %s", resp.StatusCode, string(data)) + // data is the raw iLink API response body and may contain C1/bidi/ + // control bytes; sanitize before embedding in the error string so it + // cannot poison structured logs / terminal rendering at the caller. + return nil, fmt.Errorf("http %d: %s", resp.StatusCode, osutil.SanitizeForLog(string(data), 256)) } return data, nil } diff --git a/internal/platform/weixin/api_errbody_sanitize_test.go b/internal/platform/weixin/api_errbody_sanitize_test.go new file mode 100644 index 000000000..91ece97ad --- /dev/null +++ b/internal/platform/weixin/api_errbody_sanitize_test.go @@ -0,0 +1,50 @@ +package weixin + +import ( + "context" + "net/http" + "net/http/httptest" + "strings" + "testing" +) + +// R202606c-SEC-1: a non-200 iLink response body is attacker-influenced and may +// carry C0/C1/bidi/control bytes. post must wrap it through SanitizeForLog +// before embedding it in the returned error string so it cannot poison the +// caller's structured logs / terminal rendering, and must truncate to 256. +func TestPost_SanitizesErrorBody(t *testing.T) { + t.Parallel() + + // Body with embedded control bytes (NUL, tab, BEL, ESC) plus a long tail + // to exercise the 256-char truncation. + poison := "fail\x00\x07\x1b\ttail" + strings.Repeat("A", 1024) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusBadGateway) + _, _ = w.Write([]byte(poison)) + })) + defer srv.Close() + + // srv.URL is http://127.0.0.1: → loopback, so no SSRF guard is + // installed and the request reaches the test server. + c := newAPIClient(srv.URL, "tok") + _, err := c.post(context.Background(), "send", map[string]string{"k": "v"}) + if err == nil { + t.Fatal("expected error for non-200 response") + } + msg := err.Error() + + // No raw control byte may survive into the error string. + for _, b := range []byte{0x00, 0x07, 0x1b, 0x09} { + if strings.IndexByte(msg, b) >= 0 { + t.Errorf("error string still contains raw control byte 0x%02x: %q", b, msg) + } + } + // Sanitized output replaces control bytes with '_' and truncates the body + // to 256 chars, so the full 1024-char tail must NOT appear verbatim. + if strings.Contains(msg, strings.Repeat("A", 257)) { + t.Errorf("error body was not truncated to 256 chars: %q", msg) + } + if !strings.Contains(msg, "http 502") { + t.Errorf("error should still carry the status code, got %q", msg) + } +} diff --git a/internal/project/manager.go b/internal/project/manager.go index 092a75d85..c3e564d99 100644 --- a/internal/project/manager.go +++ b/internal/project/manager.go @@ -105,7 +105,22 @@ func dirModTimeMillis(entry os.DirEntry, path string) int64 { } // Scan discovers all subdirectories under root and loads their project configs. +// +// R202606c-CR-002: the whole scan — disk read, CreatedAt migration, and the +// m.projects swap — runs under the write lock. Previously only the final swap +// was locked while the on-disk read happened lock-free, so a writer +// (BindChat/SetFavorite/UpdateConfig/UnbindAllChat) could append+persist a +// binding in the window between this function reading the pre-change file and +// installing its fresh map, silently dropping the just-made change from the +// in-memory index. Serializing the entire scan against those writers (which +// now also persist under the same lock) makes the two operations atomic w.r.t. +// each other: a scan either sees a writer's change already on disk or runs +// entirely before it. Scan is periodic and project mutations are low frequency, +// so holding the lock across the disk IO is acceptable. func (m *Manager) Scan() error { + m.mu.Lock() + defer m.mu.Unlock() + entries, err := os.ReadDir(m.root) if err != nil { return fmt.Errorf("scan projects root: %w", err) @@ -262,7 +277,6 @@ func (m *Manager) Scan() error { } } - m.mu.Lock() m.projects = projects m.rebuildBindingIndex() // Drop the inode-walk fallback memo: it is keyed to the project set we just @@ -272,7 +286,6 @@ func (m *Manager) Scan() error { // concurrent ResolveWorkspaces reader never sees fresh projects with a stale // cache. Go 1.21+ sync.Map.Clear. m.resolveCache.Clear() - m.mu.Unlock() slog.Info("scanned projects", "root", m.root, "count", len(projects)) return nil @@ -338,10 +351,19 @@ func (m *Manager) BindChat(projectName, platform, chatType, chatID string) error if err := validateBindingField(platform, chatType, chatID); err != nil { return fmt.Errorf("%w: BindChat: %s", ErrInvalidConfig, err.Error()) } + // R202606c-CR-002: hold the write lock across saveConfigToPath. The + // periodic Scan() (server_loops.go) takes the same write lock and + // overwrites m.projects with the map freshly loaded from disk. If the + // save happened after Unlock(), a Scan firing in the window between + // Unlock and save would reload the pre-binding on-disk file and clobber + // the in-memory binding we just appended (lost until the next save+scan). + // saveConfigToPath is pure file IO and never re-enters m.mu, so holding + // the lock across it is reentrancy-safe; project mutations are low + // frequency so the wider lock window is acceptable. m.mu.Lock() + defer m.mu.Unlock() p, ok := m.projects[projectName] if !ok { - m.mu.Unlock() // R183-SEC-L1: use %q to mirror UpdateConfig / SetFavorite (lines 211, // 237). An exported function is one future caller away from being // reached without a trust-boundary ValidateProjectName; defense-in- @@ -355,18 +377,13 @@ func (m *Manager) BindChat(projectName, platform, chatType, chatID string) error // Check if already bound for _, b := range p.Config.ChatBindings { if b.Platform == platform && b.ChatID == chatID && b.ChatType == chatType { - m.mu.Unlock() return nil // already bound } } p.Config.ChatBindings = append(p.Config.ChatBindings, binding) m.rebuildBindingIndex() - cfgSnap := snapshotConfig(p) - path := p.configPath() - m.mu.Unlock() - - return saveConfigToPath(path, cfgSnap) + return saveConfigToPath(p.configPath(), snapshotConfig(p)) } // UnbindAllChat removes all bindings for a given chat across all projects. @@ -380,7 +397,12 @@ func (m *Manager) BindChat(projectName, platform, chatType, chatID string) error // ("across all projects") and removes the non-deterministic routing that // last-writer-wins index rebuilds produced for multi-bound chats. func (m *Manager) UnbindAllChat(platform, chatType, chatID string) error { + // R202606c-CR-002: persist under the lock so a concurrent Scan() cannot + // reload the pre-change on-disk configs and resurrect the bindings we just + // stripped. See BindChat for the full rationale; saveConfigToPath does not + // re-enter m.mu. m.mu.Lock() + defer m.mu.Unlock() type pendingSave struct { path string @@ -408,7 +430,6 @@ func (m *Manager) UnbindAllChat(platform, chatType, chatID string) error { if changed { m.rebuildBindingIndex() } - m.mu.Unlock() var firstErr error for _, s := range saves { @@ -422,33 +443,34 @@ func (m *Manager) UnbindAllChat(platform, chatType, chatID string) error { // SetFavorite toggles a project's Favorite flag and persists it atomically. // Only touches Favorite — other config fields are preserved. func (m *Manager) SetFavorite(name string, favorite bool) error { + // R202606c-CR-002: persist under the lock so a concurrent Scan() cannot + // reload the pre-change on-disk config and drop this Favorite flip. See + // BindChat for the full rationale; saveConfigToPath does not re-enter m.mu. m.mu.Lock() + defer m.mu.Unlock() p, ok := m.projects[name] if !ok { - m.mu.Unlock() // R182-SEC-L1: %q mirrors UpdateConfig (line 234). set_favorite now // validates at the RPC boundary (R182-SEC-M1), but function is // defense-in-depth for any future caller that forgets to validate. return fmt.Errorf("%w: %q", ErrNotFound, name) } if p.Config.Favorite == favorite { - m.mu.Unlock() return nil } p.Config.Favorite = favorite - cfgSnap := snapshotConfig(p) - path := p.configPath() - m.mu.Unlock() - - return saveConfigToPath(path, cfgSnap) + return saveConfigToPath(p.configPath(), snapshotConfig(p)) } // UpdateConfig updates a project's config and persists it. func (m *Manager) UpdateConfig(name string, cfg ProjectConfig) error { + // R202606c-CR-002: persist under the lock so a concurrent Scan() cannot + // reload the pre-change on-disk config and drop this update. See BindChat + // for the full rationale; saveConfigToPath does not re-enter m.mu. m.mu.Lock() + defer m.mu.Unlock() p, ok := m.projects[name] if !ok { - m.mu.Unlock() // R181-SEC-P2-2: name comes from reverse-RPC frames (update_config) // and dashboard query strings. %q escapes bidi/C1/newline so the // wrapped error cannot forge structured log entries when the caller @@ -458,11 +480,7 @@ func (m *Manager) UpdateConfig(name string, cfg ProjectConfig) error { } p.Config = cfg m.rebuildBindingIndex() - cfgSnap := snapshotConfig(p) - path := p.configPath() - m.mu.Unlock() - - return saveConfigToPath(path, cfgSnap) + return saveConfigToPath(p.configPath(), snapshotConfig(p)) } // ProjectNames returns the set of current project names. diff --git a/internal/project/manager_test.go b/internal/project/manager_test.go index 0090880a3..3c6eaf439 100644 --- a/internal/project/manager_test.go +++ b/internal/project/manager_test.go @@ -5,6 +5,7 @@ import ( "os" "path/filepath" "strings" + "sync" "testing" "time" ) @@ -1052,3 +1053,64 @@ func TestResolveWorkspaces_FallbackCacheInvalidatedOnScan(t *testing.T) { t.Errorf("post-rescan ResolveWorkspaces(%q) = %q, want no match (cache should be invalidated)", aliasWS, got) } } + +// TestBindChat_ConcurrentScan_DoesNotDropBinding pins R202606c-CR-002: the +// periodic Scan() (server_loops.go) takes the manager write lock and replaces +// m.projects with the map freshly loaded from disk. The original BindChat +// released the lock BEFORE saveConfigToPath, so a Scan firing in that window +// would reload the pre-binding on-disk file and clobber the just-appended +// in-memory binding. With the save moved under the lock, BindChat and Scan are +// strictly serialized: Scan either observes the binding already persisted, or +// runs entirely before the append — it can never reload a half-applied state. +// +// The test races a BindChat against a Scan many times on the same manager and +// asserts the binding is never lost. The -race detector also flags the +// concurrent m.projects access if the locking ever regresses. +func TestBindChat_ConcurrentScan_DoesNotDropBinding(t *testing.T) { + t.Parallel() + const iters = 200 + for i := 0; i < iters; i++ { + root := t.TempDir() + makeProjectDir(t, root, "racy", nil) + m, _ := NewManager(root, PlannerDefaults{}) + if err := m.Scan(); err != nil { + t.Fatalf("initial Scan: %v", err) + } + + var wg sync.WaitGroup + wg.Add(2) + go func() { + defer wg.Done() + if err := m.BindChat("racy", "feishu", "group", "chatX"); err != nil { + t.Errorf("BindChat: %v", err) + } + }() + go func() { + defer wg.Done() + // Concurrent periodic rescan reloading from disk. + if err := m.Scan(); err != nil { + t.Errorf("concurrent Scan: %v", err) + } + }() + wg.Wait() + + // After both complete, the binding must be present. If Scan ran first, + // it reloaded the empty config but BindChat's later save+append wins; if + // BindChat ran first, Scan reloads the persisted binding. Either way the + // binding both lives in memory and is on disk. A torn ordering (save + // after unlock) would intermittently leave the binding only in a stale + // in-memory copy that Scan then overwrote — i.e. lost. + if p := m.ProjectForChat("feishu", "group", "chatX"); p == nil { + t.Fatalf("iter %d: binding lost from in-memory index after concurrent Scan", i) + } + + // And it must be durable: a fresh manager reading purely from disk sees it. + m2, _ := NewManager(root, PlannerDefaults{}) + if err := m2.Scan(); err != nil { + t.Fatalf("reload Scan: %v", err) + } + if p := m2.Get("racy"); p == nil || len(p.Config.ChatBindings) != 1 { + t.Fatalf("iter %d: binding not durably persisted; p = %+v", i, p) + } + } +} diff --git a/internal/transcribe/transcribe.go b/internal/transcribe/transcribe.go index b815b2f72..48f00bcbf 100644 --- a/internal/transcribe/transcribe.go +++ b/internal/transcribe/transcribe.go @@ -285,6 +285,17 @@ func (s *awsService) buildInput(encoding types.MediaEncoding, sampleRate int32) input.LanguageCode = types.LanguageCode(s.cfg.LanguageCode) return input } + if len(parts) == 1 { + // R202606c-CR-001: a comma that stripped down to a single entry + // (e.g. "zh-CN," with a trailing comma) is effectively single- + // language. AWS Transcribe Streaming rejects + // IdentifyMultipleLanguages=true with fewer than two LanguageOptions + // (400 BadRequestException), so degrade to single-LanguageCode using + // the one surviving normalized entry rather than the raw string. + input.IdentifyMultipleLanguages = false + input.LanguageCode = types.LanguageCode(parts[0]) + return input + } input.LanguageOptions = aws.String(strings.Join(parts, ",")) input.PreferredLanguage = types.LanguageCode(parts[0]) } else { diff --git a/internal/transcribe/transcribe_test.go b/internal/transcribe/transcribe_test.go index 959a7dbba..d86053c74 100644 --- a/internal/transcribe/transcribe_test.go +++ b/internal/transcribe/transcribe_test.go @@ -206,6 +206,54 @@ func TestBuildInput_MultiLanguage_Spaces(t *testing.T) { } } +// TestBuildInput_MultiLangDegradesToSingle pins R202606c-CR-001: a +// LanguageCode that contains a comma (so isMultiLang() returns true) but +// normalizes to a single entry — e.g. a trailing comma "zh-CN," or a +// leading comma ",en-US" — must NOT set IdentifyMultipleLanguages with a +// one-element LanguageOptions. AWS Transcribe Streaming returns a 400 +// BadRequestException when IdentifyMultipleLanguages=true is paired with +// fewer than two languages, which would fail every transcription. The +// builder must degrade to single-LanguageCode using the surviving entry. +func TestBuildInput_MultiLangDegradesToSingle(t *testing.T) { + tests := []struct { + name string + lang string + want string + }{ + {"trailing comma", "zh-CN,", "zh-CN"}, + {"leading comma", ",en-US", "en-US"}, + {"double comma around one", ",,ja-JP,,", "ja-JP"}, + {"spaces and commas", " zh-CN , ", "zh-CN"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + svc := newWithClient(&mockTranscribeAPI{}, Config{LanguageCode: tt.lang}) + // Sanity: these inputs DO trip isMultiLang (they contain a comma), + // which is exactly why the degrade path matters. + if !svc.isMultiLang() { + t.Fatalf("test precondition: isMultiLang() should be true for %q", tt.lang) + } + input := svc.buildInput(types.MediaEncodingPcm, 16000) + + if input.IdentifyMultipleLanguages { + t.Errorf("IdentifyMultipleLanguages should be false when config "+ + "resolves to a single language (%q)", tt.lang) + } + if input.LanguageCode != types.LanguageCode(tt.want) { + t.Errorf("LanguageCode = %q, want %q", input.LanguageCode, tt.want) + } + if input.LanguageOptions != nil { + t.Errorf("LanguageOptions should be nil for degraded single language, got %v", + input.LanguageOptions) + } + if input.PreferredLanguage != "" { + t.Errorf("PreferredLanguage should be empty for degraded single language, got %q", + input.PreferredLanguage) + } + }) + } +} + // TestLookupFFmpeg_NoProcessWideCache pins R240-SEC-9 (#1050): the previous // implementation memoised the first exec.LookPath result for the lifetime // of the process via sync.Once, so a startup-time PATH state stayed pinned diff --git a/internal/upstream/connector.go b/internal/upstream/connector.go index 67ba47bc8..56a6759f8 100644 --- a/internal/upstream/connector.go +++ b/internal/upstream/connector.go @@ -522,9 +522,16 @@ func marshalResult(v any) (json.RawMessage, error) { // does not expect; trim before returning so the wire format matches // the prior json.Marshal output exactly. if err := enc.Encode(v); err != nil { - // On error, drop the buffer (potentially partially-written) so a - // poison value cannot leak into the next reuse. - marshalResultBufPool.Put(new(bytes.Buffer)) + // On error, Reset clears any partially-written bytes (no poison can + // survive), so return the original buffer to the pool instead of + // discarding it — discarding shrinks the pool's effective size by one + // on every error. Mirror the happy-path oversized-drop policy. + buf.Reset() + if buf.Cap() <= marshalResultMaxRetainBytes { + marshalResultBufPool.Put(buf) + } else { + marshalResultBufPool.Put(new(bytes.Buffer)) + } return nil, err } out := buf.Bytes() diff --git a/internal/upstream/connector_marshalresult_pool_test.go b/internal/upstream/connector_marshalresult_pool_test.go new file mode 100644 index 000000000..5c9e1b07c --- /dev/null +++ b/internal/upstream/connector_marshalresult_pool_test.go @@ -0,0 +1,56 @@ +package upstream + +import ( + "bytes" + "testing" +) + +// R202606c-GO-006: on an Encode error, marshalResult must Reset the buffer it +// took from the pool and return THAT buffer (not a fresh empty one), so the +// pool's effective size does not shrink by one on every error. We exercise the +// error path by encoding an unmarshalable value (a chan), then confirm the +// happy path can immediately reuse a non-empty pooled buffer. +func TestMarshalResult_ErrorPathReturnsBufferToPool(t *testing.T) { + // Seed the pool with a buffer whose contents would be poison if leaked. + seed := new(bytes.Buffer) + seed.WriteString("POISON-do-not-leak") + marshalResultBufPool.Put(seed) + + // Encoding a chan fails (json: unsupported type). The error path must + // Reset + return the buffer, not panic. + if _, err := marshalResult(make(chan int)); err == nil { + t.Fatal("expected marshal error for chan value") + } + + // The happy path must still produce correct output (no poison bytes leak + // from a recycled buffer, since the error path Reset it before Put). + out, err := marshalResult(map[string]string{"k": "v"}) + if err != nil { + t.Fatalf("happy-path marshal failed: %v", err) + } + got := string(out) + if want := `{"k":"v"}`; got != want { + t.Fatalf("marshalResult = %q, want %q", got, want) + } + if bytes.Contains(out, []byte("POISON")) { + t.Fatalf("poison bytes leaked into output: %q", out) + } +} + +// Many sequential errors must not panic and must keep producing correct +// output afterwards — a smoke test that the pool stays healthy under repeated +// error-path Puts (the buffer is returned, never a half-written one reused raw). +func TestMarshalResult_RepeatedErrorsStayHealthy(t *testing.T) { + for i := 0; i < 100; i++ { + if _, err := marshalResult(make(chan int)); err == nil { + t.Fatalf("iteration %d: expected error", i) + } + } + out, err := marshalResult([]int{1, 2, 3}) + if err != nil { + t.Fatalf("marshal after errors failed: %v", err) + } + if want := "[1,2,3]"; string(out) != want { + t.Fatalf("marshalResult = %q, want %q", out, want) + } +}