Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions internal/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"runtime/debug"
"strings"
"sync"
"sync/atomic"
Expand Down Expand Up @@ -158,6 +159,15 @@ func (p *CommentWorkerPool) Submit(f func() ([]model.LlmComment, error)) {
defer p.wg.Done()
p.semaphore <- struct{}{} // acquire
defer func() { <-p.semaphore }() // release
// Contain a panic in the submitted work so one bad unit of work cannot
// crash the whole process. Registered last => runs first on unwind,
// before the semaphore-release and wg.Done() defers, so the semaphore
// is still drained and the WaitGroup still decremented.
defer func() {
if r := recover(); r != nil {
fmt.Fprintf(stdout.Writer(), "[ocr] CommentWorkerPool panic: %v\n%s\n", r, debug.Stack())
}
}()

comments, err := f()
if err != nil {
Expand All @@ -170,6 +180,19 @@ func (p *CommentWorkerPool) Submit(f func() ([]model.LlmComment, error)) {
}

// Await blocks until all submitted work has completed and returns aggregated results.
//
// Note: a panic in submitted work is recovered and logged inside Submit (see the
// recover defer there) but is not surfaced here as an error or reflected in the
// returned count. A unit of work that panics therefore contributes no comments and
// is indistinguishable from one that legitimately produced zero. Callers that need
// to detect failed work should not rely on the result count alone.
//
// Concurrency contract: Await must not run concurrently with Submit. Submit calls
// wg.Add(1) synchronously, so a Submit racing Await would risk sync.WaitGroup's
// "Add called concurrently with Wait" panic. Callers must ensure every Submit has
// returned before calling Await; dispatchSubtasks satisfies this by joining its
// per-file goroutines (wg.Wait) before the Await here, and the async tool path only
// submits from within those goroutines.
func (p *CommentWorkerPool) Await() []model.LlmComment {
p.wg.Wait()
return p.results
Expand Down Expand Up @@ -405,6 +428,24 @@ func (a *Agent) dispatchSubtasks(ctx context.Context) ([]model.LlmComment, error
go func(d model.Diff) {
defer wg.Done()
defer func() { <-sem }() // release
// A panic while reviewing one file must be isolated exactly like an
// error return: counted in subtaskFailed and recorded as a
// subtask_error warning, so other files still complete and the
// all-failed rollup below stays correct. Registered before the
// timeout-cancel defer, so cancel() still runs first on unwind.
defer func() {
if r := recover(); r != nil {
atomic.AddInt64(&a.subtaskFailed, 1)
fmt.Fprintf(stdout.Writer(), "[ocr] Subtask panic for %s: %v\n%s\n", d.NewPath, r, debug.Stack())
// Mirror the error-path telemetry so a recovered panic is also
// traced. Use the parent ctx, not fileCtx: the timeout cancel()
// defer runs before this recover defer (LIFO), so fileCtx is
// already cancelled by the time we get here.
telemetry.ErrorEvent(ctx, "subtask.panic", fmt.Errorf("panic: %v", r),
telemetry.AnyToAttr("file.path", d.NewPath))
a.recordWarning("subtask_error", d.NewPath, fmt.Sprintf("panic: %v", r))
}
}()

var fileCtx context.Context
var cancel context.CancelFunc
Expand Down
30 changes: 30 additions & 0 deletions internal/agent/worker_pool_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package agent

import (
"testing"

"github.com/open-code-review/open-code-review/internal/model"
)

// TestCommentWorkerPool_PanicIsIsolated proves that a panic in one submitted
// unit of work is contained so it cannot crash the process, while other
// submitted work still completes (issue #171 / CWE-248).
//
// Before the fix the panicking closure terminates the whole test process
// (RED); after the recover() defer in Submit it is swallowed and the healthy
// closure's result still arrives (GREEN).
func TestCommentWorkerPool_PanicIsIsolated(t *testing.T) {
p := NewCommentWorkerPool(2)

p.Submit(func() ([]model.LlmComment, error) {
panic("boom while reviewing EVIL.go")
})
p.Submit(func() ([]model.LlmComment, error) {
return []model.LlmComment{{}}, nil // healthy file must still complete
})

got := p.Await() // (a) must NOT crash; (b) must return the healthy result
if len(got) != 1 {
t.Fatalf("expected 1 collected comment from the healthy closure, got %d", len(got))
}
}
Loading