Skip to content
This repository was archived by the owner on Nov 5, 2021. It is now read-only.

Commit 1a77284

Browse files
authored
Fix a goroutine leaking bug in external probe. (#584)
Fix a bug where our process-wait goroutine will not exit until the probe start context is explicitly canceled. This bug was introduced when we added handling for clean exit (June 2020: CR 318986425). Also, add a test that would have caught this bug. PiperOrigin-RevId: 366547916
1 parent 925bfed commit 1a77284

2 files changed

Lines changed: 116 additions & 13 deletions

File tree

probes/external/external.go

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,28 @@ func substituteLabels(in string, labels map[string]string) (string, bool) {
224224
return output, foundAll
225225
}
226226

227+
type command interface {
228+
Wait() error
229+
}
230+
231+
// monitorCommand waits for the process to terminate and sets cmdRunning to
232+
// false when that happens.
233+
func (p *Probe) monitorCommand(startCtx context.Context, cmd command) error {
234+
err := cmd.Wait()
235+
236+
// Spare logging error message if killed explicitly.
237+
select {
238+
case <-startCtx.Done():
239+
return nil
240+
default:
241+
}
242+
243+
if exitErr, ok := err.(*exec.ExitError); ok {
244+
return fmt.Errorf("external probe process died with the status: %s. Stderr: %s", exitErr.Error(), string(exitErr.Stderr))
245+
}
246+
return err
247+
}
248+
227249
func (p *Probe) startCmdIfNotRunning(startCtx context.Context) error {
228250
// Start external probe command if it's not running already. Note that here we
229251
// are trusting the cmdRunning to be set correctly. It can be false for 3 reasons:
@@ -265,21 +287,11 @@ func (p *Probe) startCmdIfNotRunning(startCtx context.Context) error {
265287
// This goroutine waits for the process to terminate and sets cmdRunning to
266288
// false when that happens.
267289
go func() {
268-
err := cmd.Wait()
290+
if err := p.monitorCommand(startCtx, cmd); err != nil {
291+
p.l.Error(err.Error())
292+
}
269293
close(doneChan)
270294
p.cmdRunning = false
271-
272-
// Spare logging error message if killed explicitly.
273-
select {
274-
case <-startCtx.Done():
275-
return
276-
}
277-
278-
if err != nil {
279-
if exitErr, ok := err.(*exec.ExitError); ok {
280-
p.l.Errorf("external probe process died with the status: %s. Stderr: %s", exitErr.Error(), string(exitErr.Stderr))
281-
}
282-
}
283295
}()
284296
go p.readProbeReplies(doneChan)
285297
p.cmdRunning = true

probes/external/external_test.go

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@ import (
1818
"bufio"
1919
"bytes"
2020
"context"
21+
"errors"
2122
"fmt"
2223
"io"
2324
"os"
25+
"os/exec"
2426
"reflect"
2527
"strings"
2628
"testing"
@@ -719,3 +721,92 @@ func TestCommandParsing(t *testing.T) {
719721
t.Errorf("Got command args=%v, want command args=%v", p.cmdArgs, wantArgs)
720722
}
721723
}
724+
725+
type fakeCommand struct {
726+
exitCtx context.Context
727+
startCtx context.Context
728+
waitErr error
729+
}
730+
731+
func (fc *fakeCommand) Wait() error {
732+
select {
733+
case <-fc.exitCtx.Done():
734+
case <-fc.startCtx.Done():
735+
}
736+
return fc.waitErr
737+
}
738+
739+
func TestMonitorCommand(t *testing.T) {
740+
tests := []struct {
741+
desc string
742+
waitErr error
743+
finishCmd bool
744+
cancelCtx bool
745+
wantErr bool
746+
wantStderr bool
747+
}{
748+
{
749+
desc: "Command exit with no error",
750+
finishCmd: true,
751+
wantErr: false,
752+
},
753+
{
754+
desc: "Cancel context, no error",
755+
cancelCtx: true,
756+
wantErr: false,
757+
},
758+
{
759+
desc: "command exit with exit error",
760+
finishCmd: true,
761+
waitErr: &exec.ExitError{Stderr: []byte("exit-error exiting")},
762+
wantErr: true,
763+
wantStderr: true,
764+
},
765+
{
766+
desc: "command exit with no exit error",
767+
finishCmd: true,
768+
waitErr: errors.New("some-error"),
769+
wantErr: true,
770+
wantStderr: false,
771+
},
772+
}
773+
774+
for _, test := range tests {
775+
t.Run(test.desc, func(t *testing.T) {
776+
exitCtx, exitFunc := context.WithCancel(context.Background())
777+
startCtx, startCancelFunc := context.WithCancel(context.Background())
778+
cmd := &fakeCommand{
779+
exitCtx: exitCtx,
780+
startCtx: startCtx,
781+
waitErr: test.waitErr,
782+
}
783+
784+
p := &Probe{}
785+
errCh := make(chan error)
786+
go func() {
787+
errCh <- p.monitorCommand(startCtx, cmd)
788+
}()
789+
790+
if test.finishCmd {
791+
exitFunc()
792+
}
793+
if test.cancelCtx {
794+
startCancelFunc()
795+
}
796+
797+
err := <-errCh
798+
if (err != nil) != test.wantErr {
799+
t.Errorf("Got error: %v, want error?= %v", err, test.wantErr)
800+
}
801+
802+
if err != nil {
803+
if test.wantStderr && !strings.Contains(err.Error(), "Stderr") {
804+
t.Errorf("Want std err: %v, got std err: %v", test.wantStderr, strings.Contains(err.Error(), "Stderr"))
805+
}
806+
}
807+
808+
exitFunc()
809+
startCancelFunc()
810+
})
811+
}
812+
}

0 commit comments

Comments
 (0)