From cfc8a5789fd40473681882198da77accd6a6d5d9 Mon Sep 17 00:00:00 2001 From: Wei Fu Date: Tue, 4 Jul 2023 23:08:07 +0800 Subject: [PATCH] pkg/expect: fix data race Let's say there is command which outputs one line and exit with error. There are three goroutines to acquire the lock: 1. ep.read() 2. ep.waitSaveExitErr() 3. ep.Expect() When ep.read goroutine reads the log but it doesn't acquire the lock in time, the ep.waitSaveExitErr acquires the lock and updates the `exitErr`. And then ep.Expect acquires lock but it doesn't see any log yet and then returns err. It's hard to reproduce it in local. Add the extra sleep can reproduce it. ```diff diff --git a/pkg/expect/expect.go b/pkg/expect/expect.go index a512a3ce4..602bea73f 100644 --- a/pkg/expect/expect.go +++ b/pkg/expect/expect.go @@ -128,6 +128,7 @@ func (ep *ExpectProcess) tryReadNextLine(r *bufio.Reader) error { printDebugLines := os.Getenv("EXPECT_DEBUG") != "" l, err := r.ReadString('\n') + time.Sleep(10 * time.Millisecond) ep.mu.Lock() defer ep.mu.Unlock() ``` See it once in Github Action [1]. In order to fix it, the patch introduces `readCloseCh` to wait for ep.read to get all the data and retry it. [1]: https://github.com/etcd-io/etcd/pull/16137#issuecomment-1605838518 Signed-off-by: Wei Fu --- pkg/expect/expect.go | 24 +++++++++++++++++++++++- pkg/expect/expect_test.go | 8 ++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/pkg/expect/expect.go b/pkg/expect/expect.go index 9ad2c8a0e06b..1d4e980a7f9d 100644 --- a/pkg/expect/expect.go +++ b/pkg/expect/expect.go @@ -44,6 +44,8 @@ type ExpectProcess struct { fpty *os.File wg sync.WaitGroup + readCloseCh chan struct{} // close it if async read goroutine exits + mu sync.Mutex // protects lines, count, cur, exitErr and exitCode lines []string count int // increment whenever new line gets added @@ -69,6 +71,7 @@ func NewExpectWithEnv(name string, args []string, env []string, serverProcessCon }, } ep.cmd = commandFromConfig(ep.cfg) + ep.readCloseCh = make(chan struct{}) if ep.fpty, err = pty.Start(ep.cmd); err != nil { return nil, err @@ -100,7 +103,10 @@ func (ep *ExpectProcess) Pid() int { } func (ep *ExpectProcess) read() { - defer ep.wg.Done() + defer func() { + ep.wg.Done() + close(ep.readCloseCh) + }() defer func(fpty *os.File) { err := fpty.Close() if err != nil { @@ -187,9 +193,25 @@ func (ep *ExpectProcess) ExpectFunc(ctx context.Context, f func(string) bool) (s } } + select { + // NOTE: we wait readCloseCh for ep.read() to complete draining the log before acquring the lock. + case <-ep.readCloseCh: + case <-ctx.Done(): + return "", fmt.Errorf("failed to find match string: %w", ctx.Err()) + } + ep.mu.Lock() defer ep.mu.Unlock() + // retry it since we get all the log data + for i < len(ep.lines) { + line := ep.lines[i] + i++ + if f(line) { + return line, nil + } + } + lastLinesIndex := len(ep.lines) - DEBUG_LINES_TAIL if lastLinesIndex < 0 { lastLinesIndex = 0 diff --git a/pkg/expect/expect_test.go b/pkg/expect/expect_test.go index 93dfdc4a47e6..f5029323de0f 100644 --- a/pkg/expect/expect_test.go +++ b/pkg/expect/expect_test.go @@ -210,3 +210,11 @@ func TestExitCodeAfterKill(t *testing.T) { assert.Equal(t, 137, code) assert.NoError(t, err) } + +func TestExpectForFailFastCommand(t *testing.T) { + ep, err := NewExpect("sh", "-c", `echo "curl: (59) failed setting cipher list"; exit 59`) + require.NoError(t, err) + + _, err = ep.Expect("failed setting cipher list") + require.NoError(t, err) +}