From cfc8a5789fd40473681882198da77accd6a6d5d9 Mon Sep 17 00:00:00 2001
From: Wei Fu <fuweid89@gmail.com>
Date: Tue, 4 Jul 2023 23:08:07 +0800
Subject: [PATCH] pkg/expect: fix data race

Let's say there is command which outputs one line and exit with error.

There are three goroutines to acquire the lock:

1. ep.read()
2. ep.waitSaveExitErr()
3. ep.Expect()

When ep.read goroutine reads the log but it doesn't acquire the lock in
time, the ep.waitSaveExitErr acquires the lock and updates the
`exitErr`. And then ep.Expect acquires lock but it doesn't see any log
yet and then returns err.

It's hard to reproduce it in local. Add the extra sleep can reproduce it.

```diff
diff --git a/pkg/expect/expect.go b/pkg/expect/expect.go
index a512a3ce4..602bea73f 100644
--- a/pkg/expect/expect.go
+++ b/pkg/expect/expect.go
@@ -128,6 +128,7 @@ func (ep *ExpectProcess) tryReadNextLine(r *bufio.Reader) error {
        printDebugLines := os.Getenv("EXPECT_DEBUG") != ""
        l, err := r.ReadString('\n')

+       time.Sleep(10 * time.Millisecond)
        ep.mu.Lock()
        defer ep.mu.Unlock()
```

See it once in Github Action [1]. In order to fix it, the patch introduces
`readCloseCh` to wait for ep.read to get all the data and retry it.

[1]: https://github.com/etcd-io/etcd/pull/16137#issuecomment-1605838518

Signed-off-by: Wei Fu <fuweid89@gmail.com>
---
 pkg/expect/expect.go      | 24 +++++++++++++++++++++++-
 pkg/expect/expect_test.go |  8 ++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/pkg/expect/expect.go b/pkg/expect/expect.go
index 9ad2c8a0e06b..1d4e980a7f9d 100644
--- a/pkg/expect/expect.go
+++ b/pkg/expect/expect.go
@@ -44,6 +44,8 @@ type ExpectProcess struct {
 	fpty *os.File
 	wg   sync.WaitGroup
 
+	readCloseCh chan struct{} // close it if async read goroutine exits
+
 	mu       sync.Mutex // protects lines, count, cur, exitErr and exitCode
 	lines    []string
 	count    int   // increment whenever new line gets added
@@ -69,6 +71,7 @@ func NewExpectWithEnv(name string, args []string, env []string, serverProcessCon
 		},
 	}
 	ep.cmd = commandFromConfig(ep.cfg)
+	ep.readCloseCh = make(chan struct{})
 
 	if ep.fpty, err = pty.Start(ep.cmd); err != nil {
 		return nil, err
@@ -100,7 +103,10 @@ func (ep *ExpectProcess) Pid() int {
 }
 
 func (ep *ExpectProcess) read() {
-	defer ep.wg.Done()
+	defer func() {
+		ep.wg.Done()
+		close(ep.readCloseCh)
+	}()
 	defer func(fpty *os.File) {
 		err := fpty.Close()
 		if err != nil {
@@ -187,9 +193,25 @@ func (ep *ExpectProcess) ExpectFunc(ctx context.Context, f func(string) bool) (s
 		}
 	}
 
+	select {
+	// NOTE: we wait readCloseCh for ep.read() to complete draining the log before acquring the lock.
+	case <-ep.readCloseCh:
+	case <-ctx.Done():
+		return "", fmt.Errorf("failed to find match string: %w", ctx.Err())
+	}
+
 	ep.mu.Lock()
 	defer ep.mu.Unlock()
 
+	// retry it since we get all the log data
+	for i < len(ep.lines) {
+		line := ep.lines[i]
+		i++
+		if f(line) {
+			return line, nil
+		}
+	}
+
 	lastLinesIndex := len(ep.lines) - DEBUG_LINES_TAIL
 	if lastLinesIndex < 0 {
 		lastLinesIndex = 0
diff --git a/pkg/expect/expect_test.go b/pkg/expect/expect_test.go
index 93dfdc4a47e6..f5029323de0f 100644
--- a/pkg/expect/expect_test.go
+++ b/pkg/expect/expect_test.go
@@ -210,3 +210,11 @@ func TestExitCodeAfterKill(t *testing.T) {
 	assert.Equal(t, 137, code)
 	assert.NoError(t, err)
 }
+
+func TestExpectForFailFastCommand(t *testing.T) {
+	ep, err := NewExpect("sh", "-c", `echo "curl: (59) failed setting cipher list"; exit 59`)
+	require.NoError(t, err)
+
+	_, err = ep.Expect("failed setting cipher list")
+	require.NoError(t, err)
+}