Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Really fix flaky TestMultipleClients tests #603

Merged
merged 4 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 36 additions & 28 deletions crypto/tls/test/tls_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@ import (
"path/filepath"
"runtime"
"strconv"
"strings"
"testing"
"time"

"github.com/gogo/status"
"github.com/hashicorp/go-cleanhttp"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -103,6 +105,7 @@ func newIntegrationClientServer(

serv, err := server.New(cfg)
require.NoError(t, err)
defer serv.Shutdown()

serv.HTTP.HandleFunc("/hello", func(w http.ResponseWriter, _ *http.Request) {
fmt.Fprintf(w, "OK")
Expand All @@ -115,36 +118,42 @@ func newIntegrationClientServer(
require.NoError(t, err)
}()

// Wait until the server is up and running
assert.Eventually(t, func() bool {
conn, err := net.DialTimeout("tcp", httpAddr.String(), 1*time.Second)
if err != nil {
t.Logf("error dialing http: %v", err)
return false
}
defer conn.Close()
grpcConn, err := net.DialTimeout("tcp", grpcAddr.String(), 1*time.Second)
if err != nil {
t.Logf("error dialing grpc: %v", err)
return false
}
defer grpcConn.Close()
return true
}, 2500*time.Millisecond, 1*time.Second, "server is not up")

httpURL := fmt.Sprintf("https://localhost:%d/hello", httpAddr.Port)
grpcHost := net.JoinHostPort("localhost", strconv.Itoa(grpcAddr.Port))

for _, tc := range tcs {
tlsClientConfig, err := tc.tlsConfig.GetTLSConfig()
require.NoError(t, err)

// HTTP
t.Run("HTTP/"+tc.name, func(t *testing.T) {
transport := &http.Transport{TLSClientConfig: tlsClientConfig}
tlsClientConfig, err := tc.tlsConfig.GetTLSConfig()
require.NoError(t, err)

transport := cleanhttp.DefaultTransport()
transport.TLSClientConfig = tlsClientConfig
client := &http.Client{Transport: transport}

resp, err := client.Get(httpURL)
cancellableCtx, cancel := context.WithCancel(context.Background())
defer cancel()

req, err := http.NewRequestWithContext(cancellableCtx, http.MethodGet, httpURL, nil)
require.NoError(t, err)

resp, err := client.Do(req)
// We retry the request a few times in case of a TCP reset (and we're expecting an error)
// Sometimes, the server resets the connection rather than sending the TLS error
// Seems that even Google have issues with RST flakiness: https://go-review.googlesource.com/c/go/+/527196
isRST := func(err error) bool {
if err == nil {
return false
}
return strings.Contains(err.Error(), "connection reset by peer") || strings.Contains(err.Error(), "broken pipe")
}
for i := 0; i < 3 && isRST(err) && tc.httpExpectError != nil; i++ {
time.Sleep(100 * time.Millisecond)
resp, err = client.Do(req)
if err == nil {
defer resp.Body.Close()
}
}
if err == nil {
defer resp.Body.Close()
}
Expand Down Expand Up @@ -175,16 +184,18 @@ func newIntegrationClientServer(
dialOptions = append([]grpc.DialOption{grpc.WithDefaultCallOptions(clientConfig.CallOptions()...)}, dialOptions...)

conn, err := grpc.NewClient(grpcHost, dialOptions...)
assert.NoError(t, err, tc.name)
require.NoError(t, err, tc.name)
require.NoError(t, err, tc.name)
defer conn.Close()

cancellableCtx, cancel := context.WithCancel(context.Background())
defer cancel()

client := grpc_health_v1.NewHealthClient(conn)

// TODO: Investigate why the client doesn't really receive the
// error about the bad certificate from the server side and just
// see connection closed instead
resp, err := client.Check(context.TODO(), &grpc_health_v1.HealthCheckRequest{})
resp, err := client.Check(cancellableCtx, &grpc_health_v1.HealthCheckRequest{})
if tc.grpcExpectError != nil {
tc.grpcExpectError(t, err)
return
Expand All @@ -194,10 +205,7 @@ func newIntegrationClientServer(
assert.Equal(t, grpc_health_v1.HealthCheckResponse_SERVING, resp.Status)
}
})

}

serv.Shutdown()
}

func TestServerWithoutTlsEnabled(t *testing.T) {
Expand Down
13 changes: 5 additions & 8 deletions kv/memberlist/memberlist_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -590,12 +590,16 @@ func TestMultipleClientsWithMixedLabelsAndExpectFailure(t *testing.T) {
// 1) ""
// 2) "label1"
// 3) "label2"
// 4) "label3"
// 5) "label4"
//
// We expect that it won't be possible to build a memberlist cluster with mixed labels.
var membersLabel = []string{
"",
"label1",
"label2",
"label3",
"label4",
}

configGen := func(i int) KVConfig {
Expand All @@ -609,7 +613,7 @@ func TestMultipleClientsWithMixedLabelsAndExpectFailure(t *testing.T) {

err := testMultipleClientsWithConfigGenerator(t, len(membersLabel), configGen)
require.Error(t, err)
require.Contains(t, err.Error(), fmt.Sprintf("expected to see %d members, got", len(membersLabel)))
require.Contains(t, err.Error(), fmt.Sprintf("expected to see at least %d updates", len(membersLabel)))
}

func TestMultipleClientsWithMixedLabelsAndClusterLabelVerificationDisabled(t *testing.T) {
Expand Down Expand Up @@ -720,7 +724,6 @@ func testMultipleClientsWithConfigGenerator(t *testing.T, members int, configGen
firstKv := clients[0]
ctx, cancel := context.WithTimeout(context.Background(), casInterval*3) // Watch for 3x cas intervals.
updates := 0
gotMembers := 0
firstKv.WatchKey(ctx, key, func(in interface{}) bool {
updates++

Expand All @@ -733,18 +736,12 @@ func testMultipleClientsWithConfigGenerator(t *testing.T, members int, configGen
"tokens, oldest timestamp:", now.Sub(time.Unix(minTimestamp, 0)).String(),
"avg timestamp:", now.Sub(time.Unix(avgTimestamp, 0)).String(),
"youngest timestamp:", now.Sub(time.Unix(maxTimestamp, 0)).String())
gotMembers = len(r.Members)
return true // yes, keep watching
})
cancel() // make linter happy

t.Logf("Ring updates observed: %d", updates)

// We expect that all members are in the ring
if gotMembers != members {
return fmt.Errorf("expected to see %d members, got %d", members, gotMembers)
}

if updates < members {
// in general, at least one update from each node. (although that's not necessarily true...
// but typically we get more updates than that anyway)
Expand Down