Merge pull request #18684 from serathius/robustness-dir-cleanup
Robustness dir cleanup
serathius authored Oct 14, 2024
2 parents d6412f4 + 8cf1121 commit 04efee2
Showing 9 changed files with 106 additions and 104 deletions.
10 changes: 5 additions & 5 deletions tests/robustness/README.md
@@ -51,15 +51,15 @@ Errors in etcd model could be causing false positives, which makes the ability t
* **For remote runs on CI:** you need to go to the [Prow Dashboard](https://prow.k8s.io/job-history/gs/kubernetes-jenkins/logs/ci-etcd-robustness-amd64), go to a build, download one of the Artifacts (`artifacts/results.zip`), and extract it locally.
![Prow job run page](./prow_job.png)
![Prow job run page](readme-images/prow_job.png)
![Prow job artifacts run page](./prow_job_artifacts_page.png)
![Prow job artifacts run page](readme-images/prow_job_artifacts_page.png)
![Prow job artifacts run page artifacts dir](./prow_job_artifacts_dir_page.png)
![Prow job artifacts run page artifacts dir](readme-images/prow_job_artifacts_dir_page.png)
Each directory is prefixed with `TestRobustness` and contains a robustness test report.
![artifact archive](./artifact_archive.png)
![artifact archive](readme-images/artifact_archive.png)
Pick one of the directories within the archive corresponding to the failed test scenario.
The largest directory by size usually corresponds to the failed scenario.
@@ -134,7 +134,7 @@ Open `/tmp/TestRobustnessRegression_Issue14370/1715157774429416550/history.html`
Jump to the error in linearization by clicking `[ jump to first error ]` on the top of the page.

You should see a graph similar to the one in the image below.
![issue14370](./issue14370.png)
![issue14370](readme-images/issue14370.png)

The last correct request (connected with a grey line) is a `Put` request that succeeded and got revision `168`.
All following requests are invalid (connected with a red line) as they have revision `167`.
19 changes: 9 additions & 10 deletions tests/robustness/watch.go → tests/robustness/client/watch.go
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

package robustness
package client

import (
"context"
@@ -21,25 +21,24 @@ import (
"time"

"go.etcd.io/etcd/tests/v3/framework/e2e"
"go.etcd.io/etcd/tests/v3/robustness/client"
"go.etcd.io/etcd/tests/v3/robustness/identity"
"go.etcd.io/etcd/tests/v3/robustness/report"
)

func collectClusterWatchEvents(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, maxRevisionChan <-chan int64, cfg watchConfig, baseTime time.Time, ids identity.Provider) []report.ClientReport {
func CollectClusterWatchEvents(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster, maxRevisionChan <-chan int64, cfg WatchConfig, baseTime time.Time, ids identity.Provider) []report.ClientReport {
mux := sync.Mutex{}
var wg sync.WaitGroup
reports := make([]report.ClientReport, len(clus.Procs))
memberMaxRevisionChans := make([]chan int64, len(clus.Procs))
for i, member := range clus.Procs {
c, err := client.NewRecordingClient(member.EndpointsGRPC(), ids, baseTime)
c, err := NewRecordingClient(member.EndpointsGRPC(), ids, baseTime)
if err != nil {
t.Fatal(err)
}
memberMaxRevisionChan := make(chan int64, 1)
memberMaxRevisionChans[i] = memberMaxRevisionChan
wg.Add(1)
go func(i int, c *client.RecordingClient) {
go func(i int, c *RecordingClient) {
defer wg.Done()
defer c.Close()
watchUntilRevision(ctx, t, c, memberMaxRevisionChan, cfg)
@@ -60,12 +59,12 @@ func collectClusterWatchEvents(ctx context.Context, t *testing.T, clus *e2e.Etcd
return reports
}

type watchConfig struct {
requestProgress bool
type WatchConfig struct {
RequestProgress bool
}

// watchUntilRevision watches all changes until the context is cancelled, it has observed the revision provided via maxRevisionChan, or maxRevisionChan was closed.
func watchUntilRevision(ctx context.Context, t *testing.T, c *client.RecordingClient, maxRevisionChan <-chan int64, cfg watchConfig) {
func watchUntilRevision(ctx context.Context, t *testing.T, c *RecordingClient, maxRevisionChan <-chan int64, cfg WatchConfig) {
var maxRevision int64
var lastRevision int64 = 1
ctx, cancel := context.WithCancel(ctx)
@@ -100,7 +99,7 @@ resetWatch:
t.Logf("Watch channel closed")
continue resetWatch
}
if cfg.requestProgress {
if cfg.RequestProgress {
c.RequestProgress(ctx)
}

@@ -124,7 +123,7 @@
}
}

func validateGotAtLeastOneProgressNotify(t *testing.T, reports []report.ClientReport, expectProgressNotify bool) {
func ValidateGotAtLeastOneProgressNotify(t *testing.T, reports []report.ClientReport, expectProgressNotify bool) {
var gotProgressNotify = false
external:
for _, r := range reports {
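For context, here is a minimal sketch (not part of this commit) of how the now-exported watch API might be consumed from the `robustness` package after the move into `client`. The helper name `collectWatchReports` is hypothetical; the `client.*` identifiers and signatures mirror the diff above.

```go
package robustness

import (
	"context"
	"testing"
	"time"

	"go.etcd.io/etcd/tests/v3/framework/e2e"
	"go.etcd.io/etcd/tests/v3/robustness/client"
	"go.etcd.io/etcd/tests/v3/robustness/identity"
	"go.etcd.io/etcd/tests/v3/robustness/report"
)

// collectWatchReports is a hypothetical helper showing the exported surface:
// WatchConfig, CollectClusterWatchEvents and ValidateGotAtLeastOneProgressNotify
// are now reachable from outside the client package.
func collectWatchReports(ctx context.Context, t *testing.T, clus *e2e.EtcdProcessCluster,
	maxRevisionChan <-chan int64, baseTime time.Time, ids identity.Provider) []report.ClientReport {
	// RequestProgress is the exported counterpart of the old requestProgress field.
	cfg := client.WatchConfig{RequestProgress: true}
	reports := client.CollectClusterWatchEvents(ctx, t, clus, maxRevisionChan, cfg, baseTime, ids)
	// Since RequestProgress is set, expect at least one progress notification.
	client.ValidateGotAtLeastOneProgressNotify(t, reports, true)
	return reports
}
```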
42 changes: 22 additions & 20 deletions tests/robustness/main_test.go
@@ -26,10 +26,12 @@ import (

"go.etcd.io/etcd/tests/v3/framework"
"go.etcd.io/etcd/tests/v3/framework/e2e"
"go.etcd.io/etcd/tests/v3/robustness/client"
"go.etcd.io/etcd/tests/v3/robustness/failpoint"
"go.etcd.io/etcd/tests/v3/robustness/identity"
"go.etcd.io/etcd/tests/v3/robustness/model"
"go.etcd.io/etcd/tests/v3/robustness/report"
"go.etcd.io/etcd/tests/v3/robustness/scenarios"
"go.etcd.io/etcd/tests/v3/robustness/traffic"
"go.etcd.io/etcd/tests/v3/robustness/validate"
)
@@ -43,21 +45,21 @@ func TestMain(m *testing.M) {

func TestRobustnessExploratory(t *testing.T) {
testRunner.BeforeTest(t)
for _, s := range exploratoryScenarios(t) {
t.Run(s.name, func(t *testing.T) {
for _, s := range scenarios.Exploratory(t) {
t.Run(s.Name, func(t *testing.T) {
lg := zaptest.NewLogger(t)
s.cluster.Logger = lg
s.Cluster.Logger = lg
ctx := context.Background()
c, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&s.cluster))
c, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&s.Cluster))
if err != nil {
t.Fatal(err)
}
defer forcestopCluster(c)
s.failpoint, err = failpoint.PickRandom(c, s.profile)
s.Failpoint, err = failpoint.PickRandom(c, s.Profile)
if err != nil {
t.Fatal(err)
}
t.Run(s.failpoint.Name(), func(t *testing.T) {
t.Run(s.Failpoint.Name(), func(t *testing.T) {
testRobustness(ctx, t, lg, s, c)
})
})
@@ -66,12 +68,12 @@ func TestRobustnessExploratory(t *testing.T) {

func TestRobustnessRegression(t *testing.T) {
testRunner.BeforeTest(t)
for _, s := range regressionScenarios(t) {
t.Run(s.name, func(t *testing.T) {
for _, s := range scenarios.Regression(t) {
t.Run(s.Name, func(t *testing.T) {
lg := zaptest.NewLogger(t)
s.cluster.Logger = lg
s.Cluster.Logger = lg
ctx := context.Background()
c, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&s.cluster))
c, err := e2e.NewEtcdProcessCluster(ctx, t, e2e.WithConfig(&s.Cluster))
if err != nil {
t.Fatal(err)
}
@@ -81,7 +83,7 @@ func TestRobustnessRegression(t *testing.T) {
}
}

func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, s testScenario, c *e2e.EtcdProcessCluster) {
func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, s scenarios.TestScenario, c *e2e.EtcdProcessCluster) {
r := report.TestReport{Logger: lg, Cluster: c}
// t.Failed() returns false during panicking. We need to forcibly
// save data on panicking.
@@ -90,24 +92,24 @@ func testRobustness(ctx context.Context, t *testing.T, lg *zap.Logger, s testSce
defer func() {
r.Report(t, panicked)
}()
r.Client = s.run(ctx, t, lg, c)
r.Client = runScenario(ctx, t, s, lg, c)
persistedRequests, err := report.PersistedRequestsCluster(lg, c)
if err != nil {
t.Fatal(err)
}

failpointImpactingWatch := s.failpoint == failpoint.SleepBeforeSendWatchResponse
failpointImpactingWatch := s.Failpoint == failpoint.SleepBeforeSendWatchResponse
if !failpointImpactingWatch {
watchProgressNotifyEnabled := c.Cfg.ServerConfig.ExperimentalWatchProgressNotifyInterval != 0
validateGotAtLeastOneProgressNotify(t, r.Client, s.watch.requestProgress || watchProgressNotifyEnabled)
client.ValidateGotAtLeastOneProgressNotify(t, r.Client, s.Watch.RequestProgress || watchProgressNotifyEnabled)
}
validateConfig := validate.Config{ExpectRevisionUnique: s.traffic.ExpectUniqueRevision()}
validateConfig := validate.Config{ExpectRevisionUnique: s.Traffic.ExpectUniqueRevision()}
r.Visualize = validate.ValidateAndReturnVisualize(t, lg, validateConfig, r.Client, persistedRequests, 5*time.Minute)

panicked = false
}

func (s testScenario) run(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster) (reports []report.ClientReport) {
func runScenario(ctx context.Context, t *testing.T, s scenarios.TestScenario, lg *zap.Logger, clus *e2e.EtcdProcessCluster) (reports []report.ClientReport) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
g := errgroup.Group{}
@@ -122,7 +124,7 @@ func (s testScenario) run(ctx context.Context, t *testing.T, lg *zap.Logger, clu
defer close(failpointInjected)
// Give some time for traffic to reach qps target before injecting failpoint.
time.Sleep(time.Second)
fr, err := failpoint.Inject(ctx, t, lg, clus, s.failpoint, baseTime, ids)
fr, err := failpoint.Inject(ctx, t, lg, clus, s.Failpoint, baseTime, ids)
if err != nil {
t.Error(err)
cancel()
@@ -138,14 +140,14 @@ func (s testScenario) run(ctx context.Context, t *testing.T, lg *zap.Logger, clu
maxRevisionChan := make(chan int64, 1)
g.Go(func() error {
defer close(maxRevisionChan)
operationReport = traffic.SimulateTraffic(ctx, t, lg, clus, s.profile, s.traffic, failpointInjected, baseTime, ids)
operationReport = traffic.SimulateTraffic(ctx, t, lg, clus, s.Profile, s.Traffic, failpointInjected, baseTime, ids)
maxRevision := operationsMaxRevision(operationReport)
maxRevisionChan <- maxRevision
lg.Info("Finished simulating traffic", zap.Int64("max-revision", maxRevision))
lg.Info("Finished simulating Traffic", zap.Int64("max-revision", maxRevision))
return nil
})
g.Go(func() error {
watchReport = collectClusterWatchEvents(ctx, t, clus, maxRevisionChan, s.watch, baseTime, ids)
watchReport = client.CollectClusterWatchEvents(ctx, t, clus, maxRevisionChan, s.Watch, baseTime, ids)
return nil
})
g.Wait()
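The test driver above assumes a new `scenarios` package that exports `TestScenario` with the fields it reads (`Name`, `Cluster`, `Profile`, `Failpoint`, `Traffic`, `Watch`) plus `Exploratory` and `Regression` constructors. That file is not shown in this view; a rough reconstruction with field types inferred from usage in `main_test.go` (the actual declarations may differ) looks like:

```go
package scenarios

import (
	"testing"

	"go.etcd.io/etcd/tests/v3/framework/e2e"
	"go.etcd.io/etcd/tests/v3/robustness/client"
	"go.etcd.io/etcd/tests/v3/robustness/failpoint"
	"go.etcd.io/etcd/tests/v3/robustness/traffic"
)

// TestScenario mirrors the fields accessed in main_test.go. Types are inferred
// from usage: &s.Cluster is passed to e2e.WithConfig, s.Watch to
// client.CollectClusterWatchEvents, s.Failpoint is assigned from
// failpoint.PickRandom and passed to failpoint.Inject, and s.Profile/s.Traffic
// go to traffic.SimulateTraffic.
type TestScenario struct {
	Name      string
	Profile   traffic.Profile
	Failpoint failpoint.Failpoint
	Cluster   e2e.EtcdProcessClusterConfig
	Traffic   traffic.Traffic
	Watch     client.WatchConfig
}

// Exploratory and Regression replace the old unexported scenario builders.
// Bodies omitted here; each returns the list of scenarios to run.
func Exploratory(t *testing.T) []TestScenario { return nil }

func Regression(t *testing.T) []TestScenario { return nil }
```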
File renamed without changes
File renamed without changes
File renamed without changes