From 27cc0a3296d42baee782a774cd32e20e617721be Mon Sep 17 00:00:00 2001 From: Jonathan Chappelow Date: Fri, 16 Feb 2024 09:30:44 -0600 Subject: [PATCH] ci,test: retain data in failed integ/acct tests, test fixes This will skip removal of the temporary folder for root_dir if the test fails. The containers are still stopped and removed. This also sets the race detector options to kill kwild when a race is detected, and to write races to a file in the retained root_dir instead of stderr, which would be inaccessible after the containers are stopped and removed. Finally, this logs to kwild.log for integration tests too. It was already doing this for acceptance tests. test/acceptance: remove messy flag test: use require in validator tests when needed to avoid panics ci: use tagged golangci-lint-action tests: mitigate premature join expiry tests: wait for sync on other nodes fix error check in validator join expiry --- .github/workflows/ci.yaml | 3 +- Taskfile.yml | 3 +- cmd/kwil-admin/nodecfg/generate.go | 1 + .../compose/kwil/single/docker-compose.yml | 4 +- test/acceptance/docker-compose-dev.yml | 2 + test/acceptance/docker-compose.yml | 2 + test/acceptance/helper.go | 39 ++++++++----------- test/acceptance/kwild_test.go | 8 ---- test/go.mod | 2 +- test/integration/docker-compose.yml.template | 12 ++++++ test/integration/helper.go | 13 ++++++- test/integration/kwild_test.go | 8 +++- test/specifications/validator_ops.go | 25 ++++++------ 13 files changed, 68 insertions(+), 54 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b0b0ca73d..806ff8b13 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -84,7 +84,7 @@ jobs: go build -mod=readonly ./... ./parse/... ./core/... ./test/specifications/ - name: Lint - uses: golangci/golangci-lint-action@3cfe3a4abbb849e10058ce4af15d205b6da42804 + uses: golangci/golangci-lint-action@v4.0.0 with: install-mode: "binary" version: "latest" @@ -170,6 +170,7 @@ jobs: git_commit=${{ github.sha }} version=${{ env.GIT_TAG }} build_time=${{ env.BUILD_TIME }} + # go_race=-race file: ./build/package/docker/kwild.dockerfile push: false tags: kwild:latest diff --git a/Taskfile.yml b/Taskfile.yml index c061f7bd9..fca0e2ce2 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -157,7 +157,7 @@ tasks: cmds: - task: dev:up:nb - dev:up:nb: # run `task dev:up:nb -- -messy` if you don't want cleanup + dev:up:nb: desc: Start the dev environment without rebuilding docker image env: # NOTE: this timeout should be long enough to attach to debugger @@ -196,7 +196,6 @@ tasks: # e.g. # - task test:act:nb -- -remote # - task test:act:nb -- -drivers grpc - # - task test:act:nb -- -messy # - task test:act:nb -- -parallel-mode test:act:nb: desc: Run acceptance tests without building docker image diff --git a/cmd/kwil-admin/nodecfg/generate.go b/cmd/kwil-admin/nodecfg/generate.go index b41678a71..df918691f 100644 --- a/cmd/kwil-admin/nodecfg/generate.go +++ b/cmd/kwil-admin/nodecfg/generate.go @@ -267,6 +267,7 @@ func GenerateTestnetConfig(genCfg *TestnetGenerateConfig, opts *ConfigOpts) erro return fmt.Errorf("failed to merge config file %s: %w", genCfg.ConfigFile, err) } } + cfg.Logging.OutputPaths = append(cfg.Logging.OutputPaths, "kwild.log") privateKeys := make([]cmtEd.PrivKey, nNodes) for i := range privateKeys { diff --git a/deployments/compose/kwil/single/docker-compose.yml b/deployments/compose/kwil/single/docker-compose.yml index a0d346195..720491fb3 100644 --- a/deployments/compose/kwil/single/docker-compose.yml +++ b/deployments/compose/kwil/single/docker-compose.yml @@ -23,7 +23,7 @@ services: -c max_wal_senders=10 -c max_replication_slots=10 -c max_prepared_transactions=2 - volumes: + volumes: - pgkwil:/var/lib/postgresql/data - ./init.sql:/docker-entrypoint-initdb.d/create_user.sql networks: @@ -75,4 +75,4 @@ networks: ipam: driver: default config: - - subnet: 172.5.100.0/16 + - subnet: 172.5.100.0/23 diff --git a/test/acceptance/docker-compose-dev.yml b/test/acceptance/docker-compose-dev.yml index 3e33c7b94..71746cfa7 100644 --- a/test/acceptance/docker-compose-dev.yml +++ b/test/acceptance/docker-compose-dev.yml @@ -14,6 +14,8 @@ services: - "40000:40000" # debugger, if build with debug dockerfile #env_file: # NOTE: docker compose by default will use `.env` file if presented + environment: + GORACE: "halt_on_error=1 log_path=/app/kwil/datarace" volumes: - type: bind source: ${KWIL_HOME:-./.testnode} diff --git a/test/acceptance/docker-compose.yml b/test/acceptance/docker-compose.yml index b8a25ba5e..0381f1794 100644 --- a/test/acceptance/docker-compose.yml +++ b/test/acceptance/docker-compose.yml @@ -12,6 +12,8 @@ services: - "40000" # debugger, if build with debug dockerfile #env_file: # NOTE: docker compose by default will use `.env` file if presented + environment: + GORACE: "halt_on_error=1 log_path=/app/kwil/datarace" volumes: - type: bind source: ${KWIL_HOME:-./.testnode} diff --git a/test/acceptance/helper.go b/test/acceptance/helper.go index 48aa63ea9..9775810f7 100644 --- a/test/acceptance/helper.go +++ b/test/acceptance/helper.go @@ -46,9 +46,6 @@ type ActTestCfg struct { SchemaFile string DockerComposeFile string DockerComposeOverrideFile string - // NoCleanup is used to keep the test environment from being cleaned up - // i.e. config files, logs, containers - NoCleanup bool WaitTimeout time.Duration LogLevel string @@ -148,9 +145,6 @@ func (r *ActHelper) LoadConfig() *ActTestCfg { cfg.GasEnabled, err = strconv.ParseBool(getEnv("KACT_GAS_ENABLED", "false")) require.NoError(r.t, err, "invalid gasEnabled bool") - cfg.NoCleanup, err = strconv.ParseBool(getEnv("KACT_NO_CLEANUP", "false")) - require.NoError(r.t, err, "invalid noCleanup bool") - // value is in format of "10s" or "1m" waitTimeout := getEnv("KACT_WAIT_TIMEOUT", "10s") cfg.WaitTimeout, err = time.ParseDuration(waitTimeout) @@ -176,16 +170,17 @@ func (r *ActHelper) updateEnv(k, v string) { func (r *ActHelper) generateNodeConfig() { r.t.Logf("generate node config") - var tmpPath string - if r.cfg.NoCleanup { - var err error - tmpPath, err = os.MkdirTemp("", "TestKwilAct") - if err != nil { - r.t.Fatal(err) - } - } else { - tmpPath = r.t.TempDir() // automatically removed by testing.T.Cleanup + tmpPath, err := os.MkdirTemp("", "TestKwilAct") + if err != nil { + r.t.Fatal(err) } + r.t.Cleanup(func() { + if r.t.Failed() { + r.t.Logf("Retaining data for failed test at path %v", tmpPath) + return + } + os.RemoveAll(tmpPath) + }) r.t.Logf("created test temp directory: %s", tmpPath) @@ -195,7 +190,7 @@ func (r *ActHelper) generateNodeConfig() { } creatorIdent := hex.EncodeToString(r.cfg.CreatorSigner.Identity()) - err := nodecfg.GenerateNodeConfig(&nodecfg.NodeGenerateConfig{ + err = nodecfg.GenerateNodeConfig(&nodecfg.NodeGenerateConfig{ ChainID: TestChainID, BlockInterval: time.Second, // InitialHeight: 0, @@ -229,13 +224,11 @@ func (r *ActHelper) runDockerCompose(ctx context.Context) { dc, err := compose.NewDockerCompose(composeFiles...) require.NoError(r.t, err, "failed to create docker compose object for single kwild node") - if !r.cfg.NoCleanup { - r.t.Cleanup(func() { - r.t.Logf("teardown docker compose") - err := dc.Down(ctx, compose.RemoveOrphans(true), compose.RemoveImagesLocal, compose.RemoveVolumes(true)) - require.NoErrorf(r.t, err, "failed to teardown %s", dc.Services()) - }) - } + r.t.Cleanup(func() { + r.t.Logf("teardown docker compose") + err := dc.Down(ctx, compose.RemoveOrphans(true), compose.RemoveImagesLocal, compose.RemoveVolumes(true)) + require.NoErrorf(r.t, err, "failed to teardown %s", dc.Services()) + }) // NOTE: if you run with debugger image, you need to attach to the debugger // before the timeout diff --git a/test/acceptance/kwild_test.go b/test/acceptance/kwild_test.go index 9148eec4b..4550841ce 100644 --- a/test/acceptance/kwild_test.go +++ b/test/acceptance/kwild_test.go @@ -15,7 +15,6 @@ import ( var dev = flag.Bool("dev", false, "run for development purpose (no tests)") var remote = flag.Bool("remote", false, "test against remote node") -var noCleanup = flag.Bool("messy", false, "do not cleanup test directories or stop the docker compose when done") // NOTE: `-parallel` is a flag that is already used by `go test` var parallelMode = flag.Bool("parallel-mode", false, "run tests in parallelMode mode") @@ -34,10 +33,6 @@ func TestLocalDevSetup(t *testing.T) { cfg.DockerComposeFile = "./docker-compose-dev.yml" // use the dev compose file cfg.GasEnabled = true - if *noCleanup { - cfg.NoCleanup = true - } - helper.Setup(ctx) helper.WaitUntilInterrupt() } @@ -60,9 +55,6 @@ func TestKwildTransferAcceptance(t *testing.T) { helper := acceptance.NewActHelper(t) cfg := helper.LoadConfig() cfg.GasEnabled = true - if *noCleanup { - cfg.NoCleanup = true - } if !*remote { helper.Setup(ctx) } diff --git a/test/go.mod b/test/go.mod index 91c6fd6c7..c32a1b000 100644 --- a/test/go.mod +++ b/test/go.mod @@ -22,7 +22,6 @@ require ( github.com/testcontainers/testcontainers-go v0.27.0 github.com/testcontainers/testcontainers-go/modules/compose v0.27.0 go.uber.org/zap v1.26.0 - google.golang.org/grpc v1.60.0 ) require ( @@ -242,6 +241,7 @@ require ( google.golang.org/genproto v0.0.0-20231106174013-bbf56f31fb17 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20231120223509-83a465c0220f // indirect + google.golang.org/grpc v1.60.0 // indirect google.golang.org/protobuf v1.31.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect diff --git a/test/integration/docker-compose.yml.template b/test/integration/docker-compose.yml.template index 16973dcd9..d1118e27b 100644 --- a/test/integration/docker-compose.yml.template +++ b/test/integration/docker-compose.yml.template @@ -11,6 +11,8 @@ services: - "26657" #env_file: # NOTE: docker compose by default will use `.env` file if presented + environment: + GORACE: "halt_on_error=1 log_path=/app/kwil/datarace" volumes: - type: bind source: ${KWIL_HOME:-./.testnet}/node0 @@ -68,6 +70,8 @@ services: - "50051" - "26656" - "26657" + environment: + GORACE: "halt_on_error=1 log_path=/app/kwil/datarace" volumes: - type: bind source: ${KWIL_HOME:-./.testnet}/node1 @@ -125,6 +129,8 @@ services: - "50051" - "26656" - "26657" + environment: + GORACE: "halt_on_error=1 log_path=/app/kwil/datarace" volumes: - type: bind source: ${KWIL_HOME:-./.testnet}/node2 @@ -185,6 +191,8 @@ services: - "50051" - "26656" - "26657" + environment: + GORACE: "halt_on_error=1 log_path=/app/kwil/datarace" volumes: - type: bind source: ${KWIL_HOME:-./.testnet}/node3 @@ -242,6 +250,8 @@ services: - "50051" - "26656" - "26657" + environment: + GORACE: "halt_on_error=1 log_path=/app/kwil/datarace" volumes: - type: bind source: ${KWIL_HOME:-./.testnet}/node4 @@ -299,6 +309,8 @@ services: - "50051" - "26656" - "26657" + environment: + GORACE: "halt_on_error=1 log_path=/app/kwil/datarace" volumes: - type: bind source: ${KWIL_HOME:-./.testnet}/node5 diff --git a/test/integration/helper.go b/test/integration/helper.go index 6f234bbff..b2a23b31b 100644 --- a/test/integration/helper.go +++ b/test/integration/helper.go @@ -433,7 +433,18 @@ func (r *IntHelper) RunDockerComposeWithServices(ctx context.Context, services [ // 4. Generate node configuration files // 5. Run docker-compose with the given services func (r *IntHelper) Setup(ctx context.Context, services []string) { - tmpDir := r.t.TempDir() + tmpDir, err := os.MkdirTemp("", "TestKwilInt") + if err != nil { + r.t.Fatal(err) + } + r.t.Cleanup(func() { + if r.t.Failed() { + r.t.Logf("Retaining data for failed test at path %v", tmpDir) + return + } + os.RemoveAll(tmpDir) + }) + r.t.Logf("create test directory: %s", tmpDir) r.prepareDockerCompose(ctx, tmpDir) diff --git a/test/integration/kwild_test.go b/test/integration/kwild_test.go index e3efac9b1..d97f208eb 100644 --- a/test/integration/kwild_test.go +++ b/test/integration/kwild_test.go @@ -78,6 +78,8 @@ func TestKwildDatabaseIntegration(t *testing.T) { // Create a new database and verify that the database exists on other nodes specifications.DatabaseDeploySpecification(ctx, t, node0Driver) + // TODO: wait for node 1 and 2 to hit whatever height 0 is at + time.Sleep(2 * time.Second) specifications.DatabaseVerifySpecification(ctx, t, node1Driver, true) specifications.DatabaseVerifySpecification(ctx, t, node2Driver, true) @@ -143,7 +145,7 @@ func TestKwildValidatorUpdatesIntegration(t *testing.T) { ctx := context.Background() - const expiryBlocks = 10 + const expiryBlocks = 20 const blockInterval = time.Second const numVals, numNonVals = 3, 1 opts := []integration.HelperOpt{ @@ -154,7 +156,7 @@ func TestKwildValidatorUpdatesIntegration(t *testing.T) { integration.WithGas(), // must give the joining node some gas too } - expiryWait := 2 * expiryBlocks * blockInterval + const expiryWait = 3 * expiryBlocks * blockInterval / 2 testDrivers := strings.Split(*drivers, ",") for _, driverType := range testDrivers { @@ -190,6 +192,7 @@ func TestKwildValidatorUpdatesIntegration(t *testing.T) { - Consensus reached, Node3 is a Validator */ specifications.ValidatorNodeJoinSpecification(ctx, t, joinerDriver, joinerPubKey, 3) + time.Sleep(2 * time.Second) // Node 0,1 approves specifications.ValidatorNodeApproveSpecification(ctx, t, node0Driver, joinerPubKey, 3, 3, false) specifications.ValidatorNodeApproveSpecification(ctx, t, node1Driver, joinerPubKey, 3, 4, true) @@ -206,6 +209,7 @@ func TestKwildValidatorUpdatesIntegration(t *testing.T) { Rejoin: (same as join process) */ specifications.ValidatorNodeJoinSpecification(ctx, t, joinerDriver, joinerPubKey, 3) + time.Sleep(2 * time.Second) // Node 0, 1 approves specifications.ValidatorNodeApproveSpecification(ctx, t, node0Driver, joinerPubKey, 3, 3, false) specifications.ValidatorNodeApproveSpecification(ctx, t, node1Driver, joinerPubKey, 3, 4, true) diff --git a/test/specifications/validator_ops.go b/test/specifications/validator_ops.go index abfa0ec5c..814f88dcf 100644 --- a/test/specifications/validator_ops.go +++ b/test/specifications/validator_ops.go @@ -2,7 +2,6 @@ package specifications import ( "context" - "errors" "testing" "time" @@ -10,8 +9,6 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" ) func CurrentValidatorsSpecification(ctx context.Context, t *testing.T, netops ValidatorOpsDsl, count int) { @@ -30,14 +27,14 @@ func ValidatorNodeJoinSpecification(ctx context.Context, t *testing.T, netops Va // Validator issues a Join request rec, err := netops.ValidatorNodeJoin(ctx) - assert.NoError(t, err) + require.NoError(t, err) // Ensure that the Tx is mined. expectTxSuccess(t, netops, ctx, rec, defaultTxQueryTimeout)() // Get Request status, #approvals = 0, #board = valCount joinStatus, err := netops.ValidatorJoinStatus(ctx, joiner) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, valCount, len(joinStatus.Board)) assert.Equal(t, 0, approvalCount(joinStatus)) @@ -64,7 +61,7 @@ func ValidatorNodeRemoveSpecificationV4R1(ctx context.Context, t *testing.T, n0, // node 0 sends remove tx targeting node 3 rec, err := n0.ValidatorNodeRemove(ctx, target) - assert.NoError(t, err) + require.NoError(t, err) expectTxSuccess(t, n0, ctx, rec, defaultTxQueryTimeout)() @@ -114,18 +111,18 @@ func ValidatorNodeApproveSpecification(ctx context.Context, t *testing.T, netops // Get current validator count, should be equal to preCnt vals, err := netops.ValidatorsList(ctx) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, preCnt, len(vals)) // Get Join Request status, #board = preCnt joinStatus, err := netops.ValidatorJoinStatus(ctx, joiner) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, preCnt, len(joinStatus.Board)) preApprovalCnt := approvalCount(joinStatus) // Approval Request rec, err := netops.ValidatorNodeApprove(ctx, joiner) - assert.NoError(t, err) + require.NoError(t, err) // Ensure that the Tx is mined. expectTxSuccess(t, netops, ctx, rec, defaultTxQueryTimeout)() @@ -140,7 +137,7 @@ func ValidatorNodeApproveSpecification(ctx context.Context, t *testing.T, netops assert.Error(t, err) assert.Nil(t, joinStatus) } else { - assert.NoError(t, err) + require.NoError(t, err) postApprovalCnt := approvalCount(joinStatus) assert.Equal(t, preApprovalCnt+1, postApprovalCnt) } @@ -161,7 +158,7 @@ func ValidatorNodeLeaveSpecification(ctx context.Context, t *testing.T, netops V // Validator issues a Leave request rec, err := netops.ValidatorNodeLeave(ctx) - assert.NoError(t, err) + require.NoError(t, err) // Ensure that the Validator Leave Tx is mined. expectTxSuccess(t, netops, ctx, rec, defaultTxQueryTimeout)() @@ -177,7 +174,7 @@ func approvalCount(joinStatus *types.JoinRequest) int { cnt := 0 for _, vote := range joinStatus.Approved { if vote { - cnt += 1 + cnt++ } } return cnt @@ -195,7 +192,7 @@ func ValidatorJoinExpirySpecification(ctx context.Context, t *testing.T, netops // Get Request status, #approvals = 0 joinStatus, err := netops.ValidatorJoinStatus(ctx, joiner) - assert.NoError(t, err) + require.NoError(t, err) assert.Equal(t, 0, approvalCount(joinStatus)) // Wait for the join request to expire @@ -204,6 +201,6 @@ func ValidatorJoinExpirySpecification(ctx context.Context, t *testing.T, netops // join request should be expired and removed joinStatus, err = netops.ValidatorJoinStatus(ctx, joiner) - errors.Is(err, status.Error(codes.NotFound, "no active join request")) + assert.Equal(t, err.Error(), "no active join request for that validator") assert.Nil(t, joinStatus) }