diff --git a/.github/workflows/functional-tests.yml b/.github/workflows/functional-tests.yml index 9066d8829f5..a5741896cfd 100644 --- a/.github/workflows/functional-tests.yml +++ b/.github/workflows/functional-tests.yml @@ -20,22 +20,18 @@ env: TERRAFORM_VERSION: 1.10.2 jobs: - run-deep: - name: Deep upgrade tests + run-upgrade: + name: Upgrade tests runs-on: ubuntu-latest strategy: fail-fast: false matrix: environment: - 'pro' - active-version: - - '8.17' - - '8.18' - - '8.19' - - '9.0' - - '9.1' - - '8.x' # Latest 8 version - - '9.x' # Latest 9 version + upgrade-path: + - '8.15, 8.16, 8.17' + - '8.17, 8.18, 9.0' + - '8.17, 8.19, 9.1' steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # v3.1.2 @@ -55,9 +51,7 @@ jobs: working-directory: ${{ github.workspace }}/functionaltests run: | export TF_VAR_CREATED_DATE=$(date +%s) - VERSION="${{ matrix.active-version }}" - VERSION="${VERSION//./_}" - go test -run="_to_${VERSION}" -skip="_to_${VERSION}_to_|Standalone_to_Managed" -v -timeout=30m -target="${{ matrix.environment }}" ./ + go test -run="TestUpgrade" -v -timeout=60m -target="${{ matrix.environment }}" -upgrade-path="${{ matrix.upgrade-path }}" ./ run-standalone: name: Standalone-to-managed tests @@ -67,6 +61,10 @@ jobs: matrix: environment: - 'pro' + test-name: + - 'Managed7' + - 'Managed8' + - 'Managed9' steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - uses: hashicorp/setup-terraform@b9cd54a3c349d3f38e8881555d616ced269862dd # v3.1.2 @@ -86,7 +84,7 @@ jobs: working-directory: ${{ github.workspace }}/functionaltests run: | export TF_VAR_CREATED_DATE=$(date +%s) - go test -run="Standalone_to_Managed" -v -timeout=60m -target="${{ matrix.environment }}" ./ + go test -run="TestStandaloneManaged*/${{ matrix.test-name }}" -v -timeout=60m -target="${{ matrix.environment }}" ./ # notify: # if: always() diff --git a/functionaltests/8_16_test.go b/functionaltests/8_16_test.go deleted file mode 100644 index 93bd4fa8ccb..00000000000 --- a/functionaltests/8_16_test.go +++ /dev/null @@ -1,92 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package functionaltests - -import ( - "testing" -) - -// In 8.15, the data stream management was migrated from ILM to DSL. -// However, a bug was introduced, causing data streams to be unmanaged. -// See https://github.com/elastic/apm-server/issues/13898. -// -// It was fixed by defaulting data stream management to DSL, and eventually -// reverted back to ILM in 8.17. Therefore, data streams created in 8.15 and -// 8.16 are managed by DSL instead of ILM. 
- -func TestUpgrade_8_15_to_8_16_Snapshot(t *testing.T) { - t.Parallel() - from := getLatestSnapshot(t, "8.15") - to := getLatestSnapshot(t, "8.16") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := basicUpgradeLazyRolloverDSLTestScenarios( - from.Version, - to.Version, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - preconditionFailed, - populateSourcemapServerShuttingDown, - refreshCacheCtxDeadline, - refreshCacheCtxCanceled, - // TODO: remove once fixed - populateSourcemapFetcher403, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} - -func TestUpgrade_8_15_to_8_16_BC(t *testing.T) { - t.Parallel() - from := getLatestVersionOrSkip(t, "8.15") - to := getLatestBCOrSkip(t, "8.16") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := basicUpgradeLazyRolloverDSLTestScenarios( - from.Version, - to.Version, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - preconditionFailed, - populateSourcemapServerShuttingDown, - refreshCacheCtxDeadline, - refreshCacheCtxCanceled, - // TODO: remove once fixed - populateSourcemapFetcher403, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} diff --git a/functionaltests/8_17_test.go b/functionaltests/8_17_test.go deleted file mode 100644 index aa20e6cb4c0..00000000000 --- a/functionaltests/8_17_test.go +++ /dev/null @@ -1,138 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package functionaltests - -import ( - "testing" - - "github.com/elastic/apm-server/functionaltests/internal/asserts" -) - -// In 8.15, the data stream management was migrated from ILM to DSL. -// However, a bug was introduced, causing data streams to be unmanaged. -// See https://github.com/elastic/apm-server/issues/13898. -// -// It was fixed by defaulting data stream management to DSL, and eventually -// reverted back to ILM in 8.17. Therefore, data streams created in 8.15 and -// 8.16 are managed by DSL instead of ILM. - -func TestUpgrade_8_16_to_8_17_Snapshot(t *testing.T) { - t.Parallel() - from := getLatestSnapshot(t, "8.16") - to := getLatestSnapshot(t, "8.17") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := allBasicUpgradeScenarios( - from.Version, - to.Version, - // Data streams managed by DSL pre-upgrade. 
- asserts.CheckDataStreamsWant{ - Quantity: 8, - PreferIlm: false, - DSManagedBy: managedByDSL, - IndicesPerDS: 1, - IndicesManagedBy: []string{managedByDSL}, - }, - // Data streams managed by ILM post-upgrade. - // However, the index created before upgrade is still managed by DSL. - asserts.CheckDataStreamsWant{ - Quantity: 8, - PreferIlm: true, - DSManagedBy: managedByILM, - IndicesPerDS: 1, - IndicesManagedBy: []string{managedByDSL}, - }, - // Verify lazy rollover happened, i.e. 2 indices. - asserts.CheckDataStreamsWant{ - Quantity: 8, - PreferIlm: true, - DSManagedBy: managedByILM, - IndicesPerDS: 2, - IndicesManagedBy: []string{managedByDSL, managedByILM}, - }, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - refreshCache503, - refreshCacheCtxCanceled, - populateSourcemapFetcher403, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} - -func TestUpgrade_8_16_to_8_17_BC(t *testing.T) { - t.Parallel() - from := getLatestVersionOrSkip(t, "8.16") - to := getLatestBCOrSkip(t, "8.17") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := allBasicUpgradeScenarios( - from.Version, - to.Version, - // Data streams managed by DSL pre-upgrade. - asserts.CheckDataStreamsWant{ - Quantity: 8, - PreferIlm: false, - DSManagedBy: managedByDSL, - IndicesPerDS: 1, - IndicesManagedBy: []string{managedByDSL}, - }, - // Data streams managed by ILM post-upgrade. - // However, the index created before upgrade is still managed by DSL. - asserts.CheckDataStreamsWant{ - Quantity: 8, - PreferIlm: true, - DSManagedBy: managedByILM, - IndicesPerDS: 1, - IndicesManagedBy: []string{managedByDSL}, - }, - // Verify lazy rollover happened, i.e. 2 indices. - asserts.CheckDataStreamsWant{ - Quantity: 8, - PreferIlm: true, - DSManagedBy: managedByILM, - IndicesPerDS: 2, - IndicesManagedBy: []string{managedByDSL, managedByILM}, - }, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - refreshCache503, - refreshCacheCtxCanceled, - populateSourcemapFetcher403, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} diff --git a/functionaltests/8_18_test.go b/functionaltests/8_18_test.go deleted file mode 100644 index be3966ba10d..00000000000 --- a/functionaltests/8_18_test.go +++ /dev/null @@ -1,76 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package functionaltests - -import ( - "testing" -) - -func TestUpgrade_8_17_to_8_18_Snapshot(t *testing.T) { - t.Parallel() - from := getLatestSnapshot(t, "8.17") - to := getLatestSnapshot(t, "8.18") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := basicUpgradeILMTestScenarios( - from.Version, - to.Version, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - refreshCache503, - populateSourcemapFetcher403, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} - -func TestUpgrade_8_17_to_8_18_BC(t *testing.T) { - t.Parallel() - from := getLatestVersionOrSkip(t, "8.17") - to := getLatestBCOrSkip(t, "8.18") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := basicUpgradeILMTestScenarios( - from.Version, - to.Version, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - refreshCache503, - populateSourcemapFetcher403, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} diff --git a/functionaltests/8_19_test.go b/functionaltests/8_19_test.go deleted file mode 100644 index 194d716c363..00000000000 --- a/functionaltests/8_19_test.go +++ /dev/null @@ -1,81 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package functionaltests - -import ( - "testing" -) - -// Data streams get marked for lazy rollover by ES when something -// changed in the underlying template(s), which in this case is -// the apm-data plugin update for 8.19 and 9.1: -// https://github.com/elastic/elasticsearch/pull/119995. 
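For context on what the lazy rollover assertions in these (now consolidated) tests verify: once Elasticsearch marks the APM data streams for lazy rollover, the first post-upgrade ingestion creates a second backing index per data stream, and both the stream and its indices stay ILM-managed. Expressed with the map-based `asserts.DataStreamExpectation` type introduced later in this diff, that state looks roughly like the sketch below; the helper name is illustrative and simply mirrors the `checkILMRollover` value used in `standalone_test.go`.

```go
package functionaltests

import "github.com/elastic/apm-server/functionaltests/internal/asserts"

// lazyRolloverExpectation is an illustrative helper (not part of the patch):
// after a lazy rollover, a data stream has two backing indices, and the data
// stream and both indices are managed by ILM.
func lazyRolloverExpectation() asserts.DataStreamExpectation {
	return asserts.DataStreamExpectation{
		PreferIlm:        true,
		DSManagedBy:      managedByILM, // "Index Lifecycle Management"
		IndicesManagedBy: []string{managedByILM, managedByILM},
	}
}
```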
- -func TestUpgrade_8_18_to_8_19_Snapshot(t *testing.T) { - t.Parallel() - from := getLatestSnapshot(t, "8.18") - to := getLatestSnapshot(t, "8.19") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := basicUpgradeLazyRolloverILMTestScenarios( - from.Version, - to.Version, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - refreshCache503, - populateSourcemapFetcher403, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} - -func TestUpgrade_8_18_to_8_19_BC(t *testing.T) { - t.Parallel() - from := getLatestVersionOrSkip(t, "8.18") - to := getLatestBCOrSkip(t, "8.19") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := basicUpgradeLazyRolloverILMTestScenarios( - from.Version, - to.Version, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - refreshCache503, - populateSourcemapFetcher403, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} diff --git a/functionaltests/9_0_test.go b/functionaltests/9_0_test.go deleted file mode 100644 index d087fb20cd5..00000000000 --- a/functionaltests/9_0_test.go +++ /dev/null @@ -1,78 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package functionaltests - -import ( - "testing" -) - -func TestUpgrade_8_18_to_9_0_Snapshot(t *testing.T) { - t.Parallel() - from := getLatestSnapshot(t, "8.18") - to := getLatestSnapshot(t, "9.0") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := basicUpgradeILMTestScenarios( - from.Version, - to.Version, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - refreshCache503, - // TODO: remove once fixed - populateSourcemapFetcher403, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} - -func TestUpgrade_8_18_to_9_0_BC(t *testing.T) { - t.Parallel() - from := getLatestVersionOrSkip(t, "8.18") - to := getLatestBCOrSkip(t, "9.0") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := basicUpgradeILMTestScenarios( - from.Version, - to.Version, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - refreshCache503, - // TODO: remove once fixed - populateSourcemapFetcher403, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} diff --git a/functionaltests/9_1_test.go b/functionaltests/9_1_test.go deleted file mode 100644 index 4570b2bd1b8..00000000000 --- a/functionaltests/9_1_test.go +++ /dev/null @@ -1,143 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package functionaltests - -import ( - "testing" -) - -// Data streams get marked for lazy rollover by ES when something -// changed in the underlying template(s), which in this case is -// the apm-data plugin update for 8.19 and 9.1: -// https://github.com/elastic/elasticsearch/pull/119995. 
- -func TestUpgrade_9_0_to_9_1_Snapshot(t *testing.T) { - t.Parallel() - from := getLatestSnapshot(t, "9.0") - to := getLatestSnapshot(t, "9.1") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := basicUpgradeLazyRolloverILMTestScenarios( - from.Version, - to.Version, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - refreshCache503, - populateSourcemapFetcher403, - refreshCache403, - refreshCacheESConfigInvalid, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} - -func TestUpgrade_9_0_to_9_1_BC(t *testing.T) { - t.Parallel() - from := getLatestVersionOrSkip(t, "9.0") - to := getLatestBCOrSkip(t, "9.1") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := basicUpgradeLazyRolloverILMTestScenarios( - from.Version, - to.Version, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - refreshCache503, - populateSourcemapFetcher403, - refreshCache403, - refreshCacheESConfigInvalid, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} - -func TestUpgrade_8_19_to_9_1_Snapshot(t *testing.T) { - t.Parallel() - from := getLatestSnapshot(t, "8.19") - to := getLatestSnapshot(t, "9.1") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := basicUpgradeILMTestScenarios( - from.Version, - to.Version, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - refreshCache503, - populateSourcemapFetcher403, - refreshCache403, - refreshCacheESConfigInvalid, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} - -func TestUpgrade_8_19_to_9_1_BC(t *testing.T) { - t.Parallel() - from := getLatestVersionOrSkip(t, "8.19") - to := getLatestBCOrSkip(t, "9.1") - if !from.CanUpgradeTo(to.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version) - return - } - - scenarios := basicUpgradeILMTestScenarios( - from.Version, - to.Version, - apmErrorLogs{ - tlsHandshakeError, - esReturnedUnknown503, - refreshCache503, - populateSourcemapFetcher403, - refreshCache403, - refreshCacheESConfigInvalid, - }, - ) - for _, scenario := range scenarios { - t.Run(scenario.Name, func(t *testing.T) { - t.Parallel() - scenario.Runner.Run(t) - }) - } -} diff --git a/functionaltests/README.md b/functionaltests/README.md index 8eb72d2d3c7..2416a6f5444 100644 --- a/functionaltests/README.md +++ b/functionaltests/README.md @@ -4,64 +4,67 @@ The functional tests test that APM Server works as expected after version upgrades. -## Running the Tests - -To run the tests, you will first need to set the `EC_API_KEY` environment variable, which can be obtained by following -[this guide](https://www.elastic.co/guide/en/cloud/current/ec-api-authentication.html). +## Code Details -Then, from the current directory, simply run: -```sh -go test -v -timeout=30m -cleanup-on-failure=false -target="pro" ./ +The following is a simplified directory structure of functional tests. 
``` - -You can also specify a specific test you want to run, for example: -```sh -go test -run=TestUpgrade_8_18_to_9_0 -v -timeout=30m -cleanup-on-failure=false -target="pro" ./ +- functionaltests/ + |- infra/ + |- internal/ + |- main_test.go + |- standalone_test.go + |- upgrade_test.go ``` -Note: Before running tests, make sure to delete the Terraforms by running `rm -r tf-*`. +The `internal/` directory contains helper packages used in the tests, e.g. the Elasticsearch and Kibana client wrappers. -### Debugging the Tests +The `infra/` directory contains infrastructure-related code. In our case, we use Terraform for deploying the stack in Elastic Cloud. +The Terraform files are located in `infra/terraform`, and are copied into `tf-/` e.g. `tf-TestUpgrade_8_19_to_9_0/`, at the start of each test (since Terraform saves state in the directory it is initialized in). -If you get some errors after running the test, you can try heading to the [Elastic Cloud console](https://cloud.elastic.co/home) -in order to access the Kibana instance. From there, you can use Dev Tools to check the data streams etc. +The remaining files are the test files and their utility functions. -Note: If the tests failed due to deployment, you may need to access the Elastic Cloud admin console instead to check the -deployment errors. +### Upgrade Tests -## Code Structure +The upgrade tests reside in `upgrade_test.go`. -The following is the simplified directory structure of functional tests. -``` -- functionaltests/ - |- infra/ - |- internal/ - |- main_test.go - |- x_y_test.go -``` +These tests take an `upgrade-path` argument that represents the list of versions to step through in the upgrade test. +The test creates a deployment with the first version, performs ingestion and checks that everything is as expected. +Then, it upgrades to each subsequent version in turn, performing ingestion and checks again after every upgrade. +For example, if we provide an `upgrade-path` of `8.15, 8.16, 8.17`, a deployment will be created in `8.15`, upgraded to `8.16` and then to `8.17` (see the parsing sketch below). -All the functional tests are written in the current directory. +We provide the `upgrade-path` argument through the GitHub workflow matrix; see `functional-tests.yml`. +Configuration for the upgrade test can be found in `upgrade-config.yaml`. -The `internal/` directory contains helper packages used in the tests, e.g. Elasticsearch, Kibana client wrapper etc. +### Standalone-to-Managed Tests -The `infra/` directory contains infrastructure related code. In our case, we use Terraform for deploying the stack in -Elastic Cloud. The Terraform files are located in `infra/terraform`, and are copied into `tf-/` e.g. -`tf-TestUpgrade_8_19_to_9_0/`, at the start of each test (since Terraform saves state in the directory it is initialized -in). +The standalone-to-managed tests reside in `standalone_test.go`. -### Upgrade Tests +These tests cover the migration from a standalone APM Server to a Fleet-managed one. Currently, the tested migration paths are: +- 7.x standalone -> 8.x standalone -> 8.x managed -> 9.x managed +- 7.x standalone -> 8.x standalone -> 9.x standalone -> 9.x managed +- 7.x standalone -> 7.x managed -> 8.x managed -> 9.x managed + +## Running the Tests -We suggest each upgrade test to be named in the format of `TestUpgrade__to_[_to_]*[_]?`. -This means that the test will start from `from_version`, and be upgraded to `to_version_1`, then subsequently to -`to_version_2` etc. all the way to `to_version_N`.
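The comma-separated `upgrade-path` flag described in the Upgrade Tests section above is parsed inside `upgrade_test.go`, which is not shown in this diff. As a minimal sketch only, assuming a hypothetical `parseUpgradePath` helper (the real code likely also resolves each entry to an `ecclient.StackVersion`), the value could be split into an ordered list of versions like this:

```go
package functionaltests

import (
	"fmt"
	"strings"
)

// parseUpgradePath is a hypothetical helper: it splits an -upgrade-path value
// such as "8.15, 8.16, 8.17" into an ordered slice of version strings. The
// actual implementation in upgrade_test.go may differ.
func parseUpgradePath(raw string) ([]string, error) {
	if strings.TrimSpace(raw) == "" {
		return nil, fmt.Errorf("upgrade-path must not be empty")
	}
	parts := strings.Split(raw, ",")
	versions := make([]string, 0, len(parts))
	for _, part := range parts {
		v := strings.TrimSpace(part)
		if v == "" {
			return nil, fmt.Errorf("empty version in upgrade-path %q", raw)
		}
		versions = append(versions, v)
	}
	if len(versions) < 2 {
		return nil, fmt.Errorf("upgrade-path %q must contain at least two versions", raw)
	}
	return versions, nil
}
```

Each parsed version then corresponds to one deployment creation or upgrade step, followed by ingestion and the data stream checks described above.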
+To run the tests, you will first need to set the `EC_API_KEY` environment variable, which can be obtained by following [this guide](https://www.elastic.co/guide/en/cloud/current/ec-api-authentication.html). -The upgrade tests are implemented in each version test file. The test file is named after the last version of the upgrade -chain. For example, `TestUpgrade_8_15_to_8_16` will be in `8_16_test.go`. +### Upgrade Tests + +For upgrade tests: +```sh +go test -run=TestUpgrade_UpgradePath -v -timeout=60m -cleanup-on-failure=false -target="pro" -upgrade-path="" ./ +``` ### Standalone-to-Managed Tests -If the standalone-to-managed test includes an upgrade, simply add `Standalone_to_Managed` at the end of the test name, -e.g. `TestUpgrade_7_17_to_8_x_Standalone_to_Managed`. Otherwise, if there is no upgrade simply omit the `Upgrade` -prefix, e.g. `Test_7_17_Standalone_to_Managed`. +For standalone tests: +```sh +go test -run=TestStandaloneManaged -v -timeout=60m -cleanup-on-failure=false -target="pro" ./ +``` + +## Debugging the Tests + +If you get some errors after running the test, you can try heading to the [Elastic Cloud console](https://cloud.elastic.co/home) in order to access the Kibana instance. +From there, you can use Dev Tools to check the data streams etc. -The standalone-to-managed tests are implemented in `standalone_test.go`. \ No newline at end of file +Note: If the tests failed due to deployment or in CI, you may need to access the Elastic Cloud admin console instead to check the errors. diff --git a/functionaltests/basic_upgrade.go b/functionaltests/basic_upgrade.go deleted file mode 100644 index 954838d39ac..00000000000 --- a/functionaltests/basic_upgrade.go +++ /dev/null @@ -1,190 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package functionaltests - -import ( - "context" - "testing" - - "github.com/elastic/apm-server/functionaltests/internal/asserts" - "github.com/elastic/apm-server/functionaltests/internal/ecclient" -) - -type basicUpgradeTestScenario struct { - Name string - Runner testStepsRunner -} - -// basicUpgradeILMTestScenarios returns all scenarios for basic upgrade test -// from `fromVersion` to `toVersion`. The test assumes that all data streams -// (before and after upgrade) are using Index Lifecycle Management (ILM) -// instead of Data Stream Lifecycle Management (DSL), which should be the case -// for most recent APM data streams. 
-func basicUpgradeILMTestScenarios( - fromVersion ecclient.StackVersion, - toVersion ecclient.StackVersion, - apmErrorLogsIgnored apmErrorLogs, -) []basicUpgradeTestScenario { - checkILM := asserts.CheckDataStreamsWant{ - Quantity: 8, - PreferIlm: true, - DSManagedBy: managedByILM, - IndicesPerDS: 1, - IndicesManagedBy: []string{managedByILM}, - } - - return allBasicUpgradeScenarios( - fromVersion, toVersion, - checkILM, checkILM, checkILM, - apmErrorLogsIgnored, - ) -} - -// basicUpgradeLazyRolloverILMTestScenarios returns all scenarios for basic -// upgrade test from `fromVersion` to `toVersion`. The test assumes that all -// data streams (before and after upgrade) are using Index Lifecycle Management -// (ILM) instead of Data Stream Lifecycle Management (DSL), which should be the -// case for most recent APM data streams. It will also verify that lazy -// rollover happened on post-upgrade ingestion. -func basicUpgradeLazyRolloverILMTestScenarios( - fromVersion ecclient.StackVersion, - toVersion ecclient.StackVersion, - apmErrorLogsIgnored apmErrorLogs, -) []basicUpgradeTestScenario { - // All data streams should be managed by ILM. - checkILM := asserts.CheckDataStreamsWant{ - Quantity: 8, - PreferIlm: true, - DSManagedBy: managedByILM, - IndicesPerDS: 1, - IndicesManagedBy: []string{managedByILM}, - } - // Verify lazy rollover happened, i.e. 2 indices per data stream. - checkILMRollover := asserts.CheckDataStreamsWant{ - Quantity: 8, - PreferIlm: true, - DSManagedBy: managedByILM, - IndicesPerDS: 2, - IndicesManagedBy: []string{managedByILM, managedByILM}, - } - - return allBasicUpgradeScenarios( - fromVersion, toVersion, - checkILM, checkILM, checkILMRollover, - apmErrorLogsIgnored, - ) -} - -// basicUpgradeLazyRolloverDSLTestScenarios returns all scenarios for basic -// upgrade test from `fromVersion` to `toVersion`. The test assumes that all -// data streams (before and after upgrade) are using Data Stream Lifecycle -// Management (DSL) instead of Index Lifecycle Management (ILM). It will also -// verify that lazy rollover happened on post-upgrade ingestion. -func basicUpgradeLazyRolloverDSLTestScenarios( - fromVersion ecclient.StackVersion, - toVersion ecclient.StackVersion, - apmErrorLogsIgnored apmErrorLogs, -) []basicUpgradeTestScenario { - // All data streams should be managed by DSL. - checkDSL := asserts.CheckDataStreamsWant{ - Quantity: 8, - PreferIlm: false, - DSManagedBy: managedByDSL, - IndicesPerDS: 1, - IndicesManagedBy: []string{managedByDSL}, - } - // Verify lazy rollover happened, i.e. 2 indices per data stream. - checkDSLRollover := asserts.CheckDataStreamsWant{ - Quantity: 8, - PreferIlm: false, - DSManagedBy: managedByDSL, - IndicesPerDS: 2, - IndicesManagedBy: []string{managedByDSL, managedByDSL}, - } - - return allBasicUpgradeScenarios( - fromVersion, toVersion, - checkDSL, checkDSL, checkDSLRollover, - apmErrorLogsIgnored, - ) -} - -// allBasicUpgradeScenarios returns all basic upgrade test scenarios. -// The scenarios involved are: -// -// - Default: The cluster is created, some data is ingested and the first -// check ensures that it's in the expected state. Then, an upgrade -// is triggered, and a second check confirms that the state did not -// drift after upgrade. A new ingestion is performed, and a third -// check verifies that ingestion works as expected after upgrade. -// Finally, error logs are examined to ensure there are no unexpected -// errors. 
-// -// - Reroute: Same as Default scenario, except after the cluster is created, -// we insert a reroute ingest pipeline to reroute all APM data streams to -// a new namespace. This test is to ensure that APM data streams rerouting -// still works as expected across ingestion and upgrade. -// See https://github.com/elastic/apm-server/issues/14060 for motivation. -func allBasicUpgradeScenarios( - fromVersion ecclient.StackVersion, - toVersion ecclient.StackVersion, - checkPreUpgradeAfterIngest asserts.CheckDataStreamsWant, - checkPostUpgradeBeforeIngest asserts.CheckDataStreamsWant, - checkPostUpgradeAfterIngest asserts.CheckDataStreamsWant, - apmErrorLogsIgnored apmErrorLogs, -) []basicUpgradeTestScenario { - var scenarios []basicUpgradeTestScenario - - // Default - scenarios = append(scenarios, basicUpgradeTestScenario{ - Name: "Default", - Runner: testStepsRunner{ - Steps: []testStep{ - createStep{DeployVersion: fromVersion}, - ingestStep{CheckDataStream: checkPreUpgradeAfterIngest}, - upgradeStep{NewVersion: toVersion, CheckDataStream: checkPostUpgradeBeforeIngest}, - ingestStep{CheckDataStream: checkPostUpgradeAfterIngest}, - checkErrorLogsStep{APMErrorLogsIgnored: apmErrorLogsIgnored}, - }, - }, - }) - - // Reroute - rerouteNamespace := "rerouted" - setupFn := stepFunc(func(t *testing.T, ctx context.Context, e *testStepEnv, previousRes testStepResult) testStepResult { - t.Log("create reroute processors") - createRerouteIngestPipeline(t, ctx, e.esc, rerouteNamespace) - return previousRes - }) - scenarios = append(scenarios, basicUpgradeTestScenario{ - Name: "Reroute", - Runner: testStepsRunner{ - DataStreamNamespace: rerouteNamespace, - Steps: []testStep{ - createStep{DeployVersion: fromVersion}, - customStep{Func: setupFn}, - ingestStep{CheckDataStream: checkPreUpgradeAfterIngest}, - upgradeStep{NewVersion: toVersion, CheckDataStream: checkPostUpgradeBeforeIngest}, - ingestStep{CheckDataStream: checkPostUpgradeAfterIngest}, - checkErrorLogsStep{APMErrorLogsIgnored: apmErrorLogsIgnored}, - }, - }, - }) - - return scenarios -} diff --git a/functionaltests/internal/asserts/data_streams.go b/functionaltests/internal/asserts/data_streams.go index 4baa34c720e..62eb9d0c416 100644 --- a/functionaltests/internal/asserts/data_streams.go +++ b/functionaltests/internal/asserts/data_streams.go @@ -21,61 +21,32 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "github.com/elastic/go-elasticsearch/v8/typedapi/types" ) -type CheckDataStreamsWant struct { - Quantity int +type DataStreamExpectation struct { DSManagedBy string PreferIlm bool - IndicesPerDS int IndicesManagedBy []string } -// CheckDataStreams asserts expected values on specific data streams. -func CheckDataStreams(t *testing.T, expected CheckDataStreamsWant, actual []types.DataStream) { +// DataStreamsMeetExpectation asserts that each data stream have expected values individually. +func DataStreamsMeetExpectation(t *testing.T, expected map[string]DataStreamExpectation, actual []types.DataStream) { t.Helper() - // Preliminarily check that these two are matching, to avoid panic later. 
- require.Len(t, expected.IndicesManagedBy, expected.IndicesPerDS, - "length of IndicesManagedBy should be equal to IndicesPerDS") - assert.Len(t, actual, expected.Quantity, "number of APM data streams differs from expectations") - - for _, v := range actual { - if expected.PreferIlm { - assert.True(t, v.PreferIlm, "data stream %s should prefer ILM", v.Name) - } else { - assert.False(t, v.PreferIlm, "data stream %s should not prefer ILM", v.Name) - } + assert.Len(t, actual, len(expected), "number of APM data streams differs from expectations") - assert.Equal(t, expected.DSManagedBy, v.NextGenerationManagedBy.Name, - `data stream %s should be managed by "%s"`, v.Name, expected.DSManagedBy, - ) - assert.Len(t, v.Indices, expected.IndicesPerDS, - "data stream %s should have %d indices", v.Name, expected.IndicesPerDS, - ) - for i, index := range v.Indices { - assert.Equal(t, expected.IndicesManagedBy[i], index.ManagedBy.Name, - `index %s should be managed by "%s"`, index.IndexName, - expected.IndicesManagedBy[i], - ) + // Check that all expected data streams appear. + mp := dataStreamsMap(actual) + for ds := range expected { + if _, ok := mp[ds]; !ok { + t.Errorf("expected data stream %s not found", ds) + continue } } -} - -type CheckDataStreamIndividualWant struct { - DSManagedBy string - PreferIlm bool - IndicesManagedBy []string -} - -func CheckDataStreamsIndividually(t *testing.T, expected map[string]CheckDataStreamIndividualWant, actual []types.DataStream) { - t.Helper() - - assert.Len(t, actual, len(expected), "number of APM data streams differs from expectations") + // Check that data streams are in expected state. for _, v := range actual { e, ok := expected[v.Name] if !ok { @@ -83,23 +54,36 @@ func CheckDataStreamsIndividually(t *testing.T, expected map[string]CheckDataStr continue } - if e.PreferIlm { - assert.True(t, v.PreferIlm, "data stream %s should prefer ILM", v.Name) - } else { - assert.False(t, v.PreferIlm, "data stream %s should not prefer ILM", v.Name) - } + checkSingleDataStream(t, e, v) + } +} - assert.Equal(t, e.DSManagedBy, v.NextGenerationManagedBy.Name, - `data stream %s should be managed by "%s"`, v.Name, e.DSManagedBy, - ) - assert.Len(t, v.Indices, len(e.IndicesManagedBy), - "data stream %s should have %d indices", v.Name, len(e.IndicesManagedBy), +func dataStreamsMap(dataStreams []types.DataStream) map[string]types.DataStream { + result := make(map[string]types.DataStream) + for _, dataStream := range dataStreams { + result[dataStream.Name] = dataStream + } + return result +} + +func checkSingleDataStream(t *testing.T, expected DataStreamExpectation, actual types.DataStream) { + if expected.PreferIlm { + assert.True(t, actual.PreferIlm, "data stream %s should prefer ILM", actual.Name) + } else { + assert.False(t, actual.PreferIlm, "data stream %s should not prefer ILM", actual.Name) + } + + assert.Equal(t, expected.DSManagedBy, actual.NextGenerationManagedBy.Name, + `data stream %s should be managed by "%s"`, actual.Name, expected.DSManagedBy, + ) + + assert.Len(t, actual.Indices, len(expected.IndicesManagedBy), + "data stream %s should have %d indices", actual.Name, len(expected.IndicesManagedBy), + ) + for i, index := range actual.Indices { + assert.Equal(t, expected.IndicesManagedBy[i], index.ManagedBy.Name, + `index %s should be managed by "%s"`, index.IndexName, + expected.IndicesManagedBy[i], ) - for i, index := range v.Indices { - assert.Equal(t, e.IndicesManagedBy[i], index.ManagedBy.Name, - `index %s should be managed by "%s"`, index.IndexName, - 
e.IndicesManagedBy[i], - ) - } } } diff --git a/functionaltests/internal/asserts/data_streams_test.go b/functionaltests/internal/asserts/data_streams_test.go index 60f8fd529a3..5a1be57b611 100644 --- a/functionaltests/internal/asserts/data_streams_test.go +++ b/functionaltests/internal/asserts/data_streams_test.go @@ -31,17 +31,22 @@ import ( func TestCheckDataStreams(t *testing.T) { tests := []struct { name string - expected asserts.CheckDataStreamsWant + expected map[string]asserts.DataStreamExpectation dss []types.DataStream }{ { name: "default", - expected: asserts.CheckDataStreamsWant{ - Quantity: 2, - DSManagedBy: "Index Lifecycle Management", - PreferIlm: true, - IndicesPerDS: 2, - IndicesManagedBy: []string{"Data Stream Lifecycle", "Index Lifecycle Management"}, + expected: map[string]asserts.DataStreamExpectation{ + "logs-apm.error-default": { + DSManagedBy: "Index Lifecycle Management", + PreferIlm: true, + IndicesManagedBy: []string{"Data Stream Lifecycle", "Index Lifecycle Management"}, + }, + "metrics-apm.app.opbeans_python-default": { + DSManagedBy: "Index Lifecycle Management", + PreferIlm: true, + IndicesManagedBy: []string{"Data Stream Lifecycle", "Index Lifecycle Management"}, + }, }, dss: []types.DataStream{ { @@ -79,7 +84,7 @@ func TestCheckDataStreams(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - asserts.CheckDataStreams(t, tt.expected, tt.dss) + asserts.DataStreamsMeetExpectation(t, tt.expected, tt.dss) assert.False(t, t.Failed()) }) } diff --git a/functionaltests/internal/asserts/doc_count.go b/functionaltests/internal/asserts/doc_count.go index 70cd1248ebe..bfe8131e992 100644 --- a/functionaltests/internal/asserts/doc_count.go +++ b/functionaltests/internal/asserts/doc_count.go @@ -18,7 +18,6 @@ package asserts import ( - "fmt" "testing" "github.com/stretchr/testify/assert" @@ -26,44 +25,54 @@ import ( "github.com/elastic/apm-server/functionaltests/internal/esclient" ) -// CheckDocCount checks if difference in data stream document count between -// current and previous state is equal to the expected, barring the skipped -// data streams. -// -// NOTE: Ingestion should never remove documents. If the expectedDiff is -// negative, the data stream will be expected to appear, but the document -// count will not be asserted. -func CheckDocCount( - t *testing.T, - currDocCount, prevDocCount, expectedDiff esclient.DataStreamsDocCount, -) { +// DocExistFor checks if documents exist for the expected data stream / index. +func DocExistFor(t *testing.T, currDocCount map[string]int, names []string) { + t.Helper() + + for _, name := range names { + if _, ok := currDocCount[name]; !ok { + t.Errorf("expected %s not found", name) + continue + } + } +} + +// DocCountIncreased checks if current document counts for all data streams / indices +// increased from the previous. +func DocCountIncreased(t *testing.T, currDocCount, prevDocCount map[string]int) { t.Helper() + if currDocCount == nil { + currDocCount = esclient.DataStreamsDocCount{} + } if prevDocCount == nil { - prevDocCount = map[string]int{} + prevDocCount = esclient.DataStreamsDocCount{} } - // Check that all expected data streams appear. - for ds := range expectedDiff { - if _, ok := currDocCount[ds]; !ok { - t.Errorf("expected data stream %s not found", ds) - continue - } + // Check that document counts have increased for all data streams. 
+ for ds, currCount := range currDocCount { + prevCount := prevDocCount[ds] + assert.Greaterf(t, currCount, prevCount, + "document count did not increase for data stream %s", ds) } +} - // Check document counts for all data streams. - for ds, v := range currDocCount { - e, ok := expectedDiff[ds] - if !ok { - t.Errorf("unexpected documents (%d) for data stream %s", v, ds) - continue - } +// DocCountStayedTheSame checks if current document counts for all data streams / indices +// stayed the same from the previous. +func DocCountStayedTheSame(t *testing.T, currDocCount, prevDocCount map[string]int) { + t.Helper() - if e < 0 { - continue - } + if currDocCount == nil { + currDocCount = esclient.DataStreamsDocCount{} + } + if prevDocCount == nil { + prevDocCount = esclient.DataStreamsDocCount{} + } - assert.Equal(t, e, v-prevDocCount[ds], - fmt.Sprintf("wrong document count difference for data stream %s", ds)) + // Check that document counts stayed the same for all data streams. + for ds, currCount := range currDocCount { + prevCount := prevDocCount[ds] + assert.Equalf(t, currCount, prevCount, + "document count changed for data stream %s", ds) } } diff --git a/functionaltests/internal/asserts/doc_count_test.go b/functionaltests/internal/asserts/doc_count_test.go index 04b4e9c8bcc..a3e360fa326 100644 --- a/functionaltests/internal/asserts/doc_count_test.go +++ b/functionaltests/internal/asserts/doc_count_test.go @@ -25,7 +25,7 @@ import ( "github.com/elastic/apm-server/functionaltests/internal/esclient" ) -func TestCheckDocCount(t *testing.T) { +func TestDocCountIncreased(t *testing.T) { type args struct { currDocCount esclient.DataStreamsDocCount prevDocCount esclient.DataStreamsDocCount @@ -48,17 +48,12 @@ func TestCheckDocCount(t *testing.T) { "logs-apm.error-default": 100, "metrics-apm.app.opbeans_python-default": 10, }, - expectedDiff: esclient.DataStreamsDocCount{ - "traces-apm-default": 50, - "logs-apm.error-default": 100, - "metrics-apm.app.opbeans_python-default": -1, - }, }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - CheckDocCount(t, tt.args.currDocCount, tt.args.prevDocCount, tt.args.expectedDiff) + DocCountIncreased(t, tt.args.currDocCount, tt.args.prevDocCount) assert.False(t, t.Failed()) }) } diff --git a/functionaltests/internal/asserts/doc_count_v7.go b/functionaltests/internal/asserts/doc_count_v7.go deleted file mode 100644 index b8206f1ae5f..00000000000 --- a/functionaltests/internal/asserts/doc_count_v7.go +++ /dev/null @@ -1,69 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package asserts - -import ( - "fmt" - "testing" - - "github.com/stretchr/testify/assert" - - "github.com/elastic/apm-server/functionaltests/internal/esclient" -) - -// CheckDocCountV7 checks if difference in index document count between -// current and previous state is equal to the expected, barring the skipped -// indices. -// -// NOTE: Ingestion should never remove documents. If the expectedDiff is -// negative, the index will be expected to appear, but the document count -// will not be asserted. -func CheckDocCountV7( - t *testing.T, - currDocCount, prevDocCount, expectedDiff esclient.IndicesDocCount, -) { - t.Helper() - - if prevDocCount == nil { - prevDocCount = map[string]int{} - } - - // Check that all expected indices appear. - for idx := range expectedDiff { - if _, ok := currDocCount[idx]; !ok { - t.Errorf("expected index %s not found", idx) - continue - } - } - - // Check document counts for all indices. - for idx, v := range currDocCount { - e, ok := expectedDiff[idx] - if !ok { - t.Errorf("unexpected documents (%d) for index %s", v, idx) - continue - } - - if e < 0 { - continue - } - - assert.Equal(t, e, v-prevDocCount[idx], - fmt.Sprintf("wrong document count difference for index %s", idx)) - } -} diff --git a/functionaltests/internal/esclient/client.go b/functionaltests/internal/esclient/client.go index 6863258cc50..6f7fd4679a3 100644 --- a/functionaltests/internal/esclient/client.go +++ b/functionaltests/internal/esclient/client.go @@ -134,8 +134,8 @@ func (c *Client) GetDataStream(ctx context.Context, name string) ([]types.DataSt return resp.DataStreams, nil } -// DocCount is used to unmarshal response from ES|QL query. -type DocCount struct { +// docCount is used to unmarshal response from ES|QL query. +type docCount struct { DataStream string Count int } @@ -153,7 +153,7 @@ func (c *Client) APMDSDocCount(ctx context.Context) (DataStreamsDocCount, error) | SORT count DESC` qry := c.es.Esql.Query().Query(q) - resp, err := query.Helper[DocCount](ctx, qry) + resp, err := query.Helper[docCount](ctx, qry) if err != nil { var eserr *types.ElasticsearchError // suppress this error as it only indicates no data is available yet. diff --git a/functionaltests/internal/esclient/client_v7.go b/functionaltests/internal/esclient/client_v7.go index 211ff160dd1..07ddad84969 100644 --- a/functionaltests/internal/esclient/client_v7.go +++ b/functionaltests/internal/esclient/client_v7.go @@ -38,8 +38,6 @@ type IndicesDocCount map[string]int func (c *Client) APMIdxDocCountV7(ctx context.Context) (IndicesDocCount, error) { indicesToCheck := []string{ "apm-*-transaction-*", "apm-*-span-*", "apm-*-error-*", "apm-*-metric-*", - "apm-*-profile-*", - "apm-*-onboarding-*", } count := IndicesDocCount{} @@ -95,6 +93,11 @@ func (c *Client) getDocCountV7(ctx context.Context, name string) (docCountV7, er return docCountV7{}, fmt.Errorf("cannot get count for %s: %w", name, err) } + // If not found, return zero count instead of error. 
+ if resp.StatusCode == http.StatusNotFound { + return docCountV7{Count: 0}, nil + } + if resp.StatusCode > http.StatusOK { return docCountV7{}, fmt.Errorf( "count request for %s returned unexpected status code: %d", diff --git a/functionaltests/internal/kbclient/upgrade_assist_test.go b/functionaltests/internal/kbclient/upgrade_assist_test.go index 73cf92944d7..1e2f450cc25 100644 --- a/functionaltests/internal/kbclient/upgrade_assist_test.go +++ b/functionaltests/internal/kbclient/upgrade_assist_test.go @@ -39,7 +39,6 @@ import ( // Quantity: 8, // PreferIlm: true, // DSManagedBy: managedByILM, -// IndicesPerDS: 1, // IndicesManagedBy: []string{managedByILM}, // } // @@ -51,7 +50,7 @@ import ( // }, // ingestV7Step{}, // upgradeV7Step{NewVersion: getLatestSnapshot(t, "8")}, -// ingestStep{CheckDataStream: checkILM}, +// ingestStep{CheckDataStreams: checkILM}, // }, // } // runner.Run(t) diff --git a/functionaltests/logs_filters.go b/functionaltests/logs_filters.go index 365eb826c83..29b024b20d5 100644 --- a/functionaltests/logs_filters.go +++ b/functionaltests/logs_filters.go @@ -102,6 +102,12 @@ var ( }, }) + syncSourcemapContextCanceled = apmErrorLog(types.Query{ + MatchPhrase: map[string]types.MatchPhraseQuery{ + "message": {Query: "failed to sync sourcemaps metadata: failed to run initial search query: fetcher unavailable: context canceled"}, + }, + }) + waitServerReadyCtxCanceled = apmErrorLog(types.Query{ MatchPhrase: map[string]types.MatchPhraseQuery{ "message": {Query: "error waiting for server to be ready: context canceled"}, diff --git a/functionaltests/main_test.go b/functionaltests/main_test.go index d1bd1be1ae3..1b357b63904 100644 --- a/functionaltests/main_test.go +++ b/functionaltests/main_test.go @@ -27,6 +27,31 @@ import ( "github.com/elastic/apm-server/functionaltests/internal/ecclient" ) +var ( + // cleanupOnFailure determines whether the created resources should be cleaned up on test failure. + cleanupOnFailure = flag.Bool( + "cleanup-on-failure", + true, + "Whether to run cleanup even if the test failed.", + ) + + // target is the Elastic Cloud environment to target with these tests. + // We use 'pro' for production as that is the key used to retrieve EC_API_KEY from secret storage. + target = flag.String( + "target", + "pro", + "The target environment to run tests against.
Valid values are: qa, pro.", + ) + + upgradePath = flag.String( + "upgrade-path", + "", + "Versions to be used in TestUpgrade_UpgradePath_Snapshot in upgrade_test.go, separated by commas", + ) +) + +var vsCache *versionsCache + func TestMain(m *testing.M) { flag.Parse() @@ -50,26 +75,11 @@ func TestMain(m *testing.M) { return } - candidates, err := ecc.GetCandidateVersionInfos(ctx, ecRegion) - if err != nil { - log.Fatal(err) - return - } - fetchedCandidates = candidates - - snapshots, err := ecc.GetSnapshotVersionInfos(ctx, ecRegion) - if err != nil { - log.Fatal(err) - return - } - fetchedSnapshots = snapshots - - versions, err := ecc.GetVersionInfos(ctx, ecRegion) + vsCache, err = newVersionsCache(ctx, ecc, ecRegion) if err != nil { log.Fatal(err) return } - fetchedVersions = versions code := m.Run() os.Exit(code) diff --git a/functionaltests/utils.go b/functionaltests/setup.go similarity index 52% rename from functionaltests/utils.go rename to functionaltests/setup.go index 23e4a04ed64..f77256a6a03 100644 --- a/functionaltests/utils.go +++ b/functionaltests/setup.go @@ -19,11 +19,8 @@ package functionaltests import ( "context" - "flag" "fmt" - "maps" "os" - "slices" "strconv" "strings" "testing" @@ -41,129 +38,18 @@ import ( "github.com/elastic/apm-server/functionaltests/internal/terraform" ) -var ( - // cleanupOnFailure determines whether the created resources should be cleaned up on test failure. - cleanupOnFailure = flag.Bool( - "cleanup-on-failure", - true, - "Whether to run cleanup even if the test failed.", - ) - - // target is the Elastic Cloud environment to target with these test. - // We use 'pro' for production as that is the key used to retrieve EC_API_KEY from secret storage. - target = flag.String( - "target", - "pro", - "The target environment where to run tests againts. Valid values are: qa, pro.", - ) -) - const ( - // managedByDSL is the constant string used by Elasticsearch to specify that an Index is managed by Data Stream Lifecycle management. + // managedByDSL is the constant string used by Elasticsearch to specify that + // an index is managed by Data Stream Lifecycle management. managedByDSL = "Data stream lifecycle" - // managedByILM is the constant string used by Elasticsearch to specify that an Index is managed by Index Lifecycle Management. + // managedByILM is the constant string used by Elasticsearch to specify that + // an index is managed by Index Lifecycle Management. managedByILM = "Index Lifecycle Management" ) -var ( - // fetchedCandidates are the build-candidate stack versions prefetched from Elastic Cloud API. - fetchedCandidates ecclient.StackVersionInfos - // fetchedSnapshots are the snapshot stack versions prefetched from Elastic Cloud API. - fetchedSnapshots ecclient.StackVersionInfos - // fetchedVersions are the non-snapshot stack versions prefetched from Elastic Cloud API. - fetchedVersions ecclient.StackVersionInfos -) - -// getLatestVersionOrSkip retrieves the latest non-snapshot version for the version prefix. -// If the version is not found, the test is skipped via t.Skip. -func getLatestVersionOrSkip(t *testing.T, prefix string) ecclient.StackVersionInfo { - t.Helper() - version, ok := fetchedVersions.LatestFor(prefix) - if !ok { - t.Skipf("version for '%s' not found in EC region %s, skipping test", prefix, regionFrom(*target)) - return ecclient.StackVersionInfo{} - } - return version -} - -// getLatestBCOrSkip retrieves the latest build-candidate version for the version prefix. 
-// If the version is not found, the test is skipped via t.Skip. -func getLatestBCOrSkip(t *testing.T, prefix string) ecclient.StackVersionInfo { - t.Helper() - candidate, ok := fetchedCandidates.LatestFor(prefix) - if !ok { - t.Skipf("BC for '%s' not found in EC region %s, skipping test", prefix, regionFrom(*target)) - return ecclient.StackVersionInfo{} - } - - // Check that the BC version is actually latest, otherwise skip test. - versionInfo := getLatestVersionOrSkip(t, prefix) - if versionInfo.Version.Major != candidate.Version.Major { - t.Skipf("BC for '%s' is invalid in EC region %s, skipping test", prefix, regionFrom(*target)) - return ecclient.StackVersionInfo{} - } - if versionInfo.Version.Minor > candidate.Version.Minor { - t.Skipf("BC for '%s' is less than latest normal version in EC region %s, skipping test", - prefix, regionFrom(*target)) - return ecclient.StackVersionInfo{} - } - - return candidate -} - -// getLatestSnapshot retrieves the latest snapshot version for the version prefix. -func getLatestSnapshot(t *testing.T, prefix string) ecclient.StackVersionInfo { - t.Helper() - version, ok := fetchedSnapshots.LatestFor(prefix) - require.True(t, ok, "snapshot for '%s' found in EC region %s", prefix, regionFrom(*target)) - return version -} - -// expectedDataStreamsIngest represent the expected number of ingested document -// after a single run of ingest. -// -// NOTE: The aggregation data streams have negative counts, because they are -// expected to appear but the document counts should not be asserted. -func expectedDataStreamsIngest(namespace string) esclient.DataStreamsDocCount { - return map[string]int{ - fmt.Sprintf("traces-apm-%s", namespace): 15013, - fmt.Sprintf("metrics-apm.app.opbeans_python-%s", namespace): 1437, - fmt.Sprintf("metrics-apm.internal-%s", namespace): 1351, - fmt.Sprintf("logs-apm.error-%s", namespace): 364, - // Ignore aggregation data streams. - fmt.Sprintf("metrics-apm.service_destination.1m-%s", namespace): -1, - fmt.Sprintf("metrics-apm.service_transaction.1m-%s", namespace): -1, - fmt.Sprintf("metrics-apm.service_summary.1m-%s", namespace): -1, - fmt.Sprintf("metrics-apm.transaction.1m-%s", namespace): -1, - } -} - -// emptyDataStreamsIngest represent an empty ingestion. -// It is useful for asserting that the document count did not change after an operation. -// -// NOTE: The aggregation data streams have negative counts, because they -// are expected to appear but the document counts should not be asserted. -func emptyDataStreamsIngest(namespace string) esclient.DataStreamsDocCount { - return map[string]int{ - fmt.Sprintf("traces-apm-%s", namespace): 0, - fmt.Sprintf("metrics-apm.app.opbeans_python-%s", namespace): 0, - fmt.Sprintf("metrics-apm.internal-%s", namespace): 0, - fmt.Sprintf("logs-apm.error-%s", namespace): 0, - // Ignore aggregation data streams. - fmt.Sprintf("metrics-apm.service_destination.1m-%s", namespace): -1, - fmt.Sprintf("metrics-apm.service_transaction.1m-%s", namespace): -1, - fmt.Sprintf("metrics-apm.service_summary.1m-%s", namespace): -1, - fmt.Sprintf("metrics-apm.transaction.1m-%s", namespace): -1, - } -} - -func allDataStreams(namespace string) []string { - return slices.Collect(maps.Keys(expectedDataStreamsIngest(namespace))) -} - const ( targetQA = "qa" - // we use 'pro' because is the target passed by the Buildkite pipeline running + // we use 'pro' because it is the target passed by the Buildkite pipeline running // these tests. 
targetProd = "pro" ) @@ -262,6 +148,7 @@ func createCluster( target string, fromVersion ecclient.StackVersion, enableIntegrations bool, + cleanupOnFailure bool, ) deploymentInfo { t.Helper() @@ -277,7 +164,7 @@ func createCluster( require.NoError(t, tf.Apply(ctx, ecTarget, ecRegion, ecDeploymentTpl, version, integrations, name)) t.Cleanup(func() { - if !t.Failed() || (t.Failed() && *cleanupOnFailure) { + if !t.Failed() || cleanupOnFailure { t.Log("cleanup terraform resources") require.NoError(t, tf.Destroy(ctx, ecTarget, ecRegion, ecDeploymentTpl, name, version)) } else { @@ -353,78 +240,3 @@ func createAPMGenerator(t *testing.T, ctx context.Context, esc *esclient.Client, g := gen.New(deployInfo.APMServerURL, apiKey, kbc, logger) return g } - -func sliceToSet[T comparable](s []T) map[T]bool { - m := make(map[T]bool) - for _, ele := range s { - m[ele] = true - } - return m -} - -// getAPMDataStreams get all APM related data streams. -func getAPMDataStreams(t *testing.T, ctx context.Context, esc *esclient.Client, ignoreDS ...string) []types.DataStream { - t.Helper() - dataStreams, err := esc.GetDataStream(ctx, "*apm*") - require.NoError(t, err) - - ignore := sliceToSet(ignoreDS) - return slices.DeleteFunc(dataStreams, func(ds types.DataStream) bool { - return ignore[ds.Name] - }) -} - -// getDocCountPerDS retrieves document count per data stream for versions >= 8.0. -func getDocCountPerDS(t *testing.T, ctx context.Context, esc *esclient.Client, ignoreDS ...string) esclient.DataStreamsDocCount { - t.Helper() - count, err := esc.APMDSDocCount(ctx) - require.NoError(t, err) - - ignore := sliceToSet(ignoreDS) - maps.DeleteFunc(count, func(ds string, _ int) bool { - return ignore[ds] - }) - return count -} - -// getDocCountPerDS retrieves document count per data stream for versions < 8.0. -func getDocCountPerDSV7(t *testing.T, ctx context.Context, esc *esclient.Client, namespace string) esclient.DataStreamsDocCount { - t.Helper() - count, err := esc.APMDSDocCountV7(ctx, namespace) - require.NoError(t, err) - return count -} - -// getDocCountPerIndexV7 retrieves document count per index for versions < 8.0. -func getDocCountPerIndexV7(t *testing.T, ctx context.Context, esc *esclient.Client) esclient.IndicesDocCount { - t.Helper() - count, err := esc.APMIdxDocCountV7(ctx) - require.NoError(t, err) - return count -} - -// createRerouteIngestPipeline creates custom pipelines to reroute logs, metrics and traces to different -// data streams specified by namespace. -func createRerouteIngestPipeline(t *testing.T, ctx context.Context, esc *esclient.Client, namespace string) { - t.Helper() - for _, pipeline := range []string{"logs@custom", "metrics@custom", "traces@custom"} { - err := esc.CreateIngestPipeline(ctx, pipeline, []types.ProcessorContainer{ - { - Reroute: &types.RerouteProcessor{ - Namespace: []string{namespace}, - }, - }, - }) - require.NoError(t, err) - } -} - -// performManualRollovers rollover all logs, metrics and traces data streams to new indices. 
-func performManualRollovers(t *testing.T, ctx context.Context, esc *esclient.Client, namespace string) { - t.Helper() - - for _, ds := range allDataStreams(namespace) { - err := esc.PerformManualRollover(ctx, ds) - require.NoError(t, err) - } -} diff --git a/functionaltests/standalone_test.go b/functionaltests/standalone_test.go index 727958152d2..6e9ab11b256 100644 --- a/functionaltests/standalone_test.go +++ b/functionaltests/standalone_test.go @@ -24,12 +24,10 @@ import ( "github.com/elastic/apm-server/functionaltests/internal/ecclient" ) -func TestUpgrade_7_17_to_8_x_to_9_x_Snapshot_Standalone_to_Managed(t *testing.T) { - t.Parallel() - - from7 := getLatestSnapshot(t, "7.17") - to8 := getLatestSnapshot(t, "8") - to9 := getLatestSnapshot(t, "9") +func TestStandaloneManaged_7_17_to_8_x_to_9_x_Snapshot(t *testing.T) { + from7 := vsCache.GetLatestSnapshot(t, "7.17") + to8 := vsCache.GetLatestSnapshot(t, "8") + to9 := vsCache.GetLatestSnapshot(t, "9") if !from7.CanUpgradeTo(to8.Version) { t.Skipf("upgrade from %s to %s is not allowed", from7.Version, to8.Version) return @@ -58,53 +56,19 @@ func TestUpgrade_7_17_to_8_x_to_9_x_Snapshot_Standalone_to_Managed(t *testing.T) }) } -func TestUpgrade_7_17_to_8_x_to_9_x_BC_Standalone_to_Managed(t *testing.T) { - t.Parallel() - - from7 := getLatestVersionOrSkip(t, "7.17") - to8 := getLatestVersionOrSkip(t, "8") - to9 := getLatestBCOrSkip(t, "9") - if !from7.CanUpgradeTo(to8.Version) { - t.Skipf("upgrade from %s to %s is not allowed", from7.Version, to8.Version) - return - } - if !to8.CanUpgradeTo(to9.Version) { - t.Skipf("upgrade from %s to %s is not allowed", to8.Version, to9.Version) - return - } - - t.Run("Managed in 7", func(t *testing.T) { - t.Parallel() - runner := managed7Runner(from7.Version, to8.Version, to9.Version) - runner.Run(t) - }) - - t.Run("Managed in 8", func(t *testing.T) { - t.Parallel() - runner := managed8Runner(from7.Version, to8.Version, to9.Version) - runner.Run(t) - }) - - t.Run("Managed in 9", func(t *testing.T) { - t.Parallel() - runner := managed9Runner(from7.Version, to8.Version, to9.Version) - runner.Run(t) - }) -} - func managed7Runner(fromVersion7, toVersion8, toVersion9 ecclient.StackVersion) testStepsRunner { - checkILM := asserts.CheckDataStreamIndividualWant{ + checkILM := asserts.DataStreamExpectation{ PreferIlm: true, DSManagedBy: managedByILM, IndicesManagedBy: []string{managedByILM}, } - checkILMRollover := asserts.CheckDataStreamIndividualWant{ + checkILMRollover := asserts.DataStreamExpectation{ PreferIlm: true, DSManagedBy: managedByILM, IndicesManagedBy: []string{managedByILM, managedByILM}, } - check := map[string]asserts.CheckDataStreamIndividualWant{ + check := map[string]asserts.DataStreamExpectation{ // These data streams are created in 7.x as well, so when we ingest // again in 8.x, they will be rolled-over. "traces-apm-%s": checkILMRollover, @@ -112,7 +76,7 @@ func managed7Runner(fromVersion7, toVersion8, toVersion9 ecclient.StackVersion) "metrics-apm.internal-%s": checkILMRollover, "logs-apm.error-%s": checkILMRollover, // These data streams are only created in 8.x, so they will only have - // 1 index. + // 1 index per. "metrics-apm.service_destination.1m-%s": checkILM, "metrics-apm.service_transaction.1m-%s": checkILM, "metrics-apm.service_summary.1m-%s": checkILM, @@ -128,6 +92,7 @@ func managed7Runner(fromVersion7, toVersion8, toVersion9 ecclient.StackVersion) } return testStepsRunner{ + Target: *target, Steps: []testStep{ // Start from 7.x. 
createStep{ @@ -141,19 +106,19 @@ func managed7Runner(fromVersion7, toVersion8, toVersion9 ecclient.StackVersion) // Upgrade to 8.x. upgradeV7Step{NewVersion: toVersion8}, ingestStep{ - IgnoreDataStreams: ignoredDataStreams, - CheckIndividualDataStream: check, + IgnoreDataStreams: ignoredDataStreams, + CheckDataStreams: check, }, // Resolve deprecations and upgrade to 9.x. resolveDeprecationsStep{}, upgradeStep{ - NewVersion: toVersion9, - IgnoreDataStreams: ignoredDataStreams, - CheckIndividualDataStream: check, + NewVersion: toVersion9, + IgnoreDataStreams: ignoredDataStreams, + CheckDataStreams: check, }, ingestStep{ - IgnoreDataStreams: ignoredDataStreams, - CheckIndividualDataStream: check, + IgnoreDataStreams: ignoredDataStreams, + CheckDataStreams: check, }, checkErrorLogsStep{ ESErrorLogsIgnored: esErrorLogs{ @@ -163,13 +128,14 @@ func managed7Runner(fromVersion7, toVersion8, toVersion9 ecclient.StackVersion) APMErrorLogsIgnored: apmErrorLogs{ tlsHandshakeError, esReturnedUnknown503, + refreshCache403, refreshCache503, + refreshCacheCtxDeadline, + refreshCacheESConfigInvalid, preconditionClusterInfoCtxCanceled, waitServerReadyCtxCanceled, grpcServerStopped, populateSourcemapFetcher403, - refreshCache403, - refreshCacheESConfigInvalid, }, }, }, @@ -177,31 +143,11 @@ func managed7Runner(fromVersion7, toVersion8, toVersion9 ecclient.StackVersion) } func managed8Runner(fromVersion7, toVersion8, toVersion9 ecclient.StackVersion) testStepsRunner { - checkILMAll := asserts.CheckDataStreamsWant{ - Quantity: 8, + check := dataStreamsExpectations(asserts.DataStreamExpectation{ PreferIlm: true, DSManagedBy: managedByILM, - IndicesPerDS: 1, IndicesManagedBy: []string{managedByILM}, - } - checkILM := asserts.CheckDataStreamIndividualWant{ - PreferIlm: true, - DSManagedBy: managedByILM, - IndicesManagedBy: []string{managedByILM}, - } - - check := map[string]asserts.CheckDataStreamIndividualWant{ - // These data streams are created in 7.x. - "traces-apm-%s": checkILM, - "metrics-apm.app.opbeans_python-%s": checkILM, - "metrics-apm.internal-%s": checkILM, - "logs-apm.error-%s": checkILM, - // These data streams are created in 8.x. - "metrics-apm.service_destination.1m-%s": checkILM, - "metrics-apm.service_transaction.1m-%s": checkILM, - "metrics-apm.service_summary.1m-%s": checkILM, - "metrics-apm.transaction.1m-%s": checkILM, - } + }) // These data streams are created in 7.x, but not used in 8.x and 9.x, // so we ignore them to avoid wrong assertions. @@ -212,29 +158,31 @@ func managed8Runner(fromVersion7, toVersion8, toVersion9 ecclient.StackVersion) } return testStepsRunner{ + Target: *target, Steps: []testStep{ // Start from 7.x. createStep{ DeployVersion: fromVersion7, APMDeploymentMode: apmStandalone, + CleanupOnFailure: *cleanupOnFailure, }, ingestV7Step{}, // Upgrade to 8.x. upgradeV7Step{NewVersion: toVersion8}, - ingestStep{CheckDataStream: checkILMAll}, + ingestStep{CheckDataStreams: check}, // Migrate to managed migrateManagedStep{}, - ingestStep{CheckDataStream: checkILMAll}, + ingestStep{CheckDataStreams: check}, // Resolve deprecations and upgrade to 9.x. 
resolveDeprecationsStep{}, upgradeStep{ - NewVersion: toVersion9, - IgnoreDataStreams: ignoredDataStreams, - CheckIndividualDataStream: check, + NewVersion: toVersion9, + IgnoreDataStreams: ignoredDataStreams, + CheckDataStreams: check, }, ingestStep{ - IgnoreDataStreams: ignoredDataStreams, - CheckIndividualDataStream: check, + IgnoreDataStreams: ignoredDataStreams, + CheckDataStreams: check, }, checkErrorLogsStep{ ESErrorLogsIgnored: esErrorLogs{ @@ -243,10 +191,11 @@ func managed8Runner(fromVersion7, toVersion8, toVersion9 ecclient.StackVersion) APMErrorLogsIgnored: apmErrorLogs{ tlsHandshakeError, esReturnedUnknown503, - refreshCache503, - populateSourcemapFetcher403, refreshCache403, + refreshCache503, + refreshCacheCtxDeadline, refreshCacheESConfigInvalid, + populateSourcemapFetcher403, }, }, }, @@ -254,36 +203,36 @@ func managed8Runner(fromVersion7, toVersion8, toVersion9 ecclient.StackVersion) } func managed9Runner(fromVersion7, toVersion8, toVersion9 ecclient.StackVersion) testStepsRunner { - // Data streams in 8.x should be all ILM if upgraded to a stack < 8.15 and > 8.16. - checkILMAll := asserts.CheckDataStreamsWant{ - Quantity: 8, + // Data streams created in latest 8.x and 9.x should be all ILM. + check := dataStreamsExpectations(asserts.DataStreamExpectation{ PreferIlm: true, DSManagedBy: managedByILM, - IndicesPerDS: 1, IndicesManagedBy: []string{managedByILM}, - } + }) return testStepsRunner{ + Target: *target, Steps: []testStep{ // Start from 7.x. createStep{ DeployVersion: fromVersion7, APMDeploymentMode: apmStandalone, + CleanupOnFailure: *cleanupOnFailure, }, ingestV7Step{}, // Upgrade to 8.x. upgradeV7Step{NewVersion: toVersion8}, - ingestStep{CheckDataStream: checkILMAll}, + ingestStep{CheckDataStreams: check}, // Resolve deprecations and upgrade to 9.x. resolveDeprecationsStep{}, upgradeStep{ - NewVersion: toVersion9, - CheckDataStream: checkILMAll, + NewVersion: toVersion9, + CheckDataStreams: check, }, - ingestStep{CheckDataStream: checkILMAll}, + ingestStep{CheckDataStreams: check}, // Migrate to managed. migrateManagedStep{}, - ingestStep{CheckDataStream: checkILMAll}, + ingestStep{CheckDataStreams: check}, checkErrorLogsStep{ ESErrorLogsIgnored: esErrorLogs{ eventLoopShutdown, @@ -292,9 +241,10 @@ func managed9Runner(fromVersion7, toVersion8, toVersion9 ecclient.StackVersion) tlsHandshakeError, esReturnedUnknown503, refreshCache503, - populateSourcemapFetcher403, refreshCache403, + refreshCacheCtxDeadline, refreshCacheESConfigInvalid, + populateSourcemapFetcher403, }, }, }, diff --git a/functionaltests/steps.go b/functionaltests/steps.go index 69772aa4a5e..0d580a21381 100644 --- a/functionaltests/steps.go +++ b/functionaltests/steps.go @@ -20,11 +20,15 @@ package functionaltests import ( "context" "fmt" + "maps" + "slices" "testing" "time" "github.com/stretchr/testify/require" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" + "github.com/elastic/apm-server/functionaltests/internal/asserts" "github.com/elastic/apm-server/functionaltests/internal/ecclient" "github.com/elastic/apm-server/functionaltests/internal/esclient" @@ -36,6 +40,9 @@ import ( // testStepsRunner consists of composable testStep(s) that is run // in sequence. type testStepsRunner struct { + // Target is the target environment for the Elastic Cloud deployment. + Target string + // DataStreamNamespace is the namespace for the APM data streams // that is being tested. Defaults to "default". 
// @@ -47,9 +54,11 @@ type testStepsRunner struct { Steps []testStep } -// Run runs the test steps in sequence, passing the result from the current step -// into the next step etc. +// Run runs the test steps in sequence. func (r testStepsRunner) Run(t *testing.T) { + if r.Target == "" { + r.Target = targetProd + } if r.DataStreamNamespace == "" { r.DataStreamNamespace = "default" } @@ -64,31 +73,16 @@ func (r testStepsRunner) Run(t *testing.T) { start := time.Now() ctx := context.Background() - env := testStepEnv{dsNamespace: r.DataStreamNamespace} - currentRes := testStepResult{} + env := testStepEnv{target: r.Target, dsNamespace: r.DataStreamNamespace} for _, step := range r.Steps { - currentRes = step.Step(t, ctx, &env, currentRes) + step.Step(t, ctx, &env) t.Logf("time elapsed: %s", time.Since(start)) } } -// testStepResult contains the results of running the step. -type testStepResult struct { - // DSDocCount is the data streams document counts that is the - // result of this step. - // - // Note: Only applicable for stack versions >= 8.0. - DSDocCount esclient.DataStreamsDocCount - - // IndicesDocCount is the indices document counts that is the - // result of this step. - // - // Note: Only applicable for stack versions < 8.0. - IndicesDocCount esclient.IndicesDocCount -} - // testStepEnv is the environment of the step that is run. type testStepEnv struct { + target string dsNamespace string versions []ecclient.StackVersion integrations bool @@ -106,7 +100,7 @@ func (env *testStepEnv) currentVersion() ecclient.StackVersion { } type testStep interface { - Step(t *testing.T, ctx context.Context, e *testStepEnv, previousRes testStepResult) testStepResult + Step(t *testing.T, ctx context.Context, e *testStepEnv) } // apmDeploymentMode is the deployment mode of APM in the cluster. @@ -128,16 +122,15 @@ func (mode apmDeploymentMode) enableIntegrations() bool { // Hosted (ECH) cluster with the provided stack version. It also creates the // necessary clients and set them into testStepEnv. // -// The output of this step is the initial document counts in ES. -// // Note: This step should always be the first step of any test runs, since it // initializes all the necessary dependencies for subsequent steps. type createStep struct { DeployVersion ecclient.StackVersion APMDeploymentMode apmDeploymentMode + CleanupOnFailure bool } -func (c createStep) Step(t *testing.T, ctx context.Context, e *testStepEnv, _ testStepResult) testStepResult { +func (c createStep) Step(t *testing.T, ctx context.Context, e *testStepEnv) { integrations := c.APMDeploymentMode.enableIntegrations() if c.DeployVersion.Major < 8 && integrations { t.Fatal("create step cannot enable integrations for versions < 8.0") @@ -145,69 +138,53 @@ func (c createStep) Step(t *testing.T, ctx context.Context, e *testStepEnv, _ te t.Logf("------ cluster setup %s ------", c.DeployVersion) e.tf = initTerraformRunner(t) - deployInfo := createCluster(t, ctx, e.tf, *target, c.DeployVersion, integrations) + deployInfo := createCluster(t, ctx, e.tf, e.target, c.DeployVersion, integrations, c.CleanupOnFailure) e.esc = createESClient(t, deployInfo) e.kbc = createKibanaClient(t, deployInfo) e.gen = createAPMGenerator(t, ctx, e.esc, e.kbc, deployInfo) // Update the latest environment version to the new one. 
e.versions = append(e.versions, c.DeployVersion) e.integrations = integrations - - if e.currentVersion().Major < 8 { - return testStepResult{IndicesDocCount: getDocCountPerIndexV7(t, ctx, e.esc)} - } - return testStepResult{DSDocCount: getDocCountPerDS(t, ctx, e.esc)} } -var _ testStep = ingestStep{} - // ingestStep performs ingestion to the APM Server deployed on ECH. After // ingestion, it checks if the document counts difference between current // and previous is expected, and if the data streams are in an expected // state. // -// The output of this step is the data streams document counts after ingestion. -// // NOTE: Only works for versions >= 8.0. type ingestStep struct { - CheckDataStream asserts.CheckDataStreamsWant + // CheckDataStreams is used to check the data streams individually. + // The data stream names can contain '%s' to indicate namespace. + CheckDataStreams map[string]asserts.DataStreamExpectation + // IgnoreDataStreams are the data streams to be ignored in assertions. // The data stream names can contain '%s' to indicate namespace. IgnoreDataStreams []string - // CheckIndividualDataStream is used to check the data streams individually - // instead of as a whole using CheckDataStream. - // The data stream names can contain '%s' to indicate namespace. - CheckIndividualDataStream map[string]asserts.CheckDataStreamIndividualWant } -var _ testStep = ingestStep{} +func (i ingestStep) Step(t *testing.T, ctx context.Context, e *testStepEnv) { -func (i ingestStep) Step(t *testing.T, ctx context.Context, e *testStepEnv, previousRes testStepResult) testStepResult { if e.currentVersion().Major < 8 { t.Fatal("ingest step should only be used for versions >= 8.0") } + ignoreDS := formatAll(i.IgnoreDataStreams, e.dsNamespace) + beforeIngestDSDocCount := getDocCountPerDS(t, ctx, e.esc, ignoreDS...) + t.Logf("------ ingest in %s------", e.currentVersion()) err := e.gen.RunBlockingWait(ctx, e.currentVersion(), e.integrations) require.NoError(t, err) t.Logf("------ ingest check in %s ------", e.currentVersion()) - t.Log("check number of documents after ingestion") - ignoreDS := formatAll(i.IgnoreDataStreams, e.dsNamespace) - dsDocCount := getDocCountPerDS(t, ctx, e.esc, ignoreDS...) - asserts.CheckDocCount(t, dsDocCount, previousRes.DSDocCount, - expectedDataStreamsIngest(e.dsNamespace)) - t.Log("check data streams after ingestion") dataStreams := getAPMDataStreams(t, ctx, e.esc, ignoreDS...) - if i.CheckIndividualDataStream != nil { - expected := formatAllMap(i.CheckIndividualDataStream, e.dsNamespace) - asserts.CheckDataStreamsIndividually(t, expected, dataStreams) - } else { - asserts.CheckDataStreams(t, i.CheckDataStream, dataStreams) - } + expected := formatAllMap(i.CheckDataStreams, e.dsNamespace) + asserts.DataStreamsMeetExpectation(t, expected, dataStreams) - return testStepResult{DSDocCount: dsDocCount} + t.Log("check number of documents increased after ingestion") + afterIngestDSDocCount := getDocCountPerDS(t, ctx, e.esc, ignoreDS...) + asserts.DocCountIncreased(t, afterIngestDSDocCount, beforeIngestDSDocCount) } func formatAll(formats []string, s string) []string { @@ -230,68 +207,61 @@ func formatAllMap[T any](m map[string]T, s string) map[string]T { // version. It also adds the new version into testStepEnv. After upgrade, it // checks that the document counts did not change across upgrade. // -// The output of this step is the data streams document counts after upgrade. -// // NOTE: Only works from versions >= 8.0. 
type upgradeStep struct { - NewVersion ecclient.StackVersion - CheckDataStream asserts.CheckDataStreamsWant + // NewVersion is the version to upgrade into. + NewVersion ecclient.StackVersion + + // CheckDataStreams is used to check the data streams individually. + // The data stream names can contain '%s' to indicate namespace. + CheckDataStreams map[string]asserts.DataStreamExpectation + // IgnoreDataStreams are the data streams to be ignored in assertions. // The data stream names can contain '%s' to indicate namespace. IgnoreDataStreams []string - // CheckIndividualDataStream is used to check the data streams individually - // instead of as a whole using CheckDataStream. - // The data stream names can contain '%s' to indicate namespace. - CheckIndividualDataStream map[string]asserts.CheckDataStreamIndividualWant } -var _ testStep = upgradeStep{} - -func (u upgradeStep) Step(t *testing.T, ctx context.Context, e *testStepEnv, previousRes testStepResult) testStepResult { +func (u upgradeStep) Step(t *testing.T, ctx context.Context, e *testStepEnv) { if e.currentVersion().Major < 8 { t.Fatal("upgrade step should only be used from versions >= 8.0") } + ignoreDS := formatAll(u.IgnoreDataStreams, e.dsNamespace) + beforeUpgradeDSDocCount := getDocCountPerDS(t, ctx, e.esc, ignoreDS...) + t.Logf("------ upgrade %s to %s ------", e.currentVersion(), u.NewVersion) - upgradeCluster(t, ctx, e.tf, *target, u.NewVersion, e.integrations) + upgradeCluster(t, ctx, e.tf, e.target, u.NewVersion, e.integrations) // Update the environment version to the new one. e.versions = append(e.versions, u.NewVersion) t.Logf("------ upgrade check in %s ------", e.currentVersion()) - t.Log("check number of documents across upgrade") - // We assert that no changes happened in the number of documents after upgrade - // to ensure the state didn't change. - // We don't expect any change here unless something broke during the upgrade. - ignoreDS := formatAll(u.IgnoreDataStreams, e.dsNamespace) - dsDocCount := getDocCountPerDS(t, ctx, e.esc, ignoreDS...) - asserts.CheckDocCount(t, dsDocCount, previousRes.DSDocCount, - emptyDataStreamsIngest(e.dsNamespace)) - t.Log("check data streams after upgrade") dataStreams := getAPMDataStreams(t, ctx, e.esc, ignoreDS...) - if u.CheckIndividualDataStream != nil { - expected := formatAllMap(u.CheckIndividualDataStream, e.dsNamespace) - asserts.CheckDataStreamsIndividually(t, expected, dataStreams) - } else { - asserts.CheckDataStreams(t, u.CheckDataStream, dataStreams) - } + expected := formatAllMap(u.CheckDataStreams, e.dsNamespace) + asserts.DataStreamsMeetExpectation(t, expected, dataStreams) - return testStepResult{DSDocCount: dsDocCount} + t.Log("check number of documents stayed the same across upgrade") + // We assert that no changes happened in the number of documents after upgrade + // to ensure the state didn't change. + // We don't expect any change here unless something broke during the upgrade. + afterUpgradeDSDocCount := getDocCountPerDS(t, ctx, e.esc, ignoreDS...) + asserts.DocCountStayedTheSame(t, afterUpgradeDSDocCount, beforeUpgradeDSDocCount) } // checkErrorLogsStep checks if there are any unexpected error logs from both // Elasticsearch and APM Server. The provided APMErrorLogsIgnored is used to // ignore some APM logs from being included in the assertion. -// -// The output of this step is the previous step's result. 
type checkErrorLogsStep struct { - ESErrorLogsIgnored esErrorLogs + // ESErrorLogsIgnored are the error logs query from Elasticsearch that are + // to be ignored. + ESErrorLogsIgnored esErrorLogs + + // APMErrorLogsIgnored are the error logs query from APM Server that are + // to be ignored. APMErrorLogsIgnored apmErrorLogs } -var _ testStep = checkErrorLogsStep{} - -func (c checkErrorLogsStep) Step(t *testing.T, ctx context.Context, e *testStepEnv, previousRes testStepResult) testStepResult { +func (c checkErrorLogsStep) Step(t *testing.T, ctx context.Context, e *testStepEnv) { t.Log("------ check ES and APM error logs ------") t.Log("checking ES error logs") resp, err := e.esc.GetESErrorLogs(ctx, c.ESErrorLogsIgnored.ToQueries()...) @@ -302,20 +272,252 @@ func (c checkErrorLogsStep) Step(t *testing.T, ctx context.Context, e *testStepE resp, err = e.esc.GetAPMErrorLogs(ctx, c.APMErrorLogsIgnored.ToQueries()...) require.NoError(t, err) asserts.ZeroAPMLogs(t, *resp) +} + +// createReroutePipelineStep creates custom ingest pipelines to reroute logs, +// metrics and traces to different data streams specified by namespace. +type createReroutePipelineStep struct { + DataStreamNamespace string +} - return previousRes +func (c createReroutePipelineStep) Step(t *testing.T, ctx context.Context, e *testStepEnv) { + t.Log("create reroute ingest pipelines") + for _, pipeline := range []string{"logs@custom", "metrics@custom", "traces@custom"} { + err := e.esc.CreateIngestPipeline(ctx, pipeline, []types.ProcessorContainer{ + { + Reroute: &types.RerouteProcessor{ + Namespace: []string{c.DataStreamNamespace}, + }, + }, + }) + require.NoError(t, err) + } + e.dsNamespace = c.DataStreamNamespace } -type stepFunc func(t *testing.T, ctx context.Context, e *testStepEnv, previousRes testStepResult) testStepResult +// ingestV7Step performs ingestion to the APM Server deployed on ECH. +// After ingestion, it checks if the document counts difference between +// current and previous is expected. +// +// NOTE: Only works for versions 7.x. +type ingestV7Step struct{} + +func (i ingestV7Step) Step(t *testing.T, ctx context.Context, e *testStepEnv) { + if e.currentVersion().Major >= 8 { + t.Fatal("ingest v7 step should only be used for versions < 8.0") + } + + var beforeIngestDSDocCount esclient.DataStreamsDocCount + var beforeIngestIdxDocCount esclient.IndicesDocCount + if e.integrations { + beforeIngestDSDocCount = getDocCountPerDSV7(t, ctx, e.esc, e.dsNamespace) + } else { + beforeIngestIdxDocCount = getDocCountPerIndexV7(t, ctx, e.esc) + } + + t.Logf("------ ingest in %s ------", e.currentVersion()) + err := e.gen.RunBlockingWait(ctx, e.currentVersion(), e.integrations) + require.NoError(t, err) -// customStep is a custom step to be defined by the user's. The step will run -// the provided Func. -type customStep struct { - Func stepFunc + t.Logf("------ ingest check in %s ------", e.currentVersion()) + t.Log("check number of documents increased after ingestion") + if e.integrations { + // Managed, check data streams. + afterIngestDSDocCount := getDocCountPerDSV7(t, ctx, e.esc, e.dsNamespace) + asserts.DocExistFor(t, afterIngestDSDocCount, expectedV7DataStreams(e.dsNamespace)) + asserts.DocCountIncreased(t, afterIngestDSDocCount, beforeIngestDSDocCount) + } else { + // Standalone, check indices. 
+ afterIngestIdxDocCount := getDocCountPerIndexV7(t, ctx, e.esc) + asserts.DocExistFor(t, afterIngestIdxDocCount, expectedIndices()) + asserts.DocCountIncreased(t, afterIngestIdxDocCount, beforeIngestIdxDocCount) + } } -var _ testStep = customStep{} +// upgradeV7Step upgrades the ECH deployment from its current version to +// the new version. It also adds the new version into testStepEnv. After +// upgrade, it checks that the document counts did not change across upgrade. +// +// NOTE: Only works from versions 7.x. +type upgradeV7Step struct { + NewVersion ecclient.StackVersion +} -func (c customStep) Step(t *testing.T, ctx context.Context, e *testStepEnv, previousRes testStepResult) testStepResult { - return c.Func(t, ctx, e, previousRes) +func (u upgradeV7Step) Step(t *testing.T, ctx context.Context, e *testStepEnv) { + if e.currentVersion().Major >= 8 { + t.Fatal("upgrade v7 step should only be used from versions < 8.0") + } + + var beforeUpgradeDSDocCount esclient.DataStreamsDocCount + var beforeUpgradeIdxDocCount esclient.IndicesDocCount + if e.integrations { + beforeUpgradeDSDocCount = getDocCountPerDSV7(t, ctx, e.esc, e.dsNamespace) + } else { + beforeUpgradeIdxDocCount = getDocCountPerIndexV7(t, ctx, e.esc) + } + + t.Logf("------ upgrade %s to %s ------", e.currentVersion(), u.NewVersion) + upgradeCluster(t, ctx, e.tf, e.target, u.NewVersion, e.integrations) + // Update the environment version to the new one. + e.versions = append(e.versions, u.NewVersion) + + t.Logf("------ upgrade check in %s ------", e.currentVersion()) + t.Log("check number of documents stayed the same across upgrade") + // We assert that no changes happened in the number of documents after upgrade + // to ensure the state didn't change. + // We don't expect any change here unless something broke during the upgrade. + if e.integrations { + // Managed, check data streams. + afterUpgradeDSDocCount := getDocCountPerDSV7(t, ctx, e.esc, e.dsNamespace) + asserts.DocExistFor(t, afterUpgradeDSDocCount, expectedV7DataStreams(e.dsNamespace)) + asserts.DocCountStayedTheSame(t, afterUpgradeDSDocCount, beforeUpgradeDSDocCount) + } else { + // Standalone, check indices. + afterUpgradeIdxDocCount := getDocCountPerIndexV7(t, ctx, e.esc) + asserts.DocExistFor(t, afterUpgradeIdxDocCount, expectedIndices()) + asserts.DocCountStayedTheSame(t, afterUpgradeIdxDocCount, beforeUpgradeIdxDocCount) + } +} + +// migrateManagedStep migrates the ECH APM deployment from standalone mode to +// managed mode, which involves enabling the integrations server via Kibana. +// It also checks that the document counts did not change across the migration. +type migrateManagedStep struct { + // IgnoreDataStreams are the data streams to be ignored in assertions. + // The data stream names can contain '%s' to indicate namespace. + // + // Only applicable if used in version >= 8.0. 
+ IgnoreDataStreams []string +} + +func (m migrateManagedStep) Step(t *testing.T, ctx context.Context, e *testStepEnv) { + if e.integrations { + t.Fatal("migrate managed step should only be used on standalone") + } + + var beforeMigrateDSDocCount esclient.DataStreamsDocCount + var beforeMigrateIdxDocCount esclient.IndicesDocCount + if e.currentVersion().Major >= 8 { + beforeMigrateDSDocCount = getDocCountPerDS(t, ctx, e.esc, e.dsNamespace) + } else { + beforeMigrateIdxDocCount = getDocCountPerIndexV7(t, ctx, e.esc) + } + + t.Logf("------ migrate to managed for %s ------", e.currentVersion()) + t.Log("enable integrations server") + err := e.kbc.EnableIntegrationsServer(ctx) + require.NoError(t, err) + e.integrations = true + + // APM Server needs some time to start serving requests again, and we don't have any + // visibility on when this completes. + // NOTE: This value comes from empirical observations. + time.Sleep(80 * time.Second) + + t.Log("check number of documents stayed the same across migration to managed") + // We assert that no changes happened in the number of documents after migration + // to ensure the state didn't change. + // We don't expect any change here unless something broke during the migration. + if e.currentVersion().Major >= 8 { + afterMigrateDSDocCount := getDocCountPerDS(t, ctx, e.esc, e.dsNamespace) + asserts.DocExistFor(t, afterMigrateDSDocCount, expectedDataStreams(e.dsNamespace)) + asserts.DocCountStayedTheSame(t, afterMigrateDSDocCount, beforeMigrateDSDocCount) + } else { + afterMigrateIdxDocCount := getDocCountPerIndexV7(t, ctx, e.esc) + asserts.DocExistFor(t, afterMigrateIdxDocCount, expectedIndices()) + asserts.DocCountStayedTheSame(t, afterMigrateIdxDocCount, beforeMigrateIdxDocCount) + } +} + +// resolveDeprecationsStep resolves critical migration deprecation warnings from Elasticsearch regarding +// indices created in 7.x not being compatible with 9.x. 
+type resolveDeprecationsStep struct{} + +func (r resolveDeprecationsStep) Step(t *testing.T, ctx context.Context, e *testStepEnv) { + t.Logf("------ resolve migration deprecations in %s ------", e.currentVersion()) + err := e.kbc.ResolveMigrationDeprecations(ctx) + require.NoError(t, err) +} + +func expectedDataStreams(namespace string) []string { + return []string{ + fmt.Sprintf("traces-apm-%s", namespace), + fmt.Sprintf("metrics-apm.app.opbeans_python-%s", namespace), + fmt.Sprintf("metrics-apm.internal-%s", namespace), + fmt.Sprintf("logs-apm.error-%s", namespace), + fmt.Sprintf("metrics-apm.service_destination.1m-%s", namespace), + fmt.Sprintf("metrics-apm.service_transaction.1m-%s", namespace), + fmt.Sprintf("metrics-apm.service_summary.1m-%s", namespace), + fmt.Sprintf("metrics-apm.transaction.1m-%s", namespace), + } +} + +func expectedV7DataStreams(namespace string) []string { + return []string{ + fmt.Sprintf("traces-apm-%s", namespace), + fmt.Sprintf("metrics-apm.app.opbeans_python-%s", namespace), + fmt.Sprintf("metrics-apm.app.opbeans_node-%s", namespace), + fmt.Sprintf("metrics-apm.app.opbeans_go-%s", namespace), + fmt.Sprintf("metrics-apm.app.opbeans_ruby-%s", namespace), + fmt.Sprintf("metrics-apm.internal-%s", namespace), + fmt.Sprintf("logs-apm.error-%s", namespace), + } +} + +func expectedIndices() []string { + return []string{ + "apm-*-error-*", + "apm-*-span-*", + "apm-*-transaction-*", + "apm-*-metric-*", + } +} + +func sliceToSet[T comparable](s []T) map[T]bool { + m := make(map[T]bool) + for _, ele := range s { + m[ele] = true + } + return m +} + +// getAPMDataStreams get all APM related data streams. +func getAPMDataStreams(t *testing.T, ctx context.Context, esc *esclient.Client, ignoreDS ...string) []types.DataStream { + t.Helper() + dataStreams, err := esc.GetDataStream(ctx, "*apm*") + require.NoError(t, err) + + ignore := sliceToSet(ignoreDS) + return slices.DeleteFunc(dataStreams, func(ds types.DataStream) bool { + return ignore[ds.Name] + }) +} + +// getDocCountPerDS retrieves document count per data stream for versions >= 8.0. +func getDocCountPerDS(t *testing.T, ctx context.Context, esc *esclient.Client, ignoreDS ...string) esclient.DataStreamsDocCount { + t.Helper() + count, err := esc.APMDSDocCount(ctx) + require.NoError(t, err) + + ignore := sliceToSet(ignoreDS) + maps.DeleteFunc(count, func(ds string, _ int) bool { + return ignore[ds] + }) + return count +} + +// getDocCountPerDSV7 retrieves document count per data stream for versions < 8.0. +func getDocCountPerDSV7(t *testing.T, ctx context.Context, esc *esclient.Client, namespace string) esclient.DataStreamsDocCount { + t.Helper() + count, err := esc.APMDSDocCountV7(ctx, namespace) + require.NoError(t, err) + return count +} + +// getDocCountPerIndexV7 retrieves document count per index for versions < 8.0. +func getDocCountPerIndexV7(t *testing.T, ctx context.Context, esc *esclient.Client) esclient.IndicesDocCount { + t.Helper() + count, err := esc.APMIdxDocCountV7(ctx) + require.NoError(t, err) + return count } diff --git a/functionaltests/steps_v7.go b/functionaltests/steps_v7.go deleted file mode 100644 index 953807f8889..00000000000 --- a/functionaltests/steps_v7.go +++ /dev/null @@ -1,219 +0,0 @@ -// Licensed to Elasticsearch B.V. under one or more contributor -// license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright -// ownership. Elasticsearch B.V. 
licenses this file to you under -// the Apache License, Version 2.0 (the "License"); you may -// not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package functionaltests - -import ( - "context" - "fmt" - "testing" - "time" - - "github.com/stretchr/testify/require" - - "github.com/elastic/apm-server/functionaltests/internal/asserts" - "github.com/elastic/apm-server/functionaltests/internal/ecclient" - "github.com/elastic/apm-server/functionaltests/internal/esclient" -) - -func expectedIndicesIngest() esclient.IndicesDocCount { - return esclient.IndicesDocCount{ - "apm-*-error-*": 364, - "apm-*-profile-*": 0, - "apm-*-span-*": 10885, - "apm-*-transaction-*": 4128, - // Ignore aggregation indices. - "apm-*-metric-*": -1, - "apm-*-onboarding-*": -1, - } -} - -func emptyIndicesIngest() esclient.IndicesDocCount { - return esclient.IndicesDocCount{ - "apm-*-error-*": 0, - "apm-*-profile-*": 0, - "apm-*-span-*": 0, - "apm-*-transaction-*": 0, - "apm-*-onboarding-*": 0, - "apm-*-metric-*": -1, - } -} - -func expectedDataStreamsIngestV7(namespace string) esclient.DataStreamsDocCount { - return map[string]int{ - fmt.Sprintf("traces-apm-%s", namespace): 15013, - fmt.Sprintf("logs-apm.error-%s", namespace): 364, - fmt.Sprintf("metrics-apm.app.opbeans_python-%s", namespace): 1492, - fmt.Sprintf("metrics-apm.app.opbeans_node-%s", namespace): 27, - fmt.Sprintf("metrics-apm.app.opbeans_go-%s", namespace): 11, - fmt.Sprintf("metrics-apm.app.opbeans_ruby-%s", namespace): 24, - // Document count fluctuates constantly. - fmt.Sprintf("metrics-apm.internal-%s", namespace): -1, - } -} - -func emptyDataStreamsIngestV7(namespace string) esclient.DataStreamsDocCount { - return map[string]int{ - fmt.Sprintf("traces-apm-%s", namespace): 0, - fmt.Sprintf("metrics-apm.app.opbeans_python-%s", namespace): 0, - fmt.Sprintf("metrics-apm.app.opbeans_node-%s", namespace): 0, - fmt.Sprintf("metrics-apm.app.opbeans_go-%s", namespace): 0, - fmt.Sprintf("metrics-apm.app.opbeans_ruby-%s", namespace): 0, - fmt.Sprintf("metrics-apm.internal-%s", namespace): 0, - fmt.Sprintf("logs-apm.error-%s", namespace): 0, - } -} - -// ingestV7Step performs ingestion to the APM Server deployed on ECH. -// After ingestion, it checks if the document counts difference between -// current and previous is expected. -// -// The output of this step is the indices document counts after ingestion. -// -// NOTE: Only works for versions 7.x. -type ingestV7Step struct{} - -var _ testStep = ingestV7Step{} - -func (i ingestV7Step) Step(t *testing.T, ctx context.Context, e *testStepEnv, previousRes testStepResult) testStepResult { - if e.currentVersion().Major >= 8 { - t.Fatal("ingest v7 step should only be used for versions < 8.0") - } - - t.Logf("------ ingest in %s ------", e.currentVersion()) - err := e.gen.RunBlockingWait(ctx, e.currentVersion(), e.integrations) - require.NoError(t, err) - - t.Logf("------ ingest check in %s ------", e.currentVersion()) - t.Log("check number of documents after ingestion") - // Standalone, check indices. 
- if !e.integrations { - idxDocCount := getDocCountPerIndexV7(t, ctx, e.esc) - asserts.CheckDocCountV7(t, idxDocCount, previousRes.IndicesDocCount, - expectedIndicesIngest()) - return testStepResult{IndicesDocCount: idxDocCount} - } - - // Managed, check data streams - dsDocCount := getDocCountPerDSV7(t, ctx, e.esc, e.dsNamespace) - asserts.CheckDocCount(t, dsDocCount, previousRes.DSDocCount, - expectedDataStreamsIngestV7(e.dsNamespace)) - return testStepResult{DSDocCount: dsDocCount} -} - -// upgradeV7Step upgrades the ECH deployment from its current version to -// the new version. It also adds the new version into testStepEnv. After -// upgrade, it checks that the document counts did not change across upgrade. -// -// The output of this step is the indices document counts if upgrading to 7.x, -// or data streams document counts if upgrading to >= 8.0. -// -// NOTE: Only works from versions 7.x. -type upgradeV7Step struct { - NewVersion ecclient.StackVersion -} - -var _ testStep = upgradeV7Step{} - -func (u upgradeV7Step) Step(t *testing.T, ctx context.Context, e *testStepEnv, previousRes testStepResult) testStepResult { - if e.currentVersion().Major >= 8 { - t.Fatal("upgrade v7 step should only be used from versions < 8.0") - } - - t.Logf("------ upgrade %s to %s ------", e.currentVersion(), u.NewVersion) - upgradeCluster(t, ctx, e.tf, *target, u.NewVersion, e.integrations) - // Update the environment version to the new one. - e.versions = append(e.versions, u.NewVersion) - - t.Logf("------ upgrade check in %s ------", e.currentVersion()) - t.Log("check number of documents across upgrade") - // We assert that no changes happened in the number of documents after upgrade - // to ensure the state didn't change. - // We don't expect any change here unless something broke during the upgrade. - if !e.integrations { - // Standalone, return indices even if upgraded to >= 8.0, since indices - // will simply be ignored by 8.x checks. - idxDocCount := getDocCountPerIndexV7(t, ctx, e.esc) - asserts.CheckDocCountV7(t, idxDocCount, previousRes.IndicesDocCount, - emptyIndicesIngest()) - return testStepResult{IndicesDocCount: idxDocCount} - } - - // Managed, should be data streams regardless of upgrade. - dsDocCount := getDocCountPerDSV7(t, ctx, e.esc, e.dsNamespace) - asserts.CheckDocCount(t, dsDocCount, previousRes.DSDocCount, - emptyDataStreamsIngestV7(e.dsNamespace)) - return testStepResult{DSDocCount: getDocCountPerDS(t, ctx, e.esc)} -} - -// migrateManagedStep migrates the ECH APM deployment from standalone mode to -// managed mode, which involves enabling the integrations server via Kibana. -// It also checks that the document counts did not change across the migration. -// -// The output of this step is the indices document counts if version < 8.0, -// or data streams document counts if version >= 8.0. -type migrateManagedStep struct{} - -var _ testStep = migrateManagedStep{} - -func (m migrateManagedStep) Step(t *testing.T, ctx context.Context, e *testStepEnv, previousRes testStepResult) testStepResult { - if e.integrations { - t.Fatal("migrate managed step should only be used on standalone") - } - - t.Logf("------ migrate to managed for %s ------", e.currentVersion()) - t.Log("enable integrations server") - err := e.kbc.EnableIntegrationsServer(ctx) - require.NoError(t, err) - e.integrations = true - - // APM Server needs some time to start serving requests again, and we don't have any - // visibility on when this completes. - // NOTE: This value comes from empirical observations. 
- time.Sleep(80 * time.Second) - - t.Log("check number of documents across migration to managed") - // We assert that no changes happened in the number of documents after migration - // to ensure the state didn't change. - // We don't expect any change here unless something broke during the migration. - if e.currentVersion().Major < 8 { - idxDocCount := getDocCountPerIndexV7(t, ctx, e.esc) - asserts.CheckDocCountV7(t, idxDocCount, previousRes.IndicesDocCount, - emptyIndicesIngest()) - return testStepResult{IndicesDocCount: idxDocCount} - } - - dsDocCount := getDocCountPerDS(t, ctx, e.esc) - asserts.CheckDocCount(t, dsDocCount, previousRes.DSDocCount, - emptyDataStreamsIngest(e.dsNamespace)) - return testStepResult{DSDocCount: dsDocCount} -} - -// resolveDeprecationsStep resolves critical migration deprecation warnings from Elasticsearch regarding -// indices created in 7.x not being compatible with 9.x. -// -// The output of this step is the previous test step result. -type resolveDeprecationsStep struct{} - -var _ testStep = resolveDeprecationsStep{} - -func (r resolveDeprecationsStep) Step(t *testing.T, ctx context.Context, e *testStepEnv, previousRes testStepResult) testStepResult { - t.Logf("------ resolve migration deprecations in %s ------", e.currentVersion()) - err := e.kbc.ResolveMigrationDeprecations(ctx) - require.NoError(t, err) - return previousRes -} diff --git a/functionaltests/upgrade-config.yaml b/functionaltests/upgrade-config.yaml new file mode 100644 index 00000000000..1704103e774 --- /dev/null +++ b/functionaltests/upgrade-config.yaml @@ -0,0 +1,18 @@ +# Define the expected data stream lifecycle of the minor version. +# If not defined, defaults to ILM. +data-stream-lifecycle: + 8.15: DSL + 8.16: DSL + +# Define versions that have lazy rollover, i.e. when upgrading from some older +# version to the specified minor version, there will be lazy rollover. +# Exceptions are specified as a list, i.e. no lazy rollover if upgrade is from +# that minor version. +lazy-rollover-with-exceptions: + 8.16: + 8.17: + # 8.19 and 9.1 have the same template due to + # https://github.com/elastic/elasticsearch/pull/123166. + 8.19: + 9.1: + - "8.19" \ No newline at end of file diff --git a/functionaltests/upgrade_test.go b/functionaltests/upgrade_test.go new file mode 100644 index 00000000000..282813b1a93 --- /dev/null +++ b/functionaltests/upgrade_test.go @@ -0,0 +1,229 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
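
The new upgrade-config.yaml drives two per-version expectations in the data-driven test: which lifecycle (ILM or DSL) is expected to manage a minor's data streams, and whether upgrading into a minor lazily rolls the data streams over on the next ingestion. Below is a minimal sketch of how the sample file is read by the upgradeTestConfig helpers defined further down in this file; the StackVersion literals, the assumption that bare Major/Minor fields yield MajorMinor() values such as "8.16", and the wrapper function name are illustrative, not part of the change.

package functionaltests

import (
	"fmt"

	"github.com/elastic/apm-server/functionaltests/internal/ecclient"
)

// sketchUpgradeConfig walks through the sample upgrade-config.yaml using the
// upgradeTestConfig helpers from this file. It assumes a StackVersion built
// from bare Major/Minor fields reports MajorMinor() as "8.16", "8.17", etc.
func sketchUpgradeConfig() error {
	cfg, err := parseConfig("upgrade-config.yaml")
	if err != nil {
		return err
	}

	v816 := ecclient.StackVersion{Major: 8, Minor: 16}
	v817 := ecclient.StackVersion{Major: 8, Minor: 17}
	v819 := ecclient.StackVersion{Major: 8, Minor: 19}
	v91 := ecclient.StackVersion{Major: 9, Minor: 1}

	// 8.15 and 8.16 are listed under data-stream-lifecycle, so their data
	// streams are expected to be DSL-managed; unlisted minors default to ILM.
	fmt.Println(cfg.ExpectedLifecycle(v816) == managedByDSL) // true
	fmt.Println(cfg.ExpectedLifecycle(v817) == managedByILM) // true

	// 8.19 appears under lazy-rollover-with-exceptions, so upgrading into it
	// rolls data streams over lazily on the next ingestion...
	fmt.Println(cfg.HasLazyRollover(v817, v819)) // true
	// ...but 8.19 is listed as an exception for 9.1 (same index template),
	// so the 8.19 -> 9.1 hop is not expected to roll over.
	fmt.Println(cfg.HasLazyRollover(v819, v91)) // false
	return nil
}

The Default and Reroute subtests below derive their per-step DataStreamExpectation from exactly these two answers.
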
+ +package functionaltests + +import ( + "fmt" + "os" + "slices" + "strings" + "testing" + + "gopkg.in/yaml.v2" + + "github.com/elastic/apm-server/functionaltests/internal/asserts" + "github.com/elastic/apm-server/functionaltests/internal/ecclient" +) + +func formatUpgradePath(p string) string { + splits := strings.Split(p, ",") + for i := range splits { + splits[i] = strings.TrimSpace(splits[i]) + } + return strings.ReplaceAll(strings.Join(splits, "_to_"), ".", "_") +} + +func TestUpgrade_UpgradePath_Snapshot(t *testing.T) { + // The versions are separated by commas. + if strings.TrimSpace(*upgradePath) == "" { + t.Fatal("no upgrade versions specified") + } + splits := strings.Split(*upgradePath, ",") + if len(splits) < 2 { + t.Fatal("need to specify at least 2 upgrade versions") + } + + // Get all snapshot versions based on input. + var versionInfos []ecclient.StackVersionInfo + for i, s := range splits { + versionInfo := vsCache.GetLatestSnapshot(t, strings.TrimSpace(s)) + if i != 0 { + prevVersionInfo := versionInfos[len(versionInfos)-1] + if !prevVersionInfo.CanUpgradeTo(versionInfo.Version) { + t.Fatalf("%s is not upgradable to %s", prevVersionInfo.Version, versionInfo.Version) + } + } + versionInfos = append(versionInfos, versionInfo) + } + + config, err := parseConfig("upgrade-config.yaml") + if err != nil { + t.Fatal(err) + } + + t.Run(formatUpgradePath(*upgradePath), func(t *testing.T) { + t.Run("Default", func(t *testing.T) { + t.Parallel() + steps := buildTestSteps(t, versionInfos, config, false) + runner := testStepsRunner{ + Target: *target, + Steps: steps, + } + runner.Run(t) + }) + + t.Run("Reroute", func(t *testing.T) { + t.Parallel() + steps := buildTestSteps(t, versionInfos, config, true) + runner := testStepsRunner{ + Target: *target, + Steps: steps, + } + runner.Run(t) + }) + }) +} + +func buildTestSteps(t *testing.T, versionInfos ecclient.StackVersionInfos, config upgradeTestConfig, reroute bool) []testStep { + t.Helper() + + var steps []testStep + var indicesManagedBy []string + + for i, info := range versionInfos { + lifecycle := config.ExpectedLifecycle(info.Version) + // Create deployment using first version, create reroute (if enabled) and ingest. + if i == 0 { + indicesManagedBy = append(indicesManagedBy, lifecycle) + steps = append(steps, createStep{ + DeployVersion: info.Version, + CleanupOnFailure: *cleanupOnFailure, + }) + if reroute { + steps = append(steps, createReroutePipelineStep{DataStreamNamespace: "reroute"}) + } + steps = append(steps, ingestStep{ + CheckDataStreams: dataStreamsExpectations(asserts.DataStreamExpectation{ + PreferIlm: lifecycle == managedByILM, + DSManagedBy: lifecycle, + IndicesManagedBy: indicesManagedBy, + }), + }) + continue + } + + // Upgrade deployment to new version and ingest. + prev := versionInfos[i-1].Version + oldIndicesManagedBy := slices.Clone(indicesManagedBy) + if config.HasLazyRollover(prev, info.Version) { + indicesManagedBy = append(indicesManagedBy, lifecycle) + } + steps = append(steps, + upgradeStep{ + NewVersion: info.Version, + CheckDataStreams: dataStreamsExpectations(asserts.DataStreamExpectation{ + PreferIlm: lifecycle == managedByILM, + DSManagedBy: lifecycle, + // After upgrade, the indices should still be managed by + // the same lifecycle management. 
+ IndicesManagedBy: oldIndicesManagedBy, + }), + }, + ingestStep{ + CheckDataStreams: dataStreamsExpectations(asserts.DataStreamExpectation{ + PreferIlm: lifecycle == managedByILM, + DSManagedBy: lifecycle, + // After ingestion, lazy rollover should kick in if applicable. + IndicesManagedBy: indicesManagedBy, + }), + }, + ) + } + + // Check error logs, ignoring some that are due to intermittent issues + // unrelated to our test. + steps = append(steps, checkErrorLogsStep{ + APMErrorLogsIgnored: apmErrorLogs{ + tlsHandshakeError, + esReturnedUnknown503, + refreshCache503, + refreshCacheCtxCanceled, + refreshCacheCtxDeadline, + refreshCacheESConfigInvalid, + preconditionFailed, + populateSourcemapFetcher403, + populateSourcemapServerShuttingDown, + syncSourcemapContextCanceled, + }, + }) + + return steps +} + +func dataStreamsExpectations(expect asserts.DataStreamExpectation) map[string]asserts.DataStreamExpectation { + return map[string]asserts.DataStreamExpectation{ + "traces-apm-%s": expect, + "metrics-apm.app.opbeans_python-%s": expect, + "metrics-apm.internal-%s": expect, + "logs-apm.error-%s": expect, + "metrics-apm.service_destination.1m-%s": expect, + "metrics-apm.service_transaction.1m-%s": expect, + "metrics-apm.service_summary.1m-%s": expect, + "metrics-apm.transaction.1m-%s": expect, + } +} + +type upgradeTest struct { + Versions []string `yaml:"versions"` +} + +type upgradeTestConfig struct { + UpgradeTests map[string]upgradeTest `yaml:"upgrade-tests"` + DataStreamLifecycle map[string]string `yaml:"data-stream-lifecycle"` + LazyRolloverWithExceptions map[string][]string `yaml:"lazy-rollover-with-exceptions"` +} + +// ExpectedLifecycle returns the lifecycle management that is expected of the provided version. +func (cfg upgradeTestConfig) ExpectedLifecycle(version ecclient.StackVersion) string { + lifecycle, ok := cfg.DataStreamLifecycle[version.MajorMinor()] + if !ok { + return managedByILM + } + if strings.EqualFold(lifecycle, "DSL") { + return managedByDSL + } + return managedByILM +} + +// HasLazyRollover checks if the upgrade path is expected to have lazy rollover. +func (cfg upgradeTestConfig) HasLazyRollover(from, to ecclient.StackVersion) bool { + exceptions, ok := cfg.LazyRolloverWithExceptions[to.MajorMinor()] + if !ok { + return false + } + for _, exception := range exceptions { + if strings.EqualFold(from.MajorMinor(), exception) { + return false + } + } + return true +} + +func parseConfig(filename string) (upgradeTestConfig, error) { + b, err := os.ReadFile(filename) + if err != nil { + return upgradeTestConfig{}, fmt.Errorf("failed to read %s: %w", filename, err) + } + + config := upgradeTestConfig{} + if err = yaml.Unmarshal(b, &config); err != nil { + return upgradeTestConfig{}, fmt.Errorf("failed to unmarshal upgrade test config: %w", err) + } + + return config, nil +} diff --git a/functionaltests/versions.go b/functionaltests/versions.go new file mode 100644 index 00000000000..ba539fa56cd --- /dev/null +++ b/functionaltests/versions.go @@ -0,0 +1,107 @@ +// Licensed to Elasticsearch B.V. under one or more contributor +// license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright +// ownership. Elasticsearch B.V. licenses this file to you under +// the Apache License, Version 2.0 (the "License"); you may +// not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package functionaltests + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/elastic/apm-server/functionaltests/internal/ecclient" +) + +func newVersionsCache(ctx context.Context, ecc *ecclient.Client, ecRegion string) (*versionsCache, error) { + candidates, err := ecc.GetCandidateVersionInfos(ctx, ecRegion) + if err != nil { + return nil, err + } + + snapshots, err := ecc.GetSnapshotVersionInfos(ctx, ecRegion) + if err != nil { + return nil, err + } + + versions, err := ecc.GetVersionInfos(ctx, ecRegion) + if err != nil { + return nil, err + } + + return &versionsCache{ + fetchedCandidates: candidates, + fetchedSnapshots: snapshots, + fetchedVersions: versions, + region: ecRegion, + }, nil +} + +type versionsCache struct { + // fetchedCandidates are the build-candidate stack versions prefetched from Elastic Cloud API. + fetchedCandidates ecclient.StackVersionInfos + // fetchedSnapshots are the snapshot stack versions prefetched from Elastic Cloud API. + fetchedSnapshots ecclient.StackVersionInfos + // fetchedVersions are the non-snapshot stack versions prefetched from Elastic Cloud API. + fetchedVersions ecclient.StackVersionInfos + + region string +} + +// GetLatestSnapshot retrieves the latest snapshot version for the version prefix. +func (c *versionsCache) GetLatestSnapshot(t *testing.T, prefix string) ecclient.StackVersionInfo { + t.Helper() + version, ok := c.fetchedSnapshots.LatestFor(prefix) + require.True(t, ok, "snapshot for '%s' found in EC region %s", prefix, c.region) + return version +} + +// GetLatestVersionOrSkip retrieves the latest non-snapshot version for the version prefix. +// If the version is not found, the test is skipped. +func (c *versionsCache) GetLatestVersionOrSkip(t *testing.T, prefix string) ecclient.StackVersionInfo { + t.Helper() + version, ok := c.fetchedVersions.LatestFor(prefix) + if !ok { + t.Skipf("version for '%s' not found in EC region %s, skipping test", prefix, c.region) + return ecclient.StackVersionInfo{} + } + return version +} + +// GetLatestBCOrSkip retrieves the latest build-candidate version for the version prefix. +// If the version is not found, the test is skipped. +func (c *versionsCache) GetLatestBCOrSkip(t *testing.T, prefix string) ecclient.StackVersionInfo { + t.Helper() + candidate, ok := c.fetchedCandidates.LatestFor(prefix) + if !ok { + t.Skipf("BC for '%s' not found in EC region %s, skipping test", prefix, c.region) + return ecclient.StackVersionInfo{} + } + + // Check that the BC version is actually latest, otherwise skip the test. + versionInfo := c.GetLatestVersionOrSkip(t, prefix) + if versionInfo.Version.Major != candidate.Version.Major { + t.Skipf("BC for '%s' is invalid in EC region %s, skipping test", prefix, c.region) + return ecclient.StackVersionInfo{} + } + if versionInfo.Version.Minor > candidate.Version.Minor { + t.Skipf("BC for '%s' is less than latest normal version in EC region %s, skipping test", + prefix, c.region) + return ecclient.StackVersionInfo{} + } + + return candidate +}
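
The refactored step API composes without threading results between steps: each step re-fetches the document counts it needs before and after acting. As a usage illustration grounded in the types above, here is a hedged sketch of a single hop wired up by hand rather than via buildTestSteps; the 8.17/8.18 version prefixes and the function name are illustrative assumptions.

package functionaltests

import (
	"testing"

	"github.com/elastic/apm-server/functionaltests/internal/asserts"
)

// sketchSingleHopUpgrade shows the refactored steps composed by hand for an
// assumed 8.17 -> 8.18 snapshot hop: create, ingest, upgrade, ingest again,
// then check error logs.
func sketchSingleHopUpgrade(t *testing.T) {
	from := vsCache.GetLatestSnapshot(t, "8.17")
	to := vsCache.GetLatestSnapshot(t, "8.18")
	if !from.CanUpgradeTo(to.Version) {
		t.Skipf("upgrade from %s to %s is not allowed", from.Version, to.Version)
		return
	}

	// Neither 8.17 nor 8.18 is listed under data-stream-lifecycle, and 8.18
	// has no lazy-rollover entry, so every data stream should keep a single
	// ILM-managed index throughout.
	check := dataStreamsExpectations(asserts.DataStreamExpectation{
		PreferIlm:        true,
		DSManagedBy:      managedByILM,
		IndicesManagedBy: []string{managedByILM},
	})

	runner := testStepsRunner{
		Target: *target,
		Steps: []testStep{
			createStep{DeployVersion: from.Version, CleanupOnFailure: *cleanupOnFailure},
			ingestStep{CheckDataStreams: check},
			upgradeStep{NewVersion: to.Version, CheckDataStreams: check},
			ingestStep{CheckDataStreams: check},
			checkErrorLogsStep{APMErrorLogsIgnored: apmErrorLogs{tlsHandshakeError}},
		},
	}
	runner.Run(t)
}

The ignored-error list in this sketch is deliberately short; real runs ignore the larger set of known-benign APM errors used in upgrade_test.go above.
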