From d9edb07481494073a824341a3a61077dff3f9456 Mon Sep 17 00:00:00 2001 From: Huabing Zhao Date: Wed, 15 Jan 2025 03:56:40 +0000 Subject: [PATCH] fix panic when updating the envoy-gateway-config configMap Signed-off-by: Huabing Zhao --- internal/cmd/server.go | 4 ++++ internal/envoygateway/config/config.go | 17 ++++++----------- internal/infrastructure/runner/runner.go | 8 ++++++-- internal/provider/kubernetes/controller.go | 12 +++++++----- internal/provider/kubernetes/kubernetes.go | 6 +++--- internal/provider/kubernetes/kubernetes_test.go | 6 +++--- internal/provider/runner/runner.go | 6 +++--- release-notes/current.yaml | 2 +- test/e2e/e2e_test.go | 2 +- tools/make/kube.mk | 11 ++--------- 10 files changed, 36 insertions(+), 38 deletions(-) diff --git a/internal/cmd/server.go b/internal/cmd/server.go index a4c9d3e9713..bf42115ce47 100644 --- a/internal/cmd/server.go +++ b/internal/cmd/server.go @@ -128,6 +128,10 @@ func getConfigByPath(cfgPath string) (*config.Server, error) { // setupRunners starts all the runners required for the Envoy Gateway to // fulfill its tasks. func setupRunners(ctx context.Context, cfg *config.Server) (err error) { + // The Elected channel is used to block the tasks that are waiting for the leader to be elected. + // It will be closed once the leader is elected in the controller manager. + cfg.Elected = make(chan struct{}) + // Setup the Extension Manager var extMgr types.Manager if cfg.EnvoyGateway.Provider.Type == egv1a1.ProviderTypeKubernetes { diff --git a/internal/envoygateway/config/config.go b/internal/envoygateway/config/config.go index af05dac0753..e8d1fece35c 100644 --- a/internal/envoygateway/config/config.go +++ b/internal/envoygateway/config/config.go @@ -7,7 +7,6 @@ package config import ( "errors" - "sync" egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1" "github.com/envoyproxy/gateway/api/v1alpha1/validation" @@ -37,23 +36,19 @@ type Server struct { DNSDomain string // Logger is the logr implementation used by Envoy Gateway. Logger logging.Logger - // Elected chan is used to signal what a leader is elected - Elected *sync.WaitGroup + // Elected chan is used to signal when an EG instance is elected as leader. + Elected chan struct{} } // New returns a Server with default parameters. func New() (*Server, error) { - server := &Server{ + return &Server{ EnvoyGateway: egv1a1.DefaultEnvoyGateway(), Namespace: env.Lookup("ENVOY_GATEWAY_NAMESPACE", DefaultNamespace), DNSDomain: env.Lookup("KUBERNETES_CLUSTER_DOMAIN", DefaultDNSDomain), - // the default logger - Logger: logging.DefaultLogger(egv1a1.LogLevelInfo), - Elected: &sync.WaitGroup{}, - } - // Block the tasks that are waiting for the leader to be elected - server.Elected.Add(1) - return server, nil + Logger: logging.DefaultLogger(egv1a1.LogLevelInfo), + Elected: make(chan struct{}), + }, nil } // Validate validates a Server config. diff --git a/internal/infrastructure/runner/runner.go b/internal/infrastructure/runner/runner.go index 3344ca0d349..6896a6e5a16 100644 --- a/internal/infrastructure/runner/runner.go +++ b/internal/infrastructure/runner/runner.go @@ -72,8 +72,12 @@ func (r *Runner) Start(ctx context.Context) (err error) { if r.EnvoyGateway.Provider.Type == egv1a1.ProviderTypeKubernetes && !ptr.Deref(r.EnvoyGateway.Provider.Kubernetes.LeaderElection.Disable, false) { go func() { - r.Elected.Wait() - initInfra() + select { + case <-ctx.Done(): + return + case <-r.Elected: + initInfra() + } }() return } diff --git a/internal/provider/kubernetes/controller.go b/internal/provider/kubernetes/controller.go index d25b2735d7e..2afc3d334bb 100644 --- a/internal/provider/kubernetes/controller.go +++ b/internal/provider/kubernetes/controller.go @@ -83,11 +83,9 @@ type gatewayAPIReconciler struct { } // newGatewayAPIController -func newGatewayAPIController(mgr manager.Manager, cfg *config.Server, su Updater, +func newGatewayAPIController(ctx context.Context, mgr manager.Manager, cfg *config.Server, su Updater, resources *message.ProviderResources, ) error { - ctx := context.Background() - // Gather additional resources to watch from registered extensions var extServerPoliciesGVKs []schema.GroupVersionKind var extGVKs []schema.GroupVersionKind @@ -138,8 +136,12 @@ func newGatewayAPIController(mgr manager.Manager, cfg *config.Server, su Updater if cfg.EnvoyGateway.Provider.Type == egv1a1.ProviderTypeKubernetes && !ptr.Deref(cfg.EnvoyGateway.Provider.Kubernetes.LeaderElection.Disable, false) { go func() { - cfg.Elected.Wait() - r.subscribeAndUpdateStatus(ctx, cfg.EnvoyGateway.EnvoyGatewaySpec.ExtensionManager != nil) + select { + case <-ctx.Done(): + return + case <-cfg.Elected: + r.subscribeAndUpdateStatus(ctx, cfg.EnvoyGateway.EnvoyGatewaySpec.ExtensionManager != nil) + } }() } else { r.subscribeAndUpdateStatus(ctx, cfg.EnvoyGateway.EnvoyGatewaySpec.ExtensionManager != nil) diff --git a/internal/provider/kubernetes/kubernetes.go b/internal/provider/kubernetes/kubernetes.go index 56f96e70a18..effc13db089 100644 --- a/internal/provider/kubernetes/kubernetes.go +++ b/internal/provider/kubernetes/kubernetes.go @@ -36,7 +36,7 @@ type Provider struct { } // New creates a new Provider from the provided EnvoyGateway. -func New(restCfg *rest.Config, svrCfg *ec.Server, resources *message.ProviderResources) (*Provider, error) { +func New(ctx context.Context, restCfg *rest.Config, svrCfg *ec.Server, resources *message.ProviderResources) (*Provider, error) { // TODO: Decide which mgr opts should be exposed through envoygateway.provider.kubernetes API. mgrOpts := manager.Options{ @@ -95,7 +95,7 @@ func New(restCfg *rest.Config, svrCfg *ec.Server, resources *message.ProviderRes } // Create and register the controllers with the manager. - if err := newGatewayAPIController(mgr, svrCfg, updateHandler.Writer(), resources); err != nil { + if err := newGatewayAPIController(ctx, mgr, svrCfg, updateHandler.Writer(), resources); err != nil { return nil, fmt.Errorf("failted to create gatewayapi controller: %w", err) } @@ -112,7 +112,7 @@ func New(restCfg *rest.Config, svrCfg *ec.Server, resources *message.ProviderRes // Emit elected & continue with the tasks that require leadership. go func() { <-mgr.Elected() - svrCfg.Elected.Done() + close(svrCfg.Elected) }() return &Provider{ diff --git a/internal/provider/kubernetes/kubernetes_test.go b/internal/provider/kubernetes/kubernetes_test.go index 135de799948..541237c2f6a 100644 --- a/internal/provider/kubernetes/kubernetes_test.go +++ b/internal/provider/kubernetes/kubernetes_test.go @@ -63,7 +63,7 @@ func TestProvider(t *testing.T) { svr, err := config.New() require.NoError(t, err) resources := new(message.ProviderResources) - provider, err := New(cliCfg, svr, resources) + provider, err := New(context.Background(), cliCfg, svr, resources) require.NoError(t, err) ctx, cancel := context.WithCancel(ctrl.SetupSignalHandler()) go func() { @@ -1274,7 +1274,7 @@ func TestNamespacedProvider(t *testing.T) { LeaderElection: egv1a1.DefaultLeaderElection(), } resources := new(message.ProviderResources) - provider, err := New(cliCfg, svr, resources) + provider, err := New(context.Background(), cliCfg, svr, resources) require.NoError(t, err) ctx, cancel := context.WithCancel(context.Background()) go func() { @@ -1334,7 +1334,7 @@ func TestNamespaceSelectorProvider(t *testing.T) { LeaderElection: egv1a1.DefaultLeaderElection(), } resources := new(message.ProviderResources) - provider, err := New(cliCfg, svr, resources) + provider, err := New(context.Background(), cliCfg, svr, resources) require.NoError(t, err) ctx, cancel := context.WithCancel(context.Background()) go func() { diff --git a/internal/provider/runner/runner.go b/internal/provider/runner/runner.go index 94488489376..d4be9020c68 100644 --- a/internal/provider/runner/runner.go +++ b/internal/provider/runner/runner.go @@ -43,7 +43,7 @@ func (r *Runner) Start(ctx context.Context) (err error) { var p provider.Provider switch r.EnvoyGateway.Provider.Type { case egv1a1.ProviderTypeKubernetes: - p, err = r.createKubernetesProvider() + p, err = r.createKubernetesProvider(ctx) if err != nil { return fmt.Errorf("failed to create kubernetes provider: %w", err) } @@ -69,13 +69,13 @@ func (r *Runner) Start(ctx context.Context) (err error) { return nil } -func (r *Runner) createKubernetesProvider() (*kubernetes.Provider, error) { +func (r *Runner) createKubernetesProvider(ctx context.Context) (*kubernetes.Provider, error) { cfg, err := ctrl.GetConfig() if err != nil { return nil, fmt.Errorf("failed to get kubeconfig: %w", err) } - p, err := kubernetes.New(cfg, &r.Config.Server, r.ProviderResources) + p, err := kubernetes.New(ctx, cfg, &r.Config.Server, r.ProviderResources) if err != nil { return nil, fmt.Errorf("failed to create provider %s: %w", egv1a1.ProviderTypeKubernetes, err) } diff --git a/release-notes/current.yaml b/release-notes/current.yaml index 6748ae3d333..2662d604276 100644 --- a/release-notes/current.yaml +++ b/release-notes/current.yaml @@ -29,7 +29,7 @@ new features: | Added support for GEP-1731 (HTTPRoute Retries) bug fixes: | - + Fixed a panic that occurred following update to the envoy-gateway-config ConfigMap # Enhancements that improve performance. performance improvements: | diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 8e980152e3d..bb1e46b1250 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -58,7 +58,7 @@ func TestE2E(t *testing.T) { Debug: *flags.ShowDebug, CleanupBaseResources: *flags.CleanupBaseResources, ManifestFS: []fs.FS{Manifests}, - RunTest: *flags.RunTest, + RunTest: tests.RateLimitBasedJwtClaimsTest.ShortName, // SupportedFeatures cannot be empty, so we set it to SupportGateway // All e2e tests should leave Features empty. SupportedFeatures: sets.New[features.FeatureName](features.SupportGateway), diff --git a/tools/make/kube.mk b/tools/make/kube.mk index 7b400b651b0..94c64b3010b 100644 --- a/tools/make/kube.mk +++ b/tools/make/kube.mk @@ -170,16 +170,9 @@ e2e-prepare: prepare-ip-family ## Prepare the environment for running e2e tests .PHONY: run-e2e run-e2e: e2e-prepare ## Run e2e tests - @$(LOG_TARGET) -ifeq ($(E2E_RUN_TEST),) + go test $(E2E_TEST_ARGS) ./test/e2e --gateway-class=envoy-gateway --debug=true --cleanup-base-resources=false - go test $(E2E_TEST_ARGS) ./test/e2e/merge_gateways --gateway-class=merge-gateways --debug=true --cleanup-base-resources=false - go test $(E2E_TEST_ARGS) ./test/e2e/multiple_gc --debug=true --cleanup-base-resources=true - LAST_VERSION_TAG=$(shell cat VERSION) go test $(E2E_TEST_ARGS) ./test/e2e/upgrade --gateway-class=upgrade --debug=true --cleanup-base-resources=$(E2E_CLEANUP) -else - go test $(E2E_TEST_ARGS) ./test/e2e --gateway-class=envoy-gateway --debug=true --cleanup-base-resources=$(E2E_CLEANUP) \ - --run-test $(E2E_RUN_TEST) -endif + .PHONY: run-resilience run-resilience: ## Run resilience tests