KRT syncer follow-up (#10228)
fixup AuthConfig metadata
lgadban authored Oct 29, 2024
1 parent adc0e0e commit 227f197
Showing 7 changed files with 53 additions and 19 deletions.
6 changes: 6 additions & 0 deletions changelog/v1.18.0-beta30/fix-krt-syncer-1.yaml
@@ -0,0 +1,6 @@
changelog:
- type: NON_USER_FACING
issueLink: https://github.com/solo-io/solo-projects/issues/7036
resolvesIssue: false
description: >-
Fix krt syncer not correctly setting AuthConfig metadata
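
The fix in this commit boils down to one pattern: when converting a Kubernetes CR (AuthConfig, Upstream) into its Gloo wire type, copy the object's name and namespace into solo-kit `core.Metadata` before handing it to the snapshot. Below is a minimal sketch of that pattern using the imports seen in this diff; the `withKubeMeta` helper and the `metadataSetter` interface are illustrative, not part of the commit.

```go
package example

import (
	"github.com/solo-io/solo-kit/pkg/api/v1/resources/core"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// metadataSetter is satisfied by solo-kit resources such as the Gloo AuthConfig and Upstream specs.
type metadataSetter interface {
	SetMetadata(md *core.Metadata)
}

// withKubeMeta copies the Kubernetes object's name/namespace onto the Gloo resource,
// mirroring what the KRT syncer now does before adding AuthConfigs to the API snapshot.
// Only Name and Namespace are set here; kubeutils.FromKubeMeta(...) copies more fields.
func withKubeMeta[T metadataSetter](obj metav1.Object, spec T) T {
	spec.SetMetadata(&core.Metadata{
		Name:      obj.GetName(),
		Namespace: obj.GetNamespace(),
	})
	return spec
}
```
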
20 changes: 14 additions & 6 deletions projects/gateway2/proxy_syncer/kube_gw_translator_syncer.go
@@ -4,12 +4,12 @@ import (
"context"

"github.com/solo-io/go-utils/contextutils"
"github.com/solo-io/go-utils/hashutils"
"github.com/solo-io/solo-kit/pkg/api/v1/resources/core"
"github.com/solo-io/solo-kit/pkg/api/v2/reporter"
"istio.io/istio/pkg/kube/krt"

"github.com/solo-io/gloo/pkg/utils/settingsutil"
"github.com/solo-io/gloo/pkg/utils/statsutils"
"github.com/solo-io/gloo/pkg/utils/syncutil"
"github.com/solo-io/gloo/projects/gloo/pkg/api/grpc/validation"
v1 "github.com/solo-io/gloo/projects/gloo/pkg/api/v1"
@@ -32,14 +32,17 @@ func (s *ProxyTranslator) buildXdsSnapshot(
snap *v1snap.ApiSnapshot,
) (cache.Snapshot, reporter.ResourceReports, *validation.ProxyReport) {
metaKey := xds.SnapshotCacheKey(proxy)

ctx = contextutils.WithLogger(ctx, "kube-gateway-xds-snapshot")
logger := contextutils.LoggerFrom(ctx).With("proxy", metaKey)

logger.Infof("build xds snapshot for proxy %v (%d upstreams, %d endpoints, %d secrets, %d artifacts, %d auth configs, %d rate limit configs)",
metaKey, len(snap.Upstreams), len(snap.Endpoints), len(snap.Secrets), len(snap.Artifacts), len(snap.AuthConfigs), len(snap.Ratelimitconfigs))
snapHash := hashutils.MustHash(snap)
defer logger.Infof("end sync %v", snapHash)

stopwatch := statsutils.NewTranslatorStopWatch("translate-proxy-to-xds")
stopwatch.Start()
defer func() {
duration := stopwatch.Stop(ctx)
logger.Debugf("translated proxy %s to xds in %s", metaKey, duration.String())
}()

// Reports used to aggregate results from xds and extension translation.
// Will contain reports only for `Gloo` components (i.e. Proxies, Upstreams, AuthConfigs, etc.)
@@ -94,7 +97,12 @@ func (s *ProxyTranslator) syncXdsAndStatus(
logger := contextutils.LoggerFrom(ctx)
logger.Infof("begin kube gw sync for proxy %s (%v listeners, %v clusters, %v routes, %v endpoints)",
proxyKey, len(snap.Listeners.Items), len(snap.Clusters.Items), len(snap.Routes.Items), len(snap.Endpoints.Items))
defer logger.Infof("end kube gw sync for proxy %s", proxyKey)
stopwatch := statsutils.NewTranslatorStopWatch("sync-xds-and-gloo-status")
stopwatch.Start()
defer func() {
duration := stopwatch.Stop(ctx)
logger.Infof("end kube gw sync for proxy %s in %s", proxyKey, duration.String())
}()

// stringifying the snapshot may be an expensive operation, so we'd like to avoid building the large
// string if we're not even going to log it anyway
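
Both hunks above replace the hashutils-based "end sync" log with a translator stopwatch that reports how long translation or sync took. A minimal sketch of that pattern, assuming the `statsutils.NewTranslatorStopWatch`/`Start`/`Stop` API shown in the diff; the `doWork` function name is illustrative.

```go
package example

import (
	"context"

	"github.com/solo-io/gloo/pkg/utils/statsutils"
	"github.com/solo-io/go-utils/contextutils"
)

// doWork shows the timing pattern used in this commit: start a named stopwatch,
// then stop it in a defer and log the measured duration alongside the work's key.
func doWork(ctx context.Context, proxyKey string) {
	logger := contextutils.LoggerFrom(ctx)

	stopwatch := statsutils.NewTranslatorStopWatch("translate-proxy-to-xds")
	stopwatch.Start()
	defer func() {
		duration := stopwatch.Stop(ctx)
		logger.Debugf("translated proxy %s to xds in %s", proxyKey, duration.String())
	}()

	// ... translation / sync work happens here ...
}
```
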
29 changes: 19 additions & 10 deletions projects/gateway2/proxy_syncer/proxy_syncer.go
@@ -251,7 +251,7 @@ func (p proxyList) Equals(in proxyList) bool {
}

func (s *ProxySyncer) Init(ctx context.Context) error {
ctx = contextutils.WithLogger(ctx, "k8s-gw-syncer")
ctx = contextutils.WithLogger(ctx, "k8s-gw-proxy-syncer")
logger := contextutils.LoggerFrom(ctx)

// TODO: handle cfgmap noisiness? (https://github.com/solo-io/gloo/blob/main/projects/gloo/pkg/api/converters/kube/artifact_converter.go#L31)
@@ -285,9 +285,11 @@ func (s *ProxySyncer) Init(ctx context.Context) error {
// helper collection to map from the runtime.Object Upstream representation to the gloov1.Upstream wrapper
glooUpstreams := krt.NewCollection(upstreams, func(kctx krt.HandlerContext, u *glookubev1.Upstream) *UpstreamWrapper {
glooUs := &u.Spec
glooUs.Metadata = &core.Metadata{}
glooUs.GetMetadata().Name = u.GetName()
glooUs.GetMetadata().Namespace = u.GetNamespace()
md := core.Metadata{
Name: u.GetName(),
Namespace: u.GetNamespace(),
}
glooUs.SetMetadata(&md)
us := &UpstreamWrapper{Inner: glooUs}
return us
}, krt.WithName("GlooUpstreams"))
@@ -322,13 +324,14 @@ func (s *ProxySyncer) Init(ctx context.Context) error {
s.proxyTrigger = krt.NewRecomputeTrigger(true)

glooProxies := krt.NewCollection(kubeGateways, func(kctx krt.HandlerContext, gw *gwv1.Gateway) *glooProxy {
logger.Debugf("building proxy for kube gw %s version %s", client.ObjectKeyFromObject(gw), gw.GetResourceVersion())
s.proxyTrigger.MarkDependant(kctx)
proxy := s.buildProxy(ctx, gw)
return proxy
})
s.xdsSnapshots = krt.NewCollection(glooProxies, func(kctx krt.HandlerContext, proxy glooProxy) *xdsSnapWrapper {
// we are recomputing xds snapshots as proxies have changed, signal that we need to sync xds with these new snapshots
xdsSnap := s.buildXdsSnapshot(
xdsSnap := s.translateProxy(
ctx,
kctx,
logger,
@@ -521,7 +524,7 @@ func (s *ProxySyncer) buildProxy(ctx context.Context, gw *gwv1.Gateway) *glooProxy {
}
}

func (s *ProxySyncer) buildXdsSnapshot(
func (s *ProxySyncer) translateProxy(
ctx context.Context,
kctx krt.HandlerContext,
logger *zap.SugaredLogger,
@@ -533,7 +536,6 @@ func (s *ProxySyncer) buildXdsSnapshot(
authConfigs krt.Collection[*extauthkubev1.AuthConfig],
rlConfigs krt.Collection[*rlkubev1a1.RateLimitConfig],
) *xdsSnapWrapper {
// TODO: add stopwatch with debug log
cfgmaps := krt.Fetch(kctx, kcm)
endpoints := krt.Fetch(kctx, kep)
secrets := krt.Fetch(kctx, ks)
@@ -551,7 +553,14 @@

acfgs := make([]*extauthv1.AuthConfig, 0, len(authcfgs))
for _, kac := range authcfgs {
acfgs = append(acfgs, &kac.Spec)
gac := &kac.Spec
// only setting Name & Namespace, all we need initially; alternatively, see kubeutils.FromKubeMeta(...)
md := core.Metadata{
Name: kac.GetName(),
Namespace: kac.GetNamespace(),
}
gac.SetMetadata(&md)
acfgs = append(acfgs, gac)
}
latestSnap.AuthConfigs = acfgs

@@ -721,7 +730,7 @@ func (s *ProxySyncer) syncRouteStatus(ctx context.Context, rm reports.ReportMap)
// Sometimes the List returns stale (cached) httproutes, causing the status update to fail
// with "the object has been modified" errors. Therefore we try the status updates in a retry loop.
err := retry.Do(func() error {
for rnn, _ := range rm.Routes {
for rnn := range rm.Routes {
route := gwv1.HTTPRoute{}
err := s.mgr.GetClient().Get(ctx, rnn, &route)
if err != nil {
@@ -761,7 +770,7 @@ func (s *ProxySyncer) syncGatewayStatus(ctx context.Context, rm reports.ReportMap)

// TODO: retry within loop per GW rather than as a full block
err := retry.Do(func() error {
for gwnn, _ := range rm.Gateways {
for gwnn := range rm.Gateways {
gw := gwv1.Gateway{}
err := s.mgr.GetClient().Get(ctx, gwnn, &gw)
if err != nil {
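
The `syncRouteStatus`/`syncGatewayStatus` comments above explain why status updates run inside a retry loop: the cached client can return stale objects, and the subsequent update fails with "the object has been modified". Below is a minimal sketch of that pattern, assuming an avast/retry-go style `retry.Do` and a controller-runtime client; the `updateRouteStatus` helper, the fixed attempt count, and the single shared status parameter are illustrative simplifications (in the commit each route's status is built from the report map).

```go
package example

import (
	"context"

	"github.com/avast/retry-go"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
	gwv1 "sigs.k8s.io/gateway-api/apis/v1"
)

// updateRouteStatus re-reads each HTTPRoute and writes its status, retrying the whole
// batch so a stale cached read (and the resulting conflict error) resolves on a later attempt.
func updateRouteStatus(ctx context.Context, cl client.Client, routes []types.NamespacedName, status gwv1.HTTPRouteStatus) error {
	return retry.Do(func() error {
		for _, rnn := range routes {
			route := gwv1.HTTPRoute{}
			if err := cl.Get(ctx, rnn, &route); err != nil {
				return err
			}
			route.Status = status
			if err := cl.Status().Update(ctx, &route); err != nil {
				// e.g. "the object has been modified"; retry.Do re-runs the loop
				return err
			}
		}
		return nil
	},
		retry.Attempts(5),
	)
}
```
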
7 changes: 6 additions & 1 deletion projects/gateway2/translator/gateway_translator.go
@@ -6,6 +6,7 @@ import (

"github.com/solo-io/gloo/pkg/utils/statsutils"
"github.com/solo-io/gloo/projects/gateway2/translator/plugins/registry"
"github.com/solo-io/go-utils/contextutils"

"github.com/solo-io/gloo/projects/gateway2/query"
"github.com/solo-io/gloo/projects/gateway2/reports"
@@ -14,6 +15,7 @@
"github.com/solo-io/gloo/projects/gloo/pkg/utils"
"github.com/solo-io/solo-kit/pkg/api/v1/resources/core"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
gwv1 "sigs.k8s.io/gateway-api/apis/v1"
)

@@ -52,9 +54,12 @@ func (t *translator) TranslateProxy(
stopwatch.Start()
defer stopwatch.Stop(ctx)

ctx = contextutils.WithLogger(ctx, "k8s-gateway-translator")
logger := contextutils.LoggerFrom(ctx)
routesForGw, err := t.queries.GetRoutesForGateway(ctx, gateway)
if err != nil {
// TODO(ilackarms): fill in the specific error / validation
logger.Errorf("err getting routes for gw '%s': %v", client.ObjectKeyFromObject(gateway), err)
// TODO: decide how/if to report this error on Gateway
// reporter.Gateway(gateway).Err(err.Error())
return nil
}
Expand Down
2 changes: 2 additions & 0 deletions test/kube2e/util.go
@@ -74,6 +74,8 @@ func EventuallyReachesConsistentState(installNamespace string) {
}

// Gloo components are configured to log to the Info level by default
// Note that for new-style e2e tests (i.e. the `test/kubernetes` package) we run with debug logging
// so failures can be appropriately triaged after the fact
logLevelAssertion := assertions.LogLevelAssertion(zapcore.InfoLevel)

// The emitter at some point should stabilize and not continue to increase the number of snapshots produced
Expand Down
@@ -49,6 +49,9 @@ gateway:

# Configuration for the Gloo pod
gloo:
# we need to be able to troubleshoot failures using logs; when users encounter problems, enabling debug logging is
# a very early step in the troubleshooting process
logLevel: debug
deployment:
livenessProbeEnabled: true
customEnv:
5 changes: 3 additions & 2 deletions test/kubernetes/testutils/assertions/deployments.go
@@ -40,8 +40,9 @@ func (p *Provider) EventuallyGlooReachesConsistentState(installNamespace string)
TargetPort: stats.DefaultPort,
}

// Gloo components are configured to log to the Info level by default
logLevelAssertion := assertions.LogLevelAssertion(zapcore.InfoLevel)
// Gloo components are configured to log to the Info level by default, but for these e2e tests we explicitly enable debug logging.
// This is done so we can triage failures after the fact, and it matches the process users will take to troubleshoot.
logLevelAssertion := assertions.LogLevelAssertion(zapcore.DebugLevel)

// The emitter at some point should stabilize and not continue to increase the number of snapshots produced
// We choose 4 here as a bit of a magic number, but we feel comfortable that if 4 consecutive polls of the metrics
