From 34a27586ffe9f01df245e271ee2577b0a0288ee0 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Tue, 3 Sep 2024 12:50:18 -0500 Subject: [PATCH 1/7] fix: Debug logging the cache --- api/v1/zz_generated.deepcopy.go | 1 - .../apps.wandb.com_weightsandbiases.yaml | 19 ++++++++++++------- controllers/weightsandbiases_controller.go | 13 +++++++------ 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 0aed5c3..b61fc2b 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -1,5 +1,4 @@ //go:build !ignore_autogenerated -// +build !ignore_autogenerated /* Copyright 2023. diff --git a/config/crd/bases/apps.wandb.com_weightsandbiases.yaml b/config/crd/bases/apps.wandb.com_weightsandbiases.yaml index f22633c..1c5fbed 100644 --- a/config/crd/bases/apps.wandb.com_weightsandbiases.yaml +++ b/config/crd/bases/apps.wandb.com_weightsandbiases.yaml @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: annotations: - controller-gen.kubebuilder.io/version: v0.12.0 + controller-gen.kubebuilder.io/version: v0.14.0 name: weightsandbiases.apps.wandb.com spec: group: apps.wandb.com @@ -22,14 +22,19 @@ spec: description: WeightsAndBiases is the Schema for the weightsandbiases API properties: apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources type: string kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds type: string metadata: type: object diff --git a/controllers/weightsandbiases_controller.go b/controllers/weightsandbiases_controller.go index 3cbcf1f..ef76e47 100644 --- a/controllers/weightsandbiases_controller.go +++ b/controllers/weightsandbiases_controller.go @@ -18,10 +18,11 @@ package controllers import ( "context" + "reflect" + "github.com/wandb/operator/pkg/wandb/spec/state" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" - "reflect" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" @@ -113,7 +114,7 @@ func (r *WeightsAndBiasesReconciler) Reconcile(ctx context.Context, req ctrl.Req currentActiveSpec, err := specManager.GetActive() if err != nil { - // This scenario can happen if we have not successfully deploy in the + // This scenario can happen if we have not successfully deployed in the // past. log.Info("No active spec found.") } @@ -126,16 +127,16 @@ func (r *WeightsAndBiasesReconciler) Reconcile(ctx context.Context, req ctrl.Req deployerSpec, err = r.DeployerClient.GetSpec(license, currentActiveSpec) if err != nil { log.Info("Failed to get spec from deployer", "error", err) - // This scenario may occur if the user disables networking, or if the deployer - // is not operational, and a version has been deployed successfully. Rather than - // reverting to the container defaults, we've stored the most recent successful - // deployer release in the cache + // Attempt to retrieve the cached release if deployerSpec, err = specManager.Get("latest-cached-release"); err != nil { log.Error(err, "No cached release found for deployer spec", err, "error") + } else { + log.Info("Using cached deployer spec", "cachedSpec", deployerSpec) } } if deployerSpec != nil { + log.Info("Writing deployer spec to cache", "cachedSpec", deployerSpec) if err := specManager.Set("latest-cached-release", deployerSpec); err != nil { r.Recorder.Event(wandb, corev1.EventTypeNormal, "SecretWriteFailed", "Unable to write secret to kubernetes") log.Error(err, "Unable to save latest release.") From d8bc73b66b406721526ce4cd05ea1da5ddcaa5e3 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Tue, 3 Sep 2024 12:54:12 -0500 Subject: [PATCH 2/7] comment --- controllers/weightsandbiases_controller.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/controllers/weightsandbiases_controller.go b/controllers/weightsandbiases_controller.go index ef76e47..8a072d4 100644 --- a/controllers/weightsandbiases_controller.go +++ b/controllers/weightsandbiases_controller.go @@ -127,7 +127,11 @@ func (r *WeightsAndBiasesReconciler) Reconcile(ctx context.Context, req ctrl.Req deployerSpec, err = r.DeployerClient.GetSpec(license, currentActiveSpec) if err != nil { log.Info("Failed to get spec from deployer", "error", err) - // Attempt to retrieve the cached release + // This scenario may occur if the user disables networking, or if the deployer + // is not operational, and a version has been deployed successfully. Rather than + // reverting to the container defaults, we've stored the most recent successful + // deployer release in the cache + // Attempt to retrieve the cached release if deployerSpec, err = specManager.Get("latest-cached-release"); err != nil { log.Error(err, "No cached release found for deployer spec", err, "error") } else { From f4103462c8360ef5b2ab92e10885efa405e9687e Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Tue, 3 Sep 2024 13:41:18 -0500 Subject: [PATCH 3/7] debug mode --- controllers/weightsandbiases_controller.go | 23 +++++++++++++++++++++- main.go | 10 +++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/controllers/weightsandbiases_controller.go b/controllers/weightsandbiases_controller.go index 8a072d4..eea1b57 100644 --- a/controllers/weightsandbiases_controller.go +++ b/controllers/weightsandbiases_controller.go @@ -55,6 +55,7 @@ type WeightsAndBiasesReconciler struct { Scheme *runtime.Scheme Recorder record.EventRecorder DryRun bool + Debug bool } //+kubebuilder:rbac:groups=apps.wandb.com,resources=weightsandbiases,verbs=get;list;watch;create;update;patch;delete @@ -131,7 +132,7 @@ func (r *WeightsAndBiasesReconciler) Reconcile(ctx context.Context, req ctrl.Req // is not operational, and a version has been deployed successfully. Rather than // reverting to the container defaults, we've stored the most recent successful // deployer release in the cache - // Attempt to retrieve the cached release + // Attempt to retrieve the cached release if deployerSpec, err = specManager.Get("latest-cached-release"); err != nil { log.Error(err, "No cached release found for deployer spec", err, "error") } else { @@ -150,11 +151,31 @@ func (r *WeightsAndBiasesReconciler) Reconcile(ctx context.Context, req ctrl.Req } desiredSpec := new(spec.Spec) + + if r.Debug { + log.Info("Initial desired spec", "spec", desiredSpec.SensitiveValuesMasked()) + } + // First takes precedence desiredSpec.Merge(crdSpec) + if r.Debug { + log.Info("Desired spec after merging crdSpec", "spec", desiredSpec.SensitiveValuesMasked()) + } + desiredSpec.Merge(userInputSpec) + if r.Debug { + log.Info("Desired spec after merging userInputSpec", "spec", desiredSpec.SensitiveValuesMasked()) + } + desiredSpec.Merge(deployerSpec) + if r.Debug { + log.Info("Desired spec after merging deployerSpec", "spec", desiredSpec.SensitiveValuesMasked()) + } + desiredSpec.Merge(operator.Defaults(wandb, r.Scheme)) + if r.Debug { + log.Info("Desired spec after merging operator defaults", "spec", desiredSpec.SensitiveValuesMasked()) + } log.Info("Desired spec", "spec", desiredSpec.SensitiveValuesMasked()) diff --git a/main.go b/main.go index 67bd760..0dadd35 100644 --- a/main.go +++ b/main.go @@ -19,12 +19,14 @@ package main import ( "flag" "fmt" - "github.com/wandb/operator/pkg/wandb/spec/channel/deployer" "os" - "sigs.k8s.io/controller-runtime/pkg/cache" + "github.com/wandb/operator/pkg/wandb/spec/channel/deployer" + "strings" + "sigs.k8s.io/controller-runtime/pkg/cache" + // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) // to ensure that exec-entrypoint and run can make use of them. @@ -56,13 +58,14 @@ func init() { func main() { var metricsAddr, deployerAPI, probeAddr, isolationNamespaces string - var enableLeaderElection, airgapped bool + var enableLeaderElection, airgapped, debug bool flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") flag.StringVar(&deployerAPI, "deployer-channel-url", "", "URL of the deployer channel") flag.BoolVar(&airgapped, "airgapped", false, "Enable airgapped mode") + flag.BoolVar(&debug, "debug", false, "Enable debug mode") flag.BoolVar(&enableLeaderElection, "leader-elect", false, "Enable leader election for controller manager. "+ "Enabling this will ensure there is only one active controller manager.") @@ -117,6 +120,7 @@ func main() { Client: mgr.GetClient(), Scheme: mgr.GetScheme(), DeployerClient: &deployer.DeployerClient{DeployerChannelUrl: deployerAPI}, + Debug: debug, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "WeightsAndBiases") os.Exit(1) From e0631b0f9424d2174de7fc53c5feaa07faec2cf4 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Tue, 3 Sep 2024 14:11:47 -0500 Subject: [PATCH 4/7] fix: Debug logic --- controllers/weightsandbiases_controller.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/controllers/weightsandbiases_controller.go b/controllers/weightsandbiases_controller.go index eea1b57..afc08a0 100644 --- a/controllers/weightsandbiases_controller.go +++ b/controllers/weightsandbiases_controller.go @@ -135,13 +135,16 @@ func (r *WeightsAndBiasesReconciler) Reconcile(ctx context.Context, req ctrl.Req // Attempt to retrieve the cached release if deployerSpec, err = specManager.Get("latest-cached-release"); err != nil { log.Error(err, "No cached release found for deployer spec", err, "error") - } else { - log.Info("Using cached deployer spec", "cachedSpec", deployerSpec) + } + if r.Debug { + log.Info("Using cached deployer spec", "spec", deployerSpec) } } if deployerSpec != nil { - log.Info("Writing deployer spec to cache", "cachedSpec", deployerSpec) + if r.Debug { + log.Info("Writing deployer spec to cache", "spec", deployerSpec) + } if err := specManager.Set("latest-cached-release", deployerSpec); err != nil { r.Recorder.Event(wandb, corev1.EventTypeNormal, "SecretWriteFailed", "Unable to write secret to kubernetes") log.Error(err, "Unable to save latest release.") @@ -228,7 +231,9 @@ func (r *WeightsAndBiasesReconciler) Reconcile(ctx context.Context, req ctrl.Req statusManager.Set(status.InvalidConfig) return ctrlqueue.Requeue(desiredSpec) } - log.Info("Successfully saved active spec") + if r.Debug { + log.Info("Successfully saved active spec", "spec", desiredSpec.SensitiveValuesMasked()) + } r.Recorder.Event(wandb, corev1.EventTypeNormal, "Completed", "Completed reconcile successfully") statusManager.Set(status.Completed) From 128ff7f9c96e83fb87f3394f00bc4cfc1f62ddda Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Tue, 3 Sep 2024 14:15:44 -0500 Subject: [PATCH 5/7] more debug work --- controllers/weightsandbiases_controller.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/controllers/weightsandbiases_controller.go b/controllers/weightsandbiases_controller.go index afc08a0..b496e4c 100644 --- a/controllers/weightsandbiases_controller.go +++ b/controllers/weightsandbiases_controller.go @@ -137,13 +137,13 @@ func (r *WeightsAndBiasesReconciler) Reconcile(ctx context.Context, req ctrl.Req log.Error(err, "No cached release found for deployer spec", err, "error") } if r.Debug { - log.Info("Using cached deployer spec", "spec", deployerSpec) + log.Info("Using cached deployer spec", "spec", deployerSpec.SensitiveValuesMasked()) } } if deployerSpec != nil { if r.Debug { - log.Info("Writing deployer spec to cache", "spec", deployerSpec) + log.Info("Writing deployer spec to cache", "spec", deployerSpec.SensitiveValuesMasked()) } if err := specManager.Set("latest-cached-release", deployerSpec); err != nil { r.Recorder.Event(wandb, corev1.EventTypeNormal, "SecretWriteFailed", "Unable to write secret to kubernetes") @@ -223,7 +223,7 @@ func (r *WeightsAndBiasesReconciler) Reconcile(ctx context.Context, req ctrl.Req return ctrlqueue.Requeue(desiredSpec) } } - log.Info("Successfully applied spec", "spec", desiredSpec) + log.Info("Successfully applied spec", "spec", desiredSpec.SensitiveValuesMasked()) if err := specManager.SetActive(desiredSpec); err != nil { r.Recorder.Event(wandb, corev1.EventTypeNormal, "SetActiveFailed", "Failed to save active state") From 4d8aa026be339a8ff127b626befc4aa4e7790499 Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Tue, 3 Sep 2024 14:57:54 -0500 Subject: [PATCH 6/7] fix: diff --- controllers/weightsandbiases_controller.go | 3 ++ pkg/wandb/spec/spec.go | 35 ++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/controllers/weightsandbiases_controller.go b/controllers/weightsandbiases_controller.go index b496e4c..f95d608 100644 --- a/controllers/weightsandbiases_controller.go +++ b/controllers/weightsandbiases_controller.go @@ -191,6 +191,9 @@ func (r *WeightsAndBiasesReconciler) Reconcile(ctx context.Context, req ctrl.Req statusManager.Set(status.Completed) return ctrlqueue.Requeue(desiredSpec) } + if r.Debug { + log.Info("Specs are not equal", "diff", currentActiveSpec.DiffValues(desiredSpec)) + } } if desiredSpec.Chart == nil { diff --git a/pkg/wandb/spec/spec.go b/pkg/wandb/spec/spec.go index ca66ca6..e31bc6a 100644 --- a/pkg/wandb/spec/spec.go +++ b/pkg/wandb/spec/spec.go @@ -88,6 +88,41 @@ func (s *Spec) IsEqual(spec *Spec) bool { return isReleaseEqual && isValuesEqual && isMetadataEqual } +func (s *Spec) DiffValues(other *Spec) map[string]interface{} { + return diffMaps(s.Values, other.Values) +} + +func diffMaps(a, b map[string]interface{}) map[string]interface{} { + diff := make(map[string]interface{}) + for key, aValue := range a { + if bValue, ok := b[key]; ok { + if !reflect.DeepEqual(aValue, bValue) { + switch aValue.(type) { + case map[string]interface{}: + if bMap, ok := bValue.(map[string]interface{}); ok { + nestedDiff := diffMaps(aValue.(map[string]interface{}), bMap) + if len(nestedDiff) > 0 { + diff[key] = nestedDiff + } + } else { + diff[key] = aValue + } + default: + diff[key] = aValue + } + } + } else { + diff[key] = aValue + } + } + for key, bValue := range b { + if _, ok := a[key]; !ok { + diff[key] = bValue + } + } + return diff +} + func (s *Spec) mergeConfig(values Values) (err error) { if s.Values == nil { s.Values = values From 542e5c37cc75ad5b5eda22260c6a4b2551a8e19d Mon Sep 17 00:00:00 2001 From: Zachary Blasczyk Date: Tue, 3 Sep 2024 15:04:21 -0500 Subject: [PATCH 7/7] fix: Behind the debug flag, order --- controllers/weightsandbiases_controller.go | 6 ++-- pkg/wandb/spec/spec.go | 35 ---------------------- 2 files changed, 2 insertions(+), 39 deletions(-) diff --git a/controllers/weightsandbiases_controller.go b/controllers/weightsandbiases_controller.go index 5127aec..5f5fe46 100644 --- a/controllers/weightsandbiases_controller.go +++ b/controllers/weightsandbiases_controller.go @@ -190,12 +190,10 @@ func (r *WeightsAndBiasesReconciler) Reconcile(ctx context.Context, req ctrl.Req log.Info("No changes found") statusManager.Set(status.Completed) return ctrlqueue.Requeue(desiredSpec) - } else { - diff := currentActiveSpec.DiffValues(desiredSpec) - log.Info("Changes found", "diff", diff) } if r.Debug { - log.Info("Specs are not equal", "diff", currentActiveSpec.DiffValues(desiredSpec)) + diff := currentActiveSpec.DiffValues(desiredSpec) + log.Info("Specs are not equal", "diff", diff) } } diff --git a/pkg/wandb/spec/spec.go b/pkg/wandb/spec/spec.go index 0b942e7..e31bc6a 100644 --- a/pkg/wandb/spec/spec.go +++ b/pkg/wandb/spec/spec.go @@ -190,38 +190,3 @@ func maskValues(values map[string]interface{}) map[string]interface{} { } return newValues } - -func (s *Spec) DiffValues(other *Spec) map[string]interface{} { - return diffMaps(s.Values, other.Values) -} - -func diffMaps(a, b map[string]interface{}) map[string]interface{} { - diff := make(map[string]interface{}) - for key, aValue := range a { - if bValue, ok := b[key]; ok { - if !reflect.DeepEqual(aValue, bValue) { - switch aValue.(type) { - case map[string]interface{}: - if bMap, ok := bValue.(map[string]interface{}); ok { - nestedDiff := diffMaps(aValue.(map[string]interface{}), bMap) - if len(nestedDiff) > 0 { - diff[key] = nestedDiff - } - } else { - diff[key] = aValue - } - default: - diff[key] = aValue - } - } - } else { - diff[key] = aValue - } - } - for key, bValue := range b { - if _, ok := a[key]; !ok { - diff[key] = bValue - } - } - return diff -}