From 9ee434394776ea1ef15c9768e2e6cd2afb597e75 Mon Sep 17 00:00:00 2001 From: Mike Dame Date: Wed, 11 Dec 2024 09:52:32 -0500 Subject: [PATCH] Reload Collectors without restart when ConfigMap changes (#1903) Kubernetes automatically updates mounted configmaps in a pod (see https://kubernetes.io/docs/tutorials/configuration/updating-configuration-via-a-configmap/). But, the workload needs to watch for the update itself. So in our case, if the collector knows to watch for file changes, it can automatically signal a dynamic config reload without restarting the collector. This forks the default fileprovider in our collectors to create a new odigosfileprovider that uses fsnotify to watch for updates to the collector ConfigMap. When the ConfigMap is updated, the new fileprovider will signal the collector to hot-reload its config. Technically, we watch for `fsnotify.Remove` events, because the projected configmap data is a symlink, not an actual copy of the configmap (meaning that watching `fsnotify.Write` doesn't trigger any update). This means we don't need to restart the collector deployments or daemonsets for basic config updates, so those controllers have been updated to no longer update deployments when just the configmap has changed. They can of course still be manually redeployed with `kubectl`. In my manual testing, it took about 1 minute for the change to be reflected, which is due to the default kubelet sync period (https://kubernetes.io/docs/tasks/configure-pod-container/configure-pod-configmap/#mounted-configmaps-are-updated-automatically). Not sure how we could test for this automatically but working on that --------- Co-authored-by: Ben Elferink --- .../controllers/datacollection/configmap.go | 15 +- .../controllers/datacollection/daemonset.go | 7 +- autoscaler/controllers/datacollection/root.go | 2 +- autoscaler/controllers/gateway/configmap.go | 16 +- autoscaler/controllers/gateway/deployment.go | 6 +- autoscaler/controllers/gateway/root.go | 4 +- collector/builder-config.yaml | 7 +- collector/odigosotelcol/go.mod | 9 +- collector/odigosotelcol/go.sum | 6 +- collector/odigosotelcol/main.go | 10 +- collector/providers/odigosfileprovider/go.mod | 26 +++ collector/providers/odigosfileprovider/go.sum | 47 ++++++ .../providers/odigosfileprovider/provider.go | 148 ++++++++++++++++++ .../03-assert-action-created.yaml | 86 ++++++++++ .../workload-lifecycle/03-create-action.yaml | 14 ++ .../workload-lifecycle/03-wait-for-trace.yaml | 7 + .../e2e/workload-lifecycle/chainsaw-test.yaml | 65 ++++++++ 17 files changed, 430 insertions(+), 45 deletions(-) create mode 100644 collector/providers/odigosfileprovider/go.mod create mode 100644 collector/providers/odigosfileprovider/go.sum create mode 100644 collector/providers/odigosfileprovider/provider.go create mode 100644 tests/e2e/workload-lifecycle/03-assert-action-created.yaml create mode 100644 tests/e2e/workload-lifecycle/03-create-action.yaml create mode 100644 tests/e2e/workload-lifecycle/03-wait-for-trace.yaml diff --git a/autoscaler/controllers/datacollection/configmap.go b/autoscaler/controllers/datacollection/configmap.go index aab745125c..060f817f2b 100644 --- a/autoscaler/controllers/datacollection/configmap.go +++ b/autoscaler/controllers/datacollection/configmap.go @@ -30,7 +30,7 @@ import ( func SyncConfigMap(apps *odigosv1.InstrumentedApplicationList, dests *odigosv1.DestinationList, allProcessors *odigosv1.ProcessorList, datacollection *odigosv1.CollectorsGroup, ctx context.Context, - c client.Client, scheme *runtime.Scheme, disableNameProcessor bool) (string, error) { + c client.Client, scheme *runtime.Scheme, disableNameProcessor bool) error { logger := log.FromContext(ctx) processors := commonconf.FilterAndSortProcessorsByOrderHint(allProcessors, odigosv1.CollectorsGroupRoleNodeCollector) @@ -42,9 +42,8 @@ func SyncConfigMap(apps *odigosv1.InstrumentedApplicationList, dests *odigosv1.D desired, err := getDesiredConfigMap(apps, dests, processors, datacollection, scheme, setTracesLoadBalancer, disableNameProcessor) if err != nil { logger.Error(err, "failed to get desired config map") - return "", err + return err } - desiredData := desired.Data[constsK8s.OdigosNodeCollectorConfigMapKey] existing := &v1.ConfigMap{} if err := c.Get(ctx, client.ObjectKey{Namespace: datacollection.Namespace, Name: datacollection.Name}, existing); err != nil { @@ -53,12 +52,12 @@ func SyncConfigMap(apps *odigosv1.InstrumentedApplicationList, dests *odigosv1.D _, err := createConfigMap(desired, ctx, c) if err != nil { logger.Error(err, "failed to create config map") - return "", err + return err } - return desiredData, nil + return nil } else { logger.Error(err, "failed to get config map") - return "", err + return err } } @@ -66,10 +65,10 @@ func SyncConfigMap(apps *odigosv1.InstrumentedApplicationList, dests *odigosv1.D _, err = patchConfigMap(ctx, existing, desired, c) if err != nil { logger.Error(err, "failed to patch config map") - return "", err + return err } - return desiredData, nil + return nil } func patchConfigMap(ctx context.Context, existing *v1.ConfigMap, desired *v1.ConfigMap, c client.Client) (*v1.ConfigMap, error) { diff --git a/autoscaler/controllers/datacollection/daemonset.go b/autoscaler/controllers/datacollection/daemonset.go index dbfc70571a..7049263d87 100644 --- a/autoscaler/controllers/datacollection/daemonset.go +++ b/autoscaler/controllers/datacollection/daemonset.go @@ -113,7 +113,7 @@ func syncDaemonSet(ctx context.Context, dests *odigosv1.DestinationList, datacol logger.Error(err, "Failed to get signals from otelcol config") return nil, err } - desiredDs, err := getDesiredDaemonSet(datacollection, otelcolConfigContent, scheme, imagePullSecrets, odigosVersion, k8sVersion, odigletDaemonsetPodSpec) + desiredDs, err := getDesiredDaemonSet(datacollection, scheme, imagePullSecrets, odigosVersion, k8sVersion, odigletDaemonsetPodSpec) if err != nil { logger.Error(err, "Failed to get desired DaemonSet") return nil, err @@ -169,7 +169,7 @@ func getOdigletDaemonsetPodSpec(ctx context.Context, c client.Client, namespace return &odigletDaemonset.Spec.Template.Spec, nil } -func getDesiredDaemonSet(datacollection *odigosv1.CollectorsGroup, configData string, +func getDesiredDaemonSet(datacollection *odigosv1.CollectorsGroup, scheme *runtime.Scheme, imagePullSecrets []string, odigosVersion string, k8sVersion *version.Version, odigletDaemonsetPodSpec *corev1.PodSpec, ) (*appsv1.DaemonSet, error) { @@ -212,9 +212,6 @@ func getDesiredDaemonSet(datacollection *odigosv1.CollectorsGroup, configData st Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Labels: NodeCollectorsLabels, - Annotations: map[string]string{ - configHashAnnotation: common.Sha256Hash(configData), - }, }, Spec: corev1.PodSpec{ NodeSelector: odigletDaemonsetPodSpec.NodeSelector, diff --git a/autoscaler/controllers/datacollection/root.go b/autoscaler/controllers/datacollection/root.go index 80a6f11def..14a5dffdf1 100644 --- a/autoscaler/controllers/datacollection/root.go +++ b/autoscaler/controllers/datacollection/root.go @@ -61,7 +61,7 @@ func syncDataCollection(instApps *odigosv1.InstrumentedApplicationList, dests *o logger := log.FromContext(ctx) logger.V(0).Info("Syncing data collection") - _, err := SyncConfigMap(instApps, dests, processors, dataCollection, ctx, c, scheme, disableNameProcessor) + err := SyncConfigMap(instApps, dests, processors, dataCollection, ctx, c, scheme, disableNameProcessor) if err != nil { logger.Error(err, "Failed to sync config map") return err diff --git a/autoscaler/controllers/gateway/configmap.go b/autoscaler/controllers/gateway/configmap.go index 1a5ca18d5c..c94c903279 100644 --- a/autoscaler/controllers/gateway/configmap.go +++ b/autoscaler/controllers/gateway/configmap.go @@ -111,7 +111,7 @@ func addSelfTelemetryPipeline(c *config.Config, ownTelemetryPort int32) error { return nil } -func syncConfigMap(dests *odigosv1.DestinationList, allProcessors *odigosv1.ProcessorList, gateway *odigosv1.CollectorsGroup, ctx context.Context, c client.Client, scheme *runtime.Scheme) (string, []odigoscommon.ObservabilitySignal, error) { +func syncConfigMap(dests *odigosv1.DestinationList, allProcessors *odigosv1.ProcessorList, gateway *odigosv1.CollectorsGroup, ctx context.Context, c client.Client, scheme *runtime.Scheme) ([]odigoscommon.ObservabilitySignal, error) { logger := log.FromContext(ctx) memoryLimiterConfiguration := common.GetMemoryLimiterConfig(gateway.Spec.ResourcesSettings) @@ -127,7 +127,7 @@ func syncConfigMap(dests *odigosv1.DestinationList, allProcessors *odigosv1.Proc ) if err != nil { logger.Error(err, "Failed to calculate config") - return "", nil, err + return nil, err } for destName, destErr := range status.Destination { @@ -170,7 +170,7 @@ func syncConfigMap(dests *odigosv1.DestinationList, allProcessors *odigosv1.Proc if err := ctrl.SetControllerReference(gateway, desiredCM, scheme); err != nil { logger.Error(err, "Failed to set controller reference") - return "", nil, err + return nil, err } existing := &v1.ConfigMap{} @@ -180,12 +180,12 @@ func syncConfigMap(dests *odigosv1.DestinationList, allProcessors *odigosv1.Proc _, err := createConfigMap(desiredCM, ctx, c) if err != nil { logger.Error(err, "Failed to create gateway config map") - return "", nil, err + return nil, err } - return desiredData, signals, nil + return signals, nil } else { logger.Error(err, "Failed to get gateway config map") - return "", nil, err + return nil, err } } @@ -193,10 +193,10 @@ func syncConfigMap(dests *odigosv1.DestinationList, allProcessors *odigosv1.Proc _, err = patchConfigMap(existing, desiredCM, ctx, c) if err != nil { logger.Error(err, "Failed to patch gateway config map") - return "", nil, err + return nil, err } - return desiredData, signals, nil + return signals, nil } func createConfigMap(desired *v1.ConfigMap, ctx context.Context, c client.Client) (*v1.ConfigMap, error) { diff --git a/autoscaler/controllers/gateway/deployment.go b/autoscaler/controllers/gateway/deployment.go index 207d7ae65c..002ec1b5c9 100644 --- a/autoscaler/controllers/gateway/deployment.go +++ b/autoscaler/controllers/gateway/deployment.go @@ -33,7 +33,7 @@ const ( configHashAnnotation = "odigos.io/config-hash" ) -func syncDeployment(dests *odigosv1.DestinationList, gateway *odigosv1.CollectorsGroup, configData string, +func syncDeployment(dests *odigosv1.DestinationList, gateway *odigosv1.CollectorsGroup, ctx context.Context, c client.Client, scheme *runtime.Scheme, imagePullSecrets []string, odigosVersion string) (*appsv1.Deployment, error) { logger := log.FromContext(ctx) @@ -42,8 +42,8 @@ func syncDeployment(dests *odigosv1.DestinationList, gateway *odigosv1.Collector return nil, errors.Join(err, errors.New("failed to get secrets hash")) } - // Calculate the hash of the config data and the secrets version hash, this is used to make sure the gateway will restart when the config changes - configDataHash := common.Sha256Hash(fmt.Sprintf("%s-%s", configData, secretsVersionHash)) + // Use the hash of the secrets to make sure the gateway will restart when the secrets (mounted as environment variables) changes + configDataHash := common.Sha256Hash(secretsVersionHash) desiredDeployment, err := getDesiredDeployment(dests, configDataHash, gateway, scheme, imagePullSecrets, odigosVersion) if err != nil { return nil, errors.Join(err, errors.New("failed to get desired deployment")) diff --git a/autoscaler/controllers/gateway/root.go b/autoscaler/controllers/gateway/root.go index 552296b40a..1869e35fe5 100644 --- a/autoscaler/controllers/gateway/root.go +++ b/autoscaler/controllers/gateway/root.go @@ -64,7 +64,7 @@ func syncGateway(dests *odigosv1.DestinationList, processors *odigosv1.Processor logger := log.FromContext(ctx) logger.V(0).Info("Syncing gateway") - configData, signals, err := syncConfigMap(dests, processors, gateway, ctx, c, scheme) + signals, err := syncConfigMap(dests, processors, gateway, ctx, c, scheme) if err != nil { logger.Error(err, "Failed to sync config map") return err @@ -82,7 +82,7 @@ func syncGateway(dests *odigosv1.DestinationList, processors *odigosv1.Processor return err } - _, err = syncDeployment(dests, gateway, configData, ctx, c, scheme, imagePullSecrets, odigosVersion) + _, err = syncDeployment(dests, gateway, ctx, c, scheme, imagePullSecrets, odigosVersion) if err != nil { logger.Error(err, "Failed to sync deployment") return err diff --git a/collector/builder-config.yaml b/collector/builder-config.yaml index 4f05e3da59..e3d7fd5384 100644 --- a/collector/builder-config.yaml +++ b/collector/builder-config.yaml @@ -106,6 +106,10 @@ connectors: - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/connector/servicegraphconnector v0.106.0 - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/connector/spanmetricsconnector v0.106.0 +providers: + - gomod: go.opentelemetry.io/collector/odigos/providers/odigosfileprovider v0.106.0 # fork default file provider for config reloading + - gomod: go.opentelemetry.io/collector/confmap/provider/envprovider v0.106.0 + replaces: - github.com/open-telemetry/opentelemetry-collector-contrib/odigos/processor/odigosresourcenameprocessor => ../processors/odigosresourcenameprocessor - github.com/open-telemetry/opentelemetry-collector-contrib/odigos/processor/odigossamplingprocessor => ../processors/odigossamplingprocessor @@ -113,4 +117,5 @@ replaces: - github.com/open-telemetry/opentelemetry-collector-contrib/odigos/processor/odigossqldboperationprocessor => ../processors/odigossqldboperationprocessor - github.com/open-telemetry/opentelemetry-collector-contrib/odigos/exporter/azureblobstorageexporter => ../exporters/azureblobstorageexporter - github.com/open-telemetry/opentelemetry-collector-contrib/odigos/exporter/googlecloudstorageexporter => ../exporters/googlecloudstorageexporter - - github.com/open-telemetry/opentelemetry-collector-contrib/odigos/processor/odigostrafficmetrics => ../processors/odigostrafficmetrics \ No newline at end of file + - github.com/open-telemetry/opentelemetry-collector-contrib/odigos/processor/odigostrafficmetrics => ../processors/odigostrafficmetrics + - go.opentelemetry.io/collector/odigos/providers/odigosfileprovider => ../providers/odigosfileprovider \ No newline at end of file diff --git a/collector/odigosotelcol/go.mod b/collector/odigosotelcol/go.mod index 0553e35e81..f84fc878b0 100644 --- a/collector/odigosotelcol/go.mod +++ b/collector/odigosotelcol/go.mod @@ -88,10 +88,6 @@ require ( go.opentelemetry.io/collector/confmap v0.106.0 go.opentelemetry.io/collector/confmap/converter/expandconverter v0.106.0 go.opentelemetry.io/collector/confmap/provider/envprovider v0.106.0 - go.opentelemetry.io/collector/confmap/provider/fileprovider v0.106.0 - go.opentelemetry.io/collector/confmap/provider/httpprovider v0.106.0 - go.opentelemetry.io/collector/confmap/provider/httpsprovider v0.106.0 - go.opentelemetry.io/collector/confmap/provider/yamlprovider v0.106.0 go.opentelemetry.io/collector/connector v0.106.0 go.opentelemetry.io/collector/connector/forwardconnector v0.106.0 go.opentelemetry.io/collector/exporter v0.106.0 @@ -102,6 +98,7 @@ require ( go.opentelemetry.io/collector/extension v0.106.0 go.opentelemetry.io/collector/extension/ballastextension v0.106.0 go.opentelemetry.io/collector/extension/zpagesextension v0.106.0 + go.opentelemetry.io/collector/odigos/providers/odigosfileprovider v0.106.0 go.opentelemetry.io/collector/otelcol v0.106.0 go.opentelemetry.io/collector/processor v0.106.0 go.opentelemetry.io/collector/processor/batchprocessor v0.106.0 @@ -288,7 +285,7 @@ require ( github.com/expr-lang/expr v1.16.9 // indirect github.com/fatih/color v1.16.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect - github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/fsnotify/fsnotify v1.8.0 // indirect github.com/getsentry/sentry-go v0.28.1 // indirect github.com/go-faster/city v1.0.1 // indirect github.com/go-faster/errors v0.7.1 // indirect @@ -606,4 +603,6 @@ replace github.com/open-telemetry/opentelemetry-collector-contrib/odigos/exporte replace github.com/open-telemetry/opentelemetry-collector-contrib/odigos/processor/odigostrafficmetrics => ../processors/odigostrafficmetrics +replace go.opentelemetry.io/collector/odigos/providers/odigosfileprovider => ../providers/odigosfileprovider + exclude github.com/knadh/koanf v1.5.0 diff --git a/collector/odigosotelcol/go.sum b/collector/odigosotelcol/go.sum index a9849107ef..d8037b65f8 100644 --- a/collector/odigosotelcol/go.sum +++ b/collector/odigosotelcol/go.sum @@ -557,8 +557,8 @@ github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHk github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= -github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= -github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= +github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/getsentry/sentry-go v0.28.1 h1:zzaSm/vHmGllRM6Tpx1492r0YDzauArdBfkJRtY6P5k= github.com/getsentry/sentry-go v0.28.1/go.mod h1:1fQZ+7l7eeJ3wYi82q5Hg8GqAPgefRq+FP/QhafYVgg= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= @@ -1632,8 +1632,6 @@ go.opentelemetry.io/collector/confmap/provider/fileprovider v0.106.0 h1:dcVkkO67 go.opentelemetry.io/collector/confmap/provider/fileprovider v0.106.0/go.mod h1:9x/xMWlsGXMqD6hMReaY4efmYWBNMmbnoSnR0CDEsGM= go.opentelemetry.io/collector/confmap/provider/httpprovider v0.106.0 h1:0I7v8jz2IdzeWTWn9gHxFhiS39kU4qaGmmjN+bggV78= go.opentelemetry.io/collector/confmap/provider/httpprovider v0.106.0/go.mod h1:BHMNn6Xk8PpB3z/iYaYfinvYVNgiipbluOyqGCdc9Y8= -go.opentelemetry.io/collector/confmap/provider/httpsprovider v0.106.0 h1:tRsvkjfoziU+RomFT9A+6nYfK3nK0UWpjfCYONUMHoc= -go.opentelemetry.io/collector/confmap/provider/httpsprovider v0.106.0/go.mod h1:p4/tcZEOREkJU9se9l2YghKo12PxOx3IkSJSuT3W1SA= go.opentelemetry.io/collector/confmap/provider/yamlprovider v0.106.0 h1:pgEyIQsGJzODcDV96d/W6vQsbqtmZWS+J+5GT1aAHdA= go.opentelemetry.io/collector/confmap/provider/yamlprovider v0.106.0/go.mod h1:MppH9T0CS0G5QfCmOUkGKN1fHu4eiG0mZkMXpnyYnbU= go.opentelemetry.io/collector/connector v0.106.0 h1:Q2IsX4SfmV9PKjXUc7IvEFpB1FJqpUQ6/GA1/gTncI8= diff --git a/collector/odigosotelcol/main.go b/collector/odigosotelcol/main.go index d476b3d9c2..a60d772a1c 100644 --- a/collector/odigosotelcol/main.go +++ b/collector/odigosotelcol/main.go @@ -9,11 +9,8 @@ import ( "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/confmap" "go.opentelemetry.io/collector/confmap/converter/expandconverter" + odigosfileprovider "go.opentelemetry.io/collector/odigos/providers/odigosfileprovider" envprovider "go.opentelemetry.io/collector/confmap/provider/envprovider" - fileprovider "go.opentelemetry.io/collector/confmap/provider/fileprovider" - httpprovider "go.opentelemetry.io/collector/confmap/provider/httpprovider" - httpsprovider "go.opentelemetry.io/collector/confmap/provider/httpsprovider" - yamlprovider "go.opentelemetry.io/collector/confmap/provider/yamlprovider" "go.opentelemetry.io/collector/otelcol" ) @@ -30,11 +27,8 @@ func main() { ConfigProviderSettings: otelcol.ConfigProviderSettings{ ResolverSettings: confmap.ResolverSettings{ ProviderFactories: []confmap.ProviderFactory{ + odigosfileprovider.NewFactory(), envprovider.NewFactory(), - fileprovider.NewFactory(), - httpprovider.NewFactory(), - httpsprovider.NewFactory(), - yamlprovider.NewFactory(), }, ConverterFactories: []confmap.ConverterFactory{ expandconverter.NewFactory(), diff --git a/collector/providers/odigosfileprovider/go.mod b/collector/providers/odigosfileprovider/go.mod new file mode 100644 index 0000000000..3a7ff7f0ed --- /dev/null +++ b/collector/providers/odigosfileprovider/go.mod @@ -0,0 +1,26 @@ +module odigosfileprovider + +go 1.22.0 + +toolchain go1.22.6 + +require ( + github.com/fsnotify/fsnotify v1.8.0 + go.opentelemetry.io/collector/confmap v0.106.0 + go.uber.org/zap v1.27.0 +) + +require ( + github.com/go-viper/mapstructure/v2 v2.2.1 // indirect + github.com/hashicorp/go-version v1.7.0 // indirect + github.com/knadh/koanf/maps v0.1.1 // indirect + github.com/knadh/koanf/providers/confmap v0.1.0 // indirect + github.com/knadh/koanf/v2 v2.1.2 // indirect + github.com/mitchellh/copystructure v1.2.0 // indirect + github.com/mitchellh/reflectwalk v1.0.2 // indirect + go.opentelemetry.io/collector/featuregate v1.12.0 // indirect + go.opentelemetry.io/collector/internal/globalgates v0.106.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + golang.org/x/sys v0.13.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/collector/providers/odigosfileprovider/go.sum b/collector/providers/odigosfileprovider/go.sum new file mode 100644 index 0000000000..62c2ece8ce --- /dev/null +++ b/collector/providers/odigosfileprovider/go.sum @@ -0,0 +1,47 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= +github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss= +github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/hashicorp/go-version v1.7.0 h1:5tqGy27NaOTB8yJKUZELlFAS/LTKJkrmONwQKeRZfjY= +github.com/hashicorp/go-version v1.7.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/knadh/koanf/maps v0.1.1 h1:G5TjmUh2D7G2YWf5SQQqSiHRJEjaicvU0KpypqB3NIs= +github.com/knadh/koanf/maps v0.1.1/go.mod h1:npD/QZY3V6ghQDdcQzl1W4ICNVTkohC8E73eI2xW4yI= +github.com/knadh/koanf/providers/confmap v0.1.0 h1:gOkxhHkemwG4LezxxN8DMOFopOPghxRVp7JbIvdvqzU= +github.com/knadh/koanf/providers/confmap v0.1.0/go.mod h1:2uLhxQzJnyHKfxG927awZC7+fyHFdQkd697K4MdLnIU= +github.com/knadh/koanf/v2 v2.1.2 h1:I2rtLRqXRy1p01m/utEtpZSSA6dcJbgGVuE27kW2PzQ= +github.com/knadh/koanf/v2 v2.1.2/go.mod h1:Gphfaen0q1Fc1HTgJgSTC4oRX9R2R5ErYMZJy8fLJBo= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= +github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= +github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= +github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +go.opentelemetry.io/collector/confmap v0.106.0 h1:oZ/QfGjtOTz6sEbNkp8CxSwDFRHXej8u6MywvhTzjqI= +go.opentelemetry.io/collector/confmap v0.106.0/go.mod h1:X+nvuiQs3zdeXKkrEX1Ta3R49eLZ2/NYZLs3KUp1pik= +go.opentelemetry.io/collector/featuregate v1.12.0 h1:l5WbV2vMQd2bL8ubfGrbKNtZaeJRckE12CTHvRe47Tw= +go.opentelemetry.io/collector/featuregate v1.12.0/go.mod h1:PsOINaGgTiFc+Tzu2K/X2jP+Ngmlp7YKGV1XrnBkH7U= +go.opentelemetry.io/collector/internal/globalgates v0.106.0 h1:Rg6ZM2DROO4nx93nEFoNInisUGLHBq4IAU0oK1/T7jw= +go.opentelemetry.io/collector/internal/globalgates v0.106.0/go.mod h1:Z5US6O2xkZAtxVSSBnHAPFZwPhFoxlyKLUvS67Vx4gc= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= +go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= +golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/collector/providers/odigosfileprovider/provider.go b/collector/providers/odigosfileprovider/provider.go new file mode 100644 index 0000000000..b58623940b --- /dev/null +++ b/collector/providers/odigosfileprovider/provider.go @@ -0,0 +1,148 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +// forked from go.opentelemetry.io/collector/confmap/provider/fileprovider +package odigosfileprovider + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/fsnotify/fsnotify" + "go.opentelemetry.io/collector/confmap" + "go.uber.org/zap" +) + +const schemeName = "file" + +type provider struct { + wg sync.WaitGroup + mu sync.Mutex + + watcher *fsnotify.Watcher + running bool + logger *zap.Logger +} + +// NewFactory returns a factory for a confmap.Provider that reads the configuration from a file. +// +// This Provider supports "file" scheme, and can be called with a "uri" that follows: +// +// file-uri = "file:" local-path +// local-path = [ drive-letter ] file-path +// drive-letter = ALPHA ":" +// +// The "file-path" can be relative or absolute, and it can be any OS supported format. +// +// Examples: +// `file:path/to/file` - relative path (unix, windows) +// `file:/path/to/file` - absolute path (unix, windows) +// `file:c:/path/to/file` - absolute path including drive-letter (windows) +// `file:c:\path\to\file` - absolute path including drive-letter (windows) +// +// This provider is forked from the default upstream OSS fileprovider (go.opentelemetry.io/collector/confmap/provider/fileprovider) +// to provide file watching and reloading. It is exactly the same except it uses fsnotify to watch +// for changes to the config file in an infinite routine. When a change is found, the confmap.WatcherFunc +// is called to signal the collector to reload its config. +// Because Odigos mounts collecotr configs from a ConfigMap, the mounted file is a symlink. So we watch for +// add/remove events (rather than write events). Kubernetes automatically updates the projected contents when +// the configmap changes. This lets us use new config changes without restarting the collector deployment. +func NewFactory() confmap.ProviderFactory { + return confmap.NewProviderFactory(newProvider) +} + +func newProvider(c confmap.ProviderSettings) confmap.Provider { + watcher, err := fsnotify.NewWatcher() + if err != nil { + c.Logger.Error("unable to start fsnotify watcher", zap.Error(err)) + } + return &provider{ + logger: c.Logger, + watcher: watcher, + running: false, + } +} + +func (fmp *provider) Retrieve(ctx context.Context, uri string, wf confmap.WatcherFunc) (*confmap.Retrieved, error) { + fmp.mu.Lock() + defer fmp.mu.Unlock() + + if !strings.HasPrefix(uri, schemeName+":") { + return nil, fmt.Errorf("%q uri is not supported by %q provider", uri, schemeName) + } + + // Clean the path before using it. + file := filepath.Clean(uri[len(schemeName)+1:]) + content, err := os.ReadFile(file) + if err != nil { + return nil, fmt.Errorf("unable to read the file %v: %w", uri, err) + } + + err = fmp.watcher.Add(file) + if err != nil { + return nil, err + } + + // start a new watcher routine only if one isn't already running, since Retrieve could be called multiple times + if !fmp.running { + fmp.running = true + fmp.wg.Add(1) + go func() { + defer fmp.wg.Done() + LOOP: + for { + select { + case event, ok := <-fmp.watcher.Events: + if !ok { + fmp.logger.Info("watch channel closed") + break LOOP + } + // k8s configmaps are mounted as symlinks; need to watch for remove, not write + if event.Has(fsnotify.Remove) { + fmp.watcher.Remove(file) + fmp.watcher.Add(file) + wf(&confmap.ChangeEvent{}) + } + + case err, ok := <-fmp.watcher.Errors: + if !ok { + fmp.logger.Info("fsnotify error channel closed") + break LOOP + } + wf(&confmap.ChangeEvent{Error: fmt.Errorf("error watching event %+v", err)}) + + case <-ctx.Done(): + err := fmp.watcher.Close() + if err != nil { + fmp.logger.Error("error closing fsnotify watcher", zap.Error(err)) + } + break LOOP + } + } + fmp.mu.Lock() + fmp.running = false + fmp.mu.Unlock() + }() + } + + return confmap.NewRetrievedFromYAML(content) +} + +func (*provider) Scheme() string { + return schemeName +} + +func (fmp *provider) Shutdown(context.Context) error { + // close watcher channels + err := fmp.watcher.Close() + if err != nil { + fmp.logger.Error("error closing fsnotify watcher", zap.Error(err)) + } + // wait for watcher routine to finish + fmp.wg.Wait() + return nil +} diff --git a/tests/e2e/workload-lifecycle/03-assert-action-created.yaml b/tests/e2e/workload-lifecycle/03-assert-action-created.yaml new file mode 100644 index 0000000000..8729b705ac --- /dev/null +++ b/tests/e2e/workload-lifecycle/03-assert-action-created.yaml @@ -0,0 +1,86 @@ +apiVersion: odigos.io/v1alpha1 +kind: Processor +metadata: + generation: 1 + name: insert-cluster-name + namespace: odigos-test + ownerReferences: + - apiVersion: actions.odigos.io/v1alpha1 + kind: AddClusterInfo + name: insert-cluster-name +spec: + collectorRoles: + - CLUSTER_GATEWAY + orderHint: 1 + processorConfig: + attributes: + - action: insert + key: k8s.cluster.name + value: e2e-test-cluster + processorName: insert-cluster-name + signals: + - TRACES + - METRICS + - LOGS + type: resource +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: "1" # new processor should not cause a new deployment + labels: + odigos.io/collector-role: "CLUSTER_GATEWAY" + name: odigos-gateway + namespace: odigos-test + ownerReferences: + - apiVersion: odigos.io/v1alpha1 + blockOwnerDeletion: true + controller: true + kind: CollectorsGroup + name: odigos-gateway +spec: + replicas: 1 + selector: + matchLabels: + odigos.io/collector-role: "CLUSTER_GATEWAY" + template: + metadata: + labels: + odigos.io/collector-role: "CLUSTER_GATEWAY" + spec: + containers: + - env: + - name: ODIGOS_VERSION + valueFrom: + configMapKeyRef: + key: ODIGOS_VERSION + name: odigos-deployment + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: GOMEMLIMIT + (value != null): true + name: gateway + resources: + requests: + (memory != null): true + limits: + (memory != null): true + volumeMounts: + - mountPath: /conf + name: collector-conf + volumes: + - configMap: + defaultMode: 420 + items: + - key: collector-conf + path: collector-conf.yaml + name: odigos-gateway + name: collector-conf +status: + availableReplicas: 1 + readyReplicas: 1 + replicas: 1 \ No newline at end of file diff --git a/tests/e2e/workload-lifecycle/03-create-action.yaml b/tests/e2e/workload-lifecycle/03-create-action.yaml new file mode 100644 index 0000000000..58fa92d210 --- /dev/null +++ b/tests/e2e/workload-lifecycle/03-create-action.yaml @@ -0,0 +1,14 @@ +apiVersion: actions.odigos.io/v1alpha1 +kind: AddClusterInfo +metadata: + name: insert-cluster-name + namespace: odigos-test +spec: + actionName: insert-cluster-name + clusterAttributes: + - attributeName: k8s.cluster.name + attributeStringValue: e2e-test-cluster + signals: + - TRACES + - METRICS + - LOGS diff --git a/tests/e2e/workload-lifecycle/03-wait-for-trace.yaml b/tests/e2e/workload-lifecycle/03-wait-for-trace.yaml new file mode 100644 index 0000000000..ab535094a4 --- /dev/null +++ b/tests/e2e/workload-lifecycle/03-wait-for-trace.yaml @@ -0,0 +1,7 @@ +apiVersion: e2e.tests.odigos.io/v1 +kind: TraceTest +description: This test waits for a trace that is generated from the successful instrumented services. +query: | + { resource.k8s.cluster.name = "e2e-test-cluster" } +expected: + count: 13 diff --git a/tests/e2e/workload-lifecycle/chainsaw-test.yaml b/tests/e2e/workload-lifecycle/chainsaw-test.yaml index 9479dbcc89..850227763c 100644 --- a/tests/e2e/workload-lifecycle/chainsaw-test.yaml +++ b/tests/e2e/workload-lifecycle/chainsaw-test.yaml @@ -217,3 +217,68 @@ spec: ../../common/flush_traces.sh fi done + + - name: "03 - Create cluster info action" + try: + - apply: + file: 03-create-action.yaml + - assert: + file: 03-assert-action-created.yaml + + - name: "03 - Collector config reload" + try: + - script: + timeout: 200s + content: | + while true; do + kubectl logs deployment.apps/odigos-gateway -n odigos-test | grep -q "Config updated" + if [ $? -eq 0 ]; then + break; + else + sleep 3 + fi + done + + - name: "03 - Generate Traffic" + try: + - script: + timeout: 200s + content: | + set -e + + NAMESPACE="default" + DEPLOYMENTS=$(kubectl get deployments -n $NAMESPACE -o jsonpath='{.items[*].metadata.name}') + + + for DEPLOYMENT in $DEPLOYMENTS; do + echo "Waiting for deployment $DEPLOYMENT to finish rollout..." + kubectl rollout status deployment/$DEPLOYMENT -n $NAMESPACE + if [ $? -ne 0 ]; then + echo "Deployment $DEPLOYMENT failed to finish rollout." + exit 1 + fi + done + + + kubectl apply -f 01-generate-traffic.yaml + job_name=$(kubectl get -f 01-generate-traffic.yaml -o=jsonpath='{.metadata.name}') + kubectl wait --for=condition=complete job/$job_name + kubectl delete -f 01-generate-traffic.yaml + + - name: "03 - Wait for Traces" + try: + - script: + timeout: 60s + content: | + + sleep 20 + + while true; do + ../../common/traceql_runner.sh 03-wait-for-trace.yaml + if [ $? -eq 0 ]; then + break + else + sleep 3 + ../../common/flush_traces.sh + fi + done \ No newline at end of file