diff --git a/cni/network/common.go b/cni/network/common.go index e76d864f4a..260579326a 100644 --- a/cni/network/common.go +++ b/cni/network/common.go @@ -4,26 +4,14 @@ import ( "encoding/json" "io" "os" - "reflect" "github.com/Azure/azure-container-networking/cni" - "github.com/Azure/azure-container-networking/telemetry" "github.com/containernetworking/cni/pkg/skel" cniTypes "github.com/containernetworking/cni/pkg/types" "github.com/pkg/errors" "go.uber.org/zap" ) -// send error report to hostnetagent if CNI encounters any error. -func ReportPluginError(reportManager *telemetry.ReportManager, tb *telemetry.TelemetryBuffer, err error) { - logger.Error("Report plugin error") - reflect.ValueOf(reportManager.Report).Elem().FieldByName("ErrorMessage").SetString(err.Error()) - - if err := reportManager.SendReport(tb); err != nil { - logger.Error("SendReport failed", zap.Error(err)) - } -} - func validateConfig(jsonBytes []byte) error { var conf struct { Name string `json:"name"` diff --git a/cni/network/network.go b/cni/network/network.go index b25d3de5de..a09db694c0 100644 --- a/cni/network/network.go +++ b/cni/network/network.go @@ -8,12 +8,10 @@ import ( "encoding/json" "fmt" "net" - "os" "regexp" "strconv" "time" - "github.com/Azure/azure-container-networking/aitelemetry" "github.com/Azure/azure-container-networking/cni" "github.com/Azure/azure-container-networking/cni/api" "github.com/Azure/azure-container-networking/cni/log" @@ -40,7 +38,10 @@ import ( ) // matches if the string fully consists of zero or more alphanumeric, dots, dashes, parentheses, spaces, or underscores -var allowedInput = regexp.MustCompile(`^[a-zA-Z0-9._\-\(\) ]*$`) +var ( + allowedInput = regexp.MustCompile(`^[a-zA-Z0-9._\-\(\) ]*$`) + telemetryClient = telemetry.AIClient +) const ( dockerNetworkOption = "com.docker.network.generic" @@ -80,8 +81,6 @@ type NetPlugin struct { *cni.Plugin nm network.NetworkManager ipamInvoker IPAMInvoker - report *telemetry.CNIReport - tb *telemetry.TelemetryBuffer nnsClient NnsClient multitenancyClient MultitenancyClient netClient InterfaceGetter @@ -148,11 +147,6 @@ func NewPlugin(name string, }, nil } -func (plugin *NetPlugin) SetCNIReport(report *telemetry.CNIReport, tb *telemetry.TelemetryBuffer) { - plugin.report = report - plugin.tb = tb -} - // Starts the plugin. func (plugin *NetPlugin) Start(config *common.PluginConfig) error { // Initialize base plugin. @@ -179,13 +173,6 @@ func (plugin *NetPlugin) Start(config *common.PluginConfig) error { return nil } -func sendEvent(plugin *NetPlugin, msg string) { - eventMsg := fmt.Sprintf("[%d] %s", os.Getpid(), msg) - plugin.report.Version = plugin.Version - plugin.report.EventMessage = eventMsg - telemetry.SendCNIEvent(plugin.tb, plugin.report) -} - func (plugin *NetPlugin) GetAllEndpointState(networkid string) (*api.AzureCNIState, error) { st := api.AzureCNIState{ ContainerInterfaces: make(map[string]api.PodNetworkInterfaceInfo), @@ -307,35 +294,11 @@ func (plugin *NetPlugin) getPodInfo(args string) (name, ns string, err error) { return k8sPodName, k8sNamespace, nil } -func SetCustomDimensions(cniMetric *telemetry.AIMetric, nwCfg *cni.NetworkConfig, err error) { - if cniMetric == nil { - logger.Error("Unable to set custom dimension. Report is nil") - return - } - - if err != nil { - cniMetric.Metric.CustomDimensions[telemetry.StatusStr] = telemetry.FailedStr - } else { - cniMetric.Metric.CustomDimensions[telemetry.StatusStr] = telemetry.SucceededStr - } - - if nwCfg != nil { - if nwCfg.MultiTenancy { - cniMetric.Metric.CustomDimensions[telemetry.CNIModeStr] = telemetry.MultiTenancyStr - } else { - cniMetric.Metric.CustomDimensions[telemetry.CNIModeStr] = telemetry.SingleTenancyStr - } - - cniMetric.Metric.CustomDimensions[telemetry.CNINetworkModeStr] = nwCfg.Mode - } -} - -func (plugin *NetPlugin) setCNIReportDetails(nwCfg *cni.NetworkConfig, opType, msg string) { - plugin.report.OperationType = opType - plugin.report.SubContext = fmt.Sprintf("%+v", nwCfg) - plugin.report.EventMessage = msg - plugin.report.BridgeDetails.NetworkMode = nwCfg.Mode - plugin.report.InterfaceDetails.SecondaryCAUsedCount = plugin.nm.GetNumberOfEndpoints("", nwCfg.Name) +func (plugin *NetPlugin) setCNIReportDetails(containerID, opType, msg string) { + telemetryClient.Settings().OperationType = opType + telemetryClient.Settings().SubContext = containerID + telemetryClient.Settings().EventMessage = msg + telemetryClient.Settings().Version = plugin.Version } func addNatIPV6SubnetInfo(nwCfg *cni.NetworkConfig, @@ -361,7 +324,6 @@ func (plugin *NetPlugin) addIpamInvoker(ipamAddConfig IPAMAddConfig) (IPAMAddRes if err != nil { return IPAMAddResult{}, errors.Wrap(err, "failed to add ipam invoker") } - sendEvent(plugin, fmt.Sprintf("Allocated IPAddress from ipam interface: %+v", ipamAddResult.PrettyString())) return ipamAddResult, nil } @@ -393,12 +355,10 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error { enableInfraVnet bool enableSnatForDNS bool k8sPodName string - cniMetric telemetry.AIMetric epInfos []*network.EndpointInfo ) startTime := time.Now() - logger.Info("Processing ADD command", zap.String("containerId", args.ContainerID), zap.String("netNS", args.Netns), @@ -406,8 +366,6 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error { zap.Any("args", args.Args), zap.String("path", args.Path), zap.ByteString("stdinData", args.StdinData)) - sendEvent(plugin, fmt.Sprintf("[cni-net] Processing ADD command with args {ContainerID:%v Netns:%v IfName:%v Args:%v Path:%v StdinData:%s}.", - args.ContainerID, args.Netns, args.IfName, args.Args, args.Path, args.StdinData)) // Parse network configuration from stdin. nwCfg, err := cni.ParseNetworkConfig(args.StdinData) @@ -421,20 +379,20 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error { return err } + // Parse Pod arguments. + k8sPodName, k8sNamespace, err := plugin.getPodInfo(args.Args) + if err != nil { + return err + } + telemetryClient.Settings().ContainerName = k8sPodName + ":" + k8sNamespace + + plugin.setCNIReportDetails(args.ContainerID, CNI_ADD, "") + telemetryClient.SendEvent(fmt.Sprintf("[cni-net] Processing ADD command with args {ContainerID:%v Netns:%v IfName:%v Args:%v Path:%v StdinData:%s}.", + args.ContainerID, args.Netns, args.IfName, args.Args, args.Path, args.StdinData)) + iptables.DisableIPTableLock = nwCfg.DisableIPTableLock - plugin.setCNIReportDetails(nwCfg, CNI_ADD, "") defer func() { - operationTimeMs := time.Since(startTime).Milliseconds() - cniMetric.Metric = aitelemetry.Metric{ - Name: telemetry.CNIAddTimeMetricStr, - Value: float64(operationTimeMs), - AppVersion: plugin.Version, - CustomDimensions: make(map[string]string), - } - SetCustomDimensions(&cniMetric, nwCfg, err) - telemetry.SendCNIMetric(&cniMetric, plugin.tb) - // Add Interfaces to result. // previously we had a default interface info to select which interface info was the one to be returned from cni add cniResult := &cniTypesCurr.Result{} @@ -479,17 +437,14 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error { zap.String("pod", k8sPodName), zap.Any("IPs", cniResult.IPs), zap.Error(log.NewErrorWithoutStackTrace(err))) - }() - ipamAddResult = IPAMAddResult{interfaceInfo: make(map[string]network.InterfaceInfo)} + telemetryClient.SendEvent(fmt.Sprintf("ADD command completed with [ipamAddResult]: %s [epInfos]: %s [error]: %v ", ipamAddResult.PrettyString(), network.FormatSliceOfPointersToString(epInfos), err)) - // Parse Pod arguments. - k8sPodName, k8sNamespace, err := plugin.getPodInfo(args.Args) - if err != nil { - return err - } + operationTimeMs := time.Since(startTime).Milliseconds() + telemetryClient.SendMetric(telemetry.CNIAddTimeMetricStr, float64(operationTimeMs), make(map[string]string)) + }() - plugin.report.ContainerName = k8sPodName + ":" + k8sNamespace + ipamAddResult = IPAMAddResult{interfaceInfo: make(map[string]network.InterfaceInfo)} k8sContainerID := args.ContainerID if len(k8sContainerID) == 0 { @@ -542,7 +497,7 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error { // triggered only in swift v1 multitenancy // dual nic multitenancy -> two interface infos // multitenancy (swift v1) -> one interface info - plugin.report.Context = "AzureCNIMultitenancy" + telemetryClient.Settings().Context = "AzureCNIMultitenancy" plugin.multitenancyClient.Init(cnsClient, AzureNetIOShim{}) // Temporary if block to determining whether we disable SNAT on host (for multi-tenant scenario only) @@ -584,15 +539,9 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error { if err != nil { return fmt.Errorf("IPAM Invoker Add failed with error: %w", err) } - - // TODO: This proably needs to be changed as we return all interfaces... - // sendEvent(plugin, fmt.Sprintf("Allocated IPAddress from ipam DefaultInterface: %+v, SecondaryInterfaces: %+v", ipamAddResult.interfaceInfo[ifIndex], ipamAddResult.interfaceInfo)) } policies := cni.GetPoliciesFromNwCfg(nwCfg.AdditionalArgs) - // moved to addIpamInvoker - // sendEvent(plugin, fmt.Sprintf("Allocated IPAddress from ipam interface: %+v", ipamAddResult.PrettyString())) - defer func() { //nolint:gocritic if err != nil { // for swift v1 multi-tenancies scenario, CNI is not supposed to invoke CNS for cleaning Ips @@ -610,6 +559,7 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error { infraSeen := false endpointIndex := 1 + for key := range ipamAddResult.interfaceInfo { ifInfo := ipamAddResult.interfaceInfo[key] logger.Info("Processing interfaceInfo:", zap.Any("ifInfo", ifInfo)) @@ -679,8 +629,6 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error { if err != nil { return errors.Wrap(err, "failed to create endpoint") // behavior can change if you don't assign to err prior to returning } - // telemetry added - sendEvent(plugin, fmt.Sprintf("CNI ADD Process succeeded for interfaces: %v", ipamAddResult.PrettyString())) return nil } @@ -1015,11 +963,8 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { k8sNamespace string networkID string nwInfo network.EndpointInfo - cniMetric telemetry.AIMetric ) - startTime := time.Now() - logger.Info("Processing DEL command", zap.String("containerId", args.ContainerID), zap.String("netNS", args.Netns), @@ -1027,13 +972,14 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { zap.Any("args", args.Args), zap.String("path", args.Path), zap.ByteString("stdinData", args.StdinData)) - sendEvent(plugin, fmt.Sprintf("[cni-net] Processing DEL command with args {ContainerID:%v Netns:%v IfName:%v Args:%v Path:%v, StdinData:%s}.", - args.ContainerID, args.Netns, args.IfName, args.Args, args.Path, args.StdinData)) defer func() { logger.Info("DEL command completed", zap.String("pod", k8sPodName), zap.Error(log.NewErrorWithoutStackTrace(err))) + telemetryClient.SendEvent(fmt.Sprintf("DEL command completed: [podname]: %s [namespace]: %s [error]: %v", k8sPodName, k8sNamespace, err)) + operationTimeMs := time.Since(startTime).Milliseconds() + telemetryClient.SendMetric(telemetry.CNIDelTimeMetricStr, float64(operationTimeMs), make(map[string]string)) }() // Parse network configuration from stdin. @@ -1051,30 +997,18 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { if k8sPodName, k8sNamespace, err = plugin.getPodInfo(args.Args); err != nil { logger.Error("Failed to get POD info", zap.Error(err)) } + telemetryClient.Settings().ContainerName = k8sPodName + ":" + k8sNamespace - plugin.setCNIReportDetails(nwCfg, CNI_DEL, "") - plugin.report.ContainerName = k8sPodName + ":" + k8sNamespace + plugin.setCNIReportDetails(args.ContainerID, CNI_DEL, "") + telemetryClient.SendEvent(fmt.Sprintf("[cni-net] Processing DEL command with args {ContainerID:%v Netns:%v IfName:%v Args:%v Path:%v, StdinData:%s}.", + args.ContainerID, args.Netns, args.IfName, args.Args, args.Path, args.StdinData)) iptables.DisableIPTableLock = nwCfg.DisableIPTableLock - sendMetricFunc := func() { - operationTimeMs := time.Since(startTime).Milliseconds() - cniMetric.Metric = aitelemetry.Metric{ - Name: telemetry.CNIDelTimeMetricStr, - Value: float64(operationTimeMs), - AppVersion: plugin.Version, - CustomDimensions: make(map[string]string), - } - SetCustomDimensions(&cniMetric, nwCfg, err) - telemetry.SendCNIMetric(&cniMetric, plugin.tb) - } - platformInit(nwCfg) logger.Info("Execution mode", zap.String("mode", nwCfg.ExecutionMode)) if nwCfg.ExecutionMode == string(util.Baremetal) { - // schedule send metric before attempting delete - defer sendMetricFunc() _, err = plugin.nnsClient.DeleteContainerNetworking(context.Background(), k8sPodName, args.Netns) if err != nil { return fmt.Errorf("nnsClient.DeleteContainerNetworking failed with err %w", err) @@ -1164,13 +1098,12 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { if len(epInfos) == 0 { endpointID := plugin.nm.GetEndpointID(args.ContainerID, args.IfName) if !nwCfg.MultiTenancy { - logger.Error("Failed to query endpoint", + logger.Warn("Could not query endpoint", zap.String("endpoint", endpointID), zap.Error(err)) - logger.Error("Release ip by ContainerID (endpoint not found)", + logger.Warn("Release ip by ContainerID (endpoint not found)", zap.String("containerID", args.ContainerID)) - sendEvent(plugin, fmt.Sprintf("Release ip by ContainerID (endpoint not found):%v", args.ContainerID)) if err = plugin.ipamInvoker.Delete(nil, nwCfg, args, nwInfo.Options); err != nil { return plugin.RetriableError(fmt.Errorf("failed to release address(no endpoint): %w", err)) } @@ -1195,18 +1128,16 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { logger.Info("Deleting the endpoints from the ipam") // delete endpoint state in cns and in statefile for _, epInfo := range epInfos { - // schedule send metric before attempting delete - defer sendMetricFunc() //nolint:gocritic logger.Info("Deleting endpoint", zap.String("endpointID", epInfo.EndpointID)) - sendEvent(plugin, fmt.Sprintf("Deleting endpoint:%v", epInfo.EndpointID)) + telemetryClient.SendEvent("Deleting endpoint: " + epInfo.EndpointID) if !nwCfg.MultiTenancy && (epInfo.NICType == cns.InfraNIC || epInfo.NICType == "") { // Delegated/secondary nic ips are statically allocated so we don't need to release // Call into IPAM plugin to release the endpoint's addresses. for i := range epInfo.IPAddresses { logger.Info("Release ip", zap.String("ip", epInfo.IPAddresses[i].IP.String())) - sendEvent(plugin, fmt.Sprintf("Release ip:%s", epInfo.IPAddresses[i].IP.String())) + telemetryClient.SendEvent(fmt.Sprintf("Release ip: %s container id: %s endpoint id: %s", epInfo.IPAddresses[i].IP.String(), args.ContainerID, epInfo.EndpointID)) err = plugin.ipamInvoker.Delete(&epInfo.IPAddresses[i], nwCfg, args, nwInfo.Options) if err != nil { return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err)) @@ -1226,7 +1157,6 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { if err != nil { return plugin.RetriableError(fmt.Errorf("failed to save state: %w", err)) } - sendEvent(plugin, fmt.Sprintf("CNI DEL succeeded : Released ip %+v podname %v namespace %v", nwCfg.IPAM.Address, k8sPodName, k8sNamespace)) return err } @@ -1242,11 +1172,8 @@ func (plugin *NetPlugin) Update(args *cniSkel.CmdArgs) error { podCfg *cni.K8SPodEnvArgs orchestratorContext []byte targetNetworkConfig *cns.GetNetworkContainerResponse - cniMetric telemetry.AIMetric ) - startTime := time.Now() - logger.Info("Processing UPDATE command", zap.String("netns", args.Netns), zap.String("args", args.Args), @@ -1266,19 +1193,9 @@ func (plugin *NetPlugin) Update(args *cniSkel.CmdArgs) error { logger.Info("Read network configuration", zap.Any("config", nwCfg)) iptables.DisableIPTableLock = nwCfg.DisableIPTableLock - plugin.setCNIReportDetails(nwCfg, CNI_UPDATE, "") + plugin.setCNIReportDetails(args.ContainerID, CNI_UPDATE, "") defer func() { - operationTimeMs := time.Since(startTime).Milliseconds() - cniMetric.Metric = aitelemetry.Metric{ - Name: telemetry.CNIUpdateTimeMetricStr, - Value: float64(operationTimeMs), - AppVersion: plugin.Version, - CustomDimensions: make(map[string]string), - } - SetCustomDimensions(&cniMetric, nwCfg, err) - telemetry.SendCNIMetric(&cniMetric, plugin.tb) - if result == nil { result = &cniTypesCurr.Result{} } @@ -1425,7 +1342,7 @@ func (plugin *NetPlugin) Update(args *cniSkel.CmdArgs) error { } msg := fmt.Sprintf("CNI UPDATE succeeded : Updated %+v podname %v namespace %v", targetNetworkConfig, k8sPodName, k8sNamespace) - plugin.setCNIReportDetails(nwCfg, CNI_UPDATE, msg) + plugin.setCNIReportDetails(args.ContainerID, CNI_UPDATE, msg) return nil } diff --git a/cni/network/network_linux_test.go b/cni/network/network_linux_test.go index 55bef48fa6..97304569b3 100644 --- a/cni/network/network_linux_test.go +++ b/cni/network/network_linux_test.go @@ -13,7 +13,6 @@ import ( "github.com/Azure/azure-container-networking/cns" "github.com/Azure/azure-container-networking/network" "github.com/Azure/azure-container-networking/platform" - "github.com/Azure/azure-container-networking/telemetry" cniSkel "github.com/containernetworking/cni/pkg/skel" "github.com/containernetworking/cni/pkg/types" "github.com/stretchr/testify/assert" @@ -261,8 +260,6 @@ func TestPluginLinuxAdd(t *testing.T) { plugin: &NetPlugin{ Plugin: resources.Plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), - tb: &telemetry.TelemetryBuffer{}, - report: &telemetry.CNIReport{}, multitenancyClient: NewMockMultitenancy(false, []*cns.GetNetworkContainerResponse{GetTestCNSResponse3()}), }, args: &cniSkel.CmdArgs{ @@ -341,8 +338,6 @@ func TestPluginLinuxAdd(t *testing.T) { plugin: &NetPlugin{ Plugin: resources.Plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), - tb: &telemetry.TelemetryBuffer{}, - report: &telemetry.CNIReport{}, ipamInvoker: &MockIpamInvoker{ add: func(opt IPAMAddConfig) (ipamAddResult IPAMAddResult, err error) { ipamAddResult = IPAMAddResult{interfaceInfo: make(map[string]network.InterfaceInfo)} diff --git a/cni/network/network_test.go b/cni/network/network_test.go index f9bfa12084..bb123ede04 100644 --- a/cni/network/network_test.go +++ b/cni/network/network_test.go @@ -17,7 +17,6 @@ import ( "github.com/Azure/azure-container-networking/network/networkutils" "github.com/Azure/azure-container-networking/network/policy" "github.com/Azure/azure-container-networking/nns" - "github.com/Azure/azure-container-networking/telemetry" cniSkel "github.com/containernetworking/cni/pkg/skel" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -76,7 +75,6 @@ func GetTestResources() *NetPlugin { config := &common.PluginConfig{} grpcClient := &nns.MockGrpcClient{} plugin, _ := NewPlugin(pluginName, config, grpcClient, &Multitenancy{}) - plugin.report = &telemetry.CNIReport{} mockNetworkManager := acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)) plugin.nm = mockNetworkManager plugin.ipamInvoker = NewMockIpamInvoker(isIPv6, false, false, false, false) @@ -491,8 +489,6 @@ func TestAddDualStack(t *testing.T) { Plugin: cniPlugin, nm: acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(true, false, false, false, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, wantErr: false, }, @@ -502,8 +498,6 @@ func TestAddDualStack(t *testing.T) { Plugin: cniPlugin, nm: acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(true, false, true, false, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, wantErr: true, }, @@ -548,8 +542,6 @@ func TestPluginGet(t *testing.T) { Plugin: plugin, nm: acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, false, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, wantErr: false, }, @@ -560,8 +552,6 @@ func TestPluginGet(t *testing.T) { Plugin: plugin, nm: acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, false, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, wantErr: true, wantErrMsg: "Network not found", @@ -573,8 +563,6 @@ func TestPluginGet(t *testing.T) { Plugin: plugin, nm: acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, false, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, wantErr: true, wantErrMsg: "Endpoint not found", @@ -750,8 +738,6 @@ func TestPluginMultitenancyAdd(t *testing.T) { plugin: &NetPlugin{ Plugin: plugin, nm: acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)), - tb: &telemetry.TelemetryBuffer{}, - report: &telemetry.CNIReport{}, multitenancyClient: NewMockMultitenancy(false, []*cns.GetNetworkContainerResponse{GetTestCNSResponse1()}), }, @@ -769,8 +755,6 @@ func TestPluginMultitenancyAdd(t *testing.T) { plugin: &NetPlugin{ Plugin: plugin, nm: acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)), - tb: &telemetry.TelemetryBuffer{}, - report: &telemetry.CNIReport{}, multitenancyClient: NewMockMultitenancy(true, []*cns.GetNetworkContainerResponse{GetTestCNSResponse1()}), }, args: &cniSkel.CmdArgs{ @@ -905,8 +889,6 @@ func TestPluginBaremetalAdd(t *testing.T) { plugin: &NetPlugin{ Plugin: plugin, nm: acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)), - tb: &telemetry.TelemetryBuffer{}, - report: &telemetry.CNIReport{}, nnsClient: &nns.MockGrpcClient{}, }, args: &cniSkel.CmdArgs{ @@ -923,8 +905,6 @@ func TestPluginBaremetalAdd(t *testing.T) { plugin: &NetPlugin{ Plugin: plugin, nm: acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)), - tb: &telemetry.TelemetryBuffer{}, - report: &telemetry.CNIReport{}, nnsClient: &nns.MockGrpcClient{Fail: true}, }, args: &cniSkel.CmdArgs{ @@ -1333,8 +1313,6 @@ func TestPluginSwiftV2Add(t *testing.T) { Plugin: plugin, nm: acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, true, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, netClient: &InterfaceGetterMock{ interfaces: []net.Interface{ {Name: "eth0"}, @@ -1350,8 +1328,6 @@ func TestPluginSwiftV2Add(t *testing.T) { Plugin: plugin, nm: acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, true, true), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, netClient: &InterfaceGetterMock{ interfaces: []net.Interface{ {Name: "eth0"}, @@ -1374,8 +1350,6 @@ func TestPluginSwiftV2Add(t *testing.T) { return nil })), ipamInvoker: NewMockIpamInvoker(false, false, false, true, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, netClient: &InterfaceGetterMock{ interfaces: []net.Interface{ {Name: "eth0"}, @@ -1392,8 +1366,6 @@ func TestPluginSwiftV2Add(t *testing.T) { Plugin: plugin, nm: acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, true, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, netClient: &InterfaceGetterMock{ interfaces: []net.Interface{}, }, @@ -1408,8 +1380,6 @@ func TestPluginSwiftV2Add(t *testing.T) { Plugin: plugin, nm: acnnetwork.NewMockNetworkmanager(acnnetwork.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, false, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, netClient: &InterfaceGetterMock{ interfaces: []net.Interface{}, }, @@ -1492,8 +1462,6 @@ func TestPluginSwiftV2MultipleAddDelete(t *testing.T) { NICType: cns.NodeNetworkInterfaceFrontendNIC, }, }), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, netClient: &InterfaceGetterMock{ interfaces: []net.Interface{ {Name: "eth0"}, @@ -1517,8 +1485,6 @@ func TestPluginSwiftV2MultipleAddDelete(t *testing.T) { NICType: cns.BackendNIC, }, }), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, netClient: &InterfaceGetterMock{ interfaces: []net.Interface{ {Name: "eth0"}, @@ -1542,8 +1508,6 @@ func TestPluginSwiftV2MultipleAddDelete(t *testing.T) { NICType: cns.NodeNetworkInterfaceFrontendNIC, }, }), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, netClient: &InterfaceGetterMock{ interfaces: []net.Interface{ {Name: "eth0"}, @@ -1575,8 +1539,6 @@ func TestPluginSwiftV2MultipleAddDelete(t *testing.T) { NICType: cns.NodeNetworkInterfaceFrontendNIC, }, }), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, netClient: &InterfaceGetterMock{ interfaces: []net.Interface{ {Name: "eth0"}, @@ -1637,8 +1599,6 @@ func TestFindMasterInterface(t *testing.T) { name: "Find master interface by infraNIC with a master interfaceName in swiftv1 path", plugin: &NetPlugin{ Plugin: plugin, - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, netClient: &InterfaceGetterMock{ interfaces: []net.Interface{ { @@ -1668,8 +1628,6 @@ func TestFindMasterInterface(t *testing.T) { name: "Find master interface by one infraNIC", plugin: &NetPlugin{ Plugin: plugin, - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, netClient: &InterfaceGetterMock{ interfaces: []net.Interface{ { @@ -1712,8 +1670,6 @@ func TestFindMasterInterface(t *testing.T) { name: "Find master interface from multiple infraNIC interfaces", plugin: &NetPlugin{ Plugin: plugin, - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, netClient: &InterfaceGetterMock{ interfaces: []net.Interface{ { @@ -1770,8 +1726,6 @@ func TestFindMasterInterface(t *testing.T) { name: "Find master interface by delegatedVMNIC", plugin: &NetPlugin{ Plugin: plugin, - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, netClient: &InterfaceGetterMock{ interfaces: []net.Interface{ { diff --git a/cni/network/network_windows_test.go b/cni/network/network_windows_test.go index ae640f6825..8c47d739ca 100644 --- a/cni/network/network_windows_test.go +++ b/cni/network/network_windows_test.go @@ -16,7 +16,6 @@ import ( "github.com/Azure/azure-container-networking/network/hnswrapper" "github.com/Azure/azure-container-networking/network/policy" "github.com/Azure/azure-container-networking/platform" - "github.com/Azure/azure-container-networking/telemetry" hnsv2 "github.com/Microsoft/hcsshim/hcn" cniSkel "github.com/containernetworking/cni/pkg/skel" "github.com/stretchr/testify/assert" @@ -409,8 +408,6 @@ func TestGetNetworkNameFromCNS(t *testing.T) { Plugin: plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, false, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, netNs: "net", nwCfg: &cni.NetworkConfig{ @@ -442,8 +439,6 @@ func TestGetNetworkNameFromCNS(t *testing.T) { Plugin: plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, false, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, netNs: "net", nwCfg: &cni.NetworkConfig{ @@ -475,8 +470,6 @@ func TestGetNetworkNameFromCNS(t *testing.T) { Plugin: plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, false, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, netNs: "net", nwCfg: &cni.NetworkConfig{ @@ -508,8 +501,6 @@ func TestGetNetworkNameFromCNS(t *testing.T) { Plugin: plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, false, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, netNs: "", nwCfg: &cni.NetworkConfig{ @@ -541,8 +532,6 @@ func TestGetNetworkNameFromCNS(t *testing.T) { Plugin: plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, false, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, netNs: "azure", nwCfg: &cni.NetworkConfig{ @@ -602,8 +591,6 @@ func TestGetNetworkNameSwiftv2FromCNS(t *testing.T) { Plugin: plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, true, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, netNs: "azure", nwCfg: &cni.NetworkConfig{ @@ -624,8 +611,6 @@ func TestGetNetworkNameSwiftv2FromCNS(t *testing.T) { Plugin: plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, true, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, netNs: "azure", nwCfg: &cni.NetworkConfig{ @@ -646,8 +631,6 @@ func TestGetNetworkNameSwiftv2FromCNS(t *testing.T) { Plugin: plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, false, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, netNs: "azure", nwCfg: &cni.NetworkConfig{ @@ -664,8 +647,6 @@ func TestGetNetworkNameSwiftv2FromCNS(t *testing.T) { Plugin: plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), ipamInvoker: NewMockIpamInvoker(false, false, false, false, false), - report: &telemetry.CNIReport{}, - tb: &telemetry.TelemetryBuffer{}, }, netNs: "azure", nwCfg: &cni.NetworkConfig{ @@ -729,8 +710,6 @@ func TestPluginMultitenancyWindowsAdd(t *testing.T) { plugin: &NetPlugin{ Plugin: plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), - tb: &telemetry.TelemetryBuffer{}, - report: &telemetry.CNIReport{}, multitenancyClient: NewMockMultitenancy(false, []*cns.GetNetworkContainerResponse{GetTestCNSResponse1(), GetTestCNSResponse2()}), }, @@ -748,8 +727,6 @@ func TestPluginMultitenancyWindowsAdd(t *testing.T) { plugin: &NetPlugin{ Plugin: plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), - tb: &telemetry.TelemetryBuffer{}, - report: &telemetry.CNIReport{}, multitenancyClient: NewMockMultitenancy(true, []*cns.GetNetworkContainerResponse{GetTestCNSResponse1(), GetTestCNSResponse2()}), }, args: &cniSkel.CmdArgs{ @@ -993,8 +970,6 @@ func TestPluginWindowsAdd(t *testing.T) { plugin: &NetPlugin{ Plugin: resources.Plugin, nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), - tb: &telemetry.TelemetryBuffer{}, - report: &telemetry.CNIReport{}, multitenancyClient: NewMockMultitenancy(false, []*cns.GetNetworkContainerResponse{GetTestCNSResponse1(), GetTestCNSResponse2()}), }, args: &cniSkel.CmdArgs{ @@ -1163,10 +1138,9 @@ func TestPluginWindowsAdd(t *testing.T) { // Based on a live swiftv2 windows cluster's (infra + delegated) cns invoker response name: "Add Happy Path Swiftv2", plugin: &NetPlugin{ - Plugin: resources.Plugin, - nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), - tb: &telemetry.TelemetryBuffer{}, - report: &telemetry.CNIReport{}, + Plugin: resources.Plugin, + nm: network.NewMockNetworkmanager(network.NewMockEndpointClient(nil)), + ipamInvoker: NewCustomMockIpamInvoker(GetTestCNSResponseSecondaryWindows(macAddress)), netClient: &InterfaceGetterMock{ // used in secondary find master interface diff --git a/cni/network/plugin/main.go b/cni/network/plugin/main.go index 89b36994ac..08ccc3cbd1 100644 --- a/cni/network/plugin/main.go +++ b/cni/network/plugin/main.go @@ -8,7 +8,6 @@ import ( "os" "time" - "github.com/Azure/azure-container-networking/aitelemetry" "github.com/Azure/azure-container-networking/cni" "github.com/Azure/azure-container-networking/cni/api" zaplog "github.com/Azure/azure-container-networking/cni/log" @@ -16,14 +15,12 @@ import ( "github.com/Azure/azure-container-networking/common" "github.com/Azure/azure-container-networking/nns" "github.com/Azure/azure-container-networking/platform" - "github.com/Azure/azure-container-networking/store" "github.com/Azure/azure-container-networking/telemetry" "github.com/pkg/errors" "go.uber.org/zap" ) const ( - hostNetAgentURL = "http://168.63.129.16/machine/plugins?comp=netagent&type=cnireport" ipamQueryURL = "http://168.63.129.16/machine/plugins?comp=nmagent&type=getinterfaceinfov1" pluginName = "CNI" telemetryNumRetries = 5 @@ -53,23 +50,16 @@ func printVersion() { } func rootExecute() error { - var ( - config common.PluginConfig - tb *telemetry.TelemetryBuffer - ) + var config common.PluginConfig config.Version = version reportManager := &telemetry.ReportManager{ - HostNetAgentURL: hostNetAgentURL, - ContentType: telemetry.ContentType, Report: &telemetry.CNIReport{ - Context: "AzureCNI", - SystemDetails: telemetry.SystemInfo{}, - InterfaceDetails: telemetry.InterfaceInfo{}, - BridgeDetails: telemetry.BridgeInfo{}, - Version: version, - Logger: logger, + Context: "AzureCNI", + SystemDetails: telemetry.SystemInfo{}, + Version: version, + Logger: logger, }, } @@ -101,32 +91,20 @@ func rootExecute() error { cniReport.VMUptime = upTime.Format("2006-01-02 15:04:05") } - // CNI Acquires lock + // CNI attempts to acquire lock if err = netPlugin.Plugin.InitializeKeyValueStore(&config); err != nil { + // Error acquiring lock network.PrintCNIError(fmt.Sprintf("Failed to initialize key-value store of network plugin: %v", err)) - tb = telemetry.NewTelemetryBuffer(logger) - if tberr := tb.Connect(); tberr != nil { - logger.Error("Cannot connect to telemetry service", zap.Error(tberr)) - return errors.Wrap(err, "lock acquire error") - } + // Connect to telemetry service if it is running, otherwise skips telemetry + telemetry.AIClient.ConnectTelemetry(logger) + defer telemetry.AIClient.DisconnectTelemetry() - network.ReportPluginError(reportManager, tb, err) - - if errors.Is(err, store.ErrTimeoutLockingStore) { - var cniMetric telemetry.AIMetric - cniMetric.Metric = aitelemetry.Metric{ - Name: telemetry.CNILockTimeoutStr, - Value: 1.0, - CustomDimensions: make(map[string]string), - } - sendErr := telemetry.SendCNIMetric(&cniMetric, tb) - if sendErr != nil { - logger.Error("Couldn't send cnilocktimeout metric", zap.Error(sendErr)) - } + if telemetry.AIClient.IsConnected() { + telemetry.AIClient.SendError(err) + } else { + logger.Error("Not connected to telemetry service, skipping sending error to application insights") } - - tb.Close() return errors.Wrap(err, "lock acquire error") } @@ -139,21 +117,19 @@ func rootExecute() error { os.Exit(1) } }() - + // At this point, lock is acquired // Start telemetry process if not already started. This should be done inside lock, otherwise multiple process // end up creating/killing telemetry process results in undesired state. - tb = telemetry.NewTelemetryBuffer(logger) - tb.ConnectToTelemetryService(telemetryNumRetries, telemetryWaitTimeInMilliseconds) - defer tb.Close() - - netPlugin.SetCNIReport(cniReport, tb) + telemetry.AIClient.StartAndConnectTelemetry(logger) + defer telemetry.AIClient.DisconnectTelemetry() + telemetry.AIClient.SetSettings(cniReport) t := time.Now() cniReport.Timestamp = t.Format("2006-01-02 15:04:05") if err = netPlugin.Start(&config); err != nil { network.PrintCNIError(fmt.Sprintf("Failed to start network plugin, err:%v.\n", err)) - network.ReportPluginError(reportManager, tb, err) + telemetry.AIClient.SendError(err) panic("network plugin start fatal error") } @@ -186,13 +162,8 @@ func rootExecute() error { if cniCmd == cni.CmdVersion { return errors.Wrap(err, "Execute netplugin failure") } - netPlugin.Stop() - if err != nil { - network.ReportPluginError(reportManager, tb, err) - } - return errors.Wrap(err, "Execute netplugin failure") } diff --git a/cni/network/stateless/main.go b/cni/network/stateless/main.go index 2b273da7eb..a42647eb14 100644 --- a/cni/network/stateless/main.go +++ b/cni/network/stateless/main.go @@ -24,11 +24,10 @@ import ( var logger = zapLog.CNILogger.With(zap.String("component", "cni-main")) const ( - hostNetAgentURL = "http://168.63.129.16/machine/plugins?comp=netagent&type=cnireport" - ipamQueryURL = "http://168.63.129.16/machine/plugins?comp=nmagent&type=getinterfaceinfov1" - pluginName = "CNI" - name = "azure-vnet" - stateless = true + ipamQueryURL = "http://168.63.129.16/machine/plugins?comp=nmagent&type=getinterfaceinfov1" + pluginName = "CNI" + name = "azure-vnet" + stateless = true ) // Version is populated by make during build. @@ -51,10 +50,7 @@ func printVersion() { } func rootExecute() error { - var ( - config common.PluginConfig - tb *telemetry.TelemetryBuffer - ) + var config common.PluginConfig log.SetName(name) log.SetLevel(log.LevelInfo) @@ -67,14 +63,10 @@ func rootExecute() error { config.Stateless = stateless reportManager := &telemetry.ReportManager{ - HostNetAgentURL: hostNetAgentURL, - ContentType: telemetry.ContentType, Report: &telemetry.CNIReport{ - Context: "AzureCNI", - SystemDetails: telemetry.SystemInfo{}, - InterfaceDetails: telemetry.InterfaceInfo{}, - BridgeDetails: telemetry.BridgeInfo{}, - Version: version, + Context: "AzureCNI", + SystemDetails: telemetry.SystemInfo{}, + Version: version, }, } @@ -112,19 +104,17 @@ func rootExecute() error { } }() - // Connect to the telemetry process. - tb = telemetry.NewTelemetryBuffer(logger) - tb.ConnectToTelemetry() - defer tb.Close() - - netPlugin.SetCNIReport(cniReport, tb) + // Connect to the telemetry process. Does not start the telemetry service if it is not running. + telemetry.AIClient.ConnectTelemetry(logger) + defer telemetry.AIClient.DisconnectTelemetry() + telemetry.AIClient.SetSettings(cniReport) t := time.Now() cniReport.Timestamp = t.Format("2006-01-02 15:04:05") if err = netPlugin.Start(&config); err != nil { network.PrintCNIError(fmt.Sprintf("Failed to start network plugin, err:%v.\n", err)) - network.ReportPluginError(reportManager, tb, err) + telemetry.AIClient.SendError(err) panic("network plugin start fatal error") } } @@ -151,10 +141,6 @@ func rootExecute() error { } netPlugin.Stop() - if err != nil { - network.ReportPluginError(reportManager, tb, err) - } - return errors.Wrap(err, "Execute netplugin failure") } diff --git a/network/endpoint.go b/network/endpoint.go index fab75d3186..bd9fa7fc9b 100644 --- a/network/endpoint.go +++ b/network/endpoint.go @@ -151,15 +151,35 @@ type apipaClient interface { CreateHostNCApipaEndpoint(ctx context.Context, networkContainerID string) (string, error) } +// FormatSliceOfPointersToString takes in a slice of pointers, and for each pointer, dereferences the pointer if not nil +// and then formats it to its string representation, returning a string where each line is a separate item in the slice. +// This is used for convenience to get a string representation of the actual structs and their fields +// in slices of pointers since the default string representation of a slice of pointers is a list of memory addresses. +func FormatSliceOfPointersToString[T any](slice []*T) string { + var builder strings.Builder + for _, ptr := range slice { + if ptr != nil { + fmt.Fprintf(&builder, "%+v \n", *ptr) + } + } + return builder.String() +} + func (epInfo *EndpointInfo) PrettyString() string { - return fmt.Sprintf("EndpointID:%s ContainerID:%s NetNsPath:%s IfName:%s IfIndex:%d MacAddr:%s IPAddrs:%v Gateways:%v Data:%+v NICType: %s NetworkContainerID: %s HostIfName: %s NetNs: %s Options: %v", + return fmt.Sprintf("EndpointID:%s ContainerID:%s NetNsPath:%s IfName:%s IfIndex:%d MacAddr:%s IPAddrs:%v Gateways:%v Data:%+v NICType: %s "+ + "NetworkContainerID: %s HostIfName: %s NetNs: %s Options: %v MasterIfName: %s HNSEndpointID: %s HNSNetworkID: %s", epInfo.EndpointID, epInfo.ContainerID, epInfo.NetNsPath, epInfo.IfName, epInfo.IfIndex, epInfo.MacAddress.String(), epInfo.IPAddresses, - epInfo.Gateways, epInfo.Data, epInfo.NICType, epInfo.NetworkContainerID, epInfo.HostIfName, epInfo.NetNs, epInfo.Options) + epInfo.Gateways, epInfo.Data, epInfo.NICType, epInfo.NetworkContainerID, epInfo.HostIfName, epInfo.NetNs, epInfo.Options, epInfo.MasterIfName, + epInfo.HNSEndpointID, epInfo.HNSNetworkID) } func (ifInfo *InterfaceInfo) PrettyString() string { - return fmt.Sprintf("Name:%s NICType:%v MacAddr:%s IPConfigs:%+v Routes:%+v DNSInfo:%+v", - ifInfo.Name, ifInfo.NICType, ifInfo.MacAddress.String(), ifInfo.IPConfigs, ifInfo.Routes, ifInfo.DNS) + var ncresponse string + if ifInfo.NCResponse != nil { + ncresponse = fmt.Sprintf("%+v", *ifInfo.NCResponse) + } + return fmt.Sprintf("Name:%s NICType:%v MacAddr:%s IPConfigs:%s Routes:%+v DNSInfo:%+v NCResponse: %s", + ifInfo.Name, ifInfo.NICType, ifInfo.MacAddress.String(), FormatSliceOfPointersToString(ifInfo.IPConfigs), ifInfo.Routes, ifInfo.DNS, ncresponse) } // NewEndpoint creates a new endpoint in the network. diff --git a/network/endpoint_test.go b/network/endpoint_test.go index 2e796df9a6..bc31c3b3ac 100644 --- a/network/endpoint_test.go +++ b/network/endpoint_test.go @@ -22,6 +22,32 @@ func TestEndpoint(t *testing.T) { RunSpecs(t, "Endpoint Suite") } +var _ = Describe("Test FormatStructPointers", func() { + ptrSlice := []*IPConfig{ + { + Gateway: net.ParseIP("10.10.0.1"), + }, + { + Gateway: net.ParseIP("10.10.0.2"), + }, + } + Describe("Test FormatStructPointers", func() { + Context("When passing in a slice of pointers", func() { + It("Should create a pretty printed string of the contents", func() { + result := FormatSliceOfPointersToString(ptrSlice) + Expect(result).To(Equal("{Address:{IP: Mask:} Gateway:10.10.0.1} \n{Address:{IP: Mask:} Gateway:10.10.0.2} \n")) + }) + }) + Context("When passing in nil", func() { + It("Should not error", func() { + var empty []*IPConfig + result := FormatSliceOfPointersToString(empty) + Expect(result).To(Equal("")) + }) + }) + }) +}) + var _ = Describe("Test Endpoint", func() { Describe("Test getEndpoint", func() { Context("When endpoint not exists", func() { diff --git a/network/manager.go b/network/manager.go index e5d6d57330..7bc1441fea 100644 --- a/network/manager.go +++ b/network/manager.go @@ -422,13 +422,13 @@ func (nm *networkManager) UpdateEndpointState(eps []*endpoint) error { } ifnameToIPInfoMap := generateCNSIPInfoMap(eps) // key : interface name, value : IPInfo - for _, ipinfo := range ifnameToIPInfoMap { - logger.Info("Update endpoint state", zap.String("hnsEndpointID", ipinfo.HnsEndpointID), zap.String("hnsNetworkID", ipinfo.HnsNetworkID), + for key, ipinfo := range ifnameToIPInfoMap { + logger.Info("Update endpoint state", zap.String("ifname", key), zap.String("hnsEndpointID", ipinfo.HnsEndpointID), zap.String("hnsNetworkID", ipinfo.HnsNetworkID), zap.String("hostVethName", ipinfo.HostVethName), zap.String("macAddress", ipinfo.MacAddress), zap.String("nicType", string(ipinfo.NICType))) } - // we assume all endpoints have the same container id cnsEndpointID := eps[0].ContainerID + if err := validateUpdateEndpointState(cnsEndpointID, ifnameToIPInfoMap); err != nil { return errors.Wrap(err, "failed to validate update endpoint state that will be sent to cns") } diff --git a/telemetry/telemetry.go b/telemetry/telemetry.go index dfa052794e..04313631f6 100644 --- a/telemetry/telemetry.go +++ b/telemetry/telemetry.go @@ -79,8 +79,6 @@ type CNIReport struct { VnetAddressSpace []string OSDetails OSInfo SystemDetails SystemInfo - InterfaceDetails InterfaceInfo - BridgeDetails BridgeInfo Metadata common.Metadata `json:"compute"` Logger *zap.Logger } @@ -91,9 +89,7 @@ type AIMetric struct { // ReportManager structure. type ReportManager struct { - HostNetAgentURL string - ContentType string - Report interface{} + Report interface{} } // GetReport retrieves orchestrator, system, OS and Interface details and create a report structure. diff --git a/telemetry/telemetry_client.go b/telemetry/telemetry_client.go new file mode 100644 index 0000000000..54241c42eb --- /dev/null +++ b/telemetry/telemetry_client.go @@ -0,0 +1,113 @@ +package telemetry + +import ( + "fmt" + "os" + "sync" + + "github.com/Azure/azure-container-networking/aitelemetry" + "go.uber.org/zap" +) + +const ( + telemetryNumberRetries = 5 + telemetryWaitTimeInMilliseconds = 200 +) + +type Client struct { + cniReportSettings *CNIReport + tb *TelemetryBuffer + logger *zap.Logger + lock sync.Mutex +} + +// package level variable for application insights telemetry +var AIClient = NewClient() + +func NewClient() *Client { + return &Client{ + cniReportSettings: &CNIReport{}, + } +} + +// Settings gets a pointer to the cni report struct, used to modify individual fields +func (c *Client) Settings() *CNIReport { + return c.cniReportSettings +} + +// SetSettings REPLACES the pointer to the cni report struct and should only be used on startup +func (c *Client) SetSettings(settings *CNIReport) { + c.cniReportSettings = settings +} + +func (c *Client) IsConnected() bool { + return c.tb != nil && c.tb.Connected +} + +func (c *Client) ConnectTelemetry(logger *zap.Logger) { + c.tb = NewTelemetryBuffer(logger) + c.tb.ConnectToTelemetry() + c.logger = logger +} + +func (c *Client) StartAndConnectTelemetry(logger *zap.Logger) { + c.tb = NewTelemetryBuffer(logger) + c.tb.ConnectToTelemetryService(telemetryNumberRetries, telemetryWaitTimeInMilliseconds) + c.logger = logger +} + +func (c *Client) DisconnectTelemetry() { + if c.tb == nil { + return + } + c.tb.Close() +} + +func (c *Client) sendEvent(msg string) { + if c.tb == nil { + return + } + c.lock.Lock() + defer c.lock.Unlock() + eventMsg := fmt.Sprintf("[%d] %s", os.Getpid(), msg) + c.cniReportSettings.EventMessage = eventMsg + SendCNIEvent(c.tb, c.cniReportSettings) +} + +func (c *Client) sendLog(msg string) { + if c.logger == nil { + return + } + c.logger.Info("Telemetry Event", zap.String("message", msg)) +} + +func (c *Client) SendEvent(msg string) { + c.sendEvent(msg) +} + +func (c *Client) SendError(err error) { + if err == nil { + return + } + // when the cni report reaches the telemetry service, the ai log message + // is set to either the cni report's event message or error message, + // whichever is not empty, so we can always just set the event message + c.sendEvent(err.Error()) +} + +func (c *Client) SendMetric(name string, value float64, customDims map[string]string) { + if c.tb == nil { + return + } + err := SendCNIMetric(&AIMetric{ + aitelemetry.Metric{ + Name: name, + Value: value, + AppVersion: c.Settings().Version, + CustomDimensions: customDims, + }, + }, c.tb) + if err != nil { + c.sendLog("Couldn't send metric: " + err.Error()) + } +} diff --git a/telemetry/telemetry_client_test.go b/telemetry/telemetry_client_test.go new file mode 100644 index 0000000000..2c33f6540d --- /dev/null +++ b/telemetry/telemetry_client_test.go @@ -0,0 +1,65 @@ +package telemetry + +import ( + "errors" + "regexp" + "testing" + + "github.com/stretchr/testify/require" + "go.uber.org/zap" +) + +var errMockTelemetryClient = errors.New("mock telemetry client error") + +func TestClient(t *testing.T) { + allowedErrorMsg := regexp.MustCompile(`^\[\d+\] mock telemetry client error`) + allowedEventMsg := regexp.MustCompile(`^\[\d+\] telemetry event`) + + emptyClient := NewClient() + + // an empty client should not cause panics + require.NotPanics(t, func() { emptyClient.SendEvent("no errors") }) + + require.NotPanics(t, func() { emptyClient.SendError(errMockTelemetryClient) }) + + require.NotPanics(t, func() { emptyClient.DisconnectTelemetry() }) + + require.NotPanics(t, func() { emptyClient.sendLog("no errors") }) + + require.NotPanics(t, func() { emptyClient.sendEvent("no errors") }) + + logger, err := zap.NewDevelopment() + require.NoError(t, err) + + // should not panic if connecting telemetry fails or succeeds + require.NotPanics(t, func() { emptyClient.ConnectTelemetry(logger) }) + + // should set logger during connection + require.Equal(t, logger, emptyClient.logger) + + // for testing, we create a new telemetry buffer and assign it + emptyClient.tb = &TelemetryBuffer{} + + // test sending error + require.NotPanics(t, func() { emptyClient.SendError(errMockTelemetryClient) }) + require.Regexp(t, allowedErrorMsg, emptyClient.Settings().EventMessage) + + // test sending event, error is empty + require.NotPanics(t, func() { emptyClient.SendEvent("telemetry event") }) + require.Regexp(t, allowedEventMsg, emptyClient.Settings().EventMessage) + require.Equal(t, "", emptyClient.Settings().ErrorMessage) + + // test sending aimetrics doesn't panic... + require.NotPanics(t, func() { emptyClient.SendMetric("", 0, nil) }) + // ...and doesn't affect the cni report + require.Regexp(t, allowedEventMsg, emptyClient.Settings().EventMessage) + require.Equal(t, "", emptyClient.Settings().ErrorMessage) + + emptyClient.Settings().Context = "abc" + require.Equal(t, "abc", emptyClient.Settings().Context) + + myClient := &Client{ + tb: &TelemetryBuffer{}, + } + require.NotPanics(t, func() { myClient.DisconnectTelemetry() }) +} diff --git a/telemetry/telemetrybuffer.go b/telemetry/telemetrybuffer.go index ce30e642c2..c44a6988a4 100644 --- a/telemetry/telemetrybuffer.go +++ b/telemetry/telemetrybuffer.go @@ -308,7 +308,7 @@ func (tb *TelemetryBuffer) StartTelemetryService(path string, args []string) err err := tb.plc.KillProcessByName(TelemetryServiceProcessName) if err != nil { if tb.logger != nil { - tb.logger.Error("Failed to kill process by", zap.String("TelemetryServiceProcessName", TelemetryServiceProcessName), zap.Error(err)) + tb.logger.Warn("Failed to kill process by", zap.String("TelemetryServiceProcessName", TelemetryServiceProcessName), zap.Error(err)) } else { log.Logf("[Telemetry] Failed to kill process by telemetryServiceProcessName %s due to %v", TelemetryServiceProcessName, err) } diff --git a/telemetry/telemetrybuffer_test.go b/telemetry/telemetrybuffer_test.go index 19b3abe495..8cd6e307bc 100644 --- a/telemetry/telemetrybuffer_test.go +++ b/telemetry/telemetrybuffer_test.go @@ -188,3 +188,22 @@ func TestStartTelemetryService(t *testing.T) { err := tb.StartTelemetryService("", nil) require.Error(t, err) } + +// TestExtraneousClose checks that closing potentially multiple times after a failed connect won't panic +func TestExtraneousClose(_ *testing.T) { + tb := NewTelemetryBuffer(nil) + + tb.Close() + tb.Close() + + tb.ConnectToTelemetry() + + tb.Close() + tb.Close() + + tb = NewTelemetryBuffer(nil) + tb.ConnectToTelemetryService(telemetryNumberRetries, telemetryWaitTimeInMilliseconds) + + tb.Close() + tb.Close() +}