Skip to content

Commit 482e86f

Browse files
committed
align flow with previous telemetry flow
tested and none panic: telemetry service running, lock acquired telemetry service not running, lock acquired telemetry service running, lock not acquired telemetry service not running, lock not acquired stateless if telemetry service not running stateless if telemetry service is running
1 parent 4a97303 commit 482e86f

File tree

4 files changed

+43
-23
lines changed

4 files changed

+43
-23
lines changed

cni/network/network.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -973,7 +973,7 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
973973
logger.Info("DEL command completed",
974974
zap.String("pod", k8sPodName),
975975
zap.Error(log.NewErrorWithoutStackTrace(err)))
976-
telemetryClient.SendEvent(fmt.Sprintf("DEL command completed: [released ip]: %+v [podname]: %s [namespace]: %s [error]: %v", nwCfg.IPAM.Address, k8sPodName, k8sNamespace, err))
976+
telemetryClient.SendEvent(fmt.Sprintf("DEL command completed: [podname]: %s [namespace]: %s [error]: %v", k8sPodName, k8sNamespace, err))
977977
}()
978978

979979
// Parse network configuration from stdin.

cni/network/plugin/main.go

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,13 @@ import (
88
"os"
99
"time"
1010

11-
"github.com/Azure/azure-container-networking/aitelemetry"
1211
"github.com/Azure/azure-container-networking/cni"
1312
"github.com/Azure/azure-container-networking/cni/api"
1413
zaplog "github.com/Azure/azure-container-networking/cni/log"
1514
"github.com/Azure/azure-container-networking/cni/network"
1615
"github.com/Azure/azure-container-networking/common"
1716
"github.com/Azure/azure-container-networking/nns"
1817
"github.com/Azure/azure-container-networking/platform"
19-
"github.com/Azure/azure-container-networking/store"
2018
"github.com/Azure/azure-container-networking/telemetry"
2119
"github.com/pkg/errors"
2220
"go.uber.org/zap"
@@ -93,30 +91,19 @@ func rootExecute() error {
9391
cniReport.VMUptime = upTime.Format("2006-01-02 15:04:05")
9492
}
9593

96-
// Start telemetry process if not already started. This should be done inside lock, otherwise multiple process
97-
// end up creating/killing telemetry process results in undesired state.
98-
telemetry.AIClient.StartAndConnectTelemetry(logger)
99-
defer telemetry.AIClient.DisconnectTelemetry()
100-
telemetry.AIClient.SetSettings(cniReport)
101-
102-
// CNI Acquires lock
94+
// CNI attempts to acquire lock
10395
if err = netPlugin.Plugin.InitializeKeyValueStore(&config); err != nil {
96+
// Error acquiring lock
10497
network.PrintCNIError(fmt.Sprintf("Failed to initialize key-value store of network plugin: %v", err))
10598

106-
if !telemetry.AIClient.IsConnected() {
107-
logger.Error("Not connected to telemetry service, skipping sending error to application insights")
108-
return errors.Wrap(err, "lock acquire error")
109-
}
110-
telemetry.AIClient.SendError(err)
99+
// Connect to telemetry service if it is running, otherwise skips telemetry
100+
telemetry.AIClient.ConnectTelemetry(logger)
101+
defer telemetry.AIClient.DisconnectTelemetry()
111102

112-
if errors.Is(err, store.ErrTimeoutLockingStore) {
113-
var cniMetric telemetry.AIMetric
114-
cniMetric.Metric = aitelemetry.Metric{
115-
Name: telemetry.CNILockTimeoutStr,
116-
Value: 1.0,
117-
CustomDimensions: make(map[string]string),
118-
}
119-
telemetry.AIClient.SendMetric(&cniMetric)
103+
if telemetry.AIClient.IsConnected() {
104+
telemetry.AIClient.SendError(err)
105+
} else {
106+
logger.Error("Not connected to telemetry service, skipping sending error to application insights")
120107
}
121108
return errors.Wrap(err, "lock acquire error")
122109
}
@@ -130,6 +117,12 @@ func rootExecute() error {
130117
os.Exit(1)
131118
}
132119
}()
120+
// At this point, lock is acquired
121+
// Start telemetry process if not already started. This should be done inside lock, otherwise multiple process
122+
// end up creating/killing telemetry process results in undesired state.
123+
telemetry.AIClient.StartAndConnectTelemetry(logger)
124+
defer telemetry.AIClient.DisconnectTelemetry()
125+
telemetry.AIClient.SetSettings(cniReport)
133126

134127
t := time.Now()
135128
cniReport.Timestamp = t.Format("2006-01-02 15:04:05")

telemetry/telemetry_client_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,12 @@ func TestClient(t *testing.T) {
5454
// ...and doesn't affect the cni report
5555
require.Regexp(t, allowedEventMsg, emptyClient.Settings().EventMessage)
5656
require.Equal(t, "", emptyClient.Settings().ErrorMessage)
57+
58+
emptyClient.Settings().Context = "abc"
59+
require.Equal(t, "abc", emptyClient.Settings().Context)
60+
61+
myClient := &Client{
62+
tb: &TelemetryBuffer{},
63+
}
64+
require.NotPanics(t, func() { myClient.DisconnectTelemetry() })
5765
}

telemetry/telemetrybuffer_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,3 +188,22 @@ func TestStartTelemetryService(t *testing.T) {
188188
err := tb.StartTelemetryService("", nil)
189189
require.Error(t, err)
190190
}
191+
192+
// TestExtraneousClose checks that closing potentially multiple times after a failed connect won't panic
193+
func TestExtraneousClose(t *testing.T) {
194+
tb := NewTelemetryBuffer(nil)
195+
196+
tb.Close()
197+
tb.Close()
198+
199+
tb.ConnectToTelemetry()
200+
201+
tb.Close()
202+
tb.Close()
203+
204+
tb = NewTelemetryBuffer(nil)
205+
tb.ConnectToTelemetryService(telemetryNumberRetries, telemetryWaitTimeInMilliseconds)
206+
207+
tb.Close()
208+
tb.Close()
209+
}

0 commit comments

Comments
 (0)