From 0de6a01824bee7453c6997a12e1f6a5830564f9d Mon Sep 17 00:00:00 2001 From: Joel Takvorian Date: Tue, 12 Nov 2024 17:55:45 +0100 Subject: [PATCH] NETOBSERV-1954: fix under-estimation of traffic (#444) * NETOBSERV-1954: fix under-estimation of traffic Remove flawed logic that ignores some flows, especially visible under stress. This logic was introduced here: https://github.com/netobserv/netobserv-ebpf-agent/pull/49/files#diff-f3248f855cc319b6d96125842a513586d6d8cbfb1e241a93e79911d08e41c728L334-L336 The old comment said: "...when a new flow maps to the same entry, it has some zombie entries." - this is surprising and I'm not sure how that would be possible; flow_metrics entries should be zeroed as we're using designated initializers. In any case, relying on a single eviction time reference is wrong because flows are evicted one by one, resulting in many new flows having their timer set before the "last eviction time". If we find evidence of those "zombie entries" then we must find another way to fix them. 
* do not set startTime=0 --- pkg/flow/tracer_map.go | 14 -------------- pkg/model/record.go | 2 +- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/pkg/flow/tracer_map.go b/pkg/flow/tracer_map.go index a24d13d0e..7137ea142 100644 --- a/pkg/flow/tracer_map.go +++ b/pkg/flow/tracer_map.go @@ -26,7 +26,6 @@ type MapTracer struct { staleEntriesEvictTimeout time.Duration // manages the access to the eviction routines, avoiding two evictions happening at the same time evictionCond *sync.Cond - lastEvictionNs uint64 metrics *metrics.Metrics timeSpentinLookupAndDelete prometheus.Histogram } @@ -40,7 +39,6 @@ func NewMapTracer(fetcher mapFetcher, evictionTimeout, staleEntriesEvictTimeout return &MapTracer{ mapFetcher: fetcher, evictionTimeout: evictionTimeout, - lastEvictionNs: uint64(monotime.Now()), evictionCond: sync.NewCond(&sync.Mutex{}), staleEntriesEvictTimeout: staleEntriesEvictTimeout, metrics: m, @@ -101,7 +99,6 @@ func (m *MapTracer) evictFlows(ctx context.Context, forceGC bool, forwardFlows c currentTime := time.Now() var forwardingFlows []*model.Record - laterFlowNs := uint64(0) flows := m.mapFetcher.LookupAndDeleteMap(m.metrics) elapsed := time.Since(currentTime) for flowKey, flowMetrics := range flows { @@ -110,10 +107,6 @@ func (m *MapTracer) evictFlows(ctx context.Context, forceGC bool, forwardFlows c if aggregatedMetrics.EndMonoTimeTs == 0 { continue } - // If it iterated an entry that do not have updated flows - if aggregatedMetrics.EndMonoTimeTs > laterFlowNs { - laterFlowNs = aggregatedMetrics.EndMonoTimeTs - } forwardingFlows = append(forwardingFlows, model.NewRecord( flowKey, aggregatedMetrics, @@ -122,7 +115,6 @@ func (m *MapTracer) evictFlows(ctx context.Context, forceGC bool, forwardFlows c )) } m.mapFetcher.DeleteMapsStaleEntries(m.staleEntriesEvictTimeout) - m.lastEvictionNs = laterFlowNs select { case <-ctx.Done(): mtlog.Debug("skipping flow eviction as agent is being stopped") @@ -146,12 +138,6 @@ func (m *MapTracer) 
aggregate(metrics []ebpf.BpfFlowMetrics) *ebpf.BpfFlowMetric } aggr := &ebpf.BpfFlowMetrics{} for i := range metrics { - // eBPF hashmap values are not zeroed when the entry is removed. That causes that we - // might receive entries from previous collect-eviction timeslots. - // We need to check the flow time and discard old flows. - if metrics[i].StartMonoTimeTs <= m.lastEvictionNs || metrics[i].EndMonoTimeTs <= m.lastEvictionNs { - continue - } model.Accumulate(aggr, &metrics[i]) } return aggr diff --git a/pkg/model/record.go b/pkg/model/record.go index 8d4c6c708..4b0406add 100644 --- a/pkg/model/record.go +++ b/pkg/model/record.go @@ -92,7 +92,7 @@ func NewRecord( func Accumulate(r *ebpf.BpfFlowMetrics, src *ebpf.BpfFlowMetrics) { // time == 0 if the value has not been yet set - if r.StartMonoTimeTs == 0 || r.StartMonoTimeTs > src.StartMonoTimeTs { + if r.StartMonoTimeTs == 0 || (r.StartMonoTimeTs > src.StartMonoTimeTs && src.StartMonoTimeTs != 0) { r.StartMonoTimeTs = src.StartMonoTimeTs } if r.EndMonoTimeTs == 0 || r.EndMonoTimeTs < src.EndMonoTimeTs {