Skip to content

Commit

Permalink
Add -recheck-with-time-limit support (#223)
Browse files Browse the repository at this point in the history
process-exporter already supports the -recheck flag which makes it run
the whole matching logic on each scrape. This is very useful when trying
to monitor processes which change their names shortly after start.

Sadly, -recheck carries a rather high performance penalty. At the same
time, process name changes are very common directly after start, but are
seldom expected later during normal operation.

This commit introduces -recheck-with-time-limit which rechecks processes
N seconds after their start and stops doing so afterwards. This combines
the accuracy benefits of -recheck with the performance gains of not
using -recheck.
  • Loading branch information
hoffie authored Apr 16, 2024
1 parent 6caf441 commit 0bcf42e
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 43 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ as well as group name.
re-evaluated. This is disabled by default as an optimization, but since
processes can choose to change their names, this may result in a process
falling into the wrong group if we happen to see it for the first time before
it's assumed its proper name.
it's assumed its proper name. You can use -recheck-with-time-limit to enable this
feature only for a specific duration after a process starts.

-procnames is intended as a quick alternative to using a config file. Details
in the following section.
Expand Down
21 changes: 14 additions & 7 deletions cmd/process-exporter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ func main() {
"path to YAML web config file")
recheck = flag.Bool("recheck", false,
"recheck process names on each scrape")
recheckTimeLimit = flag.Duration("recheck-with-time-limit", 0,
"recheck processes only this much time after their start, but no longer.")
debug = flag.Bool("debug", false,
"log debugging information to stdout")
showVersion = flag.Bool("version", false,
Expand Down Expand Up @@ -232,15 +234,20 @@ func main() {
matchnamer = namemapper
}

if *recheckTimeLimit != 0 {
*recheck = true
}

pc, err := collector.NewProcessCollector(
collector.ProcessCollectorOption{
ProcFSPath: *procfsPath,
Children: *children,
Threads: *threads,
GatherSMaps: *smaps,
Namer: matchnamer,
Recheck: *recheck,
Debug: *debug,
ProcFSPath: *procfsPath,
Children: *children,
Threads: *threads,
GatherSMaps: *smaps,
Namer: matchnamer,
Recheck: *recheck,
RecheckTimeLimit: *recheckTimeLimit,
Debug: *debug,
},
)
if err != nil {
Expand Down
18 changes: 10 additions & 8 deletions collector/process_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package collector

import (
"log"
"time"

common "github.com/ncabatoff/process-exporter"
"github.com/ncabatoff/process-exporter/proc"
Expand Down Expand Up @@ -155,13 +156,14 @@ type (
}

// ProcessCollectorOption holds the settings passed to NewProcessCollector.
ProcessCollectorOption struct {
ProcFSPath string
Children bool
Threads bool
GatherSMaps bool
Namer common.MatchNamer
Recheck bool
Debug bool
ProcFSPath string
Children bool
Threads bool
GatherSMaps bool
Namer common.MatchNamer
// Recheck and RecheckTimeLimit are handed unchanged to proc.NewGrouper.
Recheck bool
RecheckTimeLimit time.Duration
Debug bool
}

NamedProcessCollector struct {
Expand All @@ -186,7 +188,7 @@ func NewProcessCollector(options ProcessCollectorOption) (*NamedProcessCollector
fs.GatherSMaps = options.GatherSMaps
p := &NamedProcessCollector{
scrapeChan: make(chan scrapeRequest),
Grouper: proc.NewGrouper(options.Namer, options.Children, options.Threads, options.Recheck, options.Debug),
Grouper: proc.NewGrouper(options.Namer, options.Children, options.Threads, options.Recheck, options.RecheckTimeLimit, options.Debug),
source: fs,
threads: options.Threads,
smaps: options.GatherSMaps,
Expand Down
4 changes: 2 additions & 2 deletions proc/grouper.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ type (
func lessThreads(x, y Threads) bool { return seq.Compare(x, y) < 0 }

// NewGrouper creates a grouper.
func NewGrouper(namer common.MatchNamer, trackChildren, trackThreads, alwaysRecheck, debug bool) *Grouper {
func NewGrouper(namer common.MatchNamer, trackChildren, trackThreads, recheck bool, recheckTimeLimit time.Duration, debug bool) *Grouper {
g := Grouper{
groupAccum: make(map[string]Counts),
threadAccum: make(map[string]map[string]Threads),
tracker: NewTracker(namer, trackChildren, alwaysRecheck, debug),
tracker: NewTracker(namer, trackChildren, recheck, recheckTimeLimit, debug),
debug: debug,
}
return &g
Expand Down
8 changes: 4 additions & 4 deletions proc/grouper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func TestGrouperBasic(t *testing.T) {
},
}

gr := NewGrouper(newNamer(n1, n2), false, false, false, false)
gr := NewGrouper(newNamer(n1, n2), false, false, false, 0, false)
for i, tc := range tests {
got := rungroup(t, gr, procInfoIter(tc.procs...))
if diff := cmp.Diff(got, tc.want); diff != "" {
Expand Down Expand Up @@ -128,7 +128,7 @@ func TestGrouperProcJoin(t *testing.T) {
},
}

gr := NewGrouper(newNamer(n1), false, false, false, false)
gr := NewGrouper(newNamer(n1), false, false, false, 0, false)
for i, tc := range tests {
got := rungroup(t, gr, procInfoIter(tc.procs...))
if diff := cmp.Diff(got, tc.want); diff != "" {
Expand Down Expand Up @@ -171,7 +171,7 @@ func TestGrouperNonDecreasing(t *testing.T) {
},
}

gr := NewGrouper(newNamer(n1), false, false, false, false)
gr := NewGrouper(newNamer(n1), false, false, false, 0, false)
for i, tc := range tests {
got := rungroup(t, gr, procInfoIter(tc.procs...))
if diff := cmp.Diff(got, tc.want); diff != "" {
Expand Down Expand Up @@ -224,7 +224,7 @@ func TestGrouperThreads(t *testing.T) {
}

opts := cmpopts.SortSlices(lessThreads)
gr := NewGrouper(newNamer(n), false, true, false, false)
gr := NewGrouper(newNamer(n), false, true, false, 0, false)
for i, tc := range tests {
got := rungroup(t, gr, procInfoIter(tc.proc))
if diff := cmp.Diff(got, tc.want, opts); diff != "" {
Expand Down
45 changes: 28 additions & 17 deletions proc/tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@ type (
// namer wanted tracked.
trackChildren bool
// never ignore processes, i.e. always re-check untracked processes in case comm has changed
alwaysRecheck bool
username map[int]string
debug bool
recheck bool
// limit rechecks to this much time
recheckTimeLimit time.Duration
username map[int]string
debug bool
}

// Delta is an alias of Counts used to signal that its contents are not
Expand Down Expand Up @@ -139,15 +141,16 @@ func (tp *trackedProc) getUpdate() Update {
}

// NewTracker creates a Tracker with empty tracked, procIds and username maps
// and stores the given settings on it.
//
// recheck and recheckTimeLimit are consulted by ignore: with recheck set and
// a zero recheckTimeLimit an unmatched process is never marked as ignored;
// with a non-zero recheckTimeLimit it is marked as ignored only once that
// much time has passed since its start time.
func NewTracker(namer common.MatchNamer, trackChildren bool, alwaysRecheck bool, debug bool) *Tracker {
func NewTracker(namer common.MatchNamer, trackChildren bool, recheck bool, recheckTimeLimit time.Duration, debug bool) *Tracker {
return &Tracker{
namer: namer,
tracked: make(map[ID]*trackedProc),
procIds: make(map[int]ID),
trackChildren: trackChildren,
alwaysRecheck: alwaysRecheck,
username: make(map[int]string),
debug: debug,
namer: namer,
tracked: make(map[ID]*trackedProc),
procIds: make(map[int]ID),
trackChildren: trackChildren,
recheck: recheck,
recheckTimeLimit: recheckTimeLimit,
username: make(map[int]string),
debug: debug,
}
}

Expand All @@ -174,11 +177,19 @@ func (t *Tracker) track(groupName string, idinfo IDInfo) {
t.tracked[idinfo.ID] = &tproc
}

// ignore marks id as ignored by storing a nil entry for it in t.tracked,
// unless rechecking still applies to the process. startTime is the start
// time of the process and is compared against the recheck time limit.
func (t *Tracker) ignore(id ID) {
func (t *Tracker) ignore(id ID, startTime time.Time) {
// only mark the ID as ignored when no recheck applies to it
if t.alwaysRecheck == false {
t.tracked[id] = nil
if t.recheck {
if t.recheckTimeLimit == 0 {
// plain -recheck with no time limit: never mark the ID as ignored
return
}
if startTime.Add(t.recheckTimeLimit).After(time.Now()) {
// -recheck-with-time-limit is set and the limit has not been reached yet:
return
}
}
// recheck is off, or the time limit since process start has elapsed
t.tracked[id] = nil
}

func (tp *trackedProc) update(metrics Metrics, now time.Time, cerrs *CollectErrors, threads []Thread) {
Expand Down Expand Up @@ -341,7 +352,7 @@ func (t *Tracker) checkAncestry(idinfo IDInfo, newprocs map[ID]IDInfo) string {
log.Printf("ignoring unmatched proc with no matched parent: %+v", idinfo)
}
// Reached root of process tree without finding a tracked parent.
t.ignore(idinfo.ID)
t.ignore(idinfo.ID, idinfo.Static.StartTime)
return ""
}

Expand All @@ -357,7 +368,7 @@ func (t *Tracker) checkAncestry(idinfo IDInfo, newprocs map[ID]IDInfo) string {
return ptproc.groupName
}
// We've found an untracked parent.
t.ignore(idinfo.ID)
t.ignore(idinfo.ID, idinfo.Static.StartTime)
return ""
}

Expand All @@ -378,7 +389,7 @@ func (t *Tracker) checkAncestry(idinfo IDInfo, newprocs map[ID]IDInfo) string {
if t.debug {
log.Printf("ignoring unmatched proc with no matched parent: %+v", idinfo)
}
t.ignore(idinfo.ID)
t.ignore(idinfo.ID, idinfo.Static.StartTime)
return ""
}

Expand Down
8 changes: 4 additions & 4 deletions proc/tracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func TestTrackerBasic(t *testing.T) {
},
}
// Note that n3 should not be tracked according to our namer.
tr := NewTracker(newNamer(n1, n2, n4), false, false, false)
tr := NewTracker(newNamer(n1, n2, n4), false, false, 0, false)

opts := cmpopts.SortSlices(lessUpdateGroupName)
for i, tc := range tests {
Expand Down Expand Up @@ -78,7 +78,7 @@ func TestTrackerChildren(t *testing.T) {
},
}
// Only n2 and children of n2s should be tracked
tr := NewTracker(newNamer(n2), true, false, false)
tr := NewTracker(newNamer(n2), true, false, 0, false)

for i, tc := range tests {
_, got, err := tr.Update(procInfoIter(tc.procs...))
Expand Down Expand Up @@ -111,7 +111,7 @@ func TestTrackerMetrics(t *testing.T) {
Filedesc{2, 20}, tm, 1, States{Running: 1}, msi{}, nil},
},
}
tr := NewTracker(newNamer(n), false, false, false)
tr := NewTracker(newNamer(n), false, false, 0, false)

for i, tc := range tests {
_, got, err := tr.Update(procInfoIter(tc.proc))
Expand Down Expand Up @@ -169,7 +169,7 @@ func TestTrackerThreads(t *testing.T) {
},
},
}
tr := NewTracker(newNamer(n), false, false, false)
tr := NewTracker(newNamer(n), false, false, 0, false)

opts := cmpopts.SortSlices(lessThreadUpdate)
for i, tc := range tests {
Expand Down

0 comments on commit 0bcf42e

Please sign in to comment.