From bc013802415f96d4a8a1f82953c51b120ba0dcff Mon Sep 17 00:00:00 2001 From: Deploy Date: Mon, 28 Mar 2022 19:59:22 +0300 Subject: [PATCH 1/2] Fix for gaps in prometheus metrics. Instead of a single crons.prom file, each job now has its own. However, by default node_exporter's textfile collector doesn't merge them and uses only the first one. Details in https://github.com/prometheus/node_exporter/issues/1885 Updates: * instead of a single crons.prom file, each job now has - and locks - its own * added the same HELP header to all files * cron_job metric renamed to cronjob * minor README updates --- README.md | 11 ++++++----- main.go | 25 ++++++++++++++++--------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 9b3c5bb..c1e9211 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ The program can be used as follows: cronmanager -c command -n jobname [ -t time in seconds ] [ -l log file ] ``` -The `command`is the only mandatory argument. Notice that you cannot a bash shell or any of its shell built-ins as the command. So, the following examples will **not work**: +Both `command` and `jobname` are mandatory arguments. Notice that you cannot a bash shell or any of its shell built-ins as the command. So, the following examples will **not work**: ```bash cronmanager -c "echo 'hello' > somefile" @@ -79,10 +79,11 @@ For the tool to work, the `/opt/prometheus/exporters/dist/textfile/ `path **m Once cronmanager starts a job, it will wait for the specified seconds (using `-t` or the default 3600 seconds). If the cron is still running, cronmanager writes to a file under the exporters path. The file name consists of the job name followed by the `.prom` extension. For example, if you run the command like this `cronmanager -c "some_command some_arguments" -n "myjob"` the following file will be created: `/opt/prometheus/exporters/dist/textfile/myjob.prom`. 
The contents of the file are as follows: ```plain -# TYPE cron_job gauge -cron_job{"name=cron1","dimension=failed"} 0 -cron_job{"name=cron1","dimension=delayed"} 0 -cron_job{"name=cron1","dimension=duration"} 10 +# HELP cronjob metric generated by cronmanager +# TYPE cronjob gauge +cronjob{"name=cron1","dimension=failed"} 0 +cronjob{"name=cron1","dimension=delayed"} 0 +cronjob{"name=cron1","dimension=duration"} 10 ``` The numbers change to `1` depending on the issue found with the cron job (delayed/failed or both). diff --git a/main.go b/main.go index 33bc43c..2d343e7 100644 --- a/main.go +++ b/main.go @@ -74,6 +74,7 @@ func main() { writeToExporter(*jobnamePtr, "duration", strconv.FormatFloat(jobDuration, 'f', 0, 64)) // Store last timestamp writeToExporter(*jobnamePtr, "last", fmt.Sprintf("%d", time.Now().Unix())) +// writeToExporter(*jobnamePtr, "start", fmt.Sprintf("%d", jobStartTime.Unix())) } }() @@ -140,21 +141,25 @@ func main() { writeToExporter(*jobnamePtr, "last", fmt.Sprintf("%d", time.Now().Unix())) } -func getExporterPath() string { +func getExporterPath(jobName string) string { exporterPath, exists := os.LookupEnv("COLLECTOR_TEXTFILE_PATH") - exporterPath = exporterPath + "/crons.prom" + exporterPath = exporterPath + "/" + jobName + ".prom" if !exists { - exporterPath = "/var/cache/prometheus/crons.prom" + exporterPath = "/var/cache/prometheus/" + jobName + ".prom" } return exporterPath } func writeToExporter(jobName string, label string, metric string) { jobNeedle := "cronjob{name=\"" + jobName + "\",dimension=\"" + label + "\"}" - typeData := "# TYPE cron_job gauge" + // both TYPE and HELP must be the same across all .prom files + // otherwise node_exporter textfile won't merge them + // see https://github.com/prometheus/node_exporter/issues/1885 + helpData := "# HELP cronjob metric generated by cronmanager" + typeData := "# TYPE cronjob gauge" jobData := jobNeedle + " " + metric - exporterPath := getExporterPath() + exporterPath := 
getExporterPath(jobName) // Lock filepath to prevent race conditions lock := fslock.New(exporterPath) err := lock.Lock() @@ -165,7 +170,8 @@ func writeToExporter(jobName string, label string, metric string) { input, err := ioutil.ReadFile(exporterPath) if err != nil { - // We're not sure why we can't read from the file. Let's try creating it and fail if that didn't work either + // We're not sure why we can't read from the file. + // Let's try creating it and fail if that didn't work either if _, err := os.Create(exporterPath); err != nil { log.Fatal("Couldn't read or write to the exporter file. Check parent directory permissions") } @@ -175,13 +181,14 @@ func writeToExporter(jobName string, label string, metric string) { if re.Match(input) { input = re.ReplaceAll(input, []byte(jobData+"\n")) } else { - // If the job is not there then either there is no TYPE header at all and this is the first job + // If TYPE line is not there then this is the first run of the job if re := regexp.MustCompile(typeData); !re.Match(input) { - // Add the TYPE and the job data + // Add HELP, TYPE and the job data + input = append(input, helpData+"\n"...) input = append(input, typeData+"\n"...) input = append(input, jobData+"\n"...) } else { - // Or there is a TYPE header with one or more other jobs. Just append the job to the TYPE header + // Or there is a TYPE header with one or more other jobs. 
Just append the job to the TYPE headers input = re.ReplaceAll(input, []byte(typeData+"\n"+jobData)) } } From 92bea497f8c3e5b5f86dbc2fad12de24e9b95811 Mon Sep 17 00:00:00 2001 From: Deploy Date: Thu, 31 Mar 2022 13:50:31 +0300 Subject: [PATCH 2/2] Lock removed; use tmp files instead --- main.go | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/main.go b/main.go index 2d343e7..948cbe3 100644 --- a/main.go +++ b/main.go @@ -16,7 +16,7 @@ import ( "syscall" "time" - "github.com/juju/fslock" +// "github.com/juju/fslock" ) //isDelayed: Used to signal that the cron job delay was triggered @@ -161,18 +161,19 @@ func writeToExporter(jobName string, label string, metric string) { exporterPath := getExporterPath(jobName) // Lock filepath to prevent race conditions - lock := fslock.New(exporterPath) - err := lock.Lock() - if err != nil { - log.Println("Error locking file " + exporterPath) - } - defer lock.Unlock() + // however, lock also prevents reading +// lock := fslock.New(exporterPath+".tmp") +// err := lock.Lock() +// if err != nil { +// log.Println("Error locking file " + exporterPath) +// } +// defer lock.Unlock() input, err := ioutil.ReadFile(exporterPath) if err != nil { // We're not sure why we can't read from the file. // Let's try creating it and fail if that didn't work either - if _, err := os.Create(exporterPath); err != nil { + if _, err := os.Create(exporterPath+".tmp"); err != nil { log.Fatal("Couldn't read or write to the exporter file. 
Check parent directory permissions") } } @@ -192,7 +193,7 @@ func writeToExporter(jobName string, label string, metric string) { input = re.ReplaceAll(input, []byte(typeData+"\n"+jobData)) } } - f, err := os.Create(exporterPath) + f, err := os.Create(exporterPath+".tmp") if err != nil { log.Fatal(err) } @@ -200,9 +201,11 @@ func writeToExporter(jobName string, label string, metric string) { if err != nil { log.Fatal(err) } - - defer f.Close() if _, err = f.Write(input); err != nil { log.Fatal(err) } + f.Close(); + if err = os.Rename(exporterPath+".tmp", exporterPath); err != nil { + log.Fatal(err) + } }