Skip to content
This repository has been archived by the owner on Aug 13, 2019. It is now read-only.

Commit

Permalink
fix the "failed compaction" metric. (#613)
Browse files Browse the repository at this point in the history
Signed-off-by: Krasi Georgiev <[email protected]>
  • Loading branch information
krasi-georgiev authored May 30, 2019
1 parent 13c80a5 commit 882162d
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 9 deletions.
9 changes: 0 additions & 9 deletions compact.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ type LeveledCompactor struct {
type compactorMetrics struct {
ran prometheus.Counter
populatingBlocks prometheus.Gauge
failed prometheus.Counter
overlappingBlocks prometheus.Counter
duration prometheus.Histogram
chunkSize prometheus.Histogram
Expand All @@ -103,10 +102,6 @@ func newCompactorMetrics(r prometheus.Registerer) *compactorMetrics {
Name: "prometheus_tsdb_compaction_populating_block",
Help: "Set to 1 when a block is currently being written to the disk.",
})
m.failed = prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_tsdb_compactions_failed_total",
Help: "Total number of compactions that failed for the partition.",
})
m.overlappingBlocks = prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_tsdb_vertical_compactions_total",
Help: "Total number of compactions done on overlapping blocks.",
Expand Down Expand Up @@ -136,7 +131,6 @@ func newCompactorMetrics(r prometheus.Registerer) *compactorMetrics {
r.MustRegister(
m.ran,
m.populatingBlocks,
m.failed,
m.overlappingBlocks,
m.duration,
m.chunkRange,
Expand Down Expand Up @@ -541,9 +535,6 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe
if err := os.RemoveAll(tmp); err != nil {
level.Error(c.logger).Log("msg", "removed tmp folder after failed compaction", "err", err.Error())
}
if err != nil {
c.metrics.failed.Inc()
}
c.metrics.ran.Inc()
c.metrics.duration.Observe(time.Since(t).Seconds())
}(time.Now())
Expand Down
3 changes: 3 additions & 0 deletions compact_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1042,13 +1042,16 @@ func TestDeleteCompactionBlockAfterFailedReload(t *testing.T) {

testutil.Equals(t, 0.0, prom_testutil.ToFloat64(db.metrics.reloadsFailed), "initial 'failed db reload' count metrics mismatch")
testutil.Equals(t, 0.0, prom_testutil.ToFloat64(db.compactor.(*LeveledCompactor).metrics.ran), "initial `compactions` count metric mismatch")
testutil.Equals(t, 0.0, prom_testutil.ToFloat64(db.metrics.compactionsFailed), "initial `compactions failed` count metric mismatch")

// Do the compaction and check the metrics.
// Compaction should succeed, but the reload should fail and
// the new block created from the compaction should be deleted.
testutil.NotOk(t, db.compact())
testutil.Equals(t, 1.0, prom_testutil.ToFloat64(db.metrics.reloadsFailed), "'failed db reload' count metrics mismatch")
testutil.Equals(t, 1.0, prom_testutil.ToFloat64(db.compactor.(*LeveledCompactor).metrics.ran), "`compaction` count metric mismatch")
testutil.Equals(t, 1.0, prom_testutil.ToFloat64(db.metrics.compactionsFailed), "`compactions failed` count metric mismatch")

actBlocks, err = blockDirs(db.Dir())
testutil.Ok(t, err)
testutil.Equals(t, expBlocks, len(actBlocks)-1, "block count should be the same as before the compaction") // -1 to exclude the corrupted block.
Expand Down
11 changes: 11 additions & 0 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ type dbMetrics struct {
reloads prometheus.Counter
reloadsFailed prometheus.Counter
compactionsTriggered prometheus.Counter
compactionsFailed prometheus.Counter
timeRetentionCount prometheus.Counter
compactionsSkipped prometheus.Counter
startTime prometheus.GaugeFunc
Expand Down Expand Up @@ -191,6 +192,10 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics {
Name: "prometheus_tsdb_compactions_triggered_total",
Help: "Total number of triggered compactions for the partition.",
})
m.compactionsFailed = prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_tsdb_compactions_failed_total",
Help: "Total number of compactions that failed for the partition.",
})
m.timeRetentionCount = prometheus.NewCounter(prometheus.CounterOpts{
Name: "prometheus_tsdb_time_retentions_total",
Help: "The number of times that blocks were deleted because the maximum time limit was exceeded.",
Expand Down Expand Up @@ -231,6 +236,7 @@ func newDBMetrics(db *DB, r prometheus.Registerer) *dbMetrics {
m.reloadsFailed,
m.timeRetentionCount,
m.compactionsTriggered,
m.compactionsFailed,
m.startTime,
m.tombCleanTimer,
m.blocksBytes,
Expand Down Expand Up @@ -411,6 +417,11 @@ func (a dbAppender) Commit() error {
func (db *DB) compact() (err error) {
db.cmtx.Lock()
defer db.cmtx.Unlock()
defer func() {
if err != nil {
db.metrics.compactionsFailed.Inc()
}
}()
// Check whether we have pending head blocks that are ready to be persisted.
// They have the highest priority.
for {
Expand Down

0 comments on commit 882162d

Please sign in to comment.