From ce08521490fcdefd2bc81d241a5c445137deee1c Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Thu, 16 May 2019 14:39:10 -0700 Subject: [PATCH 01/16] Move WAL Watcher from Prometheus to TSDB WAL package. Signed-off-by: Callum Styan --- block.go | 25 +- cmd/tsdb/main.go | 3 +- compact.go | 19 +- compact_test.go | 1 + db_test.go | 45 +- go.mod | 1 + go.sum | 6 +- head.go | 387 +++--------- head_test.go | 173 +++--- mocks_test.go | 1 + querier.go | 33 +- querier_test.go | 58 +- record/internal.go | 371 ++++++++++++ record.go => record/record.go | 10 +- record_test.go => record/record_test.go | 16 +- tombstones.go => record/tombstones.go | 60 +- .../tombstones_test.go | 18 +- wal.go | 112 ++-- checkpoint.go => wal/checkpoint.go | 40 +- checkpoint_test.go => wal/checkpoint_test.go | 36 +- wal/reader_test.go | 22 +- wal/wal_watcher.go | 556 ++++++++++++++++++ wal/wal_watcher_test.go | 509 ++++++++++++++++ wal_test.go | 103 ++-- 24 files changed, 1925 insertions(+), 680 deletions(-) create mode 100644 record/internal.go rename record.go => record/record.go (97%) rename record_test.go => record/record_test.go (89%) rename tombstones.go => record/tombstones.go (80%) rename tombstones_test.go => record/tombstones_test.go (89%) rename checkpoint.go => wal/checkpoint.go (88%) rename checkpoint_test.go => wal/checkpoint_test.go (89%) create mode 100644 wal/wal_watcher.go create mode 100644 wal/wal_watcher_test.go diff --git a/block.go b/block.go index d0fe2b2f..516bd00b 100644 --- a/block.go +++ b/block.go @@ -32,6 +32,7 @@ import ( "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" ) // IndexWriter serializes the index for a block of series data. @@ -136,7 +137,7 @@ type BlockReader interface { Chunks() (ChunkReader, error) // Tombstones returns a TombstoneReader over the block's deleted data. 
- Tombstones() (TombstoneReader, error) + Tombstones() (record.TombstoneReader, error) // Meta provides meta information about the block reader. Meta() BlockMeta @@ -278,7 +279,7 @@ type Block struct { chunkr ChunkReader indexr IndexReader - tombstones TombstoneReader + tombstones record.TombstoneReader logger log.Logger @@ -320,7 +321,7 @@ func OpenBlock(logger log.Logger, dir string, pool chunkenc.Pool) (pb *Block, er } closers = append(closers, ir) - tr, sizeTomb, err := readTombstones(dir) + tr, sizeTomb, err := record.ReadTombstones(dir) if err != nil { return nil, err } @@ -411,7 +412,7 @@ func (pb *Block) Chunks() (ChunkReader, error) { } // Tombstones returns a new TombstoneReader against the block data. -func (pb *Block) Tombstones() (TombstoneReader, error) { +func (pb *Block) Tombstones() (record.TombstoneReader, error) { if err := pb.startRead(); err != nil { return nil, err } @@ -482,7 +483,7 @@ func (r blockIndexReader) Close() error { } type blockTombstoneReader struct { - TombstoneReader + record.TombstoneReader b *Block } @@ -518,7 +519,7 @@ func (pb *Block) Delete(mint, maxt int64, ms ...labels.Matcher) error { ir := pb.indexr // Choose only valid postings which have chunks in the time-range. - stones := newMemTombstones() + stones := record.NewMemTombstones() var lset labels.Labels var chks []chunks.Meta @@ -534,7 +535,7 @@ Outer: if chk.OverlapsClosedInterval(mint, maxt) { // Delete only until the current values and not beyond. 
tmin, tmax := clampInterval(mint, maxt, chks[0].MinTime, chks[len(chks)-1].MaxTime) - stones.addInterval(p.At(), Interval{tmin, tmax}) + stones.AddInterval(p.At(), record.Interval{tmin, tmax}) continue Outer } } @@ -544,9 +545,9 @@ Outer: return p.Err() } - err = pb.tombstones.Iter(func(id uint64, ivs Intervals) error { + err = pb.tombstones.Iter(func(id uint64, ivs record.Intervals) error { for _, iv := range ivs { - stones.addInterval(id, iv) + stones.AddInterval(id, iv) } return nil }) @@ -556,7 +557,7 @@ Outer: pb.tombstones = stones pb.meta.Stats.NumTombstones = pb.tombstones.Total() - n, err := writeTombstoneFile(pb.logger, pb.dir, pb.tombstones) + n, err := record.WriteTombstoneFile(pb.logger, pb.dir, pb.tombstones) if err != nil { return err } @@ -574,7 +575,7 @@ Outer: func (pb *Block) CleanTombstones(dest string, c Compactor) (*ulid.ULID, error) { numStones := 0 - if err := pb.tombstones.Iter(func(id uint64, ivs Intervals) error { + if err := pb.tombstones.Iter(func(id uint64, ivs record.Intervals) error { numStones += len(ivs) return nil }); err != nil { @@ -609,7 +610,7 @@ func (pb *Block) Snapshot(dir string) error { for _, fname := range []string{ metaFilename, indexFilename, - tombstoneFilename, + record.TombstoneFilename, } { if err := os.Link(filepath.Join(pb.dir, fname), filepath.Join(blockDir, fname)); err != nil { return errors.Wrapf(err, "create snapshot %s", fname) diff --git a/cmd/tsdb/main.go b/cmd/tsdb/main.go index e3dc530a..829891ef 100644 --- a/cmd/tsdb/main.go +++ b/cmd/tsdb/main.go @@ -36,6 +36,7 @@ import ( "github.com/prometheus/tsdb/chunks" tsdb_errors "github.com/prometheus/tsdb/errors" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "gopkg.in/alecthomas/kingpin.v2" ) @@ -306,7 +307,7 @@ func (b *writeBenchmark) ingestScrapesShard(lbls []labels.Labels, scrapeCount in s.ref = &ref } else if err := app.AddFast(*s.ref, ts, float64(s.value)); err != nil { - if errors.Cause(err) != tsdb.ErrNotFound { + if 
errors.Cause(err) != record.ErrNotFound { panic(err) } diff --git a/compact.go b/compact.go index 9443c99e..3e2652fd 100644 --- a/compact.go +++ b/compact.go @@ -35,6 +35,7 @@ import ( "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" ) // ExponentialBlockRanges returns the time ranges based on the stepSize. @@ -607,7 +608,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe } // Create an empty tombstones file. - if _, err := writeTombstoneFile(c.logger, tmp, newMemTombstones()); err != nil { + if _, err := record.WriteTombstoneFile(c.logger, tmp, record.NewMemTombstones()); err != nil { return errors.Wrap(err, "write new tombstones file") } @@ -876,15 +877,15 @@ type compactionSeriesSet struct { p index.Postings index IndexReader chunks ChunkReader - tombstones TombstoneReader + tombstones record.TombstoneReader l labels.Labels c []chunks.Meta - intervals Intervals + intervals record.Intervals err error } -func newCompactionSeriesSet(i IndexReader, c ChunkReader, t TombstoneReader, p index.Postings) *compactionSeriesSet { +func newCompactionSeriesSet(i IndexReader, c ChunkReader, t record.TombstoneReader, p index.Postings) *compactionSeriesSet { return &compactionSeriesSet{ index: i, chunks: c, @@ -914,7 +915,7 @@ func (c *compactionSeriesSet) Next() bool { if len(c.intervals) > 0 { chks := make([]chunks.Meta, 0, len(c.c)) for _, chk := range c.c { - if !(Interval{chk.MinTime, chk.MaxTime}.isSubrange(c.intervals)) { + if !(record.Interval{chk.MinTime, chk.MaxTime}.IsSubrange(c.intervals)) { chks = append(chks, chk) } } @@ -942,7 +943,7 @@ func (c *compactionSeriesSet) Err() error { return c.p.Err() } -func (c *compactionSeriesSet) At() (labels.Labels, []chunks.Meta, Intervals) { +func (c *compactionSeriesSet) At() (labels.Labels, []chunks.Meta, record.Intervals) { return c.l, c.c, c.intervals } @@ -952,7 +953,7 @@ type compactionMerger 
struct { aok, bok bool l labels.Labels c []chunks.Meta - intervals Intervals + intervals record.Intervals } func newCompactionMerger(a, b ChunkSeriesSet) (*compactionMerger, error) { @@ -1008,7 +1009,7 @@ func (c *compactionMerger) Next() bool { _, cb, rb := c.b.At() for _, r := range rb { - ra = ra.add(r) + ra = ra.Add(r) } c.l = append(c.l[:0], l...) @@ -1029,6 +1030,6 @@ func (c *compactionMerger) Err() error { return c.b.Err() } -func (c *compactionMerger) At() (labels.Labels, []chunks.Meta, Intervals) { +func (c *compactionMerger) At() (labels.Labels, []chunks.Meta, record.Intervals) { return c.l, c.c, c.intervals } diff --git a/compact_test.go b/compact_test.go index 18990ed5..bee741e6 100644 --- a/compact_test.go +++ b/compact_test.go @@ -30,6 +30,7 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" ) diff --git a/db_test.go b/db_test.go index 25fb8a7e..7e1e1b96 100644 --- a/db_test.go +++ b/db_test.go @@ -33,6 +33,7 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" "github.com/prometheus/tsdb/tsdbutil" "github.com/prometheus/tsdb/wal" @@ -196,7 +197,7 @@ func TestDBAppenderAddRef(t *testing.T) { testutil.Ok(t, err) err = app2.AddFast(9999999, 1, 1) - testutil.Equals(t, ErrNotFound, errors.Cause(err)) + testutil.Equals(t, record.ErrNotFound, errors.Cause(err)) testutil.Ok(t, app2.Commit()) @@ -243,27 +244,27 @@ func TestDeleteSimple(t *testing.T) { numSamples := int64(10) cases := []struct { - intervals Intervals + intervals record.Intervals remaint []int64 }{ { - intervals: Intervals{{0, 3}}, + intervals: record.Intervals{{0, 3}}, remaint: []int64{4, 5, 6, 7, 8, 9}, }, { - intervals: Intervals{{1, 3}}, + intervals: record.Intervals{{1, 3}}, remaint: []int64{0, 4, 5, 6, 
7, 8, 9}, }, { - intervals: Intervals{{1, 3}, {4, 7}}, + intervals: record.Intervals{{1, 3}, {4, 7}}, remaint: []int64{0, 8, 9}, }, { - intervals: Intervals{{1, 3}, {4, 700}}, + intervals: record.Intervals{{1, 3}, {4, 700}}, remaint: []int64{0}, }, { // This case is to ensure that labels and symbols are deleted. - intervals: Intervals{{0, 9}}, + intervals: record.Intervals{{0, 9}}, remaint: []int64{}, }, } @@ -359,7 +360,7 @@ func TestAmendDatapointCausesError(t *testing.T) { app = db.Appender() _, err = app.Add(labels.Labels{}, 0, 1) - testutil.Equals(t, ErrAmendSample, err) + testutil.Equals(t, record.ErrAmendSample, err) testutil.Ok(t, app.Rollback()) } @@ -393,7 +394,7 @@ func TestNonDuplicateNaNDatapointsCausesAmendError(t *testing.T) { app = db.Appender() _, err = app.Add(labels.Labels{}, 0, math.Float64frombits(0x7ff0000000000002)) - testutil.Equals(t, ErrAmendSample, err) + testutil.Equals(t, record.ErrAmendSample, err) } func TestSkippingInvalidValuesInSameTxn(t *testing.T) { @@ -561,11 +562,11 @@ func TestDB_SnapshotWithDelete(t *testing.T) { testutil.Ok(t, app.Commit()) cases := []struct { - intervals Intervals + intervals record.Intervals remaint []int64 }{ { - intervals: Intervals{{1, 3}, {4, 7}}, + intervals: record.Intervals{{1, 3}, {4, 7}}, remaint: []int64{0, 8, 9}, }, } @@ -888,11 +889,11 @@ func TestTombstoneClean(t *testing.T) { testutil.Ok(t, app.Commit()) cases := []struct { - intervals Intervals + intervals record.Intervals remaint []int64 }{ { - intervals: Intervals{{1, 3}, {4, 7}}, + intervals: record.Intervals{{1, 3}, {4, 7}}, remaint: []int64{0, 8, 9}, }, } @@ -964,7 +965,7 @@ func TestTombstoneClean(t *testing.T) { } for _, b := range db.Blocks() { - testutil.Equals(t, newMemTombstones(), b.tombstones) + testutil.Equals(t, record.NewMemTombstones(), b.tombstones) } } } @@ -990,8 +991,8 @@ func TestTombstoneCleanFail(t *testing.T) { block, err := OpenBlock(nil, blockDir, nil) testutil.Ok(t, err) // Add some some fake tombstones to trigger 
the compaction. - tomb := newMemTombstones() - tomb.addInterval(0, Interval{0, 1}) + tomb := record.NewMemTombstones() + tomb.AddInterval(0, record.Interval{0, 1}) block.tombstones = tomb db.blocks = append(db.blocks, block) @@ -1470,13 +1471,13 @@ func TestInitializeHeadTimestamp(t *testing.T) { w, err := wal.New(nil, nil, path.Join(dir, "wal"), false) testutil.Ok(t, err) - var enc RecordEncoder + var enc record.RecordEncoder err = w.Log( - enc.Series([]RefSeries{ + enc.Series([]record.RefSeries{ {Ref: 123, Labels: labels.FromStrings("a", "1")}, {Ref: 124, Labels: labels.FromStrings("a", "2")}, }, nil), - enc.Samples([]RefSample{ + enc.Samples([]record.RefSample{ {Ref: 123, T: 5000, V: 1}, {Ref: 124, T: 15000, V: 1}, }, nil), @@ -1520,13 +1521,13 @@ func TestInitializeHeadTimestamp(t *testing.T) { w, err := wal.New(nil, nil, path.Join(dir, "wal"), false) testutil.Ok(t, err) - var enc RecordEncoder + var enc record.RecordEncoder err = w.Log( - enc.Series([]RefSeries{ + enc.Series([]record.RefSeries{ {Ref: 123, Labels: labels.FromStrings("a", "1")}, {Ref: 124, Labels: labels.FromStrings("a", "2")}, }, nil), - enc.Samples([]RefSample{ + enc.Samples([]record.RefSample{ {Ref: 123, T: 5000, V: 1}, {Ref: 124, T: 15000, V: 1}, }, nil), diff --git a/go.mod b/go.mod index ccdd4372..c75e4ed7 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/oklog/ulid v1.3.1 github.com/pkg/errors v0.8.0 github.com/prometheus/client_golang v1.0.0 + github.com/prometheus/prometheus v2.5.0+incompatible golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5 gopkg.in/alecthomas/kingpin.v2 v2.2.6 diff --git a/go.sum b/go.sum index 365fa5ec..ad7f9516 100644 --- a/go.sum +++ b/go.sum @@ -27,11 +27,11 @@ github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1 
h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= -github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515 h1:T+h1c/A9Gawja4Y9mFVWj2vyii2bbUNDw3kt9VxK2EY= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= @@ -59,6 +59,8 @@ github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d h1:GoAlyOgbOEIFd github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2 h1:6LJUbpNm42llc4HRCuvApCSWB/WfhuNo9K98Q9sNGfs= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/prometheus v2.5.0+incompatible h1:7QPitgO2kOFG8ecuRn9O/4L9+10He72rVRJvMXrE9Hg= +github.com/prometheus/prometheus v2.5.0+incompatible/go.mod h1:oAIUtOny2rjMX0OWN5vPR5/q/twIROJvdqnQKDdil/s= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod 
h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= diff --git a/head.go b/head.go index 0adb8847..c1c8e6d5 100644 --- a/head.go +++ b/head.go @@ -33,28 +33,18 @@ import ( "github.com/prometheus/tsdb/encoding" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/wal" ) var ( - // ErrNotFound is returned if a looked up resource was not found. - ErrNotFound = errors.Errorf("not found") - - // ErrOutOfOrderSample is returned if an appended sample has a - // timestamp smaller than the most recent sample. - ErrOutOfOrderSample = errors.New("out of order sample") - - // ErrAmendSample is returned if an appended sample has the same timestamp - // as the most recent sample but a different value. - ErrAmendSample = errors.New("amending sample") - // ErrOutOfBounds is returned if an appended sample is out of the // writable time range. ErrOutOfBounds = errors.New("out of bounds") // emptyTombstoneReader is a no-op Tombstone Reader. // This is used by head to satisfy the Tombstones() function call. - emptyTombstoneReader = newMemTombstones() + emptyTombstoneReader = record.NewMemTombstones() ) // Head handles reads and writes of time series data within a time window. @@ -256,12 +246,12 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int // Samples before the mint timestamp are discarded. func (h *Head) processWALSamples( minValidTime int64, - input <-chan []RefSample, output chan<- []RefSample, + input <-chan []record.RefSample, output chan<- []record.RefSample, ) (unknownRefs uint64) { defer close(output) // Mitigate lock contention in getByID. 
- refSeries := map[uint64]*memSeries{} + refSeries := map[uint64]*record.MemSeries{} mint, maxt := int64(math.MaxInt64), int64(math.MinInt64) @@ -279,7 +269,7 @@ func (h *Head) processWALSamples( } refSeries[s.Ref] = ms } - _, chunkCreated := ms.append(s.T, s.V) + _, chunkCreated := ms.Append(s.T, s.V) if chunkCreated { h.metrics.chunksCreated.Inc() h.metrics.chunks.Inc() @@ -331,8 +321,8 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { wg sync.WaitGroup multiRefLock sync.Mutex n = runtime.GOMAXPROCS(0) - inputs = make([]chan []RefSample, n) - outputs = make([]chan []RefSample, n) + inputs = make([]chan []record.RefSample, n) + outputs = make([]chan []record.RefSample, n) ) wg.Add(n) @@ -349,10 +339,10 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { }() for i := 0; i < n; i++ { - outputs[i] = make(chan []RefSample, 300) - inputs[i] = make(chan []RefSample, 300) + outputs[i] = make(chan []record.RefSample, 300) + inputs[i] = make(chan []record.RefSample, 300) - go func(input <-chan []RefSample, output chan<- []RefSample) { + go func(input <-chan []record.RefSample, output chan<- []record.RefSample) { unknown := h.processWALSamples(h.minValidTime, input, output) atomic.AddUint64(&unknownRefs, unknown) wg.Done() @@ -376,7 +366,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { rec := r.Record() switch dec.Type(rec) { - case RecordSeries: + case record.RecordSeries: series, err = dec.Series(rec, series) if err != nil { return &wal.CorruptionErr{ @@ -399,7 +389,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { h.lastSeriesID = s.Ref } } - case RecordSamples: + case record.RecordSamples: samples, err = dec.Samples(rec, samples) s := samples if err != nil { @@ -418,9 +408,9 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { if len(samples) < m { m = len(samples) } - shards := make([][]RefSample, n) + 
shards := make([][]record.RefSample, n) for i := 0; i < n; i++ { - var buf []RefSample + var buf []record.RefSample select { case buf = <-outputs[i]: default: @@ -440,7 +430,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { samples = samples[m:] } samples = s // Keep whole slice for reuse. - case RecordTombstones: + case record.RecordTombstones: tstones, err = dec.Tombstones(rec, tstones) if err != nil { return &wal.CorruptionErr{ @@ -450,15 +440,15 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { } } for _, s := range tstones { - for _, itv := range s.intervals { + for _, itv := range s.Intervals { if itv.Maxt < h.minValidTime { continue } - if m := h.series.getByID(s.ref); m == nil { + if m := h.series.getByID(s.Ref); m == nil { unknownRefs++ continue } - allStones.addInterval(s.ref, itv) + allStones.AddInterval(s.Ref, itv) } } default: @@ -508,8 +498,8 @@ func (h *Head) Init(minValidTime int64) error { level.Info(h.logger).Log("msg", "replaying WAL, this may take awhile") // Backfill the checkpoint first if it exists. 
- dir, startFrom, err := LastCheckpoint(h.wal.Dir()) - if err != nil && err != ErrNotFound { + dir, startFrom, err := wal.LastCheckpoint(h.wal.Dir()) + if err != nil && err != record.ErrNotFound { return errors.Wrap(err, "find last checkpoint") } multiRef := map[uint64]uint64{} @@ -629,7 +619,7 @@ func (h *Head) Truncate(mint int64) (err error) { return ok } h.metrics.checkpointCreationTotal.Inc() - if _, err = Checkpoint(h.wal, first, last, keep, mint); err != nil { + if _, err = wal.Checkpoint(h.wal, first, last, keep, mint); err != nil { h.metrics.checkpointCreationFail.Inc() return errors.Wrap(err, "create checkpoint") } @@ -651,7 +641,7 @@ func (h *Head) Truncate(mint int64) (err error) { h.deletedMtx.Unlock() h.metrics.checkpointDeleteTotal.Inc() - if err := DeleteCheckpoints(h.wal.Dir(), last); err != nil { + if err := wal.DeleteCheckpoints(h.wal.Dir(), last); err != nil { // Leftover old checkpoints do not cause problems down the line beyond // occupying disk space. // They will just be ignored since a higher checkpoint exists. 
@@ -693,7 +683,7 @@ func (h *rangeHead) Chunks() (ChunkReader, error) { return h.head.chunksRange(h.mint, h.maxt), nil } -func (h *rangeHead) Tombstones() (TombstoneReader, error) { +func (h *rangeHead) Tombstones() (record.TombstoneReader, error) { return emptyTombstoneReader, nil } @@ -739,7 +729,7 @@ func (a *initAppender) Add(lset labels.Labels, t int64, v float64) (uint64, erro func (a *initAppender) AddFast(ref uint64, t int64, v float64) error { if a.app == nil { - return ErrNotFound + return record.ErrNotFound } return a.app.AddFast(ref, t, v) } @@ -789,15 +779,15 @@ func max(a, b int64) int64 { return b } -func (h *Head) getAppendBuffer() []RefSample { +func (h *Head) getAppendBuffer() []record.RefSample { b := h.appendPool.Get() if b == nil { - return make([]RefSample, 0, 512) + return make([]record.RefSample, 0, 512) } - return b.([]RefSample) + return b.([]record.RefSample) } -func (h *Head) putAppendBuffer(b []RefSample) { +func (h *Head) putAppendBuffer(b []record.RefSample) { //lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty. h.appendPool.Put(b[:0]) } @@ -820,8 +810,8 @@ type headAppender struct { minValidTime int64 // No samples below this timestamp are allowed. 
mint, maxt int64 - series []RefSeries - samples []RefSample + series []record.RefSeries + samples []record.RefSample } func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) { @@ -834,12 +824,12 @@ func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, erro s, created := a.head.getOrCreate(lset.Hash(), lset) if created { - a.series = append(a.series, RefSeries{ - Ref: s.ref, + a.series = append(a.series, record.RefSeries{ + Ref: s.Ref, Labels: lset, }) } - return s.ref, a.AddFast(s.ref, t, v) + return s.Ref, a.AddFast(s.Ref, t, v) } func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { @@ -849,14 +839,14 @@ func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { s := a.head.series.getByID(ref) if s == nil { - return errors.Wrap(ErrNotFound, "unknown series") + return errors.Wrap(record.ErrNotFound, "unknown series") } s.Lock() - if err := s.appendable(t, v); err != nil { + if err := s.Appendable(t, v); err != nil { s.Unlock() return err } - s.pendingCommit = true + s.PendingCommit = true s.Unlock() if t < a.mint { @@ -866,11 +856,11 @@ func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { a.maxt = t } - a.samples = append(a.samples, RefSample{ + a.samples = append(a.samples, record.RefSample{ Ref: ref, T: t, V: v, - series: s, + Series: s, }) return nil } @@ -884,7 +874,7 @@ func (a *headAppender) log() error { defer func() { a.head.putBytesBuffer(buf) }() var rec []byte - var enc RecordEncoder + var enc record.RecordEncoder if len(a.series) > 0 { rec = enc.Series(a.series, buf) @@ -916,10 +906,10 @@ func (a *headAppender) Commit() error { total := len(a.samples) for _, s := range a.samples { - s.series.Lock() - ok, chunkCreated := s.series.append(s.T, s.V) - s.series.pendingCommit = false - s.series.Unlock() + s.Series.Lock() + ok, chunkCreated := s.Series.Append(s.T, s.V) + s.Series.PendingCommit = false + s.Series.Unlock() if !ok { total-- @@ -939,9 +929,9 @@ func 
(a *headAppender) Commit() error { func (a *headAppender) Rollback() error { a.head.metrics.activeAppenders.Dec() for _, s := range a.samples { - s.series.Lock() - s.series.pendingCommit = false - s.series.Unlock() + s.Series.Lock() + s.Series.PendingCommit = false + s.Series.Unlock() } a.head.putAppendBuffer(a.samples) @@ -964,21 +954,21 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { return errors.Wrap(err, "select series") } - var stones []Stone + var stones []record.Stone dirty := false for p.Next() { series := h.series.getByID(p.At()) - t0, t1 := series.minTime(), series.maxTime() + t0, t1 := series.MinTime(), series.MaxTime() if t0 == math.MinInt64 || t1 == math.MinInt64 { continue } // Delete only until the current values and not beyond. t0, t1 = clampInterval(mint, maxt, t0, t1) if h.wal != nil { - stones = append(stones, Stone{p.At(), Intervals{{t0, t1}}}) + stones = append(stones, record.Stone{p.At(), record.Intervals{{t0, t1}}}) } - if err := h.chunkRewrite(p.At(), Intervals{{t0, t1}}); err != nil { + if err := h.chunkRewrite(p.At(), record.Intervals{{t0, t1}}); err != nil { return errors.Wrap(err, "delete samples") } dirty = true @@ -986,7 +976,7 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { if p.Err() != nil { return p.Err() } - var enc RecordEncoder + var enc record.RecordEncoder if h.wal != nil { // Although we don't store the stones in the head // we need to write them to the WAL to mark these as deleted @@ -1005,7 +995,7 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { // chunkRewrite re-writes the chunks which overlaps with deleted ranges // and removes the samples in the deleted ranges. // Chunks is deleted if no samples are left at the end. 
-func (h *Head) chunkRewrite(ref uint64, dranges Intervals) (err error) { +func (h *Head) chunkRewrite(ref uint64, dranges record.Intervals) (err error) { if len(dranges) == 0 { return nil } @@ -1013,18 +1003,18 @@ func (h *Head) chunkRewrite(ref uint64, dranges Intervals) (err error) { ms := h.series.getByID(ref) ms.Lock() defer ms.Unlock() - if len(ms.chunks) == 0 { + if len(ms.Chunks) == 0 { return nil } - metas := ms.chunksMetas() + metas := ms.ChunksMetas() mint, maxt := metas[0].MinTime, metas[len(metas)-1].MaxTime it := newChunkSeriesIterator(metas, dranges, mint, maxt) - ms.reset() + ms.Reset() for it.Next() { t, v := it.At() - ok, _ := ms.append(t, v) + ok, _ := ms.Append(t, v) if !ok { level.Warn(h.logger).Log("msg", "failed to add sample during delete") } @@ -1097,7 +1087,7 @@ func (h *Head) gc() { } // Tombstones returns a new reader over the head's tombstones -func (h *Head) Tombstones() (TombstoneReader, error) { +func (h *Head) Tombstones() (record.TombstoneReader, error) { return emptyTombstoneReader, nil } @@ -1202,22 +1192,22 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { s := h.head.series.getByID(sid) // This means that the series has been garbage collected. if s == nil { - return nil, ErrNotFound + return nil, record.ErrNotFound } s.Lock() - c := s.chunk(int(cid)) + c := s.Chunk(int(cid)) // This means that the chunk has been garbage collected or is outside // the specified range. 
if c == nil || !c.OverlapsClosedInterval(h.mint, h.maxt) { s.Unlock() - return nil, ErrNotFound + return nil, record.ErrNotFound } s.Unlock() return &safeChunk{ - Chunk: c.chunk, + Chunk: c.Chunk, s: s, cid: int(cid), }, nil @@ -1225,7 +1215,7 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { type safeChunk struct { chunkenc.Chunk - s *memSeries + s *record.MemSeries cid int } @@ -1295,7 +1285,7 @@ func (h *headIndexReader) Postings(name, value string) (index.Postings, error) { } func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { - series := make([]*memSeries, 0, 128) + series := make([]*record.MemSeries, 0, 128) // Fetch all the series only once. for p.Next() { @@ -1311,13 +1301,13 @@ func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { } sort.Slice(series, func(i, j int) bool { - return labels.Compare(series[i].lset, series[j].lset) < 0 + return labels.Compare(series[i].Lset, series[j].Lset) < 0 }) // Convert back to list. ep := make([]uint64, 0, len(series)) for _, p := range series { - ep = append(ep, p.ref) + ep = append(ep, p.Ref) } return index.NewListPostings(ep) } @@ -1328,16 +1318,16 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks if s == nil { h.head.metrics.seriesNotFound.Inc() - return ErrNotFound + return record.ErrNotFound } - *lbls = append((*lbls)[:0], s.lset...) + *lbls = append((*lbls)[:0], s.Lset...) s.Lock() defer s.Unlock() *chks = (*chks)[:0] - for i, c := range s.chunks { + for i, c := range s.Chunks { // Do not expose chunks that are outside of the specified range. 
if !c.OverlapsClosedInterval(h.mint, h.maxt) { continue @@ -1368,7 +1358,7 @@ func (h *headIndexReader) LabelIndices() ([][]string, error) { return res, nil } -func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool) { +func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*record.MemSeries, bool) { // Just using `getOrSet` below would be semantically sufficient, but we'd create // a new series on every sample inserted via Add(), which causes allocations // and makes our series IDs rather random and harder to compress in postings. @@ -1383,8 +1373,8 @@ func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool) { return h.getOrCreateWithID(id, hash, lset) } -func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSeries, bool) { - s := newMemSeries(lset, id, h.chunkRange) +func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*record.MemSeries, bool) { + s := record.NewMemSeries(lset, id, h.chunkRange) s, created := h.series.getOrSet(hash, s) if !created { @@ -1418,21 +1408,21 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSerie // on top of a regular hashmap and holds a slice of series to resolve hash collisions. // Its methods require the hash to be submitted with it to avoid re-computations throughout // the code. 
-type seriesHashmap map[uint64][]*memSeries +type seriesHashmap map[uint64][]*record.MemSeries -func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries { +func (m seriesHashmap) get(hash uint64, lset labels.Labels) *record.MemSeries { for _, s := range m[hash] { - if s.lset.Equals(lset) { + if s.Lset.Equals(lset) { return s } } return nil } -func (m seriesHashmap) set(hash uint64, s *memSeries) { +func (m seriesHashmap) set(hash uint64, s *record.MemSeries) { l := m[hash] for i, prev := range l { - if prev.lset.Equals(s.lset) { + if prev.Lset.Equals(s.Lset) { l[i] = s return } @@ -1441,9 +1431,9 @@ func (m seriesHashmap) set(hash uint64, s *memSeries) { } func (m seriesHashmap) del(hash uint64, lset labels.Labels) { - var rem []*memSeries + var rem []*record.MemSeries for _, s := range m[hash] { - if !s.lset.Equals(lset) { + if !s.Lset.Equals(lset) { rem = append(rem, s) } } @@ -1459,7 +1449,7 @@ func (m seriesHashmap) del(hash uint64, lset labels.Labels) { // with the maps was profiled to be slower – likely due to the additional pointer // dereferences. 
type stripeSeries struct { - series [stripeSize]map[uint64]*memSeries + series [stripeSize]map[uint64]*record.MemSeries hashes [stripeSize]seriesHashmap locks [stripeSize]stripeLock } @@ -1479,7 +1469,7 @@ func newStripeSeries() *stripeSeries { s := &stripeSeries{} for i := range s.series { - s.series[i] = map[uint64]*memSeries{} + s.series[i] = map[uint64]*record.MemSeries{} } for i := range s.hashes { s.hashes[i] = seriesHashmap{} @@ -1502,9 +1492,9 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { for hash, all := range s.hashes[i] { for _, series := range all { series.Lock() - rmChunks += series.truncateChunksBefore(mint) + rmChunks += series.TruncateChunksBefore(mint) - if len(series.chunks) > 0 || series.pendingCommit { + if len(series.Chunks) > 0 || series.PendingCommit { series.Unlock() continue } @@ -1514,15 +1504,15 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { // series alike. // If we don't hold them all, there's a very small chance that a series receives // samples again while we are half-way into deleting it. 
- j := int(series.ref & stripeMask) + j := int(series.Ref & stripeMask) if i != j { s.locks[j].Lock() } - deleted[series.ref] = struct{}{} - s.hashes[i].del(hash, series.lset) - delete(s.series[j], series.ref) + deleted[series.Ref] = struct{}{} + s.hashes[i].del(hash, series.Lset) + delete(s.series[j], series.Ref) if i != j { s.locks[j].Unlock() @@ -1538,7 +1528,7 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { return deleted, rmChunks } -func (s *stripeSeries) getByID(id uint64) *memSeries { +func (s *stripeSeries) getByID(id uint64) *record.MemSeries { i := id & stripeMask s.locks[i].RLock() @@ -1548,7 +1538,7 @@ func (s *stripeSeries) getByID(id uint64) *memSeries { return series } -func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *memSeries { +func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *record.MemSeries { i := hash & stripeMask s.locks[i].RLock() @@ -1558,220 +1548,27 @@ func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *memSeries { return series } -func (s *stripeSeries) getOrSet(hash uint64, series *memSeries) (*memSeries, bool) { +func (s *stripeSeries) getOrSet(hash uint64, series *record.MemSeries) (*record.MemSeries, bool) { i := hash & stripeMask s.locks[i].Lock() - if prev := s.hashes[i].get(hash, series.lset); prev != nil { + if prev := s.hashes[i].get(hash, series.Lset); prev != nil { s.locks[i].Unlock() return prev, false } s.hashes[i].set(hash, series) s.locks[i].Unlock() - i = series.ref & stripeMask + i = series.Ref & stripeMask s.locks[i].Lock() - s.series[i][series.ref] = series + s.series[i][series.Ref] = series s.locks[i].Unlock() return series, true } -type sample struct { - t int64 - v float64 -} - -func (s sample) T() int64 { - return s.t -} - -func (s sample) V() float64 { - return s.v -} - -// memSeries is the in-memory representation of a series. None of its methods -// are goroutine safe and it is the caller's responsibility to lock it. 
-type memSeries struct { - sync.Mutex - - ref uint64 - lset labels.Labels - chunks []*memChunk - headChunk *memChunk - chunkRange int64 - firstChunkID int - - nextAt int64 // Timestamp at which to cut the next chunk. - sampleBuf [4]sample - pendingCommit bool // Whether there are samples waiting to be committed to this series. - - app chunkenc.Appender // Current appender for the chunk. -} - -func newMemSeries(lset labels.Labels, id uint64, chunkRange int64) *memSeries { - s := &memSeries{ - lset: lset, - ref: id, - chunkRange: chunkRange, - nextAt: math.MinInt64, - } - return s -} - -func (s *memSeries) minTime() int64 { - if len(s.chunks) == 0 { - return math.MinInt64 - } - return s.chunks[0].minTime -} - -func (s *memSeries) maxTime() int64 { - c := s.head() - if c == nil { - return math.MinInt64 - } - return c.maxTime -} - -func (s *memSeries) cut(mint int64) *memChunk { - c := &memChunk{ - chunk: chunkenc.NewXORChunk(), - minTime: mint, - maxTime: math.MinInt64, - } - s.chunks = append(s.chunks, c) - s.headChunk = c - - // Set upper bound on when the next chunk must be started. An earlier timestamp - // may be chosen dynamically at a later point. - s.nextAt = rangeForTimestamp(mint, s.chunkRange) - - app, err := c.chunk.Appender() - if err != nil { - panic(err) - } - s.app = app - return c -} - -func (s *memSeries) chunksMetas() []chunks.Meta { - metas := make([]chunks.Meta, 0, len(s.chunks)) - for _, chk := range s.chunks { - metas = append(metas, chunks.Meta{Chunk: chk.chunk, MinTime: chk.minTime, MaxTime: chk.maxTime}) - } - return metas -} - -// reset re-initialises all the variable in the memSeries except 'lset', 'ref', -// and 'chunkRange', like how it would appear after 'newMemSeries(...)'. 
-func (s *memSeries) reset() { - s.chunks = nil - s.headChunk = nil - s.firstChunkID = 0 - s.nextAt = math.MinInt64 - s.sampleBuf = [4]sample{} - s.pendingCommit = false - s.app = nil -} - -// appendable checks whether the given sample is valid for appending to the series. -func (s *memSeries) appendable(t int64, v float64) error { - c := s.head() - if c == nil { - return nil - } - - if t > c.maxTime { - return nil - } - if t < c.maxTime { - return ErrOutOfOrderSample - } - // We are allowing exact duplicates as we can encounter them in valid cases - // like federation and erroring out at that time would be extremely noisy. - if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) { - return ErrAmendSample - } - return nil -} - -func (s *memSeries) chunk(id int) *memChunk { - ix := id - s.firstChunkID - if ix < 0 || ix >= len(s.chunks) { - return nil - } - return s.chunks[ix] -} - -func (s *memSeries) chunkID(pos int) int { - return pos + s.firstChunkID -} - -// truncateChunksBefore removes all chunks from the series that have not timestamp -// at or after mint. Chunk IDs remain unchanged. -func (s *memSeries) truncateChunksBefore(mint int64) (removed int) { - var k int - for i, c := range s.chunks { - if c.maxTime >= mint { - break - } - k = i + 1 - } - s.chunks = append(s.chunks[:0], s.chunks[k:]...) - s.firstChunkID += k - if len(s.chunks) == 0 { - s.headChunk = nil - } else { - s.headChunk = s.chunks[len(s.chunks)-1] - } - - return k -} - -// append adds the sample (t, v) to the series. -func (s *memSeries) append(t int64, v float64) (success, chunkCreated bool) { - // Based on Gorilla white papers this offers near-optimal compression ratio - // so anything bigger that this has diminishing returns and increases - // the time range within which we have to decompress all samples. - const samplesPerChunk = 120 - - c := s.head() - - if c == nil { - c = s.cut(t) - chunkCreated = true - } - numSamples := c.chunk.NumSamples() - - // Out of order sample. 
- if c.maxTime >= t { - return false, chunkCreated - } - // If we reach 25% of a chunk's desired sample count, set a definitive time - // at which to start the next chunk. - // At latest it must happen at the timestamp set when the chunk was cut. - if numSamples == samplesPerChunk/4 { - s.nextAt = computeChunkEndTime(c.minTime, c.maxTime, s.nextAt) - } - if t >= s.nextAt { - c = s.cut(t) - chunkCreated = true - } - s.app.Append(t, v) - - c.maxTime = t - - s.sampleBuf[0] = s.sampleBuf[1] - s.sampleBuf[1] = s.sampleBuf[2] - s.sampleBuf[2] = s.sampleBuf[3] - s.sampleBuf[3] = sample{t: t, v: v} - - return true, chunkCreated -} - // computeChunkEndTime estimates the end timestamp based the beginning of a chunk, // its current timestamp and the upper bound up to which we insert data. // It assumes that the time range is 1/4 full. diff --git a/head_test.go b/head_test.go index 040ae828..50b66196 100644 --- a/head_test.go +++ b/head_test.go @@ -30,6 +30,7 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" "github.com/prometheus/tsdb/tsdbutil" "github.com/prometheus/tsdb/wal" @@ -51,14 +52,14 @@ func BenchmarkCreateSeries(b *testing.B) { } func populateTestWAL(t testing.TB, w *wal.WAL, recs []interface{}) { - var enc RecordEncoder + var enc record.RecordEncoder for _, r := range recs { switch v := r.(type) { - case []RefSeries: + case []record.RefSeries: testutil.Ok(t, w.Log(enc.Series(v, nil))) - case []RefSample: + case []record.RefSample: testutil.Ok(t, w.Log(enc.Samples(v, nil))) - case []Stone: + case []record.Stone: testutil.Ok(t, w.Log(enc.Tombstones(v, nil))) } } @@ -69,22 +70,22 @@ func readTestWAL(t testing.TB, dir string) (recs []interface{}) { testutil.Ok(t, err) defer sr.Close() - var dec RecordDecoder + var dec record.RecordDecoder r := wal.NewReader(sr) for r.Next() { rec := r.Record() switch dec.Type(rec) { - case 
RecordSeries: + case record.RecordSeries: series, err := dec.Series(rec, nil) testutil.Ok(t, err) recs = append(recs, series) - case RecordSamples: + case record.RecordSamples: samples, err := dec.Samples(rec, nil) testutil.Ok(t, err) recs = append(recs, samples) - case RecordTombstones: + case record.RecordTombstones: tstones, err := dec.Tombstones(rec, nil) testutil.Ok(t, err) recs = append(recs, tstones) @@ -223,38 +224,38 @@ func TestHead_Truncate(t *testing.T) { s3, _ := h.getOrCreate(3, labels.FromStrings("a", "1", "b", "2")) s4, _ := h.getOrCreate(4, labels.FromStrings("a", "2", "b", "2", "c", "1")) - s1.chunks = []*memChunk{ - {minTime: 0, maxTime: 999}, - {minTime: 1000, maxTime: 1999}, - {minTime: 2000, maxTime: 2999}, + s1.Chunks = []*record.MemChunk{ + {MinTime: 0, MaxTime: 999}, + {MinTime: 1000, MaxTime: 1999}, + {MinTime: 2000, MaxTime: 2999}, } - s2.chunks = []*memChunk{ - {minTime: 1000, maxTime: 1999}, - {minTime: 2000, maxTime: 2999}, - {minTime: 3000, maxTime: 3999}, + s2.Chunks = []*record.MemChunk{ + {MinTime: 1000, MaxTime: 1999}, + {MinTime: 2000, MaxTime: 2999}, + {MinTime: 3000, MaxTime: 3999}, } - s3.chunks = []*memChunk{ - {minTime: 0, maxTime: 999}, - {minTime: 1000, maxTime: 1999}, + s3.Chunks = []*record.MemChunk{ + {MinTime: 0, MaxTime: 999}, + {MinTime: 1000, MaxTime: 1999}, } - s4.chunks = []*memChunk{} + s4.Chunks = []*record.MemChunk{} // Truncation need not be aligned. 
testutil.Ok(t, h.Truncate(1)) testutil.Ok(t, h.Truncate(2000)) - testutil.Equals(t, []*memChunk{ - {minTime: 2000, maxTime: 2999}, - }, h.series.getByID(s1.ref).chunks) + testutil.Equals(t, []*record.MemChunk{ + {MinTime: 2000, MaxTime: 2999}, + }, h.series.getByID(s1.Ref).Chunks) - testutil.Equals(t, []*memChunk{ - {minTime: 2000, maxTime: 2999}, - {minTime: 3000, maxTime: 3999}, - }, h.series.getByID(s2.ref).chunks) + testutil.Equals(t, []*record.MemChunk{ + {MinTime: 2000, MaxTime: 2999}, + {MinTime: 3000, MaxTime: 3999}, + }, h.series.getByID(s2.Ref).Chunks) - testutil.Assert(t, h.series.getByID(s3.ref) == nil, "") - testutil.Assert(t, h.series.getByID(s4.ref) == nil, "") + testutil.Assert(t, h.series.getByID(s3.Ref) == nil, "") + testutil.Assert(t, h.series.getByID(s4.Ref) == nil, "") postingsA1, _ := index.ExpandPostings(h.postings.Get("a", "1")) postingsA2, _ := index.ExpandPostings(h.postings.Get("a", "2")) @@ -263,10 +264,10 @@ func TestHead_Truncate(t *testing.T) { postingsC1, _ := index.ExpandPostings(h.postings.Get("c", "1")) postingsAll, _ := index.ExpandPostings(h.postings.Get("", "")) - testutil.Equals(t, []uint64{s1.ref}, postingsA1) - testutil.Equals(t, []uint64{s2.ref}, postingsA2) - testutil.Equals(t, []uint64{s1.ref, s2.ref}, postingsB1) - testutil.Equals(t, []uint64{s1.ref, s2.ref}, postingsAll) + testutil.Equals(t, []uint64{s1.Ref}, postingsA1) + testutil.Equals(t, []uint64{s2.Ref}, postingsA2) + testutil.Equals(t, []uint64{s1.Ref, s2.Ref}, postingsB1) + testutil.Equals(t, []uint64{s1.Ref, s2.Ref}, postingsAll) testutil.Assert(t, postingsB2 == nil, "") testutil.Assert(t, postingsC1 == nil, "") @@ -288,28 +289,28 @@ func TestHead_Truncate(t *testing.T) { // Validate various behaviors brought on by firstChunkID accounting for // garbage collected chunks. 
func TestMemSeries_truncateChunks(t *testing.T) { - s := newMemSeries(labels.FromStrings("a", "b"), 1, 2000) + s := record.NewMemSeries(labels.FromStrings("a", "b"), 1, 2000) for i := 0; i < 4000; i += 5 { - ok, _ := s.append(int64(i), float64(i)) + ok, _ := s.Append(int64(i), float64(i)) testutil.Assert(t, ok == true, "sample append failed") } // Check that truncate removes half of the chunks and afterwards // that the ID of the last chunk still gives us the same chunk afterwards. - countBefore := len(s.chunks) - lastID := s.chunkID(countBefore - 1) - lastChunk := s.chunk(lastID) + countBefore := len(s.Chunks) + lastID := s.ChunkID(countBefore - 1) + lastChunk := s.Chunk(lastID) - testutil.Assert(t, s.chunk(0) != nil, "") + testutil.Assert(t, s.Chunk(0) != nil, "") testutil.Assert(t, lastChunk != nil, "") - s.truncateChunksBefore(2000) + s.TruncateChunksBefore(2000) - testutil.Equals(t, int64(2000), s.chunks[0].minTime) - testutil.Assert(t, s.chunk(0) == nil, "first chunks not gone") - testutil.Equals(t, countBefore/2, len(s.chunks)) - testutil.Equals(t, lastChunk, s.chunk(lastID)) + testutil.Equals(t, int64(2000), s.Chunks[0].MinTime) + testutil.Assert(t, s.Chunk(0) == nil, "first chunks not gone") + testutil.Equals(t, countBefore/2, len(s.Chunks)) + testutil.Equals(t, lastChunk, s.Chunk(lastID)) // Validate that the series' sample buffer is applied correctly to the last chunk // after truncation. 
@@ -371,27 +372,27 @@ func TestHeadDeleteSimple(t *testing.T) { lblDefault := labels.Label{"a", "b"} cases := []struct { - dranges Intervals + dranges record.Intervals smplsExp []sample }{ { - dranges: Intervals{{0, 3}}, + dranges: record.Intervals{{0, 3}}, smplsExp: buildSmpls([]int64{4, 5, 6, 7, 8, 9}), }, { - dranges: Intervals{{1, 3}}, + dranges: record.Intervals{{1, 3}}, smplsExp: buildSmpls([]int64{0, 4, 5, 6, 7, 8, 9}), }, { - dranges: Intervals{{1, 3}, {4, 7}}, + dranges: record.Intervals{{1, 3}, {4, 7}}, smplsExp: buildSmpls([]int64{0, 8, 9}), }, { - dranges: Intervals{{1, 3}, {4, 700}}, + dranges: record.Intervals{{1, 3}, {4, 700}}, smplsExp: buildSmpls([]int64{0}), }, { // This case is to ensure that labels and symbols are deleted. - dranges: Intervals{{0, 9}}, + dranges: record.Intervals{{0, 9}}, smplsExp: buildSmpls([]int64{}), }, } @@ -591,7 +592,7 @@ func TestDeletedSamplesAndSeriesStillInWALAfterCheckpoint(t *testing.T) { testutil.Ok(t, hb.Close()) // Confirm there's been a checkpoint. - cdir, _, err := LastCheckpoint(dir) + cdir, _, err := wal.LastCheckpoint(dir) testutil.Ok(t, err) // Read in checkpoint and WAL. recs := readTestWAL(t, cdir) @@ -600,11 +601,11 @@ func TestDeletedSamplesAndSeriesStillInWALAfterCheckpoint(t *testing.T) { var series, samples, stones int for _, rec := range recs { switch rec.(type) { - case []RefSeries: + case []record.RefSeries: series++ - case []RefSample: + case []record.RefSample: samples++ - case []Stone: + case []record.Stone: stones++ default: t.Fatalf("unknown record type") @@ -692,18 +693,18 @@ func TestDelete_e2e(t *testing.T) { // Delete a time-range from each-selector. 
dels := []struct { ms []labels.Matcher - drange Intervals + drange record.Intervals }{ { ms: []labels.Matcher{labels.NewEqualMatcher("a", "b")}, - drange: Intervals{{300, 500}, {600, 670}}, + drange: record.Intervals{{300, 500}, {600, 670}}, }, { ms: []labels.Matcher{ labels.NewEqualMatcher("a", "b"), labels.NewEqualMatcher("job", "prom-k8s"), }, - drange: Intervals{{300, 500}, {100, 670}}, + drange: record.Intervals{{300, 500}, {100, 670}}, }, { ms: []labels.Matcher{ @@ -711,7 +712,7 @@ func TestDelete_e2e(t *testing.T) { labels.NewEqualMatcher("instance", "localhost:9090"), labels.NewEqualMatcher("job", "prometheus"), }, - drange: Intervals{{300, 400}, {100, 6700}}, + drange: record.Intervals{{300, 400}, {100, 6700}}, }, // TODO: Add Regexp Matchers. } @@ -794,12 +795,12 @@ func boundedSamples(full []tsdbutil.Sample, mint, maxt int64) []tsdbutil.Sample return full } -func deletedSamples(full []tsdbutil.Sample, dranges Intervals) []tsdbutil.Sample { +func deletedSamples(full []tsdbutil.Sample, dranges record.Intervals) []tsdbutil.Sample { ds := make([]tsdbutil.Sample, 0, len(full)) Outer: for _, s := range full { for _, r := range dranges { - if r.inBounds(s.T()) { + if r.InBounds(s.T()) { continue Outer } } @@ -852,42 +853,42 @@ func TestComputeChunkEndTime(t *testing.T) { } func TestMemSeries_append(t *testing.T) { - s := newMemSeries(labels.Labels{}, 1, 500) + s := record.NewMemSeries(labels.Labels{}, 1, 500) // Add first two samples at the very end of a chunk range and the next two // on and after it. // New chunk must correctly be cut at 1000. 
- ok, chunkCreated := s.append(998, 1) + ok, chunkCreated := s.Append(998, 1) testutil.Assert(t, ok, "append failed") testutil.Assert(t, chunkCreated, "first sample created chunk") - ok, chunkCreated = s.append(999, 2) + ok, chunkCreated = s.Append(999, 2) testutil.Assert(t, ok, "append failed") testutil.Assert(t, !chunkCreated, "second sample should use same chunk") - ok, chunkCreated = s.append(1000, 3) + ok, chunkCreated = s.Append(1000, 3) testutil.Assert(t, ok, "append failed") testutil.Assert(t, chunkCreated, "expected new chunk on boundary") - ok, chunkCreated = s.append(1001, 4) + ok, chunkCreated = s.Append(1001, 4) testutil.Assert(t, ok, "append failed") testutil.Assert(t, !chunkCreated, "second sample should use same chunk") - testutil.Assert(t, s.chunks[0].minTime == 998 && s.chunks[0].maxTime == 999, "wrong chunk range") - testutil.Assert(t, s.chunks[1].minTime == 1000 && s.chunks[1].maxTime == 1001, "wrong chunk range") + testutil.Assert(t, s.Chunks[0].MinTime == 998 && s.Chunks[0].MaxTime == 999, "wrong chunk range") + testutil.Assert(t, s.Chunks[1].MinTime == 1000 && s.Chunks[1].MaxTime == 1001, "wrong chunk range") // Fill the range [1000,2000) with many samples. Intermediate chunks should be cut // at approximately 120 samples per chunk. for i := 1; i < 1000; i++ { - ok, _ := s.append(1001+int64(i), float64(i)) + ok, _ := s.Append(1001+int64(i), float64(i)) testutil.Assert(t, ok, "append failed") } - testutil.Assert(t, len(s.chunks) > 7, "expected intermediate chunks") + testutil.Assert(t, len(s.Chunks) > 7, "expected intermediate chunks") // All chunks but the first and last should now be moderately full. 
- for i, c := range s.chunks[1 : len(s.chunks)-1] { - testutil.Assert(t, c.chunk.NumSamples() > 100, "unexpected small chunk %d of length %d", i, c.chunk.NumSamples()) + for i, c := range s.Chunks[1 : len(s.Chunks)-1] { + testutil.Assert(t, c.Chunk.NumSamples() > 100, "unexpected small chunk %d of length %d", i, c.Chunk.NumSamples()) } } @@ -900,9 +901,9 @@ func TestGCChunkAccess(t *testing.T) { h.initTime(0) s, _ := h.getOrCreate(1, labels.FromStrings("a", "1")) - s.chunks = []*memChunk{ - {minTime: 0, maxTime: 999}, - {minTime: 1000, maxTime: 1999}, + s.Chunks = []*record.MemChunk{ + {MinTime: 0, MaxTime: 999}, + {MinTime: 1000, MaxTime: 1999}, } idx := h.indexRange(0, 1500) @@ -926,7 +927,7 @@ func TestGCChunkAccess(t *testing.T) { testutil.Ok(t, h.Truncate(1500)) // Remove a chunk. _, err = cr.Chunk(chunks[0].Ref) - testutil.Equals(t, ErrNotFound, err) + testutil.Equals(t, record.ErrNotFound, err) _, err = cr.Chunk(chunks[1].Ref) testutil.Ok(t, err) } @@ -940,9 +941,9 @@ func TestGCSeriesAccess(t *testing.T) { h.initTime(0) s, _ := h.getOrCreate(1, labels.FromStrings("a", "1")) - s.chunks = []*memChunk{ - {minTime: 0, maxTime: 999}, - {minTime: 1000, maxTime: 1999}, + s.Chunks = []*record.MemChunk{ + {MinTime: 0, MaxTime: 999}, + {MinTime: 1000, MaxTime: 1999}, } idx := h.indexRange(0, 2000) @@ -965,12 +966,12 @@ func TestGCSeriesAccess(t *testing.T) { testutil.Ok(t, h.Truncate(2000)) // Remove the series. 
- testutil.Equals(t, (*memSeries)(nil), h.series.getByID(1)) + testutil.Equals(t, (*record.MemSeries)(nil), h.series.getByID(1)) _, err = cr.Chunk(chunks[0].Ref) - testutil.Equals(t, ErrNotFound, err) + testutil.Equals(t, record.ErrNotFound, err) _, err = cr.Chunk(chunks[1].Ref) - testutil.Equals(t, ErrNotFound, err) + testutil.Equals(t, record.ErrNotFound, err) } func TestUncommittedSamplesNotLostOnTruncate(t *testing.T) { @@ -1028,7 +1029,7 @@ func TestRemoveSeriesAfterRollbackAndTruncate(t *testing.T) { // Truncate again, this time the series should be deleted testutil.Ok(t, h.Truncate(2050)) - testutil.Equals(t, (*memSeries)(nil), h.series.getByHash(lset.Hash(), lset)) + testutil.Equals(t, (*record.MemSeries)(nil), h.series.getByHash(lset.Hash(), lset)) } func TestHead_LogRollback(t *testing.T) { @@ -1080,7 +1081,7 @@ func TestWalRepair_DecodingError(t *testing.T) { res[0] = byte(RecordInvalid) return res }, - enc.Series([]RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), + enc.Series([]record.RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), 9, 5, }, @@ -1088,7 +1089,7 @@ func TestWalRepair_DecodingError(t *testing.T) { func(rec []byte) []byte { return rec[:3] }, - enc.Series([]RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), + enc.Series([]record.RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), 9, 5, }, @@ -1096,7 +1097,7 @@ func TestWalRepair_DecodingError(t *testing.T) { func(rec []byte) []byte { return rec[:3] }, - enc.Samples([]RefSample{{Ref: 0, T: 99, V: 1}}, []byte{}), + enc.Samples([]record.RefSample{{Ref: 0, T: 99, V: 1}}, []byte{}), 9, 5, }, @@ -1104,7 +1105,7 @@ func TestWalRepair_DecodingError(t *testing.T) { func(rec []byte) []byte { return rec[:3] }, - enc.Tombstones([]Stone{{ref: 1, intervals: Intervals{}}}, []byte{}), + enc.Tombstones([]record.Stone{{Ref: 1, Intervals: record.Intervals{}}}, []byte{}), 9, 5, }, diff --git a/mocks_test.go b/mocks_test.go index 
35f5ffec..f48ea9aa 100644 --- a/mocks_test.go +++ b/mocks_test.go @@ -18,6 +18,7 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" ) type mockIndexWriter struct { diff --git a/querier.go b/querier.go index fbd9493f..2b655106 100644 --- a/querier.go +++ b/querier.go @@ -25,6 +25,7 @@ import ( tsdb_errors "github.com/prometheus/tsdb/errors" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" ) // Querier provides querying access over time series data of a fixed @@ -204,7 +205,7 @@ func NewBlockQuerier(b BlockReader, mint, maxt int64) (Querier, error) { type blockQuerier struct { index IndexReader chunks ChunkReader - tombstones TombstoneReader + tombstones record.TombstoneReader closed bool @@ -670,7 +671,7 @@ func (s *mergedVerticalSeriesSet) Next() bool { // actual series itself. type ChunkSeriesSet interface { Next() bool - At() (labels.Labels, []chunks.Meta, Intervals) + At() (labels.Labels, []chunks.Meta, record.Intervals) Err() error } @@ -679,19 +680,19 @@ type ChunkSeriesSet interface { type baseChunkSeries struct { p index.Postings index IndexReader - tombstones TombstoneReader + tombstones record.TombstoneReader lset labels.Labels chks []chunks.Meta - intervals Intervals + intervals record.Intervals err error } // LookupChunkSeries retrieves all series for the given matchers and returns a ChunkSeriesSet // over them. It drops chunks based on tombstones in the given reader. -func LookupChunkSeries(ir IndexReader, tr TombstoneReader, ms ...labels.Matcher) (ChunkSeriesSet, error) { +func LookupChunkSeries(ir IndexReader, tr record.TombstoneReader, ms ...labels.Matcher) (ChunkSeriesSet, error) { if tr == nil { - tr = newMemTombstones() + tr = record.NewMemTombstones() } p, err := PostingsForMatchers(ir, ms...) 
if err != nil { @@ -704,7 +705,7 @@ func LookupChunkSeries(ir IndexReader, tr TombstoneReader, ms ...labels.Matcher) }, nil } -func (s *baseChunkSeries) At() (labels.Labels, []chunks.Meta, Intervals) { +func (s *baseChunkSeries) At() (labels.Labels, []chunks.Meta, record.Intervals) { return s.lset, s.chks, s.intervals } @@ -721,7 +722,7 @@ func (s *baseChunkSeries) Next() bool { ref := s.p.At() if err := s.index.Series(ref, &lset, &chkMetas); err != nil { // Postings may be stale. Skip if no underlying series exists. - if errors.Cause(err) == ErrNotFound { + if errors.Cause(err) == record.ErrNotFound { continue } s.err = err @@ -740,7 +741,7 @@ func (s *baseChunkSeries) Next() bool { // Only those chunks that are not entirely deleted. chks := make([]chunks.Meta, 0, len(s.chks)) for _, chk := range s.chks { - if !(Interval{chk.MinTime, chk.MaxTime}.isSubrange(s.intervals)) { + if !(record.Interval{chk.MinTime, chk.MaxTime}.IsSubrange(s.intervals)) { chks = append(chks, chk) } } @@ -767,10 +768,10 @@ type populatedChunkSeries struct { err error chks []chunks.Meta lset labels.Labels - intervals Intervals + intervals record.Intervals } -func (s *populatedChunkSeries) At() (labels.Labels, []chunks.Meta, Intervals) { +func (s *populatedChunkSeries) At() (labels.Labels, []chunks.Meta, record.Intervals) { return s.lset, s.chks, s.intervals } @@ -801,7 +802,7 @@ func (s *populatedChunkSeries) Next() bool { c.Chunk, s.err = s.chunks.Chunk(c.Ref) if s.err != nil { // This means that the chunk has be garbage collected. Remove it from the list. - if s.err == ErrNotFound { + if s.err == record.ErrNotFound { s.err = nil // Delete in-place. s.chks = append(chks[:j], chks[j+1:]...) 
@@ -865,7 +866,7 @@ type chunkSeries struct { mint, maxt int64 - intervals Intervals + intervals record.Intervals } func (s *chunkSeries) Labels() labels.Labels { @@ -1066,7 +1067,7 @@ type chunkSeriesIterator struct { maxt, mint int64 - intervals Intervals + intervals record.Intervals } func newChunkSeriesIterator(cs []chunks.Meta, dranges Intervals, mint, maxt int64) *chunkSeriesIterator { @@ -1168,7 +1169,7 @@ func (it *chunkSeriesIterator) Err() error { type deletedIterator struct { it chunkenc.Iterator - intervals Intervals + intervals record.Intervals } func (it *deletedIterator) At() (int64, float64) { @@ -1181,7 +1182,7 @@ Outer: ts, _ := it.it.At() for _, tr := range it.intervals { - if tr.inBounds(ts) { + if tr.InBounds(ts) { continue Outer } diff --git a/querier_test.go b/querier_test.go index 2be48fcd..a1bdf395 100644 --- a/querier_test.go +++ b/querier_test.go @@ -29,6 +29,7 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" "github.com/prometheus/tsdb/tsdbutil" ) @@ -188,6 +189,19 @@ func expandSeriesIterator(it SeriesIterator) (r []tsdbutil.Sample, err error) { return r, it.Err() } +type sample struct { + t int64 + v float64 +} + +func (s sample) T() int64 { + return s.t +} + +func (s sample) V() float64 { + return s.v +} + type seriesSamples struct { lset map[string]string chunks [][]sample @@ -368,7 +382,7 @@ Outer: querier := &blockQuerier{ index: ir, chunks: cr, - tombstones: newMemTombstones(), + tombstones: record.NewMemTombstones(), mint: c.mint, maxt: c.maxt, @@ -415,7 +429,7 @@ func TestBlockQuerierDelete(t *testing.T) { cases := struct { data []seriesSamples - tombstones TombstoneReader + tombstones record.TombstoneReader queries []query }{ data: []seriesSamples{ @@ -460,10 +474,10 @@ func TestBlockQuerierDelete(t *testing.T) { }, }, }, - tombstones: &memTombstones{intvlGroups: map[uint64]Intervals{ 
- 1: Intervals{{1, 3}}, - 2: Intervals{{1, 3}, {6, 10}}, - 3: Intervals{{6, 10}}, + tombstones: &record.MemTombstones{IntvlGroups: map[uint64]record.Intervals{ + 1: record.Intervals{{1, 3}}, + 2: record.Intervals{{1, 3}, {6, 10}}, + 3: record.Intervals{{6, 10}}, }}, queries: []query{ { @@ -637,7 +651,7 @@ func TestBaseChunkSeries(t *testing.T) { bcs := &baseChunkSeries{ p: index.NewListPostings(tc.postings), index: mi, - tombstones: newMemTombstones(), + tombstones: record.NewMemTombstones(), } i := 0 @@ -1159,7 +1173,7 @@ func (m *mockChunkSeriesSet) Next() bool { return m.i < len(m.l) } -func (m *mockChunkSeriesSet) At() (labels.Labels, []chunks.Meta, Intervals) { +func (m *mockChunkSeriesSet) At() (labels.Labels, []chunks.Meta, record.Intervals) { return m.l[m.i], m.cm[m.i], nil } @@ -1254,18 +1268,18 @@ func TestDeletedIterator(t *testing.T) { } cases := []struct { - r Intervals + r record.Intervals }{ - {r: Intervals{{1, 20}}}, - {r: Intervals{{1, 10}, {12, 20}, {21, 23}, {25, 30}}}, - {r: Intervals{{1, 10}, {12, 20}, {20, 30}}}, - {r: Intervals{{1, 10}, {12, 23}, {25, 30}}}, - {r: Intervals{{1, 23}, {12, 20}, {25, 30}}}, - {r: Intervals{{1, 23}, {12, 20}, {25, 3000}}}, - {r: Intervals{{0, 2000}}}, - {r: Intervals{{500, 2000}}}, - {r: Intervals{{0, 200}}}, - {r: Intervals{{1000, 20000}}}, + {r: record.Intervals{{1, 20}}}, + {r: record.Intervals{{1, 10}, {12, 20}, {21, 23}, {25, 30}}}, + {r: record.Intervals{{1, 10}, {12, 20}, {20, 30}}}, + {r: record.Intervals{{1, 10}, {12, 23}, {25, 30}}}, + {r: record.Intervals{{1, 23}, {12, 20}, {25, 30}}}, + {r: record.Intervals{{1, 23}, {12, 20}, {25, 3000}}}, + {r: record.Intervals{{0, 2000}}}, + {r: record.Intervals{{500, 2000}}}, + {r: record.Intervals{{0, 200}}}, + {r: record.Intervals{{1000, 20000}}}, } for _, c := range cases { @@ -1275,7 +1289,7 @@ func TestDeletedIterator(t *testing.T) { for it.Next() { i++ for _, tr := range ranges { - if tr.inBounds(i) { + if tr.InBounds(i) { i = tr.Maxt + 1 ranges = ranges[1:] 
} @@ -1290,7 +1304,7 @@ func TestDeletedIterator(t *testing.T) { // There has been an extra call to Next(). i++ for _, tr := range ranges { - if tr.inBounds(i) { + if tr.InBounds(i) { i = tr.Maxt + 1 ranges = ranges[1:] } @@ -1403,7 +1417,7 @@ func (m mockIndex) SortedPostings(p index.Postings) index.Postings { func (m mockIndex) Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta) error { s, ok := m.series[ref] if !ok { - return ErrNotFound + return record.ErrNotFound } *lset = append((*lset)[:0], s.l...) *chks = append((*chks)[:0], s.chunks...) diff --git a/record/internal.go b/record/internal.go new file mode 100644 index 00000000..840023c6 --- /dev/null +++ b/record/internal.go @@ -0,0 +1,371 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package record + +import ( + "errors" + "hash" + "hash/crc32" + "math" + "os" + "path/filepath" + "sync" + + "github.com/prometheus/tsdb/chunkenc" + "github.com/prometheus/tsdb/chunks" + "github.com/prometheus/tsdb/fileutil" + "github.com/prometheus/tsdb/labels" +) + +var ( + // ErrOutOfOrderSample is returned if an appended sample has a + // timestamp smaller than the most recent sample. + ErrOutOfOrderSample = errors.New("out of order sample") + + // ErrNotFound is returned if a looked up resource was not found. 
+ ErrNotFound = errors.New("not found") + + // ErrAmendSample is returned if an appended sample has the same timestamp + // as the most recent sample but a different value. + ErrAmendSample = errors.New("amending sample") +) + +// The table gets initialized with sync.Once but may still cause a race +// with any other use of the crc32 package anywhere. Thus we initialize it +// before. +var castagnoliTable *crc32.Table + +func init() { + castagnoliTable = crc32.MakeTable(crc32.Castagnoli) +} + +// NewCRC32 initializes a CRC32 hash with a preconfigured polynomial, so the +// polynomial may be easily changed in one location at a later time, if necessary. +func NewCRC32() hash.Hash32 { + return crc32.New(castagnoliTable) +} + +type sample struct { + t int64 + v float64 +} + +func (s sample) T() int64 { + return s.t +} + +func (s sample) V() float64 { + return s.v +} + +// SizeReader returns the size of the object in bytes. +type SizeReader interface { + // Size returns the size in bytes. + Size() int64 +} + +// RefSeries is the series labels with the series ID. +type RefSeries struct { + Ref uint64 + Labels labels.Labels +} + +// RefSample is a timestamp/value pair associated with a reference to a series. +type RefSample struct { + Ref uint64 + T int64 + V float64 + Series *MemSeries +} + +// MemSeries is the in-memory representation of a series. None of its methods +// are goroutine safe and it is the caller's responsibility to lock it. +type MemSeries struct { + sync.Mutex + + Ref uint64 + PendingCommit bool // Whether there are samples waiting to be committed to this series. + Chunks []*MemChunk + Lset labels.Labels + + headChunk *MemChunk + chunkRange int64 + firstChunkID int + + nextAt int64 // Timestamp at which to cut the next chunk. + sampleBuf [4]sample + + app chunkenc.Appender // Current appender for the chunk. 
+} + +func NewMemSeries(lset labels.Labels, id uint64, chunkRange int64) *MemSeries { + s := &MemSeries{ + Lset: lset, + Ref: id, + chunkRange: chunkRange, + nextAt: math.MinInt64, + } + return s +} + +func (s *MemSeries) MinTime() int64 { + if len(s.Chunks) == 0 { + return math.MinInt64 + } + return s.Chunks[0].MinTime +} + +func (s *MemSeries) MaxTime() int64 { + c := s.head() + if c == nil { + return math.MinInt64 + } + return c.MaxTime +} + +func (s *MemSeries) cut(mint int64) *MemChunk { + c := &MemChunk{ + Chunk: chunkenc.NewXORChunk(), + MinTime: mint, + MaxTime: math.MinInt64, + } + s.Chunks = append(s.Chunks, c) + s.headChunk = c + + // Set upper bound on when the next chunk must be started. An earlier timestamp + // may be chosen dynamically at a later point. + s.nextAt = rangeForTimestamp(mint, s.chunkRange) + + app, err := c.Chunk.Appender() + if err != nil { + panic(err) + } + s.app = app + return c +} + +func (s *MemSeries) ChunksMetas() []chunks.Meta { + metas := make([]chunks.Meta, 0, len(s.Chunks)) + for _, chk := range s.Chunks { + metas = append(metas, chunks.Meta{Chunk: chk.Chunk, MinTime: chk.MinTime, MaxTime: chk.MaxTime}) + } + return metas +} + +// reset re-initialises all the variable in the MemSeries except 'lset', 'ref', +// and 'chunkRange', like how it would appear after 'newMemSeries(...)'. +func (s *MemSeries) Reset() { + s.Chunks = nil + s.headChunk = nil + s.firstChunkID = 0 + s.nextAt = math.MinInt64 + s.sampleBuf = [4]sample{} + s.PendingCommit = false + s.app = nil +} + +// Appendable checks whether the given sample is valid for appending to the series. +func (s *MemSeries) Appendable(t int64, v float64) error { + c := s.head() + if c == nil { + return nil + } + + if t > c.MaxTime { + return nil + } + if t < c.MaxTime { + return ErrOutOfOrderSample + } + // We are allowing exact duplicates as we can encounter them in valid cases + // like federation and erroring out at that time would be extremely noisy. 
+ if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) { + return ErrAmendSample + } + return nil +} + +func (s *MemSeries) Chunk(id int) *MemChunk { + ix := id - s.firstChunkID + if ix < 0 || ix >= len(s.Chunks) { + return nil + } + return s.Chunks[ix] +} + +func (s *MemSeries) ChunkID(pos int) int { + return pos + s.firstChunkID +} + +// TruncateChunksBefore removes all chunks from the series that have not timestamp +// at or after mint. Chunk IDs remain unchanged. +func (s *MemSeries) TruncateChunksBefore(mint int64) (removed int) { + var k int + for i, c := range s.Chunks { + if c.MaxTime >= mint { + break + } + k = i + 1 + } + s.Chunks = append(s.Chunks[:0], s.Chunks[k:]...) + s.firstChunkID += k + if len(s.Chunks) == 0 { + s.headChunk = nil + } else { + s.headChunk = s.Chunks[len(s.Chunks)-1] + } + + return k +} + +// Append adds the sample (t, v) to the series. +func (s *MemSeries) Append(t int64, v float64) (success, chunkCreated bool) { + // Based on Gorilla white papers this offers near-optimal compression ratio + // so anything bigger that this has diminishing returns and increases + // the time range within which we have to decompress all samples. + const samplesPerChunk = 120 + + c := s.head() + + if c == nil { + c = s.cut(t) + chunkCreated = true + } + numSamples := c.Chunk.NumSamples() + + // Out of order sample. + if c.MaxTime >= t { + return false, chunkCreated + } + // If we reach 25% of a chunk's desired sample count, set a definitive time + // at which to start the next chunk. + // At latest it must happen at the timestamp set when the chunk was cut. 
+ if numSamples == samplesPerChunk/4 { + s.nextAt = computeChunkEndTime(c.MinTime, c.MaxTime, s.nextAt) + } + if t >= s.nextAt { + c = s.cut(t) + chunkCreated = true + } + s.app.Append(t, v) + + c.MaxTime = t + + s.sampleBuf[0] = s.sampleBuf[1] + s.sampleBuf[1] = s.sampleBuf[2] + s.sampleBuf[2] = s.sampleBuf[3] + s.sampleBuf[3] = sample{t: t, v: v} + + return true, chunkCreated +} + +func (s *MemSeries) Iterator(id int) chunkenc.Iterator { + c := s.Chunk(id) + // TODO(fabxc): Work around! A querier may have retrieved a pointer to a series' chunk, + // which got then garbage collected before it got accessed. + // We must ensure to not garbage collect as long as any readers still hold a reference. + if c == nil { + return chunkenc.NewNopIterator() + } + + if id-s.firstChunkID < len(s.Chunks)-1 { + return c.Chunk.Iterator() + } + // Serve the last 4 samples for the last chunk from the sample buffer + // as their compressed bytes may be mutated by added samples. + it := &MemSafeIterator{ + Iterator: c.Chunk.Iterator(), + i: -1, + total: c.Chunk.NumSamples(), + buf: s.sampleBuf, + } + return it +} + +func (s *MemSeries) head() *MemChunk { + return s.headChunk +} + +type MemChunk struct { + Chunk chunkenc.Chunk + MinTime, MaxTime int64 +} + +// Returns true if the chunk overlaps [mint, maxt]. 
+func (mc *MemChunk) OverlapsClosedInterval(mint, maxt int64) bool { + return mc.MinTime <= maxt && mint <= mc.MaxTime +} + +type MemSafeIterator struct { + chunkenc.Iterator + + i int + total int + buf [4]sample +} + +func (it *MemSafeIterator) Next() bool { + if it.i+1 >= it.total { + return false + } + it.i++ + if it.total-it.i > 4 { + return it.Iterator.Next() + } + return true +} + +func (it *MemSafeIterator) At() (int64, float64) { + if it.total-it.i > 4 { + return it.Iterator.At() + } + s := it.buf[4-(it.total-it.i)] + return s.t, s.v +} + +func rangeForTimestamp(t int64, width int64) (maxt int64) { + return (t/width)*width + width +} + +// computeChunkEndTime estimates the end timestamp based the beginning of a chunk, +// its current timestamp and the upper bound up to which we insert data. +// It assumes that the time range is 1/4 full. +func computeChunkEndTime(start, cur, max int64) int64 { + a := (max - start) / ((cur - start + 1) * 4) + if a == 0 { + return max + } + return start + (max-start)/a +} + +// RenameFile renames the file from, removing to if it already exists before doing the rename. +func RenameFile(from, to string) error { + if err := os.RemoveAll(to); err != nil { + return err + } + if err := os.Rename(from, to); err != nil { + return err + } + + // Directory was renamed; sync parent dir to persist rename. + pdir, err := fileutil.OpenDir(filepath.Dir(to)) + if err != nil { + return err + } + + if err = pdir.Sync(); err != nil { + pdir.Close() + return err + } + return pdir.Close() +} diff --git a/record.go b/record/record.go similarity index 97% rename from record.go rename to record/record.go index 8d9c5751..887f9275 100644 --- a/record.go +++ b/record/record.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package tsdb +package record import ( "math" @@ -131,8 +131,8 @@ func (d *RecordDecoder) Tombstones(rec []byte, tstones []Stone) ([]Stone, error) } for dec.Len() > 0 && dec.Err() == nil { tstones = append(tstones, Stone{ - ref: dec.Be64(), - intervals: Intervals{ + Ref: dec.Be64(), + Intervals: Intervals{ {Mint: dec.Varint64(), Maxt: dec.Varint64()}, }, }) @@ -198,8 +198,8 @@ func (e *RecordEncoder) Tombstones(tstones []Stone, b []byte) []byte { buf.PutByte(byte(RecordTombstones)) for _, s := range tstones { - for _, iv := range s.intervals { - buf.PutBE64(s.ref) + for _, iv := range s.Intervals { + buf.PutBE64(s.Ref) buf.PutVarint64(iv.Mint) buf.PutVarint64(iv.Maxt) } diff --git a/record_test.go b/record/record_test.go similarity index 89% rename from record_test.go rename to record/record_test.go index 8316ccf3..fdc69514 100644 --- a/record_test.go +++ b/record/record_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package tsdb +package record import ( "testing" @@ -55,11 +55,11 @@ func TestRecord_EncodeDecode(t *testing.T) { // Intervals get split up into single entries. So we don't get back exactly // what we put in. 
tstones := []Stone{ - {ref: 123, intervals: Intervals{ + {Ref: 123, Intervals: Intervals{ {Mint: -1000, Maxt: 1231231}, {Mint: 5000, Maxt: 0}, }}, - {ref: 13, intervals: Intervals{ + {Ref: 13, Intervals: Intervals{ {Mint: -1000, Maxt: -11}, {Mint: 5000, Maxt: 1000}, }}, @@ -67,10 +67,10 @@ func TestRecord_EncodeDecode(t *testing.T) { decTstones, err := dec.Tombstones(enc.Tombstones(tstones, nil), nil) testutil.Ok(t, err) testutil.Equals(t, []Stone{ - {ref: 123, intervals: Intervals{{Mint: -1000, Maxt: 1231231}}}, - {ref: 123, intervals: Intervals{{Mint: 5000, Maxt: 0}}}, - {ref: 13, intervals: Intervals{{Mint: -1000, Maxt: -11}}}, - {ref: 13, intervals: Intervals{{Mint: 5000, Maxt: 1000}}}, + {Ref: 123, Intervals: Intervals{{Mint: -1000, Maxt: 1231231}}}, + {Ref: 123, Intervals: Intervals{{Mint: 5000, Maxt: 0}}}, + {Ref: 13, Intervals: Intervals{{Mint: -1000, Maxt: -11}}}, + {Ref: 13, Intervals: Intervals{{Mint: 5000, Maxt: 1000}}}, }, decTstones) } @@ -105,7 +105,7 @@ func TestRecord_Corruputed(t *testing.T) { t.Run("Test corrupted tombstone record", func(t *testing.T) { tstones := []Stone{ - {ref: 123, intervals: Intervals{ + {Ref: 123, Intervals: Intervals{ {Mint: -1000, Maxt: 1231231}, {Mint: 5000, Maxt: 0}, }}, diff --git a/tombstones.go b/record/tombstones.go similarity index 80% rename from tombstones.go rename to record/tombstones.go index d7b76230..23f62ee7 100644 --- a/tombstones.go +++ b/record/tombstones.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package tsdb +package record import ( "encoding/binary" @@ -30,7 +30,7 @@ import ( "github.com/prometheus/tsdb/fileutil" ) -const tombstoneFilename = "tombstones" +const TombstoneFilename = "tombstones" const ( // MagicTombstone is 4 bytes at the head of a tombstone file. 
@@ -54,7 +54,7 @@ type TombstoneReader interface { Close() error } -func writeTombstoneFile(logger log.Logger, dir string, tr TombstoneReader) (int64, error) { +func WriteTombstoneFile(logger log.Logger, dir string, tr TombstoneReader) (int64, error) { path := filepath.Join(dir, tombstoneFilename) tmp := path + ".tmp" hash := newCRC32() @@ -129,11 +129,11 @@ func writeTombstoneFile(logger log.Logger, dir string, tr TombstoneReader) (int6 // Stone holds the information on the posting and time-range // that is deleted. type Stone struct { - ref uint64 - intervals Intervals + Ref uint64 + Intervals Intervals } -func readTombstones(dir string) (TombstoneReader, int64, error) { +func ReadTombstones(dir string) (TombstoneReader, int64, error) { b, err := ioutil.ReadFile(filepath.Join(dir, tombstoneFilename)) if os.IsNotExist(err) { return newMemTombstones(), 0, nil @@ -158,7 +158,7 @@ func readTombstones(dir string) (TombstoneReader, int64, error) { } // Verify checksum. - hash := newCRC32() + hash := NewCRC32() if _, err := hash.Write(d.Get()); err != nil { return nil, 0, errors.Wrap(err, "write to hash") } @@ -166,7 +166,7 @@ func readTombstones(dir string) (TombstoneReader, int64, error) { return nil, 0, errors.New("checksum did not match") } - stonesMap := newMemTombstones() + stonesMap := NewMemTombstones() for d.Len() > 0 { k := d.Uvarint64() @@ -176,33 +176,33 @@ func readTombstones(dir string) (TombstoneReader, int64, error) { return nil, 0, d.Err() } - stonesMap.addInterval(k, Interval{mint, maxt}) + stonesMap.AddInterval(k, Interval{mint, maxt}) } return stonesMap, int64(len(b)), nil } -type memTombstones struct { - intvlGroups map[uint64]Intervals +type MemTombstones struct { + IntvlGroups map[uint64]Intervals mtx sync.RWMutex } -// newMemTombstones creates new in memory TombstoneReader +// NewMemTombstones creates new in memory TombstoneReader // that allows adding new intervals. 
-func newMemTombstones() *memTombstones { - return &memTombstones{intvlGroups: make(map[uint64]Intervals)} +func NewMemTombstones() *MemTombstones { + return &MemTombstones{IntvlGroups: make(map[uint64]Intervals)} } -func (t *memTombstones) Get(ref uint64) (Intervals, error) { +func (t *MemTombstones) Get(ref uint64) (Intervals, error) { t.mtx.RLock() defer t.mtx.RUnlock() - return t.intvlGroups[ref], nil + return t.IntvlGroups[ref], nil } -func (t *memTombstones) Iter(f func(uint64, Intervals) error) error { +func (t *MemTombstones) Iter(f func(uint64, Intervals) error) error { t.mtx.RLock() defer t.mtx.RUnlock() - for ref, ivs := range t.intvlGroups { + for ref, ivs := range t.IntvlGroups { if err := f(ref, ivs); err != nil { return err } @@ -210,23 +210,23 @@ func (t *memTombstones) Iter(f func(uint64, Intervals) error) error { return nil } -func (t *memTombstones) Total() uint64 { +func (t *MemTombstones) Total() uint64 { t.mtx.RLock() defer t.mtx.RUnlock() total := uint64(0) - for _, ivs := range t.intvlGroups { + for _, ivs := range t.IntvlGroups { total += uint64(len(ivs)) } return total } -// addInterval to an existing memTombstones -func (t *memTombstones) addInterval(ref uint64, itvs ...Interval) { +// AddInterval to an existing MemTombstones +func (t *MemTombstones) AddInterval(ref uint64, itvs ...Interval) { t.mtx.Lock() defer t.mtx.Unlock() for _, itv := range itvs { - t.intvlGroups[ref] = t.intvlGroups[ref].add(itv) + t.IntvlGroups[ref] = t.IntvlGroups[ref].Add(itv) } } @@ -239,13 +239,13 @@ type Interval struct { Mint, Maxt int64 } -func (tr Interval) inBounds(t int64) bool { +func (tr Interval) InBounds(t int64) bool { return t >= tr.Mint && t <= tr.Maxt } -func (tr Interval) isSubrange(dranges Intervals) bool { +func (tr Interval) IsSubrange(dranges Intervals) bool { for _, r := range dranges { - if r.inBounds(tr.Mint) && r.inBounds(tr.Maxt) { + if r.InBounds(tr.Mint) && r.InBounds(tr.Maxt) { return true } } @@ -256,12 +256,12 @@ func (tr Interval) 
isSubrange(dranges Intervals) bool { // Intervals represents a set of increasing and non-overlapping time-intervals. type Intervals []Interval -// add the new time-range to the existing ones. +// Add the new time-range to the existing ones. // The existing ones must be sorted. -func (itvs Intervals) add(n Interval) Intervals { +func (itvs Intervals) Add(n Interval) Intervals { for i, r := range itvs { // TODO(gouthamve): Make this codepath easier to digest. - if r.inBounds(n.Mint-1) || r.inBounds(n.Mint) { + if r.InBounds(n.Mint-1) || r.InBounds(n.Mint) { if n.Maxt > r.Maxt { itvs[i].Maxt = n.Maxt } @@ -282,7 +282,7 @@ func (itvs Intervals) add(n Interval) Intervals { return itvs } - if r.inBounds(n.Maxt+1) || r.inBounds(n.Maxt) { + if r.InBounds(n.Maxt+1) || r.InBounds(n.Maxt) { if n.Mint < r.Maxt { itvs[i].Mint = n.Mint } diff --git a/tombstones_test.go b/record/tombstones_test.go similarity index 89% rename from tombstones_test.go rename to record/tombstones_test.go index 33ebb3bc..1d8bb8e6 100644 --- a/tombstones_test.go +++ b/record/tombstones_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package tsdb +package record import ( "io/ioutil" @@ -33,7 +33,7 @@ func TestWriteAndReadbackTombStones(t *testing.T) { ref := uint64(0) - stones := newMemTombstones() + stones := NewMemTombstones() // Generate the tombstones. for i := 0; i < 100; i++ { ref += uint64(rand.Int31n(10)) + 1 @@ -41,16 +41,16 @@ func TestWriteAndReadbackTombStones(t *testing.T) { dranges := make(Intervals, 0, numRanges) mint := rand.Int63n(time.Now().UnixNano()) for j := 0; j < numRanges; j++ { - dranges = dranges.add(Interval{mint, mint + rand.Int63n(1000)}) + dranges = dranges.Add(Interval{mint, mint + rand.Int63n(1000)}) mint += rand.Int63n(1000) + 1 } - stones.addInterval(ref, dranges...) + stones.AddInterval(ref, dranges...) 
} - _, err := writeTombstoneFile(log.NewNopLogger(), tmpdir, stones) + _, err := WriteTombstoneFile(log.NewNopLogger(), tmpdir, stones) testutil.Ok(t, err) - restr, _, err := readTombstones(tmpdir) + restr, _, err := ReadTombstones(tmpdir) testutil.Ok(t, err) // Compare the two readers. @@ -122,20 +122,20 @@ func TestAddingNewIntervals(t *testing.T) { for _, c := range cases { - testutil.Equals(t, c.exp, c.exist.add(c.new)) + testutil.Equals(t, c.exp, c.exist.Add(c.new)) } } // TestMemTombstonesConcurrency to make sure they are safe to access from different goroutines. func TestMemTombstonesConcurrency(t *testing.T) { - tomb := newMemTombstones() + tomb := NewMemTombstones() totalRuns := 100 var wg sync.WaitGroup wg.Add(2) go func() { for x := 0; x < totalRuns; x++ { - tomb.addInterval(uint64(x), Interval{int64(x), int64(x)}) + tomb.AddInterval(uint64(x), Interval{int64(x), int64(x)}) } wg.Done() }() diff --git a/wal.go b/wal.go index 49f55fe4..27e4c0ba 100644 --- a/wal.go +++ b/wal.go @@ -34,6 +34,7 @@ import ( "github.com/prometheus/tsdb/encoding" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/wal" ) @@ -89,9 +90,9 @@ func newWalMetrics(wal *SegmentWAL, r prometheus.Registerer) *walMetrics { // DEPRECATED: use wal pkg combined with the record codex instead. type WAL interface { Reader() WALReader - LogSeries([]RefSeries) error - LogSamples([]RefSample) error - LogDeletes([]Stone) error + LogSeries([]record.RefSeries) error + LogSamples([]record.RefSample) error + LogDeletes([]record.Stone) error Truncate(mint int64, keep func(uint64) bool) error Close() error } @@ -99,27 +100,12 @@ type WAL interface { // WALReader reads entries from a WAL. 
type WALReader interface { Read( - seriesf func([]RefSeries), - samplesf func([]RefSample), - deletesf func([]Stone), + seriesf func([]record.RefSeries), + samplesf func([]record.RefSample), + deletesf func([]record.Stone), ) error } -// RefSeries is the series labels with the series ID. -type RefSeries struct { - Ref uint64 - Labels labels.Labels -} - -// RefSample is a timestamp/value pair associated with a reference to a series. -type RefSample struct { - Ref uint64 - T int64 - V float64 - - series *memSeries -} - // segmentFile wraps a file object of a segment and tracks the highest timestamp // it contains. During WAL truncating, all segments with no higher timestamp than // the truncation threshold can be compacted. @@ -240,9 +226,9 @@ type repairingWALReader struct { } func (r *repairingWALReader) Read( - seriesf func([]RefSeries), - samplesf func([]RefSample), - deletesf func([]Stone), + seriesf func([]record.RefSeries), + samplesf func([]record.RefSample), + deletesf func([]record.Stone), ) error { err := r.r.Read(seriesf, samplesf, deletesf) if err == nil { @@ -348,8 +334,8 @@ func (w *SegmentWAL) Truncate(mint int64, keep func(uint64) bool) error { var ( csf = newSegmentFile(f) crc32 = newCRC32() - decSeries = []RefSeries{} - activeSeries = []RefSeries{} + decSeries = []record.RefSeries{} + activeSeries = []record.RefSeries{} ) for r.next() { @@ -427,7 +413,7 @@ func (w *SegmentWAL) Truncate(mint int64, keep func(uint64) bool) error { // LogSeries writes a batch of new series labels to the log. // The series have to be ordered. -func (w *SegmentWAL) LogSeries(series []RefSeries) error { +func (w *SegmentWAL) LogSeries(series []record.RefSeries) error { buf := w.getBuffer() flag := w.encodeSeries(buf, series) @@ -454,7 +440,7 @@ func (w *SegmentWAL) LogSeries(series []RefSeries) error { } // LogSamples writes a batch of new samples to the log. 
-func (w *SegmentWAL) LogSamples(samples []RefSample) error { +func (w *SegmentWAL) LogSamples(samples []record.RefSample) error { buf := w.getBuffer() flag := w.encodeSamples(buf, samples) @@ -480,7 +466,7 @@ func (w *SegmentWAL) LogSamples(samples []RefSample) error { } // LogDeletes write a batch of new deletes to the log. -func (w *SegmentWAL) LogDeletes(stones []Stone) error { +func (w *SegmentWAL) LogDeletes(stones []record.Stone) error { buf := w.getBuffer() flag := w.encodeDeletes(buf, stones) @@ -498,7 +484,7 @@ func (w *SegmentWAL) LogDeletes(stones []Stone) error { tf := w.head() for _, s := range stones { - for _, iv := range s.intervals { + for _, iv := range s.Intervals { if tf.maxTime < iv.Maxt { tf.maxTime = iv.Maxt } @@ -791,7 +777,7 @@ const ( walDeletesSimple = 1 ) -func (w *SegmentWAL) encodeSeries(buf *encoding.Encbuf, series []RefSeries) uint8 { +func (w *SegmentWAL) encodeSeries(buf *encoding.Encbuf, series []record.RefSeries) uint8 { for _, s := range series { buf.PutBE64(s.Ref) buf.PutUvarint(len(s.Labels)) @@ -804,7 +790,7 @@ func (w *SegmentWAL) encodeSeries(buf *encoding.Encbuf, series []RefSeries) uint return walSeriesSimple } -func (w *SegmentWAL) encodeSamples(buf *encoding.Encbuf, samples []RefSample) uint8 { +func (w *SegmentWAL) encodeSamples(buf *encoding.Encbuf, samples []record.RefSample) uint8 { if len(samples) == 0 { return walSamplesSimple } @@ -825,10 +811,10 @@ func (w *SegmentWAL) encodeSamples(buf *encoding.Encbuf, samples []RefSample) ui return walSamplesSimple } -func (w *SegmentWAL) encodeDeletes(buf *encoding.Encbuf, stones []Stone) uint8 { +func (w *SegmentWAL) encodeDeletes(buf *encoding.Encbuf, stones []record.Stone) uint8 { for _, s := range stones { - for _, iv := range s.intervals { - buf.PutBE64(s.ref) + for _, iv := range s.Intervals { + buf.PutBE64(s.Ref) buf.PutVarint64(iv.Mint) buf.PutVarint64(iv.Maxt) } @@ -871,9 +857,9 @@ func (r *walReader) Err() error { } func (r *walReader) Read( - seriesf 
func([]RefSeries), - samplesf func([]RefSample), - deletesf func([]Stone), + seriesf func([]record.RefSeries), + samplesf func([]record.RefSample), + deletesf func([]record.Stone), ) error { // Concurrency for replaying the WAL is very limited. We at least split out decoding and // processing into separate threads. @@ -892,19 +878,19 @@ func (r *walReader) Read( for x := range datac { switch v := x.(type) { - case []RefSeries: + case []record.RefSeries: if seriesf != nil { seriesf(v) } //lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty. seriesPool.Put(v[:0]) - case []RefSample: + case []record.RefSample: if samplesf != nil { samplesf(v) } //lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty. samplePool.Put(v[:0]) - case []Stone: + case []record.Stone: if deletesf != nil { deletesf(v) } @@ -925,11 +911,11 @@ func (r *walReader) Read( // Those should generally be catched by entry decoding before. switch et { case WALEntrySeries: - var series []RefSeries + var series []record.RefSeries if v := seriesPool.Get(); v == nil { - series = make([]RefSeries, 0, 512) + series = make([]record.RefSeries, 0, 512) } else { - series = v.([]RefSeries) + series = v.([]record.RefSeries) } err = r.decodeSeries(flag, b, &series) @@ -946,11 +932,11 @@ func (r *walReader) Read( } } case WALEntrySamples: - var samples []RefSample + var samples []record.RefSample if v := samplePool.Get(); v == nil { - samples = make([]RefSample, 0, 512) + samples = make([]record.RefSample, 0, 512) } else { - samples = v.([]RefSample) + samples = v.([]record.RefSample) } err = r.decodeSamples(flag, b, &samples) @@ -968,11 +954,11 @@ func (r *walReader) Read( } } case WALEntryDeletes: - var deletes []Stone + var deletes []record.Stone if v := deletePool.Get(); v == nil { - deletes = make([]Stone, 0, 512) + deletes = make([]record.Stone, 0, 512) } else { - deletes = v.([]Stone) + deletes = v.([]record.Stone) } err = r.decodeDeletes(flag, 
b, &deletes) @@ -985,7 +971,7 @@ func (r *walReader) Read( // Update the times for the WAL segment file. cf := r.current() for _, s := range deletes { - for _, iv := range s.intervals { + for _, iv := range s.Intervals { if cf.maxTime < iv.Maxt { cf.maxTime = iv.Maxt } @@ -1122,7 +1108,7 @@ func (r *walReader) entry(cr io.Reader) (WALEntryType, byte, []byte, error) { return etype, flag, buf, nil } -func (r *walReader) decodeSeries(flag byte, b []byte, res *[]RefSeries) error { +func (r *walReader) decodeSeries(flag byte, b []byte, res *[]record.RefSeries) error { dec := encoding.Decbuf{B: b} for len(dec.B) > 0 && dec.Err() == nil { @@ -1136,7 +1122,7 @@ func (r *walReader) decodeSeries(flag byte, b []byte, res *[]RefSeries) error { } sort.Sort(lset) - *res = append(*res, RefSeries{ + *res = append(*res, record.RefSeries{ Ref: ref, Labels: lset, }) @@ -1150,7 +1136,7 @@ func (r *walReader) decodeSeries(flag byte, b []byte, res *[]RefSeries) error { return nil } -func (r *walReader) decodeSamples(flag byte, b []byte, res *[]RefSample) error { +func (r *walReader) decodeSamples(flag byte, b []byte, res *[]record.RefSample) error { if len(b) == 0 { return nil } @@ -1166,7 +1152,7 @@ func (r *walReader) decodeSamples(flag byte, b []byte, res *[]RefSample) error { dtime := dec.Varint64() val := dec.Be64() - *res = append(*res, RefSample{ + *res = append(*res, record.RefSample{ Ref: uint64(int64(baseRef) + dref), T: baseTime + dtime, V: math.Float64frombits(val), @@ -1182,13 +1168,13 @@ func (r *walReader) decodeSamples(flag byte, b []byte, res *[]RefSample) error { return nil } -func (r *walReader) decodeDeletes(flag byte, b []byte, res *[]Stone) error { +func (r *walReader) decodeDeletes(flag byte, b []byte, res *[]record.Stone) error { dec := &encoding.Decbuf{B: b} for dec.Len() > 0 && dec.Err() == nil { - *res = append(*res, Stone{ - ref: dec.Be64(), - intervals: Intervals{ + *res = append(*res, record.Stone{ + Ref: dec.Be64(), + Intervals: record.Intervals{ {Mint: 
dec.Varint64(), Maxt: dec.Varint64()}, }, }) @@ -1268,23 +1254,23 @@ func MigrateWAL(logger log.Logger, dir string) (err error) { rdr := w.Reader() var ( - enc RecordEncoder + enc record.RecordEncoder b []byte ) decErr := rdr.Read( - func(s []RefSeries) { + func(s []record.RefSeries) { if err != nil { return } err = repl.Log(enc.Series(s, b[:0])) }, - func(s []RefSample) { + func(s []record.RefSample) { if err != nil { return } err = repl.Log(enc.Samples(s, b[:0])) }, - func(s []Stone) { + func(s []record.Stone) { if err != nil { return } diff --git a/checkpoint.go b/wal/checkpoint.go similarity index 88% rename from checkpoint.go rename to wal/checkpoint.go index eccfa62b..d9595ff3 100644 --- a/checkpoint.go +++ b/wal/checkpoint.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package tsdb +package wal import ( "fmt" @@ -27,7 +27,7 @@ import ( "github.com/pkg/errors" tsdb_errors "github.com/prometheus/tsdb/errors" "github.com/prometheus/tsdb/fileutil" - "github.com/prometheus/tsdb/wal" + "github.com/prometheus/tsdb/record" ) // CheckpointStats returns stats about a created checkpoint. @@ -63,7 +63,7 @@ func LastCheckpoint(dir string) (string, int, error) { } return filepath.Join(dir, fi.Name()), idx, nil } - return "", 0, ErrNotFound + return "", 0, record.ErrNotFound } // DeleteCheckpoints deletes all checkpoints in a directory below a given index. @@ -99,15 +99,15 @@ const checkpointPrefix = "checkpoint." // segmented format as the original WAL itself. // This makes it easy to read it through the WAL package and concatenate // it with the original WAL. 
-func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) (*CheckpointStats, error) { +func Checkpoint(w *WAL, from, to int, keep func(id uint64) bool, mint int64) (*CheckpointStats, error) { stats := &CheckpointStats{} var sgmReader io.ReadCloser { - var sgmRange []wal.SegmentRange + var sgmRange []SegmentRange dir, idx, err := LastCheckpoint(w.Dir()) - if err != nil && err != ErrNotFound { + if err != nil && err != record.ErrNotFound { return nil, errors.Wrap(err, "find last checkpoint") } last := idx + 1 @@ -118,11 +118,11 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) // Ignore WAL files below the checkpoint. They shouldn't exist to begin with. from = last - sgmRange = append(sgmRange, wal.SegmentRange{Dir: dir, Last: math.MaxInt32}) + sgmRange = append(sgmRange, SegmentRange{Dir: dir, Last: math.MaxInt32}) } - sgmRange = append(sgmRange, wal.SegmentRange{Dir: w.Dir(), First: from, Last: to}) - sgmReader, err = wal.NewSegmentsRangeReader(sgmRange...) + sgmRange = append(sgmRange, SegmentRange{Dir: w.Dir(), First: from, Last: to}) + sgmReader, err = NewSegmentsRangeReader(sgmRange...) 
if err != nil { return nil, errors.Wrap(err, "create segment reader") } @@ -135,7 +135,7 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) if err := os.MkdirAll(cpdirtmp, 0777); err != nil { return nil, errors.Wrap(err, "create checkpoint dir") } - cp, err := wal.New(nil, nil, cpdirtmp, w.CompressionEnabled()) + cp, err := New(nil, nil, cpdirtmp, w.CompressionEnabled()) if err != nil { return nil, errors.Wrap(err, "open checkpoint") } @@ -146,14 +146,14 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) os.RemoveAll(cpdirtmp) }() - r := wal.NewReader(sgmReader) + r := NewReader(sgmReader) var ( - series []RefSeries - samples []RefSample - tstones []Stone - dec RecordDecoder - enc RecordEncoder + series []record.RefSeries + samples []record.RefSample + tstones []record.Stone + dec record.RecordDecoder + enc record.RecordEncoder buf []byte recs [][]byte ) @@ -167,7 +167,7 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) rec := r.Record() switch dec.Type(rec) { - case RecordSeries: + case record.RecordSeries: series, err = dec.Series(rec, series) if err != nil { return nil, errors.Wrap(err, "decode series") @@ -185,7 +185,7 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) stats.TotalSeries += len(series) stats.DroppedSeries += len(series) - len(repl) - case RecordSamples: + case record.RecordSamples: samples, err = dec.Samples(rec, samples) if err != nil { return nil, errors.Wrap(err, "decode samples") @@ -203,7 +203,7 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) stats.TotalSamples += len(samples) stats.DroppedSamples += len(samples) - len(repl) - case RecordTombstones: + case record.RecordTombstones: tstones, err = dec.Tombstones(rec, tstones) if err != nil { return nil, errors.Wrap(err, "decode deletes") @@ -211,7 +211,7 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, 
mint int64) // Drop irrelevant tombstones in place. repl := tstones[:0] for _, s := range tstones { - for _, iv := range s.intervals { + for _, iv := range s.Intervals { if iv.Maxt >= mint { repl = append(repl, s) break diff --git a/checkpoint_test.go b/wal/checkpoint_test.go similarity index 89% rename from checkpoint_test.go rename to wal/checkpoint_test.go index 0779894b..37e52263 100644 --- a/checkpoint_test.go +++ b/wal/checkpoint_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package tsdb +package wal import ( "fmt" @@ -25,8 +25,8 @@ import ( "github.com/pkg/errors" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" - "github.com/prometheus/tsdb/wal" ) func TestLastCheckpoint(t *testing.T) { @@ -37,7 +37,7 @@ func TestLastCheckpoint(t *testing.T) { }() _, _, err = LastCheckpoint(dir) - testutil.Equals(t, ErrNotFound, err) + testutil.Equals(t, record.ErrNotFound, err) testutil.Ok(t, os.MkdirAll(filepath.Join(dir, "checkpoint.0000"), 0777)) s, k, err := LastCheckpoint(dir) @@ -94,18 +94,18 @@ func TestCheckpoint(t *testing.T) { testutil.Ok(t, os.RemoveAll(dir)) }() - var enc RecordEncoder + var enc record.RecordEncoder // Create a dummy segment to bump the initial number. - seg, err := wal.CreateSegment(dir, 100) + seg, err := CreateSegment(dir, 100) testutil.Ok(t, err) testutil.Ok(t, seg.Close()) // Manually create checkpoint for 99 and earlier. - w, err := wal.New(nil, nil, filepath.Join(dir, "checkpoint.0099"), compress) + w, err := New(nil, nil, filepath.Join(dir, "checkpoint.0099"), compress) testutil.Ok(t, err) // Add some data we expect to be around later. 
- err = w.Log(enc.Series([]RefSeries{ + err = w.Log(enc.Series([]record.RefSeries{ {Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")}, {Ref: 1, Labels: labels.FromStrings("a", "b", "c", "1")}, }, nil)) @@ -113,7 +113,7 @@ func TestCheckpoint(t *testing.T) { testutil.Ok(t, w.Close()) // Start a WAL and write records to it as usual. - w, err = wal.NewSize(nil, nil, dir, 64*1024, compress) + w, err = NewSize(nil, nil, dir, 64*1024, compress) testutil.Ok(t, err) var last int64 @@ -125,7 +125,7 @@ func TestCheckpoint(t *testing.T) { } // Write some series initially. if i == 0 { - b := enc.Series([]RefSeries{ + b := enc.Series([]record.RefSeries{ {Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")}, {Ref: 3, Labels: labels.FromStrings("a", "b", "c", "3")}, {Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")}, @@ -136,7 +136,7 @@ func TestCheckpoint(t *testing.T) { // Write samples until the WAL has enough segments. // Make them have drifting timestamps within a record to see that they // get filtered properly. 
- b := enc.Samples([]RefSample{ + b := enc.Samples([]record.RefSample{ {Ref: 0, T: last, V: float64(i)}, {Ref: 1, T: last + 10000, V: float64(i)}, {Ref: 2, T: last + 20000, V: float64(i)}, @@ -161,22 +161,22 @@ func TestCheckpoint(t *testing.T) { testutil.Equals(t, 1, len(files)) testutil.Equals(t, "checkpoint.000106", files[0]) - sr, err := wal.NewSegmentsReader(filepath.Join(dir, "checkpoint.000106")) + sr, err := NewSegmentsReader(filepath.Join(dir, "checkpoint.000106")) testutil.Ok(t, err) defer sr.Close() - var dec RecordDecoder - var series []RefSeries - r := wal.NewReader(sr) + var dec record.RecordDecoder + var series []record.RefSeries + r := NewReader(sr) for r.Next() { rec := r.Record() switch dec.Type(rec) { - case RecordSeries: + case record.RecordSeries: series, err = dec.Series(rec, series) testutil.Ok(t, err) - case RecordSamples: + case record.RecordSamples: samples, err := dec.Samples(rec, nil) testutil.Ok(t, err) for _, s := range samples { @@ -185,7 +185,7 @@ func TestCheckpoint(t *testing.T) { } } testutil.Ok(t, r.Err()) - testutil.Equals(t, []RefSeries{ + testutil.Equals(t, []record.RefSeries{ {Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")}, {Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")}, {Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")}, @@ -201,7 +201,7 @@ func TestCheckpointNoTmpFolderAfterError(t *testing.T) { defer func() { testutil.Ok(t, os.RemoveAll(dir)) }() - w, err := wal.NewSize(nil, nil, dir, 64*1024, false) + w, err := NewSize(nil, nil, dir, 64*1024, false) testutil.Ok(t, err) testutil.Ok(t, w.Log([]byte{99})) w.Close() diff --git a/wal/reader_test.go b/wal/reader_test.go index 96d15225..0bb0cb13 100644 --- a/wal/reader_test.go +++ b/wal/reader_test.go @@ -41,7 +41,7 @@ type reader interface { Offset() int64 } -type record struct { +type rec struct { t recType b []byte } @@ -59,13 +59,13 @@ var readerConstructors = map[string]func(io.Reader) reader{ var data = make([]byte, 100000) var testReaderCases = 
[]struct { - t []record + t []rec exp [][]byte fail bool }{ // Sequence of valid records. { - t: []record{ + t: []rec{ {recFull, data[0:200]}, {recFirst, data[200:300]}, {recLast, data[300:400]}, @@ -89,7 +89,7 @@ var testReaderCases = []struct { }, // Exactly at the limit of one page minus the header size { - t: []record{ + t: []rec{ {recFull, data[0 : pageSize-recordHeaderSize]}, }, exp: [][]byte{ @@ -99,7 +99,7 @@ var testReaderCases = []struct { // More than a full page, this exceeds our buffer and can never happen // when written by the WAL. { - t: []record{ + t: []rec{ {recFull, data[0 : pageSize+1]}, }, fail: true, @@ -108,7 +108,7 @@ var testReaderCases = []struct { // NB currently the non-live reader succeeds on this. I think this is a bug. // but we've seen it in production. { - t: []record{ + t: []rec{ {recFull, data[:pageSize/2]}, {recFull, data[:pageSize/2]}, }, @@ -119,22 +119,22 @@ var testReaderCases = []struct { }, // Invalid orders of record types. { - t: []record{{recMiddle, data[:200]}}, + t: []rec{{recMiddle, data[:200]}}, fail: true, }, { - t: []record{{recLast, data[:200]}}, + t: []rec{{recLast, data[:200]}}, fail: true, }, { - t: []record{ + t: []rec{ {recFirst, data[:200]}, {recFull, data[200:400]}, }, fail: true, }, { - t: []record{ + t: []rec{ {recFirst, data[:100]}, {recMiddle, data[100:200]}, {recFull, data[200:400]}, @@ -143,7 +143,7 @@ var testReaderCases = []struct { }, // Non-zero data after page termination. { - t: []record{ + t: []rec{ {recFull, data[:100]}, {recPageTerm, append(make([]byte, pageSize-recordHeaderSize-102), 1)}, }, diff --git a/wal/wal_watcher.go b/wal/wal_watcher.go new file mode 100644 index 00000000..f9f7776c --- /dev/null +++ b/wal/wal_watcher.go @@ -0,0 +1,556 @@ +// Copyright 2018 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package wal + +import ( + "fmt" + "io" + "math" + "os" + "path" + "sort" + "strconv" + "strings" + "time" + + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/pkg/timestamp" + "github.com/prometheus/tsdb/fileutil" + "github.com/prometheus/tsdb/record" +) + +const ( + readPeriod = 10 * time.Millisecond + checkpointPeriod = 5 * time.Second + segmentCheckPeriod = 100 * time.Millisecond + consumer = "consumer" +) + +var ( + watcherRecordsRead = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "records_read_total", + Help: "Number of records read by the WAL watcher from the WAL.", + }, + []string{consumer, "type"}, + ) + watcherRecordDecodeFails = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "record_decode_failures_total", + Help: "Number of records read by the WAL watcher that resulted in an error when decoding.", + }, + []string{consumer}, + ) + watcherSamplesSentPreTailing = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "samples_sent_pre_tailing_total", + Help: "Number of sample records read by the WAL watcher and sent to remote write during replay of existing WAL.", + }, + []string{consumer}, + ) + watcherCurrentSegment = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "prometheus", + Subsystem: 
"wal_watcher", + Name: "current_segment", + Help: "Current segment the WAL watcher is reading records from.", + }, + []string{consumer}, + ) +) + +func init() { + prometheus.MustRegister(watcherRecordsRead) + prometheus.MustRegister(watcherRecordDecodeFails) + prometheus.MustRegister(watcherSamplesSentPreTailing) + prometheus.MustRegister(watcherCurrentSegment) +} + +type writeTo interface { + Append([]record.RefSample) bool + StoreSeries([]record.RefSeries, int) + SeriesReset(int) +} + +// WALWatcher watches the TSDB WAL for a given WriteTo. +type WALWatcher struct { + name string + writer writeTo + logger log.Logger + walDir string + lastCheckpoint string + + startTime int64 + + recordsReadMetric *prometheus.CounterVec + recordDecodeFailsMetric prometheus.Counter + samplesSentPreTailing prometheus.Counter + currentSegmentMetric prometheus.Gauge + + quit chan struct{} + done chan struct{} + + // For testing, stop when we hit this segment. + maxSegment int +} + +// NewWALWatcher creates a new WAL watcher for a given WriteTo. +func NewWALWatcher(logger log.Logger, name string, writer writeTo, walDir string) *WALWatcher { + if logger == nil { + logger = log.NewNopLogger() + } + return &WALWatcher{ + logger: logger, + writer: writer, + walDir: path.Join(walDir, "wal"), + name: name, + quit: make(chan struct{}), + done: make(chan struct{}), + + maxSegment: -1, + } +} + +func (w *WALWatcher) setMetrics() { + // Setup the WAL Watchers metrics. We do this here rather than in the + // constructor because of the ordering of creating Queue Managers's, + // stopping them, and then starting new ones in storage/remote/storage.go ApplyConfig. 
+ w.recordsReadMetric = watcherRecordsRead.MustCurryWith(prometheus.Labels{consumer: w.name})
+ w.recordDecodeFailsMetric = watcherRecordDecodeFails.WithLabelValues(w.name)
+ w.samplesSentPreTailing = watcherSamplesSentPreTailing.WithLabelValues(w.name)
+ w.currentSegmentMetric = watcherCurrentSegment.WithLabelValues(w.name)
+}
+
+// Start the WALWatcher.
+func (w *WALWatcher) Start() {
+ w.setMetrics()
+ level.Info(w.logger).Log("msg", "starting WAL watcher", "queue", w.name)
+
+ go w.loop()
+}
+
+// Stop the WALWatcher.
+func (w *WALWatcher) Stop() {
+ close(w.quit)
+ <-w.done
+
+ // Records read metric has series and samples.
+ watcherRecordsRead.DeleteLabelValues(w.name, "series")
+ watcherRecordsRead.DeleteLabelValues(w.name, "samples")
+ watcherRecordDecodeFails.DeleteLabelValues(w.name)
+ watcherSamplesSentPreTailing.DeleteLabelValues(w.name)
+ watcherCurrentSegment.DeleteLabelValues(w.name)
+
+ level.Info(w.logger).Log("msg", "WAL watcher stopped", "queue", w.name)
+}
+
+func (w *WALWatcher) loop() {
+ defer close(w.done)
+
+ // We may encounter failures processing the WAL; we should wait and retry.
+ for !isClosed(w.quit) {
+ w.startTime = timestamp.FromTime(time.Now())
+ if err := w.run(); err != nil {
+ level.Error(w.logger).Log("msg", "error tailing WAL", "err", err)
+ }
+
+ select {
+ case <-w.quit:
+ return
+ case <-time.After(5 * time.Second):
+ }
+ }
+}
+
+func (w *WALWatcher) run() error {
+ _, lastSegment, err := w.firstAndLast()
+ if err != nil {
+ return errors.Wrap(err, "wal.Segments")
+ }
+
+ // Backfill from the checkpoint first if it exists. 
+ lastCheckpoint, checkpointIndex, err := LastCheckpoint(w.walDir) + if err != nil && err != record.ErrNotFound { + return errors.Wrap(err, "LastCheckpoint") + } + + if err == nil { + if err = w.readCheckpoint(lastCheckpoint); err != nil { + return errors.Wrap(err, "readCheckpoint") + } + } + w.lastCheckpoint = lastCheckpoint + + currentSegment, err := w.findSegmentForIndex(checkpointIndex) + if err != nil { + return err + } + + level.Debug(w.logger).Log("msg", "tailing WAL", "lastCheckpoint", lastCheckpoint, "checkpointIndex", checkpointIndex, "currentSegment", currentSegment, "lastSegment", lastSegment) + for !isClosed(w.quit) { + w.currentSegmentMetric.Set(float64(currentSegment)) + level.Debug(w.logger).Log("msg", "processing segment", "currentSegment", currentSegment) + + // On start, after reading the existing WAL for series records, we have a pointer to what is the latest segment. + // On subsequent calls to this function, currentSegment will have been incremented and we should open that segment. + if err := w.watch(currentSegment, currentSegment >= lastSegment); err != nil { + return err + } + + // For testing: stop when you hit a specific segment. + if currentSegment == w.maxSegment { + return nil + } + + currentSegment++ + } + + return nil +} + +// findSegmentForIndex finds the first segment greater than or equal to index. +func (w *WALWatcher) findSegmentForIndex(index int) (int, error) { + refs, err := w.segments(w.walDir) + if err != nil { + return -1, nil + } + + for _, r := range refs { + if r >= index { + return r, nil + } + } + + return -1, errors.New("failed to find segment for index") +} + +func (w *WALWatcher) firstAndLast() (int, int, error) { + refs, err := w.segments(w.walDir) + if err != nil { + return -1, -1, nil + } + + if len(refs) == 0 { + return -1, -1, nil + } + return refs[0], refs[len(refs)-1], nil +} + +// Copied from tsdb/wal/wal.go so we do not have to open a WAL. 
+// Plan is to move WAL watcher to TSDB and dedupe these implementations. +func (w *WALWatcher) segments(dir string) ([]int, error) { + files, err := fileutil.ReadDir(dir) + if err != nil { + return nil, err + } + + var refs []int + var last int + for _, fn := range files { + k, err := strconv.Atoi(fn) + if err != nil { + continue + } + if len(refs) > 0 && k > last+1 { + return nil, errors.New("segments are not sequential") + } + refs = append(refs, k) + last = k + } + sort.Ints(refs) + + return refs, nil +} + +// Use tail true to indicate that the reader is currently on a segment that is +// actively being written to. If false, assume it's a full segment and we're +// replaying it on start to cache the series records. +func (w *WALWatcher) watch(segmentNum int, tail bool) error { + segment, err := OpenReadSegment(SegmentName(w.walDir, segmentNum)) + if err != nil { + return err + } + defer segment.Close() + + reader := NewLiveReader(w.logger, prometheus.DefaultRegisterer, segment) + + readTicker := time.NewTicker(readPeriod) + defer readTicker.Stop() + + checkpointTicker := time.NewTicker(checkpointPeriod) + defer checkpointTicker.Stop() + + segmentTicker := time.NewTicker(segmentCheckPeriod) + defer segmentTicker.Stop() + + // If we're replaying the segment we need to know the size of the file to know + // when to return from watch and move on to the next segment. + size := int64(math.MaxInt64) + if !tail { + segmentTicker.Stop() + checkpointTicker.Stop() + var err error + size, err = getSegmentSize(w.walDir, segmentNum) + if err != nil { + return errors.Wrap(err, "getSegmentSize") + } + } + + for { + select { + case <-w.quit: + return nil + + case <-checkpointTicker.C: + // Periodically check if there is a new checkpoint so we can garbage + // collect labels. As this is considered an optimisation, we ignore + // errors during checkpoint processing. 
+ if err := w.garbageCollectSeries(segmentNum); err != nil {
+ level.Warn(w.logger).Log("msg", "error process checkpoint", "err", err)
+ }
+
+ case <-segmentTicker.C:
+ _, last, err := w.firstAndLast()
+ if err != nil {
+ return errors.Wrap(err, "segments")
+ }
+
+ // Check if new segments exist.
+ if last <= segmentNum {
+ continue
+ }
+
+ err = w.readSegment(reader, segmentNum, tail)
+
+ // Ignore errors reading to end of segment whilst replaying the WAL.
+ if !tail {
+ if err != nil && err != io.EOF {
+ level.Warn(w.logger).Log("msg", "ignoring error reading to end of segment, may have dropped data", "err", err)
+ } else if reader.Offset() != size {
+ level.Warn(w.logger).Log("msg", "expected to have read whole segment, may have dropped data", "segment", segmentNum, "read", reader.Offset(), "size", size)
+ }
+ return nil
+ }
+
+ // Otherwise, when we are tailing, non-EOFs are fatal.
+ if err != io.EOF {
+ return err
+ }
+
+ return nil
+
+ case <-readTicker.C:
+ err = w.readSegment(reader, segmentNum, tail)
+
+ // Ignore all errors reading to end of segment whilst replaying the WAL.
+ if !tail {
+ if err != nil && err != io.EOF {
+ level.Warn(w.logger).Log("msg", "ignoring error reading to end of segment, may have dropped data", "segment", segmentNum, "err", err)
+ } else if reader.Offset() != size {
+ level.Warn(w.logger).Log("msg", "expected to have read whole segment, may have dropped data", "segment", segmentNum, "read", reader.Offset(), "size", size)
+ }
+ return nil
+ }
+
+ // Otherwise, when we are tailing, non-EOFs are fatal. 
+ if err != io.EOF { + return err + } + } + } +} + +func (w *WALWatcher) garbageCollectSeries(segmentNum int) error { + dir, _, err := LastCheckpoint(w.walDir) + if err != nil && err != record.ErrNotFound { + return errors.Wrap(err, "LastCheckpoint") + } + + if dir == "" || dir == w.lastCheckpoint { + return nil + } + w.lastCheckpoint = dir + + index, err := checkpointNum(dir) + if err != nil { + return errors.Wrap(err, "error parsing checkpoint filename") + } + + if index >= segmentNum { + level.Debug(w.logger).Log("msg", "current segment is behind the checkpoint, skipping reading of checkpoint", "current", fmt.Sprintf("%08d", segmentNum), "checkpoint", dir) + return nil + } + + level.Debug(w.logger).Log("msg", "new checkpoint detected", "new", dir, "currentSegment", segmentNum) + + if err = w.readCheckpoint(dir); err != nil { + return errors.Wrap(err, "readCheckpoint") + } + + // Clear series with a checkpoint or segment index # lower than the checkpoint we just read. + w.writer.SeriesReset(index) + return nil +} + +func (w *WALWatcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { + var ( + dec record.RecordDecoder + series []record.RefSeries + samples []record.RefSample + ) + + for r.Next() && !isClosed(w.quit) { + rec := r.Record() + w.recordsReadMetric.WithLabelValues(recordType(dec.Type(rec))).Inc() + + switch dec.Type(rec) { + case record.RecordSeries: + series, err := dec.Series(rec, series[:0]) + if err != nil { + w.recordDecodeFailsMetric.Inc() + return err + } + w.writer.StoreSeries(series, segmentNum) + + case record.RecordSamples: + // If we're not tailing a segment we can ignore any samples records we see. + // This speeds up replay of the WAL by > 10x. 
+ if !tail { + break + } + samples, err := dec.Samples(rec, samples[:0]) + if err != nil { + w.recordDecodeFailsMetric.Inc() + return err + } + var send []record.RefSample + for _, s := range samples { + if s.T > w.startTime { + send = append(send, s) + } + } + if len(send) > 0 { + // Blocks until the sample is sent to all remote write endpoints or closed (because enqueue blocks). + w.writer.Append(send) + } + + case record.RecordTombstones: + // noop + case record.RecordInvalid: + return errors.New("invalid record") + + default: + w.recordDecodeFailsMetric.Inc() + return errors.New("unknown TSDB record type") + } + } + return r.Err() +} + +func recordType(rt record.RecordType) string { + switch rt { + case record.RecordInvalid: + return "invalid" + case record.RecordSeries: + return "series" + case record.RecordSamples: + return "samples" + case record.RecordTombstones: + return "tombstones" + default: + return "unknown" + } +} + +// Read all the series records from a Checkpoint directory. +func (w *WALWatcher) readCheckpoint(checkpointDir string) error { + level.Debug(w.logger).Log("msg", "reading checkpoint", "dir", checkpointDir) + index, err := checkpointNum(checkpointDir) + if err != nil { + return errors.Wrap(err, "checkpointNum") + } + + // Ensure we read the whole contents of every segment in the checkpoint dir. 
+ segs, err := w.segments(checkpointDir) + if err != nil { + return errors.Wrap(err, "Unable to get segments checkpoint dir") + } + for _, seg := range segs { + size, err := getSegmentSize(checkpointDir, seg) + if err != nil { + return errors.Wrap(err, "getSegmentSize") + } + + sr, err := OpenReadSegment(SegmentName(checkpointDir, seg)) + if err != nil { + return errors.Wrap(err, "unable to open segment") + } + defer sr.Close() + + r := NewLiveReader(w.logger, prometheus.DefaultRegisterer, sr) + if err := w.readSegment(r, index, false); err != io.EOF && err != nil { + return errors.Wrap(err, "readSegment") + } + + if r.Offset() != size { + return fmt.Errorf("readCheckpoint wasn't able to read all data from the checkpoint %s/%08d, size: %d, totalRead: %d", checkpointDir, seg, size, r.Offset()) + } + } + + level.Debug(w.logger).Log("msg", "read series references from checkpoint", "checkpoint", checkpointDir) + return nil +} + +func checkpointNum(dir string) (int, error) { + // Checkpoint dir names are in the format checkpoint.000001 + chunks := strings.Split(dir, ".") + if len(chunks) != 2 { + return 0, errors.Errorf("invalid checkpoint dir string: %s", dir) + } + + result, err := strconv.Atoi(chunks[1]) + if err != nil { + return 0, errors.Errorf("invalid checkpoint dir string: %s", dir) + } + + return result, nil +} + +// Get size of segment. 
+func getSegmentSize(dir string, index int) (int64, error) { + i := int64(-1) + fi, err := os.Stat(SegmentName(dir, index)) + if err == nil { + i = fi.Size() + } + return i, err +} + +func isClosed(c chan struct{}) bool { + select { + case <-c: + return true + default: + return false + } +} diff --git a/wal/wal_watcher_test.go b/wal/wal_watcher_test.go new file mode 100644 index 00000000..e8e1d4b7 --- /dev/null +++ b/wal/wal_watcher_test.go @@ -0,0 +1,509 @@ +// Copyright 2018 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package wal + +import ( + "fmt" + "io/ioutil" + "math/rand" + "os" + "path" + "sync" + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/testutil" +) + +var defaultRetryInterval = 100 * time.Millisecond +var defaultRetries = 100 + +// retry executes f() n times at each interval until it returns true. 
+func retry(t *testing.T, interval time.Duration, n int, f func() bool) { + t.Helper() + ticker := time.NewTicker(interval) + for i := 0; i <= n; i++ { + if f() { + return + } + <-ticker.C + } + ticker.Stop() + t.Logf("function returned false") +} + +type writeToMock struct { + samplesAppended int + seriesLock sync.Mutex + seriesSegmentIndexes map[uint64]int +} + +func (wtm *writeToMock) Append(s []record.RefSample) bool { + wtm.samplesAppended += len(s) + return true +} + +func (wtm *writeToMock) StoreSeries(series []record.RefSeries, index int) { + wtm.seriesLock.Lock() + defer wtm.seriesLock.Unlock() + for _, s := range series { + wtm.seriesSegmentIndexes[s.Ref] = index + } +} + +func (wtm *writeToMock) SeriesReset(index int) { + // Check for series that are in segments older than the checkpoint + // that were not also present in the checkpoint. + wtm.seriesLock.Lock() + defer wtm.seriesLock.Unlock() + for k, v := range wtm.seriesSegmentIndexes { + if v < index { + delete(wtm.seriesSegmentIndexes, k) + } + } +} + +func (wtm *writeToMock) checkNumLabels() int { + wtm.seriesLock.Lock() + defer wtm.seriesLock.Unlock() + return len(wtm.seriesSegmentIndexes) +} + +func newWriteToMock() *writeToMock { + return &writeToMock{ + seriesSegmentIndexes: make(map[uint64]int), + } +} + +func TestTailSamples(t *testing.T) { + pageSize := 32 * 1024 + const seriesCount = 10 + const samplesCount = 250 + now := time.Now() + + dir, err := ioutil.TempDir("", "readCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.RecordEncoder{} + w, err := NewSize(nil, prometheus.DefaultRegisterer, wdir, 128*pageSize, false) + testutil.Ok(t, err) + + // Write to the initial segment then checkpoint. 
+ for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(now.UnixNano()) + 1, + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + + // Start read after checkpoint, no more data written. + first, last, err := w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWALWatcher(nil, "", wt, dir) + watcher.startTime = now.UnixNano() + + // Set the Watcher's metrics so they're not nil pointers. + watcher.setMetrics() + for i := first; i <= last; i++ { + segment, err := OpenReadSegment(SegmentName(watcher.walDir, i)) + testutil.Ok(t, err) + defer segment.Close() + + reader := NewLiveReader(nil, prometheus.DefaultRegisterer, segment) + // Use tail true so we can ensure we got the right number of samples. 
+ watcher.readSegment(reader, i, true) + } + + expectedSeries := seriesCount + expectedSamples := seriesCount * samplesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expectedSeries + }) + testutil.Equals(t, expectedSeries, wt.checkNumLabels()) + testutil.Equals(t, expectedSamples, wt.samplesAppended) +} + +func TestReadToEndNoCheckpoint(t *testing.T) { + pageSize := 32 * 1024 + const seriesCount = 10 + const samplesCount = 250 + + dir, err := ioutil.TempDir("", "readToEnd_noCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + w, err := NewSize(nil, nil, wdir, 128*pageSize, false) + testutil.Ok(t, err) + + var recs [][]byte + + enc := record.RecordEncoder{} + + for i := 0; i < seriesCount; i++ { + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(i), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + recs = append(recs, series) + for j := 0; j < samplesCount; j++ { + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(j), + T: int64(i), + V: float64(i), + }, + }, nil) + + recs = append(recs, sample) + + // Randomly batch up records. + if rand.Intn(4) < 3 { + testutil.Ok(t, w.Log(recs...)) + recs = recs[:0] + } + } + } + testutil.Ok(t, w.Log(recs...)) + + _, _, err = w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWALWatcher(nil, "", wt, dir) + go watcher.Start() + + expected := seriesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expected + }) + watcher.Stop() + testutil.Equals(t, expected, wt.checkNumLabels()) +} + +func TestReadToEndWithCheckpoint(t *testing.T) { + segmentSize := 32 * 1024 + // We need something similar to this # of series and samples + // in order to get enough segments for us to checkpoint. 
+ const seriesCount = 10 + const samplesCount = 250 + + dir, err := ioutil.TempDir("", "readToEnd_withCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.RecordEncoder{} + w, err := NewSize(nil, nil, wdir, segmentSize, false) + testutil.Ok(t, err) + + // Write to the initial segment then checkpoint. + for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + + Checkpoint(w, 0, 1, func(x uint64) bool { return true }, 0) + w.Truncate(1) + + // Write more records after checkpointing. 
+ for i := 0; i < seriesCount; i++ { + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(i), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(j), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + + _, _, err = w.Segments() + testutil.Ok(t, err) + wt := newWriteToMock() + watcher := NewWALWatcher(nil, "", wt, dir) + go watcher.Start() + + expected := seriesCount * 2 + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expected + }) + watcher.Stop() + testutil.Equals(t, expected, wt.checkNumLabels()) +} + +func TestReadCheckpoint(t *testing.T) { + pageSize := 32 * 1024 + const seriesCount = 10 + const samplesCount = 250 + + dir, err := ioutil.TempDir("", "readCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + os.Create(SegmentName(wdir, 30)) + + enc := record.RecordEncoder{} + w, err := NewSize(nil, nil, wdir, 128*pageSize, false) + testutil.Ok(t, err) + + // Write to the initial segment then checkpoint. + for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + Checkpoint(w, 30, 31, func(x uint64) bool { return true }, 0) + w.Truncate(32) + + // Start read after checkpoint, no more data written. 
+ _, _, err = w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWALWatcher(nil, "", wt, dir) + // watcher. + go watcher.Start() + + expectedSeries := seriesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expectedSeries + }) + watcher.Stop() + testutil.Equals(t, expectedSeries, wt.checkNumLabels()) +} + +func TestReadCheckpointMultipleSegments(t *testing.T) { + pageSize := 32 * 1024 + + const segments = 1 + const seriesCount = 20 + const samplesCount = 300 + + dir, err := ioutil.TempDir("", "readCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.RecordEncoder{} + w, err := NewSize(nil, nil, wdir, pageSize, false) + testutil.Ok(t, err) + + // Write a bunch of data. + for i := 0; i < segments; i++ { + for j := 0; j < seriesCount; j++ { + ref := j + (i * 100) + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", j)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for k := 0; k < samplesCount; k++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + } + + // At this point we should have at least 6 segments, lets create a checkpoint dir of the first 5. + checkpointDir := dir + "/wal/checkpoint.000004" + err = os.Mkdir(checkpointDir, 0777) + testutil.Ok(t, err) + for i := 0; i <= 4; i++ { + err := os.Rename(SegmentName(dir+"/wal", i), SegmentName(checkpointDir, i)) + testutil.Ok(t, err) + } + + wt := newWriteToMock() + watcher := NewWALWatcher(nil, "", wt, dir) + watcher.maxSegment = -1 + + // Set the Watcher's metrics so they're not nil pointers. 
+ watcher.setMetrics() + + lastCheckpoint, _, err := LastCheckpoint(watcher.walDir) + testutil.Ok(t, err) + + err = watcher.readCheckpoint(lastCheckpoint) + testutil.Ok(t, err) +} + +func TestCheckpointSeriesReset(t *testing.T) { + segmentSize := 32 * 1024 + // We need something similar to this # of series and samples + // in order to get enough segments for us to checkpoint. + const seriesCount = 20 + const samplesCount = 350 + + dir, err := ioutil.TempDir("", "seriesReset") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.RecordEncoder{} + w, err := NewSize(nil, nil, wdir, segmentSize, false) + testutil.Ok(t, err) + + // Write to the initial segment, then checkpoint later. + for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + + _, _, err = w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWALWatcher(nil, "", wt, dir) + watcher.maxSegment = -1 + go watcher.Start() + + expected := seriesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expected + }) + testutil.Equals(t, seriesCount, wt.checkNumLabels()) + + _, err = Checkpoint(w, 2, 4, func(x uint64) bool { return true }, 0) + testutil.Ok(t, err) + + err = w.Truncate(5) + testutil.Ok(t, err) + + _, cpi, err := LastCheckpoint(path.Join(dir, "wal")) + testutil.Ok(t, err) + err = watcher.garbageCollectSeries(cpi + 1) + testutil.Ok(t, err) + + watcher.Stop() + // If you modify the 
checkpoint and truncate segment #'s run the test to see how + // many series records you end up with and change the last Equals check accordingly + // or modify the Equals to Assert(len(wt.seriesLabels) < seriesCount*10) + testutil.Equals(t, 14, wt.checkNumLabels()) +} diff --git a/wal_test.go b/wal_test.go index 0fed5b41..c9a9168d 100644 --- a/wal_test.go +++ b/wal_test.go @@ -29,6 +29,7 @@ import ( "github.com/go-kit/kit/log" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" "github.com/prometheus/tsdb/wal" ) @@ -95,10 +96,10 @@ func TestSegmentWAL_Truncate(t *testing.T) { w.segmentSize = 10000 for i := 0; i < numMetrics; i += batch { - var rs []RefSeries + var rs []record.RefSeries for j, s := range series[i : i+batch] { - rs = append(rs, RefSeries{Labels: s, Ref: uint64(i+j) + 1}) + rs = append(rs, record.RefSeries{Labels: s, Ref: uint64(i+j) + 1}) } err := w.LogSeries(rs) testutil.Ok(t, err) @@ -125,11 +126,11 @@ func TestSegmentWAL_Truncate(t *testing.T) { err = w.Truncate(1000, keepf) testutil.Ok(t, err) - var expected []RefSeries + var expected []record.RefSeries for i := 1; i <= numMetrics; i++ { if i%2 == 1 || uint64(i) >= boundarySeries { - expected = append(expected, RefSeries{Ref: uint64(i), Labels: series[i-1]}) + expected = append(expected, record.RefSeries{Ref: uint64(i), Labels: series[i-1]}) } } @@ -143,10 +144,10 @@ func TestSegmentWAL_Truncate(t *testing.T) { w, err = OpenSegmentWAL(dir, nil, 0, nil) testutil.Ok(t, err) - var readSeries []RefSeries + var readSeries []record.RefSeries r := w.Reader() - testutil.Ok(t, r.Read(func(s []RefSeries) { + testutil.Ok(t, r.Read(func(s []record.RefSeries) { readSeries = append(readSeries, s...) 
}, nil, nil)) @@ -172,9 +173,9 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { }() var ( - recordedSeries [][]RefSeries - recordedSamples [][]RefSample - recordedDeletes [][]Stone + recordedSeries [][]record.RefSeries + recordedSamples [][]record.RefSample + recordedDeletes [][]record.Stone ) var totalSamples int @@ -190,29 +191,29 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { r := w.Reader() var ( - resultSeries [][]RefSeries - resultSamples [][]RefSample - resultDeletes [][]Stone + resultSeries [][]record.RefSeries + resultSamples [][]record.RefSample + resultDeletes [][]record.Stone ) - serf := func(series []RefSeries) { + serf := func(series []record.RefSeries) { if len(series) > 0 { - clsets := make([]RefSeries, len(series)) + clsets := make([]record.RefSeries, len(series)) copy(clsets, series) resultSeries = append(resultSeries, clsets) } } - smplf := func(smpls []RefSample) { + smplf := func(smpls []record.RefSample) { if len(smpls) > 0 { - csmpls := make([]RefSample, len(smpls)) + csmpls := make([]record.RefSample, len(smpls)) copy(csmpls, smpls) resultSamples = append(resultSamples, csmpls) } } - delf := func(stones []Stone) { + delf := func(stones []record.Stone) { if len(stones) > 0 { - cst := make([]Stone, len(stones)) + cst := make([]record.Stone, len(stones)) copy(cst, stones) resultDeletes = append(resultDeletes, cst) } @@ -228,11 +229,11 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { // Insert in batches and generate different amounts of samples for each. 
for i := 0; i < len(series); i += stepSize { - var samples []RefSample - var stones []Stone + var samples []record.RefSample + var stones []record.Stone for j := 0; j < i*10; j++ { - samples = append(samples, RefSample{ + samples = append(samples, record.RefSample{ Ref: uint64(j % 10000), T: int64(j * 2), V: rand.Float64(), @@ -241,13 +242,13 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { for j := 0; j < i*20; j++ { ts := rand.Int63() - stones = append(stones, Stone{rand.Uint64(), Intervals{{ts, ts + rand.Int63n(10000)}}}) + stones = append(stones, record.Stone{rand.Uint64(), record.Intervals{{ts, ts + rand.Int63n(10000)}}}) } lbls := series[i : i+stepSize] - series := make([]RefSeries, 0, len(series)) + series := make([]record.RefSeries, 0, len(series)) for j, l := range lbls { - series = append(series, RefSeries{ + series = append(series, record.RefSeries{ Ref: uint64(i + j), Labels: l, }) @@ -382,8 +383,8 @@ func TestWALRestoreCorrupted(t *testing.T) { w, err := OpenSegmentWAL(dir, nil, 0, nil) testutil.Ok(t, err) - testutil.Ok(t, w.LogSamples([]RefSample{{T: 1, V: 2}})) - testutil.Ok(t, w.LogSamples([]RefSample{{T: 2, V: 3}})) + testutil.Ok(t, w.LogSamples([]record.RefSample{{T: 1, V: 2}})) + testutil.Ok(t, w.LogSamples([]record.RefSample{{T: 2, V: 3}})) testutil.Ok(t, w.cut()) @@ -392,8 +393,8 @@ func TestWALRestoreCorrupted(t *testing.T) { // Hopefully cut will complete by 2 seconds. time.Sleep(2 * time.Second) - testutil.Ok(t, w.LogSamples([]RefSample{{T: 3, V: 4}})) - testutil.Ok(t, w.LogSamples([]RefSample{{T: 5, V: 6}})) + testutil.Ok(t, w.LogSamples([]record.RefSample{{T: 3, V: 4}})) + testutil.Ok(t, w.LogSamples([]record.RefSample{{T: 5, V: 6}})) testutil.Ok(t, w.Close()) @@ -414,24 +415,24 @@ func TestWALRestoreCorrupted(t *testing.T) { r := w2.Reader() - serf := func(l []RefSeries) { + serf := func(l []record.RefSeries) { testutil.Equals(t, 0, len(l)) } // Weird hack to check order of reads. 
i := 0 - samplf := func(s []RefSample) { + samplf := func(s []record.RefSample) { if i == 0 { - testutil.Equals(t, []RefSample{{T: 1, V: 2}}, s) + testutil.Equals(t, []record.RefSample{{T: 1, V: 2}}, s) i++ } else { - testutil.Equals(t, []RefSample{{T: 99, V: 100}}, s) + testutil.Equals(t, []record.RefSample{{T: 99, V: 100}}, s) } } testutil.Ok(t, r.Read(serf, samplf, nil)) - testutil.Ok(t, w2.LogSamples([]RefSample{{T: 99, V: 100}})) + testutil.Ok(t, w2.LogSamples([]record.RefSample{{T: 99, V: 100}})) testutil.Ok(t, w2.Close()) // We should see the first valid entry and the new one, everything after @@ -482,23 +483,23 @@ func TestMigrateWAL_Fuzz(t *testing.T) { testutil.Ok(t, err) // Write some data. - testutil.Ok(t, oldWAL.LogSeries([]RefSeries{ + testutil.Ok(t, oldWAL.LogSeries([]record.RefSeries{ {Ref: 100, Labels: labels.FromStrings("abc", "def", "123", "456")}, {Ref: 1, Labels: labels.FromStrings("abc", "def2", "1234", "4567")}, })) - testutil.Ok(t, oldWAL.LogSamples([]RefSample{ + testutil.Ok(t, oldWAL.LogSamples([]record.RefSample{ {Ref: 1, T: 100, V: 200}, {Ref: 2, T: 300, V: 400}, })) - testutil.Ok(t, oldWAL.LogSeries([]RefSeries{ + testutil.Ok(t, oldWAL.LogSeries([]record.RefSeries{ {Ref: 200, Labels: labels.FromStrings("xyz", "def", "foo", "bar")}, })) - testutil.Ok(t, oldWAL.LogSamples([]RefSample{ + testutil.Ok(t, oldWAL.LogSamples([]record.RefSample{ {Ref: 3, T: 100, V: 200}, {Ref: 4, T: 300, V: 400}, })) - testutil.Ok(t, oldWAL.LogDeletes([]Stone{ - {ref: 1, intervals: []Interval{{100, 200}}}, + testutil.Ok(t, oldWAL.LogDeletes([]record.Stone{ + {Ref: 1, Intervals: []record.Interval{{100, 200}}}, })) testutil.Ok(t, oldWAL.Close()) @@ -510,8 +511,8 @@ func TestMigrateWAL_Fuzz(t *testing.T) { testutil.Ok(t, err) // We can properly write some new data after migration. 
- var enc RecordEncoder - testutil.Ok(t, w.Log(enc.Samples([]RefSample{ + var enc record.RecordEncoder + testutil.Ok(t, w.Log(enc.Samples([]record.RefSample{ {Ref: 500, T: 1, V: 1}, }, nil))) @@ -523,21 +524,21 @@ func TestMigrateWAL_Fuzz(t *testing.T) { r := wal.NewReader(sr) var res []interface{} - var dec RecordDecoder + var dec record.RecordDecoder for r.Next() { rec := r.Record() switch dec.Type(rec) { - case RecordSeries: + case record.RecordSeries: s, err := dec.Series(rec, nil) testutil.Ok(t, err) res = append(res, s) - case RecordSamples: + case record.RecordSamples: s, err := dec.Samples(rec, nil) testutil.Ok(t, err) res = append(res, s) - case RecordTombstones: + case record.RecordTombstones: s, err := dec.Tombstones(rec, nil) testutil.Ok(t, err) res = append(res, s) @@ -548,17 +549,17 @@ func TestMigrateWAL_Fuzz(t *testing.T) { testutil.Ok(t, r.Err()) testutil.Equals(t, []interface{}{ - []RefSeries{ + []record.RefSeries{ {Ref: 100, Labels: labels.FromStrings("abc", "def", "123", "456")}, {Ref: 1, Labels: labels.FromStrings("abc", "def2", "1234", "4567")}, }, - []RefSample{{Ref: 1, T: 100, V: 200}, {Ref: 2, T: 300, V: 400}}, - []RefSeries{ + []record.RefSample{{Ref: 1, T: 100, V: 200}, {Ref: 2, T: 300, V: 400}}, + []record.RefSeries{ {Ref: 200, Labels: labels.FromStrings("xyz", "def", "foo", "bar")}, }, - []RefSample{{Ref: 3, T: 100, V: 200}, {Ref: 4, T: 300, V: 400}}, - []Stone{{ref: 1, intervals: []Interval{{100, 200}}}}, - []RefSample{{Ref: 500, T: 1, V: 1}}, + []record.RefSample{{Ref: 3, T: 100, V: 200}, {Ref: 4, T: 300, V: 400}}, + []record.Stone{{Ref: 1, Intervals: []record.Interval{{100, 200}}}}, + []record.RefSample{{Ref: 500, T: 1, V: 1}}, }, res) // Migrating an already migrated WAL shouldn't do anything. 
From 643a5c9525f9ec69cccc10e936aafd63eb33ccc6 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Tue, 4 Jun 2019 13:08:57 -0700 Subject: [PATCH 02/16] Copy FromTime function from timestamp package so we don't have to vendor something from Prometheus. Signed-off-by: Callum Styan --- wal/wal_watcher.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/wal/wal_watcher.go b/wal/wal_watcher.go index f9f7776c..b85d1f06 100644 --- a/wal/wal_watcher.go +++ b/wal/wal_watcher.go @@ -28,7 +28,6 @@ import ( "github.com/go-kit/kit/log/level" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/prometheus/pkg/timestamp" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/record" ) @@ -79,6 +78,13 @@ var ( ) ) +// This function is copied from prometheus/prometheus/pkg/timestamp to avoid adding vendor to TSDB repo. + +// FromTime returns a new millisecond timestamp from a time. +func FromTime(t time.Time) int64 { + return t.Unix()*1000 + int64(t.Nanosecond())/int64(time.Millisecond) +} + func init() { prometheus.MustRegister(watcherRecordsRead) prometheus.MustRegister(watcherRecordDecodeFails) @@ -169,7 +175,7 @@ func (w *WALWatcher) loop() { // We may encourter failures processing the WAL; we should wait and retry. for !isClosed(w.quit) { - w.startTime = timestamp.FromTime(time.Now()) + w.startTime = FromTime(time.Now()) if err := w.run(); err != nil { level.Error(w.logger).Log("msg", "error tailing WAL", "err", err) } From b84a29720b223453a13c33b357b2be5988f56e7b Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Tue, 4 Jun 2019 16:56:51 -0700 Subject: [PATCH 03/16] WAL Watcher needs to take in and pass a Registerer to LiveReader. 
Signed-off-by: Callum Styan --- wal/wal_watcher.go | 20 +++++++++++++++++++- wal/wal_watcher_test.go | 14 +++++++------- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/wal/wal_watcher.go b/wal/wal_watcher.go index b85d1f06..aa00acf8 100644 --- a/wal/wal_watcher.go +++ b/wal/wal_watcher.go @@ -105,6 +105,7 @@ type WALWatcher struct { logger log.Logger walDir string lastCheckpoint string + reg prometheus.Registerer startTime int64 @@ -121,12 +122,21 @@ type WALWatcher struct { } // NewWALWatcher creates a new WAL watcher for a given WriteTo. -func NewWALWatcher(logger log.Logger, name string, writer writeTo, walDir string) *WALWatcher { +func NewWALWatcher(logger log.Logger, reg prometheus.Registerer, name string, writer writeTo, walDir string) *WALWatcher { if logger == nil { logger = log.NewNopLogger() } + if reg != nil { + // We can't use MustRegister because WALWatcher's are recreated on config changes within Prometheus. + reg.Register(watcherRecordsRead) + reg.Register(watcherRecordDecodeFails) + reg.Register(watcherSamplesSentPreTailing) + reg.Register(watcherCurrentSegment) + } + return &WALWatcher{ logger: logger, + reg: reg, writer: writer, walDir: path.Join(walDir, "wal"), name: name, @@ -298,7 +308,11 @@ func (w *WALWatcher) watch(segmentNum int, tail bool) error { } defer segment.Close() +<<<<<<< HEAD reader := NewLiveReader(w.logger, prometheus.DefaultRegisterer, segment) +======= + reader := NewLiveReader(w.logger, w.reg, segment) +>>>>>>> WAL Watcher needs to take in and pass a Registerer to LiveReader. readTicker := time.NewTicker(readPeriod) defer readTicker.Stop() @@ -513,7 +527,11 @@ func (w *WALWatcher) readCheckpoint(checkpointDir string) error { } defer sr.Close() +<<<<<<< HEAD r := NewLiveReader(w.logger, prometheus.DefaultRegisterer, sr) +======= + r := NewLiveReader(w.logger, w.reg, sr) +>>>>>>> WAL Watcher needs to take in and pass a Registerer to LiveReader. 
if err := w.readSegment(r, index, false); err != io.EOF && err != nil { return errors.Wrap(err, "readSegment") } diff --git a/wal/wal_watcher_test.go b/wal/wal_watcher_test.go index e8e1d4b7..190bcc17 100644 --- a/wal/wal_watcher_test.go +++ b/wal/wal_watcher_test.go @@ -135,7 +135,7 @@ func TestTailSamples(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, "", wt, dir) + watcher := NewWALWatcher(nil, nil, "", wt, dir) watcher.startTime = now.UnixNano() // Set the Watcher's metrics so they're not nil pointers. @@ -145,7 +145,7 @@ func TestTailSamples(t *testing.T) { testutil.Ok(t, err) defer segment.Close() - reader := NewLiveReader(nil, prometheus.DefaultRegisterer, segment) + reader := NewLiveReader(nil, nil, segment) // Use tail true so we can ensure we got the right number of samples. watcher.readSegment(reader, i, true) } @@ -210,7 +210,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, "", wt, dir) + watcher := NewWALWatcher(nil, nil, "", wt, dir) go watcher.Start() expected := seriesCount @@ -292,7 +292,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { _, _, err = w.Segments() testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, "", wt, dir) + watcher := NewWALWatcher(nil, nil, "", wt, dir) go watcher.Start() expected := seriesCount * 2 @@ -353,7 +353,7 @@ func TestReadCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, "", wt, dir) + watcher := NewWALWatcher(nil, nil, "", wt, dir) // watcher. go watcher.Start() @@ -420,7 +420,7 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { } wt := newWriteToMock() - watcher := NewWALWatcher(nil, "", wt, dir) + watcher := NewWALWatcher(nil, nil, "", wt, dir) watcher.maxSegment = -1 // Set the Watcher's metrics so they're not nil pointers. 
@@ -480,7 +480,7 @@ func TestCheckpointSeriesReset(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, "", wt, dir) + watcher := NewWALWatcher(nil, nil, "", wt, dir) watcher.maxSegment = -1 go watcher.Start() From 6c4df84a0a859048782c4d5ffabf5433c9d53d2c Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 5 Jun 2019 11:42:20 -0700 Subject: [PATCH 04/16] Call Checkpoint in TestReadCheckpointMultipleSegments instead of manually creating a checkpoint dir and renaming files. Signed-off-by: Callum Styan --- wal/wal_watcher_test.go | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/wal/wal_watcher_test.go b/wal/wal_watcher_test.go index 190bcc17..00129a23 100644 --- a/wal/wal_watcher_test.go +++ b/wal/wal_watcher_test.go @@ -410,14 +410,9 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { } } - // At this point we should have at least 6 segments, lets create a checkpoint dir of the first 5. - checkpointDir := dir + "/wal/checkpoint.000004" - err = os.Mkdir(checkpointDir, 0777) - testutil.Ok(t, err) - for i := 0; i <= 4; i++ { - err := os.Rename(SegmentName(dir+"/wal", i), SegmentName(checkpointDir, i)) - testutil.Ok(t, err) - } + Checkpoint(w, 0, 4, func(id uint64) bool { + return true + }, 0) wt := newWriteToMock() watcher := NewWALWatcher(nil, nil, "", wt, dir) From f45385912523788b36ef892772820fcc79018af6 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Fri, 7 Jun 2019 14:26:36 -0700 Subject: [PATCH 05/16] Move tombstones to it's own package. 
Signed-off-by: Callum Styan --- block.go | 24 +++++++------- compact.go | 18 +++++------ compact_test.go | 2 +- db_test.go | 27 ++++++++-------- head.go | 30 ++++++++++++----- head_test.go | 29 +++++++++-------- mocks_test.go | 2 +- querier.go | 27 ++++++++-------- querier_test.go | 39 ++++++++++++----------- record/internal.go | 23 ------------- record/record.go | 9 +++--- record/record_test.go | 21 ++++++------ {record => tombstones}/tombstones.go | 19 ++++++++++- {record => tombstones}/tombstones_test.go | 2 +- wal.go | 29 +++++++++-------- wal/checkpoint.go | 3 +- wal/wal_watcher.go | 8 ----- wal_test.go | 19 +++++------ 18 files changed, 170 insertions(+), 161 deletions(-) rename {record => tombstones}/tombstones.go (92%) rename {record => tombstones}/tombstones_test.go (99%) diff --git a/block.go b/block.go index 516bd00b..b19f7cb6 100644 --- a/block.go +++ b/block.go @@ -32,7 +32,7 @@ import ( "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" ) // IndexWriter serializes the index for a block of series data. @@ -137,7 +137,7 @@ type BlockReader interface { Chunks() (ChunkReader, error) // Tombstones returns a TombstoneReader over the block's deleted data. - Tombstones() (record.TombstoneReader, error) + Tombstones() (tombstones.TombstoneReader, error) // Meta provides meta information about the block reader. 
Meta() BlockMeta @@ -279,7 +279,7 @@ type Block struct { chunkr ChunkReader indexr IndexReader - tombstones record.TombstoneReader + tombstones tombstones.TombstoneReader logger log.Logger @@ -321,7 +321,7 @@ func OpenBlock(logger log.Logger, dir string, pool chunkenc.Pool) (pb *Block, er } closers = append(closers, ir) - tr, sizeTomb, err := record.ReadTombstones(dir) + tr, sizeTomb, err := tombstones.ReadTombstones(dir) if err != nil { return nil, err } @@ -412,7 +412,7 @@ func (pb *Block) Chunks() (ChunkReader, error) { } // Tombstones returns a new TombstoneReader against the block data. -func (pb *Block) Tombstones() (record.TombstoneReader, error) { +func (pb *Block) Tombstones() (tombstones.TombstoneReader, error) { if err := pb.startRead(); err != nil { return nil, err } @@ -483,7 +483,7 @@ func (r blockIndexReader) Close() error { } type blockTombstoneReader struct { - record.TombstoneReader + tombstones.TombstoneReader b *Block } @@ -519,7 +519,7 @@ func (pb *Block) Delete(mint, maxt int64, ms ...labels.Matcher) error { ir := pb.indexr // Choose only valid postings which have chunks in the time-range. - stones := record.NewMemTombstones() + stones := tombstones.NewMemTombstones() var lset labels.Labels var chks []chunks.Meta @@ -535,7 +535,7 @@ Outer: if chk.OverlapsClosedInterval(mint, maxt) { // Delete only until the current values and not beyond. 
tmin, tmax := clampInterval(mint, maxt, chks[0].MinTime, chks[len(chks)-1].MaxTime) - stones.AddInterval(p.At(), record.Interval{tmin, tmax}) + stones.AddInterval(p.At(), tombstones.Interval{tmin, tmax}) continue Outer } } @@ -545,7 +545,7 @@ Outer: return p.Err() } - err = pb.tombstones.Iter(func(id uint64, ivs record.Intervals) error { + err = pb.tombstones.Iter(func(id uint64, ivs tombstones.Intervals) error { for _, iv := range ivs { stones.AddInterval(id, iv) } @@ -557,7 +557,7 @@ Outer: pb.tombstones = stones pb.meta.Stats.NumTombstones = pb.tombstones.Total() - n, err := record.WriteTombstoneFile(pb.logger, pb.dir, pb.tombstones) + n, err := tombstones.WriteTombstoneFile(pb.logger, pb.dir, pb.tombstones) if err != nil { return err } @@ -575,7 +575,7 @@ Outer: func (pb *Block) CleanTombstones(dest string, c Compactor) (*ulid.ULID, error) { numStones := 0 - if err := pb.tombstones.Iter(func(id uint64, ivs record.Intervals) error { + if err := pb.tombstones.Iter(func(id uint64, ivs tombstones.Intervals) error { numStones += len(ivs) return nil }); err != nil { @@ -610,7 +610,7 @@ func (pb *Block) Snapshot(dir string) error { for _, fname := range []string{ metaFilename, indexFilename, - record.TombstoneFilename, + tombstones.TombstoneFilename, } { if err := os.Link(filepath.Join(pb.dir, fname), filepath.Join(blockDir, fname)); err != nil { return errors.Wrapf(err, "create snapshot %s", fname) diff --git a/compact.go b/compact.go index 3e2652fd..01a4bee1 100644 --- a/compact.go +++ b/compact.go @@ -35,7 +35,7 @@ import ( "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" ) // ExponentialBlockRanges returns the time ranges based on the stepSize. @@ -608,7 +608,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe } // Create an empty tombstones file. 
- if _, err := record.WriteTombstoneFile(c.logger, tmp, record.NewMemTombstones()); err != nil { + if _, err := tombstones.WriteTombstoneFile(c.logger, tmp, record.NewMemTombstones()); err != nil { return errors.Wrap(err, "write new tombstones file") } @@ -877,15 +877,15 @@ type compactionSeriesSet struct { p index.Postings index IndexReader chunks ChunkReader - tombstones record.TombstoneReader + tombstones tombstones.TombstoneReader l labels.Labels c []chunks.Meta - intervals record.Intervals + intervals tombstones.Intervals err error } -func newCompactionSeriesSet(i IndexReader, c ChunkReader, t record.TombstoneReader, p index.Postings) *compactionSeriesSet { +func newCompactionSeriesSet(i IndexReader, c ChunkReader, t tombstones.TombstoneReader, p index.Postings) *compactionSeriesSet { return &compactionSeriesSet{ index: i, chunks: c, @@ -915,7 +915,7 @@ func (c *compactionSeriesSet) Next() bool { if len(c.intervals) > 0 { chks := make([]chunks.Meta, 0, len(c.c)) for _, chk := range c.c { - if !(record.Interval{chk.MinTime, chk.MaxTime}.IsSubrange(c.intervals)) { + if !(tombstones.Interval{chk.MinTime, chk.MaxTime}.IsSubrange(c.intervals)) { chks = append(chks, chk) } } @@ -943,7 +943,7 @@ func (c *compactionSeriesSet) Err() error { return c.p.Err() } -func (c *compactionSeriesSet) At() (labels.Labels, []chunks.Meta, record.Intervals) { +func (c *compactionSeriesSet) At() (labels.Labels, []chunks.Meta, tombstones.Intervals) { return c.l, c.c, c.intervals } @@ -953,7 +953,7 @@ type compactionMerger struct { aok, bok bool l labels.Labels c []chunks.Meta - intervals record.Intervals + intervals tombstones.Intervals } func newCompactionMerger(a, b ChunkSeriesSet) (*compactionMerger, error) { @@ -1030,6 +1030,6 @@ func (c *compactionMerger) Err() error { return c.b.Err() } -func (c *compactionMerger) At() (labels.Labels, []chunks.Meta, record.Intervals) { +func (c *compactionMerger) At() (labels.Labels, []chunks.Meta, tombstones.Intervals) { return c.l, c.c, 
c.intervals } diff --git a/compact_test.go b/compact_test.go index bee741e6..fe61da24 100644 --- a/compact_test.go +++ b/compact_test.go @@ -30,8 +30,8 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" + "github.com/prometheus/tsdb/tombstones" ) func TestSplitByRange(t *testing.T) { diff --git a/db_test.go b/db_test.go index 7e1e1b96..66a44661 100644 --- a/db_test.go +++ b/db_test.go @@ -35,6 +35,7 @@ import ( "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" + "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/tsdbutil" "github.com/prometheus/tsdb/wal" ) @@ -244,27 +245,27 @@ func TestDeleteSimple(t *testing.T) { numSamples := int64(10) cases := []struct { - intervals record.Intervals + intervals tombstones.Intervals remaint []int64 }{ { - intervals: record.Intervals{{0, 3}}, + intervals: tombstones.Intervals{{0, 3}}, remaint: []int64{4, 5, 6, 7, 8, 9}, }, { - intervals: record.Intervals{{1, 3}}, + intervals: tombstones.Intervals{{1, 3}}, remaint: []int64{0, 4, 5, 6, 7, 8, 9}, }, { - intervals: record.Intervals{{1, 3}, {4, 7}}, + intervals: tombstones.Intervals{{1, 3}, {4, 7}}, remaint: []int64{0, 8, 9}, }, { - intervals: record.Intervals{{1, 3}, {4, 700}}, + intervals: tombstones.Intervals{{1, 3}, {4, 700}}, remaint: []int64{0}, }, { // This case is to ensure that labels and symbols are deleted. 
- intervals: record.Intervals{{0, 9}}, + intervals: tombstones.Intervals{{0, 9}}, remaint: []int64{}, }, } @@ -562,11 +563,11 @@ func TestDB_SnapshotWithDelete(t *testing.T) { testutil.Ok(t, app.Commit()) cases := []struct { - intervals record.Intervals + intervals tombstones.Intervals remaint []int64 }{ { - intervals: record.Intervals{{1, 3}, {4, 7}}, + intervals: tombstones.Intervals{{1, 3}, {4, 7}}, remaint: []int64{0, 8, 9}, }, } @@ -889,11 +890,11 @@ func TestTombstoneClean(t *testing.T) { testutil.Ok(t, app.Commit()) cases := []struct { - intervals record.Intervals + intervals tombstones.Intervals remaint []int64 }{ { - intervals: record.Intervals{{1, 3}, {4, 7}}, + intervals: tombstones.Intervals{{1, 3}, {4, 7}}, remaint: []int64{0, 8, 9}, }, } @@ -965,7 +966,7 @@ func TestTombstoneClean(t *testing.T) { } for _, b := range db.Blocks() { - testutil.Equals(t, record.NewMemTombstones(), b.tombstones) + testutil.Equals(t, tombstones.NewMemTombstones(), b.tombstones) } } } @@ -991,8 +992,8 @@ func TestTombstoneCleanFail(t *testing.T) { block, err := OpenBlock(nil, blockDir, nil) testutil.Ok(t, err) // Add some some fake tombstones to trigger the compaction. - tomb := record.NewMemTombstones() - tomb.AddInterval(0, record.Interval{0, 1}) + tomb := tombstones.NewMemTombstones() + tomb.AddInterval(0, tombstones.Interval{0, 1}) block.tombstones = tomb db.blocks = append(db.blocks, block) diff --git a/head.go b/head.go index c1c8e6d5..b74b7daa 100644 --- a/head.go +++ b/head.go @@ -34,6 +34,7 @@ import ( "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/wal" ) @@ -44,7 +45,7 @@ var ( // emptyTombstoneReader is a no-op Tombstone Reader. // This is used by head to satisfy the Tombstones() function call. 
- emptyTombstoneReader = record.NewMemTombstones() + emptyTombstoneReader = tombstones.NewMemTombstones() ) // Head handles reads and writes of time series data within a time window. @@ -350,11 +351,20 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { } var ( +<<<<<<< HEAD dec RecordDecoder series []RefSeries samples []RefSample tstones []Stone allStones = newMemTombstones() +======= + dec record.RecordDecoder + series []record.RefSeries + samples []record.RefSample + tstones []tombstones.Stone + allStones = tombstones.NewMemTombstones() + err error +>>>>>>> Move tombstones to it's own package. ) defer func() { if err := allStones.Close(); err != nil { @@ -381,7 +391,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { if !created { // There's already a different ref for this series. multiRefLock.Lock() - multiRef[s.Ref] = series.ref + multiRef[s.Ref] = series.Ref multiRefLock.Unlock() } @@ -468,11 +478,15 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { } wg.Wait() +<<<<<<< HEAD if r.Err() != nil { return errors.Wrap(r.Err(), "read records") } if err := allStones.Iter(func(ref uint64, dranges Intervals) error { +======= + if err := allStones.Iter(func(ref uint64, dranges tombstones.Intervals) error { +>>>>>>> Move tombstones to it's own package. 
return h.chunkRewrite(ref, dranges) }); err != nil { return errors.Wrap(r.Err(), "deleting samples from tombstones") @@ -683,7 +697,7 @@ func (h *rangeHead) Chunks() (ChunkReader, error) { return h.head.chunksRange(h.mint, h.maxt), nil } -func (h *rangeHead) Tombstones() (record.TombstoneReader, error) { +func (h *rangeHead) Tombstones() (tombstones.TombstoneReader, error) { return emptyTombstoneReader, nil } @@ -954,7 +968,7 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { return errors.Wrap(err, "select series") } - var stones []record.Stone + var stones []tombstones.Stone dirty := false for p.Next() { series := h.series.getByID(p.At()) @@ -966,9 +980,9 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { // Delete only until the current values and not beyond. t0, t1 = clampInterval(mint, maxt, t0, t1) if h.wal != nil { - stones = append(stones, record.Stone{p.At(), record.Intervals{{t0, t1}}}) + stones = append(stones, tombstones.Stone{p.At(), tombstones.Intervals{{t0, t1}}}) } - if err := h.chunkRewrite(p.At(), record.Intervals{{t0, t1}}); err != nil { + if err := h.chunkRewrite(p.At(), tombstones.Intervals{{t0, t1}}); err != nil { return errors.Wrap(err, "delete samples") } dirty = true @@ -995,7 +1009,7 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { // chunkRewrite re-writes the chunks which overlaps with deleted ranges // and removes the samples in the deleted ranges. // Chunks is deleted if no samples are left at the end. 
-func (h *Head) chunkRewrite(ref uint64, dranges record.Intervals) (err error) { +func (h *Head) chunkRewrite(ref uint64, dranges tombstones.Intervals) (err error) { if len(dranges) == 0 { return nil } @@ -1087,7 +1101,7 @@ func (h *Head) gc() { } // Tombstones returns a new reader over the head's tombstones -func (h *Head) Tombstones() (record.TombstoneReader, error) { +func (h *Head) Tombstones() (tombstones.TombstoneReader, error) { return emptyTombstoneReader, nil } diff --git a/head_test.go b/head_test.go index 50b66196..f297fcc6 100644 --- a/head_test.go +++ b/head_test.go @@ -32,6 +32,7 @@ import ( "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" + "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/tsdbutil" "github.com/prometheus/tsdb/wal" ) @@ -59,7 +60,7 @@ func populateTestWAL(t testing.TB, w *wal.WAL, recs []interface{}) { testutil.Ok(t, w.Log(enc.Series(v, nil))) case []record.RefSample: testutil.Ok(t, w.Log(enc.Samples(v, nil))) - case []record.Stone: + case []tombstones.Stone: testutil.Ok(t, w.Log(enc.Tombstones(v, nil))) } } @@ -372,27 +373,27 @@ func TestHeadDeleteSimple(t *testing.T) { lblDefault := labels.Label{"a", "b"} cases := []struct { - dranges record.Intervals + dranges tombstones.Intervals smplsExp []sample }{ { - dranges: record.Intervals{{0, 3}}, + dranges: tombstones.Intervals{{0, 3}}, smplsExp: buildSmpls([]int64{4, 5, 6, 7, 8, 9}), }, { - dranges: record.Intervals{{1, 3}}, + dranges: tombstones.Intervals{{1, 3}}, smplsExp: buildSmpls([]int64{0, 4, 5, 6, 7, 8, 9}), }, { - dranges: record.Intervals{{1, 3}, {4, 7}}, + dranges: tombstones.Intervals{{1, 3}, {4, 7}}, smplsExp: buildSmpls([]int64{0, 8, 9}), }, { - dranges: record.Intervals{{1, 3}, {4, 700}}, + dranges: tombstones.Intervals{{1, 3}, {4, 700}}, smplsExp: buildSmpls([]int64{0}), }, { // This case is to ensure that labels and symbols are deleted. 
- dranges: record.Intervals{{0, 9}}, + dranges: tombstones.Intervals{{0, 9}}, smplsExp: buildSmpls([]int64{}), }, } @@ -605,7 +606,7 @@ func TestDeletedSamplesAndSeriesStillInWALAfterCheckpoint(t *testing.T) { series++ case []record.RefSample: samples++ - case []record.Stone: + case []tombstones.Stone: stones++ default: t.Fatalf("unknown record type") @@ -693,18 +694,18 @@ func TestDelete_e2e(t *testing.T) { // Delete a time-range from each-selector. dels := []struct { ms []labels.Matcher - drange record.Intervals + drange tombstones.Intervals }{ { ms: []labels.Matcher{labels.NewEqualMatcher("a", "b")}, - drange: record.Intervals{{300, 500}, {600, 670}}, + drange: tombstones.Intervals{{300, 500}, {600, 670}}, }, { ms: []labels.Matcher{ labels.NewEqualMatcher("a", "b"), labels.NewEqualMatcher("job", "prom-k8s"), }, - drange: record.Intervals{{300, 500}, {100, 670}}, + drange: tombstones.Intervals{{300, 500}, {100, 670}}, }, { ms: []labels.Matcher{ @@ -712,7 +713,7 @@ func TestDelete_e2e(t *testing.T) { labels.NewEqualMatcher("instance", "localhost:9090"), labels.NewEqualMatcher("job", "prometheus"), }, - drange: record.Intervals{{300, 400}, {100, 6700}}, + drange: tombstones.Intervals{{300, 400}, {100, 6700}}, }, // TODO: Add Regexp Matchers. 
} @@ -795,7 +796,7 @@ func boundedSamples(full []tsdbutil.Sample, mint, maxt int64) []tsdbutil.Sample return full } -func deletedSamples(full []tsdbutil.Sample, dranges record.Intervals) []tsdbutil.Sample { +func deletedSamples(full []tsdbutil.Sample, dranges tombstones.Intervals) []tsdbutil.Sample { ds := make([]tsdbutil.Sample, 0, len(full)) Outer: for _, s := range full { @@ -1105,7 +1106,7 @@ func TestWalRepair_DecodingError(t *testing.T) { func(rec []byte) []byte { return rec[:3] }, - enc.Tombstones([]record.Stone{{Ref: 1, Intervals: record.Intervals{}}}, []byte{}), + enc.Tombstones([]tombstones.Stone{{Ref: 1, Intervals: tombstones.Intervals{}}}, []byte{}), 9, 5, }, diff --git a/mocks_test.go b/mocks_test.go index f48ea9aa..44d8e9a5 100644 --- a/mocks_test.go +++ b/mocks_test.go @@ -18,7 +18,7 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" ) type mockIndexWriter struct { diff --git a/querier.go b/querier.go index 2b655106..0e905783 100644 --- a/querier.go +++ b/querier.go @@ -26,6 +26,7 @@ import ( "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" ) // Querier provides querying access over time series data of a fixed @@ -205,7 +206,7 @@ func NewBlockQuerier(b BlockReader, mint, maxt int64) (Querier, error) { type blockQuerier struct { index IndexReader chunks ChunkReader - tombstones record.TombstoneReader + tombstones tombstones.TombstoneReader closed bool @@ -671,7 +672,7 @@ func (s *mergedVerticalSeriesSet) Next() bool { // actual series itself. 
type ChunkSeriesSet interface { Next() bool - At() (labels.Labels, []chunks.Meta, record.Intervals) + At() (labels.Labels, []chunks.Meta, tombstones.Intervals) Err() error } @@ -680,19 +681,19 @@ type ChunkSeriesSet interface { type baseChunkSeries struct { p index.Postings index IndexReader - tombstones record.TombstoneReader + tombstones tombstones.TombstoneReader lset labels.Labels chks []chunks.Meta - intervals record.Intervals + intervals tombstones.Intervals err error } // LookupChunkSeries retrieves all series for the given matchers and returns a ChunkSeriesSet // over them. It drops chunks based on tombstones in the given reader. -func LookupChunkSeries(ir IndexReader, tr record.TombstoneReader, ms ...labels.Matcher) (ChunkSeriesSet, error) { +func LookupChunkSeries(ir IndexReader, tr tombstones.TombstoneReader, ms ...labels.Matcher) (ChunkSeriesSet, error) { if tr == nil { - tr = record.NewMemTombstones() + tr = tombstones.NewMemTombstones() } p, err := PostingsForMatchers(ir, ms...) if err != nil { @@ -705,7 +706,7 @@ func LookupChunkSeries(ir IndexReader, tr record.TombstoneReader, ms ...labels.M }, nil } -func (s *baseChunkSeries) At() (labels.Labels, []chunks.Meta, record.Intervals) { +func (s *baseChunkSeries) At() (labels.Labels, []chunks.Meta, tombstones.Intervals) { return s.lset, s.chks, s.intervals } @@ -741,7 +742,7 @@ func (s *baseChunkSeries) Next() bool { // Only those chunks that are not entirely deleted. 
chks := make([]chunks.Meta, 0, len(s.chks)) for _, chk := range s.chks { - if !(record.Interval{chk.MinTime, chk.MaxTime}.IsSubrange(s.intervals)) { + if !(tombstones.Interval{chk.MinTime, chk.MaxTime}.IsSubrange(s.intervals)) { chks = append(chks, chk) } } @@ -768,10 +769,10 @@ type populatedChunkSeries struct { err error chks []chunks.Meta lset labels.Labels - intervals record.Intervals + intervals tombstones.Intervals } -func (s *populatedChunkSeries) At() (labels.Labels, []chunks.Meta, record.Intervals) { +func (s *populatedChunkSeries) At() (labels.Labels, []chunks.Meta, tombstones.Intervals) { return s.lset, s.chks, s.intervals } @@ -866,7 +867,7 @@ type chunkSeries struct { mint, maxt int64 - intervals record.Intervals + intervals tombstones.Intervals } func (s *chunkSeries) Labels() labels.Labels { @@ -1067,7 +1068,7 @@ type chunkSeriesIterator struct { maxt, mint int64 - intervals record.Intervals + intervals tombstones.Intervals } func newChunkSeriesIterator(cs []chunks.Meta, dranges Intervals, mint, maxt int64) *chunkSeriesIterator { @@ -1169,7 +1170,7 @@ func (it *chunkSeriesIterator) Err() error { type deletedIterator struct { it chunkenc.Iterator - intervals record.Intervals + intervals tombstones.Intervals } func (it *deletedIterator) At() (int64, float64) { diff --git a/querier_test.go b/querier_test.go index a1bdf395..b24ca131 100644 --- a/querier_test.go +++ b/querier_test.go @@ -31,6 +31,7 @@ import ( "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" + "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/tsdbutil" ) @@ -382,7 +383,7 @@ Outer: querier := &blockQuerier{ index: ir, chunks: cr, - tombstones: record.NewMemTombstones(), + tombstones: tombstones.NewMemTombstones(), mint: c.mint, maxt: c.maxt, @@ -429,7 +430,7 @@ func TestBlockQuerierDelete(t *testing.T) { cases := struct { data []seriesSamples - tombstones record.TombstoneReader + tombstones 
tombstones.TombstoneReader queries []query }{ data: []seriesSamples{ @@ -474,10 +475,10 @@ func TestBlockQuerierDelete(t *testing.T) { }, }, }, - tombstones: &record.MemTombstones{IntvlGroups: map[uint64]record.Intervals{ - 1: record.Intervals{{1, 3}}, - 2: record.Intervals{{1, 3}, {6, 10}}, - 3: record.Intervals{{6, 10}}, + tombstones: &tombstones.MemTombstones{IntvlGroups: map[uint64]tombstones.Intervals{ + 1: tombstones.Intervals{{1, 3}}, + 2: tombstones.Intervals{{1, 3}, {6, 10}}, + 3: tombstones.Intervals{{6, 10}}, }}, queries: []query{ { @@ -651,7 +652,7 @@ func TestBaseChunkSeries(t *testing.T) { bcs := &baseChunkSeries{ p: index.NewListPostings(tc.postings), index: mi, - tombstones: record.NewMemTombstones(), + tombstones: tombstones.NewMemTombstones(), } i := 0 @@ -1173,7 +1174,7 @@ func (m *mockChunkSeriesSet) Next() bool { return m.i < len(m.l) } -func (m *mockChunkSeriesSet) At() (labels.Labels, []chunks.Meta, record.Intervals) { +func (m *mockChunkSeriesSet) At() (labels.Labels, []chunks.Meta, tombstones.Intervals) { return m.l[m.i], m.cm[m.i], nil } @@ -1268,18 +1269,18 @@ func TestDeletedIterator(t *testing.T) { } cases := []struct { - r record.Intervals + r tombstones.Intervals }{ - {r: record.Intervals{{1, 20}}}, - {r: record.Intervals{{1, 10}, {12, 20}, {21, 23}, {25, 30}}}, - {r: record.Intervals{{1, 10}, {12, 20}, {20, 30}}}, - {r: record.Intervals{{1, 10}, {12, 23}, {25, 30}}}, - {r: record.Intervals{{1, 23}, {12, 20}, {25, 30}}}, - {r: record.Intervals{{1, 23}, {12, 20}, {25, 3000}}}, - {r: record.Intervals{{0, 2000}}}, - {r: record.Intervals{{500, 2000}}}, - {r: record.Intervals{{0, 200}}}, - {r: record.Intervals{{1000, 20000}}}, + {r: tombstones.Intervals{{1, 20}}}, + {r: tombstones.Intervals{{1, 10}, {12, 20}, {21, 23}, {25, 30}}}, + {r: tombstones.Intervals{{1, 10}, {12, 20}, {20, 30}}}, + {r: tombstones.Intervals{{1, 10}, {12, 23}, {25, 30}}}, + {r: tombstones.Intervals{{1, 23}, {12, 20}, {25, 30}}}, + {r: tombstones.Intervals{{1, 23}, 
{12, 20}, {25, 3000}}}, + {r: tombstones.Intervals{{0, 2000}}}, + {r: tombstones.Intervals{{500, 2000}}}, + {r: tombstones.Intervals{{0, 200}}}, + {r: tombstones.Intervals{{1000, 20000}}}, } for _, c := range cases { diff --git a/record/internal.go b/record/internal.go index 840023c6..dbc166db 100644 --- a/record/internal.go +++ b/record/internal.go @@ -14,8 +14,6 @@ package record import ( "errors" - "hash" - "hash/crc32" "math" "os" "path/filepath" @@ -40,21 +38,6 @@ var ( ErrAmendSample = errors.New("amending sample") ) -// The table gets initialized with sync.Once but may still cause a race -// with any other use of the crc32 package anywhere. Thus we initialize it -// before. -var castagnoliTable *crc32.Table - -func init() { - castagnoliTable = crc32.MakeTable(crc32.Castagnoli) -} - -// NewCRC32 initializes a CRC32 hash with a preconfigured polynomial, so the -// polynomial may be easily changed in one location at a later time, if necessary. -func NewCRC32() hash.Hash32 { - return crc32.New(castagnoliTable) -} - type sample struct { t int64 v float64 @@ -68,12 +51,6 @@ func (s sample) V() float64 { return s.v } -// SizeReader returns the size of the object in bytes. -type SizeReader interface { - // Size returns the size in bytes. - Size() int64 -} - // RefSeries is the series labels with the series ID. type RefSeries struct { Ref uint64 diff --git a/record/record.go b/record/record.go index 887f9275..cf854b1d 100644 --- a/record/record.go +++ b/record/record.go @@ -21,6 +21,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/tsdb/encoding" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/tombstones" ) // RecordType represents the data type of a record. @@ -123,16 +124,16 @@ func (d *RecordDecoder) Samples(rec []byte, samples []RefSample) ([]RefSample, e } // Tombstones appends tombstones in rec to the given slice. 
-func (d *RecordDecoder) Tombstones(rec []byte, tstones []Stone) ([]Stone, error) { +func (d *RecordDecoder) Tombstones(rec []byte, tstones []tombstones.Stone) ([]tombstones.Stone, error) { dec := encoding.Decbuf{B: rec} if RecordType(dec.Byte()) != RecordTombstones { return nil, errors.New("invalid record type") } for dec.Len() > 0 && dec.Err() == nil { - tstones = append(tstones, Stone{ + tstones = append(tstones, tombstones.Stone{ Ref: dec.Be64(), - Intervals: Intervals{ + Intervals: tombstones.Intervals{ {Mint: dec.Varint64(), Maxt: dec.Varint64()}, }, }) @@ -193,7 +194,7 @@ func (e *RecordEncoder) Samples(samples []RefSample, b []byte) []byte { } // Tombstones appends the encoded tombstones to b and returns the resulting slice. -func (e *RecordEncoder) Tombstones(tstones []Stone, b []byte) []byte { +func (e *RecordEncoder) Tombstones(tstones []tombstones.Stone, b []byte) []byte { buf := encoding.Encbuf{B: b} buf.PutByte(byte(RecordTombstones)) diff --git a/record/record_test.go b/record/record_test.go index fdc69514..b9705238 100644 --- a/record/record_test.go +++ b/record/record_test.go @@ -21,6 +21,7 @@ import ( "github.com/prometheus/tsdb/encoding" "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/testutil" + "github.com/prometheus/tsdb/tombstones" ) func TestRecord_EncodeDecode(t *testing.T) { @@ -54,23 +55,23 @@ func TestRecord_EncodeDecode(t *testing.T) { // Intervals get split up into single entries. So we don't get back exactly // what we put in. 
- tstones := []Stone{ - {Ref: 123, Intervals: Intervals{ + tstones := []tombstones.Stone{ + {Ref: 123, Intervals: tombstones.Intervals{ {Mint: -1000, Maxt: 1231231}, {Mint: 5000, Maxt: 0}, }}, - {Ref: 13, Intervals: Intervals{ + {Ref: 13, Intervals: tombstones.Intervals{ {Mint: -1000, Maxt: -11}, {Mint: 5000, Maxt: 1000}, }}, } decTstones, err := dec.Tombstones(enc.Tombstones(tstones, nil), nil) testutil.Ok(t, err) - testutil.Equals(t, []Stone{ - {Ref: 123, Intervals: Intervals{{Mint: -1000, Maxt: 1231231}}}, - {Ref: 123, Intervals: Intervals{{Mint: 5000, Maxt: 0}}}, - {Ref: 13, Intervals: Intervals{{Mint: -1000, Maxt: -11}}}, - {Ref: 13, Intervals: Intervals{{Mint: 5000, Maxt: 1000}}}, + testutil.Equals(t, []tombstones.Stone{ + {Ref: 123, Intervals: tombstones.Intervals{{Mint: -1000, Maxt: 1231231}}}, + {Ref: 123, Intervals: tombstones.Intervals{{Mint: 5000, Maxt: 0}}}, + {Ref: 13, Intervals: tombstones.Intervals{{Mint: -1000, Maxt: -11}}}, + {Ref: 13, Intervals: tombstones.Intervals{{Mint: 5000, Maxt: 1000}}}, }, decTstones) } @@ -104,8 +105,8 @@ func TestRecord_Corruputed(t *testing.T) { }) t.Run("Test corrupted tombstone record", func(t *testing.T) { - tstones := []Stone{ - {Ref: 123, Intervals: Intervals{ + tstones := []tombstones.Stone{ + {Ref: 123, Intervals: tombstones.Intervals{ {Mint: -1000, Maxt: 1231231}, {Mint: 5000, Maxt: 0}, }}, diff --git a/record/tombstones.go b/tombstones/tombstones.go similarity index 92% rename from record/tombstones.go rename to tombstones/tombstones.go index 23f62ee7..c655f06d 100644 --- a/record/tombstones.go +++ b/tombstones/tombstones.go @@ -11,11 +11,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package record +package tombstones import ( "encoding/binary" "fmt" + "hash" + "hash/crc32" "io" "io/ioutil" "os" @@ -39,6 +41,21 @@ const ( tombstoneFormatV1 = 1 ) +// The table gets initialized with sync.Once but may still cause a race +// with any other use of the crc32 package anywhere. Thus we initialize it +// before. +var castagnoliTable *crc32.Table + +func init() { + castagnoliTable = crc32.MakeTable(crc32.Castagnoli) +} + +// NewCRC32 initializes a CRC32 hash with a preconfigured polynomial, so the +// polynomial may be easily changed in one location at a later time, if necessary. +func NewCRC32() hash.Hash32 { + return crc32.New(castagnoliTable) +} + // TombstoneReader gives access to tombstone intervals by series reference. type TombstoneReader interface { // Get returns deletion intervals for the series with the given reference. diff --git a/record/tombstones_test.go b/tombstones/tombstones_test.go similarity index 99% rename from record/tombstones_test.go rename to tombstones/tombstones_test.go index 1d8bb8e6..d30eff23 100644 --- a/record/tombstones_test.go +++ b/tombstones/tombstones_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package record +package tombstones import ( "io/ioutil" diff --git a/wal.go b/wal.go index 27e4c0ba..08cbedfe 100644 --- a/wal.go +++ b/wal.go @@ -35,6 +35,7 @@ import ( "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/wal" ) @@ -92,7 +93,7 @@ type WAL interface { Reader() WALReader LogSeries([]record.RefSeries) error LogSamples([]record.RefSample) error - LogDeletes([]record.Stone) error + LogDeletes([]tombstones.Stone) error Truncate(mint int64, keep func(uint64) bool) error Close() error } @@ -102,7 +103,7 @@ type WALReader interface { Read( seriesf func([]record.RefSeries), samplesf func([]record.RefSample), - deletesf func([]record.Stone), + deletesf func([]tombstones.Stone), ) error } @@ -228,7 +229,7 @@ type repairingWALReader struct { func (r *repairingWALReader) Read( seriesf func([]record.RefSeries), samplesf func([]record.RefSample), - deletesf func([]record.Stone), + deletesf func([]tombstones.Stone), ) error { err := r.r.Read(seriesf, samplesf, deletesf) if err == nil { @@ -466,7 +467,7 @@ func (w *SegmentWAL) LogSamples(samples []record.RefSample) error { } // LogDeletes write a batch of new deletes to the log. 
-func (w *SegmentWAL) LogDeletes(stones []record.Stone) error { +func (w *SegmentWAL) LogDeletes(stones []tombstones.Stone) error { buf := w.getBuffer() flag := w.encodeDeletes(buf, stones) @@ -811,7 +812,7 @@ func (w *SegmentWAL) encodeSamples(buf *encoding.Encbuf, samples []record.RefSam return walSamplesSimple } -func (w *SegmentWAL) encodeDeletes(buf *encoding.Encbuf, stones []record.Stone) uint8 { +func (w *SegmentWAL) encodeDeletes(buf *encoding.Encbuf, stones []tombstones.Stone) uint8 { for _, s := range stones { for _, iv := range s.Intervals { buf.PutBE64(s.Ref) @@ -859,7 +860,7 @@ func (r *walReader) Err() error { func (r *walReader) Read( seriesf func([]record.RefSeries), samplesf func([]record.RefSample), - deletesf func([]record.Stone), + deletesf func([]tombstones.Stone), ) error { // Concurrency for replaying the WAL is very limited. We at least split out decoding and // processing into separate threads. @@ -890,7 +891,7 @@ func (r *walReader) Read( } //lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty. 
samplePool.Put(v[:0]) - case []record.Stone: + case []tombstones.Stone: if deletesf != nil { deletesf(v) } @@ -954,11 +955,11 @@ func (r *walReader) Read( } } case WALEntryDeletes: - var deletes []record.Stone + var deletes []tombstones.Stone if v := deletePool.Get(); v == nil { - deletes = make([]record.Stone, 0, 512) + deletes = make([]tombstones.Stone, 0, 512) } else { - deletes = v.([]record.Stone) + deletes = v.([]tombstones.Stone) } err = r.decodeDeletes(flag, b, &deletes) @@ -1168,13 +1169,13 @@ func (r *walReader) decodeSamples(flag byte, b []byte, res *[]record.RefSample) return nil } -func (r *walReader) decodeDeletes(flag byte, b []byte, res *[]record.Stone) error { +func (r *walReader) decodeDeletes(flag byte, b []byte, res *[]tombstones.Stone) error { dec := &encoding.Decbuf{B: b} for dec.Len() > 0 && dec.Err() == nil { - *res = append(*res, record.Stone{ + *res = append(*res, tombstones.Stone{ Ref: dec.Be64(), - Intervals: record.Intervals{ + Intervals: tombstones.Intervals{ {Mint: dec.Varint64(), Maxt: dec.Varint64()}, }, }) @@ -1270,7 +1271,7 @@ func MigrateWAL(logger log.Logger, dir string) (err error) { } err = repl.Log(enc.Samples(s, b[:0])) }, - func(s []record.Stone) { + func(s []tombstones.Stone) { if err != nil { return } diff --git a/wal/checkpoint.go b/wal/checkpoint.go index d9595ff3..1e9caa84 100644 --- a/wal/checkpoint.go +++ b/wal/checkpoint.go @@ -28,6 +28,7 @@ import ( tsdb_errors "github.com/prometheus/tsdb/errors" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" ) // CheckpointStats returns stats about a created checkpoint. 
@@ -151,7 +152,7 @@ func Checkpoint(w *WAL, from, to int, keep func(id uint64) bool, mint int64) (*C var ( series []record.RefSeries samples []record.RefSample - tstones []record.Stone + tstones []tombstones.Stone dec record.RecordDecoder enc record.RecordEncoder buf []byte diff --git a/wal/wal_watcher.go b/wal/wal_watcher.go index aa00acf8..be57975f 100644 --- a/wal/wal_watcher.go +++ b/wal/wal_watcher.go @@ -308,11 +308,7 @@ func (w *WALWatcher) watch(segmentNum int, tail bool) error { } defer segment.Close() -<<<<<<< HEAD - reader := NewLiveReader(w.logger, prometheus.DefaultRegisterer, segment) -======= reader := NewLiveReader(w.logger, w.reg, segment) ->>>>>>> WAL Watcher needs to take in and pass a Registerer to LiveReader. readTicker := time.NewTicker(readPeriod) defer readTicker.Stop() @@ -527,11 +523,7 @@ func (w *WALWatcher) readCheckpoint(checkpointDir string) error { } defer sr.Close() -<<<<<<< HEAD - r := NewLiveReader(w.logger, prometheus.DefaultRegisterer, sr) -======= r := NewLiveReader(w.logger, w.reg, sr) ->>>>>>> WAL Watcher needs to take in and pass a Registerer to LiveReader. 
if err := w.readSegment(r, index, false); err != io.EOF && err != nil { return errors.Wrap(err, "readSegment") } diff --git a/wal_test.go b/wal_test.go index c9a9168d..c84a899d 100644 --- a/wal_test.go +++ b/wal_test.go @@ -31,6 +31,7 @@ import ( "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" + "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/wal" ) @@ -175,7 +176,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { var ( recordedSeries [][]record.RefSeries recordedSamples [][]record.RefSample - recordedDeletes [][]record.Stone + recordedDeletes [][]tombstones.Stone ) var totalSamples int @@ -193,7 +194,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { var ( resultSeries [][]record.RefSeries resultSamples [][]record.RefSample - resultDeletes [][]record.Stone + resultDeletes [][]tombstones.Stone ) serf := func(series []record.RefSeries) { @@ -211,9 +212,9 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { } } - delf := func(stones []record.Stone) { + delf := func(stones []tombstones.Stone) { if len(stones) > 0 { - cst := make([]record.Stone, len(stones)) + cst := make([]tombstones.Stone, len(stones)) copy(cst, stones) resultDeletes = append(resultDeletes, cst) } @@ -230,7 +231,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { // Insert in batches and generate different amounts of samples for each. 
for i := 0; i < len(series); i += stepSize { var samples []record.RefSample - var stones []record.Stone + var stones []tombstones.Stone for j := 0; j < i*10; j++ { samples = append(samples, record.RefSample{ @@ -242,7 +243,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { for j := 0; j < i*20; j++ { ts := rand.Int63() - stones = append(stones, record.Stone{rand.Uint64(), record.Intervals{{ts, ts + rand.Int63n(10000)}}}) + stones = append(stones, tombstones.Stone{rand.Uint64(), tombstones.Intervals{{ts, ts + rand.Int63n(10000)}}}) } lbls := series[i : i+stepSize] @@ -498,8 +499,8 @@ func TestMigrateWAL_Fuzz(t *testing.T) { {Ref: 3, T: 100, V: 200}, {Ref: 4, T: 300, V: 400}, })) - testutil.Ok(t, oldWAL.LogDeletes([]record.Stone{ - {Ref: 1, Intervals: []record.Interval{{100, 200}}}, + testutil.Ok(t, oldWAL.LogDeletes([]tombstones.Stone{ + {Ref: 1, Intervals: []tombstones.Interval{{100, 200}}}, })) testutil.Ok(t, oldWAL.Close()) @@ -558,7 +559,7 @@ func TestMigrateWAL_Fuzz(t *testing.T) { {Ref: 200, Labels: labels.FromStrings("xyz", "def", "foo", "bar")}, }, []record.RefSample{{Ref: 3, T: 100, V: 200}, {Ref: 4, T: 300, V: 400}}, - []record.Stone{{Ref: 1, Intervals: []record.Interval{{100, 200}}}}, + []tombstones.Stone{{Ref: 1, Intervals: []tombstones.Interval{{100, 200}}}}, []record.RefSample{{Ref: 500, T: 1, V: 1}}, }, res) From adfe7f438604105025e7010492ccbe56090458af Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Thu, 20 Jun 2019 14:21:05 -0700 Subject: [PATCH 06/16] Fix some stuff I broke during rebase. 
Signed-off-by: Callum Styan --- compact.go | 4 ++-- go.mod | 1 - go.sum | 2 -- head.go | 19 +++---------------- head_test.go | 36 ++++++++++++++++++------------------ record/internal.go | 12 ++++++------ tombstones/tombstones.go | 12 ++++++------ wal/wal_watcher.go | 5 +++-- 8 files changed, 38 insertions(+), 53 deletions(-) diff --git a/compact.go b/compact.go index 01a4bee1..8fa8edfd 100644 --- a/compact.go +++ b/compact.go @@ -608,7 +608,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe } // Create an empty tombstones file. - if _, err := tombstones.WriteTombstoneFile(c.logger, tmp, record.NewMemTombstones()); err != nil { + if _, err := tombstones.WriteTombstoneFile(c.logger, tmp, tombstones.NewMemTombstones()); err != nil { return errors.Wrap(err, "write new tombstones file") } @@ -769,7 +769,7 @@ func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta, // // TODO think how to avoid the typecasting to verify when it is head block. if _, isHeadChunk := chk.Chunk.(*safeChunk); isHeadChunk && chk.MaxTime >= meta.MaxTime { - dranges = append(dranges, Interval{Mint: meta.MaxTime, Maxt: math.MaxInt64}) + dranges = append(dranges, tombstones.Interval{Mint: meta.MaxTime, Maxt: math.MaxInt64}) } else // Sanity check for disk blocks. 
diff --git a/go.mod b/go.mod index c75e4ed7..ccdd4372 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,6 @@ require ( github.com/oklog/ulid v1.3.1 github.com/pkg/errors v0.8.0 github.com/prometheus/client_golang v1.0.0 - github.com/prometheus/prometheus v2.5.0+incompatible golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5 gopkg.in/alecthomas/kingpin.v2 v2.2.6 diff --git a/go.sum b/go.sum index ad7f9516..e854d810 100644 --- a/go.sum +++ b/go.sum @@ -59,8 +59,6 @@ github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d h1:GoAlyOgbOEIFd github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2 h1:6LJUbpNm42llc4HRCuvApCSWB/WfhuNo9K98Q9sNGfs= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/prometheus v2.5.0+incompatible h1:7QPitgO2kOFG8ecuRn9O/4L9+10He72rVRJvMXrE9Hg= -github.com/prometheus/prometheus v2.5.0+incompatible/go.mod h1:oAIUtOny2rjMX0OWN5vPR5/q/twIROJvdqnQKDdil/s= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= diff --git a/head.go b/head.go index b74b7daa..0c2675c8 100644 --- a/head.go +++ b/head.go @@ -351,20 +351,11 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { } var ( -<<<<<<< HEAD - dec RecordDecoder - series []RefSeries - samples []RefSample - tstones []Stone - allStones = newMemTombstones() -======= dec record.RecordDecoder series []record.RefSeries samples []record.RefSample tstones []tombstones.Stone allStones = tombstones.NewMemTombstones() - err error ->>>>>>> Move tombstones to it's own package. 
) defer func() { if err := allStones.Close(); err != nil { @@ -389,7 +380,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { series, created := h.getOrCreateWithID(s.Ref, s.Labels.Hash(), s.Labels) if !created { - // There's already a different ref for this series. + // There's already a different Ref for this series. multiRefLock.Lock() multiRef[s.Ref] = series.Ref multiRefLock.Unlock() @@ -478,15 +469,11 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { } wg.Wait() -<<<<<<< HEAD if r.Err() != nil { return errors.Wrap(r.Err(), "read records") } - if err := allStones.Iter(func(ref uint64, dranges Intervals) error { -======= if err := allStones.Iter(func(ref uint64, dranges tombstones.Intervals) error { ->>>>>>> Move tombstones to it's own package. return h.chunkRewrite(ref, dranges) }); err != nil { return errors.Wrap(r.Err(), "deleting samples from tombstones") @@ -1347,8 +1334,8 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks continue } // Set the head chunks as open (being appended to). - maxTime := c.maxTime - if s.headChunk == c { + maxTime := c.MaxTime + if s.HeadChunk == c { maxTime = math.MaxInt64 } diff --git a/head_test.go b/head_test.go index f297fcc6..7b4caded 100644 --- a/head_test.go +++ b/head_test.go @@ -102,28 +102,28 @@ func TestHead_ReadWAL(t *testing.T) { for _, compress := range []bool{false, true} { t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { entries := []interface{}{ - []RefSeries{ + []record.RefSeries{ {Ref: 10, Labels: labels.FromStrings("a", "1")}, {Ref: 11, Labels: labels.FromStrings("a", "2")}, {Ref: 100, Labels: labels.FromStrings("a", "3")}, }, - []RefSample{ + []record.RefSample{ {Ref: 0, T: 99, V: 1}, {Ref: 10, T: 100, V: 2}, {Ref: 100, T: 100, V: 3}, }, - []RefSeries{ + []record.RefSeries{ {Ref: 50, Labels: labels.FromStrings("a", "4")}, // This series has two refs pointing to it. 
{Ref: 101, Labels: labels.FromStrings("a", "3")}, }, - []RefSample{ + []record.RefSample{ {Ref: 10, T: 101, V: 5}, {Ref: 50, T: 101, V: 6}, {Ref: 101, T: 101, V: 7}, }, - []Stone{ - {ref: 0, intervals: []Interval{{Mint: 99, Maxt: 101}}}, + []tombstones.Stone{ + {Ref: 0, Intervals: []tombstones.Interval{{Mint: 99, Maxt: 101}}}, }, } dir, err := ioutil.TempDir("", "test_read_wal") @@ -148,10 +148,10 @@ func TestHead_ReadWAL(t *testing.T) { s50 := head.series.getByID(50) s100 := head.series.getByID(100) - testutil.Equals(t, labels.FromStrings("a", "1"), s10.lset) - testutil.Equals(t, (*memSeries)(nil), s11) // Series without samples should be garbage colected at head.Init(). - testutil.Equals(t, labels.FromStrings("a", "4"), s50.lset) - testutil.Equals(t, labels.FromStrings("a", "3"), s100.lset) + testutil.Equals(t, labels.FromStrings("a", "1"), s10.Lset) + testutil.Equals(t, (*record.MemSeries)(nil), s11) // Series without samples should be garbage colected at head.Init(). + testutil.Equals(t, labels.FromStrings("a", "4"), s50.Lset) + testutil.Equals(t, labels.FromStrings("a", "3"), s100.Lset) expandChunk := func(c chunkenc.Iterator) (x []sample) { for c.Next() { @@ -328,14 +328,14 @@ func TestHeadDeleteSeriesWithoutSamples(t *testing.T) { for _, compress := range []bool{false, true} { t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { entries := []interface{}{ - []RefSeries{ + []record.RefSeries{ {Ref: 10, Labels: labels.FromStrings("a", "1")}, }, - []RefSample{}, - []RefSeries{ + []record.RefSample{}, + []record.RefSeries{ {Ref: 50, Labels: labels.FromStrings("a", "2")}, }, - []RefSample{ + []record.RefSample{ {Ref: 50, T: 80, V: 1}, {Ref: 50, T: 90, V: 1}, }, @@ -1057,9 +1057,9 @@ func TestHead_LogRollback(t *testing.T) { testutil.Equals(t, 1, len(recs)) - series, ok := recs[0].([]RefSeries) + series, ok := recs[0].([]record.RefSeries) testutil.Assert(t, ok, "expected series record but got %+v", recs[0]) - testutil.Equals(t, []RefSeries{{Ref: 1, 
Labels: labels.FromStrings("a", "b")}}, series) + testutil.Equals(t, []record.RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, series) }) } } @@ -1067,7 +1067,7 @@ func TestHead_LogRollback(t *testing.T) { // TestWalRepair_DecodingError ensures that a repair is run for an error // when decoding a record. func TestWalRepair_DecodingError(t *testing.T) { - var enc RecordEncoder + var enc record.RecordEncoder for name, test := range map[string]struct { corrFunc func(rec []byte) []byte // Func that applies the corruption to a record. rec []byte @@ -1079,7 +1079,7 @@ func TestWalRepair_DecodingError(t *testing.T) { // Do not modify the base record because it is Logged multiple times. res := make([]byte, len(rec)) copy(res, rec) - res[0] = byte(RecordInvalid) + res[0] = byte(record.RecordInvalid) return res }, enc.Series([]record.RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), diff --git a/record/internal.go b/record/internal.go index dbc166db..89a0cc2d 100644 --- a/record/internal.go +++ b/record/internal.go @@ -74,8 +74,8 @@ type MemSeries struct { PendingCommit bool // Whether there are samples waiting to be committed to this series. Chunks []*MemChunk Lset labels.Labels + HeadChunk *MemChunk - headChunk *MemChunk chunkRange int64 firstChunkID int @@ -117,7 +117,7 @@ func (s *MemSeries) cut(mint int64) *MemChunk { MaxTime: math.MinInt64, } s.Chunks = append(s.Chunks, c) - s.headChunk = c + s.HeadChunk = c // Set upper bound on when the next chunk must be started. An earlier timestamp // may be chosen dynamically at a later point. @@ -143,7 +143,7 @@ func (s *MemSeries) ChunksMetas() []chunks.Meta { // and 'chunkRange', like how it would appear after 'newMemSeries(...)'. 
func (s *MemSeries) Reset() { s.Chunks = nil - s.headChunk = nil + s.HeadChunk = nil s.firstChunkID = 0 s.nextAt = math.MinInt64 s.sampleBuf = [4]sample{} @@ -197,9 +197,9 @@ func (s *MemSeries) TruncateChunksBefore(mint int64) (removed int) { s.Chunks = append(s.Chunks[:0], s.Chunks[k:]...) s.firstChunkID += k if len(s.Chunks) == 0 { - s.headChunk = nil + s.HeadChunk = nil } else { - s.headChunk = s.Chunks[len(s.Chunks)-1] + s.HeadChunk = s.Chunks[len(s.Chunks)-1] } return k @@ -270,7 +270,7 @@ func (s *MemSeries) Iterator(id int) chunkenc.Iterator { } func (s *MemSeries) head() *MemChunk { - return s.headChunk + return s.HeadChunk } type MemChunk struct { diff --git a/tombstones/tombstones.go b/tombstones/tombstones.go index c655f06d..8b79632b 100644 --- a/tombstones/tombstones.go +++ b/tombstones/tombstones.go @@ -50,9 +50,9 @@ func init() { castagnoliTable = crc32.MakeTable(crc32.Castagnoli) } -// NewCRC32 initializes a CRC32 hash with a preconfigured polynomial, so the +// newCRC32 initializes a CRC32 hash with a preconfigured polynomial, so the // polynomial may be easily changed in one location at a later time, if necessary. 
-func NewCRC32() hash.Hash32 { +func newCRC32() hash.Hash32 { return crc32.New(castagnoliTable) } @@ -72,7 +72,7 @@ type TombstoneReader interface { } func WriteTombstoneFile(logger log.Logger, dir string, tr TombstoneReader) (int64, error) { - path := filepath.Join(dir, tombstoneFilename) + path := filepath.Join(dir, TombstoneFilename) tmp := path + ".tmp" hash := newCRC32() var size int @@ -151,9 +151,9 @@ type Stone struct { } func ReadTombstones(dir string) (TombstoneReader, int64, error) { - b, err := ioutil.ReadFile(filepath.Join(dir, tombstoneFilename)) + b, err := ioutil.ReadFile(filepath.Join(dir, TombstoneFilename)) if os.IsNotExist(err) { - return newMemTombstones(), 0, nil + return NewMemTombstones(), 0, nil } else if err != nil { return nil, 0, err } @@ -175,7 +175,7 @@ func ReadTombstones(dir string) (TombstoneReader, int64, error) { } // Verify checksum. - hash := NewCRC32() + hash := newCRC32() if _, err := hash.Write(d.Get()); err != nil { return nil, 0, errors.Wrap(err, "write to hash") } diff --git a/wal/wal_watcher.go b/wal/wal_watcher.go index be57975f..bc21994d 100644 --- a/wal/wal_watcher.go +++ b/wal/wal_watcher.go @@ -76,6 +76,7 @@ var ( }, []string{consumer}, ) + lrMetrics = NewLiveReaderMetrics(prometheus.DefaultRegisterer) ) // This function is copied from prometheus/prometheus/pkg/timestamp to avoid adding vendor to TSDB repo. 
@@ -308,7 +309,7 @@ func (w *WALWatcher) watch(segmentNum int, tail bool) error { } defer segment.Close() - reader := NewLiveReader(w.logger, w.reg, segment) + reader := NewLiveReader(w.logger, lrMetrics, segment) readTicker := time.NewTicker(readPeriod) defer readTicker.Stop() @@ -523,7 +524,7 @@ func (w *WALWatcher) readCheckpoint(checkpointDir string) error { } defer sr.Close() - r := NewLiveReader(w.logger, w.reg, sr) + r := NewLiveReader(w.logger, lrMetrics, sr) if err := w.readSegment(r, index, false); err != io.EOF && err != nil { return errors.Wrap(err, "readSegment") } From f2c8171016ce1f3e572d0efa67ff3fbbc5fbb051 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 3 Jul 2019 18:31:48 -0700 Subject: [PATCH 07/16] Keep memSeries in head.go Signed-off-by: Callum Styan --- head.go | 382 +++++++++++++++++++++++++++++++++++++++------ head_test.go | 62 ++++---- querier_test.go | 13 -- record/internal.go | 269 +------------------------------ 4 files changed, 364 insertions(+), 362 deletions(-) diff --git a/head.go b/head.go index 0c2675c8..fa385ca6 100644 --- a/head.go +++ b/head.go @@ -55,6 +55,7 @@ type Head struct { wal *wal.WAL logger log.Logger appendPool sync.Pool + seriesPool sync.Pool bytesPool sync.Pool numSeries uint64 @@ -252,7 +253,7 @@ func (h *Head) processWALSamples( defer close(output) // Mitigate lock contention in getByID. - refSeries := map[uint64]*record.MemSeries{} + refSeries := map[uint64]*memSeries{} mint, maxt := int64(math.MaxInt64), int64(math.MinInt64) @@ -382,7 +383,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { if !created { // There's already a different Ref for this series. 
multiRefLock.Lock() - multiRef[s.Ref] = series.Ref + multiRef[s.Ref] = series.ref multiRefLock.Unlock() } @@ -770,6 +771,7 @@ func (h *Head) appender() *headAppender { mint: math.MaxInt64, maxt: math.MinInt64, samples: h.getAppendBuffer(), + sampleSeries: h.getSeriesBuffer(), } } @@ -793,6 +795,19 @@ func (h *Head) putAppendBuffer(b []record.RefSample) { h.appendPool.Put(b[:0]) } +func (h *Head) getSeriesBuffer() []*memSeries { + b := h.seriesPool.Get() + if b == nil { + return make([]*memSeries, 0, 512) + } + return b.([]*memSeries) +} + +func (h *Head) putSeriesBuffer(b []*memSeries) { + //lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty. + h.seriesPool.Put(b[:0]) +} + func (h *Head) getBytesBuffer() []byte { b := h.bytesPool.Get() if b == nil { @@ -811,8 +826,9 @@ type headAppender struct { minValidTime int64 // No samples below this timestamp are allowed. mint, maxt int64 - series []record.RefSeries - samples []record.RefSample + series []record.RefSeries + samples []record.RefSample + sampleSeries []*memSeries } func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) { @@ -826,11 +842,11 @@ func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, erro s, created := a.head.getOrCreate(lset.Hash(), lset) if created { a.series = append(a.series, record.RefSeries{ - Ref: s.Ref, + Ref: s.ref, Labels: lset, }) } - return s.Ref, a.AddFast(s.Ref, t, v) + return s.ref, a.AddFast(s.ref, t, v) } func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { @@ -847,7 +863,7 @@ func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { s.Unlock() return err } - s.PendingCommit = true + s.pendingCommit = true s.Unlock() if t < a.mint { @@ -858,11 +874,11 @@ func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { } a.samples = append(a.samples, record.RefSample{ - Ref: ref, - T: t, - V: v, - Series: s, + Ref: ref, + T: t, + V: v, }) + a.sampleSeries = 
append(a.sampleSeries, s) return nil } @@ -899,18 +915,20 @@ func (a *headAppender) log() error { func (a *headAppender) Commit() error { defer a.head.metrics.activeAppenders.Dec() defer a.head.putAppendBuffer(a.samples) + defer a.head.putSeriesBuffer(a.sampleSeries) if err := a.log(); err != nil { return errors.Wrap(err, "write to WAL") } + var series *memSeries total := len(a.samples) - - for _, s := range a.samples { - s.Series.Lock() - ok, chunkCreated := s.Series.Append(s.T, s.V) - s.Series.PendingCommit = false - s.Series.Unlock() + for i, s := range a.samples { + series = a.sampleSeries[i] + series.Lock() + ok, chunkCreated := series.Append(s.T, s.V) + series.pendingCommit = false + series.Unlock() if !ok { total-- @@ -929,10 +947,12 @@ func (a *headAppender) Commit() error { func (a *headAppender) Rollback() error { a.head.metrics.activeAppenders.Dec() - for _, s := range a.samples { - s.Series.Lock() - s.Series.PendingCommit = false - s.Series.Unlock() + var series *memSeries + for i := range a.samples { + series = a.sampleSeries[i] + series.Lock() + series.pendingCommit = false + series.Unlock() } a.head.putAppendBuffer(a.samples) @@ -1004,7 +1024,7 @@ func (h *Head) chunkRewrite(ref uint64, dranges tombstones.Intervals) (err error ms := h.series.getByID(ref) ms.Lock() defer ms.Unlock() - if len(ms.Chunks) == 0 { + if len(ms.chunks) == 0 { return nil } @@ -1216,7 +1236,7 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { type safeChunk struct { chunkenc.Chunk - s *record.MemSeries + s *memSeries cid int } @@ -1286,7 +1306,7 @@ func (h *headIndexReader) Postings(name, value string) (index.Postings, error) { } func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { - series := make([]*record.MemSeries, 0, 128) + series := make([]*memSeries, 0, 128) // Fetch all the series only once. 
for p.Next() { @@ -1302,13 +1322,13 @@ func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { } sort.Slice(series, func(i, j int) bool { - return labels.Compare(series[i].Lset, series[j].Lset) < 0 + return labels.Compare(series[i].lset, series[j].lset) < 0 }) // Convert back to list. ep := make([]uint64, 0, len(series)) for _, p := range series { - ep = append(ep, p.Ref) + ep = append(ep, p.ref) } return index.NewListPostings(ep) } @@ -1321,21 +1341,21 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks h.head.metrics.seriesNotFound.Inc() return record.ErrNotFound } - *lbls = append((*lbls)[:0], s.Lset...) + *lbls = append((*lbls)[:0], s.lset...) s.Lock() defer s.Unlock() *chks = (*chks)[:0] - for i, c := range s.Chunks { + for i, c := range s.chunks { // Do not expose chunks that are outside of the specified range. if !c.OverlapsClosedInterval(h.mint, h.maxt) { continue } // Set the head chunks as open (being appended to). maxTime := c.MaxTime - if s.HeadChunk == c { + if s.headChunk == c { maxTime = math.MaxInt64 } @@ -1359,7 +1379,7 @@ func (h *headIndexReader) LabelIndices() ([][]string, error) { return res, nil } -func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*record.MemSeries, bool) { +func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool) { // Just using `getOrSet` below would be semantically sufficient, but we'd create // a new series on every sample inserted via Add(), which causes allocations // and makes our series IDs rather random and harder to compress in postings. 
@@ -1374,8 +1394,8 @@ func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*record.MemSeries, return h.getOrCreateWithID(id, hash, lset) } -func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*record.MemSeries, bool) { - s := record.NewMemSeries(lset, id, h.chunkRange) +func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSeries, bool) { + s := newMemSeries(lset, id, h.chunkRange) s, created := h.series.getOrSet(hash, s) if !created { @@ -1409,21 +1429,21 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*record.M // on top of a regular hashmap and holds a slice of series to resolve hash collisions. // Its methods require the hash to be submitted with it to avoid re-computations throughout // the code. -type seriesHashmap map[uint64][]*record.MemSeries +type seriesHashmap map[uint64][]*memSeries -func (m seriesHashmap) get(hash uint64, lset labels.Labels) *record.MemSeries { +func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries { for _, s := range m[hash] { - if s.Lset.Equals(lset) { + if s.lset.Equals(lset) { return s } } return nil } -func (m seriesHashmap) set(hash uint64, s *record.MemSeries) { +func (m seriesHashmap) set(hash uint64, s *memSeries) { l := m[hash] for i, prev := range l { - if prev.Lset.Equals(s.Lset) { + if prev.lset.Equals(s.lset) { l[i] = s return } @@ -1432,9 +1452,9 @@ func (m seriesHashmap) set(hash uint64, s *record.MemSeries) { } func (m seriesHashmap) del(hash uint64, lset labels.Labels) { - var rem []*record.MemSeries + var rem []*memSeries for _, s := range m[hash] { - if !s.Lset.Equals(lset) { + if !s.lset.Equals(lset) { rem = append(rem, s) } } @@ -1450,7 +1470,7 @@ func (m seriesHashmap) del(hash uint64, lset labels.Labels) { // with the maps was profiled to be slower – likely due to the additional pointer // dereferences. 
type stripeSeries struct { - series [stripeSize]map[uint64]*record.MemSeries + series [stripeSize]map[uint64]*memSeries hashes [stripeSize]seriesHashmap locks [stripeSize]stripeLock } @@ -1470,7 +1490,7 @@ func newStripeSeries() *stripeSeries { s := &stripeSeries{} for i := range s.series { - s.series[i] = map[uint64]*record.MemSeries{} + s.series[i] = map[uint64]*memSeries{} } for i := range s.hashes { s.hashes[i] = seriesHashmap{} @@ -1495,7 +1515,7 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { series.Lock() rmChunks += series.TruncateChunksBefore(mint) - if len(series.Chunks) > 0 || series.PendingCommit { + if len(series.chunks) > 0 || series.pendingCommit { series.Unlock() continue } @@ -1505,15 +1525,15 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { // series alike. // If we don't hold them all, there's a very small chance that a series receives // samples again while we are half-way into deleting it. - j := int(series.Ref & stripeMask) + j := int(series.ref & stripeMask) if i != j { s.locks[j].Lock() } - deleted[series.Ref] = struct{}{} - s.hashes[i].del(hash, series.Lset) - delete(s.series[j], series.Ref) + deleted[series.ref] = struct{}{} + s.hashes[i].del(hash, series.lset) + delete(s.series[j], series.ref) if i != j { s.locks[j].Unlock() @@ -1529,7 +1549,7 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { return deleted, rmChunks } -func (s *stripeSeries) getByID(id uint64) *record.MemSeries { +func (s *stripeSeries) getByID(id uint64) *memSeries { i := id & stripeMask s.locks[i].RLock() @@ -1539,7 +1559,7 @@ func (s *stripeSeries) getByID(id uint64) *record.MemSeries { return series } -func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *record.MemSeries { +func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *memSeries { i := hash & stripeMask s.locks[i].RLock() @@ -1549,27 +1569,285 @@ func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) 
*record.MemSer return series } -func (s *stripeSeries) getOrSet(hash uint64, series *record.MemSeries) (*record.MemSeries, bool) { +func (s *stripeSeries) getOrSet(hash uint64, series *memSeries) (*memSeries, bool) { i := hash & stripeMask s.locks[i].Lock() - if prev := s.hashes[i].get(hash, series.Lset); prev != nil { + if prev := s.hashes[i].get(hash, series.lset); prev != nil { s.locks[i].Unlock() return prev, false } s.hashes[i].set(hash, series) s.locks[i].Unlock() - i = series.Ref & stripeMask + i = series.ref & stripeMask s.locks[i].Lock() - s.series[i][series.Ref] = series + s.series[i][series.ref] = series s.locks[i].Unlock() return series, true } +type sample struct { + t int64 + v float64 +} + +func (s sample) T() int64 { + return s.t +} + +func (s sample) V() float64 { + return s.v +} + +// memSeries is the in-memory representation of a series. None of its methods +// are goroutine safe and it is the caller's responsibility to lock it. +type memSeries struct { + sync.Mutex + + ref uint64 + chunks []*memChunk + lset labels.Labels + headChunk *memChunk + + chunkRange int64 + firstChunkID int + + nextAt int64 // Timestamp at which to cut the next chunk. + sampleBuf [4]sample + pendingCommit bool // Whether there are samples waiting to be committed to this series. + + app chunkenc.Appender // Current appender for the chunk. 
+} + +func newMemSeries(lset labels.Labels, id uint64, chunkRange int64) *memSeries { + s := &memSeries{ + lset: lset, + ref: id, + chunkRange: chunkRange, + nextAt: math.MinInt64, + } + return s +} + +func (s *memSeries) MinTime() int64 { + if len(s.chunks) == 0 { + return math.MinInt64 + } + return s.chunks[0].MinTime +} + +func (s *memSeries) MaxTime() int64 { + c := s.head() + if c == nil { + return math.MinInt64 + } + return c.MaxTime +} + +func (s *memSeries) cut(mint int64) *memChunk { + c := &memChunk{ + Chunk: chunkenc.NewXORChunk(), + MinTime: mint, + MaxTime: math.MinInt64, + } + s.chunks = append(s.chunks, c) + s.headChunk = c + + // Set upper bound on when the next chunk must be started. An earlier timestamp + // may be chosen dynamically at a later point. + s.nextAt = rangeForTimestamp(mint, s.chunkRange) + + app, err := c.Chunk.Appender() + if err != nil { + panic(err) + } + s.app = app + return c +} + +func (s *memSeries) ChunksMetas() []chunks.Meta { + metas := make([]chunks.Meta, 0, len(s.chunks)) + for _, chk := range s.chunks { + metas = append(metas, chunks.Meta{Chunk: chk.Chunk, MinTime: chk.MinTime, MaxTime: chk.MaxTime}) + } + return metas +} + +// reset re-initialises all the variable in the memSeries except 'lset', 'ref', +// and 'chunkRange', like how it would appear after 'newmemSeries(...)'. +func (s *memSeries) Reset() { + s.chunks = nil + s.headChunk = nil + s.firstChunkID = 0 + s.nextAt = math.MinInt64 + s.sampleBuf = [4]sample{} + s.pendingCommit = false + s.app = nil +} + +// Appendable checks whether the given sample is valid for appending to the series. +func (s *memSeries) Appendable(t int64, v float64) error { + c := s.head() + if c == nil { + return nil + } + + if t > c.MaxTime { + return nil + } + if t < c.MaxTime { + return record.ErrOutOfOrderSample + } + // We are allowing exact duplicates as we can encounter them in valid cases + // like federation and erroring out at that time would be extremely noisy. 
+ if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) { + return record.ErrAmendSample + } + return nil +} + +func (s *memSeries) Chunk(id int) *memChunk { + ix := id - s.firstChunkID + if ix < 0 || ix >= len(s.chunks) { + return nil + } + return s.chunks[ix] +} + +func (s *memSeries) ChunkID(pos int) int { + return pos + s.firstChunkID +} + +// TruncateChunksBefore removes all chunks from the series that have not timestamp +// at or after mint. Chunk IDs remain unchanged. +func (s *memSeries) TruncateChunksBefore(mint int64) (removed int) { + var k int + for i, c := range s.chunks { + if c.MaxTime >= mint { + break + } + k = i + 1 + } + s.chunks = append(s.chunks[:0], s.chunks[k:]...) + s.firstChunkID += k + if len(s.chunks) == 0 { + s.headChunk = nil + } else { + s.headChunk = s.chunks[len(s.chunks)-1] + } + + return k +} + +// Append adds the sample (t, v) to the series. +func (s *memSeries) Append(t int64, v float64) (success, chunkCreated bool) { + // Based on Gorilla white papers this offers near-optimal compression ratio + // so anything bigger that this has diminishing returns and increases + // the time range within which we have to decompress all samples. + const samplesPerChunk = 120 + + c := s.head() + + if c == nil { + c = s.cut(t) + chunkCreated = true + } + numSamples := c.Chunk.NumSamples() + + // Out of order sample. + if c.MaxTime >= t { + return false, chunkCreated + } + // If we reach 25% of a chunk's desired sample count, set a definitive time + // at which to start the next chunk. + // At latest it must happen at the timestamp set when the chunk was cut. 
+ if numSamples == samplesPerChunk/4 { + s.nextAt = computeChunkEndTime(c.MinTime, c.MaxTime, s.nextAt) + } + if t >= s.nextAt { + c = s.cut(t) + chunkCreated = true + } + s.app.Append(t, v) + + c.MaxTime = t + + s.sampleBuf[0] = s.sampleBuf[1] + s.sampleBuf[1] = s.sampleBuf[2] + s.sampleBuf[2] = s.sampleBuf[3] + s.sampleBuf[3] = sample{t: t, v: v} + + return true, chunkCreated +} + +func (s *memSeries) Iterator(id int) chunkenc.Iterator { + c := s.Chunk(id) + // TODO(fabxc): Work around! A querier may have retrieved a pointer to a series' chunk, + // which got then garbage collected before it got accessed. + // We must ensure to not garbage collect as long as any readers still hold a reference. + if c == nil { + return chunkenc.NewNopIterator() + } + + if id-s.firstChunkID < len(s.chunks)-1 { + return c.Chunk.Iterator() + } + // Serve the last 4 samples for the last chunk from the sample buffer + // as their compressed bytes may be mutated by added samples. + it := &memSafeIterator{ + Iterator: c.Chunk.Iterator(), + i: -1, + total: c.Chunk.NumSamples(), + buf: s.sampleBuf, + } + return it +} + +func (s *memSeries) head() *memChunk { + return s.headChunk +} + +type memChunk struct { + Chunk chunkenc.Chunk + MinTime, MaxTime int64 +} + +// Returns true if the chunk overlaps [mint, maxt]. 
+func (mc *memChunk) OverlapsClosedInterval(mint, maxt int64) bool { + return mc.MinTime <= maxt && mint <= mc.MaxTime +} + +type memSafeIterator struct { + chunkenc.Iterator + + i int + total int + buf [4]sample +} + +func (it *memSafeIterator) Next() bool { + if it.i+1 >= it.total { + return false + } + it.i++ + if it.total-it.i > 4 { + return it.Iterator.Next() + } + return true +} + +func (it *memSafeIterator) At() (int64, float64) { + if it.total-it.i > 4 { + return it.Iterator.At() + } + s := it.buf[4-(it.total-it.i)] + return s.t, s.v +} + // computeChunkEndTime estimates the end timestamp based the beginning of a chunk, // its current timestamp and the upper bound up to which we insert data. // It assumes that the time range is 1/4 full. diff --git a/head_test.go b/head_test.go index 7b4caded..c5e4d358 100644 --- a/head_test.go +++ b/head_test.go @@ -148,10 +148,10 @@ func TestHead_ReadWAL(t *testing.T) { s50 := head.series.getByID(50) s100 := head.series.getByID(100) - testutil.Equals(t, labels.FromStrings("a", "1"), s10.Lset) - testutil.Equals(t, (*record.MemSeries)(nil), s11) // Series without samples should be garbage colected at head.Init(). - testutil.Equals(t, labels.FromStrings("a", "4"), s50.Lset) - testutil.Equals(t, labels.FromStrings("a", "3"), s100.Lset) + testutil.Equals(t, labels.FromStrings("a", "1"), s10.lset) + testutil.Equals(t, (*memSeries)(nil), s11) // Series without samples should be garbage colected at head.Init(). 
+ testutil.Equals(t, labels.FromStrings("a", "4"), s50.lset) + testutil.Equals(t, labels.FromStrings("a", "3"), s100.lset) expandChunk := func(c chunkenc.Iterator) (x []sample) { for c.Next() { @@ -225,38 +225,38 @@ func TestHead_Truncate(t *testing.T) { s3, _ := h.getOrCreate(3, labels.FromStrings("a", "1", "b", "2")) s4, _ := h.getOrCreate(4, labels.FromStrings("a", "2", "b", "2", "c", "1")) - s1.Chunks = []*record.MemChunk{ + s1.chunks = []*memChunk{ {MinTime: 0, MaxTime: 999}, {MinTime: 1000, MaxTime: 1999}, {MinTime: 2000, MaxTime: 2999}, } - s2.Chunks = []*record.MemChunk{ + s2.chunks = []*memChunk{ {MinTime: 1000, MaxTime: 1999}, {MinTime: 2000, MaxTime: 2999}, {MinTime: 3000, MaxTime: 3999}, } - s3.Chunks = []*record.MemChunk{ + s3.chunks = []*memChunk{ {MinTime: 0, MaxTime: 999}, {MinTime: 1000, MaxTime: 1999}, } - s4.Chunks = []*record.MemChunk{} + s4.chunks = []*memChunk{} // Truncation need not be aligned. testutil.Ok(t, h.Truncate(1)) testutil.Ok(t, h.Truncate(2000)) - testutil.Equals(t, []*record.MemChunk{ + testutil.Equals(t, []*memChunk{ {MinTime: 2000, MaxTime: 2999}, - }, h.series.getByID(s1.Ref).Chunks) + }, h.series.getByID(s1.ref).chunks) - testutil.Equals(t, []*record.MemChunk{ + testutil.Equals(t, []*memChunk{ {MinTime: 2000, MaxTime: 2999}, {MinTime: 3000, MaxTime: 3999}, - }, h.series.getByID(s2.Ref).Chunks) + }, h.series.getByID(s2.ref).chunks) - testutil.Assert(t, h.series.getByID(s3.Ref) == nil, "") - testutil.Assert(t, h.series.getByID(s4.Ref) == nil, "") + testutil.Assert(t, h.series.getByID(s3.ref) == nil, "") + testutil.Assert(t, h.series.getByID(s4.ref) == nil, "") postingsA1, _ := index.ExpandPostings(h.postings.Get("a", "1")) postingsA2, _ := index.ExpandPostings(h.postings.Get("a", "2")) @@ -265,10 +265,10 @@ func TestHead_Truncate(t *testing.T) { postingsC1, _ := index.ExpandPostings(h.postings.Get("c", "1")) postingsAll, _ := index.ExpandPostings(h.postings.Get("", "")) - testutil.Equals(t, []uint64{s1.Ref}, postingsA1) - 
testutil.Equals(t, []uint64{s2.Ref}, postingsA2) - testutil.Equals(t, []uint64{s1.Ref, s2.Ref}, postingsB1) - testutil.Equals(t, []uint64{s1.Ref, s2.Ref}, postingsAll) + testutil.Equals(t, []uint64{s1.ref}, postingsA1) + testutil.Equals(t, []uint64{s2.ref}, postingsA2) + testutil.Equals(t, []uint64{s1.ref, s2.ref}, postingsB1) + testutil.Equals(t, []uint64{s1.ref, s2.ref}, postingsAll) testutil.Assert(t, postingsB2 == nil, "") testutil.Assert(t, postingsC1 == nil, "") @@ -290,7 +290,7 @@ func TestHead_Truncate(t *testing.T) { // Validate various behaviors brought on by firstChunkID accounting for // garbage collected chunks. func TestMemSeries_truncateChunks(t *testing.T) { - s := record.NewMemSeries(labels.FromStrings("a", "b"), 1, 2000) + s := newMemSeries(labels.FromStrings("a", "b"), 1, 2000) for i := 0; i < 4000; i += 5 { ok, _ := s.Append(int64(i), float64(i)) @@ -299,7 +299,7 @@ func TestMemSeries_truncateChunks(t *testing.T) { // Check that truncate removes half of the chunks and afterwards // that the ID of the last chunk still gives us the same chunk afterwards. 
- countBefore := len(s.Chunks) + countBefore := len(s.chunks) lastID := s.ChunkID(countBefore - 1) lastChunk := s.Chunk(lastID) @@ -308,9 +308,9 @@ func TestMemSeries_truncateChunks(t *testing.T) { s.TruncateChunksBefore(2000) - testutil.Equals(t, int64(2000), s.Chunks[0].MinTime) + testutil.Equals(t, int64(2000), s.chunks[0].MinTime) testutil.Assert(t, s.Chunk(0) == nil, "first chunks not gone") - testutil.Equals(t, countBefore/2, len(s.Chunks)) + testutil.Equals(t, countBefore/2, len(s.chunks)) testutil.Equals(t, lastChunk, s.Chunk(lastID)) // Validate that the series' sample buffer is applied correctly to the last chunk @@ -854,7 +854,7 @@ func TestComputeChunkEndTime(t *testing.T) { } func TestMemSeries_append(t *testing.T) { - s := record.NewMemSeries(labels.Labels{}, 1, 500) + s := newMemSeries(labels.Labels{}, 1, 500) // Add first two samples at the very end of a chunk range and the next two // on and after it. @@ -875,8 +875,8 @@ func TestMemSeries_append(t *testing.T) { testutil.Assert(t, ok, "append failed") testutil.Assert(t, !chunkCreated, "second sample should use same chunk") - testutil.Assert(t, s.Chunks[0].MinTime == 998 && s.Chunks[0].MaxTime == 999, "wrong chunk range") - testutil.Assert(t, s.Chunks[1].MinTime == 1000 && s.Chunks[1].MaxTime == 1001, "wrong chunk range") + testutil.Assert(t, s.chunks[0].MinTime == 998 && s.chunks[0].MaxTime == 999, "wrong chunk range") + testutil.Assert(t, s.chunks[1].MinTime == 1000 && s.chunks[1].MaxTime == 1001, "wrong chunk range") // Fill the range [1000,2000) with many samples. Intermediate chunks should be cut // at approximately 120 samples per chunk. @@ -885,10 +885,10 @@ func TestMemSeries_append(t *testing.T) { testutil.Assert(t, ok, "append failed") } - testutil.Assert(t, len(s.Chunks) > 7, "expected intermediate chunks") + testutil.Assert(t, len(s.chunks) > 7, "expected intermediate chunks") // All chunks but the first and last should now be moderately full. 
- for i, c := range s.Chunks[1 : len(s.Chunks)-1] { + for i, c := range s.chunks[1 : len(s.chunks)-1] { testutil.Assert(t, c.Chunk.NumSamples() > 100, "unexpected small chunk %d of length %d", i, c.Chunk.NumSamples()) } } @@ -902,7 +902,7 @@ func TestGCChunkAccess(t *testing.T) { h.initTime(0) s, _ := h.getOrCreate(1, labels.FromStrings("a", "1")) - s.Chunks = []*record.MemChunk{ + s.chunks = []*memChunk{ {MinTime: 0, MaxTime: 999}, {MinTime: 1000, MaxTime: 1999}, } @@ -942,7 +942,7 @@ func TestGCSeriesAccess(t *testing.T) { h.initTime(0) s, _ := h.getOrCreate(1, labels.FromStrings("a", "1")) - s.Chunks = []*record.MemChunk{ + s.chunks = []*memChunk{ {MinTime: 0, MaxTime: 999}, {MinTime: 1000, MaxTime: 1999}, } @@ -967,7 +967,7 @@ func TestGCSeriesAccess(t *testing.T) { testutil.Ok(t, h.Truncate(2000)) // Remove the series. - testutil.Equals(t, (*record.MemSeries)(nil), h.series.getByID(1)) + testutil.Equals(t, (*memSeries)(nil), h.series.getByID(1)) _, err = cr.Chunk(chunks[0].Ref) testutil.Equals(t, record.ErrNotFound, err) @@ -1030,7 +1030,7 @@ func TestRemoveSeriesAfterRollbackAndTruncate(t *testing.T) { // Truncate again, this time the series should be deleted testutil.Ok(t, h.Truncate(2050)) - testutil.Equals(t, (*record.MemSeries)(nil), h.series.getByHash(lset.Hash(), lset)) + testutil.Equals(t, (*memSeries)(nil), h.series.getByHash(lset.Hash(), lset)) } func TestHead_LogRollback(t *testing.T) { diff --git a/querier_test.go b/querier_test.go index b24ca131..7cf69078 100644 --- a/querier_test.go +++ b/querier_test.go @@ -190,19 +190,6 @@ func expandSeriesIterator(it SeriesIterator) (r []tsdbutil.Sample, err error) { return r, it.Err() } -type sample struct { - t int64 - v float64 -} - -func (s sample) T() int64 { - return s.t -} - -func (s sample) V() float64 { - return s.v -} - type seriesSamples struct { lset map[string]string chunks [][]sample diff --git a/record/internal.go b/record/internal.go index 89a0cc2d..bd7af165 100644 --- a/record/internal.go +++ 
b/record/internal.go @@ -14,13 +14,9 @@ package record import ( "errors" - "math" "os" "path/filepath" - "sync" - "github.com/prometheus/tsdb/chunkenc" - "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" ) @@ -38,19 +34,6 @@ var ( ErrAmendSample = errors.New("amending sample") ) -type sample struct { - t int64 - v float64 -} - -func (s sample) T() int64 { - return s.t -} - -func (s sample) V() float64 { - return s.v -} - // RefSeries is the series labels with the series ID. type RefSeries struct { Ref uint64 @@ -59,255 +42,9 @@ type RefSeries struct { // RefSample is a timestamp/value pair associated with a reference to a series. type RefSample struct { - Ref uint64 - T int64 - V float64 - Series *MemSeries -} - -// MemSeries is the in-memory representation of a series. None of its methods -// are goroutine safe and it is the caller's responsibility to lock it. -type MemSeries struct { - sync.Mutex - - Ref uint64 - PendingCommit bool // Whether there are samples waiting to be committed to this series. - Chunks []*MemChunk - Lset labels.Labels - HeadChunk *MemChunk - - chunkRange int64 - firstChunkID int - - nextAt int64 // Timestamp at which to cut the next chunk. - sampleBuf [4]sample - - app chunkenc.Appender // Current appender for the chunk. 
-} - -func NewMemSeries(lset labels.Labels, id uint64, chunkRange int64) *MemSeries { - s := &MemSeries{ - Lset: lset, - Ref: id, - chunkRange: chunkRange, - nextAt: math.MinInt64, - } - return s -} - -func (s *MemSeries) MinTime() int64 { - if len(s.Chunks) == 0 { - return math.MinInt64 - } - return s.Chunks[0].MinTime -} - -func (s *MemSeries) MaxTime() int64 { - c := s.head() - if c == nil { - return math.MinInt64 - } - return c.MaxTime -} - -func (s *MemSeries) cut(mint int64) *MemChunk { - c := &MemChunk{ - Chunk: chunkenc.NewXORChunk(), - MinTime: mint, - MaxTime: math.MinInt64, - } - s.Chunks = append(s.Chunks, c) - s.HeadChunk = c - - // Set upper bound on when the next chunk must be started. An earlier timestamp - // may be chosen dynamically at a later point. - s.nextAt = rangeForTimestamp(mint, s.chunkRange) - - app, err := c.Chunk.Appender() - if err != nil { - panic(err) - } - s.app = app - return c -} - -func (s *MemSeries) ChunksMetas() []chunks.Meta { - metas := make([]chunks.Meta, 0, len(s.Chunks)) - for _, chk := range s.Chunks { - metas = append(metas, chunks.Meta{Chunk: chk.Chunk, MinTime: chk.MinTime, MaxTime: chk.MaxTime}) - } - return metas -} - -// reset re-initialises all the variable in the MemSeries except 'lset', 'ref', -// and 'chunkRange', like how it would appear after 'newMemSeries(...)'. -func (s *MemSeries) Reset() { - s.Chunks = nil - s.HeadChunk = nil - s.firstChunkID = 0 - s.nextAt = math.MinInt64 - s.sampleBuf = [4]sample{} - s.PendingCommit = false - s.app = nil -} - -// Appendable checks whether the given sample is valid for appending to the series. -func (s *MemSeries) Appendable(t int64, v float64) error { - c := s.head() - if c == nil { - return nil - } - - if t > c.MaxTime { - return nil - } - if t < c.MaxTime { - return ErrOutOfOrderSample - } - // We are allowing exact duplicates as we can encounter them in valid cases - // like federation and erroring out at that time would be extremely noisy. 
- if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) { - return ErrAmendSample - } - return nil -} - -func (s *MemSeries) Chunk(id int) *MemChunk { - ix := id - s.firstChunkID - if ix < 0 || ix >= len(s.Chunks) { - return nil - } - return s.Chunks[ix] -} - -func (s *MemSeries) ChunkID(pos int) int { - return pos + s.firstChunkID -} - -// TruncateChunksBefore removes all chunks from the series that have not timestamp -// at or after mint. Chunk IDs remain unchanged. -func (s *MemSeries) TruncateChunksBefore(mint int64) (removed int) { - var k int - for i, c := range s.Chunks { - if c.MaxTime >= mint { - break - } - k = i + 1 - } - s.Chunks = append(s.Chunks[:0], s.Chunks[k:]...) - s.firstChunkID += k - if len(s.Chunks) == 0 { - s.HeadChunk = nil - } else { - s.HeadChunk = s.Chunks[len(s.Chunks)-1] - } - - return k -} - -// Append adds the sample (t, v) to the series. -func (s *MemSeries) Append(t int64, v float64) (success, chunkCreated bool) { - // Based on Gorilla white papers this offers near-optimal compression ratio - // so anything bigger that this has diminishing returns and increases - // the time range within which we have to decompress all samples. - const samplesPerChunk = 120 - - c := s.head() - - if c == nil { - c = s.cut(t) - chunkCreated = true - } - numSamples := c.Chunk.NumSamples() - - // Out of order sample. - if c.MaxTime >= t { - return false, chunkCreated - } - // If we reach 25% of a chunk's desired sample count, set a definitive time - // at which to start the next chunk. - // At latest it must happen at the timestamp set when the chunk was cut. 
- if numSamples == samplesPerChunk/4 { - s.nextAt = computeChunkEndTime(c.MinTime, c.MaxTime, s.nextAt) - } - if t >= s.nextAt { - c = s.cut(t) - chunkCreated = true - } - s.app.Append(t, v) - - c.MaxTime = t - - s.sampleBuf[0] = s.sampleBuf[1] - s.sampleBuf[1] = s.sampleBuf[2] - s.sampleBuf[2] = s.sampleBuf[3] - s.sampleBuf[3] = sample{t: t, v: v} - - return true, chunkCreated -} - -func (s *MemSeries) Iterator(id int) chunkenc.Iterator { - c := s.Chunk(id) - // TODO(fabxc): Work around! A querier may have retrieved a pointer to a series' chunk, - // which got then garbage collected before it got accessed. - // We must ensure to not garbage collect as long as any readers still hold a reference. - if c == nil { - return chunkenc.NewNopIterator() - } - - if id-s.firstChunkID < len(s.Chunks)-1 { - return c.Chunk.Iterator() - } - // Serve the last 4 samples for the last chunk from the sample buffer - // as their compressed bytes may be mutated by added samples. - it := &MemSafeIterator{ - Iterator: c.Chunk.Iterator(), - i: -1, - total: c.Chunk.NumSamples(), - buf: s.sampleBuf, - } - return it -} - -func (s *MemSeries) head() *MemChunk { - return s.HeadChunk -} - -type MemChunk struct { - Chunk chunkenc.Chunk - MinTime, MaxTime int64 -} - -// Returns true if the chunk overlaps [mint, maxt]. 
-func (mc *MemChunk) OverlapsClosedInterval(mint, maxt int64) bool { - return mc.MinTime <= maxt && mint <= mc.MaxTime -} - -type MemSafeIterator struct { - chunkenc.Iterator - - i int - total int - buf [4]sample -} - -func (it *MemSafeIterator) Next() bool { - if it.i+1 >= it.total { - return false - } - it.i++ - if it.total-it.i > 4 { - return it.Iterator.Next() - } - return true -} - -func (it *MemSafeIterator) At() (int64, float64) { - if it.total-it.i > 4 { - return it.Iterator.At() - } - s := it.buf[4-(it.total-it.i)] - return s.t, s.v + Ref uint64 + T int64 + V float64 } func rangeForTimestamp(t int64, width int64) (maxt int64) { From 1a4aba41326ccc8af8a7aff11acf3184dc86c1e6 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 3 Jul 2019 20:52:27 -0700 Subject: [PATCH 08/16] We can just duplicate ErrNotFound to the record package. Signed-off-by: Callum Styan --- cmd/tsdb/main.go | 3 +-- db_test.go | 6 +++--- head.go | 27 ++++++++++++++++++++------- head_test.go | 9 ++++----- querier.go | 5 ++--- querier_test.go | 3 +-- record/internal.go | 10 +--------- 7 files changed, 32 insertions(+), 31 deletions(-) diff --git a/cmd/tsdb/main.go b/cmd/tsdb/main.go index 829891ef..e3dc530a 100644 --- a/cmd/tsdb/main.go +++ b/cmd/tsdb/main.go @@ -36,7 +36,6 @@ import ( "github.com/prometheus/tsdb/chunks" tsdb_errors "github.com/prometheus/tsdb/errors" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" "gopkg.in/alecthomas/kingpin.v2" ) @@ -307,7 +306,7 @@ func (b *writeBenchmark) ingestScrapesShard(lbls []labels.Labels, scrapeCount in s.ref = &ref } else if err := app.AddFast(*s.ref, ts, float64(s.value)); err != nil { - if errors.Cause(err) != record.ErrNotFound { + if errors.Cause(err) != tsdb.ErrNotFound { panic(err) } diff --git a/db_test.go b/db_test.go index 66a44661..8d5d3512 100644 --- a/db_test.go +++ b/db_test.go @@ -198,7 +198,7 @@ func TestDBAppenderAddRef(t *testing.T) { testutil.Ok(t, err) err = app2.AddFast(9999999, 1, 1) - 
testutil.Equals(t, record.ErrNotFound, errors.Cause(err)) + testutil.Equals(t, ErrNotFound, errors.Cause(err)) testutil.Ok(t, app2.Commit()) @@ -361,7 +361,7 @@ func TestAmendDatapointCausesError(t *testing.T) { app = db.Appender() _, err = app.Add(labels.Labels{}, 0, 1) - testutil.Equals(t, record.ErrAmendSample, err) + testutil.Equals(t, ErrAmendSample, err) testutil.Ok(t, app.Rollback()) } @@ -395,7 +395,7 @@ func TestNonDuplicateNaNDatapointsCausesAmendError(t *testing.T) { app = db.Appender() _, err = app.Add(labels.Labels{}, 0, math.Float64frombits(0x7ff0000000000002)) - testutil.Equals(t, record.ErrAmendSample, err) + testutil.Equals(t, ErrAmendSample, err) } func TestSkippingInvalidValuesInSameTxn(t *testing.T) { diff --git a/head.go b/head.go index fa385ca6..fe4ff388 100644 --- a/head.go +++ b/head.go @@ -39,10 +39,21 @@ import ( ) var ( + // ErrNotFound is returned if a looked up resource was not found. + ErrNotFound = errors.Errorf("not found") + + // ErrOutOfOrderSample is returned if an appended sample has a + // timestamp smaller than the most recent sample. + ErrOutOfOrderSample = errors.New("out of order sample") + // ErrOutOfBounds is returned if an appended sample is out of the // writable time range. ErrOutOfBounds = errors.New("out of bounds") + // ErrAmendSample is returned if an appended sample has the same timestamp + // as the most recent sample but a different value. + ErrAmendSample = errors.New("amending sample") + // emptyTombstoneReader is a no-op Tombstone Reader. // This is used by head to satisfy the Tombstones() function call. emptyTombstoneReader = tombstones.NewMemTombstones() @@ -501,6 +512,8 @@ func (h *Head) Init(minValidTime int64) error { level.Info(h.logger).Log("msg", "replaying WAL, this may take awhile") // Backfill the checkpoint first if it exists. 
dir, startFrom, err := wal.LastCheckpoint(h.wal.Dir()) + // We need to compare err to record.ErrNotFound as that's what + // wal.LastCheckpoint would return, not tsdb.ErrNotFound. if err != nil && err != record.ErrNotFound { return errors.Wrap(err, "find last checkpoint") } @@ -731,7 +744,7 @@ func (a *initAppender) Add(lset labels.Labels, t int64, v float64) (uint64, erro func (a *initAppender) AddFast(ref uint64, t int64, v float64) error { if a.app == nil { - return record.ErrNotFound + return ErrNotFound } return a.app.AddFast(ref, t, v) } @@ -856,7 +869,7 @@ func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { s := a.head.series.getByID(ref) if s == nil { - return errors.Wrap(record.ErrNotFound, "unknown series") + return errors.Wrap(ErrNotFound, "unknown series") } s.Lock() if err := s.Appendable(t, v); err != nil { @@ -1213,7 +1226,7 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { s := h.head.series.getByID(sid) // This means that the series has been garbage collected. if s == nil { - return nil, record.ErrNotFound + return nil, ErrNotFound } s.Lock() @@ -1223,7 +1236,7 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { // the specified range. if c == nil || !c.OverlapsClosedInterval(h.mint, h.maxt) { s.Unlock() - return nil, record.ErrNotFound + return nil, ErrNotFound } s.Unlock() @@ -1339,7 +1352,7 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks if s == nil { h.head.metrics.seriesNotFound.Inc() - return record.ErrNotFound + return ErrNotFound } *lbls = append((*lbls)[:0], s.lset...) @@ -1700,12 +1713,12 @@ func (s *memSeries) Appendable(t int64, v float64) error { return nil } if t < c.MaxTime { - return record.ErrOutOfOrderSample + return ErrOutOfOrderSample } // We are allowing exact duplicates as we can encounter them in valid cases // like federation and erroring out at that time would be extremely noisy. 
if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) { - return record.ErrAmendSample + return ErrAmendSample } return nil } diff --git a/head_test.go b/head_test.go index c5e4d358..1a1a95b5 100644 --- a/head_test.go +++ b/head_test.go @@ -928,7 +928,7 @@ func TestGCChunkAccess(t *testing.T) { testutil.Ok(t, h.Truncate(1500)) // Remove a chunk. _, err = cr.Chunk(chunks[0].Ref) - testutil.Equals(t, record.ErrNotFound, err) + testutil.Equals(t, ErrNotFound, err) _, err = cr.Chunk(chunks[1].Ref) testutil.Ok(t, err) } @@ -970,9 +970,9 @@ func TestGCSeriesAccess(t *testing.T) { testutil.Equals(t, (*memSeries)(nil), h.series.getByID(1)) _, err = cr.Chunk(chunks[0].Ref) - testutil.Equals(t, record.ErrNotFound, err) + testutil.Equals(t, ErrNotFound, err) _, err = cr.Chunk(chunks[1].Ref) - testutil.Equals(t, record.ErrNotFound, err) + testutil.Equals(t, ErrNotFound, err) } func TestUncommittedSamplesNotLostOnTruncate(t *testing.T) { @@ -1137,10 +1137,9 @@ func TestWalRepair_DecodingError(t *testing.T) { testutil.Ok(t, err) testutil.Equals(t, 0.0, prom_testutil.ToFloat64(h.metrics.walCorruptionsTotal)) initErr := h.Init(math.MinInt64) - err = errors.Cause(initErr) // So that we can pick up errors even if wrapped. _, corrErr := err.(*wal.CorruptionErr) - testutil.Assert(t, corrErr, "reading the wal didn't return corruption error") + testutil.Assert(t, corrErr, fmt.Sprintf("reading the wal didn't return corruption error: %s", err)) testutil.Ok(t, w.Close()) } diff --git a/querier.go b/querier.go index 0e905783..d444f064 100644 --- a/querier.go +++ b/querier.go @@ -25,7 +25,6 @@ import ( tsdb_errors "github.com/prometheus/tsdb/errors" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/tombstones" ) @@ -723,7 +722,7 @@ func (s *baseChunkSeries) Next() bool { ref := s.p.At() if err := s.index.Series(ref, &lset, &chkMetas); err != nil { // Postings may be stale. 
Skip if no underlying series exists. - if errors.Cause(err) == record.ErrNotFound { + if errors.Cause(err) == ErrNotFound { continue } s.err = err @@ -803,7 +802,7 @@ func (s *populatedChunkSeries) Next() bool { c.Chunk, s.err = s.chunks.Chunk(c.Ref) if s.err != nil { // This means that the chunk has be garbage collected. Remove it from the list. - if s.err == record.ErrNotFound { + if s.err == ErrNotFound { s.err = nil // Delete in-place. s.chks = append(chks[:j], chks[j+1:]...) diff --git a/querier_test.go b/querier_test.go index 7cf69078..2794e0e6 100644 --- a/querier_test.go +++ b/querier_test.go @@ -29,7 +29,6 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/tsdbutil" @@ -1405,7 +1404,7 @@ func (m mockIndex) SortedPostings(p index.Postings) index.Postings { func (m mockIndex) Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta) error { s, ok := m.series[ref] if !ok { - return record.ErrNotFound + return ErrNotFound } *lset = append((*lset)[:0], s.l...) *chks = append((*chks)[:0], s.chunks...) diff --git a/record/internal.go b/record/internal.go index bd7af165..ba90c682 100644 --- a/record/internal.go +++ b/record/internal.go @@ -22,16 +22,8 @@ import ( ) var ( - // ErrOutOfOrderSample is returned if an appended sample has a - // timestamp smaller than the most recent sample. - ErrOutOfOrderSample = errors.New("out of order sample") - - // ErrNotFound is returned if a looked up resource was not found. + // ErrNotFound is returned if a looked up resource was not found. Duplicate ErrNotFound from head.go. ErrNotFound = errors.New("not found") - - // ErrAmendSample is returned if an appended sample has the same timestamp - // as the most recent sample but a different value. 
- ErrAmendSample = errors.New("amending sample") ) // RefSeries is the series labels with the series ID. From fd51852c14d9de089729ef29ff01c2cc55255be2 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Thu, 4 Jul 2019 08:43:53 -0700 Subject: [PATCH 09/16] These functions aren't used anywhere within the record package anymore after refactoring. Signed-off-by: Callum Styan --- record/internal.go | 40 ---------------------------------------- 1 file changed, 40 deletions(-) diff --git a/record/internal.go b/record/internal.go index ba90c682..bec2de49 100644 --- a/record/internal.go +++ b/record/internal.go @@ -14,10 +14,7 @@ package record import ( "errors" - "os" - "path/filepath" - "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" ) @@ -38,40 +35,3 @@ type RefSample struct { T int64 V float64 } - -func rangeForTimestamp(t int64, width int64) (maxt int64) { - return (t/width)*width + width -} - -// computeChunkEndTime estimates the end timestamp based the beginning of a chunk, -// its current timestamp and the upper bound up to which we insert data. -// It assumes that the time range is 1/4 full. -func computeChunkEndTime(start, cur, max int64) int64 { - a := (max - start) / ((cur - start + 1) * 4) - if a == 0 { - return max - } - return start + (max-start)/a -} - -// RenameFile renames the file from, removing to if it already exists before doing the rename. -func RenameFile(from, to string) error { - if err := os.RemoveAll(to); err != nil { - return err - } - if err := os.Rename(from, to); err != nil { - return err - } - - // Directory was renamed; sync parent dir to persist rename. 
- pdir, err := fileutil.OpenDir(filepath.Dir(to)) - if err != nil { - return err - } - - if err = pdir.Sync(); err != nil { - pdir.Close() - return err - } - return pdir.Close() -} From 323b5c416bd5100b9604f919e9e4c952fbbfb8ac Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Thu, 4 Jul 2019 09:47:27 -0700 Subject: [PATCH 10/16] Change type names to remove record.Record... stutter. Signed-off-by: Callum Styan --- db_test.go | 4 +-- head.go | 12 ++++---- head_test.go | 14 ++++----- record/record.go | 64 ++++++++++++++++++++--------------------- record/record_test.go | 8 +++--- wal.go | 2 +- wal/checkpoint.go | 10 +++---- wal/checkpoint_test.go | 8 +++--- wal/wal_watcher.go | 62 +++++++++++++++++++-------------------- wal/wal_watcher_test.go | 24 ++++++++-------- wal_test.go | 10 +++---- 11 files changed, 109 insertions(+), 109 deletions(-) diff --git a/db_test.go b/db_test.go index 8d5d3512..dd977bc0 100644 --- a/db_test.go +++ b/db_test.go @@ -1472,7 +1472,7 @@ func TestInitializeHeadTimestamp(t *testing.T) { w, err := wal.New(nil, nil, path.Join(dir, "wal"), false) testutil.Ok(t, err) - var enc record.RecordEncoder + var enc record.Encoder err = w.Log( enc.Series([]record.RefSeries{ {Ref: 123, Labels: labels.FromStrings("a", "1")}, @@ -1522,7 +1522,7 @@ func TestInitializeHeadTimestamp(t *testing.T) { w, err := wal.New(nil, nil, path.Join(dir, "wal"), false) testutil.Ok(t, err) - var enc record.RecordEncoder + var enc record.Encoder err = w.Log( enc.Series([]record.RefSeries{ {Ref: 123, Labels: labels.FromStrings("a", "1")}, diff --git a/head.go b/head.go index fe4ff388..26c45402 100644 --- a/head.go +++ b/head.go @@ -363,7 +363,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { } var ( - dec record.RecordDecoder + dec record.Decoder series []record.RefSeries samples []record.RefSample tstones []tombstones.Stone @@ -379,7 +379,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { rec := r.Record() 
switch dec.Type(rec) { - case record.RecordSeries: + case record.Series: series, err = dec.Series(rec, series) if err != nil { return &wal.CorruptionErr{ @@ -402,7 +402,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { h.lastSeriesID = s.Ref } } - case record.RecordSamples: + case record.Samples: samples, err = dec.Samples(rec, samples) s := samples if err != nil { @@ -443,7 +443,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { samples = samples[m:] } samples = s // Keep whole slice for reuse. - case record.RecordTombstones: + case record.Tombstones: tstones, err = dec.Tombstones(rec, tstones) if err != nil { return &wal.CorruptionErr{ @@ -904,7 +904,7 @@ func (a *headAppender) log() error { defer func() { a.head.putBytesBuffer(buf) }() var rec []byte - var enc record.RecordEncoder + var enc record.Encoder if len(a.series) > 0 { rec = enc.Series(a.series, buf) @@ -1010,7 +1010,7 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { if p.Err() != nil { return p.Err() } - var enc record.RecordEncoder + var enc record.Encoder if h.wal != nil { // Although we don't store the stones in the head // we need to write them to the WAL to mark these as deleted diff --git a/head_test.go b/head_test.go index 1a1a95b5..c3a2da71 100644 --- a/head_test.go +++ b/head_test.go @@ -53,7 +53,7 @@ func BenchmarkCreateSeries(b *testing.B) { } func populateTestWAL(t testing.TB, w *wal.WAL, recs []interface{}) { - var enc record.RecordEncoder + var enc record.Encoder for _, r := range recs { switch v := r.(type) { case []record.RefSeries: @@ -71,22 +71,22 @@ func readTestWAL(t testing.TB, dir string) (recs []interface{}) { testutil.Ok(t, err) defer sr.Close() - var dec record.RecordDecoder + var dec record.Decoder r := wal.NewReader(sr) for r.Next() { rec := r.Record() switch dec.Type(rec) { - case record.RecordSeries: + case record.Series: series, err := dec.Series(rec, nil) testutil.Ok(t, err) recs 
= append(recs, series) - case record.RecordSamples: + case record.Samples: samples, err := dec.Samples(rec, nil) testutil.Ok(t, err) recs = append(recs, samples) - case record.RecordTombstones: + case record.Tombstones: tstones, err := dec.Tombstones(rec, nil) testutil.Ok(t, err) recs = append(recs, tstones) @@ -1067,7 +1067,7 @@ func TestHead_LogRollback(t *testing.T) { // TestWalRepair_DecodingError ensures that a repair is run for an error // when decoding a record. func TestWalRepair_DecodingError(t *testing.T) { - var enc record.RecordEncoder + var enc record.Encoder for name, test := range map[string]struct { corrFunc func(rec []byte) []byte // Func that applies the corruption to a record. rec []byte @@ -1079,7 +1079,7 @@ func TestWalRepair_DecodingError(t *testing.T) { // Do not modify the base record because it is Logged multiple times. res := make([]byte, len(rec)) copy(res, rec) - res[0] = byte(record.RecordInvalid) + res[0] = byte(record.Invalid) return res }, enc.Series([]record.RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), diff --git a/record/record.go b/record/record.go index cf854b1d..5ab75111 100644 --- a/record/record.go +++ b/record/record.go @@ -24,43 +24,43 @@ import ( "github.com/prometheus/tsdb/tombstones" ) -// RecordType represents the data type of a record. -type RecordType uint8 +// Type represents the data type of a record. +type Type uint8 const ( - // RecordInvalid is returned for unrecognised WAL record types. - RecordInvalid RecordType = 255 - // RecordSeries is used to match WAL records of type Series. - RecordSeries RecordType = 1 - // RecordSamples is used to match WAL records of type Samples. - RecordSamples RecordType = 2 - // RecordTombstones is used to match WAL records of type Tombstones. - RecordTombstones RecordType = 3 + // Invalid is returned for unrecognised WAL record types. + Invalid Type = 255 + // Series is used to match WAL records of type Series. 
+ Series Type = 1 + // Samples is used to match WAL records of type Samples. + Samples Type = 2 + // Tombstones is used to match WAL records of type Tombstones. + Tombstones Type = 3 ) -// RecordDecoder decodes series, sample, and tombstone records. +// Decoder decodes series, sample, and tombstone records. // The zero value is ready to use. -type RecordDecoder struct { +type Decoder struct { } // Type returns the type of the record. -// Return RecordInvalid if no valid record type is found. -func (d *RecordDecoder) Type(rec []byte) RecordType { +// Return Invalid if no valid record type is found. +func (d *Decoder) Type(rec []byte) Type { if len(rec) < 1 { - return RecordInvalid + return Invalid } - switch t := RecordType(rec[0]); t { - case RecordSeries, RecordSamples, RecordTombstones: + switch t := Type(rec[0]); t { + case Series, Samples, Tombstones: return t } - return RecordInvalid + return Invalid } // Series appends series in rec to the given slice. -func (d *RecordDecoder) Series(rec []byte, series []RefSeries) ([]RefSeries, error) { +func (d *Decoder) Series(rec []byte, series []RefSeries) ([]RefSeries, error) { dec := encoding.Decbuf{B: rec} - if RecordType(dec.Byte()) != RecordSeries { + if Type(dec.Byte()) != Series { return nil, errors.New("invalid record type") } for len(dec.B) > 0 && dec.Err() == nil { @@ -89,10 +89,10 @@ func (d *RecordDecoder) Series(rec []byte, series []RefSeries) ([]RefSeries, err } // Samples appends samples in rec to the given slice. 
-func (d *RecordDecoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) { +func (d *Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) { dec := encoding.Decbuf{B: rec} - if RecordType(dec.Byte()) != RecordSamples { + if Type(dec.Byte()) != Samples { return nil, errors.New("invalid record type") } if dec.Len() == 0 { @@ -124,10 +124,10 @@ func (d *RecordDecoder) Samples(rec []byte, samples []RefSample) ([]RefSample, e } // Tombstones appends tombstones in rec to the given slice. -func (d *RecordDecoder) Tombstones(rec []byte, tstones []tombstones.Stone) ([]tombstones.Stone, error) { +func (d *Decoder) Tombstones(rec []byte, tstones []tombstones.Stone) ([]tombstones.Stone, error) { dec := encoding.Decbuf{B: rec} - if RecordType(dec.Byte()) != RecordTombstones { + if Type(dec.Byte()) != Tombstones { return nil, errors.New("invalid record type") } for dec.Len() > 0 && dec.Err() == nil { @@ -147,15 +147,15 @@ func (d *RecordDecoder) Tombstones(rec []byte, tstones []tombstones.Stone) ([]to return tstones, nil } -// RecordEncoder encodes series, sample, and tombstones records. +// Encoder encodes series, sample, and tombstones records. // The zero value is ready to use. -type RecordEncoder struct { +type Encoder struct { } // Series appends the encoded series to b and returns the resulting slice. -func (e *RecordEncoder) Series(series []RefSeries, b []byte) []byte { +func (e *Encoder) Series(series []RefSeries, b []byte) []byte { buf := encoding.Encbuf{B: b} - buf.PutByte(byte(RecordSeries)) + buf.PutByte(byte(Series)) for _, s := range series { buf.PutBE64(s.Ref) @@ -170,9 +170,9 @@ func (e *RecordEncoder) Series(series []RefSeries, b []byte) []byte { } // Samples appends the encoded samples to b and returns the resulting slice. 
-func (e *RecordEncoder) Samples(samples []RefSample, b []byte) []byte { +func (e *Encoder) Samples(samples []RefSample, b []byte) []byte { buf := encoding.Encbuf{B: b} - buf.PutByte(byte(RecordSamples)) + buf.PutByte(byte(Samples)) if len(samples) == 0 { return buf.Get() @@ -194,9 +194,9 @@ func (e *RecordEncoder) Samples(samples []RefSample, b []byte) []byte { } // Tombstones appends the encoded tombstones to b and returns the resulting slice. -func (e *RecordEncoder) Tombstones(tstones []tombstones.Stone, b []byte) []byte { +func (e *Encoder) Tombstones(tstones []tombstones.Stone, b []byte) []byte { buf := encoding.Encbuf{B: b} - buf.PutByte(byte(RecordTombstones)) + buf.PutByte(byte(Tombstones)) for _, s := range tstones { for _, iv := range s.Intervals { diff --git a/record/record_test.go b/record/record_test.go index b9705238..304fa2b2 100644 --- a/record/record_test.go +++ b/record/record_test.go @@ -25,8 +25,8 @@ import ( ) func TestRecord_EncodeDecode(t *testing.T) { - var enc RecordEncoder - var dec RecordDecoder + var enc Encoder + var dec Decoder series := []RefSeries{ { @@ -78,8 +78,8 @@ func TestRecord_EncodeDecode(t *testing.T) { // TestRecord_Corruputed ensures that corrupted records return the correct error. // Bugfix check for pull/521 and pull/523. 
func TestRecord_Corruputed(t *testing.T) { - var enc RecordEncoder - var dec RecordDecoder + var enc Encoder + var dec Decoder t.Run("Test corrupted series record", func(t *testing.T) { series := []RefSeries{ diff --git a/wal.go b/wal.go index 08cbedfe..abcfc88d 100644 --- a/wal.go +++ b/wal.go @@ -1255,7 +1255,7 @@ func MigrateWAL(logger log.Logger, dir string) (err error) { rdr := w.Reader() var ( - enc record.RecordEncoder + enc record.Encoder b []byte ) decErr := rdr.Read( diff --git a/wal/checkpoint.go b/wal/checkpoint.go index 1e9caa84..130c8357 100644 --- a/wal/checkpoint.go +++ b/wal/checkpoint.go @@ -153,8 +153,8 @@ func Checkpoint(w *WAL, from, to int, keep func(id uint64) bool, mint int64) (*C series []record.RefSeries samples []record.RefSample tstones []tombstones.Stone - dec record.RecordDecoder - enc record.RecordEncoder + dec record.Decoder + enc record.Encoder buf []byte recs [][]byte ) @@ -168,7 +168,7 @@ func Checkpoint(w *WAL, from, to int, keep func(id uint64) bool, mint int64) (*C rec := r.Record() switch dec.Type(rec) { - case record.RecordSeries: + case record.Series: series, err = dec.Series(rec, series) if err != nil { return nil, errors.Wrap(err, "decode series") @@ -186,7 +186,7 @@ func Checkpoint(w *WAL, from, to int, keep func(id uint64) bool, mint int64) (*C stats.TotalSeries += len(series) stats.DroppedSeries += len(series) - len(repl) - case record.RecordSamples: + case record.Samples: samples, err = dec.Samples(rec, samples) if err != nil { return nil, errors.Wrap(err, "decode samples") @@ -204,7 +204,7 @@ func Checkpoint(w *WAL, from, to int, keep func(id uint64) bool, mint int64) (*C stats.TotalSamples += len(samples) stats.DroppedSamples += len(samples) - len(repl) - case record.RecordTombstones: + case record.Tombstones: tstones, err = dec.Tombstones(rec, tstones) if err != nil { return nil, errors.Wrap(err, "decode deletes") diff --git a/wal/checkpoint_test.go b/wal/checkpoint_test.go index 37e52263..1d431ae7 100644 --- 
a/wal/checkpoint_test.go +++ b/wal/checkpoint_test.go @@ -94,7 +94,7 @@ func TestCheckpoint(t *testing.T) { testutil.Ok(t, os.RemoveAll(dir)) }() - var enc record.RecordEncoder + var enc record.Encoder // Create a dummy segment to bump the initial number. seg, err := CreateSegment(dir, 100) testutil.Ok(t, err) @@ -165,7 +165,7 @@ func TestCheckpoint(t *testing.T) { testutil.Ok(t, err) defer sr.Close() - var dec record.RecordDecoder + var dec record.Decoder var series []record.RefSeries r := NewReader(sr) @@ -173,10 +173,10 @@ func TestCheckpoint(t *testing.T) { rec := r.Record() switch dec.Type(rec) { - case record.RecordSeries: + case record.Series: series, err = dec.Series(rec, series) testutil.Ok(t, err) - case record.RecordSamples: + case record.Samples: samples, err := dec.Samples(rec, nil) testutil.Ok(t, err) for _, s := range samples { diff --git a/wal/wal_watcher.go b/wal/wal_watcher.go index bc21994d..ad998cea 100644 --- a/wal/wal_watcher.go +++ b/wal/wal_watcher.go @@ -99,8 +99,8 @@ type writeTo interface { SeriesReset(int) } -// WALWatcher watches the TSDB WAL for a given WriteTo. -type WALWatcher struct { +// Watcher watches the TSDB WAL for a given WriteTo. +type Watcher struct { name string writer writeTo logger log.Logger @@ -122,20 +122,20 @@ type WALWatcher struct { maxSegment int } -// NewWALWatcher creates a new WAL watcher for a given WriteTo. -func NewWALWatcher(logger log.Logger, reg prometheus.Registerer, name string, writer writeTo, walDir string) *WALWatcher { +// NewWatcher creates a new WAL watcher for a given WriteTo. +func NewWatcher(logger log.Logger, reg prometheus.Registerer, name string, writer writeTo, walDir string) *Watcher { if logger == nil { logger = log.NewNopLogger() } if reg != nil { - // We can't use MustRegister because WALWatcher's are recreated on config changes within Prometheus. + // We can't use MustRegister because Watcher's are recreated on config changes within Prometheus. 
reg.Register(watcherRecordsRead) reg.Register(watcherRecordDecodeFails) reg.Register(watcherSamplesSentPreTailing) reg.Register(watcherCurrentSegment) } - return &WALWatcher{ + return &Watcher{ logger: logger, reg: reg, writer: writer, @@ -148,7 +148,7 @@ func NewWALWatcher(logger log.Logger, reg prometheus.Registerer, name string, wr } } -func (w *WALWatcher) setMetrics() { +func (w *Watcher) setMetrics() { // Setup the WAL Watchers metrics. We do this here rather than in the // constructor because of the ordering of creating Queue Managers's, // stopping them, and then starting new ones in storage/remote/storage.go ApplyConfig. @@ -158,16 +158,16 @@ func (w *WALWatcher) setMetrics() { w.currentSegmentMetric = watcherCurrentSegment.WithLabelValues(w.name) } -// Start the WALWatcher. -func (w *WALWatcher) Start() { +// Start the Watcher. +func (w *Watcher) Start() { w.setMetrics() level.Info(w.logger).Log("msg", "starting WAL watcher", "queue", w.name) go w.loop() } -// Stop the WALWatcher. -func (w *WALWatcher) Stop() { +// Stop the Watcher. +func (w *Watcher) Stop() { close(w.quit) <-w.done @@ -181,7 +181,7 @@ func (w *WALWatcher) Stop() { level.Info(w.logger).Log("msg", "WAL watcher stopped", "queue", w.name) } -func (w *WALWatcher) loop() { +func (w *Watcher) loop() { defer close(w.done) // We may encourter failures processing the WAL; we should wait and retry. @@ -199,7 +199,7 @@ func (w *WALWatcher) loop() { } } -func (w *WALWatcher) run() error { +func (w *Watcher) run() error { _, lastSegment, err := w.firstAndLast() if err != nil { return errors.Wrap(err, "wal.Segments") @@ -246,7 +246,7 @@ func (w *WALWatcher) run() error { } // findSegmentForIndex finds the first segment greater than or equal to index. 
-func (w *WALWatcher) findSegmentForIndex(index int) (int, error) { +func (w *Watcher) findSegmentForIndex(index int) (int, error) { refs, err := w.segments(w.walDir) if err != nil { return -1, nil @@ -261,7 +261,7 @@ func (w *WALWatcher) findSegmentForIndex(index int) (int, error) { return -1, errors.New("failed to find segment for index") } -func (w *WALWatcher) firstAndLast() (int, int, error) { +func (w *Watcher) firstAndLast() (int, int, error) { refs, err := w.segments(w.walDir) if err != nil { return -1, -1, nil @@ -275,7 +275,7 @@ func (w *WALWatcher) firstAndLast() (int, int, error) { // Copied from tsdb/wal/wal.go so we do not have to open a WAL. // Plan is to move WAL watcher to TSDB and dedupe these implementations. -func (w *WALWatcher) segments(dir string) ([]int, error) { +func (w *Watcher) segments(dir string) ([]int, error) { files, err := fileutil.ReadDir(dir) if err != nil { return nil, err @@ -302,7 +302,7 @@ func (w *WALWatcher) segments(dir string) ([]int, error) { // Use tail true to indicate that the reader is currently on a segment that is // actively being written to. If false, assume it's a full segment and we're // replaying it on start to cache the series records. 
-func (w *WALWatcher) watch(segmentNum int, tail bool) error { +func (w *Watcher) watch(segmentNum int, tail bool) error { segment, err := OpenReadSegment(SegmentName(w.walDir, segmentNum)) if err != nil { return err @@ -397,7 +397,7 @@ func (w *WALWatcher) watch(segmentNum int, tail bool) error { } } -func (w *WALWatcher) garbageCollectSeries(segmentNum int) error { +func (w *Watcher) garbageCollectSeries(segmentNum int) error { dir, _, err := LastCheckpoint(w.walDir) if err != nil && err != record.ErrNotFound { return errors.Wrap(err, "LastCheckpoint") @@ -429,19 +429,19 @@ func (w *WALWatcher) garbageCollectSeries(segmentNum int) error { return nil } -func (w *WALWatcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { +func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { var ( - dec record.RecordDecoder + dec record.Decoder series []record.RefSeries samples []record.RefSample ) for r.Next() && !isClosed(w.quit) { rec := r.Record() - w.recordsReadMetric.WithLabelValues(recordType(dec.Type(rec))).Inc() + w.recordsReadMetric.WithLabelValues(Type(dec.Type(rec))).Inc() switch dec.Type(rec) { - case record.RecordSeries: + case record.Series: series, err := dec.Series(rec, series[:0]) if err != nil { w.recordDecodeFailsMetric.Inc() @@ -449,7 +449,7 @@ func (w *WALWatcher) readSegment(r *LiveReader, segmentNum int, tail bool) error } w.writer.StoreSeries(series, segmentNum) - case record.RecordSamples: + case record.Samples: // If we're not tailing a segment we can ignore any samples records we see. // This speeds up replay of the WAL by > 10x. 
if !tail { @@ -471,9 +471,9 @@ func (w *WALWatcher) readSegment(r *LiveReader, segmentNum int, tail bool) error w.writer.Append(send) } - case record.RecordTombstones: + case record.Tombstones: // noop - case record.RecordInvalid: + case record.Invalid: return errors.New("invalid record") default: @@ -484,15 +484,15 @@ func (w *WALWatcher) readSegment(r *LiveReader, segmentNum int, tail bool) error return r.Err() } -func recordType(rt record.RecordType) string { +func Type(rt record.Type) string { switch rt { - case record.RecordInvalid: + case record.Invalid: return "invalid" - case record.RecordSeries: + case record.Series: return "series" - case record.RecordSamples: + case record.Samples: return "samples" - case record.RecordTombstones: + case record.Tombstones: return "tombstones" default: return "unknown" @@ -500,7 +500,7 @@ func recordType(rt record.RecordType) string { } // Read all the series records from a Checkpoint directory. -func (w *WALWatcher) readCheckpoint(checkpointDir string) error { +func (w *Watcher) readCheckpoint(checkpointDir string) error { level.Debug(w.logger).Log("msg", "reading checkpoint", "dir", checkpointDir) index, err := checkpointNum(checkpointDir) if err != nil { diff --git a/wal/wal_watcher_test.go b/wal/wal_watcher_test.go index 00129a23..377d93d9 100644 --- a/wal/wal_watcher_test.go +++ b/wal/wal_watcher_test.go @@ -102,7 +102,7 @@ func TestTailSamples(t *testing.T) { err = os.Mkdir(wdir, 0777) testutil.Ok(t, err) - enc := record.RecordEncoder{} + enc := record.Encoder{} w, err := NewSize(nil, prometheus.DefaultRegisterer, wdir, 128*pageSize, false) testutil.Ok(t, err) @@ -135,7 +135,7 @@ func TestTailSamples(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, nil, "", wt, dir) watcher.startTime = now.UnixNano() // Set the Watcher's metrics so they're not nil pointers. 
@@ -176,7 +176,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) { var recs [][]byte - enc := record.RecordEncoder{} + enc := record.Encoder{} for i := 0; i < seriesCount; i++ { series := enc.Series([]record.RefSeries{ @@ -210,7 +210,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, nil, "", wt, dir) go watcher.Start() expected := seriesCount @@ -236,7 +236,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { err = os.Mkdir(wdir, 0777) testutil.Ok(t, err) - enc := record.RecordEncoder{} + enc := record.Encoder{} w, err := NewSize(nil, nil, wdir, segmentSize, false) testutil.Ok(t, err) @@ -292,7 +292,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { _, _, err = w.Segments() testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, nil, "", wt, dir) go watcher.Start() expected := seriesCount * 2 @@ -318,7 +318,7 @@ func TestReadCheckpoint(t *testing.T) { os.Create(SegmentName(wdir, 30)) - enc := record.RecordEncoder{} + enc := record.Encoder{} w, err := NewSize(nil, nil, wdir, 128*pageSize, false) testutil.Ok(t, err) @@ -353,7 +353,7 @@ func TestReadCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, nil, "", wt, dir) // watcher. go watcher.Start() @@ -380,7 +380,7 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { err = os.Mkdir(wdir, 0777) testutil.Ok(t, err) - enc := record.RecordEncoder{} + enc := record.Encoder{} w, err := NewSize(nil, nil, wdir, pageSize, false) testutil.Ok(t, err) @@ -415,7 +415,7 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { }, 0) wt := newWriteToMock() - watcher := NewWALWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, nil, "", wt, dir) watcher.maxSegment = -1 // Set the Watcher's metrics so they're not nil pointers. 
@@ -443,7 +443,7 @@ func TestCheckpointSeriesReset(t *testing.T) { err = os.Mkdir(wdir, 0777) testutil.Ok(t, err) - enc := record.RecordEncoder{} + enc := record.Encoder{} w, err := NewSize(nil, nil, wdir, segmentSize, false) testutil.Ok(t, err) @@ -475,7 +475,7 @@ func TestCheckpointSeriesReset(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, nil, "", wt, dir) watcher.maxSegment = -1 go watcher.Start() diff --git a/wal_test.go b/wal_test.go index c84a899d..1a18e2d2 100644 --- a/wal_test.go +++ b/wal_test.go @@ -512,7 +512,7 @@ func TestMigrateWAL_Fuzz(t *testing.T) { testutil.Ok(t, err) // We can properly write some new data after migration. - var enc record.RecordEncoder + var enc record.Encoder testutil.Ok(t, w.Log(enc.Samples([]record.RefSample{ {Ref: 500, T: 1, V: 1}, }, nil))) @@ -525,21 +525,21 @@ func TestMigrateWAL_Fuzz(t *testing.T) { r := wal.NewReader(sr) var res []interface{} - var dec record.RecordDecoder + var dec record.Decoder for r.Next() { rec := r.Record() switch dec.Type(rec) { - case record.RecordSeries: + case record.Series: s, err := dec.Series(rec, nil) testutil.Ok(t, err) res = append(res, s) - case record.RecordSamples: + case record.Samples: s, err := dec.Samples(rec, nil) testutil.Ok(t, err) res = append(res, s) - case record.RecordTombstones: + case record.Tombstones: s, err := dec.Tombstones(rec, nil) testutil.Ok(t, err) res = append(res, s) From fe0139201564ffd608898ad8e5c8cfafbeab86c0 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Thu, 4 Jul 2019 13:13:20 -0700 Subject: [PATCH 11/16] Rename WALWatcher -> Watcher; fix creation/registration of it's metrics. 
Signed-off-by: Callum Styan --- wal/{wal_watcher.go => watcher.go} | 148 ++++++++++--------- wal/{wal_watcher_test.go => watcher_test.go} | 12 +- 2 files changed, 87 insertions(+), 73 deletions(-) rename wal/{wal_watcher.go => watcher.go} (82%) rename wal/{wal_watcher_test.go => watcher_test.go} (95%) diff --git a/wal/wal_watcher.go b/wal/watcher.go similarity index 82% rename from wal/wal_watcher.go rename to wal/watcher.go index ad998cea..b3732aab 100644 --- a/wal/wal_watcher.go +++ b/wal/watcher.go @@ -39,43 +39,14 @@ const ( consumer = "consumer" ) +type watcherMetrics struct { + recordsRead *prometheus.CounterVec + recordDecodeFails *prometheus.CounterVec + samplesSentPreTailing *prometheus.CounterVec + currentSegment *prometheus.GaugeVec +} + var ( - watcherRecordsRead = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: "prometheus", - Subsystem: "wal_watcher", - Name: "records_read_total", - Help: "Number of records read by the WAL watcher from the WAL.", - }, - []string{consumer, "type"}, - ) - watcherRecordDecodeFails = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: "prometheus", - Subsystem: "wal_watcher", - Name: "record_decode_failures_total", - Help: "Number of records read by the WAL watcher that resulted in an error when decoding.", - }, - []string{consumer}, - ) - watcherSamplesSentPreTailing = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: "prometheus", - Subsystem: "wal_watcher", - Name: "samples_sent_pre_tailing_total", - Help: "Number of sample records read by the WAL watcher and sent to remote write during replay of existing WAL.", - }, - []string{consumer}, - ) - watcherCurrentSegment = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: "prometheus", - Subsystem: "wal_watcher", - Name: "current_segment", - Help: "Current segment the WAL watcher is reading records from.", - }, - []string{consumer}, - ) lrMetrics = NewLiveReaderMetrics(prometheus.DefaultRegisterer) ) @@ -86,12 +57,12 
@@ func FromTime(t time.Time) int64 { return t.Unix()*1000 + int64(t.Nanosecond())/int64(time.Millisecond) } -func init() { - prometheus.MustRegister(watcherRecordsRead) - prometheus.MustRegister(watcherRecordDecodeFails) - prometheus.MustRegister(watcherSamplesSentPreTailing) - prometheus.MustRegister(watcherCurrentSegment) -} +// func init() { +// prometheus.MustRegister(watcherRecordsRead) +// prometheus.MustRegister(watcherRecordDecodeFails) +// prometheus.MustRegister(watcherSamplesSentPreTailing) +// prometheus.MustRegister(watcherCurrentSegment) +// } type writeTo interface { Append([]record.RefSample) bool @@ -106,7 +77,7 @@ type Watcher struct { logger log.Logger walDir string lastCheckpoint string - reg prometheus.Registerer + metrics *watcherMetrics startTime int64 @@ -122,27 +93,70 @@ type Watcher struct { maxSegment int } +func NewWatcherMetrics(reg prometheus.Registerer) *watcherMetrics { + m := &watcherMetrics{ + recordsRead: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "records_read_total", + Help: "Number of records read by the WAL watcher from the WAL.", + }, + []string{consumer, "type"}, + ), + recordDecodeFails: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "record_decode_failures_total", + Help: "Number of records read by the WAL watcher that resulted in an error when decoding.", + }, + []string{consumer}, + ), + samplesSentPreTailing: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "samples_sent_pre_tailing_total", + Help: "Number of sample records read by the WAL watcher and sent to remote write during replay of existing WAL.", + }, + []string{consumer}, + ), + currentSegment: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "current_segment", + Help: "Current segment the WAL 
watcher is reading records from.", + }, + []string{consumer}, + ), + } + + if reg != nil { + reg.Register(m.recordsRead) + reg.Register(m.recordDecodeFails) + reg.Register(m.samplesSentPreTailing) + reg.Register(m.currentSegment) + } + + return m +} + // NewWatcher creates a new WAL watcher for a given WriteTo. -func NewWatcher(logger log.Logger, reg prometheus.Registerer, name string, writer writeTo, walDir string) *Watcher { +func NewWatcher(logger log.Logger, metrics *watcherMetrics, name string, writer writeTo, walDir string) *Watcher { if logger == nil { logger = log.NewNopLogger() } - if reg != nil { - // We can't use MustRegister because Watcher's are recreated on config changes within Prometheus. - reg.Register(watcherRecordsRead) - reg.Register(watcherRecordDecodeFails) - reg.Register(watcherSamplesSentPreTailing) - reg.Register(watcherCurrentSegment) - } return &Watcher{ - logger: logger, - reg: reg, - writer: writer, - walDir: path.Join(walDir, "wal"), - name: name, - quit: make(chan struct{}), - done: make(chan struct{}), + logger: logger, + metrics: metrics, + writer: writer, + walDir: path.Join(walDir, "wal"), + name: name, + quit: make(chan struct{}), + done: make(chan struct{}), maxSegment: -1, } @@ -152,10 +166,10 @@ func (w *Watcher) setMetrics() { // Setup the WAL Watchers metrics. We do this here rather than in the // constructor because of the ordering of creating Queue Managers's, // stopping them, and then starting new ones in storage/remote/storage.go ApplyConfig. 
- w.recordsReadMetric = watcherRecordsRead.MustCurryWith(prometheus.Labels{consumer: w.name}) - w.recordDecodeFailsMetric = watcherRecordDecodeFails.WithLabelValues(w.name) - w.samplesSentPreTailing = watcherSamplesSentPreTailing.WithLabelValues(w.name) - w.currentSegmentMetric = watcherCurrentSegment.WithLabelValues(w.name) + w.recordsReadMetric = w.metrics.recordsRead.MustCurryWith(prometheus.Labels{consumer: w.name}) + w.recordDecodeFailsMetric = w.metrics.recordDecodeFails.WithLabelValues(w.name) + w.samplesSentPreTailing = w.metrics.samplesSentPreTailing.WithLabelValues(w.name) + w.currentSegmentMetric = w.metrics.currentSegment.WithLabelValues(w.name) } // Start the Watcher. @@ -172,11 +186,11 @@ func (w *Watcher) Stop() { <-w.done // Records read metric has series and samples. - watcherRecordsRead.DeleteLabelValues(w.name, "series") - watcherRecordsRead.DeleteLabelValues(w.name, "samples") - watcherRecordDecodeFails.DeleteLabelValues(w.name) - watcherSamplesSentPreTailing.DeleteLabelValues(w.name) - watcherCurrentSegment.DeleteLabelValues(w.name) + w.metrics.recordsRead.DeleteLabelValues(w.name, "series") + w.metrics.recordsRead.DeleteLabelValues(w.name, "samples") + w.metrics.recordDecodeFails.DeleteLabelValues(w.name) + w.metrics.samplesSentPreTailing.DeleteLabelValues(w.name) + w.metrics.currentSegment.DeleteLabelValues(w.name) level.Info(w.logger).Log("msg", "WAL watcher stopped", "queue", w.name) } diff --git a/wal/wal_watcher_test.go b/wal/watcher_test.go similarity index 95% rename from wal/wal_watcher_test.go rename to wal/watcher_test.go index 377d93d9..ab80db85 100644 --- a/wal/wal_watcher_test.go +++ b/wal/watcher_test.go @@ -135,7 +135,7 @@ func TestTailSamples(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) watcher.startTime = now.UnixNano() // Set the Watcher's metrics so they're not nil pointers. 
@@ -210,7 +210,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) go watcher.Start() expected := seriesCount @@ -292,7 +292,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { _, _, err = w.Segments() testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) go watcher.Start() expected := seriesCount * 2 @@ -353,7 +353,7 @@ func TestReadCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) // watcher. go watcher.Start() @@ -415,7 +415,7 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { }, 0) wt := newWriteToMock() - watcher := NewWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) watcher.maxSegment = -1 // Set the Watcher's metrics so they're not nil pointers. @@ -475,7 +475,7 @@ func TestCheckpointSeriesReset(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) watcher.maxSegment = -1 go watcher.Start() From 7e36b01db9c314e8cce533305e8b601af6627512 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 10 Jul 2019 11:19:40 -0700 Subject: [PATCH 12/16] Review fixes; mostly some things that don't need to be exported after refactor. 
Signed-off-by: Callum Styan --- head.go | 142 +++++++++++++-------------------------------- head_test.go | 62 ++++++++++---------- querier.go | 2 +- record/internal.go | 37 ------------ record/record.go | 18 ++++++ 5 files changed, 89 insertions(+), 172 deletions(-) delete mode 100644 record/internal.go diff --git a/head.go b/head.go index 26c45402..c309aba3 100644 --- a/head.go +++ b/head.go @@ -282,7 +282,7 @@ func (h *Head) processWALSamples( } refSeries[s.Ref] = ms } - _, chunkCreated := ms.Append(s.T, s.V) + _, chunkCreated := ms.append(s.T, s.V) if chunkCreated { h.metrics.chunksCreated.Inc() h.metrics.chunks.Inc() @@ -872,7 +872,7 @@ func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { return errors.Wrap(ErrNotFound, "unknown series") } s.Lock() - if err := s.Appendable(t, v); err != nil { + if err := s.appendable(t, v); err != nil { s.Unlock() return err } @@ -939,7 +939,7 @@ func (a *headAppender) Commit() error { for i, s := range a.samples { series = a.sampleSeries[i] series.Lock() - ok, chunkCreated := series.Append(s.T, s.V) + ok, chunkCreated := series.append(s.T, s.V) series.pendingCommit = false series.Unlock() @@ -968,6 +968,7 @@ func (a *headAppender) Rollback() error { series.Unlock() } a.head.putAppendBuffer(a.samples) + a.head.putSeriesBuffer(a.sampleSeries) // Series are created in the head memory regardless of rollback. Thus we have // to log them to the WAL in any case. 
@@ -993,7 +994,7 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { for p.Next() { series := h.series.getByID(p.At()) - t0, t1 := series.MinTime(), series.MaxTime() + t0, t1 := series.minTime(), series.maxTime() if t0 == math.MinInt64 || t1 == math.MinInt64 { continue } @@ -1041,14 +1042,14 @@ func (h *Head) chunkRewrite(ref uint64, dranges tombstones.Intervals) (err error return nil } - metas := ms.ChunksMetas() + metas := ms.chunksMetas() mint, maxt := metas[0].MinTime, metas[len(metas)-1].MaxTime it := newChunkSeriesIterator(metas, dranges, mint, maxt) - ms.Reset() + ms.reset() for it.Next() { t, v := it.At() - ok, _ := ms.Append(t, v) + ok, _ := ms.append(t, v) if !ok { level.Warn(h.logger).Log("msg", "failed to add sample during delete") } @@ -1230,7 +1231,7 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { } s.Lock() - c := s.Chunk(int(cid)) + c := s.chunk(int(cid)) // This means that the chunk has been garbage collected or is outside // the specified range. @@ -1241,7 +1242,7 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { s.Unlock() return &safeChunk{ - Chunk: c.Chunk, + Chunk: c.chunk, s: s, cid: int(cid), }, nil @@ -1367,7 +1368,7 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks continue } // Set the head chunks as open (being appended to). 
- maxTime := c.MaxTime + maxTime := c.maxTime if s.headChunk == c { maxTime = math.MaxInt64 } @@ -1526,7 +1527,7 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { for hash, all := range s.hashes[i] { for _, series := range all { series.Lock() - rmChunks += series.TruncateChunksBefore(mint) + rmChunks += series.truncateChunksBefore(mint) if len(series.chunks) > 0 || series.pendingCommit { series.Unlock() @@ -1621,11 +1622,10 @@ func (s sample) V() float64 { type memSeries struct { sync.Mutex - ref uint64 - chunks []*memChunk - lset labels.Labels - headChunk *memChunk - + ref uint64 + chunks []*memChunk + lset labels.Labels + headChunk *memChunk chunkRange int64 firstChunkID int @@ -1646,26 +1646,26 @@ func newMemSeries(lset labels.Labels, id uint64, chunkRange int64) *memSeries { return s } -func (s *memSeries) MinTime() int64 { +func (s *memSeries) minTime() int64 { if len(s.chunks) == 0 { return math.MinInt64 } - return s.chunks[0].MinTime + return s.chunks[0].minTime } -func (s *memSeries) MaxTime() int64 { +func (s *memSeries) maxTime() int64 { c := s.head() if c == nil { return math.MinInt64 } - return c.MaxTime + return c.maxTime } func (s *memSeries) cut(mint int64) *memChunk { c := &memChunk{ - Chunk: chunkenc.NewXORChunk(), - MinTime: mint, - MaxTime: math.MinInt64, + chunk: chunkenc.NewXORChunk(), + minTime: mint, + maxTime: math.MinInt64, } s.chunks = append(s.chunks, c) s.headChunk = c @@ -1674,7 +1674,7 @@ func (s *memSeries) cut(mint int64) *memChunk { // may be chosen dynamically at a later point. 
s.nextAt = rangeForTimestamp(mint, s.chunkRange) - app, err := c.Chunk.Appender() + app, err := c.chunk.Appender() if err != nil { panic(err) } @@ -1682,17 +1682,17 @@ func (s *memSeries) cut(mint int64) *memChunk { return c } -func (s *memSeries) ChunksMetas() []chunks.Meta { +func (s *memSeries) chunksMetas() []chunks.Meta { metas := make([]chunks.Meta, 0, len(s.chunks)) for _, chk := range s.chunks { - metas = append(metas, chunks.Meta{Chunk: chk.Chunk, MinTime: chk.MinTime, MaxTime: chk.MaxTime}) + metas = append(metas, chunks.Meta{Chunk: chk.chunk, MinTime: chk.minTime, MaxTime: chk.maxTime}) } return metas } // reset re-initialises all the variable in the memSeries except 'lset', 'ref', // and 'chunkRange', like how it would appear after 'newmemSeries(...)'. -func (s *memSeries) Reset() { +func (s *memSeries) reset() { s.chunks = nil s.headChunk = nil s.firstChunkID = 0 @@ -1703,16 +1703,16 @@ func (s *memSeries) Reset() { } // Appendable checks whether the given sample is valid for appending to the series. -func (s *memSeries) Appendable(t int64, v float64) error { +func (s *memSeries) appendable(t int64, v float64) error { c := s.head() if c == nil { return nil } - if t > c.MaxTime { + if t > c.maxTime { return nil } - if t < c.MaxTime { + if t < c.maxTime { return ErrOutOfOrderSample } // We are allowing exact duplicates as we can encounter them in valid cases @@ -1723,7 +1723,7 @@ func (s *memSeries) Appendable(t int64, v float64) error { return nil } -func (s *memSeries) Chunk(id int) *memChunk { +func (s *memSeries) chunk(id int) *memChunk { ix := id - s.firstChunkID if ix < 0 || ix >= len(s.chunks) { return nil @@ -1731,16 +1731,16 @@ func (s *memSeries) Chunk(id int) *memChunk { return s.chunks[ix] } -func (s *memSeries) ChunkID(pos int) int { +func (s *memSeries) chunkID(pos int) int { return pos + s.firstChunkID } // TruncateChunksBefore removes all chunks from the series that have not timestamp // at or after mint. Chunk IDs remain unchanged. 
-func (s *memSeries) TruncateChunksBefore(mint int64) (removed int) { +func (s *memSeries) truncateChunksBefore(mint int64) (removed int) { var k int for i, c := range s.chunks { - if c.MaxTime >= mint { + if c.maxTime >= mint { break } k = i + 1 @@ -1757,7 +1757,7 @@ func (s *memSeries) TruncateChunksBefore(mint int64) (removed int) { } // Append adds the sample (t, v) to the series. -func (s *memSeries) Append(t int64, v float64) (success, chunkCreated bool) { +func (s *memSeries) append(t int64, v float64) (success, chunkCreated bool) { // Based on Gorilla white papers this offers near-optimal compression ratio // so anything bigger that this has diminishing returns and increases // the time range within which we have to decompress all samples. @@ -1769,17 +1769,17 @@ func (s *memSeries) Append(t int64, v float64) (success, chunkCreated bool) { c = s.cut(t) chunkCreated = true } - numSamples := c.Chunk.NumSamples() + numSamples := c.chunk.NumSamples() // Out of order sample. - if c.MaxTime >= t { + if c.maxTime >= t { return false, chunkCreated } // If we reach 25% of a chunk's desired sample count, set a definitive time // at which to start the next chunk. // At latest it must happen at the timestamp set when the chunk was cut. if numSamples == samplesPerChunk/4 { - s.nextAt = computeChunkEndTime(c.MinTime, c.MaxTime, s.nextAt) + s.nextAt = computeChunkEndTime(c.minTime, c.maxTime, s.nextAt) } if t >= s.nextAt { c = s.cut(t) @@ -1787,7 +1787,7 @@ func (s *memSeries) Append(t int64, v float64) (success, chunkCreated bool) { } s.app.Append(t, v) - c.MaxTime = t + c.maxTime = t s.sampleBuf[0] = s.sampleBuf[1] s.sampleBuf[1] = s.sampleBuf[2] @@ -1797,70 +1797,6 @@ func (s *memSeries) Append(t int64, v float64) (success, chunkCreated bool) { return true, chunkCreated } -func (s *memSeries) Iterator(id int) chunkenc.Iterator { - c := s.Chunk(id) - // TODO(fabxc): Work around! 
A querier may have retrieved a pointer to a series' chunk, - // which got then garbage collected before it got accessed. - // We must ensure to not garbage collect as long as any readers still hold a reference. - if c == nil { - return chunkenc.NewNopIterator() - } - - if id-s.firstChunkID < len(s.chunks)-1 { - return c.Chunk.Iterator() - } - // Serve the last 4 samples for the last chunk from the sample buffer - // as their compressed bytes may be mutated by added samples. - it := &memSafeIterator{ - Iterator: c.Chunk.Iterator(), - i: -1, - total: c.Chunk.NumSamples(), - buf: s.sampleBuf, - } - return it -} - -func (s *memSeries) head() *memChunk { - return s.headChunk -} - -type memChunk struct { - Chunk chunkenc.Chunk - MinTime, MaxTime int64 -} - -// Returns true if the chunk overlaps [mint, maxt]. -func (mc *memChunk) OverlapsClosedInterval(mint, maxt int64) bool { - return mc.MinTime <= maxt && mint <= mc.MaxTime -} - -type memSafeIterator struct { - chunkenc.Iterator - - i int - total int - buf [4]sample -} - -func (it *memSafeIterator) Next() bool { - if it.i+1 >= it.total { - return false - } - it.i++ - if it.total-it.i > 4 { - return it.Iterator.Next() - } - return true -} - -func (it *memSafeIterator) At() (int64, float64) { - if it.total-it.i > 4 { - return it.Iterator.At() - } - s := it.buf[4-(it.total-it.i)] - return s.t, s.v -} - // computeChunkEndTime estimates the end timestamp based the beginning of a chunk, // its current timestamp and the upper bound up to which we insert data. // It assumes that the time range is 1/4 full. 
diff --git a/head_test.go b/head_test.go index c3a2da71..457076ab 100644 --- a/head_test.go +++ b/head_test.go @@ -226,18 +226,18 @@ func TestHead_Truncate(t *testing.T) { s4, _ := h.getOrCreate(4, labels.FromStrings("a", "2", "b", "2", "c", "1")) s1.chunks = []*memChunk{ - {MinTime: 0, MaxTime: 999}, - {MinTime: 1000, MaxTime: 1999}, - {MinTime: 2000, MaxTime: 2999}, + {minTime: 0, maxTime: 999}, + {minTime: 1000, maxTime: 1999}, + {minTime: 2000, maxTime: 2999}, } s2.chunks = []*memChunk{ - {MinTime: 1000, MaxTime: 1999}, - {MinTime: 2000, MaxTime: 2999}, - {MinTime: 3000, MaxTime: 3999}, + {minTime: 1000, maxTime: 1999}, + {minTime: 2000, maxTime: 2999}, + {minTime: 3000, maxTime: 3999}, } s3.chunks = []*memChunk{ - {MinTime: 0, MaxTime: 999}, - {MinTime: 1000, MaxTime: 1999}, + {minTime: 0, maxTime: 999}, + {minTime: 1000, maxTime: 1999}, } s4.chunks = []*memChunk{} @@ -247,12 +247,12 @@ func TestHead_Truncate(t *testing.T) { testutil.Ok(t, h.Truncate(2000)) testutil.Equals(t, []*memChunk{ - {MinTime: 2000, MaxTime: 2999}, + {minTime: 2000, maxTime: 2999}, }, h.series.getByID(s1.ref).chunks) testutil.Equals(t, []*memChunk{ - {MinTime: 2000, MaxTime: 2999}, - {MinTime: 3000, MaxTime: 3999}, + {minTime: 2000, maxTime: 2999}, + {minTime: 3000, maxTime: 3999}, }, h.series.getByID(s2.ref).chunks) testutil.Assert(t, h.series.getByID(s3.ref) == nil, "") @@ -293,25 +293,25 @@ func TestMemSeries_truncateChunks(t *testing.T) { s := newMemSeries(labels.FromStrings("a", "b"), 1, 2000) for i := 0; i < 4000; i += 5 { - ok, _ := s.Append(int64(i), float64(i)) + ok, _ := s.append(int64(i), float64(i)) testutil.Assert(t, ok == true, "sample append failed") } // Check that truncate removes half of the chunks and afterwards // that the ID of the last chunk still gives us the same chunk afterwards. 
countBefore := len(s.chunks) - lastID := s.ChunkID(countBefore - 1) - lastChunk := s.Chunk(lastID) + lastID := s.chunkID(countBefore - 1) + lastChunk := s.chunk(lastID) - testutil.Assert(t, s.Chunk(0) != nil, "") + testutil.Assert(t, s.chunk(0) != nil, "") testutil.Assert(t, lastChunk != nil, "") - s.TruncateChunksBefore(2000) + s.truncateChunksBefore(2000) - testutil.Equals(t, int64(2000), s.chunks[0].MinTime) - testutil.Assert(t, s.Chunk(0) == nil, "first chunks not gone") + testutil.Equals(t, int64(2000), s.chunks[0].minTime) + testutil.Assert(t, s.chunk(0) == nil, "first chunks not gone") testutil.Equals(t, countBefore/2, len(s.chunks)) - testutil.Equals(t, lastChunk, s.Chunk(lastID)) + testutil.Equals(t, lastChunk, s.chunk(lastID)) // Validate that the series' sample buffer is applied correctly to the last chunk // after truncation. @@ -859,29 +859,29 @@ func TestMemSeries_append(t *testing.T) { // Add first two samples at the very end of a chunk range and the next two // on and after it. // New chunk must correctly be cut at 1000. 
- ok, chunkCreated := s.Append(998, 1) + ok, chunkCreated := s.append(998, 1) testutil.Assert(t, ok, "append failed") testutil.Assert(t, chunkCreated, "first sample created chunk") - ok, chunkCreated = s.Append(999, 2) + ok, chunkCreated = s.append(999, 2) testutil.Assert(t, ok, "append failed") testutil.Assert(t, !chunkCreated, "second sample should use same chunk") - ok, chunkCreated = s.Append(1000, 3) + ok, chunkCreated = s.append(1000, 3) testutil.Assert(t, ok, "append failed") testutil.Assert(t, chunkCreated, "expected new chunk on boundary") - ok, chunkCreated = s.Append(1001, 4) + ok, chunkCreated = s.append(1001, 4) testutil.Assert(t, ok, "append failed") testutil.Assert(t, !chunkCreated, "second sample should use same chunk") - testutil.Assert(t, s.chunks[0].MinTime == 998 && s.chunks[0].MaxTime == 999, "wrong chunk range") - testutil.Assert(t, s.chunks[1].MinTime == 1000 && s.chunks[1].MaxTime == 1001, "wrong chunk range") + testutil.Assert(t, s.chunks[0].minTime == 998 && s.chunks[0].maxTime == 999, "wrong chunk range") + testutil.Assert(t, s.chunks[1].minTime == 1000 && s.chunks[1].maxTime == 1001, "wrong chunk range") // Fill the range [1000,2000) with many samples. Intermediate chunks should be cut // at approximately 120 samples per chunk. for i := 1; i < 1000; i++ { - ok, _ := s.Append(1001+int64(i), float64(i)) + ok, _ := s.append(1001+int64(i), float64(i)) testutil.Assert(t, ok, "append failed") } @@ -889,7 +889,7 @@ func TestMemSeries_append(t *testing.T) { // All chunks but the first and last should now be moderately full. 
for i, c := range s.chunks[1 : len(s.chunks)-1] { - testutil.Assert(t, c.Chunk.NumSamples() > 100, "unexpected small chunk %d of length %d", i, c.Chunk.NumSamples()) + testutil.Assert(t, c.chunk.NumSamples() > 100, "unexpected small chunk %d of length %d", i, c.chunk.NumSamples()) } } @@ -903,8 +903,8 @@ func TestGCChunkAccess(t *testing.T) { s, _ := h.getOrCreate(1, labels.FromStrings("a", "1")) s.chunks = []*memChunk{ - {MinTime: 0, MaxTime: 999}, - {MinTime: 1000, MaxTime: 1999}, + {minTime: 0, maxTime: 999}, + {minTime: 1000, maxTime: 1999}, } idx := h.indexRange(0, 1500) @@ -943,8 +943,8 @@ func TestGCSeriesAccess(t *testing.T) { s, _ := h.getOrCreate(1, labels.FromStrings("a", "1")) s.chunks = []*memChunk{ - {MinTime: 0, MaxTime: 999}, - {MinTime: 1000, MaxTime: 1999}, + {minTime: 0, maxTime: 999}, + {minTime: 1000, maxTime: 1999}, } idx := h.indexRange(0, 2000) diff --git a/querier.go b/querier.go index d444f064..a2ef40d5 100644 --- a/querier.go +++ b/querier.go @@ -1070,7 +1070,7 @@ type chunkSeriesIterator struct { intervals tombstones.Intervals } -func newChunkSeriesIterator(cs []chunks.Meta, dranges Intervals, mint, maxt int64) *chunkSeriesIterator { +func newChunkSeriesIterator(cs []chunks.Meta, dranges tombstones.Intervals, mint, maxt int64) *chunkSeriesIterator { csi := &chunkSeriesIterator{ chunks: cs, i: 0, diff --git a/record/internal.go b/record/internal.go deleted file mode 100644 index bec2de49..00000000 --- a/record/internal.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2017 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package record - -import ( - "errors" - - "github.com/prometheus/tsdb/labels" -) - -var ( - // ErrNotFound is returned if a looked up resource was not found. Duplicate ErrNotFound from head.go. - ErrNotFound = errors.New("not found") -) - -// RefSeries is the series labels with the series ID. -type RefSeries struct { - Ref uint64 - Labels labels.Labels -} - -// RefSample is a timestamp/value pair associated with a reference to a series. -type RefSample struct { - Ref uint64 - T int64 - V float64 -} diff --git a/record/record.go b/record/record.go index 5ab75111..2bc2bc4f 100644 --- a/record/record.go +++ b/record/record.go @@ -38,6 +38,24 @@ const ( Tombstones Type = 3 ) +var ( + // ErrNotFound is returned if a looked up resource was not found. Duplicate ErrNotFound from head.go. + ErrNotFound = errors.New("not found") +) + +// RefSeries is the series labels with the series ID. +type RefSeries struct { + Ref uint64 + Labels labels.Labels +} + +// RefSample is a timestamp/value pair associated with a reference to a series. +type RefSample struct { + Ref uint64 + T int64 + V float64 +} + // Decoder decodes series, sample, and tombstone records. // The zero value is ready to use. type Decoder struct { From c392f574b6e43c779c4e7cdd58eb6c03f6421f1d Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 17 Jul 2019 14:40:57 -0700 Subject: [PATCH 13/16] These comments don't need to change. 
Signed-off-by: Callum Styan --- head.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/head.go b/head.go index c309aba3..00296ed2 100644 --- a/head.go +++ b/head.go @@ -392,7 +392,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { series, created := h.getOrCreateWithID(s.Ref, s.Labels.Hash(), s.Labels) if !created { - // There's already a different Ref for this series. + // There's already a different ref for this series. multiRefLock.Lock() multiRef[s.Ref] = series.ref multiRefLock.Unlock() @@ -1691,7 +1691,7 @@ func (s *memSeries) chunksMetas() []chunks.Meta { } // reset re-initialises all the variable in the memSeries except 'lset', 'ref', -// and 'chunkRange', like how it would appear after 'newmemSeries(...)'. +// and 'chunkRange', like how it would appear after 'newMemSeries(...)'. func (s *memSeries) reset() { s.chunks = nil s.headChunk = nil @@ -1702,7 +1702,7 @@ func (s *memSeries) reset() { s.app = nil } -// Appendable checks whether the given sample is valid for appending to the series. +// appendable checks whether the given sample is valid for appending to the series. func (s *memSeries) appendable(t int64, v float64) error { c := s.head() if c == nil { @@ -1735,7 +1735,7 @@ func (s *memSeries) chunkID(pos int) int { return pos + s.firstChunkID } -// TruncateChunksBefore removes all chunks from the series that have not timestamp +// truncateChunksBefore removes all chunks from the series that have not timestamp // at or after mint. Chunk IDs remain unchanged. func (s *memSeries) truncateChunksBefore(mint int64) (removed int) { var k int @@ -1756,7 +1756,7 @@ func (s *memSeries) truncateChunksBefore(mint int64) (removed int) { return k } -// Append adds the sample (t, v) to the series. +// append adds the sample (t, v) to the series. 
func (s *memSeries) append(t int64, v float64) (success, chunkCreated bool) { // Based on Gorilla white papers this offers near-optimal compression ratio // so anything bigger that this has diminishing returns and increases From 5264c568139f92896220d83f69ba297a6b6f2918 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Thu, 18 Jul 2019 06:13:25 -0700 Subject: [PATCH 14/16] Update WAL watcher from Prometheus repo, pass metrics structs around for the live reader, expose WriteTo interface. Signed-off-by: Callum Styan --- wal/watcher.go | 77 +++-- wal/watcher_test.go | 729 +++++++++++++++++++++++--------------------- 2 files changed, 415 insertions(+), 391 deletions(-) diff --git a/wal/watcher.go b/wal/watcher.go index b3732aab..c29fdc28 100644 --- a/wal/watcher.go +++ b/wal/watcher.go @@ -39,45 +39,34 @@ const ( consumer = "consumer" ) -type watcherMetrics struct { - recordsRead *prometheus.CounterVec - recordDecodeFails *prometheus.CounterVec - samplesSentPreTailing *prometheus.CounterVec - currentSegment *prometheus.GaugeVec -} - -var ( - lrMetrics = NewLiveReaderMetrics(prometheus.DefaultRegisterer) -) - +// fromTime returns a new millisecond timestamp from a time. // This function is copied from prometheus/prometheus/pkg/timestamp to avoid adding vendor to TSDB repo. - -// FromTime returns a new millisecond timestamp from a time. 
-func FromTime(t time.Time) int64 { +func fromTime(t time.Time) int64 { return t.Unix()*1000 + int64(t.Nanosecond())/int64(time.Millisecond) } -// func init() { -// prometheus.MustRegister(watcherRecordsRead) -// prometheus.MustRegister(watcherRecordDecodeFails) -// prometheus.MustRegister(watcherSamplesSentPreTailing) -// prometheus.MustRegister(watcherCurrentSegment) -// } - -type writeTo interface { +type WriteTo interface { Append([]record.RefSample) bool StoreSeries([]record.RefSeries, int) SeriesReset(int) } +type watcherMetrics struct { + recordsRead *prometheus.CounterVec + recordDecodeFails *prometheus.CounterVec + samplesSentPreTailing *prometheus.CounterVec + currentSegment *prometheus.GaugeVec +} + // Watcher watches the TSDB WAL for a given WriteTo. type Watcher struct { name string - writer writeTo + writer WriteTo logger log.Logger walDir string lastCheckpoint string metrics *watcherMetrics + readerMetrics *liveReaderMetrics startTime int64 @@ -144,22 +133,24 @@ func NewWatcherMetrics(reg prometheus.Registerer) *watcherMetrics { } // NewWatcher creates a new WAL watcher for a given WriteTo. -func NewWatcher(logger log.Logger, metrics *watcherMetrics, name string, writer writeTo, walDir string) *Watcher { +func NewWatcher(reg prometheus.Registerer, logger log.Logger, name string, writer WriteTo, walDir string) *Watcher { if logger == nil { logger = log.NewNopLogger() } - return &Watcher{ - logger: logger, - metrics: metrics, - writer: writer, - walDir: path.Join(walDir, "wal"), - name: name, - quit: make(chan struct{}), - done: make(chan struct{}), + w := Watcher{ + logger: logger, + writer: writer, + metrics: NewWatcherMetrics(reg), + readerMetrics: NewLiveReaderMetrics(reg), + walDir: path.Join(walDir, "wal"), + name: name, + quit: make(chan struct{}), + done: make(chan struct{}), maxSegment: -1, } + return &w } func (w *Watcher) setMetrics() { @@ -175,7 +166,7 @@ func (w *Watcher) setMetrics() { // Start the Watcher. 
func (w *Watcher) Start() { w.setMetrics() - level.Info(w.logger).Log("msg", "starting WAL watcher", "queue", w.name) + level.Info(w.logger).Log("msg", "starting WAL watcher", "consumer", w.name) go w.loop() } @@ -200,7 +191,7 @@ func (w *Watcher) loop() { // We may encourter failures processing the WAL; we should wait and retry. for !isClosed(w.quit) { - w.startTime = FromTime(time.Now()) + w.startTime = fromTime(time.Now()) if err := w.run(); err != nil { level.Error(w.logger).Log("msg", "error tailing WAL", "err", err) } @@ -263,7 +254,7 @@ func (w *Watcher) run() error { func (w *Watcher) findSegmentForIndex(index int) (int, error) { refs, err := w.segments(w.walDir) if err != nil { - return -1, nil + return -1, err } for _, r := range refs { @@ -278,7 +269,7 @@ func (w *Watcher) findSegmentForIndex(index int) (int, error) { func (w *Watcher) firstAndLast() (int, int, error) { refs, err := w.segments(w.walDir) if err != nil { - return -1, -1, nil + return -1, -1, err } if len(refs) == 0 { @@ -323,7 +314,7 @@ func (w *Watcher) watch(segmentNum int, tail bool) error { } defer segment.Close() - reader := NewLiveReader(w.logger, lrMetrics, segment) + reader := NewLiveReader(w.logger, w.readerMetrics, segment) readTicker := time.NewTicker(readPeriod) defer readTicker.Stop() @@ -448,11 +439,12 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { dec record.Decoder series []record.RefSeries samples []record.RefSample + send []record.RefSample ) for r.Next() && !isClosed(w.quit) { rec := r.Record() - w.recordsReadMetric.WithLabelValues(Type(dec.Type(rec))).Inc() + w.recordsReadMetric.WithLabelValues(recordType(dec.Type(rec))).Inc() switch dec.Type(rec) { case record.Series: @@ -474,7 +466,6 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { w.recordDecodeFailsMetric.Inc() return err } - var send []record.RefSample for _, s := range samples { if s.T > w.startTime { send = append(send, s) @@ -483,6 +474,7 @@ 
func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { if len(send) > 0 { // Blocks until the sample is sent to all remote write endpoints or closed (because enqueue blocks). w.writer.Append(send) + send = send[:0] } case record.Tombstones: @@ -498,7 +490,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { return r.Err() } -func Type(rt record.Type) string { +func recordType(rt record.Type) string { switch rt { case record.Invalid: return "invalid" @@ -538,7 +530,7 @@ func (w *Watcher) readCheckpoint(checkpointDir string) error { } defer sr.Close() - r := NewLiveReader(w.logger, lrMetrics, sr) + r := NewLiveReader(w.logger, w.readerMetrics, sr) if err := w.readSegment(r, index, false); err != io.EOF && err != nil { return errors.Wrap(err, "readSegment") } @@ -554,7 +546,8 @@ func (w *Watcher) readCheckpoint(checkpointDir string) error { func checkpointNum(dir string) (int, error) { // Checkpoint dir names are in the format checkpoint.000001 - chunks := strings.Split(dir, ".") + // dir may contain a hidden directory, so only check the base directory + chunks := strings.Split(path.Base(dir), ".") if len(chunks) != 2 { return 0, errors.Errorf("invalid checkpoint dir string: %s", dir) } diff --git a/wal/watcher_test.go b/wal/watcher_test.go index ab80db85..216793a6 100644 --- a/wal/watcher_test.go +++ b/wal/watcher_test.go @@ -92,71 +92,75 @@ func TestTailSamples(t *testing.T) { pageSize := 32 * 1024 const seriesCount = 10 const samplesCount = 250 - now := time.Now() - - dir, err := ioutil.TempDir("", "readCheckpoint") - testutil.Ok(t, err) - defer os.RemoveAll(dir) - - wdir := path.Join(dir, "wal") - err = os.Mkdir(wdir, 0777) - testutil.Ok(t, err) - - enc := record.Encoder{} - w, err := NewSize(nil, prometheus.DefaultRegisterer, wdir, 128*pageSize, false) - testutil.Ok(t, err) - - // Write to the initial segment then checkpoint. 
- for i := 0; i < seriesCount; i++ { - ref := i + 100 - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(ref), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, - }, - }, nil) - testutil.Ok(t, w.Log(series)) - - for j := 0; j < samplesCount; j++ { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(inner), - T: int64(now.UnixNano()) + 1, - V: float64(i), - }, - }, nil) - testutil.Ok(t, w.Log(sample)) - } - } + for _, compress := range []bool{false, true} { + t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { + now := time.Now() + + dir, err := ioutil.TempDir("", "readCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.Encoder{} + w, err := NewSize(nil, nil, wdir, 128*pageSize, compress) + testutil.Ok(t, err) + + // Write to the initial segment then checkpoint. + for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(now.UnixNano()) + 1, + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } - // Start read after checkpoint, no more data written. - first, last, err := w.Segments() - testutil.Ok(t, err) + // Start read after checkpoint, no more data written. 
+ first, last, err := w.Segments() + testutil.Ok(t, err) - wt := newWriteToMock() - watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) - watcher.startTime = now.UnixNano() + wt := newWriteToMock() + watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher.startTime = now.UnixNano() - // Set the Watcher's metrics so they're not nil pointers. - watcher.setMetrics() - for i := first; i <= last; i++ { - segment, err := OpenReadSegment(SegmentName(watcher.walDir, i)) - testutil.Ok(t, err) - defer segment.Close() + // Set the Watcher's metrics so they're not nil pointers. + watcher.setMetrics() + for i := first; i <= last; i++ { + segment, err := OpenReadSegment(SegmentName(watcher.walDir, i)) + testutil.Ok(t, err) + defer segment.Close() - reader := NewLiveReader(nil, nil, segment) - // Use tail true so we can ensure we got the right number of samples. - watcher.readSegment(reader, i, true) - } + reader := NewLiveReader(nil, watcher.readerMetrics, segment) + // Use tail true so we can ensure we got the right number of samples. 
+ watcher.readSegment(reader, i, true) + } - expectedSeries := seriesCount - expectedSamples := seriesCount * samplesCount - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumLabels() >= expectedSeries - }) - testutil.Equals(t, expectedSeries, wt.checkNumLabels()) - testutil.Equals(t, expectedSamples, wt.samplesAppended) + expectedSeries := seriesCount + expectedSamples := seriesCount * samplesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expectedSeries + }) + testutil.Equals(t, expectedSeries, wt.checkNumLabels()) + testutil.Equals(t, expectedSamples, wt.samplesAppended) + }) + } } func TestReadToEndNoCheckpoint(t *testing.T) { @@ -164,61 +168,65 @@ func TestReadToEndNoCheckpoint(t *testing.T) { const seriesCount = 10 const samplesCount = 250 - dir, err := ioutil.TempDir("", "readToEnd_noCheckpoint") - testutil.Ok(t, err) - defer os.RemoveAll(dir) - wdir := path.Join(dir, "wal") - err = os.Mkdir(wdir, 0777) - testutil.Ok(t, err) - - w, err := NewSize(nil, nil, wdir, 128*pageSize, false) - testutil.Ok(t, err) - - var recs [][]byte - - enc := record.Encoder{} - - for i := 0; i < seriesCount; i++ { - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(i), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, - }, - }, nil) - recs = append(recs, series) - for j := 0; j < samplesCount; j++ { - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(j), - T: int64(i), - V: float64(i), - }, - }, nil) - - recs = append(recs, sample) - - // Randomly batch up records. 
- if rand.Intn(4) < 3 { - testutil.Ok(t, w.Log(recs...)) - recs = recs[:0] - } - } - } - testutil.Ok(t, w.Log(recs...)) + for _, compress := range []bool{false, true} { + t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { + dir, err := ioutil.TempDir("", "readToEnd_noCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + w, err := NewSize(nil, nil, wdir, 128*pageSize, compress) + testutil.Ok(t, err) - _, _, err = w.Segments() - testutil.Ok(t, err) + var recs [][]byte - wt := newWriteToMock() - watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) - go watcher.Start() + enc := record.Encoder{} - expected := seriesCount - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumLabels() >= expected - }) - watcher.Stop() - testutil.Equals(t, expected, wt.checkNumLabels()) + for i := 0; i < seriesCount; i++ { + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(i), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + recs = append(recs, series) + for j := 0; j < samplesCount; j++ { + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(j), + T: int64(i), + V: float64(i), + }, + }, nil) + + recs = append(recs, sample) + + // Randomly batch up records. 
+ if rand.Intn(4) < 3 { + testutil.Ok(t, w.Log(recs...)) + recs = recs[:0] + } + } + } + testutil.Ok(t, w.Log(recs...)) + + _, _, err = w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + go watcher.Start() + + expected := seriesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expected + }) + watcher.Stop() + testutil.Equals(t, expected, wt.checkNumLabels()) + }) + } } func TestReadToEndWithCheckpoint(t *testing.T) { @@ -228,79 +236,83 @@ func TestReadToEndWithCheckpoint(t *testing.T) { const seriesCount = 10 const samplesCount = 250 - dir, err := ioutil.TempDir("", "readToEnd_withCheckpoint") - testutil.Ok(t, err) - defer os.RemoveAll(dir) - - wdir := path.Join(dir, "wal") - err = os.Mkdir(wdir, 0777) - testutil.Ok(t, err) - - enc := record.Encoder{} - w, err := NewSize(nil, nil, wdir, segmentSize, false) - testutil.Ok(t, err) - - // Write to the initial segment then checkpoint. 
- for i := 0; i < seriesCount; i++ { - ref := i + 100 - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(ref), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, - }, - }, nil) - testutil.Ok(t, w.Log(series)) - - for j := 0; j < samplesCount; j++ { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(inner), - T: int64(i), - V: float64(i), - }, - }, nil) - testutil.Ok(t, w.Log(sample)) - } - } + for _, compress := range []bool{false, true} { + t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { + dir, err := ioutil.TempDir("", "readToEnd_withCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.Encoder{} + w, err := NewSize(nil, nil, wdir, segmentSize, compress) + testutil.Ok(t, err) + + // Write to the initial segment then checkpoint. + for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } - Checkpoint(w, 0, 1, func(x uint64) bool { return true }, 0) - w.Truncate(1) - - // Write more records after checkpointing. 
- for i := 0; i < seriesCount; i++ { - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(i), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, - }, - }, nil) - testutil.Ok(t, w.Log(series)) - - for j := 0; j < samplesCount; j++ { - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(j), - T: int64(i), - V: float64(i), - }, - }, nil) - testutil.Ok(t, w.Log(sample)) - } - } + Checkpoint(w, 0, 1, func(x uint64) bool { return true }, 0) + w.Truncate(1) + + // Write more records after checkpointing. + for i := 0; i < seriesCount; i++ { + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(i), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(j), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } - _, _, err = w.Segments() - testutil.Ok(t, err) - wt := newWriteToMock() - watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) - go watcher.Start() - - expected := seriesCount * 2 - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumLabels() >= expected - }) - watcher.Stop() - testutil.Equals(t, expected, wt.checkNumLabels()) + _, _, err = w.Segments() + testutil.Ok(t, err) + wt := newWriteToMock() + watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + go watcher.Start() + + expected := seriesCount * 2 + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expected + }) + watcher.Stop() + testutil.Equals(t, expected, wt.checkNumLabels()) + }) + } } func TestReadCheckpoint(t *testing.T) { @@ -308,61 +320,65 @@ func TestReadCheckpoint(t *testing.T) { const seriesCount = 10 const samplesCount = 250 - dir, 
err := ioutil.TempDir("", "readCheckpoint") - testutil.Ok(t, err) - defer os.RemoveAll(dir) - - wdir := path.Join(dir, "wal") - err = os.Mkdir(wdir, 0777) - testutil.Ok(t, err) - - os.Create(SegmentName(wdir, 30)) - - enc := record.Encoder{} - w, err := NewSize(nil, nil, wdir, 128*pageSize, false) - testutil.Ok(t, err) - - // Write to the initial segment then checkpoint. - for i := 0; i < seriesCount; i++ { - ref := i + 100 - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(ref), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, - }, - }, nil) - testutil.Ok(t, w.Log(series)) - - for j := 0; j < samplesCount; j++ { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(inner), - T: int64(i), - V: float64(i), - }, - }, nil) - testutil.Ok(t, w.Log(sample)) - } + for _, compress := range []bool{false, true} { + t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { + dir, err := ioutil.TempDir("", "readCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + os.Create(SegmentName(wdir, 30)) + + enc := record.Encoder{} + w, err := NewSize(nil, nil, wdir, 128*pageSize, compress) + testutil.Ok(t, err) + + // Write to the initial segment then checkpoint. 
+ for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + Checkpoint(w, 30, 31, func(x uint64) bool { return true }, 0) + w.Truncate(32) + + // Start read after checkpoint, no more data written. + _, _, err = w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + // watcher. + go watcher.Start() + + expectedSeries := seriesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expectedSeries + }) + watcher.Stop() + testutil.Equals(t, expectedSeries, wt.checkNumLabels()) + }) } - Checkpoint(w, 30, 31, func(x uint64) bool { return true }, 0) - w.Truncate(32) - - // Start read after checkpoint, no more data written. - _, _, err = w.Segments() - testutil.Ok(t, err) - - wt := newWriteToMock() - watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) - // watcher. 
- go watcher.Start() - - expectedSeries := seriesCount - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumLabels() >= expectedSeries - }) - watcher.Stop() - testutil.Equals(t, expectedSeries, wt.checkNumLabels()) } func TestReadCheckpointMultipleSegments(t *testing.T) { @@ -372,60 +388,64 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { const seriesCount = 20 const samplesCount = 300 - dir, err := ioutil.TempDir("", "readCheckpoint") - testutil.Ok(t, err) - defer os.RemoveAll(dir) - - wdir := path.Join(dir, "wal") - err = os.Mkdir(wdir, 0777) - testutil.Ok(t, err) - - enc := record.Encoder{} - w, err := NewSize(nil, nil, wdir, pageSize, false) - testutil.Ok(t, err) - - // Write a bunch of data. - for i := 0; i < segments; i++ { - for j := 0; j < seriesCount; j++ { - ref := j + (i * 100) - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(ref), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", j)}}, - }, - }, nil) - testutil.Ok(t, w.Log(series)) - - for k := 0; k < samplesCount; k++ { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(inner), - T: int64(i), - V: float64(i), - }, - }, nil) - testutil.Ok(t, w.Log(sample)) + for _, compress := range []bool{false, true} { + t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { + dir, err := ioutil.TempDir("", "readCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.Encoder{} + w, err := NewSize(nil, nil, wdir, pageSize, compress) + testutil.Ok(t, err) + + // Write a bunch of data. 
+ for i := 0; i < segments; i++ { + for j := 0; j < seriesCount; j++ { + ref := j + (i * 100) + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", j)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for k := 0; k < samplesCount; k++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } } - } - } - Checkpoint(w, 0, 4, func(id uint64) bool { - return true - }, 0) + Checkpoint(w, 0, 4, func(id uint64) bool { + return true + }, 0) - wt := newWriteToMock() - watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) - watcher.maxSegment = -1 + wt := newWriteToMock() + watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher.maxSegment = -1 - // Set the Watcher's metrics so they're not nil pointers. - watcher.setMetrics() + // Set the Watcher's metrics so they're not nil pointers. + watcher.setMetrics() - lastCheckpoint, _, err := LastCheckpoint(watcher.walDir) - testutil.Ok(t, err) + lastCheckpoint, _, err := LastCheckpoint(watcher.walDir) + testutil.Ok(t, err) - err = watcher.readCheckpoint(lastCheckpoint) - testutil.Ok(t, err) + err = watcher.readCheckpoint(lastCheckpoint) + testutil.Ok(t, err) + }) + } } func TestCheckpointSeriesReset(t *testing.T) { @@ -434,71 +454,82 @@ func TestCheckpointSeriesReset(t *testing.T) { // in order to get enough segments for us to checkpoint. 
const seriesCount = 20 const samplesCount = 350 - - dir, err := ioutil.TempDir("", "seriesReset") - testutil.Ok(t, err) - defer os.RemoveAll(dir) - - wdir := path.Join(dir, "wal") - err = os.Mkdir(wdir, 0777) - testutil.Ok(t, err) - - enc := record.Encoder{} - w, err := NewSize(nil, nil, wdir, segmentSize, false) - testutil.Ok(t, err) - - // Write to the initial segment, then checkpoint later. - for i := 0; i < seriesCount; i++ { - ref := i + 100 - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(ref), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, - }, - }, nil) - testutil.Ok(t, w.Log(series)) - - for j := 0; j < samplesCount; j++ { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(inner), - T: int64(i), - V: float64(i), - }, - }, nil) - testutil.Ok(t, w.Log(sample)) - } + testCases := []struct { + compress bool + segments int + }{ + {compress: false, segments: 14}, + {compress: true, segments: 13}, } - _, _, err = w.Segments() - testutil.Ok(t, err) - - wt := newWriteToMock() - watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) - watcher.maxSegment = -1 - go watcher.Start() - - expected := seriesCount - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumLabels() >= expected - }) - testutil.Equals(t, seriesCount, wt.checkNumLabels()) - - _, err = Checkpoint(w, 2, 4, func(x uint64) bool { return true }, 0) - testutil.Ok(t, err) - - err = w.Truncate(5) - testutil.Ok(t, err) - - _, cpi, err := LastCheckpoint(path.Join(dir, "wal")) - testutil.Ok(t, err) - err = watcher.garbageCollectSeries(cpi + 1) - testutil.Ok(t, err) + for _, tc := range testCases { + t.Run(fmt.Sprintf("compress=%t", tc.compress), func(t *testing.T) { + dir, err := ioutil.TempDir("", "seriesReset") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 
0777) + testutil.Ok(t, err) + + enc := record.Encoder{} + w, err := NewSize(nil, nil, wdir, segmentSize, tc.compress) + testutil.Ok(t, err) + + // Write to the initial segment, then checkpoint later. + for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } - watcher.Stop() - // If you modify the checkpoint and truncate segment #'s run the test to see how - // many series records you end up with and change the last Equals check accordingly - // or modify the Equals to Assert(len(wt.seriesLabels) < seriesCount*10) - testutil.Equals(t, 14, wt.checkNumLabels()) + _, _, err = w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher.maxSegment = -1 + go watcher.Start() + + expected := seriesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expected + }) + testutil.Equals(t, seriesCount, wt.checkNumLabels()) + + _, err = Checkpoint(w, 2, 4, func(x uint64) bool { return true }, 0) + testutil.Ok(t, err) + + err = w.Truncate(5) + testutil.Ok(t, err) + + _, cpi, err := LastCheckpoint(path.Join(dir, "wal")) + testutil.Ok(t, err) + err = watcher.garbageCollectSeries(cpi + 1) + testutil.Ok(t, err) + + watcher.Stop() + // If you modify the checkpoint and truncate segment #'s run the test to see how + // many series records you end up with and change the last Equals check accordingly + // or modify the Equals to Assert(len(wt.seriesLabels) < seriesCount*10) + testutil.Equals(t, tc.segments, 
wt.checkNumLabels()) + }) + } } From 8c40bb9b4abab5513d9909c0262a3cc7aeb86d18 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Mon, 29 Jul 2019 11:11:17 -0700 Subject: [PATCH 15/16] Don't export MemTombstones. Signed-off-by: Callum Styan --- compact_test.go | 10 ++++++---- mocks_test.go | 10 ++++++---- querier_test.go | 11 ++++++----- tombstones/tombstones.go | 24 ++++++++++++------------ 4 files changed, 30 insertions(+), 25 deletions(-) diff --git a/compact_test.go b/compact_test.go index fe61da24..2e85115b 100644 --- a/compact_test.go +++ b/compact_test.go @@ -456,10 +456,12 @@ func metaRange(name string, mint, maxt int64, stats *BlockStats) dirMeta { type erringBReader struct{} -func (erringBReader) Index() (IndexReader, error) { return nil, errors.New("index") } -func (erringBReader) Chunks() (ChunkReader, error) { return nil, errors.New("chunks") } -func (erringBReader) Tombstones() (TombstoneReader, error) { return nil, errors.New("tombstones") } -func (erringBReader) Meta() BlockMeta { return BlockMeta{} } +func (erringBReader) Index() (IndexReader, error) { return nil, errors.New("index") } +func (erringBReader) Chunks() (ChunkReader, error) { return nil, errors.New("chunks") } +func (erringBReader) Tombstones() (tombstones.TombstoneReader, error) { + return nil, errors.New("tombstones") +} +func (erringBReader) Meta() BlockMeta { return BlockMeta{} } type nopChunkWriter struct{} diff --git a/mocks_test.go b/mocks_test.go index 44d8e9a5..c5bc0f6b 100644 --- a/mocks_test.go +++ b/mocks_test.go @@ -73,7 +73,9 @@ type mockBReader struct { maxt int64 } -func (r *mockBReader) Index() (IndexReader, error) { return r.ir, nil } -func (r *mockBReader) Chunks() (ChunkReader, error) { return r.cr, nil } -func (r *mockBReader) Tombstones() (TombstoneReader, error) { return newMemTombstones(), nil } -func (r *mockBReader) Meta() BlockMeta { return BlockMeta{MinTime: r.mint, MaxTime: r.maxt} } +func (r *mockBReader) Index() (IndexReader, error) { return r.ir, nil } 
+func (r *mockBReader) Chunks() (ChunkReader, error) { return r.cr, nil } +func (r *mockBReader) Tombstones() (tombstones.TombstoneReader, error) { + return tombstones.NewMemTombstones(), nil +} +func (r *mockBReader) Meta() BlockMeta { return BlockMeta{MinTime: r.mint, MaxTime: r.maxt} } diff --git a/querier_test.go b/querier_test.go index 2794e0e6..cbbfcdab 100644 --- a/querier_test.go +++ b/querier_test.go @@ -413,6 +413,11 @@ func TestBlockQuerierDelete(t *testing.T) { exp SeriesSet } + tstones := tombstones.NewMemTombstones() + tstones.AddInterval(1, tombstones.Interval{1, 3}) + tstones.AddInterval(2, tombstones.Interval{1, 3}, tombstones.Interval{6, 10}) + tstones.AddInterval(3, tombstones.Interval{6, 10}) + cases := struct { data []seriesSamples @@ -461,11 +466,7 @@ func TestBlockQuerierDelete(t *testing.T) { }, }, }, - tombstones: &tombstones.MemTombstones{IntvlGroups: map[uint64]tombstones.Intervals{ - 1: tombstones.Intervals{{1, 3}}, - 2: tombstones.Intervals{{1, 3}, {6, 10}}, - 3: tombstones.Intervals{{6, 10}}, - }}, + tombstones: tstones, queries: []query{ { mint: 2, diff --git a/tombstones/tombstones.go b/tombstones/tombstones.go index 8b79632b..e82bccb0 100644 --- a/tombstones/tombstones.go +++ b/tombstones/tombstones.go @@ -199,27 +199,27 @@ func ReadTombstones(dir string) (TombstoneReader, int64, error) { return stonesMap, int64(len(b)), nil } -type MemTombstones struct { - IntvlGroups map[uint64]Intervals +type memTombstones struct { + intvlGroups map[uint64]Intervals mtx sync.RWMutex } // NewMemTombstones creates new in memory TombstoneReader // that allows adding new intervals. 
-func NewMemTombstones() *MemTombstones { - return &MemTombstones{IntvlGroups: make(map[uint64]Intervals)} +func NewMemTombstones() *memTombstones { + return &memTombstones{intvlGroups: make(map[uint64]Intervals)} } -func (t *MemTombstones) Get(ref uint64) (Intervals, error) { +func (t *memTombstones) Get(ref uint64) (Intervals, error) { t.mtx.RLock() defer t.mtx.RUnlock() - return t.IntvlGroups[ref], nil + return t.intvlGroups[ref], nil } -func (t *MemTombstones) Iter(f func(uint64, Intervals) error) error { +func (t *memTombstones) Iter(f func(uint64, Intervals) error) error { t.mtx.RLock() defer t.mtx.RUnlock() - for ref, ivs := range t.IntvlGroups { + for ref, ivs := range t.intvlGroups { if err := f(ref, ivs); err != nil { return err } @@ -227,23 +227,23 @@ func (t *MemTombstones) Iter(f func(uint64, Intervals) error) error { return nil } -func (t *MemTombstones) Total() uint64 { +func (t *memTombstones) Total() uint64 { t.mtx.RLock() defer t.mtx.RUnlock() total := uint64(0) - for _, ivs := range t.IntvlGroups { + for _, ivs := range t.intvlGroups { total += uint64(len(ivs)) } return total } // AddInterval to an existing MemTombstones -func (t *MemTombstones) AddInterval(ref uint64, itvs ...Interval) { +func (t *memTombstones) AddInterval(ref uint64, itvs ...Interval) { t.mtx.Lock() defer t.mtx.Unlock() for _, itv := range itvs { - t.IntvlGroups[ref] = t.IntvlGroups[ref].Add(itv) + t.intvlGroups[ref] = t.intvlGroups[ref].Add(itv) } } From 3b20c644aaa1769f149b3d7929ae65f2cb09a669 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 7 Aug 2019 15:42:45 -0700 Subject: [PATCH 16/16] Export WatcherMetrics struct and pass the struct to NewWatcher rather than creating and registering in the constructor. 
Signed-off-by: Callum Styan --- wal/watcher.go | 20 ++++++++++---------- wal/watcher_test.go | 13 +++++++------ 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/wal/watcher.go b/wal/watcher.go index c29fdc28..fb6e3c64 100644 --- a/wal/watcher.go +++ b/wal/watcher.go @@ -51,7 +51,7 @@ type WriteTo interface { SeriesReset(int) } -type watcherMetrics struct { +type WatcherMetrics struct { recordsRead *prometheus.CounterVec recordDecodeFails *prometheus.CounterVec samplesSentPreTailing *prometheus.CounterVec @@ -65,7 +65,7 @@ type Watcher struct { logger log.Logger walDir string lastCheckpoint string - metrics *watcherMetrics + metrics *WatcherMetrics readerMetrics *liveReaderMetrics startTime int64 @@ -82,8 +82,8 @@ type Watcher struct { maxSegment int } -func NewWatcherMetrics(reg prometheus.Registerer) *watcherMetrics { - m := &watcherMetrics{ +func NewWatcherMetrics(reg prometheus.Registerer) *WatcherMetrics { + m := &WatcherMetrics{ recordsRead: prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "prometheus", @@ -123,17 +123,17 @@ func NewWatcherMetrics(reg prometheus.Registerer) *watcherMetrics { } if reg != nil { - reg.Register(m.recordsRead) - reg.Register(m.recordDecodeFails) - reg.Register(m.samplesSentPreTailing) - reg.Register(m.currentSegment) + reg.MustRegister(m.recordsRead) + reg.MustRegister(m.recordDecodeFails) + reg.MustRegister(m.samplesSentPreTailing) + reg.MustRegister(m.currentSegment) } return m } // NewWatcher creates a new WAL watcher for a given WriteTo. 
-func NewWatcher(reg prometheus.Registerer, logger log.Logger, name string, writer WriteTo, walDir string) *Watcher { +func NewWatcher(reg prometheus.Registerer, metrics *WatcherMetrics, logger log.Logger, name string, writer WriteTo, walDir string) *Watcher { if logger == nil { logger = log.NewNopLogger() } @@ -141,7 +141,7 @@ func NewWatcher(reg prometheus.Registerer, logger log.Logger, name string, write w := Watcher{ logger: logger, writer: writer, - metrics: NewWatcherMetrics(reg), + metrics: metrics, readerMetrics: NewLiveReaderMetrics(reg), walDir: path.Join(walDir, "wal"), name: name, diff --git a/wal/watcher_test.go b/wal/watcher_test.go index 216793a6..e559464f 100644 --- a/wal/watcher_test.go +++ b/wal/watcher_test.go @@ -30,6 +30,7 @@ import ( var defaultRetryInterval = 100 * time.Millisecond var defaultRetries = 100 +var wMetrics = NewWatcherMetrics(prometheus.DefaultRegisterer) // retry executes f() n times at each interval until it returns true. func retry(t *testing.T, interval time.Duration, n int, f func() bool) { @@ -137,7 +138,7 @@ func TestTailSamples(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher := NewWatcher(prometheus.DefaultRegisterer, wMetrics, nil, "", wt, dir) watcher.startTime = now.UnixNano() // Set the Watcher's metrics so they're not nil pointers. 
@@ -216,7 +217,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher := NewWatcher(prometheus.DefaultRegisterer, wMetrics, nil, "", wt, dir) go watcher.Start() expected := seriesCount @@ -302,7 +303,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { _, _, err = w.Segments() testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher := NewWatcher(prometheus.DefaultRegisterer, wMetrics, nil, "", wt, dir) go watcher.Start() expected := seriesCount * 2 @@ -367,7 +368,7 @@ func TestReadCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher := NewWatcher(prometheus.DefaultRegisterer, wMetrics, nil, "", wt, dir) // watcher. go watcher.Start() @@ -433,7 +434,7 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { }, 0) wt := newWriteToMock() - watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher := NewWatcher(prometheus.DefaultRegisterer, wMetrics, nil, "", wt, dir) watcher.maxSegment = -1 // Set the Watcher's metrics so they're not nil pointers. @@ -504,7 +505,7 @@ func TestCheckpointSeriesReset(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher := NewWatcher(prometheus.DefaultRegisterer, wMetrics, nil, "", wt, dir) watcher.maxSegment = -1 go watcher.Start()